提交 3c986dff 编写于 作者: Mingzhe Li's avatar Mingzhe Li 提交者: Facebook Github Bot

introduce auto_set to simplify benchmarking the backward path of operators (#23276)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/23276

This diff introduces a new feature to simplify benchmarking the backward path of ops. Here is an example:

```
...
self.input_one = torch.rand(M, N, K, requires_grad=self.auto_set())
self.input_two = torch.rand(M, N, K, requires_grad=self.auto_set())
...
```

In this way, the benchmark will generate three different test cases.
1. input_one requires grad
2. input_two requires grad
3. both inputs require grad

Here is a sample output:
```
# Benchmarking PyTorch: add
# Mode: Eager
# Name: add_M1_N8_K8_bwdall
# Input: M: 1, N: 8, K: 8
Backward Execution Time (us) : 863.744

# Benchmarking PyTorch: add
# Mode: Eager
# Name: add_M1_N8_K8_bwd1
# Input: M: 1, N: 8, K: 8
Backward Execution Time (us) : 727.915

# Benchmarking PyTorch: add
# Mode: Eager
# Name: add_M1_N8_K8_bwd2
# Input: M: 1, N: 8, K: 8
Backward Execution Time (us) : 687.626
```

Reviewed By: zheng-xq

Differential Revision: D16450355

fbshipit-source-id: 50ae0916e81c3ff9f0c482ed6d386319eb15b305
上级 41dfe720
......@@ -25,6 +25,11 @@ class Caffe2BenchmarkBase(object):
def __init__(self):
self.args = {}
self.user_provided_name = None
self._num_inputs_require_grads = 0
self._pass_count = 0
def _set_backward_test(self, is_backward):
    # Intentional no-op: the Caffe2 benchmark base does not use the
    # auto_set backward machinery. The stub keeps the interface
    # compatible with TorchBenchmarkBase so the shared test generator
    # can call it unconditionally.
    pass
def _device_option(self, device):
""" This method is used to set device option.
......
......@@ -24,6 +24,34 @@ class TorchBenchmarkBase(object):
def __init__(self):
self.user_given_name = None
self._jit_forward = None
self._pass_count = 0
self._num_inputs_require_grads = 0
def _set_backward_test(self, is_backward):
    # Record whether the current test pass benchmarks the backward path;
    # auto_set() consults this flag to decide whether any input should
    # get requires_grad=True.
    self._is_backward = is_backward
def auto_set(self):
""" This is used to automatically set the require_grad for the backward patch.
It is implemented based on two counters. One counter to save the number of
times init has been called. The other counter to save the number of times
this function itself has been called. In the very first time init is called,
this function counts how many inputs require gradient. In each of the
following init calls, this function will return only one true value.
Here is an example:
...
self.v1 = torch.rand(M, N, K, requires_grad=self.auto_set())
self.v2 = torch.rand(M, N, K, requires_grad=self.auto_set())
...
"""
if not self._is_backward:
return False
if self._pass_count == 0:
self._num_inputs_require_grads += 1
return True
else:
self._auto_set_counter += 1
return (self._pass_count == self._auto_set_counter)
def forward(self):
    # Placeholder: subclasses override this with the operator invocation
    # that the benchmark times.
    pass
......
......@@ -4,13 +4,37 @@ from __future__ import print_function
from __future__ import unicode_literals
from collections import namedtuple
import copy
import copy
import ast
import json
from benchmark_core import TestConfig
from benchmark_caffe2 import register_caffe2_op_test_case
from benchmark_pytorch import register_pytorch_op_test_case
from benchmark_utils import SkipInputShape
def _register_test(bench_op_obj, orig_test_attrs, tags, OperatorTestCase, run_backward, bwd_input):
    """ Register tests with the benchmark backend.
    Args:
        bench_op_obj: an object instantiated from a subclass of
            Caffe2BenchmarkBase/TorchBenchmarkBase which includes tensor
            creation and operator execution.
        orig_test_attrs: a dictionary of test configs; it is deep-copied
            so the caller's dictionary is never mutated.
        tags: an attribute in the test config used to filter inputs.
        OperatorTestCase: a named tuple used to save the metadata of a test.
        run_backward: a bool parameter indicating whether to run the
            backward path.
        bwd_input: name of the backward variant ('all', 1, 2, ...) when
            auto_set is used; falsy for plain tests.
    """
    # Work on a copy so the caller's attrs are untouched.
    test_attrs = copy.deepcopy(orig_test_attrs)
    test_attrs = {k: str(v) for k, v in test_attrs.items()}
    # JSON round-trip normalizes the strings (legacy py2 unicode handling).
    ascii_test_attrs = ast.literal_eval(json.dumps(test_attrs))
    # Render "k: v, k2: v2" by stripping the dict braces and quotes.
    input_config = str(ascii_test_attrs)[1:-1].replace('\'', '')
    if bwd_input:
        # When auto_set is used, the test name needs to include input.
        test_attrs.update({'bwd': bwd_input})
    test_name = bench_op_obj.test_name(**test_attrs)
    test_config = TestConfig(test_name, input_config, tags, run_backward)
    OperatorTestCase(bench_op_obj, test_config)
def _generate_test(configs, bench_op, OperatorTestCase, run_backward, op_name_function=None):
"""Generate PyTorch/Caffe2 tests of operators with different inputs.
Args:
......@@ -38,6 +62,7 @@ def _generate_test(configs, bench_op, OperatorTestCase, run_backward, op_name_fu
raise ValueError("Missing tags in configs")
input_config = str(test_attrs)[1:-1].replace('\'', '')
op = bench_op()
assert op is not None, "Can't create test"
tensor_error_info = None
# op_name_function is a dictionary which has op_name and op_function.
# an example of op_name_function is:
......@@ -49,18 +74,36 @@ def _generate_test(configs, bench_op, OperatorTestCase, run_backward, op_name_fu
op_name = op_name_function['op_name']
init_dict.update({'op_func' : op_name_function['op_func']})
op.set_module_name(op_name)
op._set_backward_test(run_backward)
try:
op.init(**init_dict)
except SkipInputShape:
print("Skipping: Config<{}> is not valid for op<{}>".format(input_config, op.module_name()))
continue
test_name = op.test_name(**test_attrs)
input_config = str(test_attrs)[1:-1].replace('\'', '')
test_config = TestConfig(test_name, input_config, tags, run_backward)
if op is not None:
OperatorTestCase(
op,
test_config)
input_name = None
# _num_inputs_require_grads is used to track the number of tensors
# which use auto_set().
if op._num_inputs_require_grads > 0:
input_name = 'all'
_register_test(op, test_attrs, tags, OperatorTestCase, run_backward, input_name)
# This for loop is only used when auto_set is used.
# _pass_count counts how many times init has been called.
# _auto_set_counter is reset after init is called.
for i in range(op._num_inputs_require_grads):
op._pass_count += 1
op._auto_set_counter = 0
# TODO(mingzhe09088): remove this deepcopy when we encounter
# performance issue.
new_op = copy.deepcopy(op)
new_op.init(**init_dict)
# Input name index will start from input1
input_name = i + 1
_register_test(new_op, test_attrs, tags, OperatorTestCase, run_backward, input_name)
OpMeta = namedtuple("OpMeta", "op_type num_inputs input_dims input_types \
......
from __future__ import absolute_import, division, print_function, unicode_literals
import operator_benchmark as op_bench
import torch
# Input shapes swept by the add benchmark: the cross product of the
# M, N, K lists below, tagged "short" for filtering.
add_configs = op_bench.cross_product_configs(
    M=[8, 1], N=[8, 2], K=[8, 4], tags=["short"]
)
class AddBenchmark(op_bench.TorchBenchmarkBase):
    """Micro-benchmark for torch.add.

    requires_grad for each operand comes from auto_set(), so the gradient
    benchmark automatically expands into three variants (input one only,
    input two only, both). The variant suffix in the test name can be
    used for filtering.
    """

    def init(self, M, N, K):
        self.set_module_name("add")
        self.input_one = torch.rand(M, N, K, requires_grad=self.auto_set())
        self.input_two = torch.rand(M, N, K, requires_grad=self.auto_set())

    def forward(self):
        return torch.add(self.input_one, self.input_two)
# Register a forward (eager-mode) test for every config in add_configs.
op_bench.generate_pt_test(add_configs, AddBenchmark)
# Register the matching backward-path tests; because AddBenchmark.init
# uses auto_set(), each config expands into bwd1/bwd2/bwdall variants.
op_bench.generate_pt_gradient_test(add_configs, AddBenchmark)
if __name__ == "__main__":
    op_bench.benchmark_runner.main()
Markdown 格式
0% or
您添加了 0 到此讨论。请谨慎行事。
先完成此消息的编辑!
想要评论请 注册