Remove backward perf calculation for RW inference (pytorch#2019)
Summary:
Pull Request resolved: pytorch#2019

As titled.

Reviewed By: joshuadeng

Differential Revision: D57568258

fbshipit-source-id: b70f4f87e1848c73ac2959e686da30047f978621
gnahzg authored and facebook-github-bot committed May 21, 2024
1 parent 77d1866 commit ea04dac
Showing 2 changed files with 12 additions and 4 deletions.
8 changes: 8 additions & 0 deletions torchrec/distributed/planner/shard_estimators.py
@@ -364,6 +364,7 @@ def perf_func_emb_wall_time(
 is_pooled=is_pooled,
 is_weighted=is_weighted,
 expected_cache_fetches=expected_cache_fetches,
+is_inference=is_inference,
 )
 elif sharding_type == ShardingType.TABLE_ROW_WISE.value:
 shard_perf = cls._get_twrw_sharding_perf(
@@ -545,6 +546,7 @@ def _get_rw_sharding_perf(
 is_pooled: bool,
 is_weighted: bool = False,
 expected_cache_fetches: float = 0,
+is_inference: bool = False,
 ) -> Perf:
 batch_inputs = (
 sum(
@@ -584,6 +586,12 @@ def _get_rw_sharding_perf(
 input_read_size + embedding_lookup_size + fwd_output_write_size
 ) / device_bw

+if is_inference:
+    # only consider forward compute and comms for inference
+    return Perf(
+        fwd_compute=fwd_compute, fwd_comms=fwd_comms, bwd_compute=0, bwd_comms=0
+    )
+
 bwd_comms = bwd_output_write_size / comms_bw

 bwd_batched_copy = bwd_output_write_size * BATCHED_COPY_PERF_FACTOR / device_bw
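For context on the change above: the estimator returns a Perf that splits the estimate into forward and backward compute/comms, and with is_inference=True the backward terms are now zeroed for row-wise shards. The following is a minimal sketch of that effect, assuming a simplified stand-in for torchrec's Perf class and made-up timing numbers (the fwd/bwd values here are illustrative, not planner output):

from dataclasses import dataclass


@dataclass
class Perf:
    # Simplified stand-in for torchrec's Perf; the real class may carry more logic.
    fwd_compute: float
    fwd_comms: float
    bwd_compute: float
    bwd_comms: float

    @property
    def total(self) -> float:
        # Total estimated wall time is the sum of all four components.
        return self.fwd_compute + self.fwd_comms + self.bwd_compute + self.bwd_comms


# Illustrative (made-up) per-shard timings in seconds.
fwd_compute, fwd_comms = 3.0e-5, 2.5e-5
bwd_compute, bwd_comms = 8.0e-5, 4.5e-5

training_perf = Perf(fwd_compute, fwd_comms, bwd_compute, bwd_comms)
# With is_inference=True the RW estimator returns only the forward terms.
inference_perf = Perf(fwd_compute, fwd_comms, bwd_compute=0, bwd_comms=0)

assert inference_perf.total < training_perf.total

This is why the expected row-wise estimates in the test below shrink: only fwd_compute and fwd_comms contribute to the inference perf.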
8 changes: 4 additions & 4 deletions torchrec/distributed/planner/tests/test_shard_estimators.py
@@ -404,11 +404,11 @@ def test_inference_1_table_perf(self) -> None:
 ("quant", "table_wise"): [0.0001296231579222408],
 ("quant_uvm", "table_wise"): [0.018350937787224266],
 ("quant_uvm_caching", "table_wise"): [0.004269758427175579],
-("quant", "row_wise"): [0.0001819317157451923, 0.0001819317157451923],
-("quant_uvm", "row_wise"): [0.023103601792279417, 0.023103601792279417],
+("quant", "row_wise"): [0.000055200413052187844, 0.000055200413052187844],
+("quant_uvm", "row_wise"): [0.005261290307138481, 0.005261290307138481],
 ("quant_uvm_caching", "row_wise"): [
-    0.005390052899352861,
-    0.005390052899352861,
+    0.0012380962042674274,
+    0.0012380962042674274,
 ],
 ("quant", "column_wise"): [0.0001296231579222408],
 ("quant_uvm", "column_wise"): [0.018350937787224266],
