Remove backward perf calculation for RW inference (pytorch#2019)
Summary:
Pull Request resolved: pytorch#2019

As titled.

Reviewed By: joshuadeng

Differential Revision: D57568258

fbshipit-source-id: b70f4f87e1848c73ac2959e686da30047f978621
gnahzg authored and facebook-github-bot committed May 21, 2024
1 parent 77d1866 commit ea04dac
Showing 2 changed files with 12 additions and 4 deletions.
8 changes: 8 additions & 0 deletions torchrec/distributed/planner/shard_estimators.py
@@ -364,6 +364,7 @@ def perf_func_emb_wall_time(
 is_pooled=is_pooled,
 is_weighted=is_weighted,
 expected_cache_fetches=expected_cache_fetches,
+is_inference=is_inference,
 )
 elif sharding_type == ShardingType.TABLE_ROW_WISE.value:
 shard_perf = cls._get_twrw_sharding_perf(
@@ -545,6 +546,7 @@ def _get_rw_sharding_perf(
 is_pooled: bool,
 is_weighted: bool = False,
 expected_cache_fetches: float = 0,
+is_inference: bool = False,
 ) -> Perf:
 batch_inputs = (
 sum(
@@ -584,6 +586,12 @@ def _get_rw_sharding_perf(
 input_read_size + embedding_lookup_size + fwd_output_write_size
 ) / device_bw

+if is_inference:
+    # only consider forward compute and comms for inference
+    return Perf(
+        fwd_compute=fwd_compute, fwd_comms=fwd_comms, bwd_compute=0, bwd_comms=0
+    )
+
 bwd_comms = bwd_output_write_size / comms_bw

 bwd_batched_copy = bwd_output_write_size * BATCHED_COPY_PERF_FACTOR / device_bw
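For context on the change above: the estimator returns a Perf that splits the estimate into forward and backward compute/comms, and with is_inference=True the backward terms are now zeroed for row-wise shards. The following is a minimal sketch of that effect, assuming a simplified stand-in for torchrec's Perf class and made-up timing numbers (the fwd/bwd values here are illustrative, not planner output):

from dataclasses import dataclass


@dataclass
class Perf:
    # Simplified stand-in for torchrec's Perf; the real class may carry more logic.
    fwd_compute: float
    fwd_comms: float
    bwd_compute: float
    bwd_comms: float

    @property
    def total(self) -> float:
        # Total estimated wall time is the sum of all four components.
        return self.fwd_compute + self.fwd_comms + self.bwd_compute + self.bwd_comms


# Illustrative (made-up) per-shard timings in seconds.
fwd_compute, fwd_comms = 3.0e-5, 2.5e-5
bwd_compute, bwd_comms = 8.0e-5, 4.5e-5

training_perf = Perf(fwd_compute, fwd_comms, bwd_compute, bwd_comms)
# With is_inference=True the RW estimator returns only the forward terms.
inference_perf = Perf(fwd_compute, fwd_comms, bwd_compute=0, bwd_comms=0)

assert inference_perf.total < training_perf.total

This is why the expected row-wise estimates in the test below shrink: only fwd_compute and fwd_comms contribute to the inference perf.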
8 changes: 4 additions & 4 deletions torchrec/distributed/planner/tests/test_shard_estimators.py
@@ -404,11 +404,11 @@ def test_inference_1_table_perf(self) -> None:
 ("quant", "table_wise"): [0.0001296231579222408],
 ("quant_uvm", "table_wise"): [0.018350937787224266],
 ("quant_uvm_caching", "table_wise"): [0.004269758427175579],
-("quant", "row_wise"): [0.0001819317157451923, 0.0001819317157451923],
-("quant_uvm", "row_wise"): [0.023103601792279417, 0.023103601792279417],
+("quant", "row_wise"): [0.000055200413052187844, 0.000055200413052187844],
+("quant_uvm", "row_wise"): [0.005261290307138481, 0.005261290307138481],
 ("quant_uvm_caching", "row_wise"): [
-    0.005390052899352861,
-    0.005390052899352861,
+    0.0012380962042674274,
+    0.0012380962042674274,
 ],
 ("quant", "column_wise"): [0.0001296231579222408],
 ("quant_uvm", "column_wise"): [0.018350937787224266],
