Skip to content

Commit

Permalink
update compile time benchmarks to dump compile times to stdout and csv (#145447)
Browse files Browse the repository at this point in the history

Summary:
```python
# inductor.csv
dev,name,batch_size,accuracy,calls_captured,unique_graphs,graph_breaks,unique_graph_breaks,autograd_captures,autograd_compiles,cudagraph_skips,compilation_latency
cuda,cait_m36_384,8,pass,2510,1,0,0,0,0,0,87.705186
```

```python
loading model: 0it [01:27, ?it/s]
cuda eval  cait_m36_384
Compilation time (from dynamo_timed): 87.705186276  # <----------------
pass
TIMING: _recursive_pre_grad_passes:0.11023 pad_mm_benchmark:0.50341 _recursive_joint_graph_passes:3.88557 _recursive_post_grad_passes:6.71182 async_compile.wait:4.16914 code_gen:17.57586 inductor_compile:42.55769 backend_compile:72.47122 entire_frame_compile:87.70519 gc:0.00112 total_wall_time:87.70519
STATS: call_* op count: 2510 | FakeTensorMode.__torch_dispatch__:101743 | FakeTensor.__torch_dispatch__:12959 | ProxyTorchDispatchMode.__torch_dispatch__:41079
Dynamo produced 1 graphs covering 2510 ops with 0 graph breaks (0 unique)
```

X-link: pytorch/pytorch#145447
Approved by: https://github.com/ezyang

Reviewed By: izaitsevfb

Differential Revision: D68570811

fbshipit-source-id: c7101c08a3435fa3567bce505f73eda86d056d63
  • Loading branch information
xmfan authored and facebook-github-bot committed Jan 23, 2025
1 parent 412a0f3 commit c19f824
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions userbenchmark/dynamo/dynamobench/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,8 @@ def output_signpost(data, args, suite, error=None):

from torch._dynamo.utils import calculate_time_spent, compilation_time_metrics

wall_time_by_phase = calculate_time_spent()

open_source_signpost(
subsystem="dynamo_benchmark",
name=event_name,
Expand All @@ -550,7 +552,7 @@ def output_signpost(data, args, suite, error=None):
# NB: Externally, compilation_metrics colloquially refers to
# the coarse-grained phase timings, even though internally
# they are called something else
"compilation_metrics": calculate_time_spent(),
"compilation_metrics": wall_time_by_phase,
"agg_compilation_metrics": {
k: sum(v) for k, v in compilation_time_metrics.items()
},
Expand All @@ -563,6 +565,8 @@ def output_signpost(data, args, suite, error=None):
),
)

return wall_time_by_phase["total_wall_time"]


def nothing(f):
return f
Expand Down Expand Up @@ -2917,13 +2921,17 @@ def record_status(accuracy_status, dynamo_start_stats):
headers.append(k)
fields.append(v)

write_outputs(output_filename, headers, fields)

output_signpost(
total_wall_time = output_signpost(
dict(zip(o_headers, o_fields)),
self.args,
self.suite_name,
)
headers.append("compilation_latency")
fields.append(total_wall_time)
write_outputs(output_filename, headers, fields)

if self.args.print_compilation_time:
print(f"Compilation time (from dynamo_timed): {total_wall_time}")

return accuracy_status

Expand Down

0 comments on commit c19f824

Please sign in to comment.