From c19f824391258d78cc123b621b5e5f0b458cf12d Mon Sep 17 00:00:00 2001
From: Simon Fan
Date: Thu, 23 Jan 2025 12:29:42 -0800
Subject: [PATCH] update compile time benchmarks to dump compile times to stdout and csv (#145447)

Summary:
```python
# inductor.csv
dev,name,batch_size,accuracy,calls_captured,unique_graphs,graph_breaks,unique_graph_breaks,autograd_captures,autograd_compiles,cudagraph_skips,compilation_latency
cuda,cait_m36_384,8,pass,2510,1,0,0,0,0,0,87.705186
```

```python
loading model: 0it [01:27, ?it/s]
cuda eval cait_m36_384
Compilation time (from dynamo_timed): 87.705186276 # <----------------
pass
TIMING: _recursive_pre_grad_passes:0.11023 pad_mm_benchmark:0.50341 _recursive_joint_graph_passes:3.88557 _recursive_post_grad_passes:6.71182 async_compile.wait:4.16914 code_gen:17.57586 inductor_compile:42.55769 backend_compile:72.47122 entire_frame_compile:87.70519 gc:0.00112 total_wall_time:87.70519
STATS: call_* op count: 2510 | FakeTensorMode.__torch_dispatch__:101743 | FakeTensor.__torch_dispatch__:12959 | ProxyTorchDispatchMode.__torch_dispatch__:41079
Dynamo produced 1 graphs covering 2510 ops with 0 graph breaks (0 unique)
```

X-link: https://github.com/pytorch/pytorch/pull/145447
Approved by: https://github.com/ezyang

Reviewed By: izaitsevfb

Differential Revision: D68570811

fbshipit-source-id: c7101c08a3435fa3567bce505f73eda86d056d63
---
 userbenchmark/dynamo/dynamobench/common.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/userbenchmark/dynamo/dynamobench/common.py b/userbenchmark/dynamo/dynamobench/common.py
index 84c8944c68..c26fa6a1d7 100644
--- a/userbenchmark/dynamo/dynamobench/common.py
+++ b/userbenchmark/dynamo/dynamobench/common.py
@@ -538,6 +538,8 @@ def output_signpost(data, args, suite, error=None):
 
     from torch._dynamo.utils import calculate_time_spent, compilation_time_metrics
 
+    wall_time_by_phase = calculate_time_spent()
+
     open_source_signpost(
         subsystem="dynamo_benchmark",
         name=event_name,
@@ -550,7 +552,7 @@ def output_signpost(data, args, suite, error=None):
                 # NB: Externally, compilation_metrics colloquially refers to
                 # the coarse-grained phase timings, even though internally
                 # they are called something else
-                "compilation_metrics": calculate_time_spent(),
+                "compilation_metrics": wall_time_by_phase,
                 "agg_compilation_metrics": {
                     k: sum(v) for k, v in compilation_time_metrics.items()
                 },
@@ -563,6 +565,8 @@ def output_signpost(data, args, suite, error=None):
         ),
     )
 
+    return wall_time_by_phase["total_wall_time"]
+
 
 def nothing(f):
     return f
@@ -2917,13 +2921,17 @@ def record_status(accuracy_status, dynamo_start_stats):
                     headers.append(k)
                     fields.append(v)
 
-            write_outputs(output_filename, headers, fields)
-
-            output_signpost(
+            total_wall_time = output_signpost(
                 dict(zip(o_headers, o_fields)),
                 self.args,
                 self.suite_name,
             )
+            headers.append("compilation_latency")
+            fields.append(total_wall_time)
+            write_outputs(output_filename, headers, fields)
+
+            if self.args.print_compilation_time:
+                print(f"Compilation time (from dynamo_timed): {total_wall_time}")
 
             return accuracy_status
 