From be908b3d7315c357f07102d71ef8f242cbb20331 Mon Sep 17 00:00:00 2001
From: Animesh Jain
Date: Mon, 27 Jan 2025 12:48:25 -0800
Subject: [PATCH] Log guard latency (#145132)

Summary:
X-link: https://github.com/pytorch/pytorch/pull/145132

Approved by: https://github.com/ezyang
ghstack dependencies: #145509

Reviewed By: ZainRizvi

Differential Revision: D68685480

fbshipit-source-id: fe35b627407e32a580f78027562b092083043d99
---
 .../dynamo/dynamobench/_dynamo/testing.py | 48 ++++++++++---------
 .../dynamo/dynamobench/_dynamo/utils.py   |  1 +
 2 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/userbenchmark/dynamo/dynamobench/_dynamo/testing.py b/userbenchmark/dynamo/dynamobench/_dynamo/testing.py
index 6474cceb2f..89f1d538c0 100644
--- a/userbenchmark/dynamo/dynamobench/_dynamo/testing.py
+++ b/userbenchmark/dynamo/dynamobench/_dynamo/testing.py
@@ -181,30 +181,32 @@ def insert_nops(instructions: list[Any], code_options: Any) -> None:
         instructions.insert(0, create_instruction("NOP"))
         instructions.insert(0, create_instruction("NOP"))
 
-    if is_generator(frame.f_code):
-        return None
-
-    debug_checks(frame.f_code)
-    code = transform_code_object(frame.f_code, insert_nops)
-    graph = OutputGraph(
-        code_options={},
-        compiler_fn=None,
-        root_tx=None,
-        export=False,
-        export_constraints=None,
-        frame_state={"_id": 0},
-        # TODO: shouldn't this be f_locals/f_globals from frame?
-        local_scope=locals(),
-        global_scope=globals(),
-        f_code=frame.f_code,
-        torch_function_mode_stack=[],
-    )
+    metrics_context = torch._dynamo.utils.get_metrics_context()
+    with torch._dynamo.utils.dynamo_timed("debug_insert_nops"), metrics_context:
+        if is_generator(frame.f_code):
+            return None
+
+        debug_checks(frame.f_code)
+        code = transform_code_object(frame.f_code, insert_nops)
+        graph = OutputGraph(
+            code_options={},
+            compiler_fn=None,
+            root_tx=None,
+            export=False,
+            export_constraints=None,
+            frame_state={"_id": 0},
+            # TODO: shouldn't this be f_locals/f_globals from frame?
+            local_scope=locals(),
+            global_scope=globals(),
+            f_code=frame.f_code,
+            torch_function_mode_stack=[],
+        )
 
-    return GuardedCode(
-        code,
-        CheckFunctionManager(frame.f_code, graph).guard_manager,  # type: ignore[arg-type]
-        CompileId(frame_id=0, frame_compile_id=0),
-    )
+        return GuardedCode(
+            code,
+            CheckFunctionManager(frame.f_code, graph).guard_manager,  # type: ignore[arg-type]
+            CompileId(frame_id=0, frame_compile_id=0),
+        )
 
 
 class CompileCounter:
diff --git a/userbenchmark/dynamo/dynamobench/_dynamo/utils.py b/userbenchmark/dynamo/dynamobench/_dynamo/utils.py
index 3079ec5f2d..5d6fe5e13a 100644
--- a/userbenchmark/dynamo/dynamobench/_dynamo/utils.py
+++ b/userbenchmark/dynamo/dynamobench/_dynamo/utils.py
@@ -1186,6 +1186,7 @@ class CompilationMetrics:
     tensorify_float_attempt: Optional[bool] = None
     tensorify_float_success: Optional[bool] = None
     tensorify_float_failure: Optional[set[str]] = None
+    guard_latency_us: Optional[float] = None
 
     @classmethod
     def create(cls, metrics: dict[str, Any]):
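
For context, the patch leans on torch._dynamo.utils.dynamo_timed and a metrics
context to capture per-compilation timings such as the new guard_latency_us
field. The sketch below is a minimal, self-contained illustration of that
timed-region pattern; every definition in it (MetricsContext,
get_metrics_context, dynamo_timed, the *_latency_us key) is a hypothetical
stand-in written for this note, not the actual torch._dynamo.utils code.

    # Minimal sketch of the timed-region / metrics-context pattern, using
    # hypothetical stand-ins rather than the real torch._dynamo.utils code.
    import time
    from contextlib import contextmanager
    from typing import Any, Iterator


    class MetricsContext:
        """Accumulates metrics for one compilation and emits them on exit."""

        def __init__(self) -> None:
            self._metrics: dict[str, Any] = {}

        def __enter__(self) -> "MetricsContext":
            self._metrics = {}  # fresh record for this compilation
            return self

        def __exit__(self, *exc: object) -> None:
            # In PyTorch this record would feed a CompilationMetrics row;
            # here we simply print it.
            print("compilation metrics:", self._metrics)

        def set(self, name: str, value: Any) -> None:
            self._metrics[name] = value


    _METRICS_CONTEXT = MetricsContext()


    def get_metrics_context() -> MetricsContext:
        return _METRICS_CONTEXT


    @contextmanager
    def dynamo_timed(event_name: str) -> Iterator[None]:
        # Time the wrapped region and record it in microseconds, matching
        # the *_latency_us naming convention of the new field.
        start = time.perf_counter()
        try:
            yield
        finally:
            elapsed_us = (time.perf_counter() - start) * 1e6
            get_metrics_context().set(f"{event_name}_latency_us", elapsed_us)


    # Usage: nest the timer inside the metrics context so the latency is
    # recorded before the context emits its record on exit.
    with get_metrics_context():
        with dynamo_timed("debug_insert_nops"):
            sum(range(1_000_000))  # stand-in for the real code transformation

Note that the sketch nests the timer strictly inside the metrics context,
whereas the patch uses the combined one-liner
`with torch._dynamo.utils.dynamo_timed(...), metrics_context:`; the real
implementation coordinates the two, but in this simplified mock the nesting
is what guarantees the latency lands in the record before it is emitted.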