Draft refactor of et replay (#110)
Summary:
X-link: facebookresearch/HolisticTraceAnalysis#131

Pull Request resolved: #110

Add a new directory structure for et_replay to give this code better encapsulation. comms and compute are left unchanged; only the et_replay files move. The new layout is shown below, followed by a sketch of the resulting import-path changes.

```
[bcoutinho@devgpu038.ftw6 ~/fbsource/fbcode/param_bench/et_replay (d4b11e786)]$ tree .
.
├── lib
│   ├── et_replay_utils.py
│   ├── execution_trace.py
│   └── utils.py
├── README.md
├── tests
│   ├── inputs
│   │   ├── 1.0.3-chakra.0.0.4
│   │   │   └── resnet_1gpu_et.json.gz
│   │   ├── 1.1.0-chakra.0.0.4
│   │   │   └── resnet_2gpu_et.json.gz
│   │   ├── dlrm_kineto.tar.gz
│   │   ├── dlrm_pytorch_et.tar.gz
│   │   ├── __init__.py
│   │   ├── linear_et.json.gz
│   │   ├── linear_kineto.json.gz
│   │   ├── resnet_et.json.gz
│   │   └── resnet_kineto.json.gz
│   └── test_execution_trace.py
└── tools
    ├── et_replay.py
    └── validate_trace.py
```
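For downstream code, the visible effect of the move is an import-path change. A minimal before/after sketch; the module paths and names are taken from the diff hunks below, while grouping them into one snippet is illustrative:

```python
# Old locations (pre-refactor):
# from param_bench.train.compute.python.tools.execution_trace import ExecutionTrace, NodeType
# from param_bench.train.compute.python.tools.utility import trace_handler
# from param_bench.train.compute.python.tools.et_replay_utils import build_torchscript_func

# New locations (this diff):
from param_bench.et_replay.lib.execution_trace import ExecutionTrace, NodeType
from param_bench.et_replay.lib.utils import trace_handler
from param_bench.et_replay.lib.et_replay_utils import build_torchscript_func
```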

Reviewed By: shengfukevin

Differential Revision: D56960365

fbshipit-source-id: d2ef172bc6c4629d78222357e616df9bddaec81e
briancoutinho authored and facebook-github-bot committed May 7, 2024
1 parent 2b4cf3e commit 9b1946f
Showing 16 changed files with 18 additions and 28 deletions.
File renamed without changes.
```diff
@@ -3,9 +3,9 @@
 import torch
 from fbgemm_gpu.split_table_batched_embeddings_ops import PoolingMode, WeightDecayMode
+from param_bench.et_replay.lib.execution_trace import NodeType

 from param_bench.train.compute.python.lib.pytorch.config_util import create_op_args
-from param_bench.train.compute.python.tools.execution_trace import NodeType

 from param_bench.train.compute.python.workloads.pytorch.split_table_batched_embeddings_ops import (
     SplitTableBatchedEmbeddingBagsCodegenInputDataGenerator,
@@ -473,7 +473,7 @@ def generate_prefix(label, skip_nodes, et_input, cuda, compute_only, tf32, rows)
 import torch
 from param_bench.train.comms.pt import commsTraceReplay
-from param_bench.train.compute.python.tools.et_replay_utils import (
+from param_bench.et_replay.lib.et_replay_utils import (
     build_fbgemm_func,
     build_torchscript_func,
     generate_fbgemm_tensors,
@@ -482,8 +482,8 @@ def generate_prefix(label, skip_nodes, et_input, cuda, compute_only, tf32, rows)
     is_qualified,
 )
-from param_bench.train.compute.python.tools.execution_trace import ExecutionTrace
-from param_bench.train.compute.python.tools.utility import trace_handler
+from param_bench.et_replay.lib.execution_trace import ExecutionTrace
+from param_bench.et_replay.lib.utils import trace_handler

 print("PyTorch version: ", torch.__version__)
```
```diff
@@ -997,8 +997,7 @@ def main():
     execution_data: TextIO
     execution_trace: ExecutionTrace = ExecutionTrace(json.load(execution_data))
     execution_trace.set_iterations(args.step_annotation)
-    # nocommit remove
-    execution_trace = execution_trace.clone_one_iteration(2)
+    # execution_trace = execution_trace.clone_one_iteration(2)

     if args.list_op:
         execution_trace.print_op_stats(args.detail, args.json)
```
```diff
@@ -6,7 +6,7 @@
 import uuid
 from typing import Any, Dict

-from param_bench.train.compute.python.tools.execution_trace import ExecutionTrace
+from param_bench.et_replay.lib.execution_trace import ExecutionTrace


 def get_tmp_trace_filename() -> str:
```
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
```diff
@@ -12,11 +12,7 @@

 import numpy as np
 import torch
-from param_bench.train.comms.pt import comms_utils, commsTraceReplay
-
-from param_bench.train.compute.python.lib import pytorch as lib_pytorch
-from param_bench.train.compute.python.lib.init_helper import load_modules
-from param_bench.train.compute.python.tools.et_replay_utils import (
+from param_bench.et_replay.lib.et_replay_utils import (
     build_fbgemm_func,
     build_torchscript_func,
     build_triton_func,
@@ -40,12 +36,13 @@
     TORCH_DTYPES_RNG_str,
 )

-from param_bench.train.compute.python.tools.execution_trace import (
-    ExecutionTrace,
-    NodeType,
-)
+from param_bench.et_replay.lib.execution_trace import ExecutionTrace, NodeType

+from param_bench.et_replay.lib.utils import trace_handler
+from param_bench.train.comms.pt import comms_utils, commsTraceReplay

-from param_bench.train.compute.python.tools.utility import trace_handler
+from param_bench.train.compute.python.lib import pytorch as lib_pytorch
+from param_bench.train.compute.python.lib.init_helper import load_modules
 from param_bench.train.compute.python.workloads import pytorch as workloads_pytorch
 from torch._inductor.codecache import AsyncCompile, TritonFuture

@@ -129,7 +126,7 @@ def __init__(self):
         self.label = ""

         try:
-            from param_bench.train.compute.python.tools.fb.internals import (
+            from param_bench.et_replay.lib.fb.internals import (
                 add_internal_label,
                 add_internal_parallel_nodes_parents,
                 add_internal_skip_nodes,
@@ -212,9 +209,7 @@ def initBench(self):
         # Input et trace should be explicitly specified after --input.
         if "://" in self.args.input:
             try:
-                from param_bench.train.compute.python.tools.fb.internals import (
-                    read_remote_trace,
-                )
+                from param_bench.et_replay.lib.fb.internals import read_remote_trace
             except ImportError:
                 logging.info("FB internals not present")
                 exit(1)
@@ -239,9 +234,7 @@
         # Different processes should read different traces based on global_rank_id.
         if "://" in self.args.trace_path:
             try:
-                from param_bench.train.compute.python.tools.fb.internals import (
-                    read_remote_trace,
-                )
+                from param_bench.et_replay.lib.fb.internals import read_remote_trace
             except ImportError:
                 logging.info("FB internals not present")
                 exit(1)
@@ -1507,9 +1500,7 @@ def benchTime(self):
         end_time = datetime.now()

         try:
-            from param_bench.train.compute.python.tools.fb.internals import (
-                generate_query_url,
-            )
+            from param_bench.et_replay.lib.fb.internals import generate_query_url
         except ImportError:
             logging.info("FB internals not present")
         else:
```
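One idiom recurs throughout the hunks above: Facebook-internal helpers now live under `param_bench.et_replay.lib.fb.internals` and are imported lazily inside `try`/`except ImportError`, so the open-source build only fails when a feature that needs them (such as a remote trace URI) is actually requested. A minimal sketch of that pattern, assuming only what the diff shows; the wrapper function and `read_remote_trace`'s return value are illustrative:

```python
import logging
import sys


def read_trace(trace_path: str) -> str:
    """Illustrative wrapper around the lazy-import idiom from the diff."""
    if "://" in trace_path:
        # Remote traces need the internal-only reader.
        try:
            from param_bench.et_replay.lib.fb.internals import read_remote_trace
        except ImportError:
            logging.info("FB internals not present")
            sys.exit(1)
        return read_remote_trace(trace_path)  # return type assumed here
    # Local traces work in the open-source build.
    with open(trace_path) as f:
        return f.read()
```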
```diff
@@ -9,7 +9,7 @@
 import gzip
 import json

-from .execution_trace import ExecutionTrace
+from param_bench.et_replay.lib.execution_trace import ExecutionTrace


 class TraceValidator:
```
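With the move, the validator and the tests both resolve `ExecutionTrace` through the absolute `param_bench.et_replay.lib` path rather than a relative import. A minimal loading sketch; the constructor call mirrors the hunks above and the file name comes from `tests/inputs` in the tree, while the relative path prefix is an assumption:

```python
import gzip
import json

from param_bench.et_replay.lib.execution_trace import ExecutionTrace

# Test inputs ship gzipped; decode to text, parse the JSON payload, and
# wrap it exactly as the diff does: ExecutionTrace(json.load(...)).
with gzip.open("tests/inputs/linear_et.json.gz", "rt") as f:
    et = ExecutionTrace(json.load(f))
```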
