Skip to content

Commit

Permalink
Fixed issues with saving gradients: Issue #36
Browse files Browse the repository at this point in the history
  • Loading branch information
JohnMark Taylor committed Jan 23, 2025
1 parent b1a63a4 commit 94e3d2b
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 23 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

setup(
name="torchlens",
version="0.1.28",
version="0.1.29",
description="A package for extracting activations from PyTorch models",
long_description="A package for extracting activations from PyTorch models. Contains functionality for "
"extracting model activations, visualizing a model's computational graph, and "
Expand Down
8 changes: 4 additions & 4 deletions torchlens/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,10 @@
"grad_contents",
"save_gradients",
"has_saved_grad",
"grad_shapes",
"grad_dtypes",
"grad_fsizes",
"grad_fsizes_nice",
"grad_shape",
"grad_dtype",
"grad_fsize",
"grad_fsize_nice",
# Function call info
"func_applied",
"func_applied_name",
Expand Down
16 changes: 8 additions & 8 deletions torchlens/logging_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,10 @@ def log_source_tensor_exhaustive(
"grad_contents": None,
"save_gradients": self.save_gradients,
"has_saved_grad": False,
"grad_shapes": None,
"grad_dtypes": None,
"grad_fsizes": 0,
"grad_fsizes_nice": human_readable_size(0),
"grad_shape": None,
"grad_dtype": None,
"grad_fsize": 0,
"grad_fsize_nice": human_readable_size(0),
# Function call info:
"func_applied": None,
"func_applied_name": "none",
Expand Down Expand Up @@ -416,10 +416,10 @@ def log_function_output_tensors_exhaustive(
fields_dict["grad_contents"] = None
fields_dict["save_gradients"] = self.save_gradients
fields_dict["has_saved_grad"] = False
fields_dict["grad_shapes"] = None
fields_dict["grad_dtypes"] = None
fields_dict["grad_fsizes"] = 0
fields_dict["grad_fsizes_nice"] = human_readable_size(0)
fields_dict["grad_shape"] = None
fields_dict["grad_dtype"] = None
fields_dict["grad_fsize"] = 0
fields_dict["grad_fsize_nice"] = human_readable_size(0)

# Function call info
fields_dict["func_applied"] = func
Expand Down
18 changes: 8 additions & 10 deletions torchlens/tensor_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,10 @@ def __init__(self, fields_dict: Dict):
self.grad_contents = fields_dict["grad_contents"]
self.save_gradients = fields_dict["save_gradients"]
self.has_saved_grad = fields_dict["has_saved_grad"]
self.grad_shapes = fields_dict["grad_shapes"]
self.grad_dtypes = fields_dict["grad_dtypes"]
self.grad_fsizes = fields_dict["grad_fsizes"]
self.grad_fsizes_nice = fields_dict["grad_fsizes_nice"]
self.grad_shape = fields_dict["grad_shape"]
self.grad_dtype = fields_dict["grad_dtype"]
self.grad_fsize = fields_dict["grad_fsize"]
self.grad_fsize_nice = fields_dict["grad_fsize_nice"]

# Function call info:
self.func_applied = fields_dict["func_applied"]
Expand Down Expand Up @@ -298,12 +298,10 @@ def log_tensor_grad(self, grad: torch.Tensor):
"""
self.grad_contents = grad
self.has_saved_grad = True
self.grad_shapes = [g.shape for g in grad]
self.grad_dtypes = [g.dtype for g in grad]
self.grad_fsizes = [get_tensor_memory_amount(g) for g in grad]
self.grad_fsizes_nice = [
human_readable_size(get_tensor_memory_amount(g)) for g in grad
]
self.grad_shape = grad.shape
self.grad_dtype = grad.dtype
self.grad_fsize = get_tensor_memory_amount(grad)
self.grad_fsize_nice = human_readable_size(get_tensor_memory_amount(grad))

# ********************************************
# ************* Fetcher Functions ************
Expand Down

0 comments on commit 94e3d2b

Please sign in to comment.