bugfix: update ggml codegen to support using cached results in operations
emrecakmakyurdu committed Mar 9, 2025
1 parent 9b2f289 commit 3353388
Showing 2 changed files with 106 additions and 4 deletions.
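
For orientation, here is a minimal sketch of the scenario this commit addresses, condensed from the new test added below: one output ("output") is fully computable at compile time from constant keys, yet it is also consumed by a later operation ("output3"), so the generated GGML code must keep it available as a tensor instead of only copying it to the output struct. Import paths and the hedged names below are assumptions based on the test, not taken verbatim from the library's documented API.

import numpy as np

# Import paths are assumptions; the new test below has the equivalent
# names (Model, Add, Multiply, Tensor, compile, GGMLBackend) in scope.
from mithril import compile  # assumed top-level export
from mithril.backends import GGMLBackend  # hypothetical path
from mithril.models import Add, Model, Multiply, Tensor  # hypothetical path

model = Model()
model += Add()(left="left", right="right", output="output")          # static once "left"/"right" are constants
model |= Multiply()(left="left1", right="right1", output="output2")  # depends on runtime inputs
model |= Add()(left="output2", right="output", output="output3")     # reuses the cached "output"
model.set_types(left=Tensor, right=Tensor, left1=Tensor, right1=Tensor)

backend = GGMLBackend()
ones = np.ones((5, 5), dtype=np.float32)
pm = compile(
    model,
    backend,
    shapes={"left1": [5, 5], "right1": [5, 5]},
    constant_keys={"left": backend.array(ones), "right": backend.array(ones)},
    jit=False,
    inference=True,
)
# "output" is now a cached (statically inferred) result that the generated
# GGML code must still expose as a tensor for the final Add.
outputs = pm.evaluate({"left1": backend.array(ones), "right1": backend.array(ones)})
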
34 changes: 30 additions & 4 deletions mithril/framework/codegen/ggml_gen.py
@@ -182,8 +182,8 @@ def update_function(
         # Create tensors
         init_block.append(c_ast.Comment("Create tensors only once"))  # type: ignore
         for key in self.determined_struct_keys[f"{fn_ref_name}_input_keys"]:
-            # If key statically inferred, skip tensor creation
-            if key in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]:
+            # If key is in cache, skip tensor creation
+            if key in self.determined_struct_keys[f"{fn_ref_name}_cache_keys"]:
                 continue
             shape = self._get_tensor_shape(key)
             if shape is not None:
@@ -193,6 +193,18 @@
                 )
                 init_block.append(c_ast.Assign(c_ast.Variable(key), tensor))  # type: ignore
 
+        # Create tensors for static keys if they are going to be used in other operations
+        for out_key in self.determined_struct_keys[f"{fn_ref_name}_cache_keys"]:
+            if (out_key in self.determined_struct_keys[f"{fn_ref_name}_input_keys"]
+                and out_key not in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]
+            ):
+                shape = self._get_tensor_shape(out_key)
+                tensor = c_ast.Call(
+                    f"ggml_new_tensor_{len(shape)}d",
+                    [ctx_name, "GGML_TYPE_F32"] + [str(size) for size in shape],
+                )
+                init_block.append(c_ast.Assign(self.create_key_ref(out_key, context=fn_ref_name), tensor))  # type: ignore
+
         # Create and build graph
         init_block.extend(
             [
@@ -233,14 +245,28 @@ def update_function(
         update_ptr_block: ast_block_type = []
         update_ptr_block.append(c_ast.Comment("Update tensor data for each call"))  # type: ignore
         for key in self.determined_struct_keys[f"{fn_ref_name}_input_keys"]:
-            # If key is statically inferred, assign to output directly
-            if key in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]:
+            # If the cached value is not used by another operation,
+            # assign it directly to the output.
+            if (key in self.determined_struct_keys[f"{fn_ref_name}_cache_keys"]
+                and key in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]
+            ):
                 update_ptr_block.append(
                     c_ast.Assign(  # type: ignore
                         self.create_key_ref(key, context=fn_ref_name),
                         c_ast.Arrow(c_ast.Variable("inputs"), f"{key}")
                     )
                 )
+            # If the cached value is an input to another operation,
+            # point its tensor data at the corresponding input data.
+            elif (key in self.determined_struct_keys[f"{fn_ref_name}_cache_keys"]
+                and key not in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]
+            ):
+                update_ptr_block.append(
+                    c_ast.Assign(  # type: ignore
+                        c_ast.Arrow(self.create_key_ref(key, context=fn_ref_name), "data"),
+                        c_ast.Arrow(c_ast.Arrow(c_ast.Variable("inputs"), key), "data"),
+                    )
+                )
             else:
                 update_ptr_block.append(
                     c_ast.Assign(  # type: ignore
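
To make the new branches concrete, the sketch below builds the same kind of c_ast nodes the updated codegen appends for a cached key that feeds another operation, with comments showing roughly the C they correspond to. The key name, context variable, shape, and the c_ast import path are illustrative assumptions; create_key_ref is approximated here with a plain variable reference.

from mithril.framework.codegen import c_ast  # import path assumed

ctx_name = "ctx"   # hypothetical GGML context variable name
key = "output"     # hypothetical cached key consumed by another operation
shape = (5, 5)

# Init block: create a GGML tensor for the cached key, roughly
#   output = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 5, 5);
tensor = c_ast.Call(
    f"ggml_new_tensor_{len(shape)}d",
    [ctx_name, "GGML_TYPE_F32"] + [str(size) for size in shape],
)
create_stmt = c_ast.Assign(c_ast.Variable(key), tensor)

# Update block: on every call, point the tensor's data at the cached
# value carried in the inputs struct, roughly
#   output->data = inputs->output->data;
update_stmt = c_ast.Assign(
    c_ast.Arrow(c_ast.Variable(key), "data"),
    c_ast.Arrow(c_ast.Arrow(c_ast.Variable("inputs"), key), "data"),
)
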
76 changes: 76 additions & 0 deletions tests/scripts/test_c_static_inference.py
@@ -364,3 +364,79 @@ def test_c_static_inference_5():
     assert np.allclose(c_backend.to_numpy(out), out_np)
     assert np.allclose(ggml_backend.to_numpy(out_ggml), out_np)
 
+
+def test_c_static_inference_6():
+    """
+    Test static inference support when outputs of the model are
+    reused by another operation that has partially static inputs.
+    """
+    model = Model()
+
+    model += Add()(left="left", right="right", output="output")
+    model |= Multiply()(left="left1", right="right1", output="output2")
+    model |= Add()(left="output2", right="output", output="output3")
+    model.set_types(left=Tensor, right=Tensor, left1=Tensor, right1=Tensor)
+
+    c_backend = CBackend()
+    np_backend = NumpyBackend()
+    ggml_backend = GGMLBackend()
+
+    left_static = np.ones((5, 5), dtype=np.float32)
+    right_static = np.ones((5, 5), dtype=np.float32)
+
+    c_pm = compile(
+        model,
+        c_backend,
+        shapes={"left1": [5, 5], "right1": [5, 5]},
+        constant_keys={
+            "left": c_backend.array(left_static),
+            "right": c_backend.array(right_static),
+        },
+        jit=False,
+        inference=True,
+    )
+
+    np_pm = compile(
+        model,
+        np_backend,
+        trainable_keys={"left1", "right1"},
+        constant_keys={
+            "left": left_static,
+            "right": right_static,
+        },
+        jit=False,
+        inference=True,
+    )
+
+    ggml_pm = compile(
+        model,
+        ggml_backend,
+        shapes={"left1": [5, 5], "right1": [5, 5]},
+        constant_keys={
+            "left": ggml_backend.array(left_static),
+            "right": ggml_backend.array(right_static),
+        },
+        jit=False,
+        inference=True,
+    )
+
+    # Numpy Backend
+    np_outputs = np_pm.evaluate({"left1": left_static, "right1": right_static})
+
+    # Raw C Backend
+    c_left = c_backend.array(left_static)
+    c_right = c_backend.array(right_static)
+    c_outputs = c_pm.evaluate({"left1": c_left, "right1": c_right})
+
+    # GGML Backend
+    ggml_left1 = ggml_backend.array(left_static)
+    ggml_right1 = ggml_backend.array(right_static)
+    ggml_outputs = ggml_pm.evaluate({"left1": ggml_left1, "right1": ggml_right1})
+
+    # Assertions
+    for key in np_outputs:
+        out = c_outputs[key]
+        out_ggml = ggml_outputs[key]
+        out_np = np_outputs[key]
+        assert np.allclose(c_backend.to_numpy(out), out_np)
+        assert np.allclose(ggml_backend.to_numpy(out_ggml), out_np)
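
As a sanity check on the numbers this test implies: with all-ones 5x5 inputs, the statically inferred output is left + right = 2, output2 is left1 * right1 = 1, and the operation that reuses the cached result gives output3 = output2 + output = 3. The same arithmetic in plain NumPy, independent of any backend:

import numpy as np

ones = np.ones((5, 5), dtype=np.float32)
output = ones + ones        # statically inferred from the constant keys -> all 2.0
output2 = ones * ones       # computed at run time from left1/right1 -> all 1.0
output3 = output2 + output  # consumes the cached result -> all 3.0
assert np.allclose(output3, 3.0)
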
