bugfix: update ggml codegen to support using cached results in operations
emrecakmakyurdu committed Mar 9, 2025
1 parent 9b2f289 commit 3353388
Showing 2 changed files with 106 additions and 4 deletions.
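
For orientation, here is a minimal sketch of the scenario this commit addresses, condensed from the new test added below: one output ("output") is fully computable at compile time from constant keys, yet it is also consumed by a later operation ("output3"), so the generated GGML code must keep it available as a tensor instead of only copying it to the output struct. Import paths and the hedged names below are assumptions based on the test, not taken verbatim from the library's documented API.

import numpy as np

# Import paths are assumptions; the new test below has the equivalent
# names (Model, Add, Multiply, Tensor, compile, GGMLBackend) in scope.
from mithril import compile  # assumed top-level export
from mithril.backends import GGMLBackend  # hypothetical path
from mithril.models import Add, Model, Multiply, Tensor  # hypothetical path

model = Model()
model += Add()(left="left", right="right", output="output")          # static once "left"/"right" are constants
model |= Multiply()(left="left1", right="right1", output="output2")  # depends on runtime inputs
model |= Add()(left="output2", right="output", output="output3")     # reuses the cached "output"
model.set_types(left=Tensor, right=Tensor, left1=Tensor, right1=Tensor)

backend = GGMLBackend()
ones = np.ones((5, 5), dtype=np.float32)
pm = compile(
    model,
    backend,
    shapes={"left1": [5, 5], "right1": [5, 5]},
    constant_keys={"left": backend.array(ones), "right": backend.array(ones)},
    jit=False,
    inference=True,
)
# "output" is now a cached (statically inferred) result that the generated
# GGML code must still expose as a tensor for the final Add.
outputs = pm.evaluate({"left1": backend.array(ones), "right1": backend.array(ones)})
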
34 changes: 30 additions & 4 deletions mithril/framework/codegen/ggml_gen.py
@@ -182,8 +182,8 @@ def update_function(
         # Create tensors
         init_block.append(c_ast.Comment("Create tensors only once"))  # type: ignore
         for key in self.determined_struct_keys[f"{fn_ref_name}_input_keys"]:
-            # If key statically inferred, skip tensor creation
-            if key in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]:
+            # If key is in cache, skip tensor creation
+            if key in self.determined_struct_keys[f"{fn_ref_name}_cache_keys"]:
                 continue
             shape = self._get_tensor_shape(key)
             if shape is not None:
@@ -193,6 +193,18 @@
                 )
                 init_block.append(c_ast.Assign(c_ast.Variable(key), tensor))  # type: ignore
 
+        # Create tensors for static keys if they are going to be used in other operations
+        for out_key in self.determined_struct_keys[f"{fn_ref_name}_cache_keys"]:
+            if (out_key in self.determined_struct_keys[f"{fn_ref_name}_input_keys"]
+                and out_key not in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]
+            ):
+                shape = self._get_tensor_shape(out_key)
+                tensor = c_ast.Call(
+                    f"ggml_new_tensor_{len(shape)}d",
+                    [ctx_name, "GGML_TYPE_F32"] + [str(size) for size in shape],
+                )
+                init_block.append(c_ast.Assign(self.create_key_ref(out_key, context=fn_ref_name), tensor))  # type: ignore
+
         # Create and build graph
         init_block.extend(
             [
@@ -233,14 +245,28 @@ def update_function(
         update_ptr_block: ast_block_type = []
         update_ptr_block.append(c_ast.Comment("Update tensor data for each call"))  # type: ignore
         for key in self.determined_struct_keys[f"{fn_ref_name}_input_keys"]:
-            # If key is statically inferred, assign to output directly
-            if key in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]:
+            # If the cached value is not used by another operation,
+            # assign it directly to the output.
+            if (key in self.determined_struct_keys[f"{fn_ref_name}_cache_keys"]
+                and key in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]
+            ):
                 update_ptr_block.append(
                     c_ast.Assign(  # type: ignore
                         self.create_key_ref(key, context=fn_ref_name),
                         c_ast.Arrow(c_ast.Variable("inputs"), f"{key}")
                     )
                 )
+            # If the cached value is an input to another operation,
+            # point its tensor data at the corresponding input data.
+            elif (key in self.determined_struct_keys[f"{fn_ref_name}_cache_keys"]
+                and key not in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]
+            ):
+                update_ptr_block.append(
+                    c_ast.Assign(  # type: ignore
+                        c_ast.Arrow(self.create_key_ref(key, context=fn_ref_name), "data"),
+                        c_ast.Arrow(c_ast.Arrow(c_ast.Variable("inputs"), key), "data"),
+                    )
+                )
             else:
                 update_ptr_block.append(
                     c_ast.Assign(  # type: ignore
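
To make the new branches concrete, the sketch below builds the same kind of c_ast nodes the updated codegen appends for a cached key that feeds another operation, with comments showing roughly the C they correspond to. The key name, context variable, shape, and the c_ast import path are illustrative assumptions; create_key_ref is approximated here with a plain variable reference.

from mithril.framework.codegen import c_ast  # import path assumed

ctx_name = "ctx"   # hypothetical GGML context variable name
key = "output"     # hypothetical cached key consumed by another operation
shape = (5, 5)

# Init block: create a GGML tensor for the cached key, roughly
#   output = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 5, 5);
tensor = c_ast.Call(
    f"ggml_new_tensor_{len(shape)}d",
    [ctx_name, "GGML_TYPE_F32"] + [str(size) for size in shape],
)
create_stmt = c_ast.Assign(c_ast.Variable(key), tensor)

# Update block: on every call, point the tensor's data at the cached
# value carried in the inputs struct, roughly
#   output->data = inputs->output->data;
update_stmt = c_ast.Assign(
    c_ast.Arrow(c_ast.Variable(key), "data"),
    c_ast.Arrow(c_ast.Arrow(c_ast.Variable("inputs"), key), "data"),
)
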
76 changes: 76 additions & 0 deletions tests/scripts/test_c_static_inference.py
@@ -364,3 +364,79 @@ def test_c_static_inference_5():
     assert np.allclose(c_backend.to_numpy(out), out_np)
     assert np.allclose(ggml_backend.to_numpy(out_ggml), out_np)
 
+
+def test_c_static_inference_6():
+    """
+    Test static inference support when outputs of the model are
+    reused by another operation that has partially static inputs.
+    """
+    model = Model()
+
+    model += Add()(left="left", right="right", output="output")
+    model |= Multiply()(left="left1", right="right1", output="output2")
+    model |= Add()(left="output2", right="output", output="output3")
+    model.set_types(left=Tensor, right=Tensor, left1=Tensor, right1=Tensor)
+
+    c_backend = CBackend()
+    np_backend = NumpyBackend()
+    ggml_backend = GGMLBackend()
+
+    left_static = np.ones((5, 5), dtype=np.float32)
+    right_static = np.ones((5, 5), dtype=np.float32)
+
+    c_pm = compile(
+        model,
+        c_backend,
+        shapes={"left1": [5, 5], "right1": [5, 5]},
+        constant_keys={
+            "left": c_backend.array(left_static),
+            "right": c_backend.array(right_static),
+        },
+        jit=False,
+        inference=True,
+    )
+
+    np_pm = compile(
+        model,
+        np_backend,
+        trainable_keys={"left1", "right1"},
+        constant_keys={
+            "left": left_static,
+            "right": right_static,
+        },
+        jit=False,
+        inference=True,
+    )
+
+    ggml_pm = compile(
+        model,
+        ggml_backend,
+        shapes={"left1": [5, 5], "right1": [5, 5]},
+        constant_keys={
+            "left": ggml_backend.array(left_static),
+            "right": ggml_backend.array(right_static),
+        },
+        jit=False,
+        inference=True,
+    )
+
+    # Numpy Backend
+    np_outputs = np_pm.evaluate({"left1": left_static, "right1": right_static})
+
+    # Raw C Backend
+    c_left = c_backend.array(left_static)
+    c_right = c_backend.array(right_static)
+    c_outputs = c_pm.evaluate({"left1": c_left, "right1": c_right})
+
+    # GGML Backend
+    ggml_left1 = ggml_backend.array(left_static)
+    ggml_right1 = ggml_backend.array(right_static)
+    ggml_outputs = ggml_pm.evaluate({"left1": ggml_left1, "right1": ggml_right1})
+
+    # Assertions
+    for key in np_outputs:
+        out = c_outputs[key]
+        out_ggml = ggml_outputs[key]
+        out_np = np_outputs[key]
+        assert np.allclose(c_backend.to_numpy(out), out_np)
+        assert np.allclose(ggml_backend.to_numpy(out_ggml), out_np)
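
As a sanity check on the numbers this test implies: with all-ones 5x5 inputs, the statically inferred output is left + right = 2, output2 is left1 * right1 = 1, and the operation that reuses the cached result gives output3 = output2 + output = 3. The same arithmetic in plain NumPy, independent of any backend:

import numpy as np

ones = np.ones((5, 5), dtype=np.float32)
output = ones + ones        # statically inferred from the constant keys -> all 2.0
output2 = ones * ones       # computed at run time from left1/right1 -> all 1.0
output3 = output2 + output  # consumes the cached result -> all 3.0
assert np.allclose(output3, 3.0)
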
