From 9b2f2899f80ab258e5961f7f0577766138a5ffb0 Mon Sep 17 00:00:00 2001 From: emrecakmakyurdu Date: Sun, 9 Mar 2025 03:17:35 +0300 Subject: [PATCH] add static inference support for rawc and ggml backends --- .../with_manualgrad/c_backend/backend.py | 15 +- .../with_manualgrad/ggml_backend/backend.py | 12 +- mithril/cores/c/array.py | 6 +- mithril/cores/c/ggml/ops.py | 53 +++ mithril/cores/c/raw_c/ops.py | 32 ++ mithril/framework/codegen/ggml_gen.py | 34 +- mithril/framework/physical/model.py | 2 +- tests/scripts/test_c_static_inference.py | 366 ++++++++++++++++++ 8 files changed, 508 insertions(+), 12 deletions(-) create mode 100644 mithril/cores/c/ggml/ops.py create mode 100644 mithril/cores/c/raw_c/ops.py create mode 100644 tests/scripts/test_c_static_inference.py diff --git a/mithril/backends/with_manualgrad/c_backend/backend.py b/mithril/backends/with_manualgrad/c_backend/backend.py index 44ac609c..20c6a2de 100644 --- a/mithril/backends/with_manualgrad/c_backend/backend.py +++ b/mithril/backends/with_manualgrad/c_backend/backend.py @@ -20,14 +20,19 @@ from .... import types from ....cores.c.array import PyArray -from ....cores.c.raw_c import array +from ....cores.c.raw_c import array, ops from ...backend import Backend from ...utils import process_shape +from ....common import BiMap from . import utils __all__ = ["CBackend"] - +dtype_map: BiMap[str, Any] = BiMap( + { + "float32": np.float32, + } +) class CBackend(Backend[PyArray]): backend_type = "c" SRC_PATH = os.path.join( @@ -37,8 +42,10 @@ class CBackend(Backend[PyArray]): def __init__(self) -> None: self._device = "cpu" - self.primitive_function_dict = {} - + self.primitive_function_dict = ops.primitive_func_dict + self.dtype_map = dtype_map + self.registered_primitives = {} + self.array_creation_funcs = {} @property def is_manualgrad(self) -> bool: return True diff --git a/mithril/backends/with_manualgrad/ggml_backend/backend.py b/mithril/backends/with_manualgrad/ggml_backend/backend.py index f588bff8..cefa258f 100644 --- a/mithril/backends/with_manualgrad/ggml_backend/backend.py +++ b/mithril/backends/with_manualgrad/ggml_backend/backend.py @@ -26,9 +26,16 @@ from ...utils import process_shape from ..c_backend.utils import from_numpy from . import utils +from ....cores.c.ggml import ops +from ....common import BiMap __all__ = ["GGMLBackend"] +dtype_map: BiMap[str, Any] = BiMap( + { + "float32": np.float32, + } +) class GGMLBackend(Backend[PyArray]): backend_type = "c" @@ -39,7 +46,10 @@ class GGMLBackend(Backend[PyArray]): def __init__(self) -> None: self._device = "cpu" - self.primitive_function_dict = {} + self.primitive_function_dict = ops.primitive_func_dict + self.dtype_map = dtype_map + self.registered_primitives = {} + self.array_creation_funcs = {} @property def is_manualgrad(self) -> bool: diff --git a/mithril/cores/c/array.py b/mithril/cores/c/array.py index 24a9e81e..057205e9 100644 --- a/mithril/cores/c/array.py +++ b/mithril/cores/c/array.py @@ -14,7 +14,7 @@ import ctypes from collections.abc import Sequence - +import numpy as np class PyArray: def __init__(self, arr: ctypes.Structure, shape: tuple[int, ...] | list[int]): @@ -30,6 +30,10 @@ def __init__(self, arr: ctypes.Structure, shape: tuple[int, ...] | list[int]): # def __del__(self): # lib.delete_struct(self.arr) + @property + def dtype(self) -> type: + return np.float32 + @property def data(self) -> Sequence[int | Sequence[int | Sequence[int]]]: total_elements = 1 diff --git a/mithril/cores/c/ggml/ops.py b/mithril/cores/c/ggml/ops.py new file mode 100644 index 00000000..730cb4c7 --- /dev/null +++ b/mithril/cores/c/ggml/ops.py @@ -0,0 +1,53 @@ +import ctypes +import os +from ....cores.c.array import PyArray +from ....cores.c.raw_c.array import ( + Array, + zeros, + lib +) +from ....backends.with_manualgrad.c_backend.backend import array +from ....backends.with_manualgrad.c_backend.utils import from_numpy +from ....cores.c.ggml.ggml_core import ggml_struct +import numpy as np + +__all__ = [ + "add", + "multiplication" +] + +def convert_to_c_array( + input: PyArray +) -> PyArray: + input_np = np.array(input.data, dtype=input.dtype) + return from_numpy(input_np) + +def add( + left: PyArray, + right: PyArray +) -> PyArray: + # In C backend, output is given as first input + output = zeros(left.shape) + left_c = convert_to_c_array(left) + right_c = convert_to_c_array(right) + lib.add(ctypes.byref(output.arr), ctypes.byref(left_c.arr), ctypes.byref(right_c.arr)) + _shape = output.shape + data_ptr = ctypes.cast(output.arr.data, ctypes.c_void_p) + return PyArray(ggml_struct(data=data_ptr), _shape) + +def multiplication( + left: PyArray, + right: PyArray +) -> PyArray: + # In C backend, output is given as first input + output = zeros(left.shape) + left_c = convert_to_c_array(left) + right_c = convert_to_c_array(right) + lib.multiplication(ctypes.byref(output.arr), ctypes.byref(left_c.arr), ctypes.byref(right_c.arr)) + _shape = output.shape + data_ptr = ctypes.cast(output.arr.data, ctypes.c_void_p) + return PyArray(ggml_struct(data=data_ptr), _shape) + + + +primitive_func_dict = {key: fn for key, fn in globals().items() if callable(fn)} \ No newline at end of file diff --git a/mithril/cores/c/raw_c/ops.py b/mithril/cores/c/raw_c/ops.py new file mode 100644 index 00000000..b3197a93 --- /dev/null +++ b/mithril/cores/c/raw_c/ops.py @@ -0,0 +1,32 @@ +import ctypes +import os +from ....cores.c.array import PyArray +from ....cores.c.raw_c.array import ( + Array, + zeros, + lib +) +__all__ = [ + "add", + "multiplication" +] + +def add( + left: PyArray, + right: PyArray +) -> PyArray: + # In C backend, output is given as first input + output = zeros(left.shape) + lib.add(ctypes.byref(output.arr), ctypes.byref(left.arr), ctypes.byref(right.arr)) + return output + +def multiplication( + left: PyArray, + right: PyArray +) -> PyArray: + # In C backend, output is given as first input + output = zeros(left.shape) + lib.multiplication(ctypes.byref(output.arr), ctypes.byref(left.arr), ctypes.byref(right.arr)) + return output + +primitive_func_dict = {key: fn for key, fn in globals().items() if callable(fn)} \ No newline at end of file diff --git a/mithril/framework/codegen/ggml_gen.py b/mithril/framework/codegen/ggml_gen.py index e7b538fb..0773a5f2 100644 --- a/mithril/framework/codegen/ggml_gen.py +++ b/mithril/framework/codegen/ggml_gen.py @@ -182,6 +182,9 @@ def update_function( # Create tensors init_block.append(c_ast.Comment("Create tensors only once")) # type: ignore for key in self.determined_struct_keys[f"{fn_ref_name}_input_keys"]: + # If key statically inferred, skip tensor creation + if key in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]: + continue shape = self._get_tensor_shape(key) if shape is not None: tensor = c_ast.Call( @@ -206,6 +209,9 @@ def update_function( # Build graph for out_key in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]: + # If key is statically inferred, skip marking + if out_key in self.determined_struct_keys[f"{fn_ref_name}_input_keys"]: + continue init_block.append( c_ast.MakeStmt( # type: ignore c_ast.Call( @@ -227,12 +233,21 @@ def update_function( update_ptr_block: ast_block_type = [] update_ptr_block.append(c_ast.Comment("Update tensor data for each call")) # type: ignore for key in self.determined_struct_keys[f"{fn_ref_name}_input_keys"]: - update_ptr_block.append( - c_ast.Assign( # type: ignore - c_ast.Arrow(c_ast.Variable(f"{key}"), "data"), - c_ast.Arrow(c_ast.Arrow(c_ast.Variable("inputs"), key), "data"), + # If key is statically inferred, assign to output directly + if key in self.determined_struct_keys[f"{fn_ref_name}_output_keys"]: + update_ptr_block.append( + c_ast.Assign( # type: ignore + self.create_key_ref(key, context=fn_ref_name), + c_ast.Arrow(c_ast.Variable("inputs"), f"{key}") + ) + ) + else: + update_ptr_block.append( + c_ast.Assign( # type: ignore + c_ast.Arrow(c_ast.Variable(f"{key}"), "data"), + c_ast.Arrow(c_ast.Arrow(c_ast.Variable("inputs"), key), "data"), + ) ) - ) # Initialization function init_fn = super().define_function( @@ -296,3 +311,12 @@ def create_key_ref( return c_ast.Variable(key) return super().create_key_ref(key, context, load) + + @override + def _determine_struct_keys(self) -> dict[str, list[str]]: + determined_struct_keys = super()._determine_struct_keys() + static_cache_keys = sorted(self.pm.flat_graph.all_static_keys) + if static_cache_keys: + determined_struct_keys["eval_input_keys"] = static_cache_keys + + return determined_struct_keys \ No newline at end of file diff --git a/mithril/framework/physical/model.py b/mithril/framework/physical/model.py index e12096ca..fe628e30 100644 --- a/mithril/framework/physical/model.py +++ b/mithril/framework/physical/model.py @@ -1024,7 +1024,7 @@ def evaluate( ): outputs = self.backend._run_callable(params, data, fn_name="eval_fn") else: - outputs = self._generated_eval_fn(params, data) + outputs = self._generated_eval_fn(params, data, cache=self.flat_graph.cached_data) outputs, state_outputs = self._extract_state_outputs(outputs) if len(state_outputs) == 0: diff --git a/tests/scripts/test_c_static_inference.py b/tests/scripts/test_c_static_inference.py new file mode 100644 index 00000000..1332ae51 --- /dev/null +++ b/tests/scripts/test_c_static_inference.py @@ -0,0 +1,366 @@ +import numpy as np + +from mithril import CBackend, GGMLBackend, NumpyBackend, compile +from mithril.framework.common import Tensor +from mithril.models import Add, Model, Multiply + +def test_c_static_inference_1(): + """ + Test static inference support for add operation + in Rawc and GGML backends with all static inputs + """ + model = Model() + + model += Add()(left="left", right="right", output="output") + model.set_types(left=Tensor, right=Tensor) + + c_backend = CBackend() + np_backend = NumpyBackend() + ggml_backend = GGMLBackend() + + left_static = np.ones((5, 5), dtype=np.float32) + right_static = np.ones((5, 5), dtype=np.float32) + + c_pm = compile( + model, + c_backend, + constant_keys={ + "left": c_backend.array(left_static), + "right": c_backend.array(right_static), + }, + jit=False, + inference=True + ) + + np_pm = compile( + model, + np_backend, + constant_keys={ + "left": left_static, + "right": right_static, + }, + jit=False, + inference=True + ) + + ggml_pm = compile( + model, + ggml_backend, + constant_keys={ + "left": ggml_backend.array(left_static), + "right": ggml_backend.array(right_static), + }, + jit=False, + inference=True + ) + + # Numpy Backend + np_outputs = np_pm.evaluate() + + # Raw C Backend + c_outputs = c_pm.evaluate() + + # GGML Backend + ggml_outputs = ggml_pm.evaluate() + + # Assertions + for key in np_outputs: + out = c_outputs[key] + out_ggml = ggml_outputs[key] + out_np = np_outputs[key] + assert np.allclose(c_backend.to_numpy(out), out_np) + assert np.allclose(ggml_backend.to_numpy(out_ggml), out_np) + +def test_c_static_inference_2(): + """ + Test static inference support for multiplication operation + in Rawc and GGML backends with all static inputs + """ + model = Model() + + model += Multiply()(left="left", right="right", output="output") + model.set_types(left=Tensor, right=Tensor) + + c_backend = CBackend() + np_backend = NumpyBackend() + ggml_backend = GGMLBackend() + + left_static = np.ones((5, 5), dtype=np.float32) + right_static = np.ones((5, 5), dtype=np.float32) + + c_pm = compile( + model, + c_backend, + constant_keys={ + "left": c_backend.array(left_static), + "right": c_backend.array(right_static), + }, + jit=False, + inference=True + ) + + np_pm = compile( + model, + np_backend, + constant_keys={ + "left": left_static, + "right": right_static, + }, + jit=False, + inference=True + ) + + ggml_pm = compile( + model, + ggml_backend, + constant_keys={ + "left": ggml_backend.array(left_static), + "right": ggml_backend.array(right_static), + }, + jit=False, + inference=True + ) + + # Numpy Backend + np_outputs = np_pm.evaluate() + + # Raw C Backend + c_outputs = c_pm.evaluate() + + # GGML Backend + ggml_outputs = ggml_pm.evaluate() + + # Assertions + for key in np_outputs: + out = c_outputs[key] + out_ggml = ggml_outputs[key] + out_np = np_outputs[key] + assert np.allclose(c_backend.to_numpy(out), out_np) + assert np.allclose(ggml_backend.to_numpy(out_ggml), out_np) + +def test_c_static_inference_3(): + """ + Test static inference support for add and multiplication + operations in Rawc and GGML backends with all static inputs + """ + model = Model() + + model += Add()(left="left", right="right", output="output") + model |=Multiply()(left="left1", right="right1", output="output2") + model.set_types(left=Tensor, right=Tensor, left1=Tensor, right1=Tensor) + + c_backend = CBackend() + np_backend = NumpyBackend() + ggml_backend = GGMLBackend() + + left_static = np.ones((5, 5), dtype=np.float32) + right_static = np.ones((5, 5), dtype=np.float32) + + c_pm = compile( + model, + c_backend, + constant_keys={ + "left": c_backend.array(left_static), + "right": c_backend.array(right_static), + "left1": c_backend.array(left_static), + "right1": c_backend.array(right_static), + }, + jit=False, + inference=True + ) + + np_pm = compile( + model, + np_backend, + constant_keys={ + "left": left_static, + "right": right_static, + "left1": left_static, + "right1": right_static, + }, + jit=False, + inference=True + ) + + ggml_pm = compile( + model, + ggml_backend, + constant_keys={ + "left": ggml_backend.array(left_static), + "right": ggml_backend.array(right_static), + "left1": ggml_backend.array(left_static), + "right1": ggml_backend.array(right_static), + }, + jit=False, + inference=True + ) + + # Numpy Backend + np_outputs = np_pm.evaluate() + + # Raw C Backend + c_outputs = c_pm.evaluate() + + # GGML Backend + ggml_outputs = ggml_pm.evaluate() + + # Assertions + for key in np_outputs: + out = c_outputs[key] + out_ggml = ggml_outputs[key] + out_np = np_outputs[key] + assert np.allclose(c_backend.to_numpy(out), out_np) + assert np.allclose(ggml_backend.to_numpy(out_ggml), out_np) + +def test_c_static_inference_4(): + """ + Test static inference support for add and multiplication + operations in Rawc and GGML backends with partial static inputs + """ + model = Model() + + model += Add()(left="left", right="right", output="output") + model |=Multiply()(left="left1", right="right1", output="output2") + model.set_types(left=Tensor, right=Tensor, left1=Tensor, right1=Tensor) + + c_backend = CBackend() + np_backend = NumpyBackend() + ggml_backend = GGMLBackend() + + left_static = np.ones((5, 5), dtype=np.float32) + right_static = np.ones((5, 5), dtype=np.float32) + + c_pm = compile( + model, + c_backend, + shapes={"left1": [5, 5], "right1": [5, 5] }, + constant_keys={ + "left": c_backend.array(left_static), + "right": c_backend.array(right_static), + }, + jit=False, + inference=True + ) + + np_pm = compile( + model, + np_backend, + trainable_keys={"left1", "right1"}, + constant_keys={ + "left": left_static, + "right": right_static, + }, + jit=False, + inference=True + ) + + ggml_pm = compile( + model, + ggml_backend, + shapes={"left1": [5, 5], "right1": [5, 5] }, + constant_keys={ + "left": ggml_backend.array(left_static), + "right": ggml_backend.array(right_static), + }, + jit=False, + inference=True, + file_path="out_ggml_4.c" + ) + + # Numpy Backend + np_outputs = np_pm.evaluate({"left1": left_static, "right1": right_static}) + + # Raw C Backend + c_left = c_backend.array(left_static) + c_right = c_backend.array(right_static) + c_outputs = c_pm.evaluate({"left1": c_left, "right1": c_right}) + + # GGML Backend + ggml_left1 = ggml_backend.array(left_static) + ggml_right1 = ggml_backend.array(right_static) + ggml_outputs = ggml_pm.evaluate({"left": ggml_left1, "right": ggml_right1,"left1": ggml_left1, "right1": ggml_right1}) + + # Assertions + for key in np_outputs: + out = c_outputs[key] + out_ggml = ggml_outputs[key] + out_np = np_outputs[key] + assert np.allclose(c_backend.to_numpy(out), out_np) + assert np.allclose(ggml_backend.to_numpy(out_ggml), out_np) + +def test_c_static_inference_5(): + """ + Test static inference support when cached data is used + as input in RawC and GGML backend with partial static inputs + """ + model = Model() + + model += Add()(left="left", right="right", output="output") + model |=Multiply()(left="left1", right="right1", output="output2") + model |= Multiply()(left="left", right="output", output="output3") + model.set_types(left=Tensor, right=Tensor, left1=Tensor, right1=Tensor) + + c_backend = CBackend() + np_backend = NumpyBackend() + ggml_backend = GGMLBackend() + + left_static = np.ones((5, 5), dtype=np.float32) + right_static = np.ones((5, 5), dtype=np.float32) + + c_pm = compile( + model, + c_backend, + shapes={"left1": [5, 5], "right1": [5, 5] }, + constant_keys={ + "left": c_backend.array(left_static), + "right": c_backend.array(right_static), + }, + jit=False, + inference=True + ) + + np_pm = compile( + model, + np_backend, + trainable_keys={"left1", "right1"}, + constant_keys={ + "left": left_static, + "right": right_static, + }, + jit=False, + inference=True + ) + + ggml_pm = compile( + model, + ggml_backend, + shapes={"left1": [5, 5], "right1": [5, 5] }, + constant_keys={ + "left": ggml_backend.array(left_static), + "right": ggml_backend.array(right_static), + }, + jit=False, + inference=True, + ) + + # Numpy Backend + np_outputs = np_pm.evaluate({"left1": left_static, "right1": right_static}) + + # Raw C Backend + c_left = c_backend.array(left_static) + c_right = c_backend.array(right_static) + c_outputs = c_pm.evaluate({"left1": c_left, "right1": c_right}) + + # GGML Backend + ggml_left1 = ggml_backend.array(left_static) + ggml_right1 = ggml_backend.array(right_static) + ggml_outputs = ggml_pm.evaluate({"left": ggml_left1, "right": ggml_right1,"left1": ggml_left1, "right1": ggml_right1}) + + # Assertions + for key in np_outputs: + out = c_outputs[key] + out_ggml = ggml_outputs[key] + out_np = np_outputs[key] + assert np.allclose(c_backend.to_numpy(out), out_np) + assert np.allclose(ggml_backend.to_numpy(out_ggml), out_np) +