diff --git a/generators/ed25519.py b/generators/ed25519.py index c947673..650c64d 100644 --- a/generators/ed25519.py +++ b/generators/ed25519.py @@ -1,6 +1,5 @@ import fd58 import hashlib -from test_suite.codec_utils import encode_input import test_suite.invoke_pb2 as pb from dataclasses import dataclass import datetime diff --git a/generators/secp256k1.py b/generators/secp256k1.py index 341c5e2..f825762 100644 --- a/generators/secp256k1.py +++ b/generators/secp256k1.py @@ -1,7 +1,6 @@ import fd58 import hashlib from eth_hash.auto import keccak -from test_suite.codec_utils import encode_input import test_suite.invoke_pb2 as pb from dataclasses import dataclass import datetime diff --git a/invoke.proto b/invoke.proto index de44ae8..9f3ba17 100644 --- a/invoke.proto +++ b/invoke.proto @@ -104,3 +104,35 @@ message InstrFixture { InstrContext input = 1; InstrEffects output = 2; } + +message ELFBinary { + bytes data = 1; +} + +// Wrapper for the ELF binary and the features that the loader should use +// Note that we currently hardcode the features to be used by the loader, +// so features isn't actually used yet. +message ELFLoaderCtx { + ELFBinary elf = 1; + FeatureSet features = 2; +} + +// Captures the results of an ELF binary load. +// Structurally similar to fd_sbpf_program_t +message ELFLoaderEffects { + bytes rodata = 1; + uint64 rodata_sz = 2; + + // bytes text = 3; // not needed, just points to a region in rodata + uint64 text_cnt = 4; + uint64 text_off = 5; + + uint64 entry_pc = 6; + + repeated uint64 calldests = 7; +} + +message ELFLoaderFixture { + ELFLoaderCtx input = 1; + ELFLoaderEffects output = 2; +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 6b8a588..2f7e504 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,5 +34,5 @@ dev = [ ] [build-system] -requires = ['setuptools'] +requires = ["setuptools", "setuptools_scm>=8"] build-backend = "setuptools.build_meta" diff --git a/src/test_suite/debugger.py b/src/test_suite/debugger.py index 5a2403a..629c127 100644 --- a/src/test_suite/debugger.py +++ b/src/test_suite/debugger.py @@ -6,8 +6,9 @@ import os from test_suite.multiprocessing_utils import ( initialize_process_output_buffers, - process_instruction, + process_target, ) +import test_suite.globals as globals def debug_target(shared_library, test_input, pipe): @@ -23,7 +24,7 @@ def debug_target(shared_library, test_input, pipe): lib = ctypes.CDLL(shared_library) lib.sol_compat_init() - process_instruction(lib, test_input) + process_target(lib, test_input) lib.sol_compat_fini() @@ -62,7 +63,7 @@ def debug_host(shared_library, instruction_context, gdb): # As soon as the target library gets loaded, set a breakpoint # for the newly appeared executor function "set breakpoint pending on", - "break sol_compat_instr_execute_v1", + f"break {globals.harness_ctx.fuzz_fn_name}", # GDB stops the process when attaching, let it continue "continue", # ...
At this point, the child process has SIGSTOP'ed itself diff --git a/src/test_suite/fixture_utils.py b/src/test_suite/fixture_utils.py index 8b32221..3b1c226 100644 --- a/src/test_suite/fixture_utils.py +++ b/src/test_suite/fixture_utils.py @@ -1,9 +1,8 @@ import fd58 -from test_suite.codec_utils import encode_input, encode_output from test_suite.constants import NATIVE_PROGRAM_MAPPING from test_suite.multiprocessing_utils import ( build_test_results, - read_instr, + read_context, process_single_test_case, prune_execution_result, ) @@ -23,9 +22,10 @@ def create_fixture(test_file: Path) -> int: Returns: - int: 1 on success, 0 on failure """ - serialized_instr_context = read_instr(test_file) - results = process_single_test_case(serialized_instr_context) - pruned_results = prune_execution_result(serialized_instr_context, results) + serialized_context = read_context(test_file) + results = process_single_test_case(serialized_context) + + pruned_results = prune_execution_result(serialized_context, results) # This is only relevant when you gather results for multiple targets if globals.only_keep_passing: @@ -38,25 +38,25 @@ def create_fixture(test_file: Path) -> int: serialized_instr_effects = pruned_results[globals.solana_shared_library] - if serialized_instr_context is None or serialized_instr_effects is None: + if serialized_context is None or serialized_instr_effects is None: return 0 # Create instruction fixture - instr_context = pb.InstrContext() - instr_context.ParseFromString(serialized_instr_context) - instr_effects = pb.InstrEffects() - instr_effects.ParseFromString(serialized_instr_effects) + context = globals.harness_ctx.context_type() + context.ParseFromString(serialized_context) + effects = globals.harness_ctx.effects_type() + effects.ParseFromString(serialized_instr_effects) - fixture = pb.InstrFixture() - fixture.input.MergeFrom(instr_context) - fixture.output.MergeFrom(instr_effects) + fixture = globals.harness_ctx.fixture_type() + fixture.input.MergeFrom(context) + fixture.output.MergeFrom(effects) return write_fixture_to_disk( test_file.stem, fixture.SerializeToString(deterministic=True) ) -def write_fixture_to_disk(file_stem: str, serialized_instruction_fixture: str) -> int: +def write_fixture_to_disk(file_stem: str, serialized_fixture: str) -> int: """ Writes instruction fixtures to disk. This function outputs in binary format unless specified otherwise with the --readable flag. 
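For orientation, the harness-generic assembly that create_fixture now performs maps directly onto the new ELF-loader messages added to invoke.proto above. The following is a minimal sketch (not code from this change) of that context-plus-effects-to-fixture composition for the ELF harness; the ELF bytes and the feature value are placeholders:

import test_suite.invoke_pb2 as pb

# Build a context for the ELF loader harness (placeholder payload).
context = pb.ELFLoaderCtx()
context.elf.data = b"\x7fELF" + b"\x00" * 60   # not a real, loadable ELF
context.features.features.extend([1234])       # currently ignored by the loader

# Effects as a target would report them (left mostly empty here).
effects = pb.ELFLoaderEffects()
effects.rodata_sz = 0
effects.entry_pc = 0

# Same composition create_fixture performs via globals.harness_ctx types.
fixture = pb.ELFLoaderFixture()
fixture.input.MergeFrom(context)
fixture.output.MergeFrom(effects)
serialized = fixture.SerializeToString(deterministic=True)

Because HarnessCtx derives these classes from ELFLoaderFixture.DESCRIPTOR, the same create_fixture code path produces either an InstrFixture or an ELFLoaderFixture without any type-specific branches.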
@@ -67,47 +67,46 @@ def write_fixture_to_disk(file_stem: str, serialized_instruction_fixture: str) - Returns: - int: 0 on failure, 1 on success """ - if serialized_instruction_fixture is None: + if serialized_fixture is None: return 0 output_dir = globals.output_dir if globals.organize_fixture_dir: - instr_fixture = pb.InstrFixture() - instr_fixture.ParseFromString(serialized_instruction_fixture) - program_type = get_program_type(instr_fixture) + fixture = globals.harness_ctx.fixture_type() + fixture.ParseFromString(serialized_fixture) + program_type = get_program_type(fixture) output_dir = output_dir / program_type output_dir.mkdir(parents=True, exist_ok=True) if globals.readable: # Deserialize fixture - instr_fixture = pb.InstrFixture() - instr_fixture.ParseFromString(serialized_instruction_fixture) + fixture = globals.harness_ctx.fixture_type() + fixture.ParseFromString(serialized_fixture) # Encode fields for instruction context and effects - instr_context = pb.InstrContext() - instr_context.CopyFrom(instr_fixture.input) - encode_input(instr_context) + context = globals.harness_ctx.context_type() + context.CopyFrom(fixture.input) + # encode_input(context) + globals.harness_ctx.context_human_encode_fn(context) - instr_effects = pb.InstrEffects() - instr_effects.CopyFrom(instr_fixture.output) - encode_output(instr_effects) + instr_effects = globals.harness_ctx.effects_type() + instr_effects.CopyFrom(fixture.output) + globals.harness_ctx.effects_human_encode_fn(instr_effects) - instr_fixture.input.CopyFrom(instr_context) - instr_fixture.output.CopyFrom(instr_effects) + fixture.input.CopyFrom(context) + fixture.output.CopyFrom(instr_effects) with open(output_dir / (file_stem + ".fix.txt"), "w") as f: - f.write( - text_format.MessageToString(instr_fixture, print_unknown_fields=False) - ) + f.write(text_format.MessageToString(fixture, print_unknown_fields=False)) else: with open(output_dir / (file_stem + ".fix"), "wb") as f: - f.write(serialized_instruction_fixture) + f.write(serialized_fixture) return 1 -def extract_instr_context_from_fixture(fixture_file: Path): +def extract_context_from_fixture(fixture_file: Path): """ Extract InstrContext from InstrEffects and write to disk.
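The --readable branch above reduces to a small, harness-agnostic routine: parse the fixture with the active harness's types, run the human-encode hooks in place, and emit protobuf text format. A minimal sketch, assuming globals.harness_ctx has already been set (as test_suite.py does with InstrHarness) and serialized_fixture holds fixture bytes:

from google.protobuf import text_format
import test_suite.globals as globals

def fixture_to_text(serialized_fixture: bytes) -> str:
    fixture = globals.harness_ctx.fixture_type()
    fixture.ParseFromString(serialized_fixture)

    # Encode input/output in place (e.g. base58 for the instruction harness;
    # a no-op for harnesses that keep the generic defaults).
    context = globals.harness_ctx.context_type()
    context.CopyFrom(fixture.input)
    globals.harness_ctx.context_human_encode_fn(context)

    effects = globals.harness_ctx.effects_type()
    effects.CopyFrom(fixture.output)
    globals.harness_ctx.effects_human_encode_fn(effects)

    fixture.input.CopyFrom(context)
    fixture.output.CopyFrom(effects)
    return text_format.MessageToString(fixture, print_unknown_fields=False)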
@@ -118,12 +117,12 @@ def extract_instr_context_from_fixture(fixture_file: Path): - int: 1 on success, 0 on failure """ try: - instr_fixture = pb.InstrFixture() + fixture = globals.harness_ctx.fixture_type() with open(fixture_file, "rb") as f: - instr_fixture.ParseFromString(f.read()) + fixture.ParseFromString(f.read()) with open(globals.output_dir / (fixture_file.stem + ".bin"), "wb") as f: - f.write(instr_fixture.input.SerializeToString(deterministic=True)) + f.write(fixture.input.SerializeToString(deterministic=True)) except: return 0 diff --git a/src/test_suite/fuzz_context.py b/src/test_suite/fuzz_context.py new file mode 100644 index 0000000..05f9188 --- /dev/null +++ b/src/test_suite/fuzz_context.py @@ -0,0 +1,16 @@ +from test_suite.fuzz_interface import HarnessCtx +import test_suite.invoke_pb2 as pb +import test_suite.instr.codec_utils as instr_codec + + +ElfHarness = HarnessCtx( + fuzz_fn_name="sol_compat_elf_loader_v1", fixture_desc=pb.ELFLoaderFixture.DESCRIPTOR +) + +InstrHarness = HarnessCtx( + fuzz_fn_name="sol_compat_instr_execute_v1", + fixture_desc=pb.InstrFixture.DESCRIPTOR, + context_human_encode_fn=instr_codec.encode_input, + context_human_decode_fn=instr_codec.decode_input, + effects_human_encode_fn=instr_codec.encode_output, +) diff --git a/src/test_suite/fuzz_interface.py b/src/test_suite/fuzz_interface.py new file mode 100644 index 0000000..7d6d9bc --- /dev/null +++ b/src/test_suite/fuzz_interface.py @@ -0,0 +1,59 @@ +from typing import Callable, Type, TypeVar +from google.protobuf import message, descriptor, message_factory +from dataclasses import dataclass, InitVar + +msg_factory = message_factory.MessageFactory() + +FixtureType = TypeVar("FixtureType", bound=message.Message) +ContextType = TypeVar("ContextType", bound=message.Message) +EffectsType = TypeVar("EffectsType", bound=message.Message) + +""" +Each fuzzing harness should implement this interface in fuzz_context.py + +The following defines the interface: +- fuzz_fn_name: The name of the harness function to call in the fuzz target +- fixture_desc: The protobuf descriptor for the fixture message. + - A fixture message is a message that contains an input and output message. + - input: The fuzz target Context + - output: The fuzz target Effects +- diff_effect_fn: A function that compares two effects messages for equality +- human encode/decode functions for the context and effects messages to + convert the messages to/from human-readable format (in-place). + Both context and effects messages can have their own encode/decode functions. 
+""" + + +def generic_effects_diff(a: EffectsType, b: EffectsType) -> bool: + return a == b + + +def generic_human_encode(obj: message.Message) -> None: + pass + + +def generic_human_decode(obj: message.Message) -> None: + pass + + +@dataclass +class HarnessCtx: + fuzz_fn_name: str + fixture_desc: InitVar[descriptor.Descriptor] + diff_effect_fn: Callable[[EffectsType, EffectsType], bool] = generic_effects_diff + context_human_encode_fn: Callable[[ContextType], None] = generic_human_encode + context_human_decode_fn: Callable[[ContextType], None] = generic_human_decode + effects_human_encode_fn: Callable[[EffectsType], None] = generic_human_encode + effects_human_decode_fn: Callable[[EffectsType], None] = generic_human_decode + fixture_type: Type[FixtureType] = message.Message + context_type: Type[ContextType] = message.Message + effects_type: Type[EffectsType] = message.Message + + def __post_init__(self, fixture_desc): + self.fixture_type = msg_factory.GetPrototype(fixture_desc) + self.context_type = msg_factory.GetPrototype( + fixture_desc.fields_by_name["input"].message_type + ) + self.effects_type = msg_factory.GetPrototype( + fixture_desc.fields_by_name["output"].message_type + ) diff --git a/src/test_suite/globals.py b/src/test_suite/globals.py index 587483c..f635615 100644 --- a/src/test_suite/globals.py +++ b/src/test_suite/globals.py @@ -1,10 +1,12 @@ +from test_suite.fuzz_interface import HarnessCtx + # Global variables that can be accessed from processes. # Target libraries (for run-tests) target_libraries = {} # Ground truth library (for run-tests) -solana_shared_library = None +reference_shared_library = None # Number of iterations (for check-consistency) n_iterations = 0 @@ -27,3 +29,5 @@ # (For fixtures) Whether to only keep passing tests only_keep_passing = False +# Harness context +harness_ctx: HarnessCtx = None diff --git a/src/test_suite/codec_utils.py b/src/test_suite/instr/codec_utils.py similarity index 100% rename from src/test_suite/codec_utils.py rename to src/test_suite/instr/codec_utils.py diff --git a/src/test_suite/invoke_pb2.py b/src/test_suite/invoke_pb2.py index 3ec7364..5cf4be0 100644 --- a/src/test_suite/invoke_pb2.py +++ b/src/test_suite/invoke_pb2.py @@ -19,7 +19,7 @@ name='invoke.proto', package='org.solana.sealevel.v1', syntax='proto3', - serialized_pb=_b('\n\x0cinvoke.proto\x12\x16org.solana.sealevel.v1\"\x1e\n\nFeatureSet\x12\x10\n\x08\x66\x65\x61tures\x18\x01 \x03(\x06\"s\n\tAcctState\x12\x0f\n\x07\x61\x64\x64ress\x18\x01 \x01(\x0c\x12\x10\n\x08lamports\x18\x02 \x01(\x04\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\x12\n\nexecutable\x18\x04 \x01(\x08\x12\x12\n\nrent_epoch\x18\x05 \x01(\x04\x12\r\n\x05owner\x18\x06 \x01(\x0c\"D\n\x0c\x45pochContext\x12\x34\n\x08\x66\x65\x61tures\x18\x01 \x01(\x0b\x32\".org.solana.sealevel.v1.FeatureSet\"\r\n\x0bSlotContext\"\x0c\n\nTxnContext\"B\n\tInstrAcct\x12\r\n\x05index\x18\x01 \x01(\r\x12\x13\n\x0bis_writable\x18\x02 \x01(\x08\x12\x11\n\tis_signer\x18\x03 \x01(\x08\"\xf6\x02\n\x0cInstrContext\x12\x12\n\nprogram_id\x18\x01 \x01(\x0c\x12\x11\n\tloader_id\x18\x02 \x01(\x0c\x12\x33\n\x08\x61\x63\x63ounts\x18\x03 \x03(\x0b\x32!.org.solana.sealevel.v1.AcctState\x12\x39\n\x0einstr_accounts\x18\x04 \x03(\x0b\x32!.org.solana.sealevel.v1.InstrAcct\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\x12\x10\n\x08\x63u_avail\x18\x06 \x01(\x04\x12\x37\n\x0btxn_context\x18\x07 \x01(\x0b\x32\".org.solana.sealevel.v1.TxnContext\x12\x39\n\x0cslot_context\x18\x08 
\x01(\x0b\x32#.org.solana.sealevel.v1.SlotContext\x12;\n\repoch_context\x18\t \x01(\x0b\x32$.org.solana.sealevel.v1.EpochContext\"\x97\x01\n\x0cInstrEffects\x12\x0e\n\x06result\x18\x01 \x01(\x05\x12\x12\n\ncustom_err\x18\x02 \x01(\r\x12<\n\x11modified_accounts\x18\x03 \x03(\x0b\x32!.org.solana.sealevel.v1.AcctState\x12\x10\n\x08\x63u_avail\x18\x04 \x01(\x04\x12\x13\n\x0breturn_data\x18\x05 \x01(\x0c\"y\n\x0cInstrFixture\x12\x33\n\x05input\x18\x01 \x01(\x0b\x32$.org.solana.sealevel.v1.InstrContext\x12\x34\n\x06output\x18\x02 \x01(\x0b\x32$.org.solana.sealevel.v1.InstrEffectsb\x06proto3') + serialized_pb=_b('\n\x0cinvoke.proto\x12\x16org.solana.sealevel.v1\"\x1e\n\nFeatureSet\x12\x10\n\x08\x66\x65\x61tures\x18\x01 \x03(\x06\"s\n\tAcctState\x12\x0f\n\x07\x61\x64\x64ress\x18\x01 \x01(\x0c\x12\x10\n\x08lamports\x18\x02 \x01(\x04\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\x12\n\nexecutable\x18\x04 \x01(\x08\x12\x12\n\nrent_epoch\x18\x05 \x01(\x04\x12\r\n\x05owner\x18\x06 \x01(\x0c\"D\n\x0c\x45pochContext\x12\x34\n\x08\x66\x65\x61tures\x18\x01 \x01(\x0b\x32\".org.solana.sealevel.v1.FeatureSet\"\r\n\x0bSlotContext\"\x0c\n\nTxnContext\"B\n\tInstrAcct\x12\r\n\x05index\x18\x01 \x01(\r\x12\x13\n\x0bis_writable\x18\x02 \x01(\x08\x12\x11\n\tis_signer\x18\x03 \x01(\x08\"\xf6\x02\n\x0cInstrContext\x12\x12\n\nprogram_id\x18\x01 \x01(\x0c\x12\x11\n\tloader_id\x18\x02 \x01(\x0c\x12\x33\n\x08\x61\x63\x63ounts\x18\x03 \x03(\x0b\x32!.org.solana.sealevel.v1.AcctState\x12\x39\n\x0einstr_accounts\x18\x04 \x03(\x0b\x32!.org.solana.sealevel.v1.InstrAcct\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\x12\x10\n\x08\x63u_avail\x18\x06 \x01(\x04\x12\x37\n\x0btxn_context\x18\x07 \x01(\x0b\x32\".org.solana.sealevel.v1.TxnContext\x12\x39\n\x0cslot_context\x18\x08 \x01(\x0b\x32#.org.solana.sealevel.v1.SlotContext\x12;\n\repoch_context\x18\t \x01(\x0b\x32$.org.solana.sealevel.v1.EpochContext\"\x97\x01\n\x0cInstrEffects\x12\x0e\n\x06result\x18\x01 \x01(\x05\x12\x12\n\ncustom_err\x18\x02 \x01(\r\x12<\n\x11modified_accounts\x18\x03 \x03(\x0b\x32!.org.solana.sealevel.v1.AcctState\x12\x10\n\x08\x63u_avail\x18\x04 \x01(\x04\x12\x13\n\x0breturn_data\x18\x05 \x01(\x0c\"y\n\x0cInstrFixture\x12\x33\n\x05input\x18\x01 \x01(\x0b\x32$.org.solana.sealevel.v1.InstrContext\x12\x34\n\x06output\x18\x02 \x01(\x0b\x32$.org.solana.sealevel.v1.InstrEffects\"\x19\n\tELFBinary\x12\x0c\n\x04\x64\x61ta\x18\x01 \x01(\x0c\"t\n\x0c\x45LFLoaderCtx\x12.\n\x03\x65lf\x18\x01 \x01(\x0b\x32!.org.solana.sealevel.v1.ELFBinary\x12\x34\n\x08\x66\x65\x61tures\x18\x02 \x01(\x0b\x32\".org.solana.sealevel.v1.FeatureSet\"~\n\x10\x45LFLoaderEffects\x12\x0e\n\x06rodata\x18\x01 \x01(\x0c\x12\x11\n\trodata_sz\x18\x02 \x01(\x04\x12\x10\n\x08text_cnt\x18\x04 \x01(\x04\x12\x10\n\x08text_off\x18\x05 \x01(\x04\x12\x10\n\x08\x65ntry_pc\x18\x06 \x01(\x04\x12\x11\n\tcalldests\x18\x07 \x03(\x04\"\x81\x01\n\x10\x45LFLoaderFixture\x12\x33\n\x05input\x18\x01 \x01(\x0b\x32$.org.solana.sealevel.v1.ELFLoaderCtx\x12\x38\n\x06output\x18\x02 \x01(\x0b\x32(.org.solana.sealevel.v1.ELFLoaderEffectsb\x06proto3') ) @@ -429,6 +429,179 @@ serialized_end=1008, ) + +_ELFBINARY = _descriptor.Descriptor( + name='ELFBinary', + full_name='org.solana.sealevel.v1.ELFBinary', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='data', full_name='org.solana.sealevel.v1.ELFBinary.data', index=0, + number=1, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + 
is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1010, + serialized_end=1035, +) + + +_ELFLOADERCTX = _descriptor.Descriptor( + name='ELFLoaderCtx', + full_name='org.solana.sealevel.v1.ELFLoaderCtx', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='elf', full_name='org.solana.sealevel.v1.ELFLoaderCtx.elf', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='features', full_name='org.solana.sealevel.v1.ELFLoaderCtx.features', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1037, + serialized_end=1153, +) + + +_ELFLOADEREFFECTS = _descriptor.Descriptor( + name='ELFLoaderEffects', + full_name='org.solana.sealevel.v1.ELFLoaderEffects', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='rodata', full_name='org.solana.sealevel.v1.ELFLoaderEffects.rodata', index=0, + number=1, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='rodata_sz', full_name='org.solana.sealevel.v1.ELFLoaderEffects.rodata_sz', index=1, + number=2, type=4, cpp_type=4, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='text_cnt', full_name='org.solana.sealevel.v1.ELFLoaderEffects.text_cnt', index=2, + number=4, type=4, cpp_type=4, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='text_off', full_name='org.solana.sealevel.v1.ELFLoaderEffects.text_off', index=3, + number=5, type=4, cpp_type=4, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='entry_pc', full_name='org.solana.sealevel.v1.ELFLoaderEffects.entry_pc', index=4, + number=6, type=4, cpp_type=4, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='calldests', full_name='org.solana.sealevel.v1.ELFLoaderEffects.calldests', index=5, + number=7, type=4, cpp_type=4, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, 
containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1155, + serialized_end=1281, +) + + +_ELFLOADERFIXTURE = _descriptor.Descriptor( + name='ELFLoaderFixture', + full_name='org.solana.sealevel.v1.ELFLoaderFixture', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='input', full_name='org.solana.sealevel.v1.ELFLoaderFixture.input', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='output', full_name='org.solana.sealevel.v1.ELFLoaderFixture.output', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1284, + serialized_end=1413, +) + _EPOCHCONTEXT.fields_by_name['features'].message_type = _FEATURESET _INSTRCONTEXT.fields_by_name['accounts'].message_type = _ACCTSTATE _INSTRCONTEXT.fields_by_name['instr_accounts'].message_type = _INSTRACCT @@ -438,6 +611,10 @@ _INSTREFFECTS.fields_by_name['modified_accounts'].message_type = _ACCTSTATE _INSTRFIXTURE.fields_by_name['input'].message_type = _INSTRCONTEXT _INSTRFIXTURE.fields_by_name['output'].message_type = _INSTREFFECTS +_ELFLOADERCTX.fields_by_name['elf'].message_type = _ELFBINARY +_ELFLOADERCTX.fields_by_name['features'].message_type = _FEATURESET +_ELFLOADERFIXTURE.fields_by_name['input'].message_type = _ELFLOADERCTX +_ELFLOADERFIXTURE.fields_by_name['output'].message_type = _ELFLOADEREFFECTS DESCRIPTOR.message_types_by_name['FeatureSet'] = _FEATURESET DESCRIPTOR.message_types_by_name['AcctState'] = _ACCTSTATE DESCRIPTOR.message_types_by_name['EpochContext'] = _EPOCHCONTEXT @@ -447,6 +624,10 @@ DESCRIPTOR.message_types_by_name['InstrContext'] = _INSTRCONTEXT DESCRIPTOR.message_types_by_name['InstrEffects'] = _INSTREFFECTS DESCRIPTOR.message_types_by_name['InstrFixture'] = _INSTRFIXTURE +DESCRIPTOR.message_types_by_name['ELFBinary'] = _ELFBINARY +DESCRIPTOR.message_types_by_name['ELFLoaderCtx'] = _ELFLOADERCTX +DESCRIPTOR.message_types_by_name['ELFLoaderEffects'] = _ELFLOADEREFFECTS +DESCRIPTOR.message_types_by_name['ELFLoaderFixture'] = _ELFLOADERFIXTURE _sym_db.RegisterFileDescriptor(DESCRIPTOR) FeatureSet = _reflection.GeneratedProtocolMessageType('FeatureSet', (_message.Message,), dict( @@ -512,5 +693,33 @@ )) _sym_db.RegisterMessage(InstrFixture) +ELFBinary = _reflection.GeneratedProtocolMessageType('ELFBinary', (_message.Message,), dict( + DESCRIPTOR = _ELFBINARY, + __module__ = 'invoke_pb2' + # @@protoc_insertion_point(class_scope:org.solana.sealevel.v1.ELFBinary) + )) +_sym_db.RegisterMessage(ELFBinary) + +ELFLoaderCtx = _reflection.GeneratedProtocolMessageType('ELFLoaderCtx', (_message.Message,), dict( + DESCRIPTOR = _ELFLOADERCTX, + __module__ = 'invoke_pb2' + # @@protoc_insertion_point(class_scope:org.solana.sealevel.v1.ELFLoaderCtx) + )) 
+_sym_db.RegisterMessage(ELFLoaderCtx) + +ELFLoaderEffects = _reflection.GeneratedProtocolMessageType('ELFLoaderEffects', (_message.Message,), dict( + DESCRIPTOR = _ELFLOADEREFFECTS, + __module__ = 'invoke_pb2' + # @@protoc_insertion_point(class_scope:org.solana.sealevel.v1.ELFLoaderEffects) + )) +_sym_db.RegisterMessage(ELFLoaderEffects) + +ELFLoaderFixture = _reflection.GeneratedProtocolMessageType('ELFLoaderFixture', (_message.Message,), dict( + DESCRIPTOR = _ELFLOADERFIXTURE, + __module__ = 'invoke_pb2' + # @@protoc_insertion_point(class_scope:org.solana.sealevel.v1.ELFLoaderFixture) + )) +_sym_db.RegisterMessage(ELFLoaderFixture) + # @@protoc_insertion_point(module_scope) diff --git a/src/test_suite/minimize_utils.py b/src/test_suite/minimize_utils.py deleted file mode 100644 index bcc7cc3..0000000 --- a/src/test_suite/minimize_utils.py +++ /dev/null @@ -1,79 +0,0 @@ -from pathlib import Path -import test_suite.invoke_pb2 as pb -import test_suite.globals as globals -from test_suite.multiprocessing_utils import ( - read_instr, - process_instruction, -) - - -def minimize_single_test_case(test_file: Path) -> int: - """ - Minimize a single test case by pruning any additional accounts / features that do not - affect output. - - Args: - test_file (Path): The test file to minimize - - Returns: - int: 0 on failure, 1 on success - """ - _, serialized_instruction_context = read_instr(test_file) - - # Skip if input is invalid - if serialized_instruction_context is None: - return 0 - - lib = globals.target_libraries[globals.solana_shared_library] - - # Get a base output result (could be None) - baseline_instruction_effects = process_instruction( - lib, serialized_instruction_context - ) - - # Skip if input could not be processed - if baseline_instruction_effects is None: - return 0 - - # Serialize the instruction effects - serialized_baseline_instruction_effects = ( - baseline_instruction_effects.SerializeToString(deterministic=True) - ) - - # Deserialize the instruction context - instruction_context = pb.InstrContext() - instruction_context.ParseFromString(serialized_instruction_context) - - # Incrementally remove features and test the output - feature_count = len(instruction_context.epoch_context.features.features) - feature_idx = feature_count - 1 - while feature_idx >= 0: - removed_feature = instruction_context.epoch_context.features.features[ - feature_idx - ] - del instruction_context.epoch_context.features.features[feature_idx] - test_instruction_effects = process_instruction( - lib, instruction_context.SerializeToString(deterministic=True) - ) - serialized_test_instruction_effects = ( - test_instruction_effects.SerializeToString(deterministic=True) - ) - if ( - serialized_baseline_instruction_effects - != serialized_test_instruction_effects - ): - instruction_context.epoch_context.features.features.extend( - [removed_feature] - ) - feature_idx -= 1 - - features = ( - list(instruction_context.epoch_context.features.features) - + globals.feature_pool.hardcoded - ) - del instruction_context.epoch_context.features.features[:] - instruction_context.epoch_context.features.features.extend(sorted(set(features))) - - with open(globals.output_dir / (test_file.stem + ".txt"), "wb") as f: - f.write(instruction_context.SerializeToString(deterministic=True)) - return 1 diff --git a/src/test_suite/multiprocessing_utils.py b/src/test_suite/multiprocessing_utils.py index 4915966..f181523 100644 --- a/src/test_suite/multiprocessing_utils.py +++ b/src/test_suite/multiprocessing_utils.py @@ -1,7 +1,6 @@ from 
dataclasses import dataclass, field from test_suite.constants import OUTPUT_BUFFER_SIZE import test_suite.invoke_pb2 as pb -from test_suite.codec_utils import encode_input, encode_output, decode_input from test_suite.validation_utils import check_account_unchanged import ctypes from ctypes import c_uint64, c_int, POINTER, Structure @@ -11,7 +10,7 @@ import os -def process_instruction( +def process_target( library: ctypes.CDLL, serialized_instruction_context: str ) -> pb.InstrEffects | None: """ @@ -41,7 +40,8 @@ def process_instruction( out_sz = ctypes.c_uint64(OUTPUT_BUFFER_SIZE) # Call the function - result = library.sol_compat_instr_execute_v1( + sol_compat_fn = getattr(library, globals.harness_ctx.fuzz_fn_name) + result = sol_compat_fn( globals.output_buffer_pointer, ctypes.byref(out_sz), in_ptr, in_sz ) @@ -57,7 +57,7 @@ def process_instruction( return output_object -def read_instr(test_file: Path) -> str | None: +def read_context(test_file: Path) -> str | None: """ Reads in test files and generates an InstrContext Protobuf object for a test case. @@ -71,16 +71,19 @@ def read_instr(test_file: Path) -> str | None: try: # Read in binary Protobuf messages with open(test_file, "rb") as f: - instruction_context = pb.InstrContext() + instruction_context = globals.harness_ctx.context_type() instruction_context.ParseFromString(f.read()) except: try: # Maybe it's in human-readable Protobuf format? with open(test_file) as f: - instruction_context = text_format.Parse(f.read(), pb.InstrContext()) + instruction_context = text_format.Parse( + f.read(), globals.harness_ctx.context_type() + ) # Decode into digestable fields - decode_input(instruction_context) + # decode_input(instruction_context) + globals.harness_ctx.context_human_decode_fn(instruction_context) except: # Unable to read message, skip and continue instruction_context = None @@ -112,7 +115,7 @@ def read_fixture(fixture_file: Path) -> str | None: try: # Read in binary Protobuf messages with open(fixture_file, "rb") as f: - instruction_fixture = pb.InstrFixture() + instruction_fixture = globals.harness_ctx.fixture_type() instruction_fixture.ParseFromString(f.read()) except: # Unable to read message, skip and continue @@ -139,16 +142,16 @@ def decode_single_test_case(test_file: Path) -> int: Returns: - int: 1 if successfully decoded and written, 0 if skipped. 
""" - serialized_instruction_context = read_instr(test_file) + serialized_instruction_context = read_context(test_file) # Skip if input is invalid if serialized_instruction_context is None: return 0 # Encode the input fields to be human readable - instruction_context = pb.InstrContext() + instruction_context = globals.harness_ctx.context_type() instruction_context.ParseFromString(serialized_instruction_context) - encode_input(instruction_context) + globals.harness_ctx.context_human_encode_fn(instruction_context) with open(globals.output_dir / (test_file.stem + ".txt"), "w") as f: f.write( @@ -177,7 +180,7 @@ def process_single_test_case( # Execute test case on each target library results = {} for target in globals.target_libraries: - instruction_effects = process_instruction( + instruction_effects = process_target( globals.target_libraries[target], serialized_instruction_context ) result = ( @@ -219,8 +222,8 @@ def merge_results_over_iterations(results: tuple) -> tuple[str, dict]: def prune_execution_result( - serialized_instruction_context: str, - targets_to_serialized_instruction_effects: dict[str, str | None], + serialized_context: str, + targets_to_serialized_effects: dict[str, str | None], ) -> dict[str, str | None] | None: """ Prune execution result to only include actually modified accounts. @@ -232,29 +235,36 @@ def prune_execution_result( Returns: - dict[str, str | None] | None: Serialized pruned instruction effects for each target. """ - if serialized_instruction_context is None: + if serialized_context is None: return None - instruction_context = pb.InstrContext() - instruction_context.ParseFromString(serialized_instruction_context) + EffectsT = globals.harness_ctx.effects_type + + if not hasattr(EffectsT(), "modified_accounts"): + # no execution results to prune + # TODO: perform this check in a more robust way + return targets_to_serialized_effects + + context = globals.harness_ctx.context_type() + context.ParseFromString(serialized_context) targets_to_serialized_pruned_instruction_effects = {} for ( target, serialized_instruction_effects, - ) in targets_to_serialized_instruction_effects.items(): + ) in targets_to_serialized_effects.items(): if serialized_instruction_effects is None: targets_to_serialized_pruned_instruction_effects[target] = None continue - instruction_effects = pb.InstrEffects() + instruction_effects = EffectsT() instruction_effects.ParseFromString(serialized_instruction_effects) # O(n^2) because not performance sensitive new_modified_accounts: list[pb.AcctState] = [] for modified_account in instruction_effects.modified_accounts: account_unchanged = False - for beginning_account_state in instruction_context.accounts: + for beginning_account_state in context.accounts: account_unchanged |= check_account_unchanged( modified_account, beginning_account_state ) @@ -294,9 +304,9 @@ def check_consistency_in_results(file_stem: str, results: dict) -> dict[str, boo protobuf_struct = None if results[target][iteration]: # Turn bytes into human readable fields - protobuf_struct = pb.InstrEffects() + protobuf_struct = globals.harness_ctx.effects_type() protobuf_struct.ParseFromString(results[target][iteration]) - encode_output(protobuf_struct) + globals.harness_ctx.effects_human_encode_fn(protobuf_struct) protobuf_structures[iteration] = protobuf_struct @@ -348,9 +358,9 @@ def build_test_results(results: dict[str, str | None]) -> tuple[int, dict | None instruction_effects = None if result: # Turn bytes into human readable fields - instruction_effects = pb.InstrEffects() + 
instruction_effects = globals.harness_ctx.effects_type() instruction_effects.ParseFromString(result) - encode_output(instruction_effects) + globals.harness_ctx.effects_human_encode_fn(instruction_effects) outputs[target] = text_format.MessageToString(instruction_effects) protobuf_structures[target] = instruction_effects @@ -428,12 +438,11 @@ def run_test(test_file: Path) -> tuple[str, int, dict | None]: """ # Process fixtures through this entrypoint as well if test_file.suffix == ".fix": - fixture = pb.InstrFixture() - serialized_fixture = read_fixture(test_file) - fixture.MergeFromString(serialized_fixture) + fixture = globals.harness_ctx.fixture_type() + fixture.ParseFromString(test_file.open("rb").read()) serialized_instr_context = fixture.input.SerializeToString(deterministic=True) else: - serialized_instr_context = read_instr(test_file) + serialized_instr_context = read_context(test_file) results = process_single_test_case(serialized_instr_context) pruned_results = prune_execution_result(serialized_instr_context, results) return test_file.stem, *build_test_results(pruned_results) diff --git a/src/test_suite/test_suite.py b/src/test_suite/test_suite.py index bbfd116..1c2962b 100644 --- a/src/test_suite/test_suite.py +++ b/src/test_suite/test_suite.py @@ -9,16 +9,15 @@ from test_suite.constants import LOG_FILE_SEPARATOR_LENGTH, NATIVE_PROGRAM_MAPPING from test_suite.fixture_utils import ( create_fixture, - extract_instr_context_from_fixture, + extract_context_from_fixture, ) import test_suite.invoke_pb2 as pb -from test_suite.codec_utils import encode_output -from test_suite.minimize_utils import minimize_single_test_case +from test_suite.instr.codec_utils import encode_output from test_suite.multiprocessing_utils import ( decode_single_test_case, - read_instr, + read_context, initialize_process_output_buffers, - process_instruction, + process_target, prune_execution_result, get_feature_pool, run_test, @@ -27,6 +26,9 @@ from test_suite.debugger import debug_host import resource import tqdm +from test_suite.fuzz_context import ElfHarness, InstrHarness + +globals.harness_ctx = InstrHarness app = typer.Typer( @@ -36,7 +38,12 @@ @app.command() def exec_instr( - file: Path = typer.Option(None, "--input", "-i", help="Input file"), + file: Path = typer.Option( + None, + "--input", + "-i", + help=f"Input {globals.harness_ctx.context_type.__name__} file", + ), shared_library: Path = typer.Option( Path("impl/firedancer/build/native/clang/lib/libfd_exec_sol_compat.so"), "--target", @@ -50,8 +57,8 @@ def exec_instr( help="Randomizes bytes in output buffer before shared library execution", ), ): - instruction_context = read_instr(file) - assert instruction_context is not None, f"Unable to read {file.name}" + context = read_context(file) + assert context is not None, f"Unable to read {file.name}" # Initialize output buffers and shared library initialize_process_output_buffers(randomize_output_buffer=randomize_output_buffer) @@ -59,29 +66,28 @@ def exec_instr( lib.sol_compat_init() # Execute and cleanup - instruction_effects = process_instruction(lib, instruction_context) + effects = process_target(lib, context) - if not instruction_effects: + if not effects: print("No instruction effects returned") return None - instruction_effects = instruction_effects.SerializeToString(deterministic=True) + serialized_effects = effects.SerializeToString(deterministic=True) # Prune execution results - pruned_instruction_effects = prune_execution_result( - instruction_context, - {shared_library: instruction_effects}, 
- ) - parsed_instruction_effects = pb.InstrEffects() - parsed_instruction_effects.ParseFromString( - pruned_instruction_effects[shared_library] - ) + serialized_effects = prune_execution_result( + context, + {shared_library: serialized_effects}, + )[shared_library] + + parsed_instruction_effects = globals.harness_ctx.effects_type() + parsed_instruction_effects.ParseFromString(serialized_effects) lib.sol_compat_fini() # Print human-readable output if parsed_instruction_effects: - encode_output(parsed_instruction_effects) + globals.harness_ctx.effects_human_encode_fn(parsed_instruction_effects) print(parsed_instruction_effects) @@ -103,82 +109,24 @@ def debug_instr( print(f"Processing {file.name}...") # Decode the file and pass it into GDB - instruction_context = read_instr(file) + instruction_context = read_context(file) assert instruction_context is not None, f"Unable to read {file.name}" debug_host(shared_library, instruction_context, gdb=debugger) -@app.command() -def minimize_tests( - input_dir: Path = typer.Option( - Path("corpus8"), - "--input-dir", - "-i", - help="Input directory containing instruction context messages", - ), - solana_shared_library: Path = typer.Option( - Path("impl/lib/libsolfuzz_agave_v2.0.so"), - "--solana-target", - "-s", - help="Solana (or ground truth) shared object (.so) target file path", - ), - output_dir: Path = typer.Option( - Path("test_results"), - "--output-dir", - "-o", - help="Output directory for test results", - ), - num_processes: int = typer.Option( - 4, "--num-processes", "-p", help="Number of processes to use" - ), -): - # Specify globals - globals.output_dir = output_dir - globals.solana_shared_library = solana_shared_library - - # Create the output directory, if necessary - if globals.output_dir.exists(): - shutil.rmtree(globals.output_dir) - globals.output_dir.mkdir(parents=True, exist_ok=True) - - # Load in and initialize shared library - lib = ctypes.CDLL(globals.solana_shared_library) - lib.sol_compat_init() - globals.target_libraries[globals.solana_shared_library] = lib - - globals.feature_pool = get_feature_pool(lib) - - num_test_cases = len(list(input_dir.iterdir())) - - minimize_results = [] - with Pool( - processes=num_processes, initializer=initialize_process_output_buffers - ) as pool: - for result in tqdm.tqdm( - pool.imap(minimize_single_test_case, input_dir.iterdir()), - total=num_test_cases, - ): - minimize_results.append(result) - - lib.sol_compat_fini() - print("-" * LOG_FILE_SEPARATOR_LENGTH) - print(f"{len(minimize_results)} total files seen") - print(f"{sum(minimize_results)} files successfully minimized") - - @app.command() def instr_from_fixtures( input_dir: Path = typer.Option( Path("fixtures"), "--input-dir", "-i", - help="Input directory containing instruction fixture messages", + help=f"Input directory containing {globals.harness_ctx.fixture_type.__name__} messages", ), output_dir: Path = typer.Option( Path("instr"), "--output-dir", "-o", - help="Output directory for instr contexts", + help=f"Output directory for {globals.harness_ctx.context_type.__name__} messages", ), num_processes: int = typer.Option( 4, "--num-processes", "-p", help="Number of processes to use" @@ -195,11 +143,11 @@ def instr_from_fixtures( test_cases = list(input_dir.iterdir()) num_test_cases = len(test_cases) - print("Converting to InstrContext...") + print(f"Converting to {globals.harness_ctx.context_type.__name__}...") results = [] with Pool(processes=num_processes) as pool: for result in tqdm.tqdm( - 
pool.imap(extract_instr_context_from_fixture, test_cases), + pool.imap(extract_context_from_fixture, test_cases), total=num_test_cases, ): results.append(result) @@ -215,7 +163,7 @@ def create_fixtures( Path("corpus8"), "--input-dir", "-i", - help="Input directory containing instruction context messages", + help=f"Input directory containing {globals.harness_ctx.context_type.__name__} messages", ), solana_shared_library: Path = typer.Option( Path("impl/lib/libsolfuzz_agave_v2.0.so"), @@ -227,7 +175,8 @@ def create_fixtures( [], "--target", "-t", - help="Shared object (.so) target file paths (pairs with --keep-passing)", + help="Shared object (.so) target file paths (pairs with --keep-passing)." + f" Targets must have {globals.harness_ctx.fuzz_fn_name} defined", ), output_dir: Path = typer.Option( Path("test_fixtures"), @@ -304,7 +253,8 @@ def run_tests( Path("corpus8"), "--input-dir", "-i", - help="Input directory containing instruction context or fixture messages", + help=f"Input directory containing {globals.harness_ctx.context_type.__name__}" + f" or { globals.harness_ctx.fixture_type.__name__ } messages", ), solana_shared_library: Path = typer.Option( Path("impl/lib/libsolfuzz_agave_v2.0.so"), @@ -435,13 +385,13 @@ def decode_protobuf( Path("raw_instruction_context"), "--input-dir", "-i", - help="Input directory containing instruction context messages in binary format", + help=f"Input directory containing {globals.harness_ctx.context_type.__name__} messages", ), output_dir: Path = typer.Option( Path("readable_instruction_context"), "--output-dir", "-o", - help="Output directory for base58-encoded, human-readable instruction context messages", + help=f"Output directory for base58-encoded, human-readable {globals.harness_ctx.context_type.__name__} messages", ), num_processes: int = typer.Option( 4, "--num-processes", "-p", help="Number of processes to use"
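Taken together, fuzz_interface.py and fuzz_context.py mean a new harness only needs a fixture message (with input and output fields) plus the name of the exported sol_compat entrypoint; HarnessCtx.__post_init__ derives the context and effects classes from the fixture descriptor, and the encode/decode hooks default to no-ops. A hypothetical registration with a custom effects comparator might look like the following (the comparator and the variable name are illustrative, not part of this change):

from test_suite.fuzz_interface import HarnessCtx
import test_suite.invoke_pb2 as pb

def diff_elf_effects(a, b) -> bool:
    # Example policy: ignore rodata_sz, compare everything else exactly.
    return (
        a.rodata == b.rodata
        and a.text_cnt == b.text_cnt
        and a.text_off == b.text_off
        and a.entry_pc == b.entry_pc
        and list(a.calldests) == list(b.calldests)
    )

ExampleElfHarness = HarnessCtx(
    fuzz_fn_name="sol_compat_elf_loader_v1",
    fixture_desc=pb.ELFLoaderFixture.DESCRIPTOR,
    diff_effect_fn=diff_elf_effects,  # optional; defaults to exact equality
)

test_suite.py currently pins globals.harness_ctx = InstrHarness at import time, so switching the CLI to another harness is a one-line change to that assignment.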
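The key change in process_target is that the entrypoint is resolved by name on the loaded shared object instead of being hardcoded to sol_compat_instr_execute_v1. A simplified sketch of that dispatch, assuming the target follows the same output-buffer calling convention; the real code reuses the preallocated per-process buffer from initialize_process_output_buffers rather than allocating one per call:

import ctypes
import test_suite.globals as globals
from test_suite.constants import OUTPUT_BUFFER_SIZE

def call_target(library: ctypes.CDLL, serialized_context: bytes):
    out_buf = ctypes.create_string_buffer(OUTPUT_BUFFER_SIZE)
    out_sz = ctypes.c_uint64(OUTPUT_BUFFER_SIZE)
    in_buf = ctypes.create_string_buffer(serialized_context, len(serialized_context))

    # Resolve e.g. sol_compat_instr_execute_v1 or sol_compat_elf_loader_v1.
    sol_compat_fn = getattr(library, globals.harness_ctx.fuzz_fn_name)
    ok = sol_compat_fn(
        out_buf, ctypes.byref(out_sz), in_buf, ctypes.c_uint64(len(serialized_context))
    )
    if not ok:
        return None

    effects = globals.harness_ctx.effects_type()
    effects.ParseFromString(out_buf.raw[: out_sz.value])
    return effects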
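read_context keeps the old two-step parsing strategy, now parameterized by the harness: try the binary wire format first, then fall back to protobuf text format and run the harness's human-decode hook so display encodings (e.g. base58) are turned back into raw bytes. A condensed sketch, assuming globals.harness_ctx is set:

from pathlib import Path
from google.protobuf import text_format
import test_suite.globals as globals

def load_context_bytes(test_file: Path) -> bytes | None:
    context = globals.harness_ctx.context_type()
    try:
        context.ParseFromString(test_file.read_bytes())
    except Exception:
        try:
            context = text_format.Parse(
                test_file.read_text(), globals.harness_ctx.context_type()
            )
            globals.harness_ctx.context_human_decode_fn(context)
        except Exception:
            return None  # unreadable input, caller skips the file
    return context.SerializeToString(deterministic=True)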