Skip to content

Commit

Permalink
benchmark: update cfg results
Browse files Browse the repository at this point in the history
  • Loading branch information
cdump committed Feb 25, 2025
1 parent 04b1bcb commit 8a0eb90
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 22 deletions.
33 changes: 17 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -420,35 +420,36 @@ $ cast selectors --resolve $(cast code 0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc
<td><a href="benchmark/providers/evmole-rs"><b><i>evmole</i></b></a></td>
<td><a href="benchmark/providers/ethersolve"><b><i>ethersolve</i></b></a></td>
<td><a href="benchmark/providers/evm-cfg"><b><i>evm-cfg</i></b></a></td>
<td><a href="benchmark/providers/heimdall-rs"><b><i>heimdall</i></b></a></td>
<td><a href="benchmark/providers/evm-cfg-builder"><b><i>evm-cfg-builder</i></b></a></td>
<td><a href="benchmark/providers/sevm"><b><i>sevm</i></b></a></td>
<td><a href="benchmark/providers/heimdall-rs"><b><i>heimdall-rs</i></b></a></td>
<td><a href="benchmark/providers/evm-cfg-builder"><b><i>evm-cfg-builder</i></b></a></td>
</tr>
<tr>
<td><i>Total Blocks</i></td>
<td>97.0%🥇<br><sub>661957 </sub></td>
<td>93.7%<br><sub>639155</sub></td>
<td>63.0%<br><sub>429860</sub></td>
<td>31.9%<br><sub>217922</sub></td>
<td>21.7%<br><sub>148162</sub></td>
<td>6.7%<br><sub>45831</sub></td>
<td><i>Basic Blocks</i></td>
<td>97.0% 🥇<br><sub>661959</sub></td>
<td>93.7%<br><sub>639175</sub></td>
<td>63.0%<br><sub>430011</sub></td>
<td>41.4%<br><sub>282599</sub></td>
<td>31.9%<br><sub>217924</sub></td>
<td>21.7%<br><sub>148166</sub></td>
</tr>
<tr>
<td><i>False Negatives</i></td>
<td>3.0%🥇<br><sub>20484</sub></td>
<td>6.3%<br><sub>43286</sub></td>
<td>37.0%<br><sub>252581</sub></td>
<td>68.1%<br><sub>464519</sub></td>
<td>78.3%<br><sub>534279</sub></td>
<td>93.3%<br><sub>636610</sub></td>
<td>3.0% 🥇<br><sub>20482</sub></td>
<td>6.3%<br><sub>43266</sub></td>
<td>37.0%<br><sub>252430</sub></td>
<td>58.6%<br><sub>399842</sub></td>
<td>68.1%<br><sub>464517</sub></td>
<td>78.3%<br><sub>534275</sub></td>
</tr>
<tr>
<td><i>Time</i></td>
<td>34s</td>
<td>1202s</td>
<td>40s</td>
<td>42s</td>
<td>206s</td>
<td>308s</td>
<td>41s</td>
</tr>
</table>

Expand Down
68 changes: 64 additions & 4 deletions benchmark/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,27 +319,38 @@ def process_flow(dname: str, providers: list[str], results_dir: str) -> dict:
pdata, ptimes = load_data('flow', dname, providers, results_dir)
results = []

total_gt_blocks = 0
for fname, (_, reference_data) in gt_blocks[0].items():
provider_stats = []

# for debug: skip file if any provider output 0 results - probably timeout or other error
# if any(len(provider_data[fname][1]) == 0 for provider_data in pdata):
# continue

# start of every block - not edges
ground_truth = set(b[0] for b in reference_data)
total_gt_blocks += len(ground_truth)

for provider_data in pdata:
curr_data = provider_data[fname][1]
curr_edges = flow_filter_reachable(curr_data)
curr_blocks = {node for edge in curr_edges for node in edge}
curr_blocks.add(0) # entrypoint block always exists
total_blocks = len(curr_blocks)
extra_blocks = curr_blocks - ground_truth
missing_blocks = ground_truth - curr_blocks

# debug:
if len(ground_truth) < 100 and len(missing_blocks) > 0:
print(fname, extra_blocks, missing_blocks)
provider_stats.append((total_blocks, extra_blocks, missing_blocks))

results.append({
'addr': fname[2:-5], # '0xFF.json' => 'FF'
'results': provider_stats,
})

return {'dataset': dname, 'results': results, 'timings': ptimes}
return {'dataset': dname, 'results': results, 'timings': ptimes, 'total_gt_blocks': total_gt_blocks}

def show_flow(providers: list[str], all_results: list, show_errors: bool):
for dataset_result in all_results:
Expand All @@ -349,7 +360,8 @@ def show_flow(providers: list[str], all_results: list, show_errors: bool):
extra_blocks_cnt = sum(len(y['results'][provider_idx][1]) for y in dataset_result['results'])
missing_blocks_cnt = sum(len(y['results'][provider_idx][2]) for y in dataset_result['results'])

print(f'dataset {dataset_result["dataset"]} ({cnt_contracts} contracts), {name}:')
total_gt_blocks = dataset_result['total_gt_blocks']
print(f'dataset {dataset_result["dataset"]} ({cnt_contracts} contracts, {total_gt_blocks} blocks), {name}:')
print(f' time: {format_time(dataset_result["timings"][provider_idx])}')
print(f' blocks: {total_blocks_cnt}')
print(f' False Positive: {extra_blocks_cnt} blocks')
Expand All @@ -367,6 +379,51 @@ def show_flow(providers: list[str], all_results: list, show_errors: bool):
print(f' FP : {fp}')
print(f' FN : {fn}')

def markdown_flow(providers: list[str], all_results: list):
    """Print the control-flow benchmark summary as an HTML table.

    Exactly one dataset result is accepted. A one-line plain-text summary
    (dataset name, contract count, ground-truth block count) is printed
    first, followed by a ``<table>`` with one column per provider and the
    rows "Basic Blocks", "False Negatives", "False Positives" and "Time".

    Every percentage is the per-provider count divided by the dataset's
    ``total_gt_blocks`` value, formatted with one decimal place.

    Args:
        providers: Provider names; used for the header links and to index
            into each contract's ``'results'`` list by position.
        all_results: List holding a single dataset result dict with the keys
            ``'dataset'``, ``'results'``, ``'timings'`` and
            ``'total_gt_blocks'``.

    Raises:
        AssertionError: If ``all_results`` does not hold exactly one entry.
    """
    assert len(all_results) == 1, 'only 1 dataset supported'
    summary = all_results[0]

    gt_total = summary['total_gt_blocks']
    per_contract = summary['results']
    print(f'dataset {summary["dataset"]}, {len(per_contract)} contracts, {gt_total} blocks')

    def emit_ratio_row(label_cell, count_of):
        # One table row: a label cell, then for every provider the summed
        # count over all contracts shown as a share of ground-truth blocks.
        print(' <tr>')
        print(label_cell)
        for idx in range(len(providers)):
            count = sum(count_of(entry['results'][idx]) for entry in per_contract)
            print(f' <td>{(count*100/gt_total):.1f}%<br><sub>{count}</sub></td>')
        print(' </tr>')

    # Header row.
    # NOTE(review): this row emits two leading cells ("Dataset" and an empty
    # one) while each data row below emits a single label cell - confirm the
    # column alignment is intended.
    print('<table>')
    print(' <tr>')
    print(' <td>Dataset</td>')
    print(' <td></td>')
    for provider in providers:
        print(f' <td><a href="benchmark/providers/{provider}/"><b><i>{provider}</i></b></a></td>')
    print(' </tr>')

    # Per-provider stats are indexed as: [0] number of blocks found,
    # [1] collection of extra blocks, [2] collection of missing blocks.
    emit_ratio_row(' <td><i>Basic Blocks</i></td>', lambda stats: stats[0])
    emit_ratio_row(' <td><i>False Negatives</i></td>', lambda stats: len(stats[2]))
    emit_ratio_row(' <td><i>False Positives</i></td>', lambda stats: len(stats[1]))

    # Timing row: one cell per entry in the dataset's timings list.
    print(' <tr>')
    print(' <td><i>Time</i></td>')
    for elapsed in summary["timings"]:
        print(f' <td>{format_time(elapsed)}</td>')
    print(' </tr>')
    print('</table>')


def show_arguments_or_mutability(providers: list[str], all_results: list, show_errors: bool):
for dataset_result in all_results:
cnt_contracts = len(dataset_result['results'])
Expand Down Expand Up @@ -425,7 +482,7 @@ def show_arguments_or_mutability(providers: list[str], all_results: list, show_e
},
'flow': {
'datasets': ['largest1k'],
'providers': ['evmole-rs', 'evm-cfg', 'ethersolve', 'sevm', 'evm-cfg-builder', 'heimdall-rs']
'providers': ['evmole-rs', 'ethersolve', 'evm-cfg', 'sevm', 'heimdall-rs', 'evm-cfg-builder']
}
}

Expand Down Expand Up @@ -473,4 +530,7 @@ def show_arguments_or_mutability(providers: list[str], all_results: list, show_e

elif cfg.mode == 'flow':
results = [process_flow(d, cfg.providers, cfg.results_dir) for d in cfg.datasets]
show_flow(cfg.providers, results, cfg.show_errors)
if cfg.markdown:
markdown_flow(cfg.providers, results)
else:
show_flow(cfg.providers, results, cfg.show_errors)
4 changes: 2 additions & 2 deletions benchmark/providers/evmlisa/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ FROM docker.io/gradle:jdk23
WORKDIR /app
RUN apt-get update && apt-get install -y git

# TMP branch without pinning
RUN git clone https://github.com/lisa-analyzer/evm-lisa && cd evm-lisa && git checkout basic-blocks && gradle shadowJar
# Pinned to the upstream commit from 24 Feb 2025 for reproducible builds
RUN git clone https://github.com/lisa-analyzer/evm-lisa && cd evm-lisa && git checkout f12cc46d6a87de6c5d553273d841c6d35564b4cd && gradle shadowJar

# COPY evm-lisa-all.jar /app/
RUN mv ./evm-lisa/build/libs/evm-lisa-all.jar ./
Expand Down

0 comments on commit 8a0eb90

Please sign in to comment.