Skip to content

Commit 5c8edcd

Browse files
committed
test(scraper/incremental): print matched and unmatched hashes
1 parent 369934c commit 5c8edcd

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

npiai/tools/web/scraper/__test__/incremental.py

+4
Original file line number | Diff line number | Diff line change
@@ -43,16 +43,20 @@ async def summarize(skip_item_hashes: Set[str] | None = None):
43 43   start = time.monotonic()
44 44   count = 0
45 45   hashes = set()
   46 + matched_hashes = set()
46 47
47 48   async for chunk in stream:
48 49   count += len(chunk["items"])
49 50   print("Chunk:", json.dumps(chunk, indent=2))
   51 + matched_hashes.update(chunk["matched_hashes"])
50 52
51 53   for item in chunk["items"]:
52 54   hashes.add(item["hash"])
53 55
54 56   end = time.monotonic()
55 57   print(f"Summarized {count} items in {end - start:.2f} seconds")
   58 + print("Matched hashes:", matched_hashes)
   59 + print("Unmatched hashes:", hashes - matched_hashes)
56 60
57 61   return hashes
58 62

0 commit comments

Comments
 (0)