We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 369934c · commit 5c8edcd · Copy full SHA for 5c8edcd
npiai/tools/web/scraper/__test__/incremental.py
@@ -43,16 +43,20 @@ async def summarize(skip_item_hashes: Set[str] | None = None):
43
start = time.monotonic()
44
count = 0
45
hashes = set()
46
+ matched_hashes = set()
47
48
async for chunk in stream:
49
count += len(chunk["items"])
50
print("Chunk:", json.dumps(chunk, indent=2))
51
+ matched_hashes.update(chunk["matched_hashes"])
52
53
for item in chunk["items"]:
54
hashes.add(item["hash"])
55
56
end = time.monotonic()
57
print(f"Summarized {count} items in {end - start:.2f} seconds")
58
+ print("Matched hashes:", matched_hashes)
59
+ print("Unmatched hashes:", hashes - matched_hashes)
60
61
return hashes
62
0 commit comments