@@ -81,12 +81,17 @@ class Scraper(BrowserTool):
81
81
82
82
_navigator : NavigatorAgent
83
83
84
+ # asyncio lock to prevent concurrent access to the webpage
85
+ # to avoid retrieving the same items multiple times
86
+ _webpage_access_lock : asyncio .Lock
87
+
84
88
def __init__(self, batch_size: int = 10, **kwargs):
    """Set up the scraper and its navigation sub-tool.

    Args:
        batch_size: Stored unchanged on ``self._batch_size``.
        **kwargs: Forwarded as-is to the parent class initializer.
    """
    super().__init__(**kwargs)

    # The navigator is built on the same playwright object as this tool.
    self._navigator = NavigatorAgent(playwright=self.playwright)
    self._batch_size = batch_size

    # Serializes access to the webpage so that concurrent callers do not
    # retrieve the same items multiple times.
    self._webpage_access_lock = asyncio.Lock()

    # Register the navigator as a tool of this scraper.
    self.add_tool(self._navigator)
91
96
92
97
@classmethod
@@ -392,13 +397,14 @@ async def _parse(
392
397
limit : int = - 1 ,
393
398
skip_item_hashes : Set [str ] | None = None ,
394
399
) -> ParsedResult | None | None :
395
- # convert relative links to absolute links
396
- await self ._process_relative_links ()
400
+ async with self ._webpage_access_lock :
401
+ # convert relative links to absolute links
402
+ await self ._process_relative_links ()
397
403
398
- if items_selector is None :
399
- return await self ._parse_ancestor (ancestor_selector , skip_item_hashes )
400
- else :
401
- return await self ._parse_items (items_selector , limit , skip_item_hashes )
404
+ if items_selector is None :
405
+ return await self ._parse_ancestor (ancestor_selector , skip_item_hashes )
406
+ else :
407
+ return await self ._parse_items (items_selector , limit , skip_item_hashes )
402
408
403
409
async def _parse_items (
404
410
self ,
0 commit comments