Skip to content

Commit a60f008

Browse files
committed
feat(browser/captcha): support hints in captcha detection
1 parent f0321b6 commit a60f008

File tree

1 file changed

+12
-3
lines changed

1 file changed

+12
-3
lines changed

npiai/core/tool/_browser.py

+12 -3
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@ async def load_page(
5050
wait_for_selector: str = None,
5151
timeout: int | None = None,
5252
force_capcha_detection: bool = False,
53+
captcha_detection_hints: str | None = None,
5354
):
5455
await self.playwright.page.goto(url)
5556

@@ -73,7 +74,7 @@ async def load_page(
7374
# await self.playwright.page.wait_for_timeout(wait)
7475

7576
if force_capcha_detection:
76-
await self.detect_captcha(ctx, return_to=url)
77+
await self.detect_captcha(ctx, return_to=url, hints=captcha_detection_hints)
7778

7879
@function
7980
async def get_text(self):
@@ -304,7 +305,12 @@ async def back_to_top(self):
304305

305306
return f"Successfully scrolled to top"
306307

307-
async def detect_captcha(self, ctx: Context, return_to: str | None = None):
308+
async def detect_captcha(
309+
self,
310+
ctx: Context,
311+
return_to: str | None = None,
312+
hints: str | None = None,
313+
):
308314
url = await self.get_page_url()
309315
screenshot = await self.get_screenshot(full_page=True, max_size=(1280, 720))
310316

@@ -350,13 +356,16 @@ async def handle_captcha(captcha_type: Literal["none", "captcha", "login"]):
350356
ChatCompletionSystemMessageParam(
351357
role="system",
352358
content=dedent(
353-
"""
359+
f"""
354360
You are given a screenshot of a webpage. Determine if a captcha or login form with input fields is present in the screenshot. If a captcha is present, call the tool with the argument "captcha". If a login form is present, call the tool with the argument "login". If neither is present, call the tool with the argument "none".
355361
356362
NOTE:
357363
- Popups like cookie consent banners should not be considered as login forms.
358364
- The login action can be ignored if the main content is visible in the screenshot.
359365
- Login button that triggers a login form should not be considered as login forms. Only visible login forms with input fields should be considered.
366+
367+
HINTS:
368+
{hints}
360369
"""
361370
),
362371
),

0 commit comments

Comments
 (0)