-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtender_search.py
125 lines (111 loc) · 5.1 KB
/
tender_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import asyncio
import base64
import os
import time

from dotenv import load_dotenv
from playwright.async_api import async_playwright
from scrapybara import Scrapybara

from markdown_to_pdf import create_tender_pdf
load_dotenv()
async def perform_tender_search(search_term, external_ip, scrapy):
    """Search tender.nprocure.com for a client's tenders and build a PDF report.

    Starts a Scrapybara instance, attaches Playwright to its browser over
    CDP through an authenticated HTTP proxy, opens the tender site, and then
    asks the Scrapybara agent to (1) run the search, (2) scrape the listed
    tenders, and (3) write a markdown report to a file on the instance. That
    file is downloaded, base64-decoded and passed to ``create_tender_pdf``.

    Args:
        search_term: Value the agent is told to select under "Client Name".
        external_ip: Host of the HTTP proxy; port 3128 is appended here.
        scrapy: Scrapybara API key (passed as ``api_key``).

    Returns:
        Whatever ``create_tender_pdf(report_text, "output.pdf")`` returns.

    Raises:
        KeyError: If the scraped data has no ``"tenders"`` key or a tender
            is missing one of the fields used to build the prompt.

    Note:
        Proxy credentials come from the ``PROXY_USERNAME`` and
        ``PROXY_PASSWORD`` environment variables. The Scrapybara calls are
        invoked without ``await`` here, so they presumably hold the event
        loop while they run; only the fixed pauses are cooperative.
    """
    # Single source of truth for the report location: the agent is told to
    # write here and the download reads from here.
    report_path = "/home/scrapybara/Report.txt"

    client = Scrapybara(api_key=scrapy, timeout=200.0)
    instance = client.start(instance_type="small")
    print(f"Instance {instance.id} is running")

    playwright = None
    browser = None
    context = None
    try:
        cdp_url = instance.browser.start().cdp_url
        playwright = await async_playwright().start()
        browser = await playwright.chromium.connect_over_cdp(cdp_url)

        # Route the remote browser's traffic through the authenticated proxy.
        # NOTE(review): ignore_https_errors=True disables certificate
        # validation for this context - confirm this is still required.
        context = await browser.new_context(
            proxy={
                "server": f"http://{external_ip}:3128",
                "username": os.getenv("PROXY_USERNAME"),
                "password": os.getenv("PROXY_PASSWORD"),
            },
            ignore_https_errors=True,
        )
        page = await context.new_page()
        print("done onto next")
        await page.goto("https://tender.nprocure.com", timeout=60000)
        print("done onto next")
        # Non-blocking pause (was time.sleep, which stalls the event loop).
        await asyncio.sleep(2)

        # Step 1: let the agent operate the site's search form.
        instance.agent.act(
            cmd=(
                "first press esc because our focus will be stuck on search bar "
                f"then Use SEARCH on the site, select ‘{search_term}’ "
                "under Client Name, then press search."
            ),
            include_screenshot=True,
            model="claude",
        )
        print("search done")
        await asyncio.sleep(10)

        # Step 2: scrape the result list into a structured payload.
        schema = {
            "tenders": [
                {
                    # Sub-department name (top-right in the tender brief)
                    "sub_department": "string",
                    "name_of_work": "string",
                    "tender_id": "string",
                    "estimated_contract_value": "string",
                    # Last date and time for submission
                    "submission_deadline": "string",
                }
            ]
        }
        scrape_response = instance.agent.scrape(
            cmd=(
                "Extract all tender details from the search results page. "
                "For each tender, gather the following information: "
                "sub-department, name of work, tender ID, estimated contract "
                "value, and submission deadline. If multiple tenders are "
                "listed, ensure you extract all of them. Scroll down to view "
                "additional tenders until you reach the “Next Page” button. "
                "Continue extracting tenders until you either find 4 or more "
                "tenders or reach the bottom of the results where fewer than "
                "10 tenders are available. Stop extracting if there are fewer "
                "than 4 tenders on the final page."
            ),
            schema=schema,
            include_screenshot=True,
            model="claude",
        )
        data = scrape_response.data
        print(data)

        # One line per tender; this text is embedded in the report prompt.
        formatted_data = "\n".join(
            f"Tender ID: {tender['tender_id']}, "
            f"Name of Work: {tender['name_of_work']}, "
            f"Estimated Contract Value: {tender['estimated_contract_value']}, "
            f"Submission Deadline: {tender['submission_deadline']}"
            for tender in data["tenders"]
        )

        # Step 3: have the agent write the markdown report on the instance.
        report_response = instance.agent.act(
            cmd=(
                f"Based on the following tender data:\n{formatted_data}\n\n"
                "Write a detailed report that identifies the suitable contractor type for each tender based on the 'Name of Work' in text file.\n"
                "Write the report in markdown format with the following guidelines:\n"
                "Follow this format for the report:\n\n"
                "# <good tittle that describe this report>\n\n"
                "### Tender ID: <Tender ID>\n"
                "### Estimated Contract Value: <Estimated Contract Value>\n"
                "- **Suitable Contractor**: <Type of Contractor>\n"
                "- **Explanation**:\n"
                " <Brief explanation of the work and expertise required>\n\n"
                "Ensure the final output follows this format.\n"
                "- Use **bold styling** for headings and key terms (e.g., **Tender ID**, **Suitable Contractor**).\n"
                "- Ensure good formatting with new lines for readability.\n\n"
                f"then save the file at {report_path}"
            ),
            model="claude",
            include_screenshot=False,
        )
        print(report_response.output)

        # The download payload is treated as base64 text; decode it once.
        download = instance.file.download(path=report_path)
        report_text = base64.b64decode(download.content).decode("utf-8")
    finally:
        # Always release the browser objects and the remote instance, even
        # when a step above fails; the inner finally guarantees the instance
        # is stopped if the Playwright teardown itself raises.
        try:
            if context is not None:
                await context.close()
            if browser is not None:
                await browser.close()
            if playwright is not None:
                await playwright.stop()
        finally:
            instance.stop()

    print(report_text)
    return create_tender_pdf(report_text, "output.pdf")
# Remember to call this function with await and from an asynchronous context