forked from MariuszKu/analiza-duckdb-mieszkania
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnbp_gold.py
58 lines (47 loc) · 2.14 KB
/
nbp_gold.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from datetime import datetime, timedelta
def last_day_of_month(date):
    """Return the last calendar day of the month that ``date`` falls in.

    ``date`` may be any object supporting ``replace(day=...)`` and
    ``timedelta`` arithmetic (e.g. ``datetime.date`` or
    ``datetime.datetime``). Only the day is changed; other fields such as
    the time of day are carried through as-is.
    """
    # Day 28 exists in every month, and no month is longer than 31 days, so
    # 28 + 4 days always lands somewhere inside the following month.
    in_following_month = date.replace(day=28) + timedelta(days=4)
    # The day before the 1st of the following month is the one we want.
    return in_following_month.replace(day=1) - timedelta(days=1)
def generate_last_days_of_months(start_date, end_date):
    """List the final three days of every month in a date range.

    Args:
        start_date: First date of the range, as a ``DD-MM-YYYY`` string.
        end_date: Last date of the range, as a ``DD-MM-YYYY`` string.

    Returns:
        A list of ``YYYY-MM-DD`` strings. Each visited month contributes
        three entries, in this order: the day before the last day, two days
        before the last day, and the last day itself.

    Raises:
        ValueError: If either argument does not match ``DD-MM-YYYY``.

    Note:
        A month is visited when the loop cursor (``start_date`` at first,
        then the 1st of each following month) is not after ``end_date``.
        The month containing ``end_date`` is therefore always emitted in
        full, even when ``end_date`` falls mid-month.
    """
    cursor = datetime.strptime(start_date, "%d-%m-%Y")
    limit = datetime.strptime(end_date, "%d-%m-%Y")

    # Days to step back from the month's last day, in emission order.
    day_offsets = (1, 2, 0)

    stamps = []
    while cursor <= limit:
        month_end = last_day_of_month(cursor)
        stamps.extend(
            (month_end - timedelta(days=back)).strftime("%Y-%m-%d")
            for back in day_offsets
        )
        # Jump to the 1st of the next month.
        cursor = month_end + timedelta(days=1)
    return stamps
# Date range to scrape, as DD-MM-YYYY strings; expanded into the candidate
# YYYY-MM-DD dates (the last three days of each month) to request.
start_date = "01-01-2006"
end_date = "31-12-2012"
last_days_of_months = generate_last_days_of_months(start_date, end_date)

# Configure Chrome to run without a visible window.
options = webdriver.ChromeOptions()
options.add_argument("--headless=new")

# Start the browser session.
service = Service()
driver = webdriver.Chrome(service=service, options=options)
try:
    # "a+" appends, so repeated runs add rows to an existing file instead of
    # overwriting it.
    with open("data/gold_2006_2012.csv", "a+") as file:
        for last_day in last_days_of_months:
            url = f'https://nbp.pl/cena-zlota-archiwum/cena-zlota-z-dnia-{last_day}/'
            # Load the archive page for this date.
            driver.get(url)
            # find_elements returns an empty list when nothing matches, so a
            # page without any such section simply contributes no rows.
            elements = driver.find_elements(By.CLASS_NAME, 'section__single')
            for title in elements:
                # Take the visible text of the table body inside the section.
                heading = title.find_element(By.TAG_NAME, 'tbody').text
                # Turn space-separated cell text into comma-separated fields.
                heading = heading.replace(" ", ",")
                file.write(f"{heading}\n")
                print(heading)
finally:
    # Always shut the browser down, even if a page load, element lookup or
    # file operation fails; otherwise the headless Chrome and chromedriver
    # processes are left running after the script exits.
    driver.quit()