-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
264 lines (210 loc) · 7.69 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from terminal_msg import *
from notifier import notify_users
import json
import requests
import os
# Module-level accumulator shared by the scraping helpers below:
# get_latest_notifications() appends one [absolute_url, link_content]
# pair per scraped notification and main() reads the collected list.
all_notifications = []
def get_links(file: str = None):
    '''
    description:
        get links from the saved file. returns the parsed
        data if the file holds valid json, else prints an
        error message and exits the program with code 2.
    params:
        file (str): path of the saved file. when omitted (None)
            it is handled exactly like a missing file.
    returns:
        dict (whatever the json document decodes to)
    raises:
        SystemExit: with code 2 when the file is missing or
            does not contain valid json.
    '''
    # os.path.exists(None) raises TypeError, so the default value has
    # to be rejected explicitly before touching the filesystem.
    if file is None or not os.path.exists(file):
        error("Links File not found!")
        # same effect as exit(2), but does not depend on the `site`
        # module having injected the `exit` helper
        raise SystemExit(2)

    success(f"Using {file} file for links.")
    try:
        # json documents are utf-8 by specification; do not depend on
        # the platform's locale encoding
        with open(file, 'r', encoding='utf-8') as f:
            return json.load(f)
    except json.JSONDecodeError:
        error("Json file is in Invalid format.")
        raise SystemExit(2) from None
def create_dir(path: str):
    '''
    description:
        creates a directory (including missing parents) if
        not present else prints directory already exists
        message.
    params:
        path (str): path of the directory to be created
    returns:
        None
    raises:
        OSError: when the directory cannot be created for a
            reason other than it already existing.
    '''
    # Attempt the creation first and report afterwards, so that the
    # success message is only printed once the directory really exists
    # and there is no gap between an "exists?" check and the creation.
    try:
        os.makedirs(path)
    except FileExistsError:
        info(f"{path} Already Exists.")
    else:
        success(f"{path} directory created.")
def dump_dict_data(path: str, data: dict, indent: int = 4):
    '''
    description:
        dumps json data to a file with default indent as 4.
        prints an error message and exits the program with
        error code 2 if the data cannot be serialized to json.
    params:
        path (str): path of the file the data is written to
        data (dict): json data to be stored in the file in dict
        indent (int): json indentation
    returns:
        None
    raises:
        SystemExit: with code 2 when `data` is not json
            serializable.
    '''
    if os.path.exists(path):
        info(f"Overwriting {path} file with new data.")
    else:
        info(f"Writing data to {path}")

    # Serialize BEFORE opening the file: opening with 'w' truncates it,
    # so a failure during serialization would otherwise destroy the
    # previously saved data.
    try:
        payload = json.dumps(data, indent=indent)
    except (TypeError, ValueError):
        # json.dumps signals unserializable objects with TypeError and
        # circular references with ValueError (it never raises
        # JSONDecodeError, which is itself a ValueError subclass).
        error("Invalid Json Formatted data.")
        raise SystemExit(2) from None

    with open(path, 'w') as f:
        f.write(payload)
def url_to_json_fname(url: str):
    '''
    description:
        builds a json filename out of a url: every
        "http://" and "https://" is removed, every "/"
        becomes "-" and ".json" is appended.
    params:
        url (str): url to be converted into a filename
    returns:
        str
    '''
    # applied in order, each one to the result of the previous step
    substitutions = (
        ("http://", ""),
        ("https://", ""),
        ("/", "-"),
    )
    fname = url
    for old, new in substitutions:
        fname = fname.replace(old, new)
    return fname + ".json"
def get_last_page(url: str, timeout: float = 30):
    '''
    description:
        extract last page information from the html page.
        the number is read from the value of the final query
        parameter of the link inside the <li class="last">
        element.
    params:
        url (str): url of the page
        timeout (float): seconds to wait for the server before
            giving up on the request
    returns:
        int
    raises:
        requests.RequestException: when the page cannot be
            fetched (network error, timeout, http error status)
        ValueError: when the last-page link is missing or its
            page value is not a number
    '''
    # get html data; without a timeout a stalled server would block
    # the program forever
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    html_doc = response.content.decode('utf-8')

    # create soup obj using html parser
    soup = BeautifulSoup(html_doc, 'html.parser')

    # locate the "last" pager link step by step so that a missing
    # element gives a clear error instead of an AttributeError on None
    last_item = soup.find('li', {"class": "last"})
    last_link = last_item.find('a') if last_item is not None else None
    href = last_link.get('href') if last_link is not None else None
    if not href:
        raise ValueError(f"Could not find the last page link in {url}")

    # e.g. ".../schemes?page=12" -> "page=12" -> "12"
    return int(href.split('?')[-1].split('=')[-1])
def get_latest_notifications(url: str, timeout: float = 30):
    '''
    description:
        extract latest notifications from the html page
        and stores it into `all_notifications` global
        variable as [absolute_url, link_content] pairs.
        list items without a usable link are skipped and a
        page without a notification list only produces a
        warning.
    params:
        url (str): url of the page
        timeout (float): seconds to wait for the server before
            giving up on the request
    returns:
        None
    raises:
        requests.RequestException: when the page cannot be
            fetched (network error, timeout, http error status)
    '''
    # get html data; without a timeout a stalled server would block
    # the program forever
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    html_doc = response.content.decode('utf-8')

    # create soup obj using html parser
    soup = BeautifulSoup(html_doc, 'html.parser')

    # get the notification list from the content block
    content_block = soup.find('div', id='content')
    item_list = None
    if content_block is not None:
        item_list = content_block.find('div', {"class": "item-list"})
    if item_list is None:
        warn(f"{url} does not contain a notification list.")
        return

    # extract notification and store it into `all_notifications`
    for notification in item_list.find_all('li'):
        a_notification = notification.find('a')
        if a_notification is None:
            continue
        href = a_notification.get('href')
        # an anchor without target or without content is not a
        # notification entry
        if not href or not a_notification.contents:
            continue
        all_notifications.append(
            [urljoin(url, href), a_notification.contents[0]])
def get_saved_notifications(file_path: str):
    '''
    description:
        loads the previously saved notifications from the
        json file and returns the decoded data. returns
        False when the file does not exist or when its
        content is not valid json.
    params:
        file_path (str): path of the saved notifications file
    returns:
        dict | False
    '''
    if os.path.exists(file_path):
        info(f"{file_path} saved notifications found.")
        try:
            with open(file_path, 'r') as saved_file:
                saved: dict = json.load(saved_file)
        except json.JSONDecodeError:
            error(f"{file_path} json data is in invalid format.")
            return False
        return saved

    # nothing has been saved yet
    warn(f"{file_path} saved notifications file not found.")
    return False
def main(csv_file: str, sender_mail: str, sender_passwd: str):
    '''
    description:
        starts the app. scrapes every listing page of the
        schemes site, compares the result with the list saved
        by the previous run, mails the users (new schemes if
        there are any, otherwise all schemes) and saves the
        scraped list for the next run.
    params:
        csv_file (str): csv file handed over to notify_users
        sender_mail (str): sender gmail address
        sender_passwd (str): sender gmail app password
    returns:
        None
    '''
    # basic conf
    base_link = "https://www.india.gov.in/my-government/schemes"
    data_dir = os.path.join(os.getcwd(), 'notification_data')
    # the file that is read at startup is the same file that is written
    # at the end of the run, so derive its path only once
    saved_file_path = os.path.join(data_dir, url_to_json_fname(base_link))

    # create directory to save data
    create_dir(data_dir)

    # get saved notifications data from file
    saved_notifications = get_saved_notifications(saved_file_path)

    # start from an empty list so that calling main() more than once in
    # the same process does not accumulate duplicates
    all_notifications.clear()

    # get last page from website
    last_page = get_last_page(base_link)

    # append all the notifications from the page into
    # all_notifications list
    for page_no in range(last_page + 1):
        page_link = urljoin(base_link, f'?page={page_no}')
        get_latest_notifications(page_link)
        success(f"{page_link} notifications loaded.")

    if saved_notifications:
        # extract new notifications; a saved file without the "urls"
        # key is treated as having no saved entries
        previous = saved_notifications.get("urls", [])
        new_notifications = [
            notification for notification in all_notifications
            if notification not in previous
        ]

        # save new schemes notification to the file
        dump_dict_data(os.path.join(data_dir, 'new_notifications.json'), {
            "urls": new_notifications})

        # inform users about the new schemes if available
        # else inform them about all the available schemes
        if new_notifications:
            info("Informing users about the new schemes.")
            notify_users(csv_file=csv_file, notifications=new_notifications,
                         sender_email=sender_mail, sender_passwd=sender_passwd)
        else:
            info(
                "No new schemes were published. Informing them about the all the schemes available.")
            notify_users(csv_file=csv_file, notifications=all_notifications,
                         sender_email=sender_mail, sender_passwd=sender_passwd)
    else:
        info("Scraping data for the first time. Informing Users about all currently available schemes.")
        notify_users(csv_file=csv_file, notifications=all_notifications,
                     sender_email=sender_mail, sender_passwd=sender_passwd)

    # remember everything seen in this run for the next comparison
    dump_dict_data(saved_file_path, {"urls": all_notifications})
if __name__ == '__main__':
    # banner rows: (text, extra keyword arguments for colorize.cprint)
    rule_style = {'fgcolor': 'CYAN', 'style': 'BOLD'}
    banner_rows = (
        ('=' * 35, rule_style),
        ('Govt. Scheme Notification System',
         {'fgcolor': 'GREEN', 'style': 'BOLD'}),
        ('=' * 35, rule_style),
        ("Written By", {}),
        ('dmdhrumilmistry', {'fgcolor': 'YELLOW', 'style': 'BOLD'}),
        ('-' * 35, rule_style),
    )
    for banner_text, banner_options in banner_rows:
        colorize.cprint(banner_text, use_default=False, **banner_options)
    print()

    # placeholder credentials: replace with a real gmail address and
    # app password before running
    csv_file = 'users.csv'
    main(
        csv_file,
        sender_mail='yourmail@gmail.com',
        sender_passwd='your_app_password',
    )