MiniCrawler.py
import re
import threading
import time
import sys
from colorama import Fore
import Requester
from urllib.parse import urljoin

# Shared crawl state
result = []                              # unique URLs discovered so far
link_re = re.compile(r'href="(.*?)"')    # naive href extractor
dirs = []                                # raw hrefs that were already queued
threads = []                             # spawned crawler threads


def crawl(url, host=""):
    try:
        # First call: remember the host so relative links can be resolved
        if host == "":
            host = url
        req = Requester.RequestUrl('9050', '', 'no', url.strip())
        if req.status_code != 200:
            return []
        links = link_re.findall(req.text)
        url = url.strip()
        for l in links:
            # Static assets (images, scripts, archives, ...) are not crawled
            exp = re.findall(r'/([^/]+\.(?:jpg|gif|png|pdf|css|js|zip|doc|docx|rar))', l)
            # Skip self-references and hrefs that were already visited
            if l == url or l in set(dirs):
                continue
            # Resolve relative links against the host
            if host not in l:
                uri = urljoin(host, l)
            else:
                uri = l
            if uri in set(result) or len(exp) > 0:
                continue
            result.append(uri)
            print(uri)
            dirs.append(l)
            # Crawl each newly found URL in its own thread
            t = threading.Thread(target=crawl, args=(uri, host))
            threads.append(t)
            try:
                t.start()
                time.sleep(0.1)
            except (KeyboardInterrupt, SystemExit):
                print(Fore.RED, " [-] Ctrl-c received! Sending kill to threads...")
                for t in threads:
                    t.kill_received = True
                sys.exit()
            except RuntimeError:
                # Could not start another thread; back off briefly and continue
                time.sleep(0.2)
    except Exception:
        return []
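

# A minimal usage sketch, assuming Requester.RequestUrl('9050', '', 'no', url)
# proxies the request through a local SOCKS port (as the arguments above
# suggest) and returns a requests-style response; the default seed URL here is
# a hypothetical placeholder.
if __name__ == "__main__":
    seed = sys.argv[1] if len(sys.argv) > 1 else "http://example.com/"
    crawl(seed)                  # host defaults to the seed on the first call
    for t in threads:            # wait for all spawned crawler threads
        t.join()
    print(Fore.GREEN, " [+] Done, %d URLs collected" % len(result))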