forked from githubmaidou/tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbingC.py
executable file
·75 lines (63 loc) · 2.27 KB
/
bingC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import requests
import re
import time
import json
import sys
class bingSearch:
    """Collect the URLs Bing lists for an ``ip:<address>`` search.

    The scraper fetches the first result page, follows the pagination links
    it finds, and gathers the text of every ``<cite>`` element.  Whenever a
    response contains ``BLOCK_MARKER`` the request is retried through the
    next proxy taken from a downloaded proxy list.

    Attributes:
        num: Index of the proxy most recently handed out (-1 before any).
        proxys: ``host:port`` strings loaded by :meth:`githubproxy`.
        domains: Unique ``<cite>`` texts found by the last :meth:`get_urls`.
        headers: Headers installed on the session by :meth:`get_urls`.
        req: The ``requests.Session`` used for all Bing requests.
    """

    BASE_URL = "https://www.bing.com"
    PROXY_LIST_URL = "http://proxylist.nslookup.site/proxylist.json"
    # The header *value* only; the original literal wrongly started with the
    # header name ("User-Agent:Mozilla/5.0 ...").
    USER_AGENT = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/62.0.3202.94 Safari/537.36')
    # A response containing this text is retried through a proxy.
    # NOTE(review): presumably part of Bing's block/error page - confirm.
    BLOCK_MARKER = 'Ref A'
    TIMEOUT = 10  # seconds, applied to every HTTP request

    # Captures the text between "<cite>" and the next "<", so a cite that
    # contains nested markup is cut at its first inner tag.
    _CITE_RE = re.compile(r"<cite>(.*?)<")
    _PAGE_RE = re.compile(r"href=\"(/search.+?PERE(|\d))\"")

    def __init__(self):
        """Load the proxy list and reset the proxy cursor."""
        self.num = -1
        self.proxys = self.githubproxy()

    def githubproxy(self):
        """Download the proxy list and return it as ``host:port`` strings.

        Proxies are only used as a fallback by :meth:`urls`, so a failed
        download is reported on stderr and yields an empty list instead of
        aborting construction.
        """
        try:
            response = requests.get(self.PROXY_LIST_URL, timeout=self.TIMEOUT)
        except requests.RequestException as err:
            sys.stderr.write("proxy list unavailable: %s\n" % err)
            return []
        return self._parse_proxy_lines(response.text)

    @staticmethod
    def _parse_proxy_lines(text):
        """Parse one-JSON-object-per-line text into ``host:port`` strings.

        Blank lines, lines that are not valid JSON, and objects lacking
        ``host``/``port`` are skipped.  Every line is considered, including a
        last line that has no trailing newline.
        """
        proxies = []
        for line in text.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
                host = str(entry['host']).strip()
                port = str(entry['port']).strip()
            except (ValueError, KeyError, TypeError):
                continue
            proxies.append(host + ':' + port)
        return proxies

    def get_proxy(self):
        """Return a ``requests`` proxies mapping for the next unused proxy.

        Raises:
            IndexError: when every proxy in ``self.proxys`` has been used.
        """
        following = self.num + 1
        if following >= len(self.proxys):
            raise IndexError("proxy list exhausted")
        self.num = following
        ip = self.proxys[following]
        # NOTE(review): the 'https' entry uses an https:// proxy URL, i.e. TLS
        # to the proxy itself.  Kept as in the original; confirm the listed
        # proxies support that, otherwise 'http://' + ip is the usual form.
        return {'http': 'http://' + ip, 'https': 'https://' + ip}

    def get_urls(self, ip):
        """Search Bing for ``ip:<ip>``, print and return the unique hits.

        Args:
            ip: The address to put after Bing's ``ip:`` operator.

        Returns:
            The sorted, de-duplicated list also stored in ``self.domains``.
        """
        self.domains = []
        self.headers = {'User-Agent': self.USER_AGENT}
        self.req = requests.Session()
        # Install the headers on the session so that *every* request carries
        # them, not just the warm-up request below.
        self.req.headers.update(self.headers)
        # Warm-up request to the home page, kept from the original;
        # presumably it lets the session pick up cookies before searching.
        self.req.get(self.BASE_URL + "/", timeout=self.TIMEOUT)
        self.domains = self._crawl(self.BASE_URL + "/search?q=ip%%3A%s" % ip)
        for u in self.domains:
            print(u)
        return self.domains

    def _crawl(self, start_url):
        """Fetch ``start_url`` plus every pagination link reachable from it."""
        found, page_matches = self.urls(start_url)
        # The first page's own results count too.
        collected = list(found)
        queue = []
        seen = set()
        self._queue_new_pages(page_matches, queue, seen)
        position = 0
        # Index-based worklist: the queue grows while it is being consumed.
        while position < len(queue):
            href = queue[position]
            position += 1
            found, page_matches = self.urls(self.BASE_URL + href)
            collected.extend(found)
            self._queue_new_pages(page_matches, queue, seen)
        return sorted(set(collected))

    @staticmethod
    def _queue_new_pages(matches, queue, seen):
        """Append the not-yet-seen links in ``matches`` to ``queue``.

        ``matches`` is :meth:`get_pages` output (tuples whose first item is
        the href).  Links containing ``PQRE`` are ignored.  A link counts as
        seen if it matches an earlier one after removing ``format=rss&``.
        """
        for match in matches:
            # hrefs are cut out of raw HTML, so "&" arrives as "&amp;".
            href = match[0].replace('&amp;', '&')
            if 'PQRE' in href:
                continue
            key = href.replace('format=rss&', '')
            if key in seen:
                continue
            seen.add(key)
            queue.append(href)

    def urls(self, url):
        """Fetch ``url`` and return ``(cite_texts, page_matches)``.

        A failed request, or a response containing ``BLOCK_MARKER``, is
        retried through successive proxies.  When the proxies run out the
        page is given up and ``([], [])`` is returned.
        """
        # NOTE(review): verify=False disables TLS certificate checks; kept
        # from the original - confirm it is really wanted.
        try:
            page = self.req.get(url, timeout=self.TIMEOUT, verify=False).text
        except requests.RequestException:
            page = self.BLOCK_MARKER
        while self.BLOCK_MARKER in page:
            try:
                proxy = self.get_proxy()
            except IndexError:
                sys.stderr.write("no proxies left, skipping %s\n" % url)
                return ([], [])
            try:
                page = self.req.get(url, timeout=self.TIMEOUT, verify=False,
                                    proxies=proxy).text
            except requests.RequestException:
                # Dead proxy: stay in the loop and try the next one.
                page = self.BLOCK_MARKER
        return (self._CITE_RE.findall(page), self.get_pages(page))

    def get_pages(self, html):
        """Return pagination matches found in ``html``.

        Each match is a ``(href, suffix)`` tuple: ``href`` starts with
        ``/search`` and ends with ``PERE`` plus an optional digit, and
        ``suffix`` is that digit or ``''``.
        """
        return self._PAGE_RE.findall(html)
# Script entry point: look up the IP address given as the first argument.
# Guarded so that importing this module does not start any network requests.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit("usage: %s <ip>" % sys.argv[0])
    s = bingSearch()
    s.get_urls(sys.argv[1])