forked from eye9poob/python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuser-scrapper.py
145 lines (100 loc) · 3.33 KB
/
user-scrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/python
# -*- coding: utf-8 -*-
# by ..:: crazyjunkie ::.. 2014
# Chaturbate username scrapper (user-scrapper.py)
import re
import Queue
import sys
import requests
import threading
from time import sleep
# Form fields POSTed to the chat-user-list API; 'roomname' is filled in per request.
payload = {'sort_by': 'a', 'private': 'false', 'roomname': None}

# Performer (room) names scraped from the listing pages.
performers = []

# Usernames collected from every room; extended by the worker threads.
user_list = []

# Cookies sent with each API request; 'csrftoken' is set once the token is known.
# (The original literal listed "agreeterms" twice; the duplicate key is removed.)
cookies = {
    "csrftoken": None,
    "cbv_vol": "7",
    "cbv_mute": "0",
    "cbv_scale": "0",
    "agreeterms": "1",
    "affkey": None,
}

# Request headers shared by the listing-page GETs and the API POSTs;
# 'X-CSRFToken' is set once the token is known.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0',
    "Accept-Language": "en-US,en;q=0.5",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "X-CSRFToken": None,
    "Referer": None,
    "DNT": "1",
    "Connection": "keep-alive",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
}
#### Define basic bot with api call####
def bot(queue):
    """Worker loop: fetch the chat user list for every room name taken from *queue*.

    Never returns; meant to run in a daemon thread. For each room name it
    POSTs to the getchatuserlist endpoint, pulls the user names out of the
    response text and appends them to the module-level ``user_list``.

    Args:
        queue: queue of room names. ``task_done()`` is called exactly once
            per item, even on failure, so ``queue.join()`` can return.
    """
    while True:
        item = queue.get()
        try:
            # Build a per-request copy: ``payload`` is shared by all worker
            # threads, so writing the room name into it would let another
            # thread overwrite it before this request is sent.
            data = dict(payload, roomname=item)
            ### Send api request here ####
            print("\nSending Request: " + item)
            r2 = requests.post("http://chaturbate.com/api/getchatuserlist/", data=data,
                               cookies=cookies, headers=headers, timeout=5)
            user_list.extend(re.findall(r",(.+?)\|", r2.text))
        except Exception:
            # Deliberately broad: a worker that dies would leave queued items
            # unprocessed and make queue.join() block forever.
            print("\nConnection Error getting userlist: " + item + "\n")
        finally:
            # Pause between requests, then always acknowledge the item.
            sleep(2)
            queue.task_done()
def main_loop():
    """Start five daemon threads running ``bot`` and feed them every performer.

    Puts each name from the module-level ``performers`` on a shared queue and
    blocks until every queued name has been marked done.
    """
    work = Queue.Queue()

    #### Spawn the worker threads - 5 of them ####
    for bot_number in (1, 2, 3, 4, 5):
        worker = threading.Thread(target=bot, args=(work,))
        worker.daemon = True
        worker.start()
        print("Bot %d  created" % bot_number)

    #### Hand the performers to the workers ####
    print("Bots Created successfully. Adding performers to queue.")
    sleep(2)
    for performer in performers:
        work.put(performer)

    #### Block until every queued name is processed ####
    work.join()
    print("Bots have finished")
#### Get list of performers ####
try:
    for x in range(1, 6):
        url = "http://chaturbate.com/?page=" + str(x)
        print("Getting request from %s" % url)
        # Same 5 s timeout as the API calls, so a stalled server cannot hang the script.
        r1 = requests.get(url, headers=headers, timeout=5)
        performers.extend(re.findall("alt=\"(.+?)'s", r1.text))
except requests.exceptions.RequestException:
    # Only network/HTTP failures are reported as a connection error;
    # exit non-zero so callers can detect the failure.
    print("Connection Error")
    sys.exit(1)
# Drop duplicates across pages and order the names.
performers = sorted(set(performers))
print("%d  performers found" % len(performers))
#### Find csrf token and apply it to header and cookies ####
# Read the token from the cookie jar of the last listing response instead of
# regex-matching the jar's repr: the greedy "ken=(.+) for" pattern captured
# past the token whenever another cookie followed it.
csrf_token = r1.cookies.get("csrftoken")
if not csrf_token:
    print("Connection Error - No Token found")
    sys.exit(1)
headers["X-CSRFToken"] = csrf_token
cookies["csrftoken"] = csrf_token
print("CSRF token successfully found and applied to Cookies and Header")
sleep(2)
# Run the worker threads over every performer; returns once the queue is drained.
main_loop()

#### Sort master list ####
print("Sorting and removing duplicates:")
unique_users = set(user_list)
user_list = sorted(unique_users)
print("%d unique usernames extracted" % len(user_list))
sleep(2)
#### Save list to file users.txt ####
try:
    print("Writing to users.txt")
    with open("users.txt", 'w') as f:
        # One name per line, written in a single batched call.
        f.writelines(name + '\n' for name in user_list)
except (IOError, OSError, UnicodeError):
    # File-system failures, plus names that cannot be encoded for the file;
    # exit non-zero so callers can detect the failure.
    print("Error Writing File")
    sys.exit(1)
else:
    print("Finished")