-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchooseproxy.py
executable file
·46 lines (38 loc) · 1.96 KB
/
chooseproxy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
# #
# (c) Stefano Polloni #
# #
# Created: 06/02/2016 #
# Updated: 06/02/2016 #
# #
#  Description: This function takes as an argument the pool worker  #
#               ID (integer), assigns the worker-specific proxy     #
#               and a user agent, and returns the proxy label,      #
#               user agent, and user-agent index for scraping.      #
# #
#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
import csv
import os
from random import seed, randint, choice, sample
# Machine-specific absolute root of the project tree.
proj = '/Users/stefanopolloni/GoogleDrive/Year3/'
# Sub-directory of ``proj`` from which the user-agent CSV files are read.
dirc = 'congestion_value/portland/dofiles/scrape_realestate/mesh/ua/'
# Full directory prefix; it ends with '/' so a file name can be appended.
path = ''.join((proj, dirc))
# Worker-ID groups, tried in order: (member IDs, proxy labels, user-agent file).
_WORKER_GROUPS = (
    (range(1, 500, 4), ('ch', 'us-fl'), 'useragents_1.csv'),
    (range(2, 500, 4), ('us-dc', 'us'), 'useragents_2.csv'),
    (range(3, 500, 4), ('us-il', 'ri'), 'useragents_3.csv'),
)
# Used for every worker ID that is in none of the groups above.
_FALLBACK_GROUP = (
    ('us-il', 'ri', 'us-dc', 'us', 'ch', 'us-fl'),
    'useragents_4.csv',
)


def _group_for(w_id):
    """Return ``(proxy_labels, csv_name)`` for the group *w_id* falls in."""
    for members, labels, csv_name in _WORKER_GROUPS:
        if w_id in members:
            return labels, csv_name
    return _FALLBACK_GROUP


def _read_ua_rows(csv_path):
    """Return the rows of *csv_path* that have at least two columns."""
    # newline='' is what the csv module asks for on Python 3; the former
    # 'rU' mode raises ValueError since Python 3.11.
    with open(csv_path, newline='') as csvfile:
        return [row for row in csv.reader(csvfile) if len(row) >= 2]


def match_id(w_id, w, ua_dir=None):
    """Pick a proxy label and a user-agent record for a pool worker.

    Workers are grouped by ``w_id``: 1, 5, ..., 497 form group 1;
    2, 6, ..., 498 group 2; 3, 7, ..., 499 group 3. Every other ID
    (multiples of 4, IDs below 1, IDs of 500 and above) uses the fallback
    group 4. Each group has its own proxy labels and its own
    ``useragents_<n>.csv`` file.

    Args:
        w_id: Integer pool worker ID.
        w: Unused; kept so existing two-argument callers keep working.
        ua_dir: Directory containing the ``useragents_<n>.csv`` files.
            Defaults to the module-level ``path``.

    Returns:
        Tuple ``(proxy, ua, ua_ind)``: the chosen proxy label, followed by
        the first and second column of one randomly chosen CSV row.

    Raises:
        ValueError: If the CSV file has no row with at least two columns.
        OSError: If the CSV file cannot be opened.
    """
    # Reseed from the worker ID mixed with two draws from the current
    # generator state, so the outcome is reproducible only if the caller
    # seeded ``random`` beforehand.
    # NOTE(review): presumably meant to decorrelate pool workers that start
    # from the same generator state - confirm against the caller.
    seed(w_id * randint(1000, 1500) - randint(10, 20) * w_id ** 2)

    proxy_labels, csv_name = _group_for(w_id)
    proxy = choice(proxy_labels)

    csv_path = os.path.join(path if ua_dir is None else ua_dir, csv_name)
    rows = _read_ua_rows(csv_path)
    if not rows:
        raise ValueError('no usable user-agent rows in %s' % csv_path)

    # Take both values from the same row. Two independent draws (as done
    # previously) return an index that need not belong to the user agent.
    row = choice(rows)
    ua, ua_ind = row[0], row[1]
    return proxy, ua, ua_ind