-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathui_china.py
53 lines (47 loc) · 1.59 KB
/
ui_china.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# -*- coding: utf-8 -*-
"""
Module: search s.ui.cn for a keyword and append the image URLs found to http.txt.
"""
import requests
import os
import re
import urllib.parse
import get_img
# Ask the user for a search keyword and turn it into a URL-encoded query
# fragment ("keywords=<value>") that is later spliced into the request URL.
try:
    val = {'keywords': input('请选择搜索关键词 :')}
    word = urllib.parse.urlencode(val)
except Exception:
    # Deliberate best-effort: if the prompt cannot be read (e.g. stdin is
    # closed) or the keyword cannot be encoded, fall back to searching for
    # "python". The fallback must use the same "keywords" query parameter as
    # the normal path above, otherwise the request carries no search term.
    word = 'keywords=python'
# Request header values used by get_url(). The User-Agent identifies the
# client as desktop Chrome 48 on Linux; insertion order is kept as-is.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,en;q=0.8',
    'Cache-Control': 'max-age=0',
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/48.0.2564.116 Safari/537.36'
    ),
    'Connection': 'keep-alive',
    'Referer': 'http://www.baidu.com/',
}
def get_url(page, words):
    """Fetch one s.ui.cn search result page and record the image URLs in it.

    The page is requested with the module-level ``headers``. Every
    ``data-original="http(s)://...jpg"`` / ``...png"`` attribute found in the
    response body contributes one URL, and the URLs are appended, one per
    line, to ``http.txt`` in the working directory selected below.

    Args:
        page: Result page number; formatted with ``%d`` into the ``p`` query
            parameter.
        words: Already URL-encoded query fragment, e.g. ``keywords=python``.

    Returns:
        None. As side effects the process working directory is changed and
        ``http.txt`` is opened in append mode (created if missing).
    """
    base_url = 'http://s.ui.cn/index.html?p=%d&%s&type=project' % (page, words)
    # The second positional parameter of requests.get() is ``params``; the
    # header dict must be passed by keyword or it ends up in the query string
    # instead of being sent as HTTP headers.
    response = requests.get(base_url, headers=headers)
    # errors="ignore" silently drops undecodable bytes, so this call cannot
    # raise UnicodeDecodeError and no fallback decoding path is required.
    content = response.content.decode("utf-8", "ignore")

    # Raw string, with the jpg/png alternation confined to a non-capturing
    # group so that it applies to the extension only. The single capture
    # group makes findall() return just the URL, without the attribute prefix.
    urls = re.findall(r'data-original="(https?://[^\s"]*\.(?:jpg|png))', content)

    os.chdir(r'/home/coding/workspace/PyWeb')  # TODO: adjust this path for your machine
    # The context manager closes the file even if a write fails.
    with open('http.txt', 'a', encoding='utf8') as f:
        f.writelines(url + '\n' for url in urls)
# TODO: fails - a Cookie check exists
# Collect image URLs from result pages 1 through 10 for the chosen keyword.
for page_no in range(1, 11):
    get_url(page_no, word)

# Offer to run the download step right away; anything other than "y"/"Y"
# just prints the exit hint.
answer = input('Do You Want Download Now ? : (y/n) ')
if answer.lower() != 'y':
    print('Press Enter To Exit : ')
else:
    get_img.get_file()