-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path爬电影网站榜单.py
38 lines (31 loc) · 1.36 KB
/
爬电影网站榜单.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import requests
import re
import prettytable as pd
import csv
for i in range(10):
url = 'https://movie.douban.com/top250?start='+str(25*i)+'&filter=' # 评分排行榜的网址
headers = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Mobile Safari/537.36'
}# 修饰请求头
response = requests.get(url, headers=headers) #发送请求 获取网站数据
result = response.text
p = re.compile(r'<li>.*?<span class="title">(?P<name>.*?)</span>.*?<br>(?P<year>.*?) '
r'.*?<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
r'.*?<span>(?P<num>.*?)</span>', re.S)
# 格式化输出
table = pd.PrettyTable()
# 设置表头
table.field_names = ['电影名', '年份', '评分', '评分人数']
for it in p.finditer(result):
# 添加表数据
table.add_row([it.group('name'), it.group('year').strip(), it.group('score'), it.group('num')])
print(table)
# 以追加的形式打开文件
f = open('../../爬微博/data.csv', mode='a')
csv_write = csv.writer(f)
for it in p.finditer(result):
# 将迭代器it转换为字典
dic = it.groupdict()
dic['year'] = dic['year'].strip()
csv_write.writerow(dic.values())
print('写入完成')