This repository was archived by the owner on Aug 27, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsearch.py
121 lines (108 loc) · 4.09 KB
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from ast import arg
from cmath import sin
import re
import json
import argparse
from datetime import datetime
from urllib.parse import quote
from numpy import source
from core.base_scraper import BaseScraper
class Search(BaseScraper):
    """Scrape tweets matching a search query via Twitter's adaptive search API.

    Pages through results with the `scroll:` cursor embedded in each response,
    accumulates tweets/users in memory, and dumps both to JSON files under
    ``settings['DATA_DIR']`` when pagination stops or the limit is reached.
    """

    # Extracts the pagination cursor ("scroll:...") from the raw response body.
    cursor_re = re.compile('"(scroll:[^"]*)"')
    # URL template; {query} (and, per-request, {cursor}) are filled by get_url().
    base_url = (
        f'https://twitter.com/i/api/2/search/adaptive.json?'
        f'include_profile_interstitial_type=1'
        f'&include_blocking=1'
        f'&include_blocked_by=1'
        f'&include_followed_by=1'
        f'&include_want_retweets=1'
        f'&include_mute_edge=1'
        f'&include_can_dm=1'
        f'&include_can_media_tag=1'
        f'&skip_status=1'
        f'&cards_platform=Web-12'
        f'&include_cards=1'
        f'&include_ext_alt_text=true'
        f'&include_quote_count=true'
        f'&include_reply_count=1'
        f'&tweet_mode=extended'
        f'&include_entities=true'
        f'&include_user_entities=true'
        f'&include_ext_media_color=true'
        f'&include_ext_media_availability=true'
        f'&send_error_codes=true'
        f'&simple_quoted_tweet=true'
        f'&query_source=typed_query'
        f'&pc=1'
        f'&spelling_corrections=1'
        f'&ext=mediaStats%2ChighlightedLabel'
        f'&count=20'
        f'&tweet_search_mode=live'
    ) + '&q={query}'

    def __init__(self, query, username, limit, year, since, until):
        """Build the effective search query from the raw query plus filters.

        Args:
            query: free-text search query (also used to name the output files).
            username: restrict results to this author (``from:``), if given.
            year: upper bound mapped to ``until:`` — NOTE(review): presumably
                "tweets up to this year"; confirm the intended operator.
            since/until: explicit date bounds, appended verbatim.
        """
        super().__init__(limit)
        self.query = query
        self.judul = query  # original query kept for output file names ("judul" = "title")
        # Per-instance accumulator. The original used a class-level list,
        # which is shared across ALL Search instances — a mutable-class-attribute bug.
        self.users = []
        if username:
            self.query += f" from:{username}"
        if year:
            self.query += f" until:{year}"
        if since:
            self.query += f" since:{since}"
        if until:
            self.query += f" until:{until}"

    def get_url(self, url, query, cursor=None):
        """Fill the {query} (and optional {cursor}) placeholders, URL-escaping both."""
        if cursor:
            return url.format(query=quote(query), cursor=quote(cursor))
        return url.format(query=quote(query))

    def start_request(self, cursor=None):
        """Page through search results, then dump tweets and users to JSON.

        Rewritten from self-recursion to a loop so long scrapes cannot hit
        Python's recursion limit. The cursor placeholder is appended to a
        LOCAL copy of the template — the original mutated ``self.base_url``
        in place, growing a duplicate ``&cursor={cursor}`` fragment on every
        page after the first.
        """
        while True:
            super().start_request(cursor)
            if cursor:
                cur_url = self.get_url(
                    self.base_url + '&cursor={cursor}',
                    self.query,
                    cursor,
                )
            else:
                cur_url = self.get_url(self.base_url, self.query)
            r = self.request(cur_url)
            print(r.text)
            try:
                cur_cursor = self.cursor_re.search(r.text).group(1)
                res_json = r.json()
                cur_tweets = res_json['globalObjects']['tweets']
                cur_users = res_json['globalObjects']['users']
                # Dict values, in insertion order — same items the original
                # collected by iterating keys and indexing.
                self.tweets.extend(cur_tweets.values())
                self.users.extend(cur_users.values())
            except (AttributeError, KeyError, ValueError) as err:
                # AttributeError: no cursor in the body; KeyError: unexpected
                # JSON shape; ValueError: body is not JSON. The original bare
                # `except:` swallowed everything, including KeyboardInterrupt.
                print(f'Failed to parse response: {err}')
                cur_cursor = cursor
            print(f'Tweet in memory = {len(self.tweets)}')
            print(f'Limit = {self.limit}')
            print(f'cursor = {cursor}, next cursor = {cur_cursor}')
            print('---------------------------------------------------------')
            # Continue only while a NEW cursor was obtained; with limit > 0 the
            # original kept recursing on a stuck cursor forever. limit == 0
            # means "no limit" (scrape until the cursor stops advancing).
            if cur_cursor != cursor and (self.limit == 0 or len(self.tweets) < self.limit):
                cursor = cur_cursor
                continue
            break
        date_now = datetime.now().strftime("%H-%M-%S %d-%m-%Y")
        with open(self.settings.get('DATA_DIR') / f'query-{self.judul}-tweets.json', 'w') as outfile:
            json.dump(self.tweets, outfile)
        with open(self.settings.get('DATA_DIR') / f'query-{self.judul}-users.json', 'w') as outfile:
            json.dump(self.users, outfile)
def _main():
    """CLI entry point: parse arguments and run a search scrape."""
    parser = argparse.ArgumentParser(
        description='Search tweets by query, optionally filtered by author and date range.'
    )
    # required=True gives a clean usage error; the original defaulted to None,
    # which crashed later inside quote(None) with an opaque TypeError.
    parser.add_argument("-q", "--query", required=True)
    parser.add_argument("-u", "--username", default=None, type=str)
    parser.add_argument("-l", "--limit", default=0, type=int)
    parser.add_argument("-y", "--year", default=None, type=str)
    parser.add_argument("-s", "--since", default=None, type=str)
    parser.add_argument("-e", "--until", default=None, type=str)
    args = parser.parse_args()
    scraper = Search(args.query, args.username, args.limit, args.year, args.since, args.until)
    scraper.start_request()


# Guard so importing this module (e.g. for testing) does not trigger
# argument parsing and a network scrape as a side effect.
if __name__ == '__main__':
    _main()