workshop_sql.py
import json
import oauth2 as oauth
import psycopg2
import unicodedata
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
__version__ = '1.0'
__all__ = []
__author__ = 'Axel Oehmichen - ao1011@imparial.ac.uk'
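# NOTE: assumes a PostgreSQL server reachable on 127.0.0.1 with the
# postgres/postgres credentials used below, and the third-party packages
# imported above ("oauth2", i.e. python-oauth2, and "psycopg2") installed.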

def get_connection():
    # http://initd.org/psycopg/docs/usage.html
    connection = psycopg2.connect(user="postgres", password="postgres", host="127.0.0.1")
    return connection

def get_database(db):
    # http://initd.org/psycopg/docs/usage.html
    connection = psycopg2.connect(database=db, user="postgres", password="postgres", host="127.0.0.1")
    return connection

def submit_to_Database(query, query_values, connection):
    # Autocommit is mandatory here so that CREATE DATABASE and the inserts are
    # committed without an explicit transaction.
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    curs = connection.cursor()
    # execute() itself returns None, so hand back the cursor instead in case
    # the caller wants to inspect results.
    curs.execute(query, query_values)
    return curs

# A tweet can contain emojis and we are only interested in the text, so we
# normalize it and encode it as ASCII for the database. Quoting/escaping is
# left to psycopg2's parameter binding, so no manual quoting is done here.
def normalize_text(tweet_text):
    return unicodedata.normalize('NFKD', tweet_text).encode('ascii', 'ignore').decode('ascii').strip('"')

#####################################
# main program #
#####################################
key = "XXXXXX"
secret = "YYYYYYYYYYY"
raw_connection = get_connection()
user_ids = ["25073877", "813286", "23022687", "1339835893", "216776631", "3235334092"]
if __name__ == "__main__":
    # First we create the DataBase
    database_create = "CREATE DATABASE sqlworkshop WITH OWNER = postgres ENCODING = 'UTF8' TABLESPACE = pg_default " \
                      "LC_COLLATE = 'en_GB.UTF-8' LC_CTYPE = 'en_GB.UTF-8' CONNECTION LIMIT = -1;"
    submit_to_Database(database_create, (), raw_connection)
    raw_connection.close()

    # and Tables ...
    tweet_table_create = "CREATE TABLE IF NOT EXISTS tweets(id serial, user_id char(50) NOT NULL, text char(160) " \
                         "NOT NULL, tweet_id char(50) NOT NULL, favourite_count int NOT NULL, retweet_count int NOT NULL);"
    twitter_users_create = "CREATE TABLE IF NOT EXISTS twitter_users(id serial, user_id char(50) NOT NULL, username char(50) NOT NULL, " \
                           "screen_name char(50) NOT NULL, location char(50) NOT NULL, description char(255) NOT NULL, " \
                           "followers_count int NOT NULL, friends_count int NOT NULL, listed_count int NOT NULL);"
    database_connection = get_database("sqlworkshop")
    submit_to_Database(tweet_table_create, (), database_connection)
    submit_to_Database(twitter_users_create, (), database_connection)

    # Create your consumer with the proper key/secret.
    consumer = oauth.Consumer(key=key, secret=secret)

    # Twitter REST API endpoints (the trends endpoint is unused in this script).
    request_trend_url = "https://api.twitter.com/1.1/trends/place.json?id=1"
    request_user_url = "https://api.twitter.com/1.1/statuses/user_timeline.json?user_id="

    # Create our client.
    client = oauth.Client(consumer)

with open("databasedump.sql", 'a') as dump:
for twitter_user_id in user_ids:
request_url = request_user_url + twitter_user_id
# The OAuth Client request works just like httplib2 for the most part.
resp, content = client.request(request_url, "GET")
timeline_tweets_json = json.loads(content)
user = (str(timeline_tweets_json[0]["user"]["id_str"]),
normalize_text(timeline_tweets_json[0]["user"]["name"]),
normalize_text(timeline_tweets_json[0]["user"]["screen_name"]),
normalize_text(timeline_tweets_json[0]["user"]["location"]),
normalize_text(timeline_tweets_json[0]["user"]["description"]),
int(timeline_tweets_json[0]["user"]["followers_count"]),
int(timeline_tweets_json[0]["user"]["friends_count"]),
int(timeline_tweets_json[0]["user"]["listed_count"]))
user_insert = "INSERT INTO twitter_users (user_id, username, screen_name, location, description, followers_count, friends_count, listed_count) " \
"VALUES (%s, %s, %s, %s, %s, %s, %s, %s);\n"
#dump.write(user_insert%user)
submit_to_Database(user_insert, user, database_connection)
for i in range(len(timeline_tweets_json)):
tweet_json = timeline_tweets_json[i]
tweet_text = normalize_text(tweet_json["text"])
tweet = (str(tweet_json["user"]["id_str"]), tweet_text, str(tweet_json["id_str"]),
int(tweet_json["favorite_count"]), int(tweet_json["retweet_count"]))
tweet_insert = "INSERT INTO tweets (user_id, text, tweet_id, favourite_count, retweet_count) VALUES (%s, %s, %s, %s, %s);\n"
#dump.write(tweet_insert%tweet)
submit_to_Database(tweet_insert, tweet, database_connection)
dump.close()
database_connection.close()
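# Illustrative only: once the script has populated the database, the tables can
# be explored with plain SQL, for example:
#   SELECT username, followers_count FROM twitter_users ORDER BY followers_count DESC;
#   SELECT user_id, count(*) AS n_tweets FROM tweets GROUP BY user_id;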