Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

uploading interactive twitter bot initital code #1

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions interactive/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
FROM python:3.8.1-slim-buster

RUN apt-get -y update && apt-get -y install gcc

# Copy local code to the container image.
WORKDIR /


# Make changes to the requirements/app here.
# This Dockerfile order allows Docker to cache the checkpoint layer
# and improve build times if making changes.
RUN pip3 --no-cache-dir install sqlalchemy starlette uvicorn ujson tweepy pg8000 aiohttp chardet requests
COPY . ./

# Clean up APT when done.
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

ENTRYPOINT ["python3", "-X", "utf8", "app.py"]
315 changes: 315 additions & 0 deletions interactive/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
from starlette.applications import Starlette
from starlette.responses import UJSONResponse
import uvicorn

import tweepy
import time
import os
import aiohttp
import asyncio
from sqlalchemy import engine, create_engine, MetaData, Table
from sqlalchemy.sql.expression import func, select
from sqlalchemy.sql import text

# Twitter app configuration information: required
CONSUMER_KEY = os.environ.get('CONSUMER_KEY')
CONSUMER_SECRET = os.environ.get('CONSUMER_SECRET')
ACCESS_KEY = os.environ.get('ACCESS_KEY')
ACCESS_SECRET = os.environ.get('ACCESS_SECRET')

ACCOUNT = os.environ.get("ACCOUNT")
DB_USER = os.environ.get("DB_USER")
DB_PASS = os.environ.get("DB_PASS")
DB_NAME = os.environ.get("DB_NAME")
CONNECTION_NAME = os.environ.get("CONNECTION_NAME")

# todo include this for server
assert all([CONSUMER_KEY, CONSUMER_SECRET, ACCESS_KEY, ACCESS_SECRET]
), "Not all Twitter app config tokens have been specified."

INVALID_FILE = 'file_is_invalid'
NO_FILE = 'file_does_not_exist'
FILE_OF_PROFANE_PHRASES = 'profanities_en.txt'
SERVER_ERROR_STATUS = 'server_error_status: '
CLOUD_RUN_URL = "https://url.to.cloud.run"

USER_NAME = os.environ.get('TWITTER_BOT_ACCOUNT')

SERVER_PROBLEM = "sorry, we're having some problems with your request, please try again later"
PROFANITY_PROBLEM = 'sorry, your request for an object breached our list of profanities. ' \
'Please DM us if you think this was a mistake, otherwise please try again'
FORMAT_PROBLEM = 'sorry, your request does not match our format. Please start your tweet with ' \
'our username, do not mention other users in your tweet, and do not use hashtags either.'

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)

api = tweepy.API(auth)

profanity_list = []
db_results = []

MENTION_ID = 'mention_id'
TIMESTAMP = 'timestamp'
OUR_STATUS = 'our_status'
OUR_RESPONSE = 'our_response'
ORIGINAL_REQUEST = 'original_request'
TWITTER_URL = 'twitter_url'


def build_profanity_list():
# builds a list of profanities taken from:
# https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words
file_read = open(FILE_OF_PROFANE_PHRASES, 'r')
for line in file_read:
inner_list = [elt.strip() for elt in line.split(',')]
profanity_list.append(inner_list)


def is_profane(mention):
# check to make sure we've built our list of profanities from our file

if len(profanity_list) < 1:
build_profanity_list()

message_from_user = mention.full_text
# check to see if anything in the user's request could be considered 'profane'
for profanity in profanity_list:
if profanity[0].lower() in message_from_user.lower():
# print('found profanity: ' + profanity[0] + ' in: ' + message_from_user)
return True
return False


def send_reply_to_mention(reply, mention):
# TODO: handle errors! This can crash if it tries to send somethign we have sent before (which
# shouldn't happen). Try catching it.
api.update_status('@' + mention.user.screen_name + " " + reply, mention.id)
our_status = 'ERROR'
if reply == SERVER_PROBLEM:
our_status = 'ERROR 1'
#print('writing to database server response error: ', str(mention.id))
elif reply == PROFANITY_PROBLEM:
our_status = 'ERROR 2'
#print('writing to database profanity warning sent to tweet: ', str(mention.id))
elif reply == FORMAT_PROBLEM:
our_status = 'ERROR 3'
#print('writing to database format warning sent to tweet: ', str(mention.id))
else:
our_status = 'SUCCESS'
#print('writing to database successful tweet sent to', str(mention.id))

# store the data for insertion into the database logs
timestamp = time.strftime("%Y-%m-%d %H:%M:%S+00", time.gmtime())
t_url = f"https://twitter.com/{mention.user.screen_name}/status/{mention.id_str}"
db_results.append({MENTION_ID: mention.id, OUR_STATUS: our_status, TIMESTAMP: timestamp,
OUR_RESPONSE: reply, ORIGINAL_REQUEST: mention.full_text, TWITTER_URL: t_url})


def is_correct_format(mention):
# to be correct format first mention should be our username and there should not be any
# other user names or hashtags
# TODO clean this up, it is pretty ugly
message_from_user = mention.full_text
if message_from_user[0:len(USER_NAME)].lower() == USER_NAME.lower():
# message contains our user name as first part
if '@' in message_from_user[len(USER_NAME):len(message_from_user)] or \
'#' in message_from_user[len(USER_NAME):len(message_from_user)]:
# message contains other users or hashtags
# print('format issue other: ' + message_from_user)
return False
return True
return False


def is_valid_tweet(tweet, mention):
# TODO return error codes
# tweet must be < 240 characters and have returned from the API in full and the should be different to the request
original_request = mention.full_text[len(USER_NAME):len(mention.full_text)]
if len(tweet) > 240:
# it is too long
# print('tweet to long: ' + tweet)
return False
if '<' in tweet:
# if it contains a < we can assume it has part of <|endoftext|> in its response
# print('tweet contains <: ' + tweet)
return False
if len(tweet.lower().rstrip()) < len(original_request.lower()) + 2:
# Iv'e noticed some tweets just come back with a . on the end of them, this should account for those
# print('response from API was unchanged from original request: ' + tweet)
return False
if tweet[-1] == '\n' and tweet[-2] == '\n':
# print('tweet has newlines!: ' + tweet)
# if it has passed the two previous tests and ends with two newline characters we can assume it passes
return True
# print('problem with tweet: ' + tweet)
return False


async def get_json(m, session):
object_request = m.full_text[len(USER_NAME):len(m.full_text)]
# print("request: " + object_request + " for id: " + str(m.id))

post_dict = {
"length": 75,
"temperature": 0.7,
"prefix": object_request,
"truncate": "<|endoftext|>"
}

async with session.post(CLOUD_RUN_URL, json=post_dict) as response:
if response.status == 200:
return await response.json()
else:
return SERVER_ERROR_STATUS + str(response.status)


async def fetch(count, mention, session):
# TODO refactor this
response = '<error>'
data = await get_json(mention, session)
if 'text' in data:
response = data['text']
else:
print('received server error: ' + data)
# print('received data: ' + response)
attempts = 1
while attempts < 4:
if is_valid_tweet(response, mention):
# post the validated response to twitter here.
send_reply_to_mention(response.rstrip(), mention)
break
elif attempts == 3:
send_reply_to_mention(SERVER_PROBLEM, mention)
# TODO: notify someone of this error (email?)
else:
# print('attempting to obtain more json')
data = await get_json(mention, session)
if 'text' in data:
response = data['text']
else:
response = '<error>'
print('received server error: ' + data)
attempts += 1

return 'DONE:', str(count) + '\n'


async def reply():
# print("Replying...", flush=True)

# connect to our database
db = create_engine(
engine.url.URL(
drivername='postgres+pg8000',
username=DB_USER,
password=DB_PASS,
database=DB_NAME,
query={
'unix_sock': '/cloudsql/{}/.s.PGSQL.5432'.format(
CONNECTION_NAME)
}
),
pool_size=1
)

# obtain the most recent mention from our db
with db.connect() as conn:
s = text(
"Select MAX(mention_id) as mention_id FROM requests"
)

max_id = conn.execute(s).fetchone()

# Get the list of recent mentions not ALL mentions
if max_id[0] is None:
# when the table is empty the query returns a tuple with None in the first spot
mentions = api.mentions_timeline(tweet_mode='extended')
else:
mentions = api.mentions_timeline(max_id[0], tweet_mode='extended')

if len(mentions) < 1:
return ('no new mentions')

print('received ' + str(len(mentions)) + ' new mentions.')
most_recent_id = mentions[0].id

# update our db with the most recent mention
metadata = MetaData()
requests_table = Table('requests', metadata, autoload=True, autoload_with=db)

with db.connect() as conn:
ins = requests_table.insert().values(mention_id=most_recent_id)
conn.execute(ins)

# print(mentions)

# this is used to keep the session running and makes the requests take place in
# parallel rather than series.
responses = []

async with aiohttp.ClientSession() as session:
i = 0
while i < len(mentions):
# confirm request is valid and not profane
if is_profane(mentions[i]):
# let the user know their request was considered profane
send_reply_to_mention(PROFANITY_PROBLEM, mentions[i])
break
# confirm request is in valid format
elif not is_correct_format(mentions[i]):
# let the user know their request did not meet our format requirements
send_reply_to_mention(FORMAT_PROBLEM, mentions[i])
break
else:
# attempt to get a response
response = asyncio.ensure_future(fetch(i, mentions[i], session))
responses.append(response)
i += 1

all_responses = await asyncio.gather(*responses)
print('made ' + str(len(responses)) + ' responses.')

# print('sending results to the database')
metadata = MetaData()
interactions = Table('interactions', metadata, autoload=True, autoload_with=db)

global db_results
db_entry = db_results
db_results = []

with db.connect() as conn:
# db_results is a pretty ugly global variable hack and could probably be handled more elegantly
for entry in db_entry:
q_insert = (
interactions
.insert()
.values(mention_id=str(entry[MENTION_ID]),
tweet_timestamp=entry[TIMESTAMP],
our_status=entry[OUR_STATUS],
tweet=entry[OUR_RESPONSE],
original_request=entry[ORIGINAL_REQUEST],
tweet_url=entry[TWITTER_URL])
)

conn.execute(q_insert)

db.dispose()

return 'sent ' + str(len(responses)) + ' responses.'

app = Starlette(debug=False)

# Needed to avoid cross-domain issues
response_header = {
'Access-Control-Allow-Origin': '*'
}

@app.route('/')
async def start(request):
code = await reply()
return UJSONResponse({'text': code},
headers=response_header)

if __name__ == '__main__':
uvicorn.run(app, host='0.0.0.0', port=int(os.environ.get('PORT', 8080)))
Loading