From 0ab755651e6788209b54a67d49e95b3fd7c68a16 Mon Sep 17 00:00:00 2001 From: Scott Brewer Date: Wed, 20 Jan 2021 12:58:39 -0800 Subject: [PATCH] uploading interactive twitter bot initital code --- interactive/Dockerfile | 18 ++ interactive/app.py | 315 ++++++++++++++++++++++++++ interactive/profanities_en.txt | 403 +++++++++++++++++++++++++++++++++ 3 files changed, 736 insertions(+) create mode 100644 interactive/Dockerfile create mode 100644 interactive/app.py create mode 100644 interactive/profanities_en.txt diff --git a/interactive/Dockerfile b/interactive/Dockerfile new file mode 100644 index 0000000..57c2e38 --- /dev/null +++ b/interactive/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.8.1-slim-buster + +RUN apt-get -y update && apt-get -y install gcc + +# Copy local code to the container image. +WORKDIR / + + +# Make changes to the requirements/app here. +# This Dockerfile order allows Docker to cache the checkpoint layer +# and improve build times if making changes. +RUN pip3 --no-cache-dir install sqlalchemy starlette uvicorn ujson tweepy pg8000 aiohttp chardet requests +COPY . ./ + +# Clean up APT when done. 
+RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +ENTRYPOINT ["python3", "-X", "utf8", "app.py"] \ No newline at end of file diff --git a/interactive/app.py b/interactive/app.py new file mode 100644 index 0000000..fc1f57d --- /dev/null +++ b/interactive/app.py @@ -0,0 +1,315 @@ +from starlette.applications import Starlette +from starlette.responses import UJSONResponse +import uvicorn + +import tweepy +import time +import os +import aiohttp +import asyncio +from sqlalchemy import engine, create_engine, MetaData, Table +from sqlalchemy.sql.expression import func, select +from sqlalchemy.sql import text + +# Twitter app configuration information: required +CONSUMER_KEY = os.environ.get('CONSUMER_KEY') +CONSUMER_SECRET = os.environ.get('CONSUMER_SECRET') +ACCESS_KEY = os.environ.get('ACCESS_KEY') +ACCESS_SECRET = os.environ.get('ACCESS_SECRET') + +ACCOUNT = os.environ.get("ACCOUNT") +DB_USER = os.environ.get("DB_USER") +DB_PASS = os.environ.get("DB_PASS") +DB_NAME = os.environ.get("DB_NAME") +CONNECTION_NAME = os.environ.get("CONNECTION_NAME") + +# todo include this for server +assert all([CONSUMER_KEY, CONSUMER_SECRET, ACCESS_KEY, ACCESS_SECRET] + ), "Not all Twitter app config tokens have been specified." + +INVALID_FILE = 'file_is_invalid' +NO_FILE = 'file_does_not_exist' +FILE_OF_PROFANE_PHRASES = 'profanities_en.txt' +SERVER_ERROR_STATUS = 'server_error_status: ' +CLOUD_RUN_URL = "https://url.to.cloud.run" + +USER_NAME = os.environ.get('TWITTER_BOT_ACCOUNT') + +SERVER_PROBLEM = "sorry, we're having some problems with your request, please try again later" +PROFANITY_PROBLEM = 'sorry, your request for an object breached our list of profanities. ' \ + 'Please DM us if you think this was a mistake, otherwise please try again' +FORMAT_PROBLEM = 'sorry, your request does not match our format. Please start your tweet with ' \ + 'our username, do not mention other users in your tweet, and do not use hashtags either.' 
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)

api = tweepy.API(auth)

# Filled lazily by build_profanity_list(); one inner list per non-blank line
# of FILE_OF_PROFANE_PHRASES (the comma-separated fields of that line).
profanity_list = []
# Interaction records queued by send_reply_to_mention() and written to the
# 'interactions' table by reply().
db_results = []

# Keys of the dictionaries stored in db_results.
MENTION_ID = 'mention_id'
TIMESTAMP = 'timestamp'
OUR_STATUS = 'our_status'
OUR_RESPONSE = 'our_response'
ORIGINAL_REQUEST = 'original_request'
TWITTER_URL = 'twitter_url'

# Status recorded in the interaction log for each canned reply; any other
# reply text is logged as 'SUCCESS'.
_STATUS_BY_CANNED_REPLY = {
    SERVER_PROBLEM: 'ERROR 1',
    PROFANITY_PROBLEM: 'ERROR 2',
    FORMAT_PROBLEM: 'ERROR 3',
}

# How many generated texts fetch() validates before giving up.
_MAX_GENERATION_ATTEMPTS = 3


def build_profanity_list():
    """Load FILE_OF_PROFANE_PHRASES into the module-level profanity_list.

    The phrases are taken from:
    https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words

    Each line is split on commas and the stripped fields are appended as one
    inner list. Lines whose first field is empty are skipped: an empty phrase
    is a substring of every string, so it would flag every tweet as profane.
    """
    with open(FILE_OF_PROFANE_PHRASES, 'r', encoding='utf-8') as phrases_file:
        for line in phrases_file:
            fields = [elt.strip() for elt in line.split(',')]
            if fields[0]:
                profanity_list.append(fields)


def is_profane(mention):
    """Return True if the mention's text contains any listed phrase.

    Matching is a case-insensitive substring test against the first field of
    every profanity_list entry. The list is loaded on first use.
    """
    if not profanity_list:
        build_profanity_list()

    message = mention.full_text.lower()
    return any(entry[0].lower() in message for entry in profanity_list)


def send_reply_to_mention(reply, mention):
    """Tweet `reply` in response to `mention` and queue a log record.

    The record is appended to db_results; reply() writes it to the database.

    TODO: handle errors! api.update_status can raise, for example when it
    tries to send something we have sent before (which shouldn't happen).
    Nothing is queued in that case because the exception propagates.
    """
    # The reply target is passed by keyword so the call does not depend on
    # the positional parameter order of a particular tweepy release.
    api.update_status('@' + mention.user.screen_name + " " + reply,
                      in_reply_to_status_id=mention.id)

    our_status = _STATUS_BY_CANNED_REPLY.get(reply, 'SUCCESS')

    # store the data for insertion into the database logs
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S+00", time.gmtime())
    t_url = f"https://twitter.com/{mention.user.screen_name}/status/{mention.id_str}"
    db_results.append({MENTION_ID: mention.id, OUR_STATUS: our_status, TIMESTAMP: timestamp,
                       OUR_RESPONSE: reply, ORIGINAL_REQUEST: mention.full_text, TWITTER_URL: t_url})


def is_correct_format(mention):
    """Return True if the mention starts with USER_NAME and tags nothing else.

    The text must begin with USER_NAME (case-insensitive) and the remainder
    must contain neither '@' (other users) nor '#' (hashtags).
    """
    message = mention.full_text
    name_length = len(USER_NAME)
    if message[:name_length].lower() != USER_NAME.lower():
        return False
    remainder = message[name_length:]
    return '@' not in remainder and '#' not in remainder


def is_valid_tweet(tweet, mention):
    """Return True if the generated `tweet` is fit to be posted.

    The text must be at most 240 characters, must not contain '<' (taken as a
    sign that part of <|endoftext|> leaked into it), must be at least two
    characters longer than the request once trailing whitespace is removed,
    and must end with two newline characters.
    """
    # TODO return error codes
    original_request = mention.full_text[len(USER_NAME):]
    if len(tweet) > 240:
        return False
    if '<' in tweet:
        return False
    # I've noticed some tweets just come back with a . on the end of them;
    # requiring two extra characters accounts for those.
    if len(tweet.rstrip()) < len(original_request) + 2:
        return False
    return tweet.endswith('\n\n')


async def get_json(m, session):
    """POST the mention's request text to CLOUD_RUN_URL.

    Returns the decoded JSON body on HTTP 200. Otherwise returns a string
    starting with SERVER_ERROR_STATUS, followed by the HTTP status or, for a
    transport or decoding failure, the exception class name.
    """
    object_request = m.full_text[len(USER_NAME):]

    post_dict = {
        "length": 75,
        "temperature": 0.7,
        "prefix": object_request,
        "truncate": "<|endoftext|>"
    }

    try:
        async with session.post(CLOUD_RUN_URL, json=post_dict) as response:
            if response.status == 200:
                return await response.json()
            return SERVER_ERROR_STATUS + str(response.status)
    except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
        # Report connection problems the same way as a bad HTTP status so the
        # caller's retry logic applies to them as well.
        return SERVER_ERROR_STATUS + type(exc).__name__


def _generated_text(data):
    """Return the 'text' string of a get_json() result, or '' on failure."""
    # get_json() returns a str on failure; `'text' in data` would then be a
    # substring test, so the type has to be checked first.
    if isinstance(data, dict) and isinstance(data.get('text'), str):
        return data['text']
    print('received server error: ' + str(data))
    return ''


async def fetch(count, mention, session):
    """Generate a reply for `mention` and tweet it.

    Up to _MAX_GENERATION_ATTEMPTS texts are requested; the first one that
    passes is_valid_tweet() is posted. If none passes, SERVER_PROBLEM is
    posted instead. Returns the tuple ('DONE:', str(count) + '\\n').
    """
    for _ in range(_MAX_GENERATION_ATTEMPTS):
        response = _generated_text(await get_json(mention, session))
        if is_valid_tweet(response, mention):
            # post the validated response to twitter here.
            send_reply_to_mention(response.rstrip(), mention)
            break
    else:
        send_reply_to_mention(SERVER_PROBLEM, mention)
        # TODO: notify someone of this error (email?)

    return 'DONE:', str(count) + '\n'


def _store_interactions(db):
    """Write every queued db_results record to 'interactions' and clear the queue."""
    global db_results
    # db_results is a pretty ugly global variable hack and could probably be
    # handled more elegantly
    entries = db_results
    db_results = []
    if not entries:
        return

    interactions = Table('interactions', MetaData(), autoload=True, autoload_with=db)
    with db.connect() as conn:
        for entry in entries:
            conn.execute(
                interactions
                .insert()
                .values(mention_id=str(entry[MENTION_ID]),
                        tweet_timestamp=entry[TIMESTAMP],
                        our_status=entry[OUR_STATUS],
                        tweet=entry[OUR_RESPONSE],
                        original_request=entry[ORIGINAL_REQUEST],
                        tweet_url=entry[TWITTER_URL])
            )


async def _reply_to_new_mentions(db):
    """Answer all mentions newer than the last recorded one; return a summary."""
    # obtain the most recent mention from our db
    with db.connect() as conn:
        newest_handled = conn.execute(
            text("Select MAX(mention_id) as mention_id FROM requests")
        ).fetchone()

    # Get the list of recent mentions not ALL mentions
    if newest_handled[0] is None:
        # when the table is empty the query returns a row with None in the first spot
        mentions = api.mentions_timeline(tweet_mode='extended')
    else:
        mentions = api.mentions_timeline(since_id=newest_handled[0], tweet_mode='extended')

    if len(mentions) < 1:
        return 'no new mentions'

    print('received ' + str(len(mentions)) + ' new mentions.')

    # Update our db with the most recent mention. This happens before any
    # reply is sent, as it always has, so a later failure does not lead to
    # the same mentions being answered twice.
    requests_table = Table('requests', MetaData(), autoload=True, autoload_with=db)
    with db.connect() as conn:
        conn.execute(
            requests_table.insert().values(
                mention_id=max(mention.id for mention in mentions))
        )

    responses = []
    outcomes = []
    try:
        # One shared session keeps the connection open and lets the generation
        # requests run in parallel rather than in series.
        async with aiohttp.ClientSession() as session:
            # Every mention is handled; a rejected one must not stop the
            # mentions that follow it from being answered.
            for index, mention in enumerate(mentions):
                if is_profane(mention):
                    # let the user know their request was considered profane
                    send_reply_to_mention(PROFANITY_PROBLEM, mention)
                elif not is_correct_format(mention):
                    # let the user know their request did not meet our format requirements
                    send_reply_to_mention(FORMAT_PROBLEM, mention)
                else:
                    # attempt to get a response
                    responses.append(asyncio.ensure_future(fetch(index, mention, session)))

            # Collect exceptions instead of raising on the first one so that
            # one failing mention cannot discard the results of the others.
            outcomes = await asyncio.gather(*responses, return_exceptions=True)
    finally:
        # Log whatever was actually tweeted, even if something failed above.
        _store_interactions(db)

    for outcome in outcomes:
        if isinstance(outcome, Exception):
            print('response task failed: ' + repr(outcome))
    print('made ' + str(len(responses)) + ' responses.')

    return 'sent ' + str(len(responses)) + ' responses.'


async def reply():
    """Reply to all new Twitter mentions and log the interactions.

    Connects to the database, processes the mentions that arrived since the
    last recorded mention id, and always disposes of the engine afterwards.

    Returns 'no new mentions' or 'sent <n> responses.'.
    """
    # connect to our database
    db = create_engine(
        engine.url.URL(
            # 'postgresql' is the dialect name; the old 'postgres' alias is
            # not accepted by newer SQLAlchemy releases.
            drivername='postgresql+pg8000',
            username=DB_USER,
            password=DB_PASS,
            database=DB_NAME,
            query={
                'unix_sock': '/cloudsql/{}/.s.PGSQL.5432'.format(
                    CONNECTION_NAME)
            }
        ),
        pool_size=1
    )

    try:
        return await _reply_to_new_mentions(db)
    finally:
        # Runs on every path, including the early 'no new mentions' return.
        db.dispose()
+ +app = Starlette(debug=False) + +# Needed to avoid cross-domain issues +response_header = { + 'Access-Control-Allow-Origin': '*' +} + +@app.route('/') +async def start(request): + code = await reply() + return UJSONResponse({'text': code}, + headers=response_header) + +if __name__ == '__main__': + uvicorn.run(app, host='0.0.0.0', port=int(os.environ.get('PORT', 8080))) diff --git a/interactive/profanities_en.txt b/interactive/profanities_en.txt new file mode 100644 index 0000000..a438b9c --- /dev/null +++ b/interactive/profanities_en.txt @@ -0,0 +1,403 @@ +2g1c +2 girls 1 cup +acrotomophilia +alabama hot pocket +alaskan pipeline +anal +anilingus +anus +apeshit +arsehole +ass +asshole +assmunch +auto erotic +autoerotic +babeland +baby batter +baby juice +ball gag +ball gravy +ball kicking +ball licking +ball sack +ball sucking +bangbros +bangbus +bareback +barely legal +barenaked +bastard +bastardo +bastinado +bbw +bdsm +beaner +beaners +beaver cleaver +beaver lips +beastiality +bestiality +big black +big breasts +big knockers +big tits +bimbos +birdlock +bitch +bitches +black cock +blonde action +blonde on blonde action +blowjob +blow job +blow your load +blue waffle +blumpkin +bollocks +bondage +boner +boob +boobs +booty call +brown showers +brunette action +bukkake +bulldyke +bullet vibe +bullshit +bung hole +bunghole +busty +butt +buttcheeks +butthole +camel toe +camgirl +camslut +camwhore +carpet muncher +carpetmuncher +chocolate rosebuds +cialis +circlejerk +cleveland steamer +clit +clitoris +clover clamps +clusterfuck +cock +cocks +coprolagnia +coprophilia +cornhole +coon +coons +creampie +cum +cumming +cumshot +cumshots +cunnilingus +cunt +darkie +date rape +daterape +deep throat +deepthroat +dendrophilia +dick +dildo +dingleberry +dingleberries +dirty pillows +dirty sanchez +doggie style +doggiestyle +doggy style +doggystyle +dog style +dolcett +domination +dominatrix +dommes +donkey punch +double dong +double penetration +dp action +dry hump +dvda +eat 
my ass +ecchi +ejaculation +erotic +erotism +escort +eunuch +fag +faggot +fecal +felch +fellatio +feltch +female squirting +femdom +figging +fingerbang +fingering +fisting +foot fetish +footjob +frotting +fuck +fuck buttons +fuckin +fucking +fucktards +fudge packer +fudgepacker +futanari +gangbang +gang bang +gay sex +genitals +giant cock +girl on +girl on top +girls gone wild +goatcx +goatse +god damn +gokkun +golden shower +goodpoop +goo girl +goregasm +grope +group sex +g-spot +guro +hand job +handjob +hard core +hardcore +hentai +homoerotic +honkey +hooker +horny +hot carl +hot chick +how to kill +how to murder +huge fat +humping +incest +intercourse +jack off +jail bait +jailbait +jelly donut +jerk off +jigaboo +jiggaboo +jiggerboo +jizz +juggs +kike +kinbaku +kinkster +kinky +knobbing +leather restraint +leather straight jacket +lemon party +livesex +lolita +lovemaking +make me come +male squirting +masturbate +masturbating +masturbation +menage a trois +milf +missionary position +mong +motherfucker +mound of venus +mr hands +muff diver +muffdiving +nambla +nawashi +negro +neonazi +nigga +nigger +nig nog +nimphomania +nipple +nipples +nsfw +nsfw images +nude +nudity +nutten +nympho +nymphomania +octopussy +omorashi +one cup two girls +one guy one jar +orgasm +orgy +paedophile +paki +panties +panty +pedobear +pedophile +pegging +penis +phone sex +piece of shit +pikey +pissing +piss pig +pisspig +playboy +pleasure chest +pole smoker +ponyplay +poof +poon +poontang +punany +poop chute +poopchute +porn +porno +pornography +prince albert piercing +pthc +pubes +pussy +queaf +queef +quim +raghead +raging boner +rape +raping +rapist +rectum +reverse cowgirl +rimjob +rimming +rosy palm +rosy palm and her 5 sisters +rusty trombone +sadism +santorum +scat +schlong +scissoring +semen +sex +sexcam +sexo +sexy +sexual +sexually +sexuality +shaved beaver +shaved pussy +shemale +shibari +shit +shitblimp +shitty +shota +shrimping +skeet +slanteye +slut +s&m +smut +snatch 
+snowballing +sodomize +sodomy +spastic +spic +splooge +splooge moose +spooge +spread legs +spunk +strap on +strapon +strappado +strip club +style doggy +suck +sucks +suicide girls +sultry women +swastika +swinger +tainted love +taste my +tea bagging +threesome +throating +thumbzilla +tied up +tight white +tit +tits +titties +titty +tongue in a +topless +tosser +towelhead +tranny +tribadism +tub girl +tubgirl +tushy +twat +twink +twinkie +two girls one cup +undressing +upskirt +urethra play +urophilia +vagina +venus mound +viagra +vibrator +violet wand +vorarephilia +voyeur +voyeurweb +voyuer +vulva +wank +wetback +wet dream +white power +whore +worldsex +wrapping men +wrinkled starfish +xx +xxx +yaoi +yellow showers +yiffy +zoophilia +🖕