From 0ab755651e6788209b54a67d49e95b3fd7c68a16 Mon Sep 17 00:00:00 2001
From: Scott Brewer <scott@goawaygeek.com>
Date: Wed, 20 Jan 2021 12:58:39 -0800
Subject: [PATCH] uploading interactive twitter bot initial code

---
 interactive/Dockerfile         |  18 ++
 interactive/app.py             | 315 ++++++++++++++++++++++++++
 interactive/profanities_en.txt | 403 +++++++++++++++++++++++++++++++++
 3 files changed, 736 insertions(+)
 create mode 100644 interactive/Dockerfile
 create mode 100644 interactive/app.py
 create mode 100644 interactive/profanities_en.txt

diff --git a/interactive/Dockerfile b/interactive/Dockerfile
new file mode 100644
index 0000000..57c2e38
--- /dev/null
+++ b/interactive/Dockerfile
@@ -0,0 +1,18 @@
+FROM python:3.8.1-slim-buster
+
+# Install gcc and clean up APT in one RUN: files deleted by a later
+# instruction still take up space in the layer that created them.
+RUN apt-get -y update && apt-get -y install gcc \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+WORKDIR /
+
+# Make changes to the requirements/app here.
+# Dependencies are installed before the code is copied so that Docker can
+# reuse the cached pip layer when only application files change.
+RUN pip3 --no-cache-dir install sqlalchemy starlette uvicorn ujson tweepy pg8000 aiohttp chardet requests
+
+# Copy local code to the container image (into /, per WORKDIR).
+COPY . ./
+
+ENTRYPOINT ["python3", "-X", "utf8", "app.py"]
\ No newline at end of file
diff --git a/interactive/app.py b/interactive/app.py
new file mode 100644
index 0000000..fc1f57d
--- /dev/null
+++ b/interactive/app.py
@@ -0,0 +1,315 @@
+from starlette.applications import Starlette
+from starlette.responses import UJSONResponse
+import uvicorn
+
+import tweepy
+import time
+import os
+import aiohttp
+import asyncio
+from sqlalchemy import engine, create_engine, MetaData, Table
+from sqlalchemy.sql.expression import func, select
+from sqlalchemy.sql import text
+
+# Twitter app configuration information: required
+CONSUMER_KEY = os.environ.get('CONSUMER_KEY')
+CONSUMER_SECRET = os.environ.get('CONSUMER_SECRET')
+ACCESS_KEY = os.environ.get('ACCESS_KEY')
+ACCESS_SECRET = os.environ.get('ACCESS_SECRET')
+
+ACCOUNT = os.environ.get("ACCOUNT")
+DB_USER = os.environ.get("DB_USER")
+DB_PASS = os.environ.get("DB_PASS")
+DB_NAME = os.environ.get("DB_NAME")
+CONNECTION_NAME = os.environ.get("CONNECTION_NAME")  # goes into the /cloudsql/... socket path in reply()
+
+# todo include this for server. Raises rather than asserts because asserts are stripped under python -O.
+if not all([CONSUMER_KEY, CONSUMER_SECRET, ACCESS_KEY, ACCESS_SECRET]):
+    raise RuntimeError("Not all Twitter app config tokens have been specified.")
+
+INVALID_FILE = 'file_is_invalid'
+NO_FILE = 'file_does_not_exist'
+FILE_OF_PROFANE_PHRASES = 'profanities_en.txt'  # opened relative to the working directory
+SERVER_ERROR_STATUS = 'server_error_status: '  # prefix of the string get_json() returns on a non-200 reply
+CLOUD_RUN_URL = "https://url.to.cloud.run"  # endpoint get_json() posts to
+USER_NAME = os.environ.get('TWITTER_BOT_ACCOUNT')  # every request tweet must start with this
+
+# canned replies tweeted back when a request cannot be fulfilled
+SERVER_PROBLEM = "sorry, we're having some problems with your request, please try again later"
+PROFANITY_PROBLEM = ('sorry, your request for an object breached our list of profanities.  '
+                     'Please DM us if you think this was a mistake, otherwise please try again')
+FORMAT_PROBLEM = ('sorry, your request does not match our format. Please start your tweet with '
+                  'our username, do not mention other users in your tweet, and do not use hashtags either.')
+
+auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
+api = tweepy.API(auth)
+
+profanity_list = []  # filled on first use by build_profanity_list()
+db_results = []  # log rows queued by send_reply_to_mention() and written out by reply()
+
+# keys of the row dicts held in db_results
+MENTION_ID = 'mention_id'
+TIMESTAMP = 'timestamp'
+OUR_STATUS = 'our_status'
+OUR_RESPONSE = 'our_response'
+ORIGINAL_REQUEST = 'original_request'
+TWITTER_URL = 'twitter_url'
+
+
+def build_profanity_list():
+    # Fills the module-level profanity_list with one list of comma-separated phrases per line of the file from:
+    # https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words
+    # utf-8 is stated explicitly (the list contains an emoji) and `with` closes the handle; blank lines are
+    # skipped because an empty phrase would be a substring of every tweet.
+    with open(FILE_OF_PROFANE_PHRASES, 'r', encoding='utf-8') as file_read:
+        profanity_list.extend([elt.strip() for elt in line.split(',')] for line in file_read if line.strip())
+
+
+def is_profane(mention):
+    # Returns True when any listed phrase occurs, case-insensitively, as a
+    # substring of the mention's full text; False otherwise.
+
+    # load the phrase list from file the first time it is needed
+    if not profanity_list:
+        build_profanity_list()
+
+    # note: this is plain substring matching, so a phrase embedded inside a
+    # longer word also counts as a hit. Only the first phrase on each line
+    # of the file is consulted.
+    lowered_text = mention.full_text.lower()
+    return any(entry[0].lower() in lowered_text for entry in profanity_list)
+
+
+def send_reply_to_mention(reply, mention):
+    # Tweets `reply` at the author of `mention` and queues a log row for it in db_results.
+    # TODO: handle errors! This can crash if it tries to send something we have sent before (which
+    # shouldn't happen).  Try catching it.
+    author = mention.user.screen_name
+    # mention.id is the second positional argument (presumably the id of the status being replied
+    # to - confirm against the installed tweepy version)
+    api.update_status('@' + author + " " + reply, mention.id)
+
+    # the three canned apologies map to numbered error codes; any other text counts as a success
+    status_by_reply = {SERVER_PROBLEM: 'ERROR 1', PROFANITY_PROBLEM: 'ERROR 2', FORMAT_PROBLEM: 'ERROR 3'}
+    our_status = status_by_reply.get(reply, 'SUCCESS')
+
+    # store the data for insertion into the database logs; the timestamp is the
+    # current time in UTC
+    db_results.append({
+        MENTION_ID: mention.id,
+        OUR_STATUS: our_status,
+        TIMESTAMP: time.strftime("%Y-%m-%d %H:%M:%S+00", time.gmtime()),
+        OUR_RESPONSE: reply,
+        ORIGINAL_REQUEST: mention.full_text,
+        TWITTER_URL: f"https://twitter.com/{author}/status/{mention.id_str}",
+    })
+
+
+def is_correct_format(mention):
+    # A request is well formed when its text begins with our username
+    # (compared case-insensitively) and the remainder contains no '@' or '#',
+    # i.e. no other user mentions and no hashtags.
+    text_of_mention = mention.full_text
+    prefix_length = len(USER_NAME)
+
+    if text_of_mention[:prefix_length].lower() != USER_NAME.lower():
+        # the tweet does not start with our username
+        return False
+
+    # everything after our username must be free of mentions and hashtags
+    remainder = text_of_mention[prefix_length:]
+    return '@' not in remainder and '#' not in remainder
+
+
+def is_valid_tweet(tweet, mention):
+    # Decides whether the generated text `tweet` is fit to post in reply to `mention`.
+    # TODO return error codes
+    # Accepted only if it is at most 240 characters, contains no '<', is (ignoring trailing whitespace) at
+    # least two characters longer than the request it answers, and ends with two newline characters.
+    request_text = mention.full_text[len(USER_NAME):]
+    shortest_acceptable = len(request_text.lower()) + 2
+
+    if len(tweet) > 240:
+        # it is too long
+        return False
+
+    if '<' in tweet:
+        # a '<' is assumed to be part of <|endoftext|> leaking into the response
+        return False
+
+    if len(tweet.lower().rstrip()) < shortest_acceptable:
+        # the API returned the request all but unchanged (e.g. with only a '.' added on the end)
+        return False
+
+    # everything else has passed, so a response ending in two newlines is accepted
+    return tweet.endswith('\n\n')
+
+
+async def get_json(m, session):
+    # Posts the request text of mention `m` (everything after our username) to the generation
+    # service. Returns the decoded JSON body on HTTP 200; otherwise the string
+    # SERVER_ERROR_STATUS followed by the status code.
+    payload = {
+        "length": 75,
+        "temperature": 0.7,
+        "prefix": m.full_text[len(USER_NAME):],
+        "truncate": "<|endoftext|>",
+    }
+
+    async with session.post(CLOUD_RUN_URL, json=payload) as response:
+        if response.status != 200:
+            # hand the failure back as a string instead of raising
+            return SERVER_ERROR_STATUS + str(response.status)
+        return await response.json()
+
+
+async def fetch(count, mention, session):
+    # Generates a reply for one mention and posts it.
+    # The generation service is asked (via get_json) for a candidate up to three times; the first
+    # candidate to pass is_valid_tweet is posted. If none passes, SERVER_PROBLEM is posted instead.
+    #
+    # count:   position of this mention in the batch; only echoed in the return value
+    # mention: the tweet being answered
+    # session: the aiohttp session handed on to get_json
+    # returns: the tuple ('DONE:', str(count) + '\n')
+    for _attempt in range(3):
+        data = await get_json(mention, session)
+        if isinstance(data, dict) and 'text' in data:
+            candidate = data['text']
+        else:
+            # get_json returns a string for a non-200 status, but a 200 body without 'text' can be any
+            # JSON value; str() stops the concatenation below from raising TypeError on those
+            print('received server error: ' + str(data))
+            candidate = '<error>'  # contains '<', so is_valid_tweet always rejects it
+
+        if is_valid_tweet(candidate, mention):
+            # post the validated response to twitter here.
+            send_reply_to_mention(candidate.rstrip(), mention)
+            break
+    else:
+        # all three candidates were rejected
+        # TODO: notify someone of this error (email?)
+        send_reply_to_mention(SERVER_PROBLEM, mention)
+
+    return 'DONE:', str(count) + '\n'
+
+
+async def reply():
+    # Answers the mentions that arrived since the last run and logs the outcome.
+    # Steps: read the newest mention id recorded in the `requests` table; ask Twitter for mentions,
+    # passing that id as the first positional argument (presumably since_id - confirm against the
+    # installed tweepy version); record the id of the first mention returned; reply to each mention
+    # (an apology for profane or badly formatted requests, generated text otherwise); finally write
+    # the rows queued in db_results to the `interactions` table.
+    # Returns 'no new mentions' when there is nothing to do, otherwise 'sent N responses.' where N
+    # counts only the mentions that were handed to fetch().
+    global db_results
+
+    # connect to our database
+    db = create_engine(
+        engine.url.URL(
+            drivername='postgres+pg8000',
+            username=DB_USER,
+            password=DB_PASS,
+            database=DB_NAME,
+            query={
+                'unix_sock': '/cloudsql/{}/.s.PGSQL.5432'.format(
+                    CONNECTION_NAME)
+            }
+        ),
+        pool_size=1
+    )
+
+    # a new engine is created on every call, so dispose of it on every way out of this function,
+    # including the early return and any exception
+    try:
+        # obtain the most recent mention from our db
+        with db.connect() as conn:
+            s = text("Select MAX(mention_id) as mention_id FROM requests")
+            max_id = conn.execute(s).fetchone()
+
+        # Get the list of recent mentions not ALL mentions
+        if max_id[0] is None:
+            # when the table is empty the query returns a tuple with None in the first spot
+            mentions = api.mentions_timeline(tweet_mode='extended')
+        else:
+            mentions = api.mentions_timeline(max_id[0], tweet_mode='extended')
+
+        if len(mentions) < 1:
+            return ('no new mentions')
+
+        print('received ' + str(len(mentions)) + ' new mentions.')
+        most_recent_id = mentions[0].id
+
+        # update our db with the most recent mention
+        metadata = MetaData()
+        requests_table = Table('requests', metadata, autoload=True, autoload_with=db)
+
+        with db.connect() as conn:
+            ins = requests_table.insert().values(mention_id=most_recent_id)
+            conn.execute(ins)
+
+        # this is used to keep the session running and makes the requests take place in
+        # parallel rather than series.
+        responses = []
+
+        async with aiohttp.ClientSession() as session:
+            # A rejected mention must not end the loop: the newest id has already been recorded
+            # above, so mentions skipped here would presumably never be fetched again.
+            for i, mention in enumerate(mentions):
+                # confirm request is valid and not profane
+                if is_profane(mention):
+                    # let the user know their request was considered profane
+                    send_reply_to_mention(PROFANITY_PROBLEM, mention)
+                # confirm request is in valid format
+                elif not is_correct_format(mention):
+                    # let the user know their request did not meet our format requirements
+                    send_reply_to_mention(FORMAT_PROBLEM, mention)
+                else:
+                    # attempt to get a response
+                    responses.append(asyncio.ensure_future(fetch(i, mention, session)))
+
+            await asyncio.gather(*responses)
+            print('made ' + str(len(responses)) + ' responses.')
+
+        metadata = MetaData()
+        interactions = Table('interactions', metadata, autoload=True, autoload_with=db)
+
+        # take the queued rows and reset the queue for the next run
+        # db_results is a pretty ugly global variable hack and could probably be handled more elegantly
+        db_entry = db_results
+        db_results = []
+
+        with db.connect() as conn:
+            for entry in db_entry:
+                q_insert = (
+                    interactions
+                    .insert()
+                    .values(mention_id=str(entry[MENTION_ID]),
+                            tweet_timestamp=entry[TIMESTAMP],
+                            our_status=entry[OUR_STATUS],
+                            tweet=entry[OUR_RESPONSE],
+                            original_request=entry[ORIGINAL_REQUEST],
+                            tweet_url=entry[TWITTER_URL])
+                )
+                conn.execute(q_insert)
+    finally:
+        db.dispose()
+    return 'sent ' + str(len(responses)) + ' responses.'
+
+# ASGI application object handed to uvicorn below; debug mode is off
+app = Starlette(debug=False)
+
+# Needed to avoid cross-domain issues: lets pages from any origin read our
+# responses
+response_header = {'Access-Control-Allow-Origin': '*'}
+
+@app.route('/')
+async def start(request):
+    # A request to the root URL runs one reply() pass and reports its summary string as JSON.
+    summary = await reply()
+    return UJSONResponse({'text': summary}, headers=response_header)
+
+if __name__ == '__main__':  # run as a script: serve on all interfaces, on $PORT or 8080 by default
+    uvicorn.run(app, port=int(os.environ.get('PORT', 8080)), host='0.0.0.0')
diff --git a/interactive/profanities_en.txt b/interactive/profanities_en.txt
new file mode 100644
index 0000000..a438b9c
--- /dev/null
+++ b/interactive/profanities_en.txt
@@ -0,0 +1,403 @@
+2g1c
+2 girls 1 cup
+acrotomophilia
+alabama hot pocket
+alaskan pipeline
+anal
+anilingus
+anus
+apeshit
+arsehole
+ass
+asshole
+assmunch
+auto erotic
+autoerotic
+babeland
+baby batter
+baby juice
+ball gag
+ball gravy
+ball kicking
+ball licking
+ball sack
+ball sucking
+bangbros
+bangbus
+bareback
+barely legal
+barenaked
+bastard
+bastardo
+bastinado
+bbw
+bdsm
+beaner
+beaners
+beaver cleaver
+beaver lips
+beastiality
+bestiality
+big black
+big breasts
+big knockers
+big tits
+bimbos
+birdlock
+bitch
+bitches
+black cock
+blonde action
+blonde on blonde action
+blowjob
+blow job
+blow your load
+blue waffle
+blumpkin
+bollocks
+bondage
+boner
+boob
+boobs
+booty call
+brown showers
+brunette action
+bukkake
+bulldyke
+bullet vibe
+bullshit
+bung hole
+bunghole
+busty
+butt
+buttcheeks
+butthole
+camel toe
+camgirl
+camslut
+camwhore
+carpet muncher
+carpetmuncher
+chocolate rosebuds
+cialis
+circlejerk
+cleveland steamer
+clit
+clitoris
+clover clamps
+clusterfuck
+cock
+cocks
+coprolagnia
+coprophilia
+cornhole
+coon
+coons
+creampie
+cum
+cumming
+cumshot
+cumshots
+cunnilingus
+cunt
+darkie
+date rape
+daterape
+deep throat
+deepthroat
+dendrophilia
+dick
+dildo
+dingleberry
+dingleberries
+dirty pillows
+dirty sanchez
+doggie style
+doggiestyle
+doggy style
+doggystyle
+dog style
+dolcett
+domination
+dominatrix
+dommes
+donkey punch
+double dong
+double penetration
+dp action
+dry hump
+dvda
+eat my ass
+ecchi
+ejaculation
+erotic
+erotism
+escort
+eunuch
+fag
+faggot
+fecal
+felch
+fellatio
+feltch
+female squirting
+femdom
+figging
+fingerbang
+fingering
+fisting
+foot fetish
+footjob
+frotting
+fuck
+fuck buttons
+fuckin
+fucking
+fucktards
+fudge packer
+fudgepacker
+futanari
+gangbang
+gang bang
+gay sex
+genitals
+giant cock
+girl on
+girl on top
+girls gone wild
+goatcx
+goatse
+god damn
+gokkun
+golden shower
+goodpoop
+goo girl
+goregasm
+grope
+group sex
+g-spot
+guro
+hand job
+handjob
+hard core
+hardcore
+hentai
+homoerotic
+honkey
+hooker
+horny
+hot carl
+hot chick
+how to kill
+how to murder
+huge fat
+humping
+incest
+intercourse
+jack off
+jail bait
+jailbait
+jelly donut
+jerk off
+jigaboo
+jiggaboo
+jiggerboo
+jizz
+juggs
+kike
+kinbaku
+kinkster
+kinky
+knobbing
+leather restraint
+leather straight jacket
+lemon party
+livesex
+lolita
+lovemaking
+make me come
+male squirting
+masturbate
+masturbating
+masturbation
+menage a trois
+milf
+missionary position
+mong
+motherfucker
+mound of venus
+mr hands
+muff diver
+muffdiving
+nambla
+nawashi
+negro
+neonazi
+nigga
+nigger
+nig nog
+nimphomania
+nipple
+nipples
+nsfw
+nsfw images
+nude
+nudity
+nutten
+nympho
+nymphomania
+octopussy
+omorashi
+one cup two girls
+one guy one jar
+orgasm
+orgy
+paedophile
+paki
+panties
+panty
+pedobear
+pedophile
+pegging
+penis
+phone sex
+piece of shit
+pikey
+pissing
+piss pig
+pisspig
+playboy
+pleasure chest
+pole smoker
+ponyplay
+poof
+poon
+poontang
+punany
+poop chute
+poopchute
+porn
+porno
+pornography
+prince albert piercing
+pthc
+pubes
+pussy
+queaf
+queef
+quim
+raghead
+raging boner
+rape
+raping
+rapist
+rectum
+reverse cowgirl
+rimjob
+rimming
+rosy palm
+rosy palm and her 5 sisters
+rusty trombone
+sadism
+santorum
+scat
+schlong
+scissoring
+semen
+sex
+sexcam
+sexo
+sexy
+sexual
+sexually
+sexuality
+shaved beaver
+shaved pussy
+shemale
+shibari
+shit
+shitblimp
+shitty
+shota
+shrimping
+skeet
+slanteye
+slut
+s&m
+smut
+snatch
+snowballing
+sodomize
+sodomy
+spastic
+spic
+splooge
+splooge moose
+spooge
+spread legs
+spunk
+strap on
+strapon
+strappado
+strip club
+style doggy
+suck
+sucks
+suicide girls
+sultry women
+swastika
+swinger
+tainted love
+taste my
+tea bagging
+threesome
+throating
+thumbzilla
+tied up
+tight white
+tit
+tits
+titties
+titty
+tongue in a
+topless
+tosser
+towelhead
+tranny
+tribadism
+tub girl
+tubgirl
+tushy
+twat
+twink
+twinkie
+two girls one cup
+undressing
+upskirt
+urethra play
+urophilia
+vagina
+venus mound
+viagra
+vibrator
+violet wand
+vorarephilia
+voyeur
+voyeurweb
+voyuer
+vulva
+wank
+wetback
+wet dream
+white power
+whore
+worldsex
+wrapping men
+wrinkled starfish
+xx
+xxx
+yaoi
+yellow showers
+yiffy
+zoophilia
+🖕