Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated Profanity Filter #1

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 30 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,33 @@
profanity-filter
================

Python module that replaces inappropriate words with something more PG rated.

Usage
-----
```python
f = Filter('badword and bad words', clean_word='unicorn')
safe_string = f.clean()
print safe_string
```
Python module that replaces inappropriate words with something more PG rated. Used in a project that allows individuals to send text messages to a sign used in Xmas decorations.

Uses a line separated file listing bad words as its source
to check if a user submitted something inappropriate.

Code modified from original by Jared Mess

Modified by: jjb
Date 1/2/2016

Example of code use in test_profanity_filter.py:
Run
$ python test_profanity_filter.py

Example Use:
text = "Cassandra is a fuCking piece of shit_on_a_long_stick"
f=Filter(text, "HAPPY")
f.clean_anywhere()
f.clean_start()
f.clean_whole_word()

Example Output---
Original:
Cassandra is a fuCking piece of shit_on_a_long_stick
Output from clean_anywhere:
CHAPPYandra is a HAPPYing piece of HAPPY_on_a_long_stick
Output from clean_start:
Cassandra is a HAPPYing piece of HAPPY_on_a_long_stick
Output from clean whole word:
Cassandra is a HAPPY piece of shit_on_a_long_stick
113 changes: 96 additions & 17 deletions profanity_filter.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,109 @@
"""
Uses a line separated file listing bad words as its source
to check if a user submitted something inappropriate
to check if a user submitted something inappropriate.
Code modified from: https://github.com/jared-mess/profanity-filter

f = Filter('slut', clean_word='unicorn')
word = f.clean()
print word
>>slut
Modified by: Jeremy Becnel
Date 1/2/2016

Example of Code in test_profanity_filter.py

Example Output---
Original:
Cassandra is a fuCking piece of shit_on_a_long_stick
Output from clean_anywhere:
CHAPPYandra is a HAPPYing piece of HAPPY_on_a_long_stick
Output from clean_start:
Cassandra is a HAPPYing piece of HAPPY_on_a_long_stick
Output from clean whole word:
Cassandra is a HAPPY piece of shit_on_a_long_stick
"""
import re

# Location and name of the bad word file (one word per line).
# It is opened when the Filter class body below is executed.
badwordfile = 'bad_words.txt'


class Filter(object):
    """Replace "bad" words in a string with a more acceptable word.

    The words to look for are read once, when this class is created, from
    the line separated file named by the module-level ``badwordfile``. They
    are shared by every instance through the ``bad_words`` class attribute.

    Typical use::

        f = Filter(text, 'HAPPY')
        cleaned = f.clean_anywhere()   # or clean_start() / clean_whole_word()
        if f.is_profanity_found():
            print(f.get_clean_string())

    Every cleaner works on the original string (never on the output of an
    earlier clean) and overwrites the result stored by the previous call.
    Matching ignores case. Each word is matched literally, and longer words
    are tried before shorter ones so that the outcome does not depend on the
    iteration order of the word set.
    """

    # Class variable containing all the bad words we are looking for.
    # The file is read inside a ``with`` block so the handle is closed, and
    # blank lines are skipped: an empty alternative in the regular expression
    # would match at every position of the input.
    with open(badwordfile) as _bad_word_source:
        bad_words = set(
            _line.rstrip('\r\n') for _line in _bad_word_source
            if _line.strip()
        )
    del _bad_word_source

    def __init__(self, original_string, replacement_string='****'):
        """Remember the text to clean and the word used as replacement.

        original_string    -- the text that the cleaners will examine.
        replacement_string -- text substituted for each bad word found.
        """
        self.original_string = original_string
        self.replacement_string = replacement_string
        # None until one of the cleaners has run, then True or False.
        self.profanity_found = None
        self.__has_been_cleaned = False
        # None until one of the cleaners has run, then the cleaned text.
        self.clean_string = None

    #===================================INSTANCE METHODS

    #---------------------------standard get methods with some error checking

    def get_original_string(self):
        """Return the string given to the constructor, unchanged."""
        return self.original_string

    def get_replacement_string(self):
        """Return the word that is substituted for profanity."""
        return self.replacement_string

    def is_profanity_found(self):
        """Return True if the most recent clean found profanity.

        Raises AssertionError if no cleaner has been called yet. The error is
        raised explicitly (not with ``assert``) so the check is still made
        when Python runs with -O.
        """
        if not self.__has_been_cleaned:
            raise AssertionError(
                "Word must be cleaned before this can be determined.")
        return self.profanity_found

    def get_clean_string(self):
        """Return the string produced by the most recent clean.

        Raises AssertionError if no cleaner has been called yet.
        """
        if not self.__has_been_cleaned:
            raise AssertionError(
                "Word must be cleaned before a clean string can be returned.")
        return self.clean_string

    #------------------------------cleaners

    # The methods below are instance methods that clean the given string
    # according to different rules.

    def __expression(self, template):
        """Build the search expression for the shared bad word list.

        ``template`` holds one ``%s`` that is filled with each (escaped)
        word, e.g. r'\b%s\b'. Returns None when there are no words to look
        for, because an empty group would match everywhere.
        """
        # Longest first: with 'ass' and 'asshole' both listed, the longer
        # word must be tried first or only its prefix would be replaced.
        words = sorted(Filter.bad_words, key=lambda w: (-len(w), w))
        if not words:
            return None
        return '(%s)' % '|'.join(template % re.escape(w) for w in words)

    def __clean(self, exp):
        """Apply ``exp`` to the original string and record the outcome.

        Stores whether profanity was found and the cleaned string, marks the
        instance as cleaned, and returns the cleaned string. ``exp`` may be
        None, meaning there is nothing to search for.
        """
        if exp is None:
            self.profanity_found = False
            self.clean_string = self.original_string
        else:
            r = re.compile(exp, re.IGNORECASE)
            # subn reports how many substitutions were made, which tells us
            # whether any profanity was present without a second scan.
            cleaned, hits = r.subn(self.replacement_string,
                                   self.original_string)
            self.profanity_found = hits > 0
            self.clean_string = cleaned
        self.__has_been_cleaned = True
        return self.clean_string

    def clean_anywhere(self):
        """Replace profanity found anywhere, even inside a longer word.

        Example with bad_words = ['ass', 'fuck', 'shit']:
        "Cassandra Fuck Off you shithead"
        becomes 'C****andra **** Off you ****head'
        """
        return self.__clean(self.__expression('%s'))

    def clean_start(self):
        """Replace profanity only where it begins a word.

        A word boundary is required in front of the match but not after it.
        Example with bad_words = ['ass', 'fuck', 'shit']:
        "Cassandra Fuck Off you shithead"
        becomes 'Cassandra **** Off you ****head'
        """
        return self.__clean(self.__expression('\\b%s'))

    def clean_whole_word(self):
        """Replace profanity only where it is a whole word.

        A word boundary is required both before and after the match.
        Example with bad_words = ['ass', 'fuck', 'shit']:
        "Cassandra Fuck Off you shithead"
        becomes 'Cassandra **** Off you shithead'
        """
        return self.__clean(self.__expression('\\b%s\\b'))

    def clean(self):
        """Same as clean_anywhere(); kept for callers of the earlier API."""
        return self.clean_anywhere()

60 changes: 60 additions & 0 deletions test_profanity_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from profanity_filter import Filter

# Demonstration script for the Filter class.
# Single-argument print(...) calls are used so the script produces the same
# output under Python 2 and also runs under Python 3.

text = "Cassandra is a fuCking piece of shit_on_a_long_stick"

print("We first test the three cleaning methods on the message:")
print(text)
print("")

# Part 1: show what each of the three cleaners returns for the same message.
f = Filter(text, "HAPPY")
print("Output from clean_anywhere:")
print(f.clean_anywhere())
print("Output from clean_start:")
print(f.clean_start())
print("Output from clean whole word:")
print(f.clean_whole_word())


print("")

# Part 2: after each clean, ask the filter whether profanity was found.
text = "Cassy is an asset to our company."
print("We now test the profanity check with the following:")
print(text)
print("Output from clean_anywhere:")

f = Filter(text, 'HAPPY')
f.clean_anywhere()
# NOTE(review): the if/else bodies below are reconstructed; the source this
# was taken from had lost its indentation - confirm the cleaned string is
# meant to be printed only in the else branch.
if not f.is_profanity_found():
    print("No profanity.")
else:
    print("Here is the clean anywhere:")
    print(f.get_clean_string())


print("")
print("Output from clean_start:")
print(f.clean_start())
if not f.is_profanity_found():
    print("No profanity.")
else:
    print("Here is the clean string:")
    print(f.get_clean_string())

print("")
print("Output from clean whole word:")
print(f.clean_whole_word())
if not f.is_profanity_found():
    print("No profanity.")
else:
    print("Here is the clean string:")
    print(f.get_clean_string())





#print "We now make sure the error handling is working. An error should occur"
#f = Filter(text, "SAD")
#print f.is_profanity_found()
#print f.get_clean_string()