-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsentiment-analysis-reddit-negativity.html
362 lines (298 loc) · 16.6 KB
/
sentiment-analysis-reddit-negativity.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
<!DOCTYPE html>
<html lang="en">
<head>
<script src="https://use.fontawesome.com/afd448ce82.js"></script>
<!-- Meta Tag -->
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<!-- SEO -->
<meta name="author" content="Bruno Rocha">
<meta name="keywords" content="Software, Engineering, Blog, Posts, iOS, Xcode, Swift, Articles, Tutorials, OBJ-C, Objective-C, Apple">
<meta name="description" content="I have been feeling that Reddit is well on its way to taking away from 4chan the title of internet hate machine, because even when a subreddit is themed around happiness it takes little to no effort to find extremely hostile comment chains.">
<meta name="title" content="Sentiment Analysis with NSTagger: Ranking popular subreddits by the negativity/hostility of its comments">
<meta name="url" content="https://swiftrocks.com/sentiment-analysis-reddit-negativity">
<meta name="image" content="https://swiftrocks.com/images/thumbs/thumb.jpg?4">
<meta name="copyright" content="Bruno Rocha">
<meta name="robots" content="index,follow">
<meta property="og:title" content="Sentiment Analysis with NSTagger: Ranking popular subreddits by the negativity/hostility of its comments"/>
<meta property="og:image" content="https://swiftrocks.com/images/thumbs/thumb.jpg?4"/>
<meta property="og:description" content="I have been feeling that Reddit is well on its way to taking away from 4chan the title of internet hate machine, because even when a subreddit is themed around happiness it takes little to no effort to find extremely hostile comment chains."/>
<meta property="og:type" content="website"/>
<meta property="og:url" content="https://swiftrocks.com/sentiment-analysis-reddit-negativity"/>
<meta name="twitter:card" content="summary_large_image"/>
<meta name="twitter:image" content="https://swiftrocks.com/images/thumbs/thumb.jpg?4"/>
<meta name="twitter:image:alt" content="Page Thumbnail"/>
<meta name="twitter:title" content="Sentiment Analysis with NSTagger: Ranking popular subreddits by the negativity/hostility of its comments"/>
<meta name="twitter:description" content="I have been feeling that Reddit is well on its way to taking away from 4chan the title of internet hate machine, because even when a subreddit is themed around happiness it takes little to no effort to find extremely hostile comment chains."/>
<meta name="twitter:site" content="@rockbruno_"/>
<!-- Favicon -->
<link rel="icon" type="image/png" href="images/favicon/iconsmall2.png" sizes="32x32" />
<link rel="apple-touch-icon" href="images/favicon/iconsmall2.png">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Source+Sans+3:ital,wght@0,200..900;1,200..900&display=swap" rel="stylesheet">
<!-- Bootstrap CSS Plugins -->
<link rel="stylesheet" type="text/css" href="css/bootstrap.css">
<!-- Prism CSS Stylesheet -->
<link rel="stylesheet" type="text/css" href="css/prism4.css">
<!-- Main CSS Stylesheet -->
<link rel="stylesheet" type="text/css" href="css/style48.css">
<link rel="stylesheet" type="text/css" href="css/sponsor4.css">
<!-- HTML5 shiv and Respond.js support IE8 or Older for HTML5 elements and media queries -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "BlogPosting",
"mainEntityOfPage": {
"@type": "WebPage",
"@id": "https://swiftrocks.com/sentiment-analysis-reddit-negativity"
},
"image": [
"https://swiftrocks.com/images/thumbs/thumb.jpg"
],
"datePublished": "2022-06-12T16:00:00+02:00",
"dateModified": "2022-06-12T16:00:00+02:00",
"author": {
"@type": "Person",
"name": "Bruno Rocha"
},
"publisher": {
"@type": "Organization",
"name": "SwiftRocks",
"logo": {
"@type": "ImageObject",
"url": "https://swiftrocks.com/images/thumbs/thumb.jpg"
}
},
"headline": "Sentiment Analysis with NSTagger: Ranking popular subreddits by the negativity/hostility of its comments",
"abstract": "I have been feeling that Reddit is well on its way to taking away from 4chan the title of internet hate machine, because even when a subreddit is themed around happiness it takes little to no effort to find extremely hostile comment chains."
}
</script>
</head>
<body>
<div id="main">
<!-- Blog Header -->
<!-- Blog Post (Right Sidebar) Start -->
<div class="container">
<div class="col-xs-12">
<div class="page-body">
<div class="row">
<div><a href="https://swiftrocks.com">
<img id="logo" class="logo" alt="SwiftRocks" src="images/bg/logo2light.png">
</a>
<div class="menu-large">
<div class="menu-arrow-right"></div>
<div class="menu-header menu-header-large">
<div class="menu-item">
<a href="blog">blog</a>
</div>
<div class="menu-item">
<a href="about">about</a>
</div>
<div class="menu-item">
<a href="talks">talks</a>
</div>
<div class="menu-item">
<a href="projects">projects</a>
</div>
<div class="menu-item">
<a href="software-engineering-book-recommendations">book recs</a>
</div>
<div class="menu-item">
<a href="games">game recs</a>
</div>
<div class="menu-arrow-right-2"></div>
</div>
</div>
<div class="menu-small">
<div class="menu-arrow-right"></div>
<div class="menu-header menu-header-small-1">
<div class="menu-item">
<a href="blog">blog</a>
</div>
<div class="menu-item">
<a href="about">about</a>
</div>
<div class="menu-item">
<a href="talks">talks</a>
</div>
<div class="menu-item">
<a href="projects">projects</a>
</div>
<div class="menu-arrow-right-2"></div>
</div>
<div class="menu-arrow-right"></div>
<div class="menu-header menu-header-small-2">
<div class="menu-item">
<a href="software-engineering-book-recommendations">book recs</a>
</div>
<div class="menu-item">
<a href="games">game recs</a>
</div>
<div class="menu-arrow-right-2"></div>
</div>
</div>
</div>
<div class="content-page" id="WRITEIT_DYNAMIC_CONTENT">
<!--WRITEIT_POST_NAME=Sentiment Analysis with NSTagger: Ranking popular subreddits by the negativity/hostility of its comments-->
<!--WRITEIT_POST_HTML_NAME=sentiment-analysis-reddit-negativity-->
<!--Add here the additional properties that you want each page to possess.-->
<!--These properties can be used to change content in the template page or in the page itself as shown here.-->
<!--Properties must start with 'WRITEIT_POST'.-->
<!--Writeit provides and injects WRITEIT_POST_NAME and WRITEIT_POST_HTML_NAME by default.-->
<!--WRITEIT_POST_SHORT_DESCRIPTION=I have been feeling that Reddit is well on its way to taking away from 4chan the title of internet hate machine, because even when a subreddit is themed around happiness it takes little to no effort to find extremely hostile comment chains.-->
<!--DateFormat example: 2021-04-12T14:00:00+02:00-->
<!--WRITEIT_POST_SITEMAP_DATE_LAST_MOD=2022-06-12T16:00:00+02:00-->
<!--WRITEIT_POST_SITEMAP_DATE=2022-06-12T16:00:00+02:00-->
<title>Sentiment Analysis with NSTagger: Ranking popular subreddits by the negativity/hostility of its comments</title>
<div class="blog-post">
<div class="post-title-index">
<h1>Sentiment Analysis with NSTagger: Ranking popular subreddits by the negativity/hostility of its comments</h1>
</div>
<div class="post-info">
<div class="post-info-text">Published on 12 Jun 2022</div>
</div>
<p>I have been feeling that Reddit is well on its way to taking away from 4chan the title of <i>internet hate machine</i>, because even when a subreddit is themed around happiness it takes little to no effort to find extremely hostile comment chains with complete strangers arguing about the most pointless things. I was curious to see what this looked like across different subreddits, so I decided to use Reddit's APIs and iOS's built-in sentiment analysis tools to visualize how negative a subreddit is.</p>
<div class="sponsor-article-ad-auto hidden"></div>
<p>This is not a new problem, and you can find many GitHub repos of people who've done similar things with Reddit comments in the past. I started this "project" by <a href="https://github.com/hein-j/reddit-sentiment-analysis">modifying this script made by hein-j</a>, but I wasn't satisfied with the results being given by the usual Python NL frameworks. I thought they were way too eager to say that a subreddit was neutral when it is in reality notorious for being negative (maybe they're not the best at detecting passive-aggressiveness?), so I wondered if I could get better results by using iOS's <code>NLTagger</code>, which has been available since iOS 12.</p>
<p>With a simple setup of extracting comments and running them through a Swift script, I grabbed a couple of popular subreddits, analyzed the comments of the top 10 submissions at the time and plotted the output by pasting the results in a Google Sheets doc. If you want to try this yourself and/or tweak the parameters, you can find the code I used at the bottom of the article.</p>
<div class="post-image">
<img src="https://i.imgur.com/n14Jsmi.png" alt="Alt">
</div>
<div class="post-image">
<img src="https://i.imgur.com/Xmw2WK9.png" alt="Alt">
</div>
<div class="post-image">
<img src="https://i.imgur.com/JZA3vx0.png" alt="Alt">
</div>
<h2>Info and Comments</h2>
<ul>
<li>Neutrality was defined as sentiment scoring between -0.5 and 0.5. The huge majority of values in this range were on the negative side.</li>
<li><i>Happy</i> is the only subreddit that scored higher in positivity than in negativity.</li>
<li>Despite naturally ranking high in positivity, subreddits themed around happiness were still shown to contain an overwhelming number of negative comments.</li>
<li>Subreddits themed around advice/knowledge ranking as <i>more hostile</i> than subreddits themed around <i>actual</i> hate is interesting, but not surprising. They are famous among redditors for being hotspots for insecure individuals and often host some of the lowest-quality discussions on the platform.</li>
</ul>
<h2>Code</h2>
<h3>Python</h3>
<p>The purpose of the Python script is to connect to Reddit and dump the comments of a subreddit into a file (despite the <code>.json</code> extension, the dump is plain text with the comments joined by a separator). The script requires <code>praw</code> and a <code>praw.ini</code> file in the project's root containing a section named <code>bot</code> with the following Reddit App parameters: <code>client_id</code>, <code>client_secret</code>, and <code>user_agent</code>. See <a href="https://www.geeksforgeeks.org/how-to-get-client_id-and-client_secret-for-python-reddit-api-registration/">here</a> for help with creating a Reddit App and <a href="https://praw.readthedocs.io/en/latest/getting_started/configuration/prawini.html">here</a> for help with praw.</p>
<p>Make sure to modify the output path of this script before running.</p>
<p>Usage example: <code>python comments.py getmotivated</code></p>
<pre><code>import praw
import sys
import argparse
def parse():
    """Parse the command line and return the resulting namespace.

    The only argument is the positional ``subreddit`` (a string), so the
    returned object exposes it as ``.subreddit``.
    """
    print('parsing arguments and options...')
    cli = argparse.ArgumentParser(description="Get the comments of a subreddit")
    cli.add_argument('subreddit', help='name of subreddit', type=str)
    namespace = cli.parse_args()
    return namespace
def gather(subreddit):
    """Collect the texts of a subreddit's hot submissions and their comments.

    Walks the first 10 submissions returned by ``subreddit.hot`` and gathers
    each non-empty ``selftext`` plus the ``body`` of every comment in the
    flattened comment list.

    Returns:
        A list of strings, one per submission text / comment body.

    Exits (via ``sys.exit``) with an error message when fetching fails or
    when nothing at all was collected.
    """
    print('searching subreddit for key phrase...')
    relevant_strings = []
    print('gathering texts for analysis...')
    try:
        for submission in subreddit.hot(limit=10):
            print('...')
            if submission.selftext:
                relevant_strings.append(str(submission.selftext))
            for comment in submission.comments.list():
                # MoreComments entries are placeholders without a body; skip them.
                if isinstance(comment, praw.models.MoreComments):
                    continue
                relevant_strings.append(str(comment.body))
    except Exception as error:
        # Only real errors are handled here: Ctrl-C / SystemExit must propagate,
        # and the actual cause is reported instead of a misleading
        # "no posts found" message.
        sys.exit('ERROR: Failed to gather posts for the provided subreddit: ' + str(error))
    if len(relevant_strings) == 0:
        sys.exit('ERROR: No posts were found for the provided subreddit and key phrase.')
    return relevant_strings
import os

# Read the target subreddit name from the command line.
args = parse()
subreddit_str = args.subreddit
print('establishing reddit instance...')
try:
    # "bot" is the praw.ini section that holds the Reddit App credentials.
    reddit = praw.Reddit("bot")
except Exception:
    # Catch real failures only, so Ctrl-C is not reported as a configuration problem.
    sys.exit('ERROR: Failed to establish a reddit instance. Have you correctly set up your praw.ini file? See README.md for more detail.')
print('connecting to subreddit...')
subreddit = reddit.subreddit(subreddit_str)
# Get user inputs to analyze
relevant_strings = gather(subreddit)
# The Swift script splits the dump on this exact separator.
joined = "||aa||aa||aa||aa||".join(relevant_strings)
#open text file
# open() does not expand "~", so resolve the home directory explicitly.
output_path = os.path.expanduser("~/Desktop/r"+subreddit_str+".json")
# Comments can contain emoji and other non-ASCII text; write them as UTF-8
# regardless of the platform's default encoding.
text_file = open(output_path, "w", encoding="utf-8")
#write string to file
text_file.write(joined)
#close file
text_file.close()</code></pre>
<h3>Swift</h3>
<p>The Swift part of the script loads the files dumped by the Python script and runs them through iOS's <code>NLTagger</code>. Make sure to modify the input path and subreddits in the script to the subreddits you're analyzing.</p>
<pre><code>import Foundation
import NaturalLanguage
let subreddits = ["gaming", "wholesomememes", "funny", "technology", "eyebleach", "dogswithjobs", "aww", "comedyheaven", "iamatotalpieceofshit", "mildlyinteresting", "mildlyinfuriating", "upliftingnews", "politics", "mademesmile", "interestingasfuck", "memes", "science", "animalsbeingbros", "askreddit", "relationships", "happy", "getmotivated", "rarepuppers"]
// For every subreddit: load its dump, score each comment, and print the
// percentage of negative, neutral and positive comments (in that order).
for sub in subreddits {
    // String(contentsOfFile:) does not expand "~", so resolve the home directory first.
    let path = NSString(string: "~/Desktop/r\(sub).json").expandingTildeInPath
    // Skip a subreddit whose dump cannot be read instead of crashing the whole run.
    guard let dump = try? String(contentsOfFile: path, encoding: .utf8) else {
        print("Skipping \(sub): could not read \(path)")
        continue
    }
    // Same separator the Python script used to join the comments.
    let arr = dump.components(separatedBy: "||aa||aa||aa||aa||")
    var negative: Double = 0
    var neutral: Double = 0
    var positive: Double = 0
    for input in arr {
        let tagger = NLTagger(tagSchemes: [.sentimentScore])
        tagger.string = input
        let (sentiment, _) = tagger.tag(at: input.startIndex, unit: .paragraph, scheme: .sentimentScore)
        // The tag's raw value is parsed as a Double; a missing or unparsable tag counts as 0.
        let score = Double(sentiment?.rawValue ?? "0") ?? 0
        // Scores from -0.5 to 0.5 (inclusive) are counted as neutral.
        if score > 0.5 {
            positive += 1
        } else if score < -0.5 {
            negative += 1
        } else {
            neutral += 1
        }
    }
    let total = negative + neutral + positive
    print(sub.capitalized)
    print(negative / total * 100)
    print(neutral / total * 100)
    print(positive / total * 100)
}</code></pre>
</div>
</div>
<div class="blog-post footer-main">
<div class="footer-logos">
<a href="https://swiftrocks.com/rss.xml"><i class="fa fa-rss"></i></a>
<a href="https://twitter.com/rockbruno_"><i class="fa fa-twitter"></i></a>
<a href="https://github.com/rockbruno"><i class="fa fa-github"></i></a>
</div>
<div class="footer-text">
© 2025 Bruno Rocha
</div>
<div class="footer-text">
<p><a href="https://swiftrocks.com">Home</a> / <a href="blog">See all posts</a></p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- Blog Post (Right Sidebar) End -->
</div>
</div>
</div>
<!-- All Javascript Plugins -->
<script type="text/javascript" src="js/jquery.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js" integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM" crossorigin="anonymous"></script>
<script type="text/javascript" src="js/prism4.js"></script>
<!-- Main Javascript File -->
<script type="text/javascript" src="js/scripts30.js"></script>
<!-- Google tag (gtag.js) -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-H8KZTWSQ1R"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-H8KZTWSQ1R');
</script>
</body>
</html>