-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp_funcs.R
330 lines (251 loc) · 11.5 KB
/
app_funcs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
# Sampo Vesanen's Master's thesis statistical tests and visualisation
# Stats and visualisation functions and essential variables
# "Parking of private cars and spatial accessibility in Helsinki Capital Region"
# by Sampo Vesanen
# 25.10.2020
# Initialise
library(onewaytests)
library(car)
library(plotrix)
library(moments)
library(rlang)
library(classInt)
library(ggplot2)
library(RColorBrewer)
# These custom infix operators work in the manner of += and ++ in C++/C# and
# Java. Save some space and increase readability.
`%+=%` = function(e1, e2) eval.parent(substitute(e1 <- e1 + e2))
`%-=%` = function(e1, e2) eval.parent(substitute(e1 <- e1 - e2))
CalcBoxplotTooltip <- function(inputdata, resp_col, expl_col) {
# This function calculates IQR values for the use of ggiraph's
# geom_boxplot_interactive().
# Calculate min, IQR and max for boxplot tooltip. Creates new columns for
# each.
resp_data <- inputdata[, resp_col]
expl_data <- inputdata[, expl_col]
# Interquartile range (IQR)
q1 <- tapply(resp_data, expl_data, quantile, probs = 0.25)
q1 <- as.numeric(q1[match(expl_data, names(q1))])
inputdata$tooltip_q1 <- q1
mdn <- sapply(split(resp_data, expl_data), median)
mdn <- as.numeric(mdn[match(inputdata[, expl_col], names(mdn))])
inputdata$tooltip_mdn <- mdn
q3 <- tapply(resp_data, expl_data, quantile, probs = 0.75)
q3 <- as.numeric(q3[match(expl_data, names(q3))])
inputdata$tooltip_q3 <- q3
# Whiskers: Max and min. In ggplot, the max and min are only as large/small as
# the corresponding value in resp_col. Therefore, parktime whisker max may be
# in actuality 15.5, but that value could not have been inputted to the survey.
# Closest user inputted value is 15, so ggplot whisker reaches only that.
# First calculate max and min as they one would normally do. Then, use dplyr
# to get the max values used by ggplot for each group.
# - Maximum is the name for unaltered maximum. tooltip_max will be inputted to
# boxplot tooltip.
# - NB! The same treatment is not given to minimum! This is an oversight, but
# the minimum seems to hang around zero 99,99 % of the time.
inputdata$maximum <- inputdata$tooltip_q3 + 1.5 * (inputdata$tooltip_q3 - inputdata$tooltip_q1)
inputdata$tooltip_min <- inputdata$tooltip_q1 - 1.5 * (inputdata$tooltip_q3 - inputdata$tooltip_q1)
inputdata$tooltip_min[inputdata$tooltip_min < 0] <- 0
ggplot_max <- inputdata %>%
dplyr::select(!!rlang::sym(expl_col), !!rlang::sym(resp_col), maximum) %>%
dplyr::group_by(!!rlang::sym(expl_col)) %>%
dplyr::filter(!!rlang::sym(resp_col) <= maximum) %>%
dplyr::select(-maximum) %>%
dplyr::summarise_all(max) %>%
as.data.frame()
# Transform dataframe of two columns to named vector. Named vector is then
# matched with all the values in explanatory column.
named_vec <- ggplot_max[, resp_col]
names(named_vec) <- ggplot_max[, expl_col]
named_vec <- as.numeric(named_vec[match(expl_data, names(named_vec))])
inputdata$tooltip_max <- named_vec
return(inputdata)
}
LabelBuilder <- function(plot_obj, expl, checkGroup, subdivGroup) {
# Download helper function.
if(length(checkGroup) == 0 & length(subdivGroup) == 0) {
# Return inputted ggplot object if there are no values in checkGroup or
# subdivGroup
result_plot <- plot_obj
} else {
# Add conditional disclaimer about excluded groups and/or subdivisions.
# Make use of Every8th() to divide long vectors into many rows
if(length(checkGroup) > 0) {
checklab <- paste("- Groups excluded from the explanatory variable ", expl, ":\n",
Every8th(c(checkGroup)), sep = "")
}
if(length(subdivGroup) > 0) {
subdivlab <- paste("- Subdivisions excluded:\n",
Every8th(c(subdivGroup)), sep = "")
}
# Build caption label
if (!exists("checklab") & exists("subdivlab")) {
full_lab <- subdivlab
} else if (exists("checklab") & !exists("subdivlab")) {
full_lab <- checklab
} else {
full_lab <- paste0(checklab, "\n", subdivlab)
}
# Make additions to ggplot object
result_plot <- plot_obj +
labs(caption = full_lab) +
theme(plot.caption = element_text(size = 15, hjust = 0, face = "italic"),
plot.caption.position = "plot")
}
return(result_plot)
}
Every8th <- function(input) {
# Download helper function.
# This function splits input$checkGroup and input$subdivGroup into bits of
# eight separated by a newline. For the use with downloadable versions of
# plots
# Prevent situation where an empty input is fed to split()
if(length(input) < 1) {
return("")
} else {
result <- split(input, ceiling(seq_along(input) / 8))
result <- sapply(result, function(x) paste0(x, collapse = ", "))
result <- capture.output(cat(paste(result, collapse = "\n")))
result <- paste0(result, collapse = "\n")
}
return(result)
}
GetCentroids <- function(fortified, unique_id, nominator) {
# Annotate desired feature in ggplot. Adapted from:
# https://stackoverflow.com/a/28963405/9455395
# Insert a fortified Spatial object and the column name you want to use as
# the label. With parameters "unique_id" and "nominator" a few functionalities
# can be attained:
# Unique_id tells what column to use as the unique identifier. This can be
# for example "kunta": four rows with coordinates and labels are created.
# If used "zipcode", 167 rows are created with coordinates and labels.
# "nominator" allocates the labels. "nominator" must contain the same amount
# of unique values, or more, than "unique_id", for example combination
# unique_id = "kunta" and nominator = "zipcode" will create broken results.
# unique_id will be stored as rowname for possible later use when row
# identification is needed.
# Examples:
# unique_id = "kunta" and nominator = "kunta":
# --- 4 rows, centroids in the middle of municipalities, labels by "kunta"
# unique_id = "zipcode" and nominator = "parktime_median":
# --- 167 rows, centroids in the middle of zipcodes, labels by "parktime_median"
# Change R options, otherwise as.numeric() loses some important digits
options(digits = 15)
result <-
do.call("rbind.data.frame",
by(fortified,
fortified[, unique_id],
function(x) {c(sp::Polygon(x[c("long", "lat")])@labpt,
x %>%
dplyr::group_by(!!rlang::sym(nominator)) %>%
dplyr::summarise() %>%
as.vector())
})) %>%
setNames(., c("long", "lat", "label"))
# Change long and lat to numeric vectors, if they already aren't
if(is.factor(result$long) == TRUE) {
result$long <- as.numeric(levels(result$long))[result$long]
}
if (is.factor(result$lat) == TRUE) {
result$lat <- as.numeric(levels(result$lat))[result$lat]
}
return(result)
}
InterpolateGgplotColors <- function(plot_obj, active_items, palette_max_cols,
palettename) {
# Use RColorBrewer for the color scale in ggplot. If there are more active
# items to be mapped than the maximum color amount in selected RColorBrewer
# palette, interpolate the extra colors.
if (length(active_items) > palette_max_cols) {
cols <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(palette_max_cols, palettename))
myPal <- cols(length(active_items))
result <- plot_obj + scale_fill_manual(values = myPal)
# Selected RColorBrewer palette works without any tricks
} else {
result <- plot_obj + scale_fill_brewer(palette = palettename)
}
return(result)
}
CreateJenksColumn <- function(fortified, postal, datacol, newcolname, classes_n = 5) {
# Use this function to create a column in fortified dataframe that can be
# used to portray Jenks breaks colouring in a ggplot map. Dplyr note: to
# enable parameters as column names in dplyr, apply !! and := for the left
# side and for the right side !!rlang::sym().
#
# Adapted from:
# https://medium.com/@traffordDataLab/lets-make-a-map-in-r-7bd1d9366098
# Suppress n jenks warnings, problem probably handled
classes <- suppressWarnings(
classInt::classIntervals(postal[, datacol], n = classes_n, style = "jenks"))
# When sample size is reduced drastically, median columns tended to receive
# class intervals starting in the negative. Not possible in data, so fix it.
if(classes$brks[1] < 0) {
classes$brks[1] <- 0
}
# classes$brk has to be wrapped with unique(), otherwise we can't get more
# than six classes for parktime_median or walktime_median
result <- fortified %>%
dplyr::mutate(!!newcolname := cut(!!rlang::sym(datacol),
unique(classes$brks),
include.lowest = T))
# Reverse column values to enable rising values from bottom to top in ggplot.
# In ggplot, use scale_fill_brewer(direction = -1) with this operation to flip
# the legend.
result[, newcolname] = factor(result[, newcolname],
levels = rev(levels(result[, newcolname])))
return(result)
}
SigTableToShiny <- function(sigTable, hasHeading) {
# Use this function to show significance tables in Shiny. It will be useful
# with Levene and ANOVA results.
# Due to the format of the significance table it is difficult to present it
# in Shiny. The main functionality of this method is to make the significance
# star available in the app.
# Levene test dataframe requires transposing. Levene table has an attribute
# heading while ANOVA doesn't. Use this.
if (is.null(attributes(sigTable)$heading)) {
# ANOVA
res <- as.data.frame(do.call(rbind, sigTable))
} else {
# Levene
res <- t(as.data.frame(do.call(rbind, sigTable)))
}
# Take into account that the table may have an attribute heading. Ask if this
# is the case
if (hasHeading == FALSE){
sigTablePosition <- 2
} else {
sigTablePosition <- 3
}
# Get the location of the signif.star
signif_ncol <- ncol(read.table(
textConnection(capture.output(sigTable)[sigTablePosition]),
fill = TRUE,
stringsAsFactors = TRUE))
# get signif.star
signif_star <- read.table(
textConnection(capture.output(sigTable)[sigTablePosition]),
fill = TRUE,
stringsAsFactors = TRUE)[[signif_ncol]]
# Detect if signif_star is something else than factor. If so, the function
# has picked up a value from probability column and the current analysis is
# not significant. Change value to " ".
if(!is.factor(signif_star)){
signif_star <- " "
}
# repeated_na takes into account that the significance table may have more
# rows than two.
repeated_na <- rep("NA", nrow(res) - 1)
signif_star <- c(as.character(signif_star), repeated_na)
# Bind column signif_star to result.
res <- cbind.data.frame(res, signif_star)
# Name rows. Try to detect differences in Levene and ANOVA summary tables.
if(is.null(rownames(sigTable[[1]]))){
# Levene
rownames(res) <- rownames(sigTable)
} else {
# ANOVA
rownames(res) <- rownames(sigTable[[1]])
}
return(res)
}