-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathglmmTMB_demo.R
203 lines (182 loc) · 5.65 KB
/
glmmTMB_demo.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# glmmTMB demo
library(glmmTMB)
library(ggplot2)
library(dplyr)
# For reparameterizing the beta distribution
# From "classical" parameterization
beta_ab2muphi <- function(a, b) {
return(list(
mu = a/(a + b),
phi = a + b
))
}
# From mean-and-precision parameterization
beta_muphi2ab <- function(mu, phi) {
return(list(
a = mu * phi,
b = phi * (1 - mu)
))
}
# For generating random beta-distributed
# observations with mean-precision
# parameterization
rbeta2 <- function(n, mu, phi) {
if(length(mu) > 1 | length(phi) > 1) {
stop("Error: mu and phi parameters must",
" be constants, not vectors.")
}
params <- beta_muphi2ab(mu, phi)
return(rbeta(n, params$a[1], params$b[1]))
}
# Function for plotting beta pdf
beta_pdf <- function(a, b, add=FALSE) {
grid <- seq(0,1, by=0.01)
if(add) {
points(x = grid,
y = dbeta(grid, a, b))
} else {
plot(x = grid,
y = dbeta(grid, a, b))
}
}
beta_pdf(1, 1, FALSE)
beta_pdf(0.5, 0.5, TRUE)
beta_pdf(0.9, 0.9, TRUE)
beta_pdf(1, 0.9, TRUE)
beta_pdf(5, 2)
# Function for plotting beta pdf with
# mean-precision parameterization
beta_pdf2 <- function(mu, phi, add=FALSE) {
ab_parameters <- beta_muphi2ab(mu, phi)
a <- ab_parameters$a
b <- ab_parameters$b
grid <- seq(0,1, by=0.01)
if(add) {
points(x = grid,
y = dbeta(grid, a, b))
} else {
plot(x = grid,
y = dbeta(grid, a, b))
}
}
beta_pdf2(0.5, 2)
beta_pdf2(0.5, 1, TRUE)
beta_pdf2(0.5, 0.5, TRUE)
# Need to transform 0-1 scale because its values
# are inclusive of 0 and 1. We need it to
# exclude those endpoints, in order for logit
# link to avoid Inf and -Inf.
exclude01 <- function(x) {
n <- length(x)
return((x * (n - 1) + 0.5) / n)
}
# Randomly generate data with bistable beta-distributed residuals
# We assume the following model:
# Y ~ Beta(mu, phi)
# logit(mu) = XB
# In the first step, we generate random X,
# specify coefficients B, and calculate
# the vector mu.
set.seed(1)
x1 <- rnorm(250)
x2 <- rnorm(250)
beta1 <- -0.4
beta2 <- 5
mu <- boot::inv.logit( beta1*x1 + beta2*x2 )
hist(mu)
# This actually already looks like it has a
# bimodal beta error distribution, when in fact,
# it's beta-distributed *without* errors.
# Fitting a model to it should yield a perfect fit:
data <- data.frame(mu, x1, x2)
glm1 <- glmmTMB(mu ~ x1 + x2,
data = data,
family = beta_family(link = "logit"))
summary(glm1)
# Yes, it returns the original coefficients
# Actually, inv.logit(rnorm(250, 0, x)) produces a
# seemingly bimodal distribution bounded at 0 and 1
# for any value of x < ~2.
hist(boot::inv.logit(rnorm(250, 0, 1)))
hist(boot::inv.logit(rnorm(250, 0, 2)))
hist(boot::inv.logit(rnorm(250, 0, 3)))
# Now to simulate response values with bimodal
# beta-distributed errors, assume mu_i to be the
# mean of the beta distribution from which y_i
# was drawn.
set.seed(1001)
phi <- 1.5 # arbitrarily assume precision = 1.5
out <- lapply(mu, function(x) {rbeta2(1, x, phi)})
y <- exclude01(unlist(out))
# Now attempt to fit model to the new data
data <- data.frame(y, x1, x2)
glm_beta <- glmmTMB(y ~ x1 + x2,
data = data,
family = beta_family(link = "logit"))
summary(glm_beta)
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) -0.00576 0.07133 -0.081 0.9356
# x1 -0.12210 0.07420 -1.645 0.0999 .
# x2 1.46665 0.11360 12.911 <2e-16 ***
# Unable to return original coefficients,
# though at least the signs and relative
# magnitudes are consistent.
# What if data was generated from a beta
# distribution with a higher (but still < 2)
# precision parameter?
set.seed(1489321)
phi_precise <- 1.9
out <- lapply(mu, function(x) {rbeta2(1, x, phi_precise)})
y_precise <- exclude01(unlist(out))
data <- data.frame(y_precise, x1, x2)
glm_beta_precise <- glmmTMB(y_precise ~ x1 + x2,
data = data,
family = beta_family(link = "logit"))
summary(glm_beta_precise)
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) 0.04590 0.06979 0.658 0.511
# x1 -0.08107 0.07209 -1.124 0.261
# x2 1.58275 0.12130 13.049 <2e-16 ***
# Still unable to return original coefficients,
# though AIC is lower (better) than before.
# What if there was more data?
set.seed(89051)
x1 <- rnorm(10000)
x2 <- rnorm(10000)
beta1 <- -0.4
beta2 <- 1
mu <- boot::inv.logit( beta1*x1 + beta2*x2 )
phi <- 100
set.seed(1)
out <- lapply(mu, function(x) {rbeta2(1, x, phi)})
y <- exclude01(unlist(out))
data <- data.frame(y, x1, x2)
glm_beta <- glmmTMB(y ~ x1 + x2,
data = data,
family = beta_family(link = "logit"))
summary(glm_beta)
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) 0.001569 0.002227 0.7 0.481
# x1 -0.400665 0.002305 -173.8 <2e-16 ***
# x2 0.999744 0.002648 377.5 <2e-16 ***
# (Note: The model retrieves the original
# parameters only if the precision is high.
# However, any precision > 2 would not
# represent a bistable (0,1) distribution.)
# What if it was a much simpler model?
# Try an intercept-only model.
beta0 <- 0.5
mu <- boot::inv.logit( beta0 * rep(1, 10000) )
phi <- 1.5
set.seed(1)
out <- lapply(mu, function(x) {rbeta2(1, x, phi)})
y <- exclude01(unlist(out))
data <- data.frame(y)
glm_beta <- glmmTMB(y ~ 1,
data = data,
family = beta_family(link = "logit"))
summary(glm_beta)
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) 0.49598 0.01218 40.71 <2e-16 ***
# This model returns the
# original parameter, mu = 0.5!