Skip to content

Commit

Permalink
Add support for hazard rate key function in IDS (#35)
Browse files Browse the repository at this point in the history
* Allow hazard key function; not organized in output yet

* Output processing should be working now

* Handle 1-column data in distsamp

* Add tests and fix param ordering bug

* Add warning

Fixes #33
  • Loading branch information
kenkellner authored Jan 28, 2025
1 parent c2451fc commit 537ba18
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 12 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: unmarked
Version: 1.4.3.9007
Date: 2025-01-09
Version: 1.4.3.9008
Date: 2025-01-28
Type: Package
Title: Models for Data from Unmarked Animals
Authors@R: c(
Expand Down
62 changes: 56 additions & 6 deletions R/IDS.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,11 @@ IDS <- function(lambdaformula = ~1,
stopifnot(is.null(durationOC) || (length(durationOC) == numSites(dataOC)))
surveyDurations <- list(ds=durationDS, pc=durationPC, oc=durationOC)

stopifnot(keyfun %in% c("halfnorm", "exp"))
keyidx <- switch(keyfun, "halfnorm"={1}, "exp"={2})
stopifnot(keyfun %in% c("halfnorm", "exp", "hazard"))
if(keyfun == "hazard"){
warning("Support for hazard key function is experimental and may yield poor estimates.", call.=FALSE)
}
keyidx <- switch(keyfun, "halfnorm"={1}, "exp"={2}, "hazard"={3})

if(missing(maxDistPC)) maxDistPC <- max(dataDS@dist.breaks)
if(missing(maxDistOC)) maxDistOC <- max(dataDS@dist.breaks)
Expand Down Expand Up @@ -153,8 +156,7 @@ IDS <- function(lambdaformula = ~1,
kmsq = lam_adjust <- lam_adjust)

# Parameter stuff------------------------------------------------------------
# Doesn't support hazard
pind_mat <- matrix(0, nrow=5, ncol=2)
pind_mat <- matrix(0, nrow=8, ncol=2)
pind_mat[1,] <- c(1, ncol(gd_hds$X))
pind_mat[2,] <- max(pind_mat) + c(1, ncol(gd_hds$V))
if(!is.null(detformulaPC) & !is.null(dataPC)){
Expand All @@ -166,20 +168,37 @@ IDS <- function(lambdaformula = ~1,
if(has_avail){
pind_mat[5,] <- max(pind_mat) + c(1, ncol(Xavail_ds))
}
# Hazard-rate scale parameter(s)
if(keyfun == "hazard"){
pind_mat[6,] <- max(pind_mat) + 1
if(!is.null(detformulaPC) & !is.null(dataPC)){
pind_mat[7,] <- max(pind_mat) + 1
}
if(!is.null(detformulaOC) & !is.null(dataOC)){
pind_mat[8,] <- max(pind_mat) + 1
}
}

if(is.null(starts)){
lam_init <- log(mean(apply(dataDS@y, 1, sum, na.rm=TRUE)) / lam_adjust[1])
params_tmb <- list(beta_lam = c(lam_init, rep(0, ncol(gd_hds$X)-1)),
beta_hds = c(log(median(dataDS@dist.breaks)),rep(0, ncol(gd_hds$V)-1)),
beta_pc = rep(0,0),
beta_oc = rep(0,0),
beta_avail = rep(0,0))
beta_avail = rep(0,0),
beta_schds = rep(0,0), # hazard rate scale inits
beta_scpc = rep(0,0),
beta_scoc = rep(0,0))

if(keyfun == "hazard") params_tmb$beta_schds <- 0

if(!is.null(detformulaPC) & !is.null(dataPC)){
params_tmb$beta_pc <- c(log(maxDistPC/2), rep(0, ncol(gd_pc$V)-1))
if(keyfun == "hazard") params_tmb$beta_scpc <- 0
}
if(!is.null(detformulaOC) & !is.null(dataOC)){
params_tmb$beta_oc <- c(log(maxDistOC/2), rep(0, ncol(gd_oc$V)-1))
if(keyfun == "hazard") params_tmb$beta_scoc <- 0
}
if(has_avail){
params_tmb$beta_avail <- rep(0, ncol(Xavail_ds))
Expand All @@ -193,13 +212,20 @@ IDS <- function(lambdaformula = ~1,
beta_hds = starts[pind_mat[2,1]:pind_mat[2,2]],
beta_pc = rep(0,0),
beta_oc = rep(0,0),
beta_avail = rep(0,0))
beta_avail = rep(0,0),
beta_schds = rep(0,0), # hazard rate scale inits
beta_scpc = rep(0,0),
beta_scoc = rep(0,0))

if(keyfun == "hazard") params_tmb$beta_schds <- pind_mat[6,1]

if(!is.null(detformulaPC) & !is.null(dataPC)){
params_tmb$beta_pc <- starts[pind_mat[3,1]:pind_mat[3,2]]
if(keyfun == "hazard") params_tmb$beta_scpc <- starts[pind_mat[7,1]]
}
if(!is.null(detformulaOC) & !is.null(dataOC)){
params_tmb$beta_oc <- starts[pind_mat[4,1]:pind_mat[4,2]]
if(keyfun == "hazard") params_tmb$beta_scoc <- starts[pind_mat[8,1]]
}
if(has_avail){
params_tmb$beta_avail <- starts[pind_mat[5,1]:pind_mat[5,2]]
Expand Down Expand Up @@ -283,6 +309,30 @@ IDS <- function(lambdaformula = ~1,
est_list <- c(est_list, list(phi=avail_est))
}

if(keyfun == "hazard"){
ds_haz_coef <- get_coef_info(sdr, "schds", "(Intercept)", pind_mat[6,1]:pind_mat[6,2])
ds_est_haz <- unmarkedEstimate("Distance sampling scale", short.name="ds_scale",
estimates = ds_haz_coef$ests[1], covMat = ds_haz_coef$cov, fixed=1,
invlink="exp", invlinkGrad="exp")
est_list <- c(est_list, list(schds=ds_est_haz))

if(!is.null(detformulaPC) & !is.null(dataPC)){
pc_haz_coef <- get_coef_info(sdr, "scpc", "(Intercept)", pind_mat[7,1]:pind_mat[7,2])
pc_est_haz <- unmarkedEstimate("Point count scale", short.name="pc_scale",
estimates = pc_haz_coef$ests[1], covMat = pc_haz_coef$cov, fixed=1,
invlink="exp", invlinkGrad="exp")
est_list <- c(est_list, list(scpc=pc_est_haz))
}

if(!is.null(detformulaOC) & !is.null(dataOC)){
oc_haz_coef <- get_coef_info(sdr, "scoc", "(Intercept)", pind_mat[8,1]:pind_mat[8,2])
oc_est_haz <- unmarkedEstimate("Presence/absence scale", short.name="oc_scale",
estimates = oc_haz_coef$ests[1], covMat = oc_haz_coef$cov, fixed=1,
invlink="exp", invlinkGrad="exp")
est_list <- c(est_list, list(scoc=oc_est_haz))
}
}

est_list <- unmarkedEstimateList(est_list)

new("unmarkedFitIDS", fitType = "IDS", call = match.call(),
Expand Down
5 changes: 4 additions & 1 deletion R/distsamp.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,11 @@ distsamp <- function(formula, data,
point = {
for(i in 1:M) {
a[i, 1] <- pi*db[2]^2
for(j in 2:J)
if(J > 1){
for(j in 2:J){
a[i, j] <- pi*db[j+1]^2 - sum(a[i, 1:(j-1)])
}
}
u[i,] <- a[i,] / sum(a[i,])
}
})
Expand Down
28 changes: 25 additions & 3 deletions src/TMB/tmb_IDS.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ Type tmb_IDS(objective_function<Type>* obj) {
PARAMETER_VECTOR(beta_pc);
PARAMETER_VECTOR(beta_oc);
PARAMETER_VECTOR(beta_avail);
PARAMETER_VECTOR(beta_schds); // hazard-rate scales
PARAMETER_VECTOR(beta_scpc);
PARAMETER_VECTOR(beta_scoc);

Type scale_hds = 0;
if(key_hds == 3){
scale_hds = exp(beta_schds(0));
}

int survey = 1; // Only point counts supported

Expand All @@ -74,7 +82,7 @@ Type tmb_IDS(objective_function<Type>* obj) {

for (int i=0; i<M; i++){

vector<Type> cp = distance_prob(key_hds, sigma_hds(i), Type(0), survey,
vector<Type> cp = distance_prob(key_hds, sigma_hds(i), scale_hds, survey,
db_hds, w_hds, a_hds, u_hds);

for (int j=0; j<J; j++){
Expand All @@ -95,10 +103,17 @@ Type tmb_IDS(objective_function<Type>* obj) {

//vector<Type> sigma_pc = V_pc * beta_pc;
vector<Type> sigma_pc;
Type scale_pc = 0;
if(beta_pc.size() > 0){
sigma_pc = V_pc * beta_pc;
if(key_hds == 3){
scale_pc = exp(beta_scpc(0));
}
} else{
sigma_pc = V_pc * beta_hds;
if(key_hds == 3){
scale_pc = scale_hds;
}
}
sigma_pc = exp(sigma_pc);

Expand All @@ -111,7 +126,7 @@ Type tmb_IDS(objective_function<Type>* obj) {
}

for (int i=0; i<M; i++){
vector<Type> cp = distance_prob(key_pc, sigma_pc(i), Type(0), survey,
vector<Type> cp = distance_prob(key_pc, sigma_pc(i), scale_pc, survey,
db_pc, w_pc, a_pc, u_pc);
loglik -= dpois(y_pc(i,0), lam_pc(i) * cp(0) * p_avail_pc(i), true);
}
Expand All @@ -131,10 +146,17 @@ Type tmb_IDS(objective_function<Type>* obj) {

//vector<Type> sigma_oc = V_oc * beta_oc;
vector<Type> sigma_oc;
Type scale_oc = 0;
if(beta_oc.size() > 0){
sigma_oc = V_oc * beta_oc;
if(key_hds == 3){
scale_oc = exp(beta_scoc(0));
}
} else{
sigma_oc = V_oc * beta_hds;
if(key_hds == 3){
scale_oc = scale_hds;
}
}
sigma_oc = exp(sigma_oc);

Expand All @@ -153,7 +175,7 @@ Type tmb_IDS(objective_function<Type>* obj) {

for (int i=0; i<M; i++){

vector<Type> q = 1 - distance_prob(key_oc, sigma_oc(i), Type(0), survey,
vector<Type> q = 1 - distance_prob(key_oc, sigma_oc(i), scale_oc, survey,
db_oc, w_oc, a_oc, u_oc) * p_avail_oc(i);
//vector<Type> q = 1 - distance_prob(key_oc, sigma_oc(i), Type(0), survey,
// db_oc, w_oc, a_oc, u_oc);
Expand Down
86 changes: 86 additions & 0 deletions tests/testthat/test_IDS.R
Original file line number Diff line number Diff line change
Expand Up @@ -206,3 +206,89 @@ test_that("IDS handles missing values", {
))

})

test_that("Hazard-rate works with IDS", {

# Simulate IDS dataset
set.seed(123)

# First simulate distance sampling data
# Distance sampling sites
Mds <- 300
elev <- rnorm(Mds)
# Distance breaks
db <- seq(0, 0.3, length.out=7)
# blank unmarked frame
umf_ds <- unmarkedFrameDS(y = matrix(NA, Mds, length(db)-1),
siteCovs = data.frame(elev = elev),
survey="point", dist.breaks = db, unitsIn='km')

# True coefficient values
cf <- list(state = c(3, -0.5), # abundance intercept and effect of elevation
det = -2.5, # detection sigma - exp(-2.5) = 0.08
scale = log(2)) # detection hazard rate scale = 2

# Simulate response
umf_ds <- simulate(umf_ds, formula=~1~elev, coefs=cf,
keyfun = "hazard", output = "density", unitsOut = "kmsq")[[1]]

# Fit regular distance sampling model as test
mod <- distsamp(~1~elev, umf_ds, keyfun="hazard", output="density", unitsOut = "kmsq")
mod

# good correspondance
expect_equivalent(coef(mod), c(2.8913, -0.4381, -2.4697, 0.60489), tol=1e-4)

# "Point count" sites
# simulate these as 1-bin distance
Mpc <- 500
elev2 <- rnorm(Mpc)
max_dist <- 0.5
db2 <- c(0, max_dist) # single bin

# blank unmarked frame
umf_pc <- unmarkedFrameDS(y = matrix(NA, Mpc, 1), # single column in y
siteCovs = data.frame(elev = elev2),
survey = "point", dist.breaks=db2, unitsIn="km")

# simulate response (same coefficients)
umf_pc <- simulate(umf_pc, formula=~1~elev, coefs=cf,
keyfun = "hazard", output = "density", unitsOut = "kmsq")[[1]]


# convert to unmarkedFramePcount
umf_pc2 <- unmarkedFramePCount(y = umf_pc@y,
siteCovs = umf_pc@siteCovs)

# Fit IDS model (in this case: common abundance and detection processes,
# and common duration)
mod2 <- expect_warning(IDS(lambdaformula = ~elev, detformulaDS = ~1,
dataDS = umf_ds, dataPC = umf_pc2, keyfun = "hazard",
unitsOut = "kmsq", maxDistPC = 0.5))

# similar to just distance sampling data
expect_equivalent(coef(mod2),
c(2.8419,-0.5070, -2.4420, 0.6313), tol=1e-5)
#cbind(true=unlist(cf), est=coef(mod2))

# Check with separate formulas
mod3 <- expect_warning(IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaPC = ~1,
dataDS = umf_ds, dataPC = umf_pc2, keyfun = "hazard",
unitsOut = "kmsq", maxDistPC = 0.5))

# Also make sure the coefs are in the right order: lam, then all sigma, then all haz scales
expect_equal(coef(mod3),
c(`lam(Int)` = 2.86807, `lam(elev)` = -0.50746,
`ds(Int)` = -2.47106, `pc(Int)` = -3.37866,
`ds_scale(Int)` = 0.60447, `pc_scale(Int)` = -0.02817), tol=1e-5)

# estimation doesn't really work for this one
expect_warning(se <- SE(mod3))
expect_true(is.nan(se["pc(Int)"]))
expect_true(is.nan(se["pc_scale(Int)"]))

# Make sure different formulas for sigma work
mod4 <- expect_warning(IDS(lambdaformula = ~1, detformulaDS = ~elev, detformulaPC = ~1,
dataDS = umf_ds, dataPC = umf_pc2, keyfun = "hazard",
unitsOut = "kmsq", maxDistPC = 0.5))
})

0 comments on commit 537ba18

Please sign in to comment.