Add support for hazard rate key function in IDS (#35)

* Allow hazard key function; not organized in output yet * Output processing should be working now * Handle 1-column data in distsamp * Add tests and fix param ordering bug * Add warning Fixes #33
biodiverse · Jan 28, 2025 · 537ba18 · 537ba18
1 parent c2451fc
commit 537ba18
Show file tree

Hide file tree

Showing 5 changed files with 173 additions and 12 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: unmarked
-Version: 1.4.3.9007
-Date: 2025-01-09
+Version: 1.4.3.9008
+Date: 2025-01-28
 Type: Package
 Title: Models for Data from Unmarked Animals
 Authors@R: c(

diff --git a/R/IDS.R b/R/IDS.R
@@ -97,8 +97,11 @@ IDS <- function(lambdaformula = ~1,
   stopifnot(is.null(durationOC) || (length(durationOC) == numSites(dataOC)))
   surveyDurations <- list(ds=durationDS, pc=durationPC, oc=durationOC)
 
-  stopifnot(keyfun %in% c("halfnorm", "exp"))
-  keyidx <- switch(keyfun, "halfnorm"={1}, "exp"={2})
+  stopifnot(keyfun %in% c("halfnorm", "exp", "hazard"))
+  if(keyfun == "hazard"){
+    warning("Support for hazard key function is experimental and may yield poor estimates.", call.=FALSE)
+  }
+  keyidx <- switch(keyfun, "halfnorm"={1}, "exp"={2}, "hazard"={3})
 
   if(missing(maxDistPC)) maxDistPC <- max(dataDS@dist.breaks)
   if(missing(maxDistOC)) maxDistOC <- max(dataDS@dist.breaks)
@@ -153,8 +156,7 @@ IDS <- function(lambdaformula = ~1,
     kmsq = lam_adjust <- lam_adjust)
 
   # Parameter stuff------------------------------------------------------------
-  # Doesn't support hazard
-  pind_mat <- matrix(0, nrow=5, ncol=2)
+  pind_mat <- matrix(0, nrow=8, ncol=2)
   pind_mat[1,] <- c(1, ncol(gd_hds$X))
   pind_mat[2,] <- max(pind_mat) + c(1, ncol(gd_hds$V))
   if(!is.null(detformulaPC) & !is.null(dataPC)){
@@ -166,20 +168,37 @@ IDS <- function(lambdaformula = ~1,
   if(has_avail){
     pind_mat[5,] <- max(pind_mat) + c(1, ncol(Xavail_ds))
   }
+  # Hazard-rate scale parameter(s)
+  if(keyfun == "hazard"){
+    pind_mat[6,] <- max(pind_mat) + 1
+    if(!is.null(detformulaPC) & !is.null(dataPC)){
+      pind_mat[7,] <- max(pind_mat) + 1
+    }
+    if(!is.null(detformulaOC) & !is.null(dataOC)){
+      pind_mat[8,] <- max(pind_mat) + 1
+    }
+  }
 
   if(is.null(starts)){
     lam_init <- log(mean(apply(dataDS@y, 1, sum, na.rm=TRUE)) / lam_adjust[1])
     params_tmb <- list(beta_lam = c(lam_init, rep(0, ncol(gd_hds$X)-1)),
                      beta_hds = c(log(median(dataDS@dist.breaks)),rep(0, ncol(gd_hds$V)-1)),
                      beta_pc = rep(0,0),
                      beta_oc = rep(0,0),
-                     beta_avail = rep(0,0))
+                     beta_avail = rep(0,0),
+                     beta_schds = rep(0,0),  # hazard rate scale inits
+                     beta_scpc = rep(0,0), 
+                     beta_scoc = rep(0,0))
 
+    if(keyfun == "hazard") params_tmb$beta_schds <- 0
+
     if(!is.null(detformulaPC) & !is.null(dataPC)){
       params_tmb$beta_pc <- c(log(maxDistPC/2), rep(0, ncol(gd_pc$V)-1))
+      if(keyfun == "hazard") params_tmb$beta_scpc <- 0
     }
     if(!is.null(detformulaOC) & !is.null(dataOC)){
       params_tmb$beta_oc <- c(log(maxDistOC/2), rep(0, ncol(gd_oc$V)-1))
+      if(keyfun == "hazard") params_tmb$beta_scoc <- 0
     }
     if(has_avail){
       params_tmb$beta_avail <- rep(0, ncol(Xavail_ds))
@@ -193,13 +212,20 @@ IDS <- function(lambdaformula = ~1,
                      beta_hds = starts[pind_mat[2,1]:pind_mat[2,2]],
                      beta_pc = rep(0,0),
                      beta_oc = rep(0,0),
-                     beta_avail = rep(0,0))
+                     beta_avail = rep(0,0),
+                     beta_schds = rep(0,0),  # hazard rate scale inits
+                     beta_scpc = rep(0,0), 
+                     beta_scoc = rep(0,0))
+
+    if(keyfun == "hazard") params_tmb$beta_schds <- pind_mat[6,1]
 
     if(!is.null(detformulaPC) & !is.null(dataPC)){
       params_tmb$beta_pc <- starts[pind_mat[3,1]:pind_mat[3,2]]
+      if(keyfun == "hazard") params_tmb$beta_scpc <- starts[pind_mat[7,1]]
     }
     if(!is.null(detformulaOC) & !is.null(dataOC)){
       params_tmb$beta_oc <- starts[pind_mat[4,1]:pind_mat[4,2]]
+      if(keyfun == "hazard") params_tmb$beta_scoc <- starts[pind_mat[8,1]]
     }
     if(has_avail){
       params_tmb$beta_avail <- starts[pind_mat[5,1]:pind_mat[5,2]]
@@ -283,6 +309,30 @@ IDS <- function(lambdaformula = ~1,
     est_list <- c(est_list, list(phi=avail_est))
   }
 
+  if(keyfun == "hazard"){
+    ds_haz_coef <- get_coef_info(sdr, "schds", "(Intercept)", pind_mat[6,1]:pind_mat[6,2])
+    ds_est_haz <- unmarkedEstimate("Distance sampling scale", short.name="ds_scale",
+      estimates = ds_haz_coef$ests[1], covMat = ds_haz_coef$cov, fixed=1,
+      invlink="exp", invlinkGrad="exp")
+    est_list <- c(est_list, list(schds=ds_est_haz))
+
+    if(!is.null(detformulaPC) & !is.null(dataPC)){
+      pc_haz_coef <- get_coef_info(sdr, "scpc", "(Intercept)", pind_mat[7,1]:pind_mat[7,2])
+      pc_est_haz <- unmarkedEstimate("Point count scale", short.name="pc_scale",
+        estimates = pc_haz_coef$ests[1], covMat = pc_haz_coef$cov, fixed=1,
+        invlink="exp", invlinkGrad="exp")
+      est_list <- c(est_list, list(scpc=pc_est_haz))
+    }
+
+    if(!is.null(detformulaOC) & !is.null(dataOC)){
+      oc_haz_coef <- get_coef_info(sdr, "scoc", "(Intercept)", pind_mat[8,1]:pind_mat[8,2])
+      oc_est_haz <- unmarkedEstimate("Presence/absence scale", short.name="oc_scale",
+        estimates = oc_haz_coef$ests[1], covMat = oc_haz_coef$cov, fixed=1,
+        invlink="exp", invlinkGrad="exp")
+      est_list <- c(est_list, list(scoc=oc_est_haz))
+    }
+  }
+
   est_list <- unmarkedEstimateList(est_list)
 
   new("unmarkedFitIDS", fitType = "IDS", call = match.call(),

diff --git a/R/distsamp.R b/R/distsamp.R
@@ -44,8 +44,11 @@ distsamp <- function(formula, data,
         point = {
             for(i in 1:M) {
                 a[i, 1] <- pi*db[2]^2
-                for(j in 2:J)
+                if(J > 1){
+                  for(j in 2:J){
                     a[i, j] <- pi*db[j+1]^2 - sum(a[i, 1:(j-1)])
+                  }
+                }
                 u[i,] <- a[i,] / sum(a[i,])
                 }
             })

diff --git a/src/TMB/tmb_IDS.hpp b/src/TMB/tmb_IDS.hpp
@@ -48,6 +48,14 @@ Type tmb_IDS(objective_function<Type>* obj) {
   PARAMETER_VECTOR(beta_pc);
   PARAMETER_VECTOR(beta_oc);
   PARAMETER_VECTOR(beta_avail);
+  PARAMETER_VECTOR(beta_schds); // hazard-rate scales
+  PARAMETER_VECTOR(beta_scpc);
+  PARAMETER_VECTOR(beta_scoc);
+
+  Type scale_hds = 0;
+  if(key_hds == 3){
+    scale_hds = exp(beta_schds(0));
+  }
 
   int survey = 1; // Only point counts supported
 
@@ -74,7 +82,7 @@ Type tmb_IDS(objective_function<Type>* obj) {
 
   for (int i=0; i<M; i++){
 
-    vector<Type> cp = distance_prob(key_hds, sigma_hds(i), Type(0), survey,
+    vector<Type> cp = distance_prob(key_hds, sigma_hds(i), scale_hds, survey,
                                     db_hds, w_hds, a_hds, u_hds);
 
     for (int j=0; j<J; j++){
@@ -95,10 +103,17 @@ Type tmb_IDS(objective_function<Type>* obj) {
 
   //vector<Type> sigma_pc = V_pc * beta_pc;
   vector<Type> sigma_pc;
+  Type scale_pc = 0;
   if(beta_pc.size() > 0){
     sigma_pc = V_pc * beta_pc;
+    if(key_hds == 3){
+      scale_pc = exp(beta_scpc(0));
+    }
   } else{
     sigma_pc = V_pc * beta_hds;
+    if(key_hds == 3){
+      scale_pc = scale_hds;
+    }
   }
   sigma_pc = exp(sigma_pc);
 
@@ -111,7 +126,7 @@ Type tmb_IDS(objective_function<Type>* obj) {
   }
 
   for (int i=0; i<M; i++){
-    vector<Type> cp = distance_prob(key_pc, sigma_pc(i), Type(0), survey,
+    vector<Type> cp = distance_prob(key_pc, sigma_pc(i), scale_pc, survey,
                                     db_pc, w_pc, a_pc, u_pc);
     loglik -= dpois(y_pc(i,0), lam_pc(i) * cp(0) * p_avail_pc(i), true);
   }
@@ -131,10 +146,17 @@ Type tmb_IDS(objective_function<Type>* obj) {
 
   //vector<Type> sigma_oc = V_oc * beta_oc;
   vector<Type> sigma_oc;
+  Type scale_oc = 0;
   if(beta_oc.size() > 0){
     sigma_oc = V_oc * beta_oc;
+    if(key_hds == 3){
+      scale_oc = exp(beta_scoc(0));
+    }
   } else{
     sigma_oc = V_oc * beta_hds;
+    if(key_hds == 3){
+      scale_oc = scale_hds;
+    }
   }
   sigma_oc = exp(sigma_oc);
 
@@ -153,7 +175,7 @@ Type tmb_IDS(objective_function<Type>* obj) {
 
   for (int i=0; i<M; i++){
 
-    vector<Type> q = 1 - distance_prob(key_oc, sigma_oc(i), Type(0), survey,
+    vector<Type> q = 1 - distance_prob(key_oc, sigma_oc(i), scale_oc, survey,
                                     db_oc, w_oc, a_oc, u_oc) * p_avail_oc(i);
     //vector<Type> q = 1 - distance_prob(key_oc, sigma_oc(i), Type(0), survey,
     //                                db_oc, w_oc, a_oc, u_oc);

diff --git a/tests/testthat/test_IDS.R b/tests/testthat/test_IDS.R
@@ -206,3 +206,89 @@ test_that("IDS handles missing values", {
   ))
 
 })
+
+test_that("Hazard-rate works with IDS", {
+
+  # Simulate IDS dataset
+  set.seed(123)
+
+  # First simulate distance sampling data
+  # Distance sampling sites
+  Mds <- 300
+  elev <- rnorm(Mds)
+  # Distance breaks
+  db <- seq(0, 0.3, length.out=7)
+  # blank unmarked frame
+  umf_ds <- unmarkedFrameDS(y = matrix(NA, Mds, length(db)-1),
+                            siteCovs = data.frame(elev = elev), 
+                            survey="point", dist.breaks = db, unitsIn='km')
+
+  # True coefficient values
+  cf <- list(state = c(3, -0.5), # abundance intercept and effect of elevation 
+            det = -2.5,         # detection sigma - exp(-2.5) = 0.08 
+            scale = log(2))     # detection hazard rate scale = 2
+
+  # Simulate response
+  umf_ds <- simulate(umf_ds, formula=~1~elev, coefs=cf,
+                     keyfun = "hazard", output = "density", unitsOut = "kmsq")[[1]]
+
+  # Fit regular distance sampling model as test
+  mod <- distsamp(~1~elev, umf_ds, keyfun="hazard", output="density", unitsOut = "kmsq")
+  mod
+
+  # good correspondance
+  expect_equivalent(coef(mod), c(2.8913, -0.4381, -2.4697, 0.60489), tol=1e-4)
+
+  # "Point count" sites
+  # simulate these as 1-bin distance
+  Mpc <- 500
+  elev2 <- rnorm(Mpc)
+  max_dist <- 0.5
+  db2 <- c(0, max_dist) # single bin
+
+  # blank unmarked frame
+  umf_pc <- unmarkedFrameDS(y = matrix(NA, Mpc, 1), # single column in y
+                          siteCovs = data.frame(elev = elev2),
+                          survey = "point", dist.breaks=db2, unitsIn="km")
+
+  # simulate response (same coefficients)
+  umf_pc <- simulate(umf_pc, formula=~1~elev, coefs=cf,
+                   keyfun = "hazard", output = "density", unitsOut = "kmsq")[[1]]
+
+
+  # convert to unmarkedFramePcount
+  umf_pc2 <- unmarkedFramePCount(y = umf_pc@y,
+                                siteCovs = umf_pc@siteCovs)
+
+  # Fit IDS model (in this case: common abundance and detection processes,
+  # and common duration)
+  mod2 <- expect_warning(IDS(lambdaformula = ~elev, detformulaDS = ~1,
+              dataDS = umf_ds, dataPC = umf_pc2, keyfun = "hazard", 
+              unitsOut = "kmsq", maxDistPC = 0.5))
+
+  # similar to just distance sampling data
+  expect_equivalent(coef(mod2),
+                    c(2.8419,-0.5070, -2.4420, 0.6313), tol=1e-5)
+  #cbind(true=unlist(cf), est=coef(mod2))
+
+  # Check with separate formulas
+  mod3 <- expect_warning(IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaPC = ~1,
+              dataDS = umf_ds, dataPC = umf_pc2, keyfun = "hazard", 
+              unitsOut = "kmsq", maxDistPC = 0.5))
+
+  # Also make sure the coefs are in the right order: lam, then all sigma, then all haz scales
+  expect_equal(coef(mod3),
+                    c(`lam(Int)` = 2.86807, `lam(elev)` = -0.50746, 
+                      `ds(Int)` = -2.47106, `pc(Int)` = -3.37866, 
+                      `ds_scale(Int)` = 0.60447, `pc_scale(Int)` = -0.02817), tol=1e-5)
+
+  # estimation doesn't really work for this one
+  expect_warning(se <- SE(mod3))
+  expect_true(is.nan(se["pc(Int)"]))
+  expect_true(is.nan(se["pc_scale(Int)"]))
+
+  # Make sure different formulas for sigma work
+  mod4 <- expect_warning(IDS(lambdaformula = ~1, detformulaDS = ~elev, detformulaPC = ~1,
+              dataDS = umf_ds, dataPC = umf_pc2, keyfun = "hazard", 
+              unitsOut = "kmsq", maxDistPC = 0.5))
+})