Skip to content

Commit

Permalink
Merge pull request #17 from nimble-dev/Nmix_efficient
Browse files Browse the repository at this point in the history
Add fix for rounding errors in extreme edge cases w many replicates
  • Loading branch information
dochvam authored Sep 25, 2020
2 parents 355f63d + d00abeb commit d4eba34
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 81 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Package: nimbleEcology
Type: Package
Title: Distributions for Ecological Models in 'nimble'
Version: 0.3.0
Version: 0.3.1
Maintainer: Benjamin R. Goldstein <ben.goldstein@berkeley.edu>
Authors@R: c(person("Benjamin R.", "Goldstein", role = c("aut", "cre"),
email = "ben.goldstein@berkeley.edu"),
person("Daniel", "Turek", role = "aut"),
person("Lauren", "Ponisio", role = "aut"),
person("Perry", "de Valpine", role = "aut"))
Date: 2020-05-27
Date: 2020-09-25
Description: Common ecological distributions for 'nimble' models in the form of nimbleFunction objects.
Includes Cormack-Jolly-Seber, occupancy, dynamic occupancy, hidden Markov, dynamic hidden Markov, and N-mixture models.
(Jolly (1965) <DOI: 10.2307/2333826>, Seber (1965) <DOI: 10.2307/2333827>, Turek et al. (2016) <doi:10.1007/s10651-016-0353-z>).
Expand Down
186 changes: 107 additions & 79 deletions R/dNmixture.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,56 +159,70 @@ dNmixture_v <- nimbleFunction(
Nmax = double(0, default = -1),
len = double(),
log = integer(0, default = 0)) {
if (length(x) != len) stop("in dNmixture_v, len must equal length(x).")
if (length(prob) != len) stop("in dNmixture_v, len must equal length(prob).")
if (length(x) != len) stop("in dNmixture_v, len must equal length(x).")
if (len != length(prob)) stop("in dNmixture_v, len must equal length(prob).")

# Lambda cannot be negative
if (lambda < 0) {
if (log) return(-Inf)
else return(0)
}
# Lambda cannot be negative
if (lambda < 0) {
if (log) return(-Inf)
else return(0)
}

## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p))
## We determine the lowest N and highest N at extreme quantiles and sum over those.
if (Nmin == -1) {
Nmin <- min(x + qpois(0.00001, lambda * (1 - prob)))
}
if (Nmax == -1) {
Nmax <- max(x + qpois(0.99999, lambda * (1 - prob)))
}
Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x
## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p))
## We determine the lowest N and highest N at extreme quantiles and sum over those.
if (Nmin == -1) {
Nmin <- min(x + qpois(0.00001, lambda * (1 - prob)))
}
if (Nmax == -1) {
Nmax <- max(x + qpois(0.99999, lambda * (1 - prob)))
}
Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x

logProb <- -Inf
logProb <- -Inf

if (Nmax > Nmin) {
numN <- Nmax - Nmin + 1 - 1 ## remember: +1 for the count, but -1 because the summation should run from N = maxN to N = minN + 1
prods <- rep(0, numN)
for (i in (Nmin + 1):Nmax) {
prods[i - Nmin] <- prod(i/(i - x)) / i
}
if (Nmax > Nmin) {
numN <- Nmax - Nmin + 1 - 1 ## remember: +1 for the count, but -1 because the summation should run from N = maxN to N = minN + 1
prods <- rep(0, numN)
for (i in (Nmin + 1):Nmax) {
prods[i - Nmin] <- prod(i/(i - x)) / i
}

ff <- log(lambda) + log(prod(1-prob)) + log(prods)
ff_g1 <- ff[ff > 0] # largest element is the length(ff_g1)th product
max_index <- length(ff_g1)
ff <- log(lambda) + sum(log(1-prob)) + log(prods)
i <- 1
sum_ff_g1 <- 0
while(i < numN & ff[i] > 0) {
sum_ff_g1 <- sum_ff_g1 + ff[i]
i <- i+1
}
max_index <- i-1
if(ff[i] > 0) max_index <- i
if(max_index == 0) max_index <- 1 # not sure this is relevant. it's defensive.

terms <- rep(0, numN + 1)
terms[max_index + 1] <- 1
terms <- numeric(numN + 1)
terms[max_index + 1] <- 1

for (i in 1:max_index) {
terms[i] <- 1 / exp(sum(ff[i:max_index]))
}
for (i in (max_index + 1):numN) {
terms[i + 1] <- exp(sum(ff[(max_index + 1):i]))
}
sumff <- sum_ff_g1 ## should be the same as sum(ff[1:max_index])

log_fac <- sum(ff_g1) + log(sum(terms)) # Final factor is the largest term * (all factors / largest term)
for (i in 1:max_index) {
# terms[i] <- 1 / exp(sum(ff[i:max_index]))
terms[i] <- 1 / exp(sumff)
sumff <- sumff - ff[i]
}

logProb <- dpois(Nmin, lambda, log = TRUE) + sum(dbinom(x, size = Nmin, prob = prob, log = TRUE)) + log_fac
sumff <- 0
for (i in (max_index + 1):numN) {
# terms[i + 1] <- exp(sum(ff[(max_index + 1):i]))
sumff <- sumff + ff[i]
terms[i + 1] <- exp(sumff)
}
if (log) return(logProb)
else return(exp(logProb))
returnType(double())
})

log_fac <- sum_ff_g1 + log(sum(terms)) # Final factor is the largest term * (all factors / largest term) }
logProb <- dpois(Nmin, lambda, log = TRUE) + sum(dbinom(x, size = Nmin, prob = prob, log = TRUE)) + log_fac
}
if (log) return(logProb)
else return(exp(logProb))
returnType(double())
})

NULL
#' @rdname dNmixture
Expand All @@ -221,55 +235,69 @@ dNmixture_s <- nimbleFunction(
Nmax = double(0, default = -1),
len = double(),
log = integer(0, default = 0)) {
if (length(x) != len) stop("in dNmixture_s, len must equal length(x).")
if (length(x) != len) stop("in dNmixture_s, len must equal length(x).")

# Lambda cannot be negative
if (lambda < 0) {
if (log) return(-Inf)
else return(0)
}
# Lambda cannot be negative
if (lambda < 0) {
if (log) return(-Inf)
else return(0)
}

## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p))
## We determine the lowest N and highest N at extreme quantiles and sum over those.
if (Nmin == -1) {
Nmin <- min(x + qpois(0.00001, lambda * (1 - prob)))
}
if (Nmax == -1) {
Nmax <- max(x + qpois(0.99999, lambda * (1 - prob)))
}
Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x
## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p))
## We determine the lowest N and highest N at extreme quantiles and sum over those.
if (Nmin == -1) {
Nmin <- min(x + qpois(0.00001, lambda * (1 - prob)))
}
if (Nmax == -1) {
Nmax <- max(x + qpois(0.99999, lambda * (1 - prob)))
}
Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x

logProb <- -Inf
logProb <- -Inf

if (Nmax > Nmin) {
numN <- Nmax - Nmin + 1 - 1 ## remember: +1 for the count, but -1 because the summation should run from N = maxN to N = minN + 1
prods <- rep(0, numN)
for (i in (Nmin + 1):Nmax) {
prods[i - Nmin] <- prod(i/(i - x)) / i
}
if (Nmax > Nmin) {
numN <- Nmax - Nmin + 1 - 1 ## remember: +1 for the count, but -1 because the summation should run from N = maxN to N = minN + 1
prods <- rep(0, numN)
for (i in (Nmin + 1):Nmax) {
prods[i - Nmin] <- prod(i/(i - x)) / i
}

ff <- log(lambda) + log(1-prob)*len + log(prods)
ff_g1 <- ff[ff > 0] # largest element is the length(ff_g1)th product
max_index <- length(ff_g1)
ff <- log(lambda) + log(1-prob)*len + log(prods)
i <- 1
sum_ff_g1 <- 0
while(i < numN & ff[i] > 0) {
sum_ff_g1 <- sum_ff_g1 + ff[i]
i <- i+1
}
max_index <- i-1
if(ff[i] > 0) max_index <- i
if(max_index == 0) max_index <- 1 # not sure this is relevant. it's defensive.

terms <- rep(0, numN + 1)
terms[max_index + 1] <- 1
terms <- numeric(numN + 1)
terms[max_index + 1] <- 1

for (i in 1:max_index) {
terms[i] <- 1 / exp(sum(ff[i:max_index]))
}
for (i in (max_index + 1):numN) {
terms[i + 1] <- exp(sum(ff[(max_index + 1):i]))
}
sumff <- sum_ff_g1 ## should be the same as sum(ff[1:max_index])

log_fac <- sum(ff_g1) + log(sum(terms)) # Final factor is the largest term * (all factors / largest term)
for (i in 1:max_index) {
# terms[i] <- 1 / exp(sum(ff[i:max_index]))
terms[i] <- 1 / exp(sumff)
sumff <- sumff - ff[i]
}

logProb <- dpois(Nmin, lambda, log = TRUE) + sum(dbinom(x, size = Nmin, prob = prob, log = TRUE)) + log_fac
sumff <- 0
for (i in (max_index + 1):numN) {
# terms[i + 1] <- exp(sum(ff[(max_index + 1):i]))
sumff <- sumff + ff[i]
terms[i + 1] <- exp(sumff)
}
if (log) return(logProb)
else return(exp(logProb))
returnType(double())
})

log_fac <- sum_ff_g1 + log(sum(terms)) # Final factor is the largest term * (all factors / largest term) }
logProb <- dpois(Nmin, lambda, log = TRUE) + sum(dbinom(x, size = Nmin, prob = prob, log = TRUE)) + log_fac
}
if (log) return(logProb)
else return(exp(logProb))
returnType(double())
})

NULL
#' @rdname dNmixture
Expand Down
5 changes: 5 additions & 0 deletions inst/NEWS
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
Release notes for each version of nimbleEcologyy

0.3.1
-- Fixed rounding errors in edge cases of dNmixture with many observations.
Thanks to NIMBLE developer Chris Paciorek for the algorithm.


0.3.0

-- Fixed an internal problem with dHMM and dDHMM resulting in a mismatch between documentation and internal behavior
Expand Down

0 comments on commit d4eba34

Please sign in to comment.