Skip to content

Commit

Permalink
Revert "Delete phylo.d_wrapper.R"
Browse files Browse the repository at this point in the history
This reverts commit 593c2f1.
  • Loading branch information
HedvigS committed Jan 16, 2024
1 parent 593c2f1 commit b4ebdbb
Showing 1 changed file with 73 additions and 0 deletions.
73 changes: 73 additions & 0 deletions R/phylo.d_wrapper.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#' Wrapped function to caper::phylo.d which performs some important sanity checks.
#'
#' @param data A list of the class "comparative.data". Created by `caper::comparative.data`.
#' @param var.name Character vector. The name of the variable in `data` holding the binary variable of interest. This will be passed to `phylo.d` and passed (as a symbol) to the argument `binvar`.
#' @param ... Additional parameters passed to `phylo.d`.
#' @author wrapper: Hedvig Skirgård and Siva Kalyan; original caper::phylo.d-function: Susanne Fritz and David Orme.
#' @return If no error occurs, the function returns an object of class 'phylo.d' just as `caper::phylo.d`. See `?caper::phylo.d` for more details. If the wrapper-function finds problem, it will trigger error messages.
#' @export
#'

phylo.d_wrapper <- function(data, var.name, ...) {
print(match.call())
if ("binvar" %in% names(match.call())) {
stop("Use the 'var.name' argument instead of 'binvar'. Note that 'var.name' must be a string, not a symbol.")
}

if (!(var.name %in% colnames(data$data))) {
stop(paste0("Cannot find a column named '", var.name, "' in the data.\n"))
}

#count the proportion of the less frequent value in the column
min_prop <- data$data[[var.name]] %>%
table() %>% `/`(sum(.)) %>% min()

if (min_prop < 0.05) {
stop(
"The distribution of tips over the two binary values is too skewed; fewer than 5% of tips are in the minority state. Applying D-estimate calculation to such skewed data can generate unreliable results.\n"
)
}

result <-
eval(substitute(
caper::phylo.d(data = data, binvar = this_var, ...),
list(this_var = as.name(var.name))
))

if (result$Pval0 > 0.05 &
result$Pval1 > 0.05) {
stop(
"Brownian and random simulations are not sufficiently distinct from each other to provide a meaningful D-estimate; observed values appear to be explainable under either model (pval0 > 0.05 & pval1 > 0.05)."
)
}


if (result$Pval0 > 0.05 &
result$Pval1 < 0.05) {
cat(
"Observed values are definitely on the Brownian/clumped end of the spectrum (pval0 > 0.05 & pval1 < 0.05).\n"
)
}
else if (result$Pval0 < 0.05 &
result$Pval1 > 0.05) {
cat(
"Observed values are definitely on the random/overdispersed end of the spectrum (pval0 < 0.05 & pval1 > 0.05).\n"
)
}
else if (result$Pval0 < 0.05 &
result$Pval1 < 0.05) {
cat(
"Observed values are definitely between Brownian/clumped and random/over-dispersed (pval0 < 0.05 & pval1 < 0.05).\n"
)
}


if (result$DEstimate < -7) {
warning("The resulting D-estimate is lower than 7. This isn't impossible, but unusual.\n")
}
else if (result$DEstimate > 7) {
warning("The resulting D-estimate is higher than 7. This isn't impossible, but unusual.\n")
}

result
}

0 comments on commit b4ebdbb

Please sign in to comment.