# LatentClassAnalysisFTAF.R

#!/usr/bin/env Rscript
# ============================================================================ #
# LCA - Philippines Diagnostics: By Frederick T. A. Freeth          26/01/2025 |
# ============================================================================ #

# ---- Installing Packages ----
if(!require("rjags")){install.packages("rjags"); library(rjags)}
if(!require("runjags")){install.packages("runjags"); library(runjags)}
if(!require("cowplot")){install.packages("cowplot"); library(cowplot)}
if(!require("ggplot2")){install.packages("ggplot2"); library(ggplot2)}
if(!require("MCMCpack")){install.packages("MCMCpack"); library(MCMCpack)}
if(!require("compiler")){install.packages("compiler"); library(compiler)}
if(!require("bayestestR")){install.packages("bayestestR"); library(bayestestR)}
if(!require("rstudioapi")){install.packages("rstudioapi"); library(rstudioapi)}

# ---- Set File Location as Working Directory, Enable JIT, and Set Seeds ----
# All input/output paths in this script are relative, so we try to make the
# folder containing this file the working directory. Inside RStudio the path
# comes from rstudioapi; when launched with Rscript it is taken from the
# "--file=" command-line argument. If neither source yields a path (e.g. an
# unsaved document) the current working directory is kept instead of failing.
script_path <- ""
if(rstudioapi::isAvailable()){
  script_path <- rstudioapi::getActiveDocumentContext()$path
}else{
  file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
  if(length(file_arg) > 0){
    # Rscript may encode spaces in the script path as "~+~".
    script_path <- gsub("~+~", " ", sub("^--file=", "", file_arg[1]), fixed = TRUE)
  }
}
if(nzchar(script_path)){
  setwd(dirname(script_path))
  cat("Your working directory has been set to the location of this R file:\n")
}else{
  cat("Could not determine the location of this R file; using the current working directory:\n")
}
cat(getwd(),"\n")
enableJIT(3) # Just-In-Time Compiler
set.seed(314159) # Set the seed

# ---- Data Importing ----
# We add check.names = FALSE since if TRUE, the hyphens are changed to periods.
Human_Data  <- read.csv("Human_Data.csv", check.names = FALSE)
Animal_Data <- read.csv("Animal_Data.csv", check.names = FALSE)

# ---- Plotting Functions ----
# Plot the observed prevalence per diagnostic next to the prevalence simulated
# by the JAGS model (posterior mean with a 95% interval) for one host group.
#
# Arguments:
#   host:        A string of the host name; used as the plot title and, with
#                spaces replaced by underscores, in the .svg file name.
#   prevSim:     Simulated prevalence as proportions, one row per parasite and
#                one column per posterior sample (row means and per-row
#                intervals are taken below).
#   prevReal:    Observed prevalence as proportions. It is concatenated with
#                the simulated means and read in strides of length(parasites),
#                so values must be ordered parasite-by-parasite within each
#                diagnostic (e.g. a parasites x diagnostics matrix).
#   diagnostics: A character vector of the diagnostics used for this host. A
#                "Simulated" entry is appended internally.
#   parasites:   A character vector of the parasites to plot.
#   PI:          "Plot Individually". When TRUE the plot is written to
#                "Prev_Real_and_Sim_<host>.svg" with extra right margin and its
#                own legend. Otherwise it is drawn on the current device
#                without a legend, so several plots can share one legend in a
#                panel.
#
# Uses the file-level vectors diagnosticsH and diagnosticsA so that colours are
# assigned consistently across hosts.
#
# Returns the result of recordPlot() taken after drawing.
plotPrev <- function(host, prevSim, prevReal, diagnostics, parasites, PI){
  prevReal <- 100*prevReal
  prevSim  <- 100*prevSim
  diagnostics <- c(diagnostics, "Simulated") # Adding a col for simulated prev
  dx <- 0.06 # The increments as to which legend boxes are translated by in x
  dy <- 0.10 # The increments as to which legend boxes are translated by in y
  nParasites <- length(parasites) # Number of parasites for THIS host
  nDiagnostics <- length(diagnostics) # Number of diagnostics for THIS host
  tDiagnostics <- unique(c(diagnosticsH, diagnosticsA, "Simulated")) # Total diagnostics used
  cls <- c("red", "blue", "orange", "green3", "purple", "black")
  shapes <- c(15, 16, 17, 18) # Filled shapes start at 15
  # One interval per parasite: row 2 holds the lower and row 3 the upper bound.
  prevSim_CI <- sapply(as.data.frame(t(prevSim)), FUN = bayestestR::ci, ci = 0.95) # Ignores NAs
  prevSim_mean <- rowMeans(prevSim, na.rm = TRUE) # Ignores NAs when na.rm == TRUE
  if(PI == TRUE){
    extra_margin <- 13 # Default extra space to add legends.
    svg(file=paste0("Prev_Real_and_Sim_",gsub(" ","_",host),".svg"), width=8, height=6)
    # Close the svg device even if a plotting call below fails. This runs after
    # the return value (the recorded plot) has been computed.
    on.exit(dev.off(), add = TRUE)
  }else{
    extra_margin <- 1 # Remove the extra space for the legend for panel plots.
  }

  # Offsets the legend layers so the pch characters appear to be in a big grid:
  # one c(x inset, y inset) pair per diagnostic column.
  legendCoords <- lapply(seq_len(nDiagnostics), function(k){c(1 + dx*k, dy)})

  # Reorder c(prevReal, prevSim_mean) so that all diagnostics (real-world data
  # followed by the simulated prevalence) of one parasite sit next to each other.
  sortedPrev <- unlist(lapply(seq_len(nParasites), FUN = function(i){
    c(prevReal, prevSim_mean)[seq(i, nDiagnostics*nParasites, nParasites)]
  }))

  # We want to plot the diagnostics in little clusters around each i, where each
  # i is given in 1:nParasites. This gives enough space between the parasites.
  # The last point of each cluster (the simulated prevalence) lands on x = i.
  xPoints <- unlist(lapply(seq_len(nParasites), function(i){seq(i-(nDiagnostics-1)/10,i,0.1)}))
  label_x_points <- rowMeans(matrix(xPoints, nrow = nParasites, byrow = TRUE))

  # Plot the real and simulated data. Shape encodes the parasite (by position)
  # and colour encodes the diagnostic.
  par(mar=c(4,4,1,extra_margin), mgp=c(2.5,1,0), xpd=TRUE, pty="s", bg="transparent")
  plot(x = xPoints, y = sortedPrev, xlab = "", ylab = "Prevalence (%)",
       ylim = c(0, 60), las = 1, pch = sort(rep(shapes, nDiagnostics)), xaxt = "n",
       col = rep(cls[match(diagnostics, tDiagnostics)], nParasites))
  title(main = host, adj = 0.5, line = -1, cex = 0.7)
  axis(1, at = label_x_points, labels = sapply(parasites, function(p) {
         # In Pigs, Water Buffalo, and Cattle, Hookworms are actually Strongyles.
         # For Humans and animals, Hookworm and Strongyles are indistinguishable.
         # We italicise the other parasite names but keep Hookworm/Strongyles
         # unitalicised.
         if(p == "Hookworm"){return("")} # "Hookworm/Strongyles" is added below.
         else{return(parse(text = paste0("italic('", p, "')")))}
       }),
       cex.axis = 0.9
  )
  # mtext() rejects a zero-length 'at', so only label Hookworm when present.
  hookworm_at <- label_x_points[which(parasites == "Hookworm")]
  if(length(hookworm_at) > 0){
    mtext("Hookworm/\nStrongyles", side = 1, line = 1.8, cex = 0.9, at = hookworm_at)
  }

  # Vertical error bars on the simulated points showing the 95% interval
  arrows(seq_len(nParasites), unlist(prevSim_CI[2,]), # Lower bound
         seq_len(nParasites), unlist(prevSim_CI[3,]), # Upper bound
         length = 0.05, angle = 90, code = 3, lwd = 1.5)

  if(PI == TRUE){
    # Build the legend as one column of symbols per diagnostic; only the last
    # column carries the parasite names.
    for(diagnostic in diagnostics){
      diagnosticNr <- match(diagnostic, diagnostics)
      # NOTE(review): y = 82 - dy is a fixed user coordinate while ylim is
      # c(0, 60); confirm the rotated column labels land where intended.
      text(x = 0.3 + nParasites*(1 + dx*(diagnosticNr - 1)),
           y = 82 - dy, pos = 4, srt = 45, cex = 0.8, adj = 0,
           labels = diagnostics[diagnosticNr])
      # Print the inner most column and parasite text on the legend
      if(diagnosticNr == nDiagnostics){
        legend("topleft", legend = parasites, pch = shapes, title = "", cex = 1,
               col = cls[match(diagnostic, tDiagnostics)], bty = "n",
               inset = legendCoords[[diagnosticNr]], bg = "transparent")
      }
      # Create the unlabeled columns
      else{
        legend("topleft", legend = rep("", nParasites), pch = shapes, title = "",
               col = cls[match(diagnostic, tDiagnostics)], cex = 1,
               bty="n", bg="transparent", inset = legendCoords[[diagnosticNr]])
      }
    }
  }
  prevPlot <- recordPlot()
  return(prevPlot)
}

# Plot diagnostic sensitivity against prevalence for one host group, across
# all diagnostics and parasites. Colour encodes the diagnostic and point shape
# encodes the parasite.
#
# Arguments:
#   host:        A string of the host name; used in the x-axis label and, with
#                spaces replaced by underscores, in the .svg file name.
#   diagnostics: A character vector of the diagnostics used for this host.
#   parasites:   A character vector of the parasites to plot.
#   SPD:         Sensitivity, specificity and prevalence values as proportions,
#                one named row per quantity and one column per posterior
#                sample. Rows whose name contains "Specificity" are dropped;
#                the remaining rows must be ordered, per parasite, as one
#                sensitivity row per diagnostic followed by the prevalence row.
#   PI:          "Plot Individually". When TRUE the plot is written to
#                "Sens_Prev_<host>[_Simplified].svg" with extra right margin
#                and its own legend. Otherwise it is drawn on the current
#                device without a legend so panels can share one legend.
#   simple:      When TRUE only the mean of each sensitivity-prevalence cloud
#                is drawn, with 95% interval bars in both directions. When
#                FALSE every sample is drawn as a faint point.
#
# Uses the file-level vectors diagnosticsH, diagnosticsA, parasitesH and
# parasitesA so colours and shapes are assigned consistently across hosts.
#
# Returns the result of recordPlot() taken after drawing.
plotSP <- function(host, diagnostics, parasites, SPD, PI, simple){
  # Logical indexing keeps every row when nothing matches; `-which(...)` would
  # have dropped ALL rows in that case.
  SPD <- SPD[!grepl("Specificity", rownames(SPD)), , drop = FALSE]
  dx <- 0.06 # The increments as to which legend boxes are translated by in x
  dy <- 0.10 # The increments as to which legend boxes are translated by in y
  nParasites <- length(parasites) # Number of parasites for THIS host
  nDiagnostics <- length(diagnostics) # Number of diagnostics for THIS host
  tDiagnostics <- unique(c(diagnosticsH, diagnosticsA)) # Total diagnostics used
  tParasites <- unique(c(parasitesH, parasitesA)) # Total parasites in the study
  cls <- ggplot2::alpha(c("red","blue","orange","green3","purple"), alpha=0.05)
  shapes <- c(15, 16, 17, 18) # Filled shapes start at 15
  if(PI == TRUE){
    extra_margin <- 13 # Default extra space to add legends.
    if(simple){
      svg(file=paste0("Sens_Prev_",gsub(" ","_",host),"_Simplified.svg"), width=8, height=6)
    }else{
      svg(file=paste0("Sens_Prev_",gsub(" ","_",host),".svg"), width=8, height=6)
    }
    # Close the svg device even if a plotting call below fails. This runs after
    # the return value (the recorded plot) has been computed.
    on.exit(dev.off(), add = TRUE)
  }else{
    extra_margin <- 1 # Remove the extra space for the legend.
  }

  # Offsets the legend layers so the pch characters appear to be in a big grid:
  # one c(x inset, y inset) pair per diagnostic column.
  legendCoords <- lapply(seq_len(nDiagnostics), function(k){c(1 + dx*k, dy)})

  par(mar=c(4, 4, 1, extra_margin), mgp = c(2.5,1,0), xpd = TRUE, pty = "s", bg = "transparent")
  plot(NULL, xlim=c(0,60), ylim=c(0,100), ylab = "Diagnostic Sensitivity (%)",
       xlab = paste0("Prevalence in ", host, " (%)"), las = 1)

  # Each parasite owns a block of (nDiagnostics + 1) rows: its sensitivity rows
  # followed by its prevalence row.
  for(p in seq_len(nParasites)){
    i <- p*(nDiagnostics + 1) # Row holding this parasite's prevalence
    shape <- match(parasites[p], tParasites) # Shape is fixed per parasite
    for(k in seq_len(nDiagnostics)){
      d <- i - nDiagnostics + k - 1 # Row holding the k-th diagnostic's sensitivity
      cl <- match(diagnostics[k], tDiagnostics) # Colour is fixed per diagnostic
      if(simple){
        # Plot the mean of the sensitivity-prevalence samples followed by the
        # 95% intervals in both axes directions
        sensCI <- bayestestR::ci(t(100*SPD[d,]), ci = 0.95) # Interval for sensitivity - Ignores NAs
        prevCI <- bayestestR::ci(t(100*SPD[i,]), ci = 0.95) # Interval for prevalence  - Ignores NAs
        prevMean <- 100*mean(unlist(SPD[i,]), na.rm = TRUE) # Ignores NA values.
        sensMean <- 100*mean(unlist(SPD[d,]), na.rm = TRUE) # Ignores NA values.

        # Error bars in the y-axis (sensitivity)
        arrows(x0 = prevMean, y0 = unlist(sensCI[2]),
               x1 = prevMean, y1 = unlist(sensCI[3]),
               col = ggplot2::alpha(cls[cl], alpha = 0.4), length = 0.05, angle = 90,
               code = 3, lwd = 1.5)

        # Error bars in the x-axis (prevalence)
        arrows(y0 = sensMean, x0 = unlist(prevCI[2]),
               y1 = sensMean, x1 = unlist(prevCI[3]),
               col = ggplot2::alpha(cls[cl], alpha = 0.4), length = 0.05, angle = 90,
               code = 3, lwd = 1.5)

        # Plot the mean of the sensitivity-prevalence data.
        points(x = prevMean, y = sensMean,
               col = ggplot2::alpha(cls[cl], alpha = 1.0), pch = shapes[shape])
      }
      else{
        # Plot all the sensitivity-prevalence points of the samples. NAs are not plotted.
        points(x = 100*SPD[i,], y = 100*SPD[d,], col = cls[cl], pch=shapes[shape])
      }
    }
  }

  if(PI == TRUE){
    # Legend shapes are looked up exactly like the plotted points so the two
    # always agree, even when this host has only a subset of the parasites.
    legendShapes <- shapes[match(parasites, tParasites)]
    # Build the legend as one column of symbols per diagnostic; only the last
    # column carries the parasite names.
    for(diagnostic in diagnostics){
      diagnosticNr <- match(diagnostic, diagnostics)
      # NOTE(review): x = 110*(...) is a fixed user coordinate while xlim is
      # c(0, 60); confirm the rotated column labels land above the legend.
      text(x = 110*(1 + dx*(diagnosticNr - 1)), y = 88 - dy, pos = 4, srt = 45,
           labels = diagnostics[diagnosticNr], cex = 0.8, adj = 0)
      # Print the inner most column and parasite text on the legend
      if(diagnosticNr == nDiagnostics){
        legend("topleft", legend = parasites, pch = legendShapes, title = "", cex = 1,
               col = ggplot2::alpha(cls[match(diagnostic, tDiagnostics)],1), bty = "n",
               inset = legendCoords[[diagnosticNr]], bg = "transparent")
      }
      # Create the unlabeled columns
      else{
        legend("topleft", legend = rep("", nParasites), pch = legendShapes, title = "",
               col = ggplot2::alpha(cls[match(diagnostic, tDiagnostics)],1), cex = 1,
               bty="n", bg="transparent", inset = legendCoords[[diagnosticNr]])
      }
    }
  }
  sensPrevPlot <- recordPlot()
  return(sensPrevPlot)
}

# Plot p_1 against p_0 for each diagnostic and parasite of one host group,
# where p_1 = sensitivity and p_0 = 1 - specificity (the axes of a Receiver
# Operating Characteristic plot). Colour encodes the diagnostic and point shape
# encodes the parasite.
#
# Arguments:
#   host:        A string of the host name; used as the plot title and, with
#                spaces replaced by underscores, in the .svg file name.
#   diagnostics: A character vector of the diagnostics used for this host.
#   parasites:   A character vector of the parasites to plot.
#   SSD:         Sensitivity, specificity and prevalence values as proportions,
#                one named row per quantity and one column per posterior
#                sample. Rows whose name contains "Prevalence" are dropped; the
#                remaining rows must be ordered, per parasite and diagnostic,
#                as a sensitivity row immediately followed by its specificity
#                row.
#   PI:          "Plot Individually". When TRUE the plot is written to
#                "p1_p0_<host>[_Simplified].svg" with extra right margin and
#                its own legend. Otherwise it is drawn on the current device
#                without a legend so panels can share one legend.
#   simple:      When TRUE only the mean of each (p_0, p_1) cloud is drawn,
#                with 95% interval bars in both directions. When FALSE every
#                sample is drawn as a point.
#
# Uses the file-level vectors diagnosticsH, diagnosticsA, parasitesH and
# parasitesA so colours and shapes are assigned consistently across hosts.
#
# Returns the result of recordPlot() taken after drawing.
plotp1p0 <- function(host, diagnostics, parasites, SSD, PI, simple){
  # Logical indexing keeps every row when nothing matches; `-which(...)` would
  # have dropped ALL rows in that case.
  SSD <- SSD[!grepl("Prevalence", rownames(SSD)), , drop = FALSE]
  dx <- 0.06 # The increments as to which legend boxes are translated by in x
  dy <- 0.10 # The increments as to which legend boxes are translated by in y
  nParasites <- length(parasites) # Number of parasites for THIS host
  nDiagnostics <- length(diagnostics) # Number of diagnostics for THIS host
  tDiagnostics <- unique(c(diagnosticsH, diagnosticsA)) # Total diagnostics used
  tParasites <- unique(c(parasitesH, parasitesA)) # Total parasites in the study
  cls <- c("red", "blue", "orange", "green3", "purple")
  shapes <- c(15, 16, 17, 18) # Filled shapes start at 15
  if(PI == TRUE){
    extra_margin <- 13 # Default extra space to add legends.
    if(simple){
      svg(file=paste0("p1_p0_",gsub(" ","_",host),"_Simplified.svg"), width=8, height=6)
    }else{
      svg(file=paste0("p1_p0_",gsub(" ","_",host),".svg"), width=8, height=6)
    }
    # Close the svg device even if a plotting call below fails. This runs after
    # the return value (the recorded plot) has been computed.
    on.exit(dev.off(), add = TRUE)
  }else{
    extra_margin <- 1 # Remove the extra space for the legend.
  }

  # Offsets the legend layers so the pch characters appear to be in a big grid:
  # one c(x inset, y inset) pair per diagnostic column.
  legendCoords <- lapply(seq_len(nDiagnostics), function(k){c(1 + dx*k, dy)})

  par(mar=c(4, 4, 1, extra_margin), mgp=c(2.5,1,0), xpd = TRUE, pty = "s", bg = "transparent")
  plot(NULL, xlim=c(0,10), ylim=c(0,100), ylab = "Diagnostic Sensitivity (p_1) (%)",
       xlab = "1 - Diagnostic Specificity (p_0) (%)", las = 1)
  title(main = host, adj = 0.5, line = -1, cex = 0.7)

  # Each parasite owns a block of 2*nDiagnostics rows, laid out as
  # (sensitivity, specificity) pairs in diagnostic order.
  for(p in seq_len(nParasites)){
    firstRow <- (p - 1)*2*nDiagnostics + 1
    shape <- match(parasites[p], tParasites) # Shape is fixed per parasite
    for(k in seq_len(nDiagnostics)){
      j <- firstRow + 2*(k - 1) # Sensitivity row; j + 1 is the specificity row
      cl <- match(diagnostics[k], tDiagnostics) # Colour is fixed per diagnostic
      if(simple){
        # Plot the mean of the (p0, p1) samples followed by the 95% intervals
        # in both axes directions
        p1CI <- bayestestR::ci(t(100*SSD[j,]), ci = 0.95) # Interval for p1: sensitivity
        p0CI <- bayestestR::ci(t(100*(1 - SSD[j+1,])), ci = 0.95) # Interval for p0: 1 - specificity
        p1Mean <- 100*mean(unlist(SSD[j,]), na.rm = TRUE)
        p0Mean <- 100*mean(unlist(1 - SSD[j+1,]), na.rm = TRUE)

        # Error bars in the y-axis (p1)
        arrows(x0 = p0Mean, y0 = unlist(p1CI[2]),
               x1 = p0Mean, y1 = unlist(p1CI[3]),
               col = ggplot2::alpha(cls[cl], alpha = 0.4), length = 0.05, angle = 90,
               code = 3, lwd = 1.5)

        # Error bars in the x-axis (p0)
        arrows(x0 = unlist(p0CI[2]), y0 = p1Mean,
               x1 = unlist(p0CI[3]), y1 = p1Mean,
               col = ggplot2::alpha(cls[cl], alpha = 0.4), length = 0.05, angle = 90,
               code = 3, lwd = 1.5)

        # Plot the mean of the (p0, p1) data
        points(x = p0Mean, y = p1Mean,
               col = ggplot2::alpha(cls[cl], alpha = 1.0), pch = shapes[shape])
      }
      else{
        # Plot all the (p0, p1) points of the samples
        points(x = 100*(1-SSD[j+1,]), y = 100*SSD[j,], col = cls[cl], pch=shapes[shape])
      }
    }
  }

  if(PI == TRUE){
    # Legend shapes are looked up exactly like the plotted points so the two
    # always agree, even when this host has only a subset of the parasites.
    legendShapes <- shapes[match(parasites, tParasites)]
    # Build the legend as one column of symbols per diagnostic; only the last
    # column carries the parasite names.
    for(diagnostic in diagnostics){
      diagnosticNr <- match(diagnostic, diagnostics)
      # NOTE(review): x = 100*(...) is a fixed user coordinate while xlim is
      # c(0, 10); these rotated column labels are probably drawn far to the
      # right of the legend. Confirm the intended position.
      text(x = 100*(1 + dx*(diagnosticNr - 1)), y = 88 - dy, pos = 4, srt = 45,
           labels = diagnostics[diagnosticNr], cex = 0.8, adj = 0)
      # Print the inner most column and parasite text on the legend
      if(diagnosticNr == nDiagnostics){
        legend("topleft", legend = parasites, pch = legendShapes, title = "", cex = 1,
               col = ggplot2::alpha(cls[match(diagnostic, tDiagnostics)],1), bty = "n",
               inset = legendCoords[[diagnosticNr]], bg = "transparent")
      }
      # Create the unlabeled columns
      else{
        legend("topleft", legend = rep("", nParasites), pch = legendShapes, title = "",
               col = ggplot2::alpha(cls[match(diagnostic, tDiagnostics)],1), cex = 1,
               bty="n", bg="transparent", inset = legendCoords[[diagnosticNr]])
      }
    }
  }
  p1p0Plot <- recordPlot()
  return(p1p0Plot)
}

# Draw a stand-alone horizontal legend (one coloured line per diagnostic) on an
# otherwise empty plot, for use beside the panel of real/simulated prevalence
# plots.
#
# Arguments:
#   parasites:   A character vector of the parasites. Accepted for symmetry
#                with gridLegend; it is not used here.
#   diagnostics: A character vector of the diagnostic names shown as legend
#                entries, coloured in palette order.
#
# Returns the result of recordPlot() taken after drawing.
horizLegend <- function(parasites, diagnostics){
  lineColours <- ggplot2::alpha(c("red","blue","orange","green3","purple","black"), alpha=1)
  tinyMargins <- rep(0.01, 4)

  # Start a fresh page, shrink every margin, then open an empty, axis-free
  # plotting region for the legend to sit in.
  plot.new()
  par(mar = tinyMargins, mai = tinyMargins, oma = tinyMargins, pty = "s",
      bg = "transparent")
  plot(NULL, xlim = c(0,0), ylim = c(0,0), ylab = "", xlab = "", type = "n",
       axes = FALSE)
  legend("center", legend = diagnostics, col = lineColours, lty = 1,
         horiz = TRUE, bty = "n")

  recordPlot()
}

# Draw a stand-alone grid legend on an otherwise empty plot, for use beside the
# panel of sensitivity-prevalence plots: one column of symbols per diagnostic
# (colour) and one row per parasite (shape).
#
# Arguments:
#   parasites:   A character vector of the parasite names; shown as the row
#                labels next to the last column.
#   diagnostics: A character vector of the diagnostic names; shown as rotated
#                column labels and coloured in palette order.
#
# Returns the result of recordPlot() taken after drawing.
gridLegend <- function(parasites, diagnostics){
  xStep <- 0.06 # Horizontal shift between neighbouring legend columns
  yStep <- 0.10 # Vertical inset shared by all legend columns
  nCols <- length(diagnostics)
  nRows <- length(parasites)
  palette6 <- ggplot2::alpha(c("red","blue","orange","green3","purple","black"),alpha=1)
  symbols <- c(15, 16, 17, 18) # Filled shapes start at 15
  tinyMargins <- rep(0.01, 4)

  # Inset (x, y) of each column. The values are passed through their text
  # representation before being turned back into numbers.
  columnInsets <- lapply(1:nCols, function(k){
    as.numeric(c(as.character(xStep*k), as.character(yStep)))
  })

  # Start a fresh page, shrink every margin, then open an empty, axis-free
  # plotting region for the legend to sit in.
  plot.new()
  par(mar = tinyMargins, mai = tinyMargins, oma = tinyMargins, pty = "s",
      bg = "transparent")
  plot(NULL, xlim = c(0,0), ylim = c(0,0), ylab = "", xlab = "", type = "n",
       axes = FALSE)

  # Visit the columns by the position of each diagnostic's first occurrence.
  for(colNr in match(diagnostics, diagnostics)){
    # Rotated column heading
    text(x = 2*xStep*(colNr - 1) - 0.57, y = 0.20 - yStep,
         labels = diagnostics[colNr], pos = 4, srt = 40, cex = 1, adj = 0)

    isLabelledColumn <- colNr == nCols
    if(!isLabelledColumn){
      # Unlabelled columns are anchored on the left and shifted over, otherwise
      # the legend overlaps the four plots a little in the big panel plot.
      legend("left", legend = rep("", nRows), col = palette6[colNr],
             cex = 1, bty = "n", bg = "white", pch = symbols, title = "",
             inset = c(0.05 + 2*xStep, 0) + unlist(columnInsets[colNr]))
    }else{
      # The last column also prints the parasite names and the legend title.
      legend("right", legend = parasites, col = palette6[colNr], cex = 1,
             bg = "white", pch = symbols, title = "Legend", bty = "n",
             text.width = 0.4, inset = unlist(columnInsets[colNr]))
    }
  }

  recordPlot()
}


# ---- JAGS Models For Humans and Animals ----
# JAGS model (kept as a string for runjags) for the human data, where each host
# has R repeated egg counts from both Kato-Katz (KK) and FEACT.
#   * P is the infection probability with a flat Beta(1, 1) prior, and
#     status[i] ~ Bernoulli(P) is the latent infection state of host i.
#   * lambda[i, 1] = 0 is used when status[i] = 0 and lambda[i, 2], drawn from
#     Gamma(sh, rt), when status[i] = 1; lambda[i, status[i] + 1] selects it.
#   * Counts are negative binomial with success probability k/(mean + k) and
#     size k, k being the per-diagnostic over-dispersion parameter. With a mean
#     of 0 that probability is 1, so a host with status 0 can only give zero
#     counts under this model.
#   * The FEACT mean is lambda scaled by 100/24.
#     NOTE(review): presumably this converts between the stool quantities
#     examined by the two methods - confirm.
# The #inits#, #data# and #monitor# lines inside the string are runjags tags
# naming the R variables to use as initial values and data, and the nodes to
# monitor. They are part of the model text and must not be edited as comments.
modelHuman <- "
model{
  # Priors
  P ~ dbeta(1, 1) # Probability of infection
  over_dispersion_KK ~ dgamma(0.001, 0.001)
  over_dispersion_FEACT ~ dgamma(0.001, 0.001)
  
  # Run for all N Human hosts
  for(i in 1:N){
    # Define lambda - infection intensity
    lambda[i, 1] <- 0
    lambda[i, 2] ~ dgamma(sh, rt)
    status[i] ~ dbern(P) # 1 = Infected, 0 = Undetectable Level of Infection
    
    # Iterate through R repeated egg counts for Kato-Katz and FEACT
    for(j in 1:R){
      KK[i, j] ~ dnegbin(over_dispersion_KK/(lambda[i, status[i] + 1] + over_dispersion_KK), over_dispersion_KK)
      FEACT[i, j] ~ dnegbin(over_dispersion_FEACT/((100/24)*lambda[i, status[i] + 1] + over_dispersion_FEACT), over_dispersion_FEACT)
    }
  }
  #inits# status, over_dispersion_KK, over_dispersion_FEACT
  #data# N, R, KK, FEACT, sh, rt
  #monitor# status, P, over_dispersion_KK, over_dispersion_FEACT
}"

# JAGS model (kept as a string for runjags) for the animal data, where each
# host has one binary Sedimentation result (Sed), one binary Flotation result
# (Flot) and one McMaster egg count (MM).
#   * P is the infection probability with a flat Beta(1, 1) prior, and
#     status[i] ~ Bernoulli(P) is the latent infection state of host i.
#   * s[1]/f[1] are the probabilities of a positive Sedimentation/Flotation
#     result when status = 0 (prior Beta(1.5, 3)); s[2]/f[2] are the same
#     probabilities when status = 1 (prior Beta(4, 1.5)). They are selected
#     with s[status[i] + 1] and f[status[i] + 1].
#   * lambda[i, 1] = 0 is used when status[i] = 0 and lambda[i, 2], drawn from
#     Gamma(sh, rt), when status[i] = 1.
#   * MM is negative binomial with success probability k/(lambda + k) and size
#     k = over_dispersion_MM, so a host with status 0 can only give a zero
#     count under this model.
# The #inits#, #data# and #monitor# lines inside the string are runjags tags
# naming the R variables to use as initial values and data, and the nodes to
# monitor. They are part of the model text and must not be edited as comments.
modelAnimal <- "
model{
  # Priors
  P ~ dbeta(1, 1)
  s[1] ~ dbeta(1.5, 3)
  s[2] ~ dbeta(4, 1.5)
  f[1] ~ dbeta(1.5, 3)
  f[2] ~ dbeta(4, 1.5)
  over_dispersion_MM ~ dgamma(0.001, 0.001)

  # Run for all hosts
  for(i in 1:N){
    # Define lambda
    lambda[i, 1] <- 0
    lambda[i, 2] ~ dgamma(sh, rt)
    status[i] ~ dbern(P) # 1 = Infected, 0 = Undetectable Level of Infection

    Sed[i] ~ dbern(S[i])
    S[i] <- s[status[i] + 1]

    Flot[i] ~ dbern(F[i])
    F[i] <- f[status[i] + 1]

    # Iterate through 1 egg count for McMaster
    MM[i] ~ dnegbin(over_dispersion_MM/(lambda[i, status[i] + 1] + over_dispersion_MM), over_dispersion_MM)
  }
  #inits# status, over_dispersion_MM
  #data# N, Sed, Flot, MM, sh, rt
  #monitor# status, P, over_dispersion_MM, s, f
}"

# ---- Define Relevant Environment Variables ----
# Extract the diagnostic and parasite names from the column names of the data
# frames. Each column name is broken at every underscore, "_": the first piece
# is taken as the diagnostic and the second as the parasite. unique() avoids
# double counting names that appear in several columns.
diagnosticsH <- unique(vapply(strsplit(colnames(Human_Data), "_"), `[`, character(1), 1))
parasitesH <- unique(vapply(strsplit(colnames(Human_Data), "_"), `[`, character(1), 2))

animals <- c("Cats and Dogs", "Pigs", "Water Buffalo and Cattle")
# The first column of Animal_Data is skipped (presumably the host label - TODO
# confirm). drop = FALSE keeps a data frame, and therefore the column names,
# even if only a single column remains.
diagnosticsA <- unique(vapply(strsplit(colnames(Animal_Data[, -1, drop = FALSE]), "_"),
                              `[`, character(1), 1))
parasitesA <- unique(vapply(strsplit(colnames(Animal_Data[, -1, drop = FALSE]), "_"),
                            `[`, character(1), 2))

# Number of Diagnostics and Parasites
nDiagH <- length(diagnosticsH)
nDiagA <- length(diagnosticsA)
nParaH <- length(parasitesH)
nParaA <- length(parasitesA)

# MCMC settings passed to run.jags. We keep them consistent across all hosts.
nSamples <- 10000 # Number of samples to run in JAGS model
nChains  <-     8 # Number of chains to run in the JAGS model
burn_in  <-  4000 # The number of burn-in iterations

# Create the directory to place the supplementary plots by host and by disease.
# showWarnings = FALSE keeps re-runs quiet when the directory already exists.
dir.create(path = "Supplementary_Figures/", recursive = TRUE, showWarnings = FALSE)

# ---- Run The Model for Humans and All Defined Parasites ----
# For every human parasite this:
#   1. builds the gamma prior (sh, rt) for the infection intensity,
#   2. runs modelHuman in JAGS and pools the chains,
#   3. derives, for every posterior sample, the sensitivity and specificity of
#      Kato-Katz and FEACT from the sampled infection status of each host,
#   4. saves a three-panel figure to Supplementary_Figures/<parasite>_Humans.svg,
#   5. returns a matrix with rows (KK sens, KK spec, FEACT sens, FEACT spec,
#      prevalence) and one column per posterior sample.
# The per-parasite matrices are stacked and written to Analysis_Human.csv.
write.csv(do.call(rbind, lapply(parasitesH, cmpfun(function(parasite){
  cat("===========================================================\n")
  cat(paste0("Running analysis for ",parasite," in Humans.\n"))

  # Define variables from data. run.jags picks these up by name through the
  # #data# and #inits# tags in modelHuman, so the names must not change.
  R  <- 2 # Number of egg count repeats for Human dataset
  N  <- nrow(Human_Data) # Number of available hosts
  over_dispersion_KK <- 1
  over_dispersion_FEACT <- 1
  status <- rep(1, N) # 1 = Infected, 0 = Undetectable Level of Infection

  # Define the 1st, 2nd, and 3rd quantile data (refQ) of raw egg counts to fit to
  # a gamma distribution to generate a weakly-informative prior for the model.
  # Schistosoma has its shape and rate given directly instead. Use the line
  # below instead if you have a vector of raw egg counts.
  # sh_rt_fit <- fitdistrplus::fitdist(<Raw Egg Data>, distr = "gamma", method = "mme")$estimate
  # NOTE(review): the Ascaris and Trichuris triples are not increasing although
  # they are fitted against the 25%, 50% and 75% quantiles below - confirm.
  refQ <- NULL
  if(parasite == "Ascaris"){refQ <- c(0, 30.49, 0)} # Unpublished work
  else if(parasite == "Trichuris"){refQ <- c(0, 4.417, 1.250)} # Same as Cats and Dogs below
  else if(parasite == "Hookworm"){refQ <- c(1.75, 3.5, 5.25)}  # Same as Cats and Dogs below
  else if(parasite == "Schistosoma"){sh <- 0.5775932; rt <- 0.07099037} # Unpublished work
  else{cat(paste0("Error. ",parasite," in Humans has no fittable data.")); return(NULL)}

  # Fit given quantiles, if any, to a gamma distribution by least squares
  if(!is.null(refQ)){
    fit <- optim(c(.5,.1), function(x){
      sum(abs(refQ - qgamma(c(0.25, 0.5, 0.75), shape = x[1], rate = x[2]))^2)
    })
    sh <- fit$par[1]
    rt <- fit$par[2]
  }

  # Extract the needed columns in our data frames
  KK    <- as.matrix(Human_Data[,paste0("Kato-Katz_",parasite,"_",seq_len(R))])
  FEACT <- as.matrix(Human_Data[,paste0("FEACT_",parasite,"_",seq_len(R))])

  # Run Model For Humans
  resultsHuman <- run.jags(model=modelHuman, n.chains=nChains, sample=nSamples, burnin=burn_in)
  samplesHuman <- combine.mcmc(resultsHuman$mcmc, collapse.chains = TRUE)

  # Sampled infection status: one row per posterior sample, one column per
  # host. Extracted once and reused for every count below.
  statusSamples <- samplesHuman[,paste0("status[",seq_len(N),"]")]
  sampledInfected   <- statusSamples == 1
  sampledUninfected <- statusSamples == 0

  # Determine probabilities of infection (take means of columns of the samples):
  probInf <- colMeans(statusSamples)

  # Find the sensitivity and specificity across samples. Recall confusion matrix.
  # TP: True Positive, FN: False Negative, FP: False Positive, TN: True Negative.
  # A host is test-positive when any of its repeated counts is above zero. Each
  # matrix product counts, per sample, the hosts in one cell of the matrix.
  cat("Finding sensitivity and specificity of the diagnostics.\n")
  KK_TP <- c(sampledInfected   %*% (rowMeans(KK)  > 0))
  KK_FN <- c(sampledInfected   %*% (rowMeans(KK) == 0))
  KK_FP <- c(sampledUninfected %*% (rowMeans(KK)  > 0))
  KK_TN <- c(sampledUninfected %*% (rowMeans(KK) == 0))

  FEACT_TP <- c(sampledInfected   %*% (rowMeans(FEACT)  > 0))
  FEACT_FN <- c(sampledInfected   %*% (rowMeans(FEACT) == 0))
  FEACT_FP <- c(sampledUninfected %*% (rowMeans(FEACT)  > 0))
  FEACT_TN <- c(sampledUninfected %*% (rowMeans(FEACT) == 0))

  # Calculate sensitivity and specificity for each diagnostic
  KK_Sens    <- KK_TP / (KK_TP + KK_FN)
  KK_Spec    <- KK_TN / (KK_FP + KK_TN)
  FEACT_Sens <- FEACT_TP / (FEACT_TP + FEACT_FN)
  FEACT_Spec <- FEACT_TN / (FEACT_FP + FEACT_TN)

  # Create the matrix of sensitivity, specificity and prevalence for all samples
  cMat <- as.matrix(rbind(KK_Sens, KK_Spec, FEACT_Sens, FEACT_Spec, samplesHuman[,"P"]))
  rownames(cMat) <- c(paste0("Kato-Katz_Sensitivity_",parasite,"_Humans"),
                      paste0("Kato-Katz_Specificity_",parasite,"_Humans"),
                      paste0("FEACT_Sensitivity_",parasite,"_Humans"),
                      paste0("FEACT_Specificity_",parasite,"_Humans"),
                      paste0("Prevalence_",parasite,"_Humans"))

  cat("Producing figure... ")
  # svg() opens its own device, so no plot.new() is needed beforehand. The
  # device is closed on exit even if one of the plotting calls fails.
  svg(file = paste0("Supplementary_Figures/",parasite,"_Humans.svg"), width = 12, height = 9)
  on.exit(dev.off(), add = TRUE)
  layout(mat = matrix(c(1, 2, 3, 3), ncol = 2, nrow = 2, byrow = TRUE), heights = c(1, 1))
  par(mar = c(4, 4, 1, 1)) # Histogram for the estimated probabilities of infection
  hist(100*probInf, breaks = seq(0,100,length.out=101), col="black", xlim=c(0,100), las=1, main="",
       xlab = paste0("Estimated Probability of Infection of ",parasite," in Humans (%)"))

  par(mar = c(4, 4, 1, 1)) # Histogram for Prevalence across all the samples
  hist(100*samplesHuman[,"P"], main="", xlim=c(0, 100), breaks = seq(0,100,length.out=101),
       xlab = paste0("Prevalence of ",parasite," in Humans (%)"), col="black", las=1)

  # Plotting frequency curves of Sensitivity and Specificity across diagnostics
  break_intervals <- seq(from = 0, to = 100, length.out = 100)
  KK_Sens_Frequency <- hist(100*cMat[1,], breaks = break_intervals, plot = FALSE)$counts
  FEACT_Sens_Frequency <- hist(100*cMat[3,], breaks = break_intervals, plot = FALSE)$counts
  KK_Spec_Frequency <- hist(100*cMat[2,], breaks = break_intervals, plot = FALSE)$counts
  FEACT_Spec_Frequency <- hist(100*cMat[4,], breaks = break_intervals, plot = FALSE)$counts
  # The y-limit is the tallest of the curves that are actually drawn, so every
  # curve fits. The floor of 1 keeps the axis valid when all counts are zero.
  yL <- max(1, KK_Sens_Frequency, FEACT_Sens_Frequency,
            KK_Spec_Frequency, FEACT_Spec_Frequency)
  par(mar = c(4, 4, 1, 12), xpd = TRUE, las = 1)
  plot(NULL, main = "", xlim = c(0, 100), ylim = c(0, yL), ylab = "Frequency",
       xlab = paste0("Diagnostic Sensitivity and Specificity for ",parasite," in Humans (%)"))
  lines(x = break_intervals[-1], y = KK_Sens_Frequency, col = ggplot2::alpha("red", 0.5), lty = 1)
  lines(x = break_intervals[-1], y = FEACT_Sens_Frequency, col = ggplot2::alpha("blue", 0.5), lty = 1)
  lines(x = break_intervals[-1], y = KK_Spec_Frequency, col = ggplot2::alpha("red", 0.5), lty = 2)
  lines(x = break_intervals[-1], y = FEACT_Spec_Frequency, col = ggplot2::alpha("blue", 0.5), lty = 2)
  legend("topright", legend = c(paste(diagnosticsH, "Sensitivity"),
                                paste(diagnosticsH, "Specificity")),
         col = rep(c("red", "blue"), 2), lty = rep(1:2, each = nDiagH),
         inset = c(-0.2, 0), title = "Legend", bty = "n")
  cat("Done.\n")

  cat(paste0("Analysis for ",parasite," in Humans finished.\n"))
  cat("===========================================================\n\n")
  return(cMat)
}))), file = "Analysis_Human.csv")


# ---- Run The Model For All Animal Host Groups and All Parasites ----
for(animal in animals){
  # For each animal host group: run the JAGS model once per parasite, derive the
  # per-sample sensitivity/specificity of every diagnostic plus the prevalence,
  # draw a supplementary figure, and save all rows to "Analysis_<animal>.csv".
  # Each parasite contributes 7 rows (3 diagnostics x 2 + prevalence) with one
  # column per posterior sample. Parasites with no prior information are
  # skipped (the inner function returns NULL, which rbind ignores).
  write.csv(do.call(rbind, lapply(parasitesA, cmpfun(function(parasite){
    cat("===========================================================\n")
    cat(paste0("Running analysis for ",parasite," in ",animal,".\n"))
    
    # Find the data corresponding to the correct animal host groups in the data.
    # Define the reference quantile data (refQ) of raw egg counts, matched
    # against the 25%, 50% and 75% quantiles of a gamma distribution to build a
    # weakly-informative prior for the model. For Schistosoma the shape (sh)
    # and rate (rt) are given directly and no fit is performed.
    # NOTE(review): run.jags is called without a data argument, so it
    # presumably picks up N, status, over_dispersion_MM, Sed, Flot, MM, sh and
    # rt from this environment by name - do not rename them without checking
    # modelAnimal.
    refQ <- NULL
    if(animal == "Cats and Dogs"){
      animalCodes <- c("Cats", "Dogs")
      if(parasite == "Ascaris"){refQ <- c(0, 30.49, 0)} # Same as Humans
      else if(parasite == "Trichuris"){refQ <- c(0, 4.417, 1.250)} # Unpublished work
      else if(parasite == "Hookworm"){refQ <- c(1.75, 3.5, 5.25)}  # Unpublished work
      else if(parasite == "Schistosoma"){sh <- 0.5775932; rt <- 0.07099037} # Unpublished work
      else{
        # The variable holding the parasite name here is `parasite` (there is
        # no `p` in scope inside this function).
        cat(paste0("Error. ",parasite," in ",animal," has no fittable data."))
        return(NULL)
      }
    }
    else if(animal == "Pigs"){
      animalCodes <- c("Pigs")
      if(parasite == "Ascaris"){refQ <- c(0, 0.5, 1)} # Unpublished work
      else if(parasite == "Trichuris"){refQ <- c(0, 0.1351, 0)} # Same as Water Buffalo and Cattle
      # NOTE(review): first value (3.75) exceeds the second (3.5), while Cats
      # and Dogs use c(1.75, 3.5, 5.25) - confirm this is not a typo.
      else if(parasite == "Hookworm"){refQ <- c(3.75, 3.5, 5.25)} # Unpublished work
      else if(parasite == "Schistosoma"){sh <- 0.5775932; rt <- 0.07099037} # Unpublished work
      else{
        cat(paste0("Error. ",parasite," in ",animal," has no fittable data."))
        return(NULL)
      }
    }
    else if(animal == "Water Buffalo and Cattle"){
      animalCodes <- c("Water Buffalo", "Cattle")
      if(parasite == "Ascaris"){refQ <- c(0, 0.1892, 0)} # Unpublished work
      else if(parasite == "Trichuris"){refQ <- c(0, 0.1351, 0)} # Unpublished work
      else if(parasite == "Hookworm"){refQ <- c(0, 1.216, 0)} # Unpublished work
      else if(parasite == "Schistosoma"){sh <- 0.5775932; rt <- 0.07099037} # Unpublished work
      else{
        cat(paste0("Error. ",parasite," in ",animal," has no fittable data."))
        return(NULL)
      }
    }
    else{cat("Error. Cannot find animal data."); return(NULL)}
    
    # Fit given quantiles, if any, to a gamma distribution by least squares on
    # the 25%, 50% and 75% quantiles (starting from shape 0.5, rate 0.1).
    if(!is.null(refQ)){
      fit <- optim(c(.5,.1), function(x){
        sum(abs(refQ - qgamma(c(0.25, 0.5, 0.75), shape = x[1], rate = x[2]))^2)
      })
      sh <- fit$par[1]
      rt <- fit$par[2]
    }
    
    # Find rows corresponding to the animals we want to run the simulation for.
    aR <- which(Animal_Data$`Animal Species` %in% animalCodes)
    
    # Define variables from data:
    N <- nrow(Animal_Data[aR,]) # Number of available hosts
    status <- rep(1, N) # 1 = Infected, 0 = Undetectable Level of Infection
    over_dispersion_MM <- 100000

    # Extract the needed columns in our data frames
    Sed <- c(as.matrix(Animal_Data[aR,paste0("Sedimentation_",parasite)]))
    Flot <- c(as.matrix(Animal_Data[aR,paste0("Flotation_",parasite)]))
    MM <- c(as.matrix(Animal_Data[aR,paste0("McMaster_",parasite)])) # 1 egg count
    
    # Run model for the given animal
    resultsAnimal <- run.jags(model=modelAnimal, n.chains=nChains, sample=nSamples, burnin=burn_in)
    samplesAnimal <- combine.mcmc(resultsAnimal$mcmc, collapse.chains = TRUE)
    
    # Sampled infection status of every host (rows = samples, columns = hosts).
    # Extracted once and reused for all the confusion-matrix counts below.
    statusSamples <- samplesAnimal[,paste0("status[",seq_len(N),"]")]
    infected <- statusSamples == 1
    uninfected <- statusSamples == 0
    
    # Determine probabilities of infection (take means column-wise of the samples of status):
    probInf <- colMeans(statusSamples)
    
    # Find the sensitivity and specificity across samples. Recall confusion matrix.
    # Each count is a vector with one entry per sample: the matrix product adds
    # up the hosts whose sampled status and observed test result both match.
    cat("Finding sensitivity and specificity of the diagnostics.\n")
    Sed_TP <- c(infected %*% (Sed == 1))
    Sed_FN <- c(infected %*% (Sed == 0))
    Sed_FP <- c(uninfected %*% (Sed == 1))
    Sed_TN <- c(uninfected %*% (Sed == 0))
    
    Flot_TP <- c(infected %*% (Flot == 1))
    Flot_FN <- c(infected %*% (Flot == 0))
    Flot_FP <- c(uninfected %*% (Flot == 1))
    Flot_TN <- c(uninfected %*% (Flot == 0))
    
    MM_TP <- c(infected %*% (MM == 1))
    MM_FN <- c(infected %*% (MM == 0))
    MM_FP <- c(uninfected %*% (MM == 1))
    MM_TN <- c(uninfected %*% (MM == 0))
    
    # Calculate sensitivity and specificity for each diagnostic
    Sed_Sens  <- Sed_TP / (Sed_TP + Sed_FN)
    Sed_Spec  <- Sed_TN / (Sed_FP + Sed_TN)
    Flot_Sens <- Flot_TP / (Flot_TP + Flot_FN)
    Flot_Spec <- Flot_TN / (Flot_FP + Flot_TN)
    MM_Sens   <- MM_TP / (MM_TP + MM_FN)
    MM_Spec   <- MM_TN / (MM_FP + MM_TN)
    
    # Create the confusion matrix of just sensitivity and specificity for all samples
    # (rows 1-6), with the sampled prevalence P as the last row (row 7).
    cMat <- as.matrix(rbind(Sed_Sens, Sed_Spec, Flot_Sens, Flot_Spec, MM_Sens, MM_Spec, samplesAnimal[,"P"]))
    rownames(cMat) <- c(paste0("Sedimentation_Sensitivity_",parasite,"_",gsub(" ","_",animal)),
                        paste0("Sedimentation_Specificity_",parasite,"_",gsub(" ","_",animal)),
                        paste0("Flotation_Sensitivity_",parasite,"_",gsub(" ","_",animal)),
                        paste0("Flotation_Specificity_",parasite,"_",gsub(" ","_",animal)),
                        paste0("McMaster_Sensitivity_",parasite,"_",gsub(" ","_",animal)),
                        paste0("McMaster_Specificity_",parasite,"_",gsub(" ","_",animal)),
                        paste0("Prevalence_",parasite,"_",gsub(" ","_",animal)))
    
    cat("Producing figure... ")
    # Make sure the output folder exists; svg() cannot create it by itself.
    dir.create("Supplementary_Figures", showWarnings = FALSE)
    plot.new()
    svg(file = paste0("Supplementary_Figures/",parasite,"_",gsub(" ","_",animal),".svg"), width = 12, height = 9)
    layout(mat = matrix(c(1, 2, 3, 3), ncol = 2, nrow = 2, byrow = TRUE), heights = c(1, 1))
    
    par(mar = c(4, 4, 1, 1)) # Histogram for the estimated probabilities of infection
    hist(100*probInf, seq(0,100,length.out=101), col="black", xlim=c(0,100), las=1, main="",
         xlab = paste0("Estimated Probability of Infection of ",parasite," in ",animal," (%)"))
    
    par(mar = c(4, 4, 1, 1)) # Histogram for Prevalence across all the samples
    hist(100*samplesAnimal[,"P"], main="", xlim=c(0, 100), seq(0,100,length.out=101),
         xlab = paste0("Prevalence of ",parasite," in ",animal," (%)"), col="black", las=1)
    
    # Plotting frequency curves of Sensitivity and Specificity across diagnostics
    # Find the maximum frequency in all the curves so we can plot them together.
    # NOTE(review): yL comes from default-break histograms of the 0-1 scaled
    # rows, whereas the curves below use break_intervals on the 0-100 scale;
    # confirm the resulting y-axis limit is the intended one.
    break_intervals <- seq(from = 0, to = 100, length.out = 100)
    yL <- max(unlist(lapply(1:(nrow(cMat)-1), function(i){max(hist(cMat[i,], plot = FALSE)$counts)})))
    par(mar = c(4, 4, 1, 12), xpd = TRUE, las = 1)
    plot(NULL,  main = "", xlim = c(0,100), ylim = c(0, yL), ylab = "Frequency",
         xlab = paste("Diagnostic Sensitivity and Specificity for",parasite,"in",animal,"(%)"))
    Sed_Sens_Frequency <- hist(100*cMat[1,], breaks = break_intervals, plot = FALSE)$counts
    Flot_Sens_Frequency <- hist(100*cMat[3,], breaks = break_intervals, plot = FALSE)$counts
    MM_Sens_Frequency <- hist(100*cMat[5,], breaks = break_intervals, plot = FALSE)$counts
    Sed_Spec_Frequency <- hist(100*cMat[2,], breaks = break_intervals, plot = FALSE)$counts
    Flot_Spec_Frequency <- hist(100*cMat[4,], breaks = break_intervals, plot = FALSE)$counts
    MM_Spec_Frequency <- hist(100*cMat[6,], breaks = break_intervals, plot = FALSE)$counts
    # Solid lines (lty = 1) are sensitivities, dashed lines (lty = 2) specificities.
    lines(x = break_intervals[-1], y = Sed_Sens_Frequency, col = alpha("orange", 0.5), lty = 1)
    lines(x = break_intervals[-1], y = Flot_Sens_Frequency, col = alpha("green3", 0.5), lty = 1)
    lines(x = break_intervals[-1], y = MM_Sens_Frequency, col = alpha("purple", 0.5), lty = 1)
    lines(x = break_intervals[-1], y = Sed_Spec_Frequency, col = alpha("orange", 0.5), lty = 2)
    lines(x = break_intervals[-1], y = Flot_Spec_Frequency, col = alpha("green3", 0.5), lty = 2)
    lines(x = break_intervals[-1], y = MM_Spec_Frequency, col = alpha("purple", 0.5), lty = 2)
    # The three colours are recycled across the six legend entries.
    legend("topright", legend = c(paste(diagnosticsA, "Sensitivity"),
                                        paste(diagnosticsA, "Specificity")),
           col=c("orange","green3","purple"), inset = c(-0.22, 0), bty = "n",
           title = "Legend", lty = sort(rep(1:2, nDiagA)))
    dev.off()
    cat("Done.\n")
    
    cat(paste0("Analysis for ",parasite," in ",animal," finished.\n"))
    cat("===========================================================\n\n")
    return(cMat)
  }))), file = paste0("Analysis_",gsub(" ","_",animal),".csv"))
}


# ---- Load Sensitivity-Prevalence Data From Saved .csv File ----
# Read back the per-host analysis tables written by the model runs above. The
# first column of each file holds the row labels, so it becomes the row names.
analysisH   <- read.csv("Analysis_Human.csv", header = TRUE, row.names = 1)
analysisCD  <- read.csv("Analysis_Cats_and_Dogs.csv", header = TRUE, row.names = 1)
analysisP   <- read.csv("Analysis_Pigs.csv", header = TRUE, row.names = 1)
analysisWBC <- read.csv("Analysis_Water_Buffalo_and_Cattle.csv", header = TRUE,
                        row.names = 1)


# ---- Real-world Data Analysis ----
# Create a table of the real-world prevalences (Table 1 in the paper). Does not
# differentiate by diagnostic. Hosts positive for "Any STH" are those that test
# positive in any of Ascaris, Trichuris or Hookworm.
# One row per host group (Humans first, then each animal group) and one column
# per parasite found in either the human or the animal parasite list, plus a
# final "Any STH" column. Note that unique(x, y) would treat y as the
# `incomparables` argument, so the two vectors are concatenated before
# de-duplicating.
RealPrevTable <- matrix(0, nrow = 1 + length(animals),
                        ncol = length(unique(c(parasitesH, parasitesA))) + 1)
rownames(RealPrevTable) <- c("Humans", animals)
colnames(RealPrevTable) <- c(unique(c(parasitesH, parasitesA)), "Any STH")
# The frequency table starts as a zero-filled copy with the same layout.
RealFreqTable <- RealPrevTable

# Fill the Humans row (row 1) of the frequency and prevalence tables. A human is
# counted as positive for a parasite when the sum over all columns whose name
# contains that parasite is above zero; "Any STH" combines the first three
# entries of parasitesH in the same way.
for(p in c(parasitesH, "Any STH")){
  targets <- if(p == "Any STH") parasitesH[1:3] else p
  # One logical vector per target parasite: is each human positive for it?
  hits <- lapply(targets, function(target){
    rowSums(Human_Data[,grepl(target, colnames(Human_Data))]) > 0
  })
  # Positive overall when positive for at least one of the targets.
  RealFreqTable[1,p] <- sum(rowSums(as.data.frame(hits)) > 0)
  # Prevalence = frequency over the total number of humans.
  RealPrevTable[1, p] <- RealFreqTable[1, p]/nrow(Human_Data)
}

# Calculate the prevalence for Animals: fill one row of RealFreqTable and
# RealPrevTable per animal host group. An animal is counted as positive for a
# parasite when the sum over all columns whose name contains that parasite is
# above zero; "Any STH" combines the first three entries of parasitesA.
for(animal in animals){
  # Which row to insert the data in RealPrevTable (exact name match, so that a
  # group name contained in another group's name cannot select two rows).
  j <- match(animal, rownames(RealPrevTable))
  if(animal == "Cats and Dogs"){animalCodes <- c("Cats", "Dogs")}
  else if(animal == "Pigs"){animalCodes <- c("Pigs")}
  else if(animal == "Water Buffalo and Cattle"){animalCodes <- c("Water Buffalo", "Cattle")}
  else{
    # There is no function to return from at this level, so report the
    # problem, mark the row as unknown, and move on to the next host group.
    cat("Error. Cannot find animal data.")
    RealFreqTable[j,] <- NA
    RealPrevTable[j,] <- NA
    next
  }
  a <- which(Animal_Data$`Animal Species` %in% animalCodes)
  
  for(p in c(parasitesA, "Any STH")){
    if(p == "Any STH"){
      RealFreqTable[j,p] <- sum(rowSums(as.data.frame(lapply(parasitesA[1:3], function(sth){
        return((rowSums(Animal_Data[a,grepl(sth, colnames(Animal_Data))]) > 0))
      }))) > 0)
    }
    else{
      RealFreqTable[j,p] <- sum((rowSums(Animal_Data[a,grepl(p, colnames(Animal_Data))]) > 0))
    }
    # Divide the frequency by the total number of animals to get the prevalence
    RealPrevTable[j, p] <- RealFreqTable[j, p]/length(a)
  }
}

# Write the real-world prevalence and frequency tables to disk, keeping the
# host names as row labels.
write.csv(RealPrevTable, "Prevalence_Real_Table.csv", row.names = TRUE)
write.csv(RealFreqTable, "Frequency_Real_Table.csv", row.names = TRUE)

# Observed prevalence per host group, split by diagnostic. Every vector is
# ordered diagnostic by diagnostic, with the parasites cycling fastest.

# Humans: a person is positive for a diagnostic/parasite pair when the row sum
# over all columns matching "<diagnostic>_<parasite>" is above zero.
HPrev <- local({
  humanColumns <- colnames(Human_Data)
  positives <- lapply(diagnosticsH, function(diagnostic){
    lapply(parasitesH, function(parasite){
      tested <- grepl(paste(diagnostic, parasite, sep = "_"), humanColumns)
      sum(rowSums(Human_Data[,tested]) > 0)
    })
  })
  unlist(positives)/nrow(Human_Data)
})

# Cats and dogs: number of positive entries in the matching column(s) divided
# by the number of cats and dogs.
CDPrev <- local({
  hostRows <- which(Animal_Data$`Animal Species` %in% c("Cats", "Dogs"))
  nHosts <- length(hostRows)
  animalColumns <- colnames(Animal_Data)
  unlist(lapply(diagnosticsA, function(diagnostic){
    lapply(parasitesA, function(parasite){
      tested <- grepl(paste(diagnostic, parasite, sep = "_"), animalColumns)
      sum(Animal_Data[hostRows,tested] > 0)/nHosts
    })
  }))
})

# Pigs: same calculation restricted to the pig rows.
PigPrev <- local({
  hostRows <- which(Animal_Data$`Animal Species` %in% c("Pigs"))
  nHosts <- length(hostRows)
  animalColumns <- colnames(Animal_Data)
  unlist(lapply(diagnosticsA, function(diagnostic){
    lapply(parasitesA, function(parasite){
      tested <- grepl(paste(diagnostic, parasite, sep = "_"), animalColumns)
      sum(Animal_Data[hostRows,tested] > 0)/nHosts
    })
  }))
})

# Water buffalo and cattle: same calculation restricted to those rows.
WBCPrev <- local({
  hostRows <- which(Animal_Data$`Animal Species` %in% c("Water Buffalo", "Cattle"))
  nHosts <- length(hostRows)
  animalColumns <- colnames(Animal_Data)
  unlist(lapply(diagnosticsA, function(diagnostic){
    lapply(parasitesA, function(parasite){
      tested <- grepl(paste(diagnostic, parasite, sep = "_"), animalColumns)
      sum(Animal_Data[hostRows,tested] > 0)/nHosts
    })
  }))
})

# Simulated prevalence for each host. In the saved analysis tables every
# parasite occupies a run of (2 x number of diagnostics + 1) rows, so the rows
# picked here are the last row of each run.
HPrevSim <- local({
  runLength <- 2*nDiagH + 1
  analysisH[seq(from = runLength, to = nrow(analysisH), by = runLength),]
})
CDPrevSim <- local({
  runLength <- 2*nDiagA + 1
  analysisCD[seq(from = runLength, to = nrow(analysisCD), by = runLength),]
})
PigPrevSim <- local({
  runLength <- 2*nDiagA + 1
  analysisP[seq(from = runLength, to = nrow(analysisP), by = runLength),]
})
WBCPrevSim <- local({
  runLength <- 2*nDiagA + 1
  analysisWBC[seq(from = runLength, to = nrow(analysisWBC), by = runLength),]
})

# Save the quantile data to a .csv file: one 95% interval (bayestestR::ci) per
# simulated-prevalence row across all hosts.
local({
  simulated <- rbind(HPrevSim, CDPrevSim, PigPrevSim, WBCPrevSim)
  # Transpose so each prevalence row becomes a column that sapply can visit.
  perRow <- as.data.frame(t(simulated))
  intervals <- sapply(perRow, FUN = bayestestR::ci, ci = 0.95)
  write.csv(t(intervals), file = "Quantile_Data_All_Hosts.csv")
})


# ---- Plotting Main Model Results ----
# Build one real-vs-simulated prevalence panel per host group, arrange them in
# a 2x2 grid, and save the grid with a horizontal legend underneath.
H <- plotPrev(host = "Humans", prevSim = HPrevSim, prevReal = HPrev,
              diagnostics = diagnosticsH, parasites = parasitesH, PI = FALSE)

CD <- plotPrev(host = "Cats and Dogs", prevSim = CDPrevSim, prevReal = CDPrev,
               diagnostics = diagnosticsA, parasites = parasitesA, PI = FALSE)

P <- plotPrev(host = "Pigs", prevSim = PigPrevSim, prevReal = PigPrev,
              diagnostics = diagnosticsA, parasites = parasitesA, PI = FALSE)

WBC <- plotPrev(host = "Water Buffalo and Cattle", prevSim = WBCPrevSim,
                prevReal = WBCPrev, diagnostics = diagnosticsA,
                parasites = parasitesA, PI = FALSE)

L <- horizLegend(parasites = unique(c(parasitesH, parasitesA)),
                diagnostics = unique(c(diagnosticsH, diagnosticsA, "Model")))

grid <- cowplot::plot_grid(H, CD, P, WBC, byrow=TRUE, nrow=2, ncol=2, align="hv")

plot.new() # Combined plots in a 2x2 grid with a legend on the bottom.
svg(file = "Prevalence_Real_and_Sim_All_Hosts.svg", width = 9, height = 9)
# Print explicitly: grid-based plots are only drawn when printed, and top-level
# auto-printing does not happen when this file is run through source().
print(cowplot::plot_grid(grid, L, nrow=2, ncol=1, rel_heights = c(1,0.02), scale = c(1,47)))
dev.off()


# Build one full (simple = FALSE) plotSP panel per host group and save them in
# a 2x3 grid: two panels per row, with the legend in the last cell of the top
# row and an empty last cell in the bottom row.
H  <- plotSP(host = "Humans", parasites = parasitesH, SPD = analysisH, PI = FALSE,
             simple = FALSE, diagnostics = diagnosticsH)

CD <- plotSP(host = "Cats and Dogs", parasites = parasitesA, SPD = analysisCD, 
             PI = FALSE, simple = FALSE, diagnostics = diagnosticsA)

P  <- plotSP(host = "Pigs", parasites = parasitesA, SPD = analysisP, PI = FALSE,
             simple = FALSE, diagnostics = diagnosticsA)

WBC <- plotSP(host = "Water Buffalo and Cattle", parasites = parasitesA,
              SPD = analysisWBC, PI = FALSE, simple = FALSE,
              diagnostics = diagnosticsA)

L  <- gridLegend(parasites = unique(c(parasitesH, parasitesA)),
                 diagnostics = unique(c(diagnosticsH, diagnosticsA)))

plot.new() # Combined plots in a 2x2 grid with a legend on the right.
svg(file = "Sensitivity_Prevalence_All_Hosts.svg", width = 14, height = 9)
# Print explicitly: grid-based plots are only drawn when printed, and top-level
# auto-printing does not happen when this file is run through source().
print(cowplot::plot_grid(H, CD, L, P, WBC, NULL, byrow = TRUE, nrow = 2, ncol = 3,
                         scale = c(1, 1, 1.7, 1, 1, 1), align = "hv"))
dev.off()


# Build one simplified (simple = TRUE) plotSP panel per host group, arrange
# them in a 2x2 grid, and save the grid with a horizontal legend underneath.
H  <- plotSP(host = "Humans", parasites = parasitesH, SPD = analysisH, PI = FALSE,
             simple = TRUE, diagnostics = diagnosticsH)

CD <- plotSP(host = "Cats and Dogs", parasites = parasitesA, SPD = analysisCD, 
             PI = FALSE, simple = TRUE, diagnostics = diagnosticsA)

P  <- plotSP(host = "Pigs", parasites = parasitesA, SPD = analysisP, PI = FALSE,
             simple = TRUE, diagnostics = diagnosticsA)

WBC <- plotSP(host = "Water Buffalo and Cattle", parasites = parasitesA,
              SPD = analysisWBC, PI = FALSE, simple = TRUE,
              diagnostics = diagnosticsA)

L  <- horizLegend(parasites = unique(c(parasitesH, parasitesA)),
                  diagnostics = unique(c(diagnosticsH, diagnosticsA)))

grid <- cowplot::plot_grid(H, CD, P, WBC, byrow=TRUE, nrow=2, ncol=2, align = "hv")

plot.new() # Combined plots in a 2x2 grid with a legend on the bottom
svg(file = "Sensitivity_Prevalence_All_Hosts_Simplified.svg", width = 9, height = 9)
# Print explicitly: grid-based plots are only drawn when printed, and top-level
# auto-printing does not happen when this file is run through source().
print(cowplot::plot_grid(grid, L, nrow=2, ncol=1, rel_heights = c(1,0.02), scale = c(1,47)))
dev.off()


# Build one simplified (simple = TRUE) plotp1p0 panel per host group, arrange
# them in a 2x2 grid, and save the grid with a horizontal legend underneath.
H  <- plotp1p0(host = "Humans", parasites = parasitesH, SSD = analysisH, PI = FALSE,
               simple = TRUE, diagnostics = diagnosticsH)

CD <- plotp1p0(host = "Cats and Dogs", parasites = parasitesA, SSD = analysisCD, 
               PI = FALSE, simple = TRUE, diagnostics = diagnosticsA)

P  <- plotp1p0(host = "Pigs", parasites = parasitesA, SSD = analysisP, PI = FALSE,
               simple = TRUE, diagnostics = diagnosticsA)

WBC <- plotp1p0(host = "Water Buffalo and Cattle", parasites = parasitesA,
                SSD = analysisWBC, PI = FALSE, simple = TRUE,
                diagnostics = diagnosticsA)

L  <- horizLegend(parasites = unique(c(parasitesH, parasitesA)),
                  diagnostics = unique(c(diagnosticsH, diagnosticsA)))

grid <- cowplot::plot_grid(H, CD, P, WBC, byrow=TRUE, nrow=2, ncol=2, align = "hv")

plot.new() # Combined plots in a 2x2 grid with a legend on the bottom
svg(file = "p1_p0_All_Hosts_Simplified.svg", width = 9, height = 9)
# Print explicitly: grid-based plots are only drawn when printed, and top-level
# auto-printing does not happen when this file is run through source().
print(cowplot::plot_grid(grid, L, nrow=2, ncol=1, rel_heights = c(1,0.02), scale = c(1,47)))
dev.off()


# ============================================================================ #
# Latent-Class Analysis -  Philippines Diagnostics: Code End                   |
# ============================================================================ #