Package: straf
-Title: STR Analysis For Forensics
-Version: 1.5.0
+Title: STR Analysis for Forensics
+Version: 2.0.0
person(given = "Alexandre",
family = "Gouy",
role = c("aut", "cre"),
- email = "alexandre.gouy@protonmail.com")
-Description: straf is a Shiny application to perform STR / microsatellite data analysis.
-License: `use_gpl3_license()`
+ email = "alexandre.gouy@protonmail.com",
+ comment = c(ORCID = "0000-0002-2478-8475"))
+Description: A 'shiny' application to perform Short Tandem Repeat (STR, also
+ known as microsatellite) data analysis. The application allows one to
+ compute forensic parameters and population genetics indices, investigate
+ population structure through various methods, and generate relevant data
+ visualisations. It also implements file conversion to other popular formats.
+Depends: R (>= 3.5.0)
+License: GPL-3
Encoding: UTF-8
-LazyData: true
+BugReports: https://github.com/agouy/straf/issues
Roxygen: list(markdown = TRUE)
shiny (>= 1.5.0),
+ shinyWidgets,
- shinyWidgets,
- ade4,
- adegenet,
- pegas,
- hierfstat,
- car,
- ggrepel
+ ggrepel,
+ car,
+ ade4,
+ adegenet,
+ pegas,
+ hierfstat
# Generated by roxygen2: do not edit by hand
+import(shiny, except = c(dataTableOutput, renderDataTable))
diff --git a/R/doc_tabs.R b/R/doc_tabs.R
index 9d5ae16..a8cc09b 100644
--- a/R/doc_tabs.R
+++ b/R/doc_tabs.R
@@ -1,4 +1,3 @@
-### documentation tab
documentation_tab <- function() {
@@ -10,7 +9,6 @@ documentation_tab <- function() {
about_tab <- function() {
"About STRAF",
diff --git a/R/helpers.R b/R/helpers.R
index a315415..ea67608 100644
--- a/R/helpers.R
+++ b/R/helpers.R
@@ -1,7 +1,8 @@
### straf helpers
# EXTERNAL FUNCTIONS -----------------------------------------------------------
### convert input to genind object
+#' @importFrom adegenet as.genind df2genind pop<- locNames
+#' @importClassesFrom adegenet genind
createGenind <- function(Ifile, Imicrovariants, Incode, Iploidy) {
if(Imicrovariants == 2) {
@@ -11,7 +12,7 @@ createGenind <- function(Ifile, Imicrovariants, Incode, Iploidy) {
mat <- matrix(unlist(mat), nrow = length(mat), ncol = length(mat[[1]]),
byrow = TRUE)
- mat[mat == "0"] <- NA ###
+ mat[mat == "0"] <- NA
colnames(mat) <- mat[1,]
rownames(mat) <- mat[,1]
@@ -19,26 +20,26 @@ createGenind <- function(Ifile, Imicrovariants, Incode, Iploidy) {
loci <- unique(colnames(mat[,-1:-2]))
freqTAB <- NULL
mat2 <- mat
- mat2 <- sub("[.]","-",mat2)
+ mat2 <- sub("[.]", "-", mat2)
for(i in 1:length(loci)){
ids <- which(colnames(mat)==loci[i])
alleles <- unique(c(mat[,ids]))
alleles <- sub("[.]","-",alleles)
- alleles <- alleles[!is.na(alleles)] ###
+ alleles <- alleles[!is.na(alleles)]
nameCol <- paste(loci[i],".",alleles,sep = "")
- newmat <- matrix(NA,ncol = length(nameCol),nrow = dim(mat)[1])
+ newmat <- matrix(NA, ncol = length(nameCol), nrow = dim(mat)[1])
for(ii in 1:length(alleles)){
- newmat[,ii] <- apply(mat2[,ids]==alleles[ii],1,sum)
+ newmat[, ii] <- apply(mat2[, ids] == alleles[ii], 1, sum)
colnames(newmat) <- nameCol
freqTAB <- cbind(freqTAB,newmat)
- rownames(freqTAB) <- mat[,1]
- colnames(freqTAB) <- sub(" ","",colnames(freqTAB))
+ rownames(freqTAB) <- mat[, 1]
+ colnames(freqTAB) <- sub(" ", "", colnames(freqTAB))
dat2 <- as.genind(tab = freqTAB)
- pop(dat2) <- mat[,"pop"]
+ pop(dat2) <- mat[, "pop"]
} else {
dat <- read.table(Ifile$datapath, header = TRUE,
@@ -59,7 +60,6 @@ createGenind <- function(Ifile, Imicrovariants, Incode, Iploidy) {
dat2 <- df2genind(dat[, -1:-2],ncode = switch(
Incode, "2" = 2, "3" = 3), ploidy = switch(
Iploidy, Diploid = 2, Haploid = 1))
@@ -73,20 +73,14 @@ createGenind <- function(Ifile, Imicrovariants, Incode, Iploidy) {
# input: genind object
# output: a list of allele frequencies tables
getFreqAllPop <- function(data) {
freq <- list()
freq$all <- getFreqFromGenind(data)
for(popu in unique(data$pop)) {
freq <- c(freq, x = NA)
mat <- getFreqFromGenind(data[data@pop == popu, ])
freq$x <- mat
names(freq)[length(freq)] <- popu
@@ -95,12 +89,8 @@ getFreqAllPop <- function(data) {
# input: genind object
# output: an allele frequencies tables
getFreqFromGenind <- function(data) {
freq <- apply(data@tab, 2, sum, na.rm = TRUE)
- nam <- strsplit(
- names(freq),
- split = "[.]"
- )
+ nam <- strsplit(names(freq), split = "[.]")
loc <- as.factor(unlist(
lapply(nam, function(x) x[1])
@@ -132,138 +122,11 @@ getFreqFromGenind <- function(data) {
-## getIndicesAllPop
-## returns indices for each population
-# input: genind object
-# output: a list of indices tables
-getIndicesAllPop <- function(data, hw = FALSE, hwperm = 1000, ploidy = "Diploid") {
- ind <- list()
- ind$all <- getIndicesFromGenind(data, hw, hwperm, ploidy)
- for(popu in unique(data$pop)) {
- ind <- c(ind, x = NA)
- mat <- getIndicesFromGenind(data[data@pop == popu, ], hw, hwperm, ploidy)
- ind$x <- mat
- names(ind)[length(ind)] <- popu
- }
- return(ind)
-## getIndicesFromGenind
-## returns indices for a given population
-# input: genind object
-# output: an indices table
-getIndicesFromGenind <- function(data,
- hw = FALSE,
- hwperm = 1000,
- ploidy = "Diploid") {
- freq <- apply(data@tab, 2, sum, na.rm = TRUE)
- nam <- strsplit(
- names(freq),
- split="[.]"
- )
- loc <- as.factor(unlist(
- lapply(nam, function(x) x[1])
- ))
- alle <- as.numeric(unlist(
- lapply(nam, function(x) sub("-", ".", x[2]))
- ))
- DAT <- data.frame(freq, loc, alle)
- N <- tapply(DAT$freq, DAT$loc, sum)
- DAT$frequency <- DAT$freq / N[DAT$loc]
- for(i in unique(loc)) {
- #FR <- c(DAT$frequency[names(DAT$frequency) == i])
- FR <- c(DAT$frequency[DAT$loc == i])
- xu <- outer(FR, FR, "fu")
- som <- sum(xu[lower.tri(xu)])
- PIC[i] <- 1 - sum(FR ^ 2) - som
- }
- Nall <- tapply(
- DAT[DAT$freq>0, ]$freq,
- DAT[DAT$freq>0, ]$loc,
- length
- )
- GD <- tapply(
- DAT$frequency,
- DAT$loc,
- function(x) 1 - sum(x ^ 2)
- )
- GD <- GD * N / (N - 1)
- PIC <- PIC[names(GD)]
- D2 <- genind2loci(data)
- sumloc <- summary(D2)[names(GD)]
- PM1 <- lapply(sumloc, function(x) {
- sum((x$genotype / sum(x$genotype)) ^ 2)
- })
- PM <- unlist(PM1)
- DF <- data.frame(
- locus = names(GD),
- N = N,
- Nall = Nall,
- GD = GD,
- PIC = PIC,
- PM = PM,
- PD = 1 - PM
- )
- if(ploidy == "Diploid") {
- DF$Hobs <- adegenet::summary(data)$Hobs[names(GD)]
- DF$PE <- (DF$Hobs ^ 2) * (1 - 2 * (DF$Hobs) * ((1 - DF$Hobs) ^ 2))
- DF$TPI <- 1 / (2 * (1 - DF$Hobs))
- }
- if(length(unique(data@pop)) > 1 & length(locNames(data)) > 1) {
- basicstat <- basic.stats(
- data,
- diploid = switch(ploidy, Diploid = TRUE, Haploid = FALSE),
- digits = 4
- )$perloc
- rownames(basicstat) <- as.character(unique(data@loc.fac))
- Fst <- wc(
- data,
- diploid = switch(ploidy, Diploid = TRUE, Haploid = FALSE)
- )$per.loc$FST
- names(Fst) <- as.character(unique(data@loc.fac))
- DF$Fst <- Fst[names(GD)]
- # DF$Fst <- basicstat[names(GD), "Fst"]
- DF$Ht <- basicstat[names(GD), "Ht"]
- DF$Fis <- basicstat[names(GD), "Fis"]
- }
- if(ploidy == "Diploid" & hw) {
- withProgress(message = 'Performing HW test...', value = 0, {
- DF$pHW <- hw.test(data, B = hwperm)[names(GD), 4]
- })
- }
- return(DF)
-fu <- function(a, b){
- 2 * (a ^ 2) * (b ^ 2)
plotPCA <- function(pca, popus, coul, axis) {
- var1 <- round(100*(pca$eig/sum(pca$eig))[axis[1]], 2)
- var2 <- round(100*(pca$eig/sum(pca$eig))[axis[2]], 2)
+ var1 <- round(100 * (pca$eig / sum(pca$eig))[axis[1]], 2)
+ var2 <- round(100 * (pca$eig / sum(pca$eig))[axis[2]], 2)
plot(pca$li[, axis[1]],
pca$li[, axis[2]],
@@ -279,7 +142,7 @@ plotPCA <- function(pca, popus, coul, axis) {
sapply(unique(popus), function(x) {
- ellipse(
+ car::ellipse(
c(mean(pca$li[popus %in% x, axis[1]]), mean(pca$li[popus %in% x, axis[2]])),
cov(pca$li[, axis[1:2]]),
@@ -309,208 +172,3 @@ plotPCA <- function(pca, popus, coul, axis) {
-straf2genepop <- function(f.name, ploidy = 2) {
- df <- readLines(f.name)
- spt <- do.call("rbind", strsplit(df, "\t"))
- colnames(spt) <- spt[1, ]
- df <- as.data.frame(spt[-1, ])
- df_tmp <- lapply(df[, -1:-2], function(x) gsub("[.]", "", x))
- # add leading zeros
- df_tmp2 <- lapply(df_tmp, function(x) {
- x[nchar(x) == 1] <- paste0(x[nchar(x) == 1], "00")
- x[nchar(x) == 2] <- paste0(x[nchar(x) == 2], "0")
- if(any(nchar(x) != 3)) stop("Error while converting allele labels.")
- return(x)
- })
- # concatenate
- idx <- seq_len(length(df_tmp2))
- if(ploidy == 2) {
- ids <- idx %% 2
- df_out <- list()
- for(i in idx[as.logical(ids)]) {
- nm <- names(df_tmp2[i])
- nm <- gsub(" ", "", nm)
- df_out[[nm]] <- paste0(df_tmp2[[i]], df_tmp2[[i + 1]])
- }
- } else if (ploidy == 1) {
- df_out <- list()
- for(i in idx) {
- nm <- names(df_tmp2[i])
- nm <- gsub(" ", "", nm)
- df_out[[nm]] <- df_tmp2[[i]]
- }
- }
- df_out <- as.data.frame(df_out)
- first.line <- "STRAF-generated GENEPOP input file."
- loci <- colnames(df_out)
- str_out <- apply(df_out, 1, paste0, collapse = "\t")
- ## get pops
- populations <- unique(df$pop)
- vec_out <- c()
- for(i in populations) {
- idx <- df$pop %in% i
- vec_out <- c(
- vec_out,
- "Pop",
- paste(df[idx, ]$ind, str_out[idx], sep = "\t,\t")
- )
- }
- ## write file
- output <- c(first.line, loci, vec_out)
- output <- paste(output, "\n", collapse = "")
- return(output)
-straf2familias <- function(f.name) {
- df <- readLines(f.name)
- spt <- do.call("rbind", strsplit(df, "\t"))
- colnames(spt) <- spt[1, ]
- df <- as.data.frame(spt[-1, ])
- df_tmp <- df[, -1:-2]
- # add leading zeros
- # concatenate
- idx <- seq_len(length(df_tmp))
- ids <- as.logical(idx %% 2)
- df_out <- list()
- for(i in idx[ids]) {
- nm <- names(df_tmp[i])
- nm <- gsub(" ", "", nm)
- df_out[[nm]] <- c(df_tmp[[i]], df_tmp[[i + 1]])
- }
- tbs <- lapply(df_out, table)
- prop.tb <- lapply(tbs, prop.table)
- str.list <- lapply(prop.tb, function(x) {
- vec <- x
- str_loc <- paste0(names(vec), "\t", unname(vec), collapse = "\n")
- return(str_loc)
- })
- output <- paste(names(prop.tb), str.list, sep = "\n")
- out <- paste(output, collapse = "\n\n")
- out <- paste0(out, "\n")
- return(out)
-straf2arlequin <- function(f.name) {
- df <- readLines(f.name)
- spt <- do.call("rbind", strsplit(df, "\t"))
- colnames(spt) <- spt[1, ]
- df <- as.data.frame(spt[-1, ])
- df_tmp2 <- list()
- for(i in seq_len(ncol(df[, -1:-2]))) {
- x <- df[, -1:-2][, i]
- if(i %% 2 != 0) {
- x2 <- df[, -1:-2][, i + 1]
- dot_idx <- grep("[.]", x)
- dot_idx2 <- grep("[.]", x2)
- if(length(dot_idx) > 0 | length(dot_idx2) > 0) {
- x <- gsub("[.]", "", x)
- x[-dot_idx] <- paste0(x[-dot_idx], "0")
- x2 <- gsub("[.]", "", x2)
- x2[-dot_idx2] <- paste0(x2[-dot_idx2], "0")
- if(length(dot_idx) == 0) x <- paste0(x, "0")
- if(length(dot_idx2) == 0) x2 <- paste0(x2, "0")
- }
- df_tmp2[[i]] <- x
- df_tmp2[[i+1]] <- x2
- }
- }
- df_tmp2 <- as.data.frame(df_tmp2)
- # concatenate
- idx <- seq_len(length(df_tmp2))
- ids <- as.logical(idx %% 2)
- out_str <- c()
- for(pp in unique(df$pop)) {
- df_pop <- df[df$pop == pp, ]
- out_str <- c(out_str, paste0('SampleName="', pp, '"\nSampleSize=',nrow(df_pop),'\nSampleData={\n'))
- for(i in which(df$pop == pp)) {
- samp_nm <- df[i, 1]
- l1 <- paste0(c(samp_nm, "1", unname(unlist(df_tmp2[i, c(idx[ids])]))), collapse = "\t")
- l2 <- paste0(c("", "", unname(unlist(df_tmp2[i, c(idx[!ids])]))), collapse = "\t")
- out_str <- c(out_str, l1, l2)
- }
- out_str <- c(out_str, "}\n\n")
- }
- npop <- length(unique(df$pop))
- header <- paste0('[Profile]\nTitle="STRAF-generated Arlequin file."\nNbSamples=',npop,'\nDataType=MICROSAT\n
- output <- c(header, out_str)
- ## write file
- output <- paste(output, "\n", collapse = "")
- return(output)
-freq_to_mds <- function(fname) {
- ln <- readLines(fname)
- ln2 <- lapply(ln, function(x) strsplit(x, ",")[[1]])
- ln3 <- lapply(ln2, function(x) {
- if(sum(nchar(x[-1]) == 0) == length(x[-1])) return(x[1])
- else return(x)
- })
- hd <- lengths(ln3)
- names_idx <- which(hd == 1)
- st_idx <- names_idx + 1
- en_idx <- names_idx - 1
- en_idx <- c(en_idx[-1], length(ln2))
- df <- lapply(seq_along(names_idx), function(i) {
- loc_id <- names_idx[i]
- loc_name <- ln3[[loc_id]]
- if(en_idx[i] - st_idx[i] > 1) {
- mat <- do.call(rbind, ln2[st_idx[i]:en_idx[i]])
- colnames(mat) <- mat[1, ]
- mat[mat == ""] <- "0"
- df <- as.data.frame(mat[-1:-2, ])
- colnames(df) <- gsub(pattern = " ", replacement = "_", colnames(df))
- colnames(df) <- gsub(pattern = "\"", replacement = "", colnames(df))
- df_long <- gather(df, location, frequency, -Allele, factor_key=TRUE)
- df_long$locus <- loc_name
- return(df_long)
- } else {
- return(NULL)
- }
- })
- df_l <- do.call(rbind, df)
- df_l$frequency <- as.numeric(df_l$frequency)
- df_l$location <- as.character(df_l$location)
- tt <- reshape2::acast(df_l, location ~ locus + Allele, value.var = 'frequency', fun.aggregate = mean, fill = -1)
- ct <- rownames(tt)
- tt <- tt %>% as_tibble()
- df_f <- tt %>% as_tibble() %>% mutate_all(~ifelse(.x == -1, NA, .x)) #mean(.x[.x != -1], na.rm = TRUE)
- matt <- (as.matrix(df_f))
- rownames(matt) <- ct
- return(matt)
diff --git a/R/module_data.R b/R/module_data.R
index 9dc3c5d..a96e92a 100644
--- a/R/module_data.R
+++ b/R/module_data.R
@@ -1,4 +1,7 @@
### Input data module
+#' @importFrom shinyWidgets awesomeCheckbox
+#' @importFrom DT dataTableOutput renderDataTable datatable
+#' @importFrom openxlsx write.xlsx
data_UI <- function(id) {
ns <- NS(id)
@@ -8,7 +11,7 @@ data_UI <- function(id) {
h3("Allele frequencies per locus"),
- awesomeCheckbox(
+ shinyWidgets::awesomeCheckbox(
ns('displayAlleleFreq'), 'Plot the distribution of allele frequencies',
@@ -19,7 +22,7 @@ data_UI <- function(id) {
- awesomeCheckbox(
+ shinyWidgets::awesomeCheckbox(
'Display a table of allele frequencies',
@@ -81,7 +84,7 @@ data_Server <- function(id, getgenind, getData, barplotcolor, transparency, widt
DAT$freq[DAT$loc == i],
names.arg = DAT$alle[DAT$loc == i],
main = i,
- col = transp(barplotcolor(), transparency()),
+ col = adegenet::transp(barplotcolor(), transparency()),
border = 0
diff --git a/R/module_file_conversion.R b/R/module_file_conversion.R
index 9a9826f..907925c 100644
--- a/R/module_file_conversion.R
+++ b/R/module_file_conversion.R
@@ -51,4 +51,214 @@ file_conv_Server <- function(id, fpath, ploidy) {
- )}
\ No newline at end of file
+ )}
+straf2genepop <- function(f.name, ploidy = 2) {
+ df <- readLines(f.name)
+ spt <- do.call("rbind", strsplit(df, "\t"))
+ colnames(spt) <- spt[1, ]
+ df <- as.data.frame(spt[-1, ])
+ df_tmp <- lapply(df[, -1:-2], function(x) gsub("[.]", "", x))
+ # add leading zeros
+ df_tmp2 <- lapply(df_tmp, function(x) {
+ x[nchar(x) == 1] <- paste0(x[nchar(x) == 1], "00")
+ x[nchar(x) == 2] <- paste0(x[nchar(x) == 2], "0")
+ if(any(nchar(x) != 3)) stop("Error while converting allele labels.")
+ return(x)
+ })
+ # concatenate
+ idx <- seq_len(length(df_tmp2))
+ if(ploidy == 2) {
+ ids <- idx %% 2
+ df_out <- list()
+ for(i in idx[as.logical(ids)]) {
+ nm <- names(df_tmp2[i])
+ nm <- gsub(" ", "", nm)
+ df_out[[nm]] <- paste0(df_tmp2[[i]], df_tmp2[[i + 1]])
+ }
+ } else if (ploidy == 1) {
+ df_out <- list()
+ for(i in idx) {
+ nm <- names(df_tmp2[i])
+ nm <- gsub(" ", "", nm)
+ df_out[[nm]] <- df_tmp2[[i]]
+ }
+ }
+ df_out <- as.data.frame(df_out)
+ first.line <- "STRAF-generated GENEPOP input file."
+ loci <- colnames(df_out)
+ str_out <- apply(df_out, 1, paste0, collapse = "\t")
+ ## get pops
+ populations <- unique(df$pop)
+ vec_out <- c()
+ for(i in populations) {
+ idx <- df$pop %in% i
+ vec_out <- c(
+ vec_out,
+ "Pop",
+ paste(df[idx, ]$ind, str_out[idx], sep = "\t,\t")
+ )
+ }
+ ## write file
+ output <- c(first.line, loci, vec_out)
+ output <- paste(output, "\n", collapse = "")
+ return(output)
+straf2familias <- function(f.name) {
+ df <- readLines(f.name)
+ spt <- do.call("rbind", strsplit(df, "\t"))
+ colnames(spt) <- spt[1, ]
+ df <- as.data.frame(spt[-1, ])
+ df_tmp <- df[, -1:-2]
+ # add leading zeros
+ # concatenate
+ idx <- seq_len(length(df_tmp))
+ ids <- as.logical(idx %% 2)
+ df_out <- list()
+ for(i in idx[ids]) {
+ nm <- names(df_tmp[i])
+ nm <- gsub(" ", "", nm)
+ df_out[[nm]] <- c(df_tmp[[i]], df_tmp[[i + 1]])
+ }
+ tbs <- lapply(df_out, table)
+ prop.tb <- lapply(tbs, prop.table)
+ str.list <- lapply(prop.tb, function(x) {
+ vec <- x
+ str_loc <- paste0(names(vec), "\t", unname(vec), collapse = "\n")
+ return(str_loc)
+ })
+ output <- paste(names(prop.tb), str.list, sep = "\n")
+ out <- paste(output, collapse = "\n\n")
+ out <- paste0(out, "\n")
+ return(out)
+straf2arlequin <- function(f.name) {
+ df <- readLines(f.name)
+ spt <- do.call("rbind", strsplit(df, "\t"))
+ colnames(spt) <- spt[1, ]
+ df <- as.data.frame(spt[-1, ])
+ df_tmp2 <- list()
+ for(i in seq_len(ncol(df[, -1:-2]))) {
+ x <- df[, -1:-2][, i]
+ if(i %% 2 != 0) {
+ x2 <- df[, -1:-2][, i + 1]
+ dot_idx <- grep("[.]", x)
+ dot_idx2 <- grep("[.]", x2)
+ if(length(dot_idx) > 0 | length(dot_idx2) > 0) {
+ x <- gsub("[.]", "", x)
+ x[-dot_idx] <- paste0(x[-dot_idx], "0")
+ x2 <- gsub("[.]", "", x2)
+ x2[-dot_idx2] <- paste0(x2[-dot_idx2], "0")
+ if(length(dot_idx) == 0) x <- paste0(x, "0")
+ if(length(dot_idx2) == 0) x2 <- paste0(x2, "0")
+ }
+ df_tmp2[[i]] <- x
+ df_tmp2[[i+1]] <- x2
+ }
+ }
+ df_tmp2 <- as.data.frame(df_tmp2)
+ # concatenate
+ idx <- seq_len(length(df_tmp2))
+ ids <- as.logical(idx %% 2)
+ out_str <- c()
+ for(pp in unique(df$pop)) {
+ df_pop <- df[df$pop == pp, ]
+ out_str <- c(out_str, paste0('SampleName="', pp, '"\nSampleSize=',nrow(df_pop),'\nSampleData={\n'))
+ for(i in which(df$pop == pp)) {
+ samp_nm <- df[i, 1]
+ l1 <- paste0(c(samp_nm, "1", unname(unlist(df_tmp2[i, c(idx[ids])]))), collapse = "\t")
+ l2 <- paste0(c("", "", unname(unlist(df_tmp2[i, c(idx[!ids])]))), collapse = "\t")
+ out_str <- c(out_str, l1, l2)
+ }
+ out_str <- c(out_str, "}\n\n")
+ }
+ npop <- length(unique(df$pop))
+ header <- paste0('[Profile]\nTitle="STRAF-generated Arlequin file."\nNbSamples=',npop,'\nDataType=MICROSAT\n
+ output <- c(header, out_str)
+ ## write file
+ output <- paste(output, "\n", collapse = "")
+ return(output)
+#' @importFrom reshape2 acast
+#' @importFrom tidyr gather
+freq_to_mds <- function(fname) {
+ ln <- readLines(fname)
+ ln2 <- lapply(ln, function(x) strsplit(x, ",")[[1]])
+ ln3 <- lapply(ln2, function(x) {
+ if(sum(nchar(x[-1]) == 0) == length(x[-1])) return(x[1])
+ else return(x)
+ })
+ hd <- lengths(ln3)
+ names_idx <- which(hd == 1)
+ st_idx <- names_idx + 1
+ en_idx <- names_idx - 1
+ en_idx <- c(en_idx[-1], length(ln2))
+ df <- lapply(seq_along(names_idx), function(i) {
+ loc_id <- names_idx[i]
+ loc_name <- ln3[[loc_id]]
+ if(en_idx[i] - st_idx[i] > 1) {
+ mat <- do.call(rbind, ln2[st_idx[i]:en_idx[i]])
+ colnames(mat) <- mat[1, ]
+ mat[mat == ""] <- "0"
+ df <- as.data.frame(mat[-1:-2, ])
+ colnames(df) <- gsub(pattern = " ", replacement = "_", colnames(df))
+ colnames(df) <- gsub(pattern = "\"", replacement = "", colnames(df))
+ Allele <- NA
+ df_long <- tidyr::gather(df, location, frequency, -Allele, factor_key=TRUE)
+ df_long$locus <- loc_name
+ return(df_long)
+ } else {
+ return(NULL)
+ }
+ })
+ df_l <- do.call(rbind, df)
+ df_l$frequency <- as.numeric(df_l$frequency)
+ df_l$location <- as.character(df_l$location)
+ tt <- reshape2::acast(df_l, location ~ locus + Allele, value.var = 'frequency', fun.aggregate = mean, fill = -1)
+ ct <- rownames(tt)
+ tt <- tt %>% as_tibble()
+ df_f <- tt %>% as_tibble() %>% mutate_all(~ifelse(.x == -1, NA, .x)) #mean(.x[.x != -1], na.rm = TRUE)
+ matt <- (as.matrix(df_f))
+ rownames(matt) <- ct
+ return(matt)
diff --git a/R/module_for_popgen.R b/R/module_for_popgen.R
index ffec30a..639d895 100644
--- a/R/module_for_popgen.R
+++ b/R/module_for_popgen.R
@@ -1,4 +1,5 @@
### Module
+#' @importFrom shinyWidgets awesomeCheckbox
for_UI <- function(id) {
ns <- NS(id)
@@ -122,7 +123,8 @@ popgen_UI <- function(id) {
-for_popgen_Server <- function(id, getgenind, popnames, ploidy) {
+#' @importFrom hierfstat pairwise.WCfst genind2hierfstat
+for_popgen_Server <- function(id, getgenind, popnames, ploidy, barplotcolor, transparency, cexaxis) {
function(input, output, session) {
@@ -172,7 +174,7 @@ for_popgen_Server <- function(id, getgenind, popnames, ploidy) {
if(is.null(input$selectPop2)) taB <- reacIndices()[[1]]
else taB <- reacIndices()[[input$selectPop2]]
- taB[, ! colnames(taB) %in% c("Ht", "Fis", "Fst")],
+ taB[, !colnames(taB) %in% c("Ht", "Fis", "Fst")],
file, sep = "\t", row.names = FALSE
@@ -244,7 +246,7 @@ for_popgen_Server <- function(id, getgenind, popnames, ploidy) {
# plot in the reactive UI
- output$plotIndices <- renderPlotly({
+ output$plotIndices <- plotly::renderPlotly({
if(is.null(input$selectPop2)) return(NULL)
else taB <- reacIndices()[[input$selectPop2]]
@@ -254,7 +256,7 @@ for_popgen_Server <- function(id, getgenind, popnames, ploidy) {
if(is.null(input$plotIndicesFOR)) return(NULL)
- fig <- plot_ly(
+ fig <- plotly::plot_ly(
x = dat[, input$plotIndicesFOR],
y = dat[, "locus"],
name = "bp_for",
@@ -264,8 +266,8 @@ for_popgen_Server <- function(id, getgenind, popnames, ploidy) {
"\nValue: ", round(dat[, input$plotIndicesFOR], 4)
hoverinfo = 'text',
- marker = list(color = transp(input$barplotcolor, input$transparency))
- ) %>% layout(
+ marker = list(color = adegenet::transp(barplotcolor(), transparency()))
+ ) %>% plotly::layout(
xaxis = list(title = input$plotIndicesFOR, zeroline = FALSE),
yaxis = list(title = "Locus")
@@ -274,7 +276,7 @@ for_popgen_Server <- function(id, getgenind, popnames, ploidy) {
output$plotFOR <- renderUI({
- plotlyOutput(ns('plotIndices'))
+ plotly::plotlyOutput(ns('plotIndices'))
output$plotIndicesPopgen <- renderPlot({
@@ -293,10 +295,10 @@ for_popgen_Server <- function(id, getgenind, popnames, ploidy) {
names.arg = datpl[, "locus"],
horiz = TRUE,
las = 1,
- col = transp(input$barplotcolor, input$transparency),
- border = as.numeric(input$borderbarplot),
- cex.axis = input$cexaxis,
- cex.names = input$cexaxis,
+ col = transp(barplotcolor(), transparency()),
+ border = 0,
+ cex.axis = cexaxis(),
+ cex.names = cexaxis(),
xlab = input$plotIndicesPG
@@ -314,7 +316,7 @@ for_popgen_Server <- function(id, getgenind, popnames, ploidy) {
if (length(unique(dat2@pop)) == 1)
stop("Multiple populations are required to perform this analysis")
- matFST <- pairwise.WCfst(genind2hierfstat(dat2))
+ matFST <- hierfstat::pairwise.WCfst(hierfstat::genind2hierfstat(dat2))
matFST[lower.tri(matFST)] <- NA
@@ -361,9 +363,9 @@ for_popgen_Server <- function(id, getgenind, popnames, ploidy) {
for(i in 1:nloc){
for(ii in 1:nloc){
+ return(length(M) > 30)
outputOptions(output, 'LD30', suspendWhenHidden=FALSE)
@@ -398,15 +400,9 @@ for_popgen_Server <- function(id, getgenind, popnames, ploidy) {
M <- -log10(reacLDtable())
M[lower.tri(M)] <- NA
- col <- redpal(100)
+ col <- adegenet::redpal(100)
- image(
- M,
- col = col,
- frame = F,
- xaxt = "n",
- yaxt = "n"
- )
+ image(M, col = col, frame = F, xaxt = "n", yaxt = "n")
at = seq(0, 1, length.out = ncol(M)),
@@ -424,9 +420,7 @@ for_popgen_Server <- function(id, getgenind, popnames, ploidy) {
- output$plotLD2 <- renderUI({
- plotOutput(ns('plotLD'))
- })
+ output$plotLD2 <- renderUI({plotOutput(ns('plotLD'))})
#plot p-values distribution
output$plotLDpval <- renderPlot({
@@ -488,4 +482,126 @@ for_popgen_Server <- function(id, getgenind, popnames, ploidy) {
\ No newline at end of file
+## getIndicesAllPop
+## returns indices for each population
+# input: genind object
+# output: a list of indices tables
+getIndicesAllPop <- function(data, hw = FALSE, hwperm = 1000, ploidy = "Diploid") {
+ ind <- list()
+ ind$all <- getIndicesFromGenind(data, hw, hwperm, ploidy)
+ for(popu in unique(data$pop)) {
+ ind <- c(ind, x = NA)
+ mat <- getIndicesFromGenind(data[data@pop == popu, ], hw, hwperm, ploidy)
+ ind$x <- mat
+ names(ind)[length(ind)] <- popu
+ }
+ return(ind)
+## getIndicesFromGenind
+## returns indices for a given population
+# input: genind object
+# output: an indices table
+#' @importFrom pegas genind2loci
+getIndicesFromGenind <- function(data,
+ hw = FALSE,
+ hwperm = 1000,
+ ploidy = "Diploid") {
+ freq <- apply(data@tab, 2, sum, na.rm = TRUE)
+ nam <- strsplit(
+ names(freq),
+ split="[.]"
+ )
+ loc <- as.factor(unlist(
+ lapply(nam, function(x) x[1])
+ ))
+ alle <- as.numeric(unlist(
+ lapply(nam, function(x) sub("-", ".", x[2]))
+ ))
+ DAT <- data.frame(freq, loc, alle)
+ N <- tapply(DAT$freq, DAT$loc, sum)
+ DAT$frequency <- DAT$freq / N[DAT$loc]
+ for(i in unique(loc)) {
+ #FR <- c(DAT$frequency[names(DAT$frequency) == i])
+ FR <- c(DAT$frequency[DAT$loc == i])
+ xu <- outer(FR, FR, "fu")
+ som <- sum(xu[lower.tri(xu)])
+ PIC[i] <- 1 - sum(FR ^ 2) - som
+ }
+ Nall <- tapply(
+ DAT[DAT$freq > 0, ]$freq,
+ DAT[DAT$freq > 0, ]$loc,
+ length
+ )
+ GD <- tapply(DAT$frequency, DAT$loc, function(x) 1 - sum(x ^ 2))
+ GD <- GD * N / (N - 1)
+ PIC <- PIC[names(GD)]
+ D2 <- pegas::genind2loci(data)
+ sumloc <- summary(D2)[names(GD)]
+ PM1 <- lapply(sumloc, function(x) {
+ sum((x$genotype / sum(x$genotype)) ^ 2)
+ })
+ PM <- unlist(PM1)
+ DF <- data.frame(
+ locus = names(GD),
+ N = N,
+ Nall = Nall,
+ GD = GD,
+ PIC = PIC,
+ PM = PM,
+ PD = 1 - PM
+ )
+ if(ploidy == "Diploid") {
+ DF$Hobs <- adegenet::summary(data)$Hobs[names(GD)]
+ DF$PE <- (DF$Hobs ^ 2) * (1 - 2 * (DF$Hobs) * ((1 - DF$Hobs) ^ 2))
+ DF$TPI <- 1 / (2 * (1 - DF$Hobs))
+ }
+ if(length(unique(data@pop)) > 1 & length(locNames(data)) > 1) {
+ basicstat <- hierfstat::basic.stats(
+ data,
+ diploid = switch(ploidy, Diploid = TRUE, Haploid = FALSE),
+ digits = 4
+ )$perloc
+ rownames(basicstat) <- as.character(unique(data@loc.fac))
+ Fst <- hierfstat::wc(
+ data,
+ diploid = switch(ploidy, Diploid = TRUE, Haploid = FALSE)
+ )$per.loc$FST
+ names(Fst) <- as.character(unique(data@loc.fac))
+ DF$Fst <- Fst[names(GD)]
+ # DF$Fst <- basicstat[names(GD), "Fst"]
+ DF$Ht <- basicstat[names(GD), "Ht"]
+ DF$Fis <- basicstat[names(GD), "Fis"]
+ }
+ if(ploidy == "Diploid" & hw) {
+ withProgress(message = 'Performing HW test...', value = 0, {
+ DF$pHW <- pegas::hw.test(data, B = hwperm)[names(GD), 4]
+ })
+ }
+ return(DF)
+fu <- function(a, b) {2 * (a ^ 2) * (b ^ 2)} # PIC cross-term 2 * a^2 * b^2, applied pairwise via outer()
diff --git a/R/module_pca_mds.R b/R/module_pca_mds.R
index b0154d8..2fb6ef5 100644
--- a/R/module_pca_mds.R
+++ b/R/module_pca_mds.R
@@ -1,5 +1,5 @@
### PCA and MDS module
+#' @importFrom shinyWidgets awesomeCheckbox
pca_mds_UI <- function(id) {
ns <- NS(id)
@@ -45,7 +45,9 @@ pca_mds_UI <- function(id) {
+#' @importFrom adegenet genind2genpop transp funky dist.genpop loadingplot makefreq pop
+#' @importFrom ggrepel geom_text_repel
+#' @importFrom ade4 dudi.pca
pca_mds_Server <- function(id, getgenind) {
@@ -100,8 +102,8 @@ pca_mds_Server <- function(id, getgenind) {
dst <- dist.genpop(obj, method = 1)
MDS <- cmdscale(dst)
MDS <- data.frame(ax1 = MDS[, 1], ax2 = MDS[, 2], pop = rownames(MDS))
- p <- ggplot(MDS, aes(x=ax1, y=ax2, color = pop, label = pop)) +
+ p <- ggplot(MDS, aes(x = .data$ax1, y = .data$ax2, color = pop, label = pop)) +
geom_point() +
geom_text_repel() +
labs( x = "MDS Axis 1", y = "MDS Axis 2", title = "MDS based on Nei's distance") +
diff --git a/R/module_reference_mds.R b/R/module_reference_mds.R
index 657a5c9..0154ad4 100644
--- a/R/module_reference_mds.R
+++ b/R/module_reference_mds.R
@@ -1,4 +1,5 @@
## Reference population MDS module
+#' @importFrom shinyWidgets awesomeCheckbox
ref_mds_UI <- function(id) {
ns <- NS(id)
@@ -17,8 +18,7 @@ ref_mds_UI <- function(id) {
+#' @importFrom adegenet genind2genpop
ref_mds_Server <- function(id, getgenind) {
@@ -101,7 +101,7 @@ ref_mds_Server <- function(id, getgenind) {
mds <- cmdscale(d)
MDS <- data.frame(ax1 = mds[, 1], ax2 = mds[, 2], pop = rownames(mds))
- p <- ggplot(MDS, aes(x=ax1, y=ax2, color = pop, label = pop)) +
+ p <- ggplot(MDS, aes(x=.data$ax1, y=.data$ax2, color = pop, label = pop)) +
geom_point() +
geom_text_repel(max.overlaps = 50) +
labs( x = "MDS Axis 1", y = "MDS Axis 2", title = "MDS based on Nei's distance") +
diff --git a/R/runStraf.R b/R/runStraf.R
index b910d7e..794ef2b 100644
--- a/R/runStraf.R
+++ b/R/runStraf.R
@@ -1,19 +1,17 @@
-#' @import shiny
-#' @import colourpicker
-#' @import plotly
-#' @import shinyWidgets
-#' @import ade4
-#' @import adegenet
-#' @import pegas
-#' @import hierfstat
-#' @import car
-#' @import openxlsx
-#' @import reshape2
+#' Run the STRAF application
+#' @description Main function to be called in order to start STRAF.
+#' @return Runs the shiny application.
+#' @examples
+#' # runStraf()
+#' @export
+#' @rawNamespace import(shiny, except = c(dataTableOutput, renderDataTable))
#' @import dplyr
-#' @import tidyr
#' @import ggplot2
-#' @import ggrepel
+#' @importFrom graphics abline axis barplot hist image legend par
+#' @importFrom stats as.dist cmdscale cov frequency ks.test qqplot qunif
+#' @importFrom utils read.table write.table
runStraf <- function() {
warn = -1,
diff --git a/R/sidebar.R b/R/sidebar.R
index e950ae2..b91710d 100644
--- a/R/sidebar.R
+++ b/R/sidebar.R
@@ -1,4 +1,7 @@
-### Sidebar UI code
+#' Generate the sidebar UI.
+#' @noRd
+#' @importFrom shinyWidgets awesomeCheckbox pickerInput
+#' @importFrom colourpicker colourInput
sidebarUI <- function() {
width = 3,
diff --git a/inst/application/server.R b/inst/application/server.R
index 8955310..6a549a6 100644
--- a/inst/application/server.R
+++ b/inst/application/server.R
@@ -33,7 +33,7 @@ shinyServer(function(input, output) {
if(length(unique(testGeno@pop)) > 1 & length(locNames(testGeno)) > 1) {
testGeno3 <- try(
- wc(
+ hierfstat::wc(
diploid = switch(
@@ -56,7 +56,7 @@ shinyServer(function(input, output) {
output$checkInputFile <- renderText({
- if(is.null(input$file1)) { return("Please import a data set.") }
+ if(is.null(input$file1)) { return("Please import a data set.") }
else {
X <- read.table(input$file1$datapath,
header = TRUE,
@@ -83,7 +83,7 @@ shinyServer(function(input, output) {
testGeno3 <- try(
- wc(
+ hierfstat::wc(
diploid = switch(
@@ -136,11 +136,12 @@ shinyServer(function(input, output) {
ploidy <- reactive({input$ploidy})
barplotcolor <- reactive({input$barplotcolor})
transparency <- reactive({input$transparency})
+ cexaxis <- reactive({input$cexaxis})
width <- reactive({input$width})
height <- reactive({input$height})
data_Server("data_ns", getgenind, getData, barplotcolor, transparency, width, height,popnames)
- for_popgen_Server("for_popgen", getgenind, popnames, ploidy)
+ for_popgen_Server("for_popgen", getgenind, popnames, ploidy, barplotcolor, transparency, cexaxis)
pca_mds_Server("pca_mds", getgenind)
ref_mds_Server("ref_mds", getgenind)
file_conv_Server("file_conv", reactive({input$file1$datapath}), reactive({input$ploidy}))
diff --git a/man/runStraf.Rd b/man/runStraf.Rd
new file mode 100644
index 0000000..f5b3195
--- /dev/null
+++ b/man/runStraf.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/runStraf.R
+\title{Run the STRAF application}
+Runs the shiny application.
+Main function to be called in order to start STRAF,
+# runStraf()
diff --git a/operator.json b/operator.json
- "urls": ["https://github.com/agouy/straf"]