diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index e2c2f4d..1a06db6 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -21,10 +21,10 @@ jobs: matrix: config: - {os: macos-latest, r: 'release'} - - {os: windows-latest, r: 'release'} - # - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + #- {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - # - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} @@ -38,7 +38,7 @@ jobs: - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} - http-user-agent: ${{ matrix.config.http-user-agent }} + # http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true - uses: r-lib/actions/setup-r-dependencies@v2 @@ -47,7 +47,10 @@ jobs: extra-packages: any::rcmdcheck needs: check + - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true error-on: '"error"' + + diff --git a/.gitignore b/.gitignore index ec75881..b3ec624 100644 --- a/.gitignore +++ b/.gitignore @@ -6,8 +6,3 @@ *.o .DS_Store *.Rmd -Makevars - -testscripts.R -testscript_op -tests diff --git a/DESCRIPTION b/DESCRIPTION index 33531e8..ce740f8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: LDWeaver Type: Package Title: Genomewide Epistasis Analysis on Bacteria -Version: 1.5 +Version: 1.5.1 Authors@R: person("Sudaraka", "Mallawaarachchi", email = "smallawaarachchi@gmail.com", role = c("aut", "cre")) Maintainer: Sudaraka Mallawaarachchi Description: Perform genomewide epistasis analysis by evaluating the LD structure in bacteria. @@ -10,7 +10,7 @@ Encoding: UTF-8 LazyData: true biocViews: Software Depends: R (>= 4.0.0), -Imports: +Imports: ape, Biostrings, chromoMap, @@ -39,13 +39,15 @@ Imports: utils, VariantAnnotation Suggests: - phytools, - ggtree, - ggnewscale, - spam, - spam64 + phytools, + ggtree, + ggnewscale, + spam, + spam64, + testthat (>= 3.0.0) LinkingTo: Rcpp, RcppArmadillo -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 URL: https://github.com/Sudaraka88/LDWeaver BugReports: https://github.com/Sudaraka88/LDWeaver/issues Biarch: TRUE +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 721ef0a..7ec0f0a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -52,9 +52,6 @@ importFrom(Matrix,rowSums) importFrom(Matrix,sparseMatrix) importFrom(Matrix,t) importFrom(Matrix,which) -importFrom(MatrixExtra,crossprod) -importFrom(MatrixExtra,t) -importFrom(MatrixExtra,tcrossprod) importFrom(RColorBrewer,brewer.pal) importFrom(Rcpp,sourceCpp) importFrom(RcppArmadillo,fastLm) @@ -114,4 +111,7 @@ importFrom(utils,stack) importFrom(utils,timestamp) importFrom(utils,txtProgressBar) importFrom(utils,write.table) +importMethodsFrom(MatrixExtra,crossprod) +importMethodsFrom(MatrixExtra,t) +importMethodsFrom(MatrixExtra,tcrossprod) useDynLib(LDWeaver) diff --git a/R/BacGWES.R b/R/BacGWES.R index 9362189..a6bfc6f 100644 --- a/R/BacGWES.R +++ b/R/BacGWES.R @@ -46,20 +46,24 @@ #' dset <- "full_dset" #' gbk_path <- system.file("extdata", "sample.gbk", package = "LDWeaver") #' aln_path <- system.file("extdata", "sample.aln.gz", package = "LDWeaver") -#' LDWeaver::LDWeaver(dset = dset, aln_path = aln_path, gbk_path = gbk_path, validate_ref_ann_lengths = F) +#' LDWeaver::LDWeaver(dset = dset, aln_path = aln_path, gbk_path = gbk_path, +#' validate_ref_ann_lengths = F) #' #' # Example 2- using a SNP only alignment #' dset <- "snp_dset" #' gbk_path <- system.file("extdata", "sample.gbk", package = "LDWeaver") #' aln_path <- system.file("extdata", "snp_sample.fa.gz", package = "LDWeaver") -#' pos <- as.numeric(readLines(system.file("extdata", "snp_sample.fa.pos", package = "LDWeaver"))) -#' LDWeaver::LDWeaver(dset = dset, aln_path = aln_path, aln_has_all_bases = F, pos = pos, gbk_path = gbk_path) +#' pos <- as.numeric(readLines(system.file("extdata", "snp_sample.fa.pos", +#' package = "LDWeaver"))) +#' LDWeaver::LDWeaver(dset = dset, aln_path = aln_path, aln_has_all_bases = F, +#' pos = pos, gbk_path = gbk_path) #' # Example 3 - Redoing the full analysis as a mega scale dataset #' dset <- "full_dset_spam" #' gbk_path <- system.file("extdata", "sample.gbk", package = "LDWeaver") #' aln_path <- system.file("extdata", "sample.aln.gz", package = "LDWeaver") -#' LDWeaver::LDWeaver(dset = dset, aln_path = aln_path, gbk_path = gbk_path, validate_ref_ann_lengths = F, mega_dset = T) +#' LDWeaver::LDWeaver(dset = dset, aln_path = aln_path, gbk_path = gbk_path, +#' validate_ref_ann_lengths = F, mega_dset = T) #' } #' @export LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path = NULL, gff3_path = NULL, @@ -239,7 +243,8 @@ LDWeaver = function(dset, aln_path, aln_has_all_bases = T, pos = NULL, gbk_path ######## Welcome message ######## { timestamp() - cat("\n ***** This is LDWeaver", as.character(packageVersion(pkg = "LDWeaver")), " *****") + cat("\n ***** This is LDWeaver", as.character(packageVersion(pkg = "LDWeaver")), " *****\n") + test_openmp() if(ncores > 1) cat(paste("\n\n Performing GWES analysis on:", dset, " - using", ncores, "cores\n\n")) if(ncores == 1) cat(paste("\n\n Performing GWES analysis on:", dset, "\n\n")) if(perform_SR_analysis_only) cat("Only short-range analysis requested. \n") diff --git a/R/LDSummaryPlot.R b/R/LDSummaryPlot.R index 601b9ba..1c83ea1 100644 --- a/R/LDSummaryPlot.R +++ b/R/LDSummaryPlot.R @@ -2,7 +2,7 @@ #' #' Function to generate the bird's-eye view of the genomewide LD structure (measured as weighted-MI) #' -#' @importFrom MatrixExtra crossprod +#' @importMethodsFrom MatrixExtra crossprod #' @importFrom Matrix sparseMatrix t #' @importFrom grDevices colorRampPalette png #' @importFrom heatmap3 heatmap3 @@ -97,7 +97,8 @@ genomewide_LDMap = function(lr_links_path, sr_links_path, plot_save_path, reduce cat(paste("Reducing dimensionality from", length(pos_vec) ,"x", length(pos_vec), "-> ")) x = .mat(length(pos_vec), reducer) cat(paste(ncol(x) ,"x", ncol(x), "...\n")) - reduced_mat = MatrixExtra::crossprod(x, MatrixExtra::crossprod(sprs, x)) + # reduced_mat = MatrixExtra::crossprod(x, MatrixExtra::crossprod(sprs, x)) + reduced_mat = crossprod(x, crossprod(sprs, x)) htm = as(reduced_mat, 'matrix')/reducer^2 nms = as.character(pos_vec[seq(from = 1, to = length(pos_vec), by = reducer-1)]) colnames(htm) = rownames(htm) = nms[1:nrow(htm)] diff --git a/R/RcppExports.R b/R/RcppExports.R index e2c4e3d..1346efa 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -37,6 +37,10 @@ .Call('_LDWeaver_extractRef', PACKAGE = 'LDWeaver', file) } +test_openmp <- function() { + invisible(.Call('_LDWeaver_test_openmp', PACKAGE = 'LDWeaver')) +} + .readFasta <- function(file, pos_len) { .Call('_LDWeaver_readFasta', PACKAGE = 'LDWeaver', file, pos_len) } diff --git a/R/computePairwiseMI.R b/R/computePairwiseMI.R index 690b219..1928196 100644 --- a/R/computePairwiseMI.R +++ b/R/computePairwiseMI.R @@ -5,7 +5,7 @@ #' compute short-range p-values for each short-range links. If requested, this #' function can also run the ARACNE algorithm #' -#' @importFrom MatrixExtra tcrossprod t +#' @importMethodsFrom MatrixExtra tcrossprod t #' @importFrom Matrix rowSums #' @importFrom fitdistrplus fitdist #' @importFrom data.table as.data.table @@ -80,7 +80,7 @@ perform_MI_computation = function(snp.dat, hdw, cds_var, ncores, lr_save_path = message("This feature requires spam and spam64 packages.") return(invisible()) } else { - hsq = spam::as.spam(hsq) + # hsq = spam::as.spam(hsq) } } @@ -199,41 +199,68 @@ perform_MI_computation_ACGTN = function(snp.dat, from, to, neff, hsq, cds_var, l ### mega datasets if(mega_dset){ # Using SPAM - if(!requireNamespace("spam") & !requireNamespace("spam64")){ - message("This feature requires spam and spam64 packages.") - return(invisible()) + # if(!requireNamespace("spam") & !requireNamespace("spam64")){ + # message("This feature requires spam and spam64 packages.") + # return(invisible()) + # } else { + + ## Can we make this a lot faster by coercion? + + # tAfh = spam::tcrossprod(snp.dat$snp.matrix_A[from, ], hsq); pAf = spam::rowSums(tAfh^2) + # tCfh = spam::tcrossprod(snp.dat$snp.matrix_C[from, ], hsq); pCf = spam::rowSums(tCfh^2) + # tGfh = spam::tcrossprod(snp.dat$snp.matrix_G[from, ], hsq); pGf = spam::rowSums(tGfh^2) + # tTfh = spam::tcrossprod(snp.dat$snp.matrix_T[from, ], hsq); pTf = spam::rowSums(tTfh^2) + # tNfh = spam::tcrossprod(snp.dat$snp.matrix_N[from, ], hsq); pNf = spam::rowSums(tNfh^2) + # + # if(fromISto){ + # tAth = tAfh; pAt = pAf + # tCth = tCfh; pCt = pCf + # tGth = tGfh; pGt = pGf + # tTth = tTfh; pTt = pTf + # tNth = tNfh; pNt = pNf + # } else { + # # to + # tAth = spam::tcrossprod(snp.dat$snp.matrix_A[to, ], hsq); pAt = spam::rowSums(tAth^2)#+rt*0.5 + # tCth = spam::tcrossprod(snp.dat$snp.matrix_C[to, ], hsq); pCt = spam::rowSums(tCth^2)#+rt*0.5 + # tGth = spam::tcrossprod(snp.dat$snp.matrix_G[to, ], hsq); pGt = spam::rowSums(tGth^2)#+rt*0.5 + # tTth = spam::tcrossprod(snp.dat$snp.matrix_T[to, ], hsq); pTt = spam::rowSums(tTth^2)#+rt*0.5 + # tNth = spam::tcrossprod(snp.dat$snp.matrix_N[to, ], hsq); pNt = spam::rowSums(tNth^2)#+rt*0.5 + # # } + # + # } + + tAf = spam::as.matrix(snp.dat$snp.matrix_A[from, ]); tAfh = tcrossprod(tAf, hsq); pAf = Matrix::rowSums(tAfh^2) + tCf = spam::as.matrix(snp.dat$snp.matrix_C[from, ]); tCfh = tcrossprod(tCf, hsq); pCf = Matrix::rowSums(tCfh^2) + tGf = spam::as.matrix(snp.dat$snp.matrix_G[from, ]); tGfh = tcrossprod(tGf, hsq); pGf = Matrix::rowSums(tGfh^2) + tTf = spam::as.matrix(snp.dat$snp.matrix_T[from, ]); tTfh = tcrossprod(tTf, hsq); pTf = Matrix::rowSums(tTfh^2) + tNf = spam::as.matrix(snp.dat$snp.matrix_N[from, ]); tNfh = tcrossprod(tNf, hsq); pNf = Matrix::rowSums(tNfh^2) + + if(fromISto){ + tAt = tAf; tAth = tAfh; pAt = pAf + tCt = tCf; tCth = tCfh; pCt = pCf + tGt = tGf; tGth = tGfh; pGt = pGf + tTt = tTf; tTth = tTfh; pTt = pTf + tNt = tNf; tNth = tNfh; pNt = pNf } else { - tAfh = spam::tcrossprod(snp.dat$snp.matrix_A[from, ], hsq); pAf = spam::rowSums(tAfh^2) - tCfh = spam::tcrossprod(snp.dat$snp.matrix_C[from, ], hsq); pCf = spam::rowSums(tCfh^2) - tGfh = spam::tcrossprod(snp.dat$snp.matrix_G[from, ], hsq); pGf = spam::rowSums(tGfh^2) - tTfh = spam::tcrossprod(snp.dat$snp.matrix_T[from, ], hsq); pTf = spam::rowSums(tTfh^2) - tNfh = spam::tcrossprod(snp.dat$snp.matrix_N[from, ], hsq); pNf = spam::rowSums(tNfh^2) - - if(fromISto){ - tAth = tAfh; pAt = pAf - tCth = tCfh; pCt = pCf - tGth = tGfh; pGt = pGf - tTth = tTfh; pTt = pTf - tNth = tNfh; pNt = pNf - } else { - # to - tAth = spam::tcrossprod(snp.dat$snp.matrix_A[to, ], hsq); pAt = spam::rowSums(tAth^2)#+rt*0.5 - tCth = spam::tcrossprod(snp.dat$snp.matrix_C[to, ], hsq); pCt = spam::rowSums(tCth^2)#+rt*0.5 - tGth = spam::tcrossprod(snp.dat$snp.matrix_G[to, ], hsq); pGt = spam::rowSums(tGth^2)#+rt*0.5 - tTth = spam::tcrossprod(snp.dat$snp.matrix_T[to, ], hsq); pTt = spam::rowSums(tTth^2)#+rt*0.5 - tNth = spam::tcrossprod(snp.dat$snp.matrix_N[to, ], hsq); pNt = spam::rowSums(tNth^2)#+rt*0.5 - } + # to + tAt = spam::as.matrix(snp.dat$snp.matrix_A[to, ]); tAth = tcrossprod(tAt, hsq); pAt = Matrix::rowSums(tAth^2) + tCt = spam::as.matrix(snp.dat$snp.matrix_C[to, ]); tCth = tcrossprod(tCt, hsq); pCt = Matrix::rowSums(tCth^2) + tGt = spam::as.matrix(snp.dat$snp.matrix_G[to, ]); tGth = tcrossprod(tGt, hsq); pGt = Matrix::rowSums(tGth^2) + tTt = spam::as.matrix(snp.dat$snp.matrix_T[to, ]); tTth = tcrossprod(tTt, hsq); pTt = Matrix::rowSums(tTth^2) + tNt = spam::as.matrix(snp.dat$snp.matrix_N[to, ]); tNth = tcrossprod(tNt, hsq); pNt = Matrix::rowSums(tNth^2) + # } } + } else { ### MATRIX MODE CODE DO NOT CHANGE BELOW # from # tAf = as(snp.dat$snp.matrix_A[from, ], 'unpackedMatrix'); tAfh = MatrixExtra::tcrossprod(tAf, hsq); pAf = Matrix::rowSums(tAfh^2) # crashes in linuxMint - tAf = as(snp.dat$snp.matrix_A[from, ], 'lgeMatrix'); tAfh = MatrixExtra::tcrossprod(tAf, hsq); pAf = Matrix::rowSums(tAfh^2) - tCf = as(snp.dat$snp.matrix_C[from, ], 'lgeMatrix'); tCfh = MatrixExtra::tcrossprod(tCf, hsq); pCf = Matrix::rowSums(tCfh^2) - tGf = as(snp.dat$snp.matrix_G[from, ], 'lgeMatrix'); tGfh = MatrixExtra::tcrossprod(tGf, hsq); pGf = Matrix::rowSums(tGfh^2) - tTf = as(snp.dat$snp.matrix_T[from, ], 'lgeMatrix'); tTfh = MatrixExtra::tcrossprod(tTf, hsq); pTf = Matrix::rowSums(tTfh^2) - tNf = as(snp.dat$snp.matrix_N[from, ], 'lgeMatrix'); tNfh = MatrixExtra::tcrossprod(tNf, hsq); pNf = Matrix::rowSums(tNfh^2) + tAf = as(snp.dat$snp.matrix_A[from, ], 'lgeMatrix'); tAfh = tcrossprod(tAf, hsq); pAf = Matrix::rowSums(tAfh^2) + tCf = as(snp.dat$snp.matrix_C[from, ], 'lgeMatrix'); tCfh = tcrossprod(tCf, hsq); pCf = Matrix::rowSums(tCfh^2) + tGf = as(snp.dat$snp.matrix_G[from, ], 'lgeMatrix'); tGfh = tcrossprod(tGf, hsq); pGf = Matrix::rowSums(tGfh^2) + tTf = as(snp.dat$snp.matrix_T[from, ], 'lgeMatrix'); tTfh = tcrossprod(tTf, hsq); pTf = Matrix::rowSums(tTfh^2) + tNf = as(snp.dat$snp.matrix_N[from, ], 'lgeMatrix'); tNfh = tcrossprod(tNf, hsq); pNf = Matrix::rowSums(tNfh^2) if(fromISto){ tAt = tAf; tAth = tAfh; pAt = pAf @@ -243,16 +270,16 @@ perform_MI_computation_ACGTN = function(snp.dat, from, to, neff, hsq, cds_var, l tNt = tNf; tNth = tNfh; pNt = pNf } else { # to - tAt = as(snp.dat$snp.matrix_A[to, ], 'lgeMatrix'); tAth = MatrixExtra::tcrossprod(tAt, hsq); pAt = Matrix::rowSums(tAth^2)#+rt*0.5 - tCt = as(snp.dat$snp.matrix_C[to, ], 'lgeMatrix'); tCth = MatrixExtra::tcrossprod(tCt, hsq); pCt = Matrix::rowSums(tCth^2)#+rt*0.5 - tGt = as(snp.dat$snp.matrix_G[to, ], 'lgeMatrix'); tGth = MatrixExtra::tcrossprod(tGt, hsq); pGt = Matrix::rowSums(tGth^2)#+rt*0.5 - tTt = as(snp.dat$snp.matrix_T[to, ], 'lgeMatrix'); tTth = MatrixExtra::tcrossprod(tTt, hsq); pTt = Matrix::rowSums(tTth^2)#+rt*0.5 - tNt = as(snp.dat$snp.matrix_N[to, ], 'lgeMatrix'); tNth = MatrixExtra::tcrossprod(tNt, hsq); pNt = Matrix::rowSums(tNth^2)#+rt*0.5 + tAt = as(snp.dat$snp.matrix_A[to, ], 'lgeMatrix'); tAth = tcrossprod(tAt, hsq); pAt = Matrix::rowSums(tAth^2)#+rt*0.5 + tCt = as(snp.dat$snp.matrix_C[to, ], 'lgeMatrix'); tCth = tcrossprod(tCt, hsq); pCt = Matrix::rowSums(tCth^2)#+rt*0.5 + tGt = as(snp.dat$snp.matrix_G[to, ], 'lgeMatrix'); tGth = tcrossprod(tGt, hsq); pGt = Matrix::rowSums(tGth^2)#+rt*0.5 + tTt = as(snp.dat$snp.matrix_T[to, ], 'lgeMatrix'); tTth = tcrossprod(tTt, hsq); pTt = Matrix::rowSums(tTth^2)#+rt*0.5 + tNt = as(snp.dat$snp.matrix_N[to, ], 'lgeMatrix'); tNth = tcrossprod(tNt, hsq); pNt = Matrix::rowSums(tNth^2)#+rt*0.5 } } - den = neff + MatrixExtra::tcrossprod(snp.dat$r[from], snp.dat$r[to]) * 0.5 - rft = MatrixExtra::t(MatrixExtra::tcrossprod(rf, rt))*0.25 + den = neff + tcrossprod(snp.dat$r[from], snp.dat$r[to]) * 0.5 + rft = t(tcrossprod(rf, rt))*0.25 rf = 0.5*rf rt = 0.5*rt # snp_i = 3 @@ -261,35 +288,35 @@ perform_MI_computation_ACGTN = function(snp.dat, from, to, neff, hsq, cds_var, l # t0 = Sys.time() MI = matrix(rep(0, length(from)*length(to)), nrow = length(from)) # tX = tAfh; tY = tAth; pX = pAf; pY = pAt; rX = rf; rY = rt; RXY = rft; uqX = uqf[,1]; uqY = uqt[,1] - computeMI_Sprase(MI, tAfh, tAth, pAf, pAt, rf, rt, rft, uqf[,1], uqt[,1], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tAfh, tCth, pAf, pCt, rf, rt, rft, uqf[,1], uqt[,2], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tAfh, tGth, pAf, pGt, rf, rt, rft, uqf[,1], uqt[,3], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tAfh, tTth, pAf, pTt, rf, rt, rft, uqf[,1], uqt[,4], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tAfh, tNth, pAf, pNt, rf, rt, rft, uqf[,1], uqt[,5], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - - computeMI_Sprase(MI, tCfh, tAth, pCf, pAt, rf, rt, rft, uqf[,2], uqt[,1], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tCfh, tCth, pCf, pCt, rf, rt, rft, uqf[,2], uqt[,2], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tCfh, tGth, pCf, pGt, rf, rt, rft, uqf[,2], uqt[,3], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tCfh, tTth, pCf, pTt, rf, rt, rft, uqf[,2], uqt[,4], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tCfh, tNth, pCf, pNt, rf, rt, rft, uqf[,2], uqt[,5], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - - computeMI_Sprase(MI, tGfh, tAth, pGf, pAt, rf, rt, rft, uqf[,3], uqt[,1], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tGfh, tCth, pGf, pCt, rf, rt, rft, uqf[,3], uqt[,2], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tGfh, tGth, pGf, pGt, rf, rt, rft, uqf[,3], uqt[,3], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tGfh, tTth, pGf, pTt, rf, rt, rft, uqf[,3], uqt[,4], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tGfh, tNth, pGf, pNt, rf, rt, rft, uqf[,3], uqt[,5], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - - computeMI_Sprase(MI, tTfh, tAth, pTf, pAt, rf, rt, rft, uqf[,4], uqt[,1], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tTfh, tCth, pTf, pCt, rf, rt, rft, uqf[,4], uqt[,2], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tTfh, tGth, pTf, pGt, rf, rt, rft, uqf[,4], uqt[,3], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tTfh, tTth, pTf, pTt, rf, rt, rft, uqf[,4], uqt[,4], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tTfh, tNth, pTf, pNt, rf, rt, rft, uqf[,4], uqt[,5], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - - computeMI_Sprase(MI, tNfh, tAth, pNf, pAt, rf, rt, rft, uqf[,5], uqt[,1], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tNfh, tCth, pNf, pCt, rf, rt, rft, uqf[,5], uqt[,2], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tNfh, tGth, pNf, pGt, rf, rt, rft, uqf[,5], uqt[,3], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tNfh, tTth, pNf, pTt, rf, rt, rft, uqf[,5], uqt[,4], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) - computeMI_Sprase(MI, tNfh, tNth, pNf, pNt, rf, rt, rft, uqf[,5], uqt[,5], den, ncores, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tAfh, tAth, pAf, pAt, rf, rt, rft, uqf[,1], uqt[,1], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tAfh, tCth, pAf, pCt, rf, rt, rft, uqf[,1], uqt[,2], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tAfh, tGth, pAf, pGt, rf, rt, rft, uqf[,1], uqt[,3], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tAfh, tTth, pAf, pTt, rf, rt, rft, uqf[,1], uqt[,4], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tAfh, tNth, pAf, pNt, rf, rt, rft, uqf[,1], uqt[,5], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + + computeMI_Sprase(MI, tCfh, tAth, pCf, pAt, rf, rt, rft, uqf[,2], uqt[,1], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tCfh, tCth, pCf, pCt, rf, rt, rft, uqf[,2], uqt[,2], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tCfh, tGth, pCf, pGt, rf, rt, rft, uqf[,2], uqt[,3], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tCfh, tTth, pCf, pTt, rf, rt, rft, uqf[,2], uqt[,4], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tCfh, tNth, pCf, pNt, rf, rt, rft, uqf[,2], uqt[,5], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + + computeMI_Sprase(MI, tGfh, tAth, pGf, pAt, rf, rt, rft, uqf[,3], uqt[,1], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tGfh, tCth, pGf, pCt, rf, rt, rft, uqf[,3], uqt[,2], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tGfh, tGth, pGf, pGt, rf, rt, rft, uqf[,3], uqt[,3], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tGfh, tTth, pGf, pTt, rf, rt, rft, uqf[,3], uqt[,4], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tGfh, tNth, pGf, pNt, rf, rt, rft, uqf[,3], uqt[,5], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + + computeMI_Sprase(MI, tTfh, tAth, pTf, pAt, rf, rt, rft, uqf[,4], uqt[,1], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tTfh, tCth, pTf, pCt, rf, rt, rft, uqf[,4], uqt[,2], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tTfh, tGth, pTf, pGt, rf, rt, rft, uqf[,4], uqt[,3], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tTfh, tTth, pTf, pTt, rf, rt, rft, uqf[,4], uqt[,4], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tTfh, tNth, pTf, pNt, rf, rt, rft, uqf[,4], uqt[,5], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + + computeMI_Sprase(MI, tNfh, tAth, pNf, pAt, rf, rt, rft, uqf[,5], uqt[,1], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tNfh, tCth, pNf, pCt, rf, rt, rft, uqf[,5], uqt[,2], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tNfh, tGth, pNf, pGt, rf, rt, rft, uqf[,5], uqt[,3], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tNfh, tTth, pNf, pTt, rf, rt, rft, uqf[,5], uqt[,4], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) + computeMI_Sprase(MI, tNfh, tNth, pNf, pNt, rf, rt, rft, uqf[,5], uqt[,5], den, ncores) #, mega_dset = mega_dset); #print(MI[snp_i,snp_j]) # cat("\n") # cat(str(MI)) @@ -380,30 +407,31 @@ perform_MI_computation_ACGTN = function(snp.dat, from, to, neff, hsq, cds_var, l } # tAfh, tAth, pAf, pAt, rf, rt, rft, uqf[,1], uqt[,1] -computeMI_Sprase = function(MI_t, tX, tY, pX, pY, rX, rY, RXY, uqX, uqY, den, ncores, mega_dset = F){ - if(mega_dset){ # Using SPAM - ## I'm skipping this test here for efficiency, this is not an exported function, unlikely for anyone to use it wrongly! - # if(!requireNamespace("spam") & !requireNamespace("spam64")){ - # message("This feature requires spam and spam64 packages.") - # return(invisible()) - # } else { - # } - pxy_t = spam::tcrossprod(tX, tY) - pxy_t = spam::as.matrix(pxy_t) + 0.5# convert to dense mx (This could potentially be a very large matrix for large datasets, reconsider algorithm?) - } else { - pxy_t = MatrixExtra::tcrossprod(tX, tY) + 0.5; pxy_t = as(pxy_t, "matrix") # convert to dense mx - } +# computeMI_Sprase = function(MI_t, tX, tY, pX, pY, rX, rY, RXY, uqX, uqY, den, ncores, mega_dset = F){ # for deletion +computeMI_Sprase = function(MI_t, tX, tY, pX, pY, rX, rY, RXY, uqX, uqY, den, ncores){ + # if(mega_dset){ # Using SPAM + ## I'm skipping this test here for efficiency, this is not an exported function, unlikely for anyone to use it wrongly! + # if(!requireNamespace("spam") & !requireNamespace("spam64")){ + # message("This feature requires spam and spam64 packages.") + # return(invisible()) + # } else { + # } + # pxy_t = spam::tcrossprod(tX, tY) + # pxy_t = spam::as.matrix(pxy_t) + 0.5# convert to dense mx (This could potentially be a very large matrix for large datasets, reconsider algorithm?) + # } else { + pxy_t = tcrossprod(tX, tY) + 0.5; pxy_t = as(pxy_t, "matrix") # convert to dense mx + # } # t0 = Sys.time(); - uq_t = MatrixExtra::tcrossprod(uqX, uqY) + uq_t = tcrossprod(uqX, uqY) # rX = 0.5*rX # rY = 0.5*rY # RXY = t(MatrixExtra::tcrossprod(rX, rY)) - pXrX = MatrixExtra::tcrossprod(pX*rX, rep(1,length(pY))) + pXrX = tcrossprod(pX*rX, rep(1,length(pY))) # pYrY = MatrixExtra::t(MatrixExtra::tcrossprod(pY*rY, rep(1,length(pY)))) - pYrY = MatrixExtra::tcrossprod(rep(1,length(pX)), pY*rY) # same as above, already transposed - pxpy_tt = MatrixExtra::tcrossprod(pX, pY) # + RXY + pXrX + pYrY + pYrY = tcrossprod(rep(1,length(pX)), pY*rY) # same as above, already transposed + pxpy_tt = tcrossprod(pX, pY) # + RXY + pXrX + pYrY # difftime(Sys.time(), t0) diff --git a/R/io_functions.R b/R/io_functions.R index 050bd6d..5303a67 100644 --- a/R/io_functions.R +++ b/R/io_functions.R @@ -305,7 +305,6 @@ cleanup = function(dset, delete_after_moving = F){ mv_success = c(mv_success, idx) } - #### Relocate or delete moved files #### moved_idx = sort(unique(mv_success)) if(length(moved_idx) > 0){ @@ -316,6 +315,15 @@ cleanup = function(dset, delete_after_moving = F){ } unlink(file.path(dset, files[moved_idx]), recursive = T) } + + ### Sometimes there are a couple of snpEff files being generated in the working directory, move them to Temp + files = dir(getwd()) + idx = c(grep("snpEff_genes.txt", files), grep("snpEff_summary.html", files)) + if(length(idx) > 0){ + fldr = file.path(dset, "Temp") + cleanup_support(files = file.path(getwd(), files[idx]), fldr) + unlink(file.path(getwd(), files[idx])) + } } #' cleanup_support diff --git a/R/performPopulationStuctureCorrection.R b/R/performPopulationStuctureCorrection.R index 513529e..8666f16 100644 --- a/R/performPopulationStuctureCorrection.R +++ b/R/performPopulationStuctureCorrection.R @@ -2,7 +2,7 @@ #' #' Function to estimate the similarity between sequences based on the Hamming distance #' -#' @importFrom MatrixExtra crossprod +#' @importMethodsFrom MatrixExtra crossprod #' @importFrom Matrix colSums #' @importFrom methods as #' @@ -28,11 +28,17 @@ estimate_Hamming_distance_weights = function(snp.dat, threshold = 0.1, mega_dset return(invisible()) } else { - shared.snps.spam = spam::crossprod(snp.dat$snp.matrix_A) - shared.snps.spam = shared.snps.spam + spam::crossprod(snp.dat$snp.matrix_C) - shared.snps.spam = shared.snps.spam + spam::crossprod(snp.dat$snp.matrix_G) - shared.snps.spam = shared.snps.spam + spam::crossprod(snp.dat$snp.matrix_T) - shared.snps.spam = shared.snps.spam + spam::crossprod(snp.dat$snp.matrix_N) + # shared.snps.spam = spam::crossprod(snp.dat$snp.matrix_A) + # shared.snps.spam = shared.snps.spam + spam::crossprod(snp.dat$snp.matrix_C) + # shared.snps.spam = shared.snps.spam + spam::crossprod(snp.dat$snp.matrix_G) + # shared.snps.spam = shared.snps.spam + spam::crossprod(snp.dat$snp.matrix_T) + # shared.snps.spam = shared.snps.spam + spam::crossprod(snp.dat$snp.matrix_N) + + shared.snps.spam = crossprod(snp.dat$snp.matrix_A) + shared.snps.spam = shared.snps.spam + crossprod(snp.dat$snp.matrix_C) + shared.snps.spam = shared.snps.spam + crossprod(snp.dat$snp.matrix_G) + shared.snps.spam = shared.snps.spam + crossprod(snp.dat$snp.matrix_T) + shared.snps.spam = shared.snps.spam + crossprod(snp.dat$snp.matrix_N) ## Is this a bug in the spam package? This should work: # hdw.spam = 1/( spam::colSums( (snp.dat$nsnp - shared.snps.spam) < thresh) + 1) @@ -46,20 +52,21 @@ estimate_Hamming_distance_weights = function(snp.dat, threshold = 0.1, mega_dset } } else { - snpmat_t = as(snp.dat$snp.matrix_A, 'dgCMatrix') - shared.snps = MatrixExtra::crossprod((snpmat_t)) + snpmat_t = as(snp.dat$snp.matrix_A, 'lgeMatrix') + # shared.snps = MatrixExtra::crossprod((snpmat_t)) + shared.snps = crossprod((snpmat_t)) - snpmat_t = as(snp.dat$snp.matrix_C, 'dgCMatrix') - shared.snps = shared.snps + MatrixExtra::crossprod((snpmat_t)) + snpmat_t = as(snp.dat$snp.matrix_C, 'lgeMatrix') + shared.snps = shared.snps + crossprod((snpmat_t)) - snpmat_t = as(snp.dat$snp.matrix_G, 'dgCMatrix') - shared.snps = shared.snps + MatrixExtra::crossprod((snpmat_t)) + snpmat_t = as(snp.dat$snp.matrix_G, 'lgeMatrix') + shared.snps = shared.snps + crossprod((snpmat_t)) - snpmat_t = as(snp.dat$snp.matrix_T, 'dgCMatrix') - shared.snps = shared.snps + MatrixExtra::crossprod((snpmat_t)) + snpmat_t = as(snp.dat$snp.matrix_T, 'lgeMatrix') + shared.snps = shared.snps + crossprod((snpmat_t)) - snpmat_t = as(snp.dat$snp.matrix_N, 'dgCMatrix') - shared.snps = shared.snps + MatrixExtra::crossprod((snpmat_t)) + snpmat_t = as(snp.dat$snp.matrix_N, 'lgeMatrix') + shared.snps = shared.snps + crossprod((snpmat_t)) hdw = 1/( Matrix::colSums( (snp.dat$nsnp - shared.snps) < thresh) + 1) } diff --git a/README.md b/README.md index 549d7ec..d362559 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,20 @@ +

+ + + + LDWeaver logo + +

+ + +## Genomewide Co-selection and Epistasis in Bacteria -[![R](https://github.com/Sudaraka88/LDWeaver/workflows/R-CMD-check/badge.svg)](https://github.com/Sudaraka88/LDWeaver/actions) +[![R](https://github.com/Sudaraka88/LDWeaver/workflows/R-CMD-check-hard/badge.svg)](https://github.com/Sudaraka88/LDWeaver/actions) [![DOI](https://zenodo.org/badge/590009521.svg)](https://zenodo.org/badge/latestdoi/590009521) [![LICESNSE](https://anaconda.org/bioconda/r-ldweaver/badges/license.svg)](https://spdx.org/licenses/GPL-3.0-or-later.html) -## Genomewide Co-selection and Epistasis in Bacteria - - LDWeaver accepts a sequence alignment (fasta) and its reference annotation (genbank or gff) as inputs and identifies linkage disequilibrium (LD) between pairs of variants (links) that is unusually high given the genomic distance @@ -94,7 +101,7 @@ num_clusts_CDS = 2, SnpEff_Annotate = F, snp_filt_method = snp_filt_method) ## Citation -Please cite LDWeaver using: Mallawaarachchi, S. et al. Detecting co-selection through excess linkage disequilibrium in bacterial genomes, NAR Genomics and Bioinformatics, Volume 6, Issue 2, June 2024, lqae061, https://doi.org/10.1093/nargab/lqae061 +Please cite LDWeaver using: Mallawaarachchi, S. et al. Detecting co-selection through excess linkage disequilibrium in bacterial genomes, NAR genom. bioinform., 6(2),lqae061 (2024): https://doi.org/10.1093/nargab/lqae061 ## Detailed Workthrough using Real Data diff --git a/inst/CITATION b/inst/CITATION index 83c2cb6..8d0c7b1 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,8 +1,7 @@ -citHeader("If you find LDWeaver useful, please cite:") - -citEntry(entry = "Article", -title = "Detecting co-selection through excess linkage disequilibrium in bacterial genomes", -author = personList(as.person("Sudaraka Mallawaarachchi"), +bibentry( + bibtype = "Article", + title = "Detecting co-selection through excess linkage disequilibrium in bacterial genomes", + author = personList(as.person("Sudaraka Mallawaarachchi"), as.person("Gerry Tonkin-Hill"), as.person("Anna K. Pöntinen"), as.person("Jessica K. Calland"), @@ -14,10 +13,10 @@ author = personList(as.person("Sudaraka Mallawaarachchi"), as.person("David Balding"), as.person("Nicholas J. Croucher"), as.person("Jukka Corander")), -journal = "bioRxiv", -year = "2023", -pages = "2023.08.04.551407", -url = "https://doi.org/10.1101/2023.08.04.551407", - -textVersion = "Sudaraka Mallawaarachchi et al. (2023) Detecting co-selection through excess linkage disequilibrium in bacterial genomes. bioRxiv https://doi.org/10.1101/2023.08.04.551407." + journal = "NAR Genomics and Bioinformatics", + year = "2024", + volume = "6", + number = "2", + pages = "lqae061", + doi = "10.1093/nargab/lqae061" ) diff --git a/inst/extdata/snp_sample.fa.pos b/inst/extdata/snp_sample.pos similarity index 100% rename from inst/extdata/snp_sample.fa.pos rename to inst/extdata/snp_sample.pos diff --git a/man/LDWeaver.Rd b/man/LDWeaver.Rd index 506f67b..bb34ba0 100644 --- a/man/LDWeaver.Rd +++ b/man/LDWeaver.Rd @@ -103,18 +103,22 @@ Function to run the LDWeaver pipeline dset <- "full_dset" gbk_path <- system.file("extdata", "sample.gbk", package = "LDWeaver") aln_path <- system.file("extdata", "sample.aln.gz", package = "LDWeaver") -LDWeaver::LDWeaver(dset = dset, aln_path = aln_path, gbk_path = gbk_path, validate_ref_ann_lengths = F) +LDWeaver::LDWeaver(dset = dset, aln_path = aln_path, gbk_path = gbk_path, + validate_ref_ann_lengths = F) # Example 2- using a SNP only alignment dset <- "snp_dset" gbk_path <- system.file("extdata", "sample.gbk", package = "LDWeaver") aln_path <- system.file("extdata", "snp_sample.fa.gz", package = "LDWeaver") -pos <- as.numeric(readLines(system.file("extdata", "snp_sample.fa.pos", package = "LDWeaver"))) -LDWeaver::LDWeaver(dset = dset, aln_path = aln_path, aln_has_all_bases = F, pos = pos, gbk_path = gbk_path) +pos <- as.numeric(readLines(system.file("extdata", "snp_sample.fa.pos", + package = "LDWeaver"))) +LDWeaver::LDWeaver(dset = dset, aln_path = aln_path, aln_has_all_bases = F, + pos = pos, gbk_path = gbk_path) dset <- "full_dset_spam" gbk_path <- system.file("extdata", "sample.gbk", package = "LDWeaver") aln_path <- system.file("extdata", "sample.aln.gz", package = "LDWeaver") -LDWeaver::LDWeaver(dset = dset, aln_path = aln_path, gbk_path = gbk_path, validate_ref_ann_lengths = F, mega_dset = T) +LDWeaver::LDWeaver(dset = dset, aln_path = aln_path, gbk_path = gbk_path, + validate_ref_ann_lengths = F, mega_dset = T) } } diff --git a/src/Makevars b/src/Makevars new file mode 100644 index 0000000..30483de --- /dev/null +++ b/src/Makevars @@ -0,0 +1,13 @@ +CXX_STD=CXX11 + +CXX1X=$(shell ${R_HOME}/bin/R CMD config CXX11) + +PKG_LIBS = $(shell $(R_HOME)/bin/Rscipt.exe -e "Rcpp:::LdFlags()") -lz + +ifeq ($(shell $(CXX1X) -fopenmp -E -xc++ - 2>&1 >/dev/null && echo 'true'), true) + PKG_CXXFLAGS=$(SHLIB_OPENMP_CXXFLAGS) -DARMA_64BIT_WORD + PKG_LIBS=$(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) +else + PKG_CXXFLAGS=-DARMA_64BIT_WORD + PKG_LIBS=$(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) +endif diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index a342f74..8b29f0c 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -129,6 +129,15 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// test_openmp +void test_openmp(); +RcppExport SEXP _LDWeaver_test_openmp() { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + test_openmp(); + return R_NilValue; +END_RCPP +} // readFasta List readFasta(std::string file, int pos_len); RcppExport SEXP _LDWeaver_readFasta(SEXP fileSEXP, SEXP pos_lenSEXP) { @@ -152,6 +161,7 @@ static const R_CallMethodDef CallEntries[] = { {"_LDWeaver_extractAlnParam", (DL_FUNC) &_LDWeaver_extractAlnParam, 4}, {"_LDWeaver_extractSNPs", (DL_FUNC) &_LDWeaver_extractSNPs, 4}, {"_LDWeaver_extractRef", (DL_FUNC) &_LDWeaver_extractRef, 1}, + {"_LDWeaver_test_openmp", (DL_FUNC) &_LDWeaver_test_openmp, 0}, {"_LDWeaver_readFasta", (DL_FUNC) &_LDWeaver_readFasta, 2}, {NULL, NULL, 0} }; diff --git a/src/openMP_tester.cpp b/src/openMP_tester.cpp new file mode 100644 index 0000000..c07551e --- /dev/null +++ b/src/openMP_tester.cpp @@ -0,0 +1,16 @@ +#include +#ifdef _OPENMP +#include +// [[Rcpp::plugins(openmp)]] +#endif + + +// [[Rcpp::export]] +void test_openmp() { +#ifdef _OPENMP + int nthreads = omp_get_max_threads(); + Rcpp::Rcout << "with OpenMP support - system supports up to: " << nthreads << " cores" << std::endl; +#else + Rcpp::Rcout << "without OpenMP support - WARNING! Some functions will run slower..." << std::endl; +#endif +} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..15259ae --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + +library(testthat) +library(LDWeaver) + +test_check("LDWeaver") diff --git a/tests/testthat/test-pipeline.R b/tests/testthat/test-pipeline.R new file mode 100644 index 0000000..c8009c3 --- /dev/null +++ b/tests/testthat/test-pipeline.R @@ -0,0 +1,81 @@ + + +# test_that("Test run on mega sample", { +# expect_equal(NULL, ) +# }) + +LDWeaver::LDWeaver(dset = "mega", + aln_path = system.file("extdata", "sample.aln.gz", package = "LDWeaver"), + gbk_path = system.file("extdata", "sample.gbk", package = "LDWeaver"), + validate_ref_ann_lengths = F, mega_dset = T) +# +# test_that("Test run on standard sample", { +# expect_equal(NULL, ) +# }) + +LDWeaver::LDWeaver(dset = "std", + aln_path = system.file("extdata", "sample.aln.gz", package = "LDWeaver"), + gbk_path = system.file("extdata", "sample.gbk", package = "LDWeaver"), + validate_ref_ann_lengths = F) + +# path_to_folder = "tests/testthat" # This is for local testing +path_to_folder = getwd() + + +idx = 1:10# sample(100, 10) + +ths_mega = LDWeaver::read_TopHits(file.path(path_to_folder, "mega/Tophits/sr_tophits.tsv")) +ths_std = LDWeaver::read_TopHits(file.path(path_to_folder, "std/Tophits/sr_tophits.tsv")) + + + +test_that("Short range tophits", { + # expect_equal(all.equal(ths_mega, ths_std), TRUE) + expect_equal(all(apply(cbind(idx, sapply(idx, function(x) c(which((ths_mega$pos1[x] == ths_std$pos1) & (ths_mega$pos2[x] == ths_std$pos2)), + which((ths_mega$pos1[x] == ths_std$pos2) & (ths_mega$pos2[x] == ths_std$pos1))))), + 1, function(x) all.equal( as.vector(ths_mega[x[1],3:ncol(ths_mega)]), as.vector(ths_std[x[2],3:ncol(ths_std)])))), TRUE) +}) + + +thl_mega = LDWeaver::read_TopHits(file.path(path_to_folder, "mega/Tophits/lr_tophits.tsv")) +thl_std = LDWeaver::read_TopHits(file.path(path_to_folder, "std/Tophits/lr_tophits.tsv")) + +test_that("Long range tophits", { + # expect_equal(all.equal(thl_mega, thl_std), TRUE) + expect_equal(all(apply(cbind(idx, sapply(idx, function(x) c(which((thl_mega$pos1[x] == thl_std$pos1) & (thl_mega$pos2[x] == thl_std$pos2)), + which((thl_mega$pos1[x] == thl_std$pos2) & (thl_mega$pos2[x] == thl_std$pos1))))), + 1, function(x) all.equal( as.vector(thl_mega[x[1],3:ncol(thl_mega)]), as.vector(thl_std[x[2],3:ncol(thl_std)])))), TRUE) + +}) + +as_mega = LDWeaver::read_AnnotatedLinks(file.path(path_to_folder, "mega/Annotated_links/sr_links_annotated.tsv")) +as_std = LDWeaver::read_AnnotatedLinks(file.path(path_to_folder, "std/Annotated_links/sr_links_annotated.tsv")) + +test_that("Short range all links", { + # expect_equal(all.equal(as_mega, as_std), TRUE) + expect_equal(all(apply(cbind(idx, sapply(idx, function(x) c(which((as_mega$pos1[x] == as_std$pos1) & (as_mega$pos2[x] == as_std$pos2)), + which((as_mega$pos1[x] == as_std$pos2) & (as_mega$pos2[x] == as_std$pos1))))), + 1, function(x) all.equal( as.vector(as_mega[x[1],c(3,5:ncol(as_mega))]), as.vector(as_std[x[2],c(3,5:ncol(as_std))])))), TRUE) +}) + + +al_mega = LDWeaver::read_AnnotatedLinks(file.path(path_to_folder, "mega/Annotated_links/lr_links_annotated.tsv")) +al_std = LDWeaver::read_AnnotatedLinks(file.path(path_to_folder, "std/Annotated_links/lr_links_annotated.tsv")) + +test_that("Long range all links", { + # expect_equal(all.equal(al_mega, al_std), TRUE) + expect_equal(all(apply(cbind(idx, sapply(idx, function(x) c(which((al_mega$pos1[x] == al_std$pos1) & (al_mega$pos2[x] == al_std$pos2)), + which((al_mega$pos1[x] == al_std$pos2) & (al_mega$pos2[x] == al_std$pos1))))), + 1, function(x) all.equal( as.vector(al_mega[x[1], c(3,5:ncol(al_mega))]), as.vector(al_std[x[2],c(3,5:ncol(al_std))])))), TRUE) +}) + +## Try running on SNP only alignment +pos = as.numeric(readLines(system.file("extdata", "snp_sample.pos", package = "LDWeaver"))) +# test_that("Test run on SNP sample", { +# expect_equal(NULL, ) +# }) + +LDWeaver::LDWeaver(dset = "SNP", aln_has_all_bases = F, pos = pos, + aln_path = system.file("extdata", "snp_sample.fa.gz", package = "LDWeaver"), + gbk_path = system.file("extdata", "sample.gbk", package = "LDWeaver"), + validate_ref_ann_lengths = F, mega_dset = F)