diff --git a/DESCRIPTION b/DESCRIPTION index 5e36344..f37c8bc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rliger -Version: 2.1.0.9002 -Date: 2024-11-15 +Version: 2.1.0.9003 +Date: 2024-12-04 Type: Package Title: Linked Inference of Genomic Experimental Relationships Description: Uses an extension of nonnegative matrix factorization to identify shared and dataset-specific factors. See Welch J, Kozareva V, et al (2019) , and Liu J, Gao C, Sodicoff J, et al (2020) for more details. diff --git a/NEWS.md b/NEWS.md index daa94ff..7e44f7e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,10 +13,13 @@ - Pseudo-bulk should be easy because we are just aggregating cells. - Wilcoxon might be a bit harder because ranks are calculated per gene but the H5 sparse data is column majored. Might need to find a fast on-disk transposition method, which would also enhance RcppPlanc performance when running ANLS on H5 data. -## rliger 2.1.0.9002 +## rliger 2.1.0.9003 - Added `selectBatchHVG()` which implements another HVG selection strategy, credit to SCIB -- Fixed Wilcoxon rank-sum test bug when using ATAC peak counts +- Fixed multiple problems related to ATAC analysis + - Fixed Wilcoxon rank-sum test bug when using ATAC peak counts + - Fixed gene coordinate parsing bug from BED file + - Optimized peak parsing speed ## rliger 2.1.0 diff --git a/R/ATAC.R b/R/ATAC.R index 1f5e54c..4ef1cff 100644 --- a/R/ATAC.R +++ b/R/ATAC.R @@ -420,8 +420,11 @@ exportInteractTrack <- function( colClasses = c("character", "integer", "integer", "character", "NULL", "NULL") ) - genesCoords <- genesCoords[stats::complete.cases(genesCoords$V4), ] - rownames(genesCoords) <- genesCoords[, 4] + mask <- stats::complete.cases(genesCoords$V4) & genesCoords$V4 != "" + genesCoords <- genesCoords[mask, , drop = FALSE] + uniqnames <- make.unique(genesCoords$V4) + rownames(genesCoords) <- uniqnames + # split peak names into chrom and coordinates regions <- .splitPeakRegion(rownames(corrMat)) @@ -440,41 +443,45 @@ exportInteractTrack <- function( 'visibility=full') write(trackDoc, file = outputPath) for (gene in useGenes) { - peaksSel <- corrMat[, gene] != 0 - track <- data.frame( - chrom = regions$chrs[peaksSel], - chromStart = regions$chrsStart[peaksSel], - chromEnd = regions$chrsEnd[peaksSel], - name = paste0(gene, "/", rownames(corrMat)[peaksSel]), - score = 0, - value = as.numeric(corrMat[peaksSel, gene]), - exp = ".", - color = 5, - sourceChrom = regions$chrs[peaksSel], - sourceStart = regions$chrsStart[peaksSel], - sourceEnd = regions$chrsStart[peaksSel] + 1, - sourceName = ".", - sourceStrand = ".", - targetChrom = genesCoords[gene, 1], - targetStart = genesCoords[gene, 2], - targetEnd = genesCoords[gene, 2] + 1, - targetName = gene, - targetStrand = "." - ) - utils::write.table( - track, - file = outputPath, - append = TRUE, - quote = FALSE, - sep = "\t", - eol = "\n", - na = "NA", - dec = ".", - row.names = FALSE, - col.names = FALSE, - qmethod = c("escape", "double"), - fileEncoding = "" - ) + coordSel <- which(genesCoords$V4 == gene) + for (geneUniqIdx in coordSel) { + peaksSel <- corrMat[, gene] != 0 + track <- data.frame( + chrom = regions$chrs[peaksSel], + chromStart = regions$chrsStart[peaksSel], + chromEnd = regions$chrsEnd[peaksSel], + name = paste0(gene, "/", rownames(corrMat)[peaksSel]), + score = 0, + value = as.numeric(corrMat[peaksSel, gene]), + exp = ".", + color = 5, + sourceChrom = regions$chrs[peaksSel], + sourceStart = regions$chrsStart[peaksSel], + sourceEnd = regions$chrsStart[peaksSel] + 1, + sourceName = ".", + sourceStrand = ".", + targetChrom = genesCoords[geneUniqIdx, 1], + targetStart = genesCoords[geneUniqIdx, 2], + targetEnd = genesCoords[geneUniqIdx, 2] + 1, + targetName = gene, + targetStrand = "." + ) + utils::write.table( + track, + file = outputPath, + append = TRUE, + quote = FALSE, + sep = "\t", + eol = "\n", + na = "NA", + dec = ".", + row.names = FALSE, + col.names = FALSE, + qmethod = c("escape", "double"), + fileEncoding = "" + ) + } + } cli::cli_alert_success("Result written at: {.file {outputPath}}") invisible(NULL) @@ -516,10 +523,12 @@ makeInteractTrack <- function( .splitPeakRegion <- function(peakNames) { peakNames <- strsplit(peakNames, "[:-]") - chrs <- Reduce(append, lapply(peakNames, function(peak) peak[1])) - chrsStart <- Reduce(append, lapply(peakNames, function(peak) peak[2])) - chrsEnd <- Reduce(append, lapply(peakNames, function(peak) peak[3])) - list(chrs = chrs, - chrsStart = as.numeric(chrsStart), - chrsEnd = as.numeric(chrsEnd)) + # chrs <- Reduce(append, lapply(peakNames, function(peak) peak[1])) + # chrsStart <- Reduce(append, lapply(peakNames, function(peak) peak[2])) + # chrsEnd <- Reduce(append, lapply(peakNames, function(peak) peak[3])) + list( + chrs = sapply(peakNames, `[`, i = 1), + chrsStart = as.numeric(sapply(peakNames, `[`, i = 2)), + chrsEnd = as.numeric(sapply(peakNames, `[`, i = 3)) + ) } diff --git a/R/selectBatchHVG.R b/R/selectBatchHVG.R index 20b91e6..db2e612 100644 --- a/R/selectBatchHVG.R +++ b/R/selectBatchHVG.R @@ -107,7 +107,7 @@ selectBatchHVG.liger <- function( "Selected {nrow(hvgDFSub)} gene{?s} that {?is/are} highly variable in {n_batch_look} batch{?es}" ) } - nSelected <- nrow(hvgDFSub) + nSelected <- nSelected + nrow(hvgDFSub) nNeeded <- nGenes - nSelected n_batch_look <- n_batch_look - 1 } else {