diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index 5927ff7e..6263b116 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -40,7 +40,7 @@ env: run_covr: 'false' run_pkgdown: 'false' has_RUnit: 'false' - cache-version: 'cache-v3' + cache-version: 'cache-v4' run_docker: 'false' jobs: @@ -54,9 +54,9 @@ jobs: fail-fast: false matrix: config: - - { os: ubuntu-latest, r: '4.3', bioc: '3.18', cont: "bioconductor/bioconductor_docker:RELEASE_3_18", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } - - { os: macOS-latest, r: '4.3', bioc: '3.18'} - ##- { os: windows-latest, r: '4.3', bioc: '3.18'} + - { os: ubuntu-latest, r: '4.4', bioc: '3.19', cont: "bioconductor/bioconductor_docker:RELEASE_3_19", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } + - { os: macOS-latest, r: '4.4', bioc: '3.19'} + - { os: windows-latest, r: '4.4', bioc: '3.19'} ## Check https://github.com/r-lib/actions/tree/master/examples ## for examples using the http-user-agent env: @@ -81,7 +81,7 @@ jobs: ## https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml ## If they update their steps, we will also need to update ours. 
- name: Checkout Repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 ## R is already included in the Bioconductor docker images - name: Setup R from r-lib @@ -104,7 +104,7 @@ jobs: - name: Restore R package cache if: "!contains(github.event.head_commit.message, '/nocache') && runner.os != 'Linux'" - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ${{ env.R_LIBS_USER }} key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE-r-4.3-${{ hashFiles('.github/depends.Rds') }} @@ -112,7 +112,7 @@ jobs: - name: Cache R packages on Linux if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' " - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: /home/runner/work/_temp/Library key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.3-${{ hashFiles('.github/depends.Rds') }} @@ -337,9 +337,9 @@ jobs: - name: Upload check results if: failure() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: ${{ runner.os }}-biocversion-RELEASE-r-4.3-results + name: ${{ runner.os }}-biocversion-RELEASE-r-4.4-results path: check - uses: docker/build-push-action@v1 diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 9daaa663..aa29e0f1 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -15,7 +15,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml index 98ca228b..35d14666 100644 --- a/.github/workflows/pr-commands.yaml +++ b/.github/workflows/pr-commands.yaml @@ -14,7 +14,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: r-lib/actions/pr-fetch@v2 with: @@ -49,7 +49,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: 
actions/checkout@v2 + - uses: actions/checkout@v4 - uses: r-lib/actions/pr-fetch@v2 with: diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index a1b6e36e..58383454 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -16,7 +16,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: diff --git a/DESCRIPTION b/DESCRIPTION index e98eb194..faa7cf21 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -86,7 +86,8 @@ Imports: Rsamtools, methods, Rcpp, - xgboost + xgboost, + Matrix VignetteBuilder: knitr LazyData: true diff --git a/NAMESPACE b/NAMESPACE index b4c51d05..3adb4e37 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,7 +7,9 @@ export(readFromGTF) export(transcriptToGeneExpression) export(writeBambuOutput) export(writeToGTF) +export(writeAnnotationsToGTF) export(trainBambu) +export(setNDR) export(compareTranscripts) importFrom(stats,predict) importFrom(BiocGenerics,basename) @@ -73,7 +75,8 @@ import(data.table, except=c(last, first, shift, second, between)) import(dplyr, except=c(last, first, desc, union, setdiff, intersect, slice)) import(IRanges, except=c(slice, collapse, setdiff, intersect,cor)) import(SummarizedExperiment) -import(S4Vectors, except=c(rename, setequal, setdiff, intersect,cor)) +import(Matrix) +import(S4Vectors, except=c(rename, setequal, setdiff, intersect,cor, unname, expand)) useDynLib(bambu, .registration = TRUE) import(xgboost) import(BSgenome) diff --git a/R/bambu-assignDist.R b/R/bambu-assignDist.R new file mode 100644 index 00000000..25d2d3e2 --- /dev/null +++ b/R/bambu-assignDist.R @@ -0,0 +1,108 @@ +#' Create equivalence classes and assign to transcripts +#' @inheritParams bambu +#' @import data.table +#' @noRd +assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters, + verbose, demultiplexed, spatial, + returnDistTable = FALSE, 
trackReads = TRUE) { + if (is.character(readClassList)) readClassList <- readRDS(file = readClassList) + metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose, returnDistTable) + readClassList <- splitReadClassFiles(readClassList) + readClassDt <- genEquiRCs(metadata(readClassList)$readClassDist, annotations, verbose) + readClassDt$eqClass.match = match(readClassDt$eqClassById,metadata(readClassList)$eqClassById) + readClassDt <- simplifyNames(readClassDt) + readClassDt <- readClassDt %>% group_by(eqClassId, gene_sid) %>% + mutate(multi_align = length(unique(txid))>1) %>% + ungroup() %>% + mutate(aval = 1) %>% + data.table() + #return non-em counts + ColData <- generateColData(colnames(metadata(readClassList)$countMatrix), clusters = NULL, demultiplexed, spatial) + quantData <- SummarizedExperiment(assays = SimpleList( + counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)), + rowRanges = annotations, + colData = ColData) + colnames(quantData) <- ColData$id + if(sum(metadata(readClassList)$incompatibleCountMatrix)==0){ + metadata(quantData)$incompatibleCounts <- NULL + }else{ + metadata(quantData)$incompatibleCounts <- generateIncompatibleCounts(metadata(readClassList)$incompatibleCountMatrix, annotations) + } + metadata(quantData)$nonuniqueCounts <- generateNonUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations) + metadata(quantData)$readClassDt <- readClassDt + metadata(quantData)$countMatrix <- metadata(readClassList)$countMatrix + metadata(quantData)$incompatibleCountMatrix <- metadata(readClassList)$incompatibleCountMatrix + metadata(quantData)$sampleNames <- metadata(readClassList)$sampleNames + if(returnDistTable) + metadata(quantData)$distTable <- metadata(metadata(readClassList)$readClassDist)$distTableOld + + if(trackReads) + metadata(quantData)$readToTranscriptMap <- + generateReadToTranscriptMap(readClassList, + 
metadata(readClassList)$readClassDist, + annotations) + + return(quantData) + +} + +#' Generate unique counts +#' @noRd +generateUniqueCounts <- function(readClassDt, countMatrix, annotations){ + x <- readClassDt %>% filter(!multi_align & !is.na(eqClass.match)) + uniqueCounts <- countMatrix[x$eqClass.match,] + uniqueCounts.tx <- sparse.model.matrix(~ factor(x$txid) - 1) + uniqueCounts <- t(uniqueCounts.tx) %*% uniqueCounts + rownames(uniqueCounts) <- names(annotations)[match(as.numeric(levels(factor(x$txid))),mcols(annotations)$txid)] + counts <- sparseMatrix(length(annotations), ncol(uniqueCounts), x = 0) + rownames(counts) <- names(annotations) + counts[rownames(uniqueCounts),] <- uniqueCounts + return(counts) + + # these three lines appear after return, so it's not used, is this used for debug only? + # counts.total = colSums(countMatrix) + colSums(incompatibleCountMatrix) + # counts.total[counts.total==0] = 1 + # counts.CPM = counts/counts.total * 10^6 + +} + + +#' Generate incompatible counts +#' @noRd +generateIncompatibleCounts <- function(incompatibleCountMatrix, annotations){ + genes <- levels(factor(unique(mcols(annotations)$GENEID))) + rownames(incompatibleCountMatrix) <- genes[as.numeric(rownames(incompatibleCountMatrix))] + geneMat <- sparseMatrix(length(genes), ncol(incompatibleCountMatrix), x = 0) + rownames(geneMat) <- genes + geneMat[rownames(incompatibleCountMatrix),] <- incompatibleCountMatrix + return(geneMat) +} + + +#' Generate non-unique counts +#' @noRd +generateNonUniqueCounts <- function(readClassDt, countMatrix, annotations){ + #fuse multi align RCs by gene + x <- readClassDt %>% filter(multi_align & !is.na(eqClass.match)) + x <- x %>% distinct(eqClassId, .keep_all = TRUE) + nonuniqueCounts <- countMatrix[x$eqClass.match,, drop = FALSE] + if(nrow(x)>1 & length(unique(x$gene_sid))>1){ + nonuniqueCounts.gene <- sparse.model.matrix(~ factor(x$gene_sid) - 1) + nonuniqueCounts <- t(nonuniqueCounts.gene) %*% nonuniqueCounts + } else{ + 
warning("The factor variable 'gene_sid' has only one level. Adjusting output.") + nonuniqueCounts.gene <- Matrix(1, nrow = nrow(x), ncol = 1, sparse = TRUE) + nonuniqueCounts <- t(nonuniqueCounts.gene) %*% nonuniqueCounts + } + #covert ids into gene ids + geneids <- as.numeric(levels(factor(x$gene_sid))) + geneids <- x$txid[match(geneids, x$gene_sid)] + geneids <- mcols(annotations)$GENEID[as.numeric(geneids)] + rownames(nonuniqueCounts) <- geneids + #create matrix for all annotated genes + genes <- levels(factor(unique(mcols(annotations)$GENEID))) + geneMat <- sparseMatrix(length(genes), ncol(nonuniqueCounts), x = 0) + rownames(geneMat) <- genes + geneMat[rownames(nonuniqueCounts),] <- nonuniqueCounts + return(geneMat) +} diff --git a/R/bambu-extendAnnotations-utilityCombine.R b/R/bambu-extendAnnotations-utilityCombine.R index 7f1a521a..4a644451 100644 --- a/R/bambu-extendAnnotations-utilityCombine.R +++ b/R/bambu-extendAnnotations-utilityCombine.R @@ -19,6 +19,10 @@ isore.combineTranscriptCandidates <- function(readClassList, min.readCount, min.readFractionByGene, min.txScore.multiExon, min.txScore.singleExon, verbose) %>% data.table() combinedSplicedTranscripts[,confidenceType := "highConfidenceJunctionReads"] + # when single exon min score is greater than 1, skip unspliced transcripts combination + # this is a very customized config, useful when data is very big + if (min.txScore.singleExon > 1) + return(combinedSplicedTranscripts) combinedUnsplicedTranscripts <- combineUnsplicedTranscriptModels(readClassList, bpParameters, stranded, min.readCount, min.readFractionByGene, @@ -35,11 +39,11 @@ isore.combineTranscriptCandidates <- function(readClassList, combineSplicedTranscriptModels <- function(readClassList, bpParameters, min.readCount, min.readFractionByGene, min.txScore.multiExon, min.txScore.singleExon, verbose){ - bpParameters$progressbar = FALSE + bpParameters$progressbar <- FALSE options(scipen = 999) #maintain numeric basepair locations not sci.notfi. 
start.ptm <- proc.time() n_sample <- length(readClassList) - nGroups = max(ceiling(n_sample/10),min(bpworkers(bpParameters), + nGroups <- max(ceiling(n_sample/10),min(bpworkers(bpParameters), round(n_sample/2))) indexList <- sample(rep(seq_len(nGroups), length.out=n_sample)) indexList <- splitAsList(seq_len(n_sample), indexList) @@ -134,7 +138,7 @@ combineFeatureTibble <- function(combinedFeatureTibble, maxTxScore.noFit, NSampleReadCount, NSampleReadProp,NSampleTxScore, starts_with('start'), starts_with('end'), starts_with('readCount')) } else { - combinedTable = full_join(combinedFeatureTibble, + combinedTable <- full_join(combinedFeatureTibble, featureTibbleSummarised, by = c('intronStarts', 'intronEnds', 'chr', 'strand'), suffix=c('.combined','.new')) %>% mutate(NSampleReadCount=pmax0NA(NSampleReadCount.combined) + @@ -214,7 +218,7 @@ combineUnsplicedTranscriptModels <- min.readFractionByGene, min.txScore.multiExon, min.txScore.singleExon, verbose){ start.ptm <- proc.time() - bpParameters$progressbar = FALSE + bpParameters$progressbar <- FALSE newUnsplicedSeList <- bplapply(seq_along(readClassList), function(sample_id) extractNewUnsplicedRanges(readClassSe = @@ -291,7 +295,7 @@ reduceUnsplicedRanges <- function(rangesList, stranded){ makeUnsplicedTibble <- function(combinedNewUnsplicedSe,newUnsplicedSeList, colDataNames,min.readCount, min.readFractionByGene, min.txScore.multiExon, min.txScore.singleExon, bpParameters){ - bpParameters$progressbar = FALSE + bpParameters$progressbar <- FALSE newUnsplicedTibble <- as_tibble(combinedNewUnsplicedSe) %>% rename(chr = seqnames) %>% select(chr, start, end, strand, row_id) %>% separate_rows(row_id, sep = "\\+") diff --git a/R/bambu-extendAnnotations-utilityExtend.R b/R/bambu-extendAnnotations-utilityExtend.R index 75065607..d65d70ea 100644 --- a/R/bambu-extendAnnotations-utilityExtend.R +++ b/R/bambu-extendAnnotations-utilityExtend.R @@ -23,7 +23,9 @@ isore.extendAnnotations <- function(combinedTranscripts, 
annotationGrangesList, rowDataSplicedTibble, annotationGrangesList, min.exonDistance, min.primarySecondaryDist, min.primarySecondaryDistStartEnd, verbose) - } else{ rowDataFilteredSpliced = NULL} + } else{ + rowDataFilteredSpliced <- NULL + } rowDataFilteredUnspliced <- rowDataTibble[which(confidenceTypeVec == "unsplicedNew"),] SEnRng <- addNewUnsplicedReadClasses(rowDataFilteredUnspliced, rowDataFilteredSpliced, transcriptRanges$exons, @@ -43,6 +45,9 @@ isore.extendAnnotations <- function(combinedTranscripts, annotationGrangesList, extendedAnnotationRanges <- filterTranscriptsByAnnotation( rowDataCombined, annotationGrangesList, exonRangesCombined, prefix, remove.subsetTx, min.readFractionByEqClass, baselineFDR, NDR, defaultModels, verbose) + message(paste0("Novel transcripts detected: ", sum(mcols(extendedAnnotationRanges)$novelTranscript))) + message(paste0("Novel genes detected: ", length(unique(mcols(extendedAnnotationRanges)$GENEID[mcols(extendedAnnotationRanges)$novelGene])))) + message(paste0("Low confidence transcripts excluded: ", length(metadata(extendedAnnotationRanges)$lowConfidenceTranscripts))) return(extendedAnnotationRanges) } else { message("The current filtering criteria filters out all new read @@ -64,7 +69,9 @@ filterTranscripts <- function(combinedTranscripts, min.sampleNumber){ combinedTranscripts$NSampleTxScore >= min.sampleNumber) & ( combinedTranscripts$NSampleReadProp >= min.sampleNumber) } - combinedTranscripts = combinedTranscripts[filterSet,] + #combinedTranscripts = combinedTranscripts[filterSet,] + combinedTranscripts$maxTxScore[!filterSet] = -1 + combinedTranscripts$maxTxScore.noFit[!filterSet] = -1 return(combinedTranscripts) } @@ -80,25 +87,38 @@ filterTranscriptsByAnnotation <- function(rowDataCombined, annotationGrangesList #calculate relative read count before any filtering rowDataCombined <- group_by(rowDataCombined, GENEID) %>% mutate(relReadCount = readCount/sum(readCount)) + + 
rowDataCombined$TXNAME[is.na(rowDataCombined$TXNAME)] <- paste0( + prefix, "Tx", seq_len(sum(is.na(rowDataCombined$TXNAME)))) + names(exonRangesCombined) <- rowDataCombined$TXNAME + + #filter out subset reads if (remove.subsetTx) { # (1) based on compatibility with annotations - notCompatibleIds <- which(!grepl("compatible", rowDataCombined$readClassType) | + notCompatibleIds <- (!grepl("compatible", rowDataCombined$readClassType) | rowDataCombined$readClassType == "equal:compatible") #keep equal for FDR calculation - exonRangesCombined <- exonRangesCombined[notCompatibleIds] - rowDataCombined <- rowDataCombined[notCompatibleIds,] + subsetTranscripts <- combindRowDataWithRanges( + rowDataCombined[!notCompatibleIds,], + exonRangesCombined[!notCompatibleIds]) + rowDataCombined$maxTxScore[grepl("compatible", rowDataCombined$readClassType) & + rowDataCombined$readClassType != "equal:compatible"] <- -1 + rowDataCombined$maxTxScore.noFit[grepl("compatible", rowDataCombined$readClassType) & + rowDataCombined$readClassType != "equal:compatible"] <- -1 } #(2) remove transcripts below NDR threshold/identical junctions to annotations - rowDataCombined = calculateNDROnTranscripts(rowDataCombined, + rowDataCombined <- calculateNDROnTranscripts(rowDataCombined, useTxScore = length(annotationGrangesList)==0) - if(length(annotationGrangesList)>0){ #only recommend an NDR if its possible to calculate an NDR - NDR = recommendNDR(rowDataCombined, baselineFDR, NDR, defaultModels, verbose) - } else { - if(is.null(NDR)) NDR = 0.5 + NDR <- recommendNDR(rowDataCombined, baselineFDR, NDR, defaultModels, verbose) + } else if(is.null(NDR)) { + NDR <- 0.5 } - filterSet = (rowDataCombined$NDR <= NDR) + filterSet <- (rowDataCombined$NDR <= NDR | rowDataCombined$readClassType == "equal:compatible") + lowConfidenceTranscripts <- combindRowDataWithRanges( + rowDataCombined[!filterSet,], + exonRangesCombined[!filterSet]) exonRangesCombined <- exonRangesCombined[filterSet] rowDataCombined <- 
rowDataCombined[filterSet,] - #calculate relative subset read count after filtering (increase speed, subsets are not considered here) + mcols(exonRangesCombined)$txid <- seq_along(exonRangesCombined) minEq <- getMinimumEqClassByTx(exonRangesCombined)$eqClassById rowDataCombined$relSubsetCount <- rowDataCombined$readCount/unlist(lapply(minEq, function(x){return(sum(rowDataCombined$readCount[x]))})) @@ -112,10 +132,22 @@ filterTranscriptsByAnnotation <- function(rowDataCombined, annotationGrangesList "WARNING - No annotations were provided. Please increase NDR threshold to use novel transcripts") if(sum(filterSet)==0) message("WARNING - No novel transcripts meet the given thresholds. Try a higher NDR.") # (3) combine novel transcripts with annotations + extendedAnnotationRanges <- combindRowDataWithRanges(rowDataCombined, exonRangesCombined) extendedAnnotationRanges <- combineWithAnnotations( - rowDataCombined, exonRangesCombined, + rowDataCombined, extendedAnnotationRanges, annotationGrangesList, prefix) + minEqClasses <- + getMinimumEqClassByTx(extendedAnnotationRanges) # get eqClasses + if(!identical(names(extendedAnnotationRanges),minEqClasses$queryTxId)) warning('eq classes might be incorrect') + mcols(extendedAnnotationRanges)$eqClassById <- minEqClasses$eqClassById + extendedAnnotationRanges <- calculateRelSubsetCount(extendedAnnotationRanges, minEqClasses$eqClassById, min.readFractionByEqClass) + mcols(extendedAnnotationRanges) <- mcols(extendedAnnotationRanges)[, + c("TXNAME", "GENEID", "NDR", "novelGene", "novelTranscript", + "txClassDescription","readCount","relReadCount", + "relSubsetCount", "txid", "eqClassById", "maxTxScore", "maxTxScore.noFit")] metadata(extendedAnnotationRanges)$NDRthreshold = NDR + if (remove.subsetTx) metadata(extendedAnnotationRanges)$subsetTranscripts = subsetTranscripts + metadata(extendedAnnotationRanges)$lowConfidenceTranscripts = lowConfidenceTranscripts end.ptm <- proc.time() if (verbose) message("transcript filtering in ", 
round((end.ptm - start.ptm)[3] / 60, 1), " mins.") @@ -126,20 +158,22 @@ filterTranscriptsByAnnotation <- function(rowDataCombined, annotationGrangesList #' @noRd recommendNDR <- function(combinedTranscripts, baselineFDR = 0.1, NDR = NULL, defaultModels = defaultModels, verbose = FALSE){ if(verbose) message("-- Predicting annotation completeness to determine NDR threshold --") - equal = combinedTranscripts$readClassType == "equal:compatible" - equal[is.na(equal)] = FALSE - + combinedTranscripts <- combinedTranscripts[combinedTranscripts$maxTxScore.noFit >=0, ] #ignore filtered out read classes + equal <- combinedTranscripts$readClassType == "equal:compatible" + equal[is.na(equal)] <- FALSE #add envirnment so poly() works attr(defaultModels$lmNDR[["terms"]], ".Environment") <- new.env(parent = parent.env(globalenv())) - baseline = predict(defaultModels$lmNDR, newdata=data.frame(NDR=baselineFDR)) - attr(defaultModels$lmNDR[["terms"]], ".Environment") = c() - - NDRscores = calculateNDR(combinedTranscripts$maxTxScore.noFit, equal) - score = combinedTranscripts$maxTxScore.noFit - NDR.rec = predict(lm(NDRscores~poly(score,3,raw=TRUE)), newdata=data.frame(score=baseline)) - NDR.rec = round(NDR.rec,3) + baseline <- predict(defaultModels$lmNDR, newdata=data.frame(NDR=baselineFDR)) + attr(defaultModels$lmNDR[["terms"]], ".Environment") <- c() + + score <- combinedTranscripts$maxTxScore.noFit + score[is.na(score)] <- 0 + NDRscores <- calculateNDR(score, equal) + NDR.rec <- predict(lm(NDRscores~poly(score,3,raw=TRUE)), newdata=data.frame(score=baseline)) + NDR.rec <- round(NDR.rec,3) + if(NDR.rec > 1) NDR.rec <- 0.999 + if (NDR.rec < 0) NDR.rec <- 0 if(verbose) message("Recommended NDR for baseline FDR of ", baselineFDR, " = ", NDR.rec) - if (NDR.rec < 0) NDR.rec = 0 if(NDR.rec > 0.5){ message("A high NDR threshold is being recommended by Bambu indicating high levels of novel transcripts, ", "limiting the performance of the trained model") @@ -149,45 +183,58 @@ recommendNDR <- 
function(combinedTranscripts, baselineFDR = 0.1, NDR = NULL, def } #if users are using an NDR let them know if the recommended NDR is different - if(is.null(NDR)) - { - NDR = NDR.rec + if(is.null(NDR)) { + NDR <- NDR.rec message("Using a novel discovery rate (NDR) of: ", NDR) - } - else{ - if(abs(NDR.rec-NDR)>=0.1){ + } else if(abs(NDR.rec-NDR)>=0.1){ message(paste0("For your combination of sample and reference annotations we recommend an NDR of ", NDR.rec, ". You are currently using an NDR threshold of ", NDR, ". A higher NDR is suited for samples where the reference annotations are poor and more novel transcripts are expected,", "whereas a lower NDR is suited for samples with already high quality annotations")) - } } return(NDR) } +recommendNDR.onAnnotations <- function(annotations, prefix = "Bambu", baselineFDR = 0.1, defaultModels2 = defaultModels2){ + mcols <- mcols(annotations)[!is.na(mcols(annotations)$maxTxScore),] + equal <- !grepl(prefix, mcols$TXNAME) + #add envirnment so poly() works + attr(defaultModels2$lmNDR[["terms"]], ".Environment") <- new.env(parent = parent.env(globalenv())) + baseline <- predict(defaultModels2$lmNDR, newdata=data.frame(NDR=baselineFDR)) + attr(defaultModels2$lmNDR[["terms"]], ".Environment") <- c() + score <- mcols$maxTxScore.noFit + NDRscores <- calculateNDR(score, equal) + NDR.rec <- predict(lm(NDRscores~poly(score,3,raw=TRUE)), newdata=data.frame(score=baseline)) + NDR.rec <- round(NDR.rec,3) + return(NDR.rec) +} + #' Calculate NDR based on transcripts #' @noRd calculateNDROnTranscripts <- function(combinedTranscripts, useTxScore = FALSE){ # calculate and filter by NDR - equal = combinedTranscripts$readClassType == "equal:compatible" - equal[is.na(equal)] = FALSE + equal <- combinedTranscripts$readClassType == "equal:compatible" + equal[is.na(equal)] <- FALSE if(sum(equal, na.rm = TRUE)<50 | sum(!equal, na.rm = TRUE)<50 | useTxScore){ - combinedTranscripts$NDR = 1 - combinedTranscripts$maxTxScore + combinedTranscripts$NDR <- 
1 - combinedTranscripts$maxTxScore if(!useTxScore) message("WARNING - Less than 50 TRUE or FALSE read classes ", "for NDR precision stabilization.") message("NDR will be approximated as: (1 - Transcript Model Prediction Score)") - } else combinedTranscripts$NDR = calculateNDR(combinedTranscripts$maxTxScore, equal) + } else { + combinedTranscripts$NDR <- calculateNDR(combinedTranscripts$maxTxScore, equal) + } + combinedTranscripts$NDR[combinedTranscripts$maxTxScore==-1] <- 1 return(combinedTranscripts) } #' calculates the minimum NDR for each score #' @noRd -calculateNDR = function(score, labels){ - scoreOrder = order(score, decreasing = TRUE) - labels = labels[scoreOrder] - NDR = cumsum(!labels)/(seq_len(length(labels))) #calculate NDR - NDR = rev(cummin(rev(NDR))) #flatten NDR so its never higher than a lower ranked RC +calculateNDR <- function(score, labels){ + scoreOrder <- order(score, decreasing = TRUE) + labels <- labels[scoreOrder] + NDR <- cumsum(!labels)/(seq_len(length(labels))) #calculate NDR + NDR <- rev(cummin(rev(NDR))) #flatten NDR so its never higher than a lower ranked RC return(NDR[order(scoreOrder)]) #return to original order } @@ -198,8 +245,9 @@ calculateNDR = function(score, labels){ #' @noRd makeExonsIntronsSpliced <- function(transcriptsTibble,annotationSeqLevels){ if(all(is.na(transcriptsTibble$intronStarts))){ - intronsByReadClass = GRangesList()} - else { intronsByReadClass <- makeGRangesListFromFeatureFragments( + intronsByReadClass <- GRangesList() + } else { + intronsByReadClass <- makeGRangesListFromFeatureFragments( seqnames = transcriptsTibble$chr, fragmentStarts = transcriptsTibble$intronStarts, fragmentEnds = transcriptsTibble$intronEnds, @@ -612,10 +660,9 @@ includeOverlapReadClass <- function(candidateList, filteredOverlapList) { return(temp) } -#' combine annotations with predicted transcripts +#' extract the important row range columns and add them to the ranges for final output #' @noRd -combineWithAnnotations <- 
function(rowDataCombinedFiltered, - exonRangesCombinedFiltered,annotationGrangesList, prefix){ +combindRowDataWithRanges <- function(rowDataCombinedFiltered, exonRangesCombinedFiltered){ rowDataCombinedFiltered$txClassDescription <- rowDataCombinedFiltered$readClassType rowDataCombinedFiltered$txClassDescription[rowDataCombinedFiltered$readClassType == "unsplicedNew" & rowDataCombinedFiltered$novelGene] <- @@ -624,12 +671,25 @@ combineWithAnnotations <- function(rowDataCombinedFiltered, == "allNew" & rowDataCombinedFiltered$novelGene] <- "newGene-spliced" extendedAnnotationRanges <- exonRangesCombinedFiltered + if("NDR" %in% colnames(rowDataCombinedFiltered)){ + mcols(extendedAnnotationRanges) <- + rowDataCombinedFiltered[, c("TXNAME", "GENEID", "novelGene", "novelTranscript", "txClassDescription","readCount", "NDR", + "maxTxScore", "maxTxScore.noFit", "relReadCount")] + } else{ mcols(extendedAnnotationRanges) <- - rowDataCombinedFiltered[, c("GENEID", "novelGene", "novelTranscript", "txClassDescription","readCount", "NDR", - "relReadCount", "relSubsetCount")] - equalRanges = rowDataCombinedFiltered[!is.na(rowDataCombinedFiltered$TXNAME),] + rowDataCombinedFiltered[, c("TXNAME", "GENEID", "novelGene", "novelTranscript", "txClassDescription","readCount", + "maxTxScore", "maxTxScore.noFit", "relReadCount")] + } + return(extendedAnnotationRanges) +} + +#' combine annotations with predicted transcripts +#' @noRd +combineWithAnnotations <- function(rowDataCombinedFiltered, + extendedAnnotationRanges,annotationGrangesList, prefix){ + equalRanges <- rowDataCombinedFiltered[!(rowDataCombinedFiltered$novelTranscript),] #remove extended ranges that are already present in annotation - extendedAnnotationRanges <- extendedAnnotationRanges[is.na(rowDataCombinedFiltered$TXNAME)] + extendedAnnotationRanges <- extendedAnnotationRanges[rowDataCombinedFiltered$novelTranscript] annotationRangesToMerge <- annotationGrangesList if(length(annotationGrangesList)){ 
mcols(annotationRangesToMerge)$readCount <- NA @@ -637,16 +697,20 @@ combineWithAnnotations <- function(rowDataCombinedFiltered, mcols(annotationRangesToMerge)$novelTranscript <- FALSE mcols(annotationRangesToMerge)$novelGene <- FALSE mcols(annotationRangesToMerge)$NDR <- NA + mcols(annotationRangesToMerge)$maxTxScore <- NA + mcols(annotationRangesToMerge)$maxTxScore.noFit <- NA mcols(extendedAnnotationRanges) <- mcols(extendedAnnotationRanges)[,colnames(mcols(extendedAnnotationRanges))] #copy over stats to annotations from read classes - mcols(annotationRangesToMerge[equalRanges$TXNAME])$NDR = equalRanges$NDR - mcols(annotationRangesToMerge[equalRanges$TXNAME])$readCount = equalRanges$readCount - mcols(annotationRangesToMerge[equalRanges$TXNAME])$relReadCount = equalRanges$relReadCount - mcols(annotationRangesToMerge[equalRanges$TXNAME])$relSubsetCount = equalRanges$relSubsetCount + mcols(annotationRangesToMerge[equalRanges$TXNAME])$NDR <- equalRanges$NDR + mcols(annotationRangesToMerge[equalRanges$TXNAME])$maxTxScore <- equalRanges$maxTxScore + mcols(annotationRangesToMerge[equalRanges$TXNAME])$readCount <- equalRanges$readCount + mcols(annotationRangesToMerge[equalRanges$TXNAME])$relReadCount <- equalRanges$relReadCount + mcols(annotationRangesToMerge[equalRanges$TXNAME])$maxTxScore <- equalRanges$maxTxScore + mcols(annotationRangesToMerge[equalRanges$TXNAME])$maxTxScore.noFit <- equalRanges$maxTxScore.noFit + #mcols(annotationRangesToMerge[equalRanges$TXNAME])$relSubsetCount = equalRanges$relSubsetCount } if (length(extendedAnnotationRanges)) { - mcols(extendedAnnotationRanges)$TXNAME <- paste0( - prefix, "Tx", seq_along(extendedAnnotationRanges)) + mcols(extendedAnnotationRanges)$TXNAME <- rowDataCombinedFiltered[rowDataCombinedFiltered$novelTranscript,]$TXNAME names(extendedAnnotationRanges) <- mcols(extendedAnnotationRanges)$TXNAME extendedAnnotationRanges <- c(extendedAnnotationRanges, annotationRangesToMerge) # this will throw error in line 648-649 when 
extendedAnnotationRanges is empty @@ -654,18 +718,27 @@ combineWithAnnotations <- function(rowDataCombinedFiltered, }else{ extendedAnnotationRanges <- annotationRangesToMerge mcols(extendedAnnotationRanges)$txid <- seq_along(extendedAnnotationRanges) - mcols(extendedAnnotationRanges)$relReadCount = NA - mcols(extendedAnnotationRanges)$relSubsetCount = NA + mcols(extendedAnnotationRanges)$relReadCount <- NA + #mcols(extendedAnnotationRanges)$relSubsetCount = NA } - minEqClasses <- - getMinimumEqClassByTx(extendedAnnotationRanges) # get eqClasses - if(!identical(names(extendedAnnotationRanges),minEqClasses$queryTxId)) warning('eq classes might be incorrect') - mcols(extendedAnnotationRanges)$eqClassById <- minEqClasses$eqClassById - mcols(extendedAnnotationRanges) <- mcols(extendedAnnotationRanges)[, - c("TXNAME", "GENEID", "NDR", "novelGene", "novelTranscript", "txClassDescription","readCount","relReadCount", "relSubsetCount", "txid", "eqClassById")] return(extendedAnnotationRanges) } +#' calculate relative subset read count after filtering (increase speed, subsets are not considered here)' +#' @noRd +calculateRelSubsetCount <- function(extendedAnnotationRanges, minEq, min.readFractionByEqClass){ + filter <- !is.na(mcols(extendedAnnotationRanges)$readCount) + mcols(extendedAnnotationRanges)$relSubsetCount <- NA + mcols(extendedAnnotationRanges)$relSubsetCount[filter] <- + mcols(extendedAnnotationRanges)$readCount[filter]/ + unlist(lapply(minEq[filter], function(x){return(sum(mcols(extendedAnnotationRanges)$readCount[x], na.rm = TRUE))})) + #post extend annotation filters applied here (currently only subset filter) + if(min.readFractionByEqClass>0) { # filter out subset transcripts based on relative expression + filterSet <- is.na(mcols(extendedAnnotationRanges)$relSubsetCount) | mcols(extendedAnnotationRanges)$relSubsetCount > min.readFractionByEqClass + extendedAnnotationRanges <- extendedAnnotationRanges[filterSet] + } + return(extendedAnnotationRanges) +} #' 
Estimate distance between read class and annotations #' @param seReadClass seReadClass @@ -686,10 +759,11 @@ isore.estimateDistanceToAnnotations <- function(seReadClass, primarySecondaryDistStartEnd = min.primarySecondaryDistStartEnd, ignore.strand = FALSE) distTable$readCount <- assays(seReadClass)$counts[distTable$readClassId, ] - if (additionalFiltering) - distTable <- left_join(distTable, select(readClassTable, - readClassId, confidenceType), by = "readClassId") %>% - mutate(relativeReadCount = readCount / txNumberFiltered) +# if (additionalFiltering) +# distTable <- left_join(distTable, select(readClassTable, +# readClassId, confidenceType), by = "readClassId") %>% +# mutate(relativeReadCount = readCount / txNumberFiltered) + distTable <- dplyr::select(distTable, annotationTxId, txid, readClassId, readCount, compatible, equal,dist) distTable <- left_join(distTable, as_tibble(mcols(annotationGrangesList)[, c("txid", "GENEID")]), @@ -747,3 +821,113 @@ addGeneIdsToReadClassTable <- function(readClassTable, distTable, round((end.ptm - start.ptm)[3] / 60, 1), " mins.") return(readClassTable) } + +#' Function to change NDR threshold on extendedAnnotations +#' @title Function to change NDR threshold on extendedAnnotations +#' @description This function applies a new NDR threshold to extendedAnnotations, moving transcripts into or out of metadata(extendedAnnotations)$lowConfidenceTranscripts +#' @param extendedAnnotations A GRangesList object produced from bambu(quant = FALSE) or rowRanges(se) +#' @param NDR The maximum NDR for novel transcripts to be in extendedAnnotations (0-1). If not provided a recommended NDR is calculated. +#' @param includeRef A boolean which if TRUE will also filter out reference annotations based on their NDR +#' @param prefix A string which determines which transcripts are considered novel by bambu and will be filtered (by default = 'Bambu') +#' @param baselineFDR a value between 0-1. Bambu uses this FDR on the trained model to recommend an equivalent NDR threshold to be used for the sample. By default, a baseline FDR of 0.1 is used. 
This does not impact the analysis if an NDR is set. +#' @param defaultModels2 a bambu trained model object that bambu will use when fitReadClassModel==FALSE or the data is not suitable for training, defaults to the pretrained model in the bambu package +#' Output - returns a similar GRangesList object with entries swapped into or out of metadata(extendedAnnotations)$lowConfidenceTranscripts +#' @details +#' @return extendedAnnotations with a new NDR threshold +#' @export +setNDR <- function(extendedAnnotations, NDR = NULL, includeRef = FALSE, prefix = 'Bambu', baselineFDR = 0.1, defaultModels2 = defaultModels){ + #Check to see if the annotations/gtf are derived from Bambu + if(is.null(mcols(extendedAnnotations)$NDR)){ + warning("Annotations were not extended by Bambu (or the wrong prefix was provided). NDR can not be set") + return(extendedAnnotations) + } + + if(is.null(metadata(extendedAnnotations)$lowConfidenceTranscripts)) + metadata(extendedAnnotations)$lowConfidenceTranscripts = GRangesList() + + #recommend an NDR (needed when users read in Bambu GTF) + if(is.null(NDR)){ + tempAnno <- c(extendedAnnotations, metadata(extendedAnnotations)$lowConfidenceTranscripts) + NDR <- recommendNDR.onAnnotations(tempAnno, prefix = prefix, baselineFDR = baselineFDR, defaultModels2 = defaultModels2) + message("Recommending a novel discovery rate (NDR) of: ", NDR) + } + + #If reference annotations should be filtered too (note that reference annotations with no read support aren't filtered) + if(includeRef){ + toRemove <- (!is.na(mcols(extendedAnnotations)$NDR) & mcols(extendedAnnotations)$NDR > NDR) + toAdd <- !is.na(mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR) & + mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR <= NDR + } else { + toRemove <- (mcols(extendedAnnotations)$NDR > NDR & + grepl(prefix, mcols(extendedAnnotations)$TXNAME)) + toAdd <- (mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR <= NDR & + grepl(prefix, 
mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$TXNAME)) + } + + temp <- c(metadata(extendedAnnotations)$lowConfidenceTranscripts[!toAdd], extendedAnnotations[toRemove]) + extendedAnnotations <- c(extendedAnnotations[!toRemove], metadata(extendedAnnotations)$lowConfidenceTranscripts[toAdd]) + metadata(extendedAnnotations)$lowConfidenceTranscripts <- temp + + mcols(extendedAnnotations)$txid <- seq_along(extendedAnnotations) + minEqClasses <- getMinimumEqClassByTx(extendedAnnotations) + mcols(extendedAnnotations)$eqClassById <- minEqClasses$eqClassById + + metadata(extendedAnnotations)$NDRthreshold <- NDR + + return(extendedAnnotations) +} + + +#' Extend annotations by clusters +#' @noRd +isore.extendAnnotations.clusters <- function(readClassList, annotations, clusters, NDR, isoreParameters, stranded, bpParameters, fusionMode, verbose = FALSE){ + message("--- Start extending annotations for clusters ---") + #if clustering is a csv, create a list with the barcodes for each cluster + #csv must have two cols with heading barcode, cluster + if(!is.list(clusters)){ + clusters <- read.csv(clusters) + clusters <- clusters %>% group_by(cluster) %>% summarise(barcodes = list(barcode)) + clusterNames <- clusters$cluster + clusters <- clusters$barcodes + names(clusters) <- clusterNames + } + annotations.clusters <- list() + rcfs.clusters <- list() + clusters.rc <- splitReadClassFilesByRC(readClassList[[1]]) + txScores <- c() + for(i in seq_along(clusters)){ + print(names(clusters)[i]) + ###TODO need to account for the sample name here which is added to the barcode + index <- match(clusters[[i]],gsub('demultiplexed','',metadata(readClassList[[1]])$samples)) + index <- index[!is.na(index)] + print(length(index)) + if(length(index)<20) next + rcf.counts <- clusters.rc[,index] + rcf.filt <- readClassList[[1]][rowSums(rcf.counts)>0,] + rowData(rcf.filt)$readCount <- rowSums(rcf.counts)[rowSums(rcf.counts)>0] + countsTBL <- 
calculateGeneProportion(counts=mcols(rcf.filt)$readCount, + geneIds=mcols(rcf.filt)$GENEID) + rowData(rcf.filt)$geneReadProp <- countsTBL$geneReadProp + rowData(rcf.filt)$geneReadCount <- countsTBL$geneReadCount + rowData(rcf.filt)$startSD <- 0 + rowData(rcf.filt)$endSD <- 0 + rowData(rcf.filt)$readCount.posStrand <- 0 + thresholdIndex <- which(rowData(rcf.filt)$readCount>=isoreParameters$min.readCount) + model <- trainBambu(rcf.filt, verbose = verbose, min.readCount = isoreParameters$min.readCount) + txScore <- getTranscriptScore(rowData(rcf.filt)[thresholdIndex,], model, + defaultModels) + rowData(rcf.filt)$txScore <- rep(NA,nrow(rcf.filt)) + rowData(rcf.filt)$txScore[thresholdIndex] <- txScore + #txScores = cbind(txScores, rowData(rcf.filt)$txScore) + rcfs.clusters[[names(clusters)[i]]] <- rcf.filt + annotations.clusters[[names(clusters)[i]]] <- bambu.extendAnnotations(list(rcf.filt), annotations, NDR, + isoreParameters, stranded, bpParameters, fusionMode, verbose) + } + if(length(rcfs.clusters)>0){ + print("--- Merging all individual clusters ---") + annotations.clusters[["merged"]] <- bambu.extendAnnotations(rcfs.clusters, annotations, NDR, + isoreParameters, stranded, bpParameters, fusionMode, verbose) + } + + return(annotations.clusters) +} \ No newline at end of file diff --git a/R/bambu-processReads.R b/R/bambu-processReads.R index 31246d35..b62e67a1 100644 --- a/R/bambu-processReads.R +++ b/R/bambu-processReads.R @@ -15,7 +15,8 @@ bambu.processReads <- function(reads, annotations, genomeSequence, readClass.outputDir=NULL, yieldSize=1000000, bpParameters, stranded=FALSE, verbose=FALSE, isoreParameters = setIsoreParameters(NULL), - lowMemory=FALSE, trackReads = trackReads, fusionMode = fusionMode) { + processByChromosome = FALSE, processByBam = TRUE, trackReads = trackReads, fusionMode = fusionMode, + demultiplexed = FALSE, cleanReads = FALSE, dedupUMI = FALSE, sampleNames = NULL, barcodesToFilter = NULL) { genomeSequence <- 
checkInputSequence(genomeSequence) # ===# create BamFileList object from character #===# if (is(reads, "BamFile")) { @@ -39,20 +40,84 @@ bambu.processReads <- function(reads, annotations, genomeSequence, reads <- BamFileList(reads, yieldSize = yieldSize) names(reads) <- tools::file_path_sans_ext(BiocGenerics::basename(reads)) } - min.readCount = isoreParameters[["min.readCount"]] - fitReadClassModel = isoreParameters[["fitReadClassModel"]] - defaultModels = isoreParameters[["defaultModels"]] - returnModel = isoreParameters[["returnModel"]] - min.exonOverlap = isoreParameters[["min.exonOverlap"]] - readClassList <- bplapply(names(reads), function(bamFileName) { - bambu.processReadsByFile(bam.file = reads[bamFileName], - genomeSequence = genomeSequence,annotations = annotations, - readClass.outputDir = readClass.outputDir, - stranded = stranded, min.readCount = min.readCount, - fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap, - defaultModels = defaultModels, returnModel = returnModel, verbose = verbose, - lowMemory = lowMemory, trackReads = trackReads, fusionMode = fusionMode)}, - BPPARAM = bpParameters) + if(!is.null(sampleNames)){ + if(length(sampleNames)==length(reads)){ + names(reads) <- sampleNames + } else{ + message("Not enough provided sample names. 
Using them in order of inputted files and the remaining files will use the file names") + names(reads)[seq_along(sampleNames)] <- sampleNames + } + } + min.readCount <- isoreParameters[["min.readCount"]] + fitReadClassModel <- isoreParameters[["fitReadClassModel"]] + defaultModels <- isoreParameters[["defaultModels"]] + returnModel <- isoreParameters[["returnModel"]] + min.exonOverlap <- isoreParameters[["min.exonOverlap"]] + + if(processByBam){ # bulk mode + readClassList <- bplapply(seq_along(reads), function(i) { + bambu.processReadsByFile(bam.file = reads[i], + genomeSequence = genomeSequence,annotations = annotations, + stranded = stranded, min.readCount = min.readCount, + fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap, + defaultModels = defaultModels, returnModel = returnModel, verbose = verbose, + processByChromosome = processByChromosome, trackReads = trackReads, fusionMode = fusionMode, + demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI, index = 1, barcodesToFilter = barcodesToFilter)}, + BPPARAM = bpParameters) + } else { # single cell mode + readGrgList <- bplapply(seq_along(reads), function(i) { + bambu.readsByFile(bam.file = reads[i], + genomeSequence = genomeSequence,annotations = annotations, + stranded = stranded, min.readCount = min.readCount, + fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap, + defaultModels = defaultModels, returnModel = returnModel, verbose = verbose, + trackReads = trackReads, fusionMode = fusionMode, + demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI, index = i, barcodesToFilter = barcodesToFilter)}, + BPPARAM = bpParameters) + sampleNames <- as.numeric(as.factor(sampleNames)) + for(i in seq_along(readGrgList)){ + if(!isFALSE(demultiplexed)){ + mcols(readGrgList[[i]])$BC <- paste0(names(reads)[i], '_', mcols(readGrgList[[i]])$BC) + } else{ + mcols(readGrgList[[i]])$BC <- sampleNames[i] + } + + mcols(readGrgList[[i]])$BC <- 
as.factor(mcols(readGrgList[[i]])$BC) + + if(!isFALSE(demultiplexed)){ + mcols(readGrgList[[i]])$sampleID <- as.numeric(mcols(readGrgList[[i]])$BC) + } else { + mcols(readGrgList[[i]])$sampleID <- i + } + } + readGrgList <- do.call(c, readGrgList) + mcols(readGrgList)$id <- seq_along(readGrgList) + readClassList <- constructReadClasses(readGrgList, genomeSequence = genomeSequence,annotations = annotations, + stranded = stranded, min.readCount = min.readCount, + fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap, + defaultModels = defaultModels, returnModel = returnModel, verbose = verbose, + processByChromosome = processByChromosome, trackReads = trackReads, fusionMode = fusionMode) + metadata(readClassList)$samples <- names(reads) + metadata(readClassList)$sampleNames <- names(reads) + if(!isFALSE(demultiplexed)) metadata(readClassList)$samples <- levels(mcols(readGrgList)$BC) + readClassList <- list(readClassList) + } + + if (!is.null(readClass.outputDir)) { + for(i in seq_along(readClassList)){ + readClassFile <- "combinedSamples" + if(processByBam){ + readClassFile <- metadata(readClassList[[i]])$sampleNames + } + readClassFile <- BiocFileCache::bfcnew(BiocFileCache::BiocFileCache( + readClass.outputDir, ask = FALSE), + paste0(readClassFile,"_readClassSe"), ext = ".rds") + saveRDS(readClassList[[i]], file = readClassFile) + readClassList[[i]] <- readClassFile + } + + } + #TODO don't output list, current there because discovery needs it return(readClassList) } @@ -61,13 +126,17 @@ bambu.processReads <- function(reads, annotations, genomeSequence, #' @importFrom GenomeInfoDb seqlevels seqlevels<- keepSeqlevels #' @noRd bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations, - readClass.outputDir = NULL, stranded = FALSE, min.readCount = 2, + yieldSize = NULL, stranded = FALSE, min.readCount = 2, fitReadClassModel = TRUE, min.exonOverlap = 10, defaultModels = NULL, returnModel = FALSE, - verbose = FALSE, lowMemory = FALSE, 
trackReads = FALSE, fusionMode = FALSE) { + verbose = FALSE, processByChromosome = FALSE, trackReads = FALSE, fusionMode = FALSE, demultiplexed = FALSE, + cleanReads = FALSE, dedupUMI = FALSE, index = 0, barcodesToFilter = NULL) { if(verbose) message(names(bam.file)[1]) - readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, use.names = trackReads) - warnings = c() - warnings = seqlevelCheckReadsAnnotation(readGrgList, annotations) + readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, yieldSize = yieldSize, use.names = trackReads, demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI) + if(verbose) message(paste0("Number of alignments/reads: ",length(readGrgList))) + warnings <- c() + if(!is.null(barcodesToFilter) & !isFALSE(demultiplexed)) + readGrgList <- readGrgList[!(mcols(readGrgList)$BC %in% barcodesToFilter)] + warnings <- seqlevelCheckReadsAnnotation(readGrgList, annotations) if(verbose & length(warnings) > 0) warning(paste(warnings,collapse = "\n")) #check seqlevels for consistency, drop ranges not present in genomeSequence refSeqLevels <- seqlevels(genomeSequence) @@ -75,17 +144,17 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations, refSeqLevels <- intersect(refSeqLevels, seqlevels(readGrgList)) if (!all(seqlevels(annotations) %in% refSeqLevels)&(!(length(annotations)==0))) { refSeqLevels <- intersect(refSeqLevels, seqlevels(annotations)) - warningText = paste0("not all chromosomes from annotations present in ", + warningText <- paste0("not all chromosomes from annotations present in ", "reference genome sequence, annotations without reference genomic sequence ", "are dropped") - warnings = c(warnings, warningText) + warnings <- c(warnings, warningText) if(verbose) warning(warningText) annotations <- keepSeqlevels(annotations, value = refSeqLevels, pruning.mode = "coarse") } - warningText = paste0("not all chromosomes from reads present in reference ", + warningText <- 
paste0("not all chromosomes from reads present in reference ", "genome sequence, reads without reference chromosome sequence are dropped") - warnings = c(warnings, warningText) + warnings <- c(warnings, warningText) if(verbose) warning(warningText) readGrgList <- keepSeqlevels(readGrgList, value = refSeqLevels, pruning.mode = "coarse") @@ -93,48 +162,57 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations, mcols(readGrgList)$id <- seq_along(readGrgList) } #removes reads that are outside genome coordinates - badReads = which(max(end(ranges(readGrgList)))>= + badReads <- which(max(end(ranges(readGrgList)))> seqlengths(genomeSequence)[as.character(getChrFromGrList(readGrgList))]) if(length(badReads) > 0 ){ - readGrgList = readGrgList[-badReads] - warningText = paste0(length(badReads), " reads are mapped outside the provided ", + readGrgList <- readGrgList[-badReads] + warningText <- paste0(length(badReads), " reads are mapped outside the provided ", "genomic regions. These reads will be dropped. 
Check you are using the ", "same genome used for the alignment") - warnings = c(warnings, warningText) + warnings <- c(warnings, warningText) if(verbose) warning(warningText) } - if(length(readGrgList) == 0) { + if(length(readGrgList) == 0) stop("No reads left after filtering.") + + mcols(readGrgList)$id <- seq_along(readGrgList) + + sampleName <- names(bam.file)[1] + if(!isFALSE(demultiplexed)){ + mcols(readGrgList)$BC <- paste0(sampleName, '_', mcols(readGrgList)$BC) + } else{ + mcols(readGrgList)$BC <- sampleName + } + mcols(readGrgList)$BC <- as.factor(mcols(readGrgList)$BC) + if(!isFALSE(demultiplexed)){ + mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$BC) + } else { + mcols(readGrgList)$sampleID <- index } + # construct read classes for each chromosome seperately - if(lowMemory) se <- lowMemoryConstructReadClasses(readGrgList, genomeSequence, + if(processByChromosome){ + se <- lowMemoryConstructReadClasses(readGrgList, genomeSequence, annotations, stranded, verbose,bam.file) - else { + } else{ unlisted_junctions <- unlistIntrons(readGrgList, use.ids = TRUE) - if(length(unlisted_junctions)==0){ - warningText = paste0("No aligned spliced reads detected!", - "Bambu expects spliced reads. 
If this is intended, ", - "see Documentation on how to handle single-exon ", - "transcripts") - warnings = c(warnings, warningText) - if (verbose) warning(warningText) - } uniqueJunctions <- isore.constructJunctionTables(unlisted_junctions, annotations,genomeSequence, stranded = stranded, verbose = verbose) - # create SE object with reconstructed readClasses - se <- isore.constructReadClasses(readGrgList, unlisted_junctions, - uniqueJunctions, runName = names(bam.file)[1], - annotations, stranded, verbose) + se <- isore.constructReadClasses(readGrgList, + unlisted_junctions, uniqueJunctions, runName = "TODO", + annotations, stranded, verbose) + } - metadata(se)$warnings = warnings + + metadata(se)$warnings <- warnings if(trackReads){ - metadata(se)$readNames = names(readGrgList) - metadata(se)$readId = mcols(readGrgList)$id + metadata(se)$readNames <- names(readGrgList) + metadata(se)$readId <- mcols(readGrgList)$id } - rm(readGrgList) + refSeqLevels <- seqlevels(genomeSequence) GenomeInfoDb::seqlevels(se) <- refSeqLevels # create SE object with reconstructed readClasses - se <- scoreReadClasses(se,genomeSequence, annotations, + se <- scoreReadClasses(se, genomeSequence, annotations, defaultModels = defaultModels, fit = fitReadClassModel, returnModel = returnModel, @@ -142,40 +220,157 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations, min.exonOverlap = min.exonOverlap, fusionMode = fusionMode, verbose = verbose) - if (!is.null(readClass.outputDir)) { - readClassFile <- paste0(readClass.outputDir,names(bam.file), - "_readClassSe.rds") - if (file.exists(readClassFile)) { - show(paste(readClassFile, "exists, will be overwritten")) - warning(readClassFile, "exists, will be overwritten") - } else { - readClassFile <- BiocFileCache::bfcnew(BiocFileCache::BiocFileCache( - readClass.outputDir, ask = FALSE), - paste0(names(bam.file),"_readClassSe"), ext = ".rds") + + metadata(se)$samples <- names(bam.file)[1] + metadata(se)$sampleNames <- 
names(bam.file)[1] + if(!isFALSE(demultiplexed)) metadata(se)$samples <- levels(mcols(readGrgList)$BC) + return(se) +} + +#' Preprocess bam files and save read class files +#' @inheritParams bambu +#' @importFrom GenomeInfoDb seqlevels seqlevels<- keepSeqlevels +#' @noRd +bambu.readsByFile <- function(bam.file, genomeSequence, annotations, + yieldSize = NULL, stranded = FALSE, min.readCount = 2, + fitReadClassModel = TRUE, min.exonOverlap = 10, defaultModels = NULL, returnModel = FALSE, + verbose = FALSE, trackReads = FALSE, fusionMode = FALSE, demultiplexed = FALSE, + cleanReads = TRUE, dedupUMI = FALSE, index = 0, barcodesToFilter = NULL) { + readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, yieldSize = yieldSize, use.names = trackReads, demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI) + + if(!is.null(barcodesToFilter) & !isFALSE(demultiplexed)) readGrgList <- readGrgList[!mcols(readGrgList)$BC %in% barcodesToFilter] + + if(verbose) message("Number of alignments/reads: ",length(readGrgList)) + + warnings <- c() + warnings <- seqlevelCheckReadsAnnotation(readGrgList, annotations) + + if(verbose & length(warnings) > 0) warning(paste(warnings,collapse = "\n")) + #check seqlevels for consistency, drop ranges not present in genomeSequence + refSeqLevels <- seqlevels(genomeSequence) + if (!all(seqlevels(readGrgList) %in% refSeqLevels)) { + refSeqLevels <- intersect(refSeqLevels, seqlevels(readGrgList)) + if (!all(seqlevels(annotations) %in% refSeqLevels)&(!(length(annotations)==0))) { + refSeqLevels <- intersect(refSeqLevels, seqlevels(annotations)) + warningText <- paste0("not all chromosomes from annotations present in ", + "reference genome sequence, annotations without reference genomic sequence ", + "are dropped") + warnings <- c(warnings, warningText) + if(verbose) warning(warningText) + annotations <- keepSeqlevels(annotations, value = refSeqLevels, + pruning.mode = "coarse") } - saveRDS(se, file = readClassFile) - 
se <- readClassFile + warningText <- paste0("not all chromosomes from reads present in reference ", + "genome sequence, reads without reference chromosome sequence are dropped") + warnings <- c(warnings, warningText) + if(verbose) warning(warningText) + readGrgList <- keepSeqlevels(readGrgList, value = refSeqLevels, + pruning.mode = "coarse") + # reassign Ids after seqlevels are dropped + mcols(readGrgList)$id <- seq_along(readGrgList) } + #removes reads that are outside genome coordinates + badReads <- which(max(end(ranges(readGrgList)))> + seqlengths(genomeSequence)[as.character(getChrFromGrList(readGrgList))]) + if(length(badReads) > 0 ){ + readGrgList <- readGrgList[-badReads] + warningText <- paste0(length(badReads), " reads are mapped outside the provided ", + "genomic regions. These reads will be dropped. Check you are using the ", + "same genome used for the alignment") + warnings <- c(warnings, warningText) + if(verbose) warning(warningText) + } + + ### add ### + # reassign Ids after seqlevels are dropped + mcols(readGrgList)$id <- seq_along(readGrgList) + ### add ### + if(verbose) message("Number of post-filter alignments/reads: ",length(readGrgList)) + if(length(readGrgList) == 0) + stop("No reads left after filtering.") + + ## add ### + #if (isTRUE(demultiplexed)){ + # cellBarcodeAssign <- tibble(index = mcols(readGrgList)$id, CB = mcols(readGrgList)$CB) %>% nest(.by = "CB") + + # if (!dir.exists("CB")){ + # dir.create("CB") + # } else{ + # unlink(paste("CB", "*", sep = "/")) + # } + + # invisible(lapply(seq(nrow(cellBarcodeAssign)), + # function(x){saveRDS(readGrgList[pull(cellBarcodeAssign$data[[x]])], paste0("CB/", cellBarcodeAssign$CB[[x]],".rds"))})) + #} + return(readGrgList) +} + +#' Construct read classes +#' @noRd +constructReadClasses <- function(readGrgList, genomeSequence, annotations, + stranded = FALSE, min.readCount = 2, + fitReadClassModel = TRUE, min.exonOverlap = 10, defaultModels = NULL, returnModel = FALSE, + verbose = FALSE, 
processByChromosome = FALSE, trackReads = FALSE, fusionMode = FALSE){ + warnings <- c() ###TODO + + if(processByChromosome){ + # construct read classes for each chromosome seperately + se <- lowMemoryConstructReadClasses(readGrgList, genomeSequence, + annotations, stranded, verbose,"TODO", fusionMode) + } else{ + unlisted_junctions <- unlistIntrons(readGrgList, use.ids = TRUE) + uniqueJunctions <- isore.constructJunctionTables(unlisted_junctions, + annotations,genomeSequence, stranded = stranded, verbose = verbose) + se <- isore.constructReadClasses(readGrgList, + unlisted_junctions, uniqueJunctions, runName = "TODO", + annotations, stranded, verbose) + + } + metadata(se)$warnings <- warnings + if(trackReads){ + metadata(se)$readNames <- names(readGrgList) + metadata(se)$readId <- mcols(readGrgList)$id + } + rm(readGrgList) + refSeqLevels <- seqlevels(genomeSequence) + GenomeInfoDb::seqlevels(se) <- refSeqLevels + # create SE object with reconstructed readClasses + se <- scoreReadClasses(se, genomeSequence, annotations, + defaultModels = defaultModels, + fit = fitReadClassModel, + returnModel = returnModel, + min.readCount = min.readCount, + min.exonOverlap = min.exonOverlap, + fusionMode = fusionMode, + verbose = verbose) return(se) } + +#' Low memory mode for construct read classes (processByChromosome) +#' @noRd lowMemoryConstructReadClasses <- function(readGrgList, genomeSequence, - annotations, stranded, verbose,bam.file){ - readGrgList = split(readGrgList, getChrFromGrList(readGrgList)) - se = lapply(names(readGrgList),FUN = function(i){ + annotations, stranded, verbose,bam.file, fusionMode = FALSE){ + if(fusionMode){ + readGrgList <- list(readGrgList) + names(readGrgList) <- c("fusion") + } else{ + readGrgList <- split(readGrgList, getChrFromGrList(readGrgList)) + } + se <- lapply(names(readGrgList),FUN = function(i){ if(length(readGrgList[[i]]) == 0) return(NULL) # create error and strand corrected junction tables unlisted_junctions <- 
unlistIntrons(readGrgList[[i]], use.ids = TRUE) uniqueJunctions <- isore.constructJunctionTables(unlisted_junctions, annotations,genomeSequence, stranded = stranded, verbose = verbose) se.temp <- isore.constructReadClasses(readGrgList[[i]], - unlisted_junctions, uniqueJunctions, runName = names(bam.file)[1], + unlisted_junctions, uniqueJunctions, runName = "TODO", annotations, stranded, verbose) return(se.temp) }) - se = se[!sapply(se, FUN = is.null)] - se = do.call("rbind",se) - rownames(se) = paste("rc", seq_len(nrow(se)), sep = ".") + se <- se[!sapply(se, FUN = is.null)] + se <- do.call("rbind",se) + rownames(se) <- paste("rc", seq_len(nrow(se)), sep = ".") return(se) } @@ -183,15 +378,79 @@ lowMemoryConstructReadClasses <- function(readGrgList, genomeSequence, #' @importFrom GenomeInfoDb seqlevels #' @noRd seqlevelCheckReadsAnnotation <- function(reads, annotations){ - warnings = c() + warnings <- c() if (length(intersect(seqlevels(reads), seqlevels(annotations))) == 0) - warnings = c(warnings, paste0("no annotations with matching seqlevel styles, ", + warnings <- c(warnings, paste0("no annotations with matching seqlevel styles, ", "all missing chromosomes will use de-novo annotations")) if (!all(seqlevels(reads) %in% seqlevels(annotations))) - warnings = c(warnings, paste0("not all chromosomes present in reference annotations, ", + warnings <- c(warnings, paste0("not all chromosomes present in reference annotations, ", "annotations might be incomplete. 
Please compare objects ", "on the same reference")) return(warnings) -} \ No newline at end of file +} + + +#' Split read class files +#' @importFrom Matrix sparseMatrix +#' @noRd +splitReadClassFiles = function(readClassFile){ + distTable <- metadata(metadata(readClassFile)$readClassDist)$distTable + eqClasses <- distTable %>% group_by(eqClassById) %>% + distinct(eqClassById, readCount,GENEID, totalWidth, firstExonWidth, .keep_all = TRUE) + eqClasses$sampleIDs <- rowData(readClassFile)$sampleIDs[match(eqClasses$readClassId, rownames(readClassFile))] + eqClasses <- eqClasses %>% summarise(nobs = sum(readCount), + sampleIDs = list(unlist(sampleIDs))) + counts.table <- tableFunction(eqClasses$sampleIDs) + counts <- sparseMatrix( + i = rep(seq_along(counts.table), lengths(counts.table)), + j = as.numeric(names(unlist(counts.table))), + x = unlist(counts.table), + dims = c(nrow(eqClasses), length(metadata(readClassFile)$samples))) + #incompatible counts + distTable <- metadata(metadata(readClassFile)$readClassDist)$distTable.incompatible + if(nrow(distTable)==0) { + counts.incompatible <- sparseMatrix(i= 1, j = 1, x = 0, + dims = c(1, length(metadata(readClassFile)$samples))) + rownames(counts.incompatible) <- "TODO" + } else{ + distTable$sampleIDs <- rowData(readClassFile)$sampleIDs[match(distTable$readClassId, rownames(readClassFile))] + distTable <- distTable %>% group_by(GENEID.i) %>% summarise(counts = sum(readCount), + sampleIDs = list(unlist(sampleIDs))) + counts.table <- lapply(distTable$sampleIDs, FUN = function(x){table(x)}) + counts.incompatible <- sparseMatrix( + i = rep(seq_along(counts.table), lengths(counts.table)), + j = as.numeric(names(unlist(counts.table))), + x = unlist(counts.table), + dims = c(nrow(distTable), length(metadata(readClassFile)$samples))) + colnames(counts.incompatible) <- metadata(readClassFile)$samples + rownames(counts.incompatible) <- distTable$GENEID.i + } + colnames(counts) <- metadata(readClassFile)$samples + 
metadata(readClassFile)$eqClassById <- eqClasses$eqClassById + #rownames(counts) = eqClasses$eqClassById + metadata(readClassFile)$countMatrix <- counts + metadata(readClassFile)$incompatibleCountMatrix <- counts.incompatible + return(readClassFile) +} + + +#' Split read class files by RC +#' @importFrom Matrix sparseMatrix +#' @noRd +splitReadClassFilesByRC <- function(readClassFile){ + counts.table <- tableFunction(rowData(readClassFile)$sampleIDs) + counts <- sparseMatrix( + i = rep(seq_along(counts.table), lengths(counts.table)), + j = as.numeric(names(unlist(counts.table))), + x = unlist(counts.table), + dims = c(nrow(readClassFile), length(metadata(readClassFile)$samples))) + return(counts) +} + +#' table sample IDs list column +#' @noRd +tableFunction <- function(xList){ + return(lapply(xList, function(x) table(x))) +} diff --git a/R/bambu-processReads_scoreReadClasses.R b/R/bambu-processReads_scoreReadClasses.R index bbb2a75f..44045f2f 100644 --- a/R/bambu-processReads_scoreReadClasses.R +++ b/R/bambu-processReads_scoreReadClasses.R @@ -7,6 +7,7 @@ scoreReadClasses = function(se, genomeSequence, annotations, defaultModels, fit = TRUE, returnModel = FALSE, min.readCount = 2, min.exonOverlap = 10, fusionMode = FALSE, verbose = FALSE){ + message(paste0("Number of Read Classes - ", nrow(se))) start.ptm <- proc.time() options(scipen = 999) #maintain numeric basepair locations not sci.notfi. 
geneIds = assignGeneIds(rowRanges(se), annotations, min.exonOverlap, fusionMode) @@ -196,6 +197,7 @@ getTranscriptScore = function(rowData, model = NULL, defaultModels){ #' @return It returns a model object to use in \link{bambu} #' @export trainBambu <- function(rcFile = NULL, min.readCount = 2, nrounds = 50, NDR.threshold = 0.1, verbose = TRUE) { + if(verbose) message(paste0("Read Classes used for training - ", sum(rowData(rcFile)$readCount>=min.readCount, na.rm = TRUE))) rowData = rowData(rcFile)[which(rowData(rcFile)$readCount>=min.readCount),] txFeatures = prepareTranscriptModelFeatures(rowData) features = dplyr::select(txFeatures,!c(labels)) diff --git a/R/bambu-processReads_utilityConstructReadClasses.R b/R/bambu-processReads_utilityConstructReadClasses.R index 9e694ea7..fec9adbf 100644 --- a/R/bambu-processReads_utilityConstructReadClasses.R +++ b/R/bambu-processReads_utilityConstructReadClasses.R @@ -16,6 +16,8 @@ isore.constructReadClasses <- function(readGrgList, unlisted_junctions, use.names = FALSE) mcols(reads.singleExon)$id <- mcols(readGrgList[ elementNROWS(readGrgList) == 1])$id + mcols(reads.singleExon)$sampleID <- mcols(readGrgList[ + elementNROWS(readGrgList) == 1])$sampleID #only keep multi exons reads in readGrgList readGrgList <- readGrgList[elementNROWS(readGrgList) > 1] if (!identical(mcols(readGrgList)$id,unique(mcols(unlisted_junctions)$id))) @@ -94,7 +96,7 @@ constructSplicedReadClasses <- function(uniqueJunctions, unlisted_junctions, readTable <- readTable %>% dplyr::select(chr.rc = chr, strand.rc = strand, startSD = startSD, endSD = endSD, readCount.posStrand = readCount.posStrand, intronStarts, intronEnds, - confidenceType, readCount, readIds) + confidenceType, readCount, readIds, sampleIDs) mcols(exonsByReadClass) <- readTable options(scipen = 0) return(exonsByReadClass) @@ -176,7 +178,8 @@ createReadTable <- function(unlisted_junctions_start, unlisted_junctions_end, end = pmax(end(readRanges), intronEndCoordinatesInt), strand = readStrand, confidenceType = readConfidence, 
alignmentStrand = as.character(getStrandFromGrList(readGrgList))=='+', - readId = mcols(readGrgList)$id) + readId = mcols(readGrgList)$id, + sampleID = mcols(readGrgList)$sampleID) rm(readRanges, readStrand, unlisted_junctions_start, unlisted_junctions_end, unlisted_junctions_id, readConfidence, intronStartCoordinatesInt, intronEndCoordinatesInt) @@ -186,7 +189,8 @@ createReadTable <- function(unlisted_junctions_start, unlisted_junctions_end, summarise(readCount = n(), startSD = sd(start), endSD = sd(end), start = nth(x = start, n = ceiling(readCount / 5), order_by = start), end = nth(x = end, n = ceiling(readCount / 1.25), order_by = end), - readCount.posStrand = sum(alignmentStrand, na.rm = TRUE), readIds = list(readId), + readCount.posStrand = sum(alignmentStrand, na.rm = TRUE), + readIds = list(readId), sampleIDs = list(sampleID), .groups = 'drop') %>% arrange(chr, start, end) %>% mutate(readClassId = paste("rc", row_number(), sep = ".")) @@ -244,14 +248,14 @@ constructUnsplicedReadClasses <- function(reads.singleExon, annotations, # by their minimum read class coordinates #remove duplicate ranges counts = as.data.frame(reads.singleExon) %>% - mutate(id = mcols(reads.singleExon)$id) %>% + mutate(id = mcols(reads.singleExon)$id, + sampleID = mcols(reads.singleExon)$sampleID) %>% group_by(seqnames,start,end,strand) %>% - mutate(n=n(), id = list(id)) %>% # change summarise to mutate as summarise will reorder the table + mutate(counts=n(), id = list(id), sampleID = list(sampleID)) %>% ungroup() %>% as.data.frame() - mcols(reads.singleExon)$counts <- counts$n - mcols(reads.singleExon)$id <- counts$id - reads.singleExon = unique(reads.singleExon) + reads.singleExon = GRanges(counts) + reads.singleExon = unique(reads.singleExon) rcUnsplicedAnnotation <- getUnsplicedReadClassByReference( granges = reads.singleExon, grangesReference = referenceExons, @@ -310,17 +314,18 @@ getUnsplicedReadClassByReference <- function(granges, grangesReference, readStart = 
start(granges)[queryHits], readEnd = end(granges)[queryHits], counts = mcols(granges)$counts[queryHits], - readId = mcols(granges[queryHits])$id) + readId = mcols(granges[queryHits])$id, + sampleID = mcols(granges[queryHits])$sampleID) hitsDF <- hitsDF %>% dplyr::select(chr, start, end, readStart, readEnd, strand, readClassId, alignmentStrand, - counts, readId) %>% + counts, readId, sampleID) %>% group_by(readClassId) %>% summarise(start = start[1], end = end[1], strand = strand[1], chr = chr[1], readCount = sum(counts), startSD = sd(rep(readStart,counts)), endSD = sd(rep(readEnd,counts)), readCount.posStrand = sum(rep(alignmentStrand,counts)), - readIds = list(unlist(readId))) %>% + readIds = list(unlist(readId)), sampleIDs = list(unlist(sampleID))) %>% mutate(confidenceType = confidenceType, intronStarts = NA, intronEnds = NA) if(nrow(hitsDF)==0){ @@ -339,7 +344,7 @@ getUnsplicedReadClassByReference <- function(granges, grangesReference, hitsDF <- dplyr::select(hitsDF, chr.rc = chr, strand.rc = strand, intronStarts, intronEnds, confidenceType, readCount, startSD, endSD, - readCount.posStrand, readIds) + readCount.posStrand, readIds, sampleIDs) mcols(exByReadClassUnspliced) <- hitsDF return(exByReadClassUnspliced) } @@ -383,7 +388,7 @@ assignGeneIds <- function(grl, annotations, min.exonOverlap = 10, fusionMode = } mcols(grl)$GENEID[strandedRanges] <- assignGeneIdsByReference(grl[strandedRanges], annotations, min.exonOverlap = min.exonOverlap, - fusionMode = fusionMode) + fusionMode = fusionMode) #iteratively assign gene ids for stranded granges newGeneSet <- is.na(mcols(grl)$GENEID) & strandedRanges if(sum(newGeneSet != 0)){ @@ -409,13 +414,13 @@ assignGeneIds <- function(grl, annotations, min.exonOverlap = 10, fusionMode = mcols(grl)$GENEID[!strandedRanges] <- assignGeneIdsByReference(grl[!strandedRanges], grl[!is.na(mcols(grl)$GENEID)], min.exonOverlap = min.exonOverlap, - fusionMode = FALSE) + fusionMode = FALSE) if(any(is.na(mcols(grl)$GENEID)) & 
length(annotations)>0) { newGeneSet <- is.na(mcols(grl)$GENEID) mcols(grl)$GENEID[newGeneSet] <- assignGeneIdsByReference(grl[newGeneSet], annotations, min.exonOverlap = min.exonOverlap, - fusionMode = FALSE) + fusionMode = FALSE) } if(any(is.na(mcols(grl)$GENEID))) { @@ -443,14 +448,14 @@ assignGeneIdsByReference <- function(grl, annotations, min.exonOverlap = 10, uniqueHits <- which(queryHits(ov) %in% which(countQueryHits(ov)==1)) geneIds[queryHits(ov)[uniqueHits]] <- names(geneRanges)[subjectHits(ov)[uniqueHits]] - if(length(ov)>0){ + if(length(ov)>0){ ## next for non unique hits select one gene (maximum overlap) multiHits <- which(queryHits(ov) %in% which(countQueryHits(ov)>1)) rangeIntersect= intersect(ranges(grl[queryHits(ov)[multiHits]]), ranges(geneRanges[subjectHits(ov)[multiHits]])) filteredMultiHits = data.frame(queryHits = queryHits(ov)[multiHits], intersectWidth = sum(width(rangeIntersect)), - subjectHits = subjectHits(ov)[multiHits]) + subjectHits = subjectHits(ov)[multiHits]) if(fusionMode) { filteredMultiHits <- filteredMultiHits %>% filter(intersectWidth>min.exonOverlap) %>% @@ -458,16 +463,56 @@ assignGeneIdsByReference <- function(grl, annotations, min.exonOverlap = 10, group_by(queryHits) %>% summarise(geneid = paste(geneid, collapse=':')) geneIds[filteredMultiHits$queryHits] <- filteredMultiHits$geneid } else { - filteredMultiHits <- filteredMultiHits %>% - group_by(queryHits) %>% arrange(desc(intersectWidth)) %>% - dplyr::slice(1) - geneIds[filteredMultiHits$queryHits] <- - names(geneRanges)[filteredMultiHits$subjectHits] + if(nrow(filteredMultiHits) > 0){ + filteredMultiHits <- filteredMultiHits %>% + group_by(queryHits) %>% summarise(subjectHits = subjectHits[which.max(intersectWidth)], + intersectWidth = max(intersectWidth)) + geneIds[filteredMultiHits$queryHits] <- + names(geneRanges)[filteredMultiHits$subjectHits] + } } } return(geneIds) } +# assignGeneIdsByReference <- function(grl, annotations, min.exonOverlap = 10, +# fusionMode=FALSE, 
prefix = 'Bambu') { +# # (1) assign gene Ids based on first intron match to annotations +# geneRanges <- reducedRangesByGenes(annotations) +# ov=findOverlaps(grl, geneRanges, minoverlap = min.exonOverlap) +# geneIds <- rep(NA, length(grl)) +# uniqueHits <- which(queryHits(ov) %in% which(countQueryHits(ov)==1)) +# geneIds[queryHits(ov)[uniqueHits]] <- +# names(geneRanges)[subjectHits(ov)[uniqueHits]] + +# ## next for non unique hits select one gene (maximum overlap) +# multiHits <- which(queryHits(ov) %in% which(countQueryHits(ov)>1)) +# expandedRanges <- expandRangesList(ranges(grl[queryHits(ov)[multiHits]]), +# ranges(geneRanges[subjectHits(ov)[multiHits]])) +# rangeIntersect <- pintersect(expandedRanges, +# mcols(expandedRanges)$matchRng, resolve.empty = 'start.x') +# intersectById <- tapply(width(rangeIntersect), +# mcols(expandedRanges)$IdMap, sum) + +# filteredMultiHits <- as_tibble(ov[multiHits]) %>% +# mutate(intersectWidth = intersectById) +# if(fusionMode) { +# filteredMultiHits <- filteredMultiHits %>% +# filter(intersectWidth>min.exonOverlap) %>% +# mutate(geneid = names(geneRanges)[subjectHits]) %>% distinct() %>% +# group_by(queryHits) %>% summarise(geneid = paste(geneid, collapse=':')) +# geneIds[filteredMultiHits$queryHits] <- filteredMultiHits$geneid + +# } else { +# filteredMultiHits <- filteredMultiHits %>% +# group_by(queryHits) %>% arrange(desc(intersectWidth)) %>% +# dplyr::slice(1) +# geneIds[filteredMultiHits$queryHits] <- +# names(geneRanges)[filteredMultiHits$subjectHits] +# } +# return(geneIds) +# } + #' Create new gene ids for groups of overlapping read classes which #' don't overlap with known annotations. 
#' @param grl a GrangesList object with read classes @@ -539,7 +584,11 @@ assignGeneIdsNonAssigned = function(geneTxMap, exonTxMap, geneExonMap, dplyr::select(newGeneId, newExonId) %>% distinct() } # combined gene ids - refGeneTxMapMins = refGeneTxMap %>% group_by(newTxId) %>% filter(n() > 1) %>% filter(newGeneId == min(newGeneId)) %>% ungroup() + refGeneTxMap.tmp = refGeneTxMap %>% group_by(newTxId) %>% filter(n() > 1) + if(nrow(refGeneTxMap.tmp) == 0){refGeneTxMapMins = refGeneTxMap[0,] + } else{ + refGeneTxMapMins = refGeneTxMap.tmp %>% filter(newGeneId == min(newGeneId)) %>% ungroup() + } refGeneTxMapNotMins = refGeneTxMap %>% group_by(newTxId) %>% filter(newGeneId != min(newGeneId)) %>% ungroup() geneGeneMap <- left_join(refGeneTxMapMins, dplyr::rename(refGeneTxMapNotMins, newGeneId.merge=newGeneId), by = "newTxId") %>% diff --git a/R/bambu-processReads_utilityCreateJunctionTables.R b/R/bambu-processReads_utilityCreateJunctionTables.R index 1c3b4dd0..44fefe69 100644 --- a/R/bambu-processReads_utilityCreateJunctionTables.R +++ b/R/bambu-processReads_utilityCreateJunctionTables.R @@ -9,7 +9,7 @@ #' @importFrom dplyr tibble %>% mutate select #' @noRd isore.constructJunctionTables <- function(unlisted_junctions, annotations, - genomeSequence, stranded = FALSE, verbose = FALSE) { + genomeSequence, stranded = FALSE, verbose = FALSE, returnModel = FALSE) { start.ptm <- proc.time() if(length(unlisted_junctions)==0) return(NULL) #summarise junction counts and strand for all reads @@ -35,7 +35,7 @@ isore.constructJunctionTables <- function(unlisted_junctions, annotations, junctionEndName, startScore, endScore, id, annotatedJunction, annotatedStart, annotatedEnd) # correct junction coordinates using logistic regression classifier - uniqueJunctions <- junctionErrorCorrection(uniqueJunctions, verbose) + uniqueJunctions <- junctionErrorCorrection(uniqueJunctions, verbose, returnModel) return(uniqueJunctions) } diff --git 
a/R/bambu-processReads_utilityJunctionErrorCorrection.R b/R/bambu-processReads_utilityJunctionErrorCorrection.R index c2683cdc..9b09b50c 100644 --- a/R/bambu-processReads_utilityJunctionErrorCorrection.R +++ b/R/bambu-processReads_utilityJunctionErrorCorrection.R @@ -2,7 +2,7 @@ #' @param uniqueJunctions uniqueJunctions #' @param verbose verbose #' @noRd -junctionErrorCorrection <- function(uniqueJunctions, verbose) { +junctionErrorCorrection <- function(uniqueJunctions, verbose, returnModel = TRUE) { start.ptm <- proc.time() if (sum(uniqueJunctions$annotatedJunction) > 5000 & sum(!uniqueJunctions$annotatedJunction) > 4000) { @@ -37,6 +37,7 @@ junctionErrorCorrection <- function(uniqueJunctions, verbose) { if (verbose) message("Finished correcting junction based on set of high confidence ", "junctions in ", round((end.ptm - start.ptm)[3] / 60, 1), " mins.") + if(returnModel) {metadata(uniqueJunctions)$junctionModel = junctionModel} return(uniqueJunctions) } diff --git a/R/bambu-quantify.R b/R/bambu-quantify.R index fa5a079d..68b41395 100644 --- a/R/bambu-quantify.R +++ b/R/bambu-quantify.R @@ -2,50 +2,28 @@ #' @inheritParams bambu #' @import data.table #' @noRd -bambu.quantify <- function(readClass, annotations, emParameters, +bambu.quantify <- function(readClassDt, countMatrix, incompatibleCountMatrix, txid.index, GENEIDs, emParameters, trackReads = FALSE, returnDistTable = FALSE, verbose = FALSE, isoreParameters = setIsoreParameters(NULL)) { - min.exonDistance = isoreParameters[["min.exonDistance"]] - min.primarySecondaryDist = - isoreParameters[['min.primarySecondaryDist']] - min.primarySecondaryDistStartEnd = - isoreParameters[['min.primarySecondaryDistStartEnd2']] - if (is.character(readClass)) readClass <- readRDS(file = readClass) - readClassDist <- isore.estimateDistanceToAnnotations(readClass, annotations, - min.exonDistance = min.exonDistance, - min.primarySecondaryDist = min.primarySecondaryDist, - min.primarySecondaryDistStartEnd = 
min.primarySecondaryDistStartEnd, - verbose = verbose) - metadata(readClassDist)$distTable <- modifyIncompatibleAssignment(metadata(readClassDist)$distTable) - incompatibleCounts <- processIncompatibleCounts(readClassDist) - readClassDt <- genEquiRCs(readClassDist, annotations, verbose) + start.ptm <- proc.time() + readClassDt$nobs = countMatrix[readClassDt$eqClass.match] + readClassDt$nobs[is.na(readClassDt$nobs)] = 0 compatibleCounts <- bambu.quantDT(readClassDt, emParameters = emParameters,verbose = verbose) - incompatibleCounts <- incompatibleCounts[data.table(GENEID = unique(mcols(annotations)$GENEID)), on = "GENEID"] + incompatibleCounts <- incompatibleCountMatrix[data.table(GENEID.i = GENEIDs), on = "GENEID.i"] incompatibleCounts[is.na(counts), counts := 0] compatibleCounts <- calculateCPM(compatibleCounts, incompatibleCounts) - setnames(incompatibleCounts, "counts", colnames(readClass)) - counts <- compatibleCounts[match(mcols(annotations)$txid, txid)] - colNameRC <- colnames(readClass) - colDataRC <- colData(readClass) + counts <- compatibleCounts[match(txid.index, txid)] sig.digit <- emParameters[["sig.digit"]] - seOutput <- SummarizedExperiment( - assays = SimpleList(counts = matrix(round(counts$counts,sig.digit), ncol = 1, - dimnames = list(NULL, colNameRC)), CPM = matrix(round(counts$CPM,sig.digit), - ncol = 1, dimnames = list(NULL, colNameRC)), - fullLengthCounts = matrix(round(counts$fullLengthCounts,sig.digit), ncol = 1, - dimnames = list(NULL, colNameRC)), - uniqueCounts = matrix(counts$uniqueCounts, - ncol = 1, dimnames = list(NULL, colNameRC))), colData = colDataRC) - metadata(seOutput)$incompatibleCounts = incompatibleCounts - if (returnDistTable) metadata(seOutput)$distTable = metadata(readClassDist)$distTable - if (trackReads) metadata(seOutput)$readToTranscriptMap = - generateReadToTranscriptMap(readClass, metadata(readClassDist)$distTable, - annotations) + seOutput <- list(incompatibleCounts = as(incompatibleCounts$counts, "sparseVector"), + 
counts = as(round(counts$counts,sig.digit), "sparseVector"), + CPM = as(round(counts$CPM,sig.digit), "sparseVector"), + fullLengthCounts = as(round(counts$fullLengthCounts,sig.digit), "sparseVector"), + uniqueCounts = as(round(counts$uniqueCounts,sig.digit), "sparseVector")) + end.ptm <- proc.time() + # if (verbose) message("bambu.quantify ", round((end.ptm - start.ptm)[3] / 60, 3), " mins.") return(seOutput) } - - #' Process data.table object #' @param readClassDt A data.table object #' @inheritParams bambu @@ -62,14 +40,15 @@ bambu.quantDT <- function(readClassDt = readClassDt, readClassDt <- split(readClassDt, by = "gene_grp_id") start.ptm <- proc.time() outEst <- abundance_quantification(inputRcDt, readClassDt, - maxiter = emParameters[["maxiter"]], - conv = emParameters[["conv"]], minvalue = emParameters[["minvalue"]]) + maxiter = emParameters[["maxiter"]], + conv = emParameters[["conv"]], minvalue = emParameters[["minvalue"]]) end.ptm <- proc.time() - if (verbose) message("Finished EM estimation in ", - round((end.ptm - start.ptm)[3] / 60, 1), " mins.") + # if (verbose) message("Finished EM estimation in ", + # round((end.ptm - start.ptm)[3] / 60, 1), " mins.") outEst <- modifyQuantOut(outEst,outIni) theta_est <- rbind(rcPreOut[[2]],outEst) theta_est <- removeDuplicates(theta_est) + end.ptm <- proc.time() return(theta_est) } diff --git a/R/bambu-quantify_utilityFunctions.R b/R/bambu-quantify_utilityFunctions.R index e8ee2e56..936763f1 100644 --- a/R/bambu-quantify_utilityFunctions.R +++ b/R/bambu-quantify_utilityFunctions.R @@ -19,22 +19,16 @@ modifyIncompatibleAssignment <- function(distTable){ #' Process incompatible counts #' @noRd -processIncompatibleCounts <- function(readClassDist){ - distTable <- unique(data.table(as.data.frame(metadata(readClassDist)$distTable))[, - .(readClassId, annotationTxId, readCount, GENEID, equal)], by = NULL) - distTableIncompatible <- distTable[grep("unidentified", annotationTxId)] - # filter out multiple geneIDs mapped to 
the same readClass using rowData(se) - geneRCMap <- as.data.table(as.data.frame(rowData(readClassDist)), - keep.rownames = TRUE) - setnames(geneRCMap, old = c("rn", "geneId"), - new = c("readClassId", "GENEID")) - distTable <- distTable[geneRCMap[ readClassId %in% - unique(distTableIncompatible$readClassId), .(readClassId, GENEID)], - on = c("readClassId", "GENEID")] - distTable[, readCount := sum(readCount), by = GENEID] - counts <- unique(distTable[,.(GENEID, readCount)]) - setnames(counts, "readCount", "counts") - return(counts) +processIncompatibleCounts <- function(distTable){ + distTable <- data.table(as.data.frame(distTable))[, + .(readClassId, annotationTxId, readCount, GENEID, GENEID.match, GENEID.i, dist,equal)] + distTable <- distTable[grep("unidentified", annotationTxId)] + # filter out multiple geneIDs mapped to the same readClass using rowData(se) + distTable[GENEID.match==TRUE,] + distTable[, readCount := sum(readCount), by = GENEID] + counts <- unique(distTable[,.(GENEID, GENEID.i, readCount)]) + setnames(counts, "readCount", "counts") + return(counts) } @@ -44,21 +38,21 @@ processIncompatibleCounts <- function(readClassDist){ #' @import data.table #' @noRd genEquiRCs <- function(readClassDist, annotations, verbose){ - distTable <- genEquiRCsBasedOnObservedReads(readClassDist) - eqClassCount <- getUniCountPerEquiRC(distTable) + eqClassCount <- getUniCountPerEquiRC(metadata(readClassDist)$distTable) eqClassTable <- addEmptyRC(eqClassCount, annotations) # create equiRC id eqClassTable <- eqClassTable %>% group_by(eqClassById) %>% mutate(eqClassId = cur_group_id()) %>% data.table() - + tx_len <- rbind(data.table(txid = mcols(annotations)$txid, txlen = sum(width(annotations)))) eqClassTable <- tx_len[eqClassTable, on = "txid"] %>% distinct() - + # remove unused columns - eqClassTable[, eqClassById := NULL] + #eqClassTable[, eqClassById := NULL] + return(eqClassTable) } @@ -74,7 +68,7 @@ genEquiRCsBasedOnObservedReads <- function(readClass){ 
width(unlisted_rowranges[unlisted_rowranges$exon_rank == 1,]), totalWidth = sum(width(rowRanges(readClass)))) distTable <- data.table(as.data.frame(metadata(readClass)$distTable))[!grepl("unidentified", annotationTxId), .(readClassId, - annotationTxId, readCount, GENEID, dist,equal,txid)] + annotationTxId, readCount, GENEID, dist,equal, compatible, txid)] distTable <- rcWidth[distTable, on = "readClassId"] # filter out multiple geneIDs mapped to the same readClass using rowData(se) compatibleData <- as.data.table(as.data.frame(rowData(readClass)), @@ -194,27 +188,28 @@ createEqClassToTxMapping <- function(eqClassTable){ #' Add A matrix for total, full-length, unique #' @noRd addAval <- function(readClassDt, emParameters, verbose){ - if (is.null(readClassDt)) { - stop("Input object is missing.") - } else if (any(!(c("GENEID", "txid", "eqClassId","nobs") %in% - colnames(readClassDt)))) { - stop("Columns GENEID, txid, eqClassId, nobs, - are missing from object.") - } +# if (is.null(readClassDt)) { +# stop("Input object is missing.") +# } else if (any(!(c("GENEID", "txid", "eqClassId","nobs") %in% +# colnames(readClassDt)))) { +# stop("Columns GENEID, txid, eqClassId, nobs, +# are missing from object.") +# } ## ----step2: match to simple numbers to increase claculation efficiency - readClassDt <- simplifyNames(readClassDt) + #readClassDt <- simplifyNames(readClassDt) d_mode <- emParameters[["degradationBias"]] start.ptm <- proc.time() if (d_mode) { d_rateOut <- calculateDegradationRate(readClassDt) + readClassDt <- modifyAvaluewithDegradation_rate(readClassDt, + d_rateOut[1], d_mode = d_mode) }else{ d_rateOut <- rep(NA,2) + readClassDt$aval = 1 } end.ptm <- proc.time() if (verbose) message("Finished estimate degradation bias in ", - round((end.ptm - start.ptm)[3] / 60, 1), " mins.") - readClassDt <- modifyAvaluewithDegradation_rate(readClassDt, - d_rateOut[1], d_mode = d_mode) + round((end.ptm - start.ptm)[3] / 60, 3), " mins.") removeList <- 
removeUnObservedGenes(readClassDt) readClassDt <- removeList[[1]] # keep only observed genes for estimation outList <- removeList[[2]] #for unobserved genes, set estimates to 0 @@ -224,8 +219,12 @@ addAval <- function(readClassDt, emParameters, verbose){ mutate(K = sum(nobs), n.obs=nobs/K, nobs = NULL) %>% ## check if this is unique by eqClassId ungroup() %>% distinct() %>% - right_join(readClassDt, by = c("gene_sid","eqClassId")) %>% - data.table() + right_join(readClassDt, by = c("gene_sid","eqClassId")) +# readClassDt_withGeneCount <- readClassDt %>% +# group_by(gene_sid, eqClassId, nobs) %>% mutate(n = n(), n = replace(n,1,1)) %>% +# ungroup() %>% group_by(gene_sid) %>% +# mutate(K = sum(nobs), n.obs=(nobs)/(K)) %>% +# ungroup() return(list(readClassDt_withGeneCount,outList)) } @@ -258,8 +257,8 @@ calculateDegradationRate <- function(readClassDt){ geneCountLength[, d_rate := dObs/nobs] if (length(which(geneCountLength$nobs >= 30 & ((geneCountLength$nobs - geneCountLength$dObs) >= 5))) == 0) { - message("There is not enough read count and full length coverage! - Hence degradation rate is estimated using all data!") + # message("There is not enough read count and full length coverage! 
+ # Hence degradation rate is estimated using all data!") } else { geneCountLength <- geneCountLength[nobs >= 30 & ((nobs - dObs) >= 5)] } @@ -283,6 +282,7 @@ modifyAvaluewithDegradation_rate <- function(tmp, d_rate, d_mode){ tmp[which(multi_align) , aval := ifelse(equal, 1 - sum(.SD[which(!equal)]$rcWidth*d_rate/1000), rcWidth*d_rate/1000), by = list(gene_sid,txid)] + if(is.na(d_rate)) d_rate = 0 if (d_rate == 0) { tmp[, par_status := all(!equal & multi_align), by = list(eqClassId, gene_sid)] @@ -299,7 +299,7 @@ modifyAvaluewithDegradation_rate <- function(tmp, d_rate, d_mode){ #' @import data.table #' @noRd removeUnObservedGenes <- function(readClassDt){ - uoGenes <- unique(readClassDt[,.I[sum(nobs) == 0], by = gene_sid]$gene_sid) + uoGenes <- unique(readClassDt[,.I[sum(nobs) == 0], by = gene_sid]$gene_sid) if (length(uoGenes) > 0) { uo_txGeneDt <- unique(readClassDt[(gene_sid %in% uoGenes),.(txid,gene_sid)]) @@ -461,32 +461,57 @@ removeDuplicates <- function(counts){ #' Generate read to transcript mapping #' @noRd generateReadToTranscriptMap <- function(readClass, distTable, annotations){ - if(!is.null(metadata(readClass)$readNames)) { - read_id = metadata(readClass)$readNames} - else { read_id = metadata(readClass)$readId} + if(!is.null(metadata(readClass)$readNames)) { + read_id <- metadata(readClass)$readNames + } else { + read_id <- metadata(readClass)$readId + } #unpack and reverse the read class to read id relationship - readOrder = order(unlist(rowData(readClass)$readIds)) - lens = lengths(rowData(readClass)$readIds) - rcIndex = seq_along(readClass) - readToRC = rep(rcIndex, lens)[readOrder] - read_id = read_id[(match(unlist(rowData(readClass)$readIds)[readOrder], metadata(readClass)$readId))] + readOrder <- order(unlist(rowData(readClass)$readIds)) + lens <- lengths(rowData(readClass)$readIds) + rcIndex <- seq_along(readClass) + readToRC <- rep(rcIndex, lens)[readOrder] + read_id <- read_id[(match(unlist(rowData(readClass)$readIds)[readOrder], + 
metadata(readClass)$readId))] #get annotation indexs - distTable$annotationTxId = match(distTable$annotationTxId, names(annotations)) + distTable <- metadata(distTable)$distTable + distTable$annotationTxId <- match(distTable$annotationTxId, names(annotations)) #match read classes with transcripts - readClass_id = rownames(readClass)[readToRC] - distTable$exClassById = NULL - equalMatches = as_tibble(distTable) %>% + readClass_id <- rownames(readClass)[readToRC] + distTable$exClassById <- NULL + equalMatches <- as_tibble(distTable) %>% filter(equal) %>% - group_by(readClassId) %>% summarise(annotationTxIds = list(annotationTxId)) - equalMatches = equalMatches$annotationTxIds[match(readClass_id, equalMatches$readClassId)] - compatibleMatches = as_tibble(distTable) %>% + group_by(readClassId) %>% + summarise(annotationTxIds = list(annotationTxId)) + equalMatches <- equalMatches$annotationTxIds[match(readClass_id, + equalMatches$readClassId)] + compatibleMatches <- as_tibble(distTable) %>% filter(!equal & compatible) %>% group_by(readClassId) %>% summarise(annotationTxIds = list(annotationTxId)) - compatibleMatches = compatibleMatches$annotationTxIds[match(readClass_id, compatibleMatches$readClassId)] - readToTranscriptMap = tibble(readId=read_id, equalMatches = equalMatches, compatibleMatches = compatibleMatches) + compatibleMatches <- compatibleMatches$annotationTxIds[match(readClass_id, + compatibleMatches$readClassId)] + readToTranscriptMap <- tibble(readId=read_id, equalMatches = equalMatches, + compatibleMatches = compatibleMatches) return(readToTranscriptMap) } +#' Get counts of equivilent classes from a distTable and match to a readClassDt +#' @noRd +calculateEqClassCounts <- function(distTable, readClassDt){ + eqClasses <- distTable %>% group_by(eqClassById) %>% + mutate(anyEqual = any(equal)) %>% + select(eqClassById, firstExonWidth,totalWidth, + readCount,GENEID,anyEqual) %>% #eqClassByIdTemp, + distinct() %>% + mutate(nobs = sum(readCount), + rcWidth = 
ifelse(anyEqual, max(totalWidth), + max(firstExonWidth))) %>% + select(eqClassById,GENEID,nobs,rcWidth) %>% + ungroup() %>% distinct() + eqCounts <- eqClasses$nobs[match(readClassDt$eqClassById,eqClasses$eqClassById)] + eqCounts[is.na(eqCounts)] <- 0 + return(eqCounts) +} #' calculate CPM post estimation #' @noRd diff --git a/R/bambu.R b/R/bambu.R index e61d0bfa..a42ae236 100644 --- a/R/bambu.R +++ b/R/bambu.R @@ -136,64 +136,200 @@ #' genome = fa.file, discovery = TRUE, quant = TRUE) #' @export bambu <- function(reads, annotations = NULL, genome = NULL, NDR = NULL, - opt.discovery = NULL, opt.em = NULL, rcOutDir = NULL, discovery = TRUE, - quant = TRUE, stranded = FALSE, ncore = 1, yieldSize = NULL, + mode = NULL, opt.discovery = NULL, opt.em = NULL, rcOutDir = NULL, discovery = TRUE, + assignDist = TRUE, quant = TRUE, stranded = FALSE, ncore = 1, yieldSize = NULL, trackReads = FALSE, returnDistTable = FALSE, lowMemory = FALSE, - fusionMode = FALSE, verbose = FALSE) { - if(is.null(annotations)) { annotations = GRangesList() - } else annotations <- checkInputs(annotations, reads, - readClass.outputDir = rcOutDir, genomeSequence = genome) + fusionMode = FALSE, verbose = FALSE, demultiplexed = FALSE, spatial = NULL, quantData = NULL, + sampleNames = NULL, cleanReads = FALSE, dedupUMI = FALSE, barcodesToFilter = NULL, clusters = NULL, + processByChromosome = FALSE, processByBam = TRUE) { + message(paste0("Running Bambu-v", "3.9.0")) + if(!is.null(mode)){ + if(mode == "bulk"){ + processByChromosome <- FALSE + processByBam <- TRUE + } + if(mode == "multiplexed"){ + if(is.null(demultiplexed)) + demultiplexed <- TRUE + cleanReads <- TRUE + opt.em <- list(degradationBias = FALSE) + quant <- FALSE + processByChromosome <- TRUE + } + if(mode == "fusion"){ + NDR <- 1 + fusionMode <- TRUE + } + if(mode == "debug"){ + verbose <- TRUE + trackReads <- TRUE + returnDistTable <- TRUE + } + } + if(lowMemory) + message("lowMemory has been deprecated and split into processByChromosome 
and processByBam. Please see Documentation") + + if(is.null(annotations)){ + annotations <- GRangesList() + } else { + annotations <- checkInputs(annotations, reads, + readClass.outputDir = rcOutDir, + genomeSequence = genome, discovery = discovery, + sampleNames = sampleNames, spatial = spatial,quantData = quantData) + } isoreParameters <- setIsoreParameters(isoreParameters = opt.discovery) #below line is to be compatible with earlier version of running bambu if(!is.null(isoreParameters$max.txNDR)) NDR = isoreParameters$max.txNDR emParameters <- setEmParameters(emParameters = opt.em) - bpParameters <- setBiocParallelParameters(reads, ncore, verbose) - - rm.readClassSe <- FALSE - readClassList = reads - isRDSs = all(sapply(reads, class)=="RangedSummarizedExperiment") - isBamFiles = !isRDSs - if(!isRDSs) isBamFiles = ifelse(!is(reads, "BamFileList"), all(grepl(".bam$", reads)), FALSE) - if (isBamFiles | is(reads, "BamFileList")) { - if (length(reads) > 10 & (is.null(rcOutDir))) { - rcOutDir <- tempdir() #>=10 samples, save to temp folder - message("There are more than 10 samples, read class files + bpParameters <- setBiocParallelParameters(reads, ncore, verbose, demultiplexed) + # only when reads is not NULL, this proceed, otherwise, it will jump to quant step + if(!is.null(reads)){ + rm.readClassSe <- FALSE + readClassList <- reads + isRDSs <- all(sapply(reads, class)=="RangedSummarizedExperiment") + isBamFiles <- !isRDSs + warnings <- NULL + if(!isRDSs) + isBamFiles <- ifelse(!is(reads, "BamFileList"), + all(grepl(".bam$", reads)), FALSE) + if (isBamFiles | is(reads, "BamFileList")) { + if (length(reads) > 10 & (is.null(rcOutDir))) { + rcOutDir <- tempdir() #>=10 samples, save to temp folder + message("There are more than 10 samples, read class files will be temporarily saved to ", rcOutDir, - " for more efficient processing") - rm.readClassSe <- TRUE # remove temporary read class files + " for more efficient processing") + rm.readClassSe <- TRUE # remove 
temporary read class files + } + message("--- Start generating read class files ---") + readClassList <- bambu.processReads(reads, annotations, + genomeSequence = genome, + readClass.outputDir = rcOutDir, yieldSize = yieldSize, + bpParameters = bpParameters, stranded = stranded, verbose = verbose, + isoreParameters = isoreParameters, trackReads = trackReads, + fusionMode = fusionMode, + processByChromosome = processByChromosome, processByBam = processByBam, + demultiplexed = demultiplexed, + sampleNames = sampleNames, cleanReads = cleanReads, + dedupUMI = dedupUMI,barcodesToFilter = barcodesToFilter) + } + + #warnings = handleWarnings(readClassList, verbose) + if (!discovery & !assignDist & !quant) return(readClassList) + if (discovery) { + message("--- Start extending annotations ---") + extendedAnnotations <- bambu.extendAnnotations(readClassList, annotations, NDR, + isoreParameters, stranded, bpParameters, fusionMode, verbose) + metadata(extendedAnnotations)$warnings = warnings + + #### cluster based transcript discovery + if(!is.null(clusters)){ + annotations.clusters <- isore.extendAnnotations.clusters(readClassList, + annotations, clusters, NDR, + isoreParameters, stranded, bpParameters, fusionMode, verbose = FALSE) + metadata(extendedAnnotations)$clusters <- annotations.clusters + } + annotations <- extendedAnnotations + + if (!quant & !assignDist) return(annotations) + } + if(assignDist){ + message("--- Start calculating equivilance classes ---") + quantData <- bplapply(readClassList, + FUN = assignReadClasstoTranscripts, + annotations = annotations, + isoreParameters = isoreParameters, + verbose = verbose, + demultiplexed = demultiplexed, + spatial = spatial, + returnDistTable = returnDistTable, + trackReads = trackReads, + BPPARAM = bpParameters) + if (!quant) return(quantData) } - message("--- Start generating read class files ---") - readClassList <- bambu.processReads(reads, annotations, - genomeSequence = genome, - readClass.outputDir = rcOutDir, 
yieldSize, - bpParameters, stranded, verbose, - isoreParameters, trackReads = trackReads, fusionMode = fusionMode, - lowMemory = lowMemory) - } - warnings = handleWarnings(readClassList, verbose) - if (!discovery & !quant) return(readClassList) - if (discovery) { - message("--- Start extending annotations ---") - annotations <- bambu.extendAnnotations(readClassList, annotations, NDR, - isoreParameters, stranded, bpParameters, fusionMode, verbose) - metadata(annotations)$warnings = warnings - if (!quant) return(annotations) } + if (quant) { - message("--- Start isoform quantification ---") + message("--- Start isoform EM quantification ---") + if(!is.null(NDR) & !discovery)# this step is used when reset NDR is needed + annotations <- setNDR(annotations, NDR, + prefix = isoreParameters$prefix, + baselineFDR = isoreParameters[["baselineFDR"]], + defaultModels2 = isoreParameters[["defaultModels"]]) if(length(annotations)==0) stop("No valid annotations, if running - de novo please try less stringent parameters") - countsSe <- bplapply(readClassList, bambu.quantify, - annotations = annotations, isoreParameters = isoreParameters, - emParameters = emParameters, trackReads = trackReads, - returnDistTable = returnDistTable, verbose = verbose, - BPPARAM = bpParameters) - countsSe <- combineCountSes(countsSe, trackReads, returnDistTable) - rowRanges(countsSe) <- annotations - metadata(countsSe)$warnings = warnings - if (rm.readClassSe) file.remove(unlist(readClassList)) - message("--- Finished running Bambu ---") + de novo please try less stringent parameters") + if(is.null(quantData)) stop("quantData must be provided or assignDist = TRUE") + GENEIDs.i <- as.numeric(factor(unique(mcols(annotations)$GENEID))) + start.ptm <- proc.time() + countsSeCompressed.all <- NULL + ColNames <- c() + for(i in seq_along(quantData)){ + quantData_i <- quantData[[i]] + #load in the barcode clustering from file if provided + iter <- seq_len(ncol(metadata(quantData_i)$countMatrix)) # iter is 
integer + if(!is.null(clusters)){ + if(class(clusters[[i]])!="CompressedCharacterList"){ # !is.list(clusters) is FALSE for CompressedCharacterList + clusterMaps <- NULL + for(j in seq_along(metadata(quantData_i)$sampleNames)){ #load in a file per sample name provided + clusterMap <- fread(clusters[[j]], header = FALSE, + data.table = FALSE) + # read.table(clusters[[j]], + # sep = ifelse(grepl(".tsv$",clusters[[j]]), "\t", ","), + # header = FALSE) + clusterMap[,1] <- paste0(metadata(quantData_i)$sampleNames[j], + "_",clusterMap[,1]) + clusterMaps <- rbind(clusterMaps, clusterMap) + } + clustering <- splitAsList(clusterMaps[,1], clusterMaps[,2]) + rm(clusterMaps) + rm(clusterMap) + iter <- clustering + + } else{ #if clusters is a list + if(length(quantData)>1){ + iter <- clusters[[i]] #lowMemory mode + }else{ + iter <- clusters#do.call(c,clusters) + } + } + } + countsSeCompressed <- bplapply(iter, FUN = function(j){ # previous i changed to j to avoid duplicated assignment + #i = iter[i %in% colnames(metadata(quantData_i)$countMatrix)] #bug, after assignment, i become emptyprint(i) + countMatrix <- unname(metadata(quantData_i)$countMatrix[,j]) # same here + incompatibleCountMatrix <- unname(metadata(quantData_i)$incompatibleCountMatrix[,j]) # same here + if(!is.null(dim(countMatrix))){ + countMatrix <- rowSums(countMatrix) + incompatibleCountMatrix <- rowSums(metadata(quantData_i)$incompatibleCountMatrix[,j]) # same here + } + return(bambu.quantify(readClassDt = metadata(quantData_i)$readClassDt, countMatrix = countMatrix, + incompatibleCountMatrix = data.table(GENEID.i = as.numeric(rownames(metadata(quantData_i)$incompatibleCountMatrix)), counts = incompatibleCountMatrix), + txid.index = mcols(annotations)$txid, GENEIDs = GENEIDs.i, isoreParameters = isoreParameters, + emParameters = emParameters, trackReads = trackReads, + verbose = verbose))}, + BPPARAM = bpParameters) + end.ptm <- proc.time() + message("Total Time ", round((end.ptm - start.ptm)[3] / 60, 3), " 
mins.") + if(!is.null(clusters)){ + ColNames <- c(ColNames, names(iter)) + } else{ + ColNames <- c(ColNames, colnames(quantData_i)) + } + countsSeCompressed.all <- c(countsSeCompressed.all, countsSeCompressed) + } + countsSeCompressed.all$colnames <- ColNames + countsSe <- combineCountSes(countsSeCompressed.all, annotations) + if(returnDistTable){ + distTables = list() + for(i in seq_along(quantData)){ + distTables[[i]] <- metadata(quantData[[i]])$distTable + } + metadata(countsSe)$distTables <- distTables + } + #metadata(countsSe)$warnings = warnings + + ColData <- generateColData(colnames(countsSe), clusters, demultiplexed, spatial) + colData(countsSe) <- ColData + colnames(countsSe) <- ColData[,1] return(countsSe) } -} \ No newline at end of file +} diff --git a/R/bambu_utilityFunctions.R b/R/bambu_utilityFunctions.R index 9ed07047..bc148c71 100644 --- a/R/bambu_utilityFunctions.R +++ b/R/bambu_utilityFunctions.R @@ -3,13 +3,14 @@ #' setBiocParallelParameters #' @importFrom BiocParallel bpparam #' @noRd -setBiocParallelParameters <- function(reads, ncore, verbose){ +setBiocParallelParameters <- function(reads, ncore, verbose, demultiplexed){ if(ncore >= 2) message("WARNING - If you change the number of cores (ncore) ", "between Bambu runs and there is no progress please restart your R session ", "to resolve the issue that originates from the XGboost package.") bpParameters <- bpparam() #===# set parallel options: otherwise use parallel to distribute samples - bpParameters$workers <- ifelse(length(reads) == 1, 1, ncore) + # when demultiplexed is FALSE, isFALSE(demultiplexed) is TRUE + bpParameters$workers <- ifelse(length(reads) == 1 & isFALSE(demultiplexed), 1, ncore) bpParameters$progressbar <- ifelse(length(reads) > 1 & !verbose, TRUE, FALSE) return(bpParameters) } @@ -74,7 +75,8 @@ updateParameters <- function(Parameters, Parameters.default) { #' @param readClass.outputDir path to readClass output directory #' @importFrom methods is #' @noRd -checkInputs <- 
function(annotations, reads, readClass.outputDir, genomeSequence){ +checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence, + discovery, sampleNames, spatial, quantData){ # ===# Check annotation inputs #===# if (!is.null(annotations)) { if (is(annotations, "CompressedGRangesList")) { @@ -97,8 +99,8 @@ checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence) } else { stop("The annotations is not a GRangesList object a TxDb or a path to a .gtf.") } - if(any(grepl("^BambuGene", names(annotations))) | - any(grepl("^BambuTx", mcols(annotations)$TXNAME))){ + if(discovery & (any(grepl("^BambuGene", names(annotations))) | + any(grepl("^BambuTx", mcols(annotations)$TXNAME)))){ message("Detected Bambu derived annotations in the annotations. ", "Set a new prefix with opt.discovery(list(prefix='newPrefix')) ", "to prevent ambigious id assignment.") @@ -111,25 +113,35 @@ checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence) if (!dir.exists(readClass.outputDir)) stop("output folder does not exist") } - - if (is(reads, "BamFileList")){ - if(is.null(genomeSequence)){ - stop("A genome must be provided when running bambu from bam files") - } - } else{ - # ===# Check whether provided read files are all in the same format (.bam or .rds) #===# - isRDSs = all(sapply(reads, class)=="RangedSummarizedExperiment") - if(!isRDSs){ - if (!all(grepl(".bam$", reads)) & !all(grepl(".rds$", reads))) - stop("Reads should either be: a vector of paths to .bam files, ", - "a vector of paths to Bambu RCfile .rds files, ", - "or a list of loaded Bambu RCfiles") - # if bam files are loaded in check that a genome is provided - if (all(grepl(".bam$", reads)) & is.null(genomeSequence)){ + if(!is.null(reads)){ + if (is(reads, "BamFileList")){ + if(is.null(genomeSequence)){ stop("A genome must be provided when running bambu from bam files") } + } else{ + # ===# Check whether provided read files are all in the same format (.bam or 
.rds) #===# + isRDSs <- all(sapply(reads, class)=="RangedSummarizedExperiment") + # there is a bug here, when reads is NULL, isRDSs == TRUE + if(!isRDSs){ + if (!all(grepl(".bam$", reads)) & !all(grepl(".rds$", reads))) + stop("Reads should either be: a vector of paths to .bam files, ", + "a vector of paths to Bambu RCfile .rds files, ", + "or a list of loaded Bambu RCfiles") + # if bam files are loaded in check that a genome is provided + if (all(grepl(".bam$", reads)) & is.null(genomeSequence)){ + stop("A genome must be provided when running bambu from bam files") + } + } } + }else if(is.null(quantData)){ + stop("Please provide either reads or quantData!", + "Reads should either be: a vector of paths to .bam files, ", + "a vector of paths to Bambu RCfile .rds files, ", + "or a list of loaded Bambu RCfiles. ", + "quantData should be output from bambu with ", + "assignDist = TRUE and quant = FALSE") } + ## check genomeSequence can't be FaFile in Windows as faFile will be dealt ## strangely in windows system if (.Platform$OS.type == "windows") { @@ -138,6 +150,24 @@ checkInputs <- function(annotations, reads, readClass.outputDir, genomeSequence) fuzzy, recommend to provide the path as a string variable to avoid use of Rsamtools for opening.") } + + #check single-cell and spatial inputs match + if(!is.null(sampleNames)){ + if(length(reads)!=length(sampleNames)){ + stop("There are not the same number of sampleNames as input files to reads. ", + "Make sure these two arguments are vectors of the same length") + } + } + + if(!is.null(spatial)){ + #if(!all(grepl(".tsv^", spatial))){stop("Not all paths for spatial are .tsv files")} + if(length(spatial)==1 & length(reads)>1){ + warning("Using the same whitelist and coordinates for all input samples") + } else if(length(reads)!=length(spatial)){ + stop("There are not the same number spatial whitelist paths as input files to reads. 
", + "Make sure these two arguments are vectors of the same length") + } + } return(annotations) } @@ -180,64 +210,128 @@ checkInputSequence <- function(genomeSequence) { #' Function that gathers warnings from several read class lists and outputs the counts #' @noRd handleWarnings <- function(readClassList, verbose){ - warnings = list() - sampleNames = c() + warnings <- list() + sampleNames <- c() for(i in seq_along(readClassList)){ - readClassSe = readClassList[[i]] - if (is.character(readClassSe)){ - readClassSe <- readRDS(file = readClassSe)} - warnings[[i]] = NA - if(!is.null(metadata(readClassSe)$warnings)){ - warnings[[i]] = metadata(readClassSe)$warnings} - sampleNames = c(sampleNames, colnames(readClassList[[i]])) + readClassSe <- readClassList[[i]] + if (is.character(readClassSe)) + readClassSe <- readRDS(file = readClassSe) + + warnings[[i]] <- metadata(readClassSe)$warnings + + if(is.null(metadata(readClassSe)$warnings)) + warnings[[i]] <- NA + sampleNames <- c(sampleNames, colnames(readClassSe)) } - names(warnings) = sampleNames - - if(verbose & any(!is.na(warnings))){ + names(warnings) <- sampleNames + if(verbose & any(lengths(warnings)>0)){ message("--- per sample warnings during read class construction ---") - warnings.tmp = warnings[!is.na(warnings)] - for(i in seq_along(warnings.tmp)){ - message("Warnings for: ", names(warnings.tmp)[i]) - sapply(warnings.tmp[[i]], message) + for(i in seq_along(warnings)){ + if(lengths(warnings)[i]>0){ + message("Warnings for: ", sampleNames[i]) + sapply(warnings[[i]], message) + } } } else { - warningCount = sum(lengths(warnings[!is.na(warnings)])) - if(warningCount > 0){ - message("Detected ", warningCount, " warnings across the samples during ", - "read class construction. Access warnings with metadata(bambuOutput)$warnings")} + message("Detected ", sum(lengths(warnings)), " warnings across the samples during ", + "read class construction. 
Access warnings with metadata(bambuOutput)$warnings") } return(warnings) } +#' Calculate the dist table used for Bambu Quantification +calculateDistTable <- function(readClassList, annotations, isoreParameters, verbose, returnDistTable){ + readClassDist <- isore.estimateDistanceToAnnotations(readClassList, annotations, + min.exonDistance = isoreParameters[["min.exonDistance"]], + min.primarySecondaryDist = isoreParameters[['min.primarySecondaryDist']], + min.primarySecondaryDistStartEnd = isoreParameters[['min.primarySecondaryDistStartEnd2']], + verbose = verbose) + metadata(readClassDist)$distTable <- modifyIncompatibleAssignment(metadata(readClassDist)$distTable) + if(returnDistTable) metadata(readClassDist)$distTableOld <- metadata(readClassDist)$distTable + #convert string gene ids into index to save memory + GENEIDs <- factor(unique(mcols(annotations)$GENEID)) + GENEID.i <- as.numeric(GENEIDs) + metadata(readClassDist)$distTable$GENEID.i <- GENEID.i[match(metadata(readClassDist)$distTable$GENEID, GENEIDs)] + metadata(readClassDist)$distTable.incompatible <- data.table(as.data.frame(metadata(readClassDist)$distTable)) %>% + filter(grepl("unidentified", annotationTxId)) %>% distinct(readClassId, .keep_all = TRUE) + metadata(readClassDist)$distTable <- genEquiRCsBasedOnObservedReads(readClassDist) + return(readClassDist) +} #' Combine count se object while preserving the metadata objects #' @noRd -combineCountSes <- function(countsSe, trackReads = FALSE, returnDistTable = FALSE){ - sampleNames = sapply(countsSe, FUN = function(x){colnames(x)}) - if(trackReads){ - readToTranscriptMaps = lapply(countsSe, FUN = function(se){metadata(se)$readToTranscriptMap}) - names(readToTranscriptMaps) = sampleNames - countsSe = lapply(countsSe, FUN = function(se){ - metadata(se)$readToTranscriptMap=NULL - return(se)}) +combineCountSes <- function(countsSe, annotations){ + countsData <- c("counts", "CPM", "fullLengthCounts", + "uniqueCounts", "incompatibleCounts") + sampleNames <- 
countsSe$colnames + countsSe$colnames <- NULL + countsDataMat <- lapply(countsData, FUN = function(k){ + countsVecList <- lapply(countsSe, function(j){j[[k]]}) + countsMat <- sparseMatrix(i = unlist(lapply(countsVecList, function(j) j@i)), + j = unlist(lapply(seq_along(countsVecList), function(j) rep(j, length(countsVecList[[j]]@i)))), + x = unlist(lapply(countsVecList, function(j) j@x)), + dims = c(length(countsVecList[[1]]), length(countsVecList))) + if(all(is.na(countsMat))) + countsMat <- sparseMatrix(i=NULL, j = NULL, dims = c(length(countsVecList[[1]]), length(countsVecList))) + + colnames(countsMat) <- sampleNames + + if (k == "incompatibleCounts") + rownames(countsMat) <- unique(mcols(annotations)$GENEID) + + return(countsMat) + }) + names(countsDataMat) <- countsData + countsSe <- SummarizedExperiment(assays = SimpleList(counts = countsDataMat$counts, + CPM = countsDataMat$CPM, + fullLengthCounts = countsDataMat$fullLengthCounts, + uniqueCounts = countsDataMat$uniqueCounts)) + metadata(countsSe)$incompatibleCounts <- countsDataMat$incompatibleCounts + rowRanges(countsSe) <- annotations + return(countsSe) +} + +#' Generate the coldata for se options using colnames, and other option inputs +#' @noRd +generateColData <- function(sampleNames, clusters, demultiplexed, spatial){ + ColData <- DataFrame(id = sampleNames) + if(!isFALSE(demultiplexed) & is.null(clusters)){ + ColData <- DataFrame(id = sampleNames, + sampleName = gsub("_[^_]+$","", sampleNames, perl = TRUE), + Barcode = gsub(".*_(?=[^_]*$)","", sampleNames, perl = TRUE)) } - if(returnDistTable){ - distTables = lapply(countsSe, FUN = function(se){metadata(se)$distTable}) - names(distTables) = sampleNames - countsSe = lapply(countsSe, FUN = function(se){ - metadata(se)$distTable=NULL - return(se)}) + if(!is.null(spatial) & is.null(clusters)){ + ColData$x_coordinate <- NA + ColData$y_coordinate <- NA + if(length(spatial)==1){ + # the following line takes a regular delimited file as input + # it can 
either has header or without header + # it can also be compressed + bc_coords <- fread(spatial, + col.names = c("Barcode", "x_coordinate", "y_coordinate"), + data.table = FALSE) + # DataFrame(read.table(gzfile(spatial), + # col.names = c("Barcode", "x_coordinate", "y_coordinate"))) + bcMatch <- match(ColData$Barcode, bc_coords$Barcode) + ColData$x_coordinate <- bc_coords$x_coordinate[bcMatch] + ColData$y_coordinate <- bc_coords$y_coordinate[bcMatch] + } else{ + spatial.unique <- unique(spatial) + for(whitelist in spatial.unique){ + i <- which(spatial.unique==whitelist) + bc_coords <- fread(whitelist, + col.names = c("Barcode", "x_coordinate", "y_coordinate"), + data.table = FALSE) + # DataFrame(read.table(gzfile(whitelist), + # col.names = c("Barcode", "x_coordinate", "y_coordinate"))) + bcSampleIndex <- ColData$sampleName %in% sampleNames[i] + bcMatch <- match(ColData$Barcode[bcSampleIndex], bc_coords$Barcode) + ColData$x_coordinate[bcSampleIndex] <- bc_coords$x_coordinate[bcMatch] + ColData$y_coordinate[bcSampleIndex] <- bc_coords$y_coordinate[bcMatch] + } + } } - # combine incompatible counts - incompatibleCounts = Reduce(merge_wrapper, lapply(countsSe, FUN = function(se){metadata(se)$incompatibleCounts})) - countsSe = lapply(countsSe, FUN = function(se){ - metadata(se)$incompatibleCounts=NULL - return(se)}) - countsSe <- do.call(SummarizedExperiment::cbind, countsSe) - if(trackReads) metadata(countsSe)$readToTranscriptMaps = readToTranscriptMaps - if(returnDistTable) metadata(countsSe)$distTables = distTables - metadata(countsSe)$incompatibleCounts = incompatibleCounts - return(countsSe) + return(ColData) } # Quick wrapper function (https://stackoverflow.com/questions/13273833/merging-multiple-data-tables) diff --git a/R/prepareAnnotations_utilityFunctions.R b/R/prepareAnnotations_utilityFunctions.R index ffa2f48d..bee77db6 100644 --- a/R/prepareAnnotations_utilityFunctions.R +++ b/R/prepareAnnotations_utilityFunctions.R @@ -27,6 +27,21 @@ 
prepareAnnotationsFromGTF <- function(file) { data$strand[data$strand == "."] <- "*" data$GENEID <- gsub("gene_id (.*?);.*", "\\1", data$attribute) data$TXNAME <- gsub(".*transcript_id (.*?);.*", "\\1", data$attribute) + data$NDR <- NULL + data$maxTxScore <- NULL + data$maxTxScore.noFit <- NULL + data$novelGene = NULL + data$novelTranscript = NULL + data$txClassDescription = NULL + hasNDR = all(grepl("NDR ", data$attribute)) + if(hasNDR) { + data$NDR <- as.numeric(gsub(".*NDR (.*?);.*", "\\1", data$attribute)) + data$maxTxScore <- as.numeric(gsub(".*maxTxScore (.*?);.*", "\\1", data$attribute)) + data$maxTxScore.noFit <- as.numeric(gsub(".*maxTxScore.noFit (.*?);.*", "\\1", data$attribute)) + data$novelGene <- as.logical(gsub(".*novelGene (.*?);.*", "\\1", data$attribute)) + data$novelTranscript <- as.logical(gsub(".*novelTranscript (.*?);.*", "\\1", data$attribute)) + data$txClassDescription <- gsub(".*txClassDescription (.*?);.*", "\\1", data$attribute) + } multiTxCheck <- as_tibble(data) %>% select(seqname, GENEID) %>% distinct() %>% group_by(GENEID) %>% mutate(n=n(), id=paste0('-',row_number())) if(any(multiTxCheck$n>1)) { # identical TXNAMES @@ -45,6 +60,9 @@ prepareAnnotationsFromGTF <- function(file) { data$GENEID <- uniqueNamesTbl$gene_unique } geneData <- unique(data[, c("TXNAME", "GENEID")]) + geneData <- if(hasNDR) { unique(data[, c("TXNAME", "GENEID", "NDR", "maxTxScore", + "maxTxScore.noFit", "novelGene", "novelTranscript", "txClassDescription")]) + } else {unique(data[, c("TXNAME", "GENEID")])} grlist <- makeGRangesListFromDataFrame( data[, c("seqname", "start", "end", "strand", "TXNAME")], split.field = "TXNAME", keep.extra.columns = TRUE) diff --git a/R/prepareDataFromBam.R b/R/prepareDataFromBam.R index 09cf5ae7..f9f8b414 100755 --- a/R/prepareDataFromBam.R +++ b/R/prepareDataFromBam.R @@ -7,7 +7,8 @@ #' @importFrom GenomicAlignments grglist readGAlignments #' @importFrom GenomicRanges width #' @noRd -prepareDataFromBam <- function(bamFile, 
yieldSize = NULL, verbose = FALSE, use.names = FALSE) { +prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE, + use.names = FALSE, demultiplexed = FALSE, cleanReads = TRUE, dedupUMI = FALSE) { if (is(bamFile, "BamFile")) { if (!is.null(yieldSize)) { yieldSize(bamFile) <- yieldSize @@ -25,15 +26,69 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE, use.n bf <- open(bamFile) readGrgList <- list() counter <- 1 + cells <- c() + umi <- c() + use.names.OG <- use.names + if(!isFALSE(demultiplexed) | cleanReads) use.names <- TRUE + # grepl(".[ct]sv$",demultiplexed) + if(!is.logical(demultiplexed)){ # if demultiplexed argument is not logical value + if(file.exists(demultiplexed)){ # check if file path exists, it has to be a regular delimited file, can be compressed + readMap <- fread(demultiplexed,header = FALSE, data.table = FALSE) # changed function to more efficiently read in data and also correct error of "no lines available as input" for csv format input + }else{ + stop("Provided barcode to map file does not exists! 
Please provide the correct path to demultiplex argument!") + } + } while (isIncomplete(bf)) { - readGrgList[[counter]] <- - grglist(readGAlignments(bf, - param = ScanBamParam(flag = - scanBamFlag(isSecondaryAlignment = FALSE)), - use.names = use.names)) + alignmentInfo <- readGAlignments(bf, param = ScanBamParam(tag = c("BC", "UG"), + flag = scanBamFlag(isSecondaryAlignment = FALSE)), + use.names = use.names) + readGrgList[[counter]] <-grglist(alignmentInfo) + if (!isFALSE(demultiplexed)){ # if demultiplexed is TRUE or a string path + if(isTRUE(demultiplexed)){ # if demultiplexed is TRUE + + mcols(readGrgList[[counter]])$BC <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("_.*", "", names(readGrgList[[counter]])), # a checkpoint to see whether BC is contained in the name, with specific format BC_UMI#READNAME, + !is.na(mcols(alignmentInfo)$BC) ~ mcols(alignmentInfo)$BC, + TRUE ~ NA) + + mcols(readGrgList[[counter]])$UMI <- case_when(grepl("^[^_]+_[^#]+#", names(readGrgList[[counter]]), perl = TRUE) ~ sub("^[^_]+_([^#]+)#.*$", "\\1", names(readGrgList[[counter]])), # a checkpoint to see whether UMI is contained in the name, with specific format BC_UMI#READNAME, + !is.na(mcols(alignmentInfo)$UG) ~ mcols(alignmentInfo)$UG, + TRUE ~ NA) + } else{ # if demultiplexed is a string path + mcols(readGrgList[[counter]])$BC <- NA + mcols(readGrgList[[counter]])$UMI <- NA + mcols(readGrgList[[counter]])$BC <- readMap[,2][match(names(readGrgList[[counter]]),readMap[,1])] + if(ncol(readMap)>2){ + mcols(readGrgList[[counter]])$UMI <- readMap[,3][match(names(readGrgList[[counter]]),readMap[,1])] + } + } + cells <- unique(c(cells, mcols(readGrgList[[counter]])$BC)) + mcols(readGrgList[[counter]])$BC <- factor(mcols(readGrgList[[counter]])$BC, levels = cells) + umi <- unique(c(umi, mcols(readGrgList[[counter]])$UMI)) + mcols(readGrgList[[counter]])$UMI <- factor(mcols(readGrgList[[counter]])$UMI, levels = umi) + } + if(cleanReads){ + softClip5Prime 
<- clipFunction(cigarData = GenomicAlignments::cigar(alignmentInfo), grep_pattern = '^(\\d*)[S].*', replace_pattern = '\\1') + softClip3Prime <- clipFunction(cigarData = GenomicAlignments::cigar(alignmentInfo), grep_pattern = '.*\\D(\\d*)[S]$', replace_pattern = '\\1') + hardClip5Prime <- clipFunction(cigarData = GenomicAlignments::cigar(alignmentInfo), grep_pattern = '^(\\d*)[H].*', replace_pattern = '\\1') + hardClip3Prime <- clipFunction(cigarData = GenomicAlignments::cigar(alignmentInfo), grep_pattern = '.*\\D(\\d*)[H]$', replace_pattern = '\\1') + # softClip5Prime <-suppressWarnings(pmax(0,as.numeric(gsub('^(\\d*)[S].*','\\1',GenomicAlignments::cigar(alignmentInfo))), na.rm=T)) + # softClip3Prime <-suppressWarnings(pmax(0,as.numeric(gsub('.*\\D(\\d*)[S]$','\\1',GenomicAlignments::cigar(alignmentInfo))), na.rm=T)) + # hardClip5Prime <-suppressWarnings(pmax(0,as.numeric(gsub('^(\\d*)[H].*','\\1',GenomicAlignments::cigar(alignmentInfo))), na.rm=T)) + # hardClip3Prime <-suppressWarnings(pmax(0,as.numeric(gsub('.*\\D(\\d*)[H]$','\\1',GenomicAlignments::cigar(alignmentInfo))), na.rm=T)) + mcols(readGrgList[[counter]])$clip5Prime <- pmax(softClip5Prime, hardClip5Prime) + mcols(readGrgList[[counter]])$clip3Prime <- pmax(softClip3Prime, hardClip3Prime) + rev <- as.vector(strand(alignmentInfo) == '-') + rev2 <- grepl("_-.+of", names(alignmentInfo)) + temp <- mcols(readGrgList[[counter]])$clip5Prime + mcols(readGrgList[[counter]])$clip5Prime[rev != rev2] <- mcols(readGrgList[[counter]])$clip3Prime[rev != rev2] + mcols(readGrgList[[counter]])$clip3Prime[rev != rev2] <- temp[rev != rev2] + } + counter <- counter + 1 } on.exit(close(bf)) + rm(cells) + rm(umi) if (length(readGrgList) > 1) { readGrgList <- do.call(c, readGrgList) } else { @@ -41,7 +96,60 @@ prepareDataFromBam <- function(bamFile, yieldSize = NULL, verbose = FALSE, use.n } # remove microexons of width 1bp from list readGrgList <- readGrgList[width(readGrgList) > 1] - mcols(readGrgList)$id <- 
seq_along(readGrgList) + numNoBCs <- sum(is.na(mcols(readGrgList)$BC)) + if(numNoBCs > 0){ + message("Removing ", numNoBCs, " reads that were not assigned barcodes. If this is unexpected check the barcode map input") + readGrgList <- readGrgList[!is.na(mcols(readGrgList)$BC)] + } + if(cleanReads){ + #extract duplicated reads from flexiplex to clean + #leave other reads alone as supplimental alignments maybe fusion transcripts + #commented out because it takes awhile + # dt = data.table(name = substr(readNames2,0,nchar(readNames2[1])-2), + # strand = substr(readNames2,nchar(readNames2[1]),nchar(readNames2[1]))) + # dt[, id := .I] + # dt <- dt[, .(ids = list(id), toFilt = any(strand == "+" & strand == "-")), by = name] + # readGrgList.keep = readGrgList[c(dt$ids[!dt$toFilt])] + # readGrgList.filt = readGrgList[c(dt$ids[dt$toFilt])] + + #select alignments closest to barcode + start.ptm <- proc.time() + df <- data.frame(name = names(readGrgList), + clip5 = mcols(readGrgList)$clip5Prime) + df <- df %>% mutate(id = row_number()) %>% group_by(name) %>% summarise(primary.id = id[which.min(clip5)]) + readGrgList <- unname(readGrgList[df$primary.id]) + end.ptm <- proc.time() + message("Primary alignment selection time ", round((end.ptm - start.ptm)[3] / 60, 3), " mins.") + + } + if(dedupUMI){ + #UMI deduplication by barcode + start.ptm <- proc.time() + numUMIs <- length(na.omit(unique(mcols(readGrgList)$UMI))) # remove NA UMIs + if(numUMIs > 100){ + df <- data.frame(umi = mcols(readGrgList)$BC, + barcode = mcols(readGrgList)$UMI, + lengths = sum(width(readGrgList))) + df <- df %>% mutate(id = row_number()) %>% group_by(barcode, umi) %>% summarise(primary.id = id[which.max(lengths)]) + readGrgList <- readGrgList[df$primary.id] + end.ptm <- proc.time() + message("UMI deduplication time ", round((end.ptm - start.ptm)[3] / 60, 3), " mins.") + } else { + message("Only ", numUMIs, " detected. Not performing UMI deduplication. 
If this is unexpected, double check the --chemistry argument") + } + + + #readGrgList = c(readGrgList.filt, unname(readGrgList.keep)) + } + if(!use.names.OG) names(readGrgList) <- NULL + seqlevels(readGrgList) <- as.character(unique(getChrFromGrList(readGrgList))) return(readGrgList) } + +#' Function to clip sequences +#' @noRd +clipFunction <- function(cigarData, grep_pattern, replace_pattern){ + return(suppressWarnings(pmax(0,as.numeric(gsub(grep_pattern,replace_pattern, + cigarData)), na.rm=T))) +} diff --git a/R/readWrite.R b/R/readWrite.R index 44e98de9..cd0bdfcd 100644 --- a/R/readWrite.R +++ b/R/readWrite.R @@ -16,7 +16,8 @@ #' )) #' path <- tempdir() #' writeBambuOutput(se, path) -writeBambuOutput <- function(se, path, prefix = "") { +writeBambuOutput <- function(se, path, prefix = "", outputExtendedAnno = TRUE, + outputAll = TRUE, outputBambuModels = TRUE, outputNovelOnly = TRUE, seperateSamples = FALSE) { if (missing(se) | missing(path)) { stop("Both summarizedExperiment object from bambu and the path for the output files are required.") @@ -27,17 +28,60 @@ writeBambuOutput <- function(se, path, prefix = "") { transcript_grList <- rowRanges(se) transcript_gtffn <- paste(outdir, prefix, - "extended_annotations.gtf", sep = "") - gtf <- writeToGTF(annotation = transcript_grList, - file = transcript_gtffn) + "extended_annotations", sep = "") + gtf <- writeAnnotationsToGTF(annotation = transcript_grList, + file = transcript_gtffn, outputExtendedAnno = outputExtendedAnno, + outputAll = outputAll, outputBambuModels = outputBambuModels, outputNovelOnly = outputNovelOnly) + utils::write.table(colData(se), file = paste0(outdir, "/", prefix, "sampleData.tsv"), + sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE) for(d in names(assays(se))){ writeCountsOutput(se, varname=d, - feature='transcript',outdir, prefix) + feature='transcript',outdir, prefix) + print(d) + } + #write incompatible counts + if(!is.null(metadata(se)$incompatibleCounts)){ + estimates 
= metadata(se)$incompatibleCounts + estimatesfn <- paste(outdir, prefix, "incompatibleCounts.mtx", sep = "") + Matrix::writeMM(estimates, estimatesfn) } seGene <- transcriptToGeneExpression(se) - writeCountsOutput(seGene, varname='counts', - feature='gene',outdir, prefix) + writeCountsOutput(seGene, varname='counts', feature='gene',outdir, prefix) + #utils::write.table(paste0(colnames(se), "-1"), file = paste0(outdir, "barcodes.tsv"), quote = FALSE, row.names = FALSE, col.names = FALSE) + #R.utils::gzip(paste0(outdir, "barcodes.tsv")) + txANDGenes <- data.table(as.data.frame(rowData(se))[,c("TXNAME","GENEID")]) + utils::write.table(txANDGenes, file = paste0(outdir, "txANDgenes.tsv"), + sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE) + utils::write.table(names(seGene), file = paste0(outdir, "genes.tsv"), + sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE) + + #R.utils::gzip(paste0(outdir, "txANDgenes.tsv")) + #R.utils::gzip(paste0(outdir, "genes.tsv")) + + #If there are multiple samples (when demultiplexed), seperate each sample into its own directory + if(seperateSamples){ + fullSe = se + for(sampleName in unique(colData(fullSe)$sampleName)){ + dir.create(file.path(outdir, sampleName), showWarnings = FALSE) + se = fullSe[,colData(fullSe)$sampleName == sampleName] + metadata(se)$incompatibleCounts = metadata(se)$incompatibleCounts[,colData(fullSe)$sampleName == sampleName] + for(d in names(assays(se))){ + writeCountsOutput(se, varname=d, + feature='transcript',outdir=paste0(outdir, sampleName,"/"), prefix) + } + if(!is.null(metadata(se)$incompatibleCounts)){ + estimates = metadata(se)$incompatibleCounts + estimatesfn <- paste(outdir, "/", sampleName,"/", prefix, "incompatibleCounts.mtx", sep = "") + Matrix::writeMM(estimates, estimatesfn) + } + seGene <- transcriptToGeneExpression(se) + writeCountsOutput(seGene, varname='counts', feature='gene',paste0(outdir, sampleName,"/"), prefix) + utils::write.table(colData(se), file = 
paste0(outdir, "/", sampleName, "/", prefix, "sampleData.tsv"), + sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE) + #utils::write.table(paste0(colnames(se), "-1"), file = paste0(outdir, "barcodes.tsv"), quote = FALSE, row.names = FALSE, col.names = FALSE) + } + } } } @@ -46,19 +90,32 @@ writeBambuOutput <- function(se, path, prefix = "") { #' @noRd writeCountsOutput <- function(se, varname = "counts", feature = "transcript", outdir, prefix){ - estimates <- data.table(as.data.frame(assays(se)[[varname]]), - keep.rownames = TRUE) - if(feature == "transcript"){ + if(!is(assays(se)[[varname]], "sparseMatrix")){ + estimatesfn <- paste(outdir, prefix, varname,"_",feature,".txt", sep = "") + estimates <- data.table(as.data.frame(assays(se)[[varname]]), + keep.rownames = TRUE) + if(feature == "transcript"){ setnames(estimates, "rn", "TXNAME") geneIDs <- data.table(as.data.frame(rowData(se))[,c("TXNAME","GENEID")]) estimates <- geneIDs[estimates, on = "TXNAME"] - }else{ + }else{ setnames(estimates, "rn","GENEID") + } + utils::write.table(estimates, file = estimatesfn, sep = "\t", quote = FALSE, row.names = FALSE) + + } else{ + estimates <- assays(se)[[varname]] + if (feature == "transcript"){ + estimatesfn <- paste(outdir, prefix, varname,"_",feature,".mtx", sep = "") + Matrix::writeMM(estimates, estimatesfn) + #R.utils::gzip(estimatesfn) + + } else{ + estimatesfn <- paste(outdir, prefix, varname,"_",feature,".mtx", sep = "") + Matrix::writeMM(estimates, estimatesfn) + #R.utils::gzip(estimatesfn) + } } - estimatesfn <- paste(outdir, prefix, - varname,"_",feature,".txt", sep = "") - utils::write.table(estimates, file = estimatesfn, - sep = "\t", quote = FALSE, row.names = FALSE) } #' Write annotation GRangesList into a GTF file @@ -85,9 +142,29 @@ writeToGTF <- function(annotation, file, geneIDs = NULL) { } else if (!is(annotation, "CompressedGRangesList")) { stop("The inputted GRangesList is of the wrong class.") } + NDR = NULL + txScore = NULL + 
txScore.noFit = NULL + novelGene = NULL + novelTranscript = NULL + txClassDescription = NULL df <- as_tibble(annotation) df$exon_rank <- paste('exon_number "', df$exon_rank, '";', sep = "") - if (missing(geneIDs)) { + if(!is.null(mcols(annotation)$NDR)){ + NDR = rep(mcols(annotation)$NDR, unname(elementNROWS(annotation))) + df$NDR <- paste('NDR "', as.character(NDR), '";', sep = "") + txScore = rep(mcols(annotation)$maxTxScore, unname(elementNROWS(annotation))) + df$txScore <- paste('maxTxScore "', as.character(txScore), '";', sep = "") + txScore.noFit = rep(mcols(annotation)$maxTxScore.noFit, unname(elementNROWS(annotation))) + df$txScore.noFit <- paste('maxTxScore.noFit "', as.character(txScore.noFit), '";', sep = "") + novelGene = rep(mcols(annotation)$novelGene, unname(elementNROWS(annotation))) + df$novelGene <- paste('novelGene "', as.character(novelGene), '";', sep = "") + novelTranscript = rep(mcols(annotation)$novelTranscript, unname(elementNROWS(annotation))) + df$novelTranscript <- paste('novelTranscript "', as.character(novelTranscript), '";', sep = "") + txClassDescription = rep(mcols(annotation)$txClassDescription, unname(elementNROWS(annotation))) + df$txClassDescription <- paste('txClassDescription "', as.character(txClassDescription), '";', sep = "") + } + if (is.null(geneIDs)) { if (!is.null(mcols(annotation, use.names = FALSE)$GENEID)) { geneIDs <- as_tibble(mcols(annotation, use.names = FALSE)[, c("TXNAME", "GENEID")]) @@ -100,7 +177,7 @@ writeToGTF <- function(annotation, file, geneIDs = NULL) { df$group_name <- paste('transcript_id "', df$group_name, '";', sep = "") df$GENEID <- paste('gene_id "', df$GENEID, '";', sep = "") dfExon <- mutate(df, source = "Bambu", feature = "exon", score = ".", - frame = ".", attributes = paste(GENEID, group_name, exon_rank)) %>% + frame = ".", attributes = paste(GENEID, group_name, exon_rank, NDR, txScore, txScore.noFit, novelGene, novelTranscript, txClassDescription )) %>% select(seqnames, source, feature, 
start, end, score, strand, frame, attributes, group_name) dfTx <- as.data.frame(range(ranges(annotation))) @@ -109,9 +186,16 @@ writeToGTF <- function(annotation, file, geneIDs = NULL) { dfTx$group_name <- paste('transcript_id "', dfTx$group_name, '";', sep = "") dfTx$GENEID <- paste('gene_id "', dfTx$GENEID, '";', sep = "") - + if(!is.null(mcols(annotation)$NDR)) { + dfTx$NDR <- paste('NDR "', mcols(annotation)$NDR, '";', sep = "") + dfTx$txScore <- paste('txScore "', mcols(annotation)$txScore, '";', sep = "") + dfTx$txScore.noFit <- paste('txScore.noFit "', mcols(annotation)$txScore.noFit, '";', sep = "") + dfTx$novelGene <- paste('novelGene "', mcols(annotation)$novelGene, '";', sep = "") + dfTx$novelTranscript <- paste('novelTranscript "', mcols(annotation)$novelTranscript, '";', sep = "") + dfTx$txClassDescription <- paste('txClassDescription "', mcols(annotation)$txClassDescription, '";', sep = "") + } dfTx <- mutate(dfTx,source = "Bambu", feature = "transcript", score = ".", - frame = ".", attributes = paste(GENEID, group_name)) %>% + frame = ".", attributes = paste(GENEID, group_name, NDR, txScore, txScore.noFit, novelGene, novelTranscript, txClassDescription )) %>% select(seqnames, source, feature, start, end, score, strand, frame, attributes, group_name) @@ -124,6 +208,52 @@ writeToGTF <- function(annotation, file, geneIDs = NULL) { col.names = FALSE, sep = "\t") } +#' Write annotation GRangesList into multiple filtered GTF files +#' @title write GRangeslist into multiple filtered GTF files +#' @param annotation a \code{GRangesList} object +#' @param file the output gtf file name +#' @param geneIDs an optional dataframe of geneIDs (column 2) with +#' the corresponding transcriptIDs (column 1) +#' @param outputExtendedAnno an optional boolean to write the extended annotations as a GTF +#' @param outputAll an optional boolean to write all transcripts (irrespective of confidence) as a GTF +#' @param outputBambuModels an optional boolean to write only 
full-length read supported models as a GTF +#' @param outputNovelOnly an optional boolean to write only novel high confidence transcripts as a GTF +#' @return gtf a GTF dataframe +#' @importFrom dplyr select as_tibble mutate %>% left_join arrange group_by +#' ungroup recode_factor +#' @importFrom methods is +#' @export +#' @examples +#' outputGtfFile <- tempfile() +#' gr <- readRDS(system.file("extdata", +#' "annotationGranges_txdbGrch38_91_chr9_1_1000000.rds", +#' package = "bambu" +#' )) +#' writeToGTF(gr, outputGtfFile) +writeAnnotationsToGTF <- function(annotation, file, geneIDs = NULL, outputExtendedAnno = TRUE, + outputAll = TRUE, outputBambuModels = TRUE, outputNovelOnly = TRUE){ + if(outputExtendedAnno){ + writeToGTF(annotation, paste0(file, "_extendedAnnotations.gtf"), geneIDs) + } + if(outputAll){ + annotationAll = setNDR(annotation, 1) + if(length(annotationAll) == length(annotation)) + message("The current NDR threshold already outputs all transcript models. This may result in reduced precision for the extendedAnnotations and supportedTranscriptModels gtfs") + writeToGTF(annotationAll, paste0(file, "_allTranscriptModels.gtf"), geneIDs) + } + + #todo - have this write bambu start and ends for annotated transcripts + if(outputBambuModels){ + annotationBambu = annotation[!is.na(mcols(annotation)$readCount)] + writeToGTF(annotationBambu, paste0(file, "_supportedTranscriptModels.gtf"), geneIDs) + } + + if(outputNovelOnly){ + annotationNovel = annotation[mcols(annotation)$novelTranscript] + writeToGTF(annotationNovel, paste0(file, "_novelTranscripts.gtf"), geneIDs) + } +} + #' Outputs GRangesList object from reading a GTF file #' @title convert a GTF file into a GRangesList @@ -178,3 +308,43 @@ readFromGTF <- function(file, keep.extra.columns = NULL){ } return(grlist) } + +#' @title Read in Bambu results from writeBambuOutput() into se file +#' @param path the destination of the output files +#' (gtf, transcript counts, and gene counts) +#' @param prefixes the 
prefix of the output files +#' @details The function will read in the output from Bambu as a SummarizedExperiment object. +#' This SE object can be used for downstream processes. +#' @export +#' @examples +#' se <- importBambuResults(path = "/path/to/bambu/output/", +#' prefixes = c("rep1", "rep2") +#' ) +#' path <- tempdir() +#' writeBambuOutput(se, path) +importBambuResults <- function(path, prefixes = NA){ + annotations = prepareAnnotations(paste0(path, "/extended_annotations.gtf")) + counts = readMM(paste0(path, "/counts_transcript.mtx")) + CPM = readMM(paste0(path, "/CPM_transcript.mtx")) + fullLengthCounts = readMM(paste0(path, "/fullLengthCounts_transcript.mtx")) + uniqueCounts = readMM(paste0(path, "/uniqueCounts_transcript.mtx")) + incompatibleCounts = NULL + if(file.exists(paste0(path, "/incompatibleCounts.mtx"))){ + incompatibleCounts = readMM(paste0(path, "/incompatibleCounts.mtx")) + } + barcodes = read.table(paste0(path, "/barcodes.tsv")) + geneIds = read.table(paste0(path, "/genes.tsv")) + txIds = read.table(paste0(path, "/txANDgenes.tsv")) + colData = read.table(paste0(path, "/sampleData.tsv"), header = TRUE) + if(!is.null(incompatibleCounts)) rownames(incompatibleCounts) = geneIds[,1] + + countsSe <- SummarizedExperiment(assays = SimpleList(counts = counts, + CPM = CPM, + fullLengthCounts = fullLengthCounts, + uniqueCounts = uniqueCounts)) + metadata(countsSe)$incompatibleCounts <- incompatibleCounts + rowRanges(countsSe) <- annotations + colData(countsSe) = DataFrame(colData) + colnames(countsSe) = colData[,1] + return(countsSe) +} \ No newline at end of file diff --git a/R/sysdata.rda b/R/sysdata.rda index 1cbb949f..e7699bb4 100755 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/transcriptToGeneExpression.R b/R/transcriptToGeneExpression.R index 9cf7604b..7ad12076 100644 --- a/R/transcriptToGeneExpression.R +++ b/R/transcriptToGeneExpression.R @@ -11,30 +11,23 @@ #' )) #' transcriptToGeneExpression(se) transcriptToGeneExpression <- function(se) { - counts 
<- as.data.table(assays(se)$counts, keep.rownames = TRUE) + counts <- assays(se)$counts runnames <- colnames(counts)[-1] - colnames(counts)[-1] <- rename_duplicatedNames(runnames) - colData(se)@rownames <- rename_duplicatedNames(colData(se)@rownames) - counts <- melt(counts, id.vars = "rn", measure.vars = colnames(counts)[-1]) - setnames(counts, "rn", "TXNAME") rowDataSe <- as.data.table(rowData(se)) - counts <- rowDataSe[, .(TXNAME, GENEID)][counts, on = "TXNAME"] - incompatibleCounts <- metadata(se)$incompatibleCounts - incompatibleCounts[, TXNAME := "incompatible"] - counts_incompatible <- melt(incompatibleCounts, id.vars = c("GENEID","TXNAME"), - measure.vars = setdiff(colnames(incompatibleCounts), c("GENEID","TXNAME"))) - # GENEID, TXNAME, variable, value - counts <- rbind(counts, counts_incompatible[variable %in% unique(counts$variable)]) - counts[, valueGene := sum(value), by = list(variable, GENEID)] - counts[, valueGeneCPM := valueGene / max(sum(value), 1) * 10^6, - by = list(variable)] + counts = fac2sparse(rowData(se)$GENEID) %*% counts + if(!is.null(metadata(se)$incompatibleCounts)){ + incompatibleCounts <- metadata(se)$incompatibleCounts + if("nonuniqueCounts" %in% names(metadata(se))){ + incompatibleCounts = incompatibleCounts + metadata(se)$nonuniqueCounts + } + incompatibleCounts = Matrix(incompatibleCounts[match(rownames(counts), rownames(incompatibleCounts)),], sparse = TRUE) + counts = counts + incompatibleCounts + } + counts.total = colSums(counts) + counts.total[counts.total==0] = 1 + counts.CPM = t(t(counts)/counts.total) * 10^6 - ## counts - counts_gene <- dcast(unique(counts[, .(GENEID, variable, - valueGene)]), GENEID ~ variable, value.var = "valueGene") - counts_gene_CPM <- dcast(unique(counts[, .(GENEID, variable, - valueGeneCPM)]), GENEID ~ variable, value.var = "valueGeneCPM") ## geneRanges exByGene <- reducedRangesByGenes(rowRanges(se)) if ("txClassDescription" %in% colnames(rowDataSe)) { @@ -45,23 +38,16 @@ transcriptToGeneExpression <- 
function(se) { newGeneClass)])[match(names(exByGene), GENEID)] } ## SE - counts_gene <- setDF(counts_gene) - RowNames <- counts_gene$GENEID - rownames(counts_gene) <- RowNames - counts_gene_CPM <- setDF(counts_gene_CPM) - rownames(counts_gene_CPM) <- RowNames - ColNames <- colnames(counts_gene)[-1] + RowNames <- rownames(counts) + ColNames <- colnames(counts) ColData <- colData(se) ColData@rownames <- ColNames ColData@listData$name <- ColNames seOutput <- SummarizedExperiment( - assays = SimpleList(counts = as.matrix(counts_gene[, -1, drop = FALSE], - ncol = length(ColNames), - dimnames = list(RowNames, ColNames)), - CPM = as.matrix(counts_gene_CPM[match(RowNames, - counts_gene_CPM$GENEID), -1, drop = FALSE], ncol = length(ColNames), - dimnames = list(RowNames, ColNames))), + assays = SimpleList(counts = counts, + CPM = counts.CPM), rowRanges = exByGene[RowNames], colData = ColData) + return(seOutput) } \ No newline at end of file diff --git a/README.md b/README.md index f634527d..5f9f81b7 100755 --- a/README.md +++ b/README.md @@ -26,7 +26,8 @@ - [Modulating the sensitivity of discovery (pre and post analysis)](#Modulating-the-sensitivity-of-discovery-pre-and-post-analysis) - [Output](#Output) - [Visualization](#Visualization) -- [*bambu* Advanced Options](#Bambu-Advanced-Options) + - [Single-Cell and Spatial](#Single-Cell-and-Spatial) + - [*bambu* Advanced Options](#Bambu-Advanced-Options) - [Using a pretrained model](#Using-a-pretrained-model) - [De-novo transcript discovery](#De-novo-transcript-discovery) - [Storing and using preprocessed files (rcFiles)](#Storing-and-using-preprocessed-files-rcFiles) @@ -35,6 +36,7 @@ - [Quantification of gene expression](#Quantification-of-gene-expression) - [Including single exons](#Including-single-exons) - [Fusion gene/isoform detection](#Fusion-geneisoform-detection) + - [Custom single-cell and spatial analysis](#Custom-Single-Cell-and-Spatial) - [*bambu* Arguments](#Bambu-Arguments) - [Output 
Description](#Output-Description) - [Release History](#Release-History) @@ -94,11 +96,17 @@ For information on the output and how to export it to a file see [Output](#Outpu #### **Transcript discovery only (no quantification)** If you are only interested in identifying novel transcripts, the quantification module of *bambu* can be skipped by setting quant to FALSE. -Note that the output will be a GRangeslist object containing the reference and novel annotations (See rowRanges() in [Output](#Output)). We recommend running transcript discovery only mode with NDR = 1, and doing filtering in the downstream analysis to allow flexibility in the analysis. See [Modulating the sensitivity of discovery (pre and post analysis)](#Modulating-the-sensitivity-of-discovery-pre-and-post-analysis) +Note that the output will be a GRangeslist object containing the reference and novel annotations (See rowRanges() in [Output](#Output)). For more details on how to adjust the sensitivity and precision of the results see [Modulating the sensitivity of discovery (pre and post analysis)](#Modulating-the-sensitivity-of-discovery-pre-and-post-analysis) ```rscript se.discoveryOnly <- bambu(reads = test.bam, annotations = gtf.file, genome = fa.file, quant = FALSE) ``` +Transcripts that were above the NDR threshold and filtered out (low confidence transcripts) and subset transcripts can be accessed in the metadata of the GRangesList object. + +```rscript +metadata(se.discoveryOnly)$lowConfidenceTranscripts +metadata(se.discoveryOnly)$subsetTranscripts +``` **Quantification of annotated transcripts and genes only (no transcript/gene discovery)** @@ -153,13 +161,30 @@ To manually select an NDR value, use the NDR argument in *bambu*: ```rscript se.NDR_0.3 <- bambu(reads = test.bam, annotations = annotations, genome = fa.file, NDR = 0.3) ``` -Alternatively transcript discovery can be run without thresholds, producing a GRangesList annotation object with all transcripts scored with its NDR score. 
Note that this means turning quant = FALSE in running *bambu* (refer to [“Transcript discovery only”](#transcript-discovery-only-no-quantification) section). The annotations can be filtered by their NDR score (see example below), read count and gene read proportion between the discovery and quantification steps or used for other types of analysis. + +Alternatively the NDR threshold can be adjuted after discovery or on the final output (note that this will only effect the gtf output and for quantification to reflect the adddition or removal of transcripts because of the updated NDR, quantification would need to be rerun). the setNDR function will adjust the novel transcripts included in the output by removing any which are above the new threshold and adding those which are now below the threshold. setNDR takes the annotations as its first argument and the new NDR as the second argument. These annotations must have been generated by Bambu and have stored NDR values for this to work. Additionally setNDR can be run with no NDR, if you would prefer Bambu to recommend a threshold for your dataset. 
Refer to [“Transcript discovery only”](#transcript-discovery-only-no-quantification) for advanced details using setNDR ```rscript -newAnnotations <- bambu(reads = test.bam, annotations = annotations, genome = fa.file, NDR = 1, quant = FALSE) -annotations.filtered <- newAnnotations[(!is.na(mcols(newAnnotations)$NDR) & mcols(newAnnotations)$NDR<0.1) | is.na(mcols(newAnnotations)$NDR)] -se.NDR_1 <- bambu(reads = test.bam, annotations = annotations.filtered, genome = fa.file, NDR = 1, discovery = FALSE) +# after the discovery step +extendedAnnotations_0.3 = setNDR(se.discoveryOnly, 0.3) +writeAnnotationsToGTF(extendedAnnotations, "./output.gtf") + +# after a complete run +extendedAnnotations_0.3 = setNDR(rowRanges(se), 0.3) +writeAnnotationsToGTF(extendedAnnotations, "./output.gtf") ``` + +To run quantification at a different NDR, simply provide bambu annotations alongside the new NDR threshold to bambu and it will automatically adjust the transcripts. + +```rscript +se.quantOnly <- bambu(reads = test.bam, annotations = extendedAnnotatons genome = fa.file, discovery = FALSE, NDR = 0.5) +``` + +You can check the NDR threshold of your annotations by looking at the stored NDR value. This value is updated upon running setNDR. annotations imported from a gtf file will not have this value until after the first running of setNDR. +```rscript +print(metadata(extendedAnnotations_0.3)$NDRthreshold) +``` + Additionally there are other thresholds that advanced users can access through opt.discovery when running *bambu* (see arguments). 
### Output @@ -168,6 +193,7 @@ Additionally there are other thresholds that advanced users can access through o - **assays(se)** returns a list of transcript abundance estimates as counts or CPM - **rowRanges(se)** returns a GRangesList with all annotated and newly discovered transcripts - **rowData(se)** returns additional information about each transcript +- **metadata(rowRanges(se))** returns a list of transcripts considered low confidience which were not included in the exnteded Annotations. Access transcript expression estimates by extracting a variable (such as counts or CPM) using assays(): @@ -179,40 +205,41 @@ Access transcript expression estimates by extracting a variable (such as counts For a full description of the other outputs see [Output Description](#Output-Description) -The full output can be written to a file using writeBambuOutput(). Using this function will generate three files, including a .gtf file for the extended annotations, and two .txt files for the expression counts at transcript and gene levels. +The full output can be written to a file using writeBambuOutput(). Using this function will generate six files, including 4four .gtf files(detailed below), and two .txt files for the expression counts at transcript and gene levels. + +By default bambu will write four .gtf files +- **extendedAnnotations.gtf** - Contains all transcript models from the reference annotations and any novel high confidence transcript models (below NDR threshold) from Bambu +- **allTranscriptModels** - Contains all transcript models from the reference annotations and all novel transcript models, irrespective of their NDR score. This is useful for reloading into Bambu with prepareAnnotations() to redo the analysis or reoutput the annotations at different NDR thresholds. +- **supportedTranscriptModels** - Contains only transcript models that are fully supported by at least one read across the samples provided. 
Note that if multiple reference annotations share the same intron junctions, an abitrary one will selected to be be included in this output. +- **novelTranscripts** - Contains only novel high confidence transcript models (below NDR threshold) from Bambu. + ```rscript writeBambuOutput(se, path = "./bambu/") ``` -If you are only interested in the novel transcripts, one can filter this 'se' object first to remove reference annotations. -```rscript -se.novel = se[mcols(se)$novelTranscript,] -writeBambuOutput(se.novel, path = "./bambu/") -``` -If you are only interested in full-length transcripts that were detected by Bambu. +If you would like to avoid outputting any of the above .gtf for space concerns, each can be toggled off with the below arguments. ```rscript -se.novel = se[assays(se)$fullLengthCounts >= 1,] -writeBambuOutput(se.novel, path = "./bambu/") +writeBambuOutput(se.novel, path = "./bambu/", outputExtendedAnno = FALSE, outputAll = FALSE, outputBambuModels = FALSE, outputNovelOnly = FALSE) ``` If quant is set to FALSE i.e. only transcript discovery is performed, only the rowRanges output of the extended annotations is returned (a GRangesList object). The equivalent rowData can be accessed with mcols() -These annotations can be written to a .gtf file using writeToGTF(GRangesList_object, output_path). +These annotations can be written to a .gtf file using writeAnnotationsToGTF(GRangesList_object, output_path). +This will output the four .gtf files mentioned above, and can be excluded using the same arguments. ```rscript se.discoveryOnly <- bambu(reads = sample, annotations = annotations, genome = fa.file, quant = FALSE) -writeToGTF(se.discoveryOnly, "./output.gtf") +writeAnnotationsToGTF(se.discoveryOnly, "./output.gtf") ``` -As above, to output only the novel annotations, you need to filter out the reference annotations. 
+If you would prefer to manually filter the annotations, you can also provide the resulting annotations to writeToGTF() which will output the annotations as is. ```rscript se.discoveryOnly.novel = se.discoveryOnly[mcols(se.discoveryOnly)$novelTranscript,] writeToGTF(se.discoveryOnly.novel, "./output.gtf") ``` -If you are only interested in full-length transcripts that were detected by Bambu. If multiple transcripts share exon-junctions, only one will be displayed. To avoid this, do the filter after quantification as in the example above. -```rscript -se.novel = se[!is.na(mcols(se)$readCount) & mcols(se)$readCount >= 1,] -writeBambuOutput(se.novel, path = "./bambu/") -``` If both quant and discovery are set to FALSE, *bambu* will return an intermediate object see [Storing and using preprocessed files (rcFiles)](#Storing-and-using-preprocessed-files-rcFiles) +To reimport the output of writeBambuOutput() use importBambuResults() +```rscript +se <- importBambuResults(path = "/path/to/bambu/output/") +``` ### Visualization You can visualize the novel genes/transcripts using plotBambu function. (Note that the visualization was done by running *bambu* on the three replicates of HepG2 cell line in the SG-NEx project) @@ -258,8 +285,14 @@ plotBambu(se, type = "heatmap", group.var) # heatmap plotBambu(se, type = "pca", group.var) # PCA visualization ``` +### Single-Cell-and-Spatial + +There is a single-cell and spatial pipeline starting from fastq or demultiplexed bam files that include demultiplexing and aligning available here https://github.com/GoekeLab/bambu-singlecell-spatial. We recommend using this pipeline where possible. + +For advanced users see the #[Custom single-cell and spatial analysis](#Custom-Single-Cell-and-Spatial) section under advanced options + ### *Bambu* Advanced Options -Below we include several advanced options and use-cases for *bambu*. 
We recommend reading and understanding the [paper](https://www.biorxiv.org/content/10.1101/2022.11.14.516358v1) before attempting to use these features. +Below we include several advanced options and use-cases for *bambu*. We recommend reading and understanding the [paper](https://doi.org/10.1038/s41592-023-01908-w) before attempting to use these features. ### Using a pretrained model @@ -270,20 +303,20 @@ se <- bambu(reads = test.bam, annotations = annotations, genome = fa.file, opt.d ``` The default pretrained model was trained on SGNex_HepG2_directRNA_replicate5_run1 and has the following characteristics: -Genome: Homo_sapiens.GRCh38.dna_sm.primary_assembly
-Annotations: Homo_sapiens.GRCh38.91
-Read count: 7,861,846
-Technology: Nanopore (ONT)
-Library preparation: directRNA
-Base Calling Accuracy: 79%
-Average Read Length: 1093
+**Genome**: Homo_sapiens.GRCh38.dna_sm.primary_assembly
+**Annotations**: Homo_sapiens.GRCh38.91
+**Read count**: 7,861,846
+**Technology**: Nanopore (ONT)
+**Library preparation**: directRNA
+**Base Calling Accuracy**: 79%
+**Average Read Length**: 1093
We have found the pretrained model works successfully across species borders (on Arabidopsis thaliana) and on different technologies (PacBio), with only small decreases in performance compared to using a sample specific model. The pretrained model is not always effective in samples with large differences in sequencing quality or if the library preparation results in biases in the overall structure of the transcriptome. In this case, we would recommend training a new model using similar data from a different sample that has quality reference annotations (See [Training a model on another species/dataset and applying it](#Training-a-model-on-another-speciesdataset-and-applying-it)). ### De-novo transcript discovery In cases where the organism does not yet have reference annotations, or unreliable annotations, *bambu* can be run in de-novo mode. In de-novo mode, *bambu* does not train a model, and instead uses the pretrained model to classify novel transcripts (see [Using a pretrained model](#Using-a-pretrained-model). To learn how to train a new model for a more closely related organism/sample see [Training a model on another species/dataset and applying it](#Training-a-model-on-another-speciesdataset-and-applying-it). Without annotations *bambu* is unable to calibrate the NDR output, nor be able to recommend a threshold and will instead use the TPS as the thresholded value. Therefore you should supply a manual NDR threshold ([Modulating the sensitivity of discovery (pre and post analysis)](#Modulating-the-sensitivity-of-discovery-pre-and-post-analysis)) and note that the precision of the output is unlikely to linearly match an applied threshold. -The TPS threshold used is (> 1-NDR). If an NDR is not provided, a default NDR threshold of <0.1 is used (an effective TPS threshold of > 0.9). 
As in [Modulating the sensitivity of discovery (pre and post analysis)](#Modulating-the-sensitivity-of-discovery-pre-and-post-analysis) an NDR of 1 can be provided to output all possible read classes with their TPS scores +The TPS threshold used is (> 1-NDR). If an NDR is not provided, a default NDR threshold of <0.1 is used (an effective TPS threshold of > 0.9). ```rscript novelAnnotations <- bambu(reads = test.bam, annotations = NULL, genome = fa.file, NDR = 0.5, quant = FALSE) @@ -332,12 +365,14 @@ rowData(se[[1]]) |chr.rc|The chromosome name the read class is found on| |strand.rc|The strand of the read class| |startSD|The standard deviation of the aligned genomic start positions of all reads assigned to the read class| +|endSD|The standard deviation of the aligned genomic end positions of all reads assigned to the read class| |readCount.posStrand|The number of reads assigned to this read class that aligned to the positive strand| |intronStarts|A comma separated character vector of intron start coordinates| |intronEnds|A comma separated character vector of intron end coordinates| -|confidenceType|Category of confidence:
**highConfidenceJunctionReads** - the read class contain no low confidence junctions
**lowConfidenceJunctionReads** - the read class contains low confidence junctions
**unsplicedWithin** - single exon read class that is within the exon boundaries of an annotation
**unsplicedNew** - single exon read class that does not fully overlap with annotated exons| +|confidenceType|Category of confidence:
**highConfidenceJunctionReads** - the read class contain no low confidence junctions
**lowConfidenceJunctionReads** - the read class contains low confidence junctions
**unsplicedWithin** - single exon read class that is within the exon boundaries of an annotation
**unsplicedNew** - single exon read class that does not fully overlap with annotated exons| |readCount|The number of reads assigned to this read class| -|readId *only present when trackReads = TRUE|An integer list of bambu internal read ids that belong to the read class. (See the metadata of the object for full read names)| +|readIds|An integer list of bambu internal read ids that belong to the read class. (See the metadata of the object for full read names)| +|sampleIds|An integer list of bambu internal sample ids based on barcodes.| |GENEID|The gene ID the transcript is associated with| |novelGene|A logical that is true if the read class belongs to a novel gene (does not overlap with an annotated gene loci)| |numExons|The number of exons the read class has| @@ -349,8 +384,8 @@ rowData(se[[1]]) |numAend|An integer counting the number of A nucleotides found within a 20bp window centered on the read class genomic end position| |numTstart|An integer counting the number of T nucleotides found within a 20bp window centered on the read class genomic start position| |numTend|An integer counting the number of T nucleotides found within a 20bp window centered on the read class genomic end position| -|txScore|This is the TPS generated by the sample trained model| |txScore.noFit|This is the TPS generated by the pretrained model| +|txScore|This is the TPS generated by the sample trained model| ### Tracking read-to-transcript assignment @@ -375,12 +410,12 @@ In situations where training is not or cannot be performed, and the default mode ```rscript # first train the model using a related annotated dataset from .bam -se = bambu(reads = sample1.bam, annotations = annotations, genome = fa.file, discovery = FALSE, quant = FALSE, opt.discovery = list(returnModel = TRUE)) # note that discovery and quant need to be set to FALSE, alternatively you can have them set to TRUE and retrieve the model from the rcFile as long as returnModel = TRUE ([see 
here](#Storing-and-using-preprocessed-files-rcFiles)). +se <- bambu(reads = sample1.bam, annotations = annotations, genome = fa.file, discovery = FALSE, quant = FALSE, opt.discovery = list(returnModel = TRUE)) # note that discovery and quant need to be set to FALSE, alternatively you can have them set to TRUE and retrieve the model from the rcFile as long as returnModel = TRUE ([see here](#Storing-and-using-preprocessed-files-rcFiles)). newDefaultModel = metadata(se[[1]])$model # [[1]] will select the model trained on the first sample # alternatively train the model using an rcFile rcFile <- readRDS(pathToRcFile) -newDefaultModel = trainBambu(rcFile) +newDefaultModel <- trainBambu(rcFile) # use the trained model on another sample # sample2.bam and fa.file2 represent the aligned reads and genome for the poorly annotated sample @@ -437,6 +472,106 @@ To use this feature, it is recommended to detect the fusion gene breakpoints usi se <- bambu(reads = fusionAligned.bam, annotations = fusionAnnotations, genome = fusionFasta, fusionMode = TRUE) ``` +### Custom single-cell and spatial + +If you want to run Bambu-Clump for single-cell or spatial analysis stand alone and not part of the Bambu-Pipe pipeline we recommend running it in 4 stages which we will describe seperately: Read Class Construction, Transcript Discovery, Read Class Assignment, and EM Quantification. Note that this section will only cover arguments that are different or unique to this analysis. + +#### Read Class Construction: + +**reads**: provided bam files should have barcodes in the read name or in the BC tag ( and UG tag for UMI identifiers). In the case where both tags and read names contain barcode information, tags will be used a prior. If not, a regular delimited headerless file that contain the demultiplexing information for each read should be provided to demultiplexed argument below. For exact requirements see https://github.com/GoekeLab/bambu-singlecell-spatial.
+ +**demultiplexed**: should be set either to TRUE or to the path to a barcode mapping file. Otherwise, bambu will not look for barcodes, and reads will be separated by sample rather than by barcode.
+ +Optional: + +**cleanReads**: A logical TRUE/FALSE. Chimeric reads in samples can cause issues with barcode assignments. Setting this to TRUE will ensure only the first alignment per barcode is used (We recommend using this).
+ +**sampleNames**: A vector of characters assigning names to each sample in the reads argument. By default the sample names are taken from the file names and appended to the barcodes in order to differentiate them. If your sample names are the same across multiple files, but matching barcodes between the samples should be counted separately, provide them with different sample names using this argument. Similarly, if your samples have different names, but overlapping barcodes should be counted together, give them the same sample name with this argument.
+ +**dedupUMI**: A logical TRUE/FALSE.
+ +**barcodesToFilter**: A string vector indicating barcodes to be filtered out.
+ +```rscript +readClassFile <- bambu(reads = samples, annotations = annotations, genome = fa.file, ncore = 1, discovery = FALSE, quant = FALSE, demultiplexed = barcode_maps, verbose = TRUE, assignDist = FALSE, lowMemory = as.logical("$params.lowMemory"), yieldSize = 10000000, sampleNames = ids, cleanReads = as.logical($cleanReads), dedupUMI = as.logical($deduplicateUMIs)) +``` + +#### Transcript Discovery: + +Transcript discovery can be run as usual as typically bulk-level discovery is suitable. However cluster-level transcript discovery can be performed using the clusters argument which can be redone after clustering. + +```rscript +extendedAnno <- bambu(reads = readClassFile, annotations = annotations, genome = fa.file, ncore = 1, discovery = TRUE, quant = FALSE, demultiplexed = TRUE, verbose = FALSE, assignDist = FALSE) +``` + +#### Read Class Assignment: + +This step was previously performed together with the quantification, but can be done separately so that the arguments can be passed to the quantification separately with different clustering. If you only want barcode level gene counts or unique transcript counts you can stop here and do not need to proceed to the EM quantification. + +**spatial**: This should be a path to your barcode whitelist that also contains the x and y coordinates as extra columns. If provided, the file should contain 3 columns with or without header, where the first column is the barcode, and the second and third column contains the x and y coordinates information accordingly. Compressed file format is accepted as well. 
+ +```rscript +quantData <- bambu(reads = readClassFile, annotations = extendedAnno, genome = fa.file, ncore = 1, discovery = FALSE, quant = FALSE, demultiplexed = TRUE, verbose = FALSE, opt.em = list(degradationBias = FALSE), assignDist = TRUE, spatial = spatial) +``` + +#### EM quantification: + +If you plan to run this step with multiple processes we recommend restarting your R instance to ensure that environmental variables do not inflate the memory usage. + +**reads**: This argument is still mandatory but not needed when performing quantification alone as long as you provide the quantData argument
+ +**quantData**: This is the SummarizedExperiment output from the Read Class Assignment step
+ +**clusters**: This is an optional argument which is either a path to a csv containing the barcode to cluster assignments or a CharacterList which can be produced using the code below.
+ +**opt.em = list(degradationBias=FALSE)**: We recommend including this argument if you are doing barcode level EM quantification to greatly improve runtime with only a small reduction in quantification accuracy. + +```rscript +#use Seurat to generate clusters from gene counts +library(Seurat) + +clusterCells <- function(counts, resolution = 0.8, dim = 15){ + + cellMix <- CreateSeuratObject(counts = counts, + project = "cellMix", min.cells = 1)#, min.features = 200) + #cellMix <- subset(cellMix, subset = nFeature_RNA > nFeature_RNA_threshold & nFeature_RNA < nFeature_RNA_threshold_max) + #nFeature_RNA_threshold <- 1000, nFeature_RNA_threshold_max = 9000, + cellMix <- NormalizeData(cellMix, normalization.method = "LogNormalize", scale.factor = 10000) + cellMix <- FindVariableFeatures(cellMix, selection.method = "vst", nfeatures = 2500) + all.genes <- rownames(cellMix) + cellMix <- ScaleData(cellMix, features = all.genes) + npcs <- ifelse(ncol(counts)>50, 50, ncol(counts)-1) + cellMix <- RunPCA(cellMix, features = VariableFeatures(object = cellMix), npcs = npcs) + dim <- ifelse(dim >= dim(cellMix@reductions$pca)[2], dim(cellMix@reductions$pca)[2],dim) # if data dimension is small, otherwise, cap dimension at 15 + cellMix <- FindNeighbors(cellMix, dims = 1:dim) + cellMix <- FindClusters(cellMix, resolution = resolution) + cellMix <- RunUMAP(cellMix, dims = 1:dim) + + return(cellMix) +} + +quantData.gene <- transcriptToGeneExpression(quantData) +counts <- assays(quantData.gene)$counts #selecting first sample +cellMix <- clusterCells(counts) #resolution can be customized. 
For larger clusters: 0.2-0.6, for higher resolution: 0.8-2 +x <- setNames(names(cellMix@active.ident), cellMix@active.ident) +clusters_temp <- splitAsList(unname(x), paste0("cluster",names(x)))#make clusters names start with cluster, for better comprehension + + + +se <- bambu( reads = NULL, + annotations = rowRanges(quantDatas), + genome = "$genome", + quantData = quantDatas, + assignDist = FALSE, + ncore = $params.ncore, + discovery = FALSE, + quant = TRUE, + demultiplexed = TRUE, + verbose = FALSE, + opt.em = list(degradationBias = FALSE), + clusters = clusters_temp) +``` + ### *Bambu* Arguments |argument|description| @@ -449,16 +584,39 @@ se <- bambu(reads = fusionAligned.bam, annotations = fusionAnnotations, genome = | ncore | specifying number of cores used when parallel processing is used, defaults to 1. | | NDR | specifying the maximum NDR rate to novel transcript output among detected transcripts, defaults to 0.1 | | yieldSize | see Rsamtools. | -| opt.discovery | A list of controlling parameters for isoform reconstruction process:
**prefix** specifying prefix for new gene Ids (genePrefix.number), defaults to empty
**remove.subsetTx** indicating whether filter to remove read classes which are a subset of known transcripts, defaults to TRUE
**min.readCount** specifying minimun read count to consider a read class valid in a sample, defaults to 2
**min.readFractionByGene** specifying minimum relative read count per gene, highly expressed genes will have many high read count low relative abundance transcripts that can be filtered, defaults to 0.05
**min.sampleNumber** specifying minimum sample number with minimum read count, gene read proportion, and TPS, defaults to 1
**min.exonDistance** specifying minimum distance to known transcript to be considered valid as new, defaults to 35bp
**min.exonOverlap** specifying minimum number of bases shared with annotation to be assigned to the same gene id, defaults to 10bp
**min.primarySecondaryDist** specifying the minimum number of distance threshold between a read class and the annotations internal exons. Read classes with distances less than the threshold are not annotated as novel and counted with the annotations for quantification, defaults to 5bp
**min.primarySecondaryDistStartEnd1** specifying the minimum number of distance threshold between a read class and the annotations start/end exons. Read classes with distances less than the threshold are not annotated as novel, defaults to 5bp
**min.primarySecondaryDistStartEnd2** specifying the minimum number of distance threshold between a read class and the annotations start/end exons. Read classes with distances less than the threshold are counted with the annotations, defaults to 5bp
**min.txScore.multiExon** specifying the minimum transcript probility score threshold for multi-exon transcripts for min.sampleNumber, defaults to 0
**min.txScore.singleExon** specifying the minimum transcript probability score threshold for single-exon transcripts for min.sampleNumber
**fitReadClassModel** a boolean specifying if bambu should train a model on each sample. If set to false bambu will use the default model for ranking novel transcripts. defaults to TRUE
**defaultModels** a bambu trained model object that bambu will use when fitReadClassModel==FALSE or the data is not suitable for training, defaults to the pretrained model in the *bambu* package
**returnModel** a boolean specifying if bambu will output the model it trained on the data, defaults to FALSE
**baselineFDR** a value between 0-1. Bambu uses this FDR on the trained model to recommend an equivilent NDR threshold to be used for the sample. By default, a baseline FDR of 0.1 is used. This does not impact the analysis if an NDR is set.
**min.readFractionByEqClass** indicating the minimum relative read count of a subset transcript compared to all superset transcripts (ie the relative read count within the minimum equivalent class). This filter is applied on the set of annotations across all samples using the total read count, this is not a per-sample filter. Please use with caution. defaults to 0 | -| opt.em | A list of controlling parameters for quantification algorithm estimation process:
**maxiter** specifying maximum number of run iterations, defaults to 10000
**degradationBias** correcting for degradation bias, defaults to TRUE
**conv** specifying the covergence threshold control, defaults to 0.0001
**minvalue** specifying the minvalue for convergence consideration, defaults to 0.00000001 | +| opt.discovery | A list of controlling parameters for isoform reconstruction process:
**prefix** specifying prefix for new gene Ids (genePrefix.number), defaults to empty
**remove.subsetTx** indicating whether filter to remove read classes which are a subset of known transcripts, defaults to TRUE
**min.readCount** specifying minimum read count to consider a read class valid in a sample, defaults to 2
**min.readFractionByGene** specifying minimum relative read count per gene, highly expressed genes will have many high read count low relative abundance transcripts that can be filtered, defaults to 0.05
**min.sampleNumber** specifying minimum sample number with minimum read count, gene read proportion, and TPS, defaults to 1
**min.exonDistance** specifying minimum distance to known transcript to be considered valid as new, defaults to 35bp
**min.exonOverlap** specifying minimum number of bases shared with annotation to be assigned to the same gene id, defaults to 10bp
**min.primarySecondaryDist** specifying the minimum number of distance threshold between a read class and the annotations internal exons. Read classes with distances less than the threshold are not annotated as novel and counted with the annotations for quantification, defaults to 5bp
**min.primarySecondaryDistStartEnd1** specifying the minimum number of distance threshold between a read class and the annotations start/end exons. Read classes with distances less than the threshold are not annotated as novel, defaults to 5bp
**min.primarySecondaryDistStartEnd2** specifying the minimum number of distance threshold between a read class and the annotations start/end exons. Read classes with distances less than the threshold are counted with the annotations, defaults to 5bp
**min.txScore.multiExon** specifying the minimum transcript probability score threshold for multi-exon transcripts for min.sampleNumber, defaults to 0
**min.txScore.singleExon** specifying the minimum transcript probability score threshold for single-exon transcripts for min.sampleNumber
**fitReadClassModel** a boolean specifying if bambu should train a model on each sample. If set to false bambu will use the default model for ranking novel transcripts. defaults to TRUE
**defaultModels** a bambu trained model object that bambu will use when fitReadClassModel==FALSE or the data is not suitable for training, defaults to the pretrained model in the *bambu* package
**returnModel** a boolean specifying if bambu will output the model it trained on the data, defaults to FALSE
**baselineFDR** a value between 0-1. Bambu uses this FDR on the trained model to recommend an equivalent NDR threshold to be used for the sample. By default, a baseline FDR of 0.1 is used. This does not impact the analysis if an NDR is set.
**min.readFractionByEqClass** indicating the minimum relative read count of a subset transcript compared to all superset transcripts (ie the relative read count within the minimum equivalent class). This filter is applied on the set of annotations across all samples using the total read count, this is not a per-sample filter. Please use with caution. defaults to 0 | +| opt.em | A list of controlling parameters for quantification algorithm estimation process:
**maxiter** specifying maximum number of run iterations, defaults to 10000
**degradationBias** correcting for degradation bias, defaults to TRUE
**conv** specifying the convergence threshold control, defaults to 0.0001
**minvalue** specifying the minvalue for convergence consideration, defaults to 0.00000001 | | trackReads | When TRUE read names will be tracked and output as metadata in the final output as readToTranscriptMaps detailing the assignment of reads to transcripts.The output is a list with an entry for each sample. | | returnDistTable | When TRUE the calculated distance table between read classes and annotations will be output as metadata as distTables. The output is a list with an entry for each sample. | | discovery | A logical variable indicating whether annotations are to be extended for quantification, defaults to TRUE. | | quant | A logical variable indicating whether quantification will be performed, defaults to TRUE. | | verbose | A logical variable indicating whether processing messages will be printed. | -| lowMemory | Reads will be processed by chromosomes instead of all together when lowMemory is specified. This option provides an efficient way to process big samples. | +| mode | A string that will set other input arguments ['bulk', 'multiplexed', 'fusion', 'debug']
bulk -
    processByBam = TRUE
    processByChromsome = FALSE
multiplexed -
    demultiplex = TRUE
    cleanReads = TRUE
    opt.em = list(degradationBias = FALSE)
    quant = FALSE
    processByChromosome = TRUE
fusion -
    NDR = 1
    fusionMode = TRUE
debug -
    verbose = TRUE
    trackReads = TRUE
    returnDistTable = TRUE | +| demultiplexed | A logical variable indicating whether the input bam file is demultiplexed. The barcode and umi either need to be present in the read name or the $BC and $UG tags, defaults to FALSE. Alternatively a path to a csv file can be provided where column 1 is read names, column 2 is barcodes, and column 3 is UMI. | +| spatial | A path to the barcode whitelist containing X and Y coordinates, defaults to null. If provided, the file should contain 3 columns with or without header, where the first column is the barcode, and the second and third column contains the x and y coordinates information accordingly. Compressed file format is accepted as well.| +| assignDist | A logical variable indicating whether read class to transcript assignment will be performed, defaults to TRUE. | +| quantData | Advanced use only. A list of se outputs from the assignDist step. Used only to run quantification | +| sampleNames | A vector of strings representing the sample name associated with each input bam. bam files with the same sample name will be combined | +| cleanReads | A logical variable indicating whether only the first sequenced alignment in a read should be kept. This helps to remove chimeric reads, but will remove alignments from fusion genes, defaults to FALSE. | +| dedupUMI | A logical variable indicating whether UMI deduplication is performed. The longest read per UMI will be used and the rest discarded, defaults to FALSE.| +| barcodesToFilter | A vector of strings indicating the barcodes to be filtered out in reads.| +| clusters | Either a list containing the barcodes for each cluster, or a path to a csv file containg the barcode to cluster mapping. When provided, clusters will be used during discovery and EM quant steps, defaults to null. 
| +| processByBam | A logical variable indicating if each input bam file will be processed separately (TRUE) or all are read in and processed together (FALSE), defaults to TRUE | +| processByChromosome | A logical variable indicating if read classes will be constructed with all reads together (FALSE), or done by chromosome which uses less memory, but provides less information for the junction correction model (TRUE), defaults to FALSE | + +### setNDR() arguments + +|argument|description| +|---|---| +|extendedAnnotations| A GRangesList object produced from bambu(quant = FALSE) or rowRanges(se) or loaded in from prepareAnnotations() of a Bambu derived .gtf | +|NDR| The maximum NDR for novel transcripts to be in extendedAnnotations (0-1). If not provided a recommended NDR is calculated. | +|includeRef| A boolean which if TRUE will also filter out reference annotations based on their NDR. Note that reference annotations with no NDR (because they were not detected) are not filtered and will remain potentially impacting quantification. Use with caution. Defaults to FALSE. | +|prefix| A string which determines which transcript names are considered novel by bambu and will be filtered. Defaults to 'Bambu' | +|baselineFDR| a value between 0-1. Bambu uses this FDR on the trained model to recommend an equivalent NDR threshold to be used for the sample. By default, a baseline FDR of 0.1 is used. This does not impact the analysis if an NDR is set. Defaults to NULL| +|defaultModels| a bambu trained model object used to recommend an NDR threshold if no NDR is provided.
Defaults to the pretrained model in the bambu package| ### Output Description + Access annotations that are matched to the transcript expression estimates by rowRanges() ```rscript rowRanges(se) @@ -466,7 +624,7 @@ rowRanges(se) |column|description| |---|---| |seqnames|The scaffold name the transcript is found on| -|ranges|An iRanges object containing the start and end coordinates of the transcript (not stranded)| +|ranges|An IRanges object containing the start and end coordinates of the transcript (not stranded)| |strand|The strand of the transcript (+, -, *)| |exon_rank|The exon index of the exons in the transcript starting from the 5’ end of the transcript| |exon_endRank|The exon index of the exons in the transcript starting from the 3’ end of the transcript| @@ -485,19 +643,79 @@ rowData(se) |---|---| |TXNAME|The transcript name for the transcript. Will use either the transcript name from the provided annotations or tx.X if it is a novel transcript where X is a unique integer.| |GENEID|The gene name for the transcript. Will use either the gene name from the provided annotations or gene.X if it is a novel transcript where X is a unique integer.| -|eqClass|A character vector with the transcript names of all the equivalent transcripts (those which have this transcripts contiguous exon junctions)| -|txId|A bambu specific transcript id used for indexing purposes -|eqClassById|A integer list with the transcript ids of all equivalent transcripts -|txClassDescription|A concatenated string containing the classes the transcript falls under:
**annotation** - Transcript matches an annotation transcript
**allNew** - All the intron-junctions are novel
**newFirstJunction** - the first junction is novel and at least one other junction matches an annotated transcript
**newLastJunction** - the last junction is novel and at least one other junction matches an annotated transcript
**newJunction** - an internal junction is novel and at least one other internal junction matches an annotated transcript
**newWithin** - A novel transcript with matching junctions but is not a subset of an annotation
**unsplicedNew** - A single exon transcript that doesn’t completely overlap with annotations
**compatible** - Is a subset of an annotated transcript
**newFirstExon** - The first exon is novel
**newLastExon** - The last exon is novel| -|readCount|The number of full length reads associated with this transcript (filtered by min.readCount)| |NDR|The NDR score calculated for the transcript| +|novelGene|A logical variable that is true if transcript model is from a novel gene (does not overlap with an annotated gene loci)| +|novelTranscript|A logical variable that is true if transcript model is novel (passing NDR threshold)| +|txClassDescription|A concatenated string containing the classes the transcript falls under:
**annotation** - Transcript matches an annotation transcript
**allNew** - All the intron-junctions are novel
**newFirstJunction** - the first junction is novel and at least one other junction matches an annotated transcript
**newLastJunction** - the last junction is novel and at least one other junction matches an annotated transcript
**newJunction** - an internal junction is novel and at least one other internal junction matches an annotated transcript
**newWithin** - A novel transcript with matching junctions but is not a subset of an annotation
**unsplicedNew** - A single exon transcript that doesn’t completely overlap with annotations
**compatible** - Is a subset of an annotated transcript
**newFirstExon** - The first exon is novel
**newLastExon** - The last exon is novel| +|readCount|The number of full length reads associated with this transcript (filtered by min.readCount)| |relReadCount|The proportion of reads this transcript has relative to all reads assigned to its gene| |relSubsetCount|The proportion of reads this transcript has relative to all reads that either fully or partially match this transcript| +|txId|A bambu specific transcript id used for indexing purposes +|eqClassById|A integer list with the transcript ids of all equivalent transcripts +|maxTxScore|The maximum model score across samples from the sample-trained model. Used internally by Bambu to calculate NDR scores| +|maxTxScore.noFit|The maximum model score across samples from the pretrained model. Used internally by Bambu to recommend NDR thresholds| + +```rscript +metadata(se)$incompatibleCounts +metadata(se)$warnings +``` +**IncompatibleCounts** - A table containing counts for incompatible reads that can be assigned to a gene but to none of the provided transcripts.
+**warnings** - A list containing the warnings produced by each sample
+ +```rscript +metadata(rowRanges(se))$NDRthreshold +metadata(rowRanges(se))$subsetTranscripts +metadata(rowRanges(se))$lowConfidenceTranscripts +metadata(rowRanges(se))$warnings +``` +**NDRthreshold** - The NDR threshold currently applied to the novel transcripts. A number between 0 and 1
+ +**subsetTranscripts** - A GRangesList containing subset transcripts when remove.subsetTx = TRUE. readCount and txScore can be accessed from mcols.
+ +**lowConfidenceTranscripts** - A GRangesList containing novel transcripts above the NDR threshold
+ +**warnings** - A list containing the warnings produced by each sample
- ### Release History +**bambu v3.9.0** + +Release date: 2025-xxx-xx + +- Subset transcripts and those above the NDR threshold are placed into the metadata of the annotations in $subsetTranscripts and $lowConfidenceTranscripts respectively (when filtered out by default). +- adds the setNDR function +- outputs the NDR, txScore and txScore.noFit as attributes to the gtf file and these are also read in with prepareAnnotations. +- Added setNDR as part of quant, which means that users can provide their extendedAnnotations alongside an NDR threshold when running bambu and it will automatically adjust the NDR used for quant. This means users do not need to manually filter the NDR value themselves. +- NDR and other stats are now copied over to equal transcripts even if above the NDR threshold (previously only happened for those below the NDR threshold) +- Read class to transcript assignment is now its own step instead of being done with quant. This is turned on and off with assignDist. +- Added demultiplexed argument +- Added spatial argument +- Added sampleNames argument +- Added cleanReads argument +- Added dedupUMI argument +- Added clusters argument +- Deprecated lowMemory - This has been replaced by processByChromosome +- Added processByChromosome (the old lowMemory) +- Added processByBam argument +- Added importBambuResults() +- writeBambuOutput now outputs all information needed to import Bambu results from text files +- Count outputs are all now in sparse matrix format + +Minor changes: +- Warnings will no longer occur if there are seqlevels in the readGrgList that are not in the annotations or genome. This was done by setting seqlevels of the reads to only those in the reads. Warning was constantly occurring because all the scaffolds used in alignment were in the bam files, even if no reads from these scaffolds existed.
+ +**bambu v3.2.6** + +Release date: 2023-October-25 + +Minor changes: + +- Fix crash cause by de novo mode +- Restore fusion mode functionality and added documentation +- Fixed bug in plot function +- Update release history + **bambu v3.2.5** Release date: 2023-July-07 @@ -530,6 +748,12 @@ Minor changes: Release date: 2022-10-25 Major changes: +- Updated the input parameters of Bambu to simplify the user experience +- Introduced NDR threshold recommendation +- Implemented trainBambu(), allowing users to train and use models on their own data +- Reads that cannot be assigned to any transcript are grouped as incompatible counts +- Partial estimates are removed from output as it can be directly obtained based on total count estimates and full-length count estimates +- The fusion mode is now available, which assigns read classes that align to multiple genes to a new combined fusion gene - Updated the input parameters of Bambu to simplify the user experience - Introduced NDR threshold recommendation @@ -539,6 +763,9 @@ Major changes: - The fusion mode is now available, which assigns read classes that align to multiple genes to a new combined fusion gene Minor changes: +- Novel transcripts and genes are now by default output with a Bambu prefix +- Updated the documentation, messages and errors output by Bambu +- Annotated transcripts (with unique exon-junctions) with at least 1 full-length read are assigned a NDR rank - Novel transcripts and genes are now by default output with a Bambu prefix - Updated the documentation, messages and errors output by Bambu diff --git a/data-raw/DATASET.R b/data-raw/DATASET.R index 95aae23b..fc95dd12 100755 --- a/data-raw/DATASET.R +++ b/data-raw/DATASET.R @@ -23,7 +23,8 @@ data1 <- data.table( txlen = c(546,546,2356,2356), rcWidth = c(300,540,1800,2300), minRC = rep(1,4), - GENEID = 1 + gene_sid = 1, + multi_align = c(FALSE, FALSE, FALSE, FALSE) ) data2 <- data.table( @@ -35,7 +36,8 @@ data2 <- data.table( txlen = c(546,546,2356,2356, 
546,2356), rcWidth = c(300,540,1800,2300, 200, 200), minRC = rep(1,6), - GENEID = 2 + gene_sid = 2, + multi_align = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE) ) data3 <- data.table( @@ -47,7 +49,8 @@ data3 <- data.table( txlen = c(546,546,2356,2356,2356, 546,2356), rcWidth = c(540,540,540,1800,2300, 200, 200), minRC = c(NA,NA,1,1,1,NA,1), - GENEID = 3 + gene_sid = 3, + multi_align = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE) ) data4 <- data.table( @@ -59,7 +62,8 @@ data4 <- data.table( txlen = c(546,546,2356,2356,2356, 546,2356), rcWidth = c(540,540,540,1800,2300, 200, 200), minRC = c(NA,NA,1,1,1,NA,1), - GENEID = 4 + gene_sid = 4, + multi_align = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE) ) data5 <- data.table( @@ -71,7 +75,8 @@ data5 <- data.table( txlen = c(546,546,2356,2356, 546,2356), rcWidth = c(1700,2200,1800,2300, 2000, 2000), minRC = rep(1,6), - GENEID = 5 + gene_sid = 5, + multi_align = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE) ) @@ -102,24 +107,17 @@ seCombinedGeneExpected <- transcriptToGeneExpression(seCombined) seCombinedExtendedGeneExpected <- transcriptToGeneExpression(seCombinedExtended) -## prior models to use for scoreReadClass() -#se = readRDS("SGNex_HepG2_directRNA_replicate5_run1_genome.rds") -#defaultModels = trainBambu(se) -xgb.save(defaultModels$transcriptModelME, "./inst/extdata/read_class_ME.model") -xgb.save(defaultModels$transcriptModelSE, "./inst/extdata/read_class_SE.model") -defaultModels$transcriptModelME = NULL -defaultModels$transcriptModelSE = NULL -#saveRDS(defaultModels, "./inst/extdata/defaultModels.rds") -defaultModels = readRDS(system.file("extdata", "defaultModels.rds", - package = "bambu")) +## prior models to use for scoreReadClass() and junctions() +##to train new ones see update_xgboost_models.R +defaultModels = readRDS("./inst/extdata/defaultModels.rds") defaultModels$transcriptModelME = xgb.load("./inst/extdata/read_class_ME.model") -defaultModels$transcriptModelSE = xgb.load("./inst/extdata/read_class_SE.model") 
+defaultModels$transcriptModelSE = xgb.load("./inst/extdata/read_class_SE.model") -# How to get pre trained junction model standardJunctionModels_temp -# added "saveRDS(junctionModel, "./inst/extdata/standardJunctionModels_temp.txt")" to junctionErrorCorrection -# ran Bambu with GNex_HepG2_directRNA_replicate5_run1_genome -standardJunctionModels_temp = readRDS(system.file( - "extdata", "standardJunctionModels_temp.txt", package = "bambu")) +standardJunctionModels_temp = list() +standardJunctionModels_temp$spliceSitePredictionStart.start = xgb.load("./inst/extdata/spliceSitePredictionStart.start.model") +standardJunctionModels_temp$spliceSitePredictionStart.end = xgb.load("./inst/extdata/spliceSitePredictionStart.end.model") +standardJunctionModels_temp$spliceSitePredictionEnd.start = xgb.load("./inst/extdata/spliceSitePredictionEnd.start.model") +standardJunctionModels_temp$spliceSitePredictionEnd.end = xgb.load("./inst/extdata/spliceSitePredictionEnd.end.model") usethis::use_data(data1, data2, data3, data4, data5, estOutput_woBC, @@ -231,9 +229,9 @@ saveRDS(readGrgList, file = "./inst/extdata/readGrgList_SGNex_A549_directRNA_rep annotations <- readRDS(system.file("extdata", "annotationGranges_txdbGrch38_91_chr9_1_1000000.rds", package = "bambu")) genomeSequence <- system.file("extdata", "Homo_sapiens.GRCh38.dna_sm.primary_assembly_chr9_1_1000000.fa", package = "bambu") -se <- bambu(reads = test.bam, annotations = annotations, genome = genomeSequence, discovery = FALSE, quant = FALSE)[[1]] +se <- bambu(reads = test.bam, annotations = annotations, genome = genomeSequence, discovery = FALSE, assignDist = FALSE, quant = FALSE)[[1]] saveRDS(se, file = "./inst/extdata/seReadClassUnstranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds", compress = "xz") -se <- bambu(reads = test.bam, annotations = annotations, genome = genomeSequence, stranded = TRUE, discovery = FALSE, quant = FALSE)[[1]] +se <- bambu(reads = test.bam, annotations = annotations, genome = 
genomeSequence, stranded = TRUE, assignDist = FALSE, discovery = FALSE, quant = FALSE)[[1]] saveRDS(se, file = "./inst/extdata/seReadClassStranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds", compress = "xz") se <- bambu(reads = test.bam, annotations = annotations, genome = genomeSequence) @@ -300,12 +298,12 @@ gr <- readRDS(system.file("extdata", "annotationGranges_txdbGrch38_91_chr9_1_100 extendedAnnotations <- isore.extendAnnotations(combinedTranscripts=seIsoReCombined, annotationGrangesList=gr, - remove.subsetTx = TRUE, min.sampleNumber = 1, NDR = 0.1, + remove.subsetTx = TRUE, min.sampleNumber = 1, NDR = 0.7, min.exonDistance = 35, min.exonOverlap = 10, min.primarySecondaryDist = 5, min.primarySecondaryDistStartEnd = 5, - prefix='', verbose=FALSE, defaultModels = defaultModels) + prefix='Bambu', verbose=FALSE, defaultModels = defaultModels) saveRDS(extendedAnnotations, file = "./inst/extdata/extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds", compress = "xz") - +writeToGTF(extendedAnnotations, "./inst/extdata/extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.gtf") ## expected output for test isore seReadClass1 <- readRDS(system.file("extdata", "seReadClassUnstranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds", package = "bambu")) diff --git a/data-raw/update_xgboost_models.R b/data-raw/update_xgboost_models.R new file mode 100644 index 00000000..fa4c5851 --- /dev/null +++ b/data-raw/update_xgboost_models.R @@ -0,0 +1,36 @@ +#download the bam file and make sure you set your own paths for the reference annotations +#change to bambu directory so that outputs are correct + +#aws s3 cp --no-sign-request s3://sg-nex-data/data/sequencing_data_ont/bam/genome/SGNex_HepG2_directRNA_replicate5_run1/SGNex_HepG2_directRNA_replicate5_run1.bam + + +devtools::load_all("bambu") +annotations = readRDS("Homo_sapiens.GRCh38.91.sorted.rds") +fa.file = "hg38_sequins_SIRV_ERCCs_longSIRVs.fa" +sample = 
"SGNex_HepG2_directRNA_replicate5_run1.bam" + +#Get transcript discovery model +rcf <- bambu(reads = sample, annotations = annotations, genome = fa.file, discovery = FALSE, assignDist = FALSE, quant = FALSE, verbose = TRUE) + +defaultModels = trainBambu(rcf[[1]]) +xgb.save(defaultModels$transcriptModelME, "./inst/extdata/read_class_ME.model") +xgb.save(defaultModels$transcriptModelSE, "./inst/extdata/read_class_SE.model") +defaultModels$transcriptModelME = NULL +defaultModels$transcriptModelSE = NULL +saveRDS(defaultModels, "./inst/extdata/defaultModels.rds") + + +#Get junction model +readGrgList = prepareDataFromBam(sample, verbose = TRUE) +genomeSequence <- checkInputSequence(fa.file) +mcols(readGrgList)$id <- seq_along(readGrgList) +unlisted_junctions <- unlistIntrons(readGrgList, use.ids = TRUE) +uniqueJunctions <- isore.constructJunctionTables(unlisted_junctions, + annotations,genomeSequence, + stranded = FALSE, verbose = TRUE, + returnModel = TRUE) +junctionModel = metadata(uniqueJunctions)$junctionModel +xgb.save(junctionModel$spliceSitePredictionStart.start, "./inst/extdata/spliceSitePredictionStart.start.model") +xgb.save(junctionModel$spliceSitePredictionStart.end, "./inst/extdata/spliceSitePredictionStart.end.model") +xgb.save(junctionModel$spliceSitePredictionEnd.start, "./inst/extdata/spliceSitePredictionEnd.start.model") +xgb.save(junctionModel$spliceSitePredictionEnd.end, "./inst/extdata/spliceSitePredictionEnd.end.model") \ No newline at end of file diff --git a/inst/extdata/defaultModels.rds b/inst/extdata/defaultModels.rds index 1ff6e8ed..052e1034 100644 Binary files a/inst/extdata/defaultModels.rds and b/inst/extdata/defaultModels.rds differ diff --git a/inst/extdata/extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.gtf b/inst/extdata/extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.gtf new file mode 100644 index 00000000..a58dfa8b --- /dev/null +++ b/inst/extdata/extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.gtf @@ -0,0 
+1,947 @@ +9 Bambu transcript 12134 13783 . + . gene_id "ENSG00000236875"; transcript_id "ENST00000421620"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 12134 12190 . + . gene_id "ENSG00000236875"; transcript_id "ENST00000421620"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 12291 12340 . + . gene_id "ENSG00000236875"; transcript_id "ENST00000421620"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 12726 12834 . + . gene_id "ENSG00000236875"; transcript_id "ENST00000421620"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 13088 13157 . + . gene_id "ENSG00000236875"; transcript_id "ENST00000421620"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 13338 13487 . + . gene_id "ENSG00000236875"; transcript_id "ENST00000421620"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 13566 13783 . + . gene_id "ENSG00000236875"; transcript_id "ENST00000421620"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 14513 19197 . - . gene_id "ENSG00000181404"; transcript_id "BambuTx1"; NDR "0.636065870523453"; txScore ""; txScore.noFit ""; +9 Bambu exon 14513 14940 . - . gene_id "ENSG00000181404"; transcript_id "BambuTx1"; exon_number "10"; NDR "0.636065870523453"; maxTxScore "0.363934129476547"; maxTxScore.noFit "0.363934129476547"; +9 Bambu transcript 14521 29739 . - . gene_id "ENSG00000181404"; transcript_id "ENST00000442898"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 14521 14940 . - . gene_id "ENSG00000181404"; transcript_id "ENST00000442898"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 15081 15149 . - . gene_id "ENSG00000181404"; transcript_id "BambuTx1"; exon_number "9"; NDR "0.636065870523453"; maxTxScore "0.363934129476547"; maxTxScore.noFit "0.363934129476547"; +9 Bambu exon 15081 15149 . - . 
gene_id "ENSG00000181404"; transcript_id "ENST00000442898"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 15909 16421 . - . gene_id "ENSG00000181404"; transcript_id "BambuTx1"; exon_number "8"; NDR "0.636065870523453"; maxTxScore "0.363934129476547"; maxTxScore.noFit "0.363934129476547"; +9 Bambu exon 15909 16061 . - . gene_id "ENSG00000181404"; transcript_id "ENST00000442898"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 16718 16876 . - . gene_id "ENSG00000181404"; transcript_id "BambuTx1"; exon_number "7"; NDR "0.636065870523453"; maxTxScore "0.363934129476547"; maxTxScore.noFit "0.363934129476547"; +9 Bambu exon 16718 16876 . - . gene_id "ENSG00000181404"; transcript_id "ENST00000442898"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 16965 17166 . - . gene_id "ENSG00000181404"; transcript_id "ENST00000442898"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 16969 17166 . - . gene_id "ENSG00000181404"; transcript_id "BambuTx1"; exon_number "6"; NDR "0.636065870523453"; maxTxScore "0.363934129476547"; maxTxScore.noFit "0.363934129476547"; +9 Bambu exon 17344 17479 . - . gene_id "ENSG00000181404"; transcript_id "BambuTx1"; exon_number "5"; NDR "0.636065870523453"; maxTxScore "0.363934129476547"; maxTxScore.noFit "0.363934129476547"; +9 Bambu exon 17344 17479 . - . gene_id "ENSG00000181404"; transcript_id "ENST00000442898"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 17719 17855 . - . gene_id "ENSG00000181404"; transcript_id "BambuTx1"; exon_number "4"; NDR "0.636065870523453"; maxTxScore "0.363934129476547"; maxTxScore.noFit "0.363934129476547"; +9 Bambu exon 17719 17855 . - . gene_id "ENSG00000181404"; transcript_id "ENST00000442898"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 18028 18174 . - . 
gene_id "ENSG00000181404"; transcript_id "BambuTx1"; exon_number "3"; NDR "0.636065870523453"; maxTxScore "0.363934129476547"; maxTxScore.noFit "0.363934129476547"; +9 Bambu exon 18028 18174 . - . gene_id "ENSG00000181404"; transcript_id "ENST00000442898"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 18381 18482 . - . gene_id "ENSG00000181404"; transcript_id "BambuTx1"; exon_number "2"; NDR "0.636065870523453"; maxTxScore "0.363934129476547"; maxTxScore.noFit "0.363934129476547"; +9 Bambu exon 18381 18492 . - . gene_id "ENSG00000181404"; transcript_id "ENST00000442898"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 19026 19197 . - . gene_id "ENSG00000181404"; transcript_id "BambuTx1"; exon_number "1"; NDR "0.636065870523453"; maxTxScore "0.363934129476547"; maxTxScore.noFit "0.363934129476547"; +9 Bambu exon 24851 25004 . - . gene_id "ENSG00000181404"; transcript_id "ENST00000442898"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 27657 30891 . + . gene_id "ENSG00000227518"; transcript_id "ENST00000422679"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 27657 30445 . + . gene_id "ENSG00000227518"; transcript_id "ENST00000422679"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 29602 29739 . - . gene_id "ENSG00000181404"; transcript_id "ENST00000442898"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 30144 30281 . + . gene_id "ENSG00000283921"; transcript_id "ENST00000408365"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 30144 30281 . + . gene_id "ENSG00000283921"; transcript_id "ENST00000408365"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 30758 30891 . + . gene_id "ENSG00000227518"; transcript_id "ENST00000422679"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 34394 35860 . - . 
gene_id "ENSG00000218839"; transcript_id "ENST00000449442"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 34394 34957 . - . gene_id "ENSG00000218839"; transcript_id "ENST00000449442"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 34965 35871 . - . gene_id "ENSG00000218839"; transcript_id "ENST00000305248"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 34965 35264 . - . gene_id "ENSG00000218839"; transcript_id "ENST00000305248"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 35060 35264 . - . gene_id "ENSG00000218839"; transcript_id "ENST00000449442"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 35504 35871 . - . gene_id "ENSG00000218839"; transcript_id "ENST00000305248"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 35504 35860 . - . gene_id "ENSG00000218839"; transcript_id "ENST00000449442"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 72674 87667 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000621255"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 72674 72816 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000621255"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 72675 75293 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000620326"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 72675 72816 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000620326"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 72691 88826 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000429980"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 72691 72816 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000429980"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 72705 75327 . + . 
gene_id "ENSG00000277631"; transcript_id "ENST00000442069"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 72705 72816 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000442069"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 72727 87739 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000613372"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 72727 72816 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000613372"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 74139 87590 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000622412"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 74139 74205 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000622412"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 74149 74205 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000442069"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 74882 75327 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000442069"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 74882 75293 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000620326"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 79345 79537 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000429980"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 79345 79537 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000613372"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 79345 79537 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000621255"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 79345 79537 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000622412"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 80651 80807 . + . 
gene_id "ENSG00000277631"; transcript_id "ENST00000621255"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 87284 87739 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000613372"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 87284 87667 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000621255"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 87284 87590 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000622412"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 87661 88775 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000614900"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 87661 88195 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000614900"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 88742 88826 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000429980"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 88742 88775 . + . gene_id "ENSG00000277631"; transcript_id "ENST00000614900"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 100804 102850 . - . gene_id "ENSG00000227917"; transcript_id "ENST00000435421"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 100804 101304 . - . gene_id "ENSG00000227917"; transcript_id "ENST00000435421"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 100994 102941 . - . gene_id "ENSG00000227917"; transcript_id "ENST00000427318"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 100994 101304 . - . gene_id "ENSG00000227917"; transcript_id "ENST00000427318"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 102473 102541 . - . 
gene_id "ENSG00000227917"; transcript_id "ENST00000427318"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 102740 102941 . - . gene_id "ENSG00000227917"; transcript_id "ENST00000427318"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 102740 102850 . - . gene_id "ENSG00000227917"; transcript_id "ENST00000435421"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 112713 113754 . - . gene_id "ENSG00000231808"; transcript_id "ENST00000416242"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 112713 113067 . - . gene_id "ENSG00000231808"; transcript_id "ENST00000416242"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 113694 113754 . - . gene_id "ENSG00000231808"; transcript_id "ENST00000416242"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 116231 118204 . - . gene_id "ENSG00000170122"; transcript_id "ENST00000382500"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 116231 118204 . - . gene_id "ENSG00000170122"; transcript_id "ENST00000382500"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121038 179058 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; NDR "0.191389977931976"; txScore ""; txScore.noFit ""; +9 Bambu exon 121038 121573 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "15"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu transcript 121041 179147 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121041 121573 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121052 146140 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000475990"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121052 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000475990"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121058 164009 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121058 121573 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121060 179047 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121060 121573 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "16"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121089 149446 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000475411"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu transcript 121089 179012 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121089 122090 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000475411"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121089 121573 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "16"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121091 148625 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000611457"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121091 121573 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000611457"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121224 179042 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121224 121573 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121407 179008 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121407 121573 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121409 179021 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121409 121573 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121410 166599 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121410 121573 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121411 123747 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000464198"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121411 121573 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000464198"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121418 167605 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121418 121573 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 121503 146154 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000462513"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 121503 121573 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000462513"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121961 122090 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121961 122090 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "14"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 121961 122090 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121961 122090 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121961 122136 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000462513"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121961 122090 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000464198"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121961 122090 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121961 122090 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121961 122090 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121961 122090 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000611457"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121961 122090 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121961 122090 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 121961 122090 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 122691 179052 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 122691 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 122843 179016 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 122843 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "13"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000462513"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000464198"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000475411"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000611457"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123217 123282 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "12"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000462513"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123747 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000464198"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000475411"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000475990"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000611457"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 123386 123454 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "11"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000462513"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000475411"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000475990"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000611457"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 134979 135030 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 146102 146158 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 146102 146158 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "10"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 146102 146158 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 146102 146154 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000462513"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 146102 146158 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 146102 149446 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000475411"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 146102 146140 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000475990"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 146102 146158 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 146102 146158 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 146102 148625 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000611457"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 146102 146158 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 146102 146158 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 146102 146158 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 146114 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000498044"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 146114 146158 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000498044"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 147901 148040 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 147950 164039 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618361"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 147950 148044 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618361"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 150132 164039 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618061"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 150132 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618061"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 150509 150725 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 150642 150725 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000498044"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 151305 151427 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 151305 151427 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 151305 151427 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618361"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 151577 179088 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613988"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 151577 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613988"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 151745 161647 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000489272"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 151745 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000489272"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 152034 173325 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616803"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "9"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 152034 152078 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000498044"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616803"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618361"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 152034 152078 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 152914 179025 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377447"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 152914 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377447"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "8"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000489272"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000498044"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000613988"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616803"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618361"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 154709 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 154732 172179 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000620292"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 154732 154795 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000620292"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "7"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377447"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000489272"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000498044"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613988"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616803"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618061"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000618361"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 156481 156527 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000620292"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 160001 160140 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 160001 160140 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000498044"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 160001 160140 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 160001 160140 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000620292"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 160088 179018 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000431099"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 160088 160140 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000431099"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 161567 161654 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 161567 161654 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 161567 161647 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000489272"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 161567 161654 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 161567 161654 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616803"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 161567 161654 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 161567 161654 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618061"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 161567 161654 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618361"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 161567 161654 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "6"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377447"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000431099"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000498044"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613988"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616803"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618061"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618361"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 162432 162469 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000620292"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "5"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377447"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000431099"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000498044"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613988"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164039 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618061"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164039 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000618361"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164009 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000619157"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 163978 164037 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000620292"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 164403 164448 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616803"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 165983 166599 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000487575"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 165983 167605 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000495302"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 171596 173371 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000483817"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 171596 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000483817"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 171953 179023 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000382389"; NDR "0.521544337272644"; txScore ""; txScore.noFit ""; +9 Bambu transcript 171953 179045 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382393"; NDR "0.330390512943268"; txScore ""; txScore.noFit ""; +9 Bambu exon 171953 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382389"; exon_number "4"; NDR "0.521544337272644"; maxTxScore "0.478455662727356"; maxTxScore.noFit "0.478455662727356"; +9 Bambu exon 171953 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382393"; exon_number "3"; NDR "0.330390512943268"; maxTxScore "0.669609487056732"; maxTxScore.noFit "0.669609487056732"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "4"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377447"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000431099"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172172 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000498044"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613988"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616803"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172172 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 172081 172179 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000620292"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "3"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 173270 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377447"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173366 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000431099"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173371 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000483817"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613988"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173325 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616803"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 173270 173366 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 175698 175784 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 175698 175784 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "2"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 175698 175784 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 175698 175784 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377447"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 175698 175784 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382389"; exon_number "3"; NDR "0.521544337272644"; maxTxScore "0.478455662727356"; maxTxScore.noFit "0.478455662727356"; +9 Bambu exon 175698 175784 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382393"; exon_number "2"; NDR "0.330390512943268"; maxTxScore "0.669609487056732"; maxTxScore.noFit "0.669609487056732"; +9 Bambu exon 175698 175784 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 175698 175784 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000431099"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 175698 175784 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 175698 175784 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 175698 175784 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 175698 175784 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 175698 175784 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613988"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 175698 175784 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 177723 177820 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 177723 177820 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382389"; exon_number "2"; NDR "0.521544337272644"; maxTxScore "0.478455662727356"; maxTxScore.noFit "0.478455662727356"; +9 Bambu exon 177723 177820 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000431099"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 177723 177820 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 177723 177820 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613988"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 178816 179047 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000314367"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 178816 179058 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000356521"; exon_number "1"; NDR "0.191389977931976"; maxTxScore "0.808610022068024"; maxTxScore.noFit "0.808610022068024"; +9 Bambu exon 178816 179147 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377400"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 178816 179025 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000377447"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 178816 179023 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382389"; exon_number "1"; NDR "0.521544337272644"; maxTxScore "0.478455662727356"; maxTxScore.noFit "0.478455662727356"; +9 Bambu exon 178816 179045 . - . 
gene_id "ENSG00000172785"; transcript_id "ENST00000382393"; exon_number "1"; NDR "0.330390512943268"; maxTxScore "0.669609487056732"; maxTxScore.noFit "0.669609487056732"; +9 Bambu exon 178816 179021 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000382447"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 178816 179018 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000431099"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 178816 179042 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000465014"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 178816 179012 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000612045"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 178816 179016 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613355"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 178816 179008 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613508"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 178816 179088 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000613988"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 178816 179052 . - . gene_id "ENSG00000172785"; transcript_id "ENST00000616944"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 212824 215741 . - . gene_id "ENSG00000183784"; transcript_id "ENST00000382387"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 212824 215741 . - . gene_id "ENSG00000183784"; transcript_id "ENST00000382387"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 214854 286636 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469197"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 214854 215029 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000469197"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 214865 465259 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu transcript 214865 340620 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 214865 215029 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 214865 215029 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 214868 340620 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 214868 215029 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 215171 289579 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000479404"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 215171 215416 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000479404"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 220748 220863 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 267966 273002 . - . gene_id "ENSG00000235880"; transcript_id "ENST00000429661"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 267966 268273 . - . gene_id "ENSG00000235880"; transcript_id "ENST00000429661"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 271627 271729 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 271627 271729 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 271627 271729 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469197"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 271627 271729 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000479404"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 271627 271729 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 272905 273002 . - . gene_id "ENSG00000235880"; transcript_id "ENST00000429661"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 273048 465259 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 273048 273100 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 273050 340620 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu transcript 273050 304703 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000487230"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 273050 273100 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 273050 273100 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000487230"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 273059 381387 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 273059 273100 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 273066 312941 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000478380"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 273066 273100 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000478380"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 273070 464526 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 273070 273100 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 273084 465241 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 273084 273100 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 276994 277086 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469197"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 280791 280905 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000487230"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 284375 284567 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469197"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 286461 286636 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 286461 286636 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 286461 286636 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 286461 286636 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 286461 286636 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469197"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 286461 286636 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 286461 286636 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000478380"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 286461 286636 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000479404"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 286461 286636 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 286461 286636 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000487230"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 286461 286636 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 286461 286636 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 289510 289581 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 289510 289581 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 289510 289581 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 289510 289581 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 289510 289581 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 289510 289581 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000478380"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 289510 289579 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000479404"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 289510 289581 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 289510 289581 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000487230"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 289510 289581 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 289510 289581 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 304581 304704 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 304581 304704 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 304581 304704 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 304581 304704 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 304581 304704 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 304581 304704 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000478380"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 304581 304704 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 304581 304703 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000487230"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 304581 304704 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 304581 304704 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 311954 312166 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 311954 312166 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 311954 312166 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 311954 312166 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 311954 312166 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 311954 312941 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000478380"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 311954 312166 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 311954 312166 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 311954 312166 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 311958 314466 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000474772"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 311958 312166 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000474772"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 314292 314466 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000474772"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 317043 317128 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 317043 317128 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 317043 317128 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 317043 317128 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 317043 317128 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 317043 317128 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 317043 317128 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 317043 317128 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 325671 325737 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 325671 325737 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 325671 325737 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 325671 325737 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 325671 325737 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 325671 325737 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 325671 325737 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 325671 325737 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 328022 328171 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 328022 328171 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 328022 328171 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 328022 328171 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 328022 328171 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 328022 328171 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 328022 328171 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 328022 328171 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 332398 332478 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 332398 332478 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 332398 332478 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 332398 332478 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 332398 332478 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 332398 332478 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 332398 332478 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 332398 332478 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 334225 334384 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 334225 334384 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 334225 334384 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 334225 334384 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 334225 334384 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 334225 334384 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 334225 334384 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 334225 334384 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 336582 336718 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 336582 336718 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 336582 336718 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 336582 336718 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 336582 336718 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 336582 336718 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 336582 336718 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 336582 336718 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 339006 339099 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 339006 339099 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 339006 339099 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 339006 339099 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 339006 339099 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 339006 339099 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 339006 339099 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 339006 339099 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 340159 340620 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382341"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 340159 340321 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 340159 340321 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 340159 340620 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000454469"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 340159 340321 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 340159 340321 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 340159 340321 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 340159 340620 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000524396"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 365591 366552 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 366229 405099 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 366229 366552 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 366251 465255 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 366251 366552 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 368018 368300 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 368018 368135 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 368018 368135 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 368018 368135 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 368018 368135 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 368018 368135 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 368018 370300 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 369222 369447 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "16"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 370230 370300 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 370230 370300 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 370230 370300 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "16"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 370230 370300 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 370230 370300 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 370230 370300 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "17"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 371428 371566 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 371428 371566 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "17"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 371428 371566 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "16"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 371428 371566 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "16"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 371428 371566 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "18"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 371428 371566 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 372185 372286 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 372185 372286 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 372185 372286 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "18"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 372185 372286 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "17"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 372185 372286 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "17"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 372185 372286 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "19"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 372185 372286 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "16"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376210 376305 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376210 376305 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376210 376305 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "19"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376210 376305 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "18"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376210 376305 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "18"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376210 376305 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "20"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376210 376305 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "17"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376977 377211 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376977 377211 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376977 377211 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "20"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376977 377211 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "19"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376977 377211 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "19"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376977 377211 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "21"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 376977 377211 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "18"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 379771 379935 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 379771 379935 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 379771 379935 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "21"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 379771 379935 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "20"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 379771 379935 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "20"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 379771 379935 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "22"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 379771 379935 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "19"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 380279 381387 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000483757"; exon_number "23"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 382513 382685 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 382513 382685 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 382513 382685 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "22"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 382513 382685 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "21"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 382513 382685 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "21"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 382513 382685 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "20"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 386331 386426 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 386331 386426 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 386331 386426 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "23"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 386331 386426 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "22"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 386331 386426 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "21"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 390471 390566 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 390471 390566 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 390471 390566 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "24"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 390471 390566 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "23"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 390471 390566 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "22"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 390471 390566 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "22"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 396785 396934 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 396785 396934 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 396785 396934 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "25"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 396785 396934 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "24"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 396785 396934 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "23"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 396785 396934 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "23"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 399146 399259 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 399146 399259 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 399146 399259 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "26"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 399146 399259 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "25"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 399146 399259 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "24"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 399146 399259 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "24"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 404918 405073 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 404918 405099 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382331"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 404918 405073 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "27"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 404918 405073 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "26"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 404918 405073 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "25"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 404918 405073 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "25"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 406930 407069 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 406930 407069 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "28"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 406930 407069 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "27"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 406930 407069 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "26"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 406930 407069 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "26"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 414782 414951 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "16"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 414782 414951 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "29"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 414782 414951 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "28"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 414782 414951 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "27"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 414782 414951 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "27"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 418068 418207 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "17"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 418068 418207 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "30"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 418068 418207 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "29"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 418068 418207 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "28"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 418068 418207 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "28"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 420116 426953 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000493666"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 420116 420180 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000493666"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 420401 420583 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "18"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 420401 420583 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "31"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 420401 420583 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "30"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 420401 420583 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "29"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 420401 420583 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000493666"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 420401 420583 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "29"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 420949 421078 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "19"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 420949 421078 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "32"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 420949 421078 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "31"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 420949 421078 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "30"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 420949 421078 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000493666"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 420949 421078 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "30"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 422048 422135 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "20"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 422048 422135 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "33"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 422048 422135 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "32"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 422048 422135 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "31"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 422048 422135 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000493666"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 422048 422135 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "31"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 426885 426981 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "21"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 426885 426981 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "34"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 426885 426981 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "33"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 426885 426981 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "32"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 426885 426953 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000493666"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 426885 426981 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "32"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 428362 428496 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "22"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 428362 428496 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "35"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 428362 428496 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "34"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 428362 428496 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "33"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 428362 428496 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "33"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 429702 429854 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "23"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 429702 429854 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "36"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 429702 429854 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "35"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 429702 429854 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "34"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 429702 429854 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "34"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 432166 432324 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "24"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 432166 432324 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "37"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 432166 432324 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "36"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 432166 432324 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "35"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 432166 432324 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "35"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 433875 433975 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "25"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 433875 433975 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "38"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 433875 433975 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "37"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 433875 433975 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "36"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 433875 433975 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "36"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 434783 434975 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "26"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 434783 434975 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "39"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 434783 434975 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "38"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 434783 434975 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "37"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 434783 434975 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "37"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 439245 439388 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "27"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 439245 439388 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "40"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 439245 439388 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "39"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 439245 439388 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "38"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 439245 439388 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "38"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 441286 441417 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "28"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 441286 441417 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "41"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 441286 441417 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "40"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 441286 441417 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "39"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 441286 441417 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "39"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 441875 442009 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "29"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 441875 442009 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "42"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 441875 442009 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "41"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 441875 442009 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "40"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 441875 442009 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "40"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 443427 443516 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "30"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 443427 443516 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "43"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 443427 443516 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "42"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 443427 443516 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "41"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 443427 443516 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "41"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 446370 446606 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "31"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 446370 446606 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "44"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 446370 446606 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "43"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 446370 446606 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "42"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 446370 446606 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "42"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 449784 449927 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "32"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 449784 449927 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "45"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 449784 449927 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "44"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 449784 449927 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "43"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 449784 449927 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "43"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 452011 452117 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "33"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 452011 452117 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "46"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 452011 452117 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "45"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 452011 452117 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "44"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 452011 452117 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "44"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 452492 469836 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000591577"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 452492 452948 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000591577"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 452493 456207 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000628764"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 452493 452948 . - . 
gene_id "ENSG00000227155"; transcript_id "ENST00000628764"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 454304 492248 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000415004"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 454304 454606 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000415004"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 454432 456988 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000585944"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 454432 454606 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000585944"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 454439 454606 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000591577"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 454447 469802 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000589287"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 454447 454606 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000589287"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 454507 454606 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000628764"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 454576 469802 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000589387"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 454576 454606 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000589387"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 454585 470961 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000586805"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 454585 454606 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000586805"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 456201 460507 . - . 
gene_id "ENSG00000227155"; transcript_id "ENST00000585631"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 456201 456703 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000415004"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 456201 456703 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000585631"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 456201 456988 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000585944"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 456201 456703 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000586805"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 456201 456703 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000589287"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 456201 456703 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000589387"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 456201 456703 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000591577"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 456201 456207 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000628764"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 456204 470816 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000593137"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 456204 456703 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000593137"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 456211 460227 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000590240"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 456211 456703 . - . 
gene_id "ENSG00000227155"; transcript_id "ENST00000590240"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 456518 467390 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000592805"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 456518 456703 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000592805"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 456521 470934 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000585819"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 456521 456703 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000585819"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 456530 466866 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000588474"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 456530 456703 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000588474"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 456545 467336 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000590518"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 456545 456703 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000590518"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 456915 457047 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000585819"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 456915 457047 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000588474"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 456915 457047 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000590518"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 456915 457047 . - . 
gene_id "ENSG00000227155"; transcript_id "ENST00000593137"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 457004 467219 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000588989"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 457004 457047 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000588989"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 458117 458281 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000590240"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 458370 458423 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000590240"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 459716 466065 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000608617"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 459716 460507 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000585631"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 459716 459866 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000585819"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 459716 459866 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000588474"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 459716 459866 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000588989"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 459716 460227 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000590240"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 459716 459815 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000608617"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 463517 463687 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "34"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 463517 463687 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "47"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 463517 463687 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "46"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 463517 463687 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "45"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 463517 463687 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "45"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 463555 465247 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000462618"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 463555 463687 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000462618"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 464159 465255 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000382329"; exon_number "35"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 464159 465259 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000432829"; exon_number "48"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 464159 465259 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000453981"; exon_number "47"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 464159 464278 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000462618"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 464159 464526 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000469391"; exon_number "46"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 464159 465241 . + . 
gene_id "ENSG00000107099"; transcript_id "ENST00000495184"; exon_number "46"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 465073 465247 . + . gene_id "ENSG00000107099"; transcript_id "ENST00000462618"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 465962 466069 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000585819"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 465962 466069 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000588474"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 465962 466069 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000588989"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 465962 466069 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000589387"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 465962 466069 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000590518"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 465962 466069 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000591577"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 465962 466069 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000593137"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 465962 466065 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000608617"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 466688 466866 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000588474"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 466688 467219 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000588989"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 466688 466853 . - . 
gene_id "ENSG00000227155"; transcript_id "ENST00000589287"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 466688 467390 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000592805"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 466900 467336 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000590518"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 469722 469802 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000589287"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 469722 469802 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000589387"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 469722 469836 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000591577"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 470291 746105 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 470291 470374 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 470556 470682 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 470592 484491 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000467541"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 470592 470677 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000467541"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 470765 470934 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000585819"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 470765 470961 . - . 
gene_id "ENSG00000227155"; transcript_id "ENST00000586805"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 470765 470816 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000593137"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 471449 471586 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000467541"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 471467 476769 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000475690"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 471467 471586 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000475690"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 473194 473273 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 473194 473273 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000467541"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 473194 473273 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000475690"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 476628 476769 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000475690"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 477750 478423 . + . gene_id "ENSG00000235330"; transcript_id "ENST00000598100"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 477750 478423 . + . gene_id "ENSG00000235330"; transcript_id "ENST00000598100"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 477856 478349 . + . gene_id "ENSG00000235330"; transcript_id "ENST00000455592"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 477856 478349 . + . 
gene_id "ENSG00000235330"; transcript_id "ENST00000455592"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 484282 484491 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000467541"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 487774 495610 . + . gene_id "ENSG00000228115"; transcript_id "ENST00000442442"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 487774 487803 . + . gene_id "ENSG00000228115"; transcript_id "ENST00000442442"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 488957 489066 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000415004"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 492084 492248 . - . gene_id "ENSG00000227155"; transcript_id "ENST00000415004"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 495238 495610 . + . gene_id "ENSG00000228115"; transcript_id "ENST00000442442"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 504695 746106 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 504695 504754 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 504702 746103 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000489369"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 504702 504754 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000489369"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 504703 734129 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000354485"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 504703 504754 . + . 
gene_id "ENSG00000107104"; transcript_id "ENST00000354485"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 504716 746105 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 504716 504754 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 539516 539633 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 540508 540667 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 547317 549531 . + . gene_id "ENSG00000226403"; transcript_id "ENST00000441028"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 547317 547341 . + . gene_id "ENSG00000226403"; transcript_id "ENST00000441028"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 547318 549531 . + . gene_id "ENSG00000226403"; transcript_id "ENST00000444793"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 547318 547341 . + . gene_id "ENSG00000226403"; transcript_id "ENST00000444793"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 549107 549531 . + . gene_id "ENSG00000226403"; transcript_id "ENST00000444793"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 549107 549721 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 549190 549531 . + . gene_id "ENSG00000226403"; transcript_id "ENST00000441028"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 558826 558930 . + . 
gene_id "ENSG00000202172"; transcript_id "ENST00000365302"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 558826 558930 . + . gene_id "ENSG00000202172"; transcript_id "ENST00000365302"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 659986 660326 . + . gene_id "ENSG00000229875"; transcript_id "ENST00000421436"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 659986 660326 . + . gene_id "ENSG00000229875"; transcript_id "ENST00000421436"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 673478 685555 . - . gene_id "ENSG00000227914"; transcript_id "ENST00000421645"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 673478 674379 . - . gene_id "ENSG00000227914"; transcript_id "ENST00000421645"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 674124 677628 . - . gene_id "ENSG00000227914"; transcript_id "ENST00000608097"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 674124 674379 . - . gene_id "ENSG00000227914"; transcript_id "ENST00000608097"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 676890 677009 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000354485"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 676890 677009 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 676890 677009 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 676890 677009 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000489369"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 676890 677009 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 677542 677628 . 
- . gene_id "ENSG00000227914"; transcript_id "ENST00000608097"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 680857 681046 . - . gene_id "ENSG00000227914"; transcript_id "ENST00000421645"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 685528 685555 . - . gene_id "ENSG00000227914"; transcript_id "ENST00000421645"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 706889 746103 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382293"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 706889 707247 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382293"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 710804 713464 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000354485"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 710804 713464 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382293"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 710804 713464 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 710804 713464 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 710804 713464 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000489369"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 710804 713464 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 730051 734129 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000354485"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 730051 730248 . + . 
gene_id "ENSG00000107104"; transcript_id "ENST00000382293"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 730051 730248 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 730051 730248 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 730051 730248 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000489369"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 730051 730248 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 731158 731266 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382293"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 731158 731266 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 731158 731266 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 731158 731266 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000489369"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 731158 731266 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 732378 732617 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382293"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 732378 732617 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 732378 732617 . + . 
gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 732378 734835 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000489369"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 732378 732617 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 732442 746102 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382289"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 732442 732617 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382289"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 734748 746104 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382286"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 734748 734835 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382286"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 734748 734835 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382289"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 734748 734835 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382293"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 734748 734835 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 734748 734835 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 734748 734835 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 735712 735780 . + . 
gene_id "ENSG00000107104"; transcript_id "ENST00000382289"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 738285 738504 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382286"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 738285 738504 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382289"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 738285 738504 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382293"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 738285 738504 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 738285 738504 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 738285 738504 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000489369"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 738285 738504 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 740792 740934 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382286"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 740792 740934 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382289"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 740792 740934 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382293"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 740792 740934 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 740792 740934 . + . 
gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 740792 740934 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000489369"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 740792 740934 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 742205 742405 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382286"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 742205 742405 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382289"; exon_number "6"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 742205 742405 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382293"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 742205 742405 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 742205 742405 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "14"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 742205 742405 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000489369"; exon_number "9"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 742205 742405 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 744491 746104 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382286"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 744491 744589 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382289"; exon_number "7"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 744491 744589 . + . 
gene_id "ENSG00000107104"; transcript_id "ENST00000382293"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 744491 744589 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 744491 744589 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "15"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 744491 744589 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000489369"; exon_number "10"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 744491 744589 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 745173 746102 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382289"; exon_number "8"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 745173 746103 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382293"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 745173 746105 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382297"; exon_number "12"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 745173 746105 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000382303"; exon_number "16"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 745173 746103 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000489369"; exon_number "11"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 745173 746106 . + . gene_id "ENSG00000107104"; transcript_id "ENST00000619269"; exon_number "13"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 841690 969090 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000382276"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu transcript 841690 894985 . + . 
gene_id "ENSG00000137090"; transcript_id "ENST00000564322"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 841690 842192 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000382276"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 841690 842192 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000564322"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 842714 968515 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000569227"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 842714 842954 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000569227"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 846960 847143 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000382276"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 846960 847143 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000564322"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 846960 847143 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000569227"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 893912 894195 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000382276"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 893912 894985 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000564322"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 893912 894195 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000569227"; exon_number "3"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 916763 916907 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000382276"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 916763 916907 . + . 
gene_id "ENSG00000137090"; transcript_id "ENST00000569227"; exon_number "4"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 967985 969090 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000382276"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 967985 968515 . + . gene_id "ENSG00000137090"; transcript_id "ENST00000569227"; exon_number "5"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 976964 991731 . + . gene_id "ENSG00000064218"; transcript_id "ENST00000190165"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 976964 977455 . + . gene_id "ENSG00000064218"; transcript_id "ENST00000190165"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu transcript 989837 990333 . + . gene_id "ENSG00000064218"; transcript_id "ENST00000417254"; NDR "NA"; txScore ""; txScore.noFit ""; +9 Bambu exon 989837 990333 . + . gene_id "ENSG00000064218"; transcript_id "ENST00000417254"; exon_number "1"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; +9 Bambu exon 990041 991731 . + . 
gene_id "ENSG00000064218"; transcript_id "ENST00000190165"; exon_number "2"; NDR "NA"; maxTxScore "NA"; maxTxScore.noFit "NA"; diff --git a/inst/extdata/extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds b/inst/extdata/extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds index 0b70a66f..f76b7850 100644 Binary files a/inst/extdata/extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds and b/inst/extdata/extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds differ diff --git a/inst/extdata/read_class_ME.model b/inst/extdata/read_class_ME.model index 01467903..ebd50d76 100644 Binary files a/inst/extdata/read_class_ME.model and b/inst/extdata/read_class_ME.model differ diff --git a/inst/extdata/read_class_SE.model b/inst/extdata/read_class_SE.model index 3d2ea988..85bfdc92 100644 Binary files a/inst/extdata/read_class_SE.model and b/inst/extdata/read_class_SE.model differ diff --git a/inst/extdata/seOutputCombined2_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds b/inst/extdata/seOutputCombined2_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds index 1cb8a91a..2fa9d9fe 100644 Binary files a/inst/extdata/seOutputCombined2_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds and b/inst/extdata/seOutputCombined2_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds differ diff --git a/inst/extdata/seOutputCombinedExtended_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds b/inst/extdata/seOutputCombinedExtended_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds index dbf77fce..bb99b842 100644 Binary files a/inst/extdata/seOutputCombinedExtended_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds and b/inst/extdata/seOutputCombinedExtended_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds differ diff --git a/inst/extdata/seOutputCombined_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds b/inst/extdata/seOutputCombined_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds index 0334d9a1..c8738909 100644 
Binary files a/inst/extdata/seOutputCombined_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds and b/inst/extdata/seOutputCombined_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds differ diff --git a/inst/extdata/seOutputExtended_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds b/inst/extdata/seOutputExtended_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds index ddf8531c..680db58c 100644 Binary files a/inst/extdata/seOutputExtended_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds and b/inst/extdata/seOutputExtended_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds differ diff --git a/inst/extdata/seOutput_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds b/inst/extdata/seOutput_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds index 31b06026..680db58c 100644 Binary files a/inst/extdata/seOutput_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds and b/inst/extdata/seOutput_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds differ diff --git a/inst/extdata/seOutput_denovo_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds b/inst/extdata/seOutput_denovo_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds index 63935b07..37694200 100644 Binary files a/inst/extdata/seOutput_denovo_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds and b/inst/extdata/seOutput_denovo_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds differ diff --git a/inst/extdata/seOutput_distTable_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds b/inst/extdata/seOutput_distTable_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds index a75ae33f..04373219 100644 Binary files a/inst/extdata/seOutput_distTable_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds and b/inst/extdata/seOutput_distTable_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds differ diff --git a/inst/extdata/seOutput_trackReads_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds 
b/inst/extdata/seOutput_trackReads_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds index aa06671e..8de25bef 100644 Binary files a/inst/extdata/seOutput_trackReads_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds and b/inst/extdata/seOutput_trackReads_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds differ diff --git a/inst/extdata/seReadClassStranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds b/inst/extdata/seReadClassStranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds index 2d89a197..149887a8 100644 Binary files a/inst/extdata/seReadClassStranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds and b/inst/extdata/seReadClassStranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds differ diff --git a/inst/extdata/seReadClassUnstranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds b/inst/extdata/seReadClassUnstranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds index 189a57c7..83a9af75 100644 Binary files a/inst/extdata/seReadClassUnstranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds and b/inst/extdata/seReadClassUnstranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds differ diff --git a/inst/extdata/spliceSitePredictionEnd.end.model b/inst/extdata/spliceSitePredictionEnd.end.model new file mode 100644 index 00000000..206d9629 Binary files /dev/null and b/inst/extdata/spliceSitePredictionEnd.end.model differ diff --git a/inst/extdata/spliceSitePredictionEnd.start.model b/inst/extdata/spliceSitePredictionEnd.start.model new file mode 100644 index 00000000..54e1691d Binary files /dev/null and b/inst/extdata/spliceSitePredictionEnd.start.model differ diff --git a/inst/extdata/spliceSitePredictionStart.end.model b/inst/extdata/spliceSitePredictionStart.end.model new file mode 100644 index 00000000..f40cf814 Binary files /dev/null and b/inst/extdata/spliceSitePredictionStart.end.model differ diff --git a/inst/extdata/spliceSitePredictionStart.start.model 
b/inst/extdata/spliceSitePredictionStart.start.model new file mode 100644 index 00000000..b4194d45 Binary files /dev/null and b/inst/extdata/spliceSitePredictionStart.start.model differ diff --git a/inst/extdata/standardJunctionModels_temp.txt b/inst/extdata/standardJunctionModels_temp.txt deleted file mode 100644 index 6a1848bc..00000000 Binary files a/inst/extdata/standardJunctionModels_temp.txt and /dev/null differ diff --git a/tests/testthat/test_prepareAnnotations.R b/tests/testthat/test_prepareAnnotations.R index f9c65007..97903eeb 100644 --- a/tests/testthat/test_prepareAnnotations.R +++ b/tests/testthat/test_prepareAnnotations.R @@ -150,6 +150,14 @@ test_that("eqClassById is correct", { expect_true(all(check$validate)) }) +test_that("prepareAnnotations reads in the NDR, txScore and txScore.noFit correctly", { + extendedAnnotationsExpected <- readRDS(system.file("extdata", "extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds",package = "bambu")) + extendedAnnotations = prepareAnnotations(system.file("extdata", "extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.gtf",package = "bambu")) + expect_equal(mcols(extendedAnnotations)$NDR, mcols(extendedAnnotationsExpected)$NDR) + expect_equal(mcols(extendedAnnotations)$txScore, mcols(extendedAnnotationsExpected)$txScore) + expect_equal(mcols(extendedAnnotations)$txScore.noFit, mcols(extendedAnnotationsExpected)$txScore.noFit) +}) + # test_that("eqClass and eqClassById matches", { # gr <- readRDS(test_path("fixtures", "grGTF.rds")) diff --git a/tests/testthat/test_readWrite.R b/tests/testthat/test_readWrite.R index 7223c1fb..84eb0219 100644 --- a/tests/testthat/test_readWrite.R +++ b/tests/testthat/test_readWrite.R @@ -94,3 +94,17 @@ test_that("readGTF can generate a GRangesList from a GTF file", { expect_s4_class(gr, class = "CompressedGRangesList") expect_named(mcols(gr), c("TXNAME", "GENEID")) }) + +test_that("writeToGTF writes the NDR, txScore and txScore.noFit correctly", { + 
extendedAnnotationsExpected <- readRDS(system.file("extdata", "extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds",package = "bambu")) + + writeToGTF(extendedAnnotationsExpected, test_path("fixtures", "grGTF.rds")) + + extendedAnnotations = prepareAnnotations(test_path("fixtures", "grGTF.rds")) + + expect_equal(mcols(extendedAnnotations)$NDR, mcols(extendedAnnotationsExpected)$NDR) + expect_equal(mcols(extendedAnnotations)$txScore, mcols(extendedAnnotationsExpected)$txScore) + expect_equal(mcols(extendedAnnotations)$txScore.noFit, mcols(extendedAnnotationsExpected)$txScore.noFit) + + unlink(test_path("fixtures", "*")) +}) diff --git a/tests/testthat/test_setNDR.R b/tests/testthat/test_setNDR.R new file mode 100644 index 00000000..1c59b7d1 --- /dev/null +++ b/tests/testthat/test_setNDR.R @@ -0,0 +1,94 @@ +context("setNDR") + +test_that("txRange generates a gene and transcript score",{ + readClasses <- readRDS(system.file("extdata","readClassesUnstranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds",package = "bambu")) + annotations <- readRDS(system.file("extdata","annotationGranges_txdbGrch38_91_chr9_1_1000000.rds",package = "bambu")) + genomeSequence <- system.file("extdata","Homo_sapiens.GRCh38.dna_sm.primary_assembly_chr9_1_1000000.fa",package = "bambu") + genomeSequence <- checkInputSequence(genomeSequence) + + se = scoreReadClasses(readClasses, genomeSequence, annotations, defaultModels) + + seExpected = readRDS(system.file("extdata", "seReadClassUnstranded_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds", package = "bambu")) + + expect_is(rowData(se)$txScore, class = 'numeric') + expect_equal(rowData(se)$txScore, rowData(seExpected)$txScore) +}) + +test_that("Correctly moves transcripts with NDR above the threshold into lowCofidenceTranscripts",{ + extendedAnnotationsExpected <- readRDS(system.file("extdata", "extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds",package = "bambu")) + + 
expect_equal(length(extendedAnnotationsExpected), 106) + expect_equal(length(metadata(extendedAnnotationsExpected)$lowConfidenceTranscripts),1) + + extendedAnnotationsExpected_0.1 = setNDR(extendedAnnotationsExpected, 0.1) + expect_equal(length(extendedAnnotationsExpected_0.1), 105) + expect_equal(length(metadata(extendedAnnotationsExpected_0.1)$lowConfidenceTranscripts),2) +}) + +test_that("Correctly moves transcripts with NDR below the threshold into the extendedAnnotations",{ + extendedAnnotationsExpected <- readRDS(system.file("extdata", "extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds",package = "bambu")) + + expect_equal(length(extendedAnnotationsExpected), 106) + expect_equal(length(metadata(extendedAnnotationsExpected)$lowConfidenceTranscripts),1) + + extendedAnnotationsExpected_1 = setNDR(extendedAnnotationsExpected, 1) + expect_equal(length(extendedAnnotationsExpected_1), 107) + expect_equal(length(metadata(extendedAnnotationsExpected_1)$lowConfidenceTranscripts),0) +}) + +test_that("Reference annotations are not moved unless includeRef is used", { + extendedAnnotationsExpected <- readRDS(system.file("extdata", "extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds",package = "bambu")) + + expect_equal(length(extendedAnnotationsExpected), 106) + expect_equal(length(metadata(extendedAnnotationsExpected)$lowConfidenceTranscripts),1) + + extendedAnnotationsExpected_0.5 = setNDR(extendedAnnotationsExpected, 0.5, includeRef = TRUE) + expect_equal(length(extendedAnnotationsExpected_0.5), 104) + expect_equal(length(metadata(extendedAnnotationsExpected_0.5)$lowConfidenceTranscripts),3) +}) + +test_that("setNDR only effects transcripts with the prefix", { + extendedAnnotationsExpected <- readRDS(system.file("extdata", "extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds",package = "bambu")) + + expect_equal(length(extendedAnnotationsExpected), 106) + expect_equal(length(metadata(extendedAnnotationsExpected)$lowConfidenceTranscripts),1) + + 
extendedAnnotationsExpected_1 = setNDR(extendedAnnotationsExpected, 1, prefix = "Stringtie2") + expect_equal(length(extendedAnnotationsExpected_1), 106) + expect_equal(length(metadata(extendedAnnotationsExpected_1)$lowConfidenceTranscripts),1) +}) + +test_that("setNDR saves the used NDR threshold correctly", { + extendedAnnotationsExpected <- readRDS(system.file("extdata", "extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds",package = "bambu")) + #metadata(extendedAnnotationsExpected)$NDR = 0.7 + extendedAnnotationsExpected_1 = setNDR(extendedAnnotationsExpected, 1) + expect_equal(metadata(extendedAnnotationsExpected_1)$NDRthreshold,1) +}) + +test_that("setNDR works when no lowConfidenceTranscripts are present", { + seIsoReCombined <- readRDS(system.file("extdata", "seIsoReCombined_SGNex_A549_directRNA_replicate5_run1_chr9_1_1000000.rds", package = "bambu")) + gr <- readRDS(system.file("extdata", "annotationGranges_txdbGrch38_91_chr9_1_1000000.rds", package = "bambu")) + + extendedAnnotations <- isore.extendAnnotations(combinedTranscripts=seIsoReCombined, + annotationGrangesList=gr, + remove.subsetTx = TRUE, min.sampleNumber = 1, NDR = 1, + min.exonDistance = 35, min.exonOverlap = 10, + min.primarySecondaryDist = 5, min.primarySecondaryDistStartEnd = 5, + prefix='Bambu', verbose=FALSE, defaultModels = defaultModels) + + extendedAnnotationsExpected_0.7 = setNDR(extendedAnnotationsExpected, 0.7) + expect_equal(length(extendedAnnotationsExpected), 106) + expect_equal(length(metadata(extendedAnnotationsExpected)$lowConfidenceTranscripts),1) +}) + +test_that("setNDR recommends the correct NDR", { + extendedAnnotationsExpected <- readRDS(system.file("extdata", "extendedAnnotationGranges_txdbGrch38_91_chr9_1_1000000.rds",package = "bambu")) + + extendedAnnotationsExpected_rec = setNDR(extendedAnnotationsExpected) + expect_equal(unname(metadata(extendedAnnotationsExpected_rec)$NDR), -0.013) +}) + +test_that("setNDR handles annotations with no NDR", { + annotations 
<- readRDS(system.file("extdata","annotationGranges_txdbGrch38_91_chr9_1_1000000.rds",package = "bambu")) + expect_warning(setNDR(annotations, 0.5), "Annotations were not extended by Bambu") +}) \ No newline at end of file