From 4ba12d780145952e2b7410cce246211cc0337973 Mon Sep 17 00:00:00 2001 From: Karan Jaisingh Date: Tue, 18 Feb 2025 17:14:58 -0500 Subject: [PATCH] Updated BAF/PESR/RD test code to be functionally identical --- src/RdTest/RdTest.R | 21 ++++++++++++++++----- src/svtk/svtk/cli/baf_test.py | 26 +++++++++++++++----------- src/svtk/svtk/pesr/pesr_test.py | 7 ++++--- 3 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/RdTest/RdTest.R b/src/RdTest/RdTest.R index 75b0e9859..0109961ec 100755 --- a/src/RdTest/RdTest.R +++ b/src/RdTest/RdTest.R @@ -1180,14 +1180,25 @@ runRdTest<-function(bed) samplestokeep<-match(unlist(strsplit(sampleIDs,",")),idsforsearch) sampleIDs<-idsforsearch[na.omit(samplestokeep)] - #Exclude outlier samples only if non-outlier samples exist if (!is.null(opt$outlierSampleIds)) { outlier_ids <- readLines(opt$outlierSampleIds) - non_outlier_samples <- setdiff(sampleIDs, outlier_ids) - if (length(non_outlier_samples) > 0) { - sampleIDs <- non_outlier_samples - cnv_matrix <- cnv_matrix[!(rownames(cnv_matrix) %in% outlier_ids), , drop = FALSE] + + # Create non-outlier sample lists + background_samples <- setdiff(rownames(cnv_matrix), sampleIDs) + non_outlier_called <- setdiff(sampleIDs, outlier_ids) + non_outlier_background <- setdiff(background_samples, outlier_ids) + + # Exclude outlier samples only if non-outlier samples exist + if (length(non_outlier_called) > 0) { + outlier_called <- setdiff(sampleIDs, non_outlier_called) + cnv_matrix <- cnv_matrix[!(rownames(cnv_matrix) %in% outlier_called), , drop = FALSE] + sampleIDs <- non_outlier_called } + if (length(non_outlier_background) > 0) { + outlier_background <- setdiff(background_samples, non_outlier_background) + cnv_matrix <- cnv_matrix[!(rownames(cnv_matrix) %in% outlier_background), , drop = FALSE] + } + } } samplesPrior <-unlist(strsplit(as.character(sampleIDs),split=",")) diff --git a/src/svtk/svtk/cli/baf_test.py b/src/svtk/svtk/cli/baf_test.py index 699b1f8b9..37d56dc00 100644 --- a/src/svtk/svtk/cli/baf_test.py +++ b/src/svtk/svtk/cli/baf_test.py @@ -53,19 +53,23 @@ def preprocess(chrom, start, end, tbx, samples, window=None, called_samples=None return bafs, bafs, called_samples bafs.columns = ['chr', 'pos', 'baf', 'sample'] - # Exclude outlier samples from all samples only if non-outlier samples exist - bafs_no_outliers = bafs[bafs['sample'].isin(samples) & ~bafs['sample'].isin(outlier_sample_ids)] - if not bafs_no_outliers.empty: - bafs = bafs_no_outliers + # Create non-outlier sample lists + background_samples = list(set(samples) - set(called_samples)) + non_outlier_called = [s for s in called_samples if s not in outlier_sample_ids] + non_outlier_background = [s for s in background_samples if s not in outlier_sample_ids] + + # Exclude outlier samples only if non-outlier samples exist + if len(non_outlier_called) > 0: + called_samples = non_outlier_called + if len(non_outlier_background) > 0: + background_samples = non_outlier_background - # Exclude outlier samples from called samples only if non-outlier samples exist - non_outlier_called_samples = [s for s in called_samples if s not in outlier_sample_ids] - if len(non_outlier_called_samples) > 0: - called_samaples = non_outlier_called_samples + samples = list(set(called_samples) | set(background_samples)) + bafs = bafs[bafs['sample'].isin(samples)] if bafs.empty: - return bafs, bafs, called_samaples - + return bafs, bafs, called_samples + bafs['pos'] = bafs.pos.astype(int) bafs['baf'] = bafs.baf.astype(float) bafs.loc[bafs.pos <= start, 'region'] = 'before' @@ -82,7 +86,7 @@ def preprocess(chrom, start, end, tbx, samples, window=None, called_samples=None # Report BAF for variants inside CNV called_bafs = bafs.loc[bafs.region == 'inside'].copy() - return het_counts, called_bafs, called_samaples + return het_counts, called_bafs, called_samples def main(argv): diff --git a/src/svtk/svtk/pesr/pesr_test.py b/src/svtk/svtk/pesr/pesr_test.py index 1b2c8dc24..22b5a9bcf 100644 --- a/src/svtk/svtk/pesr/pesr_test.py +++ b/src/svtk/svtk/pesr/pesr_test.py @@ -142,12 +142,13 @@ def choose_background(self, record, whitelist=None, blacklist=None): called = svu.get_called_samples(record) background = [s for s in self.samples if s not in called] - # Exclude outlier samples only if non-outlier samples exist + # Create non-outlier sample lists non_outlier_called = [s for s in called if s not in self.outlier_sample_ids] + non_outlier_background = [s for s in background if s not in self.outlier_sample_ids] + + # Exclude outlier samples only if non-outlier samples exist if len(non_outlier_called) > 0: called = non_outlier_called - - non_outlier_background = [s for s in background if s not in self.outlier_sample_ids] if len(non_outlier_background) > 0: background = non_outlier_background