Skip to content

Commit

Permalink
Updated BAF/PESR/RD test code to be functionally identical
Browse files (browse the repository at this point in the history)
  • Loading branch information
kjaisingh committed Feb 18, 2025
1 parent 1f85580 commit 4ba12d7
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 19 deletions.
21 changes: 16 additions & 5 deletions src/RdTest/RdTest.R
Original file line number | Diff line number | Diff line change
Expand Up @@ -1180,14 +1180,25 @@ runRdTest<-function(bed)
samplestokeep<-match(unlist(strsplit(sampleIDs,",")),idsforsearch)
sampleIDs<-idsforsearch[na.omit(samplestokeep)]

#Exclude outlier samples only if non-outlier samples exist
if (!is.null(opt$outlierSampleIds)) {
outlier_ids <- readLines(opt$outlierSampleIds)
non_outlier_samples <- setdiff(sampleIDs, outlier_ids)
if (length(non_outlier_samples) > 0) {
sampleIDs <- non_outlier_samples
cnv_matrix <- cnv_matrix[!(rownames(cnv_matrix) %in% outlier_ids), , drop = FALSE]

# Create non-outlier sample lists
background_samples <- setdiff(rownames(cnv_matrix), sampleIDs)
non_outlier_called <- setdiff(sampleIDs, outlier_ids)
non_outlier_background <- setdiff(background_samples, outlier_ids)

# Exclude outlier samples only if non-outlier samples exist
if (length(non_outlier_called) > 0) {
outlier_called <- setdiff(sampleIDs, non_outlier_called)
cnv_matrix <- cnv_matrix[!(rownames(cnv_matrix) %in% outlier_called), , drop = FALSE]
sampleIDs <- non_outlier_called
}
if (length(non_outlier_background) > 0) {
outlier_background <- setdiff(background_samples, non_outlier_background)
cnv_matrix <- cnv_matrix[!(rownames(cnv_matrix) %in% outlier_background), , drop = FALSE]
}
}
}
samplesPrior <-unlist(strsplit(as.character(sampleIDs),split=","))

Expand Down
26 changes: 15 additions & 11 deletions src/svtk/svtk/cli/baf_test.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -53,19 +53,23 @@ def preprocess(chrom, start, end, tbx, samples, window=None, called_samples=None
return bafs, bafs, called_samples
bafs.columns = ['chr', 'pos', 'baf', 'sample']

# Exclude outlier samples from all samples only if non-outlier samples exist
bafs_no_outliers = bafs[bafs['sample'].isin(samples) & ~bafs['sample'].isin(outlier_sample_ids)]
if not bafs_no_outliers.empty:
bafs = bafs_no_outliers
# Create non-outlier sample lists
background_samples = list(set(samples) - set(called_samples))
non_outlier_called = [s for s in called_samples if s not in outlier_sample_ids]
non_outlier_background = [s for s in background_samples if s not in outlier_sample_ids]

# Exclude outlier samples only if non-outlier samples exist
if len(non_outlier_called) > 0:
called_samples = non_outlier_called
if len(non_outlier_background) > 0:
background_samples = non_outlier_background

# Exclude outlier samples from called samples only if non-outlier samples exist
non_outlier_called_samples = [s for s in called_samples if s not in outlier_sample_ids]
if len(non_outlier_called_samples) > 0:
called_samaples = non_outlier_called_samples
samples = list(set(called_samples) | set(background_samples))
bafs = bafs[bafs['sample'].isin(samples)]

if bafs.empty:
return bafs, bafs, called_samaples

return bafs, bafs, called_samples
bafs['pos'] = bafs.pos.astype(int)
bafs['baf'] = bafs.baf.astype(float)
bafs.loc[bafs.pos <= start, 'region'] = 'before'
Expand All @@ -82,7 +86,7 @@ def preprocess(chrom, start, end, tbx, samples, window=None, called_samples=None

# Report BAF for variants inside CNV
called_bafs = bafs.loc[bafs.region == 'inside'].copy()
return het_counts, called_bafs, called_samaples
return het_counts, called_bafs, called_samples


def main(argv):
Expand Down
7 changes: 4 additions & 3 deletions src/svtk/svtk/pesr/pesr_test.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -142,12 +142,13 @@ def choose_background(self, record, whitelist=None, blacklist=None):
called = svu.get_called_samples(record)
background = [s for s in self.samples if s not in called]

# Exclude outlier samples only if non-outlier samples exist
# Create non-outlier sample lists
non_outlier_called = [s for s in called if s not in self.outlier_sample_ids]
non_outlier_background = [s for s in background if s not in self.outlier_sample_ids]

# Exclude outlier samples only if non-outlier samples exist
if len(non_outlier_called) > 0:
called = non_outlier_called

non_outlier_background = [s for s in background if s not in self.outlier_sample_ids]
if len(non_outlier_background) > 0:
background = non_outlier_background

Expand Down

0 comments on commit 4ba12d7

Please sign in to comment.