From d809f3b2dfccc751b0facbbf7b059e4921e73885 Mon Sep 17 00:00:00 2001
From: David Laehnemann
Date: Wed, 17 Apr 2024 18:08:49 +0200
Subject: [PATCH] feat: make circle filtering configurable

---
NOTE(review): line structure was restored from a whitespace-collapsed copy of
this patch. Hunk line counts match the @@ headers and the diffstat, but the
indentation of context lines is reconstructed -- verify against the tree (or
apply with --ignore-whitespace) before `git am`. The blob ids on the `index`
lines are unchanged from the original patch.

 config/config.yaml                            | 14 +++++++++-
 workflow/resources/circles.datavzrd.yaml      |  2 ++
 workflow/rules/circle_map.smk                 |  8 ++++++
 workflow/schemas/config.schema.yaml           | 26 +++++++++++++++++++
 .../clean_circle_map_realign_output.py        | 14 +++++++---
 5 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index 95a89a4..9424f8c 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -30,4 +30,16 @@ params:
   cutadapt: ""
   gatk:
     BaseRecalibrator: ""
-    applyBQSR: ""
\ No newline at end of file
+    applyBQSR: ""
+
+# These filters mostly correspond to the output columns of Circle-Map:
+# https://github.com/iprada/Circle-Map/wiki/Circle-Map-Realign-output-files
+# In addition, you can filter on the length of the circle. All thresholds are inclusive.
+circle_filtering:
+  min_circle_score: 100
+  min_split_reads: 1
+  min_discordant_read_pairs: 1
+  max_uncovered_fraction: 0.9
+  min_mean_coverage: 2.0
+  min_circle_length: 500
+  max_circle_length: 80000000
\ No newline at end of file
diff --git a/workflow/resources/circles.datavzrd.yaml b/workflow/resources/circles.datavzrd.yaml
index fa5a858..fb371fb 100644
--- a/workflow/resources/circles.datavzrd.yaml
+++ b/workflow/resources/circles.datavzrd.yaml
@@ -41,12 +41,14 @@ views:
           heatmap:
             scale: linear
             range:
+              - "#e7d4e8"
               - "#e7d4e8"
               - "white"
               - "#d9f0d3"
               - "#7fbf7b"
               - "#7fbf7b"
             domain:
+              - 0
               - 50
               - 200
               - 300
diff --git a/workflow/rules/circle_map.smk b/workflow/rules/circle_map.smk
index 9efabbe..9aa4cf9 100644
--- a/workflow/rules/circle_map.smk
+++ b/workflow/rules/circle_map.smk
@@ -64,5 +64,13 @@ rule clean_circle_map_realign_output:
         "logs/circle-map/{sample}.circles.cleaned.log",
     conda:
         "../envs/pandas.yaml"
+    params:
+        min_circle_score=config["circle_filtering"]["min_circle_score"],
+        min_split_reads=config["circle_filtering"]["min_split_reads"],
+        min_discordant_read_pairs=config["circle_filtering"]["min_discordant_read_pairs"],
+        max_uncovered_fraction=config["circle_filtering"]["max_uncovered_fraction"],
+        min_mean_coverage=config["circle_filtering"]["min_mean_coverage"],
+        min_circle_length=config["circle_filtering"]["min_circle_length"],
+        max_circle_length=config["circle_filtering"]["max_circle_length"],
     script:
         "../scripts/clean_circle_map_realign_output.py"
diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml
index 147b243..572572e 100644
--- a/workflow/schemas/config.schema.yaml
+++ b/workflow/schemas/config.schema.yaml
@@ -28,8 +28,34 @@ properties:
       - release
       - build
       - n_chromosomes
+  circle_filtering:
+    type: object
+    properties:
+      min_circle_score:
+        type: number
+      min_split_reads:
+        type: integer
+      min_discordant_read_pairs:
+        type: integer
+      max_uncovered_fraction:
+        type: number
+      min_mean_coverage:
+        type: number
+      min_circle_length:
+        type: integer
+      max_circle_length:
+        type: integer
+    required:
+      - min_circle_score
+      - min_split_reads
+      - min_discordant_read_pairs
+      - max_uncovered_fraction
+      - min_mean_coverage
+      - min_circle_length
+      - max_circle_length
 
 required:
   - samples
   - units
   - ref
+  - circle_filtering
diff --git a/workflow/scripts/clean_circle_map_realign_output.py b/workflow/scripts/clean_circle_map_realign_output.py
index 99a5c02..d76f1a1 100644
--- a/workflow/scripts/clean_circle_map_realign_output.py
+++ b/workflow/scripts/clean_circle_map_realign_output.py
@@ -38,10 +38,11 @@
 # filter out low-quality circles, according to:
 # https://github.com/iprada/Circle-Map/wiki/Circle-Map-Realign-output-files
 circles = circles.loc[
-    ( circles["circle_score"] >= 50 ) &
-    ( circles["discordant_reads"] > 0 ) &
-    ( circles["split_reads"] > 0 ) &
-    ( circles["uncovered_fraction"] < 1 )
+    ( circles["circle_score"] >= snakemake.params["min_circle_score"] ) &
+    ( circles["discordant_reads"] >= snakemake.params["min_discordant_read_pairs"] ) &
+    ( circles["split_reads"] >= snakemake.params["min_split_reads"] ) &
+    ( circles["uncovered_fraction"] <= snakemake.params["max_uncovered_fraction"] ) &
+    ( circles["mean_coverage"] >= snakemake.params["min_mean_coverage"] )
 ]
 
 
@@ -57,6 +58,11 @@
     axis='columns',
 )
 
+circles = circles.loc[
+    ( circles["length"] >= snakemake.params["min_circle_length"] ) &
+    ( circles["length"] <= snakemake.params["max_circle_length"] )
+]
+
 circles.sort_values(
     by=['chromosome', 'start', 'end'],
     inplace=True