Skip to content

Commit

Permalink
fix: error out if a callset does not match the truth at all (#43)
Browse files Browse the repository at this point in the history
* fix: error out if a callset does not match the truth at all

* fmt

* trigger rerun
  • Loading branch information
johanneskoester authored May 3, 2023
1 parent 2f7dddf commit 480b929
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 0 deletions.
6 changes: 6 additions & 0 deletions workflow/rules/eval.smk
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ rule collect_stratifications:
"logs/collect-stratifications/{callset}/{vartype}.log",
conda:
"../envs/stats.yaml"
# Prefer this rule over FP/FN collection (which has a lower priority), so that
# callsets that do not match the truth at all fail here first instead of causing
# memory issues during FP/FN collection.
priority: 2
script:
"../scripts/collect-stratifications.py"

Expand Down Expand Up @@ -221,6 +224,9 @@ rule collect_fp_fn:
"logs/collect-fp-fn/{genome}/{cov}/{classification}.log",
conda:
"../envs/stats.yaml"
# This has to happen after precision/recall has been computed, otherwise we risk
# extremely high memory usage if a callset does not match the truth at all.
priority: 1
script:
"../scripts/collect-fp-fn.py"

Expand Down
6 changes: 6 additions & 0 deletions workflow/scripts/collect-stratifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ def load_data(f, coverage):
load_data(f, cov) for cov, f in zip(snakemake.params.coverages, snakemake.input)
)

if (report["tp_truth"] == 0).all():
raise ValueError(
f"The callset {snakemake.wildcards.callset} does not predict any variant from the truth. "
"This is likely a technical issue in the callset and should be checked before further evaluation."
)

report.to_csv(snakemake.output[0], sep="\t", index=False)
else:
pd.DataFrame(
Expand Down

0 comments on commit 480b929

Please sign in to comment.