Skip to content

Commit

Permalink
Bring parsevcf-1.1.4 over to non-sra myco
Browse files Browse the repository at this point in the history
  • Loading branch information
aofarrel committed Mar 7, 2023
1 parent eed9388 commit 03c49b3
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 25 deletions.
29 changes: 24 additions & 5 deletions myco.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/workflows
import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/tasks/combined_decontamination.wdl" as clckwrk_combonation
import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/tasks/variant_call_one_sample.wdl" as clckwrk_var_call
import "https://raw.githubusercontent.com/aofarrel/usher-sampled-wdl/0.0.2/usher_sampled.wdl" as build_treesWF
import "https://raw.githubusercontent.com/aofarrel/parsevcf/1.1.3/vcf_to_diff.wdl" as diff
import "https://raw.githubusercontent.com/aofarrel/parsevcf/1.1.4/vcf_to_diff.wdl" as diff
import "https://raw.githubusercontent.com/aofarrel/fastqc-wdl/main/fastqc.wdl" as fastqc

workflow myco {
Expand All @@ -15,6 +15,7 @@ workflow myco {
Float bad_data_threshold = 0.05
Boolean decorate_tree = false
Boolean fastqc_on_timeout = false
Boolean force_diff = false
File? input_tree
Int min_coverage = 10
File? ref_genome_for_tree_building
Expand All @@ -28,7 +29,8 @@ workflow myco {
parameter_meta {
bad_data_threshold: "If a diff file has higher than this percent (0.5 = 50%) bad data, do not include it in the tree"
decorate_tree: "Should usher, taxonium, and NextStrain trees be generated? Requires input_tree and ref_genome"
fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination times out (see timeout_decontam)"
fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination or variant calling times out"
force_diff: "If true and if decorate_tree is false, generate diff files. (Diff files will always be created if decorate_tree is true.)"
input_tree: "Base tree to use if decorate_tree = true"
min_coverage: "Positions with coverage below this value will be masked in diff files"
paired_fastq_sets: "Nested array of paired fastqs, each inner array representing one samples worth of paired fastqs"
Expand All @@ -41,6 +43,17 @@ workflow myco {
typical_tb_masked_regions: "Bed file of regions to mask when making diff files"
}

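# WDL has no else clause and cannot tell that separate if-blocks are mutually exclusive,
# so each branch below declares its own uniquely named optional Boolean, and select_first()
# then picks whichever one was defined to decide whether or not we create diff files.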
if(decorate_tree) { Boolean create_diff_files_ = true }
if(!decorate_tree) {
if(!force_diff){ Boolean create_diff_files__ = false }
if(force_diff) { Boolean create_diff_files___ = true }
}
Boolean create_diff_files = select_first([create_diff_files_,
create_diff_files__,
create_diff_files___])

call clockwork_ref_prepWF.ClockworkRefPrepTB

scatter(paired_fastqs in paired_fastq_sets) {
Expand Down Expand Up @@ -78,6 +91,8 @@ workflow myco {
}

if(fastqc_on_timeout) {
# Note: This might be problematic in some situations -- may need to make this look like myco_sra
# But until then, I'm going to stick with this simpler implementation
if(length(per_sample_decontam.check_this_fastq)>1 && length(varcall_with_array.check_this_fastq)>1) {
Array[File] bad_fastqs_both = select_all(per_sample_decontam.check_this_fastq)
}
Expand Down Expand Up @@ -108,20 +123,24 @@ workflow myco {
}

if(decorate_tree) {
# diff files must exist if decorate_tree is true, so we can force the Array[File?]?
# into an Array[File] with the classic "select_first() with a bogus fallback" hack
Array[File] coerced_diffs = select_first([select_all(make_mask_and_diff.diff), minos_vcfs])
Array[File] coerced_reports = select_first([select_all(make_mask_and_diff.report), minos_vcfs])
call build_treesWF.usher_sampled_diff_to_taxonium as trees {
input:
diffs = make_mask_and_diff.diff,
diffs = coerced_diffs,
i = input_tree,
ref = ref_genome_for_tree_building,
coverage_reports = make_mask_and_diff.report,
coverage_reports = coerced_reports,
bad_data_threshold = bad_data_threshold
}
}

output {
Array[File] minos = minos_vcfs
Array[File] masks = make_mask_and_diff.mask_file
Array[File] diffs = make_mask_and_diff.diff
Array[File?] diffs = make_mask_and_diff.diff
File? tax_tree = trees.taxonium_tree
Array[File]? fastqc_reports = FastqcWF.reports
}
Expand Down
35 changes: 15 additions & 20 deletions myco_sra.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,24 @@ workflow myco {
File typical_tb_masked_regions

Float bad_data_threshold = 0.05
Boolean decorate_tree = false
Boolean fastqc_on_timeout = false
Boolean force_diff = false
Boolean decorate_tree = false
Boolean fastqc_on_timeout = false
Boolean force_diff = false
File? input_tree
Int min_coverage = 10
File? ref_genome_for_tree_building
Int subsample_cutoff = 450
Int subsample_seed = 1965
Int timeout_decontam_part1 = 20
Int timeout_decontam_part2 = 15
Int timeout_variant_caller = 120
Int subsample_cutoff = 450
Int subsample_seed = 1965
Int timeout_decontam_part1 = 20
Int timeout_decontam_part2 = 15
Int timeout_variant_caller = 120
}

parameter_meta {
biosample_accessions: "File of BioSample accessions to pull, one accession per line"
bad_data_threshold: "If a diff file has higher than this percent (0.5 = 50%) bad data, don't include it in the tree"
decorate_tree: "Should usher, taxonium, and NextStrain trees be generated? Requires input_tree and ref_genome"
fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination times out (see timeout_decontam)"
fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination or variant calling times out"
force_diff: "If true and if decorate_tree is false, generate diff files. (Diff files will always be created if decorate_tree is true.)"
input_tree: "Base tree to use if decorate_tree = true"
min_coverage: "Positions with coverage below this value will be masked in diff files"
Expand All @@ -47,19 +47,14 @@ workflow myco {

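# WDL has no else clause and cannot tell that separate if-blocks are mutually exclusive,
# so each branch below declares its own uniquely named optional Boolean, and select_first()
# then picks whichever one was defined to decide whether or not we create diff files.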
if(decorate_tree) {
Boolean create_diff_files_ = true
}
if(decorate_tree) { Boolean create_diff_files_ = true }
if(!decorate_tree) {
if(!force_diff) {
Boolean create_diff_files__ = false
}
if(force_diff) {
Boolean create_diff_files___ = true
}
if(!force_diff){ Boolean create_diff_files__ = false }
if(force_diff) { Boolean create_diff_files___ = true }
}
Boolean create_diff_files = select_first([create_diff_files_, create_diff_files__, create_diff_files___])

Boolean create_diff_files = select_first([create_diff_files_,
create_diff_files__,
create_diff_files___])

call clockwork_ref_prepWF.ClockworkRefPrepTB

Expand Down

0 comments on commit 03c49b3

Please sign in to comment.