diff --git a/myco.wdl b/myco.wdl index 29eeaf7..e3bd28e 100644 --- a/myco.wdl +++ b/myco.wdl @@ -4,7 +4,7 @@ import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/workflows import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/tasks/combined_decontamination.wdl" as clckwrk_combonation import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/tasks/variant_call_one_sample.wdl" as clckwrk_var_call import "https://raw.githubusercontent.com/aofarrel/usher-sampled-wdl/0.0.2/usher_sampled.wdl" as build_treesWF -import "https://raw.githubusercontent.com/aofarrel/parsevcf/1.1.3/vcf_to_diff.wdl" as diff +import "https://raw.githubusercontent.com/aofarrel/parsevcf/1.1.4/vcf_to_diff.wdl" as diff import "https://raw.githubusercontent.com/aofarrel/fastqc-wdl/main/fastqc.wdl" as fastqc workflow myco { @@ -15,6 +15,7 @@ workflow myco { Float bad_data_threshold = 0.05 Boolean decorate_tree = false Boolean fastqc_on_timeout = false + Boolean force_diff = false File? input_tree Int min_coverage = 10 File? ref_genome_for_tree_building @@ -28,7 +29,8 @@ workflow myco { parameter_meta { bad_data_threshold: "If a diff file has higher than this percent (0.5 = 50%) bad data, do not include it in the tree" decorate_tree: "Should usher, taxonium, and NextStrain trees be generated? Requires input_tree and ref_genome" - fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination times out (see timeout_decontam)" + fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination or variant calling times out" + force_diff: "If true and if decorate_tree is false, generate diff files. (Diff files will always be created if decorate_tree is true.)" input_tree: "Base tree to use if decorate_tree = true" min_coverage: "Positions with coverage below this value will be masked in diff files" paired_fastq_sets: "Nested array of paired fastqs, each inner array representing one samples worth of paired fastqs" @@ -41,6 +43,17 @@ workflow myco { typical_tb_masked_regions: "Bed file of regions to mask when making diff files" } + # WDL doesn't understand mutual exclusivity, so we have to get a little creative on + # our determination of whether or not we want to create diff files. + if(decorate_tree) { Boolean create_diff_files_ = true } + if(!decorate_tree) { + if(!force_diff){ Boolean create_diff_files__ = false } + if(force_diff) { Boolean create_diff_files___ = true } + } + Boolean create_diff_files = select_first([create_diff_files_, + create_diff_files__, + create_diff_files___]) + call clockwork_ref_prepWF.ClockworkRefPrepTB scatter(paired_fastqs in paired_fastq_sets) { @@ -78,6 +91,8 @@ workflow myco { } if(fastqc_on_timeout) { + # Note: This might be problematic in some situations -- may need to make this look like myco_sra + # But until then, I'm going to stick with this simpler implementation if(length(per_sample_decontam.check_this_fastq)>1 && length(varcall_with_array.check_this_fastq)>1) { Array[File] bad_fastqs_both = select_all(per_sample_decontam.check_this_fastq) } @@ -108,12 +123,16 @@ workflow myco { } if(decorate_tree) { + # diff files must exist if decorate_tree is true, so we can force the Array[File?]? + # into an Array[File] with the classic "select_first() with a bogus fallback" hack + Array[File] coerced_diffs = select_first([select_all(make_mask_and_diff.diff), minos_vcfs]) + Array[File] coerced_reports = select_first([select_all(make_mask_and_diff.report), minos_vcfs]) call build_treesWF.usher_sampled_diff_to_taxonium as trees { input: - diffs = make_mask_and_diff.diff, + diffs = coerced_diffs, i = input_tree, ref = ref_genome_for_tree_building, - coverage_reports = make_mask_and_diff.report, + coverage_reports = coerced_reports, bad_data_threshold = bad_data_threshold } } @@ -121,7 +140,7 @@ workflow myco { output { Array[File] minos = minos_vcfs Array[File] masks = make_mask_and_diff.mask_file - Array[File] diffs = make_mask_and_diff.diff + Array[File?] diffs = make_mask_and_diff.diff File? tax_tree = trees.taxonium_tree Array[File]? fastqc_reports = FastqcWF.reports } diff --git a/myco_sra.wdl b/myco_sra.wdl index d703fd9..ae37b3c 100644 --- a/myco_sra.wdl +++ b/myco_sra.wdl @@ -15,24 +15,24 @@ workflow myco { File typical_tb_masked_regions Float bad_data_threshold = 0.05 - Boolean decorate_tree = false - Boolean fastqc_on_timeout = false - Boolean force_diff = false + Boolean decorate_tree = false + Boolean fastqc_on_timeout = false + Boolean force_diff = false File? input_tree Int min_coverage = 10 File? ref_genome_for_tree_building - Int subsample_cutoff = 450 - Int subsample_seed = 1965 - Int timeout_decontam_part1 = 20 - Int timeout_decontam_part2 = 15 - Int timeout_variant_caller = 120 + Int subsample_cutoff = 450 + Int subsample_seed = 1965 + Int timeout_decontam_part1 = 20 + Int timeout_decontam_part2 = 15 + Int timeout_variant_caller = 120 } parameter_meta { biosample_accessions: "File of BioSample accessions to pull, one accession per line" bad_data_threshold: "If a diff file has higher than this percent (0.5 = 50%) bad data, don't include it in the tree" decorate_tree: "Should usher, taxonium, and NextStrain trees be generated? Requires input_tree and ref_genome" - fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination times out (see timeout_decontam)" + fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination or variant calling times out" force_diff: "If true and if decorate_tree is false, generate diff files. (Diff files will always be created if decorate_tree is true.)" input_tree: "Base tree to use if decorate_tree = true" min_coverage: "Positions with coverage below this value will be masked in diff files" @@ -47,19 +47,14 @@ workflow myco { # WDL doesn't understand mutual exclusivity, so we have to get a little creative on # our determination of whether or not we want to create diff files. - if(decorate_tree) { - Boolean create_diff_files_ = true - } + if(decorate_tree) { Boolean create_diff_files_ = true } if(!decorate_tree) { - if(!force_diff) { - Boolean create_diff_files__ = false - } - if(force_diff) { - Boolean create_diff_files___ = true - } + if(!force_diff){ Boolean create_diff_files__ = false } + if(force_diff) { Boolean create_diff_files___ = true } } - Boolean create_diff_files = select_first([create_diff_files_, create_diff_files__, create_diff_files___]) - + Boolean create_diff_files = select_first([create_diff_files_, + create_diff_files__, + create_diff_files___]) call clockwork_ref_prepWF.ClockworkRefPrepTB