From 1d1350f72745ba7908e641a2bcb23b24f0398695 Mon Sep 17 00:00:00 2001 From: Laptop Date: Mon, 6 Mar 2023 12:39:30 -0800 Subject: [PATCH 1/3] Pin 1.1.2 of vcf to diff WDL --- myco.wdl | 2 +- myco_sra.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/myco.wdl b/myco.wdl index 556ecad..296e10d 100644 --- a/myco.wdl +++ b/myco.wdl @@ -4,7 +4,7 @@ import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/workflows import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/tasks/combined_decontamination.wdl" as clckwrk_combonation import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/tasks/variant_call_one_sample.wdl" as clckwrk_var_call import "https://raw.githubusercontent.com/aofarrel/usher-sampled-wdl/0.0.2/usher_sampled.wdl" as build_treesWF -import "https://raw.githubusercontent.com/aofarrel/parsevcf/main/vcf_to_diff.wdl" as diff +import "https://raw.githubusercontent.com/aofarrel/parsevcf/1.1.2/vcf_to_diff.wdl" as diff import "https://raw.githubusercontent.com/aofarrel/fastqc-wdl/main/fastqc.wdl" as fastqc workflow myco { diff --git a/myco_sra.wdl b/myco_sra.wdl index b93fb09..181f90c 100644 --- a/myco_sra.wdl +++ b/myco_sra.wdl @@ -6,7 +6,7 @@ import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/tasks/var import "https://raw.githubusercontent.com/aofarrel/SRANWRP/v1.1.7/tasks/pull_fastqs.wdl" as sranwrp_pull import "https://raw.githubusercontent.com/aofarrel/SRANWRP/v1.1.7/tasks/processing_tasks.wdl" as sranwrp_processing import "https://raw.githubusercontent.com/aofarrel/usher-sampled-wdl/0.0.2/usher_sampled.wdl" as build_treesWF -import "https://raw.githubusercontent.com/aofarrel/parsevcf/main/vcf_to_diff.wdl" as diff +import "https://raw.githubusercontent.com/aofarrel/parsevcf/1.1.2/vcf_to_diff.wdl" as diff import "https://raw.githubusercontent.com/aofarrel/fastqc-wdl/main/fastqc.wdl" as fastqc workflow myco { From cf40f91bd23d627bf478262fe492ed7b075410b4 Mon Sep 17 00:00:00 2001 From: Laptop Date: Mon, 6 Mar 2023 16:18:31 -0800 Subject: [PATCH 2/3] Update prereqs, diffs and reports are optional --- myco.wdl | 2 +- myco_sra.wdl | 33 ++++++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/myco.wdl b/myco.wdl index 296e10d..29eeaf7 100644 --- a/myco.wdl +++ b/myco.wdl @@ -4,7 +4,7 @@ import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/workflows import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/tasks/combined_decontamination.wdl" as clckwrk_combonation import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/tasks/variant_call_one_sample.wdl" as clckwrk_var_call import "https://raw.githubusercontent.com/aofarrel/usher-sampled-wdl/0.0.2/usher_sampled.wdl" as build_treesWF -import "https://raw.githubusercontent.com/aofarrel/parsevcf/1.1.2/vcf_to_diff.wdl" as diff +import "https://raw.githubusercontent.com/aofarrel/parsevcf/1.1.3/vcf_to_diff.wdl" as diff import "https://raw.githubusercontent.com/aofarrel/fastqc-wdl/main/fastqc.wdl" as fastqc workflow myco { diff --git a/myco_sra.wdl b/myco_sra.wdl index 181f90c..d703fd9 100644 --- a/myco_sra.wdl +++ b/myco_sra.wdl @@ -6,7 +6,7 @@ import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/tasks/var import "https://raw.githubusercontent.com/aofarrel/SRANWRP/v1.1.7/tasks/pull_fastqs.wdl" as sranwrp_pull import "https://raw.githubusercontent.com/aofarrel/SRANWRP/v1.1.7/tasks/processing_tasks.wdl" as sranwrp_processing import "https://raw.githubusercontent.com/aofarrel/usher-sampled-wdl/0.0.2/usher_sampled.wdl" as build_treesWF -import "https://raw.githubusercontent.com/aofarrel/parsevcf/1.1.2/vcf_to_diff.wdl" as diff +import "https://raw.githubusercontent.com/aofarrel/parsevcf/1.1.4/vcf_to_diff.wdl" as diff import "https://raw.githubusercontent.com/aofarrel/fastqc-wdl/main/fastqc.wdl" as fastqc workflow myco { @@ -17,6 +17,7 @@ workflow myco { Float bad_data_threshold = 0.05 Boolean decorate_tree = false Boolean fastqc_on_timeout = false + Boolean force_diff = false File? input_tree Int min_coverage = 10 File? ref_genome_for_tree_building @@ -32,6 +33,7 @@ workflow myco { bad_data_threshold: "If a diff file has higher than this percent (0.5 = 50%) bad data, don't include it in the tree" decorate_tree: "Should usher, taxonium, and NextStrain trees be generated? Requires input_tree and ref_genome" fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination times out (see timeout_decontam)" + force_diff: "If true and if decorate_tree is false, generate diff files. (Diff files will always be created if decorate_tree is true.)" input_tree: "Base tree to use if decorate_tree = true" min_coverage: "Positions with coverage below this value will be masked in diff files" ref_genome_for_tree_building: "Ref genome, ONLY used for building trees, NOT variant calling" @@ -43,6 +45,22 @@ workflow myco { typical_tb_masked_regions: "Bed file of regions to mask when making diff files" } + # WDL doesn't understand mutual exclusivity, so we have to get a little creative on + # our determination of whether or not we want to create diff files. + if(decorate_tree) { + Boolean create_diff_files_ = true + } + if(!decorate_tree) { + if(!force_diff) { + Boolean create_diff_files__ = false + } + if(force_diff) { + Boolean create_diff_files___ = true + } + } + Boolean create_diff_files = select_first([create_diff_files_, create_diff_files__, create_diff_files___]) + + call clockwork_ref_prepWF.ClockworkRefPrepTB call sranwrp_processing.extract_accessions_from_file as get_sample_IDs { @@ -115,7 +133,8 @@ workflow myco { bam = vcfs_and_bams.left, vcf = vcfs_and_bams.right, min_coverage = min_coverage, - tbmf = typical_tb_masked_regions + tbmf = typical_tb_masked_regions, + diffs = create_diff_files } } @@ -139,12 +158,16 @@ workflow myco { } if(decorate_tree) { + # diff files must exist if decorate_tree is true, so we can force the Array[File?]? + # into an Array[File] with the classic "select_first() with a bogus fallback" hack + Array[File] coerced_diffs = select_first([select_all(make_mask_and_diff.diff), minos_vcfs]) + Array[File] coerced_reports = select_first([select_all(make_mask_and_diff.report), minos_vcfs]) call build_treesWF.usher_sampled_diff_to_taxonium as trees { input: - diffs = make_mask_and_diff.diff, + diffs = coerced_diffs, i = input_tree, ref = ref_genome_for_tree_building, - coverage_reports = make_mask_and_diff.report, + coverage_reports = coerced_reports, bad_data_threshold = bad_data_threshold } } @@ -153,7 +176,7 @@ workflow myco { File download_report = cat_reports.outfile Array[File] minos = minos_vcfs Array[File] masks = make_mask_and_diff.mask_file - Array[File] diffs = make_mask_and_diff.diff + Array[File?] diffs = make_mask_and_diff.diff File? tax_tree = trees.taxonium_tree Array[File]? fastqc_reports = FastqcWF.reports } From 03c49b34844b55c51589decfa372067e62009183 Mon Sep 17 00:00:00 2001 From: Laptop Date: Mon, 6 Mar 2023 18:58:06 -0800 Subject: [PATCH 3/3] Bring parsevcf-1.1.4 over to non-sra myco --- myco.wdl | 29 ++++++++++++++++++++++++----- myco_sra.wdl | 35 +++++++++++++++-------------------- 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/myco.wdl b/myco.wdl index 29eeaf7..e3bd28e 100644 --- a/myco.wdl +++ b/myco.wdl @@ -4,7 +4,7 @@ import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/workflows import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/tasks/combined_decontamination.wdl" as clckwrk_combonation import "https://raw.githubusercontent.com/aofarrel/clockwork-wdl/2.7.0/tasks/variant_call_one_sample.wdl" as clckwrk_var_call import "https://raw.githubusercontent.com/aofarrel/usher-sampled-wdl/0.0.2/usher_sampled.wdl" as build_treesWF -import "https://raw.githubusercontent.com/aofarrel/parsevcf/1.1.3/vcf_to_diff.wdl" as diff +import "https://raw.githubusercontent.com/aofarrel/parsevcf/1.1.4/vcf_to_diff.wdl" as diff import "https://raw.githubusercontent.com/aofarrel/fastqc-wdl/main/fastqc.wdl" as fastqc workflow myco { @@ -15,6 +15,7 @@ workflow myco { Float bad_data_threshold = 0.05 Boolean decorate_tree = false Boolean fastqc_on_timeout = false + Boolean force_diff = false File? input_tree Int min_coverage = 10 File? ref_genome_for_tree_building @@ -28,7 +29,8 @@ workflow myco { parameter_meta { bad_data_threshold: "If a diff file has higher than this percent (0.5 = 50%) bad data, do not include it in the tree" decorate_tree: "Should usher, taxonium, and NextStrain trees be generated? Requires input_tree and ref_genome" - fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination times out (see timeout_decontam)" + fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination or variant calling times out" + force_diff: "If true and if decorate_tree is false, generate diff files. (Diff files will always be created if decorate_tree is true.)" input_tree: "Base tree to use if decorate_tree = true" min_coverage: "Positions with coverage below this value will be masked in diff files" paired_fastq_sets: "Nested array of paired fastqs, each inner array representing one samples worth of paired fastqs" @@ -41,6 +43,17 @@ workflow myco { typical_tb_masked_regions: "Bed file of regions to mask when making diff files" } + # WDL doesn't understand mutual exclusivity, so we have to get a little creative on + # our determination of whether or not we want to create diff files. + if(decorate_tree) { Boolean create_diff_files_ = true } + if(!decorate_tree) { + if(!force_diff){ Boolean create_diff_files__ = false } + if(force_diff) { Boolean create_diff_files___ = true } + } + Boolean create_diff_files = select_first([create_diff_files_, + create_diff_files__, + create_diff_files___]) + call clockwork_ref_prepWF.ClockworkRefPrepTB scatter(paired_fastqs in paired_fastq_sets) { @@ -78,6 +91,8 @@ workflow myco { } if(fastqc_on_timeout) { + # Note: This might be problematic in some situations -- may need to make this look like myco_sra + # But until then, I'm going to stick with this simpler implementation if(length(per_sample_decontam.check_this_fastq)>1 && length(varcall_with_array.check_this_fastq)>1) { Array[File] bad_fastqs_both = select_all(per_sample_decontam.check_this_fastq) } @@ -108,12 +123,16 @@ workflow myco { } if(decorate_tree) { + # diff files must exist if decorate_tree is true, so we can force the Array[File?]? + # into an Array[File] with the classic "select_first() with a bogus fallback" hack + Array[File] coerced_diffs = select_first([select_all(make_mask_and_diff.diff), minos_vcfs]) + Array[File] coerced_reports = select_first([select_all(make_mask_and_diff.report), minos_vcfs]) call build_treesWF.usher_sampled_diff_to_taxonium as trees { input: - diffs = make_mask_and_diff.diff, + diffs = coerced_diffs, i = input_tree, ref = ref_genome_for_tree_building, - coverage_reports = make_mask_and_diff.report, + coverage_reports = coerced_reports, bad_data_threshold = bad_data_threshold } } @@ -121,7 +140,7 @@ workflow myco { output { Array[File] minos = minos_vcfs Array[File] masks = make_mask_and_diff.mask_file - Array[File] diffs = make_mask_and_diff.diff + Array[File?] diffs = make_mask_and_diff.diff File? tax_tree = trees.taxonium_tree Array[File]? fastqc_reports = FastqcWF.reports } diff --git a/myco_sra.wdl b/myco_sra.wdl index d703fd9..ae37b3c 100644 --- a/myco_sra.wdl +++ b/myco_sra.wdl @@ -15,24 +15,24 @@ workflow myco { File typical_tb_masked_regions Float bad_data_threshold = 0.05 - Boolean decorate_tree = false - Boolean fastqc_on_timeout = false - Boolean force_diff = false + Boolean decorate_tree = false + Boolean fastqc_on_timeout = false + Boolean force_diff = false File? input_tree Int min_coverage = 10 File? ref_genome_for_tree_building - Int subsample_cutoff = 450 - Int subsample_seed = 1965 - Int timeout_decontam_part1 = 20 - Int timeout_decontam_part2 = 15 - Int timeout_variant_caller = 120 + Int subsample_cutoff = 450 + Int subsample_seed = 1965 + Int timeout_decontam_part1 = 20 + Int timeout_decontam_part2 = 15 + Int timeout_variant_caller = 120 } parameter_meta { biosample_accessions: "File of BioSample accessions to pull, one accession per line" bad_data_threshold: "If a diff file has higher than this percent (0.5 = 50%) bad data, don't include it in the tree" decorate_tree: "Should usher, taxonium, and NextStrain trees be generated? Requires input_tree and ref_genome" - fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination times out (see timeout_decontam)" + fastqc_on_timeout: "If true, fastqc one read from a sample when decontamination or variant calling times out" force_diff: "If true and if decorate_tree is false, generate diff files. (Diff files will always be created if decorate_tree is true.)" input_tree: "Base tree to use if decorate_tree = true" min_coverage: "Positions with coverage below this value will be masked in diff files" @@ -47,19 +47,14 @@ workflow myco { # WDL doesn't understand mutual exclusivity, so we have to get a little creative on # our determination of whether or not we want to create diff files. - if(decorate_tree) { - Boolean create_diff_files_ = true - } + if(decorate_tree) { Boolean create_diff_files_ = true } if(!decorate_tree) { - if(!force_diff) { - Boolean create_diff_files__ = false - } - if(force_diff) { - Boolean create_diff_files___ = true - } + if(!force_diff){ Boolean create_diff_files__ = false } + if(force_diff) { Boolean create_diff_files___ = true } } - Boolean create_diff_files = select_first([create_diff_files_, create_diff_files__, create_diff_files___]) - + Boolean create_diff_files = select_first([create_diff_files_, + create_diff_files__, + create_diff_files___]) call clockwork_ref_prepWF.ClockworkRefPrepTB