diff --git a/doc/generate_inputs_md.sh b/doc/generate_inputs_md.sh index 0b5408a..076572f 100755 --- a/doc/generate_inputs_md.sh +++ b/doc/generate_inputs_md.sh @@ -4,9 +4,9 @@ # and pip3 install git+https://github.com/Nicceboy/python-markdown-generator echo "grabbing inputs from myco_sra..." -java -jar /Applications/womtool-76.jar inputs myco_sra.wdl > raw.txt +java -jar /Applications/womtool-85.jar inputs myco_sra.wdl > raw.txt echo "grabbing inputs from myco..." -java -jar /Applications/womtool-76.jar inputs myco.wdl >> raw.txt +java -jar /Applications/womtool-85.jar inputs myco.wdl >> raw.txt echo "processing..." sort raw.txt > sorted.txt uniq sorted.txt > unique.txt @@ -102,12 +102,12 @@ for input_variable in task_level: if input_variable["name"] in filename_vars: input_variable["description"] = "Override default output file name with this string" not_runtime.append(input_variable) - elif input_variable["name"] == "histograms": - input_variable["description"] = "Should coverage histograms be output?" - not_runtime.append(input_variable) elif input_variable["name"] == "crash_on_timeout": input_variable["description"] = "If this task times out, should it stop the whole pipeline (true), or should we just discard this sample and move on (false)?" not_runtime.append(input_variable) + elif input_variable["name"] == "crash_on_error": + input_variable["description"] = "If this task errors out, should it stop the whole pipeline (true), or should we just discard this sample and move on (false)? Note that errors that crash the VM (such as running out of space on a GCP instance) will stop the whole pipeline regardless of this setting." 
+ not_runtime.append(input_variable) elif input_variable["name"] == "subsample_cutoff": input_variable["description"] = "If a fastq file is larger than than size in MB, subsample it with seqtk (set to -1 to disable)" not_runtime.append(input_variable) @@ -125,6 +125,10 @@ for input_variable in task_level: elif input_variable["name"] == "mem_height": input_variable["description"] = "cortex mem_height option. Must match what was used when reference_prepare was run (in other words do not set this variable unless you are also adjusting the reference preparation task)" not_runtime.append(input_variable) + # diffs and masking + elif input_variable["name"] == "histograms": + input_variable["description"] = "Should coverage histograms be output?" + not_runtime.append(input_variable) else: input_variable["description"] = "" # need this or else the table is missing a column not_runtime.append(input_variable) diff --git a/doc/inputs.md b/doc/inputs.md index 69ba0fd..de2a345 100644 --- a/doc/inputs.md +++ b/doc/inputs.md @@ -13,7 +13,8 @@ See /inputs/example_inputs.json for examples. | bad_data_threshold | Float | 0.05 | If a diff file has higher than this percent (0.5 = 50%) bad data, do not include it in the tree | | biosample_accessions | File | | fastq input -- please see running_myco.md for more information | | decorate_tree | Boolean | false | Should usher, taxonium, and NextStrain trees be generated? Requires input_tree and ref_genome | -| fastqc_on_timeout | Boolean | false | If true, fastqc one read from a sample when decontamination times out (see timeout_decontam) | +| fastqc_on_timeout | Boolean | false | If true, fastqc one read from a sample when decontamination or variant calling times out | +| force_diff | Boolean | false | If true and if decorate_tree is false, generate diff files. (Diff files will always be created if decorate_tree is true.) | | input_tree | File? 
| | Base tree to use if decorate_tree = true | | min_coverage | Int | 10 | Positions with coverage below this value will be masked in diff files | | paired_fastq_sets | Array | | fastq input -- please see running_myco.md for more information | @@ -50,6 +51,7 @@ If you are on a backend that does not support call cacheing, you can use the `bl | per_sample_decontam | threads | Int? | | Try to use this many threads for decontamination. Note that actual number of threads also relies on your hardware. | | per_sample_decontam | verbose | Boolean | true | | | trees | outfile | String | \'tree\' | Override default output file name with this string | +| varcall_with_array | crash_on_error | Boolean | false | If this task errors out, should it stop the whole pipeline (true), or should we just discard this sample and move on (false)? Note that errors that crash the VM (such as running out of space on a GCP instance) will stop the whole pipeline regardless of this setting. | | varcall_with_array | crash_on_timeout | Boolean | false | If this task times out, should it stop the whole pipeline (true), or should we just discard this sample and move on (false)? | | varcall_with_array | debug | Boolean | false | Do not clean up any files and be verbose | | varcall_with_array | mem_height | Int? | | cortex mem_height option. Must match what was used when reference_prepare was run (in other words do not set this variable unless you are also adjusting the reference preparation task) | @@ -62,9 +64,9 @@ These variables adjust runtime attributes, which includes hardware settings. See |:---:|:---:|:---:|:---:|:---:| | cat_reports | disk_size | Int | 10 | Disk size, in GB. Note that since cannot auto-scale as it cannot anticipate the size of reads from SRA. | | get_sample_IDs | preempt | Int | 1 | How many times should this task be attempted on a preemptible instance before running on a non-preemptible instance? 
| -| make_mask_and_diff | addldisk | Int | 250 | Additional disk size, in GB, on top of auto-scaling disk size. | -| make_mask_and_diff | cpu | Int | 16 | Number of CPUs (cores) to request from GCP. | -| make_mask_and_diff | memory | Int | 32 | Amount of memory, in GB, to request from GCP. | +| make_mask_and_diff | addldisk | Int | 10 | Additional disk size, in GB, on top of auto-scaling disk size. | +| make_mask_and_diff | cpu | Int | 8 | Number of CPUs (cores) to request from GCP. | +| make_mask_and_diff | memory | Int | 16 | Amount of memory, in GB, to request from GCP. | | make_mask_and_diff | preempt | Int | 1 | How many times should this task be attempted on a preemptible instance before running on a non-preemptible instance? | | make_mask_and_diff | retries | Int | 1 | How many times should we retry this task if it fails after it exhausts all uses of preemptibles? | | per_sample_decontam | addldisk | Int | 100 | Additional disk size, in GB, on top of auto-scaling disk size. | diff --git a/myco.wdl b/myco.wdl index e3bd28e..ed8942d 100644 --- a/myco.wdl +++ b/myco.wdl @@ -118,7 +118,8 @@ workflow myco { bam = vcfs_and_bams.left, vcf = vcfs_and_bams.right, min_coverage = min_coverage, - tbmf = typical_tb_masked_regions + tbmf = typical_tb_masked_regions, + diffs = create_diff_files } } diff --git a/myco_sra.wdl b/myco_sra.wdl index ae37b3c..6bd2541 100644 --- a/myco_sra.wdl +++ b/myco_sra.wdl @@ -39,9 +39,9 @@ workflow myco { ref_genome_for_tree_building: "Ref genome, ONLY used for building trees, NOT variant calling" subsample_cutoff: "If a fastq file is larger than than size in MB, subsample it with seqtk (set to -1 to disable)" subsample_seed: "Seed used for subsampling with seqtk" - timeout_decontam_part1: "Discard any sample that is still running in clockwork map_reads after this many minutes (set to -1 to never timeout)" - timeout_decontam_part2: "Discard any sample that is still running in clockwork rm_contam after this many minutes (set to -1 to never 
timeout)" - timeout_variant_caller: "Discard any sample that is still running in clockwork variant_call_one_sample after this many minutes (set to -1 to never timeout)" + timeout_decontam_part1: "Discard any sample that is still running in clockwork map_reads after this many minutes (set to 0 to never timeout)" + timeout_decontam_part2: "Discard any sample that is still running in clockwork rm_contam after this many minutes (set to 0 to never timeout)" + timeout_variant_caller: "Discard any sample that is still running in clockwork variant_call_one_sample after this many minutes (set to 0 to never timeout)" typical_tb_masked_regions: "Bed file of regions to mask when making diff files" }