@@ -157,6 +157,7 @@ task BlastContigs_refined_gsnap_out {
157
157
File assembly_nt_refseq_fasta
158
158
File duplicate_cluster_sizes_tsv
159
159
File lineage_db
160
+ File accession2taxid
160
161
File taxon_blacklist
161
162
File deuterostome_db
162
163
Boolean use_deuterostome_filter
@@ -171,7 +172,7 @@ task BlastContigs_refined_gsnap_out {
171
172
--input-files '[["~{gsnap_out_gsnap_m8} ", "~{gsnap_out_gsnap_deduped_m8} ", "~{gsnap_out_gsnap_hitsummary_tab} ", "~{gsnap_out_gsnap_counts_with_dcr_json} "], ["~{assembly_contigs_fasta} ", "~{assembly_scaffolds_fasta} ", "~{assembly_read_contig_sam} ", "~{assembly_contig_stats_json} "], ["~{assembly_nt_refseq_fasta} "], ["~{duplicate_cluster_sizes_tsv} "]]' \
172
173
--output-files '["assembly/gsnap.blast.m8", "assembly/gsnap.reassigned.m8", "assembly/gsnap.hitsummary2.tab", "assembly/refined_gsnap_counts_with_dcr.json", "assembly/gsnap_contig_summary.json", "assembly/gsnap.blast.top.m8"]' \
173
174
--output-dir-s3 '~{s3_wd_uri} ' \
174
- --additional-files '{"lineage_db": "~{lineage_db} ", "taxon_blacklist": "~{taxon_blacklist} ", "deuterostome_db": "~{if use_deuterostome_filter then '~{deuterostome_db} ' else ''}"}' \
175
+ --additional-files '{"lineage_db": "~{lineage_db} ", "accession2taxid": "~{accession2taxid} ", "taxon_blacklist": "~{taxon_blacklist} ", "deuterostome_db": "~{if use_deuterostome_filter then '~{deuterostome_db} ' else ''}"}' \
175
176
--additional-attributes '{"db_type": "nt", "use_taxon_whitelist": ~{use_taxon_whitelist} }'
176
177
>>>
177
178
output {
@@ -204,6 +205,7 @@ task BlastContigs_refined_rapsearch2_out {
204
205
File assembly_nr_refseq_fasta
205
206
File duplicate_cluster_sizes_tsv
206
207
File lineage_db
208
+ File accession2taxid
207
209
File taxon_blacklist
208
210
Boolean use_taxon_whitelist
209
211
}
@@ -216,7 +218,7 @@ task BlastContigs_refined_rapsearch2_out {
216
218
--input-files '[["~{rapsearch2_out_rapsearch2_m8} ", "~{rapsearch2_out_rapsearch2_deduped_m8} ", "~{rapsearch2_out_rapsearch2_hitsummary_tab} ", "~{rapsearch2_out_rapsearch2_counts_with_dcr_json} "], ["~{assembly_contigs_fasta} ", "~{assembly_scaffolds_fasta} ", "~{assembly_read_contig_sam} ", "~{assembly_contig_stats_json} "], ["~{assembly_nr_refseq_fasta} "], ["~{duplicate_cluster_sizes_tsv} "]]' \
217
219
--output-files '["assembly/rapsearch2.blast.m8", "assembly/rapsearch2.reassigned.m8", "assembly/rapsearch2.hitsummary2.tab", "assembly/refined_rapsearch2_counts_with_dcr.json", "assembly/rapsearch2_contig_summary.json", "assembly/rapsearch2.blast.top.m8"]' \
218
220
--output-dir-s3 '~{s3_wd_uri} ' \
219
- --additional-files '{"lineage_db": "~{lineage_db} ", "taxon_blacklist": "~{taxon_blacklist} "}' \
221
+ --additional-files '{"lineage_db": "~{lineage_db} ", "accession2taxid": "~{accession2taxid} ", "taxon_blacklist": "~{taxon_blacklist} "}' \
220
222
--additional-attributes '{"db_type": "nr", "use_taxon_whitelist": ~{use_taxon_whitelist} }'
221
223
>>>
222
224
output {
@@ -489,6 +491,7 @@ workflow czid_postprocess {
489
491
String nr_db = "s3://czid-public-references/ncbi-sources/2021-01-22/nr"
490
492
File nr_loc_db = "s3://czid-public-references/alignment_data/2021-01-22/nr_loc.db"
491
493
File lineage_db = "s3://czid-public-references/taxonomy/2021-01-22/taxid-lineages.db"
494
+ File accession2taxid_db = "s3://czid-public-references/ncbi-indexes-prod/2021-01-22/index-generation-2/accession2taxid.marisa"
492
495
File taxon_blacklist = "s3://czid-public-references/taxonomy/2021-01-22/taxon_blacklist.txt"
493
496
File deuterostome_db = "s3://czid-public-references/taxonomy/2021-01-22/deuterostome_taxids.txt"
494
497
Boolean use_deuterostome_filter = true
@@ -556,6 +559,7 @@ workflow czid_postprocess {
556
559
assembly_nt_refseq_fasta = DownloadAccessions_gsnap_accessions_out .assembly_nt_refseq_fasta ,
557
560
duplicate_cluster_sizes_tsv = duplicate_cluster_sizes_tsv ,
558
561
lineage_db = lineage_db ,
562
+ accession2taxid = accession2taxid_db ,
559
563
taxon_blacklist = taxon_blacklist ,
560
564
deuterostome_db = deuterostome_db ,
561
565
use_deuterostome_filter = use_deuterostome_filter ,
@@ -577,6 +581,7 @@ workflow czid_postprocess {
577
581
assembly_nr_refseq_fasta = DownloadAccessions_rapsearch2_accessions_out .assembly_nr_refseq_fasta ,
578
582
duplicate_cluster_sizes_tsv = duplicate_cluster_sizes_tsv ,
579
583
lineage_db = lineage_db ,
584
+ accession2taxid = accession2taxid_db ,
580
585
taxon_blacklist = taxon_blacklist ,
581
586
use_taxon_whitelist = use_taxon_whitelist
582
587
}
0 commit comments