-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfilter_pan.nf
75 lines (57 loc) · 1.97 KB
/
filter_pan.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
nextflow.enable.dsl=2

// Parameter defaults. Values supplied on the command line (--input, --prab,
// --help) take precedence over these assignments.
params.input = null
params.prab = null
params.help = false

// Plain path strings (not channels) consumed by the workflow block below.
// They remain empty strings when the matching parameter was not supplied.
pangenome_ref_input = params.input != null ? params.input : ""
prab_input = params.prab != null ? params.prab : ""

// Print usage and stop before any input validation so that --help always works.
if (params.help) {
    println(
"""
M B O V P A N (v1)
=============================
filter mbovpan pangenome genes against the M. bovis reference with blastn
usage: nextflow run mbovpan/dev_nf_scripts [options] --input ./path/to/input --prab ./path/to/prab
options:
--input
The sequence file used as the blastn query (the pangenome reference sequences produced by mbovpan)
--prab
The file handed to scripts/blast_result_filter.py (the gene presence/absence table produced by mbovpan)
--help
Prints this help message
=============================
"""
    )
    exit(0)
}

// Both inputs feed the single filter_pan process; without either one the
// process cannot run, so fail early with an actionable message.
// NOTE(review): the --prab description above is inferred from the parameter
// name; confirm against scripts/blast_result_filter.py.
if (!pangenome_ref_input || !prab_input) {
    error "Both --input and --prab are required. Run with --help for usage."
}

// Echo the resolved query path so the user can verify what will be processed.
println "${pangenome_ref_input}"
/*
 * filter_pan
 *
 * Runs blastn with `x` as the query against the database at
 * <projectDir>/ref/mbovis_reference, writes the comma-delimited hits with a
 * header row to mb.out.csv, then runs scripts/blast_result_filter.py on `y`.
 *
 * input:
 *   x - sequence file passed to `blastn -query`
 *   y - file passed as the only argument to blast_result_filter.py
 * output:
 *   mb.out.csv               - blast hits with a header line
 *   mbovis_filtered_cogs.csv - not written by any command visible here;
 *                              presumably produced by blast_result_filter.py
 */
process filter_pan {
    // Directives take `name value` form; the `name = value` form does not
    // configure the process, so outputs were never published.
    publishDir "./"

    conda "bioconda::blast=2.9.0 pandas"

    //errorStrategy 'ignore'

    // Boolean rather than the string 'true': forward the task stdout to the console.
    debug true

    input:
    path x
    path y

    output:
    path 'mb.out.csv'
    path 'mbovis_filtered_cogs.csv'

    script:
    """
    # first determine the genes that are present and use blast to find their true annotation
    blastn -query $x -max_target_seqs 1 -db $workflow.projectDir/ref/mbovis_reference -out mb.out -outfmt "6 delim=, qseqid sseqid length qstart qend sstart send qlen slen"
    echo "qseqid,sseqid,length,qstart,qend,sstart,send,qlen,slen" > heading.txt
    # write (not append) so the CSV always holds exactly one header plus the hits
    cat heading.txt mb.out > mb.out.csv
    # once mb.out is created, we can use python to 1) create length %, 2) filter by 75% or higher, and 3) reduce repetitive blast results
    python $workflow.projectDir/scripts/blast_result_filter.py $y
    """
}
// Entry workflow: wrap the two input paths in channels and run filter_pan once.
workflow {
    // checkIfExists makes a missing or mistyped path a hard error; without it
    // the channel is empty and the run ends without executing the process.
    query_ch = Channel.fromPath("${pangenome_ref_input}", checkIfExists: true)
    prab_ch = Channel.fromPath("${prab_input}", checkIfExists: true)

    filter_pan(query_ch, prab_ch)
}