-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
103 lines (92 loc) · 2.8 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import subprocess, sys
configfile: "config.yaml"

# DATASETS and SAMPLES are parallel, index-aligned lists with one element per
# (dataset, entry) pair found in the config: DATASETS[i] is the top-level
# config key that SAMPLES[i] sits under. A dataset with N entries therefore
# appears N times in DATASETS.
DATASETS = [d for d in config for s in config[d]]
SAMPLES = [s for d in config for s in config[d]]

VERSION = "1.0"

# The commit ID is used only in the banner below, so failing to obtain it
# (git not installed, or the workflow was unpacked from an archive rather than
# cloned) must not stop the workflow from loading.
try:
    COMMIT = subprocess.check_output(
        ['git', 'rev-parse', '--verify', 'HEAD'],
        stderr=subprocess.DEVNULL
    ).strip().decode()
except (subprocess.CalledProcessError, OSError):
    COMMIT = "unknown"

# Startup banner goes to stderr.
sys.stderr.write("Running sga-index\n")
sys.stderr.write("Version: {0}\n".format(VERSION))
sys.stderr.write("Commit ID: {0}\n".format(COMMIT))
# ---------------------------------------------------------------------------
# PorpidPostproc parameters
# Each value below is forwarded to the rule named in its section header.
# The trailing comment records the documented default for that setting.
# ---------------------------------------------------------------------------

# -- demux --
chunk_size = 100000             # default 100000
# Also accepts "Nextera_primer".
# NOTE(review): the documented default is spelled "Index_Primer" (capital P)
# while the value set here is "Index_primer" - confirm which casing is expected.
index_type = "Index_primer"     # default "Index_Primer"
error_rate = 0.01               # default 0.01
min_length = 1200               # default 2100
max_length = 6000               # default 4000

# -- consensus --
min_reads = 5                   # default 5

# -- contam --
cluster_thresh = 0.015          # default 0.015
proportion_thresh = 0.2         # default 0.2
dist_thresh = 0.015             # default 0.015

# -- postproc --
agreement_thresh = 0.7          # default 0.7
max_alignment_reads = 1000      # default 1000
# Default target: one sga-index archive per dataset.
# DATASETS contains each dataset name once per entry under it in the config,
# so it is collapsed to unique names (first-seen order kept) before expanding;
# otherwise the same archive would be listed as a target once per entry.
rule all:
    input:
        expand("dataset/{dataset}--sga-index.tar.gz",
               dataset = list(dict.fromkeys(DATASETS)))
# demux: runs scripts/demux.jl on one dataset's raw reads.
#   input  - raw-reads/{dataset}.fastq.gz
#   output - the demux/ directory plus the quality, demux and reject report
#            CSVs under dataset/{dataset}/
#   params - the module-level demux settings, the full SAMPLES list (every
#            dataset's entries, not only this one's), and this dataset's
#            entry from the loaded config
rule demux:
    input:
        "raw-reads/{dataset}.fastq.gz"
    output:
        directory("dataset/{dataset}/demux"),
        "dataset/{dataset}/quality_report.csv",
        "dataset/{dataset}/demux_report.csv",
        "dataset/{dataset}/reject_report.csv"
    params:
        samples = SAMPLES,
        chunk_size = chunk_size,
        error_rate = error_rate,
        min_length = min_length,
        max_length = max_length,
        index_type = index_type,
        # Resolved per job: the config section keyed by the dataset wildcard.
        config = lambda wildcards: config[wildcards.dataset]
    script:
        "scripts/demux.jl"
# consensus: runs scripts/consensus.jl over a dataset's demux/ directory and
# produces the dataset's consensus/ directory.
#   params - min_reads plus this dataset's entry from the loaded config
rule consensus:
    input:
        "dataset/{dataset}/demux"
    output:
        directory("dataset/{dataset}/consensus")
    params:
        min_reads = min_reads,
        # Resolved per job: the config section keyed by the dataset wildcard.
        config = lambda wildcards: config[wildcards.dataset]
    script:
        "scripts/consensus.jl"
# contam: runs scripts/contam.jl with a dataset's consensus/ directory and the
# fixed panel file panels/contam_panel.fasta (named input "panel"), writing
# dataset/{dataset}/contam_report.csv.
#   params - the three contam thresholds plus this dataset's entry from the
#            loaded config
rule contam:
    input:
        "dataset/{dataset}/consensus",
        panel = "panels/contam_panel.fasta"
    output:
        "dataset/{dataset}/contam_report.csv"
    params:
        proportion_thresh = proportion_thresh,
        cluster_thresh = cluster_thresh,
        dist_thresh = dist_thresh,
        # Resolved per job: the config section keyed by the dataset wildcard.
        config = lambda wildcards: config[wildcards.dataset]
    script:
        "scripts/contam.jl"
# postproc: runs scripts/postproc-sga-templates.jl once a dataset's
# consensus/ directory, demux/ directory and contam report all exist, and
# produces the dataset's filtered/ directory.
#   params - agreement_thresh and max_alignment_reads (no per-dataset config
#            is passed to this rule)
rule postproc:
    input:
        "dataset/{dataset}/consensus",
        "dataset/{dataset}/demux",
        "dataset/{dataset}/contam_report.csv"
    output:
        directory("dataset/{dataset}/filtered")
    params:
        agreement_thresh = agreement_thresh,
        max_alignment_reads = max_alignment_reads
    script:
        "scripts/postproc-sga-templates.jl"
# tar: runs scripts/tar.jl on a dataset's filtered/ directory to produce
# dataset/{dataset}--sga-index.tar.gz, the file requested by rule all.
#   params - the complete DATASETS and SAMPLES lists (all datasets, not only
#            the one matched by the wildcard)
rule tar:
    input:
        "dataset/{dataset}/filtered"
    output:
        "dataset/{dataset}--sga-index.tar.gz"
    params:
        datasets = DATASETS,
        samples = SAMPLES
    script:
        "scripts/tar.jl"