-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
31 lines (28 loc) · 1.7 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Created by Michal Bukowski (michal.bukowski@tuta.io) under GPL-3.0 license
# Locations of Pfam database (Pfam-A.hmm file) and directory with genomic sequences.
# Pfam database file (Pfam-A.hmm).
pfam_db: 'input/Pfam-A.hmm'
# Directory with genomic sequences.
gen_dir: 'genomes'
# Dictionary describing domain architectures to search for as pairs of
# architecture custom label : regex describing the architecture with a prefix
# (GRP|, NAM| or ACC|) that indicates whether domain group labels, Pfam names
# or Pfam accession version numbers are used. Domains are assigned to groups
# in the input/domain.tsv file.
# In regex strings do not use characters that may confuse Bash (e.g. &;`'"),
# only letters, numbers, dash and underscore symbols are advised.
# Example architectures:
# GRP|CAT-CWT : find sequences with two domains only, one from
# the group CAT and one from the group CWT,
# the former directly precedes the latter
# GRP|.*CWT.*CAT.* : similar to the previous one, but any other domains
# may occur before, between and after them (.*), and
# the order of the CAT and CWT domains is reversed
# NAM|Peptidase_M14-SH3_5 : like the first example, but instead of domains
# from certain groups, find exactly two domains
# referenced by Pfam names Peptidase_M14 and SH3_5
# ACC|PF01551\.25-PF08460\.13 : instead of domains from certain groups, find
# exactly two domains referenced by Pfam accession
# version numbers PF01551.25 and PF08460.13
archs:
  # Architecture custom label -> prefixed regex (GRP| or NAM| used below).
  # Entries are indented so they form one mapping under `archs`; values are
  # single-quoted so regex metacharacters are always read as plain strings.
  CAT-CWT: 'GRP|CAT-CWT'
  xCWTxCATx: 'GRP|.*CWT.*CAT.*'
  M23-SH3: 'NAM|Peptidase_M23-SH3_5'
  M23-SLH: 'NAM|Peptidase_M23-SLH'