Skip to content

Commit

Permalink
Merge branch 'dev' of git.oxfordnanolabs.local:research/pomoxis into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
cjw85 committed Apr 18, 2018
2 parents cc9ed74 + 07e20e9 commit 855022e
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 33 deletions.
14 changes: 12 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ OS := $(shell uname)
CXX ?= g++

# Builds a cache of binaries which can just be copied for CI
BINARIES=minimap2 miniasm bwa racon samtools
BINARIES=minimap2 miniasm bwa racon samtools bcftools

BINCACHEDIR=bincache
$(BINCACHEDIR):
Expand All @@ -30,7 +30,6 @@ $(BINCACHEDIR)/racon: | $(BINCACHEDIR)
cd submodules/racon/build && make
cp submodules/racon/build/bin/racon $@


$(BINCACHEDIR)/bwa: | $(BINCACHEDIR)
@echo Making $(@F)
cd submodules/bwa && make
Expand All @@ -49,6 +48,17 @@ $(BINCACHEDIR)/samtools: | $(BINCACHEDIR)
cd submodules/samtools-${SAMVER} && make
cp submodules/samtools-${SAMVER}/samtools $@

BCFVER=1.7
$(BINCACHEDIR)/bcftools: | $(BINCACHEDIR)
@echo Making $(@F)
if [ ! -e submodules/bcftools-${BCFVER}.tar.bz2 ]; then \
cd submodules; \
wget https://github.com/samtools/bcftools/releases/download/${BCFVER}/bcftools-${BCFVER}.tar.bz2; \
fi
cd submodules && tar -xjf bcftools-${BCFVER}.tar.bz2
cd submodules/bcftools-${BCFVER} && make
cp submodules/bcftools-${BCFVER}/bcftools $@

venv: venv/bin/activate
IN_VENV=. ./venv/bin/activate

Expand Down
38 changes: 23 additions & 15 deletions pomoxis/common/catalogue_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

AlignPos = namedtuple('AlignPos', ('qpos', 'qbase', 'rpos', 'rbase'))
AlignSeg = namedtuple('AlignSeg', ('rname', 'qname', 'pairs', 'rlen'))
Error = namedtuple('Error', ('rp', 'rname', 'qp', 'qname', 'ref', 'match', 'read', 'counts', 'klass'))
Error = namedtuple('Error', ('rp', 'rname', 'qp', 'qname', 'ref', 'match', 'read', 'counts', 'klass', 'aggr_klass'))
Context = namedtuple('Context', ('p_i', 'qb', 'rb'))

_match_ = ' '
Expand All @@ -36,10 +36,10 @@
('ins HP split', lambda x: 'ins HP split' in x),
('sub HP join', lambda x: 'sub HP join' in x),
('del HP join', lambda x: 'del HP join' in x),
('multi ins >', lambda x: bool(re.search('multi .+ins >', x))),
('multi ins <', lambda x: bool(re.search('multi .+ins <', x))),
('multi del >', lambda x: bool(re.search('multi .+del >', x))),
('multi del <', lambda x: bool(re.search('multi .+del <', x))),
('multi ins >', lambda x: bool(re.search('multi .*ins >', x))),
('multi ins <', lambda x: bool(re.search('multi .*ins <', x))),
('multi del >', lambda x: bool(re.search('multi .*del >', x))),
('multi del <', lambda x: bool(re.search('multi .*del <', x))),
('HP del', lambda x: 'HP del' in x),
('HP ins', lambda x: 'HP ins' in x),
('HP sub', lambda x: 'HP sub' in x),
Expand Down Expand Up @@ -574,7 +574,7 @@ def _process_read(bam, read_num):
gen = (r for r in bam_obj)
for i in range(read_num + 1):
rec = next(gen)
if rec.is_unmapped or rec.is_supplementary:
if rec.is_unmapped or rec.is_supplementary or rec.is_secondary:
return
seg = AlignSeg(rname=rec.reference_name, qname=rec.query_name,
pairs=list(get_trimmed_pairs(rec)), rlen=rec.reference_length
Expand Down Expand Up @@ -604,7 +604,8 @@ def _process_seg(seg):
if qp is None:
qp = "~{}".format(approx_pos[1])
errors.append(Error(rp=rp, rname=seg.rname, qp=qp, qname=seg.qname, ref=ref,
match=match, read=read, counts=counts, klass=klass))
match=match, read=read, counts=counts, klass=klass,
aggr_klass=get_aggr_klass(klass)))
error_count[klass] += 1

logging.debug('Done processing {} aligned to {}'.format(seg.qname, seg.rname))
Expand Down Expand Up @@ -663,16 +664,22 @@ def get_aggr_counts(total_counts):
aggregate_counts = Counter()
max_indel = max(_indel_sizes_)
for key, val in total_counts.items():
for error_type, is_type in _error_groups_:
if is_type(key):
if '>' in error_type or '<' in error_type:
aggregate_counts['{} {}'.format(error_type, max_indel)] += val
else:
aggregate_counts[error_type] += val
break
aggregate_counts[get_aggr_klass(key)] += val
return aggregate_counts


def get_aggr_klass(klass):
max_indel = max(_indel_sizes_)
for error_type, is_type in _error_groups_:
if is_type(klass):
if '>' in error_type or '<' in error_type:
aggr_klass = '{} {}'.format(error_type, max_indel)
else:
aggr_klass = error_type
break
return aggr_klass


def main():
logging.basicConfig(format='[%(asctime)s - %(name)s] %(message)s', datefmt='%H:%M:%S', level=logging.INFO)
parser = argparse.ArgumentParser('catalogue_errors')
Expand Down Expand Up @@ -704,6 +711,7 @@ def main():
('query_pos', helper(attrgetter('qp'))),
('query_context', attrgetter('read')),
('class', attrgetter('klass')),
('aggr_class', attrgetter('aggr_klass')),
('n_ins', lambda e: len(e.counts['ins'])),
('n_del', lambda e: len(e.counts['del'])),
('n_sub', lambda e: len(e.counts['sub'])),
Expand All @@ -721,7 +729,7 @@ def main():
total_ref_length[ref_name] += ref_length
for e in errors:
db_fh.write(_sep_.join((str(h[1](e)) for h in headers)) + '\n')
txt_fh.write("Ref Pos: {}, {} Pos {}, {}\n".format(e.rp, e.qname, e.qp, e.klass))
txt_fh.write("Ref Pos: {}, {} Pos {}, {}, {}\n".format(e.rp, e.qname, e.qp, e.klass, e.aggr_klass))
txt_fh.write(e.ref + "\n")
txt_fh.write(e.match + "\n")
txt_fh.write(e.read + "\n")
Expand Down
20 changes: 5 additions & 15 deletions scripts/mini_align
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Align fastq/a formatted reads to a genome using minimap2.
prior to alignment.
-t alignment threads (default: 1).
-p output file prefix (default: reads).
-m pass bam through samtools calmd to fill MD tag."
-m fill MD tag."

PREFIX="reads"
ALIGN_OPTS="-x map-ont"
Expand All @@ -28,15 +28,14 @@ SORT=""
CHUNK=""
rflag=false
iflag=false
mdflag=false
while getopts ':hr:i:p:aPmnc:t:' option; do
case "$option" in
h ) echo "$usage" >&2; exit;;
r ) rflag=true; REFERENCE=$OPTARG;;
i ) iflag=true; INPUT=$OPTARG;;
I ) INDEX_SIZE=$OPTARG;;
P ) FILTER="-F 2308";;
m ) mdflag=true;;
m ) ALIGN_OPTS="${ALIGN_OPTS} --MD";;
n ) SORT="-n";;
p ) PREFIX=$OPTARG;;
a ) ALIGN_OPTS="${ALIGN_OPTS} -A1 -B2 -O2 -E1";;
Expand Down Expand Up @@ -81,16 +80,7 @@ if [ "$CHUNK" != "" ]; then
INPUT=${INPUT}.chunks
fi

if ${mdflag}; then
echo "Filling MD tag"
minimap2 ${ALIGN_OPTS} -t ${THREADS} -a ${REFERENCE}.mmi ${INPUT} |
samtools view ${FILTER} -uS - |
samtools calmd -b - ${REFERENCE} 2>/dev/null |
samtools sort -@ ${THREADS} ${SORT} -l 9 -o ${PREFIX}.bam -
else
echo "Not filling MD tag"
minimap2 ${ALIGN_OPTS} -t ${THREADS} -a ${REFERENCE}.mmi ${INPUT} |
samtools view -@ ${THREADS} -T ${REFERENCE} ${FILTER} -bS - |
samtools sort -@ ${THREADS} ${SORT} -l 9 -o ${PREFIX}.bam -
fi
minimap2 ${ALIGN_OPTS} -t ${THREADS} -a ${REFERENCE}.mmi ${INPUT} |
samtools view -@ ${THREADS} -T ${REFERENCE} ${FILTER} -bS - |
samtools sort -@ ${THREADS} ${SORT} -l 9 -o ${PREFIX}.bam -
samtools index ${PREFIX}.bam ${PREFIX}.bam.bai
2 changes: 1 addition & 1 deletion submodules/minimap2

0 comments on commit 855022e

Please sign in to comment.