Skip to content

Commit

Permalink
Merge branch 'narco' into 'dev'
Browse files Browse the repository at this point in the history
Narco

See merge request research/pomoxis!44
  • Loading branch information
cjw85 committed Jun 20, 2018
2 parents 6865d3d + 2f0c689 commit eac4727
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 15 deletions.
18 changes: 14 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
.PHONY: scrappy install docs
OS := $(shell uname)
.PHONY: install docs
OS := $(shell uname | tr '[:upper:]' '[:lower:]')

# for porechop on travis (or other platform with older gcc)
CXX ?= g++

# Builds a cache of binaries which can just be copied for CI
BINARIES=minimap2 miniasm bwa racon samtools bcftools
BINARIES=minimap2 miniasm bwa racon samtools bcftools seqkit

BINCACHEDIR=bincache
$(BINCACHEDIR):
Expand Down Expand Up @@ -59,6 +59,16 @@ $(BINCACHEDIR)/bcftools: | $(BINCACHEDIR)
cd submodules/bcftools-${BCFVER} && make
cp submodules/bcftools-${BCFVER}/bcftools $@

# seqkit is not compiled: a prebuilt release tarball is downloaded into
# submodules/, unpacked there, and the binary is copied into the cache.
# The archive name is built from ${OS} and is fixed to the amd64 build.
#
# The tarball is downloaded under a temporary ".part" name and renamed only
# when wget succeeds. Otherwise an interrupted download would leave a
# truncated archive that passes the existence check on the next run, and the
# extraction would then fail every time until the file was removed by hand.
SEQKITVER=0.8.0
SEQKITTAR=seqkit_${OS}_amd64.tar.gz
$(BINCACHEDIR)/seqkit: | $(BINCACHEDIR)
	@echo Making $(@F)
	if [ ! -e submodules/${SEQKITTAR} ]; then \
		cd submodules && \
		wget -O ${SEQKITTAR}.part https://github.com/shenwei356/seqkit/releases/download/v${SEQKITVER}/${SEQKITTAR} && \
		mv ${SEQKITTAR}.part ${SEQKITTAR}; \
	fi
	cd submodules && tar -xzvf ${SEQKITTAR}
	cp submodules/seqkit $@

venv: venv/bin/activate
IN_VENV=. ./venv/bin/activate

Expand All @@ -71,7 +81,7 @@ bwapy: venv
cd submodules/bwapy && make bwa/libbwa.a
${IN_VENV} && cd submodules/bwapy && python setup.py install

install: venv bwapy scrappy | $(addprefix $(BINCACHEDIR)/, $(BINARIES))
install: venv bwapy | $(addprefix $(BINCACHEDIR)/, $(BINARIES))
${IN_VENV} && python setup.py install

# You can set these variables from the command line.
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ the `make install` step:
# For porechop to be compiled on older systems set these, e.g.:
export CXX="g++-4.9" CC="gcc-4.9"

Note also that racon requires `gcc>=4.8.5` to
[compile smoothly](https://github.com/isovic/racon/issues/57).

Running the above within a pre-existing virtual environment may well fail;
advanced users may wish to simply run the `setup.py` file in the standard manner
after compiling the third party programs as in the `Makefile`.
Expand Down
58 changes: 47 additions & 11 deletions scripts/mini_assemble
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,32 @@ Assemble fastq/fasta formatted reads and perform POA consensus.
-o output folder (default: assm).
-p output file prefix (default: reads).
-t number of minimap and racon threads (default: 1).
-m number of racon rounds (default: 4).
-n number of racon shuffles (default: 1).
-c trim adapters from reads prior to everything else.
-e error correct longest e% of reads prior to assembly."

# Default values for the command-line options; each may be overridden by the
# getopts loop that follows.
# -o: output folder.
OUTPUT="assm"
# -p: output file prefix.
NAME="reads"
# -t: number of minimap and racon threads.
THREADS=1
# -m: number of racon rounds.
ROUNDS=4
# -n: number of racon shuffles.
SHUFFLES=1
# Set to true when -q is given.
USEQUAL=false
# Set to true when -c is given (trim adapters from reads first).
CHOP=false
# Record whether -i, -r and -e respectively were supplied.
iflag=false
rflag=false
eflag=false
while getopts ':hi:q:r:o:p:t:ce:' option; do
while getopts ':hi:q:r:o:p:t:m:n:ce:' option; do
case "$option" in
h ) echo "$usage" >&2; exit;;
i ) iflag=true; INPUT=$OPTARG;;
q ) USEQUAL=true;;
r ) rflag=true; REF=$OPTARG;;
r ) rflag=true; REF=$(cd "$(dirname "$OPTARG")"; pwd)/$(basename "$OPTARG");;
o ) OUTPUT=$OPTARG;;
p ) NAME=$OPTARG;;
t ) THREADS=$OPTARG;;
m ) ROUNDS=$OPTARG;;
n ) SHUFFLES=$OPTARG;;
c ) CHOP=true;;
e ) eflag=true; ERRCORR=$OPTARG;;
\? ) echo "Invalid option: -${OPTARG}." >&2; exit 1;;
Expand Down Expand Up @@ -119,16 +125,46 @@ else
DRAFT=${REF}
fi

for ROUND in {01..04}; do
echo "Running round ${ROUND} consensus..."
READS2TIGS=reads2contigs_${ROUND}.paf
NEWDRAFT=racon_${ROUND}.fasta
minimap2 -t${THREADS} ${DRAFT} ${READS} > ${READS2TIGS}
racon ${RACONOPTS} -t ${THREADS} -q -1 ${READS} ${READS2TIGS} ${DRAFT} > ${NEWDRAFT}
DRAFT=${NEWDRAFT}
done;
# Run ${SHUFFLES} independent consensus passes, each consisting of ${ROUNDS}
# rounds of minimap2 + racon. `seq -w` pads the counters with leading zeros to
# equal width, so the padded value is what appears in the output file names.
for SHUF in $(seq -w 1 ${SHUFFLES}); do
echo "Running racon read shuffle ${SHUF}..."
# Every shuffle starts again from the same draft, not from the result of the
# previous shuffle.
SCAFFOLD=${DRAFT}
if [ ${SHUF} -ne 1 ]; then
# Shuffles after the first use a reordered copy of the reads.
echo "Shuffling reads..."
SHUFREADS=shuffled_${SHUF}_${READS}
seqkit shuffle $READS > ${SHUFREADS}
else
# The first shuffle uses the reads as they are.
SHUFREADS=${READS}
if [ ${SHUFFLES} -ne 1 ]; then
# When several shuffles are requested, link the unshuffled reads under the
# shuffled_<n>_ name too, so a file with that name exists for every shuffle.
ln -s ${READS} shuffled_${SHUF}_${READS}
fi
fi

# Each round maps the reads onto the current scaffold, then has racon write a
# new scaffold from those mappings; the new scaffold feeds the next round.
for ROUND in $(seq -w 1 ${ROUNDS}); do
echo "Running round ${ROUND} consensus..."
READS2TIGS=reads2contigs_${SHUF}_${ROUND}.paf
NEWSCAF=racon_${SHUF}_${ROUND}.fasta

minimap2 -t${THREADS} ${SCAFFOLD} ${SHUFREADS} > ${READS2TIGS}
racon ${RACONOPTS} -t ${THREADS} -q -1 ${SHUFREADS} ${READS2TIGS} ${SCAFFOLD} > ${NEWSCAF}
SCAFFOLD=${NEWSCAF}
done

done


FINAL=${NAME}_final.fa
sed 's/_C:.\+$//' ${DRAFT} > ${FINAL}
# Produce the final assembly.
#  - With a single shuffle, the last racon scaffold is the result.
#  - With several shuffles, the last-round consensus of every shuffle is
#    collected into one file and used as the "reads" for one more
#    minimap2 + racon pass against the draft.
if [ ${SHUFFLES} -eq 1 ]; then
    cp ${SCAFFOLD} ${FINAL}
else
    # One last compilation step
    echo "Combining consensus shuffles."
    COMBINED=racon_combined_shuffles.fasta
    # Start from an empty file: the loop below appends, so a file left over
    # from a previous run in the same folder would otherwise add duplicates.
    : > ${COMBINED}
    # Use `seq -w` so that SHUF carries the same zero padding as the
    # racon_<shuffle>_<round>.fasta names written by the consensus loop;
    # plain `seq` would look for racon_1_* when the file is racon_01_*.
    for SHUF in $(seq -w 1 ${SHUFFLES}); do
        # Tag each sequence name with its shuffle so names stay unique.
        sed "s/\(>[^[:space:]]\+\)/\1_shuffle_${SHUF}/" racon_${SHUF}_${ROUNDS}.fasta >> ${COMBINED}
    done

    READS2TIGS=combined2contigs.paf
    minimap2 -t${THREADS} ${DRAFT} ${COMBINED} > ${READS2TIGS}
    racon ${RACONOPTS} -t ${THREADS} -q -1 ${COMBINED} ${READS2TIGS} ${DRAFT} > ${FINAL}
fi
echo "Final assembly written to ${OUTPUT}/${FINAL}. Have a nice day."

0 comments on commit eac4727

Please sign in to comment.