From 1955c6629c27602fec6985720d3667df86b1fbc2 Mon Sep 17 00:00:00 2001 From: cjw85 Date: Wed, 23 May 2018 20:03:53 +0100 Subject: [PATCH 1/5] remove scrappy in Makefile --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 0e75758..bcc1e4e 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: scrappy install docs +.PHONY: install docs OS := $(shell uname) # for porechop on travis (or other platform with older gcc) @@ -71,7 +71,7 @@ bwapy: venv cd submodules/bwapy && make bwa/libbwa.a ${IN_VENV} && cd submodules/bwapy && python setup.py install -install: venv bwapy scrappy | $(addprefix $(BINCACHEDIR)/, $(BINARIES)) +install: venv bwapy | $(addprefix $(BINCACHEDIR)/, $(BINARIES)) ${IN_VENV} && python setup.py install # You can set these variables from the command line. From b77fcebf1864d4311c7d0b164d7c43d8227db528 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Tue, 29 May 2018 11:31:25 +0100 Subject: [PATCH 2/5] Bump version --- pomoxis/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pomoxis/__init__.py b/pomoxis/__init__.py index aea855c..c02fb94 100644 --- a/pomoxis/__init__.py +++ b/pomoxis/__init__.py @@ -1,4 +1,4 @@ -__version__ = '0.1.8' +__version__ = '0.1.9' import os import sys From 88b1de4b9cdedd586708179df682f818c2a8a5f3 Mon Sep 17 00:00:00 2001 From: cwright Date: Thu, 14 Jun 2018 16:06:26 +0100 Subject: [PATCH 3/5] Racon shuffling --- Makefile | 14 ++++++++-- scripts/mini_assemble | 59 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 23f4bba..76b5159 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,11 @@ .PHONY: install docs -OS := $(shell uname) +OS := $(shell uname | tr '[:upper:]' '[:lower:]') # for porechop on travis (or other platform with older gcc) CXX ?= g++ # Builds a cache of binaries which can just be copied for CI -BINARIES=minimap2 miniasm bwa racon samtools bcftools +BINARIES=minimap2 miniasm bwa racon samtools bcftools seqkit BINCACHEDIR=bincache $(BINCACHEDIR): @@ -59,6 +59,16 @@ $(BINCACHEDIR)/bcftools: | $(BINCACHEDIR) cd submodules/bcftools-${BCFVER} && make cp submodules/bcftools-${BCFVER}/bcftools $@ +SEQKITVER=0.8.0 +$(BINCACHEDIR)/seqkit: | $(BINCACHEDIR) + @echo Making $(@F) + if [ ! -e submodules/seqkit_${OS}_amd64.tar.gz ]; then \ + cd submodules; \ + wget https://github.com/shenwei356/seqkit/releases/download/v${SEQKITVER}/seqkit_${OS}_amd64.tar.gz; \ + fi + cd submodules && tar -xzvf seqkit_${OS}_amd64.tar.gz + cp submodules/seqkit $@ + venv: venv/bin/activate IN_VENV=. ./venv/bin/activate diff --git a/scripts/mini_assemble b/scripts/mini_assemble index 995a6c7..99aba48 100755 --- a/scripts/mini_assemble +++ b/scripts/mini_assemble @@ -12,26 +12,32 @@ Assemble fastq/fasta formatted reads and perform POA consensus. -o output folder (default: assm). -p output file prefix (default: reads). -t number of minimap and racon threads (default: 1). + -m number of racon rounds (default: 4). + -n number of racon shuffles (default: 1). -c trim adapters from reads prior to everything else. -e error correct longest e% of reads prior to assembly." OUTPUT="assm" NAME="reads" THREADS=1 +ROUNDS=4 +SHUFFLES=1 USEQUAL=false CHOP=false iflag=false rflag=false eflag=false -while getopts ':hi:q:r:o:p:t:ce:' option; do +while getopts ':hi:q:r:o:p:t:m:n:ce:' option; do case "$option" in h ) echo "$usage" >&2; exit;; i ) iflag=true; INPUT=$OPTARG;; q ) USEQUAL=true;; - r ) rflag=true; REF=$OPTARG;; + r ) rflag=true; REF=$(cd "$(dirname "$OPTARG")"; pwd)/$(basename "$OPTARG");; o ) OUTPUT=$OPTARG;; p ) NAME=$OPTARG;; t ) THREADS=$OPTARG;; + m ) ROUNDS=$OPTARG;; + n ) SHUFFLES=$OPTARG;; c ) CHOP=true;; e ) eflag=true; ERRCORR=$OPTARG;; \? ) echo "Invalid option: -${OPTARG}." >&2; exit 1;; @@ -119,16 +125,47 @@ else DRAFT=${REF} fi -for ROUND in {01..04}; do - echo "Running round ${ROUND} consensus..." - READS2TIGS=reads2contigs_${ROUND}.paf - NEWDRAFT=racon_${ROUND}.fasta - minimap2 -t${THREADS} ${DRAFT} ${READS} > ${READS2TIGS} - racon ${RACONOPTS} -t ${THREADS} -q -1 ${READS} ${READS2TIGS} ${DRAFT} > ${NEWDRAFT} - DRAFT=${NEWDRAFT} -done; +for SHUF in $(seq 1 ${SHUFFLES}); do + echo "Running racon read shuffle ${SHUF}..." + SCAFFOLD=${DRAFT} + if [ ${SHUF} -ne 1 ]; then + echo "Shuffling reads..." + SHUFREADS=shuffled_${SHUF}_${READS} + seqkit shuffle $READS > ${SHUFREADS} + else + SHUFREADS=${READS} + if [ ${SHUFFLES} -ne 1 ]; then + ln -s ${READS} shuffled_${SHUF}_${READS} + fi + fi + + for ROUND in $(seq 1 ${ROUNDS}); do + echo "Running round ${ROUND} consensus..." + READS2TIGS=reads2contigs_${SHUF}_${ROUND}.paf + NEWSCAF=racon_${SHUF}_${ROUND}.fasta + + minimap2 -t${THREADS} ${SCAFFOLD} ${SHUFREADS} > ${READS2TIGS} + racon ${RACONOPTS} -t ${THREADS} -q -1 ${SHUFREADS} ${READS2TIGS} ${SCAFFOLD} > ${NEWSCAF} + SCAFFOLD=${NEWSCAF} + done + +done FINAL=${NAME}_final.fa -sed 's/_C:.\+$//' ${DRAFT} > ${FINAL} +if [ ${SHUFFLES} -eq 1 ]; then + sed 's/_C:.\+$//' ${SCAFFOLD} > ${FINAL} +else + # One last compilation step + echo "Combining consensus shuffles." + COMBINED=racon_combined_shuffles.fasta + for SHUF in $(seq 1 ${SHUFFLES}); do + sed "s/_C:.\+$/_shuffle_${SHUF}/" racon_${SHUF}_${ROUNDS}.fasta >> ${COMBINED} + done + + READS2TIGS=combined2contigs.paf + minimap2 -t${THREADS} ${DRAFT} ${COMBINED} > ${READS2TIGS} + racon ${RACONOPTS} -t ${THREADS} -q -1 ${COMBINED} ${READS2TIGS} ${DRAFT} > ${FINAL} + sed -i 's/_C:.\+$//' ${FINAL} +fi echo "Final assembly written to ${OUTPUT}/${FINAL}. Have a nice day." From f8a5b2f8d9dc16f4724ba1f40d880b8f610d677b Mon Sep 17 00:00:00 2001 From: cwright Date: Thu, 14 Jun 2018 17:25:38 +0100 Subject: [PATCH 4/5] mini_assemble consistent with racon --- scripts/mini_assemble | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/mini_assemble b/scripts/mini_assemble index 99aba48..86765af 100755 --- a/scripts/mini_assemble +++ b/scripts/mini_assemble @@ -125,7 +125,7 @@ else DRAFT=${REF} fi -for SHUF in $(seq 1 ${SHUFFLES}); do +for SHUF in $(seq -w 1 ${SHUFFLES}); do echo "Running racon read shuffle ${SHUF}..." SCAFFOLD=${DRAFT} if [ ${SHUF} -ne 1 ]; then @@ -139,7 +139,7 @@ for SHUF in $(seq 1 ${SHUFFLES}); do fi fi - for ROUND in $(seq 1 ${ROUNDS}); do + for ROUND in $(seq -w 1 ${ROUNDS}); do echo "Running round ${ROUND} consensus..." READS2TIGS=reads2contigs_${SHUF}_${ROUND}.paf NEWSCAF=racon_${SHUF}_${ROUND}.fasta @@ -154,18 +154,17 @@ done FINAL=${NAME}_final.fa if [ ${SHUFFLES} -eq 1 ]; then - sed 's/_C:.\+$//' ${SCAFFOLD} > ${FINAL} + cp ${SCAFFOLD} ${FINAL} else # One last compilation step echo "Combining consensus shuffles." COMBINED=racon_combined_shuffles.fasta for SHUF in $(seq 1 ${SHUFFLES}); do - sed "s/_C:.\+$/_shuffle_${SHUF}/" racon_${SHUF}_${ROUNDS}.fasta >> ${COMBINED} + sed "s/\(>[^[:space:]]\+\)/\1_shuffle_${SHUF}/" racon_${SHUF}_${ROUNDS}.fasta >> ${COMBINED} done READS2TIGS=combined2contigs.paf minimap2 -t${THREADS} ${DRAFT} ${COMBINED} > ${READS2TIGS} racon ${RACONOPTS} -t ${THREADS} -q -1 ${COMBINED} ${READS2TIGS} ${DRAFT} > ${FINAL} - sed -i 's/_C:.\+$//' ${FINAL} fi echo "Final assembly written to ${OUTPUT}/${FINAL}. Have a nice day." From 2f0c6892ac7cf85159ec03a2b03ab8ec7e56218a Mon Sep 17 00:00:00 2001 From: cwright Date: Wed, 20 Jun 2018 13:41:53 +0100 Subject: [PATCH 5/5] unpin fast5 version and note about racon compiler --- README.md | 3 +++ requirements.txt | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 167e81d..22ed09e 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,9 @@ the `make install` step: # For porechop to be compiled on older systems set these, e.g.: export CXX="g++-4.9" CC="gcc-4.9" +Note also that racon requires at least `gcc>=4.8.5` to +[compile smoothly](https://github.com/isovic/racon/issues/57). + Running the above within a pre-exisiting virtual environnment may well fail; advanced may wish to simply run the `setup.py` file in the standard manner after compiling the third party programs as in the `Makefile`. diff --git a/requirements.txt b/requirements.txt index dc552ff..77aecc1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ fast5_research matplotlib mappy msgpack-python -numpy <= 1.13.3 +numpy pandas pysam scrappie