From 63c6fa707fabc7569c2f256b8bcfc6ebb760f72a Mon Sep 17 00:00:00 2001 From: Alastair Droop Date: Wed, 8 May 2019 11:55:30 +0100 Subject: [PATCH] Update tabbed sequence format, changed name and allow trimming of header. --- Makefile | 2 +- src/fqheader.h | 3 +- src/fqhelp.c | 19 ++++--- src/fqhelp.h | 4 +- ...process_crisprtab.c => fqprocess_tabseq.c} | 51 +++++++++---------- src/fqtools.c | 2 +- 6 files changed, 41 insertions(+), 40 deletions(-) rename src/{fqprocess_crisprtab.c => fqprocess_tabseq.c} (68%) diff --git a/Makefile b/Makefile index 2673073..1de8d12 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ CFLAGS+=-O2 -g CPPFLAGS+=-Wall -Wextra -Wno-unused-parameter -I$(HTSDIR) LIBS=-lz -lhts -lm -SUBPROCESSES=view head count blockview fasta basetab qualtab lengthtab type validate find trim qualmap crisprtab +SUBPROCESSES=view head count blockview fasta basetab qualtab lengthtab type validate find trim qualmap tabseq SUBPROCESS_FILES=$(addsuffix .c, $(addprefix $(SRC)/fqprocess_, $(SUBPROCESSES))) SUBPROCESS_OBJECTS=$(addsuffix .o, $(addprefix $(SRC)/fqprocess_, $(SUBPROCESSES))) diff --git a/src/fqheader.h b/src/fqheader.h index 65ba45b..b5ae7c1 100644 --- a/src/fqheader.h +++ b/src/fqheader.h @@ -23,7 +23,7 @@ // Set the current version data: #define PROG_NAME "fqtools" -#define FQTOOLS_VERSION "2.2 2019-05-08" +#define FQTOOLS_VERSION "2.3 2019-05-08" // Define the flag type: typedef unsigned char fqflag; @@ -108,6 +108,7 @@ fqstatus fqprocess_validate(int argc, const char *argv[], fqglobal options); fqstatus fqprocess_find(int argc, const char *argv[], fqglobal options); fqstatus fqprocess_trim(int argc, const char *argv[], fqglobal options); fqstatus fqprocess_qualmap(int argc, const char *argv[], fqglobal options); +fqstatus fqprocess_tabseq(int argc, const char *argv[], fqglobal options); #include "fqbuffer.h" #include "fqfile.h" diff --git a/src/fqhelp.c b/src/fqhelp.c index aafb91d..1a073fa 100644 --- a/src/fqhelp.c +++ b/src/fqhelp.c @@ -71,7 
+71,7 @@ void global_help(){ printf("quality View FASTQ file quality data\n"); printf("header2 View FASTQ file secondary header data\n"); printf("fasta Convert FASTQ files to FASTA format\n"); - printf("fasta Convert FASTQ files to CRISPR tabbed format\n"); + printf("tabseq Convert FASTQ files to tabbed sequence format\n"); printf("basetab Tabulate FASTQ base frequencies\n"); printf("qualtab Tabulate FASTQ quality character frequencies\n"); printf("lengthtab Tabulate FASTQ read lengths\n"); @@ -134,8 +134,8 @@ void fqprocess_qualmap_usage(){ printf("usage: %s [...] qualmap [-hk] [-o OUTFILE] MAP [FILE] [FILE]\n", PROG_NAME); } -void fqprocess_crisprtab_usage(){ - printf("usage: %s [...] crisprtab [-h] [FILE] [FILE]\n", PROG_NAME); +void fqprocess_tabseq_usage(){ + printf("usage: %s [...] tabseq [-h] [FILE] [FILE]\n", PROG_NAME); } @@ -380,16 +380,19 @@ void fqprocess_qualmap_help(){ printf(" If no input file is specified, input will be read from stdin.\n"); } -void fqprocess_crisprtab_help(){ - printf("Convert FASTQ files to CRISPR tabbed format.\n"); +void fqprocess_tabseq_help(){ + printf("Convert FASTQ files to tabbed sequence format.\n"); printf("\n"); - printf("crisprtab options:\n"); + printf("tabseq options:\n"); printf(" -h Show this help message and exit.\n"); + printf(" -t Attempt to trim read number from header.\n"); printf(" FILE The fastq file(s) to view.\n"); printf("\n"); printf("FILE:\n"); printf(" If no input file is specified, input will be read from stdin.\n"); printf("\n"); - printf("The CRISPRtab format is simply
<header>\t<sequence> for single-end samples,\n"); - printf("or <header1>\t<sequence1>\t<header2>\t<sequence2> for paired-end samples.\n"); + printf("The tabbed sequence format is simply
<header>\\t<sequence> for single-end samples,\n"); + printf("or <header1>\\t<sequence1>\\t<sequence2> for paired-end samples.\n"); + printf("if -t is specified, the read header will be trimmed of the trailing pair\n"); + printf("numbers (\"/1\" or \"/2\"), if these are present\n"); } diff --git a/src/fqhelp.h b/src/fqhelp.h index 2a98040..2d87dc6 100644 --- a/src/fqhelp.h +++ b/src/fqhelp.h @@ -32,7 +32,7 @@ void fqprocess_validate_usage(); void fqprocess_find_usage(); void fqprocess_trim_usage(); void fqprocess_qualmap_usage(); -void fqprocess_crisprtab_usage(); +void fqprocess_tabseq_usage(); void fqprocess_view_help(); void fqprocess_head_help(); @@ -47,4 +47,4 @@ void fqprocess_validate_help(); void fqprocess_find_help(); void fqprocess_trim_help(); void fqprocess_qualmap_help(); -void fqprocess_crisprtab_help(); +void fqprocess_tabseq_help(); diff --git a/src/fqprocess_crisprtab.c b/src/fqprocess_tabseq.c similarity index 68% rename from src/fqprocess_crisprtab.c rename to src/fqprocess_tabseq.c index 1505814..3a2a46a 100644 --- a/src/fqprocess_crisprtab.c +++ b/src/fqprocess_tabseq.c @@ -19,75 +19,73 @@ fqfsout f_out; fqparser_callbacks callbacks; char interleaving_out; char paired; -fqbuffer p1header, p2header, p1seq, p2seq; +char trim_header; +fqbuffer p1header, p1seq, p2seq; -fqbytecount fqprocess_crisprtab_readBuffer(fqflag pair, char *b, fqbytecount b_size){ +fqbytecount fqprocess_tabseq_readBuffer(fqflag pair, char *b, fqbytecount b_size){ return fqfile_read(&(f_in.files[pair]->file), b, b_size); } -void fqprocess_crisprtab_headerBlock(fqflag pair, char *block, fqbytecount block_n, char final){ +void fqprocess_tabseq_headerBlock(fqflag pair, char *block, fqbytecount block_n, char final){ if(pair == FQ_PAIR_1) fqbuffer_append(&p1header, block, block_n); - else fqbuffer_append(&p2header, block, block_n); } -void fqprocess_crisprtab_sequenceBlock(fqflag pair, char *block, fqbytecount block_n, char final){ +void fqprocess_tabseq_sequenceBlock(fqflag pair, char *block, fqbytecount block_n, char final){ 
if(pair == FQ_PAIR_1) fqbuffer_append(&p1seq, block, block_n); else fqbuffer_append(&p2seq, block, block_n); } -void fqprocess_crisprtab_endRead(fqflag pair){ +void fqprocess_tabseq_endRead(fqflag pair){ + int header_offset = p1header.offset; if((paired == 0) || (pair == FQ_PAIR_2)){ - fwrite(p1header.data, sizeof(char), p1header.offset, stdout); + if(trim_header == 1){ + if(p1header.offset > 2){ + if((strncmp(p1header.data + p1header.offset - 2, "/1", 2) == 0) || (strncmp(p1header.data + p1header.offset - 2, "/2", 2) == 0)) header_offset = header_offset - 2; + } + } + fwrite(p1header.data, sizeof(char), header_offset, stdout); fwrite("\t", sizeof(char), 1, stdout); fwrite(p1seq.data, sizeof(char), p1seq.offset, stdout); if(paired == 1){ - fwrite("\t", sizeof(char), 1, stdout); - fwrite(p2header.data, sizeof(char), p2header.offset, stdout); fwrite("\t", sizeof(char), 1, stdout); fwrite(p2seq.data, sizeof(char), p2seq.offset, stdout); } fwrite("\n", sizeof(char), 1, stdout); fflush(stdout); fqbuffer_reset(&p1header); - fqbuffer_reset(&p2header); fqbuffer_reset(&p1seq); fqbuffer_reset(&p2seq); } } -fqstatus fqprocess_crisprtab(int argc, const char *argv[], fqglobal options){ +fqstatus fqprocess_tabseq(int argc, const char *argv[], fqglobal options){ int option; fqstatus result; char finished = 0; + trim_header = 0; //Initialise the output buffers: result = fqbuffer_init(&p1header, options.output_bufsize); if(result != FQ_STATUS_OK) return FQ_STATUS_FAIL; - result = fqbuffer_init(&p2header, options.output_bufsize); - if(result != FQ_STATUS_OK){ - fqbuffer_free(&p1header); - return FQ_STATUS_FAIL; - } result = fqbuffer_init(&p1seq, options.output_bufsize); if(result != FQ_STATUS_OK){ fqbuffer_free(&p1header); - fqbuffer_free(&p2header); return FQ_STATUS_FAIL; } result = fqbuffer_init(&p2seq, options.output_bufsize); if(result != FQ_STATUS_OK){ fqbuffer_free(&p1header); - fqbuffer_free(&p2header); fqbuffer_free(&p1seq); return FQ_STATUS_FAIL; } //Parse the subcommand 
options: optind++; // Skip the subcommand argument - while((option = getopt(argc, (char* const*)argv, "+h")) != -1){ + while((option = getopt(argc, (char* const*)argv, "+ht")) != -1){ switch(option){ - case 'h':{fqprocess_crisprtab_help(); return FQ_STATUS_OK;} - default:{fqprocess_crisprtab_usage(); return FQ_STATUS_FAIL;} + case 'h':{fqprocess_tabseq_help(); return FQ_STATUS_OK;} + case 't':{trim_header=1; break;} + default:{fqprocess_tabseq_usage(); return FQ_STATUS_FAIL;} } } @@ -107,11 +105,11 @@ fqstatus fqprocess_crisprtab(int argc, const char *argv[], fqglobal options){ //Set the callbacks: set_generic_callbacks(&callbacks); - callbacks.readBuffer = fqprocess_crisprtab_readBuffer; - callbacks.endRead = fqprocess_crisprtab_endRead; - callbacks.header1Block = fqprocess_crisprtab_headerBlock; - callbacks.header2Block = fqprocess_crisprtab_headerBlock; - callbacks.sequenceBlock = fqprocess_crisprtab_sequenceBlock; + callbacks.readBuffer = fqprocess_tabseq_readBuffer; + callbacks.endRead = fqprocess_tabseq_endRead; + callbacks.header1Block = fqprocess_tabseq_headerBlock; + callbacks.header2Block = fqprocess_tabseq_headerBlock; + callbacks.sequenceBlock = fqprocess_tabseq_sequenceBlock; // Step through the input fileset: do finished = fqfsin_step(&f_in); @@ -122,7 +120,6 @@ fqstatus fqprocess_crisprtab(int argc, const char *argv[], fqglobal options){ fqfsin_close(&f_in); fqfsout_close(&f_out); fqbuffer_free(&p1header); - fqbuffer_free(&p2header); fqbuffer_free(&p1seq); fqbuffer_free(&p2seq); return result; diff --git a/src/fqtools.c b/src/fqtools.c index dd41677..97e78be 100644 --- a/src/fqtools.c +++ b/src/fqtools.c @@ -131,7 +131,7 @@ int main(int argc, const char *argv[]){ if(strcmp(command, "find") == 0) return fqprocess_find(argc, argv, options); if(strcmp(command, "trim") == 0) return fqprocess_trim(argc, argv, options); if(strcmp(command, "qualmap") == 0) return fqprocess_qualmap(argc, argv, options); - if(strcmp(command, "crisprtab") == 0) return 
fqprocess_crisprtab(argc, argv, options); + if(strcmp(command, "tabseq") == 0) return fqprocess_tabseq(argc, argv, options); //If we get to here, the given command string was invalid: fprintf(stderr, "ERROR: unknown command: \"%s\"\n", command);