Skip to content

Commit

Permalink
Update tabbed sequence format, changed name and allow trimming of header.
Browse files Browse the repository at this point in the history
  • Loading branch information
alastair-droop committed May 8, 2019
1 parent 4e86e6c commit 63c6fa7
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 40 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ CFLAGS+=-O2 -g
CPPFLAGS+=-Wall -Wextra -Wno-unused-parameter -I$(HTSDIR)
LIBS=-lz -lhts -lm

SUBPROCESSES=view head count blockview fasta basetab qualtab lengthtab type validate find trim qualmap crisprtab
SUBPROCESSES=view head count blockview fasta basetab qualtab lengthtab type validate find trim qualmap tabseq
SUBPROCESS_FILES=$(addsuffix .c, $(addprefix $(SRC)/fqprocess_, $(SUBPROCESSES)))
SUBPROCESS_OBJECTS=$(addsuffix .o, $(addprefix $(SRC)/fqprocess_, $(SUBPROCESSES)))

Expand Down
3 changes: 2 additions & 1 deletion src/fqheader.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

// Set the current version data:
#define PROG_NAME "fqtools"
#define FQTOOLS_VERSION "2.2 2019-05-08"
#define FQTOOLS_VERSION "2.3 2019-05-08"

// Define the flag type:
typedef unsigned char fqflag;
Expand Down Expand Up @@ -108,6 +108,7 @@ fqstatus fqprocess_validate(int argc, const char *argv[], fqglobal options);
fqstatus fqprocess_find(int argc, const char *argv[], fqglobal options);
fqstatus fqprocess_trim(int argc, const char *argv[], fqglobal options);
fqstatus fqprocess_qualmap(int argc, const char *argv[], fqglobal options);
fqstatus fqprocess_tabseq(int argc, const char *argv[], fqglobal options);

#include "fqbuffer.h"
#include "fqfile.h"
Expand Down
19 changes: 11 additions & 8 deletions src/fqhelp.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ void global_help(){
printf("quality View FASTQ file quality data\n");
printf("header2 View FASTQ file secondary header data\n");
printf("fasta Convert FASTQ files to FASTA format\n");
printf("fasta Convert FASTQ files to CRISPR tabbed format\n");
printf("tabseq Convert FASTQ files to tabbed sequence format\n");
printf("basetab Tabulate FASTQ base frequencies\n");
printf("qualtab Tabulate FASTQ quality character frequencies\n");
printf("lengthtab Tabulate FASTQ read lengths\n");
Expand Down Expand Up @@ -134,8 +134,8 @@ void fqprocess_qualmap_usage(){
printf("usage: %s [...] qualmap [-hk] [-o OUTFILE] MAP [FILE] [FILE]\n", PROG_NAME);
}

void fqprocess_crisprtab_usage(){
printf("usage: %s [...] crisprtab [-h] [FILE] [FILE]\n", PROG_NAME);
void fqprocess_tabseq_usage(){
printf("usage: %s [...] tabseq [-h] [FILE] [FILE]\n", PROG_NAME);
}


Expand Down Expand Up @@ -380,16 +380,19 @@ void fqprocess_qualmap_help(){
printf(" If no input file is specified, input will be read from stdin.\n");
}

void fqprocess_crisprtab_help(){
printf("Convert FASTQ files to CRISPR tabbed format.\n");
void fqprocess_tabseq_help(){
printf("Convert FASTQ files to tabbed sequence format.\n");
printf("\n");
printf("crisprtab options:\n");
printf("tabseq options:\n");
printf(" -h Show this help message and exit.\n");
printf(" -t Attempt to trim read number from header.\n");
printf(" FILE The fastq file(s) to view.\n");
printf("\n");
printf("FILE:\n");
printf(" If no input file is specified, input will be read from stdin.\n");
printf("\n");
printf("The CRISPRtab format is simply <header>\t<seq> for single-end samples,\n");
printf("or <header1>\t<seq1>\t<header2>\t<seq2> for paired-end samples.\n");
printf("The tabbed sequence format is simply <header>\\t<seq> for single-end samples,\n");
printf("or <header1>\\t<seq1>\\t<seq2> for paired-end samples.\n");
printf("if -t is specified, the read header will be trimmed of the trailing pair\n");
printf("numbers (\"/1\" or \"/2\"), if these are present\n");
}
4 changes: 2 additions & 2 deletions src/fqhelp.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ void fqprocess_validate_usage();
void fqprocess_find_usage();
void fqprocess_trim_usage();
void fqprocess_qualmap_usage();
void fqprocess_crisprtab_usage();
void fqprocess_tabseq_usage();

void fqprocess_view_help();
void fqprocess_head_help();
Expand All @@ -47,4 +47,4 @@ void fqprocess_validate_help();
void fqprocess_find_help();
void fqprocess_trim_help();
void fqprocess_qualmap_help();
void fqprocess_crisprtab_help();
void fqprocess_tabseq_help();
51 changes: 24 additions & 27 deletions src/fqprocess_crisprtab.c → src/fqprocess_tabseq.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,75 +19,73 @@ fqfsout f_out;
fqparser_callbacks callbacks;
char interleaving_out;
char paired;
fqbuffer p1header, p2header, p1seq, p2seq;
char trim_header;
fqbuffer p1header, p1seq, p2seq;

fqbytecount fqprocess_crisprtab_readBuffer(fqflag pair, char *b, fqbytecount b_size){
fqbytecount fqprocess_tabseq_readBuffer(fqflag pair, char *b, fqbytecount b_size){
return fqfile_read(&(f_in.files[pair]->file), b, b_size);
}

void fqprocess_crisprtab_headerBlock(fqflag pair, char *block, fqbytecount block_n, char final){
void fqprocess_tabseq_headerBlock(fqflag pair, char *block, fqbytecount block_n, char final){
if(pair == FQ_PAIR_1) fqbuffer_append(&p1header, block, block_n);
else fqbuffer_append(&p2header, block, block_n);
}

void fqprocess_crisprtab_sequenceBlock(fqflag pair, char *block, fqbytecount block_n, char final){
void fqprocess_tabseq_sequenceBlock(fqflag pair, char *block, fqbytecount block_n, char final){
if(pair == FQ_PAIR_1) fqbuffer_append(&p1seq, block, block_n);
else fqbuffer_append(&p2seq, block, block_n);
}

void fqprocess_crisprtab_endRead(fqflag pair){
void fqprocess_tabseq_endRead(fqflag pair){
int header_offset = p1header.offset;
if((paired == 0) || (pair == FQ_PAIR_2)){
fwrite(p1header.data, sizeof(char), p1header.offset, stdout);
if(trim_header == 1){
if(p1header.offset > 2){
if((strncmp(p1header.data + p1header.offset - 2, "/1", 2) == 0) || (strncmp(p1header.data + p1header.offset - 2, "/2", 2) == 0)) header_offset = header_offset - 2;
}
}
fwrite(p1header.data, sizeof(char), header_offset, stdout);
fwrite("\t", sizeof(char), 1, stdout);
fwrite(p1seq.data, sizeof(char), p1seq.offset, stdout);
if(paired == 1){
fwrite("\t", sizeof(char), 1, stdout);
fwrite(p2header.data, sizeof(char), p2header.offset, stdout);
fwrite("\t", sizeof(char), 1, stdout);
fwrite(p2seq.data, sizeof(char), p2seq.offset, stdout);
}
fwrite("\n", sizeof(char), 1, stdout);
fflush(stdout);
fqbuffer_reset(&p1header);
fqbuffer_reset(&p2header);
fqbuffer_reset(&p1seq);
fqbuffer_reset(&p2seq);
}
}

fqstatus fqprocess_crisprtab(int argc, const char *argv[], fqglobal options){
fqstatus fqprocess_tabseq(int argc, const char *argv[], fqglobal options){
int option;
fqstatus result;
char finished = 0;
trim_header = 0;

//Initialise the output buffers:
result = fqbuffer_init(&p1header, options.output_bufsize);
if(result != FQ_STATUS_OK) return FQ_STATUS_FAIL;
result = fqbuffer_init(&p2header, options.output_bufsize);
if(result != FQ_STATUS_OK){
fqbuffer_free(&p1header);
return FQ_STATUS_FAIL;
}
result = fqbuffer_init(&p1seq, options.output_bufsize);
if(result != FQ_STATUS_OK){
fqbuffer_free(&p1header);
fqbuffer_free(&p2header);
return FQ_STATUS_FAIL;
}
result = fqbuffer_init(&p2seq, options.output_bufsize);
if(result != FQ_STATUS_OK){
fqbuffer_free(&p1header);
fqbuffer_free(&p2header);
fqbuffer_free(&p1seq);
return FQ_STATUS_FAIL;
}

//Parse the subcommand options:
optind++; // Skip the subcommand argument
while((option = getopt(argc, (char* const*)argv, "+h")) != -1){
while((option = getopt(argc, (char* const*)argv, "+ht")) != -1){
switch(option){
case 'h':{fqprocess_crisprtab_help(); return FQ_STATUS_OK;}
default:{fqprocess_crisprtab_usage(); return FQ_STATUS_FAIL;}
case 'h':{fqprocess_tabseq_help(); return FQ_STATUS_OK;}
case 't':{trim_header=1; break;}
default:{fqprocess_tabseq_usage(); return FQ_STATUS_FAIL;}
}
}

Expand All @@ -107,11 +105,11 @@ fqstatus fqprocess_crisprtab(int argc, const char *argv[], fqglobal options){

//Set the callbacks:
set_generic_callbacks(&callbacks);
callbacks.readBuffer = fqprocess_crisprtab_readBuffer;
callbacks.endRead = fqprocess_crisprtab_endRead;
callbacks.header1Block = fqprocess_crisprtab_headerBlock;
callbacks.header2Block = fqprocess_crisprtab_headerBlock;
callbacks.sequenceBlock = fqprocess_crisprtab_sequenceBlock;
callbacks.readBuffer = fqprocess_tabseq_readBuffer;
callbacks.endRead = fqprocess_tabseq_endRead;
callbacks.header1Block = fqprocess_tabseq_headerBlock;
callbacks.header2Block = fqprocess_tabseq_headerBlock;
callbacks.sequenceBlock = fqprocess_tabseq_sequenceBlock;

// Step through the input fileset:
do finished = fqfsin_step(&f_in);
Expand All @@ -122,7 +120,6 @@ fqstatus fqprocess_crisprtab(int argc, const char *argv[], fqglobal options){
fqfsin_close(&f_in);
fqfsout_close(&f_out);
fqbuffer_free(&p1header);
fqbuffer_free(&p2header);
fqbuffer_free(&p1seq);
fqbuffer_free(&p2seq);
return result;
Expand Down
2 changes: 1 addition & 1 deletion src/fqtools.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ int main(int argc, const char *argv[]){
if(strcmp(command, "find") == 0) return fqprocess_find(argc, argv, options);
if(strcmp(command, "trim") == 0) return fqprocess_trim(argc, argv, options);
if(strcmp(command, "qualmap") == 0) return fqprocess_qualmap(argc, argv, options);
if(strcmp(command, "crisprtab") == 0) return fqprocess_crisprtab(argc, argv, options);
if(strcmp(command, "tabseq") == 0) return fqprocess_tabseq(argc, argv, options);

//If we get to here, the given command string was invalid:
fprintf(stderr, "ERROR: unknown command: \"%s\"\n", command);
Expand Down

0 comments on commit 63c6fa7

Please sign in to comment.