From 73b31ffaf5aae3226c8e9ac8fe1bada89853e5fd Mon Sep 17 00:00:00 2001 From: Luiz Irber Date: Mon, 8 May 2017 06:13:52 +0000 Subject: [PATCH] Fix URLs and add an example to the README file --- README.md | 18 +++++++++++++++++- src/stream_data_from_urls_list.sh | 2 +- src/wget_urls | 4 ++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f2897a2..7deff7d 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,29 @@ # Running Faucet (locally) Example usage: - ./faucet -read_load_file interlaced_reads.fq -read_scan_file interlaced_reads.fq -size_kmer 31 -max_read_length 100 -estimated_kmers 1000000000 -singletons 200000000 -file_prefix faucet_outputs --fastq --paired_ends +```bash +./faucet -read_load_file interlaced_reads.fq \ + -read_scan_file interlaced_reads.fq \ + -size_kmer 31 \ + -max_read_length 100 \ + -estimated_kmers 1000000000 \ + -singletons 200000000 \ + -file_prefix faucet_outputs \ + --fastq \ + --paired_ends +``` The above command takes as input the file interlaced_reads.fq (where entries alternate between mates 1 and 2 of a paired end library), and the input format is fastq. Faucet does not accept separate mate files, but can accept fasta format and files composed of read sequences alone. # Streaming from a remote source A demonstration streaming reads from a remote server is provided in the script src/stream_data_from_urls_list.sh +You can run it with: +```bash +./stream_data_from_urls_list.sh out wget_urls 1596741569 12045222 +``` +where `wget_urls` is a file with URLs downloaded from ENA, +`1596741569` is the estimated number of unique kmers (F0) and `12045222` is the estimated number of singleton kmers (f1). # Requirements Faucet was implemented in C++ 11, so requires a compiler that is not too ancient to support it, and has been tested only on Linux so far. 
diff --git a/src/stream_data_from_urls_list.sh b/src/stream_data_from_urls_list.sh index d946ebc..449d27c 100755 --- a/src/stream_data_from_urls_list.sh +++ b/src/stream_data_from_urls_list.sh @@ -7,7 +7,7 @@ #4) singletons URL_FILE=$2 -READ_COMMAND=wget\ --read-timeout=5\ --timeout=15\ -t\ 0\ -qO-\ -i\ $URL_FILE\ \|\ bzip2\ -d\ -c\ -q +READ_COMMAND=wget\ --read-timeout=5\ --timeout=15\ -t\ 0\ -qO-\ -i\ $URL_FILE\ \|\ gzip\ -d\ -c\ -q eval "./faucet -read_load_file <($READ_COMMAND) -read_scan_file <($READ_COMMAND) -size_kmer 31 -max_read_length 130 -estimated_kmers $3 -singletons $4 -file_prefix $1 --fastq --high_cov" diff --git a/src/wget_urls b/src/wget_urls index ad7677b..49d9826 100644 --- a/src/wget_urls +++ b/src/wget_urls @@ -1,2 +1,2 @@ -ftp://ftp.ddbj.nig.ac.jp/ddbj_database/dra/fastq/SRA010/SRA010896/SRX016231/SRR034939_1.fastq.bz2 -ftp://ftp.ddbj.nig.ac.jp/ddbj_database/dra/fastq/SRA010/SRA010896/SRX016231/SRR034939_2.fastq.bz2 +ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR034/SRR034939/SRR034939_1.fastq.gz +ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR034/SRR034939/SRR034939_2.fastq.gz