diff --git a/Cargo.lock b/Cargo.lock index e5749f8..c61c3e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,15 +17,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "ansi_term" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -dependencies = [ - "winapi", -] - [[package]] name = "approx" version = "0.3.2" @@ -59,7 +50,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi 0.1.17", + "hermit-abi", "libc", "winapi", ] @@ -178,6 +169,15 @@ version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" +[[package]] +name = "cc" +version = "1.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8293772165d9345bdaaa39b45b2109591e63fe5e6fbc23c6ff930a048aa310b" +dependencies = [ + "shlex", +] + [[package]] name = "cfg-if" version = "0.1.10" @@ -192,17 +192,35 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "2.33.3" +version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ - "ansi_term", "atty", "bitflags", + "clap_lex", + "indexmap", "strsim", + "termcolor", "textwrap", - "unicode-width", - "vec_map", +] + +[[package]] +name = "clap_lex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "cmake" +version = "0.1.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c682c223677e0e5b6b7f63a64b9351844c3f1b1678a68b7ee617e30fb082620e" +dependencies = [ + "cc", ] [[package]] @@ -227,48 +245,30 @@ dependencies = [ "cfg-if 1.0.0", ] -[[package]] -name = "crossbeam-channel" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-utils", -] - [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ - "cfg-if 1.0.0", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" -version = "0.9.14" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "autocfg", - "cfg-if 1.0.0", "crossbeam-utils", - "memoffset", - "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.15" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" -dependencies = [ - "cfg-if 1.0.0", -] +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "csv" @@ -366,15 +366,14 @@ dependencies = [ [[package]] name = "faster" -version = "0.2.2" +version = "0.2.3" dependencies = [ "assert_cmd", "bio", "clap", - "flate2", "indicatif", + "kseq", "predicates", - "rand", "rayon", "regex", ] @@ -400,6 +399,7 @@ dependencies = [ "cfg-if 0.1.10", "crc32fast", "libc", + "libz-sys", "miniz_oxide", ] @@ -473,15 +473,6 @@ dependencies = [ "libc", ] -[[package]] -name = "hermit-abi" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" -dependencies = [ - "libc", -] - [[package]] name = "indexmap" version = "1.6.0" @@ -538,6 +529,17 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" +[[package]] +name = "kseq" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ddfcd809bada51897edb15cf1959ea73d9696256d767a5e98fd7529d16cf67" +dependencies = [ + "atty", + "flate2", + "memchr", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -550,6 +552,19 @@ version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +[[package]] +name = "libz-sys" +version = "1.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9b68e50e6e0b26f672573834882eb57759f6db9b3be2ea3c35c91188bb4eaa" +dependencies = [ + "cc", + "cmake", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "matrixmultiply" version = "0.2.3" @@ -561,18 +576,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.3.4" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" - -[[package]] -name = "memoffset" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" -dependencies = [ - "autocfg", -] +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "miniz_oxide" @@ -650,16 +656,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "num_cpus" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" -dependencies = [ - "hermit-abi 0.2.6", - "libc", -] - [[package]] name = "number_prefix" version = "0.4.0" @@ -675,6 +671,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "os_str_bytes" +version = "6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" + [[package]] name = "petgraph" version = "0.5.1" @@ -685,6 +687,12 @@ dependencies = [ "indexmap", ] +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + [[package]] name = "portable-atomic" version = "1.6.0" @@ -799,9 +807,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] name = "rayon" -version = "1.7.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", @@ -809,14 +817,12 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.11.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ - "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "num_cpus", ] [[package]] @@ -861,12 +867,6 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - [[package]] name = "semver" version = "0.1.20" @@ -904,6 +904,12 @@ dependencies = [ "serde", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "snafu" version = "0.6.9" @@ -936,9 +942,9 @@ dependencies = [ [[package]] name = "strsim" -version = "0.8.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "strum" @@ -969,6 +975,15 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "termtree" version = "0.2.3" @@ -977,12 +992,9 @@ checksum = "13a4ec180a2de59b57434704ccfad967f789b12737738798fa08798cd5824c16" [[package]] name = "textwrap" -version = "0.11.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] +checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "thread_local" @@ -1017,6 +1029,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "vec_map" version = "0.8.2" @@ -1048,6 +1066,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index d3316c8..94028f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,21 +1,20 @@ [package] name = "faster" -description = "fast statistics and more for a fastx file" +description = "fast statistics and filtering for a fastx file" readme = "README.md" homepage = "https://github.com/angelovangel/faster" repository = "https://github.com/angelovangel/faster" keywords = ["fastq", "bio"] -version = "0.2.2" +version = "0.2.3" authors = ["Angel Angelov "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +kseq = "0.5.3" bio = "*" -flate2 = "1.0.18" -clap = "~2.33.3" -rand = "0.7.3" +clap = "3.2.25" regex = "1" rayon = "1.5" indicatif = "0.17.8" diff --git a/src/main.rs b/src/main.rs index 1764c73..39fc38a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,129 +1,86 @@ -use bio::io::{fastq, fastq::FastqRead}; use bio::seq_analysis::gc::gc_content; -use flate2::bufread; -//use rand::seq::IteratorRandom; +use modules::write_fastq; use regex::{bytes::RegexSet, Regex}; -//use std::io::Read; -use std::{fs, io, io::BufRead, io::BufReader, process}; +use std::{fs, io::BufRead, io::BufReader, process, time::Duration}; use indicatif::{HumanCount, ProgressBar}; -use std::time::Duration; +use kseq::parse_path; -extern crate clap; +//extern crate clap; use clap::{App, Arg, ArgGroup}; // own functions mod modules; -// fastq reader, file as arg, decide based on extension -fn get_fastq_reader(path: &String) -> Box { - if path.ends_with(".gz") { - let f = fs::File::open(path).unwrap(); - Box::new(bufread::MultiGzDecoder::new(BufReader::new(f))) - } else { - let f = fs::File::open(path).unwrap(); - Box::new(BufReader::new(f)) - } -} - -fn samplefq(path: &String, n: usize) { - let records = fastq::Reader::new(get_fastq_reader(path)) - .records() - .step_by(n); - let mut writer = fastq::Writer::new(io::stdout()); - - // sampled is a vector of result types - //let sampled = records.choose_multiple(&mut rng, n); - for record in records { - // get the record - let r = record.unwrap(); - - writer - .write_record(&r) - .expect("Failed to write fastq record!"); - } -} - fn main() { let matches = App::new("faster") .version("0.2.1") .author("Angel Angelov ") .about("fast statistics and more for 1 fastq file") - .arg(Arg::with_name("skip_header") - .short("s") + .short('s') .long("skip_header") .help("skip header in table output")) - .arg(Arg::with_name("table") - .short("t") + .short('t') .long("table") .help("Output a (tab separated) table with statistics")) - .arg(Arg::with_name("len") - .short("l") + .short('l') .long("len") .help("Output read lengths, one line per read")) - .arg(Arg::with_name("gc") - .short("g") + .short('g') .long("gc") .help("Output GC-content, one line per read")) - .arg(Arg::with_name("qscore") - .short("q") + .short('q') .long("qscore") .help("Output 'mean' read phred scores, one line per read. For this, the mean of the base probabilities for each read is calculated, and the result is converted back to a phred score")) - .arg(Arg::with_name("nx") .long("nx") - .short("x") + .short('x') .takes_value(true) .help("Output NX value, provide the desired NX value as 0.5 for e.g. N50 [numeric]")) .arg(Arg::with_name("qyield") .long("qyield") - .short("y") + .short('y') .takes_value(true) .help("Percent bases with Q score of x or higher (use range 8..60)")) .arg(Arg::with_name("sample") //.short("s") .long("sample") - .short("p") + .short('p') .takes_value(true) .help("Sub-sample sequences by proportion (0.0 to 1.0). Slow on large files!")) - .arg(Arg::with_name("filterl") - .short("f") + .short('f') .long("filterl") .takes_value(true) .allow_hyphen_values(true) //important to parse negative integers .help("Filter reads based on length - use positive integer to filter for reads LONGER than [integer] and negative integer to filter for reads that are SHORTER than [integer]")) .arg(Arg::with_name("filterq") - .short("w") + .short('w') .long("filterq") .takes_value(true) .allow_hyphen_values(true) //important to parse negative integers .help("Filter reads based on 'mean' read quality - use positive integer to filter for reads with BETTER quality than [integer] and negative integer to filter for reads with WORSE wuality than [integer]. Use range 8..60 for qscores")) - - .arg(Arg::with_name("trim_front") .long("trim_front") - .short("a") + .short('a') .takes_value(true) .help("Trim all reads [integer] bases from the beginning")) - .arg(Arg::with_name("trim_tail") .long("trim_tail") - .short("b") + .short('b') .takes_value(true) .help("Trim all reads [integer] bases from the end")) - .arg(Arg::with_name("regex_string") .long("regex_string") - .short("r") + .short('r') .takes_value(true) .help("Output only reads whose id field matches a regex [string] pattern. See https://docs.rs/regex/1.4.2/regex/#functions")) .arg(Arg::with_name("regex_file") .long("regex_file") - .short("z") + .short('z') .takes_value(true) .help("Output only reads whose id field matches a regex [string] pattern. The regex patterns are read from a file, one line per pattern.")) .arg(Arg::with_name("INPUT") @@ -135,60 +92,32 @@ fn main() { .group(ArgGroup::with_name("group") .required(true).args(&["table", "len", "gc", "qscore", "filterl", "filterq", "sample", "trim_front", "trim_tail", "regex_string", "regex_file", "nx", "qyield"])) .get_matches(); - //println!("Working on {}", matches.value_of("INPUT").unwrap()); - // read file - // Here we can call .unwrap() because the argument is required. - let infile = matches.value_of("INPUT").unwrap().to_string(); - - let mut reader = fastq::Reader::new(get_fastq_reader(&infile)); - let mut record = fastq::Record::new(); - // let mut writer = fastq::Writer::new(io::stdout()); - // define writer in loop where necessary, otherwise undefined behaviour! - - // read first record, then check for arguments - reader - .read(&mut record) - .expect("Failed to parse fastq record!"); - // stay in loop until all records are read + let infile = matches.value_of("INPUT").unwrap().to_string(); + let mut records = parse_path(&infile).unwrap(); + //case len if matches.is_present("len") { - while !record.is_empty() { - let len = record.seq().len() as i64; - println!("{}", len); - - reader - .read(&mut record) - .expect("Failed to parse fastq record!"); + while let Some(record) = records.iter_record().unwrap() { + println!("{}", record.len()); } process::exit(0); // case gc } else if matches.is_present("gc") { - while !record.is_empty() { - let seq = record.seq(); + while let Some(record) = records.iter_record().unwrap() { + let seq = record.seq().as_bytes(); println!("{}", gc_content(seq)); - - reader - .read(&mut record) - .expect("Failed to parse fastq record!"); } process::exit(0); // case qscore } else if matches.is_present("qscore") { - while !record.is_empty() { - let mean_errorp = modules::qscore_probs(record.qual()) / record.seq().len() as f32; + while let Some(record) = records.iter_record().unwrap() { + let mean_errorp = modules::qscore_probs(record.qual().as_bytes()) / record.seq().len() as f32; //println!("{:.4}", mean_errorp); println!("{:.4}", -10.0 * mean_errorp.log10()); - - //let qscore = modules::phred_gm( &record.qual() ); - //println!("{:.4}", qscore); - - reader - .read(&mut record) - .expect("Failed to parse fastq record!"); } process::exit(0); @@ -203,25 +132,17 @@ fn main() { // error on invalid input, rather than trying to guess match filterlen { Ok(x) => { - while !record.is_empty() { - let mut writer = fastq::Writer::new(io::stdout()); + while let Some(record) = records.iter_record().unwrap() { let seqlen = record.seq().len() as i32; if x >= 0 { if seqlen > x { - writer - .write_record(&record) - .expect("Failed to write fastq record!"); + write_fastq(record); } } else if x < 0 { if seqlen < x.abs() { - writer - .write_record(&record) - .expect("Failed to write fastq record!") + write_fastq(record); } } - reader - .read(&mut record) - .expect("Failed to parse fastq record!"); } } Err(e) => eprintln!("Did you use an integer for filter? The error is: '{}'", e), @@ -238,27 +159,19 @@ fn main() { match filterq { q if (8..=60).contains(&q) => { // do stuff - while !record.is_empty() { - let mut writer = fastq::Writer::new(io::stdout()); - let mean_errorp = modules::qscore_probs(record.qual()) / record.seq().len() as f32; + while let Some(record) = records.iter_record().unwrap() { + let mean_errorp = modules::qscore_probs(record.qual().as_bytes()) / record.seq().len() as f32; let mean_qscore = -10.0 * mean_errorp.log10(); let q_f32 = q as f32; if q >= 0 { if mean_qscore > q_f32 { - writer - .write_record(&record) - .expect("Failed to write fastq record!"); + write_fastq(record); } } else if q < 0 { if mean_qscore < q_f32.abs() { - writer - .write_record(&record) - .expect("Failed to write fastq record!") + write_fastq(record); } } - reader - .read(&mut record) - .expect("Failed to parse fastq record!"); } } _ => { @@ -281,14 +194,9 @@ fn main() { // do work let mut lengths: Vec = Vec::new(); - while !record.is_empty() { + while let Some(record) = records.iter_record().unwrap() { let len = record.seq().len() as i64; - lengths.push(len); - - reader - .read(&mut record) - .expect("Failed to parse fastq record!"); - + lengths.push(len); } let nx = modules::get_nx(&mut lengths, 1.0 - nxvalue); let nx_value = nxvalue * 100.0; @@ -314,14 +222,10 @@ fn main() { let mut bases: i64 = 0; let mut qualx: i64 = 0; // do work - while !record.is_empty() { + while let Some(record) = records.iter_record().unwrap() { let len = record.seq().len() as i64; bases += len; - qualx += modules::get_qual_bases(record.qual(), 33 + qvalue); // 33 offset - - reader - .read(&mut record) - .expect("Failed to parse fastq record!") + qualx += modules::get_qual_bases(record.qual().as_bytes(), 33 + qvalue); // 33 offset } let qx = qualx as f64 / bases as f64 * 100.0; println!("Q{}\t{:.2}", qvalue, qx); @@ -345,8 +249,16 @@ fn main() { match fraction { // see x if (0.0..=1.0).contains(&x) => { - - samplefq(&infile, (1 as f32/fraction) as usize); // 1/fraction gives step_by + let nth = 1 as f32/fraction; // 1/fraction gives step_by + let mut recn = 0; + while let Some(record) = records.iter_record().unwrap() { + recn += 1; + if recn != nth as i32 { + continue; + } + recn = 0; + write_fastq(record); + } process::exit(0); } _ => { @@ -365,28 +277,19 @@ fn main() { .parse::() .expect("failed to parse trim value!"); - while !record.is_empty() { - // new writer? - let mut writer = fastq::Writer::new(io::stdout()); - let check = record.check(); - if check.is_err() { - panic!("I got a rubbish record!") - } - //let seqlen = record.seq().len(); - let id = record.id(); - let desc = record.desc(); + while let Some(record) = records.iter_record().unwrap() { + // the new sequence is trim..seq.len let newseq = &record.seq()[trimvalue..]; let newqual = &record.qual()[trimvalue..]; - let newrec = fastq::Record::with_attrs(id, desc, newseq, newqual); - - writer - .write_record(&newrec) - .expect("Failed to write fastq record!"); - - reader - .read(&mut record) - .expect("Failed to parse fastq record!"); + + println!( + "{} {}\n{}\n{}\n{}", + "@".to_string() + record.head(), record.des(), + newseq, + "+", + newqual + ); } process::exit(0); } else if matches.is_present("trim_tail") { @@ -397,24 +300,20 @@ fn main() { .parse::() .expect("failed to parse trim value!"); - while !record.is_empty() { - let mut writer = fastq::Writer::new(io::stdout()); - let seqlen = record.seq().len(); - let id = record.id(); - let desc = record.desc(); + while let Some(record) = records.iter_record().unwrap() { + // the new sequence is 0..seq.len - trim - let trimright = seqlen - trimvalue; + let trimright = record.len() - trimvalue; let newseq = &record.seq()[..trimright]; let newqual = &record.qual()[..trimright]; - let newrec = fastq::Record::with_attrs(id, desc, newseq, newqual); - - writer - .write_record(&newrec) - .expect("Failed to write fastq record!"); - reader - .read(&mut record) - .expect("Failed to parse fastq record!"); + println!( + "{} {}\n{}\n{}\n{}", + "@".to_string() + record.head(), record.des(), + newseq, + "+", + newqual + ); } process::exit(0); } else if matches.is_present("regex_string") { @@ -423,18 +322,11 @@ fn main() { let re = Regex::new(string).expect("Failed to construct regex from string!"); - while !record.is_empty() { - let mut writer = fastq::Writer::new(io::stdout()); - let readid = record.id(); + while let Some(record) = records.iter_record().unwrap() { + let readid = record.head(); if re.is_match(readid) { - writer - .write_record(&mut record) - .expect("Error writing fastq record!"); + write_fastq(record); } - - reader - .read(&mut record) - .expect("Failed to parse fastq record!"); } process::exit(0); } else if matches.is_present("regex_file") { @@ -452,19 +344,12 @@ fn main() { let re_set = RegexSet::new(&revec).unwrap(); // write record to stdout in case of match - while !record.is_empty() { - let mut writer = fastq::Writer::new(io::stdout()); - let readid = record.id().as_bytes(); // as.bytes because RegexSet matches on bytes + while let Some(record) = records.iter_record().unwrap() { + let readid = record.head().as_bytes(); // as.bytes because RegexSet matches on bytes if re_set.is_match(readid) { - writer - .write_record(&mut record) - .expect("Error writing fastq record!"); + write_fastq(record); } - - reader - .read(&mut record) - .expect("Failed to parse fastq record!"); } //println!("vector: {:?}", &revec); process::exit(0); @@ -482,24 +367,20 @@ fn main() { let pb = ProgressBar::new_spinner(); pb.enable_steady_tick(Duration::from_millis(120)); - while !record.is_empty() { + while let Some(record) = records.iter_record().unwrap() { //let seq = record.seq(); - let len = record.seq().len() as i64; // here have to accomodate bigger numbers, as bases can get > 2^32 + let len = record.len() as i64; // here have to accomodate bigger numbers, as bases can get > 2^32 reads += 1; bases += len; - num_n += modules::get_n_bases(record.seq()); - qual20 += modules::get_qual_bases(record.qual(), 53); // 33 offset - qual30 += modules::get_qual_bases(record.qual(), 63); + num_n += modules::get_n_bases(record.seq().as_bytes()); + qual20 += modules::get_qual_bases(record.qual().as_bytes(), 53); // 33 offset + qual30 += modules::get_qual_bases(record.qual().as_bytes(), 63); minlen = len.min(minlen); maxlen = len.max(maxlen); len_vector.push(len); let message = format!("Processed reads: {}", HumanCount(reads as u64).to_string()); pb.set_message(message); - - reader - .read(&mut record) - .expect("Failed to parse fastq record!"); } let mean_len = modules::mean(&len_vector); diff --git a/src/modules.rs b/src/modules.rs index 49c6c13..6c2b57d 100644 --- a/src/modules.rs +++ b/src/modules.rs @@ -1,6 +1,6 @@ // simple helper functions for calcuting mean, quartiles etc use rayon::prelude::*; - +use kseq::record::Fastx; pub fn mean(numbers: &[i64]) -> f64 { numbers.par_iter().sum::() as f64 / numbers.len() as f64 } @@ -84,6 +84,16 @@ pub fn qscore_probs(q: &[u8]) -> f32 { } qprob_sum } + +pub fn write_fastq(rec: Fastx<'_>) { + println!( + "{} {}\n{}\n{}\n{}", + "@".to_string() + rec.head(), rec.des(), + rec.seq(), + "+", + rec.qual() + ); +} // get geometric mean from phred scores // pub fn phred_gm(q: &[u8]) -> f64 { // let mut phred_product = 0.;