diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 9abeb7f..16e2822 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -16,7 +16,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build - run: cargo build --verbose + run: cargo build --release --verbose - name: File sizes run: find src/ -name '*.rs' | xargs wc -l | sort -nr - name: Run tests diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index d9ff504..1944962 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -2,7 +2,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rayon::iter::ParallelIterator; #[cfg(feature = "tdf")] use timsrust::readers::FrameReader; -use timsrust::readers::{SpectrumReader, SpectrumReaderConfig}; +use timsrust::readers::SpectrumReader; const DDA_TEST: &str = "/mnt/d/data/mpib/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/"; diff --git a/src/domain_converters.rs b/src/domain_converters.rs index e38bbf4..7aedb74 100644 --- a/src/domain_converters.rs +++ b/src/domain_converters.rs @@ -10,6 +10,5 @@ pub use tof_to_mz::Tof2MzConverter; /// Convert from one domain (e.g. Time of Flight) to another (m/z). pub trait ConvertableDomain { fn convert + Copy>(&self, value: T) -> f64; - fn invert + Copy>(&self, value: T) -> f64; } diff --git a/src/errors.rs b/src/errors.rs index 7758b0b..dfa4b0b 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -2,7 +2,7 @@ use crate::io::readers::{ FrameReaderError, MetadataReaderError, QuadrupoleSettingsReaderError, }; -use crate::io::readers::{PrecursorReaderError, SpectrumReaderError}; +use crate::{io::readers::PrecursorReaderError, readers::SpectrumReaderError}; /// An error that is produced by timsrust (uses [thiserror]). 
#[derive(thiserror::Error, Debug)] diff --git a/src/io/readers.rs b/src/io/readers.rs index c13f808..b33f6a3 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -7,6 +7,7 @@ mod precursor_reader; #[cfg(feature = "tdf")] mod quad_settings_reader; mod spectrum_reader; +mod timstof; #[cfg(feature = "tdf")] pub use frame_reader::*; @@ -16,3 +17,4 @@ pub use precursor_reader::*; #[cfg(feature = "tdf")] pub use quad_settings_reader::*; pub use spectrum_reader::*; +pub use timstof::*; diff --git a/src/io/readers/file_readers/parquet_reader.rs b/src/io/readers/file_readers/parquet_reader.rs index 0525a14..72b44be 100644 --- a/src/io/readers/file_readers/parquet_reader.rs +++ b/src/io/readers/file_readers/parquet_reader.rs @@ -1,7 +1,10 @@ pub mod precursors; +use std::{fs::File, io, str::FromStr}; + use parquet::file::reader::{FileReader, SerializedFileReader}; -use std::{fs::File, io, path::Path, str::FromStr}; + +use crate::readers::TimsTofPathError; pub trait ReadableParquetTable { fn update_from_parquet_file(&mut self, key: &str, value: String); @@ -11,12 +14,13 @@ pub trait ReadableParquetTable { } fn from_parquet_file( - file_name: impl AsRef, - ) -> Result, ParquetError> + path: impl crate::readers::TimsTofPathLike, + ) -> Result, ParquetReaderError> where Self: Sized + Default, { - let file: File = File::open(file_name)?; + let path = path.to_timstof_path()?; + let file: File = File::open(path.ms2_parquet()?)?; let reader: SerializedFileReader = SerializedFileReader::new(file)?; reader @@ -36,9 +40,11 @@ pub trait ReadableParquetTable { } #[derive(Debug, thiserror::Error)] -pub enum ParquetError { +pub enum ParquetReaderError { #[error("{0}")] IO(#[from] io::Error), #[error("Cannot iterate over row {0}")] - ParquetIO(#[from] parquet::errors::ParquetError), + ParquetError(#[from] parquet::errors::ParquetError), + #[error("{0}")] + TimsTofPathError(#[from] TimsTofPathError), } diff --git a/src/io/readers/file_readers/sql_reader.rs 
b/src/io/readers/file_readers/sql_reader.rs index 279aa06..af24d0a 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -5,31 +5,29 @@ pub mod pasef_frame_msms; pub mod precursors; pub mod quad_settings; -use std::{ - collections::HashMap, - path::{Path, PathBuf}, -}; +use std::collections::HashMap; use rusqlite::{types::FromSql, Connection}; +use crate::readers::{TimsTofPathError, TimsTofPathLike}; + #[derive(Debug)] pub struct SqlReader { connection: Connection, - path: PathBuf, } impl SqlReader { - pub fn open(file_name: impl AsRef) -> Result { - let path = file_name.as_ref().to_path_buf(); - let connection = Connection::open(&path)?; - Ok(Self { connection, path }) + pub fn open(path: impl TimsTofPathLike) -> Result { + let path = path.to_timstof_path()?; + let connection = Connection::open(&path.tdf()?)?; + Ok(Self { connection }) } pub fn read_column_from_table( &self, column_name: &str, table_name: &str, - ) -> Result, SqlError> { + ) -> Result, SqlReaderError> { let query = format!("SELECT {} FROM {}", column_name, table_name); let mut stmt = self.connection.prepare(&query)?; let rows = stmt.query_map([], |row| match row.get::(0) { @@ -39,10 +37,6 @@ impl SqlReader { let result = rows.collect::, _>>()?; Ok(result) } - - pub fn get_path(&self) -> PathBuf { - self.path.clone() - } } pub trait ReadableSqlTable { @@ -50,7 +44,7 @@ pub trait ReadableSqlTable { fn from_sql_row(row: &rusqlite::Row) -> Self; - fn from_sql_reader(reader: &SqlReader) -> Result, SqlError> + fn from_sql_reader(reader: &SqlReader) -> Result, SqlReaderError> where Self: Sized, { @@ -59,7 +53,9 @@ pub trait ReadableSqlTable { let rows = stmt.query_map([], |row| Ok(Self::from_sql_row(row)))?; let result = rows.collect::, _>>()?; if result.len() == 0 { - Err(SqlError(rusqlite::Error::QueryReturnedNoRows)) + Err(SqlReaderError::SqlError( + rusqlite::Error::QueryReturnedNoRows, + )) } else { Ok(result) } @@ -71,7 +67,7 @@ pub trait 
ReadableSqlHashMap { fn from_sql_reader( reader: &SqlReader, - ) -> Result, SqlError> + ) -> Result, SqlReaderError> where Self: Sized, { @@ -99,6 +95,10 @@ impl ParseDefault for rusqlite::Row<'_> { } } -#[derive(thiserror::Error, Debug)] -#[error("{0}")] -pub struct SqlError(#[from] rusqlite::Error); +#[derive(Debug, thiserror::Error)] +pub enum SqlReaderError { + #[error("{0}")] + SqlError(#[from] rusqlite::Error), + #[error("{0}")] + TimsTofPathError(#[from] TimsTofPathError), +} diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index 56dc3e3..f5aa4df 100644 --- a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -3,62 +3,87 @@ mod tdf_blobs; use memmap2::Mmap; use std::fs::File; use std::io; -use std::path::Path; pub use tdf_blobs::*; use zstd::decode_all; +use crate::readers::{TimsTofFileType, TimsTofPathError, TimsTofPathLike}; + const U32_SIZE: usize = std::mem::size_of::(); const HEADER_SIZE: usize = 2; #[derive(Debug)] pub struct TdfBlobReader { - mmap: Mmap, - global_file_offset: usize, + bin_file_reader: TdfBinFileReader, } impl TdfBlobReader { - // TODO parse compression1 - pub fn new( - file_name: impl AsRef, - ) -> Result { - let path = file_name.as_ref().to_path_buf(); - let file = File::open(&path)?; - let mmap = unsafe { Mmap::map(&file)? 
}; - let reader = Self { - mmap, - global_file_offset: 0, - }; + pub fn new(path: impl TimsTofPathLike) -> Result { + let bin_file_reader = TdfBinFileReader::new(path)?; + let reader = Self { bin_file_reader }; Ok(reader) } pub fn get(&self, offset: usize) -> Result { - let offset = self.global_file_offset + offset; + let offset = self.bin_file_reader.global_file_offset + offset; let byte_count = self + .bin_file_reader .get_byte_count(offset) .ok_or(TdfBlobReaderError::InvalidOffset(offset))?; - let compressed_bytes = self - .get_compressed_bytes(offset, byte_count) + let data = self + .bin_file_reader + .get_data(offset, byte_count) .ok_or(TdfBlobReaderError::CorruptData)?; - let bytes = decode_all(compressed_bytes) - .map_err(|_| TdfBlobReaderError::Decompression)?; + let bytes = + decode_all(data).map_err(|_| TdfBlobReaderError::Decompression)?; let blob = TdfBlob::new(bytes)?; Ok(blob) } +} + +#[derive(Debug)] +struct TdfBinFileReader { + mmap: Mmap, + global_file_offset: usize, +} + +impl TdfBinFileReader { + // TODO parse compression1 + fn new(path: impl TimsTofPathLike) -> Result { + let path = path.to_timstof_path()?; + let bin_path = match path.file_type() { + #[cfg(feature = "tdf")] + TimsTofFileType::TDF => path.tdf_bin()?, + #[cfg(feature = "minitdf")] + TimsTofFileType::MiniTDF => path.ms2_bin()?, + }; + let file = File::open(bin_path)?; + let mmap = unsafe { Mmap::map(&file)? }; + let reader = Self { + mmap, + global_file_offset: 0, + }; + Ok(reader) + } fn get_byte_count(&self, offset: usize) -> Option { let start = offset as usize; - let end = (offset + U32_SIZE) as usize; + let end = start + U32_SIZE as usize; let raw_byte_count = self.mmap.get(start..end)?; let byte_count = u32::from_le_bytes(raw_byte_count.try_into().ok()?) 
as usize; Some(byte_count) } - fn get_compressed_bytes( - &self, - offset: usize, - byte_count: usize, - ) -> Option<&[u8]> { + // fn get_scan_count(&self, offset: usize) -> Option { + // let start = (offset + U32_SIZE) as usize; + // let end = start + U32_SIZE as usize; + // let raw_scan_count = self.mmap.get(start..end)?; + // let scan_count = + // u32::from_le_bytes(raw_scan_count.try_into().ok()?) as usize; + // Some(scan_count) + // } + + fn get_data(&self, offset: usize, byte_count: usize) -> Option<&[u8]> { let start = offset + HEADER_SIZE * U32_SIZE; let end = offset + byte_count; self.mmap.get(start..end) @@ -75,10 +100,10 @@ pub struct IndexedTdfBlobReader { #[cfg(feature = "minitdf")] impl IndexedTdfBlobReader { pub fn new( - file_name: impl AsRef, + path: impl TimsTofPathLike, binary_offsets: Vec, ) -> Result { - let blob_reader = TdfBlobReader::new(file_name)?; + let blob_reader = TdfBlobReader::new(path)?; let reader = Self { binary_offsets, blob_reader: blob_reader, @@ -111,6 +136,10 @@ pub enum TdfBlobReaderError { Decompression, #[error("Invalid offset {0}")] InvalidOffset(usize), + #[error("{0}")] + TimsTofPathError(#[from] TimsTofPathError), + #[error("No binary file found")] + NoBinary, } #[derive(Debug, thiserror::Error)] diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index 7d959b5..6a0767a 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -1,35 +1,25 @@ -use std::{ - path::{Path, PathBuf}, - sync::Arc, - vec, -}; +use std::sync::Arc; use rayon::iter::{IntoParallelIterator, ParallelIterator}; #[cfg(feature = "timscompress")] -use timscompress::reader::{ - CompressedTdfBlobReader, CompressedTdfBlobReaderError, -}; +use timscompress::reader::CompressedTdfBlobReader; -use crate::{ - ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, - utils::find_extension, -}; +use crate::ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}; use super::{ file_readers::{ 
sql_reader::{ frame_groups::SqlWindowGroup, frames::SqlFrame, ReadableSqlTable, - SqlError, SqlReader, + SqlReader, SqlReaderError, }, tdf_blob_reader::{TdfBlob, TdfBlobReader, TdfBlobReaderError}, }, MetadataReader, MetadataReaderError, QuadrupoleSettingsReader, - QuadrupoleSettingsReaderError, + QuadrupoleSettingsReaderError, TimsTofPathLike, }; #[derive(Debug)] pub struct FrameReader { - path: PathBuf, tdf_bin_reader: TdfBlobReader, #[cfg(feature = "timscompress")] compressed_reader: CompressedTdfBlobReader, @@ -43,12 +33,9 @@ pub struct FrameReader { } impl FrameReader { - pub fn new(path: impl AsRef) -> Result { - let sql_path = find_extension(&path, "analysis.tdf").ok_or( - FrameReaderError::FileNotFound("analysis.tdf".to_string()), - )?; + pub fn new(path: impl TimsTofPathLike) -> Result { let compression_type = - match MetadataReader::new(&sql_path)?.compression_type { + match MetadataReader::new(&path)?.compression_type { 2 => 2, #[cfg(feature = "timscompress")] 3 => 3, @@ -59,14 +46,12 @@ impl FrameReader { }, }; - let tdf_sql_reader = SqlReader::open(sql_path)?; + let tdf_sql_reader = SqlReader::open(&path)?; let sql_frames = SqlFrame::from_sql_reader(&tdf_sql_reader)?; - let bin_path = find_extension(&path, "analysis.tdf_bin").ok_or( - FrameReaderError::FileNotFound("analysis.tdf_bin".to_string()), - )?; - let tdf_bin_reader = TdfBlobReader::new(&bin_path)?; + let tdf_bin_reader = TdfBlobReader::new(&path)?; #[cfg(feature = "timscompress")] - let compressed_reader = CompressedTdfBlobReader::new(&bin_path)?; + let compressed_reader = CompressedTdfBlobReader::new(&path) + .ok_or_else(|| FrameReaderError::TimscompressError)?; let acquisition = if sql_frames.iter().any(|x| x.msms_type == 8) { AcquisitionType::DDAPASEF } else if sql_frames.iter().any(|x| x.msms_type == 9) { @@ -84,8 +69,7 @@ impl FrameReader { window_groups[window_group.frame - 1] = window_group.window_group; } - quadrupole_settings = - 
QuadrupoleSettingsReader::new(tdf_sql_reader.get_path())?; + quadrupole_settings = QuadrupoleSettingsReader::new(&path)?; } else { quadrupole_settings = vec![]; } @@ -115,7 +99,6 @@ impl FrameReader { as usize; let offsets = sql_frames.iter().map(|x| x.binary_offset).collect(); let reader = Self { - path: path.as_ref().to_path_buf(), tdf_bin_reader, frames, acquisition, @@ -133,6 +116,7 @@ impl FrameReader { Ok(reader) } + // TODO make option result pub fn get_binary_offset(&self, index: usize) -> usize { self.offsets[index] } @@ -245,10 +229,6 @@ impl FrameReader { pub fn len(&self) -> usize { self.frames.len() } - - pub fn get_path(&self) -> PathBuf { - self.path.clone() - } } fn read_scan_offsets( @@ -334,8 +314,8 @@ fn get_frame_without_data( #[derive(Debug, thiserror::Error)] pub enum FrameReaderError { #[cfg(feature = "timscompress")] - #[error("{0}")] - CompressedTdfBlobReaderError(#[from] CompressedTdfBlobReaderError), + #[error("Timscompress error")] + TimscompressError, #[error("{0}")] TdfBlobReaderError(#[from] TdfBlobReaderError), #[error("{0}")] @@ -343,7 +323,7 @@ pub enum FrameReaderError { #[error("{0}")] FileNotFound(String), #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("Corrupt Frame")] CorruptFrame, #[error("{0}")] diff --git a/src/io/readers/metadata_reader.rs b/src/io/readers/metadata_reader.rs index cf29a9a..a5ba1db 100644 --- a/src/io/readers/metadata_reader.rs +++ b/src/io/readers/metadata_reader.rs @@ -1,12 +1,15 @@ -use std::{collections::HashMap, fmt::Debug, path::Path, str::FromStr}; +use std::{collections::HashMap, fmt::Debug, str::FromStr}; use crate::{ domain_converters::{Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}, ms_data::Metadata, }; -use super::file_readers::sql_reader::{ - metadata::SqlMetadata, ReadableSqlHashMap, SqlError, SqlReader, +use super::{ + file_readers::sql_reader::{ + metadata::SqlMetadata, ReadableSqlHashMap, SqlReader, SqlReaderError, + }, + 
TimsTofPathLike, }; const OTOF_CONTROL: &str = "Bruker otofControl"; @@ -15,10 +18,9 @@ pub struct MetadataReader; impl MetadataReader { pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, ) -> Result { - let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(&sql_path)?; + let tdf_sql_reader = SqlReader::open(path)?; let sql_metadata: HashMap = SqlMetadata::from_sql_reader(&tdf_sql_reader)?; let compression_type = @@ -40,7 +42,6 @@ impl MetadataReader { .max_by(|a, b| a.partial_cmp(b).unwrap()) .unwrap(); let metadata = Metadata { - path: path.as_ref().to_path_buf(), rt_converter: Frame2RtConverter::from_values(rt_values), im_converter: get_im_converter(&sql_metadata, &tdf_sql_reader)?, mz_converter: get_mz_converter(&sql_metadata)?, @@ -123,12 +124,8 @@ fn parse_value( #[derive(Debug, thiserror::Error)] pub enum MetadataReaderError { - // #[error("{0}")] - // TdfBlobReaderError(#[from] TdfBlobReaderError), - // #[error("{0}")] - // FileNotFound(String), #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("Key not found: {0}")] KeyNotFound(String), #[error("Key not parsable: {0}")] diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index 04f0324..612456d 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -4,7 +4,6 @@ mod minitdf; mod tdf; use core::fmt; -use std::path::{Path, PathBuf}; #[cfg(feature = "minitdf")] use minitdf::{MiniTDFPrecursorReader, MiniTDFPrecursorReaderError}; @@ -15,6 +14,7 @@ use crate::ms_data::Precursor; #[cfg(feature = "tdf")] use super::FrameWindowSplittingConfiguration; +use super::{TimsTofFileType, TimsTofPath, TimsTofPathError, TimsTofPathLike}; pub struct PrecursorReader { precursor_reader: Box, @@ -31,7 +31,9 @@ impl PrecursorReader { PrecursorReaderBuilder::default() } - pub fn new(path: impl AsRef) -> Result { + pub fn new( + path: impl TimsTofPathLike, + ) -> Result { 
Self::build().with_path(path).finalize() } @@ -46,15 +48,17 @@ impl PrecursorReader { #[derive(Debug, Default, Clone)] pub struct PrecursorReaderBuilder { - path: PathBuf, + path: Option, #[cfg(feature = "tdf")] config: FrameWindowSplittingConfiguration, } impl PrecursorReaderBuilder { - pub fn with_path(&self, path: impl AsRef) -> Self { + pub fn with_path(&self, path: impl TimsTofPathLike) -> Self { + // TODO + let path = Some(path.to_timstof_path().unwrap()); Self { - path: path.as_ref().to_path_buf(), + path, ..self.clone() } } @@ -70,22 +74,20 @@ impl PrecursorReaderBuilder { } } - pub fn finalize(&self) -> Result { + pub fn finalize(self) -> Result { + let path = match self.path { + None => return Err(PrecursorReaderError::NoPath), + Some(path) => path, + }; let precursor_reader: Box = - match self.path.extension().and_then(|e| e.to_str()) { + match path.file_type() { #[cfg(feature = "minitdf")] - Some("parquet") => { - Box::new(MiniTDFPrecursorReader::new(self.path.clone())?) + TimsTofFileType::MiniTDF => { + Box::new(MiniTDFPrecursorReader::new(path)?) }, #[cfg(feature = "tdf")] - Some("tdf") => Box::new(TDFPrecursorReader::new( - self.path.clone(), - self.config.clone(), - )?), - _ => { - return Err(PrecursorReaderError::PrecursorReaderFileError( - self.path.clone(), - )) + TimsTofFileType::TDF => { + Box::new(TDFPrecursorReader::new(path, self.config)?) 
}, }; let reader = PrecursorReader { precursor_reader }; @@ -106,6 +108,8 @@ pub enum PrecursorReaderError { #[cfg(feature = "tdf")] #[error("{0}")] TDFPrecursorReaderError(#[from] TDFPrecursorReaderError), - #[error("File {0} not valid")] - PrecursorReaderFileError(PathBuf), + #[error("No path provided")] + NoPath, + #[error("{0}")] + TimsTofPathError(#[from] TimsTofPathError), } diff --git a/src/io/readers/precursor_reader/minitdf.rs b/src/io/readers/precursor_reader/minitdf.rs index be11ee0..70c2c25 100644 --- a/src/io/readers/precursor_reader/minitdf.rs +++ b/src/io/readers/precursor_reader/minitdf.rs @@ -1,10 +1,9 @@ -use std::path::Path; - use crate::{ io::readers::file_readers::parquet_reader::{ - precursors::ParquetPrecursor, ParquetError, ReadableParquetTable, + precursors::ParquetPrecursor, ParquetReaderError, ReadableParquetTable, }, ms_data::Precursor, + readers::TimsTofPathLike, }; use super::PrecursorReaderTrait; @@ -16,9 +15,9 @@ pub struct MiniTDFPrecursorReader { impl MiniTDFPrecursorReader { pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, ) -> Result { - let parquet_precursors = ParquetPrecursor::from_parquet_file(&path)?; + let parquet_precursors = ParquetPrecursor::from_parquet_file(path)?; let reader = Self { parquet_precursors }; Ok(reader) } @@ -46,4 +45,4 @@ impl PrecursorReaderTrait for MiniTDFPrecursorReader { #[derive(thiserror::Error, Debug)] #[error("{0}")] -pub struct MiniTDFPrecursorReaderError(#[from] ParquetError); +pub struct MiniTDFPrecursorReaderError(#[from] ParquetReaderError); diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 60d179d..1a37c6d 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -1,17 +1,16 @@ mod dda; mod dia; -use std::path::Path; - use dda::{DDATDFPrecursorReader, DDATDFPrecursorReaderError}; use dia::{DIATDFPrecursorReader, DIATDFPrecursorReaderError}; use crate::{ io::readers::{ - 
file_readers::sql_reader::{SqlError, SqlReader}, + file_readers::sql_reader::{SqlReader, SqlReaderError}, FrameWindowSplittingConfiguration, }, ms_data::{AcquisitionType, Precursor}, + readers::TimsTofPathLike, }; use super::PrecursorReaderTrait; @@ -22,11 +21,10 @@ pub struct TDFPrecursorReader { impl TDFPrecursorReader { pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, splitting_strategy: FrameWindowSplittingConfiguration, ) -> Result { - let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(sql_path)?; + let tdf_sql_reader = SqlReader::open(&path)?; let sql_frames: Vec = tdf_sql_reader.read_column_from_table("ScanMode", "Frames")?; let acquisition_type = if sql_frames.iter().any(|&x| x == 8) { @@ -39,7 +37,7 @@ impl TDFPrecursorReader { let precursor_reader: Box = match acquisition_type { AcquisitionType::DDAPASEF => { - Box::new(DDATDFPrecursorReader::new(path)?) + Box::new(DDATDFPrecursorReader::new(&path)?) }, AcquisitionType::DIAPASEF => Box::new( DIATDFPrecursorReader::new(path, splitting_strategy)?, @@ -70,7 +68,7 @@ impl PrecursorReaderTrait for TDFPrecursorReader { #[derive(Debug, thiserror::Error)] pub enum TDFPrecursorReaderError { #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("{0}")] DDATDFPrecursorReaderError(#[from] DDATDFPrecursorReaderError), #[error("{0}")] diff --git a/src/io/readers/precursor_reader/tdf/dda.rs b/src/io/readers/precursor_reader/tdf/dda.rs index 80cf641..dc41e9d 100644 --- a/src/io/readers/precursor_reader/tdf/dda.rs +++ b/src/io/readers/precursor_reader/tdf/dda.rs @@ -1,16 +1,16 @@ -use std::path::Path; - use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, }, io::readers::{ file_readers::sql_reader::{ - precursors::SqlPrecursor, ReadableSqlTable, SqlError, SqlReader, + precursors::SqlPrecursor, ReadableSqlTable, SqlReader, + SqlReaderError, }, MetadataReader, MetadataReaderError, }, ms_data::Precursor, + 
readers::TimsTofPathLike, }; use super::PrecursorReaderTrait; @@ -24,10 +24,9 @@ pub struct DDATDFPrecursorReader { impl DDATDFPrecursorReader { pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, ) -> Result { - let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(sql_path)?; + let tdf_sql_reader = SqlReader::open(&path)?; let metadata = MetadataReader::new(&path)?; let rt_converter: Frame2RtConverter = metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; @@ -66,7 +65,7 @@ impl PrecursorReaderTrait for DDATDFPrecursorReader { #[derive(Debug, thiserror::Error)] pub enum DDATDFPrecursorReaderError { #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("{0}")] MetadataReaderError(#[from] MetadataReaderError), } diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index 722b80c..e3d0c29 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,16 +1,14 @@ -use std::path::Path; - -use crate::io::readers::FrameWindowSplittingConfiguration; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, }, io::readers::{ - file_readers::sql_reader::{SqlError, SqlReader}, + file_readers::sql_reader::{SqlReader, SqlReaderError}, MetadataReader, MetadataReaderError, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }, ms_data::{Precursor, QuadrupoleSettings}, + readers::{FrameWindowSplittingConfiguration, TimsTofPathLike}, }; use super::PrecursorReaderTrait; @@ -24,11 +22,10 @@ pub struct DIATDFPrecursorReader { impl DIATDFPrecursorReader { pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, splitting_config: FrameWindowSplittingConfiguration, ) -> Result { - let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(sql_path)?; + let tdf_sql_reader = SqlReader::open(&path)?; let metadata = MetadataReader::new(&path)?; let 
rt_converter: Frame2RtConverter = metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; @@ -73,7 +70,7 @@ impl PrecursorReaderTrait for DIATDFPrecursorReader { #[derive(Debug, thiserror::Error)] pub enum DIATDFPrecursorReaderError { #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("{0}")] MetadataReaderError(#[from] MetadataReaderError), #[error("{0}")] diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index 90aa4d3..140905b 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -1,6 +1,5 @@ #[cfg(feature = "serialize")] use serde::{Deserialize, Serialize}; -use std::path::Path; use crate::{ domain_converters::{ConvertableDomain, Scan2ImConverter}, @@ -8,9 +7,12 @@ use crate::{ utils::vec_utils::argsort, }; -use super::file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, quad_settings::SqlQuadSettings, - ReadableSqlTable, SqlError, SqlReader, +use super::{ + file_readers::sql_reader::{ + frame_groups::SqlWindowGroup, quad_settings::SqlQuadSettings, + ReadableSqlTable, SqlReader, SqlReaderError, + }, + TimsTofPathLike, }; pub struct QuadrupoleSettingsReader { @@ -21,10 +23,9 @@ pub struct QuadrupoleSettingsReader { impl QuadrupoleSettingsReader { // TODO: refactor due to large size pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, ) -> Result, QuadrupoleSettingsReaderError> { - let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(&sql_path)?; + let tdf_sql_reader = SqlReader::open(path)?; Self::from_sql_settings(&tdf_sql_reader) } @@ -123,7 +124,7 @@ impl QuadrupoleSettingsReader { #[derive(Debug, thiserror::Error)] pub enum QuadrupoleSettingsReaderError { #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), } type MobilitySpanStep = (f64, f64); diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 
848e843..ce80658 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -1,42 +1,39 @@ +mod builder; +mod config; +mod errors; #[cfg(feature = "minitdf")] mod minitdf; +mod spectrum_trait; #[cfg(feature = "tdf")] mod tdf; -#[cfg(feature = "minitdf")] -use minitdf::{MiniTDFSpectrumReader, MiniTDFSpectrumReaderError}; -use rayon::prelude::*; -#[cfg(feature = "serialize")] -use serde::{Deserialize, Serialize}; -use std::path::{Path, PathBuf}; -#[cfg(feature = "tdf")] -use tdf::{TDFSpectrumReader, TDFSpectrumReaderError}; +use super::TimsTofPathLike; use crate::ms_data::Spectrum; - -#[cfg(feature = "tdf")] -use super::FrameWindowSplittingConfiguration; +pub use builder::SpectrumReaderBuilder; +pub use config::{SpectrumProcessingParams, SpectrumReaderConfig}; +pub use errors::SpectrumReaderError; +use rayon::prelude::*; +use spectrum_trait::SpectrumReaderTrait; pub struct SpectrumReader { spectrum_reader: Box, } impl SpectrumReader { - pub fn build() -> SpectrumReaderBuilder { - SpectrumReaderBuilder::default() + pub fn new( + path: impl TimsTofPathLike, + ) -> Result { + Self::build().with_path(path).finalize() } - pub fn new(path: impl AsRef) -> Result { - Self::build().with_path(path).finalize() + pub fn build() -> SpectrumReaderBuilder { + SpectrumReaderBuilder::default() } pub fn get(&self, index: usize) -> Result { self.spectrum_reader.get(index) } - pub fn get_path(&self) -> PathBuf { - self.spectrum_reader.get_path() - } - pub fn len(&self) -> usize { self.spectrum_reader.len() } @@ -61,97 +58,3 @@ impl SpectrumReader { self.spectrum_reader.calibrate(); } } - -#[derive(Debug, Default, Clone)] -pub struct SpectrumReaderBuilder { - path: PathBuf, - config: SpectrumReaderConfig, -} - -impl SpectrumReaderBuilder { - pub fn with_path(&self, path: impl AsRef) -> Self { - Self { - path: path.as_ref().to_path_buf(), - ..self.clone() - } - } - - pub fn with_config(&self, config: SpectrumReaderConfig) -> Self { - Self { - config: config, 
- ..self.clone() - } - } - - pub fn finalize(&self) -> Result { - let spectrum_reader: Box = - match self.path.extension().and_then(|e| e.to_str()) { - #[cfg(feature = "minitdf")] - Some("ms2") => { - Box::new(MiniTDFSpectrumReader::new(self.path.clone())?) - }, - #[cfg(feature = "tdf")] - Some("d") => Box::new(TDFSpectrumReader::new( - self.path.clone(), - self.config.clone(), - )?), - _ => { - return Err(SpectrumReaderError::SpectrumReaderFileError( - self.path.clone(), - )) - }, - }; - let mut reader = SpectrumReader { spectrum_reader }; - if self.config.spectrum_processing_params.calibrate { - reader.calibrate(); - } - Ok(reader) - } -} - -trait SpectrumReaderTrait: Sync + Send { - fn get(&self, index: usize) -> Result; - fn get_path(&self) -> PathBuf; - fn len(&self) -> usize; - fn calibrate(&mut self); -} - -#[derive(Debug, thiserror::Error)] -pub enum SpectrumReaderError { - #[cfg(feature = "minitdf")] - #[error("{0}")] - MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError), - #[cfg(feature = "tdf")] - #[error("{0}")] - TDFSpectrumReaderError(#[from] TDFSpectrumReaderError), - #[error("File {0} not valid")] - SpectrumReaderFileError(PathBuf), -} - -#[derive(Debug, Clone, Copy)] -#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] -pub struct SpectrumProcessingParams { - pub smoothing_window: u32, - pub centroiding_window: u32, - pub calibration_tolerance: f64, - pub calibrate: bool, -} - -impl Default for SpectrumProcessingParams { - fn default() -> Self { - Self { - smoothing_window: 1, - centroiding_window: 1, - calibration_tolerance: 0.1, - calibrate: false, - } - } -} - -#[derive(Debug, Default, Clone, Copy)] -#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] -pub struct SpectrumReaderConfig { - pub spectrum_processing_params: SpectrumProcessingParams, - #[cfg(feature = "tdf")] - pub frame_splitting_params: FrameWindowSplittingConfiguration, -} diff --git a/src/io/readers/spectrum_reader/builder.rs 
b/src/io/readers/spectrum_reader/builder.rs
new file mode 100644
index 0000000..b191af6
--- /dev/null
+++ b/src/io/readers/spectrum_reader/builder.rs
@@ -0,0 +1,62 @@
+use std::path::PathBuf;
+
+use crate::readers::{TimsTofFileType, TimsTofPathLike};
+
+use super::{
+    errors::SpectrumReaderError, SpectrumReader, SpectrumReaderConfig,
+    SpectrumReaderTrait,
+};
+
+#[cfg(feature = "minitdf")]
+use super::minitdf::MiniTDFSpectrumReader;
+#[cfg(feature = "tdf")]
+use super::tdf::TDFSpectrumReader;
+
+/// Incrementally configures and builds a [`SpectrumReader`].
+#[derive(Debug, Default, Clone)]
+pub struct SpectrumReaderBuilder {
+    path: Option<PathBuf>,
+    config: SpectrumReaderConfig,
+}
+
+impl SpectrumReaderBuilder {
+    /// Sets the dataset path. Validation is deferred to [`Self::finalize`],
+    /// so this setter stays infallible instead of panicking on bad input.
+    pub fn with_path(&self, path: impl TimsTofPathLike) -> Self {
+        Self {
+            path: Some(path.as_ref().to_path_buf()),
+            ..self.clone()
+        }
+    }
+
+    pub fn with_config(&self, config: SpectrumReaderConfig) -> Self {
+        Self {
+            config,
+            ..self.clone()
+        }
+    }
+
+    /// Builds the reader, surfacing a missing or invalid path as an error.
+    pub fn finalize(self) -> Result<SpectrumReader, SpectrumReaderError> {
+        let path = match self.path {
+            None => return Err(SpectrumReaderError::NoPath),
+            Some(path) => path.to_timstof_path()?,
+        };
+        let spectrum_reader: Box<dyn SpectrumReaderTrait> =
+            match path.file_type() {
+                #[cfg(feature = "minitdf")]
+                TimsTofFileType::MiniTDF => {
+                    Box::new(MiniTDFSpectrumReader::new(path)?)
+                },
+                #[cfg(feature = "tdf")]
+                TimsTofFileType::TDF => {
+                    Box::new(TDFSpectrumReader::new(path, self.config)?)
+                },
+            };
+        let mut reader = SpectrumReader { spectrum_reader };
+        if self.config.spectrum_processing_params.calibrate {
+            reader.calibrate();
+        }
+        Ok(reader)
+    }
+}
diff --git a/src/io/readers/spectrum_reader/config.rs b/src/io/readers/spectrum_reader/config.rs
new file mode 100644
index 0000000..d387b49
--- /dev/null
+++ b/src/io/readers/spectrum_reader/config.rs
@@ -0,0 +1,33 @@
+#[cfg(feature = "tdf")]
+use super::super::FrameWindowSplittingConfiguration;
+
+#[cfg(feature = "serialize")]
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Clone, Copy)]
+#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
+pub struct SpectrumProcessingParams {
+    pub smoothing_window: u32,
+    pub centroiding_window: u32,
+    pub calibration_tolerance: f64,
+    pub calibrate: bool,
+}
+
+impl Default for SpectrumProcessingParams {
+    fn default() -> Self {
+        Self {
+            smoothing_window: 1,
+            centroiding_window: 1,
+            calibration_tolerance: 0.1,
+            calibrate: false,
+        }
+    }
+}
+
+#[derive(Debug, Default, Clone, Copy)]
+#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
+pub struct SpectrumReaderConfig {
+    pub spectrum_processing_params: SpectrumProcessingParams,
+    #[cfg(feature = "tdf")]
+    pub frame_splitting_params: FrameWindowSplittingConfiguration,
+}
diff --git a/src/io/readers/spectrum_reader/errors.rs b/src/io/readers/spectrum_reader/errors.rs
new file mode 100644
index 0000000..1782645
--- /dev/null
+++ b/src/io/readers/spectrum_reader/errors.rs
@@ -0,0 +1,20 @@
+#[cfg(feature = "minitdf")]
+use super::minitdf::MiniTDFSpectrumReaderError;
+#[cfg(feature = "tdf")]
+use super::tdf::TDFSpectrumReaderError;
+
+use crate::readers::TimsTofPathError;
+
+#[derive(Debug, thiserror::Error)]
+pub enum SpectrumReaderError {
+    #[cfg(feature = "minitdf")]
+    #[error("{0}")]
+    MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError),
+    #[cfg(feature = "tdf")]
+    #[error("{0}")]
+    TDFSpectrumReaderError(#[from] TDFSpectrumReaderError),
+    #[error("{0}")]
+    TimsTofPathError(#[from] TimsTofPathError),
+    #[error("No path provided")]
+    NoPath,
+}
diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs
index 1a6f3f6..00375b3 100644
--- a/src/io/readers/spectrum_reader/minitdf.rs
+++ b/src/io/readers/spectrum_reader/minitdf.rs
@@ -1,10 +1,8 @@
-use std::path::{Path, PathBuf};
-
 use crate::{
     io::readers::{
         file_readers::{
             parquet_reader::{
-                precursors::ParquetPrecursor, ParquetError,
+                precursors::ParquetPrecursor, ParquetReaderError,
                 ReadableParquetTable,
             },
             tdf_blob_reader::{
@@ -14,14 +12,13 @@
         PrecursorReader, PrecursorReaderError,
     },
     ms_data::Spectrum,
-    utils::find_extension,
+    readers::TimsTofPathLike,
 };
 
 use super::{SpectrumReaderError, SpectrumReaderTrait};
 
 #[derive(Debug)]
 pub struct MiniTDFSpectrumReader {
-    path: PathBuf,
     precursor_reader: PrecursorReader,
     blob_reader: IndexedTdfBlobReader,
     collision_energies: Vec<f64>,
@@ -29,32 +26,23 @@
 impl MiniTDFSpectrumReader {
     pub fn new(
-        path: impl AsRef<Path>,
+        path: impl TimsTofPathLike,
     ) -> Result<Self, MiniTDFSpectrumReaderError> {
-        let parquet_file_name = find_extension(&path, "ms2spectrum.parquet")
-            .ok_or(MiniTDFSpectrumReaderError::FileNotFound(
-                "analysis.tdf".to_string(),
-            ))?;
-        let precursor_reader = PrecursorReader::build()
-            .with_path(&parquet_file_name)
-            .finalize()?;
-        let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)?
+        let precursor_reader =
+            PrecursorReader::build().with_path(&path).finalize()?;
+        // Parse the precursor table once; deriving both columns from a single
+        // read avoids opening and decoding the parquet file twice.
+        let parquet_precursors = ParquetPrecursor::from_parquet_file(&path)?;
+        let offsets = parquet_precursors
             .iter()
             .map(|x| x.offset as usize)
             .collect();
-        let collision_energies =
-            ParquetPrecursor::from_parquet_file(&parquet_file_name)?
-            .iter()
-            .map(|x| x.collision_energy)
-            .collect();
-        let bin_file_name = find_extension(&path, "bin").ok_or(
-            MiniTDFSpectrumReaderError::FileNotFound(
-                "analysis.tdf".to_string(),
-            ),
-        )?;
-        let blob_reader = IndexedTdfBlobReader::new(&bin_file_name, offsets)?;
+        let collision_energies = parquet_precursors
+            .iter()
+            .map(|x| x.collision_energy)
+            .collect();
+        let blob_reader = IndexedTdfBlobReader::new(&path, offsets)?;
         let reader = Self {
-            path: path.as_ref().to_path_buf(),
             precursor_reader,
             blob_reader,
             collision_energies,
         };
@@ -112,10 +97,6 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader {
         self.precursor_reader.len()
     }
 
-    fn get_path(&self) -> PathBuf {
-        self.path.clone()
-    }
-
     fn calibrate(&mut self) {}
 }
 
@@ -124,7 +105,7 @@ pub enum MiniTDFSpectrumReaderError {
     #[error("{0}")]
     PrecursorReaderError(#[from] PrecursorReaderError),
     #[error("{0}")]
-    ParquetError(#[from] ParquetError),
+    ParquetReaderError(#[from] ParquetReaderError),
     #[error("{0}")]
     IndexedTdfBlobReaderError(#[from] IndexedTdfBlobReaderError),
     #[error("{0}")]
diff --git a/src/io/readers/spectrum_reader/spectrum_trait.rs b/src/io/readers/spectrum_reader/spectrum_trait.rs
new file mode 100644
index 0000000..c05b8ea
--- /dev/null
+++ b/src/io/readers/spectrum_reader/spectrum_trait.rs
@@ -0,0 +1,9 @@
+use crate::Spectrum;
+
+use super::errors::SpectrumReaderError;
+
+pub(crate) trait SpectrumReaderTrait: Sync + Send {
+    fn get(&self, index: usize) -> Result<Spectrum, SpectrumReaderError>;
+    fn len(&self) -> usize;
+    fn calibrate(&mut self);
+}
diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs
index c230040..77af731 100644
--- a/src/io/readers/spectrum_reader/tdf.rs
+++ b/src/io/readers/spectrum_reader/tdf.rs
@@ -4,24 +4,22 @@ mod raw_spectra;
 
 use raw_spectra::{RawSpectrum, RawSpectrumReader, RawSpectrumReaderError};
 use rayon::iter::{IntoParallelIterator, ParallelIterator};
-use std::path::{Path, PathBuf};
 
 use crate::{
     domain_converters::{ConvertableDomain, Tof2MzConverter},
     io::readers::{
-        file_readers::sql_reader::{SqlError, SqlReader},
+        file_readers::sql_reader::{SqlReader, SqlReaderError},
         FrameReader, FrameReaderError, MetadataReader, MetadataReaderError,
         PrecursorReader, PrecursorReaderError,
     },
     ms_data::Spectrum,
-    utils::find_extension,
+    readers::TimsTofPathLike,
 };
 
 use super::{SpectrumReaderConfig, SpectrumReaderError, SpectrumReaderTrait};
 
 #[derive(Debug)]
 pub struct TDFSpectrumReader {
-    path: PathBuf,
     precursor_reader: PrecursorReader,
     mz_reader: Tof2MzConverter,
     raw_spectrum_reader: RawSpectrumReader,
@@ -30,17 +28,14 @@ pub struct TDFSpectrumReader {
 impl TDFSpectrumReader {
     pub fn new(
-        path_name: impl AsRef<Path>,
+        path: impl TimsTofPathLike,
         config: SpectrumReaderConfig,
     ) -> Result<Self, TDFSpectrumReaderError> {
-        let frame_reader: FrameReader = FrameReader::new(&path_name)?;
-        let sql_path = find_extension(&path_name, "analysis.tdf").ok_or(
-            TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()),
-        )?;
-        let metadata = MetadataReader::new(&sql_path)?;
+        let frame_reader: FrameReader = FrameReader::new(&path)?;
+        let metadata = MetadataReader::new(&path)?;
         let mz_reader: Tof2MzConverter = metadata.mz_converter;
-        let tdf_sql_reader = SqlReader::open(&sql_path)?;
+        let tdf_sql_reader = SqlReader::open(&path)?;
         let precursor_reader = PrecursorReader::build()
-            .with_path(&sql_path)
+            .with_path(&path)
             .with_config(config.frame_splitting_params)
             .finalize()?;
         let acquisition_type = frame_reader.get_acquisition();
@@ -55,7 +50,6 @@ impl TDFSpectrumReader {
             splitting_strategy,
         )?;
         let reader = Self {
-            path: path_name.as_ref().to_path_buf(),
             precursor_reader,
             mz_reader,
             raw_spectrum_reader,
@@ -103,10 +97,6 @@ impl SpectrumReaderTrait for TDFSpectrumReader {
         self.raw_spectrum_reader.len()
     }
 
-    fn get_path(&self) -> PathBuf {
-        self.path.clone()
-    }
-
     fn calibrate(&mut self) {
         let hits: Vec<(f64, u32)> = (0..self.precursor_reader.len())
             .into_par_iter()
@@ -143,7 +133,7 @@ pub enum TDFSpectrumReaderError {
 #[derive(Debug, thiserror::Error)]
 pub enum TDFSpectrumReaderError {
     #[error("{0}")]
-    SqlError(#[from] SqlError),
+    SqlReaderError(#[from] SqlReaderError),
     #[error("{0}")]
     PrecursorReaderError(#[from] PrecursorReaderError),
     #[error("{0}")]
diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs
index 2e1e9c5..db8f6c8 100644
--- a/src/io/readers/spectrum_reader/tdf/dda.rs
+++ b/src/io/readers/spectrum_reader/tdf/dda.rs
@@ -1,8 +1,8 @@
 use crate::{
     io::readers::{
         file_readers::sql_reader::{
-            pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlError,
-            SqlReader,
+            pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader,
+            SqlReaderError,
         },
         FrameReader, FrameReaderError,
     },
@@ -121,7 +121,7 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader {
 #[derive(Debug, thiserror::Error)]
 pub enum DDARawSpectrumReaderError {
     #[error("{0}")]
-    SqlError(#[from] SqlError),
+    SqlReaderError(#[from] SqlReaderError),
     #[error("{0}")]
     FrameReaderError(#[from] FrameReaderError),
 }
diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs
index 3dad26c..cc1357d 100644
--- a/src/io/readers/spectrum_reader/tdf/dia.rs
+++ b/src/io/readers/spectrum_reader/tdf/dia.rs
@@ -2,7 +2,7 @@ use crate::io::readers::quad_settings_reader::FrameWindowSplittingStrategy;
 use crate::io::readers::FrameReaderError;
 use crate::{
     io::readers::{
-        file_readers::sql_reader::{SqlError, SqlReader},
+        file_readers::sql_reader::{SqlReader, SqlReaderError},
         FrameReader, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError,
     },
     ms_data::QuadrupoleSettings,
@@ -83,7 +83,7 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader {
 #[derive(Debug, thiserror::Error)]
 pub enum DIARawSpectrumReaderError {
     #[error("{0}")]
-    SqlError(#[from] SqlError),
+    SqlReaderError(#[from] SqlReaderError),
     #[error("{0}")]
     QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError),
     #[error("{0}")]
diff --git a/src/io/readers/timstof.rs b/src/io/readers/timstof.rs
new file mode 100644
index 0000000..4b8e65a
--- /dev/null
+++ b/src/io/readers/timstof.rs
@@ -0,0 +1,142 @@
+use std::{
+    fs, io,
+    path::{Path, PathBuf},
+};
+
+/// The known TimsTOF on-disk layouts, gated by crate features.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)]
+pub enum TimsTofFileType {
+    #[cfg(feature = "minitdf")]
+    MiniTDF,
+    #[cfg(feature = "tdf")]
+    TDF,
+}
+
+/// A validated path to a TimsTOF dataset, tagged with its detected layout.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct TimsTofPath {
+    path: PathBuf,
+    file_type: TimsTofFileType,
+}
+
+impl TimsTofPath {
+    /// Validates `path` as a TimsTOF dataset, walking up parent directories
+    /// until a directory containing the expected companion files is found.
+    pub fn new(path: impl AsRef<Path>) -> Result<Self, TimsTofPathError> {
+        let path = path.as_ref().canonicalize()?;
+        #[cfg(feature = "tdf")]
+        if tdf(&path).is_ok() && tdf_bin(&path).is_ok() {
+            return Ok(Self {
+                path,
+                file_type: TimsTofFileType::TDF,
+            });
+        }
+        #[cfg(feature = "minitdf")]
+        if ms2_bin(&path).is_ok() && ms2_parquet(&path).is_ok() {
+            return Ok(Self {
+                path,
+                file_type: TimsTofFileType::MiniTDF,
+            });
+        }
+        // Not recognized at this level: retry on the parent so callers may
+        // pass a file that lives inside the dataset directory.
+        match path.parent() {
+            Some(parent) => match Self::new(parent) {
+                Ok(result) => Ok(result),
+                Err(_) => Err(TimsTofPathError::UnknownType(path)),
+            },
+            None => Err(TimsTofPathError::UnknownType(path)),
+        }
+    }
+
+    pub fn tdf(&self) -> Result<PathBuf, TimsTofPathError> {
+        tdf(self)
+    }
+
+    pub fn tdf_bin(&self) -> Result<PathBuf, TimsTofPathError> {
+        tdf_bin(self)
+    }
+
+    pub fn ms2_bin(&self) -> Result<PathBuf, TimsTofPathError> {
+        ms2_bin(self)
+    }
+
+    pub fn ms2_parquet(&self) -> Result<PathBuf, TimsTofPathError> {
+        ms2_parquet(self)
+    }
+
+    pub fn file_type(&self) -> TimsTofFileType {
+        self.file_type
+    }
+}
+
+fn tdf(path: impl AsRef<Path>) -> Result<PathBuf, TimsTofPathError> {
+    find_extension(path, "analysis.tdf")
+}
+
+fn tdf_bin(path: impl AsRef<Path>) -> Result<PathBuf, TimsTofPathError> {
+    find_extension(path, "analysis.tdf_bin")
+}
+
+fn ms2_bin(path: impl AsRef<Path>) -> Result<PathBuf, TimsTofPathError> {
+    // NOTE(review): a fallback to the legacy "ms2.bin" name was considered
+    // and dropped; reintroduce a chained lookup if old layouts resurface.
+    find_extension(path, "ms2spectrum.bin")
+}
+
+fn ms2_parquet(path: impl AsRef<Path>) -> Result<PathBuf, TimsTofPathError> {
+    // NOTE(review): as with `ms2_bin`, the legacy "ms2.parquet" name is
+    // intentionally not searched for.
+    find_extension(path, "ms2spectrum.parquet")
+}
+
+/// Case-insensitive search for a direct child of `path` whose file name
+/// ends with `extension`.
+fn find_extension(
+    path: impl AsRef<Path>,
+    extension: &str,
+) -> Result<PathBuf, TimsTofPathError> {
+    let extension_lower = extension.to_lowercase();
+    for entry in fs::read_dir(&path)? {
+        if let Ok(entry) = entry {
+            let file_path = entry.path();
+            if let Some(file_name) =
+                file_path.file_name().and_then(|name| name.to_str())
+            {
+                if file_name.to_lowercase().ends_with(&extension_lower) {
+                    return Ok(file_path);
+                }
+            }
+        }
+    }
+    Err(TimsTofPathError::Extension(
+        extension.to_string(),
+        path.as_ref().to_path_buf(),
+    ))
+}
+
+impl AsRef<Path> for TimsTofPath {
+    fn as_ref(&self) -> &Path {
+        &self.path
+    }
+}
+
+pub trait TimsTofPathLike: AsRef<Path> {
+    fn to_timstof_path(&self) -> Result<TimsTofPath, TimsTofPathError>;
+}
+
+impl<T: AsRef<Path>> TimsTofPathLike for T {
+    fn to_timstof_path(&self) -> Result<TimsTofPath, TimsTofPathError> {
+        TimsTofPath::new(self)
+    }
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum TimsTofPathError {
+    #[error("Extension {0} not found for {1}")]
+    Extension(String, PathBuf),
+    #[error("{0}")]
+    IO(#[from] io::Error),
+    #[error("No valid type found for {0}")]
+    UnknownType(PathBuf),
+}
diff --git a/src/io/writers/mgf.rs b/src/io/writers/mgf.rs
index 715a5ed..a44282b 100644
--- a/src/io/writers/mgf.rs
+++ b/src/io/writers/mgf.rs
@@ -1,8 +1,5 @@
-use std::fs::File;
-use std::io::Write;
-use std::path::Path;
-
-use crate::ms_data::Spectrum;
+use crate::Spectrum;
+use std::{fs::File, io::Write, path::Path};
 
 pub struct MGFWriter;
 
diff --git a/src/ms_data/metadata.rs b/src/ms_data/metadata.rs
index 8e78364..06a3ead 100644
--- a/src/ms_data/metadata.rs
+++ b/src/ms_data/metadata.rs
@@ -1,5 +1,3 @@
-use std::path::PathBuf;
-
 use crate::domain_converters::{
     Frame2RtConverter, Scan2ImConverter, Tof2MzConverter,
 };
@@ -8,7 +6,6 @@ use crate::domain_converters::{
 
 #[derive(Clone, Debug, Default, PartialEq)]
 pub struct Metadata {
-    pub path: PathBuf,
     pub rt_converter: Frame2RtConverter,
     pub im_converter: Scan2ImConverter,
     pub mz_converter: Tof2MzConverter,
diff --git a/src/utils.rs b/src/utils.rs
index 7021ffd..9aebe98 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -1,26 +1 @@
-use std::{
-    fs,
-    path::{Path, PathBuf},
-};
-
 pub mod vec_utils;
-
-pub fn find_extension(
-    path: impl AsRef<Path>,
-    extension: &str,
-) -> Option<PathBuf> {
-    let extension_lower = extension.to_lowercase();
-    for entry in fs::read_dir(&path).ok()? {
-        if let Ok(entry) = entry {
-            let file_path = entry.path();
-            if let Some(file_name) =
-                file_path.file_name().and_then(|name| name.to_str())
-            {
-                if file_name.to_lowercase().ends_with(&extension_lower) {
-                    return Some(file_path);
-                }
-            }
-        }
-    }
-    None
-}