FEAT: implemented error propagation for spectrum readers
sander-willems-bruker committed Jul 17, 2024
1 parent c6f96de commit ea8be45
Showing 8 changed files with 157 additions and 62 deletions.
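The user-facing change is that the spectrum reader constructors now return a Result instead of panicking internally; the benchmarks below simply call .unwrap() on that Result, while library code can propagate the error with ?. A minimal sketch of a call site follows (illustrative only; the timsrust import path is an assumption, not something shown in this commit):

use timsrust::io::readers::{SpectrumReader, SpectrumReaderError};

// Hypothetical caller: a failed open is now a value to handle, not a panic.
fn open_spectra(path: &str) -> Result<SpectrumReader, SpectrumReaderError> {
    SpectrumReader::new(path)
}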
6 changes: 3 additions & 3 deletions benches/speed_performance.rs
@@ -34,7 +34,7 @@ fn criterion_benchmark_dda(c: &mut Criterion) {
group.significance_level(0.001).sample_size(10);
let d_folder_name: &str = DDA_TEST;
let frame_reader = FrameReader::new(d_folder_name).unwrap();
-let spectrum_reader = SpectrumReader::new(d_folder_name);
+let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap();
group.bench_function("DDA read_all_frames 6m", |b| {
b.iter(|| read_all_frames(black_box(&frame_reader)))
});
@@ -56,7 +56,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) {
group.significance_level(0.001).sample_size(10);
let d_folder_name: &str = DIA_TEST;
let frame_reader = FrameReader::new(d_folder_name).unwrap();
-let spectrum_reader = SpectrumReader::new(d_folder_name);
+let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap();
group.bench_function("DIA read_all_frames 6m", |b| {
b.iter(|| read_all_frames(black_box(&frame_reader)))
});
@@ -75,7 +75,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) {
group.significance_level(0.001).sample_size(10);
let d_folder_name: &str = SYP_TEST;
let frame_reader = FrameReader::new(d_folder_name).unwrap();
-let spectrum_reader = SpectrumReader::new(d_folder_name);
+let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap();
group.bench_function("SYP read_all_frames 6m", |b| {
b.iter(|| read_all_frames(black_box(&frame_reader)))
});
21 changes: 15 additions & 6 deletions src/io/readers/spectrum_reader.rs
@@ -2,10 +2,10 @@ mod minitdf;
mod tdf;

use core::fmt;
-use minitdf::MiniTDFSpectrumReader;
+use minitdf::{MiniTDFSpectrumReader, MiniTDFSpectrumReaderError};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::path::{Path, PathBuf};
-use tdf::TDFSpectrumReader;
+use tdf::{TDFSpectrumReader, TDFSpectrumReaderError};

use crate::ms_data::Spectrum;

@@ -20,14 +20,15 @@ impl fmt::Debug for SpectrumReader {
}

impl SpectrumReader {
-pub fn new(path: impl AsRef<Path>) -> Self {
+pub fn new(path: impl AsRef<Path>) -> Result<Self, SpectrumReaderError> {
let spectrum_reader: Box<dyn SpectrumReaderTrait> =
match path.as_ref().extension().and_then(|e| e.to_str()) {
Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)),
Some("d") => Box::new(TDFSpectrumReader::new(path)),
Some("ms2") => Box::new(MiniTDFSpectrumReader::new(path)?),
Some("d") => Box::new(TDFSpectrumReader::new(path)?),
_ => panic!(),
};
-Self { spectrum_reader }
+let reader = Self { spectrum_reader };
+Ok(reader)
}

pub fn get(&self, index: usize) -> Spectrum {
@@ -62,3 +62,11 @@ trait SpectrumReaderTrait: Sync {
fn len(&self) -> usize;
fn calibrate(&mut self);
}

+#[derive(Debug, thiserror::Error)]
+pub enum SpectrumReaderError {
+#[error("{0}")]
+MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError),
+#[error("{0}")]
+TDFSpectrumReaderError(#[from] TDFSpectrumReaderError),
+}
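The new SpectrumReaderError enum leans on thiserror: #[error("{0}")] forwards the wrapped error's message and #[from] generates the From conversions (plus an Error impl) that let the ? operators in new convert a MiniTDFSpectrumReaderError or TDFSpectrumReaderError automatically. Roughly, the derive expands to something like this hand-written sketch (illustrative only, not the exact generated code):

impl From<MiniTDFSpectrumReaderError> for SpectrumReaderError {
    fn from(err: MiniTDFSpectrumReaderError) -> Self {
        SpectrumReaderError::MiniTDFSpectrumReaderError(err)
    }
}

impl std::fmt::Display for SpectrumReaderError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // "{0}" delegates to the wrapped error's Display.
            SpectrumReaderError::MiniTDFSpectrumReaderError(e) => write!(f, "{}", e),
            SpectrumReaderError::TDFSpectrumReaderError(e) => write!(f, "{}", e),
        }
    }
}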
57 changes: 40 additions & 17 deletions src/io/readers/spectrum_reader/minitdf.rs
@@ -4,11 +4,15 @@ use crate::{
io::readers::{
file_readers::{
parquet_reader::{
-precursors::ParquetPrecursor, ReadableParquetTable,
+precursors::ParquetPrecursor, ParquetError,
+ReadableParquetTable,
},
+sql_reader::SqlError,
+tdf_blob_reader::{
+IndexedTdfBlobReader, IndexedTdfBlobReaderError,
+},
-tdf_blob_reader::IndexedTdfBlobReader,
},
-PrecursorReader,
+PrecursorReader, PrecursorReaderError,
},
ms_data::Spectrum,
utils::find_extension,
@@ -25,31 +29,36 @@ pub struct MiniTDFSpectrumReader {
}

impl MiniTDFSpectrumReader {
-pub fn new(path: impl AsRef<Path>) -> Self {
-let parquet_file_name =
-find_extension(&path, "ms2spectrum.parquet").unwrap();
-let precursor_reader =
-PrecursorReader::new(&parquet_file_name).unwrap();
-let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)
-.unwrap()
+pub fn new(
+path: impl AsRef<Path>,
+) -> Result<Self, MiniTDFSpectrumReaderError> {
+let parquet_file_name = find_extension(&path, "ms2spectrum.parquet")
+.ok_or(MiniTDFSpectrumReaderError::FileNotFound(
+"analysis.tdf".to_string(),
+))?;
+let precursor_reader = PrecursorReader::new(&parquet_file_name)?;
+let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)?
.iter()
.map(|x| x.offset as usize)
.collect();
let collision_energies =
-ParquetPrecursor::from_parquet_file(&parquet_file_name)
-.unwrap()
+ParquetPrecursor::from_parquet_file(&parquet_file_name)?
.iter()
.map(|x| x.collision_energy)
.collect();
-let bin_file_name = find_extension(&path, "bin").unwrap();
-let blob_reader =
-IndexedTdfBlobReader::new(&bin_file_name, offsets).unwrap();
-Self {
+let bin_file_name = find_extension(&path, "bin").ok_or(
+MiniTDFSpectrumReaderError::FileNotFound(
+"analysis.tdf".to_string(),
+),
+)?;
+let blob_reader = IndexedTdfBlobReader::new(&bin_file_name, offsets)?;
+let reader = Self {
path: path.as_ref().to_path_buf(),
precursor_reader,
blob_reader,
collision_energies,
-}
+};
+Ok(reader)
}
}

@@ -100,3 +109,17 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader {

fn calibrate(&mut self) {}
}

+#[derive(Debug, thiserror::Error)]
+pub enum MiniTDFSpectrumReaderError {
+#[error("{0}")]
+SqlError(#[from] SqlError),
+#[error("{0}")]
+PrecursorReaderError(#[from] PrecursorReaderError),
+#[error("{0}")]
+ParquetError(#[from] ParquetError),
+#[error("{0}")]
+IndexedTdfBlobReaderError(#[from] IndexedTdfBlobReaderError),
+#[error("{0}")]
+FileNotFound(String),
+}
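Besides wrapping sub-reader errors via #[from], this reader converts missing files from Option to Result with ok_or(...) followed by ?, as in the find_extension calls above. The pattern in isolation (an illustrative helper, not code from this commit):

use std::path::PathBuf;

// Turns a missing file (None) into a FileNotFound error that `?` can propagate.
fn required_file(
    found: Option<PathBuf>,
    label: &str,
) -> Result<PathBuf, MiniTDFSpectrumReaderError> {
    found.ok_or(MiniTDFSpectrumReaderError::FileNotFound(label.to_string()))
}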
46 changes: 34 additions & 12 deletions src/io/readers/spectrum_reader/tdf.rs
@@ -2,15 +2,16 @@ mod dda;
mod dia;
mod raw_spectra;

-use raw_spectra::{RawSpectrum, RawSpectrumReader};
+use raw_spectra::{RawSpectrum, RawSpectrumReader, RawSpectrumReaderError};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::path::{Path, PathBuf};

use crate::{
domain_converters::{ConvertableDomain, Tof2MzConverter},
io::readers::{
-file_readers::sql_reader::SqlReader, FrameReader, MetadataReader,
-PrecursorReader,
+file_readers::sql_reader::{SqlError, SqlReader},
+FrameReader, FrameReaderError, MetadataReader, MetadataReaderError,
+PrecursorReader, PrecursorReaderError,
},
ms_data::Spectrum,
utils::find_extension,
@@ -31,25 +32,30 @@ pub struct TDFSpectrumReader {
}

impl TDFSpectrumReader {
-pub fn new(path_name: impl AsRef<Path>) -> Self {
-let frame_reader: FrameReader = FrameReader::new(&path_name).unwrap();
-let sql_path = find_extension(&path_name, "analysis.tdf").unwrap();
-let metadata = MetadataReader::new(&sql_path).unwrap();
+pub fn new(
+path_name: impl AsRef<Path>,
+) -> Result<Self, TDFSpectrumReaderError> {
+let frame_reader: FrameReader = FrameReader::new(&path_name)?;
+let sql_path = find_extension(&path_name, "analysis.tdf").ok_or(
+TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()),
+)?;
+let metadata = MetadataReader::new(&sql_path)?;
let mz_reader: Tof2MzConverter = metadata.mz_converter;
-let tdf_sql_reader = SqlReader::open(&sql_path).unwrap();
-let precursor_reader = PrecursorReader::new(&sql_path).unwrap();
+let tdf_sql_reader = SqlReader::open(&sql_path)?;
+let precursor_reader = PrecursorReader::new(&sql_path)?;
let acquisition_type = frame_reader.get_acquisition();
let raw_spectrum_reader = RawSpectrumReader::new(
&tdf_sql_reader,
frame_reader,
acquisition_type,
-);
-Self {
+)?;
+let reader = Self {
path: path_name.as_ref().to_path_buf(),
precursor_reader,
mz_reader,
raw_spectrum_reader,
-}
+};
+Ok(reader)
}

pub fn read_single_raw_spectrum(&self, index: usize) -> RawSpectrum {
@@ -104,3 +110,19 @@ impl SpectrumReaderTrait for TDFSpectrumReader {
}
}
}

+#[derive(Debug, thiserror::Error)]
+pub enum TDFSpectrumReaderError {
+#[error("{0}")]
+SqlError(#[from] SqlError),
+#[error("{0}")]
+PrecursorReaderError(#[from] PrecursorReaderError),
+#[error("{0}")]
+MetadaReaderError(#[from] MetadataReaderError),
+#[error("{0}")]
+FrameReaderError(#[from] FrameReaderError),
+#[error("{0}")]
+RawSpectrumReaderError(#[from] RawSpectrumReaderError),
+#[error("{0}")]
+FileNotFound(String),
+}
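Because every #[from] variant also gets a From impl, an error raised deep inside a sub-reader arrives at the caller as a nested variant of SpectrumReaderError. An illustrative way to inspect that nesting at a call site (not part of this commit; it assumes the wrapped error types implement Display, which thiserror provides):

fn describe(err: &SpectrumReaderError) -> String {
    match err {
        // A low-level SQL failure inside the TDF reader surfaces as nested variants.
        SpectrumReaderError::TDFSpectrumReaderError(TDFSpectrumReaderError::SqlError(e)) => {
            format!("SQL layer failed: {}", e)
        }
        SpectrumReaderError::TDFSpectrumReaderError(e) => format!("TDF reader failed: {}", e),
        SpectrumReaderError::MiniTDFSpectrumReaderError(e) => {
            format!("MiniTDF reader failed: {}", e)
        }
    }
}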
24 changes: 17 additions & 7 deletions src/io/readers/spectrum_reader/tdf/dda.rs
@@ -1,7 +1,8 @@
use crate::{
io::readers::{
file_readers::sql_reader::{
-pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader,
+pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlError,
+SqlReader,
},
FrameReader,
},
@@ -19,13 +20,15 @@ pub struct DDARawSpectrumReader {
}

impl DDARawSpectrumReader {
-pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self {
-let pasef_frames =
-SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader).unwrap();
+pub fn new(
+tdf_sql_reader: &SqlReader,
+frame_reader: FrameReader,
+) -> Result<Self, DDARawSpectrumReaderError> {
+let pasef_frames = SqlPasefFrameMsMs::from_sql_reader(&tdf_sql_reader)?;
let pasef_precursors =
&pasef_frames.iter().map(|x| x.precursor).collect();
let order: Vec<usize> = argsort(&pasef_precursors);
-let max_precursor = pasef_precursors.iter().max().unwrap();
+let max_precursor = pasef_precursors.iter().max().unwrap(); // SqlReader cannot return empty vecs, so always succeeds
let mut offsets: Vec<usize> = Vec::with_capacity(max_precursor + 1);
offsets.push(0);
for (offset, &index) in order.iter().enumerate().take(order.len() - 1) {
@@ -35,12 +38,13 @@ impl DDARawSpectrumReader {
}
}
offsets.push(order.len());
-Self {
+let reader = Self {
order,
offsets,
pasef_frames,
frame_reader,
-}
+};
+Ok(reader)
}

pub fn iterate_over_pasef_frames(
@@ -97,3 +101,9 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader {
raw_spectrum
}
}

+#[derive(Debug, thiserror::Error)]
+pub enum DDARawSpectrumReaderError {
+#[error("{0}")]
+SqlError(#[from] SqlError),
+}
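One .unwrap() deliberately remains in DDARawSpectrumReader::new, justified by the inline comment that SqlReader never yields an empty result set. A common alternative is expect, which keeps that invariant in the panic message; a sketch only, not code from this commit:

// Illustrative alternative to the documented unwrap above.
fn max_precursor(pasef_precursors: &[usize]) -> usize {
    *pasef_precursors
        .iter()
        .max()
        .expect("SqlReader never returns an empty PasefFrameMsMs table")
}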
27 changes: 19 additions & 8 deletions src/io/readers/spectrum_reader/tdf/dia.rs
@@ -1,9 +1,9 @@
use crate::{
io::readers::{
file_readers::sql_reader::{
-frame_groups::SqlWindowGroup, ReadableSqlTable, SqlReader,
+frame_groups::SqlWindowGroup, ReadableSqlTable, SqlError, SqlReader,
},
-FrameReader, QuadrupoleSettingsReader,
+FrameReader, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError,
},
ms_data::QuadrupoleSettings,
utils::vec_utils::group_and_sum,
@@ -18,11 +18,13 @@ pub struct DIARawSpectrumReader {
}

impl DIARawSpectrumReader {
-pub fn new(tdf_sql_reader: &SqlReader, frame_reader: FrameReader) -> Self {
-let window_groups =
-SqlWindowGroup::from_sql_reader(&tdf_sql_reader).unwrap();
+pub fn new(
+tdf_sql_reader: &SqlReader,
+frame_reader: FrameReader,
+) -> Result<Self, DIARawSpectrumReaderError> {
+let window_groups = SqlWindowGroup::from_sql_reader(&tdf_sql_reader)?;
let quadrupole_settings =
-QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path()).unwrap();
+QuadrupoleSettingsReader::new(&tdf_sql_reader.get_path())?;
let mut expanded_quadrupole_settings: Vec<QuadrupoleSettings> = vec![];
for window_group in window_groups {
let window = window_group.window_group;
@@ -40,10 +42,11 @@
expanded_quadrupole_settings.push(sub_quad_settings)
}
}
-Self {
+let reader = Self {
expanded_quadrupole_settings,
frame_reader,
-}
+};
+Ok(reader)
}
}

@@ -76,3 +79,11 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader {
raw_spectrum
}
}

+#[derive(Debug, thiserror::Error)]
+pub enum DIARawSpectrumReaderError {
+#[error("{0}")]
+SqlError(#[from] SqlError),
+#[error("{0}")]
+QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError),
+}