Skip to content

Commit

Permalink
parallel and memory usage reduction #277
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhuoqing Fang authored and Zhuoqing Fang committed Feb 10, 2025
1 parent efb1ad9 commit 861ca8d
Showing 1 changed file with 17 additions and 13 deletions.
30 changes: 17 additions & 13 deletions src/gsva.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,21 +322,25 @@ pub fn gsva(
// Process samples in parallel without transposing
let (mat_score, sort_idxs) = es.compute_rank_score2(&es.compute_density(&gene_expr));

// Chunk size for the parallel pass below: sample count divided by rayon's thread count (at least 1), to control memory usage
let chunk_size = (n_samples / rayon::current_num_threads()).max(1);

// Parallel KS score calculation
let summaries: Vec<_> = gene_set_hits
.par_iter()
.flat_map(|(term, hits)| {
let hits: Vec<usize> = hits.iter().copied().map(|&i| i).collect();
es.ks_matrix(&mat_score, &sort_idxs, &hits)
.into_par_iter()
.enumerate()
.map(|(i, es_val)| GSEASummary {
term: term.clone(),
es: es_val,
index: Some(i),
..Default::default()
})
.collect::<Vec<_>>()
.par_chunks(chunk_size) // Process gene sets in chunks of `chunk_size` entries
.flat_map_iter(|chunk| {
chunk.iter().flat_map(|(term, hits)| {
let hits: Vec<usize> = hits.iter().copied().map(|&i| i).collect();
es.ks_matrix(&mat_score, &sort_idxs, &hits)
.into_iter()
.enumerate()
.map(move |(i, es_val)| GSEASummary {
term: term.clone(),
es: es_val,
index: Some(i),
..Default::default()
})
})
})
.collect();

Expand Down

0 comments on commit 861ca8d

Please sign in to comment.