Skip to content

Commit

Permalink
Add cell hashmap
Browse files Browse the repository at this point in the history
  • Loading branch information
timoast committed Jul 4, 2024
1 parent 7e3116c commit 1ffbf5d
Showing 1 changed file with 48 additions and 12 deletions.
60 changes: 48 additions & 12 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ use std::{
path::Path,
error::Error,
fs::File,
io::Write,
io::BufReader,
io::BufRead,
collections::HashMap,
};
use clap::{
Arg,
Expand Down Expand Up @@ -41,7 +44,8 @@ fn main() -> Result<(), Box<dyn Error>> {
Arg::new("cells")
.short('c')
.long("cells")
.help("List of cells to include"),
.help("List of cells to include")
.required(true),
)
.get_matches();

Expand All @@ -55,35 +59,54 @@ fn main() -> Result<(), Box<dyn Error>> {
.expect("Can't find path to input BED file");
info!("Received BED file: {:?}", bed_file);

let cell_file = matches.get_one::<String>("cells").map(|cell_path| {
Path::new(cell_path)
.canonicalize()
.expect("Can't find path to input cell file")
});
if let Some(ref path) = cell_file {
info!("Received cell file: {:?}", path);
}
let cell_file = Path::new(matches.get_one::<String>("cells").unwrap())
.canonicalize()
.expect("Can't find path to input cell file");
info!("Received cell file: {:?}", cell_file);

fcount(&frag_file, &bed_file, cell_file.as_deref())?;
fcount(&frag_file, &bed_file, &cell_file)?;

Ok(())
}

// TODO add stdout for mtx row,column,value entries
fn fcount(
frag_file: &Path,
bed_file: &Path,
cell_file: Option<&Path>,
cell_file: &Path,
) -> Result<(), Box<dyn Error>> {
info!(
"Processing fragment file: {:?}, BED file: {:?}, Cell file: {:?}",
frag_file, bed_file, cell_file
);

// printing to stdout. TODO: change to file output
let mut stdout = std::io::stdout().lock();
writeln!(stdout, "%%MatrixMarket matrix coordinate integer general").unwrap();

// tabix file reader
let mut tbxreader = tabix::io::indexed_reader::Builder::default().build_from_path(frag_file)?;

// bed file reader
let mut bedreader = File::open(bed_file)
.map(BufReader::new)
.map(bed::io::Reader::new)?;

// create hashmap for cell barcodes
let cellreader = File::open(cell_file)
.map(BufReader::new)?;
let mut cells = HashMap::new();
for (index, line) in cellreader.lines().enumerate() {
let line = line?;
cells.insert(line.clone(), index);
// writeln!(stdout, "{}", line).unwrap();
}

// create region index variable for matrix row index
let mut bed_idx = 1;

// TODO
// might be better to output to a directory rather than stdout as we need the features and cells txt files as well, and to add the matrix dimension to the header

for result in bedreader.records::<3>() {

Expand All @@ -93,8 +116,21 @@ fn fcount(
let query = tbxreader.query(&region)?;
for entry in query {
let frag_entry = entry?;
println!("{}", frag_entry.as_ref());

// TODO match cell barcodes in hashmap, create vector

writeln!(stdout, "{}", frag_entry.as_ref()).unwrap(); // placeholder
}

// - match cell barcodes in entry to cell barcodes in list
// - convert cell barcode to list index
// - count fragments per cell barcode
// - append row,column,value to mtx file
// - increment region index
bed_idx = bed_idx + 1;

// need to track number of nonzero entries (total lines written) and number of bed entries, add this to mtx header at the end?

}
Ok(())
}

0 comments on commit 1ffbf5d

Please sign in to comment.