-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat!: store URIs for ingested VCFs #15
Changes from 9 commits
0522d61
6984c17
3b34904
d031425
cbbd8e5
e22b619
cf6feb0
6ab396e
24a2de2
4959f14
0a4585d
aaf638a
d727cc6
92835d9
3a99421
cb74966
003734e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,12 +19,14 @@ use tokio::{ | |
|
||
async fn load_allele(db_row: DbRow, pool: &SqlitePool) -> Result<(), Box<dyn std::error::Error>> { | ||
let mut conn = pool.acquire().await?; | ||
let result = sqlx::query("INSERT INTO vrs_locations (vrs_id, chr, pos) VALUES (?, ?, ?)") | ||
.bind(db_row.vrs_id) | ||
.bind(db_row.chr) | ||
.bind(db_row.pos) | ||
.execute(&mut *conn) | ||
.await; | ||
let result = | ||
sqlx::query("INSERT INTO vrs_locations (vrs_id, chr, pos, uri_id) VALUES (?, ?, ?, ?);") | ||
.bind(db_row.vrs_id) | ||
.bind(db_row.chr) | ||
.bind(db_row.pos) | ||
.bind(db_row.uri_id) | ||
.execute(&mut *conn) | ||
.await; | ||
if let Err(err) = result { | ||
if let Some(db_error) = err.as_database_error() { | ||
if let Some(sqlite_error) = db_error.try_downcast_ref::<SqliteError>() { | ||
|
@@ -93,7 +95,25 @@ async fn get_reader( | |
} | ||
} | ||
|
||
pub async fn load_vcf(vcf_path: PathBuf, db_url: &str) -> PyResult<()> { | ||
async fn load_file_uri(uri: &str, pool: &SqlitePool) -> Result<i64, Box<dyn std::error::Error>> { | ||
let mut conn = pool.acquire().await?; | ||
|
||
let insert_result = sqlx::query("INSERT OR IGNORE INTO file_uris (uri) VALUES (?);") | ||
.bind(uri) | ||
.execute(&mut *conn) | ||
.await?; | ||
if insert_result.rows_affected() > 0 { | ||
Ok(insert_result.last_insert_rowid()) | ||
} else { | ||
let row_id: (i64,) = sqlx::query_as("SELECT id FROM file_uris WHERE uri = ?;") | ||
.bind(uri) | ||
.fetch_one(&mut *conn) | ||
.await?; | ||
Ok(row_id.0) | ||
} | ||
} | ||
|
||
pub async fn load_vcf(vcf_path: PathBuf, db_url: &str, uri: String) -> PyResult<()> { | ||
let start = Instant::now(); | ||
|
||
if !vcf_path.exists() || !vcf_path.is_file() { | ||
|
@@ -118,6 +138,10 @@ pub async fn load_vcf(vcf_path: PathBuf, db_url: &str) -> PyResult<()> { | |
VrsixDbError::new_err(format!("Failed database connection/call: {}", e)) | ||
})?; | ||
|
||
let uri_id = load_file_uri(&uri, &db_pool) | ||
.await | ||
.map_err(|e| VrsixDbError::new_err(format!("Failed to insert file URI `{uri}`: {e}")))?; | ||
|
||
while let Some(record) = records.try_next().await? { | ||
let vrs_ids = get_vrs_ids(record.info(), &header)?; | ||
let chrom = record.reference_sequence_name(); | ||
|
@@ -131,6 +155,7 @@ pub async fn load_vcf(vcf_path: PathBuf, db_url: &str) -> PyResult<()> { | |
.to_string(), | ||
chr: chrom.strip_prefix("chr").unwrap_or(chrom).to_string(), | ||
pos: pos.try_into().unwrap(), | ||
uri_id, | ||
}; | ||
load_allele(row, &db_pool).await.map_err(|e| { | ||
error!("Failed to load row {:?}", e); | ||
|
@@ -143,3 +168,19 @@ pub async fn load_vcf(vcf_path: PathBuf, db_url: &str) -> PyResult<()> { | |
info!("Time taken: {:?}", duration); | ||
Ok(()) | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
use tempfile::NamedTempFile; | ||
|
||
#[tokio::test] | ||
async fn test_load_file_uri() { | ||
let temp_file = NamedTempFile::new().expect("Failed to create temp file"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. help me out here, so what is this file testing? I'm unfamiliar with Rust so not sure how the db file is being created There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, good q. I think I just wanted to get a minimal test happening inside the Rust code, because failures that happen on the Rust side are often fairly opaque in the context of the Python integration tests (if it's not something you're explicitly looking for, it just looks like a panic without much detail). This test case literally just loads a single entry into the I'd originally just written end to end tests from the Python side to make sure things work the whole way through, but if/as this grows in complexity, it's probably smarter to focus more testing within the Rust code because it's much more granular/interpretable. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. gotcha and for my understanding, how is the sqlite:/// interpreted? I was expecting to see an file exntension like .sqlite on the filename for example. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's just there as a scheme declaration to indicate a sqlite file, I guess. I think it's an expectation imposed by the DB client library. Just checked and this test fails if you change it. I don't know if there's really an accepted file extension for sqlite files (i just googled it and this stackoverflow answer is very unsatisfying). It's definitely not something we try to validate anywhere, although we could if it seems worthwhile. |
||
let db_url = format!("sqlite://{}", temp_file.path().to_str().unwrap()); | ||
crate::sqlite::setup_db(&db_url).await.unwrap(); | ||
let db_pool = get_db_connection(&db_url).await.unwrap(); | ||
let uri_id = load_file_uri("file:///sdlfkjd", &db_pool).await.unwrap(); | ||
assert!(uri_id == 1); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
possibly add it as more declarative:
what was the reasoning behind adding this as an argument vs a flag? Ig it's trivial in terms of functionality but just curious
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I didn't feel super strongly about it... I think it felt a little bit more ergonomic to do it this way, but now that you mention it I'm kind of torn
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since there's not much else needed in terms of flag, I'm good passing in as an optional argument for now.