Skip to content

Commit

Permalink
working info expressions
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed Apr 23, 2024
1 parent bb90ddc commit e248911
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 26 deletions.
29 changes: 21 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,22 @@ vcfexpr filter \
input.vcf
```

---

add a new info field (`af_copy`) and set it.
```
$ cat pre.lua
header:add_info({ID="af_copy", Number=1, Description="adding a single field", Type="Float"})
```
then run with:
```
vcfexpr filter -p pre.lua -e 'return variant:format("AD")[1][2] > 0' \
-s 'af_copy=return variant:info("AF", 0)' \
input.vcf > output.vcf
```



# Attributes / Functions

```lua
Expand Down Expand Up @@ -85,6 +101,10 @@ allele.allele -> integer e.g. 0 for "0" allele

header.samples (set/get) -> vec<string> -- TODO: allow setting samples before iteration.

-- these header:add_* are available only in the prelude. currently only Number=1 is supported.
header:add_info({Type="Integer", Number=1, Description="asdf", ID="new field"})
header:add_format({Type="Integer", Number=1, Description="xyz", ID="new format field"})


sample = variant:sample("NA12878")
sample.DP -- any fields in the row are available. special case for GT. use pprint to see structure:
Expand Down Expand Up @@ -134,14 +154,7 @@ Options:

# TODO

+ set fields with --info-set '$name|$lua_expression`. for example:
```
-n 'popmax_AF|math.max(variant:info("AF_afr"), variant:info("AF_ami"), \
variant:info("AF_amr"), variant:info("AF_ami"), variant:info("AF_eas"), variant:info("AF_nfe"), variant:info("AF_sas"))'
```
which would create a new field `popmax_AF=$expression` at each row (note that this would need to handle missing values)
this would also use `header:add_info({Type="Float", Number=1, Description="max of subset of pops", ID="popmax_AF"})`

+ Currently --info-expressions can only be used when output is VCF. Update to support template output as well. So we need the header to translate.
+ add a functional lib such as [Moses](https://github.com/Yonaba/Moses) or [Lume](https://github.com/rxi/lume) which have `map`/`filter` and other functions.
(The user can add these on their own with `--lua`).
+ write a class to simplify accessing CSQ fields.
29 changes: 17 additions & 12 deletions src/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,13 @@ fn handle_hash_get<'a>(
) -> Result<&'a str, mlua::Error> {
match tbl.get(key) {
Some(x) => Ok(x),
None => {
Err(mlua::Error::ExternalError(Arc::new(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
format!(
"must specify {} in the argument to add_{}. got {:?}",
key, func, tbl
),
))))
}
None => Err(mlua::Error::ExternalError(Arc::new(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
format!(
"must specify {} in the argument to add_{}. got {:?}",
key, func, tbl
),
)))),
}
}

Expand Down Expand Up @@ -80,7 +78,7 @@ pub(crate) fn register_header(lua: &Lua) -> mlua::Result<()> {
|_lua, (ud, tbl): (AnyUserData, HashMap<String, String>)| {
let this = ud.borrow_mut::<HeaderView>()?;
let c_str = std::ffi::CString::new(format!(
r#"##INFO=<ID={},Number={},Type={},Description="{}">"#,
r#"##INFO=<ID={},Number={},Type={},Description={}>"#,
handle_hash_get(&tbl, "ID", "info")?,
handle_hash_get(&tbl, "Number", "info")?,
handle_hash_get(&tbl, "Type", "info")?,
Expand All @@ -95,7 +93,14 @@ pub(crate) fn register_header(lua: &Lua) -> mlua::Result<()> {
std::io::Error::last_os_error(),
)));
}
_ = unsafe { rust_htslib::htslib::bcf_hdr_sync(this.inner) };
let ret = unsafe { rust_htslib::htslib::bcf_hdr_sync(this.inner) };
if ret != 0 {
log::warn!(
"Error syncing header after adding INFO field for {:?}: {}",
tbl,
ret
);
}
Ok(())
},
);
Expand Down Expand Up @@ -199,7 +204,7 @@ mod tests {
scope.create_any_userdata_ref_mut(&mut header_view)?,
)?;
let result: String = exp.call(())?;
let expected = "##fileformat=VCFv4.2\n##FILTER=<ID=PASS,Description=\"All filters passed\">\n##contig=<ID=chr1,length=10000>\n##INFO=<ID=TEST,Number=1,Type=Integer,Description=\"Test field\">\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSample1\tSample2\n";
let expected = "##fileformat=VCFv4.2\n##FILTER=<ID=PASS,Description=\"All filters passed\">\n##contig=<ID=chr1,length=10000>\n##INFO=<ID=TEST,Number=1,Type=Integer,Description=Test field>\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSample1\tSample2\n";
assert_eq!(
result,
expected,
Expand Down
9 changes: 3 additions & 6 deletions src/vcfexpr.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use log::debug;
use mlua::Lua;
use rust_htslib::bcf::{
self,
Expand Down Expand Up @@ -173,8 +172,6 @@ impl<'lua> VCFExpr<'lua> {
let mut wtr =
bcf::Writer::from_path(&output, &header, !output.ends_with(".gz"), format)?;
_ = wtr.set_threads(2);
//let header_t = unsafe { rust_htslib::htslib::bcf_hdr_dup(reader.header().inner) };
//hv = bcf::header::HeaderView::new(header_t);
wtr
} else {
bcf::Writer::from_stdout(&header, true, bcf::Format::Vcf)?
Expand Down Expand Up @@ -216,7 +213,7 @@ impl<'lua> VCFExpr<'lua> {
.expect("invalid info expression should have name=$expression");
let t = hv
.info_type(name_exp.0.as_bytes())
.unwrap_or_else(|_| panic!("info field {} not found", name_exp.0));
.unwrap_or_else(|_| panic!("ERROR: info field '{}' not found. Make sure it was added to the header in prelude if needed.", name_exp.0));
(
InfoFormat::Info(name_exp.0.to_string()),
(
Expand Down Expand Up @@ -330,7 +327,7 @@ impl<'lua> VCFExpr<'lua> {
let mut record = variant.take();
for (stag, value) in info_results {
let tag = stag.as_bytes();
debug!("Setting info field: {}: {:?}", stag, value);
//debug!("Setting info field: {}: {:?}", stag, value);
let result = match value {
InfoFormatValue::Bool(b) => {
if b {
Expand All @@ -339,7 +336,7 @@ impl<'lua> VCFExpr<'lua> {
record.clear_info_flag(tag)
}
}
InfoFormatValue::Float(f) => record.push_info_float(tag, &[f]),
InfoFormatValue::Float(f) => record.push_info_float(b"af_copy", &[f]),
InfoFormatValue::Integer(i) => record.push_info_integer(tag, &[i]),
InfoFormatValue::String(s) => record.push_info_string(tag, &[s.as_bytes()]),
};
Expand Down

0 comments on commit e248911

Please sign in to comment.