Skip to content

Commit

Permalink
cleanup csq stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed Jun 9, 2024
1 parent d96f82b commit e77339f
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 82 deletions.
69 changes: 69 additions & 0 deletions scripts/csq.lua
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,72 @@ print(c.Allele)
print(c.AF)
--]]

--- Extract the ordered list of sub-field names from a VCF INFO description.
--
-- Accepts either the bare Description text or a whole header line. Two
-- layouts are recognised:
--   * `... Format: A|B|C`   -- everything after the "Format:" marker
--   * `...: 'A | B | C'`    -- no marker; everything after the first `:'`
-- A leading `Effect(` wrapper on a field name is dropped, so
-- `'Effect ( Effect_Impact | ...` yields `Effect_Impact` as the first name.
--
-- @tparam string description header line or Description text
-- @treturn[1] table sequence of field names in declaration order
-- @treturn[2] nil
-- @treturn[2] string error message when the input is not a string or no
--   field list could be located
function parse_description(description)
  -- Report bad input through the same (nil, message) convention used below
  -- instead of failing with "attempt to index a nil value".
  if type(description) ~= 'string' then
    return nil, "Description must be a string, got " .. type(description) .. "."
  end

  -- Drop double quotes and ALL whitespace so names need no further trimming.
  description = description:gsub('"', ''):gsub('%s+', '')

  -- Locate the start of the field list.
  local format_str
  if description:find('Format:', 1, true) then
    format_str = description:match('Format:(.+)')
  else
    format_str = description:match(":'(.+)")
  end

  if not format_str then
    return nil, "Format part not found in description."
  end

  -- Split on '|' (empty segments are skipped by the pattern).
  local result = {}
  for value in format_str:gmatch('([^|]+)') do
    -- Strip wrapper punctuation `) > ' ] [` wherever it occurs in the piece,
    -- not only at its end.
    value = value:gsub("[%)>'%]%[]", '')
    -- Single quotes are already gone, so an anchored match is all that is
    -- needed; the substitution is a no-op when the prefix is absent.
    value = value:gsub('^Effect%(', '')
    result[#result + 1] = value
  end
  return result
end

--[[
-- Example usage
local input1 =
'#INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Consequence|Codons|Amino_acids|Gene|SYMBOL|Feature|EXON|PolyPhen|SIFT|Protein_position|BIOTYPE">'
local input2 =
'##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: \'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )\'">'
local input3 =
'##INFO=<ID=ANN,Number=.,Type=String,Description="Functional annotations: \'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO\'">'
local parsed_table1 = parse_description(input1)
local parsed_table2 = parse_description(input2)
local parsed_table3 = parse_description(input3)
print("Parsed Table 1:")
for i, v in ipairs(parsed_table1) do
print(i, v)
end
print("\nParsed Table 2:")
for i, v in ipairs(parsed_table2) do
print(i, v)
end
print("\nParsed Table 3:")
for i, v in ipairs(parsed_table3) do
print(i, v)
end
desc =
'Functional annotations: \'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO\'"'
for i, v in ipairs(parse_description(desc)) do
print(i, v)
end
--]]
68 changes: 0 additions & 68 deletions scripts/csq_header.lua

This file was deleted.

1 change: 0 additions & 1 deletion scripts/h.lua

This file was deleted.

3 changes: 2 additions & 1 deletion scripts/pre.lua
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
header:add_info({ID="af_copy", Number=1, Description="adding a single field", Type="Float"})
header:add_info({ ID = "af_copy", Number = 1, Description = "adding a single field", Type = "Float" })
--print(header:info_get("af_copy").Description)

-- Read the Description text of the "vep" INFO entry from the header.
header_desc = header:info_get("vep").Description
-- Turn that description into a list of field names via parse_description.
-- Both results are stored in globals (no `local`), so `desc` stays visible to
-- code that runs after this file.
desc = parse_description(header_desc)
7 changes: 6 additions & 1 deletion scripts/t.sh
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
./target/debug/vcfexpr filter -e "print(CSQ.new(variant:info('vep', 0), desc).Allele); return variant.id == 'rs2124717267'" -o var.bcf gnomad.genomes.v4.0.sites.chrY.vcf.bgz -p scripts/pre.lua -s 'af_copy=return variant:info("AF", 0)' -l scripts/csq.lua -l scripts/csq_header.lua -l scripts/h.lua
# load the csq code (csq.lua), then add the af_copy field and define `desc` from the header (pre.lua)
./target/debug/vcfexpr filter -e "csq = CSQ.new(variant:info('vep', 0), desc); return csq.IMPACT == 'HIGH'" \
-o var.bcf gnomad.genomes.v4.0.sites.chrY.vcf.bgz \
-p scripts/csq.lua \
-p scripts/pre.lua \
-s 'af_copy=return variant:info("AF", 0)'
9 changes: 6 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,15 @@ pub enum Commands {
template: Option<String>,

/// File(s) containing lua code to load. May contain functions that will be called by the expressions.
/// Prefer to use `--lua_prelude` for code that should be run once before any variants are processed.
/// This option may be deprecated.
#[arg(short, long)]
lua: Vec<String>,

/// File containing lua code to run once before any variants are processed.
/// File(s) containing lua code to run once before any variants are processed.
/// `header` is available here to access or modify the header.
#[arg(short = 'p', long)]
lua_prelude: Option<String>,
lua_prelude: Vec<String>,

/// Optional output file. Default is stdout.
#[arg(short, long)]
Expand All @@ -55,7 +58,7 @@ fn filter_main(
set_expression: Vec<String>,
template: Option<String>,
lua_code: Vec<String>,
lua_prelude: Option<String>,
lua_prelude: Vec<String>,
output: Option<String>,
) -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
Expand Down
17 changes: 9 additions & 8 deletions src/vcfexpr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ impl<'lua> VCFExpr<'lua> {
expression: Vec<String>,
set_expression: Vec<String>,
template: Option<String>,
lua_prelude: Option<String>,
lua_prelude: Vec<String>,
output: Option<String>,
) -> Result<Self, Box<dyn std::error::Error>> {
lua.load(crate::pprint::PPRINT).set_name("pprint").exec()?;
Expand Down Expand Up @@ -167,13 +167,14 @@ impl<'lua> VCFExpr<'lua> {
rust_htslib::htslib::bcf_hdr_dup(reader.header().inner)
});

if let Some(lua_code) = lua_prelude {
let code = std::fs::read_to_string(lua_code)?;
lua.scope(|scope| {
globals.raw_set("header", scope.create_any_userdata_ref_mut(&mut hv)?)?;
lua.load(&code).exec()
})?;
}
lua.scope(|scope| {
globals.raw_set("header", scope.create_any_userdata_ref_mut(&mut hv)?)?;
for path in lua_prelude {
let code = std::fs::read_to_string(path)?;
lua.load(&code).exec()?;
}
Ok(())
})?;

let info_exps = VCFExpr::load_info_expressions(lua, &mut hv, set_expression)?;

Expand Down

0 comments on commit e77339f

Please sign in to comment.