From d96f82b1a404f5958cc9d25a84ac9da599b5c674 Mon Sep 17 00:00:00 2001 From: Brent Pedersen Date: Sat, 8 Jun 2024 17:08:50 -0700 Subject: [PATCH] work on csq --- Cargo.toml | 1 + scripts/csq.lua | 8 ++++---- scripts/csq_header.lua | 8 ++++++++ scripts/h.lua | 1 + scripts/pre.lua | 4 ++++ scripts/t.sh | 1 + 6 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 scripts/h.lua create mode 100644 scripts/pre.lua create mode 100644 scripts/t.sh diff --git a/Cargo.toml b/Cargo.toml index 36df474..2fd3954 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ env_logger = "0.11.3" log = "0.4.21" parking_lot = { version = "0.12.1", features = ["arc_lock"] } #libc = "0.2.153" +libc = "0.2" [[bin]] name = "vcfexpr" diff --git a/scripts/csq.lua b/scripts/csq.lua index 478bfe3..bc5bb4b 100644 --- a/scripts/csq.lua +++ b/scripts/csq.lua @@ -8,8 +8,8 @@ function CSQ.new(fields, header) end -- now fill a table with keys from header and values from fields local self = setmetatable({}, CSQ) - for i, field in ipairs(fields) do - self[header[i]] = field + for i, h in ipairs(header) do + self[h] = fields[i] end return self end @@ -36,12 +36,12 @@ NUMBER_FIELDS = { "AF", "AFR_AF", "AMR_AF", "ASN_AF", "EUR_AF", "EAS_AF", "SAS_A "gnomAD_NFE_TSI_AF", "gnomAD_NFE_FOE_AF", "gnomAD_NFE_NWE_AF", "gnomAD_NFE_SEU_AF", "gnomAD_NFE_SWE_AF", "gnomAD_NFE_ONF_AF", "gnomAD_NFE_EST_AF", "gnomAD_NFE_MED_AF", "gnomAD_NFE_SCA_AF", "gnomAD_NFE_BAL_AF", "gnomAD_NFE_IB", - "MAX_AF", "MAX_AF_POPS", } + "MAX_AF", "MAX_AF_POPS", "ALLELE_NUM", "DISTANCE" +} -- if the field starts with gnomAD_ also add gnomADe_... and gnomADg_... add_gnomad = {} for _, field in ipairs(NUMBER_FIELDS) do if string.match(field, "^gnomAD_") then - print(string.gsub(field, "^gnomAD_", "gnomADe_")) add_gnomad[#add_gnomad + 1] = string.gsub(field, "^gnomAD_", "gnomADe_") add_gnomad[#add_gnomad + 1] = string.gsub(field, "^gnomAD_", "gnomADg_") end diff --git a/scripts/csq_header.lua b/scripts/csq_header.lua index 16c89b7..ffd4825 100644 --- a/scripts/csq_header.lua +++ b/scripts/csq_header.lua @@ -57,4 +57,12 @@ for i, v in ipairs(parsed_table3) do print(i, v) end + +desc = +'Functional annotations: \'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO\'"' + +for i, v in ipairs(parse_description(desc)) do + print(i, v) +end + --]] diff --git a/scripts/h.lua b/scripts/h.lua new file mode 100644 index 0000000..50d2da7 --- /dev/null +++ b/scripts/h.lua @@ -0,0 +1 @@ +desc = parse_description(header_desc) diff --git a/scripts/pre.lua b/scripts/pre.lua new file mode 100644 index 0000000..e645f6b --- /dev/null +++ b/scripts/pre.lua @@ -0,0 +1,4 @@ +header:add_info({ID="af_copy", Number=1, Description="adding a single field", Type="Float"}) +--print(header:info_get("af_copy").Description) + +header_desc = header:info_get("vep").Description diff --git a/scripts/t.sh b/scripts/t.sh new file mode 100644 index 0000000..8f9dddb --- /dev/null +++ b/scripts/t.sh @@ -0,0 +1 @@ +./target/debug/vcfexpr filter -e "print(CSQ.new(variant:info('vep', 0), desc).Allele); return variant.id == 'rs2124717267'" -o var.bcf gnomad.genomes.v4.0.sites.chrY.vcf.bgz -p scripts/pre.lua -s 'af_copy=return variant:info("AF", 0)' -l scripts/csq.lua -l scripts/csq_header.lua -l scripts/h.lua