diff --git a/scorpio/__init__.py b/scorpio/__init__.py index 3cb3ecd..66960aa 100644 --- a/scorpio/__init__.py +++ b/scorpio/__init__.py @@ -1,2 +1,2 @@ _program = "scorpio" -__version__ = "0.3.14" +__version__ = "0.3.15" diff --git a/scorpio/__main__.py b/scorpio/__main__.py index acc686b..4477815 100644 --- a/scorpio/__main__.py +++ b/scorpio/__main__.py @@ -113,6 +113,10 @@ def main(sysargs = sys.argv[1:]): "--combination", dest="combination", action="store_true", help="Combines the mutations for the specified constellations, and outputs a string across them all, with counts per found constellation" ) + subparser_haplotype.add_argument( + "--interspersion", dest="interspersion", action="store_true", + help="Evaluates an interspersion score" + ) subparser_haplotype.set_defaults(func=scorpio.subcommands.haplotype.run) # _______________________________ report __________________________________# diff --git a/scorpio/scripts/type_constellations.py b/scorpio/scripts/type_constellations.py index c5ae18b..2fc5c88 100755 --- a/scorpio/scripts/type_constellations.py +++ b/scorpio/scripts/type_constellations.py @@ -133,9 +133,14 @@ def variant_to_variant_record(l, refseq, features_dict, ignore_fails=False): to a dict """ #print("Parsing variant %s" %l) - lsplit = l.split(":") info = {} + if "#" in l: + l = l.split("#")[0].strip() + if l == "": + return info + lsplit = l.split(":") + if "+" in l: m = re.match(r'[aa:]*(?P\w+):(?P\d+)\+(?P[a-zA-Z]+)', l) if not m: @@ -479,7 +484,7 @@ def call_variant_from_fasta(record_seq, var, ins_char="?", oth_char=None, codon= elif var["type"] == "aa": try: query = record_seq.upper()[var["ref_start"] - 1:var["ref_start"] - 1 + 3 * len(var["ref_allele"])] - query_allele = query.translate() + query_allele = query.translate(gap = "-") #query_allele_minus = record_seq.upper()[var["ref_start"] - 2:var["ref_start"] + 1].translate() #query_allele_plus = record_seq.upper()[var["ref_start"]:var["ref_start"] + 3].translate() #print("Found", query_allele, query_allele_minus, query_allele_plus) @@ -532,15 +537,15 @@ def call_variant_from_fasta(record_seq, var, ins_char="?", oth_char=None, codon= if query_allele == var["ref_allele"]: call = 'ref' query_allele = 0 + elif query_allele == "-" * var["length"] or query_allele == "N" * var["length"]: + call = 'alt' + query_allele = max(int(var["length"] / 3), 1) elif "N" in query_allele: call = 'ambig' if not oth_char: query_allele = "X" else: query_allele = "N" - elif query_allele == "-" * var["length"]: - call = 'alt' - query_allele = int(var["length"] / 3) else: call = 'oth' if not oth_char: diff --git a/scorpio/subcommands/haplotype.py b/scorpio/subcommands/haplotype.py index 8ce5831..8e72a87 100644 --- a/scorpio/subcommands/haplotype.py +++ b/scorpio/subcommands/haplotype.py @@ -15,4 +15,5 @@ def run(options): options.append_genotypes, options.mutations, options.dry_run, - options.combination) + options.combination, + options.interspersion) diff --git a/scorpio/tests/data/type_constellations/lineage_X.json b/scorpio/tests/data/type_constellations/lineage_X.json index 358ddc7..60d0392 100644 --- a/scorpio/tests/data/type_constellations/lineage_X.json +++ b/scorpio/tests/data/type_constellations/lineage_X.json @@ -20,7 +20,7 @@ "s:Y144-", "s:E484K", "s:N501Y", - "s:A570D", + "s:A570D # spike ", "s:P681H", "s:T716I", "s:S982A", @@ -29,11 +29,12 @@ "8:R52I", "8:Y73C", "N:D3L", - "N:S235F" + "N:S235F", + "#s:T777I" ], "rules": { "min_alt": 4, "max_ref": 6, "s:E484K": "alt" } -} \ No newline at end of file +} diff --git a/scorpio/tests/type_constellations_test.py b/scorpio/tests/type_constellations_test.py index b1f4e54..36d4bf1 100644 --- a/scorpio/tests/type_constellations_test.py +++ b/scorpio/tests/type_constellations_test.py @@ -217,8 +217,13 @@ def test_call_variant_from_fasta(): for var in variants: call, query_allele = call_variant_from_fasta(Seq(ambig_string), var) - assert call == "ambig" - assert query_allele in ["X", "N", "NN", "--"] + print(var, call, query_allele) + if var["type"] == "del": + assert call == "alt" or call == "ambig" + assert query_allele in [2, "X"] + else: + assert call == "ambig" or call == "alt" + assert query_allele in ["X", "N", "NN", "--"] for var in variants: call, query_allele = call_variant_from_fasta(Seq(oth_string), var, oth_char="X")