From 872bb8ec4a380b0c8f5afa48779c9fb1f8a670a9 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Tue, 14 Jan 2025 16:25:46 +0000 Subject: [PATCH] Close LAA loophole --- bio2zarr/vcf2zarr/vcz.py | 19 +++++++++++-------- tests/test_local_alleles.py | 3 ++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/bio2zarr/vcf2zarr/vcz.py b/bio2zarr/vcf2zarr/vcz.py index 2ed62ce..32d7931 100644 --- a/bio2zarr/vcf2zarr/vcz.py +++ b/bio2zarr/vcf2zarr/vcz.py @@ -525,14 +525,17 @@ def compute_laa_field(genotypes) -> np.ndarray: if np.any(genotypes >= v): raise ValueError("Extreme allele value not supported") G = genotypes.astype(np.int32) - # Anything <=0 gets mapped to -2 (pad) in the output, which comes last. - # So, to get this sorting correctly, we remap to the largest value for - # sorting, then map back. We promote the genotypes up to 32 bit for convenience - # here, assuming that we'll never have a allele of 2**31 - 1. - assert np.all(G != v) - G[G <= 0] = v - G.sort(axis=1) - G[G == v] = -2 + if len(G) > 0: + # Anything <=0 gets mapped to -2 (pad) in the output, which comes last. + # So, to get this sorting correctly, we remap to the largest value for + # sorting, then map back. We promote the genotypes up to 32 bit for convenience + # here, assuming that we'll never have a allele of 2**31 - 1. + assert np.all(G != v) + G[G <= 0] = v + G.sort(axis=1) + # Equal non-zero values result in padding also + G[G[:, 0] == G[:, 1], 1] = -2 + G[G == v] = -2 return G.astype(genotypes.dtype) diff --git a/tests/test_local_alleles.py b/tests/test_local_alleles.py index 9ae9aff..1c0bae4 100644 --- a/tests/test_local_alleles.py +++ b/tests/test_local_alleles.py @@ -9,9 +9,10 @@ class TestComputeLAA: @pytest.mark.parametrize( ("genotypes", "expected"), [ - ([[]], [[]]), + ([], []), ([[0, 0]], [[-2, -2]]), ([[0, 0], [0, 0]], [[-2, -2], [-2, -2]]), + ([[1, 1], [0, 0]], [[1, -2], [-2, -2]]), ([[0, 1], [3, 2], [3, 0]], [[1, -2], [2, 3], [3, -2]]), ([[0, 0], [2, 3]], [[-2, -2], [2, 3]]), ([[2, 3], [0, 0]], [[2, 3], [-2, -2]]),