Skip to content

Commit

Permalink
Close LAA loophole
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromekelleher committed Jan 14, 2025
1 parent 49fc182 commit 872bb8e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 9 deletions.
19 changes: 11 additions & 8 deletions bio2zarr/vcf2zarr/vcz.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,14 +525,17 @@ def compute_laa_field(genotypes) -> np.ndarray:
if np.any(genotypes >= v):
raise ValueError("Extreme allele value not supported")
G = genotypes.astype(np.int32)
# Anything <=0 gets mapped to -2 (pad) in the output, which comes last.
# So, to get this sorting correctly, we remap to the largest value for
# sorting, then map back. We promote the genotypes up to 32 bit for convenience
# here, assuming that we'll never have an allele of 2**31 - 1.
assert np.all(G != v)
G[G <= 0] = v
G.sort(axis=1)
G[G == v] = -2
if len(G) > 0:
# Anything <=0 gets mapped to -2 (pad) in the output, which comes last.
# So, to get this sorting correctly, we remap to the largest value for
# sorting, then map back. We promote the genotypes up to 32 bit for convenience
# here, assuming that we'll never have an allele of 2**31 - 1.
assert np.all(G != v)
G[G <= 0] = v
G.sort(axis=1)
# Equal non-zero values result in padding also
G[G[:, 0] == G[:, 1], 1] = -2
G[G == v] = -2
return G.astype(genotypes.dtype)


Expand Down
3 changes: 2 additions & 1 deletion tests/test_local_alleles.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ class TestComputeLAA:
@pytest.mark.parametrize(
("genotypes", "expected"),
[
([[]], [[]]),
([], []),
([[0, 0]], [[-2, -2]]),
([[0, 0], [0, 0]], [[-2, -2], [-2, -2]]),
([[1, 1], [0, 0]], [[1, -2], [-2, -2]]),
([[0, 1], [3, 2], [3, 0]], [[1, -2], [2, 3], [3, -2]]),
([[0, 0], [2, 3]], [[-2, -2], [2, 3]]),
([[2, 3], [0, 0]], [[2, 3], [-2, -2]]),
Expand Down

0 comments on commit 872bb8e

Please sign in to comment.