Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

packaging dep and IcfMetadata.__eq__ #322

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion bio2zarr/vcf2zarr/icf.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def fromdict(d):
return VcfFieldSummary(**d)


@dataclasses.dataclass
@dataclasses.dataclass(order=True)
class VcfField:
category: str
name: str
Expand Down Expand Up @@ -192,6 +192,16 @@ def fromdict(d):
d["contigs"] = [Contig(**cd) for cd in d["contigs"]]
return IcfMetadata(**d)

def __eq__(self, other):
    """
    Return True if ``other`` is an IcfMetadata with matching content.

    Only ``samples``, ``contigs``, ``filters`` and ``fields`` take part in
    the comparison. The first three must compare equal as they are, while
    ``fields`` is sorted on both sides first, so the order in which the
    fields are listed does not affect the result.

    Returns NotImplemented when ``other`` is not an IcfMetadata, which
    lets Python try the reflected comparison instead.
    """
    if not isinstance(other, IcfMetadata):
        return NotImplemented
    # These attributes are compared directly, so ordering matters for them.
    for attr_name in ("samples", "contigs", "filters"):
        if not getattr(self, attr_name) == getattr(other, attr_name):
            return False
    # Sorting requires the elements of ``fields`` to be orderable.
    own_fields = sorted(self.fields)
    other_fields = sorted(other.fields)
    return own_fields == other_fields


def fixed_vcf_field_definitions():
def make_field_def(name, vcf_type, vcf_number):
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ dependencies = [
# colouredlogs pulls in humanfriendly
"cyvcf2",
"bed_reader",
"packaging",
]
requires-python = ">=3.9"
classifiers = [
Expand Down
Binary file added tests/data/vcf/out_of_order_fields/input1.bcf
Binary file not shown.
Binary file added tests/data/vcf/out_of_order_fields/input1.bcf.csi
Binary file not shown.
Binary file added tests/data/vcf/out_of_order_fields/input2.bcf
Binary file not shown.
Binary file added tests/data/vcf/out_of_order_fields/input2.bcf.csi
Binary file not shown.
4 changes: 2 additions & 2 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,8 @@ def test_examples(self, chunk_size, size, start, stop):
# It works in CI on Linux, but it'll probably break at some point.
# It's also necessary to update these numbers each time a new data
# file gets added
("tests/data", 4976329),
("tests/data/vcf", 4964192),
("tests/data", 4981734),
("tests/data/vcf", 4969597),
("tests/data/vcf/sample.vcf.gz", 1089),
],
)
Expand Down
55 changes: 55 additions & 0 deletions tests/test_vcf_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -1100,3 +1100,58 @@ def test_missing_filter(tmp_path):
zarr_path = tmp_path / "zarr"
with pytest.raises(ValueError, match="Filter 'q10' was not defined in the header"):
vcf2zarr.convert([path], zarr_path)


class TestOutOfOrderFields:
    """
    Convert the two ``out_of_order_fields`` BCF inputs together and check
    the contents of the resulting dataset.
    """

    # The numbering is swapped deliberately: path 1 points at input2 and
    # path 2 points at input1.
    data_path1 = "tests/data/vcf/out_of_order_fields/input2.bcf"
    data_path2 = "tests/data/vcf/out_of_order_fields/input1.bcf"

    @pytest.fixture(scope="class")
    def ds(self, tmp_path_factory):
        # Convert once per class; every test reads the same loaded dataset.
        zarr_path = tmp_path_factory.mktemp("data") / "ooo_example.vcf.zarr"
        input_paths = [self.data_path1, self.data_path2]
        vcf2zarr.convert(input_paths, zarr_path)
        return sg.load_dataset(zarr_path)

    def test_filters(self, ds):
        expected_ids = ["PASS", "FAIL"]
        # One row per variant, one column per entry in filter_id.
        expected_flags = [
            [True, False],
            [False, True],
            [True, False],
        ]
        nt.assert_array_equal(ds["filter_id"], expected_ids)
        nt.assert_array_equal(ds["variant_filter"], expected_flags)

    def test_source(self, ds):
        expected_source = f"bio2zarr-{provenance.__version__}"
        assert ds.attrs["source"] == expected_source

    def test_contigs(self, ds):
        contig_ids = ["chr20", "chr21"]
        contig_lengths = [64444167.0, 46709983.0]
        variant_contigs = [0, 1, 1]
        nt.assert_array_equal(ds["contig_id"], contig_ids)
        nt.assert_array_equal(ds["contig_length"], contig_lengths)
        nt.assert_array_equal(ds["variant_contig"], variant_contigs)

    def test_position(self, ds):
        expected_positions = [63971, 64506, 64507]
        nt.assert_array_equal(ds["variant_position"], expected_positions)

    def test_length(self, ds):
        expected_lengths = [11, 1, 1]
        nt.assert_array_equal(ds["variant_length"], expected_lengths)

    def test_info_fields(self, ds):
        expected_qname = ["cluster19_000000F", ".", "cluster19_000000F"]
        expected_qstart = [25698928, 25698928, -1]
        nt.assert_array_equal(ds["variant_QNAME"], expected_qname)
        nt.assert_array_equal(ds["variant_QSTART"], expected_qstart)

    def test_allele(self, ds):
        alleles = ds["variant_allele"]
        expected_alleles = [["TTCCATTCCAC", "T"], ["C", "CTCCAT"], ["G", "A"]]
        nt.assert_array_equal(alleles.values.tolist(), expected_alleles)
        assert ds["variant_allele"].dtype == "O"

    def test_call_DPs(self, ds):
        expected_dp = [[5], [-1], [5]]
        expected_dp2 = [[1], [1], [-1]]
        nt.assert_array_equal(ds["call_DP"], expected_dp)
        nt.assert_array_equal(ds["call_DP2"], expected_dp2)