Skip to content

Commit

Permalink
Remove xarray dependency
Browse files (browse the repository at this point in the history)
  • Loading branch information
percyfal committed Dec 13, 2024
1 parent d6e9b82 commit 1bf2b00
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 16 deletions.
44 changes: 28 additions & 16 deletions bio2zarr/bed2zarr.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,13 +7,14 @@
import numcodecs
import numpy as np
import pandas as pd
import xarray as xr
import zarr

from . import core
from . import core, provenance

logger = logging.getLogger(__name__)

DEFAULT_ZARR_COMPRESSOR = numcodecs.Blosc(cname="zstd", clevel=7)
BED_ZARR_VERSION = 0.1


class BedType(Enum):
Expand Down Expand Up @@ -200,18 +201,29 @@ def bed2zarr(
fields = update_field_bounds(data, bed_type)
dtypes = {f.name: f.smallest_dtype() for f in fields}
data.index.name = "records"
ds = xr.Dataset.from_dataframe(data)
for k, v in dtypes.items():
ds[k] = ds[k].astype(v)
if records_chunk_size is None:
records_chunk_size = len(data)
chunks = {
"records": records_chunk_size,
"contigs": len(contig_id),
}
ds["contig_id"] = xr.DataArray(contig_id, dims=["contigs"])
data = data.astype(dtypes)
store = zarr.DirectoryStore(zarr_path)
root = zarr.group(store=store)
root.attrs.update(
{
"bed_zarr_version": f"{BED_ZARR_VERSION}",
"source": f"bio2zarr-{provenance.__version__}",
}
)
for field in fields[0 : bed_type.value]:
if field.name == "strand":
root.array(
field.name,
data[field.name].values,
chunks=(records_chunk_size,),
dtype="<U1",
)
else:
root.array(
field.name,
data[field.name].values,
chunks=(records_chunk_size,),
)
root.array("contig_id", contig_id, chunks=(len(contig_id),))
if bed_type.value >= BedType.BED4.value:
ds["name_id"] = xr.DataArray(name_id, dims=["names"])
chunks["names"] = len(name_id)
ds = ds.chunk(chunks)
ds.to_zarr(zarr_path, mode="w")
root.array("name_id", name_id, chunks=(len(name_id),))
1 change: 1 addition & 0 deletions tests/test_bed.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -85,6 +85,7 @@ def test_bed2zarr(self, bed_path, bed_df, tmp_path, request):
np.testing.assert_array_equal(root["thickStart"][:], bed_df[6].values)
if bed_type.value >= bed2zarr.BedType.BED8.value:
np.testing.assert_array_equal(root["thickEnd"][:], bed_df[7].values)
print(zarr_path)


class TestBedData:
Expand Down

0 comments on commit 1bf2b00

Please sign in to comment.