Skip to content

Commit

Permalink
Report correct file sizes in inspect
Browse files Browse the repository at this point in the history
Same as du -sb on Linux.

Closes sgkit-dev#142
  • Loading branch information
jeromekelleher committed Apr 24, 2024
1 parent e6b9a19 commit 2abf626
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 4 deletions.
18 changes: 18 additions & 0 deletions bio2zarr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import dataclasses
import logging
import multiprocessing
import os
import os.path
import threading
import time

Expand Down Expand Up @@ -45,6 +47,22 @@ def chunk_aligned_slices(z, n, max_chunks=None):
return slices


def du(path):
    """
    Return the total bytes stored at this path.

    The total is the apparent size of ``path`` itself plus the apparent
    size of every directory and file found beneath it, which is intended
    to match ``du -sb`` on Linux. ``path`` itself is dereferenced if it is
    a symbolic link; symbolic links found *inside* the tree are counted at
    their own size and are never followed, so a dangling link does not
    raise. Hard-linked files are counted once per name.
    """
    total = os.path.getsize(path)
    # pathlib walk method doesn't exist until 3.12 :(
    # os.walk does not descend into symlinked directories by default, so
    # every entry is visited exactly once.
    for root, dirs, files in os.walk(path):
        for name in dirs + files:
            fullname = os.path.join(root, name)
            # lstat rather than stat: measure the link, not its target.
            total += os.lstat(fullname).st_size
    logger.debug("du(%s) = %s", path, total)
    return total


class SynchronousExecutor(cf.Executor):
def submit(self, fn, /, *args, **kwargs):
future = cf.Future()
Expand Down
7 changes: 3 additions & 4 deletions bio2zarr/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging
import math
import os
import os.path
import pathlib
import pickle
import shutil
Expand Down Expand Up @@ -1509,14 +1510,12 @@ class VcfZarr:
def __init__(self, path):
if not (path / ".zmetadata").exists():
raise ValueError("Not in VcfZarr format") # NEEDS TEST
self.path = path
self.root = zarr.open(path, mode="r")

def __repr__(self):
return repr(self.root) # NEEDS TEST

def summary_table(self):
data = []
arrays = [(a.nbytes_stored, a) for _, a in self.root.arrays()]
arrays = [(core.du(self.path / a.basename), a) for _, a in self.root.arrays()]
arrays.sort(key=lambda x: x[0])
for stored, array in reversed(arrays):
d = {
Expand Down
16 changes: 16 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,19 @@ def test_5_chunk_1(self, n, expected):
z = zarr.array(np.arange(5), chunks=1, dtype=int)
result = core.chunk_aligned_slices(z, n)
assert result == expected


@pytest.mark.parametrize(
    ("path", "expected"),
    [
        # NOTE: the expected totals were recorded with `du -sb` on a Linux
        # system. They *might* hold in CI, but directory sizes probably
        # depend on a whole bunch of environment details, so expect this
        # test to fail at some point.
        ("tests/data", 4630726),
        ("tests/data/vcf", 4618589),
        ("tests/data/vcf/sample.vcf.gz", 1089),
    ],
)
def test_du(path, expected):
    """Check that core.du reports the recorded byte total for each path."""
    measured = core.du(path)
    assert measured == expected

0 comments on commit 2abf626

Please sign in to comment.