Skip to content

Commit

Permalink
Modify getsize to return total size, not just the top level
Browse files: browse the repository at this point in the history
  • Loading branch information
benjeffery committed Apr 24, 2024
1 parent 9d046ea commit 85623b7
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 44 deletions.
7 changes: 7 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ Enhancements
By :user:`Deepak Cherian <dcherian>`.


Bug fixes
~~~~~~~~~

* ``getsize`` now returns the total size of all nested arrays under the given path, not just those at the top level.
By :user:`Ben Jeffery <benjeffery>` :issue:`253`.


Docs
~~~~

Expand Down
66 changes: 34 additions & 32 deletions zarr/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,9 +269,15 @@ def _getsize(store: BaseStore, path: Path = None) -> int:
# also include zarr.json?
# members += ['zarr.json']
else:
members = listdir(store, path)
prefix = _path_to_prefix(path)
members = [prefix + k for k in members]
to_visit = [path]
members = []
while to_visit:
print(to_visit)
current_path = to_visit.pop()
current_members = listdir(store, current_path)
prefix = _path_to_prefix(current_path)
members.extend([prefix + k for k in current_members])
to_visit.extend([prefix + k for k in current_members])
for k in members:
try:
v = store[k]
Expand Down Expand Up @@ -971,8 +977,12 @@ def getsize(self, path: Path = None):
elif isinstance(value, self.cls):
# total size for directory
size = 0
for v in value.values():
if not isinstance(v, self.cls):
to_visit = list(value.values())
while to_visit:
v = to_visit.pop()
if isinstance(v, self.cls):
to_visit.extend(v.values())
else:
size += buffer_size(v)
return size

Expand Down Expand Up @@ -1269,9 +1279,10 @@ def getsize(self, path=None):
return os.path.getsize(fs_path)
elif os.path.isdir(fs_path):
size = 0
for child in scandir(fs_path):
if child.is_file():
size += child.stat().st_size
for root, dirs, files in os.walk(fs_path):
for file in files:
file_path = os.path.join(root, file)
size += os.path.getsize(file_path)
return size
else:
return 0
Expand Down Expand Up @@ -1903,29 +1914,19 @@ def listdir(self, path=None):
def getsize(self, path=None):
path = normalize_storage_path(path)
with self.mutex:
children = self.listdir(path)
if children:
size = 0
for child in children:
if path:
name = path + "/" + child
else:
name = child
try:
info = self.zf.getinfo(name)
except KeyError:
pass
else:
size += info.compress_size
return size
elif path:
to_visit = [path] if path else self.listdir(path)
total_size = 0
while to_visit:
current_path = to_visit.pop()
try:
info = self.zf.getinfo(path)
return info.compress_size
info = self.zf.getinfo(current_path)
total_size += info.compress_size
except KeyError:
return 0
else:
return 0
children = self.listdir(current_path)
for child in children:
full_path = current_path + "/" + child if current_path else child
to_visit.append(full_path)
return total_size

def clear(self):
if self.mode == "r":
Expand Down Expand Up @@ -2488,6 +2489,8 @@ def listdir(self, path: Path = None):
return listing

def getsize(self, path=None) -> int:
    """Return the size of ``path`` in the wrapped store.

    Delegates to the module-level ``getsize`` helper, passing it
    ``self._store`` and forwarding ``path`` unchanged.

    Parameters
    ----------
    path : optional
        Location within the store to measure; ``None`` is forwarded as-is.

    Returns
    -------
    int
        Whatever ``getsize`` reports for ``self._store`` at ``path``.
    """
    # The leftover debugging ``print`` calls were removed: a size query
    # should not write to stdout as a side effect.
    return getsize(self._store, path=path)

def _pop_value(self):
Expand Down Expand Up @@ -2745,10 +2748,9 @@ def getsize(self, path=None):
size = self.cursor.execute(
"""
SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr
WHERE k LIKE (? || "%") AND
0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/")
WHERE k LIKE (? || "%")
""",
(path, path),
(path,)
)
for (s,) in size:
return s
Expand Down
15 changes: 3 additions & 12 deletions zarr/tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,19 +366,10 @@ def test_hierarchy(self):

# test getsize (optional)
if hasattr(store, "getsize"):
# TODO: proper behavior of getsize?
# v3 returns size of all nested arrays, not just the
# size of the arrays in the current folder.
if self.version == 2:
assert 6 == store.getsize()
else:
assert 15 == store.getsize()
assert 15 == store.getsize()
assert 3 == store.getsize("a")
assert 3 == store.getsize("b")
if self.version == 2:
assert 3 == store.getsize("c")
else:
assert 9 == store.getsize("c")
assert 9 == store.getsize("c")
assert 3 == store.getsize("c/d")
assert 6 == store.getsize("c/e")
assert 3 == store.getsize("c/e/f")
Expand Down Expand Up @@ -2256,7 +2247,7 @@ def test_getsize():
store["foo"] = b"aaa"
store["bar"] = b"bbbb"
store["baz/quux"] = b"ccccc"
assert 7 == getsize(store)
assert 12 == getsize(store)
assert 5 == getsize(store, "baz")

store = KVStore(dict())
Expand Down

0 comments on commit 85623b7

Please sign in to comment.