Skip to content

Commit

Permalink
Logging improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromekelleher committed Jan 17, 2025
1 parent 524fcb2 commit 9f817a2
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
7 changes: 6 additions & 1 deletion bio2zarr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,12 @@ def flush(self):
f"{self.array_offset}:{self.array_offset + self.buffer_row}"
f"{self.buff.nbytes / 2**20: .2f}Mb"
)
self.max_buff_size = max(self.max_buff_size, sys.getsizeof(self.buff))
# Note this is inaccurate for string data as we're just reporting the
# size of the container. When we switch to the numpy 2 StringDtype this
# should improve and we can get more visibility on how memory
# is being used.
# https://github.com/sgkit-dev/bio2zarr/issues/30
self.max_buff_size = max(self.max_buff_size, self.buff.nbytes)
self.array_offset += self.variants_chunk_size
self.buffer_row = 0

Expand Down
8 changes: 2 additions & 6 deletions bio2zarr/vcf2zarr/vcz.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,13 +862,9 @@ def init_partition_array(self, partition_index, name):

def finalise_partition_array(self, partition_index, buffered_array):
buffered_array.flush()
# field_map = self.schema.field_map()
# array_spec = field_map[buffered_array.name]
# ba = buffered_array
# print(array_spec.name, "ba.max_buff_size", ba.max_buff_size,
# array_spec.variant_chunk_nbytes)
logger.info(
f"Completed partition {partition_index} array {buffered_array.name}"
f"Completed partition {partition_index} array {buffered_array.name} "
f"max_memory={core.display_size(buffered_array.max_buff_size)}"
)

def encode_array_partition(self, array_spec, partition_index):
Expand Down

0 comments on commit 9f817a2

Please sign in to comment.