Skip to content

Commit

Permalink
Add custom zip function
Browse files Browse the repository at this point in the history
  • Loading branch information
mthrok committed Dec 14, 2024
1 parent a520ba4 commit b2fbdde
Show file tree
Hide file tree
Showing 9 changed files with 458 additions and 10 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ message(STATUS "########################################")
find_package(fmt REQUIRED)
find_package(gflags REQUIRED)
find_package(glog REQUIRED)
find_package(libzip REQUIRED)

set(deps fmt::fmt gflags glog::glog)
set(deps fmt::fmt gflags glog::glog libzip::zip)
foreach(dep IN LISTS deps)
get_target_property(location "${dep}" IMPORTED_LOCATION_RELEASE)
message(STATUS "Found ${dep}: ${location}")
Expand Down
3 changes: 2 additions & 1 deletion docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ The libspdl uses the following third party libraries, which are fetched and buil
* `{fmt} <https://github.com/fmtlib/fmt>`_ (`MIT <https://github.com/fmtlib/fmt/blob/10.1.1/LICENSE.rst>`_)
* `gflags <https://github.com/gflags/gflags>`_ (`BSD-3 <https://github.com/gflags/gflags/blob/v2.2.0/COPYING.txt>`_)
* `glog <https://github.com/google/glog>`_ (`BSD-3 <https://github.com/google/glog/blob/v0.5.0/COPYING>`_)
* `libzip <https://libzip.org/>`_ (`BSD-3 <https://github.com/nih-at/libzip/blob/v1.11.2/LICENSE>`_)
* `nanobind <https://github.com/wjakob/nanobind>`_ (`BSD-3 <https://github.com/wjakob/nanobind/blob/v2.0.0/LICENSE>`_) and its dependency `robin-map <https://github.com/Tessil/robin-map/>`_ (`MIT <https://github.com/Tessil/robin-map/blob/v1.3.0/LICENSE>`_)
* `FFmpeg <https://github.com/FFmpeg/FFmpeg>`_ (`LGPL <https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv2.1>`_ †)

Expand All @@ -123,7 +124,7 @@ Optional Dependencies
---------------------

* `Perfetto <https://perfetto.dev/docs/instrumentation/tracing-sdk>`_ (`Apache 2.0 <https://github.com/google/perfetto/blob/v41.0/LICENSE>`_)
* `CUDA Toolkit <https://developer.nvidia.com/cuda-toolkit>`_ †† (`CUDA Toolkit EULA <https://docs.nvidia.com/cuda/eula/index.html>`_) and the following family of libraries covered by the same EULA
* `CUDA Toolkit <https://developer.nvidia.com/cuda-toolkit>`_ †† (`CUDA Toolkit EULA <https://docs.nvidia.com/cuda/eula/index.html>`_) and the following family of libraries covered by the same EULA
* `nvJPEG <https://docs.nvidia.com/cuda/nvjpeg/index.html>`_
* `NPP <https://developer.nvidia.com/npp>`_
* The header files of `Video Codec SDK <https://gitlab.com/nvidia/video/video-codec-sdk>`_ ††
Expand Down
3 changes: 2 additions & 1 deletion src/spdl/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@
# This import is just for annotation, so please overlook this one.
from ._type_stub import * # noqa: F403 # isort: skip

from . import _composite, _config, _convert, _core, _preprocessing, _type_stub
from . import _composite, _config, _convert, _core, _preprocessing, _type_stub, _zip

_mods = [
_composite,
_config,
_convert,
_core,
_preprocessing,
_zip,
]


Expand Down
135 changes: 135 additions & 0 deletions src/spdl/io/_zip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from collections.abc import Mapping

import spdl.lib._zip as _zip
from numpy.lib import format as np_format

__all__ = [
"NpzFile",
"load_npz",
]


class ZipFile:
    """File-like wrapper around a handle to a single entry of a zip archive.

    The wrapped ``handle`` must provide ``read(buffer)``, which fills
    ``buffer`` and returns the number of bytes stored. Every other attribute
    (for example ``close``) is forwarded to the handle unchanged.
    """

    # Size of the scratch buffer used when reading an entry to its end.
    _CHUNK_SIZE = 1024 * 1024

    def __init__(self, handle):
        self._handle = handle

    def read(self, n: int = -1) -> bytes:
        """Read bytes from the entry.

        Args:
            n: Maximum number of bytes to read. When omitted, ``None`` or
                negative, the entry is read until the handle reports that
                no more data is available.

        Returns:
            The bytes read; shorter than ``n`` (possibly empty) when the entry
            has fewer bytes left.
        """
        if n is None or n < 0:
            # Read-all mode: the size of the entry is not known here, so keep
            # pulling fixed-size chunks until the handle returns nothing.
            # NOTE(review): assumes the handle returns 0 at end of entry and
            # raises on failure -- confirm against the C++ binding.
            chunks = []
            while True:
                buffer = bytes(bytearray(self._CHUNK_SIZE))
                num_read = self._handle.read(buffer)
                if num_read <= 0:
                    break
                chunks.append(buffer[:num_read])
            return b"".join(chunks)

        # NOTE(review): the handle writes into a `bytes` object, which Python
        # treats as immutable. Kept as-is because the binding's accepted
        # buffer type is not visible from here.
        buffer = bytes(bytearray(n))
        num_read = self._handle.read(buffer)
        return buffer[:num_read]

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # `close` is not defined on this class; it resolves to the handle's
        # `close` through `__getattr__`.
        self.close()

    def __getattr__(self, name: str):
        # `__getattr__` only runs when normal lookup fails. Fetch `_handle`
        # straight from the instance dict: going through `self._handle` would
        # re-enter this method forever if `_handle` was never assigned.
        try:
            handle = self.__dict__["_handle"]
        except KeyError:
            raise AttributeError(f"{name} not found") from None
        if hasattr(handle, name):
            return getattr(handle, name)
        raise AttributeError(f"{name} not found")


class ZipArchive:
    """Wrapper around a zip archive handle created from in-memory data.

    Attributes not defined here (for example ``namelist`` or ``close``) are
    forwarded to the underlying handle.
    """

    # Number of bytes requested per call when reading a whole entry.
    _CHUNK_SIZE = 1024 * 1024

    def __init__(self, handle, data: bytes):
        self._handle = handle
        self._data = data  # to keep the reference while the handle is alive

    def __getattr__(self, name: str):
        # `__getattr__` only runs when normal lookup fails. Fetch `_handle`
        # straight from the instance dict: going through `self._handle` would
        # re-enter this method forever if `_handle` was never assigned.
        try:
            handle = self.__dict__["_handle"]
        except KeyError:
            raise AttributeError(f"{name} not found") from None
        if hasattr(handle, name):
            return getattr(handle, name)
        raise AttributeError(f"{name} not found")

    def open(self, name: str):
        """Open the entry ``name`` and return it wrapped in a ``ZipFile``."""
        return ZipFile(self._handle.open(name))

    def read(self, name: str) -> bytes:
        """Return the full content of the entry ``name``.

        The entry is consumed in fixed-size chunks with an explicit size on
        every call, so this works with a file object whose ``read`` requires
        a size argument.
        """
        chunks = []
        with self.open(name) as file:
            # An empty result marks the end of the entry.
            while chunk := file.read(self._CHUNK_SIZE):
                chunks.append(chunk)
        return b"".join(chunks)


def zip_archive(data: bytes) -> ZipArchive:
    """Create a :py:class:`ZipArchive` over zip data held in memory.

    Args:
        data: The raw bytes of a zip archive. The same object is handed to
            the returned wrapper, which stores it alongside the handle.

    Returns:
        A ``ZipArchive`` wrapping the handle produced by ``_zip.zip_archive``.
    """
    return ZipArchive(_zip.zip_archive(data), data)


class NpzFile(Mapping):
    """NpzFile()

    A class that mimics the behavior of :py:class:`numpy.lib.npyio.NpzFile`.

    Use :py:func:`load_npz` to create an instance of this class.
    """

    def __init__(self, archive):
        self._archive = archive
        # Entry names exactly as reported by the archive.
        self._files = archive.namelist()
        # The same names with a trailing ".npy" stripped.
        self.files = [entry.removesuffix(".npy") for entry in self._files]

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the underlying archive."""
        self._archive.close()

    def __iter__(self):
        return iter(self.files)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, key):
        """Load the array stored under ``key``.

        ``key`` may be either the full entry name or the name without its
        ``.npy`` suffix.

        Raises:
            KeyError: If ``key`` matches neither form.
        """
        if key not in self._files:
            if key not in self.files:
                raise KeyError(f"{key} is not a file in the archive")
            # Short form given: map it back to the entry name.
            key = f"{key}.npy"

        with self._archive.open(key) as file:
            return np_format.read_array(file)

    def __contains__(self, key):
        return any(key in names for names in (self._files, self.files))

    def __repr__(self):
        return f"NpzFile object with {len(self)} entries."


def load_npz(data: bytes) -> NpzFile:
    """Load a numpy archive file (``npz``).

    It is almost a drop-in replacement for :py:func:`numpy.load` function,
    but it only supports the basic use cases.

    This function uses the C++ implementation of the zip archive reader, which
    releases the GIL. So it is more efficient than the official NumPy
    implementation for supported cases.

    Args:
        data: The data to load.

    Example

       >>> x = np.arange(10)
       >>> y = np.sin(x)
       >>>
       >>> with tempfile.TemporaryFile() as f:
       ...     np.savez(f, x=x, y=y)
       ...     f.seek(0)
       ...     data = spdl.io.load_npz(f.read())
       ...
       >>> assert np.array_equal(data["x"], x)
       >>> assert np.array_equal(data["y"], y)
    """
    archive = zip_archive(data)
    return NpzFile(archive)
34 changes: 27 additions & 7 deletions src/spdl/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Add the directory containing the installed binary to its runtime library
# search path: "@loader_path" on Apple platforms, "$ORIGIN" on other UNIX
# systems. Nothing is set on other platforms.
if(APPLE)
  set(CMAKE_INSTALL_RPATH "@loader_path")
elseif(UNIX)
  set(CMAKE_INSTALL_RPATH "$ORIGIN")
endif()

message(STATUS "########################################")
message(STATUS "# Configuring SPDL Python binding")
message(STATUS "########################################")
Expand All @@ -24,13 +31,6 @@ set(src
io/encoding.cpp
)

# Add the directory containing the installed binary to its runtime library
# search path: "@loader_path" on Apple platforms, "$ORIGIN" on other UNIX
# systems. Nothing is set on other platforms.
if(APPLE)
  set(CMAKE_INSTALL_RPATH "@loader_path")
elseif(UNIX)
  set(CMAKE_INSTALL_RPATH "$ORIGIN")
endif()

function(add_spdl_extension ffmpeg_version)
set(name "_spdl_ffmpeg${ffmpeg_version}")
message(STATUS "Building ${name}")
Expand Down Expand Up @@ -86,3 +86,23 @@ add_spdl_extension(5)
add_spdl_extension(6)
add_spdl_extension(7)
endif()

###############################################################################
# zip extension
###############################################################################
# Builds the `_zip` nanobind module from zip.cpp, links it against libzip, fmt
# and glog, and installs it under ${SPDL_PYTHON_BINDING_INSTALL_PREFIX}/lib.

set(name _zip)
message(STATUS "Building ${name}")

set(srcs zip.cpp)
set(deps
  libzip::zip
  fmt::fmt
  glog::glog
)

nanobind_add_module(${name} ${srcs})
target_link_libraries(${name} PRIVATE ${deps})

# Both the extension and the nanobind-static target get the Python include
# directory as a private include path.
foreach(tgt IN ITEMS ${name} nanobind-static)
  target_include_directories(${tgt} PRIVATE "${Python_INCLUDE_DIR}")
endforeach()

install(
  TARGETS ${name}
  LIBRARY DESTINATION "${SPDL_PYTHON_BINDING_INSTALL_PREFIX}/lib"
  RUNTIME DESTINATION "${SPDL_PYTHON_BINDING_INSTALL_PREFIX}/lib"
)
Loading

0 comments on commit b2fbdde

Please sign in to comment.