Skip to content

Commit

Permalink
Merge pull request #262 from MannLabs/refactor_tempmmap2
Browse files Browse the repository at this point in the history
Refactor tempmmap2
  • Loading branch information
mschwoer authored Jan 21, 2025
2 parents 60da836 + 81a8ca9 commit 1d6e901
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 69 deletions.
128 changes: 62 additions & 66 deletions alphabase/io/tempmmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ def _init_temp_dir(prefix: str = "temp_mmap_") -> str:
TEMP_DIR_NAME = _TEMP_DIR.name

logging.info(
f"Temp mmap arrays are written to {TEMP_DIR_NAME}. "
f"Memory-mapped arrays are written to temporary directory {TEMP_DIR_NAME}. "
"Cleanup of this folder is OS dependent and might need to be triggered manually!"
)

return TEMP_DIR_NAME


def _change_temp_dir_location(abs_path: str) -> str:
def _change_temp_dir_location(abs_path: str) -> None:
"""
Check if the directory to which the temp arrays should be written exists, if so defines this as the new temp dir location. If not raise a value error.
Expand All @@ -51,14 +51,14 @@ def _change_temp_dir_location(abs_path: str) -> str:
if os.path.isdir(abs_path):
TEMP_DIR_NAME = abs_path
else:
raise ValueError(f"The path {abs_path} does not point to a directory.")
raise ValueError(f"The path '{abs_path}' does not point to a directory.")
else:
raise ValueError(
f"The directory {abs_path} in which the file should be created does not exist."
f"The directory '{abs_path}' in which the file should be created does not exist."
)


def _get_file_location(abs_file_path: str, overwrite=False) -> str:
def _get_file_location(abs_file_path: str, overwrite: bool = False) -> str:
"""
Check if the path specified for the new temporary file is valid. If not raise a value error.
Expand All @@ -69,38 +69,34 @@ def _get_file_location(abs_file_path: str, overwrite=False) -> str:
Parameters
----------
abs_path : str
abs_file_path : str
The absolute path to the new temporary file.
Returns
------
str
The file path if it is valid.
"""
# check overwrite status and existence of file
if not overwrite and os.path.exists(abs_file_path):
raise ValueError(
"The file already exists. Set overwrite to True to overwrite the file or choose a different name."
f"The file '{abs_file_path}' already exists. Set overwrite to True to overwrite the file or choose a different name."
)

# ensure that the filename conforms to the naming convention
if not os.path.basename(abs_file_path).endswith(".hdf"):
raise ValueError(
f"The chosen file name '{os.path.basename(abs_file_path)}' needs to end with .hdf"
)

# ensure that the directory in which the file should be created exists
if os.path.isdir(os.path.dirname(abs_file_path)):
return abs_file_path
else:
if not os.path.isdir(os.path.dirname(abs_file_path)):
raise ValueError(
f"The directory '{os.path.dirname(abs_file_path)}' in which the file should be created does not exist."
)

return abs_file_path

def redefine_temp_location(path):
"""
Redfine the location where the temp arrays are written to.

def redefine_temp_location(path: str) -> str:
"""Redefine the location where the temp arrays are written to.
Parameters
----------
Expand All @@ -113,28 +109,21 @@ def redefine_temp_location(path):
"""

global _TEMP_DIR, TEMP_DIR_NAME
global TEMP_DIR_NAME

logging.warning(
f"""Folder {TEMP_DIR_NAME} with temp mmap arrays is being deleted. All existing temp mmapp arrays will be unusable!"""
)
_clear()

# cleaup old temporary directory
# cleanup old temporary directory
shutil.rmtree(TEMP_DIR_NAME, ignore_errors=True)

# create new tempfile at desired location
_TEMP_DIR = tempfile.TemporaryDirectory(prefix=os.path.join(path, "temp_mmap_"))
TEMP_DIR_NAME = _TEMP_DIR.name
temp_dir_name = _init_temp_dir(prefix=os.path.join(path, "temp_mmap_"))

logging.warning(
f"""New temp folder location. Temp mmap arrays are written to {TEMP_DIR_NAME}. Cleanup of this folder is OS dependant, and might need to be triggered manually!"""
)

return TEMP_DIR_NAME
return temp_dir_name


def array(shape: tuple, dtype: np.dtype, tmp_dir_abs_path: str = None) -> np.ndarray:
"""Create a writable temporary mmapped array.
"""Create a writable temporary memory-mapped array.
Parameters
----------
Expand All @@ -150,7 +139,7 @@ def array(shape: tuple, dtype: np.dtype, tmp_dir_abs_path: str = None) -> np.nda
Returns
-------
type
A writable temporary mmapped array.
A writable temporary memory-mapped array.
"""
temp_dir_name = _init_temp_dir()

Expand All @@ -160,17 +149,20 @@ def array(shape: tuple, dtype: np.dtype, tmp_dir_abs_path: str = None) -> np.nda
_change_temp_dir_location(tmp_dir_abs_path)
temp_dir_name = tmp_dir_abs_path

temp_file_name = os.path.join(
temp_file_path = os.path.join(
temp_dir_name, f"temp_mmap_{np.random.randint(2**63, dtype=np.int64)}.hdf"
)

with h5py.File(temp_file_name, "w") as hdf_file:
array = hdf_file.create_dataset("array", shape=shape, dtype=dtype)
array[0] = np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0
offset = array.id.get_offset()
with h5py.File(temp_file_path, "w") as hdf_file:
created_array = hdf_file.create_dataset("array", shape=shape, dtype=dtype)
created_array[0] = (
np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0
)
offset = created_array.id.get_offset()

with open(temp_file_name, "rb+") as raw_hdf_file:
with open(temp_file_path, "rb+") as raw_hdf_file:
mmap_obj = mmap.mmap(raw_hdf_file.fileno(), 0, access=mmap.ACCESS_WRITE)

return np.frombuffer(
mmap_obj, dtype=dtype, count=np.prod(shape), offset=offset
).reshape(shape)
Expand Down Expand Up @@ -218,23 +210,23 @@ def create_empty_mmap(

# if path does not exist generate a random file name in the TEMP directory
if file_path is None:
temp_file_name = os.path.join(
temp_file_path = os.path.join(
temp_dir_name, f"temp_mmap_{np.random.randint(2**63, dtype=np.int64)}.hdf"
)
else:
temp_file_name = _get_file_location(
file_path, overwrite=False
) # TODO overwrite=overwrite
temp_file_path = _get_file_location(file_path, overwrite=overwrite)

with h5py.File(temp_file_name, "w") as hdf_file:
array = hdf_file.create_dataset("array", shape=shape, dtype=dtype)
array[0] = np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0
with h5py.File(temp_file_path, "w") as hdf_file:
created_array = hdf_file.create_dataset("array", shape=shape, dtype=dtype)
created_array[0] = (
np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0
)

return temp_file_name # TODO temp_file_path
return temp_file_path


def mmap_array_from_path(hdf_file: str) -> np.ndarray:
"""reconnect to an exisiting HDF5 file to generate a writable temporary mmapped array.
"""Reconnect to an existing HDF5 file to generate a writable temporary memory-mapped array.
Parameters
----------
Expand All @@ -244,17 +236,17 @@ def mmap_array_from_path(hdf_file: str) -> np.ndarray:
Returns
-------
type
A writable temporary mmapped array.
A writable temporary memory-mapped array.
"""

path = os.path.join(hdf_file)

# read parameters required to reinitialize the mmap object
with h5py.File(path, "r") as hdf_file:
array = hdf_file["array"]
offset = array.id.get_offset()
shape = array.shape
dtype = array.dtype
array_ = hdf_file["array"]
offset = array_.id.get_offset()
shape = array_.shape
dtype = array_.dtype

# reinitialize the mmap object
with open(path, "rb+") as raw_hdf_file:
Expand All @@ -265,7 +257,7 @@ def mmap_array_from_path(hdf_file: str) -> np.ndarray:


def zeros(shape: tuple, dtype: np.dtype) -> np.ndarray:
"""Create a writable temporary mmapped array filled with zeros.
"""Create a writable temporary memory-mapped array filled with zeros.
Parameters
----------
Expand All @@ -277,15 +269,15 @@ def zeros(shape: tuple, dtype: np.dtype) -> np.ndarray:
Returns
-------
type
A writable temporary mmapped array filled with zeros.
A writable temporary memory-mapped array filled with zeros.
"""
_array = array(shape, dtype)
_array[:] = 0
return _array
array_ = array(shape, dtype)
array_[:] = 0
return array_


def ones(shape: tuple, dtype: np.dtype) -> np.ndarray:
"""Create a writable temporary mmapped array filled with ones.
"""Create a writable temporary memory-mapped array filled with ones.
Parameters
----------
Expand All @@ -297,33 +289,37 @@ def ones(shape: tuple, dtype: np.dtype) -> np.ndarray:
Returns
-------
type
A writable temporary mmapped array filled with ones.
A writable temporary memory-mapped array filled with ones.
"""
_array = array(shape, dtype)
_array[:] = 1
return _array
array_ = array(shape, dtype)
array_[:] = 1
return array_


@atexit.register
def _clear() -> None:
"""Reset the temporary folder containing temp mmapped arrays.
"""Reset the temporary folder containing temp memory-mapped arrays.
WARNING: All existing temp mmap arrays will be unusable!
"""
global _TEMP_DIR, TEMP_DIR_NAME

if _TEMP_DIR is not None:
logging.warning(
f"Folder {TEMP_DIR_NAME} with temp mmap arrays is being deleted. "
"All existing temp mmapp arrays will be unusable!"
logging.info(
f"Temporary folder {TEMP_DIR_NAME} with memory-mapped arrays is being deleted. "
"All existing memory-mapped arrays will be unusable!"
)

del _TEMP_DIR
_TEMP_DIR = None # TempDirectory will take care of the cleanup
if os.path.exists(TEMP_DIR_NAME):
logging.warning(
f"Temporary folder {TEMP_DIR_NAME} still exists, manual removal necessary."
)
TEMP_DIR_NAME = None


def clear() -> str:
"""Reset the temporary folder containing temp mmapped arrays and create a new one.
"""Reset the temporary folder containing temp memory-mapped arrays and create a new one.
WARNING: All existing temp mmap arrays will be unusable!
Expand Down
69 changes: 66 additions & 3 deletions tests/unit/io/test_tempmmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@ def teardown_function(function):
tempmmap = sys.modules["alphabase.io.tempmmap"]
tempmmap._clear() # simulating @atexit.register

# # later:
# assert tempmmap._TEMP_DIR is None
# assert tempmmap.TEMP_DIR_NAME is None
assert tempmmap._TEMP_DIR is None
assert tempmmap.TEMP_DIR_NAME is None

del sys.modules["alphabase.io.tempmmap"]

Expand All @@ -46,6 +45,22 @@ def test_create_array():
assert tempmmap._TEMP_DIR is not None


def test_check_temp_dir_deletion():
    """Verify that `_clear()` removes the temporary directory from disk."""
    tempmmap = sys.modules["alphabase.io.tempmmap"]

    # given: creating an array sets up the module-level temporary directory;
    # the array is kept referenced until the end of the test
    _array = tempmmap.array((5, 5), np.float32)
    directory = tempmmap._TEMP_DIR.name
    assert os.path.exists(directory)

    # when
    tempmmap._clear()

    # then
    assert not os.path.exists(directory)


def test_create_array_with_custom_temp_dir():
"""Test creating and accessing an array with custom temp dir."""
tempmmap = sys.modules["alphabase.io.tempmmap"]
Expand All @@ -66,6 +81,33 @@ def test_create_array_with_custom_temp_dir():
assert temp_dir == tempmmap.TEMP_DIR_NAME


def test_create_array_with_custom_temp_dir_nonexisting():
    """Test creating an array with custom temp dir: not existing.

    `array()` is expected to raise a ValueError that names the requested
    directory when `tmp_dir_abs_path` does not exist.
    """
    import re  # local import: only needed to escape the expected message

    tempmmap = sys.modules["alphabase.io.tempmmap"]

    temp_dir = "nonexisting_dir"
    expected_message = (
        f"The directory '{temp_dir}' in which the file should be created does not exist."
    )

    # when / then
    # `match` is applied as a regular expression, so the literal message is
    # escaped to keep '.' (and any other metacharacter) from matching loosely.
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        _ = tempmmap.array((5, 5), np.int32, tmp_dir_abs_path=temp_dir)


def test_create_array_with_custom_temp_dir_not_a_dir():
    """Test creating an array with custom temp dir: not a directory.

    Passing the path of an existing regular file as `tmp_dir_abs_path` is
    expected to raise a ValueError stating that the path is not a directory.
    """
    import re  # local import: only needed to escape the expected message

    tempmmap = sys.modules["alphabase.io.tempmmap"]

    # NamedTemporaryFile guarantees that `.name` is a real file system path;
    # TemporaryFile may expose only an integer file descriptor there.
    with tempfile.NamedTemporaryFile() as temp_file:
        expected_message = (
            f"The path '{temp_file.name}' does not point to a directory."
        )

        # when / then
        # `match` is a regular expression: escape the path (it may contain
        # backslashes or other metacharacters) and the rest of the message.
        with pytest.raises(ValueError, match=re.escape(expected_message)):
            _ = tempmmap.create_empty_mmap(
                (5, 5), np.int32, tmp_dir_abs_path=temp_file.name
            )


def test_mmap_array_from_path():
"""Test reconnecting to an existing array."""
tempmmap = sys.modules["alphabase.io.tempmmap"]
Expand Down Expand Up @@ -156,6 +198,27 @@ def test_create_empty_with_custom_file_path():
assert temp_dir != tempmmap.TEMP_DIR_NAME


def test_create_empty_with_custom_file_path_exists():
    """Test creating an empty array with a custom file path that already exists.

    Without `overwrite=True` a ValueError naming the file is expected; with
    `overwrite=True` the call must succeed and return the given path.
    """
    import re  # local import: only needed to escape the expected message

    tempmmap = sys.modules["alphabase.io.tempmmap"]

    # when: the file exists and overwriting is not requested
    # NamedTemporaryFile guarantees that `.name` is a real file system path;
    # TemporaryFile may expose only an integer file descriptor there.
    with tempfile.NamedTemporaryFile(suffix=".hdf") as temp_file:
        expected_message = (
            f"The file '{temp_file.name}' already exists. "
            "Set overwrite to True to overwrite the file or choose a different name."
        )
        # `match` is a regular expression, so the literal message is escaped
        with pytest.raises(ValueError, match=re.escape(expected_message)):
            _ = tempmmap.create_empty_mmap(
                (5, 5), np.float32, file_path=temp_file.name
            )

    # when 2: the file exists and overwriting is requested
    with tempfile.TemporaryDirectory() as temp_dir:
        file_path = os.path.join(temp_dir, "temp_mmap.hdf")
        # create the file and close it again, so no handle of this test is
        # still open while the file is being re-created
        with open(file_path, "w"):
            pass

        returned_path = tempmmap.create_empty_mmap(
            (5, 5), np.float32, file_path=file_path, overwrite=True
        )

        # did not raise, and the requested location was used
        assert returned_path == file_path


def test_create_empty_with_custom_file_path_error_cases():
"""Test creating and accessing an empty array: error cases."""
tempmmap = sys.modules["alphabase.io.tempmmap"]
Expand Down

0 comments on commit 1d6e901

Please sign in to comment.