From b9bcca0ec86a9aad788202c37481723e71ae4b40 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 8 Aug 2024 20:52:47 +0200 Subject: [PATCH] BUG: Do not accidentally store dtype metadata in ``np.save`` We had logic in place to drop (most) metadata, but the change had a small bug: During saving, we were still using the one with metadata... Maybe doesn't quite close it, but big enough of an improvement for now, I think, so Closes gh-14142 --- numpy/lib/format.py | 2 ++ numpy/lib/tests/test_format.py | 34 ++++++++++++++++------------------ numpy/lib/tests/test_utils.py | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 8e14dfe4bcab..a90403459848 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -271,6 +271,8 @@ def dtype_to_descr(dtype): warnings.warn("metadata on a dtype is not saved to an npy/npz. " "Use another format (such as pickle) to store it.", UserWarning, stacklevel=2) + dtype = new_dtype + if dtype.names is not None: # This is a record array. The .descr is fine. XXX: parts of the # record array with an empty name, like padding bytes, still get diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py index 31352864b7e2..bb262e048cba 100644 --- a/numpy/lib/tests/test_format.py +++ b/numpy/lib/tests/test_format.py @@ -998,32 +998,30 @@ def test_header_growth_axis(): assert len(fp.getvalue()) == expected_header_length -@pytest.mark.parametrize('dt, fail', [ - (np.dtype({'names': ['a', 'b'], 'formats': [float, np.dtype('S3', - metadata={'some': 'stuff'})]}), True), - (np.dtype(int, metadata={'some': 'stuff'}), False), - (np.dtype([('subarray', (int, (2,)))], metadata={'some': 'stuff'}), False), +@pytest.mark.parametrize('dt', [ + np.dtype({'names': ['a', 'b'], 'formats': [float, np.dtype('S3', + metadata={'some': 'stuff'})]}), + np.dtype(int, metadata={'some': 'stuff'}), + np.dtype([('subarray', (int, (2,)))], metadata={'some': 'stuff'}), # recursive: metadata on the field of a dtype - (np.dtype({'names': ['a', 'b'], 'formats': [ + np.dtype({'names': ['a', 'b'], 'formats': [ float, np.dtype({'names': ['c'], 'formats': [np.dtype(int, metadata={})]}) - ]}), False) + ]}), ]) @pytest.mark.skipif(IS_PYPY and sys.implementation.version <= (7, 3, 8), reason="PyPy bug in error formatting") -def test_metadata_dtype(dt, fail): +def test_metadata_dtype(dt): # gh-14142 arr = np.ones(10, dtype=dt) buf = BytesIO() with assert_warns(UserWarning): np.save(buf, arr) buf.seek(0) - if fail: - with assert_raises(ValueError): - np.load(buf) - else: - arr2 = np.load(buf) - # BUG: assert_array_equal does not check metadata - from numpy.lib._utils_impl import drop_metadata - assert_array_equal(arr, arr2) - assert drop_metadata(arr.dtype) is not arr.dtype - assert drop_metadata(arr2.dtype) is arr2.dtype + + # Loading should work (metadata was stripped): + arr2 = np.load(buf) + # BUG: assert_array_equal does not check metadata + from numpy.lib._utils_impl import drop_metadata + assert_array_equal(arr, arr2) + assert drop_metadata(arr.dtype) is not arr.dtype + assert drop_metadata(arr2.dtype) is arr2.dtype diff --git a/numpy/lib/tests/test_utils.py b/numpy/lib/tests/test_utils.py index e2f72ac90c92..644912d941e3 100644 --- a/numpy/lib/tests/test_utils.py +++ b/numpy/lib/tests/test_utils.py @@ -43,7 +43,7 @@ def _compare_dtypes(dt1, dt2): assert dt_m.metadata is None assert dt_m['l1'].metadata is None assert dt_m['l1']['l2'].metadata is None - + # alignment dt = np.dtype([('x', '