From cc1117bacf5c39bce6272a0c423be0512242cc5a Mon Sep 17 00:00:00 2001 From: Kevin Michael Dalton Date: Wed, 18 Sep 2024 16:26:04 -0700 Subject: [PATCH] make cell/sg optional. improve docstring --- reciprocalspaceship/io/dials.py | 38 ++++++++++++++++++++++----------- tests/io/test_dials.py | 8 ++++++- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/reciprocalspaceship/io/dials.py b/reciprocalspaceship/io/dials.py index 61aea215..0009d8f7 100644 --- a/reciprocalspaceship/io/dials.py +++ b/reciprocalspaceship/io/dials.py @@ -84,7 +84,7 @@ def _concat(refl_data): ds = refl_data else: refl_data = [ds for ds in refl_data if ds is not None] - ds = rs.concat(refl_data) + ds = rs.concat(refl_data, check_isomorphous=False) expt_ids = set(ds.BATCH) LOGGER.debug(f"Found {len(ds)} refls from {len(expt_ids)} expts.") LOGGER.debug("Mapping batch column.") @@ -191,8 +191,8 @@ def _read_dials_stills_ray(fnames, unitcell, spacegroup, numjobs=10, extra_cols= @spacegroupify def read_dials_stills( fnames, - unitcell, - spacegroup, + unitcell=None, + spacegroup=None, numjobs=10, parallel_backend=None, extra_cols=None, @@ -200,21 +200,35 @@ def read_dials_stills( comm=None, ): """ + Read reflections from still images processed by DIALS from fnames and return + them as a DataSet. This method does not convert columns to native rs MTZ dtypes. + Parameters ---------- - fnames: filenames - unitcell: unit cell tuple, Gemmi unit cell obj - spacegroup: space group symbol eg P4 - numjobs: if backend==ray, specify the number of jobs (ignored if backend==mpi) - parallel_backend: ray, mpi, or None - extra_cols: list of additional column names to extract from the refltables. By default, this method will search for + fnames : list or tuple + A list or tuple of filenames (strings). + unitcell : gemmi.UnitCell or similar (optional) + The unit cell assigned to the returned dataset. + spacegroup : gemmi.SpaceGroup or similar (optional) + The spacegroup assigned to the returned dataset. 
+ numjobs : int + If backend==ray, specify the number of jobs (ignored if backend==mpi). + parallel_backend : string (optional) + "ray", "mpi", or None for serial. + extra_cols : list (optional) + Optional list of additional column names to extract from the refltables. By default, this method will search for miller_index, id, s1, xyzcal.px, intensity.sum.value, intensity.sum.variance, delpsical.rad - verbose: whether to print stdout - comm: optionally override the communicator used by backend='mpi' + verbose : bool + Whether to print logging info to stdout + comm : mpi4py.MPI.Comm + Optionally override the communicator used by backend='mpi' Returns ------- - rs dataset (pandas Dataframe) + ds : rs.DataSet + The dataset containing reflection info aggregated from fnames. This method will not convert any of the + columns to native rs MTZ dtypes. DIALS data are natively double precision (64-bit). Converting to MTZ + will downcast them to 32-bit. Use ds.infer_mtz_dtypes() to convert to native rs dtypes if required. """ _set_logger(verbose) diff --git a/tests/io/test_dials.py b/tests/io/test_dials.py index 40212243..72c0fe0d 100644 --- a/tests/io/test_dials.py +++ b/tests/io/test_dials.py @@ -106,7 +106,7 @@ def make_refls(unit_cell, sg, seed=8675309, file_prefix=""): def test_dials_reader(parallel_backend, verbose=False): unit_cell = 78, 78, 235, 90, 90, 120 - sg = "P6522" + sg = "P 65 2 2" comm = None if parallel_backend == "mpi": comm = DummyComm() @@ -162,6 +162,12 @@ def test_dials_reader(parallel_backend, verbose=False): assert np.allclose(df_m.I, df_m["intensity.sum.value"]) assert np.allclose(df_m.varI, df_m["intensity.sum.sigma"] ** 2) + # Test that you don't need cell and symmetry to load the tables + ds =read_dials_stills( + pack_names, parallel_backend=None, numjobs=1, verbose=verbose + ) + assert ds.cell is None + assert ds.spacegroup is None def test_verbosity(): with tempfile.TemporaryDirectory() as tdir: