# --- parsnip/_utils.py ------------------------------------------------------


class ParseWarning(Warning):
    """Warning category for recoverable problems found while parsing a CIF file.

    Args:
        message (str): Human-readable description of the problem.
    """

    def __init__(self, message):
        # Initialise the base class explicitly instead of relying on
        # ``BaseException.__new__`` to record ``args``.
        super().__init__(message)
        self.message = message

    def __str__(self):
        return repr(self.message)


class ParseError(RuntimeError):
    """Error raised when a CIF file violates a rule the parser depends on.

    Args:
        message (str): Human-readable description of the problem.
    """

    def __init__(self, message):
        # Initialise the base class explicitly instead of relying on
        # ``BaseException.__new__`` to record ``args``.
        super().__init__(message)
        self.message = message

    def __str__(self):
        return repr(self.message)


# --- parsnip/parse.py -------------------------------------------------------


def _remove_comments_from_line(line):
    """Return ``line`` without its trailing ``#`` comment and outer whitespace.

    Everything from the first ``#`` onward is discarded, including a ``#`` that
    appears inside a quoted value.
    """
    return line.split("#")[0].strip()


def read_table(
    filename: str,
    keys: tuple[str, ...],
    filter_line: tuple[tuple[str, str]] = ((r",\s+", ",")),
    keep_original_key_order=False,
) -> np.ndarray:
    r"""Extract data from a CIF file loop_ table.

    CIF files store tabular data as whitespace-delimited blocks that start with `loop_`.
    Keys are kept at the top of the table, and the vertical position of keys corresponds
    to the horizontal position of the column storing the data for that key. The end of
    the table is not necessarily marked: instead, the script detects when the table
    format is exited.

    For example:

    ```
    loop_
    _space_group_symop_id
    _space_group_symop_operation_xyz
    1 x,y,z
    2 -x,y,-z+1/2
    3 -x,-y,-z
    4 x,-y,z+1/2
    5 x+1/2,y+1/2,z
    6 -x+1/2,y+1/2,-z+1/2
    7 -x+1/2,-y+1/2,-z
    8 x+1/2,-y+1/2,z+1/2

    ```

    Only data columns corresponding to a key in the input keys list will be returned.

    Note that this function will ONLY return data from a single table. If keys are
    provided that correspond to data from multiple tables, only the first table will
    be read.

    The ``filter_line`` argument allows for dynamic input creation of regex filters to
    apply to each line that contains data to be saved. The default value is
    ``((",\s+",","))``, which helps differentiate between individual data fragments
    separated by commas and whitespace characters, and other sections of the line that
    are also whitespace separated. Adding another tuple to remove single quotes can
    also be helpful: try ``((",\s+",","),("'",""))`` to achieve this. To disable the
    feature entirely, pass in a tuple of empty strings: ``("","")``. Note that doing so
    will cause errors if the table contains non-delimiting whitespaces.

    Args:
        filename (str): The name of the .cif file to be parsed.
        keys (tuple[str]): The names of the keys to be parsed. A single string is
            treated as a one-element tuple.
        filter_line (tuple[tuple[str]], optional):
            A tuple of strings that are compiled to a regex filter and applied to each
            data line. (Default value: ((r",\s+",",")) )
        keep_original_key_order (bool, optional):
            When True, preserve the order of keys in the table from the cif file.
            When False, return columns of data in order of the input ``keys`` arg.
            (Default value: False)

    Returns:
        np.ndarray[str]: A 2D numpy array of the data as strings, with one row per
        table row and one column per key that was found. The array has zero rows
        when no matching table or data line exists.

    Raises:
        ParseError: If an empty line appears inside a table header after one of the
            requested keys has been found.

    Warns:
        ParseWarning: If a data line has too few values to fill every requested
            column (the line is skipped), or if some of ``keys`` were not found.
    """
    if isinstance(keys, str):
        # A bare string would otherwise be matched by substring, not by key name.
        keys = (keys,)

    # Position of every requested key in ``keys``; the first occurrence wins,
    # mirroring ``keys.index``.
    key_rank = {}
    for position, key in enumerate(keys):
        key_rank.setdefault(key, position)

    with open(filename) as f:
        tables = f.read().split("loop_")

    line_cleaner = LineCleaner(filter_line)
    data_column_indices, data, found_keys = [], [], []

    for table in tables:
        lines = table.strip().split("\n")
        in_header = True
        n_header_columns = 0  # Number of header keys seen so far in this table.
        data_column_indices, data, found_keys = [], [], []

        for line in lines:
            # Check for invalid blank lines in the table header
            if in_header and data_column_indices and line == "":
                raise ParseError(
                    "Whitespace may not be used in between keys in the table header. "
                    "See https://www.iucr.org/resources/cif/spec/version1.1/cifsyntax#general"
                    ", section 7 for more details."
                )

            # We will get errors if there is a comment after the loop_ block that
            # contains our data. This is questionably legal, but very uncommon
            line = _remove_comments_from_line(line)
            if not line:
                # Comment-only and blank lines carry neither keys nor data.
                continue

            if line[0] == "_":
                if not in_header:
                    # A key after the table body means the table has ended.
                    break
                # The column of a key is its rank among the header keys, not its
                # physical line number, so interleaved comments cannot shift it.
                if line in key_rank:
                    data_column_indices.append(n_header_columns)
                    found_keys.append(line)
                n_header_columns += 1
                continue

            if not data_column_indices:
                # No requested key in this table (yet): nothing to collect.
                continue

            in_header = False  # Exit the header and start writing data
            split_line = line_cleaner(line).split()

            # Only add data if the line is wide enough to index every required column.
            n_cols_expected = max(data_column_indices) + 1
            if len(split_line) >= n_cols_expected:
                data.append(split_line)
            elif split_line:
                warnings.warn(
                    f"Data line is a fragment and will be skipped: (expected line "
                    f"with {n_cols_expected} values, got {split_line}).",
                    ParseWarning,
                    stacklevel=2,
                )

        if data_column_indices:
            break

    if not keep_original_key_order:
        # Reorder the column indices to match the order of the input keys
        order = sorted(
            range(len(found_keys)), key=lambda i: key_rank[found_keys[i]]
        )
        data_column_indices = [data_column_indices[i] for i in order]

    # Track found keys independently of the ordering option so the warning is
    # correct for ``keep_original_key_order=True`` as well.
    missing_keys = {key for key in keys if key not in found_keys}
    if missing_keys:
        warnings.warn(
            f"Keys {missing_keys} were not found in the table.",
            ParseWarning,
            stacklevel=2,
        )

    # Select columns row by row so rows with surplus values cannot produce a
    # ragged array.
    rows = [[row[i] for i in data_column_indices] for row in data]
    if not rows:
        return np.empty((0, len(data_column_indices)), dtype=str)
    return np.array(rows, dtype=str)


def read_fractional_positions(
    filename: str,
    filter_line: tuple[tuple[str, str]] = ((r",\s+", ",")),
):
    r"""Extract the fractional X,Y,Z coordinates from a CIF file.

    Args:
        filename (str): The name of the .cif file to be parsed.
        filter_line (tuple[tuple[str]], optional):
            A tuple of strings that are compiled to a regex filter and applied to each
            data line. (Default value: ((r",\s+",",")) )

    Returns:
        np.array[np.float32]: Fractional X,Y,Z coordinates of the unit cell.
    """
    xyz_keys = ("_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z")
    # Once #6 is added, we should warnings.catch_warnings(action="error")
    xyz_data = read_table(
        filename=filename,
        keys=xyz_keys,
        filter_line=filter_line,  # Previously accepted but silently ignored.
    )

    xyz_data = cast_array_to_float(arr=xyz_data, dtype=np.float32)

    # Validate results
    assert xyz_data.shape[1] == 3
    assert xyz_data.dtype == np.float32

    return xyz_data


# --- parsnip/patterns.py ----------------------------------------------------

# Compile in common patterns for cif parsing. These are reused throughout the package.
_multiple_whitespace_pattern = re.compile(r"\s+")
_comma_prune_spaces = re.compile(r",\s+")


def compile_pattern_from_strings(filter_patterns: tuple[str, ...]):
    """Return a regex pattern that matches any of the given sub-patterns.

    The inputs are joined with ``|`` without escaping, so each entry is itself
    interpreted as a regular expression.

    Args:
        filter_patterns (tuple[str]): Regular expressions to combine.

    Returns:
        re.Pattern: Pattern matching any one of the input expressions.
    """
    return re.compile("|".join(filter_patterns))


def cast_array_to_float(arr: np.ndarray, dtype: type = np.float32):
    """Cast an array of numeric strings to a dtype, dropping bracketed suffixes.

    Each value is cut at its first ``(``, so ``"0.320(3)"`` is read as ``0.320``.

    Args:
        arr (np.array): Array of strings to convert.
        dtype (type, optional): dtype to cast array to (Default value: np.float32).

    Returns:
        np.array[float]: Array with the new dtype and the bracketed suffixes removed.
    """
    return np.char.partition(arr, "(")[..., 0].astype(dtype)


class LineCleaner:
    """Simple object to apply a series of regex patterns to a string.

    To initialize a line cleaner, pass in a tuple of strings of the form
    ``(pattern, replacement)``. Patterns are compiled on initialization to accelerate
    future processing.

    Args:
        patterns (tuple[tuple[str,str]]): Tuple of tuples of strings.
            The first item in each tuple is the pattern to match, and the second item is
            what that pattern will be replaced with. A single flat
            ``(pattern, replacement)`` tuple is also accepted, and an empty tuple
            yields a cleaner that returns its input unchanged.
    """

    def __init__(self, patterns: tuple[tuple[str, str]]):
        self.patterns, self.replacements = [], []

        if not patterns:
            # Nothing to substitute; avoids indexing into an empty tuple.
            return

        # If we only have a single tuple
        if isinstance(patterns[0], str):
            patterns = (patterns,)

        for pattern, replacement in patterns:
            self.patterns.append(re.compile(pattern))
            self.replacements.append(replacement)

    def __call__(self, line: str):
        """Apply patterns defined on initialization of the object to the string.

        ``pattern.sub(replacement, line)`` is run for each pattern (in order) in
        self.patterns, which is defined on initialization.

        Args:
            line (str): String to apply patterns to.

        Returns:
            str: The substituted line.
        """
        for pattern, replacement in zip(self.patterns, self.replacements):
            line = pattern.sub(replacement, line)
        return line
# ruff: noqa: N816. Allow mixed-case global variables


@dataclasses.dataclass
class CifData:
    """Class to hold the filename and stored keys for a CIF file."""

    # Path to the sample CIF file.
    filename: str
    # Keys of the symmetry-operation table requested from the file.
    symop_keys: tuple[str, ...]
    # Keys of the atom-site table requested from the file.
    atom_site_keys: tuple[str, ...]


box_keys = (
    "_cell_angle_alpha",
    "_cell_angle_beta",
    "_cell_angle_gamma",
    "_cell_length_a",
    "_cell_length_b",
    "_cell_length_c",
)
atom_site_keys = (
    "_atom_site_label",
    "_atom_site_type_symbol",
    "_atom_site_fract_x",
    "_atom_site_fract_y",
    "_atom_site_fract_z",
    "_atom_site_occupancy",
)


data_file_path = os.path.dirname(__file__) + "/sample_data/"


aflow_mC24 = CifData(
    filename=data_file_path + "AFLOW_mC24.cif",
    symop_keys=("_space_group_symop_id", "_space_group_symop_operation_xyz"),
    atom_site_keys=atom_site_keys,
)

bisd_Ccmm = CifData(
    filename=data_file_path + "B-IncStrDb_Ccmm.cif",
    symop_keys=("_space_group_symop_operation_xyz", "_space_group_symop_id"),
    # Our code works with extra keys, but gemmi does not!
    atom_site_keys=(atom_site_keys[0], *atom_site_keys[2:]),
)

ccdc_Pm3m = CifData(
    filename=data_file_path + "CCDC_1446529_Pm-3m.cif",
    symop_keys=("_space_group_symop_operation_xyz",),
    # ``sorted`` returns a list; convert back so the field matches its declared type.
    atom_site_keys=tuple(sorted(atom_site_keys)),
)

cod_aP16 = CifData(
    filename=data_file_path + "COD_1540955_aP16.cif",
    symop_keys=("_symmetry_equiv_pos_as_xyz",),
    atom_site_keys=atom_site_keys,
)

# Deliberately malformed keys; this entry is not part of ``cif_data_array``.
bad_cif = CifData(
    filename=data_file_path + "INTENTIONALLY_BAD_CIF.cif",
    symop_keys=("_space_group_symop_id", "_space_group_symop_operation_xyz"),
    atom_site_keys=(
        "_atom_site",
        "_atom_site_type_symbol",
        "_atom_site_symmetry_multiplicity",
        "_atom_si te",
        "_atom_site_fract_z",
        "_this_key_does_not_exist",
    ),
)

cif_data_array = [aflow_mC24, bisd_Ccmm, ccdc_Pm3m, cod_aP16]
# Parametrize mark running a test once per sample file, labelled by file name.
cif_files_mark = pytest.mark.parametrize(
    argnames="cif_data",
    argvalues=cif_data_array,
    ids=[cif.filename.split("/")[-1] for cif in cif_data_array],
)
Curtarolo, The AFLOW Library of Crystallographic Prototypes: Part 3, +# Comp. Mat. Sci. 199, 110450 (2021). (doi=10.1016/j.commatsci.2021.110450) + +# CIF file +data_findsym-output +_audit_creation_method FINDSYM + +_chemical_name_mineral 'Clinocervantite' +_chemical_formula_sum 'O2 Sb' + +loop_ +_publ_author_name + 'R. Basso' + 'G. Lucchetti' + 'L. Zefiro' + 'A. Palenzona' +_journal_name_full_name +; + European Journal of Mineralogy +; +_journal_volume 11 +_journal_year 1999 +_journal_page_first 95 +_journal_page_last 100 +_publ_Section_title +; + Clinocervantite, $\beta$-Sb$_{2}$O$_{4}$, the natural monoclinic polymorph of cervantite from the Cetine mine, Siena, Italy +; + +# Found in The American Mineralogist Crystal Structure Database, 2003 + +_aflow_title 'Clinocervantite ($\beta$-Sb$_{2}$O$_{4}$) Structure' +_aflow_proto 'A2B_mC24_15_2f_ce' +_aflow_params 'a,b/a,c/a,\beta,y_{2},x_{3},y_{3},z_{3},x_{4},y_{4},z_{4}' +_aflow_params_values '12.061,0.40096177763,0.446314567615,103.12,0.2851,0.1918,0.0517,0.6746,0.0939,0.4122,-0.0351' +_aflow_Strukturbericht 'None' +_aflow_Pearson 'mC24' + +_symmetry_space_group_name_H-M "C 1 2/c 1" +_symmetry_Int_Tables_number 15 + +_cell_length_a 12.06100 +_cell_length_b 4.83600 +_cell_length_c 5.38300 +_cell_angle_alpha 90.00000 +_cell_angle_beta 103.12000 +_cell_angle_gamma 90.00000 + +loop_ +_space_group_symop_id +_space_group_symop_operation_xyz +1 x,y,z +2 -x,y,-z+1/2 +3 -x,-y,-z +4 x,-y,z+1/2 +5 x+1/2,y+1/2,z +6 -x+1/2,y+1/2,-z+1/2 +7 -x+1/2,-y+1/2,-z +8 x+1/2,-y+1/2,z+1/2 + +loop_ +_atom_site_label +_atom_site_type_symbol +_atom_site_symmetry_multiplicity +_atom_site_Wyckoff_label +_atom_site_fract_x +_atom_site_fract_y +_atom_site_fract_z +_atom_site_occupancy +Sb1 Sb 4 c 0.25000 0.25000 0.00000 1.00000 +Sb2 Sb 4 e 0.00000 0.28510 0.25000 1.00000 +O1 O 8 f 0.19180 0.05170 0.67460 1.00000 +O2 O 8 f 0.09390 0.41220 -0.03510 1.00000 diff --git a/tests/sample_data/B-IncStrDb_Ccmm.cif b/tests/sample_data/B-IncStrDb_Ccmm.cif 
new file mode 100644 index 0000000..5ae4844 --- /dev/null +++ b/tests/sample_data/B-IncStrDb_Ccmm.cif @@ -0,0 +1,99 @@ +#\#CIF_1.0 +################################################################################ +# # +# This CIF is a part of the B-IncStrDB # +# (Bilbao Incommensurate Structures Database) # +# http://www.cryst.ehu.eus/bincstrdb/ # +# # +# Please note that the structure of the CIF file may differ from the one # +# deposited, as it may have been modified to comply with the standard. The # +# file has been validated against official dictionaries as well as local # +# dictionaries including non-standard data names used by SHELXL (Sheldrick, # +# G. M. (2008). Acta Cryst. A 64, 112-122) and JANA (Petricek, V., Dusek, M. # +# and Palatinus, L. (2014), Z. Kristallogr. 229, 345-352). # +# # +# For comments and/or criticisms, please e-mail to administrador-bcs@ehu.es # +# # +################################################################################ +data_mod1023433188 + +_journal_name_full 'Acta Crystallographica Section C' +_journal_volume 39 +_journal_year 1983 +_journal_page_first 678 +_journal_page_last 680 +_journal_paper_doi https://doi.org/10.1107/S0108270183005909 +_publ_contact_author_name 'Overeijnder, H.' +_publ_contact_author_email ? + +loop_ + _publ_author_name + 'van den Berg, A.J.' + 'Overeijnder, H.' + 'Tuinstra, F.' + +_publ_section_title +;The average structure of K~2~MoO~4~ in the incommensurate phase at 633K +; + +_exptl_crystal_type_of_structure cryst +_diffrn_ambient_temperature 633 +_diffrn_source x-ray + +_exptl_special_details +;Guinier-Lenne camera. 
Peak intensities estimated with an optical densitometer +; + +_chemical_formula_sum 'K6 Mo4 O16' + +_cell_length_a 10.933(3) +_cell_length_b 6.312(2) +_cell_length_c 7.944(2) +_cell_angle_alpha 90 +_cell_angle_beta 90 +_cell_angle_gamma 90 +_cell_volume 548.2 + +_cell_formula_units_Z 4 + +_space_group_crystal_system orthorhombic +_space_group_name_H-M_alt 'C c m m' + +loop_ +_space_group_symop_id +_space_group_symop_operation_xyz +1 x,y,z +2 -x,-y,z+1/2 +3 x,-y,-z+1/2 +4 -x,y,-z +5 x+1/2,y+1/2,z +6 -x+1/2,-y+1/2,z+1/2 +7 x+1/2,-y+1/2,-z+1/2 +8 -x+1/2,y+1/2,-z +9 -x,-y,-z +10 x,y,-z+1/2 +11 -x,y,z+1/2 +12 x,-y,z +13 -x+1/2,-y+1/2,-z +14 x+1/2,y+1/2,-z+1/2 +15 -x+1/2,y+1/2,z+1/2 +16 x+1/2,-y+1/2,z + +_reflns_number_gt 71 +_refine_ls_R_factor_gt ? +_refine_ls_wR_factor_gt 0.063 + +loop_ + _atom_site_label + _atom_site_occupancy + _atom_site_fract_x + _atom_site_fract_y + _atom_site_fract_z + _atom_site_U_iso_or_equiv + _atom_site_adp_type + K(1) 1 0 0 0 ? Uiso + K(2) 1 0.33333 0 0.320(3) ? Uiso + Mo 1 0.33333 0 0.760(4) ? Uiso + O(1) 1 0.179(1) 0 0.791(4) ? Uiso + O(2) 1 0.406(2) 0 0.950(4) ? Uiso + O(3) 1 0.374(1) 0.221(1) 0.650(4) ? Uiso diff --git a/tests/sample_data/CCDC_1446529_Pm-3m.cif b/tests/sample_data/CCDC_1446529_Pm-3m.cif new file mode 100644 index 0000000..e2f7d4f --- /dev/null +++ b/tests/sample_data/CCDC_1446529_Pm-3m.cif @@ -0,0 +1,665 @@ +####################################################################### +# +# This file contains crystal structure data downloaded from the +# Cambridge Structural Database (CSD) hosted by the Cambridge +# Crystallographic Data Centre (CCDC). +# +# Full information about CCDC data access policies and citation +# guidelines are available at http://www.ccdc.cam.ac.uk/access/V1 +# +# Audit and citation data items may have been added by the CCDC. +# Please retain this information to preserve the provenance of +# this file and to allow appropriate attribution of the data. 
+# +####################################################################### + +data_MAPbBr3_RT +_audit_block_doi 10.5517/ccdc.csd.cc1kk77l +_database_code_depnum_ccdc_archive 'CCDC 1446529' +loop_ +_citation_id +_citation_doi +_citation_year +1 10.1021/acscentsci.6b00055 2016 +_audit_update_record +; +2016-01-10 deposited with the CCDC. 2024-04-02 downloaded from the CCDC. +; + +_audit_creation_method SHELXL-2014/7 +_shelx_SHELXL_version_number 2014/7 +_chemical_name_systematic 'methylammonium lead bromide perovskite' +_chemical_name_common ? +_chemical_melting_point ? +_chemical_formula_moiety ? +_chemical_formula_sum 'C H6 Br3 N Pb' +_chemical_formula_weight 478.99 + +loop_ +_atom_type_symbol +_atom_type_description +_atom_type_scat_dispersion_real +_atom_type_scat_dispersion_imag +_atom_type_scat_source +C C 0.0034 0.0017 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4' +H H 0.0000 0.0000 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4' +N N 0.0066 0.0034 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4' +Br Br -0.2713 2.4705 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4' +Pb Pb -3.4194 10.2008 'International Tables Vol C Tables 4.2.6.8 and 6.1.1.4' + +_space_group_crystal_system cubic +_space_group_IT_number 221 +_space_group_name_H-M_alt 'P m -3 m' +_space_group_name_Hall '-P 4 2 3' + +_shelx_space_group_comment +; +The symmetry employed for this shelxl refinement is uniquely defined +by the following loop, which should always be used as a source of +symmetry information in preference to the above space-group names. +They are only intended as comments. 
+; + +loop_ +_space_group_symop_operation_xyz +'x, y, z' +'-x, -y, z' +'-x, y, -z' +'x, -y, -z' +'z, x, y' +'z, -x, -y' +'-z, -x, y' +'-z, x, -y' +'y, z, x' +'-y, z, -x' +'y, -z, -x' +'-y, -z, x' +'y, x, -z' +'-y, -x, -z' +'y, -x, z' +'-y, x, z' +'x, z, -y' +'-x, z, y' +'-x, -z, -y' +'x, -z, y' +'z, y, -x' +'z, -y, x' +'-z, y, x' +'-z, -y, -x' +'-x, -y, -z' +'x, y, -z' +'x, -y, z' +'-x, y, z' +'-z, -x, -y' +'-z, x, y' +'z, x, -y' +'z, -x, y' +'-y, -z, -x' +'y, -z, x' +'-y, z, x' +'y, z, -x' +'-y, -x, z' +'y, x, z' +'-y, x, -z' +'y, -x, -z' +'-x, -z, y' +'x, -z, -y' +'x, z, y' +'-x, z, -y' +'-z, -y, x' +'-z, y, -x' +'z, -y, -x' +'z, y, x' + +_cell_length_a 5.9328(14) +_cell_length_b 5.9328(14) +_cell_length_c 5.9328(14) +_cell_angle_alpha 90 +_cell_angle_beta 90 +_cell_angle_gamma 90 +_cell_volume 208.82(15) +_cell_formula_units_Z 1 +_cell_measurement_temperature 296(2) +_cell_measurement_reflns_used 567 +_cell_measurement_theta_min 3.52 +_cell_measurement_theta_max 21.3 + +_exptl_crystal_description block +_exptl_crystal_colour orange +_exptl_crystal_density_meas ? +_exptl_crystal_density_method ? +_exptl_crystal_density_diffrn 3.809 +_exptl_crystal_F_000 206 +_exptl_transmission_factor_min ? +_exptl_transmission_factor_max ? +_exptl_crystal_size_max 0.030 +_exptl_crystal_size_mid 0.020 +_exptl_crystal_size_min 0.010 +_exptl_absorpt_coefficient_mu 34.335 +_shelx_estimated_absorpt_T_min 0.426 +_shelx_estimated_absorpt_T_max 0.725 +_exptl_absorpt_correction_type multi-scan +_exptl_absorpt_correction_T_min 0.3199 +_exptl_absorpt_correction_T_max 0.4404 +_exptl_absorpt_process_details 'SADABS-2014/5 (Sheldrick, 2015)' +_exptl_absorpt_special_details ? 
+_diffrn_ambient_temperature 296(2) +_diffrn_radiation_wavelength 0.7293 +_diffrn_radiation_type synchrotron +_diffrn_source +; +Advanced Light Source, station 11.3.1 +; +_diffrn_radiation_monochromator 'silicon 111' +_diffrn_measurement_device_type 'Bruker D8 with PHOTON 100 detector' +_diffrn_measurement_method '\f and \w shutterless scans' +_diffrn_detector_area_resol_mean 10.42 +_diffrn_reflns_number 4451 +_diffrn_reflns_av_unetI/netI 0.0076 +_diffrn_reflns_av_R_equivalents 0.0313 +_diffrn_reflns_limit_h_min -9 +_diffrn_reflns_limit_h_max 9 +_diffrn_reflns_limit_k_min -9 +_diffrn_reflns_limit_k_max 9 +_diffrn_reflns_limit_l_min -9 +_diffrn_reflns_limit_l_max 9 +_diffrn_reflns_theta_min 3.524 +_diffrn_reflns_theta_max 34.053 +_diffrn_reflns_theta_full 25.950 +_diffrn_measured_fraction_theta_max 1.000 +_diffrn_measured_fraction_theta_full 1.000 +_diffrn_reflns_Laue_measured_fraction_max 1.000 +_diffrn_reflns_Laue_measured_fraction_full 1.000 +_diffrn_reflns_point_group_measured_fraction_max 1.000 +_diffrn_reflns_point_group_measured_fraction_full 1.000 +_reflns_number_total 113 +_reflns_number_gt 113 +_reflns_threshold_expression 'I > 2\s(I)' +_reflns_Friedel_coverage 0.000 +_reflns_Friedel_fraction_max . +_reflns_Friedel_fraction_full . + +_reflns_special_details +; + Reflections were merged by SHELXL according to the crystal + class for the calculation of statistics and refinement. + + _reflns_Friedel_fraction is defined as the number of unique + Friedel pairs measured divided by the number that would be + possible theoretically, ignoring centric projections and + systematic absences. 
+; + +_computing_data_collection 'Apex2 v2014.11-0 (Bruker, 2014)' +_computing_cell_refinement 'SAINT V8.34A(Bruker, 2013)' +_computing_data_reduction SAINT +_computing_structure_solution 'SHELXT (Sheldrick, 2012)' +_computing_structure_refinement 'SHELXL-2014/7 (Sheldrick, 2014)' +_computing_molecular_graphics +; + SHELXTL 5.1, XP (Sheldrick, 1994) + ShelXle Rev 699 (Hubschle, 2011) + WinCoot, (P.Emsley, B.Lohkamp W.G.Scott and K.Cowtand, 2010) +; +_computing_publication_material SHELXL-2014/7 +_refine_special_details +; +Hydrogen atoms were not found in the difference map, so were not refined +in the structure. + +The methylammonium positions were found in the difference map. +The carbon and the nitrogens share the same position in all three sites. +They were refined with EADP & EXYZ. A DFIX was initially used, but then as +the refinement progressed, a SADI was employed over all three. +; +_refine_ls_structure_factor_coef Fsqd +_refine_ls_matrix_type full +_refine_ls_weighting_scheme calc +_refine_ls_weighting_details +'w=1/[\s^2^(Fo^2^)+(0.0263P)^2^+0.0660P] where P=(Fo^2^+2Fc^2^)/3' +_atom_sites_solution_primary 'intrinsic phasing' +_atom_sites_solution_secondary difmap +_atom_sites_solution_hydrogens . +_refine_ls_hydrogen_treatment undef +_refine_ls_extinction_method none +_refine_ls_extinction_coef . 
+_refine_ls_number_reflns 113 +_refine_ls_number_parameters 10 +_refine_ls_number_restraints 3 +_refine_ls_R_factor_all 0.0153 +_refine_ls_R_factor_gt 0.0153 +_refine_ls_wR_factor_ref 0.0408 +_refine_ls_wR_factor_gt 0.0408 +_refine_ls_goodness_of_fit_ref 1.274 +_refine_ls_restrained_S_all 1.256 +_refine_ls_shift/su_max 0.000 +_refine_ls_shift/su_mean 0.000 + +loop_ +_atom_site_label +_atom_site_type_symbol +_atom_site_fract_x +_atom_site_fract_y +_atom_site_fract_z +_atom_site_U_iso_or_equiv +_atom_site_adp_type +_atom_site_occupancy +_atom_site_site_symmetry_order +_atom_site_calc_flag +_atom_site_refinement_flags_posn +_atom_site_refinement_flags_adp +_atom_site_refinement_flags_occupancy +_atom_site_disorder_assembly +_atom_site_disorder_group +Pb01 Pb 0.5000 0.5000 0.5000 0.02551(17) Uani 1 48 d S T P . . +Br02 Br 0.0000 0.5000 0.5000 0.0983(7) Uani 1 16 d S T P . . +C1 C 0.0000 -0.120(5) 0.0000 0.030(12) Uiso 0.0417 8 d DS . P . 1 +N1 N 0.0000 -0.120(5) 0.0000 0.030(12) Uiso 0.0417 8 d S . P . 1 +C2 C 0.085(3) 0.0000 -0.085(3) 0.05(2) Uiso 0.0416 4 d DS . P . 2 +N2 N 0.085(3) 0.0000 -0.085(3) 0.05(2) Uiso 0.0416 4 d S . P . 2 +C3 C 0.069(3) 0.069(3) 0.069(3) 0.04(2) Uiso 0.0313 6 d DS . P A 3 +N3 N 0.069(3) 0.069(3) 0.069(3) 0.04(2) Uiso 0.0313 6 d S . P B 3 + +loop_ +_atom_site_aniso_label +_atom_site_aniso_U_11 +_atom_site_aniso_U_22 +_atom_site_aniso_U_33 +_atom_site_aniso_U_23 +_atom_site_aniso_U_13 +_atom_site_aniso_U_12 +Pb01 0.02551(17) 0.02551(17) 0.02551(17) 0.000 0.000 0.000 +Br02 0.0222(5) 0.1364(11) 0.1364(11) 0.000 0.000 0.000 + +_geom_special_details +; + All esds (except the esd in the dihedral angle between two l.s. planes) + are estimated using the full covariance matrix. The cell esds are taken + into account individually in the estimation of esds in distances, angles + and torsion angles; correlations between esds in cell parameters are only + used when they are defined by crystal symmetry. 
An approximate (isotropic) + treatment of cell esds is used for estimating esds involving l.s. planes. +; + +loop_ +_geom_bond_atom_site_label_1 +_geom_bond_atom_site_label_2 +_geom_bond_distance +_geom_bond_site_symmetry_2 +_geom_bond_publ_flag +Pb01 Br02 2.9664(7) 9_556 ? +Pb01 Br02 2.9664(7) . ? +Pb01 Br02 2.9664(7) 1_655 ? +Pb01 Br02 2.9664(7) 5 ? +Pb01 Br02 2.9664(7) 5_565 ? +Pb01 Br02 2.9664(7) 9 ? +Br02 Pb01 2.9664(7) 1_455 ? +C1 N1 1.01(4) 33 ? +C1 N1 1.01(4) 29 ? +C1 C1 1.01(4) 33 ? +C1 C1 1.01(4) 29 ? +C1 N1 1.01(4) 9 ? +C1 C1 1.01(4) 9 ? +C1 C1 1.01(4) 5 ? +C1 N1 1.01(4) 5 ? +C1 N1 1.43(6) 25 ? +C1 C1 1.43(6) 25 ? +C2 N2 0.71(3) 10 ? +C2 N2 0.71(3) 30 ? +C2 C2 0.71(3) 10 ? +C2 C2 0.71(3) 30 ? +C2 N2 0.71(3) 33 ? +C2 N2 0.71(3) 29 ? +C2 C2 0.71(3) 29 ? +C2 C2 0.71(3) 33 ? +C2 N2 1.01(4) 26 ? +C2 N2 1.01(4) 2 ? +C2 C2 1.01(4) 26 ? +C2 C2 1.01(4) 2 ? +C3 N3 0.82(4) 28 ? +C3 N3 0.82(4) 27 ? +C3 N3 0.82(4) 26 ? +C3 C3 0.82(4) 28 ? +C3 C3 0.82(4) 26 ? +C3 C3 0.82(4) 27 ? +C3 N3 1.16(5) 4 ? +C3 C3 1.16(5) 4 ? +C3 C3 1.16(5) 3 ? +C3 N3 1.16(5) 2 ? +C3 C3 1.16(5) 2 ? +C3 N3 1.16(5) 3 ? + +loop_ +_geom_angle_atom_site_label_1 +_geom_angle_atom_site_label_2 +_geom_angle_atom_site_label_3 +_geom_angle +_geom_angle_site_symmetry_1 +_geom_angle_site_symmetry_3 +_geom_angle_publ_flag +Br02 Pb01 Br02 90.0 9_556 . ? +Br02 Pb01 Br02 90.0 9_556 1_655 ? +Br02 Pb01 Br02 180.0 . 1_655 ? +Br02 Pb01 Br02 90.0 9_556 5 ? +Br02 Pb01 Br02 90.0 . 5 ? +Br02 Pb01 Br02 90.0 1_655 5 ? +Br02 Pb01 Br02 90.0 9_556 5_565 ? +Br02 Pb01 Br02 90.0 . 5_565 ? +Br02 Pb01 Br02 90.0 1_655 5_565 ? +Br02 Pb01 Br02 180.0 5 5_565 ? +Br02 Pb01 Br02 180.0 9_556 9 ? +Br02 Pb01 Br02 90.0 . 9 ? +Br02 Pb01 Br02 90.0 1_655 9 ? +Br02 Pb01 Br02 90.0 5 9 ? +Br02 Pb01 Br02 90.0 5_565 9 ? +Pb01 Br02 Pb01 180.0 1_455 . ? +N1 C1 N1 60.001(1) 33 29 ? +N1 C1 C1 0.0 33 33 ? +N1 C1 C1 60.0 29 33 ? +N1 C1 C1 60.0 33 29 ? +N1 C1 C1 0.0 29 29 ? +C1 C1 C1 60.001(1) 33 29 ? +N1 C1 N1 90.001(1) 33 9 ? 
+N1 C1 N1 60.001(1) 29 9 ? +C1 C1 N1 90.001(1) 33 9 ? +C1 C1 N1 60.001(1) 29 9 ? +N1 C1 C1 90.0 33 9 ? +N1 C1 C1 60.0 29 9 ? +C1 C1 C1 90.001(1) 33 9 ? +C1 C1 C1 60.001(1) 29 9 ? +N1 C1 C1 0.0 9 9 ? +N1 C1 C1 60.0 33 5 ? +N1 C1 C1 90.0 29 5 ? +C1 C1 C1 60.0 33 5 ? +C1 C1 C1 90.001(1) 29 5 ? +N1 C1 C1 60.0 9 5 ? +C1 C1 C1 60.001(3) 9 5 ? +N1 C1 N1 60.0 33 5 ? +N1 C1 N1 90.001(1) 29 5 ? +C1 C1 N1 60.0 33 5 ? +C1 C1 N1 90.001(1) 29 5 ? +N1 C1 N1 60.001(3) 9 5 ? +C1 C1 N1 60.001(3) 9 5 ? +C1 C1 N1 0(2) 5 5 ? +N1 C1 N1 45.001(1) 33 25 ? +N1 C1 N1 45.001(1) 29 25 ? +C1 C1 N1 45.001(1) 33 25 ? +C1 C1 N1 45.001(1) 29 25 ? +N1 C1 N1 45.001(1) 9 25 ? +C1 C1 N1 45.001(1) 9 25 ? +C1 C1 N1 45.001(1) 5 25 ? +N1 C1 N1 45.001(1) 5 25 ? +N1 C1 C1 45.0 33 25 ? +N1 C1 C1 45.0 29 25 ? +C1 C1 C1 45.001(1) 33 25 ? +C1 C1 C1 45.001(1) 29 25 ? +N1 C1 C1 45.0 9 25 ? +C1 C1 C1 45.001(1) 9 25 ? +C1 C1 C1 45.001(1) 5 25 ? +N1 C1 C1 45.0 5 25 ? +N1 C1 C1 0.0 25 25 ? +N2 C2 N2 120.003(5) 10 30 ? +N2 C2 C2 0.0 10 10 ? +N2 C2 C2 120.0 30 10 ? +N2 C2 C2 120.0 10 30 ? +N2 C2 C2 0.0 30 30 ? +C2 C2 C2 120.003(5) 10 30 ? +N2 C2 N2 90.002(3) 10 33 ? +N2 C2 N2 60.001(4) 30 33 ? +C2 C2 N2 90.002(3) 10 33 ? +C2 C2 N2 60.001(4) 30 33 ? +N2 C2 N2 60.001(4) 10 29 ? +N2 C2 N2 90.002(4) 30 29 ? +C2 C2 N2 60.001(4) 10 29 ? +C2 C2 N2 90.002(4) 30 29 ? +N2 C2 N2 120.003(3) 33 29 ? +N2 C2 C2 60.0 10 29 ? +N2 C2 C2 90.0 30 29 ? +C2 C2 C2 60.001(4) 10 29 ? +C2 C2 C2 90.002(4) 30 29 ? +N2 C2 C2 120.0 33 29 ? +N2 C2 C2 0.0 29 29 ? +N2 C2 C2 90.0 10 33 ? +N2 C2 C2 60.0 30 33 ? +C2 C2 C2 90.002(3) 10 33 ? +C2 C2 C2 60.001(4) 30 33 ? +N2 C2 C2 0.0 33 33 ? +N2 C2 C2 120.0 29 33 ? +C2 C2 C2 120.003(3) 29 33 ? +N2 C2 N2 90.002(1) 10 26 ? +N2 C2 N2 45.001(2) 30 26 ? +C2 C2 N2 90.002(1) 10 26 ? +C2 C2 N2 45.001(2) 30 26 ? +N2 C2 N2 90.002(2) 33 26 ? +N2 C2 N2 45.001(2) 29 26 ? +C2 C2 N2 45.001(2) 29 26 ? +C2 C2 N2 90.002(2) 33 26 ? +N2 C2 N2 45.001(1) 10 2 ? +N2 C2 N2 90.0 30 2 ? +C2 C2 N2 45.001(1) 10 2 ? 
+C2 C2 N2 90.0 30 2 ? +N2 C2 N2 45.001(2) 33 2 ? +N2 C2 N2 90.002(1) 29 2 ? +C2 C2 N2 90.002(1) 29 2 ? +C2 C2 N2 45.001(2) 33 2 ? +N2 C2 N2 90.002(2) 26 2 ? +N2 C2 C2 90.0 10 26 ? +N2 C2 C2 45.0 30 26 ? +C2 C2 C2 90.002(1) 10 26 ? +C2 C2 C2 45.001(2) 30 26 ? +N2 C2 C2 90.0 33 26 ? +N2 C2 C2 45.0 29 26 ? +C2 C2 C2 45.001(2) 29 26 ? +C2 C2 C2 90.002(2) 33 26 ? +N2 C2 C2 0.0 26 26 ? +N2 C2 C2 90.0 2 26 ? +N2 C2 C2 45.0 10 2 ? +N2 C2 C2 90.0 30 2 ? +C2 C2 C2 45.001(1) 10 2 ? +C2 C2 C2 90.0 30 2 ? +N2 C2 C2 45.0 33 2 ? +N2 C2 C2 90.0 29 2 ? +C2 C2 C2 90.002(1) 29 2 ? +C2 C2 C2 45.001(2) 33 2 ? +N2 C2 C2 90.0 26 2 ? +N2 C2 C2 0.0 2 2 ? +C2 C2 C2 90.002(2) 26 2 ? +N3 C3 N3 90.001(1) 28 27 ? +N3 C3 N3 90.001(2) 28 26 ? +N3 C3 N3 90.001(2) 27 26 ? +N3 C3 C3 0.0 28 28 ? +N3 C3 C3 90.0 27 28 ? +N3 C3 C3 90.0 26 28 ? +N3 C3 C3 90.0 28 26 ? +N3 C3 C3 90.0 27 26 ? +N3 C3 C3 0.0 26 26 ? +C3 C3 C3 90.001(2) 28 26 ? +N3 C3 C3 90.0 28 27 ? +N3 C3 C3 0.0 27 27 ? +N3 C3 C3 90.0 26 27 ? +C3 C3 C3 90.001(1) 28 27 ? +C3 C3 C3 90.001(2) 26 27 ? +N3 C3 N3 90.001(3) 28 4 ? +N3 C3 N3 45.0 27 4 ? +N3 C3 N3 45.000(1) 26 4 ? +C3 C3 N3 90.001(3) 28 4 ? +C3 C3 N3 45.000(1) 26 4 ? +C3 C3 N3 45.0 27 4 ? +N3 C3 C3 90.0 28 4 ? +N3 C3 C3 45.0 27 4 ? +N3 C3 C3 45.0 26 4 ? +C3 C3 C3 90.001(3) 28 4 ? +C3 C3 C3 45.000(1) 26 4 ? +C3 C3 C3 45.0 27 4 ? +N3 C3 C3 0.0 4 4 ? +N3 C3 C3 45.0 28 3 ? +N3 C3 C3 90.0 27 3 ? +N3 C3 C3 45.0 26 3 ? +C3 C3 C3 45.0 28 3 ? +C3 C3 C3 45.0 26 3 ? +C3 C3 C3 90.001(3) 27 3 ? +N3 C3 C3 60.0 4 3 ? +C3 C3 C3 60.000(2) 4 3 ? +N3 C3 N3 45.0 28 2 ? +N3 C3 N3 45.0 27 2 ? +N3 C3 N3 90.001(4) 26 2 ? +C3 C3 N3 45.0 28 2 ? +C3 C3 N3 90.001(4) 26 2 ? +C3 C3 N3 45.0 27 2 ? +N3 C3 N3 60.000(1) 4 2 ? +C3 C3 N3 60.000(1) 4 2 ? +C3 C3 N3 60.000(1) 3 2 ? +N3 C3 C3 45.0 28 2 ? +N3 C3 C3 45.0 27 2 ? +N3 C3 C3 90.0 26 2 ? +C3 C3 C3 45.0 28 2 ? +C3 C3 C3 90.001(4) 26 2 ? +C3 C3 C3 45.0 27 2 ? +N3 C3 C3 60.0 4 2 ? +C3 C3 C3 60.000(1) 4 2 ? +C3 C3 C3 60.000(1) 3 2 ? +N3 C3 C3 0.0 2 2 ? 
+N3 C3 N3 45.0 28 3 ? +N3 C3 N3 90.001(3) 27 3 ? +N3 C3 N3 45.0 26 3 ? +C3 C3 N3 45.0 28 3 ? +C3 C3 N3 45.0 26 3 ? +C3 C3 N3 90.001(3) 27 3 ? +N3 C3 N3 60.000(2) 4 3 ? +C3 C3 N3 60.000(2) 4 3 ? +C3 C3 N3 0.0(7) 3 3 ? +N3 C3 N3 60.000(1) 2 3 ? +C3 C3 N3 60.000(1) 2 3 ? + +_refine_diff_density_max 0.464 +_refine_diff_density_min -0.593 +_refine_diff_density_rms 0.151 + +_shelx_res_file +; + + sad_a.res created by SHELXL-2014/7 + + +TITL sad_a.res in Pm-3m +REM Old TITL sad in Pm-3m + +REM SHELXT solution in Pm-3m +REM R1 0.453, Rweak 0.494, Alpha 0.034, Orientation as input +REM Formula found by SHELXT: N Br3 Pb + +CELL 0.72930 5.9328 5.9328 5.9328 90.000 90.000 90.000 +ZERR 1.000 0.0014 0.0014 0.0014 0.000 0.000 0.000 +LATT 1 +SYMM -X, -Y, Z +SYMM -X, Y, -Z +SYMM X, -Y, -Z +SYMM Z, X, Y +SYMM Z, -X, -Y +SYMM -Z, -X, Y +SYMM -Z, X, -Y +SYMM Y, Z, X +SYMM -Y, Z, -X +SYMM Y, -Z, -X +SYMM -Y, -Z, X +SYMM Y, X, -Z +SYMM -Y, -X, -Z +SYMM Y, -X, Z +SYMM -Y, X, Z +SYMM X, Z, -Y +SYMM -X, Z, Y +SYMM -X, -Z, -Y +SYMM X, -Z, Y +SYMM Z, Y, -X +SYMM Z, -Y, X +SYMM -Z, Y, X +SYMM -Z, -Y, -X +SFAC C H N BR PB +DISP $C 0.00341 0.00169 11.32113!source kissel +DISP $H -0.00002 0.00000 0.66638!source kissel +DISP $N 0.00664 0.00342 19.38436!source kissel +DISP $Br -0.27128 2.47050 10018.37566!source kissel +DISP $Pb -3.41943 10.20082 41609.52882!source kissel +UNIT 1 6 1 3 1 +SIZE 0.03 0.02 0.01 !orange block +REM DFIX 1.5 0.1 C1 N1 +TEMP -273.150 +L.S. 
300 +BOND $H +LIST 4 +ACTA +FMAP 2 +PLAN 20 +EXYZ C1 N1 +EADP C1 N1 +EXYZ C2 N2 +EADP C2 N2 +EXYZ C3 N3 +EADP C3 N3 +EQIV $1 -x, -y, -z +EQIV $2 +z, +y, +x +EQIV $3 -z, -x, -y +RTAB MA C1 N1_$1 +RTAB MA C2 N2_$2 +RTAB MA C3 N3_$3 +SADI 0.05 C1 N1_$1 C2 N2_$2 C3 N3_$3 +WGHT 0.026300 0.066000 +FVAR 0.71502 +PB01 5 0.500000 0.500000 0.500000 10.02083 0.02551 0.02551 = + 0.02551 0.00000 0.00000 0.00000 + +BR02 4 0.000000 0.500000 0.500000 10.06250 0.02221 0.13642 = + 0.13642 0.00000 0.00000 0.00000 +part 1 +C1 1 0.000000 -0.120298 0.000000 10.00521 0.02991 +N1 3 0.000000 -0.120298 0.000000 10.00521 0.02991 +part 2 +C2 1 0.084729 0.000000 -0.084729 10.01041 0.05305 +N2 3 0.084729 0.000000 -0.084729 10.01041 0.05305 +Part 3 +C3 1 0.069251 0.069251 0.069251 10.00521 0.04098 +N3 3 0.069251 0.069251 0.069251 10.00521 0.04098 +Part 0 +HKLF 4 + +REM sad_a.res in Pm-3m +REM R1 = 0.0153 for 113 Fo > 4sig(Fo) and 0.0153 for all 113 data +REM 10 parameters refined using 3 restraints + +END + +WGHT 0.0250 0.0722 + +REM Highest difference peak 0.464, deepest hole -0.593, 1-sigma level 0.151 +Q1 1 -0.0664 -0.1960 0.0000 10.50000 0.05 0.46 +Q2 1 0.0000 0.3944 0.5000 10.25000 0.05 0.36 +Q3 1 0.1463 0.1463 0.1463 10.16667 0.05 0.31 +Q4 1 0.2070 0.0000 -0.2585 10.50000 0.05 0.24 +Q5 1 0.2278 0.5000 0.2278 10.25000 0.05 0.19 +Q6 1 0.2971 0.2971 0.2017 10.50000 0.05 0.17 +Q7 1 0.0000 0.2164 0.5000 10.25000 0.05 0.17 +Q8 1 0.2784 -0.0760 -0.2784 10.50000 0.05 0.16 +Q9 1 0.2345 0.4695 0.4313 11.00000 0.05 0.15 +Q10 1 0.5000 0.4036 0.5000 10.12500 0.05 0.13 +Q11 1 0.3993 0.3993 0.2627 10.50000 0.05 0.12 +Q12 1 0.1690 0.3736 0.3736 10.50000 0.05 0.12 +Q13 1 0.1572 0.2610 0.5000 10.50000 0.05 0.12 +Q14 1 0.3609 0.1562 0.2147 11.00000 0.05 0.12 +Q15 1 0.1309 0.5000 0.1309 10.25000 0.05 0.11 +Q16 1 0.2790 0.2790 0.5000 10.25000 0.05 0.09 +Q17 1 0.0000 0.2522 0.3936 10.50000 0.05 0.08 +Q18 1 0.2038 0.2038 0.4262 10.50000 0.05 0.07 +Q19 1 0.2347 0.3648 0.5000 10.50000 0.05 0.07 +Q20 1 0.1002 
0.1945 0.5000 10.50000 0.05 0.06 +; +_shelx_res_checksum 94550 + +# start Validation Reply Form + +_vrf_PLAT973_I +; +PROBLEM: Check Calcd Positive Residual Density on Pb01 2.62 eA-3 +RESPONSE: Disorder in this site should have been seen in related disorders, +and as it is less than 3% of a Pb, it was left alone. + +; diff --git a/tests/sample_data/COD_1540955_aP16.cif b/tests/sample_data/COD_1540955_aP16.cif new file mode 100644 index 0000000..7ae58a0 --- /dev/null +++ b/tests/sample_data/COD_1540955_aP16.cif @@ -0,0 +1,82 @@ +# Data taken from COD (Crystallography Open Database) +# All credit goes to the following: +# Grazulis, S., Chateigner, D., Downs, R. T., Yokochi, A. T., Quiros, M., Lutterotti, +# L., Manakova, E., Butkus, J., Moeck, P. & Le Bail, A. (2009). Crystallography Open +# Database – an open-access collection of crystal structures. Journal of Applied +# Crystallography, 42, 726-729. + + +#------------------------------------------------------------------------------ +#$Date: 2016-02-13 21:28:24 +0200 (Sat, 13 Feb 2016) $ +#$Revision: 176429 $ +#$URL: file:///home/coder/svn-repositories/cod/cif/1/54/09/1540955.cif $ +#------------------------------------------------------------------------------ +# +# This file is available in the Crystallography Open Database (COD), +# http://www.crystallography.net/ +# +# All data on this site have been placed in the public domain by the +# contributors. +# +data_1540955 +loop_ +_publ_author_name +'Sheldrick, W.S.' +'Haeusler, H.J.' +_publ_section_title +; + Zur Kenntnis von Alkalimetaselenoarseniten. 
Darstellung und + Kristallstrukturen von M As Se2, M = K, Rb, Cs +; +_journal_name_full +'Zeitschrift fuer Anorganische und Allgemeine Chemie' +_journal_page_first 139 +_journal_page_last 148 +_journal_volume 561 +_journal_year 1988 +_chemical_formula_sum 'As K Se2' +_chemical_name_systematic 'K (As Se2)' +_space_group_IT_number 1 +_symmetry_space_group_name_Hall 'P 1' +_symmetry_space_group_name_H-M 'P 1' +_cell_angle_alpha 100.43 +_cell_angle_beta 107.53 +_cell_angle_gamma 100.48 +_cell_formula_units_Z 4 +_cell_length_a 6.558 +_cell_length_b 12.628 +_cell_length_c 6.554 +_cell_volume 492.567 +_citation_journal_id_ASTM ZAACAB +_cod_data_source_file Sheldrick_ZAACAB_1988_108.cif +_cod_data_source_block As1K1Se2 +_cod_original_cell_volume 492.5672 +_cod_original_formula_sum 'As1 K1 Se2' +_cod_database_code 1540955 +loop_ +_symmetry_equiv_pos_as_xyz +x,y,z +loop_ +_atom_site_label +_atom_site_type_symbol +_atom_site_fract_x +_atom_site_fract_y +_atom_site_fract_z +_atom_site_occupancy +_atom_site_U_iso_or_equiv +K4 K+1 -0.1465 0.9978 0.4529 1 0.0 +Se1 Se-2 0.686 0.2073 1.2284 1 0.0 +Se4 Se-2 0.2623 0.2448 0.738 1 0.0 +Se5 Se-2 0.6683 0.708 1.2484 1 0.0 +K1 K+1 0.2123 0.2599 0.2261 1 0.0 +As3 As+3 0.2884 0.6767 1.155 1 0.0 +As4 As+3 0.3172 0.9426 1.017 1 0.0 +Se7 Se-2 0 1 1 1 0.0 +Se8 Se-2 0.1758 0.7448 0.8228 1 0.0 +Se3 Se-2 0.4396 0.4998 0.5614 1 0.0 +Se2 Se-2 0.3861 -0.0187 0.691 1 0.0 +K2 K+1 -0.1098 0.4974 0.4143 1 0.0 +As1 As+3 0.595 0.177 0.8503 1 0.0 +Se6 Se-2 0.1298 0.4818 0.9492 1 0.0 +As2 As+3 0.4556 0.4424 0.8774 1 0.0 +K3 K+1 0.6634 0.7583 0.7714 1 0.0 diff --git a/tests/sample_data/INTENTIONALLY_BAD_CIF.cif b/tests/sample_data/INTENTIONALLY_BAD_CIF.cif new file mode 100644 index 0000000..7cd2675 --- /dev/null +++ b/tests/sample_data/INTENTIONALLY_BAD_CIF.cif @@ -0,0 +1,48 @@ +data_# CIF file + +_cell_length_a 1.000000(x) +_cell_length_b 4.32343242 +_cell_length_c 3.1415926535897932384626433832795028841971693993751058209749 +_cell_angle_alpha 90.00000 
+_cell_angle_beta -10.12345 +_cell_angle_gamma 210.00000 + +# NOTE: Adding comments on loop_ keyword lines breaks the table reader +loop_ +loop_ +_space_group_symop_id # this is a comment +_space_group_symop_operation_xyz +_atom_site_fracccccccc_z # Intentionally bad key +# COMMENT2 + +1 x, y,z . +2 -x,y, -z*1/2 ? +3 -x,-y, -z (x) # What About Here +4 x,=y, z/1/2 zzzzzzzzzz + +5 x-1/2,y+1/2,z asdf +6 -x+1/2, ya1/2, -z+1/2 :) +# testing +7 -x+1/2, -y81/2, -z ahh + +8 x+1/2, -y+1/2, z01/2 goblue + + + + + + + +"#" must be after whitespace or eol +loop_loop_ +_atom_site +_atom_site_type_symbol +_atom_site_symmetry_multiplicity +_atom_si +_atom_si te +_atom_site_fract_z +_atom_site_occupancy +Aa(3) Bb 1 c 0.00000(1) 0.25000 0.00000 1.00000 +SL SM 3 d 0.00000 0.(28510) 0.25000 . +Oo O 5 e 0.19180 0.05170 0.67460 1.00000 +O0f O 7 f 0.09390 0.41220 -0.03510 1.00000 diff --git a/tests/sample_data/README.md b/tests/sample_data/README.md new file mode 100644 index 0000000..279f86e --- /dev/null +++ b/tests/sample_data/README.md @@ -0,0 +1,19 @@ +# Notes on the data + +All files for this test suite have been drawn from databases across the web. Citations are included in the files themselves. 
+ +## AFLOW: + +[mC24](https://aflow.org/prototype-encyclopedia/A_mC24_15_2e2f.html) + +## Bilbao Incommensurate Structures Database (BISD): + +[incommensurate K2MoO4 @ 633K](https://www.cryst.ehu.eus/bincstrdb/view/?incid=3152Elpt1I) + +## Cambridge Crystallographic Data Centre (CCDC): + +[catena-[methylammonium tris(μ-bromo)-lead]](https://www.ccdc.cam.ac.uk/structures/Search?Compound=perovskite&DatabaseToSearch=Published) + +## Crystallography Open Database (COD): + +[aP16](http://www.crystallography.net/cod/1540955.html) diff --git a/tests/test_patterns.py b/tests/test_patterns.py new file mode 100644 index 0000000..5d354ac --- /dev/null +++ b/tests/test_patterns.py @@ -0,0 +1,29 @@ +import pytest + +from parsnip.patterns import LineCleaner + + +@pytest.mark.parametrize("string", ["x,y, z", "'x, y, z'", "'x', 'y', 'z'"]) +def test_linecleaner_xyz(string): + patterns = ([r",\s+", ","], ("'", "")) + cleaner = LineCleaner(patterns) + + assert cleaner(string) == "x,y,z" + + +@pytest.mark.parametrize( + "string", + [ + "As4 As+3 0.3172 0.9426 1.017 1 0.0", + "O(1) 1 0.179(1) 0 0.791(4) ? Uiso", + "Br02 Br 0.0000 0.5000 0.5000 0.0983(7) Uani 1 16 d S T P . .", + "C1 C 0.0000 -0.120(5) 0.0000 0.030(12) Uiso 0.0417 8 d DS . P . 
1", + ], +) +def test_linecleaner_floatstrip(string): + patterns = [r"\(\d*\)", ""] + cleaner = LineCleaner(patterns) + + assert cleaner(string).split() == [ + substr.split("(")[0] for substr in string.split() + ] diff --git a/tests/test_table_reader.py b/tests/test_table_reader.py new file mode 100644 index 0000000..0a9da70 --- /dev/null +++ b/tests/test_table_reader.py @@ -0,0 +1,111 @@ +import numpy as np +import pytest +from conftest import bad_cif, cif_files_mark +from gemmi import cif + +from parsnip._utils import ParseWarning +from parsnip.parse import read_fractional_positions, read_table + + +def _gemmi_read_table(filename, keys): + return np.array(cif.read_file(filename).sole_block().find(keys)) + + +@cif_files_mark +def test_read_symop(cif_data): + parsnip_data = read_table(filename=cif_data.filename, keys=cif_data.symop_keys) + gemmi_data = _gemmi_read_table(cif_data.filename, cif_data.symop_keys) + + # We replace ", " strings with "," to ensure data is collected properly + # We have to apply this same transformation to the gemmi data to check correctness. 
+ if "CCDC_1446529_Pm-3m.cif" in cif_data.filename: + gemmi_data = np.array( + [[item.replace(", ", ",") for item in row] for row in gemmi_data] + ) + + np.testing.assert_array_equal(parsnip_data, gemmi_data) + + +@cif_files_mark +def test_read_atom_sites(cif_data): + parsnip_data = read_table( + filename=cif_data.filename, + keys=cif_data.atom_site_keys, + ) + gemmi_data = _gemmi_read_table(cif_data.filename, cif_data.atom_site_keys) + + np.testing.assert_array_equal(parsnip_data, gemmi_data) + + +@cif_files_mark +@pytest.mark.parametrize( + "subset", [[0], [1, 2, 3], [4, 0]], ids=["single_el", "slice", "end_and_beginning"] +) +def test_partial_table_read(cif_data, subset): + subset_of_keys = tuple(np.array(cif_data.atom_site_keys)[subset]) + parsnip_data = read_table( + filename=cif_data.filename, + keys=subset_of_keys, + ) + gemmi_data = _gemmi_read_table(cif_data.filename, subset_of_keys) + + np.testing.assert_array_equal(parsnip_data, gemmi_data) + + +def test_bad_cif_symop(cif_data=bad_cif): + # This file is thoroughly cooked - gemmi will not even read it. + with pytest.warns(ParseWarning, match=r"expected line with 2 values, got"): + parsnip_data = read_table( + filename=cif_data.filename, + keys=cif_data.symop_keys, + ) + correct_data = [ + ["1", "x,y,z"], + ["2", "-x,y,-z*1/2"], + ["3", "-x,-y,-z"], + ["4", "x,=y,z/1/2"], + ["5", "x-1/2,y+1/2,z"], + ["6", "-x+1/2,ya1/2,-z+1/2"], + ["7", "-x+1/2,-y81/2,-z"], + ["8", "x+1/2,-y+1/2,z01/2"], + ] + + np.testing.assert_array_equal(parsnip_data, correct_data) + + +def test_bad_cif_atom_sites(cif_data=bad_cif): + expected_warning = "Keys {'_this_key_does_not_exist'} were not found in the table." 
+ with pytest.warns(ParseWarning, match=expected_warning): + parsnip_data = read_table( + filename=cif_data.filename, + keys=cif_data.atom_site_keys, + ) + # "_atom_site" + np.testing.assert_array_equal( + parsnip_data[:, 0], + np.array(["Aa(3)", "SL", "Oo", "O0f"]), + ) + # "_atom_site_type_symbol" + np.testing.assert_array_equal(parsnip_data[:, 1], ["Bb", "SM", "O", "O"]) + + # "_atom_site_symmetry_multiplicity" + np.testing.assert_array_equal(parsnip_data[:, 2], ["1", "3", "5", "7"]) + + # "_atom_si te" + np.testing.assert_array_equal( + parsnip_data[:, 3], ["0.00000(1)", "0.00000", "0.19180", "0.09390"] + ) + + # "_atom_site_fract_z" + np.testing.assert_array_equal( + parsnip_data[:, 4], ["0.25000", "0.(28510)", "0.05170", "0.41220"] + ) + + +@cif_files_mark +def test_read_fractional_positions(cif_data): + keys = ("_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z") + parsnip_data = read_fractional_positions(filename=cif_data.filename) + gemmi_data = _gemmi_read_table(cif_data.filename, keys) + gemmi_data = [[cif.as_number(val) for val in row] for row in gemmi_data] + np.testing.assert_allclose(parsnip_data, gemmi_data) diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..f33ce3f --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,17 @@ +import pytest + +from parsnip._utils import ParseError, ParseWarning + + +def test_parse_error(): + with pytest.raises(ParseError) as error: + raise ParseError("TEST_ERROR_RAISED") + + assert "TEST_ERROR_RAISED" in str(error.value) + + +def test_parse_warning(): + with pytest.raises(ParseWarning) as warning: + raise ParseWarning("TEST_WARNING_RAISED") + + assert "TEST_WARNING_RAISED" in str(warning.value)