diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 54561d6..01b8bf2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,7 +6,9 @@ repos:
     rev: 'v4.4.0'
     hooks:
       - id: end-of-file-fixer
+        exclude: tests/sample_data
       - id: trailing-whitespace
+        exclude: tests/sample_data
       - id: check-builtin-literals
       - id: check-executables-have-shebangs
       - id: check-json
diff --git a/README.rst b/README.rst
index ad1424d..18db44a 100644
--- a/README.rst
+++ b/README.rst
@@ -1,13 +1,15 @@
-.. _header:
+.. _images:
 
-.. image:: _static/parsnip_header_dark.svg
+.. image:: doc/source/_static/parsnip_header_dark.svg
   :width: 600
   :class: only-light
 
-.. image:: _static/parsnip_header_light.svg
+.. image:: doc/source/_static/parsnip_header_light.svg
   :width: 600
   :class: only-dark
 
+.. _header:
+
 ..
   TODO: set up Readthedocs, PyPI, and conda-forge
 
@@ -27,12 +29,10 @@
 
 **parsnip** is a minimal Python library for parsing `CIF <https://www.iucr.org/resources/cif>`_ files. While its primary focus is on simplicity and portability, performance-oriented design choices are made where possible.
 
-The ``parsnip.parse`` module handles standard CIF files (including those under the `CIF 1.1 <https://www.iucr.org/resources/cif/spec/version1.1>`_ and `CIF 2.0 <https://www.iucr.org/resources/cif/cif2>`_ standards). It includes a table reader for `loop\_`-delimited tables as well as a key-value pair reader. Provide a filename and a list of keys to either of these functions and you're all set to read start parsing CIF files!
-
-
-.. TODO: reintroduce this text when the parsemm module is updated
-  ``parsnip.parsemm`` handles `mmCIF <https://www.iucr.org/resources/cif/dictionaries/cif_mm>` files.
+.. _parse:
 
+The ``parsnip.parse`` module handles standard CIF files (including those under the `CIF 1.1 <https://www.iucr.org/resources/cif/spec/version1.1>`_ and `CIF 2.0 <https://www.iucr.org/resources/cif/cif2>`_ standards), as well as many features from the `mmCIF <https://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/beginner’s-guide-to-pdb-structures-and-the-pdbx-mmcif-format>`_ format.
+The package includes a table reader for `loop\_`-delimited tables as well as a key-value pair reader. Provide a filename and a list of keys to either of these functions and you're all set to read start parsing CIF and mmCIF files!
 
 .. _installing:
 
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 1f84729..cc7bc6c 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -21,6 +21,7 @@
     "sphinx.ext.autodoc",
     "sphinx.ext.autosummary",
     "sphinx.ext.intersphinx",
+    "sphinx.ext.napoleon",
     "autodocsumm",
 ]
 
@@ -36,6 +37,7 @@
     "show-inheritance": True,
     "autosummary": True,
 }
+autodoc_typehints = "description"
 
 pygments_style = "friendly"
 pygments_dark_style = "native"
@@ -50,12 +52,14 @@
     "light_logo": "parsnip_header_dark.svg",
     "dark_logo": "parsnip_header_light.svg",
     "dark_css_variables": {
-        "color-brand-primary": "#5187b2",
+        "color-brand-primary": "#4AA092",
         "color-brand-content": "#5187b2",
     },
     "light_css_variables": {
-        "color-brand-primary": "#406a8c",
+        "color-brand-primary": "#005A50",
         "color-brand-content": "#406a8c",
     },
+    "top_of_page_button": "edit",
+    "source_edit_link": "https://github.com/glotzerlab/parsnip",
 }
 html_favicon = "_static/parsnip_logo_favicon.svg"
diff --git a/doc/source/example_file.cif b/doc/source/example_file.cif
new file mode 100644
index 0000000..a899e8c
--- /dev/null
+++ b/doc/source/example_file.cif
@@ -0,0 +1,27 @@
+data_cif_file
+
+_journal_year 1999
+_journal_page_first 0
+_journal_page_last 123
+
+_chemical_name_mineral 'Copper FCC'
+_chemical_formula_sum 'Cu'
+
+_cell_length_a     3.6
+_cell_length_b     3.6
+_cell_length_c     3.6
+_cell_angle_alpha  90.0
+_cell_angle_beta   90.0
+_cell_angle_gamma  90.0
+
+
+loop_
+_atom_site_label
+_atom_site_fract_x
+_atom_site_fract_y
+_atom_site_fract_z
+_atom_site_type_symbol
+_atom_site_Wyckoff_label
+Cu1 0.0000000000 0.0000000000 0.0000000000  Cu a
+
+_symmetry_space_group_name_H-M  'Fm-3m'
diff --git a/doc/source/index.rst b/doc/source/index.rst
index a3b928c..c68adbd 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -1,11 +1,19 @@
+.. image:: _static/parsnip_header_dark.svg
+  :width: 600
+  :class: only-light
+
+.. image:: _static/parsnip_header_light.svg
+  :width: 600
+  :class: only-dark
+
 .. include:: ../../README.rst
+  :start-after: .. _header:
 
 
 .. toctree::
    :maxdepth: 2
    :caption: Getting Started
 
-   introduction
    installation
    quickstart
 
@@ -15,6 +23,7 @@
    :caption: API
 
    package-parse
+   package-patterns
 
 
 .. toctree::
@@ -22,15 +31,8 @@
    :caption: Reference
 
    genindex
+   modindex
    development
    changelog
    credits
    license
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/doc/source/introduction.rst b/doc/source/introduction.rst
deleted file mode 100644
index bb0083a..0000000
--- a/doc/source/introduction.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Introduction
-===================
-
-.. include:: ../../README.rst
-    :start-after: .. _introduction:
-    :end-before: .. _installing:
diff --git a/doc/source/package-patterns.rst b/doc/source/package-patterns.rst
new file mode 100644
index 0000000..56b2e66
--- /dev/null
+++ b/doc/source/package-patterns.rst
@@ -0,0 +1,8 @@
+Patterns Module
+==============================
+
+.. rubric:: Overview
+
+.. automodule:: parsnip.patterns
+   :members:
+   :special-members:
diff --git a/doc/source/quickstart.rst b/doc/source/quickstart.rst
index 7fc80ea..ab2f97c 100644
--- a/doc/source/quickstart.rst
+++ b/doc/source/quickstart.rst
@@ -2,3 +2,118 @@
 
 Quickstart Tutorial
 ===================
+
+Once you have :ref:`installed <installation>` **parsnip**, most workflows involve reading a CIF file.
+Let's assume we have the file my_file.cif in the current directory, and these are its contents:
+
+.. literalinclude:: example_file.cif
+
+Reading Keys
+^^^^^^^^^^^^
+
+
+Now, let's read extract the key-value pairs:
+
+.. code-block:: python
+
+    from parsnip import parse
+    filename = "my_file.cif"
+    pairs = parse.read_key_value_pairs(filename)
+    print(pairs)
+    ...    {
+    ...      '_journal_year': '1999',
+    ...      '_journal_page_first': '0',
+    ...      '_journal_page_last': '123',
+    ...      '_chemical_name_mineral': "'Copper FCC'",
+    ...      '_chemical_formula_sum': "'Cu'",
+    ...      '_cell_length_a': '3.6',
+    ...      '_cell_length_b': '3.6',
+    ...      '_cell_length_c': '3.6',
+    ...      '_cell_angle_alpha': '90.0',
+    ...      '_cell_angle_beta': '90.0',
+    ...      '_cell_angle_gamma': '90.0'
+    ...      '_symmetry_space_group_name_H-M':  'Fm-3m'
+    ...    }
+
+By default, read_key_value_pairs reads every key. To read only numeric data values, set
+``only_read_numerics`` to ``True``.To take a subset, provide a tuple of strings to the ``keys`` argument.
+
+.. code-block:: python
+
+    # Only read the numeric data values
+    pairs = parse.read_key_value_pairs(filename,only_read_numerics=True)
+    print(pairs)
+    ...    {
+    ...      '_journal_year': 1999,
+    ...      '_journal_page_first': 0,
+    ...      '_journal_page_last': 123,
+    ...      '_cell_length_a': 3.6,
+    ...      '_cell_length_b': 3.6,
+    ...      '_cell_length_c': 3.6,
+    ...      '_cell_angle_alpha': 90.0,
+    ...      '_cell_angle_beta': 90.0,
+    ...      '_cell_angle_gamma': 90.0
+    ...    }
+
+    # Read only these keys
+    keys = (
+      "_journal_year"
+      "_journal_page_first"
+      "_journal_page_last"
+    )
+    pairs = parse.read_key_value_pairs(filename,keys=keys)
+    print(pairs)
+    ...    {
+    ...      '_journal_year': '1999',
+    ...      '_journal_page_first': '0',
+    ...      '_journal_page_last': '123',
+    ...    }
+
+Reading Tables
+^^^^^^^^^^^^^^
+
+Now, let's read a table. To do this, we need a list of keys:
+
+.. code-block:: python
+
+    keys = (
+      "_atom_site_label",
+      "_atom_site_fract_x",
+      "_atom_site_fract_y",
+      "_atom_site_fract_z",
+      "_atom_site_type_symbol",
+      "_atom_site_Wyckoff_label"
+    )
+    table = parse.read_table(filename,keys=keys)
+    print(table)
+    ...    array([['Cu1',
+    ...            '0.0000000000(0)',
+    ...            '0.0000000000(0)',
+    ...            '0.0000000000(0)',
+    ...            'Cu'
+    ...            'a']],
+    ...            dtype='<U12')
+
+
+Now, maybe don't need the atom site or Wyckoff labels - let's select just the numeric values, and export them as floats:
+
+.. code-block:: python
+
+    keys = (
+      "_atom_site_fract_x",
+      "_atom_site_fract_y",
+      "_atom_site_fract_z",
+    )
+    table = parse.read_table(filename,keys=keys,cast_to_float=True)
+    print(table)
+    ...    array([[0., 0., 0.]], dtype=float32)
+
+The cast_to_float argument automatically converts numeric data types, and removes tolerance and precision markers for us.
+Extracting the fractional coordinates of a unit cell is a pretty common operation, so we have a convenience function that does this as well.
+
+.. code-block:: python
+
+
+    table = parse.read_fractional_positions(filename)
+    print(table)
+    ...    array([[0., 0., 0.]], dtype=float32)
diff --git a/parsnip/_errors.py b/parsnip/_errors.py
new file mode 100644
index 0000000..b0b119d
--- /dev/null
+++ b/parsnip/_errors.py
@@ -0,0 +1,14 @@
+class ParseWarning(Warning):
+    def __init__(self, message):
+        self.message = message
+
+    def __str__(self):
+        return repr(self.message)
+
+
+class ParseError(RuntimeError):
+    def __init__(self, message):
+        self.message = message
+
+    def __str__(self):
+        return repr(self.message)
diff --git a/parsnip/_utils.py b/parsnip/_utils.py
index b0b119d..bda3055 100644
--- a/parsnip/_utils.py
+++ b/parsnip/_utils.py
@@ -1,14 +1,11 @@
-class ParseWarning(Warning):
-    def __init__(self, message):
-        self.message = message
+import numpy as np
 
-    def __str__(self):
-        return repr(self.message)
 
+def _str2num(val: str):
+    """Convert a string value to an integer if possible, or a float otherwise."""
+    return float(val) if "." in val else int(val)
 
-class ParseError(RuntimeError):
-    def __init__(self, message):
-        self.message = message
 
-    def __str__(self):
-        return repr(self.message)
+def _deg2rad(val: float):
+    """Convert a value in degrees to one in radians."""
+    return val * np.pi / 180
diff --git a/parsnip/parse.py b/parsnip/parse.py
index 8d7712c..ce6fd23 100644
--- a/parsnip/parse.py
+++ b/parsnip/parse.py
@@ -1,10 +1,56 @@
-"""CIF parsing tools."""
+r"""Functions for parsing CIF files in Python.
+
+.. include:: ../../README.rst
+    :start-after: .. _parse:
+    :end-before: .. _installing:
+
+.. admonition:: The CIF Format
+
+    This is an example of a simple CIF file. A `key`_ (data name or tag) must start with
+    an underscore, and is seperated from the data value with whitespace characters.
+    A `table`_ begins with the ``loop_`` keyword, and contain a header block and a data
+    block. The vertical position of a tag in the table heading corresponds with the
+    horizontal position of the associated column in the table values.
+
+    .. code-block:: text
+
+        # Key-value pairs describing the unit cell:
+        _cell_length_a  5.40
+        _cell_length_b  3.43
+        _cell_length_c  5.08
+        _cell_angle_alpha  90.0
+        _cell_angle_beta  132.3
+        _cell_angle_gamma  90.0
+
+        # A table with two columns and eight rows:
+        loop_
+        _symmetry_equiv_pos_site_id
+        _symmetry_equiv_pos_as_xyz
+        1  x,y,z
+        2  -x,y,-z
+        3  -x,-y,-z
+        4  x,-y,z
+        5  x+1/2,y+1/2,z
+        6  -x+1/2,y+1/2,-z
+        7  -x+1/2,-y+1/2,-z
+        8  x+1/2,-y+1/2,z
+
+        _symmetry_space_group_name_H-M  'C2 / m' # One more key-value pair
+
+
+.. _key: https://www.iucr.org/resources/cif/spec/version1.1/cifsyntax#definitions
+.. _table: https://www.iucr.org/resources/cif/spec/version1.1/cifsyntax#onelevel
+
+"""
+
+import re
 import warnings
 
 import numpy as np
 
-from ._utils import ParseError, ParseWarning
-from .patterns import LineCleaner, cast_array_to_float
+from ._errors import ParseError, ParseWarning
+from ._utils import _deg2rad, _str2num
+from .patterns import LineCleaner, cast_array_to_float, remove_nondelimiting_whitespace
 
 
 def _remove_comments_from_line(line):
@@ -14,67 +60,73 @@ def _remove_comments_from_line(line):
 def read_table(
     filename: str,
     keys: str,
-    filter_line: tuple = ((r",\s+", ",")),
-    keep_original_key_order=False,
+    keep_original_key_order: bool = False,
+    cast_to_float: bool = False,
+    nondelimiting_whitespace_replacement: str = "_",
+    regex_filter: tuple = None,
 ) -> np.ndarray:
-    r"""Extract data from a CIF file loop_ table.
-
-    CIF files store tabular data as whitespace-delimited blocks that start with `loop_`.
-    Keys are kept at the top of the table, and the vertical position of keys corresponds
-    to the horizontal position of the column storing the data for that key. The end of
-    the table is not necessarily marked: instead, the script detects when the table
-    format is exited.
-
-    For example:
-
-    ```
-    loop_
-    _space_group_symop_id
-    _space_group_symop_operation_xyz
-    1 x,y,z
-    2 -x,y,-z+1/2
-    3 -x,-y,-z
-    4 x,-y,z+1/2
-    5 x+1/2,y+1/2,z
-    6 -x+1/2,y+1/2,-z+1/2
-    7 -x+1/2,-y+1/2,-z
-    8 x+1/2,-y+1/2,z+1/2
-
-    ```
-
-    Only data columns corresponding to a key in the input keys list will be returned.
-
-    Note that this function will ONLY return data from a single table. If keys are
-    provided that correspond to data from multiple tables, only the first table will
-    be read.
-
-    The ``filter_line`` argument allows for dynamic input creation of regex filters to
-    apply to each line that contains data to be saved. The default value is
-    ``((",\s+",","))``, which helps differentiate between individual data fragments
-    seperated by commas and whitespace characters, and other sections of the line that
-    are also whitespace separated. Adding another tuple to remove single quotes can
-    also be helpful: try ``((",\s+",","),(",",""))`` to achieve this. To disable the
-    feature entirely, pass in a tuple of empty strings: ``("","")``. Note that doing so
-    will cause errors if the table contains non-delimiting whitespaces.
+    r"""Extract data from a CIF file loop\_ table.
 
     Args:
-        filename (str): The name of the .cif file to be parsed.
-        keys (tuple[str]): The names of the keys to be parsed.
-        filter_line (tuple[tuple[str,str]], optional):
-            A tuple of strings that are compiled to a regex filter and applied to each
-            data line. (Default value: ((r",\s+",",")) )
+        filename (str):
+            The name of the .cif file to be parsed.
+        keys (tuple[str]):
+            The names of the keys to be parsed. The columns associated with these keys
+            will be returned in the final array.
         keep_original_key_order (bool, optional):
             When True, preserve the order of keys in the table from the cif file.
             When False, return columns of data in order of the input ``keys`` arg.
-            (Default value: False)
+            Default value = ``False``
+        cast_to_float (bool, optional):
+            When True, attempts to cast the entire array to flaoting point numbers,
+            removing precision values (e.g. ``5.98(4)`` would be mapped to ``5.98(4)``).
+            Default value = ``False``
+        nondelimiting_whitespace_replacement (str, optional):
+            Character to replace non-delimiting whitespaces with.
+            Default value = ``"_"``
+        regex_filter (tuple[str,str], optional):
+            A tuple of strings that are compiled to a regex filter and applied to each
+            data line. If a tuple of tuples of strings is provided instead, each pattern
+            will be applied seperately.
+            Default value = ``None``
+
 
     Returns:
-        np.ndarray[str]: A numpy array of the data as strings.
+        :math:`(N, N_{keys})` :class:`numpy.ndarray[str]`:
+            A numpy array of the data as strings.
+
+    .. warning::
+
+        This function will ONLY return data from a single table. If keys are provided
+        that correspond to data from multiple tables, only the first table will be read.
+
+    .. tip::
+
+        CIF tables are whitespace delimited - however, values enclosed in quotation
+        marks may also contain whitespace characters. The parameter
+        ``nondelimiting_whitespace_replacement`` handles this possibility by replacing
+        nondelimiting whitespaces with underscores. This value can be also be set to an
+        empty string, or any arbitrary sequence of characters.
+
+    .. tip::
+
+        The ``regex_filter`` argument allows for dynamic input creation of regex filters
+        to apply to each line that contains data to be saved. Each filter should be a
+        tuple of strings corresponding to a pattern to match and a replacement for that
+        pattern. To apply multiple filters, pass in a list of these tuples.
+
+        For example, single quotes could be removed by setting
+        ``regex_filter=("'","")``.
+
     """
+    # Split tables on the `loop_` keyword and throw away any comments on that line.
+    table_delimiter = r"loop_[^\n]*"
+
     with open(filename) as f:
-        tables = f.read().split("loop_")
+        tables = re.split(table_delimiter, f.read())
 
-    line_cleaner = LineCleaner(filter_line)
+    if regex_filter is not None:
+        line_cleaner = LineCleaner(regex_filter)
     nontable_line_prefixes = ("_", "#")
 
     for table in tables:
@@ -91,14 +143,14 @@ def read_table(
                     ", section 7 for more details."
                 )
 
-            # We will get errors if there is a comment after the loop_ block that
-            # contains our data. This is questionably legal, but very uncommon
-
+            # Remove comments from the line to ensure we only save data.
             line = _remove_comments_from_line(line)
 
             # Save current key position if it is one of the keys we want.
             if in_header and (line in keys):
                 data_column_indices.append(line_number)
+                # If keep_original_key_order is True, we reorder the output to match the
+                # order of columns in the original CIF file
                 if not keep_original_key_order:
                     column_order.append(keys.index(line))
                 continue
@@ -106,7 +158,13 @@ def read_table(
             # If we exit the header and enter the table body
             if data_column_indices and (line[:1] not in nontable_line_prefixes):
                 in_header = False  # Exit the header and start writing data
-                clean_line = line_cleaner(line)
+
+                if regex_filter is not None:  # Apply user-defined regex, if present
+                    line = line_cleaner(line)
+
+                clean_line = remove_nondelimiting_whitespace(
+                    line.strip(), replacement=nondelimiting_whitespace_replacement
+                )
                 split_line = clean_line.split()
 
                 # Only add data if the line has at least as many columns as required.
@@ -140,30 +198,180 @@ def read_table(
             ParseWarning,
             stacklevel=2,
         )
-    return np.atleast_2d(data)[:, data_column_indices]
+
+    result = np.atleast_2d(data)[:, data_column_indices]
+    return cast_array_to_float(result) if cast_to_float else result
+
+
+def _parsed_line_generator(filename, regexp):
+    """Apply a regex pattern line by line and yield the pattern's matches.
+
+    This is intended to be an internal function that handles the reading of CIF files.
+    Abstracting this out clarifies which logic belongs to the file parser and which
+    belongs to the actual data manipulation.
+
+    Args:
+        filename (str): The name of the .cif file to be parsed.
+        regexp (str): String to generate the regex pattern that is applied to each line.
+
+    Yields:
+        tuple(str,str|float|int):
+    """
+    pattern = re.compile(regexp)
+    with open(filename) as file:
+        for line in file:
+            # Line is either empty, or does not start with a valid key
+            if line == "" or line[0] != "_":
+                continue
+            parsed_line = pattern.match(line)
+            if parsed_line:  # Regex matches
+                yield parsed_line
+
+
+def read_key_value_pairs(
+    filename: str,
+    keys: tuple = None,
+    only_read_numerics: bool = False,
+):
+    """Extract key-value pairs from a CIF file.
+
+    By default, this function reads all keys and returns data values as strings. Setting
+    ``only_read_numerics`` to True will cause the program to cast data to a numeric
+    type (float or int). However, keys that cannot be safely cast into a numeric are
+    skipped.
+
+    Args:
+        filename (str): The name of the .cif file to be parsed.
+        keys (tuples[str]|None, optional):
+            A tuple of keys to search and return data for.
+            If keys is None, all keys are returned.
+            Default value = ``None``.
+        only_read_numerics (bool, optional):
+            Whether to read only values that cannot be cast to int or float.
+            Default value = ``False``
+
+    Returns:
+        dict[str,float|int] | dict[str,str]:
+            Dict of the key value pairs. Values will either be all strings, or a mixture
+            of int and float, and the order will match the order of keys (if provided).
+
+    .. note::
+
+            If no data is found for any of the provided keys, a warning will be raised
+            and the output value will be ``None``.
+
+    """
+    # REGEX EXPLANATION
+    # ^         : Match only at the start of the line
+    # (_[\w-]+) : Match any number/mix of alphanumerics, "-", and "_", as a group
+    # [ |\t]+   : Match one or more whitespace " " or tab characters.
+
+    # Parse numbers:
+    # (         : Start new group
+    # -?\d+     : Match 0 or 1 "-" characters, then 1 or more digits 0-9
+    # \.?       : Match 0 or 1 "." characters
+    # \d*       : Match 0 or more digits 0-9
+    # )         : End the group
+
+    # Parse strings:
+    # (         : Start new group
+    # [^#^\n]+  : Match 1 ore more characters that are NOT a "#" or newline "\n"
+    # )         : End the group
+
+    # Ideally, we could use an atomic group (e.g. (?>[ |\t]+)) to match the spaces and
+    # save some time on degenerate cases. However, this feature was added to re in
+    # Python 3.11 so we will exclude it for portability's sake
+
+    data = {}
+
+    if only_read_numerics:
+        regexp = r"^(_[\w\.]+)[ |\t]+(-?\d+\.?\d*)"
+    else:
+        regexp = r"^(_[\w\.-]+)[ |\t]+([^#^\n]+)"
+
+    if keys is not None:
+        # Insertion order our dict with original key order
+        for key in keys:
+            data[key] = None
+        # Convert to mutable datastructure so we can remove identified keys
+        keys = set(keys)
+
+    for parsed_line in _parsed_line_generator(filename, regexp=regexp):
+        key, val = parsed_line.groups()
+        val = _str2num(val) if only_read_numerics else val.strip()
+
+        if keys is None:
+            data[key] = val
+        elif key in keys:
+            data[key] = val
+            keys.remove(key)
+        elif len(keys) == 0:
+            break
+
+    if keys is not None and len(keys) != 0:
+        warnings.warn(
+            f"Keys {keys} did not match any data!", ParseWarning, stacklevel=2
+        )
+
+    return data
+
+
+def read_cell_params(filename, degrees: bool = True, mmcif: bool = False):
+    r"""Read the cell lengths and angles from a CIF file.
+
+    Args:
+        filename (str): The name of the .cif file to be parsed.
+        degrees (bool, optional):
+            When True, angles are returned in degrees (as per the cif spec). When False,
+            angles are converted to radians.
+            Default value = ``True``
+        mmcif (bool, optional):
+            When False, the standard CIF key naming is used (e.g. _cell_angle_alpha).
+            When True, the mmCIF standard is used instead (e.g. cell.angle_alpha).
+            Default value = ``False``
+
+    Returns:
+        tuple:
+            The box vector lengths and angles in degrees or radians
+            :math:`(L_1, L_2, L_3, \alpha, \beta, \gamma)`.
+    """
+    if mmcif:
+        angle_keys = ("_cell.angle_alpha", "_cell.angle_beta", "_cell.angle_gamma")
+        box_keys = ("_cell.length_a", "_cell.length_b", "_cell.length_c") + angle_keys
+    else:
+        angle_keys = ("_cell_angle_alpha", "_cell_angle_beta", "_cell_angle_gamma")
+        box_keys = ("_cell_length_a", "_cell_length_b", "_cell_length_c") + angle_keys
+    cell_data = read_key_value_pairs(filename, keys=box_keys, only_read_numerics=True)
+
+    assert all(value is not None for value in cell_data.values())
+    assert all(0 < cell_data[key] < 180 for key in angle_keys)
+
+    if not degrees:
+        for key in angle_keys:
+            cell_data[key] = _deg2rad(cell_data[key])
+
+    return tuple(cell_data.values())
 
 
 def read_fractional_positions(
     filename: str,
-    filter_line: tuple = ((r",\s+", ",")),
+    regex_filter: tuple = ((r",\s+", ",")),
 ):
     r"""Extract the fractional X,Y,Z coordinates from a CIF file.
 
     Args:
         filename (str): The name of the .cif file to be parsed.
-        filter_line (tuple[tuple[str,str]], optional):
+        regex_filter (tuple[tuple[str,str]], optional):
             A tuple of strings that are compiled to a regex filter and applied to each
-            data line. (Default value: ((r",\s+",",")) )
+            data line. Default value = ``((r",\s+",","))``
 
     Returns:
-        np.array[np.float32]: Fractional X,Y,Z coordinates of the unit cell.
+        :math:`(N, 3)` :class:`numpy.ndarray[np.float32]`:
+            Fractional X,Y,Z coordinates of the unit cell.
     """
     xyz_keys = ("_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z")
     # Once #6 is added, we should warnings.catch_warnings(action="error")
-    xyz_data = read_table(
-        filename=filename,
-        keys=xyz_keys,
-    )
+    xyz_data = read_table(filename=filename, keys=xyz_keys, regex_filter=regex_filter)
 
     xyz_data = cast_array_to_float(arr=xyz_data, dtype=np.float32)
 
diff --git a/parsnip/parsemm.py b/parsnip/parsemm.py
deleted file mode 100644
index 6f42ec4..0000000
--- a/parsnip/parsemm.py
+++ /dev/null
@@ -1,6 +0,0 @@
-"""mmCIF parsing tools."""
-if __name__ == "__main__":
-    raise NotImplementedError(
-        "mmCIF functionality has not been implemented.\n"
-        "See https://github.com/glotzerlab/parsnip/issues/1 for more details."
-    )
diff --git a/parsnip/patterns.py b/parsnip/patterns.py
index 430a6ff..c402349 100644
--- a/parsnip/patterns.py
+++ b/parsnip/patterns.py
@@ -1,36 +1,57 @@
-"""Functions and classes to process string data."""
+"""Functions and classes to process string data.
+
+As with any text file format, some string manipulation may be required to process CIF
+data. The classes and functions in this module provide simple tools for the manipulation
+of string data extracted from CIF files by methods in ``parsnip.parse``.
+
+"""
 import re
 
 import numpy as np
 
-# Compile in common patterns for cif parsing. These are reused throughout the package.
-_multiple_whitespace_pattern = re.compile(r"\s+")
-_comma_prune_spaces = re.compile(r",\s+")
 
-
-def compile_pattern_from_strings(filter_patterns: tuple):
-    """Return a regex pattern that matches any of the characters in the filter.
+def cast_array_to_float(arr: np.ndarray, dtype: type = np.float32):
+    """Cast a Numpy array to a dtype, pruning significant digits from numerical values.
 
     Args:
-        filter_patterns (tuple[str]): Description
+        arr (np.array[str]): Array of data to convert
+        dtype (type, optional):
+            dtype to cast array to.
+            Default value = ``np.float32``
 
     Returns:
-        re.Pattern: Pattern matching any of the input characters.
+        np.array[float]: Array with new dtype and no significant digit information.
     """
-    return re.compile("|".join(filter_patterns))
+    return np.char.partition(arr, "(")[..., 0].astype(dtype)
 
 
-def cast_array_to_float(arr: np.ndarray, dtype: type = np.float32):
-    """Cast a Numpy array to a dtype, pruning significant digits from numerical values.
+def remove_nondelimiting_whitespace(string: str, replacement: str = "_") -> str:
+    """Remove nondelimiting whitespaces from a string.
+
+    For the purpose of this function (and CIF files in general), nondelimiting
+    whitespaces are those that are enclosed either in single or double quotes.
 
     Args:
-        arr (np.array): Array of data to convert
-        dtype (type, optional): dtype to cast array to (Default value: np.float32).
+        string (str): Input string to process
+        replacement (str):
+          String that will replace each nondelimiting whitespace.
+          Default value = ``"_"``
 
     Returns:
-        np.array[float]: Array with new dtype and no significant digit information.
+        str: String with whitespaces replaced with the replacement character.
     """
-    return np.char.partition(arr, "(")[..., 0].astype(dtype)
+    in_quotes = False
+    new_str = []
+    for char in string:
+        if in_quotes and char == " ":
+            new_str.append(replacement)
+            continue
+        else:
+            new_str.append(char)
+
+        if char == "'" or char == '"':
+            in_quotes = not in_quotes
+    return "".join(new_str)
 
 
 class LineCleaner:
diff --git a/tests/conftest.py b/tests/conftest.py
index 4589560..3bdf601 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,22 +1,47 @@
 import os
 from collections import namedtuple
 
+import numpy as np
 import pytest
 
 # ruff: noqa: N816. Allow mixed-case global variables
 
+data_file_path = os.path.dirname(__file__) + "/sample_data/"
+
+
+CifData = namedtuple(
+    "CifData", ["filename", "symop_keys", "atom_site_keys", "single_value_keys"]
+)
+
+# Assorted keys to select from
+assorted_keys = np.loadtxt(data_file_path + "cif_file_keys.txt", dtype=str)
+
+
+def generate_random_key_sequences(arr, n_samples, seed=42):
+    rng = np.random.default_rng(seed)
+    return [
+        rng.choice(arr, size=size, replace=False)
+        for size in rng.integers(1, len(arr), n_samples)
+    ]
 
-CifData = namedtuple("CifData", ["filename", "symop_keys", "atom_site_keys"])
 
+def random_keys_mark(n_samples=10):
+    return pytest.mark.parametrize(
+        argnames="keys",
+        argvalues=generate_random_key_sequences(assorted_keys, n_samples=n_samples),
+    )
 
+
+# Used for test_read_cell_params
 box_keys = (
-    "_cell_angle_alpha",
-    "_cell_angle_beta",
-    "_cell_angle_gamma",
     "_cell_length_a",
     "_cell_length_b",
     "_cell_length_c",
+    "_cell_angle_alpha",
+    "_cell_angle_beta",
+    "_cell_angle_gamma",
 )
+
 atom_site_keys = (
     "_atom_site_label",
     "_atom_site_type_symbol",
@@ -27,13 +52,21 @@
 )
 
 
-data_file_path = os.path.dirname(__file__) + "/sample_data/"
-
-
 aflow_mC24 = CifData(
     filename=data_file_path + "AFLOW_mC24.cif",
     symop_keys=("_space_group_symop_id", "_space_group_symop_operation_xyz"),
     atom_site_keys=atom_site_keys,
+    single_value_keys=(
+        "_audit_creation_method",
+        "_chemical_name_mineral",
+        "_chemical_formula_sum",
+        "_symmetry_space_group_name_H-M",
+        "_aflow_title",
+        "_aflow_params",
+        "_aflow_params_values",
+        "_aflow_Strukturbericht",
+        "_aflow_Pearson",
+    ),
 )
 
 bisd_Ccmm = CifData(
@@ -41,18 +74,97 @@
     symop_keys=("_space_group_symop_operation_xyz", "_space_group_symop_id"),
     # Our code works with extra keys, but gemmi does not!
     atom_site_keys=(atom_site_keys[0], *atom_site_keys[2:]),
+    single_value_keys=(
+        "_journal_name_full",
+        "_journal_volume",
+        "_journal_year",
+        "_journal_page_first",
+        "_journal_page_last",
+        "_journal_paper_doi",
+        "_publ_contact_author_name",
+        "_publ_contact_author_email",
+        "_chemical_formula_sum",
+        "_space_group_crystal_system",
+        "_refine_ls_wR_factor_gt",
+    ),
 )
 
 ccdc_Pm3m = CifData(
     filename=data_file_path + "CCDC_1446529_Pm-3m.cif",
     symop_keys=("_space_group_symop_operation_xyz",),
     atom_site_keys=sorted(atom_site_keys),
+    single_value_keys=(
+        "_audit_block_doi",
+        "_database_code_depnum_ccdc_archive",
+        "_computing_publication_material",
+        "_chemical_formula_sum",
+        "_cell_formula_units_Z",
+        "_space_group_crystal_system",
+        "_space_group_name_H-M_alt",
+        "_diffrn_ambient_temperature",
+        "_reflns_number_gt",
+        "_refine_ls_R_factor_gt",
+        "_refine_ls_wR_factor_gt",
+        "_refine_diff_density_max",
+        "_refine_diff_density_min",
+        "_refine_diff_density_rms",
+    ),
 )
 
 cod_aP16 = CifData(
     filename=data_file_path + "COD_1540955_aP16.cif",
     symop_keys=("_symmetry_equiv_pos_as_xyz",),
     atom_site_keys=atom_site_keys,
+    single_value_keys=(
+        "_journal_page_first",
+        "_journal_page_last",
+        "_journal_volume",
+        "_journal_year",
+        "_chemical_formula_sum",
+        "_chemical_name_systematic",
+        "_space_group_IT_number",
+        "_symmetry_space_group_name_Hall",
+        "_symmetry_space_group_name_H-M",
+        "_cell_formula_units_Z",
+        "_cell_volume",
+        "_citation_journal_id_ASTM",
+        "_cod_data_source_file",
+        "_cod_data_source_block",
+        "_cod_original_cell_volume",
+        "_cod_original_formula_sum",
+        "_cod_database_code",
+    ),
+)
+
+pdb_4INS = CifData(
+    filename=data_file_path + "PDB_4INS_head.cif",
+    symop_keys=("_pdbx_struct_oper_list.symmetry_operation",),
+    atom_site_keys=(  # mmCIF stores atom sites differently, so use a different table.
+        "_chem_comp.id",
+        "_chem_comp.type",
+        "_chem_comp.mon_nstd_flag",
+        "_chem_comp.name",
+        "_chem_comp.pdbx_synonyms",
+        "_chem_comp.formula",
+        "_chem_comp.formula_weight",
+    ),
+    single_value_keys=(
+        "_symmetry.entry_id",
+        "_symmetry.space_group_name_H-M",
+        "_symmetry.pdbx_full_space_group_name_H-M",
+        "_symmetry.cell_setting",
+        "_symmetry.Int_Tables_number",
+        "_symmetry.space_group_name_Hall",
+        "_refine_hist.pdbx_refine_id",
+        "_refine_hist.cycle_id",
+        "_refine_hist.pdbx_number_atoms_protein",
+        "_refine_hist.pdbx_number_atoms_nucleic_acid",
+        "_refine_hist.pdbx_number_atoms_ligand",
+        "_refine_hist.number_atoms_solvent",
+        "_refine_hist.number_atoms_total",
+        "_refine_hist.d_res_high",
+        "_refine_hist.d_res_low",
+    ),
 )
 
 bad_cif = CifData(
@@ -66,9 +178,20 @@
         "_atom_site_fract_z",
         "_this_key_does_not_exist",
     ),
+    single_value_keys=(
+        "_cell_length_a",
+        "_cell_length_b",
+        "_cell_length_c",
+        "_cell_angle_alpha",
+        "_cell_angle_beta",
+        "_cell_angle_gamma",
+        "__________asdf",
+        "_-wasd",
+        "not_a_valid_key",
+    ),
 )
 
-cif_data_array = [aflow_mC24, bisd_Ccmm, ccdc_Pm3m, cod_aP16]
+cif_data_array = [aflow_mC24, bisd_Ccmm, ccdc_Pm3m, cod_aP16, pdb_4INS]
 cif_files_mark = pytest.mark.parametrize(
     argnames="cif_data",
     argvalues=cif_data_array,
diff --git a/tests/sample_data/AFLOW_mC24.cif b/tests/sample_data/AFLOW_mC24.cif
index d1b33f8..a7c40fd 100644
--- a/tests/sample_data/AFLOW_mC24.cif
+++ b/tests/sample_data/AFLOW_mC24.cif
@@ -10,7 +10,7 @@
 # S. Curtarolo, The AFLOW Library of Crystallographic Prototypes: Part 3,
 # Comp. Mat. Sci. 199, 110450 (2021). (doi=10.1016/j.commatsci.2021.110450)
 
-# CIF file
+# CIF file 
 data_findsym-output
 _audit_creation_method FINDSYM
 
@@ -37,7 +37,7 @@ _publ_Section_title
 ;
 
 # Found in The American Mineralogist Crystal Structure Database, 2003
-
+ 
 _aflow_title 'Clinocervantite ($\beta$-Sb$_{2}$O$_{4}$) Structure'
 _aflow_proto 'A2B_mC24_15_2f_ce'
 _aflow_params 'a,b/a,c/a,\beta,y_{2},x_{3},y_{3},z_{3},x_{4},y_{4},z_{4}'
@@ -47,13 +47,14 @@ _aflow_Pearson 'mC24'
 
 _symmetry_space_group_name_H-M "C 1 2/c 1"
 _symmetry_Int_Tables_number 15
-
+ 
 _cell_length_a    12.06100
 _cell_length_b    4.83600
 _cell_length_c    5.38300
 _cell_angle_alpha 90.00000
 _cell_angle_beta  103.12000
 _cell_angle_gamma 90.00000
+ 
 
 loop_
 _space_group_symop_id
@@ -66,7 +67,7 @@ _space_group_symop_operation_xyz
 6 -x+1/2,y+1/2,-z+1/2
 7 -x+1/2,-y+1/2,-z
 8 x+1/2,-y+1/2,z+1/2
-
+ 
 loop_
 _atom_site_label
 _atom_site_type_symbol
diff --git a/tests/sample_data/B-IncStrDb_Ccmm.cif b/tests/sample_data/B-IncStrDb_Ccmm.cif
index 5ae4844..b9cb8e9 100644
--- a/tests/sample_data/B-IncStrDb_Ccmm.cif
+++ b/tests/sample_data/B-IncStrDb_Ccmm.cif
@@ -32,7 +32,7 @@ loop_
   'Overeijnder, H.'
   'Tuinstra, F.'
 
-_publ_section_title
+_publ_section_title               
 ;The average structure of K~2~MoO~4~ in the incommensurate phase at 633K
 ;
 
@@ -40,7 +40,7 @@ _exptl_crystal_type_of_structure   cryst
 _diffrn_ambient_temperature       633
 _diffrn_source                    x-ray
 
-_exptl_special_details
+_exptl_special_details 
 ;Guinier-Lenne camera. Peak intensities estimated with an optical densitometer
 ;
 
diff --git a/tests/sample_data/CCDC_1446529_Pm-3m.cif b/tests/sample_data/CCDC_1446529_Pm-3m.cif
index e2f7d4f..a9c69a4 100644
--- a/tests/sample_data/CCDC_1446529_Pm-3m.cif
+++ b/tests/sample_data/CCDC_1446529_Pm-3m.cif
@@ -1,16 +1,16 @@
-#######################################################################
-#
-# This file contains crystal structure data downloaded from the
-# Cambridge Structural Database (CSD) hosted by the Cambridge
+####################################################################### 
+# 
+# This file contains crystal structure data downloaded from the 
+# Cambridge Structural Database (CSD) hosted by the Cambridge 
 # Crystallographic Data Centre (CCDC).
-#
-# Full information about CCDC data access policies and citation
-# guidelines are available at http://www.ccdc.cam.ac.uk/access/V1
-#
-# Audit and citation data items may have been added by the CCDC.
-# Please retain this information to preserve the provenance of
-# this file and to allow appropriate attribution of the data.
-#
+# 
+# Full information about CCDC data access policies and citation 
+# guidelines are available at http://www.ccdc.cam.ac.uk/access/V1 
+# 
+# Audit and citation data items may have been added by the CCDC. 
+# Please retain this information to preserve the provenance of 
+# this file and to allow appropriate attribution of the data. 
+# 
 #######################################################################
 
 data_MAPbBr3_RT
@@ -21,9 +21,9 @@ _citation_id
 _citation_doi
 _citation_year
 1 10.1021/acscentsci.6b00055 2016
-_audit_update_record
+_audit_update_record             
 ;
-2016-01-10 deposited with the CCDC.	2024-04-02 downloaded from the CCDC.
+2016-01-10 deposited with the CCDC.	2024-04-08 downloaded from the CCDC.
 ;
 
 _audit_creation_method           SHELXL-2014/7
@@ -52,7 +52,7 @@ _space_group_IT_number           221
 _space_group_name_H-M_alt        'P m -3 m'
 _space_group_name_Hall           '-P 4 2 3'
 
-_shelx_space_group_comment
+_shelx_space_group_comment       
 ;
 The symmetry employed for this shelxl refinement is uniquely defined
 by the following loop, which should always be used as a source of
@@ -146,7 +146,7 @@ _exptl_absorpt_special_details   ?
 _diffrn_ambient_temperature      296(2)
 _diffrn_radiation_wavelength     0.7293
 _diffrn_radiation_type           synchrotron
-_diffrn_source
+_diffrn_source                   
 ;
 Advanced Light Source, station 11.3.1
 ;
@@ -179,11 +179,11 @@ _reflns_Friedel_coverage         0.000
 _reflns_Friedel_fraction_max     .
 _reflns_Friedel_fraction_full    .
 
-_reflns_special_details
+_reflns_special_details          
 ;
  Reflections were merged by SHELXL according to the crystal
  class for the calculation of statistics and refinement.
-
+ 
  _reflns_Friedel_fraction is defined as the number of unique
  Friedel pairs measured divided by the number that would be
  possible theoretically, ignoring centric projections and
@@ -195,27 +195,27 @@ _computing_cell_refinement       'SAINT V8.34A(Bruker, 2013)'
 _computing_data_reduction        SAINT
 _computing_structure_solution    'SHELXT (Sheldrick, 2012)'
 _computing_structure_refinement  'SHELXL-2014/7 (Sheldrick, 2014)'
-_computing_molecular_graphics
+_computing_molecular_graphics    
 ;
      	SHELXTL 5.1, XP (Sheldrick, 1994)
 	ShelXle Rev 699 (Hubschle, 2011)
 	WinCoot, (P.Emsley, B.Lohkamp W.G.Scott and K.Cowtand, 2010)
 ;
 _computing_publication_material  SHELXL-2014/7
-_refine_special_details
+_refine_special_details          
 ;
 Hydrogen atoms were not found in the difference map, so were not refined
-in the structure.
+in the structure. 
 
-The methylammonium positions were found in the difference map.
+The methylammonium positions were found in the difference map. 
 The carbon and the nitrogens share the same position in all three sites.
 They were refined with EADP & EXYZ. A DFIX was initially used, but then as
-the refinement progressed, a SADI was employed over all three.
+the refinement progressed, a SADI was employed over all three. 
 ;
 _refine_ls_structure_factor_coef Fsqd
 _refine_ls_matrix_type           full
 _refine_ls_weighting_scheme      calc
-_refine_ls_weighting_details
+_refine_ls_weighting_details     
 'w=1/[\s^2^(Fo^2^)+(0.0263P)^2^+0.0660P] where P=(Fo^2^+2Fc^2^)/3'
 _atom_sites_solution_primary     'intrinsic phasing'
 _atom_sites_solution_secondary   difmap
@@ -271,7 +271,7 @@ _atom_site_aniso_U_12
 Pb01 0.02551(17) 0.02551(17) 0.02551(17) 0.000 0.000 0.000
 Br02 0.0222(5) 0.1364(11) 0.1364(11) 0.000 0.000 0.000
 
-_geom_special_details
+_geom_special_details            
 ;
  All esds (except the esd in the dihedral angle between two l.s. planes)
  are estimated using the full covariance matrix.  The cell esds are taken
@@ -535,7 +535,7 @@ _refine_diff_density_max         0.464
 _refine_diff_density_min         -0.593
 _refine_diff_density_rms         0.151
 
-_shelx_res_file
+_shelx_res_file                  
 ;
 
     sad_a.res created by SHELXL-2014/7
@@ -656,10 +656,11 @@ _shelx_res_checksum              94550
 
 # start Validation Reply Form
 
-_vrf_PLAT973_I
+_vrf_PLAT973_I                   
 ;
 PROBLEM: Check Calcd Positive Residual Density on Pb01 2.62 eA-3
 RESPONSE: Disorder in this site should have been seen in related disorders,
-and as it is less than 3% of a Pb, it was left alone.
+and as it is less than 3% of a Pb, it was left alone. 
 
 ;
+             
\ No newline at end of file
diff --git a/tests/sample_data/COD_1540955_aP16.cif b/tests/sample_data/COD_1540955_aP16.cif
index 7ae58a0..4be9c64 100644
--- a/tests/sample_data/COD_1540955_aP16.cif
+++ b/tests/sample_data/COD_1540955_aP16.cif
@@ -1,11 +1,3 @@
-# Data taken from COD (Crystallography Open Database)
-# All credit goes to the following:
-# Grazulis, S., Chateigner, D., Downs, R. T., Yokochi, A. T., Quiros, M., Lutterotti,
-# L., Manakova, E., Butkus, J., Moeck, P. & Le Bail, A. (2009). Crystallography Open
-# Database – an open-access collection of crystal structures. Journal of Applied
-# Crystallography, 42, 726-729.
-
-
 #------------------------------------------------------------------------------
 #$Date: 2016-02-13 21:28:24 +0200 (Sat, 13 Feb 2016) $
 #$Revision: 176429 $
diff --git a/tests/sample_data/INTENTIONALLY_BAD_CIF.cif b/tests/sample_data/INTENTIONALLY_BAD_CIF.cif
index 7cd2675..49b0f51 100644
--- a/tests/sample_data/INTENTIONALLY_BAD_CIF.cif
+++ b/tests/sample_data/INTENTIONALLY_BAD_CIF.cif
@@ -1,29 +1,35 @@
 data_# CIF file
 
 _cell_length_a    1.000000(x)
-_cell_length_b    4.32343242
+_cell_length_b    4.32343242   
 _cell_length_c    3.1415926535897932384626433832795028841971693993751058209749
-_cell_angle_alpha 90.00000
+_cell_angle_alpha 90.00000  
 _cell_angle_beta  -10.12345
 _cell_angle_gamma 210.00000
 
+__________asdf  123 
+__________asdf \t _1.234-56789
+_-wasd          45.6a/\s # This is a comment
+not_a_valid_key valid_data
+
+
 # NOTE: Adding comments on loop_ keyword lines breaks the table reader
-loop_
-loop_
-_space_group_symop_id # this is a comment
-_space_group_symop_operation_xyz
+loop_ 
+loop_ # This line breaks str.split() on loops_. re.split works though!
+_space_group_symop_id # this is a comment 
+_space_group_symop_operation_xyz  
 _atom_site_fracccccccc_z # Intentionally bad key
 # COMMENT2
 
 1  x, y,z .
 2  -x,y,    -z*1/2 ?
 3   -x,-y,  -z (x) # What About Here
-4 x,=y, z/1/2 zzzzzzzzzz
+4 x,=y, z/1/2 zzzzzzzzzz 
 
 5    x-1/2,y+1/2,z asdf
-6 -x+1/2,  ya1/2,   -z+1/2 :)
+6 -x+1/2,  ya1/2,   -z+1/2 :) 
 # testing
-7 -x+1/2, -y81/2, -z ahh
+7 -x+1/2, -y81/2, -z ahh  
 
 8    x+1/2,  -y+1/2, z01/2 goblue
 
diff --git a/tests/sample_data/PDB_4INS_head.cif b/tests/sample_data/PDB_4INS_head.cif
new file mode 100644
index 0000000..4dfde89
--- /dev/null
+++ b/tests/sample_data/PDB_4INS_head.cif
@@ -0,0 +1,1196 @@
+# Summary information:
+# Title: THE STRUCTURE OF 2ZN PIG INSULIN CRYSTALS AT 1.5 ANGSTROMS RESOLUTION
+# PDB DOI: https://doi.org/10.2210/pdb4ins/pdb
+# Entry authors: Dodson, G.G., Dodson, E.J., Hodgkin, D.C., Isaacs, N.W., Vijayan, M.
+# Initial deposition on: 10 July 1989
+# Initial release on: 15 April 1990
+# Latest revision on: 29 November 2017
+# A few tables have been removed to save on file size.
+
+data_4INS
+# 
+_entry.id   4INS 
+# 
+_audit_conform.dict_name       mmcif_pdbx.dic 
+_audit_conform.dict_version    5.287 
+_audit_conform.dict_location   http://mmcif.pdb.org/dictionaries/ascii/mmcif_pdbx.dic 
+# 
+loop_
+_database_2.database_id 
+_database_2.database_code 
+PDB   4INS         
+WWPDB D_1000179350 
+# 
+_pdbx_database_PDB_obs_spr.id               SPRSDE 
+_pdbx_database_PDB_obs_spr.date             1990-04-15 
+_pdbx_database_PDB_obs_spr.pdb_id           4INS 
+_pdbx_database_PDB_obs_spr.replace_pdb_id   1INS 
+_pdbx_database_PDB_obs_spr.details          ? 
+# 
+_pdbx_database_status.status_code                     REL 
+_pdbx_database_status.entry_id                        4INS 
+_pdbx_database_status.recvd_initial_deposition_date   1989-07-10 
+_pdbx_database_status.deposit_site                    ? 
+_pdbx_database_status.process_site                    BNL 
+_pdbx_database_status.SG_entry                        . 
+_pdbx_database_status.status_code_sf                  ? 
+_pdbx_database_status.status_code_mr                  ? 
+_pdbx_database_status.status_code_cs                  ? 
+_pdbx_database_status.methods_development_category    ? 
+_pdbx_database_status.pdb_format_compatible           Y 
+# 
+loop_
+_audit_author.name 
+_audit_author.pdbx_ordinal 
+'Dodson, G.G.'  1 
+'Dodson, E.J.'  2 
+'Hodgkin, D.C.' 3 
+'Isaacs, N.W.'  4 
+'Vijayan, M.'   5 
+# 
+loop_
+_citation.id 
+_citation.title 
+_citation.journal_abbrev 
+_citation.journal_volume 
+_citation.page_first 
+_citation.page_last 
+_citation.year 
+_citation.journal_id_ASTM 
+_citation.country 
+_citation.journal_id_ISSN 
+_citation.journal_id_CSD 
+_citation.book_publisher 
+_citation.pdbx_database_id_PubMed 
+_citation.pdbx_database_id_DOI 
+primary 'The structure of 2Zn pig insulin crystals at 1.5 A resolution.' Philos.Trans.R.Soc.London,Ser.B                          
+319 369 456 1988 PTRBAE UK 0080-4622     0441 ?                                                            2905485 ? 
+1       
+;A Comparative Assessment of the Zinc-Protein Coordination in 2Zn-Insulin as Determined by X-Ray Absorption Fine Structure (Exafs) and X-Ray Crystallography
+;
+Proc.R.Soc.London,Ser.B                                  219 21  ?   1983 PRLBA4 UK 0080-4649     0338 ? ?       ? 
+2       'Structural Relationships in the Two-Zinc Insulin Hexamer' Can.J.Biochem.                                           57  
+469 ?   1979 CJBIAE CA 0008-4018     0415 ?                                                            ?       ? 
+3       
+;Experience with Fast Fourier Least Squares in the Refinement of the Crystal Structure of Rhombohedral 2-Zinc Insulin at 1.5 Angstroms Resolution
+;
+'Acta Crystallogr.,Sect.A'                               34  782 ?   1978 ACACEQ DK 0108-7673     0621 ? ?       ? 
+4       'Rhombohedral Insulin Crystal Transformation' J.Mol.Biol.                                              126 871 ?   1978 
+JMOBAK UK 0022-2836     0070 ?                                                            ?       ? 
+5       'A Method for Fitting Satisfactory Models to Sets of Atomic Positions in Protein Structure Refinements' 
+'Acta Crystallogr.,Sect.A'                               32  311 ?   1976 ACACEQ DK 0108-7673     0621 ? ?       ? 
+6       'Varieties of Insulin' J.Endocrinol.                                            63  1   ?   1974 JOENAK UK 0022-0795     
+0907 ?                                                            ?       ? 
+7       'The Structure of Insulin' Dan.Tidsskr.Farm.                                        46  1   ?   1972 DTFAAN DK 0011-6513 
+0168 ?                                                            ?       ? 
+8       'Insulin. The Structure in the Crystal and its Reflection in Chemistry and Biology' 'Adv.Protein Chem.' 26  279 ?   1972 
+APCHA2 US 0065-3233     0433 ?                                                            ?       ? 
+9       'The Crystal Structure of Rhombohedral 2 Zinc Insulin' 'Cold Spring Harbor Symp.Quant.Biol.'                    36  233 ? 
+1972 CSHSAZ US 0091-7451     0421 ?                                                            ?       ? 
+10      'Atomic Positions in Rhombohedral 2-Zinc Insulin Crystals' Nature                                                   231 
+506 ?   1971 NATUAS UK 0028-0836     0006 ?                                                            ?       ? 
+11      'X-Ray Analysis and the Structure of Insulin' 'Recent Prog.Horm.Res.'                                  27  1   ?   1971 
+RPHRA6 US 0079-9963     0908 ?                                                            ?       ? 
+12      'X-Ray Diffraction Data on Some Crystalline Varieties of Insulin' J.Mol.Biol.                                              
+54  605 ?   1970 JMOBAK UK 0022-2836     0070 ?                                                            ?       ? 
+13      'Structure of Rhombohedral 2 Zinc Insulin Crystals' Nature                                                   224 491 ?   
+1969 NATUAS UK 0028-0836     0006 ?                                                            ?       ? 
+14      ? 'Atlas of Protein Sequence and Structure (Data Section)' 5   187 ?   1972 ?      ?  0-912466-02-2 0435 
+'National Biomedical Research Foundation, Silver Spring,Md.' ?       ? 
+# 
+loop_
+_citation_author.citation_id 
+_citation_author.name 
+_citation_author.ordinal 
+primary 'Baker, E.N.'    1  
+primary 'Blundell, T.L.' 2  
+primary 'Cutfield, J.F.' 3  
+primary 'Cutfield, S.M.' 4  
+primary 'Dodson, E.J.'   5  
+primary 'Dodson, G.G.'   6  
+primary 'Hodgkin, D.M.'  7  
+primary 'Hubbard, R.E.'  8  
+primary 'Isaacs, N.W.'   9  
+primary 'Reynolds, C.D.' 10 
+primary 'Sakabe, K.'     11 
+primary 'Sakabe, N.'     12 
+primary 'Vijayan, N.M.'  13 
+1       'Bordas, J.'     14 
+1       'Dodson, G.G.'   15 
+1       'Grewe, H.'      16 
+1       'Koch, M.H.J.'   17 
+1       'Krebs, B.'      18 
+1       'Randall, J.'    19 
+2       'Dodson, E.J.'   20 
+2       'Dodson, G.G.'   21 
+2       'Hodgkin, D.C.'  22 
+2       'Reynolds, C.D.' 23 
+3       'Isaacs, N.W.'   24 
+3       'Agarwal, R.C.'  25 
+4       'Bentley, G.'    26 
+4       'Dodson, G.'     27 
+4       'Lewitova, A.'   28 
+5       'Dodson, E.J.'   29 
+5       'Isaacs, N.W.'   30 
+5       'Rollett, J.S.'  31 
+6       'Hodgkin, D.C.'  32 
+7       'Hodgkin, D.C.'  33 
+8       'Blundell, T.'   34 
+8       'Dodson, G.'     35 
+8       'Hodgkin, D.'    36 
+8       'Mercola, D.'    37 
+9       'Blundell, T.L.' 38 
+9       'Cutfield, J.F.' 39 
+9       'Dodson, E.J.'   40 
+9       'Dodson, G.G.'   41 
+9       'Hodgkin, D.C.'  42 
+9       'Mercola, D.A.'  43 
+10      'Blundell, T.L.' 44 
+10      'Cutfield, J.F.' 45 
+10      'Cutfield, S.M.' 46 
+10      'Dodson, E.J.'   47 
+10      'Dodson, G.G.'   48 
+10      'Hodgkin, D.C.'  49 
+10      'Mercola, D.A.'  50 
+10      'Vijayan, M.'    51 
+11      'Blundell, T.L.' 52 
+11      'Dodson, G.G.'   53 
+11      'Dodson, E.'     54 
+11      'Hodgkin, D.C.'  55 
+11      'Vijayan, M.'    56 
+12      'Baker, E.N.'    57 
+12      'Dodson, G.'     58 
+13      'Adams, M.J.'    59 
+13      'Blundell, T.L.' 60 
+13      'Dodson, E.J.'   61 
+13      'Dodson, G.G.'   62 
+13      'Vijayan, M.'    63 
+13      'Baker, E.N.'    64 
+13      'Harding, M.M.'  65 
+13      'Hodgkin, D.C.'  66 
+13      'Rimmer, B.'     67 
+13      'Sheat, S.'      68 
+# 
+_citation_editor.citation_id   14 
+_citation_editor.name          'Dayhoff, M.O.' 
+_citation_editor.ordinal       1 
+# 
+_cell.entry_id           4INS 
+_cell.length_a           82.500 
+_cell.length_b           82.500 
+_cell.length_c           34.000 
+_cell.angle_alpha        90.00 
+_cell.angle_beta         90.00 
+_cell.angle_gamma        120.00 
+_cell.Z_PDB              18 
+_cell.pdbx_unique_axis   ? 
+_cell.length_a_esd       ? 
+_cell.length_b_esd       ? 
+_cell.length_c_esd       ? 
+_cell.angle_alpha_esd    ? 
+_cell.angle_beta_esd     ? 
+_cell.angle_gamma_esd    ? 
+# 
+_symmetry.entry_id                         4INS 
+_symmetry.space_group_name_H-M             'H 3' 
+_symmetry.pdbx_full_space_group_name_H-M   ? 
+_symmetry.cell_setting                     ? 
+_symmetry.Int_Tables_number                146 
+_symmetry.space_group_name_Hall            ? 
+# 
+loop_
+_entity.id 
+_entity.type 
+_entity.src_method 
+_entity.pdbx_description 
+_entity.formula_weight 
+_entity.pdbx_number_of_molecules 
+_entity.pdbx_ec 
+_entity.pdbx_mutation 
+_entity.pdbx_fragment 
+_entity.details 
+1 polymer     man 'INSULIN (CHAIN A)' 2383.698 2   ? ? ? ? 
+2 polymer     man 'INSULIN (CHAIN B)' 3403.927 2   ? ? ? ? 
+3 non-polymer syn 'ZINC ION'          65.409   2   ? ? ? ? 
+4 water       nat water               18.015   350 ? ? ? ? 
+# 
+loop_
+_entity_poly.entity_id 
+_entity_poly.type 
+_entity_poly.nstd_linkage 
+_entity_poly.nstd_monomer 
+_entity_poly.pdbx_seq_one_letter_code 
+_entity_poly.pdbx_seq_one_letter_code_can 
+_entity_poly.pdbx_strand_id 
+_entity_poly.pdbx_target_identifier 
+1 'polypeptide(L)' no no GIVEQCCTSICSLYQLENYCN          GIVEQCCTSICSLYQLENYCN          A,C ? 
+2 'polypeptide(L)' no no FVNQHLCGSHLVEALYLVCGERGFFYTPKA FVNQHLCGSHLVEALYLVCGERGFFYTPKA B,D ? 
+# 
+loop_
+_entity_poly_seq.entity_id 
+_entity_poly_seq.num 
+_entity_poly_seq.mon_id 
+_entity_poly_seq.hetero 
+1 1  GLY n 
+1 2  ILE n 
+1 3  VAL n 
+1 4  GLU n 
+1 5  GLN n 
+1 6  CYS n 
+1 7  CYS n 
+1 8  THR n 
+1 9  SER n 
+1 10 ILE n 
+1 11 CYS n 
+1 12 SER n 
+1 13 LEU n 
+1 14 TYR n 
+1 15 GLN n 
+1 16 LEU n 
+1 17 GLU n 
+1 18 ASN n 
+1 19 TYR n 
+1 20 CYS n 
+1 21 ASN n 
+2 1  PHE n 
+2 2  VAL n 
+2 3  ASN n 
+2 4  GLN n 
+2 5  HIS n 
+2 6  LEU n 
+2 7  CYS n 
+2 8  GLY n 
+2 9  SER n 
+2 10 HIS n 
+2 11 LEU n 
+2 12 VAL n 
+2 13 GLU n 
+2 14 ALA n 
+2 15 LEU n 
+2 16 TYR n 
+2 17 LEU n 
+2 18 VAL n 
+2 19 CYS n 
+2 20 GLY n 
+2 21 GLU n 
+2 22 ARG n 
+2 23 GLY n 
+2 24 PHE n 
+2 25 PHE n 
+2 26 TYR n 
+2 27 THR n 
+2 28 PRO n 
+2 29 LYS n 
+2 30 ALA n 
+# 
+loop_
+_entity_src_gen.entity_id 
+_entity_src_gen.pdbx_src_id 
+_entity_src_gen.pdbx_alt_source_flag 
+_entity_src_gen.pdbx_seq_type 
+_entity_src_gen.pdbx_beg_seq_num 
+_entity_src_gen.pdbx_end_seq_num 
+_entity_src_gen.gene_src_common_name 
+_entity_src_gen.gene_src_genus 
+_entity_src_gen.pdbx_gene_src_gene 
+_entity_src_gen.gene_src_species 
+_entity_src_gen.gene_src_strain 
+_entity_src_gen.gene_src_tissue 
+_entity_src_gen.gene_src_tissue_fraction 
+_entity_src_gen.gene_src_details 
+_entity_src_gen.pdbx_gene_src_fragment 
+_entity_src_gen.pdbx_gene_src_scientific_name 
+_entity_src_gen.pdbx_gene_src_ncbi_taxonomy_id 
+_entity_src_gen.pdbx_gene_src_variant 
+_entity_src_gen.pdbx_gene_src_cell_line 
+_entity_src_gen.pdbx_gene_src_atcc 
+_entity_src_gen.pdbx_gene_src_organ 
+_entity_src_gen.pdbx_gene_src_organelle 
+_entity_src_gen.pdbx_gene_src_cell 
+_entity_src_gen.pdbx_gene_src_cellular_location 
+_entity_src_gen.host_org_common_name 
+_entity_src_gen.pdbx_host_org_scientific_name 
+_entity_src_gen.pdbx_host_org_ncbi_taxonomy_id 
+_entity_src_gen.host_org_genus 
+_entity_src_gen.pdbx_host_org_gene 
+_entity_src_gen.pdbx_host_org_organ 
+_entity_src_gen.host_org_species 
+_entity_src_gen.pdbx_host_org_tissue 
+_entity_src_gen.pdbx_host_org_tissue_fraction 
+_entity_src_gen.pdbx_host_org_strain 
+_entity_src_gen.pdbx_host_org_variant 
+_entity_src_gen.pdbx_host_org_cell_line 
+_entity_src_gen.pdbx_host_org_atcc 
+_entity_src_gen.pdbx_host_org_culture_collection 
+_entity_src_gen.pdbx_host_org_cell 
+_entity_src_gen.pdbx_host_org_organelle 
+_entity_src_gen.pdbx_host_org_cellular_location 
+_entity_src_gen.pdbx_host_org_vector_type 
+_entity_src_gen.pdbx_host_org_vector 
+_entity_src_gen.host_org_details 
+_entity_src_gen.expression_system_id 
+_entity_src_gen.plasmid_name 
+_entity_src_gen.plasmid_details 
+_entity_src_gen.pdbx_description 
+1 1 sample ? ? ? pig Sus ? ? ? ? ? ? ? 'Sus scrofa' 9823 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
+2 1 sample ? ? ? pig Sus ? ? ? ? ? ? ? 'Sus scrofa' 9823 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
+# 
+loop_
+_struct_ref.id 
+_struct_ref.db_name 
+_struct_ref.db_code 
+_struct_ref.pdbx_db_accession 
+_struct_ref.entity_id 
+_struct_ref.pdbx_align_begin 
+_struct_ref.pdbx_seq_one_letter_code 
+_struct_ref.pdbx_db_isoform 
+1 UNP INS_PIG P01315 1 88 ? ? 
+2 UNP INS_PIG P01315 2 25 ? ? 
+# 
+loop_
+_struct_ref_seq.align_id 
+_struct_ref_seq.ref_id 
+_struct_ref_seq.pdbx_PDB_id_code 
+_struct_ref_seq.pdbx_strand_id 
+_struct_ref_seq.seq_align_beg 
+_struct_ref_seq.pdbx_seq_align_beg_ins_code 
+_struct_ref_seq.seq_align_end 
+_struct_ref_seq.pdbx_seq_align_end_ins_code 
+_struct_ref_seq.pdbx_db_accession 
+_struct_ref_seq.db_align_beg 
+_struct_ref_seq.pdbx_db_align_beg_ins_code 
+_struct_ref_seq.db_align_end 
+_struct_ref_seq.pdbx_db_align_end_ins_code 
+_struct_ref_seq.pdbx_auth_seq_align_beg 
+_struct_ref_seq.pdbx_auth_seq_align_end 
+1 1 4INS A 1 ? 21 ? P01315 88 ? 108 ? 1 21 
+2 2 4INS B 1 ? 30 ? P01315 25 ? 54  ? 1 30 
+3 1 4INS C 1 ? 21 ? P01315 88 ? 108 ? 1 21 
+4 2 4INS D 1 ? 30 ? P01315 25 ? 54  ? 1 30 
+# 
+loop_
+_chem_comp.id 
+_chem_comp.type 
+_chem_comp.mon_nstd_flag 
+_chem_comp.name 
+_chem_comp.pdbx_synonyms 
+_chem_comp.formula 
+_chem_comp.formula_weight 
+ALA 'L-peptide linking' y ALANINE         ? 'C3 H7 N O2'     89.093  
+ARG 'L-peptide linking' y ARGININE        ? 'C6 H15 N4 O2 1' 175.209 
+ASN 'L-peptide linking' y ASPARAGINE      ? 'C4 H8 N2 O3'    132.118 
+CYS 'L-peptide linking' y CYSTEINE        ? 'C3 H7 N O2 S'   121.158 
+GLN 'L-peptide linking' y GLUTAMINE       ? 'C5 H10 N2 O3'   146.144 
+GLU 'L-peptide linking' y 'GLUTAMIC ACID' ? 'C5 H9 N O4'     147.129 
+GLY 'peptide linking'   y GLYCINE         ? 'C2 H5 N O2'     75.067  
+HIS 'L-peptide linking' y HISTIDINE       ? 'C6 H10 N3 O2 1' 156.162 
+HOH non-polymer         . WATER           ? 'H2 O'           18.015  
+ILE 'L-peptide linking' y ISOLEUCINE      ? 'C6 H13 N O2'    131.173 
+LEU 'L-peptide linking' y LEUCINE         ? 'C6 H13 N O2'    131.173 
+LYS 'L-peptide linking' y LYSINE          ? 'C6 H15 N2 O2 1' 147.195 
+PHE 'L-peptide linking' y PHENYLALANINE   ? 'C9 H11 N O2'    165.189 
+PRO 'L-peptide linking' y PROLINE         ? 'C5 H9 N O2'     115.130 
+SER 'L-peptide linking' y SERINE          ? 'C3 H7 N O3'     105.093 
+THR 'L-peptide linking' y THREONINE       ? 'C4 H9 N O3'     119.119 
+TYR 'L-peptide linking' y TYROSINE        ? 'C9 H11 N O3'    181.189 
+VAL 'L-peptide linking' y VALINE          ? 'C5 H11 N O2'    117.146 
+ZN  non-polymer         . 'ZINC ION'      ? 'Zn 2'           65.409  
+# 
+_exptl.entry_id          4INS 
+_exptl.method            'X-RAY DIFFRACTION' 
+_exptl.crystals_number   ? 
+# 
+_exptl_crystal.id                    1 
+_exptl_crystal.density_meas          ? 
+_exptl_crystal.density_Matthews      1.92 
+_exptl_crystal.density_percent_sol   36.05 
+_exptl_crystal.description           ? 
+_exptl_crystal.F_000                 ? 
+_exptl_crystal.preparation           ? 
+# 
+_diffrn.id                     1 
+_diffrn.ambient_temp           ? 
+_diffrn.ambient_temp_details   ? 
+_diffrn.crystal_id             1 
+# 
+_diffrn_radiation.diffrn_id                        1 
+_diffrn_radiation.wavelength_id                    1 
+_diffrn_radiation.monochromator                    ? 
+_diffrn_radiation.pdbx_monochromatic_or_laue_m_l   ? 
+_diffrn_radiation.pdbx_diffrn_protocol             ? 
+_diffrn_radiation.pdbx_scattering_type             x-ray 
+# 
+_diffrn_radiation_wavelength.id           1 
+_diffrn_radiation_wavelength.wavelength   . 
+_diffrn_radiation_wavelength.wt           1.0 
+# 
+_refine.entry_id                                 4INS 
+_refine.ls_number_reflns_obs                     ? 
+_refine.ls_number_reflns_all                     ? 
+_refine.pdbx_ls_sigma_I                          ? 
+_refine.pdbx_ls_sigma_F                          ? 
+_refine.pdbx_data_cutoff_high_absF               ? 
+_refine.pdbx_data_cutoff_low_absF                ? 
+_refine.pdbx_data_cutoff_high_rms_absF           ? 
+_refine.ls_d_res_low                             ? 
+_refine.ls_d_res_high                            1.5 
+_refine.ls_percent_reflns_obs                    ? 
+_refine.ls_R_factor_obs                          0.153 
+_refine.ls_R_factor_all                          ? 
+_refine.ls_R_factor_R_work                       ? 
+_refine.ls_R_factor_R_free                       ? 
+_refine.ls_R_factor_R_free_error                 ? 
+_refine.ls_R_factor_R_free_error_details         ? 
+_refine.ls_percent_reflns_R_free                 ? 
+_refine.ls_number_reflns_R_free                  ? 
+_refine.ls_number_parameters                     ? 
+_refine.ls_number_restraints                     ? 
+_refine.occupancy_min                            ? 
+_refine.occupancy_max                            ? 
+_refine.B_iso_mean                               ? 
+_refine.aniso_B[1][1]                            ? 
+_refine.aniso_B[2][2]                            ? 
+_refine.aniso_B[3][3]                            ? 
+_refine.aniso_B[1][2]                            ? 
+_refine.aniso_B[1][3]                            ? 
+_refine.aniso_B[2][3]                            ? 
+_refine.solvent_model_details                    ? 
+_refine.solvent_model_param_ksol                 ? 
+_refine.solvent_model_param_bsol                 ? 
+_refine.pdbx_ls_cross_valid_method               ? 
+_refine.details                                  
+;SOME RESIDUES ARE APPARENTLY DISORDERED BUT DIFFICULT TO
+DESCRIBE IN TERMS OF ATOMIC POSITIONS.  ALA B 30 IS ONE OF
+THESE RESIDUES.
+
+THE FOLLOWING RESIDUES ARE DISORDERED - GLN B 4, VAL B 12, 
+GLU B 21, ARG B 22, ARG D 22, LYS D 29.
+;
+_refine.pdbx_starting_model                      ? 
+_refine.pdbx_method_to_determine_struct          ? 
+_refine.pdbx_isotropic_thermal_model             ? 
+_refine.pdbx_stereochemistry_target_values       ? 
+_refine.pdbx_stereochem_target_val_spec_case     ? 
+_refine.pdbx_R_Free_selection_details            ? 
+_refine.pdbx_overall_ESU_R                       ? 
+_refine.pdbx_overall_ESU_R_Free                  ? 
+_refine.overall_SU_ML                            ? 
+_refine.overall_SU_B                             ? 
+_refine.pdbx_refine_id                           'X-RAY DIFFRACTION' 
+_refine.ls_redundancy_reflns_obs                 ? 
+_refine.pdbx_overall_phase_error                 ? 
+_refine.B_iso_min                                ? 
+_refine.B_iso_max                                ? 
+_refine.correlation_coeff_Fo_to_Fc               ? 
+_refine.correlation_coeff_Fo_to_Fc_free          ? 
+_refine.pdbx_solvent_vdw_probe_radii             ? 
+_refine.pdbx_solvent_ion_probe_radii             ? 
+_refine.pdbx_solvent_shrinkage_radii             ? 
+_refine.overall_SU_R_Cruickshank_DPI             ? 
+_refine.overall_SU_R_free                        ? 
+_refine.ls_wR_factor_R_free                      ? 
+_refine.ls_wR_factor_R_work                      ? 
+_refine.overall_FOM_free_R_set                   ? 
+_refine.overall_FOM_work_R_set                   ? 
+_refine.pdbx_diffrn_id                           1 
+_refine.pdbx_TLS_residual_ADP_flag               ? 
+_refine.pdbx_overall_SU_R_free_Cruickshank_DPI   ? 
+_refine.pdbx_overall_SU_R_Blow_DPI               ? 
+_refine.pdbx_overall_SU_R_free_Blow_DPI          ? 
+# 
+_refine_hist.pdbx_refine_id                   'X-RAY DIFFRACTION' 
+_refine_hist.cycle_id                         LAST 
+_refine_hist.pdbx_number_atoms_protein        806 
+_refine_hist.pdbx_number_atoms_nucleic_acid   0 
+_refine_hist.pdbx_number_atoms_ligand         2 
+_refine_hist.number_atoms_solvent             350 
+_refine_hist.number_atoms_total               1158 
+_refine_hist.d_res_high                       1.5 
+_refine_hist.d_res_low                        . 
+# 
+loop_
+_refine_ls_restr.type 
+_refine_ls_restr.dev_ideal 
+_refine_ls_restr.dev_ideal_target 
+_refine_ls_restr.weight 
+_refine_ls_restr.number 
+_refine_ls_restr.pdbx_refine_id 
+_refine_ls_restr.pdbx_restraint_function 
+p_bond_d    0.005 ? ? ? 'X-RAY DIFFRACTION' ? 
+p_angle_deg 5.9   ? ? ? 'X-RAY DIFFRACTION' ? 
+# 
+_struct_ncs_oper.id             1 
+_struct_ncs_oper.code           given 
+_struct_ncs_oper.details        ? 
+_struct_ncs_oper.matrix[1][1]   -0.878620 
+_struct_ncs_oper.matrix[1][2]   -0.476960 
+_struct_ncs_oper.matrix[1][3]   0.023050 
+_struct_ncs_oper.matrix[2][1]   -0.477430 
+_struct_ncs_oper.matrix[2][2]   0.878370 
+_struct_ncs_oper.matrix[2][3]   -0.022860 
+_struct_ncs_oper.matrix[3][1]   -0.009350 
+_struct_ncs_oper.matrix[3][2]   -0.031090 
+_struct_ncs_oper.matrix[3][3]   -0.999470 
+_struct_ncs_oper.vector[1]      0.00000 
+_struct_ncs_oper.vector[2]      0.00000 
+_struct_ncs_oper.vector[3]      0.00000 
+# 
+_struct.entry_id                  4INS 
+_struct.title                     'THE STRUCTURE OF 2ZN PIG INSULIN CRYSTALS AT 1.5 ANGSTROMS RESOLUTION' 
+_struct.pdbx_descriptor           INSULIN 
+_struct.pdbx_model_details        ? 
+_struct.pdbx_CASP_flag            ? 
+_struct.pdbx_model_type_details   ? 
+# 
+_struct_keywords.entry_id        4INS 
+_struct_keywords.pdbx_keywords   HORMONE 
+_struct_keywords.text            HORMONE 
+# 
+loop_
+_struct_asym.id 
+_struct_asym.pdbx_blank_PDB_chainid_flag 
+_struct_asym.pdbx_modified 
+_struct_asym.entity_id 
+_struct_asym.details 
+A N N 1 ? 
+B N N 2 ? 
+C N N 1 ? 
+D N N 2 ? 
+E N N 3 ? 
+F N N 3 ? 
+G N N 4 ? 
+H N N 4 ? 
+I N N 4 ? 
+J N N 4 ? 
+# 
+loop_
+_struct_biol.id 
+_struct_biol.details 
+_struct_biol.pdbx_parent_biol_id 
+1 
+;THE CRYSTALLOGRAPHIC ASYMMETRIC UNIT OF INSULIN CONSISTS OF
+TWO INSULIN MOLECULES EACH CONSISTING OF TWO CHAINS.  THIS
+ENTRY PRESENTS COORDINATES FOR MOLECULES I (CHAIN
+INDICATORS *A* AND *B*) AND II (CHAIN INDICATORS *C* AND
+*D*).  THE QUASI-TWO-FOLD AXIS THAT TRANSFORMS MOLECULE I
+INTO MOLECULE II IS GIVEN IN THE *MTRIX* RECORDS BELOW.
+APPLYING THE THREE-FOLD CRYSTALLOGRAPHIC AXIS YIELDS A
+HEXAMER AROUND THE AXIS.  THERE ARE TWO ZINC IONS SITUATED
+ON THIS THREE-FOLD AXIS.  COORDINATES FOR THE ZINC IONS AND
+SOME WATER MOLECULES ARE INCLUDED BELOW WITH A BLANK CHAIN
+INDICATOR.
+;
+? 
+2 ? ? 
+# 
+loop_
+_struct_conf.conf_type_id 
+_struct_conf.id 
+_struct_conf.pdbx_PDB_helix_id 
+_struct_conf.beg_label_comp_id 
+_struct_conf.beg_label_asym_id 
+_struct_conf.beg_label_seq_id 
+_struct_conf.pdbx_beg_PDB_ins_code 
+_struct_conf.end_label_comp_id 
+_struct_conf.end_label_asym_id 
+_struct_conf.end_label_seq_id 
+_struct_conf.pdbx_end_PDB_ins_code 
+_struct_conf.beg_auth_comp_id 
+_struct_conf.beg_auth_asym_id 
+_struct_conf.beg_auth_seq_id 
+_struct_conf.end_auth_comp_id 
+_struct_conf.end_auth_asym_id 
+_struct_conf.end_auth_seq_id 
+_struct_conf.pdbx_PDB_helix_class 
+_struct_conf.details 
+_struct_conf.pdbx_PDB_helix_length 
+HELX_P HELX_P1 A11 GLY A 1  ? ILE A 10 ? GLY A 1  ILE A 10 1 'VAL 203 O H-BONDED TO HOH'     10 
+HELX_P HELX_P2 A12 SER A 12 ? GLU A 17 ? SER A 12 GLU A 17 5 'CNTCTS MOSTLY GT 3A,NOT IDEAL' 6  
+HELX_P HELX_P3 B11 SER B 9  ? GLY B 20 ? SER B 9  GLY B 20 1 'CYS 67 GLY 68, 3(10) CONTACTS' 12 
+HELX_P HELX_P4 A21 GLY C 1  ? ILE C 10 ? GLY C 1  ILE C 10 1 'NOT IDEAL ALPH,SOME PI CNTCTS' 10 
+HELX_P HELX_P5 A22 SER C 12 ? GLU C 17 ? SER C 12 GLU C 17 5 'CNTCTS MOSTLY GT 3A,NOT IDEAL' 6  
+HELX_P HELX_P6 B21 SER D 9  ? GLY D 20 ? SER D 9  GLY D 20 1 'CYS 67,GLY 68, 3(10) CONTACTS' 12 
+# 
+_struct_conf_type.id          HELX_P 
+_struct_conf_type.criteria    ? 
+_struct_conf_type.reference   ? 
+# 
+loop_
+_struct_conn.id 
+_struct_conn.conn_type_id 
+_struct_conn.pdbx_leaving_atom_flag 
+_struct_conn.pdbx_PDB_id 
+_struct_conn.ptnr1_label_asym_id 
+_struct_conn.ptnr1_label_comp_id 
+_struct_conn.ptnr1_label_seq_id 
+_struct_conn.ptnr1_label_atom_id 
+_struct_conn.pdbx_ptnr1_label_alt_id 
+_struct_conn.pdbx_ptnr1_PDB_ins_code 
+_struct_conn.pdbx_ptnr1_standard_comp_id 
+_struct_conn.ptnr1_symmetry 
+_struct_conn.ptnr2_label_asym_id 
+_struct_conn.ptnr2_label_comp_id 
+_struct_conn.ptnr2_label_seq_id 
+_struct_conn.ptnr2_label_atom_id 
+_struct_conn.pdbx_ptnr2_label_alt_id 
+_struct_conn.pdbx_ptnr2_PDB_ins_code 
+_struct_conn.ptnr1_auth_asym_id 
+_struct_conn.ptnr1_auth_comp_id 
+_struct_conn.ptnr1_auth_seq_id 
+_struct_conn.ptnr2_auth_asym_id 
+_struct_conn.ptnr2_auth_comp_id 
+_struct_conn.ptnr2_auth_seq_id 
+_struct_conn.ptnr2_symmetry 
+_struct_conn.pdbx_ptnr3_label_atom_id 
+_struct_conn.pdbx_ptnr3_label_seq_id 
+_struct_conn.pdbx_ptnr3_label_comp_id 
+_struct_conn.pdbx_ptnr3_label_asym_id 
+_struct_conn.pdbx_ptnr3_label_alt_id 
+_struct_conn.pdbx_ptnr3_PDB_ins_code 
+_struct_conn.details 
+_struct_conn.pdbx_dist_value 
+_struct_conn.pdbx_value_order 
+disulf1 disulf ? ? A CYS 6  SG ? ? ? 1_555 A CYS 11 SG  ? ? A CYS 6   A CYS 11  1_555 ? ? ? ? ? ? ? 2.053 ? 
+disulf2 disulf ? ? A CYS 7  SG ? ? ? 1_555 B CYS 7  SG  ? ? A CYS 7   B CYS 7   1_555 ? ? ? ? ? ? ? 1.966 ? 
+disulf3 disulf ? ? A CYS 20 SG ? ? ? 1_555 B CYS 19 SG  ? ? A CYS 20  B CYS 19  1_555 ? ? ? ? ? ? ? 2.001 ? 
+disulf4 disulf ? ? C CYS 6  SG ? ? ? 1_555 C CYS 11 SG  ? ? C CYS 6   C CYS 11  1_555 ? ? ? ? ? ? ? 2.060 ? 
+disulf5 disulf ? ? C CYS 7  SG ? ? ? 1_555 D CYS 7  SG  ? ? C CYS 7   D CYS 7   1_555 ? ? ? ? ? ? ? 2.005 ? 
+disulf6 disulf ? ? C CYS 20 SG ? ? ? 1_555 D CYS 19 SG  ? ? C CYS 20  D CYS 19  1_555 ? ? ? ? ? ? ? 2.016 ? 
+metalc1 metalc ? ? E ZN  .  ZN ? ? ? 1_555 B HIS 10 NE2 ? ? B ZN  101 B HIS 10  1_555 ? ? ? ? ? ? ? 2.106 ? 
+metalc2 metalc ? ? F ZN  .  ZN ? ? ? 1_555 D HIS 10 NE2 ? ? D ZN  101 D HIS 10  1_555 ? ? ? ? ? ? ? 2.079 ? 
+metalc3 metalc ? ? E ZN  .  ZN ? ? ? 1_555 B HIS 10 NE2 ? ? B ZN  101 B HIS 10  2_555 ? ? ? ? ? ? ? 2.102 ? 
+metalc4 metalc ? ? E ZN  .  ZN ? ? ? 1_555 B HIS 10 NE2 ? ? B ZN  101 B HIS 10  3_555 ? ? ? ? ? ? ? 2.109 ? 
+metalc5 metalc ? ? F ZN  .  ZN ? ? ? 1_555 D HIS 10 NE2 ? ? D ZN  101 D HIS 10  3_555 ? ? ? ? ? ? ? 2.079 ? 
+metalc6 metalc ? ? F ZN  .  ZN ? ? ? 1_555 D HIS 10 NE2 ? ? D ZN  101 D HIS 10  2_555 ? ? ? ? ? ? ? 2.079 ? 
+metalc7 metalc ? ? E ZN  .  ZN ? ? ? 1_555 H HOH .  O   ? ? B ZN  101 B HOH 213 1_555 ? ? ? ? ? ? ? 2.193 ? 
+# 
+loop_
+_struct_conn_type.id 
+_struct_conn_type.criteria 
+_struct_conn_type.reference 
+disulf ? ? 
+metalc ? ? 
+# 
+_struct_sheet.id               B 
+_struct_sheet.type             ? 
+_struct_sheet.number_strands   2 
+_struct_sheet.details          ? 
+# 
+_struct_sheet_order.sheet_id     B 
+_struct_sheet_order.range_id_1   1 
+_struct_sheet_order.range_id_2   2 
+_struct_sheet_order.offset       ? 
+_struct_sheet_order.sense        anti-parallel 
+# 
+loop_
+_struct_sheet_range.sheet_id 
+_struct_sheet_range.id 
+_struct_sheet_range.beg_label_comp_id 
+_struct_sheet_range.beg_label_asym_id 
+_struct_sheet_range.beg_label_seq_id 
+_struct_sheet_range.pdbx_beg_PDB_ins_code 
+_struct_sheet_range.end_label_comp_id 
+_struct_sheet_range.end_label_asym_id 
+_struct_sheet_range.end_label_seq_id 
+_struct_sheet_range.pdbx_end_PDB_ins_code 
+_struct_sheet_range.beg_auth_comp_id 
+_struct_sheet_range.beg_auth_asym_id 
+_struct_sheet_range.beg_auth_seq_id 
+_struct_sheet_range.end_auth_comp_id 
+_struct_sheet_range.end_auth_asym_id 
+_struct_sheet_range.end_auth_seq_id 
+B 1 PHE B 24 ? TYR B 26 ? PHE B 24 TYR B 26 
+B 2 PHE D 24 ? TYR D 26 ? PHE D 24 TYR D 26 
+# 
+_pdbx_struct_sheet_hbond.sheet_id                B 
+_pdbx_struct_sheet_hbond.range_id_1              1 
+_pdbx_struct_sheet_hbond.range_id_2              2 
+_pdbx_struct_sheet_hbond.range_1_label_atom_id   O 
+_pdbx_struct_sheet_hbond.range_1_label_comp_id   TYR 
+_pdbx_struct_sheet_hbond.range_1_label_asym_id   D 
+_pdbx_struct_sheet_hbond.range_1_label_seq_id    26 
+_pdbx_struct_sheet_hbond.range_1_PDB_ins_code    ? 
+_pdbx_struct_sheet_hbond.range_1_auth_atom_id    O 
+_pdbx_struct_sheet_hbond.range_1_auth_comp_id    TYR 
+_pdbx_struct_sheet_hbond.range_1_auth_asym_id    D 
+_pdbx_struct_sheet_hbond.range_1_auth_seq_id     26 
+_pdbx_struct_sheet_hbond.range_2_label_atom_id   N 
+_pdbx_struct_sheet_hbond.range_2_label_comp_id   PHE 
+_pdbx_struct_sheet_hbond.range_2_label_asym_id   B 
+_pdbx_struct_sheet_hbond.range_2_label_seq_id    24 
+_pdbx_struct_sheet_hbond.range_2_PDB_ins_code    ? 
+_pdbx_struct_sheet_hbond.range_2_auth_atom_id    N 
+_pdbx_struct_sheet_hbond.range_2_auth_comp_id    PHE 
+_pdbx_struct_sheet_hbond.range_2_auth_asym_id    B 
+_pdbx_struct_sheet_hbond.range_2_auth_seq_id     24 
+# 
+loop_
+_struct_site.id 
+_struct_site.pdbx_evidence_code 
+_struct_site.pdbx_auth_asym_id 
+_struct_site.pdbx_auth_comp_id 
+_struct_site.pdbx_auth_seq_id 
+_struct_site.pdbx_auth_ins_code 
+_struct_site.pdbx_num_residues 
+_struct_site.details 
+D1  Author   ? ? ? ? 5 'DIMER-FORMING RESIDUES IN MOLECULE I'                                   
+D2  Author   ? ? ? ? 5 'DIMER-FORMING RESIDUES IN MOLECULE II'                                  
+H1  Author   ? ? ? ? 7 'HEXAMER-FORMING RESIDUES IN MOLECULE I'                                 
+H2  Author   ? ? ? ? 7 'HEXAMER-FORMING RESIDUES IN MOLECULE II'                                
+SI1 Author   ? ? ? ? 7 'SURFACE-INVARIANT RESIDUES IN MOLECULE I NOT INVOLVED IN DIMERIZATION'  
+SI2 Author   ? ? ? ? 7 'SURFACE-INVARIANT RESIDUES IN MOLECULE II NOT INVOLVED IN DIMERIZATION' 
+AC1 Software ? ? ? ? 3 'BINDING SITE FOR RESIDUE ZN B 31'                                       
+AC2 Software ? ? ? ? 3 'BINDING SITE FOR RESIDUE ZN D 31'                                       
+# 
+_database_PDB_matrix.entry_id          4INS 
+_database_PDB_matrix.origx[1][1]       1.000000 
+_database_PDB_matrix.origx[1][2]       0.000000 
+_database_PDB_matrix.origx[1][3]       0.000000 
+_database_PDB_matrix.origx[2][1]       0.000000 
+_database_PDB_matrix.origx[2][2]       1.000000 
+_database_PDB_matrix.origx[2][3]       0.000000 
+_database_PDB_matrix.origx[3][1]       0.000000 
+_database_PDB_matrix.origx[3][2]       0.000000 
+_database_PDB_matrix.origx[3][3]       1.000000 
+_database_PDB_matrix.origx_vector[1]   0.00000 
+_database_PDB_matrix.origx_vector[2]   0.00000 
+_database_PDB_matrix.origx_vector[3]   0.00000 
+# 
+_atom_sites.entry_id                    4INS 
+_atom_sites.fract_transf_matrix[1][1]   0.012121 
+_atom_sites.fract_transf_matrix[1][2]   0.006998 
+_atom_sites.fract_transf_matrix[1][3]   0.000000 
+_atom_sites.fract_transf_matrix[2][1]   0.000000 
+_atom_sites.fract_transf_matrix[2][2]   0.013996 
+_atom_sites.fract_transf_matrix[2][3]   0.000000 
+_atom_sites.fract_transf_matrix[3][1]   0.000000 
+_atom_sites.fract_transf_matrix[3][2]   0.000000 
+_atom_sites.fract_transf_matrix[3][3]   0.029412 
+_atom_sites.fract_transf_vector[1]      0.00000 
+_atom_sites.fract_transf_vector[2]      0.00000 
+_atom_sites.fract_transf_vector[3]      0.00000 
+# 
+loop_
+_atom_sites_footnote.id 
+_atom_sites_footnote.text 
+1 
+;THE QUASI-TWO-FOLD SYMMETRY BREAKS DOWN MOST SERIOUSLY AT RESIDUES GLY A  1 TO GLN A  5   AND   GLY C  1 TO GLN C  5 HIS B  5               AND   HIS D  5 PHE B 25               AND   PHE D 25
+;
+2 'THE FOLLOWING RESIDUES ARE DISORDERED - GLN B 4, VAL B 12, GLU B 21, ARG B 22, ARG D 22, LYS D 29.' 
+3 'SEE REMARK 8.' 
+# 
+loop_
+_atom_type.symbol 
+C  
+N  
+O  
+S  
+ZN 
+# 
+loop_
+_pdbx_poly_seq_scheme.asym_id 
+_pdbx_poly_seq_scheme.entity_id 
+_pdbx_poly_seq_scheme.seq_id 
+_pdbx_poly_seq_scheme.mon_id 
+_pdbx_poly_seq_scheme.ndb_seq_num 
+_pdbx_poly_seq_scheme.pdb_seq_num 
+_pdbx_poly_seq_scheme.auth_seq_num 
+_pdbx_poly_seq_scheme.pdb_mon_id 
+_pdbx_poly_seq_scheme.auth_mon_id 
+_pdbx_poly_seq_scheme.pdb_strand_id 
+_pdbx_poly_seq_scheme.pdb_ins_code 
+_pdbx_poly_seq_scheme.hetero 
+A 1 1  GLY 1  1  1  GLY GLY A . n 
+A 1 2  ILE 2  2  2  ILE ILE A . n 
+A 1 3  VAL 3  3  3  VAL VAL A . n 
+A 1 4  GLU 4  4  4  GLU GLU A . n 
+A 1 5  GLN 5  5  5  GLN GLN A . n 
+A 1 6  CYS 6  6  6  CYS CYS A . n 
+A 1 7  CYS 7  7  7  CYS CYS A . n 
+A 1 8  THR 8  8  8  THR THR A . n 
+A 1 9  SER 9  9  9  SER SER A . n 
+A 1 10 ILE 10 10 10 ILE ILE A . n 
+A 1 11 CYS 11 11 11 CYS CYS A . n 
+A 1 12 SER 12 12 12 SER SER A . n 
+A 1 13 LEU 13 13 13 LEU LEU A . n 
+A 1 14 TYR 14 14 14 TYR TYR A . n 
+A 1 15 GLN 15 15 15 GLN GLN A . n 
+A 1 16 LEU 16 16 16 LEU LEU A . n 
+A 1 17 GLU 17 17 17 GLU GLU A . n 
+A 1 18 ASN 18 18 18 ASN ASN A . n 
+A 1 19 TYR 19 19 19 TYR TYR A . n 
+A 1 20 CYS 20 20 20 CYS CYS A . n 
+A 1 21 ASN 21 21 21 ASN ASN A . n 
+B 2 1  PHE 1  1  1  PHE PHE B . n 
+B 2 2  VAL 2  2  2  VAL VAL B . n 
+B 2 3  ASN 3  3  3  ASN ASN B . n 
+B 2 4  GLN 4  4  4  GLN GLN B . n 
+B 2 5  HIS 5  5  5  HIS HIS B . n 
+B 2 6  LEU 6  6  6  LEU LEU B . n 
+B 2 7  CYS 7  7  7  CYS CYS B . n 
+B 2 8  GLY 8  8  8  GLY GLY B . n 
+B 2 9  SER 9  9  9  SER SER B . n 
+B 2 10 HIS 10 10 10 HIS HIS B . n 
+B 2 11 LEU 11 11 11 LEU LEU B . n 
+B 2 12 VAL 12 12 12 VAL VAL B . n 
+B 2 13 GLU 13 13 13 GLU GLU B . n 
+B 2 14 ALA 14 14 14 ALA ALA B . n 
+B 2 15 LEU 15 15 15 LEU LEU B . n 
+B 2 16 TYR 16 16 16 TYR TYR B . n 
+B 2 17 LEU 17 17 17 LEU LEU B . n 
+B 2 18 VAL 18 18 18 VAL VAL B . n 
+B 2 19 CYS 19 19 19 CYS CYS B . n 
+B 2 20 GLY 20 20 20 GLY GLY B . n 
+B 2 21 GLU 21 21 21 GLU GLU B . n 
+B 2 22 ARG 22 22 22 ARG ARG B . n 
+B 2 23 GLY 23 23 23 GLY GLY B . n 
+B 2 24 PHE 24 24 24 PHE PHE B . n 
+B 2 25 PHE 25 25 25 PHE PHE B . n 
+B 2 26 TYR 26 26 26 TYR TYR B . n 
+B 2 27 THR 27 27 27 THR THR B . n 
+B 2 28 PRO 28 28 28 PRO PRO B . n 
+B 2 29 LYS 29 29 29 LYS LYS B . n 
+B 2 30 ALA 30 30 30 ALA ALA B . n 
+C 1 1  GLY 1  1  1  GLY GLY C . n 
+C 1 2  ILE 2  2  2  ILE ILE C . n 
+C 1 3  VAL 3  3  3  VAL VAL C . n 
+C 1 4  GLU 4  4  4  GLU GLU C . n 
+C 1 5  GLN 5  5  5  GLN GLN C . n 
+C 1 6  CYS 6  6  6  CYS CYS C . n 
+C 1 7  CYS 7  7  7  CYS CYS C . n 
+C 1 8  THR 8  8  8  THR THR C . n 
+C 1 9  SER 9  9  9  SER SER C . n 
+C 1 10 ILE 10 10 10 ILE ILE C . n 
+C 1 11 CYS 11 11 11 CYS CYS C . n 
+C 1 12 SER 12 12 12 SER SER C . n 
+C 1 13 LEU 13 13 13 LEU LEU C . n 
+C 1 14 TYR 14 14 14 TYR TYR C . n 
+C 1 15 GLN 15 15 15 GLN GLN C . n 
+C 1 16 LEU 16 16 16 LEU LEU C . n 
+C 1 17 GLU 17 17 17 GLU GLU C . n 
+C 1 18 ASN 18 18 18 ASN ASN C . n 
+C 1 19 TYR 19 19 19 TYR TYR C . n 
+C 1 20 CYS 20 20 20 CYS CYS C . n 
+C 1 21 ASN 21 21 21 ASN ASN C . n 
+D 2 1  PHE 1  1  1  PHE PHE D . n 
+D 2 2  VAL 2  2  2  VAL VAL D . n 
+D 2 3  ASN 3  3  3  ASN ASN D . n 
+D 2 4  GLN 4  4  4  GLN GLN D . n 
+D 2 5  HIS 5  5  5  HIS HIS D . n 
+D 2 6  LEU 6  6  6  LEU LEU D . n 
+D 2 7  CYS 7  7  7  CYS CYS D . n 
+D 2 8  GLY 8  8  8  GLY GLY D . n 
+D 2 9  SER 9  9  9  SER SER D . n 
+D 2 10 HIS 10 10 10 HIS HIS D . n 
+D 2 11 LEU 11 11 11 LEU LEU D . n 
+D 2 12 VAL 12 12 12 VAL VAL D . n 
+D 2 13 GLU 13 13 13 GLU GLU D . n 
+D 2 14 ALA 14 14 14 ALA ALA D . n 
+D 2 15 LEU 15 15 15 LEU LEU D . n 
+D 2 16 TYR 16 16 16 TYR TYR D . n 
+D 2 17 LEU 17 17 17 LEU LEU D . n 
+D 2 18 VAL 18 18 18 VAL VAL D . n 
+D 2 19 CYS 19 19 19 CYS CYS D . n 
+D 2 20 GLY 20 20 20 GLY GLY D . n 
+D 2 21 GLU 21 21 21 GLU GLU D . n 
+D 2 22 ARG 22 22 22 ARG ARG D . n 
+D 2 23 GLY 23 23 23 GLY GLY D . n 
+D 2 24 PHE 24 24 24 PHE PHE D . n 
+D 2 25 PHE 25 25 25 PHE PHE D . n 
+D 2 26 TYR 26 26 26 TYR TYR D . n 
+D 2 27 THR 27 27 27 THR THR D . n 
+D 2 28 PRO 28 28 28 PRO PRO D . n 
+D 2 29 LYS 29 29 29 LYS LYS D . n 
+D 2 30 ALA 30 30 30 ALA ALA D . n 
+# 
+loop_
+_pdbx_struct_assembly.id 
+_pdbx_struct_assembly.details 
+_pdbx_struct_assembly.method_details 
+_pdbx_struct_assembly.oligomeric_details 
+_pdbx_struct_assembly.oligomeric_count 
+1 author_and_software_defined_assembly PISA dimeric     2  
+2 author_and_software_defined_assembly PISA dimeric     2  
+3 software_defined_assembly            PISA dodecameric 12 
+4 software_defined_assembly            PISA hexameric   6  
+5 software_defined_assembly            PISA hexameric   6  
+6 software_defined_assembly            PISA tetrameric  4  
+7 software_defined_assembly            PISA tetrameric  4  
+# 
+loop_
+_pdbx_struct_assembly_gen.assembly_id 
+_pdbx_struct_assembly_gen.oper_expression 
+_pdbx_struct_assembly_gen.asym_id_list 
+1 1     A,B,E,G,H           
+2 1     C,D,F,I,J           
+3 1,2,3 A,B,C,D,E,F,G,H,I,J 
+4 1,2,3 C,D,F,I,J           
+5 1,2,3 A,B,E,G,H           
+6 1     A,B,E,G,H           
+6 2     C,D,F,I,J           
+7 1     A,B,C,D,E,F,G,H,I,J 
+# 
+loop_
+_pdbx_struct_assembly_prop.biol_id 
+_pdbx_struct_assembly_prop.type 
+_pdbx_struct_assembly_prop.value 
+_pdbx_struct_assembly_prop.details 
+1 'ABSA (A^2)' 1680  ? 
+1 MORE         -15   ? 
+1 'SSA (A^2)'  3790  ? 
+2 'ABSA (A^2)' 1740  ? 
+2 MORE         -15   ? 
+2 'SSA (A^2)'  3620  ? 
+3 'ABSA (A^2)' 20600 ? 
+3 MORE         -260  ? 
+3 'SSA (A^2)'  12080 ? 
+4 'ABSA (A^2)' 5730  ? 
+4 MORE         -95   ? 
+4 'SSA (A^2)'  10440 ? 
+5 'ABSA (A^2)' 5580  ? 
+5 MORE         -95   ? 
+5 'SSA (A^2)'  10930 ? 
+6 'ABSA (A^2)' 5120  ? 
+6 MORE         -45   ? 
+6 'SSA (A^2)'  5710  ? 
+7 'ABSA (A^2)' 4820  ? 
+7 MORE         -40   ? 
+7 'SSA (A^2)'  6010  ? 
+# 
+loop_
+_pdbx_struct_oper_list.id 
+_pdbx_struct_oper_list.type 
+_pdbx_struct_oper_list.name 
+_pdbx_struct_oper_list.symmetry_operation 
+_pdbx_struct_oper_list.matrix[1][1] 
+_pdbx_struct_oper_list.matrix[1][2] 
+_pdbx_struct_oper_list.matrix[1][3] 
+_pdbx_struct_oper_list.vector[1] 
+_pdbx_struct_oper_list.matrix[2][1] 
+_pdbx_struct_oper_list.matrix[2][2] 
+_pdbx_struct_oper_list.matrix[2][3] 
+_pdbx_struct_oper_list.vector[2] 
+_pdbx_struct_oper_list.matrix[3][1] 
+_pdbx_struct_oper_list.matrix[3][2] 
+_pdbx_struct_oper_list.matrix[3][3] 
+_pdbx_struct_oper_list.vector[3] 
+1 'identity operation'         1_555 x,y,z     1.0000000000  0.0000000000  0.0000000000 0.0000000000 0.0000000000  1.0000000000  
+0.0000000000 0.0000000000 0.0000000000 0.0000000000 1.0000000000 0.0000000000 
+2 'crystal symmetry operation' 2_555 -y,x-y,z  -0.5000000000 -0.8660254038 0.0000000000 0.0000000000 0.8660254038  -0.5000000000 
+0.0000000000 0.0000000000 0.0000000000 0.0000000000 1.0000000000 0.0000000000 
+3 'crystal symmetry operation' 3_555 -x+y,-x,z -0.5000000000 0.8660254038  0.0000000000 0.0000000000 -0.8660254038 -0.5000000000 
+0.0000000000 0.0000000000 0.0000000000 0.0000000000 1.0000000000 0.0000000000 
+# 
+loop_
+_pdbx_struct_special_symmetry.id 
+_pdbx_struct_special_symmetry.PDB_model_num 
+_pdbx_struct_special_symmetry.auth_asym_id 
+_pdbx_struct_special_symmetry.auth_comp_id 
+_pdbx_struct_special_symmetry.auth_seq_id 
+_pdbx_struct_special_symmetry.PDB_ins_code 
+_pdbx_struct_special_symmetry.label_asym_id 
+_pdbx_struct_special_symmetry.label_comp_id 
+_pdbx_struct_special_symmetry.label_seq_id 
+1 1 B ZN  101 ? E ZN  . 
+2 1 D ZN  101 ? F ZN  . 
+3 1 B HOH 224 ? H HOH . 
+4 1 B HOH 245 ? H HOH . 
+5 1 D HOH 323 ? J HOH . 
+6 1 D HOH 403 ? J HOH . 
+7 1 D HOH 455 ? J HOH . 
+# 
+loop_
+_pdbx_struct_conn_angle.id 
+_pdbx_struct_conn_angle.ptnr1_label_atom_id 
+_pdbx_struct_conn_angle.ptnr1_label_alt_id 
+_pdbx_struct_conn_angle.ptnr1_label_asym_id 
+_pdbx_struct_conn_angle.ptnr1_label_comp_id 
+_pdbx_struct_conn_angle.ptnr1_label_seq_id 
+_pdbx_struct_conn_angle.ptnr1_auth_atom_id 
+_pdbx_struct_conn_angle.ptnr1_auth_asym_id 
+_pdbx_struct_conn_angle.ptnr1_auth_comp_id 
+_pdbx_struct_conn_angle.ptnr1_auth_seq_id 
+_pdbx_struct_conn_angle.ptnr1_PDB_ins_code 
+_pdbx_struct_conn_angle.ptnr1_symmetry 
+_pdbx_struct_conn_angle.ptnr2_label_atom_id 
+_pdbx_struct_conn_angle.ptnr2_label_alt_id 
+_pdbx_struct_conn_angle.ptnr2_label_asym_id 
+_pdbx_struct_conn_angle.ptnr2_label_comp_id 
+_pdbx_struct_conn_angle.ptnr2_label_seq_id 
+_pdbx_struct_conn_angle.ptnr2_auth_atom_id 
+_pdbx_struct_conn_angle.ptnr2_auth_asym_id 
+_pdbx_struct_conn_angle.ptnr2_auth_comp_id 
+_pdbx_struct_conn_angle.ptnr2_auth_seq_id 
+_pdbx_struct_conn_angle.ptnr2_PDB_ins_code 
+_pdbx_struct_conn_angle.ptnr2_symmetry 
+_pdbx_struct_conn_angle.ptnr3_label_atom_id 
+_pdbx_struct_conn_angle.ptnr3_label_alt_id 
+_pdbx_struct_conn_angle.ptnr3_label_asym_id 
+_pdbx_struct_conn_angle.ptnr3_label_comp_id 
+_pdbx_struct_conn_angle.ptnr3_label_seq_id 
+_pdbx_struct_conn_angle.ptnr3_auth_atom_id 
+_pdbx_struct_conn_angle.ptnr3_auth_asym_id 
+_pdbx_struct_conn_angle.ptnr3_auth_comp_id 
+_pdbx_struct_conn_angle.ptnr3_auth_seq_id 
+_pdbx_struct_conn_angle.ptnr3_PDB_ins_code 
+_pdbx_struct_conn_angle.ptnr3_symmetry 
+_pdbx_struct_conn_angle.value 
+_pdbx_struct_conn_angle.value_esd 
+1 NE2 ? B HIS 10 ? B HIS 10 ? 1_555 ZN ? E ZN . ? B ZN 101 ? 1_555 NE2 ? B HIS 10 ? B HIS 10  ? 2_555 98.9  ? 
+2 NE2 ? B HIS 10 ? B HIS 10 ? 1_555 ZN ? E ZN . ? B ZN 101 ? 1_555 NE2 ? B HIS 10 ? B HIS 10  ? 3_555 98.7  ? 
+3 NE2 ? B HIS 10 ? B HIS 10 ? 2_555 ZN ? E ZN . ? B ZN 101 ? 1_555 NE2 ? B HIS 10 ? B HIS 10  ? 3_555 98.8  ? 
+4 NE2 ? B HIS 10 ? B HIS 10 ? 1_555 ZN ? E ZN . ? B ZN 101 ? 1_555 O   ? H HOH .  ? B HOH 213 ? 1_555 90.2  ? 
+5 NE2 ? B HIS 10 ? B HIS 10 ? 2_555 ZN ? E ZN . ? B ZN 101 ? 1_555 O   ? H HOH .  ? B HOH 213 ? 1_555 163.2 ? 
+6 NE2 ? B HIS 10 ? B HIS 10 ? 3_555 ZN ? E ZN . ? B ZN 101 ? 1_555 O   ? H HOH .  ? B HOH 213 ? 1_555 93.7  ? 
+7 NE2 ? D HIS 10 ? D HIS 10 ? 1_555 ZN ? F ZN . ? D ZN 101 ? 1_555 NE2 ? D HIS 10 ? D HIS 10  ? 3_555 103.4 ? 
+8 NE2 ? D HIS 10 ? D HIS 10 ? 1_555 ZN ? F ZN . ? D ZN 101 ? 1_555 NE2 ? D HIS 10 ? D HIS 10  ? 2_555 103.4 ? 
+9 NE2 ? D HIS 10 ? D HIS 10 ? 3_555 ZN ? F ZN . ? D ZN 101 ? 1_555 NE2 ? D HIS 10 ? D HIS 10  ? 2_555 103.4 ? 
+# 
+loop_
+_pdbx_audit_revision_history.ordinal 
+_pdbx_audit_revision_history.data_content_type 
+_pdbx_audit_revision_history.major_revision 
+_pdbx_audit_revision_history.minor_revision 
+_pdbx_audit_revision_history.revision_date 
+1 'Structure model' 1 0 1990-04-15 
+2 'Structure model' 1 1 2008-03-03 
+3 'Structure model' 1 2 2011-07-13 
+4 'Structure model' 1 3 2012-02-29 
+5 'Structure model' 1 4 2017-11-29 
+# 
+_pdbx_audit_revision_details.ordinal             1 
+_pdbx_audit_revision_details.revision_ordinal    1 
+_pdbx_audit_revision_details.data_content_type   'Structure model' 
+_pdbx_audit_revision_details.provider            repository 
+_pdbx_audit_revision_details.type                'Initial release' 
+_pdbx_audit_revision_details.description         ? 
+# 
+loop_
+_pdbx_audit_revision_group.ordinal 
+_pdbx_audit_revision_group.revision_ordinal 
+_pdbx_audit_revision_group.data_content_type 
+_pdbx_audit_revision_group.group 
+1 2 'Structure model' 'Version format compliance' 
+2 3 'Structure model' 'Version format compliance' 
+3 4 'Structure model' 'Database references'       
+4 5 'Structure model' 'Derived calculations'      
+5 5 'Structure model' Other                       
+# 
+loop_
+_pdbx_audit_revision_category.ordinal 
+_pdbx_audit_revision_category.revision_ordinal 
+_pdbx_audit_revision_category.data_content_type 
+_pdbx_audit_revision_category.category 
+1 5 'Structure model' pdbx_database_status 
+2 5 'Structure model' struct_conf          
+3 5 'Structure model' struct_conf_type     
+# 
+_pdbx_audit_revision_item.ordinal             1 
+_pdbx_audit_revision_item.revision_ordinal    5 
+_pdbx_audit_revision_item.data_content_type   'Structure model' 
+_pdbx_audit_revision_item.item                '_pdbx_database_status.process_site' 
+# 
+_software.name             PROLSQ 
+_software.classification   refinement 
+_software.version          . 
+_software.citation_id      ? 
+_software.pdbx_ordinal     1 
+# 
+_pdbx_validate_rmsd_bond.id                        1 
+_pdbx_validate_rmsd_bond.PDB_model_num             1 
+_pdbx_validate_rmsd_bond.auth_atom_id_1            CD 
+_pdbx_validate_rmsd_bond.auth_asym_id_1            C 
+_pdbx_validate_rmsd_bond.auth_comp_id_1            GLU 
+_pdbx_validate_rmsd_bond.auth_seq_id_1             17 
+_pdbx_validate_rmsd_bond.PDB_ins_code_1            ? 
+_pdbx_validate_rmsd_bond.label_alt_id_1            ? 
+_pdbx_validate_rmsd_bond.auth_atom_id_2            OE1 
+_pdbx_validate_rmsd_bond.auth_asym_id_2            C 
+_pdbx_validate_rmsd_bond.auth_comp_id_2            GLU 
+_pdbx_validate_rmsd_bond.auth_seq_id_2             17 
+_pdbx_validate_rmsd_bond.PDB_ins_code_2            ? 
+_pdbx_validate_rmsd_bond.label_alt_id_2            ? 
+_pdbx_validate_rmsd_bond.bond_value                1.172 
+_pdbx_validate_rmsd_bond.bond_target_value         1.252 
+_pdbx_validate_rmsd_bond.bond_deviation            -0.080 
+_pdbx_validate_rmsd_bond.bond_standard_deviation   0.011 
+_pdbx_validate_rmsd_bond.linker_flag               N 
+# 
+loop_
+_pdbx_validate_rmsd_angle.id 
+_pdbx_validate_rmsd_angle.PDB_model_num 
+_pdbx_validate_rmsd_angle.auth_atom_id_1 
+_pdbx_validate_rmsd_angle.auth_asym_id_1 
+_pdbx_validate_rmsd_angle.auth_comp_id_1 
+_pdbx_validate_rmsd_angle.auth_seq_id_1 
+_pdbx_validate_rmsd_angle.PDB_ins_code_1 
+_pdbx_validate_rmsd_angle.label_alt_id_1 
+_pdbx_validate_rmsd_angle.auth_atom_id_2 
+_pdbx_validate_rmsd_angle.auth_asym_id_2 
+_pdbx_validate_rmsd_angle.auth_comp_id_2 
+_pdbx_validate_rmsd_angle.auth_seq_id_2 
+_pdbx_validate_rmsd_angle.PDB_ins_code_2 
+_pdbx_validate_rmsd_angle.label_alt_id_2 
+_pdbx_validate_rmsd_angle.auth_atom_id_3 
+_pdbx_validate_rmsd_angle.auth_asym_id_3 
+_pdbx_validate_rmsd_angle.auth_comp_id_3 
+_pdbx_validate_rmsd_angle.auth_seq_id_3 
+_pdbx_validate_rmsd_angle.PDB_ins_code_3 
+_pdbx_validate_rmsd_angle.label_alt_id_3 
+_pdbx_validate_rmsd_angle.angle_value 
+_pdbx_validate_rmsd_angle.angle_target_value 
+_pdbx_validate_rmsd_angle.angle_deviation 
+_pdbx_validate_rmsd_angle.angle_standard_deviation 
+_pdbx_validate_rmsd_angle.linker_flag 
+1  1 N  A SER 9  ? ? CA  A SER 9  ? ? CB  A SER 9  ? ? 101.09 110.50 -9.41  1.50 N 
+2  1 CB A TYR 14 ? ? CG  A TYR 14 ? ? CD2 A TYR 14 ? ? 125.31 121.00 4.31   0.60 N 
+3  1 CB A TYR 14 ? ? CG  A TYR 14 ? ? CD1 A TYR 14 ? ? 115.29 121.00 -5.71  0.60 N 
+4  1 CB A TYR 19 ? ? CG  A TYR 19 ? ? CD2 A TYR 19 ? ? 125.04 121.00 4.04   0.60 N 
+5  1 CB A TYR 19 ? ? CG  A TYR 19 ? ? CD1 A TYR 19 ? ? 117.23 121.00 -3.77  0.60 N 
+6  1 CA A ASN 21 ? ? CB  A ASN 21 ? ? CG  A ASN 21 ? ? 127.28 113.40 13.88  2.20 N 
+7  1 CB B GLN 4  ? ? CG  B GLN 4  ? B CD  B GLN 4  ? B 80.21  111.60 -31.39 2.60 N 
+8  1 CA B VAL 12 ? ? CB  B VAL 12 ? ? CG2 B VAL 12 ? B 124.95 110.90 14.05  1.50 N 
+9  1 CB B GLU 21 ? ? CG  B GLU 21 ? B CD  B GLU 21 ? B 130.48 114.20 16.28  2.70 N 
+10 1 CD B ARG 22 ? ? NE  B ARG 22 ? B CZ  B ARG 22 ? B 133.51 123.60 9.91   1.40 N 
+11 1 NE B ARG 22 ? A CZ  B ARG 22 ? A NH1 B ARG 22 ? A 125.97 120.30 5.67   0.50 N 
+12 1 NE B ARG 22 ? A CZ  B ARG 22 ? A NH2 B ARG 22 ? A 116.21 120.30 -4.09  0.50 N 
+13 1 NE B ARG 22 ? B CZ  B ARG 22 ? B NH2 B ARG 22 ? B 126.83 120.30 6.53   0.50 N 
+14 1 CB B PHE 25 ? ? CG  B PHE 25 ? ? CD2 B PHE 25 ? ? 115.98 120.80 -4.82  0.70 N 
+15 1 CG B TYR 26 ? ? CD1 B TYR 26 ? ? CE1 B TYR 26 ? ? 116.15 121.30 -5.15  0.80 N 
+16 1 C  B LYS 29 ? ? N   B ALA 30 ? ? CA  B ALA 30 ? ? 136.99 121.70 15.29  2.50 Y 
+17 1 CB C TYR 14 ? ? CG  C TYR 14 ? ? CD1 C TYR 14 ? ? 116.49 121.00 -4.51  0.60 N 
+18 1 CB D PHE 1  ? ? CG  D PHE 1  ? ? CD1 D PHE 1  ? ? 115.17 120.80 -5.63  0.70 N 
+19 1 CG D HIS 5  ? ? ND1 D HIS 5  ? ? CE1 D HIS 5  ? ? 115.09 109.00 6.09   1.00 N 
+20 1 CB D TYR 16 ? ? CG  D TYR 16 ? ? CD2 D TYR 16 ? ? 116.12 121.00 -4.88  0.60 N 
+21 1 NE D ARG 22 ? A CZ  D ARG 22 ? A NH1 D ARG 22 ? A 126.94 120.30 6.64   0.50 N 
+# 
+loop_
+_pdbx_validate_torsion.id 
+_pdbx_validate_torsion.PDB_model_num 
+_pdbx_validate_torsion.auth_comp_id 
+_pdbx_validate_torsion.auth_asym_id 
+_pdbx_validate_torsion.auth_seq_id 
+_pdbx_validate_torsion.PDB_ins_code 
+_pdbx_validate_torsion.label_alt_id 
+_pdbx_validate_torsion.phi 
+_pdbx_validate_torsion.psi 
+1 1 SER A 9 ? ? -112.97 -123.16 
+2 1 SER C 9 ? ? -95.26  -153.06 
+# 
+_pdbx_validate_planes.id              1 
+_pdbx_validate_planes.PDB_model_num   1 
+_pdbx_validate_planes.auth_comp_id    ARG 
+_pdbx_validate_planes.auth_asym_id    B 
+_pdbx_validate_planes.auth_seq_id     22 
+_pdbx_validate_planes.PDB_ins_code    ? 
+_pdbx_validate_planes.label_alt_id    ? 
+_pdbx_validate_planes.rmsd            0.146 
+_pdbx_validate_planes.type            'SIDE CHAIN' 
+# 
+loop_
+_pdbx_entity_nonpoly.entity_id 
+_pdbx_entity_nonpoly.name 
+_pdbx_entity_nonpoly.comp_id 
+3 'ZINC ION' ZN  
+4 water      HOH 
+# 
diff --git a/tests/sample_data/README.md b/tests/sample_data/README.md
index 279f86e..adb0264 100644
--- a/tests/sample_data/README.md
+++ b/tests/sample_data/README.md
@@ -17,3 +17,7 @@ All files for this test suite have been drawn from databases across the web. Cit
 ## Crystallographic Open Database (COD):
 
 [aP16](http://www.crystallography.net/cod/1540955.html)
+
+## Protein Data Bank (PDB):
+
+[4INS](https://doi.org/10.2210/pdb4ins/pdb)
diff --git a/tests/sample_data/cif_file_keys.txt b/tests/sample_data/cif_file_keys.txt
new file mode 100644
index 0000000..ad8d773
--- /dev/null
+++ b/tests/sample_data/cif_file_keys.txt
@@ -0,0 +1,72 @@
+_journal_volume
+_journal_year
+_journal_page_first
+_journal_page_last
+_symmetry_Int_Tables_number
+_audit_block_doi
+_audit_creation_method
+_shelx_SHELXL_version_number
+_chemical_name_systematic
+_chemical_name_common
+_chemical_melting_point
+_chemical_formula_moiety
+_chemical_formula_sum
+_chemical_formula_weight
+_space_group_crystal_system
+_space_group_IT_number
+_space_group_name_Hall
+_cell_volume
+_cell_formula_units_Z
+_cell_measurement_temperature
+_cell_measurement_reflns_used
+_cell_measurement_theta_min
+_cell_measurement_theta_max
+_exptl_crystal_description
+_exptl_crystal_colour
+_exptl_crystal_density_meas
+_exptl_crystal_density_method
+_exptl_crystal_density_diffrn
+_exptl_crystal_F_000
+_exptl_transmission_factor_min
+_exptl_transmission_factor_max
+_exptl_crystal_size_max
+_exptl_crystal_size_mid
+_exptl_crystal_size_min
+_exptl_absorpt_coefficient_mu
+_shelx_estimated_absorpt_T_min
+_shelx_estimated_absorpt_T_max
+_exptl_absorpt_correction_type
+_exptl_absorpt_correction_T_min
+_exptl_absorpt_correction_T_max
+_exptl_absorpt_process_details
+_exptl_absorpt_special_details
+_diffrn_ambient_temperature
+_diffrn_radiation_wavelength
+_diffrn_radiation_type
+_diffrn_radiation_monochromator
+_diffrn_measurement_device_type
+_diffrn_measurement_method
+_diffrn_detector_area_resol_mean
+_diffrn_reflns_number
+_diffrn_reflns_av_R_equivalents
+_diffrn_reflns_limit_h_min
+_diffrn_reflns_limit_h_max
+_diffrn_reflns_limit_k_min
+_diffrn_reflns_limit_k_max
+_diffrn_reflns_limit_l_min
+_diffrn_reflns_limit_l_max
+_diffrn_reflns_theta_min
+_diffrn_reflns_theta_max
+_diffrn_reflns_theta_full
+_diffrn_measured_fraction_theta_max
+_diffrn_measured_fraction_theta_full
+_diffrn_reflns_Laue_measured_fraction_max
+_diffrn_reflns_Laue_measured_fraction_full
+_diffrn_reflns_point_group_measured_fraction_max
+_diffrn_reflns_point_group_measured_fraction_full
+_reflns_number_total
+_reflns_number_gt
+_reflns_threshold_expression
+_reflns_Friedel_coverage
+_reflns_Friedel_fraction_max
+_reflns_Friedel_fraction_full
diff --git a/tests/test_key_reader.py b/tests/test_key_reader.py
new file mode 100644
index 0000000..f490b56
--- /dev/null
+++ b/tests/test_key_reader.py
@@ -0,0 +1,71 @@
+import numpy as np
+import pytest
+from conftest import bad_cif, box_keys, cif_files_mark, random_keys_mark
+from gemmi import cif
+
+from parsnip._errors import ParseWarning
+from parsnip.parse import read_cell_params, read_key_value_pairs
+
+
+def _gemmi_read_keys(filename, keys, as_number=True):
+    file_block = cif.read_file(filename).sole_block()
+    if as_number:
+        return np.array([cif.as_number(file_block.find_value(key)) for key in keys])
+    else:
+        return np.array([file_block.find_value(key) for key in keys])
+
+
+@cif_files_mark
+def test_read_key_value_pairs(cif_data):
+    parsnip_data = read_key_value_pairs(
+        filename=cif_data.filename, keys=cif_data.single_value_keys
+    )
+    gemmi_data = _gemmi_read_keys(
+        cif_data.filename, keys=cif_data.single_value_keys, as_number=False
+    )
+    np.testing.assert_array_equal([*parsnip_data.values()], gemmi_data)
+
+
+@pytest.mark.filterwarnings("ignore: Keys")
+@cif_files_mark
+@random_keys_mark(n_samples=20)
+def test_read_key_value_pairs_random(cif_data, keys):
+    parsnip_data = read_key_value_pairs(filename=cif_data.filename, keys=keys)
+    gemmi_data = _gemmi_read_keys(cif_data.filename, keys=keys, as_number=False)
+    np.testing.assert_array_equal([*parsnip_data.values()], gemmi_data)
+
+
+def test_read_key_value_pairs_badcif(cif_data=bad_cif):
+    expected_warning = "Keys {'not_a_valid_key'} did not match any data!"
+    with pytest.warns(ParseWarning, match=expected_warning):
+        parsnip_data = read_key_value_pairs(
+            filename=cif_data.filename, keys=cif_data.single_value_keys
+        )
+    correct_data = [
+        "1.000000(x)",
+        "4.32343242",
+        "3.1415926535897932384626433832795028841971693993751058209749",
+        "90.00000",
+        "-10.12345",
+        "210.00000",
+        "123",
+        r"45.6a/\s",
+        None,
+    ]
+    np.testing.assert_array_equal([*parsnip_data.values()], correct_data)
+
+
+@cif_files_mark
+def test_key_value_warnings(cif_data, keys=("_FALSE_KEY")):
+    with pytest.warns(ParseWarning):
+        _ = read_key_value_pairs(filename=cif_data.filename, keys=keys)
+
+
+@cif_files_mark
+def test_read_cell_params(cif_data, keys=box_keys):
+    mmcif = "PDB_4INS_head.cif" in cif_data.filename
+    parsnip_data = read_cell_params(filename=cif_data.filename, mmcif=mmcif)
+    if mmcif:
+        keys = (key[0] + key[1:].replace("_", ".", 1) for key in keys)
+    gemmi_data = _gemmi_read_keys(cif_data.filename, keys)
+    np.testing.assert_array_equal(parsnip_data, gemmi_data)
diff --git a/tests/test_table_reader.py b/tests/test_table_reader.py
index 0a9da70..9a883b9 100644
--- a/tests/test_table_reader.py
+++ b/tests/test_table_reader.py
@@ -3,7 +3,7 @@
 from conftest import bad_cif, cif_files_mark
 from gemmi import cif
 
-from parsnip._utils import ParseWarning
+from parsnip._errors import ParseWarning
 from parsnip.parse import read_fractional_positions, read_table
 
 
@@ -13,6 +13,8 @@ def _gemmi_read_table(filename, keys):
 
 @cif_files_mark
 def test_read_symop(cif_data):
+    if "PDB_4INS_head.cif" in cif_data.filename:
+        return
     parsnip_data = read_table(filename=cif_data.filename, keys=cif_data.symop_keys)
     gemmi_data = _gemmi_read_table(cif_data.filename, cif_data.symop_keys)
 
@@ -20,7 +22,7 @@ def test_read_symop(cif_data):
     # We have to apply this same transformation to the gemmi data to check correctness.
     if "CCDC_1446529_Pm-3m.cif" in cif_data.filename:
         gemmi_data = np.array(
-            [[item.replace(", ", ",") for item in row] for row in gemmi_data]
+            [[item.replace(", ", ",_") for item in row] for row in gemmi_data]
         )
 
     np.testing.assert_array_equal(parsnip_data, gemmi_data)
@@ -28,6 +30,8 @@ def test_read_symop(cif_data):
 
 @cif_files_mark
 def test_read_atom_sites(cif_data):
+    if "PDB_4INS_head.cif" in cif_data.filename:
+        return
     parsnip_data = read_table(
         filename=cif_data.filename,
         keys=cif_data.atom_site_keys,
@@ -48,6 +52,10 @@ def test_partial_table_read(cif_data, subset):
         keys=subset_of_keys,
     )
     gemmi_data = _gemmi_read_table(cif_data.filename, subset_of_keys)
+    if "PDB_4INS_head.cif" in cif_data.filename:
+        parsnip_data = np.array(
+            [[item.replace("_", " ") for item in row] for row in gemmi_data]
+        )
 
     np.testing.assert_array_equal(parsnip_data, gemmi_data)
 
@@ -58,6 +66,7 @@ def test_bad_cif_symop(cif_data=bad_cif):
         parsnip_data = read_table(
             filename=cif_data.filename,
             keys=cif_data.symop_keys,
+            regex_filter=(r",\s+", ","),
         )
     correct_data = [
         ["1", "x,y,z"],
@@ -104,6 +113,8 @@ def test_bad_cif_atom_sites(cif_data=bad_cif):
 
 @cif_files_mark
 def test_read_fractional_positions(cif_data):
+    if "PDB_4INS_head.cif" in cif_data.filename:
+        return
     keys = ("_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z")
     parsnip_data = read_fractional_positions(filename=cif_data.filename)
     gemmi_data = _gemmi_read_table(cif_data.filename, keys)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index f33ce3f..c4d1e02 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,8 @@
+import numpy as np
 import pytest
 
-from parsnip._utils import ParseError, ParseWarning
+from parsnip._errors import ParseError, ParseWarning
+from parsnip._utils import _deg2rad, _str2num
 
 
 def test_parse_error():
@@ -15,3 +17,21 @@ def test_parse_warning():
         raise ParseWarning("TEST_WARNING_RAISED")
 
     assert "TEST_WARNING_RAISED" in str(warning.value)
+
+
+def test_deg2rad(seed=43):
+    rng = np.random.default_rng(seed)
+    angles = rng.uniform(low=0, high=180, size=10_000)
+    np.testing.assert_allclose(
+        np.deg2rad(angles), [_deg2rad(val) for val in angles], atol=2e-15
+    )
+
+
+@pytest.mark.parametrize("string", ["3.1415926", "-12345", str(1e6), "0.00000003579"])
+def test_str2num(string):
+    converted_val = _str2num(string)
+    if "." in string:
+        assert isinstance(converted_val, float)
+    else:
+        assert isinstance(converted_val, int)
+    assert np.isclose(float(string), converted_val)