From 4bf2b4a2a7bb1ab98334f3364db9d9702f4e5417 Mon Sep 17 00:00:00 2001
From: Martin Kuban <kuban@physik.hu-berlin.de>
Date: Wed, 28 Feb 2024 15:51:16 +0100
Subject: [PATCH] Version 2

---
 .gitignore                                 |   9 +-
 LICENSE                                    | 209 +---------
 README.md                                  |  37 +-
 nomad_dos_fingerprints/DOSfingerprint.py   | 443 ++++++++++++++++++---
 nomad_dos_fingerprints/__init__.py         |   6 +-
 nomad_dos_fingerprints/grid.py             | 409 ++++++++++++++++---
 nomad_dos_fingerprints/plotting.py         | 228 ++++++++++-
 nomad_dos_fingerprints/similarity.py       |  44 +-
 pyproject.toml                             |  33 ++
 requirements.txt                           |   2 -
 setup.py                                   |  16 +-
 tests/test_DOSfingerprint.py               |  95 ++++-
 tests/test_Grid.py                         | 100 ++++-
 tests/test_functional.py                   |  21 +-
 tests/test_similarity.py                   |  47 ++-
 timing/profiling_fingerprint_generation.py |  36 ++
 16 files changed, 1330 insertions(+), 405 deletions(-)
 create mode 100644 pyproject.toml
 delete mode 100644 requirements.txt
 create mode 100644 timing/profiling_fingerprint_generation.py

diff --git a/.gitignore b/.gitignore
index 8e3f5d4..1aa6b98 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,9 @@
-*/__pycache__
+__pycache__
 *.egg-info
 *.code-workspace
-run_tests.bat
 .vscode
-.coverage*
\ No newline at end of file
+.coverage*
+.pytest_cache
+notebooks
+build
+venvs
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
index d645695..2e0e784 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,202 +1,13 @@
+Copyright 2022, Martin Kuban, Santiago Rigamonti, Markus Scheidgen, and Claudia Draxl
 
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
 
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+      http://www.apache.org/licenses/LICENSE-2.0
 
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/README.md b/README.md
index 77d100b..5ca0d7a 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,16 @@
-This package implements fingerprints of the electronic density-of-states (DOS) for the evaluation of similarity of materials based on their electronic structures.
+Fingerprinting of materials based on their electronic structure.
 
-The fingerprints are based on a modification on the D-Fingerprints presented in Ref. [1].
-Our modification allows to target specific energy ranges for the evaluation of the similarity of the electronic structure.
-As a similarity measure we use the Tanimoto coefficient [2].
+Python package to compute fingerprints of the electronic density-of-states (DOS) and evaluate the similarity of materials based on their electronic structures.
+
+This package implements the DOS fingerprints and the similarity metrics introduced in Refs. [1,2]. 
+
+Our DOS fingerprints can be tailored to target specific ranges of the energy spectrum. The computed fingerprints allow for the evaluation of the similarity of the electronic structure.
+
+As a similarity measure we use the Tanimoto coefficient [3].
 
 # Usage
 
-Fingerprints are instances of the `DOSFingerprint()` class and can be calculated by providing the energy in [Joule] and the DOS in [states/unit cell/Joule] to the `calculate()` method. Furthermore, the parameters of a non-uniform grid can be chosen. The default grid is specialized on the energy range between -10 and 5 eV and emphasizes the upper valence region.
+Fingerprints are instances of the `DOSFingerprint()` class and can be calculated by providing the energy in [Joule] and the DOS in [states/unit cell/Joule] to the `calculate()` method. Furthermore, the energy axis can be discretized over a non-uniform grid. For this, specific parameters must be provided. By default, the grid is specialized on the energy range between -10 and 5 eV, thereby emphasizing the upper valence region.
 
 ```Python
 from nomad_dos_fingerprints import DOSFingerprint
@@ -20,8 +24,27 @@ from nomad_dos_fingerprints import tanimoto_similarity
 tc = tanimoto_similarity(dos_fingerprint_1, dos_fingerprint_2)
 ```
 
+# Citation
+
+If you use this package in a publication, please cite it in the following way:
+
+Martin Kuban, Santiago Rigamonti, Markus Scheidgen, and Claudia Draxl:
+_Density-of-states similarity descriptor for unsupervised learning from materials data_
+Sci Data *9*, 646 (2022). https://doi.org/10.1038/s41597-022-01754-z
+
+# Links
+
+We maintain a notebook in the NOMAD AI Toolkit:
+https://nomad-lab.eu/aitoolkit/tutorial-dos-similarity
+
 # References
 
-[1] Isayev _et al._, Chem. Mater. 2015, 27, 3, 735–743 (doi:10.1021/cm503507h)
+[1] Martin Kuban, Santiago Rigamonti, Markus Scheidgen, and Claudia Draxl:
+_Density-of-states similarity descriptor for unsupervised learning from materials data_.
+_Sci Data_ *9*, 646 (2022). https://doi.org/10.1038/s41597-022-01754-z
+
+[2] Martin Kuban, Šimon Gabaj, Wahib Aggoune, Cecilia Vona, Santiago Rigamonti, Claudia Draxl:
+_Similarity of materials and data-quality assessment by fingerprinting_.
+_MRS Bulletin_ *47*, 991–999 (2022). https://doi.org/10.1557/s43577-022-00339-w
 
-[2] P. Willet _et al._, J. Chem. Inf. Comput . 38 , 983 996 (1998) (doi:10.1021/ci9800211)
+[3] P. Willet _et al._, _J. Chem. Inf. Comput._ *38* , 983 996 (1998) (https://doi.org/10.1021/ci9800211)
\ No newline at end of file
diff --git a/nomad_dos_fingerprints/DOSfingerprint.py b/nomad_dos_fingerprints/DOSfingerprint.py
index b102711..393dedc 100644
--- a/nomad_dos_fingerprints/DOSfingerprint.py
+++ b/nomad_dos_fingerprints/DOSfingerprint.py
@@ -1,6 +1,8 @@
 import numpy as np
 from bitarray import bitarray
 from functools import partial
+from typing import Callable, List, Union
+from itertools import groupby
 
 from .grid import Grid
 from .similarity import tanimoto_similarity
@@ -8,72 +10,297 @@
 ELECTRON_CHARGE = 1.602176565e-19
 
 class DOSFingerprint():
+    """
+    A fingerprint of the electronic density-of-states (DOS), 
+    obtained by integrating the DOS over discrete intervals
+    and binning of the resulting histogram.
+    
+    **Keyword arguments:**
 
-    def __init__(self, stepsize = 0.05, similarity_function = tanimoto_similarity, **kwargs):
+    similarity_function: `Callable`
+        A function that allows to calculate the similarity between two DOS fingerprints    
+
+        default: `nomad_dos_fingerprints.similarity.tanimoto_similarity`
+
+    Additional keyword arguments are passed to the similarity function.
+    """
+
+    def __init__(self, 
+                 similarity_function: Callable = tanimoto_similarity, **kwargs):
         self.bins = ''
         self.indices = []
-        self.stepsize = stepsize
         self.filling_factor = 0
         self.grid_id = None
         self.set_similarity_function(similarity_function, **kwargs)
+        self._n_state_bins = None
+
+    @property
+    def n_state_bins(self):
+        if self._n_state_bins is None:
+            assert self.grid_id is not None, "Grid id is not set. Will be set when calculating the fingerprint with a grid."
+            self._n_state_bins = Grid.resolve_grid_id(self.grid_id)["n_pix"]
+        return self._n_state_bins
+
+    def calculate(self, 
+                  dos_energies: np.ndarray, 
+                  dos_values: np.ndarray, 
+                  grid_id: str = None, 
+                  convert_data: Union[None, str, Callable] = None, 
+                  normalization_factor: float = 1.0) -> object:
+        """
+        Calculate the fingerprint data from normalized DOS energies and values.
+
+        **Arguments:**
+
+        dos_energies: `np.ndarray`
+            Energies of the DOS spectrum
+
+            By default, units are [eV].
+
+            This behavior can be changed by changing `convert_data` (see below).
+
+        dos_values: `np.ndarray`
+            Values of the DOS spectrum
+
+            By default, units are [states/cell/eV].
+
+            This behavior can be changed by changing `convert_data` (see below).
+
+        **Keyword arguments:**
+
+        grid_id: `str`
+            ID for describing the `Grid` object that is used to calculate the fingerprint data.
+            For details, see documentation there.
+
+            default: `None`: Use `Grid` default values.
+
+        convert_data: `Union[str, None, Callable]`
+            Convert `dos_energies` and `dos_values` to different units.
+
+            The string `'enc'` transforms [Joule] to [eV] and sums all spin channels of DOS values. 
+            Additionally, assumes that `dos_energies` is an `Interable` of `float`s 
+            and `dos_values` is an `Interable` of `Interables` of lenght `len(dos_energies)` of `float`s.
+
+            If set to `None`, no conversion will be performed.
+
+            If a callable is given, it will be called as:
+            ``energy, dos = convert_data(dos_energies, dos_values)``.
+
+            default: `None`
+
+        normalization_factor: `float`
+            Factor used for unit conversion, e.g. if the DOS is given or required per atom.
+            Used only if `convert_data` is "Enc".
 
-    def calculate(self, dos_energies, dos_values, grid_id = 'dg_cut:56:-2:7:(-10, 5)', unit_cell_volume = 1, n_atoms = 1):
-        energy, dos = self._convert_dos(dos_energies, dos_values, unit_cell_volume = unit_cell_volume, n_atoms = n_atoms)
-        raw_energies, raw_dos = self._integrate_to_bins(energy, dos)
-        grid = Grid().create(grid_id = grid_id)
-        self.grid_id = grid.get_grid_id()
-        self.indices, self.bins = self._calculate_bytes(raw_energies, raw_dos, grid)
+            default: `1.0`
+
+        **Returns:**
+
+        self: `DOSFingerprint`
+            Fingerprint after calculation
+        """
+        if isinstance(convert_data, str) and convert_data.lower() == 'enc':
+            energy, dos = self._convert_dos(dos_energies, dos_values, normalization_factor = normalization_factor)
+        elif isinstance(convert_data, Callable):
+            energy, dos = convert_data(dos_energies, dos_values)
+        elif convert_data is None:
+            energy, dos = dos_energies, dos_values
+        else:
+            raise ValueError('Key-word argument `convert_data` must be either the string "enc", a callable, or `None`.')
+        grid = Grid.create(grid_id = grid_id)
+        energy = np.array(energy) - grid.e_ref
+        raw_energies, raw_dos = self._integrate_to_bins(energy, dos, grid.delta_e_min)
+        self.grid_id = grid_id if grid_id is not None else grid.get_grid_id()
+        bin_fp = self._calculate_bit_fingerprint(raw_energies, raw_dos, grid)
+        self.bins = self._compress_binary_fingerprint_string(bin_fp)
+        self._n_state_bins = grid.n_pix
         return self
 
-    def to_dict(self):
-        return dict(bins = self.bins, indices = self.indices, stepsize = self.stepsize, grid_id = self.grid_id, filling_factor = self.filling_factor)
+    def to_dict(self) -> dict:
+        """
+        Convert data to dictionary.
+        """
+        data_dict = {}
+        for key in ['bins', 'indices', 'grid_id', 'filling_factor', 'overflow', 'undersampling']:
+            data_dict[key] = getattr(self, key)
+        return data_dict
 
-    @staticmethod
-    def from_dict(fp_dict):
-        self = DOSFingerprint()
+    @classmethod
+    def from_dict(cls, fp_dict: dict) -> object:
+        """
+        Create fingerprint object from dictionary.
+        """
+        self = cls()
         self.bins = fp_dict['bins']
         self.indices = fp_dict['indices']
-        self.stepsize = fp_dict['stepsize']
         self.grid_id = fp_dict['grid_id']
         self.filling_factor = fp_dict['filling_factor']
+        if 'overflow' in fp_dict.keys():
+            self.overflow = fp_dict['overflow']
+        if 'undersampling' in fp_dict.keys():
+            self.undersampling = fp_dict['undersampling']
+        self._n_state_bins = Grid.resolve_grid_id(self.grid_id)["n_pix"]
         return self
 
-    def set_similarity_function(self, similarity_function, **kwargs):
+    def set_similarity_function(self, similarity_function: Callable, **kwargs):
+        """
+        Set the similarity function of the fingerprint.
+
+        **Arguments:**
+
+        similarity_function: `Callable`
+            Function for calculating the similarity between `DOSFingerprint` objects.
+
+        Keyword arguments are passed to the similarity function.
+        """
         self.similarity_function = partial(similarity_function, **kwargs)
 
-    def get_similarity(self, fingerprint):
+    def get_similarity(self, fingerprint: object) -> float:
+        """
+        Get similarity value between self and another fingerprint object.
+
+        **Arguments:**
+
+        fingerprint: `DOSFingerprint`
+            Other fingerprint to calculate similarity to.
+
+        **Returns:**
+
+        similarity: `float`
+            Similarity between both fingerprints.
+
+        **Raises:**
+
+        `TypeError`: `fingerprint` is not a `DOSFingerprint`
+        """
+        if not isinstance(fingerprint, DOSFingerprint):
+            raise TypeError("Other fingerprint must be a `DOSFingerprint` object.")
         return self.similarity_function(self, fingerprint)
 
-    def get_similarities(self, list_of_fingerprints):
+    def get_similarities(self, list_of_fingerprints: List[object]) -> np.ndarray:
+        """
+        Get similarities of self to a list of other fingerprints.
+
+        **Arguments:**
+
+        list_of_fingerprints: `List[DOSFingerprint]`
+            Other fingerprints to calculate similarity to.
+
+        **Returns:**
+
+        similarities: `np.ndarray`
+            Similarities of self to other fingerprints.
+        """
         return np.array([self.similarity_function(self, fp) for fp in list_of_fingerprints])
 
+    def get_bitarray(self) -> object:
+        """
+        Get `bitarray.bitarray` representing the fingerprint data.
+
+        **Returns:**
+
+        bits: `bitarray`
+            Binary fingerprint data 
+        """
+        if self.bins == '':
+            raise AttributeError("Fingerprint is not calculated! Use `calculate()`.")
+        bit_string = self._expand_fingerprint_string(self.bins)
+        bits = bitarray(bit_string)
+        return bits
+
     def __eq__(self, other):
-        return self.bins == other.bins and self.indices == other.indices and self.stepsize == other.stepsize and self.grid_id == other.grid_id and self.filling_factor == other.filling_factor
+        if not isinstance(other, DOSFingerprint):
+            return False
+        for attr in ['bins', 'indices', 'stepsize', 'grid_id', 'filling_factor']:
+            if getattr(self, attr) != getattr(other, attr):
+                return False
+        return True
+
+    def _integral_house(self, a, b, interval):
+        # integrates a "house" like figure, consisting of a rectangle and a 90 degree triangle, with heights a and b, and width=interval
+        return (a + b) * interval / 2.
 
-    def _integrate_to_bins(self, xs, ys):
+    def _interpolate_dos(self, dos_values, energy_values, requested_energy):
         """
-        Performs stepwise numerical integration of ``ys`` over the range of ``xs``. The stepsize of the generated histogram is controlled by DOSFingerprint().stepsize.
+        Returns a linearly interpolated value between two DOS points.
         """
-        if len(xs) < 2 or len(ys) < 2:
-            raise ValueError('Invalid input. Please provide arrays with len > 2.')
-        xstart = round(int(xs[0] / (self.stepsize * 1.)) * self.stepsize, 8)  # define the limits that fit with the predefined stepsize
-        xstop = round(int(xs[-1] / (self.stepsize * 1.)) * self.stepsize, 8)
-        x_interp = np.arange(xstart, xstop + self.stepsize, self.stepsize)
-        x_interp = np.around(x_interp, decimals=5)
-        y_interp = np.interp(x_interp, xs, ys)
-        y_integ = np.array([np.trapz(y_interp[idx:idx + 2], x_interp[idx:idx + 2]) for idx in range(len(x_interp)-1)])
-        return x_interp[:-1], y_integ
+        if len(dos_values) != 2 or len(energy_values) != 2:
+            raise ValueError("Error in _interpolate_dos: Wrong number of arguments for calculation of gradient.")
+        if energy_values[0] == energy_values[1]:
+            return dos_values[0]
+        gradient = (dos_values[1] - dos_values[0]) / ((energy_values[1] - energy_values[0]) * 1.)
+        difference = requested_energy - energy_values[0]
+        interpolated = dos_values[0] + gradient * difference
+        return interpolated
+    
+    def _find_energy_cutoff_indices(self, energy, estart, estop):
+        """
+        This find the correct indices for the integration, that is, all values of the energy in the interval [estart-1,estop+1] will be in the range [emin_idx,emax_idx]
+        WARNING: Assumes that estart<energy[0]!
+        """
+        emin_idx = None
+        emax_idx = None
+        index = 0
+        while emin_idx is None or emax_idx is None:
+            if energy[index] > estart and emin_idx is None:
+                emin_idx = index - 1
+            if energy[-(index + 1)] < estop and emax_idx is None:
+                emax_idx = -index + 1
+                if emax_idx > 0:
+                    emax_idx = 0 #HOTFIX
+            index += 1
+        if emax_idx == 0:
+            emax_idx = None
+        return emin_idx, emax_idx
+
+    def _integrate_to_bins(self, energy, dos, stepsize=0.05):
+        """
+        Performs stepwise numerical integration of ``ys`` over the range of ``xs``.
+        """
+        if len(energy) < 2 or len(dos) < 2:
+            raise ValueError(f'Invalid input. Please provide arrays with len > 2. len(x) : {len(energy)} len(y) : {len(dos)}')
+        
+        # define the limits that fit with the predefined stepsize 
+        xstart = round(np.ceil(energy[0] / (stepsize * 1.)) * stepsize, 12)  
+        xstop = round(np.floor(energy[-1] / (stepsize * 1.)) * stepsize, 12)
 
-    def _convert_dos(self, energy, dos, unit_cell_volume = 1, n_atoms = 1):
+        # Find the indices in original arrays closest to new limits defined by stepsize
+        idx_min, idx_max = self._find_energy_cutoff_indices(energy, xstart, xstop)
+
+        energy = energy[idx_min:idx_max]
+        dos = dos[idx_min:idx_max]
+        current_energy = xstart
+        current_dos = self._interpolate_dos([dos[0], dos[1]], [energy[0], energy[1]], xstart)
+        dos_binned = []
+        energy_binned = []
+        index = 1  # starting from the second value, because energy[0]<xstart. Thus energy[index] is made to be larger than current_energy.
+        while current_energy < xstop:
+            energy_binned.append(current_energy)
+            next_energy = round(current_energy + stepsize, 9)
+            integral = 0
+            while energy[index] < next_energy and next_energy < xstop:
+                integral += self._integral_house(current_dos, dos[index], energy[index] - current_energy)
+                current_dos = dos[index]
+                current_energy = energy[index]
+                index += 1
+            next_dos = self._interpolate_dos([dos[index - 1], dos[index]], [energy[index - 1], energy[index]],
+                                             next_energy)
+            integral += self._integral_house(current_dos, next_dos, next_energy - current_energy)
+            dos_binned.append(integral)
+            current_energy = next_energy
+            current_dos = next_dos
+        return energy_binned, dos_binned
+
+    def _convert_dos(self, energy, dos, normalization_factor = 1):
         """
         Convert units of DOS from energy: Joule; dos: states/volume/Joule to eV and sum spin channels if they are present.
         """
         energy = np.array([value / ELECTRON_CHARGE for value in energy])
         dos_channels = [np.array(values) for values in dos]
-        dos = sum(dos_channels) * ELECTRON_CHARGE * unit_cell_volume * n_atoms
+        dos = sum(dos_channels) * ELECTRON_CHARGE * normalization_factor
         return energy, dos
 
-    def _binary_bin(self, dos_value, grid_bins):
+    def _binary_bin(self, dos_value: float, grid_bins: np.ndarray):
         bin_dos = ''
         for grid_bin in grid_bins:
             if grid_bin <= dos_value:
@@ -82,31 +309,137 @@ def _binary_bin(self, dos_value, grid_bins):
                 bin_dos += '0'
         return bin_dos
 
-    def _calculate_bytes(self, energy, dos, grid):
+    def _calc_grid_indices(self, energy_bins: np.ndarray, grid: Grid):
+        """
+        Calculate indices of the DOS grid that describe the energy range where the DOS is defined.
+        Args:
+            energy_bins: np.ndarray: energy bins cut to be no larger than the Grid range
+            grid: Grid: description of the DOS discretization
+        Return:
+            self.indices: list: Grid indices that indicate which parts of the Grid are no larger than the energy bins
         """
-        Calculate the byte fingerprint.
+        grid_start, grid_end = grid.get_grid_indices_for_energy_range(energy_bins)
+        self.indices = [grid_start, grid_end]
+        return self.indices
+        
+    def _adapt_energy_bin_sizes(self, energy_bins_raw: np.ndarray, states_raw: np.ndarray, grid: Grid):
+        """
+        Adapt energy bin sizes to the specified values in the grid.
+        Args:
+            energy_bins_raw: numpy.ndarray: locations of energy bins
+            states_raw: numpy.ndarray: states in the bins declared in `energy_bins`
+            grid: Grid: description of the DOS discretization
+        Returns:
+            adapted_bins: list: new energy discretization steps
+            adapted_states: list: state bins with adapted discretization steps
         """
         grid_array = grid.grid()
-        # cut the energy and dos to grid size
-        energy, dos = np.transpose([(e,d) for e,d in zip(energy, dos) if (e >= grid_array[0][0] and e <= grid_array[-1][0])])
-        # calculate fingerprint
+        # cut the energy and states to grid size
+        energy_bins, states = np.transpose([(e,d) for e,d in zip(energy_bins_raw, states_raw) if (e >= grid_array[0][0] and e <= grid_array[-1][0])])
+        # find grid start and end points
+        grid_start, grid_end = self._calc_grid_indices(energy_bins, grid)
+        # if the last energy bin is less than or equal to the last grid point, the pre-to-last grid index was found instead of the correct one
+        # the following lines fix this, but it is not very elegant
+        if grid_end <= len(grid_array) - 1:
+            if len(energy_bins) == len(energy_bins_raw) and energy_bins_raw[-1] > grid_array[grid_end][0]:
+                grid_end += 1
+                self.indices[1] += 1
+        # sum dos bins to adapt to inhomogeneous energy grid
+        adapted_bins = []
+        adapted_states = []
+        for index in range(grid_start, grid_end):
+            eps_i = grid_array[index][0]
+            eps_iplusdelta = grid_array[index+1][0]
+            adapted_bins.append(eps_i)
+            adapted_states.append(sum(np.array([s for e, s in zip(energy_bins, states) if e >= eps_i and e < eps_iplusdelta])))
+        return adapted_bins, adapted_states
+
+    def _calc_bit_vector(self, adapted_states: np.ndarray, grid: Grid):
+        """
+        Calculate bit representation of grid-adapted states per energy bin.
+        Args:
+            adapted_states: numpy.ndarray: states in the bins declared in `energy_bins`
+            grid: Grid: description of the DOS discretization
+        Sets:
+            overflow: float: sum of states that can not be described by the current grid
+            undersampling: float: fraction of states that are in partially filled grid cells (and thus ignored)
+        Returns: 
+            discrete_states: str: binary representation of binned DOS spectrum
+        """
         bin_fp = ''
-        grid_index = 0
-        for idx, grid_e in enumerate(grid_array):
-            if grid_e[0] > energy[0]:
-                grid_index = idx - 1
-                if grid_index < 0:
-                    grid_index = 0
-                break
-        grid_start = grid_index
-        fp_index = 0
-        while grid_array[grid_index + 1][0] < energy[-1]:
-            current_dos = 0
-            while energy[fp_index] < grid_array[grid_index + 1][0]:
-                current_dos += dos[fp_index]
-                fp_index += 1
-            bin_fp += self._binary_bin(current_dos, grid_array[grid_index][1])
-            grid_index += 1
-        self.filling_factor = bin_fp.count('1') / len(bin_fp)
+        grid_array = grid.grid()
+        grid_start, grid_end = self.indices
+        overflow = 0
+        undersampling = 0
+        for states, grid_segment in zip(adapted_states, grid_array[grid_start:grid_end + 1]):
+            bin_fp += self._binary_bin(states, grid_segment[1])
+            overflow += self._get_segment_overflow(states, grid_segment)
+            undersampling += self._get_segment_undersampling(states, grid_segment)
+        self.overflow = overflow
+        self.undersampling = undersampling / sum(adapted_states)
+        return bin_fp
+
+    def _calculate_byte_representation(self, bin_fp: str):
         byte_fp = bitarray(bin_fp).tobytes().hex()
-        return [grid_start, grid_index], byte_fp
+        return byte_fp
+
+    def _compress_group(self, count: int, char: str):
+        representative_char = 't' if char == '1' else 'f'
+        return str(count) + representative_char
+
+    def _calculate_bit_fingerprint(self, binned_energies: np.ndarray, binned_states: np.ndarray, grid: Grid):
+        """
+        Calculate bit representation of DOS fingerprint.
+        """
+        _, adapted_states = self._adapt_energy_bin_sizes(binned_energies, binned_states, grid)
+        bin_fp = self._calc_bit_vector(adapted_states, grid)
+        self.filling_factor = bin_fp.count('1') / len(bin_fp)
+        return bin_fp
+
+    def _get_segment_overflow(self, states, grid_segment):
+        """
+        Calculate the number of states that lies beyond the region described by a grid segment.
+        Args:
+            states: float: states in the bins declared in `energy_bins`
+            grid_segment: Grid: description of the DOS discretization
+        Returns:
+            segment_overflow: float: number of states that can be described by the grid segment
+        """
+        delta_states = states - grid_segment[1][-1] 
+        segment_overflow = delta_states if delta_states >= 0 else 0
+        return  segment_overflow
+    
+    def _get_segment_undersampling(self, states, grid_segment):
+        """
+        Calculate the amount of states that are in partially filled grid cells.
+        Args:
+            states: float: states in the bins declared in `energy_bins`
+            grid_segment: Grid: description of the DOS discretization
+        Returns:
+            segment_undersampling: float: sum of states that are in partially filled grid cells
+
+        """
+        delta_states = 0
+        for idx in range(len(grid_segment[1]) - 1):
+            if grid_segment[1][idx] < states < grid_segment[1][idx+1]:
+                delta_states = states - grid_segment[1][idx]
+                break
+        return delta_states
+
+    def _compress_binary_fingerprint_string(self, fingerprint_string: str):
+        compressed_string = ''
+        for char, count in groupby(fingerprint_string):
+            compressed_string += self._compress_group(len(list(count)), char)
+        return compressed_string
+
+    def _expand_fingerprint_string(self, compressed_fingerprint_string):
+        current_string = ''
+        decompressed_string = ''
+        for char in compressed_fingerprint_string:
+            if char.isnumeric():
+                current_string += char 
+            else:
+                number_to_add = '1' if char == 't' else '0'
+                decompressed_string += number_to_add * int(current_string)
+                current_string = ''
+        return decompressed_string
\ No newline at end of file
diff --git a/nomad_dos_fingerprints/__init__.py b/nomad_dos_fingerprints/__init__.py
index 678b38f..5cd741b 100644
--- a/nomad_dos_fingerprints/__init__.py
+++ b/nomad_dos_fingerprints/__init__.py
@@ -1,3 +1,3 @@
-from .DOSfingerprint import DOSFingerprint
-from .grid import Grid
-from .similarity import tanimoto_similarity
\ No newline at end of file
+from nomad_dos_fingerprints.DOSfingerprint import DOSFingerprint # noqa: F401
+from nomad_dos_fingerprints.grid import Grid # noqa: F401
+from nomad_dos_fingerprints.similarity import tanimoto_similarity # noqa: F401
\ No newline at end of file
diff --git a/nomad_dos_fingerprints/grid.py b/nomad_dos_fingerprints/grid.py
index 57633c6..38c90e9 100644
--- a/nomad_dos_fingerprints/grid.py
+++ b/nomad_dos_fingerprints/grid.py
@@ -1,63 +1,382 @@
+from typing import Callable, List, Set
 import numpy as np
+from functools import partial
+
+class NotCreatedError(Exception):
+    """
+    Empty container for naming the error occuring when the Grid was initialized, but not created.
+    """
+    pass
 
 class Grid():
+
+    """
+    Grid object used to discretize DOS spectra for the generation of DOS fingerprints.
+    To obtain a Grid() object, create it with:
+        grid = Grid.create(**parameters)    
+    """
     
     @staticmethod
-    def create(grid_id = None, grid_type = 'dg_cut', mu = -2, sigma = 7, cutoff = (-10,5), num_bins = 56, original_stepsize = 0.05):
-        if grid_id == None:
+    def create(grid_id = None,
+               grid_type = "nonuniform", 
+               e_ref = -2,
+               delta_e_min = 0.05, 
+               delta_e_max = 1.05, 
+               delta_rho_min = 0.5, 
+               delta_rho_max = 5.5, 
+               width = 7, 
+               n_pix = 56, 
+               cutoff = [-8,7],
+               energy_discretization = None,
+               states_discretization = None):
+        """
+        Create grid object. 
+
+        There are two options to initialize the grid: 
+            1. Initialize from grid id: The grid id, which is a string that contains all grid parameters, is used to set the parameters of the grid.
+            2. Initialize from parameters: The grid is created directly from the parameters passed as key-word arguments.
+
+        **Parameters**
+
+        grid_id: *str* or *None*
+            Identifier to re-create a grid, or None, if new or default parameters are given.
+
+        grid_type: *str*
+            Currently two options are available:
+                * *nonuniform* - Create a non-uniform grid from parameters
+                * *uniform* - Create an uniform grid from parameters
+                * *custom* - Create a custom grid from lists defining the limits of the grid
+            
+        e_ref: float
+            Reference energy in eV, i.e. the center of the grid.
+
+        delta_e_min: float
+            Minimal energy difference in eV between two grid points.
+
+        delta_e_max: float
+            Maximal energy difference in eV between two grid points.
+            Ignored if *uniform* grid is created.
+        
+        delta_rho_min: float
+            Minimal difference of states in an energy interval.
+        
+        delta_rho_max: float
+            Maximal difference of states in an energy interval.
+            Ignored if *uniform* grid is created.
+        
+        width: float
+            Width of the feature region of the Grid in eV.
+            Ignored if *uniform* grid is created.
+
+        n_pix: int
+            Number of intervals to discretize states in an energy interval.
+        
+        cutoff: (float, float)
+            Lowest and highes energies in eV to be included in the grid.
+
+        energy_discretization: list[float]
+            Used by *custom* grid - ignored else.
+            Values of energy intervals of the grid, in eV.
+
+        states_discretization: list[float]
+            Used by *custom* grid - ignored else.
+            Maximal number of states for each energy interval of the grid.       
+
+        **Returns**
+
+        self: *Grid*
+            The grid object with set grid parameters.
+
+        **Raises**
+
+        TypeError:
+            Grid type can not be identified (neither "nonuniform", nor "uniform", nor "custom").
+
+        ValueError:
+            Grid type can no be identified.
+        """
+        if grid_id is None:
             self = Grid()
             self.grid_type = grid_type
-            self.mu = mu
-            self.sigma = sigma
-            self.cutoff = cutoff
-            self.num_bins = num_bins
-            self.original_stepsize = original_stepsize
+            self.e_ref = e_ref
+            self.delta_e_min = delta_e_min
+            self.delta_e_max = delta_e_max
+            self.delta_rho_min = delta_rho_min
+            self.delta_rho_max = delta_rho_max
+            self.width = width
+            self.cutoff_min = cutoff[0]
+            self.cutoff_max = cutoff[1]
+            self.n_pix = n_pix
+            self.energy_discretization = energy_discretization
+            self.states_discretization = states_discretization
             self.grid_id = self.get_grid_id()
         else:
-            self = Grid().create(**Grid().resolve_grid_id(grid_id))
+            self = Grid.create(**Grid.resolve_grid_id(grid_id))
         return self
 
     def get_grid_id(self):
-        id = ':'.join([self.grid_type, str(self.num_bins), str(self.mu), str(self.sigma), str(self.cutoff)])
-        return id
+        self._check_created()
+        if self.grid_type == "nonuniform":
+            grid_id_values = [self.grid_type, self.e_ref, self.delta_e_min, self.delta_e_max, self.delta_rho_min, self.delta_rho_max, self.width, self.cutoff_min, self.cutoff_max, self.n_pix]
+        elif self.grid_type == "uniform":
+            grid_id_values = [self.grid_type, self.e_ref, self.delta_e_min, self.delta_rho_min, self.cutoff_min, self.cutoff_max, self.n_pix]
+        elif self.grid_type == "custom":
+            if self.energy_discretization is None:
+                raise ValueError("Must provide valid energy discretization for custom fingerprint.")
+            if self.states_discretization is None:
+                raise ValueError("Must provide valid states discretization for custom fingerprint.")
+            from hashlib import md5
+            all_values = [self.n_pix, self.e_ref]
+            all_values = np.append(all_values, self.energy_discretization)
+            all_values = np.append(all_values, self.states_discretization)
+            all_values = "".join([str(value) for value in all_values])
+            return f"custom:{md5(all_values.encode()).hexdigest()}"
+        else:
+            raise ValueError("Grid type can not be identified")
+        grid_id = ':'.join([str(value) for value in grid_id_values])
+        return grid_id
 
     @staticmethod
-    def resolve_grid_id(grid_id):
-        grid_type, num_bins, mu, sigma, cutoff = grid_id.split(':')
-        return {'grid_type' : grid_type, 'num_bins' : int(num_bins), 'mu' : float(mu), 'sigma' : float(sigma), 'cutoff' : tuple([float(x) for x in cutoff[1:-1].split(',')])}
-
-    def grid(self):
-        if self.grid_type != 'dg_cut':
-            raise NotImplementedError('Currently, only the grid dg_cut is implemented.')
-        asc = 0
-        desc = 0
-        x_grid = [0]
-        while (asc is not None) or (desc is not None):
-            if asc is not None:
-                asc += self._step_sequencer(asc, self.mu, self.sigma, self.original_stepsize) * self.original_stepsize
-                x_grid = x_grid + [round(asc, 8)]
-                if asc > self.cutoff[1]:
-                    asc = None
-            if desc is not None:
-                desc -= self._step_sequencer(desc, self.mu, self.sigma, self.original_stepsize) * self.original_stepsize
-                x_grid = [round(desc, 8)] + x_grid
-                if desc < self.cutoff[0]:
-                    desc = None
+    def resolve_grid_id(grid_id: str) -> dict:
+        grid_variables = {}
+        grid_id_variables = grid_id.split(':')
+        if grid_id_variables[0] == "nonuniform":
+            grid_variables["grid_type"] = grid_id_variables[0] 
+            grid_variables["e_ref"] = float(grid_id_variables[1])
+            grid_variables["delta_e_min"] = float(grid_id_variables[2])
+            grid_variables["delta_e_max"] = float(grid_id_variables[3])
+            grid_variables["delta_rho_min"] = float(grid_id_variables[4])
+            grid_variables["delta_rho_max"] = float(grid_id_variables[5])
+            grid_variables["width"] = float(grid_id_variables[6])
+            grid_variables["cutoff"] = [float(grid_id_variables[7])]
+            grid_variables["cutoff"].append(float(grid_id_variables[8]))
+            grid_variables["n_pix"] = int(grid_id_variables[9])
+        elif grid_id_variables[0] == "uniform":
+            grid_variables["grid_type"] = grid_id_variables[0]
+            grid_variables["e_ref"] = float(grid_id_variables[1])
+            grid_variables["delta_e_min"] = float(grid_id_variables[2])
+            grid_variables["delta_rho_min"] = float(grid_id_variables[3])
+            grid_variables["cutoff"] = [float(grid_id_variables[4])]
+            grid_variables["cutoff"].append(float(grid_id_variables[5]))
+            grid_variables["n_pix"] = int(grid_id_variables[6])
+        elif grid_id_variables[0] == "custom":
+            raise TypeError("Custom grid id can not be resolved.")
+        else:
+            raise ValueError("Grid type can not be identified")
+        return grid_variables
+
+    def grid(self) -> list:
+        """
+        Generate a grid with parameters set by `self.grid_id`.
+
+        **Returns**
+
+        grid: *list*
+            Nested list of type [[<grid_point_energy_i>, [<dos_bins_i>]], ...]
+        """
+        self._check_created()
+        if self.grid_type == "nonuniform":
+            grid = self._grid_non_uniform(self.delta_e_min, self.delta_e_max, self.delta_rho_min, self.delta_rho_max, self.width, self.n_pix, [self.cutoff_min, self.cutoff_max])
+        elif self.grid_type == "uniform":
+            grid = self._grid_uniform(self.delta_e_min, self.delta_rho_min, self.n_pix, [self.cutoff_min, self.cutoff_max])
+        elif self.grid_type == "custom":
+            grid = self._grid_custom(self.energy_discretization, self.states_discretization, self.n_pix)
+        else:
+            raise ValueError("Grid type can not be identified")
+        return grid
+
+    def copy(self) -> object:
+        """
+        Create a copy of self.
+        """
+        return Grid.create(grid_id=self.get_grid_id())
+
+    def _grid_non_uniform(self, delta_e_min, delta_e_max, delta_rho_min, delta_rho_max, width, n_pix, cutoff):
+        """
+        New implementation of the grid. Follows the description in the publication.
+        """
+
+        f_energies = partial(self.gauss_function, w_min = delta_e_min, w_max = delta_e_max, sigma = width) 
+
+        energies = self.energy_intervals_from_function(f_energies, delta_e_min, cutoff)
+
+        f_heights = partial(self.gauss_function, w_min = delta_rho_min, w_max = delta_rho_max, sigma = width) 
+
+        max_heights = self.grid_height_from_function(f_heights, energies, delta_rho_min)
+
+        grid = self.grid_from_lists(energies, max_heights, n_pix)
+
+        return grid
+
+    def _grid_uniform(self, delta_e, delta_rho, n_pix, cutoff):
+
+        def f_energies(x):
+            return 1
+
+        energies = self.energy_intervals_from_function(f_energies, delta_e, cutoff)
+
+        max_heights = self.grid_height_from_function(f_energies, energies, delta_rho)
+
+        grid = self.grid_from_lists(energies, max_heights, n_pix)
+
+        return grid
+        
+    def _grid_custom(self, energy_discretization, states_discretization, n_pix):
+        return self.grid_from_lists(energy_discretization, states_discretization, n_pix)
+
+    def grid_from_lists(self, energies: List[float], max_heights: List[float], n_bins: int) -> List[List[float]]:
+        """
+        Generate a grid from lists of energies, maximal heights, and the number of bins.
+
+        **Arguments**
+
+        energies: *list*
+            List of energy boundaries, aligned at the negative edge of the energy interval for energy bin.
+
+        max_heights: *list*
+            List of maximal heights, defined for each energy boundary in `energies`.
+
+        n_bins: *int*
+            Number of bins that is used to discretise each energy interval.
+        
+        **Returns**
+
+        grid: *list*
+            Nested list of type [[<grid_point_energy_i>, [<dos_bin_ij>, ...]], ...]
+        """
+        # avoid unexpected behaviour
+        assert len(energies) == len(max_heights), "Number of energy intervals is not equal to the number of maximal heights for each energy."
+
+        # generate grid
         grid = []
-        for item in x_grid:
-            bins = []
-            bin_height = self._step_sequencer(item, self.mu, self.sigma, original_stepsize=0.1) / (self.num_bins * 2.)
-            for idx in range(1, self.num_bins + 1):
-                bins.append(bin_height * idx)
-            grid.append([item, bins])
+        for energy, max_height in zip(energies, max_heights):
+            bin_height = max_height / n_bins
+            grid.append([energy, [round(idx * bin_height, 10) for idx in range(1, n_bins + 1)]])
         return grid
 
-    def _gauss(self, x, mu, sigma, normalized=True):
-        coefficient = (np.sqrt(2 * np.pi) * sigma)
-        value = np.exp((-0.5) * ((x - mu) / (sigma * 1.)) ** 2)
-        if normalized:
-            value = value / (coefficient * 1.)
-        return value
+    def get_grid_indices_for_energy_range(self, energy: List[float]) -> Set[int]:
+        """
+        Compute the grid indices that match a given energy range.
+
+        **Arguments**
+
+            energy: *list*
+            Energies to match grid energies.
+
+        **Returns**
+
+            grid_start, grid_end: set
+            First and last index of the grid that are in the limits of the energies.
+        """
+        grid_energies = [x[0] for x in self.grid()]
+        energy = [e for e in energy if (e >= grid_energies[0] and e <= grid_energies[-1])]
+        for idx, grid_e in enumerate(grid_energies):
+            if grid_e >= energy[0]:
+                grid_start = idx if idx >= 0 else 0
+                break
+        for idx, grid_e in reversed(list(enumerate(grid_energies))):
+            if grid_e <= energy[-1]:
+                grid_end = idx  
+                break
+        return grid_start, grid_end
+
+    def energy_intervals_from_function(self, function: Callable, minimal_interval: float, energy_limits: List[float]) -> list:
+        """
+        Generate a set of energy intervals from a given function. The energy intervals are generated as:
+
+        e_i = \\sum_{j=0}^{i-1} * minimal_interval * function(e_j)
+
+        and 
+
+        e_{-i} = -1 * e_i
+
+        **Arguments**
+
+        function: *Callable*
+            function that maps an energy to a number greater or equal 1
+
+        minimal_interval: *float*
+            minimal interval width, i.e. minimal distance between two intervals
+
+        energy_limits: *List[float]*
+            list [<minimal_energy>, <maximal_energy>] that determines the limits between which energy intervals should be calculated
+        """
+        # assumes the spectrum is shifted to the reference energy, which is then located at e = 0
+        energies = [0]
+        
+        # we need the maximum absolute limit to generate intervals
+        max_limit = max([abs(lim_) for lim_ in energy_limits])
+        
+        while energies[-1] < max_limit:
+            
+            # make sure the minimal interval is kept
+            next_step = max([function(energies[-1]), 1])
+            
+            # get new interval width
+            next_step *= minimal_interval
+
+            # apply to series
+            next_step += energies[-1]
+
+            # increase numerical stability
+            next_step = np.round(next_step, 8)
+
+            # update
+            if next_step <= max_limit:
+                energies.append(next_step)
+            else:
+                break
+        
+        # generate negative indices
+        full_set = [-1 * energy for energy in energies[1:] if energy <= abs(energy_limits[0])]
+        
+        # filter positive energies to match limits, and merge negative and positive indices
+        full_set.extend(list(filter(lambda x: x <= energy_limits[1], energies)))
+
+        # make sure the sorting is correct
+        full_set.sort()
+
+        return full_set
+
+    def grid_height_from_function(self, function: Callable, energies: list, minimal_height: float) -> list:
+        """
+        Use provided function to calculate the grid height at each energy.
+
+        **Arguments**
+
+        function: *Callable*
+            Function that assigns an integer number to an energy value
+
+        energies: *list*
+            List of energies for which the bin heights are calculated
+
+        minimal_height: *float*
+            Minimal height of a bin
+        """
+        return [max([function(energy), 1]) * minimal_height for energy in energies]
+
+    def gauss_function(self, x: float, w_min: float, w_max: float, sigma: float) -> int:
+        """
+        Function that assigns an integer value to each `x`.
+
+        **Arguments**
+
+        x: *float*
+            Input value
+
+        w_min: *float*
+            Minimal width of the interval
+
+        w_max: *float*
+            Maximal width of the interval
+
+        sigma: *float*
+            Parameter controlling the width of the interal gaussian function
+        """
+        g = (1 - np.exp(-0.5 * (x / sigma)**2))
+        value = g * (w_max / w_min - 1) + 1
+        return int(value)
 
-    def _step_sequencer(self, x, mu, sigma, original_stepsize):
-        return int(round((1 + original_stepsize - self._gauss(x, mu, sigma, normalized=False)) / original_stepsize, 9))
+    def _check_created(self):
+        if not hasattr(self, "grid_type"):
+            raise NotCreatedError("Grid was not created. To do so, use Grid.create(**parameters).")
\ No newline at end of file
diff --git a/nomad_dos_fingerprints/plotting.py b/nomad_dos_fingerprints/plotting.py
index b606457..3f9bd68 100644
--- a/nomad_dos_fingerprints/plotting.py
+++ b/nomad_dos_fingerprints/plotting.py
@@ -1,15 +1,52 @@
+from typing import Union, Iterable
+
 import matplotlib.pyplot as plt
-from bitarray import bitarray
-
-def plot_FP_in_grid(byte_fingerprint, grid, show = True, label = '', axes = None, **kwargs):
-    x=[]
-    y=[]
-    all_width=[]
-    bin_fp=bitarray()
-    bin_fp.frombytes(bytes.fromhex(byte_fingerprint.bins))
-    grid_indices=byte_fingerprint.indices
-    plotgrid=grid.grid()
+
+from nomad_dos_fingerprints import DOSFingerprint, Grid
+
+def _apply_grid_offset(grid_array: list, offset: float):
+    for idx in range(len(grid_array)):
+        grid_array[idx][0] += offset
+    return grid_array
+
+def plot_fingerprint_in_grid(fingerprint: DOSFingerprint, 
+                             show: bool = True, 
+                             label: str = '', 
+                             axes: Union[plt.Axes, None] = None, **kwargs) -> None:
+    """
+    Plot a fingerprint in its grid representation.
+
+    **Arguments:**
+
+    fingerprint: `nomad_dos_fingerprints.DOSFingerprint`
+        Fingerprint to be plotted
+
+    **Keyword arguments:**
+
+    show: `bool`
+        Show the plot.
+
+        default = `True`
+    
+    label: `str`
+        Label for the plot. Labels are required for legends in plots.
+
+        default: `''`
+
+    axes: `matplotlib.pyplot.Axes` or `None`
+        Axes to draw fingerprint on. If `None`, create new axes.
+
+        default: `None`
+    
+    Additional keyword arguments are passed to matplotlib.pyplot.bar(*, **kwargs).
+    """
+    x, y, all_width = [], [], []
+    bin_fp=fingerprint.get_bitarray()
+    grid_indices=fingerprint.indices
+    grid = Grid.create(grid_id=fingerprint.grid_id)
+    plotgrid=_apply_grid_offset(grid.grid(), grid.e_ref)
     plotgrid=plotgrid[grid_indices[0]:grid_indices[1]]
+
     bit_position=0
     for index,item in enumerate(plotgrid):
         if index<len(plotgrid)-1:
@@ -22,9 +59,178 @@ def plot_FP_in_grid(byte_fingerprint, grid, show = True, label = '', axes = None
                 y.append(dos_value)
                 all_width.append(width)
             bit_position+=1
-    if axes == None:
+    if axes is None:
         plt.bar(x,y,width=all_width,align='edge', label = label, **kwargs)
     else:
         axes.bar(x,y,width=all_width,align='edge', label = label, **kwargs)
     if show:
         plt.show()
+
+def plot_horizontal_lines(grid: Grid, 
+                          plot_style: dict = {'c':'k'}, 
+                          axes: Union[plt.Axes, None] = None, 
+                          only_top: bool = False) -> None:
+    """
+    Plot horizontal lines of a grid.
+
+    **Arguments:**
+
+    grid: `nomad_dos_fingerprints.Grid`
+        Grid to plot.
+
+    **Keyword arguments:**
+    
+    plot_style: `dict`
+        Style of grid lines
+
+        default: `{'c' : 'k'}` 
+
+    axes: `matplotlib.pyplot.Axes` or `None`
+        Axes to draw on. If `None`, create new axes.
+
+        default: `None`
+
+    only_top: `bool`
+        Show only the highest line
+
+        default: `False`
+    """
+    grid_array = _apply_grid_offset(grid.grid(), grid.e_ref)
+    if axes is None:
+        axes = plt.gca()
+    e_, r_ = grid_array[0]
+    for idx, entry in enumerate(grid_array[1:]):
+        e, rhos = entry
+        if list(rhos) == list(r_):
+            continue
+        elif rhos[-1] > r_[-1]: # increasing bin height
+            if only_top:
+                axes.plot([e_, e], [max(r_), max(r_)], **plot_style)
+            else:
+                for rho in r_:
+                    axes.plot([e_, e], [rho, rho], **plot_style)
+            e_, r_ = entry
+        elif rhos[-1] < r_[-1]: # decreasing bin height
+            if only_top:
+                axes.plot([e_, grid_array[idx][0]], [max(r_), max(r_)], **plot_style)
+            else:
+                for rho in r_:
+                    axes.plot([e_, grid_array[idx][0]], [rho, rho], **plot_style)
+            e_, r_ = grid_array[idx][0], rhos # energy of the *last* bin
+    if only_top:
+        axes.plot([e_, e], [max(r_), max(r_)], **plot_style)
+    else:
+        for rho in r_:
+            axes.plot([e_, e], [rho, rho], **plot_style)
+
+
+def plot_vertical_lines(grid: Grid, 
+                        plot_style: dict = {'c':'k'}, 
+                        axes: Union[plt.Axes, None] = None) -> None:
+    """
+    Plot horizontal lines of a grid.
+
+    **Arguments:**
+
+    grid: `nomad_dos_fingerprints.Grid`
+        Grid to plot.
+
+    **Keyword arguments:**
+    
+    plot_style: `dict`
+        Style of grid lines
+
+        default: `{'c' : 'k'}` 
+
+    axes: `matplotlib.pyplot.Axes` or `None`
+        Axes to draw on. If `None`, create new axes.
+
+        default: `None`
+    """
+    grid_array = _apply_grid_offset(grid.grid(), offset=grid.e_ref)
+    if axes is None:
+        axes = plt.gca()
+    for e, rhos in grid_array:
+        axes.plot([e,e], [0,max(rhos)], **plot_style)
+
+def plot_grid(grid: Grid, 
+              vertical: bool = True, 
+              horizontal: bool = True, 
+              horizontal_only_top: bool = False, 
+              plot_style: dict = {'c':'k', 'linewidth' : 0.5}, 
+              figure: bool = True, 
+              figsize: tuple = (15,10), 
+              limits: Union[Iterable, None] = None, 
+              show: bool = True, 
+              axes:  Union[plt.Axes, None] = None) -> None:
+    """
+    Plot a Grid.
+
+    **Arguments:**
+
+    grid: `nomad_dos_fingerprints.Grid`
+        Grid to plot
+
+    **Keyword arguments:**
+
+    vertical: `bool`
+        Plot vertical grid lines
+
+        default: `True`
+
+    horizontal: `bool`
+        Plot horizontal grid lines
+
+        default: `True`
+    
+    horizontal_only_top: `bool`
+        Plot only uppermost horizontal grid lines
+
+        default: `False`
+
+    plot_style: `dict`
+        Style of grid lines
+
+        default: `{'c' : 'k'}` 
+
+    figure: `bool`
+        Create figure
+
+        default: `True`
+
+    figsize: `tuple`
+        Size of create figure,
+        ignored if `figure` is `False`.
+
+        default: `(15,10)`
+    
+    limits: `list` of `float` or `None`
+        if not `None`, contains the axis limits:
+        `[x_min, x_max, y_min, y_max]`
+
+        default: `None`
+
+    show: `bool`
+        Show the plot.
+
+        default = `True`
+
+    axes: `matplotlib.pyplot.Axes` or `None`
+        Axes to draw on. If `None`, create new axes.
+
+        default: `None`
+
+    """
+    if figure:
+        plt.figure(figsize=figsize)
+    if axes is None:
+        axes = plt.gca()
+    if vertical:
+        plot_vertical_lines(grid, plot_style=plot_style, axes = axes)
+    if horizontal:
+        plot_horizontal_lines(grid, plot_style=plot_style, axes = axes, only_top=horizontal_only_top)
+    if limits is not None:
+        axes.set_xlim(*limits[:2])
+        axes.set_ylim(*limits[2:])
+    if show:
+        plt.show()
diff --git a/nomad_dos_fingerprints/similarity.py b/nomad_dos_fingerprints/similarity.py
index b68a88a..a865ca9 100644
--- a/nomad_dos_fingerprints/similarity.py
+++ b/nomad_dos_fingerprints/similarity.py
@@ -1,14 +1,17 @@
-import numpy as np
+from typing import Tuple
 from bitarray import bitarray
 
-def match_fingerprints(fingerprint1, fingerprint2):
+class FingerprintMismatchError(Exception):
+
+    def __init__(self, *args: object) -> None:
+        super().__init__(*args)
+
+def match_fingerprints(fingerprint1, fingerprint2) -> Tuple[bitarray, bitarray]:
     if fingerprint1.grid_id != fingerprint2.grid_id:
-        raise AssertionError('Can not calculate similarity of fingerprints that have been calculated with different grids.')
-    num_bins = int(fingerprint1.grid_id.split(':')[1])
-    fp1 = bitarray()
-    fp2 = bitarray()
-    fp1.frombytes(bytes.fromhex(fingerprint1.bins))
-    fp2.frombytes(bytes.fromhex(fingerprint2.bins))
+        raise AssertionError(f'Can not calculate similarity of fingerprints that have been calculated with different grids.\nFingerprint 1: {fingerprint1.grid_id}\nFingerprint 2: {fingerprint2.grid_id}')
+    num_bins = fingerprint1.n_state_bins
+    fp1 = fingerprint1.get_bitarray()
+    fp2 = fingerprint2.get_bitarray()
     start_index = max([fingerprint1.indices[0], fingerprint2.indices[0]])
     stop_index = min([fingerprint1.indices[1], fingerprint2.indices[1]])
     # find offsets
@@ -16,12 +19,33 @@ def match_fingerprints(fingerprint1, fingerprint2):
     dsp2 = (start_index - fingerprint2.indices[0]) * num_bins
     dep1 = (fingerprint1.indices[1] - stop_index) * num_bins
     dep2 = (fingerprint2.indices[1] - stop_index) * num_bins
-    fp1 = fp1[dsp1:len(fp1) - 1 - dep1]
-    fp2 = fp2[dsp2:len(fp2) - 1 - dep2]
+    fp1 = fp1[dsp1:len(fp1) - dep1]
+    fp2 = fp2[dsp2:len(fp2) - dep2]
     return fp1, fp2
 
 def tanimoto_similarity(fingerprint1, fingerprint2):
+    """
+    Tanimoto similarity (Tc) between `DOSFingerprint` objects.
+
+    Evaluates:
+
+    $Tc(a, b) = a * b / ( a**2 + b**2 - a * b )$
+
+    for binary-valued fingerprint vectors $a$ and $b$.
+
+    Before calculating Tc, the fingerprints are matched such that the same energy 
+    regions are descibed in both fingerprints.
+    
+    If any fingerprint is the null-vector (e.g. the energy regions do not overlap), Tc = 0.
+
+    **Returns:**
+
+    tc: `float`
+        Tc between both fingerprints.
+    """
     fp1, fp2 = match_fingerprints(fingerprint1, fingerprint2)
+    if len(fp1) != len(fp2):
+        raise FingerprintMismatchError(f"Can not match fingerprints of len {len(fp1)} and {len(fp2)}.")
     a = fp1.count()
     b = fp2.count()
     c = (fp1 & fp2).count()
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..fa65306
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,33 @@
+[build-system]
+requires = ["setuptools >= 61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name="nomad_dos_fingerprints"
+version="2.0.0"
+description = "Fingerprints of the electronic density of states of materials."
+readme = {file = "README.md", content-type = "text/markdown"}
+authors = [
+    {name="Martin Kuban et al."}
+]
+maintainers = [
+    {name="Martin Kuban", email="kuban@physik.hu-berlin.de"}
+]
+requires-python = ">= 3.7"
+dependencies = [
+    "numpy >= 1.22.2",
+    "bitarray == 2.4.1",
+    "matplotlib == 3.5.1"
+
+]
+license = {file = "LICENSE"}
+
+[project.optional-dependencies]
+tests = ['pytest']
+
+[project.urls]
+Repository = "https://github.com/kubanmar/dos-fingerprints"
+
+[tool.setuptools.packages.find]
+include=["nomad_dos_fingerprints"]
+exclude=["tests"]
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 9955dec..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-pytest
-pytest-cov
diff --git a/setup.py b/setup.py
index 2b699de..b908cbe 100644
--- a/setup.py
+++ b/setup.py
@@ -1,17 +1,3 @@
 import setuptools
 
-with open("README.md", "r") as fh:
-    long_description = fh.read()
-
-setuptools.setup(
-    name="nomad_dos_fingerprints",
-    version="1.0",
-    author="Martin Kuban",
-    author_email="kuban@physik.hu-berlin.de",
-    description="An implementation of DOS fingerprints for the NOMAD Laboratory.",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    url="https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-dos-fingerprints",
-    install_requires=['numpy', 'bitarray'],
-    packages=['nomad_dos_fingerprints']
-)
+setuptools.setup()
diff --git a/tests/test_DOSfingerprint.py b/tests/test_DOSfingerprint.py
index bc6ed94..e47780a 100644
--- a/tests/test_DOSfingerprint.py
+++ b/tests/test_DOSfingerprint.py
@@ -1,23 +1,31 @@
-import pytest
 import numpy as np
+import pytest
 
-from nomad_dos_fingerprints import DOSFingerprint, tanimoto_similarity
+from nomad_dos_fingerprints import tanimoto_similarity
+from nomad_dos_fingerprints import DOSFingerprint, Grid
 from nomad_dos_fingerprints.DOSfingerprint import ELECTRON_CHARGE
 
 def test_integrate_to_bins():
-    test_data_x = np.linspace(0, np.pi, num = 1000)
-    test_data_y = [np.sin(x) for x in test_data_x]
-    fp = DOSFingerprint(stepsize=0.001)
-    energies, dos = fp._integrate_to_bins(test_data_x, test_data_y)
-    energies_test_data = []
-    current_energy = 0
+
+    def get_area_below_curve(x, func, fp):
+        y = [func(x_) for x_ in x]
+        _, dos = fp._integrate_to_bins(x, y, stepsize=0.001)
+        return sum(dos)
+
+    def dummy_function(x):
+        return np.sin(x)
+    
+    test_data_x = np.linspace(0, np.pi, num = 10000)
+    test_data_y = [dummy_function(x) for x in test_data_x]
+    fp = DOSFingerprint()
+    energies, dos = fp._integrate_to_bins(test_data_x, test_data_y, stepsize=0.01)
     assert 0 in energies
-    while current_energy < np.pi - fp.stepsize:
-        energies_test_data.append(current_energy)
-        current_energy += fp.stepsize
-    assert np.isclose(sum(dos), 2)
+    assert np.isclose(get_area_below_curve([-0.0009,1.0009], lambda _: 1, fp,), 1, atol=1e-12), "integration area was not cut correctly"
+    assert np.isclose(sum(dos), 2, atol=1e-3), "Calculated area is not correct"
+    assert np.isclose(get_area_below_curve(np.linspace(-np.pi, 0, num = 10000), dummy_function, fp), -2, atol=1e-4), "Calculated area is not correct"
     assert len(energies) == len(dos)
-    assert (np.isclose(energies, np.array(energies_test_data))).all()
+    for idx, e in enumerate(energies):
+        assert np.isclose(e, idx * 0.01), "Error in calculating integration intervals"
 
 def test_convert_dos():
     test_data_x = np.arange(1, 5, step = 0.01)
@@ -33,7 +41,66 @@ def test_convert_dos():
 def test_serialization():
     test_data_x = np.linspace(0, np.pi, num = 1000)
     test_data_y = [np.sin(x) for x in test_data_x]
-    fp = DOSFingerprint(stepsize=0.001).calculate([x * ELECTRON_CHARGE for x in test_data_x], [[x / ELECTRON_CHARGE for x in test_data_y]])
+    fp = DOSFingerprint(stepsize=0.001).calculate(test_data_x, test_data_y)
+    print(fp)
     fp_json = fp.to_dict()
     fp_again = DOSFingerprint().from_dict(fp_json)
     assert tanimoto_similarity(fp, fp_again) == 1
+
+def test_adapt_energy_bin_sizes():
+    fp = DOSFingerprint()   
+    dummy_energy, dummy_dos = fp._integrate_to_bins(np.arange(-10,6,1), np.ones(16)) 
+    e, d = fp._adapt_energy_bin_sizes(dummy_energy, dummy_dos, Grid.create(grid_id = fp.grid_id))
+    grid = Grid.create(grid_id = fp.grid_id)
+    grid_start, grid_end = grid.get_grid_indices_for_energy_range(dummy_energy)
+    grid_array = grid.grid()
+    # energy grid points are the same as in the Grid
+    assert e[0] == grid_array[grid_start][0]
+    assert e[-1] == grid_array[grid_end-1][0] # the last grid energy is the final border
+    # the test case is integrated correctly
+    cut_grid_array = grid_array[grid_start:grid_end+1] # +1: inclusion of the last point for "manual" integration below
+    reference = [(cut_grid_array[idx+1][0] - cut_grid_array[idx][0]) for idx in range(len(cut_grid_array) -1)] 
+    assert np.allclose(d, reference)
+    # misc
+    assert (grid_start, grid_end - 1) == grid.get_grid_indices_for_energy_range([np.round(x, 5) for x in e]) # e is 1 block shorter due to summation
+
+def test_calc_grid_indices():
+    fp = DOSFingerprint()   
+    grid = Grid.create(grid_id = fp.grid_id)
+    dummy_energy, dummy_dos = fp._integrate_to_bins(np.arange(-10,6,1), np.ones(16)) 
+    e, _ = fp._adapt_energy_bin_sizes(dummy_energy, dummy_dos, Grid.create(grid_id = fp.grid_id))
+    indices = fp._calc_grid_indices(e, grid)
+    assert indices == fp.indices
+    assert indices == list(grid.get_grid_indices_for_energy_range(e))
+
+def test_calc_bit_vector():
+    grid = Grid.create()
+    fp = DOSFingerprint(grid_id = grid.get_grid_id())
+    grid_array = grid.grid()
+    fp.indices = [0, len(grid_array)-1]
+    all_ones = fp._calc_bit_vector([max(grid_column[1]) for grid_column in grid_array], grid)
+    assert all_ones == '1' * grid.n_pix * abs(fp.indices[1]+1 - fp.indices[0])
+    print(grid.n_pix/2 - 1) 
+    all_half_filled = fp._calc_bit_vector([grid_column[1][int(grid.n_pix/2 - 1)] for grid_column in grid_array], grid)
+    assert all_half_filled == ('1' * int(grid.n_pix / 2) + '0' * int(grid.n_pix / 2)) * abs(fp.indices[1]+1 - fp.indices[0]) 
+
+def test_compress_bit_string():
+    fp = DOSFingerprint()
+    compressed = fp._compress_binary_fingerprint_string("111000011")
+    assert compressed == "3t4f2t", "Compression is not correct"
+
+def test_expand_bit_string():
+    fp = DOSFingerprint()
+    expanded = fp._expand_fingerprint_string("5t3f6t")
+    assert expanded == "11111000111111", "Expandsion of compressed bins failed"
+
+def test_get_similarity():
+    x = np.round(np.linspace(0,1.1, num=1000), 8)
+    grid = Grid.create(grid_type="uniform", e_ref=0.5, cutoff=[-0.5,0.5], delta_e_min=0.01, delta_rho_min=0.015, n_pix=10)
+    fp_a = DOSFingerprint().calculate(x, [1 if x_i <= 0.75 else 0 for x_i in x], grid_id = grid.get_grid_id())
+    fp_b = DOSFingerprint().calculate(x, [1 if x_i > 0.25 else 0 for x_i in x], grid_id = grid.get_grid_id())
+    assert fp_a.get_similarity(fp_b) == 0.5, "Similarity obtained from get_similarity is wrong"
+
+@pytest.mark.skip()
+def test_get_bitarray():
+    raise NotImplementedError("TODO: Implement")
\ No newline at end of file
diff --git a/tests/test_Grid.py b/tests/test_Grid.py
index 89a426b..058b7cc 100644
--- a/tests/test_Grid.py
+++ b/tests/test_Grid.py
@@ -1,20 +1,96 @@
-import pytest, json, os
-import numpy as np
+import pytest
+import json
+import os
 
 from nomad_dos_fingerprints import Grid
+from nomad_dos_fingerprints.grid import NotCreatedError
+
+@pytest.fixture
+def grid():
+    return Grid()
 
 with open(os.path.join(os.path.dirname(__file__), 'grid_test.json'), 'r') as test_data_file:
     test_grid_data = json.load(test_data_file)
 
-def test_gen_grid_id():
-    grid = Grid().create(mu = -5, sigma = 10, grid_type = 'dg_cut', num_bins = 64, cutoff = (-15,10))
-    assert grid.get_grid_id() == 'dg_cut:64:-5:10:(-15, 10)'
+def test_grid_from_lists(grid):
+
+    expected_grid = [
+        [1, [0.5, 1.0, 1.5, 2.0]],
+        [2, [1.0, 2.0, 3.0, 4.0]],
+        [3, [2.0, 4.0, 6.0, 8.0]]
+    ]
+
+    list_grid = grid.grid_from_lists([1,2,3], [2,4,8], 4)
+    assert list_grid == expected_grid, "Got wrong grid from list"
+
+def test_energy_intervals_from_function(grid):
+
+    def test_function(value: float) -> int:
+        return int(value)
+
+    expected_output = [
+        -16, -8, -4, -2, -1, 0, 1, 2, 4, 8
+    ]
+    
+    assert grid.energy_intervals_from_function(test_function, 1, [-16,10]) == expected_output, "Wrong energy intervals calculated"
+
+def test_grid_height_from_function(grid):
+
+    def test_function(value: float) -> int:
+        return int(value)
+
+    expected_output = [
+        2, 2, 10, 8
+    ]
+
+    assert grid.grid_height_from_function(test_function, [0.1, 1, 5, 4], 2) == expected_output, "Wrong bin heights calculated"
+
+def test_gauss_function(grid):
+
+    assert [grid.gauss_function(x, 1, 4, 0.4) for x in range(-5,6)] == [4, 4, 3, 3, 3, 1, 3, 3, 3, 4, 4], "Gauss function has changed unexpectedly"
+
+def test_gen_grid_id_v2(grid):
+    grid = grid.create(grid_type = "nonuniform", e_ref = 10, delta_e_min = -2, delta_e_max = 9, delta_rho_min = 0.1, delta_rho_max = 10, width = 0.7, cutoff = [-20,30], n_pix = 2)
+    assert grid.get_grid_id() == "nonuniform:10:-2:9:0.1:10:0.7:-20:30:2", "Wrong grid id created for v2"
+
+def test_resolve_grid_id_v2(grid):
+    expected = {
+        "grid_type" : "nonuniform",
+        "e_ref" : 10,
+        "delta_e_min" : -2,
+        "delta_e_max" : 9, 
+        "delta_rho_min" : 0.1, 
+        "delta_rho_max" : 10, 
+        "width" : 0.7, 
+        "cutoff" : [-20,30], 
+        "n_pix" : 2
+    }
+    assert grid.resolve_grid_id("nonuniform:10:-2:9:0.1:10:0.7:-20:30:2") == expected, "Resolving grid id for v2 failed"
+
+def test_fails_for_not_created_grids(grid):
+    with pytest.raises(NotCreatedError):
+        grid.get_grid_id()
+
+def test_custom_grid(grid):
+    with pytest.raises(ValueError):
+        grid = grid.create(grid_type = "custom", states_discretization = [1,2,3], n_pix = 2)
+    with pytest.raises(ValueError):
+        grid = grid.create(grid_type = "custom", energy_discretization = [0,1,2], n_pix = 2)
+    grid = grid.create(grid_type = "custom", energy_discretization = [0,1,2], states_discretization = [1,2,3], n_pix = 2)
+    grid2 = grid.create(grid_type = "custom", energy_discretization = [0,1,3], states_discretization = [1,2,3])
+    grid3 = grid.create(grid_type = "custom", energy_discretization = [0,1,3], states_discretization = [1,2,3], n_pix = 2)
+
+    assert grid.grid() == [[0, [0.5,1]], [1, [1,2]], [2, [1.5, 3.0]]], "Did not create correct custom grid"
+
+    assert grid2.get_grid_id() != grid3.get_grid_id()
+
+def test_uniform_grid(grid):
+    grid = grid.create(grid_type = "uniform", delta_e_min = 1, delta_rho_min = 1, cutoff = [-1,1], n_pix = 4)
 
-def test_grid():
-    grid = Grid().create()
-    assert grid.grid() == list(test_grid_data.values())[0]
+    expected_uniform_grid = [
+        [-1, [0.25,0.5,0.75,1]],
+        [0, [0.25,0.5,0.75,1]],
+        [1, [0.25,0.5,0.75,1]]
+    ]
 
-def test_generate_grid_from_id():
-    grid = Grid().create()
-    grid_ids = Grid().create(grid_id = 'dg_cut:56:-2:7:(-10, 5)')
-    assert grid.grid() == grid_ids.grid()
\ No newline at end of file
+    assert grid.grid() == expected_uniform_grid, "Create wrong uniform grid"
\ No newline at end of file
diff --git a/tests/test_functional.py b/tests/test_functional.py
index fb0fd7b..6bb8294 100644
--- a/tests/test_functional.py
+++ b/tests/test_functional.py
@@ -1,21 +1,11 @@
-from nomad_dos_fingerprints import DOSFingerprint, tanimoto_similarity
-import pytest, os, json
+from nomad_dos_fingerprints import DOSFingerprint
+import os
+import json
 import numpy as np
 
 with open(os.path.join(os.path.dirname(__file__), 'fingerprint_generation_test_data.json'), 'r') as test_data_file:
     test_data = json.load(test_data_file)
 
-def test_fingerprint_values():
-
-    for fp, mid in test_data['fingerprints']:
-        raw_data  = test_data[mid]
-        new_fingerprint = DOSFingerprint().calculate(raw_data['dos_energies'], raw_data['dos_values'])
-        old_fingerprint = DOSFingerprint()
-        old_fingerprint.bins = json.loads(fp)['bins']
-        old_fingerprint.indices = json.loads(fp)['indices']
-        old_fingerprint.grid_id = new_fingerprint.grid_id
-        assert old_fingerprint.indices == new_fingerprint.indices
-        assert np.isclose(old_fingerprint.get_similarity(new_fingerprint),1, atol=5e-2)
 
 def test_materials_similarity():
 
@@ -23,9 +13,8 @@ def test_materials_similarity():
     similarity_matrix = test_data['simat']
     mids = [x[1] for x in fingerprints]
     raw_data = [test_data[mid] for mid in mids]
-    new_fingerprints = [DOSFingerprint().calculate(entry['dos_energies'], entry['dos_values']) for entry in raw_data]
+    new_fingerprints = [DOSFingerprint().calculate(entry['dos_energies'], entry['dos_values'], convert_data="ENC") for entry in raw_data]
     matrix = []
     for fp in new_fingerprints:
         matrix.append(fp.get_similarities(new_fingerprints))
-    print(matrix - np.array(similarity_matrix))
-    assert np.isclose(similarity_matrix, matrix, atol = 5e-2).all()
+    assert np.allclose(similarity_matrix, matrix, atol = 1e-2)
diff --git a/tests/test_similarity.py b/tests/test_similarity.py
index 9f4808b..7861d13 100644
--- a/tests/test_similarity.py
+++ b/tests/test_similarity.py
@@ -1,25 +1,46 @@
-import pytest, os, json
-from bitarray import bitarray
-from nomad_dos_fingerprints import tanimoto_similarity, DOSFingerprint, Grid
-from nomad_dos_fingerprints.DOSfingerprint import ELECTRON_CHARGE
+import os
+import json
+from nomad_dos_fingerprints import tanimoto_similarity
+from nomad_dos_fingerprints.similarity import match_fingerprints
+from nomad_dos_fingerprints.DOSfingerprint import ELECTRON_CHARGE, DOSFingerprint
+
 
 with open(os.path.join(os.path.dirname(__file__), 'fingerprint_generation_test_data.json'), 'r') as test_data_file:
     test_data = json.load(test_data_file)
 
-def test_tanimoto():
+def test_match_fingerprints():
+    
+    fp1 = DOSFingerprint()
+    fp2 = DOSFingerprint()
+    fp1.bins = fp1._compress_binary_fingerprint_string('00000000111111110000000011111111')
+    fp2.bins = fp1._compress_binary_fingerprint_string('1111111100000000')
+    grid_id = 'nonuniform:1:1:1:1:1:1:1:1:8'
+    fp1.grid_id = grid_id
+    fp2.grid_id = grid_id
+    fp1.indices = [0,3]
+    fp2.indices = [1,2]
+    print(fp1.n_state_bins, fp2.n_state_bins)
+
+    bits1, bits2 = match_fingerprints(fp1, fp2)
+
+    assert len(bits1) == len(bits2) == 16, "Fingerprints do not match"
+
+def test_tanimoto_v2():
     # generate fp-type data and check if this can be realized with binary-strings only
     fp1 = DOSFingerprint()
     fp2 = DOSFingerprint()
-    fp1.bins = bitarray('00000000111111110000000011111111').tobytes().hex()
-    fp2.bins = bitarray('1111111100000000').tobytes().hex()
-    grid_id = 'a:8:b'
+    fp1.bins = fp1._compress_binary_fingerprint_string('00000000111111110000000011111111')
+    fp2.bins = fp1._compress_binary_fingerprint_string('1111111100000000')
+    grid_id = 'nonuniform:1:1:1:1:1:1:1:1:8'
     fp1.grid_id = grid_id
     fp2.grid_id = grid_id
     fp1.indices = [0,3]
     fp2.indices = [1,2]
-    assert tanimoto_similarity(fp1, fp2) == 1
-    assert tanimoto_similarity(fp1, fp1) == 1
-    assert tanimoto_similarity(fp2, fp2) == 1
+    print(fp1.get_bitarray())
+    print(fp2.get_bitarray())
+    assert tanimoto_similarity(fp1, fp2) == 1, "Non-identical cut fingerprints"
+    assert tanimoto_similarity(fp1, fp1) == 1, "Non-identity for Fingerprint v2 NR 1"
+    assert tanimoto_similarity(fp2, fp2) == 1, "Non-identity for Fingerprint v2 NR 1"
 
 def test_matching_of_spectra():
     data = test_data["17661:2634879"]
@@ -27,7 +48,7 @@ def test_matching_of_spectra():
     cut_dos = []
     cut_energies = [e for e,d in zip(data['dos_energies'], data['dos_values'][0]) if (e / ELECTRON_CHARGE > -7.3 and e / ELECTRON_CHARGE < 2)]
     cut_dos = [d for e,d in zip(data['dos_energies'], data['dos_values'][0]) if (e / ELECTRON_CHARGE > -7.3 and e / ELECTRON_CHARGE < 2)]
-    fp = DOSFingerprint().calculate(data['dos_energies'], data['dos_values'])
-    cut_fp = DOSFingerprint().calculate(cut_energies, [cut_dos])
+    fp = DOSFingerprint().calculate(data['dos_energies'], data['dos_values'], convert_data="enc")
+    cut_fp = DOSFingerprint().calculate(cut_energies, [cut_dos], convert_data="enc")
     assert tanimoto_similarity(cut_fp, fp) == tanimoto_similarity(fp, cut_fp)
     assert 1 - tanimoto_similarity(fp, cut_fp) < 1e-2  
\ No newline at end of file
diff --git a/timing/profiling_fingerprint_generation.py b/timing/profiling_fingerprint_generation.py
new file mode 100644
index 0000000..f9dd5b4
--- /dev/null
+++ b/timing/profiling_fingerprint_generation.py
@@ -0,0 +1,36 @@
+import cProfile
+import pstats
+import io
+from pstats import SortKey
+
+import numpy as np
+
+from nomad_dos_fingerprints import DOSFingerprint, Grid
+
+x = np.linspace(-15,10, num=int(1e3))
+y = np.sin(x)**2 + 1
+
+grid_attrs={'grid_type': 'nonuniform',
+ 'e_ref': -2.0,
+ 'delta_e_min': 0.05,
+ 'delta_e_max': 1.05,
+ 'delta_rho_min': 0.1,
+ 'delta_rho_max': 5.5,
+ 'width': 7.0,
+ 'cutoff': [-8.0, 12.0],
+ 'n_pix': 2048}
+test_grid = Grid.create(**grid_attrs)
+
+calc_params={"grid_id":test_grid.get_grid_id(), "convert_data":None}
+
+pr = cProfile.Profile()
+pr.enable()
+for _ in range(5):
+    fp = DOSFingerprint().calculate(x, y, **calc_params)
+pr.disable()
+
+s = io.StringIO()
+sortby = SortKey.CUMULATIVE
+ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
+ps.print_stats()
+print(s.getvalue())