From 4bf2b4a2a7bb1ab98334f3364db9d9702f4e5417 Mon Sep 17 00:00:00 2001 From: Martin Kuban Date: Wed, 28 Feb 2024 15:51:16 +0100 Subject: [PATCH] Version 2 --- .gitignore | 9 +- LICENSE | 209 +--------- README.md | 37 +- nomad_dos_fingerprints/DOSfingerprint.py | 443 ++++++++++++++++++--- nomad_dos_fingerprints/__init__.py | 6 +- nomad_dos_fingerprints/grid.py | 409 ++++++++++++++++--- nomad_dos_fingerprints/plotting.py | 228 ++++++++++- nomad_dos_fingerprints/similarity.py | 44 +- pyproject.toml | 33 ++ requirements.txt | 2 - setup.py | 16 +- tests/test_DOSfingerprint.py | 95 ++++- tests/test_Grid.py | 100 ++++- tests/test_functional.py | 21 +- tests/test_similarity.py | 47 ++- timing/profiling_fingerprint_generation.py | 36 ++ 16 files changed, 1330 insertions(+), 405 deletions(-) create mode 100644 pyproject.toml delete mode 100644 requirements.txt create mode 100644 timing/profiling_fingerprint_generation.py diff --git a/.gitignore b/.gitignore index 8e3f5d4..1aa6b98 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ -*/__pycache__ +__pycache__ *.egg-info *.code-workspace -run_tests.bat .vscode -.coverage* \ No newline at end of file +.coverage* +.pytest_cache +notebooks +build +venvs \ No newline at end of file diff --git a/LICENSE b/LICENSE index d645695..2e0e784 100644 --- a/LICENSE +++ b/LICENSE @@ -1,202 +1,13 @@ +Copyright 2022, Martin Kuban, Santiago Rigamonti, Markus Scheidgen, and Claudia Draxl - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + http://www.apache.org/licenses/LICENSE-2.0 - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/README.md b/README.md index 77d100b..5ca0d7a 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,16 @@ -This package implements fingerprints of the electronic density-of-states (DOS) for the evaluation of similarity of materials based on their electronic structures. +Fingerprinting of materials based on their electronic structure. -The fingerprints are based on a modification on the D-Fingerprints presented in Ref. [1]. -Our modification allows to target specific energy ranges for the evaluation of the similarity of the electronic structure. -As a similarity measure we use the Tanimoto coefficient [2]. +Python package to compute fingerprints of the electronic density-of-states (DOS) and evaluate the similarity of materials based on their electronic structures. + +This package implements the DOS fingerprints and the similarity metrics introduced in Refs. [1,2]. + +Our DOS fingerprints can be tailored to target specific ranges of the energy spectrum. The computed fingerprints allow for the evaluation of the similarity of the electronic structure. + +As a similarity measure we use the Tanimoto coefficient [3]. # Usage -Fingerprints are instances of the `DOSFingerprint()` class and can be calculated by providing the energy in [Joule] and the DOS in [states/unit cell/Joule] to the `calculate()` method. Furthermore, the parameters of a non-uniform grid can be chosen. The default grid is specialized on the energy range between -10 and 5 eV and emphasizes the upper valence region. +Fingerprints are instances of the `DOSFingerprint()` class and can be calculated by providing the energy in [Joule] and the DOS in [states/unit cell/Joule] to the `calculate()` method. Furthermore, the energy axis can be discretized over a non-uniform grid. For this, specific parameters must be provided. By default, the grid is specialized on the energy range between -10 and 5 eV, thereby emphasizing the upper valence region. ```Python from nomad_dos_fingerprints import DOSFingerprint @@ -20,8 +24,27 @@ from nomad_dos_fingerprints import tanimoto_similarity tc = tanimoto_similarity(dos_fingerprint_1, dos_fingerprint_2) ``` +# Citation + +If you use this package in a publication, please cite it in the following way: + +Martin Kuban, Santiago Rigamonti, Markus Scheidgen, and Claudia Draxl: +_Density-of-states similarity descriptor for unsupervised learning from materials data_ +Sci Data *9*, 646 (2022). https://doi.org/10.1038/s41597-022-01754-z + +# Links + +We maintain a notebook in the NOMAD AI Toolkit: +https://nomad-lab.eu/aitoolkit/tutorial-dos-similarity + # References -[1] Isayev _et al._, Chem. Mater. 2015, 27, 3, 735–743 (doi:10.1021/cm503507h) +[1] Martin Kuban, Santiago Rigamonti, Markus Scheidgen, and Claudia Draxl: +_Density-of-states similarity descriptor for unsupervised learning from materials data_. +_Sci Data_ *9*, 646 (2022). https://doi.org/10.1038/s41597-022-01754-z + +[2] Martin Kuban, Šimon Gabaj, Wahib Aggoune, Cecilia Vona, Santiago Rigamonti, Claudia Draxl: +_Similarity of materials and data-quality assessment by fingerprinting_. +_MRS Bulletin_ *47*, 991–999 (2022). https://doi.org/10.1557/s43577-022-00339-w -[2] P. Willet _et al._, J. Chem. Inf. Comput . 38 , 983 996 (1998) (doi:10.1021/ci9800211) +[3] P. Willet _et al._, _J. Chem. Inf. Comput._ *38* , 983 996 (1998) (https://doi.org/10.1021/ci9800211) \ No newline at end of file diff --git a/nomad_dos_fingerprints/DOSfingerprint.py b/nomad_dos_fingerprints/DOSfingerprint.py index b102711..393dedc 100644 --- a/nomad_dos_fingerprints/DOSfingerprint.py +++ b/nomad_dos_fingerprints/DOSfingerprint.py @@ -1,6 +1,8 @@ import numpy as np from bitarray import bitarray from functools import partial +from typing import Callable, List, Union +from itertools import groupby from .grid import Grid from .similarity import tanimoto_similarity @@ -8,72 +10,297 @@ ELECTRON_CHARGE = 1.602176565e-19 class DOSFingerprint(): + """ + A fingerprint of the electronic density-of-states (DOS), + obtained by integrating the DOS over discrete intervals + and binning of the resulting histogram. + + **Keyword arguments:** - def __init__(self, stepsize = 0.05, similarity_function = tanimoto_similarity, **kwargs): + similarity_function: `Callable` + A function that allows to calculate the similarity between two DOS fingerprints + + default: `nomad_dos_fingerprints.similarity.tanimoto_similarity` + + Additional keyword arguments are passed to the similarity function. + """ + + def __init__(self, + similarity_function: Callable = tanimoto_similarity, **kwargs): self.bins = '' self.indices = [] - self.stepsize = stepsize self.filling_factor = 0 self.grid_id = None self.set_similarity_function(similarity_function, **kwargs) + self._n_state_bins = None + + @property + def n_state_bins(self): + if self._n_state_bins is None: + assert self.grid_id is not None, "Grid id is not set. Will be set when calculating the fingerprint with a grid." + self._n_state_bins = Grid.resolve_grid_id(self.grid_id)["n_pix"] + return self._n_state_bins + + def calculate(self, + dos_energies: np.ndarray, + dos_values: np.ndarray, + grid_id: str = None, + convert_data: Union[None, str, Callable] = None, + normalization_factor: float = 1.0) -> object: + """ + Calculate the fingerprint data from normalized DOS energies and values. + + **Arguments:** + + dos_energies: `np.ndarray` + Energies of the DOS spectrum + + By default, units are [eV]. + + This behavior can be changed by changing `convert_data` (see below). + + dos_values: `np.ndarray` + Values of the DOS spectrum + + By default, units are [states/cell/eV]. + + This behavior can be changed by changing `convert_data` (see below). + + **Keyword arguments:** + + grid_id: `str` + ID for describing the `Grid` object that is used to calculate the fingerprint data. + For details, see documentation there. + + default: `None`: Use `Grid` default values. + + convert_data: `Union[str, None, Callable]` + Convert `dos_energies` and `dos_values` to different units. + + The string `'enc'` transforms [Joule] to [eV] and sums all spin channels of DOS values. + Additionally, assumes that `dos_energies` is an `Interable` of `float`s + and `dos_values` is an `Interable` of `Interables` of lenght `len(dos_energies)` of `float`s. + + If set to `None`, no conversion will be performed. + + If a callable is given, it will be called as: + ``energy, dos = convert_data(dos_energies, dos_values)``. + + default: `None` + + normalization_factor: `float` + Factor used for unit conversion, e.g. if the DOS is given or required per atom. + Used only if `convert_data` is "Enc". - def calculate(self, dos_energies, dos_values, grid_id = 'dg_cut:56:-2:7:(-10, 5)', unit_cell_volume = 1, n_atoms = 1): - energy, dos = self._convert_dos(dos_energies, dos_values, unit_cell_volume = unit_cell_volume, n_atoms = n_atoms) - raw_energies, raw_dos = self._integrate_to_bins(energy, dos) - grid = Grid().create(grid_id = grid_id) - self.grid_id = grid.get_grid_id() - self.indices, self.bins = self._calculate_bytes(raw_energies, raw_dos, grid) + default: `1.0` + + **Returns:** + + self: `DOSFingerprint` + Fingerprint after calculation + """ + if isinstance(convert_data, str) and convert_data.lower() == 'enc': + energy, dos = self._convert_dos(dos_energies, dos_values, normalization_factor = normalization_factor) + elif isinstance(convert_data, Callable): + energy, dos = convert_data(dos_energies, dos_values) + elif convert_data is None: + energy, dos = dos_energies, dos_values + else: + raise ValueError('Key-word argument `convert_data` must be either the string "enc", a callable, or `None`.') + grid = Grid.create(grid_id = grid_id) + energy = np.array(energy) - grid.e_ref + raw_energies, raw_dos = self._integrate_to_bins(energy, dos, grid.delta_e_min) + self.grid_id = grid_id if grid_id is not None else grid.get_grid_id() + bin_fp = self._calculate_bit_fingerprint(raw_energies, raw_dos, grid) + self.bins = self._compress_binary_fingerprint_string(bin_fp) + self._n_state_bins = grid.n_pix return self - def to_dict(self): - return dict(bins = self.bins, indices = self.indices, stepsize = self.stepsize, grid_id = self.grid_id, filling_factor = self.filling_factor) + def to_dict(self) -> dict: + """ + Convert data to dictionary. + """ + data_dict = {} + for key in ['bins', 'indices', 'grid_id', 'filling_factor', 'overflow', 'undersampling']: + data_dict[key] = getattr(self, key) + return data_dict - @staticmethod - def from_dict(fp_dict): - self = DOSFingerprint() + @classmethod + def from_dict(cls, fp_dict: dict) -> object: + """ + Create fingerprint object from dictionary. + """ + self = cls() self.bins = fp_dict['bins'] self.indices = fp_dict['indices'] - self.stepsize = fp_dict['stepsize'] self.grid_id = fp_dict['grid_id'] self.filling_factor = fp_dict['filling_factor'] + if 'overflow' in fp_dict.keys(): + self.overflow = fp_dict['overflow'] + if 'undersampling' in fp_dict.keys(): + self.undersampling = fp_dict['undersampling'] + self._n_state_bins = Grid.resolve_grid_id(self.grid_id)["n_pix"] return self - def set_similarity_function(self, similarity_function, **kwargs): + def set_similarity_function(self, similarity_function: Callable, **kwargs): + """ + Set the similarity function of the fingerprint. + + **Arguments:** + + similarity_function: `Callable` + Function for calculating the similarity between `DOSFingerprint` objects. + + Keyword arguments are passed to the similarity function. + """ self.similarity_function = partial(similarity_function, **kwargs) - def get_similarity(self, fingerprint): + def get_similarity(self, fingerprint: object) -> float: + """ + Get similarity value between self and another fingerprint object. + + **Arguments:** + + fingerprint: `DOSFingerprint` + Other fingerprint to calculate similarity to. + + **Returns:** + + similarity: `float` + Similarity between both fingerprints. + + **Raises:** + + `TypeError`: `fingerprint` is not a `DOSFingerprint` + """ + if not isinstance(fingerprint, DOSFingerprint): + raise TypeError("Other fingerprint must be a `DOSFingerprint` object.") return self.similarity_function(self, fingerprint) - def get_similarities(self, list_of_fingerprints): + def get_similarities(self, list_of_fingerprints: List[object]) -> np.ndarray: + """ + Get similarities of self to a list of other fingerprints. + + **Arguments:** + + list_of_fingerprints: `List[DOSFingerprint]` + Other fingerprints to calculate similarity to. + + **Returns:** + + similarities: `np.ndarray` + Similarities of self to other fingerprints. + """ return np.array([self.similarity_function(self, fp) for fp in list_of_fingerprints]) + def get_bitarray(self) -> object: + """ + Get `bitarray.bitarray` representing the fingerprint data. + + **Returns:** + + bits: `bitarray` + Binary fingerprint data + """ + if self.bins == '': + raise AttributeError("Fingerprint is not calculated! Use `calculate()`.") + bit_string = self._expand_fingerprint_string(self.bins) + bits = bitarray(bit_string) + return bits + def __eq__(self, other): - return self.bins == other.bins and self.indices == other.indices and self.stepsize == other.stepsize and self.grid_id == other.grid_id and self.filling_factor == other.filling_factor + if not isinstance(other, DOSFingerprint): + return False + for attr in ['bins', 'indices', 'stepsize', 'grid_id', 'filling_factor']: + if getattr(self, attr) != getattr(other, attr): + return False + return True + + def _integral_house(self, a, b, interval): + # integrates a "house" like figure, consisting of a rectangle and a 90 degree triangle, with heights a and b, and width=interval + return (a + b) * interval / 2. - def _integrate_to_bins(self, xs, ys): + def _interpolate_dos(self, dos_values, energy_values, requested_energy): """ - Performs stepwise numerical integration of ``ys`` over the range of ``xs``. The stepsize of the generated histogram is controlled by DOSFingerprint().stepsize. + Returns a linearly interpolated value between two DOS points. """ - if len(xs) < 2 or len(ys) < 2: - raise ValueError('Invalid input. Please provide arrays with len > 2.') - xstart = round(int(xs[0] / (self.stepsize * 1.)) * self.stepsize, 8) # define the limits that fit with the predefined stepsize - xstop = round(int(xs[-1] / (self.stepsize * 1.)) * self.stepsize, 8) - x_interp = np.arange(xstart, xstop + self.stepsize, self.stepsize) - x_interp = np.around(x_interp, decimals=5) - y_interp = np.interp(x_interp, xs, ys) - y_integ = np.array([np.trapz(y_interp[idx:idx + 2], x_interp[idx:idx + 2]) for idx in range(len(x_interp)-1)]) - return x_interp[:-1], y_integ + if len(dos_values) != 2 or len(energy_values) != 2: + raise ValueError("Error in _interpolate_dos: Wrong number of arguments for calculation of gradient.") + if energy_values[0] == energy_values[1]: + return dos_values[0] + gradient = (dos_values[1] - dos_values[0]) / ((energy_values[1] - energy_values[0]) * 1.) + difference = requested_energy - energy_values[0] + interpolated = dos_values[0] + gradient * difference + return interpolated + + def _find_energy_cutoff_indices(self, energy, estart, estop): + """ + This find the correct indices for the integration, that is, all values of the energy in the interval [estart-1,estop+1] will be in the range [emin_idx,emax_idx] + WARNING: Assumes that estart estart and emin_idx is None: + emin_idx = index - 1 + if energy[-(index + 1)] < estop and emax_idx is None: + emax_idx = -index + 1 + if emax_idx > 0: + emax_idx = 0 #HOTFIX + index += 1 + if emax_idx == 0: + emax_idx = None + return emin_idx, emax_idx + + def _integrate_to_bins(self, energy, dos, stepsize=0.05): + """ + Performs stepwise numerical integration of ``ys`` over the range of ``xs``. + """ + if len(energy) < 2 or len(dos) < 2: + raise ValueError(f'Invalid input. Please provide arrays with len > 2. len(x) : {len(energy)} len(y) : {len(dos)}') + + # define the limits that fit with the predefined stepsize + xstart = round(np.ceil(energy[0] / (stepsize * 1.)) * stepsize, 12) + xstop = round(np.floor(energy[-1] / (stepsize * 1.)) * stepsize, 12) - def _convert_dos(self, energy, dos, unit_cell_volume = 1, n_atoms = 1): + # Find the indices in original arrays closest to new limits defined by stepsize + idx_min, idx_max = self._find_energy_cutoff_indices(energy, xstart, xstop) + + energy = energy[idx_min:idx_max] + dos = dos[idx_min:idx_max] + current_energy = xstart + current_dos = self._interpolate_dos([dos[0], dos[1]], [energy[0], energy[1]], xstart) + dos_binned = [] + energy_binned = [] + index = 1 # starting from the second value, because energy[0]= grid_array[0][0] and e <= grid_array[-1][0])]) - # calculate fingerprint + # cut the energy and states to grid size + energy_bins, states = np.transpose([(e,d) for e,d in zip(energy_bins_raw, states_raw) if (e >= grid_array[0][0] and e <= grid_array[-1][0])]) + # find grid start and end points + grid_start, grid_end = self._calc_grid_indices(energy_bins, grid) + # if the last energy bin is less than or equal to the last grid point, the pre-to-last grid index was found instead of the correct one + # the following lines fix this, but it is not very elegant + if grid_end <= len(grid_array) - 1: + if len(energy_bins) == len(energy_bins_raw) and energy_bins_raw[-1] > grid_array[grid_end][0]: + grid_end += 1 + self.indices[1] += 1 + # sum dos bins to adapt to inhomogeneous energy grid + adapted_bins = [] + adapted_states = [] + for index in range(grid_start, grid_end): + eps_i = grid_array[index][0] + eps_iplusdelta = grid_array[index+1][0] + adapted_bins.append(eps_i) + adapted_states.append(sum(np.array([s for e, s in zip(energy_bins, states) if e >= eps_i and e < eps_iplusdelta]))) + return adapted_bins, adapted_states + + def _calc_bit_vector(self, adapted_states: np.ndarray, grid: Grid): + """ + Calculate bit representation of grid-adapted states per energy bin. + Args: + adapted_states: numpy.ndarray: states in the bins declared in `energy_bins` + grid: Grid: description of the DOS discretization + Sets: + overflow: float: sum of states that can not be described by the current grid + undersampling: float: fraction of states that are in partially filled grid cells (and thus ignored) + Returns: + discrete_states: str: binary representation of binned DOS spectrum + """ bin_fp = '' - grid_index = 0 - for idx, grid_e in enumerate(grid_array): - if grid_e[0] > energy[0]: - grid_index = idx - 1 - if grid_index < 0: - grid_index = 0 - break - grid_start = grid_index - fp_index = 0 - while grid_array[grid_index + 1][0] < energy[-1]: - current_dos = 0 - while energy[fp_index] < grid_array[grid_index + 1][0]: - current_dos += dos[fp_index] - fp_index += 1 - bin_fp += self._binary_bin(current_dos, grid_array[grid_index][1]) - grid_index += 1 - self.filling_factor = bin_fp.count('1') / len(bin_fp) + grid_array = grid.grid() + grid_start, grid_end = self.indices + overflow = 0 + undersampling = 0 + for states, grid_segment in zip(adapted_states, grid_array[grid_start:grid_end + 1]): + bin_fp += self._binary_bin(states, grid_segment[1]) + overflow += self._get_segment_overflow(states, grid_segment) + undersampling += self._get_segment_undersampling(states, grid_segment) + self.overflow = overflow + self.undersampling = undersampling / sum(adapted_states) + return bin_fp + + def _calculate_byte_representation(self, bin_fp: str): byte_fp = bitarray(bin_fp).tobytes().hex() - return [grid_start, grid_index], byte_fp + return byte_fp + + def _compress_group(self, count: int, char: str): + representative_char = 't' if char == '1' else 'f' + return str(count) + representative_char + + def _calculate_bit_fingerprint(self, binned_energies: np.ndarray, binned_states: np.ndarray, grid: Grid): + """ + Calculate bit representation of DOS fingerprint. + """ + _, adapted_states = self._adapt_energy_bin_sizes(binned_energies, binned_states, grid) + bin_fp = self._calc_bit_vector(adapted_states, grid) + self.filling_factor = bin_fp.count('1') / len(bin_fp) + return bin_fp + + def _get_segment_overflow(self, states, grid_segment): + """ + Calculate the number of states that lies beyond the region described by a grid segment. + Args: + states: float: states in the bins declared in `energy_bins` + grid_segment: Grid: description of the DOS discretization + Returns: + segment_overflow: float: number of states that can be described by the grid segment + """ + delta_states = states - grid_segment[1][-1] + segment_overflow = delta_states if delta_states >= 0 else 0 + return segment_overflow + + def _get_segment_undersampling(self, states, grid_segment): + """ + Calculate the amount of states that are in partially filled grid cells. + Args: + states: float: states in the bins declared in `energy_bins` + grid_segment: Grid: description of the DOS discretization + Returns: + segment_undersampling: float: sum of states that are in partially filled grid cells + + """ + delta_states = 0 + for idx in range(len(grid_segment[1]) - 1): + if grid_segment[1][idx] < states < grid_segment[1][idx+1]: + delta_states = states - grid_segment[1][idx] + break + return delta_states + + def _compress_binary_fingerprint_string(self, fingerprint_string: str): + compressed_string = '' + for char, count in groupby(fingerprint_string): + compressed_string += self._compress_group(len(list(count)), char) + return compressed_string + + def _expand_fingerprint_string(self, compressed_fingerprint_string): + current_string = '' + decompressed_string = '' + for char in compressed_fingerprint_string: + if char.isnumeric(): + current_string += char + else: + number_to_add = '1' if char == 't' else '0' + decompressed_string += number_to_add * int(current_string) + current_string = '' + return decompressed_string \ No newline at end of file diff --git a/nomad_dos_fingerprints/__init__.py b/nomad_dos_fingerprints/__init__.py index 678b38f..5cd741b 100644 --- a/nomad_dos_fingerprints/__init__.py +++ b/nomad_dos_fingerprints/__init__.py @@ -1,3 +1,3 @@ -from .DOSfingerprint import DOSFingerprint -from .grid import Grid -from .similarity import tanimoto_similarity \ No newline at end of file +from nomad_dos_fingerprints.DOSfingerprint import DOSFingerprint # noqa: F401 +from nomad_dos_fingerprints.grid import Grid # noqa: F401 +from nomad_dos_fingerprints.similarity import tanimoto_similarity # noqa: F401 \ No newline at end of file diff --git a/nomad_dos_fingerprints/grid.py b/nomad_dos_fingerprints/grid.py index 57633c6..38c90e9 100644 --- a/nomad_dos_fingerprints/grid.py +++ b/nomad_dos_fingerprints/grid.py @@ -1,63 +1,382 @@ +from typing import Callable, List, Set import numpy as np +from functools import partial + +class NotCreatedError(Exception): + """ + Empty container for naming the error occuring when the Grid was initialized, but not created. + """ + pass class Grid(): + + """ + Grid object used to discretize DOS spectra for the generation of DOS fingerprints. + To obtain a Grid() object, create it with: + grid = Grid.create(**parameters) + """ @staticmethod - def create(grid_id = None, grid_type = 'dg_cut', mu = -2, sigma = 7, cutoff = (-10,5), num_bins = 56, original_stepsize = 0.05): - if grid_id == None: + def create(grid_id = None, + grid_type = "nonuniform", + e_ref = -2, + delta_e_min = 0.05, + delta_e_max = 1.05, + delta_rho_min = 0.5, + delta_rho_max = 5.5, + width = 7, + n_pix = 56, + cutoff = [-8,7], + energy_discretization = None, + states_discretization = None): + """ + Create grid object. + + There are two options to initialize the grid: + 1. Initialize from grid id: The grid id, which is a string that contains all grid parameters, is used to set the parameters of the grid. + 2. Initialize from parameters: The grid is created directly from the parameters passed as key-word arguments. + + **Parameters** + + grid_id: *str* or *None* + Identifier to re-create a grid, or None, if new or default parameters are given. + + grid_type: *str* + Currently two options are available: + * *nonuniform* - Create a non-uniform grid from parameters + * *uniform* - Create an uniform grid from parameters + * *custom* - Create a custom grid from lists defining the limits of the grid + + e_ref: float + Reference energy in eV, i.e. the center of the grid. + + delta_e_min: float + Minimal energy difference in eV between two grid points. + + delta_e_max: float + Maximal energy difference in eV between two grid points. + Ignored if *uniform* grid is created. + + delta_rho_min: float + Minimal difference of states in an energy interval. + + delta_rho_max: float + Maximal difference of states in an energy interval. + Ignored if *uniform* grid is created. + + width: float + Width of the feature region of the Grid in eV. + Ignored if *uniform* grid is created. + + n_pix: int + Number of intervals to discretize states in an energy interval. + + cutoff: (float, float) + Lowest and highes energies in eV to be included in the grid. + + energy_discretization: list[float] + Used by *custom* grid - ignored else. + Values of energy intervals of the grid, in eV. + + states_discretization: list[float] + Used by *custom* grid - ignored else. + Maximal number of states for each energy interval of the grid. + + **Returns** + + self: *Grid* + The grid object with set grid parameters. + + **Raises** + + TypeError: + Grid type can not be identified (neither "nonuniform", nor "uniform", nor "custom"). + + ValueError: + Grid type can no be identified. + """ + if grid_id is None: self = Grid() self.grid_type = grid_type - self.mu = mu - self.sigma = sigma - self.cutoff = cutoff - self.num_bins = num_bins - self.original_stepsize = original_stepsize + self.e_ref = e_ref + self.delta_e_min = delta_e_min + self.delta_e_max = delta_e_max + self.delta_rho_min = delta_rho_min + self.delta_rho_max = delta_rho_max + self.width = width + self.cutoff_min = cutoff[0] + self.cutoff_max = cutoff[1] + self.n_pix = n_pix + self.energy_discretization = energy_discretization + self.states_discretization = states_discretization self.grid_id = self.get_grid_id() else: - self = Grid().create(**Grid().resolve_grid_id(grid_id)) + self = Grid.create(**Grid.resolve_grid_id(grid_id)) return self def get_grid_id(self): - id = ':'.join([self.grid_type, str(self.num_bins), str(self.mu), str(self.sigma), str(self.cutoff)]) - return id + self._check_created() + if self.grid_type == "nonuniform": + grid_id_values = [self.grid_type, self.e_ref, self.delta_e_min, self.delta_e_max, self.delta_rho_min, self.delta_rho_max, self.width, self.cutoff_min, self.cutoff_max, self.n_pix] + elif self.grid_type == "uniform": + grid_id_values = [self.grid_type, self.e_ref, self.delta_e_min, self.delta_rho_min, self.cutoff_min, self.cutoff_max, self.n_pix] + elif self.grid_type == "custom": + if self.energy_discretization is None: + raise ValueError("Must provide valid energy discretization for custom fingerprint.") + if self.states_discretization is None: + raise ValueError("Must provide valid states discretization for custom fingerprint.") + from hashlib import md5 + all_values = [self.n_pix, self.e_ref] + all_values = np.append(all_values, self.energy_discretization) + all_values = np.append(all_values, self.states_discretization) + all_values = "".join([str(value) for value in all_values]) + return f"custom:{md5(all_values.encode()).hexdigest()}" + else: + raise ValueError("Grid type can not be identified") + grid_id = ':'.join([str(value) for value in grid_id_values]) + return grid_id @staticmethod - def resolve_grid_id(grid_id): - grid_type, num_bins, mu, sigma, cutoff = grid_id.split(':') - return {'grid_type' : grid_type, 'num_bins' : int(num_bins), 'mu' : float(mu), 'sigma' : float(sigma), 'cutoff' : tuple([float(x) for x in cutoff[1:-1].split(',')])} - - def grid(self): - if self.grid_type != 'dg_cut': - raise NotImplementedError('Currently, only the grid dg_cut is implemented.') - asc = 0 - desc = 0 - x_grid = [0] - while (asc is not None) or (desc is not None): - if asc is not None: - asc += self._step_sequencer(asc, self.mu, self.sigma, self.original_stepsize) * self.original_stepsize - x_grid = x_grid + [round(asc, 8)] - if asc > self.cutoff[1]: - asc = None - if desc is not None: - desc -= self._step_sequencer(desc, self.mu, self.sigma, self.original_stepsize) * self.original_stepsize - x_grid = [round(desc, 8)] + x_grid - if desc < self.cutoff[0]: - desc = None + def resolve_grid_id(grid_id: str) -> dict: + grid_variables = {} + grid_id_variables = grid_id.split(':') + if grid_id_variables[0] == "nonuniform": + grid_variables["grid_type"] = grid_id_variables[0] + grid_variables["e_ref"] = float(grid_id_variables[1]) + grid_variables["delta_e_min"] = float(grid_id_variables[2]) + grid_variables["delta_e_max"] = float(grid_id_variables[3]) + grid_variables["delta_rho_min"] = float(grid_id_variables[4]) + grid_variables["delta_rho_max"] = float(grid_id_variables[5]) + grid_variables["width"] = float(grid_id_variables[6]) + grid_variables["cutoff"] = [float(grid_id_variables[7])] + grid_variables["cutoff"].append(float(grid_id_variables[8])) + grid_variables["n_pix"] = int(grid_id_variables[9]) + elif grid_id_variables[0] == "uniform": + grid_variables["grid_type"] = grid_id_variables[0] + grid_variables["e_ref"] = float(grid_id_variables[1]) + grid_variables["delta_e_min"] = float(grid_id_variables[2]) + grid_variables["delta_rho_min"] = float(grid_id_variables[3]) + grid_variables["cutoff"] = [float(grid_id_variables[4])] + grid_variables["cutoff"].append(float(grid_id_variables[5])) + grid_variables["n_pix"] = int(grid_id_variables[6]) + elif grid_id_variables[0] == "custom": + raise TypeError("Custom grid id can not be resolved.") + else: + raise ValueError("Grid type can not be identified") + return grid_variables + + def grid(self) -> list: + """ + Generate a grid with parameters set by `self.grid_id`. + + **Returns** + + grid: *list* + Nested list of type [[, []], ...] + """ + self._check_created() + if self.grid_type == "nonuniform": + grid = self._grid_non_uniform(self.delta_e_min, self.delta_e_max, self.delta_rho_min, self.delta_rho_max, self.width, self.n_pix, [self.cutoff_min, self.cutoff_max]) + elif self.grid_type == "uniform": + grid = self._grid_uniform(self.delta_e_min, self.delta_rho_min, self.n_pix, [self.cutoff_min, self.cutoff_max]) + elif self.grid_type == "custom": + grid = self._grid_custom(self.energy_discretization, self.states_discretization, self.n_pix) + else: + raise ValueError("Grid type can not be identified") + return grid + + def copy(self) -> object: + """ + Create a copy of self. + """ + return Grid.create(grid_id=self.get_grid_id()) + + def _grid_non_uniform(self, delta_e_min, delta_e_max, delta_rho_min, delta_rho_max, width, n_pix, cutoff): + """ + New implementation of the grid. Follows the description in the publication. + """ + + f_energies = partial(self.gauss_function, w_min = delta_e_min, w_max = delta_e_max, sigma = width) + + energies = self.energy_intervals_from_function(f_energies, delta_e_min, cutoff) + + f_heights = partial(self.gauss_function, w_min = delta_rho_min, w_max = delta_rho_max, sigma = width) + + max_heights = self.grid_height_from_function(f_heights, energies, delta_rho_min) + + grid = self.grid_from_lists(energies, max_heights, n_pix) + + return grid + + def _grid_uniform(self, delta_e, delta_rho, n_pix, cutoff): + + def f_energies(x): + return 1 + + energies = self.energy_intervals_from_function(f_energies, delta_e, cutoff) + + max_heights = self.grid_height_from_function(f_energies, energies, delta_rho) + + grid = self.grid_from_lists(energies, max_heights, n_pix) + + return grid + + def _grid_custom(self, energy_discretization, states_discretization, n_pix): + return self.grid_from_lists(energy_discretization, states_discretization, n_pix) + + def grid_from_lists(self, energies: List[float], max_heights: List[float], n_bins: int) -> List[List[float]]: + """ + Generate a grid from lists of energies, maximal heights, and the number of bins. + + **Arguments** + + energies: *list* + List of energy boundaries, aligned at the negative edge of the energy interval for energy bin. + + max_heights: *list* + List of maximal heights, defined for each energy boundary in `energies`. + + n_bins: *int* + Number of bins that is used to discretise each energy interval. + + **Returns** + + grid: *list* + Nested list of type [[, [, ...]], ...] + """ + # avoid unexpected behaviour + assert len(energies) == len(max_heights), "Number of energy intervals is not equal to the number of maximal heights for each energy." + + # generate grid grid = [] - for item in x_grid: - bins = [] - bin_height = self._step_sequencer(item, self.mu, self.sigma, original_stepsize=0.1) / (self.num_bins * 2.) - for idx in range(1, self.num_bins + 1): - bins.append(bin_height * idx) - grid.append([item, bins]) + for energy, max_height in zip(energies, max_heights): + bin_height = max_height / n_bins + grid.append([energy, [round(idx * bin_height, 10) for idx in range(1, n_bins + 1)]]) return grid - def _gauss(self, x, mu, sigma, normalized=True): - coefficient = (np.sqrt(2 * np.pi) * sigma) - value = np.exp((-0.5) * ((x - mu) / (sigma * 1.)) ** 2) - if normalized: - value = value / (coefficient * 1.) - return value + def get_grid_indices_for_energy_range(self, energy: List[float]) -> Set[int]: + """ + Compute the grid indices that match a given energy range. + + **Arguments** + + energy: *list* + Energies to match grid energies. + + **Returns** + + grid_start, grid_end: set + First and last index of the grid that are in the limits of the energies. + """ + grid_energies = [x[0] for x in self.grid()] + energy = [e for e in energy if (e >= grid_energies[0] and e <= grid_energies[-1])] + for idx, grid_e in enumerate(grid_energies): + if grid_e >= energy[0]: + grid_start = idx if idx >= 0 else 0 + break + for idx, grid_e in reversed(list(enumerate(grid_energies))): + if grid_e <= energy[-1]: + grid_end = idx + break + return grid_start, grid_end + + def energy_intervals_from_function(self, function: Callable, minimal_interval: float, energy_limits: List[float]) -> list: + """ + Generate a set of energy intervals from a given function. The energy intervals are generated as: + + e_i = \\sum_{j=0}^{i-1} * minimal_interval * function(e_j) + + and + + e_{-i} = -1 * e_i + + **Arguments** + + function: *Callable* + function that maps an energy to a number greater or equal 1 + + minimal_interval: *float* + minimal interval width, i.e. minimal distance between two intervals + + energy_limits: *List[float]* + list [, ] that determines the limits between which energy intervals should be calculated + """ + # assumes the spectrum is shifted to the reference energy, which is then located at e = 0 + energies = [0] + + # we need the maximum absolute limit to generate intervals + max_limit = max([abs(lim_) for lim_ in energy_limits]) + + while energies[-1] < max_limit: + + # make sure the minimal interval is kept + next_step = max([function(energies[-1]), 1]) + + # get new interval width + next_step *= minimal_interval + + # apply to series + next_step += energies[-1] + + # increase numerical stability + next_step = np.round(next_step, 8) + + # update + if next_step <= max_limit: + energies.append(next_step) + else: + break + + # generate negative indices + full_set = [-1 * energy for energy in energies[1:] if energy <= abs(energy_limits[0])] + + # filter positive energies to match limits, and merge negative and positive indices + full_set.extend(list(filter(lambda x: x <= energy_limits[1], energies))) + + # make sure the sorting is correct + full_set.sort() + + return full_set + + def grid_height_from_function(self, function: Callable, energies: list, minimal_height: float) -> list: + """ + Use provided function to calculate the grid height at each energy. + + **Arguments** + + function: *Callable* + Function that assigns an integer number to an energy value + + energies: *list* + List of energies for which the bin heights are calculated + + minimal_height: *float* + Minimal height of a bin + """ + return [max([function(energy), 1]) * minimal_height for energy in energies] + + def gauss_function(self, x: float, w_min: float, w_max: float, sigma: float) -> int: + """ + Function that assigns an integer value to each `x`. + + **Arguments** + + x: *float* + Input value + + w_min: *float* + Minimal width of the interval + + w_max: *float* + Maximal width of the interval + + sigma: *float* + Parameter controlling the width of the interal gaussian function + """ + g = (1 - np.exp(-0.5 * (x / sigma)**2)) + value = g * (w_max / w_min - 1) + 1 + return int(value) - def _step_sequencer(self, x, mu, sigma, original_stepsize): - return int(round((1 + original_stepsize - self._gauss(x, mu, sigma, normalized=False)) / original_stepsize, 9)) + def _check_created(self): + if not hasattr(self, "grid_type"): + raise NotCreatedError("Grid was not created. To do so, use Grid.create(**parameters).") \ No newline at end of file diff --git a/nomad_dos_fingerprints/plotting.py b/nomad_dos_fingerprints/plotting.py index b606457..3f9bd68 100644 --- a/nomad_dos_fingerprints/plotting.py +++ b/nomad_dos_fingerprints/plotting.py @@ -1,15 +1,52 @@ +from typing import Union, Iterable + import matplotlib.pyplot as plt -from bitarray import bitarray - -def plot_FP_in_grid(byte_fingerprint, grid, show = True, label = '', axes = None, **kwargs): - x=[] - y=[] - all_width=[] - bin_fp=bitarray() - bin_fp.frombytes(bytes.fromhex(byte_fingerprint.bins)) - grid_indices=byte_fingerprint.indices - plotgrid=grid.grid() + +from nomad_dos_fingerprints import DOSFingerprint, Grid + +def _apply_grid_offset(grid_array: list, offset: float): + for idx in range(len(grid_array)): + grid_array[idx][0] += offset + return grid_array + +def plot_fingerprint_in_grid(fingerprint: DOSFingerprint, + show: bool = True, + label: str = '', + axes: Union[plt.Axes, None] = None, **kwargs) -> None: + """ + Plot a fingerprint in its grid representation. + + **Arguments:** + + fingerprint: `nomad_dos_fingerprints.DOSFingerprint` + Fingerprint to be plotted + + **Keyword arguments:** + + show: `bool` + Show the plot. + + default = `True` + + label: `str` + Label for the plot. Labels are required for legends in plots. + + default: `''` + + axes: `matplotlib.pyplot.Axes` or `None` + Axes to draw fingerprint on. If `None`, create new axes. + + default: `None` + + Additional keyword arguments are passed to matplotlib.pyplot.bar(*, **kwargs). + """ + x, y, all_width = [], [], [] + bin_fp=fingerprint.get_bitarray() + grid_indices=fingerprint.indices + grid = Grid.create(grid_id=fingerprint.grid_id) + plotgrid=_apply_grid_offset(grid.grid(), grid.e_ref) plotgrid=plotgrid[grid_indices[0]:grid_indices[1]] + bit_position=0 for index,item in enumerate(plotgrid): if index None: + """ + Plot horizontal lines of a grid. + + **Arguments:** + + grid: `nomad_dos_fingerprints.Grid` + Grid to plot. + + **Keyword arguments:** + + plot_style: `dict` + Style of grid lines + + default: `{'c' : 'k'}` + + axes: `matplotlib.pyplot.Axes` or `None` + Axes to draw on. If `None`, create new axes. + + default: `None` + + only_top: `bool` + Show only the highest line + + default: `False` + """ + grid_array = _apply_grid_offset(grid.grid(), grid.e_ref) + if axes is None: + axes = plt.gca() + e_, r_ = grid_array[0] + for idx, entry in enumerate(grid_array[1:]): + e, rhos = entry + if list(rhos) == list(r_): + continue + elif rhos[-1] > r_[-1]: # increasing bin height + if only_top: + axes.plot([e_, e], [max(r_), max(r_)], **plot_style) + else: + for rho in r_: + axes.plot([e_, e], [rho, rho], **plot_style) + e_, r_ = entry + elif rhos[-1] < r_[-1]: # decreasing bin height + if only_top: + axes.plot([e_, grid_array[idx][0]], [max(r_), max(r_)], **plot_style) + else: + for rho in r_: + axes.plot([e_, grid_array[idx][0]], [rho, rho], **plot_style) + e_, r_ = grid_array[idx][0], rhos # energy of the *last* bin + if only_top: + axes.plot([e_, e], [max(r_), max(r_)], **plot_style) + else: + for rho in r_: + axes.plot([e_, e], [rho, rho], **plot_style) + + +def plot_vertical_lines(grid: Grid, + plot_style: dict = {'c':'k'}, + axes: Union[plt.Axes, None] = None) -> None: + """ + Plot horizontal lines of a grid. + + **Arguments:** + + grid: `nomad_dos_fingerprints.Grid` + Grid to plot. + + **Keyword arguments:** + + plot_style: `dict` + Style of grid lines + + default: `{'c' : 'k'}` + + axes: `matplotlib.pyplot.Axes` or `None` + Axes to draw on. If `None`, create new axes. + + default: `None` + """ + grid_array = _apply_grid_offset(grid.grid(), offset=grid.e_ref) + if axes is None: + axes = plt.gca() + for e, rhos in grid_array: + axes.plot([e,e], [0,max(rhos)], **plot_style) + +def plot_grid(grid: Grid, + vertical: bool = True, + horizontal: bool = True, + horizontal_only_top: bool = False, + plot_style: dict = {'c':'k', 'linewidth' : 0.5}, + figure: bool = True, + figsize: tuple = (15,10), + limits: Union[Iterable, None] = None, + show: bool = True, + axes: Union[plt.Axes, None] = None) -> None: + """ + Plot a Grid. + + **Arguments:** + + grid: `nomad_dos_fingerprints.Grid` + Grid to plot + + **Keyword arguments:** + + vertical: `bool` + Plot vertical grid lines + + default: `True` + + horizontal: `bool` + Plot horizontal grid lines + + default: `True` + + horizontal_only_top: `bool` + Plot only uppermost horizontal grid lines + + default: `False` + + plot_style: `dict` + Style of grid lines + + default: `{'c' : 'k'}` + + figure: `bool` + Create figure + + default: `True` + + figsize: `tuple` + Size of create figure, + ignored if `figure` is `False`. + + default: `(15,10)` + + limits: `list` of `float` or `None` + if not `None`, contains the axis limits: + `[x_min, x_max, y_min, y_max]` + + default: `None` + + show: `bool` + Show the plot. + + default = `True` + + axes: `matplotlib.pyplot.Axes` or `None` + Axes to draw on. If `None`, create new axes. + + default: `None` + + """ + if figure: + plt.figure(figsize=figsize) + if axes is None: + axes = plt.gca() + if vertical: + plot_vertical_lines(grid, plot_style=plot_style, axes = axes) + if horizontal: + plot_horizontal_lines(grid, plot_style=plot_style, axes = axes, only_top=horizontal_only_top) + if limits is not None: + axes.set_xlim(*limits[:2]) + axes.set_ylim(*limits[2:]) + if show: + plt.show() diff --git a/nomad_dos_fingerprints/similarity.py b/nomad_dos_fingerprints/similarity.py index b68a88a..a865ca9 100644 --- a/nomad_dos_fingerprints/similarity.py +++ b/nomad_dos_fingerprints/similarity.py @@ -1,14 +1,17 @@ -import numpy as np +from typing import Tuple from bitarray import bitarray -def match_fingerprints(fingerprint1, fingerprint2): +class FingerprintMismatchError(Exception): + + def __init__(self, *args: object) -> None: + super().__init__(*args) + +def match_fingerprints(fingerprint1, fingerprint2) -> Tuple[bitarray, bitarray]: if fingerprint1.grid_id != fingerprint2.grid_id: - raise AssertionError('Can not calculate similarity of fingerprints that have been calculated with different grids.') - num_bins = int(fingerprint1.grid_id.split(':')[1]) - fp1 = bitarray() - fp2 = bitarray() - fp1.frombytes(bytes.fromhex(fingerprint1.bins)) - fp2.frombytes(bytes.fromhex(fingerprint2.bins)) + raise AssertionError(f'Can not calculate similarity of fingerprints that have been calculated with different grids.\nFingerprint 1: {fingerprint1.grid_id}\nFingerprint 2: {fingerprint2.grid_id}') + num_bins = fingerprint1.n_state_bins + fp1 = fingerprint1.get_bitarray() + fp2 = fingerprint2.get_bitarray() start_index = max([fingerprint1.indices[0], fingerprint2.indices[0]]) stop_index = min([fingerprint1.indices[1], fingerprint2.indices[1]]) # find offsets @@ -16,12 +19,33 @@ def match_fingerprints(fingerprint1, fingerprint2): dsp2 = (start_index - fingerprint2.indices[0]) * num_bins dep1 = (fingerprint1.indices[1] - stop_index) * num_bins dep2 = (fingerprint2.indices[1] - stop_index) * num_bins - fp1 = fp1[dsp1:len(fp1) - 1 - dep1] - fp2 = fp2[dsp2:len(fp2) - 1 - dep2] + fp1 = fp1[dsp1:len(fp1) - dep1] + fp2 = fp2[dsp2:len(fp2) - dep2] return fp1, fp2 def tanimoto_similarity(fingerprint1, fingerprint2): + """ + Tanimoto similarity (Tc) between `DOSFingerprint` objects. + + Evaluates: + + $Tc(a, b) = a * b / ( a**2 + b**2 - a * b )$ + + for binary-valued fingerprint vectors $a$ and $b$. + + Before calculating Tc, the fingerprints are matched such that the same energy + regions are descibed in both fingerprints. + + If any fingerprint is the null-vector (e.g. the energy regions do not overlap), Tc = 0. + + **Returns:** + + tc: `float` + Tc between both fingerprints. + """ fp1, fp2 = match_fingerprints(fingerprint1, fingerprint2) + if len(fp1) != len(fp2): + raise FingerprintMismatchError(f"Can not match fingerprints of len {len(fp1)} and {len(fp2)}.") a = fp1.count() b = fp2.count() c = (fp1 & fp2).count() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..fa65306 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,33 @@ +[build-system] +requires = ["setuptools >= 61.0"] +build-backend = "setuptools.build_meta" + +[project] +name="nomad_dos_fingerprints" +version="2.0.0" +description = "Fingerprints of the electronic density of states of materials." +readme = {file = "README.md", content-type = "text/markdown"} +authors = [ + {name="Martin Kuban et al."} +] +maintainers = [ + {name="Martin Kuban", email="kuban@physik.hu-berlin.de"} +] +requires-python = ">= 3.7" +dependencies = [ + "numpy >= 1.22.2", + "bitarray == 2.4.1", + "matplotlib == 3.5.1" + +] +license = {file = "LICENSE"} + +[project.optional-dependencies] +tests = ['pytest'] + +[project.urls] +Repository = "https://github.com/kubanmar/dos-fingerprints" + +[tool.setuptools.packages.find] +include=["nomad_dos_fingerprints"] +exclude=["tests"] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 9955dec..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -pytest -pytest-cov diff --git a/setup.py b/setup.py index 2b699de..b908cbe 100644 --- a/setup.py +++ b/setup.py @@ -1,17 +1,3 @@ import setuptools -with open("README.md", "r") as fh: - long_description = fh.read() - -setuptools.setup( - name="nomad_dos_fingerprints", - version="1.0", - author="Martin Kuban", - author_email="kuban@physik.hu-berlin.de", - description="An implementation of DOS fingerprints for the NOMAD Laboratory.", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-dos-fingerprints", - install_requires=['numpy', 'bitarray'], - packages=['nomad_dos_fingerprints'] -) +setuptools.setup() diff --git a/tests/test_DOSfingerprint.py b/tests/test_DOSfingerprint.py index bc6ed94..e47780a 100644 --- a/tests/test_DOSfingerprint.py +++ b/tests/test_DOSfingerprint.py @@ -1,23 +1,31 @@ -import pytest import numpy as np +import pytest -from nomad_dos_fingerprints import DOSFingerprint, tanimoto_similarity +from nomad_dos_fingerprints import tanimoto_similarity +from nomad_dos_fingerprints import DOSFingerprint, Grid from nomad_dos_fingerprints.DOSfingerprint import ELECTRON_CHARGE def test_integrate_to_bins(): - test_data_x = np.linspace(0, np.pi, num = 1000) - test_data_y = [np.sin(x) for x in test_data_x] - fp = DOSFingerprint(stepsize=0.001) - energies, dos = fp._integrate_to_bins(test_data_x, test_data_y) - energies_test_data = [] - current_energy = 0 + + def get_area_below_curve(x, func, fp): + y = [func(x_) for x_ in x] + _, dos = fp._integrate_to_bins(x, y, stepsize=0.001) + return sum(dos) + + def dummy_function(x): + return np.sin(x) + + test_data_x = np.linspace(0, np.pi, num = 10000) + test_data_y = [dummy_function(x) for x in test_data_x] + fp = DOSFingerprint() + energies, dos = fp._integrate_to_bins(test_data_x, test_data_y, stepsize=0.01) assert 0 in energies - while current_energy < np.pi - fp.stepsize: - energies_test_data.append(current_energy) - current_energy += fp.stepsize - assert np.isclose(sum(dos), 2) + assert np.isclose(get_area_below_curve([-0.0009,1.0009], lambda _: 1, fp,), 1, atol=1e-12), "integration area was not cut correctly" + assert np.isclose(sum(dos), 2, atol=1e-3), "Calculated area is not correct" + assert np.isclose(get_area_below_curve(np.linspace(-np.pi, 0, num = 10000), dummy_function, fp), -2, atol=1e-4), "Calculated area is not correct" assert len(energies) == len(dos) - assert (np.isclose(energies, np.array(energies_test_data))).all() + for idx, e in enumerate(energies): + assert np.isclose(e, idx * 0.01), "Error in calculating integration intervals" def test_convert_dos(): test_data_x = np.arange(1, 5, step = 0.01) @@ -33,7 +41,66 @@ def test_convert_dos(): def test_serialization(): test_data_x = np.linspace(0, np.pi, num = 1000) test_data_y = [np.sin(x) for x in test_data_x] - fp = DOSFingerprint(stepsize=0.001).calculate([x * ELECTRON_CHARGE for x in test_data_x], [[x / ELECTRON_CHARGE for x in test_data_y]]) + fp = DOSFingerprint(stepsize=0.001).calculate(test_data_x, test_data_y) + print(fp) fp_json = fp.to_dict() fp_again = DOSFingerprint().from_dict(fp_json) assert tanimoto_similarity(fp, fp_again) == 1 + +def test_adapt_energy_bin_sizes(): + fp = DOSFingerprint() + dummy_energy, dummy_dos = fp._integrate_to_bins(np.arange(-10,6,1), np.ones(16)) + e, d = fp._adapt_energy_bin_sizes(dummy_energy, dummy_dos, Grid.create(grid_id = fp.grid_id)) + grid = Grid.create(grid_id = fp.grid_id) + grid_start, grid_end = grid.get_grid_indices_for_energy_range(dummy_energy) + grid_array = grid.grid() + # energy grid points are the same as in the Grid + assert e[0] == grid_array[grid_start][0] + assert e[-1] == grid_array[grid_end-1][0] # the last grid energy is the final border + # the test case is integrated correctly + cut_grid_array = grid_array[grid_start:grid_end+1] # +1: inclusion of the last point for "manual" integration below + reference = [(cut_grid_array[idx+1][0] - cut_grid_array[idx][0]) for idx in range(len(cut_grid_array) -1)] + assert np.allclose(d, reference) + # misc + assert (grid_start, grid_end - 1) == grid.get_grid_indices_for_energy_range([np.round(x, 5) for x in e]) # e is 1 block shorter due to summation + +def test_calc_grid_indices(): + fp = DOSFingerprint() + grid = Grid.create(grid_id = fp.grid_id) + dummy_energy, dummy_dos = fp._integrate_to_bins(np.arange(-10,6,1), np.ones(16)) + e, _ = fp._adapt_energy_bin_sizes(dummy_energy, dummy_dos, Grid.create(grid_id = fp.grid_id)) + indices = fp._calc_grid_indices(e, grid) + assert indices == fp.indices + assert indices == list(grid.get_grid_indices_for_energy_range(e)) + +def test_calc_bit_vector(): + grid = Grid.create() + fp = DOSFingerprint(grid_id = grid.get_grid_id()) + grid_array = grid.grid() + fp.indices = [0, len(grid_array)-1] + all_ones = fp._calc_bit_vector([max(grid_column[1]) for grid_column in grid_array], grid) + assert all_ones == '1' * grid.n_pix * abs(fp.indices[1]+1 - fp.indices[0]) + print(grid.n_pix/2 - 1) + all_half_filled = fp._calc_bit_vector([grid_column[1][int(grid.n_pix/2 - 1)] for grid_column in grid_array], grid) + assert all_half_filled == ('1' * int(grid.n_pix / 2) + '0' * int(grid.n_pix / 2)) * abs(fp.indices[1]+1 - fp.indices[0]) + +def test_compress_bit_string(): + fp = DOSFingerprint() + compressed = fp._compress_binary_fingerprint_string("111000011") + assert compressed == "3t4f2t", "Compression is not correct" + +def test_expand_bit_string(): + fp = DOSFingerprint() + expanded = fp._expand_fingerprint_string("5t3f6t") + assert expanded == "11111000111111", "Expandsion of compressed bins failed" + +def test_get_similarity(): + x = np.round(np.linspace(0,1.1, num=1000), 8) + grid = Grid.create(grid_type="uniform", e_ref=0.5, cutoff=[-0.5,0.5], delta_e_min=0.01, delta_rho_min=0.015, n_pix=10) + fp_a = DOSFingerprint().calculate(x, [1 if x_i <= 0.75 else 0 for x_i in x], grid_id = grid.get_grid_id()) + fp_b = DOSFingerprint().calculate(x, [1 if x_i > 0.25 else 0 for x_i in x], grid_id = grid.get_grid_id()) + assert fp_a.get_similarity(fp_b) == 0.5, "Similarity obtained from get_similarity is wrong" + +@pytest.mark.skip() +def test_get_bitarray(): + raise NotImplementedError("TODO: Implement") \ No newline at end of file diff --git a/tests/test_Grid.py b/tests/test_Grid.py index 89a426b..058b7cc 100644 --- a/tests/test_Grid.py +++ b/tests/test_Grid.py @@ -1,20 +1,96 @@ -import pytest, json, os -import numpy as np +import pytest +import json +import os from nomad_dos_fingerprints import Grid +from nomad_dos_fingerprints.grid import NotCreatedError + +@pytest.fixture +def grid(): + return Grid() with open(os.path.join(os.path.dirname(__file__), 'grid_test.json'), 'r') as test_data_file: test_grid_data = json.load(test_data_file) -def test_gen_grid_id(): - grid = Grid().create(mu = -5, sigma = 10, grid_type = 'dg_cut', num_bins = 64, cutoff = (-15,10)) - assert grid.get_grid_id() == 'dg_cut:64:-5:10:(-15, 10)' +def test_grid_from_lists(grid): + + expected_grid = [ + [1, [0.5, 1.0, 1.5, 2.0]], + [2, [1.0, 2.0, 3.0, 4.0]], + [3, [2.0, 4.0, 6.0, 8.0]] + ] + + list_grid = grid.grid_from_lists([1,2,3], [2,4,8], 4) + assert list_grid == expected_grid, "Got wrong grid from list" + +def test_energy_intervals_from_function(grid): + + def test_function(value: float) -> int: + return int(value) + + expected_output = [ + -16, -8, -4, -2, -1, 0, 1, 2, 4, 8 + ] + + assert grid.energy_intervals_from_function(test_function, 1, [-16,10]) == expected_output, "Wrong energy intervals calculated" + +def test_grid_height_from_function(grid): + + def test_function(value: float) -> int: + return int(value) + + expected_output = [ + 2, 2, 10, 8 + ] + + assert grid.grid_height_from_function(test_function, [0.1, 1, 5, 4], 2) == expected_output, "Wrong bin heights calculated" + +def test_gauss_function(grid): + + assert [grid.gauss_function(x, 1, 4, 0.4) for x in range(-5,6)] == [4, 4, 3, 3, 3, 1, 3, 3, 3, 4, 4], "Gauss function has changed unexpectedly" + +def test_gen_grid_id_v2(grid): + grid = grid.create(grid_type = "nonuniform", e_ref = 10, delta_e_min = -2, delta_e_max = 9, delta_rho_min = 0.1, delta_rho_max = 10, width = 0.7, cutoff = [-20,30], n_pix = 2) + assert grid.get_grid_id() == "nonuniform:10:-2:9:0.1:10:0.7:-20:30:2", "Wrong grid id created for v2" + +def test_resolve_grid_id_v2(grid): + expected = { + "grid_type" : "nonuniform", + "e_ref" : 10, + "delta_e_min" : -2, + "delta_e_max" : 9, + "delta_rho_min" : 0.1, + "delta_rho_max" : 10, + "width" : 0.7, + "cutoff" : [-20,30], + "n_pix" : 2 + } + assert grid.resolve_grid_id("nonuniform:10:-2:9:0.1:10:0.7:-20:30:2") == expected, "Resolving grid id for v2 failed" + +def test_fails_for_not_created_grids(grid): + with pytest.raises(NotCreatedError): + grid.get_grid_id() + +def test_custom_grid(grid): + with pytest.raises(ValueError): + grid = grid.create(grid_type = "custom", states_discretization = [1,2,3], n_pix = 2) + with pytest.raises(ValueError): + grid = grid.create(grid_type = "custom", energy_discretization = [0,1,2], n_pix = 2) + grid = grid.create(grid_type = "custom", energy_discretization = [0,1,2], states_discretization = [1,2,3], n_pix = 2) + grid2 = grid.create(grid_type = "custom", energy_discretization = [0,1,3], states_discretization = [1,2,3]) + grid3 = grid.create(grid_type = "custom", energy_discretization = [0,1,3], states_discretization = [1,2,3], n_pix = 2) + + assert grid.grid() == [[0, [0.5,1]], [1, [1,2]], [2, [1.5, 3.0]]], "Did not create correct custom grid" + + assert grid2.get_grid_id() != grid3.get_grid_id() + +def test_uniform_grid(grid): + grid = grid.create(grid_type = "uniform", delta_e_min = 1, delta_rho_min = 1, cutoff = [-1,1], n_pix = 4) -def test_grid(): - grid = Grid().create() - assert grid.grid() == list(test_grid_data.values())[0] + expected_uniform_grid = [ + [-1, [0.25,0.5,0.75,1]], + [0, [0.25,0.5,0.75,1]], + [1, [0.25,0.5,0.75,1]] + ] -def test_generate_grid_from_id(): - grid = Grid().create() - grid_ids = Grid().create(grid_id = 'dg_cut:56:-2:7:(-10, 5)') - assert grid.grid() == grid_ids.grid() \ No newline at end of file + assert grid.grid() == expected_uniform_grid, "Create wrong uniform grid" \ No newline at end of file diff --git a/tests/test_functional.py b/tests/test_functional.py index fb0fd7b..6bb8294 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -1,21 +1,11 @@ -from nomad_dos_fingerprints import DOSFingerprint, tanimoto_similarity -import pytest, os, json +from nomad_dos_fingerprints import DOSFingerprint +import os +import json import numpy as np with open(os.path.join(os.path.dirname(__file__), 'fingerprint_generation_test_data.json'), 'r') as test_data_file: test_data = json.load(test_data_file) -def test_fingerprint_values(): - - for fp, mid in test_data['fingerprints']: - raw_data = test_data[mid] - new_fingerprint = DOSFingerprint().calculate(raw_data['dos_energies'], raw_data['dos_values']) - old_fingerprint = DOSFingerprint() - old_fingerprint.bins = json.loads(fp)['bins'] - old_fingerprint.indices = json.loads(fp)['indices'] - old_fingerprint.grid_id = new_fingerprint.grid_id - assert old_fingerprint.indices == new_fingerprint.indices - assert np.isclose(old_fingerprint.get_similarity(new_fingerprint),1, atol=5e-2) def test_materials_similarity(): @@ -23,9 +13,8 @@ def test_materials_similarity(): similarity_matrix = test_data['simat'] mids = [x[1] for x in fingerprints] raw_data = [test_data[mid] for mid in mids] - new_fingerprints = [DOSFingerprint().calculate(entry['dos_energies'], entry['dos_values']) for entry in raw_data] + new_fingerprints = [DOSFingerprint().calculate(entry['dos_energies'], entry['dos_values'], convert_data="ENC") for entry in raw_data] matrix = [] for fp in new_fingerprints: matrix.append(fp.get_similarities(new_fingerprints)) - print(matrix - np.array(similarity_matrix)) - assert np.isclose(similarity_matrix, matrix, atol = 5e-2).all() + assert np.allclose(similarity_matrix, matrix, atol = 1e-2) diff --git a/tests/test_similarity.py b/tests/test_similarity.py index 9f4808b..7861d13 100644 --- a/tests/test_similarity.py +++ b/tests/test_similarity.py @@ -1,25 +1,46 @@ -import pytest, os, json -from bitarray import bitarray -from nomad_dos_fingerprints import tanimoto_similarity, DOSFingerprint, Grid -from nomad_dos_fingerprints.DOSfingerprint import ELECTRON_CHARGE +import os +import json +from nomad_dos_fingerprints import tanimoto_similarity +from nomad_dos_fingerprints.similarity import match_fingerprints +from nomad_dos_fingerprints.DOSfingerprint import ELECTRON_CHARGE, DOSFingerprint + with open(os.path.join(os.path.dirname(__file__), 'fingerprint_generation_test_data.json'), 'r') as test_data_file: test_data = json.load(test_data_file) -def test_tanimoto(): +def test_match_fingerprints(): + + fp1 = DOSFingerprint() + fp2 = DOSFingerprint() + fp1.bins = fp1._compress_binary_fingerprint_string('00000000111111110000000011111111') + fp2.bins = fp1._compress_binary_fingerprint_string('1111111100000000') + grid_id = 'nonuniform:1:1:1:1:1:1:1:1:8' + fp1.grid_id = grid_id + fp2.grid_id = grid_id + fp1.indices = [0,3] + fp2.indices = [1,2] + print(fp1.n_state_bins, fp2.n_state_bins) + + bits1, bits2 = match_fingerprints(fp1, fp2) + + assert len(bits1) == len(bits2) == 16, "Fingerprints do not match" + +def test_tanimoto_v2(): # generate fp-type data and check if this can be realized with binary-strings only fp1 = DOSFingerprint() fp2 = DOSFingerprint() - fp1.bins = bitarray('00000000111111110000000011111111').tobytes().hex() - fp2.bins = bitarray('1111111100000000').tobytes().hex() - grid_id = 'a:8:b' + fp1.bins = fp1._compress_binary_fingerprint_string('00000000111111110000000011111111') + fp2.bins = fp1._compress_binary_fingerprint_string('1111111100000000') + grid_id = 'nonuniform:1:1:1:1:1:1:1:1:8' fp1.grid_id = grid_id fp2.grid_id = grid_id fp1.indices = [0,3] fp2.indices = [1,2] - assert tanimoto_similarity(fp1, fp2) == 1 - assert tanimoto_similarity(fp1, fp1) == 1 - assert tanimoto_similarity(fp2, fp2) == 1 + print(fp1.get_bitarray()) + print(fp2.get_bitarray()) + assert tanimoto_similarity(fp1, fp2) == 1, "Non-identical cut fingerprints" + assert tanimoto_similarity(fp1, fp1) == 1, "Non-identity for Fingerprint v2 NR 1" + assert tanimoto_similarity(fp2, fp2) == 1, "Non-identity for Fingerprint v2 NR 1" def test_matching_of_spectra(): data = test_data["17661:2634879"] @@ -27,7 +48,7 @@ def test_matching_of_spectra(): cut_dos = [] cut_energies = [e for e,d in zip(data['dos_energies'], data['dos_values'][0]) if (e / ELECTRON_CHARGE > -7.3 and e / ELECTRON_CHARGE < 2)] cut_dos = [d for e,d in zip(data['dos_energies'], data['dos_values'][0]) if (e / ELECTRON_CHARGE > -7.3 and e / ELECTRON_CHARGE < 2)] - fp = DOSFingerprint().calculate(data['dos_energies'], data['dos_values']) - cut_fp = DOSFingerprint().calculate(cut_energies, [cut_dos]) + fp = DOSFingerprint().calculate(data['dos_energies'], data['dos_values'], convert_data="enc") + cut_fp = DOSFingerprint().calculate(cut_energies, [cut_dos], convert_data="enc") assert tanimoto_similarity(cut_fp, fp) == tanimoto_similarity(fp, cut_fp) assert 1 - tanimoto_similarity(fp, cut_fp) < 1e-2 \ No newline at end of file diff --git a/timing/profiling_fingerprint_generation.py b/timing/profiling_fingerprint_generation.py new file mode 100644 index 0000000..f9dd5b4 --- /dev/null +++ b/timing/profiling_fingerprint_generation.py @@ -0,0 +1,36 @@ +import cProfile +import pstats +import io +from pstats import SortKey + +import numpy as np + +from nomad_dos_fingerprints import DOSFingerprint, Grid + +x = np.linspace(-15,10, num=int(1e3)) +y = np.sin(x)**2 + 1 + +grid_attrs={'grid_type': 'nonuniform', + 'e_ref': -2.0, + 'delta_e_min': 0.05, + 'delta_e_max': 1.05, + 'delta_rho_min': 0.1, + 'delta_rho_max': 5.5, + 'width': 7.0, + 'cutoff': [-8.0, 12.0], + 'n_pix': 2048} +test_grid = Grid.create(**grid_attrs) + +calc_params={"grid_id":test_grid.get_grid_id(), "convert_data":None} + +pr = cProfile.Profile() +pr.enable() +for _ in range(5): + fp = DOSFingerprint().calculate(x, y, **calc_params) +pr.disable() + +s = io.StringIO() +sortby = SortKey.CUMULATIVE +ps = pstats.Stats(pr, stream=s).sort_stats(sortby) +ps.print_stats() +print(s.getvalue())