diff --git a/examples/linear-model.py b/examples/linear-model.py
index 603145e..039d988 100644
--- a/examples/linear-model.py
+++ b/examples/linear-model.py
@@ -17,7 +17,7 @@
 import ase.io
 import numpy as np
 from equistore import Labels
-from equistore.operations import ones_like, slice, sum_over_samples
+from equistore.operations import multiply, ones_like, slice, sum_over_samples
 from rascaline import SoapPowerSpectrum
 
 from equisolve.numpy.models.linear_model import Ridge
@@ -32,7 +32,7 @@
 # As data set we use the SHIFTML set. You can obtain the dataset used in this
 # example from our :download:`website <../../static/dataset.xyz>`.
 # We read the first 20 structures of the data set using
-# `ASE <https://wiki.fysik.dtu.dk/ase/>`.
+# `ASE <https://wiki.fysik.dtu.dk/ase/>`_.
 
 frames = ase.io.read("dataset.xyz", ":20")
@@ -81,7 +81,8 @@
 # We now move all keys into properties to access them for our model.
 
 descriptor = descriptor.keys_to_samples("species_center")
-descriptor = descriptor.keys_to_properties(["species_neighbor_1", "species_neighbor_2"])
+descriptor = descriptor.keys_to_properties(
+    ["species_neighbor_1", "species_neighbor_2"])
 
 # %%
 #
@@ -127,17 +128,31 @@
 # Construct the model
 # -------------------
 #
-# Before we fit the model we have to define our regression values.
+# We first initialize the :class:`equisolve.numpy.models.linear_model.Ridge`
+# object. The mandatory parameter ``parameter_keys`` determines with respect
+# to which parameters the regression is performed. Here, we choose a
+# regression with respect to ``"values"`` (energies) and ``"positions"``
+# (forces).
+
+
+clf = Ridge(parameter_keys=["values", "positions"])
+
+# %%
 #
-# For this we create a TensorMap containing with the desired regulerizer
+# Before we fit a model we have to define our regularizer values.
+#
+# For this we create a TensorMap containing the desired regularizer values.
+# Here we choose a regularizer strength of :math:`1 \cdot 10^{-5}`. Note that
+# without standardizing the features and values the regularizer strength
+# depends on the system; it has to be chosen carefully and is usually optimized.
 
 alpha = ones_like(X)
-alpha.block().values[:] *= 1e-5
+alpha = multiply(alpha, 1e-5)
 
 # %%
 #
 # So far ``alpha`` contains the same number of samples as ``X``. However,
-# the regulerizer only has to be one sample, because all samples will be
+# the regularizer must contain only a single sample, because all samples will be
 # regularized in the same way in a linear model.
 #
 # We remove all samples except the 0th one by using the
@@ -146,50 +161,52 @@
 samples = Labels(
     names=["structure"],
     values=np.array([(0,)]),
 )
 
 alpha = slice(alpha, samples=samples)
 
+print(alpha)
+
 # %%
 #
 # In our regularizer we use the same values for all properties. However,
 # :class:`equisolve.numpy.models.linear_model.Ridge` can also handle different
-# regularization for each property. You can apply a property wise regularization by
-# setting ``"values"`` of ``alpha_dict`` with an 1d array of the same length as the
-# number of properties in the training data X (here 7200)
+# regularization for each property. You can apply a property-wise
+# regularization by setting ``"values"`` of ``alpha`` to a 1d array of the
+# same length as the number of properties in the training data ``X`` (here 7200).
 #
-# With a valid regulerizer object we now initilize the Ridge object.
-# ``parameter_keys`` determines with respect to which parameters the regression is
-# performed. Here, we choose a regression wrt. to ``"values"`` (energies) and
-# ``"positions"`` (forces).
-
+# Next we create a sample weighting :class:`equistore.TensorMap` that weights
+# energies five times more than the forces.
 
-clf = Ridge(parameter_keys=["values", "positions"], alpha=alpha)
+sw = ones_like(y)
+sw = multiply(sw, 5.0)
 
 # %%
 #
-# Next we create a sample weighting :class:`equistiore.TensorMap` that weights energies
-# five times more then the forces.
+# ``sw`` is a :class:`equistore.TensorMap` with the same shape as our target
+# data ``y``, but with all of its values set to 5.0.
 
-sw = ones_like(y)
-sw.block().values[:] *= 5
+print(sw.block())
 
 # %%
 #
-# The function `equisolve.utils.dictionary_to_tensormap` create a
-# :class:`equistore.TensorMap` with the same shape as our target data ``y`` but with
-# values a defined by ``sw_dict``.
+# Finally, we can fit the model using the regularizer and sample weights as
+# defined above.
 
-print(sw)
+clf.fit(X, y, alpha=alpha, sample_weight=sw)
 
-# Finally we can fit the model using the sample weights defined above.
-
-clf.fit(X, y, sample_weight=sw)
+# %%
+#
+# We now calculate the root mean square error of our model using the
+# ``score`` method.
 
+print(f"RMSE energies = {clf.score(X, y, parameter_key='values')[0]:.3f} eV")
+print(f"RMSE forces = {clf.score(X, y, parameter_key='positions')[0]:.3f} eV/Å")
 
-# Finally we can predict values and calculate the root mean squre error
-# of our model.
+# %%
+#
+# If you only want to predict values you can use the
+# :meth:`equisolve.numpy.models.linear_model.Ridge.predict` method.
 
 clf.predict(X)
 
-print(f"RMSE energies = {clf.score(X, y, parameter_key='values')[0]:.3f} eV")
-print(f"RMSE forces = {clf.score(X, y, parameter_key='positions')[0]:.3f} eV/Å")
diff --git a/src/equisolve/numpy/models/linear_model.py b/src/equisolve/numpy/models/linear_model.py
index febc87e..7bd7da0 100644
--- a/src/equisolve/numpy/models/linear_model.py
+++ b/src/equisolve/numpy/models/linear_model.py
@@ -9,8 +9,8 @@
 from typing import List, Optional, Union
 
 import numpy as np
-from equistore import TensorBlock, TensorMap
-from equistore.operations import dot
+from equistore import Labels, TensorBlock, TensorMap
+from equistore.operations import dot, multiply, ones_like, slice
 from equistore.operations._utils import _check_blocks, _check_maps
 
 from ...utils.metrics import rmse
@@ -24,42 +24,26 @@ class Ridge:
 
     .. math::
 
-        w = X^T \left( X \cdot X^T + λ I \right)^{-1} \cdot y \,,
+        w = X^T \left( X \cdot X^T + α I \right)^{-1} \cdot y \,,
 
-    where :math:`X` is the training data, :math:`y` the target data and :math:`λ` the
+    where :math:`X` is the training data, :math:`y` the target data, and :math:`α` is the
     regularization strength.
 
     :param parameter_keys: Parameters to perform the regression for.
-                           Examples are ``"values"``, ``"positions"`` or
-                           ``"cell"``.
-    :param alpha: Constant :math:`λ` that multiplies the L2 term, controlling
-        regularization strength. Values must be a non-negative floats
-        i.e. in [0, inf). :math:`λ` can be different for each column in ``X``
-        to regulerize each property differently.
-    :param rcond: Cut-off ratio for small singular values during the fit. For
-        the purposes of rank determination, singular values are treated as
-        zero if they are smaller than ``rcond`` times the largest singular
-        value in "coefficient" matrix.
-
-    :attr coef_: List :class:`equistore.Tensormap` containing the weights of the
-        provided training data.
+                           Examples are ``"values"``, ``"positions"``,
+                           ``"cell"`` or a combination of these.
     """
 
     def __init__(
         self,
-        parameter_keys: Union[List[str], str],
-        alpha: TensorMap,
-        rcond: float = 1e-13,
+        parameter_keys: Optional[Union[List[str], str]] = None,
     ) -> None:
         if type(parameter_keys) not in (list, tuple, np.ndarray):
             self.parameter_keys = [parameter_keys]
         else:
             self.parameter_keys = parameter_keys
 
-        self.alpha = alpha
-        self.rcond = rcond
-
-        self.coef_ = None
+        self._weights = None
 
     def _validate_data(self, X: TensorMap, y: Optional[TensorMap] = None) -> None:
         """Validates :class:`equistore.TensorBlock`'s for the usage in models.
@@ -89,7 +73,10 @@ def _validate_data(self, X: TensorMap, y: Optional[TensorMap] = None) -> None:
         )
 
     def _validate_params(
-        self, X: TensorBlock, sample_weight: Optional[TensorBlock] = None
+        self,
+        X: TensorBlock,
+        alpha: TensorBlock,
+        sample_weight: Optional[TensorBlock] = None,
     ) -> None:
         """Check that the regularizer and sample weights are correct wrt. ``X``.
 
         :param sample_weight: sample weights
         """
-        _check_maps(X, self.alpha, "_validate_params")
+        _check_maps(X, alpha, "_validate_params")
 
         if sample_weight is not None:
             _check_maps(X, sample_weight, "_validate_params")
 
         for key, X_block in X:
-            alpha_block = self.alpha.block(key)
+            alpha_block = alpha.block(key)
             _check_blocks(
                 X_block, alpha_block, props=["properties"], fname="_validate_params"
             )
@@ -141,25 +128,49 @@ def _numpy_lstsq_solver(self, X, y, sample_weights, alphas, rcond):
         return np.linalg.lstsq(X_eff, y_eff, rcond=rcond)[0]
 
     def fit(
-        self, X: TensorMap, y: TensorMap, sample_weight: Optional[TensorMap] = None
+        self,
+        X: TensorMap,
+        y: TensorMap,
+        alpha: Union[float, TensorMap] = 1.0,
+        sample_weight: Optional[TensorMap] = None,
+        rcond: float = 1e-13,
     ) -> None:
         """Fit Ridge regression model to each block in X.
 
         :param X: training data
         :param y: target values
+        :param alpha: Constant α that multiplies the L2 term, controlling
+            regularization strength. Values must be non-negative floats,
+            i.e. in [0, inf). α can be different for each column in ``X``
+            to regularize each property differently.
         :param sample_weight: sample weights
+        :param rcond: Cut-off ratio for small singular values during the fit. For
+            the purposes of rank determination, singular values are treated as
+            zero if they are smaller than ``rcond`` times the largest singular
+            value in the coefficient matrix.
         """
-        # If alpha was converted we convert it back here.
-        if type(self.alpha) == dict:
-            self.alpha = dict_to_tensor_map(self.alpha)
+
+        if type(alpha) is float:
+            alpha_tensor = ones_like(X)
+
+            samples = Labels(
+                names=X.sample_names,
+                values=np.zeros([1, len(X.sample_names)], dtype=int),
+            )
+
+            alpha_tensor = slice(alpha_tensor, samples=samples)
+            alpha = multiply(alpha_tensor, alpha)
+
+        if type(alpha) is not TensorMap:
+            raise ValueError("alpha must either be a float or a TensorMap")
 
         self._validate_data(X, y)
-        self._validate_params(X, sample_weight)
+        self._validate_params(X, alpha, sample_weight)
 
-        coef_blocks = []
+        weights_blocks = []
         for key, X_block in X:
             y_block = y.block(key)
-            alpha_block = self.alpha.block(key)
+            alpha_block = alpha.block(key)
 
             # X_arr has shape of (n_targets, n_properties)
             X_arr = block_to_array(X_block, self.parameter_keys)
@@ -181,22 +192,30 @@ def fit(
             else:
                 sw_arr = np.ones((len(y_arr),))
 
-            w = self._numpy_lstsq_solver(X_arr, y_arr, sw_arr, alpha_arr, self.rcond)
+            w = self._numpy_lstsq_solver(X_arr, y_arr, sw_arr, alpha_arr, rcond)
 
-            coef_block = TensorBlock(
+            weights_block = TensorBlock(
                 values=w.reshape(1, -1),
                 samples=y_block.properties,
                 components=[],
                 properties=X_block.properties,
             )
-            coef_blocks.append(coef_block)
+            weights_blocks.append(weights_block)
 
-        # Convert alpha to a dictionary to be used in external models.
-        self.alpha = tensor_map_to_dict(self.alpha)
-        self.coef_ = TensorMap(X.keys, coef_blocks)
+        # convert the weights to a dictionary to allow pickling of an instance
+        self._weights = tensor_map_to_dict(TensorMap(X.keys, weights_blocks))
 
         return self
 
+    @property
+    def weights(self) -> TensorMap:
+        """``TensorMap`` containing the weights of the provided training data."""
+
+        if self._weights is None:
+            raise ValueError("No weights. Call fit method first.")
+
+        return dict_to_tensor_map(self._weights)
+
     def predict(self, X: TensorMap) -> TensorMap:
         """
         Predict using the linear model.
@@ -204,16 +223,13 @@
         :param X: samples
         :returns: predicted values
         """
-        if self.coef_ is None:
-            raise ValueError("No weights. Call fit method first.")
-
-        return dot(X, self.coef_)
+        return dot(X, self.weights)
 
-    def score(self, X: TensorMap, y: TensorMap, parameter_key: str) -> List[float]:
-        """Return the coefficient of determination of the prediction.
+    def score(self, X: TensorMap, y: TensorMap, parameter_key: str) -> float:
+        """Return the root mean square error (RMSE) of the prediction.
 
         :param X: Test samples
-        :param y: True values for `X`.
+        :param y: True values for ``X``.
 
         :param parameter_key: Parameter to score for. Examples are ``"values"``,
             ``"positions"`` or ``"cell"``.
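For context: `_numpy_lstsq_solver` itself is untouched by this diff, and its body is not shown apart from the `X_eff`/`y_eff` names in the hunk context. The snippet below is a minimal NumPy sketch of the standard construction such a solver typically uses — weighted ridge phrased as an augmented least-squares problem — and is only an illustration under that assumption, not the actual equisolve implementation:

```python
import numpy as np

def ridge_lstsq_sketch(X, y, sample_weights, alphas, rcond=1e-13):
    # Solve min_w ||sqrt(W) (X w - y)||^2 + ||diag(sqrt(alphas)) w||^2
    # by stacking the regularizer rows below the weighted design matrix.
    sqrt_w = np.sqrt(sample_weights)
    X_eff = np.vstack([sqrt_w[:, None] * X, np.diag(np.sqrt(alphas))])
    y_eff = np.concatenate([sqrt_w * y, np.zeros(len(alphas))])
    return np.linalg.lstsq(X_eff, y_eff, rcond=rcond)[0]
```

With `alphas` all zero this reduces to ordinary least squares, and very large `alphas` drive the weights towards zero — the two limits checked by `test_exact_no_regularization` and `test_infinite_regularization` in the test file below.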
diff --git a/tests/numpy/models/test_linear_model.py b/tests/numpy/models/test_linear_model.py index edee944..5cce86d 100644 --- a/tests/numpy/models/test_linear_model.py +++ b/tests/numpy/models/test_linear_model.py @@ -8,7 +8,7 @@ import numpy as np import pytest from equistore import Labels, TensorBlock, TensorMap -from numpy.testing import assert_allclose +from numpy.testing import assert_allclose, assert_equal from equisolve.numpy.models import Ridge from equisolve.numpy.utils import matrix_to_block, tensor_to_tensormap @@ -59,8 +59,10 @@ def to_equistore(self, X_arr=None, y_arr=None, alpha_arr=None, sw_arr=None): def equisolve_solver_from_numpy_arrays(self, X_arr, y_arr, alpha_arr, sw_arr=None): X, y, alpha, sw = self.to_equistore(X_arr, y_arr, alpha_arr, sw_arr) - clf = Ridge(parameter_keys="values", alpha=alpha) - clf.fit(X=X, y=y, sample_weight=sw) + clf = Ridge( + parameter_keys="values", + ) + clf.fit(X=X, y=y, alpha=alpha, sample_weight=sw) return clf def numpy_solver(self, X, y, sample_weights, regularizations): @@ -95,12 +97,12 @@ def test_ridge(self, num_properties, num_targets): alpha = tensor_to_tensormap(alpha_arr) sw = tensor_to_tensormap(sw_arr) - clf = Ridge(parameter_keys="values", alpha=alpha) - clf.fit(X=X, y=y, sample_weight=sw) + clf = Ridge(parameter_keys="values") + clf.fit(X=X, y=y, alpha=alpha, sample_weight=sw) - assert len(clf.coef_) == 2 - assert clf.coef_.block(0).values.shape[1] == num_properties - assert clf.coef_.block(1).values.shape[1] == num_properties + assert len(clf.weights) == 2 + assert clf.weights.block(0).values.shape[1] == num_properties + assert clf.weights.block(1).values.shape[1] == num_properties def test_double_fit_call(self): """Test if regression works properly if fit method is called multiple times. 
@@ -119,11 +121,11 @@ def test_double_fit_call(self):
         y = tensor_to_tensormap(y_arr)
         alpha = tensor_to_tensormap(alpha_arr)
 
-        clf = Ridge(parameter_keys="values", alpha=alpha)
-        clf.fit(X=X, y=y)
-        clf.fit(X=X, y=y)
+        clf = Ridge(parameter_keys="values")
+        clf.fit(X=X, y=y, alpha=alpha)
+        clf.fit(X=X, y=y, alpha=alpha)
 
-        assert len(clf.coef_) == num_blocks
+        assert len(clf.weights) == num_blocks
 
     @pytest.mark.parametrize("num_properties", num_properties)
     @pytest.mark.parametrize("num_targets", num_targets)
@@ -143,7 +145,7 @@ def test_exact_no_regularization(self, num_properties, num_targets, mean):
         ridge_class = self.equisolve_solver_from_numpy_arrays(
             X, y, property_w, sample_w
         )
-        w_solver = ridge_class.coef_.block().values[0, :]
+        w_solver = ridge_class.weights.block().values[0, :]
 
         # Check that the two approaches yield the same result
         assert_allclose(w_solver, w_exact, atol=1e-13, rtol=1e-10)
@@ -175,7 +177,7 @@ def test_exact(self, num_properties, num_targets, mean, regularization):
         ridge_class = self.equisolve_solver_from_numpy_arrays(
             X, y, property_w, sample_w
         )
-        w_solver = ridge_class.coef_.block().values[0, :]
+        w_solver = ridge_class.weights.block().values[0, :]
         w_exact_with_regularization = self.numpy_solver(X, y, sample_w, property_w)
 
         # Check that the two approaches yield the same result
@@ -198,7 +200,7 @@ def test_predict(self, num_properties, num_targets, mean):
         ridge_class = self.equisolve_solver_from_numpy_arrays(
             X, y, property_w, sample_w
         )
-        w_solver = ridge_class.coef_.block().values[0, :]
+        w_solver = ridge_class.weights.block().values[0, :]
 
         # Generate new data
         X_validation = self.rng.normal(mean, 1, size=(50, num_properties))
@@ -236,7 +238,7 @@ def test_infinite_regularization(
         ridge_class = self.equisolve_solver_from_numpy_arrays(
             X, y, property_w, sample_w
         )
-        w_solver = ridge_class.coef_.block().values[0, :]
+        w_solver = ridge_class.weights.block().values[0, :]
         w_zeros = np.zeros((num_properties,))
 
         # Check that the two approaches yield the same result
@@ -263,11 +265,11 @@ def test_consistent_weights_scaling(
         ridge_class = self.equisolve_solver_from_numpy_arrays(
             X, y, property_w, sample_w
         )
-        w_ref = ridge_class.coef_.block().values[0, :]
+        w_ref = ridge_class.weights.block().values[0, :]
 
         ridge_class_scaled = self.equisolve_solver_from_numpy_arrays(
             X, y, scaling * property_w, scaling * sample_w
         )
-        w_scaled = ridge_class_scaled.coef_.block().values[0, :]
+        w_scaled = ridge_class_scaled.weights.block().values[0, :]
 
         # Check that the two approaches yield the same result
         assert_allclose(w_scaled, w_ref, atol=1e-15, rtol=1e-8)
@@ -293,31 +295,65 @@ def test_consistent_target_scaling(
         ridge_class = self.equisolve_solver_from_numpy_arrays(
             X, y, property_w, sample_w
         )
-        w_ref = ridge_class.coef_.block().values[0, :]
+        w_ref = ridge_class.weights.block().values[0, :]
 
         ridge_class_scaled = self.equisolve_solver_from_numpy_arrays(
             scaling * X, scaling * y, property_w, scaling * sample_w
         )
-        w_scaled = ridge_class_scaled.coef_.block().values[0, :]
+        w_scaled = ridge_class_scaled.weights.block().values[0, :]
 
         # Check that the two approaches yield the same result
         assert_allclose(w_scaled, w_ref, atol=1e-11, rtol=1e-8)
 
     def test_stability(self):
         """Test numerical stability of the solver."""
+        # TODO
         pass
 
     def test_scalar_alpha(self):
         """Test ridge regression with scalar alpha."""
+        # TODO
         pass
 
     def test_vector_alpha(self):
         """Test ridge regression with a different alpha per property."""
+        # TODO
         pass
 
     def test_sample_weights(self):
         """Test ridge regression with a different value per target property."""
+        # TODO
         pass
 
+    def test_alpha_float(self):
+        """Test that a float alpha matches an equivalent TensorMap alpha."""
+        X_arr = self.rng.random([1, 10, 10])
+        y_arr = self.rng.random([1, 10, 1])
+        alpha_arr = 2 * np.ones([1, 1, 10])
+
+        X = tensor_to_tensormap(X_arr)
+        y = tensor_to_tensormap(y_arr)
+        alpha = tensor_to_tensormap(alpha_arr)
+
+        clf = Ridge(parameter_keys="values")
+
+        weights_arr = clf.fit(X=X, y=y, alpha=alpha).weights
+        weights_float = clf.fit(X=X, y=y, alpha=2.0).weights
+
+        assert_equal(weights_float.block().values, weights_arr.block().values)
+
+    def test_alpha_wrong_type(self):
+        """Test error raise if alpha is neither a float nor a TensorMap."""
+        X_arr = self.rng.random([1, 10, 10])
+        y_arr = self.rng.random([1, 10, 1])
+
+        X = tensor_to_tensormap(X_arr)
+        y = tensor_to_tensormap(y_arr)
+
+        clf = Ridge(parameter_keys="values")
+
+        with pytest.raises(ValueError, match="alpha must either be a float or"):
+            clf.fit(X=X, y=y, alpha="foo")
+
     @pytest.mark.parametrize(
         "parameter_keys", [("values"), ("values", "positions"), ("positions")]
     )
@@ -375,11 +411,11 @@ def test_parameter_keys(self, parameter_keys):
             np.zeros(num_properties).reshape(1, 1, -1), key_name="_"
         )
 
-        clf = Ridge(parameter_keys=parameter_keys, alpha=alpha)
-        clf.fit(X=X, y=y)
+        clf = Ridge(parameter_keys=parameter_keys)
+        clf.fit(X=X, y=y, alpha=alpha)
 
         assert_allclose(
-            clf.coef_.block().values, w_exact.reshape(1, -1), atol=1e-15, rtol=1e-8
+            clf.weights.block().values, w_exact.reshape(1, -1), atol=1e-15, rtol=1e-8
         )
 
         # Test prediction
@@ -426,7 +462,7 @@ def test_error_components(self, components):
         )
         y = TensorMap(Labels.single(), [y_block])
 
-        clf = Ridge(parameter_keys="values", alpha=None)
+        clf = Ridge(parameter_keys="values")
         with pytest.raises(ValueError, match="contains components"):
             clf.fit(X=X, y=y)
 
@@ -448,9 +484,9 @@ def test_error_different_blocks(self, n_blocks):
         alpha = tensor_to_tensormap(alpha_arr)
         sw = tensor_to_tensormap(sw_arr)
 
-        clf = Ridge(parameter_keys="values", alpha=alpha)
+        clf = Ridge(parameter_keys="values")
         with pytest.raises(ValueError, match="number of blocks"):
-            clf.fit(X=X, y=y, sample_weight=sw)
+            clf.fit(X=X, y=y, alpha=alpha, sample_weight=sw)
 
     def test_error_properties(self):
         """Test error raise for non-matching number of properties in X & alpha"""
@@ -460,9 +496,9 @@
             alpha_arr=np.ones(self.num_properties[0] + 1),
         )
 
-        clf = Ridge(parameter_keys="values", alpha=alpha)
+        clf = Ridge(parameter_keys="values")
         with pytest.raises(ValueError, match="properties"):
-            clf.fit(X=X, y=y)
+            clf.fit(X=X, y=y, alpha=alpha)
 
     @pytest.mark.parametrize("extra_samples", [[0, 1], [1, 0]])
     def test_error_samples(self, extra_samples):
@@ -475,9 +511,9 @@
             sw_arr=np.ones(self.num_targets[0] + extra_samples[1]),
        )
 
-        clf = Ridge(parameter_keys="values", alpha=alpha)
+        clf = Ridge(parameter_keys="values")
         with pytest.raises(ValueError, match="samples"):
-            clf.fit(X=X, y=y, sample_weight=sw)
+            clf.fit(X=X, y=y, alpha=alpha, sample_weight=sw)
 
     @pytest.mark.parametrize("shapes", [(2, 1, 1), (1, 2, 1), (1, 1, 2)])
     def test_error_shape(self, shapes):
@@ -493,12 +529,12 @@
         alpha = tensor_to_tensormap(alpha_arr)
         sw = tensor_to_tensormap(sw_arr)
 
-        clf = Ridge(parameter_keys="values", alpha=alpha)
+        clf = Ridge(parameter_keys="values")
         with pytest.raises(ValueError, match="Only one"):
-            clf.fit(X=X, y=y, sample_weight=sw)
+            clf.fit(X=X, y=y, alpha=alpha, sample_weight=sw)
 
     def test_error_no_weights(self):
         """Test error raise
if fit method was not called.""" - clf = Ridge(parameter_keys="values", alpha=None) + clf = Ridge(parameter_keys="values") with pytest.raises(ValueError, match="No weights"): clf.predict(1)
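Taken together, the user-facing change is that `alpha` (and `rcond`) move from the constructor to `fit`, and the fitted weights move from the `coef_` attribute to a `weights` property. A hypothetical end-to-end sketch, using only calls exercised by the tests above (`tensor_to_tensormap` with a 3-dimensional array of shape `(blocks, samples, properties)` comes from the test helpers, not from a documented public example):

```python
import numpy as np
from equisolve.numpy.models import Ridge
from equisolve.numpy.utils import tensor_to_tensormap

rng = np.random.default_rng(0)
X = tensor_to_tensormap(rng.random([1, 10, 4]))  # 1 block, 10 samples, 4 properties
y = tensor_to_tensormap(rng.random([1, 10, 1]))

clf = Ridge(parameter_keys="values")
clf.fit(X=X, y=y, alpha=1e-5)       # a scalar alpha is broadcast to a TensorMap
y_pred = clf.predict(X)             # uses the weights from the last fit
print(clf.weights.block().values)   # the weights property replaces the old coef_
print(clf.score(X, y, parameter_key="values"))
```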