From 172961bb134113526a7af8c2f1f8f712943a0fa6 Mon Sep 17 00:00:00 2001 From: Niclas Rieger Date: Fri, 13 Sep 2024 15:26:47 +0200 Subject: [PATCH 1/3] perf(rotator): reduce disk space of saved Rotator models --- .../models/cross/test_hilbert_mca_rotator.py | 2 +- tests/models/cross/test_mca_rotator.py | 2 +- xeofs/cross/cpcca_rotator.py | 46 +++++++++---------- xeofs/cross/mca_rotator.py | 3 -- 4 files changed, 25 insertions(+), 28 deletions(-) diff --git a/tests/models/cross/test_hilbert_mca_rotator.py b/tests/models/cross/test_hilbert_mca_rotator.py index 2172028..c1e4752 100644 --- a/tests/models/cross/test_hilbert_mca_rotator.py +++ b/tests/models/cross/test_hilbert_mca_rotator.py @@ -40,7 +40,7 @@ def test_fit(mca_model): mca_rotator = HilbertMCARotator(n_modes=2) mca_rotator.fit(mca_model) - assert hasattr(mca_rotator, "model") + assert hasattr(mca_rotator, "model_data") assert hasattr(mca_rotator, "data") diff --git a/tests/models/cross/test_mca_rotator.py b/tests/models/cross/test_mca_rotator.py index f467d33..9c86608 100644 --- a/tests/models/cross/test_mca_rotator.py +++ b/tests/models/cross/test_mca_rotator.py @@ -43,7 +43,7 @@ def test_fit(mca_model): mca_rotator = MCARotator(n_modes=4) mca_rotator.fit(mca_model) - assert hasattr(mca_rotator, "model") + assert hasattr(mca_rotator, "model_data") assert hasattr(mca_rotator, "data") diff --git a/xeofs/cross/cpcca_rotator.py b/xeofs/cross/cpcca_rotator.py index 1e2fd0d..218759c 100644 --- a/xeofs/cross/cpcca_rotator.py +++ b/xeofs/cross/cpcca_rotator.py @@ -98,35 +98,34 @@ def __init__( self.whitener1 = Whitener() self.whitener2 = Whitener() self.data = DataContainer() - self.model = CPCCA() + self.model_data = DataContainer() self.sorted = False def get_serialization_attrs(self) -> dict: return dict( data=self.data, + model_data=self.model_data, preprocessor1=self.preprocessor1, preprocessor2=self.preprocessor2, whitener1=self.whitener1, whitener2=self.whitener2, - model=self.model, sorted=self.sorted, sample_name=self.sample_name, feature_name=self.feature_name, ) def _fit_algorithm(self, model) -> Self: - self.model = model self.preprocessor1 = model.preprocessor1 self.preprocessor2 = model.preprocessor2 self.whitener1 = model.whitener1 self.whitener2 = model.whitener2 - self.sample_name = self.model.sample_name - self.feature_name = self.model.feature_name + self.sample_name = model.sample_name + self.feature_name = model.feature_name self.sorted = False common_feature_dim = "common_feature_dim" - feature_name = self._get_feature_name() + feature_name = model.feature_name n_modes = self._params["n_modes"] power = self._params["power"] @@ -145,12 +144,12 @@ def _fit_algorithm(self, model) -> Self: # fraction" which is conserved under rotation, but does not have a clear # interpretation as the term covariance fraction is only correct when # both data sets X and Y are equal and MCA reduces to PCA. - svalues = self.model.data["singular_values"].sel(mode=slice(1, n_modes)) + svalues = model.data["singular_values"].sel(mode=slice(1, n_modes)) scaling = np.sqrt(svalues) # Get unrotated singular vectors - Qx = self.model.data["components1"].sel(mode=slice(1, n_modes)) - Qy = self.model.data["components2"].sel(mode=slice(1, n_modes)) + Qx = model.data["components1"].sel(mode=slice(1, n_modes)) + Qy = model.data["components2"].sel(mode=slice(1, n_modes)) # Unwhiten and back-transform into physical space Qx = self.whitener1.inverse_transform_components(Qx) @@ -233,8 +232,8 @@ def _fit_algorithm(self, model) -> Self: idx_modes_sorted.coords.update(squared_covariance.coords) # Rotate scores using rotation matrix - scores1 = self.model.data["scores1"].sel(mode=slice(1, n_modes)) - scores2 = self.model.data["scores2"].sel(mode=slice(1, n_modes)) + scores1 = model.data["scores1"].sel(mode=slice(1, n_modes)) + scores2 = model.data["scores2"].sel(mode=slice(1, n_modes)) scores1 = self.whitener1.inverse_transform_scores(scores1) scores2 = self.whitener2.inverse_transform_scores(scores2) @@ -260,12 +259,18 @@ def _fit_algorithm(self, model) -> Self: scores1_rot = scores1_rot * modes_sign scores2_rot = scores2_rot * modes_sign - # Create data container + # Create data container for Rotator and original model data + self.model_data.add(name="singular_values", data=model.data["singular_values"]) + self.model_data.add(name="components1", data=model.data["components1"]) + self.model_data.add(name="components2", data=model.data["components2"]) + + # Assigning input data to the Rotator object allows us to inherit some functionalities from the original model + # like squared_covariance_fraction(), homogeneous_patterns() etc. self.data.add( - name="input_data1", data=self.model.data["input_data1"], allow_compute=False + name="input_data1", data=model.data["input_data1"], allow_compute=False ) self.data.add( - name="input_data2", data=self.model.data["input_data2"], allow_compute=False + name="input_data2", data=model.data["input_data2"], allow_compute=False ) self.data.add(name="components1", data=Qx_rot) self.data.add(name="components2", data=Qy_rot) @@ -274,7 +279,7 @@ def _fit_algorithm(self, model) -> Self: self.data.add(name="squared_covariance", data=squared_covariance) self.data.add( name="total_squared_covariance", - data=self.model.data["total_squared_covariance"], + data=model.data["total_squared_covariance"], ) self.data.add(name="idx_modes_sorted", data=idx_modes_sorted) @@ -337,14 +342,14 @@ def transform( ) RinvT = RinvT.rename({"mode_n": "mode"}) - scaling = self.model.data["singular_values"].sel(mode=slice(1, n_modes)) + scaling = self.model_data["singular_values"].sel(mode=slice(1, n_modes)) scaling = np.sqrt(scaling) results = [] if X is not None: # Select the (non-rotated) singular vectors of the first dataset - comps1 = self.model.data["components1"].sel(mode=slice(1, n_modes)) + comps1 = self.model_data["components1"].sel(mode=slice(1, n_modes)) # Preprocess the data comps1 = self.whitener1.inverse_transform_components(comps1) @@ -374,7 +379,7 @@ def transform( if Y is not None: # Select the (non-rotated) singular vectors of the second dataset - comps2 = self.model.data["components2"].sel(mode=slice(1, n_modes)) + comps2 = self.model_data["components2"].sel(mode=slice(1, n_modes)) # Preprocess the data comps2 = self.whitener2.inverse_transform_components(comps2) @@ -451,9 +456,6 @@ def _compute_rot_mat_inv_trans(self, rotation_matrix, input_dims) -> xr.DataArra rotation_matrix = rotation_matrix.conj().transpose(*input_dims) return rotation_matrix - def _get_feature_name(self): - return self.model.feature_name - class ComplexCPCCARotator(CPCCARotator, ComplexCPCCA): """Rotate a solution obtained from ``xe.cross.ComplexCPCCA``. @@ -517,7 +519,6 @@ class ComplexCPCCARotator(CPCCARotator, ComplexCPCCA): def __init__(self, **kwargs): CPCCARotator.__init__(self, **kwargs) self.attrs.update({"model": "Rotated Complex CPCCA"}) - self.model = ComplexCPCCA() class HilbertCPCCARotator(ComplexCPCCARotator, HilbertCPCCA): @@ -582,7 +583,6 @@ class HilbertCPCCARotator(ComplexCPCCARotator, HilbertCPCCA): def __init__(self, **kwargs): ComplexCPCCARotator.__init__(self, **kwargs) self.attrs.update({"model": "Rotated Hilbert CPCCA"}) - self.model = HilbertCPCCA() def transform( self, X: DataObject | None = None, Y: DataObject | None = None, normalized=False diff --git a/xeofs/cross/mca_rotator.py b/xeofs/cross/mca_rotator.py index a47146c..be2127a 100644 --- a/xeofs/cross/mca_rotator.py +++ b/xeofs/cross/mca_rotator.py @@ -72,7 +72,6 @@ def __init__( # Define analysis-relevant meta data self.attrs.update({"model": "Rotated MCA"}) - self.model = MCA() class ComplexMCARotator(ComplexCPCCARotator, ComplexMCA): @@ -149,7 +148,6 @@ def __init__( compute=compute, ) self.attrs.update({"model": "Rotated Complex MCA"}) - self.model = ComplexMCA() class HilbertMCARotator(HilbertCPCCARotator, HilbertMCA): @@ -226,4 +224,3 @@ def __init__( compute=compute, ) self.attrs.update({"model": "Rotated Hilbert MCA"}) - self.model = HilbertMCA() From af1ca8f917b1938e704be44aa75786cfec827e44 Mon Sep 17 00:00:00 2001 From: Niclas Rieger Date: Fri, 13 Sep 2024 15:52:11 +0200 Subject: [PATCH 2/3] perf(single): avoid redundant data when saving Rotator models --- tests/models/single/test_eof_rotator.py | 6 +++--- xeofs/single/eof_rotator.py | 17 ++++++++++------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/models/single/test_eof_rotator.py b/tests/models/single/test_eof_rotator.py index b8e8424..22c426e 100644 --- a/tests/models/single/test_eof_rotator.py +++ b/tests/models/single/test_eof_rotator.py @@ -45,12 +45,12 @@ def test_fit(eof_model): eof_rotator.fit(eof_model) assert hasattr( - eof_rotator, "model" - ), 'The attribute "model" should be populated after fitting.' + eof_rotator, "model_data" + ), 'The attribute "model_data" should be populated after fitting.' assert hasattr( eof_rotator, "data" ), 'The attribute "data" should be populated after fitting.' - assert isinstance(eof_rotator.model, EOF) + assert isinstance(eof_rotator.model_data, DataContainer) assert isinstance(eof_rotator.data, DataContainer) diff --git a/xeofs/single/eof_rotator.py b/xeofs/single/eof_rotator.py index b321189..4cc6f38 100644 --- a/xeofs/single/eof_rotator.py +++ b/xeofs/single/eof_rotator.py @@ -88,7 +88,7 @@ def __init__( # Attach empty objects self.preprocessor = Preprocessor() self.data = DataContainer() - self.model = EOF() + self.model_data = DataContainer() self.sorted = False @@ -96,7 +96,7 @@ def get_serialization_attrs(self) -> dict: return dict( data=self.data, preprocessor=self.preprocessor, - model=self.model, + model_data=self.model_data, sorted=self.sorted, ) @@ -117,7 +117,6 @@ def fit(self, model) -> Self: return self def _fit_algorithm(self, model) -> Self: - self.model = model self.preprocessor = model.preprocessor self.sample_name = model.sample_name self.feature_name = model.feature_name @@ -189,6 +188,10 @@ def _fit_algorithm(self, model) -> Self: scores = scores * modes_sign # Store the results + self.model_data.add(model.data["norms"], "singular_values") + self.model_data.add(model.data["components"], "components") + + # Assigning input data to the Rotator object allows us to inherit some functionalities from the original model self.data.add(model.data["input_data"], "input_data", allow_compute=False) self.data.add(rot_components, "components") self.data.add(scores, "scores") @@ -224,10 +227,12 @@ def _sort_by_variance(self): def _transform_algorithm(self, X: DataArray) -> DataArray: n_modes = self._params["n_modes"] - svals = self.model.singular_values().sel(mode=slice(1, self._params["n_modes"])) + svals = self.model_data["singular_values"].sel( + mode=slice(1, self._params["n_modes"]) + ) pseudo_norms = self.data["norms"] # Select the (non-rotated) singular vectors of the first dataset - components = self.model.data["components"].sel(mode=slice(1, n_modes)) + components = self.model_data["components"].sel(mode=slice(1, n_modes)) # Compute non-rotated scores by projecting the data onto non-rotated components projections = xr.dot(X, components) / svals @@ -329,7 +334,6 @@ def __init__( n_modes=n_modes, power=power, max_iter=max_iter, rtol=rtol, compute=compute ) self.attrs.update({"model": "Rotated Complex EOF analysis"}) - self.model = ComplexEOF() class HilbertEOFRotator(EOFRotator, HilbertEOF): @@ -385,7 +389,6 @@ def __init__( n_modes=n_modes, power=power, max_iter=max_iter, rtol=rtol, compute=compute ) self.attrs.update({"model": "Rotated Hilbert EOF analysis"}) - self.model = HilbertEOF() def _transform_algorithm(self, data: DataArray) -> DataArray: # Here we leverage the Method Resolution Order (MRO) to invoke the From cce4e35dac28e5029bb87ec32d1cab32ec2df8da Mon Sep 17 00:00:00 2001 From: Niclas Rieger Date: Fri, 13 Sep 2024 15:59:22 +0200 Subject: [PATCH 3/3] test: update test to match new Rotator structure --- tests/models/single/test_hilbert_eof_rotator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/models/single/test_hilbert_eof_rotator.py b/tests/models/single/test_hilbert_eof_rotator.py index 9fc3bd3..b90960c 100644 --- a/tests/models/single/test_hilbert_eof_rotator.py +++ b/tests/models/single/test_hilbert_eof_rotator.py @@ -42,12 +42,12 @@ def test_fit(ceof_model): ceof_rotator.fit(ceof_model) assert hasattr( - ceof_rotator, "model" - ), 'The attribute "model" should be populated after fitting.' + ceof_rotator, "model_data" + ), 'The attribute "model_data" should be populated after fitting.' assert hasattr( ceof_rotator, "data" ), 'The attribute "data" should be populated after fitting.' - assert isinstance(ceof_rotator.model, HilbertEOF) + assert isinstance(ceof_rotator.model_data, DataContainer) assert isinstance(ceof_rotator.data, DataContainer)