From 5b7cfe63dcb883fcec9a226639a5630f4cc99174 Mon Sep 17 00:00:00 2001 From: Ramana-Raja Date: Sun, 2 Mar 2025 19:11:14 +0530 Subject: [PATCH] updated code in attempt to fix pca problem --- aeon/clustering/feature_based/_r_cluster.py | 23 +++++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py index bde467cc31..246c436422 100644 --- a/aeon/clustering/feature_based/_r_cluster.py +++ b/aeon/clustering/feature_based/_r_cluster.py @@ -418,19 +418,19 @@ def _fit(self, X, y=None): pca = PCA().fit(X_std) optimal_dimensions = np.argmax(pca.explained_variance_ratio_ < 0.01) - self.__optimal_dimensions = max( + self.optimal_dimensions = max( 1, min(optimal_dimensions, X_std.shape[0], X_std.shape[1]) ) pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = pca.fit_transform(X_std) - self.__estimator = KMeans( + self.estimator = KMeans( n_clusters=self.n_clusters, random_state=self.random_state, n_init=self.n_init, ) - self.__estimator.fit(transformed_data_pca) - self.labels_ = self.__estimator.labels_ + self.estimator.fit(transformed_data_pca) + self.labels_ = self.estimator.labels_ def _predict(self, X, y=None) -> np.ndarray: parameters = self._get_parameterised_data(X) @@ -438,10 +438,15 @@ def _predict(self, X, y=None) -> np.ndarray: transformed_data = self._get_transformed_data(X=X, parameters=parameters) X_std = self.scaler.fit_transform(transformed_data) - pca = PCA(n_components=self.__optimal_dimensions, random_state=self.random_state) + if (self.optimal_dimensions > max(1, min(X_std.shape[0], X_std.shape[1]))): + raise ValueError( + f"optimal dimensions={self.optimal_dimensions} must be between 0 and " + f"min(n_samples, n_features)={min(X_std.shape[0], X_std.shape[1])}" + ) + pca = PCA(n_components=self.optimal_dimensions, random_state=self.random_state) transformed_data_pca = pca.fit_transform(X_std) - return self.__estimator.predict(transformed_data_pca) + return self.estimator.predict(transformed_data_pca) def _fit_predict(self, X, y=None) -> np.ndarray: parameters = self._get_parameterised_data(X) @@ -457,13 +462,13 @@ def _fit_predict(self, X, y=None) -> np.ndarray: ) pca = PCA(n_components=optimal_dimensions, random_state=self.random_state) transformed_data_pca = pca.fit_transform(X_std) - self.__estimator = KMeans( + self.estimator = KMeans( n_clusters=self.n_clusters, random_state=self.random_state, n_init=self.n_init, ) - Y = self.__estimator.fit_predict(transformed_data_pca) - self.labels_ = self.__estimator.labels_ + Y = self.estimator.fit_predict(transformed_data_pca) + self.labels_ = self.estimator.labels_ return Y @classmethod