diff --git a/bico/_core.cpp b/bico/_core.cpp index b119d33..afcfdc9 100644 --- a/bico/_core.cpp +++ b/bico/_core.cpp @@ -1,23 +1,12 @@ #include -#include -#include -#include -#include -#include -#include - #include "point/l2metric.h" #include "point/squaredl2metric.h" #include "point/point.h" #include "point/pointweightmodifier.h" #include "clustering/bico.h" -#include "misc/randomness.h" -#include "misc/randomgenerator.h" #include "datastructure/proxysolution.h" #include "point/pointcentroid.h" -#include "point/pointweightmodifier.h" -#include "point/realspaceprovider.h" typedef unsigned int uint; diff --git a/bico/clustering/bico.h b/bico/clustering/bico.h index e7992e1..f931e92 100644 --- a/bico/clustering/bico.h +++ b/bico/clustering/bico.h @@ -18,7 +18,6 @@ #include "../datastructure/proxysolution.h" #include "../evaluation/kmeansevaluator.h" #include "../exception/invalidruntimeconfigurationexception.h" -#include "../misc/randomness.h" namespace CluE { @@ -541,8 +540,7 @@ minDist(std::numeric_limits::infinity()), pairwise_different(0), numOfRebuilds(0) { - Randomness::initialize(seed); - RandomGenerator rg = Randomness::getRandomGenerator(); + std::mt19937 rg(seed); std::vector rndpoint(dimension); rndprojections.resize(L); bucket_radius.resize(L); diff --git a/bico/core.py b/bico/core.py index 5184544..66ec518 100644 --- a/bico/core.py +++ b/bico/core.py @@ -64,8 +64,14 @@ def __init__( @property def labels_(self) -> np.ndarray: - if not hasattr(self, "_labels"): + if not hasattr(self, "_cluster_centers"): raise NotFittedError(self._CORESET_ESTIMATOR_ERROR) + elif not hasattr(self, "_labels"): + raise ValueError( + "The labels have not been computed because the coreset " + "was fit using partial_fit. " + "Please call predict on your data to obtain the labels." + ) return self._labels @property @@ -114,6 +120,7 @@ def partial_fit( def _fit_coreset( self, + X: Optional[np.ndarray] = None, ) -> None: if self.coreset_estimator is None: from sklearn.cluster import KMeans @@ -127,10 +134,13 @@ def _fit_coreset( self._coreset_points, sample_weight=self._coreset_weights ) self._cluster_centers: np.ndarray = self.coreset_estimator.cluster_centers_ - self._labels: np.ndarray = self.coreset_estimator.labels_ + if X is not None: + self._labels: np.ndarray = self.coreset_estimator.predict(X) self._inertia: float = self.coreset_estimator.inertia_ - def _compute_coreset(self, fit_coreset: bool = False) -> "BICO": + def _compute_coreset( + self, X: Optional[np.ndarray] = None, fit_coreset: bool = False + ) -> "BICO": if not hasattr(self, "bico_obj_"): raise NotFittedError( "This BICO instance is not fitted yet. " "Call `fit` or `partial_fit`." @@ -152,7 +162,7 @@ def _compute_coreset(self, fit_coreset: bool = False) -> "BICO": self._n_features_out = n_found_points if self.fit_coreset or fit_coreset: - self._fit_coreset() + self._fit_coreset(X) return self @@ -188,7 +198,9 @@ def _fit( _DLL.addData(self.bico_obj_, c_array, c_n) if not partial or fit_coreset: - self._compute_coreset(fit_coreset) + self._compute_coreset( + X=_X if not partial else None, fit_coreset=fit_coreset + ) return self @@ -204,9 +216,9 @@ def fit_predict( return self.labels_ def predict(self, X: Sequence[Sequence[float]]) -> Any: - self._fit_coreset() - if self.coreset_estimator is None: raise NotFittedError(self._CORESET_ESTIMATOR_ERROR) + self._fit_coreset() + return self.coreset_estimator.predict(X) diff --git a/bico/misc/randomgenerator.h b/bico/misc/randomgenerator.h deleted file mode 100644 index 1b12cc5..0000000 --- a/bico/misc/randomgenerator.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef RANDOMGENERATOR_H -#define RANDOMGENERATOR_H - -/** - * @brief Encapsulates an STL random generator. - */ -class RandomGenerator -{ -private: - std::mt19937 * generator; -public: - typedef decltype((*generator)()) result_type; - - RandomGenerator(std::mt19937 * generator) : - generator(generator) - { - } - - result_type operator()() - { - return (*generator)(); - } - - result_type min() - { - return generator->min(); - } - - result_type max() - { - return generator->max(); - } -}; - -#endif diff --git a/bico/misc/randomness.cpp b/bico/misc/randomness.cpp deleted file mode 100644 index dc18ae9..0000000 --- a/bico/misc/randomness.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "../misc/randomness.h" - -using namespace CluE; - -std::mt19937 Randomness::mt19937Generator(static_cast(time(0))); diff --git a/bico/misc/randomness.h b/bico/misc/randomness.h deleted file mode 100644 index 2ce34cd..0000000 --- a/bico/misc/randomness.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef RANDOMNESS_H -#define RANDOMNESS_H - -#include -#include - -#include "randomgenerator.h" - -namespace CluE -{ -/** - * @brief Random number generator. - * - * @ingroup helper_classes - */ -class Randomness -{ -private: - // TODO Use mt19937_64 ? - static std::mt19937 mt19937Generator; - -public: - static RandomGenerator getRandomGenerator() - { - return RandomGenerator(&mt19937Generator); - } - - static void initialize(uint_fast32_t seed) - { - mt19937Generator = std::mt19937(seed); - } -}; - -} - -#endif diff --git a/build_extension.py b/build_extension.py index 6663f8f..362c610 100644 --- a/build_extension.py +++ b/build_extension.py @@ -8,7 +8,6 @@ name="bico._core", sources=[ "bico/_core.cpp", - "bico/misc/randomness.cpp", "bico/point/pointcentroid.cpp", "bico/point/squaredl2metric.cpp", "bico/point/point.cpp",