Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compute labels for fit #5

Merged: 5 commits merged on Jul 4, 2024.
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
11 changes: 0 additions & 11 deletions bico/_core.cpp
Original file line number Diff line number Diff line change
@@ -1,23 +1,12 @@
#include <Python.h>

#include <iostream>
#include <sstream>
#include <fstream>
#include <random>
#include <ctime>
#include <time.h>

#include "point/l2metric.h"
#include "point/squaredl2metric.h"
#include "point/point.h"
#include "point/pointweightmodifier.h"
#include "clustering/bico.h"
#include "misc/randomness.h"
#include "misc/randomgenerator.h"
#include "datastructure/proxysolution.h"
#include "point/pointcentroid.h"
#include "point/pointweightmodifier.h"
#include "point/realspaceprovider.h"

typedef unsigned int uint;

Expand Down
4 changes: 1 addition & 3 deletions bico/clustering/bico.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include "../datastructure/proxysolution.h"
#include "../evaluation/kmeansevaluator.h"
#include "../exception/invalidruntimeconfigurationexception.h"
#include "../misc/randomness.h"

namespace CluE
{
Expand Down Expand Up @@ -541,8 +540,7 @@ minDist(std::numeric_limits<double>::infinity()),
pairwise_different(0),
numOfRebuilds(0)
{
Randomness::initialize(seed);
RandomGenerator rg = Randomness::getRandomGenerator();
std::mt19937 rg(seed);
std::vector<double> rndpoint(dimension);
rndprojections.resize(L);
bucket_radius.resize(L);
Expand Down
26 changes: 19 additions & 7 deletions bico/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,14 @@ def __init__(

@property
def labels_(self) -> np.ndarray:
if not hasattr(self, "_labels"):
if not hasattr(self, "_cluster_centers"):
raise NotFittedError(self._CORESET_ESTIMATOR_ERROR)
elif not hasattr(self, "_labels"):
raise ValueError(
"The labels have not been computed because the coreset "
"was fit using partial_fit. "
"Please call predict on your data to obtain the labels."
)
return self._labels

@property
Expand Down Expand Up @@ -114,6 +120,7 @@ def partial_fit(

def _fit_coreset(
self,
X: Optional[np.ndarray] = None,
) -> None:
if self.coreset_estimator is None:
from sklearn.cluster import KMeans
Expand All @@ -127,10 +134,13 @@ def _fit_coreset(
self._coreset_points, sample_weight=self._coreset_weights
)
self._cluster_centers: np.ndarray = self.coreset_estimator.cluster_centers_
self._labels: np.ndarray = self.coreset_estimator.labels_
if X is not None:
self._labels: np.ndarray = self.coreset_estimator.predict(X)
self._inertia: float = self.coreset_estimator.inertia_

def _compute_coreset(self, fit_coreset: bool = False) -> "BICO":
def _compute_coreset(
self, X: Optional[np.ndarray] = None, fit_coreset: bool = False
) -> "BICO":
if not hasattr(self, "bico_obj_"):
raise NotFittedError(
"This BICO instance is not fitted yet. " "Call `fit` or `partial_fit`."
Expand All @@ -152,7 +162,7 @@ def _compute_coreset(self, fit_coreset: bool = False) -> "BICO":
self._n_features_out = n_found_points

if self.fit_coreset or fit_coreset:
self._fit_coreset()
self._fit_coreset(X)

return self

Expand Down Expand Up @@ -188,7 +198,9 @@ def _fit(
_DLL.addData(self.bico_obj_, c_array, c_n)

if not partial or fit_coreset:
self._compute_coreset(fit_coreset)
self._compute_coreset(
X=_X if not partial else None, fit_coreset=fit_coreset
)

return self

Expand All @@ -204,9 +216,9 @@ def fit_predict(
return self.labels_

def predict(self, X: Sequence[Sequence[float]]) -> Any:
self._fit_coreset()

if self.coreset_estimator is None:
raise NotFittedError(self._CORESET_ESTIMATOR_ERROR)

self._fit_coreset()

return self.coreset_estimator.predict(X)
35 changes: 0 additions & 35 deletions bico/misc/randomgenerator.h

This file was deleted.

5 changes: 0 additions & 5 deletions bico/misc/randomness.cpp

This file was deleted.

36 changes: 0 additions & 36 deletions bico/misc/randomness.h

This file was deleted.

1 change: 0 additions & 1 deletion build_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
name="bico._core",
sources=[
"bico/_core.cpp",
"bico/misc/randomness.cpp",
"bico/point/pointcentroid.cpp",
"bico/point/squaredl2metric.cpp",
"bico/point/point.cpp",
Expand Down
Loading