diff --git a/src/ensemble/__pycache__/boosting.cpython-310.pyc b/src/ensemble/__pycache__/boosting.cpython-310.pyc index 526d2d1..1a2bf45 100644 Binary files a/src/ensemble/__pycache__/boosting.cpython-310.pyc and b/src/ensemble/__pycache__/boosting.cpython-310.pyc differ diff --git a/src/ensemble/boosting.py b/src/ensemble/boosting.py index c7abdc8..d060b68 100644 --- a/src/ensemble/boosting.py +++ b/src/ensemble/boosting.py @@ -1,4 +1,5 @@ from abc import abstractmethod + import numpy as np from src.base import Model @@ -8,7 +9,7 @@ class _GradientBoosting(Model): def __init__( self, - learning_rate: float = 1e-3, + learning_rate: float = 1e-2, n_steps: int = 100, max_depth: int = 3, min_samples_split: int = 2, @@ -77,19 +78,21 @@ def _calculate_loss_gradient(self, y: np.ndarray, predictions: np.ndarray) -> np """ pass + @abstractmethod + def _calculate_predictions(self, x: np.ndarray) -> np.ndarray: + """ + Calculate predictions for input data. + :param x: Input data. + :return: Predictions. + """ + def predict(self, x: np.ndarray) -> np.ndarray: """ Predict target feature using pretrained boosting trees. :param x: Test data. :return: Test predictions. """ - n_samples, _ = x.shape - - predictions = np.ones(shape=(n_samples,)) * self.constant_prediction - - for tree in self.trees: - predictions = predictions + self.learning_rate * tree.predict(x) - + predictions = self._calculate_predictions(x) return predictions @@ -108,11 +111,75 @@ def _calculate_initial_prediction(self, y: np.ndarray) -> np.ndarray: def _calculate_loss_gradient(self, y: np.ndarray, predictions: np.ndarray) -> np.ndarray: """ - Find mean value for the targets. + Calculate gradient of mean-squared error loss. + :param predictions: Target predictions. + :param y: Targets. + :return: Gradient of loss function with respect to predictions. 
+ """ + return y - predictions + + def _calculate_predictions(self, x: np.ndarray) -> np.ndarray: + n_samples, _ = x.shape + + predictions = np.ones(n_samples) * self.constant_prediction + for tree in self.trees: + predictions = predictions + self.learning_rate * tree.predict(x) + + return predictions + + +class GradientBoostingClassifier(_GradientBoosting): + """ + Gradient boosting for binary classification. + Uses cross-entropy as loss. + """ + + def _calculate_initial_prediction(self, y: np.ndarray) -> np.ndarray: + """ + Start from zero log-odds (probability 0.5) for every target. :param y: Targets. :return: Initial predictions. """ - return predictions - y + return np.zeros_like(y, dtype=np.float64) + + def _calculate_loss_gradient(self, y: np.ndarray, predictions: np.ndarray) -> np.ndarray: + """ + Calculate cross-entropy gradient. + :param y: Targets. + :return: Gradient of loss function with respect to predictions. + """ + return y - GradientBoostingClassifier.sigmoid(predictions) + @staticmethod + def sigmoid(x: np.ndarray) -> np.ndarray: + """ + Map input values into the (0, 1) range. + :param x: Input array. + :return: Output array of the same shape as the input array. + """ + return 1 / (1 + np.exp(-x)) + + def _calculate_predictions(self, x: np.ndarray) -> np.ndarray: + """ + Calculate class labels by thresholding the predicted probability at 0.5. + :param x: Input array. + :return: Predictions. + """ + predictions_proba = self.predict_proba(x) + predictions = np.where(predictions_proba >= 0.5, 1, 0) + return predictions + + def predict_proba(self, x): + """ + Predict positive-class probability using sigmoid function. + :param x: Input array. + :return: Predicted probabilities. 
+ """ + n_samples, _ = x.shape + + predictions = np.ones(n_samples) * self.constant_prediction + for tree in self.trees: + predictions = predictions + self.learning_rate * tree.predict(x) + return GradientBoostingClassifier.sigmoid(predictions) diff --git a/tests/base/__pycache__/config.cpython-310.pyc b/tests/base/__pycache__/config.cpython-310.pyc index ea5d843..cd81cb2 100644 Binary files a/tests/base/__pycache__/config.cpython-310.pyc and b/tests/base/__pycache__/config.cpython-310.pyc differ diff --git a/tests/base/config.py b/tests/base/config.py index 9648eba..01ec3b8 100644 --- a/tests/base/config.py +++ b/tests/base/config.py @@ -55,6 +55,7 @@ def check_fit_predict(model, x: np.ndarray, y: np.ndarray): # Fit the model on the mock dataset model.fit(x, y) preds = model.predict(x) + print(preds) assert isinstance(preds, np.ndarray) assert preds.shape == y.shape diff --git a/tests/ensemble/__pycache__/test_boosting.cpython-310-pytest-7.4.4.pyc b/tests/ensemble/__pycache__/test_boosting.cpython-310-pytest-7.4.4.pyc index c51f559..56cf065 100644 Binary files a/tests/ensemble/__pycache__/test_boosting.cpython-310-pytest-7.4.4.pyc and b/tests/ensemble/__pycache__/test_boosting.cpython-310-pytest-7.4.4.pyc differ diff --git a/tests/ensemble/test_boosting.py b/tests/ensemble/test_boosting.py index 52b0eb3..3331fe8 100644 --- a/tests/ensemble/test_boosting.py +++ b/tests/ensemble/test_boosting.py @@ -1,8 +1,14 @@ -from src.ensemble import GradientBoostingRegressor -from tests.base.config import dataset_regression, check_fit_predict # noqa: F401 +from src.ensemble import GradientBoostingRegressor, GradientBoostingClassifier +from tests.base.config import dataset_regression, dataset_classification, check_fit_predict # noqa: F401 def test_gradient_boosting_regression(dataset_regression): x, y = dataset_regression - model = GradientBoostingRegressor() + model = GradientBoostingRegressor(learning_rate=5e-2) + check_fit_predict(model, x, y) + + +def 
test_gradient_boosting_classification(dataset_classification): + x, y = dataset_classification + model = GradientBoostingClassifier() check_fit_predict(model, x, y)