Skip to content

Commit

Permalink
Add GradientBoostingClassifier, add test cases for boosting
Browse files Browse the repository at this point in the history
  • Loading branch information
chuvalniy committed Jan 17, 2024
1 parent 2002a09 commit 98a58ee
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 13 deletions.
Binary file modified src/ensemble/__pycache__/boosting.cpython-310.pyc
Binary file not shown.
87 changes: 77 additions & 10 deletions src/ensemble/boosting.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from abc import abstractmethod

import numpy as np

from src.base import Model
Expand All @@ -8,7 +9,7 @@
class _GradientBoosting(Model):
def __init__(
self,
learning_rate: float = 1e-3,
learning_rate: float = 1e-2,
n_steps: int = 100,
max_depth: int = 3,
min_samples_split: int = 2,
Expand Down Expand Up @@ -77,19 +78,21 @@ def _calculate_loss_gradient(self, y: np.ndarray, predictions: np.ndarray) -> np
"""
pass

@abstractmethod
def _calculate_predictions(self, x: np.ndarray) -> np.ndarray:
    """
    Turn input data into final model predictions.

    Abstract hook: concrete boosting models supply the implementation,
    and ``predict`` returns whatever this method produces.
    :param x: Input data.
    :return: Predictions.
    """

def predict(self, x: np.ndarray) -> np.ndarray:
    """
    Predict target feature using pretrained boosting trees.

    The actual computation is delegated to ``_calculate_predictions`` so
    each concrete model decides how the ensemble output is turned into
    predictions. No ensemble sum is computed here: it would be discarded
    in favour of the hook's result.
    :param x: Test data.
    :return: Test predictions.
    """
    return self._calculate_predictions(x)


Expand All @@ -108,11 +111,75 @@ def _calculate_initial_prediction(self, y: np.ndarray) -> np.ndarray:

def _calculate_loss_gradient(self, y: np.ndarray, predictions: np.ndarray) -> np.ndarray:
    """
    Calculate the residuals used as the squared-error gradient signal.

    The returned value is ``y - predictions``, i.e. the negative gradient
    of ``0.5 * (y - predictions) ** 2`` with respect to the predictions.
    :param y: Targets.
    :param predictions: Target predictions.
    :return: Element-wise difference between targets and predictions.
    """
    residuals = y - predictions
    return residuals

def _calculate_predictions(self, x: np.ndarray) -> np.ndarray:
    """
    Sum the constant prediction and the scaled outputs of every fitted tree.
    :param x: Input data; its shape must unpack into exactly two dimensions.
    :return: Ensemble predictions, one value per input row.
    """
    row_count, _ = x.shape

    # Start from the constant baseline, then add each tree's contribution
    # (scaled by the learning rate) in the order the trees are stored.
    ensemble_output = np.ones(row_count) * self.constant_prediction
    for booster in self.trees:
        step = self.learning_rate * booster.predict(x)
        ensemble_output = ensemble_output + step

    return ensemble_output


class GradientBoostingClassifier(_GradientBoosting):
    """
    Gradient Boosting for binary classification.
    Uses cross-entropy as loss. Raw ensemble scores are mapped to
    probabilities with the sigmoid function and thresholded at 0.5.
    """

    def _calculate_initial_prediction(self, y: np.ndarray) -> np.ndarray:
        """
        Build the initial raw scores.

        The scores are all zeros, which corresponds to log-odds of 0,
        i.e. a starting probability of 0.5 for every sample.
        :param y: Targets.
        :return: Float64 array of zeros with the same shape as the targets.
        """
        return np.zeros_like(y, dtype=np.float64)

    def _calculate_loss_gradient(self, y: np.ndarray, predictions: np.ndarray) -> np.ndarray:
        """
        Calculate cross-entropy gradient signal.

        Returns ``y - sigmoid(predictions)``, the negative gradient of the
        binary cross-entropy with respect to the raw scores.
        :param y: Targets.
        :param predictions: Raw (pre-sigmoid) scores.
        :return: Gradient of loss function with respect to predictions.
        """
        return y - self.sigmoid(predictions)

    @staticmethod
    def sigmoid(x: np.ndarray) -> np.ndarray:
        """
        Makes input values to be in (0, 1) range.
        :param x: Input array.
        :return: Output array of the same shape as an input array.
        """
        x = np.asarray(x)
        # exp(-|x|) is always in (0, 1], so it cannot overflow the way
        # exp(-x) does for large negative x. Both branches below are
        # algebraically equal to 1 / (1 + exp(-x)).
        decay = np.exp(-np.abs(x))
        return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    def _calculate_predictions(self, x: np.ndarray) -> np.ndarray:
        """
        Calculate targets using prediction probability.
        :param x: Input array.
        :return: Array of 0/1 labels; 1 where the probability is at least 0.5.
        """
        predictions_proba = self.predict_proba(x)
        predictions = np.where(predictions_proba >= 0.5, 1, 0)
        return predictions

    def predict_proba(self, x: np.ndarray) -> np.ndarray:
        """
        Predict probability of the positive label using sigmoid function.
        :param x: Input array; its shape must unpack into exactly two dimensions.
        :return: Sigmoid of the accumulated ensemble scores, one value per row.
        """
        n_samples, _ = x.shape

        # Raw score = constant baseline + learning-rate-scaled tree outputs.
        predictions = np.ones(n_samples) * self.constant_prediction
        for tree in self.trees:
            predictions = predictions + self.learning_rate * tree.predict(x)

        return self.sigmoid(predictions)

Binary file modified tests/base/__pycache__/config.cpython-310.pyc
Binary file not shown.
1 change: 1 addition & 0 deletions tests/base/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def check_fit_predict(model, x: np.ndarray, y: np.ndarray):
# Fit the model on the mock dataset
model.fit(x, y)
preds = model.predict(x)
print(preds)

assert isinstance(preds, np.ndarray)
assert preds.shape == y.shape
Binary file not shown.
12 changes: 9 additions & 3 deletions tests/ensemble/test_boosting.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
from src.ensemble import GradientBoostingRegressor
from tests.base.config import dataset_regression, check_fit_predict # noqa: F401
from src.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from tests.base.config import dataset_regression, dataset_classification, check_fit_predict # noqa: F401


def test_gradient_boosting_regression(dataset_regression):
    """Run the shared fit/predict check for the regressor on the regression fixture."""
    x, y = dataset_regression
    # Only one model is built: a default-constructed instance would be
    # discarded unused before the check runs.
    model = GradientBoostingRegressor(learning_rate=5e-2)
    check_fit_predict(model, x, y)


def test_gradient_boosting_classification(dataset_classification):
    """Run the shared fit/predict check for a default-configured classifier."""
    features, targets = dataset_classification
    check_fit_predict(GradientBoostingClassifier(), features, targets)

0 comments on commit 98a58ee

Please sign in to comment.