diff --git a/diffprivlib/models/forest.py b/diffprivlib/models/forest.py
index 042c61c..4b131fd 100644
--- a/diffprivlib/models/forest.py
+++ b/diffprivlib/models/forest.py
@@ -345,7 +345,7 @@ class DecisionTreeClassifier(skDecisionTreeClassifier, DiffprivlibMixin):
         skDecisionTreeClassifier, "max_depth", "random_state")
 
     def __init__(self, max_depth=5, *, epsilon=1, bounds=None, classes=None, random_state=None, accountant=None,
-                 **unused_args):
+                 criterion=None, **unused_args):
         # Todo: Remove when scikit-learn v1.0 is a min requirement
         try:
             super().__init__(  # pylint: disable=unexpected-keyword-arg
@@ -378,6 +378,7 @@ def __init__(self, max_depth=5, *, epsilon=1, bounds=None, classes=None, random_
         self.bounds = bounds
         self.classes = classes
         self.accountant = BudgetAccountant.load_default(accountant)
+        del criterion
 
         self._warn_unused_args(unused_args)
 
@@ -448,6 +449,11 @@ def fit(self, X, y, sample_weight=None, check_input=True):
 
         return self
 
+    def _fit(self, X, y, sample_weight=None, check_input=True, missing_values_in_feature_mask=None):
+        self.fit(X, y, sample_weight=sample_weight, check_input=check_input)
+
+        return self
+
     @property
     def n_features_(self):
         return self.n_features_in_
diff --git a/diffprivlib/models/logistic_regression.py b/diffprivlib/models/logistic_regression.py
index 112ea9f..82294ad 100644
--- a/diffprivlib/models/logistic_regression.py
+++ b/diffprivlib/models/logistic_regression.py
@@ -371,7 +371,7 @@ def _logistic_regression_path(X, y, epsilon, data_norm, pos_class=None, Cs=10, f
     X = check_array(X, accept_sparse='csr', dtype=np.float64, accept_large_sparse=True)
     y = check_array(y, ensure_2d=False, dtype=None)
     check_consistent_length(X, y)
-    _, n_features = X.shape
+    n_samples, n_features = X.shape
 
     classes = np.unique(y)
 
@@ -400,17 +400,20 @@
 
     if SKL_LOSS_MODULE:
         func = LinearModelLoss(base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept).loss_gradient
+        sw_sum = n_samples
     else:
         func = _logistic_loss_and_grad
+        sw_sum = 1
 
     coefs = []
     n_iter = np.zeros(len(Cs), dtype=np.int32)
     for i, C in enumerate(Cs):
-        vector_mech = Vector(epsilon=epsilon, dimension=n_features + int(fit_intercept), alpha=1. / C,
+        l2_reg_strength = 1.0 / (C * sw_sum)
+        vector_mech = Vector(epsilon=epsilon, dimension=n_features + int(fit_intercept), alpha=l2_reg_strength,
                              function_sensitivity=0.25, data_sensitivity=data_norm, random_state=random_state)
         noisy_logistic_loss = vector_mech.randomise(func)
 
-        args = (X, target, sample_weight, 1. / C) if SKL_LOSS_MODULE else (X, target, 1. / C, sample_weight)
+        args = (X, target, sample_weight, l2_reg_strength) if SKL_LOSS_MODULE else (X, target, l2_reg_strength, sample_weight)
 
         iprint = [-1, 50, 1, 100, 101][np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
         output_vec, _, info = optimize.fmin_l_bfgs_b(noisy_logistic_loss, output_vec, fprime=None,
diff --git a/tests/models/test_LogisticRegression.py b/tests/models/test_LogisticRegression.py
index 06d4ba1..b80a208 100644
--- a/tests/models/test_LogisticRegression.py
+++ b/tests/models/test_LogisticRegression.py
@@ -180,7 +180,7 @@ def test_simple(self):
         X -= 3.0
         X /= 2.5
 
-        clf = LogisticRegression(epsilon=2, data_norm=1.0, random_state=0)
+        clf = LogisticRegression(epsilon=2, data_norm=1.0, random_state=1)
        clf.fit(X, y)
 
         self.assertIsNotNone(clf)
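Note on the regularisation rescaling in _logistic_regression_path (a sketch, not part of the patch): scikit-learn's LinearModelLoss computes the mean per-sample loss, whereas the older _logistic_loss_and_grad computes the sum. The patch therefore divides the L2 strength by sw_sum (the number of samples in the mean-loss path, 1 in the summed path) so that the alpha given to the Vector mechanism and the strength given to the loss describe the same objective in both code paths. The snippet below, with illustrative variable names of its own, checks that equivalence numerically.

# A minimal numerical check (illustrative names, not diffprivlib code), assuming
# the two loss conventions described above.
import numpy as np

rng = np.random.default_rng(0)
n_samples, n_features = 100, 3
X = rng.normal(size=(n_samples, n_features))
y = (X @ np.ones(n_features) > 0).astype(float)
w = rng.normal(size=n_features)
C = 0.5

def summed_objective(w, alpha):
    # Sum of per-sample log-losses plus L2 penalty (_logistic_loss_and_grad convention).
    z = X @ w
    return np.sum(np.logaddexp(0.0, z) - y * z) + 0.5 * alpha * w @ w

def mean_objective(w, l2_reg_strength):
    # Mean per-sample log-loss plus L2 penalty (LinearModelLoss convention).
    z = X @ w
    return np.mean(np.logaddexp(0.0, z) - y * z) + 0.5 * l2_reg_strength * w @ w

# With l2_reg_strength = 1 / (C * n_samples), the mean objective is the summed
# objective scaled by 1 / n_samples, so both have the same minimiser:
assert np.isclose(summed_objective(w, 1.0 / C),
                  n_samples * mean_objective(w, 1.0 / (C * n_samples)))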