diff --git a/docs/.doctrees/about.doctree b/docs/.doctrees/about.doctree
index fb112f9..86f57ba 100644
Binary files a/docs/.doctrees/about.doctree and b/docs/.doctrees/about.doctree differ
diff --git a/docs/.doctrees/caveats.doctree b/docs/.doctrees/caveats.doctree
index 2c47489..9685745 100644
Binary files a/docs/.doctrees/caveats.doctree and b/docs/.doctrees/caveats.doctree differ
diff --git a/docs/.doctrees/changelog.doctree b/docs/.doctrees/changelog.doctree
index d44a8a1..38f5e0c 100644
Binary files a/docs/.doctrees/changelog.doctree and b/docs/.doctrees/changelog.doctree differ
diff --git a/docs/.doctrees/environment.pickle b/docs/.doctrees/environment.pickle
index e097eb8..343a52e 100644
Binary files a/docs/.doctrees/environment.pickle and b/docs/.doctrees/environment.pickle differ
diff --git a/docs/.doctrees/getting_started.doctree b/docs/.doctrees/getting_started.doctree
index 4634f62..9968907 100644
Binary files a/docs/.doctrees/getting_started.doctree and b/docs/.doctrees/getting_started.doctree differ
diff --git a/docs/.doctrees/index.doctree b/docs/.doctrees/index.doctree
index 96aa24b..30c07e0 100644
Binary files a/docs/.doctrees/index.doctree and b/docs/.doctrees/index.doctree differ
diff --git a/docs/.doctrees/references.doctree b/docs/.doctrees/references.doctree
index 087828b..cc2f22d 100644
Binary files a/docs/.doctrees/references.doctree and b/docs/.doctrees/references.doctree differ
diff --git a/docs/.doctrees/usage_guide.doctree b/docs/.doctrees/usage_guide.doctree
index 8609f8b..36696c6 100644
Binary files a/docs/.doctrees/usage_guide.doctree and b/docs/.doctrees/usage_guide.doctree differ
diff --git a/docs/_sources/usage_guide.rst.txt b/docs/_sources/usage_guide.rst.txt
index de32656..e83e01d 100644
--- a/docs/_sources/usage_guide.rst.txt
+++ b/docs/_sources/usage_guide.rst.txt
@@ -448,10 +448,18 @@ You can use this function to evaluate the model by printing the output.
# ------------------------- VALID AND TEST METRICS -----------------------------
print("Validation Metrics")
- class_report_val, cm_val = model_xgb.return_metrics(X_valid, y_valid, optimal_threshold=True)
+ class_report_val, cm_val = model_xgb.return_metrics(
+     X_valid,
+     y_valid,
+     optimal_threshold=True,
+ )
print()
print("Test Metrics")
- class_report_test, cm_test = model_xgb.return_metrics(X_test, y_test, optimal_threshold=True)
+ class_report_test, cm_test = model_xgb.return_metrics(
+     X_test,
+     y_test,
+     optimal_threshold=True,
+ )
.. code-block:: bash
@@ -521,22 +529,22 @@ Step 10: Calibrate the Model (if needed)
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
- # Get the predicted probabilities for the validation data from the uncalibrated model
+ ## Get the predicted probabilities for the validation data from uncalibrated model
y_prob_uncalibrated = model_xgb.predict_proba(X_test)[:, 1]
- # Compute the calibration curve for the uncalibrated model
+ ## Compute the calibration curve for the uncalibrated model
prob_true_uncalibrated, prob_pred_uncalibrated = calibration_curve(
    y_test,
    y_prob_uncalibrated,
-    n_bins=6,
+    n_bins=10,
)
- # Calibrate the model
+ ## Calibrate the model
if model_xgb.calibrate:
-     model_xgb.calibrateModel(X, y, score="roc_auc")
+     model_xgb.calibrateModel(X, y, score="roc_auc")
- # Predict on the validation set
- y_test_pred = model_xgb.predict_proba(X_test)[:,1]
+ ## Predict on the validation set
+ y_test_pred = model_xgb.predict_proba(X_test)[:, 1]
.. code-block:: bash
@@ -568,36 +576,36 @@ Step 10: Calibrate the Model (if needed)
.. code-block:: python
- # Get the predicted probabilities for the validation data from calibrated model
+ ## Get the predicted probabilities for the validation data from calibrated model
y_prob_calibrated = model_xgb.predict_proba(X_test)[:, 1]
- # Compute the calibration curve for the calibrated model
+ ## Compute the calibration curve for the calibrated model
prob_true_calibrated, prob_pred_calibrated = calibration_curve(
-    y_test,
-    y_prob_calibrated,
-    n_bins=6,
+    y_test,
+    y_prob_calibrated,
+    n_bins=10,
)
- # Plot the calibration curves
+ ## Plot the calibration curves
plt.figure(figsize=(5, 5))
plt.plot(
-    prob_pred_uncalibrated,
-    prob_true_uncalibrated,
-    marker="o",
-    label="Uncalibrated XGBoost",
+    prob_pred_uncalibrated,
+    prob_true_uncalibrated,
+    marker="o",
+    label="Uncalibrated XGBoost",
)
plt.plot(
-    prob_pred_calibrated,
-    prob_true_calibrated,
-    marker="o",
-    label="Calibrated XGBoost",
+    prob_pred_calibrated,
+    prob_true_calibrated,
+    marker="o",
+    label="Calibrated XGBoost",
)
plt.plot(
-    [0, 1],
-    [0, 1],
-    linestyle="--",
-    label="Perfectly calibrated",
+    [0, 1],
+    [0, 1],
+    linestyle="--",
+    label="Perfectly calibrated",
)
plt.xlabel("Predicted probability")
plt.ylabel("True probability in each bin")
@@ -605,7 +613,6 @@ Step 10: Calibrate the Model (if needed)
plt.legend()
plt.show()
-
.. raw:: html
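Both calibration hunks above change ``n_bins`` from 6 to 10 in ``calibration_curve``. A minimal, self-contained sketch of what that parameter controls (synthetic data only; the guide's ``model_xgb`` and its train/validation/test splits are not assumed here):

.. code-block:: python

    import numpy as np
    from sklearn.calibration import calibration_curve

    rng = np.random.default_rng(0)
    y_prob = rng.uniform(size=1000)     # stand-in predicted probabilities
    y_true = rng.binomial(1, y_prob)    # labels sampled from those probabilities

    # More bins give a finer-grained (but noisier) reliability curve.
    for n_bins in (6, 10):
        prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=n_bins)
        print(n_bins, prob_true.round(2))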
# ------------------------- VALID AND TEST METRICS -----------------------------
print("Validation Metrics")
-class_report_val, cm_val = model_xgb.return_metrics(X_valid, y_valid, optimal_threshold=True)
+class_report_val, cm_val = model_xgb.return_metrics(
+ X_valid,
+ y_valid,
+ optimal_threshold=True,
+)
print()
print("Test Metrics")
-class_report_test, cm_test = model_xgb.return_metrics(X_test, y_test, optimal_threshold=True)
+class_report_test, cm_test = model_xgb.return_metrics(
+ X_test,
+ y_test,
+ optimal_threshold=True,
+)
Change back to CPU
@@ -624,36 +632,36 @@ Step 10: Calibrate the Model (if needed) after calibration: 0.9280033238366572
-# Get the predicted probabilities for the validation data from calibrated model
+## Get the predicted probabilities for the validation data from calibrated model
y_prob_calibrated = model_xgb.predict_proba(X_test)[:, 1]
-# Compute the calibration curve for the calibrated model
+## Compute the calibration curve for the calibrated model
prob_true_calibrated, prob_pred_calibrated = calibration_curve(
-y_test,
-y_prob_calibrated,
-n_bins=6,
+ y_test,
+ y_prob_calibrated,
+ n_bins=10,
)
-# Plot the calibration curves
+## Plot the calibration curves
plt.figure(figsize=(5, 5))
plt.plot(
-prob_pred_uncalibrated,
-prob_true_uncalibrated,
-marker="o",
-label="Uncalibrated XGBoost",
+ prob_pred_uncalibrated,
+ prob_true_uncalibrated,
+ marker="o",
+ label="Uncalibrated XGBoost",
)
plt.plot(
-prob_pred_calibrated,
-prob_true_calibrated,
-marker="o",
-label="Calibrated XGBoost",
+ prob_pred_calibrated,
+ prob_true_calibrated,
+ marker="o",
+ label="Calibrated XGBoost",
)
plt.plot(
-[0, 1],
-[0, 1],
-linestyle="--",
-label="Perfectly calibrated",
+ [0, 1],
+ [0, 1],
+ linestyle="--",
+ label="Perfectly calibrated",
)
plt.xlabel("Predicted probability")
plt.ylabel("True probability in each bin")
diff --git a/source/usage_guide.rst b/source/usage_guide.rst
index de32656..e83e01d 100644
--- a/source/usage_guide.rst
+++ b/source/usage_guide.rst
@@ -448,10 +448,18 @@ You can use this function to evaluate the model by printing the output.
# ------------------------- VALID AND TEST METRICS -----------------------------
print("Validation Metrics")
- class_report_val, cm_val = model_xgb.return_metrics(X_valid, y_valid, optimal_threshold=True)
+ class_report_val, cm_val = model_xgb.return_metrics(
+ X_valid,
+ y_valid,
+ optimal_threshold=True,
+ )
print()
print("Test Metrics")
- class_report_test, cm_test = model_xgb.return_metrics(X_test, y_test, optimal_threshold=True)
+ class_report_test, cm_test = model_xgb.return_metrics(
+ X_test,
+ y_test,
+ optimal_threshold=True,
+ )
.. code-block:: bash
@@ -521,22 +529,22 @@ Step 10: Calibrate the Model (if needed)
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
- # Get the predicted probabilities for the validation data from the uncalibrated model
+ ## Get the predicted probabilities for the validation data from uncalibrated model
y_prob_uncalibrated = model_xgb.predict_proba(X_test)[:, 1]
- # Compute the calibration curve for the uncalibrated model
+ ## Compute the calibration curve for the uncalibrated model
prob_true_uncalibrated, prob_pred_uncalibrated = calibration_curve(
y_test,
y_prob_uncalibrated,
- n_bins=6,
+ n_bins=10,
)
- # Calibrate the model
+ ## Calibrate the model
if model_xgb.calibrate:
- model_xgb.calibrateModel(X, y, score="roc_auc")
+ model_xgb.calibrateModel(X, y, score="roc_auc")
- # Predict on the validation set
- y_test_pred = model_xgb.predict_proba(X_test)[:,1]
+ ## Predict on the validation set
+ y_test_pred = model_xgb.predict_proba(X_test)[:, 1]
.. code-block:: bash
@@ -568,36 +576,36 @@ Step 10: Calibrate the Model (if needed)
.. code-block:: python
- # Get the predicted probabilities for the validation data from calibrated model
+ ## Get the predicted probabilities for the validation data from calibrated model
y_prob_calibrated = model_xgb.predict_proba(X_test)[:, 1]
- # Compute the calibration curve for the calibrated model
+ ## Compute the calibration curve for the calibrated model
prob_true_calibrated, prob_pred_calibrated = calibration_curve(
- y_test,
- y_prob_calibrated,
- n_bins=6,
+ y_test,
+ y_prob_calibrated,
+ n_bins=10,
)
- # Plot the calibration curves
+ ## Plot the calibration curves
plt.figure(figsize=(5, 5))
plt.plot(
- prob_pred_uncalibrated,
- prob_true_uncalibrated,
- marker="o",
- label="Uncalibrated XGBoost",
+ prob_pred_uncalibrated,
+ prob_true_uncalibrated,
+ marker="o",
+ label="Uncalibrated XGBoost",
)
plt.plot(
- prob_pred_calibrated,
- prob_true_calibrated,
- marker="o",
- label="Calibrated XGBoost",
+ prob_pred_calibrated,
+ prob_true_calibrated,
+ marker="o",
+ label="Calibrated XGBoost",
)
plt.plot(
- [0, 1],
- [0, 1],
- linestyle="--",
- label="Perfectly calibrated",
+ [0, 1],
+ [0, 1],
+ linestyle="--",
+ label="Perfectly calibrated",
)
plt.xlabel("Predicted probability")
plt.ylabel("True probability in each bin")
@@ -605,7 +613,6 @@ Step 10: Calibrate the Model (if needed)
plt.legend()
plt.show()
-
.. raw:: html
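Both files also reformat the ``return_metrics(..., optimal_threshold=True)`` calls. The threshold-selection logic itself lives inside model_tuner and is not part of this diff; purely as an illustration of what an "optimal threshold" commonly means (an assumption about the general technique, not a statement about model_tuner's implementation), one widely used recipe picks the probability cutoff that maximizes F1 on a validation set:

.. code-block:: python

    import numpy as np
    from sklearn.metrics import precision_recall_curve

    # Stand-ins for validation labels and positive-class probabilities,
    # e.g. y_scores = model_xgb.predict_proba(X_valid)[:, 1] in the guide.
    rng = np.random.default_rng(0)
    y_scores = rng.uniform(size=500)
    y_valid = rng.binomial(1, y_scores)

    precision, recall, thresholds = precision_recall_curve(y_valid, y_scores)
    f1 = 2 * precision * recall / (precision + recall + 1e-12)
    best_threshold = thresholds[np.argmax(f1[:-1])]  # last precision/recall pair has no threshold
    y_pred = (y_scores >= best_threshold).astype(int)
    print(round(float(best_threshold), 3))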