diff --git a/docs/.doctrees/about.doctree b/docs/.doctrees/about.doctree
deleted file mode 100644
index 447df61..0000000
Binary files a/docs/.doctrees/about.doctree and /dev/null differ
diff --git a/docs/.doctrees/caveats.doctree b/docs/.doctrees/caveats.doctree
deleted file mode 100644
index 47b1a82..0000000
Binary files a/docs/.doctrees/caveats.doctree and /dev/null differ
diff --git a/docs/.doctrees/changelog.doctree b/docs/.doctrees/changelog.doctree
deleted file mode 100644
index 912f5a9..0000000
Binary files a/docs/.doctrees/changelog.doctree and /dev/null differ
diff --git a/docs/.doctrees/environment.pickle b/docs/.doctrees/environment.pickle
deleted file mode 100644
index 46716ba..0000000
Binary files a/docs/.doctrees/environment.pickle and /dev/null differ
diff --git a/docs/.doctrees/getting_started.doctree b/docs/.doctrees/getting_started.doctree
deleted file mode 100644
index 4fd2ec2..0000000
Binary files a/docs/.doctrees/getting_started.doctree and /dev/null differ
diff --git a/docs/.doctrees/index.doctree b/docs/.doctrees/index.doctree
deleted file mode 100644
index 3f0c7fe..0000000
Binary files a/docs/.doctrees/index.doctree and /dev/null differ
diff --git a/docs/.doctrees/main.doctree b/docs/.doctrees/main.doctree
deleted file mode 100644
index ca8d9b2..0000000
Binary files a/docs/.doctrees/main.doctree and /dev/null differ
diff --git a/docs/.doctrees/references copy.doctree b/docs/.doctrees/references copy.doctree
deleted file mode 100644
index 9e44515..0000000
Binary files a/docs/.doctrees/references copy.doctree and /dev/null differ
diff --git a/docs/.doctrees/references.doctree b/docs/.doctrees/references.doctree
deleted file mode 100644
index 1aefa6b..0000000
Binary files a/docs/.doctrees/references.doctree and /dev/null differ
diff --git a/docs/.doctrees/usage_guide.doctree b/docs/.doctrees/usage_guide.doctree
deleted file mode 100644
index 60df0fd..0000000
Binary files a/docs/.doctrees/usage_guide.doctree and /dev/null differ
diff --git a/docs/_sources/usage_guide.rst.txt b/docs/_sources/usage_guide.rst.txt
index 274de22..544710d 100644
--- a/docs/_sources/usage_guide.rst.txt
+++ b/docs/_sources/usage_guide.rst.txt
@@ -531,10 +531,18 @@ You can use this function to evaluate the model by printing the output.
# ------------------------- VALID AND TEST METRICS -----------------------------
print("Validation Metrics")
- class_report_val, cm_val = model_xgb.return_metrics(X_valid, y_valid, optimal_threshold=True)
+ class_report_val, cm_val = model_xgb.return_metrics(
+ X_valid,
+ y_valid,
+ optimal_threshold=True,
+ )
print()
print("Test Metrics")
- class_report_test, cm_test = model_xgb.return_metrics(X_test, y_test, optimal_threshold=True)
+ class_report_test, cm_test = model_xgb.return_metrics(
+ X_test,
+ y_test,
+ optimal_threshold=True,
+ )
.. code-block:: bash
@@ -604,22 +612,22 @@ Step 10: Calibrate the Model (if needed)
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
- # Get the predicted probabilities for the validation data from the uncalibrated model
+ ## Get the predicted probabilities for the test data from uncalibrated model
y_prob_uncalibrated = model_xgb.predict_proba(X_test)[:, 1]
- # Compute the calibration curve for the uncalibrated model
+ ## Compute the calibration curve for the uncalibrated model
prob_true_uncalibrated, prob_pred_uncalibrated = calibration_curve(
y_test,
y_prob_uncalibrated,
- n_bins=6,
+ n_bins=10,
)
- # Calibrate the model
+ ## Calibrate the model
if model_xgb.calibrate:
- model_xgb.calibrateModel(X, y, score="roc_auc")
+ model_xgb.calibrateModel(X, y, score="roc_auc")
- # Predict on the validation set
- y_test_pred = model_xgb.predict_proba(X_test)[:,1]
+ ## Predict on the test set
+ y_test_pred = model_xgb.predict_proba(X_test)[:, 1]
.. code-block:: bash
@@ -651,36 +659,36 @@ Step 10: Calibrate the Model (if needed)
.. code-block:: python
- # Get the predicted probabilities for the validation data from calibrated model
+ ## Get the predicted probabilities for the test data from calibrated model
y_prob_calibrated = model_xgb.predict_proba(X_test)[:, 1]
- # Compute the calibration curve for the calibrated model
+ ## Compute the calibration curve for the calibrated model
prob_true_calibrated, prob_pred_calibrated = calibration_curve(
- y_test,
- y_prob_calibrated,
- n_bins=6,
+ y_test,
+ y_prob_calibrated,
+ n_bins=10,
)
- # Plot the calibration curves
+ ## Plot the calibration curves
plt.figure(figsize=(5, 5))
plt.plot(
- prob_pred_uncalibrated,
- prob_true_uncalibrated,
- marker="o",
- label="Uncalibrated XGBoost",
+ prob_pred_uncalibrated,
+ prob_true_uncalibrated,
+ marker="o",
+ label="Uncalibrated XGBoost",
)
plt.plot(
- prob_pred_calibrated,
- prob_true_calibrated,
- marker="o",
- label="Calibrated XGBoost",
+ prob_pred_calibrated,
+ prob_true_calibrated,
+ marker="o",
+ label="Calibrated XGBoost",
)
plt.plot(
- [0, 1],
- [0, 1],
- linestyle="--",
- label="Perfectly calibrated",
+ [0, 1],
+ [0, 1],
+ linestyle="--",
+ label="Perfectly calibrated",
)
plt.xlabel("Predicted probability")
plt.ylabel("True probability in each bin")
@@ -688,7 +696,6 @@ Step 10: Calibrate the Model (if needed)
plt.legend()
plt.show()
-
.. raw:: html
@@ -762,6 +769,10 @@ parameters are specified:
.. code-block:: python
+ import pandas as pd
+ import numpy as np
+ from sklearn.datasets import make_classification
+
X, y = make_classification(
n_samples=1000,
n_features=20,
@@ -786,6 +797,8 @@ Below, you will see that the dataset we have generated is severely imbalanced wi
.. code-block:: python
+ import matplotlib.pyplot as plt
+
## Create a bar plot
value_counts = pd.Series(y).value_counts()
ax = value_counts.plot(
@@ -838,6 +851,8 @@ Below, we will use an XGBoost classifier with the following hyperparameters:
.. code-block:: python
+ from xgboost import XGBClassifier
+
xgb_name = "xgb"
xgb = XGBClassifier(
random_state=222,
@@ -937,14 +952,13 @@ Initalize and Configure The Model
.. code-block:: python
+ from model_tuner import Model
+
xgb_smote = Model(
name=f"Make_Classification_{model_type}",
estimator_name=estimator_name,
calibrate=calibrate,
- pipeline_steps=[
- ("Imputer", SimpleImputer()),
- ("StandardScalar", StandardScaler()),
- ],
+ model_type="classification",
estimator=clc,
kfold=kfold,
stratify_y=True,
@@ -977,44 +991,32 @@ Perform Grid Search Parameter Tuning and Retrieve Split Data
.. code-block:: bash
Pipeline Steps:
- ========================
- ┌────────────────────────────────────────────┐
- │ Step 1: preprocess_imputer_Imputer │
- │ SimpleImputer │
- └────────────────────────────────────────────┘
- │
- ▼
- ┌────────────────────────────────────────────┐
- │ Step 2: preprocess_scaler_StandardScalar │
- │ StandardScaler │
- └────────────────────────────────────────────┘
- │
- ▼
- ┌────────────────────────────────────────────┐
- │ Step 3: resampler │
- │ SMOTE │
- └────────────────────────────────────────────┘
- │
- ▼
- ┌────────────────────────────────────────────┐
- │ Step 4: xgb │
- │ XGBClassifier │
- └────────────────────────────────────────────┘
+
+ ┌─────────────────────┐
+ │ Step 1: resampler │
+ │ SMOTE │
+ └─────────────────────┘
+ │
+ ▼
+ ┌─────────────────────┐
+ │ Step 2: xgb │
+ │ XGBClassifier │
+ └─────────────────────┘
Distribution of y values after resampling: target
0 540
1 540
Name: count, dtype: int64
- 100%|██████████| 5/5 [00:47<00:00, 9.41s/it]
+ 100%|██████████| 5/5 [00:34<00:00, 6.87s/it]
Fitting model with best params and tuning for best threshold ...
- 100%|██████████| 2/2 [00:00<00:00, 4.01it/s]Best score/param set found on validation set:
+ 100%|██████████| 2/2 [00:00<00:00, 4.37it/s]Best score/param set found on validation set:
{'params': {'xgb__early_stopping_rounds': 100,
'xgb__eval_metric': 'logloss',
'xgb__learning_rate': 0.0001,
- 'xgb__max_depth': 3,
+ 'xgb__max_depth': 10,
'xgb__n_estimators': 999},
- 'score': 0.9994444444444446}
+ 'score': 0.9990277777777777}
Best roc_auc: 0.999
SMOTE: Distribution of y values after resampling
@@ -1037,52 +1039,34 @@ Fit The Model
Return Metrics (Optional)
~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. code-block:: python
-
- # ------------------------- VALID AND TEST METRICS -----------------------------
-
- print("Validation Metrics")
- class_report_val, cm_val = xgb_smote.return_metrics(
- X_valid,
- y_valid,
- optimal_threshold=True,
- )
- print()
- print("Test Metrics")
- class_report_test, cm_test = xgb_smote.return_metrics(
- X_test,
- y_test,
- optimal_threshold=True,
- )
-
.. code-block:: bash
Validation Metrics
Confusion matrix on set provided:
--------------------------------------------------------------------------------
Predicted:
- Pos Neg
+ Pos Neg
--------------------------------------------------------------------------------
Actual: Pos 20 (tp) 0 (fn)
- Neg 3 (fp) 177 (tn)
+ Neg 6 (fp) 174 (tn)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
- {'AUC ROC': 0.9904166666666667,
- 'Average Precision': 0.8520172219085262,
- 'Brier Score': 0.2096258193295803,
- 'Precision/PPV': 0.8695652173913043,
+ {'AUC ROC': 0.9955555555555555,
+ 'Average Precision': 0.9378696741854636,
+ 'Brier Score': 0.20835571676988004,
+ 'Precision/PPV': 0.7692307692307693,
'Sensitivity': 1.0,
- 'Specificity': 0.9833333333333333}
+ 'Specificity': 0.9666666666666667}
--------------------------------------------------------------------------------
precision recall f1-score support
- 0 1.00 0.98 0.99 180
- 1 0.87 1.00 0.93 20
+ 0 1.00 0.97 0.98 180
+ 1 0.77 1.00 0.87 20
- accuracy 0.98 200
- macro avg 0.93 0.99 0.96 200
- weighted avg 0.99 0.98 0.99 200
+ accuracy 0.97 200
+ macro avg 0.88 0.98 0.93 200
+ weighted avg 0.98 0.97 0.97 200
--------------------------------------------------------------------------------
@@ -1090,31 +1074,30 @@ Return Metrics (Optional)
Confusion matrix on set provided:
--------------------------------------------------------------------------------
Predicted:
- Pos Neg
+ Pos Neg
--------------------------------------------------------------------------------
Actual: Pos 19 (tp) 1 (fn)
- Neg 2 (fp) 178 (tn)
+ Neg 3 (fp) 177 (tn)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
- {'AUC ROC': 0.9951388888888888,
- 'Average Precision': 0.9722222222222222,
- 'Brier Score': 0.20989021789332263,
- 'Precision/PPV': 0.9047619047619048,
+ {'AUC ROC': 0.9945833333333333,
+ 'Average Precision': 0.9334649122807017,
+ 'Brier Score': 0.20820269480995568,
+ 'Precision/PPV': 0.8636363636363636,
'Sensitivity': 0.95,
- 'Specificity': 0.9888888888888889}
+ 'Specificity': 0.9833333333333333}
--------------------------------------------------------------------------------
precision recall f1-score support
- 0 0.99 0.99 0.99 180
- 1 0.90 0.95 0.93 20
+ 0 0.99 0.98 0.99 180
+ 1 0.86 0.95 0.90 20
accuracy 0.98 200
- macro avg 0.95 0.97 0.96 200
- weighted avg 0.99 0.98 0.99 200
+ macro avg 0.93 0.97 0.95 200
+ weighted avg 0.98 0.98 0.98 200
--------------------------------------------------------------------------------
-
.. _Regression:
Regression
@@ -1132,7 +1115,7 @@ Step 1: Import Necessary Libraries
import pandas as pd
import numpy as np
- ifrom xgboost import XGBRegressor
+ from xgboost import XGBRegressor
from sklearn.impute import SimpleImputer
from sklearn.datasets import fetch_california_housing
from model_tuner import Model
@@ -1219,7 +1202,7 @@ when using ``XGBRegressor``.
calibrate=calibrate,
estimator=clc,
kfold=kfold,
- stratify_y=None,
+ stratify_y=False,
grid=tuned_parameters,
randomized_grid=rand_grid,
boost_early=early_stop,
@@ -1243,13 +1226,13 @@ Step 6: Perform Grid Search Parameter Tuning and Retrieve Split Data
.. code-block:: bash
Pipeline Steps:
- ========================
+
┌────────────────┐
│ Step 1: xgb │
│ XGBRegressor │
└────────────────┘
- 100%|██████████| 9/9 [00:05<00:00, 1.60it/s]Best score/param set found on validation set:
+ 100%|██████████| 9/9 [00:22<00:00, 2.45s/it]Best score/param set found on validation set:
{'params': {'xgb__colsample_bytree': 0.8,
'xgb__early_stopping_rounds': 10,
'xgb__eval_metric': 'logloss',
@@ -1259,7 +1242,7 @@ Step 6: Perform Grid Search Parameter Tuning and Retrieve Split Data
'xgb__subsample': 0.8,
'xgb__tree_method': 'hist'},
'score': 0.7651490279157868}
- Best r2: 0.765
+ Best r2: 0.765
Step 7: Fit the Model
@@ -1267,7 +1250,11 @@ Step 7: Fit the Model
.. code-block:: python
- model_xgb.fit(X_train, y_train, validation_data=[X_valid, y_valid])
+ model_xgb.fit(
+ X_train,
+ y_train,
+ validation_data=[X_valid, y_valid],
+ )
Step 8: Return Metrics (Optional)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1398,7 +1385,7 @@ The ``bootstrapper.py`` module provides utility functions for input type checkin
Bootstrap Metrics Example
-----------------------------
-Continuing from the model output object (``model_xgb``) from the :ref:`regression example ` above, we leverage the ``return_bootstrap_metrics`` method from ``model_tuner_utils.py`` to print bootstrap performance metrics (:math:`R^2` and `explained_variance`) at 95% confidence levels as shown below:
+Continuing from the model output object (``model_xgb``) from the :ref:`regression example <Regression>` above, we leverage the ``return_bootstrap_metrics`` method from ``model_tuner_utils.py`` to print bootstrap performance metrics (:math:`R^2` and :math:`\text{explained variance}`) at 95% confidence levels as shown below:
.. code-block:: python
diff --git a/docs/searchindex.js b/docs/searchindex.js
index 8c4d24f..b8273b5 100644
--- a/docs/searchindex.js
+++ b/docs/searchindex.js
@@ -1 +1,5 @@
-Search.setIndex({"alltitles": {"1. Accurate Calculation of Scaling Parameters": [[1, "accurate-calculation-of-scaling-parameters"]], "2. Consistency in Data Transformation": [[1, "consistency-in-data-transformation"]], "3. Prevention of Distortion in Scaling": [[1, "prevention-of-distortion-in-scaling"]], "AIDS Clinical Trials Group Study": [[6, "aids-clinical-trials-group-study"]], "About Model Tuner": [[4, null]], "Acknowledgements": [[0, "acknowledgements"]], "Addressing Class Imbalance in Machine Learning": [[6, "addressing-class-imbalance-in-machine-learning"]], "Bias from Class Distribution": [[1, "bias-from-class-distribution"]], "Binary Classification": [[6, "binary-classification"]], "Binary Classification Examples": [[6, "binary-classification-examples"]], "Bootstrap Metrics": [[6, "bootstrap-metrics"]], "Bootstrap Metrics Example": [[6, "bootstrap-metrics-example"]], "Brier Score": [[1, "brier-score"]], "Calibration Curve": [[1, "calibration-curve"]], "California Housing with XGBoost": [[6, "california-housing-with-xgboost"]], "Caveats": [[4, null]], "Caveats in Imbalanced Learning": [[1, "caveats-in-imbalanced-learning"]], "Changelog": [[2, null]], "Citing Model Tuner": [[0, "citing-model-tuner"]], "Classification Report (Optional)": [[6, "classification-report-optional"]], "Column Stratification with Cross-Validation": [[1, "column-stratification-with-cross-validation"]], "Cross-Validation and Stratification": [[1, "cross-validation-and-stratification"]], "Define Hyperparameters for XGBoost": [[6, "define-hyperparameters-for-xgboost"]], "Define The Model object": [[6, "define-the-model-object"]], "Dependent Variable": [[1, "dependent-variable"]], "Effects on Model Training": [[1, "effects-on-model-training"]], "Example of Synthetic Sample Creation": [[1, "example-of-synthetic-sample-creation"]], "Example: Calibration in Logistic Regression": [[1, "example-calibration-in-logistic-regression"]], "Fit The Model": [[6, "fit-the-model"]], "Generating an 
Imbalanced Dataset": [[6, "generating-an-imbalanced-dataset"]], "Getting Started": [[4, null]], "GitHub Repository": [[0, null]], "Goal of Calibration": [[1, "goal-of-calibration"]], "Helper Functions": [[6, "helper-functions"]], "Helper Methods for Pipeline Extraction": [[6, "helper-methods-for-pipeline-extraction"]], "Imbalanced Learning": [[6, "imbalanced-learning"]], "Impact of Resampling Techniques": [[1, "impact-of-resampling-techniques"]], "Imputation Before Scaling": [[1, "imputation-before-scaling"]], "Initalize and Configure The Model": [[6, "initalize-and-configure-the-model"]], "Input Parameters": [[6, "input-parameters"]], "Installation": [[3, "installation"]], "Isotonic Regression": [[1, "isotonic-regression"]], "Key Methods and Functionalities": [[6, "key-methods-and-functionalities"]], "Limitations of Accuracy": [[1, "limitations-of-accuracy"]], "Mitigating the Caveats": [[1, "mitigating-the-caveats"]], "Model Calibration": [[1, "model-calibration"]], "Model Tuner Documentation": [[4, null]], "Perform Grid Search Parameter Tuning and Retrieve Split Data": [[6, "perform-grid-search-parameter-tuning-and-retrieve-split-data"]], "Pipeline Management": [[6, "pipeline-management"]], "Platt Scaling": [[1, "platt-scaling"]], "Prerequisites": [[3, "prerequisites"]], "Purpose of Using These Techniques": [[6, "purpose-of-using-these-techniques"]], "References": [[5, null]], "Regression": [[6, "regression"]], "Regression Example": [[6, "regression-example"]], "Return Metrics (Optional)": [[6, "return-metrics-optional"]], "SMOTE: A Mathematical Illustration": [[1, "smote-a-mathematical-illustration"]], "SMOTE: Distribution of y values after resampling": [[6, "smote-distribution-of-y-values-after-resampling"]], "Solution": [[1, "solution"]], "Specifying Pipeline Steps": [[6, "specifying-pipeline-steps"]], "Step 10: Calibrate the Model (if needed)": [[6, "step-10-calibrate-the-model-if-needed"]], "Step 1: Import Necessary Libraries": [[6, 
"step-1-import-necessary-libraries"], [6, "id2"]], "Step 2: Load the Dataset": [[6, "step-2-load-the-dataset"]], "Step 2: Load the dataset, define X, y": [[6, "step-2-load-the-dataset-define-x-y"]], "Step 3: Check for zero-variance columns and drop accordingly": [[6, "step-3-check-for-zero-variance-columns-and-drop-accordingly"]], "Step 3: Create an Instance of the XGBRegressor": [[6, "step-3-create-an-instance-of-the-xgbregressor"]], "Step 4: Create an Instance of the XGBClassifier": [[6, "step-4-create-an-instance-of-the-xgbclassifier"]], "Step 4: Define Hyperparameters for XGBoost": [[6, "step-4-define-hyperparameters-for-xgboost"]], "Step 5: Define Hyperparameters for XGBoost": [[6, "step-5-define-hyperparameters-for-xgboost"]], "Step 5: Initialize and Configure the Model": [[6, "step-5-initialize-and-configure-the-model"]], "Step 6: Initialize and Configure the Model": [[6, "step-6-initialize-and-configure-the-model"]], "Step 6: Perform Grid Search Parameter Tuning and Retrieve Split Data": [[6, "step-6-perform-grid-search-parameter-tuning-and-retrieve-split-data"]], "Step 7: Fit the Model": [[6, "step-7-fit-the-model"]], "Step 7: Perform Grid Search Parameter Tuning": [[6, "step-7-perform-grid-search-parameter-tuning"]], "Step 8: Fit the Model": [[6, "step-8-fit-the-model"]], "Step 8: Return Metrics (Optional)": [[6, "step-8-return-metrics-optional"]], "Step 9: Return Metrics (Optional)": [[6, "step-9-return-metrics-optional"]], "Summary": [[1, "summary"], [6, "summary"]], "Synthetic Minority Oversampling Technique (SMOTE)": [[6, "synthetic-minority-oversampling-technique-smote"]], "Target Variable Shape and Its Effects": [[1, "target-variable-shape-and-its-effects"]], "Techniques to Address Class Imbalance": [[6, "techniques-to-address-class-imbalance"]], "Threshold-Dependent Predictions": [[1, "threshold-dependent-predictions"]], "Usage Guide": [[4, null]], "Version 0.0.010a": [[2, "version-0-0-010a"]], "Version 0.0.011a": [[2, "version-0-0-011a"]], 
"Version 0.0.012a": [[2, "version-0-0-012a"]], "Version 0.0.013a": [[2, "version-0-0-013a"]], "Version 0.0.014a": [[2, "version-0-0-014a"]], "Version 0.0.02a": [[2, "version-0-0-02a"]], "Version 0.0.05a": [[2, "version-0-0-05a"]], "Version 0.0.06a": [[2, "version-0-0-06a"]], "Version 0.0.07a": [[2, "version-0-0-07a"]], "Version 0.0.08a": [[2, "version-0-0-08a"]], "Version 0.0.09a": [[2, "version-0-0-09a"]], "Version 0.0.15a": [[2, "version-0-0-15a"]], "Version 0.0.16a": [[2, "version-0-0-16a"]], "Welcome to Model Tuner\u2019s Documentation!": [[3, null]], "What Does Model Tuner Offer?": [[3, "what-does-model-tuner-offer"]], "Zero Variance Columns": [[1, null]], "iPython Notebooks": [[6, null]]}, "docnames": ["about", "caveats", "changelog", "getting_started", "index", "references", "usage_guide"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1}, "filenames": ["about.rst", "caveats.rst", "changelog.rst", "getting_started.rst", "index.rst", "references.rst", "usage_guide.rst"], "indexentries": {"built-in function": [[6, "check_input_type", false], [6, "evaluate_bootstrap_metrics", false], [6, "get_feature_selection_pipeline", false], [6, "get_preprocessing_and_feature_selection_pipeline", false], [6, "get_preprocessing_pipeline", false], [6, "return_bootstrap_metrics", false], [6, "sampling_method", false]], "check_input_type()": [[6, "check_input_type", false]], "evaluate_bootstrap_metrics()": [[6, "evaluate_bootstrap_metrics", false]], "get_feature_selection_pipeline()": [[6, "get_feature_selection_pipeline", false]], "get_preprocessing_and_feature_selection_pipeline()": [[6, "get_preprocessing_and_feature_selection_pipeline", false]], 
"get_preprocessing_pipeline()": [[6, "get_preprocessing_pipeline", false]], "model (built-in class)": [[6, "Model", false]], "return_bootstrap_metrics()": [[6, "return_bootstrap_metrics", false]], "sampling_method()": [[6, "sampling_method", false]]}, "objects": {"": [[6, 0, 1, "", "Model"], [6, 1, 1, "", "check_input_type"], [6, 1, 1, "", "evaluate_bootstrap_metrics"], [6, 1, 1, "", "get_feature_selection_pipeline"], [6, 1, 1, "", "get_preprocessing_and_feature_selection_pipeline"], [6, 1, 1, "", "get_preprocessing_pipeline"], [6, 1, 1, "", "return_bootstrap_metrics"], [6, 1, 1, "", "sampling_method"]]}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"]}, "objtypes": {"0": "py:class", "1": "py:function"}, "terms": {"": [1, 2, 4, 6], "0": [0, 1, 3, 4, 6], "00": 6, "000": 6, "0001": 6, "01": 6, "010a": 4, "011a": 4, "012a": 4, "013a": 4, "014a": 4, "017a": 3, "01it": 6, "02a": 4, "05": 6, "05a": 4, "05it": 6, "06a": 4, "07a": 4, "08a": 4, "09a": 4, "1": [2, 3, 4], "10": [0, 3, 4, 5], "100": 6, "1000": 6, "104": 6, "11": 3, "11a": 2, "12": 3, "12727322": 0, "14": 3, "15a": 4, "16628708993634742": 6, "16713189436073958": 6, "16a": [0, 4], "175": 5, "177": 6, "178": 6, "180": 6, "19": [3, 6], "1998": 5, "1d": 1, "1e": 6, "2": [3, 4], "20": 6, "200": 6, "2024": 0, "2096258193295803": 6, "20989021789332263": 6, "21": [3, 6], "222": 6, "23": 3, "24": 3, "24432": 5, "245": 6, "246": 6, "26": 3, "26315186452865597": 6, "2672762813568116": 6, "28411432705731066": 6, "3": [3, 4], "30": 6, "300": 6, "3066172248224347": 6, "315": 6, "324": 6, "34": 6, "35": 6, "358": 6, "36": 6, "3743548199982513": 6, "3830825326824073": 6, "4": [1, 3, 4], "41": 6, "42": 6, "428": 6, "47": 6, "5": [1, 3, 4], "500": [2, 6], "50it": 6, "5281": 0, "533023758436067": 6, "540": 6, "5459770114942529": 6, "5491329479768786": 6, "55": 6, "5537302816556403": 6, "58": 1, "6": [3, 4], "60it": 6, "66": 3, "67": 6, "68": 6, "69": 6, "7": [3, 4], "70": 6, "71": 6, 
"75": 6, "7561728395061729": 6, "7592592592592593": 6, "76": 6, "7647433075624044": 6, "7647451659057567": 6, "765": 6, "7651490279157868": 6, "77": 6, "770853": 6, "777898": 6, "78": 6, "781523": 6, "788341": 6, "7888925135381788": 6, "7888942913974833": 6, "79": 6, "792193": 6, "798785": 6, "7992275185850191": 6, "8": [3, 4], "80": 6, "8023014087345259": 6, "81": 6, "82": 6, "83": 6, "84": 6, "85": 6, "8520172219085262": 6, "86": 6, "8695652173913043": 6, "87": 6, "88": 6, "89": 6, "890": 6, "9": [1, 4], "90": 6, "900": 6, "9047619047619048": 6, "91": 6, "9134615384615384": 6, "92": 6, "928": 6, "9280033238366572": 6, "93": 6, "934576804368471": 6, "94": 6, "95": 6, "96": 6, "97": 6, "9722222222222222": 6, "98": 6, "9833333333333333": 6, "9888888888888889": 6, "99": 6, "9904166666666667": 6, "9951388888888888": 6, "999": [1, 6], "9994444444444446": 6, "A": [4, 6], "AND": 6, "By": [1, 6], "For": [1, 3, 6], "If": [1, 6], "In": [1, 2, 6], "It": [1, 3, 6], "Its": 4, "No": 6, "Not": 6, "On": 1, "One": [1, 6], "The": [1, 3, 4], "There": 2, "These": [1, 4], "To": [1, 6], "With": 1, "_": 1, "__colsample_bytre": 6, "__early_stopping_round": 6, "__eval_metr": 6, "__init__": 6, "__learning_r": 6, "__max_depth": 6, "__n_estim": 6, "__param_nam": 6, "__subsampl": 6, "__tree_method": 6, "__verbos": 6, "_confusion_matrix_print": 6, "_i": 1, "_j": 1, "_k": 1, "abil": 6, "about": 1, "abov": 6, "abram": 5, "absolut": 6, "access": [0, 6], "accompani": 6, "accordingli": 4, "account": 1, "accur": 4, "accuraci": [4, 6], "achiev": [1, 2], "acknowledg": 4, "across": [1, 2, 3, 6], "activ": 6, "actual": [1, 6], "ad": [1, 2, 6], "adasyn": [2, 3, 6], "add": 6, "addit": [1, 6], "addition": [1, 6], "address": [1, 4], "adequ": 1, "adjust": 1, "advanc": 6, "aforement": 1, "after": [1, 4], "aid": [4, 5], "aids_clinical_": 6, "aids_clinical_trials_group_study_175": 6, "aim": 6, "alex": 0, "algorithm": [1, 6], "align": 1, "all": [1, 2, 3, 6], "alloc": 6, "allow": [2, 3, 6], "along": 1, "also": [1, 
6], "altern": 1, "alwai": 2, "amplifi": 1, "an": 4, "analysi": 1, "angel": 6, "ani": 1, "anoth": [1, 6], "anova": 1, "apach": 2, "appli": [1, 3, 6], "applic": [1, 6], "approach": 1, "appropri": 6, "approx": 1, "ar": [0, 1, 2, 3, 6], "arrai": [1, 6], "arthur": [0, 2], "artifici": 1, "ascii": 6, "assert": 2, "assess": [1, 3, 6], "assign": [2, 6], "assum": 1, "assumpt": 1, "attempt": 1, "attributeerror": 6, "auc": 6, "author": 0, "autokera": 2, "autokerasclassifi": 2, "automat": [1, 3, 6], "avail": [1, 2, 6], "averag": 6, "average_precis": 6, "avg": 6, "avoid": [1, 2, 6], "ax": 6, "axi": 2, "b": 1, "back": 6, "balanc": [1, 2, 3, 6], "bar": [1, 6], "base": [1, 3, 6], "bayesian": 6, "bayessearchcv": 6, "becaus": [1, 2], "becom": 1, "been": [1, 2, 6], "befor": [2, 3, 4, 6], "begin": 1, "behavior": 1, "being": [2, 6], "below": [1, 3, 6], "best": 6, "best_param": 2, "best_params_per_scor": 6, "beta": 6, "better": [1, 6], "between": [1, 3, 6], "beyond": 6, "bia": [4, 6], "bias": [1, 6], "bin": [1, 6], "binari": 4, "block": 6, "bool": 6, "boost": [2, 6], "boost_earli": 6, "bootstrap": [3, 4], "bootstrapp": [2, 6], "both": [1, 2, 6], "brier": [4, 6], "bug": 2, "bui": 0, "build": 6, "c": 1, "c5g896": 5, "c_": 1, "calcul": [4, 6], "calibr": [2, 3, 4], "calibrate_report": 6, "calibratemodel": 6, "calibration_curv": 6, "calibration_method": 6, "california": 4, "call": 6, "can": [0, 1, 3, 6], "cannot": 1, "captur": [1, 6], "care": [1, 6], "carefulli": 1, "case": [1, 2, 6], "catboost": [2, 3], "categor": 6, "categori": 6, "caus": 1, "cdot": 1, "center": 6, "challeng": [1, 6], "chang": [1, 2, 6], "changelog": 4, "char": 2, "check": [1, 4], "check_input_typ": [4, 6], "chunk": 2, "ci": 6, "cite": 4, "clariti": 6, "class": [2, 3, 4], "class_label": 6, "class_proport": 6, "class_report_test": 6, "class_report_v": 6, "classif": [1, 3, 4], "classifi": [1, 6], "classification_report": 6, "clc": 6, "clean": 2, "click": 0, "clinic": [0, 4, 5], "close": 1, "cluster": 1, "cm_test": 6, 
"cm_val": 6, "code": [1, 2, 6], "codebas": 0, "col": 6, "colab": 6, "color": 6, "column": [2, 4], "combin": [1, 6], "come": 1, "command": 6, "comment": 2, "common": 1, "commonli": 6, "compar": 6, "compat": 3, "complet": [1, 2], "complex": 6, "comprehens": 6, "comput": [1, 6], "concat": 2, "condit": 1, "conduct": 3, "conf_mat_class_kfold": 6, "conf_matrix": 6, "confid": 6, "configur": 4, "conflict": 1, "confus": 6, "connect": 1, "consid": [1, 2], "consist": 4, "constant": 1, "constraint": [1, 2], "construct": 1, "contain": [2, 6], "context": [1, 6], "continu": 6, "contrast": 1, "contribut": [0, 1], "contributor": 0, "convent": 6, "convers": 1, "convert": [1, 6], "correct": [1, 2], "correctli": 1, "count": [2, 6], "cpu": 6, "creat": [1, 4], "creation": [3, 4], "critic": [1, 6], "cross": [3, 4, 6], "crucial": [1, 6], "ctsi": 0, "current": [1, 3], "curs": 1, "curv": [4, 6], "custom": [2, 3, 6], "custom_scor": 6, "d": [1, 5], "d_1": 1, "d_2": 1, "d_j": 1, "d_k": 1, "data": [2, 3, 4], "dataconversionwarn": 1, "datafram": [1, 6], "dataset": [1, 3, 4], "decis": [1, 6], "decreas": 1, "def": 6, "default": [1, 6], "defin": [1, 4], "degrad": 1, "delta": 1, "demonstr": 6, "denot": 1, "depend": [2, 3, 4, 6], "deploi": 6, "deprec": 2, "depth": 6, "design": [1, 3, 6], "despit": 1, "detail": 6, "detect": 6, "determin": 1, "dev": 2, "develop": 3, "deviat": 1, "diagnosi": [1, 6], "dict": 6, "dictionari": 6, "differ": [1, 2, 3], "dimens": 1, "dimension": 1, "directli": 3, "discrep": 1, "diseas": 6, "displai": 6, "disrupt": 1, "distinct": 6, "distinguish": 6, "distort": 4, "distribut": [3, 4], "divid": 1, "divis": 1, "do": [2, 6], "document": 6, "doe": [1, 4], "doi": [0, 5], "domin": [1, 6], "dot": 1, "dr": 0, "draw": 6, "drawn": 1, "drop": [1, 4], "dtype": 6, "due": 1, "duplic": 6, "dure": [1, 2, 6], "e": [1, 6], "each": [1, 6], "earli": [2, 3, 6], "early_stop": 6, "eas": 6, "easier": 6, "easili": 6, "effect": [3, 4, 6], "either": [2, 6], "el": 5, "elimin": 3, "empir": 1, "empti": [1, 
6], "enabl": [3, 6], "encount": 1, "end": 1, "engin": 1, "enhanc": 2, "ensur": [1, 2, 3, 6], "entir": 1, "enumer": 6, "equal": [1, 6], "equat": 1, "error": [1, 2, 6], "especi": 6, "essenc": 6, "essenti": [1, 6], "estat": 6, "estim": [1, 2, 3, 6], "estimator_nam": 6, "etc": [2, 6], "evalu": [1, 3, 6], "evaluate_bootstrap_metr": [2, 4, 6], "even": 1, "event": 6, "examin": 6, "exampl": 4, "exceed": 2, "except": 6, "excess": 1, "execut": 6, "exist": [1, 6], "exp": 1, "expect": [1, 6], "explain": 6, "explained_vari": 6, "explan": 1, "explicit": 6, "explicitli": 6, "express": 1, "extend": 6, "extract": [2, 4], "extrem": 1, "f": [1, 6], "f1": [1, 6], "f1_beta_tun": 6, "f1_weight": 6, "f_i": 1, "facilit": 3, "fail": 1, "failur": 1, "fair": 1, "fairli": 6, "fall": 1, "fals": [1, 6], "far": 1, "favor": [1, 2, 6], "feat_num": 1, "featur": [1, 3, 6], "feature_": 6, "feature_nam": 6, "feature_select": 6, "feature_selection_": 6, "fetch": 6, "fetch_california_h": 6, "fetch_ucirepo": 6, "figsiz": 6, "figur": 6, "file": [2, 6], "filter": 2, "find": 1, "fine": [3, 6], "first": 1, "fit": [1, 2, 4], "fix": [2, 6], "flexibl": [3, 6], "flip_i": 6, "float": 6, "fn": 6, "focu": [1, 6], "fold": [1, 3, 6], "follow": [1, 2, 3, 6], "form": 1, "format": 6, "formul": 1, "forthcom": 2, "found": 6, "fp": 6, "frac": 1, "fraction": 1, "fraud": 6, "fraudul": 6, "free": 1, "frequenc": [1, 6], "frequent": 6, "from": [2, 3, 4, 6], "full": 1, "fulli": 1, "function": [1, 2, 3, 4], "funnel": 0, "funnell_2024_12727322": 0, "g": [1, 6], "gender": 6, "gener": [1, 3, 4], "generaliz": 1, "geq": 1, "get": 6, "get_best_score_param": 6, "get_cross_valid": 6, "get_feature_selection_pipelin": [4, 6], "get_preprocessing_and_feature_selection_pipelin": [4, 6], "get_preprocessing_pipelin": [4, 6], "get_test_data": 6, "get_train_data": 6, "get_valid_data": 6, "github": 4, "given": 1, "goal": [4, 6], "googl": 6, "grid": 4, "grid_search_param_tun": 6, "ground": 6, "group": [4, 5], "guidanc": 0, "ha": [1, 2, 6], "had": 
1, "hand": 1, "handl": [1, 3, 6], "happen": 2, "harmon": 1, "hat": 1, "have": [2, 6], "haven": 6, "healthcar": 6, "heavili": 1, "help": [1, 6], "helper": 4, "here": [2, 3, 6], "hi": 0, "high": 1, "higher": 3, "highli": 1, "highlight": 1, "hist": 6, "histori": 2, "hold": 1, "homogen": 1, "hous": 4, "how": 6, "howev": 1, "html": 6, "http": [0, 5], "hybrid": 6, "hyperparamet": [2, 3, 4], "i": [1, 2, 3, 6], "id": 6, "ident": 1, "identifi": 1, "ifrom": 6, "ij": 1, "illustr": 4, "imbal": [1, 4], "imbalanc": [2, 3, 4], "imbalance_sampl": 6, "imblearn": 6, "impact": 4, "implement": [2, 3, 6], "import": [1, 2, 4], "importerror": 6, "improp": 1, "improperli": 6, "improv": [3, 6], "imput": [2, 3, 4, 6], "inaccur": 1, "includ": [1, 3, 6], "incomplet": 1, "inconsist": 1, "incorrect": [1, 6], "increas": [1, 6], "index": 6, "indexerror": 6, "indic": [1, 6], "infinit": 1, "inflat": 1, "influenc": 1, "inform": [1, 6], "informat": 0, "inher": [1, 6], "init": 4, "initi": 4, "initialis": 2, "input": [1, 4], "insid": [2, 6], "instal": [4, 6], "instanc": [1, 4], "instead": [1, 2, 6], "institut": 0, "insuffici": 6, "int": 6, "int64": 6, "integr": [1, 3], "interpol": [1, 6], "interpret": 1, "interv": [1, 6], "introduc": [1, 2], "invalid": [1, 6], "invalu": 0, "involv": [1, 2], "ipython": 4, "isinst": 1, "isoton": [3, 4, 6], "issu": [1, 2, 6], "iter": 6, "its": [1, 6], "itself": 2, "j": 1, "job": 6, "joblib": 3, "jul": 0, "just": 1, "k": [1, 3, 6], "kei": [0, 1, 2, 3, 4], "keyerror": 6, "kf": 6, "kfold": [2, 6], "kfold_split": 6, "kind": 6, "known": 6, "label": [1, 3, 6], "larg": 1, "later": [1, 6], "layer": 2, "lead": [1, 6], "learn": [2, 3, 4, 5], "legend": 6, "length": 2, "leon": 2, "leonid": 0, "leq": 1, "less": 1, "let": 1, "level": 6, "leverag": 6, "li": 1, "librari": [3, 4], "licens": 2, "like": [1, 3, 6], "likelihood": 1, "limit": [2, 4], "line": [1, 2], "linear": [1, 6], "linestyl": 6, "link": [0, 6], "list": [2, 6], "ll": 1, "lo": 6, "load": 4, "log": [2, 6], "logic": 2, 
"logist": [4, 6], "logloss": 6, "logo": 2, "loop": 2, "loss": [1, 6], "low": [2, 6], "lower": [1, 6], "machin": [1, 3, 4, 5], "macro": 6, "mai": 1, "maintain": 1, "major": [1, 6], "make": [1, 6], "make_classif": 6, "make_classification_": 6, "manag": 4, "mani": 1, "marker": 6, "match": 1, "mathbf": 1, "mathemat": 4, "matplotlib": 6, "matric": 6, "matrix": 6, "max": 1, "maximum": [1, 6], "mean": [1, 6], "meaning": [1, 6], "measur": 1, "median": [1, 6], "medic": [0, 1], "meet": 3, "mere": 6, "messag": 6, "method": [1, 2, 3, 4], "metric": [1, 2, 3, 4], "mid": 1, "midwai": 1, "might": [1, 6], "mii": 0, "min": 1, "min_": 1, "minimum": 1, "minmax": 3, "minor": [1, 4], "misclassif": 1, "misinterpret": 1, "mislabel": 1, "mislead": 1, "mismatch": [2, 6], "miss": [1, 6], "mitig": [4, 6], "mlflow": 2, "model": 2, "model_definit": 6, "model_tun": [3, 6], "model_tuner_util": 6, "model_typ": 6, "model_xgb": 6, "modifi": 2, "modul": 6, "monoton": 1, "month": 0, "more": [1, 6], "move": 2, "msb": 1, "msw": 1, "mu": 1, "much": 1, "multi": [3, 6], "multi_label": 6, "multipl": [2, 6], "must": [1, 6], "n": 1, "n_bin": 6, "n_clusters_per_class": 6, "n_featur": 6, "n_inform": 6, "n_iter": 6, "n_j": 1, "n_job": 6, "n_redund": 6, "n_sampl": [1, 6], "n_split": 6, "name": [2, 6], "nan": [1, 6], "natur": 6, "nearest": [1, 6], "necessari": [2, 4], "need": [1, 4], "neg": [1, 6], "neighbor": [1, 6], "new": 6, "nois": [1, 6], "noisi": 1, "non": [1, 2], "none": 6, "normal": 6, "note": 1, "notebook": [2, 4], "notic": 6, "now": [1, 2], "np": [2, 6], "num_resampl": 6, "number": [1, 2, 6], "numer": 6, "numpi": [3, 6], "o": 6, "object": [2, 4], "observ": [1, 6], "occur": [2, 6], "off": 1, "offer": [4, 6], "often": [1, 6], "older": 2, "onc": 6, "one": [1, 6], "ones": 1, "onli": [1, 2, 6], "onto": 2, "oper": 1, "optim": [1, 3, 6], "optimal_threshold": 6, "option": 4, "order": [1, 2, 6], "org": [0, 5], "organ": 6, "origin": [0, 1], "other": [1, 2, 3, 6], "our": [2, 6], "out": [1, 2], "outcom": [1, 6], 
"output": [1, 6], "outsid": 2, "outweigh": 6, "over": 1, "overal": 1, "overfit": [1, 3, 6], "overlap": 1, "overlook": 1, "oversampl": [1, 3, 4], "p": 1, "p_1": 1, "p_2": 1, "p_i": 1, "p_n": 1, "packag": 6, "panayioti": 0, "panda": [3, 6], "parallel": 6, "param": 6, "paramet": [2, 3, 4], "parametr": 1, "part": 6, "particularli": [1, 3, 6], "pass": [1, 6], "pattern": 6, "pd": [1, 2, 6], "penal": 1, "per": [2, 6], "perfectli": [1, 6], "perform": [1, 3, 4], "petousi": 0, "pickl": 2, "piecewis": 1, "pip": [3, 6], "pip25": 2, "pipelin": [1, 2, 3, 4], "pipeline_assembli": 6, "pipeline_step": [1, 2, 6], "pipelineclass": 6, "placehold": 1, "platt": 4, "pleas": [1, 6], "plot": 6, "plt": 6, "pmatrix": 1, "po": 6, "point": [1, 6], "poor": 6, "poorli": 6, "posit": [1, 6], "possibl": [1, 6], "power": [1, 3], "ppv": 6, "practic": [1, 6], "practition": 1, "pre": 6, "precis": [1, 6], "predict": [4, 6], "predict_proba": 6, "prefix": 6, "preprocess": [1, 6], "preprocess_": 6, "preprocess_imputer_imput": 6, "preprocess_scaler_standardscalar": 6, "preprocessing_step": 6, "preprocessor": 1, "prerequisit": 4, "present": 1, "preserv": 1, "pretti": 2, "prevent": [3, 4], "previou": 2, "previous": 1, "primari": 1, "print": [2, 6], "print_pipelin": 6, "print_result": 6, "print_selected_best_featur": 6, "prior": 1, "priorit": 1, "prob_pred_calibr": 6, "prob_pred_uncalibr": 6, "prob_true_calibr": 6, "prob_true_uncalibr": 6, "probabilist": 1, "probabl": [1, 3, 6], "problem": [1, 6], "proceed": 1, "process": [1, 2, 6], "process_imbalance_sampl": 6, "produc": [1, 6], "properli": 6, "properti": 1, "proport": [1, 6], "provid": [1, 3, 6], "publish": 0, "purpos": 4, "py": [2, 6], "pypi": [2, 3], "pyplot": 6, "pyproject": 2, "python": 3, "quad": 1, "quickli": 6, "r": 6, "r2": 6, "race": 6, "rais": [1, 6], "rand_grid": 6, "random": [1, 6], "random_st": 6, "randomized_grid": 6, "randomli": 6, "randomoversampl": 6, "randomundersampl": 6, "rang": [1, 6], "rare": 6, "rate": 1, "rather": 1, "ratio": [1, 6], 
"raw": 1, "re": 2, "readili": 6, "readm": 2, "real": 6, "recal": [1, 6], "recommend": 1, "recurs": 3, "redfin": 6, "redistribut": 6, "reduc": [1, 6], "ref": 2, "refactor": 2, "refer": [1, 4, 6], "reflect": 1, "regard": 2, "region": 1, "regress": 4, "regression_report": 6, "regression_report_kfold": 6, "regular": 6, "relat": 2, "relationship": 1, "releas": 2, "reli": 1, "reliabl": 6, "remov": [1, 2, 6], "renam": [2, 6], "repeatedli": 1, "replac": 1, "report": [2, 4], "report_model_metr": 6, "repositori": [4, 5, 6], "repres": [1, 2], "represent": 6, "reproduc": 6, "requir": [1, 2, 3, 6], "resampl": [2, 4], "research": 6, "reset": [2, 6], "reset_estim": 6, "resolut": 2, "resourc": 6, "respect": 6, "result": 1, "retriev": 4, "return": 4, "return_bootstrap_metr": [4, 6], "return_metr": 6, "rfe": [3, 6], "rightarrow": 1, "risk": [1, 6], "rmse": 6, "robust": [3, 6], "roc": 6, "roc_auc": 6, "root": 6, "rot": 6, "rout": 6, "routin": 1, "run": 6, "runtim": 1, "runtimeerror": 6, "runtimewarn": 1, "sadr": 5, "same": [1, 2], "sampl": [2, 4, 6], "sampler": 6, "sampling_method": [4, 6], "save": 2, "scale": [2, 3, 4, 6], "scenario": 6, "scienc": 0, "scikit": 3, "scipi": 3, "score": [4, 6], "seamlessli": 6, "search": 4, "section": 6, "see": 6, "seed": 6, "segment": [1, 2], "select": [3, 6], "selectkbest": [2, 3], "self": [2, 6], "sensit": [1, 6], "separ": [1, 6], "sequenc": [1, 6], "seri": [1, 6], "set": [1, 6], "setup": 2, "sever": [1, 6], "shap": 6, "shape": [4, 6], "should": [1, 2, 6], "show": 6, "shown": 6, "shpaner": 0, "sigma": 1, "sigmoid": [3, 6], "significantli": [1, 6], "sim": 1, "similar": [1, 6], "simpl": 6, "simpleimput": [1, 3, 6], "simpli": 6, "simplifi": 2, "simultan": 2, "sinc": 1, "singl": [1, 6], "size": 6, "skew": 1, "sklearn": 6, "smote": [2, 3, 4], "smoteenn": 1, "smotetomek": 1, "so": [1, 6], "softwar": [0, 2], "solut": 4, "some": [1, 6], "sort": 6, "space": 1, "spam": 6, "special": 0, "specif": [1, 2, 6], "specifi": [1, 2, 4], "split": [1, 2, 3, 4], "sqrt": 
1, "squar": [1, 6], "squeez": [1, 6], "stage": 6, "standard": [1, 6], "standardscal": [1, 6], "standardscalar": 6, "startswith": 6, "state": 1, "statist": 1, "step": [2, 4], "step_0": 6, "step_1": 6, "stop": [2, 3, 6], "store": 2, "str": 6, "strat_key_val_test": 2, "strategi": [3, 6], "stratif": [2, 4, 6], "stratifi": [1, 2, 3, 6], "stratify_col": [1, 2, 6], "stratify_i": [1, 2, 6], "stratify_kei": 2, "string": 2, "structur": 1, "struggl": 6, "studi": [4, 5], "subsampl": 6, "subsequ": 1, "subset": 1, "suit": 6, "sum": 6, "sum_": 1, "summari": 4, "supervis": 6, "support": [0, 2, 3, 6], "synthet": 4, "system": 3, "t": 6, "take": [1, 6], "taken": 2, "target": [2, 3, 4, 6], "task": [3, 6], "tau": 1, "techniqu": [3, 4], "temporarili": 2, "tend": 6, "test": [2, 6], "test_model": 6, "test_siz": 6, "text": [1, 6], "th": 1, "than": 1, "thank": 0, "thei": [1, 6], "them": [1, 6], "therefor": [1, 6], "thi": [0, 1, 2, 3, 6], "thoroughli": 6, "three": 6, "threshold": [2, 3, 4, 6], "through": 6, "thu": 6, "time": [1, 2], "titan": 6, "titl": [0, 6], "tn": 6, "toml": 2, "too": 1, "tool": 3, "top": [1, 6], "toward": 6, "tp": 6, "tqdm": 3, "track": 6, "trade": 1, "tradit": 1, "train": [3, 4, 6], "train_siz": 6, "train_val_test": 2, "train_val_test_split": [2, 6], "transact": 6, "transform": [4, 6], "translat": 0, "treat": [1, 6], "tree": 6, "trial": [4, 5], "trigger": 1, "true": [1, 6], "trust": 1, "truth": 6, "tune": [1, 2, 3, 4], "tune_threshold_fbeta": [2, 6], "tuned_paramet": 6, "tuned_parameters_xgb": 6, "tuner": 6, "two": [1, 6], "txt": 2, "type": 6, "typeerror": 6, "typic": 6, "u": 1, "uci": [5, 6], "ucimlrepo": 6, "ucla": 0, "uncalibr": 6, "undefin": 1, "under": [3, 6], "underli": 1, "underrepres": 6, "undersampl": [1, 6], "understand": [1, 6], "unequ": 6, "unexpect": 6, "uniform": 1, "uniqu": 6, "unlik": 1, "unnecessari": [1, 2, 6], "unpredict": 1, "unrealist": 1, "unreli": 1, "unseen": 1, "unsupport": 6, "unus": 2, "up": 2, "updat": 2, "upper": 6, "url": 0, "us": [1, 2, 3, 
4], "usag": 2, "user": 6, "userwarn": 1, "util": [2, 6], "va": 6, "valid": [3, 4, 6], "validation_data": 6, "validation_s": 6, "valu": [1, 4], "value_count": 6, "valueerror": 6, "var": [1, 6], "variabl": [2, 3, 4, 6], "varianc": 4, "varieti": 6, "variou": [3, 6], "vdot": 1, "vector": 1, "verbos": 2, "versatil": 3, "version": [0, 3, 4], "visual": 6, "w": [1, 5], "wa": [0, 1, 2], "wai": [1, 6], "warn": 1, "we": [1, 6], "weight": [1, 6], "welcom": 4, "well": [1, 6], "were": 2, "what": 4, "when": [1, 2, 3, 6], "where": [1, 2, 6], "whether": 6, "which": [1, 3, 6], "while": [1, 6], "wide": [1, 6], "width": 6, "wish": 6, "within": [1, 6], "without": [1, 6], "work": [0, 1, 2], "workflow": [3, 6], "world": 6, "would": 1, "wrong": 2, "x": [1, 2, 4], "x_": 1, "x_i": 1, "x_j": 1, "x_test": 6, "x_train": 6, "x_valid": 6, "x_valid_test": 2, "xgb": 6, "xgb_": 6, "xgb__colsample_bytre": 6, "xgb__early_stopping_round": 6, "xgb__eval_metr": 6, "xgb__learning_r": 6, "xgb__max_depth": 6, "xgb__n_estim": 6, "xgb__subsampl": 6, "xgb__tree_method": 6, "xgb_definit": 6, "xgb_early_bootstrap_test": 2, "xgb_model": 6, "xgb_name": 6, "xgb_smote": 6, "xgbclassifi": 4, "xgbearli": 6, "xgboost": [2, 3, 4], "xgbregressor": 4, "xlabel": 6, "y": [1, 2, 4], "y_1": 1, "y_2": 1, "y_i": 1, "y_n": 1, "y_pred": 6, "y_pred_prob": 6, "y_prob_calibr": 6, "y_prob_uncalibr": 6, "y_test": 6, "y_test_pr": 6, "y_train": 6, "y_true": 6, "y_valid": 6, "y_valid_proba": 6, "y_valid_test": 2, "year": 0, "yellow": 6, "yet": 6, "ylabel": 6, "you": [0, 1, 3, 6], "your": [1, 3, 6], "z": 1, "z_": 1, "zenodo": [0, 2], "zero": 4, "zero_variance_column": [1, 6]}, "titles": ["GitHub Repository", "Zero Variance Columns", "Changelog", "Welcome to Model Tuner\u2019s Documentation!", "Model Tuner Documentation", "References", "iPython Notebooks"], "titleterms": {"": 3, "0": 2, "010a": 2, "011a": 2, "012a": 2, "013a": 2, "014a": 2, "02a": 2, "05a": 2, "06a": 2, "07a": 2, "08a": 2, "09a": 2, "1": [1, 6], "10": 6, "15a": 2, "16a": 
2, "2": [1, 6], "3": [1, 6], "4": 6, "5": 6, "6": 6, "7": 6, "8": 6, "9": 6, "A": 1, "Its": 1, "The": 6, "These": 6, "about": 4, "accordingli": 6, "accur": 1, "accuraci": 1, "acknowledg": 0, "address": 6, "after": 6, "aid": 6, "an": 6, "befor": 1, "bia": 1, "binari": 6, "bootstrap": 6, "brier": 1, "calcul": 1, "calibr": [1, 6], "california": 6, "caveat": [1, 4], "changelog": 2, "check": 6, "cite": 0, "class": [1, 6], "classif": 6, "clinic": 6, "column": [1, 6], "configur": 6, "consist": 1, "creat": 6, "creation": 1, "cross": 1, "curv": 1, "data": [1, 6], "dataset": 6, "defin": 6, "depend": 1, "distort": 1, "distribut": [1, 6], "document": [3, 4], "doe": 3, "drop": 6, "effect": 1, "exampl": [1, 6], "extract": 6, "fit": 6, "from": 1, "function": 6, "gener": 6, "get": 4, "github": 0, "goal": 1, "grid": 6, "group": 6, "guid": 4, "helper": 6, "hous": 6, "hyperparamet": 6, "illustr": 1, "imbal": 6, "imbalanc": [1, 6], "impact": 1, "import": 6, "imput": 1, "init": 6, "initi": 6, "input": 6, "instal": 3, "instanc": 6, "ipython": 6, "isoton": 1, "kei": 6, "learn": [1, 6], "librari": 6, "limit": 1, "load": 6, "logist": 1, "machin": 6, "manag": 6, "mathemat": 1, "method": 6, "metric": 6, "minor": 6, "mitig": 1, "model": [0, 1, 3, 4, 6], "necessari": 6, "need": 6, "notebook": 6, "object": 6, "offer": 3, "option": 6, "oversampl": 6, "paramet": [1, 6], "perform": 6, "pipelin": 6, "platt": 1, "predict": 1, "prerequisit": 3, "prevent": 1, "purpos": 6, "refer": 5, "regress": [1, 6], "report": 6, "repositori": 0, "resampl": [1, 6], "retriev": 6, "return": 6, "sampl": 1, "scale": 1, "score": 1, "search": 6, "shape": 1, "smote": [1, 6], "solut": 1, "specifi": 6, "split": 6, "start": 4, "step": 6, "stratif": 1, "studi": 6, "summari": [1, 6], "synthet": [1, 6], "target": 1, "techniqu": [1, 6], "threshold": 1, "train": 1, "transform": 1, "trial": 6, "tune": 6, "tuner": [0, 3, 4], "us": 6, "usag": 4, "valid": 1, "valu": 6, "variabl": 1, "varianc": [1, 6], "version": 2, "welcom": 3, 
"what": 3, "x": 6, "xgbclassifi": 6, "xgboost": 6, "xgbregressor": 6, "y": 6, "zero": [1, 6]}})
\ No newline at end of file
+<<<<<<< HEAD
+Search.setIndex({"alltitles": {"1. Accurate Calculation of Scaling Parameters": [[1, "accurate-calculation-of-scaling-parameters"]], "2. Consistency in Data Transformation": [[1, "consistency-in-data-transformation"]], "3. Prevention of Distortion in Scaling": [[1, "prevention-of-distortion-in-scaling"]], "AIDS Clinical Trials Group Study": [[6, "aids-clinical-trials-group-study"]], "About Model Tuner": [[4, null]], "Acknowledgements": [[0, "acknowledgements"]], "Addressing Class Imbalance in Machine Learning": [[6, "addressing-class-imbalance-in-machine-learning"]], "Bias from Class Distribution": [[1, "bias-from-class-distribution"]], "Binary Classification": [[6, "binary-classification"]], "Binary Classification Examples": [[6, "binary-classification-examples"]], "Bootstrap Metrics": [[6, "bootstrap-metrics"]], "Bootstrap Metrics Example": [[6, "bootstrap-metrics-example"]], "Brier Score": [[1, "brier-score"]], "Calibration Curve": [[1, "calibration-curve"]], "California Housing with XGBoost": [[6, "california-housing-with-xgboost"]], "Caveats": [[4, null]], "Caveats in Imbalanced Learning": [[1, "caveats-in-imbalanced-learning"]], "Changelog": [[2, null]], "Citing Model Tuner": [[0, "citing-model-tuner"]], "Classification Report (Optional)": [[6, "classification-report-optional"]], "Column Stratification with Cross-Validation": [[1, "column-stratification-with-cross-validation"]], "Cross-Validation and Stratification": [[1, "cross-validation-and-stratification"]], "Define Hyperparameters for XGBoost": [[6, "define-hyperparameters-for-xgboost"]], "Define The Model object": [[6, "define-the-model-object"]], "Dependent Variable": [[1, "dependent-variable"]], "Effects on Model Training": [[1, "effects-on-model-training"]], "Example of Synthetic Sample Creation": [[1, "example-of-synthetic-sample-creation"]], "Example: Calibration in Logistic Regression": [[1, "example-calibration-in-logistic-regression"]], "Fit The Model": [[6, "fit-the-model"]], "Generating an 
Imbalanced Dataset": [[6, "generating-an-imbalanced-dataset"]], "Getting Started": [[4, null]], "GitHub Repository": [[0, null]], "Goal of Calibration": [[1, "goal-of-calibration"]], "Helper Functions": [[6, "helper-functions"]], "Helper Methods for Pipeline Extraction": [[6, "helper-methods-for-pipeline-extraction"]], "Imbalanced Learning": [[6, "imbalanced-learning"]], "Impact of Resampling Techniques": [[1, "impact-of-resampling-techniques"]], "Imputation Before Scaling": [[1, "imputation-before-scaling"]], "Initalize and Configure The Model": [[6, "initalize-and-configure-the-model"]], "Input Parameters": [[6, "input-parameters"]], "Installation": [[3, "installation"]], "Isotonic Regression": [[1, "isotonic-regression"]], "Key Methods and Functionalities": [[6, "key-methods-and-functionalities"]], "Limitations of Accuracy": [[1, "limitations-of-accuracy"]], "Mitigating the Caveats": [[1, "mitigating-the-caveats"]], "Model Calibration": [[1, "model-calibration"]], "Model Tuner Documentation": [[4, null]], "Perform Grid Search Parameter Tuning and Retrieve Split Data": [[6, "perform-grid-search-parameter-tuning-and-retrieve-split-data"]], "Pipeline Management": [[6, "pipeline-management"]], "Platt Scaling": [[1, "platt-scaling"]], "Prerequisites": [[3, "prerequisites"]], "Purpose of Using These Techniques": [[6, "purpose-of-using-these-techniques"]], "References": [[5, null]], "Regression": [[6, "regression"]], "Regression Example": [[6, "regression-example"]], "Return Metrics (Optional)": [[6, "return-metrics-optional"]], "SMOTE: A Mathematical Illustration": [[1, "smote-a-mathematical-illustration"]], "SMOTE: Distribution of y values after resampling": [[6, "smote-distribution-of-y-values-after-resampling"]], "Solution": [[1, "solution"]], "Specifying Pipeline Steps": [[6, "specifying-pipeline-steps"]], "Step 10: Calibrate the Model (if needed)": [[6, "step-10-calibrate-the-model-if-needed"]], "Step 1: Import Necessary Libraries": [[6, 
"step-1-import-necessary-libraries"], [6, "id2"]], "Step 2: Load the Dataset": [[6, "step-2-load-the-dataset"]], "Step 2: Load the dataset, define X, y": [[6, "step-2-load-the-dataset-define-x-y"]], "Step 3: Check for zero-variance columns and drop accordingly": [[6, "step-3-check-for-zero-variance-columns-and-drop-accordingly"]], "Step 3: Create an Instance of the XGBRegressor": [[6, "step-3-create-an-instance-of-the-xgbregressor"]], "Step 4: Create an Instance of the XGBClassifier": [[6, "step-4-create-an-instance-of-the-xgbclassifier"]], "Step 4: Define Hyperparameters for XGBoost": [[6, "step-4-define-hyperparameters-for-xgboost"]], "Step 5: Define Hyperparameters for XGBoost": [[6, "step-5-define-hyperparameters-for-xgboost"]], "Step 5: Initialize and Configure the Model": [[6, "step-5-initialize-and-configure-the-model"]], "Step 6: Initialize and Configure the Model": [[6, "step-6-initialize-and-configure-the-model"]], "Step 6: Perform Grid Search Parameter Tuning and Retrieve Split Data": [[6, "step-6-perform-grid-search-parameter-tuning-and-retrieve-split-data"]], "Step 7: Fit the Model": [[6, "step-7-fit-the-model"]], "Step 7: Perform Grid Search Parameter Tuning": [[6, "step-7-perform-grid-search-parameter-tuning"]], "Step 8: Fit the Model": [[6, "step-8-fit-the-model"]], "Step 8: Return Metrics (Optional)": [[6, "step-8-return-metrics-optional"]], "Step 9: Return Metrics (Optional)": [[6, "step-9-return-metrics-optional"]], "Summary": [[1, "summary"], [6, "summary"]], "Synthetic Minority Oversampling Technique (SMOTE)": [[6, "synthetic-minority-oversampling-technique-smote"]], "Target Variable Shape and Its Effects": [[1, "target-variable-shape-and-its-effects"]], "Techniques to Address Class Imbalance": [[6, "techniques-to-address-class-imbalance"]], "Threshold-Dependent Predictions": [[1, "threshold-dependent-predictions"]], "Usage Guide": [[4, null]], "Version 0.0.010a": [[2, "version-0-0-010a"]], "Version 0.0.011a": [[2, "version-0-0-011a"]], 
"Version 0.0.012a": [[2, "version-0-0-012a"]], "Version 0.0.013a": [[2, "version-0-0-013a"]], "Version 0.0.014a": [[2, "version-0-0-014a"]], "Version 0.0.02a": [[2, "version-0-0-02a"]], "Version 0.0.05a": [[2, "version-0-0-05a"]], "Version 0.0.06a": [[2, "version-0-0-06a"]], "Version 0.0.07a": [[2, "version-0-0-07a"]], "Version 0.0.08a": [[2, "version-0-0-08a"]], "Version 0.0.09a": [[2, "version-0-0-09a"]], "Version 0.0.15a": [[2, "version-0-0-15a"]], "Version 0.0.16a": [[2, "version-0-0-16a"]], "Welcome to Model Tuner\u2019s Documentation!": [[3, null]], "What Does Model Tuner Offer?": [[3, "what-does-model-tuner-offer"]], "Zero Variance Columns": [[1, null]], "iPython Notebooks": [[6, null]]}, "docnames": ["about", "caveats", "changelog", "getting_started", "index", "references", "usage_guide"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1}, "filenames": ["about.rst", "caveats.rst", "changelog.rst", "getting_started.rst", "index.rst", "references.rst", "usage_guide.rst"], "indexentries": {"built-in function": [[6, "check_input_type", false], [6, "evaluate_bootstrap_metrics", false], [6, "get_feature_selection_pipeline", false], [6, "get_preprocessing_and_feature_selection_pipeline", false], [6, "get_preprocessing_pipeline", false], [6, "return_bootstrap_metrics", false], [6, "sampling_method", false]], "check_input_type()": [[6, "check_input_type", false]], "evaluate_bootstrap_metrics()": [[6, "evaluate_bootstrap_metrics", false]], "get_feature_selection_pipeline()": [[6, "get_feature_selection_pipeline", false]], "get_preprocessing_and_feature_selection_pipeline()": [[6, "get_preprocessing_and_feature_selection_pipeline", false]], 
"get_preprocessing_pipeline()": [[6, "get_preprocessing_pipeline", false]], "model (built-in class)": [[6, "Model", false]], "return_bootstrap_metrics()": [[6, "return_bootstrap_metrics", false]], "sampling_method()": [[6, "sampling_method", false]]}, "objects": {"": [[6, 0, 1, "", "Model"], [6, 1, 1, "", "check_input_type"], [6, 1, 1, "", "evaluate_bootstrap_metrics"], [6, 1, 1, "", "get_feature_selection_pipeline"], [6, 1, 1, "", "get_preprocessing_and_feature_selection_pipeline"], [6, 1, 1, "", "get_preprocessing_pipeline"], [6, 1, 1, "", "return_bootstrap_metrics"], [6, 1, 1, "", "sampling_method"]]}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"]}, "objtypes": {"0": "py:class", "1": "py:function"}, "terms": {"": [1, 2, 4, 6], "0": [0, 1, 3, 4, 6], "00": 6, "000": 6, "0001": 6, "01": 6, "010a": 4, "011a": 4, "012a": 4, "013a": 4, "014a": 4, "017a": 3, "01it": 6, "02a": 4, "05": 6, "05a": 4, "05it": 6, "06a": 4, "07a": 4, "08a": 4, "09a": 4, "1": [2, 3, 4], "10": [0, 3, 4, 5], "100": 6, "1000": 6, "104": 6, "11": 3, "11a": 2, "12": 3, "12727322": 0, "14": 3, "15a": 4, "16628708993634742": 6, "16713189436073958": 6, "16a": [0, 4], "175": 5, "177": 6, "178": 6, "180": 6, "19": [3, 6], "1998": 5, "1d": 1, "1e": 6, "2": [3, 4], "20": 6, "200": 6, "2024": 0, "2096258193295803": 6, "20989021789332263": 6, "21": [3, 6], "222": 6, "23": 3, "24": 3, "24432": 5, "245": 6, "246": 6, "26": 3, "26315186452865597": 6, "2672762813568116": 6, "28411432705731066": 6, "3": [3, 4], "30": 6, "300": 6, "3066172248224347": 6, "315": 6, "324": 6, "34": 6, "35": 6, "358": 6, "36": 6, "3743548199982513": 6, "3830825326824073": 6, "4": [1, 3, 4], "41": 6, "42": 6, "428": 6, "47": 6, "5": [1, 3, 4], "500": [2, 6], "50it": 6, "5281": 0, "533023758436067": 6, "540": 6, "5459770114942529": 6, "5491329479768786": 6, "55": 6, "5537302816556403": 6, "58": 1, "6": [3, 4], "60it": 6, "66": 3, "67": 6, "68": 6, "69": 6, "7": [3, 4], "70": 6, "71": 6, 
"75": 6, "7561728395061729": 6, "7592592592592593": 6, "76": 6, "7647433075624044": 6, "7647451659057567": 6, "765": 6, "7651490279157868": 6, "77": 6, "770853": 6, "777898": 6, "78": 6, "781523": 6, "788341": 6, "7888925135381788": 6, "7888942913974833": 6, "79": 6, "792193": 6, "798785": 6, "7992275185850191": 6, "8": [3, 4], "80": 6, "8023014087345259": 6, "81": 6, "82": 6, "83": 6, "84": 6, "85": 6, "8520172219085262": 6, "86": 6, "8695652173913043": 6, "87": 6, "88": 6, "89": 6, "890": 6, "9": [1, 4], "90": 6, "900": 6, "9047619047619048": 6, "91": 6, "9134615384615384": 6, "92": 6, "928": 6, "9280033238366572": 6, "93": 6, "934576804368471": 6, "94": 6, "95": 6, "96": 6, "97": 6, "9722222222222222": 6, "98": 6, "9833333333333333": 6, "9888888888888889": 6, "99": 6, "9904166666666667": 6, "9951388888888888": 6, "999": [1, 6], "9994444444444446": 6, "A": [4, 6], "AND": 6, "By": [1, 6], "For": [1, 3, 6], "If": [1, 6], "In": [1, 2, 6], "It": [1, 3, 6], "Its": 4, "No": 6, "Not": 6, "On": 1, "One": [1, 6], "The": [1, 3, 4], "There": 2, "These": [1, 4], "To": [1, 6], "With": 1, "_": 1, "__colsample_bytre": 6, "__early_stopping_round": 6, "__eval_metr": 6, "__init__": 6, "__learning_r": 6, "__max_depth": 6, "__n_estim": 6, "__param_nam": 6, "__subsampl": 6, "__tree_method": 6, "__verbos": 6, "_confusion_matrix_print": 6, "_i": 1, "_j": 1, "_k": 1, "abil": 6, "about": 1, "abov": 6, "abram": 5, "absolut": 6, "access": [0, 6], "accompani": 6, "accordingli": 4, "account": 1, "accur": 4, "accuraci": [4, 6], "achiev": [1, 2], "acknowledg": 4, "across": [1, 2, 3, 6], "activ": 6, "actual": [1, 6], "ad": [1, 2, 6], "adasyn": [2, 3, 6], "add": 6, "addit": [1, 6], "addition": [1, 6], "address": [1, 4], "adequ": 1, "adjust": 1, "advanc": 6, "aforement": 1, "after": [1, 4], "aid": [4, 5], "aids_clinical_": 6, "aids_clinical_trials_group_study_175": 6, "aim": 6, "alex": 0, "algorithm": [1, 6], "align": 1, "all": [1, 2, 3, 6], "alloc": 6, "allow": [2, 3, 6], "along": 1, "also": [1, 
6], "altern": 1, "alwai": 2, "amplifi": 1, "an": 4, "analysi": 1, "angel": 6, "ani": 1, "anoth": [1, 6], "anova": 1, "apach": 2, "appli": [1, 3, 6], "applic": [1, 6], "approach": 1, "appropri": 6, "approx": 1, "ar": [0, 1, 2, 3, 6], "arrai": [1, 6], "arthur": [0, 2], "artifici": 1, "ascii": 6, "assert": 2, "assess": [1, 3, 6], "assign": [2, 6], "assum": 1, "assumpt": 1, "attempt": 1, "attributeerror": 6, "auc": 6, "author": 0, "autokera": 2, "autokerasclassifi": 2, "automat": [1, 3, 6], "avail": [1, 2, 6], "averag": 6, "average_precis": 6, "avg": 6, "avoid": [1, 2, 6], "ax": 6, "axi": 2, "b": 1, "back": 6, "balanc": [1, 2, 3, 6], "bar": [1, 6], "base": [1, 3, 6], "bayesian": 6, "bayessearchcv": 6, "becaus": [1, 2], "becom": 1, "been": [1, 2, 6], "befor": [2, 3, 4, 6], "begin": 1, "behavior": 1, "being": [2, 6], "below": [1, 3, 6], "best": 6, "best_param": 2, "best_params_per_scor": 6, "beta": 6, "better": [1, 6], "between": [1, 3, 6], "beyond": 6, "bia": [4, 6], "bias": [1, 6], "bin": [1, 6], "binari": 4, "block": 6, "bool": 6, "boost": [2, 6], "boost_earli": 6, "bootstrap": [3, 4], "bootstrapp": [2, 6], "both": [1, 2, 6], "brier": [4, 6], "bug": 2, "bui": 0, "build": 6, "c": 1, "c5g896": 5, "c_": 1, "calcul": [4, 6], "calibr": [2, 3, 4], "calibrate_report": 6, "calibratemodel": 6, "calibration_curv": 6, "calibration_method": 6, "california": 4, "call": 6, "can": [0, 1, 3, 6], "cannot": 1, "captur": [1, 6], "care": [1, 6], "carefulli": 1, "case": [1, 2, 6], "catboost": [2, 3], "categor": 6, "categori": 6, "caus": 1, "cdot": 1, "center": 6, "challeng": [1, 6], "chang": [1, 2, 6], "changelog": 4, "char": 2, "check": [1, 4], "check_input_typ": [4, 6], "chunk": 2, "ci": 6, "cite": 4, "clariti": 6, "class": [2, 3, 4], "class_label": 6, "class_proport": 6, "class_report_test": 6, "class_report_v": 6, "classif": [1, 3, 4], "classifi": [1, 6], "classification_report": 6, "clc": 6, "clean": 2, "click": 0, "clinic": [0, 4, 5], "close": 1, "cluster": 1, "cm_test": 6, 
"cm_val": 6, "code": [1, 2, 6], "codebas": 0, "col": 6, "colab": 6, "color": 6, "column": [2, 4], "combin": [1, 6], "come": 1, "command": 6, "comment": 2, "common": 1, "commonli": 6, "compar": 6, "compat": 3, "complet": [1, 2], "complex": 6, "comprehens": 6, "comput": [1, 6], "concat": 2, "condit": 1, "conduct": 3, "conf_mat_class_kfold": 6, "conf_matrix": 6, "confid": 6, "configur": 4, "conflict": 1, "confus": 6, "connect": 1, "consid": [1, 2], "consist": 4, "constant": 1, "constraint": [1, 2], "construct": 1, "contain": [2, 6], "context": [1, 6], "continu": 6, "contrast": 1, "contribut": [0, 1], "contributor": 0, "convent": 6, "convers": 1, "convert": [1, 6], "correct": [1, 2], "correctli": 1, "count": [2, 6], "cpu": 6, "creat": [1, 4], "creation": [3, 4], "critic": [1, 6], "cross": [3, 4, 6], "crucial": [1, 6], "ctsi": 0, "current": [1, 3], "curs": 1, "curv": [4, 6], "custom": [2, 3, 6], "custom_scor": 6, "d": [1, 5], "d_1": 1, "d_2": 1, "d_j": 1, "d_k": 1, "data": [2, 3, 4], "dataconversionwarn": 1, "datafram": [1, 6], "dataset": [1, 3, 4], "decis": [1, 6], "decreas": 1, "def": 6, "default": [1, 6], "defin": [1, 4], "degrad": 1, "delta": 1, "demonstr": 6, "denot": 1, "depend": [2, 3, 4, 6], "deploi": 6, "deprec": 2, "depth": 6, "design": [1, 3, 6], "despit": 1, "detail": 6, "detect": 6, "determin": 1, "dev": 2, "develop": 3, "deviat": 1, "diagnosi": [1, 6], "dict": 6, "dictionari": 6, "differ": [1, 2, 3], "dimens": 1, "dimension": 1, "directli": 3, "discrep": 1, "diseas": 6, "displai": 6, "disrupt": 1, "distinct": 6, "distinguish": 6, "distort": 4, "distribut": [3, 4], "divid": 1, "divis": 1, "do": [2, 6], "document": 6, "doe": [1, 4], "doi": [0, 5], "domin": [1, 6], "dot": 1, "dr": 0, "draw": 6, "drawn": 1, "drop": [1, 4], "dtype": 6, "due": 1, "duplic": 6, "dure": [1, 2, 6], "e": [1, 6], "each": [1, 6], "earli": [2, 3, 6], "early_stop": 6, "eas": 6, "easier": 6, "easili": 6, "effect": [3, 4, 6], "either": [2, 6], "el": 5, "elimin": 3, "empir": 1, "empti": [1, 
6], "enabl": [3, 6], "encount": 1, "end": 1, "engin": 1, "enhanc": 2, "ensur": [1, 2, 3, 6], "entir": 1, "enumer": 6, "equal": [1, 6], "equat": 1, "error": [1, 2, 6], "especi": 6, "essenc": 6, "essenti": [1, 6], "estat": 6, "estim": [1, 2, 3, 6], "estimator_nam": 6, "etc": [2, 6], "evalu": [1, 3, 6], "evaluate_bootstrap_metr": [2, 4, 6], "even": 1, "event": 6, "examin": 6, "exampl": 4, "exceed": 2, "except": 6, "excess": 1, "execut": 6, "exist": [1, 6], "exp": 1, "expect": [1, 6], "explain": 6, "explained_vari": 6, "explan": 1, "explicit": 6, "explicitli": 6, "express": 1, "extend": 6, "extract": [2, 4], "extrem": 1, "f": [1, 6], "f1": [1, 6], "f1_beta_tun": 6, "f1_weight": 6, "f_i": 1, "facilit": 3, "fail": 1, "failur": 1, "fair": 1, "fairli": 6, "fall": 1, "fals": [1, 6], "far": 1, "favor": [1, 2, 6], "feat_num": 1, "featur": [1, 3, 6], "feature_": 6, "feature_nam": 6, "feature_select": 6, "feature_selection_": 6, "fetch": 6, "fetch_california_h": 6, "fetch_ucirepo": 6, "figsiz": 6, "figur": 6, "file": [2, 6], "filter": 2, "find": 1, "fine": [3, 6], "first": 1, "fit": [1, 2, 4], "fix": [2, 6], "flexibl": [3, 6], "flip_i": 6, "float": 6, "fn": 6, "focu": [1, 6], "fold": [1, 3, 6], "follow": [1, 2, 3, 6], "form": 1, "format": 6, "formul": 1, "forthcom": 2, "found": 6, "fp": 6, "frac": 1, "fraction": 1, "fraud": 6, "fraudul": 6, "free": 1, "frequenc": [1, 6], "frequent": 6, "from": [2, 3, 4, 6], "full": 1, "fulli": 1, "function": [1, 2, 3, 4], "funnel": 0, "funnell_2024_12727322": 0, "g": [1, 6], "gender": 6, "gener": [1, 3, 4], "generaliz": 1, "geq": 1, "get": 6, "get_best_score_param": 6, "get_cross_valid": 6, "get_feature_selection_pipelin": [4, 6], "get_preprocessing_and_feature_selection_pipelin": [4, 6], "get_preprocessing_pipelin": [4, 6], "get_test_data": 6, "get_train_data": 6, "get_valid_data": 6, "github": 4, "given": 1, "goal": [4, 6], "googl": 6, "grid": 4, "grid_search_param_tun": 6, "ground": 6, "group": [4, 5], "guidanc": 0, "ha": [1, 2, 6], "had": 
1, "hand": 1, "handl": [1, 3, 6], "happen": 2, "harmon": 1, "hat": 1, "have": [2, 6], "haven": 6, "healthcar": 6, "heavili": 1, "help": [1, 6], "helper": 4, "here": [2, 3, 6], "hi": 0, "high": 1, "higher": 3, "highli": 1, "highlight": 1, "hist": 6, "histori": 2, "hold": 1, "homogen": 1, "hous": 4, "how": 6, "howev": 1, "html": 6, "http": [0, 5], "hybrid": 6, "hyperparamet": [2, 3, 4], "i": [1, 2, 3, 6], "id": 6, "ident": 1, "identifi": 1, "ifrom": 6, "ij": 1, "illustr": 4, "imbal": [1, 4], "imbalanc": [2, 3, 4], "imbalance_sampl": 6, "imblearn": 6, "impact": 4, "implement": [2, 3, 6], "import": [1, 2, 4], "importerror": 6, "improp": 1, "improperli": 6, "improv": [3, 6], "imput": [2, 3, 4, 6], "inaccur": 1, "includ": [1, 3, 6], "incomplet": 1, "inconsist": 1, "incorrect": [1, 6], "increas": [1, 6], "index": 6, "indexerror": 6, "indic": [1, 6], "infinit": 1, "inflat": 1, "influenc": 1, "inform": [1, 6], "informat": 0, "inher": [1, 6], "init": 4, "initi": 4, "initialis": 2, "input": [1, 4], "insid": [2, 6], "instal": [4, 6], "instanc": [1, 4], "instead": [1, 2, 6], "institut": 0, "insuffici": 6, "int": 6, "int64": 6, "integr": [1, 3], "interpol": [1, 6], "interpret": 1, "interv": [1, 6], "introduc": [1, 2], "invalid": [1, 6], "invalu": 0, "involv": [1, 2], "ipython": 4, "isinst": 1, "isoton": [3, 4, 6], "issu": [1, 2, 6], "iter": 6, "its": [1, 6], "itself": 2, "j": 1, "job": 6, "joblib": 3, "jul": 0, "just": 1, "k": [1, 3, 6], "kei": [0, 1, 2, 3, 4], "keyerror": 6, "kf": 6, "kfold": [2, 6], "kfold_split": 6, "kind": 6, "known": 6, "label": [1, 3, 6], "larg": 1, "later": [1, 6], "layer": 2, "lead": [1, 6], "learn": [2, 3, 4, 5], "legend": 6, "length": 2, "leon": 2, "leonid": 0, "leq": 1, "less": 1, "let": 1, "level": 6, "leverag": 6, "li": 1, "librari": [3, 4], "licens": 2, "like": [1, 3, 6], "likelihood": 1, "limit": [2, 4], "line": [1, 2], "linear": [1, 6], "linestyl": 6, "link": [0, 6], "list": [2, 6], "ll": 1, "lo": 6, "load": 4, "log": [2, 6], "logic": 2, 
"logist": [4, 6], "logloss": 6, "logo": 2, "loop": 2, "loss": [1, 6], "low": [2, 6], "lower": [1, 6], "machin": [1, 3, 4, 5], "macro": 6, "mai": 1, "maintain": 1, "major": [1, 6], "make": [1, 6], "make_classif": 6, "make_classification_": 6, "manag": 4, "mani": 1, "marker": 6, "match": 1, "mathbf": 1, "mathemat": 4, "matplotlib": 6, "matric": 6, "matrix": 6, "max": 1, "maximum": [1, 6], "mean": [1, 6], "meaning": [1, 6], "measur": 1, "median": [1, 6], "medic": [0, 1], "meet": 3, "mere": 6, "messag": 6, "method": [1, 2, 3, 4], "metric": [1, 2, 3, 4], "mid": 1, "midwai": 1, "might": [1, 6], "mii": 0, "min": 1, "min_": 1, "minimum": 1, "minmax": 3, "minor": [1, 4], "misclassif": 1, "misinterpret": 1, "mislabel": 1, "mislead": 1, "mismatch": [2, 6], "miss": [1, 6], "mitig": [4, 6], "mlflow": 2, "model": 2, "model_definit": 6, "model_tun": [3, 6], "model_tuner_util": 6, "model_typ": 6, "model_xgb": 6, "modifi": 2, "modul": 6, "monoton": 1, "month": 0, "more": [1, 6], "move": 2, "msb": 1, "msw": 1, "mu": 1, "much": 1, "multi": [3, 6], "multi_label": 6, "multipl": [2, 6], "must": [1, 6], "n": 1, "n_bin": 6, "n_clusters_per_class": 6, "n_featur": 6, "n_inform": 6, "n_iter": 6, "n_j": 1, "n_job": 6, "n_redund": 6, "n_sampl": [1, 6], "n_split": 6, "name": [2, 6], "nan": [1, 6], "natur": 6, "nearest": [1, 6], "necessari": [2, 4], "need": [1, 4], "neg": [1, 6], "neighbor": [1, 6], "new": 6, "nois": [1, 6], "noisi": 1, "non": [1, 2], "none": 6, "normal": 6, "note": 1, "notebook": [2, 4], "notic": 6, "now": [1, 2], "np": [2, 6], "num_resampl": 6, "number": [1, 2, 6], "numer": 6, "numpi": [3, 6], "o": 6, "object": [2, 4], "observ": [1, 6], "occur": [2, 6], "off": 1, "offer": [4, 6], "often": [1, 6], "older": 2, "onc": 6, "one": [1, 6], "ones": 1, "onli": [1, 2, 6], "onto": 2, "oper": 1, "optim": [1, 3, 6], "optimal_threshold": 6, "option": 4, "order": [1, 2, 6], "org": [0, 5], "organ": 6, "origin": [0, 1], "other": [1, 2, 3, 6], "our": [2, 6], "out": [1, 2], "outcom": [1, 6], 
"output": [1, 6], "outsid": 2, "outweigh": 6, "over": 1, "overal": 1, "overfit": [1, 3, 6], "overlap": 1, "overlook": 1, "oversampl": [1, 3, 4], "p": 1, "p_1": 1, "p_2": 1, "p_i": 1, "p_n": 1, "packag": 6, "panayioti": 0, "panda": [3, 6], "parallel": 6, "param": 6, "paramet": [2, 3, 4], "parametr": 1, "part": 6, "particularli": [1, 3, 6], "pass": [1, 6], "pattern": 6, "pd": [1, 2, 6], "penal": 1, "per": [2, 6], "perfectli": [1, 6], "perform": [1, 3, 4], "petousi": 0, "pickl": 2, "piecewis": 1, "pip": [3, 6], "pip25": 2, "pipelin": [1, 2, 3, 4], "pipeline_assembli": 6, "pipeline_step": [1, 2, 6], "pipelineclass": 6, "placehold": 1, "platt": 4, "pleas": [1, 6], "plot": 6, "plt": 6, "pmatrix": 1, "po": 6, "point": [1, 6], "poor": 6, "poorli": 6, "posit": [1, 6], "possibl": [1, 6], "power": [1, 3], "ppv": 6, "practic": [1, 6], "practition": 1, "pre": 6, "precis": [1, 6], "predict": [4, 6], "predict_proba": 6, "prefix": 6, "preprocess": [1, 6], "preprocess_": 6, "preprocess_imputer_imput": 6, "preprocess_scaler_standardscalar": 6, "preprocessing_step": 6, "preprocessor": 1, "prerequisit": 4, "present": 1, "preserv": 1, "pretti": 2, "prevent": [3, 4], "previou": 2, "previous": 1, "primari": 1, "print": [2, 6], "print_pipelin": 6, "print_result": 6, "print_selected_best_featur": 6, "prior": 1, "priorit": 1, "prob_pred_calibr": 6, "prob_pred_uncalibr": 6, "prob_true_calibr": 6, "prob_true_uncalibr": 6, "probabilist": 1, "probabl": [1, 3, 6], "problem": [1, 6], "proceed": 1, "process": [1, 2, 6], "process_imbalance_sampl": 6, "produc": [1, 6], "properli": 6, "properti": 1, "proport": [1, 6], "provid": [1, 3, 6], "publish": 0, "purpos": 4, "py": [2, 6], "pypi": [2, 3], "pyplot": 6, "pyproject": 2, "python": 3, "quad": 1, "quickli": 6, "r": 6, "r2": 6, "race": 6, "rais": [1, 6], "rand_grid": 6, "random": [1, 6], "random_st": 6, "randomized_grid": 6, "randomli": 6, "randomoversampl": 6, "randomundersampl": 6, "rang": [1, 6], "rare": 6, "rate": 1, "rather": 1, "ratio": [1, 6], 
"raw": 1, "re": 2, "readili": 6, "readm": 2, "real": 6, "recal": [1, 6], "recommend": 1, "recurs": 3, "redfin": 6, "redistribut": 6, "reduc": [1, 6], "ref": 2, "refactor": 2, "refer": [1, 4, 6], "reflect": 1, "regard": 2, "region": 1, "regress": 4, "regression_report": 6, "regression_report_kfold": 6, "regular": 6, "relat": 2, "relationship": 1, "releas": 2, "reli": 1, "reliabl": 6, "remov": [1, 2, 6], "renam": [2, 6], "repeatedli": 1, "replac": 1, "report": [2, 4], "report_model_metr": 6, "repositori": [4, 5, 6], "repres": [1, 2], "represent": 6, "reproduc": 6, "requir": [1, 2, 3, 6], "resampl": [2, 4], "research": 6, "reset": [2, 6], "reset_estim": 6, "resolut": 2, "resourc": 6, "respect": 6, "result": 1, "retriev": 4, "return": 4, "return_bootstrap_metr": [4, 6], "return_metr": 6, "rfe": [3, 6], "rightarrow": 1, "risk": [1, 6], "rmse": 6, "robust": [3, 6], "roc": 6, "roc_auc": 6, "root": 6, "rot": 6, "rout": 6, "routin": 1, "run": 6, "runtim": 1, "runtimeerror": 6, "runtimewarn": 1, "sadr": 5, "same": [1, 2], "sampl": [2, 4, 6], "sampler": 6, "sampling_method": [4, 6], "save": 2, "scale": [2, 3, 4, 6], "scenario": 6, "scienc": 0, "scikit": 3, "scipi": 3, "score": [4, 6], "seamlessli": 6, "search": 4, "section": 6, "see": 6, "seed": 6, "segment": [1, 2], "select": [3, 6], "selectkbest": [2, 3], "self": [2, 6], "sensit": [1, 6], "separ": [1, 6], "sequenc": [1, 6], "seri": [1, 6], "set": [1, 6], "setup": 2, "sever": [1, 6], "shap": 6, "shape": [4, 6], "should": [1, 2, 6], "show": 6, "shown": 6, "shpaner": 0, "sigma": 1, "sigmoid": [3, 6], "significantli": [1, 6], "sim": 1, "similar": [1, 6], "simpl": 6, "simpleimput": [1, 3, 6], "simpli": 6, "simplifi": 2, "simultan": 2, "sinc": 1, "singl": [1, 6], "size": 6, "skew": 1, "sklearn": 6, "smote": [2, 3, 4], "smoteenn": 1, "smotetomek": 1, "so": [1, 6], "softwar": [0, 2], "solut": 4, "some": [1, 6], "sort": 6, "space": 1, "spam": 6, "special": 0, "specif": [1, 2, 6], "specifi": [1, 2, 4], "split": [1, 2, 3, 4], "sqrt": 
1, "squar": [1, 6], "squeez": [1, 6], "stage": 6, "standard": [1, 6], "standardscal": [1, 6], "standardscalar": 6, "startswith": 6, "state": 1, "statist": 1, "step": [2, 4], "step_0": 6, "step_1": 6, "stop": [2, 3, 6], "store": 2, "str": 6, "strat_key_val_test": 2, "strategi": [3, 6], "stratif": [2, 4, 6], "stratifi": [1, 2, 3, 6], "stratify_col": [1, 2, 6], "stratify_i": [1, 2, 6], "stratify_kei": 2, "string": 2, "structur": 1, "struggl": 6, "studi": [4, 5], "subsampl": 6, "subsequ": 1, "subset": 1, "suit": 6, "sum": 6, "sum_": 1, "summari": 4, "supervis": 6, "support": [0, 2, 3, 6], "synthet": 4, "system": 3, "t": 6, "take": [1, 6], "taken": 2, "target": [2, 3, 4, 6], "task": [3, 6], "tau": 1, "techniqu": [3, 4], "temporarili": 2, "tend": 6, "test": [2, 6], "test_model": 6, "test_siz": 6, "text": [1, 6], "th": 1, "than": 1, "thank": 0, "thei": [1, 6], "them": [1, 6], "therefor": [1, 6], "thi": [0, 1, 2, 3, 6], "thoroughli": 6, "three": 6, "threshold": [2, 3, 4, 6], "through": 6, "thu": 6, "time": [1, 2], "titan": 6, "titl": [0, 6], "tn": 6, "toml": 2, "too": 1, "tool": 3, "top": [1, 6], "toward": 6, "tp": 6, "tqdm": 3, "track": 6, "trade": 1, "tradit": 1, "train": [3, 4, 6], "train_siz": 6, "train_val_test": 2, "train_val_test_split": [2, 6], "transact": 6, "transform": [4, 6], "translat": 0, "treat": [1, 6], "tree": 6, "trial": [4, 5], "trigger": 1, "true": [1, 6], "trust": 1, "truth": 6, "tune": [1, 2, 3, 4], "tune_threshold_fbeta": [2, 6], "tuned_paramet": 6, "tuned_parameters_xgb": 6, "tuner": 6, "two": [1, 6], "txt": 2, "type": 6, "typeerror": 6, "typic": 6, "u": 1, "uci": [5, 6], "ucimlrepo": 6, "ucla": 0, "uncalibr": 6, "undefin": 1, "under": [3, 6], "underli": 1, "underrepres": 6, "undersampl": [1, 6], "understand": [1, 6], "unequ": 6, "unexpect": 6, "uniform": 1, "uniqu": 6, "unlik": 1, "unnecessari": [1, 2, 6], "unpredict": 1, "unrealist": 1, "unreli": 1, "unseen": 1, "unsupport": 6, "unus": 2, "up": 2, "updat": 2, "upper": 6, "url": 0, "us": [1, 2, 3, 
4], "usag": 2, "user": 6, "userwarn": 1, "util": [2, 6], "va": 6, "valid": [3, 4, 6], "validation_data": 6, "validation_s": 6, "valu": [1, 4], "value_count": 6, "valueerror": 6, "var": [1, 6], "variabl": [2, 3, 4, 6], "varianc": 4, "varieti": 6, "variou": [3, 6], "vdot": 1, "vector": 1, "verbos": 2, "versatil": 3, "version": [0, 3, 4], "visual": 6, "w": [1, 5], "wa": [0, 1, 2], "wai": [1, 6], "warn": 1, "we": [1, 6], "weight": [1, 6], "welcom": 4, "well": [1, 6], "were": 2, "what": 4, "when": [1, 2, 3, 6], "where": [1, 2, 6], "whether": 6, "which": [1, 3, 6], "while": [1, 6], "wide": [1, 6], "width": 6, "wish": 6, "within": [1, 6], "without": [1, 6], "work": [0, 1, 2], "workflow": [3, 6], "world": 6, "would": 1, "wrong": 2, "x": [1, 2, 4], "x_": 1, "x_i": 1, "x_j": 1, "x_test": 6, "x_train": 6, "x_valid": 6, "x_valid_test": 2, "xgb": 6, "xgb_": 6, "xgb__colsample_bytre": 6, "xgb__early_stopping_round": 6, "xgb__eval_metr": 6, "xgb__learning_r": 6, "xgb__max_depth": 6, "xgb__n_estim": 6, "xgb__subsampl": 6, "xgb__tree_method": 6, "xgb_definit": 6, "xgb_early_bootstrap_test": 2, "xgb_model": 6, "xgb_name": 6, "xgb_smote": 6, "xgbclassifi": 4, "xgbearli": 6, "xgboost": [2, 3, 4], "xgbregressor": 4, "xlabel": 6, "y": [1, 2, 4], "y_1": 1, "y_2": 1, "y_i": 1, "y_n": 1, "y_pred": 6, "y_pred_prob": 6, "y_prob_calibr": 6, "y_prob_uncalibr": 6, "y_test": 6, "y_test_pr": 6, "y_train": 6, "y_true": 6, "y_valid": 6, "y_valid_proba": 6, "y_valid_test": 2, "year": 0, "yellow": 6, "yet": 6, "ylabel": 6, "you": [0, 1, 3, 6], "your": [1, 3, 6], "z": 1, "z_": 1, "zenodo": [0, 2], "zero": 4, "zero_variance_column": [1, 6]}, "titles": ["GitHub Repository", "Zero Variance Columns", "Changelog", "Welcome to Model Tuner\u2019s Documentation!", "Model Tuner Documentation", "References", "iPython Notebooks"], "titleterms": {"": 3, "0": 2, "010a": 2, "011a": 2, "012a": 2, "013a": 2, "014a": 2, "02a": 2, "05a": 2, "06a": 2, "07a": 2, "08a": 2, "09a": 2, "1": [1, 6], "10": 6, "15a": 2, "16a": 
2, "2": [1, 6], "3": [1, 6], "4": 6, "5": 6, "6": 6, "7": 6, "8": 6, "9": 6, "A": 1, "Its": 1, "The": 6, "These": 6, "about": 4, "accordingli": 6, "accur": 1, "accuraci": 1, "acknowledg": 0, "address": 6, "after": 6, "aid": 6, "an": 6, "befor": 1, "bia": 1, "binari": 6, "bootstrap": 6, "brier": 1, "calcul": 1, "calibr": [1, 6], "california": 6, "caveat": [1, 4], "changelog": 2, "check": 6, "cite": 0, "class": [1, 6], "classif": 6, "clinic": 6, "column": [1, 6], "configur": 6, "consist": 1, "creat": 6, "creation": 1, "cross": 1, "curv": 1, "data": [1, 6], "dataset": 6, "defin": 6, "depend": 1, "distort": 1, "distribut": [1, 6], "document": [3, 4], "doe": 3, "drop": 6, "effect": 1, "exampl": [1, 6], "extract": 6, "fit": 6, "from": 1, "function": 6, "gener": 6, "get": 4, "github": 0, "goal": 1, "grid": 6, "group": 6, "guid": 4, "helper": 6, "hous": 6, "hyperparamet": 6, "illustr": 1, "imbal": 6, "imbalanc": [1, 6], "impact": 1, "import": 6, "imput": 1, "init": 6, "initi": 6, "input": 6, "instal": 3, "instanc": 6, "ipython": 6, "isoton": 1, "kei": 6, "learn": [1, 6], "librari": 6, "limit": 1, "load": 6, "logist": 1, "machin": 6, "manag": 6, "mathemat": 1, "method": 6, "metric": 6, "minor": 6, "mitig": 1, "model": [0, 1, 3, 4, 6], "necessari": 6, "need": 6, "notebook": 6, "object": 6, "offer": 3, "option": 6, "oversampl": 6, "paramet": [1, 6], "perform": 6, "pipelin": 6, "platt": 1, "predict": 1, "prerequisit": 3, "prevent": 1, "purpos": 6, "refer": 5, "regress": [1, 6], "report": 6, "repositori": 0, "resampl": [1, 6], "retriev": 6, "return": 6, "sampl": 1, "scale": 1, "score": 1, "search": 6, "shape": 1, "smote": [1, 6], "solut": 1, "specifi": 6, "split": 6, "start": 4, "step": 6, "stratif": 1, "studi": 6, "summari": [1, 6], "synthet": [1, 6], "target": 1, "techniqu": [1, 6], "threshold": 1, "train": 1, "transform": 1, "trial": 6, "tune": 6, "tuner": [0, 3, 4], "us": 6, "usag": 4, "valid": 1, "valu": 6, "variabl": 1, "varianc": [1, 6], "version": 2, "welcom": 3, 
"what": 3, "x": 6, "xgbclassifi": 6, "xgboost": 6, "xgbregressor": 6, "y": 6, "zero": [1, 6]}})
+=======
+Search.setIndex({"alltitles": {"1. Accurate Calculation of Scaling Parameters": [[1, "accurate-calculation-of-scaling-parameters"]], "2. Consistency in Data Transformation": [[1, "consistency-in-data-transformation"]], "3. Prevention of Distortion in Scaling": [[1, "prevention-of-distortion-in-scaling"]], "AIDS Clinical Trials Group Study": [[6, "aids-clinical-trials-group-study"]], "About Model Tuner": [[4, null]], "Acknowledgements": [[0, "acknowledgements"]], "Addressing Class Imbalance in Machine Learning": [[6, "addressing-class-imbalance-in-machine-learning"]], "Bias from Class Distribution": [[1, "bias-from-class-distribution"]], "Binary Classification": [[6, "binary-classification"]], "Binary Classification Examples": [[6, "binary-classification-examples"]], "Bootstrap Metrics": [[6, "bootstrap-metrics"]], "Bootstrap Metrics Example": [[6, "bootstrap-metrics-example"]], "Brier Score": [[1, "brier-score"]], "Calibration Curve": [[1, "calibration-curve"]], "California Housing with XGBoost": [[6, "california-housing-with-xgboost"]], "Caveats": [[4, null]], "Caveats in Imbalanced Learning": [[1, "caveats-in-imbalanced-learning"]], "Changelog": [[2, null]], "Citing Model Tuner": [[0, "citing-model-tuner"]], "Classification Report (Optional)": [[6, "classification-report-optional"]], "Column Stratification with Cross-Validation": [[1, "column-stratification-with-cross-validation"]], "Cross-Validation and Stratification": [[1, "cross-validation-and-stratification"]], "Define Hyperparameters for XGBoost": [[6, "define-hyperparameters-for-xgboost"]], "Define The Model object": [[6, "define-the-model-object"]], "Dependent Variable": [[1, "dependent-variable"]], "Effects on Model Training": [[1, "effects-on-model-training"]], "Example of Synthetic Sample Creation": [[1, "example-of-synthetic-sample-creation"]], "Example: Calibration in Logistic Regression": [[1, "example-calibration-in-logistic-regression"]], "Fit The Model": [[6, "fit-the-model"]], "Generating an 
Imbalanced Dataset": [[6, "generating-an-imbalanced-dataset"]], "Getting Started": [[4, null]], "GitHub Repository": [[0, null]], "Goal of Calibration": [[1, "goal-of-calibration"]], "Helper Functions": [[6, "helper-functions"]], "Imbalanced Learning": [[6, "imbalanced-learning"]], "Impact of Resampling Techniques": [[1, "impact-of-resampling-techniques"]], "Imputation Before Scaling": [[1, "imputation-before-scaling"]], "Initalize and Configure The Model": [[6, "initalize-and-configure-the-model"]], "Input Parameters": [[6, "input-parameters"]], "Installation": [[3, "installation"]], "Isotonic Regression": [[1, "isotonic-regression"]], "Key Methods and Functionalities": [[6, "key-methods-and-functionalities"]], "Limitations of Accuracy": [[1, "limitations-of-accuracy"]], "Mitigating the Caveats": [[1, "mitigating-the-caveats"]], "Model Calibration": [[1, "model-calibration"]], "Model Tuner Documentation": [[4, null]], "Perform Grid Search Parameter Tuning and Retrieve Split Data": [[6, "perform-grid-search-parameter-tuning-and-retrieve-split-data"]], "Platt Scaling": [[1, "platt-scaling"]], "Prerequisites": [[3, "prerequisites"]], "Purpose of Using These Techniques": [[6, "purpose-of-using-these-techniques"]], "References": [[5, null]], "Regression": [[6, "regression"]], "Regression Example": [[6, "regression-example"]], "Return Metrics (Optional)": [[6, "return-metrics-optional"]], "SMOTE: A Mathematical Illustration": [[1, "smote-a-mathematical-illustration"]], "SMOTE: Distribution of y values after resampling": [[6, "smote-distribution-of-y-values-after-resampling"]], "Solution": [[1, "solution"]], "Step 10: Calibrate the Model (if needed)": [[6, "step-10-calibrate-the-model-if-needed"]], "Step 1: Import Necessary Libraries": [[6, "step-1-import-necessary-libraries"], [6, "id2"]], "Step 2: Load the Dataset": [[6, "step-2-load-the-dataset"]], "Step 2: Load the dataset, define X, y": [[6, "step-2-load-the-dataset-define-x-y"]], "Step 3: Check for zero-variance 
columns and drop accordingly": [[6, "step-3-check-for-zero-variance-columns-and-drop-accordingly"]], "Step 3: Create an Instance of the XGBRegressor": [[6, "step-3-create-an-instance-of-the-xgbregressor"]], "Step 4: Create an Instance of the XGBClassifier": [[6, "step-4-create-an-instance-of-the-xgbclassifier"]], "Step 4: Define Hyperparameters for XGBoost": [[6, "step-4-define-hyperparameters-for-xgboost"]], "Step 5: Define Hyperparameters for XGBoost": [[6, "step-5-define-hyperparameters-for-xgboost"]], "Step 5: Initialize and Configure the Model": [[6, "step-5-initialize-and-configure-the-model"]], "Step 6: Initialize and Configure the Model": [[6, "step-6-initialize-and-configure-the-model"]], "Step 6: Perform Grid Search Parameter Tuning and Retrieve Split Data": [[6, "step-6-perform-grid-search-parameter-tuning-and-retrieve-split-data"]], "Step 7: Fit the Model": [[6, "step-7-fit-the-model"]], "Step 7: Perform Grid Search Parameter Tuning": [[6, "step-7-perform-grid-search-parameter-tuning"]], "Step 8: Fit the Model": [[6, "step-8-fit-the-model"]], "Step 8: Return Metrics (Optional)": [[6, "step-8-return-metrics-optional"]], "Step 9: Return Metrics (Optional)": [[6, "step-9-return-metrics-optional"]], "Summary": [[1, "summary"]], "Synthetic Minority Oversampling Technique (SMOTE)": [[6, "synthetic-minority-oversampling-technique-smote"]], "Target Variable Shape and Its Effects": [[1, "target-variable-shape-and-its-effects"]], "Techniques to Address Class Imbalance": [[6, "techniques-to-address-class-imbalance"]], "Threshold-Dependent Predictions": [[1, "threshold-dependent-predictions"]], "Usage Guide": [[4, null]], "Version 0.0.010a": [[2, "version-0-0-010a"]], "Version 0.0.011a": [[2, "version-0-0-011a"]], "Version 0.0.012a": [[2, "version-0-0-012a"]], "Version 0.0.013a": [[2, "version-0-0-013a"]], "Version 0.0.014a": [[2, "version-0-0-014a"]], "Version 0.0.02a": [[2, "version-0-0-02a"]], "Version 0.0.05a": [[2, "version-0-0-05a"]], "Version 0.0.06a": [[2, 
"version-0-0-06a"]], "Version 0.0.07a": [[2, "version-0-0-07a"]], "Version 0.0.08a": [[2, "version-0-0-08a"]], "Version 0.0.09a": [[2, "version-0-0-09a"]], "Version 0.0.15a": [[2, "version-0-0-15a"]], "Version 0.0.16a": [[2, "version-0-0-16a"]], "Welcome to Model Tuner\u2019s Documentation!": [[3, null]], "What Does Model Tuner Offer?": [[3, "what-does-model-tuner-offer"]], "Zero Variance Columns": [[1, null]], "iPython Notebooks": [[6, null]]}, "docnames": ["about", "caveats", "changelog", "getting_started", "index", "references", "usage_guide"], "envversion": {"sphinx": 64, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1}, "filenames": ["about.rst", "caveats.rst", "changelog.rst", "getting_started.rst", "index.rst", "references.rst", "usage_guide.rst"], "indexentries": {"built-in function": [[6, "check_input_type", false], [6, "evaluate_bootstrap_metrics", false], [6, "return_bootstrap_metrics", false], [6, "sampling_method", false]], "check_input_type()": [[6, "check_input_type", false]], "evaluate_bootstrap_metrics()": [[6, "evaluate_bootstrap_metrics", false]], "model (built-in class)": [[6, "Model", false]], "return_bootstrap_metrics()": [[6, "return_bootstrap_metrics", false]], "sampling_method()": [[6, "sampling_method", false]]}, "objects": {"": [[6, 0, 1, "", "Model"], [6, 1, 1, "", "check_input_type"], [6, 1, 1, "", "evaluate_bootstrap_metrics"], [6, 1, 1, "", "return_bootstrap_metrics"], [6, 1, 1, "", "sampling_method"]]}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"]}, "objtypes": {"0": "py:class", "1": "py:function"}, "terms": {"": [1, 2, 4, 6], "0": [0, 1, 3, 4, 6], "00": 6, "000": 6, "0001": 6, "01": 6, 
"010a": 4, "011a": 4, "012a": 4, "013a": 4, "014a": 4, "017a": 3, "01it": [], "02a": 4, "05": 6, "05a": 4, "05it": 6, "06a": 4, "07a": 4, "08a": 4, "09a": 4, "1": [2, 3, 4], "10": [0, 3, 4, 5], "100": 6, "1000": 6, "104": 6, "105": [], "11": 3, "11a": 2, "12": 3, "12727322": 0, "14": 3, "15a": 4, "16628708993634742": 6, "16665653153377272": [], "16713189436073958": 6, "16737745981389285": [], "16a": [0, 4], "16gwnrajvpujties5y1gfrdx1soasdv6m": [], "172": [], "174": 6, "175": 5, "177": 6, "178": [], "180": 6, "19": [3, 6], "1998": 5, "1d": 1, "1e": 6, "2": [3, 4], "20": 6, "200": 6, "2024": 0, "20820269480995568": 6, "20835571676988004": 6, "2096258193295803": [], "20989021789332263": [], "21": [3, 6], "22": 6, "222": 6, "23": 3, "24": 3, "241": [], "24205514192581173": [], "24432": 5, "245": 6, "246": 6, "248": [], "26": 3, "26315186452865597": 6, "2633964855111536": [], "2672762813568116": 6, "28411432705731066": 6, "3": [3, 4], "30": 6, "300": 6, "304": [], "3066172248224347": 6, "30it": [], "315": 6, "323": [], "324": 6, "34": 6, "35": 6, "35007797000749163": [], "358": 6, "36": 6, "3743548199982513": 6, "37it": 6, "3830825326824073": 6, "3nya_tqwy6hr": [], "4": [1, 3, 4], "41": [], "42": 6, "428": 6, "45": 6, "47": [], "5": [1, 3, 4], "500": [2, 6], "50it": 6, "5132216728774747": [], "5281": 0, "533023758436067": 6, "540": 6, "5459770114942529": 6, "5469613259668509": [], "5491329479768786": 6, "55": 6, "5502958579881657": [], "5537302816556403": 6, "58": 1, "6": [3, 4], "60it": [], "66": 3, "67": 6, "68": 6, "69": 6, "7": [3, 4], "70": 6, "71": 6, "74": [], "7461300309597523": [], "75": 6, "7561728395061729": 6, "7592592592592593": 6, "76": 6, "7647433075624044": 6, "7647451659057567": 6, "765": 6, "7651490279157868": 6, "7654320987654321": [], "7692307692307693": 6, "77": 6, "770853": 6, "777898": 6, "7778980": [], "78": 6, "781523": 6, "788341": 6, "7888925135381788": 6, "7888942913974833": 6, "79": 6, "792193": 6, "7979050719771986": [], "7979060590722392": 
[], "7979488661159093": [], "798": [], "798785": 6, "7992275185850191": 6, "8": [3, 4], "80": 6, "8023014087345259": 6, "8025676192819657": [], "81": 6, "8169018952192892": [], "81it": [], "82": 6, "83": 6, "84": 6, "85": 6, "8520172219085262": [], "86": 6, "8636363636363636": 6, "8695652173913043": [], "87": 6, "88": 6, "89": 6, "890": 6, "8942307692307693": [], "9": [1, 4], "90": 6, "900": 6, "9047619047619048": [], "91": 6, "9134615384615384": 6, "92": 6, "926": [], "9260891500474834": [], "928": 6, "9280033238366572": 6, "93": 6, "9334649122807017": 6, "9343063541205956": [], "934576804368471": 6, "9378696741854636": 6, "94": 6, "9428571428571428": [], "95": 6, "96": 6, "9666666666666667": 6, "97": 6, "9722222222222222": [], "98": 6, "9833333333333333": 6, "9888888888888889": [], "99": 6, "9904166666666667": [], "9945833333333333": 6, "9951388888888888": [], "9955555555555555": 6, "999": [1, 6], "9990277777777777": 6, "9994444444444446": [], "A": [4, 6], "AND": 6, "By": 1, "For": [1, 3, 6], "If": [1, 6], "In": [1, 2, 6], "It": [1, 3, 6], "Its": 4, "No": 6, "Not": 6, "On": 1, "One": [1, 6], "The": [1, 3, 4], "There": 2, "These": [1, 4], "To": [1, 6], "With": 1, "_": 1, "__colsample_bytre": 6, "__early_stopping_round": 6, "__eval_metr": 6, "__init__": 6, "__learning_r": 6, "__max_depth": 6, "__n_estim": 6, "__param_nam": 6, "__subsampl": 6, "__tree_method": 6, "__verbos": 6, "_confusion_matrix_print": 6, "_i": 1, "_j": 1, "_k": 1, "_regression_exampl": [], "abil": 6, "about": 1, "abov": 6, "abram": 5, "absolut": 6, "access": [0, 6], "accompani": 6, "accordingli": 4, "account": 1, "accur": 4, "accuraci": [4, 6], "achiev": [1, 2], "acknowledg": 4, "across": [1, 2, 3, 6], "activ": 6, "actual": [1, 6], "ad": [1, 2, 6], "adasyn": [2, 3, 6], "add": 6, "addit": [1, 6], "addition": [1, 6], "address": [1, 4], "adequ": 1, "adjust": 1, "advanc": 6, "aforement": 1, "after": [1, 4], "aid": [4, 5], "aids_clinical_": 6, "aids_clinical_trials_group_study_175": 6, "aim": 6, 
"alex": 0, "algorithm": [1, 6], "align": 1, "all": [1, 2, 3, 6], "alloc": 6, "allow": [2, 3, 6], "along": 1, "also": [1, 6], "altern": 1, "alwai": 2, "amplifi": 1, "an": 4, "analysi": 1, "angel": 6, "ani": 1, "anoth": [1, 6], "anova": 1, "apach": 2, "appli": [1, 3, 6], "applic": [1, 6], "approach": 1, "appropri": 6, "approx": 1, "ar": [0, 1, 2, 3, 6], "arrai": [1, 6], "arthur": [0, 2], "artifici": 1, "ascii": 6, "assert": 2, "assess": [1, 3, 6], "assign": [2, 6], "assum": 1, "assumpt": 1, "attempt": 1, "attributeerror": 6, "auc": 6, "author": 0, "autokera": 2, "autokerasclassifi": 2, "automat": [1, 3], "avail": [1, 2, 6], "averag": 6, "average_precis": 6, "avg": 6, "avoid": [1, 2, 6], "ax": 6, "axi": 2, "b": 1, "back": 6, "balanc": [1, 2, 3, 6], "bar": [1, 6], "base": [1, 3, 6], "bayesian": 6, "bayessearchcv": 6, "becaus": [1, 2], "becom": 1, "been": [1, 2, 6], "befor": [2, 3, 4, 6], "begin": 1, "behavior": 1, "being": [2, 6], "below": [1, 3, 6], "best": 6, "best_param": 2, "best_params_per_scor": 6, "beta": 6, "better": [1, 6], "between": [1, 3, 6], "beyond": 6, "bia": [4, 6], "bias": [1, 6], "bin": [1, 6], "binari": 4, "block": 6, "bool": 6, "boost": [2, 6], "boost_earli": 6, "bootstrap": [3, 4], "bootstrapp": [2, 6], "both": [1, 2, 6], "breast": [], "brier": [4, 6], "bug": 2, "bui": 0, "build": 6, "c": 1, "c5g896": 5, "c_": 1, "calcul": [4, 6], "calibr": [2, 3, 4], "calibrate_report": 6, "calibratemodel": 6, "calibration_curv": 6, "calibration_method": 6, "california": 4, "california_h": [], "call": 6, "can": [0, 1, 3, 6], "cancer": [], "cannot": 1, "captur": [1, 6], "care": [1, 6], "carefulli": 1, "case": [1, 2], "catboost": [2, 3], "categor": 6, "categori": 6, "caus": 1, "cdot": 1, "center": 6, "challeng": [1, 6], "chang": [1, 2, 6], "changelog": 4, "char": 2, "check": [1, 4], "check_input_typ": [4, 6], "chunk": 2, "ci": 6, "cite": 4, "clariti": 6, "class": [2, 3, 4], "class_label": 6, "class_proport": 6, "class_report_test": 6, "class_report_v": 6, "classif": 
[1, 3, 4], "classifi": [1, 6], "classification_report": 6, "clc": 6, "clean": 2, "click": 0, "clinic": [0, 4, 5], "close": 1, "cluster": 1, "cm_test": 6, "cm_val": 6, "code": [1, 2, 6], "codebas": 0, "col": 6, "colab": 6, "color": 6, "column": [2, 4], "com": [], "combin": [1, 6], "come": 1, "command": 6, "comment": 2, "common": 1, "commonli": 6, "compar": 6, "compat": 3, "complet": [1, 2], "comprehens": 6, "comput": [1, 6], "concat": 2, "condit": 1, "conduct": 3, "conf_mat_class_kfold": 6, "conf_matrix": 6, "confid": 6, "configur": 4, "conflict": 1, "confus": 6, "connect": 1, "consid": [1, 2], "consist": 4, "constant": 1, "constraint": [1, 2], "construct": 1, "contain": [2, 6], "context": [1, 6], "continu": 6, "contrast": 1, "contribut": [0, 1], "contributor": 0, "convers": 1, "convert": [1, 6], "coordin": [], "correct": [1, 2], "correctli": 1, "count": [2, 6], "cpu": 6, "creat": [1, 4], "creation": [3, 4], "critic": [1, 6], "cross": [3, 4, 6], "crucial": [1, 6], "ctsi": 0, "current": [1, 3], "curs": 1, "curv": [4, 6], "custom": [2, 3, 6], "custom_scor": 6, "d": [1, 5], "d_1": 1, "d_2": 1, "d_j": 1, "d_k": 1, "data": [2, 3, 4], "dataconversionwarn": 1, "datafram": [1, 6], "dataset": [1, 3, 4], "decis": [1, 6], "decreas": 1, "default": [1, 6], "defin": [1, 4], "degrad": 1, "delta": 1, "demonstr": 6, "denot": 1, "depend": [2, 3, 4, 6], "deploi": 6, "deprec": 2, "depth": 6, "design": [1, 3, 6], "despit": 1, "detail": 6, "detect": 6, "determin": 1, "dev": 2, "develop": 3, "deviat": 1, "diagnosi": [1, 6], "dict": 6, "dictionari": 6, "differ": [1, 2, 3], "dimens": 1, "dimension": 1, "directli": 3, "discrep": 1, "diseas": 6, "displai": 6, "disrupt": 1, "distinct": 6, "distinguish": 6, "distort": 4, "distribut": [3, 4], "divid": 1, "divis": 1, "do": [2, 6], "document": 6, "doe": [1, 4], "doi": [0, 5], "domin": [1, 6], "dot": 1, "dr": 0, "draw": 6, "drawn": 1, "drive": [], "drop": [1, 4], "dtype": 6, "due": 1, "duplic": 6, "dure": [1, 2, 6], "e": [1, 6], "each": [1, 6], 
"earli": [2, 3, 6], "early_stop": 6, "easier": 6, "easili": 6, "effect": [3, 4, 6], "either": [2, 6], "el": 5, "elimin": 3, "empir": 1, "empti": [1, 6], "enabl": [3, 6], "encount": 1, "end": 1, "engin": 1, "enhanc": 2, "ensur": [1, 2, 3, 6], "entir": 1, "enumer": 6, "equal": [1, 6], "equat": 1, "equival": [], "error": [1, 2, 6], "especi": 6, "essenc": 6, "essenti": [1, 6], "estat": 6, "estim": [1, 2, 3, 6], "estimator_nam": 6, "estimator_name_xgb": [], "eta": [], "etc": 2, "eval_set": [], "evalu": [1, 3, 6], "evaluate_bootstrap_metr": [2, 4, 6], "even": 1, "event": 6, "examin": 6, "exampl": 4, "exceed": 2, "except": 6, "excess": 1, "execut": 6, "exist": [1, 6], "exp": 1, "expect": [1, 6], "explain": 6, "explained_vari": 6, "explan": 1, "explicit": 6, "explicitli": 6, "express": 1, "extend": 6, "extract": 2, "extrem": 1, "f": [1, 6], "f1": [1, 6], "f1_beta_tun": 6, "f1_weight": 6, "f_i": 1, "facilit": 3, "fail": 1, "failur": 1, "fair": 1, "fairli": 6, "fall": 1, "fals": [1, 6], "far": 1, "favor": [1, 2, 6], "feat_num": 1, "featur": [1, 3, 6], "feature_": 6, "feature_nam": 6, "feature_select": 6, "fetch": 6, "fetch_california_h": 6, "fetch_ucirepo": 6, "figsiz": 6, "figur": 6, "file": [2, 6], "filter": 2, "find": 1, "fine": [3, 6], "first": 1, "fit": [1, 2, 4], "fix": [2, 6], "flexibl": [3, 6], "flip_i": 6, "float": 6, "float64": [], "fn": 6, "focu": 1, "fold": [1, 3, 6], "follow": [1, 2, 3, 6], "form": 1, "format": 6, "formul": 1, "forthcom": 2, "found": 6, "fp": 6, "frac": 1, "fraction": 1, "fraud": 6, "fraudul": 6, "free": 1, "frequenc": [1, 6], "frequent": 6, "from": [2, 3, 4, 6], "full": 1, "fulli": 1, "function": [1, 2, 3, 4], "funnel": 0, "funnell_2024_12727322": 0, "g": [1, 6], "gb": [], "gender": 6, "gener": [1, 3, 4], "generaliz": 1, "geq": 1, "get": 6, "get_best_score_param": 6, "get_cross_valid": 6, "get_test_data": 6, "get_train_data": 6, "get_valid_data": 6, "github": 4, "given": 1, "goal": [4, 6], "googl": 6, "grid": 4, "grid_search_param_tun": 6, 
"gridsearchcv": [], "ground": 6, "group": [4, 5], "guidanc": 0, "ha": [1, 2, 6], "had": 1, "hand": 1, "handl": [1, 3, 6], "happen": 2, "harmon": 1, "hat": 1, "have": [2, 6], "haven": 6, "healthcar": 6, "heavili": 1, "help": [1, 6], "helper": 4, "here": [2, 3, 6], "hi": 0, "high": 1, "higher": 3, "highli": 1, "highlight": 1, "hist": 6, "histori": 2, "hold": 1, "homogen": 1, "hous": 4, "how": 6, "howev": 1, "html": 6, "http": [0, 5], "hybrid": 6, "hyperparamet": [2, 3, 4], "i": [1, 2, 3, 6], "id": 6, "ident": 1, "identifi": 1, "ifrom": [], "ij": 1, "illustr": 4, "imbal": [1, 4], "imbalanc": [2, 3, 4], "imbalance_sampl": 6, "imblearn": 6, "impact": 4, "implement": [2, 3, 6], "import": [1, 2, 4], "importerror": 6, "improp": 1, "improperli": 6, "improv": [3, 6], "imput": [2, 3, 4, 6], "inaccur": 1, "includ": [1, 3, 6], "incomplet": 1, "inconsist": 1, "incorrect": [1, 6], "increas": [1, 6], "index": 6, "indexerror": 6, "indic": [1, 6], "infinit": 1, "inflat": 1, "influenc": 1, "inform": [1, 6], "informat": 0, "inher": [1, 6], "init": 4, "initi": 4, "initialis": 2, "input": [1, 4], "insid": [2, 6], "instal": [4, 6], "instanc": [1, 4], "instead": [1, 2, 6], "institut": 0, "insuffici": 6, "int": 6, "int64": 6, "integr": [1, 3], "interpol": [1, 6], "interpret": 1, "interv": [1, 6], "introduc": [1, 2], "invalid": [1, 6], "invalu": 0, "involv": [1, 2], "ipython": 4, "isinst": 1, "isoton": [3, 4, 6], "issu": [1, 2, 6], "iter": 6, "its": [1, 6], "itself": 2, "j": 1, "job": 6, "joblib": 3, "jul": 0, "just": 1, "k": [1, 3, 6], "kei": [0, 1, 2, 3, 4], "keyerror": 6, "kf": 6, "kfold": [2, 6], "kfold_split": 6, "kind": 6, "known": 6, "label": [1, 3, 6], "larg": 1, "later": 1, "layer": 2, "lead": [1, 6], "learn": [2, 3, 4, 5], "legend": 6, "length": 2, "leon": 2, "leonid": 0, "leq": 1, "less": 1, "let": 1, "level": 6, "leverag": 6, "li": 1, "librari": [3, 4], "licens": 2, "like": [1, 3, 6], "likelihood": 1, "limit": [2, 4], "line": [1, 2], "linear": [1, 6], "linestyl": 6, "link": [0, 
6], "list": [2, 6], "ll": 1, "lo": 6, "load": 4, "log": [2, 6], "logic": 2, "logist": [4, 6], "logloss": 6, "logo": 2, "loop": 2, "loss": [1, 6], "low": [2, 6], "lower": [1, 6], "machin": [1, 3, 4, 5], "macro": 6, "mai": 1, "maintain": 1, "major": [1, 6], "make": [1, 6], "make_classif": 6, "make_classification_": 6, "mani": 1, "marker": 6, "match": 1, "math": [], "mathbf": 1, "mathemat": 4, "mathf": [], "matplotlib": 6, "matric": 6, "matrix": 6, "max": 1, "maximum": [1, 6], "mean": [1, 6], "mean95": [], "meaning": [1, 6], "measur": 1, "median": [1, 6], "medic": [0, 1], "meet": 3, "mere": 6, "messag": 6, "method": [1, 2, 3, 4], "metric": [1, 2, 3, 4], "mid": 1, "middl": [], "midwai": 1, "might": 1, "mii": 0, "min": 1, "min_": 1, "minimum": 1, "minmax": 3, "minor": [1, 4], "misclassif": 1, "misinterpret": 1, "mislabel": 1, "mislead": 1, "mismatch": [2, 6], "miss": [1, 6], "mitig": [4, 6], "mlflow": 2, "model": 2, "model_definit": 6, "model_tun": [3, 6], "model_tuner_util": 6, "model_typ": 6, "model_xgb": 6, "modifi": 2, "modul": 6, "monoton": 1, "month": 0, "more": [1, 6], "move": 2, "msb": 1, "msw": 1, "mu": 1, "much": 1, "multi": [3, 6], "multi_label": 6, "multipl": 2, "must": [1, 6], "n": 1, "n_bin": 6, "n_clusters_per_class": 6, "n_estim": [], "n_featur": 6, "n_inform": 6, "n_iter": 6, "n_j": 1, "n_job": 6, "n_redund": 6, "n_sampl": [1, 6], "n_split": 6, "name": [2, 6], "nan": [1, 6], "natur": 6, "nearest": [1, 6], "necessari": [2, 4], "need": [1, 4], "neg": [1, 6], "neighbor": [1, 6], "new": 6, "nois": [1, 6], "noisi": 1, "non": [1, 2], "none": 6, "normal": 6, "note": 1, "notebook": [2, 4], "notic": 6, "now": [1, 2], "np": [2, 6], "num_resampl": 6, "number": [1, 2, 6], "numer": 6, "numpi": [3, 6], "o": 6, "object": [2, 4], "observ": [1, 6], "occur": [2, 6], "off": 1, "offer": 4, "often": [1, 6], "older": 2, "onc": 6, "one": [1, 6], "ones": 1, "onli": [1, 2], "onto": 2, "oper": 1, "optim": [1, 3, 6], "optimal_threshold": 6, "option": 4, "order": [1, 2], "org": 
[0, 5], "origin": [0, 1], "other": [1, 2, 3, 6], "our": [2, 6], "out": [1, 2], "outcom": [1, 6], "output": [1, 6], "outsid": 2, "outweigh": 6, "over": 1, "overal": 1, "overfit": [1, 3, 6], "overlap": 1, "overlook": 1, "oversampl": [1, 3, 4], "p": 1, "p_1": 1, "p_2": 1, "p_i": 1, "p_n": 1, "packag": 6, "panayioti": 0, "panda": [3, 6], "parallel": 6, "param": 6, "paramet": [2, 3, 4], "parametr": 1, "part": 6, "particularli": [1, 3, 6], "pass": [1, 6], "pattern": 6, "pd": [1, 2, 6], "penal": 1, "per": [2, 6], "perfectli": [1, 6], "perform": [1, 3, 4], "petousi": 0, "pickl": 2, "piecewis": 1, "pip": [3, 6], "pip25": 2, "pipelin": [1, 2, 3, 6], "pipeline_step": [1, 2, 6], "placehold": 1, "platt": 4, "pleas": [1, 6], "plot": 6, "plt": 6, "pmatrix": 1, "po": 6, "point": [1, 6], "poor": 6, "poorli": 6, "posit": [1, 6], "possibl": [1, 6], "power": [1, 3], "ppv": 6, "practic": [1, 6], "practition": 1, "pre": 6, "precis": [1, 6], "predict": [4, 6], "predict_proba": 6, "prefix": 6, "preprocess": [1, 6], "preprocess_imputer_imput": 6, "preprocess_scaler_standardscalar": 6, "preprocessor": 1, "prerequisit": 4, "present": 1, "preserv": 1, "pretti": 2, "prevent": [3, 4], "previou": 2, "previous": 1, "primari": 1, "print": [2, 6], "print_pipelin": 6, "print_result": 6, "print_selected_best_featur": 6, "prior": 1, "priorit": 1, "prob_pred_calibr": 6, "prob_pred_uncalibr": 6, "prob_true_calibr": 6, "prob_true_uncalibr": 6, "probabilist": 1, "probabl": [1, 3, 6], "problem": [1, 6], "proceed": 1, "process": [1, 2, 6], "process_imbalance_sampl": 6, "produc": [1, 6], "properli": 6, "properti": 1, "proport": [1, 6], "provid": [1, 3, 6], "publish": 0, "purpos": 4, "py": [2, 6], "pypi": [2, 3], "pyplot": 6, "pyproject": 2, "python": 3, "quad": 1, "quickli": 6, "r": 6, "r2": 6, "race": 6, "rais": [1, 6], "rand_grid": 6, "random": [1, 6], "random_st": 6, "randomized_grid": 6, "randomli": 6, "randomoversampl": 6, "randomundersampl": 6, "rang": [1, 6], "rare": 6, "rate": 1, "rather": 1, 
"ratio": [1, 6], "raw": 1, "re": 2, "readili": 6, "readm": 2, "real": 6, "recal": [1, 6], "recommend": 1, "recurs": 3, "redfin": 6, "redfin_model_xgb": [], "redistribut": 6, "reduc": [1, 6], "redund": [], "ref": 2, "refactor": 2, "refer": [1, 4, 6], "reflect": 1, "regard": 2, "region": 1, "regress": 4, "regression_exampl": [], "regression_example_": [], "regression_report": 6, "regression_report_kfold": 6, "regular": 6, "relat": 2, "relationship": 1, "releas": 2, "reli": 1, "reliabl": 6, "remov": [1, 2, 6], "renam": 2, "repeatedli": 1, "replac": 1, "report": [2, 4], "report_model_metr": 6, "repositori": [4, 5, 6], "repres": [1, 2], "represent": 6, "reproduc": 6, "requir": [1, 2, 3, 6], "resampl": [2, 4], "research": 6, "reset": [2, 6], "reset_estim": 6, "resolut": 2, "resourc": 6, "respect": 6, "result": 1, "retriev": 4, "return": 4, "return_bootstrap_metr": [4, 6], "return_metr": 6, "rfe": 3, "rightarrow": 1, "risk": [1, 6], "rmse": 6, "robust": [3, 6], "roc": 6, "roc_auc": 6, "root": 6, "rot": 6, "rout": 6, "routin": 1, "run": 6, "runtim": 1, "runtimeerror": 6, "runtimewarn": 1, "sadr": 5, "same": [1, 2], "sampl": [2, 4, 6], "sampler": 6, "sampling_method": [4, 6], "save": 2, "scale": [2, 3, 4], "scenario": 6, "scienc": 0, "scikit": 3, "scipi": 3, "score": [4, 6], "scrollto": [], "seamlessli": 6, "search": 4, "section": 6, "see": 6, "seed": 6, "segment": [1, 2], "select": [3, 6], "selectkbest": [2, 3], "self": [2, 6], "sensit": [1, 6], "separ": [1, 6], "sequenc": 1, "seri": [1, 6], "set": [1, 6], "setup": 2, "sever": [1, 6], "shape": [4, 6], "should": [1, 2, 6], "show": 6, "shown": 6, "shpaner": 0, "sigma": 1, "sigmoid": [3, 6], "significantli": [1, 6], "sim": 1, "similar": [1, 6], "simpl": 6, "simpleimput": [1, 3, 6], "simpli": 6, "simplifi": 2, "simultan": 2, "sinc": 1, "singl": [1, 6], "size": 6, "skew": 1, "sklearn": 6, "smote": [2, 3, 4], "smoteenn": 1, "smotetomek": 1, "so": [1, 6], "softwar": [0, 2], "solut": 4, "some": 1, "space": 1, "spam": 6, "special": 
0, "specif": [1, 2, 6], "specifi": [1, 2, 6], "split": [1, 2, 3, 4], "sqrt": 1, "squar": [1, 6], "squeez": [1, 6], "stage": 6, "standard": [1, 6], "standardscal": [1, 6], "standardscalar": 6, "state": 1, "statist": 1, "step": [2, 4], "stop": [2, 3, 6], "store": 2, "str": 6, "strat_key_val_test": 2, "strategi": [3, 6], "stratif": [2, 4, 6], "stratifi": [1, 2, 3, 6], "stratify_col": [1, 2, 6], "stratify_i": [1, 2, 6], "stratify_kei": 2, "string": 2, "structur": 1, "struggl": 6, "studi": [4, 5], "subsampl": 6, "subsequ": 1, "subset": 1, "suit": 6, "sum": 6, "sum_": 1, "summari": 4, "supervis": 6, "support": [0, 2, 3, 6], "synthet": 4, "system": 3, "t": 6, "take": [1, 6], "taken": 2, "target": [2, 3, 4, 6], "task": [3, 6], "tau": 1, "techniqu": [3, 4], "temporarili": 2, "tend": 6, "test": [2, 6], "test_model": 6, "test_siz": 6, "text": [1, 6], "th": 1, "than": 1, "thank": 0, "thei": [1, 6], "them": [1, 6], "therefor": [1, 6], "thi": [0, 1, 2, 3, 6], "thoroughli": 6, "threshold": [2, 3, 4, 6], "through": 6, "thu": 6, "time": [1, 2], "titan": 6, "titl": [0, 6], "tn": 6, "toml": 2, "too": 1, "tool": 3, "top": [1, 6], "total": [], "toward": 6, "tp": 6, "tqdm": 3, "track": 6, "trade": 1, "tradit": 1, "train": [3, 4, 6], "train_siz": 6, "train_val_test": 2, "train_val_test_split": [2, 6], "transact": 6, "transform": [4, 6], "translat": 0, "treat": [1, 6], "tree": 6, "trial": [4, 5], "trigger": 1, "true": [1, 6], "trust": 1, "truth": 6, "tune": [1, 2, 3, 4], "tune_threshold_fbeta": [2, 6], "tuned_paramet": 6, "tuned_parameters_xgb": 6, "tuner": 6, "two": [1, 6], "txt": 2, "type": 6, "typeerror": 6, "typic": 6, "u": 1, "uci": [5, 6], "ucimlrepo": 6, "ucla": 0, "uncalibr": 6, "undefin": 1, "under": 3, "underli": 1, "underrepres": 6, "undersampl": [1, 6], "understand": [1, 6], "unequ": 6, "unexpect": 6, "uniform": 1, "uniqu": 6, "unlik": 1, "unnecessari": [1, 2, 6], "unpredict": 1, "unrealist": 1, "unreli": 1, "unseen": 1, "unsupport": 6, "unus": 2, "up": 2, "updat": 2, "upper": 
6, "url": 0, "us": [1, 2, 3, 4], "usag": 2, "user": 6, "userwarn": 1, "util": [2, 6], "va": 6, "valid": [3, 4, 6], "validation_data": 6, "validation_s": 6, "valu": [1, 4], "value_count": 6, "valueerror": 6, "var": [1, 6], "variabl": [2, 3, 4, 6], "varianc": 4, "varieti": 6, "variou": [3, 6], "vdot": 1, "vector": 1, "verbos": 2, "versatil": 3, "version": [0, 3, 4], "visual": 6, "w": [1, 5], "wa": [0, 1, 2], "wai": 1, "warn": 1, "we": [1, 6], "weight": [1, 6], "welcom": 4, "well": [1, 6], "were": 2, "what": 4, "when": [1, 2, 3, 6], "where": [1, 2, 6], "whether": 6, "which": [1, 3, 6], "while": [1, 6], "wide": [1, 6], "width": 6, "wish": 6, "within": [1, 6], "without": [1, 6], "work": [0, 1, 2], "workflow": [3, 6], "world": 6, "would": 1, "wrong": 2, "x": [1, 2, 4], "x_": 1, "x_i": 1, "x_j": 1, "x_synthet": [], "x_test": 6, "x_train": 6, "x_valid": 6, "x_valid_test": 2, "xgb": 6, "xgb_": 6, "xgb__colsample_bytre": 6, "xgb__early_stopping_round": 6, "xgb__eval_metr": 6, "xgb__learning_r": 6, "xgb__max_depth": 6, "xgb__n_estim": 6, "xgb__subsampl": 6, "xgb__tree_method": 6, "xgb__verbos": [], "xgb_colsample_bytre": [], "xgb_definit": 6, "xgb_early_bootstrap_test": 2, "xgb_early_stopping_round": [], "xgb_eval_metr": [], "xgb_learning_r": [], "xgb_max_depth": [], "xgb_model": 6, "xgb_n_estim": [], "xgb_name": 6, "xgb_paramet": [], "xgb_smote": 6, "xgb_subsampl": [], "xgb_verbos": [], "xgbclassifi": 4, "xgbearli": 6, "xgboost": [2, 3, 4], "xgboost_earli": [], "xgbregressor": 4, "xlabel": 6, "y": [1, 2, 4], "y_1": 1, "y_2": 1, "y_i": 1, "y_n": 1, "y_pred": 6, "y_pred_prob": 6, "y_prob_calibr": 6, "y_prob_uncalibr": 6, "y_test": 6, "y_test_pr": 6, "y_train": 6, "y_true": 6, "y_valid": 6, "y_valid_proba": 6, "y_valid_test": 2, "year": 0, "yellow": 6, "yet": 6, "ylabel": 6, "you": [0, 1, 3, 6], "your": [1, 3, 6], "z": 1, "z_": 1, "zenodo": [0, 2], "zero": 4, "zero_variance_column": [1, 6]}, "titles": ["GitHub Repository", "Zero Variance Columns", "Changelog", "Welcome to Model 
Tuner\u2019s Documentation!", "Model Tuner Documentation", "References", "iPython Notebooks"], "titleterms": {"": 3, "0": 2, "010a": 2, "011a": 2, "012a": 2, "013a": 2, "014a": 2, "02a": 2, "05a": 2, "06a": 2, "07a": 2, "08a": 2, "09a": 2, "1": [1, 6], "10": 6, "15a": 2, "16a": 2, "2": [1, 6], "3": [1, 6], "4": 6, "5": 6, "6": 6, "7": 6, "8": 6, "9": 6, "A": 1, "Its": 1, "The": 6, "These": 6, "about": 4, "accordingli": 6, "accur": 1, "accuraci": 1, "acknowledg": 0, "address": 6, "after": 6, "aid": 6, "an": 6, "applic": [], "befor": 1, "bia": 1, "binari": 6, "bootstrap": 6, "bootstrapp": [], "brier": 1, "calcul": 1, "calibr": [1, 6], "california": 6, "caveat": [1, 4], "changelog": 2, "check": 6, "cite": 0, "class": [1, 6], "classif": 6, "clinic": 6, "column": [1, 6], "configur": 6, "consist": 1, "creat": 6, "creation": 1, "cross": 1, "curv": 1, "data": [1, 6], "dataset": 6, "defin": 6, "depend": 1, "distort": 1, "distribut": [1, 6], "document": [3, 4], "doe": 3, "drop": 6, "effect": 1, "exampl": [1, 6], "fit": 6, "from": 1, "function": 6, "gener": 6, "get": 4, "github": 0, "goal": 1, "grid": 6, "group": 6, "guid": 4, "helper": 6, "hous": 6, "hyperparamet": 6, "illustr": 1, "imbal": 6, "imbalanc": [1, 6], "impact": 1, "import": 6, "imput": 1, "init": 6, "initi": 6, "input": 6, "instal": 3, "instanc": 6, "ipython": 6, "isoton": 1, "kei": 6, "learn": [1, 6], "librari": 6, "limit": 1, "load": 6, "logist": 1, "machin": 6, "mathemat": 1, "method": 6, "metric": 6, "minor": 6, "mitig": 1, "model": [0, 1, 3, 4, 6], "necessari": 6, "need": 6, "notebook": 6, "object": 6, "offer": 3, "option": 6, "oversampl": 6, "paramet": [1, 6], "perform": 6, "platt": 1, "predict": 1, "prerequisit": 3, "prevent": 1, "purpos": 6, "refer": 5, "regress": [1, 6], "report": 6, "repositori": 0, "resampl": [1, 6], "retriev": 6, "return": 6, "sampl": 1, "scale": 1, "score": 1, "search": 6, "shape": 1, "smote": [1, 6], "solut": 1, "split": 6, "start": 4, "step": 6, "stratif": 1, "studi": 6, "summari": 
1, "synthet": [1, 6], "target": 1, "techniqu": [1, 6], "threshold": 1, "train": 1, "transform": 1, "trial": 6, "tune": 6, "tuner": [0, 3, 4], "us": 6, "usag": 4, "valid": 1, "valu": 6, "variabl": 1, "varianc": [1, 6], "version": 2, "welcom": 3, "what": 3, "x": 6, "xgbclassifi": 6, "xgboost": 6, "xgbregressor": 6, "y": 6, "zero": [1, 6]}})
+>>>>>>> c6b188187e841f10a1f9bbd52c95e5fc1cbd90e5
diff --git a/docs/usage_guide.html b/docs/usage_guide.html
index 838c8ca..56c4472 100644
--- a/docs/usage_guide.html
+++ b/docs/usage_guide.html
@@ -609,10 +609,18 @@ Step 9: Return Metrics (Optional)
Validation Metrics
@@ -680,22 +688,22 @@ Step 10: Calibrate the Model (if needed)
Change back to CPU
@@ -721,36 +729,36 @@ Step 10: Calibrate the Model (if needed)
-
# Get the predicted probabilities for the validation data from calibrated model
+## Get the predicted probabilities for the validation data from calibrated model
y_prob_calibrated = model_xgb.predict_proba(X_test)[:, 1]
-# Compute the calibration curve for the calibrated model
+## Compute the calibration curve for the calibrated model
prob_true_calibrated, prob_pred_calibrated = calibration_curve(
-y_test,
-y_prob_calibrated,
-n_bins=6,
+ y_test,
+ y_prob_calibrated,
+ n_bins=10,
)
-# Plot the calibration curves
+## Plot the calibration curves
plt.figure(figsize=(5, 5))
plt.plot(
-prob_pred_uncalibrated,
-prob_true_uncalibrated,
-marker="o",
-label="Uncalibrated XGBoost",
+ prob_pred_uncalibrated,
+ prob_true_uncalibrated,
+ marker="o",
+ label="Uncalibrated XGBoost",
)
plt.plot(
-prob_pred_calibrated,
-prob_true_calibrated,
-marker="o",
-label="Calibrated XGBoost",
+ prob_pred_calibrated,
+ prob_true_calibrated,
+ marker="o",
+ label="Calibrated XGBoost",
)
plt.plot(
-[0, 1],
-[0, 1],
-linestyle="--",
-label="Perfectly calibrated",
+ [0, 1],
+ [0, 1],
+ linestyle="--",
+ label="Perfectly calibrated",
)
plt.xlabel("Predicted probability")
plt.ylabel("True probability in each bin")
@@ -810,7 +818,11 @@ Generating an Imbalanced Dataset