diff --git a/docs/.doctrees/about.doctree b/docs/.doctrees/about.doctree deleted file mode 100644 index 447df61..0000000 Binary files a/docs/.doctrees/about.doctree and /dev/null differ diff --git a/docs/.doctrees/caveats.doctree b/docs/.doctrees/caveats.doctree deleted file mode 100644 index 47b1a82..0000000 Binary files a/docs/.doctrees/caveats.doctree and /dev/null differ diff --git a/docs/.doctrees/changelog.doctree b/docs/.doctrees/changelog.doctree deleted file mode 100644 index 912f5a9..0000000 Binary files a/docs/.doctrees/changelog.doctree and /dev/null differ diff --git a/docs/.doctrees/environment.pickle b/docs/.doctrees/environment.pickle deleted file mode 100644 index 46716ba..0000000 Binary files a/docs/.doctrees/environment.pickle and /dev/null differ diff --git a/docs/.doctrees/getting_started.doctree b/docs/.doctrees/getting_started.doctree deleted file mode 100644 index 4fd2ec2..0000000 Binary files a/docs/.doctrees/getting_started.doctree and /dev/null differ diff --git a/docs/.doctrees/index.doctree b/docs/.doctrees/index.doctree deleted file mode 100644 index 3f0c7fe..0000000 Binary files a/docs/.doctrees/index.doctree and /dev/null differ diff --git a/docs/.doctrees/main.doctree b/docs/.doctrees/main.doctree deleted file mode 100644 index ca8d9b2..0000000 Binary files a/docs/.doctrees/main.doctree and /dev/null differ diff --git a/docs/.doctrees/references copy.doctree b/docs/.doctrees/references copy.doctree deleted file mode 100644 index 9e44515..0000000 Binary files a/docs/.doctrees/references copy.doctree and /dev/null differ diff --git a/docs/.doctrees/references.doctree b/docs/.doctrees/references.doctree deleted file mode 100644 index 1aefa6b..0000000 Binary files a/docs/.doctrees/references.doctree and /dev/null differ diff --git a/docs/.doctrees/usage_guide.doctree b/docs/.doctrees/usage_guide.doctree deleted file mode 100644 index 60df0fd..0000000 Binary files a/docs/.doctrees/usage_guide.doctree and /dev/null differ diff --git a/docs/_sources/usage_guide.rst.txt b/docs/_sources/usage_guide.rst.txt index 274de22..544710d 100644 --- a/docs/_sources/usage_guide.rst.txt +++ b/docs/_sources/usage_guide.rst.txt @@ -531,10 +531,18 @@ You can use this function to evaluate the model by printing the output. # ------------------------- VALID AND TEST METRICS ----------------------------- print("Validation Metrics") - class_report_val, cm_val = model_xgb.return_metrics(X_valid, y_valid, optimal_threshold=True) + class_report_val, cm_val = model_xgb.return_metrics( + X_valid, + y_valid, + optimal_threshold=True, + ) print() print("Test Metrics") - class_report_test, cm_test = model_xgb.return_metrics(X_test, y_test, optimal_threshold=True) + class_report_test, cm_test = model_xgb.return_metrics( + X_test, + y_test, + optimal_threshold=True, + ) .. 
code-block:: bash @@ -604,22 +612,22 @@ Step 10: Calibrate the Model (if needed) import matplotlib.pyplot as plt from sklearn.calibration import calibration_curve - # Get the predicted probabilities for the validation data from the uncalibrated model + ## Get the predicted probabilities for the validation data from uncalibrated model y_prob_uncalibrated = model_xgb.predict_proba(X_test)[:, 1] - # Compute the calibration curve for the uncalibrated model + ## Compute the calibration curve for the uncalibrated model prob_true_uncalibrated, prob_pred_uncalibrated = calibration_curve( y_test, y_prob_uncalibrated, - n_bins=6, + n_bins=10, ) - # Calibrate the model + ## Calibrate the model if model_xgb.calibrate: - model_xgb.calibrateModel(X, y, score="roc_auc") + model_xgb.calibrateModel(X, y, score="roc_auc") - # Predict on the validation set - y_test_pred = model_xgb.predict_proba(X_test)[:,1] + ## Predict on the validation set + y_test_pred = model_xgb.predict_proba(X_test)[:, 1] .. code-block:: bash @@ -651,36 +659,36 @@ Step 10: Calibrate the Model (if needed) .. code-block:: python - # Get the predicted probabilities for the validation data from calibrated model + ## Get the predicted probabilities for the validation data from calibrated model y_prob_calibrated = model_xgb.predict_proba(X_test)[:, 1] - # Compute the calibration curve for the calibrated model + ## Compute the calibration curve for the calibrated model prob_true_calibrated, prob_pred_calibrated = calibration_curve( - y_test, - y_prob_calibrated, - n_bins=6, + y_test, + y_prob_calibrated, + n_bins=10, ) - # Plot the calibration curves + ## Plot the calibration curves plt.figure(figsize=(5, 5)) plt.plot( - prob_pred_uncalibrated, - prob_true_uncalibrated, - marker="o", - label="Uncalibrated XGBoost", + prob_pred_uncalibrated, + prob_true_uncalibrated, + marker="o", + label="Uncalibrated XGBoost", ) plt.plot( - prob_pred_calibrated, - prob_true_calibrated, - marker="o", - label="Calibrated XGBoost", + prob_pred_calibrated, + prob_true_calibrated, + marker="o", + label="Calibrated XGBoost", ) plt.plot( - [0, 1], - [0, 1], - linestyle="--", - label="Perfectly calibrated", + [0, 1], + [0, 1], + linestyle="--", + label="Perfectly calibrated", ) plt.xlabel("Predicted probability") plt.ylabel("True probability in each bin") @@ -688,7 +696,6 @@ Step 10: Calibrate the Model (if needed) plt.legend() plt.show() - .. raw:: html
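The calibration hunk above compares reliability curves before and after calibrating ``model_xgb``. As a quick numeric complement, the Brier scores of the two probability vectors can be compared on the same held-out set. This is a minimal illustrative sketch, not part of the usage guide's own code: it assumes ``y_test``, ``y_prob_uncalibrated``, and ``y_test_pred`` from the snippets above are still in scope.

.. code-block:: python

    from sklearn.metrics import brier_score_loss

    ## Lower is better; calibration should not worsen the Brier score
    ## on the held-out probabilities computed above.
    brier_uncalibrated = brier_score_loss(y_test, y_prob_uncalibrated)
    brier_calibrated = brier_score_loss(y_test, y_test_pred)

    print(f"Brier score (uncalibrated): {brier_uncalibrated:.4f}")
    print(f"Brier score (calibrated):   {brier_calibrated:.4f}")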
@@ -762,6 +769,10 @@ parameters are specified: .. code-block:: python + import pandas as pd + import numpy as np + from sklearn.datasets import make_classification + X, y = make_classification( n_samples=1000, n_features=20, @@ -786,6 +797,8 @@ Below, you will see that the dataset we have generated is severely imbalanced wi .. code-block:: python + import matplotlib.pyplot as plt + ## Create a bar plot value_counts = pd.Series(y).value_counts() ax = value_counts.plot( @@ -838,6 +851,8 @@ Below, we will use an XGBoost classifier with the following hyperparameters: .. code-block:: python + from xgboost import XGBClassifier + xgb_name = "xgb" xgb = XGBClassifier( random_state=222, @@ -937,14 +952,13 @@ Initalize and Configure The Model .. code-block:: python + from model_tuner import Model + xgb_smote = Model( name=f"Make_Classification_{model_type}", estimator_name=estimator_name, calibrate=calibrate, - pipeline_steps=[ - ("Imputer", SimpleImputer()), - ("StandardScalar", StandardScaler()), - ], + model_type="classification", estimator=clc, kfold=kfold, stratify_y=True, @@ -977,44 +991,32 @@ Perform Grid Search Parameter Tuning and Retrieve Split Data .. code-block:: bash Pipeline Steps: - ======================== - ┌────────────────────────────────────────────┐ - │ Step 1: preprocess_imputer_Imputer │ - │ SimpleImputer │ - └────────────────────────────────────────────┘ - │ - ▼ - ┌────────────────────────────────────────────┐ - │ Step 2: preprocess_scaler_StandardScalar │ - │ StandardScaler │ - └────────────────────────────────────────────┘ - │ - ▼ - ┌────────────────────────────────────────────┐ - │ Step 3: resampler │ - │ SMOTE │ - └────────────────────────────────────────────┘ - │ - ▼ - ┌────────────────────────────────────────────┐ - │ Step 4: xgb │ - │ XGBClassifier │ - └────────────────────────────────────────────┘ + + ┌─────────────────────┐ + │ Step 1: resampler │ + │ SMOTE │ + └─────────────────────┘ + │ + ▼ + ┌─────────────────────┐ + │ Step 2: xgb │ + │ XGBClassifier │ + └─────────────────────┘ Distribution of y values after resampling: target 0 540 1 540 Name: count, dtype: int64 - 100%|██████████| 5/5 [00:47<00:00, 9.41s/it] + 100%|██████████| 5/5 [00:34<00:00, 6.87s/it] Fitting model with best params and tuning for best threshold ... - 100%|██████████| 2/2 [00:00<00:00, 4.01it/s]Best score/param set found on validation set: + 100%|██████████| 2/2 [00:00<00:00, 4.37it/s]Best score/param set found on validation set: {'params': {'xgb__early_stopping_rounds': 100, 'xgb__eval_metric': 'logloss', 'xgb__learning_rate': 0.0001, - 'xgb__max_depth': 3, + 'xgb__max_depth': 10, 'xgb__n_estimators': 999}, - 'score': 0.9994444444444446} + 'score': 0.9990277777777777} Best roc_auc: 0.999 SMOTE: Distribution of y values after resampling @@ -1037,52 +1039,34 @@ Fit The Model Return Metrics (Optional) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. code-block:: python - - # ------------------------- VALID AND TEST METRICS ----------------------------- - - print("Validation Metrics") - class_report_val, cm_val = xgb_smote.return_metrics( - X_valid, - y_valid, - optimal_threshold=True, - ) - print() - print("Test Metrics") - class_report_test, cm_test = xgb_smote.return_metrics( - X_test, - y_test, - optimal_threshold=True, - ) - .. 
code-block:: bash Validation Metrics Confusion matrix on set provided: -------------------------------------------------------------------------------- Predicted: - Pos Neg + Pos Neg -------------------------------------------------------------------------------- Actual: Pos 20 (tp) 0 (fn) - Neg 3 (fp) 177 (tn) + Neg 6 (fp) 174 (tn) -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- - {'AUC ROC': 0.9904166666666667, - 'Average Precision': 0.8520172219085262, - 'Brier Score': 0.2096258193295803, - 'Precision/PPV': 0.8695652173913043, + {'AUC ROC': 0.9955555555555555, + 'Average Precision': 0.9378696741854636, + 'Brier Score': 0.20835571676988004, + 'Precision/PPV': 0.7692307692307693, 'Sensitivity': 1.0, - 'Specificity': 0.9833333333333333} + 'Specificity': 0.9666666666666667} -------------------------------------------------------------------------------- precision recall f1-score support - 0 1.00 0.98 0.99 180 - 1 0.87 1.00 0.93 20 + 0 1.00 0.97 0.98 180 + 1 0.77 1.00 0.87 20 - accuracy 0.98 200 - macro avg 0.93 0.99 0.96 200 - weighted avg 0.99 0.98 0.99 200 + accuracy 0.97 200 + macro avg 0.88 0.98 0.93 200 + weighted avg 0.98 0.97 0.97 200 -------------------------------------------------------------------------------- @@ -1090,31 +1074,30 @@ Return Metrics (Optional) Confusion matrix on set provided: -------------------------------------------------------------------------------- Predicted: - Pos Neg + Pos Neg -------------------------------------------------------------------------------- Actual: Pos 19 (tp) 1 (fn) - Neg 2 (fp) 178 (tn) + Neg 3 (fp) 177 (tn) -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- - {'AUC ROC': 0.9951388888888888, - 'Average Precision': 0.9722222222222222, - 'Brier Score': 0.20989021789332263, - 'Precision/PPV': 0.9047619047619048, + {'AUC ROC': 0.9945833333333333, + 'Average Precision': 0.9334649122807017, + 'Brier Score': 0.20820269480995568, + 'Precision/PPV': 0.8636363636363636, 'Sensitivity': 0.95, - 'Specificity': 0.9888888888888889} + 'Specificity': 0.9833333333333333} -------------------------------------------------------------------------------- precision recall f1-score support - 0 0.99 0.99 0.99 180 - 1 0.90 0.95 0.93 20 + 0 0.99 0.98 0.99 180 + 1 0.86 0.95 0.90 20 accuracy 0.98 200 - macro avg 0.95 0.97 0.96 200 - weighted avg 0.99 0.98 0.99 200 + macro avg 0.93 0.97 0.95 200 + weighted avg 0.98 0.98 0.98 200 -------------------------------------------------------------------------------- - .. _Regression: Regression @@ -1132,7 +1115,7 @@ Step 1: Import Necessary Libraries import pandas as pd import numpy as np - ifrom xgboost import XGBRegressor + from xgboost import XGBRegressor from sklearn.impute import SimpleImputer from sklearn.datasets import fetch_california_housing from model_tuner import Model @@ -1219,7 +1202,7 @@ when using ``XGBRegressor``. calibrate=calibrate, estimator=clc, kfold=kfold, - stratify_y=None, + stratify_y=False, grid=tuned_parameters, randomized_grid=rand_grid, boost_early=early_stop, @@ -1243,13 +1226,13 @@ Step 6: Perform Grid Search Parameter Tuning and Retrieve Split Data .. 
code-block:: bash Pipeline Steps: - ======================== + ┌────────────────┐ │ Step 1: xgb │ │ XGBRegressor │ └────────────────┘ - 100%|██████████| 9/9 [00:05<00:00, 1.60it/s]Best score/param set found on validation set: + 100%|██████████| 9/9 [00:22<00:00, 2.45s/it]Best score/param set found on validation set: {'params': {'xgb__colsample_bytree': 0.8, 'xgb__early_stopping_rounds': 10, 'xgb__eval_metric': 'logloss', @@ -1259,7 +1242,7 @@ Step 6: Perform Grid Search Parameter Tuning and Retrieve Split Data 'xgb__subsample': 0.8, 'xgb__tree_method': 'hist'}, 'score': 0.7651490279157868} - Best r2: 0.765 + Best r2: 0.765 Step 7: Fit the Model @@ -1267,7 +1250,11 @@ Step 7: Fit the Model .. code-block:: python - model_xgb.fit(X_train, y_train, validation_data=[X_valid, y_valid]) + model_xgb.fit( + X_train, + y_train, + validation_data=[X_valid, y_valid], + ) Step 8: Return Metrics (Optional) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1398,7 +1385,7 @@ The ``bootstrapper.py`` module provides utility functions for input type checkin Bootstrap Metrics Example ----------------------------- -Continuing from the model output object (``model_xgb``) from the :ref:`regression example ` above, we leverage the ``return_bootstrap_metrics`` method from ``model_tuner_utils.py`` to print bootstrap performance metrics (:math:`R^2` and `explained_variance`) at 95% confidence levels as shown below: +Continuing from the model output object (``model_xgb``) from the :ref:`regression example ` above, we leverage the ``return_bootstrap_metrics`` method from ``model_tuner_utils.py`` to print bootstrap performance metrics (:math:`R^2` and :math:`\text{explained variance}`) at 95% confidence levels as shown below: .. code-block:: python diff --git a/docs/searchindex.js b/docs/searchindex.js index 8c4d24f..b8273b5 100644 --- a/docs/searchindex.js +++ b/docs/searchindex.js @@ -1 +1,5 @@ -Search.setIndex({"alltitles": {"1. Accurate Calculation of Scaling Parameters": [[1, "accurate-calculation-of-scaling-parameters"]], "2. Consistency in Data Transformation": [[1, "consistency-in-data-transformation"]], "3. 
[Remainder of the docs/searchindex.js hunk omitted: minified, machine-generated Sphinx search index content. The hunk replaces the previous single-line ``Search.setIndex`` payload with a regenerated copy, but the incoming (+) side begins with an unresolved merge conflict marker (``<<<<<<< HEAD``), and the visible portion of the re-added payload is identical to the payload being removed.]
"neighbor": [1, 6], "new": 6, "nois": [1, 6], "noisi": 1, "non": [1, 2], "none": 6, "normal": 6, "note": 1, "notebook": [2, 4], "notic": 6, "now": [1, 2], "np": [2, 6], "num_resampl": 6, "number": [1, 2, 6], "numer": 6, "numpi": [3, 6], "o": 6, "object": [2, 4], "observ": [1, 6], "occur": [2, 6], "off": 1, "offer": [4, 6], "often": [1, 6], "older": 2, "onc": 6, "one": [1, 6], "ones": 1, "onli": [1, 2, 6], "onto": 2, "oper": 1, "optim": [1, 3, 6], "optimal_threshold": 6, "option": 4, "order": [1, 2, 6], "org": [0, 5], "organ": 6, "origin": [0, 1], "other": [1, 2, 3, 6], "our": [2, 6], "out": [1, 2], "outcom": [1, 6], "output": [1, 6], "outsid": 2, "outweigh": 6, "over": 1, "overal": 1, "overfit": [1, 3, 6], "overlap": 1, "overlook": 1, "oversampl": [1, 3, 4], "p": 1, "p_1": 1, "p_2": 1, "p_i": 1, "p_n": 1, "packag": 6, "panayioti": 0, "panda": [3, 6], "parallel": 6, "param": 6, "paramet": [2, 3, 4], "parametr": 1, "part": 6, "particularli": [1, 3, 6], "pass": [1, 6], "pattern": 6, "pd": [1, 2, 6], "penal": 1, "per": [2, 6], "perfectli": [1, 6], "perform": [1, 3, 4], "petousi": 0, "pickl": 2, "piecewis": 1, "pip": [3, 6], "pip25": 2, "pipelin": [1, 2, 3, 4], "pipeline_assembli": 6, "pipeline_step": [1, 2, 6], "pipelineclass": 6, "placehold": 1, "platt": 4, "pleas": [1, 6], "plot": 6, "plt": 6, "pmatrix": 1, "po": 6, "point": [1, 6], "poor": 6, "poorli": 6, "posit": [1, 6], "possibl": [1, 6], "power": [1, 3], "ppv": 6, "practic": [1, 6], "practition": 1, "pre": 6, "precis": [1, 6], "predict": [4, 6], "predict_proba": 6, "prefix": 6, "preprocess": [1, 6], "preprocess_": 6, "preprocess_imputer_imput": 6, "preprocess_scaler_standardscalar": 6, "preprocessing_step": 6, "preprocessor": 1, "prerequisit": 4, "present": 1, "preserv": 1, "pretti": 2, "prevent": [3, 4], "previou": 2, "previous": 1, "primari": 1, "print": [2, 6], "print_pipelin": 6, "print_result": 6, "print_selected_best_featur": 6, "prior": 1, "priorit": 1, "prob_pred_calibr": 6, "prob_pred_uncalibr": 6, "prob_true_calibr": 6, "prob_true_uncalibr": 6, "probabilist": 1, "probabl": [1, 3, 6], "problem": [1, 6], "proceed": 1, "process": [1, 2, 6], "process_imbalance_sampl": 6, "produc": [1, 6], "properli": 6, "properti": 1, "proport": [1, 6], "provid": [1, 3, 6], "publish": 0, "purpos": 4, "py": [2, 6], "pypi": [2, 3], "pyplot": 6, "pyproject": 2, "python": 3, "quad": 1, "quickli": 6, "r": 6, "r2": 6, "race": 6, "rais": [1, 6], "rand_grid": 6, "random": [1, 6], "random_st": 6, "randomized_grid": 6, "randomli": 6, "randomoversampl": 6, "randomundersampl": 6, "rang": [1, 6], "rare": 6, "rate": 1, "rather": 1, "ratio": [1, 6], "raw": 1, "re": 2, "readili": 6, "readm": 2, "real": 6, "recal": [1, 6], "recommend": 1, "recurs": 3, "redfin": 6, "redistribut": 6, "reduc": [1, 6], "ref": 2, "refactor": 2, "refer": [1, 4, 6], "reflect": 1, "regard": 2, "region": 1, "regress": 4, "regression_report": 6, "regression_report_kfold": 6, "regular": 6, "relat": 2, "relationship": 1, "releas": 2, "reli": 1, "reliabl": 6, "remov": [1, 2, 6], "renam": [2, 6], "repeatedli": 1, "replac": 1, "report": [2, 4], "report_model_metr": 6, "repositori": [4, 5, 6], "repres": [1, 2], "represent": 6, "reproduc": 6, "requir": [1, 2, 3, 6], "resampl": [2, 4], "research": 6, "reset": [2, 6], "reset_estim": 6, "resolut": 2, "resourc": 6, "respect": 6, "result": 1, "retriev": 4, "return": 4, "return_bootstrap_metr": [4, 6], "return_metr": 6, "rfe": [3, 6], "rightarrow": 1, "risk": [1, 6], "rmse": 6, "robust": [3, 6], "roc": 6, "roc_auc": 6, "root": 6, "rot": 6, "rout": 6, 
"routin": 1, "run": 6, "runtim": 1, "runtimeerror": 6, "runtimewarn": 1, "sadr": 5, "same": [1, 2], "sampl": [2, 4, 6], "sampler": 6, "sampling_method": [4, 6], "save": 2, "scale": [2, 3, 4, 6], "scenario": 6, "scienc": 0, "scikit": 3, "scipi": 3, "score": [4, 6], "seamlessli": 6, "search": 4, "section": 6, "see": 6, "seed": 6, "segment": [1, 2], "select": [3, 6], "selectkbest": [2, 3], "self": [2, 6], "sensit": [1, 6], "separ": [1, 6], "sequenc": [1, 6], "seri": [1, 6], "set": [1, 6], "setup": 2, "sever": [1, 6], "shap": 6, "shape": [4, 6], "should": [1, 2, 6], "show": 6, "shown": 6, "shpaner": 0, "sigma": 1, "sigmoid": [3, 6], "significantli": [1, 6], "sim": 1, "similar": [1, 6], "simpl": 6, "simpleimput": [1, 3, 6], "simpli": 6, "simplifi": 2, "simultan": 2, "sinc": 1, "singl": [1, 6], "size": 6, "skew": 1, "sklearn": 6, "smote": [2, 3, 4], "smoteenn": 1, "smotetomek": 1, "so": [1, 6], "softwar": [0, 2], "solut": 4, "some": [1, 6], "sort": 6, "space": 1, "spam": 6, "special": 0, "specif": [1, 2, 6], "specifi": [1, 2, 4], "split": [1, 2, 3, 4], "sqrt": 1, "squar": [1, 6], "squeez": [1, 6], "stage": 6, "standard": [1, 6], "standardscal": [1, 6], "standardscalar": 6, "startswith": 6, "state": 1, "statist": 1, "step": [2, 4], "step_0": 6, "step_1": 6, "stop": [2, 3, 6], "store": 2, "str": 6, "strat_key_val_test": 2, "strategi": [3, 6], "stratif": [2, 4, 6], "stratifi": [1, 2, 3, 6], "stratify_col": [1, 2, 6], "stratify_i": [1, 2, 6], "stratify_kei": 2, "string": 2, "structur": 1, "struggl": 6, "studi": [4, 5], "subsampl": 6, "subsequ": 1, "subset": 1, "suit": 6, "sum": 6, "sum_": 1, "summari": 4, "supervis": 6, "support": [0, 2, 3, 6], "synthet": 4, "system": 3, "t": 6, "take": [1, 6], "taken": 2, "target": [2, 3, 4, 6], "task": [3, 6], "tau": 1, "techniqu": [3, 4], "temporarili": 2, "tend": 6, "test": [2, 6], "test_model": 6, "test_siz": 6, "text": [1, 6], "th": 1, "than": 1, "thank": 0, "thei": [1, 6], "them": [1, 6], "therefor": [1, 6], "thi": [0, 1, 2, 3, 6], "thoroughli": 6, "three": 6, "threshold": [2, 3, 4, 6], "through": 6, "thu": 6, "time": [1, 2], "titan": 6, "titl": [0, 6], "tn": 6, "toml": 2, "too": 1, "tool": 3, "top": [1, 6], "toward": 6, "tp": 6, "tqdm": 3, "track": 6, "trade": 1, "tradit": 1, "train": [3, 4, 6], "train_siz": 6, "train_val_test": 2, "train_val_test_split": [2, 6], "transact": 6, "transform": [4, 6], "translat": 0, "treat": [1, 6], "tree": 6, "trial": [4, 5], "trigger": 1, "true": [1, 6], "trust": 1, "truth": 6, "tune": [1, 2, 3, 4], "tune_threshold_fbeta": [2, 6], "tuned_paramet": 6, "tuned_parameters_xgb": 6, "tuner": 6, "two": [1, 6], "txt": 2, "type": 6, "typeerror": 6, "typic": 6, "u": 1, "uci": [5, 6], "ucimlrepo": 6, "ucla": 0, "uncalibr": 6, "undefin": 1, "under": [3, 6], "underli": 1, "underrepres": 6, "undersampl": [1, 6], "understand": [1, 6], "unequ": 6, "unexpect": 6, "uniform": 1, "uniqu": 6, "unlik": 1, "unnecessari": [1, 2, 6], "unpredict": 1, "unrealist": 1, "unreli": 1, "unseen": 1, "unsupport": 6, "unus": 2, "up": 2, "updat": 2, "upper": 6, "url": 0, "us": [1, 2, 3, 4], "usag": 2, "user": 6, "userwarn": 1, "util": [2, 6], "va": 6, "valid": [3, 4, 6], "validation_data": 6, "validation_s": 6, "valu": [1, 4], "value_count": 6, "valueerror": 6, "var": [1, 6], "variabl": [2, 3, 4, 6], "varianc": 4, "varieti": 6, "variou": [3, 6], "vdot": 1, "vector": 1, "verbos": 2, "versatil": 3, "version": [0, 3, 4], "visual": 6, "w": [1, 5], "wa": [0, 1, 2], "wai": [1, 6], "warn": 1, "we": [1, 6], "weight": [1, 6], "welcom": 4, "well": [1, 6], "were": 2, 
"what": 4, "when": [1, 2, 3, 6], "where": [1, 2, 6], "whether": 6, "which": [1, 3, 6], "while": [1, 6], "wide": [1, 6], "width": 6, "wish": 6, "within": [1, 6], "without": [1, 6], "work": [0, 1, 2], "workflow": [3, 6], "world": 6, "would": 1, "wrong": 2, "x": [1, 2, 4], "x_": 1, "x_i": 1, "x_j": 1, "x_test": 6, "x_train": 6, "x_valid": 6, "x_valid_test": 2, "xgb": 6, "xgb_": 6, "xgb__colsample_bytre": 6, "xgb__early_stopping_round": 6, "xgb__eval_metr": 6, "xgb__learning_r": 6, "xgb__max_depth": 6, "xgb__n_estim": 6, "xgb__subsampl": 6, "xgb__tree_method": 6, "xgb_definit": 6, "xgb_early_bootstrap_test": 2, "xgb_model": 6, "xgb_name": 6, "xgb_smote": 6, "xgbclassifi": 4, "xgbearli": 6, "xgboost": [2, 3, 4], "xgbregressor": 4, "xlabel": 6, "y": [1, 2, 4], "y_1": 1, "y_2": 1, "y_i": 1, "y_n": 1, "y_pred": 6, "y_pred_prob": 6, "y_prob_calibr": 6, "y_prob_uncalibr": 6, "y_test": 6, "y_test_pr": 6, "y_train": 6, "y_true": 6, "y_valid": 6, "y_valid_proba": 6, "y_valid_test": 2, "year": 0, "yellow": 6, "yet": 6, "ylabel": 6, "you": [0, 1, 3, 6], "your": [1, 3, 6], "z": 1, "z_": 1, "zenodo": [0, 2], "zero": 4, "zero_variance_column": [1, 6]}, "titles": ["GitHub Repository", "Zero Variance Columns", "Changelog", "Welcome to Model Tuner\u2019s Documentation!", "Model Tuner Documentation", "References", "iPython Notebooks"], "titleterms": {"": 3, "0": 2, "010a": 2, "011a": 2, "012a": 2, "013a": 2, "014a": 2, "02a": 2, "05a": 2, "06a": 2, "07a": 2, "08a": 2, "09a": 2, "1": [1, 6], "10": 6, "15a": 2, "16a": 2, "2": [1, 6], "3": [1, 6], "4": 6, "5": 6, "6": 6, "7": 6, "8": 6, "9": 6, "A": 1, "Its": 1, "The": 6, "These": 6, "about": 4, "accordingli": 6, "accur": 1, "accuraci": 1, "acknowledg": 0, "address": 6, "after": 6, "aid": 6, "an": 6, "befor": 1, "bia": 1, "binari": 6, "bootstrap": 6, "brier": 1, "calcul": 1, "calibr": [1, 6], "california": 6, "caveat": [1, 4], "changelog": 2, "check": 6, "cite": 0, "class": [1, 6], "classif": 6, "clinic": 6, "column": [1, 6], "configur": 6, "consist": 1, "creat": 6, "creation": 1, "cross": 1, "curv": 1, "data": [1, 6], "dataset": 6, "defin": 6, "depend": 1, "distort": 1, "distribut": [1, 6], "document": [3, 4], "doe": 3, "drop": 6, "effect": 1, "exampl": [1, 6], "extract": 6, "fit": 6, "from": 1, "function": 6, "gener": 6, "get": 4, "github": 0, "goal": 1, "grid": 6, "group": 6, "guid": 4, "helper": 6, "hous": 6, "hyperparamet": 6, "illustr": 1, "imbal": 6, "imbalanc": [1, 6], "impact": 1, "import": 6, "imput": 1, "init": 6, "initi": 6, "input": 6, "instal": 3, "instanc": 6, "ipython": 6, "isoton": 1, "kei": 6, "learn": [1, 6], "librari": 6, "limit": 1, "load": 6, "logist": 1, "machin": 6, "manag": 6, "mathemat": 1, "method": 6, "metric": 6, "minor": 6, "mitig": 1, "model": [0, 1, 3, 4, 6], "necessari": 6, "need": 6, "notebook": 6, "object": 6, "offer": 3, "option": 6, "oversampl": 6, "paramet": [1, 6], "perform": 6, "pipelin": 6, "platt": 1, "predict": 1, "prerequisit": 3, "prevent": 1, "purpos": 6, "refer": 5, "regress": [1, 6], "report": 6, "repositori": 0, "resampl": [1, 6], "retriev": 6, "return": 6, "sampl": 1, "scale": 1, "score": 1, "search": 6, "shape": 1, "smote": [1, 6], "solut": 1, "specifi": 6, "split": 6, "start": 4, "step": 6, "stratif": 1, "studi": 6, "summari": [1, 6], "synthet": [1, 6], "target": 1, "techniqu": [1, 6], "threshold": 1, "train": 1, "transform": 1, "trial": 6, "tune": 6, "tuner": [0, 3, 4], "us": 6, "usag": 4, "valid": 1, "valu": 6, "variabl": 1, "varianc": [1, 6], "version": 2, "welcom": 3, "what": 3, "x": 6, "xgbclassifi": 6, 
"xgboost": 6, "xgbregressor": 6, "y": 6, "zero": [1, 6]}}) +======= +Search.setIndex({"alltitles": {"1. Accurate Calculation of Scaling Parameters": [[1, "accurate-calculation-of-scaling-parameters"]], "2. Consistency in Data Transformation": [[1, "consistency-in-data-transformation"]], "3. Prevention of Distortion in Scaling": [[1, "prevention-of-distortion-in-scaling"]], "AIDS Clinical Trials Group Study": [[6, "aids-clinical-trials-group-study"]], "About Model Tuner": [[4, null]], "Acknowledgements": [[0, "acknowledgements"]], "Addressing Class Imbalance in Machine Learning": [[6, "addressing-class-imbalance-in-machine-learning"]], "Bias from Class Distribution": [[1, "bias-from-class-distribution"]], "Binary Classification": [[6, "binary-classification"]], "Binary Classification Examples": [[6, "binary-classification-examples"]], "Bootstrap Metrics": [[6, "bootstrap-metrics"]], "Bootstrap Metrics Example": [[6, "bootstrap-metrics-example"]], "Brier Score": [[1, "brier-score"]], "Calibration Curve": [[1, "calibration-curve"]], "California Housing with XGBoost": [[6, "california-housing-with-xgboost"]], "Caveats": [[4, null]], "Caveats in Imbalanced Learning": [[1, "caveats-in-imbalanced-learning"]], "Changelog": [[2, null]], "Citing Model Tuner": [[0, "citing-model-tuner"]], "Classification Report (Optional)": [[6, "classification-report-optional"]], "Column Stratification with Cross-Validation": [[1, "column-stratification-with-cross-validation"]], "Cross-Validation and Stratification": [[1, "cross-validation-and-stratification"]], "Define Hyperparameters for XGBoost": [[6, "define-hyperparameters-for-xgboost"]], "Define The Model object": [[6, "define-the-model-object"]], "Dependent Variable": [[1, "dependent-variable"]], "Effects on Model Training": [[1, "effects-on-model-training"]], "Example of Synthetic Sample Creation": [[1, "example-of-synthetic-sample-creation"]], "Example: Calibration in Logistic Regression": [[1, "example-calibration-in-logistic-regression"]], "Fit The Model": [[6, "fit-the-model"]], "Generating an Imbalanced Dataset": [[6, "generating-an-imbalanced-dataset"]], "Getting Started": [[4, null]], "GitHub Repository": [[0, null]], "Goal of Calibration": [[1, "goal-of-calibration"]], "Helper Functions": [[6, "helper-functions"]], "Imbalanced Learning": [[6, "imbalanced-learning"]], "Impact of Resampling Techniques": [[1, "impact-of-resampling-techniques"]], "Imputation Before Scaling": [[1, "imputation-before-scaling"]], "Initalize and Configure The Model": [[6, "initalize-and-configure-the-model"]], "Input Parameters": [[6, "input-parameters"]], "Installation": [[3, "installation"]], "Isotonic Regression": [[1, "isotonic-regression"]], "Key Methods and Functionalities": [[6, "key-methods-and-functionalities"]], "Limitations of Accuracy": [[1, "limitations-of-accuracy"]], "Mitigating the Caveats": [[1, "mitigating-the-caveats"]], "Model Calibration": [[1, "model-calibration"]], "Model Tuner Documentation": [[4, null]], "Perform Grid Search Parameter Tuning and Retrieve Split Data": [[6, "perform-grid-search-parameter-tuning-and-retrieve-split-data"]], "Platt Scaling": [[1, "platt-scaling"]], "Prerequisites": [[3, "prerequisites"]], "Purpose of Using These Techniques": [[6, "purpose-of-using-these-techniques"]], "References": [[5, null]], "Regression": [[6, "regression"]], "Regression Example": [[6, "regression-example"]], "Return Metrics (Optional)": [[6, "return-metrics-optional"]], "SMOTE: A Mathematical Illustration": [[1, "smote-a-mathematical-illustration"]], 
"SMOTE: Distribution of y values after resampling": [[6, "smote-distribution-of-y-values-after-resampling"]], "Solution": [[1, "solution"]], "Step 10: Calibrate the Model (if needed)": [[6, "step-10-calibrate-the-model-if-needed"]], "Step 1: Import Necessary Libraries": [[6, "step-1-import-necessary-libraries"], [6, "id2"]], "Step 2: Load the Dataset": [[6, "step-2-load-the-dataset"]], "Step 2: Load the dataset, define X, y": [[6, "step-2-load-the-dataset-define-x-y"]], "Step 3: Check for zero-variance columns and drop accordingly": [[6, "step-3-check-for-zero-variance-columns-and-drop-accordingly"]], "Step 3: Create an Instance of the XGBRegressor": [[6, "step-3-create-an-instance-of-the-xgbregressor"]], "Step 4: Create an Instance of the XGBClassifier": [[6, "step-4-create-an-instance-of-the-xgbclassifier"]], "Step 4: Define Hyperparameters for XGBoost": [[6, "step-4-define-hyperparameters-for-xgboost"]], "Step 5: Define Hyperparameters for XGBoost": [[6, "step-5-define-hyperparameters-for-xgboost"]], "Step 5: Initialize and Configure the Model": [[6, "step-5-initialize-and-configure-the-model"]], "Step 6: Initialize and Configure the Model": [[6, "step-6-initialize-and-configure-the-model"]], "Step 6: Perform Grid Search Parameter Tuning and Retrieve Split Data": [[6, "step-6-perform-grid-search-parameter-tuning-and-retrieve-split-data"]], "Step 7: Fit the Model": [[6, "step-7-fit-the-model"]], "Step 7: Perform Grid Search Parameter Tuning": [[6, "step-7-perform-grid-search-parameter-tuning"]], "Step 8: Fit the Model": [[6, "step-8-fit-the-model"]], "Step 8: Return Metrics (Optional)": [[6, "step-8-return-metrics-optional"]], "Step 9: Return Metrics (Optional)": [[6, "step-9-return-metrics-optional"]], "Summary": [[1, "summary"]], "Synthetic Minority Oversampling Technique (SMOTE)": [[6, "synthetic-minority-oversampling-technique-smote"]], "Target Variable Shape and Its Effects": [[1, "target-variable-shape-and-its-effects"]], "Techniques to Address Class Imbalance": [[6, "techniques-to-address-class-imbalance"]], "Threshold-Dependent Predictions": [[1, "threshold-dependent-predictions"]], "Usage Guide": [[4, null]], "Version 0.0.010a": [[2, "version-0-0-010a"]], "Version 0.0.011a": [[2, "version-0-0-011a"]], "Version 0.0.012a": [[2, "version-0-0-012a"]], "Version 0.0.013a": [[2, "version-0-0-013a"]], "Version 0.0.014a": [[2, "version-0-0-014a"]], "Version 0.0.02a": [[2, "version-0-0-02a"]], "Version 0.0.05a": [[2, "version-0-0-05a"]], "Version 0.0.06a": [[2, "version-0-0-06a"]], "Version 0.0.07a": [[2, "version-0-0-07a"]], "Version 0.0.08a": [[2, "version-0-0-08a"]], "Version 0.0.09a": [[2, "version-0-0-09a"]], "Version 0.0.15a": [[2, "version-0-0-15a"]], "Version 0.0.16a": [[2, "version-0-0-16a"]], "Welcome to Model Tuner\u2019s Documentation!": [[3, null]], "What Does Model Tuner Offer?": [[3, "what-does-model-tuner-offer"]], "Zero Variance Columns": [[1, null]], "iPython Notebooks": [[6, null]]}, "docnames": ["about", "caveats", "changelog", "getting_started", "index", "references", "usage_guide"], "envversion": {"sphinx": 64, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1}, "filenames": ["about.rst", "caveats.rst", "changelog.rst", "getting_started.rst", "index.rst", "references.rst", 
"usage_guide.rst"], "indexentries": {"built-in function": [[6, "check_input_type", false], [6, "evaluate_bootstrap_metrics", false], [6, "return_bootstrap_metrics", false], [6, "sampling_method", false]], "check_input_type()": [[6, "check_input_type", false]], "evaluate_bootstrap_metrics()": [[6, "evaluate_bootstrap_metrics", false]], "model (built-in class)": [[6, "Model", false]], "return_bootstrap_metrics()": [[6, "return_bootstrap_metrics", false]], "sampling_method()": [[6, "sampling_method", false]]}, "objects": {"": [[6, 0, 1, "", "Model"], [6, 1, 1, "", "check_input_type"], [6, 1, 1, "", "evaluate_bootstrap_metrics"], [6, 1, 1, "", "return_bootstrap_metrics"], [6, 1, 1, "", "sampling_method"]]}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"]}, "objtypes": {"0": "py:class", "1": "py:function"}, "terms": {"": [1, 2, 4, 6], "0": [0, 1, 3, 4, 6], "00": 6, "000": 6, "0001": 6, "01": 6, "010a": 4, "011a": 4, "012a": 4, "013a": 4, "014a": 4, "017a": 3, "01it": [], "02a": 4, "05": 6, "05a": 4, "05it": 6, "06a": 4, "07a": 4, "08a": 4, "09a": 4, "1": [2, 3, 4], "10": [0, 3, 4, 5], "100": 6, "1000": 6, "104": 6, "105": [], "11": 3, "11a": 2, "12": 3, "12727322": 0, "14": 3, "15a": 4, "16628708993634742": 6, "16665653153377272": [], "16713189436073958": 6, "16737745981389285": [], "16a": [0, 4], "16gwnrajvpujties5y1gfrdx1soasdv6m": [], "172": [], "174": 6, "175": 5, "177": 6, "178": [], "180": 6, "19": [3, 6], "1998": 5, "1d": 1, "1e": 6, "2": [3, 4], "20": 6, "200": 6, "2024": 0, "20820269480995568": 6, "20835571676988004": 6, "2096258193295803": [], "20989021789332263": [], "21": [3, 6], "22": 6, "222": 6, "23": 3, "24": 3, "241": [], "24205514192581173": [], "24432": 5, "245": 6, "246": 6, "248": [], "26": 3, "26315186452865597": 6, "2633964855111536": [], "2672762813568116": 6, "28411432705731066": 6, "3": [3, 4], "30": 6, "300": 6, "304": [], "3066172248224347": 6, "30it": [], "315": 6, "323": [], "324": 6, "34": 6, "35": 6, "35007797000749163": [], "358": 6, "36": 6, "3743548199982513": 6, "37it": 6, "3830825326824073": 6, "3nya_tqwy6hr": [], "4": [1, 3, 4], "41": [], "42": 6, "428": 6, "45": 6, "47": [], "5": [1, 3, 4], "500": [2, 6], "50it": 6, "5132216728774747": [], "5281": 0, "533023758436067": 6, "540": 6, "5459770114942529": 6, "5469613259668509": [], "5491329479768786": 6, "55": 6, "5502958579881657": [], "5537302816556403": 6, "58": 1, "6": [3, 4], "60it": [], "66": 3, "67": 6, "68": 6, "69": 6, "7": [3, 4], "70": 6, "71": 6, "74": [], "7461300309597523": [], "75": 6, "7561728395061729": 6, "7592592592592593": 6, "76": 6, "7647433075624044": 6, "7647451659057567": 6, "765": 6, "7651490279157868": 6, "7654320987654321": [], "7692307692307693": 6, "77": 6, "770853": 6, "777898": 6, "7778980": [], "78": 6, "781523": 6, "788341": 6, "7888925135381788": 6, "7888942913974833": 6, "79": 6, "792193": 6, "7979050719771986": [], "7979060590722392": [], "7979488661159093": [], "798": [], "798785": 6, "7992275185850191": 6, "8": [3, 4], "80": 6, "8023014087345259": 6, "8025676192819657": [], "81": 6, "8169018952192892": [], "81it": [], "82": 6, "83": 6, "84": 6, "85": 6, "8520172219085262": [], "86": 6, "8636363636363636": 6, "8695652173913043": [], "87": 6, "88": 6, "89": 6, "890": 6, "8942307692307693": [], "9": [1, 4], "90": 6, "900": 6, "9047619047619048": [], "91": 6, "9134615384615384": 6, "92": 6, "926": [], "9260891500474834": [], "928": 6, "9280033238366572": 6, "93": 6, "9334649122807017": 6, "9343063541205956": [], "934576804368471": 
6, "9378696741854636": 6, "94": 6, "9428571428571428": [], "95": 6, "96": 6, "9666666666666667": 6, "97": 6, "9722222222222222": [], "98": 6, "9833333333333333": 6, "9888888888888889": [], "99": 6, "9904166666666667": [], "9945833333333333": 6, "9951388888888888": [], "9955555555555555": 6, "999": [1, 6], "9990277777777777": 6, "9994444444444446": [], "A": [4, 6], "AND": 6, "By": 1, "For": [1, 3, 6], "If": [1, 6], "In": [1, 2, 6], "It": [1, 3, 6], "Its": 4, "No": 6, "Not": 6, "On": 1, "One": [1, 6], "The": [1, 3, 4], "There": 2, "These": [1, 4], "To": [1, 6], "With": 1, "_": 1, "__colsample_bytre": 6, "__early_stopping_round": 6, "__eval_metr": 6, "__init__": 6, "__learning_r": 6, "__max_depth": 6, "__n_estim": 6, "__param_nam": 6, "__subsampl": 6, "__tree_method": 6, "__verbos": 6, "_confusion_matrix_print": 6, "_i": 1, "_j": 1, "_k": 1, "_regression_exampl": [], "abil": 6, "about": 1, "abov": 6, "abram": 5, "absolut": 6, "access": [0, 6], "accompani": 6, "accordingli": 4, "account": 1, "accur": 4, "accuraci": [4, 6], "achiev": [1, 2], "acknowledg": 4, "across": [1, 2, 3, 6], "activ": 6, "actual": [1, 6], "ad": [1, 2, 6], "adasyn": [2, 3, 6], "add": 6, "addit": [1, 6], "addition": [1, 6], "address": [1, 4], "adequ": 1, "adjust": 1, "advanc": 6, "aforement": 1, "after": [1, 4], "aid": [4, 5], "aids_clinical_": 6, "aids_clinical_trials_group_study_175": 6, "aim": 6, "alex": 0, "algorithm": [1, 6], "align": 1, "all": [1, 2, 3, 6], "alloc": 6, "allow": [2, 3, 6], "along": 1, "also": [1, 6], "altern": 1, "alwai": 2, "amplifi": 1, "an": 4, "analysi": 1, "angel": 6, "ani": 1, "anoth": [1, 6], "anova": 1, "apach": 2, "appli": [1, 3, 6], "applic": [1, 6], "approach": 1, "appropri": 6, "approx": 1, "ar": [0, 1, 2, 3, 6], "arrai": [1, 6], "arthur": [0, 2], "artifici": 1, "ascii": 6, "assert": 2, "assess": [1, 3, 6], "assign": [2, 6], "assum": 1, "assumpt": 1, "attempt": 1, "attributeerror": 6, "auc": 6, "author": 0, "autokera": 2, "autokerasclassifi": 2, "automat": [1, 3], "avail": [1, 2, 6], "averag": 6, "average_precis": 6, "avg": 6, "avoid": [1, 2, 6], "ax": 6, "axi": 2, "b": 1, "back": 6, "balanc": [1, 2, 3, 6], "bar": [1, 6], "base": [1, 3, 6], "bayesian": 6, "bayessearchcv": 6, "becaus": [1, 2], "becom": 1, "been": [1, 2, 6], "befor": [2, 3, 4, 6], "begin": 1, "behavior": 1, "being": [2, 6], "below": [1, 3, 6], "best": 6, "best_param": 2, "best_params_per_scor": 6, "beta": 6, "better": [1, 6], "between": [1, 3, 6], "beyond": 6, "bia": [4, 6], "bias": [1, 6], "bin": [1, 6], "binari": 4, "block": 6, "bool": 6, "boost": [2, 6], "boost_earli": 6, "bootstrap": [3, 4], "bootstrapp": [2, 6], "both": [1, 2, 6], "breast": [], "brier": [4, 6], "bug": 2, "bui": 0, "build": 6, "c": 1, "c5g896": 5, "c_": 1, "calcul": [4, 6], "calibr": [2, 3, 4], "calibrate_report": 6, "calibratemodel": 6, "calibration_curv": 6, "calibration_method": 6, "california": 4, "california_h": [], "call": 6, "can": [0, 1, 3, 6], "cancer": [], "cannot": 1, "captur": [1, 6], "care": [1, 6], "carefulli": 1, "case": [1, 2], "catboost": [2, 3], "categor": 6, "categori": 6, "caus": 1, "cdot": 1, "center": 6, "challeng": [1, 6], "chang": [1, 2, 6], "changelog": 4, "char": 2, "check": [1, 4], "check_input_typ": [4, 6], "chunk": 2, "ci": 6, "cite": 4, "clariti": 6, "class": [2, 3, 4], "class_label": 6, "class_proport": 6, "class_report_test": 6, "class_report_v": 6, "classif": [1, 3, 4], "classifi": [1, 6], "classification_report": 6, "clc": 6, "clean": 2, "click": 0, "clinic": [0, 4, 5], "close": 1, "cluster": 1, "cm_test": 6, "cm_val": 6, 
"code": [1, 2, 6], "codebas": 0, "col": 6, "colab": 6, "color": 6, "column": [2, 4], "com": [], "combin": [1, 6], "come": 1, "command": 6, "comment": 2, "common": 1, "commonli": 6, "compar": 6, "compat": 3, "complet": [1, 2], "comprehens": 6, "comput": [1, 6], "concat": 2, "condit": 1, "conduct": 3, "conf_mat_class_kfold": 6, "conf_matrix": 6, "confid": 6, "configur": 4, "conflict": 1, "confus": 6, "connect": 1, "consid": [1, 2], "consist": 4, "constant": 1, "constraint": [1, 2], "construct": 1, "contain": [2, 6], "context": [1, 6], "continu": 6, "contrast": 1, "contribut": [0, 1], "contributor": 0, "convers": 1, "convert": [1, 6], "coordin": [], "correct": [1, 2], "correctli": 1, "count": [2, 6], "cpu": 6, "creat": [1, 4], "creation": [3, 4], "critic": [1, 6], "cross": [3, 4, 6], "crucial": [1, 6], "ctsi": 0, "current": [1, 3], "curs": 1, "curv": [4, 6], "custom": [2, 3, 6], "custom_scor": 6, "d": [1, 5], "d_1": 1, "d_2": 1, "d_j": 1, "d_k": 1, "data": [2, 3, 4], "dataconversionwarn": 1, "datafram": [1, 6], "dataset": [1, 3, 4], "decis": [1, 6], "decreas": 1, "default": [1, 6], "defin": [1, 4], "degrad": 1, "delta": 1, "demonstr": 6, "denot": 1, "depend": [2, 3, 4, 6], "deploi": 6, "deprec": 2, "depth": 6, "design": [1, 3, 6], "despit": 1, "detail": 6, "detect": 6, "determin": 1, "dev": 2, "develop": 3, "deviat": 1, "diagnosi": [1, 6], "dict": 6, "dictionari": 6, "differ": [1, 2, 3], "dimens": 1, "dimension": 1, "directli": 3, "discrep": 1, "diseas": 6, "displai": 6, "disrupt": 1, "distinct": 6, "distinguish": 6, "distort": 4, "distribut": [3, 4], "divid": 1, "divis": 1, "do": [2, 6], "document": 6, "doe": [1, 4], "doi": [0, 5], "domin": [1, 6], "dot": 1, "dr": 0, "draw": 6, "drawn": 1, "drive": [], "drop": [1, 4], "dtype": 6, "due": 1, "duplic": 6, "dure": [1, 2, 6], "e": [1, 6], "each": [1, 6], "earli": [2, 3, 6], "early_stop": 6, "easier": 6, "easili": 6, "effect": [3, 4, 6], "either": [2, 6], "el": 5, "elimin": 3, "empir": 1, "empti": [1, 6], "enabl": [3, 6], "encount": 1, "end": 1, "engin": 1, "enhanc": 2, "ensur": [1, 2, 3, 6], "entir": 1, "enumer": 6, "equal": [1, 6], "equat": 1, "equival": [], "error": [1, 2, 6], "especi": 6, "essenc": 6, "essenti": [1, 6], "estat": 6, "estim": [1, 2, 3, 6], "estimator_nam": 6, "estimator_name_xgb": [], "eta": [], "etc": 2, "eval_set": [], "evalu": [1, 3, 6], "evaluate_bootstrap_metr": [2, 4, 6], "even": 1, "event": 6, "examin": 6, "exampl": 4, "exceed": 2, "except": 6, "excess": 1, "execut": 6, "exist": [1, 6], "exp": 1, "expect": [1, 6], "explain": 6, "explained_vari": 6, "explan": 1, "explicit": 6, "explicitli": 6, "express": 1, "extend": 6, "extract": 2, "extrem": 1, "f": [1, 6], "f1": [1, 6], "f1_beta_tun": 6, "f1_weight": 6, "f_i": 1, "facilit": 3, "fail": 1, "failur": 1, "fair": 1, "fairli": 6, "fall": 1, "fals": [1, 6], "far": 1, "favor": [1, 2, 6], "feat_num": 1, "featur": [1, 3, 6], "feature_": 6, "feature_nam": 6, "feature_select": 6, "fetch": 6, "fetch_california_h": 6, "fetch_ucirepo": 6, "figsiz": 6, "figur": 6, "file": [2, 6], "filter": 2, "find": 1, "fine": [3, 6], "first": 1, "fit": [1, 2, 4], "fix": [2, 6], "flexibl": [3, 6], "flip_i": 6, "float": 6, "float64": [], "fn": 6, "focu": 1, "fold": [1, 3, 6], "follow": [1, 2, 3, 6], "form": 1, "format": 6, "formul": 1, "forthcom": 2, "found": 6, "fp": 6, "frac": 1, "fraction": 1, "fraud": 6, "fraudul": 6, "free": 1, "frequenc": [1, 6], "frequent": 6, "from": [2, 3, 4, 6], "full": 1, "fulli": 1, "function": [1, 2, 3, 4], "funnel": 0, "funnell_2024_12727322": 0, "g": [1, 6], "gb": [], 
"gender": 6, "gener": [1, 3, 4], "generaliz": 1, "geq": 1, "get": 6, "get_best_score_param": 6, "get_cross_valid": 6, "get_test_data": 6, "get_train_data": 6, "get_valid_data": 6, "github": 4, "given": 1, "goal": [4, 6], "googl": 6, "grid": 4, "grid_search_param_tun": 6, "gridsearchcv": [], "ground": 6, "group": [4, 5], "guidanc": 0, "ha": [1, 2, 6], "had": 1, "hand": 1, "handl": [1, 3, 6], "happen": 2, "harmon": 1, "hat": 1, "have": [2, 6], "haven": 6, "healthcar": 6, "heavili": 1, "help": [1, 6], "helper": 4, "here": [2, 3, 6], "hi": 0, "high": 1, "higher": 3, "highli": 1, "highlight": 1, "hist": 6, "histori": 2, "hold": 1, "homogen": 1, "hous": 4, "how": 6, "howev": 1, "html": 6, "http": [0, 5], "hybrid": 6, "hyperparamet": [2, 3, 4], "i": [1, 2, 3, 6], "id": 6, "ident": 1, "identifi": 1, "ifrom": [], "ij": 1, "illustr": 4, "imbal": [1, 4], "imbalanc": [2, 3, 4], "imbalance_sampl": 6, "imblearn": 6, "impact": 4, "implement": [2, 3, 6], "import": [1, 2, 4], "importerror": 6, "improp": 1, "improperli": 6, "improv": [3, 6], "imput": [2, 3, 4, 6], "inaccur": 1, "includ": [1, 3, 6], "incomplet": 1, "inconsist": 1, "incorrect": [1, 6], "increas": [1, 6], "index": 6, "indexerror": 6, "indic": [1, 6], "infinit": 1, "inflat": 1, "influenc": 1, "inform": [1, 6], "informat": 0, "inher": [1, 6], "init": 4, "initi": 4, "initialis": 2, "input": [1, 4], "insid": [2, 6], "instal": [4, 6], "instanc": [1, 4], "instead": [1, 2, 6], "institut": 0, "insuffici": 6, "int": 6, "int64": 6, "integr": [1, 3], "interpol": [1, 6], "interpret": 1, "interv": [1, 6], "introduc": [1, 2], "invalid": [1, 6], "invalu": 0, "involv": [1, 2], "ipython": 4, "isinst": 1, "isoton": [3, 4, 6], "issu": [1, 2, 6], "iter": 6, "its": [1, 6], "itself": 2, "j": 1, "job": 6, "joblib": 3, "jul": 0, "just": 1, "k": [1, 3, 6], "kei": [0, 1, 2, 3, 4], "keyerror": 6, "kf": 6, "kfold": [2, 6], "kfold_split": 6, "kind": 6, "known": 6, "label": [1, 3, 6], "larg": 1, "later": 1, "layer": 2, "lead": [1, 6], "learn": [2, 3, 4, 5], "legend": 6, "length": 2, "leon": 2, "leonid": 0, "leq": 1, "less": 1, "let": 1, "level": 6, "leverag": 6, "li": 1, "librari": [3, 4], "licens": 2, "like": [1, 3, 6], "likelihood": 1, "limit": [2, 4], "line": [1, 2], "linear": [1, 6], "linestyl": 6, "link": [0, 6], "list": [2, 6], "ll": 1, "lo": 6, "load": 4, "log": [2, 6], "logic": 2, "logist": [4, 6], "logloss": 6, "logo": 2, "loop": 2, "loss": [1, 6], "low": [2, 6], "lower": [1, 6], "machin": [1, 3, 4, 5], "macro": 6, "mai": 1, "maintain": 1, "major": [1, 6], "make": [1, 6], "make_classif": 6, "make_classification_": 6, "mani": 1, "marker": 6, "match": 1, "math": [], "mathbf": 1, "mathemat": 4, "mathf": [], "matplotlib": 6, "matric": 6, "matrix": 6, "max": 1, "maximum": [1, 6], "mean": [1, 6], "mean95": [], "meaning": [1, 6], "measur": 1, "median": [1, 6], "medic": [0, 1], "meet": 3, "mere": 6, "messag": 6, "method": [1, 2, 3, 4], "metric": [1, 2, 3, 4], "mid": 1, "middl": [], "midwai": 1, "might": 1, "mii": 0, "min": 1, "min_": 1, "minimum": 1, "minmax": 3, "minor": [1, 4], "misclassif": 1, "misinterpret": 1, "mislabel": 1, "mislead": 1, "mismatch": [2, 6], "miss": [1, 6], "mitig": [4, 6], "mlflow": 2, "model": 2, "model_definit": 6, "model_tun": [3, 6], "model_tuner_util": 6, "model_typ": 6, "model_xgb": 6, "modifi": 2, "modul": 6, "monoton": 1, "month": 0, "more": [1, 6], "move": 2, "msb": 1, "msw": 1, "mu": 1, "much": 1, "multi": [3, 6], "multi_label": 6, "multipl": 2, "must": [1, 6], "n": 1, "n_bin": 6, "n_clusters_per_class": 6, "n_estim": [], "n_featur": 6, 
"n_inform": 6, "n_iter": 6, "n_j": 1, "n_job": 6, "n_redund": 6, "n_sampl": [1, 6], "n_split": 6, "name": [2, 6], "nan": [1, 6], "natur": 6, "nearest": [1, 6], "necessari": [2, 4], "need": [1, 4], "neg": [1, 6], "neighbor": [1, 6], "new": 6, "nois": [1, 6], "noisi": 1, "non": [1, 2], "none": 6, "normal": 6, "note": 1, "notebook": [2, 4], "notic": 6, "now": [1, 2], "np": [2, 6], "num_resampl": 6, "number": [1, 2, 6], "numer": 6, "numpi": [3, 6], "o": 6, "object": [2, 4], "observ": [1, 6], "occur": [2, 6], "off": 1, "offer": 4, "often": [1, 6], "older": 2, "onc": 6, "one": [1, 6], "ones": 1, "onli": [1, 2], "onto": 2, "oper": 1, "optim": [1, 3, 6], "optimal_threshold": 6, "option": 4, "order": [1, 2], "org": [0, 5], "origin": [0, 1], "other": [1, 2, 3, 6], "our": [2, 6], "out": [1, 2], "outcom": [1, 6], "output": [1, 6], "outsid": 2, "outweigh": 6, "over": 1, "overal": 1, "overfit": [1, 3, 6], "overlap": 1, "overlook": 1, "oversampl": [1, 3, 4], "p": 1, "p_1": 1, "p_2": 1, "p_i": 1, "p_n": 1, "packag": 6, "panayioti": 0, "panda": [3, 6], "parallel": 6, "param": 6, "paramet": [2, 3, 4], "parametr": 1, "part": 6, "particularli": [1, 3, 6], "pass": [1, 6], "pattern": 6, "pd": [1, 2, 6], "penal": 1, "per": [2, 6], "perfectli": [1, 6], "perform": [1, 3, 4], "petousi": 0, "pickl": 2, "piecewis": 1, "pip": [3, 6], "pip25": 2, "pipelin": [1, 2, 3, 6], "pipeline_step": [1, 2, 6], "placehold": 1, "platt": 4, "pleas": [1, 6], "plot": 6, "plt": 6, "pmatrix": 1, "po": 6, "point": [1, 6], "poor": 6, "poorli": 6, "posit": [1, 6], "possibl": [1, 6], "power": [1, 3], "ppv": 6, "practic": [1, 6], "practition": 1, "pre": 6, "precis": [1, 6], "predict": [4, 6], "predict_proba": 6, "prefix": 6, "preprocess": [1, 6], "preprocess_imputer_imput": 6, "preprocess_scaler_standardscalar": 6, "preprocessor": 1, "prerequisit": 4, "present": 1, "preserv": 1, "pretti": 2, "prevent": [3, 4], "previou": 2, "previous": 1, "primari": 1, "print": [2, 6], "print_pipelin": 6, "print_result": 6, "print_selected_best_featur": 6, "prior": 1, "priorit": 1, "prob_pred_calibr": 6, "prob_pred_uncalibr": 6, "prob_true_calibr": 6, "prob_true_uncalibr": 6, "probabilist": 1, "probabl": [1, 3, 6], "problem": [1, 6], "proceed": 1, "process": [1, 2, 6], "process_imbalance_sampl": 6, "produc": [1, 6], "properli": 6, "properti": 1, "proport": [1, 6], "provid": [1, 3, 6], "publish": 0, "purpos": 4, "py": [2, 6], "pypi": [2, 3], "pyplot": 6, "pyproject": 2, "python": 3, "quad": 1, "quickli": 6, "r": 6, "r2": 6, "race": 6, "rais": [1, 6], "rand_grid": 6, "random": [1, 6], "random_st": 6, "randomized_grid": 6, "randomli": 6, "randomoversampl": 6, "randomundersampl": 6, "rang": [1, 6], "rare": 6, "rate": 1, "rather": 1, "ratio": [1, 6], "raw": 1, "re": 2, "readili": 6, "readm": 2, "real": 6, "recal": [1, 6], "recommend": 1, "recurs": 3, "redfin": 6, "redfin_model_xgb": [], "redistribut": 6, "reduc": [1, 6], "redund": [], "ref": 2, "refactor": 2, "refer": [1, 4, 6], "reflect": 1, "regard": 2, "region": 1, "regress": 4, "regression_exampl": [], "regression_example_": [], "regression_report": 6, "regression_report_kfold": 6, "regular": 6, "relat": 2, "relationship": 1, "releas": 2, "reli": 1, "reliabl": 6, "remov": [1, 2, 6], "renam": 2, "repeatedli": 1, "replac": 1, "report": [2, 4], "report_model_metr": 6, "repositori": [4, 5, 6], "repres": [1, 2], "represent": 6, "reproduc": 6, "requir": [1, 2, 3, 6], "resampl": [2, 4], "research": 6, "reset": [2, 6], "reset_estim": 6, "resolut": 2, "resourc": 6, "respect": 6, "result": 1, "retriev": 4, "return": 4, 
"return_bootstrap_metr": [4, 6], "return_metr": 6, "rfe": 3, "rightarrow": 1, "risk": [1, 6], "rmse": 6, "robust": [3, 6], "roc": 6, "roc_auc": 6, "root": 6, "rot": 6, "rout": 6, "routin": 1, "run": 6, "runtim": 1, "runtimeerror": 6, "runtimewarn": 1, "sadr": 5, "same": [1, 2], "sampl": [2, 4, 6], "sampler": 6, "sampling_method": [4, 6], "save": 2, "scale": [2, 3, 4], "scenario": 6, "scienc": 0, "scikit": 3, "scipi": 3, "score": [4, 6], "scrollto": [], "seamlessli": 6, "search": 4, "section": 6, "see": 6, "seed": 6, "segment": [1, 2], "select": [3, 6], "selectkbest": [2, 3], "self": [2, 6], "sensit": [1, 6], "separ": [1, 6], "sequenc": 1, "seri": [1, 6], "set": [1, 6], "setup": 2, "sever": [1, 6], "shape": [4, 6], "should": [1, 2, 6], "show": 6, "shown": 6, "shpaner": 0, "sigma": 1, "sigmoid": [3, 6], "significantli": [1, 6], "sim": 1, "similar": [1, 6], "simpl": 6, "simpleimput": [1, 3, 6], "simpli": 6, "simplifi": 2, "simultan": 2, "sinc": 1, "singl": [1, 6], "size": 6, "skew": 1, "sklearn": 6, "smote": [2, 3, 4], "smoteenn": 1, "smotetomek": 1, "so": [1, 6], "softwar": [0, 2], "solut": 4, "some": 1, "space": 1, "spam": 6, "special": 0, "specif": [1, 2, 6], "specifi": [1, 2, 6], "split": [1, 2, 3, 4], "sqrt": 1, "squar": [1, 6], "squeez": [1, 6], "stage": 6, "standard": [1, 6], "standardscal": [1, 6], "standardscalar": 6, "state": 1, "statist": 1, "step": [2, 4], "stop": [2, 3, 6], "store": 2, "str": 6, "strat_key_val_test": 2, "strategi": [3, 6], "stratif": [2, 4, 6], "stratifi": [1, 2, 3, 6], "stratify_col": [1, 2, 6], "stratify_i": [1, 2, 6], "stratify_kei": 2, "string": 2, "structur": 1, "struggl": 6, "studi": [4, 5], "subsampl": 6, "subsequ": 1, "subset": 1, "suit": 6, "sum": 6, "sum_": 1, "summari": 4, "supervis": 6, "support": [0, 2, 3, 6], "synthet": 4, "system": 3, "t": 6, "take": [1, 6], "taken": 2, "target": [2, 3, 4, 6], "task": [3, 6], "tau": 1, "techniqu": [3, 4], "temporarili": 2, "tend": 6, "test": [2, 6], "test_model": 6, "test_siz": 6, "text": [1, 6], "th": 1, "than": 1, "thank": 0, "thei": [1, 6], "them": [1, 6], "therefor": [1, 6], "thi": [0, 1, 2, 3, 6], "thoroughli": 6, "threshold": [2, 3, 4, 6], "through": 6, "thu": 6, "time": [1, 2], "titan": 6, "titl": [0, 6], "tn": 6, "toml": 2, "too": 1, "tool": 3, "top": [1, 6], "total": [], "toward": 6, "tp": 6, "tqdm": 3, "track": 6, "trade": 1, "tradit": 1, "train": [3, 4, 6], "train_siz": 6, "train_val_test": 2, "train_val_test_split": [2, 6], "transact": 6, "transform": [4, 6], "translat": 0, "treat": [1, 6], "tree": 6, "trial": [4, 5], "trigger": 1, "true": [1, 6], "trust": 1, "truth": 6, "tune": [1, 2, 3, 4], "tune_threshold_fbeta": [2, 6], "tuned_paramet": 6, "tuned_parameters_xgb": 6, "tuner": 6, "two": [1, 6], "txt": 2, "type": 6, "typeerror": 6, "typic": 6, "u": 1, "uci": [5, 6], "ucimlrepo": 6, "ucla": 0, "uncalibr": 6, "undefin": 1, "under": 3, "underli": 1, "underrepres": 6, "undersampl": [1, 6], "understand": [1, 6], "unequ": 6, "unexpect": 6, "uniform": 1, "uniqu": 6, "unlik": 1, "unnecessari": [1, 2, 6], "unpredict": 1, "unrealist": 1, "unreli": 1, "unseen": 1, "unsupport": 6, "unus": 2, "up": 2, "updat": 2, "upper": 6, "url": 0, "us": [1, 2, 3, 4], "usag": 2, "user": 6, "userwarn": 1, "util": [2, 6], "va": 6, "valid": [3, 4, 6], "validation_data": 6, "validation_s": 6, "valu": [1, 4], "value_count": 6, "valueerror": 6, "var": [1, 6], "variabl": [2, 3, 4, 6], "varianc": 4, "varieti": 6, "variou": [3, 6], "vdot": 1, "vector": 1, "verbos": 2, "versatil": 3, "version": [0, 3, 4], "visual": 6, "w": [1, 5], "wa": 
[0, 1, 2], "wai": 1, "warn": 1, "we": [1, 6], "weight": [1, 6], "welcom": 4, "well": [1, 6], "were": 2, "what": 4, "when": [1, 2, 3, 6], "where": [1, 2, 6], "whether": 6, "which": [1, 3, 6], "while": [1, 6], "wide": [1, 6], "width": 6, "wish": 6, "within": [1, 6], "without": [1, 6], "work": [0, 1, 2], "workflow": [3, 6], "world": 6, "would": 1, "wrong": 2, "x": [1, 2, 4], "x_": 1, "x_i": 1, "x_j": 1, "x_synthet": [], "x_test": 6, "x_train": 6, "x_valid": 6, "x_valid_test": 2, "xgb": 6, "xgb_": 6, "xgb__colsample_bytre": 6, "xgb__early_stopping_round": 6, "xgb__eval_metr": 6, "xgb__learning_r": 6, "xgb__max_depth": 6, "xgb__n_estim": 6, "xgb__subsampl": 6, "xgb__tree_method": 6, "xgb__verbos": [], "xgb_colsample_bytre": [], "xgb_definit": 6, "xgb_early_bootstrap_test": 2, "xgb_early_stopping_round": [], "xgb_eval_metr": [], "xgb_learning_r": [], "xgb_max_depth": [], "xgb_model": 6, "xgb_n_estim": [], "xgb_name": 6, "xgb_paramet": [], "xgb_smote": 6, "xgb_subsampl": [], "xgb_verbos": [], "xgbclassifi": 4, "xgbearli": 6, "xgboost": [2, 3, 4], "xgboost_earli": [], "xgbregressor": 4, "xlabel": 6, "y": [1, 2, 4], "y_1": 1, "y_2": 1, "y_i": 1, "y_n": 1, "y_pred": 6, "y_pred_prob": 6, "y_prob_calibr": 6, "y_prob_uncalibr": 6, "y_test": 6, "y_test_pr": 6, "y_train": 6, "y_true": 6, "y_valid": 6, "y_valid_proba": 6, "y_valid_test": 2, "year": 0, "yellow": 6, "yet": 6, "ylabel": 6, "you": [0, 1, 3, 6], "your": [1, 3, 6], "z": 1, "z_": 1, "zenodo": [0, 2], "zero": 4, "zero_variance_column": [1, 6]}, "titles": ["GitHub Repository", "Zero Variance Columns", "Changelog", "Welcome to Model Tuner\u2019s Documentation!", "Model Tuner Documentation", "References", "iPython Notebooks"], "titleterms": {"": 3, "0": 2, "010a": 2, "011a": 2, "012a": 2, "013a": 2, "014a": 2, "02a": 2, "05a": 2, "06a": 2, "07a": 2, "08a": 2, "09a": 2, "1": [1, 6], "10": 6, "15a": 2, "16a": 2, "2": [1, 6], "3": [1, 6], "4": 6, "5": 6, "6": 6, "7": 6, "8": 6, "9": 6, "A": 1, "Its": 1, "The": 6, "These": 6, "about": 4, "accordingli": 6, "accur": 1, "accuraci": 1, "acknowledg": 0, "address": 6, "after": 6, "aid": 6, "an": 6, "applic": [], "befor": 1, "bia": 1, "binari": 6, "bootstrap": 6, "bootstrapp": [], "brier": 1, "calcul": 1, "calibr": [1, 6], "california": 6, "caveat": [1, 4], "changelog": 2, "check": 6, "cite": 0, "class": [1, 6], "classif": 6, "clinic": 6, "column": [1, 6], "configur": 6, "consist": 1, "creat": 6, "creation": 1, "cross": 1, "curv": 1, "data": [1, 6], "dataset": 6, "defin": 6, "depend": 1, "distort": 1, "distribut": [1, 6], "document": [3, 4], "doe": 3, "drop": 6, "effect": 1, "exampl": [1, 6], "fit": 6, "from": 1, "function": 6, "gener": 6, "get": 4, "github": 0, "goal": 1, "grid": 6, "group": 6, "guid": 4, "helper": 6, "hous": 6, "hyperparamet": 6, "illustr": 1, "imbal": 6, "imbalanc": [1, 6], "impact": 1, "import": 6, "imput": 1, "init": 6, "initi": 6, "input": 6, "instal": 3, "instanc": 6, "ipython": 6, "isoton": 1, "kei": 6, "learn": [1, 6], "librari": 6, "limit": 1, "load": 6, "logist": 1, "machin": 6, "mathemat": 1, "method": 6, "metric": 6, "minor": 6, "mitig": 1, "model": [0, 1, 3, 4, 6], "necessari": 6, "need": 6, "notebook": 6, "object": 6, "offer": 3, "option": 6, "oversampl": 6, "paramet": [1, 6], "perform": 6, "platt": 1, "predict": 1, "prerequisit": 3, "prevent": 1, "purpos": 6, "refer": 5, "regress": [1, 6], "report": 6, "repositori": 0, "resampl": [1, 6], "retriev": 6, "return": 6, "sampl": 1, "scale": 1, "score": 1, "search": 6, "shape": 1, "smote": [1, 6], "solut": 1, "split": 6, "start": 4, 
"step": 6, "stratif": 1, "studi": 6, "summari": 1, "synthet": [1, 6], "target": 1, "techniqu": [1, 6], "threshold": 1, "train": 1, "transform": 1, "trial": 6, "tune": 6, "tuner": [0, 3, 4], "us": 6, "usag": 4, "valid": 1, "valu": 6, "variabl": 1, "varianc": [1, 6], "version": 2, "welcom": 3, "what": 3, "x": 6, "xgbclassifi": 6, "xgboost": 6, "xgbregressor": 6, "y": 6, "zero": [1, 6]}}) +>>>>>>> c6b188187e841f10a1f9bbd52c95e5fc1cbd90e5 diff --git a/docs/usage_guide.html b/docs/usage_guide.html index 838c8ca..56c4472 100644 --- a/docs/usage_guide.html +++ b/docs/usage_guide.html @@ -609,10 +609,18 @@

Step 9: Return Metrics (Optional)
# ------------------------- VALID AND TEST METRICS -----------------------------
 
 print("Validation Metrics")
-class_report_val, cm_val = model_xgb.return_metrics(X_valid, y_valid, optimal_threshold=True)
+class_report_val, cm_val = model_xgb.return_metrics(
+   X_valid,
+   y_valid,
+   optimal_threshold=True,
+)
 print()
 print("Test Metrics")
-class_report_test, cm_test = model_xgb.return_metrics(X_test, y_test, optimal_threshold=True)
+class_report_test, cm_test = model_xgb.return_metrics(
+   X_test,
+   y_test,
+   optimal_threshold=True,
+)
 

Validation Metrics
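To see how much the tuned threshold changes these figures, the same call can be repeated without it. A minimal sketch, assuming that passing optimal_threshold=False (or omitting the argument) falls back to the default 0.5 cutoff:

    ## Hypothetical comparison run at the default 0.5 threshold.
    class_report_val_05, cm_val_05 = model_xgb.return_metrics(
        X_valid,
        y_valid,
        optimal_threshold=False,
    )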
@@ -680,22 +688,22 @@ 

Step 10: Calibrate the Model (if needed)

import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
 from sklearn.calibration import calibration_curve
 
-# Get the predicted probabilities for the validation data from the uncalibrated model
+## Get the predicted probabilities for the test data from the uncalibrated model
 y_prob_uncalibrated = model_xgb.predict_proba(X_test)[:, 1]
 
-# Compute the calibration curve for the uncalibrated model
+## Compute the calibration curve for the uncalibrated model
 prob_true_uncalibrated, prob_pred_uncalibrated = calibration_curve(
    y_test,
    y_prob_uncalibrated,
-   n_bins=6,
+   n_bins=10,
 )
 
-# Calibrate the model
+## Calibrate the model
 if model_xgb.calibrate:
-model_xgb.calibrateModel(X, y, score="roc_auc")
+   model_xgb.calibrateModel(X, y, score="roc_auc")
 
-# Predict on the validation set
-y_test_pred = model_xgb.predict_proba(X_test)[:,1]
+## Predict on the test set
+y_test_pred = model_xgb.predict_proba(X_test)[:, 1]
 

-
# Get the predicted probabilities for the validation data from calibrated model
+
## Get the predicted probabilities for the test data from the calibrated model
 y_prob_calibrated = model_xgb.predict_proba(X_test)[:, 1]
 
-# Compute the calibration curve for the calibrated model
+## Compute the calibration curve for the calibrated model
 prob_true_calibrated, prob_pred_calibrated = calibration_curve(
-y_test,
-y_prob_calibrated,
-n_bins=6,
+   y_test,
+   y_prob_calibrated,
+   n_bins=10,
 )
 
 
-# Plot the calibration curves
+## Plot the calibration curves
 plt.figure(figsize=(5, 5))
 plt.plot(
-prob_pred_uncalibrated,
-prob_true_uncalibrated,
-marker="o",
-label="Uncalibrated XGBoost",
+   prob_pred_uncalibrated,
+   prob_true_uncalibrated,
+   marker="o",
+   label="Uncalibrated XGBoost",
 )
 plt.plot(
-prob_pred_calibrated,
-prob_true_calibrated,
-marker="o",
-label="Calibrated XGBoost",
+   prob_pred_calibrated,
+   prob_true_calibrated,
+   marker="o",
+   label="Calibrated XGBoost",
 )
 plt.plot(
-[0, 1],
-[0, 1],
-linestyle="--",
-label="Perfectly calibrated",
+   [0, 1],
+   [0, 1],
+   linestyle="--",
+   label="Perfectly calibrated",
 )
 plt.xlabel("Predicted probability")
 plt.ylabel("True probability in each bin")
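Beyond the visual check, a single-number summary such as the Brier score (lower is better) can be computed directly with scikit-learn from the probabilities already obtained above. This is a minimal sketch and not part of the model_tuner API:

    from sklearn.metrics import brier_score_loss

    ## Compare probability quality on the test split before and after calibration.
    print("Brier score (uncalibrated):", brier_score_loss(y_test, y_prob_uncalibrated))
    print("Brier score (calibrated):  ", brier_score_loss(y_test, y_prob_calibrated))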
@@ -810,7 +818,11 @@ 

Generating an Imbalanced Dataset

  • flip_y=0: No label noise is added to the target variable.

  • random_state=42: Ensures reproducibility by using a fixed random seed.

    -
    X, y = make_classification(
    +
    import pandas as pd
    +import numpy as np
    +from sklearn.datasets import make_classification
    +
    +X, y = make_classification(
        n_samples=1000,
        n_features=20,
        n_informative=2,
    @@ -831,7 +843,9 @@ 

    Generating an Imbalanced Dataset
    ## Create a bar plot
    +
    import matplotlib.pyplot as plt
    +
    +## Create a bar plot
     value_counts = pd.Series(y).value_counts()
     ax = value_counts.plot(
        kind="bar",
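As a numeric companion to the bar plot, the class proportions can also be printed directly; a small sketch using the y array generated above:

    import pandas as pd

    ## Proportion of samples in each class of the generated target.
    print(pd.Series(y).value_counts(normalize=True))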
    @@ -864,7 +878,9 @@ 

    Generating an Imbalanced Dataset

    Define Hyperparameters for XGBoost

    Below, we will use an XGBoost classifier with the following hyperparameters:

    -
    xgb_name = "xgb"
    +
    from xgboost import XGBClassifier
    +
    +xgb_name = "xgb"
     xgb = XGBClassifier(
        random_state=222,
     )
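The hyperparameter grid itself is defined further along in the guide. As a rough sketch (the container shape may differ, and the values here are purely illustrative), its keys carry the estimator-name prefix xgb__, matching the parameter names reported in the tuning output below:

    ## Illustrative grid only; the real values are set later in the guide.
    tuned_parameters_xgb = [{
        "xgb__max_depth": [3, 10],
        "xgb__learning_rate": [1e-4],
        "xgb__n_estimators": [999],
        "xgb__early_stopping_rounds": [100],
        "xgb__eval_metric": ["logloss"],
    }]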
    @@ -956,14 +972,13 @@ 

    Initialize and Configure The Model

    Pass imbalance_sampler=SMOTE() as a necessary step to activate this imbalanced sampler.

    -
    xgb_smote = Model(
    +
    from model_tuner import Model
    +
    +xgb_smote = Model(
        name=f"Make_Classification_{model_type}",
        estimator_name=estimator_name,
        calibrate=calibrate,
    -   pipeline_steps=[
    -      ("Imputer", SimpleImputer()),
    -      ("StandardScalar", StandardScaler()),
    -   ],
    +   model_type="classification",
        estimator=clc,
        kfold=kfold,
        stratify_y=True,
    @@ -993,44 +1008,32 @@ 

    Perform Grid Search Parameter Tuning and Retrieve Split Data
    Pipeline Steps:
    -========================
    -┌────────────────────────────────────────────┐
    -│ Step 1: preprocess_imputer_Imputer         │
    -│ SimpleImputer                              │
    -└────────────────────────────────────────────┘
    -                     │
    -                     ▼
    -┌────────────────────────────────────────────┐
    -│ Step 2: preprocess_scaler_StandardScalar   │
    -│ StandardScaler                             │
    -└────────────────────────────────────────────┘
    -                     │
    -                     ▼
    -┌────────────────────────────────────────────┐
    -│ Step 3: resampler                          │
    -│ SMOTE                                      │
    -└────────────────────────────────────────────┘
    -                     │
    -                     ▼
    -┌────────────────────────────────────────────┐
    -│ Step 4: xgb                                │
    -│ XGBClassifier                              │
    -└────────────────────────────────────────────┘
    +
    +┌─────────────────────┐
    +│ Step 1: resampler   │
    +│ SMOTE               │
    +└─────────────────────┘
    +         │
    +         ▼
    +┌─────────────────────┐
    +│ Step 2: xgb         │
    +│ XGBClassifier       │
    +└─────────────────────┘
     
     Distribution of y values after resampling: target
     0         540
     1         540
     Name: count, dtype: int64
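The balanced counts above come from the resampler step inside the pipeline. Purely as a standalone illustration of what that step does (model_tuner applies SMOTE internally, so this is not required), the same effect can be reproduced with imblearn, assuming X_train and y_train hold the training split:

    from imblearn.over_sampling import SMOTE
    import pandas as pd

    ## Oversample the minority class in the training split and re-count the labels.
    X_res, y_res = SMOTE(random_state=42).fit_resample(X_train, y_train)
    print(pd.Series(y_res).value_counts())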
     
    -100%|██████████| 5/5 [00:47<00:00,  9.41s/it]
    +100%|██████████| 5/5 [00:34<00:00,  6.87s/it]
     Fitting model with best params and tuning for best threshold ...
    -100%|██████████| 2/2 [00:00<00:00,  4.01it/s]Best score/param set found on validation set:
    +100%|██████████| 2/2 [00:00<00:00,  4.37it/s]Best score/param set found on validation set:
     {'params': {'xgb__early_stopping_rounds': 100,
                 'xgb__eval_metric': 'logloss',
                 'xgb__learning_rate': 0.0001,
    -            'xgb__max_depth': 3,
    +            'xgb__max_depth': 10,
                 'xgb__n_estimators': 999},
    -'score': 0.9994444444444446}
    +'score': 0.9990277777777777}
     Best roc_auc: 0.999
     

    @@ -1052,49 +1055,32 @@

    Fit The Model

    Return Metrics (Optional)

    -
    # ------------------------- VALID AND TEST METRICS -----------------------------
    -
    -print("Validation Metrics")
    -class_report_val, cm_val = xgb_smote.return_metrics(
    -   X_valid,
    -   y_valid,
    -   optimal_threshold=True,
    -)
    -print()
    -print("Test Metrics")
    -class_report_test, cm_test = xgb_smote.return_metrics(
    -   X_test,
    -   y_test,
    -   optimal_threshold=True,
    -)
    -
    -
    Validation Metrics
     Confusion matrix on set provided:
     --------------------------------------------------------------------------------
              Predicted:
    -            Pos   Neg
    +             Pos   Neg
     --------------------------------------------------------------------------------
     Actual: Pos  20 (tp)    0 (fn)
    -        Neg   3 (fp)  177 (tn)
    +        Neg   6 (fp)  174 (tn)
     --------------------------------------------------------------------------------
     --------------------------------------------------------------------------------
    -{'AUC ROC': 0.9904166666666667,
    -'Average Precision': 0.8520172219085262,
    -'Brier Score': 0.2096258193295803,
    -'Precision/PPV': 0.8695652173913043,
    +{'AUC ROC': 0.9955555555555555,
    +'Average Precision': 0.9378696741854636,
    +'Brier Score': 0.20835571676988004,
    +'Precision/PPV': 0.7692307692307693,
     'Sensitivity': 1.0,
    -'Specificity': 0.9833333333333333}
    +'Specificity': 0.9666666666666667}
     --------------------------------------------------------------------------------
     
                 precision    recall  f1-score   support
     
    -         0       1.00      0.98      0.99       180
    -         1       0.87      1.00      0.93        20
    +         0       1.00      0.97      0.98       180
    +         1       0.77      1.00      0.87        20
     
    -   accuracy                          0.98       200
    -   macro avg     0.93      0.99      0.96       200
    -weighted avg     0.99      0.98      0.99       200
    +   accuracy                          0.97       200
    +   macro avg     0.88      0.98      0.93       200
    +weighted avg     0.98      0.97      0.97       200
     
     --------------------------------------------------------------------------------
     
    @@ -1102,28 +1088,28 @@ 

    Return Metrics (Optional) matrix on set provided: -------------------------------------------------------------------------------- Predicted: - Pos Neg + Pos Neg -------------------------------------------------------------------------------- Actual: Pos 19 (tp) 1 (fn) - Neg 2 (fp) 178 (tn) + Neg 3 (fp) 177 (tn) -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- -{'AUC ROC': 0.9951388888888888, -'Average Precision': 0.9722222222222222, -'Brier Score': 0.20989021789332263, -'Precision/PPV': 0.9047619047619048, +{'AUC ROC': 0.9945833333333333, +'Average Precision': 0.9334649122807017, +'Brier Score': 0.20820269480995568, +'Precision/PPV': 0.8636363636363636, 'Sensitivity': 0.95, -'Specificity': 0.9888888888888889} +'Specificity': 0.9833333333333333} -------------------------------------------------------------------------------- precision recall f1-score support - 0 0.99 0.99 0.99 180 - 1 0.90 0.95 0.93 20 + 0 0.99 0.98 0.99 180 + 1 0.86 0.95 0.90 20 accuracy 0.98 200 - macro avg 0.95 0.97 0.96 200 -weighted avg 0.99 0.98 0.99 200 + macro avg 0.93 0.97 0.95 200 +weighted avg 0.98 0.98 0.98 200 --------------------------------------------------------------------------------

    @@ -1141,7 +1127,7 @@

    California Housing with XGBoost

    import pandas as pd
     import numpy as np
    -ifrom xgboost import XGBRegressor
    +from xgboost import XGBRegressor
     from sklearn.impute import SimpleImputer
     from sklearn.datasets import fetch_california_housing
     from model_tuner import Model
    @@ -1220,7 +1206,7 @@ 

    Step 5: Initialize and Configure the calibrate=calibrate, estimator=clc, kfold=kfold, - stratify_y=None, + stratify_y=False, grid=tuned_parameters, randomized_grid=rand_grid, boost_early=early_stop, @@ -1241,13 +1227,13 @@

    Step 6: Perform Grid Search Parameter Tuning and Retrieve Split Data

    Pipeline Steps:
    -========================
    +
     ┌────────────────┐
     │ Step 1: xgb    │
     │ XGBRegressor   │
     └────────────────┘
     
    -100%|██████████| 9/9 [00:05<00:00,  1.60it/s]Best score/param set found on validation set:
    +100%|██████████| 9/9 [00:22<00:00,  2.45s/it]Best score/param set found on validation set:
     {'params': {'xgb__colsample_bytree': 0.8,
                 'xgb__early_stopping_rounds': 10,
                 'xgb__eval_metric': 'logloss',
    @@ -1263,7 +1249,11 @@ 

    Step 6: Perform Grid Search Parameter Tuning and Retrieve Split Data

    Step 7: Fit the Model

    -
    model_xgb.fit(X_train, y_train, validation_data=[X_valid, y_valid])
    +
    model_xgb.fit(
    +   X_train,
    +   y_train,
    +   validation_data=[X_valid, y_valid],
    +)
     
    @@ -1418,7 +1408,7 @@

    Bootstrap Metrics

    Bootstrap Metrics Example

    -

    Continuing from the model output object (model_xgb) from the regression example above, we leverage the return_bootstrap_metrics method from model_tuner_utils.py to print bootstrap performance metrics (\(R^2\) and explained_variance) at 95% confidence levels as shown below:

    +

    Continuing from the model output object (model_xgb) from the regression example above, we leverage the return_bootstrap_metrics method from model_tuner_utils.py to print bootstrap performance metrics (\(R^2\) and \(\text{explained variance}\)) at 95% confidence levels as shown below:

    print("Bootstrap Metrics")
     
     model_xgb.return_bootstrap_metrics(
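
Taken together, the additions summarized above (and shown line by line in the source/usage_guide.rst diff that follows) describe a SMOTE-resampled XGBoost classification setup. The sketch below assembles those documented pieces into one snippet for orientation only: the SMOTE import from imblearn, the explicit train/validation split, and the placeholder values for model_type, calibrate, kfold, and the tuning grid are assumptions rather than lines from the guide, and the Model call may require additional arguments that the guide sets elsewhere.

.. code-block:: python

   ## Illustrative sketch assembled from the documented additions. The SMOTE
   ## import, the explicit train/validation split, and the placeholder values
   ## below (model_type, calibrate, kfold, grid) are assumptions for
   ## illustration, not taken verbatim from the guide.
   from imblearn.over_sampling import SMOTE
   from model_tuner import Model
   from sklearn.datasets import make_classification
   from sklearn.model_selection import train_test_split
   from xgboost import XGBClassifier

   ## Imbalanced toy data, mirroring the guide's make_classification call
   X, y = make_classification(
      n_samples=1000,
      n_features=20,
      n_informative=2,
      weights=[0.9, 0.1],   ## assumed class imbalance
      random_state=42,
   )
   X_train, X_valid, y_train, y_valid = train_test_split(
      X, y, test_size=0.2, stratify=y, random_state=42
   )

   model_type = "xgb"   ## placeholder; the guide defines this earlier
   clc = XGBClassifier(random_state=222)

   xgb_smote = Model(
      name=f"Make_Classification_{model_type}",
      estimator_name="xgb",
      calibrate=False,                     ## placeholder
      model_type="classification",
      estimator=clc,
      kfold=False,                         ## placeholder
      stratify_y=True,
      imbalance_sampler=SMOTE(),           ## activates the "resampler" pipeline step
      grid={"xgb__max_depth": [3, 10]},    ## placeholder tuning grid
   )

   xgb_smote.fit(X_train, y_train, validation_data=[X_valid, y_valid])
   class_report_val, cm_val = xgb_smote.return_metrics(
      X_valid,
      y_valid,
      optimal_threshold=True,
   )
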
    diff --git a/source/usage_guide.rst b/source/usage_guide.rst
    index 274de22..544710d 100644
    --- a/source/usage_guide.rst
    +++ b/source/usage_guide.rst
    @@ -531,10 +531,18 @@ You can use this function to evaluate the model by printing the output.
        # ------------------------- VALID AND TEST METRICS -----------------------------
     
        print("Validation Metrics")
    -   class_report_val, cm_val = model_xgb.return_metrics(X_valid, y_valid, optimal_threshold=True)
    +   class_report_val, cm_val = model_xgb.return_metrics(
    +      X_valid,
    +      y_valid,
    +      optimal_threshold=True,
    +   )
        print()
        print("Test Metrics")
    -   class_report_test, cm_test = model_xgb.return_metrics(X_test, y_test, optimal_threshold=True)
    +   class_report_test, cm_test = model_xgb.return_metrics(
    +      X_test,
    +      y_test,
    +      optimal_threshold=True,
    +   )
     
     .. code-block:: bash
     
    @@ -604,22 +612,22 @@ Step 10: Calibrate the Model (if needed)
        import matplotlib.pyplot as plt
        from sklearn.calibration import calibration_curve
     
    -   # Get the predicted probabilities for the validation data from the uncalibrated model
    +   ## Get the predicted probabilities for the validation data from uncalibrated model
        y_prob_uncalibrated = model_xgb.predict_proba(X_test)[:, 1]
     
    -   # Compute the calibration curve for the uncalibrated model
    +   ## Compute the calibration curve for the uncalibrated model
        prob_true_uncalibrated, prob_pred_uncalibrated = calibration_curve(
           y_test,
           y_prob_uncalibrated,
    -      n_bins=6,
    +      n_bins=10,
        )
     
    -   # Calibrate the model
    +   ## Calibrate the model
        if model_xgb.calibrate:
    -   model_xgb.calibrateModel(X, y, score="roc_auc")
    +      model_xgb.calibrateModel(X, y, score="roc_auc")
     
    -   # Predict on the validation set
    -   y_test_pred = model_xgb.predict_proba(X_test)[:,1]
    +   ## Predict on the validation set
    +   y_test_pred = model_xgb.predict_proba(X_test)[:, 1]
     
     
     .. code-block:: bash
    @@ -651,36 +659,36 @@ Step 10: Calibrate the Model (if needed)
     
     .. code-block:: python
     
    -   # Get the predicted probabilities for the validation data from calibrated model
    +   ## Get the predicted probabilities for the validation data from calibrated model
        y_prob_calibrated = model_xgb.predict_proba(X_test)[:, 1]
     
    -   # Compute the calibration curve for the calibrated model
    +   ## Compute the calibration curve for the calibrated model
        prob_true_calibrated, prob_pred_calibrated = calibration_curve(
    -   y_test,
    -   y_prob_calibrated,
    -   n_bins=6,
    +      y_test,
    +      y_prob_calibrated,
    +      n_bins=10,
        )
     
     
    -   # Plot the calibration curves
    +   ## Plot the calibration curves
        plt.figure(figsize=(5, 5))
        plt.plot(
    -   prob_pred_uncalibrated,
    -   prob_true_uncalibrated,
    -   marker="o",
    -   label="Uncalibrated XGBoost",
    +      prob_pred_uncalibrated,
    +      prob_true_uncalibrated,
    +      marker="o",
    +      label="Uncalibrated XGBoost",
        )
        plt.plot(
    -   prob_pred_calibrated,
    -   prob_true_calibrated,
    -   marker="o",
    -   label="Calibrated XGBoost",
    +      prob_pred_calibrated,
    +      prob_true_calibrated,
    +      marker="o",
    +      label="Calibrated XGBoost",
        )
        plt.plot(
    -   [0, 1],
    -   [0, 1],
    -   linestyle="--",
    -   label="Perfectly calibrated",
    +      [0, 1],
    +      [0, 1],
    +      linestyle="--",
    +      label="Perfectly calibrated",
        )
        plt.xlabel("Predicted probability")
        plt.ylabel("True probability in each bin")
    @@ -688,7 +696,6 @@ Step 10: Calibrate the Model (if needed)
        plt.legend()
        plt.show()
     
    -
     .. raw:: html
     
        
@@ -762,6 +769,10 @@ parameters are specified:
 
 .. code-block:: python
 
+   import pandas as pd
+   import numpy as np
+   from sklearn.datasets import make_classification
+
    X, y = make_classification(
       n_samples=1000,
       n_features=20,
@@ -786,6 +797,8 @@ Below, you will see that the dataset we have generated is severely imbalanced wi
 
 .. code-block:: python
 
+   import matplotlib.pyplot as plt
+
    ## Create a bar plot
    value_counts = pd.Series(y).value_counts()
    ax = value_counts.plot(
@@ -838,6 +851,8 @@ Below, we will use an XGBoost classifier with the following hyperparameters:
 
 .. code-block:: python
 
+   from xgboost import XGBClassifier
+
    xgb_name = "xgb"
    xgb = XGBClassifier(
       random_state=222,
@@ -937,14 +952,13 @@ Initalize and Configure The Model
 
 .. code-block:: python
 
+   from model_tuner import Model
+
    xgb_smote = Model(
       name=f"Make_Classification_{model_type}",
       estimator_name=estimator_name,
       calibrate=calibrate,
-      pipeline_steps=[
-         ("Imputer", SimpleImputer()),
-         ("StandardScalar", StandardScaler()),
-      ],
+      model_type="classification",
       estimator=clc,
       kfold=kfold,
       stratify_y=True,
@@ -977,44 +991,32 @@ Perform Grid Search Parameter Tuning and Retrieve Split Data
 
 .. code-block:: bash
 
    Pipeline Steps:
-   ========================
-   ┌────────────────────────────────────────────┐
-   │ Step 1: preprocess_imputer_Imputer         │
-   │ SimpleImputer                              │
-   └────────────────────────────────────────────┘
-                        │
-                        ▼
-   ┌────────────────────────────────────────────┐
-   │ Step 2: preprocess_scaler_StandardScalar   │
-   │ StandardScaler                             │
-   └────────────────────────────────────────────┘
-                        │
-                        ▼
-   ┌────────────────────────────────────────────┐
-   │ Step 3: resampler                          │
-   │ SMOTE                                      │
-   └────────────────────────────────────────────┘
-                        │
-                        ▼
-   ┌────────────────────────────────────────────┐
-   │ Step 4: xgb                                │
-   │ XGBClassifier                              │
-   └────────────────────────────────────────────┘
+
+   ┌─────────────────────┐
+   │ Step 1: resampler   │
+   │ SMOTE               │
+   └─────────────────────┘
+            │
+            ▼
+   ┌─────────────────────┐
+   │ Step 2: xgb         │
+   │ XGBClassifier       │
+   └─────────────────────┘
 
    Distribution of y values after resampling: target
    0         540
    1         540
    Name: count, dtype: int64
 
-   100%|██████████| 5/5 [00:47<00:00,  9.41s/it]
+   100%|██████████| 5/5 [00:34<00:00,  6.87s/it]
    Fitting model with best params and tuning for best threshold ...
-   100%|██████████| 2/2 [00:00<00:00,  4.01it/s]Best score/param set found on validation set:
+   100%|██████████| 2/2 [00:00<00:00,  4.37it/s]Best score/param set found on validation set:
    {'params': {'xgb__early_stopping_rounds': 100,
                'xgb__eval_metric': 'logloss',
                'xgb__learning_rate': 0.0001,
-               'xgb__max_depth': 3,
+               'xgb__max_depth': 10,
                'xgb__n_estimators': 999},
-   'score': 0.9994444444444446}
+   'score': 0.9990277777777777}
    Best roc_auc: 0.999
 
    SMOTE: Distribution of y values after resampling
@@ -1037,52 +1039,34 @@ Fit The Model
 
 Return Metrics (Optional)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. code-block:: python
-
-   # ------------------------- VALID AND TEST METRICS -----------------------------
-
-   print("Validation Metrics")
-   class_report_val, cm_val = xgb_smote.return_metrics(
-      X_valid,
-      y_valid,
-      optimal_threshold=True,
-   )
-   print()
-   print("Test Metrics")
-   class_report_test, cm_test = xgb_smote.return_metrics(
-      X_test,
-      y_test,
-      optimal_threshold=True,
-   )
-
 .. code-block:: bash
 
    Validation Metrics
    Confusion matrix on set provided:
    --------------------------------------------------------------------------------
             Predicted:
-            Pos   Neg
+             Pos   Neg
    --------------------------------------------------------------------------------
    Actual: Pos  20 (tp)    0 (fn)
-           Neg   3 (fp)  177 (tn)
+           Neg   6 (fp)  174 (tn)
    --------------------------------------------------------------------------------
    --------------------------------------------------------------------------------
-   {'AUC ROC': 0.9904166666666667,
-   'Average Precision': 0.8520172219085262,
-   'Brier Score': 0.2096258193295803,
-   'Precision/PPV': 0.8695652173913043,
+   {'AUC ROC': 0.9955555555555555,
+   'Average Precision': 0.9378696741854636,
+   'Brier Score': 0.20835571676988004,
+   'Precision/PPV': 0.7692307692307693,
    'Sensitivity': 1.0,
-   'Specificity': 0.9833333333333333}
+   'Specificity': 0.9666666666666667}
    --------------------------------------------------------------------------------
 
                precision    recall  f1-score   support
 
-            0       1.00      0.98      0.99       180
-            1       0.87      1.00      0.93        20
+            0       1.00      0.97      0.98       180
+            1       0.77      1.00      0.87        20
 
-      accuracy                          0.98       200
-      macro avg     0.93      0.99      0.96       200
-   weighted avg     0.99      0.98      0.99       200
+      accuracy                          0.97       200
+      macro avg     0.88      0.98      0.93       200
+   weighted avg     0.98      0.97      0.97       200
 
    --------------------------------------------------------------------------------
 
@@ -1090,31 +1074,30 @@ Return Metrics (Optional)
    Confusion matrix on set provided:
    --------------------------------------------------------------------------------
             Predicted:
-            Pos   Neg
+             Pos   Neg
    --------------------------------------------------------------------------------
    Actual: Pos  19 (tp)    1 (fn)
-           Neg   2 (fp)  178 (tn)
+           Neg   3 (fp)  177 (tn)
    --------------------------------------------------------------------------------
    --------------------------------------------------------------------------------
-   {'AUC ROC': 0.9951388888888888,
-   'Average Precision': 0.9722222222222222,
-   'Brier Score': 0.20989021789332263,
-   'Precision/PPV': 0.9047619047619048,
+   {'AUC ROC': 0.9945833333333333,
+   'Average Precision': 0.9334649122807017,
+   'Brier Score': 0.20820269480995568,
+   'Precision/PPV': 0.8636363636363636,
    'Sensitivity': 0.95,
-   'Specificity': 0.9888888888888889}
+   'Specificity': 0.9833333333333333}
    --------------------------------------------------------------------------------
 
                precision    recall  f1-score   support
 
-            0       0.99      0.99      0.99       180
-            1       0.90      0.95      0.93        20
+            0       0.99      0.98      0.99       180
+            1       0.86      0.95      0.90        20
 
       accuracy                          0.98       200
-      macro avg     0.95      0.97      0.96       200
-   weighted avg     0.99      0.98      0.99       200
+      macro avg     0.93      0.97      0.95       200
+   weighted avg     0.98      0.98      0.98       200
 
    --------------------------------------------------------------------------------
 
-
 .. _Regression:
 
 Regression
@@ -1132,7 +1115,7 @@ Step 1: Import Necessary Libraries
    import pandas as pd
    import numpy as np
-   ifrom xgboost import XGBRegressor
+   from xgboost import XGBRegressor
    from sklearn.impute import SimpleImputer
    from sklearn.datasets import fetch_california_housing
    from model_tuner import Model
@@ -1219,7 +1202,7 @@ when using ``XGBRegressor``.
       calibrate=calibrate,
       estimator=clc,
       kfold=kfold,
-      stratify_y=None,
+      stratify_y=False,
       grid=tuned_parameters,
       randomized_grid=rand_grid,
       boost_early=early_stop,
@@ -1243,13 +1226,13 @@ Step 6: Perform Grid Search Parameter Tuning and Retrieve Split Data
 
 .. code-block:: bash
 
    Pipeline Steps:
-   ========================
+
    ┌────────────────┐
    │ Step 1: xgb    │
    │ XGBRegressor   │
    └────────────────┘
 
-   100%|██████████| 9/9 [00:05<00:00,  1.60it/s]Best score/param set found on validation set:
+   100%|██████████| 9/9 [00:22<00:00,  2.45s/it]Best score/param set found on validation set:
    {'params': {'xgb__colsample_bytree': 0.8,
                'xgb__early_stopping_rounds': 10,
                'xgb__eval_metric': 'logloss',
@@ -1259,7 +1242,7 @@ Step 6: Perform Grid Search Parameter Tuning and Retrieve Split Data
                'xgb__subsample': 0.8,
                'xgb__tree_method': 'hist'},
    'score': 0.7651490279157868}
-    Best r2: 0.765
+   Best r2: 0.765
 
 
 Step 7: Fit the Model
@@ -1267,7 +1250,11 @@ Step 7: Fit the Model
 
 .. code-block:: python
 
-   model_xgb.fit(X_train, y_train, validation_data=[X_valid, y_valid])
+   model_xgb.fit(
+      X_train,
+      y_train,
+      validation_data=[X_valid, y_valid],
+   )
 
 Step 8: Return Metrics (Optional)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1398,7 +1385,7 @@ The ``bootstrapper.py`` module provides utility functions for input type checkin
 Bootstrap Metrics Example
 -----------------------------
 
-Continuing from the model output object (``model_xgb``) from the :ref:`regression example <regression>` above, we leverage the ``return_bootstrap_metrics`` method from ``model_tuner_utils.py`` to print bootstrap performance metrics (:math:`R^2` and `explained_variance`) at 95% confidence levels as shown below:
+Continuing from the model output object (``model_xgb``) from the :ref:`regression example <regression>` above, we leverage the ``return_bootstrap_metrics`` method from ``model_tuner_utils.py`` to print bootstrap performance metrics (:math:`R^2` and :math:`\text{explained variance}`) at 95% confidence levels as shown below:
 
 .. code-block:: python
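
The diff ends just as the bootstrap example's ``return_bootstrap_metrics(`` call opens. Purely as a hedged illustration — the argument names below (``metrics``, ``num_resamples``) are assumptions, not taken from the guide — a call that reports :math:`R^2` and explained variance at 95% confidence might look like the following; consult the ``return_bootstrap_metrics`` docstring in ``model_tuner_utils.py`` for the actual signature.

.. code-block:: python

   ## Hypothetical invocation: the argument names are assumptions for
   ## illustration only. model_xgb, X_test, and y_test come from the
   ## regression example documented above.
   print("Bootstrap Metrics")

   model_xgb.return_bootstrap_metrics(
      X_test,
      y_test,
      metrics=["r2", "explained_variance"],
      num_resamples=500,
   )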