COINtoolbox · sidi-elwely · Jun 17, 2024 · Jun 17, 2024 · Jun 18, 2024 · Jun 19, 2024
diff --git a/README.md b/README.md
@@ -54,3 +54,33 @@ In order to install this code you should clone this repository and do::
     (ActSNClass) >> pip install -r requirements
     (ActSNClass) >> python setup.py install
 
+
+# MLflow Integration
+
+MLflow is integrated into the project for experiment tracking and model registry. The integration is implemented in the `actsnclass/classifiers.py` file.
+
+### Function Name:
+
+- mlflow_tracking_And_Registry: this function handles the tracking of model metrics, parameters, and artifacts with MLflow, and registers the trained model in the MLflow Model Registry.
+
+
+### Enabling MLflow
+
+To use MLflow for tracking and model registry, set the `mlflow` parameter to `True` when calling the `random_forest` function, which is located in the `actsnclass/classifiers.py` file.
+
+- Example usage: `random_forest(train_features, train_labels, test_features, mlflow=True)`
+
+
+### Accessing the MLflow User Interface
+
+Once you have run the code with MLflow enabled, you can access the MLflow user interface to monitor your experiments and models.
+
+- Start the MLflow UI by running the following command in your terminal::
+
+     (ActSNClass) >> mlflow ui 
+
+- Visit the MLflow UI by opening your web browser and navigating to: http://127.0.0.1:5000 
+
+### Reusing Models from MLflow Registry
+
+To download and reuse models that have been registered in the MLflow Model Registry, refer to the example provided in the `managemodel.py` file located in the `actsnclass` directory.
diff --git a/actsnclass/classifiers.py b/actsnclass/classifiers.py
@@ -16,15 +16,105 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__all__ = ['random_forest']
+__all__ = ['mlflow_tracking_And_Registry','random_forest']
 
 import numpy as np
 from sklearn.ensemble import RandomForestClassifier
+import mlflow
+import mlflow.sklearn
+import pandas as pd
+from mlflow.tracking import MlflowClient
+from datetime import datetime
+
+
+def mlflow_tracking_And_Registry(clf, train_features):
+
+    """Integrates MLflow tracking and registry features for machine learning model management.
+
+    This function sets up an MLflow experiment, logs various components including model parameters,
+    training data, and registers the model in MLflow's model registry. It handles creating unique
+    identifiers for experiments and models based on training size and current date.
+
+    Parameters
+    ----------
+    clf : estimator
+        A fitted model or classifier.
+    train_features : np.array
+        Features of the training data used to fit the model.
+
+    Notes
+    -----
+    - Requires an active MLflow environment.
+    - The function assumes that `mlflow` and its related functions are properly configured.
+
+    Outputs
+    -------
+    - Logs model parameters, training data sample, and registers the model with a unique name.
+    - Saves the training data sample to a CSV file.
+    - The model is registered under a generated name, combining the experiment's name with the current date.
+
+    Examples
+    --------
+    >>> from sklearn.ensemble import RandomForestClassifier
+    >>> clf = RandomForestClassifier(n_estimators=100)
+    >>> train_features = np.random.rand(100, 4)  # 100 samples, 4 features each
+    >>> mlflow_tracking_And_Registry(clf, train_features)
+    """
+
+    # Set the MLflow experiment
+    mlflow.set_experiment("Random_Forest_Experiment")
+
+    # Enable autolog
+    mlflow.sklearn.autolog()
+
+    train_size = train_features.shape[0]
+
+    # Generate the run name with the train size
+    run_name = f"Train_size_{train_size}"
+
+    # Get the current date in day-month-year format
+    current_date = datetime.now().strftime("%d-%m-%Y")
+
+    # Generate the model name with the current date
+    model_name = f"Random_Forest_Experiment_{current_date}"
+
+    # Start a new run
+    with mlflow.start_run(run_name=run_name) as run:
+
+        # Log the model parameters
+        params = clf.get_params()
+        for param_name, param_value in params.items():
+            mlflow.log_param(param_name, param_value)
+
+        train_sample = pd.DataFrame(train_features)
+        train_sample_file = f"train_sample.csv"
+
+        train_sample.to_csv(train_sample_file, index=False)
+
+        # Log the training Data
+        mlflow.log_artifact(train_sample_file)
+
+
+        # Log the model with the generated name
+        mlflow.sklearn.log_model(clf, model_name)
+
+
+        # Register the model in the Model Registry
+        model_uri = f"runs:/{run.info.run_id}/{model_name}"
+        registered_model = mlflow.register_model(model_uri, model_name)
+
+        # Add a description of the model version if needed
+        client = MlflowClient()
+        client.update_model_version(
+            name=model_name,
+            version=registered_model.version,
+            description="Random Forest model registered on " + current_date
+        )
 
 
 def random_forest(train_features:  np.array, train_labels: np.array,
                   test_features: np.array, nest=1000, seed=42, max_depth=None,
-                  n_jobs=1):
+                  n_jobs=1, mlflow=False):
     """Random Forest classifier.
 
     Parameters
@@ -61,6 +151,11 @@ def random_forest(train_features:  np.array, train_labels: np.array,
     predictions = clf.predict(test_features)                # predict
     prob = clf.predict_proba(test_features)       # get probabilities
 
+
+    if(mlflow):
+    	# Call mlflow_tracking_And_Registry function to handle MLflow logging
+    	mlflow_tracking_And_Registry(clf, train_features)
+
     return predictions, prob
 
 

diff --git a/actsnclass/database.py b/actsnclass/database.py
@@ -86,7 +86,7 @@ class DataBase:
         Save current metrics to file.
     save_queried_sample(queried_sample_file: str, loop: int, full_sample: str)
         Save queried sample to file.
-
+    
     Examples
     --------
     >>> from actsnclass import DataBase

diff --git a/actsnclass/learn_loop.py b/actsnclass/learn_loop.py
@@ -73,7 +73,7 @@ def learn_loop(nloops: int, strategy: str, path_to_features: str,
 
         # classify
         data.classify(method=classifier)
-
+        
         # calculate metrics
         data.evaluate_classification()
 

diff --git a/actsnclass/managemodel.py b/actsnclass/managemodel.py
@@ -0,0 +1,29 @@
+from mlflow.tracking import MlflowClient
+import mlflow.pyfunc
+import numpy as np
+import pandas as pd
+
+# Example script: reload a model from the MLflow Model Registry and predict.
+client = MlflowClient()
+
+# Disabled example (bare string, never executed): move a version to "Staging".
+"""# Transitionner la version 1 de "RandomForestModel" à "Staging"
+client.transition_model_version_stage(
+    name="Random_Forest_Experiment_18-06-2024",
+    version=2,
+    stage="Staging"
+) """
+
+# Load a registered model by name and version number (the "/5" in the URI).
+model = mlflow.pyfunc.load_model(model_uri="models:/Random_Forest_Experiment_Vi_19-06-2024/5")
+
+# Random stand-in input: 100 rows by 12 feature columns.
+data = np.random.rand(100, 12)
+feature_names = [f"feature_{i+1}" for i in range(12)]
+df = pd.DataFrame(data, columns=feature_names)
+
+# Predict on the array behind the DataFrame, then print inputs and outputs.
+X_test = df.values
+predictions = model.predict(X_test)
+print(X_test)
+print(predictions)