From 08c465fead87f97466e7a966538ff1c01f4744f4 Mon Sep 17 00:00:00 2001
From: davidusb-geek
Date: Thu, 6 Feb 2025 00:12:24 +0100
Subject: [PATCH] A first batch of fixes

---
 docs/_static/css/custom.css                 |  1 -
 scripts/load_clustering.py                  | 87 +------------------
 scripts/load_forecast_sklearn.py            | 13 +--
 scripts/optim_results_analysis.py           |  7 +-
 .../save_pvlib_module_inverter_database.py  | 13 +--
 scripts/special_config_analysis.py          |  9 +-
 scripts/use_cases_analysis.py               | 13 +--
 src/emhass/command_line.py                  | 41 +++++----
 8 files changed, 53 insertions(+), 131 deletions(-)

diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css
index cc8b6fc7..15ff6818 100644
--- a/docs/_static/css/custom.css
+++ b/docs/_static/css/custom.css
@@ -32,7 +32,6 @@ div {
   border-radius: 7px;
-  box-align: start !important;
   align-items: start;
   text-align: left;
 }
diff --git a/scripts/load_clustering.py b/scripts/load_clustering.py
index ff921649..25c09382 100644
--- a/scripts/load_clustering.py
+++ b/scripts/load_clustering.py
@@ -23,9 +23,6 @@
 pio.renderers.default = "browser"
 pd.options.plotting.backend = "plotly"
 
-# from skopt.space import Categorical, Real, Integer
-# from tslearn.clustering import TimeSeriesKMeans
-
 # the root folder
 root = pathlib.Path(str(get_root(__file__, num_parent=2)))
 emhass_conf = {}
@@ -191,86 +188,4 @@ def load_forecast(data, forecast_date, freq, template):
 
     # Call the forecasting method
     data.columns = ["load"]
-    forecast, used_days = load_forecast(data, forecast_date, freq, template)
-
-    # data_lag = pd.concat([data, data.shift()], axis=1)
-    # data_lag.columns = ["power_load y(t)", "power_load y(t+1)"]
-    # data_lag = data_lag.dropna()
-
-    # fig2 = data_lag.plot.scatter(
-    #     x="power_load y(t)", y="power_load y(t+1)", c="DarkBlue"
-    # )
-    # fig2.layout.template = template
-    # fig2.show()
-
-    # Elbow method to check how many clusters
-    # distortions = []
-    # K = range(1,12)
-
-    # for cluster_size in K:
-    #     kmeans = KMeans(n_clusters=cluster_size, init='k-means++')
-    #     kmeans = kmeans.fit(data_lag)
-    #     distortions.append(kmeans.inertia_)
-
-    # df = pd.DataFrame({'Clusters': K, 'Distortions': distortions})
-    # fig = (px.line(df, x='Clusters', y='Distortions', template=template)).update_traces(mode='lines+markers')
-    # fig.show()
-
-    # The silouhette method
-    # silhouette_scores = []
-    # K = range(2, 12)
-
-    # for cluster_size in K:
-    #     kmeans = KMeans(n_clusters=cluster_size, init="k-means++", random_state=200)
-    #     labels = kmeans.fit(data_lag).labels_
-    #     silhouette_score_tmp = silhouette_score(
-    #         data_lag, labels, metric="euclidean", sample_size=1000, random_state=200
-    #     )
-    #     silhouette_scores.append(silhouette_score_tmp)
-
-    # df = pd.DataFrame({"Clusters": K, "Silhouette Score": silhouette_scores})
-    # fig = (
-    #     px.line(df, x="Clusters", y="Silhouette Score", template=template)
-    # ).update_traces(mode="lines+markers")
-    # fig.show()
-
-    # The clustering
-    # kmeans = KMeans(n_clusters=6, init="k-means++")
-    # kmeans = kmeans.fit(data_lag)
-    # data_lag["cluster_group"] = kmeans.labels_
-
-    # fig = px.scatter(
-    #     data_lag,
-    #     x="power_load y(t)",
-    #     y="power_load y(t+1)",
-    #     color="cluster_group",
-    #     template=template,
-    # )
-    # fig.show()
-
-    # km = TimeSeriesKMeans(n_clusters=6, verbose=True, random_state=200)
-    # y_pred = km.fit_predict(data_lag)
-    # data_lag["cluster_group_tslearn_euclidean"] = y_pred
-
-    # fig = px.scatter(
-    #     data_lag,
-    #     x="power_load y(t)",
-    #     y="power_load y(t+1)",
-    #     color="cluster_group_tslearn_euclidean",
-    #     template=template,
-    # )
-    # fig.show()
-
-    # dba_km = TimeSeriesKMeans(n_clusters=6, n_init=2, metric="dtw", verbose=True, max_iter_barycenter=10, random_state=200)
-    # y_pred = dba_km.fit_predict(data_lag)
-    # data_lag['cluster_group_tslearn_dba'] = y_pred
-
-    # fig = px.scatter(data_lag, x='power_load y(t)', y='power_load y(t+1)', color='cluster_group_tslearn_dba', template=template)
-    # fig.show()
-
-    # sdtw_km = TimeSeriesKMeans(n_clusters=6, metric="softdtw", metric_params={"gamma": .01}, verbose=True, random_state=200)
-    # y_pred = sdtw_km.fit_predict(data_lag)
-    # data_lag['cluster_group_tslearn_sdtw'] = y_pred
-
-    # fig = px.scatter(data_lag, x='power_load y(t)', y='power_load y(t+1)', color='cluster_group_tslearn_sdtw', template=template)
-    # fig.show()
+    forecast, used_days = load_forecast(data, forecast_date, freq, template)
\ No newline at end of file
diff --git a/scripts/load_forecast_sklearn.py b/scripts/load_forecast_sklearn.py
index df9a2aaa..3dc2a5e3 100644
--- a/scripts/load_forecast_sklearn.py
+++ b/scripts/load_forecast_sklearn.py
@@ -108,10 +108,11 @@ def neg_r2_score(y_true, y_pred):
 
     data = copy.deepcopy(rh.df_final)
 
+    y_axis_title = "Power (W)"
     logger.info(data.describe())
     fig = data.plot()
     fig.layout.template = template
-    fig.update_yaxes(title_text="Power (W)")
+    fig.update_yaxes(title_text=y_axis_title)
     fig.update_xaxes(title_text="Time")
     fig.show()
     fig.write_image(
@@ -169,7 +170,7 @@ def neg_r2_score(y_true, y_pred):
     df["pred"] = predictions
     fig = df.plot()
     fig.layout.template = template
-    fig.update_yaxes(title_text="Power (W)")
+    fig.update_yaxes(title_text=y_axis_title)
     fig.update_xaxes(title_text="Time")
     fig.update_xaxes(range=[date_train + pd.Timedelta("10days"), data_exo.index[-1]])
     fig.show()
@@ -200,7 +201,7 @@ def neg_r2_score(y_true, y_pred):
     df["pred"] = predictions_backtest
     fig = df.plot()
     fig.layout.template = template
-    fig.update_yaxes(title_text="Power (W)")
+    fig.update_yaxes(title_text=y_axis_title)
     fig.update_xaxes(title_text="Time")
     fig.show()
     fig.write_image(
@@ -273,7 +274,7 @@ def search_space(trial):
     df["pred_optim"] = predictions_loaded
     fig = df.plot()
     fig.layout.template = template
-    fig.update_yaxes(title_text="Power (W)")
+    fig.update_yaxes(title_text=y_axis_title)
     fig.update_xaxes(title_text="Time")
     fig.update_xaxes(range=[date_train + pd.Timedelta("10days"), data_exo.index[-1]])
     fig.show()
@@ -326,7 +327,7 @@ def search_space(trial):
 
     # Let's perform a naive load forecast for comparison
     retrieve_hass_conf, optim_conf, plant_conf = get_yaml_parse(
-        emhass_conf, use_secrets=True
+        params, logger
     )
     fcst = Forecast(
         retrieve_hass_conf, optim_conf, plant_conf, params, emhass_conf, logger
@@ -365,7 +366,7 @@ def search_space(trial):
     df["pred_prod"] = predictions_prod
     fig = df.plot()
     fig.layout.template = template
-    fig.update_yaxes(title_text="Power (W)")
+    fig.update_yaxes(title_text=y_axis_title)
     fig.update_xaxes(title_text="Time")
     fig.show()
     fig.write_image(
diff --git a/scripts/optim_results_analysis.py b/scripts/optim_results_analysis.py
index 80451180..363831ba 100644
--- a/scripts/optim_results_analysis.py
+++ b/scripts/optim_results_analysis.py
@@ -135,6 +135,7 @@ def get_forecast_optim_objects(
     template = "presentation"
 
     # Let's plot the input data
+    y_axis_title = "Power (W)"
     fig_inputs1 = df_input_data[
         [
             retrieve_hass_conf["sensor_power_photovoltaics"],
@@ -142,7 +143,7 @@ def get_forecast_optim_objects(
         ]
     ].plot()
     fig_inputs1.layout.template = template
-    fig_inputs1.update_yaxes(title_text="Powers (W)")
+    fig_inputs1.update_yaxes(title_text=y_axis_title)
fig_inputs1.update_xaxes(title_text="Time") if show_figures: fig_inputs1.show() @@ -155,7 +156,7 @@ def get_forecast_optim_objects( fig_inputs_dah = df_input_data_dayahead.plot() fig_inputs_dah.layout.template = template - fig_inputs_dah.update_yaxes(title_text="Powers (W)") + fig_inputs_dah.update_yaxes(title_text=y_axis_title) fig_inputs_dah.update_xaxes(title_text="Time") if show_figures: fig_inputs_dah.show() @@ -192,7 +193,7 @@ def get_forecast_optim_objects( ] ].plot() # 'P_def_start_0', 'P_def_start_1', 'P_def_bin2_0', 'P_def_bin2_1' fig_res_dah.layout.template = template - fig_res_dah.update_yaxes(title_text="Powers (W)") + fig_res_dah.update_yaxes(title_text=y_axis_title) fig_res_dah.update_xaxes(title_text="Time") # if show_figures: fig_res_dah.show() diff --git a/scripts/save_pvlib_module_inverter_database.py b/scripts/save_pvlib_module_inverter_database.py index 17133318..a9b50734 100644 --- a/scripts/save_pvlib_module_inverter_database.py +++ b/scripts/save_pvlib_module_inverter_database.py @@ -58,8 +58,9 @@ cec_inverters_emhass = pvlib.pvsystem.retrieve_sam( path=str(emhass_conf["data_path"] / "emhass_inverters.csv") ) - logger.info("=================") - logger.info("=================") + strait_str = "=================" + logger.info(strait_str) + logger.info(strait_str) logger.info("Updating and saving databases") @@ -91,8 +92,8 @@ tablefmt="psql", ) ) - logger.info("=================") - logger.info("=================") + logger.info(strait_str) + logger.info(strait_str) # Inverters cols_to_keep_inverters = [ @@ -124,8 +125,8 @@ tablefmt="psql", ) ) - logger.info("=================") - logger.info("=================") + logger.info(strait_str) + logger.info(strait_str) logger.info("Modules databases") print(tabulate(cec_modules.head(20).iloc[:, :5], headers="keys", tablefmt="psql")) logger.info("Inverters databases") diff --git a/scripts/special_config_analysis.py b/scripts/special_config_analysis.py index 0b344d37..335dcd76 100644 --- a/scripts/special_config_analysis.py +++ b/scripts/special_config_analysis.py @@ -282,6 +282,7 @@ def get_forecast_optim_objects( ) template = "presentation" + y_axis_title = "Power (W)" # Let's plot the input data fig_inputs1 = df_input_data[ @@ -291,7 +292,7 @@ def get_forecast_optim_objects( ] ].plot() fig_inputs1.layout.template = template - fig_inputs1.update_yaxes(title_text="Powers (W)") + fig_inputs1.update_yaxes(title_text=y_axis_title) fig_inputs1.update_xaxes(title_text="Time") fig_inputs1.show() @@ -303,7 +304,7 @@ def get_forecast_optim_objects( fig_inputs_dah = df_input_data_dayahead.plot() fig_inputs_dah.layout.template = template - fig_inputs_dah.update_yaxes(title_text="Powers (W)") + fig_inputs_dah.update_yaxes(title_text=y_axis_title) fig_inputs_dah.update_xaxes(title_text="Time") fig_inputs_dah.show() @@ -313,7 +314,7 @@ def get_forecast_optim_objects( opt_res_dah = opt.perform_dayahead_forecast_optim(df_input_data_dayahead, P_PV_forecast, P_load_forecast) fig_res_dah = opt_res_dah[['P_deferrable0', 'P_deferrable1', 'P_grid']].plot() fig_res_dah.layout.template = template - fig_res_dah.update_yaxes(title_text = "Powers (W)") + fig_res_dah.update_yaxes(title_text = y_axis_title) fig_res_dah.update_xaxes(title_text = "Time") fig_res_dah.show()""" @@ -351,6 +352,6 @@ def get_forecast_optim_objects( ) fig_res_mpc = opt_res_dayahead[["P_batt", "P_grid"]].plot() fig_res_mpc.layout.template = template - fig_res_mpc.update_yaxes(title_text="Powers (W)") + fig_res_mpc.update_yaxes(title_text=y_axis_title) 
fig_res_mpc.update_xaxes(title_text="Time") fig_res_mpc.show() diff --git a/scripts/use_cases_analysis.py b/scripts/use_cases_analysis.py index a19adc96..f9a651e5 100644 --- a/scripts/use_cases_analysis.py +++ b/scripts/use_cases_analysis.py @@ -119,6 +119,7 @@ def get_forecast_optim_objects( df_input_data = fcst.get_prod_price_forecast(df_input_data) template = "presentation" + y_axis_title = "Power (W)" # Let's plot the input data fig_inputs1 = df_input_data[ @@ -128,7 +129,7 @@ def get_forecast_optim_objects( ] ].plot() fig_inputs1.layout.template = template - fig_inputs1.update_yaxes(title_text="Powers (W)") + fig_inputs1.update_yaxes(title_text=y_axis_title) fig_inputs1.update_xaxes(title_text="Time") fig_inputs1.show() if save_figures: @@ -152,7 +153,7 @@ def get_forecast_optim_objects( fig_inputs_dah = df_input_data_dayahead.plot() fig_inputs_dah.layout.template = template - fig_inputs_dah.update_yaxes(title_text="Powers (W)") + fig_inputs_dah.update_yaxes(title_text=y_axis_title) fig_inputs_dah.update_xaxes(title_text="Time") fig_inputs_dah.show() if save_figures: @@ -166,7 +167,7 @@ def get_forecast_optim_objects( opt_res = opt.perform_perfect_forecast_optim(df_input_data, days_list) fig_res = opt_res[["P_deferrable0", "P_deferrable1", "P_grid"]].plot() fig_res.layout.template = template - fig_res.update_yaxes(title_text="Powers (W)") + fig_res.update_yaxes(title_text=y_axis_title) fig_res.update_xaxes(title_text="Time") fig_res.show() if save_figures: @@ -190,7 +191,7 @@ def get_forecast_optim_objects( ) fig_res_dah = opt_res_dah[["P_deferrable0", "P_deferrable1", "P_grid"]].plot() fig_res_dah.layout.template = template - fig_res_dah.update_yaxes(title_text="Powers (W)") + fig_res_dah.update_yaxes(title_text=y_axis_title) fig_res_dah.update_xaxes(title_text="Time") fig_res_dah.show() if save_figures: @@ -220,7 +221,7 @@ def get_forecast_optim_objects( ) fig_res_dah = opt_res_dah[["P_deferrable0", "P_deferrable1", "P_grid"]].plot() fig_res_dah.layout.template = template - fig_res_dah.update_yaxes(title_text="Powers (W)") + fig_res_dah.update_yaxes(title_text=y_axis_title) fig_res_dah.update_xaxes(title_text="Time") fig_res_dah.show() if save_figures: @@ -253,7 +254,7 @@ def get_forecast_optim_objects( ["P_deferrable0", "P_deferrable1", "P_grid", "P_batt"] ].plot() fig_res_dah.layout.template = template - fig_res_dah.update_yaxes(title_text="Powers (W)") + fig_res_dah.update_yaxes(title_text=y_axis_title) fig_res_dah.update_xaxes(title_text="Time") fig_res_dah.show() if save_figures: diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 3c46333c..cfa79e18 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -24,6 +24,9 @@ from emhass.optimization import Optimization from emhass.retrieve_hass import RetrieveHass +default_csv_filename = "opt_res_latest.csv" +default_pkl_suffix = "_mlf.pkl" +default_metadata_json = "metadata.json" def set_input_data_dict( emhass_conf: dict, @@ -94,8 +97,9 @@ def set_input_data_dict( ) # Retrieve basic configuration data from hass + test_df_literal = "test_df_final.pkl" if get_data_from_file: - with open(emhass_conf["data_path"] / "test_df_final.pkl", "rb") as inp: + with open(emhass_conf["data_path"] / test_df_literal, "rb") as inp: _, _, _, rh.ha_config = pickle.load(inp) else: response = rh.get_ha_config() @@ -133,7 +137,7 @@ def set_input_data_dict( if set_type == "perfect-optim": # Retrieve data from hass if get_data_from_file: - with open(emhass_conf["data_path"] / "test_df_final.pkl", "rb") as inp: + 
with open(emhass_conf["data_path"] / test_df_literal, "rb") as inp: rh.df_final, days_list, var_list, rh.ha_config = pickle.load(inp) retrieve_hass_conf["sensor_power_load_no_var_loads"] = str(var_list[0]) retrieve_hass_conf["sensor_power_photovoltaics"] = str(var_list[1]) @@ -231,7 +235,7 @@ def set_input_data_dict( elif set_type == "naive-mpc-optim": # Retrieve data from hass if get_data_from_file: - with open(emhass_conf["data_path"] / "test_df_final.pkl", "rb") as inp: + with open(emhass_conf["data_path"] / test_df_literal, "rb") as inp: rh.df_final, days_list, var_list, rh.ha_config = pickle.load(inp) retrieve_hass_conf["sensor_power_load_no_var_loads"] = str(var_list[0]) retrieve_hass_conf["sensor_power_photovoltaics"] = str(var_list[1]) @@ -375,7 +379,6 @@ def set_input_data_dict( + str(emhass_conf["data_path"]) ) return False - # raise ValueError("The CSV file " + csv_file + " was not found.") required_columns = [] required_columns.extend(features) required_columns.append(target) @@ -508,7 +511,7 @@ def perfect_forecast_optim( if save_data_to_file: filename = "opt_res_perfect_optim_" + input_data_dict["costfun"] + ".csv" else: # Just save the latest optimization results - filename = "opt_res_latest.csv" + filename = default_csv_filename if not debug: opt_res.to_csv( input_data_dict["emhass_conf"]["data_path"] / filename, @@ -580,7 +583,7 @@ def dayahead_forecast_optim( ) filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" else: # Just save the latest optimization results - filename = "opt_res_latest.csv" + filename = default_csv_filename if not debug: opt_res_dayahead.to_csv( input_data_dict["emhass_conf"]["data_path"] / filename, @@ -676,7 +679,7 @@ def naive_mpc_optim( ) filename = "opt_res_naive_mpc_" + today.strftime("%Y_%m_%d") + ".csv" else: # Just save the latest optimization results - filename = "opt_res_latest.csv" + filename = default_csv_filename if not debug: opt_res_naive_mpc.to_csv( input_data_dict["emhass_conf"]["data_path"] / filename, @@ -735,7 +738,7 @@ def forecast_model_fit( ) # Save model if not debug: - filename = model_type + "_mlf.pkl" + filename = model_type + default_pkl_suffix filename_path = input_data_dict["emhass_conf"]["data_path"] / filename with open(filename_path, "wb") as outp: pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) @@ -771,7 +774,7 @@ def forecast_model_predict( """ # Load model model_type = input_data_dict["params"]["passed_data"]["model_type"] - filename = model_type + "_mlf.pkl" + filename = model_type + default_pkl_suffix filename_path = input_data_dict["emhass_conf"]["data_path"] / filename if not debug: if filename_path.is_file(): @@ -858,7 +861,7 @@ def forecast_model_tune( """ # Load model model_type = input_data_dict["params"]["passed_data"]["model_type"] - filename = model_type + "_mlf.pkl" + filename = model_type + default_pkl_suffix filename_path = input_data_dict["emhass_conf"]["data_path"] / filename if not debug: if filename_path.is_file(): @@ -873,7 +876,7 @@ def forecast_model_tune( df_pred_optim = mlf.tune(debug=debug) # Save model if not debug: - filename = model_type + "_mlf.pkl" + filename = model_type + default_pkl_suffix filename_path = input_data_dict["emhass_conf"]["data_path"] / filename with open(filename_path, "wb") as outp: pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) @@ -1058,7 +1061,7 @@ def publish_data( for entity in entity_path_contents: # If publish_prefix is "all" publish all saved entities to Home Assistant # If publish_prefix matches the prefix from saved entities, publish to Home 
-                if entity != "metadata.json" and (
+                if entity != default_metadata_json and (
                     publish_prefix in entity or publish_prefix == "all"
                 ):
                     entity_data = publish_json(
@@ -1082,9 +1085,9 @@ def publish_data(
         else:
             logger.warning("No saved entity json files in path:" + str(entity_path))
             logger.warning("Falling back to opt_res_latest")
-            filename = "opt_res_latest.csv"
+            filename = default_csv_filename
     else:
-        filename = "opt_res_latest.csv"
+        filename = default_csv_filename
     if opt_res_latest is None:
         if not os.path.isfile(input_data_dict["emhass_conf"]["data_path"] / filename):
             logger.error("File not found error, run an optimization task first.")
@@ -1372,7 +1375,7 @@ def continual_publish(
         if os.path.exists(entity_path) and len(os.listdir(entity_path)) > 0:
             entity_path_contents = os.listdir(entity_path)
             for entity in entity_path_contents:
-                if entity != "metadata.json":
+                if entity != default_metadata_json:
                     # Call publish_json with entity file, build entity, and publish
                     publish_json(
                         entity,
@@ -1382,8 +1385,8 @@ def continual_publish(
                         "continual_publish",
                     )
         # Retrieve entity metadata from file
-        if os.path.isfile(entity_path / "metadata.json"):
-            with open(entity_path / "metadata.json", "r") as file:
+        if os.path.isfile(entity_path / default_metadata_json):
+            with open(entity_path / default_metadata_json, "r") as file:
                 metadata = json.load(file)
         # Check if freq should be shorter
         if metadata.get("lowest_time_step", None) is not None:
@@ -1416,8 +1419,8 @@ def publish_json(
     """
 
     # Retrieve entity metadata from file
-    if os.path.isfile(entity_path / "metadata.json"):
-        with open(entity_path / "metadata.json", "r") as file:
+    if os.path.isfile(entity_path / default_metadata_json):
+        with open(entity_path / default_metadata_json, "r") as file:
             metadata = json.load(file)
     else:
         logger.error("unable to located metadata.json in:" + entity_path)