From 1689426f2267542fa810f7110d37abfd8eacc724 Mon Sep 17 00:00:00 2001
From: MarcusHolly
Date: Mon, 24 Feb 2025 14:59:04 -0500
Subject: [PATCH 1/8] Update documentation based on Adam's comments in #1358

---
 .../grid_integration/multiperiod/Price_Taker.rst   | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/docs/reference_guides/apps/grid_integration/multiperiod/Price_Taker.rst b/docs/reference_guides/apps/grid_integration/multiperiod/Price_Taker.rst
index 6ae60840eb..487c831cb7 100644
--- a/docs/reference_guides/apps/grid_integration/multiperiod/Price_Taker.rst
+++ b/docs/reference_guides/apps/grid_integration/multiperiod/Price_Taker.rst
@@ -5,7 +5,7 @@
 Price takers are entities that must accept market prices since they lack the market share
 to directly influence the market price. Likewise, it is assumed that a price taker's resource or energy
 system is small enough such that it does not significantly impact the market. When coupled with multi-period modeling,
 the price-taker model is able to synthesize grid-centric modeling with steady-state process-centric modeling, as
-depicted in figure below.
+depicted in the figure below.
 
 .. |pricetaker| image:: images/pricetaker.png
    :width: 1200
@@ -32,15 +32,13 @@ The following equations represent the multi-period price taker model, where :mat
 
     h(d,u_{s,t},δ_{s,t},u_{s,t+1},δ_{s,t+1}) = 0; ∀_{s} ∈ S, t ∈ T
 
-The price taker multi-period modeling workflow involves the integration of multiple software platforms into the IDAES optimization model
-and can be broken down into two distinct functions, as shown in the figure below. In part 1, simulated or historical
-ISO (Independent System Operator) data is used to generate locational marginal price (LMP)
+The price taker multi-period modeling workflow is shown in part 1 of the figure below. The price taker model uses
+simulated or historical ISO (Independent System Operator) data to generate locational marginal price (LMP)
 signals, and production cost models (PCMs) are used to compute and optimize the time-varying dispatch schedules for each
-resource based on their respective bid curves. Advanced data analytics (RAVEN) reinterpret the LMP signals and PCM
+resource based on their respective bid curves. Advanced data analytics reinterpret the LMP signals and PCM outputs
 as stochastic realizations of the LMPs in the form of representative days (or simply the full-year price signals).
-In part 2, PRESCIENT uses a variety of input parameters (design capacity, minimum power output, ramp rate, minimum up/down time, marginal cost, no load cost, and startup profile)
-to generate data for the market surrogates. Meanwhile, IDAES uses the double loop simulation to integrate detailed
-process models (b, ii) into the daily (a, c) and hourly (i, iii) grid operations workflow.
+Part 2 describes how a variety of input parameters (design capacity, minimum power output, ramp rate, minimum up/down time, marginal cost, no-load cost, and startup profile)
+are used to generate data for market surrogates, which, in conjunction with the price taker model, can be used to optimize hybrid energy systems.
 
 ..
|hybrid_energy_system| image:: images/hybrid_energy_system.png :width: 1200 From 09f01ee123ac6f837a737309f794394086ea2860 Mon Sep 17 00:00:00 2001 From: MarcusHolly Date: Mon, 24 Feb 2025 15:00:15 -0500 Subject: [PATCH 2/8] Minor pricetaker updates based on Adam's feedback in #1358 --- .../grid_integration/pricetaker/clustering.py | 11 ++++- .../pricetaker/design_and_operation_models.py | 4 +- .../pricetaker/price_taker_model.py | 46 +++++++++---------- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/idaes/apps/grid_integration/pricetaker/clustering.py b/idaes/apps/grid_integration/pricetaker/clustering.py index 2e1cc2758e..1931cfc110 100644 --- a/idaes/apps/grid_integration/pricetaker/clustering.py +++ b/idaes/apps/grid_integration/pricetaker/clustering.py @@ -73,7 +73,11 @@ def generate_daily_data(raw_data: list, horizon_length: int): def cluster_lmp_data( - raw_data: list, horizon_length: int, n_clusters: int, seed: int = 42 + raw_data: list, + horizon_length: int, + n_clusters: int, + seed: int = 42, + eps: int = 1e-4, ): """ Clusters the given price signal into n_clusters using the k-means clustering @@ -92,6 +96,9 @@ def cluster_lmp_data( seed: int, Seed value for initializing random number generator within Kmeans + eps: int, + Centroid values below this threshold are set to 0 to limit noise in the data + Returns: lmp_data_clusters: dict A dictionary of representative day LMP data, indices are indexed @@ -116,7 +123,7 @@ def cluster_lmp_data( labels = kmeans.labels_ # Set any centroid values that are < 1e-4 to 0 to avoid noise - centroids = centroids * (abs(centroids) >= 1e-4) + centroids = centroids * (abs(centroids) >= eps) # Create dicts for lmp data and the weight of each cluster # By default, the data is of type numpy.int or numpy.float. 
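For readers unfamiliar with the masking idiom in the hunk above (`centroids = centroids * (abs(centroids) >= eps)`), here is a minimal standalone sketch of the same operation; the centroid values below are made up for illustration:

    import numpy as np

    eps = 1e-4
    centroids = np.array([[25.31, -3.2e-5, 0.0031], [2.7e-6, 18.44, 30.1]])

    # `abs(centroids) >= eps` evaluates to a boolean array; multiplying by it
    # zeroes out every entry whose magnitude falls below the eps threshold,
    # without an explicit loop and while preserving the array's shape
    centroids = centroids * (abs(centroids) >= eps)
    # -> [[25.31, 0., 0.0031], [0., 18.44, 30.1]]
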
diff --git a/idaes/apps/grid_integration/pricetaker/design_and_operation_models.py b/idaes/apps/grid_integration/pricetaker/design_and_operation_models.py index 83b8d85acf..1d015f379b 100644 --- a/idaes/apps/grid_integration/pricetaker/design_and_operation_models.py +++ b/idaes/apps/grid_integration/pricetaker/design_and_operation_models.py @@ -173,7 +173,7 @@ def my_operation_model(m, design_blk): ConfigValue( default=True, domain=Bool, - doc="Should op_mode, startup, shutdown vars be defined?", + doc="Boolean flag to determine if `op_mode`, `startup`, and `shutdown` vars should be defined", ), ) CONFIG.declare( @@ -181,7 +181,7 @@ def my_operation_model(m, design_blk): ConfigValue( default=True, domain=Bool, - doc="Should LMP data automatically be appended to the model?", + doc="Boolean flag to determine if LMP data should automatically be appended to the model", ), ) diff --git a/idaes/apps/grid_integration/pricetaker/price_taker_model.py b/idaes/apps/grid_integration/pricetaker/price_taker_model.py index d3a6b3b056..59439d3fcf 100644 --- a/idaes/apps/grid_integration/pricetaker/price_taker_model.py +++ b/idaes/apps/grid_integration/pricetaker/price_taker_model.py @@ -71,7 +71,7 @@ "horizon_length", ConfigValue( domain=PositiveInt, - doc="Length of each representative day/period", + doc="Length of each time horizon", ), ) CONFIG.declare( @@ -99,14 +99,14 @@ # List of arguments needed for adding startup/shutdown constraints CONFIG.declare( - "up_time", + "minimum_up_time", ConfigValue( domain=PositiveInt, doc="Minimum uptime [in hours]", ), ) CONFIG.declare( - "down_time", + "minimum_down_time", ConfigValue( domain=PositiveInt, doc="Minimum downtime [in hours]", @@ -136,10 +136,10 @@ ), ) CONFIG.declare( - "annualization_factor", + "capital_recovery_factor", ConfigValue( domain=is_in_range(0, 1), - doc="Capital cost annualization factor [fraction]", + doc="Capital cost annualization factor [fraction of investment cost/year]", ), ) CONFIG.declare( @@ -620,8 +620,8 @@ def add_startup_shutdown( self, op_block_name: str, des_block_name: Optional[str] = None, - up_time: int = 1, - down_time: int = 1, + minimum_up_time: int = 1, + minimum_down_time: int = 1, ): """ Adds minimum uptime/downtime constraints for a given unit/process @@ -635,19 +635,19 @@ def add_startup_shutdown( op_block_name. This argument is specified if the design is being optimized simultaneously, e.g., "ngcc_design" - up_time: int, default=1, + minimum_up_time: int, default=1, Total uptime (must be >= 1), e.g., 4 time periods Uptime must include the minimum uptime and the time required for shutdown. 
- down_time: int, default=1, + minimum_down_time: int, default=1, Total downtime (must be >= 1), e.g., 4 time periods Downtime must include the minimum downtime and the time required for startup """ - # Check up_time and down_time for validity - self.config.up_time = up_time - self.config.down_time = down_time + # Check minimum_up_time and minimum_down_time for validity + self.config.minimum_up_time = minimum_up_time + self.config.minimum_down_time = minimum_down_time op_blocks = self._get_operation_blocks( blk_name=op_block_name, @@ -681,15 +681,15 @@ def add_startup_shutdown( blk=start_shut_blk[d], op_blocks=op_blocks[d], install_unit=install_unit, - up_time=up_time, - down_time=down_time, + minimum_up_time=minimum_up_time, + minimum_down_time=minimum_down_time, set_time=self.set_time, ) # Save the uptime and downtime data for reference self._op_blk_uptime_downtime[op_block_name] = { - "up_time": up_time, - "down_time": down_time, + "minimum_up_time": minimum_up_time, + "minimum_down_time": minimum_down_time, } # Logger info for where constraint is located on the model @@ -803,7 +803,7 @@ def add_overall_cashflows( lifetime: int = 30, discount_rate: float = 0.08, corporate_tax_rate: float = 0.2, - annualization_factor: Optional[float] = None, + capital_recovery_factor: Optional[float] = None, cash_inflow_scale_factor: Optional[float] = 1.0, ): """ @@ -822,7 +822,7 @@ def add_overall_cashflows( Fractional value of corporate tax used in NPV calculations. Must be between 0 and 1. - annualization_factor: float, default=None, + capital_recovery_factor: float, default=None, Annualization factor cash_inflow_scale_factor: float, default=1.0, @@ -841,7 +841,7 @@ def add_overall_cashflows( self.config.lifetime = lifetime self.config.discount_rate = discount_rate self.config.corporate_tax = corporate_tax_rate - self.config.annualization_factor = annualization_factor + self.config.capital_recovery_factor = capital_recovery_factor self.config.cash_inflow_scale_factor = cash_inflow_scale_factor if not self._has_hourly_cashflows: @@ -902,17 +902,17 @@ def add_overall_cashflows( expr=cf.net_profit == cf.net_cash_inflow - cf.fom - cf.corporate_tax ) - if annualization_factor is None: + if capital_recovery_factor is None: # If the annualization factor is not specified - annualization_factor = discount_rate / ( + capital_recovery_factor = discount_rate / ( 1 - (1 + discount_rate) ** (-lifetime) ) cf.lifetime_npv = Expression( - expr=(1 / annualization_factor) * cf.net_profit - cf.capex + expr=(1 / capital_recovery_factor) * cf.net_profit - cf.capex ) cf.npv = Expression( - expr=cf.net_profit - annualization_factor * cf.capex, + expr=cf.net_profit - capital_recovery_factor * cf.capex, ) self._has_overall_cashflows = True From cebc584457e64e1a07f62f3849e31d98e169bef0 Mon Sep 17 00:00:00 2001 From: MarcusHolly Date: Mon, 24 Feb 2025 21:48:18 -0500 Subject: [PATCH 3/8] Add initial implementation of custom clustering method --- .../grid_integration/pricetaker/clustering.py | 141 +++++++++++++++--- 1 file changed, 121 insertions(+), 20 deletions(-) diff --git a/idaes/apps/grid_integration/pricetaker/clustering.py b/idaes/apps/grid_integration/pricetaker/clustering.py index 1931cfc110..8f0c3a310d 100644 --- a/idaes/apps/grid_integration/pricetaker/clustering.py +++ b/idaes/apps/grid_integration/pricetaker/clustering.py @@ -19,6 +19,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd +from scipy.interpolate import splrep, splev from pyomo.common.dependencies import attempt_import import 
idaes.logger as idaeslog
@@ -142,7 +143,8 @@ def get_optimal_num_clusters(
     samples: Union[pd.DataFrame, np.array],
     kmin: int = 2,
     kmax: int = 30,
-    method: str = "silhouette",
+    method: str = "elbow",
+    sensitivity: int = 1,
     generate_elbow_plot: bool = False,
     seed: int = 42,
 ):
@@ -165,6 +167,9 @@ def get_optimal_num_clusters(
         Method for obtaining the optimal number of clusters.
         Supported methods are elbow and silhouette
 
+    sensitivity : int,
+        Difficulty of detecting knees, where 0 detects knees the easiest
+
     generate_elbow_plot : bool,
         If True, generates an elbow plot for inertia as a function
         of number of clusters
@@ -188,28 +193,30 @@ def get_optimal_num_clusters(
     k_values = list(range(kmin, kmax + 1))
     inertia_values = []
     mean_silhouette = []
 
     for k in k_values:
         kmeans = KMeans(n_clusters=k, random_state=seed).fit(samples)
         inertia_values.append(kmeans.inertia_)
 
         if method == "silhouette":
-            # Calculate the average silhouette score, if the chosen method
-            # is silhouette
             mean_silhouette.append(silhouette_score(samples, kmeans.labels_))
 
-    # Identify the optimal number of clusters
-    if method == "elbow":
-        raise NotImplementedError(
-            "elbow method is not supported currently for finding the optimal "
-            "number of clusters."
-        )
-
-    elif method == "silhouette":
+    if method == "silhouette":
+        # Identify the optimal number of clusters
         max_index = mean_silhouette.index(max(mean_silhouette))
         n_clusters = k_values[max_index]
 
+    elif method == "elbow":
+        # Invert inertia values such that plot is concave down and increasing
+        inverted_inertia_values = [-y for y in inertia_values]
+        n_clusters = locate_elbow(
+            x=k_values,
+            y=inverted_inertia_values,
+            sensitivity=sensitivity,
+            kmin=kmin,
+            kmax=kmax,
+        )
     else:
         raise ValueError(
             f"Unrecognized method {method} for optimal number of clusters."
@@ -236,15 +243,31 @@ def get_optimal_num_clusters(
     return n_clusters
 
 
-def locate_elbow(x: list, y: list):
+def locate_elbow(
+    x: list,
+    y: list,
+    sensitivity: int = 1,
+    kmin: int = 2,
+    kmax: int = 30,
+):
     """
     Identifies the elbow/knee for the input/output data
 
     Args:
-        x : list
-            List of independent variables
-        y : list
-            List of dependent variables
+        x : list,
+            List of independent variables (i.e. k_values)
+
+        y : list,
+            List of dependent variables (i.e.
inertia values) + + sensitivity : int, + difficulty of detecting knees, where 0 detects knees the easiest + + kmin : int, + Minimum number of clusters + + kmax : int, + Maximum number of clusters Returns: opt_x : float @@ -256,4 +279,82 @@ def locate_elbow(x: list, y: list): # Detecting Knee Points in System Behavior # https://raghavan.usc.edu/papers/kneedle-simplex11.pdf - return len(np.array(x)) + len(np.array(y)) + # TODO: See if make_splrep works and generates similar results + # Use a smoothing spline that retains the data's original shape + spline = splrep(x, y, k=3) + k_smooth = np.linspace(kmin, kmax + 1, 100) + inertia_smooth = splev(k_smooth, spline) + + k_norm = _normalize_values(k_smooth) + inertia_norm = _normalize_values(inertia_smooth) + + # Compute the set of differences (x, y) -> (x, y-x) + inertia_diff = [] + for i in range(len(inertia_norm)): + set_of_differences = inertia_norm[i] - k_norm[i] + inertia_diff.append(set_of_differences) + + # Identify local maxima + local_maxima = [ + (k_norm[i], inertia_diff[i]) + for i in range(1, len(inertia_diff) - 1) + if inertia_diff[i] > inertia_diff[i - 1] + and inertia_diff[i] > inertia_diff[i + 1] + ] + + # Calculate optimal # of clusters based on the # of local maxima + threshold_values = [] + threshold_triggered = False + n_clusters = 0 + + if len(local_maxima) == 0: + n_clusters = 0 + _logger.warning( + "The optimal number of cluster cannot be determined for this dataset." + ) + elif len(local_maxima) == 1: + n_clusters_norm = local_maxima[0][0] + ind = k_norm.index(n_clusters_norm) + n_clusters = x[ind] + else: + n = len(local_maxima) + summation = 0 + for i in range(0, n - 1): + summation += local_maxima[i + 1][0] - local_maxima[i][0] + # For each local maxima, compute the threshold and determine the index of the current and next local maxima + for i in range(0, n - 1): + threshold = local_maxima[i][1] - (sensitivity * summation / (n - 1)) + threshold_values.append(threshold) + l_max_index = inertia_diff.index(local_maxima[i][1]) + next_l_max_index = inertia_diff.index(local_maxima[i + 1][1]) + for j in range(l_max_index + 1, next_l_max_index): + if inertia_diff[j] < threshold: + threshold_triggered = True + normalized_optimal_n_clusters = local_maxima[i][0] + index = k_norm.index(normalized_optimal_n_clusters) + n_clusters = x[index] + if threshold_triggered: + break + # If optimal # of clusters cannot be identified, use the first local maxima + if not threshold_triggered: + normalized_optimal_n_clusters = local_maxima[0][0] + index = k_norm.index(normalized_optimal_n_clusters) + n_clusters = x[index] + _logger.warning( + "The number of optimal clusters could not be accurately identified. " + "Consider lowering the sensitivity." 
+        )
+
+    return n_clusters
+
+
+def _normalize_values(values):
+    # Compute min/max once, outside the loop
+    max_value = max(values)
+    min_value = min(values)
+    normalized_values = []
+
+    for i in values:
+        normalized_value = (i - min_value) / (max_value - min_value)
+        normalized_values.append(normalized_value)
+
+    return normalized_values

From cbb1ad651f46d681b8bc7cf6f5c0b96158d1711f Mon Sep 17 00:00:00 2001
From: MarcusHolly
Date: Mon, 24 Feb 2025 22:51:51 -0500
Subject: [PATCH 4/8] Add testing for new clustering method

---
 .../grid_integration/pricetaker/clustering.py |  4 +-
 .../pricetaker/tests/test_clustering.py       | 58 ++++++++++---------
 2 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/idaes/apps/grid_integration/pricetaker/clustering.py b/idaes/apps/grid_integration/pricetaker/clustering.py
index 8f0c3a310d..4d6a349c2f 100644
--- a/idaes/apps/grid_integration/pricetaker/clustering.py
+++ b/idaes/apps/grid_integration/pricetaker/clustering.py
@@ -143,7 +143,7 @@ def get_optimal_num_clusters(
     samples: Union[pd.DataFrame, np.array],
     kmin: int = 2,
     kmax: int = 30,
-    method: str = "elbow",
+    method: str = "silhouette",
     sensitivity: int = 1,
     generate_elbow_plot: bool = False,
     seed: int = 42,
@@ -251,7 +251,7 @@ def locate_elbow(
     kmax: int = 30,
 ):
     """
-    Identifies the elbow/knee for the input/output data
+    Identifies the elbow/knee for the input/output data that is concave down and increasing
 
     Args:
diff --git a/idaes/apps/grid_integration/pricetaker/tests/test_clustering.py b/idaes/apps/grid_integration/pricetaker/tests/test_clustering.py
index 8b711e52ae..5160b4324c 100644
--- a/idaes/apps/grid_integration/pricetaker/tests/test_clustering.py
+++ b/idaes/apps/grid_integration/pricetaker/tests/test_clustering.py
@@ -31,7 +31,7 @@
 @pytest.fixture(name="sample_data")
 def sample_data_fixture():
     """Returns a sample price signal for testing"""
-    file_path = Path(__file__).parent / "FLECCS_princeton.csv"
+    file_path = Path(__file__).parents[2] / "multiperiod" / "tests" / "lmp_data.csv"
     data = pd.read_csv(file_path)
     return data
 
@@ -130,24 +130,30 @@ def test_cluster_lmp_data(dummy_data):
 @pytest.mark.skipif(
     not sklearn_avail, reason="sklearn (optional dependency) not available"
 )
-def test_optimal_num_clusters_elbow(dummy_data):
+def test_optimal_num_clusters_elbow(sample_data):
     """Tests the get_optimal_num_clusters function"""
-    samples = generate_daily_data(dummy_data, horizon_length=2)
-    with pytest.raises(
-        NotImplementedError,
-        match=(
-            "elbow method is not supported currently for finding the optimal "
-            "number of clusters."
- ), - ): - get_optimal_num_clusters( - samples=samples, - kmin=2, - kmax=7, - method="elbow", - generate_elbow_plot=True, - seed=20, - ) + samples = generate_daily_data( + sample_data["BaseCase_2030"].tolist(), horizon_length=24 + ) + n_clusters = get_optimal_num_clusters( + samples=samples, + kmin=4, + kmax=30, + method="elbow", + generate_elbow_plot=True, + seed=42, + ) + + assert n_clusters == 7 + + # Test that a figure was created + assert plt.gcf() is not None + # Test that axes were created + assert plt.gca() is not None + # Test that the plot has data + # assert plt.gca().has_data() + + plt.close("all") @pytest.mark.unit @@ -165,7 +171,7 @@ def test_optimal_num_clusters_silhouette(dummy_data): generate_elbow_plot=True, ) - assert n_clusters == 3 + assert n_clusters == 2 # Test that a figure was created assert plt.gcf() is not None @@ -266,16 +272,16 @@ def test_optimal_clusters_logger_message5(dummy_data, caplog): samples = generate_daily_data(dummy_data, 2) with caplog.at_level(idaeslog.WARNING): - get_optimal_num_clusters(samples, kmin=2, kmax=5, seed=20) + get_optimal_num_clusters(samples, kmin=4, kmax=5, seed=20) assert ( "Optimal number of clusters is close to kmax: 5. Consider increasing kmax." in caplog.text ) -@pytest.mark.unit -def test_locate_elbow(): - """Tests the locate_elbow function""" - opt_x = locate_elbow([1, 2, 3], [4, 5, 6]) - - assert opt_x == 6 +# @pytest.mark.unit +# def test_locate_elbow(sample_data): +# """Tests the locate_elbow function""" +# opt_x = locate_elbow([1, 2, 3], [5, 3.8, 1.2]) +# +# assert opt_x == 2 From abbd4200f509647f835e3e002a5473037fbf4ce9 Mon Sep 17 00:00:00 2001 From: MarcusHolly Date: Mon, 24 Feb 2025 23:04:38 -0500 Subject: [PATCH 5/8] Clean up code and testing --- .../grid_integration/pricetaker/clustering.py | 183 +++++++----------- 1 file changed, 71 insertions(+), 112 deletions(-) diff --git a/idaes/apps/grid_integration/pricetaker/clustering.py b/idaes/apps/grid_integration/pricetaker/clustering.py index 4d6a349c2f..a00809d7b5 100644 --- a/idaes/apps/grid_integration/pricetaker/clustering.py +++ b/idaes/apps/grid_integration/pricetaker/clustering.py @@ -206,16 +206,80 @@ def get_optimal_num_clusters( max_index = mean_silhouette.index(max(mean_silhouette)) n_clusters = k_values[max_index] + # The implementation below is based on + # Ville Satopaa, Jeannie Albrecht, David Irwin, Barath Raghavan + # Finding a “Kneedle” in a Haystack: + # Detecting Knee Points in System Behavior + # https://raghavan.usc.edu/papers/kneedle-simplex11.pdf + elif method == "elbow": # Invert inertia values such that plot is concave down and increasing inverted_inertia_values = [-y for y in inertia_values] - n_clusters = locate_elbow( - x=k_values, - y=inverted_inertia_values, - sensitivity=sensitivity, - kmin=kmin, - kmax=kmax, - ) + # TODO: See if make_splrep works and generates similar results + # Use a smoothing spline that retains the data's original shape + spline = splrep(k_values, inverted_inertia_values, k=3) + k_smooth = np.linspace(kmin, kmax + 1, 100) + inertia_smooth = splev(k_smooth, spline) + + k_norm = _normalize_values(k_smooth) + inertia_norm = _normalize_values(inertia_smooth) + + # Compute the set of differences (x, y) -> (x, y-x) + inertia_diff = [] + for i in range(len(inertia_norm)): + set_of_differences = inertia_norm[i] - k_norm[i] + inertia_diff.append(set_of_differences) + + # Identify local maxima + local_maxima = [ + (k_norm[i], inertia_diff[i]) + for i in range(1, len(inertia_diff) - 1) + if inertia_diff[i] > inertia_diff[i - 
+            and inertia_diff[i] > inertia_diff[i + 1]
+        ]
+
+        # Calculate optimal # of clusters based on the # of local maxima
+        threshold_values = []
+        threshold_triggered = False
+        n_clusters = 0
+
+        if len(local_maxima) == 0:
+            n_clusters = 0
+            _logger.warning(
+                "The optimal number of cluster cannot be determined for this dataset."
+            )
+        elif len(local_maxima) == 1:
+            n_clusters_norm = local_maxima[0][0]
+            ind = k_norm.index(n_clusters_norm)
+            n_clusters = k_values[ind]
+        else:
+            n = len(local_maxima)
+            summation = 0
+            for i in range(0, n - 1):
+                summation += local_maxima[i + 1][0] - local_maxima[i][0]
+            # For each local maxima, compute the threshold and determine the index of the current and next local maxima
+            for i in range(0, n - 1):
+                threshold = local_maxima[i][1] - (sensitivity * summation / (n - 1))
+                threshold_values.append(threshold)
+                l_max_index = inertia_diff.index(local_maxima[i][1])
+                next_l_max_index = inertia_diff.index(local_maxima[i + 1][1])
+                for j in range(l_max_index + 1, next_l_max_index):
+                    if inertia_diff[j] < threshold:
+                        threshold_triggered = True
+                        normalized_optimal_n_clusters = local_maxima[i][0]
+                        index = k_norm.index(normalized_optimal_n_clusters)
+                        n_clusters = k_values[index]
+                if threshold_triggered:
+                    break
+            # If optimal # of clusters cannot be identified, use the first local maxima
+            if not threshold_triggered:
+                normalized_optimal_n_clusters = local_maxima[0][0]
+                index = k_norm.index(normalized_optimal_n_clusters)
+                n_clusters = k_values[index]
+                _logger.warning(
+                    "The number of optimal clusters could not be accurately identified. "
+                    "Consider lowering the sensitivity."
+                )
     else:
         raise ValueError(
             f"Unrecognized method {method} for optimal number of clusters."
@@ -243,111 +307,6 @@ def get_optimal_num_clusters(
     return n_clusters
 
 
-def locate_elbow(
-    x: list,
-    y: list,
-    sensitivity: int = 1,
-    kmin: int = 2,
-    kmax: int = 30,
-):
-    """
-    Identifies the elbow/knee for the input/output data that is concave down and increasing
-
-    Args:
-        x : list,
-            List of independent variables (i.e. k_values)
-
-        y : list,
-            List of dependent variables (i.e.
inertia values) - - sensitivity : int, - difficulty of detecting knees, where 0 detects knees the easiest - - kmin : int, - Minimum number of clusters - - kmax : int, - Maximum number of clusters - - Returns: - opt_x : float - Optimal x at which curvature changes significantly - """ - # The implementation is based on - # Ville Satopaa, Jeannie Albrecht, David Irwin, Barath Raghavan - # Finding a “Kneedle” in a Haystack: - # Detecting Knee Points in System Behavior - # https://raghavan.usc.edu/papers/kneedle-simplex11.pdf - - # TODO: See if make_splrep works and generates similar results - # Use a smoothing spline that retains the data's original shape - spline = splrep(x, y, k=3) - k_smooth = np.linspace(kmin, kmax + 1, 100) - inertia_smooth = splev(k_smooth, spline) - - k_norm = _normalize_values(k_smooth) - inertia_norm = _normalize_values(inertia_smooth) - - # Compute the set of differences (x, y) -> (x, y-x) - inertia_diff = [] - for i in range(len(inertia_norm)): - set_of_differences = inertia_norm[i] - k_norm[i] - inertia_diff.append(set_of_differences) - - # Identify local maxima - local_maxima = [ - (k_norm[i], inertia_diff[i]) - for i in range(1, len(inertia_diff) - 1) - if inertia_diff[i] > inertia_diff[i - 1] - and inertia_diff[i] > inertia_diff[i + 1] - ] - - # Calculate optimal # of clusters based on the # of local maxima - threshold_values = [] - threshold_triggered = False - n_clusters = 0 - - if len(local_maxima) == 0: - n_clusters = 0 - _logger.warning( - "The optimal number of cluster cannot be determined for this dataset." - ) - elif len(local_maxima) == 1: - n_clusters_norm = local_maxima[0][0] - ind = k_norm.index(n_clusters_norm) - n_clusters = x[ind] - else: - n = len(local_maxima) - summation = 0 - for i in range(0, n - 1): - summation += local_maxima[i + 1][0] - local_maxima[i][0] - # For each local maxima, compute the threshold and determine the index of the current and next local maxima - for i in range(0, n - 1): - threshold = local_maxima[i][1] - (sensitivity * summation / (n - 1)) - threshold_values.append(threshold) - l_max_index = inertia_diff.index(local_maxima[i][1]) - next_l_max_index = inertia_diff.index(local_maxima[i + 1][1]) - for j in range(l_max_index + 1, next_l_max_index): - if inertia_diff[j] < threshold: - threshold_triggered = True - normalized_optimal_n_clusters = local_maxima[i][0] - index = k_norm.index(normalized_optimal_n_clusters) - n_clusters = x[index] - if threshold_triggered: - break - # If optimal # of clusters cannot be identified, use the first local maxima - if not threshold_triggered: - normalized_optimal_n_clusters = local_maxima[0][0] - index = k_norm.index(normalized_optimal_n_clusters) - n_clusters = x[index] - _logger.warning( - "The number of optimal clusters could not be accurately identified. " - "Consider lowering the sensitivity." 
- ) - - return n_clusters - - def _normalize_values(values): normalized_values = [] From 65e491c33bdcaa858c68afbe7e12cae67948b86a Mon Sep 17 00:00:00 2001 From: MarcusHolly Date: Mon, 24 Feb 2025 23:15:45 -0500 Subject: [PATCH 6/8] Remove locate_elbow function --- .../grid_integration/pricetaker/tests/test_clustering.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/idaes/apps/grid_integration/pricetaker/tests/test_clustering.py b/idaes/apps/grid_integration/pricetaker/tests/test_clustering.py index 5160b4324c..874d900f40 100644 --- a/idaes/apps/grid_integration/pricetaker/tests/test_clustering.py +++ b/idaes/apps/grid_integration/pricetaker/tests/test_clustering.py @@ -21,7 +21,6 @@ get_optimal_num_clusters, cluster_lmp_data, sklearn_avail, - locate_elbow, ) import idaes.logger as idaeslog @@ -277,11 +276,3 @@ def test_optimal_clusters_logger_message5(dummy_data, caplog): "Optimal number of clusters is close to kmax: 5. Consider increasing kmax." in caplog.text ) - - -# @pytest.mark.unit -# def test_locate_elbow(sample_data): -# """Tests the locate_elbow function""" -# opt_x = locate_elbow([1, 2, 3], [5, 3.8, 1.2]) -# -# assert opt_x == 2 From bae6d21415b2e558023ee722deae6227ce7f32a1 Mon Sep 17 00:00:00 2001 From: MarcusHolly Date: Mon, 24 Feb 2025 23:18:17 -0500 Subject: [PATCH 7/8] Change epsilon from an int to a float --- idaes/apps/grid_integration/pricetaker/clustering.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/idaes/apps/grid_integration/pricetaker/clustering.py b/idaes/apps/grid_integration/pricetaker/clustering.py index a00809d7b5..77e0ef8cd4 100644 --- a/idaes/apps/grid_integration/pricetaker/clustering.py +++ b/idaes/apps/grid_integration/pricetaker/clustering.py @@ -78,7 +78,7 @@ def cluster_lmp_data( horizon_length: int, n_clusters: int, seed: int = 42, - eps: int = 1e-4, + eps: float = 1e-4, ): """ Clusters the given price signal into n_clusters using the k-means clustering @@ -97,7 +97,7 @@ def cluster_lmp_data( seed: int, Seed value for initializing random number generator within Kmeans - eps: int, + eps: float, Centroid values below this threshold are set to 0 to limit noise in the data Returns: From 68a5de1a2717fb6fb841f3f5ad0fac9c17fd9f11 Mon Sep 17 00:00:00 2001 From: MarcusHolly Date: Mon, 24 Feb 2025 23:20:06 -0500 Subject: [PATCH 8/8] Resolve To-Do and typo --- idaes/apps/grid_integration/pricetaker/clustering.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/idaes/apps/grid_integration/pricetaker/clustering.py b/idaes/apps/grid_integration/pricetaker/clustering.py index 77e0ef8cd4..3619e1567c 100644 --- a/idaes/apps/grid_integration/pricetaker/clustering.py +++ b/idaes/apps/grid_integration/pricetaker/clustering.py @@ -215,7 +215,6 @@ def get_optimal_num_clusters( elif method == "elbow": # Invert inertia values such that plot is concave down and increasing inverted_inertia_values = [-y for y in inertia_values] - # TODO: See if make_splrep works and generates similar results # Use a smoothing spline that retains the data's original shape spline = splrep(k_values, inverted_inertia_values, k=3) k_smooth = np.linspace(kmin, kmax + 1, 100) @@ -246,7 +245,7 @@ def get_optimal_num_clusters( if len(local_maxima) == 0: n_clusters = 0 _logger.warning( - "The optimal number of cluster cannot be determined for this dataset." + "The optimal number of clusters cannot be determined for this dataset." ) elif len(local_maxima) == 1: n_clusters_norm = local_maxima[0][0]