From c834b77d8da144b252591c5172aea78c13187ea7 Mon Sep 17 00:00:00 2001 From: "jason.regina" Date: Thu, 13 Feb 2025 21:45:54 +0000 Subject: [PATCH] add bayesian bootstrap --- .../src/hydrotools/metrics/signature.py | 27 ++++++++++++++++--- python/metrics/tests/test_signature.py | 10 +++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/python/metrics/src/hydrotools/metrics/signature.py b/python/metrics/src/hydrotools/metrics/signature.py index 855e511c..c0882316 100644 --- a/python/metrics/src/hydrotools/metrics/signature.py +++ b/python/metrics/src/hydrotools/metrics/signature.py @@ -38,7 +38,8 @@ def compute_fdc( alpha: float = 0.0, beta: float = 0.0, quantiles: npt.ArrayLike | None = None, - boostrap_samples: int = 1000 + boostrap_samples: int = 1000, + bayesian: bool = False ) -> FlowDurationCurve: """ Computes an empirical flow duration curve derived from series. The flow @@ -68,6 +69,13 @@ def compute_fdc( bootstrap_samples: int, optional, default 1000 Used with `quantiles`. Indicates the number of bootstrap samples to generate for the posterior distribution of values. + bayesian: bool, optional, default False + When True alters the generation of quantile-based FDCs by randomly + weighting each bootstrapped sample. Samples are drawn from an + uninformative dirichlet distribution. This method will tend to produce + smoother FDCs, but the empirical FDC may not fall between the + quantiles. It's recommended to use the median quantile as the central + FDC. For example, `quantiles=[0.025, 0.5, 0.975]`. Returns ------- @@ -82,6 +90,11 @@ def compute_fdc( assert length > 0, "cannot derive FDC from an empty series" assert 0 <= alpha <= 1, "alpha must be between 0 and 1" assert 0 <= beta <= 1, "beta must be between 0 and 1" + if quantiles is not None: + assert boostrap_samples > 0, \ + "bootstrap_samples must be positive integer" + if bayesian: + assert quantiles != None, "bayesian only used with quantiles" # Build FDC p = (np.arange(1, length+1) - alpha) / (length+1-alpha-beta) @@ -91,10 +104,18 @@ def compute_fdc( ) if quantiles is None: return fdc - + + # Set weight function + if bayesian: + rng = np.random.default_rng() + weight_function = lambda: rng.dirichlet(np.ones(series.shape)) + else: + weight_function = lambda: 1 + # Generate additional quantiles bs = IIDBootstrap(series, seed=2025) - posterior = bs.apply(lambda a: np.sort(a)[::-1], boostrap_samples) + posterior = bs.apply(lambda a: np.sort(a)[::-1] * weight_function(), + boostrap_samples) fdc.quantiles = quantiles fdc.quantile_values = np.quantile(posterior, quantiles, axis=0) return fdc diff --git a/python/metrics/tests/test_signature.py b/python/metrics/tests/test_signature.py index 387a860b..1a67a26f 100644 --- a/python/metrics/tests/test_signature.py +++ b/python/metrics/tests/test_signature.py @@ -16,6 +16,8 @@ def test_compute_fdc(): assert fdc.values[-1] < fdc.values[0] assert np.min(fdc.values) == fdc.values[-1] assert np.max(fdc.values) == fdc.values[0] + assert np.min(fdc.values) == -1.0 + assert np.max(fdc.values) == 100.0 assert fdc.quantiles == None assert fdc.quantile_values == None @@ -39,3 +41,11 @@ def test_compute_fdc(): assert fdc.quantile_values[1][-1] < fdc.quantile_values[1][0] assert np.min(fdc.quantile_values[1]) == fdc.quantile_values[1][-1] assert np.max(fdc.quantile_values[1]) == fdc.quantile_values[1][0] + + fdc = signature.compute_fdc(y, quantiles=[0.025, 0.5, 0.975], + bayesian=True) + low = fdc.quantile_values[0] + mid = fdc.quantile_values[1] + high = fdc.quantile_values[2] + assert np.all(np.where(low <= mid, True, False) & + np.where(mid <= high, True, False))