From c834b77d8da144b252591c5172aea78c13187ea7 Mon Sep 17 00:00:00 2001
From: "jason.regina" <jason.regina@noaa.gov>
Date: Thu, 13 Feb 2025 21:45:54 +0000
Subject: [PATCH] add bayesian bootstrap

---
 .../src/hydrotools/metrics/signature.py       | 27 ++++++++++++++++---
 python/metrics/tests/test_signature.py        | 10 +++++++
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/python/metrics/src/hydrotools/metrics/signature.py b/python/metrics/src/hydrotools/metrics/signature.py
index 855e511c..c0882316 100644
--- a/python/metrics/src/hydrotools/metrics/signature.py
+++ b/python/metrics/src/hydrotools/metrics/signature.py
@@ -38,7 +38,8 @@ def compute_fdc(
         alpha: float = 0.0,
         beta: float = 0.0,
         quantiles: npt.ArrayLike | None = None,
-        boostrap_samples: int = 1000
+        boostrap_samples: int = 1000,
+        bayesian: bool = False
         ) -> FlowDurationCurve:
     """
     Computes an empirical flow duration curve derived from series. The flow
@@ -68,6 +69,13 @@ def compute_fdc(
     bootstrap_samples: int, optional, default 1000
         Used with `quantiles`. Indicates the number of bootstrap samples
         to generate for the posterior distribution of values.
+    bayesian: bool, optional, default False
+        When True alters the generation of quantile-based FDCs by randomly
+        weighting each bootstrapped sample. Samples are drawn from an
+        uninformative dirichlet distribution. This method will tend to produce
+        smoother FDCs, but the empirical FDC may not fall between the
+        quantiles. It's recommended to use the median quantile as the central
+        FDC. For example, `quantiles=[0.025, 0.5, 0.975]`.
     
     Returns
     -------
@@ -82,6 +90,11 @@ def compute_fdc(
     assert length > 0, "cannot derive FDC from an empty series"
     assert 0 <= alpha <= 1, "alpha must be between 0 and 1"
     assert 0 <= beta <= 1, "beta must be between 0 and 1"
+    if quantiles is not None:
+        assert boostrap_samples > 0, \
+            "bootstrap_samples must be positive integer"
+    if bayesian:
+        assert quantiles != None, "bayesian only used with quantiles"
 
     # Build FDC
     p = (np.arange(1, length+1) - alpha) / (length+1-alpha-beta)
@@ -91,10 +104,18 @@ def compute_fdc(
     )
     if quantiles is None:
         return fdc
-    
+
+    # Set weight function
+    if bayesian:
+        rng = np.random.default_rng()
+        weight_function = lambda: rng.dirichlet(np.ones(series.shape))
+    else:
+        weight_function = lambda: 1
+
     # Generate additional quantiles
     bs = IIDBootstrap(series, seed=2025)
-    posterior = bs.apply(lambda a: np.sort(a)[::-1], boostrap_samples)
+    posterior = bs.apply(lambda a: np.sort(a)[::-1] * weight_function(),
+        boostrap_samples)
     fdc.quantiles = quantiles
     fdc.quantile_values = np.quantile(posterior, quantiles, axis=0)
     return fdc
diff --git a/python/metrics/tests/test_signature.py b/python/metrics/tests/test_signature.py
index 387a860b..1a67a26f 100644
--- a/python/metrics/tests/test_signature.py
+++ b/python/metrics/tests/test_signature.py
@@ -16,6 +16,8 @@ def test_compute_fdc():
     assert fdc.values[-1] < fdc.values[0]
     assert np.min(fdc.values) == fdc.values[-1]
     assert np.max(fdc.values) == fdc.values[0]
+    assert np.min(fdc.values) == -1.0
+    assert np.max(fdc.values) == 100.0
     assert fdc.quantiles == None
     assert fdc.quantile_values == None
 
@@ -39,3 +41,11 @@ def test_compute_fdc():
     assert fdc.quantile_values[1][-1] < fdc.quantile_values[1][0]
     assert np.min(fdc.quantile_values[1]) == fdc.quantile_values[1][-1]
     assert np.max(fdc.quantile_values[1]) == fdc.quantile_values[1][0]
+
+    fdc = signature.compute_fdc(y, quantiles=[0.025, 0.5, 0.975],
+        bayesian=True)
+    low = fdc.quantile_values[0]
+    mid = fdc.quantile_values[1]
+    high = fdc.quantile_values[2]
+    assert np.all(np.where(low <= mid, True, False) & 
+        np.where(mid <= high, True, False))