add bayesian bootstrap

jarq6c · Feb 13, 2025 · c834b77 · c834b77
1 parent acfb5ac
commit c834b77
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 3 deletions.
diff --git a/python/metrics/src/hydrotools/metrics/signature.py b/python/metrics/src/hydrotools/metrics/signature.py
@@ -38,7 +38,8 @@ def compute_fdc(
         alpha: float = 0.0,
         beta: float = 0.0,
         quantiles: npt.ArrayLike | None = None,
-        boostrap_samples: int = 1000
+        boostrap_samples: int = 1000,
+        bayesian: bool = False
         ) -> FlowDurationCurve:
     """
     Computes an empirical flow duration curve derived from series. The flow
@@ -68,6 +69,13 @@ def compute_fdc(
     bootstrap_samples: int, optional, default 1000
         Used with `quantiles`. Indicates the number of bootstrap samples
         to generate for the posterior distribution of values.
+    bayesian: bool, optional, default False
+        When True alters the generation of quantile-based FDCs by randomly
+        weighting each bootstrapped sample. Samples are drawn from an
+        uninformative dirichlet distribution. This method will tend to produce
+        smoother FDCs, but the empirical FDC may not fall between the
+        quantiles. It's recommended to use the median quantile as the central
+        FDC. For example, `quantiles=[0.025, 0.5, 0.975]`.
     
     Returns
     -------
@@ -82,6 +90,11 @@ def compute_fdc(
     assert length > 0, "cannot derive FDC from an empty series"
     assert 0 <= alpha <= 1, "alpha must be between 0 and 1"
     assert 0 <= beta <= 1, "beta must be between 0 and 1"
+    if quantiles is not None:
+        assert boostrap_samples > 0, \
+            "bootstrap_samples must be positive integer"
+    if bayesian:
+        assert quantiles != None, "bayesian only used with quantiles"
 
     # Build FDC
     p = (np.arange(1, length+1) - alpha) / (length+1-alpha-beta)
@@ -91,10 +104,18 @@ def compute_fdc(
     )
     if quantiles is None:
         return fdc
-
+
+    # Set weight function
+    if bayesian:
+        rng = np.random.default_rng()
+        weight_function = lambda: rng.dirichlet(np.ones(series.shape))
+    else:
+        weight_function = lambda: 1
+
     # Generate additional quantiles
     bs = IIDBootstrap(series, seed=2025)
-    posterior = bs.apply(lambda a: np.sort(a)[::-1], boostrap_samples)
+    posterior = bs.apply(lambda a: np.sort(a)[::-1] * weight_function(),
+        boostrap_samples)
     fdc.quantiles = quantiles
     fdc.quantile_values = np.quantile(posterior, quantiles, axis=0)
     return fdc
diff --git a/python/metrics/tests/test_signature.py b/python/metrics/tests/test_signature.py
@@ -16,6 +16,8 @@ def test_compute_fdc():
     assert fdc.values[-1] < fdc.values[0]
     assert np.min(fdc.values) == fdc.values[-1]
     assert np.max(fdc.values) == fdc.values[0]
+    assert np.min(fdc.values) == -1.0
+    assert np.max(fdc.values) == 100.0
     assert fdc.quantiles == None
     assert fdc.quantile_values == None
 
@@ -39,3 +41,11 @@ def test_compute_fdc():
     assert fdc.quantile_values[1][-1] < fdc.quantile_values[1][0]
     assert np.min(fdc.quantile_values[1]) == fdc.quantile_values[1][-1]
     assert np.max(fdc.quantile_values[1]) == fdc.quantile_values[1][0]
+
+    fdc = signature.compute_fdc(y, quantiles=[0.025, 0.5, 0.975],
+        bayesian=True)
+    low = fdc.quantile_values[0]
+    mid = fdc.quantile_values[1]
+    high = fdc.quantile_values[2]
+    assert np.all(np.where(low <= mid, True, False) & 
+        np.where(mid <= high, True, False))