Skip to content

Commit

Permalink
add bayesian bootstrap
Browse files Browse the repository at this point in the history
  • Loading branch information
jarq6c committed Feb 13, 2025
1 parent acfb5ac commit c834b77
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 3 deletions.
27 changes: 24 additions & 3 deletions python/metrics/src/hydrotools/metrics/signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ def compute_fdc(
alpha: float = 0.0,
beta: float = 0.0,
quantiles: npt.ArrayLike | None = None,
boostrap_samples: int = 1000
boostrap_samples: int = 1000,
bayesian: bool = False
) -> FlowDurationCurve:
"""
Computes an empirical flow duration curve derived from series. The flow
Expand Down Expand Up @@ -68,6 +69,13 @@ def compute_fdc(
bootstrap_samples: int, optional, default 1000
Used with `quantiles`. Indicates the number of bootstrap samples
to generate for the posterior distribution of values.
bayesian: bool, optional, default False
When True alters the generation of quantile-based FDCs by randomly
weighting each bootstrapped sample. Samples are drawn from an
uninformative dirichlet distribution. This method will tend to produce
smoother FDCs, but the empirical FDC may not fall between the
quantiles. It's recommended to use the median quantile as the central
FDC. For example, `quantiles=[0.025, 0.5, 0.975]`.
Returns
-------
Expand All @@ -82,6 +90,11 @@ def compute_fdc(
assert length > 0, "cannot derive FDC from an empty series"
assert 0 <= alpha <= 1, "alpha must be between 0 and 1"
assert 0 <= beta <= 1, "beta must be between 0 and 1"
if quantiles is not None:
assert boostrap_samples > 0, \
"bootstrap_samples must be positive integer"
if bayesian:
assert quantiles != None, "bayesian only used with quantiles"

# Build FDC
p = (np.arange(1, length+1) - alpha) / (length+1-alpha-beta)
Expand All @@ -91,10 +104,18 @@ def compute_fdc(
)
if quantiles is None:
return fdc


# Set weight function
if bayesian:
rng = np.random.default_rng()
weight_function = lambda: rng.dirichlet(np.ones(series.shape))
else:
weight_function = lambda: 1

# Generate additional quantiles
bs = IIDBootstrap(series, seed=2025)
posterior = bs.apply(lambda a: np.sort(a)[::-1], boostrap_samples)
posterior = bs.apply(lambda a: np.sort(a)[::-1] * weight_function(),
boostrap_samples)
fdc.quantiles = quantiles
fdc.quantile_values = np.quantile(posterior, quantiles, axis=0)
return fdc
10 changes: 10 additions & 0 deletions python/metrics/tests/test_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def test_compute_fdc():
assert fdc.values[-1] < fdc.values[0]
assert np.min(fdc.values) == fdc.values[-1]
assert np.max(fdc.values) == fdc.values[0]
assert np.min(fdc.values) == -1.0
assert np.max(fdc.values) == 100.0
assert fdc.quantiles == None
assert fdc.quantile_values == None

Expand All @@ -39,3 +41,11 @@ def test_compute_fdc():
assert fdc.quantile_values[1][-1] < fdc.quantile_values[1][0]
assert np.min(fdc.quantile_values[1]) == fdc.quantile_values[1][-1]
assert np.max(fdc.quantile_values[1]) == fdc.quantile_values[1][0]

fdc = signature.compute_fdc(y, quantiles=[0.025, 0.5, 0.975],
bayesian=True)
low = fdc.quantile_values[0]
mid = fdc.quantile_values[1]
high = fdc.quantile_values[2]
assert np.all(np.where(low <= mid, True, False) &
np.where(mid <= high, True, False))

0 comments on commit c834b77

Please sign in to comment.