From 93025b48930d8744a55b9842204bfa91131fb841 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20T=C3=BD=C4=8D?= Date: Sun, 11 Aug 2024 18:19:03 +0200 Subject: [PATCH 1/3] Add basic comparison code --- estimage/statops/compare.py | 34 ++++++++++++++++++++++++++++++++++ tests/test_statops_compare.py | 26 ++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 estimage/statops/compare.py create mode 100644 tests/test_statops_compare.py diff --git a/estimage/statops/compare.py b/estimage/statops/compare.py new file mode 100644 index 0000000..a58459b --- /dev/null +++ b/estimage/statops/compare.py @@ -0,0 +1,34 @@ +import numpy as np +import scipy as sp + + +# \int_{-\inf}^{\inf}l(x)\int_{x}^{\inf}g(t)\, \textup{d}t\, \textup{d}x +# where l is the function that we think should be lower, and g the greater +def _integrate(dom, one, two): + res = 0 + for i, x in enumerate(dom): + toadd = two[i] * 0.5 + toadd += sum(two[i + 1:]) + toadd *= one[i] + res += toadd + return res + + +def _integrate(dom, one, two): + res = 0 + for i, x in enumerate(dom): + toadd = two[i] * 0.5 + toadd += sum(two[i + 1:]) + toadd *= one[i] + res += toadd + return res + + +def is_lower(dom, one, two): + dom = np.array(dom, dtype=float) + one = np.array(one, dtype=float) + one /= one.sum() + two = np.array(two, dtype=float) + two /= two.sum() + return integrate(dom, one, two) + diff --git a/tests/test_statops_compare.py b/tests/test_statops_compare.py new file mode 100644 index 0000000..03ca5bf --- /dev/null +++ b/tests/test_statops_compare.py @@ -0,0 +1,26 @@ +import estimage.statops.compare as tm + + +def test_compare_trivial(): + assert tm.is_lower([0], [1], [1]) == 0.5 + + +def test_compare_equal(): + assert tm.is_lower([0, 1], [1, 1], [1, 1]) == 0.5 + assert tm.is_lower([0, 1, 2], [1, 1, 1], [1, 1, 1]) == 0.5 + assert tm.is_lower([0, 1, 2, 3], [1, 1, 1, 1], [1, 1, 1, 1]) == 0.5 + assert tm.is_lower([0, 1, 2], [0, 1, 0], [1, 1, 1]) == 0.5 + + +def test_compare_clear(): + assert tm.is_lower([0, 1], [0, 1], [1, 0]) == 0 + assert tm.is_lower([0, 1], [1, 0], [0, 1]) == 1 + assert tm.is_lower([0, 1, 2, 3], [1, 1, 0, 0], [0, 0, 1, 1]) == 1 + assert tm.is_lower([0, 1, 2, 3], [0, 0, 1, 1], [1, 1, 0, 0]) == 0 + + +def test_compare_weighted(): + assert 0.99 < tm.is_lower([0, 1, 2, 3], [1, 1, 0, 0], [0, 0.01, 1, 1]) < 1 + +# TODO: Comparison of estimates +# TODO: %-complete From 317acba6647d2e30ab604af18754854a0f6ff04c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20T=C3=BD=C4=8D?= Date: Thu, 5 Sep 2024 22:15:25 +0200 Subject: [PATCH 2/3] Implement neat is_lower calculation --- estimage/statops/compare.py | 55 +++++++++++++++++++++++++++++------ tests/test_statops_compare.py | 31 ++++++++++++++++++++ 2 files changed, 77 insertions(+), 9 deletions(-) diff --git a/estimage/statops/compare.py b/estimage/statops/compare.py index a58459b..31520eb 100644 --- a/estimage/statops/compare.py +++ b/estimage/statops/compare.py @@ -14,14 +14,42 @@ def _integrate(dom, one, two): return res -def _integrate(dom, one, two): - res = 0 - for i, x in enumerate(dom): - toadd = two[i] * 0.5 - toadd += sum(two[i + 1:]) - toadd *= one[i] - res += toadd - return res +def _integrate_estimate_both_degenerate(one, two): + if one.expected == two.expected: + return 0.5 + if one.expected < two.expected: + return 1 + return 0 + + +def _integrate_estimate_second_degenerate(one, two): + threshold = two.expected + rv = one._get_rv() + return rv.cdf(threshold) + + +def _integrate_estimate_first_degenerate(one, two): + threshold = one.expected + rv = two._get_rv() + return 1 - rv.cdf(threshold) + + +INTEGRATION_PRECISION = 0.001 + + +# \int_{-\inf}^{\inf}l(x)\int_{x}^{\inf}g(t)\, \textup{d}t\, \textup{d}x is in fact +# \int_{-\inf}^{\inf}l(x)\, \textup{d}x - \int_{-\inf}^{\inf}l(x)G(x)\, \textup{d}x = +# = 1 - \int_{-\inf}^{\inf}l(x)G(x)\, \textup{d}x where +# G(x) is distribution function of the g randvar +def _integrate_estimate(one, two): + # unlike upper bound case, PDF and CDF are zero below their respective lower bound + effective_lower_bound = max(one.source.optimistic, two.source.optimistic) + safe_upper_bound = max(one.source.pessimistic, two.source.pessimistic) + lower_rv = one._get_rv() + upper_rv = two._get_rv() + def integrand(x): return lower_rv.pdf(x) * upper_rv.cdf(x) + result = sp.integrate.quad(integrand, effective_lower_bound, safe_upper_bound, epsabs=INTEGRATION_PRECISION) + return 1 - result[0] def is_lower(dom, one, two): @@ -30,5 +58,14 @@ def is_lower(dom, one, two): one /= one.sum() two = np.array(two, dtype=float) two /= two.sum() - return integrate(dom, one, two) + return _integrate(dom, one, two) + +def estimate_is_lower(one, two): + if one.sigma == 0 and two.sigma == 0: + return _integrate_estimate_both_degenerate(one, two) + if two.sigma == 0: + return _integrate_estimate_second_degenerate(one, two) + if one.sigma == 0: + return _integrate_estimate_first_degenerate(one, two) + return _integrate_estimate(one, two) diff --git a/tests/test_statops_compare.py b/tests/test_statops_compare.py index 03ca5bf..e83be41 100644 --- a/tests/test_statops_compare.py +++ b/tests/test_statops_compare.py @@ -1,8 +1,17 @@ import estimage.statops.compare as tm +import estimage.data as data + +import pytest + + +def integ_approx(x): + return pytest.approx(x, abs=tm.INTEGRATION_PRECISION) def test_compare_trivial(): assert tm.is_lower([0], [1], [1]) == 0.5 + degenerate_estimate = data.Estimate(1, 0) + assert tm.estimate_is_lower(degenerate_estimate, degenerate_estimate) == 0.5 def test_compare_equal(): @@ -10,13 +19,35 @@ def test_compare_equal(): assert tm.is_lower([0, 1, 2], [1, 1, 1], [1, 1, 1]) == 0.5 assert tm.is_lower([0, 1, 2, 3], [1, 1, 1, 1], [1, 1, 1, 1]) == 0.5 assert tm.is_lower([0, 1, 2], [0, 1, 0], [1, 1, 1]) == 0.5 + simple_estimate = data.Estimate.from_triple(1, 0, 2) + assert tm.estimate_is_lower(simple_estimate, simple_estimate) == 0.5 + concrete_estimate = data.Estimate.from_triple(2, 1, 3) + assert tm.estimate_is_lower(concrete_estimate, concrete_estimate) == 0.5 def test_compare_clear(): assert tm.is_lower([0, 1], [0, 1], [1, 0]) == 0 assert tm.is_lower([0, 1], [1, 0], [0, 1]) == 1 + assert tm.estimate_is_lower(data.Estimate(1, 0), data.Estimate(0, 0)) == 0 + assert tm.estimate_is_lower(data.Estimate(0, 0), data.Estimate(1, 0)) == 1 assert tm.is_lower([0, 1, 2, 3], [1, 1, 0, 0], [0, 0, 1, 1]) == 1 assert tm.is_lower([0, 1, 2, 3], [0, 0, 1, 1], [1, 1, 0, 0]) == 0 + low_estimate = data.Estimate.from_triple(1, 0, 3) + high_estimate = data.Estimate.from_triple(4, 3, 6) + assert tm.estimate_is_lower(low_estimate, high_estimate) == integ_approx(1) + assert tm.estimate_is_lower(high_estimate, low_estimate) == integ_approx(0) + + +def test_compare_overlapping(): + low_estimate = data.Estimate.from_triple(1, 0, 3) + test_estimate = data.Estimate(1, 0) + assert 0 < tm.estimate_is_lower(low_estimate, test_estimate) < 0.5 + assert 0.5 < tm.estimate_is_lower(test_estimate, low_estimate) < 1 + almost_low_estimate = data.Estimate.from_triple(1.1, 0.1, 3.1) + assert 0.5 < tm.estimate_is_lower(low_estimate, almost_low_estimate) < 0.6 + low_end_estimate = data.Estimate.from_triple(0.5, 0, 1) + assert 0.1 < tm.estimate_is_lower(low_estimate, low_end_estimate) < 0.2 + assert 0.8 < tm.estimate_is_lower(low_end_estimate, low_estimate) < 0.9 def test_compare_weighted(): From d96cbde0c6589279ce0d944444d06d02386aa23c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20T=C3=BD=C4=8D?= Date: Sun, 8 Sep 2024 15:16:53 +0200 Subject: [PATCH 3/3] Add robust tests --- tests/test_statops_compare.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/test_statops_compare.py b/tests/test_statops_compare.py index e83be41..70bf8fe 100644 --- a/tests/test_statops_compare.py +++ b/tests/test_statops_compare.py @@ -4,6 +4,11 @@ import pytest +def montecarlo_is_lower_test(one, two, samples=1000): + rvs_one, rvs_two = [est._get_rv().rvs(samples) for est in (one, two)] + return sum(rvs_one < rvs_two) / samples + + def integ_approx(x): return pytest.approx(x, abs=tm.INTEGRATION_PRECISION) @@ -23,6 +28,7 @@ def test_compare_equal(): assert tm.estimate_is_lower(simple_estimate, simple_estimate) == 0.5 concrete_estimate = data.Estimate.from_triple(2, 1, 3) assert tm.estimate_is_lower(concrete_estimate, concrete_estimate) == 0.5 + assert montecarlo_is_lower_test(concrete_estimate, concrete_estimate) == pytest.approx(0.5, abs=0.05) def test_compare_clear(): @@ -47,7 +53,20 @@ def test_compare_overlapping(): assert 0.5 < tm.estimate_is_lower(low_estimate, almost_low_estimate) < 0.6 low_end_estimate = data.Estimate.from_triple(0.5, 0, 1) assert 0.1 < tm.estimate_is_lower(low_estimate, low_end_estimate) < 0.2 - assert 0.8 < tm.estimate_is_lower(low_end_estimate, low_estimate) < 0.9 + rate = tm.estimate_is_lower(low_end_estimate, low_estimate) + assert 0.8 < rate < 0.9 + assert montecarlo_is_lower_test(low_end_estimate, low_estimate) == pytest.approx(rate, abs=0.05) + + +def test_compare_general(): + one = data.Estimate.from_triple(2, 1, 3) + two = data.Estimate.from_triple(1.5, -2, 4) + rate = tm.estimate_is_lower(one, two) + assert montecarlo_is_lower_test(one, two, 4000) == pytest.approx(rate, abs=0.01) + one = data.Estimate.from_triple(2.5, 1, 3) + two = data.Estimate.from_triple(1.5, 1, 4) + rate = tm.estimate_is_lower(one, two) + assert montecarlo_is_lower_test(one, two) == pytest.approx(rate, abs=0.05) def test_compare_weighted():