-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy patheffect_size.py
115 lines (90 loc) · 3.75 KB
/
effect_size.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from bisect import bisect_left
from typing import List, Tuple
import numpy as np
import scipy.stats as ss
def _get_cohend_thresholds(effect_size: float) -> str:
magnitude = "large"
if abs(effect_size) < 0.50:
magnitude = "small"
elif 0.50 <= abs(effect_size) < 0.80:
magnitude = "medium"
return magnitude
# function to calculate Cohen's d for independent samples
def cohend(a: List[float], b: List[float]) -> Tuple[float, str]:
    """
    Computes Cohen's d for two independent samples
    d is the mean of a minus the mean of b, divided by the pooled standard
    deviation of the two samples (each sample variance uses ddof=1)
    :param a: a numeric list
    :param b: another numeric list
    :returns the effect size and its magnitude
    """
    # plain lists are turned into arrays; any other input is used as given
    sample_a = np.asarray(a) if isinstance(a, list) else a
    sample_b = np.asarray(b) if isinstance(b, list) else b

    size_a = len(sample_a)
    size_b = len(sample_b)
    # FIXME: check whether the two samples should be required to have the
    # same length; no such check is enforced at the moment

    var_a = np.var(sample_a, ddof=1)
    var_b = np.var(sample_b, ddof=1)

    # each variance is weighted by its degrees of freedom before pooling
    weighted_var = (size_a - 1) * var_a + (size_b - 1) * var_b
    pooled_std = np.sqrt(weighted_var / (size_a + size_b - 2))

    mean_diff = np.mean(sample_a) - np.mean(sample_b)
    effect_size = mean_diff / pooled_std
    return effect_size, _get_cohend_thresholds(effect_size=effect_size)
def odds_ratio_to_cohend(odds_ratio: float) -> Tuple[float, str]:
    """
    Converts an odds ratio to Cohen's d
    The conversion is d = ln(odds_ratio) * sqrt(3) / pi
    See: https://cran.r-project.org/web/packages/effectsize/effectsize.pdf at pg 15.
    :param odds_ratio: the odds ratio to convert
    :returns the effect size and its magnitude
    """
    log_odds = np.log(odds_ratio)
    effect_size = log_odds * np.sqrt(3) / np.pi
    return effect_size, _get_cohend_thresholds(effect_size=effect_size)
def _compute_magnitude_vargha_delaney(a12: float) -> str:
levels = [0.147, 0.33, 0.474] # effect sizes from Hess and Kromrey, 2004
magnitude = ["negligible", "small", "medium", "large"]
scaled_a12 = (a12 - 0.5) * 2
magnitude = magnitude[bisect_left(levels, abs(scaled_a12))]
return magnitude
# https://gist.github.com/timm/5630491
def vargha_delaney_unpaired(a: List[float], b: List[float]) -> Tuple[float, str]:
    """
    Computes Vargha and Delaney A index by comparing every pair of values
    A. Vargha and H. D. Delaney.
    A critique and improvement of the CL common language
    effect size statistics of McGraw and Wong.
    Journal of Educational and Behavioral Statistics, 25(2):101-132, 2000
    Every pair (x, y) with x taken from a and y taken from b scores 1 when
    x > y and 0.5 when x == y; A is the total score divided by
    len(a) * len(b)
    :param a: a numeric list
    :param b: another numeric list
    :returns the value estimate and the magnitude
    """
    assert isinstance(a, list)
    assert isinstance(b, list)
    # wins count fully, ties count for half, losses contribute nothing
    score = sum(
        (1.0 if x > y else 0.5 if x == y else 0.0 for x in a for y in b),
        0.0,
    )
    estimate = score / (len(a) * len(b))
    return estimate, _compute_magnitude_vargha_delaney(a12=estimate)
def vargha_delaney(a: List[float], b: List[float]) -> Tuple[float, str]:
    """
    Computes Vargha and Delaney A index from the ranks of a + b
    A. Vargha and H. D. Delaney.
    A critique and improvement of the CL common language
    effect size statistics of McGraw and Wong.
    Journal of Educational and Behavioral Statistics, 25(2):101-132, 2000
    The formula to compute A has been transformed to minimize accuracy errors
    See: http://mtorchiano.wordpress.com/2014/05/19/effect-size-of-r-precision/
    :param a: a numeric list
    :param b: another numeric list, asserted to be as long as a
    :returns the value estimate and the magnitude
    """
    assert isinstance(a, list)
    assert isinstance(b, list)
    size_a, size_b = len(a), len(b)
    assert size_a == size_b, "The two list must be of the same length: {}, {}".format(
        size_a, size_b
    )
    # rank the concatenated values; the first size_a ranks belong to a
    ranks = ss.rankdata(a + b)
    rank_sum_a = sum(ranks[:size_a])
    # formula (14) in Vargha and Delaney, 2000 is
    #   A = (r1/m - (m+1)/2)/n
    # the form below is equivalent and avoids accuracy errors
    numerator = 2 * rank_sum_a - size_a * (size_a + 1)
    denominator = 2 * size_b * size_a
    estimate = numerator / denominator
    return estimate, _compute_magnitude_vargha_delaney(a12=estimate)