Skip to content

Commit 4cb3e2a

Browse files
committed
update parametric assumptions test with multiple comparison correction
1 parent ecc9170 commit 4cb3e2a

File tree

2 files changed

+21
-21
lines changed

2 files changed

+21
-21
lines changed

pages/6_Parametric_assumptions_evaluation.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -33,20 +33,20 @@
3333
help="Select two options.",
3434
)
3535
if st.session_state.test_attribute and len(st.session_state.test_options) == 2:
36-
tabs = st.tabs(["📊 Normal distribution", "📊 Equal variance"])
36+
tabs = st.tabs(["📊 Normal distribution (Shapiro-Wilk test)", "📊 Equal variance (Levene test)"])
3737
with tabs[0]:
38-
fig = test_normal_distribution(st.session_state.test_attribute, st.session_state.test_options)
38+
fig = test_normal_distribution(st.session_state.test_attribute, st.session_state.test_options, corrections_map[st.session_state.p_value_correction])
3939
if fig:
4040
show_fig(fig, "test-normal-distribution")
4141
with tabs[1]:
42-
fig = test_equal_variance(st.session_state.test_attribute, st.session_state.test_options)
42+
fig = test_equal_variance(st.session_state.test_attribute, st.session_state.test_options, corrections_map[st.session_state.p_value_correction])
4343
show_fig(fig, "test-equal-variance")
4444

4545
st.info(
4646
"""💡 **Interpretation**
4747
4848
In both tests low p-values indicate that data points for a feature are **NOT** normally distributed or do **NOT** have similar variance.
49-
To meet **parametric** criteria the p-values in the histograms should be equally distributed between 0 and 1.
49+
To meet **parametric** criteria the p-values in the histograms should not be smaller than 0.05.
5050
When a larger number of data points indicate low p-values, it would be advisable to opt for a **non-parametric** statistical test.
5151
"""
5252
)

src/testparametric.py

+17-17
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@
22
import pandas as pd
33
import plotly.express as px
44
import scipy.stats as stats
5+
import pingouin as pg
56

67

78
@st.cache_data
8-
def test_equal_variance(attribute, between):
9+
def test_equal_variance(attribute, between, correction):
910
# test for equal variance
1011
data = pd.concat([st.session_state.data, st.session_state.md], axis=1)
1112
variance = pd.DataFrame(
1213
{
13-
f"{between[0]} - {between[1]}": [
14+
f"{between[0]} - {between[1]}": pg.multicomp([
1415
stats.levene(
1516
data.loc[
1617
(data[attribute] == between[0]),
@@ -22,19 +23,19 @@ def test_equal_variance(attribute, between):
2223
],
2324
)[1]
2425
for f in st.session_state.data.columns
25-
]
26+
], method=correction)[1]
2627
}
2728
)
2829
fig = px.histogram(
2930
variance,
30-
nbins=100,
31+
nbins=20,
3132
template="plotly_white",
33+
range_x=[-0.025, 1.025],
3234
)
33-
fig.update_traces(marker_color="#696880")
3435
fig.update_layout(
3536
bargap=0.2,
3637
font={"color": "grey", "size": 12, "family": "Sans"},
37-
title={"text": f"TEST FOR EQUAL VARIANCE", "font_color": "#3E3D53"},
38+
title={"text": f"TEST FOR EQUAL VARIANCE (LEVENE)", "font_color": "#3E3D53"},
3839
xaxis_title="p-value",
3940
yaxis_title="count",
4041
showlegend=False
@@ -43,7 +44,7 @@ def test_equal_variance(attribute, between):
4344

4445

4546
@st.cache_data
46-
def test_normal_distribution(attribute, between):
47+
def test_normal_distribution(attribute, between, correction):
4748
# test for normal distribution
4849
data = pd.concat([st.session_state.data, st.session_state.md], axis=1)
4950
for b in between:
@@ -52,34 +53,33 @@ def test_normal_distribution(attribute, between):
5253
return None
5354
normality = pd.DataFrame(
5455
{
55-
f"{b}": [
56+
f"{b}": pg.multicomp([
5657
stats.shapiro(
5758
data.loc[
58-
(data[attribute] == between[0]),
59+
(data[attribute] == b),
5960
f,
6061
]
6162
)[1]
6263
for f in st.session_state.data.columns
63-
]
64+
], method = correction)[1]
6465
for b in between
6566
}
6667
)
6768

6869
fig = px.histogram(
69-
normality.iloc[:, 1],
70-
nbins=100,
70+
normality,
71+
nbins=20,
7172
template="plotly_white",
72-
color_discrete_sequence=["#696880", "#ef553b"],
73-
opacity=0.8,
73+
range_x=[-0.025, 1.025],
74+
barmode="group",
7475
)
75-
fig.update_traces(marker_color="#696880")
7676

7777
fig.update_layout(
7878
bargap=0.2,
7979
font={"color": "grey", "size": 12, "family": "Sans"},
80-
title={"text": f"TEST FOR NORMALITY", "font_color": "#3E3D53"},
80+
title={"text": f"TEST FOR NORMALITY (SHAPIRO-WILK)", "font_color": "#3E3D53"},
8181
xaxis_title="p-value",
8282
yaxis_title="count",
83-
showlegend=False
83+
showlegend=True
8484
)
8585
return fig

0 commit comments

Comments
 (0)