Skip to content

Commit

Permalink
added basic regression functionalities
Browse files: browse the repository at this point in the history
  • Loading branch information
MamadouSDiallo committed Feb 4, 2025
1 parent 798cbf6 commit ac6362e
Show file tree
Hide file tree
Showing 52 changed files with 1,370 additions and 428 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ exclude = '''
# target-version = ["py310", "py311"]

[tool.pytest.ini_options]
addopts = "--ignore=tests/apis --ignore=tests/sae --ignore=tests/types --ignore=tests/categorical"
addopts = "--ignore=tests/apis --ignore=tests/sae --ignore=tests/types"

# testpaths = ["tests"]

Expand Down
4 changes: 3 additions & 1 deletion src/samplics/apis/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@ def predict(
intercept: bool = True, # if True, it adds an intercept of 1
b_const: DictStrNum | Number = 1.0,
):
return _predict_eblup(x=x, fit_eblup=fit_stats, y=y, intercept=intercept, b_const=b_const)
return _predict_eblup(
x=x, fit_eblup=fit_stats, y=y, intercept=intercept, b_const=b_const
)
93 changes: 58 additions & 35 deletions src/samplics/categorical/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,13 @@


class Ttest:
def __init__(self, samp_type: str, paired: bool = False, alpha: float = 0.05) -> None:
def __init__(
self, samp_type: str, paired: bool = False, alpha: float = 0.05
) -> None:
if samp_type.lower() not in ("one-sample", "two-sample"):
raise ValueError("Parameter 'type' must be equal to 'one-sample', 'two-sample'!")
raise ValueError(
"Parameter 'type' must be equal to 'one-sample', 'two-sample'!"
)
assert_probabilities(x=alpha)

self.samp_type = samp_type.lower()
Expand All @@ -58,17 +62,27 @@ def __str__(self) -> str:
return "No table to display"
else:
tbl_head = f"Design-based {self.samp_type.title()} T-test"
if (self.samp_type == "one-sample" and self.group_names == []) or self.paired:
if (
self.samp_type == "one-sample" and self.group_names == []
) or self.paired:
if self.samp_type == "one-sample":
tbl_subhead1 = f" Null hypothesis (Ho): mean = {self.stats['known_mean']}"
tbl_subhead1 = (
f" Null hypothesis (Ho): mean = {self.stats['known_mean']}"
)
else:
tbl_subhead1 = f" Null hypothesis (Ho): mean(Diff = {self.vars_names[0]} - {self.vars_names[1]}) = 0"
tbl_subhead2 = f" t statistics: {self.stats['t']:.4f}"
tbl_subhead3 = f" Degrees of freedom: {self.stats['df']:.2f}"
tbl_subhead4 = " Alternative hypothesis (Ha):"
tbl_subhead4a = f" Prob(T < t) = {self.stats['p_value']['less_than']:.4f}"
tbl_subhead4b = f" Prob(|T| > |t|) = {self.stats['p_value']['not_equal']:.4f}"
tbl_subhead4c = f" Prob(T > t) = {self.stats['p_value']['greater_than']:.4f}"
tbl_subhead4a = (
f" Prob(T < t) = {self.stats['p_value']['less_than']:.4f}"
)
tbl_subhead4b = (
f" Prob(|T| > |t|) = {self.stats['p_value']['not_equal']:.4f}"
)
tbl_subhead4c = (
f" Prob(T > t) = {self.stats['p_value']['greater_than']:.4f}"
)

return f"\n{tbl_head}\n{tbl_subhead1}\n{tbl_subhead2}\n{tbl_subhead3}\n{tbl_subhead4}\n{tbl_subhead4a}\n{tbl_subhead4b}\n{tbl_subhead4c} \n\n{self.to_dataframe().to_string(index=False)}\n"

Expand All @@ -78,30 +92,22 @@ def __str__(self) -> str:
tbl_subhead1 = f" Null hypothesis (Ho): mean({self.group_names[0]}) = mean({self.group_names[1]}) "
tbl_subhead2 = " Equal variance assumption:"
tbl_subhead2a = f" t statistics: {self.stats['t_eq_variance']:.4f}"
tbl_subhead2b = f" Degrees of freedom: {self.stats['df_eq_variance']:.2f}"
tbl_subhead3 = " Alternative hypothesis (Ha):"
tbl_subhead3a = (
f" Prob(T < t) = {self.stats['p_value_eq_variance']['less_than']:.4f}"
)
tbl_subhead3b = (
f" Prob(|T| > |t|) = {self.stats['p_value_eq_variance']['not_equal']:.4f}"
)
tbl_subhead3c = (
f" Prob(T > t) = {self.stats['p_value_eq_variance']['greater_than']:.4f}"
tbl_subhead2b = (
f" Degrees of freedom: {self.stats['df_eq_variance']:.2f}"
)
tbl_subhead3 = " Alternative hypothesis (Ha):"
tbl_subhead3a = f" Prob(T < t) = {self.stats['p_value_eq_variance']['less_than']:.4f}"
tbl_subhead3b = f" Prob(|T| > |t|) = {self.stats['p_value_eq_variance']['not_equal']:.4f}"
tbl_subhead3c = f" Prob(T > t) = {self.stats['p_value_eq_variance']['greater_than']:.4f}"
tbl_subhead4 = " Unequal variance assumption:"
tbl_subhead4a = f" t statistics: {self.stats['t_uneq_variance']:.4f}"
tbl_subhead4b = f" Degrees of freedom: {self.stats['df_uneq_variance']:.2f}"
tbl_subhead5 = " Alternative hypothesis (Ha):"
tbl_subhead5a = (
f" Prob(T < t) = {self.stats['p_value_uneq_variance']['less_than']:.4f}"
)
tbl_subhead5b = (
f" Prob(|T| > |t|) = {self.stats['p_value_uneq_variance']['not_equal']:.4f}"
)
tbl_subhead5c = (
f" Prob(T > t) = {self.stats['p_value_uneq_variance']['greater_than']:.4f}"
tbl_subhead4b = (
f" Degrees of freedom: {self.stats['df_uneq_variance']:.2f}"
)
tbl_subhead5 = " Alternative hypothesis (Ha):"
tbl_subhead5a = f" Prob(T < t) = {self.stats['p_value_uneq_variance']['less_than']:.4f}"
tbl_subhead5b = f" Prob(|T| > |t|) = {self.stats['p_value_uneq_variance']['not_equal']:.4f}"
tbl_subhead5c = f" Prob(T > t) = {self.stats['p_value_uneq_variance']['greater_than']:.4f}"

return f"\n{tbl_head}\n{tbl_subhead1}\n{tbl_subhead2}\n{tbl_subhead2a}\n{tbl_subhead2b}\n{tbl_subhead3}\n{tbl_subhead3a}\n{tbl_subhead3b}\n{tbl_subhead3c}\n{tbl_subhead4}\n{tbl_subhead4a}\n{tbl_subhead4b}\n{tbl_subhead5}\n{tbl_subhead5a}\n{tbl_subhead5b}\n{tbl_subhead5c} \n\n{self.to_dataframe().to_string(index=False)}\n"
else:
Expand All @@ -117,7 +123,9 @@ def _one_sample_one_group(
ssu: Array,
fpc: Union[Dict, float] = 1,
coef_var: bool = False,
single_psu: Union[SinglePSUEst, dict[StringNumber, SinglePSUEst]] = SinglePSUEst.error,
single_psu: Union[
SinglePSUEst, dict[StringNumber, SinglePSUEst]
] = SinglePSUEst.error,
strata_comb: Optional[dict[Array, Array]] = None,
) -> None:
one_sample = TaylorEstimator(param=PopParam.mean, alpha=self.alpha)
Expand Down Expand Up @@ -186,7 +194,10 @@ def _two_groups_unpaired(

t_equal_variance = (mean_group1 - mean_group2) / (
math.sqrt(
((nb_obs_group1 - 1) * stddev_group1**2 + (nb_obs_group2 - 1) * stddev_group2**2)
(
(nb_obs_group1 - 1) * stddev_group1**2
+ (nb_obs_group2 - 1) * stddev_group2**2
)
/ (nb_obs_group1 + nb_obs_group2 - 2)
)
* math.sqrt(1 / nb_obs_group1 + 1 / nb_obs_group2)
Expand All @@ -206,10 +217,14 @@ def _two_groups_unpaired(
)

left_p_value_equal_variance = t.cdf(t_equal_variance, t_df_equal_variance)
both_p_value_equal_variance = 2 * t.cdf(-abs(t_equal_variance), t_df_equal_variance)
both_p_value_equal_variance = 2 * t.cdf(
-abs(t_equal_variance), t_df_equal_variance
)

left_p_value_unequal_variance = t.cdf(t_unequal_variance, t_df_unequal_variance)
both_p_value_unequal_variance = 2 * t.cdf(-abs(t_unequal_variance), t_df_unequal_variance)
both_p_value_unequal_variance = 2 * t.cdf(
-abs(t_unequal_variance), t_df_unequal_variance
)

stats = {
"number_obs": {group1: nb_obs_group1, group2: nb_obs_group2},
Expand Down Expand Up @@ -262,7 +277,9 @@ def _two_samples_unpaired(
ssu: Optional[Array] = None,
fpc: Union[Dict, float] = 1,
coef_var: bool = False,
single_psu: Union[SinglePSUEst, dict[StringNumber, SinglePSUEst]] = SinglePSUEst.error,
single_psu: Union[
SinglePSUEst, dict[StringNumber, SinglePSUEst]
] = SinglePSUEst.error,
strata_comb: Optional[dict[Array, Array]] = None,
) -> None:
two_samples_unpaired = TaylorEstimator(param=PopParam.mean, alpha=self.alpha)
Expand Down Expand Up @@ -292,7 +309,9 @@ def compare(
ssu: Optional[Array] = None,
fpc: Union[Dict, float] = 1,
coef_var: bool = False,
single_psu: Union[SinglePSUEst, dict[StringNumber, SinglePSUEst]] = SinglePSUEst.error,
single_psu: Union[
SinglePSUEst, dict[StringNumber, SinglePSUEst]
] = SinglePSUEst.error,
strata_comb: Optional[dict[Array, Array]] = None,
remove_nan: bool = False,
) -> None:
Expand All @@ -301,7 +320,9 @@ def compare(
if known_mean is None and group is None:
raise AssertionError("Parameters 'known_mean' or 'group' must be provided!")
if known_mean is not None and group is not None:
raise AssertionError("Only one parameter 'known_mean' or 'group' should be provided!")
raise AssertionError(
"Only one parameter 'known_mean' or 'group' should be provided!"
)

if varnames is None:
self.vars_names = set_variables_names(y, None, "var")
Expand Down Expand Up @@ -369,7 +390,9 @@ def compare(
strata_comb=strata_comb,
)

two_samples_unpaired = TaylorEstimator(param=PopParam.mean, alpha=self.alpha)
two_samples_unpaired = TaylorEstimator(
param=PopParam.mean, alpha=self.alpha
)
two_samples_unpaired.estimate(
y=_y,
by=_group,
Expand Down
Loading

0 comments on commit ac6362e

Please sign in to comment.