feat(browser): auto-commit con 178 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,106 @@
|
||||
"""Normality tests for a numeric column.
|
||||
|
||||
Pure, deterministic helper that runs a battery of normality hypothesis
|
||||
tests over a numeric sample and reports, per test, whether the data is
|
||||
consistent with a normal distribution at a given significance level.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
|
||||
from scipy import stats
|
||||
|
||||
|
||||
def _clean(values: list) -> list[float]:
|
||||
"""Keep only finite numeric values, dropping None/NaN/non-numeric.
|
||||
|
||||
Booleans are excluded explicitly: in Python ``bool`` is a subclass of
|
||||
``int`` but treating True/False as numbers in a normality test is
|
||||
almost always a data-typing mistake.
|
||||
"""
|
||||
out: list[float] = []
|
||||
for v in values:
|
||||
if v is None or isinstance(v, bool):
|
||||
continue
|
||||
if not isinstance(v, (int, float)):
|
||||
continue
|
||||
x = float(v)
|
||||
if math.isnan(x) or math.isinf(x):
|
||||
continue
|
||||
out.append(x)
|
||||
return out
|
||||
|
||||
|
||||
def normality_tests(values: list, alpha: float = 0.05) -> dict:
    """Assess whether a numeric sample is consistent with normality.

    The input is first filtered with ``_clean``; the remaining sample is
    then passed to Jarque-Bera (``scipy.stats.jarque_bera``),
    D'Agostino-Pearson (``scipy.stats.normaltest``) and, when the sample
    size allows it, Shapiro-Wilk (``scipy.stats.shapiro``). Each test's
    null hypothesis is that the data is normally distributed, so a test
    reports ``normal = p > alpha`` (the null is not rejected).

    Args:
        values: Raw sample; entries rejected by ``_clean`` are ignored.
        alpha: Significance level compared against every p-value
            (default 0.05).

    Returns:
        When fewer than 8 usable values remain::

            {"n": n, "note": "muestra insuficiente", "is_normal": None}

        Otherwise::

            {
                "n": int,
                "jarque_bera": {"stat": float, "p": float, "normal": bool},
                "dagostino": {"stat": float, "p": float, "normal": bool},
                "shapiro": {"stat": float, "p": float, "normal": bool} | None,
                "is_normal": bool,
            }

        ``shapiro`` is ``None`` when the sample size falls outside
        ``3 <= n <= 5000``. ``is_normal`` is True only if every test that
        was actually run reports ``normal``.
    """

    def _summarize(result) -> dict:
        # Convert a (statistic, p-value) pair into plain Python types.
        statistic, p_value = result
        return {
            "stat": float(statistic),
            "p": float(p_value),
            "normal": bool(p_value > alpha),
        }

    sample = _clean(values)
    size = len(sample)
    if size < 8:
        return {"n": size, "note": "muestra insuficiente", "is_normal": None}

    jarque_bera = _summarize(stats.jarque_bera(sample))
    dagostino = _summarize(stats.normaltest(sample))

    # Shapiro-Wilk is restricted to the 3..5000 sample-size window.
    if 3 <= size <= 5000:
        shapiro = _summarize(stats.shapiro(sample))
    else:
        shapiro = None

    # Consensus over the tests that produced a result.
    verdicts = [
        outcome["normal"]
        for outcome in (jarque_bera, dagostino, shapiro)
        if outcome is not None
    ]

    return {
        "n": size,
        "jarque_bera": jarque_bera,
        "dagostino": dagostino,
        "shapiro": shapiro,
        "is_normal": bool(all(verdicts)),
    }
|
||||
Reference in New Issue
Block a user