feat(browser): auto-commit con 178 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-20 18:22:23 +02:00
parent 7d100e7f3e
commit 763e06c127
178 changed files with 19917 additions and 317 deletions
@@ -0,0 +1,106 @@
+"""Normality tests for a numeric column.
+
+Pure, deterministic helper that runs a battery of normality hypothesis
+tests over a numeric sample and reports, per test, whether the data is
+consistent with a normal distribution at a given significance level.
+"""
+
+from __future__ import annotations
+
+import math
+import numbers
+
+from scipy import stats
+
+
+def _clean(values: list) -> list[float]:
+    """Keep only finite numeric values, dropping None/NaN/non-numeric.
+
+    Booleans are excluded explicitly: in Python ``bool`` is a subclass of
+    ``int`` but treating True/False as numbers in a normality test is
+    almost always a data-typing mistake.
+    """
+    out: list[float] = []
+    for v in values:
+        if v is None or isinstance(v, bool):
+            continue
+        if not isinstance(v, (int, float)):
+            continue
+        x = float(v)
+        if math.isnan(x) or math.isinf(x):
+            continue
+        out.append(x)
+    return out
+
+
+def normality_tests(values: list, alpha: float = 0.05) -> dict:
+    """Run normality hypothesis tests on a numeric sample.
+
+    Cleans the input (drops None, NaN, infinities and non-numeric values)
+    and applies up to three normality tests: Jarque-Bera, D'Agostino-Pearson
+    (``scipy.stats.normaltest``) and Shapiro-Wilk. For each test the
+    null hypothesis is "the data comes from a normal distribution", so the
+    sample is flagged ``normal = p > alpha`` (fail to reject the null).
+
+    Shapiro-Wilk is only applied when ``3 <= n <= 5000``; outside that range
+    its key is ``None``.
+
+    Args:
+        values: Sample of numeric values. None/NaN/non-numeric are discarded.
+        alpha: Significance level for each test (default 0.05).
+
+    Returns:
+        For ``n < 8`` (insufficient sample) a dict
+        ``{"n": n, "note": "muestra insuficiente", "is_normal": None}``.
+
+        Otherwise a dict with::
+
+            {
+              "n": int,
+              "jarque_bera": {"stat": float, "p": float, "normal": bool},
+              "dagostino":   {"stat": float, "p": float, "normal": bool},
+              "shapiro":     {"stat": float, "p": float, "normal": bool} | None,
+              "is_normal":   bool,  # consensus of applicable tests
+            }
+
+        ``is_normal`` is the consensus (all applicable tests agree the data
+        is normal) over the tests that were actually run.
+    """
+    clean = _clean(values)
+    n = len(clean)
+
+    if n < 8:
+        return {"n": n, "note": "muestra insuficiente", "is_normal": None}
+
+    jb_stat, jb_p = stats.jarque_bera(clean)
+    jb = {
+        "stat": float(jb_stat),
+        "p": float(jb_p),
+        "normal": bool(jb_p > alpha),
+    }
+
+    da_stat, da_p = stats.normaltest(clean)
+    dagostino = {
+        "stat": float(da_stat),
+        "p": float(da_p),
+        "normal": bool(da_p > alpha),
+    }
+
+    shapiro: dict | None = None
+    if 3 <= n <= 5000:
+        sw_stat, sw_p = stats.shapiro(clean)
+        shapiro = {
+            "stat": float(sw_stat),
+            "p": float(sw_p),
+            "normal": bool(sw_p > alpha),
+        }
+
+    applicable = [jb, dagostino] + ([shapiro] if shapiro is not None else [])
+    is_normal = all(t["normal"] for t in applicable)
+
+    return {
+        "n": n,
+        "jarque_bera": jb,
+        "dagostino": dagostino,
+        "shapiro": shapiro,
+        "is_normal": bool(is_normal),
+    }