# fn_registry/python/functions/datascience/normality_tests.py

"""Normality tests for a numeric column.

Pure, deterministic helper that runs a battery of normality hypothesis
tests over a numeric sample and reports, per test, whether the data is
consistent with a normal distribution at a given significance level.
"""

from __future__ import annotations

import math

from scipy import stats


def _clean(values: list) -> list[float]:
    """Keep only finite numeric values, dropping None/NaN/non-numeric.

    Booleans are excluded explicitly: in Python ``bool`` is a subclass of
    ``int`` but treating True/False as numbers in a normality test is
    almost always a data-typing mistake.
    """
    out: list[float] = []
    for v in values:
        if v is None or isinstance(v, bool):
            continue
        if not isinstance(v, (int, float)):
            continue
        x = float(v)
        if math.isnan(x) or math.isinf(x):
            continue
        out.append(x)
    return out


def normality_tests(values: list, alpha: float = 0.05) -> dict:
    """Check a numeric sample against several normality hypothesis tests.

    The input is first passed through ``_clean`` and the remaining
    observations are fed to:

    * Jarque-Bera (``scipy.stats.jarque_bera``),
    * D'Agostino-Pearson (``scipy.stats.normaltest``),
    * Shapiro-Wilk (``scipy.stats.shapiro``), only when ``3 <= n <= 5000``.

    Every test has the null hypothesis "the sample was drawn from a normal
    distribution". A test therefore reports ``normal = True`` when its
    p-value is strictly greater than ``alpha`` (the null is not rejected).

    Args:
        values: Raw sample; entries rejected by ``_clean`` are ignored.
        alpha: Significance level compared against each p-value
            (default 0.05).

    Returns:
        When fewer than 8 usable observations remain, the short form
        ``{"n": n, "note": "muestra insuficiente", "is_normal": None}``
        (the note is Spanish for "insufficient sample").

        Otherwise::

            {
              "n": int,
              "jarque_bera": {"stat": float, "p": float, "normal": bool},
              "dagostino":   {"stat": float, "p": float, "normal": bool},
              "shapiro":     {"stat": float, "p": float, "normal": bool} | None,
              "is_normal":   bool,
            }

        ``shapiro`` is ``None`` when the test was skipped. ``is_normal`` is
        True only if every test that actually ran reported ``normal``.
    """
    sample = _clean(values)
    n = len(sample)

    # Guard clause: too few observations to run the battery at all.
    if n < 8:
        return {"n": n, "note": "muestra insuficiente", "is_normal": None}

    def _summarize(result) -> dict:
        """Turn a scipy ``(statistic, pvalue)`` result into plain Python types."""
        statistic, p_value = result
        return {
            "stat": float(statistic),
            "p": float(p_value),
            "normal": bool(p_value > alpha),
        }

    jarque_bera = _summarize(stats.jarque_bera(sample))
    dagostino = _summarize(stats.normaltest(sample))
    # Shapiro-Wilk is restricted to the documented sample-size window.
    shapiro = _summarize(stats.shapiro(sample)) if 3 <= n <= 5000 else None

    # Consensus is taken only over the tests that were executed.
    verdicts = [jarque_bera["normal"], dagostino["normal"]]
    if shapiro is not None:
        verdicts.append(shapiro["normal"])

    return {
        "n": n,
        "jarque_bera": jarque_bera,
        "dagostino": dagostino,
        "shapiro": shapiro,
        "is_normal": all(verdicts),
    }