Files
fn_registry/python/functions/datascience/mutual_info_columns_test.py
T
egutierrez 763e06c127 feat(browser): auto-commit con 178 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-20 18:22:23 +02:00

69 lines
2.0 KiB
Python

"""Tests para mutual_info_columns."""
import math
import os
import random
import sys
# Put this file's own directory at the front of the import path so the
# `mutual_info_columns` import below is resolved against it first
# (presumably the module under test lives alongside this test file).
sys.path.insert(0, os.path.dirname(__file__))
from mutual_info_columns import mutual_info_columns
def test_identical_categoricals_nmi_near_one():
    """A categorical column scored against an exact copy must give NMI ~ 1.

    The second column is a copy of the first (total dependence), so the
    score is required to exceed 0.99 without going above 1.0.
    """
    labels = ["x", "y", "z"] * 3 + ["w"] * 3
    duplicate = list(labels)  # identical content -> total dependence
    score = mutual_info_columns(labels, duplicate)
    assert 0.99 < score <= 1.0
def test_nonlinear_numeric_relation_has_positive_nmi():
    """A strong non-linear relation must produce a clearly positive NMI.

    The second column is sign(sin(a)): fully determined by the first column
    but not linearly related to it (Pearson ~ 0).
    """
    sample_count = 2000
    upper_bound = 6.0 * math.pi
    generator = random.Random(11)  # fixed seed keeps the test deterministic
    angles = [generator.uniform(0.0, upper_bound) for _ in range(sample_count)]
    signs = [1.0 if math.sin(angle) >= 0 else -1.0 for angle in angles]
    score = mutual_info_columns(
        angles,
        signs,
        a_numeric=True,
        b_numeric=False,
        bins=20,
    )
    assert score > 0.1
def test_independent_columns_near_zero():
    """Two independently drawn Gaussian columns must score close to zero."""
    sample_count = 3000
    generator = random.Random(42)  # fixed seed keeps the test deterministic
    # Draw the whole first column before the second so the random stream is
    # consumed in a fixed order.
    first = [generator.gauss(0.0, 1.0) for _ in range(sample_count)]
    second = [generator.gauss(0.0, 1.0) for _ in range(sample_count)]
    score = mutual_info_columns(
        first, second, a_numeric=True, b_numeric=True, bins=10
    )
    assert score >= 0.0
    assert score < 0.1
def test_fewer_than_two_pairs_returns_zero():
    """Inputs holding zero or one pair must yield exactly 0.0."""
    too_short_inputs = (
        ([], []),
        (["a"], ["b"]),
    )
    for left, right in too_short_inputs:
        assert mutual_info_columns(left, right) == 0.0
def test_none_pairs_are_discarded():
    """Columns containing None on either side must still give a float in [0, 1].

    Per the test name, pairs with a None member are expected to be dropped by
    the implementation; this test itself only checks result type and range.
    """
    pairs = [
        ("x", "x"),
        (None, "z"),
        ("y", "y"),
        ("x", "x"),
        (None, "z"),
        ("y", "y"),
        ("x", None),
        ("y", "y"),
    ]
    left = [first for first, _ in pairs]
    right = [second for _, second in pairs]
    score = mutual_info_columns(left, right)
    assert isinstance(score, float)
    assert score >= 0.0
    assert score <= 1.0
def test_constant_column_returns_zero_when_normalized():
    """A constant (zero-entropy) column must score exactly 0.0 by default."""
    constant = ["c" for _ in range(20)]  # single label -> entropy 0
    alternating = ["x", "y"] * 10
    score = mutual_info_columns(constant, alternating)
    assert score == 0.0
def test_unnormalized_returns_mi_in_nats():
    """With normalized=False the raw mutual information is returned.

    For identical columns the raw MI equals their entropy, which for three
    equally frequent labels is about log(3) nats.
    """
    labels = ["x", "y", "z"] * 3
    duplicate = list(labels)
    raw_mi = mutual_info_columns(labels, duplicate, normalized=False)
    assert raw_mi > 0.9
    assert not math.isnan(raw_mi)  # result must not be NaN
def test_always_returns_float_never_none():
    """The result must be a float for both ordinary and all-None inputs."""
    cases = (
        (["a", "b"], ["a", "b"]),
        ([None], [None]),
    )
    for left, right in cases:
        result = mutual_info_columns(left, right)
        assert isinstance(result, float)