"""Tests para mutual_info_columns.""" import math import os import random import sys sys.path.insert(0, os.path.dirname(__file__)) from mutual_info_columns import mutual_info_columns def test_identical_categoricals_nmi_near_one(): a = ["x", "y", "z", "x", "y", "z", "x", "y", "z", "w", "w", "w"] b = list(a) # b == a -> dependencia total nmi = mutual_info_columns(a, b) assert nmi > 0.99 assert nmi <= 1.0 def test_nonlinear_numeric_relation_has_positive_nmi(): # b = sign(sin(a)) -> relacion NO lineal fuerte (Pearson ~ 0). rng = random.Random(11) a = [rng.uniform(0.0, 6.0 * math.pi) for _ in range(2000)] b = [1.0 if math.sin(x) >= 0 else -1.0 for x in a] nmi = mutual_info_columns(a, b, a_numeric=True, b_numeric=False, bins=20) assert nmi > 0.1 def test_independent_columns_near_zero(): rng = random.Random(42) a = [rng.gauss(0.0, 1.0) for _ in range(3000)] b = [rng.gauss(0.0, 1.0) for _ in range(3000)] nmi = mutual_info_columns(a, b, a_numeric=True, b_numeric=True, bins=10) assert 0.0 <= nmi < 0.1 def test_fewer_than_two_pairs_returns_zero(): assert mutual_info_columns([], []) == 0.0 assert mutual_info_columns(["a"], ["b"]) == 0.0 def test_none_pairs_are_discarded(): a = ["x", None, "y", "x", None, "y", "x", "y"] b = ["x", "z", "y", "x", "z", "y", None, "y"] nmi = mutual_info_columns(a, b) assert isinstance(nmi, float) assert 0.0 <= nmi <= 1.0 def test_constant_column_returns_zero_when_normalized(): a = ["c"] * 20 # entropia 0 b = ["x", "y"] * 10 assert mutual_info_columns(a, b) == 0.0 def test_unnormalized_returns_mi_in_nats(): a = ["x", "y", "z", "x", "y", "z", "x", "y", "z"] b = list(a) mi = mutual_info_columns(a, b, normalized=False) # MI cruda de columnas identicas = entropia ~ log(3) nats. assert mi > 0.9 assert mi == mi # no NaN def test_always_returns_float_never_none(): assert isinstance(mutual_info_columns(["a", "b"], ["a", "b"]), float) assert isinstance(mutual_info_columns([None], [None]), float)