# fn_registry/python/functions/datascience/compute_text_readability_test.py

"""Tests para compute_text_readability."""

import sys
import os
import builtins

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from datascience.compute_text_readability import compute_text_readability


# Top-level keys the tests require in the dict returned by
# compute_text_readability.
EXPECTED_KEYS = {
    "available",
    "n_scored",
    "flesch",
}

# Keys the tests require inside the nested "flesch" summary.
FLESCH_KEYS = {
    "mean",
    "p50",
    "min",
    "max",
}


def test_prosa_ingles():
    """Score several English prose texts and check the result's shape and ordering.

    Asserts that the result has exactly the expected top-level keys, that
    ``available`` is True, that at least one text was scored, that every
    Flesch statistic is populated, and that both ``mean`` and ``p50`` fall
    within ``[min, max]``.
    """
    texts = [
        "The cat sat on the mat. It was a warm and sunny day in the park.",
        "She sells sea shells by the sea shore. The shells she sells are surely sea shells.",
        "Reading is a wonderful habit. Books open doors to new worlds and ideas.",
        "He ran quickly to the store to buy some fresh bread and a bottle of milk.",
    ]
    out = compute_text_readability(texts)

    assert set(out.keys()) == EXPECTED_KEYS
    assert out["available"] is True
    assert out["n_scored"] > 0

    flesch = out["flesch"]
    assert set(flesch.keys()) == FLESCH_KEYS
    for key in ("mean", "p50", "min", "max"):
        assert flesch[key] is not None, key

    # Ordering must be coherent: min <= mean <= max and min <= p50 <= max.
    # The small tolerance absorbs floating-point rounding in the aggregates
    # (e.g. a mean of near-identical scores landing one ulp outside the range).
    tol = 1e-6
    lowest, highest = flesch["min"], flesch["max"]
    assert lowest <= highest
    assert lowest - tol <= flesch["mean"] <= highest + tol
    assert lowest - tol <= flesch["p50"] <= highest + tol


def test_vacio():
    """Empty corpus with textstat present: available True, n_scored 0, Flesch stats None."""
    empty_result = compute_text_readability([])

    assert set(empty_result.keys()) == EXPECTED_KEYS
    assert empty_result["available"] is True
    assert empty_result["n_scored"] == 0
    empty_flesch = empty_result["flesch"]
    for stat in ("mean", "p50", "min", "max"):
        assert empty_flesch[stat] is None

    # Non-str and empty/whitespace-only entries are expected to be discarded
    # as well, leaving nothing to score.
    unusable_inputs = [None, "", "   ", 123]
    unusable_result = compute_text_readability(unusable_inputs)
    assert unusable_result["available"] is True
    assert unusable_result["n_scored"] == 0


def test_degradacion(monkeypatch):
    """Without textstat (forced ImportError): degrades to available False without raising."""
    import datascience.compute_text_readability as module_under_test

    original_import = builtins.__import__

    def blocking_import(name, *args, **kwargs):
        # Pass every import through untouched except textstat and its submodules.
        targets_textstat = name == "textstat" or name.startswith("textstat.")
        if not targets_textstat:
            return original_import(name, *args, **kwargs)
        raise ImportError("simulado")

    # NOTE(review): this hook only has an effect if the function imports
    # textstat at call time via an import statement -- confirm against the
    # implementation.
    monkeypatch.setattr(builtins, "__import__", blocking_import)
    sample = ["The cat sat on the mat. It was happy and warm."]
    degraded = module_under_test.compute_text_readability(sample)

    assert degraded["available"] is False
    assert degraded["n_scored"] == 0
    for stat in ("mean", "p50", "min", "max"):
        assert degraded["flesch"][stat] is None