"""Tests for render_eda_markdown."""
import sys
import os

# Make the sibling module importable when the tests are run from elsewhere;
# this must happen before the import below.
sys.path.insert(0, os.path.dirname(__file__))

from render_eda_markdown import render_eda_markdown


def _sample_profile(correlations=None, llm=None):
    """Build a two-column profile fixture (one numeric, one categorical).

    ``correlations`` and ``llm`` are stored verbatim under the keys of the
    same name so individual tests can control those entries.
    """
    price_column = {
        "name": "price",
        "physical_type": "DOUBLE",
        "inferred_type": "float",
        "semantic_type": "currency",
        "count": 1000,
        "n_rows": 1000,
        "null_count": 0,
        "null_pct": 0.0,
        "distinct_count": 857,
        "unique_pct": 0.857,
        "flags": [],
        "quality_score": 0.95,
        "numeric": {
            "min": 1.0,
            "max": 99.0,
            "mean": 42.5,
            "median": 40.0,
            "std": 12.3,
            "p25": 30.0,
            "p75": 55.0,
            "p95": 80.0,
            "p99": 95.0,
            "skew": 0.4,
            "kurtosis": 2.1,
            "outlier_pct": 0.012,
            "distribution_type": "right-skewed",
            "histogram": [
                {"lo": 0, "hi": 25, "count": 100},
                {"lo": 25, "hi": 50, "count": 500},
                {"lo": 50, "hi": 75, "count": 300},
                {"lo": 75, "hi": 100, "count": 50},
            ],
        },
        "categorical": None,
        "datetime": None,
    }
    region_column = {
        "name": "region",
        "physical_type": "VARCHAR",
        "inferred_type": "string",
        "semantic_type": "category",
        "count": 1000,
        "n_rows": 1000,
        "null_count": 10,
        "null_pct": 0.01,
        "distinct_count": 3,
        "unique_pct": 0.003,
        "flags": ["low_cardinality"],
        "quality_score": 0.80,
        "numeric": None,
        "categorical": {
            "top": [
                {"value": "north", "count": 500, "pct": 0.5},
                {"value": "south", "count": 300, "pct": 0.3},
                {"value": "east", "count": 200, "pct": 0.2},
            ],
            "mode": "north",
            "mode_pct": 0.5,
            "n_distinct": 3,
            "entropy": 1.48,
        },
        "datetime": None,
    }
    return {
        "table": "sales",
        "source": "data/sales.csv",
        "profiled_at": "2026-06-20T10:00:00Z",
        "n_rows": 1000,
        "n_cols": 2,
        "size_bytes": 40960,
        "duplicate_rows": 3,
        "duplicate_pct": 0.003,
        "constant_cols": [],
        "all_null_cols": [],
        "null_cell_pct": 0.015,
        "type_breakdown": {"numeric": 1, "categorical": 1},
        "quality_score": 0.92,
        "key_candidates": ["order_id"],
        "correlations": correlations,
        "llm": llm,
        "models": None,
        "columns": [price_column, region_column],
    }


def test_contains_title_and_sections():
    """The title and every main section heading appear in the output."""
    rendered = render_eda_markdown(_sample_profile())
    expected_headings = (
        "# EDA — sales",
        "## Overview",
        "## Columnas",
        "## Numéricas",
        "## Categóricas",
    )
    for heading in expected_headings:
        assert heading in rendered


def test_contains_column_names():
    """Both fixture column names show up in the output."""
    rendered = render_eda_markdown(_sample_profile())
    for column_name in ("price", "region"):
        assert column_name in rendered


def test_contains_sparkline():
    """The histogram line is present and uses at least one block glyph."""
    rendered = render_eda_markdown(_sample_profile())
    # Histogram sparkline must render with block characters.
    assert "histogram: `" in rendered
    assert any(glyph in rendered for glyph in "▁▂▃▄▅▆▇█")


def test_pct_fields_scaled_by_100():
    """Fractional ``*_pct`` inputs are displayed as percentages."""
    # *_pct fields are fractions 0-1; the render must show them x100.
    rendered = render_eda_markdown(_sample_profile())
    # unique_pct=0.857 -> "85.70%" (must NOT show the raw "0.86%").
    assert "85.7" in rendered
    assert "0.86%" not in rendered
    # categorical top pct=0.5 -> "50.0%".
    assert "50.0" in rendered
    # outlier_pct=0.012 -> "1.20%".
    assert "1.20%" in rendered


def test_pct_handles_none_as_blank():
    """A ``None`` percentage never surfaces as the text ``None%``."""
    sparse_column = {
        "name": "c",
        "inferred_type": "float",
        "null_pct": None,
        "unique_pct": None,
        "quality_score": 0.5,
    }
    sparse_profile = {"table": "t", "columns": [sparse_column]}
    # None pct renders as empty cell, never "None%" or a crash.
    rendered = render_eda_markdown(sparse_profile)
    assert "None%" not in rendered


def test_tolerates_none_correlations_and_llm():
    """With no correlations/LLM data those sections are left out."""
    fixture = _sample_profile(correlations=None, llm=None)
    rendered = render_eda_markdown(fixture)
    for absent_heading in ("## Correlaciones", "## Análisis LLM"):
        assert absent_heading not in rendered
    # Still produced the main body.
    assert "# EDA — sales" in rendered


def test_tolerates_empty_profile():
    """An empty dict still yields a title with the placeholder name."""
    rendered = render_eda_markdown({})
    assert "# EDA — (unnamed)" in rendered


def test_tolerates_none_profile():
    """``None`` as the profile still yields the placeholder title."""
    rendered = render_eda_markdown(None)
    assert "# EDA — (unnamed)" in rendered