Files
fn_registry/python/functions/datascience/render_eda_markdown_test.py
T
egutierrez 763e06c127 feat(browser): auto-commit con 178 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-20 18:22:23 +02:00

167 lines
5.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for render_eda_markdown."""
import sys
import os
# Prepend this file's directory to sys.path before the import below;
# render_eda_markdown is presumably a sibling module in that directory.
sys.path.insert(0, os.path.dirname(__file__))
from render_eda_markdown import render_eda_markdown
def _sample_profile(correlations=None, llm=None):
return {
"table": "sales",
"source": "data/sales.csv",
"profiled_at": "2026-06-20T10:00:00Z",
"n_rows": 1000,
"n_cols": 2,
"size_bytes": 40960,
"duplicate_rows": 3,
"duplicate_pct": 0.003,
"constant_cols": [],
"all_null_cols": [],
"null_cell_pct": 0.015,
"type_breakdown": {"numeric": 1, "categorical": 1},
"quality_score": 0.92,
"key_candidates": ["order_id"],
"correlations": correlations,
"llm": llm,
"models": None,
"columns": [
{
"name": "price",
"physical_type": "DOUBLE",
"inferred_type": "float",
"semantic_type": "currency",
"count": 1000,
"n_rows": 1000,
"null_count": 0,
"null_pct": 0.0,
"distinct_count": 857,
"unique_pct": 0.857,
"flags": [],
"quality_score": 0.95,
"numeric": {
"min": 1.0,
"max": 99.0,
"mean": 42.5,
"median": 40.0,
"std": 12.3,
"p25": 30.0,
"p75": 55.0,
"p95": 80.0,
"p99": 95.0,
"skew": 0.4,
"kurtosis": 2.1,
"outlier_pct": 0.012,
"distribution_type": "right-skewed",
"histogram": [
{"lo": 0, "hi": 25, "count": 100},
{"lo": 25, "hi": 50, "count": 500},
{"lo": 50, "hi": 75, "count": 300},
{"lo": 75, "hi": 100, "count": 50},
],
},
"categorical": None,
"datetime": None,
},
{
"name": "region",
"physical_type": "VARCHAR",
"inferred_type": "string",
"semantic_type": "category",
"count": 1000,
"n_rows": 1000,
"null_count": 10,
"null_pct": 0.01,
"distinct_count": 3,
"unique_pct": 0.003,
"flags": ["low_cardinality"],
"quality_score": 0.80,
"numeric": None,
"categorical": {
"top": [
{"value": "north", "count": 500, "pct": 0.5},
{"value": "south", "count": 300, "pct": 0.3},
{"value": "east", "count": 200, "pct": 0.2},
],
"mode": "north",
"mode_pct": 0.5,
"n_distinct": 3,
"entropy": 1.48,
},
"datetime": None,
},
],
}
def test_contains_title_and_sections():
    """The rendered sample profile carries its title and section headings."""
    rendered = render_eda_markdown(_sample_profile())
    expected_headings = (
        "# EDA — sales",
        "## Overview",
        "## Columnas",
        "## Numéricas",
        "## Categóricas",
    )
    for heading in expected_headings:
        assert heading in rendered
def test_contains_column_names():
    """Both column names of the sample profile appear in the output."""
    rendered = render_eda_markdown(_sample_profile())
    for column_name in ("price", "region"):
        assert column_name in rendered
def test_contains_sparkline():
    """The output has a histogram line and at least one block glyph."""
    rendered = render_eda_markdown(_sample_profile())
    spark_glyphs = set("▁▂▃▄▅▆▇█")
    assert "histogram: `" in rendered
    # Passes as soon as any one of the block characters occurs in the text.
    assert not spark_glyphs.isdisjoint(rendered)
def test_pct_fields_scaled_by_100():
    """``*_pct`` fields are 0-1 fractions and must be shown multiplied by 100."""
    rendered = render_eda_markdown(_sample_profile())
    # (fragment, whether it must be present), checked in this order.
    expectations = (
        ("85.7", True),    # unique_pct=0.857 -> "85.70%"
        ("0.86%", False),  # the raw fraction must not be printed as a percent
        ("50.0", True),    # categorical top pct=0.5 -> "50.0%"
        ("1.20%", True),   # outlier_pct=0.012 -> "1.20%"
    )
    for fragment, must_be_present in expectations:
        assert (fragment in rendered) == must_be_present
def test_pct_handles_none_as_blank():
    """A column whose pct fields are ``None`` never produces "None%"."""
    column = {
        "name": "c",
        "inferred_type": "float",
        "null_pct": None,
        "unique_pct": None,
        "quality_score": 0.5,
    }
    # Rendering must finish without raising and must not leak the literal
    # "None%" into the output.
    rendered = render_eda_markdown({"table": "t", "columns": [column]})
    assert "None%" not in rendered
def test_tolerates_none_correlations_and_llm():
    """With correlations and llm set to None, their headings are absent."""
    profile = _sample_profile(correlations=None, llm=None)
    rendered = render_eda_markdown(profile)
    for absent_heading in ("## Correlaciones", "## Análisis LLM"):
        assert absent_heading not in rendered
    # The title of the main body is still produced.
    assert "# EDA — sales" in rendered
def test_tolerates_empty_profile():
    """An empty dict still renders, titled with the "(unnamed)" placeholder."""
    expected_title = "# EDA — (unnamed)"
    rendered = render_eda_markdown({})
    assert expected_title in rendered
def test_tolerates_none_profile():
    """Passing ``None`` still renders, titled with the "(unnamed)" placeholder."""
    expected_title = "# EDA — (unnamed)"
    rendered = render_eda_markdown(None)
    assert expected_title in rendered