feat(eda): poblar head_rows real en el capítulo OVERVIEW (df.head)

El capítulo OVERVIEW del motor AutomaticEDA mostraba "df.head no disponible" porque ninguna fase de cálculo poblaba las primeras filas crudas de la tabla. - build_eda_render_ctx: nuevo bloque que muestrea SELECT * LIMIT head_n (param nuevo head_n=10) y lo expone en ctx["head_rows"] como lista de dicts fila. Estilo dict-no-throw: si la query falla, se omite la clave. - profile_table: puebla prof["head_rows"] reusando _sample_rows (SELECT de las columnas LIMIT 10) tras recalcular el type_breakdown. Así el report JSON sidecar también lo lleva y el capítulo lo recoge vía profile aunque no se construya el ctx. - overview.py: la nota del DataTable de df.head ahora indica el total de filas del dataset cuando se conoce ("primeras 10 filas de 891"). Bump CHAPTER_VERSION 1.0.0 -> 1.1.0. - overview_test.py (nuevo): golden (head vía profile y vía ctx, render PDF + PPTX muestran las filas reales, placeholder ausente), edge (sin head_rows degrada a nota honesta sin romper, None/vacío devuelven None). Verificado end-to-end con titanic: render_automatic_eda emite PDF + PPTX con df.head visible (Braund/Cumings/Heikkinen + columnas) y sin el placeholder. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 17:56:24 +02:00
14 changed files with 239 additions and 1636 deletions
@@ -34,7 +34,6 @@ from .theils_u import theils_u
 from .correlation_ratio import correlation_ratio
 from .mutual_info_columns import mutual_info_columns
 from .infer_fk_containment_duckdb import infer_fk_containment_duckdb
-from .detect_declared_keys_duckdb import detect_declared_keys_duckdb
 from .build_join_graph import build_join_graph
 from .association_matrix import association_matrix
 from .correlation_matrix_duckdb import correlation_matrix_duckdb
@@ -70,10 +69,8 @@ from .build_eda_render_ctx import build_eda_render_ctx
 from .profile_datetime import profile_datetime
 from .resample_timeseries import resample_timeseries
 from .add_pdf_internal_links import add_pdf_internal_links
-from .suggest_intratable_fk_candidates import suggest_intratable_fk_candidates

 __all__ = [
-    "suggest_intratable_fk_candidates",
    "detect_time_column",
    "extract_timeseries_raw",
    "build_eda_render_ctx",
@@ -100,7 +97,6 @@ __all__ = [
    "correlation_ratio",
    "mutual_info_columns",
    "infer_fk_containment_duckdb",
-    "detect_declared_keys_duckdb",
    "build_join_graph",
    "association_matrix",
    "correlation_matrix_duckdb",
@@ -20,7 +20,7 @@ from __future__ import annotations

 from .. import model

-CHAPTER_VERSION = "1.0.0"
+CHAPTER_VERSION = "1.1.0"
 CHAPTER_ID = "overview"
 CHAPTER_TITLE = "Overview"

@@ -90,8 +90,14 @@ def _head_block(profile: dict, ctx: dict):
        if not cols:
            cols = list(head[0].keys())
        rows = [[model._safe_str(r.get(c)) for c in cols] for r in head[:10]]
-        return model.DataTable(header=cols, rows=rows,
-                               note=f"primeras {len(rows)} filas")
+        # Honest note: how many rows are shown and, when known, out of how many
+        # rows the dataset has (so "primeras 10 filas de 891" gives context).
+        note = f"primeras {len(rows)} filas"
+        n_rows = profile.get("n_rows")
+        if isinstance(n_rows, int) and not isinstance(n_rows, bool) \
+                and n_rows > len(rows):
+            note += f" de {n_rows:,}".replace(",", ".")
+        return model.DataTable(header=cols, rows=rows, note=note)
    return model.Note(
        "df.head no disponible: el TableProfile no incluye 'head_rows'. La fase "
        "de cálculo debe añadir profile['head_rows'] (lista de dicts fila) o "
@@ -0,0 +1,187 @@
+"""Tests for the OVERVIEW chapter — DoD: golden + edges + degradation.
+
+Self-contained: builds synthetic TableProfiles (no DuckDB) so the suite is fast
+and deterministic. Verifies that ``build_overview`` renders the raw first rows
+(``df.head``) as a DataTable when ``head_rows`` is present — both when it arrives
+via ``profile['head_rows']`` (populated by ``profile_table``) and via
+``ctx['head_rows']`` (populated by ``build_eda_render_ctx``) — that the chapter
+also renders the column dictionary and the numeric describe, that the full
+document renders to PDF and PPTX showing the head values, and that a profile with
+NO head data degrades to an honest note instead of raising or inventing rows.
+"""
+
+import os
+import re
+import tempfile
+
+from pypdf import PdfReader
+from pptx import Presentation
+
+from datascience.automatic_eda.model import DataTable, Note
+from datascience.automatic_eda.chapters.overview import (
+    CHAPTER_ID, CHAPTER_VERSION, build_overview,
+)
+from datascience.render_automatic_eda_pdf import render_automatic_eda_pdf
+from datascience.render_automatic_eda_pptx import render_automatic_eda_pptx
+
+
+def _columns() -> list:
+    return [
+        {"name": "PassengerId", "inferred_type": "numeric", "null_pct": 0.0,
+         "null_count": 0, "numeric": {"mean": 2.0, "median": 2.0, "min": 1.0,
+                                      "max": 3.0, "std": 1.0}},
+        {"name": "Survived", "inferred_type": "numeric", "null_pct": 0.0,
+         "null_count": 0, "numeric": {"mean": 0.33, "median": 0.0, "min": 0.0,
+                                      "max": 1.0, "std": 0.58}},
+        {"name": "Pclass", "inferred_type": "numeric", "null_pct": 0.0,
+         "null_count": 0, "numeric": {"mean": 2.33, "median": 3.0, "min": 1.0,
+                                      "max": 3.0, "std": 1.15}},
+        {"name": "Name", "inferred_type": "categorical", "null_pct": 0.0,
+         "null_count": 0, "distinct_count": 3},
+        {"name": "Sex", "inferred_type": "categorical", "null_pct": 0.0,
+         "null_count": 0, "distinct_count": 2,
+         "categorical": {"top": [{"value": "male", "count": 2},
+                                 {"value": "female", "count": 1}]}},
+    ]
+
+
+def _head_rows() -> list:
+    return [
+        {"PassengerId": 1, "Survived": 0, "Pclass": 3,
+         "Name": "Braund Owen", "Sex": "male"},
+        {"PassengerId": 2, "Survived": 1, "Pclass": 1,
+         "Name": "Cumings Florence", "Sex": "female"},
+        {"PassengerId": 3, "Survived": 1, "Pclass": 3,
+         "Name": "Heikkinen Laina", "Sex": "female"},
+    ]
+
+
+def _profile(with_head: bool = True) -> dict:
+    prof = {
+        "table": "titanic",
+        "source": "/data/titanic.csv",
+        "profiled_at": "2026-06-30T10:00:00+00:00",
+        "n_rows": 891,
+        "n_cols": 5,
+        "quality_score": 88.0,
+        "columns": _columns(),
+    }
+    if with_head:
+        prof["head_rows"] = _head_rows()
+    return prof
+
+
+def _pdf_text(path: str) -> str:
+    txt = "".join((pg.extract_text() or "") for pg in PdfReader(path).pages)
+    return re.sub(r"\s+", " ", txt)
+
+
+def _pptx_text(path: str) -> str:
+    prs = Presentation(path)
+    parts = []
+    for sl in prs.slides:
+        for sh in sl.shapes:
+            if sh.has_text_frame:
+                parts.append(sh.text_frame.text)
+            if sh.has_table:
+                tb = sh.table
+                for r in range(len(tb.rows)):
+                    for c in range(len(tb.columns)):
+                        parts.append(tb.cell(r, c).text)
+    return re.sub(r"\s+", " ", " ".join(parts))
+
+
+def _flatten(blocks):
+    """Recursively flatten Group blocks into a flat list (none here today)."""
+    out = []
+    for b in blocks:
+        inner = getattr(b, "blocks", None)
+        if inner is not None and getattr(b, "kind", None) == "group":
+            out.extend(_flatten(inner))
+        else:
+            out.append(b)
+    return out
+
+
+def test_golden_build_overview_muestra_head_desde_profile():
+    ch = build_overview(_profile(), {})
+    assert ch is not None
+    assert ch.id == CHAPTER_ID
+    assert ch.version == CHAPTER_VERSION
+    blocks = _flatten(ch.blocks)
+    # The first DataTable is df.head: its header is the column names and the
+    # real first rows are present (not a placeholder note).
+    tables = [b for b in blocks if isinstance(b, DataTable)]
+    assert tables, "overview must emit at least the df.head DataTable"
+    head_tbl = tables[0]
+    assert head_tbl.header == ["PassengerId", "Survived", "Pclass",
+                               "Name", "Sex"]
+    assert len(head_tbl.rows) == 3
+    flat = [str(c) for row in head_tbl.rows for c in row]
+    assert "Braund Owen" in flat and "Cumings Florence" in flat
+    # Honest note carries how many rows shown out of the dataset total.
+    assert head_tbl.note is not None
+    assert "primeras 3 filas" in head_tbl.note and "891" in head_tbl.note
+    # No "df.head no disponible" placeholder when head_rows is present.
+    assert not any(isinstance(b, Note) and "no disponible" in b.text
+                   for b in blocks)
+
+
+def test_golden_head_desde_ctx_tambien_funciona():
+    # head_rows absent in profile but present in ctx (build_eda_render_ctx path).
+    prof = _profile(with_head=False)
+    ch = build_overview(prof, {"head_rows": _head_rows()})
+    assert ch is not None
+    tables = [b for b in _flatten(ch.blocks) if isinstance(b, DataTable)]
+    flat = [str(c) for row in tables[0].rows for c in row]
+    assert "Braund Owen" in flat
+
+
+def test_golden_render_pdf_muestra_head():
+    with tempfile.TemporaryDirectory() as d:
+        out = os.path.join(d, "eda.pdf")
+        res = render_automatic_eda_pdf(_profile(), out, {"title": "EDA"})
+        assert res["path"] == out and os.path.exists(out)
+        assert CHAPTER_ID in [c["id"] for c in res["chapters"]]
+        txt = _pdf_text(out)
+        assert "Braund" in txt and "male" in txt
+        assert "primeras" in txt          # head note rendered.
+        assert "df.head" in txt           # chapter heading rendered.
+        assert "no disponible" not in txt  # placeholder NOT shown.
+
+
+def test_golden_render_pptx_muestra_head():
+    with tempfile.TemporaryDirectory() as d:
+        out = os.path.join(d, "eda.pptx")
+        res = render_automatic_eda_pptx(_profile(), out, {"title": "EDA"})
+        assert res["path"] == out and os.path.exists(out)
+        assert CHAPTER_ID in [c["id"] for c in res["chapters"]]
+        txt = _pptx_text(out)
+        assert "Braund" in txt and "Cumings" in txt
+
+
+def test_edge_sin_head_rows_degrada_a_nota_honesta():
+    # No head data anywhere: chapter still builds (columns exist), shows the
+    # honest placeholder note, and never invents rows nor raises.
+    prof = _profile(with_head=False)
+    ch = build_overview(prof, {})
+    assert ch is not None
+    blocks = _flatten(ch.blocks)
+    assert any(isinstance(b, Note) and "no disponible" in b.text
+               for b in blocks)
+    # No remaining DataTable (e.g. the column dictionary) carries head rows.
+    tables = [b for b in blocks if isinstance(b, DataTable)]
+    assert all("Braund" not in str(c)
+               for tbl in tables for row in tbl.rows for c in row)
+
+
+def test_edge_none_y_vacio_no_rompen():
+    # Nothing to render at all -> None, no raise.
+    assert build_overview(None, None) is None
+    assert build_overview({}, {}) is None
+    assert build_overview({"columns": []}, {}) is None
+    # Only head_rows (no columns) still yields a chapter with the head table.
+    ch = build_overview({"columns": []}, {"head_rows": _head_rows()})
+    assert ch is not None
+    tables = [b for b in _flatten(ch.blocks) if isinstance(b, DataTable)]
+    assert tables and len(tables[0].rows) == 3
@@ -1,500 +0,0 @@
-"""Key-relations chapter (RELACIONES) — the keys / join structure of the data.
-
-This chapter is the *relational* section of an AutomaticEDA report. It answers a
-single question for the table (or the whole DuckDB source it lives in): **how do
-the keys relate?** It composes, without reimplementing them, the registry's
-relation primitives and degrades honestly when a layer does not apply.
-
-It renders, in order, only the layers that have something to say:
-
-1. **Declared keys** (real schema constraints) — when the DuckDB source declares
-   PRIMARY KEY / FOREIGN KEY / UNIQUE constraints, they are read verbatim via
-   ``detect_declared_keys_duckdb`` and shown as ground truth: which column is the
-   PK, which columns are FKs and the table/column they point to.
-2. **Primary-key candidates** — the ``key_candidates`` the TableProfile already
-   carries (columns whose cardinality equals the row count, with no nulls). These
-   are *candidates*: a column that could serve as the row identifier.
-3. **Foreign-key candidates** when none are declared:
-   - **Inter-table** (the DuckDB source has several tables): real FK candidates by
-     name signal + value containment via ``infer_fk_containment_duckdb``, plus the
-     join graph (roles + a pasteable Mermaid diagram) via ``build_join_graph``.
-   - **Intra-table** (a single table): columns that *look* like a foreign key by a
-     name+cardinality heuristic (``suggest_intratable_fk_candidates``). This is a
-     **suggestion**, explicitly flagged as a heuristic, never an assertion.
-
-``build_relaciones(profile, ctx) -> Chapter | None``: returns ``None`` when there
-is nothing to say (no declared key, no key candidates, and no FK candidate —
-inter- or intra-table). Reads everything defensively (``.get``) and never raises:
-anything missing degrades to a note or is omitted; a failing registry call drops
-its layer instead of aborting the chapter.
-
-ctx keys this chapter consumes (all optional):
-    db_path, table : str — the DuckDB file and table being profiled (set by
-        ``build_eda_render_ctx``). ``db_path`` is needed to read declared
-        constraints, to list the sibling tables, and to run the containment-based
-        FK inference. Without it, only the profile-derived layers (PK candidates,
-        intra-table FK heuristic) are available.
-    glossary : model.GlossaryCollector — shared glossary; the chapter registers
-        the relational terms (PK, FK, containment, cardinality) and marks their
-        first appearance clickable.
-
-Contract: build_<id>(profile, ctx) -> Chapter | None ; CHAPTER_VERSION = "x.y.z".
-"""
-
-from __future__ import annotations
-
-from .. import model
-
-# Pure/impure registry functions (group ``eda``) this chapter composes. Imported
-# defensively (module-leaf imports, like the AGREGACION chapter) so the chapter
-# still builds — degrading the affected layer to nothing — if a function is
-# somehow unavailable / not indexed yet.
-try:
-    from datascience.detect_declared_keys_duckdb import detect_declared_keys_duckdb
-except Exception:  # noqa: BLE001 — keep the chapter importable no matter what.
-    detect_declared_keys_duckdb = None  # type: ignore[assignment]
-try:
-    from datascience.infer_fk_containment_duckdb import infer_fk_containment_duckdb
-except Exception:  # noqa: BLE001
-    infer_fk_containment_duckdb = None  # type: ignore[assignment]
-try:
-    from datascience.build_join_graph import build_join_graph
-except Exception:  # noqa: BLE001
-    build_join_graph = None  # type: ignore[assignment]
-try:
-    from datascience.suggest_intratable_fk_candidates import (
-        suggest_intratable_fk_candidates,
-    )
-except Exception:  # noqa: BLE001
-    suggest_intratable_fk_candidates = None  # type: ignore[assignment]
-try:
-    from infra import duckdb_list_tables
-except Exception:  # noqa: BLE001
-    duckdb_list_tables = None  # type: ignore[assignment]
-
-CHAPTER_VERSION = "1.0.0"
-CHAPTER_ID = "relaciones"
-CHAPTER_TITLE = "Relaciones de clave"
-
-# Cap the inter-table FK table so a wide schema does not blow up the page; the
-# rest is summarized in a closing note (no silent truncation).
-MAX_FK_ROWS = 40
-
-# --------------------------------------------------------------------------- #
-# Glossary terms this chapter explains. Registered in the shared collector and
-# marked clickable on their first appearance (contract §11.1).
-# --------------------------------------------------------------------------- #
-_TERMS = {
-    "pk": (
-        "Clave primaria (PK)",
-        "Columna (o conjunto de columnas) que identifica de forma única cada fila "
-        "de una tabla: sus valores no se repiten y no son nulos. Una tabla tiene "
-        "como mucho una clave primaria; es el ancla por la que otras tablas la "
-        "referencian.",
-    ),
-    "fk": (
-        "Clave foránea (FK)",
-        "Columna de una tabla cuyos valores apuntan a la clave primaria de otra "
-        "tabla (o de la misma), creando una relación entre ambas. Una FK suele ser "
-        "N:1: muchas filas de la tabla origen comparten el mismo valor de la tabla "
-        "destino.",
-    ),
-    "containment": (
-        "Containment / inclusión",
-        "Señal con la que se infiere una clave foránea sin que la base la declare: "
-        "la fracción de valores distintos de una columna A que también aparecen "
-        "como valores de otra columna B. Si casi todos los valores de A están "
-        "contenidos en B (inclusión ≈ 1) y B parece una clave, A → B es una FK "
-        "candidata.",
-    ),
-    "cardinalidad": (
-        "Cardinalidad",
-        "Número de valores distintos de una columna. Cardinalidad igual al número "
-        "de filas (y sin nulos) señala un identificador (candidato a clave "
-        "primaria); cardinalidad alta pero menor que el número de filas, con "
-        "valores repetidos, es típica de una clave foránea.",
-    ),
-}
-
-
-def _register_terms(ctx: dict) -> bool:
-    """Register the relational terms in the shared glossary. Returns whether the
-    in-text appearances should be marked clickable."""
-    glossary = ctx.get("glossary")
-    if not isinstance(glossary, model.GlossaryCollector):
-        return False
-    for key, (label, definition) in _TERMS.items():
-        glossary.add(key, label, definition)
-    return True
-
-
-# --------------------------------------------------------------------------- #
-# Formatting helpers (mirror the other chapters' defensive style).
-# --------------------------------------------------------------------------- #
-def _fmt_int(value) -> str:
-    if value is None:
-        return "—"
-    try:
-        return f"{int(value):,}".replace(",", ".")
-    except (TypeError, ValueError):
-        return model._safe_str(value)
-
-
-def _fmt_pct_fraction(value, decimals: int = 1) -> str:
-    """Format a 0–1 fraction as a percentage. None -> placeholder."""
-    if value is None:
-        return "—"
-    try:
-        v = float(value)
-    except (TypeError, ValueError):
-        return model._safe_str(value)
-    if v <= 1.0:
-        v *= 100.0
-    return f"{v:.{decimals}f}%"
-
-
-def _fmt_ratio(value, decimals: int = 3) -> str:
-    """Format an already-0–1 ratio (inclusion) as a plain number."""
-    if value is None:
-        return "—"
-    try:
-        return f"{float(value):.{decimals}f}".rstrip("0").rstrip(".")
-    except (TypeError, ValueError):
-        return model._safe_str(value)
-
-
-def _is_dict(v) -> bool:
-    return isinstance(v, dict)
-
-
-def _columns_by_name(profile: dict) -> dict:
-    """Index the profile columns by name for quick metric lookup."""
-    out = {}
-    for col in (profile.get("columns") or []):
-        if _is_dict(col) and col.get("name") is not None:
-            out[col.get("name")] = col
-    return out
-
-
-# --------------------------------------------------------------------------- #
-# Layer 1 — declared keys (real schema constraints).
-# --------------------------------------------------------------------------- #
-def _declared_keys(db_path: str, table: str):
-    """Read declared PK/FK/UNIQUE for the source, or None if unavailable."""
-    if not db_path or detect_declared_keys_duckdb is None:
-        return None
-    try:
-        out = detect_declared_keys_duckdb(db_path, table)
-    except Exception:  # noqa: BLE001 — dict-no-throw: treat as unavailable.
-        return None
-    if not _is_dict(out) or out.get("status") != "ok":
-        return None
-    return out
-
-
-def _declared_section(declared: dict) -> list:
-    """Blocks for the declared-keys layer, or [] if there is nothing declared."""
-    pks = [p for p in (declared.get("primary_keys") or []) if _is_dict(p)]
-    fks = [f for f in (declared.get("foreign_keys") or []) if _is_dict(f)]
-    uqs = [u for u in (declared.get("unique") or []) if _is_dict(u)]
-    if not (pks or fks or uqs):
-        return []
-
-    blocks = [
-        model.Heading(text="Claves declaradas en el esquema", level=2),
-        model.Markdown(text=(
-            "La base **declara** estas relaciones de clave como restricciones "
-            "reales del esquema (constraints). Son la verdad de referencia: no se "
-            "infieren, se leen tal cual de la definición de las tablas.")),
-    ]
-
-    if pks:
-        rows = [[model._safe_str(p.get("table")),
-                 ", ".join(model._safe_str(c) for c in (p.get("columns") or []))]
-                for p in pks]
-        blocks.append(model.DataTable(
-            header=["Tabla", "Columna(s) PK"], rows=rows,
-            title="Claves primarias declaradas",
-            note="Cada fila: la clave primaria declarada de una tabla."))
-
-    if fks:
-        rows = []
-        for f in fks:
-            src = ", ".join(model._safe_str(c) for c in (f.get("columns") or []))
-            dst = ", ".join(
-                model._safe_str(c) for c in (f.get("referenced_columns") or []))
-            rows.append([
-                model._safe_str(f.get("table")), src,
-                model._safe_str(f.get("referenced_table")), dst])
-        blocks.append(model.DataTable(
-            header=["Tabla origen", "Columna(s) FK", "→ Tabla destino",
-                    "Columna(s) destino"],
-            rows=rows, title="Claves foráneas declaradas",
-            note="Cada fila: una FK declarada — origen → destino."))
-
-    if uqs:
-        rows = [[model._safe_str(u.get("table")),
-                 ", ".join(model._safe_str(c) for c in (u.get("columns") or []))]
-                for u in uqs]
-        blocks.append(model.DataTable(
-            header=["Tabla", "Columna(s) UNIQUE"], rows=rows,
-            title="Restricciones UNIQUE declaradas"))
-
-    return blocks
-
-
-# --------------------------------------------------------------------------- #
-# Layer 2 — primary-key candidates (from the profile).
-# --------------------------------------------------------------------------- #
-def _pk_candidates_section(profile: dict, mark: bool) -> list:
-    """Blocks for the PK-candidates layer, or [] if there are none."""
-    keys = [k for k in (profile.get("key_candidates") or []) if k is not None]
-    if not keys:
-        return []
-    by_name = _columns_by_name(profile)
-
-    pk = ("[[term:pk]]**clave primaria**[[/term]]" if mark
-          else "**clave primaria**")
-    intro = (
-        f"Estas columnas son **candidatas a {pk}**: su "
-        "[[term:cardinalidad]]cardinalidad[[/term]] iguala al número de filas y no "
-        "tienen nulos, así que cada valor identifica una fila distinta. Son "
-        "candidatas, no una clave declarada: la base no las marca como tal."
-        if mark else
-        "Estas columnas son **candidatas a clave primaria**: su cardinalidad "
-        "iguala al número de filas y no tienen nulos, así que cada valor "
-        "identifica una fila distinta.")
-
-    rows = []
-    for name in keys:
-        col = by_name.get(name) or {}
-        rows.append([
-            model._safe_str(name),
-            _fmt_int(col.get("distinct_count")),
-            _fmt_pct_fraction(col.get("unique_pct")),
-            model._safe_str(col.get("inferred_type") or col.get("physical_type") or "—"),
-        ])
-    return [
-        model.Heading(text="Candidatos a clave primaria", level=2),
-        model.Markdown(text=intro),
-        model.DataTable(
-            header=["Columna", "Valores distintos", "% único", "Tipo"],
-            rows=rows, title="Candidatas a clave primaria",
-            note=f"{_fmt_int(profile.get('n_rows'))} filas en total como referencia."),
-    ]
-
-
-# --------------------------------------------------------------------------- #
-# Layer 3a — inter-table FK candidates (containment) + join graph.
-# --------------------------------------------------------------------------- #
-def _list_source_tables(db_path: str) -> list:
-    """List the tables in the DuckDB source, or [] if it can't be listed."""
-    if not db_path or duckdb_list_tables is None:
-        return []
-    try:
-        out = duckdb_list_tables(db_path)
-    except Exception:  # noqa: BLE001
-        return []
-    if not _is_dict(out) or out.get("status") != "ok":
-        return []
-    return [t for t in (out.get("tables") or []) if isinstance(t, str)]
-
-
-def _inter_table_section(db_path: str, tables: list, mark: bool) -> list:
-    """Blocks for the inter-table FK layer (containment + join graph), or []."""
-    if infer_fk_containment_duckdb is None or len(tables) < 2:
-        return []
-    try:
-        fk = infer_fk_containment_duckdb(db_path, tables=tables)
-    except Exception:  # noqa: BLE001
-        return []
-    if not _is_dict(fk) or fk.get("status") != "ok":
-        return []
-    candidates = [c for c in (fk.get("fk_candidates") or []) if _is_dict(c)]
-    if not candidates:
-        return []
-
-    containment = ("[[term:containment]]containment (inclusión de valores)[[/term]]"
-                   if mark else "containment (inclusión de valores)")
-    fk_term = "[[term:fk]]**claves foráneas**[[/term]]" if mark else "**claves foráneas**"
-    blocks = [
-        model.Heading(text="Claves foráneas candidatas (inter-tabla)", level=2),
-        model.Markdown(text=(
-            f"La fuente tiene varias tablas. Estas {fk_term} candidatas se infieren "
-            f"por señal de nombre y por {containment}: una columna de una tabla cuyos "
-            "valores están contenidos en la clave de otra. No están declaradas por "
-            "la base; son la relación más probable según los datos.")),
-    ]
-
-    shown = candidates[:MAX_FK_ROWS]
-    rows = []
-    for c in shown:
-        rows.append([
-            f"{model._safe_str(c.get('from_table'))}.{model._safe_str(c.get('from_col'))}",
-            f"{model._safe_str(c.get('to_table'))}.{model._safe_str(c.get('to_col'))}",
-            _fmt_ratio(c.get("inclusion")),
-            model._safe_str(c.get("cardinality") or "—"),
-            "sí" if c.get("name_match") else "no",
-        ])
-    note = "Ordenadas por señal de nombre e inclusión."
-    if len(candidates) > len(shown):
-        note += f" Se muestran {len(shown)} de {len(candidates)} candidatas."
-    blocks.append(model.DataTable(
-        header=["Origen", "→ Destino", "Inclusión", "Cardinalidad", "Coincide nombre"],
-        rows=rows, title="FK candidatas por containment", note=note))
-
-    # Join graph: node roles + a pasteable Mermaid diagram, kept together.
-    if build_join_graph is not None:
-        try:
-            graph = build_join_graph(candidates, tables=tables)
-        except Exception:  # noqa: BLE001
-            graph = None
-        if _is_dict(graph):
-            graph_blocks = [model.Heading(text="Grafo de relaciones", level=3)]
-            nodes = [n for n in (graph.get("nodes") or []) if _is_dict(n)]
-            if nodes:
-                node_rows = [[
-                    model._safe_str(n.get("table")),
-                    model._safe_str(n.get("role") or "—"),
-                    _fmt_int(n.get("out_degree")),
-                    _fmt_int(n.get("in_degree")),
-                ] for n in nodes]
-                graph_blocks.append(model.DataTable(
-                    header=["Tabla", "Rol", "FK salientes", "FK entrantes"],
-                    rows=node_rows, title="Tablas y su rol en el grafo",
-                    note="Rol: fact (apunta a otras), dimension (referenciada), "
-                         "bridge (ambas), standalone (aislada)."))
-            hubs = [h for h in (graph.get("hubs") or []) if h]
-            if hubs:
-                graph_blocks.append(model.Markdown(text=(
-                    "Tablas con más relaciones salientes (candidatas a tabla de "
-                    "hechos): " + ", ".join(model._safe_str(h) for h in hubs) + ".")))
-            mermaid = model._safe_str(graph.get("mermaid")).strip()
-            if mermaid:
-                graph_blocks.append(model.Markdown(text=(
-                    "Diagrama de las relaciones (pegable en un bloque Mermaid):")))
-                graph_blocks.append(model.Markdown(
-                    text="```mermaid\n" + mermaid + "\n```"))
-            if len(graph_blocks) > 1:
-                blocks.append(model.Group(blocks=graph_blocks,
-                                          title="Grafo de relaciones"))
-
-    skipped = [s for s in (fk.get("skipped") or []) if s]
-    if skipped:
-        blocks.append(model.Note(
-            "Algunos pares se omitieron por tamaño: "
-            + "; ".join(model._safe_str(s) for s in skipped) + "."))
-    return blocks
-
-
-# --------------------------------------------------------------------------- #
-# Layer 3b — intra-table FK candidates (name+cardinality heuristic).
-# --------------------------------------------------------------------------- #
-def _intra_table_section(profile: dict, mark: bool) -> list:
-    """Blocks for the intra-table FK heuristic layer, or [] if no candidates."""
-    if suggest_intratable_fk_candidates is None:
-        return []
-    try:
-        cands = suggest_intratable_fk_candidates(profile)
-    except Exception:  # noqa: BLE001
-        return []
-    cands = [c for c in (cands or []) if _is_dict(c)]
-    if not cands:
-        return []
-
-    fk_term = "[[term:fk]]**claves foráneas**[[/term]]" if mark else "**claves foráneas**"
-    blocks = [
-        model.Heading(text="Posibles claves foráneas (heurística de nombre)", level=2),
-        model.Markdown(text=(
-            f"No hay otras tablas que referenciar, pero algunas columnas **parecen** "
-            f"{fk_term} por su nombre (terminan en «id») y su cardinalidad (muchos "
-            "valores repetidos, N:1). Es una **sugerencia heurística**, no una "
-            "afirmación: el nombre de la tabla destino es una conjetura y no se "
-            "comprueba inclusión de valores contra ninguna tabla real.")),
-    ]
-    rows = []
-    for c in cands:
-        rows.append([
-            model._safe_str(c.get("column")),
-            model._safe_str(c.get("ref_table_guess") or "—"),
-            _fmt_int(c.get("distinct_count")),
-            _fmt_pct_fraction(c.get("unique_pct")),
-            model._safe_str(c.get("inferred_type") or c.get("physical_type") or "—"),
-            model._safe_str(c.get("reason") or ""),
-        ])
-    blocks.append(model.DataTable(
-        header=["Columna", "Posible tabla", "Valores distintos", "% único",
-                "Tipo", "Motivo"],
-        rows=rows, title="Posibles FK por nombre y cardinalidad",
-        note="Heurística: posibles falsos positivos/negativos. No confirma containment."))
-    blocks.append(model.Note(
-        "Estas sugerencias se basan solo en el nombre y la cardinalidad. Para "
-        "confirmarlas haría falta la tabla destino y comprobar la inclusión de "
-        "valores (containment)."))
-    return blocks
-
-
-# --------------------------------------------------------------------------- #
-# Entry point.
-# --------------------------------------------------------------------------- #
-def _intro_blocks(mark: bool) -> list:
-    pk = "[[term:pk]]clave primaria[[/term]]" if mark else "clave primaria"
-    fk = "[[term:fk]]clave foránea[[/term]]" if mark else "clave foránea"
-    text = (
-        f"Este capítulo analiza las **relaciones de clave** de la tabla: qué columna "
-        f"identifica cada fila (la {pk}) y qué columnas referencian a otra tabla (las "
-        f"{fk}). Cuando la base las **declara** como restricciones del esquema, se "
-        "muestran tal cual; cuando no, se proponen las más probables a partir de los "
-        "datos —por inclusión de valores entre tablas (containment) o, en una sola "
-        "tabla, por una heurística de nombre y cardinalidad— siempre marcadas como "
-        "candidatas, nunca como hechos.")
-    return [model.Heading(text=CHAPTER_TITLE, level=1), model.Markdown(text=text)]
-
-
-def build_relaciones(profile: dict, ctx: dict):
-    """Build the RELACIONES Chapter, or None if there is nothing to say.
-
-    Args:
-        profile: the ``eda`` group TableProfile dict (may be None/empty).
-        ctx: presentation context. Consumes ``db_path`` + ``table`` (to read
-            declared constraints, list sibling tables and run the containment FK
-            inference) and ``glossary`` (to register the relational terms).
-
-    Returns:
-        A ``model.Chapter`` with the applicable relation layers; or ``None`` when
-        the dataset has no declared key, no key candidates and no FK candidate
-        (neither inter- nor intra-table).
-    """
-    if not isinstance(profile, dict):
-        profile = {}
-    ctx = ctx if isinstance(ctx, dict) else {}
-    db_path = ctx.get("db_path")
-    table = ctx.get("table")
-
-    mark = _register_terms(ctx)
-
-    # Build each layer; the chapter is the concatenation of the non-empty ones.
-    declared = _declared_keys(db_path, table)
-    declared_blocks = _declared_section(declared) if declared else []
-    declared_has_fk = bool(declared and declared.get("foreign_keys"))
-
-    pk_blocks = _pk_candidates_section(profile, mark)
-
-    tables = _list_source_tables(db_path)
-    inter_blocks = _inter_table_section(db_path, tables, mark)
-
-    # The intra-table heuristic only makes sense when no real FK is available for
-    # this table — neither declared nor inferred inter-table. Otherwise the real
-    # relations already answer the question and the heuristic is just noise.
-    if declared_has_fk or inter_blocks:
-        intra_blocks = []
-    else:
-        intra_blocks = _intra_table_section(profile, mark)
-
-    body = declared_blocks + pk_blocks + inter_blocks + intra_blocks
-    if not body:
-        return None  # chapter does not apply: nothing to say about relations.
-
-    blocks = _intro_blocks(mark) + body
-    return model.Chapter(id=CHAPTER_ID, title=CHAPTER_TITLE,
-                         version=CHAPTER_VERSION, blocks=blocks)
@@ -1,273 +0,0 @@
-"""Tests for the RELACIONES chapter — DoD: golden(s) + edges + no-cut render.
-
-Two goldens covering the two real paths of the chapter:
-
- **Intra-table** (a single table, no db source for relations): the chapter shows
-  the primary-key candidates from the profile and the heuristic foreign-key
-  suggestions (name + cardinality), explicitly flagged as a heuristic. Renders to
-  PDF and PPTX with nothing cut.
- **Inter-table** (a real DuckDB file with two related tables, customers/orders,
-  with a declared FK): the chapter shows the declared keys, the containment-based
-  FK candidates and the join graph (roles + a pasteable Mermaid diagram).
-
-Edges: a profile with no key candidate and no FK-looking column returns None;
-``None`` / ``{}`` profiles do not raise. The chapter registers its glossary terms.
-
-Layers that depend on the sibling registry functions delegated alongside this
-chapter (``detect_declared_keys_duckdb``, ``suggest_intratable_fk_candidates``)
-are asserted **conditionally on the function being importable**, so the chapter's
-honest-degradation contract is what is tested, never a hard dependency on import
-timing.
-"""
-
-import os
-import tempfile
-
-import duckdb
-from pptx import Presentation
-from pypdf import PdfReader
-
-from datascience.automatic_eda.chapters.relaciones import build_relaciones
-from datascience.automatic_eda.model import Chapter, Group, GlossaryCollector
-from datascience.render_automatic_eda_pdf import render_automatic_eda_pdf
-from datascience.render_automatic_eda_pptx import render_automatic_eda_pptx
-
-# The optional sibling functions: their layers are asserted only when present.
-try:
-    from datascience.detect_declared_keys_duckdb import detect_declared_keys_duckdb
-except Exception:  # noqa: BLE001
-    detect_declared_keys_duckdb = None
-try:
-    from datascience.suggest_intratable_fk_candidates import (
-        suggest_intratable_fk_candidates,
-    )
-except Exception:  # noqa: BLE001
-    suggest_intratable_fk_candidates = None
-
-
-# --------------------------------------------------------------------------- #
-# Helpers.
-# --------------------------------------------------------------------------- #
-def _flatten(blocks) -> list:
-    """Flatten Group blocks so a test can inspect every leaf block."""
-    out = []
-    for b in blocks:
-        if isinstance(b, Group):
-            out.extend(_flatten(b.blocks))
-        else:
-            out.append(b)
-    return out
-
-
-def _text_of(chapter: Chapter) -> str:
-    """Collect all visible text of a chapter's blocks into one string."""
-    parts = []
-    for b in _flatten(chapter.blocks):
-        for attr in ("text", "title", "note"):
-            v = getattr(b, attr, None)
-            if isinstance(v, str):
-                parts.append(v)
-        header = getattr(b, "header", None)
-        if isinstance(header, list):
-            parts.extend(str(c) for c in header)
-        rows = getattr(b, "rows", None)
-        if isinstance(rows, list):
-            for r in rows:
-                if isinstance(r, (list, tuple)):
-                    parts.extend(str(c) for c in r)
-                else:
-                    parts.append(str(r))
-    return "\n".join(parts)
-
-
-def _render_both(chapter: Chapter, tag: str):
-    """Render the chapter to PDF and PPTX; return (pdf_text, n_slides)."""
-    tmp = tempfile.mkdtemp(prefix=f"relaciones_{tag}_")
-    pdf_path = os.path.join(tmp, "out.pdf")
-    pptx_path = os.path.join(tmp, "out.pptx")
-    meta = {"title": f"EDA — {tag}"}
-    render_automatic_eda_pdf([chapter], pdf_path, meta)
-    render_automatic_eda_pptx([chapter], pptx_path, meta)
-    assert os.path.exists(pdf_path) and os.path.getsize(pdf_path) > 0
-    assert os.path.exists(pptx_path) and os.path.getsize(pptx_path) > 0
-    text = "".join(p.extract_text() or "" for p in PdfReader(pdf_path).pages)
-    n_slides = len(Presentation(pptx_path).slides)
-    return text, n_slides
-
-
-# --------------------------------------------------------------------------- #
-# Fixtures.
-# --------------------------------------------------------------------------- #
-def _titanic_profile() -> dict:
-    """A single-table profile: a PK candidate + a column that looks like a FK."""
-    return {
-        "table": "titanic",
-        "source": "/data/titanic.csv",
-        "n_rows": 891,
-        "n_cols": 4,
-        "key_candidates": ["PassengerId"],
-        "columns": [
-            {"name": "PassengerId", "inferred_type": "numeric",
-             "physical_type": "BIGINT", "distinct_count": 891,
-             "unique_pct": 1.0, "flags": ["possible_id"]},
-            {"name": "ticket_id", "inferred_type": "numeric",
-             "physical_type": "BIGINT", "distinct_count": 681,
-             "unique_pct": 0.76, "flags": []},
-            {"name": "fare", "inferred_type": "numeric",
-             "physical_type": "DOUBLE", "distinct_count": 248,
-             "unique_pct": 0.28, "flags": []},
-            {"name": "sex", "inferred_type": "categorical",
-             "physical_type": "VARCHAR", "distinct_count": 2,
-             "unique_pct": 0.002, "flags": []},
-        ],
-    }
-
-
-def _make_relational_db(path: str) -> None:
-    """Create a small DuckDB with customers(id) <- orders(customer_id), real FK."""
-    con = duckdb.connect(path)
-    con.execute("CREATE TABLE customers(id INTEGER PRIMARY KEY, name TEXT)")
-    con.execute(
-        "CREATE TABLE orders(id INTEGER PRIMARY KEY, "
-        "customer_id INTEGER REFERENCES customers(id), amount DOUBLE)")
-    con.execute("INSERT INTO customers VALUES "
-                "(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e')")
-    con.execute("INSERT INTO orders VALUES "
-                "(1,1,10.0),(2,1,20.0),(3,2,30.0),(4,3,40.0),"
-                "(5,3,50.0),(6,4,60.0),(7,5,70.0),(8,2,80.0)")
-    con.close()
-
-
-def _orders_profile() -> dict:
-    """A profile for the `orders` table of the relational DB."""
-    return {
-        "table": "orders",
-        "source": "orders",
-        "n_rows": 8,
-        "n_cols": 3,
-        "key_candidates": ["id"],
-        "columns": [
-            {"name": "id", "inferred_type": "numeric", "physical_type": "INTEGER",
-             "distinct_count": 8, "unique_pct": 1.0, "flags": ["possible_id"]},
-            {"name": "customer_id", "inferred_type": "numeric",
-             "physical_type": "INTEGER", "distinct_count": 5, "unique_pct": 0.625,
-             "flags": []},
-            {"name": "amount", "inferred_type": "numeric", "physical_type": "DOUBLE",
-             "distinct_count": 8, "unique_pct": 1.0, "flags": []},
-        ],
-    }
-
-
-# --------------------------------------------------------------------------- #
-# Golden 1 — intra-table.
-# --------------------------------------------------------------------------- #
-def test_golden_intra_table_pk_and_fk_heuristic():
-    """Single table: PK candidate shown; FK heuristic shown (if fn available);
-    renders to PDF + PPTX with nothing cut."""
-    prof = _titanic_profile()
-    glossary = GlossaryCollector()
-    # No db_path: only the profile-derived layers apply (no declared, no inter).
-    chapter = build_relaciones(prof, {"glossary": glossary})
-
-    assert isinstance(chapter, Chapter)
-    assert chapter.id == "relaciones"
-    text = _text_of(chapter)
-
-    # PK candidate is always present (comes from the profile).
-    assert "Candidatos a clave primaria" in text
-    assert "PassengerId" in text
-
-    # Glossary terms got registered.
-    for key in ("pk", "fk", "cardinalidad"):
-        assert glossary.has(key)
-
-    # FK heuristic layer: present iff the delegated function is importable.
-    if suggest_intratable_fk_candidates is not None:
-        assert "Posibles claves foráneas" in text
-        assert "ticket_id" in text
-        # The float measure and the PK itself are NOT suggested as FKs.
-        assert "Posibles FK por nombre" in text
-
-    pdf_text, n_slides = _render_both(chapter, "intra")
-    assert "PassengerId" in pdf_text
-    assert n_slides >= 1
-
-
-# --------------------------------------------------------------------------- #
-# Golden 2 — inter-table (real DuckDB).
-# --------------------------------------------------------------------------- #
-def test_golden_inter_table_containment_and_join_graph():
-    """Two related tables: declared FK (if fn available) + containment FK
-    candidate + Mermaid join graph."""
-    tmp = tempfile.mkdtemp(prefix="relaciones_db_")
-    db_path = os.path.join(tmp, "shop.duckdb")
-    _make_relational_db(db_path)
-
-    prof = _orders_profile()
-    glossary = GlossaryCollector()
-    chapter = build_relaciones(
-        prof, {"db_path": db_path, "table": "orders", "glossary": glossary})
-
-    assert isinstance(chapter, Chapter)
-    text = _text_of(chapter)
-
-    # Inter-table containment FK candidate: customer_id -> customers.id. This path
-    # uses infer_fk_containment_duckdb + build_join_graph, both already in the
-    # registry, so it must be present.
-    assert "Claves foráneas candidatas (inter-tabla)" in text
-    assert "orders.customer_id" in text
-    assert "customers.id" in text
-    # Join graph with a pasteable Mermaid diagram.
-    assert "Grafo de relaciones" in text
-    assert "mermaid" in text
-    assert "graph LR" in text
-    assert "containment" in text.lower()
-
-    # Declared-keys layer: present iff the delegated function is importable.
-    if detect_declared_keys_duckdb is not None:
-        assert "Claves declaradas en el esquema" in text
-        assert "Claves foráneas declaradas" in text
-
-    pdf_text, n_slides = _render_both(chapter, "inter")
-    assert "customer_id" in pdf_text
-    assert n_slides >= 1
-
-
-# --------------------------------------------------------------------------- #
-# Edges.
-# --------------------------------------------------------------------------- #
-def test_none_when_no_relations():
-    """No key candidates, no FK-looking columns, no db source -> None."""
-    prof = {
-        "table": "flat", "n_rows": 100, "n_cols": 2, "key_candidates": [],
-        "columns": [
-            {"name": "value", "inferred_type": "numeric", "physical_type": "DOUBLE",
-             "distinct_count": 50, "unique_pct": 0.5, "flags": []},
-            {"name": "label", "inferred_type": "categorical",
-             "physical_type": "VARCHAR", "distinct_count": 3, "unique_pct": 0.03,
-             "flags": []},
-        ],
-    }
-    assert build_relaciones(prof, {}) is None
-
-
-def test_empty_and_none_profile_do_not_raise():
-    """None / {} profile and missing ctx degrade to None without raising."""
-    assert build_relaciones(None, None) is None
-    assert build_relaciones({}, {}) is None
-    assert build_relaciones({}, {"glossary": GlossaryCollector()}) is None
-
-
-def test_pk_candidate_only_builds_chapter():
-    """A profile with only a key candidate (no FK anything, no db) still builds:
-    the relations chapter applies because there is a PK candidate to report."""
-    prof = {
-        "table": "t", "n_rows": 10, "n_cols": 1, "key_candidates": ["row_id"],
-        "columns": [
-            {"name": "row_id", "inferred_type": "numeric", "physical_type": "BIGINT",
-             "distinct_count": 10, "unique_pct": 1.0, "flags": ["possible_id"]},
-        ],
-    }
-    chapter = build_relaciones(prof, {})
-    assert isinstance(chapter, Chapter)
-    assert "Candidatos a clave primaria" in _text_of(chapter)
@@ -33,7 +33,6 @@ CHAPTER_ORDER = [
    "cat_distr",     # categorical distributions
    "calidad",       # data quality
    "correlacion",   # correlations / associations
-    "relaciones",    # key relations: declared/candidate PK + FK (inter/intra-table)
    "modelos",       # cheap models (PCA/KMeans/outliers)
    "timeseries",    # time-series analysis
    "geospatial",    # geospatial
@@ -20,6 +20,10 @@ vacia y el resto del ctx se construye igual. Ante un fallo global devuelve al
 menos ``{**base_ctx, "db_path": db_path, "table": table}``.

 Claves de DATOS que produce (las consumen los capitulos):
+  - ``head_rows``      : [ {col: valor, ...}, ... ] primeras filas CRUDAS de la
+                         tabla (``SELECT * LIMIT head_n``), una entrada por fila.
+                         La lee el capitulo OVERVIEW para mostrar df.head real en
+                         lugar del placeholder "df.head no disponible".
  - ``raw_numeric``    : {col: [float|None, ...]} muestra cruda de las columnas
                         numericas, ALINEADA POR FILA (una entrada por fila aunque
                         sea None). La leen modelos (clustering 2D en vivo) y
@@ -56,7 +60,7 @@ def _to_float(value):
        return None


-def build_eda_render_ctx(db_path, table, profile, backend="duckdb", sample=5000, base_ctx=None):
+def build_eda_render_ctx(db_path, table, profile, backend="duckdb", sample=5000, base_ctx=None, head_n=10):
    """Construye el ctx de datos crudos para los renderers de AutomaticEDA.

    Args:
@@ -77,13 +81,15 @@ def build_eda_render_ctx(db_path, table, profile, backend="duckdb", sample=5000,
        base_ctx: dict opcional con claves de presentacion ya preparadas
            (dataset_name, source_origin, ...). Se parte de una copia y NO se
            pisan sus claves; solo se añaden las de datos. Default None -> {}.
+        head_n: numero de filas crudas a muestrear para ``ctx["head_rows"]``
+            (df.head del capitulo OVERVIEW). Default 10. None o <=0 omite la clave.

    Returns:
        El dict ``ctx`` directamente (NO un wrapper {status,...}): se pasa tal
        cual como ``meta={"ctx": <ese dict>}`` a render_automatic_eda_pdf/pptx.
-        Nunca lanza. Claves que puede contener: raw_numeric, timeseries_raw,
-        geo_points (omitidas si no aplican o fallan), y siempre db_path + table
-        para backends validos.
+        Nunca lanza. Claves que puede contener: head_rows, raw_numeric,
+        timeseries_raw, geo_points (omitidas si no aplican o fallan), y siempre
+        db_path + table para backends validos.
    """
    # Copia de base_ctx: nunca mutamos el dict del caller. Las claves de
    # presentacion que ya traiga se conservan; las de datos se añaden encima.
@@ -117,6 +123,24 @@ def build_eda_render_ctx(db_path, table, profile, backend="duckdb", sample=5000,
        ctx["db_path"] = db_path
        ctx["table"] = table

+        # 1.5) head_rows: first RAW rows of the table (SELECT * LIMIT n) so the
+        # OVERVIEW chapter shows the real df.head instead of its placeholder.
+        # One query, dict-no-throw: any failure here only omits this key. A
+        # head_rows already present in base_ctx (presentation) is not replaced.
+        if head_n and "head_rows" not in ctx:
+            try:
+                head_limit = int(head_n)  # in the try: a bad head_n only skips head_rows
+                hq = None
+                if head_limit > 0:
+                    hq = query_fn(f'SELECT * FROM "{table}" LIMIT {head_limit}')
+                if isinstance(hq, dict) and hq.get("status") == "ok":
+                    hrows = [dict(r) for r in (hq.get("rows") or [])
+                             if isinstance(r, dict)]
+                    if hrows:
+                        ctx["head_rows"] = hrows
+            except Exception:  # noqa: BLE001 - dict-no-throw: omit the key
+                pass
+
        # 2) Columnas del perfil agregado (lectura defensiva).
        cols = profile.get("columns") if isinstance(profile, dict) else None
        cols = cols or []
@@ -1,107 +0,0 @@
---
-name: detect_declared_keys_duckdb
-kind: function
-lang: py
-domain: datascience
-version: "1.0.0"
-purity: impure
-signature: "def detect_declared_keys_duckdb(db_path: str, table: str = None) -> dict"
-description: "Detecta las claves DECLARADAS (constraints reales) de un schema DuckDB leyendo la table function duckdb_constraints(): extrae PRIMARY KEY, FOREIGN KEY y UNIQUE (ignora NOT NULL y CHECK) y las devuelve normalizadas con sus columnas, y para las FK con su tabla y columnas referenciadas. Con table=None procesa todas las tablas; con table='X' filtra a PK/UNIQUE de X y a FK cuyo origen es X (case-sensitive). A diferencia de infer_fk_containment_duckdb (que INFIERE FKs candidatas por containment de valores cuando el schema no las declara), esta funcion devuelve las relaciones de clave REALES del schema. Estilo dict-no-throw: nunca lanza. Parte del grupo eda (relaciones de clave)."
-tags: [eda, duckdb, datascience, relations, primary-key, foreign-key, schema, exploratory-data-analysis]
-params:
-  - name: db_path
-    desc: "Ruta al archivo DuckDB. Debe existir (lectura read-only via duckdb_query_readonly; no se crea). Un path inexistente devuelve {status:'error', ...}."
-  - name: table
-    desc: "Si se pasa, filtra los resultados a esa tabla: incluye PRIMARY KEY y UNIQUE cuya tabla sea `table`, y FOREIGN KEY cuya tabla ORIGEN sea `table` (no la referenciada). None (default) devuelve los constraints de todas las tablas. La comparacion es case-sensitive (nombres tal cual los devuelve DuckDB)."
-output: "dict dict-no-throw. En exito {status:'ok', primary_keys:[{table:str, columns:[str,...]}, ...], foreign_keys:[{table:str, columns:[str,...], referenced_table:str, referenced_columns:[str,...]}, ...], unique:[{table:str, columns:[str,...]}, ...], tables:[str,...]} donde tables es la lista ordenada de tablas (origen) que poseen al menos un constraint PK/FK/UNIQUE emitido. Solo se emiten constraints de clave: NOT NULL y CHECK se ignoran. En error {status:'error', error:str}."
-uses_functions: [duckdb_query_readonly_py_infra]
-uses_types: []
-returns: []
-returns_optional: false
-error_type: "error_go_core"
-imports: []
-tested: true
-tests: ["test_golden_detecta_pks_y_fk", "test_golden_ignora_not_null_y_check", "test_edge_filtra_por_tabla_orders", "test_edge_filtra_por_tabla_customers", "test_edge_unique_declarado", "test_edge_sin_constraints_listas_vacias", "test_error_db_inexistente_no_lanza", "test_shape_resultado"]
-test_file_path: "python/functions/datascience/detect_declared_keys_duckdb_test.py"
-file_path: "python/functions/datascience/detect_declared_keys_duckdb.py"
---
-
-## Ejemplo
-
-```python
-import sys, os, duckdb
-sys.path.insert(0, os.path.join("python", "functions"))
-from datascience import detect_declared_keys_duckdb
-
-# Base de ejemplo en /tmp: orders.customer_id -> customers.id (FK declarada)
-path = "/tmp/declared_keys_demo.duckdb"
-if os.path.exists(path):
-    os.remove(path)
-con = duckdb.connect(path)
-con.execute("CREATE TABLE customers(id INTEGER PRIMARY KEY, name TEXT)")
-con.execute(
-    "CREATE TABLE orders("
-    "  id INTEGER PRIMARY KEY,"
-    "  customer_id INTEGER REFERENCES customers(id),"
-    "  amt DOUBLE)"
-)
-con.close()
-
-res = detect_declared_keys_duckdb(path)
-if res["status"] == "ok":
-    for pk in res["primary_keys"]:
-        print(f"PK  {pk['table']}({', '.join(pk['columns'])})")
-    for fk in res["foreign_keys"]:
-        print(f"FK  {fk['table']}({', '.join(fk['columns'])}) -> "
-              f"{fk['referenced_table']}({', '.join(fk['referenced_columns'])})")
-    # PK  customers(id)
-    # PK  orders(id)
-    # FK  orders(customer_id) -> customers(id)
-else:
-    print("error:", res["error"])
-
-# Filtrar a una tabla concreta (PK/UNIQUE de orders + FK con origen orders):
-solo_orders = detect_declared_keys_duckdb(path, table="orders")
-print(solo_orders["tables"])  # ['orders']
-```
-
-## Cuando usarla
-
- Cuando exploras un esquema DuckDB y quieres mostrar las relaciones de clave REALES (PK/FK/UNIQUE) que el schema ha declarado, sin inferir nada.
- Como paso del capitulo RELACIONES del grupo `eda`: primero mira las claves declaradas con esta funcion; si el schema no declara FKs, complementa con `infer_fk_containment_duckdb` (inferencia por containment).
- Antes de documentar o migrar un esquema, para listar el contrato de integridad referencial que el motor ya conoce.
- Para validar que las constraints que esperas (esa FK que creaste con `REFERENCES`) realmente estan declaradas en la base materializada.
-
-## Gotchas
-
- **Impura**: lee de disco via la primitiva read-only `duckdb_query_readonly` (no crea ni modifica la base). El `db_path` debe existir; un path inexistente devuelve `{status:'error'}` (read_only NO crea la base).
- **Requiere `duckdb_constraints()`**: usa la table function `duckdb_constraints()`, disponible en DuckDB modernos (verificado en 1.5.2). En versiones antiguas sin esa funcion, la query falla y se devuelve `{status:'error'}`.
- **Solo claves DECLARADAS**: devuelve lo que el schema declaro con `PRIMARY KEY` / `FOREIGN KEY (... REFERENCES ...)` / `UNIQUE`. Una tabla materializada con `CREATE TABLE AS SELECT` NO lleva constraints — para esos casos no habra claves que mostrar y hay que INFERIRLAS (`infer_fk_containment_duckdb`).
- **NOT NULL y CHECK se ignoran**: `duckdb_constraints()` tambien emite filas `NOT NULL` (DuckDB genera una por cada columna PK) y `CHECK`; esta funcion las descarta y solo conserva PK/FK/UNIQUE.
- **Nombres case-sensitive**: el filtro `table='Orders'` no casa con una tabla `orders`. Se comparan los nombres tal cual los devuelve DuckDB.
- **FK atribuida al origen**: una FOREIGN KEY se atribuye a su tabla ORIGEN (el `table` de la entrada), no a la referenciada. El filtro `table='X'` trae las FK cuyo origen es X, no las que apuntan a X.
- **`tables` = tablas dueñas de constraints emitidos**: la lista `tables` contiene solo las tablas que poseen al menos un PK/FK/UNIQUE en el resultado (su campo `table`), ordenadas. No incluye tablas referenciadas que no tengan constraint propio en la salida.
- **Columnas como listas**: `constraint_column_names` y `referenced_column_names` son columnas LIST de DuckDB; en 1.5.2 llegan como listas Python. La funcion las normaliza a listas de strings con una red de seguridad por si llegaran como string.
-
-## Notas
-
-`duckdb_constraints()` devuelve una fila por constraint con los campos
-`table_name`, `constraint_type`, `constraint_column_names`, `referenced_table`,
-`referenced_column_names`. Mapeo a la salida:
-
-```text
-PRIMARY KEY -> primary_keys[]: {table, columns}
-UNIQUE      -> unique[]:       {table, columns}
-FOREIGN KEY -> foreign_keys[]: {table, columns, referenced_table, referenced_columns}
-NOT NULL    -> ignorado
-CHECK       -> ignorado
-```
-
-Para una FK, `referenced_table` y `referenced_column_names` vienen poblados; para
-PK/UNIQUE, `referenced_table` es NULL y `referenced_column_names` una lista vacia.
-
-Complementa a `infer_fk_containment_duckdb`: esta funcion devuelve las relaciones
-de clave REALES del schema (declaradas); la otra INFIERE FKs candidatas por
-containment de valores cuando el schema no las declaro. En el capitulo RELACIONES
-de AutomaticEDA se usan en orden: primero las declaradas, luego la inferencia como
-respaldo.
@@ -1,127 +0,0 @@
-"""detect_declared_keys_duckdb — lee las claves DECLARADAS de un schema DuckDB.
-
-Funcion impura: lee de disco a traves de la primitiva read-only del grupo
-`duckdb` (duckdb_query_readonly). Pertenece al grupo de capacidad `eda`
-(relaciones de clave): a diferencia de infer_fk_containment_duckdb, que INFIERE
-FOREIGN KEYs candidatas por containment de valores, esta funcion devuelve las
-constraints REALES que el schema ha declarado (PRIMARY KEY / FOREIGN KEY /
-UNIQUE) leyendo la table function `duckdb_constraints()`.
-
-Es la pieza del capitulo RELACIONES de AutomaticEDA que muestra las relaciones de
-clave reales cuando existen — frente a la inferencia, que se usa cuando el schema
-no las declaro.
-
-Estilo dict-no-throw del grupo duckdb: nunca lanza; captura cualquier error y
-devuelve {status:'error', error:str}.
-"""
-
-from infra import duckdb_query_readonly
-
-
-def _as_list(value) -> list:
-    """Normaliza el valor de una columna LIST de DuckDB a una lista de strings.
-
-    En DuckDB 1.5.2, `constraint_column_names` y `referenced_column_names` llegan
-    ya como listas Python a traves de duckdb_query_readonly. Este helper es solo
-    una red de seguridad: si por cualquier motivo llegara como string (p.ej. la
-    representacion `[id, customer_id]`), la parsea de forma defensiva.
-    """
-    if value is None:
-        return []
-    if isinstance(value, (list, tuple)):
-        return [str(v) for v in value]
-    if isinstance(value, str):
-        s = value.strip()
-        if s.startswith("[") and s.endswith("]"):
-            s = s[1:-1]
-        if not s.strip():
-            return []
-        return [
-            part.strip().strip("'\"")
-            for part in s.split(",")
-            if part.strip().strip("'\"")
-        ]
-    return [str(value)]
-
-
-def detect_declared_keys_duckdb(db_path: str, table: str = None) -> dict:
-    """Detecta las claves PRIMARY KEY / FOREIGN KEY / UNIQUE declaradas en DuckDB.
-
-    Lee la table function `duckdb_constraints()` y extrae solo las constraints de
-    clave (PRIMARY KEY, FOREIGN KEY, UNIQUE), ignorando NOT NULL y CHECK.
-
-    Args:
-        db_path: ruta al archivo DuckDB. Debe existir (lectura read-only; no se
-            crea). Un path inexistente devuelve {status:'error', ...} sin lanzar.
-        table: si se pasa, filtra los resultados a esa tabla: incluye PRIMARY KEY
-            y UNIQUE cuya tabla sea `table`, y FOREIGN KEY cuya tabla ORIGEN sea
-            `table`. None (default) devuelve los constraints de todas las tablas.
-            La comparacion de nombres es case-sensitive (tal cual los devuelve
-            DuckDB).
-
-    Returns:
-        dict dict-no-throw. En exito:
-            {status:'ok',
-             primary_keys:[{table:str, columns:[str, ...]}, ...],
-             foreign_keys:[{table:str, columns:[str, ...],
-                            referenced_table:str,
-                            referenced_columns:[str, ...]}, ...],
-             unique:[{table:str, columns:[str, ...]}, ...],
-             tables:[str, ...]}   # tablas (origen) con algun PK/FK/UNIQUE emitido
-        En error (sin lanzar): {status:'error', error:str}.
-    """
-    try:
-        sql = (
-            "SELECT table_name, constraint_type, constraint_column_names, "
-            "referenced_table, referenced_column_names FROM duckdb_constraints()"
-        )
-        res = duckdb_query_readonly(db_path, sql)
-        if res["status"] != "ok":
-            return {"status": "error", "error": res["error"]}
-
-        primary_keys = []
-        foreign_keys = []
-        unique = []
-        tables = set()
-
-        for row in res["rows"]:
-            ctype = row["constraint_type"]
-            tname = row["table_name"]
-
-            # Filtro por tabla origen: para PK/FK/UNIQUE el dueño del constraint es
-            # `table_name`. Una FK se atribuye a su tabla origen (no a la
-            # referenciada), igual que el filtro pide.
-            if table is not None and tname != table:
-                continue
-
-            cols = _as_list(row["constraint_column_names"])
-
-            if ctype == "PRIMARY KEY":
-                primary_keys.append({"table": tname, "columns": cols})
-                tables.add(tname)
-            elif ctype == "UNIQUE":
-                unique.append({"table": tname, "columns": cols})
-                tables.add(tname)
-            elif ctype == "FOREIGN KEY":
-                foreign_keys.append(
-                    {
-                        "table": tname,
-                        "columns": cols,
-                        "referenced_table": row["referenced_table"],
-                        "referenced_columns": _as_list(
-                            row["referenced_column_names"]
-                        ),
-                    }
-                )
-                tables.add(tname)
-            # NOT NULL y CHECK se ignoran: no son relaciones de clave.
-
-        return {
-            "status": "ok",
-            "primary_keys": primary_keys,
-            "foreign_keys": foreign_keys,
-            "unique": unique,
-            "tables": sorted(tables),
-        }
-    except Exception as e:  # noqa: BLE001
-        return {"status": "error", "error": str(e)}
@@ -1,167 +0,0 @@
-"""Tests para detect_declared_keys_duckdb."""
-
-import duckdb
-import pytest
-
-from .detect_declared_keys_duckdb import detect_declared_keys_duckdb
-
-
-@pytest.fixture
-def db(tmp_path):
-    """DuckDB temporal con claves declaradas.
-
-    - customers(id PRIMARY KEY, name)
-    - orders(id PRIMARY KEY, customer_id REFERENCES customers(id), amt)
-
-    Esto declara dos PRIMARY KEY (customers.id, orders.id) y una FOREIGN KEY
-    (orders.customer_id -> customers.id). DuckDB ademas genera constraints
-    NOT NULL para las columnas PK, que la funcion debe ignorar.
-    """
-    path = str(tmp_path / "keys_test.duckdb")
-    con = duckdb.connect(path)
-    con.execute("CREATE TABLE customers(id INTEGER PRIMARY KEY, name TEXT)")
-    con.execute(
-        "CREATE TABLE orders("
-        "  id INTEGER PRIMARY KEY,"
-        "  customer_id INTEGER REFERENCES customers(id),"
-        "  amt DOUBLE"
-        ")"
-    )
-    con.close()
-    return path
-
-
-def _pk_for(res, table):
-    """Devuelve la entrada primary_keys cuya tabla es `table`, o None."""
-    for pk in res["primary_keys"]:
-        if pk["table"] == table:
-            return pk
-    return None
-
-
-def test_golden_detecta_pks_y_fk(db):
-    """Golden: detecta las dos PK y la FK declaradas, con valores concretos."""
-    res = detect_declared_keys_duckdb(db)
-    assert res["status"] == "ok"
-
-    # PRIMARY KEY de customers y de orders.
-    pk_customers = _pk_for(res, "customers")
-    pk_orders = _pk_for(res, "orders")
-    assert pk_customers is not None
-    assert pk_customers["columns"] == ["id"]
-    assert pk_orders is not None
-    assert pk_orders["columns"] == ["id"]
-
-    # FOREIGN KEY orders.customer_id -> customers.id.
-    assert len(res["foreign_keys"]) == 1
-    fk = res["foreign_keys"][0]
-    assert fk["table"] == "orders"
-    assert fk["columns"] == ["customer_id"]
-    assert fk["referenced_table"] == "customers"
-    assert fk["referenced_columns"] == ["id"]
-
-    # tables incluye ambas (origen de algun constraint).
-    assert res["tables"] == ["customers", "orders"]
-
-
-def test_golden_ignora_not_null_y_check(db):
-    """NOT NULL (auto-generado por las PK) no aparece como clave."""
-    res = detect_declared_keys_duckdb(db)
-    assert res["status"] == "ok"
-    # Solo 2 PK reales (no las NOT NULL que DuckDB genera por cada columna PK).
-    assert len(res["primary_keys"]) == 2
-    # No hay UNIQUE declarado en este schema.
-    assert res["unique"] == []
-
-
-def test_edge_filtra_por_tabla_orders(db):
-    """Edge table='orders': PK de orders + su FK; NO la PK de customers."""
-    res = detect_declared_keys_duckdb(db, table="orders")
-    assert res["status"] == "ok"
-
-    # Solo la PK de orders.
-    assert len(res["primary_keys"]) == 1
-    assert res["primary_keys"][0]["table"] == "orders"
-    assert res["primary_keys"][0]["columns"] == ["id"]
-    # La PK de customers NO esta.
-    assert _pk_for(res, "customers") is None
-
-    # La FK de orders si esta (origen = orders).
-    assert len(res["foreign_keys"]) == 1
-    assert res["foreign_keys"][0]["table"] == "orders"
-    assert res["foreign_keys"][0]["referenced_table"] == "customers"
-
-    # tables solo contiene orders (la dueña de los constraints emitidos).
-    assert res["tables"] == ["orders"]
-
-
-def test_edge_filtra_por_tabla_customers(db):
-    """Edge table='customers': solo su PK; ninguna FK (orders queda fuera)."""
-    res = detect_declared_keys_duckdb(db, table="customers")
-    assert res["status"] == "ok"
-    assert len(res["primary_keys"]) == 1
-    assert res["primary_keys"][0]["table"] == "customers"
-    assert res["foreign_keys"] == []
-    assert res["tables"] == ["customers"]
-
-
-def test_edge_unique_declarado(tmp_path):
-    """Edge: una constraint UNIQUE declarada aparece en `unique`."""
-    path = str(tmp_path / "unique_test.duckdb")
-    con = duckdb.connect(path)
-    con.execute("CREATE TABLE products(sku INTEGER UNIQUE, name TEXT)")
-    con.close()
-
-    res = detect_declared_keys_duckdb(path)
-    assert res["status"] == "ok"
-    assert len(res["unique"]) == 1
-    assert res["unique"][0]["table"] == "products"
-    assert res["unique"][0]["columns"] == ["sku"]
-    assert res["primary_keys"] == []
-    assert res["foreign_keys"] == []
-    assert res["tables"] == ["products"]
-
-
-def test_edge_sin_constraints_listas_vacias(tmp_path):
-    """Edge: tabla sin PK/FK/UNIQUE -> todas las listas vacias, status ok."""
-    path = str(tmp_path / "no_keys.duckdb")
-    con = duckdb.connect(path)
-    con.execute("CREATE TABLE log(a INTEGER, b INTEGER)")
-    con.close()
-
-    res = detect_declared_keys_duckdb(path)
-    assert res["status"] == "ok"
-    assert res["primary_keys"] == []
-    assert res["foreign_keys"] == []
-    assert res["unique"] == []
-    assert res["tables"] == []
-
-
-def test_error_db_inexistente_no_lanza(tmp_path):
-    """Error: db_path inexistente -> status error, sin lanzar excepcion."""
-    path = str(tmp_path / "does_not_exist.duckdb")
-    res = detect_declared_keys_duckdb(path)
-    assert res["status"] == "error"
-    assert isinstance(res["error"], str)
-    assert res["error"] != ""
-
-
-def test_shape_resultado(db):
-    """El retorno tiene exactamente las claves esperadas."""
-    res = detect_declared_keys_duckdb(db)
-    assert set(res.keys()) == {
-        "status",
-        "primary_keys",
-        "foreign_keys",
-        "unique",
-        "tables",
-    }
-    for pk in res["primary_keys"]:
-        assert set(pk.keys()) == {"table", "columns"}
-    for fk in res["foreign_keys"]:
-        assert set(fk.keys()) == {
-            "table",
-            "columns",
-            "referenced_table",
-            "referenced_columns",
-        }
@@ -1,91 +0,0 @@
---
-name: suggest_intratable_fk_candidates
-kind: function
-lang: py
-domain: datascience
-version: "1.0.0"
-purity: pure
-signature: "def suggest_intratable_fk_candidates(profile: dict, max_candidates: int = 20) -> list"
-description: "Sobre el TableProfile de UNA tabla (el dict de profile_table), sugiere por heuristica de nombre + cardinalidad que columnas PARECEN una clave foranea hacia otra tabla, cuando no hay relaciones inter-tabla que medir (una sola tabla). Es una SUGERENCIA, no una afirmacion: el ref_table_guess es el stem del nombre (customer_id -> customer) y NO confirma containment. Pura: solo lee el dict, sin I/O; nunca lanza (devuelve [])."
-tags: [eda, datascience, relationships, foreign-key, fk, heuristic, schema, python]
-uses_functions: []
-uses_types: []
-returns: []
-returns_optional: false
-error_type: ""
-imports: []
-params:
-  - name: profile
-    desc: "TableProfile (dict que produce profile_table / summarize_table_*). Se leen de forma defensiva `columns` (lista de ColumnProfile con name/inferred_type/physical_type/distinct_count/unique_pct/flags), `n_rows` (int) y `key_candidates` (lista de nombres de columna ya candidatos a PK, que se excluyen). Si no es dict o no trae columns -> []."
-  - name: max_candidates
-    desc: "Tope de sugerencias devueltas (default 20). Las columnas candidatas se ordenan por distinct_count descendente (mas informativas primero) antes de cortar a este maximo."
-output: "list (posiblemente vacia) de dicts, uno por columna sugerida, con claves: `column` (nombre), `ref_table_guess` (tabla conjeturada por el stem del nombre, p.ej. customer_id -> 'customer'), `reason` (frase humana que deja claro que es heuristica sin confirmar containment), `distinct_count` (int|None), `unique_pct` (float|None, fraccion 0-1 tal como viene del profile), `inferred_type` (str), `physical_type` (str). Nunca lanza."
-tested: true
-tests: ["test_golden_customer_id_detectado_otras_no", "test_camelcase_albumid_detectado", "test_constante_status_id_no_aparece", "test_profile_vacio_y_none_devuelven_lista_vacia", "test_category_id_casi_unico_parece_pk_no_aparece", "test_ref_table_guess_multitoken_y_orden_por_distinct", "test_max_candidates_corta_la_lista", "test_id_generico_solo_nunca_es_fk"]
-test_file_path: "python/functions/datascience/suggest_intratable_fk_candidates_test.py"
-file_path: "python/functions/datascience/suggest_intratable_fk_candidates.py"
---
-
-## Ejemplo
-
-```python
-from datascience import suggest_intratable_fk_candidates
-
-# TableProfile de UNA tabla (tipo titanic): customer_id es FK N:1; id es la PK;
-# amount es una medida float; name es categorica sin sufijo de id.
-profile = {
-    "n_rows": 891,
-    "key_candidates": ["id"],
-    "columns": [
-        {"name": "id", "inferred_type": "numeric", "physical_type": "BIGINT",
-         "distinct_count": 891, "unique_pct": 1.0, "flags": ["possible_id"]},
-        {"name": "customer_id", "inferred_type": "numeric", "physical_type": "BIGINT",
-         "distinct_count": 137, "unique_pct": 0.15, "flags": []},
-        {"name": "amount", "inferred_type": "numeric", "physical_type": "DOUBLE",
-         "distinct_count": 400, "unique_pct": 0.45, "flags": []},
-        {"name": "name", "inferred_type": "categorical", "physical_type": "VARCHAR",
-         "distinct_count": 700, "unique_pct": 0.78, "flags": []},
-    ],
-}
-
-out = suggest_intratable_fk_candidates(profile)
-[c["column"] for c in out]              # -> ["customer_id"]
-out[0]["ref_table_guess"]               # -> "customer"
-out[0]["reason"]
-# -> "el nombre termina en '_id' y es N:1 (137 valores distintos < 891 filas):
-#     parece (heuristica por nombre, sin confirmar containment) una referencia a
-#     una tabla «customer»"
-```
-
-## Cuando usarla
-
-Cuando el EDA tiene SOLO UNA tabla y, por tanto, no se puede inferir una FK
-inter-tabla por containment (no hay otra tabla cuyos valores contener). Es el plan B
-del capitulo RELACIONES de AutomaticEDA: en vez de medir solapamiento de valores
-entre tablas (lo correcto cuando hay varias, ver `infer_fk_containment_duckdb` /
-`build_join_graph`), conjetura por el NOMBRE de la columna (`<algo>_id`) y por su
-CARDINALIDAD N:1 que columnas parecen apuntar a una entidad externa. Usala para
-enriquecer el reporte con "estas columnas parecen referencias a otras tablas" sin
-prometer que esa tabla exista. NO la uses si tienes varias tablas: ahi mide
-containment de verdad.
-
-## Gotchas
-
- Es **heuristica**, no una verdad: produce **falsos positivos** (una columna
-  `period_id` que en realidad es un codigo libre, no una FK) y **falsos negativos**
-  (una FK que no se llama `*_id`, p.ej. `parent`, `owner`, `sku`). No la trates como
-  una afirmacion de esquema.
- `ref_table_guess` es una **conjetura por el nombre** (el stem sin el sufijo id):
-  `customer_id` -> `customer`, `AlbumId` -> `album`, `manager_staff_id` ->
-  `manager_staff`. Puede no coincidir con el nombre real de la tabla (plurales,
-  prefijos, alias). Es una pista, no un join garantizado.
- **NO confirma containment**: no comprueba que los valores de la columna existan en
-  ninguna otra tabla (no puede — solo recibe el perfil de una tabla). Para confirmar
-  una FK real con varias tablas usa `infer_fk_containment_duckdb`.
- Excluye deliberadamente: el `id`/`Id`/`ID` generico a secas (suele ser la PK
-  propia, no una referencia), las columnas constantes, las que parecen unicas
-  (`unique_pct >= 0.99`, mas PK que FK) y los tipos no-clave (float/decimal son
-  medidas; date/time/timestamp y boolean no son claves). En camelCase, `paid`,
-  `valid`, `grid` (con `id` en minuscula y sin separador) NO se confunden con FK.
- `unique_pct` se interpreta como **fraccion 0-1** (tal como la emite el profile), no
-  como porcentaje 0-100.
@@ -1,202 +0,0 @@
-"""suggest_intratable_fk_candidates — heuristica de FK intra-tabla del grupo `eda`.
-
-Sobre el TableProfile de UNA tabla (el dict que produce ``profile_table``), sugiere
-por heuristica de NOMBRE + CARDINALIDAD que columnas PARECEN una clave foranea hacia
-otra tabla, util cuando no hay relaciones inter-tabla disponibles (una sola tabla y,
-por tanto, sin containment cruzado que medir). Es una SUGERENCIA, no una afirmacion:
-no confirma que exista la tabla referida ni que los valores esten contenidos en ella.
-
-La consume el capitulo RELACIONES de AutomaticEDA cuando solo hay una tabla.
-
-Funcion PURA: solo lee el dict (lectura defensiva con ``.get``), no hace I/O y nunca
-lanza por inputs raros (devuelve ``[]``).
-"""
-
-# inferred_type que es compatible con una clave foranea (entero/categorico).
-_FK_INFERRED_OK = {"numeric", "categorical", "integer"}
-
-# Prefijos de physical_type que admiten ser clave foranea (enteros, texto, uuid).
-_FK_PHYSICAL_PREFIXES = (
-    "int", "bigint", "smallint", "tinyint", "hugeint", "uint",
-    "varchar", "text", "char", "bpchar", "string", "uuid",
-)
-
-# Prefijos de physical_type que EXCLUYEN ser clave foranea: medidas en coma flotante
-# (float/double/decimal/numeric/real), temporales (date/time/timestamp/interval) y
-# boolean. Se comprueban ANTES que las senales positivas (la exclusion gana: una
-# columna numeric con physical DOUBLE es una medida, no una FK).
-_FK_PHYSICAL_EXCLUDE = (
-    "float", "double", "decimal", "numeric", "real",
-    "date", "time", "timestamp", "interval",
-    "bool",
-)
-
-
-def _fk_name_signal(name):
-    """Detecta el sufijo de clave foranea en el nombre y devuelve ``(stem, sufijo)``.
-
-    Reconoce ``<algo>_id`` (snake), ``<Algo>Id`` y ``<algo>ID`` (camel). NO reconoce
-    el ``id``/``Id``/``ID`` generico a secas (suele ser la PK propia de la tabla, no
-    una referencia). En camelCase la ``I`` mayuscula marca el limite de palabra, asi
-    que ``paid``/``valid``/``grid`` (``id`` en minuscula y sin separador) NO matchean.
-
-    El ``stem`` se devuelve en minusculas y sirve de ``ref_table_guess`` (la tabla a
-    la que probablemente apunta): ``customer_id`` -> ``"customer"``, ``AlbumId`` ->
-    ``"album"``, ``manager_staff_id`` -> ``"manager_staff"``. Devuelve ``None`` si no
-    hay senal de nombre.
-    """
-    if not isinstance(name, str):
-        return None
-    raw = name.strip()
-    if not raw:
-        return None
-    # Snake: termina en "_id" (indiferente a mayusculas en la parte "id").
-    if raw.lower().endswith("_id"):
-        stem = raw[:-3].rstrip("_-. ")
-        if not stem:
-            return None
-        return (stem.lower(), "_id")
-    # Camel todo-mayuscula: "...ID" (p.ej. customerID).
-    if raw.endswith("ID"):
-        stem = raw[:-2].rstrip("_-. ")
-        if not stem:
-            return None
-        return (stem.lower(), "ID")
-    # Camel: "...Id" (p.ej. AlbumId).
-    if raw.endswith("Id"):
-        stem = raw[:-2].rstrip("_-. ")
-        if not stem:
-            return None
-        return (stem.lower(), "Id")
-    return None
-
-
-def _fk_type_compatible(col):
-    """True si el tipo de la columna admite ser clave foranea.
-
-    Compatible si el ``physical_type`` NO es una medida flotante, una temporal ni
-    boolean, Y ademas (``inferred_type`` en {numeric, categorical, integer} O el
-    ``physical_type`` empieza por entero/varchar/text/char/uuid). La comparacion es
-    indistinta a mayusculas/minusculas.
-    """
-    phys = (col.get("physical_type") or "").strip().lower()
-    inferred = (col.get("inferred_type") or "").strip().lower()
-    # Exclusion por tipo fisico (gana sobre cualquier senal positiva).
-    for bad in _FK_PHYSICAL_EXCLUDE:
-        if phys.startswith(bad):
-            return False
-    # Senal positiva por tipo inferido.
-    if inferred in _FK_INFERRED_OK:
-        return True
-    # Senal positiva por tipo fisico (entero/texto/uuid).
-    for good in _FK_PHYSICAL_PREFIXES:
-        if phys.startswith(good):
-            return True
-    return False
-
-
-def suggest_intratable_fk_candidates(profile: dict, max_candidates: int = 20) -> list:
-    """Sugiere columnas que parecen una FK intra-tabla por nombre + cardinalidad.
-
-    Heuristica (no afirma nada): una columna es candidata a clave foranea si su nombre
-    tiene sufijo de id con stem no vacio (``<algo>_id`` / ``<Algo>Id`` / ``<algo>ID``,
-    NUNCA el ``id`` generico), no es ya candidata a PK, no es constante, tiene
-    cardinalidad alta pero por debajo del numero de filas (N:1, no unica) y un tipo
-    compatible con clave (entero/categorico/texto/uuid; nunca float/fecha/boolean).
-
-    Args:
-        profile: TableProfile (dict de ``profile_table``). Se leen, de forma
-            defensiva, ``columns`` (lista de ColumnProfile), ``n_rows`` y
-            ``key_candidates`` (nombres de columna ya candidatos a PK).
-        max_candidates: tope de sugerencias devueltas (default 20). Las columnas se
-            ordenan por ``distinct_count`` descendente (mas informativas primero)
-            antes de cortar.
-
-    Returns:
-        list de dicts (posiblemente vacia), uno por columna sugerida, con claves:
-        ``column``, ``ref_table_guess`` (stem del nombre), ``reason`` (frase humana),
-        ``distinct_count``, ``unique_pct`` (fraccion 0-1 tal como viene del profile),
-        ``inferred_type``, ``physical_type``. Nunca lanza: si ``profile`` no es dict o
-        no hay columnas, devuelve ``[]``.
-    """
-    if not isinstance(profile, dict):
-        return []
-    columns = profile.get("columns")
-    if not isinstance(columns, list):
-        return []
-
-    n_rows = profile.get("n_rows")
-    has_n_rows = (
-        isinstance(n_rows, int) and not isinstance(n_rows, bool) and n_rows > 0
-    )
-
-    key_candidates = profile.get("key_candidates")
-    if not isinstance(key_candidates, (list, tuple, set)):
-        key_candidates = []
-    key_set = set(key_candidates)
-
-    out = []
-    for col in columns:
-        if not isinstance(col, dict):
-            continue
-        name = col.get("name")
-
-        # 1) Senal de nombre: sufijo de id con stem no vacio.
-        signal = _fk_name_signal(name)
-        if signal is None:
-            continue
-        ref_guess, suffix = signal
-
-        # 2) No es ya candidata a PK (clave primaria de la propia tabla).
-        if name in key_set:
-            continue
-
-        # 3) No constante y con >= 2 valores distintos.
-        flags = col.get("flags") or []
-        if "constant" in flags:
-            continue
-        dc = col.get("distinct_count")
-        if not (isinstance(dc, int) and not isinstance(dc, bool) and dc >= 2):
-            continue
-
-        # 4) Cardinalidad alta pero < n_rows (no es PK) y no parece unica.
-        if has_n_rows and dc >= n_rows:
-            continue
-        unique_pct = col.get("unique_pct")
-        has_unique = (
-            isinstance(unique_pct, (int, float)) and not isinstance(unique_pct, bool)
-        )
-        if has_unique and unique_pct >= 0.99:
-            continue
-
-        # 5) Tipo compatible con clave foranea (entero/categorico/texto; no medida).
-        if not _fk_type_compatible(col):
-            continue
-
-        out.append(
-            {
-                "column": name,
-                "ref_table_guess": ref_guess,
-                "reason": _build_reason(suffix, dc, n_rows if has_n_rows else None, ref_guess),
-                "distinct_count": dc,
-                "unique_pct": float(unique_pct) if has_unique else None,
-                "inferred_type": col.get("inferred_type") or "",
-                "physical_type": col.get("physical_type") or "",
-            }
-        )
-
-    # Mas informativas primero (mayor cardinalidad), luego corte.
-    out.sort(key=lambda d: d.get("distinct_count") or 0, reverse=True)
-    return out[: max(0, int(max_candidates))]
-
-
-def _build_reason(suffix, dc, n_rows, ref_guess):
-    """Frase humana que deja claro que la sugerencia es heuristica, no confirmada."""
-    if n_rows is not None:
-        card = f"es N:1 ({dc} valores distintos < {n_rows} filas)"
-    else:
-        card = f"tiene {dc} valores distintos que se repiten (cardinalidad N:1)"
-    return (
-        f"el nombre termina en '{suffix}' y {card}: parece (heuristica por nombre, "
-        f"sin confirmar containment) una referencia a una tabla «{ref_guess}»"
-    )
@@ -1,157 +0,0 @@
-"""Tests para suggest_intratable_fk_candidates (funcion pura, sin I/O)."""
-
-from suggest_intratable_fk_candidates import suggest_intratable_fk_candidates
-
-
-def _col(name, inferred_type="numeric", physical_type="BIGINT", distinct_count=10,
-         unique_pct=0.1, flags=None):
-    """Construye un ColumnProfile minimo a mano (el dict que emite profile_table)."""
-    return {
-        "name": name,
-        "inferred_type": inferred_type,
-        "physical_type": physical_type,
-        "semantic_type": "",
-        "distinct_count": distinct_count,
-        "unique_pct": unique_pct,
-        "null_count": 0,
-        "null_pct": 0.0,
-        "flags": list(flags) if flags else [],
-    }
-
-
-def test_golden_customer_id_detectado_otras_no():
-    # Tabla tipo titanic: customer_id es FK N:1; id es la PK; amount es medida;
-    # name es categorica sin sufijo de id. Solo customer_id debe aparecer.
-    profile = {
-        "n_rows": 891,
-        "key_candidates": ["id"],
-        "columns": [
-            _col("id", inferred_type="numeric", physical_type="BIGINT",
-                 distinct_count=891, unique_pct=1.0, flags=["possible_id"]),
-            _col("customer_id", inferred_type="numeric", physical_type="BIGINT",
-                 distinct_count=137, unique_pct=0.15, flags=[]),
-            _col("amount", inferred_type="numeric", physical_type="DOUBLE",
-                 distinct_count=400, unique_pct=0.45),
-            _col("name", inferred_type="categorical", physical_type="VARCHAR",
-                 distinct_count=700, unique_pct=0.78),
-        ],
-    }
-    out = suggest_intratable_fk_candidates(profile)
-    assert isinstance(out, list)
-    assert [c["column"] for c in out] == ["customer_id"]
-    cand = out[0]
-    assert cand["ref_table_guess"] == "customer"
-    assert cand["distinct_count"] == 137
-    assert cand["unique_pct"] == 0.15
-    assert cand["inferred_type"] == "numeric"
-    assert cand["physical_type"] == "BIGINT"
-    # La razon deja claro que es heuristica + cita el sufijo y la tabla.
-    assert "customer" in cand["reason"]
-    assert "_id" in cand["reason"]
-
-
-def test_camelcase_albumid_detectado():
-    # AlbumId (camelCase, VARCHAR) -> detectada, ref_table_guess "album".
-    profile = {
-        "n_rows": 3503,
-        "key_candidates": ["TrackId"],
-        "columns": [
-            _col("AlbumId", inferred_type="categorical", physical_type="VARCHAR",
-                 distinct_count=347, unique_pct=0.10),
-        ],
-    }
-    out = suggest_intratable_fk_candidates(profile)
-    # TrackId es PK candidata (en key_candidates), AlbumId no -> AlbumId aparece.
-    assert [c["column"] for c in out] == ["AlbumId"]
-    assert out[0]["ref_table_guess"] == "album"
-
-
-def test_constante_status_id_no_aparece():
-    # status_id constante (flag "constant", distinct_count 1) NO es FK util.
-    profile = {
-        "n_rows": 1000,
-        "key_candidates": [],
-        "columns": [
-            _col("status_id", inferred_type="numeric", physical_type="INTEGER",
-                 distinct_count=1, unique_pct=0.001, flags=["constant"]),
-        ],
-    }
-    out = suggest_intratable_fk_candidates(profile)
-    assert out == []
-
-
-def test_profile_vacio_y_none_devuelven_lista_vacia():
-    # Lectura defensiva: ni {} ni None lanzan; devuelven [].
-    assert suggest_intratable_fk_candidates({}) == []
-    assert suggest_intratable_fk_candidates(None) == []
-    # profile sin columns o con columns no-lista tampoco lanza.
-    assert suggest_intratable_fk_candidates({"n_rows": 10}) == []
-    assert suggest_intratable_fk_candidates({"columns": "no-soy-lista"}) == []
-
-
-def test_category_id_casi_unico_parece_pk_no_aparece():
-    # unique_pct 0.999 -> parece PK (no N:1) -> NO se sugiere como FK.
-    profile = {
-        "n_rows": 891,
-        "key_candidates": [],
-        "columns": [
-            _col("category_id", inferred_type="numeric", physical_type="BIGINT",
-                 distinct_count=890, unique_pct=0.999),
-        ],
-    }
-    out = suggest_intratable_fk_candidates(profile)
-    assert out == []
-
-
-def test_ref_table_guess_multitoken_y_orden_por_distinct():
-    # manager_staff_id conserva los underscores del stem -> "manager_staff".
-    # Ademas, con varias candidatas, se ordenan por distinct_count descendente.
-    profile = {
-        "n_rows": 10000,
-        "key_candidates": ["staff_id"],  # staff_id es PK aqui, no debe aparecer
-        "columns": [
-            _col("staff_id", inferred_type="numeric", physical_type="BIGINT",
-                 distinct_count=10000, unique_pct=1.0, flags=["possible_id"]),
-            _col("store_id", inferred_type="numeric", physical_type="INTEGER",
-                 distinct_count=2, unique_pct=0.0002),
-            _col("manager_staff_id", inferred_type="numeric", physical_type="INTEGER",
-                 distinct_count=40, unique_pct=0.004),
-        ],
-    }
-    out = suggest_intratable_fk_candidates(profile)
-    cols = [c["column"] for c in out]
-    # staff_id excluida (PK); las otras dos ordenadas por distinct desc.
-    assert cols == ["manager_staff_id", "store_id"]
-    refs = {c["column"]: c["ref_table_guess"] for c in out}
-    assert refs["manager_staff_id"] == "manager_staff"
-    assert refs["store_id"] == "store"
-
-
-def test_max_candidates_corta_la_lista():
-    # max_candidates limita el numero de sugerencias devueltas.
-    profile = {
-        "n_rows": 10000,
-        "key_candidates": [],
-        "columns": [
-            _col("a_id", distinct_count=300, unique_pct=0.03),
-            _col("b_id", distinct_count=200, unique_pct=0.02),
-            _col("c_id", distinct_count=100, unique_pct=0.01),
-        ],
-    }
-    out = suggest_intratable_fk_candidates(profile, max_candidates=2)
-    assert [c["column"] for c in out] == ["a_id", "b_id"]
-
-
-def test_id_generico_solo_nunca_es_fk():
-    # 'id'/'Id'/'ID' a secas (sin stem) jamas se sugieren como FK.
-    profile = {
-        "n_rows": 500,
-        "key_candidates": [],
-        "columns": [
-            _col("id", distinct_count=500, unique_pct=1.0),
-            _col("Id", distinct_count=120, unique_pct=0.24),
-            _col("ID", distinct_count=80, unique_pct=0.16),
-        ],
-    }
-    out = suggest_intratable_fk_candidates(profile)
-    assert out == []
@@ -536,6 +536,21 @@ def profile_table(
                type_breakdown[it] += 1
        prof["type_breakdown"] = type_breakdown

+        # 8.1) Primeras filas crudas (df.head) para el capitulo OVERVIEW del motor
+        # AutomaticEDA: una muestra SELECT col1,col2,... LIMIT 10 alineada por fila.
+        # Se reusa _sample_rows (mismo lector read-only). Estilo dict-no-throw: si
+        # falla, head_rows queda None y el capitulo degrada a su nota honesta. El
+        # capitulo lo recoge via profile["head_rows"]; build_eda_render_ctx ademas
+        # lo replica en ctx["head_rows"] cuando se construye el contexto de render.
+        try:
+            head_names = [c.get("name") for c in cols if c.get("name")]
+            head_rows = _sample_rows(_q, table, head_names, 10)
+            prof["head_rows"] = [
+                dict(r) for r in head_rows if isinstance(r, dict)
+            ] or None
+        except Exception:  # noqa: BLE001
+            prof["head_rows"] = None
+
        # 8.5) Matriz de correlacion/asociacion sobre una muestra de filas
        # alineadas. Elige la metrica por par de tipos (Pearson/Spearman,
        # Cramer's V/Theil's U, correlation ratio, MI) via association_matrix.