feat(eda): capítulo RELACIONES para AutomaticEDA

Añade el capítulo `relaciones` al motor AutomaticEDA: analiza las relaciones de clave de la tabla/base y se coloca tras `correlacion`, antes de `modelos`, en CHAPTER_ORDER. Capas que renderiza (solo las que aplican; None si no hay nada que decir): - Claves declaradas: PK/FK/UNIQUE reales del esquema DuckDB, vía la nueva función `detect_declared_keys_duckdb` (lee `duckdb_constraints()`). - Candidatos a clave primaria: los `key_candidates` del TableProfile. - FK candidatas inter-tabla: reusa `infer_fk_containment_duckdb` (containment + señal de nombre) y `build_join_graph` (roles de nodos + diagrama Mermaid pegable). Solo si la fuente DuckDB tiene varias tablas. - FK candidatas intra-tabla: heurística nombre + cardinalidad, vía la nueva función pura `suggest_intratable_fk_candidates`, marcada como sugerencia. Engancha al glosario clicable los términos PK, FK, containment/inclusión y cardinalidad (contrato §11.1) y usa Group (keep-together) para el grafo. Funciones nuevas del registry (grupo `eda`): - detect_declared_keys_duckdb (impure, datascience) + test. - suggest_intratable_fk_candidates (pure, datascience) + test. Tests: relaciones_test.py (golden intra + inter, edges, no-cut render) + los tests de ambas funciones. Suite automatic_eda + render_automatic_eda verde (89 passed). Golden end-to-end con el pipeline render_automatic_eda verificado sobre titanic (intra) y una BD customers/orders (inter). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 18:15:15 +02:00
12 changed files with 1635 additions and 65 deletions
@@ -34,6 +34,7 @@ from .theils_u import theils_u
 from .correlation_ratio import correlation_ratio
 from .mutual_info_columns import mutual_info_columns
 from .infer_fk_containment_duckdb import infer_fk_containment_duckdb
+from .detect_declared_keys_duckdb import detect_declared_keys_duckdb
 from .build_join_graph import build_join_graph
 from .association_matrix import association_matrix
 from .correlation_matrix_duckdb import correlation_matrix_duckdb
@@ -69,8 +70,10 @@ from .build_eda_render_ctx import build_eda_render_ctx
 from .profile_datetime import profile_datetime
 from .resample_timeseries import resample_timeseries
 from .add_pdf_internal_links import add_pdf_internal_links
+from .suggest_intratable_fk_candidates import suggest_intratable_fk_candidates

 __all__ = [
+    "suggest_intratable_fk_candidates",
    "detect_time_column",
    "extract_timeseries_raw",
    "build_eda_render_ctx",
@@ -97,6 +100,7 @@ __all__ = [
    "correlation_ratio",
    "mutual_info_columns",
    "infer_fk_containment_duckdb",
+    "detect_declared_keys_duckdb",
    "build_join_graph",
    "association_matrix",
    "correlation_matrix_duckdb",
@@ -1,10 +1,9 @@
 """Numeric distributions chapter (NUM DISTR) for AutomaticEDA.

 For every numeric column the chapter draws, as a single indivisible figure, a
-histogram with the **mean, median and ±1σ band drawn as reference lines** (the
-legend reports the numeric value of the mean, the median **and the standard
-deviation σ**) and a **Tukey boxplot right below it** sharing the same X axis —
-exactly the user requirement for this chapter. Each figure is emitted as a lazy ``Figure`` block
+histogram with the **mean, median and ±1σ band drawn as reference lines** and a
+**Tukey boxplot right below it** sharing the same X axis — exactly the user
+requirement for this chapter. Each figure is emitted as a lazy ``Figure`` block
 so the renderers rasterize and scale it to fit a whole page/slide and nothing is
 ever cut; columns with many numerics simply flow across pages as small
 multiples.
@@ -35,7 +34,7 @@ try:
 except Exception:  # noqa: BLE001 — keep the chapter importable no matter what.
    build_boxplot_stats = None  # type: ignore[assignment]

-CHAPTER_VERSION = "1.2.0"
+CHAPTER_VERSION = "1.1.0"
 CHAPTER_ID = "num_distr"
 CHAPTER_TITLE = "Distribuciones numéricas"

@@ -141,11 +140,9 @@ def _make_hist_box(name: str, numeric: dict, box: dict):
    std = numeric.get("std")

    # ±1σ band first (behind the lines), then median (solid) and mean (dashed).
-    # The band's legend entry also reports the numeric value of the standard
-    # deviation, so the reader sees mean, median AND σ at a glance.
    if mean is not None and std is not None and std > 0:
        ax_h.axvspan(mean - std, mean + std, color="#f0c27b", alpha=0.22,
-                     zorder=1, label=f"±1σ (σ = {_fmt_num(std)})")
+                     zorder=1, label="±1σ")
    if median is not None:
        ax_h.axvline(median, color="#2e8b57", linestyle="-", linewidth=1.6,
                     zorder=4, label=f"mediana = {_fmt_num(median)}")
@@ -155,19 +152,7 @@ def _make_hist_box(name: str, numeric: dict, box: dict):

    ax_h.set_ylabel("frecuencia", fontsize=8)
    ax_h.tick_params(labelsize=7)
-    # Always surface σ in the legend: if the ±1σ band could not be drawn (no mean
-    # or std<=0) but σ is still known, add a label-only proxy handle so the value
-    # of the standard deviation is reported regardless of the band.
-    handles, labels = ax_h.get_legend_handles_labels()
-    if std is not None and not any("σ =" in lbl for lbl in labels):
-        from matplotlib.lines import Line2D
-        proxy = Line2D([], [], linestyle="none", marker="",
-                       label=f"σ = {_fmt_num(std)}")
-        handles.append(proxy)
-        labels.append(f"σ = {_fmt_num(std)}")
-    if handles:
-        ax_h.legend(handles, labels, fontsize=6.5, loc="upper right",
-                    framealpha=0.85)
+    ax_h.legend(fontsize=6.5, loc="upper right", framealpha=0.85)
    for spine in ("top", "right"):
        ax_h.spines[spine].set_visible(False)

@@ -159,50 +159,6 @@ def test_anti_corte_muchas_columnas_pdf_y_pptx():
        assert res_pptx["n_slides"] >= 8  # at least one slide per column figure.


-def _hist_legend_texts(numeric, box=None):
-    """Build the per-column figure and return its histogram-legend label texts."""
-    from datascience.automatic_eda.chapters.num_distr import _make_hist_box
-    import matplotlib.pyplot as plt
-    fig = _make_hist_box("col", numeric, box or {})
-    ax_h = fig.axes[0]  # the histogram is the top axis.
-    leg = ax_h.get_legend()
-    texts = [t.get_text() for t in leg.get_texts()] if leg else []
-    plt.close(fig)
-    return texts
-
-
-def test_golden_leyenda_histograma_reporta_valor_std():
-    # The histogram legend must report the numeric value of the standard
-    # deviation σ next to mean and median.
-    numeric = _numeric_block(42.5, 40.0, 12.3, 1.0, 100.0, "right-skewed", 5)
-    texts = _hist_legend_texts(numeric)
-    joined = " ".join(texts)
-    assert any("σ =" in t for t in texts), f"σ value missing in legend: {texts}"
-    assert "12.3" in joined, f"std value 12.3 not in legend: {texts}"
-    assert any("media =" in t for t in texts)
-    assert any("mediana =" in t for t in texts)
-
-
-def test_edge_std_en_leyenda_aunque_no_haya_banda():
-    # When the ±1σ band cannot be drawn (no mean) but σ is known, the legend
-    # still surfaces the σ value via a label-only proxy handle.
-    numeric = _numeric_block(42.5, 40.0, 7.5, 1.0, 100.0, "right-skewed", 0)
-    numeric["mean"] = None  # forces the band off; σ must still appear.
-    texts = _hist_legend_texts(numeric)
-    assert any("σ = 7.5" in t for t in texts), f"σ proxy missing: {texts}"
-
-
-def test_edge_sin_std_no_revienta_la_figura():
-    # A numeric block without σ must not raise and simply omits the σ entry.
-    import matplotlib.pyplot as plt
-    numeric = _numeric_block(42.5, 40.0, 0.0, 1.0, 100.0, "discrete", 0)
-    numeric["std"] = None
-    texts = _hist_legend_texts(numeric)
-    assert not any("σ =" in t for t in texts)
-    # mean/median lines still produce their own legend entries.
-    assert any("media =" in t for t in texts)
-
-
 def test_distribution_gloss_cubre_todas_las_etiquetas():
    # Every label detect_distribution_type can emit has a Spanish gloss.
    for label in ("normal-ish", "right-skewed", "left-skewed", "heavy-tail",
@@ -0,0 +1,500 @@
+"""Key-relations chapter (RELACIONES) — the keys / join structure of the data.
+
+This chapter is the *relational* section of an AutomaticEDA report. It answers a
+single question for the table (or the whole DuckDB source it lives in): **how do
+the keys relate?** It composes, without reimplementing them, the registry's
+relation primitives and degrades honestly when a layer does not apply.
+
+It renders, in order, only the layers that have something to say:
+
+1. **Declared keys** (real schema constraints) — when the DuckDB source declares
+   PRIMARY KEY / FOREIGN KEY / UNIQUE constraints, they are read verbatim via
+   ``detect_declared_keys_duckdb`` and shown as ground truth: which column is the
+   PK, which columns are FKs and the table/column they point to.
+2. **Primary-key candidates** — the ``key_candidates`` the TableProfile already
+   carries (columns whose cardinality equals the row count, with no nulls). These
+   are *candidates*: a column that could serve as the row identifier.
+3. **Foreign-key candidates**, inferred from the data rather than declared:
+   - **Inter-table** (several tables in the source, FKs declared or not): real FK
+     candidates by name signal + value containment (``infer_fk_containment_duckdb``)
+     plus the join graph (roles + a pasteable Mermaid diagram, ``build_join_graph``).
+   - **Intra-table** (no FK declared or inferred inter-table): columns that *look*
+     like a FK by a name+cardinality heuristic (``suggest_intratable_fk_candidates``).
+     This is a **suggestion**, explicitly flagged as a heuristic, never an assertion.
+
+``build_relaciones(profile, ctx) -> Chapter | None``: returns ``None`` when there
+is nothing to say (no declared key, no key candidates, and no FK candidate —
+inter- or intra-table). Reads everything defensively (``.get``) and never raises:
+anything missing degrades to a note or is omitted; a failing registry call drops
+its layer instead of aborting the chapter.
+
+ctx keys this chapter consumes (all optional):
+    db_path, table : str — the DuckDB file and table being profiled (set by
+        ``build_eda_render_ctx``). ``db_path`` is needed to read declared
+        constraints, to list the sibling tables, and to run the containment-based
+        FK inference. Without it, only the profile-derived layers (PK candidates,
+        intra-table FK heuristic) are available.
+    glossary : model.GlossaryCollector — shared glossary; the chapter registers
+        the relational terms (PK, FK, containment, cardinality) and marks their
+        first appearance clickable.
+
+Contract: build_<id>(profile, ctx) -> Chapter | None ; CHAPTER_VERSION = "x.y.z".
+"""
+
+from __future__ import annotations
+
+from .. import model
+
+# Pure/impure registry functions (group ``eda``) this chapter composes. Imported
+# defensively (module-leaf imports, like the AGREGACION chapter) so the chapter
+# still builds — degrading the affected layer to nothing — if a function is
+# somehow unavailable / not indexed yet.
+try:
+    from datascience.detect_declared_keys_duckdb import detect_declared_keys_duckdb
+except Exception:  # noqa: BLE001 — keep the chapter importable no matter what.
+    detect_declared_keys_duckdb = None  # type: ignore[assignment]
+try:
+    from datascience.infer_fk_containment_duckdb import infer_fk_containment_duckdb
+except Exception:  # noqa: BLE001
+    infer_fk_containment_duckdb = None  # type: ignore[assignment]
+try:
+    from datascience.build_join_graph import build_join_graph
+except Exception:  # noqa: BLE001
+    build_join_graph = None  # type: ignore[assignment]
+try:
+    from datascience.suggest_intratable_fk_candidates import (
+        suggest_intratable_fk_candidates,
+    )
+except Exception:  # noqa: BLE001
+    suggest_intratable_fk_candidates = None  # type: ignore[assignment]
+try:
+    from infra import duckdb_list_tables
+except Exception:  # noqa: BLE001
+    duckdb_list_tables = None  # type: ignore[assignment]
+
+CHAPTER_VERSION = "1.0.0"
+CHAPTER_ID = "relaciones"
+CHAPTER_TITLE = "Relaciones de clave"
+
+# Cap the inter-table FK table so a wide schema does not blow up the page; the
+# rest is summarized in a closing note (no silent truncation).
+MAX_FK_ROWS = 40
+
+# --------------------------------------------------------------------------- #
+# Glossary terms this chapter explains. Registered in the shared collector and
+# marked clickable on their first appearance (contract §11.1).
+# --------------------------------------------------------------------------- #
+_TERMS = {
+    "pk": (
+        "Clave primaria (PK)",
+        "Columna (o conjunto de columnas) que identifica de forma única cada fila "
+        "de una tabla: sus valores no se repiten y no son nulos. Una tabla tiene "
+        "como mucho una clave primaria; es el ancla por la que otras tablas la "
+        "referencian.",
+    ),
+    "fk": (
+        "Clave foránea (FK)",
+        "Columna de una tabla cuyos valores apuntan a la clave primaria de otra "
+        "tabla (o de la misma), creando una relación entre ambas. Una FK suele ser "
+        "N:1: muchas filas de la tabla origen comparten el mismo valor de la tabla "
+        "destino.",
+    ),
+    "containment": (
+        "Containment / inclusión",
+        "Señal con la que se infiere una clave foránea sin que la base la declare: "
+        "la fracción de valores distintos de una columna A que también aparecen "
+        "como valores de otra columna B. Si casi todos los valores de A están "
+        "contenidos en B (inclusión ≈ 1) y B parece una clave, A → B es una FK "
+        "candidata.",
+    ),
+    "cardinalidad": (
+        "Cardinalidad",
+        "Número de valores distintos de una columna. Cardinalidad igual al número "
+        "de filas (y sin nulos) señala un identificador (candidato a clave "
+        "primaria); cardinalidad alta pero menor que el número de filas, con "
+        "valores repetidos, es típica de una clave foránea.",
+    ),
+}
+
+
+def _register_terms(ctx: dict) -> bool:
+    """Register the relational terms in the shared glossary. Returns whether the
+    in-text appearances should be marked clickable."""
+    glossary = ctx.get("glossary")
+    if not isinstance(glossary, model.GlossaryCollector):
+        return False
+    for key, (label, definition) in _TERMS.items():
+        glossary.add(key, label, definition)
+    return True
+
+
+# --------------------------------------------------------------------------- #
+# Formatting helpers (mirror the other chapters' defensive style).
+# --------------------------------------------------------------------------- #
+def _fmt_int(value) -> str:
+    """Dot-grouped integer text ("—" for None); unconvertible values use _safe_str."""
+    try:
+        return "—" if value is None else f"{int(value):,}".replace(",", ".")
+    except (TypeError, ValueError, OverflowError):
+        # int(nan) raises ValueError, int(±inf) OverflowError: both must degrade.
+        return model._safe_str(value)
+
+
+def _fmt_pct_fraction(value, decimals: int = 1) -> str:
+    """Percentage text: <= 1.0 is scaled x100, larger values kept. None -> placeholder."""
+    if value is None:
+        return "—"
+    try:
+        v = float(value)
+    except (TypeError, ValueError):
+        return model._safe_str(value)
+    if v <= 1.0:
+        v *= 100.0
+    return f"{v:.{decimals}f}%"
+
+
+def _fmt_ratio(value, decimals: int = 3) -> str:
+    """Format an already-0–1 ratio (inclusion) as a plain number."""
+    if value is None:
+        return "—"
+    try:
+        return f"{float(value):.{decimals}f}".rstrip("0").rstrip(".")
+    except (TypeError, ValueError):
+        return model._safe_str(value)
+
+
+def _is_dict(v) -> bool:
+    return isinstance(v, dict)
+
+
+def _columns_by_name(profile: dict) -> dict:
+    """Index the profile columns by name for quick metric lookup."""
+    out = {}
+    for col in (profile.get("columns") or []):
+        if _is_dict(col) and col.get("name") is not None:
+            out[col.get("name")] = col
+    return out
+
+
+# --------------------------------------------------------------------------- #
+# Layer 1 — declared keys (real schema constraints).
+# --------------------------------------------------------------------------- #
+def _declared_keys(db_path: str, table: str):
+    """Read declared PK/FK/UNIQUE for the source, or None if unavailable."""
+    if not db_path or detect_declared_keys_duckdb is None:
+        return None
+    try:
+        out = detect_declared_keys_duckdb(db_path, table)
+    except Exception:  # noqa: BLE001 — dict-no-throw: treat as unavailable.
+        return None
+    if not _is_dict(out) or out.get("status") != "ok":
+        return None
+    return out
+
+
+def _declared_section(declared: dict) -> list:
+    """Blocks for the declared-keys layer, or [] if there is nothing declared."""
+    pks = [p for p in (declared.get("primary_keys") or []) if _is_dict(p)]
+    fks = [f for f in (declared.get("foreign_keys") or []) if _is_dict(f)]
+    uqs = [u for u in (declared.get("unique") or []) if _is_dict(u)]
+    if not (pks or fks or uqs):
+        return []
+
+    blocks = [
+        model.Heading(text="Claves declaradas en el esquema", level=2),
+        model.Markdown(text=(
+            "La base **declara** estas relaciones de clave como restricciones "
+            "reales del esquema (constraints). Son la verdad de referencia: no se "
+            "infieren, se leen tal cual de la definición de las tablas.")),
+    ]
+
+    if pks:
+        rows = [[model._safe_str(p.get("table")),
+                 ", ".join(model._safe_str(c) for c in (p.get("columns") or []))]
+                for p in pks]
+        blocks.append(model.DataTable(
+            header=["Tabla", "Columna(s) PK"], rows=rows,
+            title="Claves primarias declaradas",
+            note="Cada fila: la clave primaria declarada de una tabla."))
+
+    if fks:
+        rows = []
+        for f in fks:
+            src = ", ".join(model._safe_str(c) for c in (f.get("columns") or []))
+            dst = ", ".join(
+                model._safe_str(c) for c in (f.get("referenced_columns") or []))
+            rows.append([
+                model._safe_str(f.get("table")), src,
+                model._safe_str(f.get("referenced_table")), dst])
+        blocks.append(model.DataTable(
+            header=["Tabla origen", "Columna(s) FK", "→ Tabla destino",
+                    "Columna(s) destino"],
+            rows=rows, title="Claves foráneas declaradas",
+            note="Cada fila: una FK declarada — origen → destino."))
+
+    if uqs:
+        rows = [[model._safe_str(u.get("table")),
+                 ", ".join(model._safe_str(c) for c in (u.get("columns") or []))]
+                for u in uqs]
+        blocks.append(model.DataTable(
+            header=["Tabla", "Columna(s) UNIQUE"], rows=rows,
+            title="Restricciones UNIQUE declaradas"))
+
+    return blocks
+
+
+# --------------------------------------------------------------------------- #
+# Layer 2 — primary-key candidates (from the profile).
+# --------------------------------------------------------------------------- #
+def _pk_candidates_section(profile: dict, mark: bool) -> list:
+    """Blocks for the PK-candidates layer, or [] if there are none.
+
+    ``mark`` selects the intro whose terms carry ``[[term:…]]`` glossary markers."""
+    keys = [k for k in (profile.get("key_candidates") or []) if k is not None]
+    if not keys:
+        return []
+    by_name = _columns_by_name(profile)
+
+    # Bold goes inside the term markers (as in the FK sections); never nest ** pairs.
+    pk = "[[term:pk]]**clave primaria**[[/term]]"
+    intro = (
+        f"Estas columnas son **candidatas a** {pk}: su "
+        "[[term:cardinalidad]]cardinalidad[[/term]] iguala al número de filas y no "
+        "tienen nulos, así que cada valor identifica una fila distinta. Son "
+        "candidatas, no una clave declarada: la base no las marca como tal."
+        if mark else
+        "Estas columnas son **candidatas a clave primaria**: su cardinalidad "
+        "iguala al número de filas y no tienen nulos, así que cada valor "
+        "identifica una fila distinta.")
+
+    rows = []
+    for name in keys:
+        col = by_name.get(name) or {}  # candidate absent from columns -> "—" cells
+        col_type = col.get("inferred_type") or col.get("physical_type") or "—"
+        rows.append([
+            model._safe_str(name), _fmt_int(col.get("distinct_count")),
+            _fmt_pct_fraction(col.get("unique_pct")), model._safe_str(col_type)])
+    return [
+        model.Heading(text="Candidatos a clave primaria", level=2),
+        model.Markdown(text=intro),
+        model.DataTable(
+            header=["Columna", "Valores distintos", "% único", "Tipo"],
+            rows=rows, title="Candidatas a clave primaria",
+            note=f"{_fmt_int(profile.get('n_rows'))} filas en total como referencia."),
+    ]
+
+
+# --------------------------------------------------------------------------- #
+# Layer 3a — inter-table FK candidates (containment) + join graph.
+# --------------------------------------------------------------------------- #
+def _list_source_tables(db_path: str) -> list:
+    """List the tables in the DuckDB source, or [] if it can't be listed."""
+    if not db_path or duckdb_list_tables is None:
+        return []
+    try:
+        out = duckdb_list_tables(db_path)
+    except Exception:  # noqa: BLE001
+        return []
+    if not _is_dict(out) or out.get("status") != "ok":
+        return []
+    return [t for t in (out.get("tables") or []) if isinstance(t, str)]
+
+
+def _inter_table_section(db_path: str, tables: list, mark: bool) -> list:
+    """Blocks for the inter-table FK layer (containment + join graph), or []."""
+    if infer_fk_containment_duckdb is None or len(tables) < 2:
+        return []
+    try:
+        fk = infer_fk_containment_duckdb(db_path, tables=tables)
+    except Exception:  # noqa: BLE001
+        return []
+    if not _is_dict(fk) or fk.get("status") != "ok":
+        return []
+    candidates = [c for c in (fk.get("fk_candidates") or []) if _is_dict(c)]
+    if not candidates:
+        return []
+
+    containment = ("[[term:containment]]containment (inclusión de valores)[[/term]]"
+                   if mark else "containment (inclusión de valores)")
+    fk_term = "[[term:fk]]**claves foráneas**[[/term]]" if mark else "**claves foráneas**"
+    blocks = [
+        model.Heading(text="Claves foráneas candidatas (inter-tabla)", level=2),
+        model.Markdown(text=(
+            f"La fuente tiene varias tablas. Estas {fk_term} candidatas se infieren "
+            f"por señal de nombre y por {containment}: una columna de una tabla cuyos "
+            "valores están contenidos en la clave de otra. No están declaradas por "
+            "la base; son la relación más probable según los datos.")),
+    ]
+
+    shown = candidates[:MAX_FK_ROWS]
+    rows = []
+    for c in shown:
+        rows.append([
+            f"{model._safe_str(c.get('from_table'))}.{model._safe_str(c.get('from_col'))}",
+            f"{model._safe_str(c.get('to_table'))}.{model._safe_str(c.get('to_col'))}",
+            _fmt_ratio(c.get("inclusion")),
+            model._safe_str(c.get("cardinality") or "—"),
+            "sí" if c.get("name_match") else "no",
+        ])
+    note = "Ordenadas por señal de nombre e inclusión."
+    if len(candidates) > len(shown):
+        note += f" Se muestran {len(shown)} de {len(candidates)} candidatas."
+    blocks.append(model.DataTable(
+        header=["Origen", "→ Destino", "Inclusión", "Cardinalidad", "Coincide nombre"],
+        rows=rows, title="FK candidatas por containment", note=note))
+
+    # Join graph: node roles + a pasteable Mermaid diagram, kept together.
+    if build_join_graph is not None:
+        try:
+            graph = build_join_graph(candidates, tables=tables)
+        except Exception:  # noqa: BLE001
+            graph = None
+        if _is_dict(graph):
+            graph_blocks = [model.Heading(text="Grafo de relaciones", level=3)]
+            nodes = [n for n in (graph.get("nodes") or []) if _is_dict(n)]
+            if nodes:
+                node_rows = [[
+                    model._safe_str(n.get("table")),
+                    model._safe_str(n.get("role") or "—"),
+                    _fmt_int(n.get("out_degree")),
+                    _fmt_int(n.get("in_degree")),
+                ] for n in nodes]
+                graph_blocks.append(model.DataTable(
+                    header=["Tabla", "Rol", "FK salientes", "FK entrantes"],
+                    rows=node_rows, title="Tablas y su rol en el grafo",
+                    note="Rol: fact (apunta a otras), dimension (referenciada), "
+                         "bridge (ambas), standalone (aislada)."))
+            hubs = [h for h in (graph.get("hubs") or []) if h]
+            if hubs:
+                graph_blocks.append(model.Markdown(text=(
+                    "Tablas con más relaciones salientes (candidatas a tabla de "
+                    "hechos): " + ", ".join(model._safe_str(h) for h in hubs) + ".")))
+            mermaid = model._safe_str(graph.get("mermaid")).strip()
+            if mermaid:
+                graph_blocks.append(model.Markdown(text=(
+                    "Diagrama de las relaciones (pegable en un bloque Mermaid):")))
+                graph_blocks.append(model.Markdown(
+                    text="```mermaid\n" + mermaid + "\n```"))
+            if len(graph_blocks) > 1:
+                blocks.append(model.Group(blocks=graph_blocks,
+                                          title="Grafo de relaciones"))
+
+    skipped = [s for s in (fk.get("skipped") or []) if s]
+    if skipped:
+        blocks.append(model.Note(
+            "Algunos pares se omitieron por tamaño: "
+            + "; ".join(model._safe_str(s) for s in skipped) + "."))
+    return blocks
+
+
+# --------------------------------------------------------------------------- #
+# Layer 3b — intra-table FK candidates (name+cardinality heuristic).
+# --------------------------------------------------------------------------- #
+def _intra_table_section(profile: dict, mark: bool) -> list:
+    """Blocks for the intra-table FK heuristic layer, or [] if no candidates."""
+    if suggest_intratable_fk_candidates is None:
+        return []
+    try:
+        cands = suggest_intratable_fk_candidates(profile)
+    except Exception:  # noqa: BLE001
+        return []
+    cands = [c for c in (cands or []) if _is_dict(c)]
+    if not cands:
+        return []
+
+    fk_term = "[[term:fk]]**claves foráneas**[[/term]]" if mark else "**claves foráneas**"
+    blocks = [
+        model.Heading(text="Posibles claves foráneas (heurística de nombre)", level=2),
+        model.Markdown(text=(
+            f"No hay otras tablas que referenciar, pero algunas columnas **parecen** "
+            f"{fk_term} por su nombre (terminan en «id») y su cardinalidad (muchos "
+            "valores repetidos, N:1). Es una **sugerencia heurística**, no una "
+            "afirmación: el nombre de la tabla destino es una conjetura y no se "
+            "comprueba inclusión de valores contra ninguna tabla real.")),
+    ]
+    rows = []
+    for c in cands:
+        rows.append([
+            model._safe_str(c.get("column")),
+            model._safe_str(c.get("ref_table_guess") or "—"),
+            _fmt_int(c.get("distinct_count")),
+            _fmt_pct_fraction(c.get("unique_pct")),
+            model._safe_str(c.get("inferred_type") or c.get("physical_type") or "—"),
+            model._safe_str(c.get("reason") or ""),
+        ])
+    blocks.append(model.DataTable(
+        header=["Columna", "Posible tabla", "Valores distintos", "% único",
+                "Tipo", "Motivo"],
+        rows=rows, title="Posibles FK por nombre y cardinalidad",
+        note="Heurística: posibles falsos positivos/negativos. No confirma containment."))
+    blocks.append(model.Note(
+        "Estas sugerencias se basan solo en el nombre y la cardinalidad. Para "
+        "confirmarlas haría falta la tabla destino y comprobar la inclusión de "
+        "valores (containment)."))
+    return blocks
+
+
+# --------------------------------------------------------------------------- #
+# Entry point.
+# --------------------------------------------------------------------------- #
+def _intro_blocks(mark: bool) -> list:
+    pk = "[[term:pk]]clave primaria[[/term]]" if mark else "clave primaria"
+    fk = "[[term:fk]]clave foránea[[/term]]" if mark else "clave foránea"
+    text = (
+        f"Este capítulo analiza las **relaciones de clave** de la tabla: qué columna "
+        f"identifica cada fila (la {pk}) y qué columnas referencian a otra tabla (las "
+        f"{fk}). Cuando la base las **declara** como restricciones del esquema, se "
+        "muestran tal cual; cuando no, se proponen las más probables a partir de los "
+        "datos —por inclusión de valores entre tablas (containment) o, en una sola "
+        "tabla, por una heurística de nombre y cardinalidad— siempre marcadas como "
+        "candidatas, nunca como hechos.")
+    return [model.Heading(text=CHAPTER_TITLE, level=1), model.Markdown(text=text)]
+
+
+def build_relaciones(profile: dict, ctx: dict):
+    """Build the RELACIONES Chapter, or None if there is nothing to say.
+
+    Args:
+        profile: the ``eda`` group TableProfile dict (may be None/empty).
+        ctx: presentation context. Consumes ``db_path`` + ``table`` (to read
+            declared constraints, list sibling tables and run the containment FK
+            inference) and ``glossary`` (to register the relational terms).
+
+    Returns:
+        A ``model.Chapter`` with the applicable relation layers; or ``None`` when
+        the dataset has no declared key, no key candidates and no FK candidate
+        (neither inter- nor intra-table).
+    """
+    if not isinstance(profile, dict):
+        profile = {}
+    ctx = ctx if isinstance(ctx, dict) else {}
+    db_path = ctx.get("db_path")
+    table = ctx.get("table")
+
+    mark = _register_terms(ctx)
+
+    # Build each layer; the chapter is the concatenation of the non-empty ones.
+    declared = _declared_keys(db_path, table)
+    declared_blocks = _declared_section(declared) if declared else []
+    declared_has_fk = bool(declared and declared.get("foreign_keys"))
+
+    pk_blocks = _pk_candidates_section(profile, mark)
+
+    tables = _list_source_tables(db_path)
+    inter_blocks = _inter_table_section(db_path, tables, mark)
+
+    # The intra-table heuristic only makes sense when no real FK is available for
+    # this table — neither declared nor inferred inter-table. Otherwise the real
+    # relations already answer the question and the heuristic is just noise.
+    if declared_has_fk or inter_blocks:
+        intra_blocks = []
+    else:
+        intra_blocks = _intra_table_section(profile, mark)
+
+    body = declared_blocks + pk_blocks + inter_blocks + intra_blocks
+    if not body:
+        return None  # chapter does not apply: nothing to say about relations.
+
+    blocks = _intro_blocks(mark) + body
+    return model.Chapter(id=CHAPTER_ID, title=CHAPTER_TITLE,
+                         version=CHAPTER_VERSION, blocks=blocks)
@@ -0,0 +1,273 @@
+"""Tests for the RELACIONES chapter — DoD: golden(s) + edges + no-cut render.
+
+Two goldens covering the two real paths of the chapter:
+
+- **Intra-table** (a single table, no db source for relations): the chapter shows
+  the primary-key candidates from the profile and the heuristic foreign-key
+  suggestions (name + cardinality), explicitly flagged as a heuristic. Renders to
+  PDF and PPTX with nothing cut.
+- **Inter-table** (a real DuckDB file with two related tables, customers/orders,
+  with a declared FK): the chapter shows the declared keys, the containment-based
+  FK candidates and the join graph (roles + a pasteable Mermaid diagram).
+
+Edges: a profile with no key candidate and no FK-looking column returns None;
+``None`` / ``{}`` profiles do not raise. The chapter registers its glossary terms.
+
+Layers that depend on the sibling registry functions delegated alongside this
+chapter (``detect_declared_keys_duckdb``, ``suggest_intratable_fk_candidates``)
+are asserted **conditionally on the function being importable**, so the chapter's
+honest-degradation contract is what is tested, never a hard dependency on import
+timing.
+"""
+
+import os
+import tempfile
+
+import duckdb
+from pptx import Presentation
+from pypdf import PdfReader
+
+from datascience.automatic_eda.chapters.relaciones import build_relaciones
+from datascience.automatic_eda.model import Chapter, Group, GlossaryCollector
+from datascience.render_automatic_eda_pdf import render_automatic_eda_pdf
+from datascience.render_automatic_eda_pptx import render_automatic_eda_pptx
+
+# The optional sibling functions: their layers are asserted only when present.
+try:
+    from datascience.detect_declared_keys_duckdb import detect_declared_keys_duckdb
+except Exception:  # noqa: BLE001
+    detect_declared_keys_duckdb = None
+try:
+    from datascience.suggest_intratable_fk_candidates import (
+        suggest_intratable_fk_candidates,
+    )
+except Exception:  # noqa: BLE001
+    suggest_intratable_fk_candidates = None
+
+
+# --------------------------------------------------------------------------- #
+# Helpers.
+# --------------------------------------------------------------------------- #
+def _flatten(blocks) -> list:
+    """Flatten Group blocks so a test can inspect every leaf block."""
+    out = []
+    for b in blocks:
+        if isinstance(b, Group):
+            out.extend(_flatten(b.blocks))
+        else:
+            out.append(b)
+    return out
+
+
+def _text_of(chapter: Chapter) -> str:
+    """Collect all visible text of a chapter's blocks into one string."""
+    parts = []
+    for b in _flatten(chapter.blocks):
+        for attr in ("text", "title", "note"):
+            v = getattr(b, attr, None)
+            if isinstance(v, str):
+                parts.append(v)
+        header = getattr(b, "header", None)
+        if isinstance(header, list):
+            parts.extend(str(c) for c in header)
+        rows = getattr(b, "rows", None)
+        if isinstance(rows, list):
+            for r in rows:
+                if isinstance(r, (list, tuple)):
+                    parts.extend(str(c) for c in r)
+                else:
+                    parts.append(str(r))
+    return "\n".join(parts)
+
+
+def _render_both(chapter: Chapter, tag: str):
+    """Render the chapter to PDF and PPTX; return (pdf_text, n_slides)."""
+    tmp = tempfile.mkdtemp(prefix=f"relaciones_{tag}_")
+    pdf_path = os.path.join(tmp, "out.pdf")
+    pptx_path = os.path.join(tmp, "out.pptx")
+    meta = {"title": f"EDA — {tag}"}
+    render_automatic_eda_pdf([chapter], pdf_path, meta)
+    render_automatic_eda_pptx([chapter], pptx_path, meta)
+    assert os.path.exists(pdf_path) and os.path.getsize(pdf_path) > 0
+    assert os.path.exists(pptx_path) and os.path.getsize(pptx_path) > 0
+    text = "".join(p.extract_text() or "" for p in PdfReader(pdf_path).pages)
+    n_slides = len(Presentation(pptx_path).slides)
+    return text, n_slides
+
+
+# --------------------------------------------------------------------------- #
+# Fixtures.
+# --------------------------------------------------------------------------- #
+def _titanic_profile() -> dict:
+    """A single-table profile: a PK candidate + a column that looks like a FK."""
+    return {
+        "table": "titanic",
+        "source": "/data/titanic.csv",
+        "n_rows": 891,
+        "n_cols": 4,
+        "key_candidates": ["PassengerId"],
+        "columns": [
+            {"name": "PassengerId", "inferred_type": "numeric",
+             "physical_type": "BIGINT", "distinct_count": 891,
+             "unique_pct": 1.0, "flags": ["possible_id"]},
+            {"name": "ticket_id", "inferred_type": "numeric",
+             "physical_type": "BIGINT", "distinct_count": 681,
+             "unique_pct": 0.76, "flags": []},
+            {"name": "fare", "inferred_type": "numeric",
+             "physical_type": "DOUBLE", "distinct_count": 248,
+             "unique_pct": 0.28, "flags": []},
+            {"name": "sex", "inferred_type": "categorical",
+             "physical_type": "VARCHAR", "distinct_count": 2,
+             "unique_pct": 0.002, "flags": []},
+        ],
+    }
+
+
+def _make_relational_db(path: str) -> None:
+    """Create a small DuckDB with customers(id) <- orders(customer_id), real FK."""
+    con = duckdb.connect(path)
+    con.execute("CREATE TABLE customers(id INTEGER PRIMARY KEY, name TEXT)")
+    con.execute(
+        "CREATE TABLE orders(id INTEGER PRIMARY KEY, "
+        "customer_id INTEGER REFERENCES customers(id), amount DOUBLE)")
+    con.execute("INSERT INTO customers VALUES "
+                "(1,'a'),(2,'b'),(3,'c'),(4,'d'),(5,'e')")
+    con.execute("INSERT INTO orders VALUES "
+                "(1,1,10.0),(2,1,20.0),(3,2,30.0),(4,3,40.0),"
+                "(5,3,50.0),(6,4,60.0),(7,5,70.0),(8,2,80.0)")
+    con.close()
+
+
+def _orders_profile() -> dict:
+    """A profile for the `orders` table of the relational DB."""
+    return {
+        "table": "orders",
+        "source": "orders",
+        "n_rows": 8,
+        "n_cols": 3,
+        "key_candidates": ["id"],
+        "columns": [
+            {"name": "id", "inferred_type": "numeric", "physical_type": "INTEGER",
+             "distinct_count": 8, "unique_pct": 1.0, "flags": ["possible_id"]},
+            {"name": "customer_id", "inferred_type": "numeric",
+             "physical_type": "INTEGER", "distinct_count": 5, "unique_pct": 0.625,
+             "flags": []},
+            {"name": "amount", "inferred_type": "numeric", "physical_type": "DOUBLE",
+             "distinct_count": 8, "unique_pct": 1.0, "flags": []},
+        ],
+    }
+
+
+# --------------------------------------------------------------------------- #
+# Golden 1 — intra-table.
+# --------------------------------------------------------------------------- #
+def test_golden_intra_table_pk_and_fk_heuristic():
+    """Single table: PK candidate shown; FK heuristic shown (if fn available).
+
+    Builds the chapter from the titanic profile with no ``db_path`` in the
+    context, checks the section titles / column names in the chapter text and
+    the registered glossary terms, then renders to PDF + PPTX and checks that
+    the PK candidate reaches the PDF text and at least one slide exists.
+    """
+    prof = _titanic_profile()
+    glossary = GlossaryCollector()
+    # No db_path: only the profile-derived layers apply (no declared, no inter).
+    chapter = build_relaciones(prof, {"glossary": glossary})
+
+    assert isinstance(chapter, Chapter)
+    assert chapter.id == "relaciones"
+    text = _text_of(chapter)
+
+    # PK candidate is always present (comes from the profile).
+    assert "Candidatos a clave primaria" in text
+    assert "PassengerId" in text
+
+    # Glossary terms got registered.
+    for key in ("pk", "fk", "cardinalidad"):
+        assert glossary.has(key)
+
+    # FK heuristic layer: present iff the delegated function is importable.
+    if suggest_intratable_fk_candidates is not None:
+        assert "Posibles claves foráneas" in text
+        assert "ticket_id" in text
+        # The title of the heuristic suggestions is rendered.
+        # NOTE(review): nothing here verifies that the float measure (`fare`)
+        # or the PK itself are excluded from the FK suggestions — confirm
+        # whether a negative assertion on the suggestion rows is wanted.
+        assert "Posibles FK por nombre" in text
+
+    pdf_text, n_slides = _render_both(chapter, "intra")
+    assert "PassengerId" in pdf_text
+    assert n_slides >= 1
+
+
+# --------------------------------------------------------------------------- #
+# Golden 2 — inter-table (real DuckDB).
+# --------------------------------------------------------------------------- #
+def test_golden_inter_table_containment_and_join_graph():
+    """Two related tables: declared FK (if fn available) + containment FK
+    candidate + Mermaid join graph."""
+    tmp = tempfile.mkdtemp(prefix="relaciones_db_")
+    db_path = os.path.join(tmp, "shop.duckdb")
+    _make_relational_db(db_path)
+
+    prof = _orders_profile()
+    glossary = GlossaryCollector()
+    chapter = build_relaciones(
+        prof, {"db_path": db_path, "table": "orders", "glossary": glossary})
+
+    assert isinstance(chapter, Chapter)
+    text = _text_of(chapter)
+
+    # Inter-table containment FK candidate: customer_id -> customers.id. This path
+    # uses infer_fk_containment_duckdb + build_join_graph, both already in the
+    # registry, so it must be present.
+    assert "Claves foráneas candidatas (inter-tabla)" in text
+    assert "orders.customer_id" in text
+    assert "customers.id" in text
+    # Join graph with a pasteable Mermaid diagram.
+    assert "Grafo de relaciones" in text
+    assert "mermaid" in text
+    assert "graph LR" in text
+    assert "containment" in text.lower()
+
+    # Declared-keys layer: present iff the delegated function is importable.
+    if detect_declared_keys_duckdb is not None:
+        assert "Claves declaradas en el esquema" in text
+        assert "Claves foráneas declaradas" in text
+
+    pdf_text, n_slides = _render_both(chapter, "inter")
+    assert "customer_id" in pdf_text
+    assert n_slides >= 1
+
+
+# --------------------------------------------------------------------------- #
+# Edges.
+# --------------------------------------------------------------------------- #
+def test_none_when_no_relations():
+    """No key candidates, no FK-looking columns, no db source -> None."""
+    prof = {
+        "table": "flat", "n_rows": 100, "n_cols": 2, "key_candidates": [],
+        "columns": [
+            {"name": "value", "inferred_type": "numeric", "physical_type": "DOUBLE",
+             "distinct_count": 50, "unique_pct": 0.5, "flags": []},
+            {"name": "label", "inferred_type": "categorical",
+             "physical_type": "VARCHAR", "distinct_count": 3, "unique_pct": 0.03,
+             "flags": []},
+        ],
+    }
+    assert build_relaciones(prof, {}) is None
+
+
+def test_empty_and_none_profile_do_not_raise():
+    """None / {} profile and missing ctx degrade to None without raising."""
+    assert build_relaciones(None, None) is None
+    assert build_relaciones({}, {}) is None
+    assert build_relaciones({}, {"glossary": GlossaryCollector()}) is None
+
+
+def test_pk_candidate_only_builds_chapter():
+    """A profile with only a key candidate (no FK anything, no db) still builds:
+    the relations chapter applies because there is a PK candidate to report."""
+    prof = {
+        "table": "t", "n_rows": 10, "n_cols": 1, "key_candidates": ["row_id"],
+        "columns": [
+            {"name": "row_id", "inferred_type": "numeric", "physical_type": "BIGINT",
+             "distinct_count": 10, "unique_pct": 1.0, "flags": ["possible_id"]},
+        ],
+    }
+    chapter = build_relaciones(prof, {})
+    assert isinstance(chapter, Chapter)
+    assert "Candidatos a clave primaria" in _text_of(chapter)
@@ -33,6 +33,7 @@ CHAPTER_ORDER = [
    "cat_distr",     # categorical distributions
    "calidad",       # data quality
    "correlacion",   # correlations / associations
+    "relaciones",    # key relations: declared/candidate PK + FK (inter/intra-table)
    "modelos",       # cheap models (PCA/KMeans/outliers)
    "timeseries",    # time-series analysis
    "geospatial",    # geospatial
@@ -0,0 +1,107 @@
+---
+name: detect_declared_keys_duckdb
+kind: function
+lang: py
+domain: datascience
+version: "1.0.0"
+purity: impure
+signature: "def detect_declared_keys_duckdb(db_path: str, table: str = None) -> dict"
+description: "Detecta las claves DECLARADAS (constraints reales) de un schema DuckDB leyendo la table function duckdb_constraints(): extrae PRIMARY KEY, FOREIGN KEY y UNIQUE (ignora NOT NULL y CHECK) y las devuelve normalizadas con sus columnas, y para las FK con su tabla y columnas referenciadas. Con table=None procesa todas las tablas; con table='X' filtra a PK/UNIQUE de X y a FK cuyo origen es X (case-sensitive). A diferencia de infer_fk_containment_duckdb (que INFIERE FKs candidatas por containment de valores cuando el schema no las declara), esta funcion devuelve las relaciones de clave REALES del schema. Estilo dict-no-throw: nunca lanza. Parte del grupo eda (relaciones de clave)."
+tags: [eda, duckdb, datascience, relations, primary-key, foreign-key, schema, exploratory-data-analysis]
+params:
+  - name: db_path
+    desc: "Ruta al archivo DuckDB. Debe existir (lectura read-only via duckdb_query_readonly; no se crea). Un path inexistente devuelve {status:'error', ...}."
+  - name: table
+    desc: "Si se pasa, filtra los resultados a esa tabla: incluye PRIMARY KEY y UNIQUE cuya tabla sea `table`, y FOREIGN KEY cuya tabla ORIGEN sea `table` (no la referenciada). None (default) devuelve los constraints de todas las tablas. La comparacion es case-sensitive (nombres tal cual los devuelve DuckDB)."
+output: "dict dict-no-throw. En exito {status:'ok', primary_keys:[{table:str, columns:[str,...]}, ...], foreign_keys:[{table:str, columns:[str,...], referenced_table:str, referenced_columns:[str,...]}, ...], unique:[{table:str, columns:[str,...]}, ...], tables:[str,...]} donde tables es la lista ordenada de tablas (origen) que poseen al menos un constraint PK/FK/UNIQUE emitido. Solo se emiten constraints de clave: NOT NULL y CHECK se ignoran. En error {status:'error', error:str}."
+uses_functions: [duckdb_query_readonly_py_infra]
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: []
+tested: true
+tests: ["test_golden_detecta_pks_y_fk", "test_golden_ignora_not_null_y_check", "test_edge_filtra_por_tabla_orders", "test_edge_filtra_por_tabla_customers", "test_edge_unique_declarado", "test_edge_sin_constraints_listas_vacias", "test_error_db_inexistente_no_lanza", "test_shape_resultado"]
+test_file_path: "python/functions/datascience/detect_declared_keys_duckdb_test.py"
+file_path: "python/functions/datascience/detect_declared_keys_duckdb.py"
+---
+
+## Ejemplo
+
+```python
+import sys, os, duckdb
+sys.path.insert(0, os.path.join("python", "functions"))
+from datascience import detect_declared_keys_duckdb
+
+# Base de ejemplo en /tmp: orders.customer_id -> customers.id (FK declarada)
+path = "/tmp/declared_keys_demo.duckdb"
+if os.path.exists(path):
+    os.remove(path)
+con = duckdb.connect(path)
+con.execute("CREATE TABLE customers(id INTEGER PRIMARY KEY, name TEXT)")
+con.execute(
+    "CREATE TABLE orders("
+    "  id INTEGER PRIMARY KEY,"
+    "  customer_id INTEGER REFERENCES customers(id),"
+    "  amt DOUBLE)"
+)
+con.close()
+
+res = detect_declared_keys_duckdb(path)
+if res["status"] == "ok":
+    for pk in res["primary_keys"]:
+        print(f"PK  {pk['table']}({', '.join(pk['columns'])})")
+    for fk in res["foreign_keys"]:
+        print(f"FK  {fk['table']}({', '.join(fk['columns'])}) -> "
+              f"{fk['referenced_table']}({', '.join(fk['referenced_columns'])})")
+    # PK  customers(id)
+    # PK  orders(id)
+    # FK  orders(customer_id) -> customers(id)
+else:
+    print("error:", res["error"])
+
+# Filtrar a una tabla concreta (PK/UNIQUE de orders + FK con origen orders):
+solo_orders = detect_declared_keys_duckdb(path, table="orders")
+print(solo_orders["tables"])  # ['orders']
+```
+
+## Cuando usarla
+
+- Cuando exploras un esquema DuckDB y quieres mostrar las relaciones de clave REALES (PK/FK/UNIQUE) que el schema ha declarado, sin inferir nada.
+- Como paso del capitulo RELACIONES del grupo `eda`: primero mira las claves declaradas con esta funcion; si el schema no declara FKs, complementa con `infer_fk_containment_duckdb` (inferencia por containment).
+- Antes de documentar o migrar un esquema, para listar el contrato de integridad referencial que el motor ya conoce.
+- Para validar que las constraints que esperas (esa FK que creaste con `REFERENCES`) realmente estan declaradas en la base materializada.
+
+## Gotchas
+
+- **Impura**: lee de disco via la primitiva read-only `duckdb_query_readonly` (no crea ni modifica la base). El `db_path` debe existir; un path inexistente devuelve `{status:'error'}` (read_only NO crea la base).
+- **Requiere `duckdb_constraints()`**: usa la table function `duckdb_constraints()`, disponible en DuckDB modernos (verificado en 1.5.2). En versiones antiguas sin esa funcion, la query falla y se devuelve `{status:'error'}`.
+- **Solo claves DECLARADAS**: devuelve lo que el schema declaro con `PRIMARY KEY` / `FOREIGN KEY (... REFERENCES ...)` / `UNIQUE`. Una tabla materializada con `CREATE TABLE AS SELECT` NO lleva constraints — para esos casos no habra claves que mostrar y hay que INFERIRLAS (`infer_fk_containment_duckdb`).
+- **NOT NULL y CHECK se ignoran**: `duckdb_constraints()` tambien emite filas `NOT NULL` (DuckDB genera una por cada columna PK) y `CHECK`; esta funcion las descarta y solo conserva PK/FK/UNIQUE.
+- **Nombres case-sensitive**: el filtro `table='Orders'` no casa con una tabla `orders`. Se comparan los nombres tal cual los devuelve DuckDB.
+- **FK atribuida al origen**: una FOREIGN KEY se atribuye a su tabla ORIGEN (el `table` de la entrada), no a la referenciada. El filtro `table='X'` trae las FK cuyo origen es X, no las que apuntan a X.
+- **`tables` = tablas dueñas de constraints emitidos**: la lista `tables` contiene solo las tablas que poseen al menos un PK/FK/UNIQUE en el resultado (su campo `table`), ordenadas. No incluye tablas referenciadas que no tengan constraint propio en la salida.
+- **Columnas como listas**: `constraint_column_names` y `referenced_column_names` son columnas LIST de DuckDB; en 1.5.2 llegan como listas Python. La funcion las normaliza a listas de strings con una red de seguridad por si llegaran como string.
+
+## Notas
+
+`duckdb_constraints()` devuelve una fila por constraint; de sus columnas, esta
+funcion lee `table_name`, `constraint_type`, `constraint_column_names`,
+`referenced_table` y `referenced_column_names`. Mapeo a la salida:
+
+```text
+PRIMARY KEY -> primary_keys[]: {table, columns}
+UNIQUE      -> unique[]:       {table, columns}
+FOREIGN KEY -> foreign_keys[]: {table, columns, referenced_table, referenced_columns}
+NOT NULL    -> ignorado
+CHECK       -> ignorado
+```
+
+Para una FK, `referenced_table` y `referenced_column_names` vienen poblados; para
+PK/UNIQUE, `referenced_table` es NULL y `referenced_column_names` una lista vacia.
+
+Complementa a `infer_fk_containment_duckdb`: esta funcion devuelve las relaciones
+de clave REALES del schema (declaradas); la otra INFIERE FKs candidatas por
+containment de valores cuando el schema no las declaro. En el capitulo RELACIONES
+de AutomaticEDA se usan en orden: primero las declaradas, luego la inferencia como
+respaldo.
@@ -0,0 +1,127 @@
+"""detect_declared_keys_duckdb — lee las claves DECLARADAS de un schema DuckDB.
+
+Funcion impura: lee de disco a traves de la primitiva read-only del grupo
+`duckdb` (duckdb_query_readonly). Pertenece al grupo de capacidad `eda`
+(relaciones de clave): a diferencia de infer_fk_containment_duckdb, que INFIERE
+FOREIGN KEYs candidatas por containment de valores, esta funcion devuelve las
+constraints REALES que el schema ha declarado (PRIMARY KEY / FOREIGN KEY /
+UNIQUE) leyendo la table function `duckdb_constraints()`.
+
+Es la pieza del capitulo RELACIONES de AutomaticEDA que muestra las relaciones de
+clave reales cuando existen — frente a la inferencia, que se usa cuando el schema
+no las declaro.
+
+Estilo dict-no-throw del grupo duckdb: nunca lanza; captura cualquier error y
+devuelve {status:'error', error:str}.
+"""
+
+from infra import duckdb_query_readonly
+
+
+def _as_list(value) -> list:
+    """Normaliza el valor de una columna LIST de DuckDB a una lista de strings.
+
+    En DuckDB 1.5.2, `constraint_column_names` y `referenced_column_names` llegan
+    ya como listas Python a traves de duckdb_query_readonly. Este helper es solo
+    una red de seguridad: si por cualquier motivo llegara como string (p.ej. la
+    representacion `[id, customer_id]`), la parsea de forma defensiva.
+    """
+    if value is None:
+        return []
+    if isinstance(value, (list, tuple)):
+        return [str(v) for v in value]
+    if isinstance(value, str):
+        s = value.strip()
+        if s.startswith("[") and s.endswith("]"):
+            s = s[1:-1]
+        if not s.strip():
+            return []
+        return [
+            part.strip().strip("'\"")
+            for part in s.split(",")
+            if part.strip().strip("'\"")
+        ]
+    return [str(value)]
+
+
+def detect_declared_keys_duckdb(db_path: str, table: str = None) -> dict:
+    """Detecta las claves PRIMARY KEY / FOREIGN KEY / UNIQUE declaradas en DuckDB.
+
+    Lee la table function `duckdb_constraints()` y extrae solo las constraints de
+    clave (PRIMARY KEY, FOREIGN KEY, UNIQUE), ignorando NOT NULL y CHECK.
+
+    Args:
+        db_path: ruta al archivo DuckDB. Debe existir (lectura read-only; no se
+            crea). Un path inexistente devuelve {status:'error', ...} sin lanzar.
+        table: si se pasa, filtra los resultados a esa tabla: incluye PRIMARY KEY
+            y UNIQUE cuya tabla sea `table`, y FOREIGN KEY cuya tabla ORIGEN sea
+            `table`. None (default) devuelve los constraints de todas las tablas.
+            La comparacion de nombres es case-sensitive (tal cual los devuelve
+            DuckDB).
+
+    Returns:
+        dict dict-no-throw. En exito:
+            {status:'ok',
+             primary_keys:[{table:str, columns:[str, ...]}, ...],
+             foreign_keys:[{table:str, columns:[str, ...],
+                            referenced_table:str,
+                            referenced_columns:[str, ...]}, ...],
+             unique:[{table:str, columns:[str, ...]}, ...],
+             tables:[str, ...]}   # tablas (origen) con algun PK/FK/UNIQUE emitido
+        En error (sin lanzar): {status:'error', error:str}.
+    """
+    try:
+        sql = (
+            "SELECT table_name, constraint_type, constraint_column_names, "
+            "referenced_table, referenced_column_names FROM duckdb_constraints()"
+        )
+        res = duckdb_query_readonly(db_path, sql)
+        if res["status"] != "ok":
+            return {"status": "error", "error": res["error"]}
+
+        primary_keys = []
+        foreign_keys = []
+        unique = []
+        tables = set()
+
+        for row in res["rows"]:
+            ctype = row["constraint_type"]
+            tname = row["table_name"]
+
+            # Filtro por tabla origen: para PK/FK/UNIQUE el dueño del constraint es
+            # `table_name`. Una FK se atribuye a su tabla origen (no a la
+            # referenciada), igual que el filtro pide.
+            if table is not None and tname != table:
+                continue
+
+            cols = _as_list(row["constraint_column_names"])
+
+            if ctype == "PRIMARY KEY":
+                primary_keys.append({"table": tname, "columns": cols})
+                tables.add(tname)
+            elif ctype == "UNIQUE":
+                unique.append({"table": tname, "columns": cols})
+                tables.add(tname)
+            elif ctype == "FOREIGN KEY":
+                foreign_keys.append(
+                    {
+                        "table": tname,
+                        "columns": cols,
+                        "referenced_table": row["referenced_table"],
+                        "referenced_columns": _as_list(
+                            row["referenced_column_names"]
+                        ),
+                    }
+                )
+                tables.add(tname)
+            # NOT NULL y CHECK se ignoran: no son relaciones de clave.
+
+        return {
+            "status": "ok",
+            "primary_keys": primary_keys,
+            "foreign_keys": foreign_keys,
+            "unique": unique,
+            "tables": sorted(tables),
+        }
+    except Exception as e:  # noqa: BLE001
+        return {"status": "error", "error": str(e)}
@@ -0,0 +1,167 @@
+"""Tests para detect_declared_keys_duckdb."""
+
+import duckdb
+import pytest
+
+from .detect_declared_keys_duckdb import detect_declared_keys_duckdb
+
+
+@pytest.fixture
+def db(tmp_path):
+    """DuckDB temporal con claves declaradas.
+
+    - customers(id PRIMARY KEY, name)
+    - orders(id PRIMARY KEY, customer_id REFERENCES customers(id), amt)
+
+    Esto declara dos PRIMARY KEY (customers.id, orders.id) y una FOREIGN KEY
+    (orders.customer_id -> customers.id). DuckDB ademas genera constraints
+    NOT NULL para las columnas PK, que la funcion debe ignorar.
+    """
+    path = str(tmp_path / "keys_test.duckdb")
+    con = duckdb.connect(path)
+    con.execute("CREATE TABLE customers(id INTEGER PRIMARY KEY, name TEXT)")
+    con.execute(
+        "CREATE TABLE orders("
+        "  id INTEGER PRIMARY KEY,"
+        "  customer_id INTEGER REFERENCES customers(id),"
+        "  amt DOUBLE"
+        ")"
+    )
+    con.close()
+    return path
+
+
+def _pk_for(res, table):
+    """Devuelve la entrada primary_keys cuya tabla es `table`, o None."""
+    for pk in res["primary_keys"]:
+        if pk["table"] == table:
+            return pk
+    return None
+
+
+def test_golden_detecta_pks_y_fk(db):
+    """Golden: detecta las dos PK y la FK declaradas, con valores concretos."""
+    res = detect_declared_keys_duckdb(db)
+    assert res["status"] == "ok"
+
+    # PRIMARY KEY de customers y de orders.
+    pk_customers = _pk_for(res, "customers")
+    pk_orders = _pk_for(res, "orders")
+    assert pk_customers is not None
+    assert pk_customers["columns"] == ["id"]
+    assert pk_orders is not None
+    assert pk_orders["columns"] == ["id"]
+
+    # FOREIGN KEY orders.customer_id -> customers.id.
+    assert len(res["foreign_keys"]) == 1
+    fk = res["foreign_keys"][0]
+    assert fk["table"] == "orders"
+    assert fk["columns"] == ["customer_id"]
+    assert fk["referenced_table"] == "customers"
+    assert fk["referenced_columns"] == ["id"]
+
+    # tables incluye ambas (origen de algun constraint).
+    assert res["tables"] == ["customers", "orders"]
+
+
+def test_golden_ignora_not_null_y_check(db):
+    """NOT NULL (auto-generado por las PK) no aparece como clave."""
+    res = detect_declared_keys_duckdb(db)
+    assert res["status"] == "ok"
+    # Solo 2 PK reales (no las NOT NULL que DuckDB genera por cada columna PK).
+    assert len(res["primary_keys"]) == 2
+    # No hay UNIQUE declarado en este schema.
+    assert res["unique"] == []
+
+
+def test_edge_filtra_por_tabla_orders(db):
+    """Edge table='orders': PK de orders + su FK; NO la PK de customers."""
+    res = detect_declared_keys_duckdb(db, table="orders")
+    assert res["status"] == "ok"
+
+    # Solo la PK de orders.
+    assert len(res["primary_keys"]) == 1
+    assert res["primary_keys"][0]["table"] == "orders"
+    assert res["primary_keys"][0]["columns"] == ["id"]
+    # La PK de customers NO esta.
+    assert _pk_for(res, "customers") is None
+
+    # La FK de orders si esta (origen = orders).
+    assert len(res["foreign_keys"]) == 1
+    assert res["foreign_keys"][0]["table"] == "orders"
+    assert res["foreign_keys"][0]["referenced_table"] == "customers"
+
+    # tables solo contiene orders (la dueña de los constraints emitidos).
+    assert res["tables"] == ["orders"]
+
+
+def test_edge_filtra_por_tabla_customers(db):
+    """Edge table='customers': solo su PK; ninguna FK (orders queda fuera)."""
+    res = detect_declared_keys_duckdb(db, table="customers")
+    assert res["status"] == "ok"
+    assert len(res["primary_keys"]) == 1
+    assert res["primary_keys"][0]["table"] == "customers"
+    assert res["foreign_keys"] == []
+    assert res["tables"] == ["customers"]
+
+
+def test_edge_unique_declarado(tmp_path):
+    """Edge: una constraint UNIQUE declarada aparece en `unique`."""
+    path = str(tmp_path / "unique_test.duckdb")
+    con = duckdb.connect(path)
+    con.execute("CREATE TABLE products(sku INTEGER UNIQUE, name TEXT)")
+    con.close()
+
+    res = detect_declared_keys_duckdb(path)
+    assert res["status"] == "ok"
+    assert len(res["unique"]) == 1
+    assert res["unique"][0]["table"] == "products"
+    assert res["unique"][0]["columns"] == ["sku"]
+    assert res["primary_keys"] == []
+    assert res["foreign_keys"] == []
+    assert res["tables"] == ["products"]
+
+
+def test_edge_sin_constraints_listas_vacias(tmp_path):
+    """Edge: tabla sin PK/FK/UNIQUE -> todas las listas vacias, status ok."""
+    path = str(tmp_path / "no_keys.duckdb")
+    con = duckdb.connect(path)
+    con.execute("CREATE TABLE log(a INTEGER, b INTEGER)")
+    con.close()
+
+    res = detect_declared_keys_duckdb(path)
+    assert res["status"] == "ok"
+    assert res["primary_keys"] == []
+    assert res["foreign_keys"] == []
+    assert res["unique"] == []
+    assert res["tables"] == []
+
+
+def test_error_db_inexistente_no_lanza(tmp_path):
+    """Error: db_path inexistente -> status error, sin lanzar excepcion."""
+    path = str(tmp_path / "does_not_exist.duckdb")
+    res = detect_declared_keys_duckdb(path)
+    assert res["status"] == "error"
+    assert isinstance(res["error"], str)
+    assert res["error"] != ""
+
+
+def test_shape_resultado(db):
+    """El retorno tiene exactamente las claves esperadas."""
+    res = detect_declared_keys_duckdb(db)
+    assert set(res.keys()) == {
+        "status",
+        "primary_keys",
+        "foreign_keys",
+        "unique",
+        "tables",
+    }
+    for pk in res["primary_keys"]:
+        assert set(pk.keys()) == {"table", "columns"}
+    for fk in res["foreign_keys"]:
+        assert set(fk.keys()) == {
+            "table",
+            "columns",
+            "referenced_table",
+            "referenced_columns",
+        }
@@ -0,0 +1,91 @@
+---
+name: suggest_intratable_fk_candidates
+kind: function
+lang: py
+domain: datascience
+version: "1.0.0"
+purity: pure
+signature: "def suggest_intratable_fk_candidates(profile: dict, max_candidates: int = 20) -> list"
+description: "Sobre el TableProfile de UNA tabla (el dict de profile_table), sugiere por heuristica de nombre + cardinalidad que columnas PARECEN una clave foranea hacia otra tabla, cuando no hay relaciones inter-tabla que medir (una sola tabla). Es una SUGERENCIA, no una afirmacion: el ref_table_guess es el stem del nombre (customer_id -> customer) y NO confirma containment. Pura: solo lee el dict, sin I/O; nunca lanza (devuelve [])."
+tags: [eda, datascience, relationships, foreign-key, fk, heuristic, schema, python]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: ""
+imports: []
+params:
+  - name: profile
+    desc: "TableProfile (dict que produce profile_table / summarize_table_*). Se leen de forma defensiva `columns` (lista de ColumnProfile con name/inferred_type/physical_type/distinct_count/unique_pct/flags), `n_rows` (int) y `key_candidates` (lista de nombres de columna ya candidatos a PK, que se excluyen). Si no es dict o no trae columns -> []."
+  - name: max_candidates
+    desc: "Tope de sugerencias devueltas (default 20). Las columnas candidatas se ordenan por distinct_count descendente (mas informativas primero) antes de cortar a este maximo."
+output: "list (posiblemente vacia) de dicts, uno por columna sugerida, con claves: `column` (nombre), `ref_table_guess` (tabla conjeturada por el stem del nombre, p.ej. customer_id -> 'customer'), `reason` (frase humana que deja claro que es heuristica sin confirmar containment), `distinct_count` (int|None), `unique_pct` (float|None, fraccion 0-1 tal como viene del profile), `inferred_type` (str), `physical_type` (str). Nunca lanza."
+tested: true
+tests: ["test_golden_customer_id_detectado_otras_no", "test_camelcase_albumid_detectado", "test_constante_status_id_no_aparece", "test_profile_vacio_y_none_devuelven_lista_vacia", "test_category_id_casi_unico_parece_pk_no_aparece", "test_ref_table_guess_multitoken_y_orden_por_distinct", "test_max_candidates_corta_la_lista", "test_id_generico_solo_nunca_es_fk"]
+test_file_path: "python/functions/datascience/suggest_intratable_fk_candidates_test.py"
+file_path: "python/functions/datascience/suggest_intratable_fk_candidates.py"
+---
+
+## Ejemplo
+
+```python
+from datascience import suggest_intratable_fk_candidates
+
+# TableProfile de UNA tabla (tipo titanic): customer_id es FK N:1; id es la PK;
+# amount es una medida float; name es categorica sin sufijo de id.
+profile = {
+    "n_rows": 891,
+    "key_candidates": ["id"],
+    "columns": [
+        {"name": "id", "inferred_type": "numeric", "physical_type": "BIGINT",
+         "distinct_count": 891, "unique_pct": 1.0, "flags": ["possible_id"]},
+        {"name": "customer_id", "inferred_type": "numeric", "physical_type": "BIGINT",
+         "distinct_count": 137, "unique_pct": 0.15, "flags": []},
+        {"name": "amount", "inferred_type": "numeric", "physical_type": "DOUBLE",
+         "distinct_count": 400, "unique_pct": 0.45, "flags": []},
+        {"name": "name", "inferred_type": "categorical", "physical_type": "VARCHAR",
+         "distinct_count": 700, "unique_pct": 0.78, "flags": []},
+    ],
+}
+
+out = suggest_intratable_fk_candidates(profile)
+[c["column"] for c in out]              # -> ["customer_id"]
+out[0]["ref_table_guess"]               # -> "customer"
+out[0]["reason"]
+# -> "el nombre termina en '_id' y es N:1 (137 valores distintos < 891 filas):
+#     parece (heuristica por nombre, sin confirmar containment) una referencia a
+#     una tabla «customer»"
+```
+
+## Cuando usarla
+
+Cuando el EDA tiene SOLO UNA tabla y, por tanto, no se puede inferir una FK
+inter-tabla por containment (no hay otra tabla cuyos valores contener). Es el plan B
+del capitulo RELACIONES de AutomaticEDA: en vez de medir solapamiento de valores
+entre tablas (lo correcto cuando hay varias, ver `infer_fk_containment_duckdb` /
+`build_join_graph`), conjetura por el NOMBRE de la columna (`<algo>_id`) y por su
+CARDINALIDAD N:1 que columnas parecen apuntar a una entidad externa. Usala para
+enriquecer el reporte con "estas columnas parecen referencias a otras tablas" sin
+prometer que esa tabla exista. NO la uses si tienes varias tablas: ahi mide
+containment de verdad.
+
+## Gotchas
+
+- Es **heuristica**, no una verdad: produce **falsos positivos** (una columna
+  `period_id` que en realidad es un codigo libre, no una FK) y **falsos negativos**
+  (una FK que no se llama `*_id`, p.ej. `parent`, `owner`, `sku`). No la trates como
+  una afirmacion de esquema.
+- `ref_table_guess` es una **conjetura por el nombre** (el stem sin el sufijo id):
+  `customer_id` -> `customer`, `AlbumId` -> `album`, `manager_staff_id` ->
+  `manager_staff`. Puede no coincidir con el nombre real de la tabla (plurales,
+  prefijos, alias). Es una pista, no un join garantizado.
+- **NO confirma containment**: no comprueba que los valores de la columna existan en
+  ninguna otra tabla (no puede — solo recibe el perfil de una tabla). Para confirmar
+  una FK real con varias tablas usa `infer_fk_containment_duckdb`.
+- Excluye deliberadamente: el `id`/`Id`/`ID` generico a secas (suele ser la PK
+  propia, no una referencia), las columnas constantes, las que parecen unicas
+  (`unique_pct >= 0.99`, mas PK que FK) y los tipos no-clave (float/decimal son
+  medidas; date/time/timestamp y boolean no son claves). En camelCase, `paid`,
+  `valid`, `grid` (con `id` en minuscula y sin separador) NO se confunden con FK.
+- `unique_pct` se interpreta como **fraccion 0-1** (tal como la emite el profile), no
+  como porcentaje 0-100.
@@ -0,0 +1,202 @@
+"""suggest_intratable_fk_candidates — heuristica de FK intra-tabla del grupo `eda`.
+
+Sobre el TableProfile de UNA tabla (el dict que produce ``profile_table``), sugiere
+por heuristica de NOMBRE + CARDINALIDAD que columnas PARECEN una clave foranea hacia
+otra tabla, util cuando no hay relaciones inter-tabla disponibles (una sola tabla y,
+por tanto, sin containment cruzado que medir). Es una SUGERENCIA, no una afirmacion:
+no confirma que exista la tabla referida ni que los valores esten contenidos en ella.
+
+La consume el capitulo RELACIONES de AutomaticEDA cuando solo hay una tabla.
+
+Funcion PURA: solo lee el dict (lectura defensiva con ``.get``), no hace I/O y nunca
+lanza por inputs raros (devuelve ``[]``).
+"""
+
+# inferred_type que es compatible con una clave foranea (entero/categorico).
+_FK_INFERRED_OK = {"numeric", "categorical", "integer"}
+
+# Prefijos de physical_type que admiten ser clave foranea (enteros, texto, uuid).
+_FK_PHYSICAL_PREFIXES = (
+    "int", "bigint", "smallint", "tinyint", "hugeint", "uint",
+    "varchar", "text", "char", "bpchar", "string", "uuid",
+)
+
+# Prefijos de physical_type que EXCLUYEN ser clave foranea: medidas en coma flotante
+# (float/double/decimal/numeric/real), temporales (date/time/timestamp/interval) y
+# boolean. Se comprueban ANTES que las senales positivas (la exclusion gana: una
+# columna numeric con physical DOUBLE es una medida, no una FK).
+_FK_PHYSICAL_EXCLUDE = (
+    "float", "double", "decimal", "numeric", "real",
+    "date", "time", "timestamp", "interval",
+    "bool",
+)
+
+
+def _fk_name_signal(name):
+    """Detecta el sufijo de clave foranea en el nombre y devuelve ``(stem, sufijo)``.
+
+    Reconoce ``<algo>_id`` (snake), ``<Algo>Id`` y ``<algo>ID`` (camel). NO reconoce
+    el ``id``/``Id``/``ID`` generico a secas (suele ser la PK propia de la tabla, no
+    una referencia). En camelCase la ``I`` mayuscula marca el limite de palabra, asi
+    que ``paid``/``valid``/``grid`` (``id`` en minuscula y sin separador) NO matchean.
+
+    El ``stem`` se devuelve en minusculas y sirve de ``ref_table_guess`` (la tabla a
+    la que probablemente apunta): ``customer_id`` -> ``"customer"``, ``AlbumId`` ->
+    ``"album"``, ``manager_staff_id`` -> ``"manager_staff"``. Devuelve ``None`` si no
+    hay senal de nombre.
+    """
+    if not isinstance(name, str):
+        return None
+    raw = name.strip()
+    if not raw:
+        return None
+    # Snake: termina en "_id" (indiferente a mayusculas en la parte "id").
+    if raw.lower().endswith("_id"):
+        stem = raw[:-3].rstrip("_-. ")
+        if not stem:
+            return None
+        return (stem.lower(), "_id")
+    # Camel todo-mayuscula: "...ID" (p.ej. customerID).
+    if raw.endswith("ID"):
+        stem = raw[:-2].rstrip("_-. ")
+        if not stem:
+            return None
+        return (stem.lower(), "ID")
+    # Camel: "...Id" (p.ej. AlbumId).
+    if raw.endswith("Id"):
+        stem = raw[:-2].rstrip("_-. ")
+        if not stem:
+            return None
+        return (stem.lower(), "Id")
+    return None
+
+
+def _fk_type_compatible(col):
+    """True si el tipo de la columna admite ser clave foranea.
+
+    Compatible si el ``physical_type`` NO es una medida flotante, una temporal ni
+    boolean, Y ademas (``inferred_type`` en {numeric, categorical, integer} O el
+    ``physical_type`` empieza por entero/varchar/text/char/uuid). La comparacion es
+    indistinta a mayusculas/minusculas.
+    """
+    phys = (col.get("physical_type") or "").strip().lower()
+    inferred = (col.get("inferred_type") or "").strip().lower()
+    # Exclusion por tipo fisico (gana sobre cualquier senal positiva).
+    for bad in _FK_PHYSICAL_EXCLUDE:
+        if phys.startswith(bad):
+            return False
+    # Senal positiva por tipo inferido.
+    if inferred in _FK_INFERRED_OK:
+        return True
+    # Senal positiva por tipo fisico (entero/texto/uuid).
+    for good in _FK_PHYSICAL_PREFIXES:
+        if phys.startswith(good):
+            return True
+    return False
+
+
+def suggest_intratable_fk_candidates(profile: dict, max_candidates: int = 20) -> list:
+    """Sugiere columnas que parecen una FK intra-tabla por nombre + cardinalidad.
+
+    Heuristica (no afirma nada): una columna es candidata a clave foranea si su nombre
+    tiene sufijo de id con stem no vacio (``<algo>_id`` / ``<Algo>Id`` / ``<algo>ID``,
+    NUNCA el ``id`` generico), no es ya candidata a PK, no es constante, tiene
+    cardinalidad alta pero por debajo del numero de filas (N:1, no unica) y un tipo
+    compatible con clave (entero/categorico/texto/uuid; nunca float/fecha/boolean).
+
+    Args:
+        profile: TableProfile (dict de ``profile_table``). Se leen, de forma
+            defensiva, ``columns`` (lista de ColumnProfile), ``n_rows`` y
+            ``key_candidates`` (nombres de columna ya candidatos a PK).
+        max_candidates: tope de sugerencias devueltas (default 20). Las columnas se
+            ordenan por ``distinct_count`` descendente (mas informativas primero)
+            antes de cortar.
+
+    Returns:
+        list de dicts (posiblemente vacia), uno por columna sugerida, con claves:
+        ``column``, ``ref_table_guess`` (stem del nombre), ``reason`` (frase humana),
+        ``distinct_count``, ``unique_pct`` (fraccion 0-1 tal como viene del profile),
+        ``inferred_type``, ``physical_type``. Nunca lanza: si ``profile`` no es dict o
+        no hay columnas, devuelve ``[]``.
+    """
+    if not isinstance(profile, dict):
+        return []
+    columns = profile.get("columns")
+    if not isinstance(columns, list):
+        return []
+
+    n_rows = profile.get("n_rows")
+    has_n_rows = (
+        isinstance(n_rows, int) and not isinstance(n_rows, bool) and n_rows > 0
+    )
+
+    key_candidates = profile.get("key_candidates")
+    if not isinstance(key_candidates, (list, tuple, set)):
+        key_candidates = []
+    key_set = set(key_candidates)
+
+    out = []
+    for col in columns:
+        if not isinstance(col, dict):
+            continue
+        name = col.get("name")
+
+        # 1) Senal de nombre: sufijo de id con stem no vacio.
+        signal = _fk_name_signal(name)
+        if signal is None:
+            continue
+        ref_guess, suffix = signal
+
+        # 2) No es ya candidata a PK (clave primaria de la propia tabla).
+        if name in key_set:
+            continue
+
+        # 3) No constante y con >= 2 valores distintos.
+        flags = col.get("flags") or []
+        if "constant" in flags:
+            continue
+        dc = col.get("distinct_count")
+        if not (isinstance(dc, int) and not isinstance(dc, bool) and dc >= 2):
+            continue
+
+        # 4) Cardinalidad alta pero < n_rows (no es PK) y no parece unica.
+        if has_n_rows and dc >= n_rows:
+            continue
+        unique_pct = col.get("unique_pct")
+        has_unique = (
+            isinstance(unique_pct, (int, float)) and not isinstance(unique_pct, bool)
+        )
+        if has_unique and unique_pct >= 0.99:
+            continue
+
+        # 5) Tipo compatible con clave foranea (entero/categorico/texto; no medida).
+        if not _fk_type_compatible(col):
+            continue
+
+        out.append(
+            {
+                "column": name,
+                "ref_table_guess": ref_guess,
+                "reason": _build_reason(suffix, dc, n_rows if has_n_rows else None, ref_guess),
+                "distinct_count": dc,
+                "unique_pct": float(unique_pct) if has_unique else None,
+                "inferred_type": col.get("inferred_type") or "",
+                "physical_type": col.get("physical_type") or "",
+            }
+        )
+
+    # Mas informativas primero (mayor cardinalidad), luego corte.
+    out.sort(key=lambda d: d.get("distinct_count") or 0, reverse=True)
+    return out[: max(0, int(max_candidates))]
+
+
+def _build_reason(suffix, dc, n_rows, ref_guess):
+    """Frase humana que deja claro que la sugerencia es heuristica, no confirmada."""
+    if n_rows is not None:
+        card = f"es N:1 ({dc} valores distintos < {n_rows} filas)"
+    else:
+        card = f"tiene {dc} valores distintos que se repiten (cardinalidad N:1)"
+    return (
+        f"el nombre termina en '{suffix}' y {card}: parece (heuristica por nombre, "
+        f"sin confirmar containment) una referencia a una tabla «{ref_guess}»"
+    )
@@ -0,0 +1,157 @@
+"""Tests para suggest_intratable_fk_candidates (funcion pura, sin I/O)."""
+
+from suggest_intratable_fk_candidates import suggest_intratable_fk_candidates
+
+
+def _col(name, inferred_type="numeric", physical_type="BIGINT", distinct_count=10,
+         unique_pct=0.1, flags=None):
+    """Construye un ColumnProfile minimo a mano (el dict que emite profile_table)."""
+    return {
+        "name": name,
+        "inferred_type": inferred_type,
+        "physical_type": physical_type,
+        "semantic_type": "",
+        "distinct_count": distinct_count,
+        "unique_pct": unique_pct,
+        "null_count": 0,
+        "null_pct": 0.0,
+        "flags": list(flags) if flags else [],
+    }
+
+
+def test_golden_customer_id_detectado_otras_no():
+    # Tabla tipo titanic: customer_id es FK N:1; id es la PK; amount es medida;
+    # name es categorica sin sufijo de id. Solo customer_id debe aparecer.
+    profile = {
+        "n_rows": 891,
+        "key_candidates": ["id"],
+        "columns": [
+            _col("id", inferred_type="numeric", physical_type="BIGINT",
+                 distinct_count=891, unique_pct=1.0, flags=["possible_id"]),
+            _col("customer_id", inferred_type="numeric", physical_type="BIGINT",
+                 distinct_count=137, unique_pct=0.15, flags=[]),
+            _col("amount", inferred_type="numeric", physical_type="DOUBLE",
+                 distinct_count=400, unique_pct=0.45),
+            _col("name", inferred_type="categorical", physical_type="VARCHAR",
+                 distinct_count=700, unique_pct=0.78),
+        ],
+    }
+    out = suggest_intratable_fk_candidates(profile)
+    assert isinstance(out, list)
+    assert [c["column"] for c in out] == ["customer_id"]
+    cand = out[0]
+    assert cand["ref_table_guess"] == "customer"
+    assert cand["distinct_count"] == 137
+    assert cand["unique_pct"] == 0.15
+    assert cand["inferred_type"] == "numeric"
+    assert cand["physical_type"] == "BIGINT"
+    # La razon deja claro que es heuristica + cita el sufijo y la tabla.
+    assert "customer" in cand["reason"]
+    assert "_id" in cand["reason"]
+
+
+def test_camelcase_albumid_detectado():
+    # AlbumId (camelCase, VARCHAR) -> detectada, ref_table_guess "album".
+    profile = {
+        "n_rows": 3503,
+        "key_candidates": ["TrackId"],
+        "columns": [
+            _col("AlbumId", inferred_type="categorical", physical_type="VARCHAR",
+                 distinct_count=347, unique_pct=0.10),
+        ],
+    }
+    out = suggest_intratable_fk_candidates(profile)
+    # TrackId es PK candidata (en key_candidates), AlbumId no -> AlbumId aparece.
+    assert [c["column"] for c in out] == ["AlbumId"]
+    assert out[0]["ref_table_guess"] == "album"
+
+
+def test_constante_status_id_no_aparece():
+    # status_id constante (flag "constant", distinct_count 1) NO es FK util.
+    profile = {
+        "n_rows": 1000,
+        "key_candidates": [],
+        "columns": [
+            _col("status_id", inferred_type="numeric", physical_type="INTEGER",
+                 distinct_count=1, unique_pct=0.001, flags=["constant"]),
+        ],
+    }
+    out = suggest_intratable_fk_candidates(profile)
+    assert out == []
+
+
+def test_profile_vacio_y_none_devuelven_lista_vacia():
+    # Lectura defensiva: ni {} ni None lanzan; devuelven [].
+    assert suggest_intratable_fk_candidates({}) == []
+    assert suggest_intratable_fk_candidates(None) == []
+    # profile sin columns o con columns no-lista tampoco lanza.
+    assert suggest_intratable_fk_candidates({"n_rows": 10}) == []
+    assert suggest_intratable_fk_candidates({"columns": "no-soy-lista"}) == []
+
+
+def test_category_id_casi_unico_parece_pk_no_aparece():
+    # unique_pct 0.999 -> parece PK (no N:1) -> NO se sugiere como FK.
+    profile = {
+        "n_rows": 891,
+        "key_candidates": [],
+        "columns": [
+            _col("category_id", inferred_type="numeric", physical_type="BIGINT",
+                 distinct_count=890, unique_pct=0.999),
+        ],
+    }
+    out = suggest_intratable_fk_candidates(profile)
+    assert out == []
+
+
+def test_ref_table_guess_multitoken_y_orden_por_distinct():
+    # manager_staff_id conserva los underscores del stem -> "manager_staff".
+    # Ademas, con varias candidatas, se ordenan por distinct_count descendente.
+    profile = {
+        "n_rows": 10000,
+        "key_candidates": ["staff_id"],  # staff_id es PK aqui, no debe aparecer
+        "columns": [
+            _col("staff_id", inferred_type="numeric", physical_type="BIGINT",
+                 distinct_count=10000, unique_pct=1.0, flags=["possible_id"]),
+            _col("store_id", inferred_type="numeric", physical_type="INTEGER",
+                 distinct_count=2, unique_pct=0.0002),
+            _col("manager_staff_id", inferred_type="numeric", physical_type="INTEGER",
+                 distinct_count=40, unique_pct=0.004),
+        ],
+    }
+    out = suggest_intratable_fk_candidates(profile)
+    cols = [c["column"] for c in out]
+    # staff_id excluida (PK); las otras dos ordenadas por distinct desc.
+    assert cols == ["manager_staff_id", "store_id"]
+    refs = {c["column"]: c["ref_table_guess"] for c in out}
+    assert refs["manager_staff_id"] == "manager_staff"
+    assert refs["store_id"] == "store"
+
+
+def test_max_candidates_corta_la_lista():
+    # max_candidates limita el numero de sugerencias devueltas.
+    profile = {
+        "n_rows": 10000,
+        "key_candidates": [],
+        "columns": [
+            _col("a_id", distinct_count=300, unique_pct=0.03),
+            _col("b_id", distinct_count=200, unique_pct=0.02),
+            _col("c_id", distinct_count=100, unique_pct=0.01),
+        ],
+    }
+    out = suggest_intratable_fk_candidates(profile, max_candidates=2)
+    assert [c["column"] for c in out] == ["a_id", "b_id"]
+
+
+def test_id_generico_solo_nunca_es_fk():
+    # 'id'/'Id'/'ID' a secas (sin stem) jamas se sugieren como FK.
+    profile = {
+        "n_rows": 500,
+        "key_candidates": [],
+        "columns": [
+            _col("id", distinct_count=500, unique_pct=1.0),
+            _col("Id", distinct_count=120, unique_pct=0.24),
+            _col("ID", distinct_count=80, unique_pct=0.16),
+        ],
+    }
+    out = suggest_intratable_fk_candidates(profile)
+    assert out == []