# fn_registry/python/functions/datascience/automatic_eda/chapters_registry.py

"""Chapter registry — the canonical order of an AutomaticEDA document.

``CHAPTER_ORDER`` declares every chapter the engine will *ever* place, in the
order they appear in the document. Each id maps by convention to a module
``automatic_eda/chapters/<id>.py`` exposing ``build_<id>(profile, ctx) ->
Chapter | None`` and a ``CHAPTER_VERSION`` constant.

This pre-declared order is what lets many agents add chapters in parallel
without contention: an agent only creates its own ``chapters/<id>.py`` module —
it never edits this file. ``build_document`` imports each chapter lazily; a
chapter whose module does not exist yet (not implemented) is simply skipped, so
the document is always renderable with whatever chapters are present today.

``build_document`` never raises: a chapter that errors out is dropped with a
note, and a chapter that returns ``None`` (does not apply to this dataset, e.g.
time series on a dataset with no date column) is omitted.
"""

from __future__ import annotations

import importlib
import logging

from . import model

# Canonical document order. Implemented today: portada, overview. The rest are
# placeholders other agents will fill by creating chapters/<id>.py — they will
# appear in this exact position automatically once their module exists.
#
# Each id is used verbatim as the module name (``chapters/<id>.py``) and as the
# suffix of the builder function (``build_<id>``), so it should be a valid
# Python identifier. ``build_document`` walks this list front to back, which is
# what fixes the order of chapters in the resulting document.
CHAPTER_ORDER = [
    "portada",       # cover
    "overview",      # df.head + columns/types/nulls/examples + describe
    "num_distr",     # numeric distributions
    "cat_distr",     # categorical distributions
    "calidad",       # data quality
    "correlacion",   # correlations / associations
    "modelos",       # cheap models (PCA/KMeans/outliers)
    "analisis_llm",  # LLM interpretation
    "timeseries",    # time-series analysis
    "geospatial",    # geospatial
    "agregacion",    # aggregations / pivots
]


# Module-level logger used to leave a note whenever a chapter is dropped because
# of a real failure (as opposed to simply not being implemented yet).
_LOG = logging.getLogger(__name__)


def build_chapter(chapter_id: str, profile: dict, ctx: dict):
    """Build a single chapter by id, or None if absent/not-applicable/error.

    Imports ``<package>.chapters.<chapter_id>`` and calls its
    ``build_<chapter_id>(profile, ctx)``, then passes the result through
    ``model.as_chapter``.

    Args:
        chapter_id: chapter identifier; selects both the module and the
            ``build_<chapter_id>`` function looked up inside it.
        profile: profile dict handed to the builder; any falsy value is
            replaced by an empty dict.
        ctx: context dict handed to the builder; any falsy value is replaced
            by an empty dict.

    Returns:
        Whatever ``model.as_chapter`` returns for the builder's result, or
        ``None`` when the chapter module does not exist, the module has no
        ``build_<chapter_id>`` attribute, or importing, building or
        normalizing raises. Never raises an ``Exception`` itself.
    """
    mod_name = f"{__package__}.chapters.{chapter_id}"
    try:
        mod = importlib.import_module(mod_name)
    except ModuleNotFoundError as exc:
        missing = exc.name or ""
        # The chapter module (or one of its parent packages) is absent: the
        # chapter is just not implemented yet, which is expected → stay quiet.
        if missing and (mod_name == missing or mod_name.startswith(missing + ".")):
            return None
        # The chapter module exists but one of ITS imports is missing: that is
        # a real problem worth reporting, even though the document goes on.
        _LOG.warning(
            "Chapter %r skipped: missing dependency %r.",
            chapter_id,
            missing,
            exc_info=True,
        )
        return None
    except Exception:  # noqa: BLE001 — a broken chapter never aborts the doc.
        _LOG.warning(
            "Chapter %r skipped: its module failed to import.",
            chapter_id,
            exc_info=True,
        )
        return None

    builder = getattr(mod, f"build_{chapter_id}", None)
    if builder is None:
        _LOG.warning(
            "Chapter %r skipped: module %s defines no build_%s.",
            chapter_id,
            mod_name,
            chapter_id,
        )
        return None

    try:
        # Normalization stays inside the guard: a builder that returns a
        # malformed result must drop the chapter, not crash the caller.
        return model.as_chapter(builder(profile or {}, ctx or {}))
    except Exception:  # noqa: BLE001 — a broken chapter never aborts the doc.
        _LOG.warning(
            "Chapter %r skipped: building or normalizing it raised.",
            chapter_id,
            exc_info=True,
        )
        return None


def build_document(profile: dict, ctx: dict = None) -> list:
    """Assemble every available chapter, in canonical order.

    Args:
        profile: the ``eda`` group TableProfile dict. ``None`` or any value
            that is not a dict is treated as an empty profile.
        ctx: optional context dict with presentation metadata that the profile
            does not carry (dataset_name, source_origin, storage, generated_at,
            description, granularity, quality_criteria, head_rows, ...).
            ``None`` is treated as an empty context.

    Returns:
        A list with one entry per id in ``CHAPTER_ORDER`` for which
        ``build_chapter`` produced a chapter with a non-empty ``blocks``
        attribute, in that same order.
    """
    # A single isinstance test also covers None, which is not a dict.
    safe_profile = profile if isinstance(profile, dict) else {}
    safe_ctx = {} if ctx is None else ctx

    # Lazy generator: each chapter is built and filtered before the next one
    # is started, exactly like an explicit build-then-append loop.
    candidates = (
        build_chapter(chapter_id, safe_profile, safe_ctx)
        for chapter_id in CHAPTER_ORDER
    )
    return [
        chapter
        for chapter in candidates
        if chapter is not None and chapter.blocks
    ]