8a78a70ef6
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
108 lines
5.3 KiB
Python
108 lines
5.3 KiB
Python
"""Glossary chapter (GLOSARIO) — always the last chapter, clickable terms.

Renders one entry per glossary term that the other chapters registered during
the document build through ``ctx['glossary'].add(key, label, definition)`` (see
``GlossaryCollector`` in ``model.py``). Each entry is a clickable destination:
every in-text appearance a chapter marked with ``[[term:key]]text[[/term]]``
becomes a real jump to its entry here — PDF link annotations (PyMuPDF) and PPTX
native slide jumps, both wired by the renderers.

Returns ``None`` when no term was registered (there is nothing to show), so the
chapter simply disappears from documents that did not mark any term.

Contract: build_<id>(profile, ctx) -> Chapter | None ; CHAPTER_VERSION = "x.y.z".
"""
|
||
|
||
from __future__ import annotations

import unicodedata

from .. import model
|
||
|
||
# Version of this chapter builder, in the "x.y.z" form the module contract asks for.
CHAPTER_VERSION = "1.1.1"
# Identifier passed as ``id`` to the Chapter returned by ``build_glosario``.
CHAPTER_ID = "glosario"
# Title passed as ``title`` to the Chapter returned by ``build_glosario``.
CHAPTER_TITLE = "Glosario"
|
||
|
||
# Fallback catalog for cross-cutting "how to read it" terms that are not owned
# by any single chapter. A chapter can register such a term bare
# (``ctx['glossary'].add(key, label)``) and tag its in-text appearance with
# ``[[term:key]]…[[/term]]``; when the collected term arrives without a
# definition, the glossary chapter fills it in from this mapping. Holding the
# prose in one place spares every chapter that names the term from repeating a
# long paragraph inline (this is where the explanation formerly in the NUM DISTR
# and CAT DISTR intros now lives).
#
# Shape: term key -> {"label": visible heading, "definition": full text}.
_BASELINE_TERMS = {
    "histograma_boxplot": dict(
        label="Cómo leer el histograma y el boxplot",
        definition=(
            "Para cada columna numérica se muestra su histograma con tres líneas "
            "de referencia: la media (línea roja discontinua), la mediana (línea "
            "verde continua) y la banda ±1σ (zona sombreada que cubre una "
            "desviación estándar a cada lado de la media). Debajo, alineado al "
            "mismo eje horizontal, un boxplot de Tukey: la caja abarca del primer "
            "al tercer cuartil (P25–P75), la línea interior es la mediana y los "
            "bigotes llegan hasta 1,5·IQR; los puntos rojos señalan que hay "
            "valores más allá de las vallas (posibles atípicos). Comparar la media "
            "con la mediana revela la asimetría: si la media supera a la mediana la "
            "cola larga cae hacia los valores altos (asimetría a la derecha), y al "
            "revés hacia los bajos."
        ),
    ),
    "pagina_categorica": dict(
        label="Cómo se organiza cada página categórica",
        definition=(
            "Cada columna categórica ocupa su propia página: muestra sus métricas "
            "de cardinalidad —incluida la entropía—, una nota que señala "
            "cardinalidad problemática (columnas que se comportan como "
            "identificador, con casi todos los valores distintos, o dominadas por "
            "una sola categoría), la tabla de las categorías más frecuentes (top-k, "
            "con su conteo y porcentaje) y un gráfico de barras de las categorías "
            "más comunes (top-k más una barra «Otros» que agrupa la cola). El total "
            "de filas del dataset se usa como referencia para interpretar los "
            "conteos."
        ),
    ),
}
|
||
|
||
|
||
def _resolve_term(term: dict) -> tuple:
    """Work out the visible ``(label, definition)`` pair for one collected term.

    The term's ``key``, ``label`` and ``definition`` are each read with
    ``term.get`` and passed through ``model._safe_str`` (presumably a
    None-tolerant string coercion — confirm in ``model.py``). If the key has an
    entry in ``_BASELINE_TERMS``, that entry fills the gaps:

    * a blank (empty or whitespace-only) definition is replaced by the
      baseline definition;
    * a blank label, or a label that merely repeats the key, is replaced by
      the baseline label — unless the baseline label is itself empty, in
      which case the collected label is kept.

    Terms without a baseline entry are returned exactly as collected.
    """
    term_key = model._safe_str(term.get("key"))
    shown_label = model._safe_str(term.get("label"))
    text = model._safe_str(term.get("definition"))

    canonical = _BASELINE_TERMS.get(term_key)
    if not canonical:
        # Nothing to complete from: hand back what the collector carried.
        return shown_label, text

    if not text.strip():
        text = model._safe_str(canonical.get("definition"))

    # A label equal to the key is treated as a placeholder, not a real label.
    label_is_placeholder = (not shown_label.strip()) or shown_label == term_key
    if label_is_placeholder:
        shown_label = model._safe_str(canonical.get("label")) or shown_label

    return shown_label, text
|
||
|
||
|
||
def _label_sort_key(label: str) -> str:
    """Return an accent- and case-insensitive sort key for a glossary label.

    The label is case-folded and NFKD-decomposed so that combining marks can be
    dropped. Ordering by ``label.lower()`` alone compares raw code points, which
    puts an accented initial such as "Í" or "Á" after "z" and pushes those
    entries to the very end of the glossary instead of next to "I" / "A".
    As a consequence of the decomposition, "ñ" is ordered together with "n".
    """
    decomposed = unicodedata.normalize("NFKD", label.casefold())
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


def build_glosario(profile: dict, ctx: dict):
    """Build the glossary Chapter from the shared collector, or None if empty.

    Args:
        profile: Not used by this chapter; accepted to satisfy the
            ``build_<id>(profile, ctx)`` contract stated in the module docstring.
        ctx: Build context. Its ``"glossary"`` entry must be a
            ``model.GlossaryCollector``; ``None`` is treated as an empty context.

    Returns:
        A ``model.Chapter`` made of a heading, an introductory paragraph and one
        ``model.GlossaryEntry`` per collected term, ordered alphabetically by
        visible label (ignoring case and accents). ``None`` when ``ctx`` has no
        ``GlossaryCollector`` under ``"glossary"`` or the collector is falsy.
    """
    ctx = ctx or {}
    glossary = ctx.get("glossary")
    if not isinstance(glossary, model.GlossaryCollector) or not glossary:
        return None

    blocks = [
        model.Heading(text="Glosario de términos", level=1),
        model.Markdown(text=(
            "Definición de los términos técnicos que aparecen en el informe. "
            "Cada término va resaltado en el texto y, al pulsarlo, salta a su "
            "definición en esta sección.")),
    ]

    # Resolve BEFORE sorting: a term registered bare (no label) carries its key
    # as label in the collector, so ordering by the collector's label would
    # place it by its key rather than by the human label the baseline catalog
    # supplies. Resolve first, then sort by the final, visible label.
    resolved = []
    for term in glossary.terms(by="order"):
        label, definition = _resolve_term(term)
        resolved.append((label, definition, model._safe_str(term.get("key"))))

    # list.sort is stable, so labels that compare equal under the folded key
    # keep the relative order in which the collector yielded them.
    resolved.sort(key=lambda entry: _label_sort_key(model._safe_str(entry[0])))

    # One clickable destination per term.
    blocks.extend(
        model.GlossaryEntry(key=key, label=label, definition=definition)
        for label, definition, key in resolved
    )

    return model.Chapter(id=CHAPTER_ID, title=CHAPTER_TITLE,
                         version=CHAPTER_VERSION, blocks=blocks)
|