feat(datascience): auto-commit con 7 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -17,7 +17,7 @@ from __future__ import annotations
|
||||
|
||||
from .. import model
|
||||
|
||||
CHAPTER_VERSION = "1.1.0"
|
||||
CHAPTER_VERSION = "1.1.1"
|
||||
CHAPTER_ID = "glosario"
|
||||
CHAPTER_TITLE = "Glosario"
|
||||
|
||||
@@ -89,14 +89,19 @@ def build_glosario(profile: dict, ctx: dict):
|
||||
"Cada término va resaltado en el texto y, al pulsarlo, salta a su "
|
||||
"definición en esta sección.")),
|
||||
]
|
||||
# One clickable destination per term, alphabetically by visible label. A term
|
||||
# registered without a definition is completed from the canonical baseline.
|
||||
for term in glossary.terms(by="label"):
|
||||
# One clickable destination per term, alphabetically by *visible* label. The
|
||||
# baseline resolution must happen BEFORE sorting: a term registered bare (no
|
||||
# label) carries its key as label in the collector, so ordering by the
|
||||
# collector's label would place it by its key instead of by the human label
|
||||
# supplied by the baseline catalog. Resolve first, then sort by the final label.
|
||||
resolved = []
|
||||
for term in glossary.terms(by="order"):
|
||||
label, definition = _resolve_term(term)
|
||||
resolved.append((label, definition, model._safe_str(term.get("key"))))
|
||||
resolved.sort(key=lambda e: model._safe_str(e[0]).lower())
|
||||
for label, definition, key in resolved:
|
||||
blocks.append(model.GlossaryEntry(
|
||||
key=model._safe_str(term.get("key")),
|
||||
label=label,
|
||||
definition=definition))
|
||||
key=key, label=label, definition=definition))
|
||||
|
||||
return model.Chapter(id=CHAPTER_ID, title=CHAPTER_TITLE,
|
||||
version=CHAPTER_VERSION, blocks=blocks)
|
||||
|
||||
@@ -0,0 +1,181 @@
|
||||
"""Tests for the GLOSARIO chapter — DoD: golden + edges + degradation + no-cut render.
|
||||
|
||||
The glossary is the last chapter of every AutomaticEDA document. It does not read
|
||||
the profile: it turns the terms that the other chapters registered on the shared
|
||||
``GlossaryCollector`` (``ctx['glossary']``) into one clickable ``GlossaryEntry``
|
||||
destination each, alphabetically by visible label.
|
||||
|
||||
Covered here:
|
||||
|
||||
- **Golden**: a collector with three terms (one carrying its own definition, two
|
||||
registered bare and completed from the canonical baseline catalog) builds a
|
||||
``Chapter`` with three ``GlossaryEntry`` blocks, alphabetically ordered, and
|
||||
renders to PDF and PPTX with nothing cut.
|
||||
- **Baseline resolution** (``_resolve_term``): a bare term whose key is in the
|
||||
baseline gets its label *and* definition filled in; a term that already carries
|
||||
its own definition is never overwritten.
|
||||
- **Edges**: ``None`` / ``{}`` ctx, an empty collector and a non-collector value in
|
||||
``ctx['glossary']`` all return ``None`` (the chapter simply disappears) and never
|
||||
raise, even with a ``None`` profile.
|
||||
- **Click target**: every emitted entry carries the registered ``key`` so each
|
||||
in-text ``[[term:key]]`` appearance resolves to a real jump.
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from pptx import Presentation
|
||||
from pypdf import PdfReader
|
||||
|
||||
from datascience.automatic_eda.chapters.glosario import (
|
||||
_BASELINE_TERMS,
|
||||
_resolve_term,
|
||||
build_glosario,
|
||||
)
|
||||
from datascience.automatic_eda.model import (
|
||||
Chapter,
|
||||
GlossaryCollector,
|
||||
GlossaryEntry,
|
||||
)
|
||||
from datascience.render_automatic_eda_pdf import render_automatic_eda_pdf
|
||||
from datascience.render_automatic_eda_pptx import render_automatic_eda_pptx
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Helpers.
|
||||
# --------------------------------------------------------------------------- #
|
||||
def _entries(chapter: Chapter) -> list:
    """Return the chapter's ``GlossaryEntry`` blocks, preserving document order."""
    found = []
    for block in chapter.blocks:
        # Any other block type in the chapter is not a glossary destination.
        if isinstance(block, GlossaryEntry):
            found.append(block)
    return found
|
||||
|
||||
|
||||
def _render_both(chapter: Chapter, tag: str):
    """Render the chapter to PDF and PPTX; return (pdf_text, n_slides).

    Both files are written into a temporary directory that is removed as soon
    as the function returns, so repeated test runs leave nothing behind on
    disk. Everything that needs the files (existence/size checks, PDF text
    extraction, slide count) therefore happens inside the ``with`` block.

    Args:
        chapter: The built chapter, rendered as a one-chapter document.
        tag: Short label used in the temp-dir prefix and the document title.

    Returns:
        A ``(pdf_text, n_slides)`` tuple: the concatenated text extracted from
        every PDF page, and the number of slides in the generated PPTX.

    Raises:
        AssertionError: If either output file is missing or empty.
    """
    meta = {"title": f"EDA — {tag}"}
    with tempfile.TemporaryDirectory(prefix=f"glosario_{tag}_") as tmp:
        pdf_path = os.path.join(tmp, "out.pdf")
        pptx_path = os.path.join(tmp, "out.pptx")
        render_automatic_eda_pdf([chapter], pdf_path, meta)
        render_automatic_eda_pptx([chapter], pptx_path, meta)
        assert os.path.exists(pdf_path) and os.path.getsize(pdf_path) > 0
        assert os.path.exists(pptx_path) and os.path.getsize(pptx_path) > 0
        # Extract while the files still exist; a page with no extractable text
        # contributes an empty string instead of None.
        text = "".join(p.extract_text() or "" for p in PdfReader(pdf_path).pages)
        n_slides = len(Presentation(pptx_path).slides)
    return text, n_slides
|
||||
|
||||
|
||||
def _collector_three_terms() -> GlossaryCollector:
    """Build a collector with three terms, registered out of alphabetical order.

    Registration order is ``entropia``, ``pagina_categorica``,
    ``histograma_boxplot``:

    - ``entropia`` carries its own label and definition (it must not be
      overwritten by the baseline).
    - ``pagina_categorica`` and ``histograma_boxplot`` are registered bare, so
      their label and definition are expected to come from the baseline.
    """
    collector = GlossaryCollector()
    own_definition = (
        "Medida de la incertidumbre o dispersión de una variable categórica."
    )
    collector.add("entropia", "Entropía", own_definition)
    # Bare registrations: key only, no label and no definition.
    for bare_key in ("pagina_categorica", "histograma_boxplot"):
        collector.add(bare_key)
    return collector
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Golden.
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_golden_terms_render_clickable_entries():
    """Golden path: three registered terms become three ordered, renderable entries."""
    collector = _collector_three_terms()
    chapter = build_glosario({"table": "x"}, {"glossary": collector})

    # Chapter envelope: type, identity and pinned version.
    assert isinstance(chapter, Chapter)
    assert chapter.id == "glosario"
    assert chapter.title == "Glosario"
    assert chapter.version == "1.1.1"

    # Exactly one GlossaryEntry per registered term.
    entries = _entries(chapter)
    assert len(entries) == 3
    assert all(isinstance(entry, GlossaryEntry) for entry in entries)

    # Entries come out sorted case-insensitively by their visible label, even
    # though the terms were registered in a different order.
    labels = [entry.label for entry in entries]
    assert labels == sorted(labels, key=str.lower)
    assert labels[0] == "Cómo leer el histograma y el boxplot"
    assert labels[-1] == "Entropía"

    # The two bare terms received a definition; the term registered with its
    # own definition kept it.
    definitions = {entry.key: entry.definition for entry in entries}
    assert "boxplot de Tukey" in definitions["histograma_boxplot"]
    assert "identificador" in definitions["pagina_categorica"]
    assert definitions["entropia"].startswith("Medida de la incertidumbre")

    # Both renderers produce output; one label is checked in the PDF text and
    # the PPTX must contain at least one slide.
    pdf_text, n_slides = _render_both(chapter, "golden")
    assert "Entropía" in pdf_text
    assert n_slides >= 1
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Baseline resolution (_resolve_term).
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_resolve_term_completes_label_and_definition_from_baseline():
    """A bare term (label equal to its key, empty definition) is filled in from
    the baseline catalog: both the label and the definition."""
    bare_key = "histograma_boxplot"
    bare_term = {"key": bare_key, "label": bare_key, "definition": ""}
    resolved_label, resolved_definition = _resolve_term(bare_term)
    assert resolved_label == _BASELINE_TERMS[bare_key]["label"]
    assert "boxplot de Tukey" in resolved_definition
|
||||
|
||||
|
||||
def test_resolve_term_keeps_own_definition_over_baseline():
    """A term whose key is in the baseline but that carries its own label and
    definition comes back untouched."""
    own_def = "Definición propia que no debe pisarse."
    term = {
        "key": "pagina_categorica",
        "label": "Mi etiqueta",
        "definition": own_def,
    }
    resolved_label, resolved_definition = _resolve_term(term)
    assert resolved_label == "Mi etiqueta"
    assert resolved_definition == own_def
|
||||
|
||||
|
||||
def test_resolve_term_unknown_key_returns_as_is():
    """A term with a key that has no baseline entry keeps its label and definition."""
    free_term = {
        "key": "sin_baseline",
        "label": "Término libre",
        "definition": "Texto.",
    }
    resolved_label, resolved_definition = _resolve_term(free_term)
    assert resolved_label == "Término libre"
    assert resolved_definition == "Texto."
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Edges / degradation — the chapter disappears instead of raising.
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_none_when_no_glossary():
    """With an empty context or no context at all, no chapter is built."""
    for ctx in ({}, None):
        assert build_glosario({"table": "x"}, ctx) is None
|
||||
|
||||
|
||||
def test_none_when_empty_collector():
    """A collector with no registered terms yields no chapter."""
    empty_collector = GlossaryCollector()
    result = build_glosario({"table": "x"}, {"glossary": empty_collector})
    assert result is None
|
||||
|
||||
|
||||
def test_none_when_glossary_is_not_a_collector():
    """A stray value under ``ctx['glossary']`` (a list, a plain dict) is not
    treated as a collector: no chapter is built."""
    stray_values = (
        ["not", "a", "collector"],
        {"entropia": "x"},
    )
    for stray in stray_values:
        assert build_glosario({"table": "x"}, {"glossary": stray}) is None
|
||||
|
||||
|
||||
def test_none_profile_does_not_raise():
    """A ``None`` profile is tolerated: with a populated collector the chapter
    is still built, and with no context either the result is ``None``."""
    collector = GlossaryCollector()
    collector.add("entropia", "Entropía", "def")

    built = build_glosario(None, {"glossary": collector})
    assert isinstance(built, Chapter)

    nothing = build_glosario(None, None)
    assert nothing is None
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Click target — each entry carries its registration key.
|
||||
# --------------------------------------------------------------------------- #
|
||||
def test_entries_carry_registered_key_as_click_target():
    """Every emitted entry exposes the key its term was registered under."""
    chapter = build_glosario({}, {"glossary": _collector_three_terms()})
    emitted_keys = set()
    for entry in _entries(chapter):
        emitted_keys.add(entry.key)
    expected_keys = {"entropia", "pagina_categorica", "histograma_boxplot"}
    assert emitted_keys == expected_keys
|
||||
Reference in New Issue
Block a user