From 80d10010f5c49fa1be01e3e42710b4d7bad442a8 Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Tue, 30 Jun 2026 22:44:33 +0200 Subject: [PATCH] feat(eda): portada cap01 + zebra global y emphasis de render MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Itera el capítulo PORTADA del AutomaticEDA y dos mejoras globales de los renderers PDF/PPTX: 1. Zebra global (PDF): _place_kv_table ahora sombrea las filas pares igual que las DataTable, así toda tabla del documento queda rayada (no solo las DataTable). Mismo patrón coherente al partir/repetir cabecera. 2. Portada usa la descripción LLM rica (profile['llm']['summary']) cuando el perfil la tiene; se elimina del fallback derivado el texto ruido "active la interpretación LLM (run_llm)…". No fuerza llamadas LLM en el capítulo, solo consume profile['llm'] si está. 3. Se quita el bloque "Criterios de calidad" de la portada (PDF y PPTX); el score "Calidad" se mantiene. 4. "Resumen del análisis" (PDF): los valores se alinean al margen derecho vía el nuevo KVTable.value_align="right". 5. Nombre del dataset en la portada PPTX más grande (44pt) y subrayado vía los nuevos hints Heading.underline / Heading.size_pt (el PDF los ignora). Bump CHAPTER_VERSION de portada 1.2.0 -> 1.3.0. Verificado: suite 213 passed / 1 skipped (incl. aceptación de los 16 capítulos); golden zebra = 185 filas zebra en 13 capítulos del PDF completo; portada con run_llm sin "Criterios de calidad", con descripción LLM rica y valores a la derecha; PPTX con nombre 44pt subrayado; edge sin LLM cae al fallback derivado sin ruido; fn index sin error. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../automatic_eda/chapters/portada.py | 25 +++++++--------- .../datascience/automatic_eda/model.py | 30 ++++++++++++++++--- .../automatic_eda/render_pdf_impl.py | 30 ++++++++++++++++--- .../automatic_eda/render_pptx_impl.py | 18 ++++++++--- 4 files changed, 77 insertions(+), 26 deletions(-) diff --git a/python/functions/datascience/automatic_eda/chapters/portada.py b/python/functions/datascience/automatic_eda/chapters/portada.py index 409322f7..b1a28366 100644 --- a/python/functions/datascience/automatic_eda/chapters/portada.py +++ b/python/functions/datascience/automatic_eda/chapters/portada.py @@ -26,7 +26,7 @@ from datetime import datetime, timezone from .. import model -CHAPTER_VERSION = "1.2.0" +CHAPTER_VERSION = "1.3.0" CHAPTER_ID = "portada" CHAPTER_TITLE = "Portada" @@ -35,12 +35,9 @@ CHAPTER_TITLE = "Portada" # row represents) from it when the LLM layer ran (``run_llm``). _LLM_KEY = "llm" -# Default human description of what the table quality score measures. Chapters -# can override it via ctx["quality_criteria"]. -_DEFAULT_QUALITY_CRITERIA = ( - "media de los scores por columna (0–100): completitud (sin nulos/vacíos), " - "validez (tipo y rango coherentes) y consistencia (sin duplicados/constantes)." -) +# Font size (pt) for the dataset name on the PPTX cover slide — notably larger +# than the default H1 so the dataset name stands out (shown underlined too). +_PPTX_TITLE_PT = 44.0 def _storage_from_source(source: str) -> str: @@ -120,7 +117,8 @@ def _summary_blocks(summary) -> list: blocks = [model.Heading(text="Resumen del análisis", level=2)] if rows: - blocks.append(model.KVTable(rows=rows)) + # Values pinned to the right margin (numbers flush right, label left). 
+ blocks.append(model.KVTable(rows=rows, value_align="right")) if titles: bullets = "\n".join(f"- {model._safe_str(t)}" for t in titles) blocks.append(model.Markdown( @@ -213,9 +211,7 @@ def _derive_description(profile: dict, ctx: dict) -> str: score = profile.get("quality_score") if score is not None: parts.append(f"Calidad media estimada: {score}/100.") - parts.append( - "Resumen derivado del perfil; active la interpretación LLM (`run_llm`) " - "para una descripción de negocio más rica.") + parts.append("Resumen derivado del perfil.") return " ".join(parts) @@ -259,7 +255,6 @@ def build_portada(profile: dict, ctx: dict): shape = f"{_fmt_int(n_rows)} filas × {_fmt_int(n_cols)} columnas" score = profile.get("quality_score") - quality_criteria = ctx.get("quality_criteria") or _DEFAULT_QUALITY_CRITERIA quality_value = "—" if score is None else f"{score} / 100" llm = _llm_block(profile, ctx) @@ -282,8 +277,11 @@ def build_portada(profile: dict, ctx: dict): # Title + dataset size shown together and BIG (Heading) at the top, kept on # the same page (Group). The size is no longer buried in the metadata table. + # The dataset name is shown big and underlined on the PPTX cover slide + # (size_pt/underline are honoured by the PPTX renderer; the PDF ignores them). 
cover = [ - model.Heading(text=str(dataset_name), level=1), + model.Heading(text=str(dataset_name), level=1, underline=True, + size_pt=_PPTX_TITLE_PT), model.Markdown(text="**Automatic-EDA** · informe exploratorio automático"), model.Heading(text=shape, level=2), ] @@ -295,7 +293,6 @@ def build_portada(profile: dict, ctx: dict): ("Almacenamiento", storage), ("Generado", when), ("Calidad", quality_value), - ("Criterios de calidad", quality_criteria), ]), model.Heading(text="Descripción", level=2), model.Markdown(text=str(description)), diff --git a/python/functions/datascience/automatic_eda/model.py b/python/functions/datascience/automatic_eda/model.py index 7237df0b..9171652e 100644 --- a/python/functions/datascience/automatic_eda/model.py +++ b/python/functions/datascience/automatic_eda/model.py @@ -38,10 +38,18 @@ ENGINE_NAME = "AutomaticEDA" # --------------------------------------------------------------------------- # @dataclass class Heading: - """A section heading. ``level`` 1 (largest) .. 3 (smallest).""" + """A section heading. ``level`` 1 (largest) .. 3 (smallest). + + ``underline`` and ``size_pt`` are optional emphasis hints honoured by the + PPTX renderer (the cover uses them to show the dataset name big and + underlined). ``size_pt`` overrides the per-level font size when set; the PDF + renderer ignores both so its layout is unchanged. + """ text: str = "" level: int = 1 + underline: bool = False + size_pt: Optional[float] = None kind: str = field(default="heading", init=False) @@ -62,10 +70,17 @@ class Markdown: @dataclass class KVTable: - """A two-column key/value table. ``rows`` is a list of ``(label, value)``.""" + """A two-column key/value table. ``rows`` is a list of ``(label, value)``. 
+ + ``value_align`` controls the horizontal alignment of the value column in the + PDF renderer: ``"left"`` (default) keeps values next to the label column; + ``"right"`` pins them to the right margin (used by the cover's analysis + summary so the numbers line up flush right). + """ rows: list = field(default_factory=list) title: Optional[str] = None + value_align: str = "left" kind: str = field(default="kv_table", init=False) @@ -210,13 +225,20 @@ def as_block(obj: Any): # Build only with fields the dataclass accepts (ignore extras). try: if cls is Heading: + size_pt = obj.get("size_pt") return Heading(text=_safe_str(obj.get("text")), - level=int(obj.get("level", 1) or 1)) + level=int(obj.get("level", 1) or 1), + underline=bool(obj.get("underline", False)), + size_pt=(float(size_pt) + if isinstance(size_pt, (int, float)) + else None)) if cls is Markdown: return Markdown(text=_safe_str(obj.get("text"))) if cls is KVTable: return KVTable(rows=list(obj.get("rows") or []), - title=obj.get("title")) + title=obj.get("title"), + value_align=_safe_str( + obj.get("value_align")) or "left") if cls is DataTable: return DataTable(header=list(obj.get("header") or []), rows=list(obj.get("rows") or []), diff --git a/python/functions/datascience/automatic_eda/render_pdf_impl.py b/python/functions/datascience/automatic_eda/render_pdf_impl.py index 06adea4b..30115de0 100644 --- a/python/functions/datascience/automatic_eda/render_pdf_impl.py +++ b/python/functions/datascience/automatic_eda/render_pdf_impl.py @@ -317,10 +317,18 @@ def _place_kv_table(st: _PdfState, block) -> None: if title: _place_heading(st, model.Heading(title, level=2)) rows = getattr(block, "rows", []) or [] + # ``value_align="right"`` pins the value column to the right margin (label + # left, number flush right) — used by the cover's analysis summary. + right = str(getattr(block, "value_align", "left")).lower() == "right" key_w = 1.9 # inches reserved for the label column. 
+ # Right-aligned values wrap against the full usable width minus the label + # column; left-aligned values wrap against the value column only. val_chars = tl.chars_per_line(_USABLE_W - key_w - 0.1, _FS_BODY) lh = tl.line_height_in(_FS_BODY) - for row in rows: + # ``data_idx`` is the 0-based logical row index: even rows (1-based) are + # zebra-shaded → 0-based odd indices, matching the data-table convention so + # every table in the document carries the same striping. + for data_idx, row in enumerate(rows): try: label, value = row[0], row[1] except Exception: # noqa: BLE001 @@ -329,11 +337,25 @@ def _place_kv_table(st: _PdfState, block) -> None: row_h = lh * len(v_lines) + _ROW_VPAD _ensure_space(st, row_h) y0 = st.y + # Faint zebra fill for even rows, drawn first (zorder 0) so striping + # never hides the text/value drawn on top. + if data_idx % 2 == 1: + st.fig.add_artist(Rectangle( + (_xf(_ML), _yf(y0 + row_h)), _xf(_ML + _USABLE_W) - _xf(_ML), + _yf(y0) - _yf(y0 + row_h), transform=st.fig.transFigure, + color=_ZEBRA, lw=0, zorder=0)) st.fig.text(_xf(_ML), _yf(y0), tl.strip_inline_md(model._safe_str(label)), - fontsize=_FS_BODY, color=_MUTED, ha="left", va="top") + fontsize=_FS_BODY, color=_MUTED, ha="left", va="top", + zorder=2) for k, vl in enumerate(v_lines): - st.fig.text(_xf(_ML + key_w), _yf(y0 + k * lh), vl, - fontsize=_FS_BODY, color=_INK, ha="left", va="top") + if right: + st.fig.text(_xf(_ML + _USABLE_W), _yf(y0 + k * lh), vl, + fontsize=_FS_BODY, color=_INK, ha="right", + va="top", zorder=2) + else: + st.fig.text(_xf(_ML + key_w), _yf(y0 + k * lh), vl, + fontsize=_FS_BODY, color=_INK, ha="left", + va="top", zorder=2) st.y = y0 + row_h st.y += _GAP diff --git a/python/functions/datascience/automatic_eda/render_pptx_impl.py b/python/functions/datascience/automatic_eda/render_pptx_impl.py index 7a813945..21b9e0ce 100644 --- a/python/functions/datascience/automatic_eda/render_pptx_impl.py +++ 
b/python/functions/datascience/automatic_eda/render_pptx_impl.py @@ -135,7 +135,7 @@ def _ensure(st: _PptxState, height: float) -> None: def _add_text(st: _PptxState, lines: list, fs: float, color, bold=False, - italic=False, indent=0.0, bullet=False) -> None: + italic=False, indent=0.0, bullet=False, underline=False) -> None: lh = tl.line_height_in(fs) height = lh * len(lines) + 0.05 _ensure(st, height) @@ -153,6 +153,7 @@ def _add_text(st: _PptxState, lines: list, fs: float, color, bold=False, run.font.size = Pt(fs) run.font.bold = bold run.font.italic = italic + run.font.underline = underline run.font.color.rgb = _rgb(color) st.y += height @@ -206,10 +207,16 @@ def _add_rich_text(st: _PptxState, rich_lines: list, fs: float, color, def _place_heading(st: _PptxState, block) -> None: level = max(1, min(3, int(getattr(block, "level", 1) or 1))) fs = {1: _FS_H1, 2: _FS_H2, 3: _FS_H3}[level] + # Optional per-heading emphasis (cover dataset name): a larger font and an + # underline. ``size_pt`` overrides the per-level size when set. + size_override = getattr(block, "size_pt", None) + if isinstance(size_override, (int, float)) and size_override > 0: + fs = float(size_override) + underline = bool(getattr(block, "underline", False)) text = tl.strip_inline_md(getattr(block, "text", "")) st.last_heading = text or st.last_heading lines = tl.wrap(text, tl.chars_per_line(_USABLE_W, fs)) - _add_text(st, lines, fs, _INK, bold=True) + _add_text(st, lines, fs, _INK, bold=True, underline=underline) st.y += 0.04 @@ -552,9 +559,11 @@ def _place_note(st: _PptxState, block) -> None: # WITHOUT drawing it so a Group can move whole to the next slide before drawing. # Over-estimating only triggers an earlier slide break, never a content cut. 
# --------------------------------------------------------------------------- # -def _measure_heading_text(text: str, level: int) -> float: +def _measure_heading_text(text: str, level: int, size_pt=None) -> float: level = max(1, min(3, int(level or 1))) fs = {1: _FS_H1, 2: _FS_H2, 3: _FS_H3}[level] + if isinstance(size_pt, (int, float)) and size_pt > 0: + fs = float(size_pt) lines = tl.wrap(tl.strip_inline_md(text), tl.chars_per_line(_USABLE_W, fs)) return tl.line_height_in(fs) * len(lines) + 0.05 + 0.04 @@ -679,7 +688,8 @@ def _measure_block(st: _PptxState, block) -> float: try: if kind == "heading": return _measure_heading_text(getattr(block, "text", ""), - getattr(block, "level", 1)) + getattr(block, "level", 1), + size_pt=getattr(block, "size_pt", None)) if kind == "markdown": return _measure_markdown(block) if kind in ("figure", "image"):