feat(eda): portada cap01 + zebra global y emphasis de render
Itera el capítulo PORTADA del AutomaticEDA y añade dos mejoras globales de los renderers PDF/PPTX: 1. Zebra global (PDF): _place_kv_table ahora sombrea las filas pares igual que las DataTable, así toda tabla del documento queda rayada (no solo las DataTable). Mismo patrón coherente al partir/repetir cabecera. 2. Portada usa la descripción LLM rica (profile['llm']['summary']) cuando el perfil la tiene; se elimina del fallback derivado el texto de ruido "active la interpretación LLM (run_llm)…". No fuerza llamadas LLM en el capítulo, solo consume profile['llm'] si está. 3. Se quita el bloque "Criterios de calidad" de la portada (PDF y PPTX); el score "Calidad" se mantiene. 4. "Resumen del análisis" (PDF): los valores se alinean al margen derecho vía el nuevo KVTable.value_align="right". 5. Nombre del dataset en la portada PPTX más grande (44pt) y subrayado vía los nuevos hints Heading.underline / Heading.size_pt (el PDF los ignora). Bump CHAPTER_VERSION de portada 1.2.0 -> 1.3.0. Verificado: suite 213 passed / 1 skipped (incl. aceptación de los 16 capítulos); golden zebra = 185 filas zebra en 13 capítulos del PDF completo; portada con run_llm sin "Criterios de calidad", con descripción LLM rica y valores a la derecha; PPTX con nombre 44pt subrayado; edge sin LLM cae al fallback derivado sin ruido; fn index sin error. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -26,7 +26,7 @@ from datetime import datetime, timezone
|
|||||||
|
|
||||||
from .. import model
|
from .. import model
|
||||||
|
|
||||||
CHAPTER_VERSION = "1.2.0"
|
CHAPTER_VERSION = "1.3.0"
|
||||||
CHAPTER_ID = "portada"
|
CHAPTER_ID = "portada"
|
||||||
CHAPTER_TITLE = "Portada"
|
CHAPTER_TITLE = "Portada"
|
||||||
|
|
||||||
@@ -35,12 +35,9 @@ CHAPTER_TITLE = "Portada"
|
|||||||
# row represents) from it when the LLM layer ran (``run_llm``).
|
# row represents) from it when the LLM layer ran (``run_llm``).
|
||||||
_LLM_KEY = "llm"
|
_LLM_KEY = "llm"
|
||||||
|
|
||||||
# Default human description of what the table quality score measures. Chapters
|
# Font size (pt) for the dataset name on the PPTX cover slide — notably larger
|
||||||
# can override it via ctx["quality_criteria"].
|
# than the default H1 so the dataset name stands out (shown underlined too).
|
||||||
_DEFAULT_QUALITY_CRITERIA = (
|
_PPTX_TITLE_PT = 44.0
|
||||||
"media de los scores por columna (0–100): completitud (sin nulos/vacíos), "
|
|
||||||
"validez (tipo y rango coherentes) y consistencia (sin duplicados/constantes)."
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _storage_from_source(source: str) -> str:
|
def _storage_from_source(source: str) -> str:
|
||||||
@@ -120,7 +117,8 @@ def _summary_blocks(summary) -> list:
|
|||||||
|
|
||||||
blocks = [model.Heading(text="Resumen del análisis", level=2)]
|
blocks = [model.Heading(text="Resumen del análisis", level=2)]
|
||||||
if rows:
|
if rows:
|
||||||
blocks.append(model.KVTable(rows=rows))
|
# Values pinned to the right margin (numbers flush right, label left).
|
||||||
|
blocks.append(model.KVTable(rows=rows, value_align="right"))
|
||||||
if titles:
|
if titles:
|
||||||
bullets = "\n".join(f"- {model._safe_str(t)}" for t in titles)
|
bullets = "\n".join(f"- {model._safe_str(t)}" for t in titles)
|
||||||
blocks.append(model.Markdown(
|
blocks.append(model.Markdown(
|
||||||
@@ -213,9 +211,7 @@ def _derive_description(profile: dict, ctx: dict) -> str:
|
|||||||
score = profile.get("quality_score")
|
score = profile.get("quality_score")
|
||||||
if score is not None:
|
if score is not None:
|
||||||
parts.append(f"Calidad media estimada: {score}/100.")
|
parts.append(f"Calidad media estimada: {score}/100.")
|
||||||
parts.append(
|
parts.append("Resumen derivado del perfil.")
|
||||||
"Resumen derivado del perfil; active la interpretación LLM (`run_llm`) "
|
|
||||||
"para una descripción de negocio más rica.")
|
|
||||||
return " ".join(parts)
|
return " ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
@@ -259,7 +255,6 @@ def build_portada(profile: dict, ctx: dict):
|
|||||||
shape = f"{_fmt_int(n_rows)} filas × {_fmt_int(n_cols)} columnas"
|
shape = f"{_fmt_int(n_rows)} filas × {_fmt_int(n_cols)} columnas"
|
||||||
|
|
||||||
score = profile.get("quality_score")
|
score = profile.get("quality_score")
|
||||||
quality_criteria = ctx.get("quality_criteria") or _DEFAULT_QUALITY_CRITERIA
|
|
||||||
quality_value = "—" if score is None else f"{score} / 100"
|
quality_value = "—" if score is None else f"{score} / 100"
|
||||||
|
|
||||||
llm = _llm_block(profile, ctx)
|
llm = _llm_block(profile, ctx)
|
||||||
@@ -282,8 +277,11 @@ def build_portada(profile: dict, ctx: dict):
|
|||||||
|
|
||||||
# Title + dataset size shown together and BIG (Heading) at the top, kept on
|
# Title + dataset size shown together and BIG (Heading) at the top, kept on
|
||||||
# the same page (Group). The size is no longer buried in the metadata table.
|
# the same page (Group). The size is no longer buried in the metadata table.
|
||||||
|
# The dataset name is shown big and underlined on the PPTX cover slide
|
||||||
|
# (size_pt/underline are honoured by the PPTX renderer; the PDF ignores them).
|
||||||
cover = [
|
cover = [
|
||||||
model.Heading(text=str(dataset_name), level=1),
|
model.Heading(text=str(dataset_name), level=1, underline=True,
|
||||||
|
size_pt=_PPTX_TITLE_PT),
|
||||||
model.Markdown(text="**Automatic-EDA** · informe exploratorio automático"),
|
model.Markdown(text="**Automatic-EDA** · informe exploratorio automático"),
|
||||||
model.Heading(text=shape, level=2),
|
model.Heading(text=shape, level=2),
|
||||||
]
|
]
|
||||||
@@ -295,7 +293,6 @@ def build_portada(profile: dict, ctx: dict):
|
|||||||
("Almacenamiento", storage),
|
("Almacenamiento", storage),
|
||||||
("Generado", when),
|
("Generado", when),
|
||||||
("Calidad", quality_value),
|
("Calidad", quality_value),
|
||||||
("Criterios de calidad", quality_criteria),
|
|
||||||
]),
|
]),
|
||||||
model.Heading(text="Descripción", level=2),
|
model.Heading(text="Descripción", level=2),
|
||||||
model.Markdown(text=str(description)),
|
model.Markdown(text=str(description)),
|
||||||
|
|||||||
@@ -38,10 +38,18 @@ ENGINE_NAME = "AutomaticEDA"
|
|||||||
# --------------------------------------------------------------------------- #
|
# --------------------------------------------------------------------------- #
|
||||||
@dataclass
|
@dataclass
|
||||||
class Heading:
|
class Heading:
|
||||||
"""A section heading. ``level`` 1 (largest) .. 3 (smallest)."""
|
"""A section heading. ``level`` 1 (largest) .. 3 (smallest).
|
||||||
|
|
||||||
|
``underline`` and ``size_pt`` are optional emphasis hints honoured by the
|
||||||
|
PPTX renderer (the cover uses them to show the dataset name big and
|
||||||
|
underlined). ``size_pt`` overrides the per-level font size when set; the PDF
|
||||||
|
renderer ignores both so its layout is unchanged.
|
||||||
|
"""
|
||||||
|
|
||||||
text: str = ""
|
text: str = ""
|
||||||
level: int = 1
|
level: int = 1
|
||||||
|
underline: bool = False
|
||||||
|
size_pt: Optional[float] = None
|
||||||
kind: str = field(default="heading", init=False)
|
kind: str = field(default="heading", init=False)
|
||||||
|
|
||||||
|
|
||||||
@@ -62,10 +70,17 @@ class Markdown:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class KVTable:
|
class KVTable:
|
||||||
"""A two-column key/value table. ``rows`` is a list of ``(label, value)``."""
|
"""A two-column key/value table. ``rows`` is a list of ``(label, value)``.
|
||||||
|
|
||||||
|
``value_align`` controls the horizontal alignment of the value column in the
|
||||||
|
PDF renderer: ``"left"`` (default) keeps values next to the label column;
|
||||||
|
``"right"`` pins them to the right margin (used by the cover's analysis
|
||||||
|
summary so the numbers line up flush right).
|
||||||
|
"""
|
||||||
|
|
||||||
rows: list = field(default_factory=list)
|
rows: list = field(default_factory=list)
|
||||||
title: Optional[str] = None
|
title: Optional[str] = None
|
||||||
|
value_align: str = "left"
|
||||||
kind: str = field(default="kv_table", init=False)
|
kind: str = field(default="kv_table", init=False)
|
||||||
|
|
||||||
|
|
||||||
@@ -210,13 +225,20 @@ def as_block(obj: Any):
|
|||||||
# Build only with fields the dataclass accepts (ignore extras).
|
# Build only with fields the dataclass accepts (ignore extras).
|
||||||
try:
|
try:
|
||||||
if cls is Heading:
|
if cls is Heading:
|
||||||
|
size_pt = obj.get("size_pt")
|
||||||
return Heading(text=_safe_str(obj.get("text")),
|
return Heading(text=_safe_str(obj.get("text")),
|
||||||
level=int(obj.get("level", 1) or 1))
|
level=int(obj.get("level", 1) or 1),
|
||||||
|
underline=bool(obj.get("underline", False)),
|
||||||
|
size_pt=(float(size_pt)
|
||||||
|
if isinstance(size_pt, (int, float))
|
||||||
|
else None))
|
||||||
if cls is Markdown:
|
if cls is Markdown:
|
||||||
return Markdown(text=_safe_str(obj.get("text")))
|
return Markdown(text=_safe_str(obj.get("text")))
|
||||||
if cls is KVTable:
|
if cls is KVTable:
|
||||||
return KVTable(rows=list(obj.get("rows") or []),
|
return KVTable(rows=list(obj.get("rows") or []),
|
||||||
title=obj.get("title"))
|
title=obj.get("title"),
|
||||||
|
value_align=_safe_str(
|
||||||
|
obj.get("value_align")) or "left")
|
||||||
if cls is DataTable:
|
if cls is DataTable:
|
||||||
return DataTable(header=list(obj.get("header") or []),
|
return DataTable(header=list(obj.get("header") or []),
|
||||||
rows=list(obj.get("rows") or []),
|
rows=list(obj.get("rows") or []),
|
||||||
|
|||||||
@@ -317,10 +317,18 @@ def _place_kv_table(st: _PdfState, block) -> None:
|
|||||||
if title:
|
if title:
|
||||||
_place_heading(st, model.Heading(title, level=2))
|
_place_heading(st, model.Heading(title, level=2))
|
||||||
rows = getattr(block, "rows", []) or []
|
rows = getattr(block, "rows", []) or []
|
||||||
|
# ``value_align="right"`` pins the value column to the right margin (label
|
||||||
|
# left, number flush right) — used by the cover's analysis summary.
|
||||||
|
right = str(getattr(block, "value_align", "left")).lower() == "right"
|
||||||
key_w = 1.9 # inches reserved for the label column.
|
key_w = 1.9 # inches reserved for the label column.
|
||||||
|
# ``val_chars`` is the same for both alignments (usable width minus the label
|
||||||
|
# column); right alignment only changes the text anchor, not the wrap width.
|
||||||
val_chars = tl.chars_per_line(_USABLE_W - key_w - 0.1, _FS_BODY)
|
val_chars = tl.chars_per_line(_USABLE_W - key_w - 0.1, _FS_BODY)
|
||||||
lh = tl.line_height_in(_FS_BODY)
|
lh = tl.line_height_in(_FS_BODY)
|
||||||
for row in rows:
|
# ``data_idx`` is the 0-based logical row index: even rows (1-based) are
|
||||||
|
# zebra-shaded → 0-based odd indices, matching the data-table convention so
|
||||||
|
# every table in the document carries the same striping.
|
||||||
|
for data_idx, row in enumerate(rows):
|
||||||
try:
|
try:
|
||||||
label, value = row[0], row[1]
|
label, value = row[0], row[1]
|
||||||
except Exception: # noqa: BLE001
|
except Exception: # noqa: BLE001
|
||||||
@@ -329,11 +337,25 @@ def _place_kv_table(st: _PdfState, block) -> None:
|
|||||||
row_h = lh * len(v_lines) + _ROW_VPAD
|
row_h = lh * len(v_lines) + _ROW_VPAD
|
||||||
_ensure_space(st, row_h)
|
_ensure_space(st, row_h)
|
||||||
y0 = st.y
|
y0 = st.y
|
||||||
|
# Faint zebra fill for even rows, drawn first (zorder 0) so striping
|
||||||
|
# never hides the text/value drawn on top.
|
||||||
|
if data_idx % 2 == 1:
|
||||||
|
st.fig.add_artist(Rectangle(
|
||||||
|
(_xf(_ML), _yf(y0 + row_h)), _xf(_ML + _USABLE_W) - _xf(_ML),
|
||||||
|
_yf(y0) - _yf(y0 + row_h), transform=st.fig.transFigure,
|
||||||
|
color=_ZEBRA, lw=0, zorder=0))
|
||||||
st.fig.text(_xf(_ML), _yf(y0), tl.strip_inline_md(model._safe_str(label)),
|
st.fig.text(_xf(_ML), _yf(y0), tl.strip_inline_md(model._safe_str(label)),
|
||||||
fontsize=_FS_BODY, color=_MUTED, ha="left", va="top")
|
fontsize=_FS_BODY, color=_MUTED, ha="left", va="top",
|
||||||
|
zorder=2)
|
||||||
for k, vl in enumerate(v_lines):
|
for k, vl in enumerate(v_lines):
|
||||||
|
if right:
|
||||||
|
st.fig.text(_xf(_ML + _USABLE_W), _yf(y0 + k * lh), vl,
|
||||||
|
fontsize=_FS_BODY, color=_INK, ha="right",
|
||||||
|
va="top", zorder=2)
|
||||||
|
else:
|
||||||
st.fig.text(_xf(_ML + key_w), _yf(y0 + k * lh), vl,
|
st.fig.text(_xf(_ML + key_w), _yf(y0 + k * lh), vl,
|
||||||
fontsize=_FS_BODY, color=_INK, ha="left", va="top")
|
fontsize=_FS_BODY, color=_INK, ha="left",
|
||||||
|
va="top", zorder=2)
|
||||||
st.y = y0 + row_h
|
st.y = y0 + row_h
|
||||||
st.y += _GAP
|
st.y += _GAP
|
||||||
|
|
||||||
|
|||||||
@@ -135,7 +135,7 @@ def _ensure(st: _PptxState, height: float) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def _add_text(st: _PptxState, lines: list, fs: float, color, bold=False,
|
def _add_text(st: _PptxState, lines: list, fs: float, color, bold=False,
|
||||||
italic=False, indent=0.0, bullet=False) -> None:
|
italic=False, indent=0.0, bullet=False, underline=False) -> None:
|
||||||
lh = tl.line_height_in(fs)
|
lh = tl.line_height_in(fs)
|
||||||
height = lh * len(lines) + 0.05
|
height = lh * len(lines) + 0.05
|
||||||
_ensure(st, height)
|
_ensure(st, height)
|
||||||
@@ -153,6 +153,7 @@ def _add_text(st: _PptxState, lines: list, fs: float, color, bold=False,
|
|||||||
run.font.size = Pt(fs)
|
run.font.size = Pt(fs)
|
||||||
run.font.bold = bold
|
run.font.bold = bold
|
||||||
run.font.italic = italic
|
run.font.italic = italic
|
||||||
|
run.font.underline = underline
|
||||||
run.font.color.rgb = _rgb(color)
|
run.font.color.rgb = _rgb(color)
|
||||||
st.y += height
|
st.y += height
|
||||||
|
|
||||||
@@ -206,10 +207,16 @@ def _add_rich_text(st: _PptxState, rich_lines: list, fs: float, color,
|
|||||||
def _place_heading(st: _PptxState, block) -> None:
|
def _place_heading(st: _PptxState, block) -> None:
|
||||||
level = max(1, min(3, int(getattr(block, "level", 1) or 1)))
|
level = max(1, min(3, int(getattr(block, "level", 1) or 1)))
|
||||||
fs = {1: _FS_H1, 2: _FS_H2, 3: _FS_H3}[level]
|
fs = {1: _FS_H1, 2: _FS_H2, 3: _FS_H3}[level]
|
||||||
|
# Optional per-heading emphasis (cover dataset name): a larger font and an
|
||||||
|
# underline. ``size_pt`` overrides the per-level size when set.
|
||||||
|
size_override = getattr(block, "size_pt", None)
|
||||||
|
if isinstance(size_override, (int, float)) and size_override > 0:
|
||||||
|
fs = float(size_override)
|
||||||
|
underline = bool(getattr(block, "underline", False))
|
||||||
text = tl.strip_inline_md(getattr(block, "text", ""))
|
text = tl.strip_inline_md(getattr(block, "text", ""))
|
||||||
st.last_heading = text or st.last_heading
|
st.last_heading = text or st.last_heading
|
||||||
lines = tl.wrap(text, tl.chars_per_line(_USABLE_W, fs))
|
lines = tl.wrap(text, tl.chars_per_line(_USABLE_W, fs))
|
||||||
_add_text(st, lines, fs, _INK, bold=True)
|
_add_text(st, lines, fs, _INK, bold=True, underline=underline)
|
||||||
st.y += 0.04
|
st.y += 0.04
|
||||||
|
|
||||||
|
|
||||||
@@ -552,9 +559,11 @@ def _place_note(st: _PptxState, block) -> None:
|
|||||||
# WITHOUT drawing it so a Group can move whole to the next slide before drawing.
|
# WITHOUT drawing it so a Group can move whole to the next slide before drawing.
|
||||||
# Over-estimating only triggers an earlier slide break, never a content cut.
|
# Over-estimating only triggers an earlier slide break, never a content cut.
|
||||||
# --------------------------------------------------------------------------- #
|
# --------------------------------------------------------------------------- #
|
||||||
def _measure_heading_text(text: str, level: int) -> float:
|
def _measure_heading_text(text: str, level: int, size_pt=None) -> float:
|
||||||
level = max(1, min(3, int(level or 1)))
|
level = max(1, min(3, int(level or 1)))
|
||||||
fs = {1: _FS_H1, 2: _FS_H2, 3: _FS_H3}[level]
|
fs = {1: _FS_H1, 2: _FS_H2, 3: _FS_H3}[level]
|
||||||
|
if isinstance(size_pt, (int, float)) and size_pt > 0:
|
||||||
|
fs = float(size_pt)
|
||||||
lines = tl.wrap(tl.strip_inline_md(text), tl.chars_per_line(_USABLE_W, fs))
|
lines = tl.wrap(tl.strip_inline_md(text), tl.chars_per_line(_USABLE_W, fs))
|
||||||
return tl.line_height_in(fs) * len(lines) + 0.05 + 0.04
|
return tl.line_height_in(fs) * len(lines) + 0.05 + 0.04
|
||||||
|
|
||||||
@@ -679,7 +688,8 @@ def _measure_block(st: _PptxState, block) -> float:
|
|||||||
try:
|
try:
|
||||||
if kind == "heading":
|
if kind == "heading":
|
||||||
return _measure_heading_text(getattr(block, "text", ""),
|
return _measure_heading_text(getattr(block, "text", ""),
|
||||||
getattr(block, "level", 1))
|
getattr(block, "level", 1),
|
||||||
|
size_pt=getattr(block, "size_pt", None))
|
||||||
if kind == "markdown":
|
if kind == "markdown":
|
||||||
return _measure_markdown(block)
|
return _measure_markdown(block)
|
||||||
if kind in ("figure", "image"):
|
if kind in ("figure", "image"):
|
||||||
|
|||||||
Reference in New Issue
Block a user