fix(eda): keep-together de gráfico+título+descripción en 5 capítulos
Los capítulos modelos, timeseries, geospatial, agregacion y missingness (bloque de ranking) emitían Heading+Figure sueltos, de modo que el paginador podía dejar el título y la descripción de una columna/par en una página y su gráfico en la siguiente. Se envuelve cada unidad (Heading + descripción/tablas + Figure) en un model.Group, la unidad keep-together que ambos renderers (PDF/PPTX) miden entera y mueven en bloque cuando no cabe, siguiendo el patrón ya usado por num_distr y correlacion. El orden y el contenido de los bloques son idénticos: solo se envuelven. La degradación honesta se conserva (una figura None nunca queda dentro de un Group vacío). Los tests que comprobaban figuras sueltas se ajustaron para comprobar la Figure DENTRO del Group, sin relajar ningún assert. Bump de CHAPTER_VERSION PATCH (1.0.0->1.0.1) en los 5 capítulos. El heatmap de co-ocurrencia de missingness ya iba agrupado y no se toca.
This commit is contained in:
@@ -73,7 +73,10 @@ try:
|
|||||||
except Exception: # noqa: BLE001
|
except Exception: # noqa: BLE001
|
||||||
suggest_aggregations_llm = None # type: ignore[assignment]
|
suggest_aggregations_llm = None # type: ignore[assignment]
|
||||||
|
|
||||||
CHAPTER_VERSION = "1.0.0"
|
# 1.0.1 — keep-together: cada gráfico (barras por grupo, barras del pivot) se
|
||||||
|
# envuelve con su Heading + Markdown + tabla resumen en un model.Group para que el
|
||||||
|
# paginador no separe el gráfico de su título/descripción. Cada unidad, su grupo.
|
||||||
|
CHAPTER_VERSION = "1.0.1"
|
||||||
CHAPTER_ID = "agregacion"
|
CHAPTER_ID = "agregacion"
|
||||||
CHAPTER_TITLE = "Agregación por grupos"
|
CHAPTER_TITLE = "Agregación por grupos"
|
||||||
|
|
||||||
@@ -395,11 +398,11 @@ def _groupby_section(group_by: str, measures: list, result: dict, why: str) -> l
|
|||||||
return []
|
return []
|
||||||
eff_measures = result.get("measures") or measures or []
|
eff_measures = result.get("measures") or measures or []
|
||||||
|
|
||||||
blocks = [model.Heading(text=f"Agrupado por «{group_by}»", level=2)]
|
head = model.Heading(text=f"Agrupado por «{group_by}»", level=2)
|
||||||
intro = f"**{why}.** " if why else ""
|
intro = f"**{why}.** " if why else ""
|
||||||
intro += (f"{_fmt_num(result.get('n_groups') or len(groups))} grupos"
|
intro += (f"{_fmt_num(result.get('n_groups') or len(groups))} grupos"
|
||||||
f"{' (top por tamaño)' if result.get('truncated') else ''}.")
|
f"{' (top por tamaño)' if result.get('truncated') else ''}.")
|
||||||
blocks.append(model.Markdown(text=intro))
|
intro_md = model.Markdown(text=intro)
|
||||||
|
|
||||||
# Summary table: one row per group, count + mean of every measure.
|
# Summary table: one row per group, count + mean of every measure.
|
||||||
header = ["Grupo", "n"] + [f"{m} (media)" for m in eff_measures]
|
header = ["Grupo", "n"] + [f"{m} (media)" for m in eff_measures]
|
||||||
@@ -409,20 +412,16 @@ def _groupby_section(group_by: str, measures: list, result: dict, why: str) -> l
|
|||||||
for m in eff_measures:
|
for m in eff_measures:
|
||||||
row.append(_fmt_num(_measure_mean(g, m), 2))
|
row.append(_fmt_num(_measure_mean(g, m), 2))
|
||||||
rows.append(row)
|
rows.append(row)
|
||||||
blocks.append(model.DataTable(
|
summary_tbl = model.DataTable(
|
||||||
header=header, rows=rows, title=f"Resumen por «{group_by}»",
|
header=header, rows=rows, title=f"Resumen por «{group_by}»",
|
||||||
note="Conteo de filas y media de cada medida por grupo."))
|
note="Conteo de filas y media de cada medida por grupo.")
|
||||||
|
|
||||||
if not eff_measures:
|
if not eff_measures:
|
||||||
return blocks
|
return [head, intro_md, summary_tbl]
|
||||||
|
|
||||||
# Primary measure: a bar chart + a detail table (mean/median/std/min/max).
|
# Primary measure: a bar chart + a detail table (mean/median/std/min/max).
|
||||||
primary = eff_measures[0]
|
primary = eff_measures[0]
|
||||||
bars = _make_group_bars(group_by, primary, groups)
|
bars = _make_group_bars(group_by, primary, groups)
|
||||||
if bars is not None:
|
|
||||||
blocks.append(model.Figure(
|
|
||||||
make=_group_bars_maker(group_by, primary, groups),
|
|
||||||
caption=f"Media de «{primary}» por «{group_by}» (barras desde cero)."))
|
|
||||||
|
|
||||||
det_header = ["Grupo", "n", "media", "mediana", "σ", "mín", "máx"]
|
det_header = ["Grupo", "n", "media", "mediana", "σ", "mín", "máx"]
|
||||||
det_rows = []
|
det_rows = []
|
||||||
@@ -435,10 +434,20 @@ def _groupby_section(group_by: str, measures: list, result: dict, why: str) -> l
|
|||||||
_fmt_num(ms.get("std"), 2), _fmt_num(ms.get("min"), 2),
|
_fmt_num(ms.get("std"), 2), _fmt_num(ms.get("min"), 2),
|
||||||
_fmt_num(ms.get("max"), 2),
|
_fmt_num(ms.get("max"), 2),
|
||||||
])
|
])
|
||||||
blocks.append(model.DataTable(
|
detail_tbl = model.DataTable(
|
||||||
header=det_header, rows=det_rows,
|
header=det_header, rows=det_rows,
|
||||||
title=f"Detalle de «{primary}» por «{group_by}»"))
|
title=f"Detalle de «{primary}» por «{group_by}»")
|
||||||
return blocks
|
|
||||||
|
if bars is not None:
|
||||||
|
# Keep-together: heading + intro + summary table + the bar chart ride on
|
||||||
|
# the same page/slide (the renderers move the whole Group when it does not
|
||||||
|
# fit), so the chart never gets stranded from its title. The per-measure
|
||||||
|
# detail table (split-safe) flows after the group.
|
||||||
|
fig = model.Figure(
|
||||||
|
make=_group_bars_maker(group_by, primary, groups),
|
||||||
|
caption=f"Media de «{primary}» por «{group_by}» (barras desde cero).")
|
||||||
|
return [model.Group(blocks=[head, intro_md, summary_tbl, fig]), detail_tbl]
|
||||||
|
return [head, intro_md, summary_tbl, detail_tbl]
|
||||||
|
|
||||||
|
|
||||||
def _pivot_section(pivot_spec: dict, result: dict) -> list:
|
def _pivot_section(pivot_spec: dict, result: dict) -> list:
|
||||||
@@ -457,13 +466,13 @@ def _pivot_section(pivot_spec: dict, result: dict) -> list:
|
|||||||
agg = result.get("agg") or pivot_spec.get("agg") or "mean"
|
agg = result.get("agg") or pivot_spec.get("agg") or "mean"
|
||||||
why = pivot_spec.get("why") or ""
|
why = pivot_spec.get("why") or ""
|
||||||
|
|
||||||
blocks = [model.Heading(text=f"Pivot: «{index}» × «{columns}»", level=2)]
|
head = model.Heading(text=f"Pivot: «{index}» × «{columns}»", level=2)
|
||||||
intro = f"**{why}.** " if why else ""
|
intro = f"**{why}.** " if why else ""
|
||||||
intro += (f"{agg} de «{value}» cruzando «{index}» (filas) y «{columns}» "
|
intro += (f"{agg} de «{value}» cruzando «{index}» (filas) y «{columns}» "
|
||||||
f"(columnas).")
|
f"(columnas).")
|
||||||
if result.get("truncated_rows") or result.get("truncated_cols"):
|
if result.get("truncated_rows") or result.get("truncated_cols"):
|
||||||
intro += " Limitado a las filas/columnas más frecuentes."
|
intro += " Limitado a las filas/columnas más frecuentes."
|
||||||
blocks.append(model.Markdown(text=intro))
|
intro_md = model.Markdown(text=intro)
|
||||||
|
|
||||||
header = [model._safe_str(index)] + [model._safe_str(c) for c in col_labels]
|
header = [model._safe_str(index)] + [model._safe_str(c) for c in col_labels]
|
||||||
rows = []
|
rows = []
|
||||||
@@ -474,20 +483,23 @@ def _pivot_section(pivot_spec: dict, result: dict) -> list:
|
|||||||
cell = cells[j] if j < len(cells) else None
|
cell = cells[j] if j < len(cells) else None
|
||||||
row.append(_fmt_num(cell, 2))
|
row.append(_fmt_num(cell, 2))
|
||||||
rows.append(row)
|
rows.append(row)
|
||||||
blocks.append(model.DataTable(
|
matrix_tbl = model.DataTable(
|
||||||
header=header, rows=rows,
|
header=header, rows=rows,
|
||||||
title=f"{agg} de «{value}»",
|
title=f"{agg} de «{value}»",
|
||||||
note=f"Cada celda es {agg} de «{value}» para esa combinación."))
|
note=f"Cada celda es {agg} de «{value}» para esa combinación.")
|
||||||
|
|
||||||
fig_pivot = {"row_labels": row_labels, "col_labels": col_labels,
|
fig_pivot = {"row_labels": row_labels, "col_labels": col_labels,
|
||||||
"matrix": matrix, "index": index, "columns": columns,
|
"matrix": matrix, "index": index, "columns": columns,
|
||||||
"value": value, "agg": agg}
|
"value": value, "agg": agg}
|
||||||
if _make_pivot_bars(fig_pivot) is not None:
|
if _make_pivot_bars(fig_pivot) is not None:
|
||||||
blocks.append(model.Figure(
|
# Keep-together: heading + intro + pivot table + the grouped-bar chart on
|
||||||
|
# one page/slide, so the chart is never stranded from its title/table.
|
||||||
|
fig = model.Figure(
|
||||||
make=_pivot_bars_maker(fig_pivot),
|
make=_pivot_bars_maker(fig_pivot),
|
||||||
caption=f"{agg} de «{value}» por «{index}» y «{columns}» "
|
caption=f"{agg} de «{value}» por «{index}» y «{columns}» "
|
||||||
f"(barras agrupadas)."))
|
f"(barras agrupadas).")
|
||||||
return blocks
|
return [model.Group(blocks=[head, intro_md, matrix_tbl, fig])]
|
||||||
|
return [head, intro_md, matrix_tbl]
|
||||||
|
|
||||||
|
|
||||||
def _insights_section(ctx: dict) -> list:
|
def _insights_section(ctx: dict) -> list:
|
||||||
|
|||||||
@@ -114,6 +114,19 @@ def _pdf_text(path: str) -> str:
|
|||||||
return re.sub(r"\s+", " ", txt)
|
return re.sub(r"\s+", " ", txt)
|
||||||
|
|
||||||
|
|
||||||
|
def _flat(chapter):
|
||||||
|
"""All blocks, descending into per-unit keep-together Groups (mejora
|
||||||
|
keep-together): each groupby/pivot section now wraps its heading + intro +
|
||||||
|
summary table + bar chart in a model.Group, so assertions look inside it."""
|
||||||
|
out = []
|
||||||
|
for b in chapter.blocks:
|
||||||
|
if getattr(b, "kind", None) == "group":
|
||||||
|
out.extend(getattr(b, "blocks", []))
|
||||||
|
else:
|
||||||
|
out.append(b)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
def _pptx_text(path: str) -> str:
|
def _pptx_text(path: str) -> str:
|
||||||
prs = Presentation(path)
|
prs = Presentation(path)
|
||||||
parts = []
|
parts = []
|
||||||
@@ -136,12 +149,13 @@ def test_golden_chapter_blocks_present():
|
|||||||
ch = build_agregacion(_profile(), _ctx_precomputed())
|
ch = build_agregacion(_profile(), _ctx_precomputed())
|
||||||
assert isinstance(ch, Chapter)
|
assert isinstance(ch, Chapter)
|
||||||
assert ch.id == "agregacion"
|
assert ch.id == "agregacion"
|
||||||
kinds = [b.kind for b in ch.blocks]
|
flat = _flat(ch)
|
||||||
|
kinds = [b.kind for b in flat]
|
||||||
assert "heading" in kinds
|
assert "heading" in kinds
|
||||||
assert kinds.count("data_table") >= 3 # 2 group summaries + pivot (+details)
|
assert kinds.count("data_table") >= 3 # 2 group summaries + pivot (+details)
|
||||||
assert "figure" in kinds # at least one bar chart.
|
assert "figure" in kinds # at least one bar chart.
|
||||||
# Headings mention the group keys and the pivot.
|
# Headings mention the group keys and the pivot.
|
||||||
htext = " ".join(b.text for b in ch.blocks if b.kind == "heading")
|
htext = " ".join(b.text for b in flat if b.kind == "heading")
|
||||||
assert "sex" in htext and "pclass" in htext and "Pivot" in htext
|
assert "sex" in htext and "pclass" in htext and "Pivot" in htext
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -61,7 +61,9 @@ try:
|
|||||||
except Exception: # noqa: BLE001
|
except Exception: # noqa: BLE001
|
||||||
build_geo_scatter = None # type: ignore[assignment]
|
build_geo_scatter = None # type: ignore[assignment]
|
||||||
|
|
||||||
CHAPTER_VERSION = "1.0.0"
|
# 1.0.1 — keep-together: el mapa (scatter geográfico) se envuelve con su Heading e
|
||||||
|
# intro en un model.Group para que el paginador no lo separe de su título/descripción.
|
||||||
|
CHAPTER_VERSION = "1.0.1"
|
||||||
CHAPTER_ID = "geospatial"
|
CHAPTER_ID = "geospatial"
|
||||||
CHAPTER_TITLE = "Análisis geoespacial"
|
CHAPTER_TITLE = "Análisis geoespacial"
|
||||||
|
|
||||||
@@ -455,11 +457,14 @@ def build_geospatial(profile: dict, ctx: dict):
|
|||||||
scatter = {}
|
scatter = {}
|
||||||
maker = _make_geo_scatter(scatter, lat_col, lon_col) if scatter else None
|
maker = _make_geo_scatter(scatter, lat_col, lon_col) if scatter else None
|
||||||
if maker is not None:
|
if maker is not None:
|
||||||
blocks.append(model.Figure(
|
# Keep-together: the chapter heading + intro + the map figure ride on
|
||||||
|
# the same page/slide (the renderers move the whole Group when it does
|
||||||
|
# not fit), so the map never gets stranded from its title/description.
|
||||||
|
blocks = [model.Group(blocks=blocks + [model.Figure(
|
||||||
make=maker,
|
make=maker,
|
||||||
caption="Cada punto es una observación situada por sus "
|
caption="Cada punto es una observación situada por sus "
|
||||||
"coordenadas; el recuadro rojo es el bounding box. La "
|
"coordenadas; el recuadro rojo es el bounding box. La "
|
||||||
"escala respeta la latitud (proyección equirectangular)."))
|
"escala respeta la latitud (proyección equirectangular).")])]
|
||||||
else:
|
else:
|
||||||
blocks.append(model.Note(
|
blocks.append(model.Note(
|
||||||
"No se pudo construir el scatter geográfico a partir de las "
|
"No se pudo construir el scatter geográfico a partir de las "
|
||||||
|
|||||||
@@ -64,16 +64,28 @@ def _ctx_points(lats, lons):
|
|||||||
return {"geo_points": {"lats": lats, "lons": lons}}
|
return {"geo_points": {"lats": lats, "lons": lons}}
|
||||||
|
|
||||||
|
|
||||||
|
def _all_blocks(chapter):
|
||||||
|
"""Flatten blocks, descending into the keep-together Group that now wraps the
|
||||||
|
map heading + intro + scatter figure (mejora keep-together)."""
|
||||||
|
out = []
|
||||||
|
for b in chapter.blocks:
|
||||||
|
if getattr(b, "kind", None) == "group":
|
||||||
|
out.extend(getattr(b, "blocks", []))
|
||||||
|
else:
|
||||||
|
out.append(b)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
def _kinds(chapter):
|
def _kinds(chapter):
|
||||||
return [getattr(b, "kind", None) for b in chapter.blocks]
|
return [getattr(b, "kind", None) for b in _all_blocks(chapter)]
|
||||||
|
|
||||||
|
|
||||||
def _tables(chapter):
|
def _tables(chapter):
|
||||||
return [b for b in chapter.blocks if getattr(b, "kind", None) == "data_table"]
|
return [b for b in _all_blocks(chapter) if getattr(b, "kind", None) == "data_table"]
|
||||||
|
|
||||||
|
|
||||||
def _figures(chapter):
|
def _figures(chapter):
|
||||||
return [b for b in chapter.blocks if getattr(b, "kind", None) == "figure"]
|
return [b for b in _all_blocks(chapter) if getattr(b, "kind", None) == "figure"]
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------- #
|
# --------------------------------------------------------------------------- #
|
||||||
@@ -98,7 +110,7 @@ def test_golden_detecta_columnas_y_nombra_ejes():
|
|||||||
lats, lons = _grid(40.4, -3.7, 30, spread=0.8)
|
lats, lons = _grid(40.4, -3.7, 30, spread=0.8)
|
||||||
prof = _profile_with_coords("latitude", "longitude", lats, lons)
|
prof = _profile_with_coords("latitude", "longitude", lats, lons)
|
||||||
ch = build_geospatial(prof, _ctx_points(lats, lons))
|
ch = build_geospatial(prof, _ctx_points(lats, lons))
|
||||||
intro = [b for b in ch.blocks if b.kind == "markdown"][0].text
|
intro = [b for b in _all_blocks(ch) if b.kind == "markdown"][0].text
|
||||||
assert "latitude" in intro and "longitude" in intro
|
assert "latitude" in intro and "longitude" in intro
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -45,7 +45,10 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from .. import model
|
from .. import model
|
||||||
|
|
||||||
CHAPTER_VERSION = "1.0.0"
|
# 1.0.1 — keep-together: el ranking "Faltantes por columna" (su Heading + tabla +
|
||||||
|
# figura) se envuelve en un model.Group para que el paginador no separe la figura
|
||||||
|
# de su título/tabla (el heatmap de co-ocurrencia ya iba agrupado).
|
||||||
|
CHAPTER_VERSION = "1.0.1"
|
||||||
CHAPTER_ID = "missingness"
|
CHAPTER_ID = "missingness"
|
||||||
CHAPTER_TITLE = "Datos faltantes"
|
CHAPTER_TITLE = "Datos faltantes"
|
||||||
|
|
||||||
@@ -547,14 +550,22 @@ def build_missingness(profile: dict, ctx: dict):
|
|||||||
model.Heading(text="Cuánto y dónde faltan datos", level=2),
|
model.Heading(text="Cuánto y dónde faltan datos", level=2),
|
||||||
_intro_block(mark, source),
|
_intro_block(mark, source),
|
||||||
_summary_block(profile, with_nulls, overview, sampled, n_total),
|
_summary_block(profile, with_nulls, overview, sampled, n_total),
|
||||||
model.Heading(text="Faltantes por columna", level=2),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Ranking "Faltantes por columna": keep the heading, its table and the bar
|
||||||
|
# figure together on the same page/slide (keep-together) so the paginator never
|
||||||
|
# strands the figure from its title/table. When there is no figure to draw, the
|
||||||
|
# unit degrades honestly and stays flat (never a Group around a missing figure).
|
||||||
|
rank_unit = [model.Heading(text="Faltantes por columna", level=2)]
|
||||||
ranking = _ranking_block(with_nulls)
|
ranking = _ranking_block(with_nulls)
|
||||||
if ranking is not None:
|
if ranking is not None:
|
||||||
blocks.append(ranking)
|
rank_unit.append(ranking)
|
||||||
rank_fig = _ranking_figure(with_nulls)
|
rank_fig = _ranking_figure(with_nulls)
|
||||||
if rank_fig is not None:
|
if rank_fig is not None:
|
||||||
blocks.append(rank_fig)
|
rank_unit.append(rank_fig)
|
||||||
|
blocks.append(model.Group(blocks=rank_unit))
|
||||||
|
else:
|
||||||
|
blocks.extend(rank_unit)
|
||||||
|
|
||||||
# Co-occurrence + row patterns need the per-row mask. Without it, say so.
|
# Co-occurrence + row patterns need the per-row mask. Without it, say so.
|
||||||
if not mask:
|
if not mask:
|
||||||
|
|||||||
@@ -45,7 +45,10 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from .. import model
|
from .. import model
|
||||||
|
|
||||||
CHAPTER_VERSION = "1.0.0"
|
# 1.0.1 — keep-together: cada gráfico (scree PCA, scatter KMeans) se envuelve con
|
||||||
|
# su Heading + su Markdown introductorio en un model.Group para que el paginador
|
||||||
|
# no separe el gráfico de su título/descripción.
|
||||||
|
CHAPTER_VERSION = "1.0.1"
|
||||||
CHAPTER_ID = "modelos"
|
CHAPTER_ID = "modelos"
|
||||||
CHAPTER_TITLE = "Modelos"
|
CHAPTER_TITLE = "Modelos"
|
||||||
|
|
||||||
@@ -326,7 +329,6 @@ def _pca_section(pca: dict, gloss=None, mark_term: bool = False) -> list:
|
|||||||
if not _is_dict(pca) or not pca.get("explained_variance_ratio"):
|
if not _is_dict(pca) or not pca.get("explained_variance_ratio"):
|
||||||
return []
|
return []
|
||||||
_register(gloss, "pca")
|
_register(gloss, "pca")
|
||||||
blocks = [model.Heading(text="PCA — varianza explicada", level=2)]
|
|
||||||
|
|
||||||
n_used = pca.get("n_rows_used")
|
n_used = pca.get("n_rows_used")
|
||||||
n_feat = pca.get("n_features")
|
n_feat = pca.get("n_features")
|
||||||
@@ -337,12 +339,20 @@ def _pca_section(pca: dict, gloss=None, mark_term: bool = False) -> list:
|
|||||||
"muestra cuánta varianza aporta cada componente y su acumulado: un "
|
"muestra cuánta varianza aporta cada componente y su acumulado: un "
|
||||||
"codo marca cuántos componentes bastan."
|
"codo marca cuántos componentes bastan."
|
||||||
)
|
)
|
||||||
blocks.append(model.Markdown(text=intro))
|
|
||||||
|
|
||||||
|
# Keep-together: the heading, its intro and the scree figure ride together on
|
||||||
|
# the same page/slide (the renderers measure the whole Group and move it whole
|
||||||
|
# if it does not fit), so the scree never gets stranded from its title. The
|
||||||
|
# variance/loadings tables (split-safe) flow after the group.
|
||||||
|
unit = [model.Heading(text="PCA — varianza explicada", level=2),
|
||||||
|
model.Markdown(text=intro)]
|
||||||
scree = _make_scree(pca)
|
scree = _make_scree(pca)
|
||||||
if scree is not None:
|
if scree is not None:
|
||||||
blocks.append(model.Figure(
|
unit.append(model.Figure(
|
||||||
make=scree, caption="Varianza explicada y acumulada por componente."))
|
make=scree, caption="Varianza explicada y acumulada por componente."))
|
||||||
|
blocks = [model.Group(blocks=unit)]
|
||||||
|
else:
|
||||||
|
blocks = list(unit)
|
||||||
|
|
||||||
evr = pca.get("explained_variance_ratio") or []
|
evr = pca.get("explained_variance_ratio") or []
|
||||||
cum = pca.get("cumulative") or []
|
cum = pca.get("cumulative") or []
|
||||||
@@ -390,8 +400,6 @@ def _kmeans_section(kmeans: dict, projection: dict, titles,
|
|||||||
_register(gloss, "kmeans")
|
_register(gloss, "kmeans")
|
||||||
_register(gloss, "silhouette")
|
_register(gloss, "silhouette")
|
||||||
|
|
||||||
blocks = [model.Heading(text="Segmentación (KMeans)", level=2)]
|
|
||||||
|
|
||||||
best_k = (projection or {}).get("best_k") or (kmeans or {}).get("best_k")
|
best_k = (projection or {}).get("best_k") or (kmeans or {}).get("best_k")
|
||||||
sil = (projection or {}).get("silhouette")
|
sil = (projection or {}).get("silhouette")
|
||||||
if sil is None:
|
if sil is None:
|
||||||
@@ -404,26 +412,31 @@ def _kmeans_section(kmeans: dict, projection: dict, titles,
|
|||||||
f"(**{_fmt_num(sil)}**). Los segmentos se proyectan sobre el plano de "
|
f"(**{_fmt_num(sil)}**). Los segmentos se proyectan sobre el plano de "
|
||||||
"los dos primeros componentes principales para visualizarlos."
|
"los dos primeros componentes principales para visualizarlos."
|
||||||
)
|
)
|
||||||
blocks.append(model.Markdown(text=intro))
|
head = model.Heading(text="Segmentación (KMeans)", level=2)
|
||||||
|
intro_md = model.Markdown(text=intro)
|
||||||
|
|
||||||
if has_proj:
|
scatter = _make_cluster_scatter(projection) if has_proj else None
|
||||||
scatter = _make_cluster_scatter(projection)
|
if scatter is not None:
|
||||||
if scatter is not None:
|
# Keep-together: heading + intro + the cluster scatter on one page/slide.
|
||||||
blocks.append(model.Figure(
|
blocks = [model.Group(blocks=[
|
||||||
|
head, intro_md,
|
||||||
|
model.Figure(
|
||||||
make=scatter,
|
make=scatter,
|
||||||
caption="Cada punto es una fila coloreada por su segmento "
|
caption="Cada punto es una fila coloreada por su segmento "
|
||||||
"KMeans; las «X» son los centroides."))
|
"KMeans; las «X» son los centroides.")])]
|
||||||
else:
|
elif has_proj:
|
||||||
blocks.append(model.Note(
|
# Points present but not drawable: honest note, kept flat (never a Group
|
||||||
"Proyección de clusters no dibujable (puntos y etiquetas "
|
# wrapping a missing figure).
|
||||||
"desalineados)."))
|
blocks = [head, intro_md, model.Note(
|
||||||
|
"Proyección de clusters no dibujable (puntos y etiquetas "
|
||||||
|
"desalineados).")]
|
||||||
else:
|
else:
|
||||||
# We have kmeans stats but no aligned points+labels to colour by.
|
# We have kmeans stats but no aligned points+labels to colour by.
|
||||||
blocks.append(model.Note(
|
blocks = [head, intro_md, model.Note(
|
||||||
"Scatter coloreado por segmento no disponible: el perfil no incluye "
|
"Scatter coloreado por segmento no disponible: el perfil no incluye "
|
||||||
"la proyección con etiquetas alineadas (pásala en "
|
"la proyección con etiquetas alineadas (pásala en "
|
||||||
"ctx['cluster_projection'] o las columnas crudas en "
|
"ctx['cluster_projection'] o las columnas crudas en "
|
||||||
"ctx['raw_numeric'] para colorear el plano PCA)."))
|
"ctx['raw_numeric'] para colorear el plano PCA).")]
|
||||||
|
|
||||||
# Cluster sizes table.
|
# Cluster sizes table.
|
||||||
sizes = (projection or {}).get("cluster_sizes") or (kmeans or {}).get("cluster_sizes") or []
|
sizes = (projection or {}).get("cluster_sizes") or (kmeans or {}).get("cluster_sizes") or []
|
||||||
|
|||||||
@@ -136,6 +136,19 @@ def _pptx_text(path: str) -> str:
|
|||||||
return re.sub(r"\s+", " ", " ".join(out))
|
return re.sub(r"\s+", " ", " ".join(out))
|
||||||
|
|
||||||
|
|
||||||
|
def _flat(chapter):
|
||||||
|
"""All blocks, descending into keep-together Groups (mejora keep-together):
|
||||||
|
the scree/scatter figures now ride inside a model.Group with their heading and
|
||||||
|
intro, so the assertions look for them inside the group too."""
|
||||||
|
out = []
|
||||||
|
for b in chapter.blocks:
|
||||||
|
if getattr(b, "kind", None) == "group":
|
||||||
|
out.extend(b.blocks)
|
||||||
|
else:
|
||||||
|
out.append(b)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------- #
|
# --------------------------------------------------------------------------- #
|
||||||
# Golden.
|
# Golden.
|
||||||
# --------------------------------------------------------------------------- #
|
# --------------------------------------------------------------------------- #
|
||||||
@@ -143,13 +156,14 @@ def test_golden_build_modelos_bloques_requeridos():
|
|||||||
ch = build_modelos(_profile(), _ctx_full())
|
ch = build_modelos(_profile(), _ctx_full())
|
||||||
assert ch is not None
|
assert ch is not None
|
||||||
assert ch.id == "modelos" and ch.version
|
assert ch.id == "modelos" and ch.version
|
||||||
# Both figures present: scree plot + cluster scatter.
|
flat = _flat(ch)
|
||||||
n_figures = sum(1 for b in ch.blocks if isinstance(b, Figure))
|
# Both figures present: scree plot + cluster scatter (inside their Groups).
|
||||||
|
n_figures = sum(1 for b in flat if isinstance(b, Figure))
|
||||||
assert n_figures >= 2
|
assert n_figures >= 2
|
||||||
# Tables present (variance, loadings, sizes, normality).
|
# Tables present (variance, loadings, sizes, normality).
|
||||||
assert sum(1 for b in ch.blocks if isinstance(b, DataTable)) >= 3
|
assert sum(1 for b in flat if isinstance(b, DataTable)) >= 3
|
||||||
# Markdown carries the required explanations.
|
# Markdown carries the required explanations.
|
||||||
md = " ".join(b.text for b in ch.blocks if isinstance(b, Markdown))
|
md = " ".join(b.text for b in flat if isinstance(b, Markdown))
|
||||||
assert "z-score" in md # normalization explained
|
assert "z-score" in md # normalization explained
|
||||||
assert "Isolation Forest" in md # outlier generation explained
|
assert "Isolation Forest" in md # outlier generation explained
|
||||||
assert "silhouette" in md # kmeans
|
assert "silhouette" in md # kmeans
|
||||||
@@ -272,11 +286,11 @@ def test_glosario_engancha_terminos_modelos():
|
|||||||
assert ch is not None
|
assert ch is not None
|
||||||
keys = {t["key"] for t in g.terms()}
|
keys = {t["key"] for t in g.terms()}
|
||||||
assert {"zscore", "pca", "kmeans", "silhouette", "isolation_forest"} <= keys
|
assert {"zscore", "pca", "kmeans", "silhouette", "isolation_forest"} <= keys
|
||||||
body = " ".join(b.text for b in ch.blocks if b.kind == "markdown")
|
body = " ".join(b.text for b in _flat(ch) if b.kind == "markdown")
|
||||||
for k in ("zscore", "pca", "kmeans", "silhouette", "isolation_forest"):
|
for k in ("zscore", "pca", "kmeans", "silhouette", "isolation_forest"):
|
||||||
assert f"[[term:{k}]]" in body, k
|
assert f"[[term:{k}]]" in body, k
|
||||||
|
|
||||||
# Sin colector: degrada limpio (ningún marcador en el cuerpo).
|
# Sin colector: degrada limpio (ningún marcador en el cuerpo).
|
||||||
ch2 = build_modelos(_profile(), _ctx_full())
|
ch2 = build_modelos(_profile(), _ctx_full())
|
||||||
body2 = " ".join(b.text for b in ch2.blocks if b.kind == "markdown")
|
body2 = " ".join(b.text for b in _flat(ch2) if b.kind == "markdown")
|
||||||
assert "[[term:" not in body2
|
assert "[[term:" not in body2
|
||||||
|
|||||||
@@ -58,7 +58,10 @@ try:
|
|||||||
except Exception: # noqa: BLE001
|
except Exception: # noqa: BLE001
|
||||||
resample_timeseries = None # type: ignore[assignment]
|
resample_timeseries = None # type: ignore[assignment]
|
||||||
|
|
||||||
CHAPTER_VERSION = "1.0.0"
|
# 1.0.1 — keep-together: cada serie (su Heading + figuras de evolución/STL/ACF +
|
||||||
|
# análisis textual) se envuelve en un model.Group para que el paginador no separe
|
||||||
|
# los gráficos de su título/descripción. Una serie = un grupo.
|
||||||
|
CHAPTER_VERSION = "1.0.1"
|
||||||
CHAPTER_ID = "timeseries"
|
CHAPTER_ID = "timeseries"
|
||||||
CHAPTER_TITLE = "Series temporales"
|
CHAPTER_TITLE = "Series temporales"
|
||||||
|
|
||||||
@@ -470,7 +473,12 @@ def _analysis_markdown(sblock: dict) -> str:
|
|||||||
# Per-column section.
|
# Per-column section.
|
||||||
# --------------------------------------------------------------------------- #
|
# --------------------------------------------------------------------------- #
|
||||||
def _column_section(name: str, sblock: dict, raw: dict, collapsed_into) -> list:
|
def _column_section(name: str, sblock: dict, raw: dict, collapsed_into) -> list:
|
||||||
"""Blocks for one numeric column: evolution figure + STL + ACF + analysis."""
|
"""Blocks for one numeric column: evolution figure + STL + ACF + analysis.
|
||||||
|
|
||||||
|
The whole series is wrapped in a single keep-together ``model.Group`` (a series
|
||||||
|
= a group) so the renderers never strand the column heading / its analysis from
|
||||||
|
the figures it introduces. Only real figures are ever appended (a missing
|
||||||
|
figure is simply omitted — never a Group around a None figure)."""
|
||||||
blocks = [model.Heading(text=model._safe_str(name), level=2)]
|
blocks = [model.Heading(text=model._safe_str(name), level=2)]
|
||||||
|
|
||||||
# --- Value-vs-time line + per-period row count (MUST-9.1). ---
|
# --- Value-vs-time line + per-period row count (MUST-9.1). ---
|
||||||
@@ -522,7 +530,8 @@ def _column_section(name: str, sblock: dict, raw: dict, collapsed_into) -> list:
|
|||||||
analysis = _analysis_markdown(sblock)
|
analysis = _analysis_markdown(sblock)
|
||||||
if analysis:
|
if analysis:
|
||||||
blocks.append(model.Markdown(text=analysis))
|
blocks.append(model.Markdown(text=analysis))
|
||||||
return blocks
|
# One series = one keep-together group (heading + figures + analysis).
|
||||||
|
return [model.Group(blocks=blocks)]
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------- #
|
# --------------------------------------------------------------------------- #
|
||||||
|
|||||||
@@ -112,6 +112,19 @@ def _pdf_text(path: str) -> str:
|
|||||||
return re.sub(r"\s+", " ", txt)
|
return re.sub(r"\s+", " ", txt)
|
||||||
|
|
||||||
|
|
||||||
|
def _flat(chapter):
|
||||||
|
"""All blocks, descending into per-series keep-together Groups (mejora
|
||||||
|
keep-together): each series' heading, figures and analysis now live inside a
|
||||||
|
model.Group, so the assertions look for them inside the group too."""
|
||||||
|
out = []
|
||||||
|
for b in chapter.blocks:
|
||||||
|
if getattr(b, "kind", None) == "group":
|
||||||
|
out.extend(b.blocks)
|
||||||
|
else:
|
||||||
|
out.append(b)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------- #
|
# --------------------------------------------------------------------------- #
|
||||||
# Golden.
|
# Golden.
|
||||||
# --------------------------------------------------------------------------- #
|
# --------------------------------------------------------------------------- #
|
||||||
@@ -124,8 +137,9 @@ def test_golden_estructura_y_figuras():
|
|||||||
assert kinds[0] == "heading" # chapter title
|
assert kinds[0] == "heading" # chapter title
|
||||||
assert kinds[1] == "markdown" # intro
|
assert kinds[1] == "markdown" # intro
|
||||||
assert "kv_table" in kinds # datetime profile header (MUST-9.3)
|
assert "kv_table" in kinds # datetime profile header (MUST-9.3)
|
||||||
# Per column: evolution figure + STL figure + ACF figure + analysis markdown.
|
# Per column: evolution figure + STL figure + ACF figure + analysis markdown
|
||||||
figs = [b for b in ch.blocks if b.kind == "figure"]
|
# (now inside the per-series Group).
|
||||||
|
figs = [b for b in _flat(ch) if b.kind == "figure"]
|
||||||
assert len(figs) >= 3, "evolución + STL + ACF esperadas"
|
assert len(figs) >= 3, "evolución + STL + ACF esperadas"
|
||||||
# Lazy makers must produce real matplotlib figures.
|
# Lazy makers must produce real matplotlib figures.
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
@@ -138,7 +152,7 @@ def test_golden_estructura_y_figuras():
|
|||||||
def test_golden_evolucion_tiene_dos_paneles_valor_y_conteo():
|
def test_golden_evolucion_tiene_dos_paneles_valor_y_conteo():
|
||||||
# MUST-9.1: the evolution figure has a value panel + a row-count panel.
|
# MUST-9.1: the evolution figure has a value panel + a row-count panel.
|
||||||
ch = build_timeseries(_profile(("precio",)), _ctx_raw(("precio",)))
|
ch = build_timeseries(_profile(("precio",)), _ctx_raw(("precio",)))
|
||||||
figs = [b for b in ch.blocks if b.kind == "figure"]
|
figs = [b for b in _flat(ch) if b.kind == "figure"]
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
fig = figs[0].make() # first figure is the evolution one.
|
fig = figs[0].make() # first figure is the evolution one.
|
||||||
assert len(fig.axes) == 2, "panel de valor + panel de conteo de filas"
|
assert len(fig.axes) == 2, "panel de valor + panel de conteo de filas"
|
||||||
@@ -147,7 +161,7 @@ def test_golden_evolucion_tiene_dos_paneles_valor_y_conteo():
|
|||||||
|
|
||||||
def test_golden_analisis_textual_presente():
|
def test_golden_analisis_textual_presente():
|
||||||
ch = build_timeseries(_profile(("precio",)), _ctx_raw(("precio",)))
|
ch = build_timeseries(_profile(("precio",)), _ctx_raw(("precio",)))
|
||||||
md = " ".join(b.text for b in ch.blocks if b.kind == "markdown")
|
md = " ".join(b.text for b in _flat(ch) if b.kind == "markdown")
|
||||||
assert "Estacionariedad" in md
|
assert "Estacionariedad" in md
|
||||||
assert "Autocorrelación" in md
|
assert "Autocorrelación" in md
|
||||||
assert "STL" in md
|
assert "STL" in md
|
||||||
@@ -183,9 +197,9 @@ def test_edge_sin_raw_degrada_pero_mantiene_analisis():
|
|||||||
# from the profile) and note that the evolution chart is unavailable.
|
# from the profile) and note that the evolution chart is unavailable.
|
||||||
ch = build_timeseries(_profile(("precio",)), {})
|
ch = build_timeseries(_profile(("precio",)), {})
|
||||||
assert ch is not None
|
assert ch is not None
|
||||||
notes = " ".join(b.text for b in ch.blocks if b.kind == "note")
|
notes = " ".join(b.text for b in _flat(ch) if b.kind == "note")
|
||||||
assert "evolución temporal no disponible" in notes
|
assert "evolución temporal no disponible" in notes
|
||||||
md = " ".join(b.text for b in ch.blocks if b.kind == "markdown")
|
md = " ".join(b.text for b in _flat(ch) if b.kind == "markdown")
|
||||||
assert "Estacionariedad" in md
|
assert "Estacionariedad" in md
|
||||||
|
|
||||||
|
|
||||||
@@ -195,7 +209,7 @@ def test_edge_stl_solo_estadisticos_no_dibuja_panel_pero_no_revienta():
|
|||||||
ch = build_timeseries(_profile(("precio",), with_stl_values=False),
|
ch = build_timeseries(_profile(("precio",), with_stl_values=False),
|
||||||
_ctx_raw(("precio",)))
|
_ctx_raw(("precio",)))
|
||||||
assert ch is not None
|
assert ch is not None
|
||||||
md = " ".join(b.text for b in ch.blocks if b.kind == "markdown")
|
md = " ".join(b.text for b in _flat(ch) if b.kind == "markdown")
|
||||||
assert "STL" in md
|
assert "STL" in md
|
||||||
|
|
||||||
|
|
||||||
@@ -206,15 +220,15 @@ def test_ohlc_consolidacion():
|
|||||||
names = ("Open", "High", "Low", "Close")
|
names = ("Open", "High", "Low", "Close")
|
||||||
ch = build_timeseries(_profile(names), _ctx_raw(names))
|
ch = build_timeseries(_profile(names), _ctx_raw(names))
|
||||||
assert ch is not None
|
assert ch is not None
|
||||||
notes = " ".join(b.text for b in ch.blocks if b.kind == "note")
|
notes = " ".join(b.text for b in _flat(ch) if b.kind == "note")
|
||||||
assert "OHLC" in notes
|
assert "OHLC" in notes
|
||||||
# Only the representative draws the evolution figure; the other 3 are collapsed
|
# Only the representative draws the evolution figure; the other 3 are collapsed
|
||||||
# so there are fewer evolution figures than columns.
|
# so there are fewer evolution figures than columns.
|
||||||
captions = [b.caption or "" for b in ch.blocks if b.kind == "figure"]
|
captions = [b.caption or "" for b in _flat(ch) if b.kind == "figure"]
|
||||||
evo = [c for c in captions if "Evolución" in c]
|
evo = [c for c in captions if "Evolución" in c]
|
||||||
assert len(evo) < len(names), "las series OHLC deben consolidarse"
|
assert len(evo) < len(names), "las series OHLC deben consolidarse"
|
||||||
# Every column still has its analysis markdown (one heading per column).
|
# Every column still has its analysis markdown (one heading per column).
|
||||||
headings = [b.text for b in ch.blocks if b.kind == "heading" and b.level == 2]
|
headings = [b.text for b in _flat(ch) if b.kind == "heading" and b.level == 2]
|
||||||
for nm in names:
|
for nm in names:
|
||||||
assert nm in headings
|
assert nm in headings
|
||||||
|
|
||||||
@@ -227,7 +241,7 @@ def test_anti_corte_pdf_y_pptx():
|
|||||||
prof = _profile(names, n=90)
|
prof = _profile(names, n=90)
|
||||||
ctx = _ctx_raw(names, n=90)
|
ctx = _ctx_raw(names, n=90)
|
||||||
ch = build_timeseries(prof, ctx)
|
ch = build_timeseries(prof, ctx)
|
||||||
col_headings = [b.text for b in ch.blocks if b.kind == "heading" and b.level == 2]
|
col_headings = [b.text for b in _flat(ch) if b.kind == "heading" and b.level == 2]
|
||||||
assert len(col_headings) == 6
|
assert len(col_headings) == 6
|
||||||
with tempfile.TemporaryDirectory() as d:
|
with tempfile.TemporaryDirectory() as d:
|
||||||
pdf = os.path.join(d, "ts.pdf")
|
pdf = os.path.join(d, "ts.pdf")
|
||||||
|
|||||||
Reference in New Issue
Block a user