"""Overview chapter — df.head, column dictionary and describe (reference). Second reference chapter for AutomaticEDA. Renders (across as many pages/slides as needed, the renderers paginate): 1. ``df.head`` — the first rows of the table. The current ``TableProfile`` does NOT carry the raw head, so this is read from ``ctx['head_rows']`` / ``profile['head_rows']`` (a list of row dicts). When absent the chapter shows an honest placeholder documenting the missing key instead of inventing data. 2. Column dictionary — name / type / nulls / non-null examples. Examples come from ``columns[i]['examples']`` when present; otherwise they are derived from real non-null profile values (categorical top values, numeric min/median/max) so the cell is never empty nor fabricated. 3. ``df.describe`` — mean / median / min / max / std for every numeric column. Contract: build_(profile, ctx) -> Chapter | None ; CHAPTER_VERSION = "x.y.z". """ from __future__ import annotations from .. import model CHAPTER_VERSION = "1.0.0" CHAPTER_ID = "overview" CHAPTER_TITLE = "Overview" # Profile/ctx keys the calculation phase must add for a full head + examples. 
HEAD_KEY = "head_rows"      # list[dict] — df.head(n)
EXAMPLES_KEY = "examples"   # per column: list of non-null sample values


def _fmt_num(value, decimals: int = 3) -> str:
    """Format a scalar for display in a table cell.

    ``None`` becomes an em dash, booleans use ``str``, integers get ``.`` as
    thousands separator, and floats are rendered with at most ``decimals``
    fractional digits with trailing fractional zeros removed. NaN renders as
    ``"NaN"`` and infinities as ``str(value)``. Any other type falls back to
    ``str(value)``.
    """
    if value is None:
        return "—"
    # bool is a subclass of int, so it has to be tested before int.
    if isinstance(value, bool):
        return str(value)
    if isinstance(value, int):
        # NOTE(review): "." is the thousands separator for ints but the
        # decimal separator for floats, so 1234 and 1.234 both print "1.234".
        return f"{value:,}".replace(",", ".")
    if isinstance(value, float):
        if value != value:  # NaN is the only float unequal to itself
            return "NaN"
        if value in (float("inf"), float("-inf")):
            return str(value)
        text = f"{value:.{decimals}f}"
        # Only trim zeros that sit after a decimal point; without this guard
        # "10" (decimals=0) would be truncated to "1".
        if "." in text:
            text = text.rstrip("0").rstrip(".")
        # A value that rounds to zero from below would otherwise read "-0".
        if text in ("", "-0"):
            return "0"
        return text
    return str(value)


def _fmt_pct(value, decimals: int = 1) -> str:
    """Format a fraction as a percentage string (``0.125`` -> ``"12.5%"``).

    ``None`` becomes an em dash; values that cannot be converted with
    ``float`` are returned as ``str(value)``.
    """
    if value is None:
        return "—"
    try:
        return f"{float(value) * 100:.{decimals}f}%"
    except (TypeError, ValueError):
        return str(value)


def _examples_for(col: dict) -> str:
    """Build a short string of real non-null example values for a column.

    Sources, in priority order: the explicit ``examples`` list (first four
    entries), the categorical ``top`` values (first four entries), then the
    numeric min / median / max. Returns an em dash when none is available.
    """
    explicit = col.get(EXAMPLES_KEY)
    if isinstance(explicit, (list, tuple)) and explicit:
        return ", ".join(model._safe_str(v) for v in explicit[:4])

    cat = col.get("categorical") or {}
    top = cat.get("top") or []
    if isinstance(top, (list, tuple)) and top:
        # Skip malformed entries and null values: the cell is documented as
        # holding non-null examples only.
        vals = [
            model._safe_str(entry.get("value"))
            for entry in top[:4]
            if isinstance(entry, dict) and entry.get("value") is not None
        ]
        vals = [v for v in vals if v]
        if vals:
            return ", ".join(vals)

    num = col.get("numeric") or {}
    if num:
        bits = [
            _fmt_num(num.get(key))
            for key in ("min", "median", "max")
            if num.get(key) is not None
        ]
        if bits:
            return ", ".join(bits)
    return "—"


def _head_block(profile: dict, ctx: dict, max_rows: int = 10):
    """Return a DataTable for df.head, or a Note documenting the missing key.

    The head is read from ``ctx[HEAD_KEY]`` first, then ``profile[HEAD_KEY]``.
    It is rendered only when it is a non-empty list whose first item is a
    dict; at most ``max_rows`` rows are shown and non-dict rows are skipped.
    """
    head = ctx.get(HEAD_KEY) or profile.get(HEAD_KEY)
    if isinstance(head, list) and head and isinstance(head[0], dict):
        sample = [row for row in head[:max_rows] if isinstance(row, dict)]
        # Column order from the profile, then any extra keys present in rows
        # (in first-seen order) so no row data is silently dropped.
        cols = [
            c.get("name")
            for c in (profile.get("columns") or [])
            if isinstance(c, dict) and c.get("name")
        ]
        seen = set(cols)
        for row in sample:
            for key in row:
                if key not in seen:
                    seen.add(key)
                    cols.append(key)
        rows = [[model._safe_str(row.get(c)) for c in cols] for row in sample]
        return model.DataTable(header=cols, rows=rows,
                               note=f"primeras {len(rows)} filas")
    return model.Note(
        "df.head no disponible: el TableProfile no incluye 'head_rows'. La fase "
        "de cálculo debe añadir profile['head_rows'] (lista de dicts fila) o "
        "pasarlo en ctx['head_rows'] para mostrar las primeras filas.")


def _columns_block(profile: dict):
    """Return the column-dictionary DataTable, or None without usable columns.

    One row per dict entry of ``profile['columns']``: name, type (with the
    semantic type in parentheses when present), nulls and examples.
    """
    header = ["Columna", "Tipo", "Nulos", "Ejemplos (no nulos)"]
    rows = []
    for c in profile.get("columns") or []:
        if not isinstance(c, dict):
            continue
        name = c.get("name") or "(col)"
        ctype = c.get("inferred_type") or c.get("physical_type") or "—"
        sem = c.get("semantic_type")
        if sem:
            ctype = f"{ctype} ({sem})"
        null_pct = c.get("null_pct")
        null_count = c.get("null_count")
        # Prefer "pct (count)", then the bare count, then a dash.
        if null_pct is not None:
            nulls = _fmt_pct(null_pct)
            if null_count is not None:
                nulls += f" ({null_count})"
        elif null_count is not None:
            nulls = str(null_count)
        else:
            nulls = "—"
        rows.append([name, ctype, nulls, _examples_for(c)])
    if not rows:
        return None
    return model.DataTable(header=header, rows=rows, title="Columnas")


def _describe_block(profile: dict):
    """Return the numeric summary DataTable, or None without numeric columns.

    Only columns whose ``inferred_type`` is ``"numeric"`` and that carry a
    non-empty ``numeric`` mapping contribute a row.
    """
    stats = ("mean", "median", "min", "max", "std")
    header = ["Columna", *stats]
    rows = []
    for c in profile.get("columns") or []:
        if not isinstance(c, dict) or c.get("inferred_type") != "numeric":
            continue
        num = c.get("numeric") or {}
        if not num:
            continue
        rows.append([c.get("name") or "(col)",
                     *(_fmt_num(num.get(key)) for key in stats)])
    if not rows:
        return None
    return model.DataTable(header=header, rows=rows,
                           title="Estadística (describe)")


def build_overview(profile: dict, ctx: dict):
    """Build the Overview Chapter.

    Returns None only when the profile has no columns AND no head rows are
    available in either ``ctx`` or ``profile``. ``None`` is accepted for both
    arguments and treated as an empty mapping. The head section is always
    present (table or placeholder note); the column dictionary and numeric
    summary sections are added only when they have rows.
    """
    profile = profile or {}
    ctx = ctx or {}
    cols = profile.get("columns") or []
    if not cols and not (ctx.get(HEAD_KEY) or profile.get(HEAD_KEY)):
        return None

    blocks = [
        model.Heading(text="Primeras filas (df.head)", level=2),
        _head_block(profile, ctx),
    ]
    cols_block = _columns_block(profile)
    if cols_block is not None:
        blocks.append(model.Heading(
            text="Diccionario de columnas", level=2))
        blocks.append(cols_block)
    desc_block = _describe_block(profile)
    if desc_block is not None:
        blocks.append(model.Heading(
            text="Resumen estadístico numérico", level=2))
        blocks.append(desc_block)
    return model.Chapter(id=CHAPTER_ID, title=CHAPTER_TITLE,
                         version=CHAPTER_VERSION, blocks=blocks)