diff --git a/python/functions/datascience/automatic_eda/chapters/cat_distr.py b/python/functions/datascience/automatic_eda/chapters/cat_distr.py index 3a252605..731aa7b7 100644 --- a/python/functions/datascience/automatic_eda/chapters/cat_distr.py +++ b/python/functions/datascience/automatic_eda/chapters/cat_distr.py @@ -439,24 +439,17 @@ def _topk_table(cat: dict): note=note) -def _intro_blocks(n_rows, mark_term: bool = False): - total = _fmt_int(n_rows) - # Mark the first appearance of each term as a clickable glossary jump when the - # terms were registered (mark_term). The full definition of the entropy term - # AND of how each categorical page is laid out live in the GLOSARIO chapter, so - # the intro only names the clickable terms instead of repeating the long - # explanation (avoids the redundancy with the glossary). +def _intro_blocks(mark_term: bool = False): + # The full explanation of entropy AND of how each categorical page is laid out + # lives in the GLOSARIO chapter; the chapter body keeps only the minimal + # clickable terms — no descriptive prose — to avoid duplicating the glossary. + # The dataset row total is not repeated here: each column's cardinality table + # already carries "Total filas (dataset)". entropia = ("[[term:entropia]]entropía[[/term]]" if mark_term else "entropía") pagina = ("[[term:pagina_categorica]]cómo se organiza cada página[[/term]]" if mark_term else "cómo se organiza cada página") - text = ( - f"Cada columna categórica ocupa su propia página — {pagina}: " - f"cardinalidad (incluida la {entropia}), top de categorías y un gráfico " - "de barras de las más comunes." - ) - if n_rows is not None: - text += f" El dataset tiene {total} filas en total como referencia." + text = f"Términos: {entropia} · {pagina}." return [ model.Heading(text="Entropía y cardinalidad", level=2), model.Markdown(text=text), @@ -484,7 +477,7 @@ def build_cat_distr(profile: dict, ctx: dict): _TERM_ENTROPIA_DEF) glossary.add(_TERM_PAGINA_KEY, _TERM_PAGINA_LABEL) mark_term = True - blocks = list(_intro_blocks(n_rows, mark_term=mark_term)) + blocks = list(_intro_blocks(mark_term=mark_term)) # Business description + unit per column come from the LLM dictionary # (profile['llm']['dictionary'], matched by column name); absent without