"""Impure EDA helper: ranked bar figure of missing-value share (`eda` group).

Builds a horizontal bar chart ranking the columns of a dataset by their
percentage of missing values (0-100), largest at the top, each bar labelled
with its ``NN.N%`` at the end. Returns a ready-to-rasterize
``matplotlib.figure.Figure``; it never shows nor saves it.

Impure because it touches matplotlib's rendering machinery. Figures are built
with the object-oriented ``Figure`` API (no ``pyplot``), so nothing is
registered with pyplot and the builder can be called repeatedly from a report
renderer.

Note: importing this module calls ``matplotlib.use("Agg")``, which selects the
headless Agg backend at import time. That selection is a module-level side
effect, not something scoped to the figures built here.
"""

import math

import matplotlib

matplotlib.use("Agg")

from matplotlib.figure import Figure  # noqa: E402

# Muted gray for secondary text (no-data / fallback messages).
_MUTED_TEXT = "#5f6b7a"
# Soft red for the error fallback message.
_ERROR_TEXT = "#b00020"
# Bar fill — a calm blue that reads well on white at report size.
_BAR_COLOR = "#4C72B0"


def _truncate(text, width: int = 22) -> str:
    """Truncate ``text`` to at most ``width`` chars, adding an ellipsis if cut.

    ``None`` is rendered as the empty string; any other value is passed
    through ``str()``. When the text is cut and ``width`` is at least 2, the
    last kept position is replaced by ``"…"`` so the result is exactly
    ``width`` characters long. A ``width`` of 1 keeps the first character
    only (no room for an ellipsis) and a ``width`` of 0 or less yields ``""``.
    """
    s = "" if text is None else str(text)
    if len(s) <= width:
        return s
    if width <= 1:
        # Clamp at 0: a negative bound would slice from the END of the string
        # (``s[:-1]``) and return more than ``width`` characters.
        return s[: max(width, 0)]
    return s[: width - 1] + "…"


def _message_figure(message: str, color: str = _MUTED_TEXT) -> "Figure":
    """Return a fallback ``Figure`` carrying a single centered message.

    Args:
        message: Text drawn in the middle of an axis-less 6.4 x 4.0 in figure.
        color: Text color; defaults to the muted gray used for no-data notes.
    """
    fig = Figure(figsize=(6.4, 4.0), dpi=150)
    ax = fig.add_subplot(111)
    ax.axis("off")
    ax.text(
        0.5,
        0.5,
        message,
        ha="center",
        va="center",
        fontsize=12,
        color=color,
        wrap=True,
        transform=ax.transAxes,
    )
    fig.tight_layout()
    return fig


def _coerce_pct(value) -> float:
    """Coerce one raw percentage to a finite, non-negative ``float``.

    Anything ``float()`` rejects (``None``, non-numeric strings, integers too
    large for a float) and any non-finite result (NaN, ``+inf``, ``-inf``)
    becomes ``0.0``. Negative values are clamped to ``0.0``. There is no upper
    clamp.
    """
    try:
        pct = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: ``float(10 ** 400)``. Handling it here keeps a single
        # bad value from replacing the whole chart with the error fallback.
        return 0.0
    if not math.isfinite(pct):
        return 0.0
    return max(0.0, pct)


def missingness_rank_bar_figure(
    names,
    pcts,
    title: str = "% de valores faltantes por columna",
) -> "matplotlib.figure.Figure":
    """Build a horizontal ranked bar figure of missing-value share per column.

    Pairs each column name with its missing percentage, sorts by percentage
    descending and draws horizontal bars with the largest at the top. Columns
    with equal percentages keep their input order, top to bottom. The X axis
    is pinned to ``[0, 100]`` so bars are comparable across reports, each bar
    is annotated with its ``NN.N%`` at the end, and the Y tick labels are
    truncated to 22 chars.

    The function is defensive: empty, mismatched or non-numeric input does
    not raise. When either argument is not a list/tuple or is empty it returns
    a ``Figure`` carrying a centered "sin datos faltantes" message, and any
    unexpected error while drawing is caught and turned into a fallback
    ``Figure`` carrying the error text.

    Args:
        names: List or tuple of column names. May be empty. Items are
            stringified and truncated for display; the originals are not
            mutated.
        pcts: List or tuple parallel to ``names`` of missing-value
            percentages, expected in ``[0, 100]``. Non-numeric/``None``, NaN
            and infinite values are coerced to ``0.0`` and negatives are
            clamped to ``0``. Values above 100 are not clamped, although the
            X axis still stops at 100. Pairing stops at
            ``min(len(names), len(pcts))`` so a length mismatch never crashes.
        title: Figure title, truncated to 60 chars. An empty title draws no
            title. Default "% de valores faltantes por columna".

    Returns:
        A ``matplotlib.figure.Figure`` with a single horizontal-bar Axes (or a
        message-only figure in the fallback cases). The figure is neither
        shown nor saved here.
    """
    try:
        if not (
            isinstance(names, (list, tuple))
            and isinstance(pcts, (list, tuple))
            and names
            and pcts
        ):
            return _message_figure("sin datos faltantes")

        # Both inputs are non-empty here, so ``zip`` yields at least one pair;
        # it also silently drops the tail of the longer input.
        pairs = [(name, _coerce_pct(pct)) for name, pct in zip(names, pcts)]

        # Rank by percentage descending (stable, so ties keep input order),
        # then flip: barh draws bottom-up, so the largest must come last to
        # end up at the top of the chart.
        ranked = sorted(pairs, key=lambda pair: pair[1], reverse=True)
        ordered = ranked[::-1]

        labels = [_truncate(name, 22) for name, _ in ordered]
        values = [val for _, val in ordered]
        positions = list(range(len(ordered)))

        # Height scales with the number of bars so dense reports stay
        # readable, bounded to the 2.4-14.0 inch range.
        height = max(2.4, min(0.4 * len(ordered) + 1.2, 14.0))
        fig = Figure(figsize=(6.4, height), dpi=150)
        ax = fig.add_subplot(111)

        ax.barh(positions, values, color=_BAR_COLOR, edgecolor="white")
        ax.set_yticks(positions)
        ax.set_yticklabels(labels, fontsize=8)
        ax.set_xlim(0, 100)
        ax.set_xlabel("% faltante", fontsize=9)

        # Annotate each bar with its percentage at the end of the bar. From
        # 90% upwards the label is right-aligned and its anchor capped at 99
        # so the text stays within the 0-100 axis instead of running past it.
        for y, val in zip(positions, values):
            ax.text(
                min(val + 1.5, 99.0),
                y,
                f"{val:.1f}%",
                va="center",
                ha="left" if val < 90 else "right",
                fontsize=7,
                color="#202020",
            )

        if title:
            ax.set_title(_truncate(title, 60), fontsize=12, loc="left", pad=10)

        fig.tight_layout()
        return fig
    except Exception as exc:  # noqa: BLE001 — never raise from a figure builder.
        return _message_figure(
            f"error al dibujar barras: {exc}", color=_ERROR_TEXT
        )