"""Impure EDA helper: a single figure of horizontal Tukey boxplots (`eda` group). Draws, in one ``matplotlib.figure.Figure``, a stack of horizontal Tukey boxplots (one per column) using ``ax.bxp``: each carries its box (Q1–Q3), whiskers (up to 1.5·IQR), the median line and its outlier points. It consumes the output of the pure registry function ``build_boxplot_stats`` (one ``box`` dict per column) plus an optional list of raw outlier values per column; it never recomputes anything. It is the "small-multiples" companion of ``num_distr`` (which draws one histogram+boxplot per column): here every column shares a single figure so the caller can show, at a glance, *which* columns are the most contaminated by outliers (the caller passes them already ordered by contamination). Impure because it touches matplotlib's rendering machinery. It uses the headless Agg backend and the object-oriented ``Figure`` API (no ``pyplot``) so it leaks no global state and is safe to call repeatedly from a report renderer. It is fully defensive and NEVER raises: invalid entries are skipped and, if nothing valid remains, it returns a placeholder figure carrying a centered "(sin boxplots)". """ import matplotlib matplotlib.use("Agg") from matplotlib.figure import Figure # noqa: E402 # Blue palette shared with the ``num_distr`` chapter so the report stays coherent. _BOX_FACE = "#9ec6df" # box fill. _BOX_EDGE = "#5b8aa6" # box / whisker / cap border. _MEDIAN = "#2e8b57" # median line (sea green). _OUTLIER = "#c0392b" # outlier points (soft red). # Muted gray for the placeholder / fallback message text. _MUTED_TEXT = "#5f6b7a" # Soft red for the error fallback message. _ERROR_TEXT = "#b00020" def _num(value): """Coerce ``value`` to float defensively; None for None/bool/non-numeric/NaN.""" # bool is a subclass of int; a stat value is never a real bool, so treat # True/False as missing instead of silently coercing to 1.0/0.0. if value is None or isinstance(value, bool): return None try: f = float(value) except (TypeError, ValueError): return None if f != f: # NaN guard. return None return f def _placeholder_figure(message: str, color: str = _MUTED_TEXT) -> "Figure": """Return a fallback ``Figure`` carrying a single centered message.""" fig = Figure(figsize=(7.0, 2.4), dpi=150) ax = fig.add_subplot(111) ax.axis("off") ax.text( 0.5, 0.5, message, ha="center", va="center", fontsize=12, color=color, wrap=True, transform=ax.transAxes, ) fig.tight_layout() return fig def build_boxplots_figure( boxes: list, title: str = "", max_boxes: int = 12, ) -> "matplotlib.figure.Figure": """Build one figure of stacked horizontal Tukey boxplots (one per column). For each entry the function builds a ``bxp`` stats record (``med, q1, q3, whislo, whishi, fliers, label``) from its ``box`` sub-dict (the output of ``build_boxplot_stats``) and draws all of them as horizontal boxplots sharing the X axis, top-to-bottom in the order received (the caller is expected to pass them already sorted by contamination). Outliers are shown two ways: - If an entry carries a ``fliers`` list (the raw out-of-fence values), they are drawn as red points via ``ax.bxp(..., showfliers=True)``. - If ``fliers`` is ``None``/absent, the raw values are unknown, so only the extremes are marked: a red point at ``box["min"]`` when ``box["has_low_outliers"]`` and at ``box["max"]`` when ``box["has_high_outliers"]`` (same convention as ``num_distr``). The function is fully defensive and NEVER raises. Entries that are not dicts, lack a ``box`` dict, or miss any of ``q1``/``median``/``q3`` are skipped. If after filtering no valid box remains it returns a placeholder ``Figure`` with a centered "(sin boxplots)"; any unexpected error is caught and turned into a fallback figure carrying the error text. It always returns a ``Figure``. Args: boxes: List of dicts ``{"name": str, "box": dict, "fliers": list|None}``. ``box`` is exactly the output of ``build_boxplot_stats`` (read with ``.get``: ``q1, median, q3, whisker_lo, whisker_hi, min, max, has_low_outliers, has_high_outliers, ...``). ``fliers`` is the optional list of raw outlier values; when present they are plotted, otherwise only the extremes are marked. title: Figure title (``fig.suptitle``). Empty => no title. When the list is longer than ``max_boxes`` a "(mostrando N de M)" note is appended. max_boxes: Draw at most the first ``max_boxes`` entries (default 12). The rest are dropped but their omission is surfaced in the title note, so the truncation is never silent. Returns: A ``matplotlib.figure.Figure`` with a single Axes holding the horizontal boxplots (height adaptive to the box count so none overlap). The caller is responsible for rasterizing/closing it; this function never shows nor saves it. """ try: if not isinstance(boxes, (list, tuple)) or len(boxes) == 0: return _placeholder_figure("(sin boxplots)") total = len(boxes) # Cap the number of boxes; tolerate a non-int / non-positive max_boxes. try: cap = int(max_boxes) except (TypeError, ValueError): cap = 12 if cap <= 0: cap = 12 candidates = list(boxes)[:cap] stats_list = [] # bxp stats records, in draw order. labels = [] # Y tick labels (column names). manual_markers = [] # (position, box) for entries without raw fliers. any_fliers = False # whether to enable showfliers in the bxp call. for entry in candidates: if not isinstance(entry, dict): continue box = entry.get("box") if not isinstance(box, dict): continue q1 = _num(box.get("q1")) med = _num(box.get("median")) q3 = _num(box.get("q3")) # Without the three quartiles a boxplot cannot be drawn — skip it. if q1 is None or med is None or q3 is None: continue # Whisker extremes fall back to the quartiles when missing. whislo = _num(box.get("whisker_lo")) whishi = _num(box.get("whisker_hi")) if whislo is None: whislo = q1 if whishi is None: whishi = q3 name = entry.get("name") label = "" if name is None else str(name) position = len(stats_list) + 1 # bxp positions are 1-indexed. fliers_raw = entry.get("fliers") if isinstance(fliers_raw, (list, tuple)): fliers = [v for v in (_num(x) for x in fliers_raw) if v is not None] if fliers: any_fliers = True else: # Raw values unknown: draw no bxp fliers, mark min/max by hand. fliers = [] manual_markers.append((position, box)) stats_list.append({ "med": med, "q1": q1, "q3": q3, "whislo": whislo, "whishi": whishi, "fliers": fliers, "label": label, }) labels.append(label) if not stats_list: return _placeholder_figure("(sin boxplots)") n = len(stats_list) positions = list(range(1, n + 1)) # Height grows with the box count so none of them overlap. height = max(2.0, 0.5 * n + 1.0) fig = Figure(figsize=(7.0, height), dpi=150) ax = fig.add_subplot(111) bxp_kw = dict( showfliers=any_fliers, widths=0.5, patch_artist=True, boxprops={"facecolor": _BOX_FACE, "edgecolor": _BOX_EDGE}, medianprops={"color": _MEDIAN, "linewidth": 1.6}, whiskerprops={"color": _BOX_EDGE}, capprops={"color": _BOX_EDGE}, flierprops={"marker": "o", "markersize": 3.5, "markerfacecolor": _OUTLIER, "markeredgecolor": _OUTLIER, "linestyle": "none"}) try: # ``orientation`` is the current API; older matplotlib uses ``vert``. try: ax.bxp(stats_list, positions=positions, orientation="horizontal", **bxp_kw) except TypeError: ax.bxp(stats_list, positions=positions, vert=False, **bxp_kw) except Exception: # noqa: BLE001 — never let bxp kill the whole figure. ax.text(0.5, 0.5, "(boxplot no disponible)", ha="center", va="center", fontsize=10, color=_MUTED_TEXT, transform=ax.transAxes) # For entries without raw fliers, mark only the out-of-fence extremes. for position, box in manual_markers: mn = _num(box.get("min")) mx = _num(box.get("max")) if box.get("has_low_outliers") and mn is not None: ax.plot([mn], [position], marker="o", markersize=3.5, color=_OUTLIER, zorder=5) if box.get("has_high_outliers") and mx is not None: ax.plot([mx], [position], marker="o", markersize=3.5, color=_OUTLIER, zorder=5) # Pin the Y tick labels explicitly so they work across matplotlib # versions regardless of whether ``bxp`` consumed the ``label`` key. ax.set_yticks(positions) ax.set_yticklabels(labels, fontsize=8) ax.set_xlabel("valor", fontsize=9) ax.tick_params(labelsize=7) ax.margins(y=0.15) for spine in ("top", "right"): ax.spines[spine].set_visible(False) # Surface truncation in the title instead of silently dropping boxes. note = f"(mostrando {n} de {total})" if total > cap else "" heading = " ".join(p for p in (title, note) if p) if heading: fig.suptitle(heading, fontsize=12, x=0.02, ha="left") fig.tight_layout() return fig except Exception as exc: # noqa: BLE001 — never raise from a figure builder. return _placeholder_figure( f"error al dibujar boxplots: {exc}", color=_ERROR_TEXT)