merge: 4b num_distr — desv std (sigma) en leyenda del histograma (verificado met)
This commit is contained in:
@@ -1,9 +1,10 @@
|
|||||||
"""Numeric distributions chapter (NUM DISTR) for AutomaticEDA.
|
"""Numeric distributions chapter (NUM DISTR) for AutomaticEDA.
|
||||||
|
|
||||||
For every numeric column the chapter draws, as a single indivisible figure, a
|
For every numeric column the chapter draws, as a single indivisible figure, a
|
||||||
histogram with the **mean, median and ±1σ band drawn as reference lines** and a
|
histogram with the **mean, median and ±1σ band drawn as reference lines** (the
|
||||||
**Tukey boxplot right below it** sharing the same X axis — exactly the user
|
legend reports the numeric value of the mean, the median **and the standard
|
||||||
requirement for this chapter. Each figure is emitted as a lazy ``Figure`` block
|
deviation σ**) and a **Tukey boxplot right below it** sharing the same X axis —
|
||||||
|
exactly the user requirement for this chapter. Each figure is emitted as a lazy ``Figure`` block
|
||||||
so the renderers rasterize and scale it to fit a whole page/slide and nothing is
|
so the renderers rasterize and scale it to fit a whole page/slide and nothing is
|
||||||
ever cut; columns with many numerics simply flow across pages as small
|
ever cut; columns with many numerics simply flow across pages as small
|
||||||
multiples.
|
multiples.
|
||||||
@@ -34,7 +35,7 @@ try:
|
|||||||
except Exception: # noqa: BLE001 — keep the chapter importable no matter what.
|
except Exception: # noqa: BLE001 — keep the chapter importable no matter what.
|
||||||
build_boxplot_stats = None # type: ignore[assignment]
|
build_boxplot_stats = None # type: ignore[assignment]
|
||||||
|
|
||||||
CHAPTER_VERSION = "1.1.0"
|
CHAPTER_VERSION = "1.2.0"
|
||||||
CHAPTER_ID = "num_distr"
|
CHAPTER_ID = "num_distr"
|
||||||
CHAPTER_TITLE = "Distribuciones numéricas"
|
CHAPTER_TITLE = "Distribuciones numéricas"
|
||||||
|
|
||||||
@@ -140,9 +141,11 @@ def _make_hist_box(name: str, numeric: dict, box: dict):
|
|||||||
std = numeric.get("std")
|
std = numeric.get("std")
|
||||||
|
|
||||||
# ±1σ band first (behind the lines), then median (solid) and mean (dashed).
|
# ±1σ band first (behind the lines), then median (solid) and mean (dashed).
|
||||||
|
# The band's legend entry also reports the numeric value of the standard
|
||||||
|
# deviation, so the reader sees mean, median AND σ at a glance.
|
||||||
if mean is not None and std is not None and std > 0:
|
if mean is not None and std is not None and std > 0:
|
||||||
ax_h.axvspan(mean - std, mean + std, color="#f0c27b", alpha=0.22,
|
ax_h.axvspan(mean - std, mean + std, color="#f0c27b", alpha=0.22,
|
||||||
zorder=1, label="±1σ")
|
zorder=1, label=f"±1σ (σ = {_fmt_num(std)})")
|
||||||
if median is not None:
|
if median is not None:
|
||||||
ax_h.axvline(median, color="#2e8b57", linestyle="-", linewidth=1.6,
|
ax_h.axvline(median, color="#2e8b57", linestyle="-", linewidth=1.6,
|
||||||
zorder=4, label=f"mediana = {_fmt_num(median)}")
|
zorder=4, label=f"mediana = {_fmt_num(median)}")
|
||||||
@@ -152,7 +155,19 @@ def _make_hist_box(name: str, numeric: dict, box: dict):
|
|||||||
|
|
||||||
ax_h.set_ylabel("frecuencia", fontsize=8)
|
ax_h.set_ylabel("frecuencia", fontsize=8)
|
||||||
ax_h.tick_params(labelsize=7)
|
ax_h.tick_params(labelsize=7)
|
||||||
ax_h.legend(fontsize=6.5, loc="upper right", framealpha=0.85)
|
# Always surface σ in the legend: if the ±1σ band could not be drawn (no mean
|
||||||
|
# or std<=0) but σ is still known, add a label-only proxy handle so the value
|
||||||
|
# of the standard deviation is reported regardless of the band.
|
||||||
|
handles, labels = ax_h.get_legend_handles_labels()
|
||||||
|
if std is not None and not any("σ =" in lbl for lbl in labels):
|
||||||
|
from matplotlib.lines import Line2D
|
||||||
|
proxy = Line2D([], [], linestyle="none", marker="",
|
||||||
|
label=f"σ = {_fmt_num(std)}")
|
||||||
|
handles.append(proxy)
|
||||||
|
labels.append(f"σ = {_fmt_num(std)}")
|
||||||
|
if handles:
|
||||||
|
ax_h.legend(handles, labels, fontsize=6.5, loc="upper right",
|
||||||
|
framealpha=0.85)
|
||||||
for spine in ("top", "right"):
|
for spine in ("top", "right"):
|
||||||
ax_h.spines[spine].set_visible(False)
|
ax_h.spines[spine].set_visible(False)
|
||||||
|
|
||||||
|
|||||||
@@ -159,6 +159,50 @@ def test_anti_corte_muchas_columnas_pdf_y_pptx():
|
|||||||
assert res_pptx["n_slides"] >= 8 # at least one slide per column figure.
|
assert res_pptx["n_slides"] >= 8 # at least one slide per column figure.
|
||||||
|
|
||||||
|
|
||||||
|
def _hist_legend_texts(numeric, box=None):
    """Build the per-column figure and return its histogram-legend label texts.

    Args:
        numeric: Numeric-summary mapping forwarded to ``_make_hist_box``.
        box: Optional boxplot-stats mapping; an empty dict is forwarded when
            it is ``None`` (or otherwise falsy).

    Returns:
        The texts of the legend entries on the figure's first axis, in legend
        order, or an empty list when that axis carries no legend.
    """
    from datascience.automatic_eda.chapters.num_distr import _make_hist_box
    import matplotlib.pyplot as plt

    fig = _make_hist_box("col", numeric, box or {})
    try:
        ax_h = fig.axes[0]  # the histogram is the top axis.
        leg = ax_h.get_legend()
        return [t.get_text() for t in leg.get_texts()] if leg else []
    finally:
        # Close even when inspecting the axes/legend raises, so a failing
        # test does not leave an open figure behind for the following tests.
        plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
|
def test_golden_leyenda_histograma_reporta_valor_std():
    """Golden case: the legend reports the σ value next to mean and median."""
    texts = _hist_legend_texts(
        _numeric_block(42.5, 40.0, 12.3, 1.0, 100.0, "right-skewed", 5)
    )

    def mentions(fragment):
        # True when at least one legend entry contains ``fragment``.
        return any(fragment in entry for entry in texts)

    # The σ entry must exist and carry the numeric standard deviation.
    assert mentions("σ ="), f"σ value missing in legend: {texts}"
    assert "12.3" in " ".join(texts), f"std value 12.3 not in legend: {texts}"
    # The mean and median entries must still be present.
    assert mentions("media =")
    assert mentions("mediana =")
|
||||||
|
|
||||||
|
|
||||||
|
def test_edge_std_en_leyenda_aunque_no_haya_banda():
    """σ is still surfaced in the legend when the ±1σ band cannot be drawn.

    With no mean the band is off, yet a known σ must appear in the legend
    through the label-only proxy handle.
    """
    stats = _numeric_block(42.5, 40.0, 7.5, 1.0, 100.0, "right-skewed", 0)
    stats["mean"] = None  # forces the band off; σ must still appear.
    texts = _hist_legend_texts(stats)
    sigma_entries = [entry for entry in texts if "σ = 7.5" in entry]
    assert sigma_entries, f"σ proxy missing: {texts}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_edge_sin_std_no_revienta_la_figura():
    """A numeric block without σ must not raise and simply omits the σ entry."""
    numeric = _numeric_block(42.5, 40.0, 0.0, 1.0, 100.0, "discrete", 0)
    numeric["std"] = None
    texts = _hist_legend_texts(numeric)
    assert not any("σ =" in t for t in texts), f"unexpected σ entry: {texts}"
    # mean/median lines still produce their own legend entries.
    assert any("media =" in t for t in texts), f"mean entry missing: {texts}"
    assert any("mediana =" in t for t in texts), f"median entry missing: {texts}"
|
||||||
|
|
||||||
|
|
||||||
def test_distribution_gloss_cubre_todas_las_etiquetas():
|
def test_distribution_gloss_cubre_todas_las_etiquetas():
|
||||||
# Every label detect_distribution_type can emit has a Spanish gloss.
|
# Every label detect_distribution_type can emit has a Spanish gloss.
|
||||||
for label in ("normal-ish", "right-skewed", "left-skewed", "heavy-tail",
|
for label in ("normal-ish", "right-skewed", "left-skewed", "heavy-tail",
|
||||||
|
|||||||
Reference in New Issue
Block a user