fix(eda): bugs de bajo riesgo del benchmark (H1,H5,H12,H13,H14) + tests faltantes
- H1: render_eda_markdown ya no aplica doble x100 a outlier_pct (336% -> real)
- H5: profile_database filtra base_tables_only (excluye VIEWs; sakila 21->16)
- H12: suggest_reexpression salta columnas no-continuas
- H13: to_returns/profile_table elige retornos (financiera) vs diferencias (física)
- H14: test de regresión ATTACH sqlite vía information_schema
- +8 tests de las funciones eda nuevas (acf_pacf, adf_kpss, ...). 77 tests verdes
- L/M (H2,H3,H4,H6,H7,H8,H9,H10,H11) quedan en issues 0174-0177 para revisión

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -53,7 +53,9 @@ def _sample_profile(correlations=None, llm=None):
|
||||
"p99": 95.0,
|
||||
"skew": 0.4,
|
||||
"kurtosis": 2.1,
|
||||
"outlier_pct": 0.012,
|
||||
# outlier_pct ya viene en escala 0-100 desde describe_numeric
|
||||
# (100 * n_outliers / n), NO en fracción 0-1.
|
||||
"outlier_pct": 3.5,
|
||||
"distribution_type": "right-skewed",
|
||||
"histogram": [
|
||||
{"lo": 0, "hi": 25, "count": 100},
|
||||
@@ -126,8 +128,15 @@ def test_pct_fields_scaled_by_100():
|
||||
assert "0.86%" not in md
|
||||
# categorical top pct=0.5 -> "50.0%".
|
||||
assert "50.0" in md
|
||||
# outlier_pct=0.012 -> "1.20%".
|
||||
assert "1.20%" in md
|
||||
|
||||
|
||||
def test_outlier_pct_not_double_scaled():
|
||||
# outlier_pct ya viene en escala 0-100 (describe_numeric): el render lo muestra
|
||||
# tal cual + '%', SIN multiplicar otra vez por 100. outlier_pct=3.5 -> "3.5%",
|
||||
# nunca "350%" (el bug del doble ×100).
|
||||
md = render_eda_markdown(_sample_profile())
|
||||
assert "3.5%" in md
|
||||
assert "350" not in md
|
||||
|
||||
|
||||
def test_pct_handles_none_as_blank():
|
||||
|
||||
Reference in New Issue
Block a user