feat(eda): render de models en markdown + PDF DB-level para profile_database (H4,H9)
- H4: render_eda_markdown añade sección Modelos (PCA/KMeans/normalidad/outliers); render_eda_pdf formatea models/series/caveats como tablas (no str(dict) crudo)
- H9: profile_database gana flag emit_pdf -> PDF movil DB-level (resumen tablas + join graph) via render_eda_pdf_relational; clave report_pdf_path
- aditivos y retrocompatibles (flags default False). 38 tests verdes

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,7 @@ Funciones del registry compuestas (NO se reimplementa su logica):
|
||||
- build_join_graph : grafo de relaciones inter-tabla + diagrama Mermaid.
|
||||
- duckdb_list_tables : introspeccion "que tablas hay" (read-only).
|
||||
- render_eda_markdown : report legible de un TableProfile.
|
||||
- render_eda_pdf_relational : PDF movil DB-level (resumen de tablas + join graph).
|
||||
|
||||
Aporta una capa propia de AGREGACION A NIVEL DE BASE: ensambla un DatabaseProfile
|
||||
con el resumen de cada tabla, los TableProfiles completos, las FK candidatas y el
|
||||
@@ -31,6 +32,7 @@ from datascience import (
|
||||
build_join_graph,
|
||||
infer_fk_containment_duckdb,
|
||||
render_eda_markdown,
|
||||
render_eda_pdf_relational,
|
||||
)
|
||||
from infra import duckdb_list_tables
|
||||
from pipelines.profile_table import profile_table
|
||||
@@ -118,6 +120,7 @@ def profile_database(
|
||||
report_dir: str = "reports",
|
||||
write_report: bool = True,
|
||||
min_inclusion: float = 0.9,
|
||||
emit_pdf: bool = False,
|
||||
) -> dict:
|
||||
"""Perfila una base DuckDB entera + sus relaciones inter-tabla.
|
||||
|
||||
@@ -134,11 +137,16 @@ def profile_database(
|
||||
paths del retorno son None.
|
||||
min_inclusion: umbral minimo de inclusion (0-1) para emitir una FK
|
||||
candidata (se pasa a infer_fk_containment_duckdb). Default 0.9.
|
||||
emit_pdf: si True (default False) renderiza un PDF movil DB-level con
|
||||
render_eda_pdf_relational (resumen de tablas + relaciones FK + join
|
||||
graph) junto a los reports y devuelve su ruta en report_pdf_path. Con
|
||||
False no se toca el PDF (retrocompatible) y report_pdf_path es None.
|
||||
|
||||
Returns:
|
||||
dict dict-no-throw. En exito:
|
||||
{status:'ok', db_profile:<DatabaseProfile>,
|
||||
report_md_path:str|None, report_json_path:str|None}.
|
||||
report_md_path:str|None, report_json_path:str|None,
|
||||
report_pdf_path:str|None}.
|
||||
En error (sin lanzar): {status:'error', error:str}.
|
||||
|
||||
DatabaseProfile = {
|
||||
@@ -204,12 +212,13 @@ def profile_database(
|
||||
"errors": errors,
|
||||
}
|
||||
|
||||
# 6) Reports opcionales.
|
||||
# 6) Reports opcionales (markdown + JSON sidecar + PDF movil DB-level).
|
||||
report_md_path = None
|
||||
report_json_path = None
|
||||
report_pdf_path = None
|
||||
ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
|
||||
if write_report:
|
||||
os.makedirs(report_dir, exist_ok=True)
|
||||
ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
|
||||
report_json_path = os.path.join(report_dir, f"eda_db_{ts}.json")
|
||||
report_md_path = os.path.join(report_dir, f"eda_db_{ts}.md")
|
||||
with open(report_json_path, "w", encoding="utf-8") as fh:
|
||||
@@ -219,11 +228,23 @@ def profile_database(
|
||||
with open(report_md_path, "w", encoding="utf-8") as fh:
|
||||
fh.write(_render_db_markdown(db_profile))
|
||||
|
||||
# PDF DB-level (legible en movil): resumen de tablas + join graph. Se
|
||||
# genera bajo demanda (emit_pdf) reusando el renderer relational del grupo.
|
||||
if emit_pdf:
|
||||
try:
|
||||
os.makedirs(report_dir, exist_ok=True)
|
||||
pdf_target = os.path.join(report_dir, f"eda_db_{ts}.pdf")
|
||||
pres = render_eda_pdf_relational(db_profile, pdf_target)
|
||||
report_pdf_path = pres.get("pdf_path")
|
||||
except Exception: # noqa: BLE001
|
||||
report_pdf_path = None
|
||||
|
||||
return {
|
||||
"status": "ok",
|
||||
"db_profile": db_profile,
|
||||
"report_md_path": report_md_path,
|
||||
"report_json_path": report_json_path,
|
||||
"report_pdf_path": report_pdf_path,
|
||||
}
|
||||
except Exception as e: # noqa: BLE001
|
||||
return {"status": "error", "error": str(e)}
|
||||
|
||||
@@ -165,3 +165,36 @@ def test_profile_database_writes_report(tmp_path):
|
||||
assert "# EDA base —" in md
|
||||
assert "## Relaciones inter-tabla" in md
|
||||
assert "```mermaid" in md
|
||||
|
||||
|
||||
def test_profile_database_emit_pdf(tmp_path):
    """H9: with emit_pdf=True, profile_database produces a DB-level PDF.

    Checks that the returned report_pdf_path points at an existing,
    non-empty file whose first four bytes are the ``%PDF`` header. The
    markdown and JSON reports are requested too (write_report=True) but are
    not inspected here.
    """
    base_dir = str(tmp_path)
    db_path = os.path.join(base_dir, "shop3.duckdb")
    _build_related_db(db_path)

    res = profile_database(
        db_path,
        report_dir=os.path.join(base_dir, "reports"),
        write_report=True,
        emit_pdf=True,
    )

    assert res["status"] == "ok", res

    pdf = res.get("report_pdf_path")
    assert pdf is not None
    assert os.path.exists(pdf)
    assert os.path.getsize(pdf) > 0

    # Read the magic bytes first, then compare outside the file context.
    with open(pdf, "rb") as fh:
        magic = fh.read(4)
    assert magic == b"%PDF"
|
||||
|
||||
|
||||
def test_profile_database_emit_pdf_false_retrocompat(tmp_path):
    """Edge: the default emit_pdf=False keeps the previous behaviour.

    With the flag left at its default, the result must still carry the
    report_pdf_path key, set to None, and no PDF file may be written into
    the report directory.
    """
    db_path = os.path.join(str(tmp_path), "shop4.duckdb")
    _build_related_db(db_path)
    report_dir = os.path.join(str(tmp_path), "reports")

    res = profile_database(db_path, report_dir=report_dir, write_report=True)

    assert res["status"] == "ok", res

    # The key must be present and explicitly None; a plain .get() check
    # would also pass if the key were missing from the result.
    assert "report_pdf_path" in res
    assert res["report_pdf_path"] is None

    # Verify on disk that no PDF was generated, not just that the returned
    # path is None. report_dir exists because write_report=True creates it.
    stray_pdfs = [
        name for name in os.listdir(report_dir) if name.endswith(".pdf")
    ]
    assert not stray_pdfs, stray_pdfs
|
||||
|
||||
Reference in New Issue
Block a user