feat(eda): render de models en markdown + PDF DB-level para profile_database (H4,H9)
- H4: render_eda_markdown anade la seccion Modelos (PCA/KMeans/normalidad/outliers); render_eda_pdf formatea models/series/caveats como tablas (no como str(dict) crudo).
- H9: profile_database gana el flag emit_pdf -> PDF movil DB-level (resumen de tablas + join graph) via render_eda_pdf_relational; nueva clave report_pdf_path en el retorno.
- Cambios aditivos y retrocompatibles (flags con default False). 38 tests verdes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,7 @@ Funciones del registry compuestas (NO se reimplementa su logica):
|
||||
- build_join_graph : grafo de relaciones inter-tabla + diagrama Mermaid.
|
||||
- duckdb_list_tables : introspeccion "que tablas hay" (read-only).
|
||||
- render_eda_markdown : report legible de un TableProfile.
|
||||
- render_eda_pdf_relational : PDF movil DB-level (resumen de tablas + join graph).
|
||||
|
||||
Aporta una capa propia de AGREGACION A NIVEL DE BASE: ensambla un DatabaseProfile
|
||||
con el resumen de cada tabla, los TableProfiles completos, las FK candidatas y el
|
||||
@@ -31,6 +32,7 @@ from datascience import (
|
||||
build_join_graph,
|
||||
infer_fk_containment_duckdb,
|
||||
render_eda_markdown,
|
||||
render_eda_pdf_relational,
|
||||
)
|
||||
from infra import duckdb_list_tables
|
||||
from pipelines.profile_table import profile_table
|
||||
@@ -118,6 +120,7 @@ def profile_database(
|
||||
report_dir: str = "reports",
|
||||
write_report: bool = True,
|
||||
min_inclusion: float = 0.9,
|
||||
emit_pdf: bool = False,
|
||||
) -> dict:
|
||||
"""Perfila una base DuckDB entera + sus relaciones inter-tabla.
|
||||
|
||||
@@ -134,11 +137,16 @@ def profile_database(
|
||||
paths del retorno son None.
|
||||
min_inclusion: umbral minimo de inclusion (0-1) para emitir una FK
|
||||
candidata (se pasa a infer_fk_containment_duckdb). Default 0.9.
|
||||
emit_pdf: si True (default False) renderiza un PDF movil DB-level con
|
||||
render_eda_pdf_relational (resumen de tablas + relaciones FK + join
|
||||
graph) junto a los reports y devuelve su ruta en report_pdf_path. Con
|
||||
False no se toca el PDF (retrocompatible) y report_pdf_path es None.
|
||||
|
||||
Returns:
|
||||
dict dict-no-throw. En exito:
|
||||
{status:'ok', db_profile:<DatabaseProfile>,
|
||||
report_md_path:str|None, report_json_path:str|None}.
|
||||
report_md_path:str|None, report_json_path:str|None,
|
||||
report_pdf_path:str|None}.
|
||||
En error (sin lanzar): {status:'error', error:str}.
|
||||
|
||||
DatabaseProfile = {
|
||||
@@ -204,12 +212,13 @@ def profile_database(
|
||||
"errors": errors,
|
||||
}
|
||||
|
||||
# 6) Reports opcionales.
|
||||
# 6) Reports opcionales (markdown + JSON sidecar + PDF movil DB-level).
|
||||
report_md_path = None
|
||||
report_json_path = None
|
||||
report_pdf_path = None
|
||||
ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
|
||||
if write_report:
|
||||
os.makedirs(report_dir, exist_ok=True)
|
||||
ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
|
||||
report_json_path = os.path.join(report_dir, f"eda_db_{ts}.json")
|
||||
report_md_path = os.path.join(report_dir, f"eda_db_{ts}.md")
|
||||
with open(report_json_path, "w", encoding="utf-8") as fh:
|
||||
@@ -219,11 +228,23 @@ def profile_database(
|
||||
with open(report_md_path, "w", encoding="utf-8") as fh:
|
||||
fh.write(_render_db_markdown(db_profile))
|
||||
|
||||
# PDF DB-level (legible en movil): resumen de tablas + join graph. Se
|
||||
# genera bajo demanda (emit_pdf) reusando el renderer relational del grupo.
|
||||
if emit_pdf:
|
||||
try:
|
||||
os.makedirs(report_dir, exist_ok=True)
|
||||
pdf_target = os.path.join(report_dir, f"eda_db_{ts}.pdf")
|
||||
pres = render_eda_pdf_relational(db_profile, pdf_target)
|
||||
report_pdf_path = pres.get("pdf_path")
|
||||
except Exception: # noqa: BLE001
|
||||
report_pdf_path = None
|
||||
|
||||
return {
|
||||
"status": "ok",
|
||||
"db_profile": db_profile,
|
||||
"report_md_path": report_md_path,
|
||||
"report_json_path": report_json_path,
|
||||
"report_pdf_path": report_pdf_path,
|
||||
}
|
||||
except Exception as e: # noqa: BLE001
|
||||
return {"status": "error", "error": str(e)}
|
||||
|
||||
Reference in New Issue
Block a user