chore: auto-commit (26 archivos)
- python/functions/bigquery/bq_auth.md - python/functions/bigquery/bq_load_from_file.md - python/functions/bigquery/bq_load_from_gcs.md - python/functions/bigquery/client.py - python/functions/bigquery/queries.py - python/functions/datascience/__init__.py - python/functions/datascience/decode_qr_image.py - python/functions/datascience/load_bq_table_to_duckdb.md - python/functions/datascience/load_bq_table_to_duckdb.py - python/functions/pipelines/profile_bq_table.md - ... Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -42,6 +42,8 @@ def profile_bq_table(
|
||||
report_dir: str = "reports",
|
||||
duckdb_path: str = "",
|
||||
keep_duckdb: bool = False,
|
||||
where_sql: str = "",
|
||||
select_sql: str = "",
|
||||
) -> dict:
|
||||
"""EDA one-shot de una tabla/vista BigQuery.
|
||||
|
||||
@@ -63,6 +65,13 @@ def profile_bq_table(
|
||||
report_dir: Directorio de salida de los reports.
|
||||
duckdb_path: Ruta DuckDB a usar. Vacio = temporal autogestionado.
|
||||
keep_duckdb: Si True conserva el DuckDB materializado.
|
||||
where_sql: Clausula WHERE SQL (sin la palabra WHERE) aplicada al origen y a
|
||||
su COUNT. Pass-through a `load_bq_table_to_duckdb`. Ej:
|
||||
"fecha <= CURRENT_DATE() AND venta_n IS NOT NULL". Se interpola tal cual:
|
||||
no usar con input no confiable.
|
||||
select_sql: Expresiones del SELECT (sin la palabra SELECT); vacio = `*`.
|
||||
Pass-through a `load_bq_table_to_duckdb`. Util para castear tipos
|
||||
problematicos (p. ej. BIGNUMERIC->FLOAT64) antes de perfilar.
|
||||
|
||||
Returns:
|
||||
dict: resultado del pipeline en formato dict-no-throw (ver output del .md).
|
||||
@@ -83,6 +92,8 @@ def profile_bq_table(
|
||||
max_rows=max_rows,
|
||||
project_id=project_id,
|
||||
pseudonymize_cols=pseudonymize_cols,
|
||||
where_sql=where_sql,
|
||||
select_sql=select_sql,
|
||||
)
|
||||
if load.get("status") != "ok":
|
||||
return {
|
||||
@@ -111,14 +122,24 @@ def profile_bq_table(
|
||||
"load": load,
|
||||
}
|
||||
|
||||
load_block = {
|
||||
k: load[k]
|
||||
for k in (
|
||||
"n_rows_source", "n_rows_fetched", "sampled", "sample_frac",
|
||||
"pseudonymized", "table", "streamed",
|
||||
)
|
||||
if k in load
|
||||
}
|
||||
# Trazabilidad de los filtros de origen (solo si vienen informados).
|
||||
if where_sql:
|
||||
load_block["where_sql"] = where_sql
|
||||
if select_sql:
|
||||
load_block["select_sql"] = select_sql
|
||||
|
||||
return {
|
||||
"status": "ok",
|
||||
"table_fqn": table_fqn,
|
||||
"load": {
|
||||
k: load[k]
|
||||
for k in ("n_rows_source", "n_rows_fetched", "sampled", "sample_frac", "pseudonymized", "table")
|
||||
if k in load
|
||||
},
|
||||
"load": load_block,
|
||||
"duckdb_path": duckdb_path if keep_duckdb else None,
|
||||
"report_md_path": prof.get("report_md_path"),
|
||||
"report_json_path": prof.get("report_json_path"),
|
||||
|
||||
Reference in New Issue
Block a user