chore: auto-commit (26 archivos)

- python/functions/bigquery/bq_auth.md
- python/functions/bigquery/bq_load_from_file.md
- python/functions/bigquery/bq_load_from_gcs.md
- python/functions/bigquery/client.py
- python/functions/bigquery/queries.py
- python/functions/datascience/__init__.py
- python/functions/datascience/decode_qr_image.py
- python/functions/datascience/load_bq_table_to_duckdb.md
- python/functions/datascience/load_bq_table_to_duckdb.py
- python/functions/pipelines/profile_bq_table.md
- ...

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-07-02 19:00:13 +02:00
parent 2ebc9efeb2
commit 5a4f82cf76
26 changed files with 2573 additions and 94 deletions
+26 -5
View File
@@ -42,6 +42,8 @@ def profile_bq_table(
report_dir: str = "reports",
duckdb_path: str = "",
keep_duckdb: bool = False,
where_sql: str = "",
select_sql: str = "",
) -> dict:
"""EDA one-shot de una tabla/vista BigQuery.
@@ -63,6 +65,13 @@ def profile_bq_table(
report_dir: Directorio de salida de los reports.
duckdb_path: Ruta DuckDB a usar. Vacio = temporal autogestionado.
keep_duckdb: Si True conserva el DuckDB materializado.
where_sql: Clausula WHERE SQL (sin la palabra WHERE) aplicada al origen y a
su COUNT. Pass-through a `load_bq_table_to_duckdb`. Ej:
"fecha <= CURRENT_DATE() AND venta_n IS NOT NULL". Se interpola tal cual:
no usar con input no confiable.
select_sql: Expresiones del SELECT (sin la palabra SELECT); vacio = `*`.
Pass-through a `load_bq_table_to_duckdb`. Util para castear tipos
problematicos (p. ej. BIGNUMERIC->FLOAT64) antes de perfilar.
Returns:
dict dict-no-throw con el resultado del pipeline (ver output del .md).
@@ -83,6 +92,8 @@ def profile_bq_table(
max_rows=max_rows,
project_id=project_id,
pseudonymize_cols=pseudonymize_cols,
where_sql=where_sql,
select_sql=select_sql,
)
if load.get("status") != "ok":
return {
@@ -111,14 +122,24 @@ def profile_bq_table(
"load": load,
}
load_block = {
k: load[k]
for k in (
"n_rows_source", "n_rows_fetched", "sampled", "sample_frac",
"pseudonymized", "table", "streamed",
)
if k in load
}
# Trazabilidad de los filtros de origen (solo si vienen informados).
if where_sql:
load_block["where_sql"] = where_sql
if select_sql:
load_block["select_sql"] = select_sql
return {
"status": "ok",
"table_fqn": table_fqn,
"load": {
k: load[k]
for k in ("n_rows_source", "n_rows_fetched", "sampled", "sample_frac", "pseudonymized", "table")
if k in load
},
"load": load_block,
"duckdb_path": duckdb_path if keep_duckdb else None,
"report_md_path": prof.get("report_md_path"),
"report_json_path": prof.get("report_json_path"),