chore: auto-commit (8 archivos)

- scratchpad/gen_docs.py - scratchpad/gen_intel.py - scratchpad/gen_verify.py - scratchpad/intel_build.json - scratchpad/intel_lineage.json - scratchpad/lineage_graph.json - scratchpad/trace_intel.py - scratchpad/trace_lineage.py Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-07-01 19:00:06 +02:00
parent fbdf80bd71
commit 2ebc9efeb2
8 changed files with 1724 additions and 0 deletions
@@ -0,0 +1,106 @@
+"""Trace how the clientes_intel tables are built: for each table, fetch the SQL of the
+last job that wrote it (INFORMATION_SCHEMA.JOBS) plus its referenced_tables, and walk
+backwards to the source tables (TPV, customers, users, Navision, Salesforce).
+
+Dumps the per-table build info to scratchpad/intel_build.json and the resulting lineage
+(clientes_intel tables involved + external sources) to scratchpad/intel_lineage.json.
+"""
+import json
+import warnings
+
+warnings.filterwarnings("ignore")
+import google.auth
+from google.cloud import bigquery
+
+# BigQuery project whose job history is inspected, and the region qualifier used to
+# address its INFORMATION_SCHEMA views.
+PROJECT = "autingo-159109"
+REGION = "region-europe-west1"
+
+# Application Default Credentials restricted to the BigQuery scope. google.auth.default()
+# also returns a project id, which is ignored because PROJECT is passed explicitly.
+creds = google.auth.default(
+    scopes=["https://www.googleapis.com/auth/bigquery"],
+)[0]
+# Clear any quota project attached to the credentials.
+# NOTE(review): presumably so usage is attributed to PROJECT rather than to the ADC
+# quota project - confirm.
+creds = creds.with_quota_project(None)
+c = bigquery.Client(credentials=creds, project=PROJECT)
+
+# Latest job per destination table in clientes_intel: query text, referenced_tables and
+# statement type. Only jobs that finished without error, have a statement type and were
+# created within the last 120 days are considered; ROW_NUMBER() ranks the jobs of each
+# table newest-first and the outer SELECT keeps rank 1.
+# UNNEST([destination_table]) exposes the destination struct under the alias `dest`.
+sql = f"""
+WITH j AS (
+  SELECT
+    dest.table_id AS tbl,
+    query,
+    statement_type AS stmt,
+    creation_time,
+    ARRAY(
+      SELECT AS STRUCT rt.project_id, rt.dataset_id, rt.table_id
+      FROM UNNEST(referenced_tables) rt
+    ) AS refs,
+    ROW_NUMBER() OVER (PARTITION BY dest.table_id ORDER BY creation_time DESC) AS rn
+  FROM `{PROJECT}`.`{REGION}`.INFORMATION_SCHEMA.JOBS_BY_PROJECT,
+       UNNEST([destination_table]) dest
+  WHERE dest.dataset_id = 'clientes_intel'
+    AND state = 'DONE' AND error_result IS NULL
+    AND statement_type IS NOT NULL
+    AND creation_time > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 120 DAY)
+)
+SELECT tbl, query, stmt, creation_time, refs FROM j WHERE rn = 1
+ORDER BY tbl
+"""
+
+# Map each clientes_intel table to the job that last built it. References are kept as
+# "dataset.table" strings (the project id is dropped), deduplicated and sorted.
+builds = {}
+for r in c.query(sql).result():
+    refs = {f"{rt['dataset_id']}.{rt['table_id']}" for rt in r.refs}
+    # A job may list its own destination among referenced_tables; drop only that exact
+    # self-reference. Matching on the bare table-name suffix would also discard a
+    # same-named table living in another dataset, hiding a genuine upstream source.
+    refs.discard(f"clientes_intel.{r.tbl}")
+    builds[r.tbl] = {
+        "query": r.query or "",
+        "stmt": r.stmt,
+        "last_run": str(r.creation_time),
+        "refs": sorted(refs),
+    }
+
+# ensure_ascii=False writes non-ASCII characters verbatim, so the encoding is pinned to
+# UTF-8 instead of the platform default; the context manager closes the file promptly.
+with open("scratchpad/intel_build.json", "w", encoding="utf-8") as fh:
+    json.dump(builds, fh, indent=2, ensure_ascii=False)
+print(f"tablas clientes_intel con SQL de construccion capturado: {len(builds)}\n")
+
+# Walk the lineage backwards, starting from the 12 tables used by customer_marts.
+SEED = [
+    "dim_persona", "dim_vehiculo", "fact_transaccion", "fact_campana_respuesta",
+    "feat_cliente_persona", "feat_cliente_vehiculo", "seg_cliente_360", "score_clv",
+    "reco_acciones", "map_persona_canal8", "map_persona_fuente", "map_persona_vehiculo",
+]
+intel_involved = set()    # clientes_intel tables reached from SEED (SEED included)
+external_sources = set()  # "dataset.table" refs that live outside clientes_intel
+pending = list(SEED)
+while pending:
+    current = pending.pop()
+    if current in intel_involved:
+        # Already visited via another path.
+        continue
+    intel_involved.add(current)
+    build = builds.get(current)
+    if not build:
+        # No captured build job for this table: nothing to expand.
+        continue
+    for ref in build["refs"]:
+        dataset, table = ref.split(".", 1)
+        if dataset != "clientes_intel":
+            external_sources.add(ref)
+        elif table not in intel_involved:
+            pending.append(table)
+
+# Report 1: every clientes_intel table in the lineage with the statement type of its
+# last build job, or a placeholder when no job was captured for it.
+print("== tablas clientes_intel implicadas en el linaje de customer_marts ==")
+for name in sorted(intel_involved):
+    stmt = builds.get(name, {}).get("stmt", "(sin job)")
+    print(f"  {name:26s} {stmt}")
+
+# Report 2: all references that point outside clientes_intel.
+print("\n== FUENTES EXTERNAS (fuera de clientes_intel) usadas por el pipeline ==")
+ordered_sources = sorted(external_sources)
+for source in ordered_sources:
+    print(f"  {source}")
+
+# Report 3: flag the CUSTOMER-related sources the user asked about, using a
+# case-insensitive substring match against these keywords.
+KEYS = ["customer", "customers", "cliente", "user", "usuario", "tpv", "salesforce",
+        "sf_", "contact", "mkt_cloud", "persona"]
+print("\n== fuentes que parecen de CLIENTE/usuario ==")
+customer_like = [s for s in ordered_sources if any(k in s.lower() for k in KEYS)]
+for source in customer_like:
+    print(f"  {source}")
+
+# Persist the lineage summary (sorted for stable output). The context manager closes
+# the file deterministically, and the encoding is pinned to UTF-8 because
+# ensure_ascii=False writes non-ASCII characters verbatim.
+with open("scratchpad/intel_lineage.json", "w", encoding="utf-8") as fh:
+    json.dump(
+        {
+            "intel_involved": sorted(intel_involved),
+            "external_sources": sorted(external_sources),
+        },
+        fh,
+        indent=2,
+        ensure_ascii=False,
+    )