docs(flows): DoD obligatorio con user-facing surface + abrir issues 0100-0103 (taxonomia, frontmatter migration, dev_console, work dashboard)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -41,9 +41,14 @@ def _ws_send_recv(ws, msg_id: int, method: str, params: dict, timeout: float = 1
|
||||
|
||||
|
||||
def _poll_selector(ws, selector: str, timeout_s: float = 10.0) -> bool:
|
||||
"""Polling cada 200ms hasta que document.querySelector(selector) no sea null."""
|
||||
"""Polling cada 200ms hasta que document.querySelector(selector) no sea null.
|
||||
|
||||
Drena eventos CDP (paginas con Page.enable emiten loads, frames, etc.) y
|
||||
matchea por `id` para evitar leer respuestas ajenas o eventos del server.
|
||||
"""
|
||||
deadline = time.time() + timeout_s
|
||||
msg_id = 1000
|
||||
ws.settimeout(0.5)
|
||||
while time.time() < deadline:
|
||||
ws.send(json.dumps({
|
||||
"id": msg_id,
|
||||
@@ -53,19 +58,28 @@ def _poll_selector(ws, selector: str, timeout_s: float = 10.0) -> bool:
|
||||
"returnByValue": True,
|
||||
}
|
||||
}))
|
||||
time.sleep(0.2)
|
||||
msg_id += 1
|
||||
# Leer respuesta en loop simple (websocket-client sync)
|
||||
# Para modo sync usamos recv()
|
||||
try:
|
||||
raw = ws.sock.recv()
|
||||
if raw:
|
||||
# Leer hasta 30 frames buscando uno con nuestro id; ignorar eventos.
|
||||
got_response = False
|
||||
for _ in range(30):
|
||||
try:
|
||||
raw = ws.recv()
|
||||
except Exception:
|
||||
break
|
||||
if not raw:
|
||||
break
|
||||
try:
|
||||
msg = json.loads(raw)
|
||||
except Exception:
|
||||
continue
|
||||
if msg.get("id") == msg_id:
|
||||
got_response = True
|
||||
val = msg.get("result", {}).get("result", {}).get("value", False)
|
||||
if val:
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
break
|
||||
msg_id += 1
|
||||
if not got_response:
|
||||
time.sleep(0.2)
|
||||
return False
|
||||
|
||||
|
||||
@@ -188,16 +202,114 @@ def cdp_extract_recipe(
|
||||
out_path = output_cfg.get("path", "output.json")
|
||||
with open(out_path, "w", encoding="utf-8") as f:
|
||||
json.dump(rows, f, ensure_ascii=False, indent=2)
|
||||
elif sink == "duckdb":
|
||||
duckdb_path = output_cfg.get("duckdb_path", "")
|
||||
table_name = output_cfg.get("table", "")
|
||||
if not duckdb_path or not table_name:
|
||||
# not fatal: rows already returned via sample_rows
|
||||
pass
|
||||
else:
|
||||
import duckdb
|
||||
import uuid
|
||||
import datetime
|
||||
# resolve duckdb_path relative to FN_REGISTRY_ROOT if not absolute
|
||||
if not os.path.isabs(duckdb_path):
|
||||
duckdb_path = os.path.join(os.environ.get("FN_REGISTRY_ROOT", ""), duckdb_path)
|
||||
os.makedirs(os.path.dirname(duckdb_path), exist_ok=True)
|
||||
conn = duckdb.connect(duckdb_path)
|
||||
try:
|
||||
if rows:
|
||||
# Detect columns from first row keys (assumes list of dicts).
|
||||
if not isinstance(rows[0], dict):
|
||||
# Fallback: wrap scalar rows as {"value": v}.
|
||||
rows = [{"value": r} for r in rows]
|
||||
cols = list(rows[0].keys())
|
||||
# Build CREATE TABLE IF NOT EXISTS with VARCHAR for safety
|
||||
# plus extracted_at TIMESTAMP and run_id VARCHAR for lineage.
|
||||
col_defs = ", ".join(f'"{c}" VARCHAR' for c in cols)
|
||||
ddl = (
|
||||
f'CREATE TABLE IF NOT EXISTS "{table_name}" ('
|
||||
f' run_id VARCHAR, extracted_at TIMESTAMP, {col_defs}'
|
||||
f')'
|
||||
)
|
||||
conn.execute(ddl)
|
||||
run_id_str = uuid.uuid4().hex[:16]
|
||||
now_iso = datetime.datetime.utcnow().isoformat() + "Z"
|
||||
placeholders = ", ".join(["?"] * (len(cols) + 2))
|
||||
insert_sql = (
|
||||
f'INSERT INTO "{table_name}" '
|
||||
f'(run_id, extracted_at, {", ".join(chr(34) + c + chr(34) for c in cols)}) '
|
||||
f'VALUES ({placeholders})'
|
||||
)
|
||||
for r in rows:
|
||||
vals = [run_id_str, now_iso] + [str(r.get(c, "")) for c in cols]
|
||||
conn.execute(insert_sql, vals)
|
||||
# Also record into data_factory.runs with storage info
|
||||
registry_root = os.environ.get("FN_REGISTRY_ROOT", "")
|
||||
if registry_root and record_run:
|
||||
import sqlite3
|
||||
df_db = os.path.join(registry_root, "apps", "data_factory", "data_factory.db")
|
||||
if os.path.exists(df_db):
|
||||
try:
|
||||
df_conn = sqlite3.connect(df_db)
|
||||
df_conn.execute("PRAGMA foreign_keys = ON")
|
||||
trigger = "dag" if os.environ.get("DAGU_ENV") else "manual"
|
||||
db_id = output_cfg.get("database_id", recipe.get("name", "unknown") + "_db")
|
||||
df_run_id = uuid.uuid4().hex[:16]
|
||||
df_conn.execute(
|
||||
"INSERT INTO runs(id, node_id, started_at, finished_at, status,"
|
||||
" rows_in, rows_out, kb_in, kb_out, duration_ms, trigger, error, notes,"
|
||||
" storage_db_id, storage_table)"
|
||||
" VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
|
||||
(
|
||||
df_run_id, recipe.get("name", "unknown"),
|
||||
now_iso, now_iso, "success",
|
||||
0, rows_out, 0, int(round(kb_out)), duration_ms,
|
||||
trigger, "",
|
||||
json.dumps({"sample": sample_rows[:2]}, ensure_ascii=False)[:1000],
|
||||
db_id, table_name,
|
||||
),
|
||||
)
|
||||
df_conn.commit()
|
||||
df_conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
conn.close()
|
||||
elif sink == "data_factory.runs" and record_run:
|
||||
# Escribe DIRECTO a data_factory.db evitando spawn `fn run` (loop infinito
|
||||
# si data_factory_record_run re-ejecuta esta misma funcion). Confia en que
|
||||
# el node ya existe en `nodes` con id == recipe.name.
|
||||
try:
|
||||
from pipelines.data_factory_record_run import data_factory_record_run
|
||||
data_factory_record_run(
|
||||
node_id=recipe.get("name", "unknown"),
|
||||
function_id="cdp_extract_recipe_py_pipelines",
|
||||
args={"recipe_path": recipe_path, "debug_port": debug_port},
|
||||
import sqlite3
|
||||
import datetime
|
||||
import uuid
|
||||
registry_root = os.environ.get("FN_REGISTRY_ROOT", "").strip()
|
||||
if not registry_root:
|
||||
# No fatal — el dato ya fue extraido / impreso por otro sink
|
||||
raise RuntimeError("FN_REGISTRY_ROOT not set; cannot locate data_factory.db")
|
||||
db_path = os.path.join(registry_root, "apps", "data_factory", "data_factory.db")
|
||||
trigger = "dag" if os.environ.get("DAGU_ENV") else "manual"
|
||||
run_id = uuid.uuid4().hex[:16]
|
||||
now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
node_id = recipe.get("name", "unknown")
|
||||
conn = sqlite3.connect(db_path)
|
||||
conn.execute("PRAGMA foreign_keys = ON")
|
||||
conn.execute(
|
||||
"INSERT INTO runs(id, node_id, started_at, finished_at, status,"
|
||||
" rows_in, rows_out, kb_in, kb_out, duration_ms, trigger, error, notes)"
|
||||
" VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?)",
|
||||
(
|
||||
run_id, node_id, now, now, "success",
|
||||
0, rows_out, 0, int(round(kb_out)), duration_ms,
|
||||
trigger, "",
|
||||
json.dumps({"sample": sample_rows[:2]}, ensure_ascii=False)[:1000],
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
# No fatal — el dato ya fue extraido
|
||||
conn.commit()
|
||||
conn.close()
|
||||
except Exception:
|
||||
# No fatal — el dato ya fue extraido (sample_rows en retorno)
|
||||
pass
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user