docs(flows): DoD obligatorio con user-facing surface + abrir issues 0100-0103 (taxonomia, frontmatter migration, dev_console, work dashboard)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 00:07:03 +02:00
parent 212875ed0d
commit 5d2a14e50a
77 changed files with 4062 additions and 311 deletions
@@ -3,7 +3,7 @@ name: cdp_extract_recipe
 kind: pipeline
 lang: py
 domain: pipelines
-version: "1.0.0"
+version: "1.2.0"
 purity: impure
 signature: "def cdp_extract_recipe(recipe_path: str, debug_port: int = 9222, tab_id: str | None = None, record_run: bool = True) -> dict"
 description: "Ejecuta una recipe YAML contra Chrome remoto via CDP. Valida recipe, busca tab por url_pattern, ejecuta steps (wait_selector/js) y envia resultado al sink declarado."
@@ -22,7 +22,7 @@ params:
  - name: tab_id
    desc: "ID del tab a usar. Si None, busca tab cuyo URL matchee url_pattern de la recipe."
  - name: record_run
-    desc: "Si True y output.sink=='data_factory.runs', registra la ejecucion en data_factory."
+    desc: "Si True, registra la ejecucion en data_factory.runs (para sink 'data_factory.runs' y 'duckdb')."
 output: "dict {status: ok|error, rows_out: int, kb_out: float, duration_ms: int, error: str, sample_rows: list}"
 tested: false
 tests: []
@@ -60,6 +60,10 @@ output:

 Cuando tienes una recipe YAML validada y Chrome corriendo con remote debugging, y quieres extraer datos en un solo paso sin montar pipeline manualmente. Encadena con `cdp_open_url_and_wait` si necesitas abrir la URL primero.

+## Capability growth log
+
+- v1.2.0 (2026-05-16) — sink `duckdb` writes rows to a DuckDB file + registers run in data_factory.runs with storage_db_id/storage_table for traceability.
+
 ## Gotchas

 - Chrome debe estar corriendo con `--remote-debugging-port=<debug_port>`.
@@ -41,9 +41,14 @@ def _ws_send_recv(ws, msg_id: int, method: str, params: dict, timeout: float = 1


 def _poll_selector(ws, selector: str, timeout_s: float = 10.0) -> bool:
-    """Polling cada 200ms hasta que document.querySelector(selector) no sea null."""
+    """Polling cada 200ms hasta que document.querySelector(selector) no sea null.
+
+    Drena eventos CDP (paginas con Page.enable emiten loads, frames, etc.) y
+    matchea por `id` para evitar leer respuestas ajenas o eventos del server.
+    """
    deadline = time.time() + timeout_s
    msg_id = 1000
+    ws.settimeout(0.5)
    while time.time() < deadline:
        ws.send(json.dumps({
            "id": msg_id,
@@ -53,19 +58,28 @@ def _poll_selector(ws, selector: str, timeout_s: float = 10.0) -> bool:
                "returnByValue": True,
            }
        }))
-        time.sleep(0.2)
-        msg_id += 1
-        # Leer respuesta en loop simple (websocket-client sync)
-        # Para modo sync usamos recv()
-        try:
-            raw = ws.sock.recv()
-            if raw:
+        # Leer hasta 30 frames buscando uno con nuestro id; ignorar eventos.
+        got_response = False
+        for _ in range(30):
+            try:
+                raw = ws.recv()
+            except Exception:
+                break
+            if not raw:
+                break
+            try:
                msg = json.loads(raw)
+            except Exception:
+                continue
+            if msg.get("id") == msg_id:
+                got_response = True
                val = msg.get("result", {}).get("result", {}).get("value", False)
                if val:
                    return True
-        except Exception:
-            pass
+                break
+        msg_id += 1
+        if not got_response:
+            time.sleep(0.2)
    return False


@@ -188,16 +202,114 @@ def cdp_extract_recipe(
        out_path = output_cfg.get("path", "output.json")
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(rows, f, ensure_ascii=False, indent=2)
+    elif sink == "duckdb":
+        duckdb_path = output_cfg.get("duckdb_path", "")
+        table_name  = output_cfg.get("table", "")
+        if not duckdb_path or not table_name:
+            # not fatal: rows already returned via sample_rows
+            pass
+        else:
+            import duckdb
+            import uuid
+            import datetime
+            # resolve duckdb_path relative to FN_REGISTRY_ROOT if not absolute
+            if not os.path.isabs(duckdb_path):
+                duckdb_path = os.path.join(os.environ.get("FN_REGISTRY_ROOT", ""), duckdb_path)
+            os.makedirs(os.path.dirname(duckdb_path), exist_ok=True)
+            conn = duckdb.connect(duckdb_path)
+            try:
+                if rows:
+                    # Detect columns from first row keys (assumes list of dicts).
+                    if not isinstance(rows[0], dict):
+                        # Fallback: wrap scalar rows as {"value": v}.
+                        rows = [{"value": r} for r in rows]
+                    cols = list(rows[0].keys())
+                    # Build CREATE TABLE IF NOT EXISTS with VARCHAR for safety
+                    # plus extracted_at TIMESTAMP and run_id VARCHAR for lineage.
+                    col_defs = ", ".join(f'"{c}" VARCHAR' for c in cols)
+                    ddl = (
+                        f'CREATE TABLE IF NOT EXISTS "{table_name}" ('
+                        f'  run_id VARCHAR, extracted_at TIMESTAMP, {col_defs}'
+                        f')'
+                    )
+                    conn.execute(ddl)
+                    run_id_str = uuid.uuid4().hex[:16]
+                    now_iso = datetime.datetime.utcnow().isoformat() + "Z"
+                    placeholders = ", ".join(["?"] * (len(cols) + 2))
+                    insert_sql = (
+                        f'INSERT INTO "{table_name}" '
+                        f'(run_id, extracted_at, {", ".join(chr(34) + c + chr(34) for c in cols)}) '
+                        f'VALUES ({placeholders})'
+                    )
+                    for r in rows:
+                        vals = [run_id_str, now_iso] + [str(r.get(c, "")) for c in cols]
+                        conn.execute(insert_sql, vals)
+                    # Also record into data_factory.runs with storage info
+                    registry_root = os.environ.get("FN_REGISTRY_ROOT", "")
+                    if registry_root and record_run:
+                        import sqlite3
+                        df_db = os.path.join(registry_root, "apps", "data_factory", "data_factory.db")
+                        if os.path.exists(df_db):
+                            try:
+                                df_conn = sqlite3.connect(df_db)
+                                df_conn.execute("PRAGMA foreign_keys = ON")
+                                trigger = "dag" if os.environ.get("DAGU_ENV") else "manual"
+                                db_id = output_cfg.get("database_id", recipe.get("name", "unknown") + "_db")
+                                df_run_id = uuid.uuid4().hex[:16]
+                                df_conn.execute(
+                                    "INSERT INTO runs(id, node_id, started_at, finished_at, status,"
+                                    " rows_in, rows_out, kb_in, kb_out, duration_ms, trigger, error, notes,"
+                                    " storage_db_id, storage_table)"
+                                    " VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
+                                    (
+                                        df_run_id, recipe.get("name", "unknown"),
+                                        now_iso, now_iso, "success",
+                                        0, rows_out, 0, int(round(kb_out)), duration_ms,
+                                        trigger, "",
+                                        json.dumps({"sample": sample_rows[:2]}, ensure_ascii=False)[:1000],
+                                        db_id, table_name,
+                                    ),
+                                )
+                                df_conn.commit()
+                                df_conn.close()
+                            except Exception:
+                                pass
+            finally:
+                conn.close()
    elif sink == "data_factory.runs" and record_run:
+        # Escribe DIRECTO a data_factory.db evitando spawn `fn run` (loop infinito
+        # si data_factory_record_run re-ejecuta esta misma funcion). Confia en que
+        # el node ya existe en `nodes` con id == recipe.name.
        try:
-            from pipelines.data_factory_record_run import data_factory_record_run
-            data_factory_record_run(
-                node_id=recipe.get("name", "unknown"),
-                function_id="cdp_extract_recipe_py_pipelines",
-                args={"recipe_path": recipe_path, "debug_port": debug_port},
+            import sqlite3
+            import datetime
+            import uuid
+            registry_root = os.environ.get("FN_REGISTRY_ROOT", "").strip()
+            if not registry_root:
+                # No fatal — el dato ya fue extraido / impreso por otro sink
+                raise RuntimeError("FN_REGISTRY_ROOT not set; cannot locate data_factory.db")
+            db_path = os.path.join(registry_root, "apps", "data_factory", "data_factory.db")
+            trigger = "dag" if os.environ.get("DAGU_ENV") else "manual"
+            run_id = uuid.uuid4().hex[:16]
+            now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+            node_id = recipe.get("name", "unknown")
+            conn = sqlite3.connect(db_path)
+            conn.execute("PRAGMA foreign_keys = ON")
+            conn.execute(
+                "INSERT INTO runs(id, node_id, started_at, finished_at, status,"
+                " rows_in, rows_out, kb_in, kb_out, duration_ms, trigger, error, notes)"
+                " VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?)",
+                (
+                    run_id, node_id, now, now, "success",
+                    0, rows_out, 0, int(round(kb_out)), duration_ms,
+                    trigger, "",
+                    json.dumps({"sample": sample_rows[:2]}, ensure_ascii=False)[:1000],
+                ),
            )
-        except Exception as e:
-            # No fatal — el dato ya fue extraido
+            conn.commit()
+            conn.close()
+        except Exception:
+            # No fatal — el dato ya fue extraido (sample_rows en retorno)
            pass

    return {
@@ -0,0 +1,60 @@
+---
+name: dedup_duckdb_table_by_hash
+kind: pipeline
+lang: py
+domain: pipelines
+purity: impure
+version: "1.0.0"
+signature: "def dedup_duckdb_table_by_hash(duckdb_path: str, table: str, exclude_cols: list[str] | None = None) -> dict"
+description: "Elimina filas duplicadas de una tabla DuckDB calculando un md5 de las columnas de datos. Anade columna row_hash idempotentemente, actualiza hashes nulos y borra duplicados conservando la primera insercion por rowid."
+tags: [dedup, duckdb, transformer, pipeline, dataops]
+uses_functions: [cdp_extract_recipe_py_pipelines]
+uses_types: []
+returns: []
+returns_optional: false
+error_type: error_go_core
+imports: [duckdb]
+tested: true
+tests:
+  - "dedup elimina filas duplicadas y conserva unicas"
+test_file_path: "python/functions/pipelines/dedup_duckdb_table_by_hash_test.py"
+file_path: "python/functions/pipelines/dedup_duckdb_table_by_hash.py"
+params:
+  - name: duckdb_path
+    desc: "Ruta DuckDB file (absoluta o relativa a FN_REGISTRY_ROOT)."
+  - name: table
+    desc: "Nombre tabla a deduplicar."
+  - name: exclude_cols
+    desc: "Cols a excluir del hash (metadata como run_id, extracted_at, row_hash). None usa default [run_id, extracted_at, row_hash]."
+output: "dict {status, rows_before, rows_after, dedup_removed, duration_ms, hash_column}"
+---
+
+## Ejemplo
+
+```python
+from pipelines.dedup_duckdb_table_by_hash import dedup_duckdb_table_by_hash
+
+r = dedup_duckdb_table_by_hash("apps/data_factory/data/hn_top_stories.duckdb", "hn_stories")
+print(r)
+# {"status": "ok", "rows_before": 120, "rows_after": 30, "dedup_removed": 90, "duration_ms": 45, "hash_column": "row_hash"}
+```
+
+CLI directo:
+
+```bash
+/home/lucas/fn_registry/python/.venv/bin/python3 \
+  python/functions/pipelines/dedup_duckdb_table_by_hash.py \
+  apps/data_factory/data/hn_top_stories.duckdb hn_stories
+```
+
+## Cuando usarla
+
+Cuando un extractor periodico re-inserta filas iguales (mismo contenido, distinto `run_id`/`extracted_at`) y quieres deduplicar in-place sin tocar el pipeline upstream. Tipicamente como paso `transformer` despues de `cdp_extract_recipe` en un DAG de scraping.
+
+## Gotchas
+
+- **rowid y VACUUM**: DuckDB rowid puede recalcularse tras `VACUUM`. En esta funcion solo se usa dentro de una unica sentencia `DELETE` (no se abre una transaccion explicita), por lo que no hay inconsistencia practica.
+- **Colisiones md5**: md5 no colisiona en practica para tablas de escala HN (miles de filas). Si la tabla crece a millones de filas con datos binarios, cambiar `md5(...)` por `sha256(...)` en el SQL.
+- **Tabla inexistente**: si `<table>` no existe en el DuckDB, retorna `status=error` con mensaje descriptivo en lugar de lanzar excepcion.
+- **exclude_cols case**: la comparacion de columnas excluidas es case-insensitive (`c.lower()`), pero el nombre en la query se usa tal cual lo devuelve `DESCRIBE`.
+- **Re-ejecucion**: si la tabla ya tiene `row_hash` de una ejecucion anterior, solo se actualizan las filas con `row_hash IS NULL` (idempotente).
@@ -0,0 +1,141 @@
+"""dedup_duckdb_table_by_hash — Remove duplicate rows from a DuckDB table using md5 hash of data columns."""
+
+from __future__ import annotations
+
+import os
+import time
+from typing import Any
+
+
+def dedup_duckdb_table_by_hash(
+    duckdb_path: str,
+    table: str,
+    exclude_cols: list[str] | None = None,
+) -> dict[str, Any]:
+    """Remove duplicate rows from a DuckDB table by computing md5 hash of data columns.
+
+    Args:
+        duckdb_path: Path to DuckDB file. Absolute or relative to FN_REGISTRY_ROOT.
+        table: Table name to deduplicate.
+        exclude_cols: Columns to exclude from hash computation (metadata cols).
+                      Defaults to ["run_id", "extracted_at", "row_hash"].
+
+    Returns:
+        dict with keys: status, rows_before, rows_after, dedup_removed,
+        duration_ms, hash_column.
+    """
+    import duckdb  # type: ignore
+
+    t0 = time.monotonic()
+
+    # Resolve path against FN_REGISTRY_ROOT if relative
+    if not os.path.isabs(duckdb_path):
+        root = os.environ.get("FN_REGISTRY_ROOT", os.getcwd())
+        duckdb_path = os.path.join(root, duckdb_path)
+
+    if exclude_cols is None:
+        exclude_cols = ["run_id", "extracted_at", "row_hash"]
+
+    exclude_set = {c.lower() for c in exclude_cols}
+
+    conn = duckdb.connect(duckdb_path)
+    try:
+        # Verify table exists
+        tables = [r[0] for r in conn.execute("SHOW TABLES").fetchall()]
+        if table not in tables:
+            return {
+                "status": "error",
+                "error": f"Table '{table}' not found in {duckdb_path}. Available: {tables}",
+                "rows_before": 0,
+                "rows_after": 0,
+                "dedup_removed": 0,
+                "duration_ms": int((time.monotonic() - t0) * 1000),
+                "hash_column": "row_hash",
+            }
+
+        # Introspect columns
+        desc = conn.execute(f'DESCRIBE "{table}"').fetchall()
+        all_cols = [r[0] for r in desc]
+        existing_col_names_lower = {c.lower() for c in all_cols}
+
+        # Add row_hash column if missing (idempotent)
+        if "row_hash" not in existing_col_names_lower:
+            conn.execute(f'ALTER TABLE "{table}" ADD COLUMN row_hash VARCHAR')
+            all_cols.append("row_hash")
+            existing_col_names_lower.add("row_hash")
+
+        # Data columns = all columns minus excluded
+        data_cols = [c for c in all_cols if c.lower() not in exclude_set]
+
+        if not data_cols:
+            return {
+                "status": "error",
+                "error": "No data columns remaining after exclusion.",
+                "rows_before": 0,
+                "rows_after": 0,
+                "dedup_removed": 0,
+                "duration_ms": int((time.monotonic() - t0) * 1000),
+                "hash_column": "row_hash",
+            }
+
+        # Build md5 expression: md5(col1 || '\t' || col2 || ...)
+        # Each col: COALESCE(CAST("colname" AS VARCHAR), '')
+        parts = " || '\t' || ".join(
+            f"COALESCE(CAST(\"{c}\" AS VARCHAR), '')" for c in data_cols
+        )
+        hash_expr = f"md5({parts})"
+
+        # Update row_hash where NULL
+        conn.execute(
+            f'UPDATE "{table}" SET row_hash = {hash_expr} WHERE row_hash IS NULL'
+        )
+
+        # Count rows before dedup
+        rows_before = conn.execute(f'SELECT count(*) FROM "{table}"').fetchone()[0]
+
+        # Delete duplicates, keeping row with smallest rowid (earliest insert)
+        conn.execute(
+            f"""
+            DELETE FROM "{table}"
+            WHERE rowid NOT IN (
+                SELECT min(rowid) FROM "{table}" GROUP BY row_hash
+            )
+            """
+        )
+
+        # Count rows after dedup
+        rows_after = conn.execute(f'SELECT count(*) FROM "{table}"').fetchone()[0]
+
+    finally:
+        conn.close()
+
+    duration_ms = int((time.monotonic() - t0) * 1000)
+    dedup_removed = rows_before - rows_after
+
+    return {
+        "status": "ok",
+        "rows_before": rows_before,
+        "rows_after": rows_after,
+        "dedup_removed": dedup_removed,
+        "duration_ms": duration_ms,
+        "hash_column": "row_hash",
+    }
+
+
+if __name__ == "__main__":
+    import argparse
+    import json
+
+    parser = argparse.ArgumentParser(description="Dedup a DuckDB table by row hash.")
+    parser.add_argument("duckdb_path", help="Path to DuckDB file")
+    parser.add_argument("table", help="Table name to deduplicate")
+    parser.add_argument(
+        "--exclude-cols",
+        nargs="*",
+        default=None,
+        help="Columns to exclude from hash (default: run_id extracted_at row_hash)",
+    )
+    args = parser.parse_args()
+
+    result = dedup_duckdb_table_by_hash(args.duckdb_path, args.table, args.exclude_cols)
+    print(json.dumps(result, indent=2))
@@ -0,0 +1,95 @@
+"""Tests para dedup_duckdb_table_by_hash."""
+
+from __future__ import annotations
+
+import os
+import tempfile
+
+import duckdb
+import pytest
+
+from pipelines.dedup_duckdb_table_by_hash import dedup_duckdb_table_by_hash
+
+
+def _make_test_db(path: str) -> None:
+    """Create a DuckDB file at ``path`` holding a ``stories`` table with 5 rows.
+
+    The three run-001 rows carry distinct stories; the two run-002 rows repeat
+    stories A and B, differing only in ``run_id`` and ``extracted_at``.
+    """
+    conn = duckdb.connect(path)
+    # run_id / extracted_at are the metadata columns; the rest is story data.
+    conn.execute(
+        """
+        CREATE TABLE stories (
+            run_id      VARCHAR,
+            extracted_at TIMESTAMP,
+            rank        INTEGER,
+            title       VARCHAR,
+            url         VARCHAR,
+            points      INTEGER
+        )
+        """
+    )
+    # 3 unique stories from run-001, then A and B re-extracted by run-002.
+    conn.execute(
+        """
+        INSERT INTO stories VALUES
+          ('run-001', '2026-05-16 10:00:00', 1, 'Story A', 'https://a.com', 100),
+          ('run-001', '2026-05-16 10:00:00', 2, 'Story B', 'https://b.com', 200),
+          ('run-001', '2026-05-16 10:00:00', 3, 'Story C', 'https://c.com', 300),
+          ('run-002', '2026-05-16 10:30:00', 1, 'Story A', 'https://a.com', 100),
+          ('run-002', '2026-05-16 10:30:00', 2, 'Story B', 'https://b.com', 200)
+        """
+    )
+    conn.close()
+
+
+def test_dedup_elimina_filas_duplicadas_y_conserva_unicas():
+    """dedup elimina filas duplicadas y conserva unicas"""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_path = os.path.join(tmpdir, "test.duckdb")
+        _make_test_db(db_path)
+
+        result = dedup_duckdb_table_by_hash(db_path, "stories")
+
+        assert result["status"] == "ok", f"Expected ok, got: {result}"
+        assert result["rows_before"] == 5
+        assert result["rows_after"] == 3, f"Expected 3 unique rows, got {result['rows_after']}"
+        assert result["dedup_removed"] == 2
+        assert result["hash_column"] == "row_hash"
+        assert result["duration_ms"] >= 0
+
+        # Verify row_hash column exists and is populated
+        conn = duckdb.connect(db_path)
+        hashes = conn.execute("SELECT DISTINCT row_hash FROM stories").fetchall()
+        conn.close()
+        assert len(hashes) == 3, f"Expected 3 distinct hashes, got {len(hashes)}"
+        # All hashes should be non-null
+        assert all(h[0] is not None for h in hashes), "Some row_hash values are NULL"
+
+
+def test_dedup_idempotente():
+    """Running dedup twice leaves rows_after unchanged."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_path = os.path.join(tmpdir, "test.duckdb")
+        _make_test_db(db_path)
+
+        r1 = dedup_duckdb_table_by_hash(db_path, "stories")
+        r2 = dedup_duckdb_table_by_hash(db_path, "stories")
+
+        assert r1["status"] == "ok"
+        assert r2["status"] == "ok"
+        assert r2["rows_before"] == 3
+        assert r2["rows_after"] == 3
+        assert r2["dedup_removed"] == 0
+
+
+def test_dedup_tabla_inexistente():
+    """Returns status=error when table does not exist."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_path = os.path.join(tmpdir, "empty.duckdb")
+        conn = duckdb.connect(db_path)
+        conn.close()
+
+        result = dedup_duckdb_table_by_hash(db_path, "nonexistent_table")
+        assert result["status"] == "error"
+        assert "nonexistent_table" in result["error"]
+
+
+if __name__ == "__main__":
+    # Allow running this file directly: hand it to pytest in verbose mode.
+    pytest.main([__file__, "-v"])
@@ -0,0 +1,66 @@
+---
+name: regenerate_app_icons
+kind: pipeline
+lang: py
+domain: pipelines
+version: "1.0.0"
+purity: impure
+signature: "def regenerate_app_icons(only: list[str] | None = None) -> dict"
+description: "Escanea todas las apps C++ del registry, lee el bloque `icon: {phosphor, accent}` de cada app.md y regenera el appicon.ico via generate_app_icon. Reemplaza el script ad-hoc dev/gen_app_icons.py."
+tags: [cpp-windows, icon, phosphor, batch]
+uses_functions: [generate_app_icon_py_infra]
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [os, sys, pathlib, typing, yaml]
+params:
+  - name: only
+    desc: "Lista opcional de nombres de app (campo `name` del frontmatter) a procesar. Si None, regenera todas las apps C++ con icon: declarado."
+output: "dict {ok: [name], skipped: [{name, reason}], failed: [{name, error}]}"
+tested: false
+tests: []
+test_file_path: ""
+file_path: "python/functions/pipelines/regenerate_app_icons.py"
+---
+
+## Ejemplo
+
+```bash
+# Regenerar todas las apps C++ con icon: declarado
+./fn run regenerate_app_icons
+
+# Solo una app
+./fn run regenerate_app_icons chart_demo
+
+# Varias apps
+./fn run regenerate_app_icons chart_demo registry_dashboard
+```
+
+```python
+import sys
+sys.path.insert(0, "python/functions")
+from pipelines.regenerate_app_icons import regenerate_app_icons
+
+result = regenerate_app_icons()
+print(f"OK: {len(result['ok'])}, FAIL: {len(result['failed'])}")
+```
+
+Bloque `icon:` esperado en `app.md`:
+```yaml
+icon:
+  phosphor: "chart-bar"
+  accent: "#0ea5e9"
+```
+
+## Cuando usarla
+
+Cuando anades una app C++ nueva (anades `icon:` a su `app.md` y corres el pipeline), cambias el color/glyph de una app existente, o pulleas cambios de iconos desde otra rama. Ejecutala antes de `redeploy_cpp_app_windows` para que el `.exe` lleve el icono actualizado.
+
+## Gotchas
+
+- **Sobreescribe `appicon.ico` sin warning** — igual que `generate_app_icon`. Hacer backup si necesitas preservar version anterior.
+- **Requiere `sources/phosphor-core/`**: clonar con `git clone --depth=1 https://github.com/phosphor-icons/core.git sources/phosphor-core` si no existe.
+- **Solo procesa apps con `lang: cpp`** en frontmatter — apps Go/Python se ignoran aunque tengan `icon:`.
+- **Apps sin `icon:` se reportan en `skipped`**, no son error. Util para detectar apps C++ a las que falta declarar el icono.
+- **No invalida el cache de iconos de Windows** — si Explorer no muestra el icono nuevo tras redeploy: `ie4uinit.exe -show` o reiniciar Explorer.
@@ -0,0 +1,97 @@
+"""Regenera el appicon.ico de todas las apps C++ que declaren bloque icon: en su app.md."""
+
+import os
+import sys
+from pathlib import Path
+from typing import Optional
+
+import yaml
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+from infra.generate_app_icon import generate_app_icon
+
+
+def _find_registry_root() -> Path:
+    env_root = os.environ.get("FN_REGISTRY_ROOT")
+    if env_root:
+        return Path(env_root).resolve()
+    current = Path(__file__).resolve()
+    for parent in current.parents:
+        if (parent / "registry.db").exists():
+            return parent
+    raise FileNotFoundError("registry.db no encontrado; define FN_REGISTRY_ROOT")
+
+
+def _read_frontmatter(md_path: Path) -> Optional[dict]:
+    text = md_path.read_text(encoding="utf-8")
+    if not text.startswith("---"):
+        return None
+    end = text.find("\n---", 3)
+    if end < 0:
+        return None
+    try:
+        return yaml.safe_load(text[3:end])
+    except yaml.YAMLError:
+        return None
+
+
+def _iter_cpp_app_mds(root: Path):
+    for pattern in ("apps/*/app.md", "projects/*/apps/*/app.md"):
+        for md in sorted(root.glob(pattern)):
+            fm = _read_frontmatter(md)
+            if not fm or fm.get("lang") != "cpp":
+                continue
+            yield md, fm
+
+
+def regenerate_app_icons(only: Optional[list[str]] = None) -> dict:
+    """Recorre apps C++ con bloque icon: en su frontmatter y regenera appicon.ico.
+
+    Args:
+        only: Lista opcional de nombres de app a filtrar (campo `name`). Si None,
+            procesa todas las apps C++ con `icon:` declarado.
+
+    Returns:
+        dict con keys: ok (list[str]), skipped (list[dict]), failed (list[dict]).
+    """
+    root = _find_registry_root()
+    ok, skipped, failed = [], [], []
+
+    for md, fm in _iter_cpp_app_mds(root):
+        name = fm.get("name", md.parent.name)
+        if only and name not in only:
+            continue
+        icon = fm.get("icon")
+        if not icon or not isinstance(icon, dict):
+            skipped.append({"name": name, "reason": "no icon: block"})
+            continue
+        phosphor = icon.get("phosphor")
+        accent = icon.get("accent")
+        if not phosphor or not accent:
+            skipped.append({"name": name, "reason": "icon: missing phosphor/accent"})
+            continue
+        out_ico = md.parent / "appicon.ico"
+        try:
+            generate_app_icon(
+                phosphor_icon_name=phosphor,
+                accent_hex=accent,
+                out_ico_path=str(out_ico),
+            )
+            ok.append(name)
+        except Exception as e:
+            failed.append({"name": name, "error": str(e)})
+
+    return {"ok": ok, "skipped": skipped, "failed": failed}
+
+
+if __name__ == "__main__":
+    only = sys.argv[1:] or None
+    result = regenerate_app_icons(only=only)
+    for name in result["ok"]:
+        print(f"OK   {name}")
+    for s in result["skipped"]:
+        print(f"SKIP {s['name']}: {s['reason']}")
+    for f in result["failed"]:
+        print(f"FAIL {f['name']}: {f['error']}")
+    sys.exit(1 if result["failed"] else 0)