diff --git a/app.md b/app.md index 3b6315e..139309e 100644 --- a/app.md +++ b/app.md @@ -45,6 +45,28 @@ python_runtime_deps: - certifi - urllib3 - cryptography + +# Validacion end-to-end (fase 4 del bucle reactivo). Ver issue 0068. +# C++ ImGui app: build con cmake, smoke via --self-test, tests pytest WSL. +e2e_checks: + - id: build + cmd: "cmake --build build --target graph_explorer -j" + timeout_s: 300 + expect_exit: 0 + - id: tests_pytest_wsl + cmd: "cd tests && python3 -m pytest -x -q" + timeout_s: 180 + expect_exit: 0 + - id: smoke_self_test + cmd: "./build/graph_explorer --self-test" + timeout_s: 30 + expect_exit: 0 + - id: enricher_fetch_webpage + cmd: "./build/graph_explorer --run-enricher fetch_webpage --target https://example.com --json" + timeout_s: 60 + expect_stdout_contains: "\"status\":\"done\"" + - id: ops_audit + ref: "fn-recopilador:projects/osint_graph/apps/graph_explorer" --- ## Arquitectura diff --git a/cdp-cli/README.md b/cdp-cli/README.md new file mode 100644 index 0000000..9d6bb96 --- /dev/null +++ b/cdp-cli/README.md @@ -0,0 +1,73 @@ +# cdp-cli + +Wrapper Go de las funciones del dominio `browser` del registry. Subcomandos one-shot para hablar con una instancia de Chrome via CDP. + +Diseñado para coexistir con la navegacion humana: el binario NO mata al browser al salir, conecta, ejecuta y se va. Tu sigues navegando, los enrichers/agentes invocan `cdp-cli` cuando necesitan HTML post-JS, screenshots, evaluar JS, etc. + +Issue: [`0038-browser-launch-cdp-control.md`](../issues/0038-browser-launch-cdp-control.md) (fase 0038c). + +## Build + +```bash +cd projects/osint_graph/apps/graph_explorer/cdp-cli +go build -o cdp-cli . +``` + +## Funciones del registry envueltas + +| Subcomando | Funcion | +|---|---| +| `launch` | `chrome_launch_go_browser` | +| `navigate` | `cdp_navigate_go_browser` (+ `cdp_wait_load` opcional) | +| `get-html` | `cdp_get_html_go_browser` | +| `screenshot` | `cdp_screenshot_go_browser` | +| `evaluate` | `cdp_evaluate_go_browser` | +| `click` | `cdp_click_go_browser` | +| `type` | `cdp_type_text_go_browser` | +| `wait-load` | `cdp_wait_load_go_browser` | +| `wait-element` | `cdp_wait_element_go_browser` | +| `set-cookie` | `cdp_set_cookie_go_browser` | + +## Uso tipico + +```bash +# 1) Lanzar Chrome con perfil aislado del app +./cdp-cli launch \ + --port 9222 \ + --user-data-dir /home/lucas/fn_registry/projects/osint_graph/apps/graph_explorer/local_files/browser_profiles/default + +# Output: pid=12345 port=9222 + +# 2) Navegar (en otra terminal o desde otro proceso) +./cdp-cli navigate --port 9222 --url https://example.com + +# 3) HTML post-JS +./cdp-cli get-html --port 9222 > page.html + +# 4) Screenshot pagina completa +./cdp-cli screenshot --port 9222 --out /tmp/shot.png --full-page + +# 5) Eval JS +./cdp-cli evaluate --port 9222 --js "document.querySelectorAll('a').length" +``` + +## Modo "tu y yo a la vez" + +Una unica instancia de Chrome aceptando varios clientes CDP simultaneos: + +``` + ┌──────────────────────────────┐ +chrome --remote-debugging-port=9222 --user-data-dir=... │ + │ │ + ▼ ▼ + Tu navegando manualmente cdp-cli get-html + (extension issue 0014) (enricher / agente) +``` + +Cookies/login son los del profile → tu logueas LinkedIn una vez, los enrichers heredan la sesion. + +## Roadmap (fuera de v0) + +- `cdp-cli` lee `browser_sessions` de `graph_explorer.db` para resolver `--profile NAME` → puerto (fase 0038d, panel UI). +- Auto-launch si el profile pedido no esta vivo (0038g). +- Enricher `fetch_webpage_browser` que invoca `cdp-cli get-html` en lugar de HTTP plano (0038e). diff --git a/cdp-cli/cdp-cli b/cdp-cli/cdp-cli new file mode 100755 index 0000000..523ed92 Binary files /dev/null and b/cdp-cli/cdp-cli differ diff --git a/cdp-cli/go.mod b/cdp-cli/go.mod new file mode 100644 index 0000000..b908330 --- /dev/null +++ b/cdp-cli/go.mod @@ -0,0 +1,7 @@ +module cdp-cli + +go 1.25.0 + +require fn-registry v0.0.0-00010101000000-000000000000 + +replace fn-registry => ../../../../.. diff --git a/cdp-cli/main.go b/cdp-cli/main.go new file mode 100644 index 0000000..61a3f36 --- /dev/null +++ b/cdp-cli/main.go @@ -0,0 +1,463 @@ +// cdp-cli — wrapper de las funciones del registry domain `browser`. +// +// Subcomandos one-shot que abren conexion CDP, ejecutan accion y salen. +// El proceso Chrome NO se mata al cerrar — sigue vivo para que el usuario +// y otros clientes CDP (incluida la extension del issue 0014) sigan +// hablando con la misma instancia. CDP soporta multiples clientes sobre +// el mismo --remote-debugging-port. +// +// Uso tipico: +// +// cdp-cli launch --port 9222 --user-data-dir /path/to/profile +// cdp-cli navigate --port 9222 --url https://example.com +// cdp-cli get-html --port 9222 > page.html +// +// Issue: projects/osint_graph/apps/graph_explorer/issues/0038-browser-launch-cdp-control.md +package main + +import ( + "encoding/json" + "flag" + "fmt" + "os" + "time" + + "fn-registry/functions/browser" +) + +const usage = `cdp-cli — control de Chrome via CDP + +Subcomandos: + launch Lanza Chrome con --remote-debugging-port. Imprime "pid=N port=M". + navigate Page.navigate a la URL indicada. + get-html Imprime document.documentElement.outerHTML por stdout. + screenshot Page.captureScreenshot a archivo (--out). + evaluate Runtime.evaluate de la expresion (--js). Resultado por stdout. + click Click en selector CSS (--selector). + type Escribe texto en elemento activo (--text). + wait-load Espera document.readyState=='complete' (--timeout segundos). + wait-element Espera selector CSS (--selector, --timeout). + set-cookie Network.setCookie (--name, --value, --domain, [--path], [--http-only]). + find-by-text Localiza elemento por innerText (--text, [--tag], [--exact], [--case-sensitive]). Imprime selector CSS. + click-text find-by-text + click. Mismos flags que find-by-text. + har-record Captura trafico HTTP/WS durante navegacion. (--url, --out, [--settle-ms]). Output HAR 1.2 JSON. + list-tabs Lista pestañas/targets de la instancia. Salida JSON o tabla con --format=text. + new-tab Abre pestaña nueva (--url opcional). Imprime el id. + close-tab Cierra pestaña por id (--id). + activate-tab Pone pestaña en foreground (--id). + +Flags globales (todos los subcomandos excepto launch): + --port N Puerto CDP (default 9222) + --host H Host CDP (default localhost) + +Ejemplos: + cdp-cli launch --port 9222 --user-data-dir /tmp/cdp-profile + cdp-cli navigate --url https://example.com + cdp-cli get-html > page.html + cdp-cli screenshot --out /tmp/shot.png --full-page + cdp-cli evaluate --js "document.title" + cdp-cli click --selector "#submit" + cdp-cli wait-element --selector ".result" --timeout 10 +` + +func main() { + if len(os.Args) < 2 { + fmt.Fprint(os.Stderr, usage) + os.Exit(2) + } + cmd, args := os.Args[1], os.Args[2:] + switch cmd { + case "launch": + cmdLaunch(args) + case "navigate": + cmdNavigate(args) + case "get-html": + cmdGetHTML(args) + case "screenshot": + cmdScreenshot(args) + case "evaluate": + cmdEvaluate(args) + case "click": + cmdClick(args) + case "type": + cmdType(args) + case "wait-load": + cmdWaitLoad(args) + case "wait-element": + cmdWaitElement(args) + case "set-cookie": + cmdSetCookie(args) + case "find-by-text": + cmdFindByText(args) + case "click-text": + cmdClickText(args) + case "har-record": + cmdHarRecord(args) + case "list-tabs": + cmdListTabs(args) + case "new-tab": + cmdNewTab(args) + case "close-tab": + cmdCloseTab(args) + case "activate-tab": + cmdActivateTab(args) + case "-h", "--help", "help": + fmt.Print(usage) + default: + fmt.Fprintf(os.Stderr, "unknown subcommand: %s\n\n%s", cmd, usage) + os.Exit(2) + } +} + +func dieF(format string, a ...any) { + fmt.Fprintf(os.Stderr, "cdp-cli: "+format+"\n", a...) + os.Exit(1) +} + +func mustConnect(host string, port int) *browser.CDPConn { + c, err := browser.CdpConnectHost(host, port) + if err != nil { + dieF("connect %s:%d: %v", host, port, err) + } + return c +} + +func addConnFlags(fs *flag.FlagSet) (*string, *int) { + host := fs.String("host", "localhost", "host CDP") + port := fs.Int("port", 9222, "puerto CDP") + return host, port +} + +// stringList implementa flag.Value para flags repetibles (--extra-arg foo --extra-arg bar). +type stringList []string + +func (s *stringList) String() string { return fmt.Sprint([]string(*s)) } +func (s *stringList) Set(v string) error { *s = append(*s, v); return nil } + +func cmdLaunch(args []string) { + fs := flag.NewFlagSet("launch", flag.ExitOnError) + port := fs.Int("port", 9222, "puerto remote-debugging") + userDataDir := fs.String("user-data-dir", "", "directorio de profile (default /tmp/chrome-cdp-profile)") + headless := fs.Bool("headless", false, "modo headless (--headless=new)") + chromePath := fs.String("chrome-path", "", "ruta a chrome.exe (auto si vacio)") + bindAddr := fs.String("bind-address", "", "valor para --remote-debugging-address (ej. 0.0.0.0 para WSL→Windows)") + var extra stringList + fs.Var(&extra, "extra-arg", "flag adicional pasado tal cual a chrome (repetible)") + _ = fs.Parse(args) + + extraArgs := []string(extra) + if *bindAddr != "" { + extraArgs = append(extraArgs, fmt.Sprintf("--remote-debugging-address=%s", *bindAddr)) + } + + pid, err := browser.ChromeLaunch(browser.ChromeLaunchOpts{ + Port: *port, + UserDataDir: *userDataDir, + Headless: *headless, + ChromePath: *chromePath, + ExtraArgs: extraArgs, + }) + if err != nil { + dieF("launch: %v", err) + } + fmt.Printf("pid=%d port=%d\n", pid, *port) +} + +func cmdNavigate(args []string) { + fs := flag.NewFlagSet("navigate", flag.ExitOnError) + host, port := addConnFlags(fs) + url := fs.String("url", "", "URL destino (obligatorio)") + wait := fs.Bool("wait-load", true, "esperar a que termine de cargar") + timeout := fs.Int("timeout", 30, "timeout de wait-load en segundos") + _ = fs.Parse(args) + if *url == "" { + dieF("--url obligatorio") + } + c := mustConnect(*host, *port) + defer browser.CdpClose(c, 0) + if err := browser.CdpNavigate(c, *url); err != nil { + dieF("navigate: %v", err) + } + if *wait { + if err := browser.CdpWaitLoad(c, time.Duration(*timeout)*time.Second); err != nil { + dieF("wait-load: %v", err) + } + } +} + +func cmdGetHTML(args []string) { + fs := flag.NewFlagSet("get-html", flag.ExitOnError) + host, port := addConnFlags(fs) + _ = fs.Parse(args) + c := mustConnect(*host, *port) + defer browser.CdpClose(c, 0) + html, err := browser.CdpGetHTML(c) + if err != nil { + dieF("get-html: %v", err) + } + fmt.Print(html) +} + +func cmdScreenshot(args []string) { + fs := flag.NewFlagSet("screenshot", flag.ExitOnError) + host, port := addConnFlags(fs) + out := fs.String("out", "", "archivo destino (.png o .jpg). Obligatorio.") + fullPage := fs.Bool("full-page", false, "capturar pagina completa") + format := fs.String("format", "png", "formato: png|jpeg") + quality := fs.Int("quality", 80, "calidad JPEG 1-100") + _ = fs.Parse(args) + if *out == "" { + dieF("--out obligatorio") + } + c := mustConnect(*host, *port) + defer browser.CdpClose(c, 0) + err := browser.CdpScreenshot(c, *out, browser.CdpScreenshotOpts{ + FullPage: *fullPage, + Format: *format, + Quality: *quality, + }) + if err != nil { + dieF("screenshot: %v", err) + } + fmt.Println(*out) +} + +func cmdEvaluate(args []string) { + fs := flag.NewFlagSet("evaluate", flag.ExitOnError) + host, port := addConnFlags(fs) + js := fs.String("js", "", "expresion JavaScript (obligatorio)") + _ = fs.Parse(args) + if *js == "" { + dieF("--js obligatorio") + } + c := mustConnect(*host, *port) + defer browser.CdpClose(c, 0) + res, err := browser.CdpEvaluate(c, *js) + if err != nil { + dieF("evaluate: %v", err) + } + fmt.Println(res) +} + +func cmdClick(args []string) { + fs := flag.NewFlagSet("click", flag.ExitOnError) + host, port := addConnFlags(fs) + selector := fs.String("selector", "", "selector CSS (obligatorio)") + _ = fs.Parse(args) + if *selector == "" { + dieF("--selector obligatorio") + } + c := mustConnect(*host, *port) + defer browser.CdpClose(c, 0) + if err := browser.CdpClick(c, *selector); err != nil { + dieF("click: %v", err) + } +} + +func cmdType(args []string) { + fs := flag.NewFlagSet("type", flag.ExitOnError) + host, port := addConnFlags(fs) + text := fs.String("text", "", "texto a escribir (obligatorio)") + _ = fs.Parse(args) + if *text == "" { + dieF("--text obligatorio") + } + c := mustConnect(*host, *port) + defer browser.CdpClose(c, 0) + if err := browser.CdpTypeText(c, *text); err != nil { + dieF("type: %v", err) + } +} + +func cmdWaitLoad(args []string) { + fs := flag.NewFlagSet("wait-load", flag.ExitOnError) + host, port := addConnFlags(fs) + timeout := fs.Int("timeout", 30, "timeout en segundos") + _ = fs.Parse(args) + c := mustConnect(*host, *port) + defer browser.CdpClose(c, 0) + if err := browser.CdpWaitLoad(c, time.Duration(*timeout)*time.Second); err != nil { + dieF("wait-load: %v", err) + } +} + +func cmdWaitElement(args []string) { + fs := flag.NewFlagSet("wait-element", flag.ExitOnError) + host, port := addConnFlags(fs) + selector := fs.String("selector", "", "selector CSS (obligatorio)") + timeout := fs.Int("timeout", 10, "timeout en segundos") + _ = fs.Parse(args) + if *selector == "" { + dieF("--selector obligatorio") + } + c := mustConnect(*host, *port) + defer browser.CdpClose(c, 0) + if err := browser.CdpWaitElement(c, *selector, time.Duration(*timeout)*time.Second); err != nil { + dieF("wait-element: %v", err) + } +} + +func cmdFindByText(args []string) { + fs := flag.NewFlagSet("find-by-text", flag.ExitOnError) + host, port := addConnFlags(fs) + text := fs.String("text", "", "texto a localizar (obligatorio)") + tag := fs.String("tag", "", "filtrar por tag (button, a, ...)") + exact := fs.Bool("exact", false, "match exacto vs substring") + caseSensitive := fs.Bool("case-sensitive", false, "comparacion case-sensitive") + _ = fs.Parse(args) + if *text == "" { + dieF("--text obligatorio") + } + c := mustConnect(*host, *port) + defer browser.CdpClose(c, 0) + sel, err := browser.CdpFindByText(c, *text, browser.FindByTextOpts{ + Tag: *tag, Exact: *exact, CaseSensitive: *caseSensitive, + }) + if err != nil { + dieF("find-by-text: %v", err) + } + if sel == "" { + fmt.Fprintln(os.Stderr, "no encontrado") + os.Exit(2) + } + fmt.Println(sel) +} + +func cmdClickText(args []string) { + fs := flag.NewFlagSet("click-text", flag.ExitOnError) + host, port := addConnFlags(fs) + text := fs.String("text", "", "texto a clickar (obligatorio)") + tag := fs.String("tag", "", "filtrar por tag") + exact := fs.Bool("exact", false, "match exacto") + caseSensitive := fs.Bool("case-sensitive", false, "case-sensitive") + _ = fs.Parse(args) + if *text == "" { + dieF("--text obligatorio") + } + c := mustConnect(*host, *port) + defer browser.CdpClose(c, 0) + if err := browser.CdpClickText(c, *text, browser.FindByTextOpts{ + Tag: *tag, Exact: *exact, CaseSensitive: *caseSensitive, + }); err != nil { + dieF("click-text: %v", err) + } +} + +func cmdHarRecord(args []string) { + fs := flag.NewFlagSet("har-record", flag.ExitOnError) + host, port := addConnFlags(fs) + url := fs.String("url", "", "URL a navegar mientras se graba (vacio = graba sin navegar)") + out := fs.String("out", "", "archivo destino (vacio = stdout)") + settle := fs.Int("settle-ms", 1500, "ms a esperar tras la accion para eventos trailing") + loadTimeout := fs.Int("load-timeout", 20, "timeout en segundos para wait-load") + _ = fs.Parse(args) + c := mustConnect(*host, *port) + defer browser.CdpClose(c, 0) + har, err := browser.CdpHarRecord(c, func() error { + if *url == "" { + return nil + } + if err := browser.CdpNavigate(c, *url); err != nil { + return err + } + return browser.CdpWaitLoad(c, time.Duration(*loadTimeout)*time.Second) + }, *settle) + if err != nil { + fmt.Fprintln(os.Stderr, "har-record warning:", err) + } + if *out == "" { + fmt.Println(har) + } else { + if err := os.WriteFile(*out, []byte(har), 0644); err != nil { + dieF("har-record: write %s: %v", *out, err) + } + fmt.Println(*out) + } +} + +func cmdListTabs(args []string) { + fs := flag.NewFlagSet("list-tabs", flag.ExitOnError) + host, port := addConnFlags(fs) + format := fs.String("format", "json", "json|text") + onlyType := fs.String("type", "", "filtrar por type (page|iframe|service_worker|...)") + _ = fs.Parse(args) + tabs, err := browser.CdpListTabs(*host, *port) + if err != nil { + dieF("list-tabs: %v", err) + } + if *onlyType != "" { + filt := tabs[:0] + for _, t := range tabs { + if t.Type == *onlyType { + filt = append(filt, t) + } + } + tabs = filt + } + if *format == "text" { + for _, t := range tabs { + fmt.Printf("%s\t%s\t%s\t%s\n", t.ID, t.Type, t.Title, t.URL) + } + return + } + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + _ = enc.Encode(tabs) +} + +func cmdNewTab(args []string) { + fs := flag.NewFlagSet("new-tab", flag.ExitOnError) + host, port := addConnFlags(fs) + startURL := fs.String("url", "", "URL inicial (vacio = about:blank)") + _ = fs.Parse(args) + tab, err := browser.CdpNewTab(*host, *port, *startURL) + if err != nil { + dieF("new-tab: %v", err) + } + fmt.Printf("id=%s url=%s ws=%s\n", tab.ID, tab.URL, tab.WebSocketDebuggerURL) +} + +func cmdCloseTab(args []string) { + fs := flag.NewFlagSet("close-tab", flag.ExitOnError) + host, port := addConnFlags(fs) + id := fs.String("id", "", "id de la pestaña (obligatorio)") + _ = fs.Parse(args) + if *id == "" { + dieF("--id obligatorio") + } + if err := browser.CdpCloseTab(*host, *port, *id); err != nil { + dieF("close-tab: %v", err) + } +} + +func cmdActivateTab(args []string) { + fs := flag.NewFlagSet("activate-tab", flag.ExitOnError) + host, port := addConnFlags(fs) + id := fs.String("id", "", "id de la pestaña (obligatorio)") + _ = fs.Parse(args) + if *id == "" { + dieF("--id obligatorio") + } + if err := browser.CdpActivateTab(*host, *port, *id); err != nil { + dieF("activate-tab: %v", err) + } +} + +func cmdSetCookie(args []string) { + fs := flag.NewFlagSet("set-cookie", flag.ExitOnError) + host, port := addConnFlags(fs) + name := fs.String("name", "", "nombre cookie (obligatorio)") + value := fs.String("value", "", "valor cookie (obligatorio)") + domain := fs.String("domain", "", "dominio cookie (obligatorio)") + path := fs.String("path", "/", "path") + httpOnly := fs.Bool("http-only", true, "marca HttpOnly") + _ = fs.Parse(args) + if *name == "" || *value == "" || *domain == "" { + dieF("--name, --value y --domain obligatorios") + } + c := mustConnect(*host, *port) + defer browser.CdpClose(c, 0) + if err := browser.CdpSetCookie(c, *name, *value, *domain, *path, *httpOnly); err != nil { + dieF("set-cookie: %v", err) + } +}