commit 6ecaf9a96960a92b09e51527e6a42004b55c8a85 Author: agent Date: Sat Jun 6 10:57:13 2026 +0200 feat: browser_mcp — servidor MCP de control de navegador CDP (33 tools + pool de conexiones) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6eab868 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/browser_mcp +*.log diff --git a/README.md b/README.md new file mode 100644 index 0000000..48156da --- /dev/null +++ b/README.md @@ -0,0 +1,75 @@ +# browser_mcp + +MCP server (Go) that exposes the registry's CDP browser-control functions +(`fn-registry/functions/browser`) as MCP tools. Drive a live Chrome/Chromium over the +Chrome DevTools Protocol: navigate, read the DOM, click, manage cookies, evaluate +JavaScript, operate iframes, and persist/restore session state. + +33 tools total, grouped by domain. See `app.md` for the full per-tool reference and the +"Omitido en v1" section. + +## Build + +```bash +cd projects/web_scraping/apps/browser_mcp +go mod tidy # first time only +go build -o browser_mcp . +``` + +`browser_mcp` only imports `fn-registry/functions/browser` (no sqlite/cgo), so a plain +`go build` works. If transitive deps ever require it, fall back to +`CGO_ENABLED=1 go build -tags fts5 -o browser_mcp .`. + +## Architecture: live CDP connection pool + +Unlike `registry_mcp` (one DB handle), `browser_mcp` keeps a **pool of live CDP +connections** keyed by port. A CDP connection is a live WebSocket session to a "page" +tab; reusing it avoids paying the ~50-200ms handshake on every tool and preserves state +between tools (e.g. the persistent dialog auto-handler armed by `handle_dialog`). The +pool retries once on a dead-connection error (Chrome may have closed the tab between +tools). See `pool.go` and `deps.withConn` in `main.go`. 
+ +## Register in Claude Code + +Add to a `.mcp.json` (the project's `projects/web_scraping/.mcp.json` already has it): + +```json +{ + "mcpServers": { + "browser": { + "command": "/home/enmanuel/fn_registry/projects/web_scraping/apps/browser_mcp/browser_mcp", + "args": [] + } + } +} +``` + +For an inspection-only session that cannot mutate browser state, pass `"args": ["--read-only"]`. + +## Transports + +- **stdio** (default) — for MCP clients. +- **HTTP** — `./browser_mcp --http :7740` (Streamable HTTP). `--bind 0.0.0.0` requires + `REGISTRY_API_TOKEN` (bearer auth). + +## Example session + +Assuming a Chrome already running with `--remote-debugging-port=9222` (or call +`browser_launch` first), a typical agent flow: + +``` +browser_launch { "port": 9222, "url": "https://example.com" } # -> "launched pid=... port=9222" +browser_connect { "port": 9222 } # -> "connected port=9222" +tab_navigate { "port": 9222, "url": "https://example.com" } +page_wait_load { "port": 9222, "timeout_ms": 10000 } +page_get_html { "port": 9222 } # -> serialized HTML (truncated 200k) +dom_find_by_text { "port": 9222, "text": "More information" } # -> "a" / "#id" selector +dom_click { "port": 9222, "selector": "a" } +page_eval_js { "port": 9222, "expression": "document.title" } # -> page title +page_screenshot { "port": 9222, "path": "/tmp/example.png", "full_page": true } +browser_disconnect{ "port": 9222 } +``` + +Cookies, iframes (`frame_list` -> `frame_eval`/`frame_get_html`), keyboard/scroll +(`press_key`, `scroll`), JS dialogs (`handle_dialog`), and session persistence +(`storage_save` / `storage_load`) follow the same per-port pattern. 
diff --git a/app.md b/app.md new file mode 100644 index 0000000..6a0082b --- /dev/null +++ b/app.md @@ -0,0 +1,174 @@ +--- +name: browser_mcp +lang: go +domain: infra +version: 0.1.0 +description: "Servidor MCP que expone control total del navegador via CDP (33 tools: navegación, DOM, cookies, iframes, teclado/scroll, diálogos, estado de sesión) reusando funciones del dominio browser del registry con un pool de conexiones CDP vivas." +tags: [mcp, browser, cdp, automation, scraping] +uses_functions: + - chrome_launch_go_browser + - cdp_connect_go_browser + - cdp_close_go_browser + - cdp_navigate_go_browser + - cdp_list_tabs_go_browser + - cdp_new_tab_go_browser + - cdp_close_tab_go_browser + - cdp_activate_tab_go_browser + - cdp_nav_back_go_browser + - cdp_nav_forward_go_browser + - cdp_wait_load_go_browser + - cdp_wait_idle_go_browser + - cdp_get_html_go_browser + - cdp_evaluate_go_browser + - cdp_screenshot_go_browser + - cdp_click_go_browser + - cdp_click_human_go_browser + - cdp_click_text_go_browser + - cdp_type_text_go_browser + - cdp_find_by_text_go_browser + - cdp_wait_element_go_browser + - cdp_press_key_go_browser + - cdp_scroll_go_browser + - cdp_handle_dialog_go_browser + - cdp_set_cookie_go_browser + - cdp_get_cookies_go_browser + - cdp_delete_cookies_go_browser + - cdp_clear_cookies_go_browser + - cdp_list_frames_go_browser + - cdp_eval_in_frame_go_browser + - cdp_get_frame_html_go_browser + - cdp_save_storage_state_go_browser + - cdp_load_storage_state_go_browser +uses_types: [] +framework: "" +entry_point: "main.go" +dir_path: "projects/web_scraping/apps/browser_mcp" +repo_url: "" +--- + +# browser_mcp + +Servidor MCP (Model Context Protocol) en Go que expone el control de navegador via CDP +del registry `fn_registry` como tools MCP. Cualquier cliente MCP (Claude Code, otros +agentes) puede manejar un Chrome/Chromium vivo: navegar, leer el DOM, hacer clicks, +gestionar cookies, evaluar JavaScript, operar iframes y persistir/restaurar sesiones. 
+ +Clona el patrón de `apps/registry_mcp/` (librería `github.com/mark3labs/mcp-go` v0.52.0, +`server.NewMCPServer` + `server.ServeStdio`, tools con `mcp.NewTool` + handlers tipados +via `mcp.NewTypedToolHandler`, transporte stdio por defecto + HTTP opcional con `--http`, +slog a stderr porque stdout pertenece al JSON-RPC). + +## Arquitectura: pool de conexiones CDP + +A diferencia de `registry_mcp` (que abre la DB una vez), `browser_mcp` mantiene un +**pool de conexiones CDP vivas** indexado por puerto (`pool.go`). Razón: +`browser.CdpConnect(port)` hace un handshake WebSocket contra una tab "page" de Chrome +(~50-200ms) y esa conexión ES una sesión viva (soporta `Page.*`, `Runtime.*`, `Input.*`). +El agente llama muchas tools seguidas (navigate → wait → click → eval); reconectar en +cada tool pagaría el handshake repetidamente y perdería estado entre tools (los event +handlers persistentes, como el de `handle_dialog`, viven mientras la conexión esté viva). +Por eso reusamos la conexión por puerto. + +- `connPool.get(port)` devuelve la conexión cacheada o abre una nueva. +- `connPool.drop(port)` cancela el handler de diálogo (si lo hay) y cierra la conexión. +- `connPool.setCancel(port, cancel)` registra el cancel del auto-handler de `handle_dialog`. +- `connPool.closeAll()` se ejecuta con `defer` en `main()`. +- `deps.withConn(port, fn)` ejecuta `fn` con la conexión del pool y, si el error indica + conexión muerta (`isConnErr`: connection close, broken pipe, use of closed, ws read, EOF), + descarta la conexión y reintenta UNA vez (Chrome pudo cerrar la tab entre tools). + +Toda tool con argumento `port` usa `portOr(a.Port)` (default 9222). Las tools de tabs +(`tab_list`, `tab_new`, `tab_close`, `tab_activate`) usan el endpoint HTTP `/json` de CDP +directamente (host `localhost`), no el pool, porque no requieren una sesión WebSocket viva. + +## Tools (33) + +### Sesión (`tools_session.go`) +- `browser_launch` (MUTA) — lanza Chrome con CDP. 
args: port, headless, user_data_dir, url. +- `browser_connect` — abre/poolea la conexión CDP del puerto. args: port. +- `browser_disconnect` — cierra y descarta la conexión del puerto (no mata Chrome). args: port. + +### Navegación + tabs (`tools_nav.go`) +- `tab_navigate` (MUTA) — `Page.navigate`. args: port, url. +- `tab_list` — lista targets via `GET /json`. args: port. +- `tab_new` (MUTA) — abre tab via `PUT /json/new`. args: port, url. +- `tab_close` (MUTA) — cierra tab por ID. args: port, tab_id. +- `tab_activate` — pone tab en foreground. args: port, tab_id. +- `nav_back` (MUTA) — atrás en el historial. args: port. +- `nav_forward` (MUTA) — adelante en el historial. args: port. +- `page_wait_load` — espera el evento load. args: port, timeout_ms (default 10000). +- `page_wait_idle` — espera red idle. args: port, timeout_ms (default 15000). + +### Lectura (`tools_read.go`) +- `page_get_html` — HTML serializado (truncado a 200000 chars). args: port. +- `page_eval_js` (MUTA) — `Runtime.evaluate`. args: port, expression. +- `page_screenshot` — captura a archivo. args: port, path, full_page. + +### DOM (`tools_dom.go`) +- `dom_click` (MUTA) — click por selector. args: port, selector. +- `dom_click_human` (MUTA) — click con movimiento humano. args: port, selector. +- `dom_click_text` (MUTA) — click sobre el primer elemento con ese texto. args: port, text. +- `dom_type` (MUTA) — escribe texto en el elemento enfocado. args: port, text. +- `dom_find_by_text` — devuelve un selector CSS único para un texto visible. args: port, text. +- `dom_wait_element` — espera a que aparezca un selector. args: port, selector, timeout_ms (default 10000). + +### Input (`tools_input.go`) — todas MUTA +- `press_key` — presiona una tecla nombrada (Enter/Tab/Escape/ArrowDown/...). args: port, key. +- `scroll` — scroll por (delta_x, delta_y). args: port, delta_x (default 0), delta_y (default 300). +- `handle_dialog` — arma un auto-handler de diálogos JS (vive en la conexión del pool). 
args: port, accept (default true), prompt_text. + +### Cookies (`tools_cookies.go`) +- `cookie_get` — todas las cookies como JSON. args: port. +- `cookie_set` (MUTA) — set cookie. args: port, name, value, domain, path, http_only. +- `cookie_delete` (MUTA) — borra cookies por nombre. args: port, name, domain. +- `cookie_clear` (MUTA) — borra todas las cookies. args: port. + +### Iframes (`tools_frames.go`) +- `frame_list` — lista frames con sus IDs. args: port. +- `frame_eval` (MUTA) — evalúa JS dentro de un frame. args: port, frame_id, expression. +- `frame_get_html` — HTML de un frame (truncado a 200000). args: port, frame_id. + +### Estado de sesión (`tools_storage.go`) +- `storage_save` — guarda cookies + localStorage a JSON. args: port, path. +- `storage_load` (MUTA) — carga cookies + localStorage desde JSON. args: port, path. + +## Cómo lanzarlo + +Transporte stdio (default, para clientes MCP): + +```bash +cd projects/web_scraping/apps/browser_mcp +go build -o browser_mcp . +./browser_mcp +``` + +Transporte HTTP (Streamable HTTP): + +```bash +./browser_mcp --http :7740 # bind 127.0.0.1:7740 +./browser_mcp --http :7740 --bind 0.0.0.0 # requiere REGISTRY_API_TOKEN (bearer auth) +``` + +### Flag `--read-only` + +Con `--read-only`, el servidor NO registra las tools mutantes (marcadas MUTA arriba): +solo expone las 14 tools de lectura (`browser_connect`, `browser_disconnect`, `tab_list`, +`tab_activate`, `page_wait_load`, `page_wait_idle`, `page_get_html`, `page_screenshot`, +`dom_find_by_text`, `dom_wait_element`, `cookie_get`, `frame_list`, `frame_get_html`, +`storage_save`). Útil para sesiones de inspección sin riesgo de modificar el estado del +navegador. + +## Omitido en v1 + +Funciones del dominio `browser` que NO se exponen como tools en esta versión, con su razón: + +- **`cdp_har_record_go_browser`** — graba el tráfico de red (HAR). 
Requiere un callback de + larga duración (registrar handlers + un punto de "stop" que devuelve los datos + acumulados); no encaja en el modelo request/response de una tool MCP simple. Pendiente + de un diseño con tool de start + tool de stop. +- **`cdp_get_ax_tree`** — el árbol de accesibilidad se obtiene hoy via un pipeline Python; + futuro a exponer via `fn run` en vez de duplicar la lógica aquí. +- **Funciones de perfiles Chrome (Bash: create/delete/appearance/reset)** — requieren que + Chrome esté CERRADO para modificar el `Local State` / `Preferences` del perfil; son + incompatibles con un MCP cuyo propósito es controlar un Chrome vivo. Quedan disponibles + como `fn run` aparte. diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..163ca18 --- /dev/null +++ b/go.mod @@ -0,0 +1,19 @@ +module browser_mcp + +go 1.25.5 + +replace fn-registry => ../../../.. + +require ( + fn-registry v0.0.0-00010101000000-000000000000 + github.com/mark3labs/mcp-go v0.52.0 +) + +require ( + github.com/google/jsonschema-go v0.4.2 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 // indirect + github.com/spf13/cast v1.7.1 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + golang.org/x/text v0.37.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..0ac432d --- /dev/null +++ b/go.sum @@ -0,0 +1,34 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= +github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/google/go-cmp v0.7.0 
h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8= +github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mark3labs/mcp-go v0.52.0 h1:uRSzupNSUyPGDpF4owY5X4zEpACPwBnlM3FAFuXN6gQ= +github.com/mark3labs/mcp-go v0.52.0/go.mod h1:Zg9cB2HdwdMMVgY0xtTzq3KvYIOJQDsaut+jWjwDaQY= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ= +github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU= +github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y= +github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= 
+github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc= +golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go new file mode 100644 index 0000000..c7aff80 --- /dev/null +++ b/main.go @@ -0,0 +1,174 @@ +package main + +import ( + "flag" + "fmt" + "log/slog" + "net/http" + "os" + "strings" + + "github.com/mark3labs/mcp-go/server" + + "fn-registry/functions/browser" +) + +const version = "0.1.0" + +type config struct { + httpAddr string + bind string + readOnly bool + logLevel string +} + +// deps carries shared state into tool handlers. +type deps struct { + pool *connPool + readOnly bool +} + +func main() { + var cfg config + flag.StringVar(&cfg.httpAddr, "http", "", "Listen on HTTP address (e.g. :7740). Empty = stdio.") + flag.StringVar(&cfg.bind, "bind", "127.0.0.1", "HTTP bind address. Use 0.0.0.0 only with REGISTRY_API_TOKEN set.") + flag.BoolVar(&cfg.readOnly, "read-only", false, "Register only read tools (no mutating browser actions).") + flag.StringVar(&cfg.logLevel, "log-level", "info", "Log level: debug, info, warn, error.") + flag.Parse() + + // Slog → stderr (stdio JSON-RPC owns stdout). 
+ lvl := parseLevel(cfg.logLevel) + slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: lvl}))) + + pool := newConnPool() + defer pool.closeAll() + + d := &deps{pool: pool, readOnly: cfg.readOnly} + + srv := server.NewMCPServer( + "browser_mcp", + version, + server.WithToolCapabilities(true), + ) + + registerTools(srv, d) + + slog.Info("starting browser_mcp", + "version", version, + "transport", transportLabel(cfg), + "read_only", cfg.readOnly, + ) + + if cfg.httpAddr == "" { + if err := server.ServeStdio(srv); err != nil { + slog.Error("stdio server", "err", err) + os.Exit(1) + } + return + } + + if err := serveHTTP(srv, cfg); err != nil { + slog.Error("http server", "err", err) + os.Exit(1) + } +} + +// registerTools wires every tool group. Mutating tools are skipped under --read-only. +func registerTools(s *server.MCPServer, d *deps) { + registerSessionTools(s, d) + registerNavTools(s, d) + registerReadTools(s, d) + registerDomTools(s, d) + registerInputTools(s, d) + registerCookieTools(s, d) + registerFrameTools(s, d) + registerStorageTools(s, d) +} + +// portOr returns the CDP port, defaulting to 9222 when zero. +func portOr(p int) int { + if p == 0 { + return 9222 + } + return p +} + +// withConn obtiene la conexión del puerto y ejecuta fn. Si falla con error de +// conexión muerta, descarta y reintenta UNA vez (Chrome pudo cerrar la tab). +func (d *deps) withConn(port int, fn func(c *browser.CDPConn) error) error { + c, err := d.pool.get(port) + if err != nil { + return err + } + err = fn(c) + if err != nil && isConnErr(err) { + d.pool.drop(port) + c2, err2 := d.pool.get(port) + if err2 != nil { + return err2 + } + return fn(c2) + } + return err +} + +// serveHTTP hosts the MCP server over Streamable HTTP with optional bearer auth. 
+func serveHTTP(s *server.MCPServer, cfg config) error { + addr := cfg.bind + cfg.httpAddr + + httpSrv := server.NewStreamableHTTPServer(s) + + token := os.Getenv("REGISTRY_API_TOKEN") + if cfg.bind == "0.0.0.0" && token == "" { + return fmt.Errorf("--bind 0.0.0.0 requires REGISTRY_API_TOKEN") + } + + mux := http.NewServeMux() + if token != "" { + mux.Handle("/", authMiddleware(token, httpSrv)) + } else { + mux.Handle("/", httpSrv) + } + + slog.Info("listening http", "addr", addr) + return http.ListenAndServe(addr, mux) +} + +func authMiddleware(token string, next http.Handler) http.Handler { + expected := "Bearer " + token + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get("Authorization") != expected { + http.Error(w, "unauthorized", http.StatusUnauthorized) + return + } + next.ServeHTTP(w, r) + }) +} + +func transportLabel(cfg config) string { + if cfg.httpAddr == "" { + return "stdio" + } + return fmt.Sprintf("http %s%s", cfg.bind, cfg.httpAddr) +} + +func parseLevel(s string) slog.Level { + switch strings.ToLower(s) { + case "debug": + return slog.LevelDebug + case "warn": + return slog.LevelWarn + case "error": + return slog.LevelError + default: + return slog.LevelInfo + } +} + +// truncate caps a string at n chars, appending a marker when cut. +func truncate(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] + "\n... [truncated]" +} diff --git a/pool.go b/pool.go new file mode 100644 index 0000000..eb22eae --- /dev/null +++ b/pool.go @@ -0,0 +1,79 @@ +package main + +import ( + "strings" + "sync" + + "fn-registry/functions/browser" +) + +// connPool reusa conexiones CDP entre invocaciones de tools. Clave = puerto CDP. +// Una conexión = una sesión viva a una tab "page". Mantenerla evita pagar el +// handshake WebSocket en cada tool y preserva estado (event handlers, contexto). 
+type connPool struct { + mu sync.Mutex + conns map[int]*browser.CDPConn + cancels map[int]func() // cancels de handlers persistentes (handle_dialog) +} + +func newConnPool() *connPool { + return &connPool{conns: map[int]*browser.CDPConn{}, cancels: map[int]func(){}} +} + +func (p *connPool) get(port int) (*browser.CDPConn, error) { + p.mu.Lock() + defer p.mu.Unlock() + if c, ok := p.conns[port]; ok && c != nil { + return c, nil + } + c, err := browser.CdpConnect(port) + if err != nil { + return nil, err + } + p.conns[port] = c + return c, nil +} + +func (p *connPool) drop(port int) { + p.mu.Lock() + defer p.mu.Unlock() + if cancel, ok := p.cancels[port]; ok && cancel != nil { + cancel() + delete(p.cancels, port) + } + if c, ok := p.conns[port]; ok && c != nil { + _ = browser.CdpClose(c, 0) + delete(p.conns, port) + } +} + +func (p *connPool) setCancel(port int, cancel func()) { + p.mu.Lock() + defer p.mu.Unlock() + if old := p.cancels[port]; old != nil { + old() + } + p.cancels[port] = cancel +} + +func (p *connPool) closeAll() { + p.mu.Lock() + defer p.mu.Unlock() + for port, c := range p.conns { + if cancel := p.cancels[port]; cancel != nil { + cancel() + } + if c != nil { + _ = browser.CdpClose(c, 0) + } + } + p.conns = map[int]*browser.CDPConn{} + p.cancels = map[int]func(){} +} + +// isConnErr reconoce errores de conexión CDP muerta para reintentar UNA vez. 
+func isConnErr(err error) bool { + s := err.Error() + return strings.Contains(s, "connection close") || strings.Contains(s, "broken pipe") || + strings.Contains(s, "use of closed") || strings.Contains(s, "ws read") || strings.Contains(s, "EOF") +} diff --git a/tools_cookies.go b/tools_cookies.go new file mode 100644 index 0000000..22f4566 --- /dev/null +++ b/tools_cookies.go @@ -0,0 +1,145 @@ +package main + +import ( + "context" + "encoding/json" + + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" + + "fn-registry/functions/browser" +) + +// registerCookieTools wires cookie_get (read) + set/delete/clear (MUTA). +func registerCookieTools(s *server.MCPServer, d *deps) { + s.AddTool(cookieGetTool(), mcp.NewTypedToolHandler(d.handleCookieGet)) + + if !d.readOnly { + s.AddTool(cookieSetTool(), mcp.NewTypedToolHandler(d.handleCookieSet)) + s.AddTool(cookieDeleteTool(), mcp.NewTypedToolHandler(d.handleCookieDelete)) + s.AddTool(cookieClearTool(), mcp.NewTypedToolHandler(d.handleCookieClear)) + } +} + +// ---- cookie_get ---- + +type cookieGetArgs struct { + Port int `json:"port"` +} + +func cookieGetTool() mcp.Tool { + return mcp.NewTool("cookie_get", + mcp.WithDescription("Return all browser cookies (Network.getAllCookies) as JSON."), + mcp.WithNumber("port", mcp.Description("CDP port. 
Default 9222.")), + ) +} + +func (d *deps) handleCookieGet(_ context.Context, _ mcp.CallToolRequest, a cookieGetArgs) (*mcp.CallToolResult, error) { + var cookies []browser.CdpCookie + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + var e error + cookies, e = browser.CdpGetCookies(c) + return e + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + b, _ := json.MarshalIndent(cookies, "", " ") + return mcp.NewToolResultText(string(b)), nil +} + +// ---- cookie_set (MUTA) ---- + +type cookieSetArgs struct { + Port int `json:"port"` + Name string `json:"name"` + Value string `json:"value"` + Domain string `json:"domain"` + Path string `json:"path"` + HTTPOnly bool `json:"http_only"` +} + +func cookieSetTool() mcp.Tool { + return mcp.NewTool("cookie_set", + mcp.WithDescription("Set a cookie via Network.setCookie."), + mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), + mcp.WithString("name", mcp.Required(), mcp.Description("Cookie name.")), + mcp.WithString("value", mcp.Description("Cookie value.")), + mcp.WithString("domain", mcp.Required(), mcp.Description("Cookie domain.")), + mcp.WithString("path", mcp.Description("Cookie path. 
Default /.")), + mcp.WithBoolean("http_only", mcp.Description("Mark the cookie HttpOnly.")), + ) +} + +func (d *deps) handleCookieSet(_ context.Context, _ mcp.CallToolRequest, a cookieSetArgs) (*mcp.CallToolResult, error) { + if a.Name == "" { + return mcp.NewToolResultError("name is required"), nil + } + if a.Domain == "" { + return mcp.NewToolResultError("domain is required"), nil + } + path := a.Path + if path == "" { + path = "/" + } + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + return browser.CdpSetCookie(c, a.Name, a.Value, a.Domain, path, a.HTTPOnly) + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + return mcp.NewToolResultText("cookie set: " + a.Name), nil +} + +// ---- cookie_delete (MUTA) ---- + +type cookieDeleteArgs struct { + Port int `json:"port"` + Name string `json:"name"` + Domain string `json:"domain"` +} + +func cookieDeleteTool() mcp.Tool { + return mcp.NewTool("cookie_delete", + mcp.WithDescription("Delete cookies by name (optionally scoped to a domain) via Network.deleteCookies."), + mcp.WithNumber("port", mcp.Description("CDP port. 
Default 9222.")), + mcp.WithString("name", mcp.Required(), mcp.Description("Cookie name to delete.")), + mcp.WithString("domain", mcp.Description("Optional domain scope.")), + ) +} + +func (d *deps) handleCookieDelete(_ context.Context, _ mcp.CallToolRequest, a cookieDeleteArgs) (*mcp.CallToolResult, error) { + if a.Name == "" { + return mcp.NewToolResultError("name is required"), nil + } + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + return browser.CdpDeleteCookies(c, a.Name, a.Domain) + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + return mcp.NewToolResultText("cookie deleted: " + a.Name), nil +} + +// ---- cookie_clear (MUTA) ---- + +type cookieClearArgs struct { + Port int `json:"port"` +} + +func cookieClearTool() mcp.Tool { + return mcp.NewTool("cookie_clear", + mcp.WithDescription("Clear all browser cookies via Network.clearBrowserCookies."), + mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), + ) +} + +func (d *deps) handleCookieClear(_ context.Context, _ mcp.CallToolRequest, a cookieClearArgs) (*mcp.CallToolResult, error) { + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + return browser.CdpClearCookies(c) + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + return mcp.NewToolResultText("cookies cleared"), nil +} diff --git a/tools_dom.go b/tools_dom.go new file mode 100644 index 0000000..47236db --- /dev/null +++ b/tools_dom.go @@ -0,0 +1,201 @@ +package main + +import ( + "context" + "time" + + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" + + "fn-registry/functions/browser" +) + +// registerDomTools wires DOM interaction tools. find/wait stay on under --read-only. 
+func registerDomTools(s *server.MCPServer, d *deps) { + s.AddTool(domFindByTextTool(), mcp.NewTypedToolHandler(d.handleDomFindByText)) + s.AddTool(domWaitElementTool(), mcp.NewTypedToolHandler(d.handleDomWaitElement)) + + if !d.readOnly { + s.AddTool(domClickTool(), mcp.NewTypedToolHandler(d.handleDomClick)) + s.AddTool(domClickHumanTool(), mcp.NewTypedToolHandler(d.handleDomClickHuman)) + s.AddTool(domClickTextTool(), mcp.NewTypedToolHandler(d.handleDomClickText)) + s.AddTool(domTypeTool(), mcp.NewTypedToolHandler(d.handleDomType)) + } +} + +// ---- dom_click (MUTA) ---- + +type domClickArgs struct { + Port int `json:"port"` + Selector string `json:"selector"` +} + +func domClickTool() mcp.Tool { + return mcp.NewTool("dom_click", + mcp.WithDescription("Click the element matching the CSS selector (synthetic CDP click)."), + mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), + mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the element to click.")), + ) +} + +func (d *deps) handleDomClick(_ context.Context, _ mcp.CallToolRequest, a domClickArgs) (*mcp.CallToolResult, error) { + if a.Selector == "" { + return mcp.NewToolResultError("selector is required"), nil + } + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + return browser.CdpClick(c, a.Selector) + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + return mcp.NewToolResultText("clicked " + a.Selector), nil +} + +// ---- dom_click_human (MUTA) ---- + +type domClickHumanArgs struct { + Port int `json:"port"` + Selector string `json:"selector"` +} + +func domClickHumanTool() mcp.Tool { + return mcp.NewTool("dom_click_human", + mcp.WithDescription("Click the element matching the CSS selector with human-like mouse movement (Bézier path + jitter + press/release pause)."), + mcp.WithNumber("port", mcp.Description("CDP port. 
Default 9222.")), + mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the element to click.")), + ) +} + +func (d *deps) handleDomClickHuman(_ context.Context, _ mcp.CallToolRequest, a domClickHumanArgs) (*mcp.CallToolResult, error) { + if a.Selector == "" { + return mcp.NewToolResultError("selector is required"), nil + } + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + return browser.CdpClickHuman(c, a.Selector, browser.MouseHumanOpts{}) + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + return mcp.NewToolResultText("clicked (human) " + a.Selector), nil +} + +// ---- dom_click_text (MUTA) ---- + +type domClickTextArgs struct { + Port int `json:"port"` + Text string `json:"text"` +} + +func domClickTextTool() mcp.Tool { + return mcp.NewTool("dom_click_text", + mcp.WithDescription("Find the first element whose visible text matches and click it."), + mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), + mcp.WithString("text", mcp.Required(), mcp.Description("Visible text to match (substring).")), + ) +} + +func (d *deps) handleDomClickText(_ context.Context, _ mcp.CallToolRequest, a domClickTextArgs) (*mcp.CallToolResult, error) { + if a.Text == "" { + return mcp.NewToolResultError("text is required"), nil + } + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + return browser.CdpClickText(c, a.Text, browser.FindByTextOpts{}) + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + return mcp.NewToolResultText("clicked text " + a.Text), nil +} + +// ---- dom_type (MUTA) ---- + +type domTypeArgs struct { + Port int `json:"port"` + Text string `json:"text"` +} + +func domTypeTool() mcp.Tool { + return mcp.NewTool("dom_type", + mcp.WithDescription("Type text into the currently focused element (dispatches key events char by char)."), + mcp.WithNumber("port", mcp.Description("CDP port. 
Default 9222.")), + mcp.WithString("text", mcp.Required(), mcp.Description("Text to type.")), + ) +} + +func (d *deps) handleDomType(_ context.Context, _ mcp.CallToolRequest, a domTypeArgs) (*mcp.CallToolResult, error) { + if a.Text == "" { + return mcp.NewToolResultError("text is required"), nil + } + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + return browser.CdpTypeText(c, a.Text) + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + return mcp.NewToolResultText("typed text"), nil +} + +// ---- dom_find_by_text ---- + +type domFindByTextArgs struct { + Port int `json:"port"` + Text string `json:"text"` +} + +func domFindByTextTool() mcp.Tool { + return mcp.NewTool("dom_find_by_text", + mcp.WithDescription("Find the first element whose visible text matches and return a unique CSS selector for it (empty string if none)."), + mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), + mcp.WithString("text", mcp.Required(), mcp.Description("Visible text to match (substring).")), + ) +} + +func (d *deps) handleDomFindByText(_ context.Context, _ mcp.CallToolRequest, a domFindByTextArgs) (*mcp.CallToolResult, error) { + if a.Text == "" { + return mcp.NewToolResultError("text is required"), nil + } + var sel string + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + var e error + sel, e = browser.CdpFindByText(c, a.Text, browser.FindByTextOpts{}) + return e + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + return mcp.NewToolResultText(sel), nil +} + +// ---- dom_wait_element ---- + +type domWaitElementArgs struct { + Port int `json:"port"` + Selector string `json:"selector"` + TimeoutMs int `json:"timeout_ms"` +} + +func domWaitElementTool() mcp.Tool { + return mcp.NewTool("dom_wait_element", + mcp.WithDescription("Block until an element matching the CSS selector appears in the DOM (or timeout)."), + mcp.WithNumber("port", mcp.Description("CDP port. 
Default 9222.")),
+		mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector to wait for.")),
+		mcp.WithNumber("timeout_ms", mcp.Description("Max wait in ms. Default 10000.")),
+	)
+}
+
+// handleDomWaitElement rejects an empty selector, then runs browser.CdpWaitElement
+// inside d.withConn with the requested timeout converted to a time.Duration.
+func (d *deps) handleDomWaitElement(_ context.Context, _ mcp.CallToolRequest, a domWaitElementArgs) (*mcp.CallToolResult, error) {
+	if a.Selector == "" {
+		return mcp.NewToolResultError("selector is required"), nil
+	}
+	// A missing, zero or negative timeout_ms falls back to 10000 ms.
+	timeout := a.TimeoutMs
+	if timeout <= 0 {
+		timeout = 10000
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpWaitElement(c, a.Selector, time.Duration(timeout)*time.Millisecond)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("element appeared: " + a.Selector), nil
+}
diff --git a/tools_frames.go b/tools_frames.go
new file mode 100644
index 0000000..e753ab9
--- /dev/null
+++ b/tools_frames.go
@@ -0,0 +1,115 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"fn-registry/functions/browser"
+)
+
+// registerFrameTools registers frame_list and frame_get_html unconditionally and
+// frame_eval (mutating) only when the server is not in read-only mode.
+func registerFrameTools(s *server.MCPServer, d *deps) {
+	s.AddTool(frameListTool(), mcp.NewTypedToolHandler(d.handleFrameList))
+	s.AddTool(frameGetHTMLTool(), mcp.NewTypedToolHandler(d.handleFrameGetHTML))
+
+	if !d.readOnly {
+		s.AddTool(frameEvalTool(), mcp.NewTypedToolHandler(d.handleFrameEval))
+	}
+}
+
+// ---- frame_list ----
+
+type frameListArgs struct {
+	Port int `json:"port"`
+}
+
+func frameListTool() mcp.Tool {
+	return mcp.NewTool("frame_list",
+		mcp.WithDescription("List all frames (including iframes) of the current page via Page.getFrameTree. Returns JSON with frame IDs."),
+		mcp.WithNumber("port", mcp.Description("CDP port.
Default 9222.")),
+	)
+}
+
+// handleFrameList collects the frames reported by browser.CdpListFrames inside
+// d.withConn and returns them as indented JSON. Both a CDP failure and a JSON
+// encoding failure are reported as tool-error results; the Go error is always nil.
+func (d *deps) handleFrameList(_ context.Context, _ mcp.CallToolRequest, a frameListArgs) (*mcp.CallToolResult, error) {
+	var frames []browser.CdpFrame
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		frames, e = browser.CdpListFrames(c)
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	// Surface encoding failures instead of silently returning an empty success.
+	b, err := json.MarshalIndent(frames, "", " ")
+	if err != nil {
+		return mcp.NewToolResultError("encoding frame list: " + err.Error()), nil
+	}
+	return mcp.NewToolResultText(string(b)), nil
+}
+
+// ---- frame_eval (mutating) ----
+
+type frameEvalArgs struct {
+	Port       int    `json:"port"`
+	FrameID    string `json:"frame_id"`
+	Expression string `json:"expression"`
+}
+
+func frameEvalTool() mcp.Tool {
+	return mcp.NewTool("frame_eval",
+		mcp.WithDescription("Evaluate a JavaScript expression inside a specific frame's execution context. Returns the stringified result."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("frame_id", mcp.Required(), mcp.Description("Frame ID (from frame_list).")),
+		mcp.WithString("expression", mcp.Required(), mcp.Description("JavaScript expression to evaluate.")),
+	)
+}
+
+// handleFrameEval requires frame_id and expression, evaluates the expression via
+// browser.CdpEvalInFrame inside d.withConn, and returns the result string cut to
+// htmlMax by truncate.
+func (d *deps) handleFrameEval(_ context.Context, _ mcp.CallToolRequest, a frameEvalArgs) (*mcp.CallToolResult, error) {
+	if a.FrameID == "" {
+		return mcp.NewToolResultError("frame_id is required"), nil
+	}
+	if a.Expression == "" {
+		return mcp.NewToolResultError("expression is required"), nil
+	}
+	var res string
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		res, e = browser.CdpEvalInFrame(c, a.FrameID, a.Expression)
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(truncate(res, htmlMax)), nil
+}
+
+// ---- frame_get_html ----
+
+type frameGetHTMLArgs struct {
+	Port    int    `json:"port"`
+	FrameID string `json:"frame_id"`
+}
+
+func frameGetHTMLTool() mcp.Tool {
+	return mcp.NewTool("frame_get_html",
+		mcp.WithDescription("Return the
serialized HTML of a specific frame. Truncated to 200000 chars."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("frame_id", mcp.Required(), mcp.Description("Frame ID (from frame_list).")),
+	)
+}
+
+// handleFrameGetHTML requires frame_id, fetches the frame's HTML through
+// browser.CdpGetFrameHTML inside d.withConn, and returns it cut to htmlMax.
+func (d *deps) handleFrameGetHTML(_ context.Context, _ mcp.CallToolRequest, a frameGetHTMLArgs) (*mcp.CallToolResult, error) {
+	if a.FrameID == "" {
+		return mcp.NewToolResultError("frame_id is required"), nil
+	}
+	var html string
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		html, e = browser.CdpGetFrameHTML(c, a.FrameID)
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(truncate(html, htmlMax)), nil
+}
diff --git a/tools_input.go b/tools_input.go
new file mode 100644
index 0000000..27df385
--- /dev/null
+++ b/tools_input.go
@@ -0,0 +1,110 @@
+package main
+
+import (
+	"context"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"fn-registry/functions/browser"
+)
+
+// registerInputTools registers press_key, scroll and handle_dialog. All three are
+// mutating, so nothing is registered when the server is in read-only mode.
+func registerInputTools(s *server.MCPServer, d *deps) {
+	if d.readOnly {
+		return
+	}
+	s.AddTool(pressKeyTool(), mcp.NewTypedToolHandler(d.handlePressKey))
+	s.AddTool(scrollTool(), mcp.NewTypedToolHandler(d.handleScroll))
+	s.AddTool(handleDialogTool(), mcp.NewTypedToolHandler(d.handleHandleDialog))
+}
+
+// ---- press_key (mutating) ----
+
+type pressKeyArgs struct {
+	Port int    `json:"port"`
+	Key  string `json:"key"`
+}
+
+func pressKeyTool() mcp.Tool {
+	return mcp.NewTool("press_key",
+		mcp.WithDescription("Press a named key (Enter, Tab, Escape, ArrowDown, Backspace, ...) on the focused element."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("key", mcp.Required(), mcp.Description("Key name, e.g.
Enter, Tab, Escape, ArrowDown.")),
+	)
+}
+
+// handlePressKey rejects an empty key name, then runs browser.CdpPressKey inside
+// d.withConn. Failures are reported as tool-error results.
+func (d *deps) handlePressKey(_ context.Context, _ mcp.CallToolRequest, a pressKeyArgs) (*mcp.CallToolResult, error) {
+	if a.Key == "" {
+		return mcp.NewToolResultError("key is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpPressKey(c, a.Key)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("pressed " + a.Key), nil
+}
+
+// ---- scroll (mutating) ----
+
+type scrollArgs struct {
+	Port   int     `json:"port"`
+	DeltaX float64 `json:"delta_x"`
+	DeltaY float64 `json:"delta_y"`
+}
+
+func scrollTool() mcp.Tool {
+	return mcp.NewTool("scroll",
+		mcp.WithDescription("Scroll the page by (delta_x, delta_y) pixels via a synthetic mouse wheel event."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithNumber("delta_x", mcp.Description("Horizontal scroll delta in pixels. Default 0.")),
+		mcp.WithNumber("delta_y", mcp.Description("Vertical scroll delta in pixels. Default 300.")),
+	)
+}
+
+// handleScroll passes the requested deltas to browser.CdpScroll inside d.withConn.
+func (d *deps) handleScroll(_ context.Context, _ mcp.CallToolRequest, a scrollArgs) (*mcp.CallToolResult, error) {
+	// The 300px vertical default applies only when BOTH deltas are zero, so a
+	// purely horizontal request (delta_x set, delta_y omitted) stays horizontal.
+	deltaY := a.DeltaY
+	if deltaY == 0 && a.DeltaX == 0 {
+		deltaY = 300
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpScroll(c, a.DeltaX, deltaY)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("scrolled"), nil
+}
+
+// ---- handle_dialog (mutating) ----
+
+// NOTE(review): Accept is a plain bool, so an omitted "accept" argument decodes
+// to false even though the tool schema advertises a default of true.
+type handleDialogArgs struct {
+	Port       int    `json:"port"`
+	Accept     bool   `json:"accept"`
+	PromptText string `json:"prompt_text"`
+}
+
+func handleDialogTool() mcp.Tool {
+	return mcp.NewTool("handle_dialog",
+		mcp.WithDescription("Arm an auto-handler that responds to every JS dialog (alert/confirm/prompt/beforeunload) on the tab until disconnect.
The handler lives in the pooled connection."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithBoolean("accept", mcp.DefaultBool(true), mcp.Description("Whether to accept (true) or dismiss (false) dialogs. Default true.")),
+		mcp.WithString("prompt_text", mcp.Description("Text to enter for prompt() dialogs.")),
+	)
+}
+
+// handleHandleDialog arms the dialog auto-handler on the pooled connection for
+// the port and stores the returned cancel function in the pool via setCancel.
+//
+// It talks to d.pool directly instead of going through d.withConn because the
+// cancel function has to be attached to the pooled entry for that port.
+// Failures are reported as tool-error results; the Go error is always nil.
+func (d *deps) handleHandleDialog(_ context.Context, req mcp.CallToolRequest, a handleDialogArgs) (*mcp.CallToolResult, error) {
+	port := portOr(a.Port)
+	c, err := d.pool.get(port)
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	// The typed args cannot tell an omitted "accept" from an explicit false, so
+	// read the raw argument and honour the documented default of true.
+	accept := req.GetBool("accept", true)
+	cancel, err := browser.CdpHandleDialog(c, accept, a.PromptText)
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	d.pool.setCancel(port, cancel)
+	return mcp.NewToolResultText("dialog auto-handler armed"), nil
+}
diff --git a/tools_nav.go b/tools_nav.go
new file mode 100644
index 0000000..90b9d68
--- /dev/null
+++ b/tools_nav.go
@@ -0,0 +1,260 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"time"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"fn-registry/functions/browser"
+)
+
+// registerNavTools wires navigation + tab management + page-wait tools.
+func registerNavTools(s *server.MCPServer, d *deps) {
+	// Tab tools use HTTP /json directly (no pool) — list/activate are read-only.
+	s.AddTool(tabListTool(), mcp.NewTypedToolHandler(d.handleTabList))
+	s.AddTool(tabActivateTool(), mcp.NewTypedToolHandler(d.handleTabActivate))
+	s.AddTool(pageWaitLoadTool(), mcp.NewTypedToolHandler(d.handlePageWaitLoad))
+	s.AddTool(pageWaitIdleTool(), mcp.NewTypedToolHandler(d.handlePageWaitIdle))
+
+	// Mutating tools are only exposed when the server is not read-only.
+	if !d.readOnly {
+		s.AddTool(tabNavigateTool(), mcp.NewTypedToolHandler(d.handleTabNavigate))
+		s.AddTool(tabNewTool(), mcp.NewTypedToolHandler(d.handleTabNew))
+		s.AddTool(tabCloseTool(), mcp.NewTypedToolHandler(d.handleTabClose))
+		s.AddTool(navBackTool(), mcp.NewTypedToolHandler(d.handleNavBack))
+		s.AddTool(navForwardTool(), mcp.NewTypedToolHandler(d.handleNavForward))
+	}
+}
+
+// ---- tab_navigate (mutating) ----
+
+type tabNavigateArgs struct {
+	Port int    `json:"port"`
+	URL  string `json:"url"`
+}
+
+func tabNavigateTool() mcp.Tool {
+	return mcp.NewTool("tab_navigate",
+		mcp.WithDescription("Navigate the connected tab to a URL via Page.navigate."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("url", mcp.Required(), mcp.Description("Target URL.")),
+	)
+}
+
+// handleTabNavigate rejects an empty url, then runs browser.CdpNavigate inside
+// d.withConn. Failures are reported as tool-error results.
+func (d *deps) handleTabNavigate(_ context.Context, _ mcp.CallToolRequest, a tabNavigateArgs) (*mcp.CallToolResult, error) {
+	if a.URL == "" {
+		return mcp.NewToolResultError("url is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpNavigate(c, a.URL)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("navigated to " + a.URL), nil
+}
+
+// ---- tab_list ----
+
+type tabListArgs struct {
+	Port int `json:"port"`
+}
+
+func tabListTool() mcp.Tool {
+	return mcp.NewTool("tab_list",
+		mcp.WithDescription("List all CDP targets (tabs, iframes, workers) via GET /json. Returns JSON."),
+		mcp.WithNumber("port", mcp.Description("CDP port.
Default 9222.")),
+	)
+}
+
+// handleTabList asks browser.CdpListTabs for the targets on localhost at the
+// requested port (no pooled connection involved) and returns them as indented
+// JSON. Lookup and encoding failures are both reported as tool-error results.
+func (d *deps) handleTabList(_ context.Context, _ mcp.CallToolRequest, a tabListArgs) (*mcp.CallToolResult, error) {
+	tabs, err := browser.CdpListTabs("localhost", portOr(a.Port))
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	// Surface encoding failures instead of silently returning an empty success.
+	b, err := json.MarshalIndent(tabs, "", " ")
+	if err != nil {
+		return mcp.NewToolResultError("encoding tab list: " + err.Error()), nil
+	}
+	return mcp.NewToolResultText(string(b)), nil
+}
+
+// ---- tab_new (mutating) ----
+
+type tabNewArgs struct {
+	Port int    `json:"port"`
+	URL  string `json:"url"`
+}
+
+func tabNewTool() mcp.Tool {
+	return mcp.NewTool("tab_new",
+		mcp.WithDescription("Open a new tab via PUT /json/new. Returns the new tab's JSON."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("url", mcp.Description("Optional start URL. Empty = about:blank.")),
+	)
+}
+
+// handleTabNew opens a tab through browser.CdpNewTab on localhost at the
+// requested port and returns the new tab as indented JSON. The url is passed
+// through unchanged, including when it is empty. Failures, including JSON
+// encoding failures, are reported as tool-error results.
+func (d *deps) handleTabNew(_ context.Context, _ mcp.CallToolRequest, a tabNewArgs) (*mcp.CallToolResult, error) {
+	tab, err := browser.CdpNewTab("localhost", portOr(a.Port), a.URL)
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	b, err := json.MarshalIndent(tab, "", " ")
+	if err != nil {
+		return mcp.NewToolResultError("encoding new tab: " + err.Error()), nil
+	}
+	return mcp.NewToolResultText(string(b)), nil
+}
+
+// ---- tab_close (mutating) ----
+
+type tabCloseArgs struct {
+	Port  int    `json:"port"`
+	TabID string `json:"tab_id"`
+}
+
+func tabCloseTool() mcp.Tool {
+	return mcp.NewTool("tab_close",
+		mcp.WithDescription("Close a tab by its target ID via GET /json/close/."),
+		mcp.WithNumber("port", mcp.Description("CDP port.
Default 9222.")),
+		mcp.WithString("tab_id", mcp.Required(), mcp.Description("Target ID of the tab to close.")),
+	)
+}
+
+// handleTabClose rejects an empty tab_id, then calls browser.CdpCloseTab on
+// localhost at the requested port. It does not touch the connection pool.
+func (d *deps) handleTabClose(_ context.Context, _ mcp.CallToolRequest, a tabCloseArgs) (*mcp.CallToolResult, error) {
+	if a.TabID == "" {
+		return mcp.NewToolResultError("tab_id is required"), nil
+	}
+	if err := browser.CdpCloseTab("localhost", portOr(a.Port), a.TabID); err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("closed tab " + a.TabID), nil
+}
+
+// ---- tab_activate ----
+
+type tabActivateArgs struct {
+	Port  int    `json:"port"`
+	TabID string `json:"tab_id"`
+}
+
+func tabActivateTool() mcp.Tool {
+	return mcp.NewTool("tab_activate",
+		mcp.WithDescription("Bring a tab to the foreground via GET /json/activate/."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("tab_id", mcp.Required(), mcp.Description("Target ID of the tab to activate.")),
+	)
+}
+
+// handleTabActivate rejects an empty tab_id, then calls browser.CdpActivateTab
+// on localhost at the requested port. It does not touch the connection pool.
+func (d *deps) handleTabActivate(_ context.Context, _ mcp.CallToolRequest, a tabActivateArgs) (*mcp.CallToolResult, error) {
+	if a.TabID == "" {
+		return mcp.NewToolResultError("tab_id is required"), nil
+	}
+	if err := browser.CdpActivateTab("localhost", portOr(a.Port), a.TabID); err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("activated tab " + a.TabID), nil
+}
+
+// ---- nav_back (mutating) ----
+
+type navBackArgs struct {
+	Port int `json:"port"`
+}
+
+func navBackTool() mcp.Tool {
+	return mcp.NewTool("nav_back",
+		mcp.WithDescription("Navigate back in the connected tab's history."),
+		mcp.WithNumber("port", mcp.Description("CDP port.
Default 9222.")),
+	)
+}
+
+// handleNavBack runs browser.CdpNavBack inside d.withConn and reports failures
+// as tool-error results; the Go error is always nil.
+func (d *deps) handleNavBack(_ context.Context, _ mcp.CallToolRequest, a navBackArgs) (*mcp.CallToolResult, error) {
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpNavBack(c)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("navigated back"), nil
+}
+
+// ---- nav_forward (mutating) ----
+
+type navForwardArgs struct {
+	Port int `json:"port"`
+}
+
+func navForwardTool() mcp.Tool {
+	return mcp.NewTool("nav_forward",
+		mcp.WithDescription("Navigate forward in the connected tab's history."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+	)
+}
+
+// handleNavForward runs browser.CdpNavForward inside d.withConn and reports
+// failures as tool-error results; the Go error is always nil.
+func (d *deps) handleNavForward(_ context.Context, _ mcp.CallToolRequest, a navForwardArgs) (*mcp.CallToolResult, error) {
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpNavForward(c)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("navigated forward"), nil
+}
+
+// ---- page_wait_load ----
+
+type pageWaitLoadArgs struct {
+	Port      int `json:"port"`
+	TimeoutMs int `json:"timeout_ms"`
+}
+
+func pageWaitLoadTool() mcp.Tool {
+	return mcp.NewTool("page_wait_load",
+		mcp.WithDescription("Block until the page fires the load event (or timeout)."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithNumber("timeout_ms", mcp.Description("Max wait in ms.
Default 10000.")),
+	)
+}
+
+// handlePageWaitLoad runs browser.CdpWaitLoad inside d.withConn with the
+// requested timeout converted to a time.Duration.
+func (d *deps) handlePageWaitLoad(_ context.Context, _ mcp.CallToolRequest, a pageWaitLoadArgs) (*mcp.CallToolResult, error) {
+	// A missing, zero or negative timeout_ms falls back to 10000 ms.
+	timeout := a.TimeoutMs
+	if timeout <= 0 {
+		timeout = 10000
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpWaitLoad(c, time.Duration(timeout)*time.Millisecond)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("page loaded"), nil
+}
+
+// ---- page_wait_idle ----
+
+type pageWaitIdleArgs struct {
+	Port      int `json:"port"`
+	TimeoutMs int `json:"timeout_ms"`
+}
+
+func pageWaitIdleTool() mcp.Tool {
+	return mcp.NewTool("page_wait_idle",
+		mcp.WithDescription("Block until network activity quiets down (inflight requests reach 0 for a quiet window) or timeout. Immune to DOM-mutating extensions/animations."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithNumber("timeout_ms", mcp.Description("Max wait in ms. Default 15000.")),
+	)
+}
+
+// handlePageWaitIdle runs browser.CdpWaitIdle inside d.withConn. Only the
+// Timeout option is set; every other CdpWaitIdleOpts field keeps its zero value.
+func (d *deps) handlePageWaitIdle(_ context.Context, _ mcp.CallToolRequest, a pageWaitIdleArgs) (*mcp.CallToolResult, error) {
+	// A missing, zero or negative timeout_ms falls back to 15000 ms.
+	timeout := a.TimeoutMs
+	if timeout <= 0 {
+		timeout = 15000
+	}
+	opts := browser.CdpWaitIdleOpts{
+		Timeout: time.Duration(timeout) * time.Millisecond,
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpWaitIdle(c, opts)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("network idle"), nil
+}
diff --git a/tools_read.go b/tools_read.go
new file mode 100644
index 0000000..424edc9
--- /dev/null
+++ b/tools_read.go
@@ -0,0 +1,110 @@
+package main
+
+import (
+	"context"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"fn-registry/functions/browser"
+)
+
+// htmlMax is the length limit passed to truncate for HTML and eval results.
+const htmlMax = 200_000
+
+// registerReadTools wires page_get_html, page_eval_js (mutating), page_screenshot.
+func registerReadTools(s *server.MCPServer, d *deps) {
+	s.AddTool(pageGetHTMLTool(), mcp.NewTypedToolHandler(d.handlePageGetHTML))
+	s.AddTool(pageScreenshotTool(), mcp.NewTypedToolHandler(d.handlePageScreenshot))
+
+	// page_eval_js can change page state, so it is hidden in read-only mode.
+	if !d.readOnly {
+		s.AddTool(pageEvalJSTool(), mcp.NewTypedToolHandler(d.handlePageEvalJS))
+	}
+}
+
+// ---- page_get_html ----
+
+type pageGetHTMLArgs struct {
+	Port int `json:"port"`
+}
+
+func pageGetHTMLTool() mcp.Tool {
+	return mcp.NewTool("page_get_html",
+		mcp.WithDescription("Return the current page's full serialized HTML (outerHTML). Truncated to 200000 chars."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+	)
+}
+
+// handlePageGetHTML fetches the page HTML through browser.CdpGetHTML inside
+// d.withConn and returns it cut to htmlMax by truncate.
+func (d *deps) handlePageGetHTML(_ context.Context, _ mcp.CallToolRequest, a pageGetHTMLArgs) (*mcp.CallToolResult, error) {
+	var html string
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		html, e = browser.CdpGetHTML(c)
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(truncate(html, htmlMax)), nil
+}
+
+// ---- page_eval_js (mutating) ----
+
+type pageEvalJSArgs struct {
+	Port       int    `json:"port"`
+	Expression string `json:"expression"`
+}
+
+func pageEvalJSTool() mcp.Tool {
+	return mcp.NewTool("page_eval_js",
+		mcp.WithDescription("Evaluate a JavaScript expression in the page context via Runtime.evaluate. Returns the stringified result."),
+		mcp.WithNumber("port", mcp.Description("CDP port.
Default 9222.")),
+		mcp.WithString("expression", mcp.Required(), mcp.Description("JavaScript expression to evaluate.")),
+	)
+}
+
+// handlePageEvalJS rejects an empty expression, evaluates it through
+// browser.CdpEvaluate inside d.withConn, and returns the result string cut to
+// htmlMax by truncate.
+func (d *deps) handlePageEvalJS(_ context.Context, _ mcp.CallToolRequest, a pageEvalJSArgs) (*mcp.CallToolResult, error) {
+	if a.Expression == "" {
+		return mcp.NewToolResultError("expression is required"), nil
+	}
+	var res string
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		res, e = browser.CdpEvaluate(c, a.Expression)
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(truncate(res, htmlMax)), nil
+}
+
+// ---- page_screenshot ----
+
+type pageScreenshotArgs struct {
+	Port     int    `json:"port"`
+	Path     string `json:"path"`
+	FullPage bool   `json:"full_page"`
+}
+
+func pageScreenshotTool() mcp.Tool {
+	return mcp.NewTool("page_screenshot",
+		mcp.WithDescription("Capture a screenshot of the current page and write it to a local path (.png/.jpg)."),
+		mcp.WithNumber("port", mcp.Description("CDP port.
Default 9222.")),
+		mcp.WithString("path", mcp.Required(), mcp.Description("Output file path (.png or .jpg).")),
+		mcp.WithBoolean("full_page", mcp.Description("Capture the full scroll height instead of just the viewport.")),
+	)
+}
+
+// handlePageScreenshot rejects an empty path, then runs browser.CdpScreenshot
+// inside d.withConn with the FullPage option taken from the request.
+func (d *deps) handlePageScreenshot(_ context.Context, _ mcp.CallToolRequest, a pageScreenshotArgs) (*mcp.CallToolResult, error) {
+	if a.Path == "" {
+		return mcp.NewToolResultError("path is required"), nil
+	}
+	opts := browser.CdpScreenshotOpts{FullPage: a.FullPage}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpScreenshot(c, a.Path, opts)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("screenshot saved to " + a.Path), nil
+}
diff --git a/tools_session.go b/tools_session.go
new file mode 100644
index 0000000..e8514ef
--- /dev/null
+++ b/tools_session.go
@@ -0,0 +1,95 @@
+package main
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"fn-registry/functions/browser"
+)
+
+// registerSessionTools registers browser_connect and browser_disconnect
+// unconditionally and browser_launch (mutating) only outside read-only mode.
+func registerSessionTools(s *server.MCPServer, d *deps) {
+	if !d.readOnly {
+		s.AddTool(launchTool(), mcp.NewTypedToolHandler(d.handleLaunch))
+	}
+	s.AddTool(connectTool(), mcp.NewTypedToolHandler(d.handleConnect))
+	s.AddTool(disconnectTool(), mcp.NewTypedToolHandler(d.handleDisconnect))
+}
+
+// ---- browser_launch (mutating) ----
+
+type launchArgs struct {
+	Port        int    `json:"port"`
+	Headless    bool   `json:"headless"`
+	UserDataDir string `json:"user_data_dir"`
+	URL         string `json:"url"`
+}
+
+func launchTool() mcp.Tool {
+	return mcp.NewTool("browser_launch",
+		mcp.WithDescription("Launch a Chrome/Chromium instance with CDP remote debugging enabled. Returns the launched PID. Waits up to 15s for the CDP port to be ready."),
+		mcp.WithNumber("port", mcp.Description("CDP remote debugging port.
Default 9222.")),
+		mcp.WithBoolean("headless", mcp.Description("Run headless (--headless=new). Default false.")),
+		mcp.WithString("user_data_dir", mcp.Description("Chrome profile directory. Empty = /tmp/chrome-cdp-profile.")),
+		mcp.WithString("url", mcp.Description("Optional initial URL to open on launch.")),
+	)
+}
+
+// handleLaunch builds browser.ChromeLaunchOpts from the request, starts Chrome
+// through browser.ChromeLaunch and reports the PID and port. A non-empty url is
+// appended to ExtraArgs. Failures are reported as tool-error results; the Go
+// error is always nil.
+func (d *deps) handleLaunch(_ context.Context, _ mcp.CallToolRequest, a launchArgs) (*mcp.CallToolResult, error) {
+	opts := browser.ChromeLaunchOpts{
+		Port:        portOr(a.Port),
+		Headless:    a.Headless,
+		UserDataDir: a.UserDataDir,
+	}
+	if a.URL != "" {
+		// The url travels as an extra launch argument; a value starting with '-'
+		// would be read as a browser switch rather than a page to open, so
+		// refuse it instead of letting a caller smuggle flags in.
+		if a.URL[0] == '-' {
+			return mcp.NewToolResultError("url must not start with '-'"), nil
+		}
+		opts.ExtraArgs = append(opts.ExtraArgs, a.URL)
+	}
+	pid, err := browser.ChromeLaunch(opts)
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(fmt.Sprintf("launched pid=%d port=%d", pid, opts.Port)), nil
+}
+
+// ---- browser_connect ----
+
+type connectArgs struct {
+	Port int `json:"port"`
+}
+
+func connectTool() mcp.Tool {
+	return mcp.NewTool("browser_connect",
+		mcp.WithDescription("Open (and pool) a CDP WebSocket connection to a running Chrome's first 'page' tab on the given port. Subsequent tools reuse this live session."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+	)
+}
+
+// handleConnect asks the pool for the connection on the requested port and
+// discards it; only the success or failure of d.pool.get is reported.
+func (d *deps) handleConnect(_ context.Context, _ mcp.CallToolRequest, a connectArgs) (*mcp.CallToolResult, error) {
+	port := portOr(a.Port)
+	if _, err := d.pool.get(port); err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(fmt.Sprintf("connected port=%d", port)), nil
+}
+
+// ---- browser_disconnect ----
+
+type disconnectArgs struct {
+	Port int `json:"port"`
+}
+
+func disconnectTool() mcp.Tool {
+	return mcp.NewTool("browser_disconnect",
+		mcp.WithDescription("Close and drop the pooled CDP connection for the given port (cancels any armed dialog handler). Does NOT kill Chrome."),
+		mcp.WithNumber("port", mcp.Description("CDP port.
Default 9222.")),
+	)
+}
+
+// handleDisconnect calls d.pool.drop for the requested port and always reports
+// success; drop returns nothing here, so no failure can be surfaced.
+func (d *deps) handleDisconnect(_ context.Context, _ mcp.CallToolRequest, a disconnectArgs) (*mcp.CallToolResult, error) {
+	port := portOr(a.Port)
+	d.pool.drop(port)
+	return mcp.NewToolResultText(fmt.Sprintf("disconnected port=%d", port)), nil
+}
diff --git a/tools_storage.go b/tools_storage.go
new file mode 100644
index 0000000..fca03ce
--- /dev/null
+++ b/tools_storage.go
@@ -0,0 +1,75 @@
+package main
+
+import (
+	"context"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"fn-registry/functions/browser"
+)
+
+// registerStorageTools registers storage_save unconditionally and storage_load
+// (mutating) only when the server is not in read-only mode.
+func registerStorageTools(s *server.MCPServer, d *deps) {
+	s.AddTool(storageSaveTool(), mcp.NewTypedToolHandler(d.handleStorageSave))
+
+	if !d.readOnly {
+		s.AddTool(storageLoadTool(), mcp.NewTypedToolHandler(d.handleStorageLoad))
+	}
+}
+
+// ---- storage_save ----
+
+type storageSaveArgs struct {
+	Port int    `json:"port"`
+	Path string `json:"path"`
+}
+
+func storageSaveTool() mcp.Tool {
+	return mcp.NewTool("storage_save",
+		mcp.WithDescription("Save the current session storage state (cookies + localStorage) to a JSON file for later reuse."),
+		mcp.WithNumber("port", mcp.Description("CDP port.
Default 9222.")),
+		mcp.WithString("path", mcp.Required(), mcp.Description("Output JSON file path.")),
+	)
+}
+
+// handleStorageSave rejects an empty path, then runs browser.CdpSaveStorageState
+// inside d.withConn. Failures are reported as tool-error results.
+func (d *deps) handleStorageSave(_ context.Context, _ mcp.CallToolRequest, a storageSaveArgs) (*mcp.CallToolResult, error) {
+	if a.Path == "" {
+		return mcp.NewToolResultError("path is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpSaveStorageState(c, a.Path)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("storage state saved to " + a.Path), nil
+}
+
+// ---- storage_load (mutating) ----
+
+type storageLoadArgs struct {
+	Port int    `json:"port"`
+	Path string `json:"path"`
+}
+
+func storageLoadTool() mcp.Tool {
+	return mcp.NewTool("storage_load",
+		mcp.WithDescription("Load a previously saved session storage state (cookies + localStorage) from a JSON file into the live browser."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("path", mcp.Required(), mcp.Description("Input JSON file path.")),
+	)
+}
+
+// handleStorageLoad rejects an empty path, then runs browser.CdpLoadStorageState
+// inside d.withConn. Failures are reported as tool-error results.
+func (d *deps) handleStorageLoad(_ context.Context, _ mcp.CallToolRequest, a storageLoadArgs) (*mcp.CallToolResult, error) {
+	if a.Path == "" {
+		return mcp.NewToolResultError("path is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpLoadStorageState(c, a.Path)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("storage state loaded from " + a.Path), nil
+}