From 6ecaf9a96960a92b09e51527e6a42004b55c8a85 Mon Sep 17 00:00:00 2001 From: agent Date: Sat, 6 Jun 2026 10:57:13 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20browser=5Fmcp=20=E2=80=94=20servidor=20?= =?UTF-8?q?MCP=20de=20control=20de=20navegador=20CDP=20(33=20tools=20+=20p?= =?UTF-8?q?ool=20de=20conexiones)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 + README.md | 75 ++++++++++++++ app.md | 174 +++++++++++++++++++++++++++++++ go.mod | 19 ++++ go.sum | 34 +++++++ main.go | 174 +++++++++++++++++++++++++++++++ pool.go | 79 ++++++++++++++ tools_cookies.go | 145 ++++++++++++++++++++++++++ tools_dom.go | 201 ++++++++++++++++++++++++++++++++++++ tools_frames.go | 115 +++++++++++++++++++++ tools_input.go | 110 ++++++++++++++++++++ tools_nav.go | 260 +++++++++++++++++++++++++++++++++++++++++++++++ tools_read.go | 110 ++++++++++++++++++++ tools_session.go | 95 +++++++++++++++++ tools_storage.go | 75 ++++++++++++++ 15 files changed, 1668 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 app.md create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go create mode 100644 pool.go create mode 100644 tools_cookies.go create mode 100644 tools_dom.go create mode 100644 tools_frames.go create mode 100644 tools_input.go create mode 100644 tools_nav.go create mode 100644 tools_read.go create mode 100644 tools_session.go create mode 100644 tools_storage.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6eab868 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/browser_mcp +*.log diff --git a/README.md b/README.md new file mode 100644 index 0000000..48156da --- /dev/null +++ b/README.md @@ -0,0 +1,75 @@ +# browser_mcp + +MCP server (Go) that exposes the registry's CDP browser-control functions +(`fn-registry/functions/browser`) as MCP tools. 
Drive a live Chrome/Chromium over the +Chrome DevTools Protocol: navigate, read the DOM, click, manage cookies, evaluate +JavaScript, operate iframes, and persist/restore session state. + +33 tools total, grouped by domain. See `app.md` for the full per-tool reference and the +"Omitido en v1" section. + +## Build + +```bash +cd projects/web_scraping/apps/browser_mcp +go mod tidy # first time only +go build -o browser_mcp . +``` + +`browser_mcp` only imports `fn-registry/functions/browser` (no sqlite/cgo), so a plain +`go build` works. If transitive deps ever require it, fall back to +`CGO_ENABLED=1 go build -tags fts5 -o browser_mcp .`. + +## Architecture: live CDP connection pool + +Unlike `registry_mcp` (one DB handle), `browser_mcp` keeps a **pool of live CDP +connections** keyed by port. A CDP connection is a live WebSocket session to a "page" +tab; reusing it avoids paying the ~50-200ms handshake on every tool and preserves state +between tools (e.g. the persistent dialog auto-handler armed by `handle_dialog`). The +pool retries once on a dead-connection error (Chrome may have closed the tab between +tools). See `pool.go` and `deps.withConn` in `main.go`. + +## Register in Claude Code + +Add to a `.mcp.json` (the project's `projects/web_scraping/.mcp.json` already has it): + +```json +{ + "mcpServers": { + "browser": { + "command": "/home/enmanuel/fn_registry/projects/web_scraping/apps/browser_mcp/browser_mcp", + "args": [] + } + } +} +``` + +For an inspection-only session that cannot mutate browser state, pass `"args": ["--read-only"]`. + +## Transports + +- **stdio** (default) — for MCP clients. +- **HTTP** — `./browser_mcp --http :7740` (Streamable HTTP). `--bind 0.0.0.0` requires + `REGISTRY_API_TOKEN` (bearer auth). 
+ +## Example session + +Assuming a Chrome already running with `--remote-debugging-port=9222` (or call +`browser_launch` first), a typical agent flow: + +``` +browser_launch { "port": 9222, "url": "https://example.com" } # -> "launched pid=... port=9222" +browser_connect { "port": 9222 } # -> "connected port=9222" +tab_navigate { "port": 9222, "url": "https://example.com" } +page_wait_load { "port": 9222, "timeout_ms": 10000 } +page_get_html { "port": 9222 } # -> serialized HTML (truncated 200k) +dom_find_by_text { "port": 9222, "text": "More information" } # -> "a" / "#id" selector +dom_click { "port": 9222, "selector": "a" } +page_eval_js { "port": 9222, "expression": "document.title" } # -> page title +page_screenshot { "port": 9222, "path": "/tmp/example.png", "full_page": true } +browser_disconnect{ "port": 9222 } +``` + +Cookies, iframes (`frame_list` -> `frame_eval`/`frame_get_html`), keyboard/scroll +(`press_key`, `scroll`), JS dialogs (`handle_dialog`), and session persistence +(`storage_save` / `storage_load`) follow the same per-port pattern. diff --git a/app.md b/app.md new file mode 100644 index 0000000..6a0082b --- /dev/null +++ b/app.md @@ -0,0 +1,174 @@ +--- +name: browser_mcp +lang: go +domain: infra +version: 0.1.0 +description: "Servidor MCP que expone control total del navegador via CDP (33 tools: navegación, DOM, cookies, iframes, teclado/scroll, diálogos, estado de sesión) reusando funciones del dominio browser del registry con un pool de conexiones CDP vivas." 
+tags: [mcp, browser, cdp, automation, scraping] +uses_functions: + - chrome_launch_go_browser + - cdp_connect_go_browser + - cdp_close_go_browser + - cdp_navigate_go_browser + - cdp_list_tabs_go_browser + - cdp_new_tab_go_browser + - cdp_close_tab_go_browser + - cdp_activate_tab_go_browser + - cdp_nav_back_go_browser + - cdp_nav_forward_go_browser + - cdp_wait_load_go_browser + - cdp_wait_idle_go_browser + - cdp_get_html_go_browser + - cdp_evaluate_go_browser + - cdp_screenshot_go_browser + - cdp_click_go_browser + - cdp_click_human_go_browser + - cdp_click_text_go_browser + - cdp_type_text_go_browser + - cdp_find_by_text_go_browser + - cdp_wait_element_go_browser + - cdp_press_key_go_browser + - cdp_scroll_go_browser + - cdp_handle_dialog_go_browser + - cdp_set_cookie_go_browser + - cdp_get_cookies_go_browser + - cdp_delete_cookies_go_browser + - cdp_clear_cookies_go_browser + - cdp_list_frames_go_browser + - cdp_eval_in_frame_go_browser + - cdp_get_frame_html_go_browser + - cdp_save_storage_state_go_browser + - cdp_load_storage_state_go_browser +uses_types: [] +framework: "" +entry_point: "main.go" +dir_path: "projects/web_scraping/apps/browser_mcp" +repo_url: "" +--- + +# browser_mcp + +Servidor MCP (Model Context Protocol) en Go que expone el control de navegador via CDP +del registry `fn_registry` como tools MCP. Cualquier cliente MCP (Claude Code, otros +agentes) puede manejar un Chrome/Chromium vivo: navegar, leer el DOM, hacer clicks, +gestionar cookies, evaluar JavaScript, operar iframes y persistir/restaurar sesiones. + +Clona el patrón de `apps/registry_mcp/` (librería `github.com/mark3labs/mcp-go` v0.52.0, +`server.NewMCPServer` + `server.ServeStdio`, tools con `mcp.NewTool` + handlers tipados +via `mcp.NewTypedToolHandler`, transporte stdio por defecto + HTTP opcional con `--http`, +slog a stderr porque stdout pertenece al JSON-RPC). 
+ +## Arquitectura: pool de conexiones CDP + +A diferencia de `registry_mcp` (que abre la DB una vez), `browser_mcp` mantiene un +**pool de conexiones CDP vivas** indexado por puerto (`pool.go`). Razón: +`browser.CdpConnect(port)` hace un handshake WebSocket contra una tab "page" de Chrome +(~50-200ms) y esa conexión ES una sesión viva (soporta `Page.*`, `Runtime.*`, `Input.*`). +El agente llama muchas tools seguidas (navigate → wait → click → eval); reconectar en +cada tool pagaría el handshake repetidamente y perdería estado entre tools (los event +handlers persistentes, como el de `handle_dialog`, viven mientras la conexión esté viva). +Por eso reusamos la conexión por puerto. + +- `connPool.get(port)` devuelve la conexión cacheada o abre una nueva. +- `connPool.drop(port)` cancela el handler de diálogo (si lo hay) y cierra la conexión. +- `connPool.setCancel(port, cancel)` registra el cancel del auto-handler de `handle_dialog`. +- `connPool.closeAll()` se ejecuta con `defer` en `main()`. +- `deps.withConn(port, fn)` ejecuta `fn` con la conexión del pool y, si el error indica + conexión muerta (`isConnErr`: connection close, broken pipe, use of closed, ws read, EOF), + descarta la conexión y reintenta UNA vez (Chrome pudo cerrar la tab entre tools). + +Toda tool con argumento `port` usa `portOr(a.Port)` (default 9222). Las tools de tabs +(`tab_list`, `tab_new`, `tab_close`, `tab_activate`) usan el endpoint HTTP `/json` de CDP +directamente (host `localhost`), no el pool, porque no requieren una sesión WebSocket viva. + +## Tools (33) + +### Sesión (`tools_session.go`) +- `browser_launch` (MUTA) — lanza Chrome con CDP. args: port, headless, user_data_dir, url. +- `browser_connect` — abre/poolea la conexión CDP del puerto. args: port. +- `browser_disconnect` — cierra y descarta la conexión del puerto (no mata Chrome). args: port. + +### Navegación + tabs (`tools_nav.go`) +- `tab_navigate` (MUTA) — `Page.navigate`. args: port, url. 
+- `tab_list` — lista targets via `GET /json`. args: port. +- `tab_new` (MUTA) — abre tab via `PUT /json/new`. args: port, url. +- `tab_close` (MUTA) — cierra tab por ID. args: port, tab_id. +- `tab_activate` — pone tab en foreground. args: port, tab_id. +- `nav_back` (MUTA) — atrás en el historial. args: port. +- `nav_forward` (MUTA) — adelante en el historial. args: port. +- `page_wait_load` — espera el evento load. args: port, timeout_ms (default 10000). +- `page_wait_idle` — espera red idle. args: port, timeout_ms (default 15000). + +### Lectura (`tools_read.go`) +- `page_get_html` — HTML serializado (truncado a 200000 chars). args: port. +- `page_eval_js` (MUTA) — `Runtime.evaluate`. args: port, expression. +- `page_screenshot` — captura a archivo. args: port, path, full_page. + +### DOM (`tools_dom.go`) +- `dom_click` (MUTA) — click por selector. args: port, selector. +- `dom_click_human` (MUTA) — click con movimiento humano. args: port, selector. +- `dom_click_text` (MUTA) — click sobre el primer elemento con ese texto. args: port, text. +- `dom_type` (MUTA) — escribe texto en el elemento enfocado. args: port, text. +- `dom_find_by_text` — devuelve un selector CSS único para un texto visible. args: port, text. +- `dom_wait_element` — espera a que aparezca un selector. args: port, selector, timeout_ms (default 10000). + +### Input (`tools_input.go`) — todas MUTA +- `press_key` — presiona una tecla nombrada (Enter/Tab/Escape/ArrowDown/...). args: port, key. +- `scroll` — scroll por (delta_x, delta_y). args: port, delta_x (default 0), delta_y (default 300). +- `handle_dialog` — arma un auto-handler de diálogos JS (vive en la conexión del pool). args: port, accept (default true), prompt_text. + +### Cookies (`tools_cookies.go`) +- `cookie_get` — todas las cookies como JSON. args: port. +- `cookie_set` (MUTA) — set cookie. args: port, name, value, domain, path, http_only. +- `cookie_delete` (MUTA) — borra cookies por nombre. args: port, name, domain. 
+- `cookie_clear` (MUTA) — borra todas las cookies. args: port. + +### Iframes (`tools_frames.go`) +- `frame_list` — lista frames con sus IDs. args: port. +- `frame_eval` (MUTA) — evalúa JS dentro de un frame. args: port, frame_id, expression. +- `frame_get_html` — HTML de un frame (truncado a 200000). args: port, frame_id. + +### Estado de sesión (`tools_storage.go`) +- `storage_save` — guarda cookies + localStorage a JSON. args: port, path. +- `storage_load` (MUTA) — carga cookies + localStorage desde JSON. args: port, path. + +## Cómo lanzarlo + +Transporte stdio (default, para clientes MCP): + +```bash +cd projects/web_scraping/apps/browser_mcp +go build -o browser_mcp . +./browser_mcp +``` + +Transporte HTTP (Streamable HTTP): + +```bash +./browser_mcp --http :7740 # bind 127.0.0.1:7740 +./browser_mcp --http :7740 --bind 0.0.0.0 # requiere REGISTRY_API_TOKEN (bearer auth) +``` + +### Flag `--read-only` + +Con `--read-only`, el servidor NO registra las tools mutantes (marcadas MUTA arriba): +solo expone las 14 tools de lectura (`browser_connect`, `browser_disconnect`, `tab_list`, +`tab_activate`, `page_wait_load`, `page_wait_idle`, `page_get_html`, `page_screenshot`, +`dom_find_by_text`, `dom_wait_element`, `cookie_get`, `frame_list`, `frame_get_html`, +`storage_save`). Útil para sesiones de inspección sin riesgo de modificar el estado del +navegador. + +## Omitido en v1 + +Funciones del dominio `browser` que NO se exponen como tools en esta versión, con su razón: + +- **`cdp_har_record_go_browser`** — graba el tráfico de red (HAR). Requiere un callback de + larga duración (registrar handlers + un punto de "stop" que devuelve los datos + acumulados); no encaja en el modelo request/response de una tool MCP simple. Pendiente + de un diseño con tool de start + tool de stop. +- **`cdp_get_ax_tree`** — el árbol de accesibilidad se obtiene hoy via un pipeline Python; + futuro a exponer via `fn run` en vez de duplicar la lógica aquí. 
+- **Funciones de perfiles Chrome (Bash: create/delete/appearance/reset)** — requieren que + Chrome esté CERRADO para modificar el `Local State` / `Preferences` del perfil; son + incompatibles con un MCP cuyo propósito es controlar un Chrome vivo. Quedan disponibles + como `fn run` aparte. diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..163ca18 --- /dev/null +++ b/go.mod @@ -0,0 +1,19 @@ +module browser_mcp + +go 1.25.5 + +replace fn-registry => ../../../.. + +require ( + fn-registry v0.0.0-00010101000000-000000000000 + github.com/mark3labs/mcp-go v0.52.0 +) + +require ( + github.com/google/jsonschema-go v0.4.2 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 // indirect + github.com/spf13/cast v1.7.1 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + golang.org/x/text v0.37.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..0ac432d --- /dev/null +++ b/go.sum @@ -0,0 +1,34 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= +github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8= +github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid 
v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mark3labs/mcp-go v0.52.0 h1:uRSzupNSUyPGDpF4owY5X4zEpACPwBnlM3FAFuXN6gQ= +github.com/mark3labs/mcp-go v0.52.0/go.mod h1:Zg9cB2HdwdMMVgY0xtTzq3KvYIOJQDsaut+jWjwDaQY= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ= +github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU= +github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y= +github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc= +golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go new file 
mode 100644 index 0000000..c7aff80 --- /dev/null +++ b/main.go @@ -0,0 +1,174 @@ +package main + +import ( + "flag" + "fmt" + "log/slog" + "net/http" + "os" + "strings" + + "github.com/mark3labs/mcp-go/server" + + "fn-registry/functions/browser" +) + +const version = "0.1.0" + +// config holds the values parsed from the command-line flags. +type config struct { + httpAddr string + bind string + readOnly bool + logLevel string +} + +// deps carries shared state into tool handlers. +type deps struct { + pool *connPool + readOnly bool +} + +// main parses the flags, routes slog to stderr, builds the shared CDP connection pool, +// registers the tools and serves MCP over stdio (default) or HTTP when --http is set. +func main() { + var cfg config + flag.StringVar(&cfg.httpAddr, "http", "", "Listen on HTTP address (e.g. :7740). Empty = stdio.") + flag.StringVar(&cfg.bind, "bind", "127.0.0.1", "HTTP bind address. Use 0.0.0.0 only with REGISTRY_API_TOKEN set.") + flag.BoolVar(&cfg.readOnly, "read-only", false, "Register only read tools (no mutating browser actions).") + flag.StringVar(&cfg.logLevel, "log-level", "info", "Log level: debug, info, warn, error.") + flag.Parse() + + // Slog → stderr (stdio JSON-RPC owns stdout). + lvl := parseLevel(cfg.logLevel) + slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: lvl}))) + + pool := newConnPool() + defer pool.closeAll() + + d := &deps{pool: pool, readOnly: cfg.readOnly} + + srv := server.NewMCPServer( + "browser_mcp", + version, + server.WithToolCapabilities(true), + ) + + registerTools(srv, d) + + slog.Info("starting browser_mcp", + "version", version, + "transport", transportLabel(cfg), + "read_only", cfg.readOnly, + ) + + if cfg.httpAddr == "" { + if err := server.ServeStdio(srv); err != nil { + slog.Error("stdio server", "err", err) + // os.Exit does not run deferred calls, so release the pooled connections explicitly. + pool.closeAll() + os.Exit(1) + } + return + } + + if err := serveHTTP(srv, cfg); err != nil { + slog.Error("http server", "err", err) + // os.Exit does not run deferred calls, so release the pooled connections explicitly. + pool.closeAll() + os.Exit(1) + } +} + +// registerTools wires every tool group. Mutating tools are skipped under --read-only. 
+func registerTools(s *server.MCPServer, d *deps) { + registerSessionTools(s, d) + registerNavTools(s, d) + registerReadTools(s, d) + registerDomTools(s, d) + registerInputTools(s, d) + registerCookieTools(s, d) + registerFrameTools(s, d) + registerStorageTools(s, d) +} + +// portOr returns the CDP port, defaulting to 9222 when zero. +func portOr(p int) int { + if p == 0 { + return 9222 + } + return p +} + +// withConn fetches the pooled connection for port and runs fn on it. If fn fails with a +// dead-connection error (isConnErr), the connection is dropped and fn is retried ONCE on a +// fresh one (Chrome may have closed the tab), so fn can run twice. +func (d *deps) withConn(port int, fn func(c *browser.CDPConn) error) error { + c, err := d.pool.get(port) + if err != nil { + return err + } + err = fn(c) + if err != nil && isConnErr(err) { + d.pool.drop(port) + c2, err2 := d.pool.get(port) + if err2 != nil { + return err2 + } + return fn(c2) + } + return err +} + +// serveHTTP hosts the MCP server over Streamable HTTP on cfg.bind+cfg.httpAddr. Requests must +// carry the bearer token when REGISTRY_API_TOKEN is set; without a token only a loopback bind +// is accepted. +func serveHTTP(s *server.MCPServer, cfg config) error { + addr := cfg.bind + cfg.httpAddr + + httpSrv := server.NewStreamableHTTPServer(s) + + token := os.Getenv("REGISTRY_API_TOKEN") + // Not only 0.0.0.0: an empty bind or a LAN address is just as reachable from the network, + // and this server grants full browser control, so every non-loopback bind needs the token. + loopback := cfg.bind == "127.0.0.1" || cfg.bind == "localhost" || cfg.bind == "[::1]" + if !loopback && token == "" { + return fmt.Errorf("--bind %s requires REGISTRY_API_TOKEN", cfg.bind) + } + + mux := http.NewServeMux() + if token != "" { + mux.Handle("/", authMiddleware(token, httpSrv)) + } else { + mux.Handle("/", httpSrv) + } + + slog.Info("listening http", "addr", addr) + return http.ListenAndServe(addr, mux) +} + +// authMiddleware rejects with 401 any request whose Authorization header is not exactly "Bearer <token>". +func authMiddleware(token string, next http.Handler) http.Handler { + expected := "Bearer " + token + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get("Authorization") != expected { + http.Error(w, "unauthorized", http.StatusUnauthorized) + return + } + next.ServeHTTP(w, r) + }) +} + +// transportLabel describes the active transport ("stdio" or "http <bind><addr>") for the startup log. +func transportLabel(cfg config) string { + if cfg.httpAddr == "" { + return "stdio" + } + return fmt.Sprintf("http %s%s", cfg.bind, cfg.httpAddr) +} + +func parseLevel(s 
string) slog.Level { + switch strings.ToLower(s) { + case "debug": + return slog.LevelDebug + case "warn": + return slog.LevelWarn + case "error": + return slog.LevelError + default: + return slog.LevelInfo + } +} + +// truncate caps s at n bytes, appending a marker when cut. The cut is moved back to the +// nearest rune boundary so a multi-byte UTF-8 character is never split; a negative n is treated as 0. +func truncate(s string, n int) string { + if n < 0 { + n = 0 + } + if len(s) <= n { + return s + } + // UTF-8 continuation bytes look like 10xxxxxx; step back until s[n] starts a rune. + for n > 0 && s[n]&0xC0 == 0x80 { + n-- + } + return s[:n] + "\n... [truncated]" +} diff --git a/pool.go b/pool.go new file mode 100644 index 0000000..eb22eae --- /dev/null +++ b/pool.go @@ -0,0 +1,79 @@ +package main + +import ( + "strings" + "sync" + + "fn-registry/functions/browser" +) + +// connPool reuses CDP connections across tool invocations. Key = CDP port. +// One connection = one live session to a "page" tab. Keeping it avoids paying the +// WebSocket handshake on every tool and preserves state (event handlers, context). +type connPool struct { + mu sync.Mutex + conns map[int]*browser.CDPConn + cancels map[int]func() // cancel funcs of persistent handlers (handle_dialog) +} + +func newConnPool() *connPool { + return &connPool{conns: map[int]*browser.CDPConn{}, cancels: map[int]func(){}} +} + +func (p *connPool) get(port int) (*browser.CDPConn, error) { + p.mu.Lock() + defer p.mu.Unlock() + if c, ok := p.conns[port]; ok && c != nil { + return c, nil + } + c, err := browser.CdpConnect(port) + if err != nil { + return nil, err + } + p.conns[port] = c + return c, nil +} + +func (p *connPool) drop(port int) { + p.mu.Lock() + defer p.mu.Unlock() + if cancel, ok := p.cancels[port]; ok && cancel != nil { + cancel() + delete(p.cancels, port) + } + if c, ok := p.conns[port]; ok && c != nil { + _ = browser.CdpClose(c, 0) + delete(p.conns, port) + } +} + +func (p *connPool) setCancel(port int, cancel func()) { + p.mu.Lock() + defer p.mu.Unlock() + if old := p.cancels[port]; old != nil { + old() + } + p.cancels[port] = cancel +} + +func (p *connPool) closeAll() { + p.mu.Lock() + defer p.mu.Unlock() + for port, c := range p.conns { + if cancel := p.cancels[port]; 
cancel != nil { + cancel() + } + if c != nil { + _ = browser.CdpClose(c, 0) + } + } + p.conns = map[int]*browser.CDPConn{} + p.cancels = map[int]func(){} +} + +// isConnErr reports whether err's message contains one of the substrings treated here as a +// dead CDP connection ("connection close", "broken pipe", "use of closed", "ws read", "EOF"), +// which withConn uses to decide on its single retry. It calls err.Error(), so err must be non-nil. +func isConnErr(err error) bool { + s := err.Error() + return strings.Contains(s, "connection close") || strings.Contains(s, "broken pipe") || + strings.Contains(s, "use of closed") || strings.Contains(s, "ws read") || strings.Contains(s, "EOF") +} diff --git a/tools_cookies.go b/tools_cookies.go new file mode 100644 index 0000000..22f4566 --- /dev/null +++ b/tools_cookies.go @@ -0,0 +1,145 @@ +package main + +import ( + "context" + "encoding/json" + + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" + + "fn-registry/functions/browser" +) + +// registerCookieTools wires cookie_get (read) + set/delete/clear (MUTA). +func registerCookieTools(s *server.MCPServer, d *deps) { + s.AddTool(cookieGetTool(), mcp.NewTypedToolHandler(d.handleCookieGet)) + + if !d.readOnly { + s.AddTool(cookieSetTool(), mcp.NewTypedToolHandler(d.handleCookieSet)) + s.AddTool(cookieDeleteTool(), mcp.NewTypedToolHandler(d.handleCookieDelete)) + s.AddTool(cookieClearTool(), mcp.NewTypedToolHandler(d.handleCookieClear)) + } +} + +// ---- cookie_get ---- + +type cookieGetArgs struct { + Port int `json:"port"` +} + +func cookieGetTool() mcp.Tool { + return mcp.NewTool("cookie_get", + mcp.WithDescription("Return all browser cookies (Network.getAllCookies) as JSON."), + mcp.WithNumber("port", mcp.Description("CDP port. 
Default 9222.")), + ) +} + +func (d *deps) handleCookieGet(_ context.Context, _ mcp.CallToolRequest, a cookieGetArgs) (*mcp.CallToolResult, error) { + var cookies []browser.CdpCookie + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + var e error + cookies, e = browser.CdpGetCookies(c) + return e + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + b, _ := json.MarshalIndent(cookies, "", " ") + return mcp.NewToolResultText(string(b)), nil +} + +// ---- cookie_set (MUTA) ---- + +type cookieSetArgs struct { + Port int `json:"port"` + Name string `json:"name"` + Value string `json:"value"` + Domain string `json:"domain"` + Path string `json:"path"` + HTTPOnly bool `json:"http_only"` +} + +func cookieSetTool() mcp.Tool { + return mcp.NewTool("cookie_set", + mcp.WithDescription("Set a cookie via Network.setCookie."), + mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), + mcp.WithString("name", mcp.Required(), mcp.Description("Cookie name.")), + mcp.WithString("value", mcp.Description("Cookie value.")), + mcp.WithString("domain", mcp.Required(), mcp.Description("Cookie domain.")), + mcp.WithString("path", mcp.Description("Cookie path. 
Default /.")), + mcp.WithBoolean("http_only", mcp.Description("Mark the cookie HttpOnly.")), + ) +} + +func (d *deps) handleCookieSet(_ context.Context, _ mcp.CallToolRequest, a cookieSetArgs) (*mcp.CallToolResult, error) { + if a.Name == "" { + return mcp.NewToolResultError("name is required"), nil + } + if a.Domain == "" { + return mcp.NewToolResultError("domain is required"), nil + } + path := a.Path + if path == "" { + path = "/" + } + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + return browser.CdpSetCookie(c, a.Name, a.Value, a.Domain, path, a.HTTPOnly) + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + return mcp.NewToolResultText("cookie set: " + a.Name), nil +} + +// ---- cookie_delete (MUTA) ---- + +type cookieDeleteArgs struct { + Port int `json:"port"` + Name string `json:"name"` + Domain string `json:"domain"` +} + +func cookieDeleteTool() mcp.Tool { + return mcp.NewTool("cookie_delete", + mcp.WithDescription("Delete cookies by name (optionally scoped to a domain) via Network.deleteCookies."), + mcp.WithNumber("port", mcp.Description("CDP port. 
Default 9222.")), + mcp.WithString("name", mcp.Required(), mcp.Description("Cookie name to delete.")), + mcp.WithString("domain", mcp.Description("Optional domain scope.")), + ) +} + +func (d *deps) handleCookieDelete(_ context.Context, _ mcp.CallToolRequest, a cookieDeleteArgs) (*mcp.CallToolResult, error) { + if a.Name == "" { + return mcp.NewToolResultError("name is required"), nil + } + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + return browser.CdpDeleteCookies(c, a.Name, a.Domain) + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + return mcp.NewToolResultText("cookie deleted: " + a.Name), nil +} + +// ---- cookie_clear (MUTA) ---- + +type cookieClearArgs struct { + Port int `json:"port"` +} + +func cookieClearTool() mcp.Tool { + return mcp.NewTool("cookie_clear", + mcp.WithDescription("Clear all browser cookies via Network.clearBrowserCookies."), + mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), + ) +} + +func (d *deps) handleCookieClear(_ context.Context, _ mcp.CallToolRequest, a cookieClearArgs) (*mcp.CallToolResult, error) { + err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { + return browser.CdpClearCookies(c) + }) + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + return mcp.NewToolResultText("cookies cleared"), nil +} diff --git a/tools_dom.go b/tools_dom.go new file mode 100644 index 0000000..47236db --- /dev/null +++ b/tools_dom.go @@ -0,0 +1,201 @@ +package main + +import ( + "context" + "time" + + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" + + "fn-registry/functions/browser" +) + +// registerDomTools wires DOM interaction tools. find/wait stay on under --read-only. 
+func registerDomTools(s *server.MCPServer, d *deps) {
+	s.AddTool(domFindByTextTool(), mcp.NewTypedToolHandler(d.handleDomFindByText))
+	s.AddTool(domWaitElementTool(), mcp.NewTypedToolHandler(d.handleDomWaitElement))
+
+	if !d.readOnly { // mutating tools are registered only outside read-only mode
+		s.AddTool(domClickTool(), mcp.NewTypedToolHandler(d.handleDomClick))
+		s.AddTool(domClickHumanTool(), mcp.NewTypedToolHandler(d.handleDomClickHuman))
+		s.AddTool(domClickTextTool(), mcp.NewTypedToolHandler(d.handleDomClickText))
+		s.AddTool(domTypeTool(), mcp.NewTypedToolHandler(d.handleDomType))
+	}
+}
+
+// ---- dom_click (mutating): synthetic CDP click on the element matching a CSS selector ----
+
+type domClickArgs struct {
+	Port     int    `json:"port"`
+	Selector string `json:"selector"`
+}
+
+func domClickTool() mcp.Tool {
+	return mcp.NewTool("dom_click",
+		mcp.WithDescription("Click the element matching the CSS selector (synthetic CDP click)."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the element to click.")),
+	)
+}
+
+func (d *deps) handleDomClick(_ context.Context, _ mcp.CallToolRequest, a domClickArgs) (*mcp.CallToolResult, error) {
+	if a.Selector == "" { // the schema marks it required, but the typed binding does not enforce that
+		return mcp.NewToolResultError("selector is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpClick(c, a.Selector)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil // failure is reported as a tool error result; the Go error stays nil
+	}
+	return mcp.NewToolResultText("clicked " + a.Selector), nil
+}
+
+// ---- dom_click_human (mutating): click with human-like mouse movement, default MouseHumanOpts ----
+
+type domClickHumanArgs struct {
+	Port     int    `json:"port"`
+	Selector string `json:"selector"`
+}
+
+func domClickHumanTool() mcp.Tool {
+	return mcp.NewTool("dom_click_human",
+		mcp.WithDescription("Click the element matching the CSS selector with human-like mouse movement (Bézier path + jitter + press/release pause)."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the element to click.")),
+	)
+}
+
+func (d *deps) handleDomClickHuman(_ context.Context, _ mcp.CallToolRequest, a domClickHumanArgs) (*mcp.CallToolResult, error) {
+	if a.Selector == "" {
+		return mcp.NewToolResultError("selector is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpClickHuman(c, a.Selector, browser.MouseHumanOpts{})
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("clicked (human) " + a.Selector), nil
+}
+
+// ---- dom_click_text (mutating): click the first element whose visible text matches ----
+
+type domClickTextArgs struct {
+	Port int    `json:"port"`
+	Text string `json:"text"`
+}
+
+func domClickTextTool() mcp.Tool {
+	return mcp.NewTool("dom_click_text",
+		mcp.WithDescription("Find the first element whose visible text matches and click it."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("text", mcp.Required(), mcp.Description("Visible text to match (substring).")),
+	)
+}
+
+func (d *deps) handleDomClickText(_ context.Context, _ mcp.CallToolRequest, a domClickTextArgs) (*mcp.CallToolResult, error) {
+	if a.Text == "" {
+		return mcp.NewToolResultError("text is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpClickText(c, a.Text, browser.FindByTextOpts{})
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("clicked text " + a.Text), nil
+}
+
+// ---- dom_type (mutating): type text into whichever element currently has focus ----
+
+type domTypeArgs struct {
+	Port int    `json:"port"`
+	Text string `json:"text"`
+}
+
+func domTypeTool() mcp.Tool {
+	return mcp.NewTool("dom_type",
+		mcp.WithDescription("Type text into the currently focused element (dispatches key events char by char)."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("text", mcp.Required(), mcp.Description("Text to type.")),
+	)
+}
+
+func (d *deps) handleDomType(_ context.Context, _ mcp.CallToolRequest, a domTypeArgs) (*mcp.CallToolResult, error) {
+	if a.Text == "" {
+		return mcp.NewToolResultError("text is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpTypeText(c, a.Text)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("typed text"), nil // the typed text itself is not echoed back
+}
+
+// ---- dom_find_by_text: find an element by visible text and return a CSS selector for it ----
+
+type domFindByTextArgs struct {
+	Port int    `json:"port"`
+	Text string `json:"text"`
+}
+
+func domFindByTextTool() mcp.Tool {
+	return mcp.NewTool("dom_find_by_text",
+		mcp.WithDescription("Find the first element whose visible text matches and return a unique CSS selector for it (empty string if none)."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("text", mcp.Required(), mcp.Description("Visible text to match (substring).")),
+	)
+}
+
+func (d *deps) handleDomFindByText(_ context.Context, _ mcp.CallToolRequest, a domFindByTextArgs) (*mcp.CallToolResult, error) {
+	if a.Text == "" {
+		return mcp.NewToolResultError("text is required"), nil
+	}
+	var sel string // filled in by the closure so the result survives withConn
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		sel, e = browser.CdpFindByText(c, a.Text, browser.FindByTextOpts{})
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(sel), nil
+}
+
+// ---- dom_wait_element: block until a selector matches or the timeout elapses ----
+
+type domWaitElementArgs struct {
+	Port      int    `json:"port"`
+	Selector  string `json:"selector"`
+	TimeoutMs int    `json:"timeout_ms"`
+}
+
+func domWaitElementTool() mcp.Tool {
+	return mcp.NewTool("dom_wait_element",
+		mcp.WithDescription("Block until an element matching the CSS selector appears in the DOM (or timeout)."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector to wait for.")),
+		mcp.WithNumber("timeout_ms", mcp.Description("Max wait in ms. Default 10000.")),
+	)
+}
+
+func (d *deps) handleDomWaitElement(_ context.Context, _ mcp.CallToolRequest, a domWaitElementArgs) (*mcp.CallToolResult, error) {
+	if a.Selector == "" {
+		return mcp.NewToolResultError("selector is required"), nil
+	}
+	timeout := a.TimeoutMs
+	if timeout <= 0 { // omitted or non-positive: use the documented 10000 ms default
+		timeout = 10000
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpWaitElement(c, a.Selector, time.Duration(timeout)*time.Millisecond)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("element appeared: " + a.Selector), nil
+}
diff --git a/tools_frames.go b/tools_frames.go
new file mode 100644
index 0000000..e753ab9
--- /dev/null
+++ b/tools_frames.go
@@ -0,0 +1,115 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"fn-registry/functions/browser"
+)
+
+// registerFrameTools wires frame_list + frame_get_html (read) and frame_eval (mutating, skipped in read-only mode).
+func registerFrameTools(s *server.MCPServer, d *deps) {
+	s.AddTool(frameListTool(), mcp.NewTypedToolHandler(d.handleFrameList))
+	s.AddTool(frameGetHTMLTool(), mcp.NewTypedToolHandler(d.handleFrameGetHTML))
+
+	if !d.readOnly {
+		s.AddTool(frameEvalTool(), mcp.NewTypedToolHandler(d.handleFrameEval))
+	}
+}
+
+// ---- frame_list: list the current page's frames and return them as indented JSON ----
+
+type frameListArgs struct {
+	Port int `json:"port"`
+}
+
+func frameListTool() mcp.Tool {
+	return mcp.NewTool("frame_list",
+		mcp.WithDescription("List all frames (including iframes) of the current page via Page.getFrameTree. Returns JSON with frame IDs."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+	)
+}
+
+func (d *deps) handleFrameList(_ context.Context, _ mcp.CallToolRequest, a frameListArgs) (*mcp.CallToolResult, error) {
+	var frames []browser.CdpFrame
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		frames, e = browser.CdpListFrames(c)
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	b, _ := json.MarshalIndent(frames, "", " ") // NOTE(review): marshal error is discarded; confirm CdpFrame always encodes
+	return mcp.NewToolResultText(string(b)), nil
+}
+
+// ---- frame_eval (mutating): evaluate a JavaScript expression inside one frame ----
+
+type frameEvalArgs struct {
+	Port       int    `json:"port"`
+	FrameID    string `json:"frame_id"`
+	Expression string `json:"expression"`
+}
+
+func frameEvalTool() mcp.Tool {
+	return mcp.NewTool("frame_eval",
+		mcp.WithDescription("Evaluate a JavaScript expression inside a specific frame's execution context. Returns the stringified result."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("frame_id", mcp.Required(), mcp.Description("Frame ID (from frame_list).")),
+		mcp.WithString("expression", mcp.Required(), mcp.Description("JavaScript expression to evaluate.")),
+	)
+}
+
+func (d *deps) handleFrameEval(_ context.Context, _ mcp.CallToolRequest, a frameEvalArgs) (*mcp.CallToolResult, error) {
+	if a.FrameID == "" {
+		return mcp.NewToolResultError("frame_id is required"), nil
+	}
+	if a.Expression == "" {
+		return mcp.NewToolResultError("expression is required"), nil
+	}
+	var res string
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		res, e = browser.CdpEvalInFrame(c, a.FrameID, a.Expression)
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(truncate(res, htmlMax)), nil // result passes through the same htmlMax cap as HTML output
+}
+
+// ---- frame_get_html: serialized HTML of one frame, passed through truncate(…, htmlMax) ----
+
+type frameGetHTMLArgs struct {
+	Port    int    `json:"port"`
+	FrameID string `json:"frame_id"`
+}
+
+func frameGetHTMLTool() mcp.Tool {
+	return mcp.NewTool("frame_get_html",
+		mcp.WithDescription("Return the serialized HTML of a specific frame. Truncated to 200000 chars."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("frame_id", mcp.Required(), mcp.Description("Frame ID (from frame_list).")),
+	)
+}
+
+func (d *deps) handleFrameGetHTML(_ context.Context, _ mcp.CallToolRequest, a frameGetHTMLArgs) (*mcp.CallToolResult, error) {
+	if a.FrameID == "" {
+		return mcp.NewToolResultError("frame_id is required"), nil
+	}
+	var html string
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		html, e = browser.CdpGetFrameHTML(c, a.FrameID)
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(truncate(html, htmlMax)), nil
+}
diff --git a/tools_input.go b/tools_input.go
new file mode 100644
index 0000000..27df385
--- /dev/null
+++ b/tools_input.go
@@ -0,0 +1,110 @@
+package main
+
+import (
+	"context"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"fn-registry/functions/browser"
+)
+
+// registerInputTools wires press_key, scroll, handle_dialog. All are mutating, so none is registered in read-only mode.
+func registerInputTools(s *server.MCPServer, d *deps) {
+	if d.readOnly {
+		return
+	}
+	s.AddTool(pressKeyTool(), mcp.NewTypedToolHandler(d.handlePressKey))
+	s.AddTool(scrollTool(), mcp.NewTypedToolHandler(d.handleScroll))
+	s.AddTool(handleDialogTool(), mcp.NewTypedToolHandler(d.handleHandleDialog))
+}
+
+// ---- press_key (mutating): press one named key on the focused element ----
+
+type pressKeyArgs struct {
+	Port int    `json:"port"`
+	Key  string `json:"key"`
+}
+
+func pressKeyTool() mcp.Tool {
+	return mcp.NewTool("press_key",
+		mcp.WithDescription("Press a named key (Enter, Tab, Escape, ArrowDown, Backspace, ...) on the focused element."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("key", mcp.Required(), mcp.Description("Key name, e.g. Enter, Tab, Escape, ArrowDown.")),
+	)
+}
+
+func (d *deps) handlePressKey(_ context.Context, _ mcp.CallToolRequest, a pressKeyArgs) (*mcp.CallToolResult, error) {
+	if a.Key == "" {
+		return mcp.NewToolResultError("key is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpPressKey(c, a.Key)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("pressed " + a.Key), nil
+}
+
+// ---- scroll (mutating): scroll by (delta_x, delta_y) via a synthetic mouse wheel event ----
+
+type scrollArgs struct {
+	Port   int     `json:"port"`
+	DeltaX float64 `json:"delta_x"`
+	DeltaY float64 `json:"delta_y"`
+}
+
+func scrollTool() mcp.Tool {
+	return mcp.NewTool("scroll",
+		mcp.WithDescription("Scroll the page by (delta_x, delta_y) pixels via a synthetic mouse wheel event."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithNumber("delta_x", mcp.Description("Horizontal scroll delta in pixels. Default 0.")),
+		mcp.WithNumber("delta_y", mcp.Description("Vertical scroll delta in pixels. Default 300.")),
+	)
+}
+
+func (d *deps) handleScroll(_ context.Context, _ mcp.CallToolRequest, a scrollArgs) (*mcp.CallToolResult, error) {
+	deltaY := a.DeltaY
+	if deltaY == 0 && a.DeltaX == 0 { // the 300 px default applies only when neither delta was given
+		deltaY = 300
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpScroll(c, a.DeltaX, deltaY)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("scrolled"), nil
+}
+
+// ---- handle_dialog (mutating): arm a dialog auto-handler on the pooled connection for this port ----
+
+type handleDialogArgs struct {
+	Port       int    `json:"port"`
+	Accept     *bool  `json:"accept"` // pointer so an omitted value (nil) can fall back to the advertised default of true
+	PromptText string `json:"prompt_text"`
+}
+
+func handleDialogTool() mcp.Tool {
+	return mcp.NewTool("handle_dialog",
+		mcp.WithDescription("Arm an auto-handler that responds to every JS dialog (alert/confirm/prompt/beforeunload) on the tab until disconnect. The handler lives in the pooled connection."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithBoolean("accept", mcp.DefaultBool(true), mcp.Description("Whether to accept (true) or dismiss (false) dialogs. Default true.")),
+		mcp.WithString("prompt_text", mcp.Description("Text to enter for prompt() dialogs.")),
+	)
+}
+
+func (d *deps) handleHandleDialog(_ context.Context, _ mcp.CallToolRequest, a handleDialogArgs) (*mcp.CallToolResult, error) {
+	port := portOr(a.Port)
+	c, err := d.pool.get(port) // taken straight from the pool (not withConn): the handler must stay attached to this connection
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	cancel, err := browser.CdpHandleDialog(c, a.Accept == nil || *a.Accept, a.PromptText) // omitted accept means accept
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	d.pool.setCancel(port, cancel) // stored per port; presumably invoked when the connection is dropped (see pool.go)
+	return mcp.NewToolResultText("dialog auto-handler armed"), nil
+}
diff --git a/tools_nav.go b/tools_nav.go
new file mode 100644
index 0000000..90b9d68
--- /dev/null
+++ b/tools_nav.go
@@ -0,0 +1,260 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"time"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"fn-registry/functions/browser"
+)
+
+// registerNavTools wires navigation + tab management + page-wait tools.
+func registerNavTools(s *server.MCPServer, d *deps) {
+	// tab_list/tab_activate call the HTTP /json endpoints directly (no pool); they and the page waits stay available in read-only mode.
+	s.AddTool(tabListTool(), mcp.NewTypedToolHandler(d.handleTabList))
+	s.AddTool(tabActivateTool(), mcp.NewTypedToolHandler(d.handleTabActivate))
+	s.AddTool(pageWaitLoadTool(), mcp.NewTypedToolHandler(d.handlePageWaitLoad))
+	s.AddTool(pageWaitIdleTool(), mcp.NewTypedToolHandler(d.handlePageWaitIdle))
+
+	if !d.readOnly {
+		s.AddTool(tabNavigateTool(), mcp.NewTypedToolHandler(d.handleTabNavigate))
+		s.AddTool(tabNewTool(), mcp.NewTypedToolHandler(d.handleTabNew))
+		s.AddTool(tabCloseTool(), mcp.NewTypedToolHandler(d.handleTabClose))
+		s.AddTool(navBackTool(), mcp.NewTypedToolHandler(d.handleNavBack))
+		s.AddTool(navForwardTool(), mcp.NewTypedToolHandler(d.handleNavForward))
+	}
+}
+
+// ---- tab_navigate (mutating): point the connected tab at a URL ----
+
+type tabNavigateArgs struct {
+	Port int    `json:"port"`
+	URL  string `json:"url"`
+}
+
+func tabNavigateTool() mcp.Tool {
+	return mcp.NewTool("tab_navigate",
+		mcp.WithDescription("Navigate the connected tab to a URL via Page.navigate."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("url", mcp.Required(), mcp.Description("Target URL.")),
+	)
+}
+
+func (d *deps) handleTabNavigate(_ context.Context, _ mcp.CallToolRequest, a tabNavigateArgs) (*mcp.CallToolResult, error) {
+	if a.URL == "" {
+		return mcp.NewToolResultError("url is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpNavigate(c, a.URL)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("navigated to " + a.URL), nil
+}
+
+// ---- tab_list: list CDP targets on localhost:<port> and return them as indented JSON ----
+
+type tabListArgs struct {
+	Port int `json:"port"`
+}
+
+func tabListTool() mcp.Tool {
+	return mcp.NewTool("tab_list",
+		mcp.WithDescription("List all CDP targets (tabs, iframes, workers) via GET /json. Returns JSON."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+	)
+}
+
+func (d *deps) handleTabList(_ context.Context, _ mcp.CallToolRequest, a tabListArgs) (*mcp.CallToolResult, error) {
+	tabs, err := browser.CdpListTabs("localhost", portOr(a.Port))
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	b, _ := json.MarshalIndent(tabs, "", " ") // NOTE(review): marshal error is discarded; confirm the tab type always encodes
+	return mcp.NewToolResultText(string(b)), nil
+}
+
+// ---- tab_new (mutating): open a new tab on localhost:<port> and return its JSON ----
+
+type tabNewArgs struct {
+	Port int    `json:"port"`
+	URL  string `json:"url"`
+}
+
+func tabNewTool() mcp.Tool {
+	return mcp.NewTool("tab_new",
+		mcp.WithDescription("Open a new tab via PUT /json/new. Returns the new tab's JSON."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("url", mcp.Description("Optional start URL. Empty = about:blank.")),
+	)
+}
+
+func (d *deps) handleTabNew(_ context.Context, _ mcp.CallToolRequest, a tabNewArgs) (*mcp.CallToolResult, error) {
+	tab, err := browser.CdpNewTab("localhost", portOr(a.Port), a.URL)
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	b, _ := json.MarshalIndent(tab, "", " ") // NOTE(review): marshal error is discarded; confirm the tab type always encodes
+	return mcp.NewToolResultText(string(b)), nil
+}
+
+// ---- tab_close (mutating): close the tab with the given target ID ----
+
+type tabCloseArgs struct {
+	Port  int    `json:"port"`
+	TabID string `json:"tab_id"`
+}
+
+func tabCloseTool() mcp.Tool {
+	return mcp.NewTool("tab_close",
+		mcp.WithDescription("Close a tab by its target ID via GET /json/close/{tab_id}."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("tab_id", mcp.Required(), mcp.Description("Target ID of the tab to close.")),
+	)
+}
+
+func (d *deps) handleTabClose(_ context.Context, _ mcp.CallToolRequest, a tabCloseArgs) (*mcp.CallToolResult, error) {
+	if a.TabID == "" {
+		return mcp.NewToolResultError("tab_id is required"), nil
+	}
+	if err := browser.CdpCloseTab("localhost", portOr(a.Port), a.TabID); err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("closed tab " + a.TabID), nil
+}
+
+// ---- tab_activate: bring the tab with the given target ID to the foreground ----
+
+type tabActivateArgs struct {
+	Port  int    `json:"port"`
+	TabID string `json:"tab_id"`
+}
+
+func tabActivateTool() mcp.Tool {
+	return mcp.NewTool("tab_activate",
+		mcp.WithDescription("Bring a tab to the foreground via GET /json/activate/{tab_id}."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("tab_id", mcp.Required(), mcp.Description("Target ID of the tab to activate.")),
+	)
+}
+
+func (d *deps) handleTabActivate(_ context.Context, _ mcp.CallToolRequest, a tabActivateArgs) (*mcp.CallToolResult, error) {
+	if a.TabID == "" {
+		return mcp.NewToolResultError("tab_id is required"), nil
+	}
+	if err := browser.CdpActivateTab("localhost", portOr(a.Port), a.TabID); err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("activated tab " + a.TabID), nil
+}
+
+// ---- nav_back (mutating): go one step back in the connected tab's history ----
+
+type navBackArgs struct {
+	Port int `json:"port"`
+}
+
+func navBackTool() mcp.Tool {
+	return mcp.NewTool("nav_back",
+		mcp.WithDescription("Navigate back in the connected tab's history."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+	)
+}
+
+func (d *deps) handleNavBack(_ context.Context, _ mcp.CallToolRequest, a navBackArgs) (*mcp.CallToolResult, error) {
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpNavBack(c)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("navigated back"), nil
+}
+
+// ---- nav_forward (mutating): go one step forward in the connected tab's history ----
+
+type navForwardArgs struct {
+	Port int `json:"port"`
+}
+
+func navForwardTool() mcp.Tool {
+	return mcp.NewTool("nav_forward",
+		mcp.WithDescription("Navigate forward in the connected tab's history."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+	)
+}
+
+func (d *deps) handleNavForward(_ context.Context, _ mcp.CallToolRequest, a navForwardArgs) (*mcp.CallToolResult, error) {
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpNavForward(c)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("navigated forward"), nil
+}
+
+// ---- page_wait_load: block until the load event or the timeout ----
+
+type pageWaitLoadArgs struct {
+	Port      int `json:"port"`
+	TimeoutMs int `json:"timeout_ms"`
+}
+
+func pageWaitLoadTool() mcp.Tool {
+	return mcp.NewTool("page_wait_load",
+		mcp.WithDescription("Block until the page fires the load event (or timeout)."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithNumber("timeout_ms", mcp.Description("Max wait in ms. Default 10000.")),
+	)
+}
+
+func (d *deps) handlePageWaitLoad(_ context.Context, _ mcp.CallToolRequest, a pageWaitLoadArgs) (*mcp.CallToolResult, error) {
+	timeout := a.TimeoutMs
+	if timeout <= 0 { // omitted or non-positive: use the documented 10000 ms default
+		timeout = 10000
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpWaitLoad(c, time.Duration(timeout)*time.Millisecond)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("page loaded"), nil
+}
+
+// ---- page_wait_idle: block until the network goes quiet or the timeout ----
+
+type pageWaitIdleArgs struct {
+	Port      int `json:"port"`
+	TimeoutMs int `json:"timeout_ms"`
+}
+
+func pageWaitIdleTool() mcp.Tool {
+	return mcp.NewTool("page_wait_idle",
+		mcp.WithDescription("Block until network activity quiets down (inflight requests reach 0 for a quiet window) or timeout. Immune to DOM-mutating extensions/animations."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithNumber("timeout_ms", mcp.Description("Max wait in ms. Default 15000.")),
+	)
+}
+
+func (d *deps) handlePageWaitIdle(_ context.Context, _ mcp.CallToolRequest, a pageWaitIdleArgs) (*mcp.CallToolResult, error) {
+	timeout := a.TimeoutMs
+	if timeout <= 0 { // omitted or non-positive: use the documented 15000 ms default
+		timeout = 15000
+	}
+	opts := browser.CdpWaitIdleOpts{ // only Timeout is set; every other option keeps its zero value
+		Timeout: time.Duration(timeout) * time.Millisecond,
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpWaitIdle(c, opts)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("network idle"), nil
+}
diff --git a/tools_read.go b/tools_read.go
new file mode 100644
index 0000000..424edc9
--- /dev/null
+++ b/tools_read.go
@@ -0,0 +1,110 @@
+package main
+
+import (
+	"context"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"fn-registry/functions/browser"
+)
+
+const htmlMax = 200_000
+
+// registerReadTools wires page_get_html, page_eval_js (mutating), page_screenshot.
+func registerReadTools(s *server.MCPServer, d *deps) {
+	s.AddTool(pageGetHTMLTool(), mcp.NewTypedToolHandler(d.handlePageGetHTML))
+	s.AddTool(pageScreenshotTool(), mcp.NewTypedToolHandler(d.handlePageScreenshot))
+
+	if !d.readOnly { // page_eval_js can change page state, so it is left out in read-only mode
+		s.AddTool(pageEvalJSTool(), mcp.NewTypedToolHandler(d.handlePageEvalJS))
+	}
+}
+
+// ---- page_get_html: the page's serialized HTML, passed through truncate(…, htmlMax) ----
+
+type pageGetHTMLArgs struct {
+	Port int `json:"port"`
+}
+
+func pageGetHTMLTool() mcp.Tool {
+	return mcp.NewTool("page_get_html",
+		mcp.WithDescription("Return the current page's full serialized HTML (outerHTML). Truncated to 200000 chars."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+	)
+}
+
+func (d *deps) handlePageGetHTML(_ context.Context, _ mcp.CallToolRequest, a pageGetHTMLArgs) (*mcp.CallToolResult, error) {
+	var html string // filled in by the closure so the result survives withConn
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		html, e = browser.CdpGetHTML(c)
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil // failure is reported as a tool error result; the Go error stays nil
+	}
+	return mcp.NewToolResultText(truncate(html, htmlMax)), nil
+}
+
+// ---- page_eval_js (mutating): evaluate a JavaScript expression in the page context ----
+
+type pageEvalJSArgs struct {
+	Port       int    `json:"port"`
+	Expression string `json:"expression"`
+}
+
+func pageEvalJSTool() mcp.Tool {
+	return mcp.NewTool("page_eval_js",
+		mcp.WithDescription("Evaluate a JavaScript expression in the page context via Runtime.evaluate. Returns the stringified result."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("expression", mcp.Required(), mcp.Description("JavaScript expression to evaluate.")),
+	)
+}
+
+func (d *deps) handlePageEvalJS(_ context.Context, _ mcp.CallToolRequest, a pageEvalJSArgs) (*mcp.CallToolResult, error) {
+	if a.Expression == "" {
+		return mcp.NewToolResultError("expression is required"), nil
+	}
+	var res string
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		res, e = browser.CdpEvaluate(c, a.Expression)
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(truncate(res, htmlMax)), nil // the result goes through the same htmlMax cap as HTML output
+}
+
+// ---- page_screenshot: capture the page and write the image to a local file ----
+
+type pageScreenshotArgs struct {
+	Port     int    `json:"port"`
+	Path     string `json:"path"`
+	FullPage bool   `json:"full_page"`
+}
+
+func pageScreenshotTool() mcp.Tool {
+	return mcp.NewTool("page_screenshot",
+		mcp.WithDescription("Capture a screenshot of the current page and write it to a local path (.png/.jpg)."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("path", mcp.Required(), mcp.Description("Output file path (.png or .jpg).")),
+		mcp.WithBoolean("full_page", mcp.Description("Capture the full scroll height instead of just the viewport.")),
+	)
+}
+
+func (d *deps) handlePageScreenshot(_ context.Context, _ mcp.CallToolRequest, a pageScreenshotArgs) (*mcp.CallToolResult, error) {
+	if a.Path == "" {
+		return mcp.NewToolResultError("path is required"), nil
+	}
+	opts := browser.CdpScreenshotOpts{FullPage: a.FullPage} // only FullPage is set; other options keep their zero value
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpScreenshot(c, a.Path, opts)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("screenshot saved to " + a.Path), nil
+}
diff --git a/tools_session.go b/tools_session.go
new file mode 100644
index 0000000..e8514ef
--- /dev/null
+++ b/tools_session.go
@@ -0,0 +1,95 @@
+package main
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"fn-registry/functions/browser"
+)
+
+// registerSessionTools wires browser_launch (mutating, skipped in read-only mode), browser_connect, browser_disconnect.
+func registerSessionTools(s *server.MCPServer, d *deps) {
+	if !d.readOnly {
+		s.AddTool(launchTool(), mcp.NewTypedToolHandler(d.handleLaunch))
+	}
+	s.AddTool(connectTool(), mcp.NewTypedToolHandler(d.handleConnect))
+	s.AddTool(disconnectTool(), mcp.NewTypedToolHandler(d.handleDisconnect))
+}
+
+// ---- browser_launch (mutating): start Chrome with remote debugging and report its PID and port ----
+
+type launchArgs struct {
+	Port        int    `json:"port"`
+	Headless    bool   `json:"headless"`
+	UserDataDir string `json:"user_data_dir"`
+	URL         string `json:"url"`
+}
+
+func launchTool() mcp.Tool {
+	return mcp.NewTool("browser_launch",
+		mcp.WithDescription("Launch a Chrome/Chromium instance with CDP remote debugging enabled. Returns the launched PID. Waits up to 15s for the CDP port to be ready."),
+		mcp.WithNumber("port", mcp.Description("CDP remote debugging port. Default 9222.")),
+		mcp.WithBoolean("headless", mcp.Description("Run headless (--headless=new). Default false.")),
+		mcp.WithString("user_data_dir", mcp.Description("Chrome profile directory. Empty = /tmp/chrome-cdp-profile.")),
+		mcp.WithString("url", mcp.Description("Optional initial URL to open on launch.")),
+	)
+}
+
+func (d *deps) handleLaunch(_ context.Context, _ mcp.CallToolRequest, a launchArgs) (*mcp.CallToolResult, error) {
+	opts := browser.ChromeLaunchOpts{
+		Port:        portOr(a.Port),
+		Headless:    a.Headless,
+		UserDataDir: a.UserDataDir,
+	}
+	if a.URL != "" {
+		opts.ExtraArgs = append(opts.ExtraArgs, a.URL) // the start URL is passed to ChromeLaunch as an extra argument
+	}
+	pid, err := browser.ChromeLaunch(opts)
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(fmt.Sprintf("launched pid=%d port=%d", pid, opts.Port)), nil
+}
+
+// ---- browser_connect: make sure the pool holds a connection for the port ----
+
+type connectArgs struct {
+	Port int `json:"port"`
+}
+
+func connectTool() mcp.Tool {
+	return mcp.NewTool("browser_connect",
+		mcp.WithDescription("Open (and pool) a CDP WebSocket connection to a running Chrome's first 'page' tab on the given port. Subsequent tools reuse this live session."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+	)
+}
+
+func (d *deps) handleConnect(_ context.Context, _ mcp.CallToolRequest, a connectArgs) (*mcp.CallToolResult, error) {
+	port := portOr(a.Port)
+	if _, err := d.pool.get(port); err != nil { // the connection itself is not needed here, only that get succeeds
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(fmt.Sprintf("connected port=%d", port)), nil
+}
+
+// ---- browser_disconnect: drop the pooled connection for the port; Chrome keeps running ----
+
+type disconnectArgs struct {
+	Port int `json:"port"`
+}
+
+func disconnectTool() mcp.Tool {
+	return mcp.NewTool("browser_disconnect",
+		mcp.WithDescription("Close and drop the pooled CDP connection for the given port (cancels any armed dialog handler). Does NOT kill Chrome."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+	)
+}
+
+func (d *deps) handleDisconnect(_ context.Context, _ mcp.CallToolRequest, a disconnectArgs) (*mcp.CallToolResult, error) {
+	port := portOr(a.Port)
+	d.pool.drop(port) // no result is checked, so the tool always reports success
+	return mcp.NewToolResultText(fmt.Sprintf("disconnected port=%d", port)), nil
+}
diff --git a/tools_storage.go b/tools_storage.go
new file mode 100644
index 0000000..fca03ce
--- /dev/null
+++ b/tools_storage.go
@@ -0,0 +1,75 @@
+package main
+
+import (
+	"context"
+
+	"github.com/mark3labs/mcp-go/mcp"
+	"github.com/mark3labs/mcp-go/server"
+
+	"fn-registry/functions/browser"
+)
+
+// registerStorageTools wires storage_save (read) + storage_load (mutating, skipped in read-only mode).
+func registerStorageTools(s *server.MCPServer, d *deps) {
+	s.AddTool(storageSaveTool(), mcp.NewTypedToolHandler(d.handleStorageSave))
+
+	if !d.readOnly {
+		s.AddTool(storageLoadTool(), mcp.NewTypedToolHandler(d.handleStorageLoad))
+	}
+}
+
+// ---- storage_save: write the browser's storage state to a JSON file ----
+
+type storageSaveArgs struct {
+	Port int    `json:"port"`
+	Path string `json:"path"`
+}
+
+func storageSaveTool() mcp.Tool {
+	return mcp.NewTool("storage_save",
+		mcp.WithDescription("Save the current session storage state (cookies + localStorage) to a JSON file for later reuse."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("path", mcp.Required(), mcp.Description("Output JSON file path.")),
+	)
+}
+
+func (d *deps) handleStorageSave(_ context.Context, _ mcp.CallToolRequest, a storageSaveArgs) (*mcp.CallToolResult, error) {
+	if a.Path == "" {
+		return mcp.NewToolResultError("path is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpSaveStorageState(c, a.Path)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("storage state saved to " + a.Path), nil
+}
+
+// ---- storage_load (mutating): load a saved storage state file into the live browser ----
+
+type storageLoadArgs struct {
+	Port int    `json:"port"`
+	Path string `json:"path"`
+}
+
+func storageLoadTool() mcp.Tool {
+	return mcp.NewTool("storage_load",
+		mcp.WithDescription("Load a previously saved session storage state (cookies + localStorage) from a JSON file into the live browser."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
+		mcp.WithString("path", mcp.Required(), mcp.Description("Input JSON file path.")),
+	)
+}
+
+func (d *deps) handleStorageLoad(_ context.Context, _ mcp.CallToolRequest, a storageLoadArgs) (*mcp.CallToolResult, error) {
+	if a.Path == "" {
+		return mcp.NewToolResultError("path is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpLoadStorageState(c, a.Path)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText("storage state loaded from " + a.Path), nil
+}