Files
browser_mcp/main.go
T
egutierrez 1fae6c1df9 feat(browser_mcp): add browser_list/launch_profile/close lifecycle tools
Three MCP tools to manage the user's Chromium instances by profile, distinct
from browser_launch's isolated automation Chrome:

- browser_list: enumerate running Chromium master processes by scanning
  /proc/*/cmdline (has --user-data-dir, no --type=). Returns pid, profile,
  user_data_dir, cdp_port, has_cdp as a JSON array.
- browser_launch_profile: launch a concrete profile using the REAL binary
  /usr/lib/chromium/chromium (bypassing the /usr/bin/chromium wrapper). No CDP
  by default so Google keeps the session for human profiles; cdp=true adds
  --remote-debugging-port + --remote-allow-origins=*. Detects DISPLAY/XAUTHORITY
  from the XFCE session and launches decoupled via setsid.
- browser_close: locate a master by profile/cdp_port/pid, SIGTERM with a 10s
  wait, then SIGKILL as a last resort.

Per-profile instances are NOT registered in the connection pool: they are
user-facing and survive the MCP dying; cleanup is explicit via browser_close.

Unit tests for cmdline master detection, flag parsing, and close-target
matching. Bumps version 0.6.0 -> 0.7.0 (42 -> 45 tools).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-10 18:23:45 +02:00

201 lines
5.2 KiB
Go

package main
import (
	"crypto/subtle"
	"flag"
	"fmt"
	"log/slog"
	"net/http"
	"os"
	"os/signal"
	"strings"
	"syscall"
	"time"
	"unicode/utf8"

	"github.com/mark3labs/mcp-go/server"

	"fn-registry/functions/browser"
)
// version is the browser_mcp release string. It is passed to the MCP server
// constructor and included in the startup log line.
const version = "0.7.0"
// config holds the command-line options parsed by main.
type config struct {
	// httpAddr is the --http listen address (e.g. ":7740"); empty selects stdio.
	httpAddr string
	// bind is the --bind HTTP bind address; it is prepended to httpAddr.
	bind string
	// readOnly is the --read-only flag: register only read tools.
	readOnly bool
	// logLevel is the --log-level name: debug, info, warn, or error.
	logLevel string
}
// deps carries shared state into tool handlers.
type deps struct {
	// pool is the connection pool created once in main and shared by all tools.
	pool *connPool
	// readOnly mirrors the --read-only flag value.
	readOnly bool
}
// main parses the command-line flags, configures logging, builds the shared
// connection pool and the MCP server, registers the tools, and then serves
// over stdio (default) or HTTP (when --http is set).
//
// Every exit path closes the pool so that Chrome processes launched by this
// server are not left running after it terminates.
func main() {
	var cfg config
	flag.StringVar(&cfg.httpAddr, "http", "", "Listen on HTTP address (e.g. :7740). Empty = stdio.")
	flag.StringVar(&cfg.bind, "bind", "127.0.0.1", "HTTP bind address. Use 0.0.0.0 only with REGISTRY_API_TOKEN set.")
	flag.BoolVar(&cfg.readOnly, "read-only", false, "Register only read tools (no mutating browser actions).")
	flag.StringVar(&cfg.logLevel, "log-level", "info", "Log level: debug, info, warn, error.")
	flag.Parse()

	// slog writes to stderr: stdout is owned by the stdio JSON-RPC transport.
	lvl := parseLevel(cfg.logLevel)
	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: lvl})))

	pool := newConnPool()
	// Normal exits: stdio EOF (ServeStdio returns) or serveHTTP returning nil.
	defer pool.closeAll()

	// Signal exits: SIGTERM/SIGINT do NOT run defers, so the Chromes we own are
	// killed explicitly before exiting. Without this, killing the MCP left the
	// launched chromium processes alive and orphaned (~789 MiB RSS each) — the
	// leak that caused the RAM-saturation outage (06/06/2026).
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		sig := <-sigCh
		slog.Info("signal received, killing launched chromes", "signal", sig.String())
		pool.closeAll()
		os.Exit(0)
	}()

	d := &deps{pool: pool, readOnly: cfg.readOnly}
	srv := server.NewMCPServer(
		"browser_mcp",
		version,
		server.WithToolCapabilities(true),
	)
	registerTools(srv, d)

	slog.Info("starting browser_mcp",
		"version", version,
		"transport", transportLabel(cfg),
		"read_only", cfg.readOnly,
	)

	if cfg.httpAddr == "" {
		if err := server.ServeStdio(srv); err != nil {
			slog.Error("stdio server", "err", err)
			// os.Exit skips deferred calls, so close the pool by hand;
			// otherwise an error exit would orphan the launched Chromes.
			pool.closeAll()
			os.Exit(1)
		}
		return
	}
	if err := serveHTTP(srv, cfg); err != nil {
		slog.Error("http server", "err", err)
		// Same reason as above: the deferred closeAll does not run on os.Exit.
		pool.closeAll()
		os.Exit(1)
	}
}
// registerTools wires every tool group onto the MCP server, passing the shared
// deps to each group's register function. Mutating tools are skipped under
// --read-only.
//
// NOTE(review): the read-only filtering is not visible here; presumably each
// register* helper consults d.readOnly — confirm in the helpers.
func registerTools(s *server.MCPServer, d *deps) {
	registerSessionTools(s, d)
	registerLifecycleTools(s, d)
	registerNavTools(s, d)
	registerReadTools(s, d)
	registerDomTools(s, d)
	registerInputTools(s, d)
	registerCookieTools(s, d)
	registerFrameTools(s, d)
	registerStorageTools(s, d)
}
// portOr resolves the CDP port to use: any non-zero p is returned unchanged,
// and zero falls back to 9333.
//
// SECURITY (P0.3): the fallback is 9333 — the MCP's OWN isolated Chrome — and
// deliberately NOT 9222. Port 9222 belongs to the user's daily chromium (CDP is
// enabled globally via /etc/chromium.d/cdp), so defaulting to it would let the
// agent drive the user's banking/email tabs. Callers must pass port=9222
// explicitly to attach to the daily browser on purpose.
func portOr(p int) int {
	const ownChromePort = 9333
	if p != 0 {
		return p
	}
	return ownChromePort
}
// withConn fetches the pooled connection for port and runs fn with it. If fn
// fails with a dead-connection error (as judged by isConnErr), the connection
// is released and fn is retried exactly ONCE on a fresh connection (Chrome may
// have closed the tab). Any other error, or an error from the retry, is
// returned as-is.
func (d *deps) withConn(port int, fn func(c *browser.CDPConn) error) error {
	conn, err := d.pool.get(port)
	if err != nil {
		return err
	}

	firstErr := fn(conn)
	if firstErr == nil || !isConnErr(firstErr) {
		return firstErr
	}

	// The connection died. Release ONLY the WebSocket and reconnect to the
	// same Chrome — releaseConn, not drop: drop would kill the process and
	// leave nothing to reconnect to.
	d.pool.releaseConn(port)
	fresh, err := d.pool.get(port)
	if err != nil {
		return err
	}
	return fn(fresh)
}
// serveHTTP hosts the MCP server over Streamable HTTP with optional bearer auth.
//
// The listen address is cfg.bind concatenated with cfg.httpAddr, so httpAddr is
// expected in ":port" form (e.g. ":7740"). When the REGISTRY_API_TOKEN
// environment variable is set, every request goes through authMiddleware;
// when it is unset, requests are served unauthenticated, which is refused for
// binds that listen on all interfaces.
//
// It blocks serving requests and returns the error that stopped the listener,
// or an error up front if the configuration is rejected.
func serveHTTP(s *server.MCPServer, cfg config) error {
	token := os.Getenv("REGISTRY_API_TOKEN")

	// Refuse to expose an unauthenticated server on every interface. Besides
	// the literal "0.0.0.0", an empty host and the IPv6 wildcard also bind all
	// interfaces, so they must not slip past this guard.
	switch cfg.bind {
	case "", "0.0.0.0", "::", "[::]":
		if token == "" {
			return fmt.Errorf("--bind %q listens on all interfaces and requires REGISTRY_API_TOKEN", cfg.bind)
		}
	}

	var handler http.Handler = server.NewStreamableHTTPServer(s)
	if token != "" {
		handler = authMiddleware(token, handler)
	}
	mux := http.NewServeMux()
	mux.Handle("/", handler)

	addr := cfg.bind + cfg.httpAddr
	slog.Info("listening http", "addr", addr)

	// Only the header read is bounded: it protects against slow-header
	// (Slowloris) clients without cutting off long-lived streaming responses,
	// which a WriteTimeout would do.
	httpSrv := &http.Server{
		Addr:              addr,
		Handler:           mux,
		ReadHeaderTimeout: 10 * time.Second,
	}
	return httpSrv.ListenAndServe()
}
// authMiddleware wraps next so that only requests whose Authorization header
// is exactly "Bearer <token>" reach it. Any other request receives a
// 401 Unauthorized response with a "WWW-Authenticate: Bearer" challenge and
// next is not invoked.
//
// The header is compared in constant time so that response timing does not
// reveal how many leading bytes of a guessed token were correct.
func authMiddleware(token string, next http.Handler) http.Handler {
	expected := []byte("Bearer " + token)
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		got := []byte(r.Header.Get("Authorization"))
		if subtle.ConstantTimeCompare(got, expected) != 1 {
			// A 401 response is expected to carry a challenge for the scheme.
			w.Header().Set("WWW-Authenticate", "Bearer")
			http.Error(w, "unauthorized", http.StatusUnauthorized)
			return
		}
		next.ServeHTTP(w, r)
	})
}
// transportLabel describes the configured transport for logging: "stdio" when
// no HTTP address is set, otherwise "http " followed by the bind address and
// the HTTP address.
func transportLabel(cfg config) string {
	switch cfg.httpAddr {
	case "":
		return "stdio"
	default:
		return fmt.Sprintf("http %s", cfg.bind+cfg.httpAddr)
	}
}
// parseLevel maps a case-insensitive level name ("debug", "warn", "error") to
// its slog.Level. Any other value, including "info" and the empty string,
// yields slog.LevelInfo.
func parseLevel(s string) slog.Level {
	named := map[string]slog.Level{
		"debug": slog.LevelDebug,
		"warn":  slog.LevelWarn,
		"error": slog.LevelError,
	}
	if lvl, ok := named[strings.ToLower(s)]; ok {
		return lvl
	}
	return slog.LevelInfo
}
// truncate caps s at n bytes, appending a marker when it cuts.
//
// Strings of at most n bytes are returned unchanged. Longer strings are cut at
// the last rune boundary at or before byte n, so a multi-byte UTF-8 character
// is never split, and "\n... [truncated]" is appended. A negative n is treated
// as 0 instead of panicking.
func truncate(s string, n int) string {
	if n < 0 {
		n = 0
	}
	if len(s) <= n {
		return s
	}
	// Back up to the start of the rune that straddles the limit. A rune has at
	// most utf8.UTFMax bytes, so at most UTFMax-1 steps are needed; the bound
	// keeps invalid UTF-8 (long runs of continuation bytes) from being cut
	// back any further.
	cut := n
	for cut > 0 && n-cut < utf8.UTFMax-1 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut] + "\n... [truncated]"
}