Files
browser_mcp/tools_lifecycle.go
T
egutierrez 1fae6c1df9 feat(browser_mcp): add browser_list/launch_profile/close lifecycle tools
Three MCP tools to manage the user's Chromium instances by profile, distinct
from browser_launch's isolated automation Chrome:

- browser_list: enumerate running Chromium master processes by scanning
  /proc/*/cmdline (has --user-data-dir, no --type=). Returns pid, profile,
  user_data_dir, cdp_port, has_cdp as a JSON array.
- browser_launch_profile: launch a concrete profile using the REAL binary
  /usr/lib/chromium/chromium (bypassing the /usr/bin/chromium wrapper). No CDP
  by default so Google keeps the session for human profiles; cdp=true adds
  --remote-debugging-port + --remote-allow-origins=*. Detects DISPLAY/XAUTHORITY
  from the XFCE session and launches decoupled via setsid.
- browser_close: locate a master by profile/cdp_port/pid, SIGTERM with a 10s
  wait, then SIGKILL as a last resort.

Per-profile instances are NOT registered in the connection pool: they are
user-facing and survive the MCP dying; cleanup is explicit via browser_close.

Unit tests for cmdline master detection, flag parsing, and close-target
matching. Bumps version 0.6.0 -> 0.7.0 (42 -> 45 tools).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-10 18:23:45 +02:00

466 lines
16 KiB
Go

package main
import (
"context"
"encoding/json"
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"syscall"
"time"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
)
// registerLifecycleTools registers the per-profile Chromium lifecycle tools on s:
//   - browser_list           (read)     enumerate running Chromium master processes.
//   - browser_launch_profile (mutating) launch Chromium for a concrete profile, with/without CDP.
//   - browser_close          (mutating) terminate a master process (SIGTERM, then SIGKILL).
//
// browser_list is always registered; the two mutating tools are skipped when
// d.readOnly is set.
//
// These tools manage the USER's Chromium instances by profile (e.g. "Personal",
// "Work"), as opposed to browser_launch, which spins the MCP's own isolated
// automation Chrome. Since the launched instances are user-facing rather than
// driven by the MCP, they are NOT registered in the connection pool: the pool's
// shutdown-kill is reserved for automation Chromes the MCP owns, so a user's
// "Personal" window survives the MCP dying. Cleanup is explicit via browser_close.
func registerLifecycleTools(s *server.MCPServer, d *deps) {
	s.AddTool(browserListTool(), mcp.NewTypedToolHandler(d.handleBrowserList))

	// Everything below mutates the user's browser state.
	if d.readOnly {
		return
	}
	s.AddTool(browserLaunchProfileTool(), mcp.NewTypedToolHandler(d.handleBrowserLaunchProfile))
	s.AddTool(browserCloseTool(), mcp.NewTypedToolHandler(d.handleBrowserClose))
}
// realChromiumBin is the path of the REAL Chromium binary, used instead of the
// /usr/bin/chromium wrapper. The wrapper sources /etc/chromium.d/* and injects
// global flags (--user-data-dir=$HOME/.config/chromium-cdp,
// --remote-debugging-port=9222, --remote-allow-origins=*). Launching the wrapper
// would therefore force CDP on every instance, which breaks Google's
// session-keeping for human profiles. The real binary sources none of that, so
// the flags passed by this file are the only ones in effect.
// NOTE(review): the path is distribution-specific — confirm it on the target host.
const realChromiumBin = "/usr/lib/chromium/chromium"
// ---- master process discovery ----

// chromiumMaster describes one running Chromium master process (the top process
// that owns a user-data-dir, NOT a zygote/gpu/renderer child which carries --type=).
// It is the element type of the JSON array returned by browser_list, so the
// json tags below are part of the tool's output format.
type chromiumMaster struct {
PID int `json:"pid"` // process ID of the master
Profile string `json:"profile"` // value of --profile-directory ("" if absent)
UserDataDir string `json:"user_data_dir"` // value of --user-data-dir
CDPPort string `json:"cdp_port"` // value of --remote-debugging-port, kept as the raw flag text ("" if none)
HasCDP bool `json:"has_cdp"` // true when a --remote-debugging-port= flag is present at all
}
// readProcCmdline returns the argv of process pid as recorded in
// /proc/<pid>/cmdline, which stores the arguments separated by NUL bytes.
// Empty pieces (including the one after the trailing NUL) are dropped.
// It returns nil when the file cannot be read or is empty, e.g. because the
// process has already exited.
func readProcCmdline(pid int) []string {
	data, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "cmdline"))
	if err != nil || len(data) == 0 {
		return nil
	}

	rest := string(data)
	argv := make([]string, 0, strings.Count(rest, "\x00")+1)
	for rest != "" {
		var arg string
		arg, rest, _ = strings.Cut(rest, "\x00")
		if arg == "" {
			continue
		}
		argv = append(argv, arg)
	}
	return argv
}
// flagValue looks up a "--name=value" style flag in args. It returns the text
// after the "=" and true for the first argument that starts with exactly
// "--name="; when no argument matches it returns "" and false. A present flag
// with an empty value therefore yields ("", true).
func flagValue(args []string, name string) (string, bool) {
	want := "--" + name + "="
	for i := range args {
		if value, ok := strings.CutPrefix(args[i], want); ok {
			return value, true
		}
	}
	return "", false
}
// hasFlagPrefix reports whether at least one element of args begins with
// prefix (for example "--type="). An empty prefix matches any non-empty args.
func hasFlagPrefix(args []string, prefix string) bool {
	for i := range args {
		if !strings.HasPrefix(args[i], prefix) {
			continue
		}
		return true
	}
	return false
}
// isChromiumExe reports whether the executable named by args[0] looks like a
// Chromium or Chrome binary: the lower-cased base name of the path must contain
// "chromium" or "chrome". Empty args never match.
func isChromiumExe(args []string) bool {
	if len(args) == 0 {
		return false
	}
	exe := strings.ToLower(filepath.Base(args[0]))
	for _, needle := range [...]string{"chromium", "chrome"} {
		if strings.Contains(exe, needle) {
			return true
		}
	}
	return false
}
// parseChromiumMaster turns the argv of process pid into a chromiumMaster.
// ok is true only when the process is a Chromium MASTER, i.e. all of:
//   - argv[0] is a chromium/chrome binary,
//   - a --user-data-dir= flag is present,
//   - no --type= flag is present (every child process carries one: zygote,
//     gpu-process, renderer, utility...).
//
// In every other case the zero chromiumMaster and false are returned.
func parseChromiumMaster(pid int, args []string) (chromiumMaster, bool) {
	var none chromiumMaster

	if !isChromiumExe(args) {
		return none, false
	}
	dataDir, ok := flagValue(args, "user-data-dir")
	if !ok || hasFlagPrefix(args, "--type=") {
		// Either no data dir at all, or a child process rather than the master.
		return none, false
	}

	profile, _ := flagValue(args, "profile-directory") // "" when the flag is absent
	debugPort, debugging := flagValue(args, "remote-debugging-port")

	master := chromiumMaster{
		PID:         pid,
		Profile:     profile,
		UserDataDir: dataDir,
		CDPPort:     debugPort,
		HasCDP:      debugging,
	}
	return master, true
}
// firstNonEmpty returns the value of the "--name=value" flag in args, or "" when
// the flag is absent. Despite its name it is only a presence-discarding wrapper
// around flagValue: a flag that is present with an empty value also yields "".
func firstNonEmpty(args []string, name string) (value string) {
	value, _ = flagValue(args, name)
	return value
}
// listChromiumMasters scans the numeric directories of /proc, parses each
// process's command line and collects those recognised as Chromium masters.
// The result is ordered by ascending PID so output is stable. It is nil (not
// an empty slice) when no master is running. The only error reported is a
// failure to read /proc itself.
func listChromiumMasters() ([]chromiumMaster, error) {
	procEntries, err := os.ReadDir("/proc")
	if err != nil {
		return nil, fmt.Errorf("read /proc: %w", err)
	}

	var found []chromiumMaster
	for _, entry := range procEntries {
		if !entry.IsDir() {
			continue
		}
		pid, convErr := strconv.Atoi(entry.Name())
		if convErr != nil {
			continue // not a PID dir
		}
		master, ok := parseChromiumMaster(pid, readProcCmdline(pid))
		if !ok {
			continue
		}
		found = append(found, master)
	}

	sort.Slice(found, func(a, b int) bool {
		return found[a].PID < found[b].PID
	})
	return found, nil
}
// ---- X session env detection ----

// xSessionEnv determines the DISPLAY and XAUTHORITY values a decoupled Chromium
// needs to open a window on the user's screen when the MCP itself has no X
// environment to inherit.
//
// It starts from the defaults ":0" and ~/.Xauthority (the latter stays "" if the
// home directory is unknown), then asks pgrep for the PIDs of the XFCE session
// processes xfwm4, xfce4-session and xfdesktop, in that order. The first PID
// whose /proc environ is readable decides the result: each non-empty value found
// there overrides the corresponding default, and the search stops.
func xSessionEnv() (display, xauthority string) {
	display = ":0"
	if home, err := os.UserHomeDir(); err == nil {
		xauthority = filepath.Join(home, ".Xauthority")
	}

	sessionProcs := [...]string{"xfwm4", "xfce4-session", "xfdesktop"}
	for i := range sessionProcs {
		pids, err := exec.Command("pgrep", "-x", sessionProcs[i]).Output()
		if err != nil {
			continue // pgrep failed or matched nothing for this name
		}
		for _, field := range strings.Fields(string(pids)) {
			pid, err := strconv.Atoi(field)
			if err != nil {
				continue
			}
			sessDisplay, sessXauth, readable := readProcEnviron(pid)
			if !readable {
				continue
			}
			if sessDisplay != "" {
				display = sessDisplay
			}
			if sessXauth != "" {
				xauthority = sessXauth
			}
			return display, xauthority
		}
	}
	return display, xauthority
}
// readProcEnviron extracts DISPLAY and XAUTHORITY from /proc/<pid>/environ, a
// NUL-separated list of KEY=value entries. ok is false (and both values "")
// when the file cannot be read; otherwise ok is true and a variable missing
// from the environment is returned as "". If a key occurs more than once, the
// last occurrence wins.
func readProcEnviron(pid int) (display, xauthority string, ok bool) {
	raw, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "environ"))
	if err != nil {
		return "", "", false
	}
	for _, entry := range strings.Split(string(raw), "\x00") {
		key, value, hasEq := strings.Cut(entry, "=")
		if !hasEq {
			continue
		}
		switch key {
		case "DISPLAY":
			display = value
		case "XAUTHORITY":
			xauthority = value
		}
	}
	return display, xauthority, true
}
// defaultProfileUserDataDir returns the user's daily Chromium user-data-dir,
// <home>/.config/chromium-cdp, where the named profiles (Automation, Default,
// Personal, "Profile 1", osint_01) live. If the home directory cannot be
// determined it falls back to the relative path ".config/chromium-cdp".
func defaultProfileUserDataDir() string {
	if home, err := os.UserHomeDir(); err == nil {
		return filepath.Join(home, ".config", "chromium-cdp")
	}
	return ".config/chromium-cdp"
}
// ---- browser_list ----

// browserListArgs is the typed argument set of the browser_list tool. The tool
// takes no arguments, so the struct is intentionally empty.
type browserListArgs struct{}
// browserListTool builds the MCP tool definition for browser_list. The tool
// declares no parameters; only its description is set.
func browserListTool() mcp.Tool {
	const description = "List the running Chromium MASTER processes (one per user-data-dir master, NOT zygote/gpu/renderer children). For each: pid, profile (--profile-directory value), user_data_dir, cdp_port (--remote-debugging-port value, empty if none), has_cdp. Returns a JSON array. Read-only."

	return mcp.NewTool("browser_list", mcp.WithDescription(description))
}
// handleBrowserList implements browser_list. It returns the running Chromium
// masters as an indented JSON array in a text result. A failure to scan /proc
// is reported as a tool error result; the Go error return is always nil.
func (d *deps) handleBrowserList(_ context.Context, _ mcp.CallToolRequest, _ browserListArgs) (*mcp.CallToolResult, error) {
	masters, err := listChromiumMasters()
	switch {
	case err != nil:
		return mcp.NewToolResultError(err.Error()), nil
	case masters == nil:
		// A nil slice would encode as JSON null; callers are promised an array.
		masters = []chromiumMaster{}
	}

	// The marshal error is ignored: chromiumMaster holds only ints, strings
	// and a bool, which always encode.
	payload, _ := json.MarshalIndent(masters, "", " ")
	return mcp.NewToolResultText(string(payload)), nil
}
// ---- browser_launch_profile (MUTA) ----

// launchProfileArgs is the typed argument set of the browser_launch_profile tool.
type launchProfileArgs struct {
Profile string `json:"profile"` // --profile-directory value; required (the handler rejects "")
UserDataDir string `json:"user_data_dir"` // --user-data-dir value; "" selects defaultProfileUserDataDir()
URL string `json:"url"` // optional URL appended to the Chromium command line
CDP bool `json:"cdp"` // when true, the remote-debugging flags are added
CDPPort int `json:"cdp_port"` // CDP port used when CDP is true; 0 is replaced by 9222
}
// browserLaunchProfileTool builds the MCP tool definition for
// browser_launch_profile: a required "profile" string plus the optional
// "user_data_dir", "url", "cdp" and "cdp_port" parameters.
func browserLaunchProfileTool() mcp.Tool {
	const description = "Launch Chromium for a CONCRETE profile (e.g. \"Personal\", \"Work\") on the user's screen. Uses the REAL chromium binary (/usr/lib/chromium/chromium), bypassing the /usr/bin/chromium wrapper, so flags are controlled exactly. With cdp=false (default) NO remote-debugging flags are added — REQUIRED for human profiles where Google must keep the session (CDP makes Google treat the browser as automated and drop the login). With cdp=true adds --remote-debugging-port=<cdp_port> and --remote-allow-origins=*. Detects DISPLAY/XAUTHORITY from the XFCE session and launches DECOUPLED (setsid). If a master already owns the user_data_dir, Chromium forwards the open to it (note in the result). Returns {pid, profile, cdp, cdp_port[, note]}."

	profileParam := mcp.WithString("profile",
		mcp.Required(),
		mcp.Description("Profile directory name to launch (--profile-directory value), e.g. \"Personal\", \"Default\", \"Automation\"."),
	)
	userDataDirParam := mcp.WithString("user_data_dir",
		mcp.Description("Chromium user-data-dir holding the profiles. Default ~/.config/chromium-cdp."),
	)
	urlParam := mcp.WithString("url",
		mcp.Description("Optional URL to open."),
	)
	cdpParam := mcp.WithBoolean("cdp",
		mcp.Description("Enable CDP remote debugging. Default false. Leave false for human profiles (Google session-keeping). true only for automation."),
	)
	cdpPortParam := mcp.WithNumber("cdp_port",
		mcp.Description("CDP port when cdp=true. Default 9222."),
	)

	return mcp.NewTool("browser_launch_profile",
		mcp.WithDescription(description),
		profileParam,
		userDataDirParam,
		urlParam,
		cdpParam,
		cdpPortParam,
	)
}
// handleBrowserLaunchProfile implements browser_launch_profile: it starts the
// real Chromium binary for the requested profile, detached from the MCP, and
// returns {pid, profile, cdp, cdp_port[, note]} as indented JSON.
//
// Arguments (see launchProfileArgs):
//   - Profile is required.
//   - UserDataDir defaults to defaultProfileUserDataDir().
//   - URL is optional and must not start with "-" (it is passed as a bare
//     positional argument and would otherwise be read as a command-line switch).
//   - CDP adds --remote-debugging-port=<CDPPort> and --remote-allow-origins=*;
//     CDPPort defaults to 9222 and must be in 1-65535 when CDP is true.
//
// Every failure is reported as a tool error result; the Go error return is
// always nil.
func (d *deps) handleBrowserLaunchProfile(_ context.Context, _ mcp.CallToolRequest, a launchProfileArgs) (*mcp.CallToolResult, error) {
	if a.Profile == "" {
		return mcp.NewToolResultError("profile is required"), nil
	}
	// The URL comes from the tool caller and is appended to argv verbatim. A
	// value that begins with "-" (after surrounding whitespace) would be taken
	// as a Chromium switch rather than a page to open, letting the caller
	// inject arbitrary flags, so it is refused.
	if strings.HasPrefix(strings.TrimSpace(a.URL), "-") {
		return mcp.NewToolResultError("url must not start with '-'"), nil
	}

	userDataDir := a.UserDataDir
	if userDataDir == "" {
		userDataDir = defaultProfileUserDataDir()
	}
	cdpPort := a.CDPPort
	if cdpPort == 0 {
		cdpPort = 9222
	}
	if a.CDP && (cdpPort < 1 || cdpPort > 65535) {
		return mcp.NewToolResultError(fmt.Sprintf("cdp_port %d is out of range (1-65535)", cdpPort)), nil
	}

	// Detect whether a master already owns this user-data-dir. If so, Chromium will
	// forward the open to that master (it can't run two masters on one dir).
	// Paths are compared after filepath.Clean so a trailing slash or "./"
	// segment does not hide an existing master. Discovery is best-effort: a
	// failure to scan /proc only means the note is omitted.
	note := ""
	if masters, err := listChromiumMasters(); err == nil {
		wantDir := filepath.Clean(userDataDir)
		for _, m := range masters {
			if filepath.Clean(m.UserDataDir) == wantDir {
				note = "forwarded to existing master"
				break
			}
		}
	}

	args := []string{
		"--user-data-dir=" + userDataDir,
		"--profile-directory=" + a.Profile,
	}
	if a.CDP {
		args = append(args,
			fmt.Sprintf("--remote-debugging-port=%d", cdpPort),
			"--remote-allow-origins=*",
		)
	}
	if a.URL != "" {
		args = append(args, a.URL)
	}

	display, xauthority := xSessionEnv()
	cmd := exec.Command(realChromiumBin, args...)
	// Appended entries override any DISPLAY/XAUTHORITY inherited from the MCP.
	cmd.Env = append(os.Environ(),
		"DISPLAY="+display,
		"XAUTHORITY="+xauthority,
	)
	// Decouple from the MCP: new session leader (setsid) so the child survives the
	// launcher dying. Stdin/Stdout/Stderr stay nil, which makes os/exec attach
	// the null device instead of the parent's pipes (avoids the exit-144 /
	// SIGPIPE death when the parent's pipes close).
	cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
	cmd.Stdin, cmd.Stdout, cmd.Stderr = nil, nil, nil
	if err := cmd.Start(); err != nil {
		return mcp.NewToolResultError(fmt.Sprintf("launch chromium: %v", err)), nil
	}
	pid := cmd.Process.Pid

	// Reap the child in the background. Setsid does not change the parent, so
	// without a Wait every Chromium that exits while the MCP is alive (a
	// forwarded launch, or one closed later) would stay behind as a zombie and
	// keep its /proc/<pid> entry. The goroutine ends when the child exits; if
	// the MCP dies first the child is simply re-parented and keeps running.
	// The exit status is irrelevant here, hence the discarded error.
	go func() { _ = cmd.Wait() }()

	// Give Chromium a moment to come up. If it forwarded to an existing master the
	// child exits fast; the launched pid is still informative.
	time.Sleep(1 * time.Second)

	// When cdp=true, opportunistically confirm the port responds (best-effort: a
	// forwarded launch may not bind the port if the master had no CDP).
	if a.CDP && note == "" && !cdpPortResponds(cdpPort) {
		note = "cdp port not confirmed listening yet"
	}

	out := map[string]any{
		"pid":      pid,
		"profile":  a.Profile,
		"cdp":      a.CDP,
		"cdp_port": cdpPort,
	}
	if note != "" {
		out["note"] = note
	}
	b, err := json.MarshalIndent(out, "", " ")
	if err != nil {
		return mcp.NewToolResultError(fmt.Sprintf("encode result: %v", err)), nil
	}
	return mcp.NewToolResultText(string(b)), nil
}
// ---- browser_close (MUTA) ----

// browserCloseArgs is the typed argument set of the browser_close tool. At least
// one field must be non-zero; when several are set, PID takes precedence over
// CDPPort, which takes precedence over Profile (see matchMaster).
type browserCloseArgs struct {
Profile string `json:"profile"` // match by --profile-directory value
CDPPort int `json:"cdp_port"` // match by --remote-debugging-port value
PID int `json:"pid"` // match by exact PID
}
// browserCloseTool builds the MCP tool definition for browser_close with its
// three optional selectors: "profile", "cdp_port" and "pid".
func browserCloseTool() mcp.Tool {
	const description = "Cleanly close a running Chromium master. Identify it by one of: profile (--profile-directory), cdp_port (--remote-debugging-port), or pid. Sends SIGTERM, waits up to 10s for it to die, then SIGKILL as a last resort (flagged in the result). Returns {closed, pid, method}."

	byProfile := mcp.WithString("profile", mcp.Description("Match the master by --profile-directory value."))
	byPort := mcp.WithNumber("cdp_port", mcp.Description("Match the master by --remote-debugging-port value."))
	byPID := mcp.WithNumber("pid", mcp.Description("Match the master by exact PID."))

	return mcp.NewTool("browser_close",
		mcp.WithDescription(description),
		byProfile,
		byPort,
		byPID,
	)
}
// handleBrowserClose implements browser_close. It selects one running Chromium
// master by pid, cdp_port or profile (see matchMaster), sends SIGTERM and waits
// up to 10s for it to disappear; if it is still there it sends SIGKILL and
// waits up to 3s more.
//
// The result is the indented JSON object {closed, pid, method}:
//   - method is "SIGTERM" or "SIGKILL" (the strongest signal that was sent);
//   - closed reports whether the process was actually observed to be gone,
//     so it is false when the process survived even SIGKILL within the wait.
//
// Missing selectors, no matching master and signal failures are reported as
// tool error results; the Go error return is always nil.
func (d *deps) handleBrowserClose(_ context.Context, _ mcp.CallToolRequest, a browserCloseArgs) (*mcp.CallToolResult, error) {
	if a.Profile == "" && a.CDPPort == 0 && a.PID == 0 {
		return mcp.NewToolResultError("one of profile, cdp_port or pid is required"), nil
	}
	masters, err := listChromiumMasters()
	if err != nil {
		return mcp.NewToolResultError(err.Error()), nil
	}
	target, found := matchMaster(masters, a)
	if !found {
		return mcp.NewToolResultError("no running Chromium master matched the given criteria"), nil
	}
	proc, err := os.FindProcess(target.PID)
	if err != nil {
		return mcp.NewToolResultError(fmt.Sprintf("find process %d: %v", target.PID, err)), nil
	}

	method := "SIGTERM"
	if err := proc.Signal(syscall.SIGTERM); err != nil {
		return mcp.NewToolResultError(fmt.Sprintf("SIGTERM pid=%d: %v", target.PID, err)), nil
	}

	// Wait up to ~10s for the process to die (poll /proc liveness).
	closed := true
	if !waitProcessGone(target.PID, 10*time.Second) {
		method = "SIGKILL"
		// The signal error is deliberately not treated as fatal: SIGTERM was
		// already delivered, so a failure here most likely means the process
		// exited in the meantime. The liveness check below is what decides
		// the reported outcome.
		_ = proc.Signal(syscall.SIGKILL)
		closed = waitProcessGone(target.PID, 3*time.Second)
	}

	out := map[string]any{
		"closed": closed,
		"pid":    target.PID,
		"method": method,
	}
	b, err := json.MarshalIndent(out, "", " ")
	if err != nil {
		return mcp.NewToolResultError(fmt.Sprintf("encode result: %v", err)), nil
	}
	return mcp.NewToolResultText(string(b)), nil
}
// matchMaster selects the master that the close request refers to. Exactly one
// criterion is applied, chosen by specificity: a non-zero PID wins, otherwise a
// non-zero cdp_port, otherwise the profile name. The first master in the given
// order that satisfies the criterion is returned with true; if none does, the
// zero chromiumMaster and false are returned. There is no fall-through to a
// less specific criterion.
func matchMaster(masters []chromiumMaster, a browserCloseArgs) (chromiumMaster, bool) {
	var matches func(m chromiumMaster) bool
	switch {
	case a.PID != 0:
		matches = func(m chromiumMaster) bool { return m.PID == a.PID }
	case a.CDPPort != 0:
		// CDPPort is stored as the raw flag text, so compare as strings.
		port := strconv.Itoa(a.CDPPort)
		matches = func(m chromiumMaster) bool { return m.CDPPort == port }
	default:
		matches = func(m chromiumMaster) bool { return m.Profile == a.Profile }
	}

	for i := range masters {
		if matches(masters[i]) {
			return masters[i], true
		}
	}
	return chromiumMaster{}, false
}
// waitProcessGone polls processAlive(pid) every 150ms until the process has
// disappeared or timeout has elapsed. It returns true as soon as the process is
// seen to be gone. Once the deadline has passed it performs one last check and
// returns its outcome, so a non-positive timeout degenerates to a single check.
func waitProcessGone(pid int, timeout time.Duration) bool {
	const pollInterval = 150 * time.Millisecond

	stopAt := time.Now().Add(timeout)
	for {
		if !time.Now().Before(stopAt) {
			// Out of time: the final verdict is whatever we see right now.
			return !processAlive(pid)
		}
		if !processAlive(pid) {
			return true
		}
		time.Sleep(pollInterval)
	}
}
// processAlive reports whether process pid is still running, based on procfs.
//
// A process counts as alive when /proc/<pid> exists AND its state is not
// zombie or dead. The state check matters because an exited child that its
// parent has not reaped yet keeps its /proc/<pid> directory; looking only at
// the directory would make such a process appear alive indefinitely.
//
// If /proc/<pid>/stat cannot be read or parsed while the directory exists, the
// process is conservatively reported as alive.
func processAlive(pid int) bool {
	dir := filepath.Join("/proc", strconv.Itoa(pid))
	if _, err := os.Stat(dir); err != nil {
		return false
	}

	stat, err := os.ReadFile(filepath.Join(dir, "stat"))
	if err != nil {
		// The process may have vanished between the two calls; re-check the
		// directory instead of guessing from the error value.
		_, statErr := os.Stat(dir)
		return statErr == nil
	}

	// The line reads "pid (comm) state ...". comm is free-form and may itself
	// contain spaces or parentheses, so the state is the first field after the
	// LAST closing parenthesis.
	line := string(stat)
	end := strings.LastIndexByte(line, ')')
	if end < 0 {
		return true
	}
	fields := strings.Fields(line[end+1:])
	if len(fields) == 0 {
		return true
	}
	switch fields[0] {
	case "Z", "X", "x": // zombie / dead: already exited, only awaiting reaping
		return false
	}
	return true
}
// cdpPortResponds reports whether a TCP connection to 127.0.0.1:<port> can be
// established within 300ms. It makes a single attempt and closes the connection
// immediately; this is a best-effort "something is listening" probe and does not
// verify that the listener actually speaks CDP.
func cdpPortResponds(port int) bool {
	dialer := net.Dialer{Timeout: 300 * time.Millisecond}
	conn, err := dialer.Dial("tcp", net.JoinHostPort("127.0.0.1", strconv.Itoa(port)))
	if err == nil {
		conn.Close()
	}
	return err == nil
}