Files
browser_mcp/tools_lifecycle.go
T
egutierrez fa1efe6fd5 feat: modo de velocidad de sesión (browser_set_mode) + acciones más rápidas en auto
Añade un flag de velocidad por sesión para que el manejo del navegador sea muy rápido por defecto, conservando un modo sigiloso para cuando haya detección anti-bot fuerte.

- Nueva tool browser_set_mode (tools_session.go): fija el modo de la sesión por puerto en el pool. 'auto' (default del MCP) = rápido; 'human' = sigiloso anti-detección; también admite 'fast'/'instant'. Cada tool de acción puede overridearlo con su arg mode.
- pool.go: estado de modo por puerto (modes map + setMode/getMode), limpiado en drop y closeAll.
- tools_dom.go: effectiveMode resuelve el modo (arg de la llamada > modo de sesión > 'auto'). settleForMode reemplaza el sleep ciego fijo de 400ms tras cada acción mutante: 60ms en auto/fast, aleatorio 250-650ms en human (ritmo no-máquina), 0 en instant. dom_type_ref gana arg mode y rutea a CdpTypeRefFast (insertText, un round-trip) en auto o CdpTypeRef (carácter a carácter) en human. Descripciones del arg mode actualizadas (el default ya no es human).
- tools_lifecycle.go: browser_launch_profile reemplaza el sleep(1s) ciego por un poll del puerto CDP (waitCDPPort).
- .gitignore: ignora registry.db/operations.db (no deben vivir en la app; regla db_locations).

Doctrina invertida respecto a la anterior 'humanizado siempre': ahora rápido por defecto, sigiloso bajo demanda.
2026-06-13 14:27:56 +02:00

479 lines
16 KiB
Go

package main
import (
"context"
"encoding/json"
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"syscall"
"time"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
)
// registerLifecycleTools registers the per-profile Chromium lifecycle tools on s:
//   - browser_list (read-only): enumerates running Chromium master processes.
//   - browser_launch_profile (mutating): launches Chromium for a concrete
//     profile, with or without CDP.
//   - browser_close (mutating): terminates a master process (SIGTERM, then
//     SIGKILL).
//
// These tools manage the USER's Chromium instances by profile (e.g. "Personal",
// "Work"); they are distinct from browser_launch, which starts the MCP's own
// isolated automation Chrome. Since the launched instances are user-facing and
// not driven by the MCP, they are NOT registered in the connection pool: the
// pool's shutdown-kill is reserved for automation Chromes the MCP owns, so a
// user's "Personal" window outlives the MCP. Cleanup is explicit, through
// browser_close.
func registerLifecycleTools(s *server.MCPServer, d *deps) {
	// The read-only listing tool is always available.
	s.AddTool(browserListTool(), mcp.NewTypedToolHandler(d.handleBrowserList))

	// Mutating tools are withheld from a read-only server.
	if d.readOnly {
		return
	}
	s.AddTool(browserLaunchProfileTool(), mcp.NewTypedToolHandler(d.handleBrowserLaunchProfile))
	s.AddTool(browserCloseTool(), mcp.NewTypedToolHandler(d.handleBrowserClose))
}
// realChromiumBin is the path of the REAL Chromium binary, used instead of the
// /usr/bin/chromium wrapper. The wrapper sources /etc/chromium.d/* and injects
// global flags (--user-data-dir=$HOME/.config/chromium-cdp,
// --remote-debugging-port=9222, --remote-allow-origins=*). Launching the wrapper
// would therefore force CDP on every instance, which breaks Google's
// session-keeping for human profiles. The real binary sources none of that, so
// the launcher controls the flags exactly.
const realChromiumBin = "/usr/lib/chromium/chromium"
// ---- master process discovery ----
// chromiumMaster describes one running Chromium master process: the top process
// that owns a user-data-dir, as opposed to a zygote/gpu/renderer child, which
// carries a --type= flag. It is serialized as-is in the browser_list output, so
// the JSON tags are part of the tool's contract.
type chromiumMaster struct {
// PID is the process ID of the master (the numeric /proc entry it was found under).
PID int `json:"pid"`
Profile string `json:"profile"` // value of --profile-directory ("" if absent)
UserDataDir string `json:"user_data_dir"` // value of --user-data-dir
CDPPort string `json:"cdp_port"` // value of --remote-debugging-port ("" if none)
// HasCDP is true when a --remote-debugging-port= flag is present on the command line.
HasCDP bool `json:"has_cdp"`
}
// readProcCmdline returns the argv of process pid, read from /proc/<pid>/cmdline
// and split on NUL bytes. Empty segments (such as the one after the trailing NUL)
// are dropped. It returns nil when the file cannot be read or is empty.
func readProcCmdline(pid int) []string {
	path := filepath.Join("/proc", strconv.Itoa(pid), "cmdline")
	data, err := os.ReadFile(path)
	if err != nil {
		return nil
	}
	if len(data) == 0 {
		return nil
	}

	parts := strings.Split(string(data), "\x00")
	argv := make([]string, 0, len(parts))
	for i := range parts {
		if parts[i] == "" {
			continue
		}
		argv = append(argv, parts[i])
	}
	return argv
}
// flagValue looks up a "--name=value" flag in args and returns its value together
// with a presence flag. Only the exact "--name=" prefix matches, and the first
// matching argument wins. When the flag is absent it returns ("", false).
func flagValue(args []string, name string) (string, bool) {
	want := "--" + name + "="
	for i := range args {
		if value, ok := strings.CutPrefix(args[i], want); ok {
			return value, true
		}
	}
	return "", false
}
// hasFlagPrefix reports whether at least one element of args begins with prefix
// (for example "--type=").
func hasFlagPrefix(args []string, prefix string) bool {
	found := false
	for i := 0; i < len(args) && !found; i++ {
		found = strings.HasPrefix(args[i], prefix)
	}
	return found
}
// isChromiumExe reports whether the executable named by argv[0] looks like a
// Chromium/Chrome binary: the lower-cased base name contains "chromium" or
// "chrome". An empty argv yields false.
func isChromiumExe(args []string) bool {
	if len(args) > 0 {
		exe := strings.ToLower(filepath.Base(args[0]))
		for _, marker := range []string{"chromium", "chrome"} {
			if strings.Contains(exe, marker) {
				return true
			}
		}
	}
	return false
}
// parseChromiumMaster turns argv into a chromiumMaster when, and only when, the
// process is a Chromium MASTER. That requires all of:
//   - argv[0] is a chromium/chrome binary,
//   - a --user-data-dir= flag is present,
//   - no --type= flag is present (every child process carries one: zygote,
//     gpu-process, renderer, utility...).
//
// In every other case it returns the zero chromiumMaster and ok=false.
func parseChromiumMaster(pid int, args []string) (chromiumMaster, bool) {
	var none chromiumMaster

	dataDir, haveDataDir := flagValue(args, "user-data-dir")
	switch {
	case !isChromiumExe(args):
		return none, false
	case !haveDataDir:
		return none, false
	case hasFlagPrefix(args, "--type="):
		// Child process, not the master.
		return none, false
	}

	profile, _ := flagValue(args, "profile-directory")
	debugPort, debugging := flagValue(args, "remote-debugging-port")

	master := chromiumMaster{
		PID:         pid,
		Profile:     profile,
		UserDataDir: dataDir,
		CDPPort:     debugPort,
		HasCDP:      debugging,
	}
	return master, true
}
// firstNonEmpty returns the value of the "--name=value" flag found in args, or ""
// when the flag is absent. It is flagValue without the presence result.
func firstNonEmpty(args []string, name string) string {
	if value, present := flagValue(args, name); present {
		return value
	}
	return ""
}
// listChromiumMasters scans /proc and collects every running Chromium master
// process. The result is ordered by ascending PID so the output is stable. It
// returns a nil slice when no master is found, and an error only when /proc
// itself cannot be listed.
func listChromiumMasters() ([]chromiumMaster, error) {
	dirEntries, err := os.ReadDir("/proc")
	if err != nil {
		return nil, fmt.Errorf("read /proc: %w", err)
	}

	var found []chromiumMaster
	for _, entry := range dirEntries {
		// Only directories with a numeric name are process entries.
		pid, convErr := strconv.Atoi(entry.Name())
		if convErr != nil || !entry.IsDir() {
			continue
		}
		master, isMaster := parseChromiumMaster(pid, readProcCmdline(pid))
		if !isMaster {
			continue
		}
		found = append(found, master)
	}

	sort.Slice(found, func(a, b int) bool { return found[a].PID < found[b].PID })
	return found, nil
}
// ---- X session env detection ----
// xSessionEnv determines the DISPLAY and XAUTHORITY values to hand to a Chromium
// launched from the MCP, which inherits no X environment of its own and needs
// them to open a window on the user's screen.
//
// The values are scraped from the environment of a live XFCE session process
// (xfwm4, xfce4-session, xfdesktop, tried in that order via pgrep -x). The first
// process whose environ is readable decides the result; any variable it leaves
// empty keeps the fallback, which is ":0" and ~/.Xauthority.
func xSessionEnv() (display, xauthority string) {
	// Fallbacks, used when no session process can be inspected.
	display = ":0"
	if home, err := os.UserHomeDir(); err == nil {
		xauthority = filepath.Join(home, ".Xauthority")
	}

	candidates := [...]string{"xfwm4", "xfce4-session", "xfdesktop"}
	for i := range candidates {
		pids, err := exec.Command("pgrep", "-x", candidates[i]).Output()
		if err != nil {
			continue
		}
		for _, field := range strings.Fields(string(pids)) {
			pid, convErr := strconv.Atoi(field)
			if convErr != nil {
				continue
			}
			procDisplay, procXauth, readable := readProcEnviron(pid)
			if !readable {
				continue
			}
			if procDisplay != "" {
				display = procDisplay
			}
			if procXauth != "" {
				xauthority = procXauth
			}
			return display, xauthority
		}
	}
	return display, xauthority
}
// readProcEnviron extracts DISPLAY and XAUTHORITY from /proc/<pid>/environ, a
// NUL-separated list of KEY=value entries. A variable that is not set comes back
// as ""; if one appears more than once, the last entry wins. ok is false only
// when the environ file could not be read.
func readProcEnviron(pid int) (display, xauthority string, ok bool) {
	raw, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "environ"))
	if err != nil {
		return "", "", false
	}

	for _, entry := range strings.Split(string(raw), "\x00") {
		key, value, hasEq := strings.Cut(entry, "=")
		if !hasEq {
			continue
		}
		switch key {
		case "DISPLAY":
			display = value
		case "XAUTHORITY":
			xauthority = value
		}
	}
	return display, xauthority, true
}
// defaultProfileUserDataDir returns the user's daily Chromium user-data-dir,
// ~/.config/chromium-cdp, where the named profiles (Automation, Default,
// Personal, "Profile 1", osint_01) live. If the home directory cannot be
// determined it falls back to the relative path ".config/chromium-cdp".
func defaultProfileUserDataDir() string {
	if home, err := os.UserHomeDir(); err == nil {
		return filepath.Join(home, ".config", "chromium-cdp")
	}
	return ".config/chromium-cdp"
}
// ---- browser_list ----
// browserListArgs is the (empty) argument set of the browser_list tool; the tool
// takes no parameters.
type browserListArgs struct{}
// browserListTool builds the definition of the parameterless, read-only
// browser_list tool.
func browserListTool() mcp.Tool {
	description := mcp.WithDescription("List the running Chromium MASTER processes (one per user-data-dir master, NOT zygote/gpu/renderer children). For each: pid, profile (--profile-directory value), user_data_dir, cdp_port (--remote-debugging-port value, empty if none), has_cdp. Returns a JSON array. Read-only.")
	return mcp.NewTool("browser_list", description)
}
// handleBrowserList implements browser_list: it returns the running Chromium
// master processes as an indented JSON array. A failure to enumerate them is
// reported as a tool-level error result; the Go error return is always nil.
func (d *deps) handleBrowserList(_ context.Context, _ mcp.CallToolRequest, _ browserListArgs) (*mcp.CallToolResult, error) {
	masters, err := listChromiumMasters()
	switch {
	case err != nil:
		return mcp.NewToolResultError(err.Error()), nil
	case masters == nil:
		// Encode "no masters" as [] rather than null.
		masters = []chromiumMaster{}
	}

	payload, _ := json.MarshalIndent(masters, "", " ")
	return mcp.NewToolResultText(string(payload)), nil
}
// ---- browser_launch_profile (MUTA) ----
// launchProfileArgs holds the arguments of the browser_launch_profile tool.
type launchProfileArgs struct {
// Profile is the profile directory name, passed as --profile-directory. Required.
Profile string `json:"profile"`
// UserDataDir is passed as --user-data-dir; when empty the handler uses
// defaultProfileUserDataDir().
UserDataDir string `json:"user_data_dir"`
// URL, when non-empty, is appended as the last command-line argument.
URL string `json:"url"`
// CDP enables the remote-debugging flags; when false none are added.
CDP bool `json:"cdp"`
// CDPPort is the remote-debugging port used when CDP is true; 0 means 9222.
CDPPort int `json:"cdp_port"`
}
// browserLaunchProfileTool builds the definition of the browser_launch_profile
// tool: one required string (profile) plus the optional user_data_dir, url, cdp
// and cdp_port parameters.
func browserLaunchProfileTool() mcp.Tool {
	description := mcp.WithDescription("Launch Chromium for a CONCRETE profile (e.g. \"Personal\", \"Work\") on the user's screen. Uses the REAL chromium binary (/usr/lib/chromium/chromium), bypassing the /usr/bin/chromium wrapper, so flags are controlled exactly. With cdp=false (default) NO remote-debugging flags are added — REQUIRED for human profiles where Google must keep the session (CDP makes Google treat the browser as automated and drop the login). With cdp=true adds --remote-debugging-port=<cdp_port> and --remote-allow-origins=*. Detects DISPLAY/XAUTHORITY from the XFCE session and launches DECOUPLED (setsid). If a master already owns the user_data_dir, Chromium forwards the open to it (note in the result). Returns {pid, profile, cdp, cdp_port[, note]}.")
	profile := mcp.WithString("profile", mcp.Required(), mcp.Description("Profile directory name to launch (--profile-directory value), e.g. \"Personal\", \"Default\", \"Automation\"."))
	userDataDir := mcp.WithString("user_data_dir", mcp.Description("Chromium user-data-dir holding the profiles. Default ~/.config/chromium-cdp."))
	url := mcp.WithString("url", mcp.Description("Optional URL to open."))
	cdp := mcp.WithBoolean("cdp", mcp.Description("Enable CDP remote debugging. Default false. Leave false for human profiles (Google session-keeping). true only for automation."))
	cdpPort := mcp.WithNumber("cdp_port", mcp.Description("CDP port when cdp=true. Default 9222."))

	return mcp.NewTool("browser_launch_profile", description, profile, userDataDir, url, cdp, cdpPort)
}
// handleBrowserLaunchProfile implements browser_launch_profile: it starts the
// real Chromium binary for a.Profile, decoupled from the MCP process, and returns
// {pid, profile, cdp, cdp_port[, note]} as indented JSON.
//
// Defaults: an empty user_data_dir becomes defaultProfileUserDataDir() and a zero
// cdp_port becomes 9222. Remote-debugging flags are added only when a.CDP is
// true, in which case the port must be within 1-65535. Every failure is reported
// as a tool-level error result; the Go error return is always nil.
func (d *deps) handleBrowserLaunchProfile(_ context.Context, _ mcp.CallToolRequest, a launchProfileArgs) (*mcp.CallToolResult, error) {
	if a.Profile == "" {
		return mcp.NewToolResultError("profile is required"), nil
	}
	userDataDir := a.UserDataDir
	if userDataDir == "" {
		userDataDir = defaultProfileUserDataDir()
	}
	cdpPort := a.CDPPort
	if cdpPort == 0 {
		cdpPort = 9222
	}
	// Reject a port Chromium could never bind before launching anything.
	if a.CDP && (cdpPort < 1 || cdpPort > 65535) {
		return mcp.NewToolResultError(fmt.Sprintf("cdp_port %d is out of range (1-65535)", cdpPort)), nil
	}

	// Detect whether a master already owns this user-data-dir. If so, Chromium will
	// forward the open to that master (it can't run two masters on one dir). Paths
	// are compared cleaned so a trailing slash or "./" does not hide the match.
	note := ""
	if masters, err := listChromiumMasters(); err == nil {
		wantDir := filepath.Clean(userDataDir)
		for _, m := range masters {
			if filepath.Clean(m.UserDataDir) == wantDir {
				note = "forwarded to existing master"
				break
			}
		}
	}

	args := []string{
		"--user-data-dir=" + userDataDir,
		"--profile-directory=" + a.Profile,
	}
	if a.CDP {
		args = append(args,
			fmt.Sprintf("--remote-debugging-port=%d", cdpPort),
			"--remote-allow-origins=*",
		)
	}
	if a.URL != "" {
		args = append(args, a.URL)
	}

	display, xauthority := xSessionEnv()
	cmd := exec.Command(realChromiumBin, args...)
	cmd.Env = append(os.Environ(),
		"DISPLAY="+display,
		"XAUTHORITY="+xauthority,
	)
	// Decouple from the MCP: new session leader (setsid) so the child survives the
	// launcher dying, and no inherited stdio (avoids the exit-144 / SIGPIPE death
	// when the parent's pipes close).
	cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
	cmd.Stdin, cmd.Stdout, cmd.Stderr = nil, nil, nil
	if err := cmd.Start(); err != nil {
		return mcp.NewToolResultError(fmt.Sprintf("launch chromium: %v", err)), nil
	}
	pid := cmd.Process.Pid

	// Reap the child whenever it exits. While the MCP is alive it remains the
	// parent, so a child that is never waited on lingers as a zombie (its
	// /proc/<pid> entry stays) — this happens right away for the short-lived
	// process that forwards to an existing master, and later for any browser that
	// is closed. The goroutine ends as soon as the child exits; if the MCP dies
	// first, setsid still keeps the browser running.
	go func() { _ = cmd.Wait() }()

	// Give Chromium a moment to come up. With CDP we poll the port instead of a
	// blind sleep: we return as soon as it responds (best-effort: a forwarded
	// launch may not bind the port if the master had no CDP, so it is not polled).
	// Without CDP there's no port to poll, so we give the window a short margin to
	// appear / forward.
	if a.CDP && note == "" {
		if !waitCDPPort(cdpPort, 5*time.Second) {
			note = "cdp port not confirmed listening yet"
		}
	} else {
		time.Sleep(300 * time.Millisecond)
	}

	out := map[string]any{
		"pid":      pid,
		"profile":  a.Profile,
		"cdp":      a.CDP,
		"cdp_port": cdpPort,
	}
	if note != "" {
		out["note"] = note
	}
	b, _ := json.MarshalIndent(out, "", " ")
	return mcp.NewToolResultText(string(b)), nil
}
// ---- browser_close (MUTA) ----
// browserCloseArgs holds the selection criteria of the browser_close tool. At
// least one field must be non-zero; when several are set, matchMaster uses the
// most specific one only (PID, then CDPPort, then Profile).
type browserCloseArgs struct {
// Profile selects the master by its --profile-directory value.
Profile string `json:"profile"`
// CDPPort selects the master by its --remote-debugging-port value.
CDPPort int `json:"cdp_port"`
// PID selects the master by exact process ID.
PID int `json:"pid"`
}
// browserCloseTool builds the definition of the browser_close tool, whose three
// optional parameters (profile, cdp_port, pid) identify the master to terminate.
func browserCloseTool() mcp.Tool {
	description := mcp.WithDescription("Cleanly close a running Chromium master. Identify it by one of: profile (--profile-directory), cdp_port (--remote-debugging-port), or pid. Sends SIGTERM, waits up to 10s for it to die, then SIGKILL as a last resort (flagged in the result). Returns {closed, pid, method}.")
	byProfile := mcp.WithString("profile", mcp.Description("Match the master by --profile-directory value."))
	byPort := mcp.WithNumber("cdp_port", mcp.Description("Match the master by --remote-debugging-port value."))
	byPID := mcp.WithNumber("pid", mcp.Description("Match the master by exact PID."))

	return mcp.NewTool("browser_close", description, byProfile, byPort, byPID)
}
// handleBrowserClose implements browser_close: it finds the Chromium master
// selected by a (pid, cdp_port or profile), sends SIGTERM, waits up to 10s for it
// to go away and, if it is still there, escalates to SIGKILL and waits up to 3s
// more.
//
// The result is {closed, pid, method} as indented JSON. "method" is the last
// signal sent ("SIGTERM" or "SIGKILL") and "closed" reflects whether the process
// was actually observed gone, so a process that survives even SIGKILL is reported
// as closed=false instead of a false success. Failures are returned as tool-level
// error results; the Go error return is always nil.
func (d *deps) handleBrowserClose(_ context.Context, _ mcp.CallToolRequest, a browserCloseArgs) (*mcp.CallToolResult, error) {
	if a.Profile == "" && a.CDPPort == 0 && a.PID == 0 {
		return mcp.NewToolResultError("one of profile, cdp_port or pid is required"), nil
	}
	masters, err := listChromiumMasters()
	if err != nil {
		return mcp.NewToolResultError(err.Error()), nil
	}
	target, found := matchMaster(masters, a)
	if !found {
		return mcp.NewToolResultError("no running Chromium master matched the given criteria"), nil
	}
	proc, err := os.FindProcess(target.PID)
	if err != nil {
		return mcp.NewToolResultError(fmt.Sprintf("find process %d: %v", target.PID, err)), nil
	}

	method := "SIGTERM"
	if err := proc.Signal(syscall.SIGTERM); err != nil {
		return mcp.NewToolResultError(fmt.Sprintf("SIGTERM pid=%d: %v", target.PID, err)), nil
	}
	// Wait up to ~10s for the process to die (poll /proc liveness).
	gone := waitProcessGone(target.PID, 10*time.Second)
	if !gone {
		method = "SIGKILL"
		killErr := proc.Signal(syscall.SIGKILL)
		gone = waitProcessGone(target.PID, 3*time.Second)
		// A failed SIGKILL only matters if the process is still around; if it
		// exited on its own in the meantime the close succeeded anyway.
		if !gone && killErr != nil {
			return mcp.NewToolResultError(fmt.Sprintf("SIGKILL pid=%d: %v", target.PID, killErr)), nil
		}
	}

	out := map[string]any{
		"closed": gone,
		"pid":    target.PID,
		"method": method,
	}
	b, _ := json.MarshalIndent(out, "", " ")
	return mcp.NewToolResultText(string(b)), nil
}
// matchMaster selects the master that satisfies the close criteria in a. Exactly
// one criterion is applied, chosen by specificity: PID if non-zero, otherwise
// cdp_port if non-zero, otherwise profile. The first master in the slice that
// satisfies it is returned; ok is false when none does.
func matchMaster(masters []chromiumMaster, a browserCloseArgs) (chromiumMaster, bool) {
	var matches func(m chromiumMaster) bool
	switch {
	case a.PID != 0:
		matches = func(m chromiumMaster) bool { return m.PID == a.PID }
	case a.CDPPort != 0:
		// CDPPort is stored as the raw flag text, so compare as strings.
		port := strconv.Itoa(a.CDPPort)
		matches = func(m chromiumMaster) bool { return m.CDPPort == port }
	default:
		matches = func(m chromiumMaster) bool { return m.Profile == a.Profile }
	}

	for i := range masters {
		if matches(masters[i]) {
			return masters[i], true
		}
	}
	return chromiumMaster{}, false
}
// waitProcessGone polls processAlive every 150ms until pid is no longer alive or
// timeout has elapsed. It returns true if the process is gone; one last check is
// made after the deadline.
func waitProcessGone(pid int, timeout time.Duration) bool {
	stopAt := time.Now().Add(timeout)
	for {
		if !time.Now().Before(stopAt) {
			break
		}
		if !processAlive(pid) {
			return true
		}
		time.Sleep(150 * time.Millisecond)
	}
	return !processAlive(pid)
}
// processAlive reports whether pid refers to a process that is still running.
//
// The mere existence of /proc/<pid> is not enough: a process that has exited but
// has not been reaped by its parent (a zombie) keeps its /proc entry. Treating it
// as alive would make callers wait out their whole timeout and escalate to
// SIGKILL for a process that is already dead. The state field of /proc/<pid>/stat
// is therefore inspected, and 'Z' (zombie) and 'X'/'x' (dead) count as not alive.
// If the stat file cannot be read or parsed, the result falls back to whether
// /proc/<pid> exists.
func processAlive(pid int) bool {
	dir := filepath.Join("/proc", strconv.Itoa(pid))
	raw, err := os.ReadFile(filepath.Join(dir, "stat"))
	if err != nil {
		_, statErr := os.Stat(dir)
		return statErr == nil
	}

	// Format: "pid (comm) state ...". comm may itself contain spaces and
	// parentheses, so the state is located after the LAST ')'.
	stat := string(raw)
	end := strings.LastIndexByte(stat, ')')
	if end < 0 || end+2 >= len(stat) {
		return true // unparseable, but the entry exists
	}
	switch stat[end+2] {
	case 'Z', 'X', 'x':
		return false
	}
	return true
}
// waitCDPPort polls the CDP port every 100ms until it accepts a TCP connection or
// timeout has elapsed, and reports whether the port responded. It stands in for a
// blind sleep: it returns as soon as Chromium binds the port. One last probe is
// made after the deadline.
func waitCDPPort(port int, timeout time.Duration) bool {
	for stopAt := time.Now().Add(timeout); time.Now().Before(stopAt); time.Sleep(100 * time.Millisecond) {
		if cdpPortResponds(port) {
			return true
		}
	}
	return cdpPortResponds(port)
}
// cdpPortResponds reports whether something accepts TCP connections on
// 127.0.0.1:<port>. It makes a single dial attempt with a 300ms timeout and
// closes the connection right away; this is a best-effort confirmation only.
func cdpPortResponds(port int) bool {
	dialer := net.Dialer{Timeout: 300 * time.Millisecond}
	conn, err := dialer.Dial("tcp", net.JoinHostPort("127.0.0.1", strconv.Itoa(port)))
	if err == nil {
		conn.Close()
	}
	return err == nil
}