3b68c02b25
4 tools nuevas, wrappers de las primitivas CDP recien creadas: - dom_find_by_role: localizar por rol ARIA + accessible name (getByRole), devuelve #ref - dom_wait_actionable: visible+stable+enabled+hit-test antes de click (anti-overlay) - dom_select_dropdown: desplegables custom (combobox/MUI/select2/headlessui) - dom_fill: rellenar inputs React/Vue de forma fiable (reemplaza, no concatena) Total tools: 50 -> 54. uses_functions del app.md actualizado. Smoke real (Chrome headless 9333) verde para las 4. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
634 lines
28 KiB
Go
634 lines
28 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"math/rand"
|
|
"time"
|
|
|
|
"github.com/mark3labs/mcp-go/mcp"
|
|
"github.com/mark3labs/mcp-go/server"
|
|
|
|
"fn-registry/functions/browser"
|
|
)
|
|
|
|
// registerDomTools wires DOM interaction tools. find/wait stay on under --read-only.
|
|
func registerDomTools(s *server.MCPServer, d *deps) {
|
|
s.AddTool(domFindByTextTool(), mcp.NewTypedToolHandler(d.handleDomFindByText))
|
|
s.AddTool(domFindRefByTextTool(), mcp.NewTypedToolHandler(d.handleDomFindRefByText))
|
|
s.AddTool(domWaitElementTool(), mcp.NewTypedToolHandler(d.handleDomWaitElement))
|
|
s.AddTool(domFindByRoleTool(), mcp.NewTypedToolHandler(d.handleDomFindByRole))
|
|
s.AddTool(domWaitActionableTool(), mcp.NewTypedToolHandler(d.handleDomWaitActionable))
|
|
|
|
if !d.readOnly {
|
|
s.AddTool(domClickTool(), mcp.NewTypedToolHandler(d.handleDomClick))
|
|
s.AddTool(domClickHumanTool(), mcp.NewTypedToolHandler(d.handleDomClickHuman))
|
|
s.AddTool(domClickTextTool(), mcp.NewTypedToolHandler(d.handleDomClickText))
|
|
s.AddTool(domTypeTool(), mcp.NewTypedToolHandler(d.handleDomType))
|
|
s.AddTool(domClickRefTool(), mcp.NewTypedToolHandler(d.handleDomClickRef))
|
|
s.AddTool(domTypeRefTool(), mcp.NewTypedToolHandler(d.handleDomTypeRef))
|
|
s.AddTool(domHoverRefTool(), mcp.NewTypedToolHandler(d.handleDomHoverRef))
|
|
s.AddTool(domClickXYTool(), mcp.NewTypedToolHandler(d.handleDomClickXY))
|
|
s.AddTool(domSelectOptionTool(), mcp.NewTypedToolHandler(d.handleDomSelectOption))
|
|
s.AddTool(domSetFilesTool(), mcp.NewTypedToolHandler(d.handleDomSetFiles))
|
|
s.AddTool(domSelectDropdownTool(), mcp.NewTypedToolHandler(d.handleDomSelectDropdown))
|
|
s.AddTool(domFillTool(), mcp.NewTypedToolHandler(d.handleDomFill))
|
|
}
|
|
}
|
|
|
|
// ---- dom_find_by_role ----
|
|
|
|
// domFindByRoleArgs holds the decoded arguments of the dom_find_by_role tool.
type domFindByRoleArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Role is the ARIA role to look for (required).
	Role string `json:"role"`
	// Name is the accessible name to match; empty matches any element of Role.
	Name string `json:"name"`
	// Exact requests an exact name match instead of a substring match.
	Exact bool `json:"exact"`
	// Regex makes Name be treated as a regular expression.
	Regex bool `json:"regex"`
}
|
|
|
|
func domFindByRoleTool() mcp.Tool {
|
|
return mcp.NewTool("dom_find_by_role",
|
|
mcp.WithDescription("Find an element by ARIA role + accessible name (like Playwright getByRole), reusing the accessibility tree. Returns its #ref (usable with dom_click_ref/dom_hover_ref/dom_type_ref) and how many elements matched (count>1 means ambiguous). More robust to DOM/CSS changes than CSS or text selectors — prefer it to move around the page."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("role", mcp.Required(), mcp.Description("ARIA role, e.g. button, link, textbox, checkbox, combobox, option, tab.")),
|
|
mcp.WithString("name", mcp.Description("Accessible name to match (computed, not innerText). Empty = match any element of that role.")),
|
|
mcp.WithBoolean("exact", mcp.Description("Exact name match instead of substring. Default false (substring).")),
|
|
mcp.WithBoolean("regex", mcp.Description("Treat name as a regular expression. Takes precedence over exact.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomFindByRole(_ context.Context, _ mcp.CallToolRequest, a domFindByRoleArgs) (*mcp.CallToolResult, error) {
|
|
if a.Role == "" {
|
|
return mcp.NewToolResultError("role is required"), nil
|
|
}
|
|
var ref, count int
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
var e error
|
|
ref, count, e = browser.CdpFindByRole(c, a.Role, browser.CdpFindByRoleOpts{Name: a.Name, Exact: a.Exact, Regex: a.Regex})
|
|
return e
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(fmt.Sprintf(`{"ref":%d,"count":%d}`, ref, count)), nil
|
|
}
|
|
|
|
// ---- dom_wait_actionable ----
|
|
|
|
// domWaitActionableArgs holds the decoded arguments of the dom_wait_actionable tool.
type domWaitActionableArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Ref is the #ref (backend node id) of the element to wait for (required).
	Ref int `json:"ref"`
	// NeedEnabled additionally requires the element not to be disabled.
	NeedEnabled bool `json:"need_enabled"`
	// TimeoutMs is the maximum wait in milliseconds; the handler supplies a default.
	TimeoutMs int `json:"timeout_ms"`
}
|
|
|
|
func domWaitActionableTool() mcp.Tool {
|
|
return mcp.NewTool("dom_wait_actionable",
|
|
mcp.WithDescription("Wait until a #ref element is truly actionable before clicking: visible + stable (not animating) + optionally enabled + hit-test passes (no overlay/cookie-banner intercepting the click point). Returns the validated center point {x,y}. Use it before dom_click_xy when a click seems to do nothing — it catches the #1 cause: an overlay swallowing the click, or the element still mounting/animating."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithNumber("ref", mcp.Required(), mcp.Description("#ref (backend node id) from page_perceive / dom_find_*.")),
|
|
mcp.WithBoolean("need_enabled", mcp.Description("Also require the element not be disabled/aria-disabled. Default false.")),
|
|
mcp.WithNumber("timeout_ms", mcp.Description("Max wait in milliseconds. Default 3000.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomWaitActionable(_ context.Context, _ mcp.CallToolRequest, a domWaitActionableArgs) (*mcp.CallToolResult, error) {
|
|
if a.Ref == 0 {
|
|
return mcp.NewToolResultError("ref is required"), nil
|
|
}
|
|
timeout := time.Duration(a.TimeoutMs) * time.Millisecond
|
|
if a.TimeoutMs == 0 {
|
|
timeout = 3 * time.Second
|
|
}
|
|
var x, y float64
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
var e error
|
|
x, y, e = browser.CdpWaitActionable(c, a.Ref, a.NeedEnabled, timeout)
|
|
return e
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(fmt.Sprintf(`{"actionable":true,"x":%.1f,"y":%.1f}`, x, y)), nil
|
|
}
|
|
|
|
// ---- dom_select_dropdown (MUTA) ----
|
|
|
|
// domSelectDropdownArgs holds the decoded arguments of the dom_select_dropdown tool.
type domSelectDropdownArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Trigger is the CSS selector of the element that opens the dropdown (required).
	Trigger string `json:"trigger"`
	// Option is the visible text of the option to pick (required).
	Option string `json:"option"`
	// Exact requests an exact option text match instead of a substring match.
	Exact bool `json:"exact"`
	// TimeoutMs is forwarded unchanged to browser.CdpDropdownOpts.
	TimeoutMs int `json:"timeout_ms"`
	// OptionRole is the ARIA role of the options, forwarded unchanged.
	OptionRole string `json:"option_role"`
}
|
|
|
|
func domSelectDropdownTool() mcp.Tool {
|
|
return mcp.NewTool("dom_select_dropdown",
|
|
mcp.WithDescription("Select an option in a CUSTOM dropdown (combobox/listbox built with divs — MUI, react-select, headlessui, select2), NOT a native <select>. Clicks the trigger, waits for the list to actually open (aria-expanded / visible [role=option]), then real-clicks the matching option. For native <select> use dom_select_option instead."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("trigger", mcp.Required(), mcp.Description("CSS selector of the element that opens the dropdown.")),
|
|
mcp.WithString("option", mcp.Required(), mcp.Description("Visible text of the option to pick.")),
|
|
mcp.WithBoolean("exact", mcp.Description("Exact option text match instead of substring. Default false.")),
|
|
mcp.WithNumber("timeout_ms", mcp.Description("Max wait for open + option in milliseconds. Default 3000.")),
|
|
mcp.WithString("option_role", mcp.Description("ARIA role of options. Default \"option\".")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomSelectDropdown(_ context.Context, _ mcp.CallToolRequest, a domSelectDropdownArgs) (*mcp.CallToolResult, error) {
|
|
if a.Trigger == "" || a.Option == "" {
|
|
return mcp.NewToolResultError("trigger and option are required"), nil
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpSelectDropdown(c, a.Trigger, a.Option, browser.CdpDropdownOpts{Exact: a.Exact, TimeoutMs: a.TimeoutMs, OptionRole: a.OptionRole})
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(fmt.Sprintf("selected %q in dropdown %s", a.Option, a.Trigger)), nil
|
|
}
|
|
|
|
// ---- dom_fill (MUTA) ----
|
|
|
|
// domFillArgs holds the decoded arguments of the dom_fill tool.
type domFillArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Selector is the CSS selector of the field to fill (required).
	Selector string `json:"selector"`
	// Value is the text to set; the handler accepts an empty value.
	Value string `json:"value"`
}
|
|
|
|
func domFillTool() mcp.Tool {
|
|
return mcp.NewTool("dom_fill",
|
|
mcp.WithDescription("Fill a text input/textarea/contenteditable reliably (like Playwright fill): focus + select existing text + insert the value via real input events, so React/Vue-controlled fields update correctly. Replaces the focus+type pattern that concatenates onto the old value. For native special inputs (date/range/color) it sets the value and fires input/change."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the field.")),
|
|
mcp.WithString("value", mcp.Description("Value to set. Empty string clears the field.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomFill(_ context.Context, _ mcp.CallToolRequest, a domFillArgs) (*mcp.CallToolResult, error) {
|
|
if a.Selector == "" {
|
|
return mcp.NewToolResultError("selector is required"), nil
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpFillSelector(c, a.Selector, a.Value)
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(fmt.Sprintf("filled %s", a.Selector)), nil
|
|
}
|
|
|
|
// ---- dom_select_option (MUTA) ----
|
|
|
|
// domSelectOptionArgs holds the decoded arguments of the dom_select_option tool.
type domSelectOptionArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Selector is the CSS selector of the <select> element (required).
	Selector string `json:"selector"`
	// Value is the option value, or its visible text (required).
	Value string `json:"value"`
}
|
|
|
|
func domSelectOptionTool() mcp.Tool {
|
|
return mcp.NewTool("dom_select_option",
|
|
mcp.WithDescription("Select an <option> in a native <select> element (by CSS selector), matching by option value first, then by visible text, and firing input/change events so React/Vue react. For custom (non-<select>) dropdowns use dom_click_ref on the trigger then on the option instead."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the <select> element.")),
|
|
mcp.WithString("value", mcp.Required(), mcp.Description("Option value (or visible text if no value matches).")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomSelectOption(_ context.Context, _ mcp.CallToolRequest, a domSelectOptionArgs) (*mcp.CallToolResult, error) {
|
|
if a.Selector == "" || a.Value == "" {
|
|
return mcp.NewToolResultError("selector and value are required"), nil
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpSelectOption(c, a.Selector, a.Value)
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(fmt.Sprintf("selected %q in %s", a.Value, a.Selector)), nil
|
|
}
|
|
|
|
// ---- dom_set_files (MUTA) ----
|
|
|
|
// domSetFilesArgs holds the decoded arguments of the dom_set_files tool.
type domSetFilesArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Selector is the CSS selector of the file input element (required).
	Selector string `json:"selector"`
	// Paths lists the files to attach; the handler requires at least one.
	Paths []string `json:"paths"`
}
|
|
|
|
func domSetFilesTool() mcp.Tool {
|
|
return mcp.NewTool("dom_set_files",
|
|
mcp.WithDescription("Upload files to an <input type=\"file\"> (by CSS selector) via DOM.setFileInputFiles, without driving the OS file picker. Paths must be absolute and readable by the Chrome process."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the file input element.")),
|
|
mcp.WithArray("paths", mcp.Required(), mcp.Description("Absolute file paths to attach."), mcp.Items(map[string]any{"type": "string"})),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomSetFiles(_ context.Context, _ mcp.CallToolRequest, a domSetFilesArgs) (*mcp.CallToolResult, error) {
|
|
if a.Selector == "" {
|
|
return mcp.NewToolResultError("selector is required"), nil
|
|
}
|
|
if len(a.Paths) == 0 {
|
|
return mcp.NewToolResultError("paths is required (at least one file)"), nil
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpSetFileInput(c, a.Selector, a.Paths)
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(fmt.Sprintf("attached %d file(s) to %s", len(a.Paths), a.Selector)), nil
|
|
}
|
|
|
|
// defaultMode is the speed mode used when neither the call nor the session
// sets one. "auto" is the fast mode (minimal mouse movement, text written in a
// single event, short settle) and is the MCP's default. "human" (Bézier path
// plus random waits) must be requested explicitly, through browser_set_mode or
// the `mode` argument, for sites with strong anti-bot detection.
const defaultMode = "auto"
|
|
|
|
// effectiveMode resuelve el modo de velocidad de una acción: el arg de la llamada
|
|
// gana; si está vacío, el modo de sesión fijado por browser_set_mode; si tampoco
|
|
// hay, defaultMode.
|
|
func (d *deps) effectiveMode(port int, callMode string) string {
|
|
if callMode != "" {
|
|
return callMode
|
|
}
|
|
if m := d.pool.getMode(port); m != "" {
|
|
return m
|
|
}
|
|
return defaultMode
|
|
}
|
|
|
|
// settleForMode returns how long to wait after a mutating action before
// perceiving the page again, giving the DOM time to settle (navigation,
// focus, repaint).
//
//   - "instant": no wait.
//   - "human" (and the empty string): a random wait of 250..650 ms, so the
//     pacing is not machine-regular.
//   - any other value (auto, fast, ...): a fixed 60 ms.
func settleForMode(mode string) time.Duration {
	if mode == "instant" {
		return 0
	}
	if mode == "human" || mode == "" {
		jitterMs := rand.Intn(401) // 0..400 on top of the 250 ms floor
		return time.Duration(250+jitterMs) * time.Millisecond
	}
	return 60 * time.Millisecond
}
|
|
|
|
// ---- dom_click_ref (MUTA) — bucle percibir→actuar ----
|
|
|
|
// domClickRefArgs holds the decoded arguments of the dom_click_ref tool.
type domClickRefArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Ref is the element's #ref (backendDOMNodeId) from the page_perceive outline.
	Ref int `json:"ref"`
	// Mode is the per-call speed mode; empty defers to the session/default mode.
	Mode string `json:"mode"`
}
|
|
|
|
func domClickRefTool() mcp.Tool {
|
|
return mcp.NewTool("dom_click_ref",
|
|
mcp.WithDescription("Click sobre el elemento por su #ref del outline de page_perceive (backendDOMNodeId estable). Devuelve el outline actualizado tras la acción (auto-observe)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithNumber("ref", mcp.Required(), mcp.Description("#ref del elemento (backendDOMNodeId) leído del outline de page_perceive.")),
|
|
mcp.WithString("mode", mcp.Description("Velocidad: 'auto' (default de sesión: movimiento de ratón reducido, rápido), 'human' (Bézier+jitter+pausas aleatorias anti-bot, para detección fuerte), 'instant' (element.click() JS, sin eventos de ratón; también fallback si el elemento no tiene geometría). Vacío = modo de sesión (browser_set_mode) o 'auto'.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomClickRef(_ context.Context, _ mcp.CallToolRequest, a domClickRefArgs) (*mcp.CallToolResult, error) {
|
|
port := portOr(a.Port)
|
|
mode := d.effectiveMode(port, a.Mode)
|
|
err := d.withConn(port, func(c *browser.CDPConn) error {
|
|
return browser.CdpClickRef(c, a.Ref, browser.MouseProfileForMode(mode))
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
if dl := settleForMode(mode); dl > 0 {
|
|
time.Sleep(dl)
|
|
}
|
|
outline, _ := d.perceiveOutline(port, 8000)
|
|
return mcp.NewToolResultText("clicked ref " + fmt.Sprint(a.Ref) + "\n\n" + outline), nil
|
|
}
|
|
|
|
// ---- dom_type_ref (MUTA) — bucle percibir→actuar ----
|
|
|
|
// domTypeRefArgs holds the decoded arguments of the dom_type_ref tool.
type domTypeRefArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Ref is the element's #ref (backendDOMNodeId) from the page_perceive outline.
	Ref int `json:"ref"`
	// Text is the text to write into the element (required).
	Text string `json:"text"`
	// Mode is the per-call speed mode; empty defers to the session/default mode.
	Mode string `json:"mode"`
}
|
|
|
|
func domTypeRefTool() mcp.Tool {
|
|
return mcp.NewTool("dom_type_ref",
|
|
mcp.WithDescription("Enfoca el elemento por su #ref del outline de page_perceive (backendDOMNodeId estable) y escribe el texto. Devuelve el outline actualizado tras la acción (auto-observe)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithNumber("ref", mcp.Required(), mcp.Description("#ref del elemento (backendDOMNodeId) leído del outline de page_perceive.")),
|
|
mcp.WithString("text", mcp.Required(), mcp.Description("Texto a escribir en el elemento.")),
|
|
mcp.WithString("mode", mcp.Description("Velocidad: 'auto' (default de sesión, escribe en un solo evento Input.insertText — rápido) o 'human' (caracter a caracter con pausas aleatorias, anti-detección). Vacío = modo de sesión (browser_set_mode) o 'auto'.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomTypeRef(_ context.Context, _ mcp.CallToolRequest, a domTypeRefArgs) (*mcp.CallToolResult, error) {
|
|
if a.Text == "" {
|
|
return mcp.NewToolResultError("text is required"), nil
|
|
}
|
|
port := portOr(a.Port)
|
|
mode := d.effectiveMode(port, a.Mode)
|
|
err := d.withConn(port, func(c *browser.CDPConn) error {
|
|
// human => teclea caracter a caracter (eventos de tecla reales + ritmo
|
|
// irregular). auto/fast/instant => inserta todo en un solo round-trip.
|
|
if mode == "human" {
|
|
return browser.CdpTypeRef(c, a.Ref, a.Text)
|
|
}
|
|
return browser.CdpTypeRefFast(c, a.Ref, a.Text)
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
if dl := settleForMode(mode); dl > 0 {
|
|
time.Sleep(dl)
|
|
}
|
|
outline, _ := d.perceiveOutline(port, 8000)
|
|
return mcp.NewToolResultText("typed into ref " + fmt.Sprint(a.Ref) + "\n\n" + outline), nil
|
|
}
|
|
|
|
// ---- dom_hover_ref (MUTA) — bucle percibir→actuar ----
|
|
|
|
// domHoverRefArgs holds the decoded arguments of the dom_hover_ref tool.
type domHoverRefArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Ref is the element's #ref (backendDOMNodeId) from the page_perceive outline.
	Ref int `json:"ref"`
	// Mode is the per-call speed mode; empty defers to the session/default mode.
	Mode string `json:"mode"`
}
|
|
|
|
func domHoverRefTool() mcp.Tool {
|
|
return mcp.NewTool("dom_hover_ref",
|
|
mcp.WithDescription("Hover sobre el elemento por su #ref del outline de page_perceive (backendDOMNodeId estable). Devuelve el outline actualizado tras la acción (auto-observe)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithNumber("ref", mcp.Required(), mcp.Description("#ref del elemento (backendDOMNodeId) leído del outline de page_perceive.")),
|
|
mcp.WithString("mode", mcp.Description("Velocidad: 'auto' (default de sesión: movimiento reducido, rápido), 'human' (Bézier+jitter+pausas aleatorias anti-bot), 'instant' (sin movimiento de ratón). Vacío = modo de sesión (browser_set_mode) o 'auto'.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomHoverRef(_ context.Context, _ mcp.CallToolRequest, a domHoverRefArgs) (*mcp.CallToolResult, error) {
|
|
port := portOr(a.Port)
|
|
mode := d.effectiveMode(port, a.Mode)
|
|
err := d.withConn(port, func(c *browser.CDPConn) error {
|
|
return browser.CdpHoverRef(c, a.Ref, browser.MouseProfileForMode(mode))
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
if dl := settleForMode(mode); dl > 0 {
|
|
time.Sleep(dl)
|
|
}
|
|
outline, _ := d.perceiveOutline(port, 8000)
|
|
return mcp.NewToolResultText("hovered ref " + fmt.Sprint(a.Ref) + "\n\n" + outline), nil
|
|
}
|
|
|
|
// ---- dom_click_xy (MUTA) — click humanizado por coordenadas absolutas ----
|
|
|
|
// domClickXYArgs holds the decoded arguments of the dom_click_xy tool.
type domClickXYArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// X is the absolute X coordinate, in CSS pixels of the viewport.
	X float64 `json:"x"`
	// Y is the absolute Y coordinate, in CSS pixels of the viewport.
	Y float64 `json:"y"`
	// Mode is the per-call speed mode; empty defers to the session/default mode.
	Mode string `json:"mode"`
}
|
|
|
|
func domClickXYTool() mcp.Tool {
|
|
return mcp.NewTool("dom_click_xy",
|
|
mcp.WithDescription("Fallback de click por coordenadas absolutas (x, y) en CSS pixels del viewport, con movimiento de ratón humanizado por defecto. Pensado para usarse sobre lo que el agente VE en page_screenshot cuando el outline de page_perceive no basta (canvas, mapas, layouts visuales). Prefiere dom_click_ref cuando el elemento aparece en el outline. Devuelve el outline actualizado tras la acción (auto-observe)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithNumber("x", mcp.Required(), mcp.Description("Coordenada X absoluta en CSS pixels del viewport.")),
|
|
mcp.WithNumber("y", mcp.Required(), mcp.Description("Coordenada Y absoluta en CSS pixels del viewport.")),
|
|
mcp.WithString("mode", mcp.Description("Velocidad: 'auto' (default de sesión: movimiento reducido, rápido), 'human' (Bézier+jitter+pausas aleatorias anti-bot), 'instant' (sin movimiento de ratón). Vacío = modo de sesión (browser_set_mode) o 'auto'.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomClickXY(_ context.Context, _ mcp.CallToolRequest, a domClickXYArgs) (*mcp.CallToolResult, error) {
|
|
port := portOr(a.Port)
|
|
mode := d.effectiveMode(port, a.Mode)
|
|
err := d.withConn(port, func(c *browser.CDPConn) error {
|
|
return browser.CdpClickXYHuman(c, a.X, a.Y, browser.MouseProfileForMode(mode))
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
if dl := settleForMode(mode); dl > 0 {
|
|
time.Sleep(dl)
|
|
}
|
|
outline, _ := d.perceiveOutline(port, 8000)
|
|
return mcp.NewToolResultText(fmt.Sprintf("clicked at (%g, %g)\n\n%s", a.X, a.Y, outline)), nil
|
|
}
|
|
|
|
// ---- dom_click (MUTA) ----
|
|
|
|
// domClickArgs holds the decoded arguments of the dom_click tool.
type domClickArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Selector is the CSS selector of the element to click (required).
	Selector string `json:"selector"`
}
|
|
|
|
func domClickTool() mcp.Tool {
|
|
return mcp.NewTool("dom_click",
|
|
mcp.WithDescription("Click the element matching the CSS selector (synthetic CDP click)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the element to click.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomClick(_ context.Context, _ mcp.CallToolRequest, a domClickArgs) (*mcp.CallToolResult, error) {
|
|
if a.Selector == "" {
|
|
return mcp.NewToolResultError("selector is required"), nil
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpClick(c, a.Selector)
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText("clicked " + a.Selector), nil
|
|
}
|
|
|
|
// ---- dom_click_human (MUTA) ----
|
|
|
|
// domClickHumanArgs holds the decoded arguments of the dom_click_human tool.
type domClickHumanArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Selector is the CSS selector of the element to click (required).
	Selector string `json:"selector"`
}
|
|
|
|
func domClickHumanTool() mcp.Tool {
|
|
return mcp.NewTool("dom_click_human",
|
|
mcp.WithDescription("Click the element matching the CSS selector with human-like mouse movement (Bézier path + jitter + press/release pause)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the element to click.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomClickHuman(_ context.Context, _ mcp.CallToolRequest, a domClickHumanArgs) (*mcp.CallToolResult, error) {
|
|
if a.Selector == "" {
|
|
return mcp.NewToolResultError("selector is required"), nil
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpClickHuman(c, a.Selector, browser.MouseHumanOpts{})
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText("clicked (human) " + a.Selector), nil
|
|
}
|
|
|
|
// ---- dom_click_text (MUTA) ----
|
|
|
|
// domClickTextArgs holds the decoded arguments of the dom_click_text tool.
type domClickTextArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Text is the visible text to match (required).
	Text string `json:"text"`
}
|
|
|
|
func domClickTextTool() mcp.Tool {
|
|
return mcp.NewTool("dom_click_text",
|
|
mcp.WithDescription("Find the first element whose visible text matches and click it."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("text", mcp.Required(), mcp.Description("Visible text to match (substring).")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomClickText(_ context.Context, _ mcp.CallToolRequest, a domClickTextArgs) (*mcp.CallToolResult, error) {
|
|
if a.Text == "" {
|
|
return mcp.NewToolResultError("text is required"), nil
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpClickText(c, a.Text, browser.FindByTextOpts{})
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText("clicked text " + a.Text), nil
|
|
}
|
|
|
|
// ---- dom_type (MUTA) ----
|
|
|
|
// domTypeArgs holds the decoded arguments of the dom_type tool.
type domTypeArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Text is the text to type (required).
	Text string `json:"text"`
}
|
|
|
|
func domTypeTool() mcp.Tool {
|
|
return mcp.NewTool("dom_type",
|
|
mcp.WithDescription("Type text into the currently focused element (dispatches key events char by char)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("text", mcp.Required(), mcp.Description("Text to type.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomType(_ context.Context, _ mcp.CallToolRequest, a domTypeArgs) (*mcp.CallToolResult, error) {
|
|
if a.Text == "" {
|
|
return mcp.NewToolResultError("text is required"), nil
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpTypeText(c, a.Text)
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText("typed text"), nil
|
|
}
|
|
|
|
// ---- dom_find_by_text ----
|
|
|
|
// domFindByTextArgs holds the decoded arguments of the dom_find_by_text tool.
type domFindByTextArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Text is the visible text to match (required).
	Text string `json:"text"`
}
|
|
|
|
func domFindByTextTool() mcp.Tool {
|
|
return mcp.NewTool("dom_find_by_text",
|
|
mcp.WithDescription("Find the first element whose visible text matches and return a unique CSS selector for it (empty string if none)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("text", mcp.Required(), mcp.Description("Visible text to match (substring).")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomFindByText(_ context.Context, _ mcp.CallToolRequest, a domFindByTextArgs) (*mcp.CallToolResult, error) {
|
|
if a.Text == "" {
|
|
return mcp.NewToolResultError("text is required"), nil
|
|
}
|
|
var sel string
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
var e error
|
|
sel, e = browser.CdpFindByText(c, a.Text, browser.FindByTextOpts{})
|
|
return e
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(sel), nil
|
|
}
|
|
|
|
// ---- dom_find_ref_by_text ----
|
|
|
|
// domFindRefByTextArgs holds the decoded arguments of the dom_find_ref_by_text tool.
type domFindRefByTextArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Text is the visible text to match (required).
	Text string `json:"text"`
}
|
|
|
|
func domFindRefByTextTool() mcp.Tool {
|
|
return mcp.NewTool("dom_find_ref_by_text",
|
|
mcp.WithDescription("Find the first element whose visible text matches and return its #ref (backendDOMNodeId) ready for dom_click_ref/dom_hover_ref — no fragile CSS selector. Also reports how many elements match (count>1 = ambiguous)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("text", mcp.Required(), mcp.Description("Visible text to match (substring).")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomFindRefByText(_ context.Context, _ mcp.CallToolRequest, a domFindRefByTextArgs) (*mcp.CallToolResult, error) {
|
|
if a.Text == "" {
|
|
return mcp.NewToolResultError("text is required"), nil
|
|
}
|
|
var ref, count int
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
var e error
|
|
ref, count, e = browser.CdpFindRefByText(c, a.Text, browser.FindByTextOpts{})
|
|
return e
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
msg := fmt.Sprintf("ref=%d count=%d", ref, count)
|
|
if count > 1 {
|
|
msg += " (ambiguous: returning the first match; refine the text to disambiguate)"
|
|
}
|
|
return mcp.NewToolResultText(msg), nil
|
|
}
|
|
|
|
// ---- dom_wait_element ----
|
|
|
|
// domWaitElementArgs holds the decoded arguments of the dom_wait_element tool.
type domWaitElementArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Selector is the CSS selector to wait for (required).
	Selector string `json:"selector"`
	// TimeoutMs is the maximum wait in milliseconds; values <= 0 use the handler's default.
	TimeoutMs int `json:"timeout_ms"`
}
|
|
|
|
func domWaitElementTool() mcp.Tool {
|
|
return mcp.NewTool("dom_wait_element",
|
|
mcp.WithDescription("Block until an element matching the CSS selector appears in the DOM (or timeout)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector to wait for.")),
|
|
mcp.WithNumber("timeout_ms", mcp.Description("Max wait in ms. Default 10000.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomWaitElement(_ context.Context, _ mcp.CallToolRequest, a domWaitElementArgs) (*mcp.CallToolResult, error) {
|
|
if a.Selector == "" {
|
|
return mcp.NewToolResultError("selector is required"), nil
|
|
}
|
|
timeout := a.TimeoutMs
|
|
if timeout <= 0 {
|
|
timeout = 10000
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpWaitElement(c, a.Selector, time.Duration(timeout)*time.Millisecond)
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText("element appeared: " + a.Selector), nil
|
|
}
|