feat: tools de interacción estilo Playwright (dropdowns, fill, role, actionable)

4 tools nuevas, wrappers de las primitivas CDP recién creadas:
- dom_find_by_role: localizar por rol ARIA + accessible name (getByRole), devuelve #ref
- dom_wait_actionable: visible+stable+enabled+hit-test antes de click (anti-overlay)
- dom_select_dropdown: desplegables custom (combobox/MUI/select2/headlessui)
- dom_fill: rellenar inputs React/Vue de forma fiable (reemplaza, no concatena)

Total tools: 50 -> 54. uses_functions del app.md actualizado.
Smoke real (Chrome headless 9333) verde para las 4.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Egutierrez
2026-06-16 20:55:13 +02:00
parent d687a501ba
commit 3b68c02b25
2 changed files with 150 additions and 0 deletions
+4
View File
@@ -63,6 +63,10 @@ uses_functions:
- cdp_print_pdf_go_browser
- cdp_select_option_go_browser
- cdp_set_file_input_go_browser
- cdp_wait_actionable_go_browser
- cdp_select_dropdown_go_browser
- cdp_fill_go_browser
- cdp_find_by_role_go_browser
uses_types: []
framework: ""
entry_point: "main.go"
+146
View File
@@ -17,6 +17,8 @@ func registerDomTools(s *server.MCPServer, d *deps) {
s.AddTool(domFindByTextTool(), mcp.NewTypedToolHandler(d.handleDomFindByText))
s.AddTool(domFindRefByTextTool(), mcp.NewTypedToolHandler(d.handleDomFindRefByText))
s.AddTool(domWaitElementTool(), mcp.NewTypedToolHandler(d.handleDomWaitElement))
s.AddTool(domFindByRoleTool(), mcp.NewTypedToolHandler(d.handleDomFindByRole))
s.AddTool(domWaitActionableTool(), mcp.NewTypedToolHandler(d.handleDomWaitActionable))
if !d.readOnly {
s.AddTool(domClickTool(), mcp.NewTypedToolHandler(d.handleDomClick))
@@ -29,9 +31,153 @@ func registerDomTools(s *server.MCPServer, d *deps) {
s.AddTool(domClickXYTool(), mcp.NewTypedToolHandler(d.handleDomClickXY))
s.AddTool(domSelectOptionTool(), mcp.NewTypedToolHandler(d.handleDomSelectOption))
s.AddTool(domSetFilesTool(), mcp.NewTypedToolHandler(d.handleDomSetFiles))
s.AddTool(domSelectDropdownTool(), mcp.NewTypedToolHandler(d.handleDomSelectDropdown))
s.AddTool(domFillTool(), mcp.NewTypedToolHandler(d.handleDomFill))
}
}
// ---- dom_find_by_role ----
// domFindByRoleArgs is the typed argument payload of the dom_find_by_role
// tool, decoded from the JSON arguments of the tool call.
type domFindByRoleArgs struct {
	// Port is the CDP port to connect to. It is passed through portOr by the
	// handler, which presumably substitutes the default when it is zero.
	Port int `json:"port"`

	// Role is the ARIA role to look for; the handler rejects an empty role.
	Role string `json:"role"`

	// Name is the accessible name to match against.
	Name string `json:"name"`

	// Exact requests an exact name match rather than a substring match.
	Exact bool `json:"exact"`

	// Regex requests that Name be treated as a regular expression.
	Regex bool `json:"regex"`
}
// domFindByRoleTool builds the MCP tool definition for dom_find_by_role:
// a required "role" string plus the optional "port", "name", "exact" and
// "regex" parameters.
func domFindByRoleTool() mcp.Tool {
	const (
		summary  = "Find an element by ARIA role + accessible name (like Playwright getByRole), reusing the accessibility tree. Returns its #ref (usable with dom_click_ref/dom_hover_ref/dom_type_ref) and how many elements matched (count>1 means ambiguous). More robust to DOM/CSS changes than CSS or text selectors — prefer it to move around the page."
		portHelp = "CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario."
		roleHelp = "ARIA role, e.g. button, link, textbox, checkbox, combobox, option, tab."
		nameHelp = "Accessible name to match (computed, not innerText). Empty = match any element of that role."
	)

	tool := mcp.NewTool(
		"dom_find_by_role",
		mcp.WithDescription(summary),
		mcp.WithNumber("port", mcp.Description(portHelp)),
		mcp.WithString("role", mcp.Required(), mcp.Description(roleHelp)),
		mcp.WithString("name", mcp.Description(nameHelp)),
		mcp.WithBoolean("exact", mcp.Description("Exact name match instead of substring. Default false (substring).")),
		mcp.WithBoolean("regex", mcp.Description("Treat name as a regular expression. Takes precedence over exact.")),
	)
	return tool
}
// handleDomFindByRole serves the dom_find_by_role tool. It rejects an empty
// role, then runs browser.CdpFindByRole over a connection obtained through
// d.withConn and reports the result as the JSON text {"ref":N,"count":M}.
//
// Failures are returned as tool-level error results; the Go error return is
// always nil.
func (d *deps) handleDomFindByRole(_ context.Context, _ mcp.CallToolRequest, a domFindByRoleArgs) (*mcp.CallToolResult, error) {
	if len(a.Role) == 0 {
		return mcp.NewToolResultError("role is required"), nil
	}

	opts := browser.CdpFindByRoleOpts{
		Name:  a.Name,
		Exact: a.Exact,
		Regex: a.Regex,
	}

	// The closure writes its results into these captured variables because
	// withConn only propagates the error.
	var (
		nodeRef int
		matches int
	)
	lookup := func(conn *browser.CDPConn) error {
		var findErr error
		nodeRef, matches, findErr = browser.CdpFindByRole(conn, a.Role, opts)
		return findErr
	}

	if err := d.withConn(portOr(a.Port), lookup); err != nil {
		return mcp.NewToolResultError(err.Error()), nil
	}

	payload := fmt.Sprintf(`{"ref":%d,"count":%d}`, nodeRef, matches)
	return mcp.NewToolResultText(payload), nil
}
// ---- dom_wait_actionable ----
// domWaitActionableArgs is the typed argument payload of the
// dom_wait_actionable tool, decoded from the JSON arguments of the tool call.
type domWaitActionableArgs struct {
	// Port is the CDP port to connect to. It is passed through portOr by the
	// handler, which presumably substitutes the default when it is zero.
	Port int `json:"port"`

	// Ref is the #ref of the element to wait for; the handler rejects zero.
	Ref int `json:"ref"`

	// NeedEnabled is forwarded to the wait primitive to additionally require
	// that the element is enabled.
	NeedEnabled bool `json:"need_enabled"`

	// TimeoutMs is the maximum wait in milliseconds; the handler applies a
	// 3 second default when it is zero.
	TimeoutMs int `json:"timeout_ms"`
}
// domWaitActionableTool builds the MCP tool definition for
// dom_wait_actionable: a required "ref" number plus the optional "port",
// "need_enabled" and "timeout_ms" parameters.
func domWaitActionableTool() mcp.Tool {
	const (
		summary  = "Wait until a #ref element is truly actionable before clicking: visible + stable (not animating) + optionally enabled + hit-test passes (no overlay/cookie-banner intercepting the click point). Returns the validated center point {x,y}. Use it before dom_click_xy when a click seems to do nothing — it catches the #1 cause: an overlay swallowing the click, or the element still mounting/animating."
		portHelp = "CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario."
		refHelp  = "#ref (backend node id) from page_perceive / dom_find_*."
	)

	tool := mcp.NewTool(
		"dom_wait_actionable",
		mcp.WithDescription(summary),
		mcp.WithNumber("port", mcp.Description(portHelp)),
		mcp.WithNumber("ref", mcp.Required(), mcp.Description(refHelp)),
		mcp.WithBoolean("need_enabled", mcp.Description("Also require the element not be disabled/aria-disabled. Default false.")),
		mcp.WithNumber("timeout_ms", mcp.Description("Max wait in milliseconds. Default 3000.")),
	)
	return tool
}
// handleDomWaitActionable serves the dom_wait_actionable tool. It rejects a
// zero ref, then calls browser.CdpWaitActionable over a connection obtained
// through d.withConn and reports the returned point as the JSON text
// {"actionable":true,"x":X,"y":Y}, with coordinates formatted to one decimal.
//
// timeout_ms defaults to 3 seconds when omitted. Non-positive values also use
// that default, so a negative timeout never reaches the wait primitive as a
// negative duration.
//
// Failures are returned as tool-level error results; the Go error return is
// always nil.
func (d *deps) handleDomWaitActionable(_ context.Context, _ mcp.CallToolRequest, a domWaitActionableArgs) (*mcp.CallToolResult, error) {
	if a.Ref == 0 {
		return mcp.NewToolResultError("ref is required"), nil
	}

	// Start from the documented default and only override it with a usable
	// (strictly positive) caller-supplied value.
	timeout := 3 * time.Second
	if a.TimeoutMs > 0 {
		timeout = time.Duration(a.TimeoutMs) * time.Millisecond
	}

	// The closure writes the point into captured variables because withConn
	// only propagates the error.
	var x, y float64
	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
		var e error
		x, y, e = browser.CdpWaitActionable(c, a.Ref, a.NeedEnabled, timeout)
		return e
	})
	if err != nil {
		return mcp.NewToolResultError(err.Error()), nil
	}
	return mcp.NewToolResultText(fmt.Sprintf(`{"actionable":true,"x":%.1f,"y":%.1f}`, x, y)), nil
}
// ---- dom_select_dropdown (MUTA) ----
// domSelectDropdownArgs is the typed argument payload of the
// dom_select_dropdown tool, decoded from the JSON arguments of the tool call.
type domSelectDropdownArgs struct {
	// Port is the CDP port to connect to. It is passed through portOr by the
	// handler, which presumably substitutes the default when it is zero.
	Port int `json:"port"`

	// Trigger identifies the element that opens the dropdown; the handler
	// rejects an empty value.
	Trigger string `json:"trigger"`

	// Option is the option to pick; the handler rejects an empty value.
	Option string `json:"option"`

	// Exact is forwarded as browser.CdpDropdownOpts.Exact.
	Exact bool `json:"exact"`

	// TimeoutMs is forwarded unchanged as browser.CdpDropdownOpts.TimeoutMs.
	TimeoutMs int `json:"timeout_ms"`

	// OptionRole is forwarded unchanged as browser.CdpDropdownOpts.OptionRole.
	OptionRole string `json:"option_role"`
}
// domSelectDropdownTool builds the MCP tool definition for
// dom_select_dropdown: required "trigger" and "option" strings plus the
// optional "port", "exact", "timeout_ms" and "option_role" parameters.
func domSelectDropdownTool() mcp.Tool {
	const (
		summary     = "Select an option in a CUSTOM dropdown (combobox/listbox built with divs — MUI, react-select, headlessui, select2), NOT a native <select>. Clicks the trigger, waits for the list to actually open (aria-expanded / visible [role=option]), then real-clicks the matching option. For native <select> use dom_select_option instead."
		portHelp    = "CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario."
		triggerHelp = "CSS selector of the element that opens the dropdown."
		optionHelp  = "Visible text of the option to pick."
	)

	tool := mcp.NewTool(
		"dom_select_dropdown",
		mcp.WithDescription(summary),
		mcp.WithNumber("port", mcp.Description(portHelp)),
		mcp.WithString("trigger", mcp.Required(), mcp.Description(triggerHelp)),
		mcp.WithString("option", mcp.Required(), mcp.Description(optionHelp)),
		mcp.WithBoolean("exact", mcp.Description("Exact option text match instead of substring. Default false.")),
		mcp.WithNumber("timeout_ms", mcp.Description("Max wait for open + option in milliseconds. Default 3000.")),
		mcp.WithString("option_role", mcp.Description("ARIA role of options. Default \"option\".")),
	)
	return tool
}
// handleDomSelectDropdown serves the dom_select_dropdown tool. It requires
// both trigger and option to be non-empty, then calls
// browser.CdpSelectDropdown over a connection obtained through d.withConn,
// forwarding exact, timeout_ms and option_role unchanged as options.
//
// On success it returns a short confirmation text. Failures are returned as
// tool-level error results; the Go error return is always nil.
func (d *deps) handleDomSelectDropdown(_ context.Context, _ mcp.CallToolRequest, a domSelectDropdownArgs) (*mcp.CallToolResult, error) {
	missing := len(a.Trigger) == 0 || len(a.Option) == 0
	if missing {
		return mcp.NewToolResultError("trigger and option are required"), nil
	}

	opts := browser.CdpDropdownOpts{
		Exact:      a.Exact,
		TimeoutMs:  a.TimeoutMs,
		OptionRole: a.OptionRole,
	}
	pick := func(conn *browser.CDPConn) error {
		return browser.CdpSelectDropdown(conn, a.Trigger, a.Option, opts)
	}

	if err := d.withConn(portOr(a.Port), pick); err != nil {
		return mcp.NewToolResultError(err.Error()), nil
	}

	confirmation := fmt.Sprintf("selected %q in dropdown %s", a.Option, a.Trigger)
	return mcp.NewToolResultText(confirmation), nil
}
// ---- dom_fill (MUTA) ----
// domFillArgs is the typed argument payload of the dom_fill tool, decoded
// from the JSON arguments of the tool call.
type domFillArgs struct {
	// Port is the CDP port to connect to. It is passed through portOr by the
	// handler, which presumably substitutes the default when it is zero.
	Port int `json:"port"`

	// Selector identifies the field to fill; the handler rejects an empty
	// value.
	Selector string `json:"selector"`

	// Value is the text handed to the fill primitive. The handler does not
	// validate it, so an empty string is forwarded as-is.
	Value string `json:"value"`
}
// domFillTool builds the MCP tool definition for dom_fill: a required
// "selector" string plus the optional "port" and "value" parameters.
func domFillTool() mcp.Tool {
	const (
		summary  = "Fill a text input/textarea/contenteditable reliably (like Playwright fill): focus + select existing text + insert the value via real input events, so React/Vue-controlled fields update correctly. Replaces the focus+type pattern that concatenates onto the old value. For native special inputs (date/range/color) it sets the value and fires input/change."
		portHelp = "CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario."
	)

	tool := mcp.NewTool(
		"dom_fill",
		mcp.WithDescription(summary),
		mcp.WithNumber("port", mcp.Description(portHelp)),
		mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the field.")),
		mcp.WithString("value", mcp.Description("Value to set. Empty string clears the field.")),
	)
	return tool
}
// handleDomFill serves the dom_fill tool. It requires a non-empty selector,
// then calls browser.CdpFillSelector with the selector and value over a
// connection obtained through d.withConn.
//
// On success it returns a short confirmation text. Failures are returned as
// tool-level error results; the Go error return is always nil.
func (d *deps) handleDomFill(_ context.Context, _ mcp.CallToolRequest, a domFillArgs) (*mcp.CallToolResult, error) {
	if len(a.Selector) == 0 {
		return mcp.NewToolResultError("selector is required"), nil
	}

	fill := func(conn *browser.CDPConn) error {
		return browser.CdpFillSelector(conn, a.Selector, a.Value)
	}
	if err := d.withConn(portOr(a.Port), fill); err != nil {
		return mcp.NewToolResultError(err.Error()), nil
	}

	confirmation := fmt.Sprintf("filled %s", a.Selector)
	return mcp.NewToolResultText(confirmation), nil
}
// ---- dom_select_option (MUTA) ----
type domSelectOptionArgs struct {