diff --git a/app.md b/app.md
index 327c2f7..e762456 100644
--- a/app.md
+++ b/app.md
@@ -63,6 +63,10 @@ uses_functions:
   - cdp_print_pdf_go_browser
   - cdp_select_option_go_browser
   - cdp_set_file_input_go_browser
+  - cdp_wait_actionable_go_browser
+  - cdp_select_dropdown_go_browser
+  - cdp_fill_go_browser
+  - cdp_find_by_role_go_browser
 uses_types: []
 framework: ""
 entry_point: "main.go"
diff --git a/tools_dom.go b/tools_dom.go
index 6a617c9..269d519 100644
--- a/tools_dom.go
+++ b/tools_dom.go
@@ -17,6 +17,8 @@ func registerDomTools(s *server.MCPServer, d *deps) {
 	s.AddTool(domFindByTextTool(), mcp.NewTypedToolHandler(d.handleDomFindByText))
 	s.AddTool(domFindRefByTextTool(), mcp.NewTypedToolHandler(d.handleDomFindRefByText))
 	s.AddTool(domWaitElementTool(), mcp.NewTypedToolHandler(d.handleDomWaitElement))
+	s.AddTool(domFindByRoleTool(), mcp.NewTypedToolHandler(d.handleDomFindByRole))
+	s.AddTool(domWaitActionableTool(), mcp.NewTypedToolHandler(d.handleDomWaitActionable))
 
 	if !d.readOnly {
 		s.AddTool(domClickTool(), mcp.NewTypedToolHandler(d.handleDomClick))
@@ -29,9 +31,182 @@ func registerDomTools(s *server.MCPServer, d *deps) {
 		s.AddTool(domClickXYTool(), mcp.NewTypedToolHandler(d.handleDomClickXY))
 		s.AddTool(domSelectOptionTool(), mcp.NewTypedToolHandler(d.handleDomSelectOption))
 		s.AddTool(domSetFilesTool(), mcp.NewTypedToolHandler(d.handleDomSetFiles))
+		s.AddTool(domSelectDropdownTool(), mcp.NewTypedToolHandler(d.handleDomSelectDropdown))
+		s.AddTool(domFillTool(), mcp.NewTypedToolHandler(d.handleDomFill))
 	}
 }
 
+// ---- dom_find_by_role ----
+
+// domFindByRoleArgs is the JSON argument payload of the dom_find_by_role tool.
+type domFindByRoleArgs struct {
+	Port  int    `json:"port"`
+	Role  string `json:"role"`
+	Name  string `json:"name"`
+	Exact bool   `json:"exact"`
+	Regex bool   `json:"regex"`
+}
+
+// domFindByRoleTool declares the dom_find_by_role tool: its name, description
+// and parameters (role is the only required one).
+func domFindByRoleTool() mcp.Tool {
+	return mcp.NewTool("dom_find_by_role",
+		mcp.WithDescription("Find an element by ARIA role + accessible name (like Playwright getByRole), reusing the accessibility tree. Returns its #ref (usable with dom_click_ref/dom_hover_ref/dom_type_ref) and how many elements matched (count>1 means ambiguous). More robust to DOM/CSS changes than CSS or text selectors — prefer it to move around the page."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
+		mcp.WithString("role", mcp.Required(), mcp.Description("ARIA role, e.g. button, link, textbox, checkbox, combobox, option, tab.")),
+		mcp.WithString("name", mcp.Description("Accessible name to match (computed, not innerText). Empty = match any element of that role.")),
+		mcp.WithBoolean("exact", mcp.Description("Exact name match instead of substring. Default false (substring).")),
+		mcp.WithBoolean("regex", mcp.Description("Treat name as a regular expression. Takes precedence over exact.")),
+	)
+}
+
+// handleDomFindByRole rejects an empty role, then runs browser.CdpFindByRole on
+// the connection for portOr(a.Port) and reports the ref and match count as a
+// JSON text result. Failures are returned as tool error results, never as a Go
+// error.
+func (d *deps) handleDomFindByRole(_ context.Context, _ mcp.CallToolRequest, a domFindByRoleArgs) (*mcp.CallToolResult, error) {
+	if a.Role == "" {
+		return mcp.NewToolResultError("role is required"), nil
+	}
+	var ref, count int
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		ref, count, e = browser.CdpFindByRole(c, a.Role, browser.CdpFindByRoleOpts{Name: a.Name, Exact: a.Exact, Regex: a.Regex})
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(fmt.Sprintf(`{"ref":%d,"count":%d}`, ref, count)), nil
+}
+
+// ---- dom_wait_actionable ----
+
+// domWaitActionableArgs is the JSON argument payload of the dom_wait_actionable tool.
+type domWaitActionableArgs struct {
+	Port        int  `json:"port"`
+	Ref         int  `json:"ref"`
+	NeedEnabled bool `json:"need_enabled"`
+	TimeoutMs   int  `json:"timeout_ms"`
+}
+
+// domWaitActionableTool declares the dom_wait_actionable tool: its name,
+// description and parameters (ref is the only required one).
+func domWaitActionableTool() mcp.Tool {
+	return mcp.NewTool("dom_wait_actionable",
+		mcp.WithDescription("Wait until a #ref element is truly actionable before clicking: visible + stable (not animating) + optionally enabled + hit-test passes (no overlay/cookie-banner intercepting the click point). Returns the validated center point {x,y}. Use it before dom_click_xy when a click seems to do nothing — it catches the #1 cause: an overlay swallowing the click, or the element still mounting/animating."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
+		mcp.WithNumber("ref", mcp.Required(), mcp.Description("#ref (backend node id) from page_perceive / dom_find_*.")),
+		mcp.WithBoolean("need_enabled", mcp.Description("Also require the element not be disabled/aria-disabled. Default false.")),
+		mcp.WithNumber("timeout_ms", mcp.Description("Max wait in milliseconds. Default 3000.")),
+	)
+}
+
+// handleDomWaitActionable rejects a zero ref, then runs browser.CdpWaitActionable
+// with the requested timeout and reports the returned point as a JSON text
+// result. Failures are returned as tool error results, never as a Go error.
+func (d *deps) handleDomWaitActionable(_ context.Context, _ mcp.CallToolRequest, a domWaitActionableArgs) (*mcp.CallToolResult, error) {
+	if a.Ref == 0 {
+		return mcp.NewToolResultError("ref is required"), nil
+	}
+	// A missing, zero or negative timeout_ms falls back to the documented 3s
+	// default instead of producing a zero or negative duration.
+	timeout := 3 * time.Second
+	if a.TimeoutMs > 0 {
+		timeout = time.Duration(a.TimeoutMs) * time.Millisecond
+	}
+	var x, y float64
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		var e error
+		x, y, e = browser.CdpWaitActionable(c, a.Ref, a.NeedEnabled, timeout)
+		return e
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(fmt.Sprintf(`{"actionable":true,"x":%.1f,"y":%.1f}`, x, y)), nil
+}
+
+// ---- dom_select_dropdown (MUTA) ----
+
+// domSelectDropdownArgs is the JSON argument payload of the dom_select_dropdown tool.
+type domSelectDropdownArgs struct {
+	Port       int    `json:"port"`
+	Trigger    string `json:"trigger"`
+	Option     string `json:"option"`
+	Exact      bool   `json:"exact"`
+	TimeoutMs  int    `json:"timeout_ms"`
+	OptionRole string `json:"option_role"`
+}
+
+// domSelectDropdownTool declares the dom_select_dropdown tool: its name,
+// description and parameters (trigger and option are required).
+func domSelectDropdownTool() mcp.Tool {
+	return mcp.NewTool("dom_select_dropdown",
+		mcp.WithDescription("Select an option in a CUSTOM dropdown (combobox/listbox built with divs — MUI, react-select, headlessui, select2), NOT a native <select>; for that use dom_select_option instead."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
+		mcp.WithString("trigger", mcp.Required(), mcp.Description("CSS selector of the element that opens the dropdown.")),
+		mcp.WithString("option", mcp.Required(), mcp.Description("Visible text of the option to pick.")),
+		mcp.WithBoolean("exact", mcp.Description("Exact option text match instead of substring. Default false.")),
+		mcp.WithNumber("timeout_ms", mcp.Description("Max wait for open + option in milliseconds. Default 3000.")),
+		mcp.WithString("option_role", mcp.Description("ARIA role of options. Default \"option\".")),
+	)
+}
+
+// handleDomSelectDropdown rejects an empty trigger or option, then runs
+// browser.CdpSelectDropdown, forwarding exact, timeout_ms and option_role
+// unchanged. Failures are returned as tool error results, never as a Go error.
+// NOTE(review): the documented defaults are presumably applied inside
+// browser.CdpSelectDropdown — confirm.
+func (d *deps) handleDomSelectDropdown(_ context.Context, _ mcp.CallToolRequest, a domSelectDropdownArgs) (*mcp.CallToolResult, error) {
+	if a.Trigger == "" || a.Option == "" {
+		return mcp.NewToolResultError("trigger and option are required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpSelectDropdown(c, a.Trigger, a.Option, browser.CdpDropdownOpts{Exact: a.Exact, TimeoutMs: a.TimeoutMs, OptionRole: a.OptionRole})
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(fmt.Sprintf("selected %q in dropdown %s", a.Option, a.Trigger)), nil
+}
+
+// ---- dom_fill (MUTA) ----
+
+// domFillArgs is the JSON argument payload of the dom_fill tool.
+type domFillArgs struct {
+	Port     int    `json:"port"`
+	Selector string `json:"selector"`
+	Value    string `json:"value"`
+}
+
+// domFillTool declares the dom_fill tool: its name, description and parameters
+// (selector is the only required one).
+func domFillTool() mcp.Tool {
+	return mcp.NewTool("dom_fill",
+		mcp.WithDescription("Fill a text input/textarea/contenteditable reliably (like Playwright fill): focus + select existing text + insert the value via real input events, so React/Vue-controlled fields update correctly. Replaces the focus+type pattern that concatenates onto the old value. For native special inputs (date/range/color) it sets the value and fires input/change."),
+		mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
+		mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the field.")),
+		mcp.WithString("value", mcp.Description("Value to set. Empty string clears the field.")),
+	)
+}
+
+// handleDomFill rejects an empty selector, then runs browser.CdpFillSelector
+// with the selector and value. Failures are returned as tool error results,
+// never as a Go error.
+func (d *deps) handleDomFill(_ context.Context, _ mcp.CallToolRequest, a domFillArgs) (*mcp.CallToolResult, error) {
+	if a.Selector == "" {
+		return mcp.NewToolResultError("selector is required"), nil
+	}
+	err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
+		return browser.CdpFillSelector(c, a.Selector, a.Value)
+	})
+	if err != nil {
+		return mcp.NewToolResultError(err.Error()), nil
+	}
+	return mcp.NewToolResultText(fmt.Sprintf("filled %s", a.Selector)), nil
+}
+
 // ---- dom_select_option (MUTA) ----
 
 type domSelectOptionArgs struct {