package main import ( "context" "fmt" "time" "github.com/mark3labs/mcp-go/mcp" "github.com/mark3labs/mcp-go/server" "fn-registry/functions/browser" ) // registerDomTools wires DOM interaction tools. find/wait stay on under --read-only. func registerDomTools(s *server.MCPServer, d *deps) { s.AddTool(domFindByTextTool(), mcp.NewTypedToolHandler(d.handleDomFindByText)) s.AddTool(domWaitElementTool(), mcp.NewTypedToolHandler(d.handleDomWaitElement)) if !d.readOnly { s.AddTool(domClickTool(), mcp.NewTypedToolHandler(d.handleDomClick)) s.AddTool(domClickHumanTool(), mcp.NewTypedToolHandler(d.handleDomClickHuman)) s.AddTool(domClickTextTool(), mcp.NewTypedToolHandler(d.handleDomClickText)) s.AddTool(domTypeTool(), mcp.NewTypedToolHandler(d.handleDomType)) s.AddTool(domClickRefTool(), mcp.NewTypedToolHandler(d.handleDomClickRef)) s.AddTool(domTypeRefTool(), mcp.NewTypedToolHandler(d.handleDomTypeRef)) s.AddTool(domHoverRefTool(), mcp.NewTypedToolHandler(d.handleDomHoverRef)) } } // settleDelay es la espera breve tras una acción mutante antes de re-percibir, // dando tiempo a que el DOM se asiente (navegación, focus, repaint). const settleDelay = 400 * time.Millisecond // ---- dom_click_ref (MUTA) — bucle percibir→actuar ---- type domClickRefArgs struct { Port int `json:"port"` Ref int `json:"ref"` } func domClickRefTool() mcp.Tool { return mcp.NewTool("dom_click_ref", mcp.WithDescription("Click humanizado sobre el elemento por su #ref del outline de page_perceive (backendDOMNodeId estable). Usa humanización por defecto (Bézier+jitter). Devuelve el outline actualizado tras la acción (auto-observe)."), mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")), mcp.WithNumber("ref", mcp.Required(), mcp.Description("#ref del elemento (backendDOMNodeId) leído del outline de page_perceive.")), ) } func (d *deps) handleDomClickRef(_ context.Context, _ mcp.CallToolRequest, a domClickRefArgs) (*mcp.CallToolResult, error) { port := portOr(a.Port) // TODO: preset de humanización por sesión (human/fast/instant) err := d.withConn(port, func(c *browser.CDPConn) error { return browser.CdpClickRef(c, a.Ref, browser.MouseHumanOpts{}) }) if err != nil { return mcp.NewToolResultError(err.Error()), nil } time.Sleep(settleDelay) outline, _ := d.perceiveOutline(port, 4000) return mcp.NewToolResultText("clicked ref " + fmt.Sprint(a.Ref) + "\n\n" + outline), nil } // ---- dom_type_ref (MUTA) — bucle percibir→actuar ---- type domTypeRefArgs struct { Port int `json:"port"` Ref int `json:"ref"` Text string `json:"text"` } func domTypeRefTool() mcp.Tool { return mcp.NewTool("dom_type_ref", mcp.WithDescription("Enfoca el elemento por su #ref del outline de page_perceive (backendDOMNodeId estable) y escribe el texto. Devuelve el outline actualizado tras la acción (auto-observe)."), mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")), mcp.WithNumber("ref", mcp.Required(), mcp.Description("#ref del elemento (backendDOMNodeId) leído del outline de page_perceive.")), mcp.WithString("text", mcp.Required(), mcp.Description("Texto a escribir en el elemento.")), ) } func (d *deps) handleDomTypeRef(_ context.Context, _ mcp.CallToolRequest, a domTypeRefArgs) (*mcp.CallToolResult, error) { if a.Text == "" { return mcp.NewToolResultError("text is required"), nil } port := portOr(a.Port) // TODO: preset de humanización por sesión (human/fast/instant) err := d.withConn(port, func(c *browser.CDPConn) error { return browser.CdpTypeRef(c, a.Ref, a.Text) }) if err != nil { return mcp.NewToolResultError(err.Error()), nil } time.Sleep(settleDelay) outline, _ := d.perceiveOutline(port, 4000) return mcp.NewToolResultText("typed into ref " + fmt.Sprint(a.Ref) + "\n\n" + outline), nil } // ---- dom_hover_ref (MUTA) — bucle percibir→actuar ---- type domHoverRefArgs struct { Port int `json:"port"` Ref int `json:"ref"` } func domHoverRefTool() mcp.Tool { return mcp.NewTool("dom_hover_ref", mcp.WithDescription("Hover humanizado sobre el elemento por su #ref del outline de page_perceive (backendDOMNodeId estable). Usa humanización por defecto (Bézier+jitter). Devuelve el outline actualizado tras la acción (auto-observe)."), mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")), mcp.WithNumber("ref", mcp.Required(), mcp.Description("#ref del elemento (backendDOMNodeId) leído del outline de page_perceive.")), ) } func (d *deps) handleDomHoverRef(_ context.Context, _ mcp.CallToolRequest, a domHoverRefArgs) (*mcp.CallToolResult, error) { port := portOr(a.Port) // TODO: preset de humanización por sesión (human/fast/instant) err := d.withConn(port, func(c *browser.CDPConn) error { return browser.CdpHoverRef(c, a.Ref, browser.MouseHumanOpts{}) }) if err != nil { return mcp.NewToolResultError(err.Error()), nil } time.Sleep(settleDelay) outline, _ := d.perceiveOutline(port, 4000) return mcp.NewToolResultText("hovered ref " + fmt.Sprint(a.Ref) + "\n\n" + outline), nil } // ---- dom_click (MUTA) ---- type domClickArgs struct { Port int `json:"port"` Selector string `json:"selector"` } func domClickTool() mcp.Tool { return mcp.NewTool("dom_click", mcp.WithDescription("Click the element matching the CSS selector (synthetic CDP click)."), mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the element to click.")), ) } func (d *deps) handleDomClick(_ context.Context, _ mcp.CallToolRequest, a domClickArgs) (*mcp.CallToolResult, error) { if a.Selector == "" { return mcp.NewToolResultError("selector is required"), nil } err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { return browser.CdpClick(c, a.Selector) }) if err != nil { return mcp.NewToolResultError(err.Error()), nil } return mcp.NewToolResultText("clicked " + a.Selector), nil } // ---- dom_click_human (MUTA) ---- type domClickHumanArgs struct { Port int `json:"port"` Selector string `json:"selector"` } func domClickHumanTool() mcp.Tool { return mcp.NewTool("dom_click_human", mcp.WithDescription("Click the element matching the CSS selector with human-like mouse movement (Bézier path + jitter + press/release pause)."), mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the element to click.")), ) } func (d *deps) handleDomClickHuman(_ context.Context, _ mcp.CallToolRequest, a domClickHumanArgs) (*mcp.CallToolResult, error) { if a.Selector == "" { return mcp.NewToolResultError("selector is required"), nil } err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { return browser.CdpClickHuman(c, a.Selector, browser.MouseHumanOpts{}) }) if err != nil { return mcp.NewToolResultError(err.Error()), nil } return mcp.NewToolResultText("clicked (human) " + a.Selector), nil } // ---- dom_click_text (MUTA) ---- type domClickTextArgs struct { Port int `json:"port"` Text string `json:"text"` } func domClickTextTool() mcp.Tool { return mcp.NewTool("dom_click_text", mcp.WithDescription("Find the first element whose visible text matches and click it."), mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), mcp.WithString("text", mcp.Required(), mcp.Description("Visible text to match (substring).")), ) } func (d *deps) handleDomClickText(_ context.Context, _ mcp.CallToolRequest, a domClickTextArgs) (*mcp.CallToolResult, error) { if a.Text == "" { return mcp.NewToolResultError("text is required"), nil } err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { return browser.CdpClickText(c, a.Text, browser.FindByTextOpts{}) }) if err != nil { return mcp.NewToolResultError(err.Error()), nil } return mcp.NewToolResultText("clicked text " + a.Text), nil } // ---- dom_type (MUTA) ---- type domTypeArgs struct { Port int `json:"port"` Text string `json:"text"` } func domTypeTool() mcp.Tool { return mcp.NewTool("dom_type", mcp.WithDescription("Type text into the currently focused element (dispatches key events char by char)."), mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), mcp.WithString("text", mcp.Required(), mcp.Description("Text to type.")), ) } func (d *deps) handleDomType(_ context.Context, _ mcp.CallToolRequest, a domTypeArgs) (*mcp.CallToolResult, error) { if a.Text == "" { return mcp.NewToolResultError("text is required"), nil } err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { return browser.CdpTypeText(c, a.Text) }) if err != nil { return mcp.NewToolResultError(err.Error()), nil } return mcp.NewToolResultText("typed text"), nil } // ---- dom_find_by_text ---- type domFindByTextArgs struct { Port int `json:"port"` Text string `json:"text"` } func domFindByTextTool() mcp.Tool { return mcp.NewTool("dom_find_by_text", mcp.WithDescription("Find the first element whose visible text matches and return a unique CSS selector for it (empty string if none)."), mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), mcp.WithString("text", mcp.Required(), mcp.Description("Visible text to match (substring).")), ) } func (d *deps) handleDomFindByText(_ context.Context, _ mcp.CallToolRequest, a domFindByTextArgs) (*mcp.CallToolResult, error) { if a.Text == "" { return mcp.NewToolResultError("text is required"), nil } var sel string err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { var e error sel, e = browser.CdpFindByText(c, a.Text, browser.FindByTextOpts{}) return e }) if err != nil { return mcp.NewToolResultError(err.Error()), nil } return mcp.NewToolResultText(sel), nil } // ---- dom_wait_element ---- type domWaitElementArgs struct { Port int `json:"port"` Selector string `json:"selector"` TimeoutMs int `json:"timeout_ms"` } func domWaitElementTool() mcp.Tool { return mcp.NewTool("dom_wait_element", mcp.WithDescription("Block until an element matching the CSS selector appears in the DOM (or timeout)."), mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")), mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector to wait for.")), mcp.WithNumber("timeout_ms", mcp.Description("Max wait in ms. Default 10000.")), ) } func (d *deps) handleDomWaitElement(_ context.Context, _ mcp.CallToolRequest, a domWaitElementArgs) (*mcp.CallToolResult, error) { if a.Selector == "" { return mcp.NewToolResultError("selector is required"), nil } timeout := a.TimeoutMs if timeout <= 0 { timeout = 10000 } err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error { return browser.CdpWaitElement(c, a.Selector, time.Duration(timeout)*time.Millisecond) }) if err != nil { return mcp.NewToolResultError(err.Error()), nil } return mcp.NewToolResultText("element appeared: " + a.Selector), nil }