302 lines
11 KiB
Go
302 lines
11 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/mark3labs/mcp-go/mcp"
|
|
"github.com/mark3labs/mcp-go/server"
|
|
|
|
"fn-registry/functions/browser"
|
|
)
|
|
|
|
// registerDomTools wires DOM interaction tools. find/wait stay on under --read-only.
|
|
func registerDomTools(s *server.MCPServer, d *deps) {
|
|
s.AddTool(domFindByTextTool(), mcp.NewTypedToolHandler(d.handleDomFindByText))
|
|
s.AddTool(domWaitElementTool(), mcp.NewTypedToolHandler(d.handleDomWaitElement))
|
|
|
|
if !d.readOnly {
|
|
s.AddTool(domClickTool(), mcp.NewTypedToolHandler(d.handleDomClick))
|
|
s.AddTool(domClickHumanTool(), mcp.NewTypedToolHandler(d.handleDomClickHuman))
|
|
s.AddTool(domClickTextTool(), mcp.NewTypedToolHandler(d.handleDomClickText))
|
|
s.AddTool(domTypeTool(), mcp.NewTypedToolHandler(d.handleDomType))
|
|
s.AddTool(domClickRefTool(), mcp.NewTypedToolHandler(d.handleDomClickRef))
|
|
s.AddTool(domTypeRefTool(), mcp.NewTypedToolHandler(d.handleDomTypeRef))
|
|
s.AddTool(domHoverRefTool(), mcp.NewTypedToolHandler(d.handleDomHoverRef))
|
|
}
|
|
}
|
|
|
|
// settleDelay is the short pause taken after a mutating action before the
// page is perceived again, giving the DOM time to settle (navigation, focus,
// repaint).
const settleDelay time.Duration = 400 * time.Millisecond
|
|
|
|
// ---- dom_click_ref (MUTATES) — perceive→act loop ----
|
|
|
|
// domClickRefArgs holds the decoded arguments of the dom_click_ref tool.
type domClickRefArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Ref is the #ref (backendDOMNodeId) of the element to click.
	Ref int `json:"ref"`
}
|
|
|
|
func domClickRefTool() mcp.Tool {
|
|
return mcp.NewTool("dom_click_ref",
|
|
mcp.WithDescription("Click humanizado sobre el elemento por su #ref del outline de page_perceive (backendDOMNodeId estable). Usa humanización por defecto (Bézier+jitter). Devuelve el outline actualizado tras la acción (auto-observe)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithNumber("ref", mcp.Required(), mcp.Description("#ref del elemento (backendDOMNodeId) leído del outline de page_perceive.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomClickRef(_ context.Context, _ mcp.CallToolRequest, a domClickRefArgs) (*mcp.CallToolResult, error) {
|
|
port := portOr(a.Port)
|
|
// TODO: preset de humanización por sesión (human/fast/instant)
|
|
err := d.withConn(port, func(c *browser.CDPConn) error {
|
|
return browser.CdpClickRef(c, a.Ref, browser.MouseHumanOpts{})
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
time.Sleep(settleDelay)
|
|
outline, _ := d.perceiveOutline(port, 4000)
|
|
return mcp.NewToolResultText("clicked ref " + fmt.Sprint(a.Ref) + "\n\n" + outline), nil
|
|
}
|
|
|
|
// ---- dom_type_ref (MUTATES) — perceive→act loop ----
|
|
|
|
// domTypeRefArgs holds the decoded arguments of the dom_type_ref tool.
type domTypeRefArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Ref is the #ref (backendDOMNodeId) of the element to type into.
	Ref int `json:"ref"`
	// Text is the text to write; the handler rejects an empty value.
	Text string `json:"text"`
}
|
|
|
|
func domTypeRefTool() mcp.Tool {
|
|
return mcp.NewTool("dom_type_ref",
|
|
mcp.WithDescription("Enfoca el elemento por su #ref del outline de page_perceive (backendDOMNodeId estable) y escribe el texto. Devuelve el outline actualizado tras la acción (auto-observe)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithNumber("ref", mcp.Required(), mcp.Description("#ref del elemento (backendDOMNodeId) leído del outline de page_perceive.")),
|
|
mcp.WithString("text", mcp.Required(), mcp.Description("Texto a escribir en el elemento.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomTypeRef(_ context.Context, _ mcp.CallToolRequest, a domTypeRefArgs) (*mcp.CallToolResult, error) {
|
|
if a.Text == "" {
|
|
return mcp.NewToolResultError("text is required"), nil
|
|
}
|
|
port := portOr(a.Port)
|
|
// TODO: preset de humanización por sesión (human/fast/instant)
|
|
err := d.withConn(port, func(c *browser.CDPConn) error {
|
|
return browser.CdpTypeRef(c, a.Ref, a.Text)
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
time.Sleep(settleDelay)
|
|
outline, _ := d.perceiveOutline(port, 4000)
|
|
return mcp.NewToolResultText("typed into ref " + fmt.Sprint(a.Ref) + "\n\n" + outline), nil
|
|
}
|
|
|
|
// ---- dom_hover_ref (MUTATES) — perceive→act loop ----
|
|
|
|
// domHoverRefArgs holds the decoded arguments of the dom_hover_ref tool.
type domHoverRefArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Ref is the #ref (backendDOMNodeId) of the element to hover.
	Ref int `json:"ref"`
}
|
|
|
|
func domHoverRefTool() mcp.Tool {
|
|
return mcp.NewTool("dom_hover_ref",
|
|
mcp.WithDescription("Hover humanizado sobre el elemento por su #ref del outline de page_perceive (backendDOMNodeId estable). Usa humanización por defecto (Bézier+jitter). Devuelve el outline actualizado tras la acción (auto-observe)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithNumber("ref", mcp.Required(), mcp.Description("#ref del elemento (backendDOMNodeId) leído del outline de page_perceive.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomHoverRef(_ context.Context, _ mcp.CallToolRequest, a domHoverRefArgs) (*mcp.CallToolResult, error) {
|
|
port := portOr(a.Port)
|
|
// TODO: preset de humanización por sesión (human/fast/instant)
|
|
err := d.withConn(port, func(c *browser.CDPConn) error {
|
|
return browser.CdpHoverRef(c, a.Ref, browser.MouseHumanOpts{})
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
time.Sleep(settleDelay)
|
|
outline, _ := d.perceiveOutline(port, 4000)
|
|
return mcp.NewToolResultText("hovered ref " + fmt.Sprint(a.Ref) + "\n\n" + outline), nil
|
|
}
|
|
|
|
// ---- dom_click (MUTATES) ----
|
|
|
|
// domClickArgs holds the decoded arguments of the dom_click tool.
type domClickArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Selector is the CSS selector of the element to click; the handler
	// rejects an empty value.
	Selector string `json:"selector"`
}
|
|
|
|
func domClickTool() mcp.Tool {
|
|
return mcp.NewTool("dom_click",
|
|
mcp.WithDescription("Click the element matching the CSS selector (synthetic CDP click)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
|
|
mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the element to click.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomClick(_ context.Context, _ mcp.CallToolRequest, a domClickArgs) (*mcp.CallToolResult, error) {
|
|
if a.Selector == "" {
|
|
return mcp.NewToolResultError("selector is required"), nil
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpClick(c, a.Selector)
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText("clicked " + a.Selector), nil
|
|
}
|
|
|
|
// ---- dom_click_human (MUTATES) ----
|
|
|
|
// domClickHumanArgs holds the decoded arguments of the dom_click_human tool.
type domClickHumanArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Selector is the CSS selector of the element to click; the handler
	// rejects an empty value.
	Selector string `json:"selector"`
}
|
|
|
|
func domClickHumanTool() mcp.Tool {
|
|
return mcp.NewTool("dom_click_human",
|
|
mcp.WithDescription("Click the element matching the CSS selector with human-like mouse movement (Bézier path + jitter + press/release pause)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
|
|
mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector of the element to click.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomClickHuman(_ context.Context, _ mcp.CallToolRequest, a domClickHumanArgs) (*mcp.CallToolResult, error) {
|
|
if a.Selector == "" {
|
|
return mcp.NewToolResultError("selector is required"), nil
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpClickHuman(c, a.Selector, browser.MouseHumanOpts{})
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText("clicked (human) " + a.Selector), nil
|
|
}
|
|
|
|
// ---- dom_click_text (MUTATES) ----
|
|
|
|
// domClickTextArgs holds the decoded arguments of the dom_click_text tool.
type domClickTextArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Text is the visible text to match; the handler rejects an empty value.
	Text string `json:"text"`
}
|
|
|
|
func domClickTextTool() mcp.Tool {
|
|
return mcp.NewTool("dom_click_text",
|
|
mcp.WithDescription("Find the first element whose visible text matches and click it."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
|
|
mcp.WithString("text", mcp.Required(), mcp.Description("Visible text to match (substring).")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomClickText(_ context.Context, _ mcp.CallToolRequest, a domClickTextArgs) (*mcp.CallToolResult, error) {
|
|
if a.Text == "" {
|
|
return mcp.NewToolResultError("text is required"), nil
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpClickText(c, a.Text, browser.FindByTextOpts{})
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText("clicked text " + a.Text), nil
|
|
}
|
|
|
|
// ---- dom_type (MUTATES) ----
|
|
|
|
// domTypeArgs holds the decoded arguments of the dom_type tool.
type domTypeArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Text is the text to type; the handler rejects an empty value.
	Text string `json:"text"`
}
|
|
|
|
func domTypeTool() mcp.Tool {
|
|
return mcp.NewTool("dom_type",
|
|
mcp.WithDescription("Type text into the currently focused element (dispatches key events char by char)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
|
|
mcp.WithString("text", mcp.Required(), mcp.Description("Text to type.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomType(_ context.Context, _ mcp.CallToolRequest, a domTypeArgs) (*mcp.CallToolResult, error) {
|
|
if a.Text == "" {
|
|
return mcp.NewToolResultError("text is required"), nil
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpTypeText(c, a.Text)
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText("typed text"), nil
|
|
}
|
|
|
|
// ---- dom_find_by_text ----
|
|
|
|
// domFindByTextArgs holds the decoded arguments of the dom_find_by_text tool.
type domFindByTextArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Text is the visible text to match; the handler rejects an empty value.
	Text string `json:"text"`
}
|
|
|
|
func domFindByTextTool() mcp.Tool {
|
|
return mcp.NewTool("dom_find_by_text",
|
|
mcp.WithDescription("Find the first element whose visible text matches and return a unique CSS selector for it (empty string if none)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
|
|
mcp.WithString("text", mcp.Required(), mcp.Description("Visible text to match (substring).")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomFindByText(_ context.Context, _ mcp.CallToolRequest, a domFindByTextArgs) (*mcp.CallToolResult, error) {
|
|
if a.Text == "" {
|
|
return mcp.NewToolResultError("text is required"), nil
|
|
}
|
|
var sel string
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
var e error
|
|
sel, e = browser.CdpFindByText(c, a.Text, browser.FindByTextOpts{})
|
|
return e
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(sel), nil
|
|
}
|
|
|
|
// ---- dom_wait_element ----
|
|
|
|
// domWaitElementArgs holds the decoded arguments of the dom_wait_element tool.
type domWaitElementArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Selector is the CSS selector to wait for; the handler rejects an empty
	// value.
	Selector string `json:"selector"`
	// TimeoutMs is the maximum wait in milliseconds; the handler substitutes
	// 10000 when it is zero or negative.
	TimeoutMs int `json:"timeout_ms"`
}
|
|
|
|
func domWaitElementTool() mcp.Tool {
|
|
return mcp.NewTool("dom_wait_element",
|
|
mcp.WithDescription("Block until an element matching the CSS selector appears in the DOM (or timeout)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9222.")),
|
|
mcp.WithString("selector", mcp.Required(), mcp.Description("CSS selector to wait for.")),
|
|
mcp.WithNumber("timeout_ms", mcp.Description("Max wait in ms. Default 10000.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handleDomWaitElement(_ context.Context, _ mcp.CallToolRequest, a domWaitElementArgs) (*mcp.CallToolResult, error) {
|
|
if a.Selector == "" {
|
|
return mcp.NewToolResultError("selector is required"), nil
|
|
}
|
|
timeout := a.TimeoutMs
|
|
if timeout <= 0 {
|
|
timeout = 10000
|
|
}
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
return browser.CdpWaitElement(c, a.Selector, time.Duration(timeout)*time.Millisecond)
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText("element appeared: " + a.Selector), nil
|
|
}
|