Files

238 lines
8.1 KiB
Go

package main
import (
"context"
"fmt"
"os/exec"
"path/filepath"
"strings"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
"fn-registry/functions/browser"
)
// htmlMax is the limit handed to truncate for the larger tool outputs produced
// in this file: the serialized page HTML, the page_eval_js result and the
// perceive outline.
// NOTE(review): whether the unit is bytes or characters depends on truncate,
// which is defined elsewhere — confirm.
const htmlMax = 200_000
// registerReadTools registers the page-reading tools on s, binding each tool
// definition to its typed handler on d.
//
// page_get_html, page_get_text, page_perceive and page_screenshot are always
// registered. page_eval_js (tagged as mutating in this file) is registered only
// when d.readOnly is false.
func registerReadTools(s *server.MCPServer, d *deps) {
	s.AddTool(pageGetHTMLTool(), mcp.NewTypedToolHandler(d.handlePageGetHTML))
	s.AddTool(pageGetTextTool(), mcp.NewTypedToolHandler(d.handlePageGetText))
	s.AddTool(pagePerceiveTool(), mcp.NewTypedToolHandler(d.handlePagePerceive))
	s.AddTool(pageScreenshotTool(), mcp.NewTypedToolHandler(d.handlePageScreenshot))

	// Read-only servers stop here: the JS evaluation tool is never exposed.
	if d.readOnly {
		return
	}
	s.AddTool(pageEvalJSTool(), mcp.NewTypedToolHandler(d.handlePageEvalJS))
}
// ---- page_get_text ----
// pageGetTextArgs holds the decoded arguments of the page_get_text tool.
type pageGetTextArgs struct {
	// Port is the CDP port; the handler passes it through portOr before
	// connecting.
	Port int `json:"port"`
	// Selector is an optional CSS selector forwarded to browser.CdpGetText.
	Selector string `json:"selector"`
	// MaxBytes is the size limit forwarded to browser.CdpGetText. The handler
	// replaces a zero value with 20000.
	MaxBytes int `json:"max_bytes"`
}
// pageGetTextTool builds the MCP tool definition for page_get_text, declaring
// its optional port, selector and max_bytes parameters.
//
// The max_bytes description tells callers that 0 (or an omitted field) selects
// the 20000 default. handlePageGetText rewrites a zero MaxBytes to 20000 before
// calling the browser helper, so a zero never reaches it and cannot be
// advertised as "no limit".
func pageGetTextTool() mcp.Tool {
	return mcp.NewTool("page_get_text",
		mcp.WithDescription("Devuelve el texto visible (innerText) de la página o de un elemento (selector CSS), truncado a max_bytes. Preferir sobre page_get_html cuando solo necesitas leer contenido — no revienta el contexto."),
		mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
		mcp.WithString("selector", mcp.Description("Selector CSS opcional. Vacío = body (toda la página).")),
		mcp.WithNumber("max_bytes", mcp.Description("Máximo de bytes a devolver. Default 20000 (0 u omitido = default).")),
	)
}
// handlePageGetText serves the page_get_text tool.
//
// It connects through d.withConn on portOr(a.Port) and asks browser.CdpGetText
// for the text of a.Selector, limited to a.MaxBytes (a zero MaxBytes is
// replaced by 20000). A failure is reported as an MCP tool-error result; the Go
// error return is always nil.
func (d *deps) handlePageGetText(_ context.Context, _ mcp.CallToolRequest, a pageGetTextArgs) (*mcp.CallToolResult, error) {
	const defaultMaxBytes = 20000

	limit := a.MaxBytes
	if limit == 0 {
		limit = defaultMaxBytes
	}

	var body string
	fetch := func(conn *browser.CDPConn) (err error) {
		body, err = browser.CdpGetText(conn, a.Selector, limit)
		return err
	}
	if err := d.withConn(portOr(a.Port), fetch); err != nil {
		return mcp.NewToolResultError(err.Error()), nil
	}
	return mcp.NewToolResultText(body), nil
}
// ---- page_perceive ----
// pagePerceiveArgs holds the decoded arguments of the page_perceive tool.
type pagePerceiveArgs struct {
	// Port is the CDP port; the handler passes it through portOr.
	Port int `json:"port"`
	// TabID is the target id of the tab to outline. When empty,
	// perceiveOutlineTab picks the first tab whose type is "page".
	TabID string `json:"tab_id"`
	// MaxChars is forwarded to the outline pipeline. The handler replaces a
	// zero value with 20000.
	MaxChars int `json:"max_chars"`
}
// pagePerceiveTool builds the MCP tool definition for page_perceive, declaring
// its optional port, tab_id and max_chars parameters.
func pagePerceiveTool() mcp.Tool {
	const (
		toolDesc     = "Devuelve un outline indentado y accionable del árbol de accesibilidad (roles, nombres, #ref) — la forma compacta de que el agente 'perciba' la página sin reventar el contexto. Si tab_id se omite, usa la primera pestaña page. Gotcha: requiere el binario `fn` y el venv de Python del registry disponibles en runtime."
		portDesc     = "CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario."
		tabIDDesc    = "Target id de la pestaña. Vacío = primera pestaña page."
		maxCharsDesc = "Máximo de chars del outline. Default 20000."
	)

	return mcp.NewTool("page_perceive",
		mcp.WithDescription(toolDesc),
		mcp.WithNumber("port", mcp.Description(portDesc)),
		mcp.WithString("tab_id", mcp.Description(tabIDDesc)),
		mcp.WithNumber("max_chars", mcp.Description(maxCharsDesc)),
	)
}
// handlePagePerceive serves the page_perceive tool.
//
// It delegates to d.perceiveOutlineTab with portOr(a.Port), a.TabID and
// a.MaxChars (a zero MaxChars is replaced by 20000). A failure is reported as
// an MCP tool-error result; the Go error return is always nil.
func (d *deps) handlePagePerceive(_ context.Context, _ mcp.CallToolRequest, a pagePerceiveArgs) (*mcp.CallToolResult, error) {
	const defaultMaxChars = 20000

	cdpPort := portOr(a.Port)

	limit := a.MaxChars
	if limit == 0 {
		limit = defaultMaxChars
	}

	text, err := d.perceiveOutlineTab(cdpPort, a.TabID, limit)
	if err != nil {
		return mcp.NewToolResultError(err.Error()), nil
	}
	return mcp.NewToolResultText(text), nil
}
// perceiveOutline returns the actionable AX outline for the given port without
// naming a tab: it calls perceiveOutlineTab with an empty tab id, which makes
// that function use the first 'page' tab on the port.
func (d *deps) perceiveOutline(port, maxChars int) (string, error) {
	const firstPageTab = "" // empty id = let perceiveOutlineTab choose the tab
	return d.perceiveOutlineTab(port, firstPageTab, maxChars)
}
// perceiveOutlineTab produces the actionable accessibility (AX) outline of one
// tab by running the cdp_perceive_outline pipeline in a subprocess.
//
// Parameters:
//   - port: CDP debug port; used to list tabs and forwarded to the pipeline.
//   - tabID: target id of the tab. When empty, the first tab reported by
//     browser.CdpListTabs whose Type is "page" is used.
//   - maxChars: forwarded unchanged to the pipeline as its third argument.
//
// The registry root is obtained from resolveRoot and the command executed is
// `<root>/fn run cdp_perceive_outline <port> <tabID> <maxChars>` with root as
// its working directory. On success the command's stdout is returned after
// being passed through truncate with htmlMax.
//
// Errors are returned when the root cannot be resolved, when listing tabs
// fails, when no 'page' tab exists, or when the subprocess fails. The
// subprocess error wraps the underlying exec error (so errors.Is / errors.As
// keep working and the exit status stays visible) and appends the trimmed
// stderr when the process wrote any.
//
// NOTE(review): the subprocess runs without a timeout or cancellation; wiring a
// context through would need a signature change and is left for a follow-up.
func (d *deps) perceiveOutlineTab(port int, tabID string, maxChars int) (string, error) {
	root, err := resolveRoot()
	if err != nil {
		return "", fmt.Errorf("resolve registry root: %w", err)
	}

	if tabID == "" {
		tabs, err := browser.CdpListTabs("localhost", port)
		if err != nil {
			return "", fmt.Errorf("list tabs: %w", err)
		}
		for _, t := range tabs {
			if t.Type == "page" {
				tabID = t.ID
				break
			}
		}
		if tabID == "" {
			return "", fmt.Errorf("no 'page' tab found on port %d", port)
		}
	}

	// `fn run` hands the arguments POSITIONALLY to the pipeline function (not
	// as argparse flags), so the order must match the signature
	// cdp_perceive_outline(debug_port, tab_id, max_chars).
	cmd := exec.Command(filepath.Join(root, "fn"), "run", "cdp_perceive_outline",
		fmt.Sprint(port),
		tabID,
		fmt.Sprint(maxChars),
	)
	cmd.Dir = root

	var stdout, stderr strings.Builder
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// Keep the process error in the chain and add stderr as detail instead
		// of letting stderr replace it.
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			return "", fmt.Errorf("cdp_perceive_outline failed: %w: %s", err, msg)
		}
		return "", fmt.Errorf("cdp_perceive_outline failed: %w", err)
	}
	return truncate(stdout.String(), htmlMax), nil
}
// ---- page_get_html ----
// pageGetHTMLArgs holds the decoded arguments of the page_get_html tool.
type pageGetHTMLArgs struct {
	// Port is the CDP port; the handler passes it through portOr before
	// connecting.
	Port int `json:"port"`
}
// pageGetHTMLTool builds the MCP tool definition for page_get_html, declaring
// its optional port parameter.
//
// The port description matches the one used by page_get_text and
// page_perceive, whose handlers resolve the port through the same portOr
// helper; advertising a different default here would mislead callers.
// NOTE(review): the 9333 default is taken from those sibling descriptions —
// confirm against portOr.
func pageGetHTMLTool() mcp.Tool {
	return mcp.NewTool("page_get_html",
		mcp.WithDescription("Return the current page's full serialized HTML (outerHTML). Truncated to 200000 chars."),
		mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (the MCP's isolated Chrome); pass 9222 explicitly only to attach to your everyday browser.")),
	)
}
// handlePageGetHTML serves the page_get_html tool.
//
// It connects through d.withConn on portOr(a.Port), fetches the page HTML with
// browser.CdpGetHTML and returns it after truncate with htmlMax. A failure is
// reported as an MCP tool-error result; the Go error return is always nil.
func (d *deps) handlePageGetHTML(_ context.Context, _ mcp.CallToolRequest, a pageGetHTMLArgs) (*mcp.CallToolResult, error) {
	var markup string
	fetch := func(conn *browser.CDPConn) (err error) {
		markup, err = browser.CdpGetHTML(conn)
		return err
	}
	if err := d.withConn(portOr(a.Port), fetch); err != nil {
		return mcp.NewToolResultError(err.Error()), nil
	}
	return mcp.NewToolResultText(truncate(markup, htmlMax)), nil
}
// ---- page_eval_js (MUTA) ----
// pageEvalJSArgs holds the decoded arguments of the page_eval_js tool.
type pageEvalJSArgs struct {
	// Port is the CDP port; the handler passes it through portOr before
	// connecting.
	Port int `json:"port"`
	// Expression is the JavaScript source forwarded to browser.CdpEvaluate.
	// The handler rejects an empty string.
	Expression string `json:"expression"`
}
// pageEvalJSTool builds the MCP tool definition for page_eval_js, declaring an
// optional port and a required expression parameter.
//
// The port description matches the one used by page_get_text and
// page_perceive, whose handlers resolve the port through the same portOr
// helper; advertising a different default here would mislead callers.
// NOTE(review): the 9333 default is taken from those sibling descriptions —
// confirm against portOr.
func pageEvalJSTool() mcp.Tool {
	return mcp.NewTool("page_eval_js",
		mcp.WithDescription("Evaluate a JavaScript expression in the page context via Runtime.evaluate. Returns the stringified result."),
		mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (the MCP's isolated Chrome); pass 9222 explicitly only to attach to your everyday browser.")),
		mcp.WithString("expression", mcp.Required(), mcp.Description("JavaScript expression to evaluate.")),
	)
}
// handlePageEvalJS serves the page_eval_js tool.
//
// An empty expression is rejected up front with a tool-error result.
// Otherwise it connects through d.withConn on portOr(a.Port), evaluates the
// expression with browser.CdpEvaluate and returns the result after truncate
// with htmlMax. A failure is reported as an MCP tool-error result; the Go
// error return is always nil.
func (d *deps) handlePageEvalJS(_ context.Context, _ mcp.CallToolRequest, a pageEvalJSArgs) (*mcp.CallToolResult, error) {
	if len(a.Expression) == 0 {
		return mcp.NewToolResultError("expression is required"), nil
	}

	var value string
	evaluate := func(conn *browser.CDPConn) (err error) {
		value, err = browser.CdpEvaluate(conn, a.Expression)
		return err
	}
	if err := d.withConn(portOr(a.Port), evaluate); err != nil {
		return mcp.NewToolResultError(err.Error()), nil
	}
	return mcp.NewToolResultText(truncate(value, htmlMax)), nil
}
// ---- page_screenshot ----
// pageScreenshotArgs holds the decoded arguments of the page_screenshot tool.
type pageScreenshotArgs struct {
	// Port is the CDP port; the handler passes it through portOr before
	// connecting.
	Port int `json:"port"`
	// Path is the output file path handed to browser.CdpScreenshot. The
	// handler rejects an empty string.
	Path string `json:"path"`
	// FullPage is copied into browser.CdpScreenshotOpts.FullPage.
	FullPage bool `json:"full_page"`
}
// pageScreenshotTool builds the MCP tool definition for page_screenshot,
// declaring an optional port, a required path and an optional full_page flag.
//
// The port description matches the one used by page_get_text and
// page_perceive, whose handlers resolve the port through the same portOr
// helper; advertising a different default here would mislead callers.
// NOTE(review): the 9333 default is taken from those sibling descriptions —
// confirm against portOr.
func pageScreenshotTool() mcp.Tool {
	return mcp.NewTool("page_screenshot",
		mcp.WithDescription("Capture a screenshot of the current page and write it to a local path (.png/.jpg)."),
		mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (the MCP's isolated Chrome); pass 9222 explicitly only to attach to your everyday browser.")),
		mcp.WithString("path", mcp.Required(), mcp.Description("Output file path (.png or .jpg).")),
		mcp.WithBoolean("full_page", mcp.Description("Capture the full scroll height instead of just the viewport.")),
	)
}
// handlePageScreenshot serves the page_screenshot tool.
//
// An empty path is rejected up front with a tool-error result. Otherwise it
// connects through d.withConn on portOr(a.Port) and calls
// browser.CdpScreenshot with a.Path and the FullPage option. On success the
// result text names the path; a failure is reported as an MCP tool-error
// result. The Go error return is always nil.
func (d *deps) handlePageScreenshot(_ context.Context, _ mcp.CallToolRequest, a pageScreenshotArgs) (*mcp.CallToolResult, error) {
	if len(a.Path) == 0 {
		return mcp.NewToolResultError("path is required"), nil
	}

	shotOpts := browser.CdpScreenshotOpts{FullPage: a.FullPage}
	capture := func(conn *browser.CDPConn) error {
		return browser.CdpScreenshot(conn, a.Path, shotOpts)
	}
	if err := d.withConn(portOr(a.Port), capture); err != nil {
		return mcp.NewToolResultError(err.Error()), nil
	}
	return mcp.NewToolResultText("screenshot saved to " + a.Path), nil
}