6b7f71c39f
Tools nuevas (wrappers finos sobre funciones del registry functions/browser): - page_collect_console -> cdp_collect_console (console + exceptions + log, snapshot) - page_pdf -> cdp_print_pdf (Page.printToPDF a archivo) - dom_select_option -> cdp_select_option (<select> por value/texto + input/change) - dom_set_files -> cdp_set_file_input (subir archivos a <input type=file>) browser_list ahora enriquece cada master con CDP con pages (nº de page targets), active_title y active_url via GET /json (best-effort: si el puerto no responde los campos quedan a cero y el listado de procesos no falla). Total tools: 46 -> 50. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
302 lines
12 KiB
Go
302 lines
12 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
|
|
"github.com/mark3labs/mcp-go/mcp"
|
|
"github.com/mark3labs/mcp-go/server"
|
|
|
|
"fn-registry/functions/browser"
|
|
)
|
|
|
|
// htmlMax is the size limit handed to truncate for the text payloads returned
// by the tools in this file (serialized HTML, console JSON, JS evaluation
// results and accessibility outlines).
const htmlMax = 200 * 1000
|
|
|
|
// registerReadTools wires page_get_html, page_get_text, page_perceive,
|
|
// page_eval_js (MUTA), page_screenshot.
|
|
func registerReadTools(s *server.MCPServer, d *deps) {
|
|
s.AddTool(pageGetHTMLTool(), mcp.NewTypedToolHandler(d.handlePageGetHTML))
|
|
s.AddTool(pageGetTextTool(), mcp.NewTypedToolHandler(d.handlePageGetText))
|
|
s.AddTool(pagePerceiveTool(), mcp.NewTypedToolHandler(d.handlePagePerceive))
|
|
s.AddTool(pageScreenshotTool(), mcp.NewTypedToolHandler(d.handlePageScreenshot))
|
|
s.AddTool(pageCollectConsoleTool(), mcp.NewTypedToolHandler(d.handlePageCollectConsole))
|
|
s.AddTool(pagePDFTool(), mcp.NewTypedToolHandler(d.handlePagePDF))
|
|
|
|
if !d.readOnly {
|
|
s.AddTool(pageEvalJSTool(), mcp.NewTypedToolHandler(d.handlePageEvalJS))
|
|
}
|
|
}
|
|
|
|
// ---- page_collect_console ----
|
|
|
|
// pageCollectConsoleArgs holds the decoded arguments of the
// page_collect_console tool.
type pageCollectConsoleArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// DurationMs is the capture window in milliseconds; the handler forwards
	// it unchanged to browser.CdpCollectConsole.
	DurationMs int `json:"duration_ms"`
}
|
|
|
|
func pageCollectConsoleTool() mcp.Tool {
|
|
return mcp.NewTool("page_collect_console",
|
|
mcp.WithDescription("Capture the page's console output (console.log/info/warn/error), uncaught JS exceptions and browser log entries during a time window, and return them as JSON. It is a SNAPSHOT: it records what happens during duration_ms AFTER the call starts, not past history — so trigger the action you want to observe (reload, click) right before or during the window. Use this to debug why a page misbehaves without flying blind."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithNumber("duration_ms", mcp.Description("Capture window in milliseconds. Default 1500.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handlePageCollectConsole(_ context.Context, _ mcp.CallToolRequest, a pageCollectConsoleArgs) (*mcp.CallToolResult, error) {
|
|
var entries []browser.ConsoleEntry
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
var e error
|
|
entries, e = browser.CdpCollectConsole(c, a.DurationMs)
|
|
return e
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
if entries == nil {
|
|
entries = []browser.ConsoleEntry{}
|
|
}
|
|
b, _ := json.MarshalIndent(entries, "", " ")
|
|
return mcp.NewToolResultText(truncate(string(b), htmlMax)), nil
|
|
}
|
|
|
|
// ---- page_pdf ----
|
|
|
|
// pagePDFArgs holds the decoded arguments of the page_pdf tool.
type pagePDFArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Path is the output file path; the handler rejects an empty value.
	Path string `json:"path"`
	// Landscape, PrintBackground and Scale are copied verbatim into
	// browser.CdpPrintPDFOpts by the handler.
	Landscape       bool    `json:"landscape"`
	PrintBackground bool    `json:"print_background"`
	Scale           float64 `json:"scale"`
}
|
|
|
|
func pagePDFTool() mcp.Tool {
|
|
return mcp.NewTool("page_pdf",
|
|
mcp.WithDescription("Render the current page to a PDF (Page.printToPDF) and write it to a local file path. Use for archiving an article/invoice/report exactly as laid out, when a screenshot is not enough (multi-page, selectable text)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("path", mcp.Required(), mcp.Description("Output .pdf file path.")),
|
|
mcp.WithBoolean("landscape", mcp.Description("Landscape orientation. Default false (portrait).")),
|
|
mcp.WithBoolean("print_background", mcp.Description("Include background graphics/colors. Default false.")),
|
|
mcp.WithNumber("scale", mcp.Description("Render scale. Default 1.0.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handlePagePDF(_ context.Context, _ mcp.CallToolRequest, a pagePDFArgs) (*mcp.CallToolResult, error) {
|
|
if a.Path == "" {
|
|
return mcp.NewToolResultError("path is required"), nil
|
|
}
|
|
opts := browser.CdpPrintPDFOpts{
|
|
Landscape: a.Landscape,
|
|
PrintBackground: a.PrintBackground,
|
|
Scale: a.Scale,
|
|
}
|
|
var data []byte
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
var e error
|
|
data, e = browser.CdpPrintPDF(c, opts)
|
|
return e
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
if e := os.WriteFile(a.Path, data, 0o644); e != nil {
|
|
return mcp.NewToolResultError("saving pdf to " + a.Path + ": " + e.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(fmt.Sprintf("pdf saved to %s (%d bytes)", a.Path, len(data))), nil
|
|
}
|
|
|
|
// ---- page_get_text ----
|
|
|
|
// pageGetTextArgs holds the decoded arguments of the page_get_text tool.
type pageGetTextArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Selector is forwarded unchanged to browser.CdpGetText.
	Selector string `json:"selector"`
	// MaxBytes is the size limit; the handler substitutes 20000 when it is 0.
	MaxBytes int `json:"max_bytes"`
}
|
|
|
|
func pageGetTextTool() mcp.Tool {
|
|
return mcp.NewTool("page_get_text",
|
|
mcp.WithDescription("Devuelve el texto visible (innerText) de la página o de un elemento (selector CSS), truncado a max_bytes. Preferir sobre page_get_html cuando solo necesitas leer contenido — no revienta el contexto."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("selector", mcp.Description("Selector CSS opcional. Vacío = body (toda la página).")),
|
|
mcp.WithNumber("max_bytes", mcp.Description("Máximo de bytes a devolver. Default 20000. 0 = sin límite.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handlePageGetText(_ context.Context, _ mcp.CallToolRequest, a pageGetTextArgs) (*mcp.CallToolResult, error) {
|
|
maxBytes := a.MaxBytes
|
|
if maxBytes == 0 {
|
|
maxBytes = 20000
|
|
}
|
|
var text string
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
var e error
|
|
text, e = browser.CdpGetText(c, a.Selector, maxBytes)
|
|
return e
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(text), nil
|
|
}
|
|
|
|
// ---- page_perceive ----
|
|
|
|
// pagePerceiveArgs holds the decoded arguments of the page_perceive tool.
type pagePerceiveArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// TabID is accepted for compatibility; the handler does not read it.
	TabID string `json:"tab_id"`
	// FrameID is forwarded to perceiveOutlineFrame; empty means no frame.
	FrameID string `json:"frame_id"`
	// MaxChars is the outline limit; the handler substitutes 20000 when it is 0.
	MaxChars int `json:"max_chars"`
}
|
|
|
|
func pagePerceiveTool() mcp.Tool {
|
|
return mcp.NewTool("page_perceive",
|
|
mcp.WithDescription("Devuelve un outline indentado y accionable del árbol de accesibilidad (roles, nombres, #ref) — la forma compacta de que el agente 'perciba' la página sin reventar el contexto. Generado de forma nativa en Go sobre la conexión CDP viva (sin subprocess ni Python). Para elegir la pestaña, usa tab_select ANTES de percibir (la conexión del pool ya está fijada a esa pestaña). Si frame_id se pasa, percibe DENTRO de ese iframe (obtén el id con frame_list)."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("tab_id", mcp.Description("OBSOLETO: la conexión del pool ya está fijada a una pestaña vía tab_select. Para elegir pestaña usa tab_select primero; este campo se conserva por compatibilidad y se ignora.")),
|
|
mcp.WithString("frame_id", mcp.Description("Frame ID (de frame_list) para percibir DENTRO de ese iframe. Vacío = página entera.")),
|
|
mcp.WithNumber("max_chars", mcp.Description("Máximo de chars del outline. Default 20000.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handlePagePerceive(_ context.Context, _ mcp.CallToolRequest, a pagePerceiveArgs) (*mcp.CallToolResult, error) {
|
|
port := portOr(a.Port)
|
|
maxChars := a.MaxChars
|
|
if maxChars == 0 {
|
|
maxChars = 20000
|
|
}
|
|
|
|
outline, err := d.perceiveOutlineFrame(port, a.FrameID, maxChars)
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(outline), nil
|
|
}
|
|
|
|
// perceiveOutline genera el outline AX accionable de la página entera sobre la
|
|
// conexión viva del pool (sin subprocess). Lo usan los auto-observe de las tools
|
|
// *_ref tras una acción.
|
|
func (d *deps) perceiveOutline(port, maxChars int) (string, error) {
|
|
return d.perceiveOutlineFrame(port, "", maxChars)
|
|
}
|
|
|
|
// perceiveOutlineFrame genera el outline AX accionable de forma NATIVA en Go,
|
|
// reusando la conexión CDP viva del pool (browser.CdpGetAXOutline). Si frameID
|
|
// != "", percibe DENTRO de ese iframe; frameID == "" = página entera. No lanza
|
|
// subprocess `fn run` ni levanta el venv de Python — la lógica de poda y render
|
|
// del árbol de accesibilidad vive en la función del registry.
|
|
func (d *deps) perceiveOutlineFrame(port int, frameID string, maxChars int) (string, error) {
|
|
var outline string
|
|
err := d.withConn(port, func(c *browser.CDPConn) error {
|
|
var e error
|
|
outline, e = browser.CdpGetAXOutline(c, frameID, maxChars)
|
|
return e
|
|
})
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return truncate(outline, htmlMax), nil
|
|
}
|
|
|
|
// ---- page_get_html ----
|
|
|
|
// pageGetHTMLArgs holds the decoded arguments of the page_get_html tool.
type pageGetHTMLArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
}
|
|
|
|
func pageGetHTMLTool() mcp.Tool {
|
|
return mcp.NewTool("page_get_html",
|
|
mcp.WithDescription("Return the current page's full serialized HTML (outerHTML). Truncated to 200000 chars."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handlePageGetHTML(_ context.Context, _ mcp.CallToolRequest, a pageGetHTMLArgs) (*mcp.CallToolResult, error) {
|
|
var html string
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
var e error
|
|
html, e = browser.CdpGetHTML(c)
|
|
return e
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(truncate(html, htmlMax)), nil
|
|
}
|
|
|
|
// ---- page_eval_js (MUTA) ----
|
|
|
|
// pageEvalJSArgs holds the decoded arguments of the page_eval_js tool.
type pageEvalJSArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Expression is the JavaScript source to evaluate; the handler rejects an
	// empty value.
	Expression string `json:"expression"`
}
|
|
|
|
func pageEvalJSTool() mcp.Tool {
|
|
return mcp.NewTool("page_eval_js",
|
|
mcp.WithDescription("Evaluate a JavaScript expression in the page context via Runtime.evaluate. Returns the stringified result."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("expression", mcp.Required(), mcp.Description("JavaScript expression to evaluate.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handlePageEvalJS(_ context.Context, _ mcp.CallToolRequest, a pageEvalJSArgs) (*mcp.CallToolResult, error) {
|
|
if a.Expression == "" {
|
|
return mcp.NewToolResultError("expression is required"), nil
|
|
}
|
|
var res string
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
var e error
|
|
res, e = browser.CdpEvaluate(c, a.Expression)
|
|
return e
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
return mcp.NewToolResultText(truncate(res, htmlMax)), nil
|
|
}
|
|
|
|
// ---- page_screenshot ----
|
|
|
|
// pageScreenshotArgs holds the decoded arguments of the page_screenshot tool.
type pageScreenshotArgs struct {
	// Port is the CDP port; the handler resolves it through portOr.
	Port int `json:"port"`
	// Path, when non-empty, is where the handler also writes the image bytes.
	Path string `json:"path"`
	// FullPage is copied into browser.CdpScreenshotOpts.FullPage.
	FullPage bool `json:"full_page"`
}
|
|
|
|
func pageScreenshotTool() mcp.Tool {
|
|
return mcp.NewTool("page_screenshot",
|
|
mcp.WithDescription("Capture a screenshot of the current page and return it as image content so the LLM can actually see the pixels. Optionally also writes it to a local path. Use this when the accessibility outline (page_perceive) is not enough — e.g. canvas/visual layouts — then act with dom_click_xy over what you see."),
|
|
mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
|
|
mcp.WithString("path", mcp.Description("Optional output file path (.png or .jpg). If given, the image is ALSO saved to disk; the image content is always returned regardless.")),
|
|
mcp.WithBoolean("full_page", mcp.Description("Capture the full scroll height instead of just the viewport.")),
|
|
)
|
|
}
|
|
|
|
func (d *deps) handlePageScreenshot(_ context.Context, _ mcp.CallToolRequest, a pageScreenshotArgs) (*mcp.CallToolResult, error) {
|
|
opts := browser.CdpScreenshotOpts{FullPage: a.FullPage}
|
|
var data []byte
|
|
var mimeType string
|
|
err := d.withConn(portOr(a.Port), func(c *browser.CDPConn) error {
|
|
var e error
|
|
data, mimeType, e = browser.CdpScreenshotBytes(c, opts)
|
|
return e
|
|
})
|
|
if err != nil {
|
|
return mcp.NewToolResultError(err.Error()), nil
|
|
}
|
|
|
|
text := "screenshot captured"
|
|
// Si se pidió un path, persistimos además los bytes capturados (mismo origen
|
|
// que la imagen devuelta al LLM, así no se captura dos veces).
|
|
if a.Path != "" {
|
|
if e := os.WriteFile(a.Path, data, 0o644); e != nil {
|
|
return mcp.NewToolResultError("saving screenshot to " + a.Path + ": " + e.Error()), nil
|
|
}
|
|
text = "screenshot saved to " + a.Path
|
|
}
|
|
|
|
b64 := base64.StdEncoding.EncodeToString(data)
|
|
return mcp.NewToolResultImage(text, b64, mimeType), nil
|
|
}
|