feat: P0 LLM-readiness — Chrome aislado (9333), tab_select determinista, page_get_text, page_perceive

This commit is contained in:
agent
2026-06-06 11:15:12 +02:00
parent 6ecaf9a969
commit 9af2e75246
7 changed files with 272 additions and 36 deletions
+107 -1
View File
@@ -2,6 +2,10 @@ package main
import (
"context"
"fmt"
"os/exec"
"path/filepath"
"strings"
"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
@@ -11,9 +15,12 @@ import (
const htmlMax = 200_000
// registerReadTools wires page_get_html, page_eval_js (MUTA), page_screenshot.
// registerReadTools wires page_get_html, page_get_text, page_perceive,
// page_eval_js (MUTA), page_screenshot.
func registerReadTools(s *server.MCPServer, d *deps) {
s.AddTool(pageGetHTMLTool(), mcp.NewTypedToolHandler(d.handlePageGetHTML))
s.AddTool(pageGetTextTool(), mcp.NewTypedToolHandler(d.handlePageGetText))
s.AddTool(pagePerceiveTool(), mcp.NewTypedToolHandler(d.handlePagePerceive))
s.AddTool(pageScreenshotTool(), mcp.NewTypedToolHandler(d.handlePageScreenshot))
if !d.readOnly {
@@ -21,6 +28,105 @@ func registerReadTools(s *server.MCPServer, d *deps) {
}
}
// ---- page_get_text ----
// pageGetTextArgs is the JSON argument payload decoded for the page_get_text
// tool.
type pageGetTextArgs struct {
	// Port is the CDP port to connect to. The handler passes it through
	// portOr before connecting.
	Port int `json:"port"`
	// Selector is an optional CSS selector forwarded to the text extractor.
	Selector string `json:"selector"`
	// MaxBytes is the requested size limit; the handler substitutes 20000
	// when it is zero.
	MaxBytes int `json:"max_bytes"`
}
// pageGetTextTool builds the MCP tool definition for page_get_text: its name,
// description, and the three optional arguments (port, selector, max_bytes).
func pageGetTextTool() mcp.Tool {
	return mcp.NewTool("page_get_text",
		mcp.WithDescription("Devuelve el texto visible (innerText) de la página o de un elemento (selector CSS), truncado a max_bytes. Preferir sobre page_get_html cuando solo necesitas leer contenido — no revienta el contexto."),
		mcp.WithNumber("port", mcp.Description("CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario.")),
		mcp.WithString("selector", mcp.Description("Selector CSS opcional. Vacío = body (toda la página).")),
		// The handler replaces a zero (or omitted) max_bytes with 20000, so
		// the schema must not advertise 0 as "no limit".
		mcp.WithNumber("max_bytes", mcp.Description("Máximo de bytes a devolver. Default 20000 (también cuando se omite o vale 0).")),
	)
}
// handlePageGetText serves the page_get_text tool. It opens a CDP connection
// on portOr(a.Port), asks browser.CdpGetText for the text of a.Selector, and
// returns that text as the tool result. A zero MaxBytes is replaced by 20000
// before the call. Failures are reported as an MCP error result; the Go error
// return is always nil.
func (d *deps) handlePageGetText(_ context.Context, _ mcp.CallToolRequest, a pageGetTextArgs) (*mcp.CallToolResult, error) {
	limit := a.MaxBytes
	if limit == 0 {
		limit = 20000
	}

	var out string
	fetch := func(conn *browser.CDPConn) error {
		got, getErr := browser.CdpGetText(conn, a.Selector, limit)
		out = got
		return getErr
	}

	if connErr := d.withConn(portOr(a.Port), fetch); connErr != nil {
		return mcp.NewToolResultError(connErr.Error()), nil
	}
	return mcp.NewToolResultText(out), nil
}
// ---- page_perceive ----
// pagePerceiveArgs is the JSON argument payload decoded for the page_perceive
// tool.
type pagePerceiveArgs struct {
	// Port is the CDP port; the handler passes it through portOr.
	Port int `json:"port"`
	// TabID is the target id of the tab to inspect. When empty, the handler
	// picks the first listed tab whose type is "page".
	TabID string `json:"tab_id"`
	// MaxChars is the requested outline size limit; the handler substitutes
	// 20000 when it is zero.
	MaxChars int `json:"max_chars"`
}
// pagePerceiveTool builds the MCP tool definition for page_perceive: its name,
// description, and the three optional arguments (port, tab_id, max_chars).
func pagePerceiveTool() mcp.Tool {
	const (
		toolHelp     = "Devuelve un outline indentado y accionable del árbol de accesibilidad (roles, nombres, #ref) — la forma compacta de que el agente 'perciba' la página sin reventar el contexto. Si tab_id se omite, usa la primera pestaña page. Gotcha: requiere el binario `fn` y el venv de Python del registry disponibles en runtime."
		portHelp     = "CDP port. Default 9333 (Chrome isolated del MCP); usa 9222 explícito solo para adjuntarte al navegador diario."
		tabIDHelp    = "Target id de la pestaña. Vacío = primera pestaña page."
		maxCharsHelp = "Máximo de chars del outline. Default 20000."
	)

	return mcp.NewTool(
		"page_perceive",
		mcp.WithDescription(toolHelp),
		mcp.WithNumber("port", mcp.Description(portHelp)),
		mcp.WithString("tab_id", mcp.Description(tabIDHelp)),
		mcp.WithNumber("max_chars", mcp.Description(maxCharsHelp)),
	)
}
// handlePagePerceive serves the page_perceive tool. It runs the
// cdp_perceive_outline function through the `fn` binary located in the
// registry root and returns the subprocess's stdout as the tool result.
//
// Argument handling:
//   - a.Port goes through portOr.
//   - a.MaxChars of zero is replaced by 20000.
//   - an empty a.TabID is resolved to the first listed tab of type "page".
//
// The subprocess is bound to ctx, so cancelling the request terminates it
// instead of leaving it running. Failures are reported as an MCP error result;
// the Go error return is always nil.
func (d *deps) handlePagePerceive(ctx context.Context, _ mcp.CallToolRequest, a pagePerceiveArgs) (*mcp.CallToolResult, error) {
	port := portOr(a.Port)
	maxChars := a.MaxChars
	if maxChars == 0 {
		maxChars = 20000
	}

	root, err := resolveRoot()
	if err != nil {
		return mcp.NewToolResultError("resolve registry root: " + err.Error()), nil
	}

	tabID := a.TabID
	if tabID == "" {
		tabs, err := browser.CdpListTabs("localhost", port)
		if err != nil {
			return mcp.NewToolResultError("list tabs: " + err.Error()), nil
		}
		for _, t := range tabs {
			if t.Type == "page" {
				tabID = t.ID
				break
			}
		}
		if tabID == "" {
			return mcp.NewToolResultError("no 'page' tab found on port " + fmt.Sprint(port)), nil
		}
	}

	// CommandContext ties the child process to the request's lifetime.
	cmd := exec.CommandContext(ctx, filepath.Join(root, "fn"), "run", "cdp_perceive_outline",
		"--debug-port", fmt.Sprint(port),
		"--tab-id", tabID,
		"--max-chars", fmt.Sprint(maxChars),
	)
	cmd.Dir = root

	var stdout, stderr strings.Builder
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		// Prefer the subprocess's own diagnostics; fall back to the exec
		// error when it printed nothing on stderr.
		msg := strings.TrimSpace(stderr.String())
		if msg == "" {
			msg = err.Error()
		}
		return mcp.NewToolResultError("cdp_perceive_outline failed: " + msg), nil
	}

	// max_chars is enforced by the subprocess; the output is additionally
	// passed through truncate with htmlMax (presumably a hard upper bound —
	// confirm against truncate's definition).
	return mcp.NewToolResultText(truncate(stdout.String(), htmlMax)), nil
}
// ---- page_get_html ----
type pageGetHTMLArgs struct {