8742cb25be
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
410 lines
10 KiB
Go
410 lines
10 KiB
Go
package browser
|
|
|
|
import (
	"fmt"
	"strconv"
	"strings"
	"unicode"
)
|
|
|
|
// axoActionableRoles is the set of AX roles that receive a "#ref=" marker in
// the rendered outline, i.e. the roles the LLM is allowed to refer to.
// According to the original note this list mirrors _ACTIONABLE_ROLES in
// render_ax_outline.py; keep both in sync.
var axoActionableRoles = map[string]struct{}{
	// Activation and navigation.
	"button": {},
	"link":   {},
	"tab":    {},

	// Text entry.
	"textbox":   {},
	"searchbox": {},

	// Toggles and ranges.
	"checkbox":   {},
	"radio":      {},
	"switch":     {},
	"slider":     {},
	"spinbutton": {},

	// Pickers and their entries.
	"combobox": {},
	"listbox":  {},
	"option":   {},

	// Menus.
	"menuitem":         {},
	"menuitemcheckbox": {},
	"menuitemradio":    {},

	// Trees and grids.
	"treeitem": {},
	"gridcell": {},
}
|
|
|
|
// axoSkipRoles is the set of roles that carry no semantic value: a node with
// one of these roles produces no outline line and its children are rendered
// at the node's own depth instead. According to the original note this list
// mirrors _SKIP_ROLES in render_ax_outline.py.
var axoSkipRoles = map[string]struct{}{
	"ignored":      {},
	"none":         {},
	"presentation": {},
}
|
|
|
|
// axoMaxDepth is the deepest nesting level the outline renderer descends to;
// it protects against pathological (extremely deep) AX trees. Per the
// original note it has the same value as _MAX_DEPTH in the Python renderer,
// where it guards against RecursionError.
const axoMaxDepth = 60
|
|
|
|
// axNode is the internal, already-extracted form of one CDP AXNode. It is
// filled from the map[string]any of the getFullAXTree response, so the trim
// and render helpers can work on plain structs and be tested without Chrome.
// Every identifier is kept as a string.
type axNode struct {
	nodeID           string   // "nodeId" of the AX node
	backendDOMNodeID string   // "backendDOMNodeId"; preferred source for the #ref marker
	ignored          bool     // "ignored" flag; ignored nodes are never rendered
	role             string   // inner value of the "role" object
	name             string   // inner value of the "name" object
	value            string   // inner value of the "value" object (current field state)
	childIDs         []string // "childIds", in document order
	parentID         string   // "parentId"
}
|
|
|
|
// CdpGetAXOutline percibe la pagina (o un iframe concreto via frameID) como un
|
|
// outline accesible indentado y accionable, reusando la conexion CDP viva del
|
|
// pool — sin abrir un WebSocket nuevo ni levantar el venv de Python.
|
|
//
|
|
// Envia Accessibility.enable (idempotente) y Accessibility.getFullAXTree. Si
|
|
// frameID != "", pasa {"frameId": frameID} para obtener el arbol DENTRO de ese
|
|
// iframe; con frameID == "" obtiene el arbol completo de la pagina (depth -1).
|
|
//
|
|
// El resultado se poda (trim) y luego se renderiza replicando exactamente el
|
|
// formato del pipeline Python cdp_get_ax_tree -> trim_ax_tree -> render_ax_outline:
|
|
// indentacion de 2 espacios por nivel, `role "name"`, ` = 'value'` para inputs,
|
|
// y marcador ` #ref=<backendDOMNodeId>` en roles accionables. maxChars > 0
|
|
// trunca y añade "\n…[outline truncado]"; maxChars <= 0 = sin limite.
|
|
func CdpGetAXOutline(c *CDPConn, frameID string, maxChars int) (string, error) {
|
|
if c == nil {
|
|
return "", fmt.Errorf("cdp get ax outline: conexion nula")
|
|
}
|
|
|
|
// Accessibility.enable es idempotente; necesario antes de getFullAXTree.
|
|
if _, err := c.sendCDP("Accessibility.enable", nil); err != nil {
|
|
return "", fmt.Errorf("cdp get ax outline: Accessibility.enable: %w", err)
|
|
}
|
|
|
|
var params map[string]any
|
|
if frameID != "" {
|
|
params = map[string]any{"frameId": frameID}
|
|
}
|
|
|
|
res, err := c.sendCDP("Accessibility.getFullAXTree", params)
|
|
if err != nil {
|
|
return "", fmt.Errorf("cdp get ax outline: Accessibility.getFullAXTree: %w", err)
|
|
}
|
|
|
|
nodes := axoParseNodes(res)
|
|
trimmed := trimAXTree(nodes)
|
|
return renderAXOutline(trimmed, maxChars), nil
|
|
}
|
|
|
|
// axoParseNodes extrae la lista de axNode del result de getFullAXTree. Tras el
|
|
// JSON unmarshal a map[string]any, los nodos vienen como []any de
|
|
// map[string]any y los enteros (backendDOMNodeId, nodeId) como float64; nodeId y
|
|
// childIds suelen llegar como strings. Normalizamos todo a string.
|
|
func axoParseNodes(result map[string]any) []axNode {
|
|
raw, ok := result["nodes"].([]any)
|
|
if !ok {
|
|
return nil
|
|
}
|
|
out := make([]axNode, 0, len(raw))
|
|
for _, item := range raw {
|
|
m, ok := item.(map[string]any)
|
|
if !ok {
|
|
continue
|
|
}
|
|
n := axNode{
|
|
nodeID: axoStr(m["nodeId"]),
|
|
backendDOMNodeID: axoStr(m["backendDOMNodeId"]),
|
|
ignored: axoBool(m["ignored"]),
|
|
role: axoNested(m["role"]),
|
|
name: axoNested(m["name"]),
|
|
value: axoNested(m["value"]),
|
|
childIDs: axoStrSlice(m["childIds"]),
|
|
parentID: axoStr(m["parentId"]),
|
|
}
|
|
out = append(out, n)
|
|
}
|
|
return out
|
|
}
|
|
|
|
// axoNested extrae el campo "value" de un objeto CDP del tipo {value: ...} (role,
|
|
// name, value vienen asi). Devuelve "" si esta ausente o vacio.
|
|
func axoNested(v any) string {
|
|
m, ok := v.(map[string]any)
|
|
if !ok {
|
|
if v == nil {
|
|
return ""
|
|
}
|
|
return axoStr(v)
|
|
}
|
|
return axoStr(m["value"])
|
|
}
|
|
|
|
// axoStr normalises any JSON scalar to a string.
//
//   - nil becomes "".
//   - strings are returned unchanged.
//   - numbers (float64 after a generic JSON unmarshal) are written in plain
//     decimal notation with the fewest digits that round-trip: integral ids
//     such as 1234567 render as "1234567" (no ".0", no exponent), while
//     fractional values such as a slider position of 0.5 render as "0.5"
//     instead of being truncated to "0".
//   - booleans become "true" / "false".
//   - anything else falls back to fmt's %v formatting.
func axoStr(v any) string {
	switch t := v.(type) {
	case nil:
		return ""
	case string:
		return t
	case float64:
		// 'f' with precision -1 never uses exponent notation and never pads
		// with zeros, and unlike an int64 conversion it neither drops the
		// fractional part nor misbehaves for values outside the int64 range.
		return strconv.FormatFloat(t, 'f', -1, 64)
	case bool:
		return strconv.FormatBool(t)
	default:
		return fmt.Sprintf("%v", t)
	}
}
|
|
|
|
// axoBool returns v when it holds a bool and false for every other dynamic
// type, including nil.
func axoBool(v any) bool {
	if flag, isBool := v.(bool); isBool {
		return flag
	}
	return false
}
|
|
|
|
func axoStrSlice(v any) []string {
|
|
raw, ok := v.([]any)
|
|
if !ok {
|
|
return nil
|
|
}
|
|
out := make([]string, 0, len(raw))
|
|
for _, item := range raw {
|
|
out = append(out, axoStr(item))
|
|
}
|
|
return out
|
|
}
|
|
|
|
// trimAXTree compacta la lista de axNode descartando nodos irrelevantes y
|
|
// colapsando cadenas padre->hijo del mismo role. Puro: porta trim_ax_tree.py.
|
|
//
|
|
// Descarta: ignored=true; role 'generic'/'none' sin name ni childIds;
|
|
// role 'StaticText' con name vacio. Colapsa: nodo con exactamente 1 hijo del
|
|
// mismo role hereda los childIds del hijo (el hijo se descarta). Itera hasta
|
|
// convergencia. Preserva el orden original de aparicion.
|
|
func trimAXTree(nodes []axNode) []axNode {
|
|
if len(nodes) == 0 {
|
|
return nil
|
|
}
|
|
|
|
shouldDiscard := func(n axNode) bool {
|
|
if n.ignored {
|
|
return true
|
|
}
|
|
if (n.role == "generic" || n.role == "none") && n.name == "" && len(n.childIDs) == 0 {
|
|
return true
|
|
}
|
|
if n.role == "StaticText" && n.name == "" {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
byID := map[string]axNode{}
|
|
for _, n := range nodes {
|
|
if shouldDiscard(n) {
|
|
continue
|
|
}
|
|
byID[n.nodeID] = n
|
|
}
|
|
|
|
// Colapso iterativo hasta convergencia.
|
|
for {
|
|
changed := false
|
|
removed := map[string]struct{}{}
|
|
for _, node := range byID {
|
|
if _, gone := removed[node.nodeID]; gone {
|
|
continue
|
|
}
|
|
if len(node.childIDs) != 1 {
|
|
continue
|
|
}
|
|
childID := node.childIDs[0]
|
|
child, ok := byID[childID]
|
|
if !ok || child.role != node.role {
|
|
continue
|
|
}
|
|
// Fusionar: el padre hereda los childIds del hijo.
|
|
merged := node
|
|
merged.childIDs = child.childIDs
|
|
byID[node.nodeID] = merged
|
|
removed[childID] = struct{}{}
|
|
changed = true
|
|
}
|
|
if !changed {
|
|
break
|
|
}
|
|
for id := range removed {
|
|
delete(byID, id)
|
|
}
|
|
}
|
|
|
|
// Preservar orden original.
|
|
result := make([]axNode, 0, len(byID))
|
|
seen := map[string]struct{}{}
|
|
for _, n := range nodes {
|
|
node, ok := byID[n.nodeID]
|
|
if !ok {
|
|
continue
|
|
}
|
|
if _, dup := seen[n.nodeID]; dup {
|
|
continue
|
|
}
|
|
result = append(result, node)
|
|
seen[n.nodeID] = struct{}{}
|
|
}
|
|
return result
|
|
}
|
|
|
|
// renderAXOutline convierte axNode en un outline indentado, legible y
|
|
// accionable. Puro: porta render_ax_outline.py al caracter. La jerarquia se
|
|
// reconstruye con childIDs; las raices son nodeIds que no aparecen como hijo de
|
|
// nadie (fallback al primer nodo). maxChars > 0 trunca con sufijo.
|
|
func renderAXOutline(nodes []axNode, maxChars int) string {
|
|
if len(nodes) == 0 {
|
|
return ""
|
|
}
|
|
|
|
byID := map[string]axNode{}
|
|
for _, n := range nodes {
|
|
if n.nodeID != "" {
|
|
byID[n.nodeID] = n
|
|
}
|
|
}
|
|
|
|
allChildIDs := map[string]struct{}{}
|
|
for _, n := range nodes {
|
|
for _, cid := range n.childIDs {
|
|
allChildIDs[cid] = struct{}{}
|
|
}
|
|
}
|
|
|
|
var roots []axNode
|
|
for _, n := range nodes {
|
|
if _, isChild := allChildIDs[n.nodeID]; !isChild {
|
|
roots = append(roots, n)
|
|
}
|
|
}
|
|
if len(roots) == 0 {
|
|
roots = []axNode{nodes[0]}
|
|
}
|
|
|
|
var lines []string
|
|
visited := map[string]struct{}{} // guard de ciclo: un nodeId no se renderiza dos veces
|
|
|
|
var renderNode func(node axNode, depth int)
|
|
renderNode = func(node axNode, depth int) {
|
|
nid := node.nodeID
|
|
if depth > axoMaxDepth {
|
|
return
|
|
}
|
|
if nid != "" {
|
|
if _, dup := visited[nid]; dup {
|
|
return
|
|
}
|
|
visited[nid] = struct{}{}
|
|
}
|
|
|
|
if node.ignored {
|
|
return
|
|
}
|
|
|
|
role := node.role
|
|
if _, skip := axoSkipRoles[role]; role == "" || skip {
|
|
// Nodos sin role util: elevar los hijos al nivel actual.
|
|
for _, cid := range node.childIDs {
|
|
if child, ok := byID[cid]; ok {
|
|
renderNode(child, depth)
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
indent := strings.Repeat(" ", depth)
|
|
var base string
|
|
if node.name != "" {
|
|
base = fmt.Sprintf("%s%s %q", indent, role, node.name)
|
|
} else {
|
|
base = indent + role
|
|
}
|
|
|
|
// Estado actual del campo (texto escrito, valor de slider/combobox).
|
|
if node.value != "" {
|
|
base += " = " + axoPyRepr(node.value)
|
|
}
|
|
|
|
// Ref accionable, sin padding.
|
|
if _, ok := axoActionableRoles[role]; ok {
|
|
ref := axoRefID(node)
|
|
if ref != "" {
|
|
base += " #ref=" + ref
|
|
}
|
|
}
|
|
|
|
lines = append(lines, base)
|
|
|
|
for _, cid := range node.childIDs {
|
|
if child, ok := byID[cid]; ok {
|
|
renderNode(child, depth+1)
|
|
}
|
|
}
|
|
}
|
|
|
|
for _, root := range roots {
|
|
renderNode(root, 0)
|
|
}
|
|
|
|
result := strings.Join(lines, "\n")
|
|
|
|
if maxChars > 0 && len(result) > maxChars {
|
|
result = strings.TrimRight(result[:maxChars], " \t\n\r\v\f")
|
|
result += "\n…[outline truncado]"
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
// axoRefID devuelve el ref estable del nodo: backendDOMNodeId (apunta al nodo DOM
|
|
// real, estable mientras el nodo viva) con fallback al nodeId. Igual que
|
|
// _ref_id() del .py.
|
|
func axoRefID(n axNode) string {
|
|
if n.backendDOMNodeID != "" {
|
|
return n.backendDOMNodeID
|
|
}
|
|
return n.nodeID
|
|
}
|
|
|
|
// axoPyRepr formats s the way Python's repr() formats a str, so the value
// part of an outline line matches the output of `{value!r}` in the Python
// renderer.
//
// Quoting: single quotes by default; double quotes when s contains a single
// quote but no double quote.
//
// Escaping, in order of precedence:
//   - backslash and the chosen delimiter are prefixed with a backslash;
//   - newline, carriage return and tab become \n, \r and \t;
//   - other ASCII control characters and DEL become \xhh;
//   - non-ASCII code points that are not printable (Unicode categories
//     "Other" and "Separator", e.g. NBSP or zero-width space) become \xhh,
//     \uhhhh or \Uhhhhhhhh depending on their size, with lowercase hex;
//   - everything else is copied unchanged.
//
// s is iterated rune by rune, so bytes that are not valid UTF-8 are emitted
// as U+FFFD.
func axoPyRepr(s string) string {
	quote := '\''
	if strings.Contains(s, "'") && !strings.Contains(s, "\"") {
		quote = '"'
	}

	var b strings.Builder
	b.Grow(len(s) + 2)
	b.WriteRune(quote)
	for _, r := range s {
		switch {
		case r == '\\' || r == quote:
			b.WriteByte('\\')
			b.WriteRune(r)
		case r == '\n':
			b.WriteString(`\n`)
		case r == '\r':
			b.WriteString(`\r`)
		case r == '\t':
			b.WriteString(`\t`)
		case r < ' ' || r == 0x7f:
			// Remaining non-printable ASCII.
			fmt.Fprintf(&b, `\x%02x`, r)
		case r < 0x7f || unicode.IsPrint(r):
			// Printable ASCII, or a printable non-ASCII code point.
			b.WriteRune(r)
		case r <= 0xff:
			fmt.Fprintf(&b, `\x%02x`, r)
		case r <= 0xffff:
			fmt.Fprintf(&b, `\u%04x`, r)
		default:
			fmt.Fprintf(&b, `\U%08x`, r)
		}
	}
	b.WriteRune(quote)
	return b.String()
}
|