5b10b419a2
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
55 lines
1.6 KiB
Go
55 lines
1.6 KiB
Go
package browser
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// CdpGetText retorna el texto visible (innerText) de la pagina o de un elemento.
|
|
// Si selector es "" lee document.body.innerText completo.
|
|
// Si selector no matchea ningun elemento retorna error.
|
|
// Si maxBytes > 0 trunca al limite dado (corte rune-safe) y añade sufijo con total original.
|
|
// Si maxBytes <= 0 no hay limite.
|
|
func CdpGetText(c *CDPConn, selector string, maxBytes int) (string, error) {
|
|
if c == nil {
|
|
return "", fmt.Errorf("cdp get text: conexion nula")
|
|
}
|
|
|
|
var expr string
|
|
if selector == "" {
|
|
expr = `document.body ? document.body.innerText : ""`
|
|
} else {
|
|
// Escapa el selector como string JSON para evitar inyeccion via comillas/backslash.
|
|
selectorJSON, err := json.Marshal(selector)
|
|
if err != nil {
|
|
return "", fmt.Errorf("cdp get text: escapar selector: %w", err)
|
|
}
|
|
expr = fmt.Sprintf(
|
|
`(function(){var e=document.querySelector(%s); return e ? e.innerText : "__FN_GET_TEXT_NOTFOUND__";})()`,
|
|
string(selectorJSON),
|
|
)
|
|
}
|
|
|
|
text, err := CdpEvaluate(c, expr)
|
|
if err != nil {
|
|
return "", fmt.Errorf("cdp get text: %w", err)
|
|
}
|
|
|
|
if selector != "" && text == "__FN_GET_TEXT_NOTFOUND__" {
|
|
return "", fmt.Errorf("cdp get text: elemento no encontrado: %s", selector)
|
|
}
|
|
|
|
if maxBytes > 0 && len(text) > maxBytes {
|
|
total := len(text)
|
|
// Corte rune-safe: retrocede hasta encontrar un rune valido completo.
|
|
cut := maxBytes
|
|
for cut > 0 && !utf8.RuneStart(text[cut]) {
|
|
cut--
|
|
}
|
|
text = text[:cut] + fmt.Sprintf("\n…[truncado, total %d bytes]", total)
|
|
}
|
|
|
|
return text, nil
|
|
}
|