Files
navegator/pkg/browser/navigation.go
T
Developer 3253828fef
Tests / Lint (push) Has been cancelled
Tests / Unit Tests (push) Has been cancelled
Tests / E2E Tests (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Initial commit: navegator - Chrome CDP automation for LLMs
Add complete navegator system for stealthy browser automation:
- CDP client with WebSocket communication
- Browser API with navigation, storage, network, runtime
- Stealth flags and anti-detection scripts
- Persistent profile support
- Examples and comprehensive documentation

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-03-24 23:33:07 +01:00

495 lines
12 KiB
Go

package browser
import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"time"
)
// NavigateOptions configures a single call to Browser.Navigate.
type NavigateOptions struct {
// WaitUntil selects the event that marks the navigation as complete:
// "load" = load event, "domcontentloaded" = DOM ready,
// "networkidle" = network idle. Navigate treats any other value as "load".
WaitUntil string
// Timeout bounds the whole navigation, including the wait for completion.
Timeout time.Duration
// Referer is an optional custom referrer; it is only sent when non-empty.
Referer string
}
// DefaultNavigateOptions returns a freshly allocated NavigateOptions that
// waits for the "load" event and allows 30 seconds for the navigation.
func DefaultNavigateOptions() *NavigateOptions {
	defaults := new(NavigateOptions)
	defaults.WaitUntil = "load"
	defaults.Timeout = 30 * time.Second
	return defaults
}
// Navigate loads url in the page and blocks until the condition selected by
// opts.WaitUntil is met or the timeout expires.
//
// A nil opts means DefaultNavigateOptions. A non-positive opts.Timeout falls
// back to the default timeout instead of expiring immediately. With
// WaitUntil == "networkidle" the navigation is complete once no request has
// been in flight for 500ms; "domcontentloaded" waits for
// Page.domContentEventFired; any other value waits for Page.loadEventFired.
//
// It returns an error when Page.navigate fails, when the browser reports an
// errorText for the navigation, or when the wait times out or ctx is
// cancelled. Every outcome is passed to the recorder when one is configured.
//
// NOTE(review): handlers registered through cdpClient.On are not removed
// here (no unregistration API is visible in this file), so they accumulate
// across calls. They are written to be harmless once this call has returned.
func (b *Browser) Navigate(ctx context.Context, url string, opts *NavigateOptions) error {
	if opts == nil {
		opts = DefaultNavigateOptions()
	}
	timeout := opts.Timeout
	if timeout <= 0 {
		// A zero-value Timeout would create an already-expired context.
		timeout = DefaultNavigateOptions().Timeout
	}
	navCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	// record reports the outcome to the recorder and hands err back so every
	// exit path can be written as "return record(...)".
	record := func(err error) error {
		if b.recorder != nil {
			b.recorder.Record("Navigate", map[string]interface{}{
				"url":       url,
				"waitUntil": opts.WaitUntil,
			}, nil, err)
		}
		return err
	}

	params := map[string]interface{}{
		"url": url,
	}
	if opts.Referer != "" {
		params["referrer"] = opts.Referer
	}

	// loadedCh is buffered and written with a non-blocking send, so an event
	// that fires more than once (or after this call returned) can neither
	// panic nor block the goroutine delivering CDP events.
	loadedCh := make(chan struct{}, 1)
	signalLoaded := func() {
		select {
		case loadedCh <- struct{}{}:
		default:
		}
	}

	switch opts.WaitUntil {
	case "domcontentloaded":
		b.cdpClient.On("Page.domContentEventFired", func(json.RawMessage) {
			signalLoaded()
		})
	case "networkidle":
		const networkIdleWindow = 500 * time.Millisecond

		// Handlers only forward +1/-1 deltas; the goroutine below owns the
		// counter and the timer, so no shared state needs locking.
		deltas := make(chan int)
		report := func(delta int) {
			select {
			case deltas <- delta:
			case <-navCtx.Done():
				// Navigate has returned; drop the event.
			}
		}
		b.cdpClient.On("Network.requestWillBeSent", func(json.RawMessage) {
			report(1)
		})
		b.cdpClient.On("Network.loadingFinished", func(json.RawMessage) {
			report(-1)
		})
		b.cdpClient.On("Network.loadingFailed", func(json.RawMessage) {
			report(-1)
		})

		// The tracker lives exactly as long as navCtx, which is cancelled
		// when Navigate returns.
		go func() {
			idleTimer := time.NewTimer(networkIdleWindow)
			defer idleTimer.Stop()
			inFlight := 0
			for {
				select {
				case <-navCtx.Done():
					return
				case delta := <-deltas:
					inFlight += delta
					if inFlight < 0 {
						// Completion of a request that started before the
						// handlers were registered.
						inFlight = 0
					}
					// Stop and drain before re-arming so a stale tick can
					// never be mistaken for a fresh idle period.
					if !idleTimer.Stop() {
						select {
						case <-idleTimer.C:
						default:
						}
					}
					if inFlight == 0 {
						idleTimer.Reset(networkIdleWindow)
					}
				case <-idleTimer.C:
					signalLoaded()
				}
			}
		}()
	default: // "load"
		b.cdpClient.On("Page.loadEventFired", func(json.RawMessage) {
			signalLoaded()
		})
	}

	// Page.navigate reports failures such as DNS or connection errors through
	// errorText in its result rather than as a protocol error.
	var navResult struct {
		ErrorText string `json:"errorText"`
	}
	if err := b.cdpClient.Execute(navCtx, "Page.navigate", params, &navResult); err != nil {
		return record(fmt.Errorf("failed to navigate: %w", err))
	}
	if navResult.ErrorText != "" {
		return record(fmt.Errorf("failed to navigate: %s", navResult.ErrorText))
	}

	select {
	case <-loadedCh:
		return record(nil)
	case <-navCtx.Done():
		return record(fmt.Errorf("navigation timeout: %w", navCtx.Err()))
	}
}
// Click performs a left mouse click on the first element matching the CSS
// selector.
//
// The element is resolved with querySelector and its box with getElementBox.
// A mousePressed and a mouseReleased event are then dispatched at the centre
// of that box, separated by a 50ms pause so the click looks less synthetic.
// The pause is aborted as soon as ctx is done.
//
// It returns the lookup or box-model error unchanged, or a wrapped error when
// dispatching either mouse event fails. Every outcome, success or failure, is
// passed to the recorder when one is configured.
func (b *Browser) Click(ctx context.Context, selector string) error {
	// record reports the outcome once and hands err back to the caller.
	record := func(err error) error {
		if b.recorder != nil {
			b.recorder.Record("Click", map[string]interface{}{"selector": selector}, nil, err)
		}
		return err
	}

	nodeID, err := b.querySelector(ctx, selector)
	if err != nil {
		return record(err)
	}

	box, err := b.getElementBox(ctx, nodeID)
	if err != nil {
		return record(err)
	}

	// Click in the middle of the element.
	x := box.X + box.Width/2
	y := box.Y + box.Height/2

	dispatch := func(eventType string) error {
		return b.cdpClient.Execute(ctx, "Input.dispatchMouseEvent", map[string]interface{}{
			"type":       eventType,
			"x":          x,
			"y":          y,
			"button":     "left",
			"clickCount": 1,
		}, nil)
	}

	if err := dispatch("mousePressed"); err != nil {
		return record(fmt.Errorf("failed to press mouse: %w", err))
	}

	// Short, cancellable pause between press and release.
	pause := time.NewTimer(50 * time.Millisecond)
	defer pause.Stop()
	select {
	case <-pause.C:
	case <-ctx.Done():
		return record(fmt.Errorf("failed to release mouse: %w", ctx.Err()))
	}

	if err := dispatch("mouseReleased"); err != nil {
		return record(fmt.Errorf("failed to release mouse: %w", err))
	}

	return record(nil)
}
// Type focuses the first element matching selector and sends text to it one
// character at a time.
//
// A nil opts means DefaultTypeOptions. When opts.Delay is positive the method
// waits that long between consecutive characters (not after the last one).
// The wait is aborted as soon as ctx is done, in which case ctx.Err() is
// returned.
//
// It returns the first error from Focus or from dispatching a character.
// Every outcome is passed to the recorder, when one is configured, with the
// selector, the text and the delay.
func (b *Browser) Type(ctx context.Context, selector string, text string, opts *TypeOptions) error {
	if opts == nil {
		opts = DefaultTypeOptions()
	}

	// record reports the outcome once and hands err back to the caller.
	record := func(err error) error {
		if b.recorder != nil {
			b.recorder.Record("Type", map[string]interface{}{
				"selector": selector,
				"text":     text,
				"delay":    opts.Delay.String(),
			}, nil, err)
		}
		return err
	}

	if err := b.Focus(ctx, selector); err != nil {
		return record(err)
	}

	// i is the byte offset of the rune, so i == 0 identifies the first one.
	for i, char := range text {
		if i > 0 && opts.Delay > 0 {
			pause := time.NewTimer(opts.Delay)
			select {
			case <-pause.C:
			case <-ctx.Done():
				pause.Stop()
				return record(ctx.Err())
			}
		}
		if err := b.typeChar(ctx, string(char)); err != nil {
			return record(err)
		}
	}

	return record(nil)
}
// TypeOptions configures how Browser.Type sends text.
type TypeOptions struct {
// Delay is the pause between characters (looks more natural than
// sending everything at once).
Delay time.Duration
}
// DefaultTypeOptions returns a freshly allocated TypeOptions with a 50ms
// delay between characters.
func DefaultTypeOptions() *TypeOptions {
	defaults := new(TypeOptions)
	defaults.Delay = 50 * time.Millisecond
	return defaults
}
// typeChar sends a single character to the page as an
// Input.dispatchKeyEvent of type "char" and returns the command's error.
func (b *Browser) typeChar(ctx context.Context, char string) error {
	return b.cdpClient.Execute(ctx, "Input.dispatchKeyEvent", map[string]interface{}{
		"type": "char",
		"text": char,
	}, nil)
}
// Focus gives input focus to the first element matching the CSS selector.
// It returns the lookup error unchanged when the element cannot be resolved,
// otherwise the result of the DOM.focus command.
func (b *Browser) Focus(ctx context.Context, selector string) error {
	target, lookupErr := b.querySelector(ctx, selector)
	if lookupErr != nil {
		return lookupErr
	}

	return b.cdpClient.Execute(ctx, "DOM.focus", map[string]interface{}{
		"nodeId": target,
	}, nil)
}
// Screenshot captures the page as a PNG and returns the decoded image bytes.
// fullPage is forwarded as the captureBeyondViewport parameter of
// Page.captureScreenshot. Errors from the command or from base64-decoding
// its payload are returned wrapped.
func (b *Browser) Screenshot(ctx context.Context, fullPage bool) ([]byte, error) {
	var capture struct {
		Data string `json:"data"`
	}
	request := map[string]interface{}{
		"format":                "png",
		"captureBeyondViewport": fullPage,
	}

	err := b.cdpClient.Execute(ctx, "Page.captureScreenshot", request, &capture)
	if err != nil {
		return nil, fmt.Errorf("failed to capture screenshot: %w", err)
	}

	// The protocol delivers the image base64-encoded.
	png, err := base64.StdEncoding.DecodeString(capture.Data)
	if err != nil {
		return nil, fmt.Errorf("failed to decode screenshot: %w", err)
	}
	return png, nil
}
// GetHTML returns the outer HTML of the first element matching selector, or
// of the whole document when selector is empty.
//
// The whole-document case evaluates document.documentElement.outerHTML; the
// element case resolves the node and calls DOM.getOuterHTML. Lookup errors
// are returned unchanged; command errors are wrapped.
func (b *Browser) GetHTML(ctx context.Context, selector string) (string, error) {
	if selector != "" {
		// Element-scoped HTML.
		id, err := b.querySelector(ctx, selector)
		if err != nil {
			return "", err
		}

		var element struct {
			OuterHTML string `json:"outerHTML"`
		}
		err = b.cdpClient.Execute(ctx, "DOM.getOuterHTML", map[string]interface{}{
			"nodeId": id,
		}, &element)
		if err != nil {
			return "", fmt.Errorf("failed to get HTML: %w", err)
		}
		return element.OuterHTML, nil
	}

	// Whole-document HTML.
	var page struct {
		Result struct {
			Value string `json:"value"`
		} `json:"result"`
	}
	err := b.cdpClient.Execute(ctx, "Runtime.evaluate", map[string]interface{}{
		"expression": "document.documentElement.outerHTML",
	}, &page)
	if err != nil {
		return "", fmt.Errorf("failed to get HTML: %w", err)
	}
	return page.Result.Value, nil
}
// GetText returns the textContent of the first element matching the CSS
// selector, evaluated in the page with Runtime.evaluate.
//
// textContent includes text that is not rendered (for example hidden
// descendants). The expression uses optional chaining, so a selector that
// matches nothing does not raise in the page; presumably the result then
// carries no value and "" is returned with a nil error — confirm against
// callers that need to tell "missing" from "empty".
//
// The selector is embedded as a JSON-encoded string literal, so selectors
// containing quotes or backslashes (e.g. input[name='q']) are passed through
// intact and cannot alter the evaluated script.
func (b *Browser) GetText(ctx context.Context, selector string) (string, error) {
	quoted, err := json.Marshal(selector)
	if err != nil {
		return "", fmt.Errorf("failed to get text: %w", err)
	}
	script := fmt.Sprintf(`document.querySelector(%s)?.textContent`, quoted)

	var result struct {
		Result struct {
			Value string `json:"value"`
		} `json:"result"`
	}
	params := map[string]interface{}{
		"expression": script,
	}
	if err := b.cdpClient.Execute(ctx, "Runtime.evaluate", params, &result); err != nil {
		return "", fmt.Errorf("failed to get text: %w", err)
	}
	return result.Result.Value, nil
}
// WaitForSelector polls until an element matching the CSS selector exists or
// timeout elapses.
//
// The first lookup happens immediately; later lookups run every 100ms. It
// returns nil as soon as a lookup succeeds. When the deadline passes or ctx
// is cancelled, the returned error wraps ctx's error (so errors.Is works with
// context.DeadlineExceeded and context.Canceled) and includes the error of
// the last lookup attempt.
func (b *Browser) WaitForSelector(ctx context.Context, selector string, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	for {
		_, lookupErr := b.querySelector(ctx, selector)
		if lookupErr == nil {
			return nil
		}

		select {
		case <-ctx.Done():
			return fmt.Errorf("timeout waiting for selector: %s: %w (last attempt: %v)",
				selector, ctx.Err(), lookupErr)
		case <-ticker.C:
			// Try again.
		}
	}
}
// querySelector resolves a CSS selector to a DOM node id.
//
// It fetches the document root with DOM.getDocument and then runs
// DOM.querySelector beneath it. Command failures are returned wrapped; a
// node id of 0 is reported as "element not found".
func (b *Browser) querySelector(ctx context.Context, selector string) (int64, error) {
	// Step 1: the root node is the starting point for the query.
	var document struct {
		Root struct {
			NodeID int64 `json:"nodeId"`
		} `json:"root"`
	}
	err := b.cdpClient.Execute(ctx, "DOM.getDocument", nil, &document)
	if err != nil {
		return 0, fmt.Errorf("failed to get document: %w", err)
	}

	// Step 2: look the selector up under the root.
	var match struct {
		NodeID int64 `json:"nodeId"`
	}
	err = b.cdpClient.Execute(ctx, "DOM.querySelector", map[string]interface{}{
		"nodeId":   document.Root.NodeID,
		"selector": selector,
	}, &match)
	if err != nil {
		return 0, fmt.Errorf("failed to query selector: %w", err)
	}

	if match.NodeID != 0 {
		return match.NodeID, nil
	}
	return 0, fmt.Errorf("element not found: %s", selector)
}
// Box describes an element's rectangle: the X/Y coordinates of its origin
// corner plus its Width and Height.
type Box struct {
	X, Y          float64
	Width, Height float64
}
// getElementBox returns the axis-aligned bounding box of a node's content
// quad as reported by DOM.getBoxModel.
//
// The content quad is a flat array [x1, y1, x2, y2, x3, y3, x4, y4] holding
// the four corners. The box is computed from the minimum and maximum of all
// four corners rather than from two fixed ones, so its centre is also the
// centre of the element when the element is rotated, mirrored or otherwise
// transformed. For an untransformed element the result is the plain content
// rectangle.
//
// It returns a wrapped error when the command fails and "invalid box model"
// when fewer than eight coordinates are returned.
func (b *Browser) getElementBox(ctx context.Context, nodeID int64) (*Box, error) {
	var result struct {
		Model struct {
			Content []float64 `json:"content"`
		} `json:"model"`
	}
	params := map[string]interface{}{
		"nodeId": nodeID,
	}
	if err := b.cdpClient.Execute(ctx, "DOM.getBoxModel", params, &result); err != nil {
		return nil, fmt.Errorf("failed to get box model: %w", err)
	}

	quad := result.Model.Content
	if len(quad) < 8 {
		return nil, errors.New("invalid box model")
	}

	minX, maxX := quad[0], quad[0]
	minY, maxY := quad[1], quad[1]
	for i := 2; i < 8; i += 2 {
		px, py := quad[i], quad[i+1]
		if px < minX {
			minX = px
		}
		if px > maxX {
			maxX = px
		}
		if py < minY {
			minY = py
		}
		if py > maxY {
			maxY = py
		}
	}

	return &Box{
		X:      minX,
		Y:      minY,
		Width:  maxX - minX,
		Height: maxY - minY,
	}, nil
}
// Reload reloads the current page by issuing Page.reload and returns the
// command's error. It does not wait for the page to finish loading.
func (b *Browser) Reload(ctx context.Context) error {
	err := b.cdpClient.Execute(ctx, "Page.reload", nil, nil)
	return err
}
// GoBack navigates to the previous entry of the page's navigation history.
//
// It reads the history with Page.getNavigationHistory and issues
// Page.navigateToHistoryEntry for the entry before the current one. It
// returns "no history to go back" when the current entry is the first one,
// and an error instead of panicking when the reported current index does not
// fit the reported entries.
func (b *Browser) GoBack(ctx context.Context) error {
	var history struct {
		CurrentIndex int `json:"currentIndex"`
		Entries      []struct {
			ID int `json:"id"`
		} `json:"entries"`
	}
	if err := b.cdpClient.Execute(ctx, "Page.getNavigationHistory", nil, &history); err != nil {
		return fmt.Errorf("failed to get history: %w", err)
	}

	previous := history.CurrentIndex - 1
	if previous < 0 {
		return errors.New("no history to go back")
	}
	if previous >= len(history.Entries) {
		// Inconsistent response; indexing would panic.
		return fmt.Errorf("invalid navigation history: current index %d with %d entries",
			history.CurrentIndex, len(history.Entries))
	}

	params := map[string]interface{}{
		"entryId": history.Entries[previous].ID,
	}
	return b.cdpClient.Execute(ctx, "Page.navigateToHistoryEntry", params, nil)
}
// GoForward navigates to the next entry of the page's navigation history.
//
// It reads the history with Page.getNavigationHistory and issues
// Page.navigateToHistoryEntry for the entry after the current one. It
// returns "no history to go forward" when the current entry is the last one.
func (b *Browser) GoForward(ctx context.Context) error {
	var nav struct {
		CurrentIndex int `json:"currentIndex"`
		Entries      []struct {
			ID int `json:"id"`
		} `json:"entries"`
	}
	err := b.cdpClient.Execute(ctx, "Page.getNavigationHistory", nil, &nav)
	if err != nil {
		return fmt.Errorf("failed to get history: %w", err)
	}

	next := nav.CurrentIndex + 1
	if next > len(nav.Entries)-1 {
		return errors.New("no history to go forward")
	}

	target := nav.Entries[next].ID
	return b.cdpClient.Execute(ctx, "Page.navigateToHistoryEntry", map[string]interface{}{
		"entryId": target,
	}, nil)
}