3253828fef
Add complete navegator system for stealthy browser automation: - CDP client with WebSocket communication - Browser API with navigation, storage, network, runtime - Stealth flags and anti-detection scripts - Persistent profile support - Examples and comprehensive documentation Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
371 lines
8.7 KiB
Go
371 lines
8.7 KiB
Go
package browser
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strings"
|
|
"time"
|
|
|
|
"navegator/pkg/cdp"
|
|
"navegator/pkg/stealth"
|
|
)
|
|
|
|
// Browser representa una instancia de Chrome/Chromium.
|
|
type Browser struct {
|
|
cmd *exec.Cmd
|
|
cdpClient *cdp.Client
|
|
config *Config
|
|
profilePath string
|
|
debugURL string
|
|
ctx context.Context
|
|
cancel context.CancelFunc
|
|
targetID string
|
|
recorder *Recorder
|
|
}
|
|
|
|
// Config contiene la configuración para lanzar el navegador.
|
|
type Config struct {
|
|
// ExecutablePath es la ruta al ejecutable de Chrome/Chromium
|
|
// Si está vacío, se buscará automáticamente
|
|
ExecutablePath string
|
|
|
|
// ProfileName es el nombre del perfil a usar/crear
|
|
ProfileName string
|
|
|
|
// ProfilesBaseDir es el directorio base donde se guardan los perfiles
|
|
// Por defecto: ~/.navegator/profiles
|
|
ProfilesBaseDir string
|
|
|
|
// StealthFlags son las configuraciones stealth
|
|
StealthFlags *stealth.StealthFlags
|
|
|
|
// Timeout para iniciar el navegador
|
|
StartTimeout time.Duration
|
|
|
|
// Env variables de entorno adicionales
|
|
Env []string
|
|
}
|
|
|
|
// DefaultConfig retorna una configuración por defecto.
|
|
func DefaultConfig() *Config {
|
|
homeDir, _ := os.UserHomeDir()
|
|
defaultProfilesDir := filepath.Join(homeDir, ".navegator", "profiles")
|
|
|
|
return &Config{
|
|
ProfilesBaseDir: defaultProfilesDir,
|
|
ProfileName: "default",
|
|
StealthFlags: stealth.DefaultStealthFlags(),
|
|
StartTimeout: 30 * time.Second,
|
|
}
|
|
}
|
|
|
|
// Launch inicia una nueva instancia del navegador.
|
|
func Launch(ctx context.Context, config *Config) (*Browser, error) {
|
|
if config == nil {
|
|
config = DefaultConfig()
|
|
}
|
|
|
|
// Buscar ejecutable de Chrome si no está especificado
|
|
if config.ExecutablePath == "" {
|
|
exe, err := findChrome()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to find Chrome executable: %w", err)
|
|
}
|
|
config.ExecutablePath = exe
|
|
}
|
|
|
|
// Crear directorio de perfil
|
|
profilePath := filepath.Join(config.ProfilesBaseDir, config.ProfileName)
|
|
if err := os.MkdirAll(profilePath, 0755); err != nil {
|
|
return nil, fmt.Errorf("failed to create profile directory: %w", err)
|
|
}
|
|
|
|
// Configurar flags stealth con el profilePath
|
|
config.StealthFlags.UserDataDir = profilePath
|
|
config.StealthFlags.ProfileName = "Default"
|
|
|
|
// Construir flags
|
|
flags := config.StealthFlags.Build()
|
|
|
|
// Crear comando
|
|
cmd := exec.CommandContext(ctx, config.ExecutablePath, flags...)
|
|
cmd.Env = append(os.Environ(), config.Env...)
|
|
|
|
// Iniciar Chrome
|
|
if err := cmd.Start(); err != nil {
|
|
return nil, fmt.Errorf("failed to start Chrome: %w", err)
|
|
}
|
|
|
|
browserCtx, cancel := context.WithCancel(ctx)
|
|
|
|
b := &Browser{
|
|
cmd: cmd,
|
|
config: config,
|
|
profilePath: profilePath,
|
|
ctx: browserCtx,
|
|
cancel: cancel,
|
|
}
|
|
|
|
// Esperar a que Chrome esté listo
|
|
if err := b.waitForChrome(config.StartTimeout); err != nil {
|
|
b.Close()
|
|
return nil, err
|
|
}
|
|
|
|
// Conectar CDP
|
|
if err := b.connectCDP(); err != nil {
|
|
b.Close()
|
|
return nil, err
|
|
}
|
|
|
|
// Inyectar script anti-detección
|
|
if err := b.injectAntiDetection(); err != nil {
|
|
// No es crítico, continuar
|
|
fmt.Fprintf(os.Stderr, "Warning: failed to inject anti-detection script: %v\n", err)
|
|
}
|
|
|
|
return b, nil
|
|
}
|
|
|
|
// waitForChrome espera a que Chrome esté listo y escuchando CDP.
|
|
func (b *Browser) waitForChrome(timeout time.Duration) error {
|
|
// Leer el archivo DevToolsActivePort para obtener el puerto
|
|
devToolsFile := filepath.Join(b.profilePath, "DevToolsActivePort")
|
|
|
|
ctx, cancel := context.WithTimeout(b.ctx, timeout)
|
|
defer cancel()
|
|
|
|
ticker := time.NewTicker(100 * time.Millisecond)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return errors.New("timeout waiting for Chrome to start")
|
|
case <-ticker.C:
|
|
data, err := os.ReadFile(devToolsFile)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
lines := strings.Split(string(data), "\n")
|
|
if len(lines) < 1 {
|
|
continue
|
|
}
|
|
|
|
port := strings.TrimSpace(lines[0])
|
|
if port == "" {
|
|
continue
|
|
}
|
|
|
|
b.debugURL = "http://127.0.0.1:" + port
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// connectCDP conecta al cliente CDP.
|
|
func (b *Browser) connectCDP() error {
|
|
wsURL, err := cdp.GetWebSocketURL(b.ctx, b.debugURL)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get WebSocket URL: %w", err)
|
|
}
|
|
|
|
client, err := cdp.NewClient(b.ctx, wsURL)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create CDP client: %w", err)
|
|
}
|
|
|
|
b.cdpClient = client
|
|
|
|
// Habilitar dominios necesarios
|
|
if err := b.enableDomains(); err != nil {
|
|
return fmt.Errorf("failed to enable CDP domains: %w", err)
|
|
}
|
|
|
|
// Obtener target ID
|
|
if err := b.getTargetID(); err != nil {
|
|
return fmt.Errorf("failed to get target ID: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// enableDomains habilita los dominios CDP necesarios.
|
|
func (b *Browser) enableDomains() error {
|
|
ctx, cancel := context.WithTimeout(b.ctx, 5*time.Second)
|
|
defer cancel()
|
|
|
|
// Solo algunos dominios tienen método .enable
|
|
domains := []string{
|
|
"Network",
|
|
"Runtime",
|
|
"DOM",
|
|
}
|
|
|
|
for _, domain := range domains {
|
|
if err := b.cdpClient.Execute(ctx, domain+".enable", nil, nil); err != nil {
|
|
return fmt.Errorf("failed to enable %s domain: %w", domain, err)
|
|
}
|
|
}
|
|
|
|
// Page.enable no existe, Page se activa automáticamente
|
|
// Storage.enable no existe, Storage funciona directamente
|
|
// Fetch.enable se llama manualmente cuando se necesita interceptación
|
|
|
|
return nil
|
|
}
|
|
|
|
// getTargetID obtiene el ID del target principal.
|
|
func (b *Browser) getTargetID() error {
|
|
ctx, cancel := context.WithTimeout(b.ctx, 5*time.Second)
|
|
defer cancel()
|
|
|
|
var result struct {
|
|
TargetInfos []struct {
|
|
TargetID string `json:"targetId"`
|
|
Type string `json:"type"`
|
|
} `json:"targetInfos"`
|
|
}
|
|
|
|
if err := b.cdpClient.Execute(ctx, "Target.getTargets", nil, &result); err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, info := range result.TargetInfos {
|
|
if info.Type == "page" {
|
|
b.targetID = info.TargetID
|
|
return nil
|
|
}
|
|
}
|
|
|
|
return errors.New("no page target found")
|
|
}
|
|
|
|
// injectAntiDetection inyecta el script anti-detección en todas las páginas.
|
|
func (b *Browser) injectAntiDetection() error {
|
|
script := stealth.GetAntiDetectionScript()
|
|
|
|
ctx, cancel := context.WithTimeout(b.ctx, 5*time.Second)
|
|
defer cancel()
|
|
|
|
params := map[string]interface{}{
|
|
"source": script,
|
|
}
|
|
|
|
return b.cdpClient.Execute(ctx, "Page.addScriptToEvaluateOnNewDocument", params, nil)
|
|
}
|
|
|
|
// Client retorna el cliente CDP subyacente.
|
|
func (b *Browser) Client() *cdp.Client {
|
|
return b.cdpClient
|
|
}
|
|
|
|
// ProfilePath retorna la ruta del perfil usado.
|
|
func (b *Browser) ProfilePath() string {
|
|
return b.profilePath
|
|
}
|
|
|
|
// DebugURL retorna la URL de debugging de Chrome.
|
|
func (b *Browser) DebugURL() string {
|
|
return b.debugURL
|
|
}
|
|
|
|
// TargetID retorna el ID del target principal.
|
|
func (b *Browser) TargetID() string {
|
|
return b.targetID
|
|
}
|
|
|
|
// Close cierra el navegador y limpia recursos.
|
|
func (b *Browser) Close() error {
|
|
b.cancel()
|
|
|
|
if b.recorder != nil {
|
|
b.recorder.Close()
|
|
}
|
|
|
|
if b.cdpClient != nil {
|
|
b.cdpClient.Close()
|
|
}
|
|
|
|
if b.cmd != nil && b.cmd.Process != nil {
|
|
b.cmd.Process.Kill()
|
|
b.cmd.Wait()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// StartRecording inicia el registro de acciones en un archivo.
|
|
func (b *Browser) StartRecording(filepath string) error {
|
|
recorder, err := NewRecorder(filepath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
b.recorder = recorder
|
|
return nil
|
|
}
|
|
|
|
// StopRecording detiene el registro de acciones.
|
|
func (b *Browser) StopRecording() error {
|
|
if b.recorder != nil {
|
|
return b.recorder.Close()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// AddComment agrega un comentario al log de recording.
|
|
func (b *Browser) AddComment(comment string) {
|
|
if b.recorder != nil {
|
|
b.recorder.AddComment(comment)
|
|
}
|
|
}
|
|
|
|
// findChrome looks for a Chrome/Chromium executable. It first checks the
// well-known install locations for the current operating system and then
// searches PATH for the usual binary names.
//
// It returns the path of the first match, or the error
// "Chrome/Chromium executable not found" when nothing is found.
func findChrome() (string, error) {
	var candidates []string

	switch runtime.GOOS {
	case "darwin":
		candidates = []string{
			"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
			"/Applications/Chromium.app/Contents/MacOS/Chromium",
		}
	case "windows":
		candidates = []string{
			"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
			"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
			"C:\\Users\\" + os.Getenv("USERNAME") + "\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe",
		}
		// The install roots are not always on C: and the profile directory is
		// not always named after USERNAME, so also try the locations given by
		// the environment.
		for _, key := range []string{"ProgramFiles", "ProgramFiles(x86)", "LOCALAPPDATA"} {
			if root := os.Getenv(key); root != "" {
				candidates = append(candidates,
					filepath.Join(root, "Google", "Chrome", "Application", "chrome.exe"))
			}
		}
	default: // linux
		candidates = []string{
			"/usr/bin/google-chrome",
			"/usr/bin/google-chrome-stable",
			"/usr/bin/chromium",
			"/usr/bin/chromium-browser",
			"/snap/bin/chromium",
		}
	}

	// Check each candidate; a directory with a matching name is not an
	// executable and is skipped.
	for _, path := range candidates {
		if info, err := os.Stat(path); err == nil && !info.IsDir() {
			return path, nil
		}
	}

	// Fall back to PATH. The plain "chrome" name is tried last.
	for _, name := range []string{"google-chrome", "google-chrome-stable", "chromium", "chromium-browser", "chrome"} {
		if path, err := exec.LookPath(name); err == nil {
			return path, nil
		}
	}

	return "", errors.New("Chrome/Chromium executable not found")
}
|