3253828fef
Add complete navegator system for stealthy browser automation: - CDP client with WebSocket communication - Browser API with navigation, storage, network, runtime - Stealth flags and anti-detection scripts - Persistent profile support - Examples and comprehensive documentation Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
217 lines
6.5 KiB
Go
217 lines
6.5 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"strings"
|
|
"time"
|
|
|
|
"navegator/pkg/browser"
|
|
)
|
|
|
|
func main() {
|
|
// Flags de línea de comandos
|
|
visible := flag.Bool("visible", false, "Ejecutar en modo visible (con interfaz gráfica para debugging)")
|
|
videoURL := flag.String("url", "https://www.youtube.com/watch?v=S1J8rx2Jw98", "URL del video de YouTube")
|
|
numComments := flag.Int("n", 10, "Número de comentarios a extraer (máximo)")
|
|
flag.Parse()
|
|
|
|
ctx := context.Background()
|
|
|
|
// Configuración del navegador
|
|
config := browser.DefaultConfig()
|
|
config.ProfileName = "youtube-scraper"
|
|
|
|
// Por defecto headless, solo visible si se especifica
|
|
config.StealthFlags.Headless = !*visible
|
|
|
|
// Siempre usar ventana pequeña (incluso en modo visible)
|
|
config.StealthFlags.WindowSize = [2]int{600, 400}
|
|
|
|
// User agent actualizado
|
|
config.StealthFlags.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
|
|
|
|
log.Println("🚀 Lanzando navegador...")
|
|
b, err := browser.Launch(ctx, config)
|
|
if err != nil {
|
|
log.Fatalf("❌ Error al lanzar navegador: %v", err)
|
|
}
|
|
defer b.Close()
|
|
|
|
log.Printf("✅ Navegador iniciado. Perfil: %s\n", b.ProfilePath())
|
|
|
|
log.Printf("📺 Navegando a YouTube: %s\n", *videoURL)
|
|
log.Printf("📊 Extrayendo hasta %d comentarios\n", *numComments)
|
|
|
|
// Simplemente navegar sin esperar eventos específicos (más confiable)
|
|
if err := b.Navigate(ctx, *videoURL, nil); err != nil {
|
|
// Si hay error, intentar continuar de todos modos
|
|
log.Printf("⚠️ Advertencia al navegar: %v", err)
|
|
}
|
|
|
|
// Esperar a que cargue la página
|
|
log.Println("⏳ Esperando a que cargue la página...")
|
|
time.Sleep(3 * time.Second)
|
|
|
|
// Manejar banner de cookies si aparece
|
|
log.Println("🍪 Verificando banner de cookies...")
|
|
cookieBannerScript := `
|
|
(() => {
|
|
// Buscar botones de aceptar cookies
|
|
const selectors = [
|
|
'button[aria-label*="Accept"]',
|
|
'button[aria-label*="Aceptar"]',
|
|
'button:contains("Accept all")',
|
|
'ytd-button-renderer button[aria-label*="Accept"]',
|
|
'button.yt-spec-button-shape-next--filled'
|
|
];
|
|
|
|
for (const selector of selectors) {
|
|
const button = document.querySelector(selector);
|
|
if (button && button.textContent.toLowerCase().includes('accept')) {
|
|
button.click();
|
|
return 'Cookie banner clicked';
|
|
}
|
|
}
|
|
|
|
return 'No cookie banner found';
|
|
})()
|
|
`
|
|
|
|
cookieResult, _ := b.Evaluate(ctx, cookieBannerScript)
|
|
if cookieResult != nil && cookieResult.Value != nil {
|
|
log.Printf("🍪 %v\n", cookieResult.Value)
|
|
}
|
|
|
|
// Esperar después de manejar cookies
|
|
time.Sleep(2 * time.Second)
|
|
|
|
// Scroll hacia abajo para activar la carga de comentarios (YouTube usa lazy loading)
|
|
log.Println("📜 Haciendo scroll para cargar comentarios...")
|
|
for i := 0; i < 5; i++ {
|
|
scrollScript := fmt.Sprintf(`window.scrollTo(0, %d);`, 400*(i+1))
|
|
b.Evaluate(ctx, scrollScript)
|
|
time.Sleep(1500 * time.Millisecond)
|
|
}
|
|
|
|
// Esperar más tiempo a que aparezcan los comentarios (especialmente en headless)
|
|
log.Println("⏳ Esperando a que aparezcan los comentarios...")
|
|
time.Sleep(3 * time.Second)
|
|
|
|
// Extraer comentarios usando JavaScript
|
|
log.Println("📝 Extrayendo comentarios...")
|
|
|
|
extractScript := fmt.Sprintf(`
|
|
(() => {
|
|
const comments = [];
|
|
const commentElements = document.querySelectorAll('ytd-comment-thread-renderer');
|
|
|
|
// Limitar según el parámetro
|
|
const limit = Math.min(commentElements.length, %d);`, *numComments) + `
|
|
|
|
for (let i = 0; i < limit; i++) {
|
|
const comment = commentElements[i];
|
|
|
|
// Extraer autor
|
|
const authorElement = comment.querySelector('#author-text');
|
|
const author = authorElement ? authorElement.textContent.trim() : 'Unknown';
|
|
|
|
// Extraer texto del comentario
|
|
const contentElement = comment.querySelector('#content-text');
|
|
const text = contentElement ? contentElement.textContent.trim() : '';
|
|
|
|
// Extraer fecha (si está disponible)
|
|
const dateElement = comment.querySelector('.published-time-text a');
|
|
const date = dateElement ? dateElement.textContent.trim() : '';
|
|
|
|
// Extraer likes (si está disponible)
|
|
const likeElement = comment.querySelector('#vote-count-middle');
|
|
const likes = likeElement ? likeElement.textContent.trim() : '0';
|
|
|
|
comments.push({
|
|
author: author,
|
|
text: text,
|
|
date: date,
|
|
likes: likes,
|
|
index: i + 1
|
|
});
|
|
}
|
|
|
|
return comments;
|
|
})()
|
|
`
|
|
|
|
result, err := b.Evaluate(ctx, extractScript)
|
|
if err != nil {
|
|
log.Fatalf("❌ Error al extraer comentarios: %v", err)
|
|
}
|
|
|
|
// Mostrar resultados
|
|
separator := strings.Repeat("=", 80)
|
|
log.Println("\n" + separator)
|
|
log.Println("📋 COMENTARIOS EXTRAÍDOS:")
|
|
log.Println(separator + "\n")
|
|
|
|
// El resultado viene como un array de mapas
|
|
if result.Value != nil {
|
|
if comments, ok := result.Value.([]interface{}); ok {
|
|
if len(comments) == 0 {
|
|
log.Println("⚠️ No se encontraron comentarios")
|
|
} else {
|
|
for _, c := range comments {
|
|
if comment, ok := c.(map[string]interface{}); ok {
|
|
index := comment["index"]
|
|
author := comment["author"]
|
|
likes := comment["likes"]
|
|
text := comment["text"]
|
|
|
|
fmt.Printf("\n%v. %s (%s likes)\n", index, author, likes)
|
|
|
|
// Truncar texto si es muy largo
|
|
textStr := fmt.Sprintf("%v", text)
|
|
if len(textStr) > 200 {
|
|
textStr = textStr[:200] + "..."
|
|
}
|
|
fmt.Printf(" %s\n", textStr)
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
log.Printf("⚠️ Formato inesperado: %+v\n", result.Value)
|
|
}
|
|
} else {
|
|
log.Println("⚠️ No se encontraron comentarios")
|
|
}
|
|
|
|
// También podemos obtener el título del video
|
|
log.Println("\n" + separator)
|
|
log.Println("📌 Información del Video:")
|
|
log.Println(separator)
|
|
|
|
titleResult, err := b.Evaluate(ctx, "document.querySelector('h1.ytd-watch-metadata yt-formatted-string')?.textContent || 'No title found'")
|
|
if err == nil && titleResult.Value != nil {
|
|
fmt.Printf("Título: %v\n", titleResult.Value)
|
|
}
|
|
|
|
viewsResult, err := b.Evaluate(ctx, "document.querySelector('.view-count')?.textContent || 'No views found'")
|
|
if err == nil && viewsResult.Value != nil {
|
|
fmt.Printf("Vistas: %v\n", viewsResult.Value)
|
|
}
|
|
|
|
log.Println("\n✅ Extracción completada exitosamente!")
|
|
|
|
// Si es visible, mantener abierto brevemente para inspección
|
|
if *visible {
|
|
log.Println("⏳ Manteniendo navegador abierto por 2 segundos...")
|
|
time.Sleep(2 * time.Second)
|
|
}
|
|
|
|
// Asegurar cierre del navegador
|
|
log.Println("🔒 Cerrando navegador...")
|
|
if err := b.Close(); err != nil {
|
|
log.Printf("⚠️ Error al cerrar navegador: %v", err)
|
|
}
|
|
}
|