01c6cafd23
Implementa ToMarkdown() para convertir HTML a Markdown usando Turndown.js inline.
Incluye:
- Soporte para títulos, enlaces, imágenes, listas y tablas
- Opciones para incluir/excluir imágenes y enlaces
- Selector CSS opcional para convertir secciones específicas
- Comando CLI to_markdown.go para uso directo
Archivos: pkg/browser/markdown.go, cmd/to_markdown.go
73 lines
1.8 KiB
Go
73 lines
1.8 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
|
|
"navegator/pkg/browser"
|
|
)
|
|
|
|
func main() {
|
|
urlFlag := flag.String("url", "", "URL to convert to markdown")
|
|
selectorFlag := flag.String("selector", "", "CSS selector to convert (optional)")
|
|
outputFlag := flag.String("output", "", "Output file (default: stdout)")
|
|
noImages := flag.Bool("no-images", false, "Exclude images")
|
|
noLinks := flag.Bool("no-links", false, "Convert links to plain text")
|
|
flag.Parse()
|
|
|
|
if *urlFlag == "" {
|
|
log.Fatal("Usage: to_markdown -url <url> [-selector <css>] [-output <file>] [-no-images] [-no-links]")
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
// Configurar navegador
|
|
config := browser.DefaultConfig()
|
|
config.ProfileName = "markdown-converter"
|
|
config.StealthFlags.Headless = true
|
|
|
|
// Lanzar navegador
|
|
log.Println("Launching browser...")
|
|
b, err := browser.Launch(ctx, config)
|
|
if err != nil {
|
|
log.Fatalf("Error launching browser: %v", err)
|
|
}
|
|
defer b.Close()
|
|
|
|
// Navegar a URL
|
|
log.Printf("Navigating to %s...\n", *urlFlag)
|
|
opts := browser.DefaultNavigateOptions()
|
|
opts.WaitUntil = "networkidle"
|
|
|
|
if err := b.Navigate(ctx, *urlFlag, opts); err != nil {
|
|
log.Printf("Warning: navigation error: %v\n", err)
|
|
}
|
|
|
|
// Configurar opciones de markdown
|
|
mdOpts := browser.DefaultMarkdownOptions()
|
|
mdOpts.Selector = *selectorFlag
|
|
mdOpts.IncludeImages = !*noImages
|
|
mdOpts.IncludeLinks = !*noLinks
|
|
|
|
// Convertir a markdown
|
|
log.Println("Converting to markdown...")
|
|
markdown, err := b.ToMarkdown(ctx, mdOpts)
|
|
if err != nil {
|
|
log.Fatalf("Error converting to markdown: %v", err)
|
|
}
|
|
|
|
// Output
|
|
if *outputFlag != "" {
|
|
if err := os.WriteFile(*outputFlag, []byte(markdown), 0644); err != nil {
|
|
log.Fatalf("Error writing to file: %v", err)
|
|
}
|
|
log.Printf("Markdown saved to %s\n", *outputFlag)
|
|
} else {
|
|
fmt.Println("\n=== MARKDOWN OUTPUT ===\n")
|
|
fmt.Println(markdown)
|
|
}
|
|
}
|