feat: conversor de páginas web a markdown

Implementa ToMarkdown() para convertir HTML a Markdown usando Turndown.js inline.

Incluye:
- Soporte para títulos, enlaces, imágenes, listas, tablas
- Opciones para incluir/excluir imágenes y enlaces
- Selector CSS opcional para convertir secciones específicas
- Comando CLI to_markdown.go para uso directo

Archivos: pkg/browser/markdown.go, cmd/to_markdown.go
This commit is contained in:
Developer
2026-03-25 00:47:38 +01:00
parent 3253828fef
commit 01c6cafd23
2 changed files with 310 additions and 0 deletions
+72
View File
@@ -0,0 +1,72 @@
package main
import (
"context"
"flag"
"fmt"
"log"
"os"
"navegator/pkg/browser"
)
// main is the entry point of the to_markdown command. It parses the
// command-line flags, validates that a URL was supplied, and delegates the
// browser work to runToMarkdown.
//
// All fatal exits happen here, outside the function that owns the browser, so
// that runToMarkdown's deferred cleanup has already run by the time the
// process terminates (log.Fatal exits without running deferred calls).
func main() {
	urlFlag := flag.String("url", "", "URL to convert to markdown")
	selectorFlag := flag.String("selector", "", "CSS selector to convert (optional)")
	outputFlag := flag.String("output", "", "Output file (default: stdout)")
	noImages := flag.Bool("no-images", false, "Exclude images")
	noLinks := flag.Bool("no-links", false, "Convert links to plain text")
	flag.Parse()

	if *urlFlag == "" {
		log.Fatal("Usage: to_markdown -url <url> [-selector <css>] [-output <file>] [-no-images] [-no-links]")
	}

	err := runToMarkdown(context.Background(), *urlFlag, *selectorFlag, *outputFlag, !*noImages, !*noLinks)
	if err != nil {
		// Every error returned by runToMarkdown starts with the action that
		// failed ("launching browser: ..."), so this prints e.g.
		// "Error launching browser: ...".
		log.Fatalf("Error %v", err)
	}
}

// runToMarkdown launches a headless browser, navigates to rawURL, converts the
// page (or the part matched by selector, when non-empty) to Markdown, and
// writes the result to the file named by output, or to stdout when output is
// empty.
//
// includeImages and includeLinks are copied into the Markdown options'
// IncludeImages and IncludeLinks fields.
//
// It returns an error when the browser cannot be launched, the conversion
// fails, or the output file cannot be written. A navigation error is only
// logged as a warning and the conversion is still attempted.
func runToMarkdown(ctx context.Context, rawURL, selector, output string, includeImages, includeLinks bool) error {
	// Configure the browser.
	config := browser.DefaultConfig()
	config.ProfileName = "markdown-converter"
	config.StealthFlags.Headless = true

	// Launch the browser.
	log.Println("Launching browser...")
	b, err := browser.Launch(ctx, config)
	if err != nil {
		return fmt.Errorf("launching browser: %w", err)
	}
	// Runs on every return path below, including the error ones.
	defer b.Close()

	// Navigate to the URL.
	log.Printf("Navigating to %s...\n", rawURL)
	opts := browser.DefaultNavigateOptions()
	opts.WaitUntil = "networkidle"
	if err := b.Navigate(ctx, rawURL, opts); err != nil {
		// Deliberately best-effort: warn and still try to convert whatever
		// the browser currently has.
		log.Printf("Warning: navigation error: %v\n", err)
	}

	// Configure the markdown options.
	mdOpts := browser.DefaultMarkdownOptions()
	mdOpts.Selector = selector
	mdOpts.IncludeImages = includeImages
	mdOpts.IncludeLinks = includeLinks

	// Convert to markdown.
	log.Println("Converting to markdown...")
	markdown, err := b.ToMarkdown(ctx, mdOpts)
	if err != nil {
		return fmt.Errorf("converting to markdown: %w", err)
	}

	// Output.
	if output == "" {
		// Print (not Println) with an explicit trailing blank line: the bytes
		// written are unchanged, but go vet no longer reports a redundant
		// newline at the end of a Println argument list.
		fmt.Print("\n=== MARKDOWN OUTPUT ===\n\n")
		fmt.Println(markdown)
		return nil
	}
	if err := os.WriteFile(output, []byte(markdown), 0644); err != nil {
		return fmt.Errorf("writing to file: %w", err)
	}
	log.Printf("Markdown saved to %s\n", output)
	return nil
}