// Package wikipedia provides an agent tool that searches Wikipedia and
// returns a formatted summary of the best matching article.
package wikipedia

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/enmanuel/agents/tools"
)

// NewWikipediaSearch creates a wikipedia_search tool that searches Wikipedia
// and returns a structured summary of the top result.
// Uses the public Wikipedia REST API — no API key required.
//
// A single http.Client with a 15-second timeout is created here and shared by
// every invocation of the returned tool. Each invocation performs two
// requests: a title search (searchArticle) followed by a summary fetch
// (fetchSummary) for the first title found.
func NewWikipediaSearch() tools.Tool {
	client := &http.Client{Timeout: 15 * time.Second}

	return tools.Tool{
		Def: tools.Def{
			Name: "wikipedia_search",
			Description: "Search Wikipedia and retrieve a structured article summary. " +
				"Returns the title, description, extract (plain text summary), and article URL. " +
				"Supports multiple languages via the 'lang' parameter (e.g. 'es', 'en', 'fr'). " +
				"Use this tool whenever the user asks about a topic, person, place, concept, or event " +
				"that would have a Wikipedia article.",
			Parameters: []tools.Param{
				{
					Name:        "query",
					Type:        "string",
					Description: "Search term or topic to look up on Wikipedia (e.g. 'Albert Einstein', 'fotosíntesis', 'Segunda Guerra Mundial')",
					Required:    true,
				},
				{
					Name:        "lang",
					Type:        "string",
					Description: "Wikipedia language code (default: 'es' for Spanish). Common values: 'es' (Spanish), 'en' (English), 'fr' (French), 'de' (German), 'pt' (Portuguese)",
					Required:    false,
				},
			},
		},
		Exec: func(ctx context.Context, args map[string]any) tools.Result {
			query := tools.GetString(args, "query")
			if query == "" {
				return tools.Result{Err: fmt.Errorf("wikipedia_search: query is required")}
			}

			lang := tools.GetString(args, "lang")
			if lang == "" {
				lang = "es"
			}
			// The language code becomes the first label of the request
			// hostname ("<lang>.wikipedia.org"), so reduce it to characters
			// that are safe there before building any URL.
			lang = sanitizeLang(lang)

			// Step 1: Search for the best matching article title.
			title, err := searchArticle(ctx, client, lang, query)
			if err != nil {
				return tools.Result{Err: fmt.Errorf("wikipedia_search: search failed: %w", err)}
			}
			if title == "" {
				// An empty title is "no match", which is reported as normal
				// output rather than as an error.
				return tools.Result{Output: fmt.Sprintf("No se encontraron artículos en Wikipedia (%s) para: %q", lang, query)}
			}

			// Step 2: Fetch the article summary for that title.
			summary, err := fetchSummary(ctx, client, lang, title)
			if err != nil {
				return tools.Result{Err: fmt.Errorf("wikipedia_search: summary fetch failed: %w", err)}
			}

			return tools.Result{Output: formatSummary(lang, summary)}
		},
	}
}

// sanitizeLang normalizes a Wikipedia language code so it can be used as the
// leading hostname label of "<lang>.wikipedia.org".
//
// The input is lower-cased and every rune other than a-z, 0-9 and '-' is
// dropped (for example " EN " becomes "en" and "pt-BR" becomes "pt-br").
// Leading and trailing hyphens are then trimmed, because a hostname label
// cannot start or end with one. No length limit is enforced. If nothing
// usable remains, the default "es" is returned.
func sanitizeLang(lang string) string {
	var b strings.Builder
	for _, r := range strings.ToLower(lang) {
		if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' {
			b.WriteRune(r)
		}
	}

	result := strings.Trim(b.String(), "-")
	if result == "" {
		return "es"
	}
	return result
}

// opensearchResponse models the Wikipedia OpenSearch API response.
// Format: [query, [titles...], [descriptions...], [urls...]]
//
// Each element is kept as raw JSON so that callers decode only the parts
// they need.
type opensearchResponse [4]json.RawMessage

// searchArticle uses the Wikipedia OpenSearch API to find the best matching title.
func searchArticle(ctx context.Context, client *http.Client, lang, query string) (string, error) { apiURL := fmt.Sprintf( "https://%s.wikipedia.org/w/api.php?action=opensearch&search=%s&limit=1&namespace=0&format=json&redirects=resolve", url.PathEscape(lang), url.QueryEscape(query), ) req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil) if err != nil { return "", fmt.Errorf("build request: %w", err) } req.Header.Set("User-Agent", "wikipedia-bot/1.0 (Matrix agent; educational use)") req.Header.Set("Accept", "application/json") resp, err := client.Do(req) if err != nil { return "", fmt.Errorf("http request: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("API returned HTTP %d", resp.StatusCode) } body, err := io.ReadAll(io.LimitReader(resp.Body, 32*1024)) if err != nil { return "", fmt.Errorf("read response: %w", err) } var result opensearchResponse if err := json.Unmarshal(body, &result); err != nil { return "", fmt.Errorf("parse response: %w", err) } // result[1] is the array of titles var titles []string if err := json.Unmarshal(result[1], &titles); err != nil || len(titles) == 0 { return "", nil // no results } return titles[0], nil } // articleSummary models the Wikipedia REST API summary response. type articleSummary struct { Title string `json:"title"` DisplayTitle string `json:"displaytitle"` Description string `json:"description"` Extract string `json:"extract"` ContentURLs struct { Desktop struct { Page string `json:"page"` } `json:"desktop"` } `json:"content_urls"` Thumbnail struct { Source string `json:"source"` } `json:"thumbnail"` } // fetchSummary retrieves a structured article summary from the Wikipedia REST API. 
func fetchSummary(ctx context.Context, client *http.Client, lang, title string) (*articleSummary, error) { apiURL := fmt.Sprintf( "https://%s.wikipedia.org/api/rest_v1/page/summary/%s", url.PathEscape(lang), url.PathEscape(title), ) req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil) if err != nil { return nil, fmt.Errorf("build request: %w", err) } req.Header.Set("User-Agent", "wikipedia-bot/1.0 (Matrix agent; educational use)") req.Header.Set("Accept", "application/json") resp, err := client.Do(req) if err != nil { return nil, fmt.Errorf("http request: %w", err) } defer resp.Body.Close() if resp.StatusCode == http.StatusNotFound { return nil, fmt.Errorf("article %q not found", title) } if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("API returned HTTP %d", resp.StatusCode) } body, err := io.ReadAll(io.LimitReader(resp.Body, 64*1024)) if err != nil { return nil, fmt.Errorf("read response: %w", err) } var summary articleSummary if err := json.Unmarshal(body, &summary); err != nil { return nil, fmt.Errorf("parse response: %w", err) } return &summary, nil } // formatSummary converts an articleSummary into a human-readable string. func formatSummary(lang string, s *articleSummary) string { var b strings.Builder fmt.Fprintf(&b, "📖 **%s**\n", s.Title) if s.Description != "" { fmt.Fprintf(&b, "_%s_\n", s.Description) } fmt.Fprintln(&b) if s.Extract != "" { // Truncate extract to ~1500 chars to stay within context limits extract := s.Extract if len(extract) > 1500 { // Find last sentence boundary before 1500 chars cutoff := 1500 for cutoff > 1000 && extract[cutoff] != '.' { cutoff-- } extract = extract[:cutoff+1] + " [...]" } fmt.Fprintln(&b, extract) } if s.ContentURLs.Desktop.Page != "" { fmt.Fprintln(&b) fmt.Fprintf(&b, "🔗 %s\n", s.ContentURLs.Desktop.Page) } fmt.Fprintf(&b, "_(Wikipedia %s)_", strings.ToUpper(lang)) return b.String() }