feat(browser): auto-commit con 178 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-20 18:22:23 +02:00
parent 7d100e7f3e
commit 763e06c127
178 changed files with 19917 additions and 317 deletions
@@ -0,0 +1,99 @@
+"""fetch_reddit_search — busca posts en Reddit via la API JSON publica (sin auth).
+
+Funcion impura: hace peticiones HTTP a www.reddit.com. Tolera errores por
+subreddit y normaliza cada post a un shape comun de market intelligence.
+"""
+
+import requests
+
+_UA = "demand_radar/0.1 (registry market-intel)"
+_TIMEOUT = 15
+
+
+def _parse_children(children: list, query: str) -> list[dict]:
+    """Normaliza la lista children de la respuesta de Reddit al shape comun."""
+    rows = []
+    for child in children:
+        data = child.get("data", {}) if isinstance(child, dict) else {}
+        permalink = data.get("permalink", "") or ""
+        rows.append({
+            "source": "reddit",
+            "platform_id": str(data.get("id", "")),
+            "title": data.get("title", "") or "",
+            "body": data.get("selftext", "") or "",
+            "url": "https://www.reddit.com" + permalink,
+            "author": data.get("author", "") or "",
+            "channel": data.get("subreddit", "") or "",
+            "created_utc": float(data.get("created_utc") or 0.0),
+            "platform_score": int(data.get("ups") or 0),
+            "query": query,
+        })
+    return rows
+
+
def fetch_reddit_search(
    query: str,
    subreddits: "list[str] | None" = None,
    limit: int = 50,
    sort: str = "new",
) -> list[dict]:
    """Search Reddit posts through the public JSON API (no authentication).

    One request is issued per entry in ``subreddits``, restricted to that
    subreddit. When ``subreddits`` is ``None`` or empty a single site-wide
    search is issued instead. A failure for one target (network error, HTTP
    error status such as 429, malformed JSON) is logged as a warning and that
    target is skipped; the remaining targets are still queried.

    Args:
        query: Search term.
        subreddits: Subreddit names to search, without the ``r/`` prefix (a
            leading ``r/`` is tolerated and stripped). A single string is
            treated as one subreddit. ``None`` or empty means a site-wide
            search.
        limit: Maximum number of results requested per target.
        sort: Reddit sort order: "new", "relevance", "top", "comments", "hot".

    Returns:
        List of normalized dicts (possibly empty) with the keys ``source``,
        ``platform_id``, ``title``, ``body``, ``url``, ``author``,
        ``channel``, ``created_utc``, ``platform_score`` and ``query``.
    """
    import logging
    from urllib.parse import quote

    log = logging.getLogger(__name__)
    headers = {"User-Agent": _UA}
    # Parameters shared by the restricted and the site-wide search.
    base_params = {"q": query, "sort": sort, "limit": limit, "t": "year"}

    if isinstance(subreddits, str):
        # Iterating a bare string would search each character as a subreddit.
        subreddits = [subreddits]
    targets = list(subreddits) if subreddits else [None]

    results: list[dict] = []
    for sub in targets:
        name = ""
        if sub:
            name = str(sub).strip().removeprefix("/").removeprefix("r/")

        if name:
            # Escape the path segment; "+" is kept because Reddit uses it to
            # join several subreddits in one path.
            url = f"https://www.reddit.com/r/{quote(name, safe='+')}/search.json"
            params = {**base_params, "restrict_sr": 1}
        else:
            url = "https://www.reddit.com/search.json"
            params = dict(base_params)

        try:
            resp = requests.get(
                url, params=params, headers=headers, timeout=_TIMEOUT
            )
            resp.raise_for_status()
            payload = resp.json()
            listing = payload.get("data") if isinstance(payload, dict) else None
            children = listing.get("children") if isinstance(listing, dict) else None
            results.extend(_parse_children(children or [], query))
        except Exception as exc:
            # Best effort per target: record the failure and keep going.
            log.warning(
                "Reddit search failed for %s (query=%r): %s",
                f"r/{name}" if name else "global search",
                query,
                exc,
            )
            continue

    return results