"""fetch_reddit_search — busca posts en Reddit via la API JSON publica (sin auth). Funcion impura: hace peticiones HTTP a www.reddit.com. Tolera errores por subreddit y normaliza cada post a un shape comun de market intelligence. """ import requests _UA = "demand_radar/0.1 (registry market-intel)" _TIMEOUT = 15 def _parse_children(children: list, query: str) -> list[dict]: """Normaliza la lista children de la respuesta de Reddit al shape comun.""" rows = [] for child in children: data = child.get("data", {}) if isinstance(child, dict) else {} permalink = data.get("permalink", "") or "" rows.append({ "source": "reddit", "platform_id": str(data.get("id", "")), "title": data.get("title", "") or "", "body": data.get("selftext", "") or "", "url": "https://www.reddit.com" + permalink, "author": data.get("author", "") or "", "channel": data.get("subreddit", "") or "", "created_utc": float(data.get("created_utc") or 0.0), "platform_score": int(data.get("ups") or 0), "query": query, }) return rows def fetch_reddit_search( query: str, subreddits: list[str] = None, limit: int = 50, sort: str = "new", ) -> list[dict]: """Busca posts en Reddit usando la API JSON publica (sin autenticacion). Por cada subreddit en `subreddits` hace una busqueda restringida a ese subreddit. Si `subreddits` es None o vacio hace una busqueda global. Cada fallo por subreddit (red, 429, JSON malformado) se captura y se omite, continuando con los demas. Args: query: Termino de busqueda. subreddits: Lista de subreddits a buscar (sin el prefijo "r/"). Si None o vacio, busqueda global en todo Reddit. limit: Maximo de resultados por subreddit (o por la busqueda global). sort: Orden de Reddit: "new", "relevance", "top", "comments", "hot". Returns: Lista de dicts normalizados (puede ser []). Cada dict tiene las claves: source, platform_id, title, body, url, author, channel, created_utc, platform_score, query. """ headers = {"User-Agent": _UA} results: list[dict] = [] targets = subreddits if subreddits else [None] for sub in targets: try: if sub: url = f"https://www.reddit.com/r/{sub}/search.json" params = { "q": query, "restrict_sr": 1, "sort": sort, "limit": limit, "t": "year", } else: url = "https://www.reddit.com/search.json" params = { "q": query, "sort": sort, "limit": limit, "t": "year", } resp = requests.get( url, params=params, headers=headers, timeout=_TIMEOUT ) resp.raise_for_status() payload = resp.json() children = ( payload.get("data", {}).get("children", []) if isinstance(payload, dict) else [] ) results.extend(_parse_children(children, query)) except Exception: # Tolerar fallo por subreddit (red, 429, parsing) y seguir. continue return results