"""fetch_hackernews_search — busca en Hacker News via la API Algolia publica. Funcion impura: hace peticiones HTTP a hn.algolia.com (sin auth ni anti-bot). Normaliza cada hit a un shape comun de market intelligence. """ import requests _TIMEOUT = 15 def _parse_hits(hits: list, query: str) -> list[dict]: """Normaliza la lista hits de la respuesta de Algolia al shape comun.""" rows = [] for hit in hits: if not isinstance(hit, dict): continue object_id = str(hit.get("objectID", "")) external_url = hit.get("url") url = external_url if external_url else ( f"https://news.ycombinator.com/item?id={object_id}" ) body = hit.get("story_text") or hit.get("comment_text") or "" rows.append({ "source": "hackernews", "platform_id": object_id, "title": hit.get("title", "") or "", "body": body, "url": url, "author": hit.get("author", "") or "", "channel": "hn", "created_utc": float(hit.get("created_at_i") or 0.0), "platform_score": int(hit.get("points") or 0), "query": query, }) return rows def fetch_hackernews_search( query: str, limit: int = 50, tags: str = "story", ) -> list[dict]: """Busca en Hacker News usando la API Algolia publica (sin autenticacion). Args: query: Termino de busqueda. limit: Maximo de resultados (hitsPerPage de Algolia). tags: Filtro de tipo de item: "story" (default), "comment", "story,comment", "show_hn", "ask_hn", etc. Returns: Lista de dicts normalizados (puede ser []). Cada dict tiene las claves: source, platform_id, title, body, url, author, channel, created_utc, platform_score, query. """ url = "https://hn.algolia.com/api/v1/search" params = { "query": query, "tags": tags, "hitsPerPage": limit, } try: resp = requests.get(url, params=params, timeout=_TIMEOUT) resp.raise_for_status() payload = resp.json() hits = payload.get("hits", []) if isinstance(payload, dict) else [] return _parse_hits(hits, query) except Exception: return []