763e06c127
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
72 lines
2.2 KiB
Python
72 lines
2.2 KiB
Python
"""fetch_hackernews_search — busca en Hacker News via la API Algolia publica.
|
|
|
|
Funcion impura: hace peticiones HTTP a hn.algolia.com (sin auth ni anti-bot).
|
|
Normaliza cada hit a un shape comun de market intelligence.
|
|
"""
|
|
|
|
import logging

import requests
|
|
|
|
_TIMEOUT = 15
|
|
|
|
|
|
def _parse_hits(hits: list, query: str) -> list[dict]:
|
|
"""Normaliza la lista hits de la respuesta de Algolia al shape comun."""
|
|
rows = []
|
|
for hit in hits:
|
|
if not isinstance(hit, dict):
|
|
continue
|
|
object_id = str(hit.get("objectID", ""))
|
|
external_url = hit.get("url")
|
|
url = external_url if external_url else (
|
|
f"https://news.ycombinator.com/item?id={object_id}"
|
|
)
|
|
body = hit.get("story_text") or hit.get("comment_text") or ""
|
|
rows.append({
|
|
"source": "hackernews",
|
|
"platform_id": object_id,
|
|
"title": hit.get("title", "") or "",
|
|
"body": body,
|
|
"url": url,
|
|
"author": hit.get("author", "") or "",
|
|
"channel": "hn",
|
|
"created_utc": float(hit.get("created_at_i") or 0.0),
|
|
"platform_score": int(hit.get("points") or 0),
|
|
"query": query,
|
|
})
|
|
return rows
|
|
|
|
|
|
def fetch_hackernews_search(
    query: str,
    limit: int = 50,
    tags: str = "story",
) -> list[dict]:
    """Search Hacker News through the public Algolia API (no authentication).

    Best-effort: any failure (network error, non-2xx status, invalid JSON,
    unexpected payload) is logged as a warning and yields an empty list
    instead of raising.

    Args:
        query: Search term.
        limit: Maximum number of results (sent as Algolia's ``hitsPerPage``).
        tags: Item-type filter: "story" (default), "comment",
            "story,comment", "show_hn", "ask_hn", etc.

    Returns:
        List of normalized dicts (may be empty). Each dict has the keys
        source, platform_id, title, body, url, author, channel, created_utc,
        platform_score, query.
    """
    url = "https://hn.algolia.com/api/v1/search"
    params = {
        "query": query,
        "tags": tags,
        "hitsPerPage": limit,
    }

    try:
        resp = requests.get(url, params=params, timeout=_TIMEOUT)
        resp.raise_for_status()
        payload = resp.json()
        hits = payload.get("hits") if isinstance(payload, dict) else None
        if not isinstance(hits, list):
            # Unexpected payload shape: treat it as "no results".
            hits = []
        return _parse_hits(hits, query)
    except Exception as exc:
        # Deliberate catch-all: callers rely on this function never raising.
        # Log the failure so an outage is distinguishable from "no results".
        logging.getLogger(__name__).warning(
            "Hacker News search failed for query %r: %s", query, exc
        )
        return []
|