Files
fn_registry/python/functions/datascience/fetch_hackernews_search.py
T
egutierrez 763e06c127 feat(browser): auto-commit con 178 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-20 18:22:23 +02:00

72 lines
2.2 KiB
Python

"""fetch_hackernews_search — busca en Hacker News via la API Algolia publica.
Funcion impura: hace peticiones HTTP a hn.algolia.com (sin auth ni anti-bot).
Normaliza cada hit a un shape comun de market intelligence.
"""
import logging

import requests
# Timeout, in seconds, passed to requests.get for the Algolia search call.
_TIMEOUT = 15
def _parse_hits(hits: list, query: str) -> list[dict]:
"""Normaliza la lista hits de la respuesta de Algolia al shape comun."""
rows = []
for hit in hits:
if not isinstance(hit, dict):
continue
object_id = str(hit.get("objectID", ""))
external_url = hit.get("url")
url = external_url if external_url else (
f"https://news.ycombinator.com/item?id={object_id}"
)
body = hit.get("story_text") or hit.get("comment_text") or ""
rows.append({
"source": "hackernews",
"platform_id": object_id,
"title": hit.get("title", "") or "",
"body": body,
"url": url,
"author": hit.get("author", "") or "",
"channel": "hn",
"created_utc": float(hit.get("created_at_i") or 0.0),
"platform_score": int(hit.get("points") or 0),
"query": query,
})
return rows
def fetch_hackernews_search(
    query: str,
    limit: int = 50,
    tags: str = "story",
) -> list[dict]:
    """Search Hacker News through the public Algolia API (no authentication).

    This is a best-effort call: any failure while requesting, decoding or
    normalizing the response is logged as a warning and reported to the
    caller as an empty list rather than raised.

    Args:
        query: Search term.
        limit: Maximum number of results (sent as Algolia's ``hitsPerPage``).
            A value below 1 returns ``[]`` without issuing a request.
        tags: Item-type filter: "story" (default), "comment",
            "story,comment", "show_hn", "ask_hn", etc.

    Returns:
        List of normalized dicts (may be ``[]``). Each dict has the keys:
        source, platform_id, title, body, url, author, channel, created_utc,
        platform_score, query.
    """
    # A non-positive page size cannot yield hits, so skip the round trip.
    # The isinstance guard leaves non-numeric values to be handled as before.
    if isinstance(limit, (int, float)) and limit < 1:
        return []

    url = "https://hn.algolia.com/api/v1/search"
    params = {
        "query": query,
        "tags": tags,
        "hitsPerPage": limit,
    }
    try:
        resp = requests.get(url, params=params, timeout=_TIMEOUT)
        resp.raise_for_status()
        payload = resp.json()
        # Treat a non-dict payload or a null "hits" field as "no results".
        hits = payload.get("hits") if isinstance(payload, dict) else None
        return _parse_hits(hits or [], query)
    except Exception:
        # Deliberate best-effort boundary: never raise to the caller, but
        # leave a trace so failures are not invisible.
        logging.getLogger(__name__).warning(
            "Hacker News search failed for query %r", query, exc_info=True
        )
        return []