feat(browser): auto-commit con 178 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,99 @@
|
||||
"""fetch_reddit_search — busca posts en Reddit via la API JSON publica (sin auth).
|
||||
|
||||
Funcion impura: hace peticiones HTTP a www.reddit.com. Tolera errores por
|
||||
subreddit y normaliza cada post a un shape comun de market intelligence.
|
||||
"""
|
||||
|
||||
import requests
|
||||
|
||||
# User-Agent header value sent with every request to Reddit.
_UA = "demand_radar/0.1 (registry market-intel)"
|
||||
# Timeout handed to requests.get for each search call, in seconds.
_TIMEOUT = 15
|
||||
|
||||
|
||||
def _parse_children(children: list, query: str) -> list[dict]:
|
||||
"""Normaliza la lista children de la respuesta de Reddit al shape comun."""
|
||||
rows = []
|
||||
for child in children:
|
||||
data = child.get("data", {}) if isinstance(child, dict) else {}
|
||||
permalink = data.get("permalink", "") or ""
|
||||
rows.append({
|
||||
"source": "reddit",
|
||||
"platform_id": str(data.get("id", "")),
|
||||
"title": data.get("title", "") or "",
|
||||
"body": data.get("selftext", "") or "",
|
||||
"url": "https://www.reddit.com" + permalink,
|
||||
"author": data.get("author", "") or "",
|
||||
"channel": data.get("subreddit", "") or "",
|
||||
"created_utc": float(data.get("created_utc") or 0.0),
|
||||
"platform_score": int(data.get("ups") or 0),
|
||||
"query": query,
|
||||
})
|
||||
return rows
|
||||
|
||||
|
||||
def fetch_reddit_search(
    query: str,
    subreddits: list[str] = None,
    limit: int = 50,
    sort: str = "new",
    time_filter: str = "year",
) -> list[dict]:
    """Search Reddit posts through the public JSON API (no authentication).

    One search request is issued per entry in ``subreddits``, restricted to
    that subreddit. When ``subreddits`` is None or empty, a single site-wide
    search is issued instead. Any failure for one target (network error,
    non-2xx status such as 429, malformed JSON) is logged as a warning and
    skipped, so the remaining targets are still queried.

    Args:
        query: Search term.
        subreddits: Subreddit names to search (without the "r/" prefix). A
            single string is treated as one subreddit. If None or empty, the
            search covers all of Reddit.
        limit: Maximum number of results requested per subreddit (or for the
            site-wide search).
        sort: Reddit sort order: "new", "relevance", "top", "comments", "hot".
        time_filter: Value sent as Reddit's ``t`` query parameter (e.g.
            "hour", "day", "week", "month", "year", "all"). Defaults to
            "year".

    Returns:
        List of normalized dicts (may be empty). Each dict has the keys:
        source, platform_id, title, body, url, author, channel, created_utc,
        platform_score, query.
    """
    import logging

    logger = logging.getLogger(__name__)
    headers = {"User-Agent": _UA}
    results: list[dict] = []

    # A bare string would otherwise be iterated character by character.
    if isinstance(subreddits, str):
        subreddits = [subreddits]
    targets = subreddits if subreddits else [None]

    for sub in targets:
        params = {
            "q": query,
            "sort": sort,
            "limit": limit,
            "t": time_filter,
        }
        if sub:
            url = f"https://www.reddit.com/r/{sub}/search.json"
            params["restrict_sr"] = 1
        else:
            url = "https://www.reddit.com/search.json"

        try:
            resp = requests.get(
                url, params=params, headers=headers, timeout=_TIMEOUT
            )
            resp.raise_for_status()
            payload = resp.json()

            # Validate the shape step by step so an unexpected payload yields
            # no rows instead of relying on an exception.
            listing = payload.get("data") if isinstance(payload, dict) else None
            children = (
                listing.get("children") if isinstance(listing, dict) else None
            )
            if not isinstance(children, list):
                children = []

            results.extend(_parse_children(children, query))
        except Exception:
            # Best-effort by design: tolerate a per-target failure (network,
            # 429, parsing) and keep going, but leave a trace in the logs.
            logger.warning(
                "Reddit search failed for %s; skipping",
                f"r/{sub}" if sub else "site-wide search",
                exc_info=True,
            )
            continue

    return results
|
||||
Reference in New Issue
Block a user