feat(browser): auto-commit con 178 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-20 18:22:23 +02:00
parent 7d100e7f3e
commit 763e06c127
178 changed files with 19917 additions and 317 deletions
@@ -0,0 +1,99 @@
"""fetch_reddit_search — busca posts en Reddit via la API JSON publica (sin auth).
Funcion impura: hace peticiones HTTP a www.reddit.com. Tolera errores por
subreddit y normaliza cada post a un shape comun de market intelligence.
"""
import requests
# User-Agent header value sent by fetch_reddit_search with each HTTP request.
_UA = "demand_radar/0.1 (registry market-intel)"
# Passed as the ``timeout`` argument of ``requests.get`` (seconds, per the
# requests API).
_TIMEOUT = 15
def _parse_children(children: list, query: str) -> list[dict]:
"""Normaliza la lista children de la respuesta de Reddit al shape comun."""
rows = []
for child in children:
data = child.get("data", {}) if isinstance(child, dict) else {}
permalink = data.get("permalink", "") or ""
rows.append({
"source": "reddit",
"platform_id": str(data.get("id", "")),
"title": data.get("title", "") or "",
"body": data.get("selftext", "") or "",
"url": "https://www.reddit.com" + permalink,
"author": data.get("author", "") or "",
"channel": data.get("subreddit", "") or "",
"created_utc": float(data.get("created_utc") or 0.0),
"platform_score": int(data.get("ups") or 0),
"query": query,
})
return rows
def fetch_reddit_search(
    query: str,
    subreddits: "list[str] | None" = None,
    limit: int = 50,
    sort: str = "new",
) -> list[dict]:
    """Search Reddit posts through the public JSON API (no authentication).

    One request is made per entry in ``subreddits``, restricted to that
    subreddit. When ``subreddits`` is None or empty, a single site-wide
    search is made instead. Every request sends ``t=year``. Any failure for
    a given target (network error, HTTP error status such as 429, malformed
    JSON, unexpected payload shape) is logged as a warning and that target
    is skipped; the remaining targets are still processed.

    Args:
        query: Search term.
        subreddits: Subreddit names to search (without the "r/" prefix).
            None or empty means a site-wide search. A falsy entry inside the
            list also triggers a site-wide search for that entry.
        limit: Maximum number of results requested per subreddit (or for the
            site-wide search).
        sort: Reddit sort order: "new", "relevance", "top", "comments", "hot".

    Returns:
        List of normalized dicts (possibly empty). Each dict has the keys
        source, platform_id, title, body, url, author, channel, created_utc,
        platform_score and query.
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    import logging

    logger = logging.getLogger(__name__)
    headers = {"User-Agent": _UA}
    results: list[dict] = []
    # A single ``None`` target stands for the site-wide search.
    targets = subreddits if subreddits else [None]
    for sub in targets:
        params = {"q": query, "sort": sort, "limit": limit, "t": "year"}
        if sub:
            url = f"https://www.reddit.com/r/{sub}/search.json"
            params["restrict_sr"] = 1
        else:
            url = "https://www.reddit.com/search.json"
        try:
            resp = requests.get(
                url, params=params, headers=headers, timeout=_TIMEOUT
            )
            resp.raise_for_status()
            payload = resp.json()
            # Guard each level: a null or non-dict "data" means "no results".
            data = payload.get("data") if isinstance(payload, dict) else None
            children = data.get("children") if isinstance(data, dict) else None
            results.extend(_parse_children(children or [], query))
        except Exception as exc:
            # Deliberate best-effort: one failing target must not abort the
            # others, but the failure is recorded instead of vanishing.
            logger.warning(
                "Reddit search failed for %s (query=%r), skipping: %s",
                f"r/{sub}" if sub else "global search",
                query,
                exc,
            )
            continue
    return results