feat: gnula_grabber 2-part pipeline (crawl ES + CDP stream download)
This commit is contained in:
+119
@@ -0,0 +1,119 @@
|
||||
#!/usr/bin/env python3
|
||||
"""PARTE 2 — Downloader: consume el catálogo (gnula_crawl.py) y descarga las pelis
|
||||
español pendientes via la sesión Chrome+CDP+NordVPN.
|
||||
|
||||
Por cada peli pending: navega a su página, clica el server Español (luluvid/luluvdo),
|
||||
quita el overlay de ad + play (jwplayer), espera master.m3u8, y delega a grab_stream
|
||||
(streaming + descifrado AES local) → manual/movies. Marca downloaded en el catálogo.
|
||||
El captcha (si sale) lo resuelve el humano: el script espera a que aparezca master.
|
||||
|
||||
Uso: python gnula_download.py [limit] (default 1 peli; usar N para varias)
|
||||
Requiere: Chrome con --remote-debugging-port=9222 (NordVPN proxy) abierto.
|
||||
"""
|
||||
import json, re, sqlite3, subprocess, sys, time, urllib.request
|
||||
import websocket
|
||||
|
||||
# Chrome remote-debugging port that tabs() queries for the target list.
PORT = 9222
# SQLite catalog that main() reads pending movies from and updates.
DB = "/home/lucas/.config/popelis/gnula_catalog.db"
# Script that download_one() launches as a subprocess to grab the stream.
GRAB = "/home/lucas/fn_registry/apps/gnula_grabber/grab_stream.py"
|
||||
|
||||
def tabs():
    """Return the parsed JSON target list from Chrome's local debugging endpoint.

    Queries http://127.0.0.1:PORT/json/list with an 8 second timeout. Errors
    from urllib or the JSON decoder propagate to the caller.
    """
    # Use the response as a context manager so the HTTP connection is closed
    # on every call instead of waiting for garbage collection.
    with urllib.request.urlopen(f"http://127.0.0.1:{PORT}/json/list", timeout=8) as resp:
        return json.load(resp)
|
||||
def conn(t):
    """Open a websocket to the debugger URL of target *t* and return it.

    *t* is one entry of the list returned by tabs(); its
    "webSocketDebuggerUrl" key is used as the endpoint.
    """
    sock = websocket.create_connection(
        t["webSocketDebuggerUrl"],
        timeout=20,
        header=["Origin: http://localhost"],
    )
    # Once connected, drop to a short timeout so the polling loops in
    # ev()/cmd() get control back regularly.
    sock.settimeout(3)
    return sock
|
||||
# Last message id handed out; every CDP request gets a fresh one.
_last_cdp_id = 0


def _next_cdp_id():
    """Return a message id that no earlier request of this process has used."""
    global _last_cdp_id
    _last_cdp_id += 1
    return _last_cdp_id


def _cdp_call(w, method, params, to):
    """Send one CDP request over *w* and return its reply message, or None.

    None is returned when no reply with the matching id arrives within *to*
    seconds, or when the connection is reported closed.
    """
    # A unique id per call keeps a late reply to an earlier, timed-out request
    # from being mistaken for the answer to this one.
    msg_id = _next_cdp_id()
    w.send(json.dumps({"id": msg_id, "method": method, "params": params}))
    deadline = time.monotonic() + to
    while time.monotonic() < deadline:
        try:
            msg = json.loads(w.recv())
        except websocket.WebSocketConnectionClosedException:
            # A closed socket can never deliver the reply; retrying would only
            # spin the CPU until the deadline.
            return None
        except Exception:
            # Best effort: receive timeouts and undecodable frames are expected
            # while waiting, so keep polling until the deadline.
            continue
        if msg.get("id") == msg_id:
            return msg
    return None


def ev(w, expr, to=15):
    """Evaluate the JavaScript *expr* in the target behind *w*.

    Args:
        w: websocket returned by conn().
        expr: JavaScript source, sent through Runtime.evaluate with
            returnByValue enabled.
        to: maximum number of seconds to wait for the reply.

    Returns:
        The "value" field of the evaluation result, or None when the reply
        carries no value, the wait timed out, or the connection was closed.
    """
    reply = _cdp_call(w, "Runtime.evaluate", {"expression": expr, "returnByValue": True}, to)
    if reply is None:
        return None
    return reply.get("result", {}).get("result", {}).get("value")


def cmd(w, method, params, to=8):
    """Send the CDP command *method* with *params* over *w*.

    Returns:
        The reply's "result" dict ({} when the reply has none), or None when
        no reply arrived within *to* seconds or the connection was closed.
    """
    reply = _cdp_call(w, method, params, to)
    if reply is None:
        return None
    return reply.get("result", {})
|
||||
|
||||
def page_target(sub):
    """Return the first target of type "page" whose URL contains *sub*, else None."""
    for target in tabs():
        if sub in target.get("url", "") and target.get("type") == "page":
            return target
    return None
|
||||
def player_target():
    """Return the first target whose URL mentions a known player host, else None.

    Unlike page_target(), the target type is not checked.
    """
    player_hosts = ["luluvdo", "lulustream"]
    for target in tabs():
        url = target.get("url", "")
        if any(host in url for host in player_hosts):
            return target
    return None
|
||||
|
||||
def safe_name(title, year):
    """Build a filesystem-friendly name from a catalog title and year.

    Args:
        title: page title; everything from the first "|" onwards is dropped.
        year: release year appended to the name; skipped when falsy.

    Returns:
        The cleaned name, at most 90 characters long, containing only word
        characters, spaces, dots and hyphens.
    """
    base = re.split(r"\s*\|", title)[0].strip()
    # Drop a "(YYYY)" already present in the title so the year is not repeated
    # when it is appended below; 19xx is covered as well as 20xx.
    base = re.sub(r"\((?:19|20)\d{2}\)", "", base).strip()
    label = f"{base} ({year})" if year else base
    # NOTE: the whitelist also removes the parentheses, so the result reads
    # "Title 2020" rather than "Title (2020)".
    cleaned = re.sub(r"[^\w .-]", "", label)
    # Strip after truncating so the cut never leaves dangling whitespace.
    return cleaned[:90].strip()
|
||||
|
||||
def download_one(href, title, year):
    """Start playback of one movie in Chrome via CDP and hand it to grab_stream.

    Steps:
      1. Point the first "page" tab at *href*.
      2. On the "/ver-pelicula/" tab, click a server entry (Spanish preferred).
      3. In the player target, remove high z-index overlays and start playback.
      4. Poll until a master.m3u8 request shows up in the player's resource
         timing entries (a human may need to solve a captcha meanwhile).
      5. Run the GRAB script as a subprocess with the sanitized name.

    Args:
        href: URL of the movie page.
        title: catalog title, used only to build the output name.
        year: release year, used only to build the output name.

    Returns:
        True when the GRAB script's stdout contains '"status": "ok"'; False
        when a required tab/player is missing, master.m3u8 never appears, or
        the script does not report ok. Exceptions from the helpers and from
        subprocess.run (including its timeout) propagate to the caller.
    """
    name = safe_name(title, year)
    print(f"[dl] {name} -> {href}", flush=True)

    # 1. Navigate the first page tab to the movie page.
    g = next((t for t in tabs() if t.get("type") == "page"), None)
    if not g:
        # Without this guard conn(None) would fail with an opaque TypeError.
        print("[dl] no page tab")
        return False
    w = conn(g)
    try:
        ev(w, f"location.href={json.dumps(href)}")
    finally:
        w.close()
    time.sleep(7)

    # 2. Click the Spanish server (dnume 2 = luluvid). Prefers luluvid; otherwise the
    #    first entry of the es.png group, and as a last resort the first entry of any group.
    pg = page_target("/ver-pelicula/")
    if not pg:
        print("[dl] no movie page")
        return False
    w = conn(pg)
    try:
        clicked = ev(w, r'''(()=>{
  const lis=[...document.querySelectorAll('li[data-nume]')];
  // grupo Español: li cuyo flag es es.png; preferir luluvid
  const esLis=lis.filter(l=>[...l.querySelectorAll('img')].some(i=>/es\.png/.test(i.src||'')));
  const pick=esLis.find(l=>/luluv/i.test(l.textContent||''))||esLis[0]||lis[0];
  if(!pick)return"no-options"; pick.click(); return"clicked:"+(pick.textContent||'').trim().slice(0,15);
})()''')
    finally:
        w.close()
    print("[dl] server:", clicked)
    time.sleep(6)

    # 3. Play: remove overlays + trusted click + jwplayer.play.
    pl = player_target()
    for _ in range(4):
        if pl:
            break
        time.sleep(3)
        pl = player_target()
    if not pl:
        print("[dl] no player iframe (captcha?)")
        return False
    w = conn(pl)
    # try/finally so the player socket is closed even if a CDP call raises.
    try:
        ev(w, r'''(()=>{let n=0;document.querySelectorAll('div,iframe').forEach(e=>{if(parseInt(getComputedStyle(e).zIndex||0)>=1000){e.remove();n++;}});return n;})()''')
        rect = ev(w, r'''(()=>{const b=document.querySelector('.jw-icon-display,.jw-display-icon-container,video');if(!b)return null;const r=b.getBoundingClientRect();return Math.round(r.left+r.width/2)+","+Math.round(r.top+r.height/2);})()''')
        if rect and "," in rect:
            cx, cy = [int(x) for x in rect.split(",")]
            # Two press/release pairs, 2 s apart, at the centre of the play control.
            for _ in range(2):
                cmd(w, "Input.dispatchMouseEvent", {"type": "mousePressed", "x": cx, "y": cy, "button": "left", "clickCount": 1})
                cmd(w, "Input.dispatchMouseEvent", {"type": "mouseReleased", "x": cx, "y": cy, "button": "left", "clickCount": 1})
                time.sleep(2)
        ev(w, r'''(()=>{try{jwplayer().play(true);}catch(e){}})()''')

        # 4. Wait for master (the human solves the captcha if needed): up to
        #    40 polls 3 s apart, i.e. about 120 s.
        got = False
        for _ in range(40):
            # Overlays can come back, so clear them again on every poll.
            ev(w, r'''(()=>{document.querySelectorAll('div,iframe').forEach(e=>{if(parseInt(getComputedStyle(e).zIndex||0)>=1000)e.remove();});})()''')
            if ev(w, r'''performance.getEntriesByType('resource').some(e=>/master\.m3u8/.test(e.name))'''):
                got = True
                break
            time.sleep(3)
    finally:
        w.close()
    if not got:
        print("[dl] master no apareció (captcha/no play). Skip.")
        return False

    # 5. Grab the stream. Only the output name is passed on; presumably the
    #    script locates the stream itself — confirm against grab_stream.py.
    r = subprocess.run(["uv", "run", "--with", "websocket-client", "--with", "pycryptodome",
                        "python", GRAB, name], cwd="/home/lucas/fn_registry/apps/gnula_grabber",
                       capture_output=True, text=True, timeout=3600)
    print(r.stdout[-400:])
    return '"status": "ok"' in r.stdout
|
||||
|
||||
def main():
    """Download up to *limit* pending Spanish movies and record each outcome.

    *limit* is the first command-line argument (default 1). Each selected row
    is passed to download_one(); on success the row becomes 'downloaded' with
    a local-time timestamp, otherwise 'failed'. A JSON summary with the
    attempted and downloaded counts is printed at the end.
    """
    limit = int(sys.argv[1]) if len(sys.argv) > 1 else 1
    c = sqlite3.connect(DB)
    # try/finally so the catalog connection is always released.
    try:
        rows = c.execute("SELECT href,title,year FROM movies WHERE lang_es=1 AND status='pending' ORDER BY detected_at LIMIT ?", (limit,)).fetchall()
        print(f"[dl] {len(rows)} pelis pending a descargar (limit {limit})")
        done = 0
        for href, title, year in rows:
            # Top-level boundary: one broken movie must not abort the batch.
            try:
                ok = download_one(href, title, year)
            except Exception as e:
                print("[dl] error:", e)
                ok = False
            if ok:
                c.execute("UPDATE movies SET status='downloaded', downloaded_at=? WHERE href=?",
                          (time.strftime("%Y-%m-%dT%H:%M:%S"), href))
                done += 1
            else:
                c.execute("UPDATE movies SET status='failed' WHERE href=?", (href,))
            # Commit per movie so progress survives a later crash.
            c.commit()
    finally:
        c.close()
    print(json.dumps({"attempted": len(rows), "downloaded": done}))
|
||||
|
||||
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user