120 lines
6.1 KiB
Python
120 lines
6.1 KiB
Python
#!/usr/bin/env python3
|
|
"""PART 2 — Downloader: consumes the catalog (gnula_crawl.py) and downloads the
pending Spanish-language movies through the Chrome+CDP+NordVPN session.

For each pending movie: navigates to its page, clicks the Spanish server (luluvid/luluvdo),
removes the ad overlay and presses play (jwplayer), waits for master.m3u8, and delegates to
grab_stream (streaming + local AES decryption) → manual/movies. Marks the movie as
downloaded in the catalog. The captcha (if one appears) is solved by the human: the script
waits for master to appear.

Usage: python gnula_download.py [limit]   (default 1 movie; pass N for several)
Requires: Chrome running with --remote-debugging-port=9222 (NordVPN proxy).
"""
|
|
import json, re, sqlite3, subprocess, sys, time, urllib.request
|
|
import websocket
|
|
|
|
# Chrome remote-debugging port; Chrome must be started with
# --remote-debugging-port set to this value.
PORT = 9222

# SQLite catalog database that main() queries and updates.
DB = "/home/lucas/.config/popelis/gnula_catalog.db"

# Script launched as a subprocess by download_one() to grab the stream.
GRAB = "/home/lucas/fn_registry/apps/gnula_grabber/grab_stream.py"
|
|
|
|
def tabs():
    """Return the debuggable targets reported by Chrome.

    Fetches ``/json/list`` from the DevTools HTTP endpoint on
    ``127.0.0.1:PORT`` and returns the decoded JSON. Callers in this file
    treat the result as a list of dicts and read their ``url``, ``type`` and
    ``webSocketDebuggerUrl`` keys.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``json.load`` raise when
        Chrome is unreachable or the reply is not valid JSON.
    """
    url = f"http://127.0.0.1:{PORT}/json/list"
    # Close the HTTP response deterministically instead of leaving the
    # socket open until the object happens to be garbage collected.
    with urllib.request.urlopen(url, timeout=8) as resp:
        return json.load(resp)
|
|
def conn(t):
    """Open a DevTools WebSocket for target ``t`` and return it.

    ``t`` is one entry of ``tabs()``; only its ``webSocketDebuggerUrl`` key
    is read. The connection is created with a 20 s timeout and then switched
    to a 3 s socket timeout before being handed back to the caller, who is
    responsible for closing it.
    """
    sock = websocket.create_connection(
        t["webSocketDebuggerUrl"],
        timeout=20,
        header=["Origin: http://localhost"],
    )
    sock.settimeout(3)
    return sock
|
|
def ev(w, expr, to=15):
    """Evaluate a JavaScript expression in the target and return its value.

    Sends ``Runtime.evaluate`` (with ``returnByValue``) over ``w`` and keeps
    reading messages until the response carrying the same id arrives.

    Args:
        w: Connected DevTools WebSocket; only ``send`` and ``recv`` are used.
        expr: JavaScript source to evaluate.
        to: Maximum number of seconds to wait for the response.

    Returns:
        The ``result.result.value`` field of the response, or ``None`` when
        that field is absent or no response arrives within ``to`` seconds.
    """
    # Use a fresh odd id per call (1, 3, 5, ...). With a constant id, the late
    # reply of an earlier call that timed out would be mistaken for the
    # answer to this one. Odd ids never collide with the even ids cmd() uses
    # on the same socket.
    ev._last_id = getattr(ev, "_last_id", -1) + 2
    msg_id = ev._last_id
    w.send(json.dumps({
        "id": msg_id,
        "method": "Runtime.evaluate",
        "params": {"expression": expr, "returnByValue": True},
    }))
    # Monotonic clock: the deadline must not move when the wall clock is
    # adjusted while we are waiting.
    deadline = time.monotonic() + to
    while time.monotonic() < deadline:
        try:
            m = json.loads(w.recv())
        except Exception:
            # Best effort: a receive timeout or an undecodable frame only
            # means "nothing usable yet"; keep polling until the deadline.
            continue
        if m.get("id") == msg_id:
            return m.get("result", {}).get("result", {}).get("value")
    return None
|
|
def cmd(w, method, params, to=8):
    """Send one DevTools command and return its ``result`` object.

    Args:
        w: Connected DevTools WebSocket; only ``send`` and ``recv`` are used.
        method: Protocol method name, e.g. ``"Input.dispatchMouseEvent"``.
        params: Parameters dict for the method.
        to: Maximum number of seconds to wait for the response.

    Returns:
        The ``result`` field of the matching response (``{}`` when the
        response has none), or ``None`` when no response arrives within
        ``to`` seconds.
    """
    # Use a fresh even id per call (2, 4, 6, ...). With a constant id, the
    # late reply of an earlier call that timed out would be mistaken for the
    # answer to this one. Even ids never collide with the odd ids ev() uses
    # on the same socket.
    cmd._last_id = getattr(cmd, "_last_id", 0) + 2
    msg_id = cmd._last_id
    w.send(json.dumps({"id": msg_id, "method": method, "params": params}))
    # Monotonic clock: the deadline must not move when the wall clock is
    # adjusted while we are waiting.
    deadline = time.monotonic() + to
    while time.monotonic() < deadline:
        try:
            m = json.loads(w.recv())
        except Exception:
            # Best effort: a receive timeout or an undecodable frame only
            # means "nothing usable yet"; keep polling until the deadline.
            continue
        if m.get("id") == msg_id:
            return m.get("result", {})
    return None
|
|
|
|
def page_target(sub):
    """Return the first target of type "page" whose URL contains ``sub``.

    Returns ``None`` when no target reported by ``tabs()`` matches.
    """
    for target in tabs():
        if sub in target.get("url", "") and target.get("type") == "page":
            return target
    return None
|
|
def player_target():
    """Return the first target whose URL mentions a known player host.

    The hosts looked for are "luluvdo" and "lulustream"; the target type is
    not checked. Returns ``None`` when no target matches.
    """
    hosts = ("luluvdo", "lulustream")
    for target in tabs():
        url = target.get("url", "")
        if any(host in url for host in hosts):
            return target
    return None
|
|
|
|
def safe_name(title, year):
    """Build a sanitised output name from a catalog title and year.

    Everything from the first ``|`` onwards is dropped, a parenthesised
    four-digit year (19xx or 20xx) is removed from the title, and ``year`` is
    appended as `` (year)`` when truthy. Every character that is not a word
    character, space, ``.`` or ``-`` is then deleted, and the result is cut
    to 90 characters. The sanitising step also strips the parentheses, so the
    year ends up as a bare suffix ("Matrix 2021").

    Args:
        title: Raw title from the catalog.
        year: Release year, or a falsy value to omit it.

    Returns:
        The sanitised name, at most 90 characters long.
    """
    base = re.split(r"\s*\|", title)[0].strip()
    # Drop the year embedded in the title so it is not repeated when ``year``
    # is appended below. Matching 19xx as well as 20xx keeps pre-2000 films
    # from ending up as e.g. "Casablanca 1942 1942".
    base = re.sub(r"\((?:19|20)\d{2}\)", "", base).strip()
    labelled = f"{base} ({year})" if year else base
    return re.sub(r"[^\w .-]", "", labelled)[:90]
|
|
|
|
def download_one(href, title, year):
    """Drive Chrome through one movie page and hand the stream to the grabber.

    Steps:
      1. Navigate the first "page" tab to ``href``.
      2. On the movie page, click a server entry: a Spanish-flag entry whose
         text matches "luluv" if present, else the first Spanish-flag entry,
         else the first entry of any kind.
      3. In the player target, remove high z-index overlays, click the
         player with real mouse events and call ``jwplayer().play(true)``.
      4. Poll (40 rounds, 3 s pause) until ``master.m3u8`` shows up in the
         player's resource timing entries; a human may solve a captcha
         in the meantime.
      5. Run the GRAB script with the sanitised name.

    Args:
        href: URL of the movie page.
        title: Catalog title; combined with ``year`` by ``safe_name``.
        year: Release year, or a falsy value.

    Returns:
        True when the grabber's stdout contains ``"status": "ok"``; False
        when a step could not be completed.

    Raises:
        subprocess.TimeoutExpired: if the grabber runs longer than 3600 s.
        Other exceptions from the HTTP/WebSocket helpers propagate as well.
    """
    name = safe_name(title, year)
    print(f"[dl] {name} -> {href}", flush=True)

    # 1. Navigate the first page tab to the movie page.
    first_page = next((t for t in tabs() if t.get("type") == "page"), None)
    if not first_page:
        # Without this guard conn(None) fails with an opaque TypeError.
        print("[dl] no page tab")
        return False
    w = conn(first_page)
    try:
        ev(w, f"location.href={json.dumps(href)}")
    finally:
        w.close()
    time.sleep(7)

    # 2. Click the Spanish server entry (see the selection order above).
    pg = page_target("/ver-pelicula/")
    if not pg:
        print("[dl] no movie page")
        return False
    w = conn(pg)
    try:
        clicked = ev(w, r'''(()=>{
        const lis=[...document.querySelectorAll('li[data-nume]')];
        // grupo Español: li cuyo flag es es.png; preferir luluvid
        const esLis=lis.filter(l=>[...l.querySelectorAll('img')].some(i=>/es\.png/.test(i.src||'')));
        const pick=esLis.find(l=>/luluv/i.test(l.textContent||''))||esLis[0]||lis[0];
        if(!pick)return"no-options"; pick.click(); return"clicked:"+(pick.textContent||'').trim().slice(0,15);
        })()''')
    finally:
        w.close()
    print("[dl] server:", clicked)
    time.sleep(6)

    # 3. Play: the player target may take a while to appear, so retry.
    pl = player_target()
    for _ in range(4):
        if pl:
            break
        time.sleep(3)
        pl = player_target()
    if not pl:
        print("[dl] no player iframe (captcha?)")
        return False

    got = False
    w = conn(pl)
    try:
        # Remove overlays (anything with z-index >= 1000) covering the player.
        ev(w, r'''(()=>{let n=0;document.querySelectorAll('div,iframe').forEach(e=>{if(parseInt(getComputedStyle(e).zIndex||0)>=1000){e.remove();n++;}});return n;})()''')
        # Centre of the play button (or the video element) as "x,y".
        rect = ev(w, r'''(()=>{const b=document.querySelector('.jw-icon-display,.jw-display-icon-container,video');if(!b)return null;const r=b.getBoundingClientRect();return Math.round(r.left+r.width/2)+","+Math.round(r.top+r.height/2);})()''')
        if rect and "," in rect:
            cx, cy = [int(x) for x in rect.split(",")]
            # NOTE(review): the original indentation was lost; this assumes
            # press, release and the 2 s pause all belong to the loop body and
            # that the jwplayer call below runs regardless of ``rect``.
            for _ in range(2):
                cmd(w, "Input.dispatchMouseEvent", {"type": "mousePressed", "x": cx, "y": cy, "button": "left", "clickCount": 1})
                cmd(w, "Input.dispatchMouseEvent", {"type": "mouseReleased", "x": cx, "y": cy, "button": "left", "clickCount": 1})
                time.sleep(2)
        ev(w, r'''(()=>{try{jwplayer().play(true);}catch(e){}})()''')

        # 4. Wait for master.m3u8, clearing overlays on every round.
        for _ in range(40):
            ev(w, r'''(()=>{document.querySelectorAll('div,iframe').forEach(e=>{if(parseInt(getComputedStyle(e).zIndex||0)>=1000)e.remove();});})()''')
            if ev(w, r'''performance.getEntriesByType('resource').some(e=>/master\.m3u8/.test(e.name))'''):
                got = True
                break
            time.sleep(3)
    finally:
        # Release the player socket even if a step above raised.
        w.close()
    if not got:
        print("[dl] master no apareció (captcha/no play). Skip.")
        return False

    # 5. Delegate the actual stream download to the grabber script.
    r = subprocess.run(
        ["uv", "run", "--with", "websocket-client", "--with", "pycryptodome",
         "python", GRAB, name],
        cwd="/home/lucas/fn_registry/apps/gnula_grabber",
        capture_output=True, text=True, timeout=3600,
    )
    print(r.stdout[-400:])
    return '"status": "ok"' in r.stdout
|
|
|
|
def main():
    """Download pending Spanish movies from the catalog and record the outcome.

    Reads an optional ``limit`` from ``sys.argv[1]`` (default 1), selects that
    many rows with ``lang_es=1`` and ``status='pending'`` ordered by
    ``detected_at``, and calls ``download_one`` for each. A row is marked
    ``downloaded`` (with a local-time timestamp) on success and ``failed``
    otherwise; every update is committed immediately. Finally prints a JSON
    summary with the attempted and downloaded counts.

    Raises:
        ValueError: if ``sys.argv[1]`` is not an integer.
    """
    limit = int(sys.argv[1]) if len(sys.argv) > 1 else 1
    c = sqlite3.connect(DB)
    try:
        rows = c.execute(
            "SELECT href,title,year FROM movies WHERE lang_es=1 AND status='pending' ORDER BY detected_at LIMIT ?",
            (limit,),
        ).fetchall()
        print(f"[dl] {len(rows)} pelis pending a descargar (limit {limit})")
        done = 0
        for href, title, year in rows:
            try:
                ok = download_one(href, title, year)
            except Exception as e:
                # Top-level boundary: one broken movie must not stop the batch.
                print("[dl] error:", e)
                ok = False
            if ok:
                c.execute(
                    "UPDATE movies SET status='downloaded', downloaded_at=? WHERE href=?",
                    (time.strftime("%Y-%m-%dT%H:%M:%S"), href),
                )
                c.commit()
                done += 1
            else:
                c.execute("UPDATE movies SET status='failed' WHERE href=?", (href,))
                c.commit()
        print(json.dumps({"attempted": len(rows), "downloaded": done}))
    finally:
        # Close the catalog connection even when a query or update fails.
        c.close()
|
|
|
|
# Run the downloader only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()
|