Files
gnula_grabber/grab_stream.py
T

133 lines
6.1 KiB
Python

#!/usr/bin/env python3
"""Descarga HLS en streaming a disco (descifra AES-128 LOCAL) via CDP.
No usa Blob gigante en el navegador (eso petaba Chrome). En su lugar:
- in-page fetchea cada segmento CRUDO (sesion browser = pasa 522 CF) y lo
devuelve base64 por CDP.
- LOCAL: base64-decode + AES-128-CBC decrypt (key+IV) + append al .ts en disco.
Asi solo 1 segmento vive en memoria a la vez. Aguanta pelis de cualquier tamaño.
Uso: python grab_stream.py <out_basename> [url_substring]
"""
import base64, json, os, subprocess, sys, time, urllib.request
import websocket # uv --with websocket-client
from Crypto.Cipher import AES # uv --with pycryptodome
from Crypto.Util.Padding import unpad
# TCP port of the local DevTools HTTP endpoint that targets() queries on 127.0.0.1.
PORT = 9222
# Directory where main() writes the final remuxed .mkv file.
DEST = "/mnt/f/POPELIS/manual/movies"
# URL substrings that find_player() uses to recognise a player page/iframe target.
PLAYER_HOSTS = ["luluvdo", "luluvid", "filemoon", "streamwish", "dood", "vidhide",
"bigwarp", "voe", "streamtape", "vidmoly", "cdn-tnmr", "/e/", "/embed"]
def targets():
    """Return the decoded JSON target list from the local DevTools endpoint.

    Queries ``http://127.0.0.1:<PORT>/json/list`` with an 8-second timeout.

    Raises:
        urllib.error.URLError: if the endpoint cannot be reached.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    url = f"http://127.0.0.1:{PORT}/json/list"
    # Use the response as a context manager so the HTTP connection is always
    # released, even when JSON decoding fails.
    with urllib.request.urlopen(url, timeout=8) as resp:
        return json.load(resp)
class CDP:
    """Minimal DevTools-protocol client bound to one target's WebSocket."""

    def __init__(self, ws_url):
        """Open the WebSocket at *ws_url* and reset the request-id counter.

        The connection is opened with a 30-second timeout; afterwards each
        socket operation is allowed up to 60 seconds.
        """
        self.ws = websocket.create_connection(
            ws_url, timeout=30, header=["Origin: http://localhost"]
        )
        self.ws.settimeout(60)
        self.id = 0

    def ev(self, expr, await_p=False, to=120):
        """Evaluate the JavaScript *expr* in the target and return its value.

        Args:
            expr: JavaScript source sent via ``Runtime.evaluate``.
            await_p: when true, ask the target to await a returned promise.
            to: overall deadline in seconds for receiving the matching reply.

        Returns:
            The ``value`` of the evaluation result (``None`` if absent).

        Raises:
            RuntimeError: if the evaluation threw (``exceptionDetails``) or
                the target answered with a protocol-level ``error``.
            TimeoutError: if no matching reply arrived within *to* seconds.

        Any receive failure other than a socket timeout (for example a closed
        connection) propagates immediately instead of being retried.
        """
        self.id += 1
        mid = self.id
        self.ws.send(json.dumps({
            "id": mid,
            "method": "Runtime.evaluate",
            "params": {
                "expression": expr,
                "returnByValue": True,
                "awaitPromise": await_p,
            },
        }))
        # Monotonic clock: the deadline must not move if the wall clock does.
        deadline = time.monotonic() + to
        while time.monotonic() < deadline:
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                # Nothing arrived within the socket timeout; keep waiting
                # until the overall deadline expires.
                continue
            try:
                msg = json.loads(raw)
            except (TypeError, ValueError):
                # Not a JSON frame; it cannot be our reply.
                continue
            # Skip events and replies that belong to other request ids.
            if not isinstance(msg, dict) or msg.get("id") != mid:
                continue
            if "error" in msg:
                # Protocol-level failure: surface it rather than return None.
                raise RuntimeError(json.dumps(msg["error"])[:300])
            r = msg.get("result", {})
            if "exceptionDetails" in r:
                raise RuntimeError(json.dumps(r["exceptionDetails"])[:300])
            return r.get("result", {}).get("value")
        raise TimeoutError("CDP eval timeout")

    def close(self):
        """Close the WebSocket, ignoring any error (best-effort cleanup)."""
        try:
            self.ws.close()
        except Exception:
            pass
def find_player():
    """Return the first debuggable player target, or None when there is none.

    A target qualifies when its type is "page" or "iframe", it exposes a
    webSocketDebuggerUrl, and its URL contains one of PLAYER_HOSTS.
    """
    accepted_types = ("page", "iframe")
    for target in targets():
        if target.get("type") not in accepted_types:
            continue
        if not target.get("webSocketDebuggerUrl"):
            continue
        target_url = target.get("url", "")
        for host in PLAYER_HOSTS:
            if host in target_url:
                return target
    return None
# JS: returns the playlist metadata as a JSON string
# (key URL, segment URLs, media sequence, IV).
# Steps performed in the page:
#   1. Looks for "master.m3u8" among the page's performance resource entries;
#      if none is found it returns {"err": ...}.
#   2. If the master contains #EXT-X-STREAM-INF, takes the URI on the line after
#      the FIRST such tag (no bandwidth/resolution comparison is made).
#   3. Fetches that media playlist and matches the first #EXT-X-KEY of the form
#      METHOD=AES-128,URI="..."[,IV=0x...] (attributes must be in that order).
#   4. Collects every non-empty, non-"#" line as a segment URL, resolved
#      against the playlist URL.
# Result keys: keyUrl (or null), ivHex (or null), seq, segs, master.
# NOTE(review): seq falls back to 1 when #EXT-X-MEDIA-SEQUENCE is absent; the HLS
# spec (RFC 8216) appears to define the default as 0 - confirm before relying on
# the derived IV for playlists without that tag.
# NOTE(review): only one key is captured, so playlists that rotate keys are
# presumably not handled - verify.
JS_META = r'''(async()=>{
const abs=(b,u)=>new URL(u,b).href;
const gt=async u=>{const r=await fetch(u,{headers:{Referer:location.origin+"/"}});if(!r.ok)throw new Error("HTTP "+r.status);return r.text();};
const masters=performance.getEntriesByType('resource').map(e=>e.name).filter(n=>/master\.m3u8/.test(n));
if(!masters.length)return JSON.stringify({err:"no master (¿diste play?)"});
let m=masters[0],t=await gt(m),pl=m;
if(/#EXT-X-STREAM-INF/.test(t)){const L=t.split("\n");for(let i=0;i<L.length;i++)if(L[i].startsWith("#EXT-X-STREAM-INF")){pl=abs(m,L[i+1].trim());break;}}
const media=await gt(pl);
const km=media.match(/#EXT-X-KEY:METHOD=AES-128,URI="([^"]+)"(?:,IV=0x([0-9A-Fa-f]+))?/);
let seq=1;const ms=media.match(/#EXT-X-MEDIA-SEQUENCE:(\d+)/);if(ms)seq=parseInt(ms[1]);
const segs=media.split("\n").map(s=>s.trim()).filter(s=>s&&!s.startsWith("#")).map(s=>abs(pl,s));
return JSON.stringify({keyUrl:km?abs(pl,km[1]):null, ivHex:km&&km[2]?km[2]:null, seq, segs, master:m});
})()'''
# JS: fetches one resource and returns it as base64 (built in chunks so large
# binaries can be converted).
# The single %s placeholder must be filled with a JavaScript string literal for
# the URL; main() produces it with json.dumps(url).
# Returns "ERR" + HTTP status when the response is not OK; otherwise btoa() of
# the body, assembled through String.fromCharCode in 0x8000-byte slices.
JS_FETCH_B64 = '''(async()=>{
const r=await fetch(%s,{headers:{Referer:location.origin+"/"}});
if(!r.ok)return "ERR"+r.status;
const b=new Uint8Array(await r.arrayBuffer());let s="";const C=0x8000;
for(let i=0;i<b.length;i+=C)s+=String.fromCharCode.apply(null,b.subarray(i,i+C));
return btoa(s);
})()'''
def _fetch_bytes(cdp, url, to, label):
    """Fetch *url* inside the page via *cdp* and return the decoded bytes.

    Raises RuntimeError (prefixed with *label*) when the page reports an HTTP
    failure or the evaluation did not return a string.
    """
    b64 = cdp.ev(JS_FETCH_B64 % json.dumps(url), await_p=True, to=to)
    if not isinstance(b64, str):
        raise RuntimeError(f"{label}: unexpected fetch result {b64!r}")
    # The in-page script signals failure as "ERR<status>". Require the digits so
    # that a real base64 payload which merely starts with "ERR" is not rejected.
    if b64.startswith("ERR") and b64[3:].isdigit():
        raise RuntimeError(f"{label}: {b64}")
    return base64.b64decode(b64)


def _decrypt_segment(data, key, iv):
    """AES-128-CBC decrypt *data*; strip PKCS#7 padding when it is present."""
    dec = AES.new(key, AES.MODE_CBC, iv).decrypt(data)
    try:
        return unpad(dec, 16)
    except ValueError:
        # Segment without exact padding (e.g. the last one): keep it as-is.
        return dec


def main():
    """Download the playing HLS stream to a .ts file and remux it to .mkv.

    Reads the output base name from ``sys.argv[1]`` (default ``"video"``).
    Progress goes to stdout as ``[grab] ...`` lines; the final outcome is one
    JSON object with ``status`` ``"ok"`` or ``"error"``.

    Raises:
        RuntimeError: if the metadata, the key or a segment cannot be fetched,
            or the key is not 16 bytes long.
    """
    name = sys.argv[1] if len(sys.argv) > 1 else "video"
    pl = find_player()
    if not pl:
        print(json.dumps({"status": "error", "error": "no player iframe (¿diste play?)"}))
        return
    print(f"[grab] player: {pl['url'][:70]}", flush=True)

    cdp = CDP(pl["webSocketDebuggerUrl"])
    # try/finally: the WebSocket must be released on every exit path,
    # including exceptions raised in the middle of the download.
    try:
        raw_meta = cdp.ev(JS_META, await_p=True, to=60)
        if not isinstance(raw_meta, str):
            raise RuntimeError(f"playlist metadata: unexpected result {raw_meta!r}")
        meta = json.loads(raw_meta)
        if meta.get("err"):
            print(json.dumps({"status": "error", "error": meta["err"]}))
            return
        segs = meta["segs"]
        seq = meta["seq"]
        if not segs:
            # Nothing to download; running ffmpeg on an empty file would only
            # produce a confusing error.
            print(json.dumps({"status": "error", "error": "playlist has no segments"}))
            return
        print(f"[grab] {len(segs)} segmentos, key={'si' if meta['keyUrl'] else 'no'}", flush=True)

        key = None
        fixed_iv = None
        if meta["keyUrl"]:
            key = _fetch_bytes(cdp, meta["keyUrl"], 30, "key")
            if len(key) != 16:
                raise RuntimeError(f"key: expected 16 bytes for AES-128, got {len(key)}")
            if meta["ivHex"]:
                # Explicit IV is loop-invariant; left-pad to the full 128 bits.
                fixed_iv = bytes.fromhex(meta["ivHex"].zfill(32))

        os.makedirs("/tmp/grab", exist_ok=True)
        ts_path = f"/tmp/grab/{name}.ts"
        t0 = time.time()
        with open(ts_path, "wb") as f:
            for i, su in enumerate(segs):
                data = _fetch_bytes(cdp, su, 90, f"seg {i}")
                if key is not None:
                    if fixed_iv is not None:
                        iv = fixed_iv
                    else:
                        # No explicit IV: use the segment's sequence number as
                        # a 16-byte big-endian integer.
                        iv = (seq + i).to_bytes(16, "big")
                    data = _decrypt_segment(data, key, iv)
                f.write(data)
                if i % 50 == 0:
                    el = time.time() - t0
                    print(f"[grab] {i}/{len(segs)} ({el:.0f}s)", flush=True)
    finally:
        cdp.close()

    sz = os.path.getsize(ts_path)
    print(f"[grab] .ts listo: {sz/1e9:.2f} GB en {time.time()-t0:.0f}s", flush=True)

    # Remux straight into DEST as .mkv (stream copy, first video + first audio).
    os.makedirs(DEST, exist_ok=True)
    mkv = os.path.join(DEST, f"{name}.mkv")
    r = subprocess.run(["ffmpeg", "-y", "-fflags", "+genpts", "-i", ts_path,
                        "-map", "0:v:0", "-map", "0:a:0", "-c", "copy", mkv],
                       capture_output=True, text=True)
    if r.returncode != 0:
        # The .ts is kept on failure so the remux can be retried by hand.
        print(json.dumps({"status": "error", "error": "ffmpeg: " + r.stderr[-300:]}))
        return
    os.remove(ts_path)
    print(json.dumps({"status": "ok", "mkv": mkv, "size_gb": round(os.path.getsize(mkv)/1e9, 2),
                      "segments": len(segs)}))
if __name__ == "__main__":
main()