feat: gnula_grabber 2-part pipeline (crawl ES + CDP stream download)
This commit is contained in:
+132
@@ -0,0 +1,132 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Stream an HLS download to disk over CDP, decrypting AES-128 LOCALLY.

No giant Blob is built inside the browser (that used to crash Chrome). Instead:
  - In-page JavaScript fetches each RAW segment (the browser session gets past
    the CF 522 error) and returns it base64-encoded over CDP.
  - LOCALLY: base64-decode + AES-128-CBC decrypt (key + IV) + append to the
    .ts file on disk.
This way only one segment lives in memory at a time, so movies of any size
can be handled.

Usage: python grab_stream.py <out_basename>

Note: only <out_basename> is read from the command line; a second
[url_substring] argument is accepted by the shell but currently ignored.
"""
|
||||
import base64, json, os, subprocess, sys, time, urllib.request
|
||||
import websocket # uv --with websocket-client
|
||||
from Crypto.Cipher import AES # uv --with pycryptodome
|
||||
from Crypto.Util.Padding import unpad
|
||||
|
||||
# TCP port of the local DevTools HTTP endpoint that targets() queries.
PORT = 9222
# Directory that receives the final remuxed .mkv (main() creates it if missing).
DEST = "/mnt/f/POPELIS/manual/movies"
# URL substrings find_player() looks for to recognise a video-player target.
PLAYER_HOSTS = ["luluvdo", "luluvid", "filemoon", "streamwish", "dood", "vidhide",
                "bigwarp", "voe", "streamtape", "vidmoly", "cdn-tnmr", "/e/", "/embed"]
|
||||
|
||||
def targets():
    """Return the decoded JSON target list from the local DevTools endpoint.

    Performs an HTTP GET on ``http://127.0.0.1:{PORT}/json/list`` with an
    8-second timeout.

    Returns:
        Whatever JSON document the endpoint serves (callers in this file
        iterate it as a list of dicts).

    Raises:
        urllib.error.URLError: the endpoint is unreachable or timed out.
        json.JSONDecodeError: the response body is not valid JSON.
    """
    url = f"http://127.0.0.1:{PORT}/json/list"
    # The context manager closes the HTTP response (and its socket) even when
    # JSON decoding fails; the original left it to the garbage collector.
    with urllib.request.urlopen(url, timeout=8) as resp:
        return json.load(resp)
|
||||
|
||||
class CDP:
    """Minimal DevTools-protocol client over one WebSocket connection.

    Only ``Runtime.evaluate`` is exposed, through :meth:`ev`.

    Attributes:
        ws: the open ``websocket`` connection.
        id: id of the last request sent; incremented before each call.
    """

    def __init__(self, ws_url):
        """Connect to *ws_url* (a target's ``webSocketDebuggerUrl``)."""
        self.ws = websocket.create_connection(
            ws_url, timeout=30, header=["Origin: http://localhost"])
        self.ws.settimeout(60)
        self.id = 0

    def ev(self, expr, await_p=False, to=120):
        """Evaluate JavaScript *expr* in the target and return its value.

        Args:
            expr: JavaScript source passed to ``Runtime.evaluate``.
            await_p: sent as ``awaitPromise``; when true the protocol resolves
                a returned promise before replying.
            to: overall time budget in seconds for receiving the reply.

        Returns:
            The ``value`` of the evaluation result (``returnByValue`` is
            always requested), or ``None`` when the reply carries no value.

        Raises:
            RuntimeError: the reply contains ``exceptionDetails`` or a
                protocol-level ``error``; the message is the JSON of that
                object truncated to 300 characters.
            TimeoutError: no matching reply arrived within *to* seconds.
            Exception: any non-timeout error raised by the socket (for
                example a closed connection) propagates unchanged.
        """
        self.id += 1
        mid = self.id
        self.ws.send(json.dumps({
            "id": mid,
            "method": "Runtime.evaluate",
            "params": {"expression": expr, "returnByValue": True,
                       "awaitPromise": await_p},
        }))
        deadline = time.monotonic() + to
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Never block in recv() longer than the caller's remaining budget
            # (capped at the 60 s the connection was configured with).
            self.ws.settimeout(min(remaining, 60))
            try:
                raw = self.ws.recv()
            except websocket.WebSocketTimeoutException:
                # Nothing arrived in this slice; re-check the deadline.
                continue
            # Any other socket error (e.g. connection closed) propagates:
            # swallowing it would busy-loop until the deadline.
            try:
                m = json.loads(raw)
            except ValueError:
                # Skip frames that are not JSON (e.g. an empty control frame).
                continue
            # Unrelated events and replies to other requests are ignored.
            if not isinstance(m, dict) or m.get("id") != mid:
                continue
            if "error" in m:
                # Protocol-level failure: there is no "result" to unpack.
                raise RuntimeError(json.dumps(m["error"])[:300])
            r = m.get("result", {})
            if "exceptionDetails" in r:
                raise RuntimeError(json.dumps(r["exceptionDetails"])[:300])
            return r.get("result", {}).get("value")
        raise TimeoutError("CDP eval timeout")

    def close(self):
        """Close the WebSocket, ignoring any error (best-effort cleanup)."""
        try:
            self.ws.close()
        except Exception:
            pass
|
||||
|
||||
def find_player():
    """Return the first DevTools target that looks like a video player.

    A target qualifies when its ``type`` is ``"page"`` or ``"iframe"``, it has
    a non-empty ``webSocketDebuggerUrl``, and its ``url`` contains at least one
    of the substrings in ``PLAYER_HOSTS``.

    Returns:
        The matching target dict as returned by ``targets()``, or ``None``
        when no target qualifies.
    """
    for candidate in targets():
        if candidate.get("type") not in ("page", "iframe"):
            continue
        if not candidate.get("webSocketDebuggerUrl"):
            continue
        for marker in PLAYER_HOSTS:
            if marker in candidate.get("url", ""):
                return candidate
    return None
|
||||
|
||||
# JS: returns the playlist metadata as a JSON string.
#   - Finds the first "master.m3u8" request in the page's resource timings;
#     when there is none it returns {err: ...} instead.
#   - If that playlist has #EXT-X-STREAM-INF entries, follows the first one.
#   - Result keys: keyUrl (absolute AES-128 key URL or null), ivHex (explicit
#     IV without the 0x prefix, or null), seq (media sequence number of the
#     first segment), segs (absolute segment URLs) and master.
# seq defaults to 0 when #EXT-X-MEDIA-SEQUENCE is absent, as RFC 8216 requires;
# the IV derived from it is wrong otherwise. The IV prefix is matched as 0x or
# 0X, both of which the spec allows.
JS_META = r'''(async()=>{
const abs=(b,u)=>new URL(u,b).href;
const gt=async u=>{const r=await fetch(u,{headers:{Referer:location.origin+"/"}});if(!r.ok)throw new Error("HTTP "+r.status);return r.text();};
const masters=performance.getEntriesByType('resource').map(e=>e.name).filter(n=>/master\.m3u8/.test(n));
if(!masters.length)return JSON.stringify({err:"no master (¿diste play?)"});
let m=masters[0],t=await gt(m),pl=m;
if(/#EXT-X-STREAM-INF/.test(t)){const L=t.split("\n");for(let i=0;i<L.length;i++)if(L[i].startsWith("#EXT-X-STREAM-INF")){pl=abs(m,L[i+1].trim());break;}}
const media=await gt(pl);
const km=media.match(/#EXT-X-KEY:METHOD=AES-128,URI="([^"]+)"(?:,IV=0[xX]([0-9A-Fa-f]+))?/);
let seq=0;const ms=media.match(/#EXT-X-MEDIA-SEQUENCE:(\d+)/);if(ms)seq=parseInt(ms[1]);
const segs=media.split("\n").map(s=>s.trim()).filter(s=>s&&!s.startsWith("#")).map(s=>abs(pl,s));
return JSON.stringify({keyUrl:km?abs(pl,km[1]):null, ivHex:km&&km[2]?km[2]:null, seq, segs, master:m});
})()'''
|
||||
|
||||
# JS template: fetches one resource inside the page and returns it as base64.
# The single %s placeholder is filled with a JSON-quoted URL via the % operator
# (see main()). A non-OK HTTP response yields the string "ERR" + status instead
# of data. The bytes are turned into a binary string in 0x8000-byte chunks
# (chunked for large binaries) before btoa() encodes them.
JS_FETCH_B64 = '''(async()=>{
const r=await fetch(%s,{headers:{Referer:location.origin+"/"}});
if(!r.ok)return "ERR"+r.status;
const b=new Uint8Array(await r.arrayBuffer());let s="";const C=0x8000;
for(let i=0;i<b.length;i+=C)s+=String.fromCharCode.apply(null,b.subarray(i,i+C));
return btoa(s);
})()'''
|
||||
|
||||
def _fetch_bytes(cdp, url, what, to):
    """Fetch *url* inside the page via JS_FETCH_B64 and return the raw bytes.

    Args:
        cdp: connected CDP client used to run the in-page fetch.
        url: absolute URL of the resource.
        what: short label used as the prefix of error messages.
        to: timeout in seconds passed to ``cdp.ev``.

    Raises:
        RuntimeError: the in-page fetch reported ``ERR<status>`` or the
            evaluation did not return a string.
    """
    payload = cdp.ev(JS_FETCH_B64 % json.dumps(url), await_p=True, to=to)
    if not isinstance(payload, str):
        raise RuntimeError(f"{what}: unexpected CDP result {payload!r}")
    # The JS side signals failure as "ERR" + HTTP status. Genuine base64 can
    # also start with "ERR", so require the remainder to be all digits too.
    if payload.startswith("ERR") and payload[3:].isdigit():
        raise RuntimeError(f"{what}: {payload}")
    return base64.b64decode(payload)


def _segment_iv(meta, index):
    """Return the 16-byte AES-CBC IV for the segment at position *index*."""
    if meta["ivHex"]:
        # Left-pad so an IV written without leading zeros still gives 16 bytes.
        return bytes.fromhex(meta["ivHex"].zfill(32))
    # No explicit IV in the playlist: use the segment's media sequence number
    # as a big-endian 128-bit integer.
    return (meta["seq"] + index).to_bytes(16, "big")


def main():
    """Download the current player's HLS stream and remux it to an .mkv.

    Reads the output basename from ``sys.argv[1]`` (default ``"video"``),
    locates the player target, pulls the playlist metadata and every segment
    through the page over CDP, decrypts locally when the playlist has an
    AES-128 key, appends everything to ``/tmp/grab/<name>.ts`` and finally
    remuxes with ffmpeg into ``DEST/<name>.mkv``.

    Progress goes to stdout as ``[grab] ...`` lines; the final line is a JSON
    object with ``"status": "ok"`` or ``"status": "error"``.

    Raises:
        RuntimeError: a key or segment fetch failed, or the key is not
            16 bytes long. The CDP connection is closed in every case.
    """
    name = sys.argv[1] if len(sys.argv) > 1 else "video"
    pl = find_player()
    if not pl:
        print(json.dumps({"status": "error", "error": "no player iframe (¿diste play?)"}))
        return
    print(f"[grab] player: {pl['url'][:70]}", flush=True)

    cdp = CDP(pl["webSocketDebuggerUrl"])
    # try/finally guarantees the WebSocket is released on every exit path,
    # including the exceptions raised while fetching or decrypting.
    try:
        meta = json.loads(cdp.ev(JS_META, await_p=True, to=60))
        if meta.get("err"):
            print(json.dumps({"status": "error", "error": meta["err"]}))
            return
        segs = meta["segs"]
        print(f"[grab] {len(segs)} segmentos, key={'si' if meta['keyUrl'] else 'no'}", flush=True)
        if not segs:
            # Without this guard an empty .ts reaches ffmpeg and fails there
            # with a much less helpful message.
            print(json.dumps({"status": "error", "error": "no segments in playlist"}))
            return

        key = None
        if meta["keyUrl"]:
            key = _fetch_bytes(cdp, meta["keyUrl"], "key", 30)
            if len(key) != 16:
                # AES-128 needs exactly 16 key bytes; anything else is most
                # likely an error page served in place of the key.
                raise RuntimeError(f"key: expected 16 bytes, got {len(key)}")

        os.makedirs("/tmp/grab", exist_ok=True)
        ts_path = f"/tmp/grab/{name}.ts"
        t0 = time.time()
        with open(ts_path, "wb") as f:
            for i, su in enumerate(segs):
                data = _fetch_bytes(cdp, su, f"seg {i}", 90)
                if key:
                    cipher = AES.new(key, AES.MODE_CBC, _segment_iv(meta, i))
                    dec = cipher.decrypt(data)
                    try:
                        dec = unpad(dec, 16)
                    except ValueError:
                        # Deliberate best-effort: keep the bytes as-is when the
                        # segment does not end in valid padding (the original
                        # notes this for the last segment).
                        pass
                    data = dec
                f.write(data)
                if i % 50 == 0:
                    el = time.time() - t0
                    print(f"[grab] {i}/{len(segs)} ({el:.0f}s)", flush=True)
    finally:
        cdp.close()

    sz = os.path.getsize(ts_path)
    print(f"[grab] .ts listo: {sz/1e9:.2f} GB en {time.time()-t0:.0f}s", flush=True)

    # Remux (stream copy, no re-encode) straight into the destination folder.
    os.makedirs(DEST, exist_ok=True)
    mkv = os.path.join(DEST, f"{name}.mkv")
    r = subprocess.run(["ffmpeg", "-y", "-fflags", "+genpts", "-i", ts_path,
                        "-map", "0:v:0", "-map", "0:a:0", "-c", "copy", mkv],
                       capture_output=True, text=True)
    if r.returncode != 0:
        # The .ts is kept on failure so the remux can be retried by hand.
        print(json.dumps({"status": "error", "error": "ffmpeg: " + r.stderr[-300:]}))
        return
    os.remove(ts_path)
    print(json.dumps({"status": "ok", "mkv": mkv, "size_gb": round(os.path.getsize(mkv)/1e9, 2),
                      "segments": len(segs)}))
|
||||
|
||||
# Run the grabber only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user