feat: Implement cookie extraction script for Chrome v20 and enhance browser interaction

This commit is contained in:
2025-06-01 15:31:13 +02:00
parent 628cddc3ae
commit e1b756ac99
8 changed files with 717 additions and 64 deletions
+179
View File
@@ -0,0 +1,179 @@
import os
import sys
import json
import binascii
import ctypes
import base64
import sqlite3
import pandas as pd
import pathlib
from Crypto.Cipher import AES, ChaCha20_Poly1305
from pypsexec.client import Client
"""
This script extracts v20 cookies from Google Chrome and saves them to a CSV file.
It requires administrator privileges to access Chrome's data.
It was written to handle Chrome's v20 cookies, which use a new encryption format.
"""
def is_admin():
    """Return True when the current process has administrator rights.

    The answer comes from ``shell32.IsUserAnAdmin`` reached through ctypes.
    If that API cannot be reached or the call fails, the function reports
    ``False`` instead of raising.

    Returns:
        bool: True if ``IsUserAnAdmin`` returned a non-zero value.
    """
    try:
        return ctypes.windll.shell32.IsUserAnAdmin() != 0
    except (AttributeError, OSError):
        # AttributeError: ``ctypes.windll`` (or the symbol) does not exist
        # on this platform. OSError: the foreign call itself failed.
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        return False
def get_app_bound_key(local_state_path):
    """Return the ``app_bound_encrypted_key`` entry of a Local State file.

    Args:
        local_state_path: path to a UTF-8 encoded JSON file.

    Returns:
        The value stored under ``os_crypt`` -> ``app_bound_encrypted_key``.

    Raises:
        OSError: the file cannot be opened.
        KeyError: one of the two keys is missing from the JSON document.
    """
    raw_state = pathlib.Path(local_state_path).read_text(encoding="utf-8")
    os_crypt_section = json.loads(raw_state)["os_crypt"]
    return os_crypt_section["app_bound_encrypted_key"]
def decrypt_app_bound_key(encrypted_key_b64):
    """Unwrap the two ``CryptUnprotectData`` layers around the app-bound key.

    The base64 input must decode to a blob that starts with the ``APPB``
    marker. The marker is removed and the remainder is passed through
    ``win32crypt.CryptUnprotectData`` twice, each time inside a child Python
    interpreter launched through a pypsexec service on ``localhost``: first
    with ``use_system_account=True``, then with ``use_system_account=False``.

    Args:
        encrypted_key_b64: base64 text of the app-bound encrypted key.

    Returns:
        bytes: the last 61 bytes of the doubly-unprotected blob.

    Raises:
        ValueError: the decoded blob does not start with ``APPB``.
        RuntimeError: one of the child interpreters exited with a non-zero
            return code.
    """
    # Validate before touching the service so bad input has no side effects.
    # (An ``assert`` would vanish under ``python -O``.)
    encrypted_key = binascii.a2b_base64(encrypted_key_b64)
    if encrypted_key[:4] != b"APPB":
        raise ValueError("app-bound key does not start with the 'APPB' marker")
    stripped_key_b64 = binascii.b2a_base64(encrypted_key[4:]).decode().strip()

    # One-line program for ``python -c``; the ``{}`` placeholder receives the
    # base64 blob to unprotect and the result is printed as base64 again.
    arguments = "-c \"" + """import win32crypt
import binascii
encrypted_key = win32crypt.CryptUnprotectData(binascii.a2b_base64('{}'), None, None, None, 0)
print(binascii.b2a_base64(encrypted_key[1]).decode())
""".replace("\n", ";") + "\""

    c = Client("localhost")
    c.connect()
    try:
        c.create_service()

        # First pass: unprotect under the SYSTEM account.
        encrypted_key_b64_sys, stderr_sys, rc_sys = c.run_executable(
            sys.executable,
            arguments=arguments.format(stripped_key_b64),
            use_system_account=True
        )
        if rc_sys != 0:
            raise RuntimeError(
                f"SYSTEM-level unprotect step exited with code {rc_sys}: {stderr_sys!r}"
            )

        # Second pass: unprotect the previous output as the current user.
        decrypted_key_b64, stderr_user, rc_user = c.run_executable(
            sys.executable,
            arguments=arguments.format(encrypted_key_b64_sys.decode().strip()),
            use_system_account=False
        )
        if rc_user != 0:
            raise RuntimeError(
                f"user-level unprotect step exited with code {rc_user}: {stderr_user!r}"
            )

        return binascii.a2b_base64(decrypted_key_b64)[-61:]
    finally:
        # Always disconnect, even when removing the service itself fails.
        try:
            c.remove_service()
        finally:
            c.disconnect()
def decrypt_final_key(encrypted_key):
    """Decrypt the key blob produced by ``decrypt_app_bound_key``.

    Blob layout as read by this function: byte 0 is a flag, bytes 1-12 the
    nonce, bytes 13-44 the ciphertext and everything from byte 45 on the
    authentication tag. Flag 1 selects AES-GCM, flag 2 selects
    ChaCha20-Poly1305; each uses its own hard-coded key.

    Args:
        encrypted_key: the bytes blob described above.

    Returns:
        bytes: the verified plaintext.

    Raises:
        ValueError: the flag is neither 1 nor 2 (tag verification failures
            from the cipher also surface as exceptions).
    """
    flag = encrypted_key[0]
    iv, ciphertext, tag = encrypted_key[1:13], encrypted_key[13:45], encrypted_key[45:]

    if flag not in (1, 2):
        raise ValueError(f"Unsupported flag: {flag}")

    if flag == 1:
        aes_key = bytes.fromhex("B31C6E241AC846728DA9C1FAC4936651CFFB944D143AB816276BCC6DA0284787")
        cipher = AES.new(aes_key, AES.MODE_GCM, nonce=iv)
    else:
        chacha20_key = bytes.fromhex("E98F37D7F4E1FA433D19304DC2258042090E2D1D7EEA7670D41F738D08729660")
        cipher = ChaCha20_Poly1305.new(key=chacha20_key, nonce=iv)

    return cipher.decrypt_and_verify(ciphertext, tag)
def decrypt_cookie_v20(encrypted_value, key):
    """Decrypt one ``v20`` cookie value with AES-GCM.

    The value is sliced as: 3 leading bytes (skipped), a 12-byte nonce, the
    ciphertext, and a trailing 16-byte tag. The first 32 bytes of the
    plaintext are discarded and the rest is decoded as UTF-8.

    Args:
        encrypted_value: raw bytes of the cookie's encrypted value.
        key: AES key to decrypt with.

    Returns:
        str: the decoded cookie value.
    """
    nonce = encrypted_value[3:15]
    body, tag = encrypted_value[15:-16], encrypted_value[-16:]
    plaintext = AES.new(key, AES.MODE_GCM, nonce=nonce).decrypt_and_verify(body, tag)
    return plaintext[32:].decode('utf-8')
# Column order of every record; also used so an empty result keeps its header.
_COOKIE_COLUMNS = [
    "host",
    "name",
    "path",
    "value",
    "encrypted_value_b64",
    "expires_utc",
    "is_secure",
    "is_httponly",
    "last_access_utc",
    "profile",
    "is_decrypted",
    "decrypt_error",
]


def _list_cookie_profiles(base_profile_path):
    """Return the profile directory names that contain a Network/Cookies file."""
    perfiles_invalidos = {"System Profile", "Guest Profile", "CrashpadMetrics"}
    return [
        name for name in os.listdir(base_profile_path)
        if os.path.isdir(os.path.join(base_profile_path, name))
        and name not in perfiles_invalidos
        and os.path.exists(os.path.join(base_profile_path, name, "Network", "Cookies"))
    ]


def _read_cookie_rows(db_path):
    """Fetch all cookie rows from *db_path*, opened read-only, then close it."""
    con = sqlite3.connect(pathlib.Path(db_path).as_uri() + "?mode=ro", uri=True)
    try:
        cur = con.cursor()
        cur.execute("SELECT host_key, name, path, is_secure, is_httponly, expires_utc, last_access_utc, CAST(encrypted_value AS BLOB) from cookies;")
        return cur.fetchall()
    finally:
        # Close even when the query raises so the handle is never leaked.
        con.close()


def extract_all_v20_cookies():
    """Collect every ``v20`` cookie from all Chrome profiles of this user.

    Reads the app-bound key from ``Local State`` under
    ``%USERPROFILE%\\AppData\\Local\\Google\\Chrome\\User Data``, derives the
    final key, then walks each profile's ``Network/Cookies`` database. Rows
    whose encrypted value does not start with ``b"v20"`` are skipped. Each
    remaining row yields one record; if decryption fails the record is kept
    with an empty ``value``, ``is_decrypted=False`` and the error text.

    Progress is printed to stdout, including the decrypted values.

    Returns:
        pandas.DataFrame: one row per v20 cookie with the columns listed in
        ``_COOKIE_COLUMNS`` (present even when no cookie was found).

    Raises:
        KeyError: the ``USERPROFILE`` environment variable is not set.
    """
    user_profile = os.environ['USERPROFILE']
    local_state_path = rf"{user_profile}\AppData\Local\Google\Chrome\User Data\Local State"
    base_profile_path = rf"{user_profile}\AppData\Local\Google\Chrome\User Data"

    app_bound_key_b64 = get_app_bound_key(local_state_path)
    decrypted_key_raw = decrypt_app_bound_key(app_bound_key_b64)
    final_key = decrypt_final_key(decrypted_key_raw)

    all_cookies = []
    for profile in _list_cookie_profiles(base_profile_path):
        db_path = os.path.join(base_profile_path, profile, "Network", "Cookies")
        for row in _read_cookie_rows(db_path):
            host, name, path, is_secure, is_httponly, expires_utc, last_access_utc, encrypted_value = row
            if not encrypted_value.startswith(b"v20"):
                continue

            try:
                value = decrypt_cookie_v20(encrypted_value, final_key)
                print(f"[✓] {host} {name}: {value}")
                is_decrypted, decrypt_error = True, ""
            except Exception as e:
                # Best effort: keep the row and record why it failed.
                print(f"[x] Error decrypting {host} {name}: {e}")
                value, is_decrypted, decrypt_error = "", False, str(e)

            all_cookies.append({
                "host": host,
                "name": name,
                "path": path,
                "value": value,
                "encrypted_value_b64": base64.b64encode(encrypted_value).decode(),
                "expires_utc": expires_utc,
                "is_secure": is_secure,
                "is_httponly": is_httponly,
                "last_access_utc": last_access_utc,
                "profile": profile,
                "is_decrypted": is_decrypted,
                "decrypt_error": decrypt_error,
            })

    return pd.DataFrame(all_cookies, columns=_COOKIE_COLUMNS)
if __name__ == "__main__":
    # Script entry point: relaunch elevated when needed, otherwise extract
    # the cookies and write them to ``cookies_extraidas.csv``.
    if not is_admin():
        input("Este script necesita ejecutarse como administrador. Presiona Enter para reiniciar con privilegios...")
        # ShellExecuteW takes the parameters as ONE string, so every argument
        # must be quoted; a plain " ".join() splits a script path that
        # contains spaces into several arguments.
        import subprocess
        ctypes.windll.shell32.ShellExecuteW(None, "runas", sys.executable, subprocess.list2cmdline(sys.argv), None, 1)
        sys.exit()
    print("[*] Extrayendo cookies v20 desde todos los perfiles...")
    df = extract_all_v20_cookies()
    df.to_csv("cookies_extraidas.csv", index=False, encoding="utf-8")
    print(f"[✓] Cookies v20 extraídas: {len(df)}")
    print("[✓] Guardado en 'cookies_extraidas.csv'")
@@ -0,0 +1,87 @@
import asyncio
import os
import pyperclip
import re
from src.ScrappingWeb.Scrapper import Scrapper
def sanitizar(nombre: str) -> str:
    """Turn arbitrary text into a string usable as a file name.

    Surrounding whitespace is removed, then each of ``\\ / * ? : " < > |``
    and every control character (such as an embedded newline or tab) is
    replaced by ``_``. The result is cut to at most 100 characters.

    Args:
        nombre: text to clean up.

    Returns:
        The sanitized name (possibly empty).
    """
    # Strip first so trailing newlines vanish instead of becoming "_".
    return re.sub(r'[\\/*?:"<>|\x00-\x1f]', "_", nombre.strip())[:100]
# Directory (relative to the working directory) where main() writes one
# text file per copied schema; created at import time if it is missing.
OUTPUT_DIR = "esquemas_json"
os.makedirs(OUTPUT_DIR, exist_ok=True)
async def main(ws_id="F51AC05B27E1DEC4011E67369781596C"):
    """Copy the schema of the first 12 tree items of a tab into text files.

    Connects to the DevTools endpoint at ``127.0.0.1:9222``, locates the tab
    whose websocket id is *ws_id*, and for each of the first 12 matching
    tree items: clicks it, triggers the page's "copy schema" button and its
    "Copiar como JSON" menu entry through injected JavaScript, then writes
    the clipboard contents to ``OUTPUT_DIR/<item text>.txt``.

    Args:
        ws_id: DevTools page id of the tab to drive. Defaults to the id the
            script was originally written against.
    """
    ws_url = f"ws://127.0.0.1:9222/devtools/page/{ws_id}"
    scrapper = Scrapper(debugging_url="http://127.0.0.1:9222")
    print("🔌 Conectando a pestaña específica...")

    tab = scrapper.get_tab(ws_url) or scrapper.get_tab(ws_id)
    if not tab:
        # Not known yet: refresh the tab list and match on the id suffix.
        nuevas_tabs = await scrapper.obtener_tabs_existentes()
        tab = next((t for t in nuevas_tabs if t.ws_url.rsplit("/", 1)[-1] == ws_id), None)
    if not tab:
        print("⚠️ La pestaña con ese ID no se encontró.")
        return

    elementos = await tab.get_elements_by_css_selector(
        "#_0rif_bq-resource-tree > div.cfctest-tree-main.ng-tns-c3578326070-0 > ul > cfc-virtual-scroller > div > div.item-container > div > li"
    )
    for i, elemento in enumerate(elementos[:12]):
        print(f"🖱️ Click #{i + 1}")
        clickeable = await elemento.encontrar_hijo_clickeable()
        if clickeable:
            await clickeable.click()
        else:
            print(f"⚠️ No se encontró subelemento clickeable en #{i+1}")
            continue
        await asyncio.sleep(1)

        texto_crudo = await elemento.obtener_texto()
        nombre_archivo = sanitizar(texto_crudo or f"esquema_item_{i+1}")
        print(f"📄 Nombre base del archivo: {nombre_archivo}.txt")

        # Empty the clipboard first: if the page's copy action fails, the
        # previous item's schema would otherwise be saved under this name.
        try:
            pyperclip.copy("")
        except Exception as e:
            print(f"❌ Error al leer el portapapeles o guardar archivo: {e}")
            continue

        # Run JS in the browser to simulate the copy flow.
        await tab.evaluar_js("""
(() => {
const boton = document.querySelector('button[id^="_0rif_bqui-table-copy-schema-btn"] span.mdc-button__label > span');
if (boton) boton.click();
})()
""")
        await asyncio.sleep(1)
        await tab.evaluar_js("""
(() => {
const overlays = document.querySelectorAll("div.cdk-overlay-pane");
for (let overlay of overlays) {
const items = overlay.querySelectorAll("cfc-menu-item .cfc-menu-item-label");
for (let item of items) {
if (item.textContent.includes("Copiar como JSON")) {
item.click();
break;
}
}
}
})()
""")
        await asyncio.sleep(1.5)

        try:
            texto_json = pyperclip.paste()
            if not texto_json:
                # Nothing was copied; do not create an empty/misleading file.
                print(f"⚠️ El portapapeles está vacío; no se guardó {nombre_archivo}.txt")
                continue
            file_path = os.path.join(OUTPUT_DIR, f"{nombre_archivo}.txt")
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(texto_json)
            print(f"✅ Guardado: {file_path}")
        except Exception as e:
            print(f"❌ Error al leer el portapapeles o guardar archivo: {e}")


if __name__ == "__main__":
    asyncio.run(main())
+80
View File
@@ -0,0 +1,80 @@
import subprocess
import os
import time
import signal
def iniciar_chrome(chrome_path,
                   user_data_dir,
                   headless=False,
                   debugging_port=9222,
                   user_agent=None,
                   ):
    """Launch Chrome with remote debugging enabled and block until it exits.

    The profile directory is created if needed, Chrome is started as a child
    process, and the function then polls once per second until the process
    ends. Pressing Ctrl+C terminates Chrome (killing it if it has not exited
    within 5 seconds).

    Args:
        chrome_path: path to the Chrome executable.
        user_data_dir: profile directory passed as ``--user-data-dir``.
        headless: when true, ``--headless=new`` is added.
        debugging_port: value for ``--remote-debugging-port``.
        user_agent: when given, passed as ``--user-agent``.
    """
    # Make sure the profile directory exists.
    os.makedirs(user_data_dir, exist_ok=True)

    # Command-line arguments for Chrome.
    # NOTE(review): --no-sandbox and --disable-web-security relax browser
    # protections; confirm they are really needed for this profile.
    chrome_args = [
        f"--remote-debugging-port={debugging_port}",
        f"--user-data-dir={user_data_dir}",
        "--disable-blink-features=AutomationControlled",
        "--no-sandbox",
        "--disable-web-security",
        "--disable-extensions",
        "--disable-dev-shm-usage",
        "--disable-infobars",
        "--disable-popup-blocking",
        "--disable-default-apps",
        "--mute-audio",
        "--window-size=1024,1024",
    ]
    if headless:
        chrome_args.append("--headless=new")  # for recent Chrome versions
    if user_agent:
        chrome_args.append(f"--user-agent={user_agent}")

    # Start Chrome.
    chrome_process = subprocess.Popen([chrome_path] + chrome_args)
    try:
        print(f"Chrome iniciado (headless={headless}). Presiona Ctrl+C para salir.")
        while chrome_process.poll() is None:
            time.sleep(1)
        print("Chrome se ha cerrado.")
    except KeyboardInterrupt:
        print("Terminando proceso de Chrome...")
        chrome_process.terminate()
        try:
            chrome_process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            chrome_process.kill()
            # Reap the killed process so it does not linger as a zombie.
            chrome_process.wait()
        print("Chrome cerrado correctamente.")
# NOTE(review): these statements run at import time (there is no
# ``if __name__ == "__main__"`` guard), so importing this module starts Chrome.

# Path to the Chrome executable
chrome_path = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
# Directory for the user profile
user_data_dir = os.path.abspath("./Perfiles_usuario/chrome_profile")
# Port for remote debugging
port = 9222
user_agent= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
# Launch Chrome; pass headless=True or headless=False
iniciar_chrome(chrome_path=chrome_path,
               user_data_dir=user_data_dir,
               debugging_port=port,
               headless=False,
               user_agent=user_agent) # Change headless to True for headless mode
+122
View File
@@ -0,0 +1,122 @@
import asyncio
import os
import re
from src.ScrappingWeb.Navegador import Navegador
from src.ScrappingWeb.Scrapper import Scrapper
from src.ScrappingWeb.Tab import Tab
import aiohttp
import csv
async def esperar_chrome_listo(port, timeout=10):
    """Wait until the DevTools HTTP endpoint on *port* answers with 200.

    ``http://127.0.0.1:{port}/json`` is requested roughly every 0.5 seconds
    until a response with status 200 arrives or *timeout* seconds have
    passed. Connection errors and slow responses count as "not ready yet".

    Args:
        port: remote-debugging port Chrome was started with.
        timeout: maximum number of seconds to wait (int or float).

    Raises:
        TimeoutError: no 200 response was received in time.
    """
    url = f"http://127.0.0.1:{port}/json"
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    # Bound every single request so one hung connection cannot stretch the
    # total wait far beyond ``timeout``.
    request_timeout = aiohttp.ClientTimeout(total=2)

    # One session is reused for all attempts instead of one per attempt.
    async with aiohttp.ClientSession(timeout=request_timeout) as session:
        while loop.time() < deadline:
            try:
                async with session.get(url) as resp:
                    if resp.status == 200:
                        return
            except (aiohttp.ClientError, asyncio.TimeoutError, OSError):
                # Chrome is not listening yet; retry after a short pause.
                pass
            await asyncio.sleep(0.5)

    raise TimeoutError(f"Chrome en puerto {port} no respondió dentro del tiempo esperado.")
# Path to the Chrome executable handed to Navegador in iniciar_y_scrapear.
chrome_path = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
def sanitizar_nombre(nombre: str) -> str:
    """Turn arbitrary text into a string usable as a file name.

    Surrounding whitespace is removed, then each of ``\\ / * ? : " < > |``
    and every control character (such as an embedded newline or tab) is
    replaced by ``_``. The result is cut to at most 100 characters.

    Args:
        nombre: text to clean up.

    Returns:
        The sanitized name (possibly empty).
    """
    # Replace characters that are invalid in file names. Stripping first
    # makes trailing newlines vanish instead of turning into "_".
    return re.sub(r'[\\/*?:"<>|\x00-\x1f]', "_", nombre.strip())[:100]
async def iniciar_y_scrapear(id: int):
    """Start one browser instance and print its user agent and page title.

    A ``Navegador`` is configured with its own profile directory
    (``./Perfiles_usuario/chrome_profile_{id}``) and debugging port
    (``9222 + id``), started in the background, and once its DevTools
    endpoint responds a ``Scrapper`` opens a new tab on it.

    Args:
        id: index of this browser instance; it selects the profile
            directory, the port and the version shown in the user agent.
    """
    user_data_dir = os.path.abspath(f"./Perfiles_usuario/chrome_profile_{id}")
    port = 9222 + id
    navegador = Navegador(
        chrome_path=chrome_path,
        user_data_dir=user_data_dir,
        id=id,
        download_dir=os.path.join(user_data_dir, "downloads"),
        debugging_port=port,
        headless=False,
        user_agent=f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/{100+id}.0.0.0 Safari/537.36"
    )

    # Start the browser in the background. The task is bound to a name
    # because the event loop only keeps weak references to tasks; an
    # unreferenced task may be garbage-collected before it finishes.
    tarea_navegador = asyncio.create_task(navegador.iniciar())

    # Wait until the browser is ready.
    await esperar_chrome_listo(port)

    # Connect the scraper to the browser.
    scrapper = Scrapper(debugging_url=f"http://127.0.0.1:{port}")
    tab = await scrapper.nueva_tab("", wait_time=6)

    # Run actions through the Tab object.
    ua = await tab.obtener_user_agent()
    print(f"🧭 [{id}] User-Agent:", ua)
    title = await tab.evaluar_js("document.title")
    print(f"📄 [{id}] Título:", title)
# botones= await tab.get_elements_by_css_selector("#mw-content-text > div.mw-content-ltr.mw-parser-output > figure:nth-child(27) > a > img")
# for boton in botones:
# await boton.click()
# # Crear carpeta si no existe
# os.makedirs("wikipedia_md", exist_ok=True)
# # Guardar el HTML completo
# html = await tab.obtener_html_completo()
# with open(f"contenido.html", "w", encoding="utf-8") as f:
# f.write(html)
# # Leer enlaces del CSV
# with open("enlaces_extraidos.csv", "r", encoding="utf-8") as f:
# reader = csv.reader(f)
# next(reader) # saltar encabezados
# enlaces = list(reader)
# for texto, enlace in enlaces:
# nombre_archivo = sanitizar_nombre(texto or "sin_titulo") + ".png"
# ruta_archivo = os.path.join("wikipedia", nombre_archivo)
# try:
# print(f"🌐 Visitando: {enlace}")
# tab = await scrapper.nueva_tab(enlace, wait_time=6)
# await tab.capturar_screenshot(ruta_archivo)
# print(f"📸 Captura guardada: {ruta_archivo}")
# await tab.cerrar()
# except Exception as e:
# print(f"❌ Error con {enlace}: {e}")
# await tab.capturar_screenshot(f"screenshot_{id}.png")
# html = await tab.obtener_html_completo()
# print(html)
# with open("contenido.html", "w", encoding="utf-8") as f:
# f.write(html)
# Extraer enlaces y guardarlos en CSV
# # # Cerrar tab y navegador si quieres
# await asyncio.sleep(10)
# await tab.cerrar()
# await navegador.cerrar()
async def main(num_navegadores=1):
    """Run ``iniciar_y_scrapear`` concurrently for several browser ids.

    Args:
        num_navegadores: how many instances to start; ids run from 0 to
            ``num_navegadores - 1``. Defaults to 1, the original behaviour.
    """
    await asyncio.gather(*(iniciar_y_scrapear(i) for i in range(num_navegadores)))


if __name__ == "__main__":
    asyncio.run(main())