aef8791151
- Added Appshell component with responsive navbar and main content area - Integrated ColorSchemeToggle for light/dark mode switching - Created Welcome component with styled title and introductory text - Developed ChatPage for LLM interaction with WebSocket support - Implemented Biblioteca for managing notes with rich text editor - Added LoginPage for user authentication with error handling - Introduced MessageList and MessageBubble components for chat messages - Styled components with CSS modules for consistent design
138 lines
5.1 KiB
Python
138 lines
5.1 KiB
Python
import aiohttp
|
|
import websockets
|
|
import json
|
|
import asyncio
|
|
from .Tab import Tab
|
|
from typing import Optional
|
|
|
|
|
|
|
|
class Scrapper:
    """Manage browser tabs through the Chrome DevTools remote debugging endpoint.

    The class talks to the HTTP endpoints ``/json/version`` and ``/json`` and to
    the browser-level WebSocket in order to create tabs, and keeps every
    connected ``Tab`` wrapper in ``self.tabs``.
    """

    def __init__(self, debugging_url: str = "http://127.0.0.1:9222"):
        """Remember the debugging endpoint and start with no tracked tabs.

        Args:
            debugging_url: Base HTTP URL of the browser's remote debugging
                endpoint.
        """
        self.debugging_url = debugging_url
        self.tabs: list["Tab"] = []

    @staticmethod
    def _extraer_target_id(respuesta: dict) -> str:
        """Return the ``targetId`` of a ``Target.createTarget`` reply.

        Args:
            respuesta: Decoded JSON reply whose ``id`` matched the request.

        Returns:
            The value stored under ``result.targetId``.

        Raises:
            RuntimeError: If the reply carries an ``error`` member or has no
                ``result.targetId``.
        """
        error = respuesta.get("error")
        if error is not None:
            detalle = error.get("message", error) if isinstance(error, dict) else error
            raise RuntimeError(f"No se pudo crear la pestaña: {detalle}")
        try:
            return respuesta["result"]["targetId"]
        except (KeyError, TypeError) as exc:
            raise RuntimeError(
                "Respuesta inesperada de Target.createTarget"
            ) from exc

    async def _crear_tab_websocket_url(self, target_url: str = "about:blank") -> str:
        """Create a new tab with ``Target.createTarget`` and return its WebSocket URL.

        Args:
            target_url: URL the new target is opened with.

        Returns:
            The ``webSocketDebuggerUrl`` listed by ``/json`` for the new target.

        Raises:
            RuntimeError: If ``/json/version`` does not answer with status 200,
                if the browser rejects the command, or if the new target does
                not show up in ``/json`` (with a WebSocket URL) within the
                polling window.
        """
        # A single HTTP session serves both the version lookup and the polling
        # loop instead of opening a fresh session on every poll.
        async with aiohttp.ClientSession() as session:
            # 1. Fetch the browser-level (root) WebSocket URL.
            async with session.get(f"{self.debugging_url}/json/version") as resp:
                if resp.status != 200:
                    raise RuntimeError("No se pudo obtener información del navegador")
                data = await resp.json()
                browser_ws_url = data["webSocketDebuggerUrl"]

            # 2. Connect to the browser WebSocket.
            async with websockets.connect(browser_ws_url) as websocket:
                # 3. Send the command that creates the target.
                msg_id = 1
                await websocket.send(json.dumps({
                    "id": msg_id,
                    "method": "Target.createTarget",
                    "params": {
                        "url": target_url,
                        "newWindow": False,
                    },
                }))

                # 4. Skip unrelated messages until the reply to our request
                #    arrives, then read the target id (or fail on an error).
                while True:
                    data = json.loads(await websocket.recv())
                    if data.get("id") == msg_id:
                        target_id = self._extraer_target_id(data)
                        break

            # 5. Wait for the target to be listed by /json (at most ~3 seconds).
            for _ in range(30):
                await asyncio.sleep(0.1)
                async with session.get(f"{self.debugging_url}/json") as resp:
                    if resp.status != 200:
                        continue
                    tabs = await resp.json()
                for tab in tabs:
                    if tab.get("id") != target_id:
                        continue
                    ws_url = tab.get("webSocketDebuggerUrl")
                    if ws_url:
                        return ws_url
                    # Listed without a WebSocket URL yet: keep polling.
                    break

        raise RuntimeError("No se pudo obtener el WebSocket de la nueva pestaña")

    async def nueva_tab(self, url: str = "", wait_time: float = 5.0) -> "Tab":
        """Open a new tab, track it, and optionally navigate it.

        Args:
            url: Page to navigate to; when empty the tab is created but no
                navigation is performed.
            wait_time: Forwarded to ``Tab.navegar`` together with ``url``.

        Returns:
            The newly created ``Tab``, already appended to ``self.tabs``.
        """
        websocket_url = await self._crear_tab_websocket_url()
        tab = await Tab.crear_desde_websocket(websocket_url)
        self.tabs.append(tab)

        if url:
            print(f"🌍 Navegando a: {url}")
            await tab.navegar(url, wait_time)
        else:
            print("⚠️ No se especificó URL. La pestaña se creó pero no se navegó a ninguna página.")

        return tab

    async def cerrar_todos(self):
        """Call ``cerrar()`` on every tracked tab, then empty ``self.tabs``."""
        # Iterate over a copy so the loop is unaffected if the list changes
        # while tabs are being closed.
        for tab in list(self.tabs):
            await tab.cerrar()
        self.tabs.clear()

    def get_tab(self, identifier: str) -> Optional["Tab"]:
        """Look up a tracked tab by its WebSocket URL or by its trailing id.

        Accepts either form:
            - the full ws_url: ``ws://127.0.0.1:9222/devtools/page/XYZ``
            - the bare id: ``XYZ`` (last path segment of the ws_url)

        Args:
            identifier: Full WebSocket URL or the id extracted from it.

        Returns:
            The first matching ``Tab`` in ``self.tabs``, or ``None``.
        """
        for tab in self.tabs:
            ws_url = tab.ws_url
            # Match the whole URL or only its last path segment.
            if identifier in (ws_url, ws_url.rsplit("/", 1)[-1]):
                return tab
        return None

    async def obtener_tabs_existentes(self) -> list["Tab"]:
        """Connect to every open ``page`` target that is not tracked yet.

        Fetches ``/json``, keeps only entries whose ``type`` is ``"page"``,
        skips those whose WebSocket URL already belongs to a tab in
        ``self.tabs``, connects to the rest and tracks them. A header line and
        any warnings are printed to the console.

        Returns:
            The tabs that were newly connected by this call.

        Raises:
            RuntimeError: If ``/json`` does not answer with status 200.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(f"{self.debugging_url}/json") as resp:
                if resp.status != 200:
                    raise RuntimeError("No se pudo obtener la lista de pestañas")
                tabs_info = await resp.json()

        print("\n🧾 Pestañas activas (filtradas: solo 'type': 'page'):\n")
        nuevas_tabs = []
        for tab_info in tabs_info:
            if tab_info.get("type") != "page":
                continue  # Only page targets are of interest.

            ws_url = tab_info.get("webSocketDebuggerUrl")
            tab_id = tab_info.get("id")

            # Skip tabs that are already tracked.
            if any(t.ws_url == ws_url for t in self.tabs):
                continue

            # Best effort: one tab failing to connect must not abort the scan.
            try:
                tab = await Tab.crear_desde_websocket(ws_url)
                self.tabs.append(tab)
                nuevas_tabs.append(tab)
            except Exception as e:
                print(f"⚠️ No se pudo conectar a pestaña {tab_id}: {e}")

        if not nuevas_tabs:
            print("⚠️ No se encontraron nuevas pestañas para agregar.\n")

        return nuevas_tabs