feat: Implement cookie extraction script for Chrome v20 and enhance browser interaction

This commit is contained in:
2025-06-01 15:31:13 +02:00
parent 628cddc3ae
commit e1b756ac99
8 changed files with 717 additions and 64 deletions
+100 -26
View File
@@ -1,54 +1,58 @@
from typing import TYPE_CHECKING, Optional
import random
import asyncio
import json
if TYPE_CHECKING:
from src.ScrappingWeb.Tab import Tab
from .Tab import Tab
class ElementoWeb:
def __init__(self, tab: "Tab", object_id: str):
def __init__(self, tab: "Tab", object_id: Optional[str]):
self.tab = tab
self.object_id = object_id
self._node_id = None # Lazy resolved
@classmethod
def from_node(cls, tab: "Tab", node_id: int) -> "ElementoWeb":
inst = cls(tab, object_id=None)
inst._node_id = node_id
return inst
async def _asegurar_object_id(self):
if not self.object_id and self._node_id:
try:
resolved = await self.tab._enviar("DOM.resolveNode", {"nodeId": self._node_id})
self.object_id = resolved["object"]["objectId"]
except Exception as e:
print(f"⚠️ No se pudo resolver objectId desde nodeId: {e}")
async def scroll_into_view(self):
try:
await self._asegurar_object_id()
await self.tab._enviar("Runtime.callFunctionOn", {
"objectId": self.object_id,
"functionDeclaration": "function() { this.scrollIntoView({block: 'center'}); }",
"awaitPromise": True
})
print("📜 Elemento desplazado a la vista.")
if self.tab.verbose:
print("📜 Elemento desplazado a la vista.")
except Exception as e:
print(f"⚠️ Error al hacer scroll hacia el elemento: {e}")
@classmethod
def from_node(cls, tab: "Tab", node_id: int) -> "ElementoWeb":
# Creamos un objectId a partir del nodeId usando DOM.resolveNode
cls._node_id = node_id
cls._resolved_object_id = None # Lazy resolution opcional
return cls(tab, object_id=None)
async def click(self):
try:
await self.scroll_into_view()
# Resolver objectId si es necesario
if not self.object_id and hasattr(self, "_node_id"):
resolved = await self.tab._enviar("DOM.resolveNode", {"nodeId": self._node_id})
self.object_id = resolved["object"]["objectId"]
await self._asegurar_object_id()
if not self.object_id:
raise ValueError("No se puede obtener objectId del elemento para hacer click.")
# Obtener nodeId
# Intenta obtener coordenadas del nodo
node_result = await self.tab._enviar("DOM.describeNode", {
"objectId": self.object_id
})
node_id = node_result["node"]["nodeId"]
# Obtener coordenadas con fallback
try:
box_model = await self.tab._enviar("DOM.getBoxModel", {"nodeId": node_id})
content = box_model["model"]["content"]
@@ -60,7 +64,12 @@ class ElementoWeb:
x = (quad[0] + quad[4]) / 2
y = (quad[1] + quad[5]) / 2
# Simular movimiento humano del mouse
# 🧠 Enfocar el elemento antes de clickear
await self.tab._enviar("DOM.focus", {
"objectId": self.object_id
})
# 🎯 Movimiento humanoide opcional
start_x, start_y = x + random.uniform(-100, 100), y + random.uniform(-100, 100)
steps = random.randint(5, 12)
for i in range(1, steps + 1):
@@ -73,7 +82,7 @@ class ElementoWeb:
})
await asyncio.sleep(random.uniform(0.01, 0.05))
# Click humano
# 👆 Mouse Down
await self.tab._enviar("Input.dispatchMouseEvent", {
"type": "mousePressed",
"x": x,
@@ -81,7 +90,10 @@ class ElementoWeb:
"button": "left",
"clickCount": 1
})
await asyncio.sleep(random.uniform(0.05, 0.15))
# 👇 Mouse Up
await self.tab._enviar("Input.dispatchMouseEvent", {
"type": "mouseReleased",
"x": x,
@@ -90,27 +102,89 @@ class ElementoWeb:
"clickCount": 1
})
print(f"🖱️ Click humano simulado en ({x:.1f}, {y:.1f})")
await asyncio.sleep(random.uniform(0.01, 0.05))
# 🖱️ Click manual adicional
await self.tab._enviar("Input.dispatchMouseEvent", {
"type": "mouseClicked",
"x": x,
"y": y,
"button": "left",
"clickCount": 1
})
if self.tab.verbose:
print(f"🖱️ Click humano simulado en ({x:.1f}, {y:.1f})")
except Exception as e:
print(f"⚠️ Error al hacer click físico: {e}")
print("🧪 Intentando fallback con JavaScript click()...")
await self.click_js()
async def click_js(self):
try:
await self._asegurar_object_id()
if not self.object_id:
print("⚠️ No se puede hacer click JS: objectId no disponible.")
return
await self.tab._enviar("Runtime.callFunctionOn", {
"objectId": self.object_id,
"functionDeclaration": "function() { this.click(); }",
"awaitPromise": True
})
print("🖱️ Click simulado por JavaScript (element.click())")
if self.tab.verbose:
print("🖱️ Click simulado por JavaScript (element.click())")
except Exception as e:
print(f"⚠️ Error al ejecutar click en JS: {e}")
async def obtener_texto(self) -> Optional[str]:
return await self.tab.evaluar_js(f'document.getElementById("{self.object_id}").textContent')
try:
await self._asegurar_object_id()
result = await self.tab._enviar("Runtime.callFunctionOn", {
"objectId": self.object_id,
"functionDeclaration": "function() { return this.textContent; }",
"returnByValue": True
})
return result.get("result", {}).get("value")
except Exception as e:
print(f"⚠️ Error al obtener texto del elemento: {e}")
return None
async def escribir_texto(self, texto: str):
await self.tab.evaluar_js(f'document.getElementById("{self.object_id}").value = "{texto}"')
try:
await self._asegurar_object_id()
await self.tab._enviar("Runtime.callFunctionOn", {
"objectId": self.object_id,
"functionDeclaration": f"function() {{ this.value = {json.dumps(texto)}; this.dispatchEvent(new Event('input')); }}",
"awaitPromise": True
})
if self.tab.verbose:
print(f"⌨️ Texto escrito en elemento: '{texto}'")
except Exception as e:
print(f"⚠️ Error al escribir texto: {e}")
async def encontrar_hijo_clickeable(self) -> Optional["ElementoWeb"]:
try:
await self._asegurar_object_id()
resultado = await self.tab._enviar("Runtime.callFunctionOn", {
"objectId": self.object_id,
"functionDeclaration": """
function() {
const candidatos = this.querySelectorAll("span, div, a, button");
for (const el of candidatos) {
const style = window.getComputedStyle(el);
const visible = style.display !== "none" && style.visibility !== "hidden";
const interactivo = style.pointerEvents !== "none";
if (visible && interactivo) return el;
}
return this;
}
""",
"returnByValue": False
})
if "result" in resultado and "objectId" in resultado["result"]:
return ElementoWeb(self.tab, resultado["result"]["objectId"])
except Exception as e:
print(f"⚠️ No se pudo encontrar hijo clickeable: {e}")
return None