Refactor and enhance MCP client and server functionality

- Removed prueba_cliente_mcp.py as it was no longer needed. - Updated prueba_loop_agente.py to integrate MCPServerRunner for managing server instances. - Modified prueba_mcp.py to implement a new structure for starting and stopping MCP servers. - Enhanced AgenteAI class to support multiple MCP blocks execution. - Improved MCPClient with timeout handling for tool calls. - Added new sandbox files for children's stories. - Created a simple ERP system with a main entry point. - Added unit tests for the ERP system. - Implemented MCPServerRunner to manage server processes. - Developed server_files.py to handle file operations securely within a sandbox environment. - Introduced ElementoWeb and Navegador classes for web scraping functionalities. - Enhanced Scrapper and Tab classes for better interaction with web pages.
2025-05-25 13:49:08 +02:00
parent a62778a030
commit cf6a768f6b
18 changed files with 880 additions and 107 deletions
@@ -1,29 +0,0 @@
 # client.py
 import asyncio
 from src.Llms.MCPs.Mcp_client import MCPClient
 from src.Llms.MCPs.Http_mcp_server import HttpMCPServer
 async def main():
    client = MCPClient()
    client.register_server(HttpMCPServer(
        name="tools",
        path="IGNORED_IN_CLIENT",  # no importa aquí
        host="127.0.0.1",
        port=4300,
        path_http="/tools"
    ))
    await client.connect_all()
    result = await client.call_tool({
        "server": "tools",
        "tool": "get_hostname",
        "input": {}
    })
    print("RESULT:", result)
    await client.disconnect_all()
 if __name__ == "__main__":
    asyncio.run(main())
@@ -14,6 +14,7 @@ from src.Llms.MCPs.McpClient_Registry import ClientRegistry  # o ajusta según t
 from src.Credenciales.ollama_credencial import OllamaCredencial
 from src.ConexionApis.Ollama_cliente import OllamaCliente
 from src.Llms.Modelos.Ollama_model import ModeloOllama
 from src.Llms.MCPs.McpServer import MCPServerRunner
 import asyncio
@@ -22,32 +23,60 @@ async def main():
    # # Usar Credencial openai
-    # conexion_admin = PostgresConexion(db_credencial)
+    conexion_admin = PostgresConexion(db_credencial)
-    # repo = OpenAICredencialRepo(conexion_admin)
+    repo = OpenAICredencialRepo(conexion_admin)
-    # credencial_openai = repo.get_by_id("OPAK20250513-61b29978b7604031014")
+    credencial_openai = repo.get_by_id("OPAK20250513-61b29978b7604031014")
-    # if credencial_openai is None:
+    if credencial_openai is None:
-    #     raise ValueError("No se encontró la credencial OpenAI con el ID proporcionado.")
+        raise ValueError("No se encontró la credencial OpenAI con el ID proporcionado.")
-    # cliente = OpenAICliente(credencial_openai)
+    cliente = OpenAICliente(credencial_openai)
    # # Llamamos a los servidores para iniciarlos
    # venv_python = r"E:\Fitz_Studio\.venv\Scripts\python.exe"
    # # runner_math = MCPServerRunner(
    # #     r"E:\Fitz_Studio\src\Llms\MCPs\McpServers\server_math.py",
    # #     python_path=venv_python
    # # )
    # # runner_tools = MCPServerRunner(
    # #     r"E:\Fitz_Studio\src\Llms\MCPs\McpServers\server_utils.py",
    # #     python_path=venv_python
    # # )
    # runner_files = MCPServerRunner(
    #     r"E:\Fitz_Studio\src\Llms\MCPs\McpServers\server_files.py",
    #     python_path=venv_python
    # )
    # # await runner_math.start()
    # # await runner_tools.start()
    # await runner_files.start()
    # # Esperamos un poco para asegurarnos de que los servidores estén listos
    # await asyncio.sleep(2) 
    # Usar Credencial ollama
-    credencial_ollama = OllamaCredencial(titulo="Ollama")
+    # credencial_ollama = OllamaCredencial(titulo="Ollama")
-    cliente = OllamaCliente(credencial_ollama)
+    # cliente = OllamaCliente(credencial_ollama)
-    modelo = ModeloOllama(
+    # modelo = ModeloOllama(
-        cliente=cliente,
+    #     cliente=cliente,
-        model="llama3.1:8b")
+    #     model="llama3.1:8b")
    # # crea el modelo (openai)
-    # modelo = ModeloOpenAI(
+    modelo = ModeloOpenAI(
-    #     cliente=cliente,
+        cliente=cliente,
-    #     model="gpt-4o",
+        model="gpt-4o",
-    #     temperature=1
+        temperature=1
-    # )
+    )
    # Le otorga memoria 
@@ -60,24 +89,30 @@ async def main():
    # Cargamos las herramientas
-    herramientas = MCPClient.from_http(
+    # herramientas = MCPClient.from_http(
-        name="tools",
+    #     name="tools",
-        url="http://127.0.0.1:4300/tools/"
+    #     url="http://127.0.0.1:4300/tools/"
    # )
    # math = MCPClient.from_http(
    #     name="math",
    #     url="http://127.0.0.1:4200/math/"
    # )
    archivos = MCPClient.from_http(
        name="files",
        url="http://127.0.0.1:4201/fs"
    )
    math = MCPClient.from_http(
        name="math",
        url="http://127.0.0.1:4200/math/"
    )
    # Las añadimos al registro de herramientas
    registry = ClientRegistry()
-    registry.add("tools", herramientas)
+    # registry.add("tools", herramientas)
-    registry.add("math", math)
+    # registry.add("math", math)
-
+    registry.add("files", archivos)
    # --- INICIALIZACIÓN DEL AGENTE ---
@@ -98,7 +133,7 @@ async def main():
        ],
        max_iterations=0,
-        # memoria=memoria,
+        memoria=memoria,
        mcp=registry  # ← ✅ Integración del cliente MCP
    )
@@ -133,6 +168,3 @@ async def main():
 if __name__ == "__main__":
    asyncio.run(main())
@@ -1,29 +1,29 @@
 import asyncio
 from src.Llms.MCPs.McpServer import MCPServerRunner
 async def main():
    venv_python = r"E:\Fitz_Studio\.venv\Scripts\python.exe"
    runner_math = MCPServerRunner(
        r"E:\Fitz_Studio\src\Llms\MCPs\McpServers\server_math.py",
        python_path=venv_python
    )
    runner_tools = MCPServerRunner(
        r"E:\Fitz_Studio\src\Llms\MCPs\McpServers\server_utils.py",
        python_path=venv_python
    )
    await runner_math.start()
    await runner_tools.start()
-async def test_registry(registry: ClientRegistry):
+    try:
-    tools = await registry.listar_tools_por_cliente()
+        while True:
-    prompts = await registry.listar_prompts_por_cliente()
+            await asyncio.sleep(1)
-    resources = await registry.listar_resources_por_cliente()
+    except KeyboardInterrupt:
        print("\n⛔ Terminando servidores...")
-    print("\n🔧 Herramientas:", tools)
+    await runner_math.stop()
    await runner_tools.stop()
-
+if __name__ == "__main__":
-    print("\n📋 Prompts:", prompts)
+    asyncio.run(main())
    print("\n📂 Resources:", resources)
 asyncio.run(test_registry(registry))
 async def test_wrapper():
        # 2. Llamar a una herramienta de prueba
        result = await herramientas.call_tool("generate_uuid")
        print("\n🆔 UUID generado:", result[0].text)  # Accedemos al contenido directamente
 # asyncio.run(test_wrapper())
@@ -0,0 +1 @@
 Érase una vez un pequeño ratón que vivía en un bosque mágico. Un día, encontró una pequeña llave dorada...
@@ -0,0 +1 @@
 Había una vez un pequeño conejo que soñaba con saltar más alto que las nubes. Un día, encontró unas botas mágicas...
@@ -0,0 +1 @@
 En un bosque encantado vivía una pequeña hada que siempre ayudaba a los animales a encontrar su camino de regreso a casa...
@@ -0,0 +1 @@
 Había una vez un osito que quería aprender a tocar la flauta mágica para alegrar a los habitantes del bosque...
@@ -0,0 +1 @@
 En una colina lejana, vivía un conejo que podía correr tan rápido como el viento. Un día, decidió participar en la gran carrera del bosque...
@@ -0,0 +1,4 @@
 # main.py
 if __name__ == '__main__':
    print('Bienvenido al sistema ERP')
@@ -0,0 +1,4 @@
 # test_sample.py
 def test_placeholder():
    assert True
@@ -114,7 +114,6 @@ class AgenteAI:
            - **Puedes pensar y decidir con texto normal**, pero:
            - El **bloque MCP debe ser lo último** que aparece en tu mensaje.
            - **NO escribas nada después del bloque MCP.**
            - **NO pongas `<END>` justo después de usar MCP.**
            - Solo usa `<END>` cuando:
            - hayas terminado completamente la tarea del usuario,
            - e interpretado la salida de las herramientas que usaste.
@@ -124,21 +123,6 @@ class AgenteAI:
            ---
            ### ⚠️ Ejemplos comunes de errores y cómo evitarlos:
            ❌ Incorrecto:
            ```
            {  
            "server": "tools",  
            "tool": "generate_uuid",  
            "input": {}  
            }
            ````
            🔴 Este bloque no funcionará. Le falta indicar que es un bloque MCP.
            ✅ Correcto:
            ```mcp
            {
@@ -296,6 +280,52 @@ class AgenteAI:
 ### Ejecutar VARIOS bloques MCP
    async def ejecutar_multiples_bloques_mcp(self, respuesta: str) -> Optional[List[str]]:
        logger.info("Buscando múltiples bloques MCP en la respuesta.")
        patron = r"```mcp\s*(\{.*?\})\s*```"
        matches = re.finditer(patron, respuesta, re.DOTALL)
        resultados = []
        hubo_bloques = False
        for match in matches:
            hubo_bloques = True
            bloque_json_str = match.group(1)
            try:
                bloque = json.loads(bloque_json_str)
                server_name = bloque["server"]
                tool_name = bloque["tool"]
                input_args = bloque.get("input", {})
                logger.info(f"Ejecutando bloque MCP: servidor={server_name}, herramienta={tool_name}")
                try:
                    cliente_mcp = self.mcp.get(server_name)
                except KeyError:
                    msg = f"No se encontró el cliente MCP para el servidor '{server_name}'"
                    logger.warning(msg)
                    resultados.append(msg)
                    continue
                async with cliente_mcp:
                    resultado = await cliente_mcp.call_tool(tool_name, input_args)
                    resultado_str = f"[{server_name}.{tool_name}] → {resultado}"
                    resultados.append(resultado_str)
            except Exception as e:
                error_msg = f"Error al procesar bloque MCP: {str(e)}"
                logger.error(error_msg, exc_info=True)
                resultados.append(error_msg)
        if not hubo_bloques:
            logger.info("No se encontró ningún bloque MCP en la respuesta.")
            return None
        return resultados
@@ -376,14 +406,14 @@ class AgenteAI:
                # Revisar y ejecutar bloque MCP si existe
                resultado_mcp_anterior = None
                if "```mcp" in respuesta_anterior:
-                    resultado_mcp = await self.ejecutar_bloque_mcp(respuesta_anterior)
+                    resultados_mcp = await self.ejecutar_multiples_bloques_mcp(respuesta_anterior)
-                    if resultado_mcp:
+                    if resultados_mcp:
-                        resultado_mcp_anterior = resultado_mcp
+                        resultado_mcp_anterior = "\n".join(resultados_mcp)
                        if stream:
-                            yield "\n" + resultado_mcp
+                            yield "\n" + resultado_mcp_anterior
                        else:
-                            respuestas.append(resultado_mcp)
+                            respuestas.append(resultado_mcp_anterior)
                # Guardar historial si hay memoria
                if self.memoria:
@@ -9,6 +9,7 @@ from fastmcp.client.transports import (
 )
 from mcp.types import *
 from fastmcp.exceptions import ClientError
 import asyncio
 class MCPClient:
@@ -52,10 +53,13 @@ class MCPClient:
    # Delegación MCP
-    async def call_tool(
+    async def call_tool(self, name: str, arguments: dict[str, Any] | None = None) -> list[TextContent | ImageContent | EmbeddedResource]:
-        self, name: str, arguments: dict[str, Any] | None = None
+        try:
-    ) -> list[TextContent | ImageContent | EmbeddedResource]:
+            return await asyncio.wait_for(
-        return await self.client.call_tool(name, arguments)
+                self.client.call_tool(name, arguments), timeout=10
            )
        except asyncio.TimeoutError:
            raise RuntimeError(f"Timeout al ejecutar herramienta '{name}'")
    async def get_prompt(
        self, name: str, arguments: dict[str, str] | None = None
@@ -0,0 +1,48 @@
 # server_runner.py
 import subprocess
 import asyncio
 import socket
 import re
 from pathlib import Path
 async def wait_for_port(host: str, port: int, timeout: float = 10.0):
    for _ in range(int(timeout * 10)):
        try:
            with socket.create_connection((host, port), timeout=0.5):
                return True
        except (OSError, ConnectionRefusedError):
            await asyncio.sleep(0.1)
    raise TimeoutError(f"No se pudo conectar al servidor en {host}:{port}")
 class MCPServerRunner:
    def __init__(self, server_script_path: str, python_path: str = "python"):
        self.server_script_path = server_script_path
        self.python_path = python_path
        self.port: int = self._extraer_puerto()
        self.process: subprocess.Popen | None = None
    def _extraer_puerto(self) -> int:
        contenido = Path(self.server_script_path).read_text(encoding="utf-8")
        coincidencias = re.findall(r"port\s*=\s*(\d+)", contenido)
        if not coincidencias:
            raise ValueError(f"No se pudo detectar el puerto en {self.server_script_path}")
        return int(coincidencias[0])
    async def start(self):
        if self.process is None or self.process.poll() is not None:
            self.process = subprocess.Popen(
                [self.python_path, self.server_script_path],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            await wait_for_port("127.0.0.1", self.port)
            print(f"🟢 Servidor MCP iniciado en puerto {self.port}")
    async def stop(self):
        if self.process and self.process.poll() is None:
            self.process.terminate()
            try:
                self.process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                self.process.kill()
            print("🔴 Servidor MCP detenido")
@@ -0,0 +1,133 @@
 from fastmcp import FastMCP
 from pathlib import Path
 import shutil
 from datetime import datetime
 # Directorio base seguro
 SANDBOX_DIR = Path("./sandbox").resolve()
 SANDBOX_DIR.mkdir(parents=True, exist_ok=True)
 def safe_path(requested_path: str) -> Path:
    """Siempre interpreta la ruta como relativa al SANDBOX_DIR, incluso si empieza con '/'."""
    # Normaliza la ruta quitando el primer '/'
    normalized = requested_path.strip().lstrip("/")
    full_path = (SANDBOX_DIR / normalized).resolve()
    if not full_path.is_relative_to(SANDBOX_DIR):
        raise ValueError("Ruta fuera del directorio permitido.")
    return full_path
 mcp = FastMCP()
@mcp.tool(description="Lee y devuelve el contenido de un archivo de texto ubicado en el sistema de archivos seguro. El archivo debe estar dentro del sandbox.")
 def read_file(path: str) -> str:
    try:
        file_path = safe_path(path)
        if not file_path.is_file():
            raise FileNotFoundError(f"Archivo '{path}' no encontrado.")
        return file_path.read_text(encoding="utf-8")
    except Exception as e:
        return f"⚠️ Error al leer archivo '{path}': {str(e)}"
@mcp.tool(description="Escribe contenido de texto en un archivo dentro del sandbox. Si el archivo ya existe, será sobrescrito.")
 def write_file(path: str, content: str) -> str:
    file_path = safe_path(path)
    file_path.parent.mkdir(parents=True, exist_ok=True)
    file_path.write_text(content, encoding="utf-8")
    return "Archivo guardado correctamente."
@mcp.tool(description="Elimina de forma segura un archivo ubicado dentro del sandbox.")
 def delete_file(path: str) -> str:
    file_path = safe_path(path)
    if not file_path.is_file():
        raise FileNotFoundError("Archivo no encontrado.")
    file_path.unlink()
    return "Archivo eliminado."
@mcp.tool(description="Crea una carpeta (y sus carpetas padre si es necesario) dentro del sandbox.")
 def create_folder(path: str) -> str:
    folder_path = safe_path(path)
    folder_path.mkdir(parents=True, exist_ok=True)
    return "Carpeta creada."
@mcp.tool(description="Lista archivos y carpetas dentro de una ruta del sandbox.")
 def list_directory(path: str = ".") -> list[str]:
    folder = safe_path(path)
    if not folder.is_dir():
        raise NotADirectoryError("Ruta no corresponde a una carpeta.")
    return sorted(str(p.relative_to(SANDBOX_DIR)) for p in folder.iterdir())
@mcp.tool(description="Muestra la estructura de carpetas y archivos como un árbol, desde una ruta dentro del sandbox.")
 def tree(path: str = ".", depth: int = 3) -> str:
    base = safe_path(path)
    if not base.is_dir():
        raise NotADirectoryError("Ruta no corresponde a una carpeta.")
    tree_output = []
    def walk(dir_path: Path, prefix: str = "", level: int = 0):
        if level > depth:
            return
        entries = sorted(dir_path.iterdir())
        for i, entry in enumerate(entries):
            connector = "└── " if i == len(entries) - 1 else "├── "
            tree_output.append(f"{prefix}{connector}{entry.name}")
            if entry.is_dir():
                extension = "    " if i == len(entries) - 1 else "│   "
                walk(entry, prefix + extension, level + 1)
    tree_output.append(f"{base.name}/")
    walk(base)
    return "\n".join(tree_output)
@mcp.tool(description="Devuelve información detallada sobre un archivo: tamaño en bytes, fecha de modificación y tipo.")
 def file_info(path: str) -> dict:
    fpath = safe_path(path)
    if not fpath.exists():
        raise FileNotFoundError("Archivo no encontrado.")
    return {
        "nombre": fpath.name,
        "tipo": "carpeta" if fpath.is_dir() else "archivo",
        "tamaño_bytes": fpath.stat().st_size,
        "última_modificación": datetime.fromtimestamp(fpath.stat().st_mtime).isoformat(),
        "relativo": str(fpath.relative_to(SANDBOX_DIR))
    }
@mcp.tool(description="Copia un archivo o carpeta dentro del sandbox a otra ruta.")
 def copy_file(src: str, dest: str) -> str:
    src_path = safe_path(src)
    dest_path = safe_path(dest)
    if src_path.is_dir():
        shutil.copytree(src_path, dest_path, dirs_exist_ok=True)
    else:
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src_path, dest_path)
    return "Copia completada."
@mcp.tool(description="Mueve o renombra un archivo o carpeta dentro del sandbox.")
 def move_file(src: str, dest: str) -> str:
    src_path = safe_path(src)
    dest_path = safe_path(dest)
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    src_path.rename(dest_path)
    return "Movimiento completado."
@mcp.tool(description="Elimina todos los archivos y subcarpetas dentro de una carpeta del sandbox.")
 def clear_folder(path: str) -> str:
    folder_path = safe_path(path)
    if not folder_path.is_dir():
        raise NotADirectoryError("La ruta no es una carpeta.")
    for item in folder_path.iterdir():
        if item.is_file() or item.is_symlink():
            item.unlink()
        elif item.is_dir():
            shutil.rmtree(item)
    return "Carpeta vaciada."
 if __name__ == "__main__":
    mcp.run(transport="streamable-http", host="127.0.0.1", port=4201, path="/fs")
@@ -0,0 +1,116 @@
 from typing import TYPE_CHECKING, Optional
 import random
 import asyncio
 if TYPE_CHECKING:
    from src.ScrappingWeb.Tab import Tab
 class ElementoWeb:
    def __init__(self, tab: "Tab", object_id: str):
        self.tab = tab
        self.object_id = object_id
    async def scroll_into_view(self):
        try:
            await self.tab._enviar("Runtime.callFunctionOn", {
                "objectId": self.object_id,
                "functionDeclaration": "function() { this.scrollIntoView({block: 'center'}); }",
                "awaitPromise": True
            })
            print("📜 Elemento desplazado a la vista.")
        except Exception as e:
            print(f"⚠️ Error al hacer scroll hacia el elemento: {e}")
    @classmethod
    def from_node(cls, tab: "Tab", node_id: int) -> "ElementoWeb":
        # Creamos un objectId a partir del nodeId usando DOM.resolveNode
        cls._node_id = node_id
        cls._resolved_object_id = None  # Lazy resolution opcional
        return cls(tab, object_id=None)
    async def click(self):
        try:
            await self.scroll_into_view()
            # Resolver objectId si es necesario
            if not self.object_id and hasattr(self, "_node_id"):
                resolved = await self.tab._enviar("DOM.resolveNode", {"nodeId": self._node_id})
                self.object_id = resolved["object"]["objectId"]
            if not self.object_id:
                raise ValueError("No se puede obtener objectId del elemento para hacer click.")
            # Obtener nodeId
            node_result = await self.tab._enviar("DOM.describeNode", {
                "objectId": self.object_id
            })
            node_id = node_result["node"]["nodeId"]
            # Obtener coordenadas con fallback
            try:
                box_model = await self.tab._enviar("DOM.getBoxModel", {"nodeId": node_id})
                content = box_model["model"]["content"]
                x = (content[0] + content[2]) / 2
                y = (content[1] + content[5]) / 2
            except:
                quads_result = await self.tab._enviar("DOM.getContentQuads", {"nodeId": node_id})
                quad = quads_result["quads"][0]
                x = (quad[0] + quad[4]) / 2
                y = (quad[1] + quad[5]) / 2
            # Simular movimiento humano del mouse
            start_x, start_y = x + random.uniform(-100, 100), y + random.uniform(-100, 100)
            steps = random.randint(5, 12)
            for i in range(1, steps + 1):
                curr_x = start_x + (x - start_x) * i / steps + random.uniform(-1, 1)
                curr_y = start_y + (y - start_y) * i / steps + random.uniform(-1, 1)
                await self.tab._enviar("Input.dispatchMouseEvent", {
                    "type": "mouseMoved",
                    "x": curr_x,
                    "y": curr_y,
                })
                await asyncio.sleep(random.uniform(0.01, 0.05))
            # Click humano
            await self.tab._enviar("Input.dispatchMouseEvent", {
                "type": "mousePressed",
                "x": x,
                "y": y,
                "button": "left",
                "clickCount": 1
            })
            await asyncio.sleep(random.uniform(0.05, 0.15))
            await self.tab._enviar("Input.dispatchMouseEvent", {
                "type": "mouseReleased",
                "x": x,
                "y": y,
                "button": "left",
                "clickCount": 1
            })
            print(f"🖱️ Click humano simulado en ({x:.1f}, {y:.1f})")
        except Exception as e:
            print(f"⚠️ Error al hacer click físico: {e}")
            print("🧪 Intentando fallback con JavaScript click()...")
            await self.click_js()
    async def click_js(self):
        try:
            await self.tab._enviar("Runtime.callFunctionOn", {
                "objectId": self.object_id,
                "functionDeclaration": "function() { this.click(); }",
                "awaitPromise": True
            })
            print("🖱️ Click simulado por JavaScript (element.click())")
        except Exception as e:
            print(f"⚠️ Error al ejecutar click en JS: {e}")
    async def obtener_texto(self) -> Optional[str]:
        return await self.tab.evaluar_js(f'document.getElementById("{self.object_id}").textContent')
    async def escribir_texto(self, texto: str):
        await self.tab.evaluar_js(f'document.getElementById("{self.object_id}").value = "{texto}"')
@@ -0,0 +1,193 @@
 import asyncio
 import os
 import signal
 import subprocess
 import json
 from typing import Optional
 import aiohttp
 class Navegador:
    def __init__(self,
                 chrome_path: str,
                 user_data_dir: str,
                 id: Optional[int] = None,
                 download_dir: Optional[str] = None,
                 debugging_port: int = 9222,
                 headless: bool = False,
                 user_agent: Optional[str] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"):
        self.chrome_path = chrome_path
        self.user_data_dir = user_data_dir
        self.id = id
        self.download_dir = download_dir or os.path.join(self.user_data_dir, "downloads")
        self.debugging_port = debugging_port
        self.headless = headless
        self.user_agent = user_agent
        self.chrome_process: Optional[subprocess.Popen] = None
    async def _esperar_debugger(self, timeout=10):
        url = f"http://127.0.0.1:{self.debugging_port}/json"
        for _ in range(timeout * 10):  # 10 intentos por segundo
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.get(url) as resp:
                        if resp.status == 200:
                            print("✅ Chrome listo para debugging.")
                            return
            except Exception:
                pass
            await asyncio.sleep(0.1)
        raise RuntimeError("❌ Chrome no respondió en el puerto de debugging.")
    def _preconfigurar_preferencias(self):
        prefs_path = os.path.join(self.user_data_dir, "Default", "Preferences")
        os.makedirs(os.path.dirname(prefs_path), exist_ok=True)
        os.makedirs(self.download_dir, exist_ok=True)
        prefs = {
            "profile": {
                "exit_type": "Normal",
                "exited_cleanly": True
            },
            "browser": {
                "has_seen_welcome_page": True
            },
            "distribution": {
                "skip_first_run_ui": True
            },
            "download": {
                "default_directory": self.download_dir,
                "prompt_for_download": False,
                "directory_upgrade": True,
                "extensions_to_open": ""
            },
            "savefile": {
                "default_directory": self.download_dir
            }
        }
        if os.path.exists(prefs_path):
            try:
                with open(prefs_path, "r", encoding="utf-8") as f:
                    existing = json.load(f)
                existing.update(prefs)
                prefs = existing
            except Exception:
                pass
        with open(prefs_path, "w", encoding="utf-8") as f:
            json.dump(prefs, f, indent=2)
    def _build_args(self):
        os.makedirs(self.user_data_dir, exist_ok=True)
        self._preconfigurar_preferencias()
        args = [
            f"--remote-debugging-port={self.debugging_port}",
            f"--user-data-dir={self.user_data_dir}",
            "--disable-blink-features=AutomationControlled",
            "--no-sandbox",
            "--disable-web-security",
            # "--disable-extensions",
            "--disable-dev-shm-usage",
            "--disable-infobars",
            "--disable-popup-blocking",
            "--disable-default-apps",
            "--mute-audio",
            "--window-size=1024,1024",
            "--no-first-run",
            "--no-default-browser-check",
            "--disable-features=DefaultBrowserPrompt",
            "--disable-component-update",
            "--disable-background-networking",
            "--disable-sync",
            "--disable-translate",
            "--disable-background-timer-throttling",
            "--disable-client-side-phishing-detection",
            "--disable-component-extensions-with-background-pages",
            "--metrics-recording-only",
            "--safebrowsing-disable-auto-update",
        ]
        if self.headless:
            args.append("--headless=new")
        if self.user_agent:
            args.append(f"--user-agent={self.user_agent}")
        return args
    async def inyectar_spoof_chrome(self):
        script = """
        window.chrome = {
            app: {
                isInstalled: false,
                InstallState: {
                    DISABLED: 'disabled',
                    INSTALLED: 'installed',
                    NOT_INSTALLED: 'not_installed'
                },
                RunningState: {
                    CANNOT_RUN: 'cannot_run',
                    READY_TO_RUN: 'ready_to_run',
                    RUNNING: 'running'
                }
            },
            runtime: {
                PlatformOs: { MAC: 'mac', WIN: 'win', ANDROID: 'android', CROS: 'cros', LINUX: 'linux', OPENBSD: 'openbsd' },
                PlatformArch: { ARM: 'arm', X86_32: 'x86-32', X86_64: 'x86-64' },
                PlatformNaclArch: { ARM: 'arm', X86_32: 'x86-32', X86_64: 'x86-64' },
                RequestUpdateCheckStatus: { THROTTLED: 'throttled', NO_UPDATE: 'no_update', UPDATE_AVAILABLE: 'update_available' },
                OnInstalledReason: { INSTALL: 'install', UPDATE: 'update', CHROME_UPDATE: 'chrome_update', SHARED_MODULE_UPDATE: 'shared_module_update' },
                OnRestartRequiredReason: { APP_UPDATE: 'app_update', OS_UPDATE: 'os_update', PERIODIC: 'periodic' }
            }
        };
        """
        url = f"http://127.0.0.1:{self.debugging_port}/json"
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                targets = await resp.json()
            for target in targets:
                if "webSocketDebuggerUrl" not in target:
                    continue
                target_id = target["id"]
                async with session.post(
                    f"http://127.0.0.1:{self.debugging_port}/json/protocol",
                    json={"targetId": target_id}
                ):
                    pass  # CDP protocol fetch optional
                async with session.post(
                    f"http://127.0.0.1:{self.debugging_port}/json/send",
                    json={
                        "id": 1,
                        "method": "Page.addScriptToEvaluateOnNewDocument",
                        "params": {"source": script}
                    }
                ) as inject_resp:
                    if inject_resp.status == 200:
                        print("✅ chrome.* spoof inyectado.")
    async def iniciar(self):
        args = self._build_args()
        self.chrome_process = subprocess.Popen([self.chrome_path] + args)
        print(f"Chrome iniciado (headless={self.headless}). Esperando disponibilidad del debugger...")
        await self._esperar_debugger()
        await self.inyectar_spoof_chrome()
    async def cerrar(self):
        if self.chrome_process and self.chrome_process.poll() is None:
            self.chrome_process.terminate()
            try:
                await asyncio.wait_for(asyncio.to_thread(self.chrome_process.wait), timeout=5)
            except asyncio.TimeoutError:
                self.chrome_process.kill()
            print("🛑 Chrome cerrado correctamente.")
@@ -0,0 +1,69 @@
 import aiohttp
 import websockets
 import json
 import asyncio
 from src.ScrappingWeb.Tab import Tab
 class Scrapper:
    def __init__(self, debugging_url: str = "http://127.0.0.1:9222"):
        self.debugging_url = debugging_url
        self.tabs: list[Tab] = []
    async def _crear_tab_websocket_url(self, target_url: str = "about:blank") -> str:
        """
        Crea una nueva pestaña usando el método oficial Target.createTarget
        y devuelve su WebSocketDebuggerUrl.
        """
        # 1. Obtener el WebSocket general del browser (root)
        async with aiohttp.ClientSession() as session:
            async with session.get(f"{self.debugging_url}/json/version") as resp:
                if resp.status != 200:
                    raise RuntimeError("No se pudo obtener información del navegador")
                data = await resp.json()
                browser_ws_url = data["webSocketDebuggerUrl"]
        # 2. Conectarse al WebSocket del browser
        async with websockets.connect(browser_ws_url) as websocket:
            # 3. Enviar comando para crear target
            msg_id = 1
            await websocket.send(json.dumps({
                "id": msg_id,
                "method": "Target.createTarget",
                "params": {
                    "url": target_url,
                    "newWindow": False
                }
            }))
            # 4. Esperar respuesta con el targetId
            while True:
                respuesta = await websocket.recv()
                data = json.loads(respuesta)
                if data.get("id") == msg_id:
                    target_id = data["result"]["targetId"]
                    break
        # 5. Esperar a que el target aparezca en /json
        for _ in range(30):  # máximo ~3 segundos
            await asyncio.sleep(0.1)
            async with aiohttp.ClientSession() as session:
                async with session.get(f"{self.debugging_url}/json") as resp:
                    if resp.status == 200:
                        tabs = await resp.json()
                        for tab in tabs:
                            if tab.get("id") == target_id:
                                return tab["webSocketDebuggerUrl"]
        raise RuntimeError("No se pudo obtener el WebSocket de la nueva pestaña")
    async def nueva_tab(self, url: str, wait_time: float = 5.0) -> Tab:
        websocket_url = await self._crear_tab_websocket_url()
        tab = await Tab.crear_desde_websocket(websocket_url)
        self.tabs.append(tab)
        await tab.navegar(url, wait_time)
        return tab
    async def cerrar_todos(self):
        for tab in list(self.tabs):
            await tab.cerrar()
        self.tabs.clear()
@@ -0,0 +1,164 @@
 import asyncio
 import json
 import base64
 import websockets
 from typing import Optional
 from typing import List
 from src.ScrappingWeb.ElementoWeb import ElementoWeb 
 class Tab:
    def __init__(self, websocket: websockets.WebSocketClientProtocol, ws_url: str):
        self.websocket = websocket
        self.ws_url = ws_url
        self._message_id = 0
        self._pending = {}
        self._load_event = asyncio.Event()
    @classmethod
    async def crear_desde_websocket(cls, ws_url: str) -> "Tab":
        websocket = await websockets.connect(ws_url)
        tab = cls(websocket, ws_url)
        asyncio.create_task(tab._recibir_eventos())
        await tab._enviar("Page.enable")
        await tab._enviar("Network.enable")
        return tab
    async def _recibir_eventos(self):
        async for mensaje in self.websocket:
            data = json.loads(mensaje)
            if "id" in data and data["id"] in self._pending:
                future = self._pending.pop(data["id"])
                future.set_result(data.get("result"))
            elif data.get("method") == "Page.loadEventFired":
                self._load_event.set()
    async def _enviar(self, metodo: str, parametros: Optional[dict] = None) -> dict:
        self._message_id += 1
        msg_id = self._message_id
        mensaje = {
            "id": msg_id,
            "method": metodo,
            "params": parametros or {}
        }
        future = asyncio.get_event_loop().create_future()
        self._pending[msg_id] = future
        await self.websocket.send(json.dumps(mensaje))
        return await future
    async def navegar(self, url: str, wait_time: float = 5.0):
        self._load_event.clear()
        print(f"🌍 Navegando a: {url}")
        await self._enviar("Page.navigate", {"url": url})
        try:
            await asyncio.wait_for(self._load_event.wait(), timeout=wait_time)
            print("✅ Página cargada correctamente.")
        except asyncio.TimeoutError:
            print(f"⚠️ Tiempo de espera agotado ({wait_time}s) al cargar la página.")
    async def evaluar_js(self, js_code: str) -> Optional[str]:
        try:
            result = await self._enviar("Runtime.evaluate", {
                "expression": js_code,
                "returnByValue": True
            })
            return result["result"]["value"]
        except Exception as e:
            print(f"⚠️ Error al ejecutar JS: {e}")
            return None
    async def obtener_user_agent(self) -> Optional[str]:
        return await self.evaluar_js("navigator.userAgent")
    async def capturar_screenshot(self, output_path: str = "screenshot.png"):
        try:
            result = await self._enviar("Page.captureScreenshot")
            data = result["data"]
            with open(output_path, "wb") as f:
                f.write(base64.b64decode(data))
            print(f"📸 Screenshot guardado como {output_path}")
        except Exception as e:
            print(f"⚠️ Error al capturar screenshot: {e}")
    async def cerrar(self):
        try:
            await self.websocket.close()
            print("🛑 WebSocket cerrado.")
        except Exception as e:
            print(f"⚠️ Error al cerrar pestaña: {e}")
    async def obtener_html_completo(self) -> Optional[str]:
        """
        Devuelve el HTML completo de la página actual.
        """
        try:
            result = await self._enviar("Runtime.evaluate", {
                "expression": "document.documentElement.outerHTML",
                "returnByValue": True
            })
            html = result["result"]["value"]
            print("📄 HTML completo obtenido.")
            return html
        except Exception as e:
            print(f"⚠️ Error al obtener HTML: {e}")
            return None
    async def obtener_dominio(self) -> Optional[str]:
        """
        Devuelve el dominio (hostname) de la página actual, por ejemplo: 'example.com'.
        """
        try:
            dominio = await self.evaluar_js("window.location.hostname")
            print(f"🌐 Dominio actual: {dominio}")
            return dominio
        except Exception as e:
            print(f"⚠️ Error al obtener dominio: {e}")
            return None
    async def get_element_by_selector_node(self, selector: str) -> Optional["ElementoWeb"]:
        try:
            # Obtener nodo raíz del documento
            doc = await self._enviar("DOM.getDocument")
            root_node_id = doc["root"]["nodeId"]
            # Buscar el nodo desde el DOM (más confiable que Runtime.evaluate)
            result = await self._enviar("DOM.querySelector", {
                "nodeId": root_node_id,
                "selector": selector
            })
            node_id = result["nodeId"]
            if not node_id:
                print(f"⚠️ Nodo no encontrado con selector: {selector}")
                return None
            return ElementoWeb.from_node(self, node_id=node_id)
        except Exception as e:
            print(f"⚠️ Error al buscar nodo desde DOM.querySelector: {e}")
            return None
    async def get_elements_by_css_selector(self, selector: str) -> List["ElementoWeb"]:
        try:
            result = await self._enviar("Runtime.evaluate", {
                "expression": f'Array.from(document.querySelectorAll("{selector}"))',
                "objectGroup": "grupo_elementos_css",
                "includeCommandLineAPI": True,
                "returnByValue": False
            })
            array_id = result["result"]["objectId"]
            props = await self._enviar("Runtime.getProperties", {
                "objectId": array_id,
                "ownProperties": True
            })
            elementos = []
            for prop in props["result"]:
                if "value" in prop and "objectId" in prop["value"]:
                    elementos.append(ElementoWeb(self, prop["value"]["objectId"]))
            print(f"🔍 Se encontraron {len(elementos)} elementos con el selector CSS '{selector}'.")
            return elementos
        except Exception as e:
            print(f"⚠️ Error al buscar elementos por selector CSS '{selector}': {e}")
            return []
		`@@ -0,0 +1 @@`
							`Érase una vez un pequeño ratón que vivía en un bosque mágico. Un día, encontró una pequeña llave dorada...`
		`@@ -0,0 +1 @@`
							`Había una vez un pequeño conejo que soñaba con saltar más alto que las nubes. Un día, encontró unas botas mágicas...`
		`@@ -0,0 +1 @@`
							`En un bosque encantado vivía una pequeña hada que siempre ayudaba a los animales a encontrar su camino de regreso a casa...`
		`@@ -0,0 +1 @@`
							`Había una vez un osito que quería aprender a tocar la flauta mágica para alegrar a los habitantes del bosque...`
		`@@ -0,0 +1 @@`
							`En una colina lejana, vivía un conejo que podía correr tan rápido como el viento. Un día, decidió participar en la gran carrera del bosque...`