This repository has been archived on 2025-11-27. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Fitz_Studio/scrappers/ejecucion_iterativa_navegador.py
T
egutierrez aef8791151 feat: Implement main application shell with navigation and color scheme toggle
- Added Appshell component with responsive navbar and main content area
- Integrated ColorSchemeToggle for light/dark mode switching
- Created Welcome component with styled title and introductory text
- Developed ChatPage for LLM interaction with WebSocket support
- Implemented Biblioteca for managing notes with rich text editor
- Added LoginPage for user authentication with error handling
- Introduced MessageList and MessageBubble components for chat messages
- Styled components with CSS modules for consistent design
2025-06-21 02:01:21 +02:00

88 lines
2.9 KiB
Python

import asyncio
import os
import pyperclip
import re
from domains.ScrappingWeb.Scrapper import Scrapper
# Characters that cannot appear in a file name on Windows: the reserved
# punctuation set plus the ASCII control range (newlines, tabs, NUL, ...).
_CARACTERES_INVALIDOS = re.compile(r'[\\/*?:"<>|\x00-\x1f]')


def sanitizar(nombre: str) -> str:
    """Return *nombre* turned into a string usable as a file name.

    Surrounding whitespace is removed, every reserved or control character
    is replaced by ``"_"``, and the result is capped at 100 characters.

    Args:
        nombre: Arbitrary text, e.g. the visible label of a page element.

    Returns:
        The sanitized name. It may be the empty string when *nombre* is empty
        or contains only whitespace; callers should supply their own fallback.
    """
    # Strip first so leading/trailing newlines vanish instead of becoming "_".
    limpio = _CARACTERES_INVALIDOS.sub("_", nombre.strip())
    # Truncation can expose a space that used to be in the middle of the
    # text; trailing spaces are not valid at the end of Windows file names.
    return limpio[:100].rstrip()
# Folder (relative to the current working directory) that receives the
# exported files written by main().
OUTPUT_DIR = "esquemas_json"

# Created at import time so later writes never fail on a missing folder;
# exist_ok=True makes re-running the script harmless.
os.makedirs(name=OUTPUT_DIR, exist_ok=True)
# CSS selector for the <li> entries of the resource tree in the automated page.
_SELECTOR_ELEMENTOS = "#_0rif_bq-resource-tree > div.cfctest-tree-main.ng-tns-c3578326070-0 > ul > cfc-virtual-scroller > div > div.item-container > div > li"

# Clicks the "copy schema" button of the currently displayed item, if present.
_JS_ABRIR_MENU_COPIA = """
(() => {
    const boton = document.querySelector('button[id^="_0rif_bqui-table-copy-schema-btn"] span.mdc-button__label > span');
    if (boton) boton.click();
})()
"""

# Clicks the "Copiar como JSON" entry inside any open overlay menu.
_JS_ELEGIR_COPIAR_JSON = """
(() => {
    const overlays = document.querySelectorAll("div.cdk-overlay-pane");
    for (let overlay of overlays) {
        const items = overlay.querySelectorAll("cfc-menu-item .cfc-menu-item-label");
        for (let item of items) {
            if (item.textContent.includes("Copiar como JSON")) {
                item.click();
                break;
            }
        }
    }
})()
"""


async def _localizar_tab(scrapper, ws_id: str):
    """Return the tab whose DevTools page id is *ws_id*, or ``None``."""
    ws_url = f"ws://127.0.0.1:9222/devtools/page/{ws_id}"
    # Both the full websocket URL and the bare id are tried as lookup keys;
    # which one Scrapper.get_tab expects is not visible from this file.
    tab = scrapper.get_tab(ws_url) or scrapper.get_tab(ws_id)
    if tab:
        return tab
    # Fall back to listing the tabs and matching on the last URL path segment.
    nuevas_tabs = await scrapper.obtener_tabs_existentes()
    return next(
        (t for t in nuevas_tabs if t.ws_url.rsplit("/", 1)[-1] == ws_id),
        None,
    )


async def _copiar_esquema_como_json(tab) -> None:
    """Drive the page's copy button and then its "Copiar como JSON" menu entry."""
    await tab.evaluar_js(_JS_ABRIR_MENU_COPIA)
    await asyncio.sleep(1)  # give the overlay menu time to open
    await tab.evaluar_js(_JS_ELEGIR_COPIAR_JSON)
    await asyncio.sleep(1.5)  # give the page time to fill the clipboard


async def main(
    ws_id: str = "F51AC05B27E1DEC4011E67369781596C",
    max_elementos: int = 12,
) -> None:
    """Click the resource-tree items of a browser tab and save each copied schema.

    For each of the first *max_elementos* tree items the function clicks the
    item, triggers the page's "copy schema as JSON" flow, reads the system
    clipboard and writes its content to ``OUTPUT_DIR/<item text>.txt``.

    Args:
        ws_id: DevTools page id of the tab to drive (last segment of its
            websocket URL). Defaults to the id the script was written for.
        max_elementos: Maximum number of tree items to process.

    Returns:
        None. Progress and errors are reported with ``print``; per-item
        clipboard or file errors are reported and the loop continues.
    """
    scrapper = Scrapper(debugging_url="http://127.0.0.1:9222")
    print("🔌 Conectando a pestaña específica...")

    tab = await _localizar_tab(scrapper, ws_id)
    if not tab:
        print("⚠️ La pestaña con ese ID no se encontró.")
        return

    elementos = await tab.get_elements_by_css_selector(_SELECTOR_ELEMENTOS)
    if not elementos:
        # Covers both an empty result and a None return from the selector call.
        print("⚠️ No se encontraron elementos en el árbol de recursos.")
        return

    for i, elemento in enumerate(elementos[:max_elementos], start=1):
        print(f"🖱️ Click #{i}")
        clickeable = await elemento.encontrar_hijo_clickeable()
        if not clickeable:
            print(f"⚠️ No se encontró subelemento clickeable en #{i}")
            continue
        await clickeable.click()
        await asyncio.sleep(1)

        texto_crudo = await elemento.obtener_texto()
        # Apply the fallback AFTER sanitizing: text made only of whitespace
        # sanitizes to "" and would otherwise produce a file named ".txt".
        nombre_archivo = sanitizar(texto_crudo or "") or f"esquema_item_{i}"
        print(f"📄 Nombre base del archivo: {nombre_archivo}.txt")

        try:
            # Empty the clipboard first so a failed copy in the page cannot be
            # mistaken for success by re-reading the previous item's JSON.
            pyperclip.copy("")
        except Exception as e:
            print(f"❌ Error al leer el portapapeles o guardar archivo: {e}")
            continue

        await _copiar_esquema_como_json(tab)

        # Per-item boundary: report the failure and move on to the next item.
        try:
            texto_json = pyperclip.paste()
            if not texto_json.strip():
                print(f"⚠️ El portapapeles está vacío; no se guardó {nombre_archivo}.txt")
                continue
            file_path = os.path.join(OUTPUT_DIR, f"{nombre_archivo}.txt")
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(texto_json)
            print(f"✅ Guardado: {file_path}")
        except Exception as e:
            print(f"❌ Error al leer el portapapeles o guardar archivo: {e}")
# Script entry point: run the scraping coroutine only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())