diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 028934d..a72df08 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -61,8 +61,26 @@ cmd/launcher/ entrypoint principal (rulesRegistry) cmd/agentctl/ CLI de gestion dev-scripts/server/ start, stop, restart, ps, logs, dashboard dev-scripts/agent/ new, register, verify, avatar, remove, list +dev-scripts/e2e/ install, run — E2E tests con Playwright +e2e/ proyecto Node.js con Playwright (tests, fixtures, Element Web) ``` +## E2E Tests + +Tests end-to-end con Playwright contra Element Web + homeserver real. Proyecto Node.js separado en `e2e/`. + +```bash +./dev-scripts/e2e/install.sh # instalar dependencias +cp e2e/.env.example e2e/.env # configurar credenciales +./dev-scripts/e2e/run.sh # ejecutar tests (headless) +./dev-scripts/e2e/run.sh --headed # con browser visible +``` + +- **Fixtures**: `e2e/fixtures/` — login E2EE (`element-auth.ts`), helpers de room (`matrix-room.ts`) +- **Tests**: `e2e/tests/` — login, assistant-bot, asistente-2 +- **Assertions flexibles** para respuestas LLM (no-deterministicas), estrictas para commands (`!help`, `!ping`) +- Documentacion completa: `e2e/README.md` + ## Reglas operativas Guias detalladas en `.claude/rules/index.md`: diff --git a/.gitignore b/.gitignore index 4752434..d15a3aa 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,5 @@ e2e/node_modules/ e2e/test-results/ e2e/.auth/ e2e/.env -e2e/element-web/ \ No newline at end of file +e2e/element-web/ +e2e/playwright-report/ \ No newline at end of file diff --git a/dev-scripts/e2e/run.sh b/dev-scripts/e2e/run.sh index 0a056c6..82ef11a 100755 --- a/dev-scripts/e2e/run.sh +++ b/dev-scripts/e2e/run.sh @@ -1,19 +1,42 @@ #!/usr/bin/env bash # run.sh — ejecutar E2E tests con Playwright +# +# Uso: +# ./dev-scripts/e2e/run.sh # headless (default) +# ./dev-scripts/e2e/run.sh --headed # con browser visible (requiere DISPLAY) +# set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" REPO_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" E2E_DIR="$REPO_ROOT/e2e" +ELEMENT_SCRIPT="$E2E_DIR/scripts/setup-element.sh" +PS_SCRIPT="$REPO_ROOT/dev-scripts/server/ps.sh" -# Verificar dependencias instaladas +HEADED=false +EXTRA_ARGS=() + +for arg in "$@"; do + case "$arg" in + --headed) + HEADED=true + ;; + *) + EXTRA_ARGS+=("$arg") + ;; + esac +done + +# --- Verificaciones previas --- + +# 1. Verificar dependencias instaladas if [ ! -d "$E2E_DIR/node_modules" ]; then echo "ERROR: node_modules no encontrado. Ejecutar primero:" echo " ./dev-scripts/e2e/install.sh" exit 1 fi -# Verificar .env +# 2. Verificar .env if [ ! -f "$E2E_DIR/.env" ]; then echo "ERROR: e2e/.env no encontrado. Crear desde el template:" echo " cp e2e/.env.example e2e/.env" @@ -21,6 +44,91 @@ if [ ! -f "$E2E_DIR/.env" ]; then exit 1 fi -echo "Los tests E2E se agregan en el issue 0022c." -echo "Cuando esten listos, ejecutar:" -echo " cd $E2E_DIR && npx playwright test" +# 3. Verificar que los agentes estan corriendo +echo "=== Verificando agentes ===" +if [ -x "$PS_SCRIPT" ]; then + if ! "$PS_SCRIPT" 2>/dev/null | grep -q "running"; then + echo "WARN: el launcher no parece estar corriendo." + echo " Iniciar con: ./dev-scripts/server/start.sh" + echo " Continuando de todas formas..." + else + echo "Launcher corriendo OK" + fi +else + echo "WARN: no se encontro ps.sh, no se puede verificar el estado de los agentes" +fi + +# --- Element Web --- + +echo "" +echo "=== Element Web ===" +ELEMENT_STARTED_BY_US=false + +if [ -x "$ELEMENT_SCRIPT" ]; then + if "$ELEMENT_SCRIPT" status 2>/dev/null | grep -q "corriendo\|running\|listening"; then + echo "Element Web ya esta corriendo" + else + echo "Levantando Element Web..." + "$ELEMENT_SCRIPT" start + ELEMENT_STARTED_BY_US=true + # Esperar a que el servidor este listo + sleep 2 + fi +else + echo "WARN: setup-element.sh no encontrado. Asegurarse de que Element Web esta corriendo." 
+fi + +# --- Ejecutar tests --- + +echo "" +echo "=== Ejecutando E2E tests ===" + +PLAYWRIGHT_ARGS=() +if [ "$HEADED" = true ]; then + if [ -z "${DISPLAY:-}" ] && [ -z "${WAYLAND_DISPLAY:-}" ]; then + echo "WARN: --headed solicitado pero no se detecta DISPLAY. Ejecutando headless." + else + PLAYWRIGHT_ARGS+=("--headed") + fi +fi + +# Agregar argumentos extra del usuario +if [ ${#EXTRA_ARGS[@]} -gt 0 ]; then + PLAYWRIGHT_ARGS+=("${EXTRA_ARGS[@]}") +fi + +EXIT_CODE=0 +cd "$E2E_DIR" +npx playwright test "${PLAYWRIGHT_ARGS[@]}" || EXIT_CODE=$? + +# Generar reporte HTML si hay fallos +if [ "$EXIT_CODE" -ne 0 ]; then + echo "" + echo "=== Generando reporte HTML ===" + npx playwright show-report --host 0.0.0.0 --port 0 2>/dev/null & + REPORT_PID=$! + sleep 1 + kill "$REPORT_PID" 2>/dev/null || true + echo "Reporte disponible en: $E2E_DIR/playwright-report/" + echo " Para verlo: cd e2e && npx playwright show-report" +fi + +# --- Teardown --- + +if [ "$ELEMENT_STARTED_BY_US" = true ]; then + echo "" + echo "=== Deteniendo Element Web ===" + "$ELEMENT_SCRIPT" stop 2>/dev/null || true +fi + +# --- Resultado --- + +echo "" +if [ "$EXIT_CODE" -eq 0 ]; then + echo "=== Todos los tests pasaron ===" +else + echo "=== Algunos tests fallaron (exit code: $EXIT_CODE) ===" + echo "Ver screenshots en: $E2E_DIR/test-results/" +fi + +exit "$EXIT_CODE" diff --git a/dev/issues/README.md b/dev/issues/README.md index f5ac23f..d8e290b 100644 --- a/dev/issues/README.md +++ b/dev/issues/README.md @@ -26,7 +26,7 @@ afectados y notas de implementacion. 
| 19 | Prompt injection hardening | [0019-prompt-injection-hardening.md](completed/0019-prompt-injection-hardening.md) | completado | | 20 | Aislar claude -p del repo | [0020-claude-code-sandbox.md](completed/0020-claude-code-sandbox.md) | completado | | 21 | Threads default config | (completado via branch) | completado | -| 22 | Tests E2E con Playwright | [0022-e2e-tests-playwright.md](0022-e2e-tests-playwright.md) | pendiente | +| 22 | Tests E2E con Playwright | [0022-e2e-tests-playwright.md](completed/0022-e2e-tests-playwright.md) | completado | | 22a | E2E: Infraestructura base | [0022a-e2e-infra.md](completed/0022a-e2e-infra.md) | completado | | 22b | E2E: Auth fixtures y helpers | [0022b-e2e-auth-helpers.md](completed/0022b-e2e-auth-helpers.md) | completado | -| 22c | E2E: Tests de agentes + docs | [0022c-e2e-agent-tests.md](0022c-e2e-agent-tests.md) | pendiente | +| 22c | E2E: Tests de agentes + docs | [0022c-e2e-agent-tests.md](completed/0022c-e2e-agent-tests.md) | completado | diff --git a/dev/issues/0022-e2e-tests-playwright.md b/dev/issues/completed/0022-e2e-tests-playwright.md similarity index 100% rename from dev/issues/0022-e2e-tests-playwright.md rename to dev/issues/completed/0022-e2e-tests-playwright.md diff --git a/dev/issues/0022c-e2e-agent-tests.md b/dev/issues/completed/0022c-e2e-agent-tests.md similarity index 100% rename from dev/issues/0022c-e2e-agent-tests.md rename to dev/issues/completed/0022c-e2e-agent-tests.md diff --git a/e2e/README.md b/e2e/README.md new file mode 100644 index 0000000..ad7b970 --- /dev/null +++ b/e2e/README.md @@ -0,0 +1,130 @@ +# E2E Tests — agents_and_robots + +Tests end-to-end con Playwright para verificar que los agentes Matrix responden correctamente via Element Web. 
+ +## Requisitos + +- Node.js v18+ +- Agentes corriendo contra el homeserver (`./dev-scripts/server/start.sh`) +- Credenciales de un usuario de test en el homeserver + +## Instalacion + +```bash +./dev-scripts/e2e/install.sh +``` + +Esto instala dependencias npm y Chromium para Playwright. + +## Configuracion + +```bash +cp e2e/.env.example e2e/.env +``` + +Editar `e2e/.env` con las credenciales del usuario de test: + +| Variable | Descripcion | +|----------|-------------| +| `ELEMENT_URL` | URL de Element Web local (default: `http://localhost:8090`) | +| `MATRIX_HOMESERVER` | URL del homeserver Matrix | +| `MATRIX_USER` | MXID del usuario de test (`@user:server`) | +| `MATRIX_PASSWORD` | Password del usuario de test | +| `MATRIX_RECOVERY_KEY` | Recovery key para cross-signing/E2EE | + +## Ejecucion + +```bash +# Ejecutar todos los tests (headless) +./dev-scripts/e2e/run.sh + +# Con browser visible (requiere DISPLAY) +./dev-scripts/e2e/run.sh --headed + +# Ejecutar un spec especifico +./dev-scripts/e2e/run.sh assistant-bot + +# Directamente con Playwright +cd e2e && npx playwright test +cd e2e && npx playwright test --headed +cd e2e && npx playwright test assistant-bot.spec.ts +``` + +El script `run.sh` se encarga de: +1. Verificar que los agentes estan corriendo +2. Levantar Element Web si no esta activo +3. Ejecutar los tests +4. Generar reporte en caso de fallos +5. 
Teardown de Element Web (si lo levanto) + +## Estructura + +``` +e2e/ +├── package.json dependencias (Playwright, dotenv) +├── playwright.config.ts configuracion de Playwright +├── global-setup.ts login unico antes de todos los tests +├── .env.example template de credenciales +├── fixtures/ +│ ├── element-auth.ts login y verificacion E2EE +│ └── matrix-room.ts helpers: goToRoom, sendMessage, waitForBotReply +├── tests/ +│ ├── login.spec.ts smoke test: sesion y E2EE +│ ├── assistant-bot.spec.ts tests del assistant-bot +│ └── asistente-2.spec.ts tests del asistente-2 (con tools) +├── scripts/ +│ └── setup-element.sh descarga y sirve Element Web local +└── element-web/ Element Web descargado (gitignored) + +dev-scripts/e2e/ +├── install.sh instalacion de dependencias +└── run.sh orquestacion completa de tests +``` + +## Debug de fallos + +### Screenshots + +Cuando un test falla, Playwright captura screenshot automaticamente en `e2e/test-results/`. Revisarlos para entender el estado de la UI al momento del fallo. + +### Reporte HTML + +Si hay fallos, `run.sh` genera un reporte HTML: + +```bash +cd e2e && npx playwright show-report +``` + +### Modo headed + +Para ver el browser en tiempo real (requiere entorno grafico): + +```bash +./dev-scripts/e2e/run.sh --headed +``` + +### Traces + +En el primer retry, Playwright captura un trace completo. Verlo con: + +```bash +cd e2e && npx playwright show-trace test-results/<nombre-del-test>/trace.zip +``` + +### Login cacheado + +El global-setup cachea la sesion autenticada en `e2e/.auth/state.json` por 12 horas. Si hay problemas de autenticacion: + +```bash +rm -rf e2e/.auth/ +``` + +Y re-ejecutar los tests para forzar login fresco. + +## Notas de diseno + +- **Assertions flexibles para LLM**: las respuestas de los bots son no-deterministicas. Solo se verifica que responde, que no esta vacio, y longitud razonable. +- **Commands con assertions estrictas**: `!help` y `!ping` tienen respuestas deterministicas y se validan con mayor precision. 
+- **Tests secuenciales**: `fullyParallel: false` y `workers: 1` para evitar race conditions en el timeline de Matrix.
+- **Timeouts generosos**: 60s por test, 30s para expect. Los LLMs pueden tardar 5-20s en responder.
+- **Retry en CI**: 1 retry en CI para manejar timeouts ocasionales.
diff --git a/e2e/tests/asistente-2.spec.ts b/e2e/tests/asistente-2.spec.ts
new file mode 100644
index 0000000..e6bc093
--- /dev/null
+++ b/e2e/tests/asistente-2.spec.ts
@@ -0,0 +1,68 @@
+import { test, expect } from "@playwright/test";
+import type { Page } from "@playwright/test";
+import {
+  goToRoom,
+  sendMessage,
+  waitForBotReply,
+  assertNoDecryptionErrors,
+} from "../fixtures/matrix-room";
+
+// Room name (and sender display name) of the bot under test.
+const BOT = "Asistente 2";
+// LLM-backed replies get a long budget; commands are expected to answer fast.
+const LLM_TIMEOUT = 60_000;
+const COMMAND_TIMEOUT = 10_000;
+
+/** Sends `text` to the current room and resolves with the bot's reply. */
+async function ask(page: Page, text: string, timeout: number) {
+  await sendMessage(page, text);
+  return waitForBotReply(page, { timeout, sender: BOT });
+}
+
+test.describe("asistente-2", () => {
+  test.beforeEach(async ({ page }) => {
+    await page.goto("/");
+
+    // Wait until the session is ready (the room list is visible).
+    const roomList = page.locator('[role="tree"][aria-label="Rooms"]');
+    await expect(roomList).toBeVisible({ timeout: 30_000 });
+
+    await goToRoom(page, BOT);
+  });
+
+  test("responde a un saludo", async ({ page }) => {
+    const reply = await ask(page, "Hola, que tal?", LLM_TIMEOUT);
+
+    expect(reply).toBeTruthy();
+    expect(reply.length).toBeGreaterThan(10);
+  });
+
+  test("!tools muestra herramientas disponibles", async ({ page }) => {
+    const reply = await ask(page, "!tools", COMMAND_TIMEOUT);
+
+    expect(reply).toBeTruthy();
+    // asistente-2 has at least current_time.
+    expect(reply.toLowerCase()).toMatch(/current_time|hora|herramienta|tool/);
+  });
+
+  test("pregunta que activa tool use (que hora es?)", async ({ page }) => {
+    const reply = await ask(page, "Que hora es ahora mismo?", LLM_TIMEOUT);
+
+    expect(reply).toBeTruthy();
+    // Only a non-trivial answer is required; the content itself is not checked.
+    expect(reply.length).toBeGreaterThan(5);
+  });
+
+  test("!help muestra comandos", async ({ page }) => {
+    const reply = await ask(page, "!help", COMMAND_TIMEOUT);
+
+    expect(reply).toBeTruthy();
+    const normalized = reply.toLowerCase();
+    expect(normalized).toContain("help");
+    expect(normalized).toContain("ping");
+  });
+
+  test("no hay errores de E2EE en el timeline", async ({ page }) => {
+    await assertNoDecryptionErrors(page);
+  });
+});
diff --git a/e2e/tests/assistant-bot.spec.ts b/e2e/tests/assistant-bot.spec.ts
new file mode 100644
index 0000000..a75b73b
--- /dev/null
+++ b/e2e/tests/assistant-bot.spec.ts
@@ -0,0 +1,66 @@
+import { test, expect } from "@playwright/test";
+import type { Page } from "@playwright/test";
+import {
+  goToRoom,
+  sendMessage,
+  waitForBotReply,
+  assertNoDecryptionErrors,
+} from "../fixtures/matrix-room";
+
+// Room name (and sender display name) of the bot under test.
+const BOT = "Assistant";
+// LLM-backed replies get a long budget; commands are expected to answer fast.
+const LLM_TIMEOUT = 60_000;
+const COMMAND_TIMEOUT = 10_000;
+
+/** Sends `text` to the current room and resolves with the bot's reply. */
+async function ask(page: Page, text: string, timeout: number) {
+  await sendMessage(page, text);
+  return waitForBotReply(page, { timeout, sender: BOT });
+}
+
+test.describe("assistant-bot", () => {
+  test.beforeEach(async ({ page }) => {
+    await page.goto("/");
+
+    // Wait until the session is ready (the room list is visible).
+    const roomList = page.locator('[role="tree"][aria-label="Rooms"]');
+    await expect(roomList).toBeVisible({ timeout: 30_000 });
+
+    await goToRoom(page, BOT);
+  });
+
+  test("responde a un saludo en DM", async ({ page }) => {
+    const reply = await ask(page, "Hola, como estas?", LLM_TIMEOUT);
+
+    expect(reply).toBeTruthy();
+    expect(reply.length).toBeGreaterThan(10);
+  });
+
+  test("responde a una pregunta con contenido coherente", async ({ page }) => {
+    const question = "Que es la fotosintesis? Responde en una frase.";
+    const reply = await ask(page, question, LLM_TIMEOUT);
+
+    expect(reply).toBeTruthy();
+    expect(reply.length).toBeGreaterThan(10);
+  });
+
+  test("!help muestra lista de comandos", async ({ page }) => {
+    const reply = await ask(page, "!help", COMMAND_TIMEOUT);
+
+    expect(reply).toBeTruthy();
+    const normalized = reply.toLowerCase();
+    expect(normalized).toContain("help");
+    expect(normalized).toContain("ping");
+  });
+
+  test("!ping responde", async ({ page }) => {
+    const reply = await ask(page, "!ping", COMMAND_TIMEOUT);
+
+    expect(reply).toBeTruthy();
+  });
+
+  test("no hay errores de E2EE en el timeline", async ({ page }) => {
+    await assertNoDecryptionErrors(page);
+  });
+});