From 8a5002e7a38c873659bf6a7e5e5c7219704b0397 Mon Sep 17 00:00:00 2001 From: Developer Date: Wed, 25 Mar 2026 00:48:23 +0100 Subject: [PATCH] feat: Actions API para acciones complejas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implementa API para acciones avanzadas de mouse y teclado. Mouse actions: - Hover(), DoubleClick(), RightClick() - DragAndDrop() con animación suave - ScrollTo(), ScrollBy(), ScrollToElement() - MoveMouse() a coordenadas específicas Keyboard actions: - PressKey() con modificadores (Ctrl+C, Alt+F4) - HoldKey() y ReleaseKey() - SendKeys() para secuencias Usa CDP Input.dispatchMouseEvent y Input.dispatchKeyEvent. Archivo: pkg/browser/actions.go --- pkg/browser/actions.go | 348 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 348 insertions(+) create mode 100644 pkg/browser/actions.go diff --git a/pkg/browser/actions.go b/pkg/browser/actions.go new file mode 100644 index 0000000..6e9077a --- /dev/null +++ b/pkg/browser/actions.go @@ -0,0 +1,348 @@ +package browser + +import ( + "context" + "fmt" + "time" +) + +// Hover mueve el mouse sobre un elemento (sin hacer click) +func (b *Browser) Hover(ctx context.Context, selector string) error { + // Obtener posición del elemento + x, y, err := b.getElementCenter(ctx, selector) + if err != nil { + return fmt.Errorf("failed to get element position: %w", err) + } + + // Mover mouse al centro del elemento + if err := b.dispatchMouseEvent(ctx, "mouseMoved", x, y, "none", 0); err != nil { + return fmt.Errorf("failed to hover: %w", err) + } + + return nil +} + +// DoubleClick hace doble click en un elemento +func (b *Browser) DoubleClick(ctx context.Context, selector string) error { + // Obtener posición + x, y, err := b.getElementCenter(ctx, selector) + if err != nil { + return err + } + + // Primer click + if err := b.dispatchMouseEvent(ctx, "mousePressed", x, y, "left", 1); err != nil { + return err + } + if err := b.dispatchMouseEvent(ctx, "mouseReleased", x, y, "left", 1); err != nil { + return err + } + + // Pequeña pausa + time.Sleep(50 * time.Millisecond) + + // Segundo click (clickCount = 2) + if err := b.dispatchMouseEvent(ctx, "mousePressed", x, y, "left", 2); err != nil { + return err + } + if err := b.dispatchMouseEvent(ctx, "mouseReleased", x, y, "left", 2); err != nil { + return err + } + + return nil +} + +// RightClick hace click derecho en un elemento +func (b *Browser) RightClick(ctx context.Context, selector string) error { + // Obtener posición + x, y, err := b.getElementCenter(ctx, selector) + if err != nil { + return err + } + + // Click derecho + if err := b.dispatchMouseEvent(ctx, "mousePressed", x, y, "right", 1); err != nil { + return err + } + if err := b.dispatchMouseEvent(ctx, "mouseReleased", x, y, "right", 1); err != nil { + return err + } + + return nil +} + +// DragAndDrop arrastra un elemento y lo suelta en otro +func (b *Browser) DragAndDrop(ctx context.Context, sourceSelector, targetSelector string) error { + // Obtener posición de origen + sourceX, sourceY, err := b.getElementCenter(ctx, sourceSelector) + if err != nil { + return fmt.Errorf("source element not found: %w", err) + } + + // Obtener posición de destino + targetX, targetY, err := b.getElementCenter(ctx, targetSelector) + if err != nil { + return fmt.Errorf("target element not found: %w", err) + } + + // 1. Mover a elemento origen + if err := b.dispatchMouseEvent(ctx, "mouseMoved", sourceX, sourceY, "none", 0); err != nil { + return err + } + + // 2. Mouse down en origen + if err := b.dispatchMouseEvent(ctx, "mousePressed", sourceX, sourceY, "left", 1); err != nil { + return err + } + + // 3. Simular arrastre (mover en pasos) + steps := 10 + for i := 1; i <= steps; i++ { + fraction := float64(i) / float64(steps) + intermediateX := sourceX + int(float64(targetX-sourceX)*fraction) + intermediateY := sourceY + int(float64(targetY-sourceY)*fraction) + + if err := b.dispatchMouseEvent(ctx, "mouseMoved", intermediateX, intermediateY, "left", 0); err != nil { + return err + } + + time.Sleep(10 * time.Millisecond) + } + + // 4. Mouse up en destino + if err := b.dispatchMouseEvent(ctx, "mouseReleased", targetX, targetY, "left", 1); err != nil { + return err + } + + return nil +} + +// ScrollTo hace scroll a una posición absoluta (x, y) +func (b *Browser) ScrollTo(ctx context.Context, x, y int) error { + script := fmt.Sprintf("window.scrollTo(%d, %d)", x, y) + _, err := b.Evaluate(ctx, script) + return err +} + +// ScrollBy hace scroll relativo por x, y pixels +func (b *Browser) ScrollBy(ctx context.Context, x, y int) error { + script := fmt.Sprintf("window.scrollBy(%d, %d)", x, y) + _, err := b.Evaluate(ctx, script) + return err +} + +// ScrollToElement hace scroll hasta que un elemento sea visible +func (b *Browser) ScrollToElement(ctx context.Context, selector string) error { + script := fmt.Sprintf(` + const element = document.querySelector('%s'); + if (element) { + element.scrollIntoView({ + behavior: 'smooth', + block: 'center' + }); + } + `, selector) + + _, err := b.Evaluate(ctx, script) + return err +} + +// MoveMouse mueve el mouse a coordenadas específicas +func (b *Browser) MoveMouse(ctx context.Context, x, y int) error { + return b.dispatchMouseEvent(ctx, "mouseMoved", x, y, "none", 0) +} + +// PressKey presiona una tecla (soporta modificadores) +func (b *Browser) PressKey(ctx context.Context, key string) error { + // Parsear si hay modificadores (Ctrl+C, Alt+F4, etc.) + keys, modifiers := parseKeyCombo(key) + + // Presionar modificadores + for _, mod := range modifiers { + if err := b.dispatchKeyEvent(ctx, "keyDown", mod, "", modifiersFor(mod)); err != nil { + return err + } + } + + // Presionar tecla principal + mainKey := keys[len(keys)-1] + mods := modifiersValue(modifiers) + + if err := b.dispatchKeyEvent(ctx, "keyDown", mainKey, "", mods); err != nil { + return err + } + if err := b.dispatchKeyEvent(ctx, "keyUp", mainKey, "", mods); err != nil { + return err + } + + // Soltar modificadores + for i := len(modifiers) - 1; i >= 0; i-- { + if err := b.dispatchKeyEvent(ctx, "keyUp", modifiers[i], "", modifiersFor(modifiers[i])); err != nil { + return err + } + } + + return nil +} + +// HoldKey mantiene presionada una tecla (sin soltarla) +func (b *Browser) HoldKey(ctx context.Context, key string) error { + return b.dispatchKeyEvent(ctx, "keyDown", key, "", 0) +} + +// ReleaseKey suelta una tecla previamente presionada +func (b *Browser) ReleaseKey(ctx context.Context, key string) error { + return b.dispatchKeyEvent(ctx, "keyUp", key, "", 0) +} + +// SendKeys envía una secuencia de teclas +func (b *Browser) SendKeys(ctx context.Context, keys ...string) error { + for _, key := range keys { + if err := b.PressKey(ctx, key); err != nil { + return err + } + time.Sleep(50 * time.Millisecond) + } + return nil +} + +// Helper: obtener centro de un elemento +func (b *Browser) getElementCenter(ctx context.Context, selector string) (int, int, error) { + script := fmt.Sprintf(` + (() => { + const element = document.querySelector('%s'); + if (!element) return null; + + const rect = element.getBoundingClientRect(); + return { + x: Math.round(rect.left + rect.width / 2), + y: Math.round(rect.top + rect.height / 2) + }; + })() + `, selector) + + result, err := b.Evaluate(ctx, script) + if err != nil { + return 0, 0, err + } + + if result.Value == nil { + return 0, 0, fmt.Errorf("element not found: %s", selector) + } + + coords, ok := result.Value.(map[string]interface{}) + if !ok { + return 0, 0, fmt.Errorf("invalid coordinates") + } + + x := int(coords["x"].(float64)) + y := int(coords["y"].(float64)) + + return x, y, nil +} + +// Helper: dispatch mouse event +func (b *Browser) dispatchMouseEvent(ctx context.Context, eventType string, x, y int, button string, clickCount int) error { + params := map[string]interface{}{ + "type": eventType, + "x": x, + "y": y, + "button": button, + "clickCount": clickCount, + } + + return b.cdpClient.Execute(ctx, "Input.dispatchMouseEvent", params, nil) +} + +// Helper: dispatch key event +func (b *Browser) dispatchKeyEvent(ctx context.Context, eventType, key, text string, modifiers int) error { + params := map[string]interface{}{ + "type": eventType, + } + + if key != "" { + params["key"] = key + } + if text != "" { + params["text"] = text + } + if modifiers > 0 { + params["modifiers"] = modifiers + } + + return b.cdpClient.Execute(ctx, "Input.dispatchKeyEvent", params, nil) +} + +// Helper: parsear combinación de teclas +func parseKeyCombo(combo string) ([]string, []string) { + // Separar por + + parts := splitKey(combo, '+') + + var modifiers []string + var keys []string + + for _, part := range parts { + switch part { + case "Control", "Ctrl": + modifiers = append(modifiers, "Control") + case "Alt": + modifiers = append(modifiers, "Alt") + case "Shift": + modifiers = append(modifiers, "Shift") + case "Meta", "Command", "Cmd": + modifiers = append(modifiers, "Meta") + default: + keys = append(keys, part) + } + } + + return keys, modifiers +} + +// Helper: split key combo +func splitKey(s string, sep rune) []string { + var parts []string + var current string + + for _, ch := range s { + if ch == sep { + if current != "" { + parts = append(parts, current) + current = "" + } + } else { + current += string(ch) + } + } + + if current != "" { + parts = append(parts, current) + } + + return parts +} + +// Helper: valor de modificadores +func modifiersFor(key string) int { + switch key { + case "Control": + return 2 + case "Shift": + return 8 + case "Alt": + return 1 + case "Meta": + return 4 + default: + return 0 + } +} + +// Helper: combinar modificadores +func modifiersValue(modifiers []string) int { + value := 0 + for _, mod := range modifiers { + value |= modifiersFor(mod) + } + return value +}