feat(browser): auto-commit con 60 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 11:42:31 +02:00
parent 37aacfcfa9
commit 8742cb25be
71 changed files with 5660 additions and 192 deletions
+2
View File
@@ -1,8 +1,10 @@
from .setup_logger import setup_logger, get_logger
from .generate_app_icon import generate_app_icon
from .http_replay_sequence import http_replay_sequence
__all__ = [
"setup_logger",
"get_logger",
"generate_app_icon",
"http_replay_sequence",
]
+8 -1
View File
@@ -12,9 +12,12 @@ import io
import os
from pathlib import Path
import cairosvg
from PIL import Image, ImageDraw
# cairosvg se importa de forma perezosa dentro de los renderers que lo usan
# (ver _render_glyph_*). Asi el modulo (y el paquete infra que lo reexporta)
# se importa sin requerir cairosvg instalado; solo rasterizar SVGs lo exige.
DEFAULT_SIZES = [16, 24, 32, 48, 64, 128, 256]
@@ -62,6 +65,8 @@ def _luminance(accent_hex: str) -> float:
def _render_glyph_colored(svg_path: Path, size: int, fill: str) -> Image.Image:
"""Renderiza un SVG Phosphor reemplazando currentColor por `fill`."""
import cairosvg
svg = svg_path.read_text(encoding="utf-8")
svg = svg.replace('fill="currentColor"', f'fill="{fill}"')
png_bytes = cairosvg.svg2png(
@@ -82,6 +87,8 @@ def _render_glyph_white(svg_path: Path, size: int) -> Image.Image:
Returns:
Imagen RGBA con el glyph en blanco sobre fondo transparente.
"""
import cairosvg
svg = svg_path.read_text(encoding="utf-8")
# Phosphor usa fill="currentColor" — forzar blanco.
svg = svg.replace('fill="currentColor"', 'fill="#ffffff"')
@@ -0,0 +1,87 @@
---
name: http_replay_sequence
kind: function
lang: py
domain: infra
version: "1.0.0"
purity: impure
signature: "def http_replay_sequence(calls: list[dict], *, params: dict | None = None, extract: list[dict] | None = None, timeout_s: float = 30.0, verify_tls: bool = True, allow_redirects: bool = True, base_headers: dict | None = None) -> dict"
description: "Motor de replay HTTP: ejecuta en orden una secuencia de call specs (las que produce har_extract_calls_py_cybersecurity) compartiendo una sesion (cookie jar) entre pasos, con substitucion de parametros {{param}} y extraccion de valores de una respuesta para usarlos en pasos siguientes (p.ej. token CSRF del GET inicial -> header del POST). Pieza reutilizable del Nivel 1 (HTTP puro) del patron grabar->destilar->reproducir."
tags: [flow-replay, http, replay, client, infra]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: "error_go_core"
imports: [re, requests]
tested: true
tests: ["test_golden_extract_and_subst", "test_edge_missing_param", "test_error_path_request_exception"]
test_file_path: "python/functions/infra/http_replay_sequence_test.py"
file_path: "python/functions/infra/http_replay_sequence.py"
params:
- name: calls
desc: "Lista ordenada de call specs. Cada spec: {method, url, headers(dict), cookies(dict opc), body(str|None), body_type:'json'|'form'|'raw'|None}. El body ya es texto (no se re-serializa). Es el formato de salida de har_extract_calls_py_cybersecurity."
- name: params
desc: "Dict inicial de contexto para la substitucion {{param}}. Se copia (no se muta el original). Pasa aqui secretos/tokens desde un vault/pass, nunca hardcodeados en los call specs."
- name: extract
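desc: "Lista de reglas de extraccion {from: int|'last', type: 'json'|'regex'|'header'|'set_cookie', expr: str, as: str}. Una regla con from=<int> se aplica justo tras ejecutar el step con ese indice; una regla con from='last' se aplica tras CADA step (siempre sobre el step recien ejecutado). El valor se guarda en ctx[as] para los pasos siguientes; si la extraccion no encuentra nada se guarda '' en ctx[as] y el step lo anota en su campo error."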
- name: timeout_s
desc: "Timeout por request en segundos (default: 30.0)."
- name: verify_tls
desc: "Verificar certificados TLS; se setea en la sesion (default: True). No desactivar salvo entorno de pruebas controlado."
- name: allow_redirects
desc: "Si los requests siguen redirects (default: True)."
- name: base_headers
desc: "Headers por defecto que se mezclan en la sesion (se aplican a todos los pasos). Util para User-Agent / Accept comunes."
output: "Dict {status: 'ok'|'error', steps: [{idx, method, url, status_code, ok, extracted, missing_params, error}], params_final: dict (ctx tras todos los pasos), error: str}. status='error' solo ante excepcion de transporte (requests.RequestException) o entrada invalida; en ese caso corta y deja de ejecutar. Un 4xx/5xx NO corta: el step queda con ok=False y status global sigue 'ok'."
---
## Ejemplo
```python
from infra import http_replay_sequence
# Requiere red: usa httpbin.org (publico). 2 pasos (indices 0 y 1 en result["steps"]):
# 1) GET /uuid -> extrae el uuid del JSON como param "u"
# 2) POST /anything -> manda header X-Token: {{u}} (el uuid del paso 1)
calls = [
{"method": "GET", "url": "https://httpbin.org/uuid",
"headers": {"Accept": "application/json"}, "body": None, "body_type": None},
{"method": "POST", "url": "https://httpbin.org/anything",
"headers": {"X-Token": "{{u}}", "Content-Type": "application/json"},
"body": '{"hello": "world"}', "body_type": "json"},
]
extract = [
{"from": 0, "type": "json", "expr": "uuid", "as": "u"},
]
result = http_replay_sequence(calls, extract=extract)
print(result["status"]) # "ok"
token = result["params_final"]["u"] # el uuid extraido del paso 0
print("token:", token)
# httpbin /anything devuelve en su JSON los headers que recibio. Aqui solo
# comprobamos que el POST (result["steps"][1]) respondio correctamente:
print(result["steps"][1]["status_code"]) # 200
print(result["steps"][1]["ok"]) # True
# El header X-Token: {{u}} se substituyo por el uuid antes de enviarse.
```
## Cuando usarla
Usala tras `har_extract_calls_py_cybersecurity`, para validar que un flujo capturado se reproduce SIN navegador (Nivel 1 del patron grabar->destilar->reproducir). Es la base de las funciones-accion guardadas en el registry: cuando una secuencia HTTP demuestra reproducir un login + accion, se promueve a una funcion/pipeline dedicada. Tambien sirve para encadenar requests dependientes (token CSRF, session id, paginacion con cursor) compartiendo cookie jar y propagando valores entre pasos.
## Gotchas
- **Seguridad — secretos via params, nunca hardcodeados.** Los call specs pueden contener cookies/tokens. El caller debe inyectarlos via `{{param}}` desde un vault/pass (`params={...}`), no escribirlos en los specs ni commitearlos.
- **Seguridad — replay con efectos es PELIGROSO.** Reproducir una secuencia con efectos (POST que reinicia un server, borra, paga, envia) ejecuta esos efectos de verdad. El caller debe confirmar antes de lanzar una secuencia mutante.
- **Seguridad — `verify_tls` default True.** No lo pongas en False salvo en un entorno de pruebas controlado; desactivar la verificacion TLS abre la puerta a MITM.
- **Extraccion JSON es dot-path simple, NO jsonpath completo.** `"data.items.0.token"` funciona (claves + indices de lista por digito), pero no hay filtros, wildcards ni expresiones. Para casos complejos, usa `type: regex` o post-procesa.
- **Sigue redirects por defecto** (`allow_redirects=True`). Si la secuencia capturada depende del 302 explicito (p.ej. para leer el Location o una cookie intermedia), pon `allow_redirects=False`.
- **Params faltantes NO abortan.** Si un `{{nombre}}` no esta en ctx, se deja el literal `{{nombre}}` y se anade a `step.missing_params`. El request se envia igual; solo una excepcion de transporte corta la ejecucion.
- **El body no se re-serializa.** `body_type: "json"` solo documenta el tipo; el body ya es texto y se manda como `data=body`. Asegurate de incluir el header `Content-Type` adecuado en el spec.
- **4xx/5xx no es error global.** El step queda `ok=False` con su `status_code`, pero `status` global sigue `"ok"`. Solo `requests.RequestException` (DNS, conexion, timeout) marca `status="error"` y corta.
## Capability growth log
v1.0.0 — version inicial.
@@ -0,0 +1,252 @@
"""HTTP replay engine: reproduce an ordered sequence of captured HTTP calls.
This is the reusable core of Level 1 ("pure HTTP") of the record -> distill ->
replay pattern. It takes the call specs produced by
``har_extract_calls_py_cybersecurity`` and replays them in order over a single
``requests.Session`` (shared cookie jar), supporting ``{{param}}`` substitution
and extracting values from one response to feed later steps (e.g. a CSRF token
from the initial GET injected as a header in a subsequent POST).
"""
import re
import requests
_PLACEHOLDER_RE = re.compile(r"\{\{\s*([A-Za-z0-9_]+)\s*\}\}")


def _subst(value, ctx, missing):
    """Expand ``{{name}}`` placeholders inside ``value`` from ``ctx``.

    Args:
        value: Candidate text. Anything that is not a ``str`` is handed back
            as-is.
        ctx: Mapping of placeholder name -> replacement value; values are
            converted with ``str()``.
        missing: List that collects, without duplicates, the names that could
            not be resolved. It is mutated in place.

    Returns:
        The text with every resolvable placeholder replaced. A placeholder
        whose name is absent from ``ctx`` (or whose value is ``None``) is left
        exactly as written.
    """
    if not isinstance(value, str):
        return value

    def _resolve(found):
        key = found.group(1)
        replacement = ctx.get(key)
        if replacement is None:
            # Unknown (or None) param: record it once, keep the literal text.
            if key not in missing:
                missing.append(key)
            return found.group(0)
        return str(replacement)

    return _PLACEHOLDER_RE.sub(_resolve, value)
def _subst_dict(d, ctx, missing):
    """Return a new dict whose values went through ``_subst``.

    Keys are copied untouched; only values are substituted. A falsy ``d``
    (``None``, empty dict) produces an empty dict. ``missing`` is forwarded to
    ``_subst`` so unresolved names accumulate in the caller's list.
    """
    source = d or {}
    return {key: _subst(val, ctx, missing) for key, val in source.items()}
def _json_dot_path(data, expr: str):
    """Walk a simple dot-path over a parsed JSON value.

    Supports dict keys and list indices: ``"data.items.0.token"``. When the
    current node is a list, a segment made only of decimal digits is used as a
    (non-negative) index. When the current node is a dict, the segment is
    always looked up as a string key, even if it is numeric. Empty segments
    (leading/trailing/double dots) are ignored.

    Args:
        data: Parsed JSON value (dict, list or scalar).
        expr: Dot-separated path.

    Returns:
        The value found at the path, or ``None`` if any segment cannot be
        resolved (missing key, index out of range, or descending into a
        scalar).
    """
    cur = data
    for seg in expr.split("."):
        if seg == "":
            continue
        # isdecimal() (not isdigit()) so that only strings int() can parse are
        # treated as indices; e.g. "²" is a "digit" but int("²") raises.
        if isinstance(cur, list) and seg.isdecimal():
            idx = int(seg)
            if idx >= len(cur):
                return None
            cur = cur[idx]
        elif isinstance(cur, dict) and seg in cur:
            cur = cur[seg]
        else:
            return None
    return cur
def _apply_extract_rule(rule, resp, session):
    """Resolve a single extract rule against a response.

    Rule types (``rule["type"]``, default ``"json"``):
        - json: dot-path over ``resp.json()``.
        - regex: ``re.search`` over ``resp.text``; group(1) if the pattern has
          groups, else group(0).
        - header: ``resp.headers.get(expr)``.
        - set_cookie: ``session.cookies.get(expr)``.

    Args:
        rule: Extract rule dict; reads the ``type`` and ``expr`` keys.
        resp: Response object exposing ``json()``, ``text`` and ``headers``.
        session: Session object exposing ``cookies``.

    Returns:
        The extracted value converted with ``str()``, or ``""`` when nothing
        is found, the rule type is unknown, the body is not valid JSON, or the
        regex pattern is invalid. This function never raises for a malformed
        rule expression.
    """
    rtype = rule.get("type", "json")
    # An explicit ``"expr": None`` is treated like a missing expr.
    expr = rule.get("expr") or ""
    try:
        if rtype == "json":
            value = _json_dot_path(resp.json(), expr)
        elif rtype == "regex":
            m = re.search(expr, resp.text)
            if not m:
                return ""
            value = m.group(1) if m.groups() else m.group(0)
        elif rtype == "header":
            value = resp.headers.get(expr)
        elif rtype == "set_cookie":
            # NOTE(review): a requests cookie jar may raise on duplicate cookie
            # names across domains/paths -- confirm whether that needs handling.
            value = session.cookies.get(expr)
        else:
            return ""
        return "" if value is None else str(value)
    except (ValueError, TypeError, re.error):
        # ValueError: body is not JSON; TypeError: wrong-typed expr/text;
        # re.error: invalid pattern (not a ValueError subclass, so it must be
        # listed explicitly or it would abort the whole replay).
        return ""
def http_replay_sequence(
calls: list[dict],
*,
params: dict | None = None,
extract: list[dict] | None = None,
timeout_s: float = 30.0,
verify_tls: bool = True,
allow_redirects: bool = True,
base_headers: dict | None = None,
) -> dict:
"""Replay an ordered sequence of HTTP call specs over a shared session.
Args:
calls: List of call specs, each
``{"method","url","headers"(dict),"cookies"(dict opc),"body"(str|None),
"body_type":"json"|"form"|"raw"|None}``.
params: Initial context dict for ``{{param}}`` substitution (copied).
extract: List of extract rules
``{"from": int|"last", "type": "json"|"regex"|"header"|"set_cookie",
"expr": str, "as": str}``. Applied right after the referenced step runs.
timeout_s: Per-request timeout in seconds.
verify_tls: Whether to verify TLS certificates (set on the session).
allow_redirects: Whether requests should follow redirects.
base_headers: Default headers merged into the session.
Returns:
Dict with ``status`` ("ok"|"error"), ``steps`` (per-step records),
``params_final`` (the context after all steps) and ``error`` (message
when ``status == "error"``).
"""
ctx: dict = dict(params) if params else {}
extract = extract or []
steps: list[dict] = []
# Validate input shape before opening a session.
if not isinstance(calls, list):
return {
"status": "error",
"steps": [],
"params_final": ctx,
"error": "calls must be a list of call specs",
}
session = requests.Session()
session.verify = verify_tls
if base_headers:
session.headers.update(base_headers)
status = "ok"
error_msg = ""
try:
for i, call in enumerate(calls):
if not isinstance(call, dict):
status = "error"
error_msg = f"step {i}: call spec must be a dict"
steps.append(
{
"idx": i,
"method": "",
"url": "",
"status_code": 0,
"ok": False,
"extracted": {},
"missing_params": [],
"error": "call spec must be a dict",
}
)
break
missing: list[str] = []
method = (call.get("method") or "GET").upper()
url = _subst(call.get("url") or "", ctx, missing)
headers = _subst_dict(call.get("headers"), ctx, missing)
cookies = _subst_dict(call.get("cookies"), ctx, missing)
body = _subst(call.get("body"), ctx, missing)
body_type = call.get("body_type")
kwargs: dict = {
"headers": headers or None,
"cookies": cookies or None,
"timeout": timeout_s,
"allow_redirects": allow_redirects,
}
# json/form/raw all send the body as-is via data= (the body is
# already a serialized string; do NOT re-serialize JSON).
if body is not None:
kwargs["data"] = body
try:
resp = session.request(method, url, **kwargs)
except requests.RequestException as exc:
status = "error"
error_msg = f"step {i}: {exc}"
steps.append(
{
"idx": i,
"method": method,
"url": url,
"status_code": 0,
"ok": False,
"extracted": {},
"missing_params": missing,
"error": str(exc),
}
)
break
code = resp.status_code
ok = 200 <= code < 400
# Apply extract rules targeting this step. "last" == the step just run.
extracted: dict = {}
extract_notes: list[str] = []
for rule in extract:
frm = rule.get("from")
if frm == "last" or frm == i:
as_name = rule.get("as")
if not as_name:
continue
value = _apply_extract_rule(rule, resp, session)
ctx[as_name] = value
extracted[as_name] = value
if value == "":
extract_notes.append(f"extract '{as_name}' not found")
steps.append(
{
"idx": i,
"method": method,
"url": url,
"status_code": code,
"ok": ok,
"extracted": extracted,
"missing_params": missing,
"error": "; ".join(extract_notes),
}
)
finally:
session.close()
return {
"status": status,
"steps": steps,
"params_final": ctx,
"error": error_msg,
}
@@ -0,0 +1,120 @@
"""Tests para http_replay_sequence.
No dependen de red: mockean requests.Session.request con unittest.mock para
verificar substitucion, extraccion y manejo de errores de transporte.
"""
from unittest.mock import patch
import requests
from .http_replay_sequence import http_replay_sequence
class _FakeResp:
    """Minimal stand-in for the parts of ``requests.Response`` the function uses."""

    def __init__(self, status_code=200, json_data=None, text="", headers=None):
        self.status_code = status_code
        self.text = text
        self.headers = headers if headers else {}
        # An omitted payload is represented as an empty JSON object.
        self._json = {} if json_data is None else json_data

    def json(self):
        """Return the canned JSON payload."""
        return self._json
def test_golden_extract_and_subst():
    """Two steps: a JSON value extracted from step 0 feeds ``{{token}}`` in step 1.

    Checks that both the URL and the header of step 1 carried the substituted
    value.
    """
    recorded = []  # one (method, url, kwargs) tuple per request issued

    def fake_request(self, method, url, **kwargs):
        recorded.append((method, url, kwargs))
        if "/uuid" in url:
            payload = {"data": {"items": [{"token": "ABC123"}]}}
        else:
            payload = {"echo": True}
        return _FakeResp(200, json_data=payload)

    get_step = {
        "method": "GET",
        "url": "https://api.example/uuid",
        "headers": {"Accept": "application/json"},
        "body": None,
        "body_type": None,
    }
    post_step = {
        "method": "POST",
        "url": "https://api.example/use/{{token}}",
        "headers": {"X-Token": "{{token}}"},
        "body": '{"k": "v"}',
        "body_type": "json",
    }
    token_rule = {"from": 0, "type": "json", "expr": "data.items.0.token", "as": "token"}

    with patch.object(requests.Session, "request", fake_request):
        result = http_replay_sequence([get_step, post_step], extract=[token_rule])

    assert result["status"] == "ok"
    assert result["error"] == ""
    assert result["params_final"]["token"] == "ABC123"

    # Step 0 extracted the token.
    first = result["steps"][0]
    assert first["extracted"] == {"token": "ABC123"}
    assert first["ok"] is True

    # Step 1: URL and X-Token header both carry the substituted value.
    sent_method, sent_url, sent_kwargs = recorded[1]
    assert sent_method == "POST"
    assert sent_url == "https://api.example/use/ABC123"
    assert sent_kwargs["headers"]["X-Token"] == "ABC123"
    # The body goes out through data= without being re-serialized.
    assert sent_kwargs["data"] == '{"k": "v"}'

    second = result["steps"][1]
    assert second["ok"] is True
    assert second["missing_params"] == []
def test_edge_missing_param():
    """A missing param populates ``missing_params`` and leaves ``{{x}}`` literal."""
    recorded = []

    def fake_request(self, method, url, **kwargs):
        recorded.append((method, url, kwargs))
        return _FakeResp(200, json_data={})

    spec = {
        "method": "GET",
        "url": "https://api.example/path/{{missing}}",
        "headers": {"X-H": "{{missing}}"},
        "body": None,
        "body_type": None,
    }

    with patch.object(requests.Session, "request", fake_request):
        result = http_replay_sequence([spec])

    assert result["status"] == "ok"

    # The literal {{missing}} survives untouched in both the URL and the header.
    _, sent_url, sent_kwargs = recorded[0]
    assert sent_url == "https://api.example/path/{{missing}}"
    assert sent_kwargs["headers"]["X-H"] == "{{missing}}"

    # The step reports the missing param exactly once (deduplicated).
    assert result["steps"][0]["missing_params"] == ["missing"]
def test_error_path_request_exception():
    """A ``requests.RequestException`` yields status=error, stops the run and fills step.error."""

    def fake_request(self, method, url, **kwargs):
        raise requests.RequestException("connection refused")

    first_call = {
        "method": "GET",
        "url": "https://down.example/a",
        "headers": {},
        "body": None,
        "body_type": None,
    }
    second_call = {
        "method": "GET",
        "url": "https://down.example/b",
        "headers": {},
        "body": None,
        "body_type": None,
    }

    with patch.object(requests.Session, "request", fake_request):
        result = http_replay_sequence([first_call, second_call])

    assert result["status"] == "error"
    assert "connection refused" in result["error"]

    # Execution stops at the exception: only the first step was recorded.
    recorded_steps = result["steps"]
    assert len(recorded_steps) == 1
    failed = recorded_steps[0]
    assert failed["ok"] is False
    assert failed["status_code"] == 0
    assert "connection refused" in failed["error"]