8742cb25be
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
169 lines
5.7 KiB
Python
169 lines
5.7 KiB
Python
"""Normalize HAR entries into reproducible call specs.

Second step of the "record -> distill -> replay" pattern for turning a web
flow into a registry function. Takes the output of `har_filter_flows` (a list
of HAR entries) and produces clean call specs, with auth (cookies/headers)
exposed so that the human/Claude can later mark the dynamic values with
`{{param}}`.

PURE function: no I/O; transforms lists/dicts deterministically.
"""
|
|
|
|
# Headers hop-by-hop / ruidosos que se eliminan por defecto (case-insensitive).
|
|
# `cookie` se trata aparte: se extrae a `cookies` y se quita de `headers`.
|
|
_HOP_BY_HOP = frozenset(
|
|
{
|
|
"host",
|
|
"content-length",
|
|
"connection",
|
|
"keep-alive",
|
|
"proxy-connection",
|
|
"accept-encoding",
|
|
"te",
|
|
"trailer",
|
|
"transfer-encoding",
|
|
"upgrade",
|
|
}
|
|
)
|
|
|
|
|
|
def _parse_cookie_header(value: str) -> dict:
|
|
"""Parsea el valor de un header `Cookie` en un dict {name: value}.
|
|
|
|
Formato: `a=1; b=2; c=3`. El ultimo gana si hay nombres repetidos.
|
|
"""
|
|
cookies: dict = {}
|
|
for pair in value.split(";"):
|
|
pair = pair.strip()
|
|
if not pair:
|
|
continue
|
|
name, sep, val = pair.partition("=")
|
|
name = name.strip()
|
|
if not name:
|
|
continue
|
|
cookies[name] = val.strip() if sep else ""
|
|
return cookies
|
|
|
|
|
|
def _infer_body_type(mime_type: str | None) -> str | None:
|
|
"""Infiere el tipo de body a partir del mimeType de postData.
|
|
|
|
application/json -> "json"
|
|
application/x-www-form-... -> "form"
|
|
multipart/* -> "raw"
|
|
otro / None -> "raw" si hay body, None lo decide el caller.
|
|
"""
|
|
if not mime_type:
|
|
return None
|
|
mt = mime_type.split(";", 1)[0].strip().lower()
|
|
if mt == "application/json":
|
|
return "json"
|
|
if mt == "application/x-www-form-urlencoded":
|
|
return "form"
|
|
if mt.startswith("multipart/"):
|
|
return "raw"
|
|
return "raw"
|
|
|
|
|
|
def _set_cookie_names_from_headers(headers: list[dict]) -> list[str]:
|
|
"""Extrae nombres de cookies de los headers `Set-Cookie` de la respuesta."""
|
|
names: list[str] = []
|
|
for h in headers:
|
|
if str(h.get("name", "")).lower() != "set-cookie":
|
|
continue
|
|
raw = str(h.get("value", ""))
|
|
# Set-Cookie: name=value; Path=/; HttpOnly -> nos quedamos con `name`.
|
|
first = raw.split(";", 1)[0].strip()
|
|
name = first.split("=", 1)[0].strip()
|
|
if name:
|
|
names.append(name)
|
|
return names
|
|
|
|
|
|
def har_extract_calls(
|
|
entries: list[dict],
|
|
*,
|
|
drop_headers: list[str] | None = None,
|
|
) -> list[dict]:
|
|
"""Convierte entries HAR en call specs normalizadas y reproducibles.
|
|
|
|
Por cada entry HAR produce un dict call spec con method, url, headers
|
|
(sin hop-by-hop, sin Cookie), cookies (parseadas del header Cookie),
|
|
body, body_type inferido, status de respuesta y nombres de cookies que
|
|
la respuesta setea. NO auto-parametriza: deja los valores tal cual para
|
|
que el humano marque despues los dinamicos con `{{param}}`.
|
|
|
|
Args:
|
|
entries: lista de entries HAR (cada uno con `request` y, opcional,
|
|
`response`). Tipicamente la salida de `har_filter_flows`.
|
|
drop_headers: nombres extra de headers a eliminar (case-insensitive),
|
|
aparte de los hop-by-hop por defecto. None = no quitar extras.
|
|
|
|
Returns:
|
|
lista de call specs, una por entry, con las claves: method, url,
|
|
headers, cookies, body, body_type, status, sets_cookies.
|
|
"""
|
|
extra_drop = {h.lower() for h in (drop_headers or [])}
|
|
|
|
specs: list[dict] = []
|
|
for entry in entries:
|
|
request = entry.get("request") or {}
|
|
response = entry.get("response") or {}
|
|
|
|
method = str(request.get("method", "")).upper()
|
|
url = request.get("url", "")
|
|
|
|
# Headers: lista [{name, value}] -> dict, con drop de hop-by-hop +
|
|
# extras, y extraccion del header Cookie a `cookies`.
|
|
headers: dict = {}
|
|
cookies: dict = {}
|
|
for h in request.get("headers") or []:
|
|
name = str(h.get("name", ""))
|
|
value = str(h.get("value", ""))
|
|
lname = name.lower()
|
|
if lname == "cookie":
|
|
cookies.update(_parse_cookie_header(value))
|
|
continue
|
|
if lname in _HOP_BY_HOP or lname in extra_drop:
|
|
continue
|
|
headers[name] = value # ultimo gana si repetidos
|
|
|
|
# Body desde postData.
|
|
post_data = request.get("postData") or {}
|
|
body = post_data.get("text")
|
|
mime_type = post_data.get("mimeType")
|
|
body_type = _infer_body_type(mime_type) if body is not None else None
|
|
|
|
# Status de respuesta.
|
|
raw_status = response.get("status")
|
|
status = int(raw_status) if isinstance(raw_status, (int, float)) and raw_status else None
|
|
if isinstance(raw_status, str) and raw_status.isdigit():
|
|
status = int(raw_status)
|
|
|
|
# Cookies que setea la respuesta: preferir response.cookies (HAR);
|
|
# si no, parsear los headers Set-Cookie.
|
|
sets_cookies: list[str] = []
|
|
resp_cookies = response.get("cookies")
|
|
if resp_cookies:
|
|
sets_cookies = [
|
|
str(c.get("name", "")) for c in resp_cookies if c.get("name")
|
|
]
|
|
else:
|
|
sets_cookies = _set_cookie_names_from_headers(
|
|
response.get("headers") or []
|
|
)
|
|
|
|
specs.append(
|
|
{
|
|
"method": method,
|
|
"url": url,
|
|
"headers": headers,
|
|
"cookies": cookies,
|
|
"body": body,
|
|
"body_type": body_type,
|
|
"status": status,
|
|
"sets_cookies": sets_cookies,
|
|
}
|
|
)
|
|
|
|
return specs
|