Files
fn_registry/python/functions/cybersecurity/har_extract_calls.py
T
egutierrez 8742cb25be feat(browser): auto-commit con 60 cambios
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-07 11:42:31 +02:00

169 lines
5.7 KiB
Python

"""Normaliza entries HAR en call specs reproducibles.
Segundo paso del patron "grabar -> destilar -> reproducir" un flujo web como
funcion del registry. Toma la salida de `har_filter_flows` (lista de entries
HAR) y produce call specs limpias, con auth (cookies/headers) expuesta para
que el humano/Claude marque luego los valores dinamicos con `{{param}}`.
Funcion PURA: sin I/O, transforma listas/dicts de forma determinista.
"""
# Hop-by-hop / noisy request headers that are dropped by default. Names are
# stored in lower case because lookups are done on the lower-cased name.
# `cookie` is intentionally not listed: it is handled separately (parsed into
# `cookies` and removed from `headers`).
_HOP_BY_HOP = frozenset(
    (
        "host",
        "content-length",
        "connection",
        "keep-alive",
        "proxy-connection",
        "accept-encoding",
        "te",
        "trailer",
        "transfer-encoding",
        "upgrade",
    )
)
def _parse_cookie_header(value: str) -> dict:
    """Parse the value of a ``Cookie`` header into a ``{name: value}`` dict.

    Expected format: ``a=1; b=2; c=3``. Each pair is split on its first
    ``=`` and both sides are stripped of surrounding whitespace. A pair
    without ``=`` is stored with an empty value, pairs with an empty name
    are ignored, and when a name repeats the last occurrence wins.

    Args:
        value: raw text of the ``Cookie`` header.

    Returns:
        dict mapping cookie names to their values.
    """
    parsed: dict = {}
    for chunk in value.split(";"):
        # An empty chunk partitions into three empty strings, so it is
        # rejected by the same empty-name check as "=value" pairs.
        key, eq, raw = chunk.strip().partition("=")
        key = key.strip()
        if key:
            parsed[key] = raw.strip() if eq else ""
    return parsed
def _infer_body_type(mime_type: str | None) -> str | None:
"""Infiere el tipo de body a partir del mimeType de postData.
application/json -> "json"
application/x-www-form-... -> "form"
multipart/* -> "raw"
otro / None -> "raw" si hay body, None lo decide el caller.
"""
if not mime_type:
return None
mt = mime_type.split(";", 1)[0].strip().lower()
if mt == "application/json":
return "json"
if mt == "application/x-www-form-urlencoded":
return "form"
if mt.startswith("multipart/"):
return "raw"
return "raw"
def _set_cookie_names_from_headers(headers: list[dict]) -> list[str]:
    """Collect the cookie names announced by ``Set-Cookie`` response headers.

    The header name is matched case-insensitively. Some HAR exporters fold
    several ``Set-Cookie`` headers into a single entry whose value holds one
    cookie per line, so each line of the value is treated as its own cookie.

    Args:
        headers: HAR response headers, a list of ``{"name", "value"}`` dicts.

    Returns:
        Cookie names in order of appearance. Duplicates are kept and
        cookies with an empty name are skipped.
    """
    names: list[str] = []
    for header in headers:
        if str(header.get("name", "")).lower() != "set-cookie":
            continue
        raw = str(header.get("value", ""))
        # A single Set-Cookie value cannot contain a newline, so splitting
        # on "\n" only ever separates folded cookies (strip() drops any "\r").
        for line in raw.split("\n"):
            # "name=value; Path=/; HttpOnly" -> keep only `name`.
            first_pair = line.split(";", 1)[0]
            name = first_pair.split("=", 1)[0].strip()
            if name:
                names.append(name)
    return names
def _coerce_status(raw: object) -> int | None:
    """Return a HAR ``response.status`` as an int, or None when unusable."""
    # bool is a subclass of int; True/False is never a real status code.
    if isinstance(raw, bool):
        return None
    if isinstance(raw, str):
        # isdecimal() (unlike isdigit()) only accepts characters int() parses.
        if not raw.isdecimal():
            return None
    elif not isinstance(raw, (int, float)):
        return None
    try:
        value = int(raw)
    except (ValueError, OverflowError):
        # NaN / infinite floats, or digit strings too long for int().
        return None
    # 0 carries no usable status, so it is reported as None.
    return value or None


def har_extract_calls(
    entries: list[dict],
    *,
    drop_headers: list[str] | None = None,
) -> list[dict]:
    """Convert HAR entries into normalised, reproducible call specs.

    For every HAR entry a call-spec dict is produced with the request
    method, URL, headers, cookies and body, plus the response status and the
    names of the cookies the response sets. Values are NOT auto-parametrised:
    they are left as recorded so dynamic ones can be marked afterwards with
    ``{{param}}``.

    Header handling:
        * the ``Cookie`` header is parsed into ``cookies`` and removed from
          ``headers``;
        * default hop-by-hop / noisy headers and ``drop_headers`` are removed
          (case-insensitive);
        * HTTP/2 pseudo-headers (names starting with ``:``) and entries
          without a name are removed, since they are not valid header names;
        * repeated headers are collapsed case-insensitively; the last
          occurrence (name spelling and value) wins.

    Args:
        entries: list of HAR entries (each with ``request`` and, optionally,
            ``response``).
        drop_headers: extra header names to remove (case-insensitive) on top
            of the defaults. None = remove no extras.

    Returns:
        list of call specs, one per entry, with the keys: method, url,
        headers, cookies, body, body_type, status, sets_cookies.
        ``body_type`` is None only when there is no body; a body without a
        MIME type is reported as "raw". ``status`` is None when missing, zero
        or not parseable.
    """
    extra_drop = {h.lower() for h in (drop_headers or [])}
    specs: list[dict] = []
    for entry in entries:
        request = entry.get("request") or {}
        response = entry.get("response") or {}

        # Headers: list of {name, value} -> dict.
        headers: dict = {}
        cookies: dict = {}
        spelling: dict = {}  # lower-cased name -> key currently in `headers`
        for h in request.get("headers") or []:
            name = str(h.get("name", ""))
            value = str(h.get("value", ""))
            lname = name.lower()
            if lname == "cookie":
                cookies.update(_parse_cookie_header(value))
                continue
            if not name or name.startswith(":"):
                continue
            if lname in _HOP_BY_HOP or lname in extra_drop:
                continue
            # Header names are case-insensitive: drop an earlier entry that
            # only differs in casing so that the last occurrence really wins.
            previous = spelling.get(lname)
            if previous is not None and previous != name:
                del headers[previous]
            headers[name] = value
            spelling[lname] = name

        # Body from postData.
        post_data = request.get("postData") or {}
        body = post_data.get("text")
        if body is None:
            body_type = None
        else:
            # A body whose MIME type is missing is still a raw body.
            body_type = _infer_body_type(post_data.get("mimeType")) or "raw"

        # Cookies set by the response: prefer the HAR `response.cookies`
        # list; otherwise fall back to parsing the Set-Cookie headers.
        resp_cookies = response.get("cookies")
        if resp_cookies:
            sets_cookies = [
                str(c.get("name", "")) for c in resp_cookies if c.get("name")
            ]
        else:
            sets_cookies = _set_cookie_names_from_headers(
                response.get("headers") or []
            )

        specs.append(
            {
                "method": str(request.get("method", "")).upper(),
                "url": request.get("url", ""),
                "headers": headers,
                "cookies": cookies,
                "body": body,
                "body_type": body_type,
                "status": _coerce_status(response.get("status")),
                "sets_cookies": sets_cookies,
            }
        )
    return specs