feat(browser): auto-commit con 178 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-20 18:22:23 +02:00
parent 7d100e7f3e
commit 763e06c127
178 changed files with 19917 additions and 317 deletions
@@ -0,0 +1,220 @@
+"""Descarga y parsea un mensaje IMAP por UID a un dict estructurado.
+
+Funcion IMPURA: hace I/O de red sobre una conexion `imaplib` viva (la produce
+`imap_connect`). Ejecuta `conn.uid("FETCH", uid, "(BODY.PEEK[])")` (que NO marca
+el mensaje como leido) o `"(RFC822)"` (que SI lo marca) segun `mark_seen`,
+parsea los bytes con `email.message_from_bytes` y extrae las cabeceras y el
+cuerpo a un dict.
+
+Las cabeceras codificadas (RFC 2047, ej. `=?UTF-8?B?...?=`) se decodifican a
+Unicode con `email.header.decode_header`. Los cuerpos de texto se decodifican
+respetando el charset declarado en cada parte (con fallback a utf-8/latin-1).
+Los adjuntos se listan con metadatos (nombre, tipo, tamano) SIN incluir el
+binario completo en el resultado.
+
+NUNCA lanza: devuelve un dict con `status` ("ok"/"error").
+"""
+
+import email
+from email.header import decode_header
+from email.utils import parseaddr, getaddresses
+
+
+def imap_fetch_message(conn, uid: int, mark_seen: bool = False) -> dict:
+    """Fetch one IMAP message by UID and return it parsed into a dict.
+
+    Never raises: every failure, including an unusable `uid`, is reported
+    through the returned dict.
+
+    Args:
+        conn: live, authenticated `imaplib.IMAP4[_SSL]`-style object; only its
+            `uid(command, *args)` method is used.
+        uid: message UID (a sequence number is NOT valid here). Must convert
+            to a positive integer without loss: zero, negatives, fractional
+            floats and infinities are rejected before touching the network.
+        mark_seen: False (default) fetches with `BODY.PEEK[]`, which does not
+            set the `\\Seen` flag; True fetches with `RFC822`, which does.
+
+    Returns:
+        On success::
+
+            {"status": "ok", "message": {...}}
+
+        where the inner dict is the one built by `_parse_message` (uid, from,
+        to, cc, subject, date, message_id, body_text, body_html, attachments).
+
+        On failure (conn is None, invalid UID, FETCH not OK, empty response,
+        or any exception raised while fetching/parsing)::
+
+            {"status": "error", "error": <str>}
+    """
+    if conn is None:
+        return {"status": "error", "error": "imap_fetch_message: conn es None"}
+
+    try:
+        uid_int = int(uid)
+        # int() silently truncates floats (2.5 -> 2), which would fetch a
+        # different message; UIDs are also strictly positive.
+        if uid_int <= 0 or (isinstance(uid, float) and uid != uid_int):
+            raise ValueError("uid must be a positive integer")
+    except (ValueError, TypeError, OverflowError):
+        # OverflowError covers int(float("inf")), which would otherwise
+        # escape and break the never-raises contract.
+        return {"status": "error", "error": f"imap_fetch_message: uid invalido: {uid!r}"}
+
+    fetch_spec = "(RFC822)" if mark_seen else "(BODY.PEEK[])"
+    try:
+        typ, data = conn.uid("FETCH", str(uid_int), fetch_spec)
+        if typ != "OK":
+            return {
+                "status": "error",
+                "error": f"imap_fetch_message: FETCH uid {uid_int} devolvio {typ}",
+            }
+
+        raw = _extract_rfc822(data)
+        if raw is None:
+            # An OK response without a literal means the UID matched nothing.
+            return {
+                "status": "error",
+                "error": f"imap_fetch_message: UID {uid_int} sin contenido (inexistente?)",
+            }
+
+        msg = email.message_from_bytes(raw)
+        parsed = _parse_message(msg, uid_int)
+        return {"status": "ok", "message": parsed}
+    except Exception as exc:  # noqa: BLE001 - contract: never raise.
+        return {"status": "error", "error": f"imap_fetch_message: {exc}"}
+
+
+def _extract_rfc822(data):
+    """Pull the raw message bytes out of a FETCH response.
+
+    imaplib hands back something shaped like
+    ``[(b'1 (BODY[] {N}', b'<bytes>'), b')']``. The literal is the second
+    element of the first tuple entry that carries bytes there.
+
+    Returns:
+        The literal as `bytes`, or None when `data` is empty or no entry
+        carries a bytes/bytearray literal.
+    """
+    if not data:
+        return None
+    literals = (
+        entry[1]
+        for entry in data
+        if isinstance(entry, tuple)
+        and len(entry) >= 2
+        and isinstance(entry[1], (bytes, bytearray))
+    )
+    first_literal = next(literals, None)
+    if first_literal is None:
+        return None
+    # Normalise bytearray to an immutable bytes object.
+    return bytes(first_literal)
+
+
+def _parse_message(msg, uid_int: int) -> dict:
+    """Convert an `email.message.Message` into the contract dict.
+
+    Args:
+        msg: parsed message (as produced by `email.message_from_bytes`).
+        uid_int: UID to echo back under the "uid" key.
+
+    Returns:
+        Dict with keys uid, from, to, cc, subject, date, message_id,
+        body_text, body_html and attachments. Text and HTML bodies are the
+        non-empty decoded parts joined with a newline.
+    """
+    body_text_parts: list[str] = []
+    body_html_parts: list[str] = []
+    attachments: list[dict] = []
+
+    # walk() yields the message itself first, so a single-part message goes
+    # through the same loop; multipart containers are skipped because only
+    # their leaf parts carry content.
+    for part in msg.walk():
+        if not part.is_multipart():
+            _consume_part(part, body_text_parts, body_html_parts, attachments)
+
+    # Under the default compat32 policy a header holding raw 8-bit bytes comes
+    # back as an email.header.Header object, which has no .strip(); str()
+    # makes the Message-ID lookup safe for such malformed mail.
+    message_id = str(msg.get("Message-ID", "") or "").strip()
+
+    return {
+        "uid": uid_int,
+        "from": _decode_header(msg.get("From", "")),
+        "to": _decode_addr_list(msg.get_all("To", [])),
+        "cc": _decode_addr_list(msg.get_all("Cc", [])),
+        "subject": _decode_header(msg.get("Subject", "")),
+        "date": _decode_header(msg.get("Date", "")),
+        "message_id": message_id,
+        "body_text": "\n".join(p for p in body_text_parts if p),
+        "body_html": "\n".join(p for p in body_html_parts if p),
+        "attachments": attachments,
+    }
+
+
+def _consume_part(part, body_text_parts, body_html_parts, attachments) -> None:
+    """Classify one leaf MIME part as attachment, text/plain or text/html.
+
+    Appends to exactly one of the three accumulator lists, or to none when
+    the part is an inline non-text part without a filename.
+
+    Args:
+        part: a non-multipart `email.message.Message` part.
+        body_text_parts: list receiving decoded text/plain bodies.
+        body_html_parts: list receiving decoded text/html bodies.
+        attachments: list receiving ``{"filename", "content_type",
+            "size_bytes"}`` dicts (metadata only, never the binary).
+    """
+    content_type = part.get_content_type()
+    # get_content_disposition() returns just the lowercased disposition token
+    # ("attachment", "inline", ...) or None. Searching the whole header for the
+    # substring "attachment" would misfire on parameters such as
+    # filename="attachment.txt" on an inline part.
+    disposition = part.get_content_disposition()
+    filename = part.get_filename()
+    if filename:
+        filename = _decode_header(filename)
+
+    is_body_type = content_type in ("text/plain", "text/html")
+    # A named part that is not a text body counts as an attachment even
+    # without an explicit "attachment" disposition.
+    is_attachment = disposition == "attachment" or bool(filename and not is_body_type)
+
+    if is_attachment:
+        payload = part.get_payload(decode=True) or b""
+        attachments.append(
+            {
+                "filename": filename or "",
+                "content_type": content_type,
+                "size_bytes": len(payload),
+            }
+        )
+        return
+
+    if content_type == "text/plain":
+        body_text_parts.append(_decode_body(part))
+    elif content_type == "text/html":
+        body_html_parts.append(_decode_body(part))
+    # Any other inline part without a filename is ignored.
+
+
+def _decode_body(part) -> str:
+    """Return the text of a body part, honouring its declared charset.
+
+    The transfer-decoded payload is tried against the part's declared charset
+    (when there is one), then utf-8, then latin-1; the first codec that
+    decodes cleanly wins. A part with no decodable payload yields "".
+    """
+    raw = part.get_payload(decode=True)
+    if raw is None:
+        return ""
+
+    declared = part.get_content_charset()
+    codecs_to_try = ([declared] if declared else []) + ["utf-8", "latin-1"]
+
+    for codec in codecs_to_try:
+        try:
+            text = raw.decode(codec)
+        except (LookupError, UnicodeDecodeError):
+            # Unknown codec name or bytes invalid for it: try the next one.
+            continue
+        return text
+
+    # Last resort: decoding with replacement cannot fail.
+    return raw.decode("utf-8", errors="replace")
+
+
+def _decode_header(value: str) -> str:
+    """Decode an RFC 2047 header value (``=?charset?enc?...?=``) to Unicode.
+
+    Args:
+        value: header value; None yields "", and bytes are first read as
+            latin-1.
+
+    Returns:
+        The decoded header with surrounding whitespace stripped. If
+        `decode_header` itself fails on a malformed header, the input is
+        returned via `str(value)` as a best effort.
+    """
+    if value is None:
+        return ""
+    if isinstance(value, bytes):
+        value = value.decode("latin-1", errors="replace")
+    parts = []
+    try:
+        for chunk, enc in decode_header(value):
+            if not isinstance(chunk, bytes):
+                parts.append(chunk)
+            elif enc:
+                try:
+                    parts.append(chunk.decode(enc, errors="replace"))
+                except (LookupError, UnicodeDecodeError):
+                    # Unknown charset label: fall back to lenient utf-8.
+                    parts.append(chunk.decode("utf-8", errors="replace"))
+            else:
+                # No declared charset: plain header text sitting next to an
+                # encoded word. Try utf-8 first, then latin-1, which maps every
+                # byte to a character instead of degrading it to U+FFFD.
+                try:
+                    parts.append(chunk.decode("utf-8"))
+                except UnicodeDecodeError:
+                    parts.append(chunk.decode("latin-1"))
+    except Exception:  # noqa: BLE001 - malformed header: best effort.
+        return str(value)
+    return "".join(parts).strip()
+
+
+def _decode_addr_list(values) -> str:
+    """Flatten a list of address headers into one comma-separated string.
+
+    All header values (To/Cc may repeat) are parsed together; each display
+    name is RFC 2047-decoded while the addr-spec is kept as parsed. Entries
+    with neither a name nor an address are dropped.
+    """
+    if not values:
+        return ""
+
+    def _render(raw_name, address):
+        # Decode only when a display name is present.
+        display = _decode_header(raw_name) if raw_name else ""
+        if display and address:
+            return f"{display} <{address}>"
+        # Whichever half exists, address first; "" when both are empty.
+        return address or display
+
+    rendered = (_render(raw_name, address) for raw_name, address in getaddresses(values))
+    return ", ".join(entry for entry in rendered if entry)