chore: auto-commit (7 archivos)

- python/functions/core/__init__.py
- python/functions/pipelines/metabase_bulk_add_users_to_group.md
- python/functions/pipelines/metabase_bulk_add_users_to_group.py
- cpp/apps/
- python/functions/core/clean_email.md
- python/functions/core/clean_email.py
- python/functions/core/clean_email_test.py

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-01 11:39:08 +02:00
parent dbf5b45acd
commit ec9857acbe
8 changed files with 186 additions and 4 deletions
+2
View File
@@ -1,5 +1,6 @@
"""Core functional programming utilities."""
from .clean_email import clean_email
from .core import (
all_of,
any_of,
@@ -25,6 +26,7 @@ __all__ = [
"all_of",
"any_of",
"chunk",
"clean_email",
"compose",
"drop",
"filter_list",
+59
View File
@@ -0,0 +1,59 @@
---
name: clean_email
kind: function
lang: py
domain: core
version: "1.0.0"
purity: pure
signature: "def clean_email(raw: str) -> str"
description: "Normaliza un email crudo que puede provenir de fuentes sucias (TSVs, CSVs, hojas de calculo). Elimina whitespace, angle brackets y comillas, convierte a minusculas y valida la forma basica. Lanza ValueError si el resultado no es un email plausible."
tags: [email, sanitize, validation, core]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: []
tested: true
tests:
- "angle brackets y whitespace se limpian"
- "comillas dobles y mayusculas se normalizan"
- "whitespace puro se limpia y lowercase"
- "email sin arroba lanza ValueError"
- "string vacio lanza ValueError"
- "email sin punto en dominio lanza ValueError"
test_file_path: "python/functions/core/clean_email_test.py"
file_path: "python/functions/core/clean_email.py"
params:
- name: raw
desc: "String crudo con la direccion de email, potencialmente rodeado de whitespace, angle brackets (<foo@bar.com>) o comillas simples/dobles."
output: "Email limpio en minusculas con la forma 'local@dominio.tld'. Lanza ValueError si no es un email plausible tras la limpieza."
---
## Ejemplo
```python
from core.clean_email import clean_email
# Caso tipico de TSV/Outlook con angle brackets y whitespace
clean_email(" <eaznarez@mutuamadmotor.com>")
# -> "eaznarez@mutuamadmotor.com"
# Comillas dobles y mayusculas desde spreadsheet
clean_email('"BAR@BAZ.com"')
# -> "bar@baz.com"
# Caso invalido: lanza ValueError
import pytest
with pytest.raises(ValueError):
clean_email("not-an-email")
```
## Cuando usarla
Antes de enviar una direccion de email a una API que valida estrictamente (ej. Metabase `POST /api/user`, SendGrid, etc.) cuando el origen del dato es un CSV, TSV, Excel, o campo pegado a mano. Usar esta funcion como paso de limpieza antes de cualquier llamada de creacion de usuarios o envio de correos.
## Gotchas
- NO valida registros MX ni que el dominio exista realmente — solo comprueba forma basica (un `@`, local part no vacia, al menos un `.` en el dominio).
- NO decodifica quoted-printable ni dominios IDN/internacionales — solo limpieza de caracteres ASCII de envoltura.
+53
View File
@@ -0,0 +1,53 @@
"""Normalize a raw email string from messy sources (TSVs, CSVs, spreadsheets)."""
def clean_email(raw: str) -> str:
    """Normalize a raw email string and validate its basic shape.

    Surrounding whitespace is removed, then enclosing angle brackets, double
    quotes and single quotes are peeled off repeatedly, so nested wrappers
    are handled in any order (``"<foo@bar.com>"`` as well as
    ``<"foo@bar.com">``). The result is lowercased.

    Only a wrapper that encloses the whole value is removed. A display-name
    form such as ``Name <foo@bar.com>`` is rejected, not parsed.

    Args:
        raw: Raw email string that may be surrounded by whitespace, angle
            brackets, and/or single or double quotes.

    Returns:
        Cleaned, lowercased email address.

    Raises:
        ValueError: If the cleaned result is not a plausible email. It must
            contain exactly one '@', a non-empty local part, a domain with
            at least one '.' and no empty labels, and no whitespace or
            angle brackets anywhere.
    """
    wrappers = (("<", ">"), ('"', '"'), ("'", "'"))

    cleaned = raw.strip()
    # Keep peeling until a full pass removes nothing: sources nest wrappers in
    # arbitrary order. Every removal shortens the string, so this terminates.
    peeled = True
    while peeled:
        peeled = False
        for opener, closer in wrappers:
            if (
                len(cleaned) >= 2
                and cleaned.startswith(opener)
                and cleaned.endswith(closer)
            ):
                cleaned = cleaned[1:-1].strip()
                peeled = True

    cleaned = cleaned.lower()

    local, _, domain = cleaned.partition("@")
    is_plausible = (
        cleaned.count("@") == 1
        and bool(local)
        and "." in domain
        # Rejects empty labels such as "a@.", "a@b." or "a@b..com".
        and all(domain.split("."))
        # Leftover whitespace or brackets mean the value was not a bare
        # address (e.g. "Name <foo@bar.com>"); strict APIs would reject it.
        and not any(ch.isspace() or ch in "<>" for ch in cleaned)
    )
    if not is_plausible:
        raise ValueError(
            f"invalid email after cleaning: {raw!r} -> {cleaned!r}"
        )
    return cleaned
+43
View File
@@ -0,0 +1,43 @@
"""Tests para clean_email."""
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
import pytest
from clean_email import clean_email
def test_angle_brackets_y_whitespace_se_limpian():
    """Leading whitespace and enclosing angle brackets are removed."""
    cleaned = clean_email(" <foo@bar.com>")
    assert cleaned == "foo@bar.com"
def test_comillas_dobles_y_mayusculas_se_normalizan():
    """Enclosing double quotes are removed and the address is lowercased."""
    cleaned = clean_email('"BAR@BAZ.com"')
    assert cleaned == "bar@baz.com"
def test_whitespace_puro_se_limpia_y_lowercase():
    """Surrounding whitespace is trimmed and mixed case is lowercased."""
    cleaned = clean_email(" Mixed.Case@Example.COM ")
    assert cleaned == "mixed.case@example.com"
def test_email_sin_arroba_lanza_valueerror():
    """A value without '@' is rejected with ValueError."""
    missing_at = "not-an-email"
    with pytest.raises(ValueError):
        clean_email(missing_at)
def test_string_vacio_lanza_valueerror():
    """The empty string is rejected with ValueError."""
    empty = ""
    with pytest.raises(ValueError):
        clean_email(empty)
def test_email_sin_punto_en_dominio_lanza_valueerror():
    """A domain without any '.' is rejected with ValueError."""
    dotless_domain = "a@b"
    with pytest.raises(ValueError):
        clean_email(dotless_domain)
def test_comillas_simples_se_limpian():
    """Enclosing single quotes are removed."""
    cleaned = clean_email("'user@domain.es'")
    assert cleaned == "user@domain.es"
def test_angle_brackets_con_espacio_interior():
    """Whitespace just inside the angle brackets is trimmed as well."""
    cleaned = clean_email("< user@example.com >")
    assert cleaned == "user@example.com"
@@ -3,12 +3,13 @@ name: metabase_bulk_add_users_to_group
kind: pipeline
lang: py
domain: pipelines
version: "1.0.0"
version: "1.1.0"
purity: impure
signature: "def metabase_bulk_add_users_to_group(client: MetabaseClient, group: int | str, targets: list[dict], send_reset_existing: bool = False) -> list[dict]"
description: "Crea (si faltan) y añade al Permission Group dado una lista de usuarios. Idempotente: usuarios existentes se re-añaden solo si no son miembros. Soporta resolver group por id o por nombre (substring case-insensitive). Opcionalmente reenvia mail de reset a los ya existentes."
tags: [metabase, users, groups, bulk]
uses_functions:
- clean_email_py_core
- metabase_list_groups_py_infra
- metabase_list_users_py_infra
- metabase_get_group_py_infra
@@ -79,3 +80,8 @@ Cuando tengas que dar de alta o re-añadir N usuarios al mismo Permission Group
- `send_reset_existing=True` solo afecta a los `status="existing"`. Los `status="created"` no reciben reset (ya recibieron invitacion).
- `group` por nombre (str): si el substring matchea 0 o >1 grupos -> `ValueError`. Preferir `group_id` (int) si hay nombres ambiguos.
- La respuesta de `metabase_list_users` puede ser un dict con key `data` o directamente la lista, dependiendo de la version de Metabase. El pipeline maneja ambos formatos.
- Desde v1.1.0 cada `target["email"]` pasa por `clean_email` antes de cualquier llamada: elimina espacios, angle brackets `<...>` y comillas, y normaliza a minusculas. Si la limpieza falla (ValueError), la fila sale como `status="create_failed"` sin tocar Metabase. Esto absorbe TSVs/CSVs con formato Outlook (`" <foo@bar.com>"`) que antes provocaban 400 Bad Request al POST /api/user.
## Capability growth log
- v1.1.0 (2026-05-28) — integra `clean_email_py_core` para normalizar emails de cada target. Absorbe `<...>`, comillas y whitespace antes del POST a Metabase. Fix tras incidente real con TSV de 90 jefes de centro donde 1 fila tenia `" <eaznarez@mutuamadmotor.com>"` y rompia create_user con 400.
@@ -12,6 +12,7 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import httpx
from core import clean_email
from metabase import (
MetabaseClient,
metabase_list_groups,
@@ -39,6 +40,10 @@ def metabase_bulk_add_users_to_group(
group: group_id (int) o nombre del grupo (str, substring
case-insensitive). Debe matchear exactamente 1 grupo.
targets: lista de dicts con keys first_name, last_name, email.
El email se normaliza via clean_email (strip whitespace,
angle brackets `<...>`, comillas, lowercase). Si el raw
es invalido, esa fila sale como status=create_failed sin
hacer ninguna llamada a Metabase.
send_reset_existing: si True, reenvia POST /api/session/forgot_password
a los usuarios ya existentes (no a los recien
creados, que ya recibieron invitacion).
@@ -93,11 +98,10 @@ def metabase_bulk_add_users_to_group(
for target in targets:
first_name = target["first_name"]
last_name = target["last_name"]
email = target["email"]
email_lower = email.lower()
raw_email = target["email"]
row: dict = {
"email": email,
"email": raw_email,
"user_id": None,
"status": None,
"membership": None,
@@ -105,6 +109,19 @@ def metabase_bulk_add_users_to_group(
"error": None,
}
# Normalizar email (strip whitespace, <...>, comillas, lowercase).
# Si el raw es invalido (no parseable como email), saltar con error.
try:
email = clean_email(raw_email)
except ValueError as exc:
row["status"] = "create_failed"
row["membership"] = "skipped"
row["error"] = str(exc)
results.append(row)
continue
row["email"] = email
email_lower = email # ya lowercase tras clean_email
# Determinar si el usuario ya existe
if email_lower in by_email:
uid = by_email[email_lower]["id"]