chore: auto-commit (7 archivos)
- python/functions/core/__init__.py - python/functions/pipelines/metabase_bulk_add_users_to_group.md - python/functions/pipelines/metabase_bulk_add_users_to_group.py - cpp/apps/ - python/functions/core/clean_email.md - python/functions/core/clean_email.py - python/functions/core/clean_email_test.py Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
"""Core functional programming utilities."""
|
||||
|
||||
from .clean_email import clean_email
|
||||
from .core import (
|
||||
all_of,
|
||||
any_of,
|
||||
@@ -25,6 +26,7 @@ __all__ = [
|
||||
"all_of",
|
||||
"any_of",
|
||||
"chunk",
|
||||
"clean_email",
|
||||
"compose",
|
||||
"drop",
|
||||
"filter_list",
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
---
|
||||
name: clean_email
|
||||
kind: function
|
||||
lang: py
|
||||
domain: core
|
||||
version: "1.0.0"
|
||||
purity: pure
|
||||
signature: "def clean_email(raw: str) -> str"
|
||||
description: "Normaliza un email crudo que puede provenir de fuentes sucias (TSVs, CSVs, hojas de calculo). Elimina whitespace, angle brackets y comillas, convierte a minusculas y valida la forma basica. Lanza ValueError si el resultado no es un email plausible."
|
||||
tags: [email, sanitize, validation, core]
|
||||
uses_functions: []
|
||||
uses_types: []
|
||||
returns: []
|
||||
returns_optional: false
|
||||
error_type: ""
|
||||
imports: []
|
||||
tested: true
|
||||
tests:
|
||||
- "angle brackets y whitespace se limpian"
|
||||
- "comillas dobles y mayusculas se normalizan"
|
||||
- "whitespace puro se limpia y lowercase"
|
||||
- "email sin arroba lanza ValueError"
|
||||
- "string vacio lanza ValueError"
|
||||
- "email sin punto en dominio lanza ValueError"
|
||||
test_file_path: "python/functions/core/clean_email_test.py"
|
||||
file_path: "python/functions/core/clean_email.py"
|
||||
params:
|
||||
- name: raw
|
||||
desc: "String crudo con la direccion de email, potencialmente rodeado de whitespace, angle brackets (<foo@bar.com>) o comillas simples/dobles."
|
||||
output: "Email limpio en minusculas con la forma 'local@dominio.tld'. Lanza ValueError si no es un email plausible tras la limpieza."
|
||||
---
|
||||
|
||||
## Ejemplo
|
||||
|
||||
```python
|
||||
from core.clean_email import clean_email
|
||||
|
||||
# Caso tipico de TSV/Outlook con angle brackets y whitespace
|
||||
clean_email(" <eaznarez@mutuamadmotor.com>")
|
||||
# -> "eaznarez@mutuamadmotor.com"
|
||||
|
||||
# Comillas dobles y mayusculas desde spreadsheet
|
||||
clean_email('"BAR@BAZ.com"')
|
||||
# -> "bar@baz.com"
|
||||
|
||||
# Caso invalido: lanza ValueError
|
||||
import pytest
|
||||
with pytest.raises(ValueError):
|
||||
clean_email("not-an-email")
|
||||
```
|
||||
|
||||
## Cuando usarla
|
||||
|
||||
Antes de enviar una direccion de email a una API que valida estrictamente (ej. Metabase `POST /api/user`, SendGrid, etc.) cuando el origen del dato es un CSV, TSV, Excel, o campo pegado a mano. Usar esta funcion como paso de limpieza antes de cualquier llamada de creacion de usuarios o envio de correos.
|
||||
|
||||
## Gotchas
|
||||
|
||||
- NO valida registros MX ni que el dominio exista realmente — solo comprueba la forma basica (exactamente un `@`, parte local no vacia y al menos un `.` en el dominio).
|
||||
- NO decodifica quoted-printable ni dominios IDN/internacionales — solo limpieza de caracteres ASCII de envoltura.
|
||||
@@ -0,0 +1,53 @@
|
||||
"""Normalize a raw email string from messy sources (TSVs, CSVs, spreadsheets)."""
|
||||
|
||||
|
||||
def clean_email(raw: str) -> str:
    """Normalize a raw email string and validate its basic shape.

    The input is trimmed, then any wrapping angle brackets, double quotes or
    single quotes are peeled off repeatedly (in any nesting order), and the
    result is lowercased before validation.

    Args:
        raw: Raw email string that may be surrounded by whitespace, angle
            brackets (``<foo@bar.com>``) or single/double quotes.

    Returns:
        The cleaned, lowercased email address.

    Raises:
        ValueError: If ``raw`` is not a string, or if the cleaned result is
            not a plausible email. A plausible email has exactly one '@', a
            non-empty local part, no whitespace or angle brackets, and a
            dotted domain without empty labels.
    """
    # Empty spreadsheet cells often arrive as None/NaN; report them with the
    # same exception type callers already handle for malformed rows.
    if not isinstance(raw, str):
        raise ValueError(f"invalid email: expected str, got {raw!r}")

    wrappers = (("<", ">"), ('"', '"'), ("'", "'"))
    cleaned = raw.strip()

    # Keep peeling until no wrapper matches, so nested combinations such as
    # '"<foo@bar.com>"' come out clean regardless of nesting order.
    peeled = True
    while peeled:
        peeled = False
        for opener, closer in wrappers:
            if (
                len(cleaned) >= 2
                and cleaned.startswith(opener)
                and cleaned.endswith(closer)
            ):
                cleaned = cleaned[1:-1].strip()
                peeled = True

    cleaned = cleaned.lower()

    local, _, domain = cleaned.partition("@")
    plausible = (
        cleaned.count("@") == 1
        and bool(local)
        # Leftover whitespace or brackets mean the wrapper was not around the
        # whole value (e.g. "Name <addr>"); refuse rather than guess.
        and not any(ch.isspace() or ch in "<>" for ch in cleaned)
        and "." in domain
        # Every dot-separated label must be non-empty ("a@.com", "a@b..com").
        and all(domain.split("."))
    )
    if not plausible:
        raise ValueError(
            f"invalid email after cleaning: {raw!r} -> {cleaned!r}"
        )

    return cleaned
|
||||
@@ -0,0 +1,43 @@
|
||||
"""Tests para clean_email."""
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
|
||||
import pytest
|
||||
from clean_email import clean_email
|
||||
|
||||
|
||||
def test_angle_brackets_y_whitespace_se_limpian():
    """Leading whitespace and wrapping angle brackets are removed."""
    cleaned = clean_email(" <foo@bar.com>")
    assert cleaned == "foo@bar.com"
|
||||
|
||||
|
||||
def test_comillas_dobles_y_mayusculas_se_normalizan():
    """Wrapping double quotes are dropped and the address is lowercased."""
    cleaned = clean_email('"BAR@BAZ.com"')
    assert cleaned == "bar@baz.com"
|
||||
|
||||
|
||||
def test_whitespace_puro_se_limpia_y_lowercase():
    """Surrounding whitespace is trimmed and mixed case is lowercased."""
    cleaned = clean_email(" Mixed.Case@Example.COM ")
    assert cleaned == "mixed.case@example.com"
|
||||
|
||||
|
||||
def test_email_sin_arroba_lanza_valueerror():
    """Input without an '@' is rejected with ValueError."""
    pytest.raises(ValueError, clean_email, "not-an-email")
|
||||
|
||||
|
||||
def test_string_vacio_lanza_valueerror():
    """An empty string is rejected with ValueError."""
    pytest.raises(ValueError, clean_email, "")
|
||||
|
||||
|
||||
def test_email_sin_punto_en_dominio_lanza_valueerror():
    """A domain without any dot is rejected with ValueError."""
    pytest.raises(ValueError, clean_email, "a@b")
|
||||
|
||||
|
||||
def test_comillas_simples_se_limpian():
    """Wrapping single quotes are removed."""
    cleaned = clean_email("'user@domain.es'")
    assert cleaned == "user@domain.es"
|
||||
|
||||
|
||||
def test_angle_brackets_con_espacio_interior():
    """Whitespace just inside the angle brackets is trimmed as well."""
    cleaned = clean_email("< user@example.com >")
    assert cleaned == "user@example.com"
|
||||
@@ -3,12 +3,13 @@ name: metabase_bulk_add_users_to_group
|
||||
kind: pipeline
|
||||
lang: py
|
||||
domain: pipelines
|
||||
version: "1.0.0"
|
||||
version: "1.1.0"
|
||||
purity: impure
|
||||
signature: "def metabase_bulk_add_users_to_group(client: MetabaseClient, group: int | str, targets: list[dict], send_reset_existing: bool = False) -> list[dict]"
|
||||
description: "Crea (si faltan) y añade al Permission Group dado una lista de usuarios. Idempotente: usuarios existentes se re-añaden solo si no son miembros. Soporta resolver group por id o por nombre (substring case-insensitive). Opcionalmente reenvia mail de reset a los ya existentes."
|
||||
tags: [metabase, users, groups, bulk]
|
||||
uses_functions:
|
||||
- clean_email_py_core
|
||||
- metabase_list_groups_py_infra
|
||||
- metabase_list_users_py_infra
|
||||
- metabase_get_group_py_infra
|
||||
@@ -79,3 +80,8 @@ Cuando tengas que dar de alta o re-añadir N usuarios al mismo Permission Group
|
||||
- `send_reset_existing=True` solo afecta a los `status="existing"`. Los `status="created"` no reciben reset (ya recibieron invitacion).
|
||||
- `group` por nombre (str): si el substring matchea 0 o >1 grupos -> `ValueError`. Preferir `group_id` (int) si hay nombres ambiguos.
|
||||
- La respuesta de `metabase_list_users` puede ser un dict con key `data` o directamente la lista, dependiendo de la version de Metabase. El pipeline maneja ambos formatos.
|
||||
- Desde v1.1.0 cada `target["email"]` pasa por `clean_email` antes de cualquier llamada: elimina espacios en blanco, angle brackets `<...>` y comillas, y normaliza a minusculas. Si la limpieza falla (ValueError) la fila sale como `status="create_failed"` sin tocar Metabase. Esto absorbe TSVs/CSVs con formato Outlook (`" <foo@bar.com>"`) que antes provocaban 400 Bad Request al POST /api/user.
|
||||
|
||||
## Capability growth log
|
||||
|
||||
- v1.1.0 (2026-05-28) — integra `clean_email_py_core` para normalizar emails de cada target. Absorbe `<...>`, comillas y whitespace antes del POST a Metabase. Fix tras incidente real con TSV de 90 jefes de centro donde 1 fila tenia `" <eaznarez@mutuamadmotor.com>"` y rompia create_user con 400.
|
||||
|
||||
@@ -12,6 +12,7 @@ import os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
import httpx
|
||||
from core import clean_email
|
||||
from metabase import (
|
||||
MetabaseClient,
|
||||
metabase_list_groups,
|
||||
@@ -39,6 +40,10 @@ def metabase_bulk_add_users_to_group(
|
||||
group: group_id (int) o nombre del grupo (str, substring
|
||||
case-insensitive). Debe matchear exactamente 1 grupo.
|
||||
targets: lista de dicts con keys first_name, last_name, email.
|
||||
El email se normaliza via clean_email (strip whitespace,
|
||||
angle brackets `<...>`, comillas, lowercase). Si el raw
|
||||
es invalido, esa fila sale como status=create_failed sin
|
||||
hacer ninguna llamada a Metabase.
|
||||
send_reset_existing: si True, reenvia POST /api/session/forgot_password
|
||||
a los usuarios ya existentes (no a los recien
|
||||
creados, que ya recibieron invitacion).
|
||||
@@ -93,11 +98,10 @@ def metabase_bulk_add_users_to_group(
|
||||
for target in targets:
|
||||
first_name = target["first_name"]
|
||||
last_name = target["last_name"]
|
||||
email = target["email"]
|
||||
email_lower = email.lower()
|
||||
raw_email = target["email"]
|
||||
|
||||
row: dict = {
|
||||
"email": email,
|
||||
"email": raw_email,
|
||||
"user_id": None,
|
||||
"status": None,
|
||||
"membership": None,
|
||||
@@ -105,6 +109,19 @@ def metabase_bulk_add_users_to_group(
|
||||
"error": None,
|
||||
}
|
||||
|
||||
# Normalizar email (strip whitespace, <...>, comillas, lowercase).
|
||||
# Si el raw es invalido (no parseable como email), saltar con error.
|
||||
try:
|
||||
email = clean_email(raw_email)
|
||||
except ValueError as exc:
|
||||
row["status"] = "create_failed"
|
||||
row["membership"] = "skipped"
|
||||
row["error"] = str(exc)
|
||||
results.append(row)
|
||||
continue
|
||||
row["email"] = email
|
||||
email_lower = email # ya lowercase tras clean_email
|
||||
|
||||
# Determinar si el usuario ya existe
|
||||
if email_lower in by_email:
|
||||
uid = by_email[email_lower]["id"]
|
||||
|
||||
Reference in New Issue
Block a user