From ec9857acbe39883877591b042e38a7aaeca327b5 Mon Sep 17 00:00:00 2001 From: egutierrez Date: Mon, 1 Jun 2026 11:39:08 +0200 Subject: [PATCH] chore: auto-commit (7 archivos) - python/functions/core/__init__.py - python/functions/pipelines/metabase_bulk_add_users_to_group.md - python/functions/pipelines/metabase_bulk_add_users_to_group.py - cpp/apps/ - python/functions/core/clean_email.md - python/functions/core/clean_email.py - python/functions/core/clean_email_test.py Co-Authored-By: Claude Opus 4.7 (1M context) --- cpp/apps/chart_demo | 1 + cpp/apps/shaders_lab | 1 + python/functions/core/__init__.py | 2 + python/functions/core/clean_email.md | 59 +++++++++++++++++++ python/functions/core/clean_email.py | 53 +++++++++++++++++ python/functions/core/clean_email_test.py | 43 ++++++++++++++ .../metabase_bulk_add_users_to_group.md | 8 ++- .../metabase_bulk_add_users_to_group.py | 23 +++++++- 8 files changed, 186 insertions(+), 4 deletions(-) create mode 160000 cpp/apps/chart_demo create mode 160000 cpp/apps/shaders_lab create mode 100644 python/functions/core/clean_email.md create mode 100644 python/functions/core/clean_email.py create mode 100644 python/functions/core/clean_email_test.py diff --git a/cpp/apps/chart_demo b/cpp/apps/chart_demo new file mode 160000 index 00000000..026f514b --- /dev/null +++ b/cpp/apps/chart_demo @@ -0,0 +1 @@ +Subproject commit 026f514bb72d5fb00c57493ea09101a3d2cf4894 diff --git a/cpp/apps/shaders_lab b/cpp/apps/shaders_lab new file mode 160000 index 00000000..dc9a970a --- /dev/null +++ b/cpp/apps/shaders_lab @@ -0,0 +1 @@ +Subproject commit dc9a970aff759837bfe12c5e640a66dc64edfbd2 diff --git a/python/functions/core/__init__.py b/python/functions/core/__init__.py index 38a60350..491b87c1 100644 --- a/python/functions/core/__init__.py +++ b/python/functions/core/__init__.py @@ -1,5 +1,6 @@ """Core functional programming utilities.""" +from .clean_email import clean_email from .core import ( all_of, any_of, @@ -25,6 +26,7 @@ __all__ = [ 
"all_of", "any_of", "chunk", + "clean_email", "compose", "drop", "filter_list", diff --git a/python/functions/core/clean_email.md b/python/functions/core/clean_email.md new file mode 100644 index 00000000..498835b2 --- /dev/null +++ b/python/functions/core/clean_email.md @@ -0,0 +1,59 @@ +--- +name: clean_email +kind: function +lang: py +domain: core +version: "1.0.0" +purity: pure +signature: "def clean_email(raw: str) -> str" +description: "Normaliza un email crudo que puede provenir de fuentes sucias (TSVs, CSVs, hojas de calculo). Elimina whitespace, angle brackets y comillas, convierte a minusculas y valida la forma basica. Lanza ValueError si el resultado no es un email plausible." +tags: [email, sanitize, validation, core] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +tested: true +tests: + - "angle brackets y whitespace se limpian" + - "comillas dobles y mayusculas se normalizan" + - "whitespace puro se limpia y lowercase" + - "email sin arroba lanza ValueError" + - "string vacio lanza ValueError" + - "email sin punto en dominio lanza ValueError" +test_file_path: "python/functions/core/clean_email_test.py" +file_path: "python/functions/core/clean_email.py" +params: + - name: raw + desc: "String crudo con la direccion de email, potencialmente rodeado de whitespace, angle brackets () o comillas simples/dobles." +output: "Email limpio en minusculas con la forma 'local@dominio.tld'. Lanza ValueError si no es un email plausible tras la limpieza." 
+--- + +## Ejemplo + +```python +from core.clean_email import clean_email + +# Caso tipico de TSV/Outlook con angle brackets y whitespace +clean_email(" <eaznarez@mutuamadmotor.com> ") +# -> "eaznarez@mutuamadmotor.com" + +# Comillas dobles y mayusculas desde spreadsheet +clean_email('"BAR@BAZ.com"') +# -> "bar@baz.com" + +# Caso invalido: lanza ValueError +import pytest +with pytest.raises(ValueError): + clean_email("not-an-email") +``` + +## Cuando usarla + +Antes de enviar una direccion de email a una API que valida estrictamente (ej. Metabase `POST /api/user`, SendGrid, etc.) cuando el origen del dato es un CSV, TSV, Excel, o campo pegado a mano. Usar esta funcion como paso de limpieza antes de cualquier llamada de creacion de usuarios o envio de correos. + +## Gotchas + +- NO valida registros MX ni que el dominio exista realmente — solo comprueba forma basica (un `@`, local part no vacia, al menos un `.` en el dominio). +- NO decodifica quoted-printable ni dominios IDN/internacionales — solo limpieza de caracteres ASCII de envoltura. diff --git a/python/functions/core/clean_email.py b/python/functions/core/clean_email.py new file mode 100644 index 00000000..09349333 --- /dev/null +++ b/python/functions/core/clean_email.py @@ -0,0 +1,53 @@ +"""Normalize a raw email string from messy sources (TSVs, CSVs, spreadsheets).""" + + +def clean_email(raw: str) -> str: + """Strip whitespace, angle brackets, surrounding quotes; lowercase; validate basic shape. + + Args: + raw: Raw email string that may contain surrounding whitespace, angle brackets + (Outlook/RFC 5322 display-name form), or surrounding quotes. + + Returns: + Cleaned, lowercased email address. + + Raises: + ValueError: If the cleaned result is not a plausible email (must contain + exactly one '@', non-empty local part, and at least one '.' + in the domain part). 
+ """ + cleaned = raw.strip() + + # Strip surrounding angle brackets: -> foo@bar.com + if cleaned.startswith("<") and cleaned.endswith(">"): + cleaned = cleaned[1:-1].strip() + + # Strip surrounding double quotes: "foo@bar.com" -> foo@bar.com + if len(cleaned) >= 2 and cleaned[0] == '"' and cleaned[-1] == '"': + cleaned = cleaned[1:-1].strip() + # Strip surrounding single quotes: 'foo@bar.com' -> foo@bar.com + elif len(cleaned) >= 2 and cleaned[0] == "'" and cleaned[-1] == "'": + cleaned = cleaned[1:-1].strip() + + cleaned = cleaned.lower() + + # Validate basic shape + at_count = cleaned.count("@") + if at_count != 1: + raise ValueError( + f"invalid email after cleaning: {raw!r} -> {cleaned!r}" + ) + + local, domain = cleaned.split("@", 1) + + if not local: + raise ValueError( + f"invalid email after cleaning: {raw!r} -> {cleaned!r}" + ) + + if "." not in domain: + raise ValueError( + f"invalid email after cleaning: {raw!r} -> {cleaned!r}" + ) + + return cleaned diff --git a/python/functions/core/clean_email_test.py b/python/functions/core/clean_email_test.py new file mode 100644 index 00000000..a00f4e4d --- /dev/null +++ b/python/functions/core/clean_email_test.py @@ -0,0 +1,43 @@ +"""Tests para clean_email.""" +import sys +import os + +sys.path.insert(0, os.path.dirname(__file__)) + +import pytest +from clean_email import clean_email + + +def test_angle_brackets_y_whitespace_se_limpian(): + assert clean_email(" ") == "foo@bar.com" + + +def test_comillas_dobles_y_mayusculas_se_normalizan(): + assert clean_email('"BAR@BAZ.com"') == "bar@baz.com" + + +def test_whitespace_puro_se_limpia_y_lowercase(): + assert clean_email(" Mixed.Case@Example.COM ") == "mixed.case@example.com" + + +def test_email_sin_arroba_lanza_valueerror(): + with pytest.raises(ValueError): + clean_email("not-an-email") + + +def test_string_vacio_lanza_valueerror(): + with pytest.raises(ValueError): + clean_email("") + + +def test_email_sin_punto_en_dominio_lanza_valueerror(): + with 
pytest.raises(ValueError): + clean_email("a@b") + + +def test_comillas_simples_se_limpian(): + assert clean_email("'user@domain.es'") == "user@domain.es" + + +def test_angle_brackets_con_espacio_interior(): + assert clean_email("< user@example.com >") == "user@example.com" diff --git a/python/functions/pipelines/metabase_bulk_add_users_to_group.md b/python/functions/pipelines/metabase_bulk_add_users_to_group.md index 6ca4dd54..3a1187f7 100644 --- a/python/functions/pipelines/metabase_bulk_add_users_to_group.md +++ b/python/functions/pipelines/metabase_bulk_add_users_to_group.md @@ -3,12 +3,13 @@ name: metabase_bulk_add_users_to_group kind: pipeline lang: py domain: pipelines -version: "1.0.0" +version: "1.1.0" purity: impure signature: "def metabase_bulk_add_users_to_group(client: MetabaseClient, group: int | str, targets: list[dict], send_reset_existing: bool = False) -> list[dict]" description: "Crea (si faltan) y añade al Permission Group dado una lista de usuarios. Idempotente: usuarios existentes se re-añaden solo si no son miembros. Soporta resolver group por id o por nombre (substring case-insensitive). Opcionalmente reenvia mail de reset a los ya existentes." tags: [metabase, users, groups, bulk] uses_functions: + - clean_email_py_core - metabase_list_groups_py_infra - metabase_list_users_py_infra - metabase_get_group_py_infra @@ -79,3 +80,8 @@ Cuando tengas que dar de alta o re-añadir N usuarios al mismo Permission Group - `send_reset_existing=True` solo afecta a los `status="existing"`. Los `status="created"` no reciben reset (ya recibieron invitacion). - `group` por nombre (str): si el substring matchea 0 o >1 grupos -> `ValueError`. Preferir `group_id` (int) si hay nombres ambiguos. - La respuesta de `metabase_list_users` puede ser un dict con key `data` o directamente la lista, dependiendo de la version de Metabase. El pipeline maneja ambos formatos. 
+- Desde v1.1.0 cada `target["email"]` pasa por `clean_email` antes de cualquier llamada: strippea espacios, angle brackets `<...>`, comillas y normaliza a minusculas. Si la limpieza falla (ValueError) la fila sale como `status="create_failed"` sin tocar Metabase. Esto absorbe TSVs/CSVs con formato Outlook (`" <usuario@dominio.com> "`) que antes provocaban 400 Bad Request al POST /api/user. + +## Capability growth log + +- v1.1.0 (2026-05-28) — integra `clean_email_py_core` para normalizar emails de cada target. Absorbe `<...>`, comillas y whitespace antes del POST a Metabase. Fix tras incidente real con TSV de 90 jefes de centro donde 1 fila tenia `" <usuario@dominio.com> "` y rompia create_user con 400. diff --git a/python/functions/pipelines/metabase_bulk_add_users_to_group.py b/python/functions/pipelines/metabase_bulk_add_users_to_group.py index 1bb6804a..982686bd 100644 --- a/python/functions/pipelines/metabase_bulk_add_users_to_group.py +++ b/python/functions/pipelines/metabase_bulk_add_users_to_group.py @@ -12,6 +12,7 @@ import os sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) import httpx +from core import clean_email from metabase import ( MetabaseClient, metabase_list_groups, @@ -39,6 +40,10 @@ def metabase_bulk_add_users_to_group( group: group_id (int) o nombre del grupo (str, substring case-insensitive). Debe matchear exactamente 1 grupo. targets: lista de dicts con keys first_name, last_name, email. + El email se normaliza via clean_email (strip whitespace, + angle brackets `<...>`, comillas, lowercase). Si el raw + es invalido, esa fila sale como status=create_failed sin + hacer ninguna llamada a Metabase. send_reset_existing: si True, reenvia POST /api/session/forgot_password a los usuarios ya existentes (no a los recien creados, que ya recibieron invitacion). 
@@ -93,11 +98,10 @@ def metabase_bulk_add_users_to_group( for target in targets: first_name = target["first_name"] last_name = target["last_name"] - email = target["email"] - email_lower = email.lower() + raw_email = target["email"] row: dict = { - "email": email, + "email": raw_email, "user_id": None, "status": None, "membership": None, @@ -105,6 +109,19 @@ def metabase_bulk_add_users_to_group( "error": None, } + # Normalizar email (strip whitespace, <...>, comillas, lowercase). + # Si el raw es invalido (no parseable como email), saltar con error. + try: + email = clean_email(raw_email) + except ValueError as exc: + row["status"] = "create_failed" + row["membership"] = "skipped" + row["error"] = str(exc) + results.append(row) + continue + row["email"] = email + email_lower = email # ya lowercase tras clean_email + # Determinar si el usuario ya existe if email_lower in by_email: uid = by_email[email_lower]["id"]