chore: auto-commit (7 archivos)

- python/functions/core/__init__.py
- python/functions/pipelines/metabase_bulk_add_users_to_group.md
- python/functions/pipelines/metabase_bulk_add_users_to_group.py
- cpp/apps/
- python/functions/core/clean_email.md
- python/functions/core/clean_email.py
- python/functions/core/clean_email_test.py

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-01 11:39:08 +02:00
parent 876020addf
commit 7eef442444
8 changed files with 186 additions and 4 deletions
+2
View File
@@ -1,5 +1,6 @@
"""Core functional programming utilities."""
from .clean_email import clean_email
from .core import (
all_of,
any_of,
@@ -25,6 +26,7 @@ __all__ = [
"all_of",
"any_of",
"chunk",
"clean_email",
"compose",
"drop",
"filter_list",
+59
View File
@@ -0,0 +1,59 @@
---
name: clean_email
kind: function
lang: py
domain: core
version: "1.0.0"
purity: pure
signature: "def clean_email(raw: str) -> str"
description: "Normaliza un email crudo que puede provenir de fuentes sucias (TSVs, CSVs, hojas de calculo). Elimina whitespace, angle brackets y comillas, convierte a minusculas y valida la forma basica. Lanza ValueError si el resultado no es un email plausible."
tags: [email, sanitize, validation, core]
uses_functions: []
uses_types: []
returns: []
returns_optional: false
error_type: ""
imports: []
tested: true
tests:
- "angle brackets y whitespace se limpian"
- "comillas dobles y mayusculas se normalizan"
- "whitespace puro se limpia y lowercase"
- "email sin arroba lanza ValueError"
- "string vacio lanza ValueError"
- "email sin punto en dominio lanza ValueError"
- "comillas simples se limpian"
- "angle brackets con espacio interior"
test_file_path: "python/functions/core/clean_email_test.py"
file_path: "python/functions/core/clean_email.py"
params:
- name: raw
desc: "String crudo con la direccion de email, potencialmente rodeado de whitespace, angle brackets (<foo@bar.com>) o comillas simples/dobles."
output: "Email limpio en minusculas con la forma 'local@dominio.tld'. Lanza ValueError si no es un email plausible tras la limpieza."
---
## Ejemplo
```python
from core.clean_email import clean_email
# Caso tipico de TSV/Outlook con angle brackets y whitespace
clean_email(" <eaznarez@mutuamadmotor.com>")
# -> "eaznarez@mutuamadmotor.com"
# Comillas dobles y mayusculas desde spreadsheet
clean_email('"BAR@BAZ.com"')
# -> "bar@baz.com"
# Caso invalido: lanza ValueError
import pytest
with pytest.raises(ValueError):
clean_email("not-an-email")
```
## Cuando usarla
Antes de enviar una direccion de email a una API que valida estrictamente (ej. Metabase `POST /api/user`, SendGrid, etc.) cuando el origen del dato es un CSV, TSV, Excel, o campo pegado a mano. Usar esta funcion como paso de limpieza antes de cualquier llamada de creacion de usuarios o envio de correos.
## Gotchas
- NO valida registros MX ni que el dominio exista realmente — solo comprueba forma basica (un `@`, local part no vacia, al menos un `.` en el dominio).
- NO decodifica quoted-printable ni dominios IDN/internacionales — solo limpieza de caracteres ASCII de envoltura.
+53
View File
@@ -0,0 +1,53 @@
"""Normalize a raw email string from messy sources (TSVs, CSVs, spreadsheets)."""
def clean_email(raw: str) -> str:
    """Normalize a raw email string and validate its basic shape.

    Surrounding whitespace, angle brackets and single/double quotes are
    peeled off repeatedly until none remain, so nested wrappers such as
    ``"<foo@bar.com>"`` or ``'"foo@bar.com"'`` are fully removed whatever
    their nesting order. The result is lowercased and then validated.

    Args:
        raw: Raw email string that may be wrapped in whitespace, angle
            brackets (``<foo@bar.com>``) or quotes. Display-name forms such
            as ``Name <foo@bar.com>`` are not parsed; they are rejected.

    Returns:
        The cleaned, lowercased email address.

    Raises:
        ValueError: If the cleaned result does not contain exactly one
            '@', has an empty local part, still contains whitespace or
            angle brackets, or has a domain without a '.' or with an
            empty label (e.g. ``a@.com``, ``a@b.``, ``a@b..com``).
    """
    wrappers = (("<", ">"), ('"', '"'), ("'", "'"))

    cleaned = raw.strip()
    # Peel wrapper pairs until the string is stable; a single fixed-order
    # pass would leave residue for inputs like "<foo@bar.com>" in quotes.
    while len(cleaned) >= 2 and (cleaned[0], cleaned[-1]) in wrappers:
        cleaned = cleaned[1:-1].strip()
    cleaned = cleaned.lower()

    message = f"invalid email after cleaning: {raw!r} -> {cleaned!r}"

    if cleaned.count("@") != 1:
        raise ValueError(message)

    # Leftover whitespace or angle brackets mean the input was not a single
    # bare address (e.g. "Name <foo@bar.com>" or "foo bar@baz.com").
    if any(ch.isspace() or ch in "<>" for ch in cleaned):
        raise ValueError(message)

    local, domain = cleaned.split("@")
    if not local:
        raise ValueError(message)

    # Require at least two domain labels and no empty ones, so that
    # "a@b", "a@.", "a@.com" and "a@b." are all rejected.
    labels = domain.split(".")
    if len(labels) < 2 or not all(labels):
        raise ValueError(message)

    return cleaned
+43
View File
@@ -0,0 +1,43 @@
"""Tests para clean_email."""
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
import pytest
from clean_email import clean_email
def test_angle_brackets_y_whitespace_se_limpian():
    """Leading whitespace and surrounding angle brackets are removed."""
    result = clean_email(" <foo@bar.com>")
    assert result == "foo@bar.com"
def test_comillas_dobles_y_mayusculas_se_normalizan():
    """Surrounding double quotes are dropped and the address is lowercased."""
    result = clean_email('"BAR@BAZ.com"')
    assert result == "bar@baz.com"
def test_whitespace_puro_se_limpia_y_lowercase():
    """Plain surrounding whitespace is trimmed and the address is lowercased."""
    result = clean_email(" Mixed.Case@Example.COM ")
    assert result == "mixed.case@example.com"
def test_email_sin_arroba_lanza_valueerror():
    """A string without an '@' is rejected with ValueError."""
    candidate = "not-an-email"
    with pytest.raises(ValueError):
        clean_email(candidate)
def test_string_vacio_lanza_valueerror():
    """The empty string is rejected with ValueError."""
    candidate = ""
    with pytest.raises(ValueError):
        clean_email(candidate)
def test_email_sin_punto_en_dominio_lanza_valueerror():
    """An address whose domain has no '.' is rejected with ValueError."""
    candidate = "a@b"
    with pytest.raises(ValueError):
        clean_email(candidate)
def test_comillas_simples_se_limpian():
    """Surrounding single quotes are removed."""
    result = clean_email("'user@domain.es'")
    assert result == "user@domain.es"
def test_angle_brackets_con_espacio_interior():
    """Whitespace just inside the angle brackets is trimmed as well."""
    result = clean_email("< user@example.com >")
    assert result == "user@example.com"