chore: auto-commit (7 archivos)
- python/functions/core/__init__.py - python/functions/pipelines/metabase_bulk_add_users_to_group.md - python/functions/pipelines/metabase_bulk_add_users_to_group.py - cpp/apps/ - python/functions/core/clean_email.md - python/functions/core/clean_email.py - python/functions/core/clean_email_test.py Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
"""Core functional programming utilities."""
|
||||
|
||||
from .clean_email import clean_email
|
||||
from .core import (
|
||||
all_of,
|
||||
any_of,
|
||||
@@ -25,6 +26,7 @@ __all__ = [
|
||||
"all_of",
|
||||
"any_of",
|
||||
"chunk",
|
||||
"clean_email",
|
||||
"compose",
|
||||
"drop",
|
||||
"filter_list",
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
---
|
||||
name: clean_email
|
||||
kind: function
|
||||
lang: py
|
||||
domain: core
|
||||
version: "1.0.0"
|
||||
purity: pure
|
||||
signature: "def clean_email(raw: str) -> str"
|
||||
description: "Normaliza un email crudo que puede provenir de fuentes sucias (TSVs, CSVs, hojas de calculo). Elimina whitespace, angle brackets y comillas, convierte a minusculas y valida la forma basica. Lanza ValueError si el resultado no es un email plausible."
|
||||
tags: [email, sanitize, validation, core]
|
||||
uses_functions: []
|
||||
uses_types: []
|
||||
returns: []
|
||||
returns_optional: false
|
||||
error_type: ""
|
||||
imports: []
|
||||
tested: true
|
||||
tests:
|
||||
- "angle brackets y whitespace se limpian"
|
||||
- "comillas dobles y mayusculas se normalizan"
|
||||
- "whitespace puro se limpia y lowercase"
|
||||
- "email sin arroba lanza ValueError"
|
||||
- "string vacio lanza ValueError"
|
||||
- "email sin punto en dominio lanza ValueError"
|
||||
test_file_path: "python/functions/core/clean_email_test.py"
|
||||
file_path: "python/functions/core/clean_email.py"
|
||||
params:
|
||||
- name: raw
|
||||
desc: "String crudo con la direccion de email, potencialmente rodeado de whitespace, angle brackets (<foo@bar.com>) o comillas simples/dobles."
|
||||
output: "Email limpio en minusculas con la forma 'local@dominio.tld'. Lanza ValueError si no es un email plausible tras la limpieza."
|
||||
---
|
||||
|
||||
## Ejemplo
|
||||
|
||||
```python
|
||||
from core.clean_email import clean_email
|
||||
|
||||
# Caso tipico de TSV/Outlook con angle brackets y whitespace
|
||||
clean_email(" <eaznarez@mutuamadmotor.com>")
|
||||
# -> "eaznarez@mutuamadmotor.com"
|
||||
|
||||
# Comillas dobles y mayusculas desde spreadsheet
|
||||
clean_email('"BAR@BAZ.com"')
|
||||
# -> "bar@baz.com"
|
||||
|
||||
# Caso invalido: lanza ValueError
|
||||
import pytest
|
||||
with pytest.raises(ValueError):
|
||||
clean_email("not-an-email")
|
||||
```
|
||||
|
||||
## Cuando usarla
|
||||
|
||||
Antes de enviar una direccion de email a una API que valida estrictamente (ej. Metabase `POST /api/user`, SendGrid, etc.) cuando el origen del dato es un CSV, TSV, Excel, o campo pegado a mano. Usar esta funcion como paso de limpieza antes de cualquier llamada de creacion de usuarios o envio de correos.
|
||||
|
||||
## Gotchas
|
||||
|
||||
- NO valida registros MX ni que el dominio exista realmente — solo comprueba forma basica (un `@`, local part no vacia, al menos un `.` en el dominio).
|
||||
- NO decodifica quoted-printable ni dominios IDN/internacionales — solo limpieza de caracteres ASCII de envoltura.
|
||||
@@ -0,0 +1,53 @@
|
||||
"""Normalize a raw email string from messy sources (TSVs, CSVs, spreadsheets)."""
|
||||
|
||||
|
||||
def clean_email(raw: str) -> str:
    """Normalize a raw email string and validate its basic shape.

    Cleaning steps, in order:

    1. Strip surrounding whitespace.
    2. Repeatedly peel wrappers until none is left: a trailing
       ``<address>`` group (optionally preceded by a display name, as in
       ``John Doe <john@example.com>``) and matching surrounding single or
       double quotes. Looping makes the result independent of the nesting
       order, so ``"<foo@bar.com>"`` and ``<"foo@bar.com">`` both clean up.
    3. Lowercase the result.

    Args:
        raw: Raw email string that may contain surrounding whitespace, angle
            brackets (bare or in display-name form), or surrounding quotes.

    Returns:
        Cleaned, lowercased email address.

    Raises:
        ValueError: If ``raw`` is not a string, or if the cleaned result is
            not a plausible email. A plausible email contains exactly one
            '@', a non-empty local part, no whitespace or angle brackets,
            and a domain with at least one '.' separating non-empty labels.
    """
    if not isinstance(raw, str):
        # Empty spreadsheet cells often arrive as None/NaN; report them the
        # same way as any other unusable value instead of an AttributeError.
        raise ValueError(
            f"invalid email: expected str, got {type(raw).__name__}"
        )

    cleaned = raw.strip()

    # Peel wrappers until the string stops changing. Every successful pass
    # removes at least two characters, so the loop always terminates.
    while True:
        before = cleaned
        if cleaned.endswith(">") and "<" in cleaned:
            # Keep only what sits inside the last <...> group; this covers
            # both "<foo@bar.com>" and "Name <foo@bar.com>".
            cleaned = cleaned[cleaned.rfind("<") + 1:-1].strip()
        elif (
            len(cleaned) >= 2
            and cleaned[0] == cleaned[-1]
            and cleaned[0] in ('"', "'")
        ):
            cleaned = cleaned[1:-1].strip()
        if cleaned == before:
            break

    cleaned = cleaned.lower()

    # Validate basic shape.
    local, _, domain = cleaned.partition("@")
    has_stray_chars = any(ch.isspace() or ch in "<>" for ch in cleaned)
    # all(...) rejects empty labels such as "a@.com", "a@b." or "a@b..com".
    domain_ok = "." in domain and all(domain.split("."))

    if cleaned.count("@") != 1 or not local or has_stray_chars or not domain_ok:
        raise ValueError(
            f"invalid email after cleaning: {raw!r} -> {cleaned!r}"
        )

    return cleaned
|
||||
@@ -0,0 +1,43 @@
|
||||
"""Tests para clean_email."""
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
|
||||
import pytest
|
||||
from clean_email import clean_email
|
||||
|
||||
|
||||
def test_angle_brackets_y_whitespace_se_limpian():
    """Leading whitespace and surrounding angle brackets are removed."""
    raw = " <foo@bar.com>"
    expected = "foo@bar.com"
    assert clean_email(raw) == expected
|
||||
|
||||
|
||||
def test_comillas_dobles_y_mayusculas_se_normalizan():
    """Surrounding double quotes are dropped and the address is lowercased."""
    raw = '"BAR@BAZ.com"'
    expected = "bar@baz.com"
    assert clean_email(raw) == expected
|
||||
|
||||
|
||||
def test_whitespace_puro_se_limpia_y_lowercase():
    """Whitespace on both sides is trimmed and mixed case is lowercased."""
    raw = " Mixed.Case@Example.COM "
    expected = "mixed.case@example.com"
    assert clean_email(raw) == expected
|
||||
|
||||
|
||||
def test_email_sin_arroba_lanza_valueerror():
    """A string without any '@' is rejected with ValueError."""
    missing_at = "not-an-email"
    with pytest.raises(ValueError):
        clean_email(missing_at)
|
||||
|
||||
|
||||
def test_string_vacio_lanza_valueerror():
    """The empty string is rejected with ValueError."""
    empty = ""
    with pytest.raises(ValueError):
        clean_email(empty)
|
||||
|
||||
|
||||
def test_email_sin_punto_en_dominio_lanza_valueerror():
    """A domain part without a '.' is rejected with ValueError."""
    dotless_domain = "a@b"
    with pytest.raises(ValueError):
        clean_email(dotless_domain)
|
||||
|
||||
|
||||
def test_comillas_simples_se_limpian():
    """Surrounding single quotes are removed."""
    raw = "'user@domain.es'"
    expected = "user@domain.es"
    assert clean_email(raw) == expected
|
||||
|
||||
|
||||
def test_angle_brackets_con_espacio_interior():
    """Whitespace just inside the angle brackets is trimmed as well."""
    raw = "< user@example.com >"
    expected = "user@example.com"
    assert clean_email(raw) == expected
|
||||
Reference in New Issue
Block a user