Files
fn_registry/python/functions/datascience/build_geo_scatter_test.py
T
egutierrez cd658cc703 feat(eda): primitivas geoespaciales del grupo eda (detección lat/lon + extensión + scatter)
Tres funciones puras nuevas del dominio datascience (tags eda + geospatial) que
sostienen el capítulo GEOSPATIAL del AutomaticEDA, delegadas a fn-constructor:

- detect_latlon_columns: identifica el par (lat, lon) por nombre de columna +
  rango de valores ([-90,90] / [-180,180]) desde profile['columns']. Devuelve
  {lat_col, lon_col, confidence, reason}. 9 tests.
- analyze_geo_extent: bbox, centroide, span haversine, conteo por zona/país
  (lookup offline con bounding boxes embebidos, KISS sin geopandas) y
  hemisferios. 7 tests.
- build_geo_scatter: prepara los puntos del scatter en orden [lon, lat] con
  downsampling determinista por paso fijo + aspect equirectangular 1/cos(lat)
  clampado. 6 tests.

Registradas en datascience/__init__.py. Todas pure, params_schema completo,
.md autosuficiente (Ejemplo + Cuando usarla + Gotchas).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 15:29:33 +02:00

141 lines
4.8 KiB
Python

"""Tests para build_geo_scatter."""
import math
import os
import sys
# Put this file's own directory first on the import path so the sibling
# module imported on the next line resolves regardless of the working directory.
sys.path.insert(0, os.path.dirname(__file__))
from build_geo_scatter import build_geo_scatter
# Keys that a non-empty result dict must always contain.
_EXPECTED_KEYS = {
"points", "n_total", "n_shown", "downsampled", "bbox", "aspect", "pad",
}
def test_geo_scatter_nube_espana():
    """Golden case: a small point cloud over Spain.

    Verifies the [lon, lat] ordering of the points, the counters, the
    bounding box, an aspect above 1 and a padding of 5% of each range.
    """
    # Four points around Madrid (latitude ~40, negative longitude).
    latitudes = [40.0, 41.0, 39.0, 40.5]
    longitudes = [-3.7, -3.0, -4.0, -3.5]
    result = build_geo_scatter(latitudes, longitudes)

    assert set(result.keys()) == _EXPECTED_KEYS

    # Each point is [x=lon, y=lat], emitted in input order.
    expected_points = [[-3.7, 40.0], [-3.0, 41.0], [-4.0, 39.0], [-3.5, 40.5]]
    assert result["points"] == expected_points
    for x, y in result["points"]:
        assert x < 0.0  # every longitude used here is negative
        assert 36.0 < y < 44.0  # every latitude sits in the peninsular band

    # No downsampling expected: 4 points, presumably well under the default cap.
    assert result["n_total"] == 4
    assert result["n_shown"] == 4
    assert result["downsampled"] is False

    # Bounding box spans the min/max of each coordinate.
    expected_bbox = {
        "lat_min": 39.0,
        "lat_max": 41.0,
        "lon_min": -4.0,
        "lon_max": -3.0,
    }
    assert result["bbox"] == expected_bbox

    # aspect is expected to be 1 / cos(mean latitude); mean = 40.125 -> ~1.31.
    mean_lat = (40.0 + 41.0 + 39.0 + 40.5) / 4.0
    aspect_ref = 1.0 / math.cos(math.radians(mean_lat))
    aspect = result["aspect"]
    assert aspect > 1.0
    assert abs(aspect - aspect_ref) < 1e-9
    assert abs(aspect - 1.305) < 0.02  # cos(40 deg) ~ 0.77

    # Padding is 5% of each range: lon range 1.0 -> 0.05, lat range 2.0 -> 0.1.
    padding = result["pad"]
    assert abs(padding["lon"] - 0.05) < 1e-9
    assert abs(padding["lat"] - 0.10) < 1e-9
def test_downsampling_determinista_y_reproducible():
    """Golden case: 5000 points capped with max_points=2000.

    The result must be flagged as downsampled, stay within the cap, and be
    exactly reproducible across calls with the same input.
    """
    total = 5000
    offsets = [(i % 100) * 0.01 for i in range(total)]
    lats = [40.0 + delta for delta in offsets]
    lons = [-3.0 - delta for delta in offsets]

    first = build_geo_scatter(lats, lons, max_points=2000)
    assert first["n_total"] == 5000
    assert first["n_shown"] <= 2000
    assert first["downsampled"] is True
    # 1667 is what a fixed stride of ceil(5000 / 2000) = 3 yields
    # (indices 0, 3, 6, ...).
    assert first["n_shown"] == 1667

    # Deterministic: a second call on the same input gives the very same output.
    second = build_geo_scatter(lats, lons, max_points=2000)
    assert first == second
    assert first["points"] == second["points"]

    # The sample is expected to start at the first valid pair (index 0).
    assert first["points"][0] == [lons[0], lats[0]]
def test_listas_vacias_no_lanza():
    """Edge case: empty lists or None inputs yield an empty result, no exception."""
    empty = build_geo_scatter([], [])
    assert empty["points"] == []
    assert empty["n_total"] == 0
    assert empty["n_shown"] == 0
    assert empty["downsampled"] is False
    assert empty["bbox"] is None
    assert empty["aspect"] == 1.0
    assert empty["pad"] == {"lon": 0.0, "lat": 0.0}

    # None on both sides must not raise either.
    both_none = build_geo_scatter(None, None)
    assert both_none["points"] == []

    # None on just one side leaves no usable pair.
    missing_lons = build_geo_scatter([40.0], None)
    assert missing_lons["n_total"] == 0
    missing_lats = build_geo_scatter(None, [-3.0])
    assert missing_lats["n_total"] == 0
def test_un_solo_punto_pad_minimo_y_aspect_finito():
    """Edge case: one point gives a degenerate bbox, non-zero pad, finite aspect."""
    result = build_geo_scatter([40.0], [-3.7])

    assert result["n_total"] == 1
    assert result["n_shown"] == 1
    assert result["points"] == [[-3.7, 40.0]]
    assert result["downsampled"] is False

    # With one point, min and max coincide on both axes.
    degenerate_bbox = {
        "lat_min": 40.0,
        "lat_max": 40.0,
        "lon_min": -3.7,
        "lon_max": -3.7,
    }
    assert result["bbox"] == degenerate_bbox

    # A zero range must fall back to the minimum pad instead of zero.
    padding = result["pad"]
    assert padding["lon"] == 0.01
    assert padding["lat"] == 0.01

    # aspect stays finite and inside the [0.3, 5.0] interval.
    aspect = result["aspect"]
    assert math.isfinite(aspect)
    assert 0.3 <= aspect <= 5.0
def test_filtra_none_nan_y_fuera_de_rango():
    """Edge case: pairs holding None, NaN, inf or out-of-range values are dropped."""
    nan = float("nan")
    inf = float("inf")
    # One (lat, lon) row per index; only rows 0 and 5 are expected to survive.
    rows = [
        (40.0, -3.0),   # i=0 valid
        (None, -3.5),   # i=1 lat is None
        (nan, -3.6),    # i=2 lat is NaN
        (200.0, -3.7),  # i=3 lat out of range
        (41.0, 999.0),  # i=4 lon out of range
        (39.0, -4.0),   # i=5 valid
        (inf, -2.0),    # i=6 lat is inf
    ]
    lats = [lat for lat, _ in rows]
    lons = [lon for _, lon in rows]

    result = build_geo_scatter(lats, lons)

    assert result["n_total"] == 2
    assert result["points"] == [[-3.0, 40.0], [-4.0, 39.0]]
    expected_bbox = {
        "lat_min": 39.0,
        "lat_max": 40.0,
        "lon_min": -4.0,
        "lon_max": -3.0,
    }
    assert result["bbox"] == expected_bbox
def test_latitud_alta_aspect_clamped():
    """Edge case: latitudes near 85 degrees must give an aspect capped at 5.0."""
    high_lats = [85.0, 85.0, 84.0]
    lons = [10.0, 11.0, 9.0]
    result = build_geo_scatter(high_lats, lons)

    # cos(~84.7 deg) ~ 0.093, so the raw 1/cos would be ~10.7; expect the cap.
    aspect = result["aspect"]
    assert aspect <= 5.0
    assert aspect == 5.0
    assert math.isfinite(aspect)