cd658cc703
Tres funciones puras nuevas del dominio datascience (tags eda + geospatial) que
sostienen el capítulo GEOSPATIAL del AutomaticEDA, delegadas a fn-constructor:
- detect_latlon_columns: identifica el par (lat, lon) por nombre de columna +
rango de valores ([-90,90] / [-180,180]) desde profile['columns']. Devuelve
{lat_col, lon_col, confidence, reason}. 9 tests.
- analyze_geo_extent: bbox, centroide, span haversine, conteo por zona/país
(lookup offline con bounding boxes embebidos, KISS sin geopandas) y
hemisferios. 7 tests.
- build_geo_scatter: prepara los puntos del scatter en orden [lon, lat] con
downsampling determinista por paso fijo + aspect equirectangular 1/cos(lat)
clampado. 6 tests.
Registradas en datascience/__init__.py. Todas pure, params_schema completo,
.md autosuficiente (Ejemplo + Cuando usarla + Gotchas).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
141 lines
4.8 KiB
Python
141 lines
4.8 KiB
Python
"""Tests para build_geo_scatter."""
|
|
|
|
import math
|
|
import os
|
|
import sys
|
|
|
|
sys.path.insert(0, os.path.dirname(__file__))
|
|
|
|
from build_geo_scatter import build_geo_scatter
|
|
|
|
# Exact set of top-level keys the tests expect in the dict returned by
# build_geo_scatter for a non-empty input.
_EXPECTED_KEYS = {
    "points",
    "n_total",
    "n_shown",
    "downsampled",
    "bbox",
    "aspect",
    "pad",
}
|
|
|
|
|
|
def test_geo_scatter_nube_espana():
    """Golden path: a small point cloud over Spain.

    Verifies the [lon, lat] ordering of the points, the counters, the
    bounding box, an aspect ratio above 1 and the 5% padding.
    """
    # Four points around Madrid (lat ~40, negative lon).
    lats = [40.0, 41.0, 39.0, 40.5]
    lons = [-3.7, -3.0, -4.0, -3.5]
    result = build_geo_scatter(lats, lons)

    assert set(result.keys()) == _EXPECTED_KEYS

    # Each point is [x=lon, y=lat]: first element is the (negative)
    # longitude, second element is the latitude (~40).
    points = result["points"]
    assert points == [[-3.7, 40.0], [-3.0, 41.0], [-4.0, 39.0], [-3.5, 40.5]]
    # Longitudes over Spain are negative.
    assert all(lon < 0.0 for lon, _ in points)
    # Latitudes stay within the peninsular range.
    assert all(36.0 < lat < 44.0 for _, lat in points)

    # No downsampling expected: 4 points is far below the limit
    # (2000 according to the original note).
    assert result["n_total"] == 4
    assert result["n_shown"] == 4
    assert result["downsampled"] is False

    # Bounding box spans the min/max of each coordinate.
    assert result["bbox"] == dict(
        lat_min=39.0,
        lat_max=41.0,
        lon_min=-4.0,
        lon_max=-3.0,
    )

    # aspect = 1 / cos(centroid_lat); centroid = 40.125 -> ~1.31 > 1.
    aspect = result["aspect"]
    centroid_lat = sum(lats) / 4.0
    expected_aspect = 1.0 / math.cos(math.radians(centroid_lat))
    assert aspect > 1.0
    assert abs(aspect - expected_aspect) < 1e-9
    assert abs(aspect - 1.305) < 0.02  # cos(40 deg) ~ 0.77

    # Padding is 5% of each range (lon range 1.0 -> 0.05; lat range 2.0 -> 0.1).
    pad = result["pad"]
    assert abs(pad["lon"] - 0.05) < 1e-9
    assert abs(pad["lat"] - 0.10) < 1e-9
|
|
|
|
|
|
def test_downsampling_determinista_y_reproducible():
    """Golden path: 5000 points with max_points=2000.

    The output must be flagged as downsampled, contain at most 2000
    points, and be identical across repeated calls.
    """
    total = 5000
    lats = [40.0 + (idx % 100) * 0.01 for idx in range(total)]
    lons = [-3.0 - (idx % 100) * 0.01 for idx in range(total)]

    first = build_geo_scatter(lats, lons, max_points=2000)

    assert first["n_total"] == 5000
    assert first["n_shown"] <= 2000
    assert first["downsampled"] is True
    # step = ceil(5000 / 2000) = 3 -> len(pairs[::3]) = 1667.
    assert first["n_shown"] == 1667

    # Deterministic: a second call on the same input yields the same result.
    second = build_geo_scatter(lats, lons, max_points=2000)
    assert first == second
    assert first["points"] == second["points"]

    # The first downsampled point is the first valid pair (stride starts at 0).
    head = first["points"][0]
    assert head == [lons[0], lats[0]]
|
|
|
|
|
|
def test_listas_vacias_no_lanza():
    """Edge case: empty lists or None inputs give an empty result, no raise."""
    empty = build_geo_scatter([], [])
    assert empty["points"] == []
    assert empty["n_total"] == 0
    assert empty["n_shown"] == 0
    assert empty["downsampled"] is False
    assert empty["bbox"] is None
    assert empty["aspect"] == 1.0
    assert empty["pad"] == {"lon": 0.0, "lat": 0.0}

    # None as input must not raise either.
    assert build_geo_scatter(None, None)["points"] == []
    # A single missing side also yields zero usable pairs.
    for lats, lons in (([40.0], None), (None, [-3.0])):
        assert build_geo_scatter(lats, lons)["n_total"] == 0
|
|
|
|
|
|
def test_un_solo_punto_pad_minimo_y_aspect_finito():
    """Edge case: a single point.

    Expects a non-zero minimum padding, a degenerate bounding box and a
    finite aspect ratio.
    """
    lat, lon = 40.0, -3.7
    result = build_geo_scatter([lat], [lon])

    assert result["n_total"] == 1
    assert result["n_shown"] == 1
    assert result["points"] == [[-3.7, 40.0]]
    assert result["downsampled"] is False
    # Degenerate box: min and max coincide on both axes.
    assert result["bbox"] == dict(
        lat_min=40.0,
        lat_max=40.0,
        lon_min=-3.7,
        lon_max=-3.7,
    )

    # Zero range -> padding falls back to the minimum floor (not zero).
    pad = result["pad"]
    assert pad["lon"] == 0.01
    assert pad["lat"] == 0.01

    # Aspect is finite and inside the clamp interval.
    aspect = result["aspect"]
    assert math.isfinite(aspect)
    assert 0.3 <= aspect <= 5.0
|
|
|
|
|
|
def test_filtra_none_nan_y_fuera_de_rango():
    """Edge case: pairs with None/NaN/out-of-range values are dropped by index."""
    nan = float("nan")
    inf = float("inf")
    # (lat, lon) per input index, with the reason each pair is kept or dropped.
    pairs = [
        (40.0, -3.0),    # i=0 valid
        (None, -3.5),    # i=1 lat is None
        (nan, -3.6),     # i=2 lat is NaN
        (200.0, -3.7),   # i=3 lat 200 out of range
        (41.0, 999.0),   # i=4 lon 999 out of range
        (39.0, -4.0),    # i=5 valid
        (inf, -2.0),     # i=6 lat is inf
    ]
    lats = [lat for lat, _ in pairs]
    lons = [lon for _, lon in pairs]

    result = build_geo_scatter(lats, lons)

    # Only i=0 (40, -3.0) and i=5 (39, -4.0) survive the filtering.
    assert result["n_total"] == 2
    assert result["points"] == [[-3.0, 40.0], [-4.0, 39.0]]
    assert result["bbox"] == dict(
        lat_min=39.0,
        lat_max=40.0,
        lon_min=-4.0,
        lon_max=-3.0,
    )
|
|
|
|
|
|
def test_latitud_alta_aspect_clamped():
    """Edge case: latitudes near 85 degrees -> aspect is clamped to 5.0."""
    high_lats = [85.0, 85.0, 84.0]
    lons = [10.0, 11.0, 9.0]
    aspect = build_geo_scatter(high_lats, lons)["aspect"]

    # cos(~84.7 deg) ~ 0.093 -> 1 / 0.093 ~ 10.7, which is clamped to 5.0.
    assert aspect <= 5.0
    assert aspect == 5.0
    assert math.isfinite(aspect)
|