feat: extraccion masiva footprint_aurgi (41 funcs + 4 types + stack Docker geo)
Extrae al registry funciones del proyecto interno footprint_aurgi: - core (6): slugify_ascii, normalize_for_join, cp_provincia_es, infer_provincia_from_cp, safe_read_csv_fallback, csv_to_parquet_duckdb - geo puras (7): haversine_km, point_in_ring, point_in_polygon, point_in_polygons_bbox, polygon_bbox, extent_with_padding, distance_bucket - geo I/O (4): load_geojson_polygons, load_boundary_gdf, add_basemap_osm, add_basemap_with_timeout - valhalla client (4): valhalla_route, valhalla_isochrone, valhalla_isochrones_async, valhalla_matrix_1_to_n - datascience stats (7): trimmed_mean, geometric_mean, detect_distribution_type, best_central_tendency, summary_stats, kde_density_levels, alpha_shape_concave_hull - datascience fuzzy (3): fuzzy_merge_adaptive (rapidfuzz), words_to_dataset, remove_words_from_column - datascience viz (2): plot_kde_2d, plot_heatmap_log - infra (4): compress_pdf_ghostscript, render_table_page_pdfpages, add_header_logo, osm2pgsql_ingest - pipelines (4): setup_geo_stack_docker, compute_centers_reachability, generate_isochrones_by_zone, count_points_per_zone - types geo (4): LonLat, BBox, IsochroneRequest, Centro Incluye: - apps/footprint_geo_stack/ (PostGIS + Martin + Valhalla via docker-compose) - 131/132 tests pasan (1 skip esperado: osm2pgsql en PATH) - Issue tracker dev/issues/0052-footprint-aurgi-extraction.md - Atribucion uniforme: source_repo internal:footprint_aurgi, source_license internal-aurgi - Build con 9 agentes en paralelo (8 wave 1 + 1 wave 2 pipelines) Tambien commitea trabajo previo no commiteado: aggregate_extraction_results, chunk_with_overlap, clean_pdf_text, merge_entity_aliases, extract_graph_gliner2, extract_relations_mrebel, extract_triples_spacy_es, gliner2/mrebel/marianmt/rebel/spacy_es load_model, parse_rebel_output, translate_es_to_en, issue 0050/0051. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,22 @@
|
||||
"""Tests para add_basemap_osm."""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
from geo.add_basemap_osm import add_basemap_osm
|
||||
|
||||
|
||||
def test_no_lanza_excepcion_con_Axes_valido():
    """add_basemap_osm accepts a valid Axes without raising.

    The call must not raise regardless of network availability. The figure
    is closed in a ``finally`` block so that a failure inside the call does
    not leave an open matplotlib figure behind for later tests.
    """
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    try:
        ax.set_xlim(-430000, -350000)
        ax.set_ylim(4500000, 4600000)
        # Must not raise regardless of network availability
        add_basemap_osm(ax, zoom=5)
    finally:
        plt.close(fig)
|
||||
@@ -0,0 +1,23 @@
|
||||
"""Tests para add_basemap_with_timeout."""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
from geo.add_basemap_with_timeout import add_basemap_with_timeout
|
||||
|
||||
|
||||
def test_timeout_muy_corto_retorna_False_sin_colgar():
    """A 0.001 s timeout makes add_basemap_with_timeout return False.

    With such a short timeout the call is expected to fail or time out fast
    and report ``False``. The figure is closed in a ``finally`` block so it
    is released even if the call under test raises.
    """
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    try:
        ax.set_xlim(-430000, -350000)
        ax.set_ylim(4500000, 4600000)
        # 0.001 s timeout — should fail/timeout fast and return False
        result = add_basemap_with_timeout(ax, zoom=9, timeout_s=0.001)
    finally:
        plt.close(fig)
    assert result is False, f"expected False with 0.001s timeout, got {result}"
|
||||
@@ -0,0 +1,25 @@
|
||||
"""Tests para distance_bucket."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
|
||||
|
||||
from python.functions.geo.distance_bucket import distance_bucket
|
||||
|
||||
|
||||
def test_bucket_0_5():
    """A distance of 3.0 is labelled "0-5"."""
    label = distance_bucket(3.0)
    assert label == "0-5"
|
||||
|
||||
|
||||
def test_bucket_5_10():
    """A distance of 7.0 is labelled "5-10"."""
    label = distance_bucket(7.0)
    assert label == "5-10"
|
||||
|
||||
|
||||
def test_bucket_borde_exacto():
    """The exact boundary value 10.0 belongs to the lower bucket "5-10"."""
    # 10 <= 10, so the upper edge is inclusive.
    label = distance_bucket(10.0)
    assert label == "5-10"
|
||||
|
||||
|
||||
def test_bucket_160_mas():
    """A distance of 200.0 falls in the open-ended "160+" bucket."""
    label = distance_bucket(200.0)
    assert label == "160+"
|
||||
@@ -0,0 +1,19 @@
|
||||
"""Tests para extent_with_padding."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
|
||||
|
||||
from python.functions.geo.extent_with_padding import extent_with_padding
|
||||
|
||||
|
||||
def test_bbox_cuadrado_con_10_pct():
    """A 10x10 square padded with ratio 0.1 grows by 1.0 on every side."""
    square = (0.0, 0.0, 10.0, 10.0)
    expected = (-1.0, 11.0, -1.0, 11.0)
    assert extent_with_padding(square, 0.1) == expected
|
||||
|
||||
|
||||
def test_pad_ratio_cero_no_cambia():
    """A pad ratio of 0.0 keeps the limits, only reordering them.

    The input (2.0, 3.0, 8.0, 9.0) comes back as (2.0, 8.0, 3.0, 9.0):
    the second and third values swap places. Presumably this is
    (minx, miny, maxx, maxy) in and (xmin, xmax, ymin, ymax) out — confirm
    against extent_with_padding.
    """
    expected = (2.0, 8.0, 3.0, 9.0)
    assert extent_with_padding((2.0, 3.0, 8.0, 9.0), 0.0) == expected
|
||||
@@ -0,0 +1,18 @@
|
||||
"""Tests para haversine_km."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
|
||||
|
||||
from python.functions.geo.haversine_km import haversine_km
|
||||
|
||||
|
||||
def test_madrid_barcelona_aproximado():
    """Madrid to Barcelona is about 504 km, within a 2 km tolerance.

    NOTE(review): the values are passed longitude first, then latitude
    (presumably lon1, lat1, lon2, lat2) — confirm against haversine_km.
    """
    madrid = (-3.7038, 40.4168)
    barcelona = (2.1686, 41.3874)
    d = haversine_km(*madrid, *barcelona)
    deviation = abs(d - 504.0)
    assert deviation < 2.0, f"Esperado ~504 km, got {d:.1f}"
|
||||
|
||||
|
||||
def test_misma_coordenada_es_cero():
    """The distance from a point to itself is exactly 0.0."""
    origin = (0.0, 0.0)
    d = haversine_km(*origin, *origin)
    assert d == 0.0, f"Misma coordenada debe ser 0, got {d}"
|
||||
@@ -0,0 +1,61 @@
|
||||
"""Tests para load_boundary_gdf."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
from geo.load_boundary_gdf import load_boundary_gdf
|
||||
|
||||
|
||||
def _write_geojson(data: dict) -> Path:
|
||||
f = tempfile.NamedTemporaryFile(
|
||||
mode="w", suffix=".geojson", delete=False, encoding="utf-8"
|
||||
)
|
||||
json.dump(data, f)
|
||||
f.close()
|
||||
return Path(f.name)
|
||||
|
||||
|
||||
def test_retorna_GeoDataFrame_con_CRS_EPSG4326():
    """Loading a one-polygon GeoJSON yields a GeoDataFrame in EPSG:4326.

    Writes a FeatureCollection with a single closed rectangular ring to a
    temporary file, loads it with ``load_boundary_gdf(path, crs="EPSG:4326")``
    and checks the result type, its CRS and its row count. The temporary
    file is removed whether or not the assertions pass.
    """
    ring = [
        [-3.7, 40.4],
        [-3.6, 40.4],
        [-3.6, 40.5],
        [-3.7, 40.5],
        [-3.7, 40.4],  # repeats the first vertex to close the ring
    ]
    feature = {
        "type": "Feature",
        "geometry": {"type": "Polygon", "coordinates": [ring]},
        "properties": {"name": "test"},
    }
    path = _write_geojson({"type": "FeatureCollection", "features": [feature]})
    try:
        gdf = load_boundary_gdf(path, crs="EPSG:4326")
        import geopandas as gpd  # type: ignore

        assert isinstance(gdf, gpd.GeoDataFrame), "result should be a GeoDataFrame"
        assert gdf.crs is not None, "CRS should be set"
        assert gdf.crs.to_epsg() == 4326, f"expected EPSG:4326, got {gdf.crs}"
        assert len(gdf) == 1, f"expected 1 feature, got {len(gdf)}"
    finally:
        path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def test_archivo_inexistente_lanza_FileNotFoundError():
    """load_boundary_gdf raises FileNotFoundError for a missing path."""
    from pytest import raises

    missing = "/tmp/this_file_does_not_exist_xyz.geojson"
    with raises(FileNotFoundError):
        load_boundary_gdf(missing)
|
||||
@@ -0,0 +1,59 @@
|
||||
"""Tests para load_geojson_polygons."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
from geo.load_geojson_polygons import load_geojson_polygons
|
||||
|
||||
|
||||
def _write_geojson(data: dict) -> Path:
|
||||
f = tempfile.NamedTemporaryFile(
|
||||
mode="w", suffix=".geojson", delete=False, encoding="utf-8"
|
||||
)
|
||||
json.dump(data, f)
|
||||
f.close()
|
||||
return Path(f.name)
|
||||
|
||||
|
||||
def test_polygon_simple_produce_1_poligono_con_1_anillo():
    """A single simple Polygon feature loads as one polygon with one ring.

    The result is expected to be a list of polygons, each a list of rings,
    each a list of point tuples. The temporary file is removed whether or
    not the assertions pass.
    """
    ring = [
        [-3.7, 40.4],
        [-3.6, 40.4],
        [-3.6, 40.5],
        [-3.7, 40.5],
        [-3.7, 40.4],  # repeats the first vertex to close the ring
    ]
    feature = {
        "type": "Feature",
        "geometry": {"type": "Polygon", "coordinates": [ring]},
        "properties": {},
    }
    path = _write_geojson({"type": "FeatureCollection", "features": [feature]})
    try:
        result = load_geojson_polygons(path)
        assert len(result) == 1, f"expected 1 polygon, got {len(result)}"
        rings = result[0]
        assert len(rings) == 1, "expected 1 ring"
        points = rings[0]
        assert len(points) >= 4, "ring should have >= 4 points"
        assert isinstance(points[0], tuple), "points should be tuples"
    finally:
        path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def test_archivo_inexistente_lanza_FileNotFoundError():
    """load_geojson_polygons raises FileNotFoundError for a missing path."""
    from pytest import raises

    missing = "/tmp/this_file_does_not_exist_xyz.geojson"
    with raises(FileNotFoundError):
        load_geojson_polygons(missing)
|
||||
@@ -0,0 +1,29 @@
|
||||
"""Tests para point_in_polygon."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
|
||||
|
||||
from python.functions.geo.point_in_polygon import point_in_polygon
|
||||
|
||||
# Fixture rings: a 4x4 square and a 2x2 square centred inside it. The tests
# pass them together as [OUTER, HOLE], i.e. outer ring first, hole second.
OUTER = [
    (0.0, 0.0),
    (4.0, 0.0),
    (4.0, 4.0),
    (0.0, 4.0),
]
HOLE = [
    (1.0, 1.0),
    (3.0, 1.0),
    (3.0, 3.0),
    (1.0, 3.0),
]
|
||||
|
||||
|
||||
def test_punto_en_exterior():
    """A point inside the outer ring but outside the hole is inside."""
    polygon = [OUTER, HOLE]
    assert point_in_polygon(0.5, 0.5, polygon) is True
|
||||
|
||||
|
||||
def test_punto_en_hole():
    """A point that lies within the hole is reported as outside."""
    polygon = [OUTER, HOLE]
    assert point_in_polygon(2.0, 2.0, polygon) is False
|
||||
|
||||
|
||||
def test_punto_fuera():
    """A point beyond the outer ring is reported as outside."""
    polygon = [OUTER, HOLE]
    assert point_in_polygon(10.0, 10.0, polygon) is False
|
||||
|
||||
|
||||
def test_poligono_vacio():
    """A polygon with no rings contains no point."""
    no_rings = []
    assert point_in_polygon(0.5, 0.5, no_rings) is False
|
||||
@@ -0,0 +1,25 @@
|
||||
"""Tests para point_in_polygons_bbox."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
|
||||
|
||||
from python.functions.geo.point_in_polygons_bbox import point_in_polygons_bbox
|
||||
from python.functions.geo.polygon_bbox import polygon_bbox
|
||||
|
||||
# Fixture polygons: two disjoint unit squares, each made of a single ring,
# plus their bounding boxes in the same order as the polygons.
P1 = [[(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0)]]
P2 = [[(5.0, 5.0), (6.0, 5.0), (6.0, 6.0), (5.0, 6.0)]]
BBOXES = [polygon_bbox(polygon) for polygon in (P1, P2)]
|
||||
|
||||
|
||||
def test_punto_en_primer_poligono():
    """A point inside the first square is found."""
    polygons = [P1, P2]
    assert point_in_polygons_bbox(0.5, 0.5, polygons, BBOXES) is True
|
||||
|
||||
|
||||
def test_punto_en_segundo_poligono():
    """A point inside the second square is found."""
    polygons = [P1, P2]
    assert point_in_polygons_bbox(5.5, 5.5, polygons, BBOXES) is True
|
||||
|
||||
|
||||
def test_punto_fuera_de_todos():
    """A point outside both squares is not found."""
    polygons = [P1, P2]
    assert point_in_polygons_bbox(10.0, 10.0, polygons, BBOXES) is False
|
||||
@@ -0,0 +1,22 @@
|
||||
"""Tests para point_in_ring."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
|
||||
|
||||
from python.functions.geo.point_in_ring import point_in_ring
|
||||
|
||||
# Fixture ring: the unit square, given without a repeated closing vertex.
SQUARE = [
    (0.0, 0.0),
    (1.0, 0.0),
    (1.0, 1.0),
    (0.0, 1.0),
]
|
||||
|
||||
|
||||
def test_punto_dentro_cuadrado():
    """The centre of the unit square is inside the ring."""
    inside = point_in_ring(0.5, 0.5, SQUARE)
    assert inside is True
|
||||
|
||||
|
||||
def test_punto_fuera_cuadrado():
    """A point beyond the unit square is outside the ring."""
    inside = point_in_ring(2.0, 2.0, SQUARE)
    assert inside is False
|
||||
|
||||
|
||||
def test_ring_menor_3_vertices():
    """A ring with only two vertices never contains a point."""
    two_vertices = [(0.0, 0.0), (1.0, 1.0)]
    assert point_in_ring(0.0, 0.0, two_vertices) is False
|
||||
@@ -0,0 +1,19 @@
|
||||
"""Tests para polygon_bbox."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
|
||||
|
||||
from python.functions.geo.polygon_bbox import polygon_bbox
|
||||
|
||||
|
||||
def test_cuadrado_unitario():
    """The bounding box of a unit-square polygon is (0.0, 0.0, 1.0, 1.0)."""
    unit_square = [[(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0)]]
    expected = (0.0, 0.0, 1.0, 1.0)
    assert polygon_bbox(unit_square) == expected
|
||||
|
||||
|
||||
def test_poligono_con_hole():
    """An inner hole does not shrink the bounding box of the outer ring."""
    polygon = [
        [(0.0, 0.0), (5.0, 0.0), (5.0, 5.0), (0.0, 5.0)],  # outer ring
        [(1.0, 1.0), (3.0, 1.0), (3.0, 3.0), (1.0, 3.0)],  # hole
    ]
    assert polygon_bbox(polygon) == (0.0, 0.0, 5.0, 5.0)
|
||||
@@ -0,0 +1,36 @@
|
||||
"""Tests para valhalla_isochrone."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from valhalla_isochrone import valhalla_isochrone
|
||||
|
||||
|
||||
def _valhalla_alive(url: str = "http://localhost:8002") -> bool:
    """Report whether a Valhalla server answers at ``url``.

    Sends a GET to ``{url}/status`` with a 2-second timeout. Any status
    code below 500 counts as alive; any exception counts as not alive.
    """
    try:
        status = httpx.get(f"{url}/status", timeout=2.0).status_code
        alive = status < 500
    except Exception:
        # Best-effort probe: every failure simply means "skip the test".
        alive = False
    return alive
|
||||
|
||||
|
||||
# Probe the server once at import time and build a reusable skip marker.
VALHALLA_OK = _valhalla_alive()
skip_if_no_valhalla = pytest.mark.skipif(not VALHALLA_OK, reason="Valhalla no activo en :8002")
|
||||
|
||||
|
||||
@skip_if_no_valhalla
def test_isócrona_10_min_madrid_contiene_features():
    """A 10-minute isochrone from Madrid contains at least one feature."""
    gj = valhalla_isochrone(lat=40.4168, lon=-3.7038, minutes=10)
    assert gj is not None, "Esperaba GeoJSON, obtuvo None"
    assert "features" in gj, "GeoJSON no contiene 'features'"
    features = gj["features"]
    assert len(features) > 0, "features está vacío"
|
||||
@@ -0,0 +1,43 @@
|
||||
"""Tests para valhalla_isochrones_async."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from valhalla_isochrones_async import valhalla_isochrones_async
|
||||
|
||||
|
||||
def _valhalla_alive(url: str = "http://localhost:8002") -> bool:
    """Report whether a Valhalla server answers at ``url``.

    Sends a GET to ``{url}/status`` with a 2-second timeout. Any status
    code below 500 counts as alive; any exception counts as not alive.
    """
    try:
        status = httpx.get(f"{url}/status", timeout=2.0).status_code
        alive = status < 500
    except Exception:
        # Best-effort probe: every failure simply means "skip the test".
        alive = False
    return alive
|
||||
|
||||
|
||||
# Probe the server once at import time and build a reusable skip marker.
VALHALLA_OK = _valhalla_alive()
skip_if_no_valhalla = pytest.mark.skipif(not VALHALLA_OK, reason="Valhalla no activo en :8002")
|
||||
|
||||
|
||||
@skip_if_no_valhalla
def test_3_puntos_madrid_retornan_lista_de_3():
    """Three Madrid points yield a list of three results with features.

    Each request is a 10-minute isochrone. The coroutine is driven with
    ``asyncio.run``, and every result must be non-None and contain a
    "features" key.
    """
    coordinates = {
        "sol": (40.4168, -3.7038),
        "retiro": (40.4530, -3.6883),
        "atocha": (40.4005, -3.7057),
    }
    pts = [
        {"lat": lat, "lon": lon, "minutes": 10, "id": name}
        for name, (lat, lon) in coordinates.items()
    ]
    results = asyncio.run(valhalla_isochrones_async(pts))
    assert len(results) == 3, f"Esperaba 3 resultados, obtuvo {len(results)}"
    for i, gj in enumerate(results):
        assert gj is not None, f"Resultado {i} es None"
        assert "features" in gj, f"Resultado {i} no contiene 'features'"
|
||||
@@ -0,0 +1,46 @@
|
||||
"""Tests para valhalla_matrix_1_to_n."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from valhalla_matrix_1_to_n import valhalla_matrix_1_to_n
|
||||
|
||||
|
||||
def _valhalla_alive(url: str = "http://localhost:8002") -> bool:
    """Report whether a Valhalla server answers at ``url``.

    Sends a GET to ``{url}/status`` with a 2-second timeout. Any status
    code below 500 counts as alive; any exception counts as not alive.
    """
    try:
        status = httpx.get(f"{url}/status", timeout=2.0).status_code
        alive = status < 500
    except Exception:
        # Best-effort probe: every failure simply means "skip the test".
        alive = False
    return alive
|
||||
|
||||
|
||||
# Probe the server once at import time and build a reusable skip marker.
VALHALLA_OK = _valhalla_alive()
skip_if_no_valhalla = pytest.mark.skipif(not VALHALLA_OK, reason="Valhalla no activo en :8002")
|
||||
|
||||
|
||||
@skip_if_no_valhalla
def test_matrix_1_origen_2_destinos_retorna_2_dicts_con_meters_mayor_0():
    """One origin and two destinations yield two results with meters > 0.

    Each result must report ``error == 0``, a positive ``meters`` value and
    a ``seconds`` value that is not NaN.
    """
    madrid = (40.4168, -3.7038)
    barcelona = (41.3874, 2.1686)
    sevilla = (37.3886, -5.9823)
    # Pairs are (origin index, destination index): Madrid to each city.
    pairs = [(0, 0), (0, 1)]

    results = valhalla_matrix_1_to_n([madrid], [barcelona, sevilla], pairs)
    assert len(results) == 2, f"Esperaba 2 resultados, obtuvo {len(results)}"
    for i, r in enumerate(results):
        assert r["error"] == 0, f"Par {i} tiene error={r['error']}"
        assert r["meters"] > 0, f"Par {i} tiene meters={r['meters']}"
        assert not math.isnan(r["seconds"]), f"Par {i} tiene seconds=NaN"
|
||||
@@ -0,0 +1,41 @@
|
||||
"""Tests para valhalla_route."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from valhalla_route import valhalla_route
|
||||
|
||||
|
||||
def _valhalla_alive(url: str = "http://localhost:8002") -> bool:
    """Report whether a Valhalla server answers at ``url``.

    Sends a GET to ``{url}/status`` with a 2-second timeout. Any status
    code below 500 counts as alive; any exception counts as not alive.
    """
    try:
        status = httpx.get(f"{url}/status", timeout=2.0).status_code
        alive = status < 500
    except Exception:
        # Best-effort probe: every failure simply means "skip the test".
        alive = False
    return alive
|
||||
|
||||
|
||||
# Probe the server once at import time and build a reusable skip marker.
VALHALLA_OK = _valhalla_alive()
skip_if_no_valhalla = pytest.mark.skipif(not VALHALLA_OK, reason="Valhalla no activo en :8002")
|
||||
|
||||
|
||||
@skip_if_no_valhalla
def test_ruta_madrid_barcelona_supera_500_km():
    """The Madrid to Barcelona route is longer than 500 km.

    The length is read from ``result["trip"]["summary"]["length"]``.
    """
    madrid = {"lat": 40.4168, "lon": -3.7038}
    barcelona = {"lat": 41.3874, "lon": 2.1686}
    result = valhalla_route(locations=[madrid, barcelona])
    assert result is not None, "Esperaba respuesta, obtuvo None"
    summary = result["trip"]["summary"]
    assert summary["length"] > 500, f"Distancia {summary['length']} km < 500 km"
|
||||
Reference in New Issue
Block a user