feat: extraccion masiva footprint_aurgi (41 funcs + 4 types + stack Docker geo)
Extrae al registry funciones del proyecto interno footprint_aurgi: - core (6): slugify_ascii, normalize_for_join, cp_provincia_es, infer_provincia_from_cp, safe_read_csv_fallback, csv_to_parquet_duckdb - geo puras (7): haversine_km, point_in_ring, point_in_polygon, point_in_polygons_bbox, polygon_bbox, extent_with_padding, distance_bucket - geo I/O (4): load_geojson_polygons, load_boundary_gdf, add_basemap_osm, add_basemap_with_timeout - valhalla client (4): valhalla_route, valhalla_isochrone, valhalla_isochrones_async, valhalla_matrix_1_to_n - datascience stats (7): trimmed_mean, geometric_mean, detect_distribution_type, best_central_tendency, summary_stats, kde_density_levels, alpha_shape_concave_hull - datascience fuzzy (3): fuzzy_merge_adaptive (rapidfuzz), words_to_dataset, remove_words_from_column - datascience viz (2): plot_kde_2d, plot_heatmap_log - infra (4): compress_pdf_ghostscript, render_table_page_pdfpages, add_header_logo, osm2pgsql_ingest - pipelines (4): setup_geo_stack_docker, compute_centers_reachability, generate_isochrones_by_zone, count_points_per_zone - types geo (4): LonLat, BBox, IsochroneRequest, Centro Incluye: - apps/footprint_geo_stack/ (PostGIS + Martin + Valhalla via docker-compose) - 131/132 tests pasan (1 skip esperado: osm2pgsql en PATH) - Issue tracker dev/issues/0052-footprint-aurgi-extraction.md - Atribucion uniforme: source_repo internal:footprint_aurgi, source_license internal-aurgi - Build con 9 agentes en paralelo (8 wave 1 + 1 wave 2 pipelines) Tambien commitea trabajo previo no commiteado: aggregate_extraction_results, chunk_with_overlap, clean_pdf_text, merge_entity_aliases, extract_graph_gliner2, extract_relations_mrebel, extract_triples_spacy_es, gliner2/mrebel/marianmt/rebel/spacy_es load_model, parse_rebel_output, translate_es_to_en, issue 0050/0051. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,45 @@
|
||||
"""Tests para add_header_logo."""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
def test_figura_nueva_con_imagen_zeros_no_lanza_excepcion():
    """A fresh figure accepts an all-zeros logo image without raising.

    The test passes as long as ``add_header_logo(fig, image)`` returns
    normally; its return value is not inspected.
    """
    import sys

    # Make the registry root importable so ``infra`` resolves.
    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
    from infra.add_header_logo import add_header_logo
    import matplotlib.pyplot as plt

    fig, _ = plt.subplots(figsize=(11.69, 8.27))
    try:
        image = np.zeros((50, 200, 3), dtype=np.uint8)

        # Should not raise
        add_header_logo(fig, image)
    finally:
        # Close even when the call fails so the figure does not leak into
        # later tests.
        plt.close(fig)
|
||||
|
||||
|
||||
def test_axes_de_logo_tiene_axis_off():
    """The logo is drawn on one extra axes whose axis decorations are off.

    Checks that exactly one axes is added to the figure and that the last
    axes in ``fig.axes`` has ``axison`` set to a falsy value.
    """
    import sys

    # Make the registry root importable so ``infra`` resolves.
    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
    from infra.add_header_logo import add_header_logo
    import matplotlib.pyplot as plt

    fig, _ = plt.subplots(figsize=(11.69, 8.27))
    try:
        initial_axes_count = len(fig.axes)
        image = np.zeros((10, 10, 3), dtype=np.uint8)

        add_header_logo(fig, image, x=0.88, y=0.905, width=0.08, height=0.08)

        # A new axes should have been added
        assert len(fig.axes) == initial_axes_count + 1
        logo_ax = fig.axes[-1]
        # axis("off") disables both x and y axis visibility
        assert not logo_ax.axison
    finally:
        # Close even when an assertion fails so the figure does not leak
        # into later tests.
        plt.close(fig)
|
||||
@@ -0,0 +1,62 @@
|
||||
"""Tests para compress_pdf_ghostscript."""
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _minimal_pdf_bytes() -> bytes:
    """Build a one-page, content-free PDF with a correct xref table."""
    object_bodies = (
        b"<< /Type /Catalog /Pages 2 0 R >>",
        b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
        b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>",
    )
    buf = bytearray(b"%PDF-1.4\n")

    # Record the byte position of every object as it is appended, so the
    # cross-reference table always matches the actual layout.
    offsets = []
    for number, body in enumerate(object_bodies, start=1):
        offsets.append(len(buf))
        buf += b"%d 0 obj\n" % number + body + b"\nendobj\n"

    xref_position = len(buf)
    entry_count = len(object_bodies) + 1  # +1 for the mandatory free entry 0
    buf += b"xref\n0 %d\n" % entry_count
    buf += b"0000000000 65535 f \n"
    for offset in offsets:
        # Each entry is exactly 20 bytes: 10-digit offset, 5-digit generation.
        buf += b"%010d 00000 n \n" % offset
    buf += b"trailer\n<< /Size %d /Root 1 0 R >>\n" % entry_count
    buf += b"startxref\n%d\n%%%%EOF\n" % xref_position
    return bytes(buf)


def _make_simple_pdf(path: Path) -> None:
    """Create a minimal valid PDF at *path*.

    Uses fpdf2 when it is importable; otherwise writes a hand-assembled
    single-page PDF.

    Args:
        path: Destination file; it is created or overwritten.
    """
    try:
        from fpdf import FPDF
    except ImportError:
        # Fallback: write a minimal PDF manually
        path.write_bytes(_minimal_pdf_bytes())
        return

    # Kept outside the ``try`` so that only a missing fpdf triggers the
    # fallback, not an ImportError raised while rendering.
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Helvetica", size=12)
    pdf.cell(200, 10, text="Test PDF for ghostscript compression", ln=True)
    pdf.output(str(path))
|
||||
|
||||
|
||||
def test_crea_pdf_temporal_y_comprime_retorna_bool_sin_excepcion():
    """Compress a temporary PDF and check a bool comes back without raising."""
    import sys

    # Make the registry root importable so ``infra`` resolves.
    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
    from infra.compress_pdf_ghostscript import compress_pdf_ghostscript

    with tempfile.TemporaryDirectory() as workdir:
        sample = Path(workdir, "test.pdf")
        _make_simple_pdf(sample)
        assert sample.exists()

        outcome = compress_pdf_ghostscript(sample)

        assert isinstance(outcome, bool)
        # Whether or not compression happened, the file must still be there.
        assert sample.exists()
|
||||
|
||||
|
||||
def test_retorna_False_cuando_gs_no_esta_disponible(monkeypatch):
    """Return False when no Ghostscript executable can be located.

    ``shutil.which`` is replaced with a stub that always reports "not
    found", simulating a machine without Ghostscript.
    """
    import sys

    # Make the registry root importable so ``infra`` resolves.
    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
    from infra.compress_pdf_ghostscript import compress_pdf_ghostscript

    # Accept any call shape: shutil.which also takes ``mode`` and ``path``,
    # and a one-argument stub would raise TypeError if either were passed.
    monkeypatch.setattr("shutil.which", lambda *args, **kwargs: None)

    with tempfile.TemporaryDirectory() as tmpdir:
        pdf_path = Path(tmpdir) / "test.pdf"
        _make_simple_pdf(pdf_path)
        result = compress_pdf_ghostscript(pdf_path)
        assert result is False
|
||||
@@ -0,0 +1,38 @@
|
||||
"""Tests para osm2pgsql_ingest."""
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_lanza_FileNotFoundError_con_path_inexistente():
    """Raise FileNotFoundError when the input path does not exist."""
    import sys

    # Make the registry root importable so ``infra`` resolves.
    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
    from infra.osm2pgsql_ingest import osm2pgsql_ingest

    missing_pbf = "/tmp/non_existent_file_that_does_not_exist.osm.pbf"
    with pytest.raises(FileNotFoundError):
        osm2pgsql_ingest(missing_pbf)
|
||||
|
||||
|
||||
def test_lanza_RuntimeError_si_osm2pgsql_no_esta_en_PATH():
    """Raise RuntimeError when the osm2pgsql binary is not on PATH.

    Skipped when osm2pgsql is installed, since the error condition cannot
    then be reproduced.
    """
    import sys

    # Make the registry root importable so ``infra`` resolves.
    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
    from infra.osm2pgsql_ingest import osm2pgsql_ingest

    with tempfile.TemporaryDirectory() as workdir:
        dummy_pbf = Path(workdir, "fake.osm.pbf")
        # The input has to exist so FileNotFoundError is not raised first.
        dummy_pbf.write_bytes(b"PBF")

        # Some environments (e.g. CI) ship the binary; the scenario does
        # not apply there.
        binary_present = shutil.which("osm2pgsql") is not None
        if binary_present:
            pytest.skip("osm2pgsql is available in PATH; skipping RuntimeError test")

        with pytest.raises(RuntimeError, match="osm2pgsql"):
            osm2pgsql_ingest(dummy_pbf)
|
||||
@@ -0,0 +1,53 @@
|
||||
"""Tests para render_table_page_pdfpages."""
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import pytest
|
||||
|
||||
|
||||
def test_50_filas_con_max_rows_28_genera_2_paginas_en_pdf_no_vacio():
    """50 rows with ``max_rows=28`` produce a non-empty PDF with two pages.

    The page count is verified with pypdf when it is installed; otherwise
    the test falls back to a coarse file-size check.
    """
    import sys

    # Make the registry root importable so ``infra`` resolves.
    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
    from infra.render_table_page_pdfpages import render_table_page_pdfpages
    from matplotlib.backends.backend_pdf import PdfPages

    rows = [[str(i), f"valor_{i}", f"extra_{i}"] for i in range(50)]
    col_labels = ["ID", "Valor", "Extra"]

    with tempfile.TemporaryDirectory() as tmpdir:
        pdf_path = Path(tmpdir) / "test_table.pdf"
        with PdfPages(str(pdf_path)) as pdf:
            render_table_page_pdfpages(pdf, "Test Tabla", rows, col_labels, max_rows=28)

        assert pdf_path.exists()
        assert pdf_path.stat().st_size > 0

        # Only the import is guarded: an ImportError raised while reading
        # the PDF must not silently downgrade to the weaker size check.
        try:
            from pypdf import PdfReader
        except ImportError:
            # If pypdf not available, just check file size
            assert pdf_path.stat().st_size > 1000
        else:
            # Verify 2 pages were generated
            reader = PdfReader(str(pdf_path))
            assert len(reader.pages) == 2
|
||||
|
||||
|
||||
def test_0_filas_genera_1_pagina_vacia_sin_excepcion():
    """Rendering zero rows still writes a non-empty PDF and does not raise."""
    import sys

    # Make the registry root importable so ``infra`` resolves.
    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
    from infra.render_table_page_pdfpages import render_table_page_pdfpages
    from matplotlib.backends.backend_pdf import PdfPages

    no_rows = []
    headers = ["Col1", "Col2"]

    with tempfile.TemporaryDirectory() as workdir:
        output = Path(workdir, "empty_table.pdf")
        with PdfPages(str(output)) as pages:
            render_table_page_pdfpages(pages, "Vacío", no_rows, headers)

        # Only existence and non-zero size are checked here, not page count.
        assert output.exists()
        assert output.stat().st_size > 0
|
||||
Reference in New Issue
Block a user