feat: extraccion masiva footprint_aurgi (41 funcs + 4 types + stack Docker geo)

Extrae al registry funciones del proyecto interno footprint_aurgi: - core (6): slugify_ascii, normalize_for_join, cp_provincia_es, infer_provincia_from_cp, safe_read_csv_fallback, csv_to_parquet_duckdb - geo puras (7): haversine_km, point_in_ring, point_in_polygon, point_in_polygons_bbox, polygon_bbox, extent_with_padding, distance_bucket - geo I/O (4): load_geojson_polygons, load_boundary_gdf, add_basemap_osm, add_basemap_with_timeout - valhalla client (4): valhalla_route, valhalla_isochrone, valhalla_isochrones_async, valhalla_matrix_1_to_n - datascience stats (7): trimmed_mean, geometric_mean, detect_distribution_type, best_central_tendency, summary_stats, kde_density_levels, alpha_shape_concave_hull - datascience fuzzy (3): fuzzy_merge_adaptive (rapidfuzz), words_to_dataset, remove_words_from_column - datascience viz (2): plot_kde_2d, plot_heatmap_log - infra (4): compress_pdf_ghostscript, render_table_page_pdfpages, add_header_logo, osm2pgsql_ingest - pipelines (4): setup_geo_stack_docker, compute_centers_reachability, generate_isochrones_by_zone, count_points_per_zone - types geo (4): LonLat, BBox, IsochroneRequest, Centro Incluye: - apps/footprint_geo_stack/ (PostGIS + Martin + Valhalla via docker-compose) - 131/132 tests pasan (1 skip esperado: osm2pgsql en PATH) - Issue tracker dev/issues/0052-footprint-aurgi-extraction.md - Atribucion uniforme: source_repo internal:footprint_aurgi, source_license internal-aurgi - Build con 9 agentes en paralelo (8 wave 1 + 1 wave 2 pipelines) Tambien commitea trabajo previo no commiteado: aggregate_extraction_results, chunk_with_overlap, clean_pdf_text, merge_entity_aliases, extract_graph_gliner2, extract_relations_mrebel, extract_triples_spacy_es, gliner2/mrebel/marianmt/rebel/spacy_es load_model, parse_rebel_output, translate_es_to_en, issue 0050/0051. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 23:35:22 +02:00
parent f73ea072bd
commit faac610745
193 changed files with 13146 additions and 3 deletions
@@ -0,0 +1,58 @@
+---
+name: add_header_logo
+kind: function
+lang: py
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "add_header_logo(fig: Figure, image: np.ndarray, x: float = 0.88, y: float = 0.905, width: float = 0.08, height: float = 0.08) -> None"
+description: "Añade un logo como axes inset en la esquina superior derecha de una figura matplotlib. Usa fig.add_axes + imshow + axis off. Útil para branding en páginas de informe PDF."
+tags: [pdf, matplotlib, logo, report, infra]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [matplotlib]
+params:
+  - name: fig
+    desc: "Figura matplotlib donde se inserta el logo."
+  - name: image
+    desc: "Array numpy H×W×C con los datos de imagen del logo (e.g. de imread o PIL)."
+  - name: x
+    desc: "Borde izquierdo del axes en coordenadas de figura (0-1). Default 0.88."
+  - name: y
+    desc: "Borde inferior del axes en coordenadas de figura (0-1). Default 0.905."
+  - name: width
+    desc: "Ancho del axes en coordenadas de figura (0-1). Default 0.08."
+  - name: height
+    desc: "Alto del axes en coordenadas de figura (0-1). Default 0.08."
+output: "None. Modifica la figura in-place añadiendo un axes con el logo."
+tested: true
+tests:
+  - "figura nueva con imagen zeros no lanza excepcion"
+  - "axes de logo tiene axis off"
+test_file_path: "python/functions/infra/tests/test_add_header_logo.py"
+file_path: "python/functions/infra/add_header_logo.py"
+source_repo: "internal:footprint_aurgi"
+source_license: "internal-aurgi"
+source_file: "ponderacion_isochronas/src/recomendador_centros.py"
+---
+
+## Ejemplo
+
+```python
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import numpy as np
+
+logo = np.zeros((50, 200, 3), dtype=np.uint8)  # o matplotlib.image.imread("logo.png")
+fig, ax = plt.subplots(figsize=(11.69, 8.27))
+add_header_logo(fig, logo)
+```
+
+## Notas
+
+La posición por defecto (x=0.88, y=0.905) coloca el logo en la esquina superior derecha
+para figuras A4 landscape. Ajustar x, y, width, height para otros tamaños.
@@ -0,0 +1,35 @@
+"""Add a logo image as an inset axes header to a matplotlib figure."""
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import numpy as np
+    from matplotlib.figure import Figure
+
+
+def add_header_logo(
+    fig: "Figure",
+    image: "np.ndarray",
+    x: float = 0.88,
+    y: float = 0.905,
+    width: float = 0.08,
+    height: float = 0.08,
+) -> None:
+    """Draw a logo image in its own undecorated axes on a figure.
+
+    A new axes is added to ``fig`` at the rectangle ``[x, y, width, height]``,
+    the image is shown in it with ``imshow`` and the axes decorations are
+    switched off. The figure is modified in place; nothing is returned.
+    With the default rectangle the logo lands near the upper-right corner.
+
+    Args:
+        fig: Figure that receives the logo axes.
+        image: Image data handed to ``imshow``, e.g. an H x W x C array
+            loaded with matplotlib.image.imread or PIL.
+        x: Left edge of the logo axes in figure coordinates (0-1).
+        y: Bottom edge of the logo axes in figure coordinates (0-1).
+        width: Width of the logo axes in figure coordinates (0-1).
+        height: Height of the logo axes in figure coordinates (0-1).
+    """
+    logo_rect = [x, y, width, height]
+    logo_axes = fig.add_axes(logo_rect)
+    logo_axes.imshow(image)
+    # Hide ticks, labels and frame so only the image itself is visible.
+    logo_axes.axis("off")
@@ -0,0 +1,46 @@
+---
+name: compress_pdf_ghostscript
+kind: function
+lang: py
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "compress_pdf_ghostscript(pdf_path: str | Path, quality: str = 'screen') -> bool"
+description: "Comprime un PDF en disco usando Ghostscript con downsampling 96/200 dpi. Reemplaza el archivo solo si el comprimido es menor. Retorna True si comprimió, False si gs no disponible o no hubo mejora."
+tags: [pdf, ghostscript, compression, infra]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [shutil, subprocess, tempfile, pathlib]
+params:
+  - name: pdf_path
+    desc: "Ruta al archivo PDF a comprimir. Se modifica en sitio si la compresión mejora el tamaño."
+  - name: quality
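+    desc: "Perfil PDFSETTINGS de Ghostscript: screen (96 dpi), ebook, printer, prepress. Además del perfil, el comando pasa siempre de forma explícita 96 dpi (color/gris) y 200 dpi (mono) como resolución de imágenes."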
+output: "True si el archivo fue reemplazado por la versión comprimida, False si gs no está disponible, el archivo no existe, falló o el resultado no era menor."
+tested: true
+tests:
+  - "crea pdf temporal y comprime - retorna bool sin excepcion"
+  - "retorna False cuando gs no esta disponible"
+test_file_path: "python/functions/infra/tests/test_compress_pdf_ghostscript.py"
+file_path: "python/functions/infra/compress_pdf_ghostscript.py"
+source_repo: "internal:footprint_aurgi"
+source_license: "internal-aurgi"
+source_file: "ponderacion_isochronas/src/recomendador_centros.py"
+---
+
+## Ejemplo
+
+```python
+compressed = compress_pdf_ghostscript("report.pdf", quality="ebook")
+if compressed:
+    print("PDF comprimido correctamente")
+```
+
+## Notas
+
+Requiere `gs` (Ghostscript) en el PATH. Si no está disponible retorna False sin lanzar excepción.
+El perfil `screen` produce la mayor compresión (96 dpi), útil para distribución web.
+El original solo se sobrescribe cuando Ghostscript termina bien y el resultado es más pequeño; si la compresión falla o no mejora el tamaño, el archivo no se toca.
@@ -0,0 +1,63 @@
+"""Compress a PDF file in-place using Ghostscript."""
+from __future__ import annotations
+
+import shutil
+import subprocess
+import tempfile
+from pathlib import Path
+
+
+def compress_pdf_ghostscript(
+    pdf_path: "str | Path",
+    quality: str = "screen",
+) -> bool:
+    """Compress a PDF in place with Ghostscript, keeping it only if smaller.
+
+    Ghostscript writes a recompressed copy into a private temporary directory.
+    Besides the selected ``quality`` profile, the command always passes
+    explicit image resolutions (96 dpi colour/gray, 200 dpi mono). The original
+    is replaced only when that copy is strictly smaller. The copy is first
+    staged next to the target and then renamed over it, so a failed or
+    interrupted replacement does not leave a half-written PDF behind.
+
+    Args:
+        pdf_path: PDF file to compress; replaced in place on success.
+        quality: Ghostscript ``-dPDFSETTINGS`` profile, e.g. "screen",
+            "ebook", "printer" or "prepress".
+
+    Returns:
+        True if the file was replaced by the smaller compressed version.
+        False if ``gs`` is not in PATH, ``pdf_path`` is not an existing file,
+        Ghostscript could not be started or exited with an error, or the
+        output was not smaller than the original.
+
+    Raises:
+        OSError: If the staging file cannot be created next to the target, or
+            the final copy/rename fails. The original file is left untouched.
+    """
+    path = Path(pdf_path)
+    gs = shutil.which("gs")
+    # is_file() also rejects directories, which Ghostscript cannot compress.
+    if not gs or not path.is_file():
+        return False
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        compressed = Path(tmpdir) / "compressed.pdf"
+        cmd = [
+            gs,
+            "-sDEVICE=pdfwrite",
+            "-dCompatibilityLevel=1.4",
+            f"-dPDFSETTINGS=/{quality}",
+            "-dDownsampleColorImages=true",
+            "-dDownsampleGrayImages=true",
+            "-dDownsampleMonoImages=true",
+            "-dColorImageResolution=96",
+            "-dGrayImageResolution=96",
+            "-dMonoImageResolution=200",
+            "-dNOPAUSE",
+            "-dQUIET",
+            "-dBATCH",
+            f"-sOutputFile={compressed}",
+            str(path),
+        ]
+        try:
+            subprocess.run(cmd, check=True, capture_output=True)
+        except (subprocess.CalledProcessError, OSError):
+            # Non-zero exit status, or gs could not be executed at all.
+            return False
+
+        if not compressed.exists():
+            return False
+        if compressed.stat().st_size >= path.stat().st_size:
+            return False
+
+        # Stage the result beside the real target (symlinks resolved) so the
+        # final rename happens within one filesystem and swaps the file in a
+        # single step instead of overwriting it byte by byte.
+        target = path.resolve()
+        with tempfile.NamedTemporaryFile(
+            dir=target.parent,
+            prefix=f".{target.name}.",
+            suffix=".tmp",
+            delete=False,
+        ) as handle:
+            staged = Path(handle.name)
+        try:
+            shutil.copy2(compressed, staged)
+            staged.replace(target)
+        finally:
+            # After a successful rename the staging name no longer exists.
+            if staged.exists():
+                staged.unlink()
+        return True
@@ -0,0 +1,68 @@
+---
+name: osm2pgsql_ingest
+kind: function
+lang: py
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "osm2pgsql_ingest(osm_pbf_path: str | Path, host: str = 'localhost', port: int = 5432, dbname: str = 'gis', user: str = 'geoserver', password: str = 'geoserver', style: str | None = None, ensure_hstore: bool = True) -> dict"
+description: "Ingesta un archivo .osm.pbf en PostGIS usando osm2pgsql con --create --slim --hstore --multi-geometry. Verifica osm2pgsql en PATH, opcionalmente crea extensión hstore. Retorna dict {ok, rows_loaded, stderr}."
+tags: [osm, postgis, gis, osm2pgsql, infra]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [os, shutil, subprocess, pathlib]
+params:
+  - name: osm_pbf_path
+    desc: "Ruta al archivo .osm.pbf a ingestar."
+  - name: host
+    desc: "Host de PostGIS (default: localhost)."
+  - name: port
+    desc: "Puerto de PostGIS (default: 5432)."
+  - name: dbname
+    desc: "Nombre de la base de datos PostGIS (default: gis)."
+  - name: user
+    desc: "Usuario de la base de datos (default: geoserver)."
+  - name: password
+    desc: "Contraseña de la base de datos (default: geoserver)."
+  - name: style
+    desc: "Ruta opcional a archivo .style de osm2pgsql. Si None usa el estilo por defecto."
+  - name: ensure_hstore
+    desc: "Si True y psql está en el PATH, ejecuta psql para crear la extensión hstore antes de la ingesta. Si psql no está disponible, este paso se omite sin error."
+output: "dict con ok (bool), rows_loaded (int|None, siempre None porque osm2pgsql no reporta conteos), stderr (str con salida combinada stdout+stderr)."
+tested: true
+tests:
+  - "lanza FileNotFoundError con path inexistente"
+  - "lanza RuntimeError si osm2pgsql no esta en PATH"
+test_file_path: "python/functions/infra/tests/test_osm2pgsql_ingest.py"
+file_path: "python/functions/infra/osm2pgsql_ingest.py"
+source_repo: "internal:footprint_aurgi"
+source_license: "internal-aurgi"
+source_file: "better_maps/ingest_osm.py"
+---
+
+## Ejemplo
+
+```python
+result = osm2pgsql_ingest(
+    "data/spain-latest.osm.pbf",
+    host="localhost",
+    dbname="gis",
+    user="geoserver",
+    password="secret",
+)
+if result["ok"]:
+    print("Ingesta completada")
+else:
+    print(result["stderr"])
+```
+
+## Notas
+
+Requiere `osm2pgsql` en el PATH. Lanza RuntimeError si no está disponible.
+El campo `rows_loaded` siempre es None: osm2pgsql no reporta conteos de filas
+en su salida estándar. Para obtener conteos, consultar directamente las tablas
+planet_osm_* en PostGIS.
+La contraseña se pasa via PGPASSWORD en el entorno del subproceso.
@@ -0,0 +1,97 @@
+"""Ingest an OSM PBF file into a PostGIS database using osm2pgsql."""
+from __future__ import annotations
+
+import os
+import shutil
+import subprocess
+from pathlib import Path
+
+
+def osm2pgsql_ingest(
+    osm_pbf_path: "str | Path",
+    host: str = "localhost",
+    port: int = 5432,
+    dbname: str = "gis",
+    user: str = "geoserver",
+    password: str = "geoserver",
+    style: "str | None" = None,
+    ensure_hstore: bool = True,
+) -> dict:
+    """Load an OSM PBF extract into PostGIS by running osm2pgsql.
+
+    The input file and the osm2pgsql executable are checked first. When
+    ``ensure_hstore`` is set and ``psql`` is in PATH, ``psql`` is run once to
+    issue ``CREATE EXTENSION IF NOT EXISTS hstore``; its result is not
+    inspected. osm2pgsql is then run with
+    ``--create --slim --hstore --multi-geometry``.
+
+    Args:
+        osm_pbf_path: Path to the .osm.pbf file to ingest.
+        host: PostGIS host.
+        port: PostGIS port.
+        dbname: Target database name.
+        user: Database user.
+        password: Database password; handed to the child processes through
+            the PGPASSWORD environment variable.
+        style: Optional path to an osm2pgsql .style file. When omitted, no
+            ``--style`` option is passed.
+        ensure_hstore: Whether to try creating the hstore extension first.
+
+    Returns:
+        dict with keys:
+          - ok (bool): True when osm2pgsql exited with status 0.
+          - rows_loaded (None): Always None; no row count is extracted.
+          - stderr (str): osm2pgsql stdout followed by its stderr.
+
+    Raises:
+        FileNotFoundError: If osm_pbf_path does not exist.
+        RuntimeError: If osm2pgsql is not found in PATH.
+    """
+    source = Path(osm_pbf_path)
+    if not source.exists():
+        raise FileNotFoundError(f"OSM PBF file not found: {source}")
+
+    if shutil.which("osm2pgsql") is None:
+        raise RuntimeError(
+            "osm2pgsql not found in PATH. Install it before calling this function."
+        )
+
+    # Copy of the current environment plus the password for psql/osm2pgsql.
+    child_env = dict(os.environ, PGPASSWORD=password)
+
+    if ensure_hstore and shutil.which("psql") is not None:
+        # Best effort: the outcome of this call is deliberately not checked.
+        subprocess.run(
+            [
+                "psql",
+                f"--host={host}",
+                f"--port={port}",
+                f"--dbname={dbname}",
+                f"--username={user}",
+                "--command",
+                "CREATE EXTENSION IF NOT EXISTS hstore;",
+            ],
+            env=child_env,
+            capture_output=True,
+            text=True,
+        )
+
+    ingest_cmd = [
+        "osm2pgsql",
+        f"--host={host}",
+        f"--port={port}",
+        f"--database={dbname}",
+        f"--user={user}",
+        "--create",
+        "--slim",
+        "--hstore",
+        "--multi-geometry",
+    ]
+    if style:
+        ingest_cmd.extend(["--style", str(style)])
+    ingest_cmd.append(str(source))
+
+    completed = subprocess.run(
+        ingest_cmd,
+        env=child_env,
+        capture_output=True,
+        text=True,
+    )
+
+    return {
+        "ok": completed.returncode == 0,
+        "rows_loaded": None,
+        "stderr": completed.stdout + completed.stderr,
+    }
@@ -0,0 +1,61 @@
+---
+name: render_table_page_pdfpages
+kind: function
+lang: py
+domain: infra
+version: "1.0.0"
+purity: impure
+signature: "render_table_page_pdfpages(pdf: PdfPages, title: str, rows: list[list[str]], col_labels: list[str], max_rows: int = 28, figsize: tuple[float, float] = (11.69, 8.27), fontsize: int = 8, dpi: int = 300) -> None"
+description: "Renderiza filas como páginas de tabla paginadas en un PdfPages abierto. Usa la tabla de matplotlib (Axes.table) con paginación automática por max_rows. Una página A4 landscape por chunk."
+tags: [pdf, matplotlib, table, report, infra]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [matplotlib]
+params:
+  - name: pdf
+    desc: "Objeto PdfPages abierto de matplotlib donde se escriben las páginas."
+  - name: title
+    desc: "Título mostrado encima de la tabla en cada página."
+  - name: rows
+    desc: "Lista de filas; cada fila es una lista de strings con los valores de celda."
+  - name: col_labels
+    desc: "Etiquetas de las columnas (cabecera de tabla)."
+  - name: max_rows
+    desc: "Número máximo de filas por página antes de crear una nueva (default 28)."
+  - name: figsize
+    desc: "Tamaño de figura en pulgadas. Default A4 landscape (11.69x8.27)."
+  - name: fontsize
+    desc: "Tamaño de fuente para las celdas de la tabla."
+  - name: dpi
+    desc: "Resolución al guardar cada página (default 300)."
+output: "None. Escribe páginas directamente en el PdfPages proporcionado."
+tested: true
+tests:
+  - "50 filas con max_rows=28 genera 2 paginas en pdf no vacio"
+  - "0 filas genera 1 pagina vacia sin excepcion"
+test_file_path: "python/functions/infra/tests/test_render_table_page_pdfpages.py"
+file_path: "python/functions/infra/render_table_page_pdfpages.py"
+source_repo: "internal:footprint_aurgi"
+source_license: "internal-aurgi"
+source_file: "ponderacion_isochronas/src/recomendador_centros.py"
+---
+
+## Ejemplo
+
+```python
+import matplotlib
+matplotlib.use("Agg")
+from matplotlib.backends.backend_pdf import PdfPages
+
+rows = [[str(i), f"valor_{i}"] for i in range(50)]
+with PdfPages("tabla.pdf") as pdf:
+    render_table_page_pdfpages(pdf, "Informe de centros", rows, ["ID", "Valor"])
+```
+
+## Notas
+
+Requiere `matplotlib`. Backend Agg recomendado en entornos sin pantalla.
+Cada chunk de filas genera exactamente una página. Con rows vacío genera una página vacía.
@@ -0,0 +1,57 @@
+"""Render paginated table pages into a matplotlib PdfPages object."""
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from matplotlib.backends.backend_pdf import PdfPages
+
+
+def render_table_page_pdfpages(
+    pdf: "PdfPages",
+    title: str,
+    rows: list[list[str]],
+    col_labels: list[str],
+    max_rows: int = 28,
+    figsize: tuple[float, float] = (11.69, 8.27),
+    fontsize: int = 8,
+    dpi: int = 300,
+) -> None:
+    """Write ``rows`` as one or more table pages into an open PdfPages.
+
+    The rows are split into consecutive chunks of at most ``max_rows`` and
+    each chunk is drawn as a matplotlib table on its own page, under
+    ``title``. When ``rows`` is empty a single page with only the title is
+    written.
+
+    Args:
+        pdf: An open matplotlib PdfPages object that receives the pages.
+        title: Title placed above the table on every page.
+        rows: Table rows; each row is a list of cell strings.
+        col_labels: Column header labels.
+        max_rows: Maximum number of rows per page; must be at least 1.
+        figsize: Figure size in inches (default 11.69 x 8.27, A4 landscape).
+        fontsize: Font size of the table cells.
+        dpi: Resolution passed to ``pdf.savefig`` for each page.
+
+    Raises:
+        ValueError: If ``max_rows`` is smaller than 1.
+    """
+    # Reject bad page sizes up front: 0 would make range() fail with an
+    # unrelated message and a negative value would silently write no pages.
+    if max_rows < 1:
+        raise ValueError(f"max_rows must be >= 1, got {max_rows}")
+
+    # Figures are built directly rather than through pyplot, so this function
+    # does not switch the process-wide backend and leaves no figure registered
+    # in pyplot that would need closing.
+    from matplotlib.figure import Figure
+
+    # Always render at least one page; an empty input yields a title-only page.
+    if not rows:
+        chunks: list[list[list[str]]] = [[]]
+    else:
+        chunks = [
+            rows[start:start + max_rows]
+            for start in range(0, len(rows), max_rows)
+        ]
+
+    for chunk in chunks:
+        fig = Figure(figsize=figsize)
+        ax = fig.subplots()
+        ax.axis("off")
+        if chunk:
+            table = ax.table(cellText=chunk, colLabels=col_labels, loc="center")
+            # Fix the font size explicitly instead of letting the table fit it.
+            table.auto_set_font_size(False)
+            table.set_fontsize(fontsize)
+            # Stretch rows vertically (x1.3) for readability.
+            table.scale(1, 1.3)
+        ax.set_title(title, fontsize=14, pad=12)
+        pdf.savefig(fig, dpi=dpi)
@@ -0,0 +1,45 @@
+"""Tests para add_header_logo."""
+from __future__ import annotations
+
+from pathlib import Path
+
+import matplotlib
+matplotlib.use("Agg")
+import numpy as np
+import pytest
+
+
+def test_figura_nueva_con_imagen_zeros_no_lanza_excepcion():
+    """Smoke test: adding an all-zeros logo to a fresh figure must not raise.
+
+    Original case name: 'figura nueva con imagen zeros no lanza excepcion'.
+    """
+    import sys
+    # parents[2] is the directory that contains the ``infra`` package.
+    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+    from infra.add_header_logo import add_header_logo
+    import matplotlib.pyplot as plt
+
+    blank_logo = np.zeros((50, 200, 3), dtype=np.uint8)
+    figure, _axes = plt.subplots(figsize=(11.69, 8.27))
+
+    # The call itself is the check: any exception fails the test.
+    add_header_logo(figure, blank_logo)
+    plt.close(figure)
+
+
+def test_axes_de_logo_tiene_axis_off():
+    """The logo must be drawn in one new axes whose decorations are off.
+
+    Original case name: 'axes de logo tiene axis off'.
+    """
+    import sys
+    # parents[2] is the directory that contains the ``infra`` package.
+    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+    from infra.add_header_logo import add_header_logo
+    import matplotlib.pyplot as plt
+
+    fig, ax = plt.subplots(figsize=(11.69, 8.27))
+    # Close the figure even when an assertion fails, so a failing run does
+    # not leave it registered in pyplot for the remaining tests.
+    try:
+        initial_axes_count = len(fig.axes)
+        image = np.zeros((10, 10, 3), dtype=np.uint8)
+
+        add_header_logo(fig, image, x=0.88, y=0.905, width=0.08, height=0.08)
+
+        # Exactly one axes should have been added, and it is the last one.
+        assert len(fig.axes) == initial_axes_count + 1
+        logo_ax = fig.axes[-1]
+        # axis("off") clears the axes' ``axison`` flag.
+        assert not logo_ax.axison
+    finally:
+        plt.close(fig)
@@ -0,0 +1,62 @@
+"""Tests para compress_pdf_ghostscript."""
+from __future__ import annotations
+
+import shutil
+import tempfile
+from pathlib import Path
+
+import pytest
+
+
+def _make_simple_pdf(path: Path) -> None:
+    """Write a small one-page PDF to ``path``.
+
+    Uses fpdf (``FPDF``) when it can be imported. Otherwise a minimal PDF is
+    assembled by hand; its cross-reference table and ``startxref`` value are
+    computed from the real byte offsets so the file is structurally valid.
+
+    Args:
+        path: Destination file; created or overwritten.
+    """
+    try:
+        from fpdf import FPDF
+    except ImportError:
+        FPDF = None
+
+    if FPDF is not None:
+        pdf = FPDF()
+        pdf.add_page()
+        pdf.set_font("Helvetica", size=12)
+        pdf.cell(200, 10, text="Test PDF for ghostscript compression", ln=True)
+        pdf.output(str(path))
+        return
+
+    # Fallback: catalog, page tree and one empty page, numbered from 1.
+    objects = [
+        b"<< /Type /Catalog /Pages 2 0 R >>",
+        b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
+        b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>",
+    ]
+    content = bytearray(b"%PDF-1.4\n")
+    offsets = []
+    for number, body in enumerate(objects, start=1):
+        # Record where each object starts; the xref table must point here.
+        offsets.append(len(content))
+        content += b"%d 0 obj\n" % number + body + b"\nendobj\n"
+
+    xref_offset = len(content)
+    content += b"xref\n0 %d\n" % (len(objects) + 1)
+    content += b"0000000000 65535 f \n"
+    for offset in offsets:
+        # Every xref entry is exactly 20 bytes: 10-digit offset, generation, flag.
+        content += b"%010d 00000 n \n" % offset
+    content += b"trailer\n<< /Size %d /Root 1 0 R >>\n" % (len(objects) + 1)
+    content += b"startxref\n%d\n%%%%EOF\n" % xref_offset
+    path.write_bytes(bytes(content))
+
+
+def test_crea_pdf_temporal_y_comprime_retorna_bool_sin_excepcion():
+    """Compressing a freshly created PDF returns a bool and keeps the file.
+
+    Original case name: 'crea pdf temporal y comprime - retorna bool sin
+    excepcion'. Passes whether or not Ghostscript is installed, because only
+    the result type and the file's survival are checked.
+    """
+    import sys
+    # parents[2] is the directory that contains the ``infra`` package.
+    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+    from infra.compress_pdf_ghostscript import compress_pdf_ghostscript
+
+    with tempfile.TemporaryDirectory() as workdir:
+        sample = Path(workdir) / "test.pdf"
+        _make_simple_pdf(sample)
+        assert sample.exists()
+
+        outcome = compress_pdf_ghostscript(sample)
+
+        assert isinstance(outcome, bool)
+        # The PDF must still be there whether or not it was compressed.
+        assert sample.exists()
+
+
+def test_retorna_False_cuando_gs_no_esta_disponible(monkeypatch):
+    """With no ``gs`` executable to be found, the function must return False.
+
+    Original case name: 'retorna False cuando gs no esta disponible'.
+    """
+    import sys
+    # parents[2] is the directory that contains the ``infra`` package.
+    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+    from infra.compress_pdf_ghostscript import compress_pdf_ghostscript
+
+    # Make every executable lookup fail, as if Ghostscript were not installed.
+    monkeypatch.setattr("shutil.which", lambda x: None)
+
+    with tempfile.TemporaryDirectory() as workdir:
+        sample = Path(workdir) / "test.pdf"
+        _make_simple_pdf(sample)
+        outcome = compress_pdf_ghostscript(sample)
+        assert outcome is False
@@ -0,0 +1,38 @@
+"""Tests para osm2pgsql_ingest."""
+from __future__ import annotations
+
+import shutil
+import tempfile
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+
+def test_lanza_FileNotFoundError_con_path_inexistente():
+    """A path that does not exist must raise FileNotFoundError.
+
+    Original case name: 'lanza FileNotFoundError con path inexistente'.
+    """
+    import sys
+    # parents[2] is the directory that contains the ``infra`` package.
+    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+    from infra.osm2pgsql_ingest import osm2pgsql_ingest
+
+    missing_pbf = "/tmp/non_existent_file_that_does_not_exist.osm.pbf"
+    with pytest.raises(FileNotFoundError):
+        osm2pgsql_ingest(missing_pbf)
+
+
+def test_lanza_RuntimeError_si_osm2pgsql_no_esta_en_PATH():
+    """A missing osm2pgsql executable must raise RuntimeError.
+
+    Original case name: 'lanza RuntimeError si osm2pgsql no esta en PATH'.
+    The executable lookup is patched to fail, so the test runs the same way
+    on machines that do have osm2pgsql installed instead of skipping there.
+    """
+    import sys
+    # parents[2] is the directory that contains the ``infra`` package.
+    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+    from infra.osm2pgsql_ingest import osm2pgsql_ingest
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        pbf_path = Path(tmpdir) / "fake.osm.pbf"
+        # Create a dummy file so FileNotFoundError is not raised first
+        pbf_path.write_bytes(b"PBF")
+
+        # The function looks the tool up through shutil.which; force a miss.
+        with patch("shutil.which", return_value=None):
+            with pytest.raises(RuntimeError, match="osm2pgsql"):
+                osm2pgsql_ingest(pbf_path)
@@ -0,0 +1,53 @@
+"""Tests para render_table_page_pdfpages."""
+from __future__ import annotations
+
+import tempfile
+from pathlib import Path
+
+import matplotlib
+matplotlib.use("Agg")
+import pytest
+
+
+def test_50_filas_con_max_rows_28_genera_2_paginas_en_pdf_no_vacio():
+    """50 rows with max_rows=28 must produce exactly two pages.
+
+    Original case name: '50 filas con max_rows=28 genera 2 paginas en pdf no
+    vacio'. The page count is read from the PdfPages object itself, so it is
+    verified even when no PDF reader library is installed.
+    """
+    import sys
+    # parents[2] is the directory that contains the ``infra`` package.
+    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+    from infra.render_table_page_pdfpages import render_table_page_pdfpages
+    from matplotlib.backends.backend_pdf import PdfPages
+
+    rows = [[str(i), f"valor_{i}", f"extra_{i}"] for i in range(50)]
+    col_labels = ["ID", "Valor", "Extra"]
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        pdf_path = Path(tmpdir) / "test_table.pdf"
+        with PdfPages(str(pdf_path)) as pdf:
+            render_table_page_pdfpages(pdf, "Test Tabla", rows, col_labels, max_rows=28)
+            # 28 rows on the first page, the remaining 22 on the second.
+            assert pdf.get_pagecount() == 2
+
+        assert pdf_path.exists()
+        assert pdf_path.stat().st_size > 0
+
+        # Cross-check the file on disk when pypdf happens to be available.
+        try:
+            from pypdf import PdfReader
+        except ImportError:
+            return
+        assert len(PdfReader(str(pdf_path)).pages) == 2
+
+
+def test_0_filas_genera_1_pagina_vacia_sin_excepcion():
+    """An empty row list must still produce exactly one page, without raising.
+
+    Original case name: '0 filas genera 1 pagina vacia sin excepcion'.
+    """
+    import sys
+    # parents[2] is the directory that contains the ``infra`` package.
+    sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+    from infra.render_table_page_pdfpages import render_table_page_pdfpages
+    from matplotlib.backends.backend_pdf import PdfPages
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        pdf_path = Path(tmpdir) / "empty_table.pdf"
+        with PdfPages(str(pdf_path)) as pdf:
+            render_table_page_pdfpages(pdf, "Vacío", [], ["Col1", "Col2"])
+            # The name promises one page; check it instead of only the size.
+            assert pdf.get_pagecount() == 1
+
+        assert pdf_path.exists()
+        assert pdf_path.stat().st_size > 0