feat(infra): auto-commit con 56 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-21 14:22:55 +02:00
parent c1071a82b3
commit 32c7336bf6
56 changed files with 5307 additions and 100 deletions
@@ -0,0 +1,135 @@
+"""
+Estimación de profundidad monocular a partir de una sola imagen con Depth-Anything-V2.
+
+Función del registry (grupo de capacidad `img-to-3d`, dominio `datascience`). Promovida desde
+la app `img_to_3d_webapp` para que cualquier artefacto pueda estimar un mapa de profundidad sin
+reimplementar la carga del modelo HuggingFace ni la normalización del resultado.
+
+Impura: descarga/carga pesos de un modelo de transformers, usa GPU si está disponible y mantiene
+una caché de pipelines a nivel de proceso para no recargar en cada llamada.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from PIL import Image
+
+# A transformers pipeline is expensive to instantiate (it loads the model weights), so
+# built pipelines are cached at module level, keyed by (model, device), so that a service
+# does not reload them on every request.
+# This is mutable PROCESS-level state: documented as an impurity (see the .md "Gotchas").
+# It can be bypassed per call with use_cache=False.
+_PIPE_CACHE: dict = {}
+
+
+def _resolve_device(device: str) -> int:
+    """Translate a device spec into the integer index passed to ``transformers.pipeline``.
+
+    Args:
+        device: ``"cpu"``, ``"auto"``, a bare index such as ``"0"``, or a CUDA string
+            such as ``"cuda"`` or ``"cuda:1"``.
+
+    Returns:
+        ``-1`` for CPU, otherwise the GPU index. ``"auto"`` yields ``0`` when
+        ``torch.cuda.is_available()`` is true and ``-1`` otherwise. A string that starts
+        with ``"cuda"`` but carries no parseable ``:N`` ordinal falls back to ``0``; any
+        other unrecognised string falls back to ``-1``.
+    """
+    if device == "cpu":
+        return -1
+    if device == "auto":
+        # torch is only needed to probe for CUDA, so it is imported in this branch only.
+        import torch
+
+        return 0 if torch.cuda.is_available() else -1
+    # Explicit device: either a bare index ("0") or a CUDA string ("cuda", "cuda:1").
+    try:
+        return int(device)
+    except ValueError:
+        pass
+    if not device.startswith("cuda"):
+        return -1
+    # Honour the ordinal of "cuda:N" instead of collapsing every CUDA string onto GPU 0.
+    prefix, _, ordinal = device.partition(":")
+    if prefix == "cuda" and ordinal.isdecimal():
+        return int(ordinal)
+    return 0
+
+
+def _build_pipe(model_name: str, device: str):
+    """Instantiate a new ``depth-estimation`` pipeline for ``model_name`` on ``device``.
+
+    ``transformers`` is imported inside the function rather than at module import time.
+    The ``device`` spec is converted to an index via ``_resolve_device`` before being
+    handed to ``transformers.pipeline``.
+    """
+    from transformers import pipeline as make_pipeline
+
+    device_index = _resolve_device(device)
+    return make_pipeline("depth-estimation", model=model_name, device=device_index)
+
+
+def _get_pipe(model_name: str, device: str, use_cache: bool):
+    """Return a depth-estimation pipeline, reusing the module-level cache when allowed.
+
+    Args:
+        model_name: model id forwarded to ``_build_pipe``.
+        device: device spec forwarded to ``_build_pipe``.
+        use_cache: when False, build a fresh pipeline and neither read nor write
+            ``_PIPE_CACHE``; when True, look the pipeline up and store it on a miss.
+
+    Returns:
+        The pipeline object produced by ``_build_pipe``.
+    """
+    if not use_cache:
+        return _build_pipe(model_name, device)
+    # Key on the resolved device index, not the raw string, so that equivalent specs
+    # (e.g. "0" and "cuda:0") share a single loaded pipeline instead of each loading
+    # their own copy of the weights.
+    key = (model_name, _resolve_device(device))
+    pipe = _PIPE_CACHE.get(key)
+    if pipe is None:
+        pipe = _build_pipe(model_name, device)
+        _PIPE_CACHE[key] = pipe
+    return pipe
+
+
+def estimate_image_depth(
+    image_path: str,
+    model_name: str = "depth-anything/Depth-Anything-V2-Small-hf",
+    device: str = "auto",
+    use_cache: bool = True,
+) -> dict:
+    """
+    Estimate a monocular depth map from a single image.
+
+    Args:
+        image_path: path to the input image (any format PIL can open).
+        model_name: HuggingFace model id of a depth-estimation model.
+        device: "auto" (GPU if available), "cpu", or an explicit index / CUDA string
+            ("cuda:0", "0").
+        use_cache: if True (default) reuse the process-level pipeline cache; if False
+            build a new pipeline and leave the cache untouched.
+
+    Returns (dict; any ``Exception`` is caught and reported, not raised):
+        Success: {"status": "ok", "depth": HxW float32 ndarray rescaled to [0,1]
+                 (1 = closest to the camera), "image": the RGB-converted PIL.Image,
+                 "height": H, "width": W, "model": model_name, "device": device}.
+                 "device" echoes the argument as passed, not the resolved index.
+        Error: {"status": "error", "error": str} (invalid path, model unavailable,
+               invalid device, inference failure).
+    """
+    try:
+        # Image.open reads lazily and holds the file open. convert() returns an
+        # independent in-memory image, so the source handle is released on leaving
+        # the with-block instead of lingering until garbage collection.
+        with Image.open(image_path) as source:
+            image = source.convert("RGB")
+        pipe = _get_pipe(model_name, device, use_cache)
+        result = pipe(image)
+        depth = np.asarray(result["depth"], dtype=np.float32)
+
+        # Rescale to [0,1]. Depth-Anything returns relative disparity (larger = closer).
+        d = depth - depth.min()
+        peak = d.max()
+        if peak > 0:
+            # A constant map has peak == 0; it is left as all zeros to avoid 0/0.
+            d = d / peak
+
+        H, W = d.shape
+        return {
+            "status": "ok",
+            "depth": d,
+            "image": image,
+            "height": int(H),
+            "width": int(W),
+            "model": model_name,
+            "device": device,
+        }
+    except Exception as e:  # noqa: BLE001
+        # Deliberate catch-all: the contract is to report failures as a dict.
+        return {"status": "error", "error": str(e)}
+
+
+if __name__ == "__main__":
+    # Demo runner for `fn run estimate_image_depth_py_datascience <image_path> [model] [device]`.
+    # Prints a JSON-serializable summary (the ndarray and the PIL.Image are not serialized).
+    import json
+    import sys
+
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        print(json.dumps({"status": "error", "error": "uso: <image_path> [model_name] [device]"}))
+        sys.exit(1)
+
+    # Fill whichever optional positionals were omitted with their defaults.
+    fallbacks = ("depth-anything/Depth-Anything-V2-Small-hf", "auto")
+    supplied = tuple(cli_args[1:3])
+    chosen_model, chosen_device = supplied + fallbacks[len(supplied):]
+
+    outcome = estimate_image_depth(cli_args[0], model_name=chosen_model, device=chosen_device)
+    if outcome["status"] != "ok":
+        # The error dict only holds strings, so it can be dumped as-is.
+        print(json.dumps(outcome))
+        sys.exit(1)
+
+    depth_map = outcome["depth"]
+    print(
+        json.dumps(
+            {
+                "status": "ok",
+                "height": outcome["height"],
+                "width": outcome["width"],
+                "depth_min": float(depth_map.min()),
+                "depth_max": float(depth_map.max()),
+                "depth_mean": round(float(depth_map.mean()), 4),
+                "model": outcome["model"],
+                "device": outcome["device"],
+            }
+        )
+    )