chore: auto-commit (5 archivos)

- CMakeLists.txt - app.md - appicon.ico - backend/ - main.cpp Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-30 17:28:48 +02:00
commit d3c83053f2
14 changed files with 1020 additions and 0 deletions
@@ -0,0 +1,17 @@
+"""Interfaz comun para backends image-to-3D.
+
+Cada backend (triposr, hunyuan3d_2, trellis) expone:
+  load() -> Handle
+  donde Handle tiene metodo infer(image: PIL.Image, cfg: dict) -> bytes (GLB).
+
+`cfg` recibe: seed, mc_resolution, foreground_ratio, texture.
+"""
+from __future__ import annotations
+
+from typing import Protocol, Any, Dict
+from PIL import Image
+
+
+class BackendHandle(Protocol):
+    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes: ...
+    def close(self) -> None: ...
@@ -0,0 +1,23 @@
+"""Backend Hunyuan3D-2 (Tencent, Community License).
+
+STUB. Para implementar: clonar github.com/Tencent/Hunyuan3D-2 a sources/,
+instalar deps, cargar pipeline shape + texture.
+"""
+from __future__ import annotations
+
+from typing import Any, Dict
+from PIL import Image
+
+
+class Handle:
+    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
+        raise NotImplementedError(
+            "hunyuan3d-2 backend pendiente. Ver notebook 03_smoke_hunyuan3d.ipynb"
+        )
+
+    def close(self) -> None:  # pragma: no cover
+        return
+
+
+def load() -> Handle:
+    return Handle()
@@ -0,0 +1,23 @@
+"""Backend Trellis (Microsoft, MIT code).
+
+STUB. Para implementar: clonar github.com/microsoft/TRELLIS a sources/,
+instalar deps (kaolin + custom CUDA), cargar pipeline structured latents.
+"""
+from __future__ import annotations
+
+from typing import Any, Dict
+from PIL import Image
+
+
+class Handle:
+    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
+        raise NotImplementedError(
+            "trellis backend pendiente. Ver notebook 04_smoke_trellis.ipynb"
+        )
+
+    def close(self) -> None:  # pragma: no cover
+        return
+
+
+def load() -> Handle:
+    return Handle()
@@ -0,0 +1,93 @@
+"""Backend TripoSR (Stability + Tripo, MIT).
+
+Asume que `sources/TripoSR` esta clonado en el registry. Importa `tsr.system.TSR`.
+Descarga checkpoint desde HF en la primera carga (~1.2 GB).
+"""
+from __future__ import annotations
+
+import io
+import os
+import pathlib
+import sys
+from dataclasses import dataclass
+from typing import Any, Dict
+
+import numpy as np
+import torch
+import trimesh
+from PIL import Image
+
+
+def _ensure_sources_on_path() -> pathlib.Path:
+    root = pathlib.Path(os.environ.get("FN_REGISTRY_ROOT", "/home/lucas/fn_registry"))
+    src  = root / "sources" / "TripoSR"
+    if not src.exists():
+        raise RuntimeError(
+            f"TripoSR no clonado en {src}. "
+            "git clone --depth=1 https://github.com/VAST-AI-Research/TripoSR.git "
+            f"{src}"
+        )
+    if str(src) not in sys.path:
+        sys.path.insert(0, str(src))
+    return src
+
+
+@dataclass
+class Handle:
+    model: Any
+    rembg_session: Any
+    device: str
+
+    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
+        from tsr.utils import remove_background, resize_foreground
+
+        fg_ratio = float(cfg.get("foreground_ratio", 0.85))
+        mc_res   = int(cfg.get("mc_resolution", 256))
+
+        fg = remove_background(image, self.rembg_session)
+        fg = resize_foreground(fg, fg_ratio)
+
+        # Composite RGBA -> RGB sobre gris 0.5 (preprocesado canonico TripoSR
+        # run.py). Sin esto el tokenizer DINO recibe 4 canales y peta:
+        # "The size of tensor a (4) must match tensor b (3) at dim 2".
+        arr = np.asarray(fg).astype(np.float32) / 255.0
+        if arr.shape[-1] == 4:
+            arr = arr[:, :, :3] * arr[:, :, 3:4] + (1.0 - arr[:, :, 3:4]) * 0.5
+            fg = Image.fromarray((arr * 255.0).astype(np.uint8))
+
+        with torch.no_grad():
+            scene_codes = self.model([fg], device=self.device)
+            meshes = self.model.extract_mesh(
+                scene_codes, has_vertex_color=False, resolution=mc_res
+            )
+        m = meshes[0]
+        tm = trimesh.Trimesh(
+            vertices=np.asarray(m.vertices),
+            faces=np.asarray(m.faces),
+            process=True,
+        )
+        buf = io.BytesIO()
+        tm.export(buf, file_type="glb")
+        return buf.getvalue()
+
+    def close(self) -> None:
+        del self.model
+        del self.rembg_session
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+
+def load() -> Handle:
+    _ensure_sources_on_path()
+    from tsr.system import TSR
+    from rembg import new_session
+
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model = TSR.from_pretrained(
+        "stabilityai/TripoSR",
+        config_name="config.yaml",
+        weight_name="model.ckpt",
+    )
+    model.renderer.set_chunk_size(8192)
+    model.to(device)
+    return Handle(model=model, rembg_session=new_session(), device=device)
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Launch the FastAPI backend with the interpreter of the analysis venv (same
+# deps: torch + diffusers + transformers + trimesh + Pillow). This script
+# never creates a venv of its own.
+set -euo pipefail
+
+script_dir="$(dirname "$0")"
+ROOT="$(cd "$script_dir"/../../../../.. && pwd)"
+VENV="$ROOT/projects/imagegen/analysis/spike_image_to_3d/.venv"
+HERE="$(cd "$script_dir" && pwd)"
+PYTHON="$VENV/bin/python"
+
+# Bail out early when the shared venv has no executable interpreter.
+if [[ ! -x "$PYTHON" ]]; then
+    {
+        echo "venv del analysis no existe: $VENV"
+        echo "Crea el analysis primero: ./fn run init_jupyter_analysis --project imagegen spike_image_to_3d"
+    } >&2
+    exit 1
+fi
+
+export FN_REGISTRY_ROOT="$ROOT"
+
+# Extra command-line arguments are appended after the fixed options.
+uvicorn_args=(
+    --host 127.0.0.1
+    --port "${PORT:-8600}"
+    --app-dir "$HERE"
+)
+exec "$PYTHON" -m uvicorn server:app "${uvicorn_args[@]}" "$@"
@@ -0,0 +1,201 @@
+"""FastAPI dispatcher para image-to-3D.
+
+Endpoints:
+  GET  /health               -> {"status":"ok","loaded":[...ids...],"version":"0.1.0"}
+  GET  /models               -> {"models":[{"id","label","license","vram_gb_est","loaded"}...]}
+  POST /generate             -> bytes GLB (Content-Type: model/gltf-binary)
+    multipart/form-data:
+      file=<imagen png/jpg>
+      model=<id>             (triposr | hunyuan3d-2 | trellis)
+      seed=<int>             (opcional, default 0)
+      mc_resolution=<int>    (opcional, default 256)
+      foreground_ratio=<f>   (opcional, default 0.85)
+      texture=<bool>         (opcional, default true)
+
+Implementacion:
+  - Cada backend (triposr/hunyuan3d-2/trellis) vive en backends/<id>.py
+    con interfaz comun: load() -> handle, handle.infer(image_pil, cfg) -> bytes_glb.
+  - Lazy load: el primer POST con un model_id carga el modelo en GPU y lo
+    guarda en un dict global. Liberacion manual via DELETE /models/<id>.
+  - Single-process, single-GPU: serializamos peticiones por modelo con un
+    asyncio.Lock para no chocar dos infer simultaneos en la misma GPU.
+
+Lanzar:
+    cd projects/imagegen/apps/image_to_3d_studio/backend
+    ../../../analysis/spike_image_to_3d/.venv/bin/python -m uvicorn server:app \\
+        --host 127.0.0.1 --port 8600 --reload
+"""
+from __future__ import annotations
+
+import asyncio
+import io
+import os
+import sys
+import time
+from dataclasses import dataclass
+from typing import Any, Dict
+
+from fastapi import FastAPI, File, Form, HTTPException, UploadFile
+from fastapi.responses import Response
+from PIL import Image
+
+
+# --- Catalogo de modelos ------------------------------------------------------
+
+@dataclass(frozen=True)
+class ModelSpec:
+    """Immutable catalog entry describing one image-to-3D model."""
+
+    id: str               # model identifier; also the key used in CATALOG
+    label: str            # human-readable name, returned by GET /models
+    license: str          # license text, returned by GET /models
+    vram_gb_est: float    # estimated VRAM in GB, returned by GET /models
+    backend_module: str   # dotted module path imported on demand: backends.<name>
+
+
+CATALOG: Dict[str, ModelSpec] = {
+    "triposr": ModelSpec(
+        id="triposr",
+        label="TripoSR (Stability + Tripo, MIT)",
+        license="MIT",
+        vram_gb_est=6.0,
+        backend_module="backends.triposr",
+    ),
+    "hunyuan3d-2": ModelSpec(
+        id="hunyuan3d-2",
+        label="Hunyuan3D-2 (Tencent, Community License)",
+        license="Tencent Community",
+        vram_gb_est=12.0,
+        backend_module="backends.hunyuan3d_2",
+    ),
+    "trellis": ModelSpec(
+        id="trellis",
+        label="Trellis (Microsoft, MIT code / research weights)",
+        license="MIT (code)",
+        vram_gb_est=16.0,
+        backend_module="backends.trellis",
+    ),
+}
+
+
+# --- Estado en memoria --------------------------------------------------------
+
+_loaded: Dict[str, Any] = {}              # model_id -> backend handle
+_locks: Dict[str, asyncio.Lock] = {}      # model_id -> lock (one per model)
+
+
+def _lock_for(model_id: str) -> asyncio.Lock:
+    lk = _locks.get(model_id)
+    if lk is None:
+        lk = asyncio.Lock()
+        _locks[model_id] = lk
+    return lk
+
+
+def _load_backend(spec: ModelSpec) -> Any:
+    """Importa el modulo del backend on-demand y llama load(). Cada backend
+    debe exponer load() -> handle e infer(handle, image, cfg) -> bytes."""
+    # Garantiza que `backends/` esta en sys.path
+    here = os.path.dirname(os.path.abspath(__file__))
+    if here not in sys.path:
+        sys.path.insert(0, here)
+    import importlib
+    mod = importlib.import_module(spec.backend_module)
+    return mod.load()
+
+
+# --- FastAPI app --------------------------------------------------------------
+
+# ASGI application object; the launch command refers to it as `server:app`.
+app = FastAPI(title="image_to_3d_studio backend", version="0.1.0")
+
+
+@app.get("/health")
+def health() -> Dict[str, Any]:
+    return {
+        "status": "ok",
+        "loaded": sorted(_loaded.keys()),
+        "version": "0.1.0",
+    }
+
+
+@app.get("/models")
+def list_models() -> Dict[str, Any]:
+    return {
+        "models": [
+            {
+                "id": m.id,
+                "label": m.label,
+                "license": m.license,
+                "vram_gb_est": m.vram_gb_est,
+                "loaded": m.id in _loaded,
+            }
+            for m in CATALOG.values()
+        ]
+    }
+
+
+@app.delete("/models/{model_id}")
+def unload(model_id: str) -> Dict[str, Any]:
+    handle = _loaded.pop(model_id, None)
+    if handle is None:
+        raise HTTPException(404, f"not loaded: {model_id}")
+    # Si el backend expone close(), lo llamamos
+    close = getattr(handle, "close", None)
+    if callable(close):
+        close()
+    # Empuja VRAM libre
+    try:
+        import torch
+        torch.cuda.empty_cache()
+    except Exception:
+        pass
+    return {"unloaded": model_id}
+
+
+@app.post("/generate")
+async def generate(
+    file: UploadFile = File(...),
+    model: str = Form("triposr"),
+    seed: int = Form(0),
+    mc_resolution: int = Form(256),
+    foreground_ratio: float = Form(0.85),
+    texture: bool = Form(True),
+) -> Response:
+    spec = CATALOG.get(model)
+    if spec is None:
+        raise HTTPException(400, f"unknown model: {model} (catalog: {list(CATALOG)})")
+
+    raw = await file.read()
+    try:
+        image = Image.open(io.BytesIO(raw)).convert("RGB")
+    except Exception as e:
+        raise HTTPException(400, f"bad image: {e}")
+
+    cfg = dict(
+        seed=seed,
+        mc_resolution=mc_resolution,
+        foreground_ratio=foreground_ratio,
+        texture=texture,
+    )
+
+    lock = _lock_for(model)
+    async with lock:
+        if model not in _loaded:
+            _loaded[model] = await asyncio.to_thread(_load_backend, spec)
+        handle = _loaded[model]
+        t0 = time.perf_counter()
+        try:
+            glb_bytes = await asyncio.to_thread(handle.infer, image, cfg)
+        except NotImplementedError as e:
+            raise HTTPException(501, str(e))
+        except Exception as e:
+            raise HTTPException(500, f"{model} infer failed: {e}")
+        dt_ms = int((time.perf_counter() - t0) * 1000)
+
+    return Response(
+        content=glb_bytes,
+        media_type="model/gltf-binary",
+        headers={
+            "X-Model": model,
+            "X-Duration-ms": str(dt_ms),
+            "X-Bytes": str(len(glb_bytes)),
+        },
+    )