# image_to_3d_studio/backend/server.py

"""FastAPI dispatcher para image-to-3D.

Endpoints:
  GET    /health             -> {"status":"ok","loaded":[...ids of loaded models...],"version":"0.1.0"}
  GET    /models             -> {"models":[{"id","label","license","vram_gb_est","loaded"}...]}
  DELETE /models/<id>        -> {"unloaded":"<id>"}  (404 if that model is not loaded)
  POST   /generate           -> GLB bytes (Content-Type: model/gltf-binary)
    multipart/form-data:
      file=<png/jpg image>
      model=<id>             (triposr | hunyuan3d-2 | trellis; optional, default triposr)
      seed=<int>             (optional, default 0)
      mc_resolution=<int>    (optional, default 256)
      foreground_ratio=<f>   (optional, default 0.85)
      texture=<bool>         (optional, default true)

Implementation:
  - Each backend (triposr/hunyuan3d-2/trellis) lives in its own module under
    backends/ (see CATALOG for the exact module names) with a common
    interface: load() -> handle, and handle.infer(image_pil, cfg) -> GLB bytes.
  - Lazy load: the first POST for a given model id loads the model onto the
    GPU and stores its handle in a global dict. Manual release via
    DELETE /models/<id>.
  - Single-process, single-GPU: requests are serialized per model with an
    asyncio.Lock so that two infer calls for the same model never overlap.
    Note that the lock is per model: requests for different models are not
    serialized against each other.

Run:
    cd projects/imagegen/apps/image_to_3d_studio/backend
    ../../../analysis/spike_image_to_3d/.venv/bin/python -m uvicorn server:app \\
        --host 127.0.0.1 --port 8600 --reload
"""
from __future__ import annotations

import asyncio
import io
import os
import sys
import time
from dataclasses import dataclass
from typing import Any, Dict

from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import Response
from PIL import Image


# --- Model catalog ------------------------------------------------------------

@dataclass(frozen=True)
class ModelSpec:
    """Immutable catalog entry describing one image-to-3D model."""

    # Identifier clients send in the `model` form field; also the CATALOG key.
    id: str
    # Human-readable name, reported by GET /models.
    label: str
    # Short license tag, reported by GET /models.
    license: str
    # Estimated VRAM footprint in GB (informational; reported by GET /models).
    vram_gb_est: float
    # Dotted module path imported on demand by _load_backend (backends.<name>).
    backend_module: str


# Registry of the supported models, keyed by ModelSpec.id. Iteration order
# (triposr, hunyuan3d-2, trellis) is the order reported by GET /models.
CATALOG: Dict[str, ModelSpec] = {
    entry.id: entry
    for entry in (
        ModelSpec(
            id="triposr",
            label="TripoSR (Stability + Tripo, MIT)",
            license="MIT",
            vram_gb_est=6.0,
            backend_module="backends.triposr",
        ),
        ModelSpec(
            id="hunyuan3d-2",
            label="Hunyuan3D-2 (Tencent, Community License)",
            license="Tencent Community",
            vram_gb_est=12.0,
            backend_module="backends.hunyuan3d_2",
        ),
        ModelSpec(
            id="trellis",
            label="Trellis (Microsoft, MIT code / research weights)",
            license="MIT (code)",
            vram_gb_est=16.0,
            backend_module="backends.trellis",
        ),
    )
}


# --- In-memory state ----------------------------------------------------------

# Backend handles of the models that are currently loaded, keyed by model id.
_loaded: Dict[str, Any] = {}
# One asyncio.Lock per model id, created lazily by _lock_for().
_locks: Dict[str, asyncio.Lock] = {}


def _lock_for(model_id: str) -> asyncio.Lock:
    """Return the lock associated with *model_id*.

    The lock is created and cached in ``_locks`` the first time a given id is
    requested; later calls with the same id return that same object.
    """
    try:
        return _locks[model_id]
    except KeyError:
        fresh = asyncio.Lock()
        _locks[model_id] = fresh
        return fresh


def _load_backend(spec: ModelSpec) -> Any:
    """Import the backend module named by *spec* and return its ``load()`` result.

    The import happens on demand. Before importing, the directory containing
    this file is placed at the front of ``sys.path`` (unless it is already
    listed) so that ``spec.backend_module`` can be resolved from there.

    Each backend module must expose ``load() -> handle``; the returned handle
    is expected to provide ``infer(image, cfg) -> bytes``.
    """
    from importlib import import_module

    base_dir = os.path.dirname(os.path.abspath(__file__))
    if base_dir not in sys.path:
        sys.path.insert(0, base_dir)

    backend = import_module(spec.backend_module)
    return backend.load()


# --- FastAPI app --------------------------------------------------------------

# ASGI application object; the uvicorn command in the module docstring
# targets it as `server:app`.
app = FastAPI(title="image_to_3d_studio backend", version="0.1.0")


@app.get("/health")
def health() -> Dict[str, Any]:
    """Liveness probe.

    Returns a dict with a fixed ``"status": "ok"``, the alphabetically sorted
    ids of the models currently held in ``_loaded``, and the version string.
    """
    loaded_ids = sorted(_loaded)
    return {"status": "ok", "loaded": loaded_ids, "version": "0.1.0"}


@app.get("/models")
def list_models() -> Dict[str, Any]:
    """Describe every model in the catalog.

    Returns ``{"models": [...]}`` with one entry per ``CATALOG`` value, in
    catalog order. Each entry carries the spec's id, label, license and VRAM
    estimate, plus a ``loaded`` flag telling whether a handle for that id is
    currently present in ``_loaded``.
    """
    entries = []
    for spec in CATALOG.values():
        entry = {
            "id": spec.id,
            "label": spec.label,
            "license": spec.license,
            "vram_gb_est": spec.vram_gb_est,
            "loaded": spec.id in _loaded,
        }
        entries.append(entry)
    return {"models": entries}


def _release_cuda_cache() -> None:
    """Best-effort: hand unused cached GPU memory back to the driver.

    A garbage-collection pass runs first so that objects kept alive only by
    reference cycles are destroyed before the CUDA caching allocator is asked
    to release its free blocks. Any failure (torch not installed, broken
    install, CUDA error) is ignored on purpose: unloading a model must not
    fail because this optional step could not run.
    """
    import gc

    gc.collect()
    try:
        import torch

        torch.cuda.empty_cache()
    except Exception:
        # Deliberate best-effort; see docstring.
        pass


@app.delete("/models/{model_id}")
async def unload(model_id: str) -> Dict[str, Any]:
    """Unload a model and release its resources.

    The handle is removed from ``_loaded`` while holding the same per-model
    lock that ``generate`` holds during load and inference, so a model is never
    closed while an inference on it is still running; the request waits for
    the in-flight inference to finish instead.

    Args:
        model_id: id of the model to unload (path parameter).

    Returns:
        ``{"unloaded": model_id}`` on success.

    Raises:
        HTTPException: 404 if no handle is loaded for ``model_id``.
    """
    # Cheap pre-check so that requests for arbitrary ids do not create (and
    # permanently keep) a lock entry for them.
    if model_id not in _loaded:
        raise HTTPException(404, f"not loaded: {model_id}")

    async with _lock_for(model_id):
        handle = _loaded.pop(model_id, None)
        if handle is None:
            # Another DELETE unloaded it while this one waited for the lock.
            raise HTTPException(404, f"not loaded: {model_id}")

        # If the backend handle exposes close(), call it. It may block, so it
        # runs in a worker thread rather than on the event loop.
        close = getattr(handle, "close", None)
        if callable(close):
            await asyncio.to_thread(close)

        # Drop every local reference to the handle (the bound `close` method
        # holds one too) BEFORE emptying the cache: memory still referenced by
        # a live object cannot be released by torch.cuda.empty_cache().
        del close, handle
        await asyncio.to_thread(_release_cuda_cache)

    return {"unloaded": model_id}


@app.post("/generate")
async def generate(
    file: UploadFile = File(...),
    model: str = Form("triposr"),
    seed: int = Form(0),
    mc_resolution: int = Form(256),
    foreground_ratio: float = Form(0.85),
    texture: bool = Form(True),
) -> Response:
    """Generate a GLB mesh from an uploaded image with the selected model.

    The model is loaded lazily on first use and its handle cached in
    ``_loaded``. Loading and inference both run in a worker thread while the
    per-model lock is held, so requests for the same model are processed one
    at a time.

    Args:
        file: uploaded image; it is decoded with PIL and converted to RGB.
        model: catalog id of the model to use.
        seed, mc_resolution, foreground_ratio, texture: forwarded unchanged to
            the backend in the ``cfg`` dict.

    Returns:
        A ``model/gltf-binary`` response whose body is the backend's output,
        with ``X-Model``, ``X-Duration-ms`` (inference time only, excluding
        model load) and ``X-Bytes`` headers.

    Raises:
        HTTPException: 400 for an unknown model id or an undecodable image;
            501 if the backend raises ``NotImplementedError`` while loading or
            inferring; 500 for any other load or inference failure.
    """
    spec = CATALOG.get(model)
    if spec is None:
        raise HTTPException(400, f"unknown model: {model} (catalog: {list(CATALOG)})")

    raw = await file.read()
    try:
        image = Image.open(io.BytesIO(raw)).convert("RGB")
    except Exception as e:
        raise HTTPException(400, f"bad image: {e}") from e

    cfg = dict(
        seed=seed,
        mc_resolution=mc_resolution,
        foreground_ratio=foreground_ratio,
        texture=texture,
    )

    async with _lock_for(model):
        # Work with a local reference instead of re-reading _loaded[model]
        # after an await: the entry may be removed concurrently by an unload.
        handle = _loaded.get(model)
        if handle is None:
            # Report load failures the same way as inference failures instead
            # of letting them escape as an unhandled exception.
            try:
                handle = await asyncio.to_thread(_load_backend, spec)
            except NotImplementedError as e:
                raise HTTPException(501, str(e)) from e
            except Exception as e:
                raise HTTPException(500, f"{model} load failed: {e}") from e
            _loaded[model] = handle

        t0 = time.perf_counter()
        try:
            glb_bytes = await asyncio.to_thread(handle.infer, image, cfg)
        except NotImplementedError as e:
            raise HTTPException(501, str(e)) from e
        except Exception as e:
            raise HTTPException(500, f"{model} infer failed: {e}") from e
        dt_ms = int((time.perf_counter() - t0) * 1000)

    return Response(
        content=glb_bytes,
        media_type="model/gltf-binary",
        headers={
            "X-Model": model,
            "X-Duration-ms": str(dt_ms),
            "X-Bytes": str(len(glb_bytes)),
        },
    )