"""FastAPI dispatcher para image-to-3D. Endpoints: GET /health -> {"status":"ok","models":[...loaded...]} GET /models -> {"models":[{"id","loaded","vram_gb_est","license"}...]} POST /generate -> bytes GLB (Content-Type: model/gltf-binary) multipart/form-data: file= model= (triposr | hunyuan3d-2 | trellis) seed= (opcional, default 0) mc_resolution= (opcional, default 256) foreground_ratio= (opcional, default 0.85) texture= (opcional, default true) Implementacion: - Cada backend (triposr/hunyuan3d-2/trellis) vive en backends/.py con interfaz comun: load() -> handle, infer(handle, image_pil, cfg) -> bytes_glb. - Lazy load: el primer POST con un model_id carga el modelo en GPU y lo guarda en un dict global. Liberacion manual via DELETE /models/. - Single-process, single-GPU: serializamos peticiones por modelo con un asyncio.Lock para no chocar dos infer simultaneos en la misma GPU. Lanzar: cd projects/imagegen/apps/image_to_3d_studio/backend ../../../analysis/spike_image_to_3d/.venv/bin/python -m uvicorn server:app \\ --host 127.0.0.1 --port 8600 --reload """ from __future__ import annotations import asyncio import io import os import sys import time from dataclasses import dataclass from typing import Any, Dict from fastapi import FastAPI, File, Form, HTTPException, UploadFile from fastapi.responses import Response from PIL import Image # --- Catalogo de modelos ------------------------------------------------------ @dataclass(frozen=True) class ModelSpec: id: str label: str license: str vram_gb_est: float backend_module: str # backends. 
CATALOG: Dict[str, ModelSpec] = {
    "triposr": ModelSpec(
        id="triposr",
        label="TripoSR (Stability + Tripo, MIT)",
        license="MIT",
        vram_gb_est=6.0,
        backend_module="backends.triposr",
    ),
    "hunyuan3d-2": ModelSpec(
        id="hunyuan3d-2",
        label="Hunyuan3D-2 (Tencent, Community License)",
        license="Tencent Community",
        vram_gb_est=12.0,
        backend_module="backends.hunyuan3d_2",
    ),
    "trellis": ModelSpec(
        id="trellis",
        label="Trellis (Microsoft, MIT code / research weights)",
        license="MIT (code)",
        vram_gb_est=16.0,
        backend_module="backends.trellis",
    ),
}


# --- In-memory state -----------------------------------------------------------

_loaded: Dict[str, Any] = {}  # model_id -> backend handle
_locks: Dict[str, asyncio.Lock] = {}  # model_id -> lock (one per model)


def _lock_for(model_id: str) -> asyncio.Lock:
    """Return the lock guarding *model_id*, creating it on first use."""
    lk = _locks.get(model_id)
    if lk is None:
        lk = asyncio.Lock()
        _locks[model_id] = lk
    return lk


def _load_backend(spec: ModelSpec) -> Any:
    """Import the backend module for *spec* on demand and return ``load()``.

    The returned handle is later used through ``handle.infer(image, cfg)``
    and, when present, ``handle.close()``.
    """
    # Make sure the directory containing this file (and so `backends/`) is
    # importable regardless of the process working directory.
    here = os.path.dirname(os.path.abspath(__file__))
    if here not in sys.path:
        sys.path.insert(0, here)
    import importlib

    mod = importlib.import_module(spec.backend_module)
    return mod.load()


def _release_handle(handle: Any) -> None:
    """Close *handle* if it supports it, then try to free cached CUDA memory."""
    try:
        close = getattr(handle, "close", None)
        if callable(close):
            close()
    finally:
        # Deliberately best-effort: torch may be missing or CUDA unavailable,
        # and neither should turn an unload into an error.
        try:
            import torch

            torch.cuda.empty_cache()
        except Exception:
            pass


# --- FastAPI app ---------------------------------------------------------------

app = FastAPI(title="image_to_3d_studio backend", version="0.1.0")


@app.get("/health")
def health() -> Dict[str, Any]:
    """Report liveness, the sorted ids of loaded models, and the version."""
    return {
        "status": "ok",
        "loaded": sorted(_loaded.keys()),
        "version": "0.1.0",
    }


@app.get("/models")
def list_models() -> Dict[str, Any]:
    """List every catalog entry together with whether it is currently loaded."""
    return {
        "models": [
            {
                "id": m.id,
                "label": m.label,
                "license": m.license,
                "vram_gb_est": m.vram_gb_est,
                "loaded": m.id in _loaded,
            }
            for m in CATALOG.values()
        ]
    }


@app.delete("/models/{model_id}")
async def unload(model_id: str) -> Dict[str, Any]:
    """Unload *model_id* and release its resources.

    The handle is removed and released while holding the model's lock, so an
    in-flight ``/generate`` for the same model finishes before its handle is
    closed.

    Raises:
        HTTPException: 404 if the model is not currently loaded.
    """
    # Cheap pre-check so unknown ids neither wait nor create a lock entry.
    if model_id not in _loaded:
        raise HTTPException(404, f"not loaded: {model_id}")
    async with _lock_for(model_id):
        handle = _loaded.pop(model_id, None)
        if handle is None:
            # Another unload won the race while we were waiting for the lock.
            raise HTTPException(404, f"not loaded: {model_id}")
        # close() / empty_cache() may block; keep them off the event loop.
        await asyncio.to_thread(_release_handle, handle)
    return {"unloaded": model_id}


@app.post("/generate")
async def generate(
    file: UploadFile = File(...),
    model: str = Form("triposr"),
    seed: int = Form(0),
    mc_resolution: int = Form(256),
    foreground_ratio: float = Form(0.85),
    texture: bool = Form(True),
) -> Response:
    """Run image-to-3D inference and return the result as a GLB response.

    The model is loaded lazily on first use. Loading and inference both run
    in a worker thread while holding the per-model lock.

    Raises:
        HTTPException: 400 for an unknown model or an undecodable image,
            501 if the backend raises ``NotImplementedError``, and 500 for
            any other load or inference failure.
    """
    spec = CATALOG.get(model)
    if spec is None:
        raise HTTPException(400, f"unknown model: {model} (catalog: {list(CATALOG)})")

    raw = await file.read()
    try:
        image = Image.open(io.BytesIO(raw)).convert("RGB")
    except Exception as e:
        raise HTTPException(400, f"bad image: {e}") from e

    cfg = dict(
        seed=seed,
        mc_resolution=mc_resolution,
        foreground_ratio=foreground_ratio,
        texture=texture,
    )

    async with _lock_for(model):
        if model not in _loaded:
            # Report load failures the same way as inference failures
            # instead of letting them escape as an opaque server error.
            try:
                _loaded[model] = await asyncio.to_thread(_load_backend, spec)
            except NotImplementedError as e:
                raise HTTPException(501, str(e)) from e
            except Exception as e:
                raise HTTPException(500, f"{model} load failed: {e}") from e
        handle = _loaded[model]

        t0 = time.perf_counter()
        try:
            glb_bytes = await asyncio.to_thread(handle.infer, image, cfg)
        except NotImplementedError as e:
            raise HTTPException(501, str(e)) from e
        except Exception as e:
            raise HTTPException(500, f"{model} infer failed: {e}") from e
        dt_ms = int((time.perf_counter() - t0) * 1000)

    return Response(
        content=glb_bytes,
        media_type="model/gltf-binary",
        headers={
            "X-Model": model,
            "X-Duration-ms": str(dt_ms),
            "X-Bytes": str(len(glb_bytes)),
        },
    )