chore: auto-commit (5 archivos)
- CMakeLists.txt - app.md - appicon.ico - backend/ - main.cpp Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,201 @@
|
||||
"""FastAPI dispatcher para image-to-3D.
|
||||
|
||||
Endpoints:
|
||||
GET /health -> {"status":"ok","loaded":[...ids cargados...],"version":"0.1.0"}
|
||||
GET /models -> {"models":[{"id","label","license","vram_gb_est","loaded"}...]}
|
||||
POST /generate -> bytes GLB (Content-Type: model/gltf-binary)
|
||||
multipart/form-data:
|
||||
file=<imagen png/jpg>
|
||||
model=<id> (triposr | hunyuan3d-2 | trellis)
|
||||
seed=<int> (opcional, default 0)
|
||||
mc_resolution=<int> (opcional, default 256)
|
||||
foreground_ratio=<f> (opcional, default 0.85)
|
||||
texture=<bool> (opcional, default true)
|
||||
|
||||
Implementacion:
|
||||
- Cada backend (triposr/hunyuan3d-2/trellis) vive en backends/<id>.py
|
||||
con interfaz comun: load() -> handle, infer(handle, image_pil, cfg) -> bytes_glb.
|
||||
- Lazy load: el primer POST con un model_id carga el modelo en GPU y lo
|
||||
guarda en un dict global. Liberacion manual via DELETE /models/<id>.
|
||||
- Single-process, single-GPU: serializamos peticiones por modelo con un
|
||||
asyncio.Lock para no chocar dos infer simultaneos en la misma GPU.
|
||||
|
||||
Lanzar:
|
||||
cd projects/imagegen/apps/image_to_3d_studio/backend
|
||||
../../../analysis/spike_image_to_3d/.venv/bin/python -m uvicorn server:app \\
|
||||
--host 127.0.0.1 --port 8600 --reload
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict
|
||||
|
||||
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
||||
from fastapi.responses import Response
|
||||
from PIL import Image
|
||||
|
||||
|
||||
# --- Catalogo de modelos ------------------------------------------------------
|
||||
|
||||
@dataclass(frozen=True)
class ModelSpec:
    """Immutable catalog entry describing one image-to-3D model.

    Instances are frozen, so a spec cannot be modified after it has been
    registered in the catalog.
    """

    # Identifier clients send in the ``model`` form field; also the catalog key.
    id: str
    # Human-readable name, returned as-is by GET /models.
    label: str
    # License string, returned as-is by GET /models.
    license: str
    # Estimated VRAM footprint, returned as-is by GET /models
    # (presumably gigabytes, judging by the field name).
    vram_gb_est: float
    # Dotted module path of the backend implementation (backends.<name>),
    # imported on demand when the model is first used.
    backend_module: str
|
||||
|
||||
|
||||
# Every model this server can dispatch to, keyed by model id.
# Keys are derived from each spec's own ``id`` so the two can never drift
# apart; insertion order (and therefore GET /models order) is the order below.
CATALOG: Dict[str, ModelSpec] = {
    spec.id: spec
    for spec in (
        ModelSpec(
            id="triposr",
            label="TripoSR (Stability + Tripo, MIT)",
            license="MIT",
            vram_gb_est=6.0,
            backend_module="backends.triposr",
        ),
        ModelSpec(
            id="hunyuan3d-2",
            label="Hunyuan3D-2 (Tencent, Community License)",
            license="Tencent Community",
            vram_gb_est=12.0,
            backend_module="backends.hunyuan3d_2",
        ),
        ModelSpec(
            id="trellis",
            label="Trellis (Microsoft, MIT code / research weights)",
            license="MIT (code)",
            vram_gb_est=16.0,
            backend_module="backends.trellis",
        ),
    )
}
|
||||
|
||||
|
||||
# --- Estado en memoria --------------------------------------------------------
|
||||
|
||||
# model_id -> backend handle; an entry exists only while that model is loaded.
_loaded: Dict[str, Any] = {}
# model_id -> lock used to serialize work on that model (one lock per model,
# created lazily by _lock_for).
_locks: Dict[str, asyncio.Lock] = {}


def _lock_for(model_id: str) -> asyncio.Lock:
    """Return the lock associated with *model_id*, creating it on first use.

    The same ``asyncio.Lock`` instance is returned for every later call with
    the same id, because it is cached in the module-level ``_locks`` dict.
    """
    try:
        return _locks[model_id]
    except KeyError:
        lock = _locks[model_id] = asyncio.Lock()
        return lock
|
||||
|
||||
|
||||
def _load_backend(spec: ModelSpec) -> Any:
    """Import the backend module named by *spec* and return its loaded handle.

    The module ``spec.backend_module`` is imported on demand and its
    module-level ``load()`` is called with no arguments; whatever ``load()``
    returns is handed back unchanged.

    NOTE(review): the /generate handler later calls ``handle.infer(image, cfg)``
    on the returned object, i.e. ``infer`` is looked up on the handle. The
    previous wording here described a module-level ``infer(handle, image, cfg)``
    - confirm which contract the backends actually implement.

    Raises:
        ImportError: if the backend module cannot be imported.
        AttributeError: if the module does not define ``load``.
    """
    import importlib

    # Put the directory containing this file on sys.path so that the dotted
    # name "backends.<name>" resolves regardless of the current working dir.
    base_dir = os.path.dirname(os.path.abspath(__file__))
    if base_dir not in sys.path:
        sys.path.insert(0, base_dir)

    backend = importlib.import_module(spec.backend_module)
    return backend.load()
|
||||
|
||||
|
||||
# --- FastAPI app --------------------------------------------------------------
|
||||
|
||||
# ASGI application object; the route decorators in this module register
# their handlers on it.
app = FastAPI(title="image_to_3d_studio backend", version="0.1.0")
|
||||
|
||||
|
||||
@app.get("/health")
def health() -> Dict[str, Any]:
    """Report liveness and which models are currently held in memory.

    Returns:
        A dict with ``status`` (always ``"ok"``), ``loaded`` (sorted list of
        the model ids present in ``_loaded``) and ``version``.
    """
    return {
        "status": "ok",
        "loaded": sorted(_loaded),
        # Read the version from the app object instead of repeating the
        # literal, so the two values cannot get out of sync.
        "version": app.version,
    }
|
||||
|
||||
|
||||
@app.get("/models")
def list_models() -> Dict[str, Any]:
    """List every catalog model together with its current load state.

    Returns:
        ``{"models": [...]}`` with one entry per ``CATALOG`` value, in catalog
        order. Each entry carries ``id``, ``label``, ``license``,
        ``vram_gb_est`` and ``loaded`` (whether the id is present in
        ``_loaded``).
    """
    entries = [
        {
            "id": spec.id,
            "label": spec.label,
            "license": spec.license,
            "vram_gb_est": spec.vram_gb_est,
            "loaded": spec.id in _loaded,
        }
        for spec in CATALOG.values()
    ]
    return {"models": entries}
|
||||
|
||||
|
||||
def _release_handle(handle: Any) -> None:
    """Release a backend handle's resources (blocking; run it off the loop)."""
    # If the backend handle exposes close(), call it.
    close = getattr(handle, "close", None)
    if callable(close):
        close()
    # Nudge PyTorch into returning cached VRAM. Deliberately best-effort:
    # torch may not be installed or CUDA may be unavailable, and neither
    # should turn a successful unload into an error.
    try:
        import torch

        torch.cuda.empty_cache()
    except Exception:
        pass


@app.delete("/models/{model_id}")
async def unload(model_id: str) -> Dict[str, Any]:
    """Unload *model_id* and free the resources held by its backend handle.

    The per-model lock is taken first, so an unload waits for any request
    that is currently loading or running inference on the same model instead
    of closing the handle underneath it.

    Returns:
        ``{"unloaded": model_id}`` on success.

    Raises:
        HTTPException: 404 if the model is not currently loaded.
    """
    # Reject unknown / not-loaded ids before touching the lock table, so
    # arbitrary ids in the URL never create lock entries.
    if model_id not in _loaded:
        raise HTTPException(404, f"not loaded: {model_id}")

    async with _lock_for(model_id):
        # Re-check under the lock: a concurrent DELETE may have won the race.
        handle = _loaded.pop(model_id, None)
        if handle is None:
            raise HTTPException(404, f"not loaded: {model_id}")
        # close() and the CUDA cache flush may block; keep them off the loop.
        await asyncio.to_thread(_release_handle, handle)

    return {"unloaded": model_id}
|
||||
|
||||
|
||||
@app.post("/generate")
async def generate(
    file: UploadFile = File(...),
    model: str = Form("triposr"),
    seed: int = Form(0),
    mc_resolution: int = Form(256),
    foreground_ratio: float = Form(0.85),
    texture: bool = Form(True),
) -> Response:
    """Turn an uploaded image into a GLB mesh using the selected model.

    The upload is decoded to an RGB PIL image, the backend for *model* is
    loaded on first use, and ``handle.infer(image, cfg)`` is run in a worker
    thread. Loading and inference both happen while holding the per-model
    lock, so requests for the same model are processed one at a time.

    Args:
        file: Uploaded image (multipart form field).
        model: Catalog id of the model to use.
        seed, mc_resolution, foreground_ratio, texture: Passed through to the
            backend unchanged inside the ``cfg`` dict.

    Returns:
        A ``model/gltf-binary`` response whose body is the backend output.
        The ``X-Model``, ``X-Duration-ms`` (inference time only) and
        ``X-Bytes`` headers describe the result.

    Raises:
        HTTPException: 400 for an unknown model or an undecodable image;
            501 if the backend raises ``NotImplementedError``; 500 for any
            other load or inference failure.
    """
    spec = CATALOG.get(model)
    if spec is None:
        raise HTTPException(400, f"unknown model: {model} (catalog: {list(CATALOG)})")

    raw = await file.read()
    try:
        image = Image.open(io.BytesIO(raw)).convert("RGB")
    except Exception as e:
        raise HTTPException(400, f"bad image: {e}") from e

    cfg = {
        "seed": seed,
        "mc_resolution": mc_resolution,
        "foreground_ratio": foreground_ratio,
        "texture": texture,
    }

    async with _lock_for(model):
        # Lazy load: the first request for a model pays the load cost.
        if model not in _loaded:
            try:
                _loaded[model] = await asyncio.to_thread(_load_backend, spec)
            except NotImplementedError as e:
                raise HTTPException(501, str(e)) from e
            except Exception as e:
                raise HTTPException(500, f"{model} load failed: {e}") from e
        handle = _loaded[model]

        started = time.perf_counter()
        try:
            glb_bytes = await asyncio.to_thread(handle.infer, image, cfg)
        except NotImplementedError as e:
            raise HTTPException(501, str(e)) from e
        except Exception as e:
            raise HTTPException(500, f"{model} infer failed: {e}") from e
        dt_ms = int((time.perf_counter() - started) * 1000)

    return Response(
        content=glb_bytes,
        media_type="model/gltf-binary",
        headers={
            "X-Model": model,
            "X-Duration-ms": str(dt_ms),
            "X-Bytes": str(len(glb_bytes)),
        },
    )
|
||||
Reference in New Issue
Block a user