chore: auto-commit (5 archivos)
- CMakeLists.txt - app.md - appicon.ico - backend/ - main.cpp Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,17 @@
|
||||
"""Interfaz comun para backends image-to-3D.
|
||||
|
||||
Cada backend (triposr, hunyuan3d_2, trellis) expone:
|
||||
load() -> Handle
|
||||
donde Handle tiene metodo infer(image: PIL.Image, cfg: dict) -> bytes (GLB).
|
||||
|
||||
`cfg` recibe: seed, mc_resolution, foreground_ratio, texture.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Protocol, Any, Dict
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class BackendHandle(Protocol):
    """Structural type that every image-to-3D backend handle satisfies."""

    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
        """Turn ``image`` into a 3D model and return it as GLB bytes.

        ``cfg`` carries the request options: seed, mc_resolution,
        foreground_ratio and texture.
        """

    def close(self) -> None:
        """Release whatever the handle holds."""
|
||||
@@ -0,0 +1,23 @@
|
||||
"""Backend Hunyuan3D-2 (Tencent, Community License).
|
||||
|
||||
STUB. Para implementar: clonar github.com/Tencent/Hunyuan3D-2 a sources/,
|
||||
instalar deps, cargar pipeline shape + texture.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class Handle:
    """Placeholder handle for the Hunyuan3D-2 backend (not implemented yet)."""

    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
        """Always raise ``NotImplementedError``: this backend is still a stub."""
        pending = "hunyuan3d-2 backend pendiente. Ver notebook 03_smoke_hunyuan3d.ipynb"
        raise NotImplementedError(pending)

    def close(self) -> None:  # pragma: no cover
        """Do nothing; the stub holds no resources."""
        return None
|
||||
|
||||
|
||||
def load() -> Handle:
    """Build and return the stub handle; no model is loaded."""
    stub = Handle()
    return stub
|
||||
@@ -0,0 +1,23 @@
|
||||
"""Backend Trellis (Microsoft, MIT code).
|
||||
|
||||
STUB. Para implementar: clonar github.com/microsoft/TRELLIS a sources/,
|
||||
instalar deps (kaolin + custom CUDA), cargar pipeline structured latents.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class Handle:
    """Placeholder handle for the Trellis backend (not implemented yet)."""

    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
        """Always raise ``NotImplementedError``: this backend is still a stub."""
        pending = "trellis backend pendiente. Ver notebook 04_smoke_trellis.ipynb"
        raise NotImplementedError(pending)

    def close(self) -> None:  # pragma: no cover
        """Do nothing; the stub holds no resources."""
        return None
|
||||
|
||||
|
||||
def load() -> Handle:
    """Build and return the stub handle; no model is loaded."""
    stub = Handle()
    return stub
|
||||
@@ -0,0 +1,93 @@
|
||||
"""Backend TripoSR (Stability + Tripo, MIT).
|
||||
|
||||
Asume que `sources/TripoSR` esta clonado en el registry. Importa `tsr.system.TSR`.
|
||||
Descarga checkpoint desde HF en la primera carga (~1.2 GB).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import trimesh
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def _ensure_sources_on_path() -> pathlib.Path:
    """Make the TripoSR checkout importable and return its directory.

    The registry root is read from the ``FN_REGISTRY_ROOT`` environment
    variable (with a hard-coded fallback); the checkout is expected at
    ``<root>/sources/TripoSR``.

    Raises:
        RuntimeError: if the checkout directory does not exist. The message
            includes the ``git clone`` command that creates it.
    """
    registry_root = os.environ.get("FN_REGISTRY_ROOT", "/home/lucas/fn_registry")
    checkout = pathlib.Path(registry_root) / "sources" / "TripoSR"
    if not checkout.exists():
        raise RuntimeError(
            f"TripoSR no clonado en {checkout}. "
            "git clone --depth=1 https://github.com/VAST-AI-Research/TripoSR.git "
            f"{checkout}"
        )

    # Prepend only once so repeated calls do not grow sys.path.
    checkout_str = str(checkout)
    if checkout_str not in sys.path:
        sys.path.insert(0, checkout_str)
    return checkout
|
||||
|
||||
|
||||
@dataclass
class Handle:
    """Loaded TripoSR model together with the rembg session used to preprocess.

    ``close()`` is idempotent: it clears the references instead of deleting
    the attributes, so calling it twice is harmless and a later ``infer()``
    fails with a clear ``RuntimeError``.
    """

    model: Any  # model object that infer() calls; None once closed
    rembg_session: Any  # session passed to remove_background; None once closed
    device: str  # device string forwarded to the model call

    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
        """Reconstruct a mesh from ``image`` and return it serialized as GLB.

        Args:
            image: input picture.
            cfg: request options. Only ``foreground_ratio`` (default 0.85)
                and ``mc_resolution`` (default 256) are read here.

        Returns:
            The GLB file contents.

        Raises:
            RuntimeError: if the handle has already been closed.
        """
        if self.model is None:
            raise RuntimeError("triposr handle is closed")

        from tsr.utils import remove_background, resize_foreground

        fg_ratio = float(cfg.get("foreground_ratio", 0.85))
        mc_res = int(cfg.get("mc_resolution", 256))

        fg = remove_background(image, self.rembg_session)
        fg = resize_foreground(fg, fg_ratio)

        # Composite RGBA -> RGB over 0.5 gray (canonical TripoSR run.py
        # preprocessing). Without this the DINO tokenizer receives 4 channels
        # and fails with:
        # "The size of tensor a (4) must match tensor b (3) at dim 2".
        arr = np.asarray(fg).astype(np.float32) / 255.0
        if arr.ndim == 3 and arr.shape[-1] == 4:
            alpha = arr[:, :, 3:4]
            arr = arr[:, :, :3] * alpha + (1.0 - alpha) * 0.5
        fg = Image.fromarray((arr * 255.0).astype(np.uint8))

        with torch.no_grad():
            scene_codes = self.model([fg], device=self.device)
        meshes = self.model.extract_mesh(
            scene_codes, has_vertex_color=False, resolution=mc_res
        )
        m = meshes[0]
        tm = trimesh.Trimesh(
            vertices=np.asarray(m.vertices),
            faces=np.asarray(m.faces),
            process=True,
        )
        buf = io.BytesIO()
        tm.export(buf, file_type="glb")
        return buf.getvalue()

    def close(self) -> None:
        """Drop the model and rembg session and empty the CUDA cache.

        Safe to call more than once.
        """
        # Rebind to None rather than `del`: deleting the attribute made a
        # second close() raise AttributeError.
        self.model = None
        self.rembg_session = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
|
||||
|
||||
|
||||
def load() -> Handle:
    """Load the pretrained TripoSR model and wrap it in a ``Handle``.

    Puts the TripoSR checkout on ``sys.path`` first, then imports ``TSR`` and
    rembg lazily. CUDA is used when available, otherwise the CPU.
    """
    _ensure_sources_on_path()
    from tsr.system import TSR
    from rembg import new_session

    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    pretrained = TSR.from_pretrained(
        "stabilityai/TripoSR",
        config_name="config.yaml",
        weight_name="model.ckpt",
    )
    pretrained.renderer.set_chunk_size(8192)
    pretrained.to(device)

    session = new_session()
    return Handle(model=pretrained, rembg_session=session, device=device)
|
||||
Executable
+20
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
# Launches the FastAPI backend reusing the analysis venv (same deps:
# torch + diffusers + transformers + trimesh + Pillow). It does not create a
# venv of its own.
set -euo pipefail

HERE="$(cd "$(dirname "$0")" && pwd)"
ROOT="$(cd "$(dirname "$0")"/../../../../.. && pwd)"
VENV="$ROOT/projects/imagegen/analysis/spike_image_to_3d/.venv"
PYTHON_BIN="$VENV/bin/python"

# Refuse to start without the analysis venv's interpreter.
if [[ ! -x "$PYTHON_BIN" ]]; then
  {
    echo "venv del analysis no existe: $VENV"
    echo "Crea el analysis primero: ./fn run init_jupyter_analysis --project imagegen spike_image_to_3d"
  } >&2
  exit 1
fi

# The backends read this to locate the registry checkout.
export FN_REGISTRY_ROOT="$ROOT"

# Extra CLI arguments are forwarded to uvicorn; PORT overrides the default 8600.
exec "$PYTHON_BIN" -m uvicorn server:app \
  --host 127.0.0.1 --port "${PORT:-8600}" \
  --app-dir "$HERE" \
  "$@"
|
||||
@@ -0,0 +1,201 @@
|
||||
"""FastAPI dispatcher para image-to-3D.
|
||||
|
||||
Endpoints:
|
||||
GET /health -> {"status":"ok","loaded":[...ids cargados...],"version":"0.1.0"}
|
||||
GET /models -> {"models":[{"id","label","license","vram_gb_est","loaded"}...]}
|
||||
POST /generate -> bytes GLB (Content-Type: model/gltf-binary)
|
||||
multipart/form-data:
|
||||
file=<imagen png/jpg>
|
||||
model=<id> (triposr | hunyuan3d-2 | trellis)
|
||||
seed=<int> (opcional, default 0)
|
||||
mc_resolution=<int> (opcional, default 256)
|
||||
foreground_ratio=<f> (opcional, default 0.85)
|
||||
texture=<bool> (opcional, default true)
|
||||
|
||||
Implementacion:
|
||||
- Cada backend (triposr/hunyuan3d-2/trellis) vive en backends/<id>.py
|
||||
con interfaz comun: load() -> handle, handle.infer(image_pil, cfg) -> bytes_glb.
|
||||
- Lazy load: el primer POST con un model_id carga el modelo en GPU y lo
|
||||
guarda en un dict global. Liberacion manual via DELETE /models/<id>.
|
||||
- Single-process, single-GPU: serializamos peticiones por modelo con un
|
||||
asyncio.Lock para no chocar dos infer simultaneos en la misma GPU.
|
||||
|
||||
Lanzar:
|
||||
cd projects/imagegen/apps/image_to_3d_studio/backend
|
||||
../../../analysis/spike_image_to_3d/.venv/bin/python -m uvicorn server:app \\
|
||||
--host 127.0.0.1 --port 8600 --reload
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict
|
||||
|
||||
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
||||
from fastapi.responses import Response
|
||||
from PIL import Image
|
||||
|
||||
|
||||
# --- Catalogo de modelos ------------------------------------------------------
|
||||
|
||||
@dataclass(frozen=True)
class ModelSpec:
    """Immutable catalog entry describing one selectable image-to-3D model."""

    id: str  # model identifier
    label: str  # human-readable name
    license: str  # license string
    vram_gb_est: float  # estimated VRAM requirement, in GB
    backend_module: str  # importable module path: backends.<name>
|
||||
|
||||
|
||||
# Every model the API can serve, in listing order.
_SPECS = (
    ModelSpec(
        id="triposr",
        label="TripoSR (Stability + Tripo, MIT)",
        license="MIT",
        vram_gb_est=6.0,
        backend_module="backends.triposr",
    ),
    ModelSpec(
        id="hunyuan3d-2",
        label="Hunyuan3D-2 (Tencent, Community License)",
        license="Tencent Community",
        vram_gb_est=12.0,
        backend_module="backends.hunyuan3d_2",
    ),
    ModelSpec(
        id="trellis",
        label="Trellis (Microsoft, MIT code / research weights)",
        license="MIT (code)",
        vram_gb_est=16.0,
        backend_module="backends.trellis",
    ),
)

# Lookup table keyed by model id; insertion order follows _SPECS.
CATALOG: Dict[str, ModelSpec] = {entry.id: entry for entry in _SPECS}
|
||||
|
||||
|
||||
# --- Estado en memoria --------------------------------------------------------
|
||||
|
||||
_loaded: Dict[str, Any] = {}  # model_id -> backend handle
_locks: Dict[str, asyncio.Lock] = {}  # model_id -> lock (one per model)
|
||||
|
||||
|
||||
def _lock_for(model_id: str) -> asyncio.Lock:
    """Return the lock registered for ``model_id``, creating it on first use."""
    try:
        return _locks[model_id]
    except KeyError:
        fresh = asyncio.Lock()
        _locks[model_id] = fresh
        return fresh
|
||||
|
||||
|
||||
def _load_backend(spec: ModelSpec) -> Any:
    """Import the backend module named by ``spec`` on demand and load it.

    Returns whatever the module's ``load()`` returns: the backend handle,
    whose ``infer(image, cfg)`` method produces the GLB bytes.
    """
    from importlib import import_module

    # Make sure the directory containing `backends/` is importable.
    server_dir = os.path.dirname(os.path.abspath(__file__))
    if server_dir not in sys.path:
        sys.path.insert(0, server_dir)

    backend = import_module(spec.backend_module)
    return backend.load()
|
||||
|
||||
|
||||
# --- FastAPI app --------------------------------------------------------------
|
||||
|
||||
# ASGI application object; the route decorators in this module register on it.
app = FastAPI(title="image_to_3d_studio backend", version="0.1.0")
|
||||
|
||||
|
||||
@app.get("/health")
def health() -> Dict[str, Any]:
    """Report liveness, the sorted ids of the loaded models and the version."""
    loaded_ids = sorted(_loaded)
    return {"status": "ok", "loaded": loaded_ids, "version": "0.1.0"}
|
||||
|
||||
|
||||
@app.get("/models")
def list_models() -> Dict[str, Any]:
    """List every catalog model with its metadata and whether it is loaded."""
    entries = []
    for spec in CATALOG.values():
        entry = {
            "id": spec.id,
            "label": spec.label,
            "license": spec.license,
            "vram_gb_est": spec.vram_gb_est,
            "loaded": spec.id in _loaded,
        }
        entries.append(entry)
    return {"models": entries}
|
||||
|
||||
|
||||
@app.delete("/models/{model_id}")
async def unload(model_id: str) -> Dict[str, Any]:
    """Unload a model and release its resources.

    The handle is removed and closed while holding the same per-model lock
    that ``generate`` holds during load and inference, so a model is never
    torn down while a request is still running on it.

    Raises:
        HTTPException: 404 if ``model_id`` is not currently loaded.
    """
    # Cheap pre-check so unknown ids never create an entry in the lock table.
    if model_id not in _loaded:
        raise HTTPException(404, f"not loaded: {model_id}")

    async with _lock_for(model_id):
        # Re-check under the lock: another DELETE may have won the race.
        handle = _loaded.pop(model_id, None)
        if handle is None:
            raise HTTPException(404, f"not loaded: {model_id}")
        # Call close() if the backend exposes it; run it off the event loop.
        close = getattr(handle, "close", None)
        if callable(close):
            await asyncio.to_thread(close)

    # Best-effort VRAM release: torch may be missing or CUDA unusable, and
    # neither should fail the unload.
    try:
        import torch

        torch.cuda.empty_cache()
    except Exception:
        pass
    return {"unloaded": model_id}
|
||||
|
||||
|
||||
@app.post("/generate")
async def generate(
    file: UploadFile = File(...),
    model: str = Form("triposr"),
    seed: int = Form(0),
    mc_resolution: int = Form(256),
    foreground_ratio: float = Form(0.85),
    texture: bool = Form(True),
) -> Response:
    """Generate a GLB from an uploaded image with the selected model.

    The backend is loaded lazily on the first request for a model. Loading
    and inference both run in a worker thread while holding that model's
    lock, so requests for the same model are serialized.

    Returns:
        A ``model/gltf-binary`` response carrying ``X-Model``,
        ``X-Duration-ms`` and ``X-Bytes`` headers.

    Raises:
        HTTPException: 400 for an unknown model or an undecodable image,
            501 if the backend is not implemented, 500 if loading or
            inference fails.
    """
    spec = CATALOG.get(model)
    if spec is None:
        raise HTTPException(400, f"unknown model: {model} (catalog: {list(CATALOG)})")

    raw = await file.read()
    try:
        image = Image.open(io.BytesIO(raw)).convert("RGB")
    except Exception as e:
        raise HTTPException(400, f"bad image: {e}") from e

    cfg = dict(
        seed=seed,
        mc_resolution=mc_resolution,
        foreground_ratio=foreground_ratio,
        texture=texture,
    )

    async with _lock_for(model):
        handle = _loaded.get(model)
        if handle is None:
            # Report load failures the same way as inference failures instead
            # of letting them surface as an anonymous 500.
            try:
                handle = await asyncio.to_thread(_load_backend, spec)
            except Exception as e:
                raise HTTPException(500, f"{model} load failed: {e}") from e
            _loaded[model] = handle

        t0 = time.perf_counter()
        try:
            glb_bytes = await asyncio.to_thread(handle.infer, image, cfg)
        except NotImplementedError as e:
            raise HTTPException(501, str(e)) from e
        except Exception as e:
            raise HTTPException(500, f"{model} infer failed: {e}") from e
        dt_ms = int((time.perf_counter() - t0) * 1000)

    return Response(
        content=glb_bytes,
        media_type="model/gltf-binary",
        headers={
            "X-Model": model,
            "X-Duration-ms": str(dt_ms),
            "X-Bytes": str(len(glb_bytes)),
        },
    )
|
||||
Reference in New Issue
Block a user