chore: auto-commit (5 archivos)

- CMakeLists.txt
- app.md
- appicon.ico
- backend/
- main.cpp

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-30 17:28:48 +02:00
commit d3c83053f2
14 changed files with 1020 additions and 0 deletions
Binary file not shown.
View File
Binary file not shown.
+17
View File
@@ -0,0 +1,17 @@
"""Interfaz comun para backends image-to-3D.
Cada backend (triposr, hunyuan3d_2, trellis) expone:
load() -> Handle
donde Handle tiene metodo infer(image: PIL.Image, cfg: dict) -> bytes (GLB).
`cfg` recibe: seed, mc_resolution, foreground_ratio, texture.
"""
from __future__ import annotations
from typing import Protocol, Any, Dict
from PIL import Image
class BackendHandle(Protocol):
    """Structural type satisfied by the object a backend's ``load()`` returns."""

    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
        """Turn ``image`` into a serialized 3D model and return its bytes.

        Per the module docstring the result is a GLB and ``cfg`` carries the
        keys ``seed``, ``mc_resolution``, ``foreground_ratio`` and ``texture``.
        """
        ...

    def close(self) -> None:
        """Release whatever the handle holds."""
        ...
+23
View File
@@ -0,0 +1,23 @@
"""Backend Hunyuan3D-2 (Tencent, Community License).
STUB. Para implementar: clonar github.com/Tencent/Hunyuan3D-2 a sources/,
instalar deps, cargar pipeline shape + texture.
"""
from __future__ import annotations
from typing import Any, Dict
from PIL import Image
class Handle:
    """Placeholder handle for the Hunyuan3D-2 backend, which is still a stub."""

    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
        """Always raise ``NotImplementedError``; the arguments are not used."""
        message = "hunyuan3d-2 backend pendiente. Ver notebook 03_smoke_hunyuan3d.ipynb"
        raise NotImplementedError(message)

    def close(self) -> None:  # pragma: no cover
        """Do nothing: the stub holds no resources."""
        return None
def load() -> Handle:
    """Return a fresh stub ``Handle``; nothing is loaded."""
    handle = Handle()
    return handle
+23
View File
@@ -0,0 +1,23 @@
"""Backend Trellis (Microsoft, MIT code).
STUB. Para implementar: clonar github.com/microsoft/TRELLIS a sources/,
instalar deps (kaolin + custom CUDA), cargar pipeline structured latents.
"""
from __future__ import annotations
from typing import Any, Dict
from PIL import Image
class Handle:
    """Placeholder handle for the Trellis backend, which is still a stub."""

    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
        """Always raise ``NotImplementedError``; the arguments are not used."""
        message = "trellis backend pendiente. Ver notebook 04_smoke_trellis.ipynb"
        raise NotImplementedError(message)

    def close(self) -> None:  # pragma: no cover
        """Do nothing: the stub holds no resources."""
        return None
def load() -> Handle:
    """Return a fresh stub ``Handle``; nothing is loaded."""
    handle = Handle()
    return handle
+93
View File
@@ -0,0 +1,93 @@
"""Backend TripoSR (Stability + Tripo, MIT).
Asume que `sources/TripoSR` esta clonado en el registry. Importa `tsr.system.TSR`.
Descarga checkpoint desde HF en la primera carga (~1.2 GB).
"""
from __future__ import annotations
import io
import os
import pathlib
import sys
from dataclasses import dataclass
from typing import Any, Dict
import numpy as np
import torch
import trimesh
from PIL import Image
def _ensure_sources_on_path() -> pathlib.Path:
    """Put the TripoSR checkout on ``sys.path`` and return its directory.

    The registry root comes from the ``FN_REGISTRY_ROOT`` environment variable
    (default ``/home/lucas/fn_registry``); the checkout is expected at
    ``<root>/sources/TripoSR``.

    Raises:
        RuntimeError: if that directory does not exist. The message contains
            the ``git clone`` command that would create it.
    """
    registry_root = os.environ.get("FN_REGISTRY_ROOT", "/home/lucas/fn_registry")
    checkout = pathlib.Path(registry_root, "sources", "TripoSR")
    if not checkout.exists():
        raise RuntimeError(
            f"TripoSR no clonado en {checkout}. "
            "git clone --depth=1 https://github.com/VAST-AI-Research/TripoSR.git "
            f"{checkout}"
        )

    checkout_str = str(checkout)
    # Prepend only once so repeated calls do not keep growing sys.path.
    if checkout_str not in sys.path:
        sys.path.insert(0, checkout_str)
    return checkout
@dataclass
class Handle:
    """Loaded TripoSR model together with the rembg session used to preprocess.

    ``close()`` resets ``model`` and ``rembg_session`` to ``None``; a closed
    handle refuses further ``infer()`` calls.
    """

    model: Any  # TSR model instance; None once the handle is closed
    rembg_session: Any  # session handed to tsr.utils.remove_background
    device: str  # torch device string passed to the model call

    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
        """Reconstruct a mesh from ``image`` and return it serialized as GLB.

        Args:
            image: input picture; its background is removed before inference.
            cfg: options. Only ``foreground_ratio`` (default 0.85) and
                ``mc_resolution`` (default 256) are read here; other keys
                are ignored by this backend.

        Returns:
            The bytes produced by exporting the mesh with ``file_type="glb"``.

        Raises:
            RuntimeError: if the handle has already been closed.
        """
        if self.model is None:
            raise RuntimeError("triposr handle is closed")

        # Deferred import: `tsr` is only importable after load() has put the
        # TripoSR checkout on sys.path.
        from tsr.utils import remove_background, resize_foreground

        fg_ratio = float(cfg.get("foreground_ratio", 0.85))
        mc_res = int(cfg.get("mc_resolution", 256))

        fg = remove_background(image, self.rembg_session)
        fg = resize_foreground(fg, fg_ratio)

        # Composite RGBA -> RGB over 0.5 gray (the canonical preprocessing in
        # TripoSR's run.py). Without it the DINO tokenizer receives 4 channels
        # and fails with:
        #   "The size of tensor a (4) must match tensor b (3) at dim 2".
        arr = np.asarray(fg).astype(np.float32) / 255.0
        # Require a 3-D array so a 2-D image that happens to be 4 pixels wide
        # is not mistaken for RGBA.
        if arr.ndim == 3 and arr.shape[-1] == 4:
            alpha = arr[:, :, 3:4]
            arr = arr[:, :, :3] * alpha + (1.0 - alpha) * 0.5
            fg = Image.fromarray((arr * 255.0).astype(np.uint8))

        # Inference only: no autograd bookkeeping is needed.
        with torch.no_grad():
            scene_codes = self.model([fg], device=self.device)
            meshes = self.model.extract_mesh(
                scene_codes, has_vertex_color=False, resolution=mc_res
            )

        m = meshes[0]
        tm = trimesh.Trimesh(
            vertices=np.asarray(m.vertices),
            faces=np.asarray(m.faces),
            process=True,
        )
        buf = io.BytesIO()
        tm.export(buf, file_type="glb")
        return buf.getvalue()

    def close(self) -> None:
        """Drop the model and session references and empty the CUDA cache.

        Idempotent: the attributes are reset to ``None`` rather than deleted,
        so calling ``close()`` again does not raise.
        """
        self.model = None
        self.rembg_session = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
def load() -> Handle:
    """Build a ready-to-use TripoSR ``Handle``.

    Ensures the TripoSR checkout is importable, loads the
    ``stabilityai/TripoSR`` weights, moves the model to CUDA when available
    (CPU otherwise) and pairs it with a new rembg session.
    """
    _ensure_sources_on_path()
    # Deferred imports: `tsr` only resolves once the checkout is on sys.path.
    from tsr.system import TSR
    from rembg import new_session

    tsr_model = TSR.from_pretrained(
        "stabilityai/TripoSR",
        config_name="config.yaml",
        weight_name="model.ckpt",
    )
    tsr_model.renderer.set_chunk_size(8192)

    target = "cuda" if torch.cuda.is_available() else "cpu"
    tsr_model.to(target)

    session = new_session()
    return Handle(model=tsr_model, rembg_session=session, device=target)
Executable
+20
View File
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Start the FastAPI backend with the interpreter from the analysis venv (same
# deps: torch + diffusers + transformers + trimesh + Pillow). This script does
# not create a venv of its own.
set -euo pipefail

# Directory containing this script; also passed to uvicorn as --app-dir.
HERE="$(cd "$(dirname "$0")" && pwd)"
# Registry root: five levels above this script's directory.
ROOT="$(cd "$HERE/../../../../.." && pwd)"
VENV="$ROOT/projects/imagegen/analysis/spike_image_to_3d/.venv"
PYTHON="$VENV/bin/python"

if [[ ! -x "$PYTHON" ]]; then
    {
        echo "venv del analysis no existe: $VENV"
        echo "Crea el analysis primero: ./fn run init_jupyter_analysis --project imagegen spike_image_to_3d"
    } >&2
    exit 1
fi

export FN_REGISTRY_ROOT="$ROOT"

# PORT defaults to 8600; any extra command-line arguments go to uvicorn.
exec "$PYTHON" -m uvicorn server:app \
    --host 127.0.0.1 --port "${PORT:-8600}" \
    --app-dir "$HERE" \
    "$@"
+201
View File
@@ -0,0 +1,201 @@
"""FastAPI dispatcher para image-to-3D.
Endpoints:
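GET /health -> {"status":"ok","loaded":[...ids cargados...],"version":"..."}
GET /models -> {"models":[{"id","label","license","vram_gb_est","loaded"}...]}
DELETE /models/<id> -> {"unloaded":"<id>"} (404 si el modelo no esta cargado)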
POST /generate -> bytes GLB (Content-Type: model/gltf-binary)
multipart/form-data:
file=<imagen png/jpg>
model=<id> (triposr | hunyuan3d-2 | trellis)
seed=<int> (opcional, default 0)
mc_resolution=<int> (opcional, default 256)
foreground_ratio=<f> (opcional, default 0.85)
texture=<bool> (opcional, default true)
Implementacion:
- Cada backend (triposr/hunyuan3d-2/trellis) vive en backends/<modulo>.py
(ver backend_module en CATALOG; p.ej. hunyuan3d-2 -> backends/hunyuan3d_2.py)
con interfaz comun: load() -> handle, handle.infer(image_pil, cfg) -> bytes_glb.
- Lazy load: el primer POST con un model_id carga el modelo en GPU y lo
guarda en un dict global. Liberacion manual via DELETE /models/<id>.
- Single-process, single-GPU: serializamos peticiones por modelo con un
asyncio.Lock para no chocar dos infer simultaneos en la misma GPU.
Lanzar:
cd projects/imagegen/apps/image_to_3d_studio/backend
../../../analysis/spike_image_to_3d/.venv/bin/python -m uvicorn server:app \\
--host 127.0.0.1 --port 8600 --reload
"""
from __future__ import annotations
import asyncio
import io
import os
import sys
import time
from dataclasses import dataclass
from typing import Any, Dict
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import Response
from PIL import Image
# --- Catalogo de modelos ------------------------------------------------------
@dataclass(frozen=True)
class ModelSpec:
    """Immutable catalog entry describing one selectable image-to-3D model."""

    # Model identifier; also used as the key in CATALOG.
    id: str
    # Human-readable name.
    label: str
    # License string reported by GET /models.
    license: str
    # Estimated VRAM need in GB, reported by GET /models.
    vram_gb_est: float
    # Importable module path of the backend: backends.<name>
    backend_module: str
# Every model the dispatcher can serve, keyed by its id. Insertion order is
# the order in which GET /models lists them.
CATALOG: Dict[str, ModelSpec] = {
    spec.id: spec
    for spec in (
        ModelSpec(
            id="triposr",
            label="TripoSR (Stability + Tripo, MIT)",
            license="MIT",
            vram_gb_est=6.0,
            backend_module="backends.triposr",
        ),
        ModelSpec(
            id="hunyuan3d-2",
            label="Hunyuan3D-2 (Tencent, Community License)",
            license="Tencent Community",
            vram_gb_est=12.0,
            backend_module="backends.hunyuan3d_2",
        ),
        ModelSpec(
            id="trellis",
            label="Trellis (Microsoft, MIT code / research weights)",
            license="MIT (code)",
            vram_gb_est=16.0,
            backend_module="backends.trellis",
        ),
    )
}
# --- In-memory state ----------------------------------------------------------
_loaded: Dict[str, Any] = {}  # model_id -> backend handle
_locks: Dict[str, asyncio.Lock] = {}  # model_id -> lock (one per model)


def _lock_for(model_id: str) -> asyncio.Lock:
    """Return the lock for ``model_id``, creating and caching it on first use."""
    try:
        return _locks[model_id]
    except KeyError:
        lock = _locks[model_id] = asyncio.Lock()
        return lock
def _load_backend(spec: ModelSpec) -> Any:
    """Import ``spec.backend_module`` on demand and return its ``load()`` result.

    The directory containing this file is first put at the front of
    ``sys.path`` (unless already present) so the ``backends.<name>`` modules
    can be imported.
    """
    from importlib import import_module

    base_dir = os.path.dirname(os.path.abspath(__file__))
    if base_dir not in sys.path:
        sys.path.insert(0, base_dir)

    backend = import_module(spec.backend_module)
    return backend.load()
# --- FastAPI app --------------------------------------------------------------
# Application object; the uvicorn launch commands refer to it as `server:app`.
app = FastAPI(title="image_to_3d_studio backend", version="0.1.0")
@app.get("/health")
def health() -> Dict[str, Any]:
    """Report status, the sorted ids of currently loaded models, and the version."""
    loaded_ids = sorted(_loaded)
    return {"status": "ok", "loaded": loaded_ids, "version": "0.1.0"}
@app.get("/models")
def list_models() -> Dict[str, Any]:
    """List every catalog entry with its metadata and whether it is loaded."""
    entries = []
    for spec in CATALOG.values():
        entries.append(
            {
                "id": spec.id,
                "label": spec.label,
                "license": spec.license,
                "vram_gb_est": spec.vram_gb_est,
                "loaded": spec.id in _loaded,
            }
        )
    return {"models": entries}
def _empty_cuda_cache() -> None:
    """Best-effort release of cached CUDA memory; never raises."""
    try:
        import torch

        torch.cuda.empty_cache()
    except Exception:
        # Deliberately ignored: torch may be missing or CUDA unusable, and
        # failing to trim the cache must not fail the unload itself.
        pass


@app.delete("/models/{model_id}")
async def unload(model_id: str) -> Dict[str, Any]:
    """Unload ``model_id`` and release its resources.

    The handle is removed and closed while holding the same per-model lock
    that ``generate`` holds during load and inference, so a model is never
    torn down underneath a running ``infer``.

    Returns:
        ``{"unloaded": model_id}``.

    Raises:
        HTTPException: 404 when the model is not currently loaded.
    """
    # Check before asking for a lock so arbitrary path values cannot add
    # entries to the lock table.
    if model_id not in _loaded:
        raise HTTPException(404, f"not loaded: {model_id}")

    async with _lock_for(model_id):
        handle = _loaded.pop(model_id, None)
        if handle is None:
            # Another DELETE unloaded it while this one waited for the lock.
            raise HTTPException(404, f"not loaded: {model_id}")

        # Call close() when the backend handle provides one. It may block, so
        # keep it off the event loop.
        close = getattr(handle, "close", None)
        if callable(close):
            await asyncio.to_thread(close)

        # Push cached VRAM back to the driver.
        await asyncio.to_thread(_empty_cuda_cache)

    return {"unloaded": model_id}
@app.post("/generate")
async def generate(
    file: UploadFile = File(...),
    model: str = Form("triposr"),
    seed: int = Form(0),
    mc_resolution: int = Form(256),
    foreground_ratio: float = Form(0.85),
    texture: bool = Form(True),
) -> Response:
    """Generate a GLB from an uploaded image with the selected model.

    The backend is loaded on the first request for ``model`` and kept in
    ``_loaded``. Load and inference both run in worker threads while holding
    the per-model lock, so requests for the same model are serialized.

    Args:
        file: uploaded image; decoded with Pillow and converted to RGB.
        model: catalog id of the backend to use.
        seed, mc_resolution, foreground_ratio, texture: forwarded unchanged
            to the backend in ``cfg``.

    Returns:
        A ``model/gltf-binary`` response whose ``X-Model``, ``X-Duration-ms``
        and ``X-Bytes`` headers describe the run.

    Raises:
        HTTPException: 400 for an unknown model or an undecodable image,
            501 when the backend raises ``NotImplementedError``, 500 when
            loading or inference fails for any other reason.
    """
    spec = CATALOG.get(model)
    if spec is None:
        raise HTTPException(400, f"unknown model: {model} (catalog: {list(CATALOG)})")

    raw = await file.read()
    try:
        image = Image.open(io.BytesIO(raw)).convert("RGB")
    except Exception as e:
        raise HTTPException(400, f"bad image: {e}") from e

    cfg = dict(
        seed=seed,
        mc_resolution=mc_resolution,
        foreground_ratio=foreground_ratio,
        texture=texture,
    )

    async with _lock_for(model):
        if model not in _loaded:
            # Map load failures (missing checkout, missing dependency, ...)
            # to HTTP errors the same way inference failures are mapped below.
            try:
                _loaded[model] = await asyncio.to_thread(_load_backend, spec)
            except NotImplementedError as e:
                raise HTTPException(501, str(e)) from e
            except Exception as e:
                raise HTTPException(500, f"{model} load failed: {e}") from e
        handle = _loaded[model]

        # Time only the inference call, not the (possibly long) first load.
        t0 = time.perf_counter()
        try:
            glb_bytes = await asyncio.to_thread(handle.infer, image, cfg)
        except NotImplementedError as e:
            raise HTTPException(501, str(e)) from e
        except Exception as e:
            raise HTTPException(500, f"{model} infer failed: {e}") from e
        dt_ms = int((time.perf_counter() - t0) * 1000)

    return Response(
        content=glb_bytes,
        media_type="model/gltf-binary",
        headers={
            "X-Model": model,
            "X-Duration-ms": str(dt_ms),
            "X-Bytes": str(len(glb_bytes)),
        },
    )