chore: auto-commit (5 archivos)

- CMakeLists.txt
- app.md
- appicon.ico
- backend/
- main.cpp

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-30 17:28:48 +02:00
commit d3c83053f2
14 changed files with 1020 additions and 0 deletions
+23
View File
@@ -0,0 +1,23 @@
# ImGui application target for the single-image-to-3D studio.
add_imgui_app(image_to_3d_studio
    main.cpp
    # Registry functions used by the app (issue 0085 — explicit declaration in CMake):
    ${CMAKE_SOURCE_DIR}/functions/core/http_request.cpp
    ${CMAKE_SOURCE_DIR}/functions/gfx/gl_texture_load.cpp
    # stb_image implementation (gl_texture_load uses it):
    ${CMAKE_SOURCE_DIR}/vendor/stb/stb_image_impl.cpp
    # 3D viewer: GLB loader + GPU mesh + orbit camera + FBO + mesh_viewer.
    # (gl_loader comes bundled in fn_framework; the rest is linked here.)
    ${CMAKE_SOURCE_DIR}/functions/gfx/gltf_load_mesh.cpp
    ${CMAKE_SOURCE_DIR}/functions/gfx/mesh_gpu.cpp
    ${CMAKE_SOURCE_DIR}/functions/gfx/gl_framebuffer.cpp
    ${CMAKE_SOURCE_DIR}/functions/core/orbit_camera.cpp
    ${CMAKE_SOURCE_DIR}/functions/viz/mesh_viewer.cpp
)
target_include_directories(image_to_3d_studio PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_SOURCE_DIR}/vendor # nlohmann/json.hpp for gltf_load_mesh
)
# On Windows, set CMake's WIN32_EXECUTABLE property on the target.
if(WIN32)
    set_target_properties(image_to_3d_studio PROPERTIES WIN32_EXECUTABLE TRUE)
endif()
+71
View File
@@ -0,0 +1,71 @@
---
name: image_to_3d_studio
lang: cpp
domain: gfx
description: "Image to 3D studio: imagen origen + POST a backend Python (TripoSR/Hunyuan3D/Trellis) y guarda mesh GLB."
tags: [imagegen, 3d, mesh, viewer, imgui]
icon:
phosphor: "cube-transparent"
accent: "#0ea5e9"
uses_functions:
- http_request_cpp_core # POST /generate al backend Python
- gl_texture_load_cpp_gfx # preview de la imagen origen en panel Input
- gltf_load_mesh_cpp_gfx # parse GLB -> Mesh CPU para el viewer 3D
- mesh_gpu_cpp_gfx # sube Mesh a GPU (VAO/VBO/EBO)
- mesh_viewer_cpp_viz # render orbit + FBO depth del mesh
- orbit_camera_cpp_core # camara orbital (drag rotar / wheel zoom)
- gl_framebuffer_cpp_gfx # FBO con depth para el render offscreen
uses_types: []
framework: "imgui"
entry_point: "main.cpp"
dir_path: "projects/imagegen/apps/image_to_3d_studio"
repo_url: "https://gitea.organic-machine.com/dataforge/image_to_3d_studio"
---
# image_to_3d_studio
UI ImGui que envia una imagen al backend Python (FastAPI) y guarda el
mesh GLB resultante en `local_files/cache/`. Despachador soporta varios
modelos image-to-3D detras del mismo endpoint:
- **TripoSR** (MIT, ~0.5 s, ~6 GB VRAM) — backend listo.
- **Hunyuan3D-2** (Tencent Community, mejor textura) — stub.
- **Trellis** (Microsoft MIT, mesh/3DGS/NeRF) — stub.
Viewer GLB integrado: panel **Viewer 3D** carga el mesh con `gltf_load_mesh`,
lo sube a GPU y lo renderiza con `mesh_viewer` (orbit camera + FBO con depth).
Drag = rotar, rueda = zoom, "Reset cam" recentra, checkbox wireframe. Auto-carga
el mesh al terminar Generate; tambien boton "View 3D" en el panel Output.
## Arquitectura
```
image_to_3d_studio (C++ ImGui)
── multipart POST /generate ──▶ backend FastAPI 127.0.0.1:8600
dispatcher → backends/<id>.py
◀── bytes GLB ──
guarda en <exe>/local_files/cache/<model>_<hash>.glb
```
## Build
```bash
cd cpp && cmake --build build --target image_to_3d_studio -j
```
## Run
```bash
# 1) backend Python (otra terminal)
projects/imagegen/apps/image_to_3d_studio/backend/run.sh
# 2) app C++
./cpp/build/image_to_3d_studio
```
## Estado
- Notebook `projects/imagegen/analysis/spike_image_to_3d/notebooks/01_panorama_image_to_3d.ipynb` — panorama + tabla comparativa.
- Notebook `02_smoke_triposr.ipynb` — smoke end-to-end.
- Backend `backend/server.py` — dispatcher, TripoSR funcional, Hunyuan3D-2 + Trellis = stubs `501 Not Implemented`.
- App C++ — paneles Input/Models/Output/Viewer 3D + POST + cache + viewer GLB integrado.
BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 7.6 KiB

Binary file not shown.
View File
Binary file not shown.
+17
View File
@@ -0,0 +1,17 @@
"""Interfaz comun para backends image-to-3D.
Cada backend (triposr, hunyuan3d_2, trellis) expone:
load() -> Handle
donde Handle tiene metodo infer(image: PIL.Image, cfg: dict) -> bytes (GLB).
`cfg` recibe: seed, mc_resolution, foreground_ratio, texture.
"""
from __future__ import annotations
from typing import Protocol, Any, Dict
from PIL import Image
class BackendHandle(Protocol):
    """Structural interface for the object returned by a backend's ``load()``."""

    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
        """Turn ``image`` into a mesh and return it as GLB bytes.

        ``cfg`` carries the request options (seed, mc_resolution,
        foreground_ratio, texture).
        """
        ...

    def close(self) -> None:
        """Release whatever the handle holds."""
        ...
+23
View File
@@ -0,0 +1,23 @@
"""Backend Hunyuan3D-2 (Tencent, Community License).
STUB. Para implementar: clonar github.com/Tencent/Hunyuan3D-2 a sources/,
instalar deps, cargar pipeline shape + texture.
"""
from __future__ import annotations
from typing import Any, Dict
from PIL import Image
class Handle:
    """Placeholder handle for the Hunyuan3D-2 backend; inference is not implemented."""

    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
        """Always raise ``NotImplementedError``; ``image`` and ``cfg`` are not used."""
        raise NotImplementedError(
            "hunyuan3d-2 backend pendiente. Ver notebook 03_smoke_hunyuan3d.ipynb"
        )

    def close(self) -> None:  # pragma: no cover
        """No-op: nothing to release."""
        return
def load() -> Handle:
    """Return a new stub ``Handle``; no model is loaded."""
    return Handle()
+23
View File
@@ -0,0 +1,23 @@
"""Backend Trellis (Microsoft, MIT code).
STUB. Para implementar: clonar github.com/microsoft/TRELLIS a sources/,
instalar deps (kaolin + custom CUDA), cargar pipeline structured latents.
"""
from __future__ import annotations
from typing import Any, Dict
from PIL import Image
class Handle:
    """Placeholder handle for the Trellis backend; inference is not implemented."""

    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
        """Always raise ``NotImplementedError``; ``image`` and ``cfg`` are not used."""
        raise NotImplementedError(
            "trellis backend pendiente. Ver notebook 04_smoke_trellis.ipynb"
        )

    def close(self) -> None:  # pragma: no cover
        """No-op: nothing to release."""
        return
def load() -> Handle:
    """Return a new stub ``Handle``; no model is loaded."""
    return Handle()
+93
View File
@@ -0,0 +1,93 @@
"""Backend TripoSR (Stability + Tripo, MIT).
Asume que `sources/TripoSR` esta clonado en el registry. Importa `tsr.system.TSR`.
Descarga checkpoint desde HF en la primera carga (~1.2 GB).
"""
from __future__ import annotations
import io
import os
import pathlib
import sys
from dataclasses import dataclass
from typing import Any, Dict
import numpy as np
import torch
import trimesh
from PIL import Image
def _ensure_sources_on_path() -> pathlib.Path:
root = pathlib.Path(os.environ.get("FN_REGISTRY_ROOT", "/home/lucas/fn_registry"))
src = root / "sources" / "TripoSR"
if not src.exists():
raise RuntimeError(
f"TripoSR no clonado en {src}. "
"git clone --depth=1 https://github.com/VAST-AI-Research/TripoSR.git "
f"{src}"
)
if str(src) not in sys.path:
sys.path.insert(0, str(src))
return src
@dataclass
class Handle:
    """Loaded TripoSR model plus what ``infer`` needs to run it."""

    model: Any  # object returned by TSR.from_pretrained(...) in load()
    rembg_session: Any  # session returned by rembg.new_session() in load()
    device: str  # "cuda" or "cpu", as chosen in load()

    def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes:
        """Reconstruct a mesh from ``image`` and return it serialized as GLB.

        Reads ``foreground_ratio`` (default 0.85) and ``mc_resolution``
        (default 256) from ``cfg``; other keys are not read here.
        """
        from tsr.utils import remove_background, resize_foreground

        fg_ratio = float(cfg.get("foreground_ratio", 0.85))
        mc_res = int(cfg.get("mc_resolution", 256))
        fg = remove_background(image, self.rembg_session)
        fg = resize_foreground(fg, fg_ratio)
        # Composite RGBA -> RGB over 0.5 gray (canonical TripoSR run.py
        # preprocessing). Without this the DINO tokenizer receives 4 channels
        # and fails with:
        # "The size of tensor a (4) must match tensor b (3) at dim 2".
        arr = np.asarray(fg).astype(np.float32) / 255.0
        if arr.shape[-1] == 4:
            arr = arr[:, :, :3] * arr[:, :, 3:4] + (1.0 - arr[:, :, 3:4]) * 0.5
        fg = Image.fromarray((arr * 255.0).astype(np.uint8))
        with torch.no_grad():
            scene_codes = self.model([fg], device=self.device)
            meshes = self.model.extract_mesh(
                scene_codes, has_vertex_color=False, resolution=mc_res
            )
        m = meshes[0]
        # Rebuild as a trimesh object from vertices/faces only, then export to
        # an in-memory GLB.
        tm = trimesh.Trimesh(
            vertices=np.asarray(m.vertices),
            faces=np.asarray(m.faces),
            process=True,
        )
        buf = io.BytesIO()
        tm.export(buf, file_type="glb")
        return buf.getvalue()

    def close(self) -> None:
        """Drop the model and rembg session, then empty the CUDA cache if CUDA is available."""
        del self.model
        del self.rembg_session
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
def load() -> Handle:
    """Load the pretrained TripoSR model and return a ready ``Handle``.

    Puts the TripoSR checkout on ``sys.path`` first (raising ``RuntimeError``
    if it is missing), then imports ``tsr`` and ``rembg`` lazily.
    """
    _ensure_sources_on_path()
    from tsr.system import TSR
    from rembg import new_session

    # Use CUDA when torch reports it as available, otherwise fall back to CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = TSR.from_pretrained(
        "stabilityai/TripoSR",
        config_name="config.yaml",
        weight_name="model.ckpt",
    )
    model.renderer.set_chunk_size(8192)
    model.to(device)
    return Handle(model=model, rembg_session=new_session(), device=device)
Executable
+20
View File
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Launches the FastAPI backend reusing the analysis venv (same deps:
# torch + diffusers + transformers + trimesh + Pillow). Does not create its own venv.
set -euo pipefail
# Registry root: five directory levels above the directory holding this script.
ROOT="$(cd "$(dirname "$0")"/../../../../.. && pwd)"
VENV="$ROOT/projects/imagegen/analysis/spike_image_to_3d/.venv"
# Absolute path of this script's directory; passed to uvicorn as --app-dir.
HERE="$(cd "$(dirname "$0")" && pwd)"
# Abort with a hint when the analysis venv has no executable python.
if [ ! -x "$VENV/bin/python" ]; then
echo "venv del analysis no existe: $VENV" >&2
echo "Crea el analysis primero: ./fn run init_jupyter_analysis --project imagegen spike_image_to_3d" >&2
exit 1
fi
export FN_REGISTRY_ROOT="$ROOT"
# Serve on 127.0.0.1 at $PORT (default 8600); extra arguments are forwarded to uvicorn.
exec "$VENV/bin/python" -m uvicorn server:app \
--host 127.0.0.1 --port "${PORT:-8600}" \
--app-dir "$HERE" \
"$@"
+201
View File
@@ -0,0 +1,201 @@
"""FastAPI dispatcher para image-to-3D.
Endpoints:
GET /health -> {"status":"ok","loaded":[...ids cargados...],"version":"0.1.0"}
GET /models -> {"models":[{"id","label","license","vram_gb_est","loaded"}...]}
POST /generate -> bytes GLB (Content-Type: model/gltf-binary)
multipart/form-data:
file=<imagen png/jpg>
model=<id> (triposr | hunyuan3d-2 | trellis)
seed=<int> (opcional, default 0)
mc_resolution=<int> (opcional, default 256)
foreground_ratio=<f> (opcional, default 0.85)
texture=<bool> (opcional, default true)
Implementacion:
- Cada backend (triposr/hunyuan3d-2/trellis) vive en backends/<id>.py
con interfaz comun: load() -> handle, handle.infer(image_pil, cfg) -> bytes_glb.
- Lazy load: el primer POST con un model_id carga el modelo en GPU y lo
guarda en un dict global. Liberacion manual via DELETE /models/<id>.
- Single-process, single-GPU: serializamos peticiones por modelo con un
asyncio.Lock para no chocar dos infer simultaneos en la misma GPU.
Lanzar:
cd projects/imagegen/apps/image_to_3d_studio/backend
../../../analysis/spike_image_to_3d/.venv/bin/python -m uvicorn server:app \\
--host 127.0.0.1 --port 8600 --reload
"""
from __future__ import annotations
import asyncio
import io
import os
import sys
import time
from dataclasses import dataclass
from typing import Any, Dict
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import Response
from PIL import Image
# --- Catalogo de modelos ------------------------------------------------------
@dataclass(frozen=True)
class ModelSpec:
    """Immutable catalog entry describing one selectable image-to-3D model."""

    id: str  # model identifier; used as the CATALOG key and as the `model` form value
    label: str  # human-readable name, returned by GET /models
    license: str  # license text, returned by GET /models
    vram_gb_est: float  # estimated VRAM in GB, returned by GET /models
    backend_module: str  # backends.<name>
# Every model the dispatcher accepts, keyed by model id. POST /generate rejects
# ids that are not listed here; each entry names the module that implements it.
CATALOG: Dict[str, ModelSpec] = {
    "triposr": ModelSpec(
        id="triposr",
        label="TripoSR (Stability + Tripo, MIT)",
        license="MIT",
        vram_gb_est=6.0,
        backend_module="backends.triposr",
    ),
    "hunyuan3d-2": ModelSpec(
        id="hunyuan3d-2",
        label="Hunyuan3D-2 (Tencent, Community License)",
        license="Tencent Community",
        vram_gb_est=12.0,
        backend_module="backends.hunyuan3d_2",
    ),
    "trellis": ModelSpec(
        id="trellis",
        label="Trellis (Microsoft, MIT code / research weights)",
        license="MIT (code)",
        vram_gb_est=16.0,
        backend_module="backends.trellis",
    ),
}
# --- Estado en memoria --------------------------------------------------------
_loaded: Dict[str, Any] = {} # model_id -> handle del backend
_locks: Dict[str, asyncio.Lock] = {} # model_id -> lock (uno por modelo)
def _lock_for(model_id: str) -> asyncio.Lock:
lk = _locks.get(model_id)
if lk is None:
lk = asyncio.Lock()
_locks[model_id] = lk
return lk
def _load_backend(spec: ModelSpec) -> Any:
"""Importa el modulo del backend on-demand y llama load(). Cada backend
debe exponer load() -> handle e infer(handle, image, cfg) -> bytes."""
# Garantiza que `backends/` esta en sys.path
here = os.path.dirname(os.path.abspath(__file__))
if here not in sys.path:
sys.path.insert(0, here)
import importlib
mod = importlib.import_module(spec.backend_module)
return mod.load()
# --- FastAPI app --------------------------------------------------------------
app = FastAPI(title="image_to_3d_studio backend", version="0.1.0")
@app.get("/health")
def health() -> Dict[str, Any]:
    """Report service status, the sorted ids of currently loaded models, and the version."""
    return {
        "status": "ok",
        "loaded": sorted(_loaded.keys()),
        "version": "0.1.0",
    }
@app.get("/models")
def list_models() -> Dict[str, Any]:
    """List every catalog entry together with whether it is currently loaded."""
    entries = []
    for spec in CATALOG.values():
        entries.append(
            {
                "id": spec.id,
                "label": spec.label,
                "license": spec.license,
                "vram_gb_est": spec.vram_gb_est,
                "loaded": spec.id in _loaded,
            }
        )
    return {"models": entries}
@app.delete("/models/{model_id}")
async def unload(model_id: str) -> Dict[str, Any]:
    """Unload a model and release its resources.

    The handle is removed while holding the same per-model lock that
    ``generate`` holds during load and inference, so a model is never closed
    while an inference on it is still running.

    Raises:
        HTTPException: 404 if ``model_id`` is not currently loaded.
    """
    # Cheap pre-check so unknown ids do not create lock entries.
    if model_id not in _loaded:
        raise HTTPException(404, f"not loaded: {model_id}")
    async with _lock_for(model_id):
        handle = _loaded.pop(model_id, None)
        if handle is None:
            # Another unload won the race while we were waiting for the lock.
            raise HTTPException(404, f"not loaded: {model_id}")
        # If the backend exposes close(), call it off the event loop.
        close = getattr(handle, "close", None)
        if callable(close):
            await asyncio.to_thread(close)
    # Best effort: ask torch to return cached VRAM. Failures (torch missing,
    # CUDA unavailable) are deliberately ignored.
    try:
        import torch

        torch.cuda.empty_cache()
    except Exception:
        pass
    return {"unloaded": model_id}
@app.post("/generate")
async def generate(
    file: UploadFile = File(...),
    model: str = Form("triposr"),
    seed: int = Form(0),
    mc_resolution: int = Form(256),
    foreground_ratio: float = Form(0.85),
    texture: bool = Form(True),
) -> Response:
    """Run image-to-3D inference and return the mesh as GLB bytes.

    The uploaded image is decoded and converted to RGB, the requested backend
    is loaded on first use, and inference runs in a worker thread. Load and
    inference for a given model are serialized by that model's lock.

    Raises:
        HTTPException: 400 for an unknown model or an undecodable image,
            501 when the backend raises ``NotImplementedError``,
            500 when loading the backend or running inference fails.
    """
    spec = CATALOG.get(model)
    if spec is None:
        raise HTTPException(400, f"unknown model: {model} (catalog: {list(CATALOG)})")
    raw = await file.read()
    try:
        image = Image.open(io.BytesIO(raw)).convert("RGB")
    except Exception as e:
        raise HTTPException(400, f"bad image: {e}") from e
    cfg = dict(
        seed=seed,
        mc_resolution=mc_resolution,
        foreground_ratio=foreground_ratio,
        texture=texture,
    )
    lock = _lock_for(model)
    async with lock:
        if model not in _loaded:
            # Report load problems (missing checkout, import errors, ...) with
            # their message instead of an opaque internal server error.
            try:
                _loaded[model] = await asyncio.to_thread(_load_backend, spec)
            except Exception as e:
                raise HTTPException(500, f"{model} load failed: {e}") from e
        handle = _loaded[model]
        t0 = time.perf_counter()
        try:
            glb_bytes = await asyncio.to_thread(handle.infer, image, cfg)
        except NotImplementedError as e:
            raise HTTPException(501, str(e)) from e
        except Exception as e:
            raise HTTPException(500, f"{model} infer failed: {e}") from e
        dt_ms = int((time.perf_counter() - t0) * 1000)
    return Response(
        content=glb_bytes,
        media_type="model/gltf-binary",
        headers={
            "X-Model": model,
            "X-Duration-ms": str(dt_ms),
            "X-Bytes": str(len(glb_bytes)),
        },
    )
+549
View File
@@ -0,0 +1,549 @@
// image_to_3d_studio — UI ImGui para single-image-to-3D.
//
// Carga una imagen (path en text field), envia POST multipart/form-data al
// backend Python (FastAPI en 127.0.0.1:8600), recibe bytes GLB, los guarda
// en local_files/cache/<model>_<hash>.glb y reporta path al usuario.
//
// Viewer GLB integrado: panel "Viewer 3D" carga el mesh GLB con
// gltf_load_mesh, lo sube a GPU (mesh_gpu) y lo renderiza con mesh_viewer
// (orbit camera + FBO + depth). Drag = rotar, rueda = zoom.
//
// Backend levantarlo aparte:
// cd projects/imagegen/apps/image_to_3d_studio/backend && ./run.sh
#include <imgui.h>
#include <algorithm>
#include <atomic>
#include <cctype>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <filesystem>
#include <fstream>
#include <mutex>
#include <random>
#include <sstream>
#include <string>
#include <system_error>
#include <thread>
#include <utility>
#include <vector>
#include "app_base.h"
#include "core/http_request.h"
#include "core/icons_tabler.h"
#include "core/logger.h"
#include "core/orbit_camera.h"
#include "core/panel_menu.h"
#include "gfx/gl_texture_load.h"
#include "gfx/gltf_load_mesh.h"
#include "gfx/mesh_gpu.h"
#include "viz/mesh_viewer.h"
namespace {
// ── UI state ───────────────────────────────────────────────────────────────
// Visibility flag of each panel window (passed to ImGui::Begin as p_open).
bool g_show_input = true;
bool g_show_models = true;
bool g_show_output = true;
bool g_show_viewer = true;
// 3D viewer
fn::gfx::MeshGpu g_mesh_gpu{};
fn::core::OrbitCamera g_cam{};
std::string g_viewer_path; // GLB currently loaded in the viewer
std::string g_viewer_err; // load error
int g_viewer_tris = 0; // index_count / 3 of the uploaded mesh
bool g_viewer_wireframe = false;
// Path waiting to be loaded into the viewer (set by the worker/button; consumed
// by render() on the main thread because GL calls need an active context).
std::mutex g_viewer_load_mu;
std::string g_viewer_pending_path;
// Input
char g_image_path[1024] = "";
fn::GlTexture g_preview_tex{};
int g_preview_w = 0, g_preview_h = 0;
std::string g_preview_err;
// Backend
char g_backend_url[256] = "http://127.0.0.1:8600";
// Model + params (sent as form fields of POST /generate)
int g_model_idx = 0;
int g_seed = 0;
int g_mc_resolution = 256;
float g_foreground_ratio = 0.85f;
bool g_texture = true;
// Model ids sent to the backend; index-aligned with MODEL_LABELS.
const char* const MODEL_IDS[] = {
    "triposr",
    "hunyuan3d-2",
    "trellis",
};
// Combo-box labels; index-aligned with MODEL_IDS.
const char* const MODEL_LABELS[] = {
    "TripoSR (MIT, ~6 GB)",
    "Hunyuan3D-2 (Tencent, ~12 GB)",
    "Trellis (MIT code, ~16 GB)",
};
constexpr int MODEL_COUNT = (int)(sizeof(MODEL_IDS) / sizeof(MODEL_IDS[0]));
// Output / job state
enum class JobState { Idle, Running, Done, Failed };
std::atomic<JobState> g_state{JobState::Idle};
// g_result_mu is held when the g_result_* values below are written by the
// worker and when the Output panel reads them.
std::mutex g_result_mu;
std::string g_result_path; // GLB on disk (when Done)
std::string g_result_err; // message (when Failed)
int g_result_status_http = 0;
int64_t g_result_duration_ms = 0;
size_t g_result_bytes = 0;
// Backend health
std::atomic<bool> g_health_pinged{false}; // set once a /health ping has finished
std::string g_health_text; // accessed under g_health_mu
std::mutex g_health_mu;
// ── Helpers ────────────────────────────────────────────────────────────────
// Reads the whole file at `path` into memory.
//
// Returns an empty vector when `path` is not a regular file (missing,
// directory, pipe, ...), when it is empty, when it cannot be opened, or when
// fewer bytes than the reported size could be read. Never returns a partially
// filled buffer.
std::vector<uint8_t> read_file_bytes(const std::string& path) {
    std::error_code ec;
    // Reject non-regular files up front: a stream opened on a directory can
    // report a bogus size, which would turn into a huge allocation.
    if (!std::filesystem::is_regular_file(path, ec) || ec) return {};
    const auto size = std::filesystem::file_size(path, ec);
    if (ec || size == 0) return {};

    std::ifstream f(path, std::ios::binary);
    if (!f) return {};
    std::vector<uint8_t> out(static_cast<size_t>(size));
    const auto wanted = static_cast<std::streamsize>(out.size());
    f.read(reinterpret_cast<char*>(out.data()), wanted);
    // A short read means the file changed underneath us or an I/O error
    // occurred; treat it as a failure.
    if (f.gcount() != wanted) return {};
    return out;
}
// Returns the part of `path` after the last '/' or '\\'; the whole string
// when it contains neither separator.
std::string basename_of(const std::string& path) {
    std::size_t start = path.size();
    // Walk backwards until the character just before `start` is a separator.
    while (start > 0 && path[start - 1] != '/' && path[start - 1] != '\\') {
        --start;
    }
    return path.substr(start);
}
// Simple extension-based heuristic: maps the text after the last '.' of `path`
// (compared case-insensitively) to an image MIME type. Anything unknown, or a
// path without a '.', yields "application/octet-stream".
const char* mime_for(const std::string& path) {
    struct Known {
        const char* ext;
        const char* mime;
    };
    static const Known kKnown[] = {
        {"png", "image/png"},
        {"jpg", "image/jpeg"},
        {"jpeg", "image/jpeg"},
        {"webp", "image/webp"},
        {"bmp", "image/bmp"},
    };
    static const char* const kFallback = "application/octet-stream";

    const std::string::size_type dot_pos = path.rfind('.');
    if (dot_pos == std::string::npos) return kFallback;

    std::string lowered;
    lowered.reserve(path.size() - dot_pos - 1);
    for (std::string::size_type i = dot_pos + 1; i < path.size(); ++i) {
        lowered.push_back(static_cast<char>(tolower(static_cast<unsigned char>(path[i]))));
    }
    for (const Known& entry : kKnown) {
        if (lowered == entry.ext) return entry.mime;
    }
    return kFallback;
}
// Produces a multipart boundary: the fixed prefix "----fn_boundary_" followed
// by two 64-bit random values printed as unpadded lowercase hex. The generator
// is seeded once from std::random_device.
std::string random_boundary() {
    static std::mt19937_64 rng{std::random_device{}()};
    // Draw in a fixed order so the first value is printed first.
    const unsigned long long first = rng();
    const unsigned long long second = rng();
    char hex[2 * 16 + 1];
    std::snprintf(hex, sizeof(hex), "%llx%llx", first, second);
    return std::string("----fn_boundary_") + hex;
}
// Escapes a field or file name for use inside a quoted Content-Disposition
// parameter: '"', CR and LF are percent-encoded so they cannot terminate the
// quoted value or inject extra header lines.
std::string escape_disposition_value(const std::string& raw) {
    std::string out;
    out.reserve(raw.size());
    for (const char c : raw) {
        switch (c) {
            case '"':  out += "%22"; break;
            case '\r': out += "%0D"; break;
            case '\n': out += "%0A"; break;
            default:   out += c;     break;
        }
    }
    return out;
}

// Builds a multipart/form-data body by hand. fn_http::Request takes the body
// as a std::string, which can hold arbitrary binary bytes (data()+size()).
//
//   boundary   boundary token, without the leading "--"
//   fields     text parts, written in order as name -> value
//   file_field form field name of the single file part
//   file_name  filename advertised for the file part
//   file_mime  Content-Type of the file part
//   file_bytes raw file content (may be empty)
//
// Returns the complete body, terminated by the closing "--boundary--" line.
std::string build_multipart(
    const std::string& boundary,
    const std::vector<std::pair<std::string, std::string>>& fields, // name -> value (text)
    const std::string& file_field,
    const std::string& file_name,
    const std::string& file_mime,
    const std::vector<uint8_t>& file_bytes)
{
    std::string body;
    body.reserve(file_bytes.size() + 1024);
    auto add = [&](const std::string& s) { body.append(s); };
    for (const auto& [name, value] : fields) {
        add("--"); add(boundary); add("\r\n");
        add("Content-Disposition: form-data; name=\"");
        add(escape_disposition_value(name));
        add("\"\r\n\r\n");
        add(value); add("\r\n");
    }
    add("--"); add(boundary); add("\r\n");
    add("Content-Disposition: form-data; name=\"");
    add(escape_disposition_value(file_field));
    add("\"; filename=\"");
    add(escape_disposition_value(file_name));
    add("\"\r\n");
    add("Content-Type: "); add(file_mime); add("\r\n\r\n");
    // data() of an empty vector may be null; skip the append in that case.
    if (!file_bytes.empty()) {
        body.append(reinterpret_cast<const char*>(file_bytes.data()), file_bytes.size());
    }
    add("\r\n");
    add("--"); add(boundary); add("--\r\n");
    return body;
}
// Fast FNV-1a-style 64-bit hash of `n` bytes, returned as 16 lowercase hex
// digits (zero-padded). Used to name the cached GLB.
// NOTE(review): the seed below is not the canonical FNV-1a 64-bit offset basis
// (14695981039346656037); it is kept as-is so generated names do not change.
std::string fnv1a_hex(const uint8_t* data, size_t n) {
    constexpr uint64_t kSeed = 1469598103934665603ull;
    constexpr uint64_t kPrime = 1099511628211ull;
    static const char kDigits[] = "0123456789abcdef";

    uint64_t acc = kSeed;
    for (size_t consumed = 0; consumed != n; ++consumed) {
        acc = (acc ^ data[consumed]) * kPrime;
    }

    // Emit nibbles from least to most significant, filling right to left.
    std::string hex(16, '0');
    for (int pos = 15; pos >= 0; --pos) {
        hex[static_cast<size_t>(pos)] = kDigits[acc & 0xFu];
        acc >>= 4;
    }
    return hex;
}
// Normalizes a Mesh in place: moves the center of its bounding box to the
// origin and scales it so the largest bounding-box extent becomes 2 (fits a
// sphere of radius ~1). This lets the orbit camera (fixed target at 0,0,0,
// distance ~3) frame any mesh regardless of its original scale.
//
// `m.positions` is treated as a flat x,y,z,x,y,z,... array. Only complete
// triples are read and written; trailing components of a malformed array are
// left untouched instead of being accessed out of bounds.
void normalize_mesh(fn::gfx::Mesh& m) {
    const size_t usable = m.positions.size() - (m.positions.size() % 3);
    if (usable < 3) return;

    // Bounding box, seeded with the first vertex.
    float mn[3] = { m.positions[0], m.positions[1], m.positions[2] };
    float mx[3] = { m.positions[0], m.positions[1], m.positions[2] };
    for (size_t i = 0; i < usable; i += 3) {
        for (int k = 0; k < 3; ++k) {
            const float v = m.positions[i + k];
            mn[k] = std::min(mn[k], v);
            mx[k] = std::max(mx[k], v);
        }
    }

    const float cx = 0.5f * (mn[0] + mx[0]);
    const float cy = 0.5f * (mn[1] + mx[1]);
    const float cz = 0.5f * (mn[2] + mx[2]);

    float ext = 0.0f;
    for (int k = 0; k < 3; ++k) ext = std::max(ext, mx[k] - mn[k]);
    // Degenerate (point-like) meshes are only recentered, not scaled.
    const float scale = (ext > 1e-6f) ? (2.0f / ext) : 1.0f; // diameter -> ~2

    for (size_t i = 0; i < usable; i += 3) {
        m.positions[i + 0] = (m.positions[i + 0] - cx) * scale;
        m.positions[i + 1] = (m.positions[i + 1] - cy) * scale;
        m.positions[i + 2] = (m.positions[i + 2] - cz) * scale;
    }
}
// Loads a GLB from disk into the 3D viewer. MUST be called from the main thread
// (the GL calls made by mesh_gpu_upload require an active context).
// On failure the reason is stored in g_viewer_err and logged.
void load_mesh_into_viewer(const std::string& path) {
    g_viewer_err.clear();
    fn::gfx::Mesh m = fn::gfx::gltf_load_mesh_from_file(path.c_str());
    // An empty positions array is treated as a failed load.
    if (m.positions.empty()) {
        g_viewer_err = std::string("GLB load failed (") +
                       fn::gfx::gltf_load_last_error() + "): " + path;
        fn_log::log_error(g_viewer_err.c_str());
        return;
    }
    normalize_mesh(m);
    // Release the previously uploaded mesh before replacing it.
    if (g_mesh_gpu.ok()) fn::gfx::mesh_gpu_destroy(g_mesh_gpu);
    g_mesh_gpu = fn::gfx::mesh_gpu_upload(m);
    if (!g_mesh_gpu.ok()) {
        g_viewer_err = "mesh_gpu_upload failed (contexto GL?): " + path;
        fn_log::log_error(g_viewer_err.c_str());
        return;
    }
    g_viewer_tris = g_mesh_gpu.index_count / 3;
    g_viewer_path = path;
    g_cam = fn::core::OrbitCamera{}; // reset the camera when a new mesh is loaded
    g_show_viewer = true;
    fn_log::log_info(("viewer loaded " + path + " tris=" +
                      std::to_string(g_viewer_tris)).c_str());
}
// Queues a path to be loaded into the viewer on the next main-thread frame.
// Only the most recent request is kept: a newer path overwrites a pending one.
void request_view(const std::string& path) {
    std::lock_guard<std::mutex> lk(g_viewer_load_mu);
    g_viewer_pending_path = path;
}
// ── Acciones ───────────────────────────────────────────────────────────────
// Reloads the preview texture from the path currently in g_image_path.
// Any existing preview texture is destroyed first; with an empty path the
// preview is simply cleared. On failure the reason goes to g_preview_err.
void reload_preview() {
    g_preview_err.clear();
    if (g_preview_tex.id) { fn::gl_texture_destroy(g_preview_tex); g_preview_tex = {}; }
    g_preview_w = g_preview_h = 0;
    std::string path = g_image_path;
    if (path.empty()) return;
    fn::GlTexture t = fn::gl_texture_load(path.c_str(), /*flip_y=*/false, /*srgb=*/true);
    // A zero texture id is treated as a failed load.
    if (!t.id) {
        g_preview_err = std::string("no se pudo cargar imagen (") +
                        fn::gl_texture_last_error() + "): " + path;
        fn_log::log_error(g_preview_err.c_str());
        return;
    }
    g_preview_tex = t;
    g_preview_w = t.w;
    g_preview_h = t.h;
    fn_log::log_info(("preview ok " + path + " " +
                      std::to_string(t.w) + "x" + std::to_string(t.h)).c_str());
}
// Sends GET <backend_url>/health on a detached thread (2 s timeout) and stores
// a one-line summary in g_health_text: the transport error, the HTTP status
// and body for non-2xx answers, or the round-trip time and body on success.
// g_health_pinged is cleared on entry and set again once the text is ready.
void ping_backend() {
    g_health_pinged = false;
    // The URL is copied into the lambda so later edits of the text field do
    // not affect the request in flight.
    std::thread([url = std::string(g_backend_url)]() {
        fn_http::Request req;
        req.method = "GET";
        req.url = url + "/health";
        req.timeout_ms = 2000;
        auto res = fn_http::request(req);
        std::lock_guard<std::mutex> lk(g_health_mu);
        if (!res.error.empty()) {
            g_health_text = "ERR: " + res.error;
        } else if (res.status / 100 != 2) {
            g_health_text = "HTTP " + std::to_string(res.status) + ": " + res.body;
        } else {
            g_health_text = std::to_string((long long)res.duration_ms) +
                            " ms — " + res.body;
        }
        g_health_pinged = true;
    }).detach();
}
// Starts a generation job unless one is already running.
//
// The current UI parameters are copied into a detached worker thread, which
// reads the image, POSTs it as multipart/form-data to <backend_url>/generate,
// stores the returned GLB under local_files/cache/<model>_<hash>.glb and
// publishes the outcome through g_result_* / g_state. On success the new file
// is also queued for the 3D viewer.
//
// Failures (unreadable image, transport error, non-2xx status, GLB that could
// not be written, or an exception in the worker) end in JobState::Failed with
// a message in g_result_err.
void start_generate() {
    if (g_state.load() == JobState::Running) return;
    std::string path = g_image_path;
    if (path.empty()) {
        std::lock_guard<std::mutex> lk(g_result_mu);
        g_result_err = "image_path vacio";
        g_state = JobState::Failed;
        return;
    }
    g_state = JobState::Running;
    std::thread([
        path, url = std::string(g_backend_url),
        model = std::string(MODEL_IDS[g_model_idx]),
        seed = g_seed, mc = g_mc_resolution,
        fg = g_foreground_ratio, tex = g_texture
    ]() {
        // Publishes a failure message and flips the job to Failed.
        const auto fail = [](std::string msg) {
            std::lock_guard<std::mutex> lk(g_result_mu);
            g_result_err = std::move(msg);
            g_state = JobState::Failed;
        };
        // An exception leaving a thread's entry function terminates the
        // process, so everything runs inside a try block.
        try {
            auto image_bytes = read_file_bytes(path);
            if (image_bytes.empty()) {
                fail("no se pudo leer imagen: " + path);
                return;
            }
            std::string boundary = random_boundary();
            std::string body = build_multipart(
                boundary,
                {
                    {"model", model},
                    {"seed", std::to_string(seed)},
                    {"mc_resolution", std::to_string(mc)},
                    {"foreground_ratio", std::to_string(fg)},
                    {"texture", tex ? "true" : "false"},
                },
                "file", basename_of(path), mime_for(path), image_bytes
            );
            fn_http::Request req;
            req.method = "POST";
            req.url = url + "/generate";
            req.headers = {{"Content-Type", "multipart/form-data; boundary=" + boundary}};
            req.body = std::move(body);
            req.timeout_ms = 5 * 60 * 1000; // 5 min — large models are slow
            auto res = fn_http::request(req);
            {
                std::lock_guard<std::mutex> lk(g_result_mu);
                g_result_status_http = res.status;
                g_result_duration_ms = res.duration_ms;
            }
            if (!res.error.empty()) {
                fail("transport: " + res.error);
                return;
            }
            if (res.status / 100 != 2) {
                fail("HTTP " + std::to_string(res.status) + ": " + res.body);
                return;
            }
            // Save the GLB to local_files/cache/<model>_<hash>.glb. The disk
            // write happens without holding g_result_mu.
            std::string cache_dir = fn::local_path("cache");
            std::error_code ec;
            // A failure here shows up as a failed write just below.
            std::filesystem::create_directories(cache_dir, ec);
            std::string hash = fnv1a_hex(
                (const uint8_t*)res.body.data(),
                res.body.size() < 4096 ? res.body.size() : 4096);
            std::string out_path = cache_dir + "/" + model + "_" + hash + ".glb";
            bool written = false;
            {
                std::ofstream f(out_path, std::ios::binary);
                f.write(res.body.data(), (std::streamsize)res.body.size());
                f.close();
                // fail() covers open, write and close/flush errors.
                written = !f.fail();
            }
            if (!written) {
                fail("no se pudo escribir GLB: " + out_path);
                return;
            }
            {
                std::lock_guard<std::mutex> lk(g_result_mu);
                g_result_path = out_path;
                g_result_bytes = res.body.size();
                g_result_err.clear();
                g_state = JobState::Done;
            }
            // Queue the load for the viewer; the main thread performs it (GL context).
            request_view(out_path);
            fn_log::log_info(("generated " + out_path + " (" +
                              std::to_string(res.body.size()) + " bytes, " +
                              std::to_string((long long)res.duration_ms) + " ms)").c_str());
        } catch (const std::exception& e) {
            fail(std::string("excepcion en worker: ") + e.what());
        }
    }).detach();
}
// ── Paneles ────────────────────────────────────────────────────────────────
// Draws the "Input" panel: source-image path field, Load button, the last
// preview error (if any) and the preview image when a texture is loaded.
void draw_input() {
    if (!ImGui::Begin(TI_PHOTO " Input", &g_show_input)) { ImGui::End(); return; }
    ImGui::TextUnformatted("Imagen origen");
    ImGui::PushItemWidth(-100);
    // With EnterReturnsTrue the field reports a change only when Enter is pressed.
    bool changed = ImGui::InputText("##path", g_image_path, sizeof(g_image_path),
                                    ImGuiInputTextFlags_EnterReturnsTrue);
    ImGui::PopItemWidth();
    ImGui::SameLine();
    if (ImGui::Button(TI_REFRESH " Load") || changed) {
        reload_preview();
    }
    ImGui::TextDisabled("(arrastra path o pega aqui — PNG/JPG/WEBP)");
    if (!g_preview_err.empty()) {
        ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1, 0.4f, 0.4f, 1));
        ImGui::TextWrapped("%s", g_preview_err.c_str());
        ImGui::PopStyleColor();
    }
    if (g_preview_tex.id) {
        ImGui::Separator();
        ImGui::Text("%dx%d", g_preview_w, g_preview_h);
        // Shrink the image to the available width, but never enlarge it.
        float avail = ImGui::GetContentRegionAvail().x;
        float scale = avail / (float)g_preview_w;
        if (scale > 1.0f) scale = 1.0f;
        ImGui::Image((ImTextureID)(intptr_t)g_preview_tex.id,
                     ImVec2(g_preview_w * scale, g_preview_h * scale));
    }
    ImGui::End();
}
// Draws the "Models" panel: model selector, generation parameters, backend
// URL with a health-ping button, and the Generate button.
void draw_models() {
    if (!ImGui::Begin(TI_CPU " Models", &g_show_models)) { ImGui::End(); return; }
    ImGui::Combo("modelo", &g_model_idx, MODEL_LABELS, MODEL_COUNT);
    ImGui::Separator();
    ImGui::InputInt("seed", &g_seed);
    ImGui::SliderInt("mc_resolution", &g_mc_resolution, 64, 512);
    ImGui::SliderFloat("foreground_ratio", &g_foreground_ratio, 0.5f, 1.0f);
    ImGui::Checkbox("texture (Hunyuan3D)", &g_texture);
    ImGui::Separator();
    ImGui::PushItemWidth(-100);
    ImGui::InputText("backend_url", g_backend_url, sizeof(g_backend_url));
    ImGui::PopItemWidth();
    if (ImGui::Button(TI_HEART_RATE_MONITOR " Ping /health")) ping_backend();
    // The health text is shown only once a ping has completed.
    if (g_health_pinged.load()) {
        std::lock_guard<std::mutex> lk(g_health_mu);
        ImGui::SameLine();
        ImGui::TextDisabled("%s", g_health_text.c_str());
    }
    ImGui::Separator();
    // Sample the state once so BeginDisabled/EndDisabled stay balanced even
    // if the worker changes g_state in between.
    bool busy = g_state.load() == JobState::Running;
    if (busy) ImGui::BeginDisabled();
    if (ImGui::Button(TI_ROCKET " Generate", ImVec2(-1, 36))) start_generate();
    if (busy) ImGui::EndDisabled();
    ImGui::End();
}
// Draws the "Output" panel for the current job state: idle hint, running
// indicator, result details with "Copy path" / "View 3D" buttons, or the
// failure message. Result fields are read while holding g_result_mu.
void draw_output() {
    if (!ImGui::Begin(TI_BOX " Output", &g_show_output)) { ImGui::End(); return; }
    JobState st = g_state.load();
    switch (st) {
        case JobState::Idle: ImGui::TextDisabled("idle — pulsa Generate"); break;
        case JobState::Running: ImGui::Text("%s generando...", TI_LOADER); break;
        case JobState::Done: {
            std::lock_guard<std::mutex> lk(g_result_mu);
            ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.4f, 1, 0.5f, 1));
            ImGui::Text(TI_CIRCLE_CHECK " done");
            ImGui::PopStyleColor();
            ImGui::Text("status: %d", g_result_status_http);
            ImGui::Text("duration: %lld ms",(long long)g_result_duration_ms);
            ImGui::Text("bytes: %zu", g_result_bytes);
            ImGui::Separator();
            ImGui::TextWrapped("path: %s", g_result_path.c_str());
            if (ImGui::Button(TI_COPY " Copy path")) {
                ImGui::SetClipboardText(g_result_path.c_str());
            }
            ImGui::SameLine();
            // Queues the GLB for the viewer; render() performs the actual load.
            if (ImGui::Button(TI_CUBE " View 3D")) {
                request_view(g_result_path);
            }
            break;
        }
        case JobState::Failed: {
            std::lock_guard<std::mutex> lk(g_result_mu);
            ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1, 0.4f, 0.4f, 1));
            ImGui::Text(TI_CIRCLE_X " failed");
            ImGui::PopStyleColor();
            ImGui::TextWrapped("%s", g_result_err.c_str());
            break;
        }
    }
    ImGui::End();
}
// Draws the "Viewer 3D" panel: wireframe toggle, camera reset, the last viewer
// error (if any) and the mesh_viewer widget when a mesh is uploaded.
void draw_viewer() {
    if (!ImGui::Begin(TI_CUBE " Viewer 3D", &g_show_viewer)) { ImGui::End(); return; }
    ImGui::Checkbox("wireframe", &g_viewer_wireframe);
    ImGui::SameLine();
    // Resetting the camera replaces it with a default-constructed OrbitCamera.
    if (ImGui::Button(TI_REFRESH " Reset cam")) g_cam = fn::core::OrbitCamera{};
    ImGui::SameLine();
    ImGui::TextDisabled("drag=rotar rueda=zoom");
    if (!g_viewer_err.empty()) {
        ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1, 0.4f, 0.4f, 1));
        ImGui::TextWrapped("%s", g_viewer_err.c_str());
        ImGui::PopStyleColor();
    }
    if (g_mesh_gpu.ok()) {
        ImGui::Text("tris: %d", g_viewer_tris);
        fn::viz::MeshViewerConfig vc{};
        vc.mesh = &g_mesh_gpu;
        vc.cam = &g_cam;
        vc.size = {-1.0f, -1.0f}; // stretch to fill the whole panel
        vc.wireframe = g_viewer_wireframe;
        vc.color = IM_COL32(170, 200, 235, 255);
        fn::viz::mesh_viewer("##i23d_mesh", vc);
    } else {
        ImGui::TextDisabled("sin mesh — genera (Generate) o pulsa View 3D en Output");
    }
    ImGui::End();
}
// Per-frame entry point. First takes over any queued viewer path and loads it
// here, on the main thread, where the GL context is active; then draws every
// panel whose visibility flag is set.
void render() {
    std::string to_load;
    {
        // Take ownership of the queued path and leave the queue empty.
        std::lock_guard<std::mutex> guard(g_viewer_load_mu);
        to_load.swap(g_viewer_pending_path);
    }
    if (!to_load.empty()) {
        load_mesh_into_viewer(to_load);
    }

    if (g_show_input) {
        draw_input();
    }
    if (g_show_models) {
        draw_models();
    }
    if (g_show_output) {
        draw_output();
    }
    if (g_show_viewer) {
        draw_viewer();
    }
}
} // namespace
// Program entry point: describes the app (title, about box, log file, panel
// toggles) and hands control to the framework loop, which calls render().
int main(int /*argc*/, char** /*argv*/) {
    // One toggle per panel window. The viewer is listed too, so that closing
    // its window does not leave it without a toggle entry.
    // NOTE(review): the second field is nullptr for every entry, as in the
    // existing rows; its meaning is defined by fn_ui::PanelToggle.
    static fn_ui::PanelToggle panels[] = {
        { "Input", nullptr, &g_show_input },
        { "Models", nullptr, &g_show_models },
        { "Output", nullptr, &g_show_output },
        { "Viewer 3D", nullptr, &g_show_viewer },
    };
    fn::AppConfig cfg;
    cfg.title = "image_to_3d_studio — single image to 3D";
    cfg.about = { "image_to_3d_studio", "0.1.0",
                  "UI ImGui + backend Python (TripoSR / Hunyuan3D-2 / Trellis)." };
    cfg.log = { "image_to_3d_studio.log", 1 };
    cfg.panels = panels;
    cfg.panel_count = sizeof(panels) / sizeof(panels[0]);
    cfg.init_gl_loader = true; // gl_texture_load calls glGenTextures + glGenerateMipmap
    return fn::run_app(cfg, render);
}