commit d3c83053f235de1bad8e8e66c6e8f3eae25268cd Author: Egutierrez Date: Sat May 30 17:28:48 2026 +0200 chore: auto-commit (5 archivos) - CMakeLists.txt - app.md - appicon.ico - backend/ - main.cpp Co-Authored-By: Claude Opus 4.7 (1M context) diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..47f944d --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,23 @@ +add_imgui_app(image_to_3d_studio + main.cpp + # Funciones del registry usadas (issue 0085 — declaracion explicita en CMake): + ${CMAKE_SOURCE_DIR}/functions/core/http_request.cpp + ${CMAKE_SOURCE_DIR}/functions/gfx/gl_texture_load.cpp + # Implementacion stb_image (gl_texture_load la usa): + ${CMAKE_SOURCE_DIR}/vendor/stb/stb_image_impl.cpp + # Visor 3D: GLB loader + mesh GPU + orbit camera + FBO + mesh_viewer. + # (gl_loader viene bundled en fn_framework; el resto se enlaza aqui.) + ${CMAKE_SOURCE_DIR}/functions/gfx/gltf_load_mesh.cpp + ${CMAKE_SOURCE_DIR}/functions/gfx/mesh_gpu.cpp + ${CMAKE_SOURCE_DIR}/functions/gfx/gl_framebuffer.cpp + ${CMAKE_SOURCE_DIR}/functions/core/orbit_camera.cpp + ${CMAKE_SOURCE_DIR}/functions/viz/mesh_viewer.cpp +) +target_include_directories(image_to_3d_studio PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/vendor # nlohmann/json.hpp para gltf_load_mesh +) + +if(WIN32) + set_target_properties(image_to_3d_studio PROPERTIES WIN32_EXECUTABLE TRUE) +endif() diff --git a/app.md b/app.md new file mode 100644 index 0000000..f719d95 --- /dev/null +++ b/app.md @@ -0,0 +1,71 @@ +--- +name: image_to_3d_studio +lang: cpp +domain: gfx +description: "Image to 3D studio: imagen origen + POST a backend Python (TripoSR/Hunyuan3D/Trellis) y guarda mesh GLB." 
+tags: [imagegen, 3d, mesh, viewer, imgui] +icon: + phosphor: "cube-transparent" + accent: "#0ea5e9" +uses_functions: + - http_request_cpp_core # POST /generate al backend Python + - gl_texture_load_cpp_gfx # preview de la imagen origen en panel Input + - gltf_load_mesh_cpp_gfx # parse GLB -> Mesh CPU para el viewer 3D + - mesh_gpu_cpp_gfx # sube Mesh a GPU (VAO/VBO/EBO) + - mesh_viewer_cpp_viz # render orbit + FBO depth del mesh + - orbit_camera_cpp_core # camara orbital (drag rotar / wheel zoom) + - gl_framebuffer_cpp_gfx # FBO con depth para el render offscreen +uses_types: [] +framework: "imgui" +entry_point: "main.cpp" +dir_path: "projects/imagegen/apps/image_to_3d_studio" +repo_url: "https://gitea.organic-machine.com/dataforge/image_to_3d_studio" +--- + +# image_to_3d_studio + +UI ImGui que envia una imagen al backend Python (FastAPI) y guarda el +mesh GLB resultante en `local_files/cache/`. Despachador soporta varios +modelos image-to-3D detras del mismo endpoint: + +- **TripoSR** (MIT, ~0.5 s, ~6 GB VRAM) — backend listo. +- **Hunyuan3D-2** (Tencent Community, mejor textura) — stub. +- **Trellis** (Microsoft MIT, mesh/3DGS/NeRF) — stub. + +Viewer GLB integrado: panel **Viewer 3D** carga el mesh con `gltf_load_mesh`, +lo sube a GPU y lo renderiza con `mesh_viewer` (orbit camera + FBO con depth). +Drag = rotar, rueda = zoom, "Reset cam" recentra, checkbox wireframe. Auto-carga +el mesh al terminar Generate; tambien boton "View 3D" en el panel Output. 
+ +## Arquitectura + +``` +image_to_3d_studio (C++ ImGui) + ── multipart POST /generate ──▶ backend FastAPI 127.0.0.1:8600 + dispatcher → backends/<model>.py + ◀── bytes GLB ── + guarda en local_files/cache/<model>_<hash>.glb +``` + +## Build + +```bash +cd cpp && cmake --build build --target image_to_3d_studio -j +``` + +## Run + +```bash +# 1) backend Python (otra terminal) +projects/imagegen/apps/image_to_3d_studio/backend/run.sh + +# 2) app C++ +./cpp/build/image_to_3d_studio +``` + +## Estado + +- Notebook `projects/imagegen/analysis/spike_image_to_3d/notebooks/01_panorama_image_to_3d.ipynb` — panorama + tabla comparativa. +- Notebook `02_smoke_triposr.ipynb` — smoke end-to-end. +- Backend `backend/server.py` — dispatcher, TripoSR funcional, Hunyuan3D-2 + Trellis = stubs `501 Not Implemented`. +- App C++ — paneles Input/Models/Output/Viewer 3D + POST + cache. Viewer GLB integrado (panel Viewer 3D). diff --git a/appicon.ico b/appicon.ico new file mode 100644 index 0000000..75d3224 Binary files /dev/null and b/appicon.ico differ diff --git a/backend/__pycache__/server.cpython-313.pyc b/backend/__pycache__/server.cpython-313.pyc new file mode 100644 index 0000000..c29fc39 Binary files /dev/null and b/backend/__pycache__/server.cpython-313.pyc differ diff --git a/backend/backends/__init__.py b/backend/backends/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/backends/__pycache__/__init__.cpython-313.pyc b/backend/backends/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..aceb312 Binary files /dev/null and b/backend/backends/__pycache__/__init__.cpython-313.pyc differ diff --git a/backend/backends/__pycache__/triposr.cpython-313.pyc b/backend/backends/__pycache__/triposr.cpython-313.pyc new file mode 100644 index 0000000..1902895 Binary files /dev/null and b/backend/backends/__pycache__/triposr.cpython-313.pyc differ diff --git a/backend/backends/_iface.py b/backend/backends/_iface.py new file mode 100644 index 0000000..ab5ace4 --- /dev/null +++ 
b/backend/backends/_iface.py @@ -0,0 +1,17 @@ +"""Interfaz comun para backends image-to-3D. + +Cada backend (triposr, hunyuan3d_2, trellis) expone: + load() -> Handle + donde Handle tiene metodo infer(image: PIL.Image, cfg: dict) -> bytes (GLB). + +`cfg` recibe: seed, mc_resolution, foreground_ratio, texture. +""" +from __future__ import annotations + +from typing import Protocol, Any, Dict +from PIL import Image + + +class BackendHandle(Protocol): + def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes: ... + def close(self) -> None: ... diff --git a/backend/backends/hunyuan3d_2.py b/backend/backends/hunyuan3d_2.py new file mode 100644 index 0000000..fb24a77 --- /dev/null +++ b/backend/backends/hunyuan3d_2.py @@ -0,0 +1,23 @@ +"""Backend Hunyuan3D-2 (Tencent, Community License). + +STUB. Para implementar: clonar github.com/Tencent/Hunyuan3D-2 a sources/, +instalar deps, cargar pipeline shape + texture. +""" +from __future__ import annotations + +from typing import Any, Dict +from PIL import Image + + +class Handle: + def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes: + raise NotImplementedError( + "hunyuan3d-2 backend pendiente. Ver notebook 03_smoke_hunyuan3d.ipynb" + ) + + def close(self) -> None: # pragma: no cover + return + + +def load() -> Handle: + return Handle() diff --git a/backend/backends/trellis.py b/backend/backends/trellis.py new file mode 100644 index 0000000..9160448 --- /dev/null +++ b/backend/backends/trellis.py @@ -0,0 +1,23 @@ +"""Backend Trellis (Microsoft, MIT code). + +STUB. Para implementar: clonar github.com/microsoft/TRELLIS a sources/, +instalar deps (kaolin + custom CUDA), cargar pipeline structured latents. +""" +from __future__ import annotations + +from typing import Any, Dict +from PIL import Image + + +class Handle: + def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes: + raise NotImplementedError( + "trellis backend pendiente. 
Ver notebook 04_smoke_trellis.ipynb" + ) + + def close(self) -> None: # pragma: no cover + return + + +def load() -> Handle: + return Handle() diff --git a/backend/backends/triposr.py b/backend/backends/triposr.py new file mode 100644 index 0000000..2478c34 --- /dev/null +++ b/backend/backends/triposr.py @@ -0,0 +1,93 @@ +"""Backend TripoSR (Stability + Tripo, MIT). + +Asume que `sources/TripoSR` esta clonado en el registry. Importa `tsr.system.TSR`. +Descarga checkpoint desde HF en la primera carga (~1.2 GB). +""" +from __future__ import annotations + +import io +import os +import pathlib +import sys +from dataclasses import dataclass +from typing import Any, Dict + +import numpy as np +import torch +import trimesh +from PIL import Image + + +def _ensure_sources_on_path() -> pathlib.Path: + root = pathlib.Path(os.environ.get("FN_REGISTRY_ROOT", "/home/lucas/fn_registry")) + src = root / "sources" / "TripoSR" + if not src.exists(): + raise RuntimeError( + f"TripoSR no clonado en {src}. " + "git clone --depth=1 https://github.com/VAST-AI-Research/TripoSR.git " + f"{src}" + ) + if str(src) not in sys.path: + sys.path.insert(0, str(src)) + return src + + +@dataclass +class Handle: + model: Any + rembg_session: Any + device: str + + def infer(self, image: Image.Image, cfg: Dict[str, Any]) -> bytes: + from tsr.utils import remove_background, resize_foreground + + fg_ratio = float(cfg.get("foreground_ratio", 0.85)) + mc_res = int(cfg.get("mc_resolution", 256)) + + fg = remove_background(image, self.rembg_session) + fg = resize_foreground(fg, fg_ratio) + + # Composite RGBA -> RGB sobre gris 0.5 (preprocesado canonico TripoSR + # run.py). Sin esto el tokenizer DINO recibe 4 canales y peta: + # "The size of tensor a (4) must match tensor b (3) at dim 2". 
+ arr = np.asarray(fg).astype(np.float32) / 255.0 + if arr.shape[-1] == 4: + arr = arr[:, :, :3] * arr[:, :, 3:4] + (1.0 - arr[:, :, 3:4]) * 0.5 + fg = Image.fromarray((arr * 255.0).astype(np.uint8)) + + with torch.no_grad(): + scene_codes = self.model([fg], device=self.device) + meshes = self.model.extract_mesh( + scene_codes, has_vertex_color=False, resolution=mc_res + ) + m = meshes[0] + tm = trimesh.Trimesh( + vertices=np.asarray(m.vertices), + faces=np.asarray(m.faces), + process=True, + ) + buf = io.BytesIO() + tm.export(buf, file_type="glb") + return buf.getvalue() + + def close(self) -> None: + del self.model + del self.rembg_session + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + +def load() -> Handle: + _ensure_sources_on_path() + from tsr.system import TSR + from rembg import new_session + + device = "cuda" if torch.cuda.is_available() else "cpu" + model = TSR.from_pretrained( + "stabilityai/TripoSR", + config_name="config.yaml", + weight_name="model.ckpt", + ) + model.renderer.set_chunk_size(8192) + model.to(device) + return Handle(model=model, rembg_session=new_session(), device=device) diff --git a/backend/run.sh b/backend/run.sh new file mode 100755 index 0000000..b40a9dc --- /dev/null +++ b/backend/run.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Lanza el backend FastAPI reutilizando el venv del analysis (mismas deps: +# torch + diffusers + transformers + trimesh + Pillow). No crea venv propio. +set -euo pipefail + +ROOT="$(cd "$(dirname "$0")"/../../../../.. && pwd)" +VENV="$ROOT/projects/imagegen/analysis/spike_image_to_3d/.venv" +HERE="$(cd "$(dirname "$0")" && pwd)" + +if [ ! 
-x "$VENV/bin/python" ]; then + echo "venv del analysis no existe: $VENV" >&2 + echo "Crea el analysis primero: ./fn run init_jupyter_analysis --project imagegen spike_image_to_3d" >&2 + exit 1 +fi + +export FN_REGISTRY_ROOT="$ROOT" +exec "$VENV/bin/python" -m uvicorn server:app \ + --host 127.0.0.1 --port "${PORT:-8600}" \ + --app-dir "$HERE" \ + "$@" diff --git a/backend/server.py b/backend/server.py new file mode 100644 index 0000000..173c81c --- /dev/null +++ b/backend/server.py @@ -0,0 +1,201 @@ +"""FastAPI dispatcher para image-to-3D. + +Endpoints: + GET /health -> {"status":"ok","models":[...loaded...]} + GET /models -> {"models":[{"id","loaded","vram_gb_est","license"}...]} + POST /generate -> bytes GLB (Content-Type: model/gltf-binary) + multipart/form-data: + file= + model= (triposr | hunyuan3d-2 | trellis) + seed= (opcional, default 0) + mc_resolution= (opcional, default 256) + foreground_ratio= (opcional, default 0.85) + texture= (opcional, default true) + +Implementacion: + - Cada backend (triposr/hunyuan3d-2/trellis) vive en backends/.py + con interfaz comun: load() -> handle, infer(handle, image_pil, cfg) -> bytes_glb. + - Lazy load: el primer POST con un model_id carga el modelo en GPU y lo + guarda en un dict global. Liberacion manual via DELETE /models/. + - Single-process, single-GPU: serializamos peticiones por modelo con un + asyncio.Lock para no chocar dos infer simultaneos en la misma GPU. 
+ +Lanzar: + cd projects/imagegen/apps/image_to_3d_studio/backend + ../../../analysis/spike_image_to_3d/.venv/bin/python -m uvicorn server:app \\ + --host 127.0.0.1 --port 8600 --reload +""" +from __future__ import annotations + +import asyncio +import io +import os +import sys +import time +from dataclasses import dataclass +from typing import Any, Dict + +from fastapi import FastAPI, File, Form, HTTPException, UploadFile +from fastapi.responses import Response +from PIL import Image + + +# --- Catalogo de modelos ------------------------------------------------------ + +@dataclass(frozen=True) +class ModelSpec: + id: str + label: str + license: str + vram_gb_est: float + backend_module: str # backends. + + +CATALOG: Dict[str, ModelSpec] = { + "triposr": ModelSpec( + id="triposr", + label="TripoSR (Stability + Tripo, MIT)", + license="MIT", + vram_gb_est=6.0, + backend_module="backends.triposr", + ), + "hunyuan3d-2": ModelSpec( + id="hunyuan3d-2", + label="Hunyuan3D-2 (Tencent, Community License)", + license="Tencent Community", + vram_gb_est=12.0, + backend_module="backends.hunyuan3d_2", + ), + "trellis": ModelSpec( + id="trellis", + label="Trellis (Microsoft, MIT code / research weights)", + license="MIT (code)", + vram_gb_est=16.0, + backend_module="backends.trellis", + ), +} + + +# --- Estado en memoria -------------------------------------------------------- + +_loaded: Dict[str, Any] = {} # model_id -> handle del backend +_locks: Dict[str, asyncio.Lock] = {} # model_id -> lock (uno por modelo) + + +def _lock_for(model_id: str) -> asyncio.Lock: + lk = _locks.get(model_id) + if lk is None: + lk = asyncio.Lock() + _locks[model_id] = lk + return lk + + +def _load_backend(spec: ModelSpec) -> Any: + """Importa el modulo del backend on-demand y llama load(). 
Cada backend + debe exponer load() -> handle e infer(handle, image, cfg) -> bytes.""" + # Garantiza que `backends/` esta en sys.path + here = os.path.dirname(os.path.abspath(__file__)) + if here not in sys.path: + sys.path.insert(0, here) + import importlib + mod = importlib.import_module(spec.backend_module) + return mod.load() + + +# --- FastAPI app -------------------------------------------------------------- + +app = FastAPI(title="image_to_3d_studio backend", version="0.1.0") + + +@app.get("/health") +def health() -> Dict[str, Any]: + return { + "status": "ok", + "loaded": sorted(_loaded.keys()), + "version": "0.1.0", + } + + +@app.get("/models") +def list_models() -> Dict[str, Any]: + return { + "models": [ + { + "id": m.id, + "label": m.label, + "license": m.license, + "vram_gb_est": m.vram_gb_est, + "loaded": m.id in _loaded, + } + for m in CATALOG.values() + ] + } + + +@app.delete("/models/{model_id}") +def unload(model_id: str) -> Dict[str, Any]: + handle = _loaded.pop(model_id, None) + if handle is None: + raise HTTPException(404, f"not loaded: {model_id}") + # Si el backend expone close(), lo llamamos + close = getattr(handle, "close", None) + if callable(close): + close() + # Empuja VRAM libre + try: + import torch + torch.cuda.empty_cache() + except Exception: + pass + return {"unloaded": model_id} + + +@app.post("/generate") +async def generate( + file: UploadFile = File(...), + model: str = Form("triposr"), + seed: int = Form(0), + mc_resolution: int = Form(256), + foreground_ratio: float = Form(0.85), + texture: bool = Form(True), +) -> Response: + spec = CATALOG.get(model) + if spec is None: + raise HTTPException(400, f"unknown model: {model} (catalog: {list(CATALOG)})") + + raw = await file.read() + try: + image = Image.open(io.BytesIO(raw)).convert("RGB") + except Exception as e: + raise HTTPException(400, f"bad image: {e}") + + cfg = dict( + seed=seed, + mc_resolution=mc_resolution, + foreground_ratio=foreground_ratio, + texture=texture, + ) + 
+ lock = _lock_for(model) + async with lock: + if model not in _loaded: + _loaded[model] = await asyncio.to_thread(_load_backend, spec) + handle = _loaded[model] + t0 = time.perf_counter() + try: + glb_bytes = await asyncio.to_thread(handle.infer, image, cfg) + except NotImplementedError as e: + raise HTTPException(501, str(e)) + except Exception as e: + raise HTTPException(500, f"{model} infer failed: {e}") + dt_ms = int((time.perf_counter() - t0) * 1000) + + return Response( + content=glb_bytes, + media_type="model/gltf-binary", + headers={ + "X-Model": model, + "X-Duration-ms": str(dt_ms), + "X-Bytes": str(len(glb_bytes)), + }, + ) diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..25256bc --- /dev/null +++ b/main.cpp @@ -0,0 +1,549 @@ +// image_to_3d_studio — UI ImGui para single-image-to-3D. +// +// Carga una imagen (path en text field), envia POST multipart/form-data al +// backend Python (FastAPI en 127.0.0.1:8600), recibe bytes GLB, los guarda +// en local_files/cache/.glb y reporta path al usuario. +// +// Viewer GLB integrado: panel "Viewer 3D" carga el mesh GLB con +// gltf_load_mesh, lo sube a GPU (mesh_gpu) y lo renderiza con mesh_viewer +// (orbit camera + FBO + depth). Drag = rotar, rueda = zoom. 
+// +// Backend levantarlo aparte: +// cd projects/imagegen/apps/image_to_3d_studio/backend && ./run.sh +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "app_base.h" +#include "core/http_request.h" +#include "core/icons_tabler.h" +#include "core/logger.h" +#include "core/orbit_camera.h" +#include "core/panel_menu.h" +#include "gfx/gl_texture_load.h" +#include "gfx/gltf_load_mesh.h" +#include "gfx/mesh_gpu.h" +#include "viz/mesh_viewer.h" + +namespace { + +// ── Estado UI ────────────────────────────────────────────────────────────── +bool g_show_input = true; +bool g_show_models = true; +bool g_show_output = true; +bool g_show_viewer = true; + +// Viewer 3D +fn::gfx::MeshGpu g_mesh_gpu{}; +fn::core::OrbitCamera g_cam{}; +std::string g_viewer_path; // GLB cargado actualmente en el viewer +std::string g_viewer_err; // error de carga +int g_viewer_tris = 0; +bool g_viewer_wireframe = false; +// Path pendiente de cargar en el viewer (lo setea el worker/boton; lo consume +// render() en el main thread porque las llamadas GL necesitan contexto activo). 
+std::mutex g_viewer_load_mu; +std::string g_viewer_pending_path; + +// Input +char g_image_path[1024] = ""; +fn::GlTexture g_preview_tex{}; +int g_preview_w = 0, g_preview_h = 0; +std::string g_preview_err; + +// Backend +char g_backend_url[256] = "http://127.0.0.1:8600"; + +// Modelo + params +int g_model_idx = 0; +int g_seed = 0; +int g_mc_resolution = 256; +float g_foreground_ratio = 0.85f; +bool g_texture = true; + +const char* const MODEL_IDS[] = { + "triposr", + "hunyuan3d-2", + "trellis", +}; +const char* const MODEL_LABELS[] = { + "TripoSR (MIT, ~6 GB)", + "Hunyuan3D-2 (Tencent, ~12 GB)", + "Trellis (MIT code, ~16 GB)", +}; +constexpr int MODEL_COUNT = (int)(sizeof(MODEL_IDS) / sizeof(MODEL_IDS[0])); + +// Output / job state +enum class JobState { Idle, Running, Done, Failed }; +std::atomic g_state{JobState::Idle}; +std::mutex g_result_mu; +std::string g_result_path; // GLB en disco (en Done) +std::string g_result_err; // mensaje (en Failed) +int g_result_status_http = 0; +int64_t g_result_duration_ms = 0; +size_t g_result_bytes = 0; + +// Backend health +std::atomic g_health_pinged{false}; +std::string g_health_text; +std::mutex g_health_mu; + +// ── Helpers ──────────────────────────────────────────────────────────────── + +std::vector read_file_bytes(const std::string& path) { + std::ifstream f(path, std::ios::binary | std::ios::ate); + if (!f) return {}; + auto sz = f.tellg(); + f.seekg(0); + std::vector out((size_t)sz); + f.read((char*)out.data(), sz); + return out; +} + +std::string basename_of(const std::string& path) { + auto p = path.find_last_of("/\\"); + return p == std::string::npos ? path : path.substr(p + 1); +} + +// Heuristica simple por extension. 
// Maps a file path to a MIME type by its (case-insensitive) extension.
// Unknown or missing extensions yield "application/octet-stream".
const char* mime_for(const std::string& path) {
    const auto dot = path.find_last_of('.');
    if (dot == std::string::npos) return "application/octet-stream";
    std::string ext = path.substr(dot + 1);
    for (auto& c : ext) {
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    }
    if (ext == "png") return "image/png";
    if (ext == "jpg" || ext == "jpeg") return "image/jpeg";
    if (ext == "webp") return "image/webp";
    if (ext == "bmp") return "image/bmp";
    return "application/octet-stream";
}

// Returns a fresh multipart boundary: a fixed prefix followed by two random
// 64-bit values printed in hex.
std::string random_boundary() {
    static std::mt19937_64 rng{std::random_device{}()};
    std::ostringstream o;
    o << "----fn_boundary_" << std::hex << rng() << rng();
    return o.str();
}

// Escapes a value for use inside a quoted Content-Disposition parameter
// (name="..." / filename="..."). A raw double quote would terminate the
// parameter early and a raw CR/LF would start a new header line, so those
// three characters are percent-encoded; everything else is copied as is.
std::string escape_disposition_param(const std::string& value) {
    std::string out;
    out.reserve(value.size());
    for (const char c : value) {
        switch (c) {
            case '"':  out += "%22"; break;
            case '\r': out += "%0D"; break;
            case '\n': out += "%0A"; break;
            default:   out += c;     break;
        }
    }
    return out;
}

// Builds a multipart/form-data body by hand. The result is a std::string
// because fn_http::Request takes a string body; a string can carry binary
// bytes (data()+size()).
//
//   boundary   - boundary token, without the leading "--"
//   fields     - text fields as (name, value) pairs, emitted in order
//   file_field - form field name of the file part
//   file_name  - filename reported for the file part
//   file_mime  - Content-Type of the file part
//   file_bytes - raw file content, copied verbatim
//
// Field names and the filename are escaped for their quoted header
// parameters; field values and file bytes are written unmodified.
std::string build_multipart(
    const std::string& boundary,
    const std::vector<std::pair<std::string, std::string>>& fields,  // name -> value (text)
    const std::string& file_field,
    const std::string& file_name,
    const std::string& file_mime,
    const std::vector<uint8_t>& file_bytes)
{
    std::string body;
    body.reserve(file_bytes.size() + 1024);

    auto add = [&](const std::string& s) { body.append(s); };

    for (const auto& [name, value] : fields) {
        add("--"); add(boundary); add("\r\n");
        add("Content-Disposition: form-data; name=\"");
        add(escape_disposition_param(name));
        add("\"\r\n\r\n");
        add(value); add("\r\n");
    }

    add("--"); add(boundary); add("\r\n");
    add("Content-Disposition: form-data; name=\"");
    add(escape_disposition_param(file_field));
    add("\"; filename=\"");
    add(escape_disposition_param(file_name));
    add("\"\r\n");
    add("Content-Type: "); add(file_mime); add("\r\n\r\n");
    body.append(reinterpret_cast<const char*>(file_bytes.data()), file_bytes.size());
    add("\r\n");

    // Closing delimiter: the boundary followed by a trailing "--".
    add("--"); add(boundary); add("--\r\n");
    return body;
}

// Fast FNV-1a hash of bytes — used to name the cached GLB with no practical collisions.
+std::string fnv1a_hex(const uint8_t* data, size_t n) { + uint64_t h = 1469598103934665603ull; + for (size_t i = 0; i < n; ++i) { + h ^= data[i]; + h *= 1099511628211ull; + } + char buf[17]; + std::snprintf(buf, sizeof(buf), "%016llx", (unsigned long long)h); + return buf; +} + +// Normaliza un Mesh in-place: recentra el centroide del bounding box al origen +// y escala para encajar en una esfera de radio ~1. Asi la orbit camera (target +// fijo en 0,0,0, distance ~3) enmarca cualquier mesh sin importar su escala. +void normalize_mesh(fn::gfx::Mesh& m) { + if (m.positions.size() < 3) return; + float mn[3] = { m.positions[0], m.positions[1], m.positions[2] }; + float mx[3] = { m.positions[0], m.positions[1], m.positions[2] }; + for (size_t i = 0; i < m.positions.size(); i += 3) { + for (int k = 0; k < 3; ++k) { + float v = m.positions[i + k]; + if (v < mn[k]) mn[k] = v; + if (v > mx[k]) mx[k] = v; + } + } + float cx = 0.5f * (mn[0] + mx[0]); + float cy = 0.5f * (mn[1] + mx[1]); + float cz = 0.5f * (mn[2] + mx[2]); + float ext = 0.0f; + for (int k = 0; k < 3; ++k) ext = std::max(ext, mx[k] - mn[k]); + float scale = (ext > 1e-6f) ? (2.0f / ext) : 1.0f; // diametro -> ~2 + for (size_t i = 0; i < m.positions.size(); i += 3) { + m.positions[i + 0] = (m.positions[i + 0] - cx) * scale; + m.positions[i + 1] = (m.positions[i + 1] - cy) * scale; + m.positions[i + 2] = (m.positions[i + 2] - cz) * scale; + } +} + +// Carga un GLB del disco en el viewer 3D. DEBE llamarse desde el main thread +// (las llamadas GL de mesh_gpu_upload requieren contexto activo). 
+void load_mesh_into_viewer(const std::string& path) { + g_viewer_err.clear(); + fn::gfx::Mesh m = fn::gfx::gltf_load_mesh_from_file(path.c_str()); + if (m.positions.empty()) { + g_viewer_err = std::string("GLB load failed (") + + fn::gfx::gltf_load_last_error() + "): " + path; + fn_log::log_error(g_viewer_err.c_str()); + return; + } + normalize_mesh(m); + if (g_mesh_gpu.ok()) fn::gfx::mesh_gpu_destroy(g_mesh_gpu); + g_mesh_gpu = fn::gfx::mesh_gpu_upload(m); + if (!g_mesh_gpu.ok()) { + g_viewer_err = "mesh_gpu_upload failed (contexto GL?): " + path; + fn_log::log_error(g_viewer_err.c_str()); + return; + } + g_viewer_tris = g_mesh_gpu.index_count / 3; + g_viewer_path = path; + g_cam = fn::core::OrbitCamera{}; // reset camara al cargar mesh nuevo + g_show_viewer = true; + fn_log::log_info(("viewer loaded " + path + " tris=" + + std::to_string(g_viewer_tris)).c_str()); +} + +// Encola un path para cargar en el viewer en el proximo frame del main thread. +void request_view(const std::string& path) { + std::lock_guard lk(g_viewer_load_mu); + g_viewer_pending_path = path; +} + +// ── Acciones ─────────────────────────────────────────────────────────────── + +void reload_preview() { + g_preview_err.clear(); + if (g_preview_tex.id) { fn::gl_texture_destroy(g_preview_tex); g_preview_tex = {}; } + g_preview_w = g_preview_h = 0; + + std::string path = g_image_path; + if (path.empty()) return; + + fn::GlTexture t = fn::gl_texture_load(path.c_str(), /*flip_y=*/false, /*srgb=*/true); + if (!t.id) { + g_preview_err = std::string("no se pudo cargar imagen (") + + fn::gl_texture_last_error() + "): " + path; + fn_log::log_error(g_preview_err.c_str()); + return; + } + g_preview_tex = t; + g_preview_w = t.w; + g_preview_h = t.h; + fn_log::log_info(("preview ok " + path + " " + + std::to_string(t.w) + "x" + std::to_string(t.h)).c_str()); +} + +void ping_backend() { + g_health_pinged = false; + std::thread([url = std::string(g_backend_url)]() { + fn_http::Request req; + req.method = 
"GET"; + req.url = url + "/health"; + req.timeout_ms = 2000; + auto res = fn_http::request(req); + std::lock_guard lk(g_health_mu); + if (!res.error.empty()) { + g_health_text = "ERR: " + res.error; + } else if (res.status / 100 != 2) { + g_health_text = "HTTP " + std::to_string(res.status) + ": " + res.body; + } else { + g_health_text = std::to_string((long long)res.duration_ms) + + " ms — " + res.body; + } + g_health_pinged = true; + }).detach(); +} + +void start_generate() { + if (g_state.load() == JobState::Running) return; + std::string path = g_image_path; + if (path.empty()) { + std::lock_guard lk(g_result_mu); + g_result_err = "image_path vacio"; + g_state = JobState::Failed; + return; + } + g_state = JobState::Running; + + std::thread([ + path, url = std::string(g_backend_url), + model = std::string(MODEL_IDS[g_model_idx]), + seed = g_seed, mc = g_mc_resolution, + fg = g_foreground_ratio, tex = g_texture + ]() { + auto image_bytes = read_file_bytes(path); + if (image_bytes.empty()) { + std::lock_guard lk(g_result_mu); + g_result_err = "no se pudo leer imagen: " + path; + g_state = JobState::Failed; + return; + } + + std::string boundary = random_boundary(); + std::string body = build_multipart( + boundary, + { + {"model", model}, + {"seed", std::to_string(seed)}, + {"mc_resolution", std::to_string(mc)}, + {"foreground_ratio", std::to_string(fg)}, + {"texture", tex ? 
"true" : "false"}, + }, + "file", basename_of(path), mime_for(path), image_bytes + ); + + fn_http::Request req; + req.method = "POST"; + req.url = url + "/generate"; + req.headers = {{"Content-Type", "multipart/form-data; boundary=" + boundary}}; + req.body = std::move(body); + req.timeout_ms = 5 * 60 * 1000; // 5 min — modelos grandes son lentos + + auto res = fn_http::request(req); + + std::lock_guard lk(g_result_mu); + g_result_status_http = res.status; + g_result_duration_ms = res.duration_ms; + + if (!res.error.empty()) { + g_result_err = "transport: " + res.error; + g_state = JobState::Failed; + return; + } + if (res.status / 100 != 2) { + g_result_err = "HTTP " + std::to_string(res.status) + ": " + res.body; + g_state = JobState::Failed; + return; + } + + // Guardar GLB en local_files/cache/.glb + std::string cache_dir = fn::local_path("cache"); + std::error_code ec; + std::filesystem::create_directories(cache_dir, ec); + std::string hash = fnv1a_hex( + (const uint8_t*)res.body.data(), + res.body.size() < 4096 ? res.body.size() : 4096); + std::string out_path = cache_dir + "/" + model + "_" + hash + ".glb"; + + std::ofstream f(out_path, std::ios::binary); + f.write(res.body.data(), (std::streamsize)res.body.size()); + f.close(); + + g_result_path = out_path; + g_result_bytes = res.body.size(); + g_result_err.clear(); + g_state = JobState::Done; + // Encola la carga en el viewer; la hace el main thread (GL context). 
+ request_view(out_path); + fn_log::log_info(("generated " + out_path + " (" + + std::to_string(res.body.size()) + " bytes, " + + std::to_string((long long)res.duration_ms) + " ms)").c_str()); + }).detach(); +} + +// ── Paneles ──────────────────────────────────────────────────────────────── + +void draw_input() { + if (!ImGui::Begin(TI_PHOTO " Input", &g_show_input)) { ImGui::End(); return; } + + ImGui::TextUnformatted("Imagen origen"); + ImGui::PushItemWidth(-100); + bool changed = ImGui::InputText("##path", g_image_path, sizeof(g_image_path), + ImGuiInputTextFlags_EnterReturnsTrue); + ImGui::PopItemWidth(); + ImGui::SameLine(); + if (ImGui::Button(TI_REFRESH " Load") || changed) { + reload_preview(); + } + + ImGui::TextDisabled("(arrastra path o pega aqui — PNG/JPG/WEBP)"); + + if (!g_preview_err.empty()) { + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1, 0.4f, 0.4f, 1)); + ImGui::TextWrapped("%s", g_preview_err.c_str()); + ImGui::PopStyleColor(); + } + + if (g_preview_tex.id) { + ImGui::Separator(); + ImGui::Text("%dx%d", g_preview_w, g_preview_h); + float avail = ImGui::GetContentRegionAvail().x; + float scale = avail / (float)g_preview_w; + if (scale > 1.0f) scale = 1.0f; + ImGui::Image((ImTextureID)(intptr_t)g_preview_tex.id, + ImVec2(g_preview_w * scale, g_preview_h * scale)); + } + ImGui::End(); +} + +void draw_models() { + if (!ImGui::Begin(TI_CPU " Models", &g_show_models)) { ImGui::End(); return; } + + ImGui::Combo("modelo", &g_model_idx, MODEL_LABELS, MODEL_COUNT); + ImGui::Separator(); + ImGui::InputInt("seed", &g_seed); + ImGui::SliderInt("mc_resolution", &g_mc_resolution, 64, 512); + ImGui::SliderFloat("foreground_ratio", &g_foreground_ratio, 0.5f, 1.0f); + ImGui::Checkbox("texture (Hunyuan3D)", &g_texture); + + ImGui::Separator(); + ImGui::PushItemWidth(-100); + ImGui::InputText("backend_url", g_backend_url, sizeof(g_backend_url)); + ImGui::PopItemWidth(); + if (ImGui::Button(TI_HEART_RATE_MONITOR " Ping /health")) ping_backend(); + if 
(g_health_pinged.load()) { + std::lock_guard lk(g_health_mu); + ImGui::SameLine(); + ImGui::TextDisabled("%s", g_health_text.c_str()); + } + + ImGui::Separator(); + bool busy = g_state.load() == JobState::Running; + if (busy) ImGui::BeginDisabled(); + if (ImGui::Button(TI_ROCKET " Generate", ImVec2(-1, 36))) start_generate(); + if (busy) ImGui::EndDisabled(); + + ImGui::End(); +} + +void draw_output() { + if (!ImGui::Begin(TI_BOX " Output", &g_show_output)) { ImGui::End(); return; } + + JobState st = g_state.load(); + switch (st) { + case JobState::Idle: ImGui::TextDisabled("idle — pulsa Generate"); break; + case JobState::Running: ImGui::Text("%s generando...", TI_LOADER); break; + case JobState::Done: { + std::lock_guard lk(g_result_mu); + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.4f, 1, 0.5f, 1)); + ImGui::Text(TI_CIRCLE_CHECK " done"); + ImGui::PopStyleColor(); + ImGui::Text("status: %d", g_result_status_http); + ImGui::Text("duration: %lld ms",(long long)g_result_duration_ms); + ImGui::Text("bytes: %zu", g_result_bytes); + ImGui::Separator(); + ImGui::TextWrapped("path: %s", g_result_path.c_str()); + if (ImGui::Button(TI_COPY " Copy path")) { + ImGui::SetClipboardText(g_result_path.c_str()); + } + ImGui::SameLine(); + if (ImGui::Button(TI_CUBE " View 3D")) { + request_view(g_result_path); + } + break; + } + case JobState::Failed: { + std::lock_guard lk(g_result_mu); + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1, 0.4f, 0.4f, 1)); + ImGui::Text(TI_CIRCLE_X " failed"); + ImGui::PopStyleColor(); + ImGui::TextWrapped("%s", g_result_err.c_str()); + break; + } + } + ImGui::End(); +} + +void draw_viewer() { + if (!ImGui::Begin(TI_CUBE " Viewer 3D", &g_show_viewer)) { ImGui::End(); return; } + + ImGui::Checkbox("wireframe", &g_viewer_wireframe); + ImGui::SameLine(); + if (ImGui::Button(TI_REFRESH " Reset cam")) g_cam = fn::core::OrbitCamera{}; + ImGui::SameLine(); + ImGui::TextDisabled("drag=rotar rueda=zoom"); + + if (!g_viewer_err.empty()) { + 
ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1, 0.4f, 0.4f, 1)); + ImGui::TextWrapped("%s", g_viewer_err.c_str()); + ImGui::PopStyleColor(); + } + + if (g_mesh_gpu.ok()) { + ImGui::Text("tris: %d", g_viewer_tris); + fn::viz::MeshViewerConfig vc{}; + vc.mesh = &g_mesh_gpu; + vc.cam = &g_cam; + vc.size = {-1.0f, -1.0f}; // stretch a todo el panel + vc.wireframe = g_viewer_wireframe; + vc.color = IM_COL32(170, 200, 235, 255); + fn::viz::mesh_viewer("##i23d_mesh", vc); + } else { + ImGui::TextDisabled("sin mesh — genera (Generate) o pulsa View 3D en Output"); + } + ImGui::End(); +} + +void render() { + // Consumir path pendiente en el main thread (GL context activo aqui). + { + std::string pending; + { + std::lock_guard lk(g_viewer_load_mu); + if (!g_viewer_pending_path.empty()) { + pending = g_viewer_pending_path; + g_viewer_pending_path.clear(); + } + } + if (!pending.empty()) load_mesh_into_viewer(pending); + } + + if (g_show_input) draw_input(); + if (g_show_models) draw_models(); + if (g_show_output) draw_output(); + if (g_show_viewer) draw_viewer(); +} + +} // namespace + +int main(int /*argc*/, char** /*argv*/) { + static fn_ui::PanelToggle panels[] = { + { "Input", nullptr, &g_show_input }, + { "Models", nullptr, &g_show_models }, + { "Output", nullptr, &g_show_output }, + }; + + fn::AppConfig cfg; + cfg.title = "image_to_3d_studio — single image to 3D"; + cfg.about = { "image_to_3d_studio", "0.1.0", + "UI ImGui + backend Python (TripoSR / Hunyuan3D-2 / Trellis)." }; + cfg.log = { "image_to_3d_studio.log", 1 }; + cfg.panels = panels; + cfg.panel_count = sizeof(panels) / sizeof(panels[0]); + cfg.init_gl_loader = true; // gl_texture_load llama glGenTextures + glGenerateMipmap + + return fn::run_app(cfg, render); +}