Files
egutierrez d3c83053f2 chore: auto-commit (5 archivos)
- CMakeLists.txt
- app.md
- appicon.ico
- backend/
- main.cpp

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-30 17:28:48 +02:00

550 lines
20 KiB
C++

// image_to_3d_studio — UI ImGui para single-image-to-3D.
//
// Carga una imagen (path en text field), envia POST multipart/form-data al
// backend Python (FastAPI en 127.0.0.1:8600), recibe bytes GLB, los guarda
// en local_files/cache/<hash>.glb y reporta path al usuario.
//
// Viewer GLB integrado: panel "Viewer 3D" carga el mesh GLB con
// gltf_load_mesh, lo sube a GPU (mesh_gpu) y lo renderiza con mesh_viewer
// (orbit camera + FBO + depth). Drag = rotar, rueda = zoom.
//
// Backend levantarlo aparte:
// cd projects/imagegen/apps/image_to_3d_studio/backend && ./run.sh
#include <imgui.h>
#include <atomic>
#include <chrono>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <mutex>
#include <random>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
#include "app_base.h"
#include "core/http_request.h"
#include "core/icons_tabler.h"
#include "core/logger.h"
#include "core/orbit_camera.h"
#include "core/panel_menu.h"
#include "gfx/gl_texture_load.h"
#include "gfx/gltf_load_mesh.h"
#include "gfx/mesh_gpu.h"
#include "viz/mesh_viewer.h"
namespace {
// ── UI state ───────────────────────────────────────────────────────────────
// Per-panel visibility flags. Each one is passed to ImGui::Begin() as the
// window's open flag and is checked in render() before the panel is drawn.
bool g_show_input = true;
bool g_show_models = true;
bool g_show_output = true;
bool g_show_viewer = true;
// 3D viewer (all of this is touched only by load_mesh_into_viewer() and
// draw_viewer(), which run from render()).
fn::gfx::MeshGpu g_mesh_gpu{};
fn::core::OrbitCamera g_cam{};
std::string g_viewer_path; // GLB currently loaded in the viewer
std::string g_viewer_err; // last load error (empty when there is none)
int g_viewer_tris = 0; // g_mesh_gpu.index_count / 3, shown in the viewer panel
bool g_viewer_wireframe = false;
// Path waiting to be loaded into the viewer. It is set by the worker thread or
// by a button, and consumed by render() on the main thread because the GL
// calls need an active context. Guarded by g_viewer_load_mu.
std::mutex g_viewer_load_mu;
std::string g_viewer_pending_path;
// Input
char g_image_path[1024] = ""; // edited in place by the ImGui text field
fn::GlTexture g_preview_tex{};
int g_preview_w = 0, g_preview_h = 0;
std::string g_preview_err;
// Backend
char g_backend_url[256] = "http://127.0.0.1:8600";
// Model + parameters (sent as form fields by start_generate()).
int g_model_idx = 0; // index into MODEL_IDS / MODEL_LABELS
int g_seed = 0;
int g_mc_resolution = 256;
float g_foreground_ratio = 0.85f;
bool g_texture = true;
// Identifiers sent to the backend in the "model" form field. Index-aligned
// with MODEL_LABELS.
const char* const MODEL_IDS[] = {
"triposr",
"hunyuan3d-2",
"trellis",
};
// Human-readable combo-box labels, one per MODEL_IDS entry.
const char* const MODEL_LABELS[] = {
"TripoSR (MIT, ~6 GB)",
"Hunyuan3D-2 (Tencent, ~12 GB)",
"Trellis (MIT code, ~16 GB)",
};
constexpr int MODEL_COUNT = (int)(sizeof(MODEL_IDS) / sizeof(MODEL_IDS[0]));
// Output / job state
enum class JobState { Idle, Running, Done, Failed };
std::atomic<JobState> g_state{JobState::Idle};
// The g_result_* values below are written by the generate worker thread and
// read by draw_output(); both sides hold g_result_mu while doing so.
std::mutex g_result_mu;
std::string g_result_path; // GLB on disk (valid in Done)
std::string g_result_err; // message (valid in Failed)
int g_result_status_http = 0;
int64_t g_result_duration_ms = 0;
size_t g_result_bytes = 0;
// Backend health. g_health_pinged becomes true once the ping thread has stored
// its outcome in g_health_text; g_health_text is guarded by g_health_mu.
std::atomic<bool> g_health_pinged{false};
std::string g_health_text;
std::mutex g_health_mu;
// ── Helpers ────────────────────────────────────────────────────────────────
// Reads the whole file at `path` into memory as raw bytes.
//
// Returns an empty vector when the file cannot be opened, when its size cannot
// be determined, when it is empty, or when fewer bytes than expected could be
// read. An empty result therefore always means "nothing usable was read".
std::vector<uint8_t> read_file_bytes(const std::string& path) {
    // Open positioned at the end so tellg() reports the file size.
    std::ifstream f(path, std::ios::binary | std::ios::ate);
    if (!f) return {};

    // tellg() yields -1 on failure; casting that to size_t would request an
    // enormous allocation, so reject it (and the empty file) up front.
    const std::streamoff sz = f.tellg();
    if (sz <= 0) return {};

    f.seekg(0, std::ios::beg);
    std::vector<uint8_t> out(static_cast<size_t>(sz));
    f.read(reinterpret_cast<char*>(out.data()), static_cast<std::streamsize>(sz));

    // A short read would otherwise hand back a buffer padded with zeros.
    if (f.gcount() != static_cast<std::streamsize>(sz)) return {};
    return out;
}
// Returns the last path component: everything after the final '/' or '\'.
// A path without separators comes back unchanged; a path that ends in a
// separator yields an empty string.
std::string basename_of(const std::string& path) {
    const std::string::size_type last_sep = path.find_last_of("/\\");
    if (last_sep == std::string::npos) {
        return path;
    }
    return path.substr(last_sep + 1);
}
// Simple extension-based MIME guess. The text after the last '.' in `path` is
// compared case-insensitively against a few known image extensions; anything
// else (including a path with no '.') maps to "application/octet-stream".
const char* mime_for(const std::string& path) {
    const char* const fallback = "application/octet-stream";

    const std::string::size_type dot_pos = path.find_last_of('.');
    if (dot_pos == std::string::npos) return fallback;

    // Lower-cased copy of the extension (without the dot).
    std::string lowered;
    for (std::string::size_type i = dot_pos + 1; i < path.size(); ++i) {
        lowered.push_back((char)tolower((unsigned char)path[i]));
    }

    struct Entry {
        const char* ext;
        const char* mime;
    };
    const Entry known[] = {
        {"png", "image/png"},
        {"jpg", "image/jpeg"},
        {"jpeg", "image/jpeg"},
        {"webp", "image/webp"},
        {"bmp", "image/bmp"},
    };
    for (const Entry& entry : known) {
        if (lowered == entry.ext) return entry.mime;
    }
    return fallback;
}
// Produces a multipart boundary: the fixed prefix "----fn_boundary_" followed
// by two 64-bit random values printed in lowercase hex (not zero-padded).
// The generator is a function-local static seeded once from random_device.
std::string random_boundary() {
    static std::mt19937_64 rng{std::random_device{}()};

    // Draw in a fixed order so the first value is always printed first.
    const auto first = rng();
    const auto second = rng();

    std::ostringstream hex_part;
    hex_part << std::hex << first << second;
    return "----fn_boundary_" + hex_part.str();
}
// Escapes a value that is placed inside a quoted Content-Disposition parameter
// (name="..." / filename="..."). A raw '"' would terminate the parameter early
// and a raw CR or LF would start a new header line, so those three bytes are
// percent-encoded (%22, %0D, %0A) as browsers do for multipart/form-data.
// Every other byte is copied through unchanged.
std::string multipart_escape_param(const std::string& raw) {
    std::string out;
    out.reserve(raw.size());
    for (const char c : raw) {
        switch (c) {
            case '"':  out += "%22"; break;
            case '\r': out += "%0D"; break;
            case '\n': out += "%0A"; break;
            default:   out += c;     break;
        }
    }
    return out;
}

// Builds a multipart/form-data body by hand. The result is a std::string
// because fn_http::Request takes a string body rather than a vector; a
// std::string can carry arbitrary binary bytes (data() + size()).
//
//   boundary    delimiter, without the leading "--"
//   fields      text parts as (name, value) pairs, emitted in order
//   file_field  form field name of the single file part
//   file_name   filename reported for the file part
//   file_mime   Content-Type of the file part
//   file_bytes  raw file content, copied verbatim
//
// Field names and the filename are escaped with multipart_escape_param();
// values and file content are written as-is, so the caller must pick a
// boundary that does not occur inside them.
std::string build_multipart(
    const std::string& boundary,
    const std::vector<std::pair<std::string, std::string>>& fields, // name -> value (text)
    const std::string& file_field,
    const std::string& file_name,
    const std::string& file_mime,
    const std::vector<uint8_t>& file_bytes)
{
    std::string body;
    body.reserve(file_bytes.size() + 1024);
    auto add = [&](const std::string& s) { body.append(s); };

    // One part per text field.
    for (const auto& [name, value] : fields) {
        add("--"); add(boundary); add("\r\n");
        add("Content-Disposition: form-data; name=\"");
        add(multipart_escape_param(name));
        add("\"\r\n\r\n");
        add(value); add("\r\n");
    }

    // The file part.
    add("--"); add(boundary); add("\r\n");
    add("Content-Disposition: form-data; name=\"");
    add(multipart_escape_param(file_field));
    add("\"; filename=\"");
    add(multipart_escape_param(file_name));
    add("\"\r\n");
    add("Content-Type: "); add(file_mime); add("\r\n\r\n");
    body.append((const char*)file_bytes.data(), file_bytes.size());
    add("\r\n");

    // Closing delimiter.
    add("--"); add(boundary); add("--\r\n");
    return body;
}
// Fast 64-bit FNV-1a hash of `n` bytes starting at `data`, returned as 16
// lowercase, zero-padded hex digits. Used to name the cached GLB file.
// `data` is not dereferenced when `n` is 0.
std::string fnv1a_hex(const uint8_t* data, size_t n) {
    // Standard FNV-1a 64-bit parameters, written in hex so a dropped decimal
    // digit cannot silently change them.
    constexpr uint64_t kOffsetBasis = 0xcbf29ce484222325ull;
    constexpr uint64_t kPrime = 0x100000001b3ull;

    uint64_t h = kOffsetBasis;
    for (size_t i = 0; i < n; ++i) {
        h ^= data[i];   // FNV-1a: xor first...
        h *= kPrime;    // ...then multiply (wraps modulo 2^64)
    }

    char buf[17];  // 16 hex digits + NUL
    std::snprintf(buf, sizeof(buf), "%016llx", (unsigned long long)h);
    return buf;
}
// Normalizes a Mesh in place: moves the center of its axis-aligned bounding
// box to the origin and scales it uniformly so the largest bounding-box extent
// becomes 2 (i.e. it fits a sphere of radius ~1). This lets the orbit camera
// (fixed target at 0,0,0, distance ~3) frame any mesh regardless of its scale.
//
// m.positions is treated as a flat x,y,z,x,y,z,... float array. Only whole
// triples are processed: if the element count is not a multiple of 3 the
// trailing 1-2 values are left untouched instead of being indexed past the
// end. A mesh with no complete vertex is left unchanged.
void normalize_mesh(fn::gfx::Mesh& m) {
    const size_t usable = m.positions.size() - (m.positions.size() % 3);
    if (usable < 3) return;

    // Bounding box, seeded with the first vertex.
    float mn[3] = { m.positions[0], m.positions[1], m.positions[2] };
    float mx[3] = { m.positions[0], m.positions[1], m.positions[2] };
    for (size_t i = 0; i < usable; i += 3) {
        for (int k = 0; k < 3; ++k) {
            const float v = m.positions[i + k];
            if (v < mn[k]) mn[k] = v;
            if (v > mx[k]) mx[k] = v;
        }
    }

    const float cx = 0.5f * (mn[0] + mx[0]);
    const float cy = 0.5f * (mn[1] + mx[1]);
    const float cz = 0.5f * (mn[2] + mx[2]);

    // Largest extent across the three axes.
    float ext = 0.0f;
    for (int k = 0; k < 3; ++k) {
        const float span = mx[k] - mn[k];
        if (span > ext) ext = span;
    }
    // A degenerate (point-like) mesh is only recentered, not scaled.
    const float scale = (ext > 1e-6f) ? (2.0f / ext) : 1.0f;  // diameter -> ~2

    for (size_t i = 0; i < usable; i += 3) {
        m.positions[i + 0] = (m.positions[i + 0] - cx) * scale;
        m.positions[i + 1] = (m.positions[i + 1] - cy) * scale;
        m.positions[i + 2] = (m.positions[i + 2] - cz) * scale;
    }
}
// Carga un GLB del disco en el viewer 3D. DEBE llamarse desde el main thread
// (las llamadas GL de mesh_gpu_upload requieren contexto activo).
void load_mesh_into_viewer(const std::string& path) {
g_viewer_err.clear();
fn::gfx::Mesh m = fn::gfx::gltf_load_mesh_from_file(path.c_str());
if (m.positions.empty()) {
g_viewer_err = std::string("GLB load failed (") +
fn::gfx::gltf_load_last_error() + "): " + path;
fn_log::log_error(g_viewer_err.c_str());
return;
}
normalize_mesh(m);
if (g_mesh_gpu.ok()) fn::gfx::mesh_gpu_destroy(g_mesh_gpu);
g_mesh_gpu = fn::gfx::mesh_gpu_upload(m);
if (!g_mesh_gpu.ok()) {
g_viewer_err = "mesh_gpu_upload failed (contexto GL?): " + path;
fn_log::log_error(g_viewer_err.c_str());
return;
}
g_viewer_tris = g_mesh_gpu.index_count / 3;
g_viewer_path = path;
g_cam = fn::core::OrbitCamera{}; // reset camara al cargar mesh nuevo
g_show_viewer = true;
fn_log::log_info(("viewer loaded " + path + " tris=" +
std::to_string(g_viewer_tris)).c_str());
}
// Queues `path` to be loaded into the viewer; render() picks it up on the main
// thread during a later frame. A path that is still pending is overwritten.
void request_view(const std::string& path) {
    std::lock_guard<std::mutex> guard(g_viewer_load_mu);
    g_viewer_pending_path.assign(path);
}
// ── Acciones ───────────────────────────────────────────────────────────────
void reload_preview() {
g_preview_err.clear();
if (g_preview_tex.id) { fn::gl_texture_destroy(g_preview_tex); g_preview_tex = {}; }
g_preview_w = g_preview_h = 0;
std::string path = g_image_path;
if (path.empty()) return;
fn::GlTexture t = fn::gl_texture_load(path.c_str(), /*flip_y=*/false, /*srgb=*/true);
if (!t.id) {
g_preview_err = std::string("no se pudo cargar imagen (") +
fn::gl_texture_last_error() + "): " + path;
fn_log::log_error(g_preview_err.c_str());
return;
}
g_preview_tex = t;
g_preview_w = t.w;
g_preview_h = t.h;
fn_log::log_info(("preview ok " + path + " " +
std::to_string(t.w) + "x" + std::to_string(t.h)).c_str());
}
// Fires a GET <backend_url>/health on a detached thread (2 s timeout) and
// stores a one-line outcome in g_health_text. g_health_pinged is cleared
// immediately and set again once the outcome has been stored.
void ping_backend() {
    g_health_pinged = false;

    // Snapshot the URL now; the UI buffer may be edited while the thread runs.
    const std::string base_url = g_backend_url;

    std::thread worker([base_url]() {
        fn_http::Request req;
        req.method = "GET";
        req.url = base_url + "/health";
        req.timeout_ms = 2000;
        auto res = fn_http::request(req);

        // Format the outcome first, then publish it under the mutex.
        std::string summary;
        if (!res.error.empty()) {
            summary = "ERR: " + res.error;
        } else if (res.status / 100 != 2) {
            summary = "HTTP " + std::to_string(res.status) + ": " + res.body;
        } else {
            summary = std::to_string((long long)res.duration_ms) +
                      " ms — " + res.body;
        }

        std::lock_guard<std::mutex> guard(g_health_mu);
        g_health_text.swap(summary);
        g_health_pinged = true;
    });
    worker.detach();
}
// Starts a generation job unless one is already running.
//
// The current UI parameters are captured by value and a detached worker
// thread then: reads the image, POSTs it as multipart/form-data to
// <backend_url>/generate (5 min timeout), writes the response body to
// <cache>/<model>_<hash>.glb and queues that file for the 3D viewer.
//
// Outcome is published through g_state plus the g_result_* variables, which
// are written under g_result_mu. Every failure (unreadable image, transport
// error, non-2xx status, GLB that cannot be written) ends in JobState::Failed
// with a message in g_result_err.
void start_generate() {
    if (g_state.load() == JobState::Running) return;

    std::string path = g_image_path;
    if (path.empty()) {
        std::lock_guard<std::mutex> lk(g_result_mu);
        g_result_err = "image_path vacio";
        g_state = JobState::Failed;
        return;
    }

    g_state = JobState::Running;
    std::thread([
        path, url = std::string(g_backend_url),
        model = std::string(MODEL_IDS[g_model_idx]),
        seed = g_seed, mc = g_mc_resolution,
        fg = g_foreground_ratio, tex = g_texture
    ]() {
        auto image_bytes = read_file_bytes(path);
        if (image_bytes.empty()) {
            std::lock_guard<std::mutex> lk(g_result_mu);
            g_result_err = "no se pudo leer imagen: " + path;
            g_state = JobState::Failed;
            return;
        }

        std::string boundary = random_boundary();
        std::string body = build_multipart(
            boundary,
            {
                {"model", model},
                {"seed", std::to_string(seed)},
                {"mc_resolution", std::to_string(mc)},
                {"foreground_ratio", std::to_string(fg)},
                {"texture", tex ? "true" : "false"},
            },
            "file", basename_of(path), mime_for(path), image_bytes
        );

        fn_http::Request req;
        req.method = "POST";
        req.url = url + "/generate";
        req.headers = {{"Content-Type", "multipart/form-data; boundary=" + boundary}};
        req.body = std::move(body);
        req.timeout_ms = 5 * 60 * 1000;  // 5 min — large models are slow
        auto res = fn_http::request(req);

        // From here on everything touches the shared result variables.
        std::lock_guard<std::mutex> lk(g_result_mu);
        g_result_status_http = res.status;
        g_result_duration_ms = res.duration_ms;
        if (!res.error.empty()) {
            g_result_err = "transport: " + res.error;
            g_state = JobState::Failed;
            return;
        }
        if (res.status / 100 != 2) {
            g_result_err = "HTTP " + std::to_string(res.status) + ": " + res.body;
            g_state = JobState::Failed;
            return;
        }

        // Save the GLB as local_files/cache/<model>_<hash>.glb. The hash covers
        // at most the first 4096 bytes of the response.
        std::string cache_dir = fn::local_path("cache");
        std::error_code ec;
        // A failure here surfaces below, when the output file cannot be opened.
        std::filesystem::create_directories(cache_dir, ec);
        std::string hash = fnv1a_hex(
            (const uint8_t*)res.body.data(),
            res.body.size() < 4096 ? res.body.size() : 4096);
        std::string out_path = cache_dir + "/" + model + "_" + hash + ".glb";

        std::ofstream f(out_path, std::ios::binary);
        f.write(res.body.data(), (std::streamsize)res.body.size());
        f.close();
        // Open, write and close errors all leave the stream in a failed state.
        // Report them instead of announcing a file that is missing or truncated.
        if (!f) {
            g_result_err = "no se pudo escribir GLB: " + out_path;
            g_state = JobState::Failed;
            return;
        }

        g_result_path = out_path;
        g_result_bytes = res.body.size();
        g_result_err.clear();
        g_state = JobState::Done;

        // Queue the load for the viewer; the main thread performs it because
        // it owns the GL context.
        request_view(out_path);
        fn_log::log_info(("generated " + out_path + " (" +
                          std::to_string(res.body.size()) + " bytes, " +
                          std::to_string((long long)res.duration_ms) + " ms)").c_str());
    }).detach();
}
// ── Paneles ────────────────────────────────────────────────────────────────
void draw_input() {
if (!ImGui::Begin(TI_PHOTO " Input", &g_show_input)) { ImGui::End(); return; }
ImGui::TextUnformatted("Imagen origen");
ImGui::PushItemWidth(-100);
bool changed = ImGui::InputText("##path", g_image_path, sizeof(g_image_path),
ImGuiInputTextFlags_EnterReturnsTrue);
ImGui::PopItemWidth();
ImGui::SameLine();
if (ImGui::Button(TI_REFRESH " Load") || changed) {
reload_preview();
}
ImGui::TextDisabled("(arrastra path o pega aqui — PNG/JPG/WEBP)");
if (!g_preview_err.empty()) {
ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1, 0.4f, 0.4f, 1));
ImGui::TextWrapped("%s", g_preview_err.c_str());
ImGui::PopStyleColor();
}
if (g_preview_tex.id) {
ImGui::Separator();
ImGui::Text("%dx%d", g_preview_w, g_preview_h);
float avail = ImGui::GetContentRegionAvail().x;
float scale = avail / (float)g_preview_w;
if (scale > 1.0f) scale = 1.0f;
ImGui::Image((ImTextureID)(intptr_t)g_preview_tex.id,
ImVec2(g_preview_w * scale, g_preview_h * scale));
}
ImGui::End();
}
void draw_models() {
if (!ImGui::Begin(TI_CPU " Models", &g_show_models)) { ImGui::End(); return; }
ImGui::Combo("modelo", &g_model_idx, MODEL_LABELS, MODEL_COUNT);
ImGui::Separator();
ImGui::InputInt("seed", &g_seed);
ImGui::SliderInt("mc_resolution", &g_mc_resolution, 64, 512);
ImGui::SliderFloat("foreground_ratio", &g_foreground_ratio, 0.5f, 1.0f);
ImGui::Checkbox("texture (Hunyuan3D)", &g_texture);
ImGui::Separator();
ImGui::PushItemWidth(-100);
ImGui::InputText("backend_url", g_backend_url, sizeof(g_backend_url));
ImGui::PopItemWidth();
if (ImGui::Button(TI_HEART_RATE_MONITOR " Ping /health")) ping_backend();
if (g_health_pinged.load()) {
std::lock_guard<std::mutex> lk(g_health_mu);
ImGui::SameLine();
ImGui::TextDisabled("%s", g_health_text.c_str());
}
ImGui::Separator();
bool busy = g_state.load() == JobState::Running;
if (busy) ImGui::BeginDisabled();
if (ImGui::Button(TI_ROCKET " Generate", ImVec2(-1, 36))) start_generate();
if (busy) ImGui::EndDisabled();
ImGui::End();
}
void draw_output() {
if (!ImGui::Begin(TI_BOX " Output", &g_show_output)) { ImGui::End(); return; }
JobState st = g_state.load();
switch (st) {
case JobState::Idle: ImGui::TextDisabled("idle — pulsa Generate"); break;
case JobState::Running: ImGui::Text("%s generando...", TI_LOADER); break;
case JobState::Done: {
std::lock_guard<std::mutex> lk(g_result_mu);
ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.4f, 1, 0.5f, 1));
ImGui::Text(TI_CIRCLE_CHECK " done");
ImGui::PopStyleColor();
ImGui::Text("status: %d", g_result_status_http);
ImGui::Text("duration: %lld ms",(long long)g_result_duration_ms);
ImGui::Text("bytes: %zu", g_result_bytes);
ImGui::Separator();
ImGui::TextWrapped("path: %s", g_result_path.c_str());
if (ImGui::Button(TI_COPY " Copy path")) {
ImGui::SetClipboardText(g_result_path.c_str());
}
ImGui::SameLine();
if (ImGui::Button(TI_CUBE " View 3D")) {
request_view(g_result_path);
}
break;
}
case JobState::Failed: {
std::lock_guard<std::mutex> lk(g_result_mu);
ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1, 0.4f, 0.4f, 1));
ImGui::Text(TI_CIRCLE_X " failed");
ImGui::PopStyleColor();
ImGui::TextWrapped("%s", g_result_err.c_str());
break;
}
}
ImGui::End();
}
void draw_viewer() {
if (!ImGui::Begin(TI_CUBE " Viewer 3D", &g_show_viewer)) { ImGui::End(); return; }
ImGui::Checkbox("wireframe", &g_viewer_wireframe);
ImGui::SameLine();
if (ImGui::Button(TI_REFRESH " Reset cam")) g_cam = fn::core::OrbitCamera{};
ImGui::SameLine();
ImGui::TextDisabled("drag=rotar rueda=zoom");
if (!g_viewer_err.empty()) {
ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1, 0.4f, 0.4f, 1));
ImGui::TextWrapped("%s", g_viewer_err.c_str());
ImGui::PopStyleColor();
}
if (g_mesh_gpu.ok()) {
ImGui::Text("tris: %d", g_viewer_tris);
fn::viz::MeshViewerConfig vc{};
vc.mesh = &g_mesh_gpu;
vc.cam = &g_cam;
vc.size = {-1.0f, -1.0f}; // stretch a todo el panel
vc.wireframe = g_viewer_wireframe;
vc.color = IM_COL32(170, 200, 235, 255);
fn::viz::mesh_viewer("##i23d_mesh", vc);
} else {
ImGui::TextDisabled("sin mesh — genera (Generate) o pulsa View 3D en Output");
}
ImGui::End();
}
// Per-frame callback. First consumes a queued viewer load (this is the main
// thread, where the GL context is active), then draws every visible panel.
void render() {
    // Take the pending path out under the lock; do the slow load outside it.
    std::string to_load;
    {
        std::lock_guard<std::mutex> guard(g_viewer_load_mu);
        to_load.swap(g_viewer_pending_path);  // leaves the queue slot empty
    }
    if (!to_load.empty()) {
        load_mesh_into_viewer(to_load);
    }

    if (g_show_input) {
        draw_input();
    }
    if (g_show_models) {
        draw_models();
    }
    if (g_show_output) {
        draw_output();
    }
    if (g_show_viewer) {
        draw_viewer();
    }
}
} // namespace
// Entry point: describes the application (title, about box, log file, panel
// toggles) and hands control to fn::run_app with render() as the per-frame
// callback. The process exit code is whatever run_app returns.
int main(int /*argc*/, char** /*argv*/) {
    // One toggle per closable panel. The viewer is listed too, so a viewer
    // window closed with its X button can be reopened without loading a new
    // mesh. The second field is left null, as in the existing entries.
    static fn_ui::PanelToggle panels[] = {
        { "Input",     nullptr, &g_show_input  },
        { "Models",    nullptr, &g_show_models },
        { "Output",    nullptr, &g_show_output },
        { "Viewer 3D", nullptr, &g_show_viewer },
    };

    fn::AppConfig cfg;
    cfg.title = "image_to_3d_studio — single image to 3D";
    cfg.about = { "image_to_3d_studio", "0.1.0",
                  "UI ImGui + backend Python (TripoSR / Hunyuan3D-2 / Trellis)." };
    cfg.log = { "image_to_3d_studio.log", 1 };
    cfg.panels = panels;
    cfg.panel_count = sizeof(panels) / sizeof(panels[0]);
    cfg.init_gl_loader = true;  // gl_texture_load calls glGenTextures + glGenerateMipmap
    return fn::run_app(cfg, render);
}