chore: sync from fn-registry agent
This commit is contained in:
@@ -0,0 +1,40 @@
|
||||
# JUPYTER HABILITADO EN ESTE ANALISIS
|
||||
|
||||
## Reglas OBLIGATORIAS para Claude
|
||||
|
||||
### 1. CODIGO INMUTABLE — NUNCA MODIFICAR CELDAS EXISTENTES
|
||||
- **PROHIBIDO** usar NotebookEdit para reemplazar celdas existentes
|
||||
- **SIEMPRE** anadir celdas NUEVAS al final del notebook
|
||||
- Si hay un error en una celda, crear celda nueva con la correccion
|
||||
- El historial de trabajo debe quedar intacto para trazabilidad
|
||||
|
||||
### 2. PROGRAMACION FUNCIONAL OBLIGATORIA
|
||||
- **Funciones puras**: sin efectos secundarios, mismo input -> mismo output
|
||||
- **Inmutabilidad**: nunca mutar datos, crear copias transformadas
|
||||
- **Composicion**: funciones pequenas que se combinan
|
||||
- Preferir: `map`, `filter`, `reduce`, list comprehensions
|
||||
- Evitar: loops con mutacion, `global`, modificar argumentos in-place
|
||||
|
||||
### 3. SIEMPRE usar MCP jupyter para ejecutar codigo Python
|
||||
- Las ejecuciones se ven en tiempo real en Jupyter Lab del usuario
|
||||
- Compartimos variables y estado del kernel
|
||||
- **NUNCA usar bash para ejecutar Python en este analisis**
|
||||
|
||||
### 4. Verificar Jupyter activo ANTES de ejecutar
|
||||
- Si no esta activo: pedir al usuario que ejecute `./run-jupyter-lab.sh`
|
||||
|
||||
### 5. Gestion de notebooks
|
||||
- Notebooks en la carpeta `notebooks/` o subcarpetas
|
||||
- Si un notebook tiene >50 celdas, crear uno nuevo
|
||||
- Nombrar descriptivamente: `01_exploracion.ipynb`, `02_limpieza.ipynb`
|
||||
|
||||
### 6. Gestion de Python
|
||||
- **SIEMPRE usar `uv`** para gestionar dependencias
|
||||
- Anadir paquetes con `uv add nombre_paquete`
|
||||
|
||||
### 7. Acceso al fn_registry
|
||||
- `FN_REGISTRY_ROOT` apunta a la raiz del registry
|
||||
- Para importar funciones Python: `sys.path.insert(0, os.path.join(os.environ["FN_REGISTRY_ROOT"], "python", "functions"))`
|
||||
- Para consultar registry.db: `sqlite3` o `import sqlite3` con la ruta `$FN_REGISTRY_ROOT/registry.db`
|
||||
|
||||
|
||||
Binary file not shown.
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
fn_registry kernel startup
|
||||
Autoconfigura acceso al registry en cada notebook.
|
||||
Generado por write_jupyter_registry_kernel (fn_registry).
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
# ── FN_REGISTRY_ROOT ────────────────────────────────────────
|
||||
# Prioridad: env var > path hardcoded > descubrimiento automatico
|
||||
def _discover_registry_root(fallback=Path("/home/lucas/fn_registry"), max_depth=10):
    """Locate the fn_registry root directory.

    Resolution order:
      1. The ``FN_REGISTRY_ROOT`` environment variable, when set and
         non-empty. It is returned resolved, without checking that it
         actually contains ``registry.db``.
      2. ``fallback``, when it contains ``registry.db``.
      3. The nearest of the current working directory and its ancestors
         that contains ``registry.db``; at most ``max_depth`` directories
         (the CWD included) are inspected.
      4. ``fallback`` again as a last resort, even though ``registry.db``
         was not found there.

    Args:
        fallback: Well-known install location used for steps 2 and 4.
        max_depth: Maximum number of directories inspected in step 3.

    Returns:
        A ``pathlib.Path`` pointing at the chosen registry root.
    """
    env_root = os.environ.get("FN_REGISTRY_ROOT")
    if env_root:
        return Path(env_root).resolve()

    fallback = Path(fallback)
    if (fallback / "registry.db").exists():
        return fallback

    # Walk upwards from the CWD; Path.parents stops at the filesystem root,
    # so no explicit "reached the top" check is needed.
    cwd = Path.cwd()
    for candidate in (cwd, *cwd.parents)[:max(max_depth, 0)]:
        if (candidate / "registry.db").exists():
            return candidate

    return fallback
|
||||
|
||||
FN_REGISTRY_ROOT = _discover_registry_root()
# Publish the resolved root through the environment as well.
os.environ["FN_REGISTRY_ROOT"] = str(FN_REGISTRY_ROOT)

# ── sys.path: import the registry's Python functions ────────
_python_functions = FN_REGISTRY_ROOT / "python" / "functions"
if _python_functions.exists():
    for _domain in sorted(_python_functions.iterdir()):
        # Only real domain directories; names starting with "_" are skipped.
        if not _domain.is_dir() or _domain.name.startswith("_"):
            continue
        _path = str(_domain)
        if _path not in sys.path:
            sys.path.insert(0, _path)

# Also the parent directory, for per-domain imports: from core import filter_list
_pf = str(_python_functions)
if _pf not in sys.path:
    sys.path.insert(0, _pf)

# ── fn_query: query registry.db from the notebook ───────────
_REGISTRY_DB = FN_REGISTRY_ROOT / "registry.db"
|
||||
|
||||
def fn_query(sql, params=()):
    """Run a SQL statement against registry.db and return the resulting rows.

    Each row is converted to a plain ``dict`` keyed by column name. A new
    connection is opened for every call and closed before returning,
    whether or not the statement succeeds.

    Raises:
        FileNotFoundError: if registry.db does not exist at the configured path.

    Examples:
        fn_query("SELECT id, description FROM functions WHERE domain = ?", ("finance",))
        fn_query("SELECT id FROM functions_fts WHERE functions_fts MATCH ?", ("slice*",))
    """
    if not _REGISTRY_DB.exists():
        raise FileNotFoundError(f"registry.db no encontrado en {_REGISTRY_DB}")

    connection = sqlite3.connect(str(_REGISTRY_DB))
    try:
        # sqlite3.Row lets each fetched row be turned into a dict below.
        connection.row_factory = sqlite3.Row
        cursor = connection.execute(sql, params)
        return list(map(dict, cursor.fetchall()))
    finally:
        connection.close()
|
||||
|
||||
def _fts_prefix_phrase(term):
    """Return ``term`` as a prefix phrase that can be embedded in a MATCH query.

    Terms made only of letters, digits, underscores or non-ASCII characters
    are emitted as ``term*``, exactly as before. Anything else (hyphens,
    spaces, quotes, the bare operators AND/OR/NOT) is wrapped in double
    quotes with embedded quotes doubled, so it is read as text instead of
    as query syntax.
    NOTE(review): the quoted form follows the FTS5 string syntax; this
    assumes functions_fts/types_fts are FTS5 tables — confirm.
    """
    is_bareword = all(
        ch == "_" or ch.isalnum() or ord(ch) > 127 for ch in term
    )
    if is_bareword and term not in ("AND", "OR", "NOT"):
        return f"{term}*"
    escaped = term.replace('"', '""')
    return f'"{escaped}"*'


def fn_search(term):
    """Search registry functions and types by name or description prefix.

    Args:
        term: Text to look for. Surrounding whitespace is ignored; a blank
            term yields empty result lists instead of an FTS syntax error.

    Returns:
        A dict ``{"functions": [...], "types": [...]}`` whose values are the
        row dicts returned by ``fn_query``.

    Examples:
        fn_search("slice")
        fn_search("finance")
        fn_search("image-to-3d")
    """
    text = str(term).strip()
    if not text:
        return {"functions": [], "types": []}

    phrase = _fts_prefix_phrase(text)
    fts_term = f"name:{phrase} OR description:{phrase}"
    functions = fn_query(
        "SELECT id, kind, purity, lang, description FROM functions "
        "WHERE id IN (SELECT id FROM functions_fts WHERE functions_fts MATCH ?) "
        "ORDER BY name", (fts_term,)
    )
    types = fn_query(
        "SELECT id, algebraic, lang, description FROM types "
        "WHERE id IN (SELECT id FROM types_fts WHERE types_fts MATCH ?) "
        "ORDER BY name", (fts_term,)
    )
    return {"functions": functions, "types": types}
|
||||
|
||||
def fn_code(function_id):
    """Return the source code stored in the registry for one function.

    Raises:
        KeyError: if no row in ``functions`` has the given id.

    Example:
        print(fn_code("filter_list_py_core"))
    """
    matches = fn_query("SELECT code FROM functions WHERE id = ?", (function_id,))
    if matches:
        return matches[0]["code"]
    raise KeyError(f"Funcion no encontrada: {function_id}")
|
||||
|
||||
# ── Welcome banner ──────────────────────────────────────────
# Emitted with a single print; the text is the same as printing line by line.
print(
    "\n".join(
        (
            f"fn_registry conectado: {FN_REGISTRY_ROOT}",
            f" registry.db: {'OK' if _REGISTRY_DB.exists() else 'NO ENCONTRADO'}",
            f" Python functions: {_pf}",
            " Helpers: fn_query(), fn_search(), fn_code()",
        )
    )
)
|
||||
@@ -0,0 +1 @@
|
||||
8888
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"7da6e738-4e80-4249-8b21-6196c839c170": {
|
||||
"version": "2.4.0",
|
||||
"created_at": "2026-05-28T20:46:18.369137+00:00",
|
||||
"document_version": "2.0.0"
|
||||
}
|
||||
}
|
||||
Binary file not shown.
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"mcpServers": {
|
||||
"jupyter": {
|
||||
"command": "/home/lucas/fn_registry/projects/imagegen/analysis/spike_image_to_3d/.venv/bin/jupyter-mcp-server",
|
||||
"args": [
|
||||
"--transport", "stdio",
|
||||
"--jupyter-url", "http://localhost:8888",
|
||||
"--jupyter-token", ""
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
3.13
|
||||
@@ -0,0 +1,107 @@
|
||||
# Multi-View / Multi-Image Image-to-3D — Research (mayo 2026)
|
||||
|
||||
Investigacion para extender `image_to_3d_studio` (hoy TripoSR single-image) a
|
||||
condicionar con **varias fotos reales** del objeto (frente/espalda/lados).
|
||||
|
||||
## Distincion clave
|
||||
|
||||
Dos cosas se llaman "multi-view 3D" — solo UNA acepta tus fotos reales:
|
||||
|
||||
| Clase | Input | Que hace | Ejemplos |
|
||||
|---|---|---|---|
|
||||
| **A. Multi-view *conditioning*** | Tus N fotos reales | Reconstruye desde las fotos que das | **Hunyuan3D-2mv**, **TRELLIS multi-image**, **EscherNet**, GS-LRM/MeshLRM (posed) |
|
||||
| **B. Single-image con NVS interno** | 1 foto | Alucina los lados que faltan, luego reconstruye. NO puedes meter fotos reales de atras/lados | InstantMesh, Unique3D, CRM, Wonder3D, Era3D, SV3D, Zero123++, LGM, GRM |
|
||||
|
||||
**Hallazgo principal**: la mayoria de repos famosos "multi-view" (InstantMesh, CRM, Unique3D, LGM, GRM) son **Clase B** — el "multi-view" es andamiaje interno, no input del usuario. Para "pasar detalles de varios lados" necesitas **Clase A**.
|
||||
|
||||
## Tabla comparativa
|
||||
|
||||
| Modelo | Autor/Año | Clase | Fotos reales? | #vistas | Poses | Output | Licencia | VRAM | Repo |
|
||||
|---|---|---|---|---|---|---|---|---|---|
|
||||
| **Hunyuan3D-2mv** | Tencent 2025 | **A** ✅ | Si | 3 (front/left/back, config) | Fijas canonicas | Mesh | tencent-hunyuan-community (restrictiva) | ~6 GB shape / ~12 GB +tex | [GH](https://github.com/Tencent-Hunyuan/Hunyuan3D-2) · [HF](https://huggingface.co/tencent/Hunyuan3D-2mv) |
|
||||
| **TRELLIS multi-image** | Microsoft 2024/25 | **A** ✅ | Si | N arbitrario | Libres/unposed | 3DGS+mesh+RF | MIT (codigo) | ~16-24 GB | [GH](https://github.com/microsoft/TRELLIS) |
|
||||
| **EscherNet** | CVPR'24 oral | **A** ✅ | Si | Any (1..10) | 6-DoF relativas | NVS → NeuS mesh | ver repo | alta (A100) | [GH](https://github.com/kxhit/EscherNet) |
|
||||
| **GS-LRM** | Adobe 2024 | A | Si (posed) | 2-4 | **Poses conocidas** | 3DGS | research, **sin weights** | A100 | [paper](https://arxiv.org/abs/2404.19702) |
|
||||
| **MeshLRM/Long-LRM** | Adobe 2024 | A | Si (posed) | 4 / 32 | Conocidas | Mesh/3DGS | research, weights escasos | A100 | [Long-LRM](https://arxiv.org/pdf/2410.12781) |
|
||||
| **MV-Adapter** | ICCV'25 | helper | n/a | — | — | Solo multi-view imgs (no mesh) | Apache-2.0 | ~14 GB | [GH](https://github.com/huanngzh/MV-Adapter) |
|
||||
| InstantMesh | TencentARC 2024 | B | No | 6 interno | — | Mesh+NeRF | Apache-2.0 | ~12-16 GB | [GH](https://github.com/TencentARC/InstantMesh) |
|
||||
| Unique3D | NeurIPS'24 | B | No | 4 interno | — | Mesh texturizado | MIT | ~10-12 GB (problemas en 8GB) | [GH](https://github.com/AiuniAI/Unique3D) |
|
||||
| CRM | ECCV'24 | B | No | 6 interno | — | Mesh texturizado | MIT | ~9-16 GB | [GH](https://github.com/thu-ml/CRM) |
|
||||
| Wonder3D/++ | — | B | No | 6 interno | — | Mesh via NeuS | AGPL-3.0 | ~12+ GB | [GH](https://github.com/xxlong0/Wonder3D) |
|
||||
| SV3D | Stability 2024 | B (NVS) | No | 21 orbit | opcional | Video orbit → 3D | non-commercial | alta | [HF](https://huggingface.co/stabilityai/sv3d) |
|
||||
| Zero123++ | SUDO-AI 2023/24 | B (NVS) | No | 6 fijas | fijas | Multi-view imgs | ver repo | ~12 GB | [GH](https://github.com/SUDO-AI-3D/zero123plus) |
|
||||
| LGM | ECCV'24 | B | No | 4 interno | — | 3DGS → mesh | ver repo | ~10-16 GB | [project](https://me.kiui.moe/lgm/) |
|
||||
| GRM | ECCV'24 | B | (posed) | 4 | conocidas | 3DGS, 0.1s | ver repo | A100 | [paper](https://arxiv.org/abs/2403.14621) |
|
||||
|
||||
Baseline: **TripoSR** MIT, 1 img, ~6-8 GB, <0.5s (A100) / 5-10s (3060-class).
|
||||
|
||||
## Recomendacion para image_to_3d_studio
|
||||
|
||||
**Integracion primaria: Hunyuan3D-2mv.**
|
||||
|
||||
1. **Hecho para input multi-lado** — el dict `{"front":..,"left":..,"back":..}` ES la UX. Sin NeuS, sin pose estimation, sin NVS interno.
|
||||
2. **VRAM cabe** — shape ~6 GB (vs TRELLIS 16-24 GB que haria OOM en 3070 8GB/WSL2). Variante `Hunyuan3D-2mini` ~5 GB.
|
||||
3. **Mesh directo** (trimesh), mismo shape que TripoSR → trivial en el dispatcher.
|
||||
4. **Maduro**, ComfyUI + diffusers, weights HF, ejemplos claros.
|
||||
5. **Calidad** SOTA multi-view open mayo 2026; mejora geometria atras/lados vs single-image.
|
||||
|
||||
Watch-out: licencia `tencent-hunyuan-community` NO es OSI-permisiva (restricciones uso/escala). OK personal; revisar si comercial.
|
||||
|
||||
**Secundaria "max flexibilidad": TRELLIS multi-image** detras de flag "high VRAM". MIT, N vistas unposed, 3DGS+mesh. Solo practico con CPU offload o GPU mayor.
|
||||
|
||||
**Evitar como backend multi-view**: InstantMesh, Unique3D, CRM, LGM, GRM, SV3D, Zero123++ (son Clase B, no consumen fotos reales de lados). Unique3D/CRM siguen siendo buenos *single-image* alternativos a TripoSR (mejor textura, MIT).
|
||||
|
||||
### Forma del dispatcher
|
||||
|
||||
```
|
||||
backends = {
|
||||
"triposr": single_image -> mesh # existente
|
||||
"hunyuan3d_mv": multi_view -> mesh # NUEVO default multi-lado
|
||||
"trellis_mv": multi_view(N) -> mesh+3DGS # opcional, flag high-VRAM
|
||||
}
|
||||
```
|
||||
|
||||
## Pipeline UI propuesto
|
||||
|
||||
```
|
||||
Mode toggle: [ Single image ] [ Multi-side ]
|
||||
|
||||
Multi-side: drop zones etiquetadas
|
||||
[ FRONT* ] [ BACK ] [ LEFT ] [ RIGHT ] (+TOP)
|
||||
* front obligatorio; resto opcional
|
||||
[ Backend: Hunyuan3D-2mv v ] [ Generate ]
|
||||
| POST {front, back?, left?, right?, top?}
|
||||
v
|
||||
Backend dispatcher:
|
||||
1. Validar: >=1 imagen, front presente
|
||||
2. Preprocess cada vista: bg removal + center + square pad (reusar paso TripoSR)
|
||||
3. Route: 1 img -> triposr ; >=2 labeled -> hunyuan3d_mv(image={front,left,back})
|
||||
4. (opt) texture pass Hunyuan3D-Paint
|
||||
5. Export .glb -> path + thumbnail
|
||||
v
|
||||
Viewer 3D existente + download
|
||||
```
|
||||
|
||||
Notas:
|
||||
- **Reusar bg-removal/centering de TripoSR** por vista — Hunyuan3D-2mv espera inputs limpios, centrados, orientacion canonica.
|
||||
- **Mapear labels UI -> claves canonicas** (`front`/`left`/`back`). Poses fijas → el etiquetado ES la pose, sin estimar camara. Por eso gana a EscherNet/TRELLIS (que necesitan poses o mas compute).
|
||||
- Fallback front-only → TripoSR.
|
||||
- Gate TRELLIS por `torch.cuda.mem_get_info`.
|
||||
|
||||
## Fuentes
|
||||
|
||||
Hunyuan3D-2/2mv: github.com/Tencent-Hunyuan/Hunyuan3D-2 · huggingface.co/tencent/Hunyuan3D-2mv · arxiv 2501.12202
|
||||
TRELLIS: github.com/microsoft/TRELLIS
|
||||
EscherNet: github.com/kxhit/EscherNet · kxhit.github.io/EscherNet
|
||||
InstantMesh: github.com/TencentARC/InstantMesh · arxiv 2404.07191
|
||||
Unique3D: github.com/AiuniAI/Unique3D
|
||||
CRM: github.com/thu-ml/CRM · arxiv 2403.05034
|
||||
Wonder3D: github.com/xxlong0/Wonder3D
|
||||
SV3D: huggingface.co/stabilityai/sv3d
|
||||
Zero123++: github.com/SUDO-AI-3D/zero123plus
|
||||
LGM: me.kiui.moe/lgm · arxiv 2402.05054
|
||||
GRM: arxiv 2403.14621
|
||||
GS-LRM: arxiv 2404.19702
|
||||
Long-LRM: arxiv 2410.12781
|
||||
MV-Adapter: github.com/huanngzh/MV-Adapter · arxiv 2412.03632
|
||||
TripoSR: github.com/VAST-AI-Research/TripoSR · arxiv 2403.02151
|
||||
+17
@@ -0,0 +1,17 @@
|
||||
---
|
||||
name: spike_image_to_3d
|
||||
lang: py
|
||||
domain: datascience
|
||||
description: "Benchmark single-image-to-3D: TripoSR, SF3D, Hunyuan3D-2, Trellis, InstantMesh, Wonder3D — latencia, VRAM, calidad mesh, licencias"
|
||||
tags: [ml, 3d, image-to-3d, benchmark, mesh, gaussian-splatting]
|
||||
uses_functions: []
|
||||
uses_types: []
|
||||
framework: "jupyterlab"
|
||||
entry_point: "notebooks/main.ipynb"
|
||||
dir_path: "projects/imagegen/analysis/spike_image_to_3d"
|
||||
repo_url: ""
|
||||
---
|
||||
|
||||
## Notas
|
||||
|
||||
Benchmark single-image-to-3D: TripoSR, SF3D, Hunyuan3D-2, Trellis, InstantMesh, Wonder3D — latencia, VRAM, calidad mesh, licencias
|
||||
@@ -0,0 +1,6 @@
|
||||
def main() -> None:
    """Print the placeholder greeting of the spike-image-to-3d project."""
    greeting = "Hello from spike-image-to-3d!"
    print(greeting)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,230 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aabd4af1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 01 — Panorama single-image-to-3D (2024-2026)\n",
|
||||
"\n",
|
||||
"**Objetivo:** mapear modelos open source serios para generar 3D (mesh / gaussians / NeRF) desde **una sola imagen**. Sirve de base para:\n",
|
||||
"\n",
|
||||
"1. Decidir cual integramos primero en el backend Python de `image_to_3d_studio`.\n",
|
||||
"2. Documentar trade-offs (latencia, VRAM, calidad, licencia) que el bucle reactivo del registry pueda referenciar.\n",
|
||||
"3. Generar contratos / wrappers comunes en `projects/imagegen/`.\n",
|
||||
"\n",
|
||||
"**Hardware target:** WSL2 Linux + CUDA (RTX local).\n",
|
||||
"\n",
|
||||
"**Vault de outputs:** `projects/imagegen/vaults/imagegen_models/` (mismo que diffusion 2D)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5f7f4a7c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Taxonomia\n",
|
||||
"\n",
|
||||
"Los modelos varian en 3 ejes:\n",
|
||||
"\n",
|
||||
"| Eje | Opciones | Notas |\n",
|
||||
"|---|---|---|\n",
|
||||
"| **Representacion 3D** | mesh (GLB/OBJ), 3D gaussians (PLY), NeRF, SDF/voxel, multi-view RGB | Mesh = mas portable. Gaussians = render rapido pero pipeline raro. |\n",
|
||||
"| **Pipeline** | feed-forward 1-pass, multi-view diffusion + reconstruction, optimization per-image | 1-pass = segundos. Multi-view = decenas de segundos. Optim = minutos. |\n",
|
||||
"| **Texturizado** | albedo baked, PBR (albedo+normal+roughness), sin textura | Hunyuan3D-2 y Trellis = textura aparte. TripoSR = baked vertex color basico. |\n",
|
||||
"\n",
|
||||
"**Familias principales:**\n",
|
||||
"\n",
|
||||
"1. **LRM-likes** (feed-forward triplane→mesh): TripoSR, SF3D, OpenLRM, InstantMesh-recon. Rapidos.\n",
|
||||
"2. **Multi-view diffusion + recon**: Zero123++, Wonder3D, SyncDreamer, InstantMesh. Mejor calidad geometrica con vistas multiples consistentes.\n",
|
||||
"3. **Latent 3D structures** (Microsoft Trellis): structured latents (sparse voxels + features) → decoded a mesh/gaussian/NeRF.\n",
|
||||
"4. **3D-native diffusion**: Hunyuan3D-2, Shap-E, Direct3D. Diffusion entrenado directamente en representacion 3D.\n",
|
||||
"5. **3D Gaussian feed-forward**: LGM, GRM, GS-LRM. Bueno para gaussians, conversion a mesh pierde calidad."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6112267f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tabla comparativa\n",
|
||||
"\n",
|
||||
"| Modelo | Autor / año | Repr. | Latencia GPU* | VRAM | Licencia | Repo | Output nativo |\n",
|
||||
"|---|---|---|---|---|---|---|---|\n",
|
||||
"| **TripoSR** | Stability+Tripo 2024-03 | mesh | ~0.5s | ~6 GB | MIT | github.com/VAST-AI-Research/TripoSR | OBJ (vertex color) |\n",
|
||||
"| **SF3D (StableFast3D)** | Stability 2024-08 | mesh PBR | ~0.5s | ~7 GB | SAIL-1.0 (no comercial) | github.com/Stability-AI/stable-fast-3d | GLB (PBR baked) |\n",
|
||||
"| **InstantMesh** | Tencent 2024-04 | mesh | ~10s | ~12 GB | Apache 2.0 | github.com/TencentARC/InstantMesh | OBJ + textura |\n",
|
||||
"| **Wonder3D** | HKU 2024-01 | mesh + normals | ~3min (optim) | ~16 GB | AGPL-3.0 | github.com/xxlong0/Wonder3D | OBJ |\n",
|
||||
"| **Trellis** | Microsoft 2024-12 | mesh / 3DGS / RF | ~30-60s | ~16 GB | MIT (modelo: research-only) | github.com/microsoft/TRELLIS | GLB / PLY |\n",
|
||||
"| **Hunyuan3D-2** | Tencent 2025-01 | mesh + textura | ~25s mesh + ~20s tex | ~12 GB mesh / ~24 GB full | Tencent Community (uso libre <100M MAU) | github.com/Tencent/Hunyuan3D-2 | GLB con PBR |\n",
|
||||
"\n",
|
||||
"*Latencia aproximada en RTX 4090, batch=1, sin optimizaciones extra (FP16 por defecto).\n",
|
||||
"\n",
|
||||
"**Lectura rapida:**\n",
|
||||
"\n",
|
||||
"- **Velocidad** → TripoSR / SF3D (sub-segundo).\n",
|
||||
"- **Calidad mesh** → Hunyuan3D-2 (mejor textura), Trellis (mejor geometria limpia).\n",
|
||||
"- **Mas balance / variedad de outputs** → Trellis (mesh+gaussian+NeRF del mismo modelo).\n",
|
||||
"- **Mas restrictivo legalmente** → SF3D (no comercial), Wonder3D (AGPL contagia).\n",
|
||||
"- **Mas seguro para producto** → TripoSR (MIT), InstantMesh (Apache 2.0), Trellis (MIT codigo).\n",
|
||||
"- **Mejor textura PBR** → Hunyuan3D-2, SF3D."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9d5dfabe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Decision de roadmap (propuesta)\n",
|
||||
"\n",
|
||||
"**Fase A — backend Python con TripoSR.** Razones:\n",
|
||||
"\n",
|
||||
"- MIT, sin friccion legal.\n",
|
||||
"- Sub-segundo en GPU, perfecto para iterar UI C++.\n",
|
||||
"- VRAM baja, libera GPU para diffusion 2D paralelo.\n",
|
||||
"- Output OBJ trivial de convertir a GLB con `trimesh`.\n",
|
||||
"\n",
|
||||
"**Fase B — anadir Hunyuan3D-2 (calidad).** Razones:\n",
|
||||
"\n",
|
||||
"- SOTA open en textura.\n",
|
||||
"- Licencia community OK para uso personal.\n",
|
||||
"- Necesita VRAM, pero el RTX lo aguanta.\n",
|
||||
"\n",
|
||||
"**Fase C — anadir Trellis (variedad output).** Razones:\n",
|
||||
"\n",
|
||||
"- MIT codigo (modelo solo research, OK para nosotros).\n",
|
||||
"- Mismo modelo da mesh + 3DGS + NeRF → util para experimentar con gaussians sin re-instalar nada.\n",
|
||||
"- Pesado pero VRAM razonable.\n",
|
||||
"\n",
|
||||
"**Aplazado:**\n",
|
||||
"\n",
|
||||
"- **SF3D** — licencia no comercial nos bloquearia despues.\n",
|
||||
"- **Wonder3D** — AGPL contagia el repo entero.\n",
|
||||
"- **InstantMesh** — buen Apache pero queda eclipsado por Trellis y Hunyuan3D-2."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "79b13fbf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Contrato compartido (propuesta tipo del registry)\n",
|
||||
"\n",
|
||||
"Espejo del `GenerationConfig_py_ml` de diffusion 2D:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"@dataclass\n",
|
||||
"class ImageTo3DConfig:\n",
|
||||
" model_id: str # 'triposr' | 'hunyuan3d-2' | 'trellis'\n",
|
||||
" image_path: str # o bytes\n",
|
||||
" seed: int = 0\n",
|
||||
" foreground_ratio: float = 0.85 # bbox crop antes de inferir\n",
|
||||
" mc_resolution: int = 256 # marching cubes (TripoSR / Trellis)\n",
|
||||
" texture: bool = True # Hunyuan3D-2: skip texture pass si False\n",
|
||||
" output_format: str = 'glb' # 'glb' | 'obj' | 'ply'\n",
|
||||
" output_path: str = '' # vault destino\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Cuando estabilice, se promueve a `python/types/ml/image_to_3d_config.py` + `.md` y se anade `tags: [image-to-3d, imagegen]`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ac91228a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Integracion con app C++\n",
|
||||
"\n",
|
||||
"Arquitectura:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"image_to_3d_studio (C++/ImGui)\n",
|
||||
" POST /generate (model_id, image bytes, config) →\n",
|
||||
" backend FastAPI (uvicorn, 127.0.0.1:8600)\n",
|
||||
" dispatcher(model_id)\n",
|
||||
" → triposr_backend() / triposr / 3.10+\n",
|
||||
" → hunyuan3d_backend() / hunyuan3d-2\n",
|
||||
" → trellis_backend() / TRELLIS\n",
|
||||
" ← bytes GLB (Content-Type: model/gltf-binary)\n",
|
||||
" C++ guarda en local_files/cache/{hash}.glb\n",
|
||||
" Viewer GLB (tinygltf + OpenGL) en panel ImGui via FBO\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Aislamiento: cada modelo en su `.venv` separado dentro de `backend/envs/<model>/` para evitar conflictos de torch/CUDA. El dispatcher arranca el subproceso correcto bajo demanda."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e9949f01",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Verificacion entorno\n",
|
||||
"\n",
|
||||
"Comprobar que torch + CUDA funcionan antes de cargar modelos pesados."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c506c304",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"print(f'torch: {torch.__version__}')\n",
|
||||
"print(f'cuda available: {torch.cuda.is_available()}')\n",
|
||||
"if torch.cuda.is_available():\n",
|
||||
" print(f'device: {torch.cuda.get_device_name(0)}')\n",
|
||||
" print(f'vram total: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')\n",
|
||||
" print(f'cuda runtime: {torch.version.cuda}')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "518eb741",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Registry access from the kernel. fn_search returns {'functions': [...], 'types': [...]},\n",
"# so slice the 'functions' list; slicing the dict itself raises.\n",
"fn_search('imagegen')['functions'][:5]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bcbd7d5d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Pasos siguientes\n",
|
||||
"\n",
|
||||
"- **02_smoke_triposr.ipynb** — clonar TripoSR, generar 1 mesh, medir latencia/VRAM, dump GLB en vault.\n",
|
||||
"- **03_smoke_hunyuan3d.ipynb** — repetir con Hunyuan3D-2.\n",
|
||||
"- **04_smoke_trellis.ipynb** — repetir con Trellis (probar los 3 outputs).\n",
|
||||
"- **05_benchmark.ipynb** — set fijo de 10 imagenes, cruzar metricas (latencia, VRAM peak, mesh stats: #vertices, manifoldness, area, watertight).\n",
|
||||
"- Promover contrato `ImageTo3DConfig` a `python/types/ml/`.\n",
|
||||
"- Refactor backend `image_to_3d_studio/backend/` con dispatcher final."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,304 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c9b49319",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 02 — Smoke TripoSR\n",
|
||||
"\n",
|
||||
"**Objetivo:** validar pipeline single-image-to-3D end-to-end con el modelo mas barato (MIT, sub-segundo, ~6 GB VRAM). Antes de pelearnos con Hunyuan3D-2 y Trellis, confirmar:\n",
|
||||
"\n",
|
||||
"1. Repo upstream clona y compila sin drama.\n",
|
||||
"2. Modelo se descarga (HF hub) y carga en GPU.\n",
|
||||
"3. Inferencia genera mesh valido (watertight, manifold-ish).\n",
|
||||
"4. Latencia + VRAM peak coinciden con lo prometido (~0.5s / ~6 GB).\n",
|
||||
"5. Export GLB usable por el viewer C++.\n",
|
||||
"\n",
|
||||
"**Output:** GLB en `vaults/imagegen_models/image_to_3d/triposr/smoke_<timestamp>.glb`.\n",
|
||||
"\n",
|
||||
"## Plan\n",
|
||||
"\n",
|
||||
"- Seccion A: setup (clone repo + install deps + descarga checkpoint).\n",
|
||||
"- Seccion B: imagen sample (de cualquier vault o descargada).\n",
|
||||
"- Seccion C: inferencia + medicion (warm-up + 3 runs).\n",
|
||||
"- Seccion D: mesh stats (vertices, faces, watertight, bounds).\n",
|
||||
"- Seccion E: export GLB con trimesh + verificacion."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a943b390",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## A. Setup\n",
|
||||
"\n",
|
||||
"TripoSR vive en `github.com/VAST-AI-Research/TripoSR`. Clonamos a `sources/` (gitignored, fuera del repo del analysis). Deps extra: `rembg` (segmentacion background), `omegaconf`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4e94d4d4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os, sys, subprocess, pathlib\n",
|
||||
"\n",
|
||||
"REGISTRY_ROOT = pathlib.Path(os.environ['FN_REGISTRY_ROOT'])\n",
|
||||
"SOURCES_DIR = REGISTRY_ROOT / 'sources' / 'TripoSR'\n",
|
||||
"VAULT_DIR = pathlib.Path.home() / 'vaults' / 'imagegen_models' / 'image_to_3d' / 'triposr'\n",
|
||||
"VAULT_DIR.mkdir(parents=True, exist_ok=True)\n",
|
||||
"\n",
|
||||
"if not SOURCES_DIR.exists():\n",
|
||||
" SOURCES_DIR.parent.mkdir(parents=True, exist_ok=True)\n",
|
||||
" subprocess.run(['git', 'clone', '--depth=1',\n",
|
||||
" 'https://github.com/VAST-AI-Research/TripoSR.git',\n",
|
||||
" str(SOURCES_DIR)], check=True)\n",
|
||||
"\n",
|
||||
"# Anadir al sys.path para importar el paquete tsr/\n",
|
||||
"if str(SOURCES_DIR) not in sys.path:\n",
|
||||
" sys.path.insert(0, str(SOURCES_DIR))\n",
|
||||
"\n",
|
||||
"print('SOURCES_DIR:', SOURCES_DIR)\n",
|
||||
"print('VAULT_DIR: ', VAULT_DIR)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "66dbea10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Deps extra del repo. Las instalamos en el .venv del analysis con uv.\n",
|
||||
"# omegaconf, einops, rembg vienen en su requirements.txt. xatlas para UV unwrap (opcional).\n",
|
||||
"import subprocess\n",
|
||||
"subprocess.run(['uv', 'add', 'omegaconf', 'einops', 'rembg[gpu]', 'xatlas', 'onnxruntime-gpu'],\n",
|
||||
" cwd=str(REGISTRY_ROOT / 'projects' / 'imagegen' / 'analysis' / 'spike_image_to_3d'),\n",
|
||||
" check=True)\n",
|
||||
"print('deps OK')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "776fe837",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## B. Imagen sample\n",
|
||||
"\n",
|
||||
"Usamos `examples/chair.png` del propio repo (incluido en el clone)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3ab0f81e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from PIL import Image\n",
|
||||
"sample_path = SOURCES_DIR / 'examples' / 'chair.png'\n",
|
||||
"assert sample_path.exists(), f'sample faltante: {sample_path}'\n",
|
||||
"img = Image.open(sample_path).convert('RGB')\n",
|
||||
"print('imagen:', img.size, img.mode)\n",
|
||||
"img.thumbnail((256, 256))\n",
|
||||
"img\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1aeb1dda",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## C. Inferencia + medicion\n",
|
||||
"\n",
|
||||
"TripoSR API minimo: `TSR.from_pretrained(repo, ...)` → `model(image)` → mesh. El repo guia con `run.py`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eda8e7d2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time, torch\n",
|
||||
"from tsr.system import TSR\n",
|
||||
"from tsr.utils import remove_background, resize_foreground\n",
|
||||
"from rembg import new_session\n",
|
||||
"\n",
|
||||
"DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
|
||||
"print('device:', DEVICE)\n",
|
||||
"\n",
|
||||
"# Carga (la primera vez descarga ~1.2 GB desde HF)\n",
|
||||
"t0 = time.perf_counter()\n",
|
||||
"model = TSR.from_pretrained(\n",
|
||||
" 'stabilityai/TripoSR',\n",
|
||||
" config_name='config.yaml',\n",
|
||||
" weight_name='model.ckpt',\n",
|
||||
")\n",
|
||||
"model.renderer.set_chunk_size(8192)\n",
|
||||
"model.to(DEVICE)\n",
|
||||
"print(f'load: {time.perf_counter() - t0:.2f}s')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "34911de7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Pre-procesado: quitar fondo (necesario para TripoSR — espera foreground sobre canvas vacio)\n",
|
||||
"rembg_session = new_session()\n",
|
||||
"raw = Image.open(sample_path)\n",
|
||||
"fg = remove_background(raw, rembg_session)\n",
|
||||
"fg = resize_foreground(fg, 0.85)\n",
|
||||
"fg.thumbnail((384, 384))\n",
|
||||
"fg\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f646c3c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Warm-up (compilacion CUDA + cache) + 3 runs medidos\n",
|
||||
"torch.cuda.reset_peak_memory_stats()\n",
|
||||
"_ = model([fg], device=DEVICE)\n",
|
||||
"torch.cuda.synchronize()\n",
|
||||
"\n",
|
||||
"runs = []\n",
|
||||
"for i in range(3):\n",
|
||||
" torch.cuda.reset_peak_memory_stats()\n",
|
||||
" t0 = time.perf_counter()\n",
|
||||
" scene_codes = model([fg], device=DEVICE)\n",
|
||||
" torch.cuda.synchronize()\n",
|
||||
" dt = time.perf_counter() - t0\n",
|
||||
" peak = torch.cuda.max_memory_allocated() / 1e9\n",
|
||||
" runs.append({'idx': i, 'inference_s': dt, 'vram_peak_gb': peak})\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"pd.DataFrame(runs)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8150499f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Extraer mesh (marching cubes en la representacion triplane)\n",
|
||||
"t0 = time.perf_counter()\n",
|
||||
"meshes = model.extract_mesh(scene_codes, resolution=256)\n",
|
||||
"dt_mesh = time.perf_counter() - t0\n",
|
||||
"mesh = meshes[0]\n",
|
||||
"print(f'extract_mesh: {dt_mesh:.2f}s')\n",
|
||||
"print(f'vertices: {len(mesh.vertices):,}')\n",
|
||||
"print(f'faces: {len(mesh.faces):,}')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "339bc8f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## D. Mesh stats\n",
|
||||
"\n",
|
||||
"Pasamos por `trimesh` para validar manifoldness y exportar a GLB con normales."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "daffd271",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import trimesh, numpy as np\n",
|
||||
"tm = trimesh.Trimesh(vertices=np.asarray(mesh.vertices),\n",
|
||||
" faces=np.asarray(mesh.faces),\n",
|
||||
" process=True)\n",
|
||||
"stats = {\n",
|
||||
" 'n_vertices': len(tm.vertices),\n",
|
||||
" 'n_faces': len(tm.faces),\n",
|
||||
" 'watertight': bool(tm.is_watertight),\n",
|
||||
" 'volume': float(tm.volume) if tm.is_watertight else None,\n",
|
||||
" 'area': float(tm.area),\n",
|
||||
" 'bounds_min': tm.bounds[0].tolist(),\n",
|
||||
" 'bounds_max': tm.bounds[1].tolist(),\n",
|
||||
" 'euler': int(tm.euler_number),\n",
|
||||
"}\n",
|
||||
"stats\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a21c430",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## E. Export GLB\n",
|
||||
"\n",
|
||||
"Convencion: `<vault>/triposr/smoke_<timestamp>.glb` (timestamp `YYYYmmdd_HHMMSS`). Verificamos que `trimesh.load` lo lee de vuelta sin warnings."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "78614388",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"out_path = VAULT_DIR / f'smoke_{datetime.now():%Y%m%d_%H%M%S}.glb'\n",
|
||||
"tm.export(out_path)\n",
|
||||
"print('saved:', out_path, '-', out_path.stat().st_size, 'bytes')\n",
|
||||
"\n",
|
||||
"# Round-trip verification\n",
|
||||
"roundtrip = trimesh.load(out_path, force='mesh')\n",
|
||||
"print('roundtrip:', len(roundtrip.vertices), 'verts /', len(roundtrip.faces), 'faces')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0b2f2d9f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Conclusion + handoff a backend\n",
|
||||
"\n",
|
||||
"Si las metricas confirman lo prometido (latencia <2s end-to-end, VRAM peak <8 GB, mesh manifold):\n",
|
||||
"\n",
|
||||
"1. Funcion del registry candidata: `triposr_infer_py_ml(image_bytes, seed, mc_resolution) -> bytes (GLB)`. Stub si no quieres atar deps al registry.\n",
|
||||
"2. Backend dispatcher en `apps/image_to_3d_studio/backend/` puede importar directamente desde `sources/TripoSR` (no atado al registry).\n",
|
||||
"3. Cliente C++ envia POST `/generate?model=triposr` con la imagen, recibe bytes GLB y los guarda en `local_files/cache/`.\n",
|
||||
"\n",
|
||||
"Si no cumple (lento / VRAM mayor / mesh roto): debug pre-procesado (rembg buena? foreground bien cropeado?), probar `chunk_size` mas grande, validar checkpoint."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,230 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aabd4af1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 01 — Panorama single-image-to-3D (2024-2026)\n",
|
||||
"\n",
|
||||
"**Objetivo:** mapear modelos open source serios para generar 3D (mesh / gaussians / NeRF) desde **una sola imagen**. Sirve de base para:\n",
|
||||
"\n",
|
||||
"1. Decidir cual integramos primero en el backend Python de `image_to_3d_studio`.\n",
|
||||
"2. Documentar trade-offs (latencia, VRAM, calidad, licencia) que el bucle reactivo del registry pueda referenciar.\n",
|
||||
"3. Generar contratos / wrappers comunes en `projects/imagegen/`.\n",
|
||||
"\n",
|
||||
"**Hardware target:** WSL2 Linux + CUDA (RTX local).\n",
|
||||
"\n",
|
||||
"**Vault de outputs:** `projects/imagegen/vaults/imagegen_models/` (mismo que diffusion 2D)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5f7f4a7c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Taxonomia\n",
|
||||
"\n",
|
||||
"Los modelos varian en 3 ejes:\n",
|
||||
"\n",
|
||||
"| Eje | Opciones | Notas |\n",
|
||||
"|---|---|---|\n",
|
||||
"| **Representacion 3D** | mesh (GLB/OBJ), 3D gaussians (PLY), NeRF, SDF/voxel, multi-view RGB | Mesh = mas portable. Gaussians = render rapido pero pipeline raro. |\n",
|
||||
"| **Pipeline** | feed-forward 1-pass, multi-view diffusion + reconstruction, optimization per-image | 1-pass = segundos. Multi-view = decenas de segundos. Optim = minutos. |\n",
|
||||
"| **Texturizado** | albedo baked, PBR (albedo+normal+roughness), sin textura | Hunyuan3D-2 y Trellis = textura aparte. TripoSR = baked vertex color basico. |\n",
|
||||
"\n",
|
||||
"**Familias principales:**\n",
|
||||
"\n",
|
||||
"1. **LRM-likes** (feed-forward triplane→mesh): TripoSR, SF3D, OpenLRM, InstantMesh-recon. Rapidos.\n",
|
||||
"2. **Multi-view diffusion + recon**: Zero123++, Wonder3D, SyncDreamer, InstantMesh. Mejor calidad geometrica con vistas multiples consistentes.\n",
|
||||
"3. **Latent 3D structures** (Microsoft Trellis): structured latents (sparse voxels + features) → decoded a mesh/gaussian/NeRF.\n",
|
||||
"4. **3D-native diffusion**: Hunyuan3D-2, Shap-E, Direct3D. Diffusion entrenado directamente en representacion 3D.\n",
|
||||
"5. **3D Gaussian feed-forward**: LGM, GRM, GS-LRM. Bueno para gaussians, conversion a mesh pierde calidad."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6112267f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tabla comparativa\n",
|
||||
"\n",
|
||||
"| Modelo | Autor / año | Repr. | Latencia GPU* | VRAM | Licencia | Repo | Output nativo |\n",
|
||||
"|---|---|---|---|---|---|---|---|\n",
|
||||
"| **TripoSR** | Stability+Tripo 2024-03 | mesh | ~0.5s | ~6 GB | MIT | github.com/VAST-AI-Research/TripoSR | OBJ (vertex color) |\n",
|
||||
"| **SF3D (StableFast3D)** | Stability 2024-08 | mesh PBR | ~0.5s | ~7 GB | SAIL-1.0 (no comercial) | github.com/Stability-AI/stable-fast-3d | GLB (PBR baked) |\n",
|
||||
"| **InstantMesh** | Tencent 2024-04 | mesh | ~10s | ~12 GB | Apache 2.0 | github.com/TencentARC/InstantMesh | OBJ + textura |\n",
|
||||
"| **Wonder3D** | HKU 2024-01 | mesh + normals | ~3min (optim) | ~16 GB | AGPL-3.0 | github.com/xxlong0/Wonder3D | OBJ |\n",
|
||||
"| **Trellis** | Microsoft 2024-12 | mesh / 3DGS / RF | ~30-60s | ~16 GB | MIT (modelo: research-only) | github.com/microsoft/TRELLIS | GLB / PLY |\n",
|
||||
"| **Hunyuan3D-2** | Tencent 2025-01 | mesh + textura | ~25s mesh + ~20s tex | ~12 GB mesh / ~24 GB full | Tencent Community (uso libre <100M MAU) | github.com/Tencent/Hunyuan3D-2 | GLB con PBR |\n",
|
||||
"\n",
|
||||
"*Latencia aproximada en RTX 4090, batch=1, sin optimizaciones extra (FP16 por defecto).\n",
|
||||
"\n",
|
||||
"**Lectura rapida:**\n",
|
||||
"\n",
|
||||
"- **Velocidad** → TripoSR / SF3D (sub-segundo).\n",
|
||||
"- **Calidad mesh** → Hunyuan3D-2 (mejor textura), Trellis (mejor geometria limpia).\n",
|
||||
"- **Mas balance / variedad de outputs** → Trellis (mesh+gaussian+NeRF del mismo modelo).\n",
|
||||
"- **Mas restrictivo legalmente** → SF3D (no comercial), Wonder3D (AGPL contagia).\n",
|
||||
"- **Mas seguro para producto** → TripoSR (MIT), InstantMesh (Apache 2.0), Trellis (MIT codigo).\n",
|
||||
"- **Mejor textura PBR** → Hunyuan3D-2, SF3D."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9d5dfabe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Decision de roadmap (propuesta)\n",
|
||||
"\n",
|
||||
"**Fase A — backend Python con TripoSR.** Razones:\n",
|
||||
"\n",
|
||||
"- MIT, sin friccion legal.\n",
|
||||
"- Sub-segundo en GPU, perfecto para iterar UI C++.\n",
|
||||
"- VRAM baja, libera GPU para diffusion 2D paralelo.\n",
|
||||
"- Output OBJ trivial de convertir a GLB con `trimesh`.\n",
|
||||
"\n",
|
||||
"**Fase B — anadir Hunyuan3D-2 (calidad).** Razones:\n",
|
||||
"\n",
|
||||
"- SOTA open en textura.\n",
|
||||
"- Licencia community OK para uso personal.\n",
|
||||
"- Necesita VRAM, pero el RTX lo aguanta.\n",
|
||||
"\n",
|
||||
"**Fase C — anadir Trellis (variedad output).** Razones:\n",
|
||||
"\n",
|
||||
"- MIT codigo (modelo solo research, OK para nosotros).\n",
|
||||
"- Mismo modelo da mesh + 3DGS + NeRF → util para experimentar con gaussians sin re-instalar nada.\n",
|
||||
"- Pesado pero VRAM razonable.\n",
|
||||
"\n",
|
||||
"**Aplazado:**\n",
|
||||
"\n",
|
||||
"- **SF3D** — licencia no comercial nos bloquearia despues.\n",
|
||||
"- **Wonder3D** — AGPL contagia el repo entero.\n",
|
||||
"- **InstantMesh** — licencia Apache 2.0 favorable, pero queda eclipsado por Trellis y Hunyuan3D-2."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "79b13fbf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Contrato compartido (propuesta tipo del registry)\n",
|
||||
"\n",
|
||||
"Espejo del `GenerationConfig_py_ml` de diffusion 2D:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"@dataclass\n",
|
||||
"class ImageTo3DConfig:\n",
|
||||
" model_id: str # 'triposr' | 'hunyuan3d-2' | 'trellis'\n",
|
||||
" image_path: str # o bytes\n",
|
||||
" seed: int = 0\n",
|
||||
" foreground_ratio: float = 0.85 # bbox crop antes de inferir\n",
|
||||
" mc_resolution: int = 256 # marching cubes (TripoSR / Trellis)\n",
|
||||
" texture: bool = True # Hunyuan3D-2: skip texture pass si False\n",
|
||||
" output_format: str = 'glb' # 'glb' | 'obj' | 'ply'\n",
|
||||
" output_path: str = '' # vault destino\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Cuando el contrato se estabilice, se promueve a `python/types/ml/image_to_3d_config.py` + `.md` y se anade `tags: [image-to-3d, imagegen]`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ac91228a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Integracion con app C++\n",
|
||||
"\n",
|
||||
"Arquitectura:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"image_to_3d_studio (C++/ImGui)\n",
|
||||
" POST /generate (model_id, image bytes, config) →\n",
|
||||
" backend FastAPI (uvicorn, 127.0.0.1:8600)\n",
|
||||
" dispatcher(model_id)\n",
|
||||
" → triposr_backend() / triposr / 3.10+\n",
|
||||
" → hunyuan3d_backend() / hunyuan3d-2\n",
|
||||
" → trellis_backend() / TRELLIS\n",
|
||||
" ← bytes GLB (Content-Type: model/gltf-binary)\n",
|
||||
" C++ guarda en local_files/cache/{hash}.glb\n",
|
||||
" Viewer GLB (tinygltf + OpenGL) en panel ImGui via FBO\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Aislamiento: cada modelo en su `.venv` separado dentro de `backend/envs/<model>/` para evitar conflictos de torch/CUDA. El dispatcher arranca el subproceso correcto bajo demanda."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e9949f01",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Verificacion entorno\n",
|
||||
"\n",
|
||||
"Comprobar que torch + CUDA funcionan antes de cargar modelos pesados."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c506c304",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"print(f'torch: {torch.__version__}')\n",
|
||||
"print(f'cuda available: {torch.cuda.is_available()}')\n",
|
||||
"if torch.cuda.is_available():\n",
|
||||
" print(f'device: {torch.cuda.get_device_name(0)}')\n",
|
||||
" print(f'vram total: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')\n",
|
||||
" print(f'cuda runtime: {torch.version.cuda}')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "518eb741",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Acceso al registry desde el kernel\n",
|
||||
"fn_search('imagegen')[:5]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bcbd7d5d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Pasos siguientes\n",
|
||||
"\n",
|
||||
"- **02_smoke_triposr.ipynb** — clonar TripoSR, generar 1 mesh, medir latencia/VRAM, dump GLB en vault.\n",
|
||||
"- **03_smoke_hunyuan3d.ipynb** — repetir con Hunyuan3D-2.\n",
|
||||
"- **04_smoke_trellis.ipynb** — repetir con Trellis (probar los 3 outputs).\n",
|
||||
"- **05_benchmark.ipynb** — set fijo de 10 imagenes, cruzar metricas (latencia, VRAM peak, mesh stats: #vertices, manifoldness, area, watertight).\n",
|
||||
"- Promover contrato `ImageTo3DConfig` a `python/types/ml/`.\n",
|
||||
"- Refactor backend `image_to_3d_studio/backend/` con dispatcher final."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,28 @@
|
||||
[project]
|
||||
name = "spike-image-to-3d"
|
||||
version = "0.1.0"
|
||||
description = "Spike: evaluacion de modelos open source single-image-to-3D (TripoSR, Hunyuan3D-2, Trellis)"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
"accelerate>=1.13.0",
|
||||
"diffusers>=0.37.1",
|
||||
"einops>=0.8.2",
|
||||
"huggingface-hub<1",
|
||||
"jupyter>=1.1.1",
|
||||
"jupyter-collaboration>=4.4.0",
|
||||
"jupyter-mcp-server>=1.0.2",
|
||||
"jupyterlab>=4.5.7",
|
||||
"matplotlib>=3.10.9",
|
||||
"numpy>=2.4.6",
|
||||
"omegaconf>=2.3.0",
|
||||
"pandas>=3.0.3",
|
||||
"pillow>=12.2.0",
|
||||
"rembg[cpu]>=2.0.75",
|
||||
"safetensors>=0.7.0",
|
||||
"torch>=2.12.0",
|
||||
"torchvision>=0.27.0",
|
||||
"transformers==4.46.3",
|
||||
"trimesh>=4.12.2",
|
||||
"xatlas>=0.0.11",
|
||||
]
|
||||
Executable
+50
@@ -0,0 +1,50 @@
|
||||
#!/bin/bash
|
||||
# Jupyter Lab — modo colaborativo con autodeteccion de puerto
|
||||
# Generado por write_jupyter_launcher (fn_registry)
|
||||
|
||||
# Print the first port in 8888-8899 that looks free, or 8888 if none does.
# A port counts as free only when `ss -tln` shows no listener on it AND
# `lsof -i` finds nothing bound to it. Errors from either tool are silenced,
# so a missing tool reads as "nothing found" for that check.
find_free_port() {
    for port in {8888..8899}; do
        # A listener is already reported by ss: try the next candidate.
        if ss -tln 2>/dev/null | grep -q ":${port} "; then
            continue
        fi
        # lsof is consulted only after the ss check has passed.
        if lsof -i:"$port" >/dev/null 2>&1; then
            continue
        fi
        echo "$port"
        return
    done
    # Every candidate looked busy: fall back to the default port.
    echo 8888
}
|
||||
|
||||
# Port: first CLI argument when given, otherwise the first free candidate
# reported by find_free_port.
PORT=${1:-$(find_free_port)}

# Run from the script's own directory. Abort if it cannot be entered, so the
# port file and the server root never land in the caller's working directory.
cd "$(dirname "$0")" || exit 1

# Record the chosen port in .jupyter-port.
# NOTE(review): if the port is taken by the time Jupyter starts, Jupyter may
# bind a different one and this file would be stale — confirm whether readers
# of .jupyter-port need --ServerApp.port_retries=0.
echo "$PORT" > .jupyter-port

# Activate the local virtualenv when present (deliberately best effort).
source .venv/bin/activate 2>/dev/null || true

# IPython startup: load the local .ipython/ (FN_REGISTRY_ROOT, helpers, sys.path)
if [ -d "$(pwd)/.ipython" ]; then
    export IPYTHONDIR="$(pwd)/.ipython"
fi

# Collaboration needs the jupyter_collaboration package; fail early without it.
if ! python -c "import jupyter_collaboration" 2>/dev/null; then
    echo "ERROR: jupyter-collaboration no esta instalado"
    echo "Instala con: uv add jupyter-collaboration"
    exit 1
fi

echo "════════════════════════════════════════════════"
echo " Jupyter Lab + Colaboracion en puerto $PORT"
echo "════════════════════════════════════════════════"
echo ""
echo " Abre: http://localhost:$PORT"
echo " Ctrl+C para detener"
echo ""

# WARNING: token and password are empty, XSRF checks are disabled and every
# origin is allowed. Anything that can reach this port can run code in the
# kernel; only use this on a trusted, local-only machine.
jupyter lab \
    --port="$PORT" \
    --no-browser \
    --ServerApp.token='' \
    --ServerApp.password='' \
    --ServerApp.disable_check_xsrf=True \
    --ServerApp.allow_origin='*' \
    --ServerApp.root_dir="$(pwd)" \
    --collaborative
|
||||
Reference in New Issue
Block a user