From 8284afcba57f0e733286704dfac3db8f2257784d Mon Sep 17 00:00:00 2001
From: Egutierrez <egutierrez@dead.dd>
Date: Wed, 13 May 2026 01:22:02 +0200
Subject: [PATCH] feat(ml): auto-commit con 14 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 dev/issues/0082-compile-sd-cpp-binary.md      |  94 ++++++++++
 .../0083-imagegen-spike02-cross-validation.md |  94 ++++++++++
 dev/issues/0084-imagegen-studio-go-app.md     | 135 ++++++++++++++
 dev/issues/README.md                          |   3 +
 functions/ml/sdcli_generate.go                | 134 ++++++++++++++
 functions/ml/sdcli_generate.md                |  92 ++++++++++
 functions/ml/sdcli_resolve_binary.go          |  88 +++++++++
 functions/ml/sdcli_resolve_binary.md          |  55 ++++++
 functions/ml/sdcli_test.go                    | 114 ++++++++++++
 python/functions/ml/sdcpp_python_generate.md  |  76 ++++++++
 python/functions/ml/sdcpp_python_generate.py  | 103 +++++++++++
 python/functions/ml/sdcpp_python_load.md      |  61 +++++++
 python/functions/ml/sdcpp_python_load.py      |  86 +++++++++
 .../ml/tests/test_sdcpp_python_backend.py     | 167 ++++++++++++++++++
 14 files changed, 1302 insertions(+)
 create mode 100644 dev/issues/0082-compile-sd-cpp-binary.md
 create mode 100644 dev/issues/0083-imagegen-spike02-cross-validation.md
 create mode 100644 dev/issues/0084-imagegen-studio-go-app.md
 create mode 100644 functions/ml/sdcli_generate.go
 create mode 100644 functions/ml/sdcli_generate.md
 create mode 100644 functions/ml/sdcli_resolve_binary.go
 create mode 100644 functions/ml/sdcli_resolve_binary.md
 create mode 100644 functions/ml/sdcli_test.go
 create mode 100644 python/functions/ml/sdcpp_python_generate.md
 create mode 100644 python/functions/ml/sdcpp_python_generate.py
 create mode 100644 python/functions/ml/sdcpp_python_load.md
 create mode 100644 python/functions/ml/sdcpp_python_load.py
 create mode 100644 python/functions/ml/tests/test_sdcpp_python_backend.py

diff --git a/dev/issues/0082-compile-sd-cpp-binary.md b/dev/issues/0082-compile-sd-cpp-binary.md
new file mode 100644
index 00000000..1cb382d4
--- /dev/null
+++ b/dev/issues/0082-compile-sd-cpp-binary.md
@@ -0,0 +1,94 @@
+---
+id: 0082
+title: Compilar binario `sd` (stable-diffusion.cpp) para sdcli_generate_go_ml
+status: pendiente
+priority: media
+created: 2026-05-13
+type: feature
+related_components: [functions/ml/sdcli_generate.go, functions/ml/sdcli_resolve_binary.go, projects/imagegen]
+---
+
+## Objetivo
+
+Compilar el binario `sd` de [leejet/stable-diffusion.cpp](https://github.com/leejet/stable-diffusion.cpp)
+con backend CUDA en este host (WSL2 + RTX 3070) e instalarlo en `$PATH`. Habilita
+los tests reales de `sdcli_generate_go_ml` y el wrapper Go subprocess (Ola 3.C ya
+construido pero con tests en `skip` por falta de binario).
+
+## Contexto
+
+- Funcion Go `sdcli_resolve_binary_go_ml` busca `sd` o `sd-cli` en `$PATH`.
+- `sdcli_generate_go_ml` orquesta args via `genconfig_to_sdcli_args_go_ml`, lanza
+  subproceso con `subprocess_stream_go_core`, parsea progreso con
+  `sdcli_parse_progress_go_ml`, lee PNG de salida.
+- Tests `TestSdcliResolveBinary_NotFound`, `..._Hint` pasan; `TestSdcliGenerate_RequiresBinary`
+  hace `t.Skip()` porque `sd` no existe en `$PATH`.
+- Backend `sdcpp_python_load_py_ml` ya validado con SD Turbo (CPU, 27s/imagen).
+  El binario Go nativo deberia ser comparable o mejor con CUDA.
+
+## Arquitectura
+
+Archivos NUEVOS sugeridos:
+
+- `bash/functions/infra/build_sd_cpp.sh` + `.md` — funcion del registry que clona y
+  compila stable-diffusion.cpp con flags configurables (`-DSD_CUDA=ON`, `-DSD_FLASH_ATTN=ON`,
+  `-DSD_FAST_SOFTMAX=ON`). Idempotente.
+- `bash/functions/infra/install_sd_cpp_bin.sh` + `.md` — copia el binario compilado
+  a `~/.local/bin/sd` o equivalente en `$PATH`.
+
+NO modificar:
+- `functions/ml/sdcli_*.go` — su contrato no cambia, solo se desbloquea el path feliz.
+
+## Tareas
+
+1. Compilacion
+   1.1. Clonar `https://github.com/leejet/stable-diffusion.cpp` en `sources/stable-diffusion.cpp/`.
+   1.2. Verificar requisitos: `cmake >= 3.18`, `gcc`, CUDA toolkit (instalable con
+        `cuda_toolkit_check_bash_infra`). Si CUDA toolkit falta, instalarlo o
+        documentar pasos manuales.
+   1.3. Crear `bash/functions/infra/build_sd_cpp.sh` que:
+        - Acepta flag `--backend cuda|cpu|vulkan`
+        - cmake -B build -DSD_CUDA=ON (segun flag)
+        - cmake --build build -j
+        - Verifica que `build/bin/sd` o `build/sd` existe.
+   1.4. Crear `bash/functions/infra/install_sd_cpp_bin.sh` que copia `sd` a
+        `~/.local/bin/` y verifica `command -v sd`.
+
+2. Smoke test
+   2.1. Ejecutar `sd --version` desde Go: `SdcliResolveBinary("")` debe encontrarlo.
+   2.2. Generar 1 imagen con SD Turbo `.safetensors` y comparar tiempo vs
+        `sdcpp_python` (esperado: similar o mejor con CUDA).
+
+3. Indexar
+   3.1. `./fn index` y verificar 2 funciones nuevas.
+
+4. Cleanup
+   4.1. Re-run `CGO_ENABLED=1 go test -tags fts5 -run TestSdcliGenerate ./functions/ml/`
+        — `TestSdcliGenerate_RequiresBinary` debe pasar sin skip.
+
+## Ejemplo de uso
+
+```bash
+fn run build_sd_cpp --backend cuda
+fn run install_sd_cpp_bin
+sd --help                    # ya en PATH
+./fn doctor ml               # sd_cli debe pasar a "ok"
+```
+
+## Decisiones
+
+- **Compilar en `sources/`** (gitignored) — no commitear binario.
+- **Instalar en `~/.local/bin/`** — sin sudo, en `$PATH` por defecto en shells.
+- **Backend CUDA preferido** — esta maquina tiene RTX 3070 (8GB). CPU es fallback.
+
+## Prerequisitos
+
+- Issues 3.B/3.C completados (sdcpp_python + sdcli go scaffolding).
+- Modelo SD Turbo en vault (ya esta).
+
+## Riesgos
+
+- CUDA toolkit no instalado: `nvcc` ausente segun `fn doctor ml`. Mitigacion:
+  fallback CPU (`-DSD_CUDA=OFF`) o instalar toolkit primero.
+- API rota entre versiones de `sd`: pinear release concreto (tag git) en el script.
+- Binario grande (~200MB con CUDA libs estaticas): vale, sources/ esta gitignored.
diff --git a/dev/issues/0083-imagegen-spike02-cross-validation.md b/dev/issues/0083-imagegen-spike02-cross-validation.md
new file mode 100644
index 00000000..2ff0e6b9
--- /dev/null
+++ b/dev/issues/0083-imagegen-spike02-cross-validation.md
@@ -0,0 +1,94 @@
+---
+id: 0083
+title: imagegen — notebook 02 validacion cruzada diffusers vs sdcpp_python
+status: pendiente
+priority: alta
+created: 2026-05-13
+type: feature
+related_components: [projects/imagegen/analysis/spike_diffusers_vs_sdcpp]
+---
+
+## Objetivo
+
+Notebook `02_cross_validation.ipynb` que ejecuta los mismos `GenerationConfig` con
+los dos backends operativos (diffusers GPU + sdcpp_python CPU) sobre SD Turbo,
+genera grid lado-a-lado con `image_compare_side_by_side_py_ml` y decide
+cuales configs portan bien entre backends y cuales requieren ajuste.
+
+## Contexto
+
+- Backend diffusers GPU operativo (Ola 3.A) — 192ms/imagen warm, VRAM 3097MB.
+- Backend sdcpp_python CPU operativo (Ola 3.B) — 27s/imagen 512x512 4 steps.
+- Funcion `image_compare_side_by_side_py_ml` lista (Ola 3.D) con grid + diff
+  perceptual + pHash + pixel MSE.
+- Documento `Stack de generacion de imagenes` (raiz proyecto imagegen) dice:
+  "Bit-exact entre backends es imposible. Aceptamos diff perceptual."
+
+## Arquitectura
+
+Archivos NUEVOS:
+- `projects/imagegen/analysis/spike_diffusers_vs_sdcpp/notebooks/02_cross_validation.ipynb`
+- `~/vaults/imagegen_models/configs/cross_validated/*.json` — configs que pasan
+- `~/vaults/imagegen_models/outputs/cross/*.png` — grids A | diff | B
+
+NO se crean funciones nuevas — todo se compone de funciones existentes del registry.
+
+## Tareas
+
+1. Plan del notebook (declarar al usuario antes de escribirlo)
+   1.1. Titulo, objetivo, criterio PASS (pHash distance < umbral X, pixel_mse < Y)
+   1.2. Lista de celdas y output esperado por celda
+
+2. Notebook
+   2.1. Celda hardware check + GPU info (reuse).
+   2.2. Definir 4 configs base (seeds 42, 123, 7, 999), SD Turbo, 1-step euler_a, 512x512.
+   2.3. Loop config: generar A=diffusers, B=sdcpp_python.
+   2.4. `image_compare_side_by_side(A, B, label_a="diffusers", label_b="sdcpp")` por par.
+   2.5. Tabla resumen: pHash distance, pixel MSE, duration_a, duration_b.
+   2.6. Veredicto por config: PASS si pHash<=N (a calibrar), FAIL si no.
+   2.7. Guardar grids comparativos a vault.
+
+3. Ejecutar el notebook desde la sesion claude
+   3.1. Lanzar Jupyter si no esta arriba.
+   3.2. Ejecutar celdas 1..N via `jupyter_exec_py_notebook cell`.
+   3.3. Reportar veredicto por config.
+
+4. Conclusion
+   4.1. Si todos los configs PASS → contrato `GenerationConfig` es portable.
+   4.2. Si alguno FAIL → documentar campo problematico (sampler, cfg_scale, ...)
+        y abrir proposal de ajuste.
+
+## Ejemplo de uso
+
+Output esperado al final:
+
+```
+seed=42   pHash_dist=12  pixel_mse=812.4  diffusers=189ms  sdcpp=26200ms  PASS
+seed=123  pHash_dist=14  pixel_mse=901.0  diffusers=192ms  sdcpp=27100ms  PASS
+seed=7    pHash_dist=11  pixel_mse=750.8  diffusers=187ms  sdcpp=26800ms  PASS
+seed=999  pHash_dist=18  pixel_mse=1102.3 diffusers=194ms  sdcpp=27500ms  PASS
+VEREDICTO GLOBAL: PASS (contrato portable)
+```
+
+## Decisiones
+
+- **Umbrales pHash y MSE** se calibran en este notebook — no hay valor previo.
+  Empezar con pHash<=20 (bastante permisivo), pixel_mse<=2000.
+- **`imagehash` puede no estar instalado** en el venv — `pip install imagehash`
+  como primera celda si falta.
+- **Solo SD Turbo en este notebook** — modelos mayores (SDXL Turbo, FLUX) iran en
+  notebooks separados cuando se descarguen.
+
+## Prerequisitos
+
+- Backends 3.A + 3.B operativos (hechos).
+- SD Turbo en vault (hecho).
+- Jupyter del analysis levantado (script `run-jupyter-lab.sh`).
+
+## Riesgos
+
+- sdcpp_python tarda ~27s por imagen en CPU → 4 imagenes x 2 backends = ~2 min
+  de espera real, aceptable.
+- pHash de imagehash requiere instalar el paquete — documentar en cell 0.
+- Si el sampler de sd.cpp difiere demasiado del de diffusers (ej. trailing
+  timesteps de SD Turbo), la diff sera grande aunque la implementacion sea OK.
diff --git a/dev/issues/0084-imagegen-studio-go-app.md b/dev/issues/0084-imagegen-studio-go-app.md
new file mode 100644
index 00000000..fd1a8314
--- /dev/null
+++ b/dev/issues/0084-imagegen-studio-go-app.md
@@ -0,0 +1,135 @@
+---
+id: 0084
+title: imagegen_studio — app Go binario producto (Fase 3 plan stack)
+status: pendiente
+priority: media
+created: 2026-05-13
+type: feature
+related_components: [functions/ml/sdcli_*.go, functions/ml/generation_config_go_ml, projects/imagegen]
+---
+
+## Objetivo
+
+App Go autocontenida `imagegen_studio` que orquesta `sd-cli` para generar imagenes
+sin Python en runtime. Encarna la Fase 3 del documento del stack: binario
+distribuible, `GenerationConfig` Go nativo, subprocess streaming con progreso.
+
+## Contexto
+
+- Toda la capa Go del contrato esta lista: tipos `GenerationConfig_go_ml`,
+  `ModelRef_go_ml`, `LoraRef_go_ml`, `ImageGenResult_go_ml`, interface
+  `ImageGenerator_go_ml`.
+- Funciones Go ya construidas: `sdcli_resolve_binary_go_ml`, `sdcli_generate_go_ml`,
+  `subprocess_stream_go_core`, `genconfig_to_sdcli_args_go_ml`,
+  `genconfig_json_marshal_go_ml`, `sdcli_parse_progress_go_ml`, `get_gpu_info_go_infra`.
+- Plan del documento: producto Go con subprocess gestionando `sd-cli`,
+  binario embebido via `go:embed` o descarga al primer arranque.
+
+## Arquitectura
+
+Path: `projects/imagegen/apps/imagegen_studio/`
+
+```
+imagegen_studio/
+  main.go               # CLI args / TUI entry / HTTP API
+  studio.go             # ImageGenerator wrapper sobre sdcli_generate
+  app.md                # frontmatter del registry
+  CMakeLists.txt        # NO (es Go, usa go.mod)
+  go.mod
+  README.md
+  embed/                # opcional: sd binary embebido
+```
+
+**Pure core / impure shell:**
+- `pkg/`: validacion `GenerationConfig`, serializacion JSON, formato outputs (paths derivados).
+- `shell/` o `studio.go`: invocacion `SdcliGenerate`, IO disco, manejo subproceso.
+
+Tres modos de uso:
+
+1. **CLI**: `imagegen_studio generate --prompt "..." --seed 42 --out out.png`
+2. **HTTP API**: `imagegen_studio serve --port 8088` → POST /generate {GenerationConfig JSON}
+3. **TUI (opcional, Bubble Tea)**: forma interactiva, preview, queue
+
+Empezar por CLI; HTTP API y TUI iterativos.
+
+## Tareas
+
+1. Scaffolding
+   1.1. `fn run init_go_app --project imagegen imagegen_studio` (si existe pipeline)
+        o crear estructura manual.
+   1.2. `app.md` con `framework: cli`, `tags: [ml, imagegen, service?]`,
+        `uses_functions:` lista de las 7 funciones Go citadas.
+   1.3. `go.mod` con dependencias minimas (registry imports + cobra opcional).
+
+2. CLI minima (Fase A)
+   2.1. Subcomando `generate`: flags --prompt/--negative/--seed/--steps/--cfg/--sampler/
+        --width/--height/--model/--out
+   2.2. Construir `GenerationConfig`, llamar `SdcliResolveBinary("")`, `SdcliGenerate(...)`.
+   2.3. Stream progreso a stderr (callback `SdcliProgressCallback`).
+   2.4. Salida final: imprime path de la imagen + duration_ms + JSON meta.
+
+3. JSON I/O (Fase B)
+   3.1. Subcomando `generate-from-json --config path/cfg.json --out out.png`.
+   3.2. Permite pegar configs validados de la fase 2 (notebook cross-validation).
+
+4. HTTP server (Fase C, feature flag)
+   4.1. `imagegen_studio serve --port 8088`.
+   4.2. POST `/generate` body = GenerationConfig JSON → respuesta multipart PNG + meta.
+   4.3. GET `/health` → 200 + version + GpuInfo.
+   4.4. Feature flag `imagegen-studio-server` para esconder cuando no compila/no testeado.
+
+5. e2e_checks
+   5.1. Anadir bloque `e2e_checks` en `app.md`:
+        - `build`: go build con CGO_ENABLED=0
+        - `cli_help`: ./imagegen_studio --help, contiene "generate"
+        - `smoke`: si `sd` binario en $PATH + SD Turbo en vault, generar 1 imagen
+          a /tmp/, verificar PNG valido. Si no: SKIP (warning).
+
+6. Tests
+   6.1. Tests unitarios sobre helpers puros (path derivation, JSON marshaling).
+   6.2. Test integracion en e2e_checks (smoke).
+
+## Ejemplo de uso
+
+```bash
+# CLI directo
+imagegen_studio generate \
+  --model /home/lucas/vaults/imagegen_models/diffusers/sd-turbo/sd_turbo.safetensors \
+  --prompt "a red apple on a wooden table" \
+  --seed 42 --steps 1 --cfg-scale 0.0 --sampler euler_a \
+  --width 512 --height 512 \
+  --out /tmp/apple.png
+
+# Desde config JSON validado en spike notebook 02
+imagegen_studio generate-from-json \
+  --config ~/vaults/imagegen_models/configs/spike01_seed42_*.json \
+  --out /tmp/seed42.png
+
+# HTTP API (feature flag activado)
+imagegen_studio serve --port 8088 &
+curl -X POST -H "Content-Type: application/json" \
+  -d @config.json http://localhost:8088/generate -o out.png
+```
+
+## Decisiones
+
+- **Subprocess via SdcliGenerate** — no cgo ni bindings. Mas robusto, mas lento al
+  arrancar (~200ms cold start), pero overhead irrelevante frente a 1-30s generacion.
+- **NO `go:embed` del binario `sd` en Fase A** — el binario depende de la GPU del
+  usuario (CUDA/CPU/Vulkan). Documentar requisito: tener `sd` en $PATH (issue 0082).
+- **Feature flag para HTTP API** — Fase A es CLI, no romper master con server a medias.
+
+## Prerequisitos
+
+- Issue 0082 (binario `sd` compilado en $PATH) — sin esto el smoke falla pero la
+  app compila. Se puede arrancar el scaffolding antes.
+- Funciones Go de Ola 3.C (hechas).
+
+## Riesgos
+
+- `sd-cli` no soporta SD Turbo cleanly con 1-step euler_a → puede requerir 4-step
+  como minimo. Validar en issue 0082.
+- Distribucion sin binario `sd` empotrado obliga al usuario a instalarlo. Aceptable
+  para Fase A; reevaluar `go:embed` con build-per-backend en Fase C+.
+- Si Fase 0 (spike) del documento revela calidad insuficiente: replantear stack y
+  pausar este issue.
diff --git a/dev/issues/README.md b/dev/issues/README.md
index 14013688..f740401e 100644
--- a/dev/issues/README.md
+++ b/dev/issues/README.md
@@ -104,3 +104,6 @@
 | [0072j](0072j-gamedev-physics-box2d.md) | gamedev — physics 2D (Box2D integration) | pendiente | media | feature | parte de 0072, depende 0072b |
 | [0072k](0072k-gamedev-demo-platformer.md) | gamedev — demo plataformero `engine_demo` (referencia stack completo) | pendiente | alta | feature | parte de 0072, depende 0072b/c/d/j |
 | [0072l](0072l-gamedev-scripting-optional.md) | gamedev — scripting opcional (wren / lua / hot reload) | diferido | baja | feature | parte de 0072 |
+| [0082](0082-compile-sd-cpp-binary.md) | Compilar binario `sd` (stable-diffusion.cpp) para sdcli_generate_go_ml | pendiente | media | feature | desbloquea 0084 |
+| [0083](0083-imagegen-spike02-cross-validation.md) | imagegen — notebook 02 validacion cruzada diffusers vs sdcpp_python | pendiente | alta | feature | — |
+| [0084](0084-imagegen-studio-go-app.md) | imagegen_studio — app Go binario producto (Fase 3 plan stack) | pendiente | media | feature | 0082 |
diff --git a/functions/ml/sdcli_generate.go b/functions/ml/sdcli_generate.go
new file mode 100644
index 00000000..d2fbf466
--- /dev/null
+++ b/functions/ml/sdcli_generate.go
@@ -0,0 +1,134 @@
+package ml
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+	"time"
+
+	"fn-registry/functions/core"
+)
+
+// SdcliProgressCallback is invoked by SdcliGenerate for every stderr line that
+// SdcliParseProgress recognizes as progress. A nil callback is allowed.
+type SdcliProgressCallback func(p SdcliProgress)
+
+// SdcliGenerate runs the sd binary at bin.Path to render one image into outPath
+// and returns the bytes read back from that file.
+//
+// Flow:
+//  1. Build the args with GenconfigToSdcliArgs(cfg) and append "-o", outPath.
+//  2. Start the process through core.SubprocessStream.
+//  3. A goroutine drains the event channel: every stderr line is kept, and when
+//     onProgress != nil and SdcliParseProgress accepts the line, onProgress(p) runs.
+//  4. Wait for the result. ExitCode != 0 => error carrying the last 10 stderr lines.
+//  5. Read outPath and return ImageGenResult with the bytes, Meta and DurationMs.
+//
+// ctx is handed to SubprocessStream unchanged; the original note says it handles
+// cancellation as SIGTERM -> 2s grace -> SIGKILL (not visible here; confirm in core).
+func SdcliGenerate(
+	ctx context.Context,
+	bin SdcliBinary,
+	cfg GenerationConfig,
+	outPath string,
+	onProgress SdcliProgressCallback,
+) (ImageGenResult, error) {
+	start := time.Now()
+
+	args := GenconfigToSdcliArgs(cfg)
+	args = append(args, "-o", outPath)
+
+	events, results := core.SubprocessStream(ctx, bin.Path, args, nil, nil)
+
+	// Drain events in a goroutine: report progress and collect stderr so a
+	// failing run can surface a useful error message.
+	type collectedStderr struct {
+		lines []string
+	}
+	stderrCh := make(chan collectedStderr, 1)
+
+	go func() {
+		var stderrLines []string
+		for ev := range events {
+			if ev.Stream == "stderr" {
+				stderrLines = append(stderrLines, ev.Line)
+				if onProgress != nil {
+					if p, ok := SdcliParseProgress(ev.Line); ok {
+						onProgress(p)
+					}
+				}
+			}
+		}
+		stderrCh <- collectedStderr{lines: stderrLines}
+	}()
+
+	res := <-results
+	collected := <-stderrCh
+
+	if res.Err != nil {
+		return ImageGenResult{}, fmt.Errorf("sdcli subprocess: %w", res.Err)
+	}
+
+	if res.ExitCode != 0 {
+		tail := stderrTail(collected.lines, 10)
+		return ImageGenResult{}, fmt.Errorf(
+			"sdcli exited with code %d:\n%s",
+			res.ExitCode, tail,
+		)
+	}
+
+	imageBytes, err := os.ReadFile(outPath)
+	if err != nil {
+		return ImageGenResult{}, fmt.Errorf("sdcli: reading output image %q: %w", outPath, err)
+	}
+
+	durationMs := time.Since(start).Milliseconds()
+
+	meta := map[string]any{
+		"backend":     "sdcli",
+		"binary_path": bin.Path,
+		"model":       cfg.Model.Name,
+		"seed":        cfg.Seed,
+		"steps":       cfg.Steps,
+		"cfg_scale":   cfg.CfgScale,
+		"sampler":     cfg.Sampler,
+		"width":       cfg.Width,
+		"height":      cfg.Height,
+	}
+	if bin.Version != "" {
+		meta["version"] = bin.Version
+	}
+	if cfg.Model.Path != "" {
+		meta["model_path"] = cfg.Model.Path
+	}
+	if cfg.Model.ModelType != "" {
+		meta["model_type"] = cfg.Model.ModelType
+	}
+	if cfg.Model.Quantization != "" {
+		meta["quantization"] = cfg.Model.Quantization
+	}
+	if len(cfg.Loras) > 0 {
+		loras := make([]string, len(cfg.Loras))
+		for i, l := range cfg.Loras {
+			loras[i] = l.Path + ":" + strconv.FormatFloat(l.Weight, 'f', -1, 64)
+		}
+		meta["loras"] = strings.Join(loras, ",")
+	}
+
+	return ImageGenResult{
+		ImageBytes: imageBytes,
+		Format:     "png",
+		Meta:       meta,
+		DurationMs: durationMs,
+	}, nil
+}
+
+// stderrTail returns the last n entries of lines (all of them if there are n or fewer), joined with "\n".
+func stderrTail(lines []string, n int) string {
+	if len(lines) <= n {
+		return strings.Join(lines, "\n")
+	}
+	return strings.Join(lines[len(lines)-n:], "\n")
+}
diff --git a/functions/ml/sdcli_generate.md b/functions/ml/sdcli_generate.md
new file mode 100644
index 00000000..895452c6
--- /dev/null
+++ b/functions/ml/sdcli_generate.md
@@ -0,0 +1,92 @@
+---
+name: sdcli_generate
+kind: function
+lang: go
+domain: ml
+version: "1.0.0"
+purity: impure
+signature: "func SdcliGenerate(ctx context.Context, bin SdcliBinary, cfg GenerationConfig, outPath string, onProgress SdcliProgressCallback) (ImageGenResult, error)"
+description: "Ejecuta el binario sd de stable-diffusion.cpp para generar una imagen. Construye los args CLI via GenconfigToSdcliArgs, lanza el proceso via SubprocessStream, parsea el progreso de stderr en tiempo real via SdcliParseProgress, y retorna ImageGenResult con los bytes PNG, metadata y duration_ms."
+tags: [ml, sdcli, stablediffusion, imagegen, subprocess, inference, cpp]
+uses_functions:
+  - subprocess_stream_go_core
+  - genconfig_to_sdcli_args_go_ml
+  - sdcli_parse_progress_go_ml
+uses_types:
+  - generation_config_go_ml
+  - image_gen_result_go_ml
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: ["context", "fmt", "os", "strconv", "strings", "time", "fn-registry/functions/core"]
+params:
+  - name: ctx
+    desc: "Context para cancelacion y timeout. Se pasa a SubprocessStream que gestiona SIGTERM -> grace 2s -> SIGKILL."
+  - name: bin
+    desc: "Binario sd resuelto via SdcliResolveBinary. Contiene el path del binario (bin.Path es lo que se ejecuta) y la version detectada, que puede ser vacia."
+  - name: cfg
+    desc: "Parametros de generacion: prompt, seed, steps, sampler, model, loras, etc."
+  - name: outPath
+    desc: "Path donde sd escribe la imagen PNG generada. El archivo se lee y se incluye en ImageGenResult.ImageBytes."
+  - name: onProgress
+    desc: "Callback opcional llamado con cada SdcliProgress parseado de stderr. Nil es valido."
+output: "ImageGenResult con ImageBytes (bytes del PNG), Format='png', Meta (backend, binary_path, model, seed, steps, etc.) y DurationMs medido desde el inicio de la llamada."
+tested: true
+tests:
+  - "integration test skipped when sd binary not in PATH"
+test_file_path: "functions/ml/sdcli_test.go"
+file_path: "functions/ml/sdcli_generate.go"
+---
+
+## Ejemplo
+
+```go
+ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
+defer cancel()
+
+bin, err := SdcliResolveBinary("")
+if err != nil {
+    log.Fatal(err)
+}
+
+cfg := GenerationConfig{
+    Prompt:   "a red apple on a wooden table",
+    Seed:     42,
+    Steps:    20,
+    CfgScale: 7.0,
+    Sampler:  "euler_a",
+    Width:    512,
+    Height:   512,
+    Model: ModelRef{
+        Name:         "sd15",
+        ModelType:    "sd15",
+        Quantization: "fp16",
+        Path:         "/path/to/model.safetensors",
+    },
+}
+
+result, err := SdcliGenerate(ctx, bin, cfg, "/tmp/out.png", func(p SdcliProgress) {
+    fmt.Printf("step %d/%d (%.1f%%)\n", p.Step, p.TotalSteps, p.Percent)
+})
+if err != nil {
+    log.Fatal(err)
+}
+fmt.Printf("generated %d bytes in %dms\n", len(result.ImageBytes), result.DurationMs)
+```
+
+## Notas
+
+El binario `sd` de stable-diffusion.cpp escribe la imagen directamente en disco
+(`-o outPath`). La funcion lee el archivo tras la finalizacion del proceso y
+carga los bytes en `ImageGenResult.ImageBytes`.
+
+Si el proceso termina con `ExitCode != 0`, el error incluye las ultimas 10 lineas
+de stderr para facilitar el diagnostico.
+
+El callback `onProgress` se llama desde la goroutine de lectura de eventos.
+Si el callback hace I/O o es lento, considera usar un canal con buffer para
+desacoplar.
+
+Para modelos SD Turbo / SDXL Turbo con `steps <= 4` y `cfg_scale = 1.0`, el
+sampler `euler_a` es el recomendado. Para SD 1.5 estandar usar `euler` o
+`dpm++2m` con `steps >= 20`.
diff --git a/functions/ml/sdcli_resolve_binary.go b/functions/ml/sdcli_resolve_binary.go
new file mode 100644
index 00000000..7058e855
--- /dev/null
+++ b/functions/ml/sdcli_resolve_binary.go
@@ -0,0 +1,88 @@
+package ml
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+	"time"
+)
+
+// SdcliBinary describes a resolved sd binary: its path, the version string that
+// was detected (if any) and how the binary was located.
+type SdcliBinary struct {
+	Path    string `json:"path"`
+	Version string `json:"version,omitempty"`
+	Source  string `json:"source"` // "config" | "path"
+}
+
+// SdcliResolveBinary locates the sd / sd-cli binary of stable-diffusion.cpp.
+//
+// Lookup order:
+//  1. hint != "": must exist, not be a directory and be executable; Source "config".
+//  2. exec.LookPath("sd"): first PATH candidate, Source "path".
+//  3. exec.LookPath("sd-cli"): second PATH candidate, Source "path".
+//  4. Otherwise a descriptive error is returned.
+//
+// A hint is stored in Path exactly as given (it is not made absolute). Once a
+// binary is found, Version is filled best-effort by sdcliProbeVersion and stays
+// empty when the probe fails; that is never reported as an error.
+func SdcliResolveBinary(hint string) (SdcliBinary, error) {
+	var binPath string
+	var source string
+
+	if hint != "" {
+		info, err := os.Stat(hint)
+		if err != nil {
+			return SdcliBinary{}, fmt.Errorf("sdcli hint %q: %w", hint, err)
+		}
+		if info.IsDir() || info.Mode()&0o111 == 0 {
+			return SdcliBinary{}, fmt.Errorf("sdcli hint %q: exists but is not an executable file", hint)
+		}
+		binPath = hint
+		source = "config"
+	}
+
+	if binPath == "" {
+		if p, err := exec.LookPath("sd"); err == nil {
+			binPath = p
+			source = "path"
+		}
+	}
+
+	if binPath == "" {
+		if p, err := exec.LookPath("sd-cli"); err == nil {
+			binPath = p
+			source = "path"
+		}
+	}
+
+	if binPath == "" {
+		return SdcliBinary{}, fmt.Errorf(
+			"sd binary not found in PATH (hint: install from leejet/stable-diffusion.cpp)",
+		)
+	}
+
+	version := sdcliProbeVersion(binPath)
+	return SdcliBinary{
+		Path:    binPath,
+		Version: version,
+		Source:  source,
+	}, nil
+}
+
+// sdcliProbeVersion runs `<bin> --version` under a 3s timeout and returns the
+// first line of its stdout, trimmed. It returns "" when the command fails and
+// never propagates the error, because the version is best-effort.
+func sdcliProbeVersion(binPath string) string {
+	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
+	defer cancel()
+
+	out, err := exec.CommandContext(ctx, binPath, "--version").Output()
+	if err != nil {
+		return ""
+	}
+	line := strings.TrimSpace(strings.SplitN(string(out), "\n", 2)[0])
+	return line
+}
diff --git a/functions/ml/sdcli_resolve_binary.md b/functions/ml/sdcli_resolve_binary.md
new file mode 100644
index 00000000..4d73fa60
--- /dev/null
+++ b/functions/ml/sdcli_resolve_binary.md
@@ -0,0 +1,55 @@
+---
+name: sdcli_resolve_binary
+kind: function
+lang: go
+domain: ml
+version: "1.0.0"
+purity: impure
+signature: "func SdcliResolveBinary(hint string) (SdcliBinary, error)"
+description: "Localiza el binario sd / sd-cli de stable-diffusion.cpp. Busca en orden: hint explicito, LookPath('sd'), LookPath('sd-cli'). Detecta la version ejecutando --version con timeout 3s. Retorna SdcliBinary con path, version y fuente de resolucion."
+tags: [ml, sdcli, stablediffusion, binary, subprocess, imagegen]
+uses_functions: []
+uses_types: []
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: ["context", "fmt", "os", "os/exec", "strings", "time"]
+params:
+  - name: hint
+    desc: "Path explicito al binario sd. Si es string vacio se busca en PATH. Si no es vacio debe existir y ser ejecutable."
+output: "SdcliBinary con Path (el hint tal cual se paso, o la ruta devuelta por LookPath), Version detectada (puede ser vacia si --version falla) y Source ('config' si viene de hint, 'path' si viene de LookPath)."
+tested: true
+tests:
+  - "missing binary returns error when PATH empty"
+  - "hint path resolves to config source"
+test_file_path: "functions/ml/sdcli_test.go"
+file_path: "functions/ml/sdcli_resolve_binary.go"
+---
+
+## Ejemplo
+
+```go
+// Buscar automaticamente en PATH
+bin, err := SdcliResolveBinary("")
+if err != nil {
+    log.Fatal(err)
+}
+fmt.Printf("sd found at %s (version: %s)\n", bin.Path, bin.Version)
+
+// Hint explicito (ej. desde config de usuario)
+bin, err = SdcliResolveBinary("/opt/stable-diffusion/sd")
+```
+
+## Notas
+
+`SdcliBinary` es el token de resolucion que se pasa a `SdcliGenerate`. Separar
+la resolucion de la ejecucion permite validar el binario al arrancar la app sin
+lanzar una generacion.
+
+La deteccion de version es best-effort: si `sd --version` no existe o falla,
+`Version` queda vacia y no se propaga error. Algunos builds de stable-diffusion.cpp
+no implementan `--version`; en ese caso `Version == ""` es el comportamiento
+esperado.
+
+`Source` distingue binarios configurados explicitamente (`"config"`) de los
+encontrados en PATH (`"path"`), util para logging y diagnostico.
diff --git a/functions/ml/sdcli_test.go b/functions/ml/sdcli_test.go
new file mode 100644
index 00000000..06ca407a
--- /dev/null
+++ b/functions/ml/sdcli_test.go
@@ -0,0 +1,114 @@
+package ml
+
+import (
+	"context"
+	"os"
+	"testing"
+	"time"
+)
+
+// TestSdcliResolveBinary_NotFound checks that SdcliResolveBinary returns an
+// error when there is no hint and nothing in PATH. PATH is forced to "" so
+// LookPath cannot find anything, which keeps the test deterministic regardless
+// of the developer's environment.
+func TestSdcliResolveBinary_NotFound(t *testing.T) {
+	t.Setenv("PATH", "")
+	_, err := SdcliResolveBinary("")
+	if err == nil {
+		t.Fatal("expected error when sd not in PATH, got nil")
+	}
+}
+
+// TestSdcliResolveBinary_Hint checks that a valid hint (an executable file)
+// resolves with Source="config" and Path equal to the hint, without using PATH.
+func TestSdcliResolveBinary_Hint(t *testing.T) {
+	// Create a temporary executable that stands in for the sd binary. The
+	// script prints 'sd-test 0.1' for any arguments; the detected version is
+	// not asserted here because version detection is best-effort.
+	f, err := os.CreateTemp("", "sd-test-*")
+	if err != nil {
+		t.Fatalf("creating temp file: %v", err)
+	}
+	defer os.Remove(f.Name())
+
+	f.Close()
+
+	script := []byte("#!/bin/sh\necho 'sd-test 0.1'\n")
+	if err := os.WriteFile(f.Name(), script, 0o755); err != nil {
+		t.Fatalf("writing temp file: %v", err)
+	}
+	if err := os.Chmod(f.Name(), 0o755); err != nil {
+		t.Fatalf("chmod temp file: %v", err)
+	}
+
+	bin, err := SdcliResolveBinary(f.Name())
+	if err != nil {
+		t.Fatalf("SdcliResolveBinary(hint): %v", err)
+	}
+	if bin.Source != "config" {
+		t.Fatalf("expected source=config, got %q", bin.Source)
+	}
+	if bin.Path != f.Name() {
+		t.Fatalf("expected path=%q, got %q", f.Name(), bin.Path)
+	}
+}
+
+// TestSdcliGenerate_RequiresBinary is an integration test. It is skipped when
+// the sd binary is not in PATH or when the SD Turbo model file is missing from
+// the hard-coded vault path; otherwise it runs one real generation.
+func TestSdcliGenerate_RequiresBinary(t *testing.T) {
+	bin, err := SdcliResolveBinary("")
+	if err != nil {
+		t.Skipf("sd binary not in PATH, skipping integration test: %v", err)
+	}
+
+	modelPath := "/home/lucas/vaults/imagegen_models/diffusers/sd-turbo/sd_turbo.safetensors"
+	if _, err := os.Stat(modelPath); err != nil {
+		t.Skipf("SD Turbo model not in vault (%s), skipping: %v", modelPath, err)
+	}
+
+	cfg := GenerationConfig{
+		Prompt:   "a red apple",
+		Seed:     42,
+		Steps:    4,
+		CfgScale: 1.0,
+		Sampler:  "euler_a",
+		Width:    512,
+		Height:   512,
+		Model: ModelRef{
+			Name:         "sd-turbo",
+			ModelType:    "sd15",
+			Quantization: "fp16",
+			Path:         modelPath,
+		},
+	}
+
+	outPath := t.TempDir() + "/out.png"
+
+	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
+	defer cancel()
+
+	var progressCalled bool
+	res, err := SdcliGenerate(ctx, bin, cfg, outPath, func(p SdcliProgress) {
+		progressCalled = true
+		t.Logf("progress: step %d/%d (%.1f%%) %.2fit/s",
+			p.Step, p.TotalSteps, p.Percent, p.ItPerSec)
+	})
+	if err != nil {
+		t.Fatalf("SdcliGenerate: %v", err)
+	}
+	if len(res.ImageBytes) == 0 {
+		t.Fatal("expected non-empty image bytes")
+	}
+	if res.Format != "png" {
+		t.Fatalf("expected format=png, got %q", res.Format)
+	}
+	if res.DurationMs <= 0 {
+		t.Fatalf("expected positive duration_ms, got %d", res.DurationMs)
+	}
+	if res.Meta["backend"] != "sdcli" {
+		t.Fatalf("expected meta.backend=sdcli, got %v", res.Meta["backend"])
+	}
+	t.Logf("generated %d bytes in %dms (progress_called=%v)",
+		len(res.ImageBytes), res.DurationMs, progressCalled)
+}
diff --git a/python/functions/ml/sdcpp_python_generate.md b/python/functions/ml/sdcpp_python_generate.md
new file mode 100644
index 00000000..37eb8624
--- /dev/null
+++ b/python/functions/ml/sdcpp_python_generate.md
@@ -0,0 +1,76 @@
+---
+name: sdcpp_python_generate
+kind: function
+lang: py
+domain: ml
+version: "1.0.0"
+purity: impure
+signature: "def sdcpp_python_generate(sd: Any, cfg: GenerationConfig) -> ImageGenResult"
+description: "Genera una imagen con un StableDiffusion (stable-diffusion-cpp-python) usando GenerationConfig como contrato. Mapea sampler, mide duracion y retorna ImageGenResult con meta del backend."
+tags: [ml, stable-diffusion, sdcpp, inference, backend, generate, txt2img]
+uses_functions: [sdcpp_python_load_py_ml]
+uses_types: [generation_config_py_ml, image_gen_result_py_ml]
+returns: [image_gen_result_py_ml]
+returns_optional: false
+error_type: "error_go_core"
+imports: [stable_diffusion_cpp, PIL]
+params:
+  - name: sd
+    desc: "Instancia StableDiffusion cargada via sdcpp_python_load. Debe tener metodo generate_image()."
+  - name: cfg
+    desc: "Contrato de parametros de generacion. cfg.sampler debe ser uno de: euler | euler_a | dpm++2m | dpm++2m_v2 | heun | dpm2 | lcm."
+output: "ImageGenResult con image=PIL.Image (primera del batch), meta con backend/model/sampler/actual_steps/seed/wtype, duration_ms medido, vram_peak_mb=None."
+tested: true
+tests:
+  - "generate retorna ImageGenResult valido"
+  - "duration_ms mayor que cero"
+  - "meta backend es sdcpp_python"
+test_file_path: "python/functions/ml/tests/test_sdcpp_python_backend.py"
+file_path: "python/functions/ml/sdcpp_python_generate.py"
+---
+
+## Ejemplo
+
+```python
+import sys
+sys.path.insert(0, "python/functions/ml")
+from model_ref import ModelRef
+from generation_config import GenerationConfig
+from sdcpp_python_load import sdcpp_python_load
+from sdcpp_python_generate import sdcpp_python_generate
+
+model = ModelRef(
+    name="sd-turbo",
+    model_type="sd15",
+    quantization="fp16",
+    path="/home/lucas/vaults/imagegen_models/diffusers/sd-turbo/sd_turbo.safetensors",
+)
+sd = sdcpp_python_load(model)
+
+cfg = GenerationConfig(
+    prompt="a red cat sitting on a wooden table",
+    seed=42,
+    steps=4,
+    cfg_scale=1.0,
+    sampler="euler_a",
+    width=512,
+    height=512,
+    model=model,
+)
+result = sdcpp_python_generate(sd, cfg)
+result.image.save("/tmp/output.png")
+print(f"Generado en {result.duration_ms}ms, meta={result.meta}")
+```
+
+## Notas
+
+- El sampler mapping canonico: euler->euler, euler_a->euler_a, dpm++2m->dpmpp2m,
+  dpm++2m_v2->dpmpp2mv2, heun->heun, dpm2->dpm2, lcm->lcm.
+- API usada: `StableDiffusion.generate_image()` (binding 0.4.7+). Versiones anteriores
+  exponían `txt_to_img()`; si `generate_image` no existe (AttributeError), actualizar el package.
+- `vram_peak_mb` siempre None: stable-diffusion-cpp-python no expone medicion de VRAM.
+- `clip_skip`: -1 le dice al backend que use el valor por defecto del modelo (equivale a
+  no especificarlo). Si cfg.clip_skip es None, se pasa -1.
+- El campo `wtype` en meta se extrae via `getattr(sd, 'wtype', 'unknown')` ya que el
+  binding no garantiza el atributo en todas las versiones.
+---
diff --git a/python/functions/ml/sdcpp_python_generate.py b/python/functions/ml/sdcpp_python_generate.py
new file mode 100644
index 00000000..f57b7d0d
--- /dev/null
+++ b/python/functions/ml/sdcpp_python_generate.py
@@ -0,0 +1,103 @@
+"""sdcpp_python_generate — genera una imagen con stable-diffusion-cpp-python a partir de GenerationConfig."""
+
+from __future__ import annotations
+
+import sys
+import os
+import time
+from typing import Any
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+from generation_config import GenerationConfig
+from image_gen_result import ImageGenResult
+
+# Map from registry sampler names to the names stable-diffusion-cpp-python expects
+_SAMPLER_MAP: dict[str, str] = {
+    "euler": "euler",
+    "euler_a": "euler_a",
+    "dpm++2m": "dpmpp2m",
+    "dpm++2m_v2": "dpmpp2mv2",
+    "heun": "heun",
+    "dpm2": "dpm2",
+    "lcm": "lcm",
+}
+
+
+def sdcpp_python_generate(sd: Any, cfg: GenerationConfig) -> ImageGenResult:
+    """Generate one image from a StableDiffusion object driven by a GenerationConfig.
+
+    The config fields are forwarded as keyword arguments to
+    ``sd.generate_image()`` with ``batch_count=1`` and only the first returned
+    image is kept. The call is timed with ``time.perf_counter()``.
+
+    Args:
+        sd:  Object exposing ``generate_image()``. Its ``wtype`` attribute, when
+             present, is copied into the metadata ("unknown" otherwise).
+        cfg: Generation parameters. ``cfg.sampler`` must be a key of
+             ``_SAMPLER_MAP``; a falsy ``negative_prompt`` is sent as "" and a
+             ``None`` ``clip_skip`` as -1.
+
+    Returns:
+        ImageGenResult with the first image, a meta dict holding
+        backend/model/sampler/actual_steps/seed/wtype, the elapsed time truncated
+        to whole milliseconds, and ``vram_peak_mb=None`` (VRAM is not measured).
+
+    Raises:
+        ImportError:  If stable_diffusion_cpp cannot be imported.
+        KeyError:     If cfg.sampler has no entry in _SAMPLER_MAP.
+        RuntimeError: If generate_image returns a falsy result (None or empty).
+    """
+    # Fail with an install hint before doing any work if the binding is missing.
+    try:
+        from stable_diffusion_cpp import StableDiffusion  # noqa: F401 — availability check
+    except ImportError as exc:
+        install_hint = (
+            "sdcpp_python_generate requiere stable-diffusion-cpp-python. "
+            "Instalar con: pip install stable-diffusion-cpp-python"
+        )
+        raise ImportError(install_hint) from exc
+
+    if cfg.sampler not in _SAMPLER_MAP:
+        raise KeyError(
+            f"Sampler '{cfg.sampler}' no tiene correspondencia en sdcpp_python_generate. "
+            f"Valores soportados: {list(_SAMPLER_MAP.keys())}"
+        )
+    backend_sampler = _SAMPLER_MAP[cfg.sampler]
+
+    # Kept for the metadata only; the attribute may be absent on the object.
+    weight_type = getattr(sd, "wtype", "unknown")
+
+    started = time.perf_counter()
+    request: dict[str, Any] = {
+        "prompt": cfg.prompt,
+        "negative_prompt": cfg.negative_prompt or "",
+        "cfg_scale": cfg.cfg_scale,
+        "sample_method": backend_sampler,
+        "sample_steps": cfg.steps,
+        "seed": cfg.seed,
+        "width": cfg.width,
+        "height": cfg.height,
+        "clip_skip": -1 if cfg.clip_skip is None else cfg.clip_skip,
+        "batch_count": 1,
+    }
+    images = sd.generate_image(**request)
+    elapsed_s = time.perf_counter() - started
+
+    if not images:
+        raise RuntimeError(
+            "sdcpp_python_generate: generate_image retorno lista vacia o None."
+        )
+
+    result_meta: dict[str, Any] = {
+        "backend": "sdcpp_python",
+        "model": cfg.model.name,
+        "sampler": cfg.sampler,
+        "actual_steps": cfg.steps,
+        "seed": cfg.seed,
+        "wtype": weight_type,
+    }
+    first_image = images[0]
+    return ImageGenResult(
+        image=first_image, meta=result_meta, duration_ms=int(elapsed_s * 1000), vram_peak_mb=None
+    )
diff --git a/python/functions/ml/sdcpp_python_load.md b/python/functions/ml/sdcpp_python_load.md
new file mode 100644
index 00000000..12f570ab
--- /dev/null
+++ b/python/functions/ml/sdcpp_python_load.md
@@ -0,0 +1,61 @@
+---
+name: sdcpp_python_load
+kind: function
+lang: py
+domain: ml
+version: "1.0.0"
+purity: impure
+signature: "def sdcpp_python_load(model: ModelRef, n_threads: int = -1, wtype: str = 'default', rng_type: str = 'cuda') -> Any"
+description: "Carga un StableDiffusion via stable-diffusion-cpp-python con cache global por (model_key, wtype, n_threads). Segunda llamada con mismos params retorna instancia cacheada sin recargar disco."
+tags: [ml, stable-diffusion, sdcpp, inference, backend, cache, load]
+uses_functions: []
+uses_types: [model_ref_py_ml]
+returns: []
+returns_optional: false
+error_type: "error_go_core"
+imports: [stable_diffusion_cpp]
+params:
+  - name: model
+    desc: "Referencia al modelo. model.path se usa si presente; si no, model.name como ruta local o HuggingFace hub."
+  - name: n_threads
+    desc: "Numero de hilos CPU para inferencia. -1 usa todos los disponibles."
+  - name: wtype
+    desc: "Tipo de pesos en memoria: 'default' | 'f32' | 'f16' | 'q8_0' | 'q5_1' | 'q5_0' | 'q4_1' | 'q4_0'. 'default' respeta el tipo original del checkpoint."
+  - name: rng_type
+    desc: "Generador de aleatorios: 'std_default' | 'cuda'. 'cuda' produce resultados compatibles con la implementacion CUDA incluso en CPU."
+output: "Instancia StableDiffusion (stable_diffusion_cpp.StableDiffusion) lista para llamar a generate_image()."
+tested: true
+tests:
+  - "load retorna objeto StableDiffusion"
+  - "segunda llamada retorna instancia cacheada"
+test_file_path: "python/functions/ml/tests/test_sdcpp_python_backend.py"
+file_path: "python/functions/ml/sdcpp_python_load.py"
+---
+
+## Ejemplo
+
+```python
+import sys
+sys.path.insert(0, "python/functions/ml")
+from model_ref import ModelRef
+from sdcpp_python_load import sdcpp_python_load
+
+model = ModelRef(
+    name="sd-turbo",
+    model_type="sd15",
+    quantization="fp16",
+    path="/home/lucas/vaults/imagegen_models/diffusers/sd-turbo/sd_turbo.safetensors",
+)
+sd = sdcpp_python_load(model, n_threads=-1, wtype="default")
+# sd listo para sd.generate_image(...)
+```
+
+## Notas
+
+- El cache evita recargas de disco en bucles de generacion con el mismo modelo.
+- `wtype="default"` respeta el tipo de cuantizacion del checkpoint; util para safetensors mixtos.
+- `rng_type="cuda"` produce seeds compatibles con la implementacion GPU aunque se corra en CPU.
+- Para limpiar el cache en tests: `from sdcpp_python_load import _clear_sd_cache; _clear_sd_cache()`.
+- Compilacion sin CUDA: `CMAKE_ARGS="-DSD_CUDA=OFF" pip install stable-diffusion-cpp-python`.
+- El binding 0.4.7 usa `generate_image()` (no `txt_to_img` que era la API de versiones anteriores).
+---
diff --git a/python/functions/ml/sdcpp_python_load.py b/python/functions/ml/sdcpp_python_load.py
new file mode 100644
index 00000000..986252de
--- /dev/null
+++ b/python/functions/ml/sdcpp_python_load.py
@@ -0,0 +1,86 @@
+"""sdcpp_python_load — carga un StableDiffusion (stable-diffusion-cpp-python) con cache global."""
+
+from __future__ import annotations
+
+import sys
+import os
+from typing import Any
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+from model_ref import ModelRef
+
+# Global cache: (model_key, wtype, n_threads, rng_type) -> StableDiffusion object
+_SD_CACHE: dict[tuple[str, str, int, str], Any] = {}
+
+
+def _get_model_key(model: ModelRef) -> str:
+    """Return the model identifier used for caching: the path when set, else the name."""
+    return model.path if model.path else model.name
+
+
+def sdcpp_python_load(
+    model: ModelRef,
+    n_threads: int = -1,
+    wtype: str = "default",
+    rng_type: str = "cuda",
+) -> Any:
+    """Load a StableDiffusion via stable-diffusion-cpp-python, with a global cache.
+
+    The checkpoint is taken from model.path, or from model.name when path is
+    empty. Instances are cached in memory by
+    (model_key, wtype, n_threads, rng_type): a repeated call with the same
+    arguments returns the cached instance instead of building a new one.
+    rng_type is part of the key because it is a constructor argument; leaving
+    it out would hand back an instance built with a different generator than
+    the one the caller asked for.
+
+    Args:
+        model:     Model reference. model.path is used when set; otherwise
+                   model.name is passed as model_path.
+        n_threads: Number of CPU threads for inference; -1 requests all available.
+        wtype:     In-memory weight type: "default" | "f32" | "f16" | "q8_0" |
+                   "q5_1" | "q5_0" | "q4_1" | "q4_0". "default" keeps the
+                   checkpoint's own type.
+        rng_type:  Random number generator: "std_default" | "cuda".
+
+    Returns:
+        StableDiffusion instance ready for generate_image().
+
+    Raises:
+        ImportError: If stable_diffusion_cpp is not installed.
+
+    Errors raised by the StableDiffusion constructor (for example for a bad
+    model path) propagate unchanged and nothing is cached in that case.
+    """
+    try:
+        from stable_diffusion_cpp import StableDiffusion
+    except ImportError as exc:
+        raise ImportError(
+            "sdcpp_python_load requiere stable-diffusion-cpp-python. "
+            "Instalar con: pip install stable-diffusion-cpp-python\n"
+            "Para compilar sin CUDA: "
+            "CMAKE_ARGS='-DSD_CUDA=OFF' pip install stable-diffusion-cpp-python"
+        ) from exc
+
+    model_key = _get_model_key(model)
+    # Every constructor argument takes part in the key, including rng_type.
+    cache_key = (model_key, wtype, n_threads, rng_type)
+
+    if cache_key in _SD_CACHE:
+        return _SD_CACHE[cache_key]
+
+    sd = StableDiffusion(
+        model_path=model_key,  # path when set, otherwise the model name
+        wtype=wtype,
+        n_threads=n_threads,
+        rng_type=rng_type,
+    )
+
+    _SD_CACHE[cache_key] = sd
+    return sd
+
+
+def _clear_sd_cache() -> None:
+    """Empty the global StableDiffusion instance cache (internal use and tests)."""
+    _SD_CACHE.clear()
diff --git a/python/functions/ml/tests/test_sdcpp_python_backend.py b/python/functions/ml/tests/test_sdcpp_python_backend.py
new file mode 100644
index 00000000..4ab4db8b
--- /dev/null
+++ b/python/functions/ml/tests/test_sdcpp_python_backend.py
@@ -0,0 +1,167 @@
+"""Tests para el backend sdcpp_python: sdcpp_python_load y sdcpp_python_generate."""
+
+from __future__ import annotations
+
+import sys
+import os
+
+import pytest
+
+# Put the directory two levels up (the parent of the `ml` package) on sys.path for `ml.*` imports.
+_ML_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+sys.path.insert(0, _ML_PATH)
+
+# Skip every test in this module when the package is not installed.
+pytest.importorskip(
+    "stable_diffusion_cpp",
+    reason="stable_diffusion_cpp no instalado — skip tests sdcpp_python backend",
+)
+
+# The code under test imports its sibling modules without a package prefix
+# (model_ref, generation_config, ...). Register the `ml.*` modules under those bare
+# names first so both import styles resolve to the same module objects.
+import ml.model_ref as _mref_module
+import ml.generation_config as _gcfg_module
+import ml.image_gen_result as _igr_module
+
+for _alias, _mod in [
+    ("model_ref", _mref_module),
+    ("generation_config", _gcfg_module),
+    ("image_gen_result", _igr_module),
+]:
+    if _alias not in sys.modules:
+        sys.modules[_alias] = _mod  # type: ignore[assignment]
+
+from ml.model_ref import ModelRef
+from ml.generation_config import GenerationConfig
+from ml.image_gen_result import ImageGenResult
+from ml.sdcpp_python_load import sdcpp_python_load, _clear_sd_cache
+from ml.sdcpp_python_generate import sdcpp_python_generate
+
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+SD_TURBO_SAFETENSORS = (
+    "/home/lucas/vaults/imagegen_models/diffusers/sd-turbo/sd_turbo.safetensors"
+)
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(scope="session")
+def sd_turbo_model() -> ModelRef:
+    """Return a ModelRef for the local SD Turbo safetensors file.
+
+    The dependent tests are skipped when the file is not present on disk.
+    """
+    checkpoint = SD_TURBO_SAFETENSORS
+    if os.path.isfile(checkpoint):
+        return ModelRef(
+            name="sd-turbo", model_type="sd15", quantization="fp16", path=checkpoint
+        )
+    # pytest.skip raises, so nothing is returned on this path.
+    pytest.skip(f"Modelo SD Turbo no encontrado en {SD_TURBO_SAFETENSORS}")
+
+
+@pytest.fixture(scope="session")
+def loaded_sd(sd_turbo_model: ModelRef):
+    """Yield a StableDiffusion instance loaded once for the whole test session."""
+    # Start from an empty cache so the load below really builds the instance.
+    _clear_sd_cache()
+    yield sdcpp_python_load(model=sd_turbo_model, wtype="default", n_threads=-1)
+    _clear_sd_cache()  # teardown: drop the cached instance
+
+
+@pytest.fixture(scope="session")
+def sd_turbo_cfg(sd_turbo_model: ModelRef) -> GenerationConfig:
+    """Return the SD Turbo config: 512x512, 4 steps, euler_a, cfg_scale 1.0, seed 42."""
+    return GenerationConfig(
+        model=sd_turbo_model,
+        prompt="a simple red apple on a white table",
+        negative_prompt=None,
+        sampler="euler_a",
+        steps=4,
+        cfg_scale=1.0,
+        seed=42,
+        width=512,
+        height=512,
+        loras=[],
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+def test_load_retorna_objeto(loaded_sd) -> None:
+    """load retorna objeto StableDiffusion"""
+    # The loader must hand back an instance of the binding's StableDiffusion class.
+    import stable_diffusion_cpp
+
+    failure_msg = f"Se esperaba StableDiffusion, se obtuvo {type(loaded_sd)}"
+    assert isinstance(loaded_sd, stable_diffusion_cpp.StableDiffusion), failure_msg
+
+
+def test_load_caches(sd_turbo_model: ModelRef, loaded_sd) -> None:
+    """segunda llamada retorna instancia cacheada"""
+    # Same arguments as the loaded_sd fixture, so this must be a cache hit.
+    from time import perf_counter
+
+    started = perf_counter()
+    again = sdcpp_python_load(model=sd_turbo_model, wtype="default", n_threads=-1)
+    elapsed = perf_counter() - started
+
+    assert again is loaded_sd, "Segunda llamada debe retornar la misma instancia cacheada"
+    slow_msg = f"Segunda llamada tardo {elapsed:.3f}s — deberia ser inmediata (cache hit)"
+    assert elapsed < 0.5, slow_msg
+
+
+def test_generate_retorna_image_gen_result(
+    loaded_sd, sd_turbo_cfg: GenerationConfig
+) -> None:
+    """generate retorna ImageGenResult valido"""
+    # Checks the result type and that an image was attached.
+    result = sdcpp_python_generate(sd=loaded_sd, cfg=sd_turbo_cfg)
+    type_msg = f"Se esperaba ImageGenResult, se obtuvo {type(result)}"
+
+    assert isinstance(result, ImageGenResult), type_msg
+    assert result.image is not None, "result.image no debe ser None"
+
+
+def test_duration_ms_mayor_que_cero(
+    loaded_sd, sd_turbo_cfg: GenerationConfig
+) -> None:
+    """duration_ms mayor que cero"""
+    # A real generation must report a strictly positive elapsed time.
+    result = sdcpp_python_generate(sd=loaded_sd, cfg=sd_turbo_cfg)
+    failure_msg = f"duration_ms debe ser > 0, se obtuvo {result.duration_ms}"
+    assert result.duration_ms > 0, failure_msg
+
+
+def test_meta_backend_es_sdcpp_python(
+    loaded_sd, sd_turbo_cfg: GenerationConfig
+) -> None:
+    """meta backend es sdcpp_python"""
+    result = sdcpp_python_generate(sd=loaded_sd, cfg=sd_turbo_cfg)
+    backend_msg = (
+        f"meta['backend'] debe ser 'sdcpp_python', se obtuvo {result.meta.get('backend')!r}"
+    )
+    assert result.meta.get("backend") == "sdcpp_python", backend_msg
+    echoed = [result.meta.get(key) for key in ("model", "sampler", "seed")]
+    assert echoed == [sd_turbo_cfg.model.name, sd_turbo_cfg.sampler, sd_turbo_cfg.seed]
+
+
+def test_vram_peak_mb_es_none(
+    loaded_sd, sd_turbo_cfg: GenerationConfig
+) -> None:
+    """vram_peak_mb es None — sdcpp no expone medicion de VRAM"""
+    # The backend reports no VRAM measurement, so the field must stay None.
+    result = sdcpp_python_generate(sd=loaded_sd, cfg=sd_turbo_cfg)
+    failure_msg = f"vram_peak_mb debe ser None, se obtuvo {result.vram_peak_mb}"
+    assert result.vram_peak_mb is None, failure_msg