From 69d9aed46a4d86d10df70fb24e62b1025b9fdac2 Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Wed, 24 Jun 2026 19:02:10 +0200 Subject: [PATCH] feat(ml): mixer de capacidades comfyui (compose + generate_mixed_oneshot + inject controlnet/ipadapter) Mezclador del grupo comfyui-skill que promueve a una sola llamada la secuencia base -> compose -> submit -> wait -> fetch -> judge (issue 0087): - comfyui_compose_capabilities_py_ml (PURA): aplica en orden las capacidades activadas (loras, controlnet, ipadapter, facedetailer, hires) sobre un workflow base, sin mutar la entrada. - comfyui_generate_mixed_oneshot_py_pipelines: one-shot que resuelve el base (skill/txt2img/dict), compone, encola, espera, descarga el PNG y lo puntua con el panel comfyui-judge. - comfyui_inject_controlnet_py_ml, comfyui_inject_ipadapter_py_ml: inyectores encadenables que consume el compose. - Tests (24 passed) + pagina madre docs/capabilities/comfyui-skill.md. Prueba real en GPU: txt2img dreamshaper_8 + 2 LoRAs (3d_render_redmond + detail_tweaker) + FaceDetailer -> imagen 512x512 en ~24s, juez verdict 'good' (score 4.69, votos aesthetic+clip good; voto llm degradado por rate-limit 429). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/capabilities/comfyui-skill.md | 68 ++++++ .../ml/comfyui_compose_capabilities.md | 87 +++++++ .../ml/comfyui_compose_capabilities.py | 203 ++++++++++++++++ .../functions/ml/comfyui_inject_controlnet.md | 68 ++++++ .../functions/ml/comfyui_inject_controlnet.py | 144 ++++++++++++ .../functions/ml/comfyui_inject_ipadapter.md | 91 ++++++++ .../functions/ml/comfyui_inject_ipadapter.py | 218 +++++++++++++++++ .../test_comfyui_compose_capabilities.py | 146 ++++++++++++ .../tests/test_comfyui_inject_controlnet.py | 70 ++++++ .../ml/tests/test_comfyui_inject_ipadapter.py | 79 +++++++ .../comfyui_generate_mixed_oneshot.md | 100 ++++++++ .../comfyui_generate_mixed_oneshot.py | 220 ++++++++++++++++++ 12 files changed, 1494 insertions(+) create mode 100644 python/functions/ml/comfyui_compose_capabilities.md create mode 100644 python/functions/ml/comfyui_compose_capabilities.py create mode 100644 python/functions/ml/comfyui_inject_controlnet.md create mode 100644 python/functions/ml/comfyui_inject_controlnet.py create mode 100644 python/functions/ml/comfyui_inject_ipadapter.md create mode 100644 python/functions/ml/comfyui_inject_ipadapter.py create mode 100644 python/functions/ml/tests/test_comfyui_compose_capabilities.py create mode 100644 python/functions/ml/tests/test_comfyui_inject_controlnet.py create mode 100644 python/functions/ml/tests/test_comfyui_inject_ipadapter.py create mode 100644 python/functions/pipelines/comfyui_generate_mixed_oneshot.md create mode 100644 python/functions/pipelines/comfyui_generate_mixed_oneshot.py diff --git a/docs/capabilities/comfyui-skill.md b/docs/capabilities/comfyui-skill.md index a4b86865..3c323d5a 100644 --- a/docs/capabilities/comfyui-skill.md +++ b/docs/capabilities/comfyui-skill.md @@ -256,6 +256,74 @@ Notas de uso: commitea ni se indexa. - El token Civitai es secreto: viene de `pass civitai/api-token`, nunca hardcodeado. 
+## Mezclar capacidades (mixer) + +Una skill fija *una* receta. El **mixer** resuelve el otro eje: combinar **a la carta** todas las +capacidades de generación sobre un mismo workflow base y activar/desactivar cada una para iterar. +Misma doctrina del issue 0087 (componer piezas probadas, no reescribir el grafo), pero aplicada a +mezclar capacidades en vez de a guardar una receta. + +Dos funciones: + +| ID | firma corta | qué hace | +|---|---|---| +| `comfyui_compose_capabilities_py_ml` | `compose_capabilities(base, *, loras, controlnet, ipadapter, hires, facedetailer) -> dict` | **PURA.** Aplica EN ORDEN las capacidades activadas (cada arg `None` = desactivada) sobre un dict base, componiendo los inyectores/builders encadenables. Reconecta MODEL/CLIP/positive/IMAGE. Sin ninguna = base intacto. | +| `comfyui_generate_mixed_oneshot_py_pipelines` | `generate_mixed_oneshot(base, subject, *, capabilities, server, judge, ...) -> dict` | **Pipeline.** base (skill slug / `'txt2img'` / dict) → compose → submit → wait → fetch → (si `judge`) juzga. Devuelve `{ok, prompt_id, image_path, capabilities_active, judge, error}`. | + +El mixer se apoya en los **inyectores encadenables-sobre-dict** (cada uno la versión componible de +su builder-desde-cero hermano): + +| Capacidad | Inyector | Reconecta | +|---|---|---| +| LoRAs (N) | `comfyui_inject_multi_lora_py_ml` | cadena MODEL/CLIP tras el checkpoint | +| ControlNet | `comfyui_inject_controlnet_py_ml` | `KSampler.positive` ← `ControlNetApply` | +| IPAdapter (style/faceid) | `comfyui_inject_ipadapter_py_ml` | `KSampler.model` ← IPAdapter (tras las LoRAs) | +| hires/upscale | `comfyui_inject_hires_fix_py_ml` | `UltimateSDUpscale` tras el `VAEDecode` | +| FaceDetailer | `comfyui_build_facedetailer_workflow_py_ml` | regenera caras del `VAEDecode` | + +Orden fijo: `loras → controlnet → ipadapter → facedetailer → hires`. El IPAdapter se aplica sobre +el MODEL ya modificado por los LoRAs (orden correcto). 
Tras FaceDetailer el mixer deja un único +`SaveImage` (el del detailer). + +### Ejemplo canónico (≥3 capacidades, juzgado) + +```python +import sys, os +sys.path.insert(0, os.path.join(os.environ["HOME"], "fn_registry", "python", "functions")) +from pipelines.comfyui_generate_mixed_oneshot import comfyui_generate_mixed_oneshot + +# txt2img dreamshaper + 2 LoRAs + FaceDetailer (3 capacidades). Activar/desactivar = cambiar args. +res = comfyui_generate_mixed_oneshot( + "txt2img", + "a heroic knight portrait, 3d render style, dramatic lighting, detailed face", + checkpoint="dreamshaper_8.safetensors", + capabilities={ + "loras": [ + {"name": "3d_render_redmond_sd15.safetensors", "strength_model": 0.9}, + {"name": "detail_tweaker_sd15.safetensors", "strength_model": 0.5, "strength_clip": 0.5}, + ], + "facedetailer": {"denoise": 0.45}, + # "ipadapter": {"ref_image": "face.png", "mode": "faceid"}, # se activa con solo añadirla + # "hires": {"upscale_by": 1.5}, + }, + dest="/tmp/comfy_mixed", seed=42, judge=True, +) +print(res["ok"], res["prompt_id"], res["capabilities_active"], res["judge"]) +``` + +### Límite conocido (8GB / piezas actuales) + +- **hires + facedetailer no encadenan**: ambos toman su imagen del `VAEDecode` del render base, así + que combinarlos deja a uno sin efecto sobre la salida final (con los dos activos, hires "gana" y + facedetailer queda sin consumidor). Usa uno U otro por workflow. El resto de combinaciones + (LoRAs + ControlNet + IPAdapter + uno de los dos post-procesos) encadenan limpio. +- **VRAM**: en 8GB lowvram con SD1.5 entran ~2-3 capacidades modestas (p.ej. 2 LoRAs + FaceDetailer + a 512px). Apilar IPAdapter FaceID + ControlNet + hires + facedetailer a la vez puede dar OOM — + baja resolución o reduce capacidades. `mixer` no valida VRAM; el OOM aflora en `wait`. +- **Incompatibilidad explícita, no silenciosa**: ControlNet sin `control_image` o IPAdapter sin + `ref_image` lanzan `ValueError` del inyector (no petan a medias). 
Las imágenes de control/referencia + deben estar en el `input/` del servidor antes de encolar. + ## Fronteras - **No genera ni descarga modelos**: una skill referencia checkpoints/LoRAs por nombre; deben diff --git a/python/functions/ml/comfyui_compose_capabilities.md b/python/functions/ml/comfyui_compose_capabilities.md new file mode 100644 index 00000000..855c8351 --- /dev/null +++ b/python/functions/ml/comfyui_compose_capabilities.md @@ -0,0 +1,87 @@ +--- +name: comfyui_compose_capabilities +kind: function +lang: py +domain: ml +version: "1.0.0" +purity: pure +signature: "def comfyui_compose_capabilities(base_workflow: dict, *, loras: list[dict] | None = None, controlnet: dict | None = None, ipadapter: dict | None = None, hires: dict | None = None, facedetailer: dict | None = None) -> dict" +description: "Mezclador de capacidades ComfyUI: toma un workflow base en API format (skill o txt2img) y aplica EN ORDEN las capacidades activadas (cada arg None = desactivada), componiendo los inyectores/builders encadenables del registry: loras (inject_multi_lora) -> controlnet (inject_controlnet) -> ipadapter (inject_ipadapter) -> facedetailer (build_facedetailer_workflow) -> hires (inject_hires_fix), reconectando MODEL/CLIP/positive/IMAGE. Cada capacidad es opcional e independiente; sin ninguna devuelve el base intacto. Pura: no muta el dict de entrada." +tags: [comfyui, comfyui-skill, ml, mixer, lora, controlnet, ipadapter, facedetailer, hires, workflow] +uses_functions: [comfyui_inject_multi_lora_py_ml, comfyui_inject_controlnet_py_ml, comfyui_inject_ipadapter_py_ml, comfyui_build_facedetailer_workflow_py_ml, comfyui_inject_hires_fix_py_ml] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +params: + - name: base_workflow + desc: "dict en API format (salida de comfyui_build_skill_workflow o comfyui_build_txt2img_workflow). No se muta; se devuelve una copia." 
+ - name: loras + desc: "Lista de dicts {name, strength_model?, strength_clip?} para inject_multi_lora. None o vacia = sin LoRAs. keyword-only." + - name: controlnet + desc: "Dict para inject_controlnet: {control_image (obligatoria), cn_name (obligatoria, pero no se valida: si falta llega '' al ControlNetLoader), strength?, positive_node?}. None = sin ControlNet. keyword-only." + - name: ipadapter + desc: "Dict para inject_ipadapter: {ref_image (obligatoria), mode ('style'|'faceid'), weight?, ...}. None = sin IPAdapter. keyword-only." + - name: hires + desc: "Dict de kwargs para inject_hires_fix (upscale_by, denoise, steps, cfg, seed, upscale_model, ...). {} = hires con defaults. None = sin hires. keyword-only." + - name: facedetailer + desc: "Dict de overrides para build_facedetailer_workflow. ckpt_name/positive/negative se detectan del workflow si faltan; resto = params del builder (denoise, steps, bbox_model, ...). {} = detect + defaults. None = sin facedetailer. keyword-only." +output: "copia del base con las capacidades activadas encadenadas en orden (loras -> controlnet -> ipadapter -> facedetailer -> hires). Sin ninguna activada, copia del base intacta. Tras facedetailer deja un unico SaveImage (el del detailer)." 
+tested: true +tests: ["sin capacidades devuelve el base intacto (mismos nodos)", "solo loras encadena los LoraLoader", "loras + facedetailer: cadena de loras + FaceDetailer + un solo SaveImage", "ipadapter + lora: IPAdapter toma el MODEL del ultimo LoraLoader", "hires anade UltimateSDUpscale", "controlnet sin control_image propaga ValueError", "ipadapter sin ref_image propaga ValueError", "no muta el dict de entrada (pureza)", "api format valido en todas las combinaciones", "activar una capacidad cambia el conjunto de class_types"] +test_file_path: "python/functions/ml/tests/test_comfyui_compose_capabilities.py" +file_path: "python/functions/ml/comfyui_compose_capabilities.py" +--- + +## Ejemplo + +```python +import sys, os +sys.path.insert(0, os.path.join(os.environ["HOME"], "fn_registry", "python", "functions")) +from ml.comfyui_build_txt2img_workflow import comfyui_build_txt2img_workflow +from ml.comfyui_compose_capabilities import comfyui_compose_capabilities + +base = comfyui_build_txt2img_workflow("dreamshaper_8.safetensors", "a hero, 3d render style") + +# 3 capacidades a la vez: 2 LoRAs + FaceDetailer (activar/desactivar = cambiar args) +mixed = comfyui_compose_capabilities( + base, + loras=[ + {"name": "3d_render_redmond_sd15.safetensors", "strength_model": 0.9}, + {"name": "detail_tweaker_sd15.safetensors", "strength_model": 0.5}, + ], + facedetailer={"denoise": 0.45}, + # controlnet=..., ipadapter=..., hires=... -> None = desactivadas +) +``` + +## Cuando usarla + +Cuando quieras **mezclar varias capacidades de generacion** (LoRAs + ControlNet + +IPAdapter + FaceDetailer + hires) sobre un mismo workflow base y poder +activar/desactivar cada una para iterar y mejorar. Es el "mixer" del grupo +`comfyui-skill`: una sola funcion en vez de encadenar los inyectores a mano. La +salida va directa a `comfyui_submit_workflow` (o usa el one-shot +`comfyui_generate_mixed_oneshot` para submit + juicio). 
+ +## Gotchas + +- Pura: no muta el `base_workflow` y NO valida que checkpoints/loras/modelos + existan en el servidor. Las imagenes de control/referencia (ControlNet, + IPAdapter) deben estar en el `input/` del servidor antes de submit. +- **Orden fijo**: loras -> controlnet -> ipadapter -> facedetailer -> hires. El + IPAdapter se aplica sobre el MODEL ya modificado por los LoRAs (orden correcto). +- **hires + facedetailer NO encadenan** con las piezas actuales: ambos toman su + imagen del VAEDecode del render base, asi que combinarlos deja a uno sin efecto + sobre la salida final (con los dos activos, hires "gana" y facedetailer queda + sin consumidor). Usa uno U otro por workflow. Es la limitacion documentada del + mixer; el resto de combinaciones (loras+controlnet+ipadapter+uno de los dos + post-procesos) encadenan limpio. +- Cada capacidad apila coste de VRAM. En 8GB lowvram con SD1.5 entran ~2-3 + capacidades modestas (p.ej. 2 LoRAs + FaceDetailer a 512px). Apilar IPAdapter + FaceID + ControlNet + hires + facedetailer a la vez puede dar OOM: baja + resolucion o desactiva capacidades. +- Errores de incompatibilidad (controlnet sin `control_image`, ipadapter sin + `ref_image`, mode invalido) se propagan como `ValueError` del inyector, no + petan en silencio. diff --git a/python/functions/ml/comfyui_compose_capabilities.py b/python/functions/ml/comfyui_compose_capabilities.py new file mode 100644 index 00000000..1f072492 --- /dev/null +++ b/python/functions/ml/comfyui_compose_capabilities.py @@ -0,0 +1,203 @@ +"""comfyui_compose_capabilities — mezclador de capacidades sobre un workflow base. + +Toma un workflow ComfyUI en API format (la base: salida de +comfyui_build_skill_workflow o comfyui_build_txt2img_workflow) y aplica EN ORDEN +las capacidades que se activen, componiendo los inyectores/builders ENCADENABLES +del registry. Cada capacidad es un argumento keyword opcional: None (default) = +desactivada. 
Asi el mismo dict base se mezcla a la carta y se puede ir mejorando +(activar/desactivar una capacidad cambia el grafo resultante). + +Orden de aplicacion (de mas cerca del checkpoint a la salida): + + 1. loras -> comfyui_inject_multi_lora (cadena MODEL/CLIP) + 2. controlnet -> comfyui_inject_controlnet (re-condiciona KSampler.positive) + 3. ipadapter -> comfyui_inject_ipadapter (re-condiciona KSampler.model, tras loras) + 4. facedetailer -> comfyui_build_facedetailer_workflow (regenera caras del VAEDecode) + 5. hires -> comfyui_inject_hires_fix (UltimateSDUpscale tras el VAEDecode) + +Cada capacidad es independiente: se puede activar cualquier subconjunto. Sin +ninguna activada devuelve una copia del base intacta. + +Funcion PURA: sin red, sin I/O. No muta el dict de entrada (copia profunda). Solo +compone funciones puras del registry. + +Limitacion conocida (piezas actuales): hires y facedetailer NO encadenan entre +si. Ambos toman su imagen del VAEDecode original del render; combinarlos deja a +uno de los dos sin efecto sobre la salida final. Usa uno U otro por workflow, o +encadenalos manualmente fuera del mixer. Ver el .md (## Gotchas). 
+""" +from __future__ import annotations + +import copy +import os +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from ml.comfyui_build_facedetailer_workflow import comfyui_build_facedetailer_workflow # noqa: E402 +from ml.comfyui_inject_controlnet import comfyui_inject_controlnet # noqa: E402 +from ml.comfyui_inject_hires_fix import comfyui_inject_hires_fix # noqa: E402 +from ml.comfyui_inject_ipadapter import comfyui_inject_ipadapter # noqa: E402 +from ml.comfyui_inject_multi_lora import comfyui_inject_multi_lora # noqa: E402 + + +def _is_link(v) -> bool: + """True si v es una conexion ComfyUI [node_id(str), output_index(int)].""" + return ( + isinstance(v, list) + and len(v) == 2 + and isinstance(v[0], str) + and isinstance(v[1], int) + ) + + +def _detect_checkpoint(wf: dict) -> str: + """Nombre del checkpoint del primer CheckpointLoaderSimple, o '' si no hay.""" + for node in wf.values(): + if node.get("class_type") == "CheckpointLoaderSimple": + return str(node.get("inputs", {}).get("ckpt_name", "")) or "" + return "" + + +def _detect_prompts(wf: dict) -> tuple[str, str]: + """Texto (positivo, negativo) de los dos primeros CLIPTextEncode del workflow. + + En los builders del registry el positivo se inserta antes que el negativo, asi + que el primer CLIPTextEncode es el positivo y el segundo el negativo. + """ + texts = [ + str(n.get("inputs", {}).get("text", "")) + for n in wf.values() + if n.get("class_type") == "CLIPTextEncode" + ] + positive = texts[0] if texts else "" + negative = texts[1] if len(texts) > 1 else "" + return positive, negative + + +def _prune_redundant_saveimages(wf: dict, keep_source_class: str) -> None: + """Deja un unico SaveImage: el alimentado por un nodo `keep_source_class`. + + Tras encadenar facedetailer queda el SaveImage del render base (que ya no es + la salida final) ademas del SaveImage del detailer. Se borra el primero para + que el workflow tenga una sola imagen de salida (la procesada). 
Muta `wf` in + situ (el caller ya trabaja sobre una copia). No-op si hay <=1 SaveImage o si + no se encuentra el SaveImage alimentado por `keep_source_class`. + """ + saves = [ + (nid, n) for nid, n in wf.items() if n.get("class_type") == "SaveImage" + ] + if len(saves) <= 1: + return + keep = None + for nid, node in saves: + src = node.get("inputs", {}).get("images") + if _is_link(src) and wf.get(src[0], {}).get("class_type") == keep_source_class: + keep = nid + break + if keep is None: + return + for nid, _ in saves: + if nid != keep: + del wf[nid] + + +def comfyui_compose_capabilities( + base_workflow: dict, + *, + loras: list[dict] | None = None, + controlnet: dict | None = None, + ipadapter: dict | None = None, + hires: dict | None = None, + facedetailer: dict | None = None, +) -> dict: + """Aplica en orden las capacidades activadas sobre un workflow base. + + Args: + base_workflow: dict en API format (salida de + comfyui_build_skill_workflow o comfyui_build_txt2img_workflow). No se + muta; se devuelve una copia. + loras: lista de dicts {name, strength_model?, strength_clip?} para + comfyui_inject_multi_lora. None o lista vacia = sin LoRAs. keyword-only. + controlnet: dict para comfyui_inject_controlnet. Claves: control_image + (str, obligatoria), cn_name (str, obligatoria), strength (float), + positive_node (str). None = sin ControlNet. keyword-only. + ipadapter: dict para comfyui_inject_ipadapter. Claves: ref_image (str, + obligatoria), mode ('style'|'faceid'), weight (float) y demas + keyword-only del inyector. None = sin IPAdapter. keyword-only. + hires: dict de kwargs para comfyui_inject_hires_fix (upscale_by, denoise, + steps, cfg, seed, upscale_model, ...). {} = hires con defaults. None = + sin hires. keyword-only. + facedetailer: dict de overrides para comfyui_build_facedetailer_workflow. 
+ Claves opcionales: ckpt_name (str; si falta se detecta del workflow), + positive / negative (str; si faltan se detectan de los CLIPTextEncode), + y demas params del builder (denoise, steps, cfg, seed, bbox_model, ...). + {} = facedetailer con detect + defaults. None = sin facedetailer. + keyword-only. + + Returns: + copia del base con las capacidades activadas encadenadas en orden. Si no + se activa ninguna, una copia del base intacta. + + Raises: + ValueError: si una capacidad activada es incompatible (p.ej. controlnet + sin control_image, ipadapter sin ref_image): se propaga el ValueError + del inyector correspondiente con el contexto del fallo. + """ + wf = copy.deepcopy(base_workflow) + + if loras: + wf = comfyui_inject_multi_lora(wf, loras) + + if controlnet is not None: + cn = dict(controlnet) + control_image = cn.pop("control_image", "") + cn_name = cn.pop("cn_name", "") + wf = comfyui_inject_controlnet(wf, control_image, cn_name, **cn) + + if ipadapter is not None: + ip = dict(ipadapter) + ref_image = ip.pop("ref_image", "") + wf = comfyui_inject_ipadapter(wf, ref_image, **ip) + + if facedetailer is not None: + fd = dict(facedetailer) + ckpt_name = fd.pop("ckpt_name", None) or _detect_checkpoint(wf) + det_pos, det_neg = _detect_prompts(wf) + positive = fd.pop("positive", None) + if positive is None: + positive = det_pos + negative = fd.pop("negative", None) + if negative is None: + negative = det_neg + wf = comfyui_build_facedetailer_workflow(wf, ckpt_name, positive, negative, **fd) + # facedetailer anade su propio SaveImage; el del render base ya no es la + # salida final -> dejar solo el del detailer. + _prune_redundant_saveimages(wf, "FaceDetailer") + + if hires is not None: + h = dict(hires) if isinstance(hires, dict) else {} + wf = comfyui_inject_hires_fix(wf, **h) + + return wf + + +# Alias con el nombre completo del ID para descubrimiento por convencion. 
+compose_capabilities = comfyui_compose_capabilities + + +if __name__ == "__main__": + import json + + from ml.comfyui_build_txt2img_workflow import comfyui_build_txt2img_workflow + + base = comfyui_build_txt2img_workflow("dreamshaper_8.safetensors", "a hero, 3d render") + mixed = comfyui_compose_capabilities( + base, + loras=[ + {"name": "3d_render_redmond_sd15.safetensors", "strength_model": 0.9}, + {"name": "detail_tweaker_sd15.safetensors", "strength_model": 0.5}, + ], + facedetailer={"denoise": 0.45}, + ) + print(json.dumps({"base_nodes": list(base), "mixed_nodes": list(mixed)}, indent=2)) diff --git a/python/functions/ml/comfyui_inject_controlnet.md b/python/functions/ml/comfyui_inject_controlnet.md new file mode 100644 index 00000000..2be49790 --- /dev/null +++ b/python/functions/ml/comfyui_inject_controlnet.md @@ -0,0 +1,68 @@ +--- +name: comfyui_inject_controlnet +kind: function +lang: py +domain: ml +version: "1.0.0" +purity: pure +signature: "def comfyui_inject_controlnet(workflow: dict, control_image: str, cn_name: str, *, strength: float = 1.0, positive_node: str | None = None) -> dict" +description: "Inyecta una rama ControlNet (LoadImage + ControlNetLoader + ControlNetApply) en un workflow ComfyUI ya construido (API format), repuntando el KSampler.positive al condicionamiento condicionado por la imagen de control. Version ENCADENABLE-sobre-dict del builder comfyui_build_controlnet_workflow (que construye desde cero). Pensada para componerse con inject_lora/inject_ipadapter/inject_hires_fix. Pura: no muta el dict de entrada." +tags: [comfyui, comfyui-skill, ml, controlnet, stable-diffusion, workflow] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +params: + - name: workflow + desc: "dict en API format (ej. salida de comfyui_build_txt2img_workflow). No se muta; se devuelve una copia." 
+ - name: control_image + desc: "Nombre del archivo de la imagen de control en input/ del servidor ComfyUI (mapa preprocesado canny/depth/openpose). No puede estar vacio (raise ValueError)." + - name: cn_name + desc: "Nombre del modelo ControlNet en models/controlnet/ (control_net_name de ControlNetLoader)." + - name: strength + desc: "Fuerza con la que el ControlNet condiciona la generacion (0.0 nula, 1.0 plena). keyword-only." + - name: positive_node + desc: "node_id cuya salida CONDITIONING (slot 0) sera el positivo de entrada del ControlNetApply. Si None, se detecta la fuente que hoy alimenta el KSampler.positive. keyword-only." +output: "copia del workflow con LoadImage + ControlNetLoader + ControlNetApply insertados y el KSampler.positive repuntado a la salida del ControlNetApply. node_ids = max id numerico + 1/2/3." +tested: true +tests: ["inyecta los 3 nodos (LoadImage, ControlNetLoader, ControlNetApply) y repunta KSampler.positive", "ControlNetApply toma el positivo original como conditioning", "respeta strength y cn_name", "no muta el dict de entrada (pureza)", "control_image vacio lanza ValueError", "workflow sin KSampler lanza ValueError", "api format valido"] +test_file_path: "python/functions/ml/tests/test_comfyui_inject_controlnet.py" +file_path: "python/functions/ml/comfyui_inject_controlnet.py" +--- + +## Ejemplo + +```python +import sys, os +sys.path.insert(0, os.path.join(os.environ["HOME"], "fn_registry", "python", "functions")) +from ml.comfyui_build_txt2img_workflow import comfyui_build_txt2img_workflow +from ml.comfyui_inject_controlnet import comfyui_inject_controlnet + +base = comfyui_build_txt2img_workflow("dreamshaper_8.safetensors", "a knight, dramatic light") +wf = comfyui_inject_controlnet(base, "pose_canny.png", "control_v11p_sd15_canny.pth", strength=0.8) +# KSampler.positive ahora viene de ControlNetApply(conditioning=CLIPTextEncode+, image=pose_canny) +``` + +## Cuando usarla + +Cuando quieras **guiar la composicion** (pose, 
bordes, profundidad) de un +workflow txt2img ya construido con una imagen de control, sin reconstruir el +grafo desde cero. Es la pieza ControlNet del mixer +`comfyui_compose_capabilities`: encadena sobre el mismo dict que las LoRAs y el +IPAdapter. Para construir un workflow ControlNet aislado desde cero usa +`comfyui_build_controlnet_workflow`. + +## Gotchas + +- Pura: no muta el `workflow` de entrada y NO valida que `cn_name`/`control_image` + existan en el servidor. La imagen de control debe estar subida al `input/` del + servidor ANTES de submit; si no, el LoadImage falla en ejecucion. +- **control_image obligatorio**: una llamada con `control_image=""` lanza + `ValueError` (ControlNet sin imagen de control no tiene sentido). Es el error + path tipico cuando se activa la capacidad sin proveer el mapa de control. +- Repunta solo `KSampler.positive`. Si el workflow tiene varios KSampler, opera siempre + sobre el primero que encuentra; `positive_node` solo cambia la fuente del positivo, no el KSampler repuntado. +- Apila coste de VRAM (carga el modelo ControlNet). En 8GB con SD1.5 cabe junto a + 1-2 LoRAs; combinandolo con IPAdapter + hires vigila la memoria. diff --git a/python/functions/ml/comfyui_inject_controlnet.py b/python/functions/ml/comfyui_inject_controlnet.py new file mode 100644 index 00000000..d0c2c8a8 --- /dev/null +++ b/python/functions/ml/comfyui_inject_controlnet.py @@ -0,0 +1,144 @@ +"""Inyecta una rama ControlNet en un workflow ComfyUI ya construido (API format). + +Toma un workflow en API format (dict, p.ej. salida de +comfyui_build_txt2img_workflow) que tiene un KSampler cuyo condicionamiento +positivo viene de un CLIPTextEncode, y le encadena la rama de ControlNet: + + LoadImage (imagen de control) ---+ + ControlNetLoader (modelo CN) ----+--> ControlNetApply --> KSampler.positive + CLIPTextEncode (positivo) -------+ + +ControlNetApply re-condiciona el positivo con la imagen de control (canny, depth, +pose, scribble, ...) y el KSampler se repunta para tomar ese condicionamiento. 
+ +Es la version ENCADENABLE-sobre-dict del builder +comfyui_build_controlnet_workflow, que construye el grafo entero desde cero y NO +encadena. Reusa los mismos class_types/inputs (LoadImage, ControlNetLoader, +ControlNetApply). Pensada para componerse con inject_lora / inject_ipadapter / +inject_hires_fix sobre un mismo dict base (ver comfyui_compose_capabilities). + +Funcion pura: sin red, sin I/O. No muta el dict de entrada (copia profunda). +""" +import copy + + +def _is_link(v) -> bool: + """True si v es una conexion ComfyUI [node_id(str), output_index(int)].""" + return ( + isinstance(v, list) + and len(v) == 2 + and isinstance(v[0], str) + and isinstance(v[1], int) + ) + + +def comfyui_inject_controlnet( + workflow: dict, + control_image: str, + cn_name: str, + *, + strength: float = 1.0, + positive_node: str | None = None, +) -> dict: + """Devuelve una copia del workflow con una rama ControlNet inyectada. + + Localiza el condicionamiento positivo actual del KSampler (lo que hoy + alimenta su input `positive`), inserta LoadImage + ControlNetLoader + + ControlNetApply, y repunta el KSampler para que tome el positivo ya + condicionado por el ControlNet. + + Args: + workflow: dict en API format (ej. salida de + comfyui_build_txt2img_workflow). No se muta; se devuelve una copia. + control_image: nombre del archivo de la imagen de control dentro de la + carpeta input/ del servidor ComfyUI (lo carga el nodo LoadImage). + Suele ser un mapa preprocesado (canny/depth/openpose). No puede estar + vacio. + cn_name: nombre del modelo ControlNet en models/controlnet/ tal como lo + lista /object_info para ControlNetLoader (control_net_name). + strength: fuerza con la que el ControlNet condiciona la generacion + (0.0 = nula, 1.0 = plena). keyword-only. + positive_node: node_id cuya salida CONDITIONING (slot 0) se usara como + positivo de entrada del ControlNetApply. Si None, se detecta la + fuente que hoy alimenta el KSampler.positive. keyword-only. 
+ + Returns: + copia del workflow con LoadImage + ControlNetLoader + ControlNetApply + anadidos (node_ids = max id numerico existente + 1, + 2, + 3) y el + KSampler.positive repuntado a la salida del ControlNetApply. + + Raises: + ValueError: si control_image esta vacio, si no se encuentra un KSampler, + o si no se puede determinar la fuente del condicionamiento positivo + (y no se pasa positive_node explicito). + """ + if not control_image: + raise ValueError( + "comfyui_inject_controlnet: control_image no puede estar vacio " + "(ControlNet necesita una imagen de control en input/)." + ) + + wf = copy.deepcopy(workflow) + + ksampler_id = next( + (nid for nid, n in wf.items() if str(n.get("class_type", "")).endswith("KSampler")), + None, + ) + if ksampler_id is None: + raise ValueError( + "comfyui_inject_controlnet: no se encontro ningun KSampler en el workflow." + ) + + ks_inputs = wf[ksampler_id].get("inputs", {}) + if positive_node is not None: + pos_src = [positive_node, 0] + elif _is_link(ks_inputs.get("positive")): + pos_src = list(ks_inputs["positive"]) + else: + raise ValueError( + "comfyui_inject_controlnet: no se pudo determinar la fuente del " + "condicionamiento positivo; pasa positive_node explicito." + ) + + numeric = [int(k) for k in wf.keys() if str(k).isdigit()] + base = (max(numeric) + 1) if numeric else len(wf) + 1 + load_id = str(base) + loader_id = str(base + 1) + apply_id = str(base + 2) + + wf[load_id] = { + "class_type": "LoadImage", + "inputs": {"image": control_image}, + } + wf[loader_id] = { + "class_type": "ControlNetLoader", + "inputs": {"control_net_name": cn_name}, + } + wf[apply_id] = { + "class_type": "ControlNetApply", + "inputs": { + "conditioning": list(pos_src), + "control_net": [loader_id, 0], + "image": [load_id, 0], + "strength": strength, + }, + } + + # Repunta el KSampler para que tome el positivo condicionado por el ControlNet. 
+ wf[ksampler_id]["inputs"]["positive"] = [apply_id, 0] + return wf + + +if __name__ == "__main__": + import json + import os + import sys + + sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + from comfyui_build_txt2img_workflow import comfyui_build_txt2img_workflow + + base = comfyui_build_txt2img_workflow("dreamshaper_8.safetensors", "a knight, dramatic") + wf = comfyui_inject_controlnet( + base, "pose_canny.png", "control_v11p_sd15_canny.pth", strength=0.8 + ) + print(json.dumps(wf, indent=2)) diff --git a/python/functions/ml/comfyui_inject_ipadapter.md b/python/functions/ml/comfyui_inject_ipadapter.md new file mode 100644 index 00000000..56f15074 --- /dev/null +++ b/python/functions/ml/comfyui_inject_ipadapter.md @@ -0,0 +1,91 @@ +--- +name: comfyui_inject_ipadapter +kind: function +lang: py +domain: ml +version: "1.0.0" +purity: pure +signature: "def comfyui_inject_ipadapter(workflow: dict, ref_image: str, *, mode: str = \"style\", weight: float = 0.8, preset: str | None = None, weight_type: str | None = None, start_at: float = 0.0, end_at: float = 1.0, weight_faceidv2: float = 1.0, lora_strength: float = 0.6, combine_embeds: str = \"concat\", embeds_scaling: str = \"V only\", provider: str = \"CPU\", model_node: str | None = None) -> dict" +description: "Inyecta una rama IPAdapter (LoadImage + UnifiedLoader + IPAdapter, modo style o faceid) en un workflow ComfyUI ya construido (API format), repuntando el KSampler.model al MODEL condicionado por una imagen de referencia. La fuente del MODEL es la que hoy alimenta el KSampler (tras las LoRAs, no el checkpoint crudo). Version ENCADENABLE-sobre-dict del builder comfyui_build_ipadapter_workflow; reutiliza sus defaults de preset/weight_type. Pensada para componerse con inject_lora/inject_controlnet/inject_hires_fix. Pura: no muta el dict de entrada." 
+tags: [comfyui, comfyui-skill, ml, ipadapter, faceid, stable-diffusion, workflow] +uses_functions: [comfyui_build_ipadapter_workflow_py_ml] +uses_types: [] +returns: [] +returns_optional: false +error_type: "" +imports: [] +params: + - name: workflow + desc: "dict en API format (ej. salida de comfyui_build_txt2img_workflow, posiblemente con LoRAs). No se muta; se devuelve una copia." + - name: ref_image + desc: "Nombre del archivo de imagen de referencia en input/ del servidor. En faceid debe contener una cara nitida; en style es la imagen de estilo. No puede estar vacio (raise ValueError)." + - name: mode + desc: "'style' (transfiere estilo/composicion) o 'faceid' (rostro consistente via insightface + FaceID). keyword-only." + - name: weight + desc: "Peso de la influencia IPAdapter (0..1+). 0.8 buen punto de partida. keyword-only." + - name: preset + desc: "Preset del UnifiedLoader. None = default del modo ('STANDARD (medium strength)' style, 'FACEID PLUS V2' faceid). keyword-only." + - name: weight_type + desc: "Tipo de ponderacion del nodo IPAdapter/FaceID. None = default del modo ('standard' style, 'linear' faceid). keyword-only." + - name: start_at + desc: "Fraccion del sampling donde empieza a aplicar IPAdapter (0..1). keyword-only." + - name: end_at + desc: "Fraccion del sampling donde deja de aplicar (0..1). keyword-only." + - name: weight_faceidv2 + desc: "Peso del embedding FaceID v2 (solo mode='faceid'). keyword-only." + - name: lora_strength + desc: "Fuerza de la LoRA FaceID que carga el UnifiedLoaderFaceID (solo faceid). keyword-only." + - name: combine_embeds + desc: "Como combinar embeddings si hay varias caras ('concat'|'add'|...). Solo faceid. keyword-only." + - name: embeds_scaling + desc: "Escalado de embeddings ('V only'|'K+V'|...). Solo faceid. keyword-only." + - name: provider + desc: "Backend de insightface ('CPU'|'CUDA'|...). CPU por defecto para no competir por VRAM. Solo faceid. keyword-only." 
+ - name: model_node + desc: "node_id cuya salida MODEL (slot 0) alimentara la rama IPAdapter. None = detecta la fuente del KSampler.model (CheckpointLoader como fallback). keyword-only." +output: "copia del workflow con LoadImage + (UnifiedLoader|UnifiedLoaderFaceID) + (IPAdapter|IPAdapterFaceID) insertados y el KSampler.model repuntado a la salida MODEL de la rama IPAdapter. node_ids = max id numerico + 1/2/3." +tested: true +tests: ["mode style inyecta IPAdapterUnifiedLoader + IPAdapter y repunta KSampler.model", "mode faceid inyecta IPAdapterUnifiedLoaderFaceID + IPAdapterFaceID", "la rama toma el MODEL actual del KSampler (tras loras, no el checkpoint)", "respeta weight/preset/weight_type", "no muta el dict de entrada (pureza)", "mode invalido lanza ValueError", "ref_image vacio lanza ValueError", "api format valido"] +test_file_path: "python/functions/ml/tests/test_comfyui_inject_ipadapter.py" +file_path: "python/functions/ml/comfyui_inject_ipadapter.py" +--- + +## Ejemplo + +```python +import sys, os +sys.path.insert(0, os.path.join(os.environ["HOME"], "fn_registry", "python", "functions")) +from ml.comfyui_build_txt2img_workflow import comfyui_build_txt2img_workflow +from ml.comfyui_inject_multi_lora import comfyui_inject_multi_lora +from ml.comfyui_inject_ipadapter import comfyui_inject_ipadapter + +base = comfyui_build_txt2img_workflow("dreamshaper_8.safetensors", "portrait of a knight") +wf = comfyui_inject_multi_lora(base, [{"name": "detail_tweaker_sd15.safetensors", "strength_model": 0.5}]) +wf = comfyui_inject_ipadapter(wf, "hero_face.png", mode="faceid", weight=0.9) +# KSampler.model viene de IPAdapterFaceID(model=ultimo LoraLoader, image=hero_face) +``` + +## Cuando usarla + +Cuando quieras condicionar un workflow txt2img ya construido con una **imagen de +referencia** (estilo en `style`, rostro consistente en `faceid`) encadenando +sobre el mismo dict que las LoRAs, sin reconstruir el grafo. 
Es la pieza +IPAdapter del mixer `comfyui_compose_capabilities`. Para un workflow IPAdapter +aislado desde cero usa `comfyui_build_ipadapter_workflow`. + +## Gotchas + +- Pura: no muta el `workflow` de entrada y NO valida que los modelos IPAdapter + esten instalados. La imagen de referencia debe estar subida al `input/` del + servidor ANTES de submit. +- **ref_image obligatorio** y **mode in {style, faceid}**: ambos validados con + `ValueError`. Es el error path tipico al activar la capacidad sin referencia. +- **Compatibilidad de checkpoint**: usa modelos IPAdapter SD1.5 con checkpoints + SD1.5 (dreamshaper_8) y SDXL con SDXL. Mezclar familias da un error de shape en + ejecucion. +- `faceid` carga insightface (provider CPU por defecto) + un FaceID .bin + su + LoRA: es la rama mas pesada. En 8GB lowvram funciona pero apilarla con + ControlNet + hires + facedetailer a la vez puede dar OOM — baja resolucion o + desactiva capacidades. +- Se aplica DESPUES de las LoRAs (toma el MODEL actual del KSampler), que es el + orden correcto del mixer. diff --git a/python/functions/ml/comfyui_inject_ipadapter.py b/python/functions/ml/comfyui_inject_ipadapter.py new file mode 100644 index 00000000..fbd50063 --- /dev/null +++ b/python/functions/ml/comfyui_inject_ipadapter.py @@ -0,0 +1,218 @@ +"""Inyecta una rama IPAdapter en un workflow ComfyUI ya construido (API format). + +Toma un workflow en API format (dict, p.ej. salida de +comfyui_build_txt2img_workflow, ya con LoRAs encadenadas si las hay) y le injerta +la rama IPAdapter del custom node ComfyUI_IPAdapter_plus (cubiq), repuntando el +KSampler para que su MODEL venga condicionado por una imagen de referencia: + +- mode='style': IPAdapterUnifiedLoader + IPAdapter. La imagen de referencia + transfiere estilo/composicion al resultado (image prompt clasico). +- mode='faceid': IPAdapterUnifiedLoaderFaceID + IPAdapterFaceID. 
def _is_link(v) -> bool:
    """Return True when *v* is a ComfyUI link ``[node_id, output_index]``.

    A link is a two-element list whose first item is a ``str`` node id and
    whose second item is an ``int`` output slot.
    """
    return (
        isinstance(v, list)
        and len(v) == 2
        and isinstance(v[0], str)
        and isinstance(v[1], int)
    )


def comfyui_inject_ipadapter(
    workflow: dict,
    ref_image: str,
    *,
    mode: str = "style",
    weight: float = 0.8,
    preset: str | None = None,
    weight_type: str | None = None,
    start_at: float = 0.0,
    end_at: float = 1.0,
    weight_faceidv2: float = 1.0,
    lora_strength: float = 0.6,
    combine_embeds: str = "concat",
    embeds_scaling: str = "V only",
    provider: str = "CPU",
    model_node: str | None = None,
) -> dict:
    """Return a copy of *workflow* with an IPAdapter branch spliced in.

    Three nodes are appended (LoadImage, a unified loader and an apply node)
    and the KSampler's ``model`` input is re-pointed at the apply node, so
    sampling uses the MODEL conditioned by the reference image.

    Args:
        workflow: ComfyUI workflow in API format (node_id -> node dict). It is
            deep-copied; the caller's dict is never modified.
        ref_image: file name placed in the ``image`` input of the new
            LoadImage node. Must be non-empty.
        mode: ``'style'`` emits IPAdapterUnifiedLoader + IPAdapter;
            ``'faceid'`` emits IPAdapterUnifiedLoaderFaceID + IPAdapterFaceID.
        weight: ``weight`` input of the apply node.
        preset: ``preset`` input of the unified loader. ``None`` looks the
            value up in ``_DEFAULT_PRESET[mode]`` (imported from the builder
            module).
        weight_type: ``weight_type`` input of the apply node. ``None`` looks
            the value up in ``_DEFAULT_WEIGHT_TYPE[mode]``.
        start_at: ``start_at`` input of the apply node.
        end_at: ``end_at`` input of the apply node.
        weight_faceidv2: ``weight_faceidv2`` input (faceid apply node only).
        lora_strength: ``lora_strength`` input (faceid loader only).
        combine_embeds: ``combine_embeds`` input (faceid apply node only).
        embeds_scaling: ``embeds_scaling`` input (faceid apply node only).
        provider: ``provider`` input (faceid loader only).
        model_node: node id whose output slot 0 feeds the loader. ``None``
            reuses whatever link currently feeds ``KSampler.model`` and, if
            that input is not a link, falls back to the first node whose
            class_type starts with ``CheckpointLoader``.

    Returns:
        The modified copy. New node ids are ``max(numeric ids) + 1, +2, +3``
        (or ``len(workflow) + 1`` onwards when no id is numeric).

    Raises:
        ValueError: if *mode* is not ``'style'``/``'faceid'``, *ref_image* is
            empty, no node whose class_type ends in ``KSampler`` exists, or
            the MODEL source cannot be determined and *model_node* is None.
    """
    if mode not in ("style", "faceid"):
        raise ValueError(
            f"comfyui_inject_ipadapter: mode debe ser 'style' o 'faceid', no {mode!r}"
        )
    if not ref_image:
        raise ValueError("comfyui_inject_ipadapter: ref_image no puede estar vacio")

    wf = copy.deepcopy(workflow)

    ksampler_id = next(
        (nid for nid, n in wf.items() if str(n.get("class_type", "")).endswith("KSampler")),
        None,
    )
    if ksampler_id is None:
        raise ValueError(
            "comfyui_inject_ipadapter: no se encontro ningun KSampler en el workflow."
        )

    # setdefault (not .get) so the re-pointing at the end writes into the same
    # dict even when the KSampler node came without an "inputs" key; the
    # previous read-with-default / write-by-index pair raised KeyError there.
    ks_inputs = wf[ksampler_id].setdefault("inputs", {})
    if model_node is not None:
        # Link node ids are strings (see _is_link); normalise so an id passed
        # as an int still yields a well-formed link.
        model_src = [str(model_node), 0]
    elif _is_link(ks_inputs.get("model")):
        model_src = list(ks_inputs["model"])
    else:
        ckpt = next(
            (nid for nid, n in wf.items()
             if str(n.get("class_type", "")).startswith("CheckpointLoader")),
            None,
        )
        if ckpt is None:
            raise ValueError(
                "comfyui_inject_ipadapter: no se pudo determinar la fuente del "
                "MODEL; pasa model_node explicito."
            )
        model_src = [ckpt, 0]

    # Allocate three fresh ids after the highest numeric id already in use.
    numeric = [int(k) for k in wf if str(k).isdigit()]
    base = (max(numeric) + 1) if numeric else len(wf) + 1
    load_id = str(base)
    loader_id = str(base + 1)
    apply_id = str(base + 2)

    used_preset = preset if preset is not None else _DEFAULT_PRESET[mode]
    used_wtype = weight_type if weight_type is not None else _DEFAULT_WEIGHT_TYPE[mode]

    wf[load_id] = {
        "class_type": "LoadImage",
        "inputs": {"image": ref_image},
    }

    if mode == "style":
        wf[loader_id] = {
            "class_type": "IPAdapterUnifiedLoader",
            "inputs": {"model": list(model_src), "preset": used_preset},
        }
        wf[apply_id] = {
            "class_type": "IPAdapter",
            "inputs": {
                "model": [loader_id, 0],
                "ipadapter": [loader_id, 1],
                "image": [load_id, 0],
                "weight": weight,
                "start_at": start_at,
                "end_at": end_at,
                "weight_type": used_wtype,
            },
        }
    else:  # faceid
        wf[loader_id] = {
            "class_type": "IPAdapterUnifiedLoaderFaceID",
            "inputs": {
                "model": list(model_src),
                "preset": used_preset,
                "lora_strength": lora_strength,
                "provider": provider,
            },
        }
        wf[apply_id] = {
            "class_type": "IPAdapterFaceID",
            "inputs": {
                "model": [loader_id, 0],
                "ipadapter": [loader_id, 1],
                "image": [load_id, 0],
                "weight": weight,
                "weight_faceidv2": weight_faceidv2,
                "weight_type": used_wtype,
                "combine_embeds": combine_embeds,
                "start_at": start_at,
                "end_at": end_at,
                "embeds_scaling": embeds_scaling,
            },
        }

    # Re-point the KSampler so it samples with the IPAdapter-conditioned MODEL.
    ks_inputs["model"] = [apply_id, 0]
    return wf
+ assert class_types(out) == class_types(base) + assert set(out) == set(base) + + +def test_solo_loras_encadena(): + base = _base() + out = comfyui_compose_capabilities( + base, + loras=[ + {"name": "3d_render_redmond_sd15.safetensors", "strength_model": 0.9}, + {"name": "detail_tweaker_sd15.safetensors", "strength_model": 0.5}, + ], + ) + assert_api_format(out) + assert _count_ct(out, "LoraLoader") == 2 + # El KSampler cuelga del ultimo LoraLoader. + ks = node_by_ct(out, "KSampler") + assert out[ks["inputs"]["model"][0]]["class_type"] == "LoraLoader" + + +def test_loras_mas_facedetailer(): + out = comfyui_compose_capabilities( + _base(), + loras=[{"name": "detail_tweaker_sd15.safetensors", "strength_model": 0.5}], + facedetailer={"denoise": 0.45}, + ) + assert_api_format(out) + assert _count_ct(out, "LoraLoader") == 1 + assert _count_ct(out, "FaceDetailer") == 1 + # Tras facedetailer queda un unico SaveImage (el del detailer). + assert _count_ct(out, "SaveImage") == 1 + save = node_by_ct(out, "SaveImage") + assert out[save["inputs"]["images"][0]]["class_type"] == "FaceDetailer" + + +def test_ipadapter_mas_lora_toma_model_del_lora(): + out = comfyui_compose_capabilities( + _base(), + loras=[{"name": "a.safetensors"}], + ipadapter={"ref_image": "ref.png", "mode": "style", "weight": 0.8}, + ) + assert_api_format(out) + lora_id = next(nid for nid, n in out.items() if n["class_type"] == "LoraLoader") + loader = node_by_ct(out, "IPAdapterUnifiedLoader") + assert loader["inputs"]["model"] == [lora_id, 0] + # KSampler.model cuelga del IPAdapter. 
+ ks = node_by_ct(out, "KSampler") + assert out[ks["inputs"]["model"][0]]["class_type"] == "IPAdapter" + + +def test_hires_anade_upscale(): + out = comfyui_compose_capabilities(_base(), hires={}) + assert_api_format(out) + assert "UltimateSDUpscale" in class_types(out) + assert "UpscaleModelLoader" in class_types(out) + + +def test_facedetailer_detecta_checkpoint_y_prompt(): + # Sin pasar ckpt_name ni positive, se detectan del workflow. + out = comfyui_compose_capabilities(_base(), facedetailer={}) + fd = node_by_ct(out, "FaceDetailer") + # El FaceDetailer usa el checkpoint del base (reutilizado). + assert "FaceDetailer" in class_types(out) + # El CLIPTextEncode positivo del detailer lleva el texto del base. + pos_text = out[fd["inputs"]["positive"][0]]["inputs"]["text"] + assert pos_text == "POS" + + +def test_controlnet_sin_imagen_propaga_valueerror(): + with pytest.raises(ValueError): + comfyui_compose_capabilities(_base(), controlnet={"cn_name": "m.pth"}) + + +def test_ipadapter_sin_ref_propaga_valueerror(): + with pytest.raises(ValueError): + comfyui_compose_capabilities(_base(), ipadapter={"mode": "style"}) + + +def test_no_muta_base(): + base = _base() + snapshot = set(base) + comfyui_compose_capabilities( + base, + loras=[{"name": "a.safetensors"}], + facedetailer={}, + ) + assert set(base) == snapshot + + +def test_activar_capacidad_cambia_grafo(): + base = _base() + plain = comfyui_compose_capabilities(base) + with_lora = comfyui_compose_capabilities(base, loras=[{"name": "a.safetensors"}]) + with_fd = comfyui_compose_capabilities(base, facedetailer={}) + # Cada activacion introduce class_types nuevos respecto al base. 
+ assert "LoraLoader" not in class_types(plain) + assert "LoraLoader" in class_types(with_lora) + assert "FaceDetailer" not in class_types(plain) + assert "FaceDetailer" in class_types(with_fd) + assert len(set(with_lora)) > len(set(plain)) + + +def test_combinacion_controlnet_ipadapter_lora_valida(): + # ControlNet + IPAdapter + LoRA juntos producen api format valido. + out = comfyui_compose_capabilities( + _base(), + loras=[{"name": "a.safetensors"}], + controlnet={"control_image": "ctrl.png", "cn_name": "cn.pth", "strength": 0.6}, + ipadapter={"ref_image": "ref.png", "mode": "style"}, + ) + assert_api_format(out) + cts = class_types(out) + assert {"LoraLoader", "ControlNetApply", "IPAdapter"} <= cts diff --git a/python/functions/ml/tests/test_comfyui_inject_controlnet.py b/python/functions/ml/tests/test_comfyui_inject_controlnet.py new file mode 100644 index 00000000..9d4d0d93 --- /dev/null +++ b/python/functions/ml/tests/test_comfyui_inject_controlnet.py @@ -0,0 +1,70 @@ +"""Tests de estructura, repunte y pureza para comfyui_inject_controlnet (funcion pura).""" + +import os +import sys + +sys.path.insert(0, os.path.dirname(__file__)) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) + +import pytest + +from ml.comfyui_build_txt2img_workflow import comfyui_build_txt2img_workflow +from ml.comfyui_inject_controlnet import comfyui_inject_controlnet +from _comfyui_wf_assert import assert_api_format, class_types, node_by_ct + + +def _base(): + return comfyui_build_txt2img_workflow("ck.safetensors", "POS", "NEG") + + +def test_inyecta_tres_nodos_y_repunta_positive(): + base = _base() + inj = comfyui_inject_controlnet(base, "ctrl.png", "cn_canny.pth", strength=0.7) + assert_api_format(inj) + cts = class_types(inj) + assert {"LoadImage", "ControlNetLoader", "ControlNetApply"} <= cts + # El KSampler.positive ahora viene de un ControlNetApply. 
def test_controlnetapply_toma_el_positivo_original():
    """ControlNetApply must be fed by the link KSampler.positive had before injection."""
    base = _base()
    # In the base workflow this test reads KSampler.positive from node "3".
    orig_pos = base["3"]["inputs"]["positive"]
    inj = comfyui_inject_controlnet(base, "ctrl.png", "cn_canny.pth")
    apply_node = node_by_ct(inj, "ControlNetApply")
    assert apply_node["inputs"]["conditioning"] == orig_pos
    # control_net and image must point at the loader and the LoadImage node.
    assert inj[apply_node["inputs"]["control_net"][0]]["class_type"] == "ControlNetLoader"
    assert inj[apply_node["inputs"]["image"][0]]["class_type"] == "LoadImage"


def test_respeta_strength_y_cn_name():
    """strength, cn_name and control_image must land verbatim in the new nodes."""
    inj = comfyui_inject_controlnet(_base(), "c.png", "mymodel.pth", strength=0.42)
    apply_node = node_by_ct(inj, "ControlNetApply")
    loader = node_by_ct(inj, "ControlNetLoader")
    load = node_by_ct(inj, "LoadImage")
    assert apply_node["inputs"]["strength"] == 0.42
    assert loader["inputs"]["control_net_name"] == "mymodel.pth"
    assert load["inputs"]["image"] == "c.png"


def test_no_muta_entrada():
    """The injector must leave the caller's workflow untouched, nested dicts included."""
    import copy

    base = _base()
    # Deep copy: a per-node dict(v) copy shares the nested "inputs" dicts with
    # `base`, so comparing through it would compare each object with itself
    # and could never detect an in-place mutation.
    snapshot = copy.deepcopy(base)
    comfyui_inject_controlnet(base, "c.png", "m.pth")
    assert set(base) == set(snapshot)
    assert base["3"]["inputs"]["positive"] == snapshot["3"]["inputs"]["positive"]
    assert base == snapshot


def test_control_image_vacio_lanza():
    """An empty control_image is rejected with ValueError."""
    with pytest.raises(ValueError):
        comfyui_inject_controlnet(_base(), "", "m.pth")


def test_sin_ksampler_lanza():
    """A workflow without any KSampler node is rejected with ValueError."""
    wf = {"4": {"class_type": "CheckpointLoaderSimple", "inputs": {"ckpt_name": "x"}}}
    with pytest.raises(ValueError):
        comfyui_inject_controlnet(wf, "c.png", "m.pth")
b/python/functions/ml/tests/test_comfyui_inject_ipadapter.py @@ -0,0 +1,79 @@ +"""Tests de estructura, repunte y pureza para comfyui_inject_ipadapter (funcion pura).""" + +import os +import sys + +sys.path.insert(0, os.path.dirname(__file__)) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) + +import pytest + +from ml.comfyui_build_txt2img_workflow import comfyui_build_txt2img_workflow +from ml.comfyui_inject_multi_lora import comfyui_inject_multi_lora +from ml.comfyui_inject_ipadapter import comfyui_inject_ipadapter +from _comfyui_wf_assert import assert_api_format, class_types, node_by_ct + + +def _base(): + return comfyui_build_txt2img_workflow("ck.safetensors", "POS", "NEG") + + +def test_style_inyecta_unified_loader_y_repunta_model(): + inj = comfyui_inject_ipadapter(_base(), "ref.png", mode="style", weight=0.8) + assert_api_format(inj) + cts = class_types(inj) + assert "IPAdapterUnifiedLoader" in cts + assert "IPAdapter" in cts + ks = node_by_ct(inj, "KSampler") + assert inj[ks["inputs"]["model"][0]]["class_type"] == "IPAdapter" + + +def test_faceid_inyecta_faceid_nodes(): + inj = comfyui_inject_ipadapter(_base(), "face.png", mode="faceid", weight=0.9) + assert_api_format(inj) + cts = class_types(inj) + assert "IPAdapterUnifiedLoaderFaceID" in cts + assert "IPAdapterFaceID" in cts + ks = node_by_ct(inj, "KSampler") + assert inj[ks["inputs"]["model"][0]]["class_type"] == "IPAdapterFaceID" + + +def test_toma_model_actual_tras_loras(): + # Con LoRAs encadenados, el IPAdapter debe colgar del ultimo LoraLoader, + # no del checkpoint crudo. 
+ base = _base() + with_lora = comfyui_inject_multi_lora(base, [{"name": "a.safetensors"}]) + lora_id = next(nid for nid, n in with_lora.items() if n["class_type"] == "LoraLoader") + inj = comfyui_inject_ipadapter(with_lora, "ref.png", mode="style") + loader = node_by_ct(inj, "IPAdapterUnifiedLoader") + assert loader["inputs"]["model"] == [lora_id, 0] + + +def test_respeta_weight_y_preset(): + inj = comfyui_inject_ipadapter( + _base(), "ref.png", mode="style", weight=0.55, preset="PLUS (high strength)" + ) + ip = node_by_ct(inj, "IPAdapter") + loader = node_by_ct(inj, "IPAdapterUnifiedLoader") + load = node_by_ct(inj, "LoadImage") + assert ip["inputs"]["weight"] == 0.55 + assert loader["inputs"]["preset"] == "PLUS (high strength)" + assert load["inputs"]["image"] == "ref.png" + + +def test_no_muta_entrada(): + base = _base() + snapshot = set(base) + comfyui_inject_ipadapter(base, "ref.png") + assert set(base) == snapshot + assert base["3"]["inputs"]["model"] == ["4", 0] + + +def test_mode_invalido_lanza(): + with pytest.raises(ValueError): + comfyui_inject_ipadapter(_base(), "ref.png", mode="bogus") + + +def test_ref_image_vacio_lanza(): + with pytest.raises(ValueError): + comfyui_inject_ipadapter(_base(), "", mode="style") diff --git a/python/functions/pipelines/comfyui_generate_mixed_oneshot.md b/python/functions/pipelines/comfyui_generate_mixed_oneshot.md new file mode 100644 index 00000000..69f922a2 --- /dev/null +++ b/python/functions/pipelines/comfyui_generate_mixed_oneshot.md @@ -0,0 +1,100 @@ +--- +name: comfyui_generate_mixed_oneshot +kind: pipeline +lang: py +domain: pipelines +version: "1.0.0" +purity: impure +signature: "def comfyui_generate_mixed_oneshot(base, subject: str, *, capabilities: dict | None = None, server: str = \"127.0.0.1:8188\", dest: str | None = None, seed: int = 0, judge: bool = True, checkpoint: str | None = None, negative: str = \"\", library_dir: str | None = None, wait_timeout: float = 600.0) -> dict" +description: "Pipeline 
one-shot del mixer comfyui-skill: parte de un workflow base (skill slug, builder 'txt2img', o dict ya construido), aplica el conjunto de capacidades elegido con comfyui_compose_capabilities (LoRAs + ControlNet + IPAdapter + hires + FaceDetailer, cada una activable), encola, espera, descarga el PNG y si judge=True lo puntua con el panel comfyui-judge. Promueve a una llamada la secuencia base->compose->submit->wait->fetch->judge (issue 0087). Devuelve {ok, prompt_id, image_path, capabilities_active, judge, error}. Impuro: HTTP + disco + API Anthropic." +tags: [comfyui, comfyui-skill, pipelines, mixer, txt2img, lora, ipadapter, controlnet, facedetailer, judge, launcher] +uses_functions: [comfyui_build_txt2img_workflow_py_ml, comfyui_load_skill_py_ml, comfyui_build_skill_workflow_py_ml, comfyui_compose_capabilities_py_ml, comfyui_submit_workflow_py_ml, comfyui_wait_result_py_ml, comfyui_fetch_output_image_py_ml, comfyui_judge_image_py_ml] +uses_types: [] +returns: [] +returns_optional: false +error_type: error_py_core +imports: [comfyui_build_txt2img_workflow_py_ml, comfyui_compose_capabilities_py_ml, comfyui_submit_workflow_py_ml, comfyui_wait_result_py_ml, comfyui_fetch_output_image_py_ml, comfyui_judge_image_py_ml] +params: + - name: base + desc: "Workflow base: dict (API format ya construido), la cadena 'txt2img' (construye con checkpoint+subject), o un slug de skill guardada (carga su receta y la compila con subject)." + - name: subject + desc: "Sujeto/prompt principal. En 'txt2img' es el prompt positivo; en una skill sustituye {subject} en el scaffold." + - name: capabilities + desc: "Dict de capacidades a mezclar tal cual las acepta comfyui_compose_capabilities: {loras, controlnet, ipadapter, hires, facedetailer}. Ausentes/None = desactivadas. None = solo el base. keyword-only." + - name: server + desc: "host:port del servidor ComfyUI (sin esquema). keyword-only." + - name: dest + desc: "Directorio local donde guardar el PNG (None = cwd). keyword-only." 
+ - name: seed + desc: "Semilla de generacion. keyword-only." + - name: judge + desc: "Si True, puntua el PNG con el panel comfyui-judge. keyword-only." + - name: checkpoint + desc: "Checkpoint para base='txt2img' (obligatorio en ese caso). keyword-only." + - name: negative + desc: "Prompt negativo para base='txt2img'. keyword-only." + - name: library_dir + desc: "Raiz de la libreria de skills (base = slug). keyword-only." + - name: wait_timeout + desc: "Segundos maximos esperando al servidor. keyword-only." +output: "dict {ok, base, prompt_id, image_path, prompt_resolved, capabilities_active, judge, error}. capabilities_active = lista de capacidades activadas (evidencia de la mezcla). judge = {verdict, score, votes} o None. Si falla un paso, ok=False y error explica cual." +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/pipelines/comfyui_generate_mixed_oneshot.py" +--- + +# comfyui_generate_mixed_oneshot + +One-shot del **mixer** del grupo [`comfyui-skill`](../../../docs/capabilities/comfyui-skill.md): +de un workflow base + un conjunto de capacidades activables a un PNG **ya puntuado** por el +panel [`comfyui-judge`](../../../docs/capabilities/comfyui-judge.md), en una llamada. El bucle +del juez afina qué capacidades y pesos dan mejor resultado. 
+ +## Ejemplo + +```python +import sys, os +sys.path.insert(0, os.path.join(os.environ["HOME"], "fn_registry", "python", "functions")) +from pipelines.comfyui_generate_mixed_oneshot import comfyui_generate_mixed_oneshot + +# txt2img dreamshaper + 2 LoRAs + FaceDetailer (3 capacidades), juzgado: +res = comfyui_generate_mixed_oneshot( + "txt2img", + "a heroic knight in 3d render style, dramatic lighting", + checkpoint="dreamshaper_8.safetensors", + capabilities={ + "loras": [ + {"name": "3d_render_redmond_sd15.safetensors", "strength_model": 0.9}, + {"name": "detail_tweaker_sd15.safetensors", "strength_model": 0.5}, + ], + "facedetailer": {"denoise": 0.45}, + # "ipadapter": {"ref_image": "face.png", "mode": "faceid"}, # activar/desactivar + # "hires": {"upscale_by": 1.5}, + }, + dest="/tmp/comfy_mixed", seed=42, judge=True, +) +print(res["ok"], res["prompt_id"], res["capabilities_active"], res["judge"]) +``` + +## Cuando usarla + +Cuando quieras **generar mezclando varias capacidades** y obtener de vuelta el +PNG ya puntuado, en una sola llamada — para iterar (activar/desactivar/ajustar +capacidades) guiado por el score del juez. Es la promocion a one-shot de +`compose_capabilities` + el ciclo submit/wait/fetch/judge. + +## Gotchas + +- Impuro: necesita el servidor ComfyUI vivo (`server`) y, si `judge=True`, la API + Anthropic para el juez critico. Las imagenes de referencia/control de IPAdapter + y ControlNet deben estar en el `input/` del servidor antes de llamar. +- `base='txt2img'` exige `checkpoint`. Un slug de skill exige que la skill exista + en `library_dir`. Un `base` dict se usa tal cual. +- Hereda la limitacion del mixer: **hires + facedetailer juntos no encadenan** + (ver `comfyui_compose_capabilities`). Activa uno U otro. +- En 8GB lowvram, apilar muchas capacidades (IPAdapter FaceID + ControlNet + hires + + facedetailer) puede dar OOM y `wait` devolvera el error del servidor: baja + resolucion (`width`/`height` via un base dict) o reduce capacidades. 
+- Si el juez falla pero la imagen se genero, `ok=True` con `error` describiendo el + fallo del panel (la imagen no se pierde). diff --git a/python/functions/pipelines/comfyui_generate_mixed_oneshot.py b/python/functions/pipelines/comfyui_generate_mixed_oneshot.py new file mode 100644 index 00000000..5b39c129 --- /dev/null +++ b/python/functions/pipelines/comfyui_generate_mixed_oneshot.py @@ -0,0 +1,220 @@ +"""comfyui_generate_mixed_oneshot — mezcla de capacidades + subject -> PNG juzgado. + +One-shot del "mixer" del grupo `comfyui-skill`: parte de un workflow base (una +skill guardada, el builder `txt2img`, o un dict ya construido), le aplica el +conjunto de capacidades elegido con `comfyui_compose_capabilities` (LoRAs + +ControlNet + IPAdapter + hires + FaceDetailer, cada una activable), encola, +espera, descarga el PNG y (si `judge=True`) lo puntua con el panel +`comfyui-judge`. Promueve a una sola llamada la secuencia repetida +base -> compose -> submit -> wait -> fetch -> judge (issue 0087). + +Compone funciones del registry: + + comfyui_build_txt2img_workflow_py_ml (base 'txt2img') + comfyui_load_skill_py_ml (base = slug de skill) + comfyui_build_skill_workflow_py_ml (receta + subject -> workflow, base = skill) + comfyui_compose_capabilities_py_ml (mezcla de capacidades, PURA) + comfyui_submit_workflow_py_ml (POST /prompt) + comfyui_wait_result_py_ml (poll /history) + comfyui_fetch_output_image_py_ml (GET /view -> disco) + comfyui_judge_image_py_ml (panel multi-juez) + +Pipeline impuro: red (HTTP) + escritura en disco + (si juzga) API Anthropic. 
from __future__ import annotations

import os
import sys

# Put the parent of this file's directory on sys.path so that the `ml`
# package imported just below can be found.
_FUNCTIONS_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _FUNCTIONS_ROOT not in sys.path:
    sys.path.insert(0, _FUNCTIONS_ROOT)

from ml.comfyui_build_txt2img_workflow import comfyui_build_txt2img_workflow
from ml.comfyui_compose_capabilities import comfyui_compose_capabilities
from ml.comfyui_fetch_output_image import comfyui_fetch_output_image
from ml.comfyui_judge_image import comfyui_judge_image
from ml.comfyui_submit_workflow import comfyui_submit_workflow
from ml.comfyui_wait_result import comfyui_wait_result


def _resolve_skill_prompt(recipe: dict, subject: str) -> str:
    """Return the resolved positive prompt of a skill recipe.

    The result is what the judge later compares the image against.

    Args:
        recipe: skill recipe; only its ``prompt_scaffold`` entry is read
            (keys ``positive`` and ``trigger_words``).
        subject: text that replaces ``{subject}`` in the scaffold. When the
            scaffold has no placeholder the subject is prepended; when the
            scaffold is empty the subject alone is used.

    Returns:
        The positive prompt, prefixed by the trigger words (if any), joined
        with ``", "``.
    """
    scaffold = recipe.get("prompt_scaffold") or {}
    positive = str(scaffold.get("positive", "") or "")
    if "{subject}" in positive:
        positive = positive.replace("{subject}", subject)
    elif not positive:
        positive = subject
    else:
        positive = f"{subject}, {positive}"

    triggers = scaffold.get("trigger_words") or []
    if isinstance(triggers, str):
        # A single trigger given as a plain string must stay one token;
        # list("abc") would split it into characters.
        triggers = [triggers]
    parts = [str(word) for word in triggers]
    if parts:
        if positive:
            parts.append(positive)
        positive = ", ".join(parts)
    return positive


def _resolve_base(base, subject, *, checkpoint, negative, seed, library_dir):
    """Return ``(base_workflow, prompt_resolved)``, dispatching on ``base``.

    - dict      -> used as is; ``prompt_resolved`` is ``subject``.
    - 'txt2img' -> ``comfyui_build_txt2img_workflow(checkpoint, subject,
      negative, seed=seed)``; ``prompt_resolved`` is ``subject``.
    - other str -> treated as a skill slug: the recipe is loaded and compiled
      with ``subject``; ``prompt_resolved`` comes from the recipe scaffold.

    Raises:
        ValueError: ``base`` is not a dict or a non-empty string, 'txt2img'
            was requested without a checkpoint, or the skill failed to load.
        KeyError: the loaded skill result has no ``recipe`` entry.
    """
    if isinstance(base, dict):
        return base, subject

    if base == "txt2img":
        if not checkpoint:
            raise ValueError(
                "comfyui_generate_mixed_oneshot: base='txt2img' requiere checkpoint."
            )
        wf = comfyui_build_txt2img_workflow(checkpoint, subject, negative, seed=seed)
        return wf, subject

    # Reject anything that cannot be a skill slug here, so the caller gets the
    # ValueError it already handles instead of an arbitrary error from the
    # skill loader.
    if not isinstance(base, str) or not base.strip():
        raise ValueError(
            "comfyui_generate_mixed_oneshot: base debe ser un dict, 'txt2img' "
            f"o el slug de una skill (recibido: {base!r})."
        )

    # Any other string is a skill slug. These modules are imported here, so
    # they are only loaded when a skill-based base is actually requested.
    from ml.comfyui_build_skill_workflow import build_skill_workflow
    from ml.comfyui_load_skill import comfyui_load_skill

    loaded = comfyui_load_skill(base, library_dir=library_dir)
    if not loaded.get("ok"):
        raise ValueError(f"load_skill('{base}') fallo: {loaded.get('error')}")
    recipe = loaded["recipe"]
    wf = build_skill_workflow(recipe, subject, seed=seed)
    return wf, _resolve_skill_prompt(recipe, subject)


def _pick_output_image(outputs):
    """Return the image entry to download from the wait-step outputs, or None.

    Walks every node's ``images`` list and returns the first entry whose
    ``type`` is ``"output"`` (a missing ``type`` counts as ``"output"``, the
    same default the fetch call uses). If there is none, the first well-formed
    entry of any type is returned. Entries that are not dicts or lack a
    ``filename`` are skipped.
    """
    if not isinstance(outputs, dict):
        return None
    fallback = None
    for node_out in outputs.values():
        images = node_out.get("images") if isinstance(node_out, dict) else None
        for image in images or []:
            if not isinstance(image, dict) or not image.get("filename"):
                continue
            if image.get("type", "output") == "output":
                return image
            if fallback is None:
                fallback = image
    return fallback


def _judge_image(image_path, prompt, server):
    """Score the image with the judge panel; return ``(summary, error)``.

    ``summary`` is ``{verdict, score, votes}`` on success and ``None``
    otherwise; ``error`` is an empty string on success. This helper never
    raises for ordinary errors: the image is already on disk when it runs, so
    a failing judge must not make the caller lose the generated result.
    """
    try:
        verdict = comfyui_judge_image(image_path, prompt, server=server)
        if not verdict.get("ok"):
            return None, (
                f"juez fallo (imagen generada igualmente): {verdict.get('error')}"
            )
        summary = {"verdict": verdict["verdict"], "score": verdict["score"],
                   "votes": verdict["votes"]}
    except Exception as exc:  # noqa: BLE001 - boundary: keep the image result
        return None, f"juez fallo (imagen generada igualmente): {exc}"
    return summary, ""


def comfyui_generate_mixed_oneshot(
    base,
    subject: str,
    *,
    capabilities: dict | None = None,
    server: str = "127.0.0.1:8188",
    dest: str | None = None,
    seed: int = 0,
    judge: bool = True,
    checkpoint: str | None = None,
    negative: str = "",
    library_dir: str | None = None,
    wait_timeout: float = 600.0,
) -> dict:
    """Generate (and optionally judge) an image mixing capabilities, end to end.

    Runs base -> compose -> submit -> wait -> fetch -> judge in one call.

    Args:
        base: base workflow. A dict (an already built workflow), the string
            'txt2img' (builds one from ``checkpoint`` + ``subject``), or the
            slug of a saved skill (its recipe is loaded and compiled with
            ``subject``).
        subject: main subject/prompt. For 'txt2img' it is the positive prompt;
            for a skill it replaces ``{subject}`` in the scaffold.
        capabilities: dict of capabilities to mix, forwarded as keyword
            arguments to ``comfyui_compose_capabilities`` (loras, controlnet,
            ipadapter, hires, facedetailer). None or empty means no mixing.
        server: host:port of the ComfyUI server.
        dest: local directory for the downloaded image (None -> ".").
        seed: generation seed.
        judge: when True, score the image with the judge panel.
        checkpoint: checkpoint for base='txt2img' (required in that case).
        negative: negative prompt for base='txt2img'.
        library_dir: root of the skill library (used when base is a slug).
        wait_timeout: maximum seconds to wait for the server.

    Returns:
        dict ``{ok, base, prompt_id, image_path, prompt_resolved,
        capabilities_active, judge, error}``. ``capabilities_active`` lists
        the capability keys whose value is not None. ``judge`` is
        ``{verdict, score, votes}`` or None (``judge=False`` or the panel
        failed). If a step up to and including the download fails, ``ok`` is
        False and ``error`` names the step. If only the judge fails, ``ok``
        stays True, ``image_path`` is kept and ``error`` describes the
        failure.
    """
    if isinstance(base, str):
        base_label = base
    elif isinstance(base, dict):
        base_label = "dict"
    else:
        base_label = type(base).__name__

    caps = capabilities or {}
    caps_valid = isinstance(caps, dict)
    caps_active = (
        [key for key, value in caps.items() if value is not None]
        if caps_valid else []
    )
    out = {"ok": False, "base": base_label, "prompt_id": "", "image_path": "",
           "prompt_resolved": "", "capabilities_active": caps_active,
           "judge": None, "error": ""}
    if not caps_valid:
        return {**out, "error": "capabilities debe ser un dict o None."}

    # 1. Resolve the base workflow (skill / txt2img / dict).
    try:
        base_wf, prompt_resolved = _resolve_base(
            base, subject, checkpoint=checkpoint, negative=negative,
            seed=seed, library_dir=library_dir,
        )
    except (ValueError, KeyError) as exc:
        return {**out, "error": f"resolver base fallo: {exc}"}
    out["prompt_resolved"] = prompt_resolved

    # 2. Mix in the capabilities. An unknown capability key surfaces as the
    #    TypeError of an unexpected keyword argument.
    try:
        workflow = comfyui_compose_capabilities(base_wf, **caps)
    except (ValueError, TypeError) as exc:
        return {**out, "error": f"compose_capabilities fallo: {exc}"}

    # 3. Queue the workflow.
    try:
        sub = comfyui_submit_workflow(workflow, server=server)
        prompt_id = sub["prompt_id"]
    except (RuntimeError, KeyError) as exc:
        return {**out, "error": f"submit fallo: {exc}"}
    out["prompt_id"] = prompt_id

    # 4. Wait for completion.
    try:
        outputs = comfyui_wait_result(prompt_id, server=server, timeout=wait_timeout)
    except (TimeoutError, RuntimeError) as exc:
        return {**out, "error": f"wait fallo: {exc}"}

    # 5. Locate the image to download, preferring saved outputs over previews.
    img = _pick_output_image(outputs)
    if img is None:
        seen = list(outputs) if isinstance(outputs, dict) else outputs
        return {**out, "error": f"el workflow no produjo imagenes (outputs={seen})"}

    # 6. Download the image to disk.
    fetched = comfyui_fetch_output_image(
        img["filename"], subfolder=img.get("subfolder", ""),
        type_=img.get("type", "output"), server=server, dest_dir=dest or ".",
    )
    if not fetched.get("ok"):
        return {**out, "error": f"fetch de imagen fallo: {fetched.get('error')}"}
    out["image_path"] = fetched["path"]
    # From here on the image exists, so the call counts as successful even if
    # the judge fails.
    out["ok"] = True

    if not judge:
        return out

    # 7. Judge the result with the multi-judge panel.
    out["judge"], out["error"] = _judge_image(
        out["image_path"], prompt_resolved, server
    )
    return out


# Alias with the full ID name, for discovery by convention.
generate_mixed_oneshot = comfyui_generate_mixed_oneshot


if __name__ == "__main__":
    import json

    res = comfyui_generate_mixed_oneshot(
        "txt2img",
        "a heroic knight in 3d render style, dramatic lighting",
        checkpoint="dreamshaper_8.safetensors",
        capabilities={
            "loras": [
                {"name": "3d_render_redmond_sd15.safetensors", "strength_model": 0.9},
                {"name": "detail_tweaker_sd15.safetensors", "strength_model": 0.5},
            ],
            "facedetailer": {"denoise": 0.45},
        },
        dest="/tmp/comfy_mixed", seed=42, judge=True,
    )
    print(json.dumps(res, indent=2, ensure_ascii=False))