feat(ml): mixer de capacidades comfyui (compose + generate_mixed_oneshot + inject controlnet/ipadapter)
Mezclador del grupo comfyui-skill que promueve a una sola llamada la secuencia base -> compose -> submit -> wait -> fetch -> judge (issue 0087): - comfyui_compose_capabilities_py_ml (PURA): aplica en orden las capacidades activadas (loras, controlnet, ipadapter, facedetailer, hires) sobre un workflow base, sin mutar la entrada. - comfyui_generate_mixed_oneshot_py_pipelines: one-shot que resuelve el base (skill/txt2img/dict), compone, encola, espera, descarga el PNG y lo puntua con el panel comfyui-judge. - comfyui_inject_controlnet_py_ml, comfyui_inject_ipadapter_py_ml: inyectores encadenables que consume el compose. - Tests (24 passed) + pagina madre docs/capabilities/comfyui-skill.md. Prueba real en GPU: txt2img dreamshaper_8 + 2 LoRAs (3d_render_redmond + detail_tweaker) + FaceDetailer -> imagen 512x512 en ~24s, juez verdict 'good' (score 4.69, votos aesthetic+clip good; voto llm degradado por rate-limit 429). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,220 @@
|
||||
"""comfyui_generate_mixed_oneshot — mezcla de capacidades + subject -> PNG juzgado.
|
||||
|
||||
One-shot del "mixer" del grupo `comfyui-skill`: parte de un workflow base (una
|
||||
skill guardada, el builder `txt2img`, o un dict ya construido), le aplica el
|
||||
conjunto de capacidades elegido con `comfyui_compose_capabilities` (LoRAs +
|
||||
ControlNet + IPAdapter + hires + FaceDetailer, cada una activable), encola,
|
||||
espera, descarga el PNG y (si `judge=True`) lo puntua con el panel
|
||||
`comfyui-judge`. Promueve a una sola llamada la secuencia repetida
|
||||
base -> compose -> submit -> wait -> fetch -> judge (issue 0087).
|
||||
|
||||
Compone funciones del registry:
|
||||
|
||||
comfyui_build_txt2img_workflow_py_ml (base 'txt2img')
|
||||
comfyui_load_skill_py_ml (base = slug de skill)
|
||||
comfyui_build_skill_workflow_py_ml (receta + subject -> workflow, base = skill)
|
||||
comfyui_compose_capabilities_py_ml (mezcla de capacidades, PURA)
|
||||
comfyui_submit_workflow_py_ml (POST /prompt)
|
||||
comfyui_wait_result_py_ml (poll /history)
|
||||
comfyui_fetch_output_image_py_ml (GET /view -> disco)
|
||||
comfyui_judge_image_py_ml (panel multi-juez)
|
||||
|
||||
Pipeline impuro: red (HTTP) + escritura en disco + (si juzga) API Anthropic.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
_FUNCTIONS_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
if _FUNCTIONS_ROOT not in sys.path:
|
||||
sys.path.insert(0, _FUNCTIONS_ROOT)
|
||||
|
||||
from ml.comfyui_build_txt2img_workflow import comfyui_build_txt2img_workflow
|
||||
from ml.comfyui_compose_capabilities import comfyui_compose_capabilities
|
||||
from ml.comfyui_fetch_output_image import comfyui_fetch_output_image
|
||||
from ml.comfyui_judge_image import comfyui_judge_image
|
||||
from ml.comfyui_submit_workflow import comfyui_submit_workflow
|
||||
from ml.comfyui_wait_result import comfyui_wait_result
|
||||
|
||||
|
||||
def _resolve_skill_prompt(recipe: dict, subject: str) -> str:
|
||||
"""Prompt positivo resuelto de una receta de skill (para el juez de fidelidad)."""
|
||||
scaffold = recipe.get("prompt_scaffold") or {}
|
||||
positive = str(scaffold.get("positive", "") or "")
|
||||
if "{subject}" in positive:
|
||||
positive = positive.replace("{subject}", subject)
|
||||
elif not positive:
|
||||
positive = subject
|
||||
else:
|
||||
positive = f"{subject}, {positive}"
|
||||
triggers = scaffold.get("trigger_words") or []
|
||||
if triggers:
|
||||
positive = ", ".join(list(triggers) + [positive]) if positive else ", ".join(triggers)
|
||||
return positive
|
||||
|
||||
|
||||
def _resolve_base(base, subject, *, checkpoint, negative, seed, library_dir):
|
||||
"""Devuelve (workflow_base, prompt_resolved). Despacha por tipo de `base`.
|
||||
|
||||
- dict -> se usa tal cual; prompt_resolved = subject.
|
||||
- 'txt2img' -> comfyui_build_txt2img_workflow(checkpoint, subject, negative).
|
||||
- otro str -> slug de skill: load_skill + build_skill_workflow.
|
||||
"""
|
||||
if isinstance(base, dict):
|
||||
return base, subject
|
||||
|
||||
if base == "txt2img":
|
||||
if not checkpoint:
|
||||
raise ValueError(
|
||||
"comfyui_generate_mixed_oneshot: base='txt2img' requiere checkpoint."
|
||||
)
|
||||
wf = comfyui_build_txt2img_workflow(checkpoint, subject, negative, seed=seed)
|
||||
return wf, subject
|
||||
|
||||
# Cualquier otro str se trata como slug de skill.
|
||||
from ml.comfyui_build_skill_workflow import build_skill_workflow
|
||||
from ml.comfyui_load_skill import comfyui_load_skill
|
||||
|
||||
loaded = comfyui_load_skill(base, library_dir=library_dir)
|
||||
if not loaded.get("ok"):
|
||||
raise ValueError(f"load_skill('{base}') fallo: {loaded.get('error')}")
|
||||
recipe = loaded["recipe"]
|
||||
wf = build_skill_workflow(recipe, subject, seed=seed)
|
||||
return wf, _resolve_skill_prompt(recipe, subject)
|
||||
|
||||
|
||||
def comfyui_generate_mixed_oneshot(
    base,
    subject: str,
    *,
    capabilities: dict | None = None,
    server: str = "127.0.0.1:8188",
    dest: str | None = None,
    seed: int = 0,
    judge: bool = True,
    checkpoint: str | None = None,
    negative: str = "",
    library_dir: str | None = None,
    wait_timeout: float = 600.0,
) -> dict:
    """Generate (and optionally judge) one image from a base workflow plus capabilities.

    Runs the whole chain in one call: resolve base -> compose capabilities ->
    submit -> wait -> fetch image -> judge.

    Args:
        base: base workflow. Either a dict (used as the workflow as-is), the
            string ``'txt2img'`` (a workflow is built from `checkpoint` and
            `subject`), or any other string, which is treated as a saved-skill
            slug.
        subject: main subject/prompt. For ``'txt2img'`` it is the positive
            prompt; for a skill it is injected into the recipe's scaffold.
        capabilities: mapping forwarded as keyword arguments to
            ``comfyui_compose_capabilities``. ``None`` means no extra
            keyword arguments are passed.
        server: ``host:port`` of the ComfyUI server.
        dest: local directory for the downloaded image; ``None`` or empty
            falls back to ``"."``.
        seed: generation seed, forwarded when the base workflow is built.
        judge: when true, the downloaded image is scored with
            ``comfyui_judge_image``.
        checkpoint: checkpoint name, required when ``base == 'txt2img'``.
        negative: negative prompt, used when ``base == 'txt2img'``.
        library_dir: skill library root, used when `base` is a skill slug.
        wait_timeout: timeout passed to ``comfyui_wait_result``.

    Returns:
        A dict with keys ``ok``, ``base``, ``prompt_id``, ``image_path``,
        ``prompt_resolved``, ``capabilities_active``, ``judge`` and ``error``.
        ``capabilities_active`` lists the capability keys whose value is not
        ``None``. ``judge`` is ``{verdict, score, votes}`` when judging
        succeeded, otherwise ``None``. When a step up to and including the
        image download fails, ``ok`` is ``False`` and ``error`` names the
        step. When only the judge fails, ``ok`` stays ``True`` and ``error``
        carries the judge failure.
    """
    capability_map = capabilities or {}
    result = {
        "ok": False,
        "base": base if isinstance(base, str) else "dict",
        "prompt_id": "",
        "image_path": "",
        "prompt_resolved": "",
        "capabilities_active": [
            name for name, spec in capability_map.items() if spec is not None
        ],
        "judge": None,
        "error": "",
    }

    def failed(message: str) -> dict:
        # Snapshot of whatever has been collected so far, plus the error text.
        return {**result, "error": message}

    # Step 1: resolve the base workflow (skill / txt2img / dict).
    try:
        base_workflow, resolved_prompt = _resolve_base(
            base,
            subject,
            checkpoint=checkpoint,
            negative=negative,
            seed=seed,
            library_dir=library_dir,
        )
    except (ValueError, KeyError) as exc:
        return failed(f"resolver base fallo: {exc}")
    result["prompt_resolved"] = resolved_prompt

    # Step 2: mix the requested capabilities into the base workflow.
    try:
        mixed_workflow = comfyui_compose_capabilities(base_workflow, **capability_map)
    except (ValueError, TypeError) as exc:
        return failed(f"compose_capabilities fallo: {exc}")

    # Step 3: queue the workflow on the server.
    try:
        submission = comfyui_submit_workflow(mixed_workflow, server=server)
        queued_id = submission["prompt_id"]
    except (RuntimeError, KeyError) as exc:
        return failed(f"submit fallo: {exc}")
    result["prompt_id"] = queued_id

    # Step 4: block until the server reports the outputs.
    try:
        history_outputs = comfyui_wait_result(
            queued_id, server=server, timeout=wait_timeout
        )
    except (TimeoutError, RuntimeError) as exc:
        return failed(f"wait fallo: {exc}")

    # Step 5: take the first image of the first node that produced any.
    first_image = None
    for node_output in history_outputs.values():
        if not isinstance(node_output, dict):
            continue
        node_images = node_output.get("images")
        if node_images:
            first_image = node_images[0]
            break
    if first_image is None:
        return failed(
            f"el workflow no produjo imagenes (outputs={list(history_outputs)})"
        )

    # Step 6: download that image to disk.
    download = comfyui_fetch_output_image(
        first_image["filename"],
        subfolder=first_image.get("subfolder", ""),
        type_=first_image.get("type", "output"),
        server=server,
        dest_dir=dest or ".",
    )
    if not download.get("ok"):
        return failed(f"fetch de imagen fallo: {download.get('error')}")
    result["image_path"] = download["path"]
    result["ok"] = True

    # Step 7 (optional): score the image. A judge failure does not undo ok=True.
    if judge:
        panel = comfyui_judge_image(
            result["image_path"], resolved_prompt, server=server
        )
        if panel.get("ok"):
            result["judge"] = {
                "verdict": panel["verdict"],
                "score": panel["score"],
                "votes": panel["votes"],
            }
        else:
            result["error"] = (
                f"juez fallo (imagen generada igualmente): {panel.get('error')}"
            )
    return result
|
||||
|
||||
|
||||
# Short alias (no `comfyui_` prefix) kept for convention-based discovery.
generate_mixed_oneshot = comfyui_generate_mixed_oneshot


if __name__ == "__main__":
    import json

    # Demo run: txt2img base + two LoRAs + FaceDetailer, judged at the end.
    demo_capabilities = {
        "loras": [
            {"name": "3d_render_redmond_sd15.safetensors", "strength_model": 0.9},
            {"name": "detail_tweaker_sd15.safetensors", "strength_model": 0.5},
        ],
        "facedetailer": {"denoise": 0.45},
    }
    report = comfyui_generate_mixed_oneshot(
        "txt2img",
        "a heroic knight in 3d render style, dramatic lighting",
        checkpoint="dreamshaper_8.safetensors",
        capabilities=demo_capabilities,
        dest="/tmp/comfy_mixed",
        seed=42,
        judge=True,
    )
    print(json.dumps(report, indent=2, ensure_ascii=False))
|
||||
Reference in New Issue
Block a user