feat(ml): núcleo subsistema comfyui-skill + ask_llm_vision

Grupo nuevo comfyui-skill: recetas versionadas de generación ComfyUI que
compilan a un workflow cambiando solo el subject.

- comfyui_build_skill_workflow (pura): receta -> workflow API format,
  despacha base (txt2img/flux/sdxl_refiner), sustituye {subject}+triggers,
  encadena loras e inject blocks (facedetailer, hires_fix). SkillWorkflowError tipada.
- comfyui_inject_hires_fix (pura): inyecta 2ª pasada UltimateSDUpscale sobre dict.
- comfyui_save/load/list_skill (impuras): CRUD de la librería en disco con
  versionado por snapshots, round-trip idéntico, filtro NSFW.
- ask_llm_vision (core, claude-direct): pregunta multimodal imagen+texto via
  API directa Anthropic, para puntuar generaciones.
- Página madre docs/capabilities/comfyui-skill.md con schema canónico de recipe.json.

Tests offline: 11 verdes (6 builder + 5 inject_hires_fix). Sin GPU.
This commit is contained in:
agent
2026-06-24 14:35:46 +02:00
parent e8a66f0dad
commit 70d541fca9
15 changed files with 1666 additions and 0 deletions
@@ -0,0 +1,102 @@
"""Tests de estructura para comfyui_build_skill_workflow (funcion pura).
Compila recetas de skill a workflows ComfyUI en API format y verifica que:
- el golden (txt2img + 1 LoRA + facedetailer) produce un dict bien formado con los
class_types esperados y el subject sustituido,
- el edge (sin loras ni blocks) produce el workflow base minimo,
- los params (seed/steps/cfg) se reflejan,
- los error paths lanzan SkillWorkflowError (base desconocido / base que pide imagen / receta que no es un dict).
Offline: no toca GPU ni server.
"""
import os
import sys
sys.path.insert(0, os.path.dirname(__file__))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
import pytest # noqa: E402
from ml.comfyui_build_skill_workflow import build_skill_workflow, SkillWorkflowError # noqa: E402
from _comfyui_wf_assert import assert_api_format, class_types, node_by_ct # noqa: E402
def _recipe_base(**over):
r = {
"schema_version": 1,
"slug": "portrait_cinematic_sdxl",
"version": "1.0.0",
"base_workflow": "txt2img",
"checkpoint": "juggernaut_xl_v11.safetensors",
"loras": [],
"params": {"steps": 30, "cfg": 5.5, "sampler_name": "dpmpp_2m",
"scheduler": "karras", "width": 832, "height": 1216},
"prompt_scaffold": {"positive": "cinematic portrait of {subject}, sharp focus",
"negative": "blurry, lowres", "trigger_words": []},
"blocks": [],
"provenance": {"source": "manual", "nsfw": False},
}
r.update(over)
return r
def test_golden_txt2img_lora_facedetailer():
    """Golden recipe: txt2img base plus one LoRA and a facedetailer block."""
    lora_spec = {"name": "add_detail.safetensors", "strength_model": 0.6, "strength_clip": 0.6}
    face_block = {"type": "facedetailer", "params": {"denoise": 0.45}}
    recipe = _recipe_base(loras=[lora_spec], blocks=[face_block])

    wf = build_skill_workflow(recipe, "a woman with red hair", seed=42)
    assert_api_format(wf)

    # Base nodes plus the LoRA loader and the face-detailer block must be present.
    present = class_types(wf)
    for expected in (
        "CheckpointLoaderSimple",
        "KSampler",
        "LoraLoader",
        "FaceDetailer",
        "SaveImage",
    ):
        assert expected in present

    # The seed is propagated to the base KSampler.
    sampler_inputs = node_by_ct(wf, "KSampler")["inputs"]
    assert sampler_inputs["seed"] == 42

    # The subject is substituted into at least one CLIPTextEncode prompt.
    prompts = [
        node["inputs"]["text"]
        for node in wf.values()
        if node["class_type"] == "CLIPTextEncode"
    ]
    assert any("a woman with red hair" in prompt for prompt in prompts)

    # The LoRA node keeps the name and model strength given in the recipe.
    lora = node_by_ct(wf, "LoraLoader")["inputs"]
    assert lora["lora_name"] == "add_detail.safetensors"
    assert lora["strength_model"] == 0.6
def test_edge_sin_loras_ni_blocks_da_workflow_base_minimo():
    """A recipe with no LoRAs and no blocks yields exactly the minimal base node set."""
    minimal_nodes = {
        "CheckpointLoaderSimple",
        "CLIPTextEncode",
        "EmptyLatentImage",
        "KSampler",
        "VAEDecode",
        "SaveImage",
    }
    wf = build_skill_workflow(_recipe_base(), "a red apple", seed=7)
    assert_api_format(wf)

    found = class_types(wf)
    assert found == minimal_nodes
    # Explicit negative checks, kept for readable failure output.
    assert "LoraLoader" not in found
    assert "FaceDetailer" not in found
def test_params_y_trigger_words_se_reflejan():
    """Recipe params, the seed and the trigger words show up in the compiled workflow."""
    recipe = _recipe_base()
    recipe["prompt_scaffold"].update(trigger_words=["masterpiece", "8k"])

    wf = build_skill_workflow(recipe, "a cat", seed=3)

    sampler = node_by_ct(wf, "KSampler")["inputs"]
    assert sampler["steps"] == 30
    assert sampler["cfg"] == 5.5
    assert sampler["seed"] == 3

    latent = node_by_ct(wf, "EmptyLatentImage")["inputs"]
    assert latent["width"] == 832
    assert latent["height"] == 1216

    # At least one encoded prompt must start with the trigger words.
    prompts = [
        node["inputs"]["text"]
        for node in wf.values()
        if node["class_type"] == "CLIPTextEncode"
    ]
    assert any(prompt.startswith("masterpiece, 8k") for prompt in prompts)
def test_error_base_workflow_desconocido():
    """An unrecognised ``base_workflow`` value raises SkillWorkflowError."""
    recipe = _recipe_base(base_workflow="diffusion_xyz")
    # Only the call under test sits inside the raises context.
    with pytest.raises(SkillWorkflowError):
        build_skill_workflow(recipe, "x")
def test_error_base_que_requiere_imagen():
    """The ``img2img`` base, called here with only a subject, raises SkillWorkflowError."""
    recipe = _recipe_base(base_workflow="img2img")
    # Only the call under test sits inside the raises context.
    with pytest.raises(SkillWorkflowError):
        build_skill_workflow(recipe, "x")
def test_error_recipe_no_dict():
    """A recipe that is not a dict (here a list) raises SkillWorkflowError."""
    not_a_recipe = ["no", "dict"]
    with pytest.raises(SkillWorkflowError):
        build_skill_workflow(not_a_recipe, "x")
@@ -0,0 +1,71 @@
"""Tests de estructura y pureza para comfyui_inject_hires_fix (funcion pura)."""
import os
import sys
import pytest
sys.path.insert(0, os.path.dirname(__file__))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
from ml.comfyui_build_txt2img_workflow import comfyui_build_txt2img_workflow
from ml.comfyui_inject_hires_fix import comfyui_inject_hires_fix
from _comfyui_wf_assert import assert_api_format, class_types
def _node_id(wf, ct):
return next(nid for nid, n in wf.items() if n["class_type"] == ct)
def test_no_muta_la_entrada():
    """Injecting the hires-fix pass must leave the caller's workflow untouched.

    The snapshot is a true deep copy and is compared for full equality, so
    in-place edits to a nested ``inputs`` dict are caught as well as node
    additions or removals.
    """
    import copy  # local import: keeps this test self-contained

    base = comfyui_build_txt2img_workflow("ck.safetensors", "POS", "NEG")
    base_antes = copy.deepcopy(base)

    _ = comfyui_inject_hires_fix(base)

    # No injected nodes may leak into the original dict.
    assert "UltimateSDUpscale" not in class_types(base)
    assert "UpscaleModelLoader" not in class_types(base)
    assert set(base) == set(base_antes)
    # Full structural comparison: also detects rewired or edited node inputs.
    assert base == base_antes
def test_inserta_ultimatesdupscale_y_loader():
    """The injected workflow is well-formed and has the upscale node and its model loader."""
    workflow = comfyui_build_txt2img_workflow("ck.safetensors", "POS", "NEG")
    injected = comfyui_inject_hires_fix(workflow)
    assert_api_format(injected)
    for expected in ("UltimateSDUpscale", "UpscaleModelLoader"):
        assert expected in class_types(injected)
def test_repunta_el_saveimage_al_upscale():
    """After injection, SaveImage takes its images from the UltimateSDUpscale node."""
    workflow = comfyui_build_txt2img_workflow("ck.safetensors", "POS", "NEG")
    injected = comfyui_inject_hires_fix(workflow)

    upscale_id = _node_id(injected, "UltimateSDUpscale")
    save_node = injected[_node_id(injected, "SaveImage")]

    # The link's first element is the source node id: the upscale, not the VAEDecode.
    source_id = save_node["inputs"]["images"][0]
    assert source_id == upscale_id
def test_params_reflejados():
    """Explicit keyword arguments reach the injected nodes; other inputs keep fixed values."""
    overrides = {
        "upscale_by": 2.5,
        "denoise": 0.33,
        "seed": 123,
        "upscale_model": "x4.pth",
    }
    workflow = comfyui_build_txt2img_workflow("ck.safetensors", "POS", "NEG")
    injected = comfyui_inject_hires_fix(workflow, **overrides)

    upscale_id = _node_id(injected, "UltimateSDUpscale")
    loader_id = _node_id(injected, "UpscaleModelLoader")
    upscale_inputs = injected[upscale_id]["inputs"]

    assert upscale_inputs["upscale_by"] == 2.5
    assert upscale_inputs["denoise"] == 0.33
    assert upscale_inputs["seed"] == 123
    assert injected[loader_id]["inputs"]["model_name"] == "x4.pth"

    # Fixed defaults copied from the sibling builder.
    assert upscale_inputs["mode_type"] == "Linear"
    assert upscale_inputs["force_uniform_tiles"] is True
    assert upscale_inputs["tiled_decode"] is False
def test_lanza_valueerror_sin_vaedecode():
    """A workflow with no VAEDecode node makes the injector raise ValueError."""
    # Only a checkpoint loader: there is no image source to attach the upscale to.
    checkpoint_only = {
        "4": {
            "class_type": "CheckpointLoaderSimple",
            "inputs": {"ckpt_name": "x"},
        },
    }
    with pytest.raises(ValueError):
        comfyui_inject_hires_fix(checkpoint_only)