feat(ml): cierre del bucle de mejora comfyui-skill (genera→juzga→bump)

Tres funciones nuevas que cierran el lazo skill→generación→juicio→promoción
del grupo comfyui-skill (issue 0087):

- comfyui_bump_skill_version (impura): promueve una versión nueva SOLO si el
  score del panel-juez sube (gate objetivo). Snapshot versions/vN.json
  pre-mutación, deep-merge de recipe_patch, semver↑, línea en growth_log.jsonl.
  force=True salta el gate. No usa datetime.now().
- comfyui_update_skill_score (impura): media incremental de score_mean/score_n
  reescribiendo recipe.json in-place (sin snapshot ni growth_log).
- comfyui_generate_with_skill_oneshot (pipeline): one-shot load→build→submit→
  wait→fetch→judge→score_mean. recipe_patch prueba variantes sin guardar score.
  Compone 7 funciones del registry.

Tests offline: 11 passed (gate, semver, deep-merge, media incremental, errores).
Página madre docs/capabilities/comfyui-skill.md: +3 funciones, sección "Bucle de
mejora" con diagrama, fronteras de scoring actualizadas.

Demo real verificada: skill seed portrait_cinematic_sd15 (SD1.5) generó imagen
SFW real, el panel la juzgó, una variante puntuó más alto (4.787 > 4.7276) y el
gate promovió v1.0.0→v1.1.0 con el judge_run_id como evidencia.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-24 15:09:33 +02:00
parent 974cc06bc7
commit bcf731275e
9 changed files with 1046 additions and 3 deletions
@@ -0,0 +1,127 @@
"""Tests offline para comfyui_bump_skill_version (impura, sin red/GPU).

Verifican el contrato del bucle de mejora contra una librería temporal:
- golden: score sube → promueve, snapshot pre-mutación, semver subido,
  recipe_patch aplicado, growth_log con una línea bien formada,
- edge: bump major/patch + deep-merge anidado sin pisar otras claves,
- error: gate (score no mejora sin force) → ok=False; force salta el gate;
  skill inexistente.
"""
import json
import os
import sys
sys.path.insert(0, os.path.dirname(__file__))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
import pytest # noqa: E402
from ml.comfyui_bump_skill_version import comfyui_bump_skill_version, _bump_semver, _deep_merge # noqa: E402
def _seed_skill(lib, slug="demo_skill", version="1.0.0", **extra):
    """Create a minimal skill directory inside *lib* and return its path.

    The directory gets an empty ``versions/`` subfolder and a ``recipe.json``
    holding a fixed baseline recipe. Any keyword passed through ``extra`` is
    merged on top of the baseline (overriding a baseline key of the same name).
    """
    skill_dir = os.path.join(lib, slug)
    versions_dir = os.path.join(skill_dir, "versions")
    os.makedirs(versions_dir, exist_ok=True)

    recipe = dict(
        schema_version=1,
        slug=slug,
        version=version,
        base_workflow="txt2img",
        checkpoint="dreamshaper_8.safetensors",
        params={"steps": 28, "cfg": 6.0},
    )
    # Caller-supplied fields win over the baseline values.
    recipe.update(extra)

    recipe_path = os.path.join(skill_dir, "recipe.json")
    with open(recipe_path, "w", encoding="utf-8") as handle:
        handle.write(json.dumps(recipe))
    return skill_dir
def test_semver_helper():
    """Check _bump_semver for each bump kind plus a malformed version string."""
    cases = (
        ("1.0.0", "minor", "1.1.0"),
        ("1.2.3", "major", "2.0.0"),
        ("1.2.3", "patch", "1.2.4"),
        # A malformed version is treated as a 0.0.0 base before bumping.
        ("nan", "minor", "0.1.0"),
    )
    for current, part, expected in cases:
        assert _bump_semver(current, part) == expected
def test_deep_merge_no_pisa_otras_claves():
    """A nested patch only replaces the patched key and leaves the input intact."""
    original = {"params": {"steps": 28, "cfg": 6.0}, "checkpoint": "a"}
    patch = {"params": {"steps": 32}}

    merged = _deep_merge(original, patch)

    expected = {"params": {"steps": 32, "cfg": 6.0}, "checkpoint": "a"}
    assert merged == expected
    # The source dictionary must not be mutated by the merge.
    assert original["params"]["steps"] == 28
def test_golden_promueve_cuando_score_sube(tmp_path):
    """Golden path: a higher score promotes the skill and records the evidence.

    Checks the returned versions, the pre-mutation snapshot, the rewritten
    recipe.json and the single entry appended to growth_log.jsonl.
    """
    library = str(tmp_path)
    _seed_skill(library)
    skill_dir = os.path.join(library, "demo_skill")

    result = comfyui_bump_skill_version(
        "demo_skill",
        "subir steps a 32",
        score_before=6.5,
        score_after=7.4,
        judge_run_id="judge_xyz",
        recipe_patch={"params": {"steps": 32}},
        library_dir=library,
    )
    assert result["ok"] is True
    assert result["old_version"] == "1.0.0"
    assert result["new_version"] == "1.1.0"

    # The snapshot taken before the mutation keeps the old recipe (steps 28).
    with open(result["snapshot_file"], encoding="utf-8") as fh:
        snapshot = json.load(fh)
    assert snapshot["params"]["steps"] == 28
    assert snapshot["version"] == "1.0.0"

    # recipe.json was mutated: patch applied, semver bumped, cfg untouched.
    recipe_path = os.path.join(skill_dir, "recipe.json")
    with open(recipe_path, encoding="utf-8") as fh:
        current = json.load(fh)
    assert current["params"]["steps"] == 32
    assert current["params"]["cfg"] == 6.0
    assert current["version"] == "1.1.0"

    # growth_log: exactly one line carrying the evidence.
    log_path = os.path.join(skill_dir, "growth_log.jsonl")
    entries = []
    with open(log_path, encoding="utf-8") as fh:
        for raw_line in fh:
            if raw_line.strip():
                entries.append(json.loads(raw_line))
    assert len(entries) == 1

    entry = entries[0]
    expected_fields = {
        "version": "1.1.0",
        "score_before": 6.5,
        "score_after": 7.4,
        "judge_run_id": "judge_xyz",
        "diff": {"params": {"steps": 32}},
    }
    for key, expected in expected_fields.items():
        assert entry[key] == expected
def test_edge_major_y_patch(tmp_path):
    """Edge case: "major" and "patch" bumps from 1.4.2 give 2.0.0 and 1.4.3."""
    library = str(tmp_path)
    scenarios = (
        # slug, change note, score_after, bump kind, expected new version
        ("s_major", "rework", 8.0, "major", "2.0.0"),
        ("s_patch", "tweak", 5.1, "patch", "1.4.3"),
    )
    for slug, note, score_after, kind, expected in scenarios:
        _seed_skill(library, slug=slug, version="1.4.2")
        outcome = comfyui_bump_skill_version(
            slug, note, score_before=5.0, score_after=score_after,
            bump=kind, library_dir=library)
        assert outcome["new_version"] == expected
def test_error_gate_bloquea_si_no_mejora(tmp_path):
    """Gate: a score that does not improve is rejected and leaves no trace.

    Besides the error result, this verifies every "nothing was touched" claim:
    no snapshot written under ``versions/``, no ``growth_log.jsonl`` created,
    and ``recipe.json`` identical to what was seeded (version still 1.0.0).
    """
    lib = str(tmp_path)
    sdir = _seed_skill(lib)
    recipe_path = os.path.join(sdir, "recipe.json")
    with open(recipe_path, encoding="utf-8") as f:
        recipe_before = json.load(f)

    res = comfyui_bump_skill_version("demo_skill", "no mejora", score_before=7.0,
                                     score_after=6.5, library_dir=lib)
    assert res["ok"] is False
    assert "gate" in res["error"]

    # No snapshot: _seed_skill creates versions/ empty, so it must stay empty.
    assert os.listdir(os.path.join(sdir, "versions")) == []
    # No growth log entry.
    assert not os.path.exists(os.path.join(sdir, "growth_log.jsonl"))
    # No recipe change: same version and same content overall.
    with open(recipe_path, encoding="utf-8") as f:
        recipe_after = json.load(f)
    assert recipe_after["version"] == "1.0.0"
    assert recipe_after == recipe_before
def test_error_force_salta_gate(tmp_path):
    """With force=True a non-improving score is still promoted and flagged."""
    library = str(tmp_path)
    _seed_skill(library)

    call_kwargs = dict(score_before=7.0, score_after=6.5, force=True,
                       library_dir=library)
    outcome = comfyui_bump_skill_version("demo_skill", "forzado", **call_kwargs)

    assert outcome["ok"] is True
    assert outcome["new_version"] == "1.1.0"
    # The growth entry must record that the gate was bypassed.
    growth_entry = outcome["growth_entry"]
    assert growth_entry["forced"] is True
def test_error_skill_inexistente(tmp_path):
    """Bumping a slug that is not in the library returns an error result."""
    empty_library = str(tmp_path)
    outcome = comfyui_bump_skill_version(
        "no_existe", "x", score_before=1.0, score_after=2.0,
        library_dir=empty_library)
    assert outcome["ok"] is False
    assert "no encontrada" in outcome["error"]
if __name__ == "__main__":
    # Allow running this file directly; propagate pytest's exit status.
    exit_status = pytest.main([__file__, "-q"])
    sys.exit(exit_status)
@@ -0,0 +1,75 @@
"""Tests offline para comfyui_update_skill_score (impura, sin red).

- golden: media incremental correcta tras varios juicios; reescribe in-place
  sin crear snapshots ni growth_log,
- edge: arranca desde score_mean/score_n ausentes (skill recién creada),
- error: skill inexistente / score no numérico → ok=False.
"""
import json
import os
import sys
sys.path.insert(0, os.path.dirname(__file__))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
import pytest # noqa: E402
from ml.comfyui_update_skill_score import comfyui_update_skill_score # noqa: E402
def _seed(lib, slug="demo", **extra):
    """Create a skill directory with a minimal recipe.json and return its path.

    Keywords passed through ``extra`` are merged on top of the baseline recipe
    (overriding a baseline key of the same name).
    """
    skill_dir = os.path.join(lib, slug)
    os.makedirs(skill_dir, exist_ok=True)

    recipe = dict(
        schema_version=1,
        slug=slug,
        version="1.0.0",
        base_workflow="txt2img",
    )
    # Caller-supplied fields (e.g. score_mean / score_n) win over the baseline.
    recipe.update(extra)

    recipe_path = os.path.join(skill_dir, "recipe.json")
    with open(recipe_path, "w", encoding="utf-8") as handle:
        handle.write(json.dumps(recipe))
    return skill_dir
def test_golden_media_incremental(tmp_path):
    """Golden path: three scores produce the expected running mean and count.

    Also checks that the update happens in place: neither a versions/ folder
    nor a growth_log.jsonl is created, and recipe.json holds the final count.
    """
    library = str(tmp_path)
    skill_dir = _seed(library, score_mean=0.0, score_n=0)

    first = comfyui_update_skill_score("demo", 7.0, library_dir=library)
    assert first["ok"]
    assert first["score_n"] == 1
    assert first["score_mean"] == 7.0

    second = comfyui_update_skill_score("demo", 8.0, library_dir=library)
    assert second["score_n"] == 2
    assert second["score_mean"] == 7.5

    third = comfyui_update_skill_score("demo", 6.0, library_dir=library)
    assert third["score_n"] == 3
    # (7 + 8 + 6) / 3, compared with a tolerance because it is a float mean.
    assert abs(third["score_mean"] - 7.0) < 1e-9

    # In place: this helper creates neither versions/ nor a growth log.
    for artefact in ("versions", "growth_log.jsonl"):
        assert not os.path.exists(os.path.join(skill_dir, artefact))

    with open(os.path.join(skill_dir, "recipe.json"), encoding="utf-8") as fh:
        persisted = json.load(fh)
    assert persisted["score_n"] == 3
def test_edge_sin_campos_previos(tmp_path):
    """Edge case: a recipe without score_mean/score_n starts counting from zero."""
    library = str(tmp_path)
    # Seeded without score_mean / score_n on purpose.
    _seed(library, slug="fresh")

    outcome = comfyui_update_skill_score("fresh", 9.0, library_dir=library)

    assert outcome["ok"]
    assert outcome["score_mean"] == 9.0
    assert outcome["score_n"] == 1
    assert outcome["prev_score_n"] == 0
def test_error_skill_inexistente(tmp_path):
    """Scoring a slug that is not in the library returns an error result."""
    empty_library = str(tmp_path)
    outcome = comfyui_update_skill_score("nope", 5.0, library_dir=empty_library)
    assert outcome["ok"] is False
    assert "no encontrada" in outcome["error"]
def test_error_score_no_numerico(tmp_path):
    """A non-numeric score is rejected with an error result."""
    library = str(tmp_path)
    _seed(library, slug="x", score_mean=0.0, score_n=0)

    outcome = comfyui_update_skill_score("x", "siete", library_dir=library)

    assert outcome["ok"] is False
    assert "numérico" in outcome["error"]
if __name__ == "__main__":
    # Allow running this file directly; propagate pytest's exit status.
    exit_status = pytest.main([__file__, "-q"])
    sys.exit(exit_status)