feat(ml): generación de audio en ComfyUI (ACE-Step) — comfyui_build_audio_workflow + comfyui_fetch_output_audio

This commit is contained in:
2026-06-27 20:50:34 +02:00
parent 5494507c39
commit fd16453691
6 changed files with 612 additions and 0 deletions
@@ -0,0 +1,90 @@
"""Tests de estructura para comfyui_build_audio_workflow (funcion pura, ACE-Step)."""
import os
import sys
sys.path.insert(0, os.path.dirname(__file__))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
from ml.comfyui_build_audio_workflow import comfyui_build_audio_workflow
from _comfyui_wf_assert import assert_api_format, class_types, node_by_ct
def test_estructura_y_nodos_acestep():
    """Build a default ACE-Step workflow and check its overall structure.

    The workflow must pass ``assert_api_format``, contain every expected
    node class type, and hold exactly eight entries.
    """
    expected_class_types = (
        "CheckpointLoaderSimple",
        "TextEncodeAceStepAudio",
        "ConditioningZeroOut",
        "EmptyAceStepLatentAudio",
        "ModelSamplingSD3",
        "KSampler",
        "VAEDecodeAudio",
        "SaveAudio",
    )

    workflow = comfyui_build_audio_workflow(
        "AUDIO_ace_step_v1_3.5b.safetensors",
        "retro coin sfx",
    )
    assert_api_format(workflow)

    present = class_types(workflow)
    for ct in expected_class_types:
        assert ct in present, f"falta nodo {ct}"

    # No extra nodes beyond the eight listed above.
    assert len(workflow) == 8
def test_ckpt_y_prompt_reflejados():
    """Checkpoint name and prompt must be copied verbatim into the workflow.

    With no ``lyrics`` argument given, the encoder's lyrics input is expected
    to be the empty string.
    """
    workflow = comfyui_build_audio_workflow(
        "AUDIO_x.safetensors", "magic spell whoosh"
    )

    loader_inputs = node_by_ct(workflow, "CheckpointLoaderSimple")["inputs"]
    assert loader_inputs["ckpt_name"] == "AUDIO_x.safetensors"

    encoder_inputs = node_by_ct(workflow, "TextEncodeAceStepAudio")["inputs"]
    assert encoder_inputs["tags"] == "magic spell whoosh"
    assert encoder_inputs["lyrics"] == ""
def test_cableado_ksampler():
    """Verify the node-to-node links around the KSampler.

    Links are expressed as ``[node_id, output_index]`` pairs.
    """
    workflow = comfyui_build_audio_workflow("AUDIO_x.safetensors", "p")
    sampler_inputs = node_by_ct(workflow, "KSampler")["inputs"]

    expected_sampler_inputs = {
        # The model comes from ModelSamplingSD3 ("11"), not straight from
        # the checkpoint loader.
        "model": ["11", 0],
        "positive": ["6", 0],
        # The negative conditioning goes through ConditioningZeroOut ("10").
        "negative": ["10", 0],
        "latent_image": ["5", 0],
        "denoise": 1.0,
    }
    for input_name, expected in expected_sampler_inputs.items():
        assert sampler_inputs[input_name] == expected

    # ModelSamplingSD3 takes the MODEL output of the checkpoint.
    sampling_inputs = node_by_ct(workflow, "ModelSamplingSD3")["inputs"]
    assert sampling_inputs["model"] == ["4", 0]

    # VAEDecodeAudio uses the VAE output of the checkpoint.
    decode_inputs = node_by_ct(workflow, "VAEDecodeAudio")["inputs"]
    assert decode_inputs["vae"] == ["4", 2]

    # ConditioningZeroOut is derived from the positive conditioning.
    zero_out_inputs = node_by_ct(workflow, "ConditioningZeroOut")["inputs"]
    assert zero_out_inputs["conditioning"] == ["6", 0]
def test_edge_seconds_y_seed_variables():
    """``seconds`` and ``seed`` must land in the latent and sampler nodes."""
    first = comfyui_build_audio_workflow("c", "p", seconds=4.0, seed=42)
    second = comfyui_build_audio_workflow("c", "p", seconds=8.0, seed=99)

    # Duration is carried by the empty-latent node.
    first_latent = node_by_ct(first, "EmptyAceStepLatentAudio")["inputs"]
    assert first_latent["seconds"] == 4.0
    second_latent = node_by_ct(second, "EmptyAceStepLatentAudio")["inputs"]
    assert second_latent["seconds"] == 8.0

    # Seed is carried by the sampler node.
    first_sampler = node_by_ct(first, "KSampler")["inputs"]
    assert first_sampler["seed"] == 42
    second_sampler = node_by_ct(second, "KSampler")["inputs"]
    assert second_sampler["seed"] == 99
def test_params_reflejados():
    """Every optional keyword argument must show up on its target node."""
    overrides = {
        "lyrics": "la la la",
        "steps": 30,
        "cfg": 4.0,
        "sampler_name": "dpmpp_2m",
        "scheduler": "karras",
        "shift": 3.5,
        "lyrics_strength": 0.7,
        "filename_prefix": "audio/mio",
    }
    workflow = comfyui_build_audio_workflow("c", "p", **overrides)

    encoder_inputs = node_by_ct(workflow, "TextEncodeAceStepAudio")["inputs"]
    assert encoder_inputs["lyrics"] == "la la la"
    assert encoder_inputs["lyrics_strength"] == 0.7

    sampler_inputs = node_by_ct(workflow, "KSampler")["inputs"]
    for key in ("steps", "cfg", "sampler_name", "scheduler"):
        assert sampler_inputs[key] == overrides[key]

    sampling_inputs = node_by_ct(workflow, "ModelSamplingSD3")["inputs"]
    assert sampling_inputs["shift"] == 3.5

    save_inputs = node_by_ct(workflow, "SaveAudio")["inputs"]
    assert save_inputs["filename_prefix"] == "audio/mio"
def test_determinismo():
    """Two builds with identical arguments must compare equal."""
    params = {"seconds": 5.0, "seed": 7}
    first_build = comfyui_build_audio_workflow("c", "p", **params)
    second_build = comfyui_build_audio_workflow("c", "p", **params)
    assert first_build == second_build
@@ -0,0 +1,50 @@
"""Tests de localizacion de output para comfyui_fetch_output_audio.
Solo cubren la logica pura de busqueda (_is_audio_item / _find_audio_output): no
tocan red ni disco. La descarga real via HTTP se prueba en el flujo e2e con el
servidor ComfyUI vivo.
"""
import os
import sys
sys.path.insert(0, os.path.dirname(__file__))
from comfyui_fetch_output_audio import _find_audio_output, _is_audio_item
def test_is_audio_item_por_extension():
    """``_is_audio_item`` accepts audio file names and rejects the rest.

    The upper-case ``.WAV`` case checks that the match ignores case; the
    empty file name must be rejected.
    """
    accepted = ("comfy_audio_00001_.flac", "x.mp3", "x.WAV")
    rejected = ("x.png", "")

    for name in accepted:
        assert _is_audio_item({"filename": name})
    for name in rejected:
        assert not _is_audio_item({"filename": name})
def test_find_saveaudio_flac_bajo_audio():
    """A FLAC item under a node's ``"audio"`` key is returned whole."""
    expected = {
        "filename": "comfy_audio_00001_.flac",
        "subfolder": "audio",
        "type": "output",
    }
    # Feed a separate copy so the comparison is by value, not by identity.
    outputs = {"9": {"audio": [dict(expected)]}}

    found = _find_audio_output(outputs)
    assert found == expected
def test_find_saveaudiomp3_bajo_audio():
    """An MP3 item under ``"audio"`` with an empty subfolder is found."""
    mp3_item = {"filename": "track.mp3", "subfolder": "", "type": "output"}
    outputs = {"12": {"audio": [mp3_item]}}

    found = _find_audio_output(outputs)
    assert found["filename"] == "track.mp3"
def test_find_prioriza_clave_audio():
    """The audio entry wins over an image entry from another node."""
    # One node leaves a PNG under "images" and another a FLAC under "audio":
    # the audio item must be the one returned.
    image_item = {"filename": "preview.png", "subfolder": "", "type": "output"}
    audio_item = {"filename": "out.flac", "subfolder": "", "type": "output"}
    outputs = {
        "9": {"images": [image_item]},
        "10": {"audio": [audio_item]},
    }

    found = _find_audio_output(outputs)
    assert found["filename"] == "out.flac"
def test_find_sin_audio_devuelve_none():
    """Without any audio item the lookup returns ``None``."""
    png_item = {"filename": "preview.png", "subfolder": "", "type": "output"}
    images_only = {"9": {"images": [png_item]}}
    no_outputs = {}

    assert _find_audio_output(images_only) is None
    assert _find_audio_output(no_outputs) is None