feat(ml): generación de audio en ComfyUI (ACE-Step) — comfyui_build_audio_workflow + comfyui_fetch_output_audio
This commit is contained in:
@@ -0,0 +1,90 @@
|
||||
"""Tests de estructura para comfyui_build_audio_workflow (funcion pura, ACE-Step)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
|
||||
from ml.comfyui_build_audio_workflow import comfyui_build_audio_workflow
|
||||
from _comfyui_wf_assert import assert_api_format, class_types, node_by_ct
|
||||
|
||||
|
||||
def test_estructura_y_nodos_acestep():
    """The built workflow passes the API-format check and has the expected nodes.

    Every expected node class must be present, and the workflow must hold
    exactly eight entries.
    """
    expected_nodes = (
        "CheckpointLoaderSimple",
        "TextEncodeAceStepAudio",
        "ConditioningZeroOut",
        "EmptyAceStepLatentAudio",
        "ModelSamplingSD3",
        "KSampler",
        "VAEDecodeAudio",
        "SaveAudio",
    )
    wf = comfyui_build_audio_workflow(
        "AUDIO_ace_step_v1_3.5b.safetensors",
        "retro coin sfx",
    )
    assert_api_format(wf)

    present = class_types(wf)
    for ct in expected_nodes:
        assert ct in present, f"falta nodo {ct}"
    assert len(wf) == 8
|
||||
|
||||
|
||||
def test_ckpt_y_prompt_reflejados():
    """Checkpoint name and prompt show up in the loader and text-encoder inputs."""
    ckpt_name = "AUDIO_x.safetensors"
    prompt = "magic spell whoosh"
    wf = comfyui_build_audio_workflow(ckpt_name, prompt)

    loader_inputs = node_by_ct(wf, "CheckpointLoaderSimple")["inputs"]
    assert loader_inputs["ckpt_name"] == ckpt_name

    encoder = node_by_ct(wf, "TextEncodeAceStepAudio")
    assert encoder["inputs"]["tags"] == prompt
    # No lyrics were passed, so the encoder is expected to carry an empty string.
    assert encoder["inputs"]["lyrics"] == ""
|
||||
|
||||
|
||||
def test_cableado_ksampler():
    """Check how KSampler and its neighbouring nodes are linked together."""
    wf = comfyui_build_audio_workflow("AUDIO_x.safetensors", "p")
    sampler_inputs = node_by_ct(wf, "KSampler")["inputs"]

    sampler_links = (
        # model arrives from ModelSamplingSD3 ("11"), not directly from the checkpoint
        ("model", ["11", 0]),
        ("positive", ["6", 0]),
        # negative goes through ConditioningZeroOut ("10")
        ("negative", ["10", 0]),
        ("latent_image", ["5", 0]),
    )
    for input_name, link in sampler_links:
        assert sampler_inputs[input_name] == link
    assert sampler_inputs["denoise"] == 1.0

    upstream_links = (
        # ModelSamplingSD3 takes the MODEL from the checkpoint
        ("ModelSamplingSD3", "model", ["4", 0]),
        # VAEDecodeAudio uses the checkpoint's VAE
        ("VAEDecodeAudio", "vae", ["4", 2]),
        # ConditioningZeroOut is derived from the positive conditioning
        ("ConditioningZeroOut", "conditioning", ["6", 0]),
    )
    for class_type, input_name, link in upstream_links:
        assert node_by_ct(wf, class_type)["inputs"][input_name] == link
|
||||
|
||||
|
||||
def test_edge_seconds_y_seed_variables():
    """Different seconds/seed arguments are reflected in the latent and sampler nodes."""
    cases = ((4.0, 42), (8.0, 99))
    # Build both workflows up front, before any assertion runs.
    workflows = [
        comfyui_build_audio_workflow("c", "p", seconds=secs, seed=seed)
        for secs, seed in cases
    ]

    for wf, (secs, _seed) in zip(workflows, cases):
        latent_inputs = node_by_ct(wf, "EmptyAceStepLatentAudio")["inputs"]
        assert latent_inputs["seconds"] == secs

    for wf, (_secs, seed) in zip(workflows, cases):
        sampler_inputs = node_by_ct(wf, "KSampler")["inputs"]
        assert sampler_inputs["seed"] == seed
|
||||
|
||||
|
||||
def test_params_reflejados():
    """Each optional parameter is reflected in the inputs of its node."""
    overrides = {
        "lyrics": "la la la",
        "steps": 30,
        "cfg": 4.0,
        "sampler_name": "dpmpp_2m",
        "scheduler": "karras",
        "shift": 3.5,
        "lyrics_strength": 0.7,
        "filename_prefix": "audio/mio",
    }
    wf = comfyui_build_audio_workflow("c", "p", **overrides)

    encoder_inputs = node_by_ct(wf, "TextEncodeAceStepAudio")["inputs"]
    assert encoder_inputs["lyrics"] == "la la la"
    assert encoder_inputs["lyrics_strength"] == 0.7

    sampler_inputs = node_by_ct(wf, "KSampler")["inputs"]
    assert sampler_inputs["steps"] == 30
    assert sampler_inputs["cfg"] == 4.0
    assert sampler_inputs["sampler_name"] == "dpmpp_2m"
    assert sampler_inputs["scheduler"] == "karras"

    sampling_inputs = node_by_ct(wf, "ModelSamplingSD3")["inputs"]
    assert sampling_inputs["shift"] == 3.5

    save_inputs = node_by_ct(wf, "SaveAudio")["inputs"]
    assert save_inputs["filename_prefix"] == "audio/mio"
|
||||
|
||||
|
||||
def test_determinismo():
    """Two calls with identical arguments produce equal workflows."""
    first, second = (
        comfyui_build_audio_workflow("c", "p", seconds=5.0, seed=7)
        for _ in range(2)
    )
    assert first == second
|
||||
@@ -0,0 +1,50 @@
|
||||
"""Tests de localizacion de output para comfyui_fetch_output_audio.
|
||||
|
||||
Solo cubren la logica pura de busqueda (_is_audio_item / _find_audio_output): no
|
||||
tocan red ni disco. La descarga real via HTTP se prueba en el flujo e2e con el
|
||||
servidor ComfyUI vivo.
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
|
||||
from comfyui_fetch_output_audio import _find_audio_output, _is_audio_item
|
||||
|
||||
|
||||
def test_is_audio_item_por_extension():
    """_is_audio_item accepts the audio filenames below and rejects the others."""
    accepted = ("comfy_audio_00001_.flac", "x.mp3", "x.WAV")
    rejected = ("x.png", "")

    for filename in accepted:
        assert _is_audio_item({"filename": filename})
    for filename in rejected:
        assert not _is_audio_item({"filename": filename})
|
||||
|
||||
|
||||
def test_find_saveaudio_flac_bajo_audio():
    """A flac item listed under the "audio" key is returned with all its fields."""
    expected = {
        "filename": "comfy_audio_00001_.flac",
        "subfolder": "audio",
        "type": "output",
    }
    # Pass a copy so the comparison is by value against an independent dict.
    outputs = {"9": {"audio": [dict(expected)]}}

    found = _find_audio_output(outputs)
    assert found == expected
|
||||
|
||||
|
||||
def test_find_saveaudiomp3_bajo_audio():
    """An mp3 item listed under the "audio" key is found."""
    mp3_item = {"filename": "track.mp3", "subfolder": "", "type": "output"}
    outputs = {"12": {"audio": [mp3_item]}}

    found = _find_audio_output(outputs)
    assert found["filename"] == "track.mp3"
|
||||
|
||||
|
||||
def test_find_prioriza_clave_audio():
    """With a png under "images" and a flac under "audio", the audio item wins."""
    image_node = {
        "images": [{"filename": "preview.png", "subfolder": "", "type": "output"}]
    }
    audio_node = {
        "audio": [{"filename": "out.flac", "subfolder": "", "type": "output"}]
    }
    outputs = {"9": image_node, "10": audio_node}

    found = _find_audio_output(outputs)
    assert found["filename"] == "out.flac"
|
||||
|
||||
|
||||
def test_find_sin_audio_devuelve_none():
    """Without any audio item (or with no outputs at all) the result is None."""
    only_images = {
        "9": {
            "images": [
                {"filename": "preview.png", "subfolder": "", "type": "output"},
            ],
        },
    }
    assert _find_audio_output(only_images) is None
    assert _find_audio_output({}) is None
|
||||
Reference in New Issue
Block a user