feat(ml): auto-commit con 7 cambios

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-24 01:16:37 +02:00
parent db4f454f8a
commit 1311c7e585
7 changed files with 967 additions and 1 deletions
@@ -0,0 +1,326 @@
"""Descarga un workflow ComfyUI desde CUALQUIER fuente y lo normaliza a API format.
Dispatcher: detecta el tipo de fuente por la URL/patron y delega la descarga, luego
normaliza el resultado a API format reusando las dos funciones de import del registry
(no reescribe la conversion):
- Google Drive (drive.google.com/.../d/<id> o uc?id=) -> gdown (si esta) o
descarga directa uc?export=download -> import_workflow_json | import_workflow_png
- GitHub (github.com/.../blob/... o raw.githubusercontent.com) -> raw URL del
.json/.png -> import_workflow_json | import_workflow_png
- Civitai (civitai.com/api/download/... o pagina /models/<id>) -> resuelve el
downloadUrl via API REST, descarga el archivo (zip o json) -> import
- HuggingFace (huggingface.co/datasets/.../resolve/...) -> import_workflow_json
- URL directa .json/.png/.webp o path local -> import segun extension
El resultado SIEMPRE es API format (dict {node_id: {class_type, inputs}}), listo para
comfyui_validate_workflow + comfyui_submit_workflow.
Compone comfyui_import_workflow_json + comfyui_import_workflow_png. Impura: red
(HTTP GET / gdown), descompresion de zip y lectura/escritura de disco. Solo stdlib
(urllib, json, zipfile, tempfile, re) + gdown opcional para Drive.
"""
import json
import os
import re
import sys
import tempfile
import urllib.error
import urllib.parse
import urllib.request
import zipfile
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
if _THIS_DIR not in sys.path:
sys.path.insert(0, _THIS_DIR)
from comfyui_import_workflow_json import comfyui_import_workflow_json # noqa: E402
from comfyui_import_workflow_png import comfyui_import_workflow_png # noqa: E402
_UA = "Mozilla/5.0 (fn_registry comfyui_download_workflow)"
def comfyui_download_workflow(
    source: str,
    dest: str | None = None,
    *,
    server: str = "127.0.0.1:8188",
    civitai_token: str | None = None,
    hf_token: str | None = None,
    timeout: float = 30.0,
) -> dict:
    """Download a ComfyUI workflow from any supported source and normalize it.

    The source type is detected from ``source``, the file is fetched with the
    matching downloader, and the result is handed to ``_normalize``. A
    downloaded ``.zip`` is opened and its first workflow member is normalized
    instead.

    Args:
        source: URL (Google Drive, GitHub, Civitai, HuggingFace, or any direct
            link) or the path of a local file.
        dest: Where to store the downloaded file. When ``None`` the
            downloaders fall back to a temporary file, which is kept and
            reported in ``path``. Local sources are never copied.
        server: ``host:port`` of ComfyUI, forwarded unchanged to
            ``_normalize`` (per the original notes it is only needed to map
            widgets when the source is a UI graph).
        civitai_token: Bearer token forwarded to the Civitai downloader.
        hf_token: Bearer token sent only when the source is HuggingFace.
        timeout: Timeout in seconds, forwarded to the downloaders and to
            ``_normalize``.

    Returns:
        dict with keys ``ok``, ``workflow``, ``source_type``, ``path``,
        ``format_in`` and ``error``. ``source_type`` is whatever
        ``_detect_source_type`` returned. ``format_in`` is ``'zip'`` when the
        download was an archive, otherwise the value reported by
        ``_normalize``.

    Download, parsing and archive failures are reported as ``ok=False`` with
    an ``error`` message instead of being raised.
    """
    # Function-scope import: the module header does not import http.client and
    # only this boundary needs its exception base class.
    import http.client

    source_type = _detect_source_type(source)
    try:
        if source_type == "local":
            local_path = source
            if not os.path.exists(local_path):
                return _err(source_type, f"no existe el archivo local {source!r}")
        elif source_type == "drive":
            local_path = _download_drive(source, dest, timeout)
        elif source_type == "civitai":
            local_path = _download_civitai(source, dest, civitai_token, timeout)
        else:  # github | huggingface | direct
            url = _to_raw_url(source) if source_type == "github" else source
            token = hf_token if source_type == "huggingface" else None
            local_path = _download_url(url, dest, token, timeout)
    except _DownloadError as exc:
        return _err(source_type, str(exc))
    except (
        urllib.error.URLError,
        http.client.HTTPException,  # e.g. a truncated response body
        OSError,
        ValueError,  # malformed URL or undecodable JSON metadata
    ) as exc:
        return _err(source_type, f"fallo de descarga: {exc}")

    if not local_path.lower().endswith(".zip"):
        norm = _normalize(local_path, server, timeout)
        norm["source_type"] = source_type
        norm["path"] = local_path
        return norm

    # A zip was downloaded: normalize the first workflow found inside it.
    try:
        inner, _fmt_hint = _extract_from_zip(local_path)
    except _DownloadError as exc:
        return _err(source_type, str(exc), path=local_path, fmt="zip")
    except (zipfile.BadZipFile, OSError) as exc:
        # A file named *.zip that is corrupt or unreadable must not escape as
        # an exception from this entry point.
        return _err(
            source_type,
            f"zip invalido o ilegible: {exc}",
            path=local_path,
            fmt="zip",
        )
    try:
        norm = _normalize(inner, server, timeout)
    finally:
        # The extracted member is an intermediate file that is never reported
        # to the caller (``path`` points at the zip), so do not leave it behind.
        try:
            os.remove(inner)
        except OSError:
            pass
    norm["format_in"] = "zip"
    norm["source_type"] = source_type
    norm["path"] = local_path
    return norm
# --------------------------------------------------------------------------- #
# Deteccion + resolucion de URLs
# --------------------------------------------------------------------------- #
def _detect_source_type(source: str) -> str:
if not source.startswith(("http://", "https://")):
return "local"
host = urllib.parse.urlparse(source).netloc.lower()
if "drive.google.com" in host or "docs.google.com" in host:
return "drive"
if "civitai.com" in host:
return "civitai"
if "github.com" in host or "githubusercontent.com" in host:
return "github"
if "huggingface.co" in host:
return "huggingface"
return "direct"
def _to_raw_url(github_url: str) -> str:
"""Convierte una URL github.com/.../blob/<branch>/<path> a raw.githubusercontent.com."""
if "raw.githubusercontent.com" in github_url or "/raw/" in github_url:
return github_url
m = re.match(
r"https://github\.com/([^/]+)/([^/]+)/blob/(.+)$", github_url
)
if m:
user, repo, rest = m.groups()
return f"https://raw.githubusercontent.com/{user}/{repo}/{rest}"
return github_url # ya es raw o un patron no-blob: usar tal cual
def _drive_id(url: str) -> str | None:
m = re.search(r"/d/([A-Za-z0-9_-]+)", url) or re.search(r"[?&]id=([A-Za-z0-9_-]+)", url)
return m.group(1) if m else None
# --------------------------------------------------------------------------- #
# Descargas por fuente
# --------------------------------------------------------------------------- #
def _http_bytes(url: str, token: str | None, timeout: float) -> bytes:
    """GET ``url`` and return the whole response body.

    Args:
        url: Address to fetch.
        token: Optional bearer token, sent as an ``Authorization`` header.
        timeout: Timeout in seconds passed to urllib.

    Returns:
        The response body as bytes.

    Raises:
        urllib.error.URLError / OSError: on network or HTTP failures
            (propagated from urllib).

    When a token is supplied, the ``Authorization`` header is removed from any
    redirect that leaves the original host. urllib otherwise copies request
    headers onto the redirected request, which would hand the token to
    whatever third-party host the server redirects to.
    """
    request = urllib.request.Request(url, headers={"User-Agent": _UA})
    if not token:
        with urllib.request.urlopen(request, timeout=timeout) as resp:
            return resp.read()

    request.add_header("Authorization", f"Bearer {token}")
    origin_host = urllib.parse.urlparse(url).hostname

    class _SameHostAuthRedirect(urllib.request.HTTPRedirectHandler):
        """Redirect handler that keeps the credential on the original host only."""

        def redirect_request(self, req, fp, code, msg, headers, newurl):
            new_req = super().redirect_request(req, fp, code, msg, headers, newurl)
            if (
                new_req is not None
                and urllib.parse.urlparse(newurl).hostname != origin_host
            ):
                # Request.add_header stores keys capitalized, hence this spelling.
                new_req.headers.pop("Authorization", None)
            return new_req

    # Passing a subclass instance replaces the default redirect handler.
    opener = urllib.request.build_opener(_SameHostAuthRedirect())
    with opener.open(request, timeout=timeout) as resp:
        return resp.read()
def _ext_from(url_or_name: str, content: bytes) -> str:
low = url_or_name.lower().split("?")[0]
for ext in (".json", ".png", ".webp", ".zip"):
if low.endswith(ext):
return ext
if content[:8] == b"\x89PNG\r\n\x1a\n":
return ".png"
if content[:4] == b"PK\x03\x04":
return ".zip"
if content[:4] == b"RIFF" and content[8:12] == b"WEBP":
return ".webp"
return ".json"
def _save(content: bytes, dest: str | None, ext: str) -> str:
if dest:
os.makedirs(os.path.dirname(os.path.abspath(dest)) or ".", exist_ok=True)
path = dest
else:
fd, path = tempfile.mkstemp(prefix="comfy_wf_", suffix=ext)
os.close(fd)
with open(path, "wb") as f:
f.write(content)
return path
def _download_url(url: str, dest: str | None, token: str | None, timeout: float) -> str:
    """Fetch ``url`` and store the body, rejecting HTML responses.

    Args:
        url: Address to download.
        dest: Target path, or ``None`` to let ``_save`` create a temp file.
        token: Optional bearer token forwarded to ``_http_bytes``.
        timeout: Timeout in seconds.

    Returns:
        Path of the stored file.

    Raises:
        _DownloadError: if the body looks like an HTML page rather than a
            workflow file.
    """
    content = _http_bytes(url, token, timeout)
    # Strip before comparing and ignore case, so "<!doctype html>", "<HTML>"
    # and pages with leading blank lines are all recognized.
    head = content[:512].lstrip().lower()
    if head.startswith((b"<!doctype", b"<html")):
        raise _DownloadError(
            f"la respuesta de {url!r} es HTML, no un workflow (gated/login o URL de pagina, no raw)"
        )
    return _save(content, dest, _ext_from(url, content))
def _download_drive(source: str, dest: str | None, timeout: float) -> str:
    """Download a Google Drive file, via gdown when available, else urllib.

    Args:
        source: Drive URL containing the file id.
        dest: Target path, or ``None`` to use a temp file.
        timeout: Timeout in seconds for the urllib fallback (it is not passed
            to gdown).

    Returns:
        Path of the downloaded file. On the gdown path the file may have been
        renamed by ``_retype_by_content``.

    Raises:
        _DownloadError: no file id in ``source``, gdown failed or produced
            nothing, or Drive answered with an HTML page.
    """
    file_id = _drive_id(source)
    if not file_id:
        raise _DownloadError(f"no se pudo extraer el file id de Drive de {source!r}")

    try:
        import gdown  # type: ignore
    except ImportError:
        gdown = None  # no gdown installed: use the urllib fallback below

    # Path 1: gdown (per the original note it copes with the virus-scan
    # warning page shown for large files).
    if gdown is not None:
        if dest:
            out = dest
        else:
            fd, out = tempfile.mkstemp(prefix="comfy_wf_", suffix=".bin")
            os.close(fd)  # only the name is needed; do not leak the descriptor
        try:
            got = gdown.download(id=file_id, output=out, quiet=True)
        except Exception as exc:
            # gdown is an optional third-party library and its failure
            # exception types are not known here; fold them into the module's
            # own error type so callers see one failure mode.
            raise _DownloadError(f"gdown fallo: {exc}") from exc
        if got and os.path.exists(out) and os.path.getsize(out) > 0:
            return _retype_by_content(out)
        raise _DownloadError("gdown no devolvio archivo")

    # Path 2: direct download (enough for small files such as a workflow .json).
    url = f"https://drive.google.com/uc?export=download&id={file_id}"
    content = _http_bytes(url, None, timeout)
    # Case-insensitive, strip-then-compare HTML sniff.
    head = content[:512].lstrip().lower()
    if head.startswith((b"<!doctype", b"<html")):
        raise _DownloadError(
            "Drive devolvio HTML (archivo grande con aviso de virus-scan o gated). "
            "Instala gdown (pip install gdown) para este archivo."
        )
    return _save(content, dest, _ext_from(source, content))
def _retype_by_content(path: str) -> str:
    """Rename ``path`` so its extension matches what ``_ext_from`` reports.

    The first 16 bytes of the file are passed to ``_ext_from`` together with
    the current name. The file is returned untouched when the name already
    ends with that extension, otherwise it is moved to ``<stem><ext>``.
    """
    with open(path, "rb") as fh:
        magic = fh.read(16)

    real_ext = _ext_from(path, magic)
    if not path.lower().endswith(real_ext):
        stem, _old_ext = os.path.splitext(path)
        renamed = stem + real_ext
        os.replace(path, renamed)
        return renamed
    return path
def _download_civitai(source: str, dest: str | None, token: str | None, timeout: float) -> str:
    """Download a file from Civitai, resolving model pages through the API.

    A ``civitai.com/models/<id>`` page URL (one that is not already an
    ``/api/download/`` link) is resolved by fetching
    ``/api/v1/models/<id>`` and taking the ``downloadUrl`` of the first file
    of the first listed version. Any other URL is downloaded as given.

    Args:
        source: Civitai page or download URL.
        dest: Target path, or ``None`` to let ``_save`` create a temp file.
        token: Optional bearer token forwarded to ``_http_bytes``.
        timeout: Timeout in seconds.

    Returns:
        Path of the stored file.

    Raises:
        _DownloadError: the metadata is not JSON or lacks a downloadable
            file, or the download answered with an HTML page.
    """
    download_url = source
    page = re.search(r"civitai\.com/models/(\d+)", source)
    if page and "/api/download/" not in source:
        model_id = page.group(1)
        api = f"https://civitai.com/api/v1/models/{model_id}"
        raw_meta = _http_bytes(api, token, timeout)
        try:
            meta = json.loads(raw_meta)
        except ValueError as exc:  # JSONDecodeError / UnicodeDecodeError
            raise _DownloadError(
                f"Civitai devolvio una respuesta no-JSON para el modelo {model_id}"
            ) from exc

        # Walk the response defensively: an unexpected shape should surface as
        # a download error, not as an AttributeError or KeyError.
        versions = meta.get("modelVersions") if isinstance(meta, dict) else None
        first_version = versions[0] if isinstance(versions, list) and versions else None
        files = first_version.get("files") if isinstance(first_version, dict) else None
        if not isinstance(files, list) or not files:
            raise _DownloadError(f"el modelo Civitai {model_id} no expone archivos descargables")
        first_file = files[0]
        download_url = (
            first_file.get("downloadUrl") if isinstance(first_file, dict) else None
        ) or ""
        if not download_url:
            raise _DownloadError("Civitai no devolvio downloadUrl para el modelo")

    content = _http_bytes(download_url, token, timeout)
    # Case-insensitive, strip-then-compare HTML sniff.
    head = content[:512].lstrip().lower()
    if head.startswith((b"<!doctype", b"<html")):
        raise _DownloadError(
            "Civitai devolvio HTML (requiere login/token o el workflow es early-access). "
            "Pasa civitai_token."
        )
    return _save(content, dest, _ext_from(download_url, content))
def _extract_from_zip(zip_path: str) -> tuple[str, str]:
"""Extrae el primer .json/.png de un zip a un tmp y devuelve (ruta, hint)."""
with zipfile.ZipFile(zip_path) as zf:
names = [n for n in zf.namelist() if n.lower().endswith((".json", ".png", ".webp"))]
if not names:
raise _DownloadError(f"el zip {zip_path!r} no contiene .json ni .png de workflow")
name = names[0]
data = zf.read(name)
ext = os.path.splitext(name)[1].lower()
fd, out = tempfile.mkstemp(prefix="comfy_wf_zip_", suffix=ext)
os.close(fd)
with open(out, "wb") as f:
f.write(data)
return out, ext
# --------------------------------------------------------------------------- #
# Normalizacion a API format (reusa las funciones de import del registry)
# --------------------------------------------------------------------------- #
def _normalize(path: str, server: str, timeout: float) -> dict:
    """Turn a local workflow file into a ``{ok, workflow, format_in, error}`` dict.

    Files ending in ``.png``/``.webp`` go through
    ``comfyui_import_workflow_png``. Its ``prompt`` entry is preferred. If
    only a ``workflow`` entry is present, that is written to a temp JSON file
    and run through ``comfyui_import_workflow_json``. Every other file goes
    straight to ``comfyui_import_workflow_json``.

    Args:
        path: Local file to import.
        server: ``host:port`` forwarded to the JSON importer.
        timeout: Timeout in seconds forwarded to both importers.

    Returns:
        dict with ``ok``, ``workflow``, ``format_in`` and ``error``.
        ``format_in`` is ``'png-prompt'`` or ``'png-workflow'`` for images and
        the importer's ``format_detected`` value for JSON.
    """
    if not path.lower().endswith((".png", ".webp")):
        # .json or no extension: hand over to the JSON importer.
        res = comfyui_import_workflow_json(path, server=server, timeout=timeout)
        return {"ok": res.get("ok", False), "workflow": res.get("workflow", {}),
                "format_in": res.get("format_detected", ""), "error": res.get("error", "")}

    res = comfyui_import_workflow_png(path, timeout=timeout)
    if not res.get("ok"):
        return {"ok": False, "workflow": {}, "format_in": "",
                "error": res.get("error", "PNG sin workflow embebido")}

    # Prefer the 'prompt' entry; fall back to normalizing the 'workflow' entry.
    if res.get("prompt"):
        return {"ok": True, "workflow": res["prompt"], "format_in": "png-prompt", "error": ""}

    ui = res.get("workflow") or {}
    if ui:
        tmp = _dump_tmp_json(ui)
        try:
            j = comfyui_import_workflow_json(tmp, server=server, timeout=timeout)
        finally:
            # The temp JSON only bridges the two importers. Remove it so
            # repeated calls do not pile up files in the temp directory.
            try:
                os.remove(tmp)
            except OSError:
                pass
        return {"ok": j.get("ok", False), "workflow": j.get("workflow", {}),
                "format_in": "png-workflow", "error": j.get("error", "")}

    return {"ok": False, "workflow": {}, "format_in": "",
            "error": "PNG sin chunk prompt ni workflow"}
def _dump_tmp_json(obj: dict) -> str:
fd, tmp = tempfile.mkstemp(prefix="comfy_wf_ui_", suffix=".json")
with os.fdopen(fd, "w") as f:
json.dump(obj, f)
return tmp
def _err(source_type: str, msg: str, *, path: str = "", fmt: str = "") -> dict:
return {"ok": False, "workflow": {}, "source_type": source_type,
"path": path, "format_in": fmt, "error": msg}
class _DownloadError(Exception):
    """Internal download failure, reported to callers as ``{ok: False, error}``."""
if __name__ == "__main__":
    # Smoke test: fetch a real workflow by cubiq (noted as Apache-2.0 by the
    # original author) from GitHub raw and print everything but the graph.
    smoke_url = (
        "https://raw.githubusercontent.com/cubiq/ComfyUI_Workflows/"
        "main/ComfyUI_Simple/SDXL_simple.json"
    )
    result = comfyui_download_workflow(smoke_url)
    summary = {key: value for key, value in result.items() if key != "workflow"}
    print(json.dumps(summary, indent=2))
    print("nodos:", len(result.get("workflow", {})))