mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-03 02:11:48 +00:00
fix(skills/comfyui): bug fixes, cloud parity, expanded coverage, examples, tests
The audit of v4.1 surfaced ~70 issues across the five scripts and three
reference docs — most user-visible (silent file overwrites, status-error
misclassified as success, X-API-Key leaked to S3 on /api/view redirect,
Cloud endpoints that 404 because they were renamed). v5.0.0 fixes those
and fills the gaps that previously forced users to write their own glue
(WebSocket monitoring, batch/sweep, img2img upload helper, dep auto-fix,
log fetch, health check, example workflows).
Critical fixes
- run_workflow.py: poll_status now checks status_str==error BEFORE
completed:true, so a failed run no longer reports success
- run_workflow.py: download_output streams to disk via safe_path_join,
preserves server subfolder structure (no silent overwrites), and
retries with exponential backoff
- run_workflow.py: refuses to overwrite a link with a literal in
inject_params (would silently break wiring)
- _common.py: _StripSensitiveOnRedirectSession (subclasses
requests.Session.rebuild_auth) drops X-API-Key/Cookie on cross-host
redirects — fixes a real key-leak path through Cloud's signed-URL
download flow. Tested by the 4 cross-host security tests
- Cloud routing (verified live): /history → /history_v2,
/models/<f> → /experiment/models/<f>, plus folder aliases for the
unet ↔ diffusion_models and clip ↔ text_encoders rename
- check_deps.py: distinguishes 200/empty vs 404 folder_not_found vs
403 free-tier; emits concrete fix_command per missing dep
- extract_schema.py: prompt vs negative_prompt determined by tracing
KSampler.{positive,negative} connections (incl. through Reroute /
Primitive nodes) instead of meta-title heuristic; symmetric
duplicate-name resolution; cycle-safe trace_to_node
- hardware_check.py: multi-GPU pick-best, Apple variant detection,
Rosetta detection, WSL2, ROCm --json, disk-space check, optional
PyTorch probe; powershell preferred over deprecated wmic
- comfyui_setup.sh: prefers pipx → uvx → pip --user (with PEP-668
fallback); idempotent — skips relaunch if server already up;
configurable port/workspace; persistent log; SIGINT trap
New scripts
- run_batch.py — count or sweep (cartesian product), parallel up to
cloud tier limit
- ws_monitor.py — real-time WebSocket viewer; saves preview frames
- auto_fix_deps.py — runs comfy node install / model download for
whatever check_deps reports missing (with --dry-run)
- health_check.py — single command that runs the verification checklist
(comfy-cli + server + checkpoints + optional smoke test that cancels
itself to avoid burning compute)
- fetch_logs.py — pull traceback / status messages for a prompt_id
Coverage expansion
- Param patterns now cover Flux (BasicScheduler, BasicGuider,
RandomNoise, ModelSamplingFlux), SD3, Wan/Hunyuan/LTX video,
IPAdapter, rgthree, easy-use, AnimateDiff
- Embedding refs in CLIPTextEncode strings extracted as model deps
- ckpt_name / vae_name / lora_name / unet_name now controllable so
workflows can be retargeted per run
Examples
- workflows/{sd15,sdxl,flux_dev}_txt2img.json
- workflows/sdxl_{img2img,inpaint}.json
- workflows/upscale_4x.json
- workflows/{animatediff_video,wan_video_t2v}.json + README
Tests
- 117 tests (105 unit + 8 cloud integration + 4 cross-host security)
- Cloud tests auto-skip without COMFY_CLOUD_API_KEY; verified end-to-end
against live cloud API
Backwards compatibility
- All existing CLI flags continue to work; new behavior is opt-in
(--ws, --input-image, --randomize-seed, --flat-output, etc.)
This commit is contained in:
parent
7d48a16f14
commit
a7780fe05f
32 changed files with 6117 additions and 1372 deletions
223
skills/creative/comfyui/scripts/health_check.py
Executable file
223
skills/creative/comfyui/scripts/health_check.py
Executable file
|
|
@ -0,0 +1,223 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
health_check.py — One-stop verification that the ComfyUI environment is ready.
|
||||
|
||||
Runs through the verification checklist:
|
||||
1. comfy-cli on PATH
|
||||
2. server reachable (/system_stats)
|
||||
3. at least one checkpoint installed
|
||||
4. (optional) a specific workflow's deps are met
|
||||
5. (optional) actually submit a tiny test workflow and verify round-trip
|
||||
|
||||
Usage:
|
||||
python3 health_check.py
|
||||
python3 health_check.py --host https://cloud.comfy.org
|
||||
python3 health_check.py --workflow my.json
|
||||
python3 health_check.py --smoke-test # actually submit a tiny workflow
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
from _common import ( # noqa: E402
|
||||
DEFAULT_LOCAL_HOST, ENV_API_KEY, emit_json, http_get, parse_model_list,
|
||||
resolve_api_key, resolve_url, unwrap_workflow,
|
||||
)
|
||||
|
||||
|
||||
def comfy_cli_status() -> dict:
    """Report how (and whether) comfy-cli can be invoked from this shell.

    A ``comfy`` executable on PATH takes priority; otherwise a ``uvx``
    executable on PATH is accepted as a way to invoke comfy-cli.

    Returns:
        A dict whose ``available`` key is a bool. When ``comfy`` is found
        it also carries ``method`` and the resolved ``path``; when only
        ``uvx`` is found it carries ``method`` and an invocation ``hint``;
        when neither is found it carries an installation ``hint``.
    """
    comfy_path = shutil.which("comfy")
    if comfy_path:
        return {"available": True, "method": "comfy", "path": comfy_path}

    if not shutil.which("uvx"):
        # Neither entry point is on PATH: tell the user how to install.
        return {
            "available": False,
            "hint": "Install with: pipx install comfy-cli (or `pip install comfy-cli`)",
        }

    return {
        "available": True,
        "method": "uvx",
        "hint": "Invoke as `uvx --from comfy-cli comfy ...`",
    }
|
||||
|
||||
|
||||
def server_status(host: str, headers: dict) -> dict:
    """Probe the server's ``/system_stats`` endpoint.

    Args:
        host: Server base URL, passed to ``resolve_url``.
        headers: HTTP headers sent with the request.

    Returns:
        A dict that always contains ``reachable`` and ``url``, plus:

        * ``stats`` on HTTP 200 -- the decoded JSON body, or ``{}`` when
          the body is falsy or cannot be decoded;
        * ``http_status`` and ``body`` (first 200 characters) on any
          other status code;
        * ``error`` when the request or response handling raised.
    """
    url = resolve_url(host, "/system_stats")
    try:
        resp = http_get(url, headers=headers, retries=2, timeout=10)
        if resp.status != 200:
            return {
                "reachable": False,
                "url": url,
                "http_status": resp.status,
                "body": resp.text()[:200],
            }
        # A 200 with an unreadable body still counts as reachable.
        try:
            stats = resp.json() or {}
        except Exception:
            stats = {}
    except Exception as exc:
        return {"reachable": False, "url": url, "error": str(exc)}
    return {"reachable": True, "url": url, "stats": stats}
|
||||
|
||||
|
||||
def checkpoint_status(host: str, headers: dict) -> dict:
    """Query ``/models/checkpoints`` and summarise what the server lists.

    Args:
        host: Server base URL, passed to ``resolve_url``.
        headers: HTTP headers sent with the request.

    Returns:
        A dict with ``queryable`` (bool) plus:

        * ``error`` when the request itself raised;
        * ``http_status``, ``url`` and ``body`` (first 200 characters)
          on a non-200 response;
        * ``count`` and ``first_few`` (up to five names, sorted) on
          HTTP 200. A body that cannot be parsed is reported as zero
          checkpoints.
    """
    url = resolve_url(host, "/models/checkpoints")
    try:
        resp = http_get(url, headers=headers, retries=2, timeout=15)
    except Exception as exc:
        return {"queryable": False, "error": str(exc)}

    if resp.status == 200:
        try:
            names = parse_model_list(resp.json())
        except Exception:
            # Unparseable listing is treated the same as an empty one.
            names = set()
        return {
            "queryable": True,
            "count": len(names),
            "first_few": sorted(names)[:5],
        }

    return {
        "queryable": False,
        "http_status": resp.status,
        "url": url,
        "body": resp.text()[:200],
    }
|
||||
|
||||
|
||||
# Minimal SD1.5 workflow that doesn't depend on rare nodes.
# 256x256 + 1 step is the smallest config that doesn't trigger SDXL/Flux
# validation errors while still executing fast.
# The checkpoint name in node "4" is a placeholder that smoke_test()
# overwrites on its own copy before submitting.
SMOKE_WORKFLOW = {
    "3": {
        "class_type": "KSampler",
        "inputs": {
            "seed": 1,
            "steps": 1,
            "cfg": 7.0,
            "sampler_name": "euler",
            "scheduler": "normal",
            "denoise": 1.0,
            "model": ["4", 0],
            "positive": ["6", 0],
            "negative": ["7", 0],
            "latent_image": ["5", 0],
        },
    },
    "4": {
        "class_type": "CheckpointLoaderSimple",
        "inputs": {"ckpt_name": "REPLACE_ME"},
    },
    "5": {
        "class_type": "EmptyLatentImage",
        "inputs": {"width": 256, "height": 256, "batch_size": 1},
    },
    "6": {
        "class_type": "CLIPTextEncode",
        "inputs": {"text": "test", "clip": ["4", 1]},
    },
    "7": {
        "class_type": "CLIPTextEncode",
        "inputs": {"text": "", "clip": ["4", 1]},
    },
    "9": {
        "class_type": "SaveImage",
        "inputs": {"filename_prefix": "smoke", "images": ["3", 0]},
    },
}
|
||||
|
||||
|
||||
def smoke_test(host: str, headers: dict, ckpt_name: str | None) -> dict:
|
||||
"""Submit a tiny workflow and verify the server accepts it.
|
||||
|
||||
Cancels the job immediately after acceptance so we don't burn GPU
|
||||
time / cloud minutes on a smoke test.
|
||||
"""
|
||||
if not ckpt_name:
|
||||
return {"ran": False, "reason": "no checkpoint available"}
|
||||
wf = json.loads(json.dumps(SMOKE_WORKFLOW))
|
||||
wf["4"]["inputs"]["ckpt_name"] = ckpt_name
|
||||
|
||||
# Lazy import to avoid circular issues
|
||||
from run_workflow import ComfyRunner
|
||||
api_key = headers.get("X-API-Key")
|
||||
runner = ComfyRunner(host=host, api_key=api_key)
|
||||
sub = runner.submit(wf)
|
||||
if "_http_error" in sub:
|
||||
return {"ran": True, "submitted": False,
|
||||
"http_status": sub["_http_error"], "body": sub.get("body")}
|
||||
pid = sub.get("prompt_id")
|
||||
if not pid:
|
||||
return {"ran": True, "submitted": False, "response": sub}
|
||||
|
||||
# Cancel so we don't actually waste compute on the smoke test.
|
||||
cancelled = False
|
||||
try:
|
||||
cancelled = runner.cancel(pid)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return {
|
||||
"ran": True, "submitted": True, "prompt_id": pid,
|
||||
"cancelled_after_submit": cancelled,
|
||||
"note": "Submission accepted; cancelled to avoid running the full pipeline.",
|
||||
}
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Run the health checks, print a JSON report, and return an exit code.

    Checks performed: comfy-cli availability, server reachability, the
    checkpoint listing (only when the server is reachable), an optional
    dependency check of ``--workflow``, and an optional ``--smoke-test``
    submission (only when the server is reachable).

    Args:
        argv: Command-line arguments; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        0 when the verdict is ``pass``; for ``warn``, 0 unless ``--strict``
        was given (then 1); 1 for ``fail``.
    """
    p = argparse.ArgumentParser(description="One-stop ComfyUI health check")
    p.add_argument("--host", default=DEFAULT_LOCAL_HOST)
    p.add_argument("--api-key", help=f"or set ${ENV_API_KEY}")
    p.add_argument("--workflow", help="Optional: also run check_deps on this workflow")
    p.add_argument("--smoke-test", action="store_true",
                   help="Submit a tiny test workflow and verify round-trip")
    p.add_argument("--strict", action="store_true",
                   help="Exit non-zero on any non-pass condition (including warnings)")
    args = p.parse_args(argv)

    api_key = resolve_api_key(args.api_key)
    headers = {"X-API-Key": api_key} if api_key else {}

    cli = comfy_cli_status()
    server = server_status(args.host, headers)
    ckpts = checkpoint_status(args.host, headers) if server.get("reachable") else None

    # ---- workflow check ----
    workflow_check: dict | None = None
    if args.workflow:
        wf_path = Path(args.workflow).expanduser()
        if not wf_path.exists():
            workflow_check = {"error": "workflow file not found"}
        else:
            try:
                # Explicit UTF-8 so the result does not depend on the
                # platform's locale encoding.
                with wf_path.open(encoding="utf-8") as f:
                    workflow = unwrap_workflow(json.load(f))
                from check_deps import check_deps
                workflow_check = check_deps(workflow, host=args.host, api_key=api_key)
            except (OSError, ValueError) as e:
                # ValueError covers json.JSONDecodeError and decode errors.
                # OSError covers an unreadable path (directory, permission
                # denied, ...), which would otherwise abort with a
                # traceback before any report is emitted.
                workflow_check = {"error": str(e)}

    smoke = None
    if args.smoke_test and server.get("reachable"):
        first_ckpt = ckpts["first_few"][0] if ckpts and ckpts.get("first_few") else None
        smoke = smoke_test(args.host, headers, first_ckpt)

    # ---- verdict ----
    # "fail" is sticky; "warn" only ever upgrades a "pass".
    verdict = "pass"
    reasons: list[str] = []
    if not server.get("reachable"):
        verdict = "fail"
        reasons.append("server unreachable")
    if ckpts and ckpts.get("queryable") and ckpts.get("count", 0) == 0:
        verdict = "warn" if verdict == "pass" else verdict
        reasons.append("no checkpoints installed")
    if workflow_check and workflow_check.get("error"):
        verdict = "fail"
        reasons.append(f"workflow check failed: {workflow_check['error']}")
    elif workflow_check and not workflow_check.get("is_ready"):
        if workflow_check.get("node_check_skipped"):
            # Recorded as a reason only; the verdict is left unchanged.
            reasons.append("node check skipped (cloud free tier)")
        else:
            verdict = "fail"
            reasons.append("workflow has missing deps")
    if smoke and smoke.get("ran") and not smoke.get("submitted"):
        verdict = "fail"
        reasons.append("smoke-test submission failed")
    if not cli.get("available"):
        verdict = "warn" if verdict == "pass" else verdict
        reasons.append("comfy-cli not on PATH (lifecycle commands won't work)")

    report = {
        "verdict": verdict,
        "reasons": reasons,
        "host": args.host,
        "comfy_cli": cli,
        "server": server,
        "checkpoints": ckpts,
        "workflow_check": workflow_check,
        "smoke_test": smoke,
    }
    emit_json(report)

    if verdict == "pass":
        return 0
    if verdict == "warn":
        return 1 if args.strict else 0
    return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue