mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
Six days after #23937 (608 fixes) the codebase had accumulated 241 new PLR6201 violations. Same mechanical `x in (...)` → `x in {...}` fix, same zero-risk profile: set lookup is O(1) vs O(n) for tuple and the two are semantically equivalent for hashable scalar membership tests. All 241 instances fixed via `ruff check --select PLR6201 --fix --unsafe-fixes`, zero remaining. Every changed value is a hashable scalar (str/int/None/enum/signal); no risk of unhashable runtime errors. No behavior change. Test plan: - 119 files changed, +244/-244 (net zero) — exactly one-line edits - `ruff check` clean afterward - Compile checks pass on the largest touched files (cli.py, run_agent.py, gateway/run.py, gateway/platforms/discord.py, model_tools.py) - Subset broad test run on tests/gateway/ tests/hermes_cli/ tests/agent/ tests/tools/: 18187 passed, 59 pre-existing failures (verified against origin/main with the same shape — identical failure count, identical category — all xdist test-order flakes unrelated to this change) Follows the same template as PR #23937 ([tracker: #23972](https://github.com/NousResearch/hermes-agent/issues/23972)).
315 lines
11 KiB
Python
Executable file
315 lines
11 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
extract_schema.py — Analyze a ComfyUI API-format workflow and extract
|
|
controllable parameters.
|
|
|
|
Improvements over v1:
|
|
- Catalogs live in `_common.py`, shared with `check_deps.py`
|
|
- Coverage expanded for Flux / SD3 / Wan / Hunyuan / LTX / IPAdapter / rgthree
|
|
- Symmetric duplicate-name resolution: ALL duplicates get a node-id suffix
|
|
(instead of "first wins, second renamed"), so callers see consistent names
|
|
- Negative prompt detected by tracing `KSampler.negative` connections back to
|
|
the source CLIPTextEncode (more reliable than meta-title heuristic)
|
|
- Embedding references in prompt text are extracted as model dependencies
|
|
- Detects Primitive nodes that drive other nodes' inputs (and surfaces them
|
|
as the user-facing parameter)
|
|
- Reroutes are followed when tracing connections
|
|
|
|
Usage:
|
|
python3 extract_schema.py workflow_api.json
|
|
python3 extract_schema.py workflow_api.json --output schema.json
|
|
|
|
Stdlib-only. Python 3.10+.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
|
from _common import ( # noqa: E402
|
|
OUTPUT_NODES, PARAM_PATTERNS, PROMPT_FIELDS,
|
|
is_link, iter_embedding_refs, iter_model_deps, iter_nodes, unwrap_workflow,
|
|
)
|
|
|
|
|
|
# Node classes treated as samplers/guiders: their `positive` / `negative`
# inputs are the starting points for locating the prompt text encoders.
SAMPLER_NODE_FAMILY = {
    "KSampler",
    "KSamplerAdvanced",
    "SamplerCustom",
    "SamplerCustomAdvanced",
    "BasicGuider",
    "CFGGuider",
    "DualCFGGuider",
}
|
|
|
|
|
|
def infer_type(value: Any) -> str:
    """Classify a workflow input value with a coarse schema type label.

    Returns one of ``"bool"``, ``"int"``, ``"float"``, ``"string"``,
    ``"link"`` (any list), ``"object"`` (any dict) or ``"unknown"``.
    """
    # Checked top to bottom; bool has to come before int because bool is a
    # subclass of int and would otherwise be reported as "int".
    label_by_type = (
        (bool, "bool"),
        (int, "int"),
        (float, "float"),
        (str, "string"),
        (list, "link"),
        (dict, "object"),
    )
    for python_type, label in label_by_type:
        if isinstance(value, python_type):
            return label
    return "unknown"
|
|
|
|
|
|
def trace_to_node(workflow: dict, link: list, *, max_hops: int = 8) -> str | None:
    """Resolve a ``[node_id, slot]`` link to the upstream node id it leads to.

    Passthrough nodes (Reroute / PrimitiveNode / Note / easy showAnything) are
    skipped by following their first link-shaped input. The walk stops after
    `max_hops` iterations or as soon as a node id repeats, so cyclic graphs
    cannot loop forever.

    Returns ``None`` when `link` is not a link or when a node id on the path
    does not map to a dict in `workflow`; otherwise the id the walk stopped at.
    """
    if not is_link(link):
        return None

    passthrough_classes = {"Reroute", "PrimitiveNode", "Note", "easy showAnything"}
    current: str | None = link[0]
    seen: set[str] = set()

    for _hop in range(max_hops):
        # Dangling id or a cycle: stop where we are.
        if current is None or current in seen:
            break
        seen.add(current)

        entry = workflow.get(current)
        if not isinstance(entry, dict):
            return None

        # A non-passthrough node is the answer.
        if entry.get("class_type", "") not in passthrough_classes:
            break

        # Passthrough: continue through its first link-shaped input, if any.
        upstream = None
        for candidate in (entry.get("inputs", {}) or {}).values():
            if is_link(candidate):
                upstream = candidate
                break
        if upstream is None:
            break
        current = upstream[0]

    return current
|
|
|
|
|
|
def _find_prompt_node(workflow: dict, conditioning_input: str) -> str | None:
    """Return the id of the text encoder feeding a sampler's conditioning input.

    Scans every node whose class is in ``SAMPLER_NODE_FAMILY``, follows the
    link stored under `conditioning_input` (``"positive"`` or ``"negative"``)
    with ``trace_to_node`` and returns the first traced node whose class name
    starts with ``CLIPTextEncode`` or is one of the known third-party text
    encoders. Returns ``None`` when no sampler leads to such a node.
    """
    extra_encoders = {"smZ CLIPTextEncode", "BNK_CLIPTextEncodeAdvanced"}
    for _nid, node in iter_nodes(workflow):
        if node["class_type"] not in SAMPLER_NODE_FAMILY:
            continue
        link = (node.get("inputs", {}) or {}).get(conditioning_input)
        if not is_link(link):
            continue
        src = trace_to_node(workflow, link)
        if not src:
            continue
        source_node = workflow.get(src)
        if not isinstance(source_node, dict):
            continue
        cls = source_node.get("class_type", "")
        # Guard against a malformed, non-string class_type before startswith().
        if not isinstance(cls, str):
            continue
        if cls.startswith("CLIPTextEncode") or cls in extra_encoders:
            return src
    return None


def find_negative_prompt_node(workflow: dict) -> str | None:
    """Trace `negative` input of a sampler back to the source text encoder."""
    return _find_prompt_node(workflow, "negative")


def find_positive_prompt_node(workflow: dict) -> str | None:
    """Trace `positive` input of a sampler back to the source text encoder."""
    return _find_prompt_node(workflow, "positive")
|
|
|
|
|
|
def extract_schema(workflow: dict) -> dict:
    """Extract controllable parameters from a workflow.

    Returns:
        {
          "parameters": { friendly_name: {node_id, field, type, value, class_type[, alias_of]} },
          "output_nodes": [node_id, ...],
          "model_dependencies": [{node_id, class_type, field, value, folder}],
          "embedding_dependencies": [{node_id, embedding_name, field, value_excerpt, folder}],
          "summary": {...}
        }

    A friendly name that matches exactly one input is used as-is. When it
    matches several, every match is stored as ``<name>_<node_id>`` and carries
    ``alias_of`` with the unsuffixed name.
    """
    output_nodes: list[str] = []

    # First pass: identify positive / negative prompt nodes via connection tracing
    pos_node = find_positive_prompt_node(workflow)
    neg_node = find_negative_prompt_node(workflow)

    # ----- collect raw parameter candidates -----
    # Duplicate friendly names are resolved AFTER the loop so dedup is symmetric.
    raw_params: list[dict] = []

    for node_id, node in iter_nodes(workflow):
        cls = node["class_type"]
        inputs = node.get("inputs", {}) or {}

        if cls in OUTPUT_NODES:
            output_nodes.append(node_id)

        # Match this node against PARAM_PATTERNS
        for p_class, p_field, friendly in PARAM_PATTERNS:
            if cls != p_class or p_field not in inputs:
                continue
            value = inputs[p_field]
            value_type = infer_type(value)
            if value_type == "link":
                continue  # connections aren't directly controllable

            actual_name = friendly

            # Disambiguate prompt vs negative_prompt by connection tracing
            if friendly == "prompt":
                if node_id == neg_node and pos_node != neg_node:
                    actual_name = "negative_prompt"
                elif node_id != pos_node:
                    # Fallback: use _meta.title hints if present. The title may
                    # be missing or JSON null, so normalise before lowercasing.
                    meta = node.get("_meta")
                    title = meta.get("title") if isinstance(meta, dict) else None
                    meta_title = str(title or "").lower()
                    if any(hint in meta_title for hint in ("negative", "neg", "-prompt", "anti")):
                        actual_name = "negative_prompt"

            raw_params.append({
                "name_hint": actual_name,
                "node_id": node_id,
                "field": p_field,
                "type": value_type,
                "value": value,
                "class_type": cls,
            })

    # ----- symmetric duplicate-name resolution -----
    # Group by name_hint. If a hint appears once, keep it. If multiple, suffix
    # ALL with their node_id.
    by_name: dict[str, list[dict]] = {}
    for raw in raw_params:
        by_name.setdefault(raw["name_hint"], []).append(raw)

    public_keys = ("node_id", "field", "type", "value", "class_type")
    parameters: dict[str, dict] = {}
    for name, entries in by_name.items():
        if len(entries) == 1:
            parameters[name] = {key: entries[0][key] for key in public_keys}
            continue
        # Zero-pad node ids so "9" sorts before "10"; ties broken by field.
        ordered = sorted(entries, key=lambda x: (str(x["node_id"]).zfill(8), x["field"]))
        for raw in ordered:
            entry = {key: raw[key] for key in public_keys}
            entry["alias_of"] = name
            parameters[f"{name}_{raw['node_id']}"] = entry

    # ----- model dependencies -----
    model_deps = list(iter_model_deps(workflow))

    # ----- embedding dependencies (in prompt text) -----
    embedding_deps: list[dict] = []
    seen_emb: set[tuple[str, str]] = set()
    for nid, emb_name in iter_embedding_refs(workflow):
        key = (nid, emb_name)
        if key in seen_emb:
            continue
        seen_emb.add(key)

        # Find which field had the reference, for context
        node = workflow.get(nid)
        inputs = (node.get("inputs", {}) or {}) if isinstance(node, dict) else {}
        found_field = None
        excerpt = None
        for fname, fval in inputs.items():
            if isinstance(fval, str) and fname in PROMPT_FIELDS and emb_name in fval:
                found_field = fname
                excerpt = fval[:120]
                break
        embedding_deps.append({
            "node_id": nid,
            "embedding_name": emb_name,
            "field": found_field,
            "value_excerpt": excerpt,
            "folder": "embeddings",
        })

    # ----- summary -----
    video_output_classes = {
        "VHS_VideoCombine", "SaveVideo", "SaveAnimatedWEBP", "SaveAnimatedPNG",
    }
    summary = {
        "parameter_count": len(parameters),
        "output_node_count": len(output_nodes),
        "model_dep_count": len(model_deps),
        "embedding_dep_count": len(embedding_deps),
        # Look at the name hints, not the final keys: with several negative
        # prompt nodes every key is suffixed ("negative_prompt_<id>") and a
        # plain `in parameters` test would wrongly report False.
        "has_negative_prompt": "negative_prompt" in by_name,
        "has_seed": "seed" in parameters or any(p.startswith("seed_") for p in parameters),
        "is_video_workflow": any(
            workflow.get(n, {}).get("class_type", "") in video_output_classes
            for n in output_nodes
        ),
    }

    return {
        "parameters": parameters,
        "output_nodes": output_nodes,
        "model_dependencies": model_deps,
        "embedding_dependencies": embedding_deps,
        "summary": summary,
    }
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: load a workflow file and emit its schema.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` on success, ``1`` when the workflow file is missing, unreadable,
        not valid JSON, or rejected by ``unwrap_workflow`` with a ``ValueError``.
        Error messages go to stderr.
    """
    parser = argparse.ArgumentParser(description="Extract controllable parameters from a ComfyUI workflow")
    parser.add_argument("workflow", help="Path to workflow API JSON file")
    parser.add_argument("--output", "-o", help="Output file (default: stdout)")
    parser.add_argument("--summary-only", action="store_true",
                        help="Only print the summary block")
    args = parser.parse_args(argv)

    wf_path = Path(args.workflow).expanduser()
    if not wf_path.exists():
        print(f"Error: {wf_path} not found", file=sys.stderr)
        return 1

    try:
        with wf_path.open(encoding="utf-8") as f:
            payload = json.load(f)
        workflow = unwrap_workflow(payload)
    except json.JSONDecodeError as e:
        # Must precede ValueError: JSONDecodeError is a ValueError subclass, so
        # the reverse order would make this handler unreachable.
        print(f"Error: invalid JSON — {e}", file=sys.stderr)
        return 1
    except ValueError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1
    except OSError as e:
        # e.g. the path is a directory or is not readable.
        print(f"Error: cannot read {wf_path}: {e}", file=sys.stderr)
        return 1

    schema = extract_schema(workflow)

    if args.summary_only:
        out = json.dumps(schema["summary"], indent=2)
    else:
        out = json.dumps(schema, indent=2, default=str)

    if args.output:
        Path(args.output).write_text(out, encoding="utf-8")
        print(f"Schema written to {args.output}", file=sys.stderr)
    else:
        print(out)

    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the CLI and use main()'s return value as the process exit status.
    raise SystemExit(main())
|