diff --git a/Dockerfile b/Dockerfile
index 37038233262..0d3da72eb77 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,26 +21,36 @@ RUN useradd -u 10000 -m -d /opt/data hermes
COPY --chmod=0755 --from=gosu_source /gosu /usr/local/bin/
COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/
-COPY . /opt/hermes
WORKDIR /opt/hermes
-# Install Node dependencies and Playwright as root (--with-deps needs apt)
+# ---------- Layer-cached dependency install ----------
+# Copy only package manifests first so npm install + Playwright are cached
+# unless the lockfiles themselves change.
+COPY package.json package-lock.json ./
+COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
+COPY web/package.json web/package-lock.json web/
+
RUN npm install --prefer-offline --no-audit && \
npx playwright install --with-deps chromium --only-shell && \
- cd /opt/hermes/scripts/whatsapp-bridge && \
- npm install --prefer-offline --no-audit && \
+ (cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
+ (cd web && npm install --prefer-offline --no-audit) && \
npm cache clean --force
-# Hand ownership to hermes user, then install Python deps in a virtualenv
-RUN chown -R hermes:hermes /opt/hermes
-USER hermes
+# ---------- Source code ----------
+# .dockerignore excludes node_modules, so the installs above survive.
+COPY --chown=hermes:hermes . .
+# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
+RUN cd web && npm run build
+
+# ---------- Python virtualenv ----------
+RUN chown hermes:hermes /opt/hermes
+USER hermes
RUN uv venv && \
uv pip install --no-cache-dir -e ".[all]"
-USER root
-RUN chmod +x /opt/hermes/docker/entrypoint.sh
-
+# ---------- Runtime ----------
+ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
ENV HERMES_HOME=/opt/data
VOLUME [ "/opt/data" ]
ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 8adf080e31d..19bde946ee3 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -99,11 +99,48 @@ _FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
"kimi-for-coding": 0.6,
}
+# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
+# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
+# value 0.6. Any other value will result in an error." The same lock applies
+# to the other k2.* models served on that endpoint. Enumerated explicitly so
+# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
+# the standard chat API and third parties) are NOT clamped.
+# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
+# Bare model ids that _fixed_temperature_for_model pins to temperature 0.6.
+_KIMI_INSTANT_MODELS: frozenset = frozenset({
+ "kimi-k2.5",
+ "kimi-k2-turbo-preview",
+ "kimi-k2-0905-preview",
+})
+# Bare model ids that _fixed_temperature_for_model pins to temperature 1.0.
+_KIMI_THINKING_MODELS: frozenset = frozenset({
+ "kimi-k2-thinking",
+ "kimi-k2-thinking-turbo",
+})
+
def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
- """Return a required temperature override for models with strict contracts."""
+ """Return a required temperature override for models with strict contracts.
+
+ Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
+ the k2.5 family. Non-thinking variants require exactly 0.6; thinking
+ variants require 1.0. An optional ``vendor/`` prefix (e.g.
+ ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
+
+ Matching is case-insensitive and ignores surrounding whitespace.
+
+ Returns ``None`` for every other model, including ``kimi-k2-instruct*``
+ which is the separate non-coding K2 family with variable temperature.
+ """
normalized = (model or "").strip().lower()
- return _FIXED_TEMPERATURE_MODELS.get(normalized)
+ # Exact lookup on the full normalized id first; the vendor prefix is NOT
+ # stripped for this table, only for the kimi sets below.
+ fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
+ if fixed is not None:
+ logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
+ return fixed
+ # Keep only the segment after the last "/" so "vendor/model" ids match.
+ bare = normalized.rsplit("/", 1)[-1]
+ if bare in _KIMI_THINKING_MODELS:
+ logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
+ return 1.0
+ if bare in _KIMI_INSTANT_MODELS:
+ logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
+ return 0.6
+ return None
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
@@ -1611,7 +1648,6 @@ def resolve_provider_client(
from hermes_cli.models import copilot_default_headers
headers.update(copilot_default_headers())
-
client = OpenAI(api_key=api_key, base_url=base_url,
**({"default_headers": headers} if headers else {}))
diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 34ec5091b1c..ae8c2c0bd31 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -63,6 +63,52 @@ _CHARS_PER_TOKEN = 4
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
+def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
+    """Shrink long string values inside a tool-call arguments JSON blob while
+    preserving JSON validity.
+
+    The ``function.arguments`` field on a tool call is a JSON-encoded string
+    passed through to the LLM provider; downstream providers strictly
+    validate it and return a non-retryable 400 when it is not well-formed.
+    An earlier implementation sliced the raw JSON at a fixed byte offset and
+    appended ``...[truncated]`` — which routinely produced strings like::
+
+        {"path": "/foo/bar", "content": "# long markdown
+        ...[truncated]
+
+    i.e. an unterminated string and a missing closing brace. MiniMax, for
+    example, rejects this with ``invalid function arguments json string``
+    and the session gets stuck re-sending the same broken history on every
+    turn. See issue #11762 for the observed loop.
+
+    This helper parses the arguments, shrinks long string leaves inside the
+    parsed structure, and re-serialises. Strings no longer than
+    ``head_chars`` (typically paths and names) and non-string values (ints,
+    booleans, null) are preserved intact; dict keys are never shortened.
+    If the arguments are not valid JSON to begin with — some model backends
+    use non-JSON tool arguments — the original string is returned unchanged
+    rather than replaced with something neither we nor the backend can parse.
+
+    Args:
+        args: JSON-encoded tool-call arguments.
+        head_chars: Number of leading characters kept from any string value
+            that is longer than this, before ``...[truncated]`` is appended.
+
+    Returns:
+        ``args`` itself when it is not valid JSON or when no string value
+        exceeds ``head_chars``; otherwise the re-serialised, shortened JSON.
+    """
+    try:
+        parsed = json.loads(args)
+    except (ValueError, TypeError):
+        return args
+
+    def _shrink(obj: Any) -> Any:
+        if isinstance(obj, str):
+            if len(obj) > head_chars:
+                return obj[:head_chars] + "...[truncated]"
+            return obj
+        if isinstance(obj, dict):
+            return {k: _shrink(v) for k, v in obj.items()}
+        if isinstance(obj, list):
+            return [_shrink(v) for v in obj]
+        return obj
+
+    shrunken = _shrink(parsed)
+    if shrunken == parsed:
+        # Nothing was long enough to shorten. Re-serialising would only
+        # reformat the blob (json.dumps adds ", " / ": " separators, so
+        # compact input would even grow) and make callers believe the
+        # arguments were modified.
+        return args
+    # ensure_ascii=False preserves CJK/emoji instead of bloating with \uXXXX
+    return json.dumps(shrunken, ensure_ascii=False)
+
+
def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
"""Create an informative 1-line summary of a tool call + result.
@@ -449,6 +495,11 @@ class ContextCompressor(ContextEngine):
# Pass 3: Truncate large tool_call arguments in assistant messages
# outside the protected tail. write_file with 50KB content, for
# example, survives pruning entirely without this.
+ #
+ # The shrinking is done inside the parsed JSON structure so the
+ # result remains valid JSON — otherwise downstream providers 400
+ # on every subsequent turn until the broken call falls out of
+ # the window. See ``_truncate_tool_call_args_json`` docstring.
for i in range(prune_boundary):
msg = result[i]
if msg.get("role") != "assistant" or not msg.get("tool_calls"):
@@ -459,8 +510,10 @@ class ContextCompressor(ContextEngine):
if isinstance(tc, dict):
args = tc.get("function", {}).get("arguments", "")
if len(args) > 500:
- tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
- modified = True
+ new_args = _truncate_tool_call_args_json(args)
+ if new_args != args:
+ tc = {**tc, "function": {**tc["function"], "arguments": new_args}}
+ modified = True
new_tcs.append(tc)
if modified:
result[i] = {**msg, "tool_calls": new_tcs}
diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index a67eee6c422..b02514e990c 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -22,8 +22,6 @@ from hermes_cli.auth import (
_auth_store_lock,
_codex_access_token_is_expiring,
_decode_jwt_claims,
- _import_codex_cli_tokens,
- _write_codex_cli_tokens,
_load_auth_store,
_load_provider_state,
_resolve_kimi_base_url,
@@ -457,39 +455,6 @@ class CredentialPool:
logger.debug("Failed to sync from credentials file: %s", exc)
return entry
- def _sync_codex_entry_from_cli(self, entry: PooledCredential) -> PooledCredential:
- """Sync an openai-codex pool entry from ~/.codex/auth.json if tokens differ.
-
- OpenAI OAuth refresh tokens are single-use and rotate on every refresh.
- When the Codex CLI (or another Hermes profile) refreshes its token,
- the pool entry's refresh_token becomes stale. This method detects that
- by comparing against ~/.codex/auth.json and syncing the fresh pair.
- """
- if self.provider != "openai-codex":
- return entry
- try:
- cli_tokens = _import_codex_cli_tokens()
- if not cli_tokens:
- return entry
- cli_refresh = cli_tokens.get("refresh_token", "")
- cli_access = cli_tokens.get("access_token", "")
- if cli_refresh and cli_refresh != entry.refresh_token:
- logger.debug("Pool entry %s: syncing tokens from ~/.codex/auth.json (refresh token changed)", entry.id)
- updated = replace(
- entry,
- access_token=cli_access,
- refresh_token=cli_refresh,
- last_status=None,
- last_status_at=None,
- last_error_code=None,
- )
- self._replace_entry(entry, updated)
- self._persist()
- return updated
- except Exception as exc:
- logger.debug("Failed to sync from ~/.codex/auth.json: %s", exc)
- return entry
-
def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
"""Write refreshed pool entry tokens back to auth.json providers.
@@ -585,13 +550,6 @@ class CredentialPool:
except Exception as wexc:
logger.debug("Failed to write refreshed token to credentials file: %s", wexc)
elif self.provider == "openai-codex":
- # Proactively sync from ~/.codex/auth.json before refresh.
- # The Codex CLI (or another Hermes profile) may have already
- # consumed our refresh_token. Syncing first avoids a
- # "refresh_token_reused" error when the CLI has a newer pair.
- synced = self._sync_codex_entry_from_cli(entry)
- if synced is not entry:
- entry = synced
refreshed = auth_mod.refresh_codex_oauth_pure(
entry.access_token,
entry.refresh_token,
@@ -677,45 +635,6 @@ class CredentialPool:
# Credentials file had a valid (non-expired) token — use it directly
logger.debug("Credentials file has valid token, using without refresh")
return synced
- # For openai-codex: the refresh_token may have been consumed by
- # the Codex CLI between our proactive sync and the refresh call.
- # Re-sync and retry once.
- if self.provider == "openai-codex":
- synced = self._sync_codex_entry_from_cli(entry)
- if synced.refresh_token != entry.refresh_token:
- logger.debug("Retrying Codex refresh with synced token from ~/.codex/auth.json")
- try:
- refreshed = auth_mod.refresh_codex_oauth_pure(
- synced.access_token,
- synced.refresh_token,
- )
- updated = replace(
- synced,
- access_token=refreshed["access_token"],
- refresh_token=refreshed["refresh_token"],
- last_refresh=refreshed.get("last_refresh"),
- last_status=STATUS_OK,
- last_status_at=None,
- last_error_code=None,
- )
- self._replace_entry(synced, updated)
- self._persist()
- self._sync_device_code_entry_to_auth_store(updated)
- try:
- _write_codex_cli_tokens(
- updated.access_token,
- updated.refresh_token,
- last_refresh=updated.last_refresh,
- )
- except Exception as wexc:
- logger.debug("Failed to write refreshed Codex tokens to CLI file (retry): %s", wexc)
- return updated
- except Exception as retry_exc:
- logger.debug("Codex retry refresh also failed: %s", retry_exc)
- elif not self._entry_needs_refresh(synced):
- logger.debug("Codex CLI has valid token, using without refresh")
- self._sync_device_code_entry_to_auth_store(synced)
- return synced
self._mark_exhausted(entry, None)
return None
@@ -734,17 +653,6 @@ class CredentialPool:
# _seed_from_singletons() on the next load_pool() sees fresh state
# instead of re-seeding stale/consumed tokens.
self._sync_device_code_entry_to_auth_store(updated)
- # Write refreshed tokens back to ~/.codex/auth.json so Codex CLI
- # and VS Code don't hit "refresh_token_reused" on their next refresh.
- if self.provider == "openai-codex":
- try:
- _write_codex_cli_tokens(
- updated.access_token,
- updated.refresh_token,
- last_refresh=updated.last_refresh,
- )
- except Exception as wexc:
- logger.debug("Failed to write refreshed Codex tokens to CLI file: %s", wexc)
return updated
def _entry_needs_refresh(self, entry: PooledCredential) -> bool:
@@ -790,16 +698,6 @@ class CredentialPool:
if synced is not entry:
entry = synced
cleared_any = True
- # For openai-codex entries, sync from ~/.codex/auth.json before
- # any status/refresh checks. This picks up tokens refreshed by
- # the Codex CLI or another Hermes profile.
- if (self.provider == "openai-codex"
- and entry.last_status == STATUS_EXHAUSTED
- and entry.refresh_token):
- synced = self._sync_codex_entry_from_cli(entry)
- if synced is not entry:
- entry = synced
- cleared_any = True
if entry.last_status == STATUS_EXHAUSTED:
exhausted_until = _exhausted_until(entry)
if exhausted_until is not None and now < exhausted_until:
@@ -1218,8 +1116,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
elif provider == "openai-codex":
# Respect user suppression — `hermes auth remove openai-codex` marks
# the device_code source as suppressed so it won't be re-seeded from
- # either the Hermes auth store or ~/.codex/auth.json. Without this
- # gate the removal is instantly undone on the next load_pool() call.
+ # the Hermes auth store. Without this gate the removal is instantly
+ # undone on the next load_pool() call.
codex_suppressed = False
try:
from hermes_cli.auth import is_source_suppressed
@@ -1231,23 +1129,12 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
state = _load_provider_state(auth_store, "openai-codex")
tokens = state.get("tokens") if isinstance(state, dict) else None
- # Fallback: import from Codex CLI (~/.codex/auth.json) if Hermes auth
- # store has no tokens. This mirrors resolve_codex_runtime_credentials()
- # so that load_pool() and list_authenticated_providers() detect tokens
- # that only exist in the Codex CLI shared file.
- if not (isinstance(tokens, dict) and tokens.get("access_token")):
- try:
- from hermes_cli.auth import _import_codex_cli_tokens, _save_codex_tokens
- cli_tokens = _import_codex_cli_tokens()
- if cli_tokens:
- logger.info("Importing Codex CLI tokens into Hermes auth store.")
- _save_codex_tokens(cli_tokens)
- # Re-read state after import
- auth_store = _load_auth_store()
- state = _load_provider_state(auth_store, "openai-codex")
- tokens = state.get("tokens") if isinstance(state, dict) else None
- except Exception as exc:
- logger.debug("Codex CLI token import failed: %s", exc)
+ # Hermes owns its own Codex auth state — we do NOT auto-import from
+ # ~/.codex/auth.json at pool-load time. OAuth refresh tokens are
+ # single-use, so sharing them with Codex CLI / VS Code causes
+ # refresh_token_reused race failures. Users who want to adopt
+ # existing Codex CLI credentials get a one-time, explicit prompt
+ # via `hermes auth openai-codex`.
if isinstance(tokens, dict) and tokens.get("access_token"):
active_sources.add("device_code")
changed |= _upsert_entry(
diff --git a/agent/models_dev.py b/agent/models_dev.py
index 42c8925ffe7..3e5c911e7ee 100644
--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@@ -420,7 +420,10 @@ def list_provider_models(provider: str) -> List[str]:
models = _get_provider_models(provider)
if models is None:
return []
- return list(models.keys())
+ return [
+ mid for mid in models.keys()
+ if not _should_hide_from_provider_catalog(provider, mid)
+ ]
# Patterns that indicate non-agentic or noise models (TTS, embedding,
@@ -432,6 +435,43 @@ _NOISE_PATTERNS: re.Pattern = re.compile(
re.IGNORECASE,
)
+# Google's live Gemini catalogs currently include a mix of stale slugs and
+# Gemma models whose TPM quotas are too small for normal Hermes agent traffic.
+# Keep capability metadata available for direct/manual use, but hide these from
+# the Gemini model catalogs we surface in setup and model selection.
+_GOOGLE_HIDDEN_MODELS = frozenset({
+ # Low-TPM Gemma models that trip Google input-token quota walls under
+ # agent-style traffic despite advertising large context windows.
+ "gemma-4-31b-it",
+ "gemma-4-26b-it",
+ "gemma-4-26b-a4b-it",
+ "gemma-3-1b",
+ "gemma-3-1b-it",
+ "gemma-3-2b",
+ "gemma-3-2b-it",
+ "gemma-3-4b",
+ "gemma-3-4b-it",
+ "gemma-3-12b",
+ "gemma-3-12b-it",
+ "gemma-3-27b",
+ "gemma-3-27b-it",
+ # Stale/retired Google slugs that still surface through models.dev-backed
+ # Gemini selection but 404 on the current Google endpoints.
+ "gemini-1.5-flash",
+ "gemini-1.5-pro",
+ "gemini-1.5-flash-8b",
+ "gemini-2.0-flash",
+ "gemini-2.0-flash-lite",
+})
+
+
+def _should_hide_from_provider_catalog(provider: str, model_id: str) -> bool:
+    """Tell whether ``model_id`` must be left out of ``provider``'s catalog.
+
+    Both values are compared after trimming whitespace and lower-casing;
+    ``None`` or empty values are treated as the empty string. Only the
+    ``gemini`` and ``google`` providers have hidden entries, and those are
+    the ids listed in ``_GOOGLE_HIDDEN_MODELS``.
+    """
+    if (provider or "").strip().lower() not in {"gemini", "google"}:
+        return False
+    return (model_id or "").strip().lower() in _GOOGLE_HIDDEN_MODELS
+
def list_agentic_models(provider: str) -> List[str]:
"""Return model IDs suitable for agentic use from models.dev.
@@ -448,6 +488,8 @@ def list_agentic_models(provider: str) -> List[str]:
for mid, entry in models.items():
if not isinstance(entry, dict):
continue
+ if _should_hide_from_provider_catalog(provider, mid):
+ continue
if not entry.get("tool_call", False):
continue
if _NOISE_PATTERNS.search(mid):
@@ -582,5 +624,3 @@ def get_model_info(
return _parse_model_info(mid, mdata, mdev_id)
return None
-
-
diff --git a/cli.py b/cli.py
index c0c17babc4c..0e5e9ff6603 100644
--- a/cli.py
+++ b/cli.py
@@ -83,17 +83,51 @@ load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
_REASONING_TAGS = (
"REASONING_SCRATCHPAD",
"think",
- "reasoning",
- "THINKING",
"thinking",
+ "reasoning",
+ "thought",
)
def _strip_reasoning_tags(text: str) -> str:
+    """Remove reasoning/thinking blocks from displayed text.
+
+    Handles every case:
+      * Closed pairs ``<tag>…</tag>`` (case-insensitive, multi-line).
+      * Unterminated open tags that run to end-of-text (e.g. truncated
+        generations on NIM/MiniMax where the close tag is dropped).
+      * Stray orphan close tags (``stuff</think>answer``) left behind by
+        partial-content dumps.
+
+    Covers the variants emitted by reasoning models today: ``<think>``,
+    ``<thinking>``, ``<reasoning>``, ``<REASONING_SCRATCHPAD>``, and
+    ``<thought>`` (Gemma 4). Must stay in sync with
+    ``run_agent.py::_strip_think_blocks`` and the stream consumer's
+    ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples.
+
+    Args:
+        text: Model output that may contain reasoning markup.
+
+    Returns:
+        ``text`` with every block for a tag in ``_REASONING_TAGS`` removed
+        and surrounding whitespace stripped.
+    """
cleaned = text
for tag in _REASONING_TAGS:
- cleaned = re.sub(rf"<{tag}>.*?{tag}>\s*", "", cleaned, flags=re.DOTALL)
- cleaned = re.sub(rf"<{tag}>.*$", "", cleaned, flags=re.DOTALL)
+        # Closed pair — case-insensitive so <THINKING>…</THINKING> is handled too.
+        cleaned = re.sub(
+            rf"<{tag}>.*?</{tag}>\s*",
+            "",
+            cleaned,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+        # Unterminated open tag — strip from the tag to end of text.
+        cleaned = re.sub(
+            rf"<{tag}>.*$",
+            "",
+            cleaned,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+        # Stray orphan close tag left behind by partial dumps. The pattern
+        # must include the leading "</": matching a bare "tag>" would leave
+        # "</" in the output and would also eat ordinary text such as
+        # "I think> ...".
+        cleaned = re.sub(
+            rf"</{tag}>\s*",
+            "",
+            cleaned,
+            flags=re.IGNORECASE,
+        )
return cleaned.strip()
@@ -1776,7 +1810,7 @@ class HermesCLI:
mcp_names = set((CLI_CONFIG.get("mcp_servers") or {}).keys())
invalid = [t for t in toolsets if not validate_toolset(t) and t not in mcp_names]
if invalid:
- self.console.print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]")
+ self._console_print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]")
# Filesystem checkpoints: CLI flag > config
cp_cfg = CLI_CONFIG.get("checkpoints", {})
@@ -2068,20 +2102,35 @@ class HermesCLI:
def _spinner_widget_height(self, width: Optional[int] = None) -> int:
"""Return the visible height for the spinner/status text line above the status bar."""
- if not getattr(self, "_spinner_text", ""):
+ # Work from the line exactly as _render_spinner_text() builds it; an empty
+ # result means there is nothing to show.
+ spinner_line = self._render_spinner_text()
+ if not spinner_line:
return 0
if self._use_minimal_tui_chrome(width=width):
return 0
- # Compute how many lines the spinner text needs when wrapped.
- # The rendered text is " {emoji} {label} ({elapsed})" — about
- # len(_spinner_text) + 16 chars for indent + timer suffix.
width = width or self._get_tui_terminal_width()
if width and width > 10:
import math
- text_len = len(self._spinner_text) + 16 # indent + timer
- return max(1, math.ceil(text_len / width))
+ # Rows needed = measured width of the rendered line / terminal width,
+ # rounded up, never less than one row.
+ # NOTE(review): _status_bar_display_width presumably measures terminal
+ # cells rather than len() — confirm against its definition.
+ text_width = self._status_bar_display_width(spinner_line)
+ return max(1, math.ceil(text_width / width))
return 1
+    def _render_spinner_text(self) -> str:
+        """Build the spinner/status line shown in the TUI.
+
+        Returns an empty string when ``_spinner_text`` is unset or empty.
+        Otherwise returns the text with a leading space, followed by the
+        elapsed time since ``_tool_start_time`` in parentheses when that
+        timestamp is positive: ``N.Ns`` below one minute, ``Nm Ns`` from
+        one minute on.
+        """
+        label = getattr(self, "_spinner_text", "")
+        if not label:
+            return ""
+        started = getattr(self, "_tool_start_time", 0) or 0
+        if not started > 0:
+            # No tool timer running: show the bare label.
+            return f" {label}"
+        import time as _time
+        elapsed = _time.monotonic() - started
+        if elapsed < 60:
+            elapsed_str = f"{elapsed:.1f}s"
+        else:
+            _m, _s = int(elapsed // 60), int(elapsed % 60)
+            elapsed_str = f"{_m}m {_s}s"
+        return f" {label} ({elapsed_str})"
+
def _get_voice_status_fragments(self, width: Optional[int] = None):
"""Return the voice status bar fragments for the interactive TUI."""
width = width or self._get_tui_terminal_width()
@@ -2212,7 +2261,7 @@ class HermesCLI:
normalized_model = normalize_model_for_provider(current_model, resolved_provider)
if normalized_model and normalized_model != current_model:
if not self._model_is_default:
- self.console.print(
+ self._console_print(
f"[yellow]⚠️ Normalized model '{current_model}' to '{normalized_model}' for {resolved_provider}.[/]"
)
self.model = normalized_model
@@ -2228,7 +2277,7 @@ class HermesCLI:
canonical = normalize_copilot_model_id(current_model, api_key=self.api_key)
if canonical and canonical != current_model:
if not self._model_is_default:
- self.console.print(
+ self._console_print(
f"[yellow]⚠️ Normalized Copilot model '{current_model}' to '{canonical}'.[/]"
)
self.model = canonical
@@ -2250,7 +2299,7 @@ class HermesCLI:
canonical = normalize_opencode_model_id(resolved_provider, current_model)
if canonical and canonical != current_model:
if not self._model_is_default:
- self.console.print(
+ self._console_print(
f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; using '{canonical}' for {resolved_provider}.[/]"
)
self.model = canonical
@@ -2272,7 +2321,7 @@ class HermesCLI:
if "/" in current_model:
slug = current_model.split("/", 1)[1]
if not self._model_is_default:
- self.console.print(
+ self._console_print(
f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; "
f"using '{slug}' for OpenAI Codex.[/]"
)
@@ -3021,7 +3070,7 @@ class HermesCLI:
use_compact = self.compact or term_width < 80
if use_compact:
- self.console.print(_build_compact_banner())
+ self._console_print(_build_compact_banner())
self._show_status()
else:
# Get tools for display
@@ -3046,25 +3095,25 @@ class HermesCLI:
# Warn about very low context lengths (common with local servers)
if ctx_len and ctx_len <= 8192:
- self.console.print()
- self.console.print(
+ self._console_print()
+ self._console_print(
f"[yellow]⚠️ Context length is only {ctx_len:,} tokens — "
f"this is likely too low for agent use with tools.[/]"
)
- self.console.print(
+ self._console_print(
"[dim] Hermes needs 16k–32k minimum. Tool schemas + system prompt alone use ~4k–8k.[/]"
)
base_url = getattr(self, "base_url", "") or ""
if "11434" in base_url or "ollama" in base_url.lower():
- self.console.print(
+ self._console_print(
"[dim] Ollama fix: OLLAMA_CONTEXT_LENGTH=32768 ollama serve[/]"
)
elif "1234" in base_url:
- self.console.print(
+ self._console_print(
"[dim] LM Studio fix: Set context length in model settings → reload model[/]"
)
else:
- self.console.print(
+ self._console_print(
"[dim] Fix: Set model.context_length in config.yaml, or increase your server's context setting[/]"
)
@@ -3073,20 +3122,20 @@ class HermesCLI:
model_name = getattr(self, "model", "") or ""
if is_nous_hermes_non_agentic(model_name):
- self.console.print()
- self.console.print(
+ self._console_print()
+ self._console_print(
"[bold yellow]⚠ Nous Research Hermes 3 & 4 models are NOT agentic and are not "
"designed for use with Hermes Agent.[/]"
)
- self.console.print(
+ self._console_print(
"[dim] They lack tool-calling capabilities required for agent workflows. "
"Consider using an agentic model (Claude, GPT, Gemini, DeepSeek, etc.).[/]"
)
- self.console.print(
+ self._console_print(
"[dim] Switch with: /model sonnet or /model gpt5[/]"
)
- self.console.print()
+ self._console_print()
def _preload_resumed_session(self) -> bool:
"""Load a resumed session's history from the DB early (before first chat).
@@ -3104,10 +3153,10 @@ class HermesCLI:
session_meta = self._session_db.get_session(self.session_id)
if not session_meta:
- self.console.print(
+ self._console_print(
f"[bold red]Session not found: {self.session_id}[/]"
)
- self.console.print(
+ self._console_print(
"[dim]Use a session ID from a previous CLI run "
"(hermes sessions list).[/]"
)
@@ -3122,7 +3171,7 @@ class HermesCLI:
if session_meta.get("title"):
title_part = f' "{session_meta["title"]}"'
accent_color = _accent_hex()
- self.console.print(
+ self._console_print(
f"[{accent_color}]↻ Resumed session [bold]{self.session_id}[/bold]"
f"{title_part} "
f"({msg_count} user message{'s' if msg_count != 1 else ''}, "
@@ -3130,7 +3179,7 @@ class HermesCLI:
)
else:
accent_color = _accent_hex()
- self.console.print(
+ self._console_print(
f"[{accent_color}]Session {self.session_id} found but has no "
f"messages. Starting fresh.[/]"
)
@@ -3305,7 +3354,7 @@ class HermesCLI:
padding=(0, 1),
style=_history_text_c,
)
- self.console.print(panel)
+ self._console_print(panel)
def _try_attach_clipboard_image(self) -> bool:
"""Check clipboard for an image and attach it if found.
@@ -3741,14 +3790,14 @@ class HermesCLI:
api_key_missing = [u for u in unavailable if u["missing_vars"]]
if api_key_missing:
- self.console.print()
- self.console.print("[yellow]⚠️ Some tools disabled (missing API keys):[/]")
+ self._console_print()
+ self._console_print("[yellow]⚠️ Some tools disabled (missing API keys):[/]")
for item in api_key_missing:
tools_str = ", ".join(item["tools"][:2]) # Show first 2 tools
if len(item["tools"]) > 2:
tools_str += f", +{len(item['tools'])-2} more"
- self.console.print(f" [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]")
- self.console.print("[dim] Run 'hermes setup' to configure[/]")
+ self._console_print(f" [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]")
+ self._console_print("[dim] Run 'hermes setup' to configure[/]")
except Exception:
pass # Don't crash on import errors
@@ -3786,7 +3835,7 @@ class HermesCLI:
if self._provider_source:
provider_info += f" [dim {separator_color}]·[/] [dim]auth: {self._provider_source}[/]"
- self.console.print(
+ self._console_print(
f" {api_indicator} [{accent_color}]{model_short}[/] "
f"[dim {separator_color}]·[/] [bold {label_color}]{tool_count} tools[/]"
f"{toolsets_info}{provider_info}"
@@ -3843,7 +3892,7 @@ class HermesCLI:
f"Tokens: {total_tokens:,}",
f"Agent Running: {'Yes' if is_running else 'No'}",
])
- self.console.print("\n".join(lines), highlight=False, markup=False)
+ self._console_print("\n".join(lines), highlight=False, markup=False)
def _fast_command_available(self) -> bool:
try:
@@ -5041,8 +5090,15 @@ class HermesCLI:
print(" To change model or provider, use: hermes model")
+    def _output_console(self):
+        """Pick the console that command output should be rendered through.
+
+        While ``_app`` is set to a truthy value (the TUI is live) a fresh
+        ``ChatConsole`` is returned for prompt_toolkit-safe Rich rendering;
+        otherwise the plain ``self.console`` is used.
+        """
+        tui_live = getattr(self, "_app", None)
+        return ChatConsole() if tui_live else self.console
-
+    def _console_print(self, *args, **kwargs):
+        """Forward a ``print`` call to whichever console ``_output_console`` selects."""
+        target = self._output_console()
+        target.print(*args, **kwargs)
@staticmethod
def _resolve_personality_prompt(value) -> str:
@@ -5062,14 +5118,14 @@ class HermesCLI:
from agent.google_oauth import get_valid_access_token, GoogleOAuthError, load_credentials
from agent.google_code_assist import retrieve_user_quota, CodeAssistError
except ImportError as exc:
- self.console.print(f" [red]Gemini modules unavailable: {exc}[/]")
+ self._console_print(f" [red]Gemini modules unavailable: {exc}[/]")
return
try:
access_token = get_valid_access_token()
except GoogleOAuthError as exc:
- self.console.print(f" [yellow]{exc}[/]")
- self.console.print(" Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.")
+ self._console_print(f" [yellow]{exc}[/]")
+ self._console_print(" Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.")
return
creds = load_credentials()
@@ -5078,18 +5134,18 @@ class HermesCLI:
try:
buckets = retrieve_user_quota(access_token, project_id=project_id)
except CodeAssistError as exc:
- self.console.print(f" [red]Quota lookup failed:[/] {exc}")
+ self._console_print(f" [red]Quota lookup failed:[/] {exc}")
return
if not buckets:
- self.console.print(" [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]")
+ self._console_print(" [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]")
return
# Sort for stable display, group by model
buckets.sort(key=lambda b: (b.model_id, b.token_type))
- self.console.print()
- self.console.print(f" [bold]Gemini Code Assist quota[/] (project: {project_id or '(auto / free-tier)'})")
- self.console.print()
+ self._console_print()
+ self._console_print(f" [bold]Gemini Code Assist quota[/] (project: {project_id or '(auto / free-tier)'})")
+ self._console_print()
for b in buckets:
pct = max(0.0, min(1.0, b.remaining_fraction))
width = 20
@@ -5099,8 +5155,8 @@ class HermesCLI:
header = b.model_id
if b.token_type:
header += f" [{b.token_type}]"
- self.console.print(f" {header:40s} {bar} {pct_str}")
- self.console.print()
+ self._console_print(f" {header:40s} {bar} {pct_str}")
+ self._console_print()
def _handle_personality_command(self, cmd: str):
"""Handle the /personality command to set predefined personalities."""
@@ -5231,7 +5287,7 @@ class HermesCLI:
print(" /cron list")
print(' /cron add "every 2h" "Check server status" [--skill blogwatcher]')
print(' /cron edit --schedule "every 4h" --prompt "New task"')
- print(" /cron edit --skill blogwatcher --skill find-nearby")
+ print(" /cron edit --skill blogwatcher --skill maps")
print(" /cron edit --remove-skill blogwatcher")
print(" /cron edit --clear-skills")
print(" /cron pause ")
@@ -5548,7 +5604,7 @@ class HermesCLI:
_tip_color = get_active_skin().get_color("banner_dim", "#B8860B")
except Exception:
_tip_color = "#B8860B"
- self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
+ self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
except Exception:
pass
elif canonical == "history":
@@ -5642,7 +5698,7 @@ class HermesCLI:
elif canonical == "statusbar":
self._status_bar_visible = not self._status_bar_visible
state = "visible" if self._status_bar_visible else "hidden"
- self.console.print(f" Status bar {state}")
+ self._console_print(f" Status bar {state}")
elif canonical == "verbose":
self._toggle_verbose()
elif canonical == "yolo":
@@ -5720,6 +5776,30 @@ class HermesCLI:
_cprint(f" Queued for the next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}")
else:
_cprint(f" Queued: {payload[:80]}{'...' if len(payload) > 80 else ''}")
+ elif canonical == "steer":
+ # Inject a message after the next tool call without interrupting.
+ # If the agent is actively running, push the text into the agent's
+ # pending_steer slot — the drain hook in _execute_tool_calls_*
+ # will append it to the next tool result's content. If no agent
+ # is running, fall back to queue semantics (same as /queue).
+ parts = cmd_original.split(None, 1)
+ payload = parts[1].strip() if len(parts) > 1 else ""
+ if not payload:
+ _cprint(" Usage: /steer ")
+ elif self._agent_running and self.agent is not None and hasattr(self.agent, "steer"):
+ try:
+ accepted = self.agent.steer(payload)
+ except Exception as exc:
+ _cprint(f" Steer failed: {exc}")
+ else:
+ if accepted:
+ _cprint(f" ⏩ Steer queued — arrives after the next tool call: {payload[:80]}{'...' if len(payload) > 80 else ''}")
+ else:
+ _cprint(" Steer rejected (empty payload).")
+ else:
+ # No active run — treat as a normal next-turn message.
+ self._pending_input.put(payload)
+ _cprint(f" No agent running; queued as next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}")
elif canonical == "skin":
self._handle_skin_command(cmd_original)
elif canonical == "voice":
@@ -5741,15 +5821,15 @@ class HermesCLI:
)
output = result.stdout.strip() or result.stderr.strip()
if output:
- self.console.print(_rich_text_from_ansi(output))
+ self._console_print(_rich_text_from_ansi(output))
else:
- self.console.print("[dim]Command returned no output[/]")
+ self._console_print("[dim]Command returned no output[/]")
except subprocess.TimeoutExpired:
- self.console.print("[bold red]Quick command timed out (30s)[/]")
+ self._console_print("[bold red]Quick command timed out (30s)[/]")
except Exception as e:
- self.console.print(f"[bold red]Quick command error: {e}[/]")
+ self._console_print(f"[bold red]Quick command error: {e}[/]")
else:
- self.console.print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]")
+ self._console_print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]")
elif qcmd.get("type") == "alias":
target = qcmd.get("target", "").strip()
if target:
@@ -5758,9 +5838,9 @@ class HermesCLI:
aliased_command = f"{target} {user_args}".strip()
return self.process_command(aliased_command)
else:
- self.console.print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]")
+ self._console_print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]")
else:
- self.console.print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]")
+ self._console_print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]")
# Check for plugin-registered slash commands
elif base_cmd.lstrip("/") in _get_plugin_cmd_handler_names():
from hermes_cli.plugins import get_plugin_command_handler
@@ -7017,8 +7097,7 @@ class HermesCLI:
)
raise RuntimeError(
"Voice mode requires sounddevice and numpy.\n"
- "Install with: pip install sounddevice numpy\n"
- "Or: pip install hermes-agent[voice]"
+ f"Install with: {sys.executable} -m pip install sounddevice numpy"
)
if not reqs.get("stt_available", reqs.get("stt_key_set")):
raise RuntimeError(
@@ -7294,8 +7373,7 @@ class HermesCLI:
_cprint(f" {_DIM}Then install/update the Termux:API Android app for microphone capture{_RST}")
_cprint(f" {_BOLD}Option 2: pkg install python-numpy portaudio && python -m pip install sounddevice{_RST}")
else:
- _cprint(f"\n {_BOLD}Install: pip install {' '.join(reqs['missing_packages'])}{_RST}")
- _cprint(f" {_DIM}Or: pip install hermes-agent[voice]{_RST}")
+ _cprint(f"\n {_BOLD}Install: {sys.executable} -m pip install {' '.join(reqs['missing_packages'])}{_RST}")
return
with self._voice_lock:
@@ -8246,7 +8324,15 @@ class HermesCLI:
else:
print(f"\n⚡ Sending after interrupt: '{preview}'")
self._pending_input.put(combined)
-
+
+ # If a /steer was left over (agent finished before another tool
+ # batch could absorb it), deliver it as the next user turn.
+ _leftover_steer = result.get("pending_steer") if result else None
+ if _leftover_steer and hasattr(self, '_pending_input'):
+ preview = _leftover_steer[:60] + ("..." if len(_leftover_steer) > 60 else "")
+ print(f"\n⏩ Delivering leftover /steer as next turn: '{preview}'")
+ self._pending_input.put(_leftover_steer)
+
return response
except Exception as e:
@@ -8524,7 +8610,7 @@ class HermesCLI:
except Exception:
_welcome_text = "Welcome to Hermes Agent! Type your message or /help for commands."
_welcome_color = "#FFF8DC"
- self.console.print(f"[{_welcome_color}]{_welcome_text}[/]")
+ self._console_print(f"[{_welcome_color}]{_welcome_text}[/]")
# Show a random tip to help users discover features
try:
from hermes_cli.tips import get_random_tip
@@ -8533,16 +8619,16 @@ class HermesCLI:
_tip_color = _welcome_skin.get_color("banner_dim", "#B8860B")
except Exception:
_tip_color = "#B8860B"
- self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
+ self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
except Exception:
pass # Tips are non-critical — never break startup
if self.preloaded_skills and not self._startup_skills_line_shown:
skills_label = ", ".join(self.preloaded_skills)
- self.console.print(
+ self._console_print(
f"[bold {_accent_hex()}]Activated skills:[/] {skills_label}"
)
self._startup_skills_line_shown = True
- self.console.print()
+ self._console_print()
# State for async operation
self._agent_running = False
@@ -9345,21 +9431,10 @@ class HermesCLI:
return cli_ref._agent_spacer_height()
def get_spinner_text():
- txt = cli_ref._spinner_text
- if not txt:
+ spinner_line = cli_ref._render_spinner_text()
+ if not spinner_line:
return []
- # Append live elapsed timer when a tool is running
- t0 = cli_ref._tool_start_time
- if t0 > 0:
- import time as _time
- elapsed = _time.monotonic() - t0
- if elapsed >= 60:
- _m, _s = int(elapsed // 60), int(elapsed % 60)
- elapsed_str = f"{_m}m {_s}s"
- else:
- elapsed_str = f"{elapsed:.1f}s"
- return [('class:hint', f' {txt} ({elapsed_str})')]
- return [('class:hint', f' {txt}')]
+ return [('class:hint', spinner_line)]
def get_spinner_height():
return cli_ref._spinner_widget_height()
@@ -10067,8 +10142,36 @@ class HermesCLI:
# Register signal handlers for graceful shutdown on SSH disconnect / SIGTERM
def _signal_handler(signum, frame):
- """Handle SIGHUP/SIGTERM by triggering graceful cleanup."""
+ """Handle SIGHUP/SIGTERM by triggering graceful cleanup.
+
+ Calls ``self.agent.interrupt()`` first so the agent daemon
+ thread's poll loop sees the per-thread interrupt and kills the
+ tool's subprocess group via ``_kill_process`` (os.killpg).
+ Without this, the main thread dies from KeyboardInterrupt and
+ the daemon thread is killed with it — before it can run one
+ more poll iteration to clean up the subprocess, which was
+ spawned with ``os.setsid`` and therefore survives as an orphan
+ with PPID=1.
+
+ Grace window (``HERMES_SIGTERM_GRACE``, default 1.5 s) gives
+ the daemon time to: detect the interrupt (next 200 ms poll) →
+ call _kill_process (SIGTERM + 1 s wait + SIGKILL if needed) →
+ return from _wait_for_process. ``time.sleep`` releases the
+ GIL so the daemon actually runs during the window.
+ """
logger.debug("Received signal %s, triggering graceful shutdown", signum)
+ try:
+ if getattr(self, "agent", None) and getattr(self, "_agent_running", False):
+ self.agent.interrupt(f"received signal {signum}")
+ import time as _t
+ try:
+ _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
+ except (TypeError, ValueError):
+ _grace = 1.5
+ if _grace > 0:
+ _t.sleep(_grace)
+ except Exception:
+ pass # never block signal handling
raise KeyboardInterrupt()
try:
@@ -10371,6 +10474,45 @@ def main(
# Register cleanup for single-query mode (interactive mode registers in run())
atexit.register(_run_cleanup)
+
+ # Also install signal handlers in single-query / `-q` mode. Interactive
+ # mode registers its own inside HermesCLI.run(), but `-q` runs
+ # cli.agent.run_conversation() below and AIAgent spawns worker threads
+ # for tools — so when SIGTERM arrives on the main thread, raising
+ # KeyboardInterrupt only unwinds the main thread, not the worker
+ # running _wait_for_process. Python then exits, the child subprocess
+ # (spawned with os.setsid, its own process group) is reparented to
+ # init and keeps running as an orphan.
+ #
+ # Fix: route SIGTERM/SIGHUP through agent.interrupt() which sets the
+ # per-thread interrupt flag the worker's poll loop checks every 200 ms.
+ # Give the worker a grace window to call _kill_process (SIGTERM to the
+ # process group, then SIGKILL after 1 s), then raise KeyboardInterrupt
+ # so main unwinds normally. HERMES_SIGTERM_GRACE overrides the 1.5 s
+ # default for debugging.
+ def _signal_handler_q(signum, frame):
+ logger.debug("Received signal %s in single-query mode", signum)
+ try:
+ _agent = getattr(cli, "agent", None)
+ if _agent is not None:
+ _agent.interrupt(f"received signal {signum}")
+ import time as _t
+ try:
+ _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
+ except (TypeError, ValueError):
+ _grace = 1.5
+ if _grace > 0:
+ _t.sleep(_grace)
+ except Exception:
+ pass # never block signal handling
+ raise KeyboardInterrupt()
+ try:
+ import signal as _signal
+ _signal.signal(_signal.SIGTERM, _signal_handler_q)
+ if hasattr(_signal, "SIGHUP"):
+ _signal.signal(_signal.SIGHUP, _signal_handler_q)
+ except Exception:
+ pass # signal handler may fail in restricted environments
# Handle single query mode
if query or image:
diff --git a/cron/scheduler.py b/cron/scheduler.py
index db5991c6f02..6e93fc02fee 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -564,15 +564,53 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
return False, f"Script execution failed: {exc}"
-def _build_job_prompt(job: dict) -> str:
- """Build the effective prompt for a cron job, optionally loading one or more skills first."""
+def _parse_wake_gate(script_output: str) -> bool:
+ """Parse the last non-empty stdout line of a cron job's pre-check script
+ as a wake gate.
+
+ The convention (ported from nanoclaw #1232): if the last stdout line is
+ JSON like ``{"wakeAgent": false}``, the agent is skipped entirely — no
+ LLM run, no delivery. Any other output (non-JSON, missing flag, gate
+ absent, or ``wakeAgent: true``) means wake the agent normally.
+
+ Returns True if the agent should wake, False to skip.
+ """
+ if not script_output:
+ return True
+ stripped_lines = [line for line in script_output.splitlines() if line.strip()]
+ if not stripped_lines:
+ return True
+ last_line = stripped_lines[-1].strip()
+ try:
+ gate = json.loads(last_line)
+ except (json.JSONDecodeError, ValueError):
+ return True
+ if not isinstance(gate, dict):
+ return True
+ return gate.get("wakeAgent", True) is not False
+
+
+def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
+ """Build the effective prompt for a cron job, optionally loading one or more skills first.
+
+ Args:
+ job: The cron job dict.
+ prerun_script: Optional ``(success, stdout)`` from a script that has
+ already been executed by the caller (e.g. for a wake-gate check).
+ When provided, the script is not re-executed and the cached
+ result is used for prompt injection. When omitted, the script
+ (if any) runs inline as before.
+ """
prompt = job.get("prompt", "")
skills = job.get("skills")
# Run data-collection script if configured, inject output as context.
script_path = job.get("script")
if script_path:
- success, script_output = _run_job_script(script_path)
+ if prerun_script is not None:
+ success, script_output = prerun_script
+ else:
+ success, script_output = _run_job_script(script_path)
if success:
if script_output:
prompt = (
@@ -674,13 +712,41 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
job_id = job["id"]
job_name = job["name"]
- prompt = _build_job_prompt(job)
+
+ # Wake-gate: if this job has a pre-check script, run it BEFORE building
+ # the prompt so a ``{"wakeAgent": false}`` response can short-circuit
+ # the whole agent run. We pass the result into _build_job_prompt so
+ # the script is only executed once.
+ prerun_script = None
+ script_path = job.get("script")
+ if script_path:
+ prerun_script = _run_job_script(script_path)
+ _ran_ok, _script_output = prerun_script
+ if _ran_ok and not _parse_wake_gate(_script_output):
+ logger.info(
+ "Job '%s' (ID: %s): wakeAgent=false, skipping agent run",
+ job_name, job_id,
+ )
+ silent_doc = (
+ f"# Cron Job: {job_name}\n\n"
+ f"**Job ID:** {job_id}\n"
+ f"**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
+ "Script gate returned `wakeAgent=false` — agent skipped.\n"
+ )
+ return True, silent_doc, SILENT_MARKER, None
+
+ prompt = _build_job_prompt(job, prerun_script=prerun_script)
origin = _resolve_origin(job)
_cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}"
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
logger.info("Prompt: %s", prompt[:100])
+ # Mark this as a cron session so the approval system can apply cron_mode.
+ # This env var is process-wide and persists for the lifetime of the
+ # scheduler process — every job this process runs is a cron job.
+ os.environ["HERMES_CRON_SESSION"] = "1"
+
try:
# Inject origin context so the agent's send_message tool knows the chat.
# Must be INSIDE the try block so the finally cleanup always runs.
diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index af694a5e2d6..2b8536062c2 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -6,6 +6,7 @@ and implement the required methods.
"""
import asyncio
+import inspect
import ipaddress
import logging
import os
@@ -669,6 +670,15 @@ class MessageEvent:
# Original platform data
raw_message: Any = None
message_id: Optional[str] = None
+
+ # Platform-specific update identifier. For Telegram this is the
+ # ``update_id`` from the PTB Update wrapper; other platforms currently
+ # ignore it. Used by ``/restart`` to record the triggering update so the
+ # new gateway can advance the Telegram offset past it and avoid processing
+ # the same ``/restart`` twice if PTB's graceful-shutdown ACK times out
+ # ("Error while calling `get_updates` one more time to mark all fetched
+ # updates" in gateway.log).
+ platform_update_id: Optional[int] = None
# Media attachments
# media_urls: local file paths (for vision tool access)
@@ -871,10 +881,11 @@ class BasePlatformAdapter(ABC):
# working on a task after --replace or manual restarts.
self._background_tasks: set[asyncio.Task] = set()
# One-shot callbacks to fire after the main response is delivered.
- # Keyed by session_key. GatewayRunner uses this to defer
- # background-review notifications ("💾 Skill created") until the
- # primary reply has been sent.
- self._post_delivery_callbacks: Dict[str, Callable] = {}
+ # Keyed by session_key. Values are either a bare callback (legacy) or
+ # a ``(generation, callback)`` tuple so GatewayRunner can make deferred
+ # deliveries generation-aware and avoid stale runs clearing callbacks
+ # registered by a fresher run for the same session.
+ self._post_delivery_callbacks: Dict[str, Any] = {}
self._expected_cancelled_tasks: set[asyncio.Task] = set()
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
# Chats where auto-TTS on voice input is disabled (set by /voice off)
@@ -1392,7 +1403,13 @@ class BasePlatformAdapter(ABC):
return paths, cleaned
- async def _keep_typing(self, chat_id: str, interval: float = 2.0, metadata=None) -> None:
+ async def _keep_typing(
+ self,
+ chat_id: str,
+ interval: float = 2.0,
+ metadata=None,
+ stop_event: asyncio.Event | None = None,
+ ) -> None:
"""
Continuously send typing indicator until cancelled.
@@ -1406,9 +1423,18 @@ class BasePlatformAdapter(ABC):
"""
try:
while True:
+ if stop_event is not None and stop_event.is_set():
+ return
if chat_id not in self._typing_paused:
await self.send_typing(chat_id, metadata=metadata)
- await asyncio.sleep(interval)
+ if stop_event is None:
+ await asyncio.sleep(interval)
+ continue
+ try:
+ await asyncio.wait_for(stop_event.wait(), timeout=interval)
+ except asyncio.TimeoutError:
+ continue
+ return
except asyncio.CancelledError:
pass # Normal cancellation when handler completes
finally:
@@ -1435,6 +1461,59 @@ class BasePlatformAdapter(ABC):
"""Resume typing indicator for a chat after approval resolves."""
self._typing_paused.discard(chat_id)
+ async def interrupt_session_activity(self, session_key: str, chat_id: str) -> None:
+ """Signal the active session loop to stop and clear typing immediately."""
+ if session_key:
+ interrupt_event = self._active_sessions.get(session_key)
+ if interrupt_event is not None:
+ interrupt_event.set()
+ try:
+ await self.stop_typing(chat_id)
+ except Exception:
+ pass
+
+ def register_post_delivery_callback(
+ self,
+ session_key: str,
+ callback: Callable,
+ *,
+ generation: int | None = None,
+ ) -> None:
+ """Register a deferred callback to fire after the main response.
+
+ ``generation`` lets callers tie the callback to a specific gateway run
+ generation so stale runs cannot clear callbacks owned by a fresher run.
+ """
+ if not session_key or not callable(callback):
+ return
+ if generation is None:
+ self._post_delivery_callbacks[session_key] = callback
+ else:
+ self._post_delivery_callbacks[session_key] = (int(generation), callback)
+
+ def pop_post_delivery_callback(
+ self,
+ session_key: str,
+ *,
+ generation: int | None = None,
+ ) -> Callable | None:
+ """Pop a deferred callback, optionally requiring generation ownership."""
+ if not session_key:
+ return None
+ entry = self._post_delivery_callbacks.get(session_key)
+ if entry is None:
+ return None
+ if isinstance(entry, tuple) and len(entry) == 2:
+ entry_generation, callback = entry
+ if generation is not None and int(entry_generation) != int(generation):
+ return None
+ self._post_delivery_callbacks.pop(session_key, None)
+ return callback if callable(callback) else None
+ if generation is not None:
+ return None
+ self._post_delivery_callbacks.pop(session_key, None)
+ return entry if callable(entry) else None
+
# ── Processing lifecycle hooks ──────────────────────────────────────────
# Subclasses override these to react to message processing events
# (e.g. Discord adds 👀/✅/❌ reactions).
@@ -1705,10 +1784,23 @@ class BasePlatformAdapter(ABC):
# Fall back to a new Event only if the entry was removed externally.
interrupt_event = self._active_sessions.get(session_key) or asyncio.Event()
self._active_sessions[session_key] = interrupt_event
+ callback_generation = getattr(interrupt_event, "_hermes_run_generation", None)
# Start continuous typing indicator (refreshes every 2 seconds)
_thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
- typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id, metadata=_thread_metadata))
+ _keep_typing_kwargs = {"metadata": _thread_metadata}
+ try:
+ _keep_typing_sig = inspect.signature(self._keep_typing)
+ except (TypeError, ValueError):
+ _keep_typing_sig = None
+ if _keep_typing_sig is None or "stop_event" in _keep_typing_sig.parameters:
+ _keep_typing_kwargs["stop_event"] = interrupt_event
+ typing_task = asyncio.create_task(
+ self._keep_typing(
+ event.source.chat_id,
+ **_keep_typing_kwargs,
+ )
+ )
try:
await self._run_processing_hook("on_processing_start", event)
@@ -1917,9 +2009,18 @@ class BasePlatformAdapter(ABC):
if session_key in self._pending_messages:
pending_event = self._pending_messages.pop(session_key)
logger.debug("[%s] Processing queued message from interrupt", self.name)
- # Clean up current session before processing pending
- if session_key in self._active_sessions:
- del self._active_sessions[session_key]
+ # Keep the _active_sessions entry live across the turn chain
+ # and only CLEAR the interrupt Event — do NOT delete the entry.
+ # If we deleted here, a concurrent inbound message arriving
+ # during the awaits below would pass the Level-1 guard, spawn
+ # its own _process_message_background, and run simultaneously
+ # with the recursive drain below. Two agents on one
+ # session_key = duplicate responses, duplicate tool calls.
+ # Clearing the Event keeps the guard live so follow-ups take
+ # the busy-handler path (queue + interrupt) as intended.
+ _active = self._active_sessions.get(session_key)
+ if _active is not None:
+ _active.clear()
typing_task.cancel()
try:
await typing_task
@@ -1958,7 +2059,14 @@ class BasePlatformAdapter(ABC):
finally:
# Fire any one-shot post-delivery callback registered for this
# session (e.g. deferred background-review notifications).
- _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
+ _callback_generation = callback_generation
+ if hasattr(self, "pop_post_delivery_callback"):
+ _post_cb = self.pop_post_delivery_callback(
+ session_key,
+ generation=_callback_generation,
+ )
+ else:
+ _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
if callable(_post_cb):
try:
_post_cb()
@@ -1977,9 +2085,37 @@ class BasePlatformAdapter(ABC):
await self.stop_typing(event.source.chat_id)
except Exception:
pass
- # Clean up session tracking
- if session_key in self._active_sessions:
- del self._active_sessions[session_key]
+ # Late-arrival drain: a message may have arrived during the
+ # cleanup awaits above (typing_task cancel, stop_typing). Such
+ # messages were caught by the Level-1 guard (entry still live, Event
+ # possibly set) and landed in _pending_messages via the
+ # busy-handler path. Without this block, we would delete the
+ # active-session entry and the queued message would be silently
+ # dropped (user never gets a reply).
+ late_pending = self._pending_messages.pop(session_key, None)
+ if late_pending is not None:
+ logger.debug(
+ "[%s] Late-arrival pending message during cleanup — spawning drain task",
+ self.name,
+ )
+ _active = self._active_sessions.get(session_key)
+ if _active is not None:
+ _active.clear()
+ drain_task = asyncio.create_task(
+ self._process_message_background(late_pending, session_key)
+ )
+ try:
+ self._background_tasks.add(drain_task)
+ drain_task.add_done_callback(self._background_tasks.discard)
+ except TypeError:
+ # Tests stub create_task() with non-hashable sentinels; tolerate.
+ pass
+ # Leave _active_sessions[session_key] populated — the drain
+ # task's own lifecycle will clean it up.
+ else:
+ # Clean up session tracking
+ if session_key in self._active_sessions:
+ del self._active_sessions[session_key]
async def cancel_background_tasks(self) -> None:
"""Cancel any in-flight background message-processing tasks.
@@ -1987,12 +2123,26 @@ class BasePlatformAdapter(ABC):
Used during gateway shutdown/replacement so active sessions from the old
process do not keep running after adapters are being torn down.
"""
- tasks = [task for task in self._background_tasks if not task.done()]
- for task in tasks:
- self._expected_cancelled_tasks.add(task)
- task.cancel()
- if tasks:
+ # Loop until no new tasks appear (bounded by MAX_DRAIN_ROUNDS).
+ # Without this, a message arriving during the
+ # `await asyncio.gather` below would spawn a fresh
+ # _process_message_background task (added to
+ # self._background_tasks via handle_message), and the
+ # _background_tasks.clear() at the end of this method would
+ # drop the reference — the task runs untracked against a
+ # disconnecting adapter, logs send-failures, and may linger until
+ # it completes on its own. Re-draining each round narrows the window.
+ MAX_DRAIN_ROUNDS = 5
+ for _ in range(MAX_DRAIN_ROUNDS):
+ tasks = [task for task in self._background_tasks if not task.done()]
+ if not tasks:
+ break
+ for task in tasks:
+ self._expected_cancelled_tasks.add(task)
+ task.cancel()
await asyncio.gather(*tasks, return_exceptions=True)
+ # Loop: late-arrival tasks spawned during the gather above
+ # will be in self._background_tasks now. Re-check.
self._background_tasks.clear()
self._expected_cancelled_tasks.clear()
self._pending_messages.clear()
diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index 5cad956a362..fce7ece4146 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -498,6 +498,7 @@ class DiscordAdapter(BasePlatformAdapter):
self._allowed_role_ids: set = set() # For DISCORD_ALLOWED_ROLES filtering
# Voice channel state (per-guild)
self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient
+ self._voice_locks: Dict[int, asyncio.Lock] = {} # guild_id -> serialize join/leave
# Text batching: merge rapid successive messages (Telegram-style)
self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6"))
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
@@ -636,6 +637,30 @@ class DiscordAdapter(BasePlatformAdapter):
@self._client.event
async def on_message(message: DiscordMessage):
+ # Wait for on_ready to finish resolving username-based
+ # allowlist entries. Without this block, messages
+ # arriving between Discord's READY event and the end
+ # of _resolve_allowed_usernames compare author IDs
+ # (numeric) against a set that may still contain raw
+ # usernames (strings) from DISCORD_ALLOWED_USERS —
+ # legitimate users get silently rejected for the first
+ # few seconds after every reconnect. The wait is a
+ # near-instant no-op in steady state (_ready_event is
+ # already set); only the startup / reconnect window
+ # ever blocks.
+ if not adapter_self._ready_event.is_set():
+ try:
+ await asyncio.wait_for(
+ adapter_self._ready_event.wait(),
+ timeout=30.0,
+ )
+ except asyncio.TimeoutError:
+ logger.warning(
+ "[%s] on_message timed out waiting for _ready_event; "
+ "allowlist check may use pre-resolved entries",
+ adapter_self.name,
+ )
+
# Dedup: Discord RESUME replays events after reconnects (#4777)
if adapter_self._dedup.is_duplicate(str(message.id)):
return
@@ -1231,57 +1256,74 @@ class DiscordAdapter(BasePlatformAdapter):
# Voice channel methods (join / leave / play)
# ------------------------------------------------------------------
+ def _voice_lock_for(self, guild_id: int) -> "asyncio.Lock":
+ """Return the per-guild lock, creating it on first use.
+
+ Voice join/leave/move must be serialized per guild — without
+ this, two concurrent /voice channel invocations both see
+ _voice_clients.get(guild_id) return None, both call
+ channel.connect(), and discord.py raises ClientException
+ ('Already connected') on the loser.
+ """
+ lock = self._voice_locks.get(guild_id)
+ if lock is None:
+ lock = asyncio.Lock()
+ self._voice_locks[guild_id] = lock
+ return lock
+
async def join_voice_channel(self, channel) -> bool:
"""Join a Discord voice channel. Returns True on success."""
if not self._client or not DISCORD_AVAILABLE:
return False
guild_id = channel.guild.id
- # Already connected in this guild?
- existing = self._voice_clients.get(guild_id)
- if existing and existing.is_connected():
- if existing.channel.id == channel.id:
+ async with self._voice_lock_for(guild_id):
+ # Already connected in this guild?
+ existing = self._voice_clients.get(guild_id)
+ if existing and existing.is_connected():
+ if existing.channel.id == channel.id:
+ self._reset_voice_timeout(guild_id)
+ return True
+ await existing.move_to(channel)
self._reset_voice_timeout(guild_id)
return True
- await existing.move_to(channel)
+
+ vc = await channel.connect()
+ self._voice_clients[guild_id] = vc
self._reset_voice_timeout(guild_id)
+
+ # Start voice receiver (Phase 2: listen to users)
+ try:
+ receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids)
+ receiver.start()
+ self._voice_receivers[guild_id] = receiver
+ self._voice_listen_tasks[guild_id] = asyncio.ensure_future(
+ self._voice_listen_loop(guild_id)
+ )
+ except Exception as e:
+ logger.warning("Voice receiver failed to start: %s", e)
+
return True
- vc = await channel.connect()
- self._voice_clients[guild_id] = vc
- self._reset_voice_timeout(guild_id)
-
- # Start voice receiver (Phase 2: listen to users)
- try:
- receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids)
- receiver.start()
- self._voice_receivers[guild_id] = receiver
- self._voice_listen_tasks[guild_id] = asyncio.ensure_future(
- self._voice_listen_loop(guild_id)
- )
- except Exception as e:
- logger.warning("Voice receiver failed to start: %s", e)
-
- return True
-
async def leave_voice_channel(self, guild_id: int) -> None:
"""Disconnect from the voice channel in a guild."""
- # Stop voice receiver first
- receiver = self._voice_receivers.pop(guild_id, None)
- if receiver:
- receiver.stop()
- listen_task = self._voice_listen_tasks.pop(guild_id, None)
- if listen_task:
- listen_task.cancel()
+ async with self._voice_lock_for(guild_id):
+ # Stop voice receiver first
+ receiver = self._voice_receivers.pop(guild_id, None)
+ if receiver:
+ receiver.stop()
+ listen_task = self._voice_listen_tasks.pop(guild_id, None)
+ if listen_task:
+ listen_task.cancel()
- vc = self._voice_clients.pop(guild_id, None)
- if vc and vc.is_connected():
- await vc.disconnect()
- task = self._voice_timeout_tasks.pop(guild_id, None)
- if task:
- task.cancel()
- self._voice_text_channels.pop(guild_id, None)
- self._voice_sources.pop(guild_id, None)
+ vc = self._voice_clients.pop(guild_id, None)
+ if vc and vc.is_connected():
+ await vc.disconnect()
+ task = self._voice_timeout_tasks.pop(guild_id, None)
+ if task:
+ task.cancel()
+ self._voice_text_channels.pop(guild_id, None)
+ self._voice_sources.pop(guild_id, None)
# Maximum seconds to wait for voice playback before giving up
PLAYBACK_TIMEOUT = 120
@@ -1933,6 +1975,24 @@ class DiscordAdapter(BasePlatformAdapter):
the "thinking..." indicator is replaced with that text; otherwise it
is deleted so the channel isn't cluttered.
"""
+ # Log the invoker so ghost-command reports can be triaged. Discord
+ # native slash invocations are always user-initiated (no bot can fire
+ # them), but mobile autocomplete / keyboard shortcuts / other users
+ # in the same channel are easy to miss in post-mortems.
+ try:
+ _user = interaction.user
+ _chan_id = getattr(interaction.channel, "id", None) or getattr(interaction, "channel_id", None)
+ logger.info(
+ "[Discord] slash '%s' invoked by user=%s id=%s channel=%s guild=%s",
+ command_text,
+ getattr(_user, "name", "?"),
+ getattr(_user, "id", "?"),
+ _chan_id,
+ getattr(interaction, "guild_id", None),
+ )
+ except Exception:
+ pass # logging must never block command dispatch
+
await interaction.response.defer(ephemeral=True)
event = self._build_slash_event(interaction, command_text)
await self.handle_message(event)
@@ -1994,6 +2054,11 @@ class DiscordAdapter(BasePlatformAdapter):
async def slash_stop(interaction: discord.Interaction):
await self._run_simple_slash(interaction, "/stop", "Stop requested~")
+ @tree.command(name="steer", description="Inject a message after the next tool call (no interrupt)")
+ @discord.app_commands.describe(prompt="Text to inject into the agent's next tool result")
+ async def slash_steer(interaction: discord.Interaction, prompt: str):
+ await self._run_simple_slash(interaction, f"/steer {prompt}".strip())
+
@tree.command(name="compress", description="Compress conversation context")
async def slash_compress(interaction: discord.Interaction):
await self._run_simple_slash(interaction, "/compress")
@@ -3242,7 +3307,20 @@ class DiscordAdapter(BasePlatformAdapter):
"[Discord] Flushing text batch %s (%d chars)",
key, len(event.text or ""),
)
- await self.handle_message(event)
+ # Shield the downstream dispatch so that a subsequent chunk
+ # arriving while handle_message is mid-flight cannot cancel
+ # the running agent turn. _enqueue_text_event always cancels
+ # the prior flush task when a new chunk lands; without this
+ # shield, CancelledError would propagate from our task down
+ # into handle_message → the agent's streaming request,
+ # aborting the response the user was waiting on. The new
+ # chunk is handled by the fresh flush task regardless.
+ await asyncio.shield(self.handle_message(event))
+ except asyncio.CancelledError:
+ # Only reached if cancel landed before the pop — the shielded
+ # handle_message is unaffected either way. Let the task exit
+ # cleanly so the finally block cleans up.
+ pass
finally:
if self._pending_text_batch_tasks.get(key) is current_task:
self._pending_text_batch_tasks.pop(key, None)
diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py
index 351337e8275..3b57db46d3c 100644
--- a/gateway/platforms/feishu.py
+++ b/gateway/platforms/feishu.py
@@ -119,6 +119,8 @@ _MARKDOWN_HINT_RE = re.compile(
re.MULTILINE,
)
_MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
+_MARKDOWN_FENCE_OPEN_RE = re.compile(r"^```([^\n`]*)\s*$")
+_MARKDOWN_FENCE_CLOSE_RE = re.compile(r"^```\s*$")
_MENTION_RE = re.compile(r"@_user_\d+")
_MULTISPACE_RE = re.compile(r"[ \t]{2,}")
_POST_CONTENT_INVALID_RE = re.compile(r"content format of the post type is incorrect", re.IGNORECASE)
@@ -430,23 +432,66 @@ def _coerce_required_int(value: Any, default: int, min_value: int = 0) -> int:
def _build_markdown_post_payload(content: str) -> str:
+ rows = _build_markdown_post_rows(content)
return json.dumps(
{
"zh_cn": {
- "content": [
- [
- {
- "tag": "md",
- "text": content,
- }
- ]
- ],
+ "content": rows,
}
},
ensure_ascii=False,
)
+def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
+    """Split markdown into Feishu post rows, one row per prose/code segment.
+
+    Per the original author's note, Feishu's `md` renderer can swallow
+    trailing content when a fenced code block sits inside one large markdown
+    element. The text is therefore cut at fence lines: each fenced block
+    (including its opening and closing fence lines) gets its own row, and the
+    prose between blocks is emitted as separate rows. Segments that contain
+    only whitespace are dropped. If nothing survives, a single row holding
+    the unmodified ``content`` is returned.
+    """
+
+    def _md_row(text: str) -> List[Dict[str, str]]:
+        # One post row holding a single markdown element.
+        return [{"tag": "md", "text": text}]
+
+    if not content:
+        return [_md_row("")]
+    if "```" not in content:
+        return [_md_row(content)]
+
+    rows: List[List[Dict[str, str]]] = []
+    pending: List[str] = []
+    inside_fence = False
+
+    def _emit_pending() -> None:
+        # Turn the buffered lines into a row (unless blank) and reset the buffer.
+        if pending:
+            text = "\n".join(pending)
+            if text.strip():
+                rows.append(_md_row(text))
+            pending.clear()
+
+    for line in content.splitlines():
+        candidate = line.strip()
+        if inside_fence:
+            # The closing fence belongs to the code row, so buffer it first.
+            pending.append(line)
+            if _MARKDOWN_FENCE_CLOSE_RE.match(candidate):
+                _emit_pending()
+                inside_fence = False
+        else:
+            # An opening fence ends the preceding prose row before it is buffered.
+            if _MARKDOWN_FENCE_OPEN_RE.match(candidate):
+                _emit_pending()
+                inside_fence = True
+            pending.append(line)
+
+    # Flush trailing prose, or an unterminated code block.
+    _emit_pending()
+    return rows if rows else [_md_row(content)]
+
+
def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
resolved = _resolve_post_payload(payload)
if not resolved:
diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py
index 617713ad908..4df4193bc0d 100644
--- a/gateway/platforms/signal.py
+++ b/gateway/platforms/signal.py
@@ -160,6 +160,14 @@ class SignalAdapter(BasePlatformAdapter):
self._sse_task: Optional[asyncio.Task] = None
self._health_monitor_task: Optional[asyncio.Task] = None
self._typing_tasks: Dict[str, asyncio.Task] = {}
+ # Per-chat typing-indicator backoff. When signal-cli reports
+ # NETWORK_FAILURE (recipient offline / unroutable), base.py's
+ # _keep_typing refresh loop would otherwise hammer sendTyping every
+ # ~2s indefinitely, producing WARNING-level log spam and pointless
+ # RPC traffic. We track consecutive failures per chat and skip the
+ # RPC during a cooldown window instead.
+ self._typing_failures: Dict[str, int] = {}
+ self._typing_skip_until: Dict[str, float] = {}
self._running = False
self._last_sse_activity = 0.0
self._sse_response: Optional[httpx.Response] = None
@@ -548,8 +556,22 @@ class SignalAdapter(BasePlatformAdapter):
# JSON-RPC Communication
# ------------------------------------------------------------------
- async def _rpc(self, method: str, params: dict, rpc_id: str = None) -> Any:
- """Send a JSON-RPC 2.0 request to signal-cli daemon."""
+ async def _rpc(
+ self,
+ method: str,
+ params: dict,
+ rpc_id: str = None,
+ *,
+ log_failures: bool = True,
+ ) -> Any:
+ """Send a JSON-RPC 2.0 request to signal-cli daemon.
+
+ When ``log_failures=False``, error and exception paths log at DEBUG
+ instead of WARNING — used by the typing-indicator path to silence
+ repeated NETWORK_FAILURE spam for unreachable recipients while
+ still preserving visibility for the first occurrence and for
+ unrelated RPCs.
+ """
if not self.client:
logger.warning("Signal: RPC called but client not connected")
return None
@@ -574,13 +596,19 @@ class SignalAdapter(BasePlatformAdapter):
data = resp.json()
if "error" in data:
- logger.warning("Signal RPC error (%s): %s", method, data["error"])
+ if log_failures:
+ logger.warning("Signal RPC error (%s): %s", method, data["error"])
+ else:
+ logger.debug("Signal RPC error (%s): %s", method, data["error"])
return None
return data.get("result")
except Exception as e:
- logger.warning("Signal RPC %s failed: %s", method, e)
+ if log_failures:
+ logger.warning("Signal RPC %s failed: %s", method, e)
+ else:
+ logger.debug("Signal RPC %s failed: %s", method, e)
return None
# ------------------------------------------------------------------
@@ -627,7 +655,28 @@ class SignalAdapter(BasePlatformAdapter):
self._recent_sent_timestamps.pop()
async def send_typing(self, chat_id: str, metadata=None) -> None:
- """Send a typing indicator."""
+ """Send a typing indicator.
+
+ base.py's ``_keep_typing`` refresh loop calls this every ~2s while
+ the agent is processing. If signal-cli returns NETWORK_FAILURE for
+ this recipient (offline, unroutable, group membership lost, etc.)
+ the unmitigated behaviour is: a WARNING log every 2 seconds for as
+ long as the agent keeps running. Instead we:
+
+ - silence the WARNING after the first consecutive failure (subsequent
+ attempts log at DEBUG) so transport issues are still visible once
+ but don't flood the log,
+ - skip the RPC entirely during an exponential cooldown window once
+ three consecutive failures have happened, so we stop hammering
+ signal-cli with requests it can't deliver.
+
+ A successful sendTyping clears the counters.
+ """
+ now = time.monotonic()
+ skip_until = self._typing_skip_until.get(chat_id, 0.0)
+ if now < skip_until:
+ return
+
params: Dict[str, Any] = {
"account": self.account,
}
@@ -637,7 +686,26 @@ class SignalAdapter(BasePlatformAdapter):
else:
params["recipient"] = [chat_id]
- await self._rpc("sendTyping", params, rpc_id="typing")
+ fails = self._typing_failures.get(chat_id, 0)
+ result = await self._rpc(
+ "sendTyping",
+ params,
+ rpc_id="typing",
+ log_failures=(fails == 0),
+ )
+
+ if result is None:
+ fails += 1
+ self._typing_failures[chat_id] = fails
+ # After 3 consecutive failures, back off exponentially (16s,
+ # 32s, 60s cap) to stop spamming signal-cli for a recipient
+ # that clearly isn't reachable right now.
+ if fails >= 3:
+ backoff = min(60.0, 16.0 * (2 ** (fails - 3)))
+ self._typing_skip_until[chat_id] = now + backoff
+ else:
+ self._typing_failures.pop(chat_id, None)
+ self._typing_skip_until.pop(chat_id, None)
async def send_image(
self,
@@ -789,6 +857,10 @@ class SignalAdapter(BasePlatformAdapter):
await task
except asyncio.CancelledError:
pass
+ # Reset per-chat typing backoff state so the next agent turn starts
+ # fresh rather than inheriting a cooldown from a prior conversation.
+ self._typing_failures.pop(chat_id, None)
+ self._typing_skip_until.pop(chat_id, None)
async def stop_typing(self, chat_id: str) -> None:
"""Public interface for stopping typing — called by base adapter's
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 5b1fef1337b..0b74c4e15f4 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -1657,6 +1657,21 @@ class TelegramAdapter(BasePlatformAdapter):
except Exception as exc:
logger.error("Failed to write update response from callback: %s", exc)
+    def _missing_media_path_error(self, label: str, path: str) -> str:
+        """Return a file-not-found message for MEDIA delivery, with a hint.
+
+        The base message is ``"<label> file not found: <path>"``. When
+        ``path`` starts with ``/workspace/``, ``/output/`` or ``/outputs/``,
+        a hint is appended explaining that the path may only exist inside
+        the Docker sandbox and that a host-visible bind mount should be used.
+        """
+        sandbox_prefixes = ("/workspace/", "/output/", "/outputs/")
+        message = f"{label} file not found: {path}"
+        if not path.startswith(sandbox_prefixes):
+            return message
+        hint = (
+            " (path may only exist inside the Docker sandbox. "
+            "Bind-mount a host directory and emit the host-visible "
+            "path in MEDIA: for gateway file delivery.)"
+        )
+        return message + hint
+
async def send_voice(
self,
chat_id: str,
@@ -1673,7 +1688,7 @@ class TelegramAdapter(BasePlatformAdapter):
try:
import os
if not os.path.exists(audio_path):
- return SendResult(success=False, error=f"Audio file not found: {audio_path}")
+ return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
with open(audio_path, "rb") as audio_file:
# .ogg files -> send as voice (round playable bubble)
@@ -1722,7 +1737,7 @@ class TelegramAdapter(BasePlatformAdapter):
try:
import os
if not os.path.exists(image_path):
- return SendResult(success=False, error=f"Image file not found: {image_path}")
+ return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))
_thread = self._metadata_thread_id(metadata)
with open(image_path, "rb") as image_file:
@@ -1759,7 +1774,7 @@ class TelegramAdapter(BasePlatformAdapter):
try:
if not os.path.exists(file_path):
- return SendResult(success=False, error=f"File not found: {file_path}")
+ return SendResult(success=False, error=self._missing_media_path_error("File", file_path))
display_name = file_name or os.path.basename(file_path)
_thread = self._metadata_thread_id(metadata)
@@ -1793,7 +1808,7 @@ class TelegramAdapter(BasePlatformAdapter):
try:
if not os.path.exists(video_path):
- return SendResult(success=False, error=f"Video file not found: {video_path}")
+ return SendResult(success=False, error=self._missing_media_path_error("Video", video_path))
_thread = self._metadata_thread_id(metadata)
with open(video_path, "rb") as f:
@@ -2326,7 +2341,7 @@ class TelegramAdapter(BasePlatformAdapter):
if not self._should_process_message(update.message):
return
- event = self._build_message_event(update.message, MessageType.TEXT)
+ event = self._build_message_event(update.message, MessageType.TEXT, update_id=update.update_id)
event.text = self._clean_bot_trigger_text(event.text)
self._enqueue_text_event(event)
@@ -2337,7 +2352,7 @@ class TelegramAdapter(BasePlatformAdapter):
if not self._should_process_message(update.message, is_command=True):
return
- event = self._build_message_event(update.message, MessageType.COMMAND)
+ event = self._build_message_event(update.message, MessageType.COMMAND, update_id=update.update_id)
await self.handle_message(event)
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@@ -2373,7 +2388,7 @@ class TelegramAdapter(BasePlatformAdapter):
parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}")
parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) and any preferences.")
- event = self._build_message_event(msg, MessageType.LOCATION)
+ event = self._build_message_event(msg, MessageType.LOCATION, update_id=update.update_id)
event.text = "\n".join(parts)
await self.handle_message(event)
@@ -2524,7 +2539,7 @@ class TelegramAdapter(BasePlatformAdapter):
else:
msg_type = MessageType.DOCUMENT
- event = self._build_message_event(msg, msg_type)
+ event = self._build_message_event(msg, msg_type, update_id=update.update_id)
# Add caption as text
if msg.caption:
@@ -2863,8 +2878,19 @@ class TelegramAdapter(BasePlatformAdapter):
self.name, cache_key, thread_id,
)
- def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
- """Build a MessageEvent from a Telegram message."""
+ def _build_message_event(
+ self,
+ message: Message,
+ msg_type: MessageType,
+ update_id: Optional[int] = None,
+ ) -> MessageEvent:
+ """Build a MessageEvent from a Telegram message.
+
+ ``update_id`` is the ``Update.update_id`` from PTB; passing it through
+ lets ``/restart`` record the triggering offset so the new gateway
+ process can advance past it (prevents ``/restart`` being re-delivered
+ when PTB's graceful-shutdown ACK fails).
+ """
chat = message.chat
user = message.from_user
@@ -2915,8 +2941,8 @@ class TelegramAdapter(BasePlatformAdapter):
chat_id=str(chat.id),
chat_name=chat.title or (chat.full_name if hasattr(chat, "full_name") else None),
chat_type=chat_type,
- user_id=str(user.id) if user else None,
- user_name=user.full_name if user else None,
+ user_id=str(user.id) if user else (str(chat.id) if chat_type == "dm" else None),
+ user_name=user.full_name if user else (chat.full_name if hasattr(chat, "full_name") and chat_type == "dm" else None),
thread_id=thread_id_str,
chat_topic=chat_topic,
)
@@ -2943,6 +2969,7 @@ class TelegramAdapter(BasePlatformAdapter):
source=source,
raw_message=message,
message_id=str(message.message_id),
+ platform_update_id=update_id,
reply_to_message_id=reply_to_id,
reply_to_text=reply_to_text,
auto_skill=topic_skill,
diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py
index c37445b17e8..9995ac38709 100644
--- a/gateway/platforms/webhook.py
+++ b/gateway/platforms/webhook.py
@@ -13,6 +13,10 @@ Each route defines:
- skills: optional list of skills to load for the agent
- deliver: where to send the response (github_comment, telegram, etc.)
- deliver_extra: additional delivery config (repo, pr_number, chat_id)
+ - deliver_only: if true, skip the agent — the rendered prompt IS the
+ message that gets delivered. Use for external push notifications
+ (Supabase, monitoring alerts, inter-agent pings) where zero LLM cost
+ and sub-second delivery matter more than agent reasoning.
Security:
- HMAC secret is required per route (validated at startup)
@@ -122,6 +126,19 @@ class WebhookAdapter(BasePlatformAdapter):
f"For testing without auth, set secret to '{_INSECURE_NO_AUTH}'."
)
+ # deliver_only routes bypass the agent — the POST body becomes a
+ # direct push notification via the configured delivery target.
+ # Validate up-front so misconfiguration surfaces at startup rather
+ # than on the first webhook POST.
+ if route.get("deliver_only"):
+ deliver = route.get("deliver", "log")
+ if not deliver or deliver == "log":
+ raise ValueError(
+ f"[webhook] Route '{name}' has deliver_only=true but "
+ f"deliver is '{deliver}'. Direct delivery requires a "
+ f"real target (telegram, discord, slack, github_comment, etc.)."
+ )
+
app = web.Application()
app.router.add_get("/health", self._handle_health)
app.router.add_post("/webhooks/{route_name}", self._handle_webhook)
@@ -419,6 +436,64 @@ class WebhookAdapter(BasePlatformAdapter):
)
self._seen_deliveries[delivery_id] = now
+ # ── Direct delivery mode (deliver_only) ─────────────────
+ # Skip the agent entirely — the rendered prompt IS the message we
+ # deliver. Use case: external services (Supabase, monitoring,
+ # cron jobs, other agents) that need to push a plain notification
+ # to a user's chat with zero LLM cost. Reuses the same HMAC auth,
+ # rate limiting, idempotency, and template rendering as agent mode.
+ if route_config.get("deliver_only"):
+ delivery = {
+ "deliver": route_config.get("deliver", "log"),
+ "deliver_extra": self._render_delivery_extra(
+ route_config.get("deliver_extra", {}), payload
+ ),
+ "payload": payload,
+ }
+ logger.info(
+ "[webhook] direct-deliver event=%s route=%s target=%s msg_len=%d delivery=%s",
+ event_type,
+ route_name,
+ delivery["deliver"],
+ len(prompt),
+ delivery_id,
+ )
+ try:
+ result = await self._direct_deliver(prompt, delivery)
+ except Exception:
+ logger.exception(
+ "[webhook] direct-deliver failed route=%s delivery=%s",
+ route_name,
+ delivery_id,
+ )
+ return web.json_response(
+ {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id},
+ status=502,
+ )
+
+ if result.success:
+ return web.json_response(
+ {
+ "status": "delivered",
+ "route": route_name,
+ "target": delivery["deliver"],
+ "delivery_id": delivery_id,
+ },
+ status=200,
+ )
+ # Delivery attempted but target rejected it — surface as 502
+ # with a generic error (don't leak adapter-level detail).
+ logger.warning(
+ "[webhook] direct-deliver target rejected route=%s target=%s error=%s",
+ route_name,
+ delivery["deliver"],
+ result.error,
+ )
+ return web.json_response(
+ {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id},
+ status=502,
+ )
+
# Use delivery_id in session key so concurrent webhooks on the
# same route get independent agent runs (not queued/interrupted).
session_chat_id = f"webhook:{route_name}:{delivery_id}"
@@ -572,6 +647,34 @@ class WebhookAdapter(BasePlatformAdapter):
# Response delivery
# ------------------------------------------------------------------
+    async def _direct_deliver(
+        self, content: str, delivery: dict
+    ) -> SendResult:
+        """Send *content* to the route's delivery target without running the agent.
+
+        Used by ``deliver_only`` routes, where the rendered template is the
+        literal message body. The target is read from ``delivery["deliver"]``
+        (default ``"log"``):
+
+        - ``"github_comment"`` goes to ``_deliver_github_comment``;
+        - any other non-log target goes to ``_deliver_cross_platform``;
+        - ``"log"`` only logs the first 200 characters and reports success.
+        """
+        target = delivery.get("deliver", "log")
+
+        if target == "github_comment":
+            return await self._deliver_github_comment(content, delivery)
+
+        if target != "log":
+            # Everything else is handed to the cross-platform dispatcher.
+            return await self._deliver_cross_platform(target, content, delivery)
+
+        # Startup validation rejects deliver_only routes whose target is
+        # "log", so this branch is only a defensive fallback.
+        logger.info("[webhook] direct-deliver log-only: %s", content[:200])
+        return SendResult(success=True)
+
async def _deliver_github_comment(
self, content: str, delivery: dict
) -> SendResult:
diff --git a/gateway/run.py b/gateway/run.py
index caa55e72caa..feb55eb1d62 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -96,6 +96,10 @@ from hermes_cli.env_loader import load_hermes_dotenv
_env_path = _hermes_home / '.env'
load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')
+
+# Matches a Docker volume spec of the form "host:container[:options]";
+# the container part must be an absolute path.
+_DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P<host>.+):(?P<container>/[^:]+?)(?::(?P<options>[^:]+))?$")
+# Container paths that count as an explicit media export mount.
+_DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"}
+
# Bridge config.yaml values into the environment so os.getenv() picks them up.
# config.yaml is authoritative for terminal settings — overrides .env.
_config_path = _hermes_home / 'config.yaml'
@@ -398,6 +402,33 @@ def _dequeue_pending_event(adapter, session_key: str) -> MessageEvent | None:
return adapter.get_pending_message(session_key)
+# Interrupt reasons issued by the gateway itself (not user-authored text).
+_INTERRUPT_REASON_STOP = "Stop requested"
+_INTERRUPT_REASON_RESET = "Session reset requested"
+_INTERRUPT_REASON_TIMEOUT = "Execution timed out (inactivity)"
+_INTERRUPT_REASON_SSE_DISCONNECT = "SSE client disconnected"
+_INTERRUPT_REASON_GATEWAY_SHUTDOWN = "Gateway shutting down"
+_INTERRUPT_REASON_GATEWAY_RESTART = "Gateway restarting"
+
+# Lower-cased lookup set used for case-insensitive matching.
+_CONTROL_INTERRUPT_MESSAGES = frozenset(
+    reason.lower()
+    for reason in (
+        _INTERRUPT_REASON_STOP,
+        _INTERRUPT_REASON_RESET,
+        _INTERRUPT_REASON_TIMEOUT,
+        _INTERRUPT_REASON_SSE_DISCONNECT,
+        _INTERRUPT_REASON_GATEWAY_SHUTDOWN,
+        _INTERRUPT_REASON_GATEWAY_RESTART,
+    )
+)
+
+
+def _is_control_interrupt_message(message: Optional[str]) -> bool:
+    """Tell whether *message* is one of the gateway's own interrupt reasons.
+
+    The comparison ignores case and collapses every run of whitespace
+    (including leading and trailing whitespace) to a single space. Empty or
+    ``None`` input is never a control message.
+    """
+    if message:
+        collapsed = " ".join(str(message).split())
+        return collapsed.lower() in _CONTROL_INTERRUPT_MESSAGES
+    return False
+
+
def _check_unavailable_skill(command_name: str) -> str | None:
"""Check if a command matches a known-but-inactive skill.
@@ -585,6 +616,7 @@ class GatewayRunner:
def __init__(self, config: Optional[GatewayConfig] = None):
self.config = config or load_gateway_config()
self.adapters: Dict[Platform, BasePlatformAdapter] = {}
+ self._warn_if_docker_media_delivery_is_risky()
# Load ephemeral config from config.yaml / env vars.
# Both are injected at API-call time only and never persisted.
@@ -625,6 +657,7 @@ class GatewayRunner:
self._running_agents_ts: Dict[str, float] = {} # start timestamp per session
self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt
self._busy_ack_ts: Dict[str, float] = {} # last busy-ack timestamp per session (debounce)
+ self._session_run_generation: Dict[str, int] = {}
# Cache AIAgent instances per session to preserve prompt caching.
# Without this, a new AIAgent is created per message, rebuilding the
@@ -691,6 +724,53 @@ class GatewayRunner:
self._background_tasks: set = set()
+    def _warn_if_docker_media_delivery_is_risky(self) -> None:
+        """Log a warning when a Docker-backed gateway has no export mount.
+
+        MEDIA delivery happens in the gateway process, so paths emitted by the
+        model must be readable from the host. Container-local paths such as
+        `/workspace/report.txt` or `/output/report.txt` often exist only inside
+        Docker, so a dedicated export mount like `host-dir:/output` is
+        commonly needed.
+
+        Nothing is logged unless ``TERMINAL_ENV`` is ``docker`` and at least
+        one connected platform is something other than LOCAL, API_SERVER or
+        WEBHOOK. ``TERMINAL_DOCKER_VOLUMES`` is read as a JSON list of volume
+        specs; the warning is suppressed as soon as one spec mounts a path in
+        ``_DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS``.
+        """
+        if os.getenv("TERMINAL_ENV", "").strip().lower() != "docker":
+            return
+
+        non_messaging = {Platform.LOCAL, Platform.API_SERVER, Platform.WEBHOOK}
+        if all(p in non_messaging for p in self.config.get_connected_platforms()):
+            return
+
+        mount_specs: List[str] = []
+        volumes_env = os.getenv("TERMINAL_DOCKER_VOLUMES", "").strip()
+        if volumes_env:
+            try:
+                decoded = json.loads(volumes_env)
+            except Exception:
+                # Unparseable config is treated as "no volumes configured".
+                logger.debug("Could not parse TERMINAL_DOCKER_VOLUMES for gateway media warning", exc_info=True)
+            else:
+                if isinstance(decoded, list):
+                    mount_specs = [str(item) for item in decoded if isinstance(item, str)]
+
+        for spec in mount_specs:
+            parsed_spec = _DOCKER_VOLUME_SPEC_RE.match(spec)
+            if parsed_spec and parsed_spec.group("container") in _DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS:
+                # An explicit output mount exists; nothing to warn about.
+                return
+
+        logger.warning(
+            "Docker backend is enabled for the messaging gateway but no explicit host-visible "
+            "output mount (for example '/home/user/.hermes/cache/documents:/output') is configured. "
+            "This is fine if the model already emits host-visible paths, but MEDIA file delivery can fail "
+            "for container-local paths like '/workspace/...' or '/output/...'."
+        )
+
# -- Setup skill availability ----------------------------------------
@@ -752,6 +832,26 @@ class GatewayRunner:
chat_id for chat_id, mode in self._voice_mode.items() if mode == "off"
)
+    async def _safe_adapter_disconnect(self, adapter, platform) -> None:
+        """Best-effort ``adapter.disconnect()`` that never raises.
+
+        Called after ``adapter.connect()`` failed or raised, so that partially
+        allocated resources (the callers mention aiohttp sessions, poll tasks
+        and child subprocesses) get a chance to be released. Any exception
+        from ``disconnect()`` is logged at DEBUG and swallowed, because the
+        callers invoke this from inside their own error-handling paths.
+
+        ``platform`` may be ``None``; the log line then uses the generic
+        label ``"adapter"`` instead of ``platform.value``.
+        """
+        try:
+            await adapter.disconnect()
+        except Exception as exc:
+            label = "adapter" if platform is None else platform.value
+            logger.debug(
+                "Defensive %s disconnect after failed connect raised: %s",
+                label,
+                exc,
+            )
+
# -----------------------------------------------------------------
def _flush_memories_for_session(
@@ -1547,7 +1647,7 @@ class GatewayRunner:
action = "restarting" if self._restart_requested else "shutting down"
hint = (
"Your current task will be interrupted. "
- "Send any message after restart to resume where it left off."
+ "Send any message after restart and I'll try to resume where you left off."
if self._restart_requested
else "Your current task will be interrupted."
)
@@ -1921,6 +2021,15 @@ class GatewayRunner:
logger.info("✓ %s connected", platform.value)
else:
logger.warning("✗ %s failed to connect", platform.value)
+ # Defensive cleanup: a failed connect() may have
+ # allocated resources (aiohttp.ClientSession, poll
+ # tasks, bridge subprocesses) before giving up.
+ # Without this call, those resources are orphaned
+ # and Python logs "Unclosed client session" at
+ # process exit. Adapter disconnect() implementations
+ # are expected to be idempotent and tolerate
+ # partial-init state.
+ await self._safe_adapter_disconnect(adapter, platform)
if adapter.has_fatal_error:
self._update_platform_runtime_status(
platform.value,
@@ -1961,6 +2070,10 @@ class GatewayRunner:
}
except Exception as e:
logger.error("✗ %s error: %s", platform.value, e)
+ # Same defensive cleanup path for exceptions — an adapter
+ # that raised mid-connect may still have a live
+ # aiohttp.ClientSession or child subprocess.
+ await self._safe_adapter_disconnect(adapter, platform)
self._update_platform_runtime_status(
platform.value,
platform_state="retrying",
@@ -2381,8 +2494,42 @@ class GatewayRunner:
timeout,
self._running_agent_count(),
)
+ # Mark forcibly-interrupted sessions as resume_pending BEFORE
+ # interrupting the agents. This preserves each session's
+ # session_id + transcript so the next message on the same
+ # session_key auto-resumes from the existing conversation
+ # instead of getting routed through suspend_recently_active()
+ # and converted into a fresh session. Terminal escalation
+ # for genuinely stuck sessions still flows through the
+ # existing ``.restart_failure_counts`` stuck-loop counter
+ # (incremented below, threshold 3), which sets
+ # ``suspended=True`` and overrides resume_pending.
+ #
+ # Iterate self._running_agents (current) rather than the
+ # drain-start ``active_agents`` snapshot — the snapshot
+ # may include sessions that finished gracefully during
+ # the drain window, and marking those falsely would give
+ # them a stray restart-interruption system note on their
+ # next turn even though their previous turn completed
+ # cleanly. Skip pending sentinels for the same reason
+ # _interrupt_running_agents() does: their agent hasn't
+ # started yet, there's nothing to interrupt, and the
+ # session shouldn't carry a misleading resume flag.
+ _resume_reason = (
+ "restart_timeout" if self._restart_requested else "shutdown_timeout"
+ )
+ for _sk, _agent in list(self._running_agents.items()):
+ if _agent is _AGENT_PENDING_SENTINEL:
+ continue
+ try:
+ self.session_store.mark_resume_pending(_sk, _resume_reason)
+ except Exception as _e:
+ logger.debug(
+ "mark_resume_pending failed for %s: %s",
+ _sk[:20], _e,
+ )
self._interrupt_running_agents(
- "Gateway restarting" if self._restart_requested else "Gateway shutting down"
+ _INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN
)
interrupt_deadline = asyncio.get_running_loop().time() + 5.0
while self._running_agents and asyncio.get_running_loop().time() < interrupt_deadline:
@@ -2953,6 +3100,10 @@ class GatewayRunner:
_quick_key[:30], _stale_age, _stale_idle,
_raw_stale_timeout, _stale_detail,
)
+ self._invalidate_session_run_generation(
+ _quick_key,
+ reason="stale_running_agent_eviction",
+ )
self._release_running_agent_state(_quick_key)
if _quick_key in self._running_agents:
@@ -2961,8 +3112,8 @@ class GatewayRunner:
# Resolve the command once for all early-intercept checks below.
from hermes_cli.commands import (
+ ACTIVE_SESSION_BYPASS_COMMANDS as _DEDICATED_HANDLERS,
resolve_command as _resolve_cmd_inner,
- should_bypass_active_session as _should_bypass_active_inner,
)
_evt_cmd = event.get_command()
_cmd_def_inner = _resolve_cmd_inner(_evt_cmd) if _evt_cmd else None
@@ -2976,15 +3127,12 @@ class GatewayRunner:
# _interrupt_requested. Force-clean _running_agents so the session
# is unlocked and subsequent messages are processed normally.
if _cmd_def_inner and _cmd_def_inner.name == "stop":
- running_agent = self._running_agents.get(_quick_key)
- if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
- running_agent.interrupt("Stop requested")
- # Force-clean: remove the session lock regardless of agent state
- adapter = self.adapters.get(source.platform)
- if adapter and hasattr(adapter, 'get_pending_message'):
- adapter.get_pending_message(_quick_key) # consume and discard
- self._pending_messages.pop(_quick_key, None)
- self._release_running_agent_state(_quick_key)
+ await self._interrupt_and_clear_session(
+ _quick_key,
+ source,
+ interrupt_reason=_INTERRUPT_REASON_STOP,
+ invalidation_reason="stop_command",
+ )
logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
return "⚡ Stopped. You can continue this session."
@@ -2996,17 +3144,15 @@ class GatewayRunner:
# doesn't get re-processed as a user message after the
# interrupt completes.
if _cmd_def_inner and _cmd_def_inner.name == "new":
- running_agent = self._running_agents.get(_quick_key)
- if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
- running_agent.interrupt("Session reset requested")
# Clear any pending messages so the old text doesn't replay
- adapter = self.adapters.get(source.platform)
- if adapter and hasattr(adapter, 'get_pending_message'):
- adapter.get_pending_message(_quick_key) # consume and discard
- self._pending_messages.pop(_quick_key, None)
+ await self._interrupt_and_clear_session(
+ _quick_key,
+ source,
+ interrupt_reason=_INTERRUPT_REASON_RESET,
+ invalidation_reason="new_command",
+ )
# Clean up the running agent entry so the reset handler
# doesn't think an agent is still active.
- self._release_running_agent_state(_quick_key)
return await self._handle_reset_command(event)
# /queue — queue without interrupting
@@ -3027,6 +3173,54 @@ class GatewayRunner:
adapter._pending_messages[_quick_key] = queued_event
return "Queued for the next turn."
+ # /steer — inject mid-run after the next tool call.
+ # Unlike /queue (turn boundary), /steer lands BETWEEN tool-call
+ # iterations inside the same agent run, by appending to the
+ # last tool result's content. No interrupt, no new user turn,
+ # no role-alternation violation.
+ if _cmd_def_inner and _cmd_def_inner.name == "steer":
+ steer_text = event.get_command_args().strip()
+ if not steer_text:
+ # Nothing to inject: show the usage hint instead of steering with an empty payload.
+ return "Usage: /steer <prompt>"
+ running_agent = self._running_agents.get(_quick_key)
+ if running_agent is _AGENT_PENDING_SENTINEL:
+ # Agent hasn't started yet — queue as turn-boundary fallback.
+ adapter = self.adapters.get(source.platform)
+ if adapter:
+ from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
+ queued_event = _ME(
+ text=steer_text,
+ message_type=_MT.TEXT,
+ source=event.source,
+ message_id=event.message_id,
+ channel_prompt=event.channel_prompt,
+ )
+ adapter._pending_messages[_quick_key] = queued_event
+ return "Agent still starting — /steer queued for the next turn."
+ if running_agent and hasattr(running_agent, "steer"):
+ try:
+ accepted = running_agent.steer(steer_text)
+ except Exception as exc:
+ logger.warning("Steer failed for session %s: %s", _quick_key[:20], exc)
+ return f"⚠️ Steer failed: {exc}"
+ if accepted:
+ preview = steer_text[:60] + ("..." if len(steer_text) > 60 else "")
+ return f"⏩ Steer queued — arrives after the next tool call: '{preview}'"
+ return "Steer rejected (empty payload)."
+ # Running agent is missing or lacks steer() — fall back to queue.
+ adapter = self.adapters.get(source.platform)
+ if adapter:
+ from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
+ queued_event = _ME(
+ text=steer_text,
+ message_type=_MT.TEXT,
+ source=event.source,
+ message_id=event.message_id,
+ channel_prompt=event.channel_prompt,
+ )
+ adapter._pending_messages[_quick_key] = queued_event
+ return "No active agent — /steer queued for the next turn."
+
# /model must not be used while the agent is running.
if _cmd_def_inner and _cmd_def_inner.name == "model":
return "Agent is running — wait or /stop first, then switch models."
@@ -3049,11 +3243,9 @@ class GatewayRunner:
if _cmd_def_inner and _cmd_def_inner.name == "background":
return await self._handle_background_command(event)
- # Gateway-handled info/control commands must never fall through to
- # the interrupt path. If they are queued as pending text, the
- # slash-command safety net discards them before the user sees any
- # response.
- if _cmd_def_inner and _should_bypass_active_inner(_cmd_def_inner.name):
+ # Gateway-handled info/control commands with dedicated
+ # running-agent handlers.
+ if _cmd_def_inner and _cmd_def_inner.name in _DEDICATED_HANDLERS:
if _cmd_def_inner.name == "help":
return await self._handle_help_command(event)
if _cmd_def_inner.name == "commands":
@@ -3063,6 +3255,21 @@ class GatewayRunner:
if _cmd_def_inner.name == "update":
return await self._handle_update_command(event)
+ # Catch-all: any other recognized slash command reached the
+ # running-agent guard. Reject gracefully rather than falling
+ # through to interrupt + discard. Without this, commands
+ # like /model, /reasoning, /voice, /insights, /title,
+ # /resume, /retry, /undo, /compress, /usage, /provider,
+ # /reload-mcp, /sethome, /reset (all registered as Discord
+ # slash commands) would interrupt the agent AND get
+ # silently discarded by the slash-command safety net,
+ # producing a zero-char response. See #5057, #6252, #10370.
+ if _cmd_def_inner:
+ return (
+ f"⏳ Agent is running — `/{_cmd_def_inner.name}` can't run "
+ f"mid-turn. Wait for the current response or `/stop` first."
+ )
+
if event.message_type == MessageType.PHOTO:
logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
adapter = self.adapters.get(source.platform)
@@ -3268,6 +3475,21 @@ class GatewayRunner:
if canonical == "btw":
return await self._handle_btw_command(event)
+ if canonical == "steer":
+ # No active agent — /steer has no tool call to inject into.
+ # Strip the prefix so downstream treats it as a normal user
+ # message. If the payload is empty, surface the usage hint.
+ steer_payload = event.get_command_args().strip()
+ if not steer_payload:
+ return "Usage: /steer <prompt> (no agent is running; sending as a normal message)"
+ try:
+ event.text = steer_payload
+ except Exception:
+ pass
+ # Do NOT return — fall through to _handle_message_with_agent
+ # at the end of this function so the rewritten text is sent
+ # to the agent as a regular user turn.
+
if canonical == "voice":
return await self._handle_voice_command(event)
@@ -3411,9 +3633,10 @@ class GatewayRunner:
# same session — corrupting the transcript.
self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL
self._running_agents_ts[_quick_key] = time.time()
+ _run_generation = self._begin_session_run_generation(_quick_key)
try:
- return await self._handle_message_with_agent(event, source, _quick_key)
+ return await self._handle_message_with_agent(event, source, _quick_key, _run_generation)
finally:
# If _run_agent replaced the sentinel with a real agent and
# then cleaned it up, this is a no-op. If we exited early
@@ -3584,7 +3807,7 @@ class GatewayRunner:
return message_text
- async def _handle_message_with_agent(self, event, source, _quick_key: str):
+ async def _handle_message_with_agent(self, event, source, _quick_key: str, run_generation: int):
"""Inner handler that runs under the _running_agents sentinel guard."""
_msg_start_time = time.time()
_platform_name = source.platform.value if hasattr(source.platform, "value") else str(source.platform)
@@ -4041,6 +4264,15 @@ class GatewayRunner:
if message_text is None:
return
+ # Bind this gateway run generation to the adapter's active-session
+ # event so deferred post-delivery callbacks can be released by the
+ # same run that registered them.
+ self._bind_adapter_run_generation(
+ self.adapters.get(source.platform),
+ session_key,
+ run_generation,
+ )
+
try:
# Emit agent:start hook
hook_ctx = {
@@ -4059,6 +4291,7 @@ class GatewayRunner:
source=source,
session_id=session_entry.session_id,
session_key=session_key,
+ run_generation=run_generation,
event_message_id=event.message_id,
channel_prompt=event.channel_prompt,
)
@@ -4071,6 +4304,22 @@ class GatewayRunner:
except Exception:
pass
+ if not self._is_session_run_current(_quick_key, run_generation):
+ logger.info(
+ "Discarding stale agent result for %s — generation %d is no longer current",
+ _quick_key[:20] if _quick_key else "?",
+ run_generation,
+ )
+ _stale_adapter = self.adapters.get(source.platform)
+ if getattr(type(_stale_adapter), "pop_post_delivery_callback", None) is not None:
+ _stale_adapter.pop_post_delivery_callback(
+ _quick_key,
+ generation=run_generation,
+ )
+ elif _stale_adapter and hasattr(_stale_adapter, "_post_delivery_callbacks"):
+ _stale_adapter._post_delivery_callbacks.pop(_quick_key, None)
+ return None
+
response = agent_result.get("final_response") or ""
# Convert the agent's internal "(empty)" sentinel into a
@@ -4097,8 +4346,20 @@ class GatewayRunner:
# Successful turn — clear any stuck-loop counter for this session.
# This ensures the counter only accumulates across CONSECUTIVE
# restarts where the session was active (never completed).
+ #
+ # Also clear the resume_pending flag (set by drain-timeout
+ # shutdown) — the turn ran to completion, so recovery
+ # succeeded and subsequent messages should no longer receive
+ # the restart-interruption system note.
if session_key:
self._clear_restart_failure_count(session_key)
+ try:
+ self.session_store.clear_resume_pending(session_key)
+ except Exception as _e:
+ logger.debug(
+ "clear_resume_pending failed for %s: %s",
+ session_key[:20], _e,
+ )
# Surface error details when the agent failed silently (final_response=None)
if not response and agent_result.get("failed"):
@@ -4473,6 +4734,7 @@ class GatewayRunner:
# Get existing session key
session_key = self._session_key_for_source(source)
+ self._invalidate_session_run_generation(session_key, reason="session_reset")
# Flush memories in the background (fire-and-forget) so the user
# gets the "Session reset!" response immediately.
@@ -4732,20 +4994,49 @@ class GatewayRunner:
agent = self._running_agents.get(session_key)
if agent is _AGENT_PENDING_SENTINEL:
# Force-clean the sentinel so the session is unlocked.
- self._release_running_agent_state(session_key)
+ await self._interrupt_and_clear_session(
+ session_key,
+ source,
+ interrupt_reason=_INTERRUPT_REASON_STOP,
+ invalidation_reason="stop_command_pending",
+ )
logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
if agent:
- agent.interrupt("Stop requested")
# Force-clean the session lock so a truly hung agent doesn't
# keep it locked forever.
- self._release_running_agent_state(session_key)
+ await self._interrupt_and_clear_session(
+ session_key,
+ source,
+ interrupt_reason=_INTERRUPT_REASON_STOP,
+ invalidation_reason="stop_command_handler",
+ )
return "⚡ Stopped. You can continue this session."
else:
return "No active task to stop."
async def _handle_restart_command(self, event: MessageEvent) -> str:
"""Handle /restart command - drain active work, then restart the gateway."""
+ # Defensive idempotency check: if the previous gateway process
+ # recorded this same /restart (same platform + update_id) and the new
+ # process is seeing it *again*, this is a re-delivery caused by PTB's
+ # graceful-shutdown `get_updates` ACK failing on the way out ("Error
+ # while calling `get_updates` one more time to mark all fetched
+ # updates. Suppressing error to ensure graceful shutdown. When
+ # polling for updates is restarted, updates may be received twice."
+ # in gateway.log). Ignoring the stale redelivery prevents a
+ # self-perpetuating restart loop where every fresh gateway
+ # re-processes the same /restart command and immediately restarts
+ # again.
+ if self._is_stale_restart_redelivery(event):
+ logger.info(
+ "Ignoring redelivered /restart (platform=%s, update_id=%s) — "
+ "already processed by a previous gateway instance.",
+ event.source.platform.value if event.source and event.source.platform else "?",
+ event.platform_update_id,
+ )
+ return ""
+
if self._restart_requested or self._draining:
count = self._running_agent_count()
if count:
@@ -4768,6 +5059,26 @@ class GatewayRunner:
except Exception as e:
logger.debug("Failed to write restart notify file: %s", e)
+ # Record the triggering platform + update_id in a dedicated dedup
+ # marker. Unlike .restart_notify.json (which gets unlinked once the
+ # new gateway sends the "gateway restarted" notification), this
+ # marker persists so the new gateway can still detect a delayed
+ # /restart redelivery from Telegram. Overwritten on every /restart.
+ try:
+ import json as _json
+ import time as _time
+ dedup_data = {
+ "platform": event.source.platform.value if event.source.platform else None,
+ "requested_at": _time.time(),
+ }
+ if event.platform_update_id is not None:
+ dedup_data["update_id"] = event.platform_update_id
+ (_hermes_home / ".restart_last_processed.json").write_text(
+ _json.dumps(dedup_data)
+ )
+ except Exception as e:
+ logger.debug("Failed to write restart dedup marker: %s", e)
+
active_agents = self._running_agent_count()
# When running under a service manager (systemd/launchd), use the
# service restart path: exit with code 75 so the service manager
@@ -4783,6 +5094,58 @@ class GatewayRunner:
return f"⏳ Draining {active_agents} active agent(s) before restart..."
return "♻ Restarting gateway. If you aren't notified within 60 seconds, restart from the console with `hermes gateway restart`."
+ def _is_stale_restart_redelivery(self, event: MessageEvent) -> bool:
+ """Return True if this /restart is a Telegram re-delivery we already handled.
+
+ The previous gateway wrote ``.restart_last_processed.json`` with the
+ triggering platform + update_id when it processed the /restart. If
+ we now see a /restart on the same platform with an update_id <= that
+ recorded value AND the marker is recent (< 5 minutes), it's a
+ redelivery and should be ignored.
+
+ Only applies to Telegram today (the only platform that exposes a
+ numeric cross-session update ordering); other platforms return False.
+ """
+ if event is None or event.source is None:
+ return False
+ if event.platform_update_id is None:
+ return False
+ if event.source.platform is None:
+ return False
+ # Only Telegram populates platform_update_id currently; be explicit
+ # so future platforms aren't accidentally gated by this check.
+ try:
+ platform_value = event.source.platform.value
+ except Exception:
+ return False
+ if platform_value != "telegram":
+ return False
+
+ try:
+ import json as _json
+ import time as _time
+ marker_path = _hermes_home / ".restart_last_processed.json"
+ if not marker_path.exists():
+ return False
+ data = _json.loads(marker_path.read_text())
+ except Exception:
+ return False
+
+ if data.get("platform") != platform_value:
+ return False
+ recorded_uid = data.get("update_id")
+ if not isinstance(recorded_uid, int):
+ return False
+ # Staleness guard: ignore markers older than 5 minutes. A legitimately
+ # old marker (e.g. crash recovery where notify never fired) should not
+ # swallow a fresh /restart from the user.
+ requested_at = data.get("requested_at")
+ if isinstance(requested_at, (int, float)):
+ if _time.time() - requested_at > 300:
+ return False
+ return event.platform_update_id <= recorded_uid
+
+
async def _handle_help_command(self, event: MessageEvent) -> str:
"""Handle /help command - list available commands."""
from hermes_cli.commands import gateway_help_lines
@@ -5528,8 +5891,7 @@ class GatewayRunner:
if "pynacl" in err_lower or "nacl" in err_lower or "davey" in err_lower:
return (
"Voice dependencies are missing (PyNaCl / davey). "
- "Install or reinstall Hermes with the messaging extra, e.g. "
- "`pip install hermes-agent[messaging]`."
+ f"Install with: `{sys.executable} -m pip install PyNaCl`"
)
return f"Failed to join voice channel: {e}"
@@ -8096,6 +8458,84 @@ class GatewayRunner:
if hasattr(self, "_busy_ack_ts"):
self._busy_ack_ts.pop(session_key, None)
+ def _begin_session_run_generation(self, session_key: str) -> int:
+ """Claim a fresh run generation token for ``session_key``.
+
+ Every top-level gateway turn gets a monotonically increasing token.
+ If a later command like /stop or /new invalidates that token while the
+ old worker is still unwinding, the late result can be recognized and
+ dropped instead of bleeding into the fresh session.
+ """
+ if not session_key:
+ return 0
+ generations = self.__dict__.get("_session_run_generation")
+ if generations is None:
+ generations = {}
+ self._session_run_generation = generations
+ next_generation = int(generations.get(session_key, 0)) + 1
+ generations[session_key] = next_generation
+ return next_generation
+
+ def _invalidate_session_run_generation(self, session_key: str, *, reason: str = "") -> int:
+ """Invalidate any in-flight run token for ``session_key``."""
+ generation = self._begin_session_run_generation(session_key)
+ if reason:
+ logger.info(
+ "Invalidated run generation for %s → %d (%s)",
+ session_key[:20],
+ generation,
+ reason,
+ )
+ return generation
+
+ def _is_session_run_current(self, session_key: str, generation: int) -> bool:
+ """Return True when ``generation`` is still current for ``session_key``."""
+ if not session_key:
+ return True
+ generations = self.__dict__.get("_session_run_generation") or {}
+ return int(generations.get(session_key, 0)) == int(generation)
+
+ def _bind_adapter_run_generation(
+ self,
+ adapter: Any,
+ session_key: str,
+ generation: int | None,
+ ) -> None:
+ """Bind a gateway run generation to the adapter's active-session event."""
+ if not adapter or not session_key or generation is None:
+ return
+ try:
+ interrupt_event = getattr(adapter, "_active_sessions", {}).get(session_key)
+ if interrupt_event is not None:
+ setattr(interrupt_event, "_hermes_run_generation", int(generation))
+ except Exception:
+ pass
+
+ async def _interrupt_and_clear_session(
+ self,
+ session_key: str,
+ source: SessionSource,
+ *,
+ interrupt_reason: str,
+ invalidation_reason: str,
+ release_running_state: bool = True,
+ ) -> None:
+ """Interrupt the current run and clear queued session state consistently."""
+ if not session_key:
+ return
+ running_agent = self._running_agents.get(session_key)
+ if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
+ running_agent.interrupt(interrupt_reason)
+ self._invalidate_session_run_generation(session_key, reason=invalidation_reason)
+ adapter = self.adapters.get(source.platform)
+ if adapter and hasattr(adapter, "interrupt_session_activity"):
+ await adapter.interrupt_session_activity(session_key, source.chat_id)
+ if adapter and hasattr(adapter, "get_pending_message"):
+ adapter.get_pending_message(session_key) # consume and discard
+ self._pending_messages.pop(session_key, None)
+ if release_running_state:
+ self._release_running_agent_state(session_key)
+
def _evict_cached_agent(self, session_key: str) -> None:
"""Remove a cached agent for a session (called on /new, /model, etc)."""
_lock = getattr(self, "_agent_cache_lock", None)
@@ -8277,6 +8717,7 @@ class GatewayRunner:
source: "SessionSource",
session_id: str,
session_key: str = None,
+ run_generation: Optional[int] = None,
event_message_id: Optional[str] = None,
) -> Dict[str, Any]:
"""Forward the message to a remote Hermes API server instead of
@@ -8312,6 +8753,11 @@ class GatewayRunner:
proxy_key = os.getenv("GATEWAY_PROXY_KEY", "").strip()
+ def _run_still_current() -> bool:
+ if run_generation is None or not session_key:
+ return True
+ return self._is_session_run_current(session_key, run_generation)
+
# Build messages in OpenAI chat format --------------------------
#
# The remote api_server can maintain session continuity via
@@ -8441,6 +8887,21 @@ class GatewayRunner:
# Parse SSE stream
buffer = ""
async for chunk in resp.content.iter_any():
+ if not _run_still_current():
+ logger.info(
+ "Discarding stale proxy stream for %s — generation %d is no longer current",
+ session_key[:20] if session_key else "?",
+ run_generation or 0,
+ )
+ return {
+ "final_response": "",
+ "messages": [],
+ "api_calls": 0,
+ "tools": [],
+ "history_offset": len(history),
+ "session_id": session_id,
+ "response_previewed": False,
+ }
text = chunk.decode("utf-8", errors="replace")
buffer += text
@@ -8490,6 +8951,21 @@ class GatewayRunner:
stream_task.cancel()
_elapsed = time.time() - _start
+ if not _run_still_current():
+ logger.info(
+ "Discarding stale proxy result for %s — generation %d is no longer current",
+ session_key[:20] if session_key else "?",
+ run_generation or 0,
+ )
+ return {
+ "final_response": "",
+ "messages": [],
+ "api_calls": 0,
+ "tools": [],
+ "history_offset": len(history),
+ "session_id": session_id,
+ "response_previewed": False,
+ }
logger.info(
"proxy response: url=%s session=%s time=%.1fs response=%d chars",
proxy_url, (session_id or "")[:20], _elapsed, len(full_response),
@@ -8518,6 +8994,7 @@ class GatewayRunner:
source: SessionSource,
session_id: str,
session_key: str = None,
+ run_generation: Optional[int] = None,
_interrupt_depth: int = 0,
event_message_id: Optional[str] = None,
channel_prompt: Optional[str] = None,
@@ -8543,11 +9020,17 @@ class GatewayRunner:
source=source,
session_id=session_id,
session_key=session_key,
+ run_generation=run_generation,
event_message_id=event_message_id,
)
from run_agent import AIAgent
import queue
+
+ def _run_still_current() -> bool:
+ if run_generation is None or not session_key:
+ return True
+ return self._is_session_run_current(session_key, run_generation)
user_config = _load_gateway_config()
platform_key = _platform_config_key(source.platform)
@@ -8602,7 +9085,7 @@ class GatewayRunner:
def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs):
"""Callback invoked by agent on tool lifecycle events."""
- if not progress_queue:
+ if not progress_queue or not _run_still_current():
return
# Only act on tool.started events (ignore tool.completed, reasoning.available, etc.)
@@ -8707,6 +9190,14 @@ class GatewayRunner:
while True:
try:
+ if not _run_still_current():
+ while not progress_queue.empty():
+ try:
+ progress_queue.get_nowait()
+ except Exception:
+ break
+ return
+
raw = progress_queue.get_nowait()
# Handle dedup messages: update last line with repeat counter
@@ -8732,6 +9223,9 @@ class GatewayRunner:
await asyncio.sleep(_remaining)
continue
+ if not _run_still_current():
+ return
+
if can_edit and progress_msg_id is not None:
# Try to edit the existing progress message
full_text = "\n".join(progress_lines)
@@ -8767,7 +9261,8 @@ class GatewayRunner:
# Restore typing indicator
await asyncio.sleep(0.3)
- await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
+ if _run_still_current():
+ await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
except queue.Empty:
await asyncio.sleep(0.3)
@@ -8811,6 +9306,8 @@ class GatewayRunner:
_hooks_ref = self.hooks
def _step_callback_sync(iteration: int, prev_tools: list) -> None:
+ if not _run_still_current():
+ return
try:
# prev_tools may be list[str] or list[dict] with "name"/"result"
# keys. Normalise to keep "tool_names" backward-compatible for
@@ -8841,7 +9338,7 @@ class GatewayRunner:
_status_thread_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None
def _status_callback_sync(event_type: str, message: str) -> None:
- if not _status_adapter:
+ if not _status_adapter or not _run_still_current():
return
try:
asyncio.run_coroutine_threadsafe(
@@ -8972,12 +9469,16 @@ class GatewayRunner:
metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None,
)
if _want_stream_deltas:
- _stream_delta_cb = _stream_consumer.on_delta
+ def _stream_delta_cb(text: str) -> None:
+ if _run_still_current():
+ _stream_consumer.on_delta(text)
stream_consumer_holder[0] = _stream_consumer
except Exception as _sc_err:
logger.debug("Could not set up stream consumer: %s", _sc_err)
def _interim_assistant_cb(text: str, *, already_streamed: bool = False) -> None:
+ if not _run_still_current():
+ return
if _stream_consumer is not None:
if already_streamed:
_stream_consumer.on_segment_break()
@@ -9081,7 +9582,7 @@ class GatewayRunner:
_bg_review_pending_lock = threading.Lock()
def _deliver_bg_review_message(message: str) -> None:
- if not _status_adapter:
+ if not _status_adapter or not _run_still_current():
return
try:
asyncio.run_coroutine_threadsafe(
@@ -9105,7 +9606,7 @@ class GatewayRunner:
# Background review delivery — send "💾 Memory updated" etc. to user
def _bg_review_send(message: str) -> None:
- if not _status_adapter:
+ if not _status_adapter or not _run_still_current():
return
if not _bg_review_release.is_set():
with _bg_review_pending_lock:
@@ -9118,9 +9619,16 @@ class GatewayRunner:
# Register the release hook on the adapter so base.py's finally
# block can fire it after delivering the main response.
if _status_adapter and session_key:
- _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None)
- if _pdc is not None:
- _pdc[session_key] = _release_bg_review_messages
+ if getattr(type(_status_adapter), "register_post_delivery_callback", None) is not None:
+ _status_adapter.register_post_delivery_callback(
+ session_key,
+ _release_bg_review_messages,
+ generation=run_generation,
+ )
+ else:
+ _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None)
+ if _pdc is not None:
+ _pdc[session_key] = _release_bg_review_messages
# Store agent reference for interrupt support
agent_holder[0] = agent
@@ -9282,7 +9790,40 @@ class GatewayRunner:
# restart, crash, SIGTERM). Prepend a system note so the model
# finishes processing the pending tool results before addressing
# the user's new message. (#4493)
- if agent_history and agent_history[-1].get("role") == "tool":
+ #
+ # Session-level resume_pending (set on drain-timeout shutdown)
+ # escalates the wording — the transcript's last role may be
+ # anything (tool, assistant with unfinished work, etc.), so we
+ # give a stronger, reason-aware instruction that subsumes the
+ # tool-tail case.
+ _resume_entry = None
+ if session_key:
+ try:
+ _resume_entry = self.session_store._entries.get(session_key)
+ except Exception:
+ _resume_entry = None
+ _is_resume_pending = bool(
+ _resume_entry is not None and getattr(_resume_entry, "resume_pending", False)
+ )
+
+ if _is_resume_pending:
+ _reason = getattr(_resume_entry, "resume_reason", None) or "restart_timeout"
+ _reason_phrase = (
+ "a gateway restart"
+ if _reason == "restart_timeout"
+ else "a gateway shutdown"
+ if _reason == "shutdown_timeout"
+ else "a gateway interruption"
+ )
+ message = (
+ f"[System note: Your previous turn in this session was interrupted "
+ f"by {_reason_phrase}. The conversation history below is intact. "
+ f"If it contains unfinished tool result(s), process them first and "
+ f"summarize what was accomplished, then address the user's new "
+ f"message below.]\n\n"
+ + message
+ )
+ elif agent_history and agent_history[-1].get("role") == "tool":
message = (
"[System note: Your previous turn was interrupted before you could "
"process the last tool result(s). The conversation history contains "
@@ -9689,7 +10230,7 @@ class GatewayRunner:
# Interrupt the agent if it's still running so the thread
# pool worker is freed.
if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"):
- _timed_out_agent.interrupt("Execution timed out (inactivity)")
+ _timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT)
_timeout_mins = int(_agent_timeout // 60) or 1
@@ -9754,7 +10295,15 @@ class GatewayRunner:
if result and adapter and session_key:
pending_event = _dequeue_pending_event(adapter, session_key)
if result.get("interrupted") and not pending_event and result.get("interrupt_message"):
- pending = result.get("interrupt_message")
+ interrupt_message = result.get("interrupt_message")
+ if _is_control_interrupt_message(interrupt_message):
+ logger.info(
+ "Ignoring control interrupt message for session %s: %s",
+ session_key[:20] if session_key else "?",
+ interrupt_message,
+ )
+ else:
+ pending = interrupt_message
elif pending_event:
pending = pending_event.text or _build_media_placeholder(pending_event)
logger.debug("Processing queued message after agent completion: '%s...'", pending[:40])
@@ -9859,7 +10408,17 @@ class GatewayRunner:
# first response has been delivered. Pop from the
# adapter's callback dict (prevents double-fire in
# base.py's finally block) and call it.
- if adapter and hasattr(adapter, "_post_delivery_callbacks"):
+ if getattr(type(adapter), "pop_post_delivery_callback", None) is not None:
+ _bg_cb = adapter.pop_post_delivery_callback(
+ session_key,
+ generation=run_generation,
+ )
+ if callable(_bg_cb):
+ try:
+ _bg_cb()
+ except Exception:
+ pass
+ elif adapter and hasattr(adapter, "_post_delivery_callbacks"):
_bg_cb = adapter._post_delivery_callbacks.pop(session_key, None)
if callable(_bg_cb):
try:
@@ -9907,6 +10466,7 @@ class GatewayRunner:
source=next_source,
session_id=session_id,
session_key=session_key,
+ run_generation=run_generation,
_interrupt_depth=_interrupt_depth + 1,
event_message_id=next_message_id,
channel_prompt=next_channel_prompt,
diff --git a/gateway/session.py b/gateway/session.py
index 4cb623128c7..8b31c2b0aa2 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -377,7 +377,19 @@ class SessionEntry:
# this session (create a new session_id) so the user starts fresh.
# Set by /stop to break stuck-resume loops (#7536).
suspended: bool = False
-
+
+ # When True the session was interrupted by a gateway restart/shutdown
+ # drain timeout, but recovery is still expected. Unlike ``suspended``,
+ # ``resume_pending`` preserves the existing session_id on next access —
+ # the user stays on the same transcript and the agent auto-continues
+ # from where it left off. Cleared after the next successful turn.
+ # Escalation to ``suspended`` is handled by the existing
+ # ``.restart_failure_counts`` stuck-loop counter (#7536), not by a
+ # parallel counter on this entry.
+ resume_pending: bool = False
+ resume_reason: Optional[str] = None # e.g. "restart_timeout"
+ last_resume_marked_at: Optional[datetime] = None
+
def to_dict(self) -> Dict[str, Any]:
result = {
"session_key": self.session_key,
@@ -397,6 +409,13 @@ class SessionEntry:
"cost_status": self.cost_status,
"memory_flushed": self.memory_flushed,
"suspended": self.suspended,
+ "resume_pending": self.resume_pending,
+ "resume_reason": self.resume_reason,
+ "last_resume_marked_at": (
+ self.last_resume_marked_at.isoformat()
+ if self.last_resume_marked_at
+ else None
+ ),
}
if self.origin:
result["origin"] = self.origin.to_dict()
@@ -414,7 +433,15 @@ class SessionEntry:
platform = Platform(data["platform"])
except ValueError as e:
logger.debug("Unknown platform value %r: %s", data["platform"], e)
-
+
+ last_resume_marked_at = None
+ _lrma = data.get("last_resume_marked_at")
+ if _lrma:
+ try:
+ last_resume_marked_at = datetime.fromisoformat(_lrma)
+ except (TypeError, ValueError):
+ last_resume_marked_at = None
+
return cls(
session_key=data["session_key"],
session_id=data["session_id"],
@@ -434,6 +461,9 @@ class SessionEntry:
cost_status=data.get("cost_status", "unknown"),
memory_flushed=data.get("memory_flushed", False),
suspended=data.get("suspended", False),
+ resume_pending=data.get("resume_pending", False),
+ resume_reason=data.get("resume_reason"),
+ last_resume_marked_at=last_resume_marked_at,
)
@@ -710,9 +740,23 @@ class SessionStore:
entry = self._entries[session_key]
# Auto-reset sessions marked as suspended (e.g. after /stop
- # broke a stuck loop — #7536).
+ # broke a stuck loop — #7536). ``suspended`` is the hard
+ # forced-wipe signal and always wins over ``resume_pending``,
+ # so repeated interrupted restarts that escalate via the
+ # existing ``.restart_failure_counts`` stuck-loop counter
+ # still converge to a clean slate.
if entry.suspended:
reset_reason = "suspended"
+ elif entry.resume_pending:
+ # Restart-interrupted session: preserve the session_id
+ # and return the existing entry so the transcript
+ # reloads intact. ``resume_pending`` is cleared after
+ # the NEXT successful turn completes (not here), which
+ # means a re-interrupted retry keeps trying — the
+ # stuck-loop counter handles terminal escalation.
+ entry.updated_at = now
+ self._save()
+ return entry
else:
reset_reason = self._should_reset(entry, source)
if not reset_reason:
@@ -802,6 +846,55 @@ class SessionStore:
return True
return False
+ def mark_resume_pending(
+ self,
+ session_key: str,
+ reason: str = "restart_timeout",
+ ) -> bool:
+ """Mark a session as resumable after a restart interruption.
+
+ Unlike ``suspend_session()``, this preserves the existing
+ ``session_id`` and the transcript. The next call to
+ ``get_or_create_session()`` for this key returns the same entry
+ so the user auto-resumes on the same conversation lane.
+
+ Returns True if the session existed and was marked.
+ """
+ with self._lock:
+ self._ensure_loaded_locked()
+ if session_key in self._entries:
+ entry = self._entries[session_key]
+ # Never override an explicit ``suspended`` — that is a hard
+ # forced-wipe signal (from /stop or stuck-loop escalation).
+ if entry.suspended:
+ return False
+ entry.resume_pending = True
+ entry.resume_reason = reason
+ entry.last_resume_marked_at = _now()
+ self._save()
+ return True
+ return False
+
+ def clear_resume_pending(self, session_key: str) -> bool:
+ """Clear the resume-pending flag after a successful resumed turn.
+
+ Called from the gateway after ``run_conversation()`` returns a
+ final response for a session that had ``resume_pending=True``,
+ signalling that recovery succeeded.
+
+ Returns True if a flag was cleared.
+ """
+ with self._lock:
+ self._ensure_loaded_locked()
+ entry = self._entries.get(session_key)
+ if entry is None or not entry.resume_pending:
+ return False
+ entry.resume_pending = False
+ entry.resume_reason = None
+ entry.last_resume_marked_at = None
+ self._save()
+ return True
+
def prune_old_entries(self, max_age_days: int) -> int:
"""Drop SessionEntry records older than max_age_days.
@@ -861,6 +954,12 @@ class SessionStore:
(#7536). Only suspends sessions updated within *max_age_seconds*
to avoid resetting long-idle sessions that are harmless to resume.
Returns the number of sessions that were suspended.
+
+ Entries flagged ``resume_pending=True`` are skipped — those were
+ marked intentionally by the drain-timeout path as recoverable.
+ Terminal escalation for genuinely stuck ``resume_pending`` sessions
+ is handled by the existing ``.restart_failure_counts`` stuck-loop
+ counter, which runs after this method on startup.
"""
from datetime import timedelta
@@ -869,6 +968,8 @@ class SessionStore:
with self._lock:
self._ensure_loaded_locked()
for entry in self._entries.values():
+ if entry.resume_pending:
+ continue
if not entry.suspended and entry.updated_at >= cutoff:
entry.suspended = True
count += 1
diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py
index ae00aee392b..78e365712d9 100644
--- a/gateway/stream_consumer.py
+++ b/gateway/stream_consumer.py
@@ -430,6 +430,21 @@ class GatewayStreamConsumer:
# a real string like "msg_1", not "__no_edit__", so that case
# still resets and creates a fresh segment as intended.)
if got_segment_break:
+ # If the segment-break edit failed to deliver the
+ # accumulated content (flood control that has not yet
+ # promoted to fallback mode, or fallback mode itself),
+ # _accumulated still holds pre-boundary text the user
+ # never saw. Flush that tail as a continuation message
+ # before the reset below wipes _accumulated — otherwise
+ # text generated before the tool boundary is silently
+ # dropped (issue #8124).
+ if (
+ self._accumulated
+ and not current_update_visible
+ and self._message_id
+ and self._message_id != "__no_edit__"
+ ):
+ await self._flush_segment_tail_on_edit_failure()
self._reset_segment_state(preserve_no_edit=True)
await asyncio.sleep(0.05) # Small yield to not busy-loop
@@ -556,6 +571,30 @@ class GatewayStreamConsumer:
if final_text.strip() and final_text != self._visible_prefix():
continuation = final_text
else:
+ # Defence-in-depth for #7183: the last edit may still show the
+ # cursor character because fallback mode was entered after an
+ # edit failure left it stuck. Try one final edit to strip it
+ # so the message doesn't freeze with a visible ▉. Best-effort
+ # — if this edit also fails (flood control still active),
+ # _try_strip_cursor has already been called on fallback entry
+ # and the adaptive-backoff retries will have had their shot.
+ if (
+ self._message_id
+ and self._last_sent_text
+ and self.cfg.cursor
+ and self._last_sent_text.endswith(self.cfg.cursor)
+ ):
+ clean_text = self._last_sent_text[:-len(self.cfg.cursor)]
+ try:
+ result = await self.adapter.edit_message(
+ chat_id=self.chat_id,
+ message_id=self._message_id,
+ content=clean_text,
+ )
+ if result.success:
+ self._last_sent_text = clean_text
+ except Exception:
+ pass
self._already_sent = True
self._final_response_sent = True
return
@@ -620,6 +659,39 @@ class GatewayStreamConsumer:
err_lower = err.lower()
return "flood" in err_lower or "retry after" in err_lower or "rate" in err_lower
+ async def _flush_segment_tail_on_edit_failure(self) -> None:
+ """Deliver un-sent tail content before a segment-break reset.
+
+ When an edit fails (flood control, transport error) and a tool
+ boundary arrives before the next retry, ``_accumulated`` holds text
+ that was generated but never shown to the user. Without this flush,
+ the segment reset would discard that tail and leave a frozen cursor
+ in the partial message.
+
+ Sends the tail that sits after the last successfully-delivered
+ prefix as a new message, and best-effort strips the stuck cursor
+ from the previous partial message.
+ """
+ if not self._fallback_final_send:
+ await self._try_strip_cursor()
+ visible = self._fallback_prefix or self._visible_prefix()
+ tail = self._accumulated
+ if visible and tail.startswith(visible):
+ tail = tail[len(visible):].lstrip()
+ tail = self._clean_for_display(tail)
+ if not tail.strip():
+ return
+ try:
+ result = await self.adapter.send(
+ chat_id=self.chat_id,
+ content=tail,
+ metadata=self.metadata,
+ )
+ if result.success:
+ self._already_sent = True
+ except Exception as e:
+ logger.error("Segment-break tail flush error: %s", e)
+
async def _try_strip_cursor(self) -> None:
"""Best-effort edit to remove the cursor from the last visible message.
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 831f81bf266..4623147a5a5 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -1434,49 +1434,6 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]:
}
-def _write_codex_cli_tokens(
- access_token: str,
- refresh_token: str,
- *,
- last_refresh: Optional[str] = None,
-) -> None:
- """Write refreshed tokens back to ~/.codex/auth.json.
-
- OpenAI OAuth refresh tokens are single-use and rotate on every refresh.
- When Hermes refreshes a token it consumes the old refresh_token; if we
- don't write the new pair back, the Codex CLI (or VS Code extension) will
- fail with ``refresh_token_reused`` on its next refresh attempt.
-
- This mirrors the Anthropic write-back to ~/.claude/.credentials.json
- via ``_write_claude_code_credentials()``.
- """
- codex_home = os.getenv("CODEX_HOME", "").strip()
- if not codex_home:
- codex_home = str(Path.home() / ".codex")
- auth_path = Path(codex_home).expanduser() / "auth.json"
- try:
- existing: Dict[str, Any] = {}
- if auth_path.is_file():
- existing = json.loads(auth_path.read_text(encoding="utf-8"))
- if not isinstance(existing, dict):
- existing = {}
-
- tokens_dict = existing.get("tokens")
- if not isinstance(tokens_dict, dict):
- tokens_dict = {}
- tokens_dict["access_token"] = access_token
- tokens_dict["refresh_token"] = refresh_token
- existing["tokens"] = tokens_dict
- if last_refresh is not None:
- existing["last_refresh"] = last_refresh
-
- auth_path.parent.mkdir(parents=True, exist_ok=True)
- auth_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
- auth_path.chmod(0o600)
- except (OSError, IOError) as exc:
- logger.debug("Failed to write refreshed tokens to %s: %s", auth_path, exc)
-
-
def _save_codex_tokens(tokens: Dict[str, str], last_refresh: str = None) -> None:
"""Save Codex OAuth tokens to Hermes auth store (~/.hermes/auth.json)."""
if last_refresh is None:
@@ -1544,6 +1501,11 @@ def refresh_codex_oauth_pure(
"then run `hermes auth` to re-authenticate."
)
relogin_required = True
+ # A 401/403 from the token endpoint always means the refresh token
+ # is invalid/expired — force relogin even if the body error code
+ # wasn't one of the known strings above.
+ if response.status_code in (401, 403) and not relogin_required:
+ relogin_required = True
raise AuthError(
message,
provider="openai-codex",
@@ -1599,12 +1561,6 @@ def _refresh_codex_auth_tokens(
updated_tokens["refresh_token"] = refreshed["refresh_token"]
_save_codex_tokens(updated_tokens)
- # Write back to ~/.codex/auth.json so Codex CLI / VS Code stay in sync.
- _write_codex_cli_tokens(
- refreshed["access_token"],
- refreshed["refresh_token"],
- last_refresh=refreshed.get("last_refresh"),
- )
return updated_tokens
@@ -1649,25 +1605,7 @@ def resolve_codex_runtime_credentials(
refresh_skew_seconds: int = CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
) -> Dict[str, Any]:
"""Resolve runtime credentials from Hermes's own Codex token store."""
- try:
- data = _read_codex_tokens()
- except AuthError as orig_err:
- # Only attempt migration when there are NO tokens stored at all
- # (code == "codex_auth_missing"), not when tokens exist but are invalid.
- if orig_err.code != "codex_auth_missing":
- raise
-
- # Migration: user had Codex as active provider with old storage (~/.codex/).
- cli_tokens = _import_codex_cli_tokens()
- if cli_tokens:
- logger.info("Migrating Codex credentials from ~/.codex/ to Hermes auth store")
- print("⚠️ Migrating Codex credentials to Hermes's own auth store.")
- print(" This avoids conflicts with Codex CLI and VS Code.")
- print(" Run `hermes auth` to create a fully independent session.\n")
- _save_codex_tokens(cli_tokens)
- data = _read_codex_tokens()
- else:
- raise
+ data = _read_codex_tokens()
tokens = dict(data["tokens"])
access_token = str(tokens.get("access_token", "") or "").strip()
refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20"))
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index ce257b0d7cb..f753d6f3a73 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -91,6 +91,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
aliases=("tasks",)),
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
aliases=("q",), args_hint=""),
+ CommandDef("steer", "Inject a message after the next tool call without interrupting", "Session",
+ args_hint=""),
CommandDef("status", "Show session info", "Session"),
CommandDef("profile", "Show active profile name and home directory", "Info"),
CommandDef("sethome", "Set this chat as the home channel", "Session",
@@ -258,10 +260,10 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
)
-# Commands that must never be queued behind an active gateway session.
-# These are explicit control/info commands handled by the gateway itself;
-# if they get queued as pending text, the safety net in gateway.run will
-# discard them before they ever reach the user.
+# Commands with explicit Level-2 running-agent handlers in gateway/run.py.
+# Listed here for introspection / tests; semantically a subset of
+# "all resolvable commands" — which is the real bypass set (see
+# should_bypass_active_session below).
ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
{
"agents",
@@ -275,6 +277,7 @@ ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
"queue",
"restart",
"status",
+ "steer",
"stop",
"update",
}
@@ -282,9 +285,26 @@ ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
def should_bypass_active_session(command_name: str | None) -> bool:
- """Return True when a slash command must bypass active-session queuing."""
- cmd = resolve_command(command_name) if command_name else None
- return bool(cmd and cmd.name in ACTIVE_SESSION_BYPASS_COMMANDS)
+ """Return True for any resolvable slash command.
+
+ Rationale: every gateway-registered slash command either has a
+ specific Level-2 handler in gateway/run.py (/stop, /new, /model,
+ /approve, etc.) or reaches the running-agent catch-all that returns
+ a "busy — wait or /stop first" response. In both paths the command
+ is dispatched, not queued.
+
+ Queueing is always wrong for a recognized slash command because the
+ safety net in gateway.run discards any command text that reaches
+ the pending queue — which meant a mid-run /model (or /reasoning,
+ /voice, /insights, /title, /resume, /retry, /undo, /compress,
+ /usage, /provider, /reload-mcp, /sethome, /reset) would silently
+ interrupt the agent AND get discarded, producing a zero-char
+ response. See issue #5057 / PRs #6252, #10370, #4665.
+
+ ACTIVE_SESSION_BYPASS_COMMANDS remains the subset of commands with
+ explicit Level-2 handlers; the rest fall through to the catch-all.
+ """
+ return resolve_command(command_name) is not None if command_name else False
def _resolve_config_gates() -> set[str]:
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index c9e05e3e882..786ff622d93 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -403,7 +403,11 @@ DEFAULT_CONFIG = {
"container_persistent": True, # Persist filesystem across sessions
# Docker volume mounts — share host directories with the container.
# Each entry is "host_path:container_path" (standard Docker -v syntax).
- # Example: ["/home/user/projects:/workspace/projects", "/data:/data"]
+ # Example:
+ # ["/home/user/projects:/workspace/projects",
+ # "/home/user/.hermes/cache/documents:/output"]
+ # For gateway MEDIA delivery, write inside Docker to /output/... and emit
+ # the host-visible path in MEDIA:, not the container path.
"docker_volumes": [],
# Explicit opt-in: mount the host cwd into /workspace for Docker sessions.
# Default off because passing host directories into a sandbox weakens isolation.
@@ -737,9 +741,14 @@ DEFAULT_CONFIG = {
# manual — always prompt the user (default)
# smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk
# off — skip all approval prompts (equivalent to --yolo)
+ #
+ # cron_mode — what to do when a cron job hits a dangerous command:
+ # deny — block the command and let the agent find another way (default, safe)
+ # approve — auto-approve all dangerous commands in cron jobs
"approvals": {
"mode": "manual",
"timeout": 60,
+ "cron_mode": "deny",
},
# Permanently allowed dangerous command patterns (added via "always" approval)
@@ -771,6 +780,20 @@ DEFAULT_CONFIG = {
"wrap_response": True,
},
+ # execute_code settings — controls the tool used for programmatic tool calls.
+ "code_execution": {
+ # Execution mode:
+ # project (default) — scripts run in the session's working directory
+ # with the active virtualenv/conda env's python, so project deps
+ # (pandas, torch, project packages) and relative paths resolve.
+ # strict — scripts run in an isolated temp directory with
+ # hermes-agent's own python (sys.executable). Maximum isolation
+ # and reproducibility; project deps and relative paths won't work.
+ # Env scrubbing (strips *_API_KEY, *_TOKEN, *_SECRET, ...) and the
+ # tool whitelist apply identically in both modes.
+ "mode": "project",
+ },
+
# Logging — controls file logging to ~/.hermes/logs/.
# agent.log captures INFO+ (all agent activity); errors.log captures WARNING+.
"logging": {
@@ -788,7 +811,7 @@ DEFAULT_CONFIG = {
},
# Config schema version - bump this when adding new required fields
- "_config_version": 18,
+ "_config_version": 19,
}
# =============================================================================
@@ -2842,7 +2865,7 @@ _FALLBACK_COMMENT = """
# minimax (MINIMAX_API_KEY) — MiniMax
# minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China)
#
-# For custom OpenAI-compatible endpoints, add base_url and api_key_env.
+# For custom OpenAI-compatible endpoints, add base_url and key_env.
#
# fallback_model:
# provider: openrouter
@@ -2886,7 +2909,7 @@ _COMMENTED_SECTIONS = """
# minimax (MINIMAX_API_KEY) — MiniMax
# minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China)
#
-# For custom OpenAI-compatible endpoints, add base_url and api_key_env.
+# For custom OpenAI-compatible endpoints, add base_url and key_env.
#
# fallback_model:
# provider: openrouter
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 28c4af1fa8a..4138aeaa278 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -895,8 +895,8 @@ def run_doctor(args):
_model_count = len(_br_resp.get("modelSummaries", []))
print(f"\r {color('✓', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)} ")
except ImportError:
- print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color('(boto3 not installed — pip install hermes-agent[bedrock])', Colors.DIM)} ")
- issues.append("Install boto3 for Bedrock: pip install hermes-agent[bedrock]")
+ print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'(boto3 not installed — {sys.executable} -m pip install boto3)', Colors.DIM)} ")
+ issues.append(f"Install boto3 for Bedrock: {sys.executable} -m pip install boto3")
except Exception as _e:
_err_name = type(_e).__name__
print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)} ")
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index e2e2a774f5a..71fc6ae3810 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -897,6 +897,10 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
_ensure_tui_node()
def _node_bin(bin: str) -> str:
+ if bin == "node":
+ env_node = os.environ.get("HERMES_NODE")
+ if env_node and os.path.isfile(env_node) and os.access(env_node, os.X_OK):
+ return env_node
path = shutil.which(bin)
if not path:
print(f"{bin} not found — install Node.js to use the TUI.")
@@ -3969,7 +3973,7 @@ def _model_flow_anthropic(config, current_model=""):
elif choice == "2":
print()
- print(" Get an API key at: https://console.anthropic.com/settings/keys")
+ print(" Get an API key at: https://platform.claude.com/settings/keys")
print()
try:
import getpass
@@ -4985,8 +4989,187 @@ def _update_node_dependencies() -> None:
print(f" {stderr.splitlines()[-1]}")
+class _UpdateOutputStream:
+ """Stream wrapper used during ``hermes update`` to survive terminal loss.
+
+ Wraps the process's original stdout/stderr so that:
+
+ * Every write is also mirrored to an append-only log file
+ (``~/.hermes/logs/update.log``) that users can inspect after the
+ terminal disconnects.
+ * Writes to the original stream that fail with ``BrokenPipeError`` /
+ ``OSError`` / ``ValueError`` (closed file) no longer cascade into
+ process exit — the update keeps going, only the on-screen output
+ stops.
+
+ Combined with ``SIGHUP -> SIG_IGN`` installed by
+ ``_install_hangup_protection``, this makes ``hermes update`` safe to
+ run in a plain SSH session that might disconnect mid-install.
+ """
+
+ def __init__(self, original, log_file):
+ self._original = original
+ self._log = log_file
+ self._original_broken = False
+
+ def write(self, data):
+ # Mirror to the log file first — it's the most reliable destination.
+ if self._log is not None:
+ try:
+ self._log.write(data)
+ except Exception:
+ # Log errors should never abort the update.
+ pass
+
+ if self._original_broken:
+ return len(data) if isinstance(data, (str, bytes)) else 0
+
+ try:
+ return self._original.write(data)
+ except (BrokenPipeError, OSError, ValueError):
+ # Terminal vanished (SSH disconnect, shell close). Stop trying
+ # to write to it, but keep the update running.
+ self._original_broken = True
+ return len(data) if isinstance(data, (str, bytes)) else 0
+
+ def flush(self):
+ if self._log is not None:
+ try:
+ self._log.flush()
+ except Exception:
+ pass
+ if self._original_broken:
+ return
+ try:
+ self._original.flush()
+ except (BrokenPipeError, OSError, ValueError):
+ self._original_broken = True
+
+ def isatty(self):
+ if self._original_broken:
+ return False
+ try:
+ return self._original.isatty()
+ except Exception:
+ return False
+
+ def fileno(self):
+ # Some tools probe fileno(); defer to the underlying stream and let
+ # callers handle failures (same behaviour as the unwrapped stream).
+ return self._original.fileno()
+
+ def __getattr__(self, name):
+ return getattr(self._original, name)
+
+
+def _install_hangup_protection(gateway_mode: bool = False):
+ """Protect ``cmd_update`` from SIGHUP and broken terminal pipes.
+
+ Users commonly run ``hermes update`` in an SSH session or a terminal
+ that may close mid-install. Without protection, ``SIGHUP`` from the
+ terminal kills the Python process during ``pip install`` and leaves
+ the venv half-installed; the documented workaround ("use screen /
+ tmux") shouldn't be required for something as routine as an update.
+
+ Protections installed:
+
+ 1. ``SIGHUP`` is set to ``SIG_IGN``. POSIX preserves ``SIG_IGN``
+ across ``exec()``, so pip and git subprocesses also stop dying on
+ hangup.
+ 2. ``sys.stdout`` / ``sys.stderr`` are wrapped to mirror output to
+ ``~/.hermes/logs/update.log`` and to silently absorb
+ ``BrokenPipeError`` when the terminal vanishes.
+
+ ``SIGINT`` (Ctrl-C) and ``SIGTERM`` (systemd shutdown) are
+ **intentionally left alone** — those are legitimate cancellation
+ signals the user or OS sent on purpose.
+
+ In gateway mode (``hermes update --gateway``) the update is already
+ spawned detached from a terminal, so this function is a no-op.
+
+ Returns a dict that ``cmd_update`` can pass to
+ ``_finalize_update_output`` on exit. Returning a dict rather than a
+ tuple keeps the call site forward-compatible with future additions.
+ """
+ state = {
+ "prev_stdout": sys.stdout,
+ "prev_stderr": sys.stderr,
+ "log_file": None,
+ "installed": False,
+ }
+
+ if gateway_mode:
+ return state
+
+ import signal as _signal
+
+ # (1) Ignore SIGHUP for the remainder of this process.
+ if hasattr(_signal, "SIGHUP"):
+ try:
+ _signal.signal(_signal.SIGHUP, _signal.SIG_IGN)
+ except (ValueError, OSError):
+ # Called from a non-main thread — not fatal. The update still
+ # runs, just without hangup protection.
+ pass
+
+ # (2) Mirror output to update.log and wrap stdio for broken-pipe
+ # tolerance. Any failure here is non-fatal; we just skip the wrap.
+ try:
+ from hermes_cli.config import get_hermes_home
+
+ logs_dir = get_hermes_home() / "logs"
+ logs_dir.mkdir(parents=True, exist_ok=True)
+ log_path = logs_dir / "update.log"
+ log_file = open(log_path, "a", buffering=1, encoding="utf-8")
+
+ import datetime as _dt
+
+ log_file.write(
+ f"\n=== hermes update started "
+ f"{_dt.datetime.now().isoformat(timespec='seconds')} ===\n"
+ )
+
+ state["log_file"] = log_file
+ sys.stdout = _UpdateOutputStream(state["prev_stdout"], log_file)
+ sys.stderr = _UpdateOutputStream(state["prev_stderr"], log_file)
+ state["installed"] = True
+ except Exception:
+ # Leave stdio untouched on any setup failure. Update continues
+ # without mirroring.
+ state["log_file"] = None
+
+ return state
+
+
+def _finalize_update_output(state):
+ """Restore stdio and close the update.log handle opened by ``_install_hangup_protection``."""
+ if not state:
+ return
+ if state.get("installed"):
+ try:
+ sys.stdout = state.get("prev_stdout", sys.stdout)
+ except Exception:
+ pass
+ try:
+ sys.stderr = state.get("prev_stderr", sys.stderr)
+ except Exception:
+ pass
+ log_file = state.get("log_file")
+ if log_file is not None:
+ try:
+ log_file.flush()
+ log_file.close()
+ except Exception:
+ pass
+
+
def cmd_update(args):
- """Update Hermes Agent to the latest version."""
+ """Update Hermes Agent to the latest version.
+
+ Thin wrapper around ``_cmd_update_impl``: installs hangup protection,
+ runs the update, then restores stdio on the way out (even on
+ ``sys.exit`` or unhandled exceptions).
+ """
from hermes_cli.config import is_managed, managed_error
if is_managed():
@@ -4994,6 +5177,20 @@ def cmd_update(args):
return
gateway_mode = getattr(args, "gateway", False)
+
+ # Protect against mid-update terminal disconnects (SIGHUP) and tolerate
+ # writes to a closed stdout. No-op in gateway mode. See
+ # _install_hangup_protection for rationale.
+ _update_io_state = _install_hangup_protection(gateway_mode=gateway_mode)
+ try:
+ _cmd_update_impl(args, gateway_mode=gateway_mode)
+ finally:
+ _finalize_update_output(_update_io_state)
+
+
+def _cmd_update_impl(args, gateway_mode: bool):
+ """Body of ``cmd_update`` — kept separate so the wrapper can always
+ restore stdio even on ``sys.exit``."""
# In gateway mode, use file-based IPC for prompts instead of stdin
gw_input_fn = (
(lambda prompt, default="": _gateway_prompt(prompt, default))
@@ -6029,11 +6226,12 @@ def cmd_dashboard(args):
import uvicorn # noqa: F401
except ImportError:
print("Web UI dependencies not installed.")
- print("Install them with: pip install hermes-agent[web]")
+ print(f"Install them with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'")
sys.exit(1)
- if not _build_web_ui(PROJECT_ROOT / "web", fatal=True):
- sys.exit(1)
+ if "HERMES_WEB_DIST" not in os.environ:
+ if not _build_web_ui(PROJECT_ROOT / "web", fatal=True):
+ sys.exit(1)
from hermes_cli.web_server import start_server
@@ -6804,6 +7002,13 @@ For more help on a command:
wh_sub.add_argument(
"--secret", default="", help="HMAC secret (auto-generated if omitted)"
)
+ wh_sub.add_argument(
+ "--deliver-only",
+ action="store_true",
+ help="Skip the agent — deliver the rendered prompt directly as the "
+ "message. Zero LLM cost. Requires --deliver to be a real target "
+ "(not 'log').",
+ )
webhook_subparsers.add_parser(
"list", aliases=["ls"], help="List all dynamic subscriptions"
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index cbbeef62d44..a0d7c2220c1 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -133,8 +133,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"gemini-2.5-pro",
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
- # Gemma open models (also served via AI Studio)
- "gemma-4-31b-it",
],
"google-gemini-cli": [
"gemini-2.5-pro",
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 8770386b73e..f969bd4bd16 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -91,7 +91,6 @@ _DEFAULT_PROVIDER_MODELS = {
"gemini": [
"gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
"gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
- "gemma-4-31b-it",
],
"zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
@@ -1461,7 +1460,9 @@ def setup_agent_settings(config: dict):
)
print_info("Maximum tool-calling iterations per conversation.")
print_info("Higher = more complex tasks, but costs more tokens.")
- print_info("Default is 90, which works for most tasks. Use 150+ for open exploration.")
+ print_info(
+ f"Press Enter to keep {current_max}. Use 90 for most tasks or 150+ for open exploration."
+ )
max_iter_str = prompt("Max iterations", current_max)
try:
diff --git a/hermes_cli/uninstall.py b/hermes_cli/uninstall.py
index 8d8e3393b36..67cea418209 100644
--- a/hermes_cli/uninstall.py
+++ b/hermes_cli/uninstall.py
@@ -118,59 +118,166 @@ def remove_wrapper_script():
def uninstall_gateway_service():
- """Stop and uninstall the gateway service if running."""
+ """Stop and uninstall the gateway service (systemd, launchd) and kill any
+ standalone gateway processes.
+
+ Delegates to the gateway module which handles:
+ - Linux: user + system systemd services (with proper DBUS env setup)
+ - macOS: launchd plists
+ - All platforms: standalone ``hermes gateway run`` processes
+ - Termux/Android: skips systemd (no systemd on Android), still kills standalone processes
+ """
import platform
-
- if platform.system() != "Linux":
- return False
+ stopped_something = False
- prefix = os.getenv("PREFIX", "")
- if os.getenv("TERMUX_VERSION") or "com.termux/files/usr" in prefix:
- return False
-
+ # 1. Kill any standalone gateway processes (all platforms, including Termux)
try:
- from hermes_cli.gateway import get_service_name
- svc_name = get_service_name()
- except Exception:
- svc_name = "hermes-gateway"
-
- service_file = Path.home() / ".config" / "systemd" / "user" / f"{svc_name}.service"
-
- if not service_file.exists():
- return False
-
- try:
- # Stop the service
- subprocess.run(
- ["systemctl", "--user", "stop", svc_name],
- capture_output=True,
- check=False
- )
-
- # Disable the service
- subprocess.run(
- ["systemctl", "--user", "disable", svc_name],
- capture_output=True,
- check=False
- )
-
- # Remove service file
- service_file.unlink()
-
- # Reload systemd
- subprocess.run(
- ["systemctl", "--user", "daemon-reload"],
- capture_output=True,
- check=False
- )
-
- return True
-
+ from hermes_cli.gateway import kill_gateway_processes, find_gateway_pids
+ pids = find_gateway_pids()
+ if pids:
+ killed = kill_gateway_processes()
+ if killed:
+ log_success(f"Killed {killed} running gateway process(es)")
+ stopped_something = True
except Exception as e:
- log_warn(f"Could not fully remove gateway service: {e}")
+ log_warn(f"Could not check for gateway processes: {e}")
+
+ system = platform.system()
+
+ # Termux/Android has no systemd and no launchd — nothing left to do.
+ prefix = os.getenv("PREFIX", "")
+ is_termux = bool(os.getenv("TERMUX_VERSION") or "com.termux/files/usr" in prefix)
+ if is_termux:
+ return stopped_something
+
+ # 2. Linux: uninstall systemd services (both user and system scopes)
+ if system == "Linux":
+ try:
+ from hermes_cli.gateway import (
+ get_systemd_unit_path,
+ get_service_name,
+ _systemctl_cmd,
+ )
+ svc_name = get_service_name()
+
+ for is_system in (False, True):
+ unit_path = get_systemd_unit_path(system=is_system)
+ if not unit_path.exists():
+ continue
+
+ scope = "system" if is_system else "user"
+ try:
+ if is_system and os.geteuid() != 0:
+ log_warn(f"System gateway service exists at {unit_path} "
+ f"but needs sudo to remove")
+ continue
+
+ cmd = _systemctl_cmd(is_system)
+ subprocess.run(cmd + ["stop", svc_name],
+ capture_output=True, check=False)
+ subprocess.run(cmd + ["disable", svc_name],
+ capture_output=True, check=False)
+ unit_path.unlink()
+ subprocess.run(cmd + ["daemon-reload"],
+ capture_output=True, check=False)
+ log_success(f"Removed {scope} gateway service ({unit_path})")
+ stopped_something = True
+ except Exception as e:
+ log_warn(f"Could not remove {scope} gateway service: {e}")
+ except Exception as e:
+ log_warn(f"Could not check systemd gateway services: {e}")
+
+ # 3. macOS: uninstall launchd plist
+ elif system == "Darwin":
+ try:
+ from hermes_cli.gateway import get_launchd_plist_path
+ plist_path = get_launchd_plist_path()
+ if plist_path.exists():
+ subprocess.run(["launchctl", "unload", str(plist_path)],
+ capture_output=True, check=False)
+ plist_path.unlink()
+ log_success(f"Removed macOS gateway service ({plist_path})")
+ stopped_something = True
+ except Exception as e:
+ log_warn(f"Could not remove launchd gateway service: {e}")
+
+ return stopped_something
+
+
+def _is_default_hermes_home(hermes_home: Path) -> bool:
+ """Return True when ``hermes_home`` points at the default (non-profile) root."""
+ try:
+ from hermes_constants import get_default_hermes_root
+ return hermes_home.resolve() == get_default_hermes_root().resolve()
+ except Exception:
return False
+def _discover_named_profiles():
+ """Return a list of ``ProfileInfo`` for every non-default profile, or ``[]``
+ if profile support is unavailable or nothing is installed beyond the
+ default root."""
+ try:
+ from hermes_cli.profiles import list_profiles
+ except Exception:
+ return []
+ try:
+ return [p for p in list_profiles() if not getattr(p, "is_default", False)]
+ except Exception as e:
+ log_warn(f"Could not enumerate profiles: {e}")
+ return []
+
+
+def _uninstall_profile(profile) -> None:
+ """Fully uninstall a single named profile: stop its gateway service,
+ remove its alias wrapper, and wipe its HERMES_HOME directory.
+
+ We shell out to ``hermes -p <name> gateway stop|uninstall`` because
+ service names, unit paths, and plist paths are all derived from the
+ current HERMES_HOME and can't be easily switched in-process.
+ """
+ import sys as _sys
+ name = profile.name
+ profile_home = profile.path
+
+ log_info(f"Uninstalling profile '{name}'...")
+
+ # 1. Stop and remove this profile's gateway service.
+ # Use `python -m hermes_cli.main` so we don't depend on a `hermes`
+ # wrapper that may be half-removed mid-uninstall.
+ hermes_invocation = [_sys.executable, "-m", "hermes_cli.main", "--profile", name]
+ for subcmd in ("stop", "uninstall"):
+ try:
+ subprocess.run(
+ hermes_invocation + ["gateway", subcmd],
+ capture_output=True,
+ text=True,
+ timeout=60,
+ check=False,
+ )
+ except subprocess.TimeoutExpired:
+ log_warn(f" Gateway {subcmd} timed out for '{name}'")
+ except Exception as e:
+ log_warn(f" Could not run gateway {subcmd} for '{name}': {e}")
+
+ # 2. Remove the wrapper alias script at ~/.local/bin/<name> (if any).
+ alias_path = getattr(profile, "alias_path", None)
+ if alias_path and alias_path.exists():
+ try:
+ alias_path.unlink()
+ log_success(f" Removed alias {alias_path}")
+ except Exception as e:
+ log_warn(f" Could not remove alias {alias_path}: {e}")
+
+ # 3. Wipe the profile's HERMES_HOME directory.
+ try:
+ if profile_home.exists():
+ shutil.rmtree(profile_home)
+ log_success(f" Removed {profile_home}")
+ except Exception as e:
+ log_warn(f" Could not remove {profile_home}: {e}")
+
+
def run_uninstall(args):
"""
Run the uninstall process.
@@ -181,7 +288,13 @@ def run_uninstall(args):
"""
project_root = get_project_root()
hermes_home = get_hermes_home()
-
+
+ # Detect named profiles when uninstalling from the default root —
+ # offer to clean them up too instead of leaving zombie HERMES_HOMEs
+ # and systemd units behind.
+ is_default_profile = _is_default_hermes_home(hermes_home)
+ named_profiles = _discover_named_profiles() if is_default_profile else []
+
print()
print(color("┌─────────────────────────────────────────────────────────┐", Colors.MAGENTA, Colors.BOLD))
print(color("│ ⚕ Hermes Agent Uninstaller │", Colors.MAGENTA, Colors.BOLD))
@@ -195,6 +308,13 @@ def run_uninstall(args):
print(f" Secrets: {hermes_home / '.env'}")
print(f" Data: {hermes_home / 'cron/'}, {hermes_home / 'sessions/'}, {hermes_home / 'logs/'}")
print()
+
+ if named_profiles:
+ print(color("Other profiles detected:", Colors.CYAN, Colors.BOLD))
+ for p in named_profiles:
+ running = " (gateway running)" if getattr(p, "gateway_running", False) else ""
+ print(f" • {p.name}{running}: {p.path}")
+ print()
# Ask for confirmation
print(color("Uninstall Options:", Colors.YELLOW, Colors.BOLD))
@@ -221,12 +341,40 @@ def run_uninstall(args):
return
full_uninstall = (choice == "2")
-
+
+ # When doing a full uninstall from the default profile, also offer to
+ # remove any named profiles — stopping their gateway services, unlinking
+ # their alias wrappers, and wiping their HERMES_HOME dirs. Otherwise
+ # those leave zombie services and data behind.
+ remove_profiles = False
+ if full_uninstall and named_profiles:
+ print()
+ print(color("Other profiles will NOT be removed by default.", Colors.YELLOW))
+ print(f"Found {len(named_profiles)} named profile(s): " +
+ ", ".join(p.name for p in named_profiles))
+ print()
+ try:
+ resp = input(color(
+ f"Also stop and remove these {len(named_profiles)} profile(s)? [y/N]: ",
+ Colors.BOLD
+ )).strip().lower()
+ except (KeyboardInterrupt, EOFError):
+ print()
+ print("Cancelled.")
+ return
+ remove_profiles = resp in ("y", "yes")
+
# Final confirmation
print()
if full_uninstall:
print(color("⚠️ WARNING: This will permanently delete ALL Hermes data!", Colors.RED, Colors.BOLD))
print(color(" Including: configs, API keys, sessions, scheduled jobs, logs", Colors.RED))
+ if remove_profiles:
+ print(color(
+ f" Plus {len(named_profiles)} profile(s): " +
+ ", ".join(p.name for p in named_profiles),
+ Colors.RED
+ ))
else:
print("This will remove the Hermes code but keep your configuration and data.")
@@ -247,12 +395,10 @@ def run_uninstall(args):
print(color("Uninstalling...", Colors.CYAN, Colors.BOLD))
print()
- # 1. Stop and uninstall gateway service
- log_info("Checking for gateway service...")
- if uninstall_gateway_service():
- log_success("Gateway service stopped and removed")
- else:
- log_info("No gateway service found")
+ # 1. Stop and uninstall gateway service + kill standalone processes
+ log_info("Checking for running gateway...")
+ if not uninstall_gateway_service():
+ log_info("No gateway service or processes found")
# 2. Remove PATH entries from shell configs
log_info("Removing PATH entries from shell configs...")
@@ -291,8 +437,17 @@ def run_uninstall(args):
log_warn(f"Could not fully remove {project_root}: {e}")
log_info("You may need to manually remove it")
- # 5. Optionally remove ~/.hermes/ data directory
+ # 5. Optionally remove ~/.hermes/ data directory (and named profiles)
if full_uninstall:
+ # 5a. Stop and remove each named profile's gateway service and
+ # alias wrapper. The profile HERMES_HOME dirs live under
+ # ``<default-root>/profiles/<name>/`` and will be swept away by the
+ # rmtree below, but services + alias scripts live OUTSIDE the
+ # default root and have to be cleaned up explicitly.
+ if remove_profiles and named_profiles:
+ for prof in named_profiles:
+ _uninstall_profile(prof)
+
log_info("Removing configuration and data...")
try:
if hermes_home.exists():
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index e5f2eb53767..110b81e4b5e 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -56,10 +56,10 @@ try:
except ImportError:
raise SystemExit(
"Web UI requires fastapi and uvicorn.\n"
- "Run 'hermes web' to auto-install, or: pip install hermes-agent[web]"
+ f"Install with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'"
)
-WEB_DIST = Path(__file__).parent / "web_dist"
+WEB_DIST = Path(os.environ["HERMES_WEB_DIST"]) if "HERMES_WEB_DIST" in os.environ else Path(__file__).parent / "web_dist"
_log = logging.getLogger(__name__)
app = FastAPI(title="Hermes Agent", version=__version__)
diff --git a/hermes_cli/webhook.py b/hermes_cli/webhook.py
index 8ff135e29e5..378f11b4a7e 100644
--- a/hermes_cli/webhook.py
+++ b/hermes_cli/webhook.py
@@ -155,6 +155,15 @@ def _cmd_subscribe(args):
"created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
}
+ if getattr(args, "deliver_only", False):
+ if route["deliver"] == "log":
+ print(
+ "Error: --deliver-only requires --deliver to be a real target "
+ "(telegram, discord, slack, github_comment, etc.) — not 'log'."
+ )
+ return
+ route["deliver_only"] = True
+
if args.deliver_chat_id:
route["deliver_extra"] = {"chat_id": args.deliver_chat_id}
@@ -172,9 +181,12 @@ def _cmd_subscribe(args):
else:
print(" Events: (all)")
print(f" Deliver: {route['deliver']}")
+ if route.get("deliver_only"):
+ print(" Mode: direct delivery (no agent, zero LLM cost)")
if route.get("prompt"):
prompt_preview = route["prompt"][:80] + ("..." if len(route["prompt"]) > 80 else "")
- print(f" Prompt: {prompt_preview}")
+ label = "Message" if route.get("deliver_only") else "Prompt"
+ print(f" {label}: {prompt_preview}")
print(f"\n Configure your service to POST to the URL above.")
print(f" Use the secret for HMAC-SHA256 signature validation.")
print(f" The gateway must be running to receive events (hermes gateway run).\n")
@@ -192,6 +204,8 @@ def _cmd_list(args):
for name, route in subs.items():
events = ", ".join(route.get("events", [])) or "(all)"
deliver = route.get("deliver", "log")
+ if route.get("deliver_only"):
+ deliver = f"{deliver} (direct — no agent)"
desc = route.get("description", "")
print(f" ◆ {name}")
if desc:
diff --git a/hermes_state.py b/hermes_state.py
index 5e563666e83..af97f7fbd89 100644
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -987,6 +987,22 @@ class SessionDB:
return sanitized.strip()
+
+ @staticmethod
+ def _contains_cjk(text: str) -> bool:
+ """Check if text contains CJK (Chinese, Japanese, Korean) characters."""
+ for ch in text:
+ cp = ord(ch)
+ if (0x4E00 <= cp <= 0x9FFF or # CJK Unified Ideographs
+ 0x3400 <= cp <= 0x4DBF or # CJK Extension A
+ 0x20000 <= cp <= 0x2A6DF or # CJK Extension B
+ 0x3000 <= cp <= 0x303F or # CJK Symbols
+ 0x3040 <= cp <= 0x309F or # Hiragana
+ 0x30A0 <= cp <= 0x30FF or # Katakana
+ 0xAC00 <= cp <= 0xD7AF): # Hangul Syllables
+ return True
+ return False
+
def search_messages(
self,
query: str,
@@ -1062,8 +1078,47 @@ class SessionDB:
cursor = self._conn.execute(sql, params)
except sqlite3.OperationalError:
# FTS5 query syntax error despite sanitization — return empty
- return []
- matches = [dict(row) for row in cursor.fetchall()]
+ # unless query contains CJK (fall back to LIKE below)
+ if not self._contains_cjk(query):
+ return []
+ matches = []
+ else:
+ matches = [dict(row) for row in cursor.fetchall()]
+
+ # LIKE fallback for CJK queries: FTS5 default tokenizer splits CJK
+ # characters individually, causing multi-character queries to fail.
+ if not matches and self._contains_cjk(query):
+ raw_query = query.strip('"').strip()
+ like_where = ["m.content LIKE ?"]
+ like_params: list = [f"%{raw_query}%"]
+ if source_filter is not None:
+ like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
+ like_params.extend(source_filter)
+ if exclude_sources is not None:
+ like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
+ like_params.extend(exclude_sources)
+ if role_filter:
+ like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
+ like_params.extend(role_filter)
+ like_sql = f"""
+ SELECT m.id, m.session_id, m.role,
+ substr(m.content,
+ max(1, instr(m.content, ?) - 40),
+ 120) AS snippet,
+ m.content, m.timestamp, m.tool_name,
+ s.source, s.model, s.started_at AS session_started
+ FROM messages m
+ JOIN sessions s ON s.id = m.session_id
+ WHERE {' AND '.join(like_where)}
+ ORDER BY m.timestamp DESC
+ LIMIT ? OFFSET ?
+ """
+ like_params.extend([limit, offset])
+ # instr() parameter goes first in the bound list
+ like_params = [raw_query] + like_params
+ with self._lock:
+ like_cursor = self._conn.execute(like_sql, like_params)
+ matches = [dict(row) for row in like_cursor.fetchall()]
# Add surrounding context (1 message before + after each match).
# Done outside the lock so we don't hold it across N sequential queries.
diff --git a/mcp_serve.py b/mcp_serve.py
index e8294d1f91f..e0aeb706191 100644
--- a/mcp_serve.py
+++ b/mcp_serve.py
@@ -433,7 +433,7 @@ def create_mcp_server(event_bridge: Optional[EventBridge] = None) -> "FastMCP":
if not _MCP_SERVER_AVAILABLE:
raise ImportError(
"MCP server requires the 'mcp' package. "
- "Install with: pip install 'hermes-agent[mcp]'"
+ f"Install with: {sys.executable} -m pip install 'mcp'"
)
mcp = FastMCP(
@@ -838,7 +838,7 @@ def run_mcp_server(verbose: bool = False) -> None:
if not _MCP_SERVER_AVAILABLE:
print(
"Error: MCP server requires the 'mcp' package.\n"
- "Install with: pip install 'hermes-agent[mcp]'",
+ f"Install with: {sys.executable} -m pip install 'mcp'",
file=sys.stderr,
)
sys.exit(1)
diff --git a/model_tools.py b/model_tools.py
index 801255b7978..5ec806e78bf 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -274,9 +274,9 @@ def get_tool_definitions(
# execute_code" even when the API key isn't configured or the toolset is
# disabled (#560-discord).
if "execute_code" in available_tool_names:
- from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema
+ from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema, _get_execution_mode
sandbox_enabled = SANDBOX_ALLOWED_TOOLS & available_tool_names
- dynamic_schema = build_execute_code_schema(sandbox_enabled)
+ dynamic_schema = build_execute_code_schema(sandbox_enabled, mode=_get_execution_mode())
for i, td in enumerate(filtered_tools):
if td.get("function", {}).get("name") == "execute_code":
filtered_tools[i] = {"type": "function", "function": dynamic_schema}
diff --git a/nix/checks.nix b/nix/checks.nix
index 55068a94f16..984016a4f47 100644
--- a/nix/checks.nix
+++ b/nix/checks.nix
@@ -37,7 +37,30 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
in {
packages.configKeys = configKeys;
- checks = lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux {
+ checks = {
+ # Cross-platform evaluation — catches "not supported for interpreter"
+ # errors (e.g. sphinx dropping python311) without needing a darwin builder.
+ # Evaluation is pure and instant; it doesn't build anything.
+ cross-eval = let
+ targetSystems = builtins.filter
+ (s: inputs.self.packages ? ${s})
+ [ "x86_64-linux" "aarch64-linux" "aarch64-darwin" "x86_64-darwin" ];
+ tryEvalPkg = sys:
+ let pkg = inputs.self.packages.${sys}.default;
+ in builtins.tryEval (builtins.seq pkg.drvPath true);
+ results = map (sys: { inherit sys; result = tryEvalPkg sys; }) targetSystems;
+ failures = builtins.filter (r: !r.result.success) results;
+ failMsg = lib.concatMapStringsSep "\n" (r: " - ${r.sys}") failures;
+ in pkgs.runCommand "hermes-cross-eval" { } (
+ if failures != [] then
+ builtins.throw "Package fails to evaluate on:\n${failMsg}"
+ else ''
+ echo "PASS: package evaluates on all ${toString (builtins.length targetSystems)} platforms"
+ mkdir -p $out
+ echo "ok" > $out/result
+ ''
+ );
+ } // lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux {
# Verify binaries exist and are executable
package-contents = pkgs.runCommand "hermes-package-contents" { } ''
set -e
@@ -125,6 +148,29 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
echo "ok" > $out/result
'';
+ # Verify HERMES_NODE is set in wrapper and points to Node 20+
+ # (string-width uses the /v regex flag which requires Node 20+)
+ hermes-node = pkgs.runCommand "hermes-node-version" { } ''
+ set -e
+ echo "=== Checking HERMES_NODE in wrapper ==="
+ grep -q "HERMES_NODE" ${hermes-agent}/bin/hermes || \
+ (echo "FAIL: HERMES_NODE not set in wrapper"; exit 1)
+ echo "PASS: HERMES_NODE present in wrapper"
+
+ HERMES_NODE=$(sed -n "s/^export HERMES_NODE='\(.*\)'/\1/p" ${hermes-agent}/bin/hermes)
+ test -x "$HERMES_NODE" || (echo "FAIL: HERMES_NODE=$HERMES_NODE not executable"; exit 1)
+ echo "PASS: HERMES_NODE executable at $HERMES_NODE"
+
+ NODE_MAJOR=$("$HERMES_NODE" --version | sed 's/^v//' | cut -d. -f1)
+ test "$NODE_MAJOR" -ge 20 || \
+ (echo "FAIL: Node v$NODE_MAJOR < 20, TUI needs /v regex flag support"; exit 1)
+ echo "PASS: Node v$NODE_MAJOR >= 20"
+
+ echo "=== All HERMES_NODE checks passed ==="
+ mkdir -p $out
+ echo "ok" > $out/result
+ '';
+
# Verify HERMES_MANAGED guard works on all mutation commands
managed-guard = pkgs.runCommand "hermes-managed-guard" { } ''
set -e
diff --git a/nix/devShell.nix b/nix/devShell.nix
index db39c9d9557..63edc59cf1e 100644
--- a/nix/devShell.nix
+++ b/nix/devShell.nix
@@ -12,7 +12,7 @@
devShells.default = pkgs.mkShell {
inputsFrom = packages;
packages = with pkgs; [
- python311 uv nodejs_22 ripgrep git openssh ffmpeg
+ python312 uv nodejs_22 ripgrep git openssh ffmpeg
];
shellHook = let
diff --git a/nix/nixosModules.nix b/nix/nixosModules.nix
index 75b3dca31b2..3f2709f8145 100644
--- a/nix/nixosModules.nix
+++ b/nix/nixosModules.nix
@@ -121,11 +121,19 @@
# ── Provision apt packages (first boot only, cached in writable layer) ──
# sudo: agent self-modification
# nodejs/npm: writable node so npm i -g works (nix store copies are read-only)
- # curl: needed for uv installer
+ # Node 22 via NodeSource — Ubuntu 24.04 ships Node 18 which is EOL.
+ # curl: needed for uv installer + NodeSource setup
if [ ! -f /var/lib/hermes-tools-provisioned ] && command -v apt-get >/dev/null 2>&1; then
echo "First boot: provisioning agent tools..."
apt-get update -qq
- apt-get install -y -qq sudo nodejs npm curl
+ apt-get install -y -qq sudo curl ca-certificates gnupg
+ mkdir -p /etc/apt/keyrings
+ curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
+ | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
+ echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" \
+ > /etc/apt/sources.list.d/nodesource.list
+ apt-get update -qq
+ apt-get install -y -qq nodejs
touch /var/lib/hermes-tools-provisioned
fi
@@ -140,15 +148,14 @@
su -s /bin/sh "$TARGET_USER" -c 'curl -LsSf https://astral.sh/uv/install.sh | sh' || true
fi
- # Python 3.11 venv — gives the agent a writable Python with pip.
- # Uses uv to install Python 3.11 (Ubuntu 24.04 ships 3.12).
+ # Python 3.12 venv — gives the agent a writable Python with pip.
# --seed includes pip/setuptools so bare `pip install` works.
_UV_BIN="$TARGET_HOME/.local/bin/uv"
if [ ! -d "$TARGET_HOME/.venv" ] && [ -x "$_UV_BIN" ]; then
su -s /bin/sh "$TARGET_USER" -c "
export PATH=\"\$HOME/.local/bin:\$PATH\"
- uv python install 3.11
- uv venv --python 3.11 --seed \"\$HOME/.venv\"
+ uv python install 3.12
+ uv venv --python 3.12 --seed \"\$HOME/.venv\"
" || true
fi
@@ -171,7 +178,7 @@
# Package and entrypoint use stable symlinks (current-package, current-entrypoint)
# so they can update without recreation. Env vars go through $HERMES_HOME/.env.
containerIdentity = builtins.hashString "sha256" (builtins.toJSON {
- schema = 3; # bump when identity inputs change
+ schema = 4; # bump when identity inputs change (4: Node 18→22 via NodeSource)
image = cfg.container.image;
extraVolumes = cfg.container.extraVolumes;
extraOptions = cfg.container.extraOptions;
diff --git a/nix/packages.nix b/nix/packages.nix
index f39d9d0b2be..912be7843bd 100644
--- a/nix/packages.nix
+++ b/nix/packages.nix
@@ -18,6 +18,10 @@
filter = path: _type: !(pkgs.lib.hasInfix "/index-cache/" path);
};
+ hermesWeb = pkgs.callPackage ./web.nix {
+ npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
+ };
+
runtimeDeps = with pkgs; [
nodejs_22
ripgrep
@@ -52,6 +56,7 @@
mkdir -p $out/share/hermes-agent $out/bin
cp -r ${bundledSkills} $out/share/hermes-agent/skills
+ cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
# copy pre-built TUI (same layout as dev: ui-tui/dist/ + node_modules/)
mkdir -p $out/ui-tui
@@ -62,8 +67,10 @@
makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
--suffix PATH : "${runtimePath}" \
--set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
+ --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
--set HERMES_TUI_DIR $out/ui-tui \
- --set HERMES_PYTHON ${hermesVenv}/bin/python3
+ --set HERMES_PYTHON ${hermesVenv}/bin/python3 \
+ --set HERMES_NODE ${pkgs.nodejs_22}/bin/node
'')
[
"hermes"
@@ -80,7 +87,7 @@
STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
echo "hermes-agent: installing Python dependencies..."
- uv venv .venv --python ${pkgs.python311}/bin/python3 2>/dev/null || true
+ uv venv .venv --python ${pkgs.python312}/bin/python3 2>/dev/null || true
source .venv/bin/activate
uv pip install -e ".[all]"
[ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
@@ -103,6 +110,7 @@
};
tui = hermesTui;
+ web = hermesWeb;
};
};
}
diff --git a/nix/python.nix b/nix/python.nix
index 160b4ee790b..0bcd017e76d 100644
--- a/nix/python.nix
+++ b/nix/python.nix
@@ -1,6 +1,6 @@
# nix/python.nix — uv2nix virtual environment builder
{
- python311,
+ python312,
lib,
callPackage,
uv2nix,
@@ -35,30 +35,46 @@ let
};
};
+ # Legacy alibabacloud packages ship only sdists with setup.py/setup.cfg
+ # and no pyproject.toml, so setuptools isn't declared as a build dep.
+ buildSystemOverrides = final: prev: builtins.mapAttrs
+ (name: _: prev.${name}.overrideAttrs (old: {
+ nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ final.setuptools ];
+ }))
+ (lib.genAttrs [
+ "alibabacloud-credentials-api"
+ "alibabacloud-endpoint-util"
+ "alibabacloud-gateway-dingtalk"
+ "alibabacloud-gateway-spi"
+ "alibabacloud-tea"
+ ] (_: null));
+
pythonPackageOverrides = final: _prev:
if isAarch64Darwin then {
- numpy = mkPrebuiltOverride final python311.pkgs.numpy { };
+ numpy = mkPrebuiltOverride final python312.pkgs.numpy { };
- av = mkPrebuiltOverride final python311.pkgs.av { };
+ pyarrow = mkPrebuiltOverride final python312.pkgs.pyarrow { };
- humanfriendly = mkPrebuiltOverride final python311.pkgs.humanfriendly { };
+ av = mkPrebuiltOverride final python312.pkgs.av { };
- coloredlogs = mkPrebuiltOverride final python311.pkgs.coloredlogs {
+ humanfriendly = mkPrebuiltOverride final python312.pkgs.humanfriendly { };
+
+ coloredlogs = mkPrebuiltOverride final python312.pkgs.coloredlogs {
humanfriendly = [ ];
};
- onnxruntime = mkPrebuiltOverride final python311.pkgs.onnxruntime {
+ onnxruntime = mkPrebuiltOverride final python312.pkgs.onnxruntime {
coloredlogs = [ ];
numpy = [ ];
packaging = [ ];
};
- ctranslate2 = mkPrebuiltOverride final python311.pkgs.ctranslate2 {
+ ctranslate2 = mkPrebuiltOverride final python312.pkgs.ctranslate2 {
numpy = [ ];
pyyaml = [ ];
};
- faster-whisper = mkPrebuiltOverride final python311.pkgs.faster-whisper {
+ faster-whisper = mkPrebuiltOverride final python312.pkgs.faster-whisper {
av = [ ];
ctranslate2 = [ ];
huggingface-hub = [ ];
@@ -70,11 +86,12 @@ let
pythonSet =
(callPackage pyproject-nix.build.packages {
- python = python311;
+ python = python312;
}).overrideScope
(lib.composeManyExtensions [
pyproject-build-systems.overlays.default
overlay
+ buildSystemOverrides
pythonPackageOverrides
]);
in
diff --git a/nix/tui.nix b/nix/tui.nix
index 70eb67f949a..7303edecb9f 100644
--- a/nix/tui.nix
+++ b/nix/tui.nix
@@ -4,7 +4,7 @@ let
src = ../ui-tui;
npmDeps = pkgs.fetchNpmDeps {
inherit src;
- hash = "sha256-zsUPmbC6oMUO10EhS3ptvDjwlfpCSEmrkjyeORw7fac=";
+ hash = "sha256-mG3vpgGi4ljt4X3XIf3I/5mIcm+rVTUAmx2DQ6YVA90=";
};
packageJson = builtins.fromJSON (builtins.readFile (src + "/package.json"));
@@ -18,11 +18,6 @@ pkgs.buildNpmPackage {
doCheck = false;
- postPatch = ''
- # fetchNpmDeps strips the trailing newline; match it so the diff passes
- sed -i -z 's/\n$//' package-lock.json
- '';
-
installPhase = ''
runHook preInstall
diff --git a/nix/web.nix b/nix/web.nix
new file mode 100644
index 00000000000..247889753f6
--- /dev/null
+++ b/nix/web.nix
@@ -0,0 +1,63 @@
+# nix/web.nix — Hermes Web Dashboard (Vite/React) frontend build
+{ pkgs, npm-lockfile-fix, ... }:
+let
+ src = ../web;
+ npmDeps = pkgs.fetchNpmDeps {
+ inherit src;
+ hash = "sha256-Y0pOzdFG8BLjfvCLmsvqYpjxFjAQabXp1i7X9W/cCU4=";
+ };
+
+ npmLockHash = builtins.hashString "sha256" (builtins.readFile ../web/package-lock.json);
+in
+pkgs.buildNpmPackage {
+ pname = "hermes-web";
+ version = "0.0.0";
+ inherit src npmDeps;
+
+ doCheck = false;
+
+ buildPhase = ''
+ npx tsc -b
+ npx vite build --outDir dist
+ '';
+
+ installPhase = ''
+ runHook preInstall
+ cp -r dist $out
+ runHook postInstall
+ '';
+
+ nativeBuildInputs = [
+ (pkgs.writeShellScriptBin "update_web_lockfile" ''
+ set -euox pipefail
+
+ REPO_ROOT=$(git rev-parse --show-toplevel)
+
+ cd "$REPO_ROOT/web"
+ rm -rf node_modules/
+ npm cache clean --force
+ CI=true npm install
+ ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
+
+ NIX_FILE="$REPO_ROOT/nix/web.nix"
+ sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE
+ NIX_OUTPUT=$(nix build .#web 2>&1 || true)
+ NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}')
+ echo got new hash $NEW_HASH
+ sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE
+ nix build .#web
+ echo "Updated npm hash in $NIX_FILE to $NEW_HASH"
+ '')
+ ];
+
+ passthru.devShellHook = ''
+ STAMP=".nix-stamps/hermes-web"
+ STAMP_VALUE="${npmLockHash}"
+ if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
+ echo "hermes-web: installing npm dependencies..."
+ cd web && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd ..
+ mkdir -p .nix-stamps
+ echo "$STAMP_VALUE" > "$STAMP"
+ fi
+ '';
+}
diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
index c60d2c63561..1c099ca605f 100644
--- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md
+++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md
@@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen.
| Key | Default | Description |
|-----|---------|-------------|
| `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `3` | Min turns between dialectic API calls |
+| `dialecticCadence` | `2` | Min turns between dialectic API calls. Recommended 1–5 |
| `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection |
-Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn.
+Higher cadence values fire the dialectic LLM less often. `dialecticCadence: 2` means the engine fires every other turn. Setting it to `1` fires every turn.
### Depth (how many)
@@ -180,6 +180,8 @@ If `dialecticDepthLevels` is omitted, rounds use **proportional levels** derived
This keeps earlier passes cheap while using full depth on the final synthesis.
+**Depth at session start.** The session-start prewarm runs the full configured `dialecticDepth` in the background before turn 1. A single-pass prewarm on a cold peer often returns thin output — multi-pass depth runs the audit/reconcile cycle before the user ever speaks. Turn 1 consumes the prewarm result directly; if prewarm hasn't landed in time, turn 1 falls back to a synchronous call with a bounded timeout.
+
### Level (how hard)
Controls the **intensity** of each dialectic reasoning round.
@@ -368,7 +370,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso
| `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. |
| `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` |
| `contextCadence` | `1` | Min turns between context API calls |
-| `dialecticCadence` | `3` | Min turns between dialectic LLM calls |
+| `dialecticCadence` | `2` | Min turns between dialectic LLM calls (recommended 1–5) |
The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions.
diff --git a/optional-skills/creative/touchdesigner-mcp/SKILL.md b/optional-skills/creative/touchdesigner-mcp/SKILL.md
new file mode 100644
index 00000000000..d0bd348afc4
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/SKILL.md
@@ -0,0 +1,339 @@
+---
+name: touchdesigner-mcp
+description: "Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools."
+version: 1.0.0
+author: kshitijk4poor
+license: MIT
+metadata:
+ hermes:
+ tags: [TouchDesigner, MCP, twozero, creative-coding, real-time-visuals, generative-art, audio-reactive, VJ, installation, GLSL]
+ related_skills: [native-mcp, ascii-video, manim-video, hermes-video]
+
+---
+
+# TouchDesigner Integration (twozero MCP)
+
+## CRITICAL RULES
+
+1. **NEVER guess parameter names.** Call `td_get_par_info` for the op type FIRST. Your training data is wrong for TD 2025.32.
+2. **If `tdAttributeError` fires, STOP.** Call `td_get_operator_info` on the failing node before continuing.
+3. **NEVER hardcode absolute paths** in script callbacks. Use `me.parent()` / `scriptOp.parent()`.
+4. **Prefer native MCP tools over td_execute_python.** Use `td_create_operator`, `td_set_operator_pars`, `td_get_errors` etc. Only fall back to `td_execute_python` for complex multi-step logic.
+5. **Call `td_get_hints` before building.** It returns patterns specific to the op type you're working with.
+
+## Architecture
+
+```
+Hermes Agent -> MCP (Streamable HTTP) -> twozero.tox (port 40404) -> TD Python
+```
+
+36 native tools. Free plugin (no payment/license — confirmed April 2026).
+Context-aware (knows selected OP, current network).
+Hub health check: `GET http://localhost:40404/mcp` returns JSON with instance PID, project name, TD version.
+
+## Setup (Automated)
+
+Run the setup script to handle everything:
+
+```bash
+bash "${HERMES_HOME:-$HOME/.hermes}/skills/creative/touchdesigner-mcp/scripts/setup.sh"
+```
+
+The script will:
+1. Check if TD is running
+2. Download twozero.tox if not already cached
+3. Add `twozero_td` MCP server to Hermes config (if missing)
+4. Test the MCP connection on port 40404
+5. Report what manual steps remain (drag .tox into TD, enable MCP toggle)
+
+### Manual steps (one-time, cannot be automated)
+
+1. **Drag `~/Downloads/twozero.tox` into the TD network editor** → click Install
+2. **Enable MCP:** click twozero icon → Settings → mcp → "auto start MCP" → Yes
+3. **Restart Hermes session** to pick up the new MCP server
+
+After setup, verify:
+```bash
+nc -z 127.0.0.1 40404 && echo "twozero MCP: READY"
+```
+
+## Environment Notes
+
+- **Non-Commercial TD** caps resolution at 1280×1280. Use `outputresolution = 'custom'` and set width/height explicitly.
+- **Codecs:** `prores` (preferred on macOS) or `mjpa` as fallback. H.264/H.265/AV1 require a Commercial license.
+- Always call `td_get_par_info` before setting params — names vary by TD version (see CRITICAL RULES #1).
+
+## Workflow
+
+### Step 0: Discover (before building anything)
+
+```
+Call td_get_par_info with op_type for each type you plan to use.
+Call td_get_hints with the topic you're building (e.g. "glsl", "audio reactive", "feedback").
+Call td_get_focus to see where the user is and what's selected.
+Call td_get_network to see what already exists.
+```
+
+No temp nodes, no cleanup. These calls replace the old approach of creating throwaway operators just to discover parameter names.
+
+### Step 1: Clean + Build
+
+**IMPORTANT: Split cleanup and creation into SEPARATE MCP calls.** Destroying and recreating same-named nodes in one `td_execute_python` script causes "Invalid OP object" errors. See pitfalls #11b.
+
+Use `td_create_operator` for each node (handles viewport positioning automatically):
+
+```
+td_create_operator(type="noiseTOP", parent="/project1", name="bg", parameters={"resolutionw": 1280, "resolutionh": 720})
+td_create_operator(type="levelTOP", parent="/project1", name="brightness")
+td_create_operator(type="nullTOP", parent="/project1", name="out")
+```
+
+For bulk creation or wiring, use `td_execute_python`:
+
+```python
+# td_execute_python script:
+root = op('/project1')
+nodes = []
+for name, optype in [('bg', noiseTOP), ('fx', levelTOP), ('out', nullTOP)]:
+ n = root.create(optype, name)
+ nodes.append(n.path)
+# Wire chain
+for i in range(len(nodes)-1):
+ op(nodes[i]).outputConnectors[0].connect(op(nodes[i+1]).inputConnectors[0])
+result = {'created': nodes}
+```
+
+### Step 2: Set Parameters
+
+Prefer the native tool (validates params, won't crash):
+
+```
+td_set_operator_pars(path="/project1/bg", parameters={"roughness": 0.6, "monochrome": true})
+```
+
+For expressions or modes, use `td_execute_python`:
+
+```python
+op('/project1/time_driver').par.colorr.expr = "absTime.seconds % 1000.0"
+```
+
+### Step 3: Wire
+
+Use `td_execute_python` — no native wire tool exists:
+
+```python
+op('/project1/bg').outputConnectors[0].connect(op('/project1/fx').inputConnectors[0])
+```
+
+### Step 4: Verify
+
+```
+td_get_errors(path="/project1", recursive=true)
+td_get_perf()
+td_get_operator_info(path="/project1/out", detail="full")
+```
+
+### Step 5: Display / Capture
+
+```
+td_get_screenshot(path="/project1/out")
+```
+
+Or open a window via script:
+
+```python
+win = op('/project1').create(windowCOMP, 'display')
+win.par.winop = op('/project1/out').path
+win.par.winw = 1280; win.par.winh = 720
+win.par.winopen.pulse()
+```
+
+## MCP Tool Quick Reference
+
+**Core (use these most):**
+| Tool | What |
+|------|------|
+| `td_execute_python` | Run arbitrary Python in TD. Full API access. |
+| `td_create_operator` | Create node with params + auto-positioning |
+| `td_set_operator_pars` | Set params safely (validates, won't crash) |
+| `td_get_operator_info` | Inspect one node: connections, params, errors |
+| `td_get_operators_info` | Inspect multiple nodes in one call |
+| `td_get_network` | See network structure at a path |
+| `td_get_errors` | Find errors/warnings recursively |
+| `td_get_par_info` | Get param names for an OP type (replaces discovery) |
+| `td_get_hints` | Get patterns/tips before building |
+| `td_get_focus` | What network is open, what's selected |
+
+**Read/Write:**
+| Tool | What |
+|------|------|
+| `td_read_dat` | Read DAT text content |
+| `td_write_dat` | Write/patch DAT content |
+| `td_read_chop` | Read CHOP channel values |
+| `td_read_textport` | Read TD console output |
+
+**Visual:**
+| Tool | What |
+|------|------|
+| `td_get_screenshot` | Capture one OP viewer to file |
+| `td_get_screenshots` | Capture multiple OPs at once |
+| `td_get_screen_screenshot` | Capture actual screen via TD |
+| `td_navigate_to` | Jump network editor to an OP |
+
+**Search:**
+| Tool | What |
+|------|------|
+| `td_find_op` | Find ops by name/type across project |
+| `td_search` | Search code, expressions, string params |
+
+**System:**
+| Tool | What |
+|------|------|
+| `td_get_perf` | Performance profiling (FPS, slow ops) |
+| `td_list_instances` | List all running TD instances |
+| `td_get_docs` | In-depth docs on a TD topic |
+| `td_agents_md` | Read/write per-COMP markdown docs |
+| `td_reinit_extension` | Reload extension after code edit |
+| `td_clear_textport` | Clear console before debug session |
+
+**Input Automation:**
+| Tool | What |
+|------|------|
+| `td_input_execute` | Send mouse/keyboard to TD |
+| `td_input_status` | Poll input queue status |
+| `td_input_clear` | Stop input automation |
+| `td_op_screen_rect` | Get screen coords of a node |
+| `td_click_screen_point` | Click a point in a screenshot |
+
+See `references/mcp-tools.md` for full parameter schemas.
+
+## Key Implementation Rules
+
+**GLSL time:** No `uTDCurrentTime` in GLSL TOP. Use the Values page:
+```python
+# Call td_get_par_info(op_type="glslTOP") first to confirm param names
+td_set_operator_pars(path="/project1/shader", parameters={"value0name": "uTime"})
+# Then set expression via script:
+# op('/project1/shader').par.value0.expr = "absTime.seconds"
+# In GLSL: uniform float uTime;
+```
+
+Fallback: Constant TOP in `rgba32float` format (8-bit clamps to 0-1, freezing the shader).
+
+**Feedback TOP:** Use `top` parameter reference, not direct input wire. "Not enough sources" resolves after first cook. "Cook dependency loop" warning is expected.
+
+**Resolution:** Non-Commercial caps at 1280×1280. Use `outputresolution = 'custom'`.
+
+**Large shaders:** Write GLSL to `/tmp/file.glsl`, then use `td_write_dat` or `td_execute_python` to load.
+
+**Vertex/Point access (TD 2025.32):** `point.P[0]`, `point.P[1]`, `point.P[2]` — NOT `.x`, `.y`, `.z`.
+
+**Extensions:** `ext0object` format is `"op('./datName').module.ClassName(me)"` in CONSTANT mode. After editing extension code with `td_write_dat`, call `td_reinit_extension`.
+
+**Script callbacks:** ALWAYS use relative paths via `me.parent()` / `scriptOp.parent()`.
+
+**Cleaning nodes:** Always iterate over a snapshot (`list(root.children)`), never the live `root.children`, and check `child.valid` on each child before touching it.
+
+## Recording / Exporting Video
+
+```python
+# via td_execute_python:
+root = op('/project1')
+rec = root.create(moviefileoutTOP, 'recorder')
+op('/project1/out').outputConnectors[0].connect(rec.inputConnectors[0])
+rec.par.type = 'movie'
+rec.par.file = '/tmp/output.mov'
+rec.par.videocodec = 'prores' # Apple ProRes — NOT license-restricted on macOS
+rec.par.record = True # start
+# rec.par.record = False # stop (call separately later)
+```
+
+H.264/H.265/AV1 need Commercial license. Use `prores` on macOS or `mjpa` as fallback.
+Extract frames (create the output directory first — ffmpeg will not create it): `mkdir -p /tmp/frames && ffmpeg -i /tmp/output.mov -vframes 120 /tmp/frames/frame_%06d.png`
+
+**TOP.save() is useless for animation** — captures same GPU texture every time. Always use MovieFileOut.
+
+### Before Recording: Checklist
+
+1. **Verify FPS > 0** via `td_get_perf`. If FPS=0 the recording will be empty. See pitfalls #38-39.
+2. **Verify shader output is not black** via `td_get_screenshot`. Black output = shader error or missing input. See pitfalls #8, #40.
+3. **If recording with audio:** cue audio to start first, then delay recording by 3 frames. See pitfall #19.
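+4. **Set output path before starting record** — setting both in the same script can race, so set `rec.par.file` in one `td_execute_python` call and `rec.par.record = True` in a later, separate call.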
+
+## Audio-Reactive GLSL (Proven Recipe)
+
+### Correct signal chain (tested April 2026)
+
+```
+AudioFileIn CHOP (playmode=sequential)
+ → AudioSpectrum CHOP (FFT=512, outputmenu=setmanually, outlength=256, timeslice=ON)
+ → Math CHOP (gain=10)
+ → CHOP to TOP (dataformat=r, layout=rowscropped)
+ → GLSL TOP input 1 (spectrum texture, 256x2)
+
+Constant TOP (rgba32float, time) → GLSL TOP input 0
+GLSL TOP → Null TOP → MovieFileOut
+```
+
+### Critical audio-reactive rules (empirically verified)
+
+1. **TimeSlice must stay ON** for AudioSpectrum. OFF = processes entire audio file → 24000+ samples → CHOP to TOP overflow.
+2. **Set Output Length manually** to 256 via `outputmenu='setmanually'` and `outlength=256`. Default outputs 22050 samples.
+3. **DO NOT use Lag CHOP for spectrum smoothing.** Lag CHOP operates in timeslice mode and expands 256 samples to 2400+, averaging all values to near-zero (~1e-06). The shader receives no usable data. This was the #1 audio sync failure in testing.
+4. **DO NOT use Filter CHOP either** — same timeslice expansion problem with spectrum data.
+5. **Smoothing belongs in the GLSL shader** if needed, via temporal lerp with a feedback texture: `mix(prevValue, newValue, 0.3)`. This gives frame-perfect sync with zero pipeline latency.
+6. **CHOP to TOP dataformat = 'r'**, layout = 'rowscropped'. Spectrum output is 256x2 (stereo). Sample at y=0.25 for first channel.
+7. **Math gain = 10** (not 5). Raw spectrum values are ~0.19 in bass range. Gain of 10 gives usable ~5.0 for the shader.
+8. **No Resample CHOP needed.** Control output size via AudioSpectrum's `outlength` param directly.
+
+### GLSL spectrum sampling
+
+```glsl
+// Input 0 = time (1x1 rgba32float), Input 1 = spectrum (256x2)
+float iTime = texture(sTD2DInputs[0], vec2(0.5)).r;
+
+// Sample multiple points per band and average for stability:
+// NOTE: y=0.25 for first channel (stereo texture is 256x2, first row center is 0.25)
+float bass = (texture(sTD2DInputs[1], vec2(0.02, 0.25)).r +
+ texture(sTD2DInputs[1], vec2(0.05, 0.25)).r) / 2.0;
+float mid = (texture(sTD2DInputs[1], vec2(0.2, 0.25)).r +
+ texture(sTD2DInputs[1], vec2(0.35, 0.25)).r) / 2.0;
+float hi = (texture(sTD2DInputs[1], vec2(0.6, 0.25)).r +
+ texture(sTD2DInputs[1], vec2(0.8, 0.25)).r) / 2.0;
+```
+
+See `references/network-patterns.md` for complete build scripts + shader code.
+
+## Operator Quick Reference
+
+| Family | Color | Python class / MCP type | Suffix |
+|--------|-------|-------------|--------|
+| TOP | Purple | noiseTOP, glslTOP, compositeTOP, levelTOP, blurTOP, textTOP, nullTOP | TOP |
+| CHOP | Green | audiofileinCHOP, audiospectrumCHOP, mathCHOP, lfoCHOP, constantCHOP | CHOP |
+| SOP | Blue | gridSOP, sphereSOP, transformSOP, noiseSOP | SOP |
+| DAT | White | textDAT, tableDAT, scriptDAT, webserverDAT | DAT |
+| MAT | Yellow | phongMAT, pbrMAT, glslMAT, constantMAT | MAT |
+| COMP | Gray | geometryCOMP, containerCOMP, cameraCOMP, lightCOMP, windowCOMP | COMP |
+
+## Security Notes
+
+- MCP runs on localhost only (port 40404). No authentication — any local process can send commands.
+- `td_execute_python` has unrestricted access to the TD Python environment and filesystem as the TD process user.
+- `setup.sh` downloads twozero.tox from the official 404zero.com URL. Verify the download if concerned.
+- The skill never sends data outside localhost. All MCP communication is local.
+
+## References
+
+| File | What |
+|------|------|
+| `references/pitfalls.md` | Hard-won lessons from real sessions |
+| `references/operators.md` | All operator families with params and use cases |
+| `references/network-patterns.md` | Recipes: audio-reactive, generative, GLSL, instancing |
+| `references/mcp-tools.md` | Full twozero MCP tool parameter schemas |
+| `references/python-api.md` | TD Python: op(), scripting, extensions |
+| `references/troubleshooting.md` | Connection diagnostics, debugging |
+| `scripts/setup.sh` | Automated setup script |
+
+---
+
+> You're not writing code. You're conducting light.
diff --git a/optional-skills/creative/touchdesigner-mcp/references/mcp-tools.md b/optional-skills/creative/touchdesigner-mcp/references/mcp-tools.md
new file mode 100644
index 00000000000..ec90076cb2b
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/mcp-tools.md
@@ -0,0 +1,382 @@
+# twozero MCP Tools Reference
+
+36 tools from twozero MCP v2.774+ (April 2026).
+All tools accept an optional `target_instance` param for multi-TD-instance scenarios.
+
+## Execution & Scripting
+
+### td_execute_python
+
+Execute Python code inside TouchDesigner and return the result. Has full access to TD Python API (op, project, app, etc). Print statements and the last expression value are captured. Best for: wiring connections (inputConnectors), setting expressions (par.X.expr/mode), querying parameter names, and batch creation scripts (5+ operators). For creating 1-4 operators, prefer td_create_operator instead.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `code` | string | yes | Python code to execute in TouchDesigner |
+
+## Network & Structure
+
+### td_get_network
+
+Get the operator network structure in TouchDesigner (TD) at a given path. Returns compact list: name OPType flags. First line is full path of queried op. Flags: ch:N=children count, !cook=allowCooking off, bypass, private=isPrivate, blocked:reason, "comment text". depth=0 (default) = current level only. depth=1 = one level of children (indented). To explore deeper, call again on a specific COMP path. System operators (/ui, /sys) are hidden by default.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | no | Network path to inspect, e.g. '/' or '/project1' |
+| `depth` | integer | no | How many levels deep to recurse. 0=current level only (recommended), 1=include direct children of COMPs |
+| `includeSystem` | boolean | no | Include system operators (/ui, /sys). Default false. |
+| `nodeXY` | boolean | no | Include nodeX,nodeY coordinates. Default false. |
+
+### td_create_operator
+
+Create a new operator (node) in TouchDesigner (TD). Preferred way to create operators — handles viewport positioning, viewer flag, and docked ops automatically. For batch creation (5+ ops), you may use td_execute_python with a script instead, but then call td_get_hints('construction') first for correct parameter names and layout rules. Supports all TD operator types: TOP, CHOP, SOP, DAT, COMP, MAT. If parent is omitted, creates in the currently open network at the user's viewport position. When building a container: first create baseCOMP (no parent), then create children with parent=compPath.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `type` | string | yes | Operator type, e.g. 'textDAT', 'constantCHOP', 'noiseTOP', 'transformTOP', 'baseCOMP' |
+| `parent` | string | no | Path to the parent operator. If omitted, uses the currently open network in TD. |
+| `name` | string | no | Name for the new operator (optional, TD auto-names if omitted) |
+| `parameters` | object | no | Key-value pairs of parameters to set on the created operator |
+
+### td_find_op
+
+Find operators by name and/or type across the project. Returns TSV: path, OPType, flags. Flags: bypass, !cook, private, blocked:reason. Use td_search to search inside code/expressions; use td_find_op to find operators themselves.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `name` | string | no | Substring to match in operator name (case-insensitive). E.g. 'noise' finds noise1, noise2, myNoise. |
+| `type` | string | no | Substring to match in OPType (case-insensitive). E.g. 'noiseTOP', 'baseCOMP', 'CHOP'. Use exact type for precision or partial for broader matches. |
+| `root` | string | no | Root operator path to search from. Default '/project1'. |
+| `max_results` | number | no | Maximum results to return. Default 50. |
+| `max_depth` | number | no | Max recursion depth from root. Default unlimited. |
+| `detail` | `basic` / `summary` | no | Result detail level. 'basic' = name/path/type (fast). 'summary' = + connections, non-default pars, expressions. Default 'basic'. |
+
+### td_search
+
+Search for text across all code (DAT scripts), parameter expressions, and string parameter values in the TD project. Returns TSV: path, kind (code/expression/parameter/ref), line, text. JSON when context>0. Words are OR-matched. Use quotes for exact phrases: 'GetLogin "op('login')"'. Use count_only=true to quickly check if something is referenced without fetching full results.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `query` | string | yes | Search query. Multiple words = OR (any match). Wrap in quotes for exact phrase. Example: 'GetLogin getLogin' finds either. |
+| `root` | string | no | Root operator path to search from. Default '/project1'. |
+| `scope` | `all` / `code` / `editable` / `expressions` / `parameters` | no | What to search. 'code' = DAT scripts only (fast, ~0.05s). 'editable' = only editable code (skips inherited/ref DATs). 'expressions' = parameter expressions only. 'parameters' = string parameter values only. 'all' = everything (slow, ~1.5s due to parameter scan). Default 'all'. |
+| `case_sensitive` | boolean | no | Case-sensitive matching. Default false. |
+| `max_results` | number | no | Maximum results to return. Default 50. |
+| `context` | number | no | Lines to show before/after each code match. Saves td_read_dat calls. Default 0. |
+| `count_only` | boolean | no | Return only match count, not results. Fast existence check. |
+| `max_depth` | number | no | Max recursion depth from root. Default unlimited. |
+
+### td_navigate_to
+
+Navigate the TouchDesigner Network Editor viewport to show a specific operator. Opens the operator's parent network and centers the view on it. Use this to show the user where a problem is, or to navigate to an operator before modifying it.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the operator to navigate to, e.g. '/project1/noise1' |
+
+## Operator Inspection
+
+### td_get_operator_info
+
+Get information about a specific operator (node) in TouchDesigner (TD). detail='summary': connections, non-default pars, expressions, CHOP channels (compact). detail='full': all of the above PLUS every parameter with value/default/label.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Full path to the operator, e.g. '/project1/noise1' |
+| `detail` | `summary` / `full` | no | Level of detail. 'summary' = connections, expressions, non-default pars, custom pars (pulse marked), CHOP channels. 'full' = summary + all parameters. Default 'full'. |
+
+### td_get_operators_info
+
+Get information about multiple operators in one call. Returns an array of operator info objects. Use instead of calling td_get_operator_info multiple times.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `paths` | array | yes | Array of full operator paths, e.g. ['/project1/null1', '/project1/null2'] |
+| `detail` | `summary` / `full` | no | Level of detail. Default 'summary'. |
+
+### td_get_par_info
+
+Get parameter names and details for a TouchDesigner operator type. Without specific pars: returns compact list of all parameters with their names, types, and menu options. With pars: returns full details (help text, menu values, style) for specific parameters. Use this when you need to know exact parameter names before setting them.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `op_type` | string | yes | TD operator type name, e.g. 'noiseTOP', 'blurTOP', 'lfoCHOP', 'compositeTOP' |
+| `pars` | array | no | Optional list of specific parameter names to get full details for |
+
+## Parameter Setting
+
+### td_set_operator_pars
+
+Set parameters and flags on an operator in TouchDesigner (TD). Safer than td_execute_python for simple parameter changes. Can set values, toggle bypass/viewer, without writing Python code.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the operator |
+| `parameters` | object | no | Key-value pairs of parameters to set |
+| `bypass` | boolean | no | Set bypass state of the operator (not available on COMPs) |
+| `viewer` | boolean | no | Set viewer state of the operator |
+| `allowCooking` | boolean | no | Set cooking flag on a COMP. When False, internal network stops cooking (0 CPU). COMP-only. |
+
+## Data Read/Write
+
+### td_read_dat
+
+Read the text content of a DAT operator in TouchDesigner (TD). Returns content with line numbers. Use to read scripts, extensions, GLSL shaders, table data.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the DAT operator |
+| `start_line` | integer | no | Start line (1-based). Omit to read from beginning. |
+| `end_line` | integer | no | End line (inclusive). Omit to read to end. |
+
+### td_write_dat
+
+Write or patch text content of a DAT operator in TouchDesigner (TD). Can do full replacement or StrReplace-style patching (old_text -> new_text). Use for editing scripts, extensions, shaders. Does NOT reinit extensions automatically.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the DAT operator |
+| `text` | string | no | Full replacement text. Use this OR old_text+new_text, not both. |
+| `old_text` | string | no | Text to find and replace (must be unique in the DAT) |
+| `new_text` | string | no | Replacement text |
+| `replace_all` | boolean | no | If true, replaces ALL occurrences of old_text (default: false, requires unique match) |
+
+### td_read_chop
+
+Read CHOP channel sample data. Returns channel values as arrays. Use when you need the actual sample values (animation curves, lookup tables, waveforms), not just the summary from td_get_operator_info.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the CHOP operator |
+| `channels` | array | no | Channel names to read. Omit to read all channels. |
+| `start` | integer | no | Start sample index (0-based). Omit to read from beginning. |
+| `end` | integer | no | End sample index (inclusive). Omit to read to end. |
+
+### td_read_textport
+
+Read the last N lines from the TouchDesigner (TD) log/textport (console output). Use this to see errors, warnings and print output from TD.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `lines` | integer | no | Number of recent lines to return |
+
+### td_clear_textport
+
+Clear the MCP textport log buffer. Use this before starting a debug session or an edit-run-check loop to keep td_read_textport output focused and minimal.
+
+No parameters (other than optional `target_instance`).
+
+## Visual Capture
+
+### td_get_screenshot
+
+Get a screenshot of an operator's viewer in TouchDesigner (TD). Saves the image to a file and returns the file path. Use your file-reading tool to view the image. Shows what the operator looks like in its viewer (TOP output, CHOP waveform graph, SOP geometry, DAT table, parameter UI, etc). Use this to visually inspect any operator, or to generate images via TD for use in your project. TWO-STEP ASYNC USAGE: Step 1 — call with 'path' to start: returns {'status': 'pending', 'requestId': '...'}. Step 2 — call with 'request_id' to retrieve: returns {'file': '/tmp/.../opname_id.jpg'}. Then read the file to see the image. If step 2 still returns pending, make one other tool call then retry.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | no | Full operator path to screenshot, e.g. '/project1/noise1'. Required for step 1. |
+| `request_id` | string | no | Request ID from step 1 to retrieve the completed screenshot. |
+| `max_size` | integer | no | Max pixel size for the longer side (default 512). Use 0 for original operator resolution (useful for pixel-accurate UI work). Higher values (e.g. 1024) for more detail. |
+| `output_path` | string | no | Optional absolute path where the image should be saved (e.g. '/Users/me/project/render.png'). If omitted, saved to /tmp/pisang_mcp/screenshots/. Use absolute paths — TD's working directory may differ from the agent's. |
+| `as_top` | boolean | no | If true, captures the operator directly as a TOP (bypasses the viewer renderer), preserving alpha/transparency. Only works for TOP operators — if the target is not a TOP, falls back to the viewer automatically. Use this when you need a clean PNG with alpha, e.g. to save a generated image for use in another project. |
+| `format` | `auto` / `jpg` / `png` | no | Image format. 'auto' (default): JPEG for viewer mode, PNG for as_top=true. 'jpg': always JPEG (smaller). 'png': always PNG (lossless). |
+
+### td_get_screenshots
+
+Get screenshots of multiple operators in one batch. Saves images to files and returns file paths. Use your file-reading tool to view images. TWO-STEP ASYNC USAGE: Step 1 — call with 'paths' array to start: returns {'status': 'pending', 'batchId': '...', 'total': N}. Step 2 — call with 'batch_id' to retrieve: returns {'files': [{op, file}, ...]}. Then read the files to see the images. If still processing returns {'status': 'pending', 'ready': K, 'total': N}.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `paths` | array | no | List of full operator paths to screenshot. Required for step 1. |
+| `batch_id` | string | no | Batch ID from step 1 to retrieve completed screenshots. |
+| `max_size` | integer | no | Max pixel size for longer side (default 512). Use 0 for original resolution. |
+| `as_top` | boolean | no | If true, captures TOP operators directly (preserves alpha). Non-TOP operators fall back to viewer. |
+| `output_dir` | string | no | Optional absolute path to a directory. Each screenshot saved as .jpg or .png inside it and kept on disk. |
+| `format` | `auto` / `jpg` / `png` | no | Image format. 'auto' (default): JPEG for viewer mode, PNG for as_top=true. 'jpg': always JPEG (smaller). 'png': always PNG (lossless). |
+
+### td_get_screen_screenshot
+
+Capture a screenshot of the actual screen via TD's screenGrabTOP. Saves the image to a file and returns the file path. Use your file-reading tool to view the image. Unlike td_get_screenshot (operator viewer), this shows what the user literally sees on their monitor — TD windows, UI panels, everything. Use when simulating mouse/keyboard input to verify what happened on screen. Workflow: td_get_screen_screenshot → read file → td_input_execute → wait idle → td_get_screen_screenshot again. TWO-STEP ASYNC: Step 1 — call without request_id: returns {'status':'pending','requestId':'...'}. Step 2 — call with request_id: returns {'file': '/tmp/.../screen_id.jpg', 'info': '...metadata...'}. Then read the file to see the image. The requestId also stays usable with td_screen_point_to_global for later coordinate lookup. crop_x/y/w/h are in ACTUAL SCREEN PIXELS (not image pixels). Crops exceeding screen bounds are auto-clamped. SMART DEFAULTS: max_size is auto when omitted — 1920 for full screen (good overview), max(crop_w,crop_h) for cropped (guarantees 1:1 scale). At 1:1 scale: screen_coord = crop_origin + image_pixel. Otherwise use the formula from metadata.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `request_id` | string | no | Request ID from step 1 to retrieve the completed screenshot. |
+| `max_size` | integer | no | Max pixel size for the longer side. Auto when omitted: 1920 for full screen, max(crop_w,crop_h) for cropped (1:1). Set explicitly to override. |
+| `crop_x` | integer | no | Left edge in screen pixels. |
+| `crop_y` | integer | no | Top edge in screen pixels (y=0 at top of screen). |
+| `crop_w` | integer | no | Width in pixels. |
+| `crop_h` | integer | no | Height in pixels. |
+| `display` | integer | no | Screen index (default 0 = primary display). |
+
+## Context & Focus
+
+### td_get_focus
+
+Get the current user focus in TouchDesigner (TD): which network is open, selected operators, current operator, and rollover (what is under the mouse cursor). IMPORTANT: when the user says 'this operator' or 'вот этот', they mean the SELECTED/CURRENT operator, NOT the rollover. Rollover is just incidental mouse position and should be ignored for intent. Pass screenshots=true to immediately start a screenshot batch for all selected operators — response includes a 'screenshots' field with batchId; retrieve with td_get_screenshots(batch_id=...).
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `screenshots` | boolean | no | If true, start a screenshot batch for all selected operators. Retrieve with td_get_screenshots(batch_id=...). |
+| `max_size` | integer | no | Max screenshot size when screenshots=true (default 512). |
+| `as_top` | boolean | no | Passed to the screenshot batch when screenshots=true. |
+
+### td_get_errors
+
+Find errors and warnings in TouchDesigner (TD) operators. Checks operator errors, warnings, AND broken parameter expressions (missing channels, bad references, etc). Also includes recent script errors from the log (tracebacks), grouped and deduplicated — e.g. 1000 identical mouse-move errors shown as ×1000 with one entry. If path is given, checks that operator and its children. If no path, checks the currently open network. Use '/' for entire project. Use when user says something is broken, has errors, red nodes, горит ошибка, etc. TIP: call td_clear_textport before reproducing an error to keep log focused. TIP: combine with td_get_perf when user says 'тупит/лагает' to check both errors and performance.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | no | Path to check. If omitted, checks the current network. Use '/' to scan entire project. |
+| `recursive` | boolean | no | Check children recursively (default true) |
+| `include_log` | boolean | no | Include recent script errors from log, grouped by unique signature (default true). Use td_clear_textport before reproducing an error to keep results focused. |
+
+### td_get_perf
+
+Get performance data from TouchDesigner (TD). Returns TSV: header with fps/budget/memory summary, then slowest operators sorted by cook time. Columns: path, OPType, cpu/cook(ms), gpu/cook(ms), cpu/s, gpu/s, rate, flags. Use when user reports lag, low FPS, slow performance, тупит, тормозит.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | no | Path to profile. If omitted, profiles the current network. Use '/' for entire project. |
+| `top` | integer | no | Number of slowest operators to return |
+
+## Documentation
+
+### td_get_docs
+
+Get comprehensive documentation on a TouchDesigner topic. Unlike td_get_hints (compact tips), this returns in-depth reference material. Call without arguments to see available topics with descriptions. Call with a topic name to get the full documentation.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `topic` | string | no | Topic to get docs for. Omit to list available topics. |
+
+### td_get_hints
+
+Get TouchDesigner tips and common patterns for a topic. Call this BEFORE creating operators or writing TD Python code to learn correct parameter names, expressions, and idiomatic approaches. Available topics: animation, noise, connections, parameters, scripting, construction, ui_analysis, panel_layout, screenshots, input_simulation, undo, networking, all. IMPORTANT: always call with topic='construction' before building multi-operator setups to get correct TOP/CHOP parameter names, compositeTOP input ordering, and layout guidelines. IMPORTANT: always call with topic='input_simulation' before using td_input_execute to learn focus recovery, coordinate systems, and testing workflow.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `topic` | string | yes | Topic to get hints for. Available: 'animation', 'noise', 'connections', 'parameters', 'scripting', 'construction', 'ui_analysis', 'panel_layout', 'screenshots', 'input_simulation', 'undo', 'networking', 'all' |
+
+### td_agents_md
+
+Read, write, or update the agents_md documentation inside a COMP container. agents_md is a Markdown textDAT describing the container's purpose, structure, and conventions. action='read': returns content + staleness check (compares documented children vs live state). action='update': refreshes auto-generated sections (children list, connections) from live state, preserves human-written sections. action='write': sets full content, creates the DAT if missing.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the COMP container |
+| `action` | `read` / `update` / `write` | yes | read=get content+staleness, update=refresh auto sections, write=set content |
+| `content` | string | no | Markdown content (only for action='write') |
+
+## Input Automation
+
+### td_input_execute
+
+Send a sequence of mouse/keyboard commands to TouchDesigner. Commands execute sequentially with smooth bezier movement. Returns immediately — poll td_input_status() until status='idle' before proceeding. Command types: 'focus' — bring TD to foreground. 'move' — smooth mouse move: {type,x,y,duration,easing}. 'click' — click: {type,x,y,button,hold,duration,easing}. hold=seconds to hold down. duration=smooth move before click. 'dblclick' — double click: {type,x,y,duration}. 'mousedown'/'mouseup' — {type,x,y,button}. 'key' — keystroke: {type,keys} e.g. 'ctrl+z','tab','escape','shift+f5'. Requires Accessibility permission on Mac. 'type' — human-like typing: {type,text,wpm,variance} — layout-independent Unicode, variable timing. 'wait' — pause: {type,duration}. 'scroll' — {type,x,y,dx,dy,steps} — human-like scroll: moves mouse to (x,y) first, then sends dy (vertical, +up) and dx (horizontal, +right) as multiple ticks with natural timing. steps=4 by default. Mouse commands may include coord_space='logical' (default) or coord_space='physical'. On macOS, 'physical' means actual screen pixels from td_get_screen_screenshot and is converted to CGEvent logical coords automatically. Top-level coord_space applies to commands that do not override it. on_error: 'stop' (default) clears queue on error; 'continue' skips failed command. IMPORTANT: call td_get_hints('input_simulation') before first use to learn focus recovery, coordinate systems, and testing workflow.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `commands` | array | yes | List of command dicts to execute in sequence. |
+| `coord_space` | `logical` / `physical` | no | Default coordinate space for mouse commands that do not specify their own coord_space. 'logical' uses CGEvent coords directly. 'physical' uses actual screen pixels from td_get_screen_screenshot and is auto-converted on macOS. |
+| `on_error` | `stop` / `continue` | no | What to do on error. Default 'stop'. |
+
+### td_input_status
+
+Get current status of the td_input command queue. Poll this after td_input_execute until status='idle'. Returns: status ('idle'/'running'), current command, queue_remaining, last error.
+
+No parameters (other than optional `target_instance`).
+
+### td_input_clear
+
+Clear the td_input command queue and stop current execution immediately.
+
+No parameters (other than optional `target_instance`).
+
+### td_op_screen_rect
+
+Get the screen coordinates of an operator node in the network editor. Returns {x,y,w,h,cx,cy} where cx,cy is the center for clicking. Use this to find where to click on a specific operator. Only works if the operator's parent network is currently open in a network editor pane.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Full path to the operator, e.g. '/project1/myComp/noise1' |
+
+### td_click_screen_point
+
+Resolve a point inside a previous td_get_screen_screenshot result and click it. Pass the screenshot request_id plus either normalized u/v or image_x/image_y. Queues a td_input click using physical screen coordinates, so it works directly with screenshot-derived points. Use duration/easing to control the cursor travel before the click.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `request_id` | string | yes | Request ID originally returned by td_get_screen_screenshot. |
+| `u` | number | no | Normalized horizontal position inside the screenshot region (0=left, 1=right). Use with v. |
+| `v` | number | no | Normalized vertical position inside the screenshot region (0=top, 1=bottom). Use with u. |
+| `image_x` | number | no | Horizontal pixel coordinate inside the returned screenshot image. Use with image_y. |
+| `image_y` | number | no | Vertical pixel coordinate inside the returned screenshot image. Use with image_x. |
+| `button` | `left` / `right` / `middle` | no | Mouse button to click. Default left. |
+| `hold` | number | no | Seconds to hold the mouse button down before releasing. |
+| `duration` | number | no | Seconds for the cursor to travel to the target before clicking. |
+| `easing` | `linear` / `ease-in` / `ease-out` / `ease-in-out` | no | Cursor movement easing for the pre-click travel. |
+| `focus` | boolean | no | If true, bring TD to the front before clicking and wait briefly for focus to settle. |
+
+### td_screen_point_to_global
+
+Convert a point inside a previous td_get_screen_screenshot result into absolute screen coordinates. Pass the screenshot request_id plus either normalized u/v (0..1 inside that screenshot region) or image_x/image_y in returned image pixels. Returns absolute physical screen coordinates, logical coordinates, and a ready-to-use td_input_execute payload. Metadata is kept for the most recent screen screenshots so multiple agents can resolve points later by request_id.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `request_id` | string | yes | Request ID originally returned by td_get_screen_screenshot. |
+| `u` | number | no | Normalized horizontal position inside the screenshot region (0=left, 1=right). Use with v. |
+| `v` | number | no | Normalized vertical position inside the screenshot region (0=top, 1=bottom). Use with u. |
+| `image_x` | number | no | Horizontal pixel coordinate inside the returned screenshot image. Use with image_y. |
+| `image_y` | number | no | Vertical pixel coordinate inside the returned screenshot image. Use with image_x. |
+
+## System
+
+### td_list_instances
+
+List all running TouchDesigner (TD) instances with active MCP servers. Returns port, project name, PID, and instanceId for each instance. Call this at the start of every conversation to discover available instances and choose which one to work with. instanceId is stable for the lifetime of a TD process and is used as target_instance in all other tool calls.
+
+No parameters (other than optional `target_instance`).
+
+### td_project_quit
+
+Save and/or close the current TouchDesigner (TD) project. Can save before closing. Reports if project has unsaved changes. To close a different instance, pass target_instance=instanceId. WARNING: this will shut down the MCP server on that instance.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `save` | boolean | no | Save the project before closing. Default true. |
+| `force` | boolean | no | Force close without save dialog. Default false. |
+
+### td_reinit_extension
+
+Reinitialize an extension on a COMP in TouchDesigner (TD). Call this AFTER finishing all code edits via td_write_dat to apply changes. Do NOT call after every small edit - batch your changes first.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `path` | string | yes | Path to the COMP with the extension |
+
+### td_dev_log
+
+Read the last N entries from the MCP dev log. Only available when Devmode is enabled. Shows request/response history.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `count` | integer | no | Number of recent log entries to return |
+
+### td_clear_dev_log
+
+Clear the current MCP dev log by closing the old file and starting a fresh one. Only available when Devmode is enabled.
+
+No parameters (other than optional `target_instance`).
+
+### td_test_session
+
+Manage test sessions, bug reports, and conversation export. IMPORTANT: Do NOT proactively suggest exporting chat or submitting reports. These are tools for specific situations: - export_chat / submit_report: ONLY when the user encounters a BUG with the plugin or TouchDesigner and wants to report it, or when the user explicitly asks to export the conversation. Never suggest this at session end or as routine action. USER PHRASES → ACTIONS: 'разбор тестовых сессий' / 'analyze test sessions' → list, then pull, read meta.json → index.jsonl → calls/. 'разбор репортов' / 'analyze user reports' → list with session='user', then pull by name. 'экспортируй чат' / 'export chat' → (1) export_chat_id → marker, (2) export_chat with session=marker. 'сообщи о проблеме' / 'report bug' → export chat, review for privacy, then submit_report with summary + tags + result_op=file_path. ACTIONS: export_chat_id | export_chat | submit_report | start | note | import_chat | end | list | pull. list: default=auto-detect repo. session='user' for user_reports (dev only). pull: auto-searches both repos. Auto-detects dev vs user Hub access.
+
+| Param | Type | Required | Description |
+|-------|------|----------|-------------|
+| `action` | `export_chat_id` / `export_chat` / `submit_report` / `start` / `note` / `import_chat` / `end` / `list` / `pull` | yes | Action: export_chat_id / export_chat / submit_report / start / note / import_chat / end / list / pull |
+| `prompt` | string | no | (start) The test prompt/task description |
+| `tags` | array | no | (start) Tags for categorization, e.g. ['ui', 'layout'] |
+| `text` | string | no | (note) Observation text. (import_chat) Full conversation text. |
+| `outcome` | `success` / `partial` / `failure` | no | (end) Result: success / partial / failure |
+| `summary` | string | no | (end) Brief summary of what happened |
+| `result_op` | string | no | (end) Path to operator to save as result.tox |
+| `session` | string | no | (pull) Session name or substring to download |
diff --git a/optional-skills/creative/touchdesigner-mcp/references/network-patterns.md b/optional-skills/creative/touchdesigner-mcp/references/network-patterns.md
new file mode 100644
index 00000000000..cb04fd54d57
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/network-patterns.md
@@ -0,0 +1,966 @@
+# TouchDesigner Network Patterns
+
+Complete network recipes for common creative coding tasks. Each pattern shows the operator chain, MCP tool calls to build it, and key parameter settings.
+
+## Audio-Reactive Visuals
+
+### Pattern 1: Audio Spectrum -> Noise Displacement
+
+Audio drives noise parameters for organic, music-responsive textures.
+
+```
+Audio File In CHOP -> Audio Spectrum CHOP -> Math CHOP (scale)
+ |
+ v (export to noise params)
+ Noise TOP -> Level TOP -> Feedback TOP -> Composite TOP -> Null TOP (out)
+ ^ |
+ |________________|
+```
+
+**MCP Build Sequence:**
+
+```
+1. td_create_operator(parent="/project1", type="audiofileinChop", name="audio_in")
+2. td_create_operator(parent="/project1", type="audiospectrumChop", name="spectrum")
+3. td_create_operator(parent="/project1", type="mathChop", name="spectrum_scale")
+4. td_create_operator(parent="/project1", type="noiseTop", name="noise1")
+5. td_create_operator(parent="/project1", type="levelTop", name="level1")
+6. td_create_operator(parent="/project1", type="feedbackTop", name="feedback1")
+7. td_create_operator(parent="/project1", type="compositeTop", name="comp1")
+8. td_create_operator(parent="/project1", type="nullTop", name="out")
+
+9. td_set_operator_pars(path="/project1/audio_in",
+ properties={"file": "/path/to/music.wav", "play": true})
+10. td_set_operator_pars(path="/project1/spectrum",
+ properties={"size": 512})
+11. td_set_operator_pars(path="/project1/spectrum_scale",
+ properties={"gain": 2.0, "postoff": 0.0})
+12. td_set_operator_pars(path="/project1/noise1",
+ properties={"type": 1, "monochrome": false, "resolutionw": 1280, "resolutionh": 720,
+ "period": 4.0, "harmonics": 3, "amp": 1.0})
+13. td_set_operator_pars(path="/project1/level1",
+ properties={"opacity": 0.95, "gamma1": 0.75})
+14. td_set_operator_pars(path="/project1/feedback1",
+ properties={"top": "/project1/comp1"})
+15. td_set_operator_pars(path="/project1/comp1",
+ properties={"operand": 0})
+
+16. td_execute_python: """
+op('/project1/audio_in').outputConnectors[0].connect(op('/project1/spectrum'))
+op('/project1/spectrum').outputConnectors[0].connect(op('/project1/spectrum_scale'))
+op('/project1/noise1').outputConnectors[0].connect(op('/project1/level1'))
+op('/project1/level1').outputConnectors[0].connect(op('/project1/comp1').inputConnectors[0])
+op('/project1/feedback1').outputConnectors[0].connect(op('/project1/comp1').inputConnectors[1])
+op('/project1/comp1').outputConnectors[0].connect(op('/project1/out'))
+"""
+
+17. td_execute_python: """
+# Export spectrum values to drive noise parameters
+# This makes the noise react to audio frequencies
+op('/project1/noise1').par.seed.expr = "op('/project1/spectrum_scale')['chan1']"
+op('/project1/noise1').par.period.expr = "tdu.remap(op('/project1/spectrum_scale')['chan1'].eval(), 0, 1, 1, 8)"
+"""
+```
+
+### Pattern 2: Beat Detection -> Visual Pulses
+
+Detect beats from audio and trigger visual events.
+
+```
+Audio Device In CHOP -> Audio Spectrum CHOP -> Math CHOP (isolate bass)
+ |
+ Trigger CHOP (envelope)
+ |
+ [export to visual params]
+```
+
+**Key parameter settings:**
+
+```
+# Isolate bass frequencies (20-200 Hz)
+Math CHOP: chanop=1 (Add channels), range1low=0, range1high=10
+ (first 10 FFT bins = bass frequencies with 512 FFT at 44100Hz)
+
+# ADSR envelope on each beat
+Trigger CHOP: attack=0.02, peak=1.0, decay=0.3, sustain=0.0, release=0.1
+
+# Export to visual: Scale, brightness, or color intensity
+td_execute_python: "op('/project1/level1').par.brightness1.expr = \"1.0 + op('/project1/trigger1')['chan1'] * 0.5\""
+```
+
+### Pattern 3: Multi-Band Audio -> Multi-Layer Visuals
+
+Split audio into frequency bands, drive different visual layers per band.
+
+```
+Audio In -> Spectrum -> Audio Band EQ (3 bands: bass, mid, treble)
+ |
+ +---------+---------+
+ | | |
+ Bass Mids Treble
+ | | |
+ Noise TOP Circle TOP Text TOP
+ (slow,dark) (mid,warm) (fast,bright)
+ | | |
+ +-----+----+----+----+
+ | |
+ Composite Composite
+ |
+ Out
+```
+
+### Pattern 3b: Audio-Reactive GLSL Fractal (Proven Recipe)
+
+Complete working recipe. Plays an MP3, runs FFT, feeds spectrum as a texture into a GLSL shader where inner fractal reacts to bass, outer to treble.
+
+**Network:**
+```
+AudioFileIn CHOP → AudioSpectrum CHOP (FFT=512, outlength=256)
+ → Math CHOP (gain=5) → CHOP To TOP (256x2 spectrum texture, dataformat=r)
+ ↓
+Constant TOP (time, rgba32float) → GLSL TOP (input 0=time, input 1=spectrum) → Null → MovieFileOut
+ ↓
+AudioFileIn CHOP → Audio Device Out CHOP Record to .mov
+```
+
+**Build via td_execute_python (one call per step for reliability):**
+
+```python
+# Step 1: Audio chain
+# td_execute_python script:
+td_execute_python(code="""
+root = op('/project1')
+audio = root.create(audiofileinCHOP, 'audio_in')
+audio.par.file = '/path/to/music.mp3'
+audio.par.playmode = 0 # Locked to timeline
+audio.par.volume = 0.5
+
+spec = root.create(audiospectrumCHOP, 'spectrum')
+audio.outputConnectors[0].connect(spec.inputConnectors[0])
+
+math_n = root.create(mathCHOP, 'math_norm')
+spec.outputConnectors[0].connect(math_n.inputConnectors[0])
+math_n.par.gain = 5 # boost signal
+
+resamp = root.create(resampleCHOP, 'resample_spec')
+math_n.outputConnectors[0].connect(resamp.inputConnectors[0])
+resamp.par.timeslice = True
+resamp.par.rate = 256
+
+chop2top = root.create(choptoTOP, 'spectrum_tex')
+chop2top.par.chop = resamp # CHOP To TOP has NO input connectors — use par.chop reference
+
+# Audio output (hear the music)
+aout = root.create(audiodeviceoutCHOP, 'audio_out')
+audio.outputConnectors[0].connect(aout.inputConnectors[0])
+result = 'audio chain ok'
+""")
+
+# Step 2: Time driver (MUST be rgba32float — see pitfalls #6)
+# td_execute_python script:
+td_execute_python(code="""
+root = op('/project1')
+td = root.create(constantTOP, 'time_driver')
+td.par.format = 'rgba32float'
+td.par.outputresolution = 'custom'
+td.par.resolutionw = 1
+td.par.resolutionh = 1
+td.par.colorr.expr = "absTime.seconds % 1000.0"
+td.par.colorg.expr = "int(absTime.seconds / 1000.0)"
+result = 'time ok'
+""")
+
+# Step 3: GLSL shader (write to /tmp, load from file)
+# td_execute_python script:
+td_execute_python(code="""
+root = op('/project1')
+glsl = root.create(glslTOP, 'audio_shader')
+glsl.par.outputresolution = 'custom'
+glsl.par.resolutionw = 1280
+glsl.par.resolutionh = 720
+
+sd = root.create(textDAT, 'shader_code')
+sd.text = open('/tmp/my_shader.glsl').read()
+glsl.par.pixeldat = sd
+
+# Wire: input 0 = time, input 1 = spectrum texture
+op('/project1/time_driver').outputConnectors[0].connect(glsl.inputConnectors[0])
+op('/project1/spectrum_tex').outputConnectors[0].connect(glsl.inputConnectors[1])
+result = 'glsl ok'
+""")
+
+# Step 4: Output + recorder
+# td_execute_python script:
+td_execute_python(code="""
+root = op('/project1')
+out = root.create(nullTOP, 'output')
+op('/project1/audio_shader').outputConnectors[0].connect(out.inputConnectors[0])
+
+rec = root.create(moviefileoutTOP, 'recorder')
+out.outputConnectors[0].connect(rec.inputConnectors[0])
+rec.par.type = 'movie'
+rec.par.file = '/tmp/output.mov'
+rec.par.videocodec = 'mjpa'
+result = 'output ok'
+""")
+```
+
+**GLSL shader pattern (audio-reactive fractal):**
+```glsl
+out vec4 fragColor;
+
+vec3 palette(float t) {
+ vec3 a = vec3(0.5); vec3 b = vec3(0.5);
+ vec3 c = vec3(1.0); vec3 d = vec3(0.263, 0.416, 0.557);
+ return a + b * cos(6.28318 * (c * t + d));
+}
+
+void main() {
+ // Input 0 = time (1x1 rgba32float constant)
+ // Input 1 = audio spectrum (256x2 CHOP To TOP, stereo — sample at y=0.25 for first channel)
+ vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+ float t = td.r + td.g * 1000.0;
+
+ vec2 res = uTDOutputInfo.res.zw;
+ vec2 uv = (gl_FragCoord.xy * 2.0 - res) / min(res.x, res.y);
+ vec2 uv0 = uv;
+ vec3 finalColor = vec3(0.0);
+
+ float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;
+ float mids = texture(sTD2DInputs[1], vec2(0.25, 0.25)).r;
+
+ for (float i = 0.0; i < 4.0; i++) {
+ uv = fract(uv * (1.4 + bass * 0.3)) - 0.5;
+ float d = length(uv) * exp(-length(uv0));
+
+ // Sample spectrum at distance: inner=bass, outer=treble
+ float freq = texture(sTD2DInputs[1], vec2(clamp(d * 0.5, 0.0, 1.0), 0.25)).r;
+
+ vec3 col = palette(length(uv0) + i * 0.4 + t * 0.35);
+ d = sin(d * (7.0 + bass * 4.0) + t * 1.5) / 8.0;
+ d = abs(d);
+ d = pow(0.012 / d, 1.2 + freq * 0.8 + bass * 0.5);
+ finalColor += col * d;
+ }
+
+ // Tone mapping
+ finalColor = finalColor / (finalColor + vec3(1.0));
+ fragColor = TDOutputSwizzle(vec4(finalColor, 1.0));
+}
+```
+
+**Key insights from testing:**
+- `spectrum_tex` (CHOP To TOP) produces a 256x2 texture — x position = frequency, y=0.25 for first channel
+- Sampling at `vec2(0.05, 0.25)` gets bass, `vec2(0.65, 0.25)` gets treble (y=0.25 is the center of the first channel's row)
+- Sampling based on pixel distance (`d * 0.5`) makes inner fractal react to bass, outer to treble
+- `bass * 0.3` in the `fract()` zoom makes the fractal breathe with kicks
+- Math CHOP gain of 5 is needed because raw spectrum values are very small
+
+## Generative Art
+
+### Pattern 4: Feedback Loop with Transform
+
+Classic generative technique — texture evolves through recursive transformation.
+
+```
+Noise TOP -> Composite TOP -> Level TOP -> Null TOP (out)
+ ^ |
+ | v
+ Transform TOP <- Feedback TOP
+```
+
+**MCP Build Sequence:**
+
+```
+1. td_create_operator(parent="/project1", type="noiseTop", name="seed_noise")
+2. td_create_operator(parent="/project1", type="compositeTop", name="mix")
+3. td_create_operator(parent="/project1", type="transformTop", name="evolve")
+4. td_create_operator(parent="/project1", type="feedbackTop", name="fb")
+5. td_create_operator(parent="/project1", type="levelTop", name="color_correct")
+6. td_create_operator(parent="/project1", type="nullTop", name="out")
+
+7. td_set_operator_pars(path="/project1/seed_noise",
+ properties={"type": 1, "monochrome": false, "period": 2.0, "amp": 0.3,
+ "resolutionw": 1280, "resolutionh": 720})
+8. td_set_operator_pars(path="/project1/mix",
+ properties={"operand": 27}) # 27 = Screen blend
+9. td_set_operator_pars(path="/project1/evolve",
+ properties={"sx": 1.003, "sy": 1.003, "rz": 0.5, "extend": 2}) # slight zoom + rotate, repeat edges
+10. td_set_operator_pars(path="/project1/fb",
+ properties={"top": "/project1/mix"})
+11. td_set_operator_pars(path="/project1/color_correct",
+ properties={"opacity": 0.98, "gamma1": 0.85})
+
+12. td_execute_python: """
+op('/project1/seed_noise').outputConnectors[0].connect(op('/project1/mix').inputConnectors[0])
+op('/project1/fb').outputConnectors[0].connect(op('/project1/evolve'))
+op('/project1/evolve').outputConnectors[0].connect(op('/project1/mix').inputConnectors[1])
+op('/project1/mix').outputConnectors[0].connect(op('/project1/color_correct'))
+op('/project1/color_correct').outputConnectors[0].connect(op('/project1/out'))
+"""
+```
+
+**Variations:**
+- Change Transform: `rz` (rotation), `sx/sy` (zoom), `tx/ty` (drift)
+- Change Composite operand: Screen (glow), Add (bright), Multiply (dark)
+- Add HSV Adjust in the feedback loop for color evolution
+- Add Blur for dreamlike softness
+- Replace Noise with a GLSL TOP for custom seed patterns
+
+### Pattern 5: Instancing (Particle-Like Systems)
+
+Render thousands of copies of geometry, each with unique position/rotation/scale driven by CHOP data or DATs.
+
+```
+Table DAT (instance data) -> DAT to CHOP -> Geometry COMP (instancing on) -> Render TOP
+ + Sphere SOP (template geometry)
+ + Constant MAT (material)
+ + Camera COMP
+ + Light COMP
+```
+
+**MCP Build Sequence:**
+
+```
+1. td_create_operator(parent="/project1", type="tableDat", name="instance_data")
+2. td_create_operator(parent="/project1", type="geometryComp", name="geo1")
+3. td_create_operator(parent="/project1/geo1", type="sphereSop", name="sphere")
+4. td_create_operator(parent="/project1", type="constMat", name="mat1")
+5. td_create_operator(parent="/project1", type="cameraComp", name="cam1")
+6. td_create_operator(parent="/project1", type="lightComp", name="light1")
+7. td_create_operator(parent="/project1", type="renderTop", name="render1")
+
+8. td_execute_python: """
+import random, math
+dat = op('/project1/instance_data')
+dat.clear()
+dat.appendRow(['tx', 'ty', 'tz', 'sx', 'sy', 'sz', 'cr', 'cg', 'cb'])
+for i in range(500):
+ angle = i * 0.1
+ r = 2 + i * 0.01
+ dat.appendRow([
+ str(math.cos(angle) * r),
+ str(math.sin(angle) * r),
+ str((i - 250) * 0.02),
+ '0.05', '0.05', '0.05',
+ str(random.random()),
+ str(random.random()),
+ str(random.random())
+ ])
+"""
+
+9. td_set_operator_pars(path="/project1/geo1",
+ properties={"instancing": true, "instancechop": "",
+ "instancedat": "/project1/instance_data",
+ "material": "/project1/mat1"})
+10. td_set_operator_pars(path="/project1/render1",
+ properties={"camera": "/project1/cam1", "geometry": "/project1/geo1",
+ "light": "/project1/light1",
+ "resolutionw": 1280, "resolutionh": 720})
+11. td_set_operator_pars(path="/project1/cam1",
+ properties={"tz": 10})
+```
+
+### Pattern 6: Reaction-Diffusion (GLSL)
+
+Classic Gray-Scott reaction-diffusion system running on the GPU.
+
+```
+Text DAT (GLSL code) -> GLSL TOP (resolution, dat reference) -> Feedback TOP
+ ^ |
+ |_______________________________________|
+ Level TOP (out)
+```
+
+**Key GLSL code (write to Text DAT via td_execute_python):**
+
+```glsl
+// Gray-Scott reaction-diffusion
+uniform float feed; // 0.037
+uniform float kill; // 0.06
+uniform float dA; // 1.0
+uniform float dB; // 0.5
+
+layout(location = 0) out vec4 fragColor;
+
+void main() {
+ vec2 uv = vUV.st;
+ vec2 texel = 1.0 / uTDOutputInfo.res.zw;
+
+ vec4 c = texture(sTD2DInputs[0], uv);
+ float a = c.r;
+ float b = c.g;
+
+ // Laplacian (9-point stencil)
+ float lA = 0.0, lB = 0.0;
+ for(int dx = -1; dx <= 1; dx++) {
+ for(int dy = -1; dy <= 1; dy++) {
+ float w = (dx == 0 && dy == 0) ? -1.0 : (abs(dx) + abs(dy) == 1 ? 0.2 : 0.05);
+ vec4 s = texture(sTD2DInputs[0], uv + vec2(dx, dy) * texel);
+ lA += s.r * w;
+ lB += s.g * w;
+ }
+ }
+
+ float reaction = a * b * b;
+ float newA = a + (dA * lA - reaction + feed * (1.0 - a));
+ float newB = b + (dB * lB + reaction - (kill + feed) * b);
+
+ fragColor = vec4(clamp(newA, 0.0, 1.0), clamp(newB, 0.0, 1.0), 0.0, 1.0);
+}
+```
+
+## Video Processing
+
+### Pattern 7: Video Effects Chain
+
+Apply a chain of effects to a video file.
+
+```
+Movie File In TOP -> HSV Adjust TOP -> Level TOP -> Blur TOP -> Composite TOP -> Null TOP (out)
+ ^
+ Text TOP ---+
+```
+
+**MCP Build Sequence:**
+
+```
+1. td_create_operator(parent="/project1", type="moviefileinTop", name="video_in")
+2. td_create_operator(parent="/project1", type="hsvadjustTop", name="color")
+3. td_create_operator(parent="/project1", type="levelTop", name="levels")
+4. td_create_operator(parent="/project1", type="blurTop", name="blur")
+5. td_create_operator(parent="/project1", type="compositeTop", name="overlay")
+6. td_create_operator(parent="/project1", type="textTop", name="title")
+7. td_create_operator(parent="/project1", type="nullTop", name="out")
+
+8. td_set_operator_pars(path="/project1/video_in",
+ properties={"file": "/path/to/video.mp4", "play": true})
+9. td_set_operator_pars(path="/project1/color",
+ properties={"hueoffset": 0.1, "saturationmult": 1.3})
+10. td_set_operator_pars(path="/project1/levels",
+ properties={"brightness1": 1.1, "contrast": 1.2, "gamma1": 0.9})
+11. td_set_operator_pars(path="/project1/blur",
+ properties={"sizex": 2, "sizey": 2})
+12. td_set_operator_pars(path="/project1/title",
+ properties={"text": "My Video", "fontsizex": 48, "alignx": 1, "aligny": 1})
+
+13. td_execute_python: """
+chain = ['video_in', 'color', 'levels', 'blur']
+for i in range(len(chain) - 1):
+ op(f'/project1/{chain[i]}').outputConnectors[0].connect(op(f'/project1/{chain[i+1]}'))
+op('/project1/blur').outputConnectors[0].connect(op('/project1/overlay').inputConnectors[0])
+op('/project1/title').outputConnectors[0].connect(op('/project1/overlay').inputConnectors[1])
+op('/project1/overlay').outputConnectors[0].connect(op('/project1/out'))
+"""
+```
+
+### Pattern 8: Video Recording
+
+Record the output to a file. **H.264/H.265 require a Commercial license** — use Motion JPEG (`mjpa`) on Non-Commercial.
+
+```
+[any TOP chain] -> Null TOP -> Movie File Out TOP
+```
+
+```python
+# Build via td_execute_python:
+root = op('/project1')
+
+# Always put a Null TOP before the recorder
+null_out = root.op('out') # or create one
+rec = root.create(moviefileoutTOP, 'recorder')
+null_out.outputConnectors[0].connect(rec.inputConnectors[0])
+
+rec.par.type = 'movie'
+rec.par.file = '/tmp/output.mov'
+rec.par.videocodec = 'mjpa' # Motion JPEG — works on Non-Commercial
+
+# Start recording (par.record is a toggle — .record() method may not exist)
+rec.par.record = True
+# ... let TD run for desired duration ...
+rec.par.record = False
+
+# For image sequences:
+# rec.par.type = 'imagesequence'
+# rec.par.imagefiletype = 'png'
+# rec.par.file.expr = "'/tmp/frames/out' + me.fileSuffix" # fileSuffix REQUIRED
+```
+
+**Pitfalls:**
+- Setting `par.file` + `par.record = True` in the same script may race — use `run("...", delayFrames=2)`
+- `TOP.save()` called rapidly always captures the same frame — use MovieFileOut for animation
+- See `pitfalls.md` #25-27 for full details
+
+### Pattern 8b: TD → External Pipeline (FFmpeg / Python / Post-Processing)
+
+Export TD visuals for use in another tool (ffmpeg, Python, ASCII art, etc.). This is the standard workflow when you need to composite TD output with external processing (ASCII conversion, Python shader chains, ML inference, etc.).
+
+**Step 1: Record to video in TD**
+
+```python
+# Preferred: ProRes on macOS (visually lossless, Non-Commercial OK, ~55MB/s at 1280x720)
+rec.par.videocodec = 'prores'
+# Fallback for non-macOS: mjpa (Motion JPEG)
+# rec.par.videocodec = 'mjpa'
+rec.par.record = True
+# ... wait N seconds ...
+rec.par.record = False
+```
+
+**Step 2: Extract frames with ffmpeg**
+
+```bash
+# Extract all frames at 30fps
+ffmpeg -y -i /tmp/output.mov -vf 'fps=30' /tmp/frames/frame_%06d.png
+
+# Or extract a specific duration
+ffmpeg -y -i /tmp/output.mov -t 25 -vf 'fps=30' /tmp/frames/frame_%06d.png
+
+# Or extract specific frame range
+ffmpeg -y -i /tmp/output.mov -vf 'select=between(n\,0\,749)' -vsync vfr /tmp/frames/frame_%06d.png
+```
+
+**Step 3: Process frames in Python**
+
+```python
+from PIL import Image
+import os
+
+frames_dir = '/tmp/frames'
+output_dir = '/tmp/processed'
+os.makedirs(output_dir, exist_ok=True)
+
+for fname in sorted(os.listdir(frames_dir)):
+ if not fname.endswith('.png'):
+ continue
+ img = Image.open(os.path.join(frames_dir, fname))
+ # ... apply your processing ...
+ img.save(os.path.join(output_dir, fname))
+```
+
+**Step 4: Mux processed frames back with audio**
+
+```bash
+# Create video from processed frames + audio with fade-out
+ffmpeg -y \
+ -framerate 30 -i /tmp/processed/frame_%06d.png \
+ -i /tmp/audio.mp3 \
+ -c:v libx264 -pix_fmt yuv420p -crf 18 \
+ -c:a aac -b:a 192k \
+ -shortest \
+ -af 'afade=t=out:st=23:d=2' \
+ /tmp/final_output.mp4
+```
+
+**Key considerations:**
+- Use ProRes for the TD recording step to minimize generation loss during compositing
+- Extract at the target output framerate (not TD's render framerate)
+- For audio-synced content, analyze the audio file separately in Python (scipy FFT) to get per-frame features (rms, spectral bands, beats) and drive compositing parameters
+- Always verify TD FPS > 0 before recording (see pitfalls #37, #38)
+
+## Data Visualization
+
+### Pattern 9: Table Data -> Bar Chart via Instancing
+
+Visualize tabular data as a 3D bar chart.
+
+```
+Table DAT (data) -> Script DAT (transform to instance format) -> DAT to CHOP
+ |
+Box SOP -> Geometry COMP (instancing from CHOP) -> Render TOP -> Null TOP (out)
+ + PBR MAT
+ + Camera COMP
+ + Light COMP
+```
+
+```python
+# Script DAT code to transform data to instance positions
+td_execute_python: """
+source = op('/project1/data_table')
+instance = op('/project1/instance_transform')
+instance.clear()
+instance.appendRow(['tx', 'ty', 'tz', 'sx', 'sy', 'sz', 'cr', 'cg', 'cb'])
+
+for i in range(1, source.numRows):
+ value = float(source[i, 'value'])
+ name = source[i, 'name']
+ instance.appendRow([
+ str(i * 1.5), # x position (spread bars)
+ str(value / 2), # y position (center bar vertically)
+ '0', # z position
+ '1', str(value), '1', # scale (height = data value)
+ '0.2', '0.6', '1.0' # color (blue)
+ ])
+"""
+```
+
+### Pattern 9b: Audio-Reactive GLSL Fractal (Proven Recipe)
+
+Audio spectrum drives a GLSL fractal shader directly via a spectrum texture input. Bass thickens inner fractal lines, mids twist rotation, highs light outer edges. **Always run discovery (SKILL.md Step 0) before using any param names from these recipes — they may differ in your TD version.**
+
+```
+Audio File In CHOP → Audio Spectrum CHOP (FFT=512, outlength=256)
+ → Math CHOP (gain=10)
+ → CHOP To TOP (spectrum texture, 256x2, dataformat=r)
+ ↓ (input 1)
+Constant TOP (rgba32float, time) → GLSL TOP (audio-reactive shader) → Null TOP
+ (input 0) ↑
+ Text DAT (shader code)
+```
+
+**Build via td_execute_python (complete working script):**
+
+```python
+# td_execute_python script:
+td_execute_python(code="""
+import os
+root = op('/project1')
+
+# Audio input
+audio = root.create(audiofileinCHOP, 'audio_in')
+audio.par.file = '/path/to/music.mp3'
+audio.par.playmode = 0 # Locked to timeline
+
+# FFT analysis (output length manually set to 256 bins)
+spectrum = root.create(audiospectrumCHOP, 'spectrum')
+audio.outputConnectors[0].connect(spectrum.inputConnectors[0])
+spectrum.par.fftsize = '512'
+spectrum.par.outputmenu = 'setmanually'
+spectrum.par.outlength = 256
+
+# THEN boost gain on the raw spectrum (NO Lag CHOP — see pitfall #34)
+math = root.create(mathCHOP, 'math_norm')
+spectrum.outputConnectors[0].connect(math.inputConnectors[0])
+math.par.gain = 10
+
+# Spectrum → texture (256x2 image — stereo, sample at y=0.25 for first channel)
+# NOTE: choptoTOP has NO input connectors — use par.chop reference!
+spec_tex = root.create(choptoTOP, 'spectrum_tex')
+spec_tex.par.chop = math
+spec_tex.par.dataformat = 'r'
+spec_tex.par.layout = 'rowscropped'
+
+# Time driver (rgba32float to avoid 0-1 clamping!)
+time_drv = root.create(constantTOP, 'time_driver')
+time_drv.par.format = 'rgba32float'
+time_drv.par.outputresolution = 'custom'
+time_drv.par.resolutionw = 1
+time_drv.par.resolutionh = 1
+time_drv.par.colorr.expr = "absTime.seconds % 1000.0"
+time_drv.par.colorg.expr = "int(absTime.seconds / 1000.0)"
+
+# GLSL shader
+glsl = root.create(glslTOP, 'audio_shader')
+glsl.par.outputresolution = 'custom'
+glsl.par.resolutionw = 1280; glsl.par.resolutionh = 720
+
+shader_dat = root.create(textDAT, 'shader_code')
+shader_dat.text = open('/tmp/shader.glsl').read()
+glsl.par.pixeldat = shader_dat
+
+# Wire: input 0=time, input 1=spectrum
+time_drv.outputConnectors[0].connect(glsl.inputConnectors[0])
+spec_tex.outputConnectors[0].connect(glsl.inputConnectors[1])
+
+# Output + audio playback
+out = root.create(nullTOP, 'output')
+glsl.outputConnectors[0].connect(out.inputConnectors[0])
+audio_out = root.create(audiodeviceoutCHOP, 'audio_out')
+audio.outputConnectors[0].connect(audio_out.inputConnectors[0])
+
+result = 'network built'
+""")
+```
+
+**GLSL shader (reads spectrum from input 1 texture):**
+
+```glsl
+out vec4 fragColor;
+
+vec3 palette(float t) {
+ vec3 a = vec3(0.5); vec3 b = vec3(0.5);
+ vec3 c = vec3(1.0); vec3 d = vec3(0.263, 0.416, 0.557);
+ return a + b * cos(6.28318 * (c * t + d));
+}
+
+void main() {
+ vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+ float t = td.r + td.g * 1000.0;
+
+ vec2 res = uTDOutputInfo.res.zw;
+ vec2 uv = (gl_FragCoord.xy * 2.0 - res) / min(res.x, res.y);
+ vec2 uv0 = uv;
+ vec3 finalColor = vec3(0.0);
+
+ float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;
+ float mids = texture(sTD2DInputs[1], vec2(0.25, 0.25)).r;
+ float highs = texture(sTD2DInputs[1], vec2(0.65, 0.25)).r;
+
+ float ca = cos(t * (0.15 + mids * 0.3));
+ float sa = sin(t * (0.15 + mids * 0.3));
+ uv = mat2(ca, -sa, sa, ca) * uv;
+
+ for (float i = 0.0; i < 4.0; i++) {
+ uv = fract(uv * (1.4 + bass * 0.3)) - 0.5;
+ float d = length(uv) * exp(-length(uv0));
+ float freq = texture(sTD2DInputs[1], vec2(clamp(d*0.5, 0.0, 1.0), 0.25)).r;
+ vec3 col = palette(length(uv0) + i * 0.4 + t * 0.35);
+ d = sin(d * (7.0 + bass * 4.0) + t * 1.5) / 8.0;
+ d = abs(d);
+ d = pow(0.012 / d, 1.2 + freq * 0.8 + bass * 0.5);
+ finalColor += col * d;
+ }
+
+ float glow = (0.03 + bass * 0.05) / (length(uv0) + 0.03);
+ finalColor += vec3(0.4, 0.1, 0.7) * glow * (0.6 + 0.4 * sin(t * 2.5));
+
+ float ring = abs(length(uv0) - 0.4 - mids * 0.3);
+ finalColor += vec3(0.1, 0.6, 0.8) * (0.005 / ring) * (0.2 + highs * 0.5);
+
+ finalColor *= smoothstep(0.0, 1.0, 1.0 - dot(uv0*0.55, uv0*0.55));
+ finalColor = finalColor / (finalColor + vec3(1.0));
+
+ fragColor = TDOutputSwizzle(vec4(finalColor, 1.0));
+}
+```
+
+**How spectrum sampling drives the visual:**
+- `texture(sTD2DInputs[1], vec2(x, 0.25)).r` — x position = frequency (0=bass, 1=treble); y=0.25 samples the first channel's row of the 256x2 texture
+- Inner fractal iterations sample lower x → react to bass
+- Outer iterations sample higher x → react to treble
+- `bass * 0.3` on `fract()` scale → fractal zoom pulses with bass
+- `bass * 4.0` on sin frequency → line density pulses with bass
+- `mids * 0.3` on rotation speed → spiral twists faster during vocal/mid sections
+- `highs * 0.5` on ring opacity → high-frequency sparkle on outer ring
+
+**Recording the output:** Use MovieFileOut TOP with `mjpa` codec (H.264 requires Commercial license). See pitfalls #25-27.
+
+## GLSL Shaders
+
+### Pattern 10: Custom Fragment Shader
+
+Write a custom visual effect as a GLSL fragment shader.
+
+```
+Text DAT (shader code) -> GLSL TOP -> Level TOP -> Null TOP (out)
+ + optional input TOPs for texture sampling
+```
+
+**Common GLSL uniforms available in TouchDesigner:**
+
+```glsl
+// Automatically provided by TD
+uniform vec4 uTDOutputInfo; // .res.zw = resolution
+
+// NOTE: uTDCurrentTime does NOT exist in TD 099!
+// Feed time via a 1x1 Constant TOP (format=rgba32float):
+// t.par.colorr.expr = "absTime.seconds % 1000.0"
+// t.par.colorg.expr = "int(absTime.seconds / 1000.0)"
+// Then read in GLSL:
+// vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+// float t = td.r + td.g * 1000.0;
+
+// Input textures (from connected TOP inputs)
+uniform sampler2D sTD2DInputs[1]; // array of input samplers
+
+// From vertex shader
+in vec3 vUV; // UV coordinates (0-1 range)
+```
+
+**Example: Plasma shader (using time from input texture)**
+
+```glsl
+layout(location = 0) out vec4 fragColor;
+
+void main() {
+ vec2 uv = vUV.st;
+ // Read time from Constant TOP input 0 (rgba32float format)
+ vec4 td = texture(sTD2DInputs[0], vec2(0.5));
+ float t = td.r + td.g * 1000.0;
+
+ float v1 = sin(uv.x * 10.0 + t);
+ float v2 = sin(uv.y * 10.0 + t * 0.7);
+ float v3 = sin((uv.x + uv.y) * 10.0 + t * 1.3);
+ float v4 = sin(length(uv - 0.5) * 20.0 - t * 2.0);
+
+ float v = (v1 + v2 + v3 + v4) * 0.25;
+
+ vec3 color = vec3(
+ sin(v * 3.14159 + 0.0) * 0.5 + 0.5,
+ sin(v * 3.14159 + 2.094) * 0.5 + 0.5,
+ sin(v * 3.14159 + 4.189) * 0.5 + 0.5
+ );
+
+ fragColor = vec4(color, 1.0);
+}
+```
+
+### Pattern 11: Multi-Pass GLSL (Ping-Pong)
+
+For effects needing state across frames (particles, fluid, cellular automata), use GLSL Multi TOP with multiple passes or a Feedback TOP loop.
+
+```
+GLSL Multi TOP (pass 0: simulation, pass 1: rendering)
+ + Text DAT (simulation shader)
+ + Text DAT (render shader)
+ -> Level TOP -> Null TOP (out)
+ ^
+ |__ Feedback TOP (feeds simulation state back)
+```
+
+## Interactive Installations
+
+### Pattern 12: Mouse/Touch -> Visual Response
+
+```
+Mouse In CHOP -> Math CHOP (normalize to 0-1) -> [export to visual params]
+
+# Or for touch/multi-touch:
+Multi Touch In DAT -> Script CHOP (parse touches) -> [export to visual params]
+```
+
+```python
+# Drive the noise offset from the mouse position already normalized to 0-1 (mouse_norm = the Math CHOP above)
+td_execute_python: """
+op('/project1/noise1').par.offsetx.expr = "op('/project1/mouse_norm')['tx']"
+op('/project1/noise1').par.offsety.expr = "op('/project1/mouse_norm')['ty']"
+"""
+```
+
+### Pattern 13: OSC Control (from external software)
+
+```
+OSC In CHOP (port 7000) -> Select CHOP (pick channels) -> [export to visual params]
+```
+
+```
+1. td_create_operator(parent="/project1", type="oscinChop", name="osc_in")
+2. td_set_operator_pars(path="/project1/osc_in", properties={"port": 7000})
+
+# OSC messages like /frequency 440 will appear as channel "frequency" with value 440
+# Export to any parameter:
+3. td_execute_python: "op('/project1/noise1').par.period.expr = \"op('/project1/osc_in')['frequency']\""
+```
+
+### Pattern 14: MIDI Control (DJ/VJ)
+
+```
+MIDI In CHOP (device) -> Select CHOP -> [export channels to visual params]
+```
+
+Common MIDI mappings:
+- CC channels (knobs/faders): continuous 0-127, map to float params
+- Note On/Off: binary triggers, map to Trigger CHOP for envelopes
+- Velocity: intensity/brightness
+
+## Live Performance
+
+### Pattern 15: Multi-Source VJ Setup
+
+```
+Source A (generative) ----+
+Source B (video) ---------+-- Switch/Cross TOP -- Level TOP -- Window COMP (output)
+Source C (camera) --------+
+ ^
+ MIDI/OSC control selects active source and crossfade
+```
+
+```python
+# MIDI CC1 controls which source is active (0-127 -> 0-2)
+td_execute_python: """
+op('/project1/switch1').par.index.expr = "int(op('/project1/midi_in')['cc1'] / 43)"
+"""
+
+# MIDI CC2 controls crossfade between current and next
+td_execute_python: """
+op('/project1/cross1').par.cross.expr = "op('/project1/midi_in')['cc2'] / 127.0"
+"""
+```
+
+### Pattern 16: Projection Mapping
+
+```
+Content TOPs ----+
+ |
+Stoner TOP (UV mapping) -> Composite TOP -> Window COMP (projector output)
+ or
+Kantan Mapper COMP (external .tox)
+```
+
+For projection mapping, the key is:
+1. Create your visual content as standard TOPs
+2. Use Stoner TOP or a third-party mapping tool to UV-map content to physical surfaces
+3. Output via Window COMP to the projector
+
+### Pattern 17: Cue System
+
+```
+Table DAT (cue list: cue_number, scene, duration, transition_type)
+ |
+Script CHOP (cue state: current_cue, progress, next_cue_trigger)
+ |
+[export to Switch/Cross TOPs to transition between scenes]
+```
+
+```python
+td_execute_python: """
+# Simple cue system
+cue_table = op('/project1/cue_list')
+cue_state = op('/project1/cue_state')
+
+def advance_cue():
+ current = int(cue_state.par.value0.val)
+ next_cue = min(current + 1, cue_table.numRows - 1)
+ cue_state.par.value0.val = next_cue
+
+ scene = cue_table[next_cue, 'scene']
+ duration = float(cue_table[next_cue, 'duration'])
+
+ # Set crossfade target and duration
+ op('/project1/cross1').par.cross.val = 0
+ # Animate cross to 1.0 over duration seconds
+ # (use a Timer CHOP or LFO CHOP for smooth animation)
+"""
+```
+
+## Networking
+
+### Pattern 18: OSC Server/Client
+
+```
+# Sending OSC
+OSC Out CHOP -> (network) -> external application
+
+# Receiving OSC
+(network) -> OSC In CHOP -> Select CHOP -> [use values]
+```
+
+### Pattern 19: NDI Video Streaming
+
+```
+# Send video over network
+[any TOP chain] -> NDI Out TOP (source name)
+
+# Receive video from network
+NDI In TOP (select source) -> [process as normal TOP]
+```
+
+### Pattern 20: WebSocket Communication
+
+```
+WebSocket DAT -> Script DAT (parse JSON messages) -> [update visuals]
+```
+
+```python
+td_execute_python: """
+ws = op('/project1/websocket1')
+ws.par.address = 'ws://localhost:8080'
+ws.par.active = True
+
+# In a DAT Execute callback (Script DAT watching WebSocket DAT):
+# def onTableChange(dat):
+# import json
+# msg = json.loads(dat.text)
+# op('/project1/noise1').par.seed.val = msg.get('seed', 0)
+"""
+```
diff --git a/optional-skills/creative/touchdesigner-mcp/references/operators.md b/optional-skills/creative/touchdesigner-mcp/references/operators.md
new file mode 100644
index 00000000000..6aa716cb9a2
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/operators.md
@@ -0,0 +1,239 @@
+# TouchDesigner Operator Reference
+
+## Operator Families Overview
+
+TouchDesigner has 6 operator families. Each family processes a specific data type and is color-coded in the UI. Operators can only connect to others of the SAME family (with cross-family converters as the bridge).
+
+## TOPs — Texture Operators (Purple)
+
+2D image/texture processing on the GPU. The workhorse of visual output.
+
+### Generators (create images from nothing)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Noise TOP | `noiseTop` | `type` (0-6), `monochrome`, `seed`, `period`, `harmonics`, `exponent`, `amp`, `offset`, `resolutionw/h` | Procedural noise textures — Perlin, Simplex, Sparse, etc. Foundation of generative art. |
+| Constant TOP | `constantTop` | `colorr/g/b/a`, `resolutionw/h` | Solid color. Use as background or blend input. |
+| Text TOP | `textTop` | `text`, `fontsizex`, `fontfile`, `alignx/y`, `colorr/g/b` | Render text to texture. Supports multi-line, word wrap. |
+| Ramp TOP | `rampTop` | `type` (0=horizontal, 1=vertical, 2=radial, 3=circular), `phase`, `period` | Gradient textures for masking, color mapping. |
+| Circle TOP | `circleTop` | `radiusx/y`, `centerx/y`, `width` | Circles, rings, ellipses. |
+| Rectangle TOP | `rectangleTop` | `sizex/y`, `centerx/y`, `softness` | Rectangles with optional softness. |
+| GLSL TOP | `glslTop` | `dat` (points to shader DAT), `resolutionw/h`, `outputformat`, custom uniforms | Custom fragment shaders. Most powerful TOP for custom visuals. |
+| GLSL Multi TOP | `glslmultiTop` | `dat`, `numinputs`, `numoutputs`, `numcomputepasses` | Multi-pass GLSL with compute shaders. Advanced. |
+| Render TOP | `renderTop` | `camera`, `geometry`, `lights`, `resolutionw/h` | Renders 3D scenes (SOPs + MATs + Camera/Light COMPs). |
+
+### Filters (modify a single input)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Level TOP | `levelTop` | `opacity`, `brightness1/2`, `gamma1/2`, `contrast`, `invert`, `blacklevel/whitelevel` | Brightness, contrast, gamma, levels. Essential color correction. |
+| Blur TOP | `blurTop` | `sizex/y`, `type` (0=Gaussian, 1=Box, 2=Bartlett) | Gaussian/box blur. |
+| Transform TOP | `transformTop` | `tx/ty`, `sx/sy`, `rz`, `pivotx/y`, `extend` (0=Hold, 1=Zero, 2=Repeat, 3=Mirror) | Translate, scale, rotate textures. |
+| HSV Adjust TOP | `hsvadjustTop` | `hueoffset`, `saturationmult`, `valuemult` | HSV color adjustments. |
+| Lookup TOP | `lookupTop` | (input: texture + lookup table) | Color remapping via lookup table texture. |
+| Edge TOP | `edgeTop` | `type` (0=Sobel, 1=Frei-Chen) | Edge detection. |
+| Displace TOP | `displaceTop` | `scalex/y` | Pixel displacement using a second input as displacement map. |
+| Flip TOP | `flipTop` | `flipx`, `flipy`, `flop` (diagonal) | Mirror/flip textures. |
+| Crop TOP | `cropTop` | `cropleft/right/top/bottom` | Crop region of texture. |
+| Resolution TOP | `resolutionTop` | `resolutionw/h`, `outputresolution` | Resize textures. |
+| Null TOP | `nullTop` | (none significant) | Pass-through. Use for organization, referencing, feedback delay. |
+| Cache TOP | `cacheTop` | `length`, `step` | Store N frames of history. Useful for trails, time effects. |
+
+### Compositors (combine multiple inputs)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Composite TOP | `compositeTop` | `operand` (0-31: Over, Add, Multiply, Screen, etc.) | Blend two textures with standard compositing modes. |
+| Over TOP | `overTop` | (simple alpha compositing) | Layer with alpha. Simpler than Composite. |
+| Add TOP | `addTop` | (additive blend) | Additive blending. Great for glow, light effects. |
+| Multiply TOP | `multiplyTop` | (multiplicative blend) | Multiply blend. Good for masking, darkening. |
+| Switch TOP | `switchTop` | `index` (0-based) | Switch between multiple inputs by index. |
+| Cross TOP | `crossTop` | `cross` (0.0-1.0) | Crossfade between two inputs. |
+
+### I/O (input/output)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Movie File In TOP | `moviefileinTop` | `file`, `speed`, `trim`, `index` | Load video files, image sequences. |
+| Movie File Out TOP | `moviefileoutTop` | `file`, `type` (codec), `record` (toggle) | Record/export video files. |
+| NDI In TOP | `ndiinTop` | `sourcename` | Receive NDI video streams. |
+| NDI Out TOP | `ndioutTop` | `sourcename` | Send NDI video streams. |
+| Syphon Spout In/Out TOP | `syphonspoutinTop` / `syphonspoutoutTop` | `servername` | Inter-app texture sharing. |
+| Video Device In TOP | `videodeviceinTop` | `device` | Webcam/capture card input. |
+| Feedback TOP | `feedbackTop` | `top` (path to the TOP to feed back) | One-frame delay feedback. Essential for recursive effects. |
+
+### Converters
+
+| Operator | Type Name | Direction | Use |
+|----------|-----------|-----------|-----|
+| CHOP to TOP | `choptopTop` | CHOP -> TOP | Visualize channel data as texture (waveform, spectrum display). |
+| TOP to CHOP | `topchopChop` | TOP -> CHOP | Sample texture pixels as channel data. |
+
+## CHOPs — Channel Operators (Green)
+
+Time-varying numeric data: audio, animation curves, sensor data, control signals.
+
+### Generators
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Constant CHOP | `constantChop` | `name0/value0`, `name1/value1`... | Static named channels. Control panel for parameters. |
+| LFO CHOP | `lfoChop` | `frequency`, `type` (0=Sin, 1=Tri, 2=Square, 3=Ramp, 4=Pulse), `amp`, `offset`, `phase` | Low frequency oscillator. Animation driver. |
+| Noise CHOP | `noiseChop` | `type`, `roughness`, `period`, `amp`, `seed`, `channels` | Smooth random motion. Organic animation. |
+| Pattern CHOP | `patternChop` | `type` (0=Sine, 1=Triangle, ...), `length`, `cycles` | Generate waveform patterns. |
+| Timer CHOP | `timerChop` | `length`, `play`, `cue`, `cycles` | Countdown/count-up timer with cue points. |
+| Count CHOP | `countChop` | `threshold`, `limittype`, `limitmin/max` | Event counter with wrapping/clamping. |
+
+### Audio
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Audio File In CHOP | `audiofileinChop` | `file`, `volume`, `play`, `speed`, `trim` | Play audio files. |
+| Audio Device In CHOP | `audiodeviceinChop` | `device`, `channels` | Live microphone/line input. |
+| Audio Spectrum CHOP | `audiospectrumChop` | `size` (FFT size), `outputformat` (0=Power, 1=Magnitude) | FFT frequency analysis. |
+| Audio Band EQ CHOP | `audiobandeqChop` | `bands`, `gaindb` per band | Frequency band isolation. |
+| Audio Device Out CHOP | `audiodeviceoutChop` | `device` | Audio playback output. |
+
+### Math/Logic
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Math CHOP | `mathChop` | `preoff`, `gain`, `postoff`, `chanop` (0=Off, 1=Add, 2=Subtract, 3=Multiply...) | Math operations on channels. The Swiss army knife. |
+| Logic CHOP | `logicChop` | `preop` (0=Off, 1=AND, 2=OR, 3=XOR, 4=NAND), `convert` | Boolean logic on channels. |
+| Filter CHOP | `filterChop` | `type` (0=Low Pass, 1=Band Pass, 2=High Pass, 3=Notch), `cutofffreq`, `filterwidth` | Smooth, dampen, filter signals. |
+| Lag CHOP | `lagChop` | `lag1/2`, `overshoot1/2` | Smooth transitions with overshoot. |
+| Limit CHOP | `limitChop` | `type` (0=Clamp, 1=Loop, 2=ZigZag), `min/max` | Clamp or wrap channel values. |
+| Speed CHOP | `speedChop` | (none significant) | Integrate values (velocity to position, acceleration to velocity). |
+| Trigger CHOP | `triggerChop` | `attack`, `peak`, `decay`, `sustain`, `release` | ADSR envelope from trigger events. |
+| Select CHOP | `selectChop` | `chop` (path), `channames` | Reference channels from another CHOP. |
+| Merge CHOP | `mergeChop` | `align` (0=Extend, 1=Trim to First, 2=Trim to Shortest) | Combine channels from multiple CHOPs. |
+| Null CHOP | `nullChop` | (none significant) | Pass-through for organization and referencing. |
+
+### Input Devices
+
+| Operator | Type Name | Use |
+|----------|-----------|-----|
+| Mouse In CHOP | `mouseinChop` | Mouse position, buttons, wheel. |
+| Keyboard In CHOP | `keyboardinChop` | Keyboard key states. |
+| MIDI In CHOP | `midiinChop` | MIDI note/CC input. |
+| OSC In CHOP | `oscinChop` | OSC message input (network). |
+
+## SOPs — Surface Operators (Blue)
+
+3D geometry: points, polygons, NURBS, meshes.
+
+### Generators
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Grid SOP | `gridSop` | `rows`, `cols`, `sizex/y`, `type` (0=Polygon, 1=Mesh, 2=NURBS) | Flat grid mesh. Foundation for displacement, instancing. |
+| Sphere SOP | `sphereSop` | `type`, `rows`, `cols`, `radius` | Sphere geometry. |
+| Box SOP | `boxSop` | `sizex/y/z` | Box geometry. |
+| Torus SOP | `torusSop` | `radiusx/y`, `rows`, `cols` | Donut shape. |
+| Circle SOP | `circleSop` | `type`, `radius`, `divs` | Circle/ring geometry. |
+| Line SOP | `lineSop` | `dist`, `points` | Line segments. |
+| Text SOP | `textSop` | `text`, `fontsizex`, `fontfile`, `extrude` | 3D text geometry. |
+
+### Modifiers
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Transform SOP | `transformSop` | `tx/ty/tz`, `rx/ry/rz`, `sx/sy/sz` | Transform geometry (translate, rotate, scale). |
+| Noise SOP | `noiseSop` | `type`, `amp`, `period`, `roughness` | Deform geometry with noise. |
+| Sort SOP | `sortSop` | `ptsort`, `primsort` | Reorder points/primitives. |
+| Facet SOP | `facetSop` | `unique`, `consolidate`, `computenormals` | Normals, consolidation, unique points. |
+| Merge SOP | `mergeSop` | (none significant) | Combine multiple geometry inputs. |
+| Null SOP | `nullSop` | (none significant) | Pass-through. |
+
+## DATs — Data Operators (White)
+
+Text, tables, scripts, network data.
+
+### Core
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Table DAT | `tableDat` | (edit content directly) | Spreadsheet-like data tables. |
+| Text DAT | `textDat` | (edit content directly) | Arbitrary text content. Shader code, configs, scripts. |
+| Script DAT | `scriptDat` | `language` (0=Python, 1=C++) | Custom callbacks and DAT processing. |
+| CHOP Execute DAT | `chopexecDat` | `chop` (path to watch), callbacks | Trigger Python on CHOP value changes. |
+| DAT Execute DAT | `datexecDat` | `dat` (path to watch) | Trigger Python on DAT content changes. |
+| Panel Execute DAT | `panelexecDat` | `panel` | Trigger Python on UI panel events. |
+
+### I/O
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Web DAT | `webDat` | `url`, `fetchmethod` (0=GET, 1=POST) | HTTP requests. API integration. |
+| TCP/IP DAT | `tcpipDat` | `address`, `port`, `mode` | TCP networking. |
+| OSC In DAT | `oscinDat` | `port` | Receive OSC as text messages. |
+| Serial DAT | `serialDat` | `port`, `baudrate` | Serial port communication (Arduino, etc.). |
+| File In DAT | `fileinDat` | `file` | Read text files. |
+| File Out DAT | `fileoutDat` | `file`, `write` | Write text files. |
+
+### Conversions
+
+| Operator | Type Name | Direction | Use |
+|----------|-----------|-----------|-----|
+| DAT to CHOP | `dattochopChop` | DAT -> CHOP | Convert table data to channels. |
+| CHOP to DAT | `choptodatDat` | CHOP -> DAT | Convert channel data to table rows. |
+| SOP to DAT | `soptodatDat` | SOP -> DAT | Extract geometry data as table. |
+
+## MATs — Material Operators (Yellow)
+
+Materials for 3D rendering in Render TOP / Geometry COMP.
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Phong MAT | `phongMat` | `diff_colorr/g/b`, `spec_colorr/g/b`, `shininess`, `colormap`, `normalmap` | Classic Phong shading. Simple, fast. |
+| PBR MAT | `pbrMat` | `basecolorr/g/b`, `metallic`, `roughness`, `normalmap`, `emitcolorr/g/b` | Physically-based rendering. Realistic materials. |
+| GLSL MAT | `glslMat` | `dat` (shader DAT), custom uniforms | Custom vertex + fragment shaders for 3D. |
+| Constant MAT | `constMat` | `colorr/g/b`, `colormap` | Flat unlit color/texture. No shading. |
+| Point Sprite MAT | `pointspriteMat` | `colormap`, `scale` | Render points as camera-facing sprites. Great for particles. |
+| Wireframe MAT | `wireframeMat` | `colorr/g/b`, `width` | Wireframe rendering. |
+| Depth MAT | `depthMat` | `near`, `far` | Render depth buffer as grayscale. |
+
+## COMPs — Component Operators (Gray)
+
+Containers, 3D scene elements, UI components.
+
+### 3D Scene
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Geometry COMP | `geometryComp` | `material` (path), `instancechop` (path), `instancing` (toggle) | Renders geometry with material. Instancing host. |
+| Camera COMP | `cameraComp` | `tx/ty/tz`, `rx/ry/rz`, `fov`, `near/far` | Camera for Render TOP. |
+| Light COMP | `lightComp` | `lighttype` (0=Point, 1=Directional, 2=Spot, 3=Cone), `dimmer`, `colorr/g/b` | Lighting for 3D scenes. |
+| Ambient Light COMP | `ambientlightComp` | `dimmer`, `colorr/g/b` | Ambient lighting. |
+| Environment Light COMP | `envlightComp` | `envmap` | Image-based lighting (IBL). |
+
+### Containers
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Container COMP | `containerComp` | `w`, `h`, `bgcolor1/2/3` | UI container. Holds other COMPs for panel layouts. |
+| Base COMP | `baseComp` | (none significant) | Generic container. Networks-inside-networks. |
+| Replicator COMP | `replicatorComp` | `template`, `operatorsdat` | Clone a template operator N times from a table. |
+
+### Utilities
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Window COMP | `windowComp` | `winw/h`, `winoffsetx/y`, `monitor`, `borders` | Output window for display/projection. |
+| Select COMP | `selectComp` | `rowcol`, `panel` | Select and display content from elsewhere. |
+| Engine COMP | `engineComp` | `tox`, `externaltox` | Load external .tox components. Sub-process isolation. |
+
+## Cross-Family Converter Summary
+
+| From | To | Operator | Type Name |
+|------|-----|----------|-----------|
+| CHOP | TOP | CHOP to TOP | `choptopTop` |
+| TOP | CHOP | TOP to CHOP | `topchopChop` |
+| DAT | CHOP | DAT to CHOP | `dattochopChop` |
+| CHOP | DAT | CHOP to DAT | `choptodatDat` |
+| SOP | CHOP | SOP to CHOP | `soptochopChop` |
+| CHOP | SOP | CHOP to SOP | `choptosopSop` |
+| SOP | DAT | SOP to DAT | `soptodatDat` |
+| DAT | SOP | DAT to SOP | `dattosopSop` |
+| SOP | TOP | (use Render TOP + Geometry COMP) | — |
+| TOP | SOP | TOP to SOP | `toptosopSop` |
diff --git a/optional-skills/creative/touchdesigner-mcp/references/pitfalls.md b/optional-skills/creative/touchdesigner-mcp/references/pitfalls.md
new file mode 100644
index 00000000000..33c9b5f4d87
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/pitfalls.md
@@ -0,0 +1,508 @@
+# TouchDesigner MCP — Pitfalls & Lessons Learned
+
+Hard-won knowledge from real TD sessions. Read this before building anything.
+
+## Parameter Names
+
+### 1. NEVER hardcode parameter names — always discover
+
+Parameter names change between TD versions. What works in one build may not work in another. ALWAYS use td_get_par_info to discover actual names from TD.
+
+The agent's LLM training data contains WRONG parameter names. Do not trust them.
+
+Known historical differences (may vary further — always verify):
+| What docs/training say | Actual in some versions | Notes |
+|---------------|---------------|-------|
+| `dat` | `pixeldat` | GLSL TOP pixel shader DAT |
+| `colora` | `alpha` | Constant TOP alpha |
+| `sizex` / `sizey` | `size` | Blur TOP (single value) |
+| `fontr/g/b` | `fontcolorr/g/b` | Text TOP font color (r/g/b only; alpha is a separate parameter) |
+| `fontcolora` | `fontalpha` | Text TOP font alpha (NOT `fontcolora`) |
+| `bgcolora` | `bgalpha` | Text TOP bg alpha |
+| `value1name` | `vec0name` | GLSL TOP uniform name |
+
+### 2. twozero td_execute_python response format
+
+When calling `td_execute_python` via twozero MCP, successful responses return `(ok)` followed by FPS/error summary (e.g. `[fps 60.0/60] [0 err/0 warn]`), NOT the raw Python `result` dict. If you're parsing responses programmatically, check for the `(ok)` prefix — don't pattern-match on Python variable names from the script. Use `td_get_operator_info` or separate inspection calls to read back values.
+
+### 3. When using td_set_operator_pars, param names must match exactly
+
+Use td_get_par_info to discover them. The MCP tool validates parameter names and returns clear errors explaining what went wrong, unlike raw Python which crashes the whole script with tdAttributeError and stops execution. Always discover before setting.
+
+### 4. Use `safe_par()` pattern for cross-version compatibility
+
+```python
+def safe_par(node, name, value):
+ p = getattr(node.par, name, None)
+ if p is not None:
+ p.val = value
+ return True
+ return False
+```
+
+### 5. `td.tdAttributeError` crashes the whole script — use defensive access
+
+If you do `node.par.nonexistent = value`, TD raises `tdAttributeError` and stops the entire script. Prevention is better than catching:
+- Use `op()` instead of `opex()` — `op()` returns None on failure, `opex()` raises
+- Use `hasattr(node.par, 'name')` before accessing any parameter
+- Use `getattr(node.par, 'name', None)` with a default
+- Use the `safe_par()` pattern from pitfall #4
+
+```python
+# WRONG — crashes if param doesn't exist:
+node.par.nonexistent = value
+
+# CORRECT — defensive access:
+if hasattr(node.par, 'nonexistent'):
+ node.par.nonexistent = value
+```
+
+### 6. `outputresolution` is a string menu, not an integer
+
+```
+menuNames: ['useinput','eighth','quarter','half','2x','4x','8x','fit','limit','custom','parpanel']
+```
+Always use the string form. Setting `outputresolution = 9` may silently fail.
+```python
+node.par.outputresolution = 'custom' # correct
+node.par.resolutionw = 1280; node.par.resolutionh = 720
+```
+Discover valid values: `list(node.par.outputresolution.menuNames)`
+
+## GLSL Shaders
+
+### 7. `uTDCurrentTime` does NOT exist in GLSL TOP
+
+There is NO built-in time uniform for GLSL TOPs. GLSL MAT has `uTDGeneral.seconds` but that's NOT available in GLSL TOP context.
+
+**PRIMARY — GLSL TOP Vectors/Values page:**
+```python
+gl.par.value0name = 'uTime'
+gl.par.value0.expr = "absTime.seconds"
+# In GLSL: uniform float uTime;
+```
+
+**FALLBACK — Constant TOP texture (for complex time data):**
+
+CRITICAL: set format to `rgba32float` — default 8-bit clamps to 0-1:
+```python
+t = root.create(constantTOP, 'time_driver')
+t.par.format = 'rgba32float'
+t.par.outputresolution = 'custom'
+t.par.resolutionw = 1; t.par.resolutionh = 1
+t.par.colorr.expr = "absTime.seconds % 1000.0"
+t.outputConnectors[0].connect(glsl.inputConnectors[0])
+```
+
+### 8. GLSL compile errors are silent in the API
+
+The GLSL TOP shows a yellow warning triangle in the UI but `node.errors()` may return empty string. Check `node.warnings()` too, and create an Info DAT pointed at the GLSL TOP to read the actual compiler output.
+
+### 9. TD GLSL uses `vUV.st` not `gl_FragCoord` — and REQUIRES `TDOutputSwizzle()` on macOS
+
+Standard GLSL patterns don't work. TD provides:
+- `vUV.st` — UV coordinates (0-1)
+- `uTDOutputInfo.res.zw` — resolution
+- `sTD2DInputs[0]` — input textures
+- `layout(location = 0) out vec4 fragColor` — output
+
+CRITICAL on macOS: Always wrap output with `TDOutputSwizzle()`:
+```glsl
+fragColor = TDOutputSwizzle(color);
+```
+TD uses GLSL 4.60 (Vulkan backend). Support for GLSL 3.30 and earlier has been removed.
+
+### 10. Large GLSL shaders — write to temp file
+
+GLSL code with special characters can corrupt JSON payloads. Write the shader to a temp file and load it in TD:
+```python
+# Agent side: write shader to /tmp/shader.glsl via write_file
+# TD side:
+sd = root.create(textDAT, 'shader_code')
+with open('/tmp/shader.glsl', 'r') as f:
+ sd.text = f.read()
+```
+
+## Node Management
+
+### 11. Destroying nodes while iterating `root.children` causes `tdError`
+
+The iterator is invalidated when a child is destroyed. Always snapshot first:
+```python
+kids = list(root.children) # snapshot
+for child in kids:
+ if child.valid: # check — earlier destroys may cascade
+ child.destroy()
+```
+
+### 11b. Split cleanup and creation into SEPARATE td_execute_python calls
+
+Creating nodes with the same names you just destroyed in the SAME script causes "Invalid OP object" errors — even with `list()` snapshot. TD's internal references can go stale within one execution context.
+
+**WRONG (single call):**
+```python
+# td_execute_python:
+for c in list(root.children):
+ if c.valid and c.name.startswith('promo_'):
+ c.destroy()
+# ... then create promo_audio, promo_shader etc. in same script → CRASHES
+```
+
+**CORRECT (two separate calls):**
+```python
+# Call 1: td_execute_python — clean only
+for c in list(root.children):
+ if c.valid and c.name.startswith('promo_'):
+ c.destroy()
+
+# Call 2: td_execute_python — build (separate MCP call)
+audio = root.create(audiofileinCHOP, 'promo_audio')
+# ... rest of build
+```
+
+### 12. Feedback TOP: use `top` parameter, NOT direct input wire
+
+The feedbackTOP's `top` parameter references which TOP to delay. Do NOT also wire that TOP directly into the feedback's input — this creates a real cook dependency loop.
+
+Correct setup:
+```python
+fb = root.create(feedbackTOP, 'fb_delay')
+fb.par.top = comp.path # reference only — no wire to fb input
+fb.outputConnectors[0].connect(xf) # fb output -> transform -> fade -> comp
+```
+
+The "Cook dependency loop detected" warning on the transform/fade chain is expected.
+
+### 13. GLSL TOP auto-creates companion nodes
+
+Creating a `glslTOP` also creates `name_pixel` (Text DAT), `name_info` (Info DAT), and `name_compute` (Text DAT). These are visible in the network. Don't be alarmed by "extra" nodes.
+
+### 14. The default project root is `/project1`
+
+New TD files start with `/project1` as the main container. System nodes live at `/`, `/ui`, `/sys`, `/local`, `/perform`. Don't create user nodes outside `/project1`.
+
+### 15. Non-Commercial license caps resolution at 1280x1280
+
+Setting `resolutionw=1920` silently clamps to 1280. Always check effective resolution after creation:
+```python
+n.cook(force=True)
+actual = str(n.width) + 'x' + str(n.height)
+```
+
+## Recording & Codecs
+
+### 16. MovieFileOut TOP: H.264/H.265/AV1 requires Commercial license
+
+In Non-Commercial TD, these codecs produce an error. Recommended alternatives:
+- `prores` — Apple ProRes, **best on macOS**, HW accelerated, NOT license-restricted. ~55MB/s at 1280x720 but visually lossless quality. **Use this as default on macOS.**
+- `cineform` — GoPro Cineform, supports alpha
+- `hap` — GPU-accelerated playback, large files
+- `notchlc` — GPU-accelerated, good quality
+- `mjpa` — Motion JPEG, legacy fallback (lossy, use only if ProRes unavailable)
+
+For image sequences: `rec.par.type = 'imagesequence'`, `rec.par.imagefiletype = 'png'`
+
+### 17. MovieFileOut `.record()` method may not exist
+
+Use the toggle parameter instead:
+```python
+rec.par.record = True # start recording
+rec.par.record = False # stop recording
+```
+
+When setting file path and starting recording in the same script, use delayFrames:
+```python
+rec.par.file = '/tmp/new_output.mov'
+run("op('/project1/recorder').par.record = True", delayFrames=2)
+```
+
+### 18. TOP.save() captures same frame when called rapidly
+
+Use MovieFileOut for real-time recording. Set `project.realTime = False` for frame-accurate output.
+
+### 19. AudioFileIn CHOP: cue and recording sequence matters
+
+The recording sequence must be done in exact order, or the recording will be empty, audio will start mid-file, or the file won't be written.
+
+**Proven recording sequence:**
+
+```python
+# Step 1: Stop any existing recording
+rec.par.record = False
+
+# Step 2: Reset audio to beginning
+audio.par.play = False
+audio.par.cue = True
+audio.par.cuepoint = 0 # may need cuepointunit=0 too
+# Verify: audio.par.cue.eval() should be True
+
+# Step 3: Set output file path
+rec.par.file = '/tmp/output.mov'
+
+# Step 4: Release cue + start playing + start recording (with frame delay)
+audio.par.cue = False
+audio.par.play = True
+audio.par.playmode = 2 # Sequential — plays once through
+run("op('/project1/recorder').par.record = True", delayFrames=3)
+```
+
+**Why each step matters:**
+- `rec.par.record = False` first — if a previous recording is active, setting `par.file` may fail silently
+- `audio.par.cue = True` + `cuepoint = 0` — guarantees audio starts from the beginning, otherwise the spectrum may be silent for the first few seconds
+- `delayFrames=3` on the record start — setting `par.file` and `par.record = True` in the same script can race; the file path needs a frame to register before recording starts
+- `playmode = 2` (Sequential) — plays the file once. Use `playmode = 0` (Locked to Timeline) if you want TD's timeline to control position
+
+## TD Python API Patterns
+
+### 20. COMP extension setup: ext0object format is CRITICAL
+
+`ext0object` expects a CONSTANT string (NOT expression mode):
+```python
+comp.par.ext0object = "op('./myExtensionDat').module.MyClassName(me)"
+```
+NEVER set as just the DAT name. NEVER use ParMode.EXPRESSION. ALWAYS ensure the DAT has `par.language='python'`.
+
+### 21. td.Panel is NOT subscriptable — use attribute access
+
+```python
+comp.panel.select # correct (attribute access, returns float)
+comp.panel['select'] # WRONG — 'td.Panel' object is not subscriptable
+```
+
+### 22. ALWAYS use relative paths in script callbacks
+
+In scriptTOP/CHOP/SOP/DAT callbacks, use paths relative to `scriptOp` or `me`:
+```python
+root = scriptOp.parent().parent()
+dat = root.op('pixel_data')
+```
+NEVER hardcode absolute paths like `op('/project1/myComp/child')` — they break when containers are renamed or copied.
+
+### 23. keyboardinCHOP channel names have 'k' prefix
+
+Channel names are `kup`, `kdown`, `kleft`, `kright`, `ka`, `kb`, etc. — NOT `up`, `down`, `a`, `b`. Always verify with:
+```python
+channels = [c.name for c in op('/project1/keyboard1').chans()]
+```
+
+### 24. expressionCHOP cook-only properties — false positive errors
+
+`me.inputVal`, `me.chanIndex`, `me.sampleIndex` work ONLY in cook-context. Calling `par.expr0expr.eval()` from outside always raises an error — this is NOT a real operator error. Ignore these in error scans.
+
+### 25. td.Vertex attributes — use index access not named attributes
+
+In TD 2025.32, `td.Vertex` objects do NOT have `.x`, `.y`, `.z` attributes:
+```python
+# WRONG — crashes:
+vertex.x, vertex.y, vertex.z
+
+# CORRECT — index-based:
+vertex.point.P[0], vertex.point.P[1], vertex.point.P[2]
+# Or for SOP point positions:
+pt = sop.points()[i]
+pos = pt.P # use P[0], P[1], P[2]
+```
+
+## Audio
+
+### 26. Audio Spectrum CHOP output is weak — boost it
+
+Raw output is very small (0.001-0.05). Use built-in boost: `spectrum.par.highfrequencyboost = 3.0`
+
+If still weak, add Math CHOP in Range mode: `fromrangehi=0.05, torangehi=1.0`
+
+### 27. AudioSpectrum CHOP: timeslice and sample count are the #1 gotcha
+
+AudioSpectrum at 44100Hz with `timeslice=False` outputs the ENTIRE audio file as samples (~24000+). CHOP-to-TOP then exceeds texture resolution max and warns/fails.
+
+**Fix:** Keep `timeslice = True` (default) for real-time per-frame FFT. Set `fftsize` to control bin count (it's a STRING enum: `'256'` not `256`).
+
+If the CHOP-to-TOP still gets too many samples, set `layout = 'rowscropped'` on the choptoTOP.
+
+```python
+spectrum.par.fftsize = '256' # STRING, not int — enum values
+spectrum.par.timeslice = True # MUST be True for real-time audio reactivity
+spectex.par.layout = 'rowscropped' # handles oversized CHOP inputs
+```
+
+**resampleCHOP has NO `numsamples` param.** It uses `rate`, `start`, `end`, `method`. Don't guess — always `td_get_par_info('resampleCHOP')` first.
+
+### 28. CHOP To TOP has NO input connectors — use par.chop reference
+
+```python
+spec_tex = root.create(choptoTOP, 'spectrum_tex')
+spec_tex.par.chop = resample # correct: parameter reference
+# NOT: resample.outputConnectors[0].connect(spec_tex.inputConnectors[0]) # WRONG
+```
+
+## Workflow
+
+### 29. Always verify after building — errors are silent
+
+Node errors and broken connections produce no output. Always check:
+```python
+for c in list(root.children):
+ e = c.errors()
+ w = c.warnings()
+ if e: print(c.name, 'ERR:', e)
+ if w: print(c.name, 'WARN:', w)
+```
+
+### 30. Window COMP param for display target is `winop`
+
+```python
+win = root.create(windowCOMP, 'display')
+win.par.winop = '/project1/logo_out'
+win.par.winw = 1280; win.par.winh = 720
+win.par.winopen.pulse()
+```
+
+### 31. `sample()` returns frozen pixels in rapid calls
+
+`out.sample(x, y)` returns pixels from a single cook snapshot. Compare samples with 2+ second delays, or use screencapture on the display window.
+
+### 32. Audio-reactive GLSL: dual-layer sync pipeline
+
+For audio-synced visuals, use BOTH layers for maximum effect:
+
+**Layer 1 (TD-side, real-time):** AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
+
+**Layer 2 (Python-side, post-hoc):** scipy FFT on the SAME audio file → per-frame features (rms, bass, mid, hi, beat detection) → drive ASCII brightness, chromatic aberration, beat flashes during the render pass.
+
+Both layers locked to the same audio file = visuals genuinely sync to the beat at two independent stages.
+
+**Key gotcha:** AudioFileIn must be cued (`par.cue=True` → `par.cuepulse.pulse()`) then uncued (`par.cue=False`, `par.play=True`) before recording starts. Otherwise the spectrum is silent for the first few seconds.
+
+### 33. twozero MCP: benchmark and prefer native tools
+
+Benchmarked April 2026: twozero MCP with 36 native tools. The old curl/REST method (port 9981) had zero native tools.
+
+**Always prefer native MCP tools over td_execute_python:**
+- `td_create_operator` over `root.create()` scripts (handles viewport positioning)
+- `td_set_operator_pars` over `node.par.X = Y` scripts (validates param names)
+- `td_get_par_info` over temp-node discovery dance (instant, no cleanup)
+- `td_get_errors` over manual `c.errors()` loops
+- `td_get_focus` for context awareness (no equivalent in old method)
+
+Only fall back to `td_execute_python` for multi-step logic (wiring chains, conditional builds, loops).
+
+### 34. twozero td_execute_python response wrapping
+
+twozero wraps `td_execute_python` responses with status info: `(ok)\n\n[fps 60.0/60] [0 err/0 warn]`. Your Python `result` variable value may not appear verbatim in the response text. If you need to check results programmatically, use `print()` statements in the script — they appear in the response. Don't rely on string-matching the `result` dict.
+
+### 35. Audio-reactive chain: DO NOT use Lag CHOP or Filter CHOP for spectrum smoothing
+
+The Derivative docs and tutorials suggest using Lag CHOP (lag1=0.2, lag2=0.5) to smooth raw FFT output before passing to a shader. **This does NOT work with AudioSpectrum → CHOP to TOP → GLSL.**
+
+What happens: Lag CHOP operates in timeslice mode. A 256-sample spectrum input gets expanded to 1600-2400 samples. The Lag averaging drives all values to near-zero (~1e-06). The CHOP to TOP produces a 2400x2 texture instead of 256x2. The shader receives effectively zero audio data.
+
+**The correct chain is: Spectrum(outlength=256) → Math(gain=10) → CHOPtoTOP → GLSL.** No CHOP smoothing at all. If you need smoothing, do it in the GLSL shader via temporal lerp with a feedback texture.
+
+Verified values with audio playing:
+- Without Lag CHOP: bass bins = 5.0-5.4, mid bins = 1.0-1.7 (strong, usable)
+- With Lag CHOP: ALL bins = 0.000001-0.00004 (dead, zero audio reactivity)
+
+### 36. AudioSpectrum Output Length: set manually to avoid CHOP to TOP overflow
+
+AudioSpectrum in Visualization mode with FFT 8192 outputs 22,050 samples by default (1 per Hz, 0–22050). CHOP to TOP cannot handle this — you get "Number of samples exceeded texture resolution max".
+
+Fix: `spectrum.par.outputmenu = 'setmanually'` and `spectrum.par.outlength = 256`. This gives 256 frequency bins — plenty for visual FFT.
+
+DO NOT set `timeslice = False` as a workaround — that processes the entire audio file at once and produces even more samples.
+
+### 37. GLSL spectrum texture from CHOP to TOP is 256x2 not 256x1
+
+AudioSpectrum outputs 2 channels (stereo: chan1, chan2). CHOP to TOP with `dataformat='r'` creates a 256x2 texture — one row per channel. Sample the first channel at `y=0.25` (center of first row), NOT `y=0.5` (boundary between rows):
+
+```glsl
+float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r; // correct
+float bass = texture(sTD2DInputs[1], vec2(0.05, 0.5)).r; // WRONG — samples between rows
+```
+
+### 38. FPS=0 doesn't mean ops aren't cooking — check play state
+
+TD can show `fps:0` in `td_get_perf` while ops still cook and `TOP.save()` still produces valid screenshots. The two most common causes:
+
+**a) Project is paused (playbar stopped).** TD's playbar can be toggled with spacebar. The `root` at `/` has no `.playbar` attribute (it's on the perform COMP). The easiest fix is sending a spacebar keypress via `td_input_execute`, though this tool can sometimes error. As a workaround, `TOP.save()` always works regardless of play state — use it to verify rendering is actually happening before spending time debugging FPS.
+
+**b) Audio device CHOP blocking the main thread.** An `audiooutCHOP` with an active audio device can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. Fix: keep the CHOP active but set `volume=0` to prevent the audio driver from blocking. Disabling it entirely (`active=False`) may also work but can prevent downstream audio processing CHOPs from cooking.
+
+Diagnostic sequence when FPS=0:
+1. `td_get_perf` — check if any op has extreme CPU/s
+2. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
+3. Check for blocking CHOPs (audioout, audiodevin, etc.)
+4. Toggle play state (spacebar, or check if absTime.seconds is advancing)
+
+### 39. Recording while FPS=0 produces empty or near-empty files
+
+This is the #1 cause of "I recorded for 30 seconds but got a 2-frame video." If TD's cook loop is stalled (FPS=0 or very low), MovieFileOut has nothing to record. Unlike `TOP.save()` which captures the last cooked frame regardless, MovieFileOut only writes frames that actually cook.
+
+**Always verify FPS before starting a recording:**
+```python
+# Check via td_get_perf first
+# If FPS < 30, do NOT start recording — fix the performance issue first
+# If FPS=0, the playbar is likely paused — see pitfall #38
+```
+
+Common causes of recording empty video:
+- Playbar paused (FPS=0) — see pitfall #38a
+- Audio device CHOP blocking the main thread — see pitfall #38b
+- Recording started before audio was cued — audio is silent, GLSL outputs black, MovieFileOut records black frames that look empty
+- `par.file` set in the same script as `par.record = True` — see pitfall #18
+
+### 40. GLSL shader produces black output — test before committing to a long render
+
+New GLSL shaders can fail silently (see pitfall #7). Before recording a long take, always:
+
+1. **Write a minimal test shader first** that just outputs a solid color or pass-through:
+```glsl
+void main() {
+ vec2 uv = vUV.st;
+ fragColor = TDOutputSwizzle(vec4(uv, 0.0, 1.0));
+}
+```
+
+2. **Verify the test renders correctly** via `td_get_screenshot` on the GLSL TOP's output.
+
+3. **Swap in the real shader** and screenshot again immediately. If black, the shader has a compile error or logic issue.
+
+4. **Only then start recording.** A 90-second ProRes recording is ~5GB. Recording black frames wastes disk and time.
+
+Common causes of black GLSL output:
+- Missing `TDOutputSwizzle()` on macOS (pitfall #8)
+- Time uniform not connected — shader uses default 0.0, fractal stays at origin
+- Spectrum texture not connected — audio values all 0.0, driving everything to black
+- Integer division where float division was expected (`1/2 = 0` not `0.5`)
+- `absTime.seconds % 1000.0` rolled over past 1000 and the modulo produces unexpected values
+
+### 41. td_write_dat uses `text` parameter, NOT `content`
+
+The MCP tool `td_write_dat` expects a `text` parameter for full replacement. Passing `content` returns an error: `"Provide either 'text' for full replace, or 'old_text'+'new_text' for patching"`.
+
+If `td_write_dat` fails, fall back to `td_execute_python`:
+```python
+op("/project1/shader_code").text = shader_string
+```
+
+### 42. td_execute_python does NOT return stdout or print() output
+
+Despite what earlier versions of pitfall #34 stated, `print()` and `debug()` output from `td_execute_python` scripts does NOT appear in the MCP response. The response is always just `(ok)` + FPS/error summary. To read values back, use dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`) instead of trying to print from within a script.
+
+### 43. td_get_operator_info JSON is appended with `[fps X.X/X]` — breaks json.loads()
+
+The response text from `td_get_operator_info` has `[fps 60.0/60]` appended after the JSON object. This causes `json.loads()` to fail with "Extra data" errors. Strip it before parsing:
+```python
+clean = response_text.rsplit('[fps', 1)[0]
+data = json.loads(clean)
+```
+
+### 44. td_get_screenshot is asynchronous — returns `{"status": "pending"}`
+
+Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file appears later. Wait a few seconds before checking for the file. There is no callback or completion notification — poll the filesystem.
+
+### 45. Recording duration is manual — no auto-stop at audio end
+
+MovieFileOut records until `par.record = False` is set. If audio ends before you stop recording, the file keeps growing with repeated frames. Always stop recording promptly after the audio duration. For precision: set a timer on the agent side matching the audio length, then send `par.record = False`. Trim excess with ffmpeg as a safety net:
+```bash
+ffmpeg -i raw.mov -t 25 -c copy trimmed.mov
+```
\ No newline at end of file
diff --git a/optional-skills/creative/touchdesigner-mcp/references/python-api.md b/optional-skills/creative/touchdesigner-mcp/references/python-api.md
new file mode 100644
index 00000000000..f2955110b0e
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/python-api.md
@@ -0,0 +1,463 @@
+# TouchDesigner Python API Reference
+
+## The td Module
+
+TouchDesigner's Python environment auto-imports the `td` module. All TD-specific classes, functions, and constants live here. Scripts inside TD (Script DATs, CHOP/DAT Execute callbacks, Extensions) have full access.
+
+When using the MCP `td_execute_python` tool, these globals are pre-loaded:
+- `op` — shortcut for `td.op()`, finds operators by path
+- `ops` — shortcut for `td.ops()`, finds multiple operators by pattern
+- `me` — the operator running the script (via MCP this is the twozero internal executor)
+- `parent` — shortcut for `me.parent()`
+- `project` — the root project component
+- `td` — the full td module
+
+## Finding Operators: op() and ops()
+
+### op(path) — Find a single operator
+
+```python
+# Absolute path (always works from MCP)
+node = op('/project1/noise1')
+
+# Relative path (relative to current operator — only in Script DATs)
+node = op('noise1') # sibling
+node = op('../noise1') # parent's sibling
+
+# Returns None if not found (does NOT raise)
+node = op('/project1/nonexistent') # None
+```
+
+### ops(pattern) — Find multiple operators
+
+```python
+# Glob patterns
+nodes = ops('/project1/noise*') # all nodes starting with "noise"
+nodes = ops('/project1/*') # all direct children
+nodes = ops('/project1/container1/*') # all children of container1
+
+# Returns a tuple of operators (may be empty)
+for n in ops('/project1/*'):
+ print(n.name, n.OPType)
+```
+
+### Navigation from a node
+
+```python
+node = op('/project1/noise1')
+
+node.name # 'noise1'
+node.path # '/project1/noise1'
+node.OPType   # 'noiseTOP'
+node.type     # 'noise' (operator type without the family suffix)
+node.family # 'TOP'
+
+# Parent / children
+node.parent() # the parent COMP
+node.parent().children # all siblings + self
+node.parent().findChildren(name='noise*') # filtered
+
+# Type checking
+node.isTOP # True
+node.isCHOP # False
+node.isSOP # False
+node.isDAT # False
+node.isMAT # False
+node.isCOMP # False
+```
+
+## Parameters
+
+Every operator has parameters accessed via the `.par` attribute.
+
+### Reading parameters
+
+```python
+node = op('/project1/noise1')
+
+# Direct access
+node.par.seed.val        # constant value only (ignores any expression/export/bind)
+node.par.seed.eval()     # current evaluated value, whatever the parameter mode
+node.par.seed.default # default value
+node.par.monochrome.val # boolean parameters: True/False
+
+# List all parameters
+for p in node.pars():
+ print(f"{p.name}: {p.val} (default: {p.default})")
+
+# Filter by page (parameter group)
+for p in node.pars('Noise'): # page name
+ print(f"{p.name}: {p.val}")
+```
+
+### Setting parameters
+
+```python
+# Direct value setting
+node.par.seed.val = 42
+node.par.monochrome.val = True
+node.par.resolutionw.val = 1920
+node.par.resolutionh.val = 1080
+
+# String parameters
+op('/project1/text1').par.text.val = 'Hello World'
+
+# File paths
+op('/project1/moviefilein1').par.file.val = '/path/to/video.mp4'
+
+# Reference another operator (for "dat", "chop", "top" type parameters)
+op('/project1/glsl1').par.dat.val = '/project1/shader_code'
+```
+
+### Parameter expressions
+
+```python
+# Python expressions that evaluate dynamically
+node.par.seed.expr = "me.time.frame"
+node.par.tx.expr = "math.sin(me.time.seconds * 2)"
+
+# Reference another parameter
+node.par.brightness1.expr = "op('/project1/constant1').par.value0.val"
+
+# Export (one-way binding from CHOP to parameter)
+# This makes the parameter follow a CHOP channel value
+op('/project1/noise1').par.seed.val # can also be driven by exports
+```
+
+### Parameter types
+
+| Type | Python Type | Example |
+|------|------------|---------|
+| Float | `float` | `node.par.brightness1.val = 0.5` |
+| Int | `int` | `node.par.seed.val = 42` |
+| Toggle | `bool` | `node.par.monochrome.val = True` |
+| String | `str` | `node.par.text.val = 'hello'` |
+| Menu | `int` (index) or `str` (label) | `node.par.type.val = 'sine'` |
+| File | `str` (path) | `node.par.file.val = '/path/to/file'` |
+| OP reference | `str` (path) | `node.par.dat.val = '/project1/text1'` |
+| Color | separate r/g/b/a floats | `node.par.colorr.val = 1.0` |
+| XY/XYZ | separate x/y/z floats | `node.par.tx.val = 0.5` |
+
+## Creating and Deleting Operators
+
+```python
+# Create via parent component
+parent = op('/project1')
+new_node = parent.create(noiseTOP)                    # using class reference
+new_node = parent.create(noiseTOP, 'my_noise')        # with custom name
+
+# The MCP `td_create_operator` tool handles this automatically (and positions
+# the node in the viewport), so prefer it over scripted create() calls.
+
+# Delete
+node = op('/project1/my_noise')
+node.destroy()
+
+# Copy
+original = op('/project1/noise1')
+copy = parent.copy(original, name='noise1_copy')
+```
+
+## Connections (Wiring Operators)
+
+### Output to Input connections
+
+```python
+# Connect noise1's output to level1's input
+op('/project1/noise1').outputConnectors[0].connect(op('/project1/level1'))
+
+# Connect to specific input index (for multi-input operators like Composite)
+op('/project1/noise1').outputConnectors[0].connect(op('/project1/composite1').inputConnectors[0])
+op('/project1/text1').outputConnectors[0].connect(op('/project1/composite1').inputConnectors[1])
+
+# Disconnect all outputs
+op('/project1/noise1').outputConnectors[0].disconnect()
+
+# Query connections
+node = op('/project1/level1')
+inputs = node.inputs # list of connected input operators
+outputs = node.outputs # list of connected output operators
+```
+
+### Connection patterns for common setups
+
+```python
+# Linear chain: A -> B -> C -> D
+ops_list = [op(f'/project1/{name}') for name in ['noise1', 'level1', 'blur1', 'null1']]
+for i in range(len(ops_list) - 1):
+ ops_list[i].outputConnectors[0].connect(ops_list[i+1])
+
+# Fan-out: A -> B, A -> C, A -> D
+source = op('/project1/noise1')
+for target_name in ['level1', 'composite1', 'transform1']:
+ source.outputConnectors[0].connect(op(f'/project1/{target_name}'))
+
+# Merge: A + B + C -> Composite
+comp = op('/project1/composite1')
+for i, source_name in enumerate(['noise1', 'text1', 'ramp1']):
+ op(f'/project1/{source_name}').outputConnectors[0].connect(comp.inputConnectors[i])
+```
+
+## DAT Content Manipulation
+
+### Text DATs
+
+```python
+dat = op('/project1/text1')
+
+# Read
+content = dat.text # full text as string
+
+# Write
+dat.text = "new content"
+dat.text = '''multi
+line
+content'''
+
+# Append
+dat.text += "\nnew line"
+```
+
+### Table DATs
+
+```python
+dat = op('/project1/table1')
+
+# Read cell
+val = dat[0, 0] # row 0, col 0
+val = dat[0, 'name'] # row 0, column named 'name'
+val = dat['key', 1] # row named 'key', col 1
+
+# Write cell
+dat[0, 0] = 'value'
+
+# Read row/col
+row = dat.row(0) # list of Cell objects
+col = dat.col('name') # list of Cell objects
+
+# Dimensions
+rows = dat.numRows
+cols = dat.numCols
+
+# Append row
+dat.appendRow(['col1_val', 'col2_val', 'col3_val'])
+
+# Clear
+dat.clear()
+
+# Set entire table
+dat.clear()
+dat.appendRow(['name', 'value', 'type'])
+dat.appendRow(['frequency', '440', 'float'])
+dat.appendRow(['amplitude', '0.8', 'float'])
+```
+
+## Time and Animation
+
+```python
+# Global time
+td.absTime.frame # absolute frame number (never resets)
+td.absTime.seconds # absolute seconds
+
+# Timeline time (affected by play/pause/loop)
+me.time.frame # current frame on timeline
+me.time.seconds # current seconds on timeline
+me.time.rate # FPS setting
+
+# Timeline control (via td_execute_python)
+project.play = True
+project.play = False
+project.frameRange = (1, 300) # set timeline range
+
+# Cook frame (when operator was last computed)
+node.cookFrame
+node.cookTime
+```
+
+## Extensions (Custom Python Classes on Components)
+
+Extensions add custom Python methods and attributes to COMPs.
+
+```python
+# Create extension on a Base COMP
+base = op('/project1/myBase')
+
+# The extension class is defined in a Text DAT inside the COMP
+# Typically named 'ExtClass' with the extension code:
+
+extension_code = '''
+class MyExtension:
+ def __init__(self, ownerComp):
+ self.ownerComp = ownerComp
+ self.counter = 0
+
+ def Reset(self):
+ self.counter = 0
+
+ def Increment(self):
+ self.counter += 1
+ return self.counter
+
+ @property
+ def Count(self):
+ return self.counter
+'''
+
+# Write extension code to DAT inside the COMP
+op('/project1/myBase/extClass').text = extension_code
+
+# Configure the extension on the COMP
+base.par.extension1 = 'extClass' # name of the DAT
+base.par.promoteextension1 = True # promote methods to parent
+
+# Call extension methods
+base.Increment() # calls MyExtension.Increment()
+count = base.Count # accesses MyExtension.Count property
+base.Reset()
+```
+
+## Useful Built-in Modules
+
+### tdu — TouchDesigner Utilities
+
+```python
+import tdu
+
+# Dependency tracking (reactive values)
+dep = tdu.Dependency(initial_value)
+dep.val = new_value # triggers dependents to recook
+
+# File path utilities
+tdu.expandPath('$HOME/Desktop/output.mov')
+
+# Math
+tdu.clamp(value, min, max)
+tdu.remap(value, from_min, from_max, to_min, to_max)
+```
+
+### TDFunctions
+
+```python
+from TDFunctions import *
+
+# Commonly used utilities
+clamp(value, low, high)
+remap(value, inLow, inHigh, outLow, outHigh)
+interp(value1, value2, t) # linear interpolation
+```
+
+### TDStoreTools — Persistent Storage
+
+```python
+from TDStoreTools import StorageManager
+
+# Store data that survives project reload
+me.store('myKey', 'myValue')
+val = me.fetch('myKey', default='fallback')
+
+# Storage dict
+me.storage['key'] = value
+```
+
+## Common Patterns via td_execute_python
+
+### Build a complete chain
+
+```python
+# Create a complete audio-reactive noise chain
+parent = op('/project1')
+
+# Create operators
+audio_in = parent.create(audiofileinCHOP, 'audio_in')
+spectrum = parent.create(audiospectrumCHOP, 'spectrum')
+chop_to_top = parent.create(choptoTOP, 'chop_to_top')
+noise = parent.create(noiseTOP, 'noise1')
+level = parent.create(levelTOP, 'level1')
+null_out = parent.create(nullTOP, 'out')
+
+# Wire the chain (CHOP to TOP has no input connector — it references its CHOP)
+audio_in.outputConnectors[0].connect(spectrum)
+chop_to_top.par.chop = spectrum
+noise.outputConnectors[0].connect(level)
+level.outputConnectors[0].connect(null_out)
+
+# Set parameters
+audio_in.par.file = '/path/to/music.wav'
+audio_in.par.play = True
+spectrum.par.size = 512
+noise.par.type = 1 # Sparse
+noise.par.monochrome = False
+noise.par.resolutionw = 1920
+noise.par.resolutionh = 1080
+level.par.opacity = 0.8
+level.par.gamma1 = 0.7
+```
+
+### Query network state
+
+```python
+# Get all TOPs in the project
+tops = [c for c in op('/project1').findChildren(type=TOP)]
+for t in tops:
+ print(f"{t.path}: {t.OPType} {'ERROR' if t.errors() else 'OK'}")
+
+# Find all operators with errors
+def find_errors(parent_path='/project1'):
+ parent = op(parent_path)
+ errors = []
+ for child in parent.findChildren(depth=-1):
+ if child.errors():
+ errors.append((child.path, child.errors()))
+ return errors
+
+result = find_errors()
+```
+
+### Batch parameter changes
+
+```python
+# Set parameters on multiple nodes at once
+settings = {
+ '/project1/noise1': {'seed': 42, 'monochrome': False, 'resolutionw': 1920},
+ '/project1/level1': {'brightness1': 1.2, 'gamma1': 0.8},
+ '/project1/blur1': {'sizex': 5, 'sizey': 5},
+}
+
+for path, params in settings.items():
+ node = op(path)
+ if node:
+ for key, val in params.items():
+ setattr(node.par, key, val)
+```
+
+## Python Version and Packages
+
+TouchDesigner bundles Python 3.11+ with these pre-installed:
+- **numpy** — array operations, fast math
+- **scipy** — signal processing, FFT
+- **OpenCV** (cv2) — computer vision
+- **PIL/Pillow** — image processing
+- **requests** — HTTP client
+- **json**, **re**, **os**, **sys** — standard library
+
+**IMPORTANT:** Parameter names in the examples throughout this reference are illustrative. Always run discovery (SKILL.md Step 0) to get the actual names for your TD version. Do NOT copy param names from these examples verbatim.
+
+Custom packages can be installed to TD's Python site-packages directory. See TD documentation for the exact path per platform.
+
+## SOP Vertex/Point Access (TD 2025.32)
+
+In TD 2025.32, `td.Vertex` does NOT have `.x`, `.y`, `.z` attributes. Use index access:
+
+```python
+# WRONG — crashes in TD 2025.32:
+vertex.x, vertex.y, vertex.z
+
+# CORRECT — index/attribute access:
+pt = sop.points()[i]
+pos = pt.P # Position object
+x, y, z = pos[0], pos[1], pos[2]
+
+# Always introspect first:
+dir(sop.points()[0]) # see what attributes actually exist
+dir(sop.points()[0].P) # see Position object interface
+```
diff --git a/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md b/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md
new file mode 100644
index 00000000000..b8e201f5c32
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md
@@ -0,0 +1,244 @@
+# TouchDesigner Troubleshooting (twozero MCP)
+
+> See `references/pitfalls.md` for the comprehensive lessons-learned list.
+
+## 1. Connection Issues
+
+### Port 40404 not responding
+
+Check these in order:
+
+1. Is TouchDesigner running?
+ ```bash
+ pgrep TouchDesigner
+ ```
+
+1b. Quick hub health check (no JSON-RPC needed):
+ A plain GET to the MCP URL returns instance info:
+ ```
+ curl -s http://localhost:40404/mcp
+ ```
+ Returns: `{"hub": true, "pid": ..., "instances": {"127.0.0.1_PID": {"project": "...", "tdVersion": "...", ...}}}`
+ If this returns JSON but `instances` is empty, TD is running but twozero hasn't registered yet.
+
+2. Is twozero installed in TD?
+ Open TD Palette Browser > twozero should be listed. If not, install it.
+
+3. Is MCP enabled in twozero settings?
+ In TD, open twozero preferences and confirm MCP server is toggled ON.
+
+4. Test the port directly:
+ ```bash
+ nc -z 127.0.0.1 40404
+ ```
+
+5. Test the MCP endpoint:
+ ```bash
+ curl -s http://localhost:40404/mcp
+ ```
+ Should return JSON with hub info. If it does, the server is running.
+
+### Hub responds but no TD instances
+
+The twozero MCP hub is running but TD hasn't registered. Causes:
+- TD project not loaded yet (still on splash screen)
+- twozero COMP not initialized in the current project
+- twozero version mismatch
+
+Fix: Open/reload a TD project that contains the twozero COMP. Use td_list_instances
+to check which TD instances are registered.
+
+### Multi-instance setup
+
+twozero auto-assigns ports for multiple TD instances:
+- First instance: 40404
+- Second instance: 40405
+- Third instance: 40406
+- etc.
+
+Use `td_list_instances` to discover all running instances and their ports.
+
+## 2. MCP Tool Errors
+
+### td_execute_python returns error
+
+The error message from td_execute_python often contains the Python traceback.
+If it's unclear, use `td_read_textport` to see the full TD console output —
+Python exceptions are always printed there.
+
+Common causes:
+- Syntax error in the script
+- Referencing a node that doesn't exist (op() returns None, then you call .par on None)
+- Using wrong parameter names (see pitfalls.md)
+
+### td_set_operator_pars fails
+
+Parameter name mismatch is the #1 cause. The tool validates param names and
+returns clear errors, but you must use exact names.
+
+Fix: ALWAYS call `td_get_par_info` first to discover the real parameter names:
+```
+td_get_par_info(op_type='glslTOP')
+td_get_par_info(op_type='noiseTOP')
+```
+
+### td_create_operator type name errors
+
+Operator type names use camelCase with family suffix:
+- CORRECT: noiseTOP, glslTOP, levelTOP, compositeTOP, audiospectrumCHOP
+- WRONG: NoiseTOP, noise_top, NOISE TOP, Noise
+
+### td_get_operator_info for deep inspection
+
+If unsure about any aspect of an operator (params, inputs, outputs, state):
+```
+td_get_operator_info(path='/project1/noise1', detail='full')
+```
+
+## 3. Parameter Discovery
+
+CRITICAL: ALWAYS use td_get_par_info to discover parameter names.
+
+The agent's LLM training data contains WRONG parameter names for TouchDesigner.
+Do not trust them. Known wrong names include dat vs pixeldat, colora vs alpha,
+sizex vs size, and many more. See pitfalls.md for the full list.
+
+Workflow:
+1. td_get_par_info(op_type='glslTOP') — get all params for a type
+2. td_get_operator_info(path='/project1/mynode', detail='full') — get params for a specific instance
+3. Use ONLY the names returned by these tools
+
+## 4. Performance
+
+### Diagnosing slow performance
+
+Use `td_get_perf` to see which operators are slow. Look at cook times —
+anything over 1ms per frame is worth investigating.
+
+Common causes:
+- Resolution too high (especially on Non-Commercial)
+- Complex GLSL shaders
+- Too many TOP-to-CHOP or CHOP-to-TOP transfers (GPU-CPU memory copies)
+- Feedback loops without decay (values accumulate, memory grows)
+
+### Non-Commercial license restrictions
+
+- Resolution cap: 1280x1280. Setting resolutionw=1920 silently clamps to 1280.
+- H.264/H.265/AV1 encoding requires Commercial license. Use ProRes or Hap instead.
+- No commercial use of output.
+
+Always check effective resolution after creation:
+```python
+n.cook(force=True)
+actual = str(n.width) + 'x' + str(n.height)
+```
+
+## 5. Hermes Configuration
+
+### Config location
+
+`$HERMES_HOME/config.yaml` (defaults to `~/.hermes/config.yaml` when `HERMES_HOME` is unset)
+
+### MCP entry format
+
+The twozero TD entry should look like:
+```yaml
+mcp_servers:
+ twozero_td:
+ url: http://localhost:40404/mcp
+```
+
+### After config changes
+
+Restart the Hermes session for changes to take effect. The MCP connection is
+established at session startup.
+
+### Verifying MCP tools are available
+
+After restarting, the session log should show twozero MCP tools registered.
+If tools show as registered but aren't callable, check:
+- The twozero MCP hub is still running (curl test above)
+- TD is still running with a project loaded
+- No firewall blocking localhost:40404
+
+## 6. Node Creation Issues
+
+### "Node type not found" error
+
+Wrong type string. Use camelCase with family suffix:
+- Wrong: NoiseTop, noise_top, NOISE TOP
+- Right: noiseTOP
+
+### Node created but not visible
+
+Check parentPath — use absolute paths like /project1. The default project
+root is /project1. System nodes live at /, /ui, /sys, /local, /perform.
+Don't create user nodes outside /project1.
+
+### Cannot create node inside a non-COMP
+
+Only COMP operators (Container, Base, Geometry, etc.) can contain children.
+You cannot create nodes inside a TOP, CHOP, SOP, DAT, or MAT.
+
+## 7. Wiring Issues
+
+### Cross-family wiring
+
+TOPs connect to TOPs, CHOPs to CHOPs, SOPs to SOPs, DATs to DATs.
+Use converter operators to bridge: choptoTOP, toptoCHOP, soptoDAT, etc.
+
+Note: choptoTOP has NO input connectors. Use par.chop reference instead:
+```python
+spec_tex.par.chop = resample_node # correct
+# NOT: resample.outputConnectors[0].connect(spec_tex.inputConnectors[0])
+```
+
+### Feedback loops
+
+Never create A -> B -> A directly. Use a Feedback TOP:
+```python
+fb = root.create(feedbackTOP, 'fb')
+fb.par.top = comp.path # reference only, no wire to fb input
+fb.outputConnectors[0].connect(next_node)
+```
+"Cook dependency loop detected" warning on the chain is expected and correct.
+
+## 8. GLSL Issues
+
+### Shader compilation errors are silent
+
+GLSL TOP shows a yellow warning in the UI but node.errors() may return empty.
+Check node.warnings() too. Create an Info DAT pointed at the GLSL TOP for
+full compiler output.
+
+### TD GLSL specifics
+
+- Uses GLSL 4.60 (Vulkan backend). GLSL 3.30 and earlier removed.
+- UV coordinates: vUV.st (not gl_FragCoord)
+- Input textures: sTD2DInputs[0]
+- Output: layout(location = 0) out vec4 fragColor
+- macOS CRITICAL: Always wrap output with TDOutputSwizzle(color)
+- No built-in time uniform. Pass time via GLSL TOP Values page or Constant TOP.
+
+## 9. Recording Issues
+
+### H.264/H.265/AV1 requires Commercial license
+
+Use Apple ProRes on macOS (hardware accelerated, not license-restricted):
+```python
+rec.par.videocodec = 'prores' # Preferred on macOS — visually lossless, Non-Commercial OK
+# rec.par.videocodec = 'mjpa' # Fallback — lossy, works everywhere
+```
+
+### MovieFileOut has no .record() method
+
+Use the toggle parameter:
+```python
+rec.par.record = True # start
+rec.par.record = False # stop
+```
+
+### All exported frames identical
+
+TOP.save() captures same frame when called rapidly. Use MovieFileOut for
+real-time recording. Set project.realTime = False for frame-accurate output.
diff --git a/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh b/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
new file mode 100644
index 00000000000..15dc662c1cd
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh
@@ -0,0 +1,143 @@
+#!/usr/bin/env bash
+# setup.sh — Automated setup for twozero MCP plugin for TouchDesigner
+#
+# Checks, and where possible fixes, what Hermes needs to reach TouchDesigner
+# through the twozero MCP hub: the TD process, the twozero.tox download, the
+# twozero_td entry in the Hermes config, and the MCP port/endpoint. Anything
+# it cannot do itself is collected and printed as a list of manual steps.
+#
+# Idempotent: safe to run multiple times.
+# Exit status: 0 when fully configured, 1 when manual steps remain.
+set -euo pipefail
+
+GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[1;33m'; CYAN='\033[0;36m'; NC='\033[0m'
+OK="${GREEN}✔${NC}"; FAIL="${RED}✘${NC}"; WARN="${YELLOW}⚠${NC}"
+
+TWOZERO_URL="https://www.404zero.com/pisang/twozero.tox"
+TOX_PATH="$HOME/Downloads/twozero.tox"
+HERMES_HOME_DIR="${HERMES_HOME:-$HOME/.hermes}"
+HERMES_CFG="${HERMES_HOME_DIR}/config.yaml"
+MCP_PORT=40404
+MCP_ENDPOINT="http://localhost:${MCP_PORT}/mcp"
+
+# Actions the script could not perform itself; a non-empty list means exit 1.
+manual_steps=()
+
+echo -e "\n${CYAN}═══ twozero MCP for TouchDesigner — Setup ═══${NC}\n"
+
+# ── 1. Check if TouchDesigner is running ──
+# Match on process *name* (not full cmdline) to avoid self-matching shells
+# that happen to have "TouchDesigner" in their args. macOS and Linux pgrep
+# both support -x for exact name match.
+if pgrep -x TouchDesigner >/dev/null 2>&1 || pgrep -x TouchDesignerFTE >/dev/null 2>&1; then
+    echo -e "  ${OK} TouchDesigner is running"
+    td_running=true
+else
+    echo -e "  ${WARN} TouchDesigner is not running"
+    td_running=false
+fi
+
+# ── 2. Ensure twozero.tox exists ──
+if [[ -f "$TOX_PATH" ]]; then
+    echo -e "  ${OK} twozero.tox already exists at ${TOX_PATH}"
+else
+    echo -e "  ${WARN} twozero.tox not found — downloading..."
+    # Download to a temporary name and rename only on success, so an
+    # interrupted transfer never leaves a truncated twozero.tox behind that
+    # the -f check above would accept on the next run.
+    tox_tmp="${TOX_PATH}.part"
+    if mkdir -p "$(dirname "$TOX_PATH")" 2>/dev/null \
+        && curl -fSL -o "$tox_tmp" "$TWOZERO_URL" 2>/dev/null \
+        && mv -f "$tox_tmp" "$TOX_PATH"; then
+        echo -e "  ${OK} Downloaded twozero.tox to ${TOX_PATH}"
+    else
+        rm -f "$tox_tmp"
+        echo -e "  ${FAIL} Failed to download twozero.tox from ${TWOZERO_URL}"
+        echo "     Please download manually and place at ${TOX_PATH}"
+        manual_steps+=("Download twozero.tox from ${TWOZERO_URL} to ${TOX_PATH}")
+    fi
+fi
+
+# ── 3. Ensure Hermes config has twozero_td MCP entry ──
+if [[ ! -f "$HERMES_CFG" ]]; then
+    echo -e "  ${FAIL} Hermes config not found at ${HERMES_CFG}"
+    manual_steps+=("Create ${HERMES_CFG} with twozero_td MCP server entry")
+elif grep -q 'twozero_td' "$HERMES_CFG" 2>/dev/null; then
+    echo -e "  ${OK} twozero_td MCP entry exists in Hermes config"
+else
+    echo -e "  ${WARN} Adding twozero_td MCP entry to Hermes config..."
+    # The config path and endpoint are passed through the environment instead
+    # of being spliced into the Python source, so a quote or backslash in
+    # $HERMES_HOME cannot break (or inject into) the script.
+    # NOTE: yaml.dump rewrites the whole file; comments in config.yaml are lost.
+    if HERMES_CFG="$HERMES_CFG" MCP_ENDPOINT="$MCP_ENDPOINT" python3 - 2>/dev/null <<'PY'
+import os
+
+import yaml
+
+cfg_path = os.environ['HERMES_CFG']
+with open(cfg_path, 'r') as f:
+    cfg = yaml.safe_load(f) or {}
+
+# A bare "mcp_servers:" key loads as None — treat it like a missing key
+# instead of crashing on the membership test below.
+servers = cfg.get('mcp_servers')
+if servers is None:
+    servers = cfg['mcp_servers'] = {}
+
+if 'twozero_td' not in servers:
+    servers['twozero_td'] = {
+        'url': os.environ['MCP_ENDPOINT'],
+        'timeout': 120,
+        'connect_timeout': 60,
+    }
+    with open(cfg_path, 'w') as f:
+        yaml.dump(cfg, f, default_flow_style=False, sort_keys=False)
+PY
+    then
+        echo -e "  ${OK} twozero_td MCP entry added to config"
+    else
+        echo -e "  ${FAIL} Could not update config (is PyYAML installed?)"
+        manual_steps+=("Add twozero_td MCP entry to ${HERMES_CFG} manually")
+    fi
+    manual_steps+=("Restart Hermes session to pick up config change")
+fi
+
+# ── 4. Test if MCP port is responding ──
+if nc -z 127.0.0.1 "$MCP_PORT" 2>/dev/null; then
+    echo -e "  ${OK} Port ${MCP_PORT} is open"
+
+    # ── 5. Verify MCP endpoint responds ──
+    resp=$(curl -s --max-time 3 "$MCP_ENDPOINT" 2>/dev/null || true)
+    if [[ -n "$resp" ]]; then
+        echo -e "  ${OK} MCP endpoint responded at ${MCP_ENDPOINT}"
+    else
+        echo -e "  ${WARN} Port open but MCP endpoint returned empty response"
+        manual_steps+=("Verify MCP is enabled in twozero settings")
+    fi
+else
+    echo -e "  ${WARN} Port ${MCP_PORT} is not open"
+    if [[ "$td_running" == true ]]; then
+        manual_steps+=("In TD: drag twozero.tox into network editor → click Install")
+        manual_steps+=("Enable MCP: twozero icon → Settings → mcp → 'auto start MCP' → Yes")
+    else
+        manual_steps+=("Launch TouchDesigner")
+        manual_steps+=("Drag twozero.tox into the TD network editor and click Install")
+        manual_steps+=("Enable MCP: twozero icon → Settings → mcp → 'auto start MCP' → Yes")
+    fi
+fi
+
+# ── Status Report ──
+echo -e "\n${CYAN}═══ Status Report ═══${NC}\n"
+
+if [[ ${#manual_steps[@]} -eq 0 ]]; then
+    echo -e "  ${OK} ${GREEN}Fully configured! twozero MCP is ready to use.${NC}\n"
+    exit 0
+else
+    echo -e "  ${WARN} ${YELLOW}Manual steps remaining:${NC}\n"
+    for i in "${!manual_steps[@]}"; do
+        echo -e "    $((i+1)). ${manual_steps[$i]}"
+    done
+    echo ""
+    exit 1
+fi
diff --git a/skills/mcp/mcporter/SKILL.md b/optional-skills/mcp/mcporter/SKILL.md
similarity index 100%
rename from skills/mcp/mcporter/SKILL.md
rename to optional-skills/mcp/mcporter/SKILL.md
diff --git a/skills/mlops/models/clip/SKILL.md b/optional-skills/mlops/clip/SKILL.md
similarity index 100%
rename from skills/mlops/models/clip/SKILL.md
rename to optional-skills/mlops/clip/SKILL.md
diff --git a/skills/mlops/models/clip/references/applications.md b/optional-skills/mlops/clip/references/applications.md
similarity index 100%
rename from skills/mlops/models/clip/references/applications.md
rename to optional-skills/mlops/clip/references/applications.md
diff --git a/skills/mlops/inference/guidance/SKILL.md b/optional-skills/mlops/guidance/SKILL.md
similarity index 100%
rename from skills/mlops/inference/guidance/SKILL.md
rename to optional-skills/mlops/guidance/SKILL.md
diff --git a/skills/mlops/inference/guidance/references/backends.md b/optional-skills/mlops/guidance/references/backends.md
similarity index 100%
rename from skills/mlops/inference/guidance/references/backends.md
rename to optional-skills/mlops/guidance/references/backends.md
diff --git a/skills/mlops/inference/guidance/references/constraints.md b/optional-skills/mlops/guidance/references/constraints.md
similarity index 100%
rename from skills/mlops/inference/guidance/references/constraints.md
rename to optional-skills/mlops/guidance/references/constraints.md
diff --git a/skills/mlops/inference/guidance/references/examples.md b/optional-skills/mlops/guidance/references/examples.md
similarity index 100%
rename from skills/mlops/inference/guidance/references/examples.md
rename to optional-skills/mlops/guidance/references/examples.md
diff --git a/optional-skills/mlops/hermes-atropos-environments/SKILL.md b/optional-skills/mlops/hermes-atropos-environments/SKILL.md
index 9dff4668767..5101886b41a 100644
--- a/optional-skills/mlops/hermes-atropos-environments/SKILL.md
+++ b/optional-skills/mlops/hermes-atropos-environments/SKILL.md
@@ -7,7 +7,7 @@ license: MIT
metadata:
hermes:
tags: [atropos, rl, environments, training, reinforcement-learning, reward-functions]
- related_skills: [axolotl, grpo-rl-training, trl-fine-tuning, lm-evaluation-harness]
+ related_skills: [axolotl, fine-tuning-with-trl, lm-evaluation-harness]
---
# Hermes Agent Atropos Environments
diff --git a/skills/mlops/cloud/modal/SKILL.md b/optional-skills/mlops/modal/SKILL.md
similarity index 100%
rename from skills/mlops/cloud/modal/SKILL.md
rename to optional-skills/mlops/modal/SKILL.md
diff --git a/skills/mlops/cloud/modal/references/advanced-usage.md b/optional-skills/mlops/modal/references/advanced-usage.md
similarity index 100%
rename from skills/mlops/cloud/modal/references/advanced-usage.md
rename to optional-skills/mlops/modal/references/advanced-usage.md
diff --git a/skills/mlops/cloud/modal/references/troubleshooting.md b/optional-skills/mlops/modal/references/troubleshooting.md
similarity index 100%
rename from skills/mlops/cloud/modal/references/troubleshooting.md
rename to optional-skills/mlops/modal/references/troubleshooting.md
diff --git a/skills/mlops/training/peft/SKILL.md b/optional-skills/mlops/peft/SKILL.md
similarity index 100%
rename from skills/mlops/training/peft/SKILL.md
rename to optional-skills/mlops/peft/SKILL.md
diff --git a/skills/mlops/training/peft/references/advanced-usage.md b/optional-skills/mlops/peft/references/advanced-usage.md
similarity index 100%
rename from skills/mlops/training/peft/references/advanced-usage.md
rename to optional-skills/mlops/peft/references/advanced-usage.md
diff --git a/skills/mlops/training/peft/references/troubleshooting.md b/optional-skills/mlops/peft/references/troubleshooting.md
similarity index 100%
rename from skills/mlops/training/peft/references/troubleshooting.md
rename to optional-skills/mlops/peft/references/troubleshooting.md
diff --git a/skills/mlops/training/pytorch-fsdp/SKILL.md b/optional-skills/mlops/pytorch-fsdp/SKILL.md
similarity index 100%
rename from skills/mlops/training/pytorch-fsdp/SKILL.md
rename to optional-skills/mlops/pytorch-fsdp/SKILL.md
diff --git a/skills/mlops/training/pytorch-fsdp/references/index.md b/optional-skills/mlops/pytorch-fsdp/references/index.md
similarity index 100%
rename from skills/mlops/training/pytorch-fsdp/references/index.md
rename to optional-skills/mlops/pytorch-fsdp/references/index.md
diff --git a/skills/mlops/training/pytorch-fsdp/references/other.md b/optional-skills/mlops/pytorch-fsdp/references/other.md
similarity index 100%
rename from skills/mlops/training/pytorch-fsdp/references/other.md
rename to optional-skills/mlops/pytorch-fsdp/references/other.md
diff --git a/skills/mlops/models/stable-diffusion/SKILL.md b/optional-skills/mlops/stable-diffusion/SKILL.md
similarity index 100%
rename from skills/mlops/models/stable-diffusion/SKILL.md
rename to optional-skills/mlops/stable-diffusion/SKILL.md
diff --git a/skills/mlops/models/stable-diffusion/references/advanced-usage.md b/optional-skills/mlops/stable-diffusion/references/advanced-usage.md
similarity index 100%
rename from skills/mlops/models/stable-diffusion/references/advanced-usage.md
rename to optional-skills/mlops/stable-diffusion/references/advanced-usage.md
diff --git a/skills/mlops/models/stable-diffusion/references/troubleshooting.md b/optional-skills/mlops/stable-diffusion/references/troubleshooting.md
similarity index 100%
rename from skills/mlops/models/stable-diffusion/references/troubleshooting.md
rename to optional-skills/mlops/stable-diffusion/references/troubleshooting.md
diff --git a/skills/mlops/models/whisper/SKILL.md b/optional-skills/mlops/whisper/SKILL.md
similarity index 100%
rename from skills/mlops/models/whisper/SKILL.md
rename to optional-skills/mlops/whisper/SKILL.md
diff --git a/skills/mlops/models/whisper/references/languages.md b/optional-skills/mlops/whisper/references/languages.md
similarity index 100%
rename from skills/mlops/models/whisper/references/languages.md
rename to optional-skills/mlops/whisper/references/languages.md
diff --git a/optional-skills/productivity/telephony/SKILL.md b/optional-skills/productivity/telephony/SKILL.md
index c74a3692091..6c457592a9a 100644
--- a/optional-skills/productivity/telephony/SKILL.md
+++ b/optional-skills/productivity/telephony/SKILL.md
@@ -7,7 +7,7 @@ license: MIT
metadata:
hermes:
tags: [telephony, phone, sms, mms, voice, twilio, bland.ai, vapi, calling, texting]
- related_skills: [find-nearby, google-workspace, agentmail]
+ related_skills: [maps, google-workspace, agentmail]
category: productivity
---
diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py
index ca44ce60193..6ca32c1dcbb 100644
--- a/plugins/memory/honcho/__init__.py
+++ b/plugins/memory/honcho/__init__.py
@@ -19,6 +19,7 @@ import json
import logging
import re
import threading
+import time
from typing import Any, Dict, List, Optional
from agent.memory_provider import MemoryProvider
@@ -206,13 +207,19 @@ class HonchoMemoryProvider(MemoryProvider):
self._turn_count = 0
self._injection_frequency = "every-turn" # or "first-turn"
self._context_cadence = 1 # minimum turns between context API calls
- self._dialectic_cadence = 3 # minimum turns between dialectic API calls
+ self._dialectic_cadence = 1 # backwards-compat fallback; wizard writes 2 on new configs
self._dialectic_depth = 1 # how many .chat() calls per dialectic cycle (1-3)
self._dialectic_depth_levels: list[str] | None = None # per-pass reasoning levels
- self._reasoning_level_cap: Optional[str] = None # "minimal", "low", "medium", "high"
+ self._reasoning_heuristic: bool = True # scale base level by query length
+ self._reasoning_level_cap: str = "high" # ceiling for auto-selected level
self._last_context_turn = -999
self._last_dialectic_turn = -999
+ # Liveness + observability state
+ self._prefetch_thread_started_at: float = 0.0 # monotonic ts of current thread
+ self._prefetch_result_fired_at: int = -999 # turn the pending result was fired at
+ self._dialectic_empty_streak: int = 0 # consecutive empty returns
+
# Port #1957: lazy session init for tools-only mode
self._session_initialized = False
self._lazy_init_kwargs: Optional[dict] = None
@@ -286,14 +293,6 @@ class HonchoMemoryProvider(MemoryProvider):
logger.debug("Honcho not configured — plugin inactive")
return
- # Override peer_name with gateway user_id for per-user memory scoping.
- # Only when no explicit peerName was configured — an explicit peerName
- # means the user chose their identity; a raw user_id (e.g. Telegram
- # chat ID) should not silently replace it.
- _gw_user_id = kwargs.get("user_id")
- if _gw_user_id and not cfg.peer_name:
- cfg.peer_name = _gw_user_id
-
self._config = cfg
# ----- B1: recall_mode from config -----
@@ -305,12 +304,16 @@ class HonchoMemoryProvider(MemoryProvider):
raw = cfg.raw or {}
self._injection_frequency = raw.get("injectionFrequency", "every-turn")
self._context_cadence = int(raw.get("contextCadence", 1))
- self._dialectic_cadence = int(raw.get("dialecticCadence", 3))
+ # Backwards-compat: unset dialecticCadence falls back to 1
+ # (every turn) so existing honcho.json configs without the key
+ # behave as they did before. New setups via `hermes honcho setup`
+ # get dialecticCadence=2 written explicitly by the wizard.
+ self._dialectic_cadence = int(raw.get("dialecticCadence", 1))
self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3))
self._dialectic_depth_levels = cfg.dialectic_depth_levels
- cap = raw.get("reasoningLevelCap")
- if cap and cap in ("minimal", "low", "medium", "high"):
- self._reasoning_level_cap = cap
+ self._reasoning_heuristic = cfg.reasoning_heuristic
+ if cfg.reasoning_level_cap in self._LEVEL_ORDER:
+ self._reasoning_level_cap = cfg.reasoning_level_cap
except Exception as e:
logger.debug("Honcho cost-awareness config parse error: %s", e)
@@ -352,6 +355,7 @@ class HonchoMemoryProvider(MemoryProvider):
honcho=client,
config=cfg,
context_tokens=cfg.context_tokens,
+ runtime_user_peer_name=kwargs.get("user_id") or None,
)
# ----- B3: resolve_session_name -----
@@ -391,14 +395,45 @@ class HonchoMemoryProvider(MemoryProvider):
except Exception as e:
logger.debug("Honcho memory file migration skipped: %s", e)
- # ----- B7: Pre-warming context at init -----
+ # ----- B7: Pre-warming at init -----
+ # Context prewarm warms peer.context() (base layer), consumed via
+ # pop_context_result() in prefetch(). Dialectic prewarm runs the
+ # full configured depth and writes into _prefetch_result so turn 1
+ # consumes the result directly.
if self._recall_mode in ("context", "hybrid"):
try:
self._manager.prefetch_context(self._session_key)
- self._manager.prefetch_dialectic(self._session_key, "What should I know about this user?")
- logger.debug("Honcho pre-warm threads started for session: %s", self._session_key)
except Exception as e:
- logger.debug("Honcho pre-warm failed: %s", e)
+ logger.debug("Honcho context prewarm failed: %s", e)
+
+ _prewarm_query = (
+ "Summarize what you know about this user. "
+ "Focus on preferences, current projects, and working style."
+ )
+
+ def _prewarm_dialectic() -> None:
+ try:
+ r = self._run_dialectic_depth(_prewarm_query)
+ except Exception as exc:
+ logger.debug("Honcho dialectic prewarm failed: %s", exc)
+ self._dialectic_empty_streak += 1
+ return
+ if r and r.strip():
+ with self._prefetch_lock:
+ self._prefetch_result = r
+ self._prefetch_result_fired_at = 0
+ # Treat prewarm as turn 0 so cadence gating starts clean.
+ self._last_dialectic_turn = 0
+ self._dialectic_empty_streak = 0
+ else:
+ self._dialectic_empty_streak += 1
+
+ self._prefetch_thread_started_at = time.monotonic()
+ self._prefetch_thread = threading.Thread(
+ target=_prewarm_dialectic, daemon=True, name="honcho-prewarm-dialectic"
+ )
+ self._prefetch_thread.start()
+ logger.debug("Honcho pre-warm started for session: %s", self._session_key)
def _ensure_session(self) -> bool:
"""Lazily initialize the Honcho session (for tools-only mode).
@@ -487,7 +522,8 @@ class HonchoMemoryProvider(MemoryProvider):
"# Honcho Memory\n"
"Active (tools-only mode). Use honcho_profile for a quick factual snapshot, "
"honcho_search for raw excerpts, honcho_context for raw peer context, "
- "honcho_reasoning for synthesized answers, "
+ "honcho_reasoning for synthesized answers (pass reasoning_level "
+ "minimal/low/medium/high/max — you pick the depth per call), "
"honcho_conclude to save facts about the user. "
"No automatic context injection — you must use tools to access memory."
)
@@ -497,7 +533,8 @@ class HonchoMemoryProvider(MemoryProvider):
"Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. "
"Use honcho_profile for a quick factual snapshot, "
"honcho_search for raw excerpts, honcho_context for raw peer context, "
- "honcho_reasoning for synthesized answers, "
+ "honcho_reasoning for synthesized answers (pass reasoning_level "
+ "minimal/low/medium/high/max — you pick the depth per call), "
"honcho_conclude to save facts about the user."
)
@@ -526,6 +563,10 @@ class HonchoMemoryProvider(MemoryProvider):
if self._injection_frequency == "first-turn" and self._turn_count > 1:
return ""
+ # Trivial prompts ("ok", "yes", slash commands) carry no semantic signal.
+ if self._is_trivial_prompt(query):
+ return ""
+
parts = []
# ----- Layer 1: Base context (representation + card) -----
@@ -560,43 +601,72 @@ class HonchoMemoryProvider(MemoryProvider):
# On the very first turn, no queue_prefetch() has run yet so the
# dialectic result is empty. Run with a bounded timeout so a slow
# Honcho connection doesn't block the first response indefinitely.
- # On timeout the result is skipped and queue_prefetch() will pick it
- # up at the next cadence-allowed turn.
+ # On timeout we let the thread keep running and write its result into
+ # _prefetch_result under the lock, so the next turn picks it up.
+ #
+ # Skip if the session-start prewarm already filled _prefetch_result —
+ # firing another .chat() would be duplicate work.
+ with self._prefetch_lock:
+ _prewarm_landed = bool(self._prefetch_result)
+ if _prewarm_landed and self._last_dialectic_turn == -999:
+ self._last_dialectic_turn = self._turn_count
+
if self._last_dialectic_turn == -999 and query:
_first_turn_timeout = (
self._config.timeout if self._config and self._config.timeout else 8.0
)
- _result_holder: list[str] = []
+ _fired_at = self._turn_count
def _run_first_turn() -> None:
try:
- _result_holder.append(self._run_dialectic_depth(query))
+ r = self._run_dialectic_depth(query)
except Exception as exc:
logger.debug("Honcho first-turn dialectic failed: %s", exc)
-
- _t = threading.Thread(target=_run_first_turn, daemon=True)
- _t.start()
- _t.join(timeout=_first_turn_timeout)
- if not _t.is_alive():
- first_turn_dialectic = _result_holder[0] if _result_holder else ""
- if first_turn_dialectic and first_turn_dialectic.strip():
+ self._dialectic_empty_streak += 1
+ return
+ if r and r.strip():
with self._prefetch_lock:
- self._prefetch_result = first_turn_dialectic
- self._last_dialectic_turn = self._turn_count
- else:
+ self._prefetch_result = r
+ self._prefetch_result_fired_at = _fired_at
+ # Advance cadence only on a non-empty result so the next
+ # turn retries when the call returned nothing.
+ self._last_dialectic_turn = _fired_at
+ self._dialectic_empty_streak = 0
+ else:
+ self._dialectic_empty_streak += 1
+
+ self._prefetch_thread_started_at = time.monotonic()
+ self._prefetch_thread = threading.Thread(
+ target=_run_first_turn, daemon=True, name="honcho-prefetch-first"
+ )
+ self._prefetch_thread.start()
+ self._prefetch_thread.join(timeout=_first_turn_timeout)
+ if self._prefetch_thread.is_alive():
logger.debug(
- "Honcho first-turn dialectic timed out (%.1fs) — "
- "will inject at next cadence-allowed turn",
+ "Honcho first-turn dialectic still running after %.1fs — "
+ "will surface on next turn",
_first_turn_timeout,
)
- # Don't update _last_dialectic_turn: queue_prefetch() will
- # retry at the next cadence-allowed turn via the async path.
if self._prefetch_thread and self._prefetch_thread.is_alive():
self._prefetch_thread.join(timeout=3.0)
with self._prefetch_lock:
dialectic_result = self._prefetch_result
+ fired_at = self._prefetch_result_fired_at
self._prefetch_result = ""
+ self._prefetch_result_fired_at = -999
+
+ # Discard stale pending results: if the fire happened more than
+ # cadence × multiplier turns ago (e.g. a run of trivial-prompt turns
+ # passed without consumption), the content likely no longer tracks
+ # the current conversational pivot.
+ stale_limit = self._dialectic_cadence * self._STALE_RESULT_MULTIPLIER
+ if dialectic_result and fired_at >= 0 and (self._turn_count - fired_at) > stale_limit:
+ logger.debug(
+ "Honcho pending dialectic discarded as stale: fired_at=%d, "
+ "turn=%d, limit=%d", fired_at, self._turn_count, stale_limit,
+ )
+ dialectic_result = ""
if dialectic_result and dialectic_result.strip():
parts.append(dialectic_result)
@@ -641,6 +711,10 @@ class HonchoMemoryProvider(MemoryProvider):
if self._recall_mode == "tools":
return
+ # Trivial prompts don't warrant either a context refresh or a dialectic call.
+ if self._is_trivial_prompt(query):
+ return
+
# ----- Context refresh (base layer) — independent cadence -----
if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
self._last_context_turn = self._turn_count
@@ -650,24 +724,46 @@ class HonchoMemoryProvider(MemoryProvider):
logger.debug("Honcho context prefetch failed: %s", e)
# ----- Dialectic prefetch (supplement layer) -----
- # B5: cadence check — skip if too soon since last dialectic call
- if self._dialectic_cadence > 1:
- if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence:
- logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d",
- self._dialectic_cadence, self._turn_count - self._last_dialectic_turn)
- return
+ # Thread-alive guard with stale-thread recovery: a hung Honcho call
+ # older than timeout × multiplier is treated as dead so it can't
+ # block subsequent fires.
+ if self._thread_is_live():
+ logger.debug("Honcho dialectic prefetch skipped: prior thread still running")
+ return
- self._last_dialectic_turn = self._turn_count
+ # Cadence gate, widened by the empty-streak backoff so a persistently
+ # silent backend doesn't retry every turn forever.
+ effective = self._effective_cadence()
+ if (self._turn_count - self._last_dialectic_turn) < effective:
+ logger.debug(
+ "Honcho dialectic prefetch skipped: effective cadence %d "
+ "(base %d, empty streak %d), turns since last: %d",
+ effective, self._dialectic_cadence, self._dialectic_empty_streak,
+ self._turn_count - self._last_dialectic_turn,
+ )
+ return
+
+ # Cadence advances only on a non-empty result so empty returns
+ # (transient API error, sparse representation) retry next turn.
+ _fired_at = self._turn_count
def _run():
try:
result = self._run_dialectic_depth(query)
- if result and result.strip():
- with self._prefetch_lock:
- self._prefetch_result = result
except Exception as e:
logger.debug("Honcho prefetch failed: %s", e)
+ self._dialectic_empty_streak += 1
+ return
+ if result and result.strip():
+ with self._prefetch_lock:
+ self._prefetch_result = result
+ self._prefetch_result_fired_at = _fired_at
+ self._last_dialectic_turn = _fired_at
+ self._dialectic_empty_streak = 0
+ else:
+ self._dialectic_empty_streak += 1
+ self._prefetch_thread_started_at = time.monotonic()
self._prefetch_thread = threading.Thread(
target=_run, daemon=True, name="honcho-prefetch"
)
@@ -692,11 +788,91 @@ class HonchoMemoryProvider(MemoryProvider):
_LEVEL_ORDER = ("minimal", "low", "medium", "high", "max")
- def _resolve_pass_level(self, pass_idx: int) -> str:
+ # Char-count thresholds for the query-length reasoning heuristic.
+ _HEURISTIC_LENGTH_MEDIUM = 120
+ _HEURISTIC_LENGTH_HIGH = 400
+
+ # Liveness constants. A thread older than timeout × multiplier is treated
+ # as dead so a hung Honcho call can't block future retries indefinitely.
+ _STALE_THREAD_MULTIPLIER = 2.0
+ # Pending result whose fire-turn is older than cadence × multiplier is
+ # discarded on read so we don't inject context for a stale conversational
+ # pivot after a gap of trivial-prompt turns.
+ _STALE_RESULT_MULTIPLIER = 2
+ # Cap on the empty-streak backoff so a persistently silent backend
+ # eventually settles on a ceiling instead of unbounded widening.
+ _BACKOFF_MAX = 8
+
+ def _thread_is_live(self) -> bool:
+ """Thread-alive guard that treats threads older than the stale
+ threshold as dead, so a hung Honcho request can't block new fires."""
+ if not self._prefetch_thread or not self._prefetch_thread.is_alive():
+ return False
+ timeout = (self._config.timeout if self._config and self._config.timeout else 8.0)
+ age = time.monotonic() - self._prefetch_thread_started_at
+ if age > timeout * self._STALE_THREAD_MULTIPLIER:
+ logger.debug(
+ "Honcho prefetch thread age %.1fs exceeds stale threshold "
+ "%.1fs — treating as dead", age, timeout * self._STALE_THREAD_MULTIPLIER,
+ )
+ return False
+ return True
+
+ def _effective_cadence(self) -> int:
+ """Cadence plus empty-streak backoff, capped at _BACKOFF_MAX × base."""
+ if self._dialectic_empty_streak <= 0:
+ return self._dialectic_cadence
+ widened = self._dialectic_cadence + self._dialectic_empty_streak
+ ceiling = self._dialectic_cadence * self._BACKOFF_MAX
+ return min(widened, ceiling)
+
+ def liveness_snapshot(self) -> dict:
+ """In-process snapshot of dialectic liveness state for diagnostics.
+
+ Returns current turn, last successful dialectic turn, pending-result
+ fire turn, empty streak, effective cadence, and thread status.
+ """
+ thread_age = None
+ if self._prefetch_thread and self._prefetch_thread.is_alive():
+ thread_age = time.monotonic() - self._prefetch_thread_started_at
+ return {
+ "turn_count": self._turn_count,
+ "last_dialectic_turn": self._last_dialectic_turn,
+ "pending_result_fired_at": self._prefetch_result_fired_at,
+ "empty_streak": self._dialectic_empty_streak,
+ "effective_cadence": self._effective_cadence(),
+ "thread_alive": thread_age is not None,
+ "thread_age_seconds": thread_age,
+ }
+
+ def _apply_reasoning_heuristic(self, base: str, query: str) -> str:
+ """Scale `base` up by query length, clamped at reasoning_level_cap.
+
+ Char-count heuristic: +1 at >=120 chars, +2 at >=400.
+ """
+ if not self._reasoning_heuristic or not query:
+ return base
+ if base not in self._LEVEL_ORDER:
+ return base
+ n = len(query)
+ if n < self._HEURISTIC_LENGTH_MEDIUM:
+ bump = 0
+ elif n < self._HEURISTIC_LENGTH_HIGH:
+ bump = 1
+ else:
+ bump = 2
+ base_idx = self._LEVEL_ORDER.index(base)
+ cap_idx = self._LEVEL_ORDER.index(self._reasoning_level_cap)
+ return self._LEVEL_ORDER[min(base_idx + bump, cap_idx)]
+
+ def _resolve_pass_level(self, pass_idx: int, query: str = "") -> str:
"""Resolve reasoning level for a given pass index.
- Uses dialecticDepthLevels if configured, otherwise proportional
- defaults relative to dialecticReasoningLevel.
+ Precedence:
+ 1. dialecticDepthLevels (explicit per-pass) — wins absolutely
+ 2. _PROPORTIONAL_LEVELS table (depth>1 lighter-early passes)
+ 3. Base level = dialecticReasoningLevel, optionally scaled by the
+ reasoning heuristic when the mapping falls through to 'base'
"""
if self._dialectic_depth_levels and pass_idx < len(self._dialectic_depth_levels):
return self._dialectic_depth_levels[pass_idx]
@@ -704,7 +880,7 @@ class HonchoMemoryProvider(MemoryProvider):
base = (self._config.dialectic_reasoning_level if self._config else "low")
mapping = self._PROPORTIONAL_LEVELS.get((self._dialectic_depth, pass_idx))
if mapping is None or mapping == "base":
- return base
+ return self._apply_reasoning_heuristic(base, query)
return mapping
def _build_dialectic_prompt(self, pass_idx: int, prior_results: list[str], is_cold: bool) -> str:
@@ -791,7 +967,7 @@ class HonchoMemoryProvider(MemoryProvider):
break
prompt = self._build_dialectic_prompt(i, results, is_cold)
- level = self._resolve_pass_level(i)
+ level = self._resolve_pass_level(i, query=query)
logger.debug("Honcho dialectic depth %d: pass %d, level=%s, cold=%s",
self._dialectic_depth, i, level, is_cold)
@@ -808,6 +984,29 @@ class HonchoMemoryProvider(MemoryProvider):
return r
return ""
+ # Prompts that carry no semantic signal — trivial acknowledgements, slash
+ # commands, empty input. Skipping injection here saves tokens and prevents
+ # stale user-model context from derailing one-word replies.
+ _TRIVIAL_PROMPT_RE = re.compile(
+ r'^(yes|no|ok|okay|sure|thanks|thank you|y|n|yep|nope|yeah|nah|'
+ r'continue|go ahead|do it|proceed|got it|cool|nice|great|done|next|lgtm|k)$',
+ re.IGNORECASE,
+ )
+
+ @classmethod
+ def _is_trivial_prompt(cls, text: str) -> bool:
+ """Return True if the prompt is too trivial to warrant context injection."""
+ if not text:
+ return True
+ stripped = text.strip()
+ if not stripped:
+ return True
+ if stripped.startswith("/"):
+ return True
+ if cls._TRIVIAL_PROMPT_RE.match(stripped):
+ return True
+ return False
+
def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
"""Track turn count for cadence and injection_frequency logic."""
self._turn_count = turn_number
diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py
index 536d34002de..5c829a4c989 100644
--- a/plugins/memory/honcho/cli.py
+++ b/plugins/memory/honcho/cli.py
@@ -460,17 +460,37 @@ def cmd_setup(args) -> None:
pass # keep current
# --- 7b. Dialectic cadence ---
- current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "3")
+ current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "2")
print("\n Dialectic cadence:")
print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).")
- print(" 1 = every turn (aggressive), 3 = every 3 turns (recommended), 5+ = sparse.")
+ print(" 1 = every turn, 2 = every other turn, 3+ = sparser.")
+ print(" Recommended: 1-5.")
new_dialectic = _prompt("Dialectic cadence", default=current_dialectic)
try:
val = int(new_dialectic)
if val >= 1:
hermes_host["dialecticCadence"] = val
except (ValueError, TypeError):
- hermes_host["dialecticCadence"] = 3
+ hermes_host["dialecticCadence"] = 2
+
+ # --- 7c. Dialectic reasoning level ---
+ current_reasoning = (
+ hermes_host.get("dialecticReasoningLevel")
+ or cfg.get("dialecticReasoningLevel")
+ or "low"
+ )
+ print("\n Dialectic reasoning level:")
+ print(" Depth Honcho uses when synthesizing user context on auto-injected calls.")
+ print(" minimal -- quick factual lookups")
+ print(" low -- straightforward questions (default)")
+ print(" medium -- multi-aspect synthesis")
+ print(" high -- complex behavioral patterns")
+ print(" max -- thorough audit-level analysis")
+ new_reasoning = _prompt("Reasoning level", default=current_reasoning)
+ if new_reasoning in ("minimal", "low", "medium", "high", "max"):
+ hermes_host["dialecticReasoningLevel"] = new_reasoning
+ else:
+ hermes_host["dialecticReasoningLevel"] = "low"
# --- 8. Session strategy ---
current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session")
@@ -636,8 +656,11 @@ def cmd_status(args) -> None:
print(f" Recall mode: {hcfg.recall_mode}")
print(f" Context budget: {hcfg.context_tokens or '(uncapped)'} tokens")
raw = getattr(hcfg, "raw", None) or {}
- dialectic_cadence = raw.get("dialecticCadence") or 3
+ dialectic_cadence = raw.get("dialecticCadence") or 1
print(f" Dialectic cad: every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}")
+ reasoning_cap = raw.get("reasoningLevelCap") or hcfg.reasoning_level_cap
+ heuristic_on = "on" if hcfg.reasoning_heuristic else "off"
+ print(f" Reasoning: base={hcfg.dialectic_reasoning_level}, cap={reasoning_cap}, heuristic={heuristic_on}")
print(f" Observation: user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})")
print(f" Write freq: {hcfg.write_frequency}")
diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py
index 2474d3a2b65..fef2e2d58f1 100644
--- a/plugins/memory/honcho/client.py
+++ b/plugins/memory/honcho/client.py
@@ -251,6 +251,11 @@ class HonchoClientConfig:
# matching dialectic_depth length. When None, uses proportional defaults
# derived from dialectic_reasoning_level.
dialectic_depth_levels: list[str] | None = None
+ # When true, the auto-injected dialectic scales reasoning level up on
+ # longer queries. See HonchoMemoryProvider for thresholds.
+ reasoning_heuristic: bool = True
+ # Ceiling for the heuristic-selected reasoning level.
+ reasoning_level_cap: str = "high"
# Honcho API limits — configurable for self-hosted instances
# Max chars per message sent via add_messages() (Honcho cloud: 25000)
message_max_chars: int = 25000
@@ -446,6 +451,16 @@ class HonchoClientConfig:
raw.get("dialecticDepthLevels"),
depth=_parse_dialectic_depth(host_block.get("dialecticDepth"), raw.get("dialecticDepth")),
),
+ reasoning_heuristic=_resolve_bool(
+ host_block.get("reasoningHeuristic"),
+ raw.get("reasoningHeuristic"),
+ default=True,
+ ),
+ reasoning_level_cap=(
+ host_block.get("reasoningLevelCap")
+ or raw.get("reasoningLevelCap")
+ or "high"
+ ),
message_max_chars=int(
host_block.get("messageMaxChars")
or raw.get("messageMaxChars")
diff --git a/plugins/memory/honcho/session.py b/plugins/memory/honcho/session.py
index fd91ee3b3b9..79625b5cd58 100644
--- a/plugins/memory/honcho/session.py
+++ b/plugins/memory/honcho/session.py
@@ -78,6 +78,7 @@ class HonchoSessionManager:
honcho: Honcho | None = None,
context_tokens: int | None = None,
config: Any | None = None,
+ runtime_user_peer_name: str | None = None,
):
"""
Initialize the session manager.
@@ -87,10 +88,12 @@ class HonchoSessionManager:
context_tokens: Max tokens for context() calls (None = Honcho default).
config: HonchoClientConfig from global config (provides peer_name, ai_peer,
write_frequency, observation, etc.).
+ runtime_user_peer_name: Gateway user identity for per-user memory scoping.
"""
self._honcho = honcho
self._context_tokens = context_tokens
self._config = config
+ self._runtime_user_peer_name = runtime_user_peer_name
self._cache: dict[str, HonchoSession] = {}
self._peers_cache: dict[str, Any] = {}
self._sessions_cache: dict[str, Any] = {}
@@ -100,9 +103,11 @@ class HonchoSessionManager:
self._write_frequency = write_frequency
self._turn_counter: int = 0
- # Prefetch caches: session_key → last result (consumed once per turn)
+ # Prefetch cache: session_key → last context result (consumed once per turn).
+ # Dialectic results are cached on the plugin side (HonchoMemoryProvider
+ # ._prefetch_result) so session-start prewarm and turn-driven fires share
+ # one source of truth; see __init__.py _do_session_init for the prewarm.
self._context_cache: dict[str, dict] = {}
- self._dialectic_cache: dict[str, str] = {}
self._prefetch_cache_lock = threading.Lock()
self._dialectic_reasoning_level: str = (
config.dialectic_reasoning_level if config else "low"
@@ -272,8 +277,10 @@ class HonchoSessionManager:
logger.debug("Local session cache hit: %s", key)
return self._cache[key]
- # Use peer names from global config when available
- if self._config and self._config.peer_name:
+ # Gateway sessions should use the runtime user identity when available.
+ if self._runtime_user_peer_name:
+ user_peer_id = self._sanitize_id(self._runtime_user_peer_name)
+ elif self._config and self._config.peer_name:
user_peer_id = self._sanitize_id(self._config.peer_name)
else:
# Fallback: derive from session key
@@ -499,8 +506,8 @@ class HonchoSessionManager:
Query Honcho's dialectic endpoint about a peer.
Runs an LLM on Honcho's backend against the target peer's full
- representation. Higher latency than context() — call async via
- prefetch_dialectic() to avoid blocking the response.
+ representation. Higher latency than context() — callers run this in
+ a background thread (see HonchoMemoryProvider) to avoid blocking.
Args:
session_key: The session key to query against.
@@ -555,42 +562,6 @@ class HonchoSessionManager:
logger.warning("Honcho dialectic query failed: %s", e)
return ""
- def prefetch_dialectic(self, session_key: str, query: str) -> None:
- """
- Fire a dialectic_query in a background thread, caching the result.
-
- Non-blocking. The result is available via pop_dialectic_result()
- on the next call (typically the following turn). Reasoning level
- is selected dynamically based on query complexity.
-
- Args:
- session_key: The session key to query against.
- query: The user's current message, used as the query.
- """
- def _run():
- result = self.dialectic_query(session_key, query)
- if result:
- self.set_dialectic_result(session_key, result)
-
- t = threading.Thread(target=_run, name="honcho-dialectic-prefetch", daemon=True)
- t.start()
-
- def set_dialectic_result(self, session_key: str, result: str) -> None:
- """Store a prefetched dialectic result in a thread-safe way."""
- if not result:
- return
- with self._prefetch_cache_lock:
- self._dialectic_cache[session_key] = result
-
- def pop_dialectic_result(self, session_key: str) -> str:
- """
- Return and clear the cached dialectic result for this session.
-
- Returns empty string if no result is ready yet.
- """
- with self._prefetch_cache_lock:
- return self._dialectic_cache.pop(session_key, "")
-
def prefetch_context(self, session_key: str, user_message: str | None = None) -> None:
"""
Fire get_prefetch_context in a background thread, caching the result.
diff --git a/run_agent.py b/run_agent.py
index ef90ae39e20..8e1fbfed194 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -831,6 +831,26 @@ class AIAgent:
self._execution_thread_id: int | None = None # Set at run_conversation() start
self._interrupt_thread_signal_pending = False
self._client_lock = threading.RLock()
+
+ # /steer mechanism — inject a user note into the next tool result
+ # without interrupting the agent. Unlike interrupt(), steer() does
+ # NOT set _interrupt_requested; it waits for the current tool batch
+ # to finish naturally, then the drain hook appends the text to the
+ # last tool result's content so the model sees it on its next
+ # iteration. Message-role alternation is preserved (we modify an
+ # existing tool message rather than inserting a new user turn).
+ self._pending_steer: Optional[str] = None
+ self._pending_steer_lock = threading.Lock()
+
+ # Concurrent-tool worker thread tracking. `_execute_tool_calls_concurrent`
+ # runs each tool on its own ThreadPoolExecutor worker — those worker
+ # threads have tids distinct from `_execution_thread_id`, so
+ # `_set_interrupt(True, _execution_thread_id)` alone does NOT cause
+ # `is_interrupted()` inside the worker to return True. Track the
+ # workers here so `interrupt()` / `clear_interrupt()` can fan out to
+ # their tids explicitly.
+ self._tool_worker_threads: set[int] = set()
+ self._tool_worker_threads_lock = threading.Lock()
# Subagent delegation state
self._delegate_depth = 0 # 0 = top-level agent, incremented for children
@@ -1286,31 +1306,6 @@ class AIAgent:
try:
_mem_provider_name = mem_config.get("provider", "") if mem_config else ""
- # Auto-migrate: if Honcho was actively configured (enabled +
- # credentials) but memory.provider is not set, activate the
- # honcho plugin automatically. Just having the config file
- # is not enough — the user may have disabled Honcho or the
- # file may be from a different tool.
- if not _mem_provider_name:
- try:
- from plugins.memory.honcho.client import HonchoClientConfig as _HCC
- _hcfg = _HCC.from_global_config()
- if _hcfg.enabled and (_hcfg.api_key or _hcfg.base_url):
- _mem_provider_name = "honcho"
- # Persist so this only auto-migrates once
- try:
- from hermes_cli.config import load_config as _lc, save_config as _sc
- _cfg = _lc()
- _cfg.setdefault("memory", {})["provider"] = "honcho"
- _sc(_cfg)
- except Exception:
- pass
- if not self.quiet_mode:
- print(" ✓ Auto-migrated Honcho to memory provider plugin.")
- print(" Your config and data are preserved.\n")
- except Exception:
- pass
-
if _mem_provider_name:
from agent.memory_manager import MemoryManager as _MemoryManager
from plugins.memory import load_memory_provider as _load_mem
@@ -1921,13 +1916,16 @@ class AIAgent:
def _should_emit_quiet_tool_messages(self) -> bool:
"""Return True when quiet-mode tool summaries should print directly.
- When the caller provides ``tool_progress_callback`` (for example the CLI
- TUI or a gateway progress renderer), that callback owns progress display.
- Emitting quiet-mode summary lines here duplicates progress and leaks tool
- previews into flows that are expected to stay silent, such as
- ``hermes chat -q``.
+ Quiet mode is used by both the interactive CLI and embedded/library
+ callers. The CLI may still want compact progress hints when no callback
+ owns rendering. Embedded/library callers, on the other hand, expect
+ quiet mode to be truly silent.
"""
- return self.quiet_mode and not self.tool_progress_callback
+ return (
+ self.quiet_mode
+ and not self.tool_progress_callback
+ and getattr(self, "platform", "") == "cli"
+ )
def _emit_status(self, message: str) -> None:
"""Emit a lifecycle status message to both CLI and gateway channels.
@@ -2152,17 +2150,49 @@ class AIAgent:
return bool(cleaned.strip())
def _strip_think_blocks(self, content: str) -> str:
- """Remove reasoning/thinking blocks from content, returning only visible text."""
+ """Remove reasoning/thinking blocks from content, returning only visible text.
+
+ Handles four cases:
+ 1. Closed tag pairs (``…``) — the common path when
+ the provider emits complete reasoning blocks.
+ 2. Unterminated open tag at a block boundary (start of text or
+ after a newline) — e.g. MiniMax M2.7 / NIM endpoints where the
+ closing tag is dropped. Everything from the open tag to end
+ of string is stripped. The block-boundary check mirrors
+ ``gateway/stream_consumer.py``'s filter so models that mention
+ ```` in prose aren't over-stripped.
+ 3. Stray orphan open/close tags that slip through.
+ 4. Tag variants: ````, ````, ````,
+ ````, ```` (Gemma 4), all
+ case-insensitive.
+ """
if not content:
return ""
- # Strip all reasoning tag variants: , , ,
- # , , (Gemma 4)
- content = re.sub(r'.*?', '', content, flags=re.DOTALL)
+ # 1. Closed tag pairs — case-insensitive for all variants so
+ # mixed-case tags (, ) don't slip through to
+ # the unterminated-tag pass and take trailing content with them.
+ content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE)
content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE)
- content = re.sub(r'.*?', '', content, flags=re.DOTALL)
- content = re.sub(r'.*?', '', content, flags=re.DOTALL)
+ content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE)
+ content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE)
content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE)
- content = re.sub(r'?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
+ # 2. Unterminated reasoning block — open tag at a block boundary
+ # (start of text, or after a newline) with no matching close.
+ # Strip from the tag to end of string. Fixes #8878 / #9568
+ # (MiniMax M2.7 leaking raw reasoning into assistant content).
+ content = re.sub(
+ r'(?:^|\n)[ \t]*<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)\b[^>]*>.*$',
+ '',
+ content,
+ flags=re.DOTALL | re.IGNORECASE,
+ )
+ # 3. Stray orphan open/close tags that slipped through.
+ content = re.sub(
+ r'?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*',
+ '',
+ content,
+ flags=re.IGNORECASE,
+ )
return content
@staticmethod
@@ -3191,6 +3221,25 @@ class AIAgent:
# interrupt signal until startup completes instead of targeting
# the caller thread by mistake.
self._interrupt_thread_signal_pending = True
+ # Fan out to concurrent-tool worker threads. Those workers run tools
+ # on their own tids (ThreadPoolExecutor workers), so `is_interrupted()`
+ # inside a tool only sees an interrupt when their specific tid is in
+ # the `_interrupted_threads` set. Without this propagation, an
+ # already-running concurrent tool (e.g. a terminal command hung on
+ # network I/O) never notices the interrupt and has to run to its own
+ # timeout. See `_run_tool` for the matching entry/exit bookkeeping.
+ # `getattr` fallback covers test stubs that build AIAgent via
+ # object.__new__ and skip __init__.
+ _tracker = getattr(self, "_tool_worker_threads", None)
+ _tracker_lock = getattr(self, "_tool_worker_threads_lock", None)
+ if _tracker is not None and _tracker_lock is not None:
+ with _tracker_lock:
+ _worker_tids = list(_tracker)
+ for _wtid in _worker_tids:
+ try:
+ _set_interrupt(True, _wtid)
+ except Exception:
+ pass
# Propagate interrupt to any running child agents (subagent delegation)
with self._active_children_lock:
children_copy = list(self._active_children)
@@ -3209,6 +3258,146 @@ class AIAgent:
self._interrupt_thread_signal_pending = False
if self._execution_thread_id is not None:
_set_interrupt(False, self._execution_thread_id)
+ # Also clear any concurrent-tool worker thread bits. Tracked
+ # workers normally clear their own bit on exit, but an explicit
+ # clear here guarantees no stale interrupt can survive a turn
+ # boundary and fire on a subsequent, unrelated tool call that
+ # happens to get scheduled onto the same recycled worker tid.
+ # `getattr` fallback covers test stubs that build AIAgent via
+ # object.__new__ and skip __init__.
+ _tracker = getattr(self, "_tool_worker_threads", None)
+ _tracker_lock = getattr(self, "_tool_worker_threads_lock", None)
+ if _tracker is not None and _tracker_lock is not None:
+ with _tracker_lock:
+ _worker_tids = list(_tracker)
+ for _wtid in _worker_tids:
+ try:
+ _set_interrupt(False, _wtid)
+ except Exception:
+ pass
+ # A hard interrupt supersedes any pending /steer — the steer was
+ # meant for the agent's next tool-call iteration, which will no
+ # longer happen. Drop it instead of surprising the user with a
+ # late injection on the post-interrupt turn.
+ _steer_lock = getattr(self, "_pending_steer_lock", None)
+ if _steer_lock is not None:
+ with _steer_lock:
+ self._pending_steer = None
+
+ def steer(self, text: str) -> bool:
+ """
+ Inject a user message into the next tool result without interrupting.
+
+ Unlike interrupt(), this does NOT stop the current tool call. The
+ text is stashed and the agent loop appends it to the LAST tool
+ result's content once the current tool batch finishes. The model
+ sees the steer as part of the tool output on its next iteration.
+
+ Thread-safe: callable from gateway/CLI/TUI threads. Multiple calls
+ before the drain point concatenate with newlines.
+
+ Args:
+ text: The user text to inject. Empty strings are ignored.
+
+ Returns:
+ True if the steer was accepted, False if the text was empty.
+ """
+ if not text or not text.strip():
+ return False
+ cleaned = text.strip()
+ _lock = getattr(self, "_pending_steer_lock", None)
+ if _lock is None:
+ # Test stubs that built AIAgent via object.__new__ skip __init__.
+ # Fall back to direct attribute set; no concurrent callers expected
+ # in those stubs.
+ existing = getattr(self, "_pending_steer", None)
+ self._pending_steer = (existing + "\n" + cleaned) if existing else cleaned
+ return True
+ with _lock:
+ if self._pending_steer:
+ self._pending_steer = self._pending_steer + "\n" + cleaned
+ else:
+ self._pending_steer = cleaned
+ return True
+
+ def _drain_pending_steer(self) -> Optional[str]:
+ """Return the pending steer text (if any) and clear the slot.
+
+ Safe to call from the agent execution thread after appending tool
+ results. Returns None when no steer is pending.
+ """
+ _lock = getattr(self, "_pending_steer_lock", None)
+ if _lock is None:
+ text = getattr(self, "_pending_steer", None)
+ self._pending_steer = None
+ return text
+ with _lock:
+ text = self._pending_steer
+ self._pending_steer = None
+ return text
+
+ def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None:
+ """Append any pending /steer text to the last tool result in this turn.
+
+ Called at the end of a tool-call batch, before the next API call.
+ The steer is appended to the last ``role:"tool"`` message's content
+ with a clear marker so the model understands it came from the user
+ and NOT from the tool itself. Role alternation is preserved —
+ nothing new is inserted, we only modify existing content.
+
+ Args:
+ messages: The running messages list.
+ num_tool_msgs: Number of tool results appended in this batch;
+ used to locate the tail slice safely.
+ """
+ if num_tool_msgs <= 0 or not messages:
+ return
+ steer_text = self._drain_pending_steer()
+ if not steer_text:
+ return
+ # Find the last tool-role message in the recent tail. Skipping
+ # non-tool messages defends against future code appending
+ # something else at the boundary.
+ target_idx = None
+ for j in range(len(messages) - 1, max(len(messages) - num_tool_msgs - 1, -1), -1):
+ msg = messages[j]
+ if isinstance(msg, dict) and msg.get("role") == "tool":
+ target_idx = j
+ break
+ if target_idx is None:
+ # No tool result in this batch (e.g. all skipped by interrupt);
+ # put the steer back so the caller's fallback path can deliver
+ # it as a normal next-turn user message.
+ _lock = getattr(self, "_pending_steer_lock", None)
+ if _lock is not None:
+ with _lock:
+ if self._pending_steer:
+ self._pending_steer = self._pending_steer + "\n" + steer_text
+ else:
+ self._pending_steer = steer_text
+ else:
+ existing = getattr(self, "_pending_steer", None)
+ self._pending_steer = (existing + "\n" + steer_text) if existing else steer_text
+ return
+ marker = f"\n\n[USER STEER (injected mid-run, not tool output): {steer_text}]"
+ existing_content = messages[target_idx].get("content", "")
+ if not isinstance(existing_content, str):
+ # Anthropic multimodal content blocks — preserve them and append
+ # a text block at the end.
+ try:
+ blocks = list(existing_content) if existing_content else []
+ blocks.append({"type": "text", "text": marker.lstrip()})
+ messages[target_idx]["content"] = blocks
+ except Exception:
+ # Fall back to string replacement if content shape is unexpected.
+ messages[target_idx]["content"] = f"{existing_content}{marker}"
+ else:
+ messages[target_idx]["content"] = existing_content + marker
+ logger.info(
+ "Delivered /steer to agent after tool batch (%d chars): %s",
+ len(steer_text),
+ steer_text[:120] + ("..." if len(steer_text) > 120 else ""),
+ )
def _touch_activity(self, desc: str) -> None:
"""Update the last-activity timestamp and description (thread-safe)."""
@@ -5512,7 +5701,7 @@ class AIAgent:
raise result["error"]
return result["response"]
- result = {"response": None, "error": None}
+ result = {"response": None, "error": None, "partial_tool_names": []}
request_client_holder = {"client": None}
first_delta_fired = {"done": False}
deltas_were_sent = {"yes": False} # Track if any deltas were fired (for fallback)
@@ -5668,7 +5857,15 @@ class AIAgent:
entry["id"] = tc_delta.id
if tc_delta.function:
if tc_delta.function.name:
- entry["function"]["name"] += tc_delta.function.name
+ # Use assignment, not +=. Function names are
+ # atomic identifiers delivered complete in the
+ # first chunk (OpenAI spec). Some providers
+ # (MiniMax M2.7 via NVIDIA NIM) resend the full
+ # name in every chunk; concatenation would
+ # produce "read_fileread_file". Assignment
+ # (matching the OpenAI Node SDK / LiteLLM /
+ # Vercel AI patterns) is immune to this.
+ entry["function"]["name"] = tc_delta.function.name
if tc_delta.function.arguments:
entry["function"]["arguments"] += tc_delta.function.arguments
extra = getattr(tc_delta, "extra_content", None)
@@ -5684,6 +5881,14 @@ class AIAgent:
tool_gen_notified.add(idx)
_fire_first_delta()
self._fire_tool_gen_started(name)
+ # Record the partial tool-call name so the outer
+ # stub-builder can surface a user-visible warning
+ # if streaming dies before this tool's arguments
+ # are fully delivered. Without this, a stall
+ # during tool-call JSON generation lets the stub
+ # at line ~6107 return `tool_calls=None`, silently
+ # discarding the attempted action.
+ result["partial_tool_names"].append(name)
if chunk.choices[0].finish_reason:
finish_reason = chunk.choices[0].finish_reason
@@ -6050,13 +6255,44 @@ class AIAgent:
_partial_text = (
getattr(self, "_current_streamed_assistant_text", "") or ""
).strip() or None
- logger.warning(
- "Partial stream delivered before error; returning stub "
- "response with %s chars of recovered content to prevent "
- "duplicate messages: %s",
- len(_partial_text or ""),
- result["error"],
- )
+
+ # If the stream died while the model was emitting a tool call,
+ # the stub below will silently set `tool_calls=None` and the
+ # agent loop will treat the turn as complete — the attempted
+ # action is lost with no user-facing signal. Append a
+ # human-visible warning to the stub content so (a) the user
+ # knows something failed, and (b) the next turn's model sees
+ # in conversation history what was attempted and can retry.
+ _partial_names = list(result.get("partial_tool_names") or [])
+ if _partial_names:
+ _name_str = ", ".join(_partial_names[:3])
+ if len(_partial_names) > 3:
+ _name_str += f", +{len(_partial_names) - 3} more"
+ _warn = (
+ f"\n\n⚠ Stream stalled mid tool-call "
+ f"({_name_str}); the action was not executed. "
+ f"Ask me to retry if you want to continue."
+ )
+ _partial_text = (_partial_text or "") + _warn
+ # Also fire as a streaming delta so the user sees it now
+ # instead of only in the persisted transcript.
+ try:
+ self._fire_stream_delta(_warn)
+ except Exception:
+ pass
+ logger.warning(
+ "Partial stream dropped tool call(s) %s after %s chars "
+ "of text; surfaced warning to user: %s",
+ _partial_names, len(_partial_text or ""), result["error"],
+ )
+ else:
+ logger.warning(
+ "Partial stream delivered before error; returning stub "
+ "response with %s chars of recovered content to prevent "
+ "duplicate messages: %s",
+ len(_partial_text or ""),
+ result["error"],
+ )
_stub_msg = SimpleNamespace(
role="assistant", content=_partial_text, tool_calls=None,
reasoning_content=None,
@@ -6814,8 +7050,20 @@ class AIAgent:
if self.tools:
api_kwargs["tools"] = self.tools
- if self.max_tokens is not None:
+ # ── max_tokens for chat_completions ──────────────────────────────
+ # Priority: ephemeral override (error recovery / length-continuation
+ # boost) > user-configured max_tokens > provider-specific defaults.
+ _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
+ if _ephemeral_out is not None:
+ self._ephemeral_max_output_tokens = None # consume immediately
+ api_kwargs.update(self._max_tokens_param(_ephemeral_out))
+ elif self.max_tokens is not None:
api_kwargs.update(self._max_tokens_param(self.max_tokens))
+ elif "integrate.api.nvidia.com" in self._base_url_lower:
+ # NVIDIA NIM defaults to a very low max_tokens when omitted,
+ # causing models like GLM-4.7 to truncate immediately (thinking
+ # tokens alone exhaust the budget). 16384 provides adequate room.
+ api_kwargs.update(self._max_tokens_param(16384))
elif self._is_qwen_portal():
# Qwen Portal defaults to a very low max_tokens when omitted.
# Reasoning models (qwen3-coder-plus) exhaust that budget on
@@ -7024,6 +7272,20 @@ class AIAgent:
if reasoning_text:
reasoning_text = _sanitize_surrogates(reasoning_text)
+ # Strip inline reasoning tags (… etc.) from the stored
+ # assistant content. Reasoning was already captured into
+ # ``reasoning_text`` above (either from structured fields or the
+ # inline-block fallback), so the raw tags in content are redundant.
+ # Leaving them in place caused reasoning to leak to messaging
+ # platforms (#8878, #9568), inflate context on subsequent turns
+ # (#9306 observed 16% content-size reduction on a real MiniMax
+ # session), and pollute generated session titles. One strip at the
+ # storage boundary cleans content for every downstream consumer:
+ # API replay, session transcript, gateway delivery, CLI display,
+ # compression, title generation.
+ if isinstance(_san_content, str) and _san_content:
+ _san_content = self._strip_think_blocks(_san_content).strip()
+
msg = {
"role": "assistant",
"content": _san_content,
@@ -7653,6 +7915,22 @@ class AIAgent:
def _run_tool(index, tool_call, function_name, function_args):
"""Worker function executed in a thread."""
+ # Register this worker tid so the agent can fan out an interrupt
+ # to it — see AIAgent.interrupt(). Must happen first thing, and
+ # must be paired with discard + clear in the finally block.
+ _worker_tid = threading.current_thread().ident
+ with self._tool_worker_threads_lock:
+ self._tool_worker_threads.add(_worker_tid)
+ # Race: if the agent was interrupted between fan-out (which
+ # snapshotted an empty/earlier set) and our registration, apply
+ # the interrupt to our own tid now so is_interrupted() inside
+ # the tool returns True on the next poll.
+ if self._interrupt_requested:
+ try:
+ from tools.interrupt import set_interrupt as _sif
+ _sif(True, _worker_tid)
+ except Exception:
+ pass
# Set the activity callback on THIS worker thread so
# _wait_for_process (terminal commands) can fire heartbeats.
# The callback is thread-local; the main thread's callback
@@ -7675,6 +7953,16 @@ class AIAgent:
else:
logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
results[index] = (function_name, function_args, result, duration, is_error)
+ # Tear down worker-tid tracking. Clear any interrupt bit we may
+ # have set so the next task scheduled onto this recycled tid
+ # starts with a clean slate.
+ with self._tool_worker_threads_lock:
+ self._tool_worker_threads.discard(_worker_tid)
+ try:
+ from tools.interrupt import set_interrupt as _sif
+ _sif(False, _worker_tid)
+ except Exception:
+ pass
# Start spinner for CLI mode (skip when TUI handles tool progress)
spinner = None
@@ -7819,6 +8107,13 @@ class AIAgent:
turn_tool_msgs = messages[-num_tools:]
enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id))
+ # ── /steer injection ──────────────────────────────────────────────
+ # Append any pending user steer text to the last tool result so the
+ # agent sees it on its next iteration. Runs AFTER budget enforcement
+ # so the steer marker is never truncated. See steer() for details.
+ if num_tools > 0:
+ self._apply_pending_steer_to_tool_results(messages, num_tools)
+
def _execute_tool_calls_sequential(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
"""Execute tool calls sequentially (original behavior). Used for single calls or interactive tools."""
for i, tool_call in enumerate(assistant_message.tool_calls, 1):
@@ -8030,7 +8325,7 @@ class AIAgent:
elif self._context_engine_tool_names and function_name in self._context_engine_tool_names:
# Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.)
spinner = None
- if self.quiet_mode and not self.tool_progress_callback:
+ if self._should_emit_quiet_tool_messages():
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
preview = _build_tool_preview(function_name, function_args) or function_name
@@ -8048,7 +8343,7 @@ class AIAgent:
cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result)
if spinner:
spinner.stop(cute_msg)
- elif self.quiet_mode:
+ elif self._should_emit_quiet_tool_messages():
self._vprint(f" {cute_msg}")
elif self._memory_manager and self._memory_manager.has_tool(function_name):
# Memory provider tools (hindsight_retain, honcho_search, etc.)
@@ -8198,6 +8493,12 @@ class AIAgent:
if num_tools_seq > 0:
enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id))
+ # ── /steer injection ──────────────────────────────────────────────
+ # See _execute_tool_calls_parallel for the rationale. Same hook,
+ # applied to sequential execution as well.
+ if num_tools_seq > 0:
+ self._apply_pending_steer_to_tool_results(messages, num_tools_seq)
+
def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
@@ -9874,7 +10175,7 @@ class AIAgent:
_dhh = _dhh_fn()
print(f"{self.log_prefix} • Check ANTHROPIC_TOKEN in {_dhh}/.env for Hermes-managed OAuth/setup tokens")
print(f"{self.log_prefix} • Check ANTHROPIC_API_KEY in {_dhh}/.env for API keys or legacy token values")
- print(f"{self.log_prefix} • For API keys: verify at https://console.anthropic.com/settings/keys")
+ print(f"{self.log_prefix} • For API keys: verify at https://platform.claude.com/settings/keys")
print(f"{self.log_prefix} • For Claude Code: run 'claude /login' to refresh, then retry")
print(f"{self.log_prefix} • Legacy cleanup: hermes config set ANTHROPIC_TOKEN \"\"")
print(f"{self.log_prefix} • Clear stale keys: hermes config set ANTHROPIC_API_KEY \"\"")
@@ -10518,6 +10819,12 @@ class AIAgent:
continue
if restart_with_length_continuation:
+ # Progressively boost the output token budget on each retry.
+ # Retry 1 → 2× base, retry 2 → 3× base, capped at 32 768.
+ # Applies to all providers via _ephemeral_max_output_tokens.
+ _boost_base = self.max_tokens if self.max_tokens else 4096
+ _boost = _boost_base * (length_continue_retries + 1)
+ self._ephemeral_max_output_tokens = min(_boost, 32768)
continue
# Guard: if all retries exhausted without a successful response
@@ -10880,17 +11187,10 @@ class AIAgent:
self._last_content_tools_all_housekeeping = _all_housekeeping
if _all_housekeeping and self._has_stream_consumers():
self._mute_post_response = True
- elif self.quiet_mode:
+ elif self._should_emit_quiet_tool_messages():
clean = self._strip_think_blocks(turn_content).strip()
if clean:
- relayed = False
- if (
- self.tool_progress_callback
- and getattr(self, "platform", "") == "tui"
- ):
- relayed = True
- if not relayed:
- self._vprint(f" ┊ 💬 {clean}")
+ self._vprint(f" ┊ 💬 {clean}")
# Pop thinking-only prefill message(s) before appending
# (tool-call path — same rationale as the final-response path).
@@ -11478,6 +11778,12 @@ class AIAgent:
"cost_status": self.session_cost_status,
"cost_source": self.session_cost_source,
}
+ # If a /steer landed after the final assistant turn (no more tool
+ # batches to drain into), hand it back to the caller so it can be
+ # delivered as the next user turn instead of being silently lost.
+ _leftover_steer = self._drain_pending_steer()
+ if _leftover_steer:
+ result["pending_steer"] = _leftover_steer
self._response_was_previewed = False
# Include interrupt message if one triggered the interrupt
diff --git a/scripts/release.py b/scripts/release.py
index e8039047ceb..a20c3c134fa 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -48,6 +48,7 @@ AUTHOR_MAP = {
"35742124+0xbyt4@users.noreply.github.com": "0xbyt4",
"82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
"kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
+ "kshitijk4poor@gmail.com": "kshitijk4poor",
"16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
"101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
@@ -74,6 +75,10 @@ AUTHOR_MAP = {
"109555139+davetist@users.noreply.github.com": "davetist",
"39405770+yyq4193@users.noreply.github.com": "yyq4193",
"Asunfly@users.noreply.github.com": "Asunfly",
+ "2500400+honghua@users.noreply.github.com": "honghua",
+ "nish3451@users.noreply.github.com": "nish3451",
+ "Mibayy@users.noreply.github.com": "Mibayy",
+ "135070653+sgaofen@users.noreply.github.com": "sgaofen",
# contributors (manual mapping from git names)
"ahmedsherif95@gmail.com": "asheriif",
"liujinkun@bytedance.com": "liujinkun2025",
@@ -207,10 +212,12 @@ AUTHOR_MAP = {
"cola-runner@users.noreply.github.com": "cola-runner",
"ygd58@users.noreply.github.com": "ygd58",
"vominh1919@users.noreply.github.com": "vominh1919",
+ "iamagenius00@users.noreply.github.com": "iamagenius00",
"trevmanthony@gmail.com": "trevthefoolish",
"ziliangpeng@users.noreply.github.com": "ziliangpeng",
"centripetal-star@users.noreply.github.com": "centripetal-star",
"LeonSGP43@users.noreply.github.com": "LeonSGP43",
+ "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
"Lubrsy706@users.noreply.github.com": "Lubrsy706",
"niyant@spicefi.xyz": "spniyant",
"olafthiele@gmail.com": "olafthiele",
@@ -262,6 +269,10 @@ AUTHOR_MAP = {
"xiayh17@gmail.com": "xiayh0107",
"asurla@nvidia.com": "anniesurla",
"limkuan24@gmail.com": "WideLee",
+ "aviralarora002@gmail.com": "AviArora02-commits",
+ "junminliu@gmail.com": "JimLiu",
+ "jarvischer@gmail.com": "maxchernin",
+ "levantam.98.2324@gmail.com": "LVT382009",
}
diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js
index 70cf8e95d9f..401651c8a83 100644
--- a/scripts/whatsapp-bridge/bridge.js
+++ b/scripts/whatsapp-bridge/bridge.js
@@ -229,6 +229,14 @@ async function startSocket() {
// Check allowlist for messages from others (resolve LID ↔ phone aliases)
if (!msg.key.fromMe && !matchesAllowedUser(senderId, ALLOWED_USERS, SESSION_DIR)) {
+ try {
+ console.log(JSON.stringify({
+ event: 'ignored',
+ reason: 'allowlist_mismatch',
+ chatId,
+ senderId,
+ }));
+ } catch {}
continue;
}
diff --git a/skills/creative/baoyu-infographic/PORT_NOTES.md b/skills/creative/baoyu-infographic/PORT_NOTES.md
new file mode 100644
index 00000000000..0a2d86d89ca
--- /dev/null
+++ b/skills/creative/baoyu-infographic/PORT_NOTES.md
@@ -0,0 +1,43 @@
+# Port Notes — baoyu-infographic
+
+Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56.1.
+
+## Changes from upstream
+
+Only `SKILL.md` was modified. All 45 reference files are verbatim copies.
+
+### SKILL.md adaptations
+
+| Change | Upstream | Hermes |
+|--------|----------|--------|
+| Metadata namespace | `openclaw` | `hermes` |
+| Trigger | `/baoyu-infographic` slash command | Natural language skill matching |
+| User config | EXTEND.md file (project/user/XDG paths) | Removed — not part of Hermes infra |
+| User prompts | `AskUserQuestion` (batched) | `clarify` tool (one at a time) |
+| Image generation | baoyu-imagine (Bun/TypeScript) | `image_generate` tool |
+| Platform support | Linux/macOS/Windows/WSL/PowerShell | Linux/macOS only |
+| File operations | Bash commands | Hermes file tools (write_file, read_file) |
+
+### What was preserved
+
+- All layout definitions (21 files)
+- All style definitions (21 files)
+- Core reference files (analysis-framework, base-prompt, structured-content-template)
+- Recommended combinations table
+- Keyword shortcuts table
+- Core principles and workflow structure
+- Author, version, homepage attribution
+
+## Syncing with upstream
+
+To pull upstream updates:
+```bash
+# Compare versions
+curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-infographic/SKILL.md | head -5
+# Look for version: line
+
+# Diff reference files
+diff <(curl -sL https://raw.githubusercontent.com/.../references/layouts/bento-grid.md) references/layouts/bento-grid.md
+```
+
+Reference files can be overwritten directly (they're unchanged from upstream). SKILL.md must be manually merged since it contains Hermes-specific adaptations.
diff --git a/skills/creative/baoyu-infographic/SKILL.md b/skills/creative/baoyu-infographic/SKILL.md
new file mode 100644
index 00000000000..fea3499cbf4
--- /dev/null
+++ b/skills/creative/baoyu-infographic/SKILL.md
@@ -0,0 +1,236 @@
+---
+name: baoyu-infographic
+description: Generate professional infographics with 21 layout types and 21 visual styles. Analyzes content, recommends layout×style combinations, and generates publication-ready infographics. Use when user asks to create "infographic", "visual summary", "信息图", "可视化", or "高密度信息大图".
+version: 1.56.1
+author: 宝玉 (JimLiu)
+license: MIT
+metadata:
+ hermes:
+ tags: [infographic, visual-summary, creative, image-generation]
+ homepage: https://github.com/JimLiu/baoyu-skills#baoyu-infographic
+---
+
+# Infographic Generator
+
+Adapted from [baoyu-infographic](https://github.com/JimLiu/baoyu-skills) for Hermes Agent's tool ecosystem.
+
+Two dimensions: **layout** (information structure) × **style** (visual aesthetics). Freely combine any layout with any style.
+
+## When to Use
+
+Trigger this skill when the user asks to create an infographic, visual summary, information graphic, or uses terms like "信息图", "可视化", or "高密度信息大图". The user provides content (text, file path, URL, or topic) and optionally specifies layout, style, aspect ratio, or language.
+
+## Options
+
+| Option | Values |
+|--------|--------|
+| Layout | 21 options (see Layout Gallery), default: bento-grid |
+| Style | 21 options (see Style Gallery), default: craft-handmade |
+| Aspect | Named: landscape (16:9), portrait (9:16), square (1:1). Custom: any W:H ratio (e.g., 3:4, 4:3, 2.35:1) |
+| Language | en, zh, ja, etc. |
+
+## Layout Gallery
+
+| Layout | Best For |
+|--------|----------|
+| `linear-progression` | Timelines, processes, tutorials |
+| `binary-comparison` | A vs B, before-after, pros-cons |
+| `comparison-matrix` | Multi-factor comparisons |
+| `hierarchical-layers` | Pyramids, priority levels |
+| `tree-branching` | Categories, taxonomies |
+| `hub-spoke` | Central concept with related items |
+| `structural-breakdown` | Exploded views, cross-sections |
+| `bento-grid` | Multiple topics, overview (default) |
+| `iceberg` | Surface vs hidden aspects |
+| `bridge` | Problem-solution |
+| `funnel` | Conversion, filtering |
+| `isometric-map` | Spatial relationships |
+| `dashboard` | Metrics, KPIs |
+| `periodic-table` | Categorized collections |
+| `comic-strip` | Narratives, sequences |
+| `story-mountain` | Plot structure, tension arcs |
+| `jigsaw` | Interconnected parts |
+| `venn-diagram` | Overlapping concepts |
+| `winding-roadmap` | Journey, milestones |
+| `circular-flow` | Cycles, recurring processes |
+| `dense-modules` | High-density modules, data-rich guides |
+
+Full definitions: `references/layouts/<layout>.md`
+
+## Style Gallery
+
+| Style | Description |
+|-------|-------------|
+| `craft-handmade` | Hand-drawn, paper craft (default) |
+| `claymation` | 3D clay figures, stop-motion |
+| `kawaii` | Japanese cute, pastels |
+| `storybook-watercolor` | Soft painted, whimsical |
+| `chalkboard` | Chalk on black board |
+| `cyberpunk-neon` | Neon glow, futuristic |
+| `bold-graphic` | Comic style, halftone |
+| `aged-academia` | Vintage science, sepia |
+| `corporate-memphis` | Flat vector, vibrant |
+| `technical-schematic` | Blueprint, engineering |
+| `origami` | Folded paper, geometric |
+| `pixel-art` | Retro 8-bit |
+| `ui-wireframe` | Grayscale interface mockup |
+| `subway-map` | Transit diagram |
+| `ikea-manual` | Minimal line art |
+| `knolling` | Organized flat-lay |
+| `lego-brick` | Toy brick construction |
+| `pop-laboratory` | Blueprint grid, coordinate markers, lab precision |
+| `morandi-journal` | Hand-drawn doodle, warm Morandi tones |
+| `retro-pop-grid` | 1970s retro pop art, Swiss grid, thick outlines |
+| `hand-drawn-edu` | Macaron pastels, hand-drawn wobble, stick figures |
+
+Full definitions: `references/styles/