Merge branch 'main' of github.com:NousResearch/hermes-agent into bb/gui

# Conflicts:
#	cli.py
#	hermes_cli/main.py
#	run_agent.py
#	tests/hermes_cli/test_cmd_update.py
#	tools/mcp_tool.py
#	web/src/lib/gatewayClient.ts
This commit is contained in:
Brooklyn Nicholson 2026-05-18 01:26:56 -05:00
commit 02aaac8f73
260 changed files with 24547 additions and 13573 deletions

View file

@ -66,9 +66,11 @@ RUN npm install --prefer-offline --no-audit && \
# frontend stats the readme path during dep resolution, so we `touch` an
# empty placeholder — the real README is restored by `COPY . .` below.
#
# `uv sync --frozen --no-install-project --extra all` installs only the
# deps reachable through the composite `[all]` extra (handpicked set
# intended for the production image). We do NOT use `--all-extras`:
# `uv sync --frozen --no-install-project --extra all --extra messaging`
# installs the deps reachable through the composite `[all]` extra
# (handpicked set intended for the production image), plus gateway
# messaging adapters that should work in the published image without a
# first-boot lazy install. We do NOT use `--all-extras`:
# that would pull in `[rl]` (atroposlib + tinker + torch + wandb from
# git), `[yc-bench]` (another git dep), and `[termux-all]` (Android
# redundancy), none of which belong in the published container.
@ -76,7 +78,7 @@ RUN npm install --prefer-offline --no-audit && \
# The editable link is created after the source copy below.
COPY pyproject.toml uv.lock ./
RUN touch ./README.md
RUN uv sync --frozen --no-install-project --extra all
RUN uv sync --frozen --no-install-project --extra all --extra messaging
# ---------- Source code ----------
# .dockerignore excludes node_modules, so the installs above survive.
@ -94,10 +96,10 @@ RUN cd web && npm run build && \
# hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time
# only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally
# not chowned here.
# The .venv MUST be hermes-writable so lazy_deps.py can install platform
# packages (discord.py, telegram, slack, etc.) at first gateway boot.
# Without this, `uv pip install` fails with EACCES and all messaging
# adapters silently fail to load. See tools/lazy_deps.py.
# The .venv MUST remain hermes-writable so lazy_deps.py can install
# remaining optional platform packages and future pin bumps at first use.
# Without this, `uv pip install` fails with EACCES and adapters silently
# fail to load. See tools/lazy_deps.py.
USER root
RUN chmod -R a+rX /opt/hermes && \
chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/node_modules

View file

@ -1123,7 +1123,6 @@ def build_tool_start(
)
# Generic fallback
import json
try:
args_text = json.dumps(arguments, indent=2, default=str)
except (TypeError, ValueError):

View file

@ -1,7 +1,7 @@
{
"id": "hermes-agent",
"name": "Hermes Agent",
"version": "0.13.0",
"version": "0.14.0",
"description": "Self-improving open-source AI agent by Nous Research with ACP editor integration, persistent memory, skills, and rich tool support.",
"repository": "https://github.com/NousResearch/hermes-agent",
"website": "https://hermes-agent.nousresearch.com/docs/user-guide/features/acp",
@ -9,7 +9,7 @@
"license": "MIT",
"distribution": {
"uvx": {
"package": "hermes-agent[acp]==0.13.0",
"package": "hermes-agent[acp]==0.14.0",
"args": ["hermes-acp"]
}
}

1469
agent/agent_init.py Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -755,7 +755,8 @@ class _CodexCompletionsAdapter:
def _check_cancelled() -> None:
if deadline is not None and time.monotonic() >= deadline:
timed_out.set()
if not timed_out.is_set():
_close_client_on_timeout()
raise TimeoutError(_timeout_message())
try:
from tools.interrupt import is_interrupted
@ -1233,7 +1234,7 @@ def _read_nous_auth() -> Optional[dict]:
def _nous_api_key(provider: dict) -> str:
"""Extract the best API key from a Nous provider state dict."""
"""Extract the Nous runtime credential from the compatibility field."""
return provider.get("agent_key") or provider.get("access_token", "")
@ -1246,17 +1247,25 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[
"""Return fresh Nous runtime credentials when available.
This mirrors the main agent's 401 recovery path and keeps auxiliary
clients aligned with the singleton auth store + mint flow instead of
clients aligned with the singleton auth store + JWT/mint flow instead of
relying only on whatever raw tokens happen to be sitting in auth.json
or the credential pool.
"""
try:
from hermes_cli.auth import resolve_nous_runtime_credentials
from hermes_cli.auth import (
NOUS_INFERENCE_AUTH_MODE_AUTO,
NOUS_INFERENCE_AUTH_MODE_LEGACY,
resolve_nous_runtime_credentials,
)
creds = resolve_nous_runtime_credentials(
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
force_mint=force_refresh,
inference_auth_mode=(
NOUS_INFERENCE_AUTH_MODE_LEGACY
if force_refresh
else NOUS_INFERENCE_AUTH_MODE_AUTO
),
)
except Exception as exc:
logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
@ -1473,7 +1482,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
pool_present, entry = _select_pool_entry("openrouter")
if pool_present:
or_key = explicit_api_key or _pool_runtime_api_key(entry)
@ -1483,7 +1492,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
logger.debug("Auxiliary client: OpenRouter via pool")
return OpenAI(api_key=or_key, base_url=base_url,
default_headers=build_or_headers()), _OPENROUTER_MODEL
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
if not or_key:
@ -1491,7 +1500,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
return None, None
logger.debug("Auxiliary client: OpenRouter")
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
default_headers=build_or_headers()), _OPENROUTER_MODEL
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
def _describe_openrouter_unavailable() -> str:
@ -2087,7 +2096,13 @@ def _is_payment_error(exc: Exception) -> bool:
"""Detect payment/credit/quota exhaustion errors.
Returns True for HTTP 402 (Payment Required) and for 429/other errors
whose message indicates billing exhaustion rather than rate limiting.
whose message indicates billing exhaustion or daily quota exhaustion
rather than transient rate limiting.
Daily token quota errors (e.g. Bedrock "Too many tokens per day",
Vertex AI "quota exceeded") are functionally equivalent to credit
exhaustion: the provider cannot serve the request until the quota
resets, so they should trigger the same provider-fallback logic.
"""
status = getattr(exc, "status_code", None)
if status == 402:
@ -2095,10 +2110,19 @@ def _is_payment_error(exc: Exception) -> bool:
err_lower = str(exc).lower()
# OpenRouter and other providers include "credits" or "afford" in 402 bodies,
# but sometimes wrap them in 429 or other codes.
# Daily quota exhaustion from Bedrock, Vertex AI, and similar providers
# uses different language but is semantically identical to credit exhaustion.
if status in {402, 429, None}:
if any(kw in err_lower for kw in ("credits", "insufficient funds",
"can only afford", "billing",
"payment required")):
if any(kw in err_lower for kw in (
"credits", "insufficient funds",
"can only afford", "billing",
"payment required",
# Daily / monthly quota exhaustion keywords
"quota exceeded", "quota_exceeded",
"too many tokens per day", "daily limit",
"tokens per day", "daily quota",
"resource exhausted", # Vertex AI / gRPC quota errors
)):
return True
return False
@ -2500,12 +2524,15 @@ def _refresh_provider_credentials(provider: str) -> bool:
_evict_cached_clients(normalized)
return True
if normalized == "nous":
from hermes_cli.auth import resolve_nous_runtime_credentials
from hermes_cli.auth import (
NOUS_INFERENCE_AUTH_MODE_LEGACY,
resolve_nous_runtime_credentials,
)
creds = resolve_nous_runtime_credentials(
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
force_mint=True,
inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_LEGACY,
)
if not str(creds.get("api_key", "") or "").strip():
return False
@ -2579,6 +2606,133 @@ def _try_payment_fallback(
return None, None, ""
def _try_main_agent_model_fallback(
    failed_provider: str,
    task: Optional[str] = None,
    reason: str = "error",
) -> Tuple[Optional[Any], Optional[str], str]:
    """Last-resort fallback to the user's main agent provider + model.

    Used after the configured fallback_chain is exhausted (or empty) for
    users with an explicit auxiliary provider. This is the "safety net"
    layer: if nothing the user asked for can serve the request, try the
    main chat model before giving up.

    Skips when the failed provider already IS the main provider (no point
    retrying the same backend that just failed), when the main provider is
    unset or ``auto``, and when the main provider is currently flagged
    unhealthy.

    Args:
        failed_provider: Name of the provider whose call just failed; may be
            empty/None, in which case no provider is excluded.
        task: Auxiliary task name, used only for log messages.
        reason: Short human-readable failure reason, used only for logging.

    Returns:
        (client, model, provider_label) or (None, None, "") if no fallback.
    """
    main_provider = (_read_main_provider() or "").strip()
    main_model = (_read_main_model() or "").strip()
    # An empty provider is already rejected by ``not main_provider``; only
    # the explicit "auto" sentinel needs a separate check.
    if not main_provider or not main_model or main_provider.lower() == "auto":
        return None, None, ""

    skip = (failed_provider or "").lower().strip()
    if main_provider.lower() == skip:
        # The thing that failed IS the main model — nothing to fall back to.
        return None, None, ""

    if _is_provider_unhealthy(main_provider):
        _log_skip_unhealthy(main_provider, task)
        return None, None, ""

    try:
        client, resolved_model = resolve_provider_client(
            provider=main_provider, model=main_model,
        )
    except Exception as exc:
        # Best-effort: a resolver failure must not mask the original error,
        # but leave a trace so a misconfigured main provider is debuggable.
        logger.debug(
            "Auxiliary %s: main agent provider %s could not be resolved: %s",
            task or "call", main_provider, exc,
        )
        client, resolved_model = None, None
    if client is None:
        return None, None, ""

    label = f"main-agent({main_provider})"
    logger.info(
        "Auxiliary %s: %s on %s — falling back to main agent model %s (%s)",
        task or "call", reason, failed_provider, label, resolved_model or main_model,
    )
    return client, resolved_model or main_model, label
def _try_configured_fallback_chain(
task: str,
failed_provider: str,
reason: str = "error",
) -> Tuple[Optional[Any], Optional[str], str]:
"""Try user-configured fallback_chain for a specific auxiliary task.
Reads auxiliary.<task>.fallback_chain from config.yaml and tries each
entry in order. Each entry must have at least ``provider``; ``model``,
``base_url``, and ``api_key`` are optional.
Returns:
(client, model, provider_label) or (None, None, "") if no fallback.
"""
if not task:
return None, None, ""
task_config = _get_auxiliary_task_config(task)
chain = task_config.get("fallback_chain")
if not chain or not isinstance(chain, list):
return None, None, ""
skip = failed_provider.lower().strip()
tried = []
for i, entry in enumerate(chain):
if not isinstance(entry, dict):
continue
fb_provider = str(entry.get("provider", "")).strip()
if not fb_provider or fb_provider.lower() == skip:
continue
fb_model = str(entry.get("model", "")).strip() or None
fb_base_url = str(entry.get("base_url", "")).strip() or None
fb_api_key = str(entry.get("api_key", "")).strip() or None
label = f"fallback_chain[{i}]({fb_provider})"
try:
fb_client = _resolve_single_provider(
fb_provider, fb_model, fb_base_url, fb_api_key)
except Exception:
fb_client = None
if fb_client is not None:
logger.info(
"Auxiliary %s: %s on %s — configured fallback to %s (%s)",
task, reason, failed_provider, label, fb_model or "default",
)
return fb_client, fb_model, label
tried.append(label)
if tried:
logger.debug(
"Auxiliary %s: configured fallback_chain exhausted (tried: %s)",
task, ", ".join(tried),
)
return None, None, ""
def _resolve_single_provider(
    provider: str,
    model: Optional[str] = None,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
) -> Optional[Any]:
    """Build the client for one fallback_chain entry.

    Delegates to ``resolve_provider_client`` and hands back only the client
    half of its ``(client, model)`` result; the resolved model is discarded.

    Args:
        provider: Provider name from the chain entry.
        model: Optional model override from the chain entry.
        base_url: Optional endpoint override from the chain entry.
        api_key: Optional credential override from the chain entry.

    Returns:
        The client returned by ``resolve_provider_client`` (which may be None).
    """
    resolver_kwargs = {
        "provider": provider,
        "model": model,
        "base_url": base_url,
        "api_key": api_key,
    }
    # Two-element unpack kept so a malformed resolver result still raises.
    client, _resolved_model = resolve_provider_client(**resolver_kwargs)
    return client
def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
"""Full auto-detection chain.
@ -3049,10 +3203,17 @@ def resolve_provider_client(
if custom_entry:
custom_base = custom_entry.get("base_url", "").strip()
custom_key = custom_entry.get("api_key", "").strip()
custom_key_env = custom_entry.get("key_env", "").strip()
custom_key_env = (custom_entry.get("key_env") or custom_entry.get("api_key_env") or "").strip()
if not custom_key and custom_key_env:
custom_key = os.getenv(custom_key_env, "").strip()
custom_key = custom_key or "no-key-required"
if custom_key == "no-key-required":
logger.warning(
"resolve_provider_client: named custom provider %r has no resolvable "
"api_key — request will be sent with placeholder no-key-required "
"and will 401 on auth-required endpoints",
custom_entry.get("name") or provider,
)
# An explicit per-task api_mode override (from _resolve_task_provider_model)
# wins; otherwise fall back to what the provider entry declared.
entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
@ -3400,7 +3561,7 @@ def _resolve_strict_vision_backend(
if provider == "copilot":
return resolve_provider_client("copilot", model, is_vision=True)
if provider == "openrouter":
return _try_openrouter()
return _try_openrouter(model=model)
if provider == "nous":
return _try_nous(vision=True)
if provider == "openai-codex":
@ -4519,11 +4680,17 @@ def call_llm(
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
)
# Only try alternative providers when the user didn't explicitly
# configure this task's provider. Explicit provider = hard constraint;
# auto (the default) = best-effort fallback chain. (#7559)
# Respect explicit provider choice for transient errors (auth, request
# validation, etc.) but allow fallback when the provider clearly cannot
# serve the request due to capacity: payment/quota exhaustion and
# connection failures are capacity problems, not request constraints.
# See #26803: daily token quota (429 + "too many tokens per day") must
# fall back just like a 402 credit error.
is_auto = resolved_provider in {"auto", "", None}
if should_fallback and is_auto:
# Capacity errors bypass the explicit-provider gate: the provider
# literally cannot serve this request regardless of user intent.
is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
if should_fallback and (is_auto or is_capacity_error):
if _is_payment_error(first_err):
reason = "payment error"
# Resolve the actual provider label (resolved_provider may be
@ -4539,8 +4706,24 @@ def call_llm(
reason = "connection error"
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
task or "call", reason, resolved_provider, first_err)
fb_client, fb_model, fb_label = _try_payment_fallback(
resolved_provider, task, reason=reason)
# Fallback order (#26882, #26803):
# 1. User-configured fallback_chain (per-task) if set
# 2. Main agent model (last-resort safety net)
# For auto users (no explicit aux provider), use the full
# auto-detection chain instead — its Step 1 IS the main agent
# model, so users on `auto` already get main-model fallback.
fb_client, fb_model, fb_label = (None, None, "")
if is_auto:
fb_client, fb_model, fb_label = _try_payment_fallback(
resolved_provider, task, reason=reason)
else:
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
task, resolved_provider or "auto", reason=reason)
if fb_client is None:
fb_client, fb_model, fb_label = _try_main_agent_model_fallback(
resolved_provider, task, reason=reason)
if fb_client is not None:
fb_kwargs = _build_call_kwargs(
fb_label, fb_model, messages,
@ -4550,6 +4733,14 @@ def call_llm(
base_url=str(getattr(fb_client, "base_url", "") or ""))
return _validate_llm_response(
fb_client.chat.completions.create(**fb_kwargs), task)
# All fallback layers exhausted — emit a single user-visible
# warning so the operator knows aux task is about to fail.
# (#26882) The error itself is re-raised below.
logger.warning(
"Auxiliary %s: %s on %s and all fallbacks exhausted "
"(fallback_chain + main agent model). Raising original error.",
task or "call", reason, resolved_provider,
)
# Connection/timeout errors leave the cached client poisoned (closed
# httpx transport, half-read stream, dead async loop). Drop it from
# the cache regardless of whether we found a fallback above so the
@ -4851,8 +5042,12 @@ async def async_call_llm(
or _is_connection_error(first_err)
or _is_rate_limit_error(first_err)
)
# Capacity errors (payment/quota/connection) bypass the explicit-provider
# gate — the provider cannot serve the request regardless of user intent.
# See #26803: daily token quota must fall back like a 402 credit error.
is_auto = resolved_provider in {"auto", "", None}
if should_fallback and is_auto:
is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
if should_fallback and (is_auto or is_capacity_error):
if _is_payment_error(first_err):
reason = "payment error"
_mark_provider_unhealthy(
@ -4864,8 +5059,23 @@ async def async_call_llm(
reason = "connection error"
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
task or "call", reason, resolved_provider, first_err)
fb_client, fb_model, fb_label = _try_payment_fallback(
resolved_provider, task, reason=reason)
# Fallback order (#26882, #26803):
# 1. User-configured fallback_chain (per-task) if set
# 2. Main agent model (last-resort safety net)
# Auto users get the full auto-detection chain instead — its
# Step 1 IS the main agent model.
fb_client, fb_model, fb_label = (None, None, "")
if is_auto:
fb_client, fb_model, fb_label = _try_payment_fallback(
resolved_provider, task, reason=reason)
else:
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
task, resolved_provider or "auto", reason=reason)
if fb_client is None:
fb_client, fb_model, fb_label = _try_main_agent_model_fallback(
resolved_provider, task, reason=reason)
if fb_client is not None:
fb_kwargs = _build_call_kwargs(
fb_label, fb_model, messages,
@ -4881,6 +5091,12 @@ async def async_call_llm(
fb_kwargs["model"] = async_fb_model
return _validate_llm_response(
await async_fb.chat.completions.create(**fb_kwargs), task)
# All fallback layers exhausted — warn before re-raising. (#26882)
logger.warning(
"Auxiliary %s (async): %s on %s and all fallbacks exhausted "
"(fallback_chain + main agent model). Raising original error.",
task or "call", reason, resolved_provider,
)
# Mirror the sync path: drop poisoned clients on connection/timeout
# so the next aux call rebuilds. See issue #23432.
if _is_connection_error(first_err):

570
agent/background_review.py Normal file
View file

@ -0,0 +1,570 @@
"""Background memory/skill review — fork the agent to evaluate the turn.
After every turn, ``AIAgent.run_conversation`` may call
:func:`spawn_background_review` to fire off a daemon thread that replays
the conversation snapshot in a forked :class:`AIAgent` and asks itself
"should any skill/memory be saved or updated?". Writes go straight to
the memory + skill stores. Main conversation and prompt cache are never
touched.
The fork inherits the parent's live runtime (provider, model, base_url,
credentials, cached system prompt) so it hits the same prefix cache and
uses the same auth. It runs with a tool whitelist limited to memory and
skill management tools; everything else is denied at runtime.
See the ``hermes-agent-dev`` skill (``references/self-improvement-loop.md``)
for invariants and PR review criteria.
"""
from __future__ import annotations
import contextlib
import json
import logging
import os
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# Review-prompt strings — used by ``spawn_background_review_thread`` to build
# the user-message that the forked review agent receives. AIAgent exposes
# them as class attributes (``_MEMORY_REVIEW_PROMPT`` etc.) for back-compat;
# the actual text lives here so future edits are one-place.
# NOTE(review): the module docstring names the entry point
# ``spawn_background_review`` — confirm which spelling is current.
#
# Memory-only review prompt: asks the reviewer to save what the user revealed
# about themselves and how they expect the agent to behave, using the memory
# tool, or to answer 'Nothing to save.' when nothing qualifies.
_MEMORY_REVIEW_PROMPT = (
"Review the conversation above and consider saving to memory if appropriate.\n\n"
"Focus on:\n"
"1. Has the user revealed things about themselves — their persona, desires, "
"preferences, or personal details worth remembering?\n"
"2. Has the user expressed expectations about how you should behave, their work "
"style, or ways they want you to operate?\n\n"
"If something stands out, save it using the memory tool. "
"If nothing is worth saving, just say 'Nothing to save.' and stop."
)
# Skill-only review prompt. The text is one concatenated string literal laid
# out in sections: target library shape, signals that warrant an update, the
# ordered list of preferred actions (patch loaded skill -> patch umbrella ->
# add support file -> create new umbrella), user-preference embedding, what
# must NOT be captured, and the 'Nothing to save.' escape hatch.
_SKILL_REVIEW_PROMPT = (
"Review the conversation above and update the skill library. Be "
"ACTIVE — most sessions produce at least one skill update, even if "
"small. A pass that does nothing is a missed learning opportunity, "
"not a neutral outcome.\n\n"
"Target shape of the library: CLASS-LEVEL skills, each with a rich "
"SKILL.md and a `references/` directory for session-specific detail. "
"Not a long flat list of narrow one-session-one-skill entries. This "
"shapes HOW you update, not WHETHER you update.\n\n"
# --- Signals that should trigger a skill update ---
"Signals to look for (any one of these warrants action):\n"
" • User corrected your style, tone, format, legibility, or "
"verbosity. Frustration signals like 'stop doing X', 'this is too "
"verbose', 'don't format like this', 'why are you explaining', "
"'just give me the answer', 'you always do Y and I hate it', or an "
"explicit 'remember this' are FIRST-CLASS skill signals, not just "
"memory signals. Update the relevant skill(s) to embed the "
"preference so the next session starts already knowing.\n"
" • User corrected your workflow, approach, or sequence of steps. "
"Encode the correction as a pitfall or explicit step in the skill "
"that governs that class of task.\n"
" • Non-trivial technique, fix, workaround, debugging path, or "
"tool-usage pattern emerged that a future session would benefit "
"from. Capture it.\n"
" • A skill that got loaded or consulted this session turned out "
"to be wrong, missing a step, or outdated. Patch it NOW.\n\n"
# --- Ordered list of preferred actions ---
"Preference order — prefer the earliest action that fits, but do "
"pick one when a signal above fired:\n"
" 1. UPDATE A CURRENTLY-LOADED SKILL. Look back through the "
"conversation for skills the user loaded via /skill-name or you "
"read via skill_view. If any of them covers the territory of the "
"new learning, PATCH that one first. It is the skill that was in "
"play, so it's the right one to extend.\n"
" 2. UPDATE AN EXISTING UMBRELLA (via skills_list + skill_view). "
"If no loaded skill fits but an existing class-level skill does, "
"patch it. Add a subsection, a pitfall, or broaden a trigger.\n"
" 3. ADD A SUPPORT FILE under an existing umbrella. Skills can be "
"packaged with three kinds of support files — use the right "
"directory per kind:\n"
" • `references/<topic>.md` — session-specific detail (error "
"transcripts, reproduction recipes, provider quirks) AND "
"condensed knowledge banks: quoted research, API docs, external "
"authoritative excerpts, or domain notes you found while working "
"on the problem. Write it concise and for the value of the task, "
"not as a full mirror of upstream docs.\n"
" • `templates/<name>.<ext>` — starter files meant to be "
"copied and modified (boilerplate configs, scaffolding, a "
"known-good example the agent can `reproduce with modifications`).\n"
" • `scripts/<name>.<ext>` — statically re-runnable actions "
"the skill can invoke directly (verification scripts, fixture "
"generators, deterministic probes, anything the agent should run "
"rather than hand-type each time).\n"
" Add support files via skill_manage action=write_file with "
"file_path starting 'references/', 'templates/', or 'scripts/'. "
"The umbrella's SKILL.md should gain a one-line pointer to any "
"new support file so future agents know it exists.\n"
" 4. CREATE A NEW CLASS-LEVEL UMBRELLA SKILL when no existing "
"skill covers the class. The name MUST be at the class level. "
"The name MUST NOT be a specific PR number, error string, feature "
"codename, library-alone name, or 'fix-X / debug-Y / audit-Z-today' "
"session artifact. If the proposed name only makes sense for "
"today's task, it's wrong — fall back to (1), (2), or (3).\n\n"
"User-preference embedding (important): when the user expressed a "
"style/format/workflow preference, the update belongs in the "
"SKILL.md body, not just in memory. Memory captures 'who the user "
"is and what the current situation and state of your operations "
"are'; skills capture 'how to do this class of task for this "
"user'. When they complain about how you handled a task, the "
"skill that governs that task needs to carry the lesson.\n\n"
"If you notice two existing skills that overlap, note it in your "
"reply — the background curator handles consolidation at scale.\n\n"
# --- What must not be turned into a skill ---
"Do NOT capture (these become persistent self-imposed constraints "
"that bite you later when the environment changes):\n"
" • Environment-dependent failures: missing binaries, fresh-install "
"errors, post-migration path mismatches, 'command not found', "
"unconfigured credentials, uninstalled packages. The user can fix "
"these — they are not durable rules.\n"
" • Negative claims about tools or features ('browser tools do not "
"work', 'X tool is broken', 'cannot use Y from execute_code'). These "
"harden into refusals the agent cites against itself for months "
"after the actual problem was fixed.\n"
" • Session-specific transient errors that resolved before the "
"conversation ended. If retrying worked, the lesson is the retry "
"pattern, not the original failure.\n"
" • One-off task narratives. A user asking 'summarize today's "
"market' or 'analyze this PR' is not a class of work that warrants "
"a skill.\n\n"
"If a tool failed because of setup state, capture the FIX (install "
"command, config step, env var to set) under an existing setup or "
"troubleshooting skill — never 'this tool does not work' as a "
"standalone constraint.\n\n"
"'Nothing to save.' is a real option but should NOT be the "
"default. If the session ran smoothly with no corrections and "
"produced no new technique, just say 'Nothing to save.' and stop. "
"Otherwise, act."
)
# Combined memory + skill review prompt. One concatenated string literal that
# covers both dimensions in a single pass: a short memory instruction, then a
# condensed form of the skill guidance (signals, ordered actions, what not to
# capture), ending with the 'Nothing to save.' escape hatch.
_COMBINED_REVIEW_PROMPT = (
"Review the conversation above and update two things:\n\n"
"**Memory**: who the user is. Did the user reveal persona, "
"desires, preferences, personal details, or expectations about "
"how you should behave? Save facts about the user and durable "
"preferences with the memory tool.\n\n"
"**Skills**: how to do this class of task. Be ACTIVE — most "
"sessions produce at least one skill update. A pass that does "
"nothing is a missed learning opportunity, not a neutral outcome.\n\n"
"Target shape of the skill library: CLASS-LEVEL skills with a rich "
"SKILL.md and a `references/` directory for session-specific detail. "
"Not a long flat list of narrow one-session-one-skill entries.\n\n"
# --- Signals that should trigger a skill update ---
"Signals that warrant a skill update (any one is enough):\n"
" • User corrected your style, tone, format, legibility, "
"verbosity, or approach. Frustration is a FIRST-CLASS skill "
"signal, not just a memory signal. 'stop doing X', 'don't format "
"like this', 'I hate when you Y' — embed the lesson in the skill "
"that governs that task so the next session starts fixed.\n"
" • Non-trivial technique, fix, workaround, or debugging path "
"emerged.\n"
" • A skill that was loaded or consulted turned out wrong, "
"missing, or outdated — patch it now.\n\n"
# --- Ordered list of preferred actions ---
"Preference order for skills — pick the earliest that fits:\n"
" 1. UPDATE A CURRENTLY-LOADED SKILL. Check what skills were "
"loaded via /skill-name or skill_view in the conversation. If one "
"of them covers the learning, PATCH it first. It was in play; "
"it's the right place.\n"
" 2. UPDATE AN EXISTING UMBRELLA (skills_list + skill_view to "
"find the right one). Patch it.\n"
" 3. ADD A SUPPORT FILE under an existing umbrella via "
"skill_manage action=write_file. Three kinds: "
"`references/<topic>.md` for session-specific detail OR condensed "
"knowledge banks (quoted research, API docs excerpts, domain "
"notes) written concise and task-focused; `templates/<name>.<ext>` "
"for starter files meant to be copied and modified; "
"`scripts/<name>.<ext>` for statically re-runnable actions "
"(verification, fixture generators, probes). Add a one-line "
"pointer in SKILL.md so future agents find them.\n"
" 4. CREATE A NEW CLASS-LEVEL UMBRELLA when nothing exists. "
"Name at the class level — NOT a PR number, error string, "
"codename, library-alone name, or 'fix-X / debug-Y' session "
"artifact. If the name only fits today's task, fall back to (1), "
"(2), or (3).\n\n"
"User-preference embedding: when the user complains about how "
"you handled a task, update the skill that governs that task — "
"memory alone isn't enough. Memory says 'who the user is and "
"what the current situation and state of your operations are'; "
"skills say 'how to do this class of task for this user'. Both "
"should carry user-preference lessons when relevant.\n\n"
"If you notice overlapping existing skills, mention it — the "
"background curator handles consolidation.\n\n"
# --- What must not be turned into a skill ---
"Do NOT capture as skills (these become persistent self-imposed "
"constraints that bite you later when the environment changes):\n"
" • Environment-dependent failures: missing binaries, fresh-install "
"errors, post-migration path mismatches, 'command not found', "
"unconfigured credentials, uninstalled packages. The user can fix "
"these — they are not durable rules.\n"
" • Negative claims about tools or features ('browser tools do not "
"work', 'X tool is broken', 'cannot use Y from execute_code'). These "
"harden into refusals the agent cites against itself for months "
"after the actual problem was fixed.\n"
" • Session-specific transient errors that resolved before the "
"conversation ended. If retrying worked, the lesson is the retry "
"pattern, not the original failure.\n"
" • One-off task narratives. A user asking 'summarize today's "
"market' or 'analyze this PR' is not a class of work that warrants "
"a skill.\n\n"
"If a tool failed because of setup state, capture the FIX (install "
"command, config step, env var to set) under an existing setup or "
"troubleshooting skill — never 'this tool does not work' as a "
"standalone constraint.\n\n"
"Act on whichever of the two dimensions has real signal. If "
"genuinely nothing stands out on either, say 'Nothing to save.' "
"and stop — but don't reach for that conclusion as a default."
)
def summarize_background_review_actions(
    review_messages: List[Dict],
    prior_snapshot: List[Dict],
) -> List[str]:
    """Build the human-facing action summary for a background review pass.

    Walks the review agent's session messages and collects "successful tool
    action" descriptions to surface to the user (e.g. "Memory updated").

    Tool messages already present in ``prior_snapshot`` are skipped so we
    don't re-surface stale results from the prior conversation that the
    review agent inherited via ``conversation_history`` (issue #14944).
    Matching is by ``tool_call_id`` when available, with a content-equality
    fallback for tool messages that lack one.

    Args:
        review_messages: Messages recorded by the review agent; ``None`` or
            an empty list yields an empty summary.
        prior_snapshot: Messages that existed before the review started.

    Returns:
        Action descriptions in the order the tool results were seen.
        Duplicates are not removed here.
    """
    # Index the tool results that predate the review so they can be skipped.
    existing_tool_call_ids = set()
    existing_tool_contents = set()
    for prior in prior_snapshot or []:
        if not isinstance(prior, dict) or prior.get("role") != "tool":
            continue
        tcid = prior.get("tool_call_id")
        if tcid:
            existing_tool_call_ids.add(tcid)
        else:
            content = prior.get("content")
            if isinstance(content, str):
                existing_tool_contents.add(content)

    actions: List[str] = []
    for msg in review_messages or []:
        if not isinstance(msg, dict) or msg.get("role") != "tool":
            continue
        tcid = msg.get("tool_call_id")
        raw_content = msg.get("content")
        if tcid:
            if tcid in existing_tool_call_ids:
                continue
        elif isinstance(raw_content, str) and raw_content in existing_tool_contents:
            continue

        # A missing/None/non-text payload raises TypeError and is skipped,
        # exactly like malformed JSON.
        try:
            data = json.loads(raw_content)
        except (json.JSONDecodeError, TypeError):
            continue
        if not isinstance(data, dict) or not data.get("success"):
            continue

        message = data.get("message", "")
        if not isinstance(message, str):
            # e.g. {"success": true, "message": null}: there is nothing to
            # classify, and calling .lower() on it would abort the summary.
            continue
        target = data.get("target", "")
        lowered = message.lower()

        if "created" in lowered or "updated" in lowered:
            # Tool messages for create/update are already user-presentable.
            actions.append(message)
        elif (
            "added" in lowered
            or (target and "add" in lowered)
            or "removed" in lowered
            or "replaced" in lowered
        ):
            # Memory-store mutations collapse to a generic "<store> updated".
            if target == "memory":
                label = "Memory"
            elif target == "user":
                label = "User profile"
            else:
                label = target
            actions.append(f"{label} updated")
    return actions
def build_memory_write_metadata(
    agent: Any,
    *,
    write_origin: Optional[str] = None,
    execution_context: Optional[str] = None,
    task_id: Optional[str] = None,
    tool_call_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Assemble provenance metadata for external memory-provider mirrors.

    Explicit keyword arguments win; otherwise the values fall back to the
    agent's ``_memory_write_origin`` / ``_memory_write_context`` attributes
    and finally to ``"assistant_tool"`` / ``"foreground"``. Entries whose
    value is ``None`` or the empty string are left out of the result.
    """
    if write_origin:
        origin = write_origin
    else:
        origin = getattr(agent, "_memory_write_origin", "assistant_tool")

    if execution_context:
        context = execution_context
    else:
        context = getattr(agent, "_memory_write_context", "foreground")

    fields = [
        ("write_origin", origin),
        ("execution_context", context),
        ("session_id", agent.session_id or ""),
        ("parent_session_id", agent._parent_session_id or ""),
        # The environment is only consulted when the agent has no platform.
        ("platform", agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli")),
        ("tool_name", "memory"),
    ]
    if task_id:
        fields.append(("task_id", task_id))
    if tool_call_id:
        fields.append(("tool_call_id", tool_call_id))

    return {key: value for key, value in fields if value not in {None, ""}}
def _run_review_in_thread(
    agent: Any,
    messages_snapshot: List[Dict],
    prompt: str,
) -> None:
    """Worker function executed in the background-review daemon thread.

    Spawns a forked ``AIAgent`` inheriting the parent's runtime, runs the
    review prompt, and surfaces a compact action summary back to the user
    via ``agent._safe_print`` and ``agent.background_review_callback``.

    Args:
        agent: Parent agent whose runtime, memory store and cached system
            prompt the review fork inherits.
        messages_snapshot: Conversation history handed to the fork; also used
            afterwards to filter out tool results that predate the review.
        prompt: Review instructions sent as the fork's user message (a
            tool-restriction notice is appended before sending).

    Any ``Exception`` raised while building or running the fork is logged and
    reported through ``agent._emit_auxiliary_failure`` instead of propagating.
    """
    # Local import to avoid a hard circular dep at module load.
    from run_agent import AIAgent
    from tools.terminal_tool import set_approval_callback as _set_approval_callback

    # Install a non-interactive approval callback on this worker
    # thread so any dangerous-command guard the review agent trips
    # resolves to "deny" instead of falling back to input() -- which
    # deadlocks against the parent's prompt_toolkit TUI (#15216).
    # Same pattern as _subagent_auto_deny in tools/delegate_tool.py.
    def _bg_review_auto_deny(command, description, **kwargs):
        logger.warning(
            "Background review auto-denied dangerous command: %s (%s)",
            command, description,
        )
        return "deny"

    try:
        _set_approval_callback(_bg_review_auto_deny)
    except Exception:
        pass

    # review_agent stays non-None only while a fork still needs teardown;
    # the outer ``finally`` uses it to decide whether cleanup is pending.
    review_agent = None
    review_messages: List[Dict] = []
    try:
        with open(os.devnull, "w", encoding="utf-8") as _devnull, \
             contextlib.redirect_stdout(_devnull), \
             contextlib.redirect_stderr(_devnull):
            # Inherit the parent agent's live runtime (provider, model,
            # base_url, api_key, api_mode) so the fork uses the exact
            # same credentials the main turn is using. Without this,
            # AIAgent.__init__ re-runs auto-resolution from env vars,
            # which fails for OAuth-only providers, session-scoped
            # creds, or credential-pool setups where the resolver can't
            # reconstruct auth from scratch -- producing the spurious
            # "No LLM provider configured" warning at end of turn.
            _parent_runtime = agent._current_main_runtime()
            _parent_api_mode = _parent_runtime.get("api_mode") or None
            # The review fork needs to call agent-loop tools (memory,
            # skill_manage). Those tools require Hermes' own dispatch,
            # which the codex_app_server runtime bypasses entirely
            # (it runs the turn inside codex's subprocess). So when
            # the parent is on codex_app_server, downgrade the review
            # fork to codex_responses — same auth/credentials, but
            # talks to the OpenAI Responses API directly so Hermes
            # owns the loop and the agent-loop tools dispatch.
            if _parent_api_mode == "codex_app_server":
                _parent_api_mode = "codex_responses"
            # skip_memory=True keeps the review fork from
            # touching external memory plugins (honcho, mem0,
            # supermemory, etc.). Without it, the fork's
            # __init__ rebuilds its own _memory_manager from
            # config, scoped to the parent's session_id, and
            # run_conversation() then leaks the harness prompt
            # into the user's real memory namespace via three
            # ingestion sites: on_turn_start (cadence + turn
            # message), prefetch_all (recall query), and
            # sync_all (harness prompt + review output recorded
            # as a (user, assistant) turn pair). Built-in
            # MEMORY.md / USER.md state is re-bound from the
            # parent below so memory(action="add") writes from
            # the review still land on disk; the review just
            # has zero side effects on external providers.
            review_agent = AIAgent(
                model=agent.model,
                max_iterations=16,
                quiet_mode=True,
                platform=agent.platform,
                provider=agent.provider,
                api_mode=_parent_api_mode,
                base_url=_parent_runtime.get("base_url") or None,
                api_key=_parent_runtime.get("api_key") or None,
                credential_pool=getattr(agent, "_credential_pool", None),
                parent_session_id=agent.session_id,
                skip_memory=True,
            )
            # Tag writes made by the fork so provenance metadata can tell
            # them apart from foreground assistant writes.
            review_agent._memory_write_origin = "background_review"
            review_agent._memory_write_context = "background_review"
            review_agent._memory_store = agent._memory_store
            review_agent._memory_enabled = agent._memory_enabled
            review_agent._user_profile_enabled = agent._user_profile_enabled
            # Zero intervals: presumably disables the fork's own review
            # nudges so a review cannot trigger another review — TODO confirm.
            review_agent._memory_nudge_interval = 0
            review_agent._skill_nudge_interval = 0
            # Suppress all status/warning emits from the fork so the
            # user only sees the final successful-action summary.
            # Without this, mid-review "Iteration budget exhausted",
            # rate-limit retries, compression warnings, and other
            # lifecycle messages bubble up through _emit_status ->
            # _vprint and leak past the stdout redirect (they go via
            # _print_fn/status_callback, which bypass sys.stdout).
            review_agent.suppress_status_output = True
            # Inherit the parent's cached system prompt verbatim so
            # the review fork's outbound HTTP request hits the same
            # Anthropic/OpenRouter prefix cache the parent warmed.
            # Without this, the fork rebuilds the system prompt from
            # scratch (fresh _hermes_now() timestamp, fresh
            # session_id, narrower toolset → different skills_prompt)
            # and the byte-exact prefix-cache key misses. See
            # issue #25322 and PR #17276 for the full analysis +
            # measured impact (~26% end-to-end cost reduction on
            # Sonnet 4.5).
            review_agent._cached_system_prompt = agent._cached_system_prompt
            # Defensive: pin session_start + session_id to the
            # parent's so any code path that re-renders parts of
            # the system prompt (compression, plugin hooks) still
            # produces byte-identical output. The cached-prompt
            # assignment above already short-circuits the normal
            # rebuild path, but these pins guarantee parity even
            # if a future code path bypasses the cache.
            review_agent.session_start = agent.session_start
            review_agent.session_id = agent.session_id

            from model_tools import get_tool_definitions
            from hermes_cli.plugins import (
                set_thread_tool_whitelist,
                clear_thread_tool_whitelist,
            )

            # Restrict this thread to the tool names exposed by the
            # "memory" and "skills" toolsets for the duration of the run.
            review_whitelist = {
                t["function"]["name"]
                for t in get_tool_definitions(
                    enabled_toolsets=["memory", "skills"],
                    quiet_mode=True,
                )
            }
            set_thread_tool_whitelist(
                review_whitelist,
                deny_msg_fmt=(
                    "Background review denied non-whitelisted tool: "
                    "{tool_name}. Only memory/skill tools are allowed."
                ),
            )
            try:
                review_agent.run_conversation(
                    user_message=(
                        prompt
                        + "\n\nYou can only call memory and skill "
                        "management tools. Other tools will be denied "
                        "at runtime — do not attempt them."
                    ),
                    conversation_history=messages_snapshot,
                )
            finally:
                # Always lift the whitelist, even when the run raised.
                clear_thread_tool_whitelist()

            # Tear down memory providers while stdout is still
            # redirected so background thread teardown (Honcho flush,
            # Hindsight sync, etc.) stays silent. The finally block
            # below is a safety net for the exception path.
            try:
                review_agent.shutdown_memory_provider()
            except Exception:
                pass
            try:
                review_agent.close()
            except Exception:
                pass
            # Copy the transcript before dropping the reference; clearing
            # review_agent tells the outer ``finally`` teardown is done.
            review_messages = list(getattr(review_agent, "_session_messages", []))
            review_agent = None

        # Scan the review agent's messages for successful tool actions
        # and surface a compact summary to the user. Tool messages
        # already present in messages_snapshot must be skipped, since
        # the review agent inherits that history and would otherwise
        # re-surface stale "created"/"updated" messages from the prior
        # conversation as if they just happened (issue #14944).
        actions = summarize_background_review_actions(
            review_messages,
            messages_snapshot,
        )
        if actions:
            # dict.fromkeys drops duplicate actions while keeping order.
            summary = " · ".join(dict.fromkeys(actions))
            agent._safe_print(
                f"  💾 Self-improvement review: {summary}"
            )
            _bg_cb = agent.background_review_callback
            if _bg_cb:
                try:
                    _bg_cb(
                        f"💾 Self-improvement review: {summary}"
                    )
                except Exception:
                    pass
    except Exception as e:
        logger.warning("Background memory/skill review failed: %s", e)
        agent._emit_auxiliary_failure("background review", e)
    finally:
        # Safety-net cleanup for the exception path. Normal
        # completion already shut down inside redirect_stdout above.
        # Re-open devnull here so any teardown output (Honcho flush,
        # Hindsight sync, background thread joins) stays silent even
        # on the exception path where redirect_stdout already exited.
        if review_agent is not None:
            try:
                with open(os.devnull, "w", encoding="utf-8") as _fn, \
                     contextlib.redirect_stdout(_fn), \
                     contextlib.redirect_stderr(_fn):
                    try:
                        review_agent.shutdown_memory_provider()
                    except Exception:
                        pass
                    try:
                        review_agent.close()
                    except Exception:
                        pass
            except Exception:
                pass
        # Clear the approval callback on this bg-review thread so a
        # recycled thread-id doesn't inherit a stale reference.
        try:
            _set_approval_callback(None)
        except Exception:
            pass
def spawn_background_review_thread(
    agent: Any,
    messages_snapshot: List[Dict],
    review_memory: bool = False,
    review_skills: bool = False,
):
    """Prepare the worker callable and prompt for one background review.

    Returns a ``(target, prompt)`` tuple. No thread is created here: the
    caller (``AIAgent._spawn_background_review``) constructs the
    ``threading.Thread`` itself so test-level patches of
    ``run_agent.threading.Thread`` keep working.
    """
    # Select the prompt for the triggers that fired. An attribute of the same
    # name on the agent overrides the module-level constant, so older code
    # that sets agent._MEMORY_REVIEW_PROMPT etc. directly keeps working.
    if not review_memory:
        prompt = getattr(agent, "_SKILL_REVIEW_PROMPT", _SKILL_REVIEW_PROMPT)
    elif review_skills:
        prompt = getattr(agent, "_COMBINED_REVIEW_PROMPT", _COMBINED_REVIEW_PROMPT)
    else:
        prompt = getattr(agent, "_MEMORY_REVIEW_PROMPT", _MEMORY_REVIEW_PROMPT)

    def _target() -> None:
        _run_review_in_thread(agent, messages_snapshot, prompt)

    return _target, prompt
# Names exported by ``from <module> import *``. The underscore-prefixed prompt
# constants would be skipped by a star-import unless listed here explicitly.
__all__ = [
    "_MEMORY_REVIEW_PROMPT",
    "_SKILL_REVIEW_PROMPT",
    "_COMBINED_REVIEW_PROMPT",
    "spawn_background_review_thread",
    "summarize_background_review_actions",
    "build_memory_write_metadata",
]

View file

@ -36,6 +36,19 @@ from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Ensure boto3/botocore are installed before any code in this module runs.
# Upstream removed boto3 from [all] extras (PRs #24220, #24515); lazy_deps
# handles on-demand installation so the Bedrock provider still works in the
# EKS deployment without baking boto3 into the base image.
# ---------------------------------------------------------------------------
try:
    # Imported inside the guard so a missing ``tools.lazy_deps`` module is
    # tolerated the same way as a failed install.
    from tools.lazy_deps import ensure
    # NOTE(review): prompt=False presumably suppresses any interactive
    # confirmation — confirm against lazy_deps.ensure().
    ensure("provider.bedrock", prompt=False)
except Exception:
    pass  # lazy_deps unavailable or install failed — let downstream imports surface the real error
# ---------------------------------------------------------------------------
# Lazy boto3 import — only loaded when the Bedrock provider is actually used.
# This keeps startup fast for users who don't use Bedrock.

175
agent/browser_provider.py Normal file
View file

@ -0,0 +1,175 @@
"""
Browser Provider ABC
====================
Defines the pluggable-backend interface for cloud browser providers
(Browserbase, Browser Use, Firecrawl, …). Providers register instances via
:meth:`PluginContext.register_browser_provider`; the active one (selected via
``browser.cloud_provider`` in ``config.yaml``) services every cloud-mode
``browser_*`` tool call.
Providers live in ``<repo>/plugins/browser/<name>/`` (built-in, auto-loaded as
``kind: backend``) or ``~/.hermes/plugins/browser/<name>/`` (user, opt-in via
``plugins.enabled``).
This ABC mirrors :class:`agent.web_search_provider.WebSearchProvider` (PR
#25182) — same shape, same registration flow, same picker integration. The
legacy in-tree ``tools.browser_providers.base.CloudBrowserProvider`` ABC was
deleted in PR #25214 (this work) along with the per-vendor inline modules in
``tools/browser_providers/``; the lifecycle contract documented below is
preserved bit-for-bit so the tool wrapper (:mod:`tools.browser_tool`) does
not have to translate.
Session metadata contract (preserved from the legacy ``CloudBrowserProvider``)::
{
"session_name": str, # unique name for agent-browser --session
"bb_session_id": str, # provider session ID (for close/cleanup)
"cdp_url": str, # CDP websocket URL
"features": dict, # feature flags that were enabled
"external_call_id": str, # optional, managed-gateway billing key
}
``bb_session_id`` is a legacy key name kept verbatim for backward compat with
:mod:`tools.browser_tool` — it holds the provider's session ID regardless of
which provider is in use.
"""
from __future__ import annotations
import abc
from typing import Any, Dict
# ---------------------------------------------------------------------------
# ABC
# ---------------------------------------------------------------------------
class BrowserProvider(abc.ABC):
    """Contract that every cloud browser backend implements.

    Concrete providers supply :attr:`name`, :meth:`is_available` and the
    three lifecycle hooks — :meth:`create_session`, :meth:`close_session`
    and :meth:`emergency_cleanup`.

    The lifecycle shape is carried over unchanged from the legacy
    ``CloudBrowserProvider`` so the dispatcher in :mod:`tools.browser_tool`
    can stay a plain registry lookup — no per-provider conditionals and no
    shape translation.
    """

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """Stable short identifier stored in ``browser.cloud_provider``.

        Lowercase; hyphens are allowed so existing user-visible names keep
        working. Examples: ``browserbase``, ``browser-use``, ``firecrawl``.
        """

    @property
    def display_name(self) -> str:
        """Label shown in ``hermes tools``; falls back to :attr:`name`."""
        label = self.name
        return label

    @abc.abstractmethod
    def is_available(self) -> bool:
        """Report whether this provider can currently service calls.

        Expected to be a cheap check (env var present, managed-gateway token
        readable, optional Python dependency importable). Must NOT perform
        network calls — it runs at tool-registration time and on every
        ``hermes tools`` paint.

        Replaces the legacy ``CloudBrowserProvider.is_configured()``; the new
        name matches :class:`agent.web_search_provider.WebSearchProvider`.
        """

    @abc.abstractmethod
    def create_session(self, task_id: str) -> Dict[str, object]:
        """Start a cloud browser session and describe it.

        The returned dict must contain at least::

            {
                "session_name": str,    # unique name for agent-browser --session
                "bb_session_id": str,   # provider session ID (for close/cleanup)
                "cdp_url": str,         # CDP websocket URL
                "features": dict,       # feature flags that were enabled
            }

        ``bb_session_id`` is a legacy key name retained for compatibility
        with the rest of :mod:`tools.browser_tool` — it carries the
        provider's session ID whichever provider is active.

        May raise ``ValueError`` (missing credentials) or ``RuntimeError``
        (network / API failure); the dispatcher surfaces these to the user.
        """

    @abc.abstractmethod
    def close_session(self, session_id: str) -> bool:
        """Release or terminate the session with this provider session ID.

        Returns True on success and False on failure. Implementations should
        not raise — log and return False on any exception so the
        dispatcher's cleanup loop keeps moving across sessions.
        """

    @abc.abstractmethod
    def emergency_cleanup(self, session_id: str) -> None:
        """Best-effort teardown of a session while the process is exiting.

        Invoked from atexit / signal handlers. Must tolerate missing
        credentials, network errors, etc. — log and move on. Must not raise.
        """

    def get_setup_schema(self) -> Dict[str, Any]:
        """Describe this provider for the ``hermes tools`` picker.

        :mod:`hermes_cli.tools_config` uses the result to add a row to the
        Browser Automation picker. The shape follows the hardcoded entries
        in ``TOOL_CATEGORIES["browser"]``::

            {
                "name": "Browserbase",
                "badge": "paid",
                "tag": "Cloud browser with stealth and proxies",
                "env_vars": [
                    {"key": "BROWSERBASE_API_KEY",
                     "prompt": "Browserbase API key",
                     "url": "https://browserbase.com"},
                ],
                "post_setup": "agent_browser",
            }

        The default is a minimal entry built from :attr:`display_name`.
        Override it to expose API key prompts, badges, managed-Nous gating,
        and the ``post_setup`` install hook.
        """
        schema: Dict[str, Any] = {"name": self.display_name}
        schema.update(badge="", tag="", env_vars=[])
        return schema

    # ------------------------------------------------------------------
    # Backward-compat shims for the legacy CloudBrowserProvider API
    # ------------------------------------------------------------------
    #
    # Before PR #25214 the ABC exposed ``is_configured()`` and
    # ``provider_name()``, and ``tools.browser_tool`` still has ~6 callers
    # using those names. Instead of touching every callsite (and breaking
    # out-of-tree code that subclassed CloudBrowserProvider), the old names
    # remain as thin delegations to the new API. Subclasses MUST implement
    # :meth:`is_available` and :attr:`name`; overriding ``is_configured`` /
    # ``provider_name`` is permitted but not required.

    def is_configured(self) -> bool:
        """Legacy spelling of :meth:`is_available`."""
        available = self.is_available()
        return available

    def provider_name(self) -> str:
        """Legacy accessor that returns :attr:`display_name`."""
        label = self.display_name
        return label

223
agent/browser_registry.py Normal file
View file

@ -0,0 +1,223 @@
"""
Browser Provider Registry
=========================
Central map of registered cloud browser providers. Populated by plugins at
import-time via :meth:`PluginContext.register_browser_provider`; consumed by
:func:`tools.browser_tool._get_cloud_provider` to route each cloud-mode
``browser_*`` tool call to the active backend.
Active selection
----------------
The active provider is chosen by configuration with this precedence:
1. ``browser.cloud_provider`` in ``config.yaml`` (explicit override).
2. Legacy preference order — ``browser-use`` → ``browserbase`` — filtered by
availability. Matches the historic auto-detect order in
:func:`tools.browser_tool._get_cloud_provider` (Browser Use checked first
because it covers both the managed Nous gateway and direct API key path;
Browserbase as the older direct-credentials fallback). ``firecrawl`` is
intentionally NOT in the legacy walk — users only get Firecrawl as a
cloud browser when they explicitly set ``browser.cloud_provider:
firecrawl``, matching pre-migration behaviour where Firecrawl was never
auto-selected.
3. Otherwise ``None`` — the dispatcher falls back to local browser mode.
The explicit-config branch (rule 1) intentionally ignores ``is_available()``
so the dispatcher surfaces a typed "X_API_KEY is not set" error to the user
instead of silently switching backends. Matches the legacy
:func:`tools.browser_tool._get_cloud_provider` behaviour for configured names.
Note: there is no "capability" split here (unlike the web subsystem, which
has search/extract/crawl). Every browser provider implements the full
:class:`agent.browser_provider.BrowserProvider` lifecycle; the registry's
job is purely selection, not capability routing.
"""
from __future__ import annotations
import logging
import threading
from typing import Dict, List, Optional
from agent.browser_provider import BrowserProvider
logger = logging.getLogger(__name__)
_providers: Dict[str, BrowserProvider] = {}
_lock = threading.Lock()
def register_provider(provider: BrowserProvider) -> None:
    """Add a cloud browser provider to the registry.

    Registering a second provider under an existing ``name`` replaces the
    earlier entry and emits a debug log line, which keeps hot-reload
    scenarios (tests, dev loops) predictable.

    Raises:
        TypeError: ``provider`` is not a :class:`BrowserProvider` instance.
        ValueError: ``provider.name`` is not a non-empty string.
    """
    if not isinstance(provider, BrowserProvider):
        raise TypeError(
            f"register_provider() expects a BrowserProvider instance, "
            f"got {type(provider).__name__}"
        )

    key = provider.name
    if not (isinstance(key, str) and key.strip()):
        raise ValueError("Browser provider .name must be a non-empty string")

    with _lock:
        previous = _providers.get(key)
        _providers[key] = provider

    if previous is None:
        logger.debug(
            "Registered browser provider '%s' (%s)",
            key, type(provider).__name__,
        )
    else:
        logger.debug(
            "Browser provider '%s' re-registered (was %r)",
            key, type(previous).__name__,
        )
def list_providers() -> List[BrowserProvider]:
    """Return every registered provider, ordered by ``name``."""
    # Copy under the lock; sort the private copy after releasing it.
    with _lock:
        registered = list(_providers.values())
    registered.sort(key=lambda provider: provider.name)
    return registered
def get_provider(name: str) -> Optional[BrowserProvider]:
    """Look up the provider registered under *name*; None when absent.

    Non-string names yield None. Surrounding whitespace is stripped from
    *name* before the lookup.
    """
    if isinstance(name, str):
        key = name.strip()
        with _lock:
            return _providers.get(key)
    return None
# ---------------------------------------------------------------------------
# Active-provider resolution
# ---------------------------------------------------------------------------
# Legacy auto-detect order — used when no ``browser.cloud_provider`` is set.
# Matches the pre-migration walk in :func:`tools.browser_tool._get_cloud_provider`.
# Firecrawl is intentionally absent so users with ``FIRECRAWL_API_KEY`` set
# for web-extract don't get silently routed to a paid cloud browser. See
# :func:`_resolve` for the full rationale.
# Order matters: _resolve() walks this tuple front to back and returns the
# first registered provider whose is_available() check passes.
_LEGACY_PREFERENCE = (
    "browser-use",
    "browserbase",
)
def _resolve(configured: Optional[str]) -> Optional[BrowserProvider]:
    """Pick the active browser provider for a given configured name.

    Rules, applied in order:

    1. **Explicit "local".** Returns None — the dispatcher disables cloud
       mode entirely. Mirrors the legacy short-circuit in
       :func:`tools.browser_tool._get_cloud_provider`.
    2. **Explicit config wins, ignoring availability.** When ``configured``
       names a registered provider it is returned even if its
       :meth:`is_available` returns False — the dispatcher then surfaces a
       precise "X_API_KEY is not set" error instead of silently routing
       somewhere else.
    3. **Legacy preference walk, filtered by availability.** Walk
       :data:`_LEGACY_PREFERENCE` (``browser-use`` → ``browserbase``) and
       return the first provider whose ``is_available()`` is True.

    There is intentionally NO "single-eligible shortcut" rule here (unlike
    :func:`agent.web_search_registry._resolve`). Pre-migration, the
    auto-detect branch in ``tools.browser_tool._get_cloud_provider`` only
    considered Browser Use and Browserbase; Firecrawl was reachable only
    via an explicit ``browser.cloud_provider: firecrawl`` config key.
    Keeping that gate matters because Firecrawl shares its API key with
    the *web* extract plugin (``plugins/web/firecrawl/``), so users who set
    ``FIRECRAWL_API_KEY`` for web extract must NOT get silently routed to a
    paid cloud browser on a fresh install. Third-party browser-provider
    plugins under ``~/.hermes/plugins/browser/<vendor>/`` are subject to
    the same gate — they must be explicitly configured to take effect.

    Returns None when no provider is configured AND no available provider
    matches the legacy preference; the dispatcher then falls back to local
    browser mode.
    """
    # Work from a copy so provider code never runs while the lock is held.
    with _lock:
        registered = dict(_providers)

    # 1. Explicit "local" short-circuit.
    if configured == "local":
        return None

    # 2. An explicitly configured provider is returned regardless of
    #    is_available(), so the user gets a precise downstream error rather
    #    than a silent backend switch. Matches _get_cloud_provider() in
    #    browser_tool.py.
    if configured:
        explicit = registered.get(configured)
        if explicit is not None:
            return explicit
        logger.debug(
            "browser cloud_provider '%s' configured but not registered; "
            "falling back to auto-detect",
            configured,
        )

    # 3. Legacy preference walk — only names in _LEGACY_PREFERENCE are
    #    auto-eligible, and only when they report themselves available.
    for candidate_name in _LEGACY_PREFERENCE:
        candidate = registered.get(candidate_name)
        if candidate is None:
            continue
        # A provider whose availability check raises is treated as
        # unavailable so one buggy plugin cannot break resolution.
        try:
            usable = bool(candidate.is_available())
        except Exception as exc:  # noqa: BLE001
            logger.warning(
                "Browser provider %s.is_available() raised %s — treating as unavailable",
                candidate.name, exc, exc_info=True,
            )
            usable = False
        if usable:
            return candidate
    return None
def get_active_browser_provider() -> Optional[BrowserProvider]:
    """Return the cloud browser provider that is active right now.

    Reads ``browser.cloud_provider`` from config.yaml and hands the
    normalized value to :func:`_resolve`. If the config cannot be read, the
    key is absent, or normalization fails, resolution proceeds as though
    nothing were configured. Returns None for local mode or when no
    provider is available.
    """
    try:
        from hermes_cli.config import read_raw_config

        browser_cfg = read_raw_config().get("browser", {})
    except Exception as exc:
        logger.debug("Could not read browser config: %s", exc)
        browser_cfg = {}

    has_setting = isinstance(browser_cfg, dict) and "cloud_provider" in browser_cfg
    if not has_setting:
        return _resolve(None)

    try:
        from tools.tool_backend_helpers import normalize_browser_cloud_provider

        raw_value = browser_cfg.get("cloud_provider")
        configured = normalize_browser_cloud_provider(raw_value)
    except Exception as exc:
        logger.debug("normalize_browser_cloud_provider failed: %s", exc)
        configured = None
    return _resolve(configured)
def _reset_for_tests() -> None:
    """Empty the provider registry in place. **Test-only.**"""
    _lock.acquire()
    try:
        _providers.clear()
    finally:
        _lock.release()

File diff suppressed because it is too large Load diff

448
agent/codex_runtime.py Normal file
View file

@ -0,0 +1,448 @@
"""Codex API runtime — App Server and Responses-API streaming paths.
Extracted from :class:`AIAgent` to keep the agent loop file focused.
Each function takes the parent ``AIAgent`` as its first argument
(``agent``). AIAgent keeps thin forwarder methods for backward
compatibility.
* ``run_codex_app_server_turn`` — drives one turn through the
``codex_app_server`` subprocess client (used when a Codex CLI install
is the active provider).
* ``run_codex_stream`` — streams a Codex Responses API call (the
``codex_responses`` api_mode).
* ``run_codex_create_stream_fallback`` — recovery path when the
Responses ``stream=True`` initial create fails.
"""
from __future__ import annotations
import json
import logging
import os
from types import SimpleNamespace
from typing import Any, Dict, List
logger = logging.getLogger(__name__)
def run_codex_app_server_turn(
    agent,
    *,
    user_message: str,
    original_user_message: Any,
    messages: List[Dict[str, Any]],
    effective_task_id: str,
    should_review_memory: bool = False,
) -> Dict[str, Any]:
    """Codex app-server runtime path. Hands the entire turn to a `codex
    app-server` subprocess and projects its events back into Hermes'
    messages list so memory/skill review keep working.

    Called from run_conversation() when agent.api_mode == "codex_app_server".
    Returns the same dict shape as the chat_completions path.

    Args:
        agent: Parent ``AIAgent``; its ``_codex_session`` is created lazily
            here and reused on later turns.
        user_message: Text passed to the codex session as this turn's input.
        original_user_message: Forwarded to the external-memory sync.
        messages: Conversation list; extended in place with the turn's
            projected messages and returned in the result.
        effective_task_id: Not referenced in this function body.
        should_review_memory: Whether the caller's memory-review trigger
            fired for this turn.

    Returns:
        A dict with ``final_response``, ``messages``, ``api_calls``,
        ``completed``, ``partial`` and ``error``. When the turn ran, it also
        carries ``codex_thread_id`` and ``codex_turn_id``; when ``run_turn``
        raised, those two keys are absent and ``api_calls`` is 0.
    """
    from agent.transports.codex_app_server_session import CodexAppServerSession

    # Lazy session: one CodexAppServerSession per AIAgent instance.
    # Spawned on first turn, reused across turns, closed at AIAgent
    # shutdown (see _cleanup hook).
    if not hasattr(agent, "_codex_session") or agent._codex_session is None:
        cwd = getattr(agent, "session_cwd", None) or os.getcwd()
        # Approval callback: defer to Hermes' standard prompt flow if a
        # CLI thread has installed one. Gateway / cron contexts get the
        # codex-side fail-closed default.
        try:
            from tools.terminal_tool import _get_approval_callback
            approval_callback = _get_approval_callback()
        except Exception:
            approval_callback = None
        agent._codex_session = CodexAppServerSession(
            cwd=cwd,
            approval_callback=approval_callback,
        )

    # NOTE: the user message is ALREADY appended to messages by the
    # standard run_conversation() flow (line ~11823) before the early
    # return reaches us. Do NOT append again — that would duplicate.
    try:
        turn = agent._codex_session.run_turn(user_input=user_message)
    except Exception as exc:
        logger.exception("codex app-server turn failed")
        # Crash → unconditionally drop the session so the next turn
        # respawns from scratch instead of reusing a dead client.
        try:
            agent._codex_session.close()
        except Exception:
            pass
        agent._codex_session = None
        return {
            "final_response": (
                f"Codex app-server turn failed: {exc}. "
                f"Fall back to default runtime with `/codex-runtime auto`."
            ),
            "messages": messages,
            "api_calls": 0,
            "completed": False,
            "partial": True,
            "error": str(exc),
        }

    # If the turn signalled the underlying client is wedged (deadline
    # blown, post-tool watchdog tripped, OAuth refresh died, subprocess
    # exited), retire the session so the next turn respawns codex
    # rather than riding the broken process. Mirrors openclaw beta.8's
    # "retire timed-out app-server clients" fix.
    if getattr(turn, "should_retire", False):
        logger.warning(
            "codex app-server session retired (turn error: %s)",
            turn.error,
        )
        try:
            agent._codex_session.close()
        except Exception:
            pass
        agent._codex_session = None

    # Splice projected messages into the conversation. The projector emits
    # standard {role, content, tool_calls, tool_call_id} entries, which
    # is exactly what curator.py / sessions DB expect.
    if turn.projected_messages:
        messages.extend(turn.projected_messages)

    # Counter ticks for the agent-improvement loop.
    # _turns_since_memory and _user_turn_count are ALREADY incremented
    # in the run_conversation() pre-loop block (lines ~11793-11817) so we
    # do NOT touch them here — that would double-count.
    # Only _iters_since_skill needs explicit increment, since the
    # chat_completions loop bumps it per tool iteration (line ~12110)
    # and that loop is bypassed on this path.
    agent._iters_since_skill = (
        getattr(agent, "_iters_since_skill", 0) + turn.tool_iterations
    )

    # Now check the skill nudge AFTER iters were incremented — same
    # pattern the chat_completions path uses (line ~15432).
    should_review_skills = False
    if (
        agent._skill_nudge_interval > 0
        and agent._iters_since_skill >= agent._skill_nudge_interval
        and "skill_manage" in agent.valid_tool_names
    ):
        should_review_skills = True
        # The counter resets as soon as the trigger trips, whether or not
        # the review is actually spawned further down.
        agent._iters_since_skill = 0

    # External memory provider sync (mirrors line ~15439). Skipped on
    # interrupt/error to avoid feeding partial transcripts to memory.
    if not turn.interrupted and turn.error is None:
        try:
            agent._sync_external_memory_for_turn(
                original_user_message=original_user_message,
                final_response=turn.final_text,
                interrupted=False,
            )
        except Exception:
            logger.debug("external memory sync raised", exc_info=True)

    # Background review fork — same cadence + signature as the default
    # path (line ~15449). Only fires when a trigger actually tripped AND
    # we have a real final response.
    if (
        turn.final_text
        and not turn.interrupted
        and (should_review_memory or should_review_skills)
    ):
        try:
            agent._spawn_background_review(
                # Shallow copy: later appends to ``messages`` do not change
                # the list handed to the review.
                messages_snapshot=list(messages),
                review_memory=should_review_memory,
                review_skills=should_review_skills,
            )
        except Exception:
            logger.debug("background review spawn raised", exc_info=True)

    return {
        "final_response": turn.final_text,
        "messages": messages,
        "api_calls": 1,  # one app-server "turn" maps to one logical API call
        "completed": not turn.interrupted and turn.error is None,
        "partial": turn.interrupted or turn.error is not None,
        "error": turn.error,
        "codex_thread_id": turn.thread_id,
        "codex_turn_id": turn.turn_id,
    }
def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
    """Execute one streaming Responses API request and return the final response.

    Events from ``active_client.responses.stream(**api_kwargs)`` are consumed
    one at a time: text deltas are recorded on
    ``agent._codex_streamed_text_parts`` and forwarded through
    ``agent._fire_stream_delta`` (until a function-call event is seen in the
    same attempt), reasoning deltas go to ``agent._fire_reasoning_delta``, and
    finished output items are collected so that an empty ``output`` list on
    the final response can be backfilled.

    Args:
        agent: Owning agent; supplies the client, interrupt flag, activity
            tracking and the delta callbacks used below.
        api_kwargs: Keyword arguments passed verbatim to ``responses.stream``.
        client: Optional client used instead of the agent's primary client.
        on_first_delta: Optional zero-argument callable invoked once, right
            before the first text delta is forwarded. Exceptions it raises
            are ignored.

    Returns:
        The object returned by ``stream.get_final_response()`` (with
        ``output`` backfilled when it came back empty), or the result of
        ``agent._run_codex_create_stream_fallback`` once the streaming
        attempts are exhausted.

    Raises:
        InterruptedError: ``agent._interrupt_requested`` was set when an
            attempt was about to start.
        RuntimeError: The stream raised a ``RuntimeError`` that is neither a
            missing-``response.completed`` error nor a ``response.created``
            prelude error.
    """
    import httpx as _httpx

    active_client = client or agent._ensure_primary_openai_client(reason="codex_stream_direct")
    max_stream_retries = 1
    # Survives retries on purpose: on_first_delta must fire at most once.
    first_delta_fired = False
    # Accumulate streamed text so we can recover if get_final_response()
    # returns empty output (e.g. chatgpt.com backend-api sends
    # response.incomplete instead of response.completed).
    agent._codex_streamed_text_parts = []
    for attempt in range(max_stream_retries + 1):
        if agent._interrupt_requested:
            raise InterruptedError("Agent interrupted before Codex stream retry")
        # Per-attempt state. A retry starts a brand-new response, so deltas
        # and tool-call sightings left over from a failed attempt must not
        # leak into it — otherwise the synthesized fallback output below
        # would contain the partial text of the failed attempt followed by
        # the full text of the retry, and a stale tool-call flag would
        # suppress text streaming for the retry.
        has_tool_calls = False
        agent._codex_streamed_text_parts = []
        collected_output_items: list = []
        try:
            with active_client.responses.stream(**api_kwargs) as stream:
                for event in stream:
                    agent._touch_activity("receiving stream response")
                    if agent._interrupt_requested:
                        break
                    event_type = getattr(event, "type", "")
                    # Fire callbacks on text content deltas (suppress during tool calls)
                    if "output_text.delta" in event_type:
                        delta_text = getattr(event, "delta", "")
                        if delta_text:
                            agent._codex_streamed_text_parts.append(delta_text)
                        if delta_text and not has_tool_calls:
                            if not first_delta_fired:
                                first_delta_fired = True
                                if on_first_delta:
                                    try:
                                        on_first_delta()
                                    except Exception:
                                        # Best-effort UI hook; never let it
                                        # break the stream.
                                        pass
                            agent._fire_stream_delta(delta_text)
                    # Track tool calls to suppress text streaming
                    elif "function_call" in event_type:
                        has_tool_calls = True
                    # Fire reasoning callbacks
                    elif "reasoning" in event_type and "delta" in event_type:
                        reasoning_text = getattr(event, "delta", "")
                        if reasoning_text:
                            agent._fire_reasoning_delta(reasoning_text)
                    # Collect completed output items — some backends
                    # (chatgpt.com/backend-api/codex) stream valid items
                    # via response.output_item.done but the SDK's
                    # get_final_response() returns an empty output list.
                    elif event_type == "response.output_item.done":
                        done_item = getattr(event, "item", None)
                        if done_item is not None:
                            collected_output_items.append(done_item)
                    # Log non-completed terminal events for diagnostics
                    elif event_type in {"response.incomplete", "response.failed"}:
                        resp_obj = getattr(event, "response", None)
                        status = getattr(resp_obj, "status", None) if resp_obj else None
                        incomplete_details = getattr(resp_obj, "incomplete_details", None) if resp_obj else None
                        logger.warning(
                            "Codex Responses stream received terminal event %s "
                            "(status=%s, incomplete_details=%s, streamed_chars=%d). %s",
                            event_type, status, incomplete_details,
                            sum(len(p) for p in agent._codex_streamed_text_parts),
                            agent._client_log_context(),
                        )
                final_response = stream.get_final_response()
                # PATCH: ChatGPT Codex backend streams valid output items
                # but get_final_response() can return an empty output list.
                # Backfill from collected items or synthesize from deltas.
                _out = getattr(final_response, "output", None)
                if isinstance(_out, list) and not _out:
                    if collected_output_items:
                        final_response.output = list(collected_output_items)
                        logger.debug(
                            "Codex stream: backfilled %d output items from stream events",
                            len(collected_output_items),
                        )
                    elif agent._codex_streamed_text_parts and not has_tool_calls:
                        assembled = "".join(agent._codex_streamed_text_parts)
                        final_response.output = [SimpleNamespace(
                            type="message",
                            role="assistant",
                            status="completed",
                            content=[SimpleNamespace(type="output_text", text=assembled)],
                        )]
                        logger.debug(
                            "Codex stream: synthesized output from %d text deltas (%d chars)",
                            len(agent._codex_streamed_text_parts), len(assembled),
                        )
                return final_response
        except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
            if attempt < max_stream_retries:
                logger.debug(
                    "Codex Responses stream transport failed (attempt %s/%s); retrying. %s error=%s",
                    attempt + 1,
                    max_stream_retries + 1,
                    agent._client_log_context(),
                    exc,
                )
                continue
            logger.debug(
                "Codex Responses stream transport failed; falling back to create(stream=True). %s error=%s",
                agent._client_log_context(),
                exc,
            )
            return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
        except RuntimeError as exc:
            err_text = str(exc)
            missing_completed = "response.completed" in err_text
            # The OpenAI SDK's Responses streaming state machine raises
            # ``RuntimeError("Expected to have received `response.created`
            # before `<event-type>`")`` when the first SSE event from the
            # server is anything other than ``response.created`` — and it
            # discards the event's payload before we can read it. Three
            # real-world backends emit a different first frame:
            #
            #   * xAI on grok-4.x OAuth — sends ``error`` (issues
            #     reported around the May 2026 SuperGrok rollout when
            #     multi-turn conversations replay encrypted reasoning
            #     content the OAuth tier rejects)
            #   * codex-lb relays — send ``codex.rate_limits`` (#14634)
            #   * custom Responses relays — send ``response.in_progress``
            #     (#8133)
            #
            # In all three cases the underlying byte stream is still
            # readable: a non-stream ``responses.create(stream=True)``
            # fallback succeeds and surfaces the real provider error as
            # a normal exception with body+status_code attached, which
            # ``_summarize_api_error`` can then translate into a useful
            # user-facing line. Treat ``response.created`` prelude
            # errors the same way we already treat ``response.completed``
            # postlude errors.
            prelude_error = (
                "Expected to have received `response.created`" in err_text
                or "Expected to have received \"response.created\"" in err_text
            )
            if (missing_completed or prelude_error) and attempt < max_stream_retries:
                logger.debug(
                    "Responses stream %s (attempt %s/%s); retrying. %s",
                    "prelude rejected" if prelude_error else "closed before completion",
                    attempt + 1,
                    max_stream_retries + 1,
                    agent._client_log_context(),
                )
                continue
            if missing_completed or prelude_error:
                logger.debug(
                    "Responses stream %s; falling back to create(stream=True). %s err=%s",
                    "rejected before response.created" if prelude_error else "did not emit response.completed",
                    agent._client_log_context(),
                    err_text,
                )
                return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
            # Unrelated RuntimeError: let the caller see it.
            raise
def run_codex_create_stream_fallback(agent, api_kwargs: dict, client: Any = None):
    """Issue ``responses.create(stream=True)`` and return its terminal response.

    Used when the SDK's stream helper cannot complete. The raw event
    iterator is walked manually; events may be attribute-style objects or
    plain dicts.

    Args:
        agent: Owning agent; supplies the client, the transport used for
            ``preflight_kwargs`` and the activity tracker.
        api_kwargs: Request keyword arguments; copied, never mutated.
        client: Optional client used instead of the agent's primary client.

    Returns:
        The ``response`` carried by the first ``response.completed`` /
        ``response.incomplete`` / ``response.failed`` event (with an empty
        ``output`` list backfilled from streamed items or text deltas), or
        the object returned by ``create`` itself when it already exposes
        ``output`` or is not iterable.

    Raises:
        _StreamErrorEvent: The stream emitted an ``error`` event.
        RuntimeError: The stream ended without a usable terminal response.
    """

    def _lookup(source, key, default=None, *, none_only=False):
        # Attribute first; fall back to the dict entry when the attribute
        # is empty (or, with none_only, strictly None).
        value = getattr(source, key, default)
        absent = value is None if none_only else not value
        if absent and isinstance(source, dict):
            value = source.get(key, default)
        return value

    active_client = client or agent._ensure_primary_openai_client(reason="codex_create_stream_fallback")
    request_kwargs = dict(api_kwargs)
    request_kwargs["stream"] = True
    request_kwargs = agent._get_transport().preflight_kwargs(request_kwargs, allow_stream=True)
    result = active_client.responses.create(**request_kwargs)

    # Compatibility shim for mocks or providers that still return a
    # concrete response (has ``output``) or something we cannot iterate.
    if hasattr(result, "output") or not hasattr(result, "__iter__"):
        return result

    delta_events = {"response.output_text.delta"}
    terminal_events = {"response.completed", "response.incomplete", "response.failed"}
    finished_items: list = []
    text_chunks: list = []
    try:
        for event in result:
            agent._touch_activity("receiving stream response")
            kind = _lookup(event, "type")

            # ``error`` frames carry the provider's real failure reason
            # (subscription / quota / model-not-available /
            # rejected-reasoning-replay) but are not part of the terminal
            # set, so without this branch they would be consumed silently
            # and the call would end with "did not emit a terminal
            # response". Raise an APIError-shaped exception so
            # ``_summarize_api_error`` and the credential-pool entitlement
            # detector see the real text.
            if kind == "error":
                message = _lookup(event, "message")
                code = _lookup(event, "code")
                param = _lookup(event, "param")
                message = (message or "stream emitted error event").strip()
                from run_agent import _StreamErrorEvent
                raise _StreamErrorEvent(message, code=code, param=param)

            # Remember finished items and text deltas for backfill.
            if kind == "response.output_item.done":
                item = _lookup(event, "item", none_only=True)
                if item is not None:
                    finished_items.append(item)
            elif kind in delta_events:
                chunk = _lookup(event, "delta", "")
                if chunk:
                    text_chunks.append(chunk)

            if kind not in terminal_events:
                continue
            terminal_response = _lookup(event, "response", none_only=True)
            if terminal_response is None:
                # Terminal event without a payload: keep reading.
                continue

            # Backfill an empty output list from what was streamed.
            current_output = getattr(terminal_response, "output", None)
            if isinstance(current_output, list) and not current_output:
                if finished_items:
                    terminal_response.output = list(finished_items)
                    logger.debug(
                        "Codex fallback stream: backfilled %d output items",
                        len(finished_items),
                    )
                elif text_chunks:
                    assembled = "".join(text_chunks)
                    message_part = SimpleNamespace(type="output_text", text=assembled)
                    terminal_response.output = [SimpleNamespace(
                        type="message", role="assistant",
                        status="completed",
                        content=[message_part],
                    )]
                    logger.debug(
                        "Codex fallback stream: synthesized from %d deltas (%d chars)",
                        len(text_chunks), len(assembled),
                    )
            return terminal_response
    finally:
        # Always release the underlying stream, whatever the exit path.
        closer = getattr(result, "close", None)
        if callable(closer):
            try:
                closer()
            except Exception:
                pass
    raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")
# Names exported by ``from <module> import *``: the three Codex runtime
# entry points defined in this module.
__all__ = [
    "run_codex_app_server_turn",
    "run_codex_stream",
    "run_codex_create_stream_fallback",
]

View file

@ -0,0 +1,556 @@
"""Context compression — extract the AIAgent methods that drive summarisation.

Four concerns live here:

* :func:`check_compression_model_feasibility` — startup probe of the
  configured auxiliary compression model. Warns when the aux context
  window can't fit the main model's compression threshold; auto-lowers
  the session threshold when possible; hard-rejects auxes below
  ``MINIMUM_CONTEXT_LENGTH``.
* :func:`replay_compression_warning` — re-emit a stored warning through
  the gateway ``status_callback`` once it's wired up (the callback is
  set after :class:`AIAgent` construction).
* :func:`compress_context` — the actual compression call. Runs the
  configured compressor, splits the SQLite session, rotates the
  session_id, notifies plugin context engines / memory providers, and
  returns the compressed message list and freshly-built system prompt.
* :func:`try_shrink_image_parts_in_messages` — image-too-large recovery
  helper that re-encodes ``data:image/...;base64,...`` parts at a smaller
  size so retries can fit under provider ceilings (Anthropic's 5 MB).

``run_agent`` keeps thin wrappers for each so existing call sites
(``self._compress_context(...)``) keep working. Tests that exercise
these paths see no behavioural change.
"""
from __future__ import annotations
import logging
import os
import tempfile
import uuid
from datetime import datetime
from pathlib import Path
from typing import Any, List, Optional, Tuple
from agent.model_metadata import estimate_request_tokens_rough
logger = logging.getLogger(__name__)
def check_compression_model_feasibility(agent: Any) -> None:
    """Probe the auxiliary compression model when a session starts.

    Compares the auxiliary model's context window with the main model's
    compression threshold. When the auxiliary model cannot fit the content
    that needs summarising, compression would either fail outright or
    produce a severely truncated summary, so this function:

    * warns (and records ``agent._compression_warning``) when no auxiliary
      client or model is available;
    * raises when the auxiliary context is below ``MINIMUM_CONTEXT_LENGTH``;
    * lowers ``agent.context_compressor.threshold_tokens`` to the auxiliary
      context when that context is smaller than the threshold.

    Called during ``AIAgent.__init__`` so CLI users see the warning
    immediately (via ``_vprint``). The gateway sets ``status_callback``
    *after* construction, so :func:`replay_compression_warning` re-sends
    the stored warning through the callback on the first
    ``run_conversation()`` call.

    Args:
        agent: The owning agent. Does nothing when
            ``agent.compression_enabled`` is falsy.

    Raises:
        ValueError: The auxiliary model's context window is known and
            smaller than ``MINIMUM_CONTEXT_LENGTH``. Every other failure
            inside the probe is logged at debug level and ignored.
    """
    if not agent.compression_enabled:
        return
    try:
        from agent.auxiliary_client import (
            _resolve_task_provider_model,
            get_text_auxiliary_client,
        )
        from agent.model_metadata import (
            MINIMUM_CONTEXT_LENGTH,
            get_model_context_length,
        )

        client, aux_model = get_text_auxiliary_client(
            "compression",
            main_runtime=agent._current_main_runtime(),
        )
        # Best-effort aux provider label for the warning message. The
        # configured provider may be "auto", in which case we fall back
        # to the client's base_url hostname so the user can still tell
        # where the compression model is actually being called.
        try:
            _aux_cfg_provider, _, _, _, _ = _resolve_task_provider_model("compression")
        except Exception:
            _aux_cfg_provider = ""
        if client is None or not aux_model:
            if _aux_cfg_provider and _aux_cfg_provider != "auto":
                msg = (
                    "⚠ Configured auxiliary compression provider "
                    f"'{_aux_cfg_provider}' is unavailable — context "
                    "compression will drop middle turns without a summary. "
                    "Check auxiliary.compression in config.yaml and "
                    "reauthenticate that provider."
                )
            else:
                msg = (
                    "⚠ No auxiliary LLM provider configured — context "
                    "compression will drop middle turns without a summary. "
                    "Run `hermes setup` or set OPENROUTER_API_KEY."
                )
            agent._compression_warning = msg
            agent._emit_status(msg)
            logger.warning(
                "No auxiliary LLM provider for compression — "
                "summaries will be unavailable."
            )
            return
        aux_base_url = str(getattr(client, "base_url", ""))
        aux_api_key = str(getattr(client, "api_key", ""))
        aux_context = get_model_context_length(
            aux_model,
            base_url=aux_base_url,
            api_key=aux_api_key,
            config_context_length=getattr(agent, "_aux_compression_context_length_config", None),
            # Each model must be resolved with its own provider so that
            # provider-specific paths (e.g. Bedrock static table, OpenRouter API)
            # are invoked for the correct client, not inherited from the main model.
            provider=(_aux_cfg_provider if _aux_cfg_provider and _aux_cfg_provider != "auto" else getattr(agent, "provider", "")),
            custom_providers=agent._custom_providers,
        )
        # Hard floor: the auxiliary compression model must have at least
        # MINIMUM_CONTEXT_LENGTH (64K) tokens of context. The main model
        # is already required to meet this floor (checked earlier in
        # __init__), so the compression model must too — otherwise it
        # cannot summarise a full threshold-sized window of main-model
        # content. Mirrors the main-model rejection pattern.
        if aux_context and aux_context < MINIMUM_CONTEXT_LENGTH:
            raise ValueError(
                f"Auxiliary compression model {aux_model} has a context "
                f"window of {aux_context:,} tokens, which is below the "
                f"minimum {MINIMUM_CONTEXT_LENGTH:,} required by Hermes "
                f"Agent. Choose a compression model with at least "
                f"{MINIMUM_CONTEXT_LENGTH // 1000}K context (set "
                f"auxiliary.compression.model in config.yaml), or set "
                f"auxiliary.compression.context_length to override the "
                f"detected value if it is wrong."
            )
        if not aux_context:
            # Unknown context length (None / 0). The hard floor above
            # already treats this as "cannot judge"; the threshold
            # comparison must do the same. Without this guard a value of
            # 0 would auto-lower the session threshold to 0 tokens, and
            # None would only "work" by raising a TypeError that the
            # generic handler below swallows.
            logger.debug(
                "Compression feasibility check skipped: context length of "
                "auxiliary model %s is unknown.",
                aux_model,
            )
            return
        threshold = agent.context_compressor.threshold_tokens
        if aux_context < threshold:
            # Auto-correct: lower the live session threshold so
            # compression actually works this session. The hard floor
            # above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
            # so the new threshold is always >= 64K.
            #
            # The compression summariser sends a single user-role
            # prompt (no system prompt, no tools) to the aux model, so
            # new_threshold == aux_context is safe: the request is
            # the raw messages plus a small summarisation instruction.
            old_threshold = threshold
            new_threshold = aux_context
            agent.context_compressor.threshold_tokens = new_threshold
            # Keep threshold_percent in sync so future main-model
            # context_length changes (update_model) re-derive from a
            # sensible number rather than the original too-high value.
            main_ctx = agent.context_compressor.context_length
            if main_ctx:
                agent.context_compressor.threshold_percent = (
                    new_threshold / main_ctx
                )
            safe_pct = int((aux_context / main_ctx) * 100) if main_ctx else 50
            # Build human-readable "model (provider)" labels for both
            # the main model and the compression model so users can
            # tell at a glance which provider each side is actually
            # using. When the configured provider is empty or "auto",
            # fall back to the client's base_url hostname.
            _main_model = getattr(agent, "model", "") or "?"
            _main_provider = getattr(agent, "provider", "") or ""
            _aux_provider_label = (
                _aux_cfg_provider
                if _aux_cfg_provider and _aux_cfg_provider != "auto"
                else ""
            )
            if not _aux_provider_label:
                try:
                    from urllib.parse import urlparse
                    _aux_provider_label = (
                        urlparse(aux_base_url).hostname or aux_base_url
                    )
                except Exception:
                    _aux_provider_label = aux_base_url or "auto"
            _main_label = (
                f"{_main_model} ({_main_provider})"
                if _main_provider
                else _main_model
            )
            _aux_label = f"{aux_model} ({_aux_provider_label})"
            msg = (
                f"⚠ Compression model {_aux_label} context is "
                f"{aux_context:,} tokens, but the main model "
                f"{_main_label}'s compression threshold was "
                f"{old_threshold:,} tokens. "
                f"Auto-lowered this session's threshold to "
                f"{new_threshold:,} tokens so compression can run.\n"
                f" To make this permanent, edit config.yaml — either:\n"
                f" 1. Use a larger compression model:\n"
                f" auxiliary:\n"
                f" compression:\n"
                f" model: <model-with-{old_threshold:,}+-context>\n"
                f" 2. Lower the compression threshold:\n"
                f" compression:\n"
                f" threshold: 0.{safe_pct:02d}"
            )
            agent._compression_warning = msg
            agent._emit_status(msg)
            logger.warning(
                "Auxiliary compression model %s has %d token context, "
                "below the main model's compression threshold of %d "
                "tokens — auto-lowered session threshold to %d to "
                "keep compression working.",
                aux_model,
                aux_context,
                old_threshold,
                new_threshold,
            )
    except ValueError:
        # Hard rejections (aux below minimum context) must propagate
        # so the session refuses to start.
        raise
    except Exception as exc:
        logger.debug(
            "Compression feasibility check failed (non-fatal): %s", exc
        )
def replay_compression_warning(agent: Any) -> None:
    """Push a stored compression warning through ``agent.status_callback``.

    During ``__init__`` the gateway's ``status_callback`` is not yet
    wired, so ``_emit_status`` only reaches ``_vprint`` (CLI). This
    function is called once at the start of the first
    ``run_conversation()`` — by then the gateway has set the callback,
    so every platform (Telegram, Discord, Slack, etc.) receives the
    warning.

    Nothing happens when no warning was stored or no callback is set.
    Exceptions raised by the callback are ignored.
    """
    warning = getattr(agent, "_compression_warning", None)
    if not warning:
        return
    notify = agent.status_callback
    if not notify:
        return
    try:
        notify("lifecycle", warning)
    except Exception:
        # Delivery is best-effort; a broken callback must not abort the turn.
        pass
def compress_context(
    agent: Any,
    messages: list,
    system_message: str,
    *,
    approx_tokens: Optional[int] = None,
    task_id: str = "default",
    focus_topic: Optional[str] = None,
) -> Tuple[list, str]:
    """Compress conversation context and split the session in SQLite.

    Runs ``agent.context_compressor.compress``, rebuilds the system prompt,
    rotates ``agent.session_id`` when a session DB is attached, notifies the
    context engine and memory manager of the rotation, refreshes the
    compressor's token estimate and clears the file-read dedup cache.

    Args:
        agent: The owning :class:`AIAgent`.
        messages: Current message history (will be summarised).
        system_message: Current system prompt; rebuilt after compression.
        approx_tokens: Pre-compression token estimate, logged for ops.
        task_id: Tool task scope (used for clearing file-read dedup state).
        focus_topic: Optional focus string for guided compression — the
            summariser will prioritise preserving information related to
            this topic. Inspired by Claude Code's ``/compact <focus>``.

    Returns:
        ``(compressed_messages, new_system_prompt)`` tuple.
    """
    _pre_msg_count = len(messages)
    logger.info(
        "context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r",
        agent.session_id or "none", _pre_msg_count,
        f"{approx_tokens:,}" if approx_tokens else "unknown", agent.model,
        focus_topic,
    )
    agent._emit_status(
        "🗜️ Compacting context — summarizing earlier conversation so I can continue..."
    )

    # Notify external memory provider before compression discards context
    if agent._memory_manager:
        try:
            agent._memory_manager.on_pre_compress(messages)
        except Exception as _pre_err:
            # Best-effort hook, but leave a trace like the other
            # notification handlers below do.
            logger.debug("memory manager on_pre_compress: %s", _pre_err)

    try:
        compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
    except TypeError:
        # Plugin context engine with strict signature that doesn't accept
        # focus_topic — fall back to calling without it.
        compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens)

    summary_error = getattr(agent.context_compressor, "_last_summary_error", None)
    if summary_error:
        if getattr(agent, "_last_compression_summary_warning", None) != summary_error:
            agent._last_compression_summary_warning = summary_error
            agent._emit_warning(
                f"⚠ Compression summary failed: {summary_error}. "
                "Inserted a fallback context marker."
            )
    else:
        # No hard failure — but did the configured aux model error out
        # and get recovered by retrying on main? Surface that so users
        # know their auxiliary.compression.model setting is broken even
        # though compression succeeded.
        _aux_fail_model = getattr(agent.context_compressor, "_last_aux_model_failure_model", None)
        _aux_fail_err = getattr(agent.context_compressor, "_last_aux_model_failure_error", None)
        if _aux_fail_model:
            # Dedup on (model, error) so we don't spam on every compaction
            _aux_key = (_aux_fail_model, _aux_fail_err)
            if getattr(agent, "_last_aux_fallback_warning_key", None) != _aux_key:
                agent._last_aux_fallback_warning_key = _aux_key
                agent._emit_warning(
                    f" Configured compression model '{_aux_fail_model}' failed "
                    f"({_aux_fail_err or 'unknown error'}). Recovered using main model — "
                    "check auxiliary.compression.model in config.yaml."
                )

    todo_snapshot = agent._todo_store.format_for_injection()
    if todo_snapshot:
        compressed.append({"role": "user", "content": todo_snapshot})

    agent._invalidate_system_prompt()
    new_system_prompt = agent._build_system_prompt(system_message)
    agent._cached_system_prompt = new_system_prompt

    # Stays None unless the session id was actually rotated below; the
    # notification hooks further down key off it.
    old_session_id: Optional[str] = None
    if agent._session_db:
        try:
            # Propagate title to the new session with auto-numbering
            old_title = agent._session_db.get_session_title(agent.session_id)
            # Trigger memory extraction on the old session before it rotates.
            agent.commit_memory_session(messages)
            agent._session_db.end_session(agent.session_id, "compression")
            old_session_id = agent.session_id
            agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
            os.environ["HERMES_SESSION_ID"] = agent.session_id
            try:
                from gateway.session_context import _SESSION_ID
                _SESSION_ID.set(agent.session_id)
            except Exception:
                pass
            # Update session_log_file to point to the new session's JSON file
            agent.session_log_file = agent.logs_dir / f"session_{agent.session_id}.json"
            agent._session_db_created = False
            agent._session_db.create_session(
                session_id=agent.session_id,
                source=agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
                model=agent.model,
                model_config=agent._session_init_model_config,
                parent_session_id=old_session_id,
            )
            agent._session_db_created = True
            # Auto-number the title for the continuation session
            if old_title:
                try:
                    new_title = agent._session_db.get_next_title_in_lineage(old_title)
                    agent._session_db.set_session_title(agent.session_id, new_title)
                except Exception as e:
                    logger.debug("Could not propagate title on compression: %s", e)
            agent._session_db.update_system_prompt(agent.session_id, new_system_prompt)
            # Reset flush cursor — new session starts with no messages written
            agent._last_flushed_db_idx = 0
        except Exception as e:
            logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)

    # Notify the context engine that the session_id rotated because of
    # compression (not a fresh /new). Plugin engines (e.g. hermes-lcm) use
    # boundary_reason="compression" to preserve DAG lineage across the
    # rollover instead of re-initializing fresh per-session state.
    # See hermes-lcm#68. Built-in ContextCompressor ignores kwargs.
    try:
        if old_session_id and hasattr(agent.context_compressor, "on_session_start"):
            agent.context_compressor.on_session_start(
                agent.session_id or "",
                boundary_reason="compression",
                old_session_id=old_session_id,
            )
    except Exception as _ce_err:
        logger.debug("context engine on_session_start (compression): %s", _ce_err)

    # Notify memory providers of the compression-driven session_id rotation
    # so provider-cached per-session state (Hindsight's _document_id,
    # accumulated turn buffers, counters) refreshes. reset=False because
    # the logical conversation continues; only the id and DB row rolled
    # over. See #6672.
    try:
        if old_session_id and agent._memory_manager:
            agent._memory_manager.on_session_switch(
                agent.session_id or "",
                parent_session_id=old_session_id,
                reset=False,
                reason="compression",
            )
    except Exception as _me_err:
        logger.debug("memory manager on_session_switch (compression): %s", _me_err)

    # Warn on repeated compressions (quality degrades with each pass)
    _cc = agent.context_compressor.compression_count
    if _cc >= 2:
        agent._vprint(
            f"{agent.log_prefix}⚠️ Session compressed {_cc} times — "
            f"accuracy may degrade. Consider /new to start fresh.",
            force=True,
        )

    # Update token estimate after compaction so pressure calculations
    # use the post-compression count, not the stale pre-compression one.
    # Use estimate_request_tokens_rough() so tool schemas are included —
    # with 50+ tools enabled, schemas alone can add 20-30K tokens, and
    # omitting them delays the next compression cycle far past the
    # configured threshold (issue #14695).
    _compressed_est = estimate_request_tokens_rough(
        compressed,
        system_prompt=new_system_prompt or "",
        tools=agent.tools or None,
    )
    agent.context_compressor.last_prompt_tokens = _compressed_est
    agent.context_compressor.last_completion_tokens = 0

    # Clear the file-read dedup cache. After compression the original
    # read content is summarised away — if the model re-reads the same
    # file it needs the full content, not a "file unchanged" stub.
    try:
        from tools.file_tools import reset_file_dedup
        reset_file_dedup(task_id)
    except Exception:
        pass

    logger.info(
        "context compression done: session=%s messages=%d->%d tokens=~%s",
        agent.session_id or "none", _pre_msg_count, len(compressed),
        f"{_compressed_est:,}",
    )
    return compressed, new_system_prompt
def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
    """Shrink oversized inline images so a rejected request can be retried.

    Walks ``api_messages`` and, for every ``image_url`` / ``input_image``
    part carrying a ``data:`` URL longer than 4 MB, decodes the base64
    payload into a temporary file, asks
    ``vision_tools._resize_image_for_vision`` for a smaller data URL and
    swaps it in. Both part shapes are handled: ``{"image_url": {"url":
    ...}}`` (chat.completions) and ``{"image_url": "..."}`` (Responses).

    Non-data-URL images (http/https URLs) are not touched — the provider
    fetches those itself and the size limit is different.

    Mutates ``api_messages`` in place.

    Returns:
        True if at least one image part was replaced; False when there was
        nothing to shrink, the resize helper could not be imported, or no
        re-encode produced a smaller URL (caller should surface the
        original error).
    """
    if not api_messages:
        return False

    try:
        from tools.vision_tools import _resize_image_for_vision
    except Exception as exc:
        logger.warning("image-shrink recovery: vision_tools unavailable — %s", exc)
        return False

    # 4 MB target leaves comfortable headroom under Anthropic's 5 MB.
    # This only fires after a confirmed provider rejection, so the quality
    # loss is preferable to failing the request outright.
    size_budget = 4 * 1024 * 1024
    image_part_types = {"image_url", "input_image"}
    suffix_by_mime = {
        "image/png": ".png", "image/gif": ".gif", "image/webp": ".webp",
        "image/jpeg": ".jpg", "image/jpg": ".jpg", "image/bmp": ".bmp",
    }

    def _shrink_data_url(url: str) -> Optional[str]:
        """Return a smaller data URL, or None if shrink can't help."""
        if not isinstance(url, str) or not url.startswith("data:"):
            return None
        if len(url) <= size_budget:
            # This specific image wasn't the oversized one.
            return None
        try:
            header, _, encoded = url.partition(",")
            mime = "image/jpeg"
            if header.startswith("data:"):
                declared = header[len("data:"):].split(";", 1)[0].strip()
                if declared.startswith("image/"):
                    mime = declared

            import base64 as _b64
            payload = _b64.b64decode(encoded)

            scratch = tempfile.NamedTemporaryFile(
                prefix="hermes_shrink_",
                suffix=suffix_by_mime.get(mime, ".jpg"),
                delete=False,
            )
            try:
                scratch.write(payload)
                scratch.close()
                smaller = _resize_image_for_vision(
                    Path(scratch.name),
                    mime_type=mime,
                    max_base64_bytes=size_budget,
                )
            finally:
                # The scratch file is only an input for the resizer.
                try:
                    Path(scratch.name).unlink(missing_ok=True)
                except Exception:
                    pass

            if smaller and len(smaller) < len(url):
                return smaller
            # Shrink didn't help (or made it bigger — corrupt input?).
            return None
        except Exception as exc:
            logger.warning("image-shrink recovery: re-encode failed — %s", exc)
            return None

    replaced = 0
    for message in api_messages:
        parts = message.get("content") if isinstance(message, dict) else None
        if not isinstance(parts, list):
            continue
        for part in parts:
            if not isinstance(part, dict):
                continue
            if part.get("type") not in image_part_types:
                continue
            image_value = part.get("image_url")
            if isinstance(image_value, dict):
                # OpenAI chat.completions: {"image_url": {"url": "data:..."}}
                new_url = _shrink_data_url(image_value.get("url", ""))
                if new_url:
                    image_value["url"] = new_url
                    replaced += 1
            elif isinstance(image_value, str):
                # OpenAI Responses: {"image_url": "data:..."}
                new_url = _shrink_data_url(image_value)
                if new_url:
                    part["image_url"] = new_url
                    replaced += 1

    if replaced:
        logger.info(
            "image-shrink recovery: re-encoded %d image part(s) to fit under %.0f MB",
            replaced, size_budget / (1024 * 1024),
        )
    return replaced > 0
# Names exported by ``from <module> import *``: the four public helpers
# defined in this module.
__all__ = [
    "check_compression_model_feasibility",
    "replay_compression_warning",
    "compress_context",
    "try_shrink_image_parts_in_messages",
]

4018
agent/conversation_loop.py Normal file

File diff suppressed because it is too large Load diff

View file

@ -166,6 +166,8 @@ class PooledCredential:
@property
def runtime_api_key(self) -> str:
    """Return the credential string to use for this entry at runtime.

    For the ``nous`` provider ``agent_key`` is preferred, falling back to
    ``access_token``; every other provider uses ``access_token``. A missing
    value yields an empty string.
    """
    if self.provider == "nous":
        # Nous stores the runtime inference credential in agent_key for
        # compatibility. It may be a NAS invoke JWT or legacy opaque key.
        return str(self.agent_key or self.access_token or "")
    return str(self.access_token or "")
@ -621,18 +623,35 @@ class CredentialPool:
return entry
store_refresh = state.get("refresh_token", "")
store_access = state.get("access_token", "")
if store_refresh and store_refresh != entry.refresh_token:
comparable_updates = {
"access_token": store_access,
"refresh_token": store_refresh,
"expires_at": state.get("expires_at"),
"agent_key": state.get("agent_key"),
"agent_key_expires_at": state.get("agent_key_expires_at"),
"inference_base_url": state.get("inference_base_url"),
}
should_sync = any(
value not in (None, "") and getattr(entry, key, None) != value
for key, value in comparable_updates.items()
)
if should_sync:
logger.debug(
"Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)",
"Pool entry %s: syncing Nous state from auth.json",
entry.id,
)
field_updates: Dict[str, Any] = {
"access_token": store_access,
"refresh_token": store_refresh,
"last_status": None,
"last_status_at": None,
"last_error_code": None,
"last_error_reason": None,
"last_error_message": None,
"last_error_reset_at": None,
}
if store_access:
field_updates["access_token"] = store_access
if store_refresh:
field_updates["refresh_token"] = store_refresh
if state.get("expires_at"):
field_updates["expires_at"] = state["expires_at"]
if state.get("agent_key"):
@ -811,36 +830,15 @@ class CredentialPool:
synced = self._sync_nous_entry_from_auth_store(entry)
if synced is not entry:
entry = synced
nous_state = {
"access_token": entry.access_token,
"refresh_token": entry.refresh_token,
"client_id": entry.client_id,
"portal_base_url": entry.portal_base_url,
"inference_base_url": entry.inference_base_url,
"token_type": entry.token_type,
"scope": entry.scope,
"obtained_at": entry.obtained_at,
"expires_at": entry.expires_at,
"agent_key": entry.agent_key,
"agent_key_expires_at": entry.agent_key_expires_at,
"tls": entry.tls,
}
refreshed = auth_mod.refresh_nous_oauth_from_state(
nous_state,
auth_mod.resolve_nous_runtime_credentials(
min_key_ttl_seconds=DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
force_refresh=force,
force_mint=force,
inference_auth_mode=(
auth_mod.NOUS_INFERENCE_AUTH_MODE_LEGACY
if force
else auth_mod.NOUS_INFERENCE_AUTH_MODE_AUTO
),
)
# Apply returned fields: dataclass fields via replace, extras via dict update
field_updates = {}
extra_updates = dict(entry.extra)
_field_names = {f.name for f in fields(entry)}
for k, v in refreshed.items():
if k in _field_names:
field_updates[k] = v
elif k in _EXTRA_KEYS:
extra_updates[k] = v
updated = replace(entry, extra=extra_updates, **field_updates)
updated = self._sync_nous_entry_from_auth_store(entry)
else:
return entry
except Exception as exc:
@ -929,6 +927,49 @@ class CredentialPool:
self._persist()
self._sync_device_code_entry_to_auth_store(updated)
return updated
if auth_mod._is_terminal_nous_refresh_error(exc):
logger.debug("Nous refresh token is terminally invalid; clearing local token state")
try:
with _auth_store_lock():
auth_store = _load_auth_store()
state = _load_provider_state(auth_store, "nous") or {
"client_id": entry.client_id,
"portal_base_url": entry.portal_base_url,
"inference_base_url": entry.inference_base_url,
"token_type": entry.token_type,
"scope": entry.scope,
"tls": entry.tls,
}
store_refresh = str(state.get("refresh_token") or "").strip()
entry_refresh = str(entry.refresh_token or "").strip()
if not store_refresh or store_refresh == entry_refresh:
auth_mod._quarantine_nous_oauth_state(
state,
exc,
reason="credential_pool_refresh_failure",
)
auth_mod._quarantine_nous_pool_entries(
auth_store,
exc,
reason="credential_pool_refresh_failure",
)
_save_provider_state(auth_store, "nous", state)
_save_auth_store(auth_store)
except Exception as clear_exc:
logger.debug("Failed to clear terminal Nous OAuth state: %s", clear_exc)
singleton_sources = {
auth_mod.NOUS_DEVICE_CODE_SOURCE,
f"manual:{auth_mod.NOUS_DEVICE_CODE_SOURCE}",
}
self._entries = [
item for item in self._entries
if item.source not in singleton_sources
]
if self._current_id == entry.id:
self._current_id = None
self._persist()
return None
self._mark_exhausted(entry, None)
return None
@ -1365,7 +1406,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
elif provider == "nous":
state = _load_provider_state(auth_store, "nous")
if state and not _is_suppressed(provider, "device_code"):
has_runtime_material = bool(
isinstance(state, dict)
and (
str(state.get("access_token") or "").strip()
or str(state.get("agent_key") or "").strip()
)
)
if state and not has_runtime_material:
retained = [
entry for entry in entries
if entry.source not in {"device_code", "manual:device_code"}
]
if len(retained) != len(entries):
entries[:] = retained
changed = True
if state and has_runtime_material and not _is_suppressed(provider, "device_code"):
active_sources.add("device_code")
# Prefer a user-supplied label embedded in the singleton state
# (set by persist_nous_credentials(label=...) when the user ran

62
agent/iteration_budget.py Normal file
View file

@ -0,0 +1,62 @@
"""Per-agent iteration budget — thread-safe consume/refund counter.
Extracted from ``run_agent.py``. Each ``AIAgent`` instance (parent or
subagent) holds an :class:`IterationBudget`; the parent's cap comes from
``max_iterations`` (default 90), each subagent's cap comes from
``delegation.max_iterations`` (default 50).
``run_agent`` re-exports ``IterationBudget`` so existing
``from run_agent import IterationBudget`` imports keep working unchanged.
"""
from __future__ import annotations
import threading
class IterationBudget:
    """Lock-protected counter that caps how many iterations one agent may run.

    Every agent (parent or subagent) owns its own ``IterationBudget``.
    The parent's budget is capped at ``max_iterations`` (default 90).
    Each subagent gets an independent budget capped at
    ``delegation.max_iterations`` (default 50) -- so the total number of
    iterations across parent + subagents can exceed the parent's cap.
    Users control the per-subagent limit via ``delegation.max_iterations``
    in config.yaml.

    ``execute_code`` (programmatic tool calling) iterations are handed back
    through :meth:`refund` so they don't eat into the budget.
    """

    def __init__(self, max_total: int):
        self._lock = threading.Lock()
        self._used = 0
        self.max_total = max_total

    def consume(self) -> bool:
        """Spend one iteration if any are left; return whether it was granted."""
        with self._lock:
            granted = self._used < self.max_total
            if granted:
                self._used += 1
            return granted

    def refund(self) -> None:
        """Return one iteration to the budget (e.g. for execute_code turns)."""
        with self._lock:
            # Never drop below zero: a refund without a prior consume is a no-op.
            if self._used > 0:
                self._used -= 1

    @property
    def used(self) -> int:
        """Number of iterations consumed so far."""
        with self._lock:
            return self._used

    @property
    def remaining(self) -> int:
        """Iterations still available, floored at zero."""
        with self._lock:
            left = self.max_total - self._used
            return left if left > 0 else 0
__all__ = ["IterationBudget"]

View file

@ -232,7 +232,7 @@ class LSPClient:
the process is killed and the client is left in state
``"error"`` re-call ``start()`` to retry.
"""
if self._state in ("running", "starting"):
if self._state in {"running", "starting"}:
return
self._state = "starting"
try:

View file

@ -151,7 +151,7 @@ def try_install(pkg: str, strategy: str = "auto") -> Optional[str]:
same path (or ``None``) without reinstalling. Concurrent calls
are serialized.
"""
if strategy not in ("auto",):
if strategy not in {"auto",}:
# Only ``auto`` triggers an actual install. In manual/off,
# we still check whether the binary already exists.
recipe = INSTALL_RECIPES.get(pkg, {})

View file

@ -162,7 +162,7 @@ class LSPService:
idle_timeout: float = DEFAULT_IDLE_TIMEOUT,
) -> None:
self._enabled = enabled
self._wait_mode = wait_mode if wait_mode in ("document", "full") else "document"
self._wait_mode = wait_mode if wait_mode in {"document", "full"} else "document"
self._wait_timeout = wait_timeout
self._install_strategy = install_strategy
self._binary_overrides = binary_overrides or {}

View file

@ -28,7 +28,7 @@ def format_diagnostic(d: Dict[str, Any]) -> str:
col = int(start.get("character", 0)) + 1
msg = str(d.get("message") or "").rstrip()
code = d.get("code")
code_part = f" [{code}]" if code not in (None, "") else ""
code_part = f" [{code}]" if code not in {None, ""} else ""
source = d.get("source")
source_part = f" ({source})" if source else ""
return f"{sev} [{line}:{col}] {msg}{code_part}{source_part}"

View file

@ -237,7 +237,7 @@ def _spawn_pyright(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
return None
# If we got the cli ``pyright``, the langserver is its sibling.
base = os.path.basename(bin_path)
if base in ("pyright", "pyright.exe"):
if base in {"pyright", "pyright.exe"}:
sibling = os.path.join(os.path.dirname(bin_path), "pyright-langserver")
if os.path.exists(sibling):
bin_path = sibling

View file

@ -0,0 +1,444 @@
"""Message and tool-payload sanitization helpers.
Pure functions extracted from ``run_agent.py`` so the AIAgent module can
stay focused on the conversation loop. These walk OpenAI-format message
lists and structured payloads, repairing or stripping problematic
characters that would otherwise crash ``json.dumps`` inside the OpenAI
SDK or be rejected by upstream APIs.
All helpers are stateless and side-effect-free except for in-place
mutation of their input (where documented). Backward-compatible
re-exports from ``run_agent`` remain in place, so existing imports such as
``from run_agent import _sanitize_surrogates`` keep working.
"""
from __future__ import annotations
import json
import logging
import re
from typing import Any
logger = logging.getLogger(__name__)
# Lone surrogate code points are invalid in UTF-8 and crash json.dumps
# inside the OpenAI SDK. Used by every surrogate-sanitization helper
# below as well as by run_agent and the CLI for paste-from-clipboard
# scrubbing.
_SURROGATE_RE = re.compile(r'[\ud800-\udfff]')


def _sanitize_surrogates(text: str) -> str:
    """Return *text* with every lone surrogate swapped for U+FFFD.

    Surrogate code points cannot be encoded as UTF-8, so they have to be
    replaced before the text reaches the OpenAI SDK. When *text* holds no
    surrogates it is returned untouched (cheap search, no substitution).
    """
    if not _SURROGATE_RE.search(text):
        return text
    return _SURROGATE_RE.sub('\ufffd', text)
def _sanitize_structure_surrogates(payload: Any) -> bool:
    """Scrub surrogate code points out of a nested dict/list payload in-place.

    Surrogate-recovery counterpart of ``_sanitize_structure_non_ascii``.
    Reaches string values buried in nested structures (e.g.
    ``reasoning_details``, an array of dicts with ``summary``/``text``
    strings) that flat per-field checks miss. Only dict values and list
    items are visited; anything that is not a dict or list is left alone.

    Returns True if at least one string was rewritten.
    """
    replaced = False

    def _visit(container):
        nonlocal replaced
        # Pick the (slot, value) view that matches the container type so the
        # same loop body can assign back into either a dict or a list.
        if isinstance(container, dict):
            slots = container.items()
        elif isinstance(container, list):
            slots = enumerate(container)
        else:
            return
        for slot, item in slots:
            if isinstance(item, (dict, list)):
                _visit(item)
            elif isinstance(item, str) and _SURROGATE_RE.search(item):
                container[slot] = _SURROGATE_RE.sub('\ufffd', item)
                replaced = True

    _visit(payload)
    return replaced
def _sanitize_messages_surrogates(messages: list) -> bool:
    """Replace lone surrogates throughout a messages list, in-place.

    For each dict message this scrubs: ``content`` (a string, or the ``text``
    of each dict part when it is a list), ``name``, every tool call's ``id``
    and its function ``name``/``arguments``, and finally every remaining
    top-level field -- strings directly, dicts/lists via
    ``_sanitize_structure_surrogates``. The last step matters because
    byte-level reasoning models (xiaomi/mimo, kimi, glm) can emit lone
    surrogates in ``reasoning`` / ``reasoning_content`` /
    ``reasoning_details``, which would otherwise flow into the next request
    and crash json.dumps inside the OpenAI SDK.

    Non-dict entries in *messages* are skipped. Returns True if anything
    was replaced, False otherwise.
    """
    # Fields with dedicated handling above the generic sweep ("role" is
    # simply never touched).
    handled_keys = {"content", "name", "tool_calls", "role"}

    def _scrub(holder: dict, field) -> bool:
        """Rewrite holder[field] if it is a string containing surrogates."""
        current = holder.get(field)
        if isinstance(current, str) and _SURROGATE_RE.search(current):
            holder[field] = _SURROGATE_RE.sub('\ufffd', current)
            return True
        return False

    dirty = False
    for message in messages:
        if not isinstance(message, dict):
            continue

        body = message.get("content")
        if isinstance(body, list):
            for part in body:
                if isinstance(part, dict) and _scrub(part, "text"):
                    dirty = True
        elif _scrub(message, "content"):
            dirty = True

        if _scrub(message, "name"):
            dirty = True

        calls = message.get("tool_calls")
        if isinstance(calls, list):
            for call in calls:
                if not isinstance(call, dict):
                    continue
                if _scrub(call, "id"):
                    dirty = True
                function = call.get("function")
                if isinstance(function, dict):
                    if _scrub(function, "name"):
                        dirty = True
                    if _scrub(function, "arguments"):
                        dirty = True

        # Generic sweep over whatever else the message carries (reasoning,
        # reasoning_content, reasoning_details, ...).
        for field, extra in message.items():
            if field in handled_keys:
                continue
            if isinstance(extra, str):
                if _scrub(message, field):
                    dirty = True
            elif isinstance(extra, (dict, list)):
                if _sanitize_structure_surrogates(extra):
                    dirty = True
    return dirty
def _escape_invalid_chars_in_json_strings(raw: str) -> str:
    """Escape literal control characters that sit inside JSON string values.

    Scans *raw* one character at a time while tracking whether the cursor is
    inside a double-quoted string. Inside a string, any character below
    0x20 is rewritten as its ``\\uXXXX`` escape; a backslash together with
    the character that follows it is copied through untouched so existing
    escape sequences (including ``\\"``) are preserved. Everything outside
    strings is copied verbatim.

    Ported from #12093 -- complements the other repair passes in
    ``_repair_tool_call_arguments`` for inputs where
    ``json.loads(strict=False)`` alone is not enough.
    """
    pieces: list[str] = []
    inside = False
    pos = 0
    total = len(raw)
    while pos < total:
        cur = raw[pos]

        if not inside:
            # Outside a string only an opening quote changes state.
            inside = cur == '"'
            pieces.append(cur)
            pos += 1
            continue

        if cur == "\\" and pos + 1 < total:
            # Existing escape sequence: copy the backslash and its partner.
            pieces.append(raw[pos:pos + 2])
            pos += 2
            continue

        if cur == '"':
            inside = False
            pieces.append(cur)
        elif ord(cur) < 0x20:
            pieces.append(f"\\u{ord(cur):04x}")
        else:
            pieces.append(cur)
        pos += 1
    return "".join(pieces)
def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
    """Attempt to repair malformed tool_call argument JSON.

    Models like GLM-5.1 via Ollama can produce truncated JSON, trailing
    commas, Python ``None``, etc. The API proxy rejects these with HTTP 400
    "invalid tool call arguments". This function applies common repairs in
    order and returns the first result that parses:

    0. ``json.loads(strict=False)`` + re-serialise (literal control chars).
    1. Strip trailing commas before ``}`` / ``]``.
    2. Append missing closing braces/brackets.
    3. Trim surplus closing braces/brackets.
    4. Escape control characters inside strings and retry.

    Args:
        raw_args: The raw ``arguments`` string emitted by the model. Any
            non-string value is treated as empty.
        tool_name: Tool name, used only in log messages.

    Returns:
        A JSON string. If every repair fails, ``"{}"`` is returned so the
        request succeeds (better than crashing the session). Every repair
        is logged at WARNING level.
    """
    raw_stripped = raw_args.strip() if isinstance(raw_args, str) else ""

    # Fast-path: empty / whitespace-only -> empty object
    if not raw_stripped:
        logger.warning("Sanitized empty tool_call arguments for %s", tool_name)
        return "{}"

    # Python-literal None -> normalise to {}
    if raw_stripped == "None":
        logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name)
        return "{}"

    # Repair pass 0: llama.cpp backends sometimes emit literal control
    # characters (tabs, newlines) inside JSON string values. json.loads
    # with strict=False accepts these and lets us re-serialise the
    # result into wire-valid JSON without any string surgery. This is
    # the most common local-model repair case (#12068).
    try:
        parsed = json.loads(raw_stripped, strict=False)
        reserialised = json.dumps(parsed, separators=(",", ":"))
        if reserialised != raw_stripped:
            logger.warning(
                "Repaired unescaped control chars in tool_call arguments for %s",
                tool_name,
            )
        return reserialised
    except (json.JSONDecodeError, TypeError, ValueError):
        pass

    # Attempt common JSON repairs
    fixed = raw_stripped

    # 1. Strip trailing commas before } or ]
    fixed = re.sub(r',\s*([}\]])', r'\1', fixed)

    # 2. Close unclosed structures (plain character counts -- a heuristic
    #    that does not look inside string values).
    open_curly = fixed.count('{') - fixed.count('}')
    open_bracket = fixed.count('[') - fixed.count(']')
    if open_curly > 0:
        fixed += '}' * open_curly
    if open_bracket > 0:
        fixed += ']' * open_bracket

    # 3. Remove excess closing braces/brackets (bounded to 50 iterations)
    for _ in range(50):
        try:
            json.loads(fixed)
            break
        except json.JSONDecodeError:
            if fixed.endswith('}') and fixed.count('}') > fixed.count('{'):
                fixed = fixed[:-1]
            elif fixed.endswith(']') and fixed.count(']') > fixed.count('['):
                fixed = fixed[:-1]
            else:
                break

    try:
        json.loads(fixed)
        # Separator between the "before" and "after" previews so the two
        # 80-char excerpts don't run together in the log line.
        logger.warning(
            "Repaired malformed tool_call arguments for %s: %s -> %s",
            tool_name, raw_stripped[:80], fixed[:80],
        )
        return fixed
    except json.JSONDecodeError:
        pass

    # Repair pass 4: escape unescaped control chars inside JSON strings,
    # then retry. Catches cases where strict=False alone fails because
    # other malformations are present too.
    try:
        escaped = _escape_invalid_chars_in_json_strings(fixed)
        if escaped != fixed:
            json.loads(escaped)
            logger.warning(
                "Repaired control-char-laced tool_call arguments for %s: %s -> %s",
                tool_name, raw_stripped[:80], escaped[:80],
            )
            return escaped
    except (json.JSONDecodeError, TypeError, ValueError):
        pass

    # Last resort: replace with empty object so the API request doesn't
    # crash the entire session.
    logger.warning(
        "Unrepairable tool_call arguments for %s - "
        "replaced with empty object (was: %s)",
        tool_name, raw_stripped[:80],
    )
    return "{}"
def _strip_non_ascii(text: str) -> str:
    """Return *text* with every non-ASCII character dropped.

    Characters that cannot be encoded as ASCII are discarded outright (no
    transliteration is attempted). Used as a last resort when the system
    encoding is ASCII and can't handle any non-ASCII characters (e.g.
    LANG=C on Chromebooks).
    """
    ascii_bytes = text.encode('ascii', errors='ignore')
    return ascii_bytes.decode('ascii')
def _sanitize_messages_non_ascii(messages: list) -> bool:
    """Strip non-ASCII characters from all string content in a messages list.

    This is a last-resort recovery for systems with ASCII-only encoding
    (LANG=C, Chromebooks, minimal containers). Messages are mutated
    in-place; non-dict entries are skipped.

    Covered per message: ``content`` (a string, or the ``text`` of each dict
    part when it is a list), ``name``, each tool call's ``id`` and function
    ``name``/``arguments``, and every other top-level field -- strings
    directly, dicts/lists via ``_sanitize_structure_non_ascii``.

    The tool-call ``id`` is sanitized on purpose: a tool message's
    ``tool_call_id`` is a plain top-level string and gets stripped by the
    generic sweep, so leaving the assistant-side ``id`` untouched could
    break the pairing between the two.

    Returns True if any non-ASCII content was found and sanitized.
    """
    # Fields with dedicated handling above the generic sweep ("role" is
    # simply never touched).
    handled_keys = {"content", "name", "tool_calls", "role"}

    def _strip_field(holder: dict, field) -> bool:
        """Rewrite holder[field] if it is a string containing non-ASCII."""
        value = holder.get(field)
        if not isinstance(value, str):
            return False
        sanitized = _strip_non_ascii(value)
        if sanitized == value:
            return False
        holder[field] = sanitized
        return True

    found = False
    for msg in messages:
        if not isinstance(msg, dict):
            continue

        # Sanitize content (string or list of parts)
        content = msg.get("content")
        if isinstance(content, str):
            if _strip_field(msg, "content"):
                found = True
        elif isinstance(content, list):
            for part in content:
                if isinstance(part, dict) and _strip_field(part, "text"):
                    found = True

        # Sanitize name field (can contain non-ASCII in tool results)
        if _strip_field(msg, "name"):
            found = True

        # Sanitize tool_calls: id, function name and function arguments
        tool_calls = msg.get("tool_calls")
        if isinstance(tool_calls, list):
            for tc in tool_calls:
                if not isinstance(tc, dict):
                    continue
                if _strip_field(tc, "id"):
                    found = True
                fn = tc.get("function")
                if isinstance(fn, dict):
                    if _strip_field(fn, "name"):
                        found = True
                    if _strip_field(fn, "arguments"):
                        found = True

        # Sanitize any additional top-level fields (e.g. reasoning_content,
        # tool_call_id, or nested structures such as reasoning_details).
        for key, value in msg.items():
            if key in handled_keys:
                continue
            if isinstance(value, str):
                if _strip_field(msg, key):
                    found = True
            elif isinstance(value, (dict, list)):
                if _sanitize_structure_non_ascii(value):
                    found = True
    return found
def _sanitize_tools_non_ascii(tools: list) -> bool:
    """Strip non-ASCII characters from tool payloads in-place.

    Thin alias over ``_sanitize_structure_non_ascii``; returns that
    helper's result (True when something was changed).
    """
    changed = _sanitize_structure_non_ascii(tools)
    return changed
def _strip_images_from_messages(messages: list) -> bool:
    """Drop image content parts from every message, mutating *messages*.

    Intended for servers that signal they cannot accept images (e.g.
    "Only 'text' content type is supported."), so the next API call goes
    out text-only. A part counts as an image when it is a dict whose
    ``type`` is ``image_url``, ``image`` or ``input_image``. Only messages
    whose ``content`` is a list are inspected.

    Message-alternation invariants are preserved:

    * A ``tool``-role message left with no parts keeps its slot and gets a
      plaintext placeholder instead -- deleting it would leave the paired
      ``tool_call_id`` on the prior assistant message unmatched, which
      providers reject with HTTP 400.
    * Any other message left with no parts is removed from the list. In
      practice this only hits synthetic image-only user messages appended
      for attachment delivery; real user turns always include text.

    Returns True if any image parts were removed.
    """
    image_kinds = {"image_url", "image", "input_image"}
    removed_any = False
    doomed = []

    for index, message in enumerate(messages):
        if not isinstance(message, dict):
            continue
        parts = message.get("content")
        if not isinstance(parts, list):
            continue

        kept = [
            part for part in parts
            if not (isinstance(part, dict) and part.get("type") in image_kinds)
        ]
        if len(kept) == len(parts):
            continue  # nothing image-like in this message

        removed_any = True
        if kept:
            message["content"] = kept
        elif message.get("role") == "tool":
            # Keep the tool response so its tool_call_id stays matched.
            message["content"] = "[image content removed — server does not support images]"
        else:
            # Image-only message with no text left; safe to drop.
            doomed.append(index)

    # Delete back-to-front so earlier indexes stay valid.
    for index in reversed(doomed):
        del messages[index]
    return removed_any
def _sanitize_structure_non_ascii(payload: Any) -> bool:
    """Strip non-ASCII characters from a nested dict/list payload in-place.

    Recursively visits dict values and list items; every string found is
    passed through ``_strip_non_ascii`` and written back when it changed.
    Anything that is not a dict or list is left alone.

    Returns True if at least one string was modified.
    """
    changed = False

    def _visit(container):
        nonlocal changed
        # Pick the (slot, value) view that matches the container type so the
        # same loop body can assign back into either a dict or a list.
        if isinstance(container, dict):
            slots = container.items()
        elif isinstance(container, list):
            slots = enumerate(container)
        else:
            return
        for slot, item in slots:
            if isinstance(item, (dict, list)):
                _visit(item)
            elif isinstance(item, str):
                cleaned = _strip_non_ascii(item)
                if cleaned != item:
                    container[slot] = cleaned
                    changed = True

    _visit(payload)
    return changed
__all__ = [
"_SURROGATE_RE",
"_sanitize_surrogates",
"_sanitize_structure_surrogates",
"_sanitize_messages_surrogates",
"_escape_invalid_chars_in_json_strings",
"_repair_tool_call_arguments",
"_strip_non_ascii",
"_sanitize_messages_non_ascii",
"_sanitize_tools_non_ascii",
"_strip_images_from_messages",
"_sanitize_structure_non_ascii",
]

View file

@ -194,6 +194,7 @@ DEFAULT_CONTEXT_LENGTHS = {
"llama": 131072,
# Qwen — specific model families before the catch-all.
# Official docs: https://help.aliyun.com/zh/model-studio/developer-reference/
"qwen3.6-plus": 1048576, # 1M context (DashScope/Alibaba & OpenRouter)
"qwen3-coder-plus": 1000000, # 1M context
"qwen3-coder": 262144, # 256K context
"qwen": 131072,

167
agent/process_bootstrap.py Normal file
View file

@ -0,0 +1,167 @@
"""Process-level bootstrap helpers for ``run_agent``.
Three concerns, all tied to ``AIAgent`` boot-time / runtime IO setup:
1. **Lazy OpenAI SDK import** — ``_load_openai_cls`` + ``_OpenAIProxy``
   defer the 240ms-ish ``from openai import OpenAI`` cost until first use,
   while preserving ``isinstance(client, OpenAI)`` checks and
   ``patch("run_agent.OpenAI", ...)`` test patterns.
2. **Crash-resistant stdio** — ``_SafeWriter`` wraps stdout/stderr so
   ``OSError: Input/output error`` from broken pipes (systemd, Docker,
   thread teardown races) cannot crash the agent. ``_install_safe_stdio``
   applies the wrapper.
3. **HTTP proxy resolution** — ``_get_proxy_from_env`` reads
   ``HTTPS_PROXY`` / ``HTTP_PROXY`` / ``ALL_PROXY``;
   ``_get_proxy_for_base_url`` respects ``NO_PROXY`` for the given base URL.
``run_agent`` re-exports every name so existing
``from run_agent import _get_proxy_from_env`` imports keep working
unchanged.
"""
from __future__ import annotations
import os
import sys
import urllib.request
from typing import Optional
from utils import base_url_hostname, normalize_proxy_url
# Cached at module level so we only pay the OpenAI SDK import cost once
# per process (after the first lazy load).
_OPENAI_CLS_CACHE = None


def _load_openai_cls() -> type:
    """Return ``openai.OpenAI``, importing it on first use and caching it."""
    global _OPENAI_CLS_CACHE
    cached = _OPENAI_CLS_CACHE
    if cached is not None:
        return cached
    # Deferred import: only executed the first time the class is needed.
    from openai import OpenAI as _cls
    _OPENAI_CLS_CACHE = _cls
    return _cls
class _OpenAIProxy:
    """Stand-in for ``openai.OpenAI`` that resolves the real class lazily.

    Calling an instance constructs a real client; ``isinstance(x, proxy)``
    is answered against the real class. Both go through
    ``_load_openai_cls`` so the SDK import happens on first use only.
    """

    __slots__ = ()

    def __call__(self, *args, **kwargs):
        real_cls = _load_openai_cls()
        return real_cls(*args, **kwargs)

    def __instancecheck__(self, obj):
        real_cls = _load_openai_cls()
        return isinstance(obj, real_cls)

    def __repr__(self):
        return "<lazy openai.OpenAI proxy>"
class _SafeWriter:
    """Transparent stdio wrapper that catches OSError/ValueError from broken pipes.

    When hermes-agent runs as a systemd service, Docker container, or headless
    daemon, the stdout/stderr pipe can become unavailable (idle timeout, buffer
    exhaustion, socket reset). Any print() call then raises
    ``OSError: [Errno 5] Input/output error``, which can crash agent setup or
    run_conversation() -- especially via double-fault when an except handler
    also tries to print.

    Additionally, when subagents run in ThreadPoolExecutor threads, the shared
    stdout handle can close between thread teardown and cleanup, raising
    ``ValueError: I/O operation on closed file`` instead of OSError.

    ``write``, ``flush`` and ``isatty`` delegate to the underlying stream and
    silently swallow both OSError and ValueError. ``fileno`` and every other
    attribute are forwarded unguarded, so the wrapper is transparent when the
    wrapped stream is healthy.
    """

    __slots__ = ("_inner",)

    def __init__(self, inner):
        object.__setattr__(self, "_inner", inner)

    def write(self, data):
        """Write *data*; on a broken stream pretend the write succeeded."""
        try:
            return self._inner.write(data)
        except (OSError, ValueError):
            # Report the text as fully "written" so callers don't retry.
            return len(data) if isinstance(data, str) else 0

    def flush(self):
        """Flush the underlying stream, ignoring broken-stream errors."""
        try:
            self._inner.flush()
        except (OSError, ValueError):
            pass

    def fileno(self):
        """Return the underlying stream's file descriptor (errors propagate)."""
        return self._inner.fileno()

    def isatty(self):
        """Return the stream's tty status, or False if it cannot be queried."""
        try:
            return self._inner.isatty()
        except (OSError, ValueError):
            return False

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. If the ``_inner``
        # slot itself is unset (instance created via __new__, e.g. during
        # copy/unpickle), looking it up below would re-enter this method
        # forever -- fail with a plain AttributeError instead.
        if name == "_inner":
            raise AttributeError(name)
        return getattr(self._inner, name)
def _get_proxy_from_env() -> Optional[str]:
    """Look up a proxy URL in the process environment.

    Consults HTTPS_PROXY, HTTP_PROXY, ALL_PROXY and then their lowercase
    variants, in that order. The first variable holding a non-blank value
    wins; its value is passed through ``normalize_proxy_url`` and returned.
    Returns None when none of the variables is set to a non-blank value.
    """
    upper_names = ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY")
    lower_names = tuple(name.lower() for name in upper_names)
    for name in upper_names + lower_names:
        candidate = os.environ.get(name, "").strip()
        if not candidate:
            continue
        return normalize_proxy_url(candidate)
    return None
def _get_proxy_for_base_url(base_url: Optional[str]) -> Optional[str]:
    """Return the env-configured proxy unless NO_PROXY excludes *base_url*.

    The proxy from ``_get_proxy_from_env`` is returned as-is when there is
    no proxy, no *base_url*, or no hostname can be extracted from it.
    Otherwise ``urllib.request.proxy_bypass_environment`` decides: a
    bypass yields None. If that check itself raises, the proxy is kept
    (best-effort).
    """
    proxy = _get_proxy_from_env()
    host = base_url_hostname(base_url) if (proxy and base_url) else None
    if not host:
        return proxy
    try:
        bypass = urllib.request.proxy_bypass_environment(host)
    except Exception:
        # Treat a failing bypass lookup as "no bypass".
        bypass = False
    return None if bypass else proxy
def _install_safe_stdio() -> None:
    """Wrap sys.stdout/sys.stderr in ``_SafeWriter`` so console output cannot crash the agent.

    Streams that are missing (None) or already wrapped are left untouched,
    so calling this more than once does not double-wrap.
    """
    for stream_name in ("stdout", "stderr"):
        current = getattr(sys, stream_name, None)
        if current is None or isinstance(current, _SafeWriter):
            continue
        setattr(sys, stream_name, _SafeWriter(current))
# Module-level proxy instance — drops in for ``openai.OpenAI``. Imported as
# ``from agent.process_bootstrap import OpenAI`` (or re-exported via
# ``run_agent`` for legacy tests).
OpenAI = _OpenAIProxy()
__all__ = [
"OpenAI",
"_OpenAIProxy",
"_load_openai_cls",
"_SafeWriter",
"_install_safe_stdio",
"_get_proxy_from_env",
"_get_proxy_for_base_url",
]

View file

@ -83,6 +83,7 @@ logger = logging.getLogger(__name__)
DEFAULT_TIMEOUT_SECONDS = 60
MAX_TIMEOUT_SECONDS = 300
ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
_DEFAULT_BLOCK_MESSAGE = "Blocked by shell hook."
# (event, matcher, command) triples that have been wired to the plugin
# manager in the current process. Matcher is part of the key because
@ -481,6 +482,17 @@ def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
return json.dumps(payload, ensure_ascii=False, default=str)
def _block_message(primary: Any, secondary: Any) -> str:
    """Return a validated string block message, falling back to the default.

    Accepts two candidate fields (primary wins over secondary) so callers
    can express field-priority differences between the two hook wire formats
    without duplicating the type-check logic.

    Each candidate is validated on its own: the first one that is a
    non-empty string is returned. A malformed primary (e.g. a number or a
    dict) therefore no longer hides a perfectly good secondary message.
    ``_DEFAULT_BLOCK_MESSAGE`` is returned only when neither candidate
    qualifies.
    """
    for candidate in (primary, secondary):
        if isinstance(candidate, str) and candidate:
            return candidate
    return _DEFAULT_BLOCK_MESSAGE
def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
"""Translate stdout JSON into a Hermes wire-shape dict.
@ -515,13 +527,9 @@ def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
if event == "pre_tool_call":
if data.get("action") == "block":
message = data.get("message") or data.get("reason") or ""
if isinstance(message, str) and message:
return {"action": "block", "message": message}
return {"action": "block", "message": _block_message(data.get("message"), data.get("reason"))}
if data.get("decision") == "block":
message = data.get("reason") or data.get("message") or ""
if isinstance(message, str) and message:
return {"action": "block", "message": message}
return {"action": "block", "message": _block_message(data.get("reason"), data.get("message"))}
return None
context = data.get("context")

280
agent/stream_diag.py Normal file
View file

@ -0,0 +1,280 @@
"""Stream diagnostics — per-attempt counters, exception chains, retry logging.
When a streaming chat-completions request dies mid-response, we want to
know why: which Cloudflare edge served the request, which OpenRouter
downstream provider answered, how many bytes/chunks we got before the
drop, the HTTP status, the underlying httpx error class. These helpers
collect that info and emit it both to ``agent.log`` (full detail) and to
the user-facing status line (compact).
All helpers are extracted from :class:`AIAgent` for cleanliness.
``run_agent`` keeps thin forwarder methods so existing call sites and
tests that patch ``run_agent.<helper>`` keep working.
"""
from __future__ import annotations
import logging
import time
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# Per-attempt stream diagnostic headers. Lowercased; httpx returns
# CIMultiDict so case-insensitive lookups already work, but we read .get()
# on the dict from agent.log for free-form post-hoc analysis.
STREAM_DIAG_HEADERS = (
"cf-ray",
"cf-cache-status",
"x-openrouter-provider",
"x-openrouter-model",
"x-openrouter-id",
"x-request-id",
"x-vercel-id",
"via",
"server",
"x-forwarded-for",
)
def stream_diag_init() -> Dict[str, Any]:
    """Build a fresh diagnostic record for one streaming attempt.

    The record starts with ``started_at`` set to the current wall-clock
    time, zeroed chunk/byte counters, an empty ``headers`` dict and no
    first-chunk timestamp or HTTP status yet. The streaming functions
    mutate it in-place and the retry block reads it when a stream dies.
    It lives on ``request_client_holder`` so it survives across the
    closure boundary.
    """
    diag: Dict[str, Any] = dict(
        started_at=time.time(),
        first_chunk_at=None,
        chunks=0,
        bytes=0,
        headers={},
        http_status=None,
    )
    return diag
def stream_diag_capture_response(agent: Any, diag: Dict[str, Any], http_response: Any) -> None:
    """Copy the HTTP status and selected headers of a live stream into *diag*.

    Meant to be called once at stream open (before iterating chunks) so the
    metadata survives even if the stream dies before any chunk arrives.
    Does nothing when *http_response* is None or *diag* is not a dict.

    The header names come from ``agent._STREAM_DIAG_HEADERS`` when the agent
    defines it, otherwise from ``STREAM_DIAG_HEADERS``. Only headers with a
    truthy value are kept, each truncated to 120 characters. Every failure
    is swallowed -- diagnostics are best-effort.
    """
    if http_response is None:
        return
    if not isinstance(diag, dict):
        return

    try:
        diag["http_status"] = getattr(http_response, "status_code", None)
    except Exception:
        pass

    try:
        response_headers = getattr(http_response, "headers", None) or {}
        # Allow per-agent override of the headers list (back-compat).
        wanted = getattr(agent, "_STREAM_DIAG_HEADERS", STREAM_DIAG_HEADERS)
        snapshot: Dict[str, str] = {}
        for header_name in wanted:
            try:
                raw_value = response_headers.get(header_name)
                if not raw_value:
                    continue
                # Truncate single-value to keep log lines bounded.
                snapshot[header_name] = str(raw_value)[:120]
            except Exception:
                continue
        diag["headers"] = snapshot
    except Exception:
        pass
def flatten_exception_chain(error: BaseException) -> str:
    """Return a compact ``Outer(msg) <- Inner(msg) <- ...`` rendering.

    OpenAI SDK wraps httpx errors as ``APIConnectionError`` /
    ``APIError`` and only the wrapper's class is visible at the catch
    site -- but the underlying ``RemoteProtocolError`` /
    ``ConnectError`` / ``ReadError`` is what tells us WHY the stream
    died. At each link this follows ``__cause__`` when set, otherwise
    ``__context__`` (deduped, max 4 links) to surface the chain in one
    line.

    Each message has newlines flattened to spaces and is capped at 140
    characters; a capped message ends with ``...`` so readers can tell it
    was truncated. Exceptions with an empty message render as just the
    class name.
    """
    seen: List[BaseException] = []
    link: Optional[BaseException] = error
    while link is not None and len(seen) < 4:
        if link in seen:
            break  # cycle in the chain
        seen.append(link)
        nxt = getattr(link, "__cause__", None) or getattr(
            link, "__context__", None
        )
        if nxt is None or nxt is link:
            break
        link = nxt

    parts: List[str] = []
    for exc in seen:
        msg = str(exc).strip().replace("\n", " ")
        if len(msg) > 140:
            # Mark the cut explicitly; a silent cut looks like the full text.
            msg = msg[:140] + "..."
        parts.append(f"{type(exc).__name__}({msg})" if msg else type(exc).__name__)
    return " <- ".join(parts) if parts else type(error).__name__
def log_stream_retry(
    agent: Any,
    *,
    kind: str,
    error: BaseException,
    attempt: int,
    max_attempts: int,
    mid_tool_call: bool,
    diag: Optional[Dict[str, Any]] = None,
) -> None:
    """Record a transient stream-drop and retry to ``agent.log``.

    Always logs a structured WARNING so users have a breadcrumb regardless
    of UI verbosity.  Subagents in particular benefit because their
    retries no longer spam the parent's terminal — but the file log keeps
    full detail (provider, error class, attempt, base_url, subagent_id).

    When *diag* is provided (the per-attempt stream-diagnostic dict from
    :func:`stream_diag_init`), the WARNING also captures upstream headers
    (cf-ray, x-openrouter-provider, x-openrouter-id), HTTP status, bytes
    streamed before the drop, and elapsed time on the dying attempt.
    These are the breadcrumbs needed to answer "is one CF edge / one
    downstream provider responsible, or is it random across runs?"

    Args:
        agent: Agent whose ``provider``, ``base_url`` and optional
            ``_subagent_id`` / ``_delegate_depth`` are logged; its
            ``_summarize_api_error`` is used to summarise *error*.
        kind: Short label inserted after ``"Stream "`` in the log line.
        error: Exception that ended the attempt.
        attempt: Attempt number being reported.
        max_attempts: Total attempts allowed.
        mid_tool_call: Attached to the record as ``extra["mid_tool_call"]``.
        diag: Optional diagnostic dict; the keys ``bytes``, ``chunks``,
            ``started_at``, ``first_chunk_at``, ``headers`` and
            ``http_status`` are read when present.

    Never raises for logging problems: any failure while building or
    emitting the record is reported at DEBUG level instead.
    """
    try:
        try:
            _summary = agent._summarize_api_error(error)
        except Exception:
            _summary = str(error)
        if _summary and len(_summary) > 240:
            # Mark the cut so a truncated summary is distinguishable from a full one.
            _summary = _summary[:240] + "…"

        # Inner-cause chain (httpx errors hide under openai.APIError).
        try:
            _chain = flatten_exception_chain(error)
        except Exception:
            _chain = type(error).__name__

        # Per-attempt counters and upstream headers.  The defaults below are
        # what gets logged when *diag* is absent or malformed.
        _now = time.time()
        _bytes = 0
        _chunks = 0
        _elapsed = 0.0
        _ttfb = None
        _headers_repr = "-"
        _http_status = "-"
        if isinstance(diag, dict):
            try:
                _bytes = int(diag.get("bytes") or 0)
                _chunks = int(diag.get("chunks") or 0)
                # A missing start time falls back to "now", i.e. elapsed 0.
                _started = float(diag.get("started_at") or _now)
                _elapsed = max(0.0, _now - _started)
                _first = diag.get("first_chunk_at")
                if _first is not None:
                    _ttfb = max(0.0, float(_first) - _started)
                headers = diag.get("headers") or {}
                if isinstance(headers, dict) and headers:
                    _headers_repr = " ".join(
                        f"{k}={v}" for k, v in headers.items()
                    )
                if diag.get("http_status") is not None:
                    _http_status = str(diag.get("http_status"))
            except Exception:
                # Keep whatever was parsed so far; diagnostics are best-effort.
                pass

        logger.warning(
            "Stream %s on attempt %s/%s — retrying. "
            "subagent_id=%s depth=%s provider=%s base_url=%s "
            "error_type=%s error=%s "
            "chain=%s "
            "http_status=%s bytes=%d chunks=%d elapsed=%.2fs ttfb=%s "
            "upstream=[%s]",
            kind,
            attempt,
            max_attempts,
            getattr(agent, "_subagent_id", None) or "-",
            getattr(agent, "_delegate_depth", 0),
            agent.provider or "-",
            agent.base_url or "-",
            type(error).__name__,
            _summary,
            _chain,
            _http_status,
            _bytes,
            _chunks,
            _elapsed,
            f"{_ttfb:.2f}s" if _ttfb is not None else "-",
            _headers_repr,
            extra={"mid_tool_call": mid_tool_call},
        )
    except Exception:
        logger.debug("stream-retry log emit failed", exc_info=True)
def emit_stream_drop(
    agent: Any,
    *,
    error: BaseException,
    attempt: int,
    max_attempts: int,
    mid_tool_call: bool,
    diag: Optional[Dict[str, Any]] = None,
) -> None:
    """Announce a stream drop + retry: one status line, one log record.

    The structured WARNING is written first via :func:`log_stream_retry`
    (full diagnostic detail for post-hoc analysis — inspect with
    ``hermes logs --level WARNING | grep "Stream drop"``).  Then a compact
    status line is pushed through ``agent._emit_status`` and the activity
    marker is refreshed through ``agent._touch_activity``.

    The status line carries provider, error class and attempt N/M, plus an
    ``after Xs`` suffix when *diag* holds a ``started_at`` timestamp.
    Failures while emitting the status line are swallowed.

    Args:
        agent: Agent providing ``provider``, ``_emit_status`` and
            ``_touch_activity``.
        error: Exception that ended the attempt.
        attempt: Attempt number being reported.
        max_attempts: Total attempts allowed.
        mid_tool_call: Selects the ``"drop mid tool-call"`` label.
        diag: Optional per-attempt diagnostic dict.
    """
    if mid_tool_call:
        kind = "drop mid tool-call"
    else:
        kind = "drop"

    log_stream_retry(
        agent,
        kind=kind,
        error=error,
        attempt=attempt,
        max_attempts=max_attempts,
        mid_tool_call=mid_tool_call,
        diag=diag,
    )

    provider = agent.provider or "provider"

    # Brief "after Xs" suffix when timing data exists — lets the user tell
    # "couldn't connect" (0s) from "died after 30s of streaming".
    elapsed_note = ""
    if isinstance(diag, dict):
        try:
            began = diag.get("started_at")
            if began is not None:
                seconds = max(0.0, time.time() - float(began))
                elapsed_note = f" after {seconds:.1f}s"
        except Exception:
            pass

    try:
        agent._emit_status(
            f"⚠️ {provider} stream {kind} ({type(error).__name__}){elapsed_note} "
            f"— reconnecting, retry {attempt}/{max_attempts}"
        )
        agent._touch_activity(
            f"stream retry {attempt}/{max_attempts} after {type(error).__name__}"
        )
    except Exception:
        pass
# Names exported by ``from <this module> import *``.
__all__ = [
    "STREAM_DIAG_HEADERS",
    "stream_diag_init",
    "stream_diag_capture_response",
    "flatten_exception_chain",
    "log_stream_retry",
    "emit_stream_drop",
]

333
agent/system_prompt.py Normal file
View file

@ -0,0 +1,333 @@
"""System-prompt assembly for :class:`AIAgent`.
The agent's system prompt is built once per session and reused across all
turns — only context compression triggers a rebuild. This keeps the
upstream prefix cache warm. See ``hermes-agent-dev``'s
``references/system-prompt-invariant.md`` for the invariants and
``references/self-improvement-loop.md`` for how the background-review
fork inherits the cached prompt verbatim.

Three tiers are joined with ``\\n\\n``:

* ``stable`` — identity (SOUL.md or DEFAULT_AGENT_IDENTITY), tool
  guidance, computer-use guidance, nous subscription block, tool-use
  enforcement guidance + per-model operational guidance, skills prompt,
  alibaba model-name workaround, environment hints, platform hints.
* ``context`` — caller-supplied ``system_message`` plus context files
  (AGENTS.md / .cursorrules / etc.) discovered under ``TERMINAL_CWD``.
* ``volatile`` — memory snapshot, USER.md profile, external memory
  provider block, timestamp/session/model/provider line.

Pure helpers that read the agent's state. AIAgent keeps thin forwarders.
"""
from __future__ import annotations
import json
import os
from typing import Any, Dict, List, Optional
from agent.prompt_builder import (
DEFAULT_AGENT_IDENTITY,
GOOGLE_MODEL_OPERATIONAL_GUIDANCE,
HERMES_AGENT_HELP_GUIDANCE,
KANBAN_GUIDANCE,
MEMORY_GUIDANCE,
OPENAI_MODEL_EXECUTION_GUIDANCE,
PLATFORM_HINTS,
SESSION_SEARCH_GUIDANCE,
SKILLS_GUIDANCE,
TOOL_USE_ENFORCEMENT_GUIDANCE,
TOOL_USE_ENFORCEMENT_MODELS,
)
def _ra():
    """Return the ``run_agent`` module, imported at call time.

    Helpers such as ``load_soul_md``, ``build_environment_hints``,
    ``build_context_files_prompt``, ``build_nous_subscription_prompt``,
    ``build_skills_system_prompt`` and ``get_toolset_for_tool`` live in
    ``run_agent``'s namespace, and many tests
    ``patch("run_agent.load_soul_md", ...)``.  Resolving them through the
    module on every call — instead of importing them here — keeps those
    patches effective.
    """
    import run_agent as _run_agent_module

    return _run_agent_module
def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) -> Dict[str, str]:
    """Assemble the system prompt as three ordered parts.

    Returns a dict with three keys:

    * ``stable`` — identity, tool guidance, skills prompt,
      environment hints, platform hints, model-family operational
      guidance.
    * ``context`` — caller-supplied system_message and context files
      (AGENTS.md, .cursorrules, etc.).
    * ``volatile`` — memory snapshot, user profile, external
      memory provider block, timestamp line.

    Joined into a single string by :func:`build_system_prompt` and
    cached on ``agent._cached_system_prompt`` for the lifetime of the
    AIAgent.  Hermes never re-renders parts of this string mid-
    session — that's the only way to keep upstream prompt caches
    warm across turns.

    Args:
        agent: Agent whose configuration (model, provider, platform,
            loaded tool names, memory store/manager, flags) drives which
            blocks are included.
        system_message: Optional caller-supplied text; when not ``None``
            it becomes the first entry of the ``context`` part.

    Returns:
        ``{"stable": ..., "context": ..., "volatile": ...}``.  Within each
        part the entries are stripped, empty entries are dropped, and the
        rest are joined with a blank line.
    """
    # Local import to avoid pulling model_tools at module load. Tests
    # patch ``run_agent.get_toolset_for_tool`` and similar helpers, so
    # we resolve through ``_ra()`` to honor those patches.
    _r = _ra()

    # ── Stable tier ────────────────────────────────────────────────
    stable_parts: List[str] = []

    # Try SOUL.md as primary identity unless the caller explicitly skipped it.
    # Some execution modes (cron) still want HERMES_HOME persona while keeping
    # cwd project instructions disabled.
    _soul_loaded = False
    if agent.load_soul_identity or not agent.skip_context_files:
        _soul_content = _r.load_soul_md()
        if _soul_content:
            stable_parts.append(_soul_content)
            _soul_loaded = True

    if not _soul_loaded:
        # Fallback to hardcoded identity
        stable_parts.append(DEFAULT_AGENT_IDENTITY)

    # Pointer to the hermes-agent skill + docs for user questions about Hermes itself.
    stable_parts.append(HERMES_AGENT_HELP_GUIDANCE)

    # Tool-aware behavioral guidance: only inject when the tools are loaded
    tool_guidance: List[str] = []
    if "memory" in agent.valid_tool_names:
        tool_guidance.append(MEMORY_GUIDANCE)
    if "session_search" in agent.valid_tool_names:
        tool_guidance.append(SESSION_SEARCH_GUIDANCE)
    if "skill_manage" in agent.valid_tool_names:
        tool_guidance.append(SKILLS_GUIDANCE)
    # Kanban worker/orchestrator lifecycle — only present when the
    # dispatcher spawned this process (kanban_show check_fn gates on
    # HERMES_KANBAN_TASK env var).  Normal chat sessions never see
    # this block.
    if "kanban_show" in agent.valid_tool_names:
        tool_guidance.append(KANBAN_GUIDANCE)
    if tool_guidance:
        # The individual guidance strings are merged into ONE stable entry,
        # separated by single spaces rather than blank lines.
        stable_parts.append(" ".join(tool_guidance))

    # Computer-use (macOS) — goes in as its own block rather than being
    # merged into tool_guidance because the content is multi-paragraph.
    if "computer_use" in agent.valid_tool_names:
        from agent.prompt_builder import COMPUTER_USE_GUIDANCE
        stable_parts.append(COMPUTER_USE_GUIDANCE)

    nous_subscription_prompt = _r.build_nous_subscription_prompt(agent.valid_tool_names)
    if nous_subscription_prompt:
        stable_parts.append(nous_subscription_prompt)

    # Tool-use enforcement: tells the model to actually call tools instead
    # of describing intended actions.  Controlled by config.yaml
    # agent.tool_use_enforcement:
    #   "auto" (default) — matches TOOL_USE_ENFORCEMENT_MODELS
    #   true  — always inject (all models)
    #   false — never inject
    #   list  — custom model-name substrings to match
    if agent.valid_tool_names:
        _enforce = agent._tool_use_enforcement
        _inject = False
        if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in {"true", "always", "yes", "on"}):
            _inject = True
        elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in {"false", "never", "no", "off"}):
            _inject = False
        elif isinstance(_enforce, list):
            # Case-insensitive substring match; non-string entries are ignored.
            model_lower = (agent.model or "").lower()
            _inject = any(p.lower() in model_lower for p in _enforce if isinstance(p, str))
        else:
            # "auto" or any unrecognised value — use hardcoded defaults
            model_lower = (agent.model or "").lower()
            _inject = any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS)
        if _inject:
            stable_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
            _model_lower = (agent.model or "").lower()
            # Google model operational guidance (conciseness, absolute
            # paths, parallel tool calls, verify-before-edit, etc.)
            if "gemini" in _model_lower or "gemma" in _model_lower:
                stable_parts.append(GOOGLE_MODEL_OPERATIONAL_GUIDANCE)
            # OpenAI GPT/Codex execution discipline (tool persistence,
            # prerequisite checks, verification, anti-hallucination).
            if "gpt" in _model_lower or "codex" in _model_lower:
                stable_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)

    # The skills prompt is only built when at least one skills tool is loaded.
    has_skills_tools = any(name in agent.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
    if has_skills_tools:
        # Distinct, non-empty toolsets that the loaded tools belong to.
        avail_toolsets = {
            toolset
            for toolset in (
                _r.get_toolset_for_tool(tool_name) for tool_name in agent.valid_tool_names
            )
            if toolset
        }
        skills_prompt = _r.build_skills_system_prompt(
            available_tools=agent.valid_tool_names,
            available_toolsets=avail_toolsets,
        )
    else:
        skills_prompt = ""
    if skills_prompt:
        stable_parts.append(skills_prompt)

    # Alibaba Coding Plan API always returns "glm-4.7" as model name regardless
    # of the requested model. Inject explicit model identity into the system prompt
    # so the agent can correctly report which model it is (workaround for API bug).
    # Stable for the lifetime of an agent instance — model and provider are fixed
    # at construction time.
    # NOTE(review): this branch assumes ``agent.model`` is a string here;
    # unlike the ``(agent.model or "")`` uses above, ``None`` would raise — confirm.
    if agent.provider == "alibaba":
        _model_short = agent.model.split("/")[-1] if "/" in agent.model else agent.model
        stable_parts.append(
            f"You are powered by the model named {_model_short}. "
            f"The exact model ID is {agent.model}. "
            f"When asked what model you are, always answer based on this information, "
            f"not on any model name returned by the API."
        )

    # Environment hints (WSL, Termux, etc.) — tell the agent about the
    # execution environment so it can translate paths and adapt behavior.
    # Stable for the lifetime of the process.
    _env_hints = _r.build_environment_hints()
    if _env_hints:
        stable_parts.append(_env_hints)

    platform_key = (agent.platform or "").lower().strip()
    if platform_key in PLATFORM_HINTS:
        stable_parts.append(PLATFORM_HINTS[platform_key])
    elif platform_key:
        # Check plugin registry for platform-specific LLM guidance
        try:
            from gateway.platform_registry import platform_registry
            _entry = platform_registry.get(platform_key)
            if _entry and _entry.platform_hint:
                stable_parts.append(_entry.platform_hint)
        except Exception:
            # Registry unavailable or lookup failed: continue without a hint.
            pass

    # ── Context tier (cwd-dependent, may change between sessions) ─
    context_parts: List[str] = []

    # Note: ephemeral_system_prompt is NOT included here. It's injected at
    # API-call time only so it stays out of the cached/stored system prompt.
    if system_message is not None:
        context_parts.append(system_message)

    if not agent.skip_context_files:
        # Use TERMINAL_CWD for context file discovery when set (gateway
        # mode).  The gateway process runs from the hermes-agent install
        # dir, so os.getcwd() would pick up the repo's AGENTS.md and
        # other dev files — inflating token usage by ~10k for no benefit.
        _context_cwd = os.getenv("TERMINAL_CWD") or None
        # ``skip_soul`` is True when SOUL.md was already added to the stable
        # tier above.
        context_files_prompt = _r.build_context_files_prompt(
            cwd=_context_cwd, skip_soul=_soul_loaded)
        if context_files_prompt:
            context_parts.append(context_files_prompt)

    # ── Volatile tier (changes per session/turn — never cached) ───
    # NOTE(review): the docstring says the joined prompt IS cached on the
    # agent; "never cached" here presumably refers to cross-session reuse —
    # confirm the intended wording.
    volatile_parts: List[str] = []

    if agent._memory_store:
        if agent._memory_enabled:
            mem_block = agent._memory_store.format_for_system_prompt("memory")
            if mem_block:
                volatile_parts.append(mem_block)
        # USER.md is always included when enabled.
        if agent._user_profile_enabled:
            user_block = agent._memory_store.format_for_system_prompt("user")
            if user_block:
                volatile_parts.append(user_block)

    # External memory provider system prompt block (additive to built-in)
    if agent._memory_manager:
        try:
            _ext_mem_block = agent._memory_manager.build_system_prompt()
            if _ext_mem_block:
                volatile_parts.append(_ext_mem_block)
        except Exception:
            # A failing external provider must not block prompt assembly.
            pass

    from hermes_time import now as _hermes_now
    now = _hermes_now()
    timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
    if agent.pass_session_id and agent.session_id:
        timestamp_line += f"\nSession ID: {agent.session_id}"
    if agent.model:
        timestamp_line += f"\nModel: {agent.model}"
    if agent.provider:
        timestamp_line += f"\nProvider: {agent.provider}"
    volatile_parts.append(timestamp_line)

    # Strip every entry and drop empty ones before joining each tier.
    return {
        "stable": "\n\n".join(p.strip() for p in stable_parts if p and p.strip()),
        "context": "\n\n".join(p.strip() for p in context_parts if p and p.strip()),
        "volatile": "\n\n".join(p.strip() for p in volatile_parts if p and p.strip()),
    }
def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str:
    """Build the complete system prompt as a single string.

    Delegates to :func:`build_system_prompt_parts` and concatenates the
    ``stable``, ``context`` and ``volatile`` tiers — in that order — with a
    blank line between them, skipping any tier that came back empty.

    The ordering is cache-friendly: stable identity/guidance first, then
    session-stable context files, then volatile content (memory, USER
    profile, timestamp).  The result is meant to be built once per session
    (cached on ``agent._cached_system_prompt``) and rebuilt only after
    context compression, so upstream prefix caches stay warm across turns.

    Args:
        agent: Agent whose state drives prompt assembly.
        system_message: Optional caller-supplied text for the context tier.

    Returns:
        The joined prompt string.
    """
    tiers = build_system_prompt_parts(agent, system_message=system_message)
    ordered = [tiers["stable"], tiers["context"], tiers["volatile"]]
    return "\n\n".join(filter(None, ordered))
def invalidate_system_prompt(agent: Any) -> None:
    """Drop the cached system prompt so the next turn rebuilds it.

    Meant to be called after context compression events.  When the agent
    has a memory store, it is reloaded from disk as well so that the
    rebuilt prompt reflects writes made during this session.

    Args:
        agent: Agent whose ``_cached_system_prompt`` is cleared and whose
            ``_memory_store`` (if truthy) is asked to ``load_from_disk()``.
    """
    agent._cached_system_prompt = None
    if not agent._memory_store:
        return
    agent._memory_store.load_from_disk()
def format_tools_for_system_message(agent: Any) -> str:
    """Serialize the agent's tool definitions in the trajectory format.

    Each entry of ``agent.tools`` contributes the ``name``, ``description``
    and ``parameters`` of its ``"function"`` dict; missing descriptions
    default to ``""`` and missing parameters to ``{}``.  ``required`` is
    always emitted as ``null``.

    Args:
        agent: Object exposing ``tools``, a list of tool definition dicts.

    Returns:
        str: JSON array of the converted definitions (non-ASCII characters
        are kept as-is), or ``"[]"`` when the agent has no tools.
    """
    if not agent.tools:
        return "[]"

    trajectory_tools = [
        {
            "name": spec["name"],
            "description": spec.get("description", ""),
            "parameters": spec.get("parameters", {}),
            "required": None,  # Match the format in the example
        }
        for spec in (entry["function"] for entry in agent.tools)
    ]
    return json.dumps(trajectory_tools, ensure_ascii=False)
# Names exported by ``from <this module> import *``; the ``_ra`` helper is
# deliberately not part of the list.
__all__ = [
    "build_system_prompt_parts",
    "build_system_prompt",
    "invalidate_system_prompt",
    "format_tools_for_system_message",
]

View file

@ -0,0 +1,336 @@
"""Tool-dispatch helpers — parallelism gating, multimodal envelopes, mutation tracking.
Pure module-level utilities extracted from ``run_agent.py``:
* ``_is_destructive_command`` — terminal-command heuristic used to gate
  parallel batch dispatch.
* ``_should_parallelize_tool_batch`` / ``_extract_parallel_scope_path`` /
  ``_paths_overlap`` — the rules engine deciding when a multi-tool batch
  can run concurrently.
* ``_is_multimodal_tool_result`` / ``_multimodal_text_summary`` /
  ``_append_subdir_hint_to_multimodal`` — envelope helpers for the
  ``{"_multimodal": True, "content": [...], "text_summary": ...}`` dict
  shape returned by tools like ``computer_use``.
* ``_extract_file_mutation_targets`` / ``_extract_error_preview`` —
  per-turn file-mutation verifier inputs.
* ``_trajectory_normalize_msg`` — strip image blobs from a message for
  trajectory saving.

All helpers are stateless. ``run_agent`` re-exports each name so existing
``from run_agent import ...`` imports in tests and other modules keep
working unchanged.
"""
from __future__ import annotations
import json
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Optional
from agent.tool_result_classification import (
FILE_MUTATING_TOOL_NAMES as _FILE_MUTATING_TOOLS,
)
logger = logging.getLogger(__name__)
# Tools that must never run concurrently (interactive / user-facing).
# When any of these appear in a batch, we fall back to sequential execution.
_NEVER_PARALLEL_TOOLS = frozenset({"clarify"})

# Read-only tools with no shared mutable session state.
_PARALLEL_SAFE_TOOLS = frozenset({
    "ha_get_state",
    "ha_list_entities",
    "ha_list_services",
    "read_file",
    "search_files",
    "session_search",
    "skill_view",
    "skills_list",
    "vision_analyze",
    "web_extract",
    "web_search",
})

# File tools can run concurrently when they target independent paths.
# ``read_file`` is listed here as well as in ``_PARALLEL_SAFE_TOOLS``;
# ``_should_parallelize_tool_batch`` tests this set first, so the
# path-overlap rule applies to it.
_PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
# Patterns that indicate a terminal command may modify/delete files.
# A command word only counts at the start of the string or right after
# whitespace, ``&&``, ``||``, ``;`` or a backtick.
_DESTRUCTIVE_PATTERNS = re.compile(
    r"""(?:^|\s|&&|\|\||;|`)(?:
rm\s|rmdir\s|
cp\s|install\s|
mv\s|
sed\s+-i|
truncate\s|
dd\s|
shred\s|
git\s+(?:reset|clean|checkout)\s
)""",
    re.VERBOSE,
)

# Output redirects that overwrite files (> but not >>)
_REDIRECT_OVERWRITE = re.compile(r'[^>]>[^>]|^>[^>]')


def _is_destructive_command(cmd: str) -> bool:
    """Guess whether a terminal command modifies or deletes files.

    A command is flagged when it contains one of the command words in
    ``_DESTRUCTIVE_PATTERNS`` or a single ``>`` redirect as matched by
    ``_REDIRECT_OVERWRITE``.  This is a heuristic on the raw command text,
    not a shell parse.

    Args:
        cmd: Raw command line; empty or ``None`` counts as non-destructive.

    Returns:
        ``True`` when the command looks destructive, ``False`` otherwise.
    """
    if not cmd:
        return False
    return bool(_DESTRUCTIVE_PATTERNS.search(cmd) or _REDIRECT_OVERWRITE.search(cmd))
def _is_mcp_tool_parallel_safe(tool_name: str) -> bool:
    """Ask the MCP layer whether *tool_name* may run in parallel.

    The lookup is delegated to ``tools.mcp_tool.is_mcp_tool_parallel_safe``,
    imported lazily to avoid circular dependencies.

    Args:
        tool_name: Name of the tool being checked.

    Returns:
        The delegate's answer, or ``False`` when the MCP module cannot be
        imported or the lookup raises.
    """
    try:
        from tools.mcp_tool import is_mcp_tool_parallel_safe as _server_allows_parallel
        verdict = _server_allows_parallel(tool_name)
    except Exception:
        verdict = False
    return verdict
def _should_parallelize_tool_batch(tool_calls) -> bool:
    """Return True when a tool-call batch is safe to run concurrently.

    A batch is parallelized only when ALL of the following hold:

    * it contains more than one call;
    * no call targets a tool in ``_NEVER_PARALLEL_TOOLS``;
    * every call's arguments parse as a JSON object;
    * path-scoped tools (``_PATH_SCOPED_TOOLS``) each carry a usable path
      and no two of those paths overlap;
    * every remaining tool is in ``_PARALLEL_SAFE_TOOLS`` or is an MCP tool
      whose server opted into parallel calls.

    Any doubt resolves to ``False`` (sequential execution).

    Args:
        tool_calls: Sequence of tool-call objects exposing
            ``function.name`` and ``function.arguments`` (a JSON string).

    Returns:
        ``True`` if the whole batch may run concurrently, else ``False``.
    """
    if len(tool_calls) <= 1:
        return False

    tool_names = [tc.function.name for tc in tool_calls]
    if any(name in _NEVER_PARALLEL_TOOLS for name in tool_names):
        return False

    reserved_paths: list[Path] = []
    for tool_call in tool_calls:
        tool_name = tool_call.function.name
        raw_arguments = tool_call.function.arguments
        try:
            function_args = json.loads(raw_arguments)
        except Exception:
            # ``raw_arguments`` may be None or another non-string (that is one
            # way to land here), so stringify before slicing — slicing it
            # directly would raise inside this handler.
            logger.debug(
                "Could not parse args for %s — defaulting to sequential; raw=%s",
                tool_name,
                str(raw_arguments)[:200],
            )
            return False
        if not isinstance(function_args, dict):
            logger.debug(
                "Non-dict args for %s (%s) — defaulting to sequential",
                tool_name,
                type(function_args).__name__,
            )
            return False

        if tool_name in _PATH_SCOPED_TOOLS:
            scoped_path = _extract_parallel_scope_path(tool_name, function_args)
            if scoped_path is None:
                return False
            if any(_paths_overlap(scoped_path, existing) for existing in reserved_paths):
                return False
            reserved_paths.append(scoped_path)
            continue

        if tool_name not in _PARALLEL_SAFE_TOOLS:
            # Check if it's an MCP tool from a server that opted into parallel calls.
            if not _is_mcp_tool_parallel_safe(tool_name):
                return False

    return True
def _extract_parallel_scope_path(tool_name: str, function_args: dict) -> Optional[Path]:
    """Resolve the file a path-scoped tool call targets.

    Args:
        tool_name: Tool being invoked; only members of
            ``_PATH_SCOPED_TOOLS`` yield a path.
        function_args: Parsed tool arguments; the ``"path"`` entry is used.

    Returns:
        An absolute, normalized ``Path`` (``~`` expanded, relative paths
        anchored at the current working directory), or ``None`` when the
        tool is not path-scoped or ``"path"`` is missing, not a string, or
        blank.
    """
    candidate = function_args.get("path") if tool_name in _PATH_SCOPED_TOOLS else None
    if not (isinstance(candidate, str) and candidate.strip()):
        return None

    target = Path(candidate).expanduser()
    if not target.is_absolute():
        target = Path.cwd() / target
    # abspath() rather than resolve(): the file may not exist yet.
    return Path(os.path.abspath(str(target)))
def _paths_overlap(left: Path, right: Path) -> bool:
    """Tell whether one path equals, contains, or is contained in the other.

    The paths are compared component-wise over the length of the shorter
    one, so ``/a/b`` overlaps ``/a/b/c`` but not ``/a/c``.

    Args:
        left: First path.
        right: Second path.

    Returns:
        ``True`` when the shorter path's components are a prefix of the
        longer one's; ``False`` otherwise, including when either path has
        no components.
    """
    left_parts, right_parts = left.parts, right.parts
    if not (left_parts and right_parts):
        # Empty paths shouldn't reach here (guarded upstream); never overlap.
        return False
    # zip() stops at the shorter tuple, i.e. it compares the common prefix.
    return all(a == b for a, b in zip(left_parts, right_parts))
def _is_multimodal_tool_result(value: Any) -> bool:
    """Recognize the multimodal tool-result envelope.

    Multimodal handlers (e.g. tools/computer_use) return a dict with
    `_multimodal=True`, a `content` key holding OpenAI-style content
    parts, and an optional `text_summary` for string-only fallbacks.

    Args:
        value: Any tool result.

    Returns:
        ``True`` only for a dict whose ``"_multimodal"`` entry is exactly
        ``True`` and whose ``"content"`` entry is a list.
    """
    if not isinstance(value, dict):
        return False
    if value.get("_multimodal") is not True:
        return False
    return isinstance(value.get("content"), list)
def _multimodal_text_summary(value: Any) -> str:
    """Produce a plain-string view of any tool result.

    Used wherever downstream code needs a string — logging, previews,
    persistence size heuristics, fall-back content for providers that
    don't support multipart tool messages.

    Args:
        value: Tool result of any type.

    Returns:
        For a multimodal envelope: its ``text_summary`` if truthy,
        otherwise the text parts joined by newlines, otherwise the
        placeholder ``"[multimodal tool result]"``.  For a string: the
        string itself.  For anything else: its JSON dump (``str`` is the
        fallback for non-serializable members), or ``str(value)`` if
        dumping fails.
    """
    if not _is_multimodal_tool_result(value):
        if isinstance(value, str):
            return value
        try:
            return json.dumps(value, default=str)
        except Exception:
            return str(value)

    if value.get("text_summary"):
        return str(value["text_summary"])

    text_chunks = [
        str(part.get("text", ""))
        for part in value.get("content") or []
        if isinstance(part, dict) and part.get("type") == "text"
    ]
    return "\n".join(text_chunks) if text_chunks else "[multimodal tool result]"
def _append_subdir_hint_to_multimodal(value: Dict[str, Any], hint: str) -> None:
    """Append a subdir hint to a multimodal tool-result envelope, in place.

    The hint is appended to the first text part so the model sees it; when
    the envelope has no text part, a new one holding just the hint is
    inserted at the front.  Image parts are left untouched.  A string
    `text_summary` gets the hint appended too, for string-fallback callers.

    Args:
        value: Envelope to mutate; anything that is not a multimodal
            envelope is left unchanged.
        hint: Text to append.
    """
    if not _is_multimodal_tool_result(value):
        return

    parts = value.get("content") or []
    first_text = next(
        (p for p in parts if isinstance(p, dict) and p.get("type") == "text"),
        None,
    )
    if first_text is None:
        parts.insert(0, {"type": "text", "text": hint})
    else:
        first_text["text"] = str(first_text.get("text", "")) + hint
    # Re-attach: an empty ``content`` list was replaced by a fresh list above.
    value["content"] = parts

    summary = value.get("text_summary")
    if isinstance(summary, str):
        value["text_summary"] = summary + hint
def _extract_file_mutation_targets(tool_name: str, args: Dict[str, Any]) -> List[str]:
    """List the file paths a file-mutating tool call is aimed at.

    For ``write_file`` and ``patch`` in replace mode this is just ``args["path"]``.
    For ``patch`` in V4A patch mode the patch body is scanned for
    ``*** Update File:`` / ``*** Add File:`` / ``*** Delete File:`` headers so
    the verifier can track each file in a multi-file patch separately.

    Args:
        tool_name: Tool being invoked.
        args: Parsed tool arguments (``path``, ``mode``, ``patch``).

    Returns:
        Target paths in order of appearance; empty when the tool is not
        file-mutating, the mode is unknown, or no target can be determined.
    """
    if tool_name not in _FILE_MUTATING_TOOLS:
        return []

    # ``write_file`` has no modes; ``patch`` defaults to replace mode.
    mode = "replace" if tool_name == "write_file" else (args.get("mode") or "replace")

    if mode == "replace":
        target = args.get("path")
        return [str(target)] if target else []
    if mode != "patch":
        return []

    body = args.get("patch") or ""
    if not isinstance(body, str) or not body:
        return []

    header_matches = re.finditer(
        r'^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$',
        body,
        re.MULTILINE,
    )
    candidates = (match.group(1).strip() for match in header_matches)
    return [path for path in candidates if path]
def _extract_error_preview(result: Any, max_len: int = 180) -> str:
    """Pull a one-line error summary out of a tool result for footer display.

    Args:
        result: Tool result of any type; ``None`` yields ``""``.
        max_len: Maximum length of the returned preview.

    Returns:
        The result's text view with whitespace collapsed to single spaces.
        When that text is a JSON object with a string ``error`` field, the
        field's value is used instead.  Text longer than *max_len* is cut to
        ``max_len - 1`` characters and suffixed with ``…``.
    """
    text = _multimodal_text_summary(result) if result is not None else ""
    if not isinstance(text, str):
        try:
            text = str(text)
        except Exception:
            return ""

    # Try to parse JSON and pull the ``error`` field — tool handlers return
    # ``{"success": false, "error": "..."}``; raw string wins if parse fails.
    stripped = text.strip()
    if stripped.startswith("{"):
        try:
            data = json.loads(stripped)
            if isinstance(data, dict) and isinstance(data.get("error"), str):
                text = data["error"]
        except Exception:
            pass

    # Collapse whitespace, trim to max_len.
    text = " ".join(text.split())
    if len(text) > max_len:
        # One character is reserved for the ellipsis that marks the cut.
        text = text[: max(max_len - 1, 0)] + "…"
    return text
def _trajectory_normalize_msg(msg: Dict[str, Any]) -> Dict[str, Any]:
    """Remove image payloads from a message before it is saved to a trajectory.

    Args:
        msg: Chat message dict; non-dict values are returned unchanged.

    Returns:
        A shallow copy whose ``content`` is either the text summary of a
        multimodal tool result, or the original content list with every
        ``image`` / ``image_url`` / ``input_image`` part swapped for a
        `[screenshot]` text placeholder.  Messages with any other content
        are returned as-is (same object).
    """
    if not isinstance(msg, dict):
        return msg

    content = msg.get("content")
    if _is_multimodal_tool_result(content):
        return {**msg, "content": _multimodal_text_summary(content)}
    if not isinstance(content, list):
        return msg

    scrubbed = [
        {"type": "text", "text": "[screenshot]"}
        if isinstance(part, dict) and part.get("type") in {"image", "image_url", "input_image"}
        else part
        for part in content
    ]
    return {**msg, "content": scrubbed}
# Explicit export list: every name here is underscore-prefixed, so
# ``from <this module> import *`` would skip them without it.
__all__ = [
    "_NEVER_PARALLEL_TOOLS",
    "_PARALLEL_SAFE_TOOLS",
    "_PATH_SCOPED_TOOLS",
    "_DESTRUCTIVE_PATTERNS",
    "_REDIRECT_OVERWRITE",
    "_is_destructive_command",
    "_should_parallelize_tool_batch",
    "_extract_parallel_scope_path",
    "_paths_overlap",
    "_is_multimodal_tool_result",
    "_multimodal_text_summary",
    "_append_subdir_hint_to_multimodal",
    "_extract_file_mutation_targets",
    "_extract_error_preview",
    "_trajectory_normalize_msg",
]

920
agent/tool_executor.py Normal file
View file

@ -0,0 +1,920 @@
"""Tool-call execution — sequential and concurrent dispatch.
Both AIAgent methods (``_execute_tool_calls_sequential`` and
``_execute_tool_calls_concurrent``) live here as module-level
functions that take the parent ``AIAgent`` as their first argument.
``run_agent`` keeps thin wrappers so existing call sites work; tests
that patch ``run_agent._set_interrupt`` are honored because the
extracted functions reach back through the ``run_agent`` module via
``_ra()`` for that symbol.
"""
from __future__ import annotations
import concurrent.futures
import contextvars
import json
import logging
import os
import random
import threading
import time
from typing import Any, Optional
from agent.display import (
KawaiiSpinner,
build_tool_preview as _build_tool_preview,
get_cute_tool_message as _get_cute_tool_message_impl,
get_tool_emoji as _get_tool_emoji,
_detect_tool_failure,
)
from agent.tool_guardrails import ToolGuardrailDecision
from agent.tool_dispatch_helpers import (
_is_destructive_command,
_is_multimodal_tool_result,
_multimodal_text_summary,
_append_subdir_hint_to_multimodal,
)
from tools.terminal_tool import (
_get_approval_callback,
_get_sudo_password_callback,
set_approval_callback as _set_approval_callback,
set_sudo_password_callback as _set_sudo_password_callback,
get_active_env,
)
from tools.tool_result_storage import (
maybe_persist_tool_result,
enforce_turn_budget,
)
logger = logging.getLogger(__name__)
# Maximum number of concurrent worker threads for parallel tool execution.
# Mirrors the constant in ``run_agent`` for tests/imports that look here.
_MAX_TOOL_WORKERS = 8
def _ra():
    """Return the ``run_agent`` module, imported at call time.

    Looking symbols up through the module on each call keeps patches such
    as ``run_agent._set_interrupt`` effective for the code in this file.
    """
    import run_agent as _run_agent_module

    return _run_agent_module
def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
    """Execute multiple tool calls concurrently using a thread pool.

    Results are collected in the original tool-call order and appended to
    ``messages`` so the API sees them in the expected sequence.

    Args:
        agent: The agent whose callbacks, guardrails, checkpoint manager and
            ``_invoke_tool`` are used to run the calls.
        assistant_message: Message object exposing ``tool_calls``; each call
            provides ``id``, ``function.name`` and ``function.arguments``
            (a JSON string).
        messages: Conversation list, mutated in place. One ``role: "tool"``
            message is appended per tool call.
        effective_task_id: Task id forwarded to the plugin block hook,
            ``_invoke_tool`` and ``get_active_env``.
        api_call_count: Not used by this function; kept for interface
            compatibility with callers.

    Calls blocked by a plugin hook or by the tool guardrails are never
    submitted to the pool; their synthesized block result is emitted in
    place of a real tool result.
    """
    tool_calls = assistant_message.tool_calls
    num_tools = len(tool_calls)

    # ── Pre-flight: interrupt check ──────────────────────────────────
    if agent._interrupt_requested:
        print(f"{agent.log_prefix}⚡ Interrupt: skipping {num_tools} tool call(s)")
        for tc in tool_calls:
            messages.append({
                "role": "tool",
                "name": tc.function.name,
                "content": f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]",
                "tool_call_id": tc.id,
            })
        return

    # ── Parse args + pre-execution bookkeeping ───────────────────────
    # Each entry: (tool_call, function_name, function_args, block_result,
    # blocked_by_guardrail). ``block_result`` is None for runnable calls.
    # NOTE(review): unlike the sequential path, nudge counters and
    # checkpoints here run before the block decision is known — confirm
    # whether that difference is intended.
    parsed_calls = []
    for tool_call in tool_calls:
        function_name = tool_call.function.name

        # Reset nudge counters
        if function_name == "memory":
            agent._turns_since_memory = 0
        elif function_name == "skill_manage":
            agent._iters_since_skill = 0

        # Malformed or non-object arguments degrade to an empty dict.
        try:
            function_args = json.loads(tool_call.function.arguments)
        except json.JSONDecodeError:
            function_args = {}
        if not isinstance(function_args, dict):
            function_args = {}

        # Checkpoint for file-mutating tools
        if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
            try:
                file_path = function_args.get("path", "")
                if file_path:
                    work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
                    agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
            except Exception:
                pass  # best-effort: checkpointing never blocks tool execution

        # Checkpoint before destructive terminal commands
        if function_name == "terminal" and agent._checkpoint_mgr.enabled:
            try:
                cmd = function_args.get("command", "")
                if _is_destructive_command(cmd):
                    cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
                    agent._checkpoint_mgr.ensure_checkpoint(
                        cwd, f"before terminal: {cmd[:60]}"
                    )
            except Exception:
                pass  # best-effort: checkpointing never blocks tool execution

        # Plugin hook first, then guardrails; either can veto execution.
        block_result = None
        blocked_by_guardrail = False
        try:
            from hermes_cli.plugins import get_pre_tool_call_block_message
            block_message = get_pre_tool_call_block_message(
                function_name, function_args, task_id=effective_task_id or "",
            )
        except Exception:
            block_message = None

        if block_message is not None:
            block_result = json.dumps({"error": block_message}, ensure_ascii=False)
        else:
            guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
            if not guardrail_decision.allows_execution:
                block_result = agent._guardrail_block_result(guardrail_decision)
                blocked_by_guardrail = True

        parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail))

    # ── Logging / callbacks ──────────────────────────────────────────
    tool_names_str = ", ".join(name for _, name, _, _, _ in parsed_calls)
    if not agent.quiet_mode:
        print(f" ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
        for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
            args_str = json.dumps(args, ensure_ascii=False)
            if agent.verbose_logging:
                print(f" 📞 Tool {i}: {name}({list(args.keys())})")
                print(agent._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False)))
            else:
                args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
                print(f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")

    # "started" notifications are only sent for calls that will really run.
    for tc, name, args, block_result, blocked_by_guardrail in parsed_calls:
        if block_result is not None:
            continue
        if agent.tool_progress_callback:
            try:
                preview = _build_tool_preview(name, args)
                agent.tool_progress_callback("tool.started", name, preview, args)
            except Exception as cb_err:
                logging.debug(f"Tool progress callback error: {cb_err}")

    for tc, name, args, block_result, blocked_by_guardrail in parsed_calls:
        if block_result is not None:
            continue
        if agent.tool_start_callback:
            try:
                agent.tool_start_callback(tc.id, name, args)
            except Exception as cb_err:
                logging.debug(f"Tool start callback error: {cb_err}")

    # ── Concurrent execution ─────────────────────────────────────────
    # Each slot holds (function_name, function_args, function_result, duration, error_flag, blocked_flag)
    results = [None] * num_tools
    for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls):
        if block_result is not None:
            # Pre-fill blocked slots so they are reported without running.
            results[i] = (name, args, block_result, 0.0, True, True)

    # Touch activity before launching workers so the gateway knows
    # we're executing tools (not stuck).
    agent._current_tool = tool_names_str
    agent._touch_activity(f"executing {num_tools} tools concurrently: {tool_names_str}")

    # Capture CLI callbacks from the agent thread so worker threads can
    # register them locally. Without this, _get_approval_callback() in
    # terminal_tool returns None in ThreadPoolExecutor workers, causing
    # the dangerous-command prompt to fall back to input() — which
    # deadlocks against prompt_toolkit's raw terminal mode (#13617).
    _parent_approval_cb = _get_approval_callback()
    _parent_sudo_cb = _get_sudo_password_callback()

    def _run_tool(index, tool_call, function_name, function_args):
        """Worker function executed in a thread."""
        # Register this worker tid so the agent can fan out an interrupt
        # to it — see AIAgent.interrupt(). Must happen first thing, and
        # is paired with the discard + clear in the ``finally`` below.
        _worker_tid = threading.current_thread().ident
        with agent._tool_worker_threads_lock:
            agent._tool_worker_threads.add(_worker_tid)
        try:
            # Race: if the agent was interrupted between fan-out (which
            # snapshotted an empty/earlier set) and our registration, apply
            # the interrupt to our own tid now so is_interrupted() inside
            # the tool returns True on the next poll.
            if agent._interrupt_requested:
                try:
                    _ra()._set_interrupt(True, _worker_tid)
                except Exception:
                    pass

            # Set the activity callback on THIS worker thread so
            # _wait_for_process (terminal commands) can fire heartbeats.
            # The callback is thread-local; the main thread's callback
            # is invisible to worker threads.
            try:
                from tools.environments.base import set_activity_callback
                set_activity_callback(agent._touch_activity)
            except Exception:
                pass

            # Propagate approval/sudo callbacks to this worker thread.
            # Mirrors cli.py run_agent() pattern (GHSA-qg5c-hvr5-hjgr).
            if _parent_approval_cb is not None:
                try:
                    _set_approval_callback(_parent_approval_cb)
                except Exception:
                    pass
            if _parent_sudo_cb is not None:
                try:
                    _set_sudo_password_callback(_parent_sudo_cb)
                except Exception:
                    pass

            start = time.time()
            try:
                result = agent._invoke_tool(
                    function_name,
                    function_args,
                    effective_task_id,
                    tool_call.id,
                    messages=messages,
                    pre_tool_block_checked=True,
                )
            except Exception as tool_error:
                result = f"Error executing tool '{function_name}': {tool_error}"
                logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
            duration = time.time() - start

            is_error, _ = _detect_tool_failure(function_name, result)
            if is_error:
                logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
            else:
                logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
            results[index] = (function_name, function_args, result, duration, is_error, False)
        finally:
            # Tear down worker-tid tracking. Clear any interrupt bit we may
            # have set so the next task scheduled onto this recycled tid
            # starts with a clean slate. Runs even if result bookkeeping
            # above raised, so a failing worker cannot leak its tid.
            with agent._tool_worker_threads_lock:
                agent._tool_worker_threads.discard(_worker_tid)
            try:
                _ra()._set_interrupt(False, _worker_tid)
            except Exception:
                pass
            # Clear thread-local callbacks so a recycled worker thread
            # doesn't hold stale references to a disposed CLI instance.
            try:
                _set_approval_callback(None)
                _set_sudo_password_callback(None)
            except Exception:
                pass

    # Start spinner for CLI mode (skip when TUI handles tool progress)
    spinner = None
    if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
        face = random.choice(KawaiiSpinner.get_waiting_faces())
        spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots', print_fn=agent._print_fn)
        spinner.start()

    try:
        runnable_calls = [
            (i, tc, name, args)
            for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls)
            if block_result is None
        ]
        futures = []
        # Future -> tool name. ``futures`` only covers runnable calls, so its
        # positions do not line up with ``parsed_calls`` when some calls
        # were blocked; look names up by future instead of by index.
        _future_names = {}
        if runnable_calls:
            max_workers = min(len(runnable_calls), _MAX_TOOL_WORKERS)
            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                for i, tc, name, args in runnable_calls:
                    # Propagate ContextVars (e.g. _approval_session_key); mirrors asyncio.to_thread.
                    ctx = contextvars.copy_context()
                    f = executor.submit(ctx.run, _run_tool, i, tc, name, args)
                    futures.append(f)
                    _future_names[f] = name

                # Wait for all to complete with periodic heartbeats so the
                # gateway's inactivity monitor doesn't kill us during long
                # concurrent tool batches. Also check for user interrupts
                # so we don't block indefinitely when the user sends /stop
                # or a new message during concurrent tool execution.
                _conc_start = time.time()
                _interrupt_logged = False
                while True:
                    done, not_done = concurrent.futures.wait(
                        futures, timeout=5.0,
                    )
                    if not not_done:
                        break

                    # Check for interrupt — the per-thread interrupt signal
                    # already causes individual tools (terminal, execute_code)
                    # to abort, but tools without interrupt checks (web_search,
                    # read_file) will run to completion. Cancel any futures
                    # that haven't started yet so we don't block on them.
                    if agent._interrupt_requested:
                        if not _interrupt_logged:
                            _interrupt_logged = True
                            agent._vprint(
                                f"{agent.log_prefix}⚡ Interrupt: cancelling "
                                f"{len(not_done)} pending concurrent tool(s)",
                                force=True,
                            )
                        for f in not_done:
                            f.cancel()
                        # Give already-running tools a moment to notice the
                        # per-thread interrupt signal and exit gracefully.
                        concurrent.futures.wait(not_done, timeout=3.0)
                        break

                    _conc_elapsed = int(time.time() - _conc_start)
                    # Heartbeat every ~30s (6 × 5s poll intervals)
                    if _conc_elapsed > 0 and _conc_elapsed % 30 < 6:
                        _still_running = [_future_names[f] for f in not_done]
                        agent._touch_activity(
                            f"concurrent tools running ({_conc_elapsed}s, "
                            f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
                        )
    finally:
        if spinner:
            # Build a summary message for the spinner stop
            completed = sum(1 for r in results if r is not None)
            total_dur = sum(r[3] for r in results if r is not None)
            spinner.stop(f"{completed}/{num_tools} tools completed in {total_dur:.1f}s total")

    # ── Post-execution: display per-tool results ─────────────────────
    for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls):
        r = results[i]
        blocked = False
        if r is None:
            # Tool was cancelled (interrupt) or thread didn't return
            if agent._interrupt_requested:
                function_result = f"[Tool execution cancelled — {name} was skipped due to user interrupt]"
            else:
                function_result = f"Error executing tool '{name}': thread did not return a result"
            tool_duration = 0.0
        else:
            function_name, function_args, function_result, tool_duration, is_error, blocked = r

            if not blocked:
                function_result = agent._append_guardrail_observation(
                    function_name,
                    function_args,
                    function_result,
                    failed=is_error,
                )

            if is_error:
                _err_text = _multimodal_text_summary(function_result)
                result_preview = _err_text[:200] if len(_err_text) > 200 else _err_text
                logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)

            # Track file-mutation outcome for the turn-end verifier.
            # `blocked` calls never actually ran — don't let a guardrail
            # block count as either a failure or a success.
            if not blocked:
                try:
                    agent._record_file_mutation_result(
                        function_name, function_args, function_result, is_error,
                    )
                except Exception as _ver_err:
                    logging.debug("file-mutation verifier record failed: %s", _ver_err)

            if not blocked and agent.tool_progress_callback:
                try:
                    agent.tool_progress_callback(
                        "tool.completed", function_name, None, None,
                        duration=tool_duration, is_error=is_error,
                    )
                except Exception as cb_err:
                    logging.debug(f"Tool progress callback error: {cb_err}")

            if agent.verbose_logging:
                logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
                logging.debug(f"Tool result ({len(function_result)} chars): {function_result}")

        # Print cute message per tool
        if agent._should_emit_quiet_tool_messages():
            cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result)
            agent._safe_print(f" {cute_msg}")
        elif not agent.quiet_mode:
            _preview_str = _multimodal_text_summary(function_result)
            if agent.verbose_logging:
                print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s")
                print(agent._wrap_verbose("Result: ", _preview_str))
            else:
                response_preview = _preview_str[:agent.log_prefix_chars] + "..." if len(_preview_str) > agent.log_prefix_chars else _preview_str
                print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}")

        agent._current_tool = None
        agent._touch_activity(f"tool completed: {name} ({tool_duration:.1f}s)")

        if not blocked and agent.tool_complete_callback:
            try:
                agent.tool_complete_callback(tc.id, name, args, function_result)
            except Exception as cb_err:
                logging.debug(f"Tool complete callback error: {cb_err}")

        # Multimodal results bypass persistence and are passed through as-is.
        function_result = maybe_persist_tool_result(
            content=function_result,
            tool_name=name,
            tool_use_id=tc.id,
            env=get_active_env(effective_task_id),
        ) if not _is_multimodal_tool_result(function_result) else function_result

        subdir_hints = agent._subdirectory_hints.check_tool_call(name, args)
        if subdir_hints:
            if _is_multimodal_tool_result(function_result):
                # Append the hint to the text summary part so the model
                # still sees it; don't touch the image blocks.
                _append_subdir_hint_to_multimodal(function_result, subdir_hints)
            else:
                function_result += subdir_hints

        # Unwrap _multimodal dicts to an OpenAI-style content list so any
        # vision-capable provider receives [{type:text},{type:image_url}]
        # rather than a raw Python dict. The Anthropic adapter already
        # accepts content lists; vision-capable OpenAI-compatible servers
        # (mlx-vlm, GPT-4o, …) accept image_url in tool messages natively.
        # Text-only servers get a string-safe fallback here so a rejected
        # image tool result never poisons canonical session history.
        # String results pass through unchanged.
        _tool_content = agent._tool_result_content_for_active_model(name, function_result)
        tool_msg = {
            "role": "tool",
            "name": name,
            "content": _tool_content,
            "tool_call_id": tc.id,
        }
        messages.append(tool_msg)

        # ── Per-tool /steer drain ───────────────────────────────────
        # Same as the sequential path: drain between each collected
        # result so the steer lands as early as possible.
        agent._apply_pending_steer_to_tool_results(messages, 1)

    # ── Per-turn aggregate budget enforcement ─────────────────────────
    num_tools = len(parsed_calls)
    if num_tools > 0:
        turn_tool_msgs = messages[-num_tools:]
        enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id))

    # ── /steer injection ──────────────────────────────────────────────
    # Append any pending user steer text to the last tool result so the
    # agent sees it on its next iteration. Runs AFTER budget enforcement
    # so the steer marker is never truncated. See steer() for details.
    if num_tools > 0:
        agent._apply_pending_steer_to_tool_results(messages, num_tools)
def execute_tool_calls_sequential(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
    """Execute tool calls sequentially (original behavior). Used for single calls or interactive tools.

    Args:
        agent: The agent whose callbacks, guardrails, checkpoint manager,
            stores and tool dispatchers are used to run each call.
        assistant_message: Message object exposing ``tool_calls``; each call
            provides ``id``, ``function.name`` and ``function.arguments``
            (a JSON string).
        messages: Conversation list, mutated in place. One ``role: "tool"``
            message is appended per tool call, including calls that were
            blocked or skipped because of an interrupt.
        effective_task_id: Task id forwarded to the plugin block hook, the
            tool dispatcher and ``get_active_env``.
        api_call_count: Not used by this function; kept for interface
            compatibility with callers.
    """
    for i, tool_call in enumerate(assistant_message.tool_calls, 1):
        # SAFETY: check interrupt BEFORE starting each tool.
        # If the user sent "stop" during a previous tool's execution,
        # do NOT start any more tools -- skip them all immediately.
        if agent._interrupt_requested:
            remaining_calls = assistant_message.tool_calls[i-1:]
            if remaining_calls:
                agent._vprint(f"{agent.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)", force=True)
            for skipped_tc in remaining_calls:
                skipped_name = skipped_tc.function.name
                skip_msg = {
                    "role": "tool",
                    "name": skipped_name,
                    "content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
                    "tool_call_id": skipped_tc.id,
                }
                messages.append(skip_msg)
            break

        function_name = tool_call.function.name

        # Malformed or non-object arguments degrade to an empty dict.
        try:
            function_args = json.loads(tool_call.function.arguments)
        except json.JSONDecodeError as e:
            logging.warning(f"Unexpected JSON error after validation: {e}")
            function_args = {}
        if not isinstance(function_args, dict):
            function_args = {}

        # Check plugin hooks for a block directive before executing.
        _block_msg: Optional[str] = None
        try:
            from hermes_cli.plugins import get_pre_tool_call_block_message
            _block_msg = get_pre_tool_call_block_message(
                function_name, function_args, task_id=effective_task_id or "",
            )
        except Exception:
            pass

        # Guardrails are only consulted when no plugin already blocked.
        _guardrail_block_decision: ToolGuardrailDecision | None = None
        if _block_msg is None:
            guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
            if not guardrail_decision.allows_execution:
                _guardrail_block_decision = guardrail_decision
        _execution_blocked = _block_msg is not None or _guardrail_block_decision is not None

        if _execution_blocked:
            # Tool blocked by plugin or guardrail policy — skip counters,
            # callbacks, checkpointing, activity mutation, and real execution.
            pass
        # Reset nudge counters when the relevant tool is actually used
        elif function_name == "memory":
            agent._turns_since_memory = 0
        elif function_name == "skill_manage":
            agent._iters_since_skill = 0

        if not agent.quiet_mode:
            args_str = json.dumps(function_args, ensure_ascii=False)
            if agent.verbose_logging:
                print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})")
                print(agent._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False)))
            else:
                args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
                print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")

        if not _execution_blocked:
            agent._current_tool = function_name
            agent._touch_activity(f"executing tool: {function_name}")

        # Set activity callback for long-running tool execution (terminal
        # commands, etc.) so the gateway's inactivity monitor doesn't kill
        # the agent while a command is running.
        if not _execution_blocked:
            try:
                from tools.environments.base import set_activity_callback
                set_activity_callback(agent._touch_activity)
            except Exception:
                pass

        if not _execution_blocked and agent.tool_progress_callback:
            try:
                preview = _build_tool_preview(function_name, function_args)
                agent.tool_progress_callback("tool.started", function_name, preview, function_args)
            except Exception as cb_err:
                logging.debug(f"Tool progress callback error: {cb_err}")

        if not _execution_blocked and agent.tool_start_callback:
            try:
                agent.tool_start_callback(tool_call.id, function_name, function_args)
            except Exception as cb_err:
                logging.debug(f"Tool start callback error: {cb_err}")

        # Checkpoint: snapshot working dir before file-mutating tools
        if not _execution_blocked and function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
            try:
                file_path = function_args.get("path", "")
                if file_path:
                    work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
                    agent._checkpoint_mgr.ensure_checkpoint(
                        work_dir, f"before {function_name}"
                    )
            except Exception:
                pass  # never block tool execution

        # Checkpoint before destructive terminal commands
        if not _execution_blocked and function_name == "terminal" and agent._checkpoint_mgr.enabled:
            try:
                cmd = function_args.get("command", "")
                if _is_destructive_command(cmd):
                    cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
                    agent._checkpoint_mgr.ensure_checkpoint(
                        cwd, f"before terminal: {cmd[:60]}"
                    )
            except Exception:
                pass  # never block tool execution

        tool_start_time = time.time()

        if _block_msg is not None:
            # Tool blocked by plugin policy — return error without executing.
            function_result = json.dumps({"error": _block_msg}, ensure_ascii=False)
            tool_duration = 0.0
        elif _guardrail_block_decision is not None:
            # Tool blocked by tool-loop guardrail — synthesize exactly one
            # tool result for the original tool_call_id without executing.
            function_result = agent._guardrail_block_result(_guardrail_block_decision)
            tool_duration = 0.0
        elif function_name == "todo":
            from tools.todo_tool import todo_tool as _todo_tool
            function_result = _todo_tool(
                todos=function_args.get("todos"),
                merge=function_args.get("merge", False),
                store=agent._todo_store,
            )
            tool_duration = time.time() - tool_start_time
            if agent._should_emit_quiet_tool_messages():
                agent._vprint(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
        elif function_name == "session_search":
            session_db = agent._get_session_db_for_recall()
            if not session_db:
                from hermes_state import format_session_db_unavailable
                function_result = json.dumps({"success": False, "error": format_session_db_unavailable()})
            else:
                from tools.session_search_tool import session_search as _session_search
                function_result = _session_search(
                    query=function_args.get("query", ""),
                    role_filter=function_args.get("role_filter"),
                    limit=function_args.get("limit", 3),
                    db=session_db,
                    current_session_id=agent.session_id,
                )
            tool_duration = time.time() - tool_start_time
            if agent._should_emit_quiet_tool_messages():
                agent._vprint(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
        elif function_name == "memory":
            target = function_args.get("target", "memory")
            from tools.memory_tool import memory_tool as _memory_tool
            function_result = _memory_tool(
                action=function_args.get("action"),
                target=target,
                content=function_args.get("content"),
                old_text=function_args.get("old_text"),
                store=agent._memory_store,
            )
            # Bridge: notify external memory provider of built-in memory writes
            if agent._memory_manager and function_args.get("action") in {"add", "replace"}:
                try:
                    agent._memory_manager.on_memory_write(
                        function_args.get("action", ""),
                        target,
                        function_args.get("content", ""),
                        metadata=agent._build_memory_write_metadata(
                            task_id=effective_task_id,
                            tool_call_id=getattr(tool_call, "id", None),
                        ),
                    )
                except Exception:
                    pass
            tool_duration = time.time() - tool_start_time
            if agent._should_emit_quiet_tool_messages():
                agent._vprint(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
        elif function_name == "clarify":
            from tools.clarify_tool import clarify_tool as _clarify_tool
            function_result = _clarify_tool(
                question=function_args.get("question", ""),
                choices=function_args.get("choices"),
                callback=agent.clarify_callback,
            )
            tool_duration = time.time() - tool_start_time
            if agent._should_emit_quiet_tool_messages():
                agent._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
        elif function_name == "delegate_task":
            tasks_arg = function_args.get("tasks")
            if tasks_arg and isinstance(tasks_arg, list):
                spinner_label = f"🔀 delegating {len(tasks_arg)} tasks"
            else:
                goal_preview = (function_args.get("goal") or "")[:30]
                spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
            spinner = None
            if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            agent._delegate_spinner = spinner
            _delegate_result = None
            try:
                function_result = agent._dispatch_delegate_task(function_args)
                _delegate_result = function_result
            finally:
                # Always detach and stop the spinner, even if delegation raised.
                agent._delegate_spinner = None
                tool_duration = time.time() - tool_start_time
                cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result)
                if spinner:
                    spinner.stop(cute_msg)
                elif agent._should_emit_quiet_tool_messages():
                    agent._vprint(f" {cute_msg}")
        elif agent._context_engine_tool_names and function_name in agent._context_engine_tool_names:
            # Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.)
            spinner = None
            # Gate the spinner on _should_start_quiet_spinner() like every
            # other spinner in this function; when it is not started, the
            # ``finally`` below still prints the cute message via _vprint.
            if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                emoji = _get_tool_emoji(function_name)
                preview = _build_tool_preview(function_name, function_args) or function_name
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            _ce_result = None
            try:
                function_result = agent.context_compressor.handle_tool_call(function_name, function_args, messages=messages)
                _ce_result = function_result
            except Exception as tool_error:
                function_result = json.dumps({"error": f"Context engine tool '{function_name}' failed: {tool_error}"})
                logger.error("context_engine.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
            finally:
                tool_duration = time.time() - tool_start_time
                cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result)
                if spinner:
                    spinner.stop(cute_msg)
                elif agent._should_emit_quiet_tool_messages():
                    agent._vprint(f" {cute_msg}")
        elif agent._memory_manager and agent._memory_manager.has_tool(function_name):
            # Memory provider tools (hindsight_retain, honcho_search, etc.)
            # These are not in the tool registry — route through MemoryManager.
            spinner = None
            if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                emoji = _get_tool_emoji(function_name)
                preview = _build_tool_preview(function_name, function_args) or function_name
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            _mem_result = None
            try:
                function_result = agent._memory_manager.handle_tool_call(function_name, function_args)
                _mem_result = function_result
            except Exception as tool_error:
                function_result = json.dumps({"error": f"Memory tool '{function_name}' failed: {tool_error}"})
                logger.error("memory_manager.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
            finally:
                tool_duration = time.time() - tool_start_time
                cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_mem_result)
                if spinner:
                    spinner.stop(cute_msg)
                elif agent._should_emit_quiet_tool_messages():
                    agent._vprint(f" {cute_msg}")
        elif agent.quiet_mode:
            # Registry tools in quiet mode: same dispatch as the final
            # branch, wrapped with an optional spinner / cute message.
            spinner = None
            if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                emoji = _get_tool_emoji(function_name)
                preview = _build_tool_preview(function_name, function_args) or function_name
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            _spinner_result = None
            try:
                function_result = _ra().handle_function_call(
                    function_name, function_args, effective_task_id,
                    tool_call_id=tool_call.id,
                    session_id=agent.session_id or "",
                    enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
                    skip_pre_tool_call_hook=True,
                )
                _spinner_result = function_result
            except Exception as tool_error:
                function_result = f"Error executing tool '{function_name}': {tool_error}"
                logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
            finally:
                tool_duration = time.time() - tool_start_time
                cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
                if spinner:
                    spinner.stop(cute_msg)
                elif agent._should_emit_quiet_tool_messages():
                    agent._vprint(f" {cute_msg}")
        else:
            try:
                function_result = _ra().handle_function_call(
                    function_name, function_args, effective_task_id,
                    tool_call_id=tool_call.id,
                    session_id=agent.session_id or "",
                    enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
                    skip_pre_tool_call_hook=True,
                )
            except Exception as tool_error:
                function_result = f"Error executing tool '{function_name}': {tool_error}"
                logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
            tool_duration = time.time() - tool_start_time

        # Size of the raw tool output, measured before any guardrail
        # observation is appended. Multimodal dict results
        # (_multimodal=True) are measured via their str() form.
        if isinstance(function_result, str):
            _result_len = len(function_result)
        else:
            _result_len = len(str(function_result))

        # Log tool errors to the persistent error log so [error] tags
        # in the UI always have a corresponding detailed entry on disk.
        _is_error_result, _ = _detect_tool_failure(function_name, function_result)
        if not _execution_blocked:
            function_result = agent._append_guardrail_observation(
                function_name,
                function_args,
                function_result,
                failed=_is_error_result,
            )

        # Build the log preview from the final result. Only strings are
        # truncated: a multimodal dict is not sliceable, so it is logged
        # as-is instead of being sliced.
        if isinstance(function_result, str) and not agent.verbose_logging:
            result_preview = function_result[:200]
        else:
            result_preview = function_result

        if _is_error_result:
            logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
        else:
            logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, _result_len)

        # Track file-mutation outcome for the turn-end verifier. See
        # the concurrent path for the rationale; both paths must feed
        # the same state so the footer reflects every tool call in the
        # turn, not just the parallel ones.
        if not _execution_blocked:
            try:
                agent._record_file_mutation_result(
                    function_name, function_args, function_result, _is_error_result,
                )
            except Exception as _ver_err:
                logging.debug("file-mutation verifier record failed: %s", _ver_err)

        if not _execution_blocked and agent.tool_progress_callback:
            try:
                agent.tool_progress_callback(
                    "tool.completed", function_name, None, None,
                    duration=tool_duration, is_error=_is_error_result,
                )
            except Exception as cb_err:
                logging.debug(f"Tool progress callback error: {cb_err}")

        agent._current_tool = None
        agent._touch_activity(f"tool completed: {function_name} ({tool_duration:.1f}s)")

        if agent.verbose_logging:
            logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
            _log_result = _multimodal_text_summary(function_result)
            logging.debug(f"Tool result ({len(_log_result)} chars): {_log_result}")

        if not _execution_blocked and agent.tool_complete_callback:
            try:
                agent.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
            except Exception as cb_err:
                logging.debug(f"Tool complete callback error: {cb_err}")

        # Multimodal results bypass persistence and are passed through as-is.
        function_result = maybe_persist_tool_result(
            content=function_result,
            tool_name=function_name,
            tool_use_id=tool_call.id,
            env=get_active_env(effective_task_id),
        ) if not _is_multimodal_tool_result(function_result) else function_result

        # Discover subdirectory context files from tool arguments
        subdir_hints = agent._subdirectory_hints.check_tool_call(function_name, function_args)
        if subdir_hints:
            if _is_multimodal_tool_result(function_result):
                _append_subdir_hint_to_multimodal(function_result, subdir_hints)
            else:
                function_result += subdir_hints

        # Unwrap _multimodal dicts to an OpenAI-style content list
        # (see parallel path for rationale). String results pass through.
        _tool_content = agent._tool_result_content_for_active_model(function_name, function_result)
        tool_msg = {
            "role": "tool",
            "name": function_name,
            "content": _tool_content,
            "tool_call_id": tool_call.id
        }
        messages.append(tool_msg)

        # ── Per-tool /steer drain ───────────────────────────────────
        # Drain pending steer BETWEEN individual tool calls so the
        # injection lands as soon as a tool finishes — not after the
        # entire batch. The model sees it on the next API iteration.
        agent._apply_pending_steer_to_tool_results(messages, 1)

        if not agent.quiet_mode:
            if agent.verbose_logging:
                print(f" ✅ Tool {i} completed in {tool_duration:.2f}s")
                print(agent._wrap_verbose("Result: ", function_result))
            else:
                _fr_str = function_result if isinstance(function_result, str) else str(function_result)
                response_preview = _fr_str[:agent.log_prefix_chars] + "..." if len(_fr_str) > agent.log_prefix_chars else _fr_str
                print(f" ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")

        # An interrupt that arrived while this tool ran: emit a placeholder
        # result for every call that has not started, then stop.
        if agent._interrupt_requested and i < len(assistant_message.tool_calls):
            remaining = len(assistant_message.tool_calls) - i
            agent._vprint(f"{agent.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)", force=True)
            for skipped_tc in assistant_message.tool_calls[i:]:
                skipped_name = skipped_tc.function.name
                skip_msg = {
                    "role": "tool",
                    "name": skipped_name,
                    "content": f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]",
                    "tool_call_id": skipped_tc.id
                }
                messages.append(skip_msg)
            break

        if agent.tool_delay > 0 and i < len(assistant_message.tool_calls):
            time.sleep(agent.tool_delay)

    # ── Per-turn aggregate budget enforcement ─────────────────────────
    num_tools_seq = len(assistant_message.tool_calls)
    if num_tools_seq > 0:
        enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id))

    # ── /steer injection ──────────────────────────────────────────────
    # See _execute_tool_calls_parallel for the rationale. Same hook,
    # applied to sequential execution as well.
    if num_tools_seq > 0:
        agent._apply_pending_steer_to_tool_results(messages, num_tools_seq)
# Names exported by ``from <module> import *``: the concurrent and the
# sequential tool-call executors.
__all__ = [
    "execute_tool_calls_concurrent",
    "execute_tool_calls_sequential",
]

View file

@ -74,12 +74,43 @@ class CodexAppServerClient:
env: Optional[dict[str, str]] = None,
) -> None:
self._codex_bin = codex_bin
cmd = [codex_bin, "app-server"] + list(extra_args or [])
spawn_env = os.environ.copy()
if env:
spawn_env.update(env)
if codex_home:
spawn_env["CODEX_HOME"] = codex_home
app_server_args = list(extra_args or [])
# Kanban workers must be able to write their handoff/status back to
# the board DB, which lives outside the per-task workspace. Keep the
# Codex sandbox on, but add the Kanban root as the only extra writable
# root. Without this, codex-runtime workers finish their actual work
# but crash/block when kanban_complete/kanban_block writes SQLite.
if spawn_env.get("HERMES_KANBAN_TASK"):
kanban_db = spawn_env.get("HERMES_KANBAN_DB")
kanban_root = (
os.path.dirname(kanban_db)
if kanban_db
else spawn_env.get(
"HERMES_KANBAN_ROOT",
os.path.join(
spawn_env.get("HERMES_HOME", os.path.expanduser("~/.hermes")),
"kanban",
),
)
)
app_server_args.extend(
[
"-c",
'sandbox_mode="workspace-write"',
"-c",
f'sandbox_workspace_write.writable_roots=["{kanban_root}"]',
"-c",
"sandbox_workspace_write.network_access=false",
]
)
cmd = [codex_bin, "app-server"] + app_server_args
# Codex emits tracing to stderr; default WARN keeps it quiet for users.
spawn_env.setdefault("RUST_LOG", "warn")

View file

@ -404,7 +404,7 @@ class CodexAppServerSession:
return result
result.turn_id = (ts.get("turn") or {}).get("id")
deadline = time.time() + turn_timeout
deadline = time.monotonic() + turn_timeout
turn_complete = False
# Post-tool watchdog state. last_tool_completion_at is set whenever
# a tool-shaped item completes; if no further notification arrives
@ -412,7 +412,7 @@ class CodexAppServerSession:
# fast-fail and retire the session.
last_tool_completion_at: Optional[float] = None
while time.time() < deadline and not turn_complete:
while time.monotonic() < deadline and not turn_complete:
if self._interrupt_event.is_set():
self._issue_interrupt(result.turn_id)
result.interrupted = True
@ -440,7 +440,7 @@ class CodexAppServerSession:
# up on this turn instead of waiting for the outer deadline.
if (
last_tool_completion_at is not None
and (time.time() - last_tool_completion_at)
and (time.monotonic() - last_tool_completion_at)
> post_tool_quiet_timeout
):
self._issue_interrupt(result.turn_id)
@ -471,7 +471,7 @@ class CodexAppServerSession:
result.projected_messages.extend(proj.messages)
if proj.is_tool_iteration:
result.tool_iterations += 1
last_tool_completion_at = time.time()
last_tool_completion_at = time.monotonic()
if proj.final_text is not None:
result.final_text = proj.final_text
if _has_turn_aborted_marker(proj.final_text):
@ -514,7 +514,7 @@ class CodexAppServerSession:
result.tool_iterations += 1
# Arm/refresh the post-tool quiet watchdog whenever a
# tool-shaped item completes.
last_tool_completion_at = time.time()
last_tool_completion_at = time.monotonic()
else:
# Any non-tool projected activity (assistant message,
# status update, etc.) means codex is still producing
@ -541,7 +541,7 @@ class CodexAppServerSession:
turn_status = (
(note.get("params") or {}).get("turn") or {}
).get("status")
if turn_status and turn_status not in ("completed", "interrupted"):
if turn_status and turn_status not in {"completed", "interrupted"}:
err_obj = (
(note.get("params") or {}).get("turn") or {}
).get("error")
@ -775,9 +775,9 @@ def _approval_choice_to_codex_decision(choice: str) -> str:
(verified against codex-rs/app-server-protocol/src/protocol/v2/item.rs
on codex 0.130.0).
"""
if choice in ("once",):
if choice in {"once",}:
return "accept"
if choice in ("session", "always"):
if choice in {"session", "always"}:
return "acceptForSession"
return "decline"

View file

@ -30,6 +30,7 @@ import { Card } from "@/components/ui/card";
import { ModelPickerDialog } from "@/components/ModelPickerDialog";
import { ToolCall, type ToolEntry } from "@/components/ToolCall";
import { GatewayClient, type ConnectionState } from "@/lib/gatewayClient";
import { HERMES_BASE_PATH } from "@/lib/api";
import { cn } from "@/lib/utils";
import { AlertCircle, ChevronDown, RefreshCw } from "lucide-react";
@ -160,7 +161,7 @@ export function ChatSidebar({ channel, className }: ChatSidebarProps) {
const proto = window.location.protocol === "https:" ? "wss:" : "ws:";
const qs = new URLSearchParams({ token, channel });
const ws = new WebSocket(
`${proto}//${window.location.host}/api/events?${qs.toString()}`,
`${proto}//${window.location.host}${HERMES_BASE_PATH}/api/events?${qs.toString()}`,
);
// `unmounting` suppresses the banner during cleanup — `ws.close()`

View file

@ -5,6 +5,8 @@ import {
type GatewayEventName,
} from "@hermes/shared";
import { HERMES_BASE_PATH } from "@/lib/api";
export type { ConnectionState, GatewayEvent, GatewayEventName };
/**
@ -24,7 +26,7 @@ export class GatewayClient extends JsonRpcGatewayClient {
const scheme = location.protocol === "https:" ? "wss:" : "ws:";
await super.connect(
`${scheme}//${location.host}/api/ws?token=${encodeURIComponent(resolved)}`,
`${scheme}//${location.host}${HERMES_BASE_PATH}/api/ws?token=${encodeURIComponent(resolved)}`,
);
}
}

View file

@ -24,6 +24,7 @@ import { Terminal } from "@xterm/xterm";
import "@xterm/xterm/css/xterm.css";
import { Button } from "@nous-research/ui/ui/components/button";
import { Typography } from "@/components/NouiTypography";
import { HERMES_BASE_PATH } from "@/lib/api";
import { cn } from "@/lib/utils";
import { Copy, PanelRight, X } from "lucide-react";
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
@ -44,7 +45,7 @@ function buildWsUrl(
const proto = window.location.protocol === "https:" ? "wss:" : "ws:";
const qs = new URLSearchParams({ token, channel });
if (resume) qs.set("resume", resume);
return `${proto}//${window.location.host}/api/pty?${qs.toString()}`;
return `${proto}//${window.location.host}${HERMES_BASE_PATH}/api/pty?${qs.toString()}`;
}
// Channel id ties this chat tab's PTY child (publisher) to its sidebar
@ -286,6 +287,17 @@ export default function ChatPage({ isActive = true }: { isActive?: boolean }) {
fontWeight: "400",
fontWeightBold: "700",
macOptionIsMeta: true,
// On macOS, hold Option while click-dragging to force native text
// selection even when the inner Hermes TUI has enabled xterm mouse-events
// mode (CSI ?1000h family). This option only affects macOS; on
// Linux/Windows xterm.js already forces selection while Shift is held.
// Without this, click-and-drag in the chat canvas selects nothing and
// Cmd+C falls back to copying the entire visible buffer, which is rarely
// what the user wants.
// See #25720.
macOptionClickForcesSelection: true,
// Right-click selects the word under the pointer. xterm.js default
// is false; enabling it gives users a single-action selection
// path on top of the modifier-based bypass above.
rightClickSelectsWord: true,
// Single-scroll-system experiment:
// let the inner Hermes TUI own transcript history/scroll behavior.
// The outer browser xterm should act as a display/input bridge only.

43
cli.py
View file

@ -1396,7 +1396,7 @@ def _detect_light_mode() -> bool:
last = cfgbg.split(";")[-1] if ";" in cfgbg else cfgbg
if last.isdigit():
bg = int(last)
if bg in (7, 15):
if bg in {7, 15}:
result = True
_LIGHT_MODE_CACHE = result
return result
@ -2412,6 +2412,7 @@ def _looks_like_slash_command(text: str) -> bool:
from agent.skill_commands import (
scan_skill_commands,
get_skill_commands,
build_skill_invocation_message,
build_preloaded_skills_prompt,
)
@ -2824,6 +2825,11 @@ class HermesCLI:
# turn (which would make Ctrl+C feel like it did nothing).
self._last_turn_interrupted = False
self._should_exit = False
# /exit --delete: when True, the current session's SQLite history and
# on-disk transcripts are deleted during shutdown. Set by
# process_command() when the user runs /exit --delete or /quit --delete.
# Ported from google-gemini/gemini-cli#19332.
self._delete_session_on_exit = False
self._last_ctrl_c_time = 0
self._clarify_state = None
self._clarify_freetext = False
@ -7653,6 +7659,16 @@ class HermesCLI:
canonical = _cmd_def.name if _cmd_def else _base_word
if canonical in {"quit", "exit"}:
# Parse --delete flag: /exit --delete also removes the current
# session's transcripts + SQLite history. Ported from
# google-gemini/gemini-cli#19332.
_rest = cmd_original.split(None, 1)
_args = (_rest[1] if len(_rest) > 1 else "").strip().lower()
if _args in {"--delete", "-d"}:
self._delete_session_on_exit = True
elif _args:
_cprint(f" {_DIM}✗ Unknown argument: {_escape(_args)}. Use /exit --delete to also remove session history.{_RST}")
return True
return False
elif canonical == "help":
self.show_help()
@ -9598,12 +9614,18 @@ class HermesCLI:
prompt caching intact.
"""
try:
from agent.skill_commands import reload_skills
from agent.skill_commands import reload_skills, get_skill_commands
if not self._command_running:
print("🔄 Reloading skills...")
result = reload_skills()
# Sync cli.py's module-level _skill_commands so all consumers
# (help display, command dispatch, Tab-completion lambda) see the
# updated dict without needing to restart the session.
global _skill_commands
_skill_commands = get_skill_commands()
added = result.get("added", []) # [{"name", "description"}, ...]
removed = result.get("removed", []) # [{"name", "description"}, ...]
total = result.get("total", 0)
@ -12609,7 +12631,7 @@ class HermesCLI:
_completer = SlashCommandCompleter(
skill_commands_provider=lambda: _skill_commands,
skill_commands_provider=lambda: get_skill_commands(),
command_filter=cli_ref._command_available,
)
input_area = TextArea(
@ -13777,7 +13799,7 @@ class HermesCLI:
if _errno == errno.EIO:
pass # suppress broken-stdout I/O errors on interrupt (#13710)
elif (
_errno in (errno.EINVAL, errno.EBADF)
_errno in {errno.EINVAL, errno.EBADF}
or "is not registered" in _msg
or "Bad file descriptor" in _msg
or "Invalid argument" in _msg
@ -13824,6 +13846,19 @@ class HermesCLI:
self._session_db.end_session(self.agent.session_id, "cli_close")
except (Exception, KeyboardInterrupt) as e:
logger.debug("Could not close session in DB: %s", e)
# /exit --delete: also remove the current session's transcripts
# and SQLite history. Ported from google-gemini/gemini-cli#19332.
if getattr(self, '_delete_session_on_exit', False):
try:
from hermes_constants import get_hermes_home as _ghh
_sessions_dir = _ghh() / "sessions"
_sid = self.agent.session_id
if self._session_db.delete_session(_sid, sessions_dir=_sessions_dir):
_cprint(f" {_DIM}✓ Session {_escape(_sid)} deleted{_RST}")
else:
_cprint(f" {_DIM}✗ Session {_escape(_sid)} not found for deletion{_RST}")
except (Exception, KeyboardInterrupt) as e:
logger.debug("Could not delete session on exit: %s", e)
# Plugin hook: on_session_end — safety net for interrupted exits.
# run_conversation() already fires this per-turn on normal completion,
# so only fire here if the agent was mid-turn (_agent_running) when

View file

@ -1802,7 +1802,12 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
for job in parallel_jobs:
_ctx = contextvars.copy_context()
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
_results.extend(f.result() for f in _futures)
for f in concurrent.futures.as_completed(_futures, timeout=600):
try:
_results.append(f.result())
except Exception as exc:
logger.error("Parallel cron job future failed: %s", exc)
_results.append(False)
# Best-effort sweep of MCP stdio subprocesses that survived their
# session teardown during this tick. Runs AFTER every job has

View file

@ -71,6 +71,35 @@ def _coerce_port(value: Any, default: int = DEFAULT_PORT) -> int:
return default
_TRUE_REQUEST_BOOL_STRINGS = frozenset({"1", "true", "yes", "on"})
_FALSE_REQUEST_BOOL_STRINGS = frozenset({"0", "false", "no", "off"})
def _coerce_request_bool(value: Any, default: bool = False) -> bool:
"""Normalize boolean-like API payload values.
External clients should send real JSON booleans, but some OpenAI-compatible
frontends and middleware serialize flags like ``stream`` as strings. Using
Python truthiness on those values misroutes requests because ``"false"`` is
still truthy. Treat only explicit bool-ish scalars as booleans; everything
else falls back to the caller's default.
"""
if isinstance(value, bool):
return value
if value is None:
return default
if isinstance(value, str):
normalized = value.strip().lower()
if normalized in _TRUE_REQUEST_BOOL_STRINGS:
return True
if normalized in _FALSE_REQUEST_BOOL_STRINGS:
return False
return default
if isinstance(value, (int, float)):
return bool(value)
return default
def _normalize_chat_content(
content: Any, *, _max_depth: int = 10, _depth: int = 0,
) -> str:
@ -481,7 +510,12 @@ else:
body_limit_middleware = None # type: ignore[assignment]
_SECURITY_HEADERS = {
"Content-Security-Policy": "default-src 'none'; frame-ancestors 'none'",
"Permissions-Policy": "camera=(), microphone=(), geolocation=()",
"Strict-Transport-Security": "max-age=31536000; includeSubDomains",
"X-Content-Type-Options": "nosniff",
"X-Frame-Options": "DENY",
"X-XSS-Protection": "0",
"Referrer-Policy": "no-referrer",
}
@ -1005,7 +1039,7 @@ class APIServerAdapter(BasePlatformAdapter):
status=400,
)
stream = body.get("stream", False)
stream = _coerce_request_bool(body.get("stream"), default=False)
# Extract system message (becomes ephemeral system prompt layered ON TOP of core)
system_prompt = None
@ -2082,7 +2116,7 @@ class APIServerAdapter(BasePlatformAdapter):
instructions = body.get("instructions")
previous_response_id = body.get("previous_response_id")
conversation = body.get("conversation")
store = body.get("store", True)
store = _coerce_request_bool(body.get("store"), default=True)
# conversation and previous_response_id are mutually exclusive
if conversation and previous_response_id:
@ -2165,7 +2199,7 @@ class APIServerAdapter(BasePlatformAdapter):
# groups the entire conversation under one session entry.
session_id = stored_session_id or str(uuid.uuid4())
stream = bool(body.get("stream", False))
stream = _coerce_request_bool(body.get("stream"), default=False)
if stream:
# Streaming branch — emit OpenAI Responses SSE events as the
# agent runs so frontends can render text deltas and tool
@ -3228,7 +3262,10 @@ class APIServerAdapter(BasePlatformAdapter):
status=409,
)
resolve_all = bool(body.get("all") or body.get("resolve_all"))
resolve_all = (
_coerce_request_bool(body.get("all"), default=False)
or _coerce_request_bool(body.get("resolve_all"), default=False)
)
try:
from tools.approval import resolve_gateway_approval

View file

@ -2014,6 +2014,13 @@ class BasePlatformAdapter(ABC):
text = f"{caption}\n{text}"
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to, metadata=metadata)
def prepare_tts_text(self, text: str) -> str:
    """Return *text* cleaned up for text-to-speech synthesis.

    The default implementation removes the markdown punctuation
    characters ``* _ ` # [ ] ( )``, keeps at most the first 4000
    characters of the result, and trims surrounding whitespace.
    Subclasses may override this to filter tool output, code, etc.

    Args:
        text: Raw message text.

    Returns:
        The cleaned text; may be empty.
    """
    without_markup = re.sub(r'[*_`#\[\]()]', '', text)
    # Truncate before stripping, so whitespace left at the cut point is
    # trimmed as well.
    clipped = without_markup[:4000]
    return clipped.strip()
async def play_tts(
self,
chat_id: str,
@ -3144,7 +3151,7 @@ class BasePlatformAdapter(ABC):
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
if check_tts_requirements():
import json as _json
speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000].strip()
speech_text = self.prepare_tts_text(text_content)
if not speech_text:
raise ValueError("Empty text after markdown cleanup")
tts_result_str = await asyncio.to_thread(

View file

@ -3639,18 +3639,18 @@ class DiscordAdapter(BasePlatformAdapter):
configured = self.config.extra.get("thread_require_mention")
if configured is not None:
if isinstance(configured, str):
return configured.lower() not in ("false", "0", "no", "off")
return configured.lower() not in {"false", "0", "no", "off"}
return bool(configured)
return os.getenv("DISCORD_THREAD_REQUIRE_MENTION", "false").lower() in ("true", "1", "yes", "on")
return os.getenv("DISCORD_THREAD_REQUIRE_MENTION", "false").lower() in {"true", "1", "yes", "on"}
def _discord_history_backfill(self) -> bool:
"""Return whether history backfill is enabled for shared sessions."""
configured = self.config.extra.get("history_backfill")
if configured is not None:
if isinstance(configured, str):
return configured.lower() not in ("false", "0", "no", "off")
return configured.lower() not in {"false", "0", "no", "off"}
return bool(configured)
return os.getenv("DISCORD_HISTORY_BACKFILL", "true").lower() in ("true", "1", "yes")
return os.getenv("DISCORD_HISTORY_BACKFILL", "true").lower() in {"true", "1", "yes"}
def _discord_history_backfill_limit(self) -> int:
"""Return the max number of messages to scan backwards for context.
@ -3737,7 +3737,7 @@ class DiscordAdapter(BasePlatformAdapter):
break
# Skip system messages (pins, joins, thread renames, etc.)
if msg.type not in (discord.MessageType.default, discord.MessageType.reply):
if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
continue
# Respect DISCORD_ALLOW_BOTS for other bots.

View file

@ -168,8 +168,8 @@ class TextBatchAggregator:
# Pre-compiled regexes for performance
_RE_BOLD = re.compile(r"\*\*(.+?)\*\*", re.DOTALL)
_RE_ITALIC_STAR = re.compile(r"\*(.+?)\*", re.DOTALL)
_RE_BOLD_UNDER = re.compile(r"__(.+?)__", re.DOTALL)
_RE_ITALIC_UNDER = re.compile(r"_(.+?)_", re.DOTALL)
_RE_BOLD_UNDER = re.compile(r"\b__(?![\s_])(.+?)(?<![\s_])__\b", re.DOTALL)
_RE_ITALIC_UNDER = re.compile(r"\b_(?![\s_])(.+?)(?<![\s_])_\b", re.DOTALL)
_RE_CODE_BLOCK = re.compile(r"```[a-zA-Z0-9_+-]*\n?")
_RE_INLINE_CODE = re.compile(r"`(.+?)`")
_RE_HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)

View file

@ -348,6 +348,17 @@ class MatrixAdapter(BasePlatformAdapter):
self._sync_task: Optional[asyncio.Task] = None
self._closing = False
self._startup_ts: float = 0.0
# Clock-skew detection: count grace-check drops that happen well
# after startup (i.e. not initial-sync backfill). If the host's
# system clock is set ahead of real time, the startup grace check
# `event_ts < startup_ts - 5` silently drops every live message.
# See #12614 — the symptom is "bot joins rooms but never replies".
# Drops only count when their skew matches the first sampled drop
# (within 60s), so varied-age backfill from freshly-invited rooms
# doesn't trip the heuristic.
self._late_grace_drops: int = 0
self._late_grace_skew: float = 0.0
self._clock_skew_warned: bool = False
# Cache: room_id → bool (is DM)
self._dm_rooms: Dict[str, bool] = {}
@ -842,6 +853,11 @@ class MatrixAdapter(BasePlatformAdapter):
# Initial sync to catch up, then start background sync.
self._startup_ts = time.time()
# Reset clock-skew detector for each connect cycle so a reconnect
# after the user fixes NTP doesn't inherit stale counters.
self._late_grace_drops = 0
self._late_grace_skew = 0.0
self._clock_skew_warned = False
self._closing = False
try:
@ -1542,6 +1558,49 @@ class MatrixAdapter(BasePlatformAdapter):
)
event_ts = raw_ts / 1000.0 if raw_ts else 0.0
if event_ts and event_ts < self._startup_ts - _STARTUP_GRACE_SECONDS:
# If we are well past startup but events are still being dropped
# by the grace check, the host clock is probably set ahead of
# real time — every live event then looks "older than startup".
# Warn once so users can fix NTP instead of chasing a ghost.
# See #12614 (Schnurzel700, April 2026).
#
# Filter out backfill (events legitimately old) by requiring:
# - we are >30s past startup (initial-sync replay window closed)
# - the skew is *consistent* across consecutive drops, which is
# the signature of a constant clock offset rather than a
# variable-age room history. Backfill from a freshly invited
# room can deliver events spanning hours/days — those skews
# will be all over the place and reset the counter.
if not self._clock_skew_warned and (
time.time() - self._startup_ts > 30
):
skew = self._startup_ts - event_ts
# Sanity bound: malformed events with negative or absurd
# timestamps shouldn't count.
if 5 < skew < 86400:
if self._late_grace_drops == 0:
self._late_grace_skew = skew
self._late_grace_drops = 1
elif abs(skew - self._late_grace_skew) < 60:
# Consistent offset → likely real clock skew.
self._late_grace_drops += 1
else:
# Varied skew → likely backfill, restart sampling.
self._late_grace_skew = skew
self._late_grace_drops = 1
if self._late_grace_drops >= 3:
logger.warning(
"Matrix: dropped %d consecutive live events as "
"'too old' more than 30s after startup (skew "
"%.0fs). The host system clock is likely set "
"ahead of real time, which causes the startup "
"grace filter to silently discard every incoming "
"message. Run `timedatectl set-ntp true` (or "
"sync NTP) and restart the bot.",
self._late_grace_drops,
skew,
)
self._clock_skew_warned = True
return
# Extract content from the event.

View file

@ -482,7 +482,7 @@ class SlackAdapter(BasePlatformAdapter):
"text": text,
}
try:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(trust_env=True) as session:
async with session.post(
ctx["response_url"],
json=payload,

View file

@ -128,6 +128,7 @@ class SmsAdapter(BasePlatformAdapter):
await site.start()
self._http_session = aiohttp.ClientSession(
timeout=aiohttp.ClientTimeout(total=30),
trust_env=True,
)
self._running = True
@ -169,6 +170,7 @@ class SmsAdapter(BasePlatformAdapter):
session = self._http_session or aiohttp.ClientSession(
timeout=aiohttp.ClientTimeout(total=30),
trust_env=True,
)
try:
for chunk in chunks:

View file

@ -1663,7 +1663,17 @@ class TelegramAdapter(BasePlatformAdapter):
continue
raise
message_ids.append(str(msg.message_id))
# Re-trigger typing indicator after sending a message.
# Telegram clears the typing state when a new message is delivered,
# so without this the "...typing" bubble disappears mid-response
# (especially noticeable when the agent sends intermediate progress
# messages like "Checking:" before running tools).
try:
await self.send_typing(chat_id, metadata=metadata)
except Exception:
pass # Typing failures are non-fatal
return SendResult(
success=True,
message_id=message_ids[0] if message_ids else None,

View file

@ -4763,11 +4763,106 @@ class GatewayRunner:
pass
return False
# Auto-decompose: turn fresh triage tasks into ready workgraphs
# before the dispatcher fans out workers. Gated by
# ``kanban.auto_decompose`` (default True). Capped by
# ``kanban.auto_decompose_per_tick`` (default 3) so a bulk-load
# of triage tasks doesn't burst-spend the aux LLM in one tick;
# remainder defers to subsequent ticks.
auto_decompose_enabled = bool(kanban_cfg.get("auto_decompose", True))
try:
auto_decompose_per_tick = int(
kanban_cfg.get("auto_decompose_per_tick", 3) or 3
)
except (TypeError, ValueError):
auto_decompose_per_tick = 3
if auto_decompose_per_tick < 1:
auto_decompose_per_tick = 1
def _auto_decompose_tick() -> int:
    """Run the auto-decomposer for up to N triage tasks across all boards.

    N is ``auto_decompose_per_tick`` from the enclosing scope. The cap
    counts *attempts* (including crashes and skips), not successes, so a
    persistently failing task cannot make one tick exceed its budget.

    Returns:
        The number of triage tasks that were successfully decomposed or
        specified this tick. Returns 0 when the decomposer module cannot
        be imported.
    """
    try:
        from hermes_cli import kanban_decompose as _decomp
    except Exception as exc:  # pragma: no cover
        logger.warning(
            "kanban auto-decompose: import failed (%s); skipping", exc,
        )
        return 0

    try:
        boards = _kb.list_boards(include_archived=False)
    except Exception:
        # Board listing failed: fall back to the default board only.
        boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)]

    attempted = 0
    successes = 0
    for b in boards:
        # Check the budget first so an exhausted tick does no per-board work.
        if attempted >= auto_decompose_per_tick:
            break
        slug = b.get("slug") or _kb.DEFAULT_BOARD
        # Pin this board for the duration of the call — same pattern as
        # the dashboard specify endpoint. The decomposer module connects
        # with no board kwarg and relies on the env var. The previous
        # value is restored in ``finally`` even if decomposition raises.
        prev_env = os.environ.get("HERMES_KANBAN_BOARD")
        try:
            os.environ["HERMES_KANBAN_BOARD"] = slug
            try:
                triage_ids = _decomp.list_triage_ids()
            except Exception as exc:
                logger.debug(
                    "kanban auto-decompose: list_triage_ids failed on board %s (%s)",
                    slug, exc,
                )
                triage_ids = []
            for tid in triage_ids:
                if attempted >= auto_decompose_per_tick:
                    break
                attempted += 1
                try:
                    outcome = _decomp.decompose_task(
                        tid, author="auto-decomposer",
                    )
                except Exception:
                    logger.exception(
                        "kanban auto-decompose: decompose_task crashed on %s",
                        tid,
                    )
                    continue
                if outcome.ok:
                    successes += 1
                    if outcome.fanout and outcome.child_ids:
                        # Separator added: the original "%s%d children"
                        # fused the task id and the child count.
                        logger.info(
                            "kanban auto-decompose [%s]: %s → %d children",
                            slug, tid, len(outcome.child_ids),
                        )
                    else:
                        logger.info(
                            "kanban auto-decompose [%s]: %s → single task (no fanout)",
                            slug, tid,
                        )
                else:
                    # Common no-op reasons (no aux client configured)
                    # shouldn't spam logs every tick. Log at debug.
                    logger.debug(
                        "kanban auto-decompose [%s]: %s skipped: %s",
                        slug, tid, outcome.reason,
                    )
        finally:
            if prev_env is None:
                os.environ.pop("HERMES_KANBAN_BOARD", None)
            else:
                os.environ["HERMES_KANBAN_BOARD"] = prev_env
    return successes
logger.info(
"kanban dispatcher: embedded in gateway (interval=%.1fs)", interval
)
while self._running:
try:
if auto_decompose_enabled:
await asyncio.to_thread(_auto_decompose_tick)
results = await asyncio.to_thread(_tick_once)
any_spawned = False
for slug, res in (results or []):
@ -8845,7 +8940,7 @@ class GatewayRunner:
lines.append("Failed/paused: (none)")
return "\n".join(lines)
if action in ("pause", "resume"):
if action in {"pause", "resume"}:
if not target:
return f"Usage: /platform {action} <name>"
platform = _resolve_platform(target)
@ -8953,13 +9048,15 @@ class GatewayRunner:
logger.debug("Failed to write restart dedup marker: %s", e)
active_agents = self._running_agent_count()
# When running under a service manager (systemd/launchd), use the
# service restart path: exit with code 75 so the service manager
# restarts us. The detached subprocess approach (setsid + bash)
# doesn't work under systemd because KillMode=mixed kills all
# processes in the cgroup, including the detached helper.
# When running under a service manager (systemd/launchd) or inside a
# Docker/Podman container, use the service restart path: exit with
# code 75 so the service manager / container restart policy restarts
# us. The detached subprocess approach (setsid + bash) doesn't work
# under systemd (KillMode=mixed kills the cgroup) or Docker (tini
# exits when the gateway dies, taking the detached helper with it).
_under_service = bool(os.environ.get("INVOCATION_ID")) # systemd sets this
if _under_service:
_in_container = os.path.exists("/.dockerenv") or os.path.exists("/run/.containerenv")
if _under_service or _in_container:
self.request_restart(detached=False, via_service=True)
else:
self.request_restart(detached=True, via_service=False)
@ -12528,6 +12625,12 @@ class GatewayRunner:
and getattr(source, "chat_type", None) == "dm"
):
metadata["telegram_dm_topic_reply_fallback"] = True
# Telegram DM topic lanes need direct_messages_topic_id in metadata
# so synthetic/queued messages (goal continuations, status notices)
# route to the correct topic even when reply anchor is unavailable.
tid = str(thread_id)
if tid and tid not in {"", "1"}:
metadata["direct_messages_topic_id"] = tid
anchor = reply_to_message_id or getattr(source, "message_id", None)
if anchor is not None:
metadata["telegram_reply_to_message_id"] = str(anchor)
@ -12813,7 +12916,11 @@ class GatewayRunner:
update_cmd = (
f"PYTHONUNBUFFERED=1 {hermes_cmd_str} update --gateway"
f" > {shlex.quote(str(output_path))} 2>&1; "
f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}"
# Avoid `status=$?`: `status` is a read-only special parameter
# in zsh, and this command string is copied/reused in macOS/zsh
# operator wrappers. Keep the template zsh-safe even though this
# specific subprocess currently runs under bash.
f"rc=$?; printf '%s' \"$rc\" > {shlex.quote(str(exit_code_path))}"
)
setsid_bin = shutil.which("setsid")
if setsid_bin:

File diff suppressed because it is too large Load diff

View file

@ -48,9 +48,9 @@ def parse_args(arg_string: str) -> tuple[Optional[str], list[str]]:
if not raw:
return None, []
# Accept human-friendly synonyms
if raw in ("on", "codex", "enable"):
if raw in {"on", "codex", "enable"}:
return "codex_app_server", []
if raw in ("off", "default", "disable", "hermes"):
if raw in {"off", "default", "disable", "hermes"}:
return "auto", []
if raw in VALID_RUNTIMES:
return raw, []

View file

@ -123,7 +123,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
CommandDef("model", "Switch model for this session", "Configuration",
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
CommandDef("codex-runtime", "Toggle codex app-server runtime for OpenAI/Codex models",
"Configuration", args_hint="[auto|codex_app_server]"),
"Configuration", aliases=("codex_runtime",),
args_hint="[auto|codex_app_server]"),
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
cli_only=True),

View file

@ -926,6 +926,31 @@ DEFAULT_CONFIG = {
"timeout": 120,
"extra_body": {},
},
# Kanban decomposer — decomposes a triage task into a graph of
# child tasks routed to specialist profiles by description.
# Invoked by ``hermes kanban decompose`` and the kanban
# auto-decompose dispatcher tick. Returns a JSON task graph;
# uses more tokens than the specifier so allow more headroom.
"kanban_decomposer": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
"timeout": 180,
"extra_body": {},
},
# Profile describer — auto-generates a 1-2 sentence description
# of what a profile is good at. Invoked by
# ``hermes profile describe <name> --auto`` and the dashboard's
# auto-generate button. Short, cheap call.
"profile_describer": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
"timeout": 60,
"extra_body": {},
},
# Curator — skill-usage review fork. Timeout is generous because the
# review pass can take several minutes on reasoning models (umbrella
# building over hundreds of candidate skills). "auto" = use main chat
@ -1473,6 +1498,25 @@ DEFAULT_CONFIG = {
# same task/profile (spawn_failed, timed_out, or crashed). Reassignment
# resets the streak for the new profile.
"failure_limit": 2,
# Profile that decomposes tasks in the Triage column. When unset,
# falls back to the default profile (the one `hermes` launches with
# no -p flag). Set this to a dedicated 'orchestrator' profile if you
# want decomposition to use a different model/skills from your main
# working profile.
"orchestrator_profile": "",
# Where a child task lands if the orchestrator can't match an
# assignee to any installed profile. When unset, falls back to the
# default profile. A task never ends up with assignee=None.
"default_assignee": "",
# When true, the kanban dispatcher auto-runs the decomposer on
# tasks that land in Triage (every dispatcher tick). When false,
# decomposition is manual via `hermes kanban decompose <id>` or
# the dashboard's Decompose button.
"auto_decompose": True,
# Max triage tasks to decompose per dispatcher tick. Prevents a
# large bulk-load of triage tasks from spending a burst of aux
# LLM calls in one tick. Excess tasks defer to the next tick.
"auto_decompose_per_tick": 3,
},
# execute_code settings — controls the tool used for programmatic tool calls.
@ -2913,6 +2957,7 @@ def _normalize_custom_provider_entry(
"api_mode", "transport", "model", "default_model", "models",
"context_length", "rate_limit_delay",
"request_timeout_seconds", "stale_timeout_seconds",
"discover_models",
}
for camel, snake in _CAMEL_ALIASES.items():
if camel in entry and snake not in entry:
@ -3003,6 +3048,10 @@ def _normalize_custom_provider_entry(
if isinstance(rate_limit_delay, (int, float)) and rate_limit_delay >= 0:
normalized["rate_limit_delay"] = rate_limit_delay
discover_models = entry.get("discover_models")
if isinstance(discover_models, bool):
normalized["discover_models"] = discover_models
return normalized

View file

@ -91,7 +91,7 @@ def ensure_dependency(dep: str, interactive: bool = True) -> bool:
reply = input(f"{desc} is not installed. Install now? [Y/n] ").strip().lower()
except (EOFError, KeyboardInterrupt):
return False
if reply not in ("", "y", "yes"):
if reply not in {"", "y", "yes"}:
return False
result = subprocess.run(

View file

@ -160,19 +160,25 @@ def _has_healthy_oauth_fallback_for_apikey_provider(provider_label: str) -> bool
still show a failed API-key connectivity row, but it should not promote
that direct-key problem into the final blocking summary.
"""
try:
from hermes_cli.auth import (
get_gemini_oauth_auth_status,
get_minimax_oauth_auth_status,
)
except Exception:
return False
normalized = (provider_label or "").strip().lower()
if normalized in {"google / gemini", "gemini"}:
return bool((get_gemini_oauth_auth_status() or {}).get("logged_in"))
try:
from hermes_cli.auth import get_gemini_oauth_auth_status
return bool((get_gemini_oauth_auth_status() or {}).get("logged_in"))
except Exception:
return False
if normalized == "minimax":
return bool((get_minimax_oauth_auth_status() or {}).get("logged_in"))
try:
from hermes_cli.auth import get_minimax_oauth_auth_status
return bool((get_minimax_oauth_auth_status() or {}).get("logged_in"))
except Exception:
return False
if normalized == "xai":
try:
from hermes_cli.auth import get_xai_oauth_auth_status
return bool((get_xai_oauth_auth_status() or {}).get("logged_in"))
except Exception:
return False
return False
@ -645,31 +651,41 @@ def run_doctor(args):
# Check credentials for the configured provider.
# Limit to API-key providers in PROVIDER_REGISTRY — other provider
# types (OAuth, SDK, openrouter/anthropic/custom/auto) have their
# own env-var checks elsewhere in doctor, and get_auth_status()
# returns a bare {logged_in: False} for anything it doesn't
# explicitly dispatch, which would produce false positives.
if runtime_provider and runtime_provider not in {"auto", "custom", "openrouter"}:
# types (OAuth, SDK, anthropic/custom/auto) have their own env-var
# checks elsewhere in doctor, and get_auth_status() returns a bare
# {logged_in: False} for anything it doesn't explicitly dispatch,
# which would produce false positives.
if runtime_provider and runtime_provider not in ("auto", "custom"):
try:
from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
pconfig = PROVIDER_REGISTRY.get(runtime_provider)
if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
status = get_auth_status(runtime_provider) or {}
if runtime_provider == "openrouter":
from hermes_cli.config import get_env_value
configured = bool(
status.get("configured")
or status.get("logged_in")
or status.get("api_key")
str(get_env_value("OPENROUTER_API_KEY") or "").strip()
or str(get_env_value("OPENAI_API_KEY") or "").strip()
)
if not configured:
check_fail(
f"model.provider '{runtime_provider}' is set but no API key is configured",
"(check ~/.hermes/.env or run 'hermes setup')",
)
issues.append(
f"No credentials found for provider '{runtime_provider}'. "
f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
f"or switch providers with 'hermes config set model.provider <name>'"
else:
from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
pconfig = PROVIDER_REGISTRY.get(runtime_provider)
configured = True
if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
status = get_auth_status(runtime_provider) or {}
configured = bool(
status.get("configured")
or status.get("logged_in")
or status.get("api_key")
)
if not configured:
check_fail(
f"model.provider '{runtime_provider}' is set but no API key is configured",
"(check ~/.hermes/.env or run 'hermes setup')",
)
issues.append(
f"No credentials found for provider '{runtime_provider}'. "
f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
f"or switch providers with 'hermes config set model.provider <name>'"
)
except Exception:
pass
@ -817,6 +833,20 @@ def run_doctor(args):
except Exception as e:
check_warn("Auth provider status", f"(could not check: {e})")
# xAI OAuth — separate try/except so an import failure here cannot
# disrupt the already-printed Nous/Codex/Gemini/MiniMax rows above.
try:
from hermes_cli.auth import get_xai_oauth_auth_status
xai_oauth_status = get_xai_oauth_auth_status() or {}
if xai_oauth_status.get("logged_in"):
check_ok("xAI OAuth", "(logged in)")
else:
check_warn("xAI OAuth", "(not logged in)")
if xai_oauth_status.get("error"):
check_info(xai_oauth_status["error"])
except Exception:
pass
if _safe_which("codex"):
check_ok("codex CLI")
else:
@ -1073,10 +1103,20 @@ def run_doctor(args):
if terminal_env == "ssh":
ssh_host = os.getenv("TERMINAL_SSH_HOST")
if ssh_host:
ssh_user = os.getenv("TERMINAL_SSH_USER")
ssh_port = os.getenv("TERMINAL_SSH_PORT")
ssh_key = os.getenv("TERMINAL_SSH_KEY")
target = f"{ssh_user}@{ssh_host}" if ssh_user else ssh_host
cmd = ["ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes"]
if ssh_port:
cmd += ["-p", ssh_port]
if ssh_key:
cmd += ["-i", os.path.expanduser(ssh_key)]
cmd += [target, "echo ok"]
# Try to connect
try:
result = subprocess.run(
["ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes", ssh_host, "echo ok"],
cmd,
capture_output=True,
text=True,
timeout=15
@ -1474,6 +1514,15 @@ def run_doctor(args):
}
if base_url_host_matches(base, "api.kimi.com"):
headers["User-Agent"] = "claude-code/0.1.0"
# Google's Generative Language API (generativelanguage.googleapis.com)
# rejects ``Authorization: Bearer <api-key>`` with 401
# ``ACCESS_TOKEN_TYPE_UNSUPPORTED`` — that header is reserved for
# OAuth 2 access tokens, not plain API keys. Plain keys use
# ``x-goog-api-key`` (or ``?key=``). Without this, a perfectly valid
# GOOGLE_API_KEY/GEMINI_API_KEY always shows red in ``hermes doctor``.
if url and base_url_host_matches(url, "generativelanguage.googleapis.com"):
headers.pop("Authorization", None)
headers["x-goog-api-key"] = key
r = httpx.get(url, headers=headers, timeout=10)
if (
pname == "Alibaba/DashScope"

View file

@ -2110,24 +2110,30 @@ def _build_service_path_dirs(project_root: Path | None = None) -> list[str]:
if project_root is None:
project_root = PROJECT_ROOT
def _is_dir(path: Path) -> bool:
try:
return path.is_dir()
except OSError:
return False
candidates = []
venv_bin = project_root / "venv" / "bin"
if venv_bin.is_dir():
if _is_dir(venv_bin):
candidates.append(str(venv_bin))
elif sys.prefix != sys.base_prefix:
candidates.append(str(Path(sys.prefix) / "bin"))
node_bin = project_root / "node_modules" / ".bin"
if node_bin.is_dir():
if _is_dir(node_bin):
candidates.append(str(node_bin))
hermes_home = get_hermes_home()
hermes_node = hermes_home / "node" / "bin"
if hermes_node.is_dir():
if _is_dir(hermes_node):
candidates.append(str(hermes_node))
hermes_nm = hermes_home / "node_modules" / ".bin"
if hermes_nm.is_dir():
if _is_dir(hermes_nm):
candidates.append(str(hermes_nm))
return candidates

View file

@ -34,6 +34,7 @@ import logging
import re
import time
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
@ -110,6 +111,7 @@ JUDGE_SYSTEM_PROMPT = (
JUDGE_USER_PROMPT_TEMPLATE = (
"Goal:\n{goal}\n\n"
"Agent's most recent response:\n{response}\n\n"
"Current time: {current_time}\n\n"
"Is the goal satisfied?"
)
@ -120,6 +122,7 @@ JUDGE_USER_PROMPT_WITH_SUBGOALS_TEMPLATE = (
"Additional criteria the user added mid-loop (all must also be "
"satisfied for the goal to be DONE):\n{subgoals_block}\n\n"
"Agent's most recent response:\n{response}\n\n"
"Current time: {current_time}\n\n"
"Decision: For each numbered criterion above, find concrete "
"evidence in the agent's response that the criterion is "
"satisfied. Do not accept generic phrases like 'all requirements "
@ -415,6 +418,7 @@ def judge_goal(
# Build the prompt — pick the with-subgoals variant when applicable.
clean_subgoals = [s.strip() for s in (subgoals or []) if s and s.strip()]
current_time = datetime.now(tz=timezone.utc).astimezone().strftime("%Y-%m-%d %H:%M:%S %Z")
if clean_subgoals:
subgoals_block = "\n".join(
f"- {i}. {text}" for i, text in enumerate(clean_subgoals, start=1)
@ -423,11 +427,13 @@ def judge_goal(
goal=_truncate(goal, 2000),
subgoals_block=_truncate(subgoals_block, 2000),
response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
current_time=current_time,
)
else:
prompt = JUDGE_USER_PROMPT_TEMPLATE.format(
goal=_truncate(goal, 2000),
response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
current_time=current_time,
)
try:

View file

@ -610,6 +610,43 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu
help="Emit one JSON object per task on stdout",
)
# --- decompose --- (triage → fan-out via auxiliary LLM + orchestrator)
p_decompose = sub.add_parser(
"decompose",
help="Decompose a triage-column task into a graph of child tasks "
"routed to specialist profiles by description. Falls back to "
"specify-style single-task promotion when the task doesn't "
"benefit from fan-out. Uses auxiliary.kanban_decomposer.",
)
p_decompose.add_argument(
"task_id",
nargs="?",
default=None,
help="Task id to decompose (required unless --all is given)",
)
p_decompose.add_argument(
"--all",
dest="all_triage",
action="store_true",
help="Decompose every task currently in the triage column",
)
p_decompose.add_argument(
"--tenant",
default=None,
help="When used with --all, restrict the sweep to this tenant",
)
p_decompose.add_argument(
"--author",
default=None,
help="Author name recorded on the audit comment "
"(default: $HERMES_PROFILE or 'decomposer')",
)
p_decompose.add_argument(
"--json",
action="store_true",
help="Emit one JSON object per task on stdout",
)
# --- gc ---
p_gc = sub.add_parser(
"gc", help="Garbage-collect archived-task workspaces, old events, and old logs",
@ -740,6 +777,7 @@ def kanban_command(args: argparse.Namespace) -> int:
"notify-unsubscribe": _cmd_notify_unsubscribe,
"context": _cmd_context,
"specify": _cmd_specify,
"decompose": _cmd_decompose,
"gc": _cmd_gc,
}
handler = handlers.get(action)
@ -2115,6 +2153,87 @@ def _cmd_specify(args: argparse.Namespace) -> int:
return 0 if (ok_count > 0 or not ids) else 1
def _cmd_decompose(args: argparse.Namespace) -> int:
"""Fan a triage task (or all of them) out into a graph of child
tasks via the auxiliary LLM, routed to specialist profiles by
description. Thin wrapper over ``kanban_decompose``."""
from hermes_cli import kanban_decompose as decomp
all_flag = bool(getattr(args, "all_triage", False))
tenant = getattr(args, "tenant", None)
author = getattr(args, "author", None) or _profile_author()
want_json = bool(getattr(args, "json", False))
if args.task_id and all_flag:
print(
"kanban: pass either a task id OR --all, not both",
file=sys.stderr,
)
return 2
if all_flag:
ids = decomp.list_triage_ids(tenant=tenant)
if not ids:
msg = (
"No triage tasks"
+ (f" for tenant {tenant!r}" if tenant else "")
+ "."
)
if want_json:
print(json.dumps({"decomposed": 0, "total": 0}))
else:
print(msg)
return 0
elif args.task_id:
ids = [args.task_id]
else:
print(
"kanban: decompose requires a task id or --all",
file=sys.stderr,
)
return 2
ok_count = 0
for tid in ids:
outcome = decomp.decompose_task(tid, author=author)
if outcome.ok:
ok_count += 1
if want_json:
print(json.dumps({
"task_id": outcome.task_id,
"ok": outcome.ok,
"reason": outcome.reason,
"fanout": outcome.fanout,
"child_ids": outcome.child_ids,
"new_title": outcome.new_title,
}))
elif outcome.ok:
if outcome.fanout and outcome.child_ids:
child_summary = ", ".join(outcome.child_ids)
print(
f"Decomposed {outcome.task_id}{len(outcome.child_ids)} "
f"children ({child_summary}); root promoted to todo"
)
else:
title_suffix = (
f" — retitled: {outcome.new_title!r}"
if outcome.new_title
else ""
)
print(
f"Specified {outcome.task_id} → todo "
f"(no fanout){title_suffix}"
)
else:
print(
f"kanban: decompose {outcome.task_id}: {outcome.reason}",
file=sys.stderr,
)
if not all_flag:
return 0 if ok_count == 1 else 1
return 0 if (ok_count > 0 or not ids) else 1
def _cmd_gc(args: argparse.Namespace) -> int:
"""Remove scratch workspaces of archived tasks, prune old events, and
delete old worker logs."""

View file

@ -93,6 +93,7 @@ from toolsets import get_toolset_names
VALID_STATUSES = {"triage", "todo", "ready", "running", "blocked", "done", "archived"}
VALID_WORKSPACE_KINDS = {"scratch", "worktree", "dir"}
KNOWN_TOOLSET_NAMES = frozenset(name.casefold() for name in get_toolset_names())
_IS_WINDOWS = sys.platform == "win32"
# A running task's claim is valid for 15 minutes; after that the next
# dispatcher tick reclaims it. Workers that outlive this window should call
@ -2776,6 +2777,180 @@ def specify_triage_task(
return True
def decompose_triage_task(
    conn: sqlite3.Connection,
    task_id: str,
    *,
    root_assignee: Optional[str],
    children: list[dict],
    author: Optional[str] = None,
) -> Optional[list[str]]:
    """Fan a triage task out into child tasks and promote the root to ``todo``.

    The root task stays alive. Every created child is recorded in
    ``task_links`` as a *parent* of the root (``parent_id = child``,
    ``child_id = root``), i.e. the root depends on the whole graph. The
    design intent is that once all children are done the root becomes
    ready again and its assignee (typically the orchestrator profile) can
    judge completion or spawn more work.

    ``children`` is a list of dicts, each shaped like::

        {
            "title": "...",
            "body": "...",               # optional
            "assignee": "profile-name",  # optional
            "parents": [0, 2],           # indices into this same list
        }

    Args:
        conn: Open kanban database connection.
        task_id: Id of the root task; it must currently be in ``triage``.
        root_assignee: Assignee to set on the root, or ``None`` to leave
            the root's assignee unchanged.
        children: Child task specs (see above).
        author: Name recorded as ``created_by`` on the children and as the
            author of the audit comment. ``"decomposer"`` is used for
            ``created_by`` when this is empty; the audit comment is only
            written when a non-blank author is given.

    Returns:
        The created child task ids in input order, or ``None`` when
        ``children`` is empty, the root task does not exist, or the root
        task is not in ``triage``.

    Raises:
        ValueError: A child entry is malformed (not a dict, missing title,
            bad ``parents`` list, self-reference) or the ``parents``
            references form a dependency cycle. Raised before the database
            is touched, so nothing is written.
    """
    if not children:
        return None
    if root_assignee is not None:
        root_assignee = _canonical_assignee(root_assignee)

    # Pre-validate the children list shape outside the txn. Cheap checks
    # that don't need DB access. Bad input aborts before we touch the DB.
    for idx, child in enumerate(children):
        if not isinstance(child, dict):
            raise ValueError(f"child[{idx}] is not a dict")
        title = child.get("title")
        if not isinstance(title, str) or not title.strip():
            raise ValueError(f"child[{idx}].title is required")
        parents_idx = child.get("parents") or []
        if not isinstance(parents_idx, list):
            raise ValueError(f"child[{idx}].parents must be a list")
        for p in parents_idx:
            if not isinstance(p, int) or p < 0 or p >= len(children):
                raise ValueError(
                    f"child[{idx}].parents[{p}] is not a valid index into children"
                )
            if p == idx:
                raise ValueError(f"child[{idx}] cannot list itself as a parent")

    # Reject multi-node cycles (e.g. 0 -> 1 -> 0). The per-entry checks
    # above only catch direct self-references; a longer cycle would insert
    # children that each wait on one another.
    from graphlib import CycleError, TopologicalSorter

    dependency_graph = {
        idx: list(child.get("parents") or [])
        for idx, child in enumerate(children)
    }
    try:
        TopologicalSorter(dependency_graph).prepare()
    except CycleError as exc:
        raise ValueError(
            f"children contain a dependency cycle: {exc.args[1]}"
        ) from exc

    # We do the full decomposition in a SINGLE write_txn so it's
    # atomic: either every child is created AND the root flips to
    # ``todo``, or nothing changes. We deliberately do NOT call any
    # kb helper that opens its own write_txn (create_task, link_tasks,
    # add_comment) from inside this block. Instead we inline the
    # INSERTs and _append_event calls.
    now = int(time.time())
    creator = author or "decomposer"
    child_ids: list[str] = []
    with write_txn(conn):
        root_row = conn.execute(
            "SELECT id, status, tenant FROM tasks WHERE id = ?", (task_id,)
        ).fetchone()
        if root_row is None:
            return None
        if root_row["status"] != "triage":
            return None
        tenant = root_row["tenant"]

        # Create children. Status is 'todo' regardless of parents; the
        # links are added afterwards and recompute_ready() at the end is
        # what promotes eligible children.
        for child in children:
            new_id = _new_task_id()
            title = child["title"].strip()
            body = child.get("body")
            assignee = _canonical_assignee(child.get("assignee"))
            conn.execute(
                "INSERT INTO tasks "
                "(id, title, body, assignee, status, workspace_kind, "
                " tenant, created_at, created_by) "
                "VALUES (?, ?, ?, ?, 'todo', 'scratch', ?, ?, ?)",
                (
                    new_id,
                    title,
                    body if isinstance(body, str) else None,
                    assignee,
                    tenant,
                    now,
                    creator,
                ),
            )
            _append_event(
                conn, new_id, "created",
                {"by": creator, "from_decompose_of": task_id},
            )
            child_ids.append(new_id)

        # Link children to their sibling parents (within the decomposed graph).
        for idx, child in enumerate(children):
            for p_idx in child.get("parents") or []:
                parent_id = child_ids[p_idx]
                child_id = child_ids[idx]
                conn.execute(
                    "INSERT OR IGNORE INTO task_links (parent_id, child_id) "
                    "VALUES (?, ?)",
                    (parent_id, child_id),
                )
                _append_event(
                    conn, child_id, "linked",
                    {"parent": parent_id, "child": child_id},
                )

        # Make the ROOT depend on the whole graph. Simpler than computing
        # leaves: link the root under every child. Cycle-free because the
        # root is only ever on the child side of these links.
        for cid in child_ids:
            conn.execute(
                "INSERT OR IGNORE INTO task_links (parent_id, child_id) "
                "VALUES (?, ?)",
                (cid, task_id),
            )

        # Flip the root: triage -> todo, optionally reassigning it.
        sets = ["status = 'todo'"]
        params: list[Any] = []
        if root_assignee is not None:
            sets.append("assignee = ?")
            params.append(root_assignee)
        params.append(task_id)
        conn.execute(
            f"UPDATE tasks SET {', '.join(sets)} WHERE id = ?",
            tuple(params),
        )

        # Audit comment on the root so the timeline shows the fan-out.
        if author and author.strip():
            conn.execute(
                "INSERT INTO task_comments (task_id, author, body, created_at) "
                "VALUES (?, ?, ?, ?)",
                (
                    task_id,
                    author.strip(),
                    "Decomposed into "
                    + ", ".join(child_ids)
                    + ". Root will wake when all children complete.",
                    now,
                ),
            )
        _append_event(
            conn, task_id, "decomposed",
            {
                "child_ids": child_ids,
                "root_assignee": root_assignee,
            },
        )

    # Outside the write_txn: let recompute_ready() promote whatever is now
    # eligible so the dispatcher picks it up on its next tick.
    recompute_ready(conn)
    return child_ids
def archive_task(conn: sqlite3.Connection, task_id: str) -> bool:
with write_txn(conn):
cur = conn.execute(
@ -4024,6 +4199,7 @@ def _default_spawn(
stderr=subprocess.STDOUT,
env=env,
start_new_session=True,
creationflags=subprocess.CREATE_NO_WINDOW if _IS_WINDOWS else 0,
)
except FileNotFoundError:
log_f.close()

View file

@ -0,0 +1,440 @@
"""Kanban decomposer — fan a triage task out into a graph of child tasks.
Invoked by ``hermes kanban decompose [task_id | --all]`` and the
auto-decompose path in the gateway dispatcher loop. Reads the user's
profile roster (with descriptions) and asks the auxiliary LLM to
return a task graph in JSON. Then atomically creates the children,
links them under the root, and flips the root ``triage -> todo``.
The root task stays alive and is linked to wait on every child, so when
the whole graph completes the root wakes back up — its assignee (the
orchestrator profile) gets a chance to judge completion and add more
tasks if the work isn't done yet.
Design notes
------------
* Mirrors the shape of ``hermes_cli/kanban_specify.py``: lazy aux
client import inside the function, lenient response parse, never
raises on expected failure modes.
* The system prompt sees the *configured* profile roster names plus
descriptions plus the default fallback. Profiles without a
description are still listed (with a note) so the orchestrator can
match on name as a fallback, but the user has an obvious incentive
to describe them.
* ``fanout=false`` collapses to the same effect as ``kanban specify``:
we tighten the body and flip ``triage -> todo`` as a single task,
no children created. This makes ``decompose`` a strict superset of
``specify`` from the user's perspective.
* If the LLM picks an assignee that doesn't exist as a profile, we
rewrite it to the configured ``default_assignee`` (or the default
profile if unset). A child task NEVER ends up with ``assignee=None``.
"""
from __future__ import annotations
import json
import logging
import os
import re
from dataclasses import dataclass
from typing import Optional
from hermes_cli import kanban_db as kb
from hermes_cli import profiles as profiles_mod
logger = logging.getLogger(__name__)
_SYSTEM_PROMPT = """You are the Kanban decomposer for the Hermes Agent board.
A user dropped a rough idea into the Triage column. Your job is to break it
into a small graph of concrete child tasks and route each one to the best-
matching profile from the available roster.
You will be given:
- The original task title and body
- The list of available profiles (each with name + description)
- The fallback "default_assignee" used when no profile fits
Output a single JSON object with this exact shape:
{
"fanout": true,
"rationale": "<one sentence on why this decomposition>",
"tasks": [
{
"title": "<concrete task title, imperative voice, <= 80 chars>",
"body": "<detailed spec for the worker on this child task>",
"assignee": "<profile name from the roster, or null for default>",
"parents": [<int>, ...]
},
...
]
}
Rules:
- "parents" is a list of INDICES (0-based) into this same "tasks" list,
expressing actual data dependencies. Tasks with no parents run in
PARALLEL. Tasks with parents wait until every parent completes.
- Prefer parallelism. If two tasks can be done independently, give
them no parents so the dispatcher fans them out at once.
- Use 2-6 tasks for normal work. Don't create 20 tiny tasks. Don't
cram everything into 1 task.
- Pick assignees from the roster by matching the task to the profile's
DESCRIPTION (not just the name). When nothing matches well, use null
and the system will route to the default_assignee.
- Each child task body is what a fresh worker will read with no other
context be specific about goal, approach, and acceptance criteria.
When the task is genuinely a single unit of work (no useful decomposition),
return:
{
"fanout": false,
"rationale": "<one sentence>",
"title": "<tightened title>",
"body": "<concrete spec for a single worker>"
}
In that case the task stays as one work item, just with a tightened spec.
No preamble, no closing remarks, no code fences. Output only the JSON object.
"""
_USER_TEMPLATE = """Task id: {task_id}
Title: {title}
Body:
{body}
Available profiles (assignees you may pick from):
{roster}
Default assignee (used when no profile fits a task): {default_assignee}
"""
_FENCE_RE = re.compile(r"^```(?:json)?\s*|\s*```$", re.MULTILINE)
@dataclass
class DecomposeOutcome:
    """Outcome record for one decomposition attempt on a triage task."""

    # Id of the task the attempt was made on.
    task_id: str
    # True when the task was promoted (with or without fan-out).
    ok: bool
    # Short human-readable explanation of the result.
    reason: str = ""
    # True when child tasks were created.
    fanout: bool = False
    # Ids of the created children, when there are any.
    child_ids: Optional[list[str]] = None
    # Replacement title applied on the single-task (no fan-out) path.
    new_title: Optional[str] = None
def _truncate(text: str, limit: int) -> str:
if len(text) <= limit:
return text
return text[: limit - 1] + ""
def _extract_json_blob(raw: str) -> Optional[dict]:
    """Leniently pull a JSON object out of an LLM reply.

    Code fences matched by ``_FENCE_RE`` are removed, then the text from
    the first ``{`` to the last ``}`` is parsed. Returns the decoded dict,
    or ``None`` when the reply is empty, has no brace pair, fails to parse,
    or decodes to something other than a dict.
    """
    if not raw:
        return None
    text = _FENCE_RE.sub("", raw.strip())
    start, end = text.find("{"), text.rfind("}")
    if start < 0 or end <= start:
        return None
    try:
        # json.JSONDecodeError is a ValueError subclass.
        decoded = json.loads(text[start : end + 1])
    except ValueError:
        return None
    return decoded if isinstance(decoded, dict) else None
def _profile_author() -> str:
"""Mirror of ``hermes_cli.kanban._profile_author``."""
return (
os.environ.get("HERMES_PROFILE")
or os.environ.get("USER")
or "decomposer"
)
def _load_config() -> dict:
try:
from hermes_cli.config import load_config
return load_config() or {}
except Exception:
return {}
def _resolve_orchestrator_profile(cfg: dict) -> str:
"""Resolve which profile owns decomposition.
Falls back to the active default profile when ``kanban.orchestrator_profile``
is unset, so a task is never stranded for lack of an orchestrator.
"""
kanban_cfg = cfg.get("kanban", {}) if isinstance(cfg, dict) else {}
explicit = (kanban_cfg.get("orchestrator_profile") or "").strip()
if explicit:
try:
if profiles_mod.profile_exists(explicit):
return explicit
except Exception:
pass
# Fall back to the active default profile.
try:
return profiles_mod.get_active_profile_name() or "default"
except Exception:
return "default"
def _resolve_default_assignee(cfg: dict) -> str:
"""Resolve which profile catches child tasks the orchestrator can't route."""
kanban_cfg = cfg.get("kanban", {}) if isinstance(cfg, dict) else {}
explicit = (kanban_cfg.get("default_assignee") or "").strip()
if explicit:
try:
if profiles_mod.profile_exists(explicit):
return explicit
except Exception:
pass
try:
return profiles_mod.get_active_profile_name() or "default"
except Exception:
return "default"
def _build_roster() -> tuple[list[dict], set[str]]:
    """Collect the profile roster shown to the LLM.

    Returns ``(roster, valid_names)``. Each roster entry is a dict with
    ``name``, ``description`` and ``has_description``; profiles without a
    description get a placeholder text that mentions their name.
    ``valid_names`` holds every listed profile name and is used after the
    LLM responds to spot assignees that are not real profiles. When the
    profile listing fails, a warning is logged and both results are empty.
    """
    entries: list[dict] = []
    names: set[str] = set()
    try:
        profiles = profiles_mod.list_profiles()
    except Exception as exc:
        logger.warning("decompose: failed to list profiles: %s", exc)
        return entries, names

    for profile in profiles:
        text = (profile.description or "").strip()
        if text:
            shown = text
        else:
            shown = f"(no description; profile named {profile.name!r})"
        entries.append(
            {
                "name": profile.name,
                "description": shown,
                "has_description": bool(text),
            }
        )
        names.add(profile.name)
    return entries, names
def _format_roster(roster: list[dict]) -> str:
if not roster:
return " (no profiles installed — decomposer cannot route work)"
lines = []
for entry in roster:
tag = "" if entry["has_description"] else " ⚠ undescribed"
lines.append(f" - {entry['name']}{tag}: {entry['description']}")
return "\n".join(lines)
def decompose_task(
    task_id: str,
    *,
    author: Optional[str] = None,
    timeout: Optional[int] = None,
) -> DecomposeOutcome:
    """Decompose a triage task into a graph of child tasks.

    Asks the auxiliary LLM (task name ``"kanban_decomposer"``) for a JSON
    plan. With ``fanout=true`` the plan's tasks are created as children
    via ``kb.decompose_triage_task``; with ``fanout=false`` the task is
    promoted as a single item via ``kb.specify_triage_task``.

    Args:
        task_id: Id of the task to decompose.
        author: Name recorded on the audit trail; defaults to
            ``_profile_author()``.
        timeout: Timeout passed to the LLM call; 180 is used when unset
            or zero.

    Returns:
        A ``DecomposeOutcome`` describing what happened. Expected failure
        modes (task not in triage, no aux client configured, API error,
        malformed response, ``fanout=true`` with an empty task list, DB
        rejection) are reported via ``ok=False`` rather than raised.
    """
    with kb.connect() as conn:
        task = kb.get_task(conn, task_id)
    if task is None:
        return DecomposeOutcome(task_id, False, "unknown task id")
    if task.status != "triage":
        return DecomposeOutcome(
            task_id, False, f"task is not in triage (status={task.status!r})"
        )

    cfg = _load_config()
    orchestrator = _resolve_orchestrator_profile(cfg)
    default_assignee = _resolve_default_assignee(cfg)
    roster, valid_names = _build_roster()

    # Lazy import: the auxiliary client is only needed on this path.
    try:
        from agent.auxiliary_client import (  # type: ignore
            get_auxiliary_extra_body,
            get_text_auxiliary_client,
        )
    except Exception as exc:
        logger.debug("decompose: auxiliary client import failed: %s", exc)
        return DecomposeOutcome(task_id, False, "auxiliary client unavailable")

    try:
        client, model = get_text_auxiliary_client("kanban_decomposer")
    except Exception as exc:
        logger.debug("decompose: get_text_auxiliary_client failed: %s", exc)
        return DecomposeOutcome(task_id, False, "auxiliary client unavailable")
    if client is None or not model:
        return DecomposeOutcome(task_id, False, "no auxiliary client configured")

    user_msg = _USER_TEMPLATE.format(
        task_id=task.id,
        title=_truncate(task.title or "", 400),
        body=_truncate(task.body or "(no body)", 4000),
        roster=_format_roster(roster),
        default_assignee=default_assignee,
    )

    try:
        resp = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            temperature=0.3,
            max_tokens=4000,
            timeout=timeout or 180,
            extra_body=get_auxiliary_extra_body() or None,
        )
    except Exception as exc:
        logger.info(
            "decompose: API call failed for %s (%s)", task_id, exc,
        )
        return DecomposeOutcome(task_id, False, f"LLM error: {type(exc).__name__}")

    # A response without the expected shape is handled as an empty reply,
    # which then fails JSON extraction below.
    try:
        raw = resp.choices[0].message.content or ""
    except Exception:
        raw = ""

    parsed = _extract_json_blob(raw)
    if parsed is None:
        return DecomposeOutcome(task_id, False, "LLM returned malformed JSON")

    fanout = bool(parsed.get("fanout"))
    audit_author = author or _profile_author()

    if not fanout:
        # Fall back to single-task spec promotion (same effect as specify).
        new_title = parsed.get("title")
        new_body = parsed.get("body")
        title_val = new_title.strip() if isinstance(new_title, str) and new_title.strip() else None
        body_val = new_body if isinstance(new_body, str) and new_body.strip() else None
        if title_val is None and body_val is None:
            return DecomposeOutcome(
                task_id, False, "decomposer returned fanout=false with no title/body",
            )
        with kb.connect() as conn:
            ok = kb.specify_triage_task(
                conn,
                task_id,
                title=title_val,
                body=body_val,
                author=audit_author,
            )
        if not ok:
            return DecomposeOutcome(
                task_id, False, "task moved out of triage before promotion",
            )
        return DecomposeOutcome(
            task_id, True, "single task (no fanout)",
            fanout=False, new_title=title_val,
        )

    raw_tasks = parsed.get("tasks") or []
    if not isinstance(raw_tasks, list) or not raw_tasks:
        return DecomposeOutcome(
            task_id, False, "decomposer returned fanout=true with empty tasks list",
        )

    # Rewrite missing or unknown assignees to the default fallback so no
    # child is handed over with an empty or unknown assignee.
    children: list[dict] = []
    for idx, entry in enumerate(raw_tasks):
        if not isinstance(entry, dict):
            return DecomposeOutcome(
                task_id, False, f"tasks[{idx}] is not an object",
            )
        title = entry.get("title")
        if not isinstance(title, str) or not title.strip():
            return DecomposeOutcome(
                task_id, False, f"tasks[{idx}].title is missing or empty",
            )
        body = entry.get("body")
        if not isinstance(body, str):
            body = ""

        assignee = entry.get("assignee")
        # Compare the trimmed name so stray whitespace in the LLM output
        # does not turn a valid profile into an "unknown" one.
        candidate = assignee.strip() if isinstance(assignee, str) else ""
        if not candidate:
            chosen = default_assignee
        elif candidate not in valid_names:
            logger.info(
                "decompose: task %s child %d picked unknown assignee %r; "
                "routing to default_assignee %r",
                task_id, idx, assignee, default_assignee,
            )
            chosen = default_assignee
        else:
            chosen = candidate

        parents = entry.get("parents") or []
        if not isinstance(parents, list):
            parents = []
        # Clean parent indices: drop non-int, out-of-range and self-references.
        clean_parents = [
            p for p in parents
            if isinstance(p, int) and 0 <= p < len(raw_tasks) and p != idx
        ]
        children.append({
            "title": title.strip()[:200],
            "body": body.strip(),
            "assignee": chosen,
            "parents": clean_parents,
        })

    try:
        with kb.connect() as conn:
            child_ids = kb.decompose_triage_task(
                conn,
                task_id,
                root_assignee=orchestrator,
                children=children,
                author=audit_author,
            )
    except ValueError as exc:
        return DecomposeOutcome(task_id, False, f"DB rejected graph: {exc}")
    except Exception as exc:
        logger.exception("decompose: DB error on task %s", task_id)
        return DecomposeOutcome(task_id, False, f"DB error: {type(exc).__name__}")

    if child_ids is None:
        return DecomposeOutcome(
            task_id, False, "task moved out of triage before decomposition",
        )
    return DecomposeOutcome(
        task_id, True, f"decomposed into {len(child_ids)} children",
        fanout=True, child_ids=child_ids,
    )
def list_triage_ids(*, tenant: Optional[str] = None) -> list[str]:
    """List the ids of tasks in the triage column.

    Passes ``tenant`` through to ``kb.list_tasks`` as a filter and asks
    for at most 1000 rows.
    """
    query = {"status": "triage", "tenant": tenant, "limit": 1000}
    with kb.connect() as conn:
        triage_rows = kb.list_tasks(conn, **query)
    return [entry.id for entry in triage_rows]

View file

@ -9082,6 +9082,7 @@ def cmd_profile(args):
clone_config=clone,
no_alias=no_alias,
no_skills=no_skills,
description=getattr(args, "description", None),
)
print(f"\nProfile '{name}' created at {profile_dir}")
@ -9181,6 +9182,107 @@ def cmd_profile(args):
print(f"Error: {e}")
sys.exit(1)
elif action == "describe":
# Read or write a profile's description. The description is
# consumed by the kanban decomposer to route tasks based on
# role instead of name alone.
from hermes_cli import profiles as _profiles_mod
all_flag = bool(getattr(args, "all_missing", False))
auto_flag = bool(getattr(args, "auto", False))
overwrite_flag = bool(getattr(args, "overwrite", False))
text_value = getattr(args, "text", None)
name = getattr(args, "profile_name", None)
if all_flag and not auto_flag:
print("profile describe: --all requires --auto", file=sys.stderr)
sys.exit(2)
if all_flag and (text_value or name):
print(
"profile describe: --all is mutually exclusive with a profile name / --text",
file=sys.stderr,
)
sys.exit(2)
if not all_flag and not name:
print("profile describe: profile name is required (or --all --auto)", file=sys.stderr)
sys.exit(2)
if text_value and auto_flag:
print(
"profile describe: --text is mutually exclusive with --auto",
file=sys.stderr,
)
sys.exit(2)
# Show current description if no operation requested.
if name and not text_value and not auto_flag:
try:
if _profiles_mod.normalize_profile_name(name) == "default":
from hermes_constants import get_hermes_home as _hh
profile_dir = Path(_hh())
else:
profile_dir = _profiles_mod.get_profile_dir(name)
except Exception as exc:
print(f"Error: {exc}", file=sys.stderr)
sys.exit(1)
if not profile_dir.is_dir():
print(f"Error: profile '{name}' not found", file=sys.stderr)
sys.exit(1)
meta = _profiles_mod.read_profile_meta(profile_dir)
desc = meta.get("description") or ""
if not desc:
print(f"(no description set for '{name}')")
else:
tag = "[auto] " if meta.get("description_auto") else ""
print(f"{tag}{desc}")
sys.exit(0)
# --text path: just write the user-authored description.
if text_value:
try:
if _profiles_mod.normalize_profile_name(name) == "default":
from hermes_constants import get_hermes_home as _hh
profile_dir = Path(_hh())
else:
profile_dir = _profiles_mod.get_profile_dir(name)
_profiles_mod.write_profile_meta(
profile_dir,
description=text_value,
description_auto=False,
)
print(f"Description updated for '{name}'.")
except Exception as exc:
print(f"Error: {exc}", file=sys.stderr)
sys.exit(1)
sys.exit(0)
# --auto path: invoke the LLM describer.
from hermes_cli import profile_describer as _pd
if all_flag:
targets = _pd.list_describable_profiles(missing_only=True)
if not targets:
print("All profiles already have descriptions.")
sys.exit(0)
else:
targets = [name]
ok_count = 0
fail_count = 0
for tgt in targets:
outcome = _pd.describe_profile(tgt, overwrite=overwrite_flag)
if outcome.ok:
ok_count += 1
print(f"Described '{outcome.profile_name}': {outcome.description}")
else:
fail_count += 1
print(
f"profile describe {outcome.profile_name}: {outcome.reason}",
file=sys.stderr,
)
if not all_flag:
sys.exit(0 if ok_count == 1 else 1)
sys.exit(0 if ok_count > 0 else 1)
elif action == "show":
name = args.profile_name
from hermes_cli.profiles import (
@ -9684,8 +9786,8 @@ _BUILTIN_SUBCOMMANDS = frozenset(
"config", "cron", "curator", "dashboard", "debug", "doctor",
"dump", "fallback", "gateway", "hooks", "import", "insights",
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory",
"model", "pairing", "plugins", "postinstall", "profile", "proxy", "send",
"sessions", "setup",
"model", "pairing", "plugins", "postinstall", "profile", "proxy",
"send", "sessions", "setup",
"skills", "slack", "status", "tools", "uninstall", "update",
"version", "webhook", "whatsapp", "chat",
# Help-ish invocations — plugin commands not being listed in
@ -12076,6 +12178,13 @@ Examples:
action="store_true",
help="Create an empty profile with no bundled skills (opts out of `hermes update` skill sync)",
)
profile_create.add_argument(
"--description",
default=None,
help="One- or two-sentence description of what this profile is good at. "
"Used by the kanban decomposer to route tasks based on role instead "
"of profile name alone. Skip and add later via `hermes profile describe`.",
)
profile_delete = profile_subparsers.add_parser("delete", help="Delete a profile")
profile_delete.add_argument("profile_name", help="Profile to delete")
@ -12083,6 +12192,40 @@ Examples:
"-y", "--yes", action="store_true", help="Skip confirmation prompt"
)
profile_describe = profile_subparsers.add_parser(
"describe",
help="Read or set a profile's description (used by the kanban orchestrator)",
)
profile_describe.add_argument(
"profile_name",
nargs="?",
default=None,
help="Profile to describe (omit + use --all --auto to sweep)",
)
profile_describe.add_argument(
"--text",
default=None,
help="Set description to this exact text (overwrites any existing description)",
)
profile_describe.add_argument(
"--auto",
action="store_true",
help="Auto-generate description via the auxiliary LLM "
"(uses auxiliary.profile_describer)",
)
profile_describe.add_argument(
"--overwrite",
action="store_true",
help="With --auto, replace user-authored descriptions too (default: only "
"fill in missing or previously-auto descriptions)",
)
profile_describe.add_argument(
"--all",
dest="all_missing",
action="store_true",
help="With --auto, run on every profile missing a description",
)
profile_show = profile_subparsers.add_parser("show", help="Show profile details")
profile_show.add_argument("profile_name", help="Profile to show")

View file

@ -1688,7 +1688,26 @@ def list_authenticated_providers(
continue
# Live model discovery from custom provider endpoints (matches
# Section 3 behavior for user ``providers:`` entries).
if api_url and api_key:
# Also probes when no api_key is set (e.g. local llama.cpp /
# Ollama servers) — the /models endpoint often works without
# auth. The CLI's _model_flow_named_custom always probes, so
# the Telegram/Discord picker should do the same for parity.
# Live-discovery policy:
# - With an api_key, the user has explicitly opted into the
# endpoint and live /models is the source of truth — replace
# the (possibly partial) ``models:`` subset configured for
# context-length overrides with the full live catalog.
# This is the Bifrost / aggregator-gateway case.
# - Without an api_key but with an explicit ``models:`` list
# (or top-level ``model:``), the user is narrowing a public
# endpoint to a specific subset (e.g. ollama.com /v1/models
# returns 35 models but the user only wants 4). Preserve the
# explicit list and skip live discovery.
# - Without an api_key AND no explicit models, fall through to
# live discovery so bare-endpoint custom providers (local
# llama.cpp / Ollama servers) still appear populated.
should_probe = bool(api_url) and (bool(api_key) or not grp["models"])
if should_probe:
try:
from hermes_cli.models import fetch_api_models

View file

@ -608,6 +608,38 @@ class PluginContext:
self.manifest.name, provider.name,
)
# -- browser provider registration ---------------------------------------
def register_browser_provider(self, provider) -> None:
    """Add a cloud browser backend to the browser provider registry.

    ``provider`` must be an instance of
    :class:`agent.browser_provider.BrowserProvider`. The
    ``provider.name`` attribute is what ``browser.cloud_provider`` in
    ``config.yaml`` matches against when routing cloud-mode
    ``browser_*`` tool calls.

    Mirrors :meth:`register_web_search_provider` exactly — same
    registration shape, same gating, same logging. The browser
    subsystem's dispatcher (:func:`tools.browser_tool._get_cloud_provider`)
    consults the registry built up by these calls.

    Objects that are not ``BrowserProvider`` instances are ignored with
    a warning; nothing is raised for them.
    """
    # Imported lazily, at call time, rather than at module import.
    from agent.browser_provider import BrowserProvider
    from agent.browser_registry import register_provider as _register_browser_provider

    if isinstance(provider, BrowserProvider):
        _register_browser_provider(provider)
        logger.info(
            "Plugin '%s' registered browser provider: %s",
            self.manifest.name, provider.name,
        )
        return

    # Wrong type: log and drop the registration instead of failing the plugin.
    logger.warning(
        "Plugin '%s' tried to register a browser provider that does "
        "not inherit from BrowserProvider. Ignoring.",
        self.manifest.name,
    )
# -- platform adapter registration ---------------------------------------
def register_platform(

View file

@ -0,0 +1,299 @@
"""Profile describer — auto-generate ``description`` for a profile.
Used by ``hermes profile describe <name> --auto`` and the dashboard's
"auto-generate description" button. Reads the profile's installed
skills, model+provider, and name, then asks the auxiliary LLM to
produce a 1-2 sentence description of what the profile is good at.
Result is written to ``<profile_dir>/profile.yaml`` with
``description_auto: true`` so the dashboard can surface a "review"
badge. User can edit afterward to confirm.
Design notes
------------
- Mirrors the shape of ``hermes_cli/kanban_specify.py``: lazy aux
client import inside the function, lenient response parse, never
raises on expected failure modes.
- Reads at most ``MAX_SKILLS_FOR_PROMPT`` skill names to keep the
prompt bounded. No skill bodies — names + categories are enough
signal and avoid blowing context on profiles with 100+ skills.
- Memory is intentionally NOT read here. Memories are personal and
the orchestrator routes work to a *role* not a *biography*. If we
find later that memory adds signal we can wire it; for now,
skills + name + model is plenty.
"""
from __future__ import annotations
import json
import logging
import os
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from hermes_cli import profiles as profiles_mod
logger = logging.getLogger(__name__)
# Cap on how many skill names we feed the LLM. Profiles with 200+
# skills (uncommon but possible) would blow context otherwise. The cap
# applies to the whole list, not per category — when it is exceeded,
# _collect_skills samples evenly across the sorted names.
MAX_SKILLS_FOR_PROMPT = 60
_SYSTEM_PROMPT = """You are a profile-describer for the Hermes Agent kanban board.
A user runs multiple "profiles" distinct agent identities, each with their
own skills, model, and configuration. The kanban board's orchestrator routes
work to whichever profile best fits each task. To do that well, every
profile needs a short, concrete description of what it's good at.
You are given a profile's:
- Name
- Model / provider
- List of installed skill names (a strong signal of role / domain)
Produce a single JSON object with exactly one key:
{
"description": "<1-2 sentence description, plain prose, no preamble>"
}
Rules:
- The description is what an orchestrator will read to decide whether to
route a task here. Lead with the profile's strongest capability.
- Stay concrete. Bad: "an AI agent that helps users."
Good: "Reads and modifies Python codebases — runs tests,
refactors functions, opens GitHub PRs."
- 1-2 sentences, <= 280 characters total.
- Never invent capabilities the skills don't suggest.
- Never write "Hermes Agent profile" or other meta-narration.
- No code fences, no preamble, no closing remarks. Output only JSON.
"""
_USER_TEMPLATE = """Profile name: {name}
Default model: {model}
Provider: {provider}
Installed skill count: {skill_count}
Notable skills (up to {skill_cap}):
{skill_list}
"""
_FENCE_RE = re.compile(r"^```(?:json)?\s*|\s*```$", re.MULTILINE)
@dataclass
class DescribeOutcome:
    """Result of describing a single profile."""

    # Canonical (normalized) name of the profile that was processed.
    profile_name: str
    # True when a description was generated and written to profile.yaml.
    ok: bool
    # Short human-readable status; explains the failure when ``ok`` is False.
    reason: str = ""
    # The generated description text; stays None on failure.
    description: Optional[str] = None
def _collect_skills(profile_dir: Path) -> list[str]:
"""Return a stable, capped list of skill names for the prompt.
Format: ``category/skill_name`` where category is the immediate
subdir under ``skills/`` (e.g. ``devops``, ``research``). Skills
that live directly under ``skills/`` show as bare ``skill_name``.
"""
skills_dir = profile_dir / "skills"
if not skills_dir.is_dir():
return []
names: list[str] = []
for md in skills_dir.rglob("SKILL.md"):
path_str = str(md)
if "/.hub/" in path_str or "/.git/" in path_str:
continue
try:
rel = md.relative_to(skills_dir)
except ValueError:
continue
parts = rel.parts[:-1] # drop SKILL.md filename
if not parts:
continue
# parts[-1] is the skill dir name; parts[:-1] is the category path
if len(parts) == 1:
names.append(parts[0])
else:
names.append(f"{parts[0]}/{parts[-1]}")
names.sort()
# Keep within prompt budget. Skills earlier in alphabet aren't more
# important — we'll let the LLM see a sample. Pick evenly-spaced
# entries instead of just the head so a profile with skills A..Z
# doesn't get described as "starts with A".
if len(names) <= MAX_SKILLS_FOR_PROMPT:
return names
step = len(names) / MAX_SKILLS_FOR_PROMPT
sampled = [names[int(i * step)] for i in range(MAX_SKILLS_FOR_PROMPT)]
return sampled
def _extract_json_blob(raw: str) -> Optional[dict]:
    """Leniently pull a JSON object out of an LLM reply.

    Code fences are stripped, then everything from the first ``{`` to
    the last ``}`` is parsed as JSON. Returns the decoded dict, or
    ``None`` when the input is empty, has no brace pair, does not
    parse, or decodes to something other than a dict.
    """
    if not raw:
        return None

    text = _FENCE_RE.sub("", raw.strip())
    start, end = text.find("{"), text.rfind("}")
    # Need an opening brace followed (strictly later) by a closing one.
    if not (0 <= start < end):
        return None

    try:
        decoded = json.loads(text[start:end + 1])
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    return decoded if isinstance(decoded, dict) else None
def describe_profile(
    profile_name: str,
    *,
    overwrite: bool = False,
    timeout: Optional[int] = None,
) -> DescribeOutcome:
    """Auto-generate a description for one profile.

    Returns an outcome describing what happened. Never raises for
    expected failure modes (profile missing, no aux client configured,
    API error, malformed response) — those surface via ``ok=False`` so
    a sweep can continue past individual failures.

    ``overwrite`` controls whether an existing user-authored description
    is replaced. By default we refuse to overwrite a description with
    ``description_auto: false`` to protect curated text. Auto-generated
    descriptions (``description_auto: true``) are always replaceable.

    Args:
        profile_name: Profile to describe; normalized before use.
        overwrite: Replace a user-authored description as well.
        timeout: Passed to the chat-completion call; a falsy value
            falls back to 60 (presumably seconds — confirm against the
            auxiliary client).

    Returns:
        A :class:`DescribeOutcome`; on success ``description`` holds the
        text that was written (at most 280 characters).
    """
    canon = profiles_mod.normalize_profile_name(profile_name)
    if not profiles_mod.profile_exists(canon):
        # Special case: "default" exists as a virtual profile name
        # mapped to the default home dir. profile_exists() handles it.
        return DescribeOutcome(canon, False, "profile not found")

    try:
        if canon == "default":
            from hermes_constants import get_hermes_home  # type: ignore
            profile_dir = Path(get_hermes_home())
        else:
            profile_dir = profiles_mod.get_profile_dir(canon)
    except Exception as exc:
        return DescribeOutcome(canon, False, f"cannot resolve profile dir: {exc}")

    # Honor curated descriptions unless --overwrite.
    existing = profiles_mod.read_profile_meta(profile_dir)
    if existing.get("description") and not existing.get("description_auto") and not overwrite:
        return DescribeOutcome(
            canon,
            False,
            "profile already has a user-authored description "
            "(use --overwrite to replace)",
        )

    skill_names = _collect_skills(profile_dir)
    skill_list = "\n".join(f" - {n}" for n in skill_names) or " (no skills installed)"

    # Total installed skills, not just the sampled names. Hidden trees are
    # excluded by path component relative to skills/ — a "/.hub/" substring
    # test on the absolute path misses "\\"-separated paths and miscounts
    # when the profile itself sits below a ".git" or ".hub" directory.
    skills_dir = profile_dir / "skills"
    skill_count = 0
    if skills_dir.is_dir():
        skill_count = sum(
            1
            for md in skills_dir.rglob("SKILL.md")
            if not {".hub", ".git"} & set(md.relative_to(skills_dir).parts)
        )

    # Read model + provider from the profile's config.
    try:
        model, provider = profiles_mod._read_config_model(profile_dir)
    except Exception:
        model, provider = None, None

    # Lazy import: a missing auxiliary client becomes a failed outcome
    # rather than an import error.
    try:
        from agent.auxiliary_client import (  # type: ignore
            get_auxiliary_extra_body,
            get_text_auxiliary_client,
        )
    except Exception as exc:
        logger.debug("describe: auxiliary client import failed: %s", exc)
        return DescribeOutcome(canon, False, "auxiliary client unavailable")

    try:
        client, aux_model = get_text_auxiliary_client("profile_describer")
    except Exception as exc:
        logger.debug("describe: get_text_auxiliary_client failed: %s", exc)
        return DescribeOutcome(canon, False, "auxiliary client unavailable")
    if client is None or not aux_model:
        return DescribeOutcome(canon, False, "no auxiliary client configured")

    user_msg = _USER_TEMPLATE.format(
        name=canon,
        model=(model or "(unset)"),
        provider=(provider or "(unset)"),
        skill_count=skill_count,
        skill_cap=MAX_SKILLS_FOR_PROMPT,
        skill_list=skill_list,
    )

    try:
        resp = client.chat.completions.create(
            model=aux_model,
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            temperature=0.3,
            max_tokens=400,
            timeout=timeout or 60,
            extra_body=get_auxiliary_extra_body() or None,
        )
    except Exception as exc:
        logger.info("describe: API call failed for %s (%s)", canon, exc)
        return DescribeOutcome(canon, False, f"LLM error: {type(exc).__name__}")

    try:
        raw = resp.choices[0].message.content or ""
    except Exception:
        raw = ""

    parsed = _extract_json_blob(raw)
    if parsed is None:
        # Fall back: take the raw text trimmed to one paragraph.
        text = raw.strip().split("\n\n", 1)[0]
        if not text:
            return DescribeOutcome(canon, False, "LLM returned an empty response")
        description = text[:280]
    else:
        val = parsed.get("description")
        if not isinstance(val, str) or not val.strip():
            return DescribeOutcome(
                canon, False, "LLM response missing 'description' field"
            )
        description = val.strip()[:280]

    try:
        profiles_mod.write_profile_meta(
            profile_dir,
            description=description,
            description_auto=True,
        )
    except Exception as exc:
        return DescribeOutcome(canon, False, f"failed to write profile.yaml: {exc}")

    return DescribeOutcome(canon, True, "described", description=description)
def list_describable_profiles(*, missing_only: bool = True) -> list[str]:
    """Return the names of profiles eligible for auto-description.

    With ``missing_only=True`` (default) a profile is left out only when
    it has a non-blank description that is not flagged
    ``description_auto``; profiles with no description or with an
    auto-generated one are returned. ``missing_only=False`` returns
    every profile.
    """
    return [
        info.name
        for info in profiles_mod.list_profiles()
        if not (
            missing_only
            and (info.description or "").strip()
            and not info.description_auto
        )
    ]

View file

@ -412,6 +412,17 @@ class ProfileInfo:
distribution_name: Optional[str] = None
distribution_version: Optional[str] = None
distribution_source: Optional[str] = None
# Free-form description (1-2 sentences) of what this profile is good
# at. Persisted in ``<profile_dir>/profile.yaml``. Empty when the
# user has not described the profile (legacy profiles, fresh
# installs). Surfaced to the kanban decomposer so it can route work
# to the right profile based on role rather than name alone.
description: str = ""
# When True, ``description`` was auto-generated by the LLM
# describer and has not been confirmed by the user. The dashboard
# surfaces a "review" badge in this case so the user can edit or
# accept.
description_auto: bool = False
def _read_distribution_meta(profile_dir: Path) -> tuple:
@ -479,6 +490,82 @@ def _count_skills(profile_dir: Path) -> int:
return count
# ---------------------------------------------------------------------------
# profile.yaml — per-profile metadata (description, role, etc.)
# ---------------------------------------------------------------------------
#
# We keep this file deliberately tiny and separate from the profile's
# ``config.yaml``. ``config.yaml`` is the user-facing Hermes config
# (~5000 lines of defaults); ``profile.yaml`` is metadata ABOUT the
# profile itself (its role, who described it). Mixing them makes both
# harder to read.
#
# Missing file -> empty defaults; never an error. The kanban decomposer
# tolerates empty descriptions and just falls back to the profile name.
def _profile_yaml_path(profile_dir: Path) -> Path:
return profile_dir / "profile.yaml"
def read_profile_meta(profile_dir: Path) -> dict:
    """Load ``<profile_dir>/profile.yaml`` as a normalized metadata dict.

    The result always has exactly two keys: ``description`` (stripped
    string) and ``description_auto`` (bool). When the file is missing,
    unreadable, or does not contain a mapping, the defaults
    ``{"description": "", "description_auto": False}`` are returned.
    Never raises — a corrupt profile.yaml on an unrelated profile must
    not break ``hermes profile list``.
    """
    defaults = {"description": "", "description_auto": False}

    meta_path = _profile_yaml_path(profile_dir)
    if not meta_path.is_file():
        return defaults

    try:
        import yaml
        with open(meta_path, "r", encoding="utf-8") as fh:
            loaded = yaml.safe_load(fh) or {}
    except Exception:
        # Import, I/O and parse failures all degrade to the defaults.
        return defaults

    if not isinstance(loaded, dict):
        return defaults

    description = str(loaded.get("description") or "").strip()
    is_auto = bool(loaded.get("description_auto", False))
    return {"description": description, "description_auto": is_auto}
def write_profile_meta(
    profile_dir: Path,
    *,
    description: Optional[str] = None,
    description_auto: Optional[bool] = None,
) -> None:
    """Merge the given fields into ``<profile_dir>/profile.yaml``.

    Fields left as ``None`` keep whatever value is already on disk; the
    file is created when absent. An existing file that cannot be read,
    or whose content is not a mapping, is treated as empty and
    rewritten.

    Raises:
        FileNotFoundError: ``profile_dir`` is not an existing directory.
    """
    if not profile_dir.is_dir():
        raise FileNotFoundError(f"profile directory does not exist: {profile_dir}")

    import yaml

    meta_path = _profile_yaml_path(profile_dir)

    merged: dict = {}
    if meta_path.is_file():
        try:
            with open(meta_path, "r", encoding="utf-8") as fh:
                on_disk = yaml.safe_load(fh) or {}
        except Exception:
            on_disk = {}
        if isinstance(on_disk, dict):
            merged = on_disk

    # Only explicitly supplied fields are touched.
    if description is not None:
        merged["description"] = description.strip()
    if description_auto is not None:
        merged["description_auto"] = bool(description_auto)

    with open(meta_path, "w", encoding="utf-8") as fh:
        yaml.safe_dump(merged, fh, sort_keys=False, default_flow_style=False)
# ---------------------------------------------------------------------------
# CRUD operations
# ---------------------------------------------------------------------------
@ -493,6 +580,7 @@ def list_profiles() -> List[ProfileInfo]:
if default_home.is_dir():
model, provider = _read_config_model(default_home)
dist_name, dist_version, dist_source = _read_distribution_meta(default_home)
meta = read_profile_meta(default_home)
profiles.append(ProfileInfo(
name="default",
path=default_home,
@ -505,6 +593,8 @@ def list_profiles() -> List[ProfileInfo]:
distribution_name=dist_name,
distribution_version=dist_version,
distribution_source=dist_source,
description=meta.get("description", ""),
description_auto=meta.get("description_auto", False),
))
# Named profiles
@ -519,6 +609,7 @@ def list_profiles() -> List[ProfileInfo]:
model, provider = _read_config_model(entry)
alias_path = wrapper_dir / name
dist_name, dist_version, dist_source = _read_distribution_meta(entry)
meta = read_profile_meta(entry)
profiles.append(ProfileInfo(
name=name,
path=entry,
@ -532,6 +623,8 @@ def list_profiles() -> List[ProfileInfo]:
distribution_name=dist_name,
distribution_version=dist_version,
distribution_source=dist_source,
description=meta.get("description", ""),
description_auto=meta.get("description_auto", False),
))
return profiles
@ -544,6 +637,7 @@ def create_profile(
clone_config: bool = False,
no_alias: bool = False,
no_skills: bool = False,
description: Optional[str] = None,
) -> Path:
"""Create a new profile directory.
@ -667,6 +761,19 @@ def create_profile(
except OSError:
pass # best-effort — the feature still works via the empty skills/ dir
# Persist description if the caller provided one. Done last so a
# partial-create failure doesn't strand a description file in an
# incomplete profile.
if description and description.strip():
try:
write_profile_meta(
profile_dir,
description=description.strip(),
description_auto=False,
)
except Exception:
pass # non-fatal — user can describe later with `hermes profile describe`
return profile_dir

View file

@ -81,6 +81,21 @@ class UpstreamAdapter(ABC):
refresh fails. The proxy will return 401 to the client.
"""
def get_retry_credential(
    self,
    *,
    failed_credential: UpstreamCredential,
    status_code: int,
) -> Optional[UpstreamCredential]:
    """Offer a replacement credential after the upstream rejected one.

    The base implementation never retries and always returns ``None``.
    Providers can override this for one-shot fallback paths, such as
    switching from a preferred token type to a legacy bearer after the
    upstream rejects the first request.
    """
    # Both arguments exist for overriding adapters; the default ignores them.
    del failed_credential, status_code
    return None
def describe(self) -> str:
"""One-line status summary for ``proxy status``."""
try:

View file

@ -1,12 +1,13 @@
"""Nous Portal upstream adapter.
Reads the user's Nous OAuth state from ``~/.hermes/auth.json``, refreshes
the access token and mints a fresh agent key when needed, and exposes the
upstream base URL plus minted bearer for the proxy server to forward to.
Reads the user's Nous OAuth state from ``~/.hermes/auth.json`` through the
shared runtime resolver, refreshes the access token and resolves the
``agent_key`` compatibility credential when needed, then exposes the upstream
base URL plus bearer for the proxy server to forward to.
The minted ``agent_key`` (not the OAuth ``access_token``) is what
``inference-api.nousresearch.com`` accepts as a bearer. The refresh helper
already handles both see :func:`hermes_cli.auth.refresh_nous_oauth_from_state`.
The ``agent_key`` field may hold either a NAS invoke JWT or the legacy
opaque session key. The refresh helper handles both see
:func:`hermes_cli.auth.resolve_nous_runtime_credentials`.
"""
from __future__ import annotations
@ -16,11 +17,18 @@ import threading
from typing import Any, Dict, FrozenSet, Optional
from hermes_cli.auth import (
AuthError,
DEFAULT_NOUS_INFERENCE_URL,
NOUS_INFERENCE_AUTH_MODE_AUTO,
NOUS_INFERENCE_AUTH_MODE_LEGACY,
_load_auth_store,
_auth_store_lock,
_is_terminal_nous_refresh_error,
_quarantine_nous_oauth_state,
_quarantine_nous_pool_entries,
_save_auth_store,
_write_shared_nous_state,
refresh_nous_oauth_from_state,
resolve_nous_runtime_credentials,
)
from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential
@ -43,9 +51,8 @@ class NousPortalAdapter(UpstreamAdapter):
"""Proxy upstream for the Nous Portal inference API."""
def __init__(self) -> None:
# Lock guards _load → refresh → _save against parallel proxy requests
# racing to refresh expired tokens. Refresh itself is HTTP, so we
# hold the lock across the network call (brief; OAuth refresh is fast).
# Serialize proxy requests in this process; cross-process token refresh
# and persistence are handled by resolve_nous_runtime_credentials().
self._lock = threading.Lock()
@property
@ -72,6 +79,26 @@ class NousPortalAdapter(UpstreamAdapter):
)
def get_credential(self) -> UpstreamCredential:
    """Resolve the upstream credential using the ``auto`` inference auth mode."""
    # Thin wrapper: the resolution itself lives in _get_credential().
    return self._get_credential(
        inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_AUTO,
    )
def get_retry_credential(
    self,
    *,
    failed_credential: UpstreamCredential,
    status_code: int,
) -> Optional[UpstreamCredential]:
    """Fall back to the legacy auth mode after an upstream 401.

    A retry is offered only when the upstream answered 401 and the
    rejected bearer contains exactly two dots (presumably a JWT-shaped
    token — confirm). In that case the credential is resolved again
    with the legacy inference auth mode; otherwise ``None`` is returned
    and no retry happens.
    """
    if status_code != 401 or failed_credential.bearer.count(".") != 2:
        return None

    logger.info("proxy: Nous upstream rejected bearer; retrying with legacy session key")
    legacy_credential = self._get_credential(
        inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_LEGACY,
    )
    return legacy_credential
def _get_credential(self, *, inference_auth_mode: str) -> UpstreamCredential:
with self._lock:
state = self._read_state()
if state is None:
@ -80,28 +107,43 @@ class NousPortalAdapter(UpstreamAdapter):
)
try:
refreshed = refresh_nous_oauth_from_state(state)
refreshed = resolve_nous_runtime_credentials(
inference_auth_mode=inference_auth_mode,
)
except AuthError as exc:
if _is_terminal_nous_refresh_error(exc):
_quarantine_nous_oauth_state(
state,
exc,
reason="proxy_refresh_failure",
)
self._save_state(
state,
quarantine_error=exc,
quarantine_reason="proxy_refresh_failure",
)
raise RuntimeError(
f"Failed to refresh Nous Portal credentials: {exc}"
) from exc
except Exception as exc:
raise RuntimeError(
f"Failed to refresh Nous Portal credentials: {exc}"
) from exc
self._save_state(refreshed)
agent_key = refreshed.get("agent_key")
agent_key = refreshed.get("api_key")
if not agent_key:
raise RuntimeError(
"Nous Portal refresh did not return a usable agent_key. "
"Try `hermes login nous` to re-authenticate."
)
base_url = refreshed.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL
base_url = refreshed.get("base_url") or DEFAULT_NOUS_INFERENCE_URL
base_url = base_url.rstrip("/")
return UpstreamCredential(
bearer=agent_key,
base_url=base_url,
expires_at=refreshed.get("agent_key_expires_at"),
expires_at=refreshed.get("expires_at"),
)
# ------------------------------------------------------------------
@ -111,7 +153,8 @@ class NousPortalAdapter(UpstreamAdapter):
def _read_state(self) -> Optional[Dict[str, Any]]:
try:
store = _load_auth_store()
with _auth_store_lock():
store = _load_auth_store()
except Exception as exc:
logger.warning("proxy: failed to load auth store: %s", exc)
return None
@ -121,17 +164,28 @@ class NousPortalAdapter(UpstreamAdapter):
return None
return dict(state) # copy so the refresh helper can mutate freely
def _save_state(self, state: Dict[str, Any]) -> None:
def _save_state(
self,
state: Dict[str, Any],
*,
quarantine_error: Optional[AuthError] = None,
quarantine_reason: Optional[str] = None,
) -> None:
try:
store = _load_auth_store()
providers = store.setdefault("providers", {})
providers["nous"] = state
_save_auth_store(store)
with _auth_store_lock():
store = _load_auth_store()
if quarantine_error is not None and quarantine_reason:
_quarantine_nous_pool_entries(
store,
quarantine_error,
reason=quarantine_reason,
)
providers = store.setdefault("providers", {})
providers["nous"] = state
_save_auth_store(store)
_write_shared_nous_state(state)
except Exception as exc:
# Best effort — we still return the fresh credential. The next
# request just won't see cached state, which means another refresh.
logger.warning("proxy: failed to persist refreshed Nous state: %s", exc)
logger.warning("proxy: failed to persist Nous quarantine state: %s", exc)
__all__ = ["NousPortalAdapter"]

View file

@ -114,7 +114,7 @@ def cmd_proxy(args: Any) -> int:
return cmd_proxy_start(args)
if sub == "status":
return cmd_proxy_status(args)
if sub in ("providers", "list"):
if sub in {"providers", "list"}:
return cmd_proxy_list_providers(args)
# No subcommand → print short help.
print(

View file

@ -26,7 +26,7 @@ except ImportError:
web = None # type: ignore[assignment]
AIOHTTP_AVAILABLE = False
from hermes_cli.proxy.adapters.base import UpstreamAdapter
from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential
logger = logging.getLogger(__name__)
@ -76,7 +76,7 @@ def _filter_response_headers(headers) -> dict:
if key.lower() in _HOP_BY_HOP_HEADERS:
continue
# aiohttp recomputes Content-Encoding/Content-Length on stream — let it.
if key.lower() in ("content-encoding", "content-length"):
if key.lower() in {"content-encoding", "content-length"}:
continue
out[key] = value
return out
@ -136,50 +136,93 @@ def create_app(adapter: UpstreamAdapter) -> "web.Application":
logger.warning("proxy: credential resolution failed: %s", exc)
return _json_error(401, str(exc), code="upstream_auth_failed")
upstream_url = f"{cred.base_url.rstrip('/')}{rel_path}"
# Preserve query string verbatim.
if request.query_string:
upstream_url = f"{upstream_url}?{request.query_string}"
# Forward body verbatim. Read into memory once — request bodies for
# chat/completions/embeddings are small (<1MB typically). If we ever
# need to forward large multipart uploads we'll switch to streaming
# the request body too.
body = await request.read()
fwd_headers = _filter_request_headers(request.headers)
fwd_headers["Authorization"] = f"{cred.token_type} {cred.bearer}"
logger.debug(
"proxy: forwarding %s %s -> %s (body=%d bytes)",
request.method, rel_path, upstream_url, len(body),
)
# Use a per-request session so connection state doesn't leak between
# clients. Could be optimized to a shared session later.
timeout = aiohttp.ClientTimeout(total=None, sock_connect=15, sock_read=300)
try:
session = aiohttp.ClientSession(timeout=timeout)
except Exception as exc: # pragma: no cover - aiohttp setup issue
return _json_error(500, f"proxy session init failed: {exc}")
try:
upstream_resp = await session.request(
request.method,
upstream_url,
data=body if body else None,
headers=fwd_headers,
allow_redirects=False,
async def _send_upstream(active_cred: UpstreamCredential):
upstream_url = f"{active_cred.base_url.rstrip('/')}{rel_path}"
# Preserve query string verbatim.
if request.query_string:
upstream_url = f"{upstream_url}?{request.query_string}"
fwd_headers = _filter_request_headers(request.headers)
fwd_headers["Authorization"] = f"{active_cred.token_type} {active_cred.bearer}"
logger.debug(
"proxy: forwarding %s %s -> %s (body=%d bytes)",
request.method, rel_path, upstream_url, len(body),
)
except aiohttp.ClientError as exc:
await session.close()
logger.warning("proxy: upstream connection failed: %s", exc)
return _json_error(502, f"upstream connection failed: {exc}",
code="upstream_unreachable")
except asyncio.TimeoutError:
await session.close()
return _json_error(504, "upstream request timed out",
code="upstream_timeout")
try:
session = aiohttp.ClientSession(timeout=timeout)
except Exception as exc: # pragma: no cover - aiohttp setup issue
raise RuntimeError(f"proxy session init failed: {exc}") from exc
try:
upstream_resp = await session.request(
request.method,
upstream_url,
data=body if body else None,
headers=fwd_headers,
allow_redirects=False,
)
except Exception:
await session.close()
raise
return session, upstream_resp
async def _open_upstream(active_cred: UpstreamCredential):
try:
return await _send_upstream(active_cred)
except RuntimeError as exc:
return _json_error(500, str(exc)), None
except aiohttp.ClientError as exc:
logger.warning("proxy: upstream connection failed: %s", exc)
return (
_json_error(
502,
f"upstream connection failed: {exc}",
code="upstream_unreachable",
),
None,
)
except asyncio.TimeoutError:
return (
_json_error(
504,
"upstream request timed out",
code="upstream_timeout",
),
None,
)
session_or_response, upstream_resp = await _open_upstream(cred)
if upstream_resp is None:
return session_or_response
session = session_or_response
if upstream_resp.status == 401:
try:
retry_cred = adapter.get_retry_credential(
failed_credential=cred,
status_code=upstream_resp.status,
)
except Exception as exc:
logger.warning("proxy: retry credential resolution failed: %s", exc)
retry_cred = None
if retry_cred is not None:
upstream_resp.release()
await session.close()
session_or_response, upstream_resp = await _open_upstream(retry_cred)
if upstream_resp is None:
return session_or_response
session = session_or_response
# Stream response back. Headers first, then chunked body.
resp = web.StreamResponse(

View file

@ -209,7 +209,7 @@ def _maybe_apply_codex_app_server_runtime(
Returns the (possibly-rewritten) api_mode."""
if not model_cfg:
return api_mode
if provider not in ("openai", "openai-codex"):
if provider not in {"openai", "openai-codex"}:
return api_mode
runtime = str(model_cfg.get("openai_runtime") or "").strip().lower()
if runtime == "codex_app_server":
@ -875,10 +875,9 @@ def _resolve_explicit_runtime(
explicit_base_url
or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
)
# Only use agent_key for inference — access_token is an OAuth token for the
# portal API (minting keys, refreshing tokens), not for the inference API.
# Falling back to access_token sends an OAuth bearer token to the inference
# endpoint, which returns 404 because it is not a valid inference credential.
# Only use the agent_key compatibility field for inference. It may be
# either a NAS invoke JWT or a legacy opaque session key; raw OAuth
# access_token fallback is handled by resolve_nous_runtime_credentials().
api_key = explicit_api_key or str(state.get("agent_key") or "").strip()
expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
if not api_key:
@ -1069,17 +1068,19 @@ def resolve_runtime_provider(
getattr(entry, "runtime_api_key", None)
or getattr(entry, "access_token", "")
)
# For Nous, the pool entry's runtime_api_key is the agent_key — a
# short-lived inference credential (~30 min TTL). The pool doesn't
# For Nous, the pool entry's runtime_api_key is the agent_key
# compatibility field: either an invoke JWT or legacy opaque key.
# The pool doesn't
# refresh it during selection (that would trigger network calls in
# non-runtime contexts like `hermes auth list`). If the key is
# expired, clear pool_api_key so we fall through to
# resolve_nous_runtime_credentials() which handles refresh + mint.
# resolve_nous_runtime_credentials() which handles refresh + fallback.
if provider == "nous" and entry is not None and pool_api_key:
min_ttl = max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
nous_state = {
"agent_key": getattr(entry, "agent_key", None),
"agent_key_expires_at": getattr(entry, "agent_key_expires_at", None),
"scope": getattr(entry, "scope", None),
}
if not _agent_key_is_usable(nous_state, min_ttl):
logger.debug("Nous pool entry agent_key expired/missing, falling through to runtime resolution")

View file

@ -171,7 +171,7 @@ def _recent_window(
cut = 0
for i in range(len(messages) - 1, -1, -1):
msg = messages[i]
if isinstance(msg, Mapping) and msg.get("role") in ("user", "assistant"):
if isinstance(msg, Mapping) and msg.get("role") in {"user", "assistant"}:
count += 1
if count >= window:
cut = i

View file

@ -259,6 +259,27 @@ def show_status(args):
if minimax_status.get("error") and not minimax_logged_in:
print(f" Error: {minimax_status.get('error')}")
# xAI OAuth — separate try/except so an import failure here cannot
# disrupt the already-printed Nous/Codex/Qwen/MiniMax rows above.
try:
from hermes_cli.auth import get_xai_oauth_auth_status
xai_oauth_status = get_xai_oauth_auth_status() or {}
except Exception:
xai_oauth_status = {}
xai_oauth_logged_in = bool(xai_oauth_status.get("logged_in"))
print(
f" {'xAI OAuth':<12} {check_mark(xai_oauth_logged_in)} "
f"{'logged in' if xai_oauth_logged_in else 'not logged in (run: hermes auth add xai-oauth)'}"
)
xai_auth_file = xai_oauth_status.get("auth_store")
if xai_auth_file:
print(f" Auth file: {xai_auth_file}")
if xai_oauth_status.get("last_refresh"):
print(f" Refreshed: {_format_iso_timestamp(xai_oauth_status.get('last_refresh'))}")
if xai_oauth_status.get("error") and not xai_oauth_logged_in:
print(f" Error: {xai_oauth_status.get('error')}")
# =========================================================================
# Nous Subscription Features
# =========================================================================

View file

@ -88,12 +88,40 @@ CONFIGURABLE_TOOLSETS = [
# who want it opt in via `hermes tools` → Video Generation, which walks
# them through provider + model selection.
#
# X search is off by default — gated on xAI credentials (SuperGrok OAuth
# or XAI_API_KEY). Users opt in via `hermes tools` → X (Twitter) Search,
# which walks them through credential setup. The tool's check_fn means
# the schema won't appear to the model even if enabled without credentials.
# X search is off by default for users without xAI credentials, but
# auto-enables when SuperGrok OAuth tokens are stored OR XAI_API_KEY is
# set — mirroring the HASS_TOKEN → homeassistant auto-enable below. The
# `hermes tools` → X (Twitter) Search setup walks users through credential
# setup. The tool's check_fn means the schema still won't appear to the
# model if the credential later goes missing or expires.
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "spotify", "discord", "discord_admin", "video", "video_gen", "x_search"}
def _xai_credentials_present() -> bool:
"""Cheap, side-effect-free check for usable xAI credentials.
Used to auto-enable the ``x_search`` toolset when the user has either
completed xAI Grok OAuth (SuperGrok subscription) or set
``XAI_API_KEY``. Does NOT hit the network only inspects the local
auth store and environment. The tool's runtime ``check_fn`` still
gates schema registration if creds later expire or get revoked.
"""
try:
from hermes_cli.auth import _read_xai_oauth_tokens
_read_xai_oauth_tokens()
return True
except Exception:
pass
try:
from tools.xai_http import get_env_value as _xai_get_env_value
if str(_xai_get_env_value("XAI_API_KEY") or "").strip():
return True
except Exception:
pass
return bool(str(os.environ.get("XAI_API_KEY") or "").strip())
# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
# these platforms, and only resolve/save for these platforms. A toolset
# absent from this map is available on every platform (current behaviour).
@ -350,6 +378,17 @@ TOOL_CATEGORIES = {
"browser": {
"name": "Browser Automation",
"icon": "🌐",
# Per-provider rows for Browserbase, Browser Use, and Firecrawl are
# injected at runtime from plugins.browser.<vendor>.provider via
# _plugin_browser_providers() in _visible_providers(). Only
# non-provider UX setup-flow rows remain here:
# - "Nous Subscription (Browser Use cloud)" — managed Browser Use
# billed via Nous subscription (requires_nous_auth +
# override_env_vars). Uses the browser-use plugin as the
# underlying backend but has a distinct setup UX.
# - "Local Browser" — non-cloud option, no CloudBrowserProvider.
# - "Camofox" — anti-detection local Firefox; short-circuits the
# cloud-provider dispatch path via _is_camofox_mode().
"providers": [
{
"name": "Nous Subscription (Browser Use cloud)",
@ -370,37 +409,6 @@ TOOL_CATEGORIES = {
"browser_provider": "local",
"post_setup": "agent_browser",
},
{
"name": "Browserbase",
"badge": "paid",
"tag": "Cloud browser with stealth and proxies",
"env_vars": [
{"key": "BROWSERBASE_API_KEY", "prompt": "Browserbase API key", "url": "https://browserbase.com"},
{"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"},
],
"browser_provider": "browserbase",
"post_setup": "agent_browser",
},
{
"name": "Browser Use",
"badge": "paid",
"tag": "Cloud browser with remote execution",
"env_vars": [
{"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"},
],
"browser_provider": "browser-use",
"post_setup": "agent_browser",
},
{
"name": "Firecrawl",
"badge": "paid",
"tag": "Cloud browser with remote execution",
"env_vars": [
{"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
],
"browser_provider": "firecrawl",
"post_setup": "agent_browser",
},
{
"name": "Camofox",
"badge": "free · local",
@ -1170,6 +1178,23 @@ def _get_platform_tools(
if ts_tools and ts_tools.issubset(all_tool_names):
enabled_toolsets.add(ts_key)
# Auto-enable ``x_search`` when xAI credentials are configured.
# Unlike ``homeassistant`` (whose ``ha_*`` tools live inside the
# platform composite and thus pass the subset check above),
# ``x_search`` is its own one-tool toolset that the composite does
# NOT include, so the subset loop never picks it up. Inject it
# directly here, mirroring the HASS_TOKEN → ``homeassistant`` rule
# below: once you have working creds, you don't have to also click
# through ``hermes tools`` to flip the toolset on. Only fires when
# the user has not yet saved an explicit toolset list — once they
# do, the saved list is authoritative.
x_search_auto_enabled = (
_toolset_allowed_for_platform("x_search", platform)
and _xai_credentials_present()
)
if x_search_auto_enabled:
enabled_toolsets.add("x_search")
default_off = set(_DEFAULT_OFF_TOOLSETS)
# Legacy safety: if the platform's own name matches a default-off
# toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
@ -1187,6 +1212,11 @@ def _get_platform_tools(
# regressed after #14798 made cron honor per-platform tool config.
if "homeassistant" in default_off and os.getenv("HASS_TOKEN"):
default_off.remove("homeassistant")
# Symmetric carve-out for x_search auto-enable (see the inject
# block above). Without this, the default_off subtraction would
# strip the entry we just added.
if x_search_auto_enabled and "x_search" in default_off:
default_off.remove("x_search")
enabled_toolsets -= default_off
# Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
@ -1653,6 +1683,61 @@ def _plugin_web_search_providers() -> list[dict]:
return rows
# Mirror of _plugin_web_search_providers for cloud browser backends. After
# PR #25214, Browserbase / Browser Use / Firecrawl live as plugins under
# plugins/browser/<vendor>/; this helper is the sole source of provider rows
# for those three in the "Browser Automation" picker. The hardcoded
# ``TOOL_CATEGORIES["browser"]`` entries that drove the category before
# were deleted in the same PR; only non-provider UX setup-flow rows remain
# ("Nous Subscription", "Local Browser", "Camofox") — see the comment block
# in ``TOOL_CATEGORIES["browser"]`` for why each one stays hardcoded.
def _plugin_browser_providers() -> list[dict]:
"""Build picker-row dicts from plugin-registered cloud browser providers.
Each returned dict mirrors the legacy ``TOOL_CATEGORIES["browser"]``
schema (``name`` / ``badge`` / ``tag`` / ``env_vars`` /
``browser_provider`` / ``post_setup``) so the picker behaves identically
whether a provider was hardcoded or plugin-registered.
Populates ``browser_provider`` (the legacy config key written to
``browser.cloud_provider``) and a ``browser_plugin_name`` marker so
setup / write paths can route through the registry when they want to.
"""
try:
from agent.browser_registry import list_providers as _list_browser_providers
from hermes_cli.plugins import _ensure_plugins_discovered
_ensure_plugins_discovered()
providers = _list_browser_providers()
except Exception:
return []
rows: list[dict] = []
for provider in providers:
name = getattr(provider, "name", None)
if not name:
continue
try:
schema = provider.get_setup_schema()
except Exception:
continue
if not isinstance(schema, dict):
continue
row = {
"name": schema.get("name", provider.display_name),
"badge": schema.get("badge", ""),
"tag": schema.get("tag", ""),
"env_vars": schema.get("env_vars", []),
"browser_provider": name,
"browser_plugin_name": name,
}
# Pass-through optional fields the schema can opt into.
if schema.get("post_setup"):
row["post_setup"] = schema["post_setup"]
rows.append(row)
return rows
def _visible_providers(cat: dict, config: dict) -> list[dict]:
"""Return provider entries visible for the current auth/config state."""
features = get_nous_subscription_features(config)
@ -1682,6 +1767,14 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
if cat.get("name") == "Web Search & Extract":
visible.extend(_plugin_web_search_providers())
# Inject plugin-registered cloud browser backends. After PR #25214,
# Browserbase / Browser Use / Firecrawl are the plugin-supplied rows;
# the hardcoded "Nous Subscription" / "Local Browser" / "Camofox" rows
# stay because they're non-provider UX setup flows (subscription auth,
# local fallback, and the REST-API anti-detection backend respectively).
if cat.get("name") == "Browser Automation":
visible.extend(_plugin_browser_providers())
return visible
@ -2590,6 +2683,9 @@ def _reconfigure_provider(provider: dict, config: dict):
else:
_print_info(" Kept current")
if provider.get("post_setup"):
_run_post_setup(provider["post_setup"])
# Imagegen backends prompt for model selection on reconfig too.
plugin_name = provider.get("image_gen_plugin_name")
if plugin_name:

View file

@ -2609,7 +2609,11 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
so the UI can render the verification page link + user code.
"""
if provider_id == "nous":
from hermes_cli.auth import _request_device_code, PROVIDER_REGISTRY
from hermes_cli.auth import (
_nous_device_scope_with_env_override,
_request_nous_device_code_with_scope_fallback,
PROVIDER_REGISTRY,
)
import httpx
pconfig = PROVIDER_REGISTRY["nous"]
portal_base_url = (
@ -2618,22 +2622,34 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
or pconfig.portal_base_url
).rstrip("/")
client_id = pconfig.client_id
scope = pconfig.scope
scope, explicit_scope = _nous_device_scope_with_env_override(
None,
default_scope=pconfig.scope,
)
def _do_nous_device_request():
with httpx.Client(timeout=httpx.Timeout(15.0), headers={"Accept": "application/json"}) as client:
return _request_device_code(
with httpx.Client(
timeout=httpx.Timeout(15.0),
headers={"Accept": "application/json"},
) as client:
return _request_nous_device_code_with_scope_fallback(
client=client,
portal_base_url=portal_base_url,
client_id=client_id,
scope=scope,
allow_legacy_fallback=not explicit_scope,
)
device_data = await asyncio.get_running_loop().run_in_executor(None, _do_nous_device_request)
device_data, effective_scope = await asyncio.get_running_loop().run_in_executor(
None, _do_nous_device_request
)
sid, sess = _new_oauth_session("nous", "device_code")
sess["device_code"] = str(device_data["device_code"])
sess["interval"] = int(device_data["interval"])
sess["expires_at"] = time.time() + int(device_data["expires_in"])
sess["portal_base_url"] = portal_base_url
sess["client_id"] = client_id
sess["scope"] = effective_scope
threading.Thread(
target=_nous_poller, args=(sid,), daemon=True, name=f"oauth-poll-{sid[:6]}"
).start()
@ -2762,7 +2778,11 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
def _nous_poller(session_id: str) -> None:
"""Background poller that drives a Nous device-code flow to completion."""
from hermes_cli.auth import _poll_for_token, refresh_nous_oauth_from_state
from hermes_cli.auth import (
NOUS_INFERENCE_AUTH_MODE_FRESH,
_poll_for_token,
refresh_nous_oauth_from_state,
)
from datetime import datetime, timezone
import httpx
with _oauth_sessions_lock:
@ -2773,6 +2793,7 @@ def _nous_poller(session_id: str) -> None:
client_id = sess["client_id"]
device_code = sess["device_code"]
interval = sess["interval"]
scope = sess.get("scope")
expires_in = max(60, int(sess["expires_at"] - time.time()))
try:
with httpx.Client(timeout=httpx.Timeout(15.0), headers={"Accept": "application/json"}) as client:
@ -2791,7 +2812,7 @@ def _nous_poller(session_id: str) -> None:
"portal_base_url": portal_base_url,
"inference_base_url": token_data.get("inference_base_url"),
"client_id": client_id,
"scope": token_data.get("scope"),
"scope": token_data.get("scope") or scope,
"token_type": token_data.get("token_type", "Bearer"),
"access_token": token_data["access_token"],
"refresh_token": token_data.get("refresh_token"),
@ -2803,8 +2824,11 @@ def _nous_poller(session_id: str) -> None:
"expires_in": token_ttl,
}
full_state = refresh_nous_oauth_from_state(
auth_state, min_key_ttl_seconds=300, timeout_seconds=15.0,
force_refresh=False, force_mint=True,
auth_state,
min_key_ttl_seconds=300,
timeout_seconds=15.0,
force_refresh=False,
inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_FRESH,
)
from hermes_cli.auth import persist_nous_credentials
persist_nous_credentials(full_state)
@ -5381,4 +5405,7 @@ def start_server(
open_browser,
)
print(f" Hermes Web UI → http://{host}:{port}")
uvicorn.run(app, host=host, port=port, log_level="warning")
# proxy_headers=False so _ws_client_is_allowed sees the real connection peer
# rather than X-Forwarded-For's rewritten value (which would defeat the
# loopback gate when behind a reverse proxy).
uvicorn.run(app, host=host, port=port, log_level="warning", proxy_headers=False)

View file

@ -358,7 +358,7 @@ def generate_meme(template_id: str, texts: list[str], output_path: str) -> str:
img = _overlay_on_image(img, texts, fields)
output = Path(output_path)
if output.suffix.lower() in (".jpg", ".jpeg"):
if output.suffix.lower() in {".jpg", ".jpeg"}:
img = img.convert("RGB")
img.save(str(output), quality=95)
return str(output)
@ -378,7 +378,7 @@ def generate_from_image(
result = _overlay_on_image(img, texts, fields)
output = Path(output_path)
if output.suffix.lower() in (".jpg", ".jpeg"):
if output.suffix.lower() in {".jpg", ".jpeg"}:
result = result.convert("RGB")
result.save(str(output), quality=95)
return str(output)

View file

@ -43,7 +43,7 @@ def _parse_feed(xml_bytes: bytes):
entries = []
for item in root.iter():
tag = _strip_ns(item.tag)
if tag not in ("item", "entry"):
if tag not in {"item", "entry"}:
continue
# ElementTree Elements without children are *falsy* — use `is not None`.
children = {_strip_ns(c.tag): c for c in item}

View file

@ -125,7 +125,7 @@ def fetch_url(url: str, headers: dict | None = None, retries: int = MAX_RETRIES)
return json.loads(raw.decode("utf-8", errors="replace"))
except urllib.error.HTTPError as e:
last_err = e
if e.code in (404, 400):
if e.code in {404, 400}:
break # no point retrying
wait = BACKOFF_BASE ** attempt
time.sleep(wait)

View file

@ -95,11 +95,11 @@ def one_rep_max(weight, reps):
def macros(tdee_kcal, goal):
goal = goal.lower()
if goal in ("cut", "lose", "deficit"):
if goal in {"cut", "lose", "deficit"}:
cals = tdee_kcal - 500
p, f, c = 0.40, 0.30, 0.30
label = "Fat Loss (-500 kcal)"
elif goal in ("bulk", "gain", "surplus"):
elif goal in {"bulk", "gain", "surplus"}:
cals = tdee_kcal + 400
p, f, c = 0.30, 0.25, 0.45
label = "Lean Bulk (+400 kcal)"
@ -184,7 +184,7 @@ def main():
int(sys.argv[4]), sys.argv[5], int(sys.argv[6]),
)
elif cmd in ("1rm", "orm"):
elif cmd in {"1rm", "orm"}:
one_rep_max(float(sys.argv[2]), int(sys.argv[3]))
elif cmd == "macros":

View file

@ -610,7 +610,7 @@ def _is_secret_key(key: str) -> bool:
normalized = _normalize_secret_key(key)
if normalized == "token" or normalized.endswith("token"):
return True
if normalized in ("auth", "authorization"):
if normalized in {"auth", "authorization"}:
return True
return any(marker in normalized for marker in _SECRET_KEY_MARKERS)
@ -831,7 +831,7 @@ class Migrator:
# Flip the config-block flag when a conflict/error occurs on a
# config.yaml write. Later config-mutating options will skip rather
# than attempting a partial write.
if status in (STATUS_CONFLICT, STATUS_ERROR) and destination is not None:
if status in {STATUS_CONFLICT, STATUS_ERROR} and destination is not None:
dest_str = str(destination)
if dest_str.endswith("config.yaml") or dest_str.endswith("config.yml"):
self._config_apply_blocked = True
@ -1526,7 +1526,7 @@ class Migrator:
api_key = resolve_secret_input(raw_key, openclaw_env)
if not api_key:
# Warn if a SecretRef with file/exec source was silently unresolvable
if isinstance(raw_key, dict) and raw_key.get("source") in ("file", "exec"):
if isinstance(raw_key, dict) and raw_key.get("source") in {"file", "exec"}:
self.record(
"provider-keys",
self.source_root / "openclaw.json",
@ -1736,7 +1736,7 @@ class Migrator:
tts_data: Dict[str, Any] = {}
provider = tts.get("provider")
if isinstance(provider, str) and provider in ("elevenlabs", "openai", "edge", "microsoft"):
if isinstance(provider, str) and provider in {"elevenlabs", "openai", "edge", "microsoft"}:
# OpenClaw renamed "edge" to "microsoft"; Hermes still uses "edge"
tts_data["provider"] = "edge" if provider == "microsoft" else provider
@ -2304,11 +2304,11 @@ class Migrator:
if defaults.get("thinkingDefault"):
# Map OpenClaw thinking -> Hermes reasoning_effort
thinking = defaults["thinkingDefault"]
if thinking in ("always", "high", "xhigh"):
if thinking in {"always", "high", "xhigh"}:
agent_cfg["reasoning_effort"] = "high"
elif thinking in ("auto", "medium", "adaptive"):
elif thinking in {"auto", "medium", "adaptive"}:
agent_cfg["reasoning_effort"] = "medium"
elif thinking in ("off", "low", "none", "minimal"):
elif thinking in {"off", "low", "none", "minimal"}:
agent_cfg["reasoning_effort"] = "low"
changes = True
@ -2626,8 +2626,8 @@ class Migrator:
if not isinstance(ch_cfg, dict):
continue
complex_keys = {k: v for k, v in ch_cfg.items()
if k not in ("botToken", "appToken", "allowFrom", "enabled")
and v and k not in ("requireMention", "autoThread")}
if k not in {"botToken", "appToken", "allowFrom", "enabled"}
and v and k not in {"requireMention", "autoThread"}}
if complex_keys:
complex_archive[ch_name] = complex_keys
@ -2671,7 +2671,7 @@ class Migrator:
# Archive remaining browser settings
advanced = {k: v for k, v in browser.items()
if k not in ("cdpUrl", "headless") and v}
if k not in {"cdpUrl", "headless"} and v}
if advanced and self.archive_dir:
if self.execute:
self.archive_dir.mkdir(parents=True, exist_ok=True)

View file

@ -109,7 +109,7 @@ def _config_lookup(*paths: tuple[str, ...], default: str = "") -> str:
node = None
break
node = node.get(key)
if node not in (None, "") and not isinstance(node, dict):
if node not in {None, ""} and not isinstance(node, dict):
return str(node)
return default

View file

@ -51,7 +51,7 @@ def main() -> int:
field = args.field
if field is None:
for k, v in vars(org).items():
if isinstance(v, str) and not k.startswith("_") and k not in ("id",):
if isinstance(v, str) and not k.startswith("_") and k not in {"id",}:
field = k
break
val = getattr(org, field, None) if field else None

View file

@ -185,7 +185,7 @@ def whois_lookup(domain):
for key, pat in patterns.items():
matches = re.findall(pat, raw, re.IGNORECASE)
if matches:
if key in ("name_servers", "status"):
if key in {"name_servers", "status"}:
result[key] = list(dict.fromkeys(m.strip().lower() for m in matches))
else:
result[key] = matches[0].strip()

View file

@ -60,7 +60,7 @@ def get(
f"HTTP 429 rate-limited by {urllib.parse.urlsplit(url).netloc}. "
f"Slow down or supply a real API key. Body: {body[:300]}"
) from e
if e.code in (500, 502, 503, 504) and attempt < max_retries:
if e.code in {500, 502, 503, 504} and attempt < max_retries:
retry_after = e.headers.get("Retry-After") if e.headers else None
wait = float(retry_after) if (retry_after and retry_after.isdigit()) else backoff ** (attempt + 1)
time.sleep(wait)

View file

@ -122,7 +122,7 @@ def fetch(
with zipfile.ZipFile(zip_path) as zf:
for node_type, csv_substring in targets:
relevant_needles = [n for (k, n) in needles if k in (node_type, "Entity", "Officer")] or []
relevant_needles = [n for (k, n) in needles if k in {node_type, "Entity", "Officer"}] or []
# Only scan a CSV if we have a needle that could plausibly match it,
# or if we have ONLY a jurisdiction filter.
applicable_needles = [n for (k, n) in needles if k == node_type]

View file

@ -0,0 +1,14 @@
"""Browser Use cloud browser plugin — bundled, auto-loaded.
Mirrors the ``plugins/web/<vendor>/`` layout: ``provider.py`` holds the
provider class; ``__init__.py::register`` instantiates and registers it.
"""
from __future__ import annotations
from plugins.browser.browser_use.provider import BrowserUseBrowserProvider
def register(ctx) -> None:
    """Plugin entry point: create the Browser Use provider and register it on ``ctx``."""
    browser_use_provider = BrowserUseBrowserProvider()
    ctx.register_browser_provider(browser_use_provider)

View file

@ -0,0 +1,7 @@
name: browser-browser-use
version: 1.0.0
description: "Browser Use (https://browser-use.com) cloud browser backend. Supports both direct BROWSER_USE_API_KEY and the managed Nous tool gateway. Also powers the 'Nous Subscription' UX flow that bills usage to a Nous subscription."
author: NousResearch
kind: backend
provides_browser_providers:
- browser-use

View file

@ -1,4 +1,32 @@
"""Browser Use cloud browser provider."""
"""Browser Use cloud browser provider — plugin form.
Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
ABC introduced in PR #25214). The legacy in-tree module
``tools.browser_providers.browser_use`` was removed in the same PR; this file
is now the canonical implementation.
Browser Use is the only browser backend with dual auth: a direct
``BROWSER_USE_API_KEY`` for self-billed users, or the managed Nous tool
gateway (which Hermes uses to bill Browser Use sessions to a Nous
subscription). The dispatch order — direct API key first, managed gateway
second — preserves the pre-migration behaviour in
``tools.browser_providers.browser_use.BrowserUseProvider._get_config_or_none``.
Config keys this provider responds to::
browser:
cloud_provider: "browser-use" # explicit selection
tool_gateway:
browser: "gateway" # optional: prefer managed gateway
# even when BROWSER_USE_API_KEY is set
Auth env vars (one of)::
BROWSER_USE_API_KEY=... # https://browser-use.com
# OR a managed Nous gateway entry (configured via 'hermes setup')
"""
from __future__ import annotations
import logging
import os
@ -8,11 +36,14 @@ from typing import Any, Dict, Optional
import requests
from tools.browser_providers.base import CloudBrowserProvider
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway
from agent.browser_provider import BrowserProvider
logger = logging.getLogger(__name__)
# Idempotency tracking for managed-mode session creation. The managed Nous
# gateway returns 409 "already in progress" on retried POSTs; we forward the
# original idempotency key so the gateway can deduplicate. Cleared on
# success or terminal failure.
_pending_create_keys: Dict[str, str] = {}
_pending_create_keys_lock = threading.Lock()
@ -38,6 +69,16 @@ def _clear_pending_create_key(task_id: str) -> None:
def _should_preserve_pending_create_key(response: requests.Response) -> bool:
"""Decide whether to keep the idempotency key after a failed create.
Preserve the key when the failure looks retryable (5xx) OR when the
gateway reports the original request is still in flight (409 "already
in progress") — in either case, retrying with the same key lets the
gateway deduplicate.
Drop the key on any other 4xx (auth failure, bad request, etc.) — those
won't succeed by being retried.
"""
if response.status_code >= 500:
return True
@ -60,13 +101,24 @@ def _should_preserve_pending_create_key(response: requests.Response) -> bool:
return "already in progress" in message
class BrowserUseProvider(CloudBrowserProvider):
"""Browser Use (https://browser-use.com) cloud browser backend."""
class BrowserUseBrowserProvider(BrowserProvider):
"""Browser Use (https://browser-use.com) cloud browser backend.
def provider_name(self) -> str:
Dual auth: prefers a direct BROWSER_USE_API_KEY when set, falling back
to the managed Nous tool gateway when ``tool_gateway.browser`` config
routes through it. Setting ``tool_gateway.browser: gateway`` flips the
order so managed billing wins even when BROWSER_USE_API_KEY is present.
"""
@property
def name(self) -> str:
return "browser-use"
@property
def display_name(self) -> str:
return "Browser Use"
def is_configured(self) -> bool:
def is_available(self) -> bool:
return self._get_config_or_none() is not None
# ------------------------------------------------------------------
@ -74,6 +126,14 @@ class BrowserUseProvider(CloudBrowserProvider):
# ------------------------------------------------------------------
def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
# Import here to avoid a hard dependency at module-import time —
# managed_tool_gateway pulls in the Nous auth stack which can be
# heavy and is not needed for direct-API-key users.
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import prefers_gateway
# Direct API key wins unless the user has explicitly opted into the
# managed Nous gateway via ``tool_gateway.browser: gateway``.
api_key = os.environ.get("BROWSER_USE_API_KEY")
if api_key and not prefers_gateway("browser"):
return {
@ -93,6 +153,8 @@ class BrowserUseProvider(CloudBrowserProvider):
}
def _get_config(self) -> Dict[str, Any]:
from tools.tool_backend_helpers import managed_nous_tools_enabled
config = self._get_config_or_none()
if config is None:
message = (
@ -111,11 +173,10 @@ class BrowserUseProvider(CloudBrowserProvider):
# ------------------------------------------------------------------
def _headers(self, config: Dict[str, Any]) -> Dict[str, str]:
headers = {
return {
"Content-Type": "application/json",
"X-Browser-Use-API-Key": config["api_key"],
}
return headers
def create_session(self, task_id: str) -> Dict[str, object]:
config = self._get_config()
@ -166,7 +227,9 @@ class BrowserUseProvider(CloudBrowserProvider):
if managed_mode:
_clear_pending_create_key(task_id)
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
external_call_id = response.headers.get("x-external-call-id") if managed_mode else None
external_call_id = (
response.headers.get("x-external-call-id") if managed_mode else None
)
logger.info("Created Browser Use session %s", session_name)
@ -184,7 +247,9 @@ class BrowserUseProvider(CloudBrowserProvider):
try:
config = self._get_config()
except ValueError:
logger.warning("Cannot close Browser Use session %s — missing credentials", session_id)
logger.warning(
"Cannot close Browser Use session %s — missing credentials", session_id
)
return False
try:
@ -212,7 +277,10 @@ class BrowserUseProvider(CloudBrowserProvider):
def emergency_cleanup(self, session_id: str) -> None:
config = self._get_config_or_none()
if config is None:
logger.warning("Cannot emergency-cleanup Browser Use session %s — missing credentials", session_id)
logger.warning(
"Cannot emergency-cleanup Browser Use session %s — missing credentials",
session_id,
)
return
try:
requests.patch(
@ -222,4 +290,21 @@ class BrowserUseProvider(CloudBrowserProvider):
timeout=5,
)
except Exception as e:
logger.debug("Emergency cleanup failed for Browser Use session %s: %s", session_id, e)
logger.debug(
"Emergency cleanup failed for Browser Use session %s: %s", session_id, e
)
def get_setup_schema(self) -> Dict[str, Any]:
    """Return the static setup-picker description for Browser Use.

    The dict names the provider, its badge and tag line, the single
    ``BROWSER_USE_API_KEY`` credential prompt, and the ``agent_browser``
    post-setup hook.
    """
    api_key_prompt = {
        "key": "BROWSER_USE_API_KEY",
        "prompt": "Browser Use API key",
        "url": "https://browser-use.com",
    }
    schema: Dict[str, Any] = {
        "name": "Browser Use",
        "badge": "paid",
        "tag": "Cloud browser with remote execution",
        "env_vars": [api_key_prompt],
        "post_setup": "agent_browser",
    }
    return schema

View file

@ -0,0 +1,15 @@
"""Browserbase cloud browser plugin — bundled, auto-loaded.
Mirrors the ``plugins/web/<vendor>/`` and ``plugins/image_gen/openai/``
layout: ``provider.py`` holds the provider class; ``__init__.py::register``
instantiates and registers it via the plugin context.
"""
from __future__ import annotations
from plugins.browser.browserbase.provider import BrowserbaseBrowserProvider
def register(ctx) -> None:
    """Plugin entry point: create the Browserbase provider and register it on ``ctx``."""
    browserbase_provider = BrowserbaseBrowserProvider()
    ctx.register_browser_provider(browserbase_provider)

View file

@ -0,0 +1,7 @@
name: browser-browserbase
version: 1.0.0
description: "Browserbase (https://browserbase.com) cloud browser backend. Requires BROWSERBASE_API_KEY + BROWSERBASE_PROJECT_ID. Supports stealth, proxies, and keep-alive sessions; auto-falls-back when paid features are unavailable."
author: NousResearch
kind: backend
provides_browser_providers:
- browserbase

View file

@ -1,4 +1,35 @@
"""Browserbase cloud browser provider (direct credentials only)."""
"""Browserbase cloud browser provider — plugin form.
Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
ABC introduced in PR #25214). The legacy in-tree module
``tools.browser_providers.browserbase`` was removed in the same PR; this file
is now the canonical implementation.
Browserbase requires direct ``BROWSERBASE_API_KEY`` and ``BROWSERBASE_PROJECT_ID``
credentials. Managed Nous gateway support has been removed — the Nous
subscription now routes through Browser Use instead (see
``plugins/browser/browser_use/``).
Config keys this provider responds to::
browser:
cloud_provider: "browserbase"
Auth env vars::
BROWSERBASE_API_KEY=... # https://browserbase.com
BROWSERBASE_PROJECT_ID=...
Optional feature knobs::
BROWSERBASE_BASE_URL=... # default https://api.browserbase.com
BROWSERBASE_PROXIES=true # default true
BROWSERBASE_ADVANCED_STEALTH=false
BROWSERBASE_KEEP_ALIVE=true # default true
BROWSERBASE_SESSION_TIMEOUT=... (ms, integer)
"""
from __future__ import annotations
import logging
import os
@ -7,27 +38,31 @@ from typing import Any, Dict, Optional
import requests
from tools.browser_providers.base import CloudBrowserProvider
from agent.browser_provider import BrowserProvider
logger = logging.getLogger(__name__)
class BrowserbaseProvider(CloudBrowserProvider):
class BrowserbaseBrowserProvider(BrowserProvider):
"""Browserbase (https://browserbase.com) cloud browser backend.
This provider requires direct BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID
credentials. Managed Nous gateway support has been removed — the Nous
subscription now routes through Browser Use instead.
Direct credentials only — managed-Nous-gateway support lives on the
Browser Use provider now.
"""
def provider_name(self) -> str:
@property
def name(self) -> str:
return "browserbase"
@property
def display_name(self) -> str:
return "Browserbase"
def is_configured(self) -> bool:
def is_available(self) -> bool:
return self._get_config_or_none() is not None
# ------------------------------------------------------------------
# Session lifecycle
# Config resolution
# ------------------------------------------------------------------
def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
@ -37,7 +72,9 @@ class BrowserbaseProvider(CloudBrowserProvider):
return {
"api_key": api_key,
"project_id": project_id,
"base_url": os.environ.get("BROWSERBASE_BASE_URL", "https://api.browserbase.com").rstrip("/"),
"base_url": os.environ.get(
"BROWSERBASE_BASE_URL", "https://api.browserbase.com"
).rstrip("/"),
}
return None
@ -50,13 +87,21 @@ class BrowserbaseProvider(CloudBrowserProvider):
)
return config
# ------------------------------------------------------------------
# Session lifecycle
# ------------------------------------------------------------------
def create_session(self, task_id: str) -> Dict[str, object]:
config = self._get_config()
# Optional env-var knobs
enable_proxies = os.environ.get("BROWSERBASE_PROXIES", "true").lower() != "false"
enable_advanced_stealth = os.environ.get("BROWSERBASE_ADVANCED_STEALTH", "false").lower() == "true"
enable_keep_alive = os.environ.get("BROWSERBASE_KEEP_ALIVE", "true").lower() != "false"
enable_advanced_stealth = (
os.environ.get("BROWSERBASE_ADVANCED_STEALTH", "false").lower() == "true"
)
enable_keep_alive = (
os.environ.get("BROWSERBASE_KEEP_ALIVE", "true").lower() != "false"
)
custom_timeout_ms = os.environ.get("BROWSERBASE_SESSION_TIMEOUT")
features_enabled = {
@ -78,7 +123,9 @@ class BrowserbaseProvider(CloudBrowserProvider):
if timeout_val > 0:
session_config["timeout"] = timeout_val
except ValueError:
logger.warning("Invalid BROWSERBASE_SESSION_TIMEOUT value: %s", custom_timeout_ms)
logger.warning(
"Invalid BROWSERBASE_SESSION_TIMEOUT value: %s", custom_timeout_ms
)
if enable_proxies:
session_config["proxies"] = True
@ -156,7 +203,9 @@ class BrowserbaseProvider(CloudBrowserProvider):
features_enabled["custom_timeout"] = True
feature_str = ", ".join(k for k, v in features_enabled.items() if v)
logger.info("Created Browserbase session %s with features: %s", session_name, feature_str)
logger.info(
"Created Browserbase session %s with features: %s", session_name, feature_str
)
return {
"session_name": session_name,
@ -169,7 +218,9 @@ class BrowserbaseProvider(CloudBrowserProvider):
try:
config = self._get_config()
except ValueError:
logger.warning("Cannot close Browserbase session %s — missing credentials", session_id)
logger.warning(
"Cannot close Browserbase session %s — missing credentials", session_id
)
return False
try:
@ -203,7 +254,10 @@ class BrowserbaseProvider(CloudBrowserProvider):
def emergency_cleanup(self, session_id: str) -> None:
config = self._get_config_or_none()
if config is None:
logger.warning("Cannot emergency-cleanup Browserbase session %s — missing credentials", session_id)
logger.warning(
"Cannot emergency-cleanup Browserbase session %s — missing credentials",
session_id,
)
return
try:
requests.post(
@ -219,4 +273,25 @@ class BrowserbaseProvider(CloudBrowserProvider):
timeout=5,
)
except Exception as e:
logger.debug("Emergency cleanup failed for Browserbase session %s: %s", session_id, e)
logger.debug(
"Emergency cleanup failed for Browserbase session %s: %s", session_id, e
)
def get_setup_schema(self) -> Dict[str, Any]:
    """Return the static setup descriptor for the Browserbase provider.

    The descriptor carries display metadata (``name``, ``badge``, ``tag``),
    the two environment variables to collect (API key and project ID) and
    a ``post_setup`` value.  Only the API-key entry carries a ``url``.
    """
    api_key_var: Dict[str, str] = {
        "key": "BROWSERBASE_API_KEY",
        "prompt": "Browserbase API key",
        "url": "https://browserbase.com",
    }
    project_id_var: Dict[str, str] = {
        "key": "BROWSERBASE_PROJECT_ID",
        "prompt": "Browserbase project ID",
    }
    schema: Dict[str, Any] = {}
    schema["name"] = "Browserbase"
    schema["badge"] = "paid"
    schema["tag"] = "Cloud browser with stealth and proxies"
    schema["env_vars"] = [api_key_var, project_id_var]
    # NOTE(review): presumably the name of a post-setup step known to the
    # setup tooling — confirm against the consumer of this schema.
    schema["post_setup"] = "agent_browser"
    return schema

View file

@ -0,0 +1,16 @@
"""Firecrawl cloud browser plugin — bundled, auto-loaded.
Distinct from ``plugins/web/firecrawl/`` (the web search/extract/crawl
plugin); both share the FIRECRAWL_API_KEY but speak to different endpoints
(``/v2/browser`` here vs ``/v2/search`` / ``/v2/scrape`` / ``/v2/crawl``
over there).
"""
from __future__ import annotations
from plugins.browser.firecrawl.provider import FirecrawlBrowserProvider
def register(ctx) -> None:
    """Plugin entry point: build the Firecrawl cloud-browser provider and register it on *ctx*."""
    provider = FirecrawlBrowserProvider()
    ctx.register_browser_provider(provider)

View file

@ -0,0 +1,7 @@
name: browser-firecrawl
version: 1.0.0
description: "Firecrawl (https://firecrawl.dev) cloud browser backend. Requires FIRECRAWL_API_KEY. Distinct from the firecrawl WEB search/extract plugin — the two share an API key but operate on different endpoints."
author: NousResearch
kind: backend
provides_browser_providers:
- firecrawl

View file

@ -1,26 +1,61 @@
"""Firecrawl cloud browser provider."""
"""Firecrawl cloud browser provider — plugin form.
Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
ABC introduced in PR #25214). The legacy in-tree module
``tools.browser_providers.firecrawl`` was removed in the same PR; this file
is now the canonical implementation.
This is the cloud-browser path — distinct from the firecrawl WEB plugin at
``plugins/web/firecrawl/`` which handles search/extract/crawl on
``/v2/search`` / ``/v2/scrape`` / ``/v2/crawl``. The two plugins share the
``FIRECRAWL_API_KEY`` env var but talk to different endpoints (this one
hits ``/v2/browser``).
Config keys this provider responds to::
browser:
cloud_provider: "firecrawl" # explicit selection only — not in the
# legacy auto-detect walk
Auth env vars::
FIRECRAWL_API_KEY=... # https://firecrawl.dev
FIRECRAWL_API_URL=... # optional override (default https://api.firecrawl.dev)
FIRECRAWL_BROWSER_TTL=... # optional, default 300 seconds
"""
from __future__ import annotations
import logging
import os
import uuid
from typing import Dict
from typing import Any, Dict
import requests
from tools.browser_providers.base import CloudBrowserProvider
from agent.browser_provider import BrowserProvider
logger = logging.getLogger(__name__)
_BASE_URL = "https://api.firecrawl.dev"
class FirecrawlProvider(CloudBrowserProvider):
"""Firecrawl (https://firecrawl.dev) cloud browser backend."""
class FirecrawlBrowserProvider(BrowserProvider):
"""Firecrawl (https://firecrawl.dev) cloud browser backend.
def provider_name(self) -> str:
Cloud-browser path only — search/extract/crawl live in the separate
``plugins/web/firecrawl/`` plugin.
"""
@property
def name(self) -> str:
return "firecrawl"
@property
def display_name(self) -> str:
return "Firecrawl"
def is_configured(self) -> bool:
def is_available(self) -> bool:
return bool(os.environ.get("FIRECRAWL_API_KEY"))
# ------------------------------------------------------------------
@ -100,13 +135,34 @@ class FirecrawlProvider(CloudBrowserProvider):
return False
def emergency_cleanup(self, session_id: str) -> None:
if not self.is_available():
logger.warning(
"Cannot emergency-cleanup Firecrawl session %s — missing credentials",
session_id,
)
return
try:
requests.delete(
f"{self._api_url()}/v2/browser/{session_id}",
headers=self._headers(),
timeout=5,
)
except ValueError:
logger.warning("Cannot emergency-cleanup Firecrawl session %s — missing credentials", session_id)
except Exception as e:
logger.debug("Emergency cleanup failed for Firecrawl session %s: %s", session_id, e)
logger.debug(
"Emergency cleanup failed for Firecrawl session %s: %s", session_id, e
)
def get_setup_schema(self) -> Dict[str, Any]:
    """Return the static setup descriptor for the Firecrawl cloud browser.

    The descriptor carries display metadata (``name``, ``badge``, ``tag``),
    the single environment variable to collect (``FIRECRAWL_API_KEY``) and
    a ``post_setup`` value.  Every value is a constant.
    """
    api_key_var: Dict[str, str] = {
        "key": "FIRECRAWL_API_KEY",
        "prompt": "Firecrawl API key",
        "url": "https://firecrawl.dev",
    }
    schema: Dict[str, Any] = {}
    schema["name"] = "Firecrawl"
    schema["badge"] = "paid"
    schema["tag"] = "Cloud browser with remote execution"
    schema["env_vars"] = [api_key_var]
    # NOTE(review): presumably the name of a post-setup step known to the
    # setup tooling — confirm against the consumer of this schema.
    schema["post_setup"] = "agent_browser"
    return schema

View file

@ -222,7 +222,7 @@ def _fmt_summary(summary: Dict[str, Any]) -> str:
def _handle_slash(raw_args: str) -> Optional[str]:
argv = raw_args.strip().split()
if not argv or argv[0] in ("help", "-h", "--help"):
if not argv or argv[0] in {"help", "-h", "--help"}:
return _HELP_TEXT
sub = argv[0]

View file

@ -72,7 +72,7 @@ def register(ctx) -> None:
# tested path there and guest-join Chromium is flakier. Refuse to register
# rather than half-working.
system = platform.system().lower()
if system not in ("linux", "darwin"):
if system not in {"linux", "darwin"}:
logger.info(
"google_meet plugin: platform=%s not supported (linux/macos only)",
system,

View file

@ -159,7 +159,7 @@ def _cmd_setup() -> int:
print("---------------------")
system = _p.system()
system_ok = system in ("Linux", "Darwin")
system_ok = system in {"Linux", "Darwin"}
print(f" platform : {system} [{'ok' if system_ok else 'unsupported'}]")
try:
@ -231,7 +231,7 @@ def _cmd_install(*, realtime: bool, assume_yes: bool) -> int:
import subprocess as _sp
system = _p.system()
if system not in ("Linux", "Darwin"):
if system not in {"Linux", "Darwin"}:
print(f"google_meet install: {system} is not supported (linux/macos only)")
return 1
@ -242,7 +242,7 @@ def _cmd_install(*, realtime: bool, assume_yes: bool) -> int:
ans = input(f"{prompt} [y/N] ").strip().lower()
except EOFError:
return False
return ans in ("y", "yes")
return ans in {"y", "yes"}
print("google_meet install")
print("-------------------")

View file

@ -447,7 +447,7 @@ def _mac_audio_device_index(device_name: str) -> str:
def run_bot() -> int: # noqa: C901 — orchestration, explicit branches
url = os.environ.get("HERMES_MEET_URL", "").strip()
out_dir_env = os.environ.get("HERMES_MEET_OUT_DIR", "").strip()
headed = os.environ.get("HERMES_MEET_HEADED", "").lower() in ("1", "true", "yes")
headed = os.environ.get("HERMES_MEET_HEADED", "").lower() in {"1", "true", "yes"}
auth_state = os.environ.get("HERMES_MEET_AUTH_STATE", "").strip()
guest_name = os.environ.get("HERMES_MEET_GUEST_NAME", "Hermes Agent")
duration_s = _parse_duration(os.environ.get("HERMES_MEET_DURATION", ""))
@ -808,7 +808,7 @@ def _looks_like_human_speaker(speaker: str, bot_guest_name: str) -> bool:
if not speaker or not speaker.strip():
return False
spk = speaker.strip().lower()
if spk in ("unknown", "you", bot_guest_name.strip().lower()):
if spk in {"unknown", "you", bot_guest_name.strip().lower()}:
return False
return True

View file

@ -103,7 +103,7 @@ def node_command(args: argparse.Namespace) -> int:
print(f"removed {args.name!r}" if ok else f"no such node: {args.name!r}")
return 0 if ok else 1
if cmd in ("status", "ping"):
if cmd in {"status", "ping"}:
entry = reg.get(args.name)
if entry is None:
print(f"no such node: {args.name!r}", file=sys.stderr)

View file

@ -183,7 +183,7 @@ class RealtimeSession:
rid = (frame.get("response") or {}).get("id")
if rid:
self._last_response_id = rid
elif ftype in ("response.done", "response.completed", "response.cancelled"):
elif ftype in {"response.done", "response.completed", "response.cancelled"}:
break
elif ftype == "error":
err = frame.get("error") or frame

View file

@ -36,7 +36,7 @@ def check_meet_requirements() -> bool:
handlers relax the requirement when a node is addressed.
"""
import platform as _p
if _p.system().lower() not in ("linux", "darwin"):
if _p.system().lower() not in {"linux", "darwin"}:
return False
try:
import playwright # noqa: F401
@ -238,7 +238,7 @@ def handle_meet_join(args: Dict[str, Any], **_kw) -> str:
if not url:
return _err("url is required")
mode = (args.get("mode") or "transcribe").strip().lower()
if mode not in ("transcribe", "realtime"):
if mode not in {"transcribe", "realtime"}:
return _err(f"mode must be 'transcribe' or 'realtime' (got {mode!r})")
node = args.get("node")

View file

@ -908,6 +908,7 @@
return createNewBoard(payload).then(function () { setShowNewBoard(false); });
},
}) : null,
h(OrchestrationPanel, null),
h(AttentionStrip, {
boardData,
onOpen: setSelectedTaskId,
@ -1386,6 +1387,288 @@
}, "?");
}
// ---------------------------------------------------------------------
// OrchestrationPanel — collapsible settings panel for the kanban
// orchestrator (orchestrator profile picker, default assignee picker,
// auto-decompose toggle, plus per-profile description editing with
// auto-generate). Backed by /orchestration + /profiles endpoints.
// ---------------------------------------------------------------------
/**
 * Collapsible settings panel for the kanban orchestrator.
 *
 * Collapsed: renders the mode pill plus a link that expands the panel.
 * Expanded: renders a card with the orchestrator-profile picker, the
 * default-assignee picker, the auto-decompose checkbox, and one
 * ProfileDescriptionRow per profile.
 *
 * Reads GET `${API}/orchestration` and GET `${API}/profiles`; writes via
 * PUT `${API}/orchestration`, PATCH `${API}/profiles/:name` and
 * POST `${API}/profiles/:name/describe-auto`.
 */
function OrchestrationPanel() {
  const [expanded, setExpanded] = useState(false);
  // `settings` stays null until the first successful load; the UI uses
  // that to render the "loading" state and to disable the mode pill.
  const [settings, setSettings] = useState(null);
  const [profiles, setProfiles] = useState([]);
  // Map of profile name -> "save" | "auto" while a request for that
  // profile is in flight; the key is removed when the request settles.
  const [busy, setBusy] = useState({});
  // Last status banner: { ok: boolean, text: string } or null.
  const [msg, setMsg] = useState(null);
  // Fetch settings and the profile roster together; on success both
  // pieces of state are replaced and any previous banner is cleared.
  const loadAll = useCallback(function () {
    Promise.all([
      SDK.fetchJSON(`${API}/orchestration`),
      SDK.fetchJSON(`${API}/profiles`),
    ]).then(function (results) {
      setSettings(results[0] || null);
      setProfiles((results[1] && results[1].profiles) || []);
      setMsg(null);
    }).catch(function (err) {
      setMsg({ ok: false, text: "Failed to load: " + (err.message || String(err)) });
    });
  }, []);
  useEffect(function () {
    // Load on mount so the collapsed pill shows the real mode without
    // requiring the user to expand the panel first.
    if (settings === null) loadAll();
  }, [settings, loadAll]);
  // PUT a partial settings object; on success the server's response
  // replaces `settings`. On failure the promise resolves to undefined
  // (the error is reported via the banner, not re-thrown).
  const saveSettings = function (patch) {
    setMsg(null);
    return SDK.fetchJSON(`${API}/orchestration`, {
      method: "PUT",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(patch),
    }).then(function (res) {
      setSettings(res);
      setMsg({ ok: true, text: "Settings saved." });
      return res;
    }).catch(function (err) {
      setMsg({ ok: false, text: "Save failed: " + (err.message || String(err)) });
    });
  };
  // PATCH a profile's description, then reload everything so the row
  // reflects what the server stored.
  const saveProfileDescription = function (name, description) {
    setBusy(function (b) { return Object.assign({}, b, { [name]: "save" }); });
    return SDK.fetchJSON(`${API}/profiles/${encodeURIComponent(name)}`, {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ description: description }),
    }).then(function () {
      loadAll();
      setMsg({ ok: true, text: `Description saved for ${name}.` });
    }).catch(function (err) {
      setMsg({ ok: false, text: "Save failed: " + (err.message || String(err)) });
    }).then(function () {
      // Runs after success or failure (it follows the catch), so the
      // busy flag for this profile is always cleared.
      setBusy(function (b) {
        const next = Object.assign({}, b); delete next[name]; return next;
      });
    });
  };
  // POST describe-auto for a profile. A response with a falsy `ok` is
  // shown inline using `res.reason` rather than treated as a thrown error.
  const autoGenerateDescription = function (name, overwrite) {
    setBusy(function (b) { return Object.assign({}, b, { [name]: "auto" }); });
    return SDK.fetchJSON(`${API}/profiles/${encodeURIComponent(name)}/describe-auto`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ overwrite: !!overwrite }),
    }).then(function (res) {
      if (res && res.ok) {
        loadAll();
        setMsg({ ok: true, text: `Auto-generated description for ${name}.` });
      } else {
        setMsg({
          ok: false,
          text: "Auto-generate failed: " + ((res && res.reason) || "unknown error"),
        });
      }
    }).catch(function (err) {
      setMsg({ ok: false, text: "Auto-generate failed: " + (err.message || String(err)) });
    }).then(function () {
      // Runs after success or failure, clearing this profile's busy flag.
      setBusy(function (b) {
        const next = Object.assign({}, b); delete next[name]; return next;
      });
    });
  };
  const headerLabel = expanded
    ? "▾ Orchestration settings"
    : "▸ Orchestration settings";
  // Mode pill — always visible (collapsed or expanded). One click flips
  // between Auto and Manual. Auto = dispatcher decomposes new triage tasks
  // every tick. Manual = pre-PR behavior, the user clicks ⚗ Decompose on
  // each triage card (or runs `hermes kanban decompose <id>`) and tasks
  // stay in triage until then.
  const autoOn = !!(settings && settings.auto_decompose);
  const modePillTitle = settings === null
    ? "Loading mode…"
    : (autoOn
      ? "Orchestration: Auto — the dispatcher decomposes new triage tasks automatically every tick. Click to switch to Manual (pre-PR behavior)."
      : "Orchestration: Manual — triage tasks stay in triage until you click ⚗ Decompose on each card. Click to switch to Auto.");
  const modePill = h("button", {
    type: "button",
    onClick: function () {
      if (settings === null) return; // not loaded yet
      saveSettings({ auto_decompose: !autoOn });
    },
    disabled: settings === null,
    title: modePillTitle,
    className: "inline-flex items-center gap-1 rounded-full border px-2 py-0.5 "
      + "text-xs font-medium "
      + (autoOn
        ? "border-emerald-500/40 bg-emerald-500/10 text-emerald-700 dark:text-emerald-300"
        : "border-muted-foreground/30 bg-muted/30 text-muted-foreground"),
  },
    "Orchestration: ",
    h("span", { className: "ml-1 font-semibold" },
      settings === null ? "…" : (autoOn ? "Auto" : "Manual"))
  );
  // Collapsed view: just the pill and the expand link.
  if (!expanded) {
    return h("div", { className: "flex items-center gap-3 text-xs" },
      modePill,
      h("button", {
        type: "button",
        onClick: function () { setExpanded(true); },
        className: "underline text-muted-foreground hover:text-foreground",
        title: "Configure the kanban orchestrator (profile picker, default assignee, auto-decompose, profile descriptions)",
      }, headerLabel),
    );
  }
  // One <option> per profile, shared by both pickers below.
  const profileOptions = profiles.map(function (p) {
    const tag = p.is_default ? " (default)" : "";
    return h(SelectOption, { key: p.name, value: p.name }, p.name + tag);
  });
  return h(Card, { className: "p-3" },
    h(CardContent, { className: "p-2 flex flex-col gap-3" },
      h("div", { className: "flex items-center justify-between" },
        h("button", {
          type: "button",
          onClick: function () { setExpanded(false); },
          className: "text-sm font-medium underline-offset-2 hover:underline",
        }, headerLabel),
        modePill,
        h(Button, { onClick: loadAll, size: "sm" }, "Reload"),
      ),
      msg ? h("div", {
        className: msg.ok ? "hermes-kanban-msg-ok" : "hermes-kanban-msg-err",
      }, msg.text) : null,
      settings ? h("div", { className: "grid gap-3 sm:grid-cols-3" },
        h("div", { className: "flex flex-col gap-1" },
          h(Label, { className: "text-xs text-muted-foreground" },
            "Orchestrator profile"),
          h(Select, {
            value: settings.orchestrator_profile || "",
            className: "h-8",
            onChange: function (e) {
              // Accepts either a DOM-style event or the raw value.
              const v = (e && e.target ? e.target.value : e) || "";
              saveSettings({ orchestrator_profile: v });
            },
          },
            h(SelectOption, { value: "" },
              "(default: " + (settings.active_profile || "default") + ")"),
            profileOptions,
          ),
          h("div", { className: "text-[10px] text-muted-foreground" },
            "Resolved: " + (settings.resolved_orchestrator_profile || "default")),
        ),
        h("div", { className: "flex flex-col gap-1" },
          h(Label, { className: "text-xs text-muted-foreground" },
            "Default assignee"),
          h(Select, {
            value: settings.default_assignee || "",
            className: "h-8",
            onChange: function (e) {
              // Accepts either a DOM-style event or the raw value.
              const v = (e && e.target ? e.target.value : e) || "";
              saveSettings({ default_assignee: v });
            },
          },
            h(SelectOption, { value: "" },
              "(default: " + (settings.active_profile || "default") + ")"),
            profileOptions,
          ),
          h("div", { className: "text-[10px] text-muted-foreground" },
            "Resolved: " + (settings.resolved_default_assignee || "default")),
        ),
        h("div", { className: "flex flex-col gap-1" },
          h(Label, { className: "text-xs text-muted-foreground" },
            "Orchestration mode"),
          h("label", { className: "flex items-center gap-2 text-xs h-8" },
            h("input", {
              type: "checkbox",
              checked: !!settings.auto_decompose,
              onChange: function (e) {
                saveSettings({ auto_decompose: !!e.target.checked });
              },
            }),
            settings.auto_decompose ? "Auto (default)" : "Manual",
          ),
          h("div", { className: "text-[10px] text-muted-foreground" },
            "When on, the dispatcher decomposes new triage tasks automatically."),
        ),
      ) : h("div", { className: "text-xs text-muted-foreground" },
        "Loading…"),
      h("div", { className: "border-t pt-3" },
        h(Label, { className: "text-xs text-muted-foreground" },
          "Profile descriptions"),
        h("div", { className: "text-[10px] text-muted-foreground pb-2" },
          "Descriptions guide the orchestrator's routing. Click ⚗ to auto-generate, or edit and save."),
        profiles.length === 0
          ? h("div", { className: "text-xs text-muted-foreground" }, "No profiles installed.")
          : h("div", { className: "flex flex-col gap-2" },
            profiles.map(function (p) {
              return h(ProfileDescriptionRow, {
                key: p.name,
                profile: p,
                busy: busy[p.name] || null,
                onSave: saveProfileDescription,
                onAuto: autoGenerateDescription,
              });
            }),
          ),
      ),
    ),
  );
}
/**
 * One editable row in the "Profile descriptions" list.
 *
 * Props:
 *   profile - object with `name`, `description`, `description_auto`, `is_default`.
 *   busy    - "save", "auto", or a falsy value when no request is in flight.
 *   onSave  - called as onSave(name, draft) when Save is clicked.
 *   onAuto  - called as onAuto(name, true) when the Auto button is clicked.
 */
function ProfileDescriptionRow(props) {
  const p = props.profile;
  const [draft, setDraft] = useState(p.description || "");
  const busy = props.busy;
  // Re-sync the local draft if the server-side description changes (e.g.
  // after auto-generate). Cheap because re-runs only happen on prop change.
  useEffect(function () {
    setDraft(p.description || "");
  }, [p.description]);
  // The former `tag` local was computed but never read; the "auto — review"
  // badge below is what actually renders that state, so it was removed.
  return h("div", {
    className: "flex flex-col gap-1 border-l-2 pl-2",
    // Yellow-ish left border flags a profile that still has no description.
    style: { borderColor: p.description ? "#888" : "#cc6" },
  },
    h("div", { className: "flex items-center gap-2 text-xs" },
      h("span", { className: "font-medium" }, p.name),
      p.is_default ? h("span", { className: "text-[10px] text-muted-foreground" }, "(default)") : null,
      p.description_auto && p.description
        ? h("span", { className: "text-[10px] text-yellow-600" }, "auto — review")
        : null,
      !p.description
        ? h("span", { className: "text-[10px] text-yellow-600" }, "⚠ no description")
        : null,
    ),
    h("div", { className: "flex items-center gap-2" },
      h(Input, {
        value: draft,
        onChange: function (e) { setDraft(e.target.value); },
        placeholder: "What is this profile good at?",
        className: "h-7 text-xs flex-1",
      }),
      h(Button, {
        onClick: function () { props.onSave(p.name, draft); },
        size: "sm",
        // Nothing to save while a request is running or the draft is unchanged.
        disabled: !!busy || draft === (p.description || ""),
        title: "Save the description above as user-authored",
      }, busy === "save" ? "Saving…" : "Save"),
      h(Button, {
        // Always requests overwrite: the click is an explicit regenerate.
        onClick: function () { props.onAuto(p.name, true); },
        size: "sm",
        disabled: !!busy,
        title: "Auto-generate a description from this profile's skills and model",
      }, busy === "auto" ? "Generating…" : "⚗ Auto"),
    ),
  );
}
function BoardSwitcher(props) {
const { t } = useI18n();
const list = props.boardList || [];
@ -2395,6 +2678,25 @@
});
};
// POST /tasks/:id/decompose — fan a triage task out into a graph
// of child tasks routed to specialist profiles by description.
// Refreshes both the drawer (so the user sees the root flip to
// todo) and the board (so the new children appear in the columns).
const doDecompose = function () {
  // Board-scoped URL for this task's decompose endpoint.
  const url = withBoard(
    `${API}/tasks/${encodeURIComponent(props.taskId)}/decompose`,
    boardSlug
  );
  const request = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({}),
  };
  const refreshAndForward = function (res) {
    // Reload the drawer, then the board, and pass the outcome through
    // so the caller can render it.
    load();
    props.onRefresh();
    return res;
  };
  return SDK.fetchJSON(url, request).then(refreshAndForward);
};
const addLink = function (parentId) {
return SDK.fetchJSON(withBoard(`${API}/links`, boardSlug), {
method: "POST",
@ -2486,6 +2788,7 @@
boardSlug: boardSlug,
onPatch: doPatch,
onSpecify: doSpecify,
onDecompose: doDecompose,
onAddParent: addLink,
onRemoveParent: removeLink,
onAddChild: addChild,
@ -2559,6 +2862,7 @@
task: t,
onPatch: props.onPatch,
onSpecify: props.onSpecify,
onDecompose: props.onDecompose,
}),
h(DiagnosticsSection, {
task: t,
@ -3023,6 +3327,8 @@
const task = props.task;
const [specifyBusy, setSpecifyBusy] = useState(false);
const [specifyMsg, setSpecifyMsg] = useState(null);
const [decomposeBusy, setDecomposeBusy] = useState(false);
const [decomposeMsg, setDecomposeMsg] = useState(null);
const b = function (label, patch, enabled, confirmMsg) {
return h(Button, {
onClick: function () { if (enabled !== false) props.onPatch(patch, { confirm: confirmMsg }); },
@ -3067,9 +3373,57 @@
}, specifyBusy ? "Specifying…" : "✨ Specify")
: null;
// "Decompose" is the orchestrator-driven fan-out. Like Specify, only
// makes sense on triage-column tasks — elsewhere the backend short-
// circuits with ok:false. When the orchestrator returns fanout:false
// we render the same single-task message as Specify; when it fans
// out we report the child count for quick at-a-glance verification.
// Click handler for the Decompose button: guards against double clicks,
// runs props.onDecompose(), and turns the outcome into a status message.
const onDecomposeClick = function () {
  if (decomposeBusy) return;
  setDecomposeBusy(true);
  setDecomposeMsg(null);
  props.onDecompose().then(function (res) {
    if (!(res && res.ok)) {
      setDecomposeMsg({
        ok: false,
        text: "Decompose failed: " + ((res && res.reason) || "unknown error"),
      });
      return;
    }
    if (res.fanout && res.child_ids && res.child_ids.length) {
      setDecomposeMsg({
        ok: true,
        text: `Decomposed into ${res.child_ids.length} children: ${res.child_ids.join(", ")}`,
      });
      return;
    }
    // ok but no fan-out: report the single-task outcome, with the new
    // title when one was returned.
    const suffix = res.new_title
      ? ` — retitled: ${res.new_title}`
      : "";
    setDecomposeMsg({
      ok: true,
      text: `Single task (no fanout)${suffix}`,
    });
  }).catch(function (err) {
    setDecomposeMsg({
      ok: false,
      text: "Decompose failed: " + (err.message || String(err)),
    });
  }).then(function () {
    // Follows the catch, so it runs on success and failure alike.
    setDecomposeBusy(false);
  });
};
// Only offered for triage tasks, and only when the parent supplied a handler.
const showDecompose = task.status === "triage" && props.onDecompose;
const decomposeButton = showDecompose
  ? h(Button, {
      onClick: onDecomposeClick,
      disabled: decomposeBusy,
      size: "sm",
    }, decomposeBusy ? "Decomposing…" : "⚗ Decompose")
  : null;
return h("div", null,
h("div", { className: "hermes-kanban-actions" },
specifyButton,
decomposeButton,
b("→ triage", { status: "triage" }, task.status !== "triage"),
b("→ ready", { status: "ready" }, task.status !== "ready"),
// No direct → running button: /tasks/:id PATCH rejects status=running
@ -3091,6 +3445,11 @@
? "hermes-kanban-msg-ok"
: "hermes-kanban-msg-err",
}, specifyMsg.text) : null,
decomposeMsg ? h("div", {
className: decomposeMsg.ok
? "hermes-kanban-msg-ok"
: "hermes-kanban-msg-err",
}, decomposeMsg.text) : null,
);
}

View file

@ -628,7 +628,7 @@ def update_task(task_id: str, payload: UpdateTaskBody, board: Optional[str] = Qu
status_code=400,
detail="Cannot set status to 'running' directly; use the dispatcher/claim path",
)
elif s in ("todo", "triage"):
elif s in {"todo", "triage"}:
ok = _set_status_direct(conn, task_id, s)
else:
raise HTTPException(status_code=400, detail=f"unknown status: {s}")
@ -742,7 +742,7 @@ def _set_status_direct(
(task_id, run_id, json.dumps({"status": new_status}), int(time.time())),
)
# If we re-opened something, children may have gone stale.
if new_status in ("done", "ready"):
if new_status in {"done", "ready"}:
kanban_db.recompute_ready(conn)
return True
@ -868,7 +868,7 @@ def bulk_update(payload: BulkTaskBody, board: Optional[str] = Query(None)):
ok = kanban_db.unblock_task(conn, tid)
else:
ok = _set_status_direct(conn, tid, "ready")
elif s in ("todo", "running", "triage"):
elif s in {"todo", "running", "triage"}:
ok = _set_status_direct(conn, tid, s)
else:
entry.update(ok=False, error=f"unknown status {s!r}")
@ -1535,6 +1535,279 @@ def switch_board(slug: str):
_EVENT_POLL_SECONDS = 0.3
# ---------------------------------------------------------------------------
# Profile metadata & description editing (consumed by the kanban orchestrator)
# ---------------------------------------------------------------------------
# Request body for PATCH /profiles/{profile_name}.
class DescribeBody(BaseModel):
    # The handler treats ``None`` like ``""`` and strips whitespace before
    # storing, so an empty or whitespace-only value clears the description.
    description: Optional[str] = None  # explicit user-authored text
# Request body for POST /profiles/{profile_name}/describe-auto.
class DescribeAutoBody(BaseModel):
    # Forwarded as ``overwrite=`` to ``profile_describer.describe_profile``.
    overwrite: bool = False
@router.get("/profiles")
def list_profile_roster():
    """List every installed profile together with its description metadata.

    Consumed by the dashboard's settings panel (orchestrator picker) and
    the profile-description editing UI. Profiles without a description
    still appear here — they're routable on name alone, just less
    precisely.

    Returns:
        ``{"profiles": [...]}`` where each entry has the keys ``name``,
        ``is_default``, ``model``, ``provider``, ``description``,
        ``description_auto`` and ``skill_count``. Missing/falsy values are
        normalised to ``""``, ``False`` or ``0``.

    Raises:
        HTTPException: 500 when importing ``hermes_cli.profiles`` or
            calling ``list_profiles()`` fails.
    """
    try:
        from hermes_cli import profiles as profiles_mod

        installed = profiles_mod.list_profiles()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"failed to list profiles: {exc}")

    roster = []
    for entry in installed:
        roster.append(
            {
                "name": entry.name,
                "is_default": bool(entry.is_default),
                "model": entry.model or "",
                "provider": entry.provider or "",
                "description": entry.description or "",
                "description_auto": bool(entry.description_auto),
                "skill_count": int(entry.skill_count or 0),
            }
        )
    return {"profiles": roster}
@router.patch("/profiles/{profile_name}")
def update_profile_description(profile_name: str, payload: DescribeBody):
    """Set or clear the description of a profile.

    The submitted text is stripped; an empty result clears the description.
    The value is written with ``description_auto=False``, marking it as
    user-authored so the auto-describer won't overwrite it on a sweep
    without ``--overwrite``.

    Returns:
        ``{"ok": True, "profile": <canonical name>, "description": <text>}``.

    Raises:
        HTTPException: 404 when the profile directory does not exist;
            500 for any other failure while resolving or writing.
    """
    try:
        from hermes_cli import profiles as profiles_mod

        canonical = profiles_mod.normalize_profile_name(profile_name)
        if canonical != "default":
            target_dir = profiles_mod.get_profile_dir(canonical)
        else:
            # The default profile lives directly in the Hermes home directory.
            from hermes_constants import get_hermes_home  # type: ignore
            from pathlib import Path as _Path

            target_dir = _Path(get_hermes_home())

        if not target_dir.is_dir():
            raise HTTPException(status_code=404, detail=f"profile '{profile_name}' not found")

        cleaned = (payload.description or "").strip()
        profiles_mod.write_profile_meta(
            target_dir,
            description=cleaned,
            description_auto=False,
        )
    except HTTPException:
        # Let the 404 above through untouched instead of wrapping it as a 500.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"failed to update profile: {exc}")

    return {"ok": True, "profile": canonical, "description": cleaned}
@router.post("/profiles/{profile_name}/describe-auto")
def auto_describe_profile(profile_name: str, payload: DescribeAutoBody):
    """Generate a description for the named profile via the auxiliary LLM.

    Delegates to ``hermes_cli.profile_describer.describe_profile``
    (``auxiliary.profile_describer``), which persists the result with
    ``description_auto: true`` so the dashboard can surface a "review"
    badge. Maps 1:1 to ``hermes profile describe <name> --auto``.

    Non-OK outcomes are NOT HTTP errors — the UI renders the reason inline
    (e.g. "no auxiliary client configured") so the operator can fix
    config and retry without a page reload.

    Returns:
        ``{"ok", "profile", "reason", "description"}`` copied from the
        describer's outcome object.

    Raises:
        HTTPException: 500 only when the describer import or call raises.
    """
    try:
        from hermes_cli import profile_describer  # noqa: WPS433 (intentional)

        allow_overwrite = bool(payload.overwrite)
        outcome = profile_describer.describe_profile(
            profile_name,
            overwrite=allow_overwrite,
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"describer crashed: {exc}")

    response = {"ok": bool(outcome.ok)}
    response["profile"] = outcome.profile_name
    response["reason"] = outcome.reason
    response["description"] = outcome.description
    return response
# ---------------------------------------------------------------------------
# Decompose endpoint (orchestrator-driven fan-out)
# ---------------------------------------------------------------------------
# Request body for POST /tasks/{task_id}/decompose.
class DecomposeBody(BaseModel):
    # Forwarded as ``author=`` to ``kanban_decompose.decompose_task``;
    # a missing or empty value is passed through as ``None``.
    author: Optional[str] = None
@router.post("/tasks/{task_id}/decompose")
def decompose_task_endpoint(
    task_id: str,
    payload: DecomposeBody,
    board: Optional[str] = Query(None),
):
    """Fan a triage-column task out into a graph of child tasks.

    The work is done by ``hermes_cli.kanban_decompose.decompose_task`` via
    the auxiliary LLM, routing children to specialist profiles by
    description. Maps 1:1 to ``hermes kanban decompose <task_id>``.

    Returns the outcome shape used by the CLI: ``{ok, task_id, reason,
    fanout, child_ids, new_title}``. A non-OK outcome is NOT an HTTP
    error — the UI renders the reason inline.

    Runs in FastAPI's threadpool (sync ``def``) because the LLM call
    can take minutes on reasoning models.
    """
    env_key = "HERMES_KANBAN_BOARD"
    board = _resolve_board(board)
    saved_value = os.environ.get(env_key)
    try:
        # The board is handed to the decomposer through the environment and
        # restored afterwards.
        # NOTE(review): os.environ is process-wide, so overlapping requests
        # for different boards could presumably see each other's value while
        # the LLM call is running — confirm whether that can happen here.
        os.environ[env_key] = board or kanban_db.DEFAULT_BOARD
        from hermes_cli import kanban_decompose  # noqa: WPS433 (intentional)

        outcome = kanban_decompose.decompose_task(
            task_id,
            author=(payload.author or None),
        )
    finally:
        if saved_value is not None:
            os.environ[env_key] = saved_value
        else:
            os.environ.pop(env_key, None)

    result = {"ok": bool(outcome.ok)}
    result["task_id"] = outcome.task_id
    result["reason"] = outcome.reason
    result["fanout"] = bool(outcome.fanout)
    result["child_ids"] = outcome.child_ids or []
    result["new_title"] = outcome.new_title
    return result
# ---------------------------------------------------------------------------
# Orchestration settings (kanban.orchestrator_profile / default_assignee /
# auto_decompose) — surfaced to the dashboard's settings panel
# ---------------------------------------------------------------------------
class OrchestrationSettingsBody(BaseModel):
    # Request body for PUT /orchestration. Every field is optional; the
    # handler skips any field left as ``None``.
    #
    # Profile name for the orchestrator; the handler strips whitespace and
    # an empty string clears the override.
    orchestrator_profile: Optional[str] = None
    # Profile name used as the default assignee; same stripping and
    # empty-string-clears semantics as ``orchestrator_profile``.
    default_assignee: Optional[str] = None
    # Stored under ``kanban.auto_decompose`` after coercion to ``bool``.
    auto_decompose: Optional[bool] = None
@router.get("/orchestration")
def get_orchestration_settings():
    """Return the current kanban orchestration knobs from config.yaml
    plus the resolved effective values (filling in fallbacks).

    This read path is best-effort: a config that fails to load, or whose
    ``kanban`` section is missing or malformed, is treated as "nothing
    configured" instead of failing the request.

    Returns:
        A dict with the explicitly configured ``orchestrator_profile``,
        ``default_assignee`` and ``auto_decompose`` values, the
        ``resolved_orchestrator_profile`` / ``resolved_default_assignee``
        values after fallback, and the ``active_profile`` name used as
        that fallback.
    """
    try:
        from hermes_cli.config import load_config

        cfg = load_config() or {}
    except Exception:
        cfg = {}

    # Guard against a non-dict root or a non-dict ``kanban:`` value (e.g. a
    # bare string or list in the YAML). The PUT handler already tolerates
    # such a section, so the GET handler must not crash on it either.
    kanban_cfg = cfg.get("kanban") if isinstance(cfg, dict) else None
    if not isinstance(kanban_cfg, dict):
        kanban_cfg = {}

    # ``str(...)`` keeps ``.strip()`` safe when the YAML scalar was parsed
    # as a non-string (e.g. a purely numeric profile name).
    explicit_orch = str(kanban_cfg.get("orchestrator_profile") or "").strip()
    explicit_default = str(kanban_cfg.get("default_assignee") or "").strip()
    auto_decompose = bool(kanban_cfg.get("auto_decompose", True))

    # Resolve fallbacks the same way the decomposer does.
    resolved_orch = explicit_orch
    resolved_default = explicit_default
    try:
        from hermes_cli import profiles as profiles_mod

        active_default = profiles_mod.get_active_profile_name() or "default"
        # An unset or unknown profile falls back to the active profile.
        if not resolved_orch or not profiles_mod.profile_exists(resolved_orch):
            resolved_orch = active_default
        if not resolved_default or not profiles_mod.profile_exists(resolved_default):
            resolved_default = active_default
    except Exception:
        # Profile lookup unavailable: only fill in values that are unset,
        # since existence of the explicit names cannot be checked.
        active_default = "default"
        if not resolved_orch:
            resolved_orch = active_default
        if not resolved_default:
            resolved_default = active_default

    return {
        "orchestrator_profile": explicit_orch,
        "default_assignee": explicit_default,
        "auto_decompose": auto_decompose,
        "resolved_orchestrator_profile": resolved_orch,
        "resolved_default_assignee": resolved_default,
        "active_profile": active_default,
    }
@router.put("/orchestration")
def set_orchestration_settings(payload: OrchestrationSettingsBody):
    """Update the kanban orchestration knobs in ~/.hermes/config.yaml.

    Each field is optional; only fields explicitly passed are written
    (a field left as ``None`` is skipped). ``orchestrator_profile`` /
    ``default_assignee`` accept empty strings to clear the override and
    fall back to the default profile.

    All profile names are validated before the loaded config is modified,
    so a rejected request leaves the config dict untouched.

    Args:
        payload: The settings to change.

    Returns:
        The same shape as ``get_orchestration_settings()``, re-read after
        saving.

    Raises:
        HTTPException: 400 when a non-empty profile name does not exist;
            500 when the config cannot be loaded or saved.
    """
    try:
        from hermes_cli.config import load_config, save_config

        cfg = load_config() or {}
    except Exception as exc:
        raise HTTPException(
            status_code=500,
            detail=f"failed to load config: {exc}",
        ) from exc

    # Validate any non-empty profile names exist before saving.
    try:
        from hermes_cli import profiles as profiles_mod
    except Exception:
        profiles_mod = None  # type: ignore

    updates = {}
    for field in ("orchestrator_profile", "default_assignee"):
        raw = getattr(payload, field)
        if raw is None:
            continue
        name = raw.strip()
        if name and profiles_mod is not None:
            try:
                exists = profiles_mod.profile_exists(name)
            except Exception:
                exists = True  # fail open if the lookup itself errors
            if not exists:
                raise HTTPException(
                    status_code=400,
                    detail=f"profile '{name}' does not exist",
                )
        updates[field] = name
    if payload.auto_decompose is not None:
        updates["auto_decompose"] = bool(payload.auto_decompose)

    # Only now touch the config; replace a missing or malformed (non-dict)
    # ``kanban`` section with a fresh dict.
    kanban_section = cfg.setdefault("kanban", {})
    if not isinstance(kanban_section, dict):
        kanban_section = {}
        cfg["kanban"] = kanban_section
    kanban_section.update(updates)

    try:
        save_config(cfg)
    except Exception as exc:
        raise HTTPException(
            status_code=500,
            detail=f"failed to save config: {exc}",
        ) from exc

    # Echo back the resolved state (callers usually re-render from it).
    return get_orchestration_settings()
@router.websocket("/events")
async def stream_events(ws: WebSocket):
# Enforce the dashboard session token as a query param — browsers can't

View file

@ -263,7 +263,7 @@ class ByteRoverMemoryProvider(MemoryProvider):
def on_memory_write(self, action: str, target: str, content: str) -> None:
"""Mirror built-in memory writes to ByteRover."""
if action not in ("add", "replace") or not content:
if action not in {"add", "replace"} or not content:
return
def _write():
@ -289,7 +289,7 @@ class ByteRoverMemoryProvider(MemoryProvider):
for msg in messages[-10:]: # last 10 messages
role = msg.get("role", "")
content = msg.get("content", "")
if isinstance(content, str) and content.strip() and role in ("user", "assistant"):
if isinstance(content, str) and content.strip() and role in {"user", "assistant"}:
parts.append(f"{role}: {content[:500]}")
if not parts:

View file

@ -416,7 +416,7 @@ def _build_embedded_profile_env(config: dict[str, Any], *, llm_api_key: str | No
current_base_url = config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "")
# The embedded daemon expects OpenAI wire format for these providers.
daemon_provider = "openai" if current_provider in ("openai_compatible", "openrouter") else current_provider
daemon_provider = "openai" if current_provider in {"openai_compatible", "openrouter"} else current_provider
env_values = {
"HINDSIGHT_API_LLM_PROVIDER": str(daemon_provider),
@ -596,7 +596,7 @@ class HindsightMemoryProvider(MemoryProvider):
try:
cfg = _load_config()
mode = cfg.get("mode", "cloud")
if mode in ("local", "local_embedded"):
if mode in {"local", "local_embedded"}:
available, _ = _check_local_runtime()
return available
if mode == "local_external":
@ -888,7 +888,7 @@ class HindsightMemoryProvider(MemoryProvider):
from hindsight import HindsightEmbedded
HindsightEmbedded.__del__ = lambda self: None
llm_provider = self._config.get("llm_provider", "")
if llm_provider in ("openai_compatible", "openrouter"):
if llm_provider in {"openai_compatible", "openrouter"}:
llm_provider = "openai"
logger.debug("Creating HindsightEmbedded client (profile=%s, provider=%s)",
self._config.get("profile", "hermes"), llm_provider)
@ -1132,7 +1132,7 @@ class HindsightMemoryProvider(MemoryProvider):
self._mode = "disabled"
return
self._api_key = self._config.get("apiKey") or self._config.get("api_key") or os.environ.get("HINDSIGHT_API_KEY", "")
default_url = _DEFAULT_LOCAL_URL if self._mode in ("local_embedded", "local_external") else _DEFAULT_API_URL
default_url = _DEFAULT_LOCAL_URL if self._mode in {"local_embedded", "local_external"} else _DEFAULT_API_URL
self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)
self._llm_base_url = self._config.get("llm_base_url", "")
@ -1152,10 +1152,10 @@ class HindsightMemoryProvider(MemoryProvider):
self._budget = budget if budget in _VALID_BUDGETS else "mid"
memory_mode = self._config.get("memory_mode", "hybrid")
self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid"
self._memory_mode = memory_mode if memory_mode in {"context", "tools", "hybrid"} else "hybrid"
prefetch_method = self._config.get("recall_prefetch_method") or self._config.get("prefetch_method", "recall")
self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall"
self._prefetch_method = prefetch_method if prefetch_method in {"recall", "reflect"} else "recall"
# Bank options
self._bank_mission = self._config.get("bank_mission", "")

Some files were not shown because too many files have changed in this diff Show more