mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
Merge branch 'main' of github.com:NousResearch/hermes-agent into bb/gui
# Conflicts: # cli.py # hermes_cli/main.py # run_agent.py # tests/hermes_cli/test_cmd_update.py # tools/mcp_tool.py # web/src/lib/gatewayClient.ts
This commit is contained in:
commit
02aaac8f73
260 changed files with 24547 additions and 13573 deletions
18
Dockerfile
18
Dockerfile
|
|
@ -66,9 +66,11 @@ RUN npm install --prefer-offline --no-audit && \
|
|||
# frontend stats the readme path during dep resolution, so we `touch` an
|
||||
# empty placeholder — the real README is restored by `COPY . .` below.
|
||||
#
|
||||
# `uv sync --frozen --no-install-project --extra all` installs only the
|
||||
# deps reachable through the composite `[all]` extra (handpicked set
|
||||
# intended for the production image). We do NOT use `--all-extras`:
|
||||
# `uv sync --frozen --no-install-project --extra all --extra messaging`
|
||||
# installs the deps reachable through the composite `[all]` extra
|
||||
# (handpicked set intended for the production image), plus gateway
|
||||
# messaging adapters that should work in the published image without a
|
||||
# first-boot lazy install. We do NOT use `--all-extras`:
|
||||
# that would pull in `[rl]` (atroposlib + tinker + torch + wandb from
|
||||
# git), `[yc-bench]` (another git dep), and `[termux-all]` (Android
|
||||
# redundancy), none of which belong in the published container.
|
||||
|
|
@ -76,7 +78,7 @@ RUN npm install --prefer-offline --no-audit && \
|
|||
# The editable link is created after the source copy below.
|
||||
COPY pyproject.toml uv.lock ./
|
||||
RUN touch ./README.md
|
||||
RUN uv sync --frozen --no-install-project --extra all
|
||||
RUN uv sync --frozen --no-install-project --extra all --extra messaging
|
||||
|
||||
# ---------- Source code ----------
|
||||
# .dockerignore excludes node_modules, so the installs above survive.
|
||||
|
|
@ -94,10 +96,10 @@ RUN cd web && npm run build && \
|
|||
# hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time
|
||||
# only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally
|
||||
# not chowned here.
|
||||
# The .venv MUST be hermes-writable so lazy_deps.py can install platform
|
||||
# packages (discord.py, telegram, slack, etc.) at first gateway boot.
|
||||
# Without this, `uv pip install` fails with EACCES and all messaging
|
||||
# adapters silently fail to load. See tools/lazy_deps.py.
|
||||
# The .venv MUST remain hermes-writable so lazy_deps.py can install
|
||||
# remaining optional platform packages and future pin bumps at first use.
|
||||
# Without this, `uv pip install` fails with EACCES and adapters silently
|
||||
# fail to load. See tools/lazy_deps.py.
|
||||
USER root
|
||||
RUN chmod -R a+rX /opt/hermes && \
|
||||
chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/node_modules
|
||||
|
|
|
|||
|
|
@ -1123,7 +1123,6 @@ def build_tool_start(
|
|||
)
|
||||
|
||||
# Generic fallback
|
||||
import json
|
||||
try:
|
||||
args_text = json.dumps(arguments, indent=2, default=str)
|
||||
except (TypeError, ValueError):
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"id": "hermes-agent",
|
||||
"name": "Hermes Agent",
|
||||
"version": "0.13.0",
|
||||
"version": "0.14.0",
|
||||
"description": "Self-improving open-source AI agent by Nous Research with ACP editor integration, persistent memory, skills, and rich tool support.",
|
||||
"repository": "https://github.com/NousResearch/hermes-agent",
|
||||
"website": "https://hermes-agent.nousresearch.com/docs/user-guide/features/acp",
|
||||
|
|
@ -9,7 +9,7 @@
|
|||
"license": "MIT",
|
||||
"distribution": {
|
||||
"uvx": {
|
||||
"package": "hermes-agent[acp]==0.13.0",
|
||||
"package": "hermes-agent[acp]==0.14.0",
|
||||
"args": ["hermes-acp"]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
1469
agent/agent_init.py
Normal file
1469
agent/agent_init.py
Normal file
File diff suppressed because it is too large
Load diff
2134
agent/agent_runtime_helpers.py
Normal file
2134
agent/agent_runtime_helpers.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -755,7 +755,8 @@ class _CodexCompletionsAdapter:
|
|||
|
||||
def _check_cancelled() -> None:
|
||||
if deadline is not None and time.monotonic() >= deadline:
|
||||
timed_out.set()
|
||||
if not timed_out.is_set():
|
||||
_close_client_on_timeout()
|
||||
raise TimeoutError(_timeout_message())
|
||||
try:
|
||||
from tools.interrupt import is_interrupted
|
||||
|
|
@ -1233,7 +1234,7 @@ def _read_nous_auth() -> Optional[dict]:
|
|||
|
||||
|
||||
def _nous_api_key(provider: dict) -> str:
|
||||
"""Extract the best API key from a Nous provider state dict."""
|
||||
"""Extract the Nous runtime credential from the compatibility field."""
|
||||
return provider.get("agent_key") or provider.get("access_token", "")
|
||||
|
||||
|
||||
|
|
@ -1246,17 +1247,25 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[
|
|||
"""Return fresh Nous runtime credentials when available.
|
||||
|
||||
This mirrors the main agent's 401 recovery path and keeps auxiliary
|
||||
clients aligned with the singleton auth store + mint flow instead of
|
||||
clients aligned with the singleton auth store + JWT/mint flow instead of
|
||||
relying only on whatever raw tokens happen to be sitting in auth.json
|
||||
or the credential pool.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.auth import resolve_nous_runtime_credentials
|
||||
from hermes_cli.auth import (
|
||||
NOUS_INFERENCE_AUTH_MODE_AUTO,
|
||||
NOUS_INFERENCE_AUTH_MODE_LEGACY,
|
||||
resolve_nous_runtime_credentials,
|
||||
)
|
||||
|
||||
creds = resolve_nous_runtime_credentials(
|
||||
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
|
||||
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
|
||||
force_mint=force_refresh,
|
||||
inference_auth_mode=(
|
||||
NOUS_INFERENCE_AUTH_MODE_LEGACY
|
||||
if force_refresh
|
||||
else NOUS_INFERENCE_AUTH_MODE_AUTO
|
||||
),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
|
||||
|
|
@ -1473,7 +1482,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
|
||||
|
||||
|
||||
def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
pool_present, entry = _select_pool_entry("openrouter")
|
||||
if pool_present:
|
||||
or_key = explicit_api_key or _pool_runtime_api_key(entry)
|
||||
|
|
@ -1483,7 +1492,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
|
|||
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
|
||||
logger.debug("Auxiliary client: OpenRouter via pool")
|
||||
return OpenAI(api_key=or_key, base_url=base_url,
|
||||
default_headers=build_or_headers()), _OPENROUTER_MODEL
|
||||
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
|
||||
|
||||
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
|
||||
if not or_key:
|
||||
|
|
@ -1491,7 +1500,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
|
|||
return None, None
|
||||
logger.debug("Auxiliary client: OpenRouter")
|
||||
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
|
||||
default_headers=build_or_headers()), _OPENROUTER_MODEL
|
||||
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
|
||||
|
||||
|
||||
def _describe_openrouter_unavailable() -> str:
|
||||
|
|
@ -2087,7 +2096,13 @@ def _is_payment_error(exc: Exception) -> bool:
|
|||
"""Detect payment/credit/quota exhaustion errors.
|
||||
|
||||
Returns True for HTTP 402 (Payment Required) and for 429/other errors
|
||||
whose message indicates billing exhaustion rather than rate limiting.
|
||||
whose message indicates billing exhaustion or daily quota exhaustion
|
||||
rather than transient rate limiting.
|
||||
|
||||
Daily token quota errors (e.g. Bedrock "Too many tokens per day",
|
||||
Vertex AI "quota exceeded") are functionally equivalent to credit
|
||||
exhaustion — the provider cannot serve the request until the quota
|
||||
resets — and should trigger the same provider-fallback logic.
|
||||
"""
|
||||
status = getattr(exc, "status_code", None)
|
||||
if status == 402:
|
||||
|
|
@ -2095,10 +2110,19 @@ def _is_payment_error(exc: Exception) -> bool:
|
|||
err_lower = str(exc).lower()
|
||||
# OpenRouter and other providers include "credits" or "afford" in 402 bodies,
|
||||
# but sometimes wrap them in 429 or other codes.
|
||||
# Daily quota exhaustion from Bedrock, Vertex AI, and similar providers
|
||||
# uses different language but is semantically identical to credit exhaustion.
|
||||
if status in {402, 429, None}:
|
||||
if any(kw in err_lower for kw in ("credits", "insufficient funds",
|
||||
"can only afford", "billing",
|
||||
"payment required")):
|
||||
if any(kw in err_lower for kw in (
|
||||
"credits", "insufficient funds",
|
||||
"can only afford", "billing",
|
||||
"payment required",
|
||||
# Daily / monthly quota exhaustion keywords
|
||||
"quota exceeded", "quota_exceeded",
|
||||
"too many tokens per day", "daily limit",
|
||||
"tokens per day", "daily quota",
|
||||
"resource exhausted", # Vertex AI / gRPC quota errors
|
||||
)):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
|
@ -2500,12 +2524,15 @@ def _refresh_provider_credentials(provider: str) -> bool:
|
|||
_evict_cached_clients(normalized)
|
||||
return True
|
||||
if normalized == "nous":
|
||||
from hermes_cli.auth import resolve_nous_runtime_credentials
|
||||
from hermes_cli.auth import (
|
||||
NOUS_INFERENCE_AUTH_MODE_LEGACY,
|
||||
resolve_nous_runtime_credentials,
|
||||
)
|
||||
|
||||
creds = resolve_nous_runtime_credentials(
|
||||
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
|
||||
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
|
||||
force_mint=True,
|
||||
inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_LEGACY,
|
||||
)
|
||||
if not str(creds.get("api_key", "") or "").strip():
|
||||
return False
|
||||
|
|
@ -2579,6 +2606,133 @@ def _try_payment_fallback(
|
|||
return None, None, ""
|
||||
|
||||
|
||||
def _try_main_agent_model_fallback(
    failed_provider: str,
    task: Optional[str] = None,
    reason: str = "error",
) -> Tuple[Optional[Any], Optional[str], str]:
    """Last-resort fallback to the user's main agent provider + model.

    Used after the configured fallback_chain is exhausted (or empty) for
    users with an explicit auxiliary provider.  This is the "safety net"
    layer: if nothing the user asked for can serve the request, try the
    main chat model before giving up.

    No fallback is offered when:
      * the main provider or main model is unset, or the main provider is
        ``"auto"``;
      * the failed provider already IS the main provider (no point
        retrying the same backend that just failed);
      * the main provider is currently marked unhealthy;
      * resolving a client for the main provider raises or yields no client.

    Args:
        failed_provider: Name of the provider that just failed; compared
            case-insensitively against the main provider.  ``None`` is
            tolerated and treated as an empty name.
        task: Auxiliary task name, used only for logging.
        reason: Short description of the failure, used only for logging.

    Returns:
        (client, model, provider_label) or (None, None, "") if no fallback.
    """
    main_provider = (_read_main_provider() or "").strip()
    main_model = (_read_main_model() or "").strip()
    # ``not main_provider`` already covers the empty string, so only the
    # "auto" sentinel needs an explicit comparison.
    if not main_provider or not main_model or main_provider.lower() == "auto":
        return None, None, ""

    skip = (failed_provider or "").lower().strip()
    if main_provider.lower() == skip:
        # The thing that failed IS the main model — nothing to fall back to.
        return None, None, ""
    if _is_provider_unhealthy(main_provider):
        _log_skip_unhealthy(main_provider, task)
        return None, None, ""

    try:
        client, resolved_model = resolve_provider_client(
            provider=main_provider, model=main_model,
        )
    except Exception as exc:
        # Deliberately best-effort: a broken main-provider setup must not
        # mask the original auxiliary error.  Leave a debug trace so the
        # failed attempt is still diagnosable.
        logger.debug(
            "Auxiliary %s: main agent model fallback (%s) could not be resolved: %s",
            task or "call", main_provider, exc,
        )
        client, resolved_model = None, None

    if client is None:
        return None, None, ""

    label = f"main-agent({main_provider})"
    # Prefer the model name the resolver settled on; fall back to the
    # configured main model when the resolver reports none.
    effective_model = resolved_model or main_model
    logger.info(
        "Auxiliary %s: %s on %s — falling back to main agent model %s (%s)",
        task or "call", reason, failed_provider, label, effective_model,
    )
    return client, effective_model, label
|
||||
|
||||
|
||||
def _try_configured_fallback_chain(
    task: str,
    failed_provider: str,
    reason: str = "error",
) -> Tuple[Optional[Any], Optional[str], str]:
    """Try user-configured fallback_chain for a specific auxiliary task.

    Reads auxiliary.<task>.fallback_chain from config.yaml and tries each
    entry in order.  Each entry must have at least ``provider``; ``model``,
    ``base_url``, and ``api_key`` are optional.  Entries that are not
    dicts, have no provider, or name the provider that just failed are
    skipped.

    Args:
        task: Auxiliary task name whose config is consulted.  A falsy task
            short-circuits to "no fallback".
        failed_provider: Provider that just failed; compared
            case-insensitively against each entry.  ``None`` is tolerated
            and treated as an empty name.
        reason: Short description of the failure, used only for logging.

    Returns:
        (client, model, provider_label) or (None, None, "") if no fallback.
        ``model`` is the entry's configured model and is ``None`` when the
        entry did not specify one.
    """
    if not task:
        return None, None, ""

    task_config = _get_auxiliary_task_config(task)
    chain = task_config.get("fallback_chain")
    if not chain or not isinstance(chain, list):
        return None, None, ""

    def _field(entry: dict, key: str) -> str:
        # ``entry.get(key, "")`` returns None for an explicit YAML ``null``,
        # and ``str(None)`` would become the literal string "None".
        # Coalesce first so null and missing values both read as empty.
        return str(entry.get(key) or "").strip()

    skip = (failed_provider or "").lower().strip()
    tried = []

    for i, entry in enumerate(chain):
        if not isinstance(entry, dict):
            continue
        fb_provider = _field(entry, "provider")
        if not fb_provider or fb_provider.lower() == skip:
            continue
        fb_model = _field(entry, "model") or None
        fb_base_url = _field(entry, "base_url") or None
        fb_api_key = _field(entry, "api_key") or None

        label = f"fallback_chain[{i}]({fb_provider})"

        try:
            fb_client = _resolve_single_provider(
                fb_provider, fb_model, fb_base_url, fb_api_key)
        except Exception as exc:
            # Best-effort: one broken entry must not stop the rest of the
            # chain from being tried, but keep a trace for diagnosis.
            logger.debug(
                "Auxiliary %s: %s could not be resolved: %s",
                task, label, exc,
            )
            fb_client = None

        if fb_client is not None:
            logger.info(
                "Auxiliary %s: %s on %s — configured fallback to %s (%s)",
                task, reason, failed_provider, label, fb_model or "default",
            )
            return fb_client, fb_model, label
        tried.append(label)

    if tried:
        logger.debug(
            "Auxiliary %s: configured fallback_chain exhausted (tried: %s)",
            task, ", ".join(tried),
        )
    return None, None, ""
|
||||
|
||||
|
||||
def _resolve_single_provider(
    provider: str,
    model: Optional[str] = None,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
) -> Optional[Any]:
    """Build a client for one fallback_chain entry.

    Delegates to ``resolve_provider_client`` with the entry's provider,
    model, base_url and api_key, and returns only the client half of the
    ``(client, model)`` pair it produces.  The resolved model name is
    discarded here.
    """
    resolver_kwargs = {
        "provider": provider,
        "model": model,
        "base_url": base_url,
        "api_key": api_key,
    }
    # Unpack as a pair so an unexpected result shape still fails loudly.
    client, _resolved_model = resolve_provider_client(**resolver_kwargs)
    return client
|
||||
|
||||
def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Full auto-detection chain.
|
||||
|
||||
|
|
@ -3049,10 +3203,17 @@ def resolve_provider_client(
|
|||
if custom_entry:
|
||||
custom_base = custom_entry.get("base_url", "").strip()
|
||||
custom_key = custom_entry.get("api_key", "").strip()
|
||||
custom_key_env = custom_entry.get("key_env", "").strip()
|
||||
custom_key_env = (custom_entry.get("key_env") or custom_entry.get("api_key_env") or "").strip()
|
||||
if not custom_key and custom_key_env:
|
||||
custom_key = os.getenv(custom_key_env, "").strip()
|
||||
custom_key = custom_key or "no-key-required"
|
||||
if custom_key == "no-key-required":
|
||||
logger.warning(
|
||||
"resolve_provider_client: named custom provider %r has no resolvable "
|
||||
"api_key — request will be sent with placeholder no-key-required "
|
||||
"and will 401 on auth-required endpoints",
|
||||
custom_entry.get("name") or provider,
|
||||
)
|
||||
# An explicit per-task api_mode override (from _resolve_task_provider_model)
|
||||
# wins; otherwise fall back to what the provider entry declared.
|
||||
entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
|
||||
|
|
@ -3400,7 +3561,7 @@ def _resolve_strict_vision_backend(
|
|||
if provider == "copilot":
|
||||
return resolve_provider_client("copilot", model, is_vision=True)
|
||||
if provider == "openrouter":
|
||||
return _try_openrouter()
|
||||
return _try_openrouter(model=model)
|
||||
if provider == "nous":
|
||||
return _try_nous(vision=True)
|
||||
if provider == "openai-codex":
|
||||
|
|
@ -4519,11 +4680,17 @@ def call_llm(
|
|||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
)
|
||||
# Only try alternative providers when the user didn't explicitly
|
||||
# configure this task's provider. Explicit provider = hard constraint;
|
||||
# auto (the default) = best-effort fallback chain. (#7559)
|
||||
# Respect explicit provider choice for non-capacity errors (auth, request
|
||||
# validation, etc.) but allow fallback when the provider clearly cannot
|
||||
# serve the request due to capacity: payment/quota exhaustion and
|
||||
# connection failures are capacity problems, not request constraints.
|
||||
# See #26803: daily token quota (429 + "too many tokens per day") must
|
||||
# fall back just like a 402 credit error.
|
||||
is_auto = resolved_provider in {"auto", "", None}
|
||||
if should_fallback and is_auto:
|
||||
# Capacity errors bypass the explicit-provider gate: the provider
|
||||
# literally cannot serve this request regardless of user intent.
|
||||
is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
if should_fallback and (is_auto or is_capacity_error):
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
# Resolve the actual provider label (resolved_provider may be
|
||||
|
|
@ -4539,8 +4706,24 @@ def call_llm(
|
|||
reason = "connection error"
|
||||
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
|
||||
task or "call", reason, resolved_provider, first_err)
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
|
||||
# Fallback order (#26882, #26803):
|
||||
# 1. User-configured fallback_chain (per-task) if set
|
||||
# 2. Main agent model (last-resort safety net)
|
||||
# For auto users (no explicit aux provider), use the full
|
||||
# auto-detection chain instead — its Step 1 IS the main agent
|
||||
# model, so users on `auto` already get main-model fallback.
|
||||
fb_client, fb_model, fb_label = (None, None, "")
|
||||
if is_auto:
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
else:
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
if fb_client is None:
|
||||
fb_client, fb_model, fb_label = _try_main_agent_model_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
|
||||
if fb_client is not None:
|
||||
fb_kwargs = _build_call_kwargs(
|
||||
fb_label, fb_model, messages,
|
||||
|
|
@ -4550,6 +4733,14 @@ def call_llm(
|
|||
base_url=str(getattr(fb_client, "base_url", "") or ""))
|
||||
return _validate_llm_response(
|
||||
fb_client.chat.completions.create(**fb_kwargs), task)
|
||||
# All fallback layers exhausted — emit a single user-visible
|
||||
# warning so the operator knows aux task is about to fail.
|
||||
# (#26882) The error itself is re-raised below.
|
||||
logger.warning(
|
||||
"Auxiliary %s: %s on %s and all fallbacks exhausted "
|
||||
"(fallback_chain + main agent model). Raising original error.",
|
||||
task or "call", reason, resolved_provider,
|
||||
)
|
||||
# Connection/timeout errors leave the cached client poisoned (closed
|
||||
# httpx transport, half-read stream, dead async loop). Drop it from
|
||||
# the cache regardless of whether we found a fallback above so the
|
||||
|
|
@ -4851,8 +5042,12 @@ async def async_call_llm(
|
|||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
)
|
||||
# Capacity errors (payment/quota/connection) bypass the explicit-provider
|
||||
# gate — the provider cannot serve the request regardless of user intent.
|
||||
# See #26803: daily token quota must fall back like a 402 credit error.
|
||||
is_auto = resolved_provider in {"auto", "", None}
|
||||
if should_fallback and is_auto:
|
||||
is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
if should_fallback and (is_auto or is_capacity_error):
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
_mark_provider_unhealthy(
|
||||
|
|
@ -4864,8 +5059,23 @@ async def async_call_llm(
|
|||
reason = "connection error"
|
||||
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
|
||||
task or "call", reason, resolved_provider, first_err)
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
|
||||
# Fallback order (#26882, #26803):
|
||||
# 1. User-configured fallback_chain (per-task) if set
|
||||
# 2. Main agent model (last-resort safety net)
|
||||
# Auto users get the full auto-detection chain instead — its
|
||||
# Step 1 IS the main agent model.
|
||||
fb_client, fb_model, fb_label = (None, None, "")
|
||||
if is_auto:
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
else:
|
||||
fb_client, fb_model, fb_label = _try_configured_fallback_chain(
|
||||
task, resolved_provider or "auto", reason=reason)
|
||||
if fb_client is None:
|
||||
fb_client, fb_model, fb_label = _try_main_agent_model_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
|
||||
if fb_client is not None:
|
||||
fb_kwargs = _build_call_kwargs(
|
||||
fb_label, fb_model, messages,
|
||||
|
|
@ -4881,6 +5091,12 @@ async def async_call_llm(
|
|||
fb_kwargs["model"] = async_fb_model
|
||||
return _validate_llm_response(
|
||||
await async_fb.chat.completions.create(**fb_kwargs), task)
|
||||
# All fallback layers exhausted — warn before re-raising. (#26882)
|
||||
logger.warning(
|
||||
"Auxiliary %s (async): %s on %s and all fallbacks exhausted "
|
||||
"(fallback_chain + main agent model). Raising original error.",
|
||||
task or "call", reason, resolved_provider,
|
||||
)
|
||||
# Mirror the sync path: drop poisoned clients on connection/timeout
|
||||
# so the next aux call rebuilds. See issue #23432.
|
||||
if _is_connection_error(first_err):
|
||||
|
|
|
|||
570
agent/background_review.py
Normal file
570
agent/background_review.py
Normal file
|
|
@ -0,0 +1,570 @@
|
|||
"""Background memory/skill review — fork the agent to evaluate the turn.
|
||||
|
||||
After every turn, ``AIAgent.run_conversation`` may call
|
||||
:func:`spawn_background_review` to fire off a daemon thread that replays
|
||||
the conversation snapshot in a forked :class:`AIAgent` and asks itself
|
||||
"should any skill/memory be saved or updated?". Writes go straight to
|
||||
the memory + skill stores. Main conversation and prompt cache are never
|
||||
touched.
|
||||
|
||||
The fork inherits the parent's live runtime (provider, model, base_url,
|
||||
credentials, cached system prompt) so it hits the same prefix cache and
|
||||
uses the same auth. It runs with a tool whitelist limited to memory and
|
||||
skill management tools; everything else is denied at runtime.
|
||||
|
||||
See the ``hermes-agent-dev`` skill (``references/self-improvement-loop.md``)
|
||||
for invariants and PR review criteria.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Review-prompt strings — used by ``spawn_background_review_thread`` to build
|
||||
# the user-message that the forked review agent receives. AIAgent exposes
|
||||
# them as class attributes (``_MEMORY_REVIEW_PROMPT`` etc.) for back-compat;
|
||||
# the actual text lives here so future edits are one-place.
|
||||
_MEMORY_REVIEW_PROMPT = (
|
||||
"Review the conversation above and consider saving to memory if appropriate.\n\n"
|
||||
"Focus on:\n"
|
||||
"1. Has the user revealed things about themselves — their persona, desires, "
|
||||
"preferences, or personal details worth remembering?\n"
|
||||
"2. Has the user expressed expectations about how you should behave, their work "
|
||||
"style, or ways they want you to operate?\n\n"
|
||||
"If something stands out, save it using the memory tool. "
|
||||
"If nothing is worth saving, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
|
||||
_SKILL_REVIEW_PROMPT = (
|
||||
"Review the conversation above and update the skill library. Be "
|
||||
"ACTIVE — most sessions produce at least one skill update, even if "
|
||||
"small. A pass that does nothing is a missed learning opportunity, "
|
||||
"not a neutral outcome.\n\n"
|
||||
"Target shape of the library: CLASS-LEVEL skills, each with a rich "
|
||||
"SKILL.md and a `references/` directory for session-specific detail. "
|
||||
"Not a long flat list of narrow one-session-one-skill entries. This "
|
||||
"shapes HOW you update, not WHETHER you update.\n\n"
|
||||
"Signals to look for (any one of these warrants action):\n"
|
||||
" • User corrected your style, tone, format, legibility, or "
|
||||
"verbosity. Frustration signals like 'stop doing X', 'this is too "
|
||||
"verbose', 'don't format like this', 'why are you explaining', "
|
||||
"'just give me the answer', 'you always do Y and I hate it', or an "
|
||||
"explicit 'remember this' are FIRST-CLASS skill signals, not just "
|
||||
"memory signals. Update the relevant skill(s) to embed the "
|
||||
"preference so the next session starts already knowing.\n"
|
||||
" • User corrected your workflow, approach, or sequence of steps. "
|
||||
"Encode the correction as a pitfall or explicit step in the skill "
|
||||
"that governs that class of task.\n"
|
||||
" • Non-trivial technique, fix, workaround, debugging path, or "
|
||||
"tool-usage pattern emerged that a future session would benefit "
|
||||
"from. Capture it.\n"
|
||||
" • A skill that got loaded or consulted this session turned out "
|
||||
"to be wrong, missing a step, or outdated. Patch it NOW.\n\n"
|
||||
"Preference order — prefer the earliest action that fits, but do "
|
||||
"pick one when a signal above fired:\n"
|
||||
" 1. UPDATE A CURRENTLY-LOADED SKILL. Look back through the "
|
||||
"conversation for skills the user loaded via /skill-name or you "
|
||||
"read via skill_view. If any of them covers the territory of the "
|
||||
"new learning, PATCH that one first. It is the skill that was in "
|
||||
"play, so it's the right one to extend.\n"
|
||||
" 2. UPDATE AN EXISTING UMBRELLA (via skills_list + skill_view). "
|
||||
"If no loaded skill fits but an existing class-level skill does, "
|
||||
"patch it. Add a subsection, a pitfall, or broaden a trigger.\n"
|
||||
" 3. ADD A SUPPORT FILE under an existing umbrella. Skills can be "
|
||||
"packaged with three kinds of support files — use the right "
|
||||
"directory per kind:\n"
|
||||
" • `references/<topic>.md` — session-specific detail (error "
|
||||
"transcripts, reproduction recipes, provider quirks) AND "
|
||||
"condensed knowledge banks: quoted research, API docs, external "
|
||||
"authoritative excerpts, or domain notes you found while working "
|
||||
"on the problem. Write it concise and for the value of the task, "
|
||||
"not as a full mirror of upstream docs.\n"
|
||||
" • `templates/<name>.<ext>` — starter files meant to be "
|
||||
"copied and modified (boilerplate configs, scaffolding, a "
|
||||
"known-good example the agent can `reproduce with modifications`).\n"
|
||||
" • `scripts/<name>.<ext>` — statically re-runnable actions "
|
||||
"the skill can invoke directly (verification scripts, fixture "
|
||||
"generators, deterministic probes, anything the agent should run "
|
||||
"rather than hand-type each time).\n"
|
||||
" Add support files via skill_manage action=write_file with "
|
||||
"file_path starting 'references/', 'templates/', or 'scripts/'. "
|
||||
"The umbrella's SKILL.md should gain a one-line pointer to any "
|
||||
"new support file so future agents know it exists.\n"
|
||||
" 4. CREATE A NEW CLASS-LEVEL UMBRELLA SKILL when no existing "
|
||||
"skill covers the class. The name MUST be at the class level. "
|
||||
"The name MUST NOT be a specific PR number, error string, feature "
|
||||
"codename, library-alone name, or 'fix-X / debug-Y / audit-Z-today' "
|
||||
"session artifact. If the proposed name only makes sense for "
|
||||
"today's task, it's wrong — fall back to (1), (2), or (3).\n\n"
|
||||
"User-preference embedding (important): when the user expressed a "
|
||||
"style/format/workflow preference, the update belongs in the "
|
||||
"SKILL.md body, not just in memory. Memory captures 'who the user "
|
||||
"is and what the current situation and state of your operations "
|
||||
"are'; skills capture 'how to do this class of task for this "
|
||||
"user'. When they complain about how you handled a task, the "
|
||||
"skill that governs that task needs to carry the lesson.\n\n"
|
||||
"If you notice two existing skills that overlap, note it in your "
|
||||
"reply — the background curator handles consolidation at scale.\n\n"
|
||||
"Do NOT capture (these become persistent self-imposed constraints "
|
||||
"that bite you later when the environment changes):\n"
|
||||
" • Environment-dependent failures: missing binaries, fresh-install "
|
||||
"errors, post-migration path mismatches, 'command not found', "
|
||||
"unconfigured credentials, uninstalled packages. The user can fix "
|
||||
"these — they are not durable rules.\n"
|
||||
" • Negative claims about tools or features ('browser tools do not "
|
||||
"work', 'X tool is broken', 'cannot use Y from execute_code'). These "
|
||||
"harden into refusals the agent cites against itself for months "
|
||||
"after the actual problem was fixed.\n"
|
||||
" • Session-specific transient errors that resolved before the "
|
||||
"conversation ended. If retrying worked, the lesson is the retry "
|
||||
"pattern, not the original failure.\n"
|
||||
" • One-off task narratives. A user asking 'summarize today's "
|
||||
"market' or 'analyze this PR' is not a class of work that warrants "
|
||||
"a skill.\n\n"
|
||||
"If a tool failed because of setup state, capture the FIX (install "
|
||||
"command, config step, env var to set) under an existing setup or "
|
||||
"troubleshooting skill — never 'this tool does not work' as a "
|
||||
"standalone constraint.\n\n"
|
||||
"'Nothing to save.' is a real option but should NOT be the "
|
||||
"default. If the session ran smoothly with no corrections and "
|
||||
"produced no new technique, just say 'Nothing to save.' and stop. "
|
||||
"Otherwise, act."
|
||||
)
|
||||
|
||||
# Prompt for the combined background review pass: asks the review agent to
# update both user memory and the skill library in a single turn.
# spawn_background_review_thread selects it when both review_memory and
# review_skills are set. The text is sent to the model verbatim.
_COMBINED_REVIEW_PROMPT = (
    "Review the conversation above and update two things:\n\n"
    "**Memory**: who the user is. Did the user reveal persona, "
    "desires, preferences, personal details, or expectations about "
    "how you should behave? Save facts about the user and durable "
    "preferences with the memory tool.\n\n"
    "**Skills**: how to do this class of task. Be ACTIVE — most "
    "sessions produce at least one skill update. A pass that does "
    "nothing is a missed learning opportunity, not a neutral outcome.\n\n"
    "Target shape of the skill library: CLASS-LEVEL skills with a rich "
    "SKILL.md and a `references/` directory for session-specific detail. "
    "Not a long flat list of narrow one-session-one-skill entries.\n\n"
    "Signals that warrant a skill update (any one is enough):\n"
    " • User corrected your style, tone, format, legibility, "
    "verbosity, or approach. Frustration is a FIRST-CLASS skill "
    "signal, not just a memory signal. 'stop doing X', 'don't format "
    "like this', 'I hate when you Y' — embed the lesson in the skill "
    "that governs that task so the next session starts fixed.\n"
    " • Non-trivial technique, fix, workaround, or debugging path "
    "emerged.\n"
    " • A skill that was loaded or consulted turned out wrong, "
    "missing, or outdated — patch it now.\n\n"
    "Preference order for skills — pick the earliest that fits:\n"
    " 1. UPDATE A CURRENTLY-LOADED SKILL. Check what skills were "
    "loaded via /skill-name or skill_view in the conversation. If one "
    "of them covers the learning, PATCH it first. It was in play; "
    "it's the right place.\n"
    " 2. UPDATE AN EXISTING UMBRELLA (skills_list + skill_view to "
    "find the right one). Patch it.\n"
    " 3. ADD A SUPPORT FILE under an existing umbrella via "
    "skill_manage action=write_file. Three kinds: "
    "`references/<topic>.md` for session-specific detail OR condensed "
    "knowledge banks (quoted research, API docs excerpts, domain "
    "notes) written concise and task-focused; `templates/<name>.<ext>` "
    "for starter files meant to be copied and modified; "
    "`scripts/<name>.<ext>` for statically re-runnable actions "
    "(verification, fixture generators, probes). Add a one-line "
    "pointer in SKILL.md so future agents find them.\n"
    " 4. CREATE A NEW CLASS-LEVEL UMBRELLA when nothing exists. "
    "Name at the class level — NOT a PR number, error string, "
    "codename, library-alone name, or 'fix-X / debug-Y' session "
    "artifact. If the name only fits today's task, fall back to (1), "
    "(2), or (3).\n\n"
    "User-preference embedding: when the user complains about how "
    "you handled a task, update the skill that governs that task — "
    "memory alone isn't enough. Memory says 'who the user is and "
    "what the current situation and state of your operations are'; "
    "skills say 'how to do this class of task for this user'. Both "
    "should carry user-preference lessons when relevant.\n\n"
    "If you notice overlapping existing skills, mention it — the "
    "background curator handles consolidation.\n\n"
    "Do NOT capture as skills (these become persistent self-imposed "
    "constraints that bite you later when the environment changes):\n"
    " • Environment-dependent failures: missing binaries, fresh-install "
    "errors, post-migration path mismatches, 'command not found', "
    "unconfigured credentials, uninstalled packages. The user can fix "
    "these — they are not durable rules.\n"
    " • Negative claims about tools or features ('browser tools do not "
    "work', 'X tool is broken', 'cannot use Y from execute_code'). These "
    "harden into refusals the agent cites against itself for months "
    "after the actual problem was fixed.\n"
    " • Session-specific transient errors that resolved before the "
    "conversation ended. If retrying worked, the lesson is the retry "
    "pattern, not the original failure.\n"
    " • One-off task narratives. A user asking 'summarize today's "
    "market' or 'analyze this PR' is not a class of work that warrants "
    "a skill.\n\n"
    "If a tool failed because of setup state, capture the FIX (install "
    "command, config step, env var to set) under an existing setup or "
    "troubleshooting skill — never 'this tool does not work' as a "
    "standalone constraint.\n\n"
    "Act on whichever of the two dimensions has real signal. If "
    "genuinely nothing stands out on either, say 'Nothing to save.' "
    "and stop — but don't reach for that conclusion as a default."
)
|
||||
|
||||
|
||||
|
||||
def summarize_background_review_actions(
    review_messages: List[Dict],
    prior_snapshot: List[Dict],
) -> List[str]:
    """Build the human-facing action summary for a background review pass.

    Walks the review agent's session messages and collects "successful tool
    action" descriptions to surface to the user (e.g. "Memory updated").
    Tool messages already present in ``prior_snapshot`` are skipped so we
    don't re-surface stale results from the prior conversation that the
    review agent inherited via ``conversation_history`` (issue #14944).

    Matching is by ``tool_call_id`` when available, with a content-equality
    fallback for tool messages that lack one.

    Args:
        review_messages: Messages recorded by the review agent. ``None`` and
            non-dict / non-tool entries are tolerated and ignored.
        prior_snapshot: The conversation the review agent was seeded with;
            its tool messages are treated as already reported.

    Returns:
        Action descriptions in message order (duplicates are NOT removed
        here). Messages containing "created"/"updated" are passed through
        verbatim; add/remove/replace results become ``"<label> updated"``.
    """
    # Index the inherited tool results so they can be filtered out below.
    seen_call_ids = set()
    seen_contents = set()
    for prior in prior_snapshot or []:
        if not isinstance(prior, dict) or prior.get("role") != "tool":
            continue
        prior_id = prior.get("tool_call_id")
        if prior_id:
            seen_call_ids.add(prior_id)
            continue
        prior_content = prior.get("content")
        if isinstance(prior_content, str):
            seen_contents.add(prior_content)

    actions: List[str] = []
    for msg in review_messages or []:
        if not isinstance(msg, dict) or msg.get("role") != "tool":
            continue

        tcid = msg.get("tool_call_id")
        if tcid:
            if tcid in seen_call_ids:
                continue
        else:
            # No id to match on: fall back to exact content equality.
            raw_content = msg.get("content")
            if isinstance(raw_content, str) and raw_content in seen_contents:
                continue

        try:
            data = json.loads(msg.get("content", "{}"))
        except (ValueError, TypeError):
            # Not JSON (JSONDecodeError is a ValueError) or not str/bytes.
            continue
        if not isinstance(data, dict) or not data.get("success"):
            continue

        message = data.get("message", "")
        if not isinstance(message, str):
            # A null / non-text message carries nothing to report; without
            # this guard ``.lower()`` would raise and abort the summary.
            continue
        lowered = message.lower()
        target = data.get("target", "")

        if "created" in lowered or "updated" in lowered:
            # Already user-readable: surface verbatim.
            actions.append(message)
        elif (
            "added" in lowered
            or (target and "add" in lowered)
            or "removed" in lowered
            or "replaced" in lowered
        ):
            # Entry-level edits collapse to a single "<store> updated" label.
            label = (
                "Memory" if target == "memory"
                else "User profile" if target == "user"
                else target
            )
            actions.append(f"{label} updated")
    return actions
|
||||
|
||||
|
||||
def build_memory_write_metadata(
    agent: Any,
    *,
    write_origin: Optional[str] = None,
    execution_context: Optional[str] = None,
    task_id: Optional[str] = None,
    tool_call_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Assemble the provenance record attached to mirrored memory writes.

    Explicit keyword arguments win; otherwise the origin and execution
    context fall back to ``agent._memory_write_origin`` /
    ``agent._memory_write_context`` (defaults ``"assistant_tool"`` and
    ``"foreground"``). The platform falls back to the
    ``HERMES_SESSION_SOURCE`` environment variable, then ``"cli"``.

    Returns:
        A dict with ``None`` and empty-string values dropped; ``task_id``
        and ``tool_call_id`` appear only when truthy.
    """
    origin = write_origin or getattr(agent, "_memory_write_origin", "assistant_tool")
    context = execution_context or getattr(agent, "_memory_write_context", "foreground")

    fields = [
        ("write_origin", origin),
        ("execution_context", context),
        ("session_id", agent.session_id or ""),
        ("parent_session_id", agent._parent_session_id or ""),
        ("platform", agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli")),
        ("tool_name", "memory"),
    ]
    # Optional correlation ids are appended last, only when supplied.
    if task_id:
        fields.append(("task_id", task_id))
    if tool_call_id:
        fields.append(("tool_call_id", tool_call_id))

    blank = {None, ""}
    return {key: value for key, value in fields if value not in blank}
|
||||
|
||||
|
||||
def _run_review_in_thread(
    agent: Any,
    messages_snapshot: List[Dict],
    prompt: str,
) -> None:
    """Worker function executed in the background-review daemon thread.

    Spawns a forked ``AIAgent`` inheriting the parent's runtime, runs the
    review prompt, and surfaces a compact action summary back to the user
    via ``agent._safe_print`` and ``agent.background_review_callback``.

    Args:
        agent: The parent agent. Its model, provider, runtime credentials,
            built-in memory store, cached system prompt and session
            identity are copied onto the review fork.
        messages_snapshot: Conversation passed to the fork as
            ``conversation_history``; also used afterwards to filter out
            tool results that pre-date the review.
        prompt: Review prompt. A sentence restricting the fork to memory
            and skill tools is appended before it is sent.

    Returns:
        None. Any exception is logged and reported through
        ``agent._emit_auxiliary_failure`` rather than propagated.
    """
    # Local import to avoid a hard circular dep at module load.
    from run_agent import AIAgent
    from tools.terminal_tool import set_approval_callback as _set_approval_callback

    # Install a non-interactive approval callback on this worker
    # thread so any dangerous-command guard the review agent trips
    # resolves to "deny" instead of falling back to input() -- which
    # deadlocks against the parent's prompt_toolkit TUI (#15216).
    # Same pattern as _subagent_auto_deny in tools/delegate_tool.py.
    def _bg_review_auto_deny(command, description, **kwargs):
        logger.warning(
            "Background review auto-denied dangerous command: %s (%s)",
            command, description,
        )
        return "deny"
    try:
        _set_approval_callback(_bg_review_auto_deny)
    except Exception:
        pass

    # review_agent stays non-None only while the fork still needs teardown;
    # the outer ``finally`` uses that to decide whether to clean up.
    review_agent = None
    review_messages: List[Dict] = []
    try:
        with open(os.devnull, "w", encoding="utf-8") as _devnull, \
             contextlib.redirect_stdout(_devnull), \
             contextlib.redirect_stderr(_devnull):
            # Inherit the parent agent's live runtime (provider, model,
            # base_url, api_key, api_mode) so the fork uses the exact
            # same credentials the main turn is using. Without this,
            # AIAgent.__init__ re-runs auto-resolution from env vars,
            # which fails for OAuth-only providers, session-scoped
            # creds, or credential-pool setups where the resolver can't
            # reconstruct auth from scratch -- producing the spurious
            # "No LLM provider configured" warning at end of turn.
            _parent_runtime = agent._current_main_runtime()
            _parent_api_mode = _parent_runtime.get("api_mode") or None
            # The review fork needs to call agent-loop tools (memory,
            # skill_manage). Those tools require Hermes' own dispatch,
            # which the codex_app_server runtime bypasses entirely
            # (it runs the turn inside codex's subprocess). So when
            # the parent is on codex_app_server, downgrade the review
            # fork to codex_responses — same auth/credentials, but
            # talks to the OpenAI Responses API directly so Hermes
            # owns the loop and the agent-loop tools dispatch.
            if _parent_api_mode == "codex_app_server":
                _parent_api_mode = "codex_responses"
            # skip_memory=True keeps the review fork from
            # touching external memory plugins (honcho, mem0,
            # supermemory, etc.). Without it, the fork's
            # __init__ rebuilds its own _memory_manager from
            # config, scoped to the parent's session_id, and
            # run_conversation() then leaks the harness prompt
            # into the user's real memory namespace via three
            # ingestion sites: on_turn_start (cadence + turn
            # message), prefetch_all (recall query), and
            # sync_all (harness prompt + review output recorded
            # as a (user, assistant) turn pair). Built-in
            # MEMORY.md / USER.md state is re-bound from the
            # parent below so memory(action="add") writes from
            # the review still land on disk; the review just
            # has zero side effects on external providers.
            review_agent = AIAgent(
                model=agent.model,
                max_iterations=16,
                quiet_mode=True,
                platform=agent.platform,
                provider=agent.provider,
                api_mode=_parent_api_mode,
                base_url=_parent_runtime.get("base_url") or None,
                api_key=_parent_runtime.get("api_key") or None,
                credential_pool=getattr(agent, "_credential_pool", None),
                parent_session_id=agent.session_id,
                skip_memory=True,
            )
            # Tag writes made by the fork and share the parent's built-in
            # memory store; nudge intervals are zeroed on the fork.
            review_agent._memory_write_origin = "background_review"
            review_agent._memory_write_context = "background_review"
            review_agent._memory_store = agent._memory_store
            review_agent._memory_enabled = agent._memory_enabled
            review_agent._user_profile_enabled = agent._user_profile_enabled
            review_agent._memory_nudge_interval = 0
            review_agent._skill_nudge_interval = 0
            # Suppress all status/warning emits from the fork so the
            # user only sees the final successful-action summary.
            # Without this, mid-review "Iteration budget exhausted",
            # rate-limit retries, compression warnings, and other
            # lifecycle messages bubble up through _emit_status ->
            # _vprint and leak past the stdout redirect (they go via
            # _print_fn/status_callback, which bypass sys.stdout).
            review_agent.suppress_status_output = True
            # Inherit the parent's cached system prompt verbatim so
            # the review fork's outbound HTTP request hits the same
            # Anthropic/OpenRouter prefix cache the parent warmed.
            # Without this, the fork rebuilds the system prompt from
            # scratch (fresh _hermes_now() timestamp, fresh
            # session_id, narrower toolset → different skills_prompt)
            # and the byte-exact prefix-cache key misses. See
            # issue #25322 and PR #17276 for the full analysis +
            # measured impact (~26% end-to-end cost reduction on
            # Sonnet 4.5).
            review_agent._cached_system_prompt = agent._cached_system_prompt
            # Defensive: pin session_start + session_id to the
            # parent's so any code path that re-renders parts of
            # the system prompt (compression, plugin hooks) still
            # produces byte-identical output. The cached-prompt
            # assignment above already short-circuits the normal
            # rebuild path, but these pins guarantee parity even
            # if a future code path bypasses the cache.
            review_agent.session_start = agent.session_start
            review_agent.session_id = agent.session_id

            from model_tools import get_tool_definitions
            from hermes_cli.plugins import (
                set_thread_tool_whitelist,
                clear_thread_tool_whitelist,
            )

            # Restrict this thread to the tool names exposed by the
            # memory and skills toolsets.
            review_whitelist = {
                t["function"]["name"]
                for t in get_tool_definitions(
                    enabled_toolsets=["memory", "skills"],
                    quiet_mode=True,
                )
            }
            set_thread_tool_whitelist(
                review_whitelist,
                deny_msg_fmt=(
                    "Background review denied non-whitelisted tool: "
                    "{tool_name}. Only memory/skill tools are allowed."
                ),
            )
            try:
                review_agent.run_conversation(
                    user_message=(
                        prompt
                        + "\n\nYou can only call memory and skill "
                        "management tools. Other tools will be denied "
                        "at runtime — do not attempt them."
                    ),
                    conversation_history=messages_snapshot,
                )
            finally:
                # Always lift the whitelist, even if the review raised.
                clear_thread_tool_whitelist()

            # Tear down memory providers while stdout is still
            # redirected so background thread teardown (Honcho flush,
            # Hindsight sync, etc.) stays silent. The finally block
            # below is a safety net for the exception path.
            try:
                review_agent.shutdown_memory_provider()
            except Exception:
                pass
            try:
                review_agent.close()
            except Exception:
                pass
            # Copy the transcript, then drop the reference so the outer
            # ``finally`` does not tear the fork down a second time.
            review_messages = list(getattr(review_agent, "_session_messages", []))
            review_agent = None

        # Scan the review agent's messages for successful tool actions
        # and surface a compact summary to the user. Tool messages
        # already present in messages_snapshot must be skipped, since
        # the review agent inherits that history and would otherwise
        # re-surface stale "created"/"updated" messages from the prior
        # conversation as if they just happened (issue #14944).
        # NOTE(review): this step is assumed to run after the redirect
        # block above has exited — confirm against the original layout.
        actions = summarize_background_review_actions(
            review_messages,
            messages_snapshot,
        )

        if actions:
            # dict.fromkeys de-duplicates while keeping first-seen order.
            summary = " · ".join(dict.fromkeys(actions))
            agent._safe_print(
                f" 💾 Self-improvement review: {summary}"
            )
            _bg_cb = agent.background_review_callback
            if _bg_cb:
                try:
                    _bg_cb(
                        f"💾 Self-improvement review: {summary}"
                    )
                except Exception:
                    pass

    except Exception as e:
        logger.warning("Background memory/skill review failed: %s", e)
        agent._emit_auxiliary_failure("background review", e)
    finally:
        # Safety-net cleanup for the exception path. Normal
        # completion already shut down inside redirect_stdout above.
        # Re-open devnull here so any teardown output (Honcho flush,
        # Hindsight sync, background thread joins) stays silent even
        # on the exception path where redirect_stdout already exited.
        if review_agent is not None:
            try:
                with open(os.devnull, "w", encoding="utf-8") as _fn, \
                     contextlib.redirect_stdout(_fn), \
                     contextlib.redirect_stderr(_fn):
                    try:
                        review_agent.shutdown_memory_provider()
                    except Exception:
                        pass
                    try:
                        review_agent.close()
                    except Exception:
                        pass
            except Exception:
                pass
        # Clear the approval callback on this bg-review thread so a
        # recycled thread-id doesn't inherit a stale reference.
        try:
            _set_approval_callback(None)
        except Exception:
            pass
|
||||
|
||||
|
||||
def spawn_background_review_thread(
    agent: Any,
    messages_snapshot: List[Dict],
    review_memory: bool = False,
    review_skills: bool = False,
):
    """Prepare the callable and prompt for one background review pass.

    No thread is started here: the function hands back ``(target, prompt)``
    and leaves ``threading.Thread`` construction to the caller
    (``AIAgent._spawn_background_review``), which keeps test patches of
    ``run_agent.threading.Thread`` effective.

    Prompt choice: combined when both flags are set, memory-only when just
    ``review_memory`` is set, otherwise the skill prompt.
    """
    # Resolve which prompt applies, then honour a same-named attribute on the
    # agent if one exists (legacy per-agent overrides such as
    # agent._MEMORY_REVIEW_PROMPT keep working).
    if review_memory and review_skills:
        override_attr, fallback = "_COMBINED_REVIEW_PROMPT", _COMBINED_REVIEW_PROMPT
    elif review_memory:
        override_attr, fallback = "_MEMORY_REVIEW_PROMPT", _MEMORY_REVIEW_PROMPT
    else:
        override_attr, fallback = "_SKILL_REVIEW_PROMPT", _SKILL_REVIEW_PROMPT
    prompt = getattr(agent, override_attr, fallback)

    def _target() -> None:
        _run_review_in_thread(agent, messages_snapshot, prompt)

    return _target, prompt
|
||||
|
||||
|
||||
# Names exported by ``from <module> import *``. The underscore-prefixed
# prompt constants are listed explicitly because a star-import would
# otherwise skip them.
__all__ = [
    "_MEMORY_REVIEW_PROMPT",
    "_SKILL_REVIEW_PROMPT",
    "_COMBINED_REVIEW_PROMPT",
    "spawn_background_review_thread",
    "summarize_background_review_actions",
    "build_memory_write_metadata",
]
|
||||
|
|
@ -36,6 +36,19 @@ from typing import Any, Dict, List, Optional, Tuple
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ensure boto3/botocore are installed before any code in this module runs.
|
||||
# Upstream removed boto3 from [all] extras (PRs #24220, #24515); lazy_deps
|
||||
# handles on-demand installation so the Bedrock provider still works in the
|
||||
# EKS deployment without baking boto3 into the base image.
|
||||
# ---------------------------------------------------------------------------
|
||||
try:
    # Imported inside the ``try`` so a missing or broken tools.lazy_deps
    # module is tolerated the same way as a failed install.
    from tools.lazy_deps import ensure
    # NOTE(review): prompt=False presumably suppresses any interactive
    # confirmation — confirm against tools.lazy_deps.
    ensure("provider.bedrock", prompt=False)
except Exception:
    pass  # lazy_deps unavailable or install failed — let downstream imports surface the real error
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lazy boto3 import — only loaded when the Bedrock provider is actually used.
|
||||
# This keeps startup fast for users who don't use Bedrock.
|
||||
|
|
|
|||
175
agent/browser_provider.py
Normal file
175
agent/browser_provider.py
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
"""
|
||||
Browser Provider ABC
|
||||
====================
|
||||
|
||||
Defines the pluggable-backend interface for cloud browser providers
|
||||
(Browserbase, Browser Use, Firecrawl, …). Providers register instances via
|
||||
:meth:`PluginContext.register_browser_provider`; the active one (selected via
|
||||
``browser.cloud_provider`` in ``config.yaml``) services every cloud-mode
|
||||
``browser_*`` tool call.
|
||||
|
||||
Providers live in ``<repo>/plugins/browser/<name>/`` (built-in, auto-loaded as
|
||||
``kind: backend``) or ``~/.hermes/plugins/browser/<name>/`` (user, opt-in via
|
||||
``plugins.enabled``).
|
||||
|
||||
This ABC mirrors :class:`agent.web_search_provider.WebSearchProvider` (PR
|
||||
#25182) — same shape, same registration flow, same picker integration. The
|
||||
legacy in-tree ``tools.browser_providers.base.CloudBrowserProvider`` ABC was
|
||||
deleted in PR #25214 (this work) along with the per-vendor inline modules in
|
||||
``tools/browser_providers/``; the lifecycle contract documented below is
|
||||
preserved bit-for-bit so the tool wrapper (:mod:`tools.browser_tool`) does
|
||||
not have to translate.
|
||||
|
||||
Session metadata contract (preserved from the legacy ``CloudBrowserProvider``)::
|
||||
|
||||
{
|
||||
"session_name": str, # unique name for agent-browser --session
|
||||
"bb_session_id": str, # provider session ID (for close/cleanup)
|
||||
"cdp_url": str, # CDP websocket URL
|
||||
"features": dict, # feature flags that were enabled
|
||||
"external_call_id": str, # optional, managed-gateway billing key
|
||||
}
|
||||
|
||||
``bb_session_id`` is a legacy key name kept verbatim for backward compat with
|
||||
:mod:`tools.browser_tool` — it holds the provider's session ID regardless of
|
||||
which provider is in use.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ABC
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class BrowserProvider(abc.ABC):
    """Interface every cloud browser backend has to satisfy.

    A concrete provider supplies five things: the :attr:`name` property,
    :meth:`is_available`, and the session lifecycle trio
    :meth:`create_session` / :meth:`close_session` /
    :meth:`emergency_cleanup`.

    The lifecycle keeps the exact shape of the legacy
    ``CloudBrowserProvider`` contract, which lets the dispatcher in
    :mod:`tools.browser_tool` stay a plain registry lookup with no
    per-provider branches or result translation.
    """

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """Short, stable identifier matched against ``browser.cloud_provider``.

        Lowercase; hyphens are allowed so existing user-visible names keep
        working (``browserbase``, ``browser-use``, ``firecrawl``).
        """

    @property
    def display_name(self) -> str:
        """Label shown to humans in ``hermes tools``; falls back to ``name``."""
        return self.name

    @abc.abstractmethod
    def is_available(self) -> bool:
        """Report whether this provider is able to service calls right now.

        Expected to be cheap — an env var lookup, a readable managed-gateway
        token, an importable optional dependency. It must NOT touch the
        network, because it runs at tool-registration time and on every
        ``hermes tools`` paint.

        This is the renamed successor of the legacy
        ``CloudBrowserProvider.is_configured()``, aligned with
        :class:`agent.web_search_provider.WebSearchProvider`.
        """

    @abc.abstractmethod
    def create_session(self, task_id: str) -> Dict[str, object]:
        """Open a cloud browser session and describe it.

        The returned dict must contain at least::

            {
                "session_name": str,   # unique name for agent-browser --session
                "bb_session_id": str,  # provider session ID (for close/cleanup)
                "cdp_url": str,        # CDP websocket URL
                "features": dict,      # feature flags that were enabled
            }

        ``bb_session_id`` is a legacy key retained for compatibility with
        :mod:`tools.browser_tool`; it carries the provider's own session ID
        whichever provider is active.

        Implementations may raise ``ValueError`` for missing credentials or
        ``RuntimeError`` for network / API failures; the dispatcher reports
        those to the user.
        """

    @abc.abstractmethod
    def close_session(self, session_id: str) -> bool:
        """Terminate the cloud session identified by ``session_id``.

        Return True when the session was released and False otherwise.
        Implementations should log and return False instead of raising, so
        the dispatcher's cleanup loop can continue with remaining sessions.
        """

    @abc.abstractmethod
    def emergency_cleanup(self, session_id: str) -> None:
        """Tear a session down on a best-effort basis at process exit.

        Invoked from atexit / signal handlers. Missing credentials, network
        errors and similar problems must be logged and ignored — this method
        must not raise.
        """

    def get_setup_schema(self) -> Dict[str, Any]:
        """Describe this provider for the ``hermes tools`` picker.

        :mod:`hermes_cli.tools_config` uses the result to add a row to the
        Browser Automation picker. The layout follows the hardcoded entries
        in ``TOOL_CATEGORIES["browser"]``::

            {
                "name": "Browserbase",
                "badge": "paid",
                "tag": "Cloud browser with stealth and proxies",
                "env_vars": [
                    {"key": "BROWSERBASE_API_KEY",
                     "prompt": "Browserbase API key",
                     "url": "https://browserbase.com"},
                ],
                "post_setup": "agent_browser",
            }

        The base implementation returns a bare entry built from
        :attr:`display_name`. Override it to add API key prompts, badges,
        managed-Nous gating, or a ``post_setup`` install hook.
        """
        schema: Dict[str, Any] = {
            "name": self.display_name,
            "badge": "",
            "tag": "",
            "env_vars": [],
        }
        return schema

    # ------------------------------------------------------------------
    # Backward-compat shims for the legacy CloudBrowserProvider API
    # ------------------------------------------------------------------
    #
    # Before PR #25214 the ABC offered ``is_configured()`` and
    # ``provider_name()``, and ``tools.browser_tool`` still calls them in
    # roughly six places. To avoid touching each callsite (and breaking
    # out-of-tree subclasses of CloudBrowserProvider) the old names remain
    # as thin forwards to the new API. Subclasses MUST implement
    # :meth:`is_available` and :attr:`name`; overriding ``is_configured`` /
    # ``provider_name`` is permitted but optional.

    def is_configured(self) -> bool:
        """Legacy spelling of :meth:`is_available`."""
        available = self.is_available()
        return available

    def provider_name(self) -> str:
        """Legacy accessor that returns :attr:`display_name`."""
        label = self.display_name
        return label
|
||||
223
agent/browser_registry.py
Normal file
223
agent/browser_registry.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
"""
|
||||
Browser Provider Registry
|
||||
=========================
|
||||
|
||||
Central map of registered cloud browser providers. Populated by plugins at
|
||||
import-time via :meth:`PluginContext.register_browser_provider`; consumed by
|
||||
:func:`tools.browser_tool._get_cloud_provider` to route each cloud-mode
|
||||
``browser_*`` tool call to the active backend.
|
||||
|
||||
Active selection
|
||||
----------------
|
||||
The active provider is chosen by configuration with this precedence:
|
||||
|
||||
1. ``browser.cloud_provider`` in ``config.yaml`` (explicit override).
|
||||
2. Legacy preference order — ``browser-use`` → ``browserbase`` — filtered by
|
||||
availability. Matches the historic auto-detect order in
|
||||
:func:`tools.browser_tool._get_cloud_provider` (Browser Use checked first
|
||||
because it covers both the managed Nous gateway and direct API key path;
|
||||
Browserbase as the older direct-credentials fallback). ``firecrawl`` is
|
||||
intentionally NOT in the legacy walk — users only get Firecrawl as a
|
||||
cloud browser when they explicitly set ``browser.cloud_provider:
|
||||
firecrawl``, matching pre-migration behaviour where Firecrawl was never
|
||||
auto-selected.
|
||||
3. Otherwise ``None`` — the dispatcher falls back to local browser mode.
|
||||
|
||||
The explicit-config branch (rule 1) intentionally ignores ``is_available()``
|
||||
so the dispatcher surfaces a typed "X_API_KEY is not set" error to the user
|
||||
instead of silently switching backends. Matches the legacy
|
||||
:func:`tools.browser_tool._get_cloud_provider` behaviour for configured names.
|
||||
|
||||
Note: there is no "capability" split here (unlike the web subsystem, which
|
||||
has search/extract/crawl). Every browser provider implements the full
|
||||
:class:`agent.browser_provider.BrowserProvider` lifecycle; the registry's
|
||||
job is purely selection, not capability routing.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from agent.browser_provider import BrowserProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Registered providers keyed by provider name; written by register_provider().
_providers: Dict[str, BrowserProvider] = {}
# Held around accesses to ``_providers`` by the registry functions below.
_lock = threading.Lock()
|
||||
|
||||
|
||||
def register_provider(provider: BrowserProvider) -> None:
    """Register a cloud browser provider.

    Re-registration (same ``name``) overwrites the previous entry and logs
    a debug message — makes hot-reload scenarios (tests, dev loops) behave
    predictably.

    The registry key is the provider's name with surrounding whitespace
    removed, matching the normalisation :func:`get_provider` applies to
    lookups, so a registered provider is always retrievable.

    Args:
        provider: The provider instance to register.

    Raises:
        TypeError: ``provider`` is not a :class:`BrowserProvider`.
        ValueError: ``provider.name`` is not a non-blank string.
    """
    if not isinstance(provider, BrowserProvider):
        raise TypeError(
            "register_provider() expects a BrowserProvider instance, "
            f"got {type(provider).__name__}"
        )
    name = provider.name
    if not isinstance(name, str) or not name.strip():
        raise ValueError("Browser provider .name must be a non-empty string")
    # Store under the same normalised key get_provider() looks up; keying on
    # the raw name would make " foo " unreachable.
    key = name.strip()
    with _lock:
        existing = _providers.get(key)
        _providers[key] = provider
    # Log after the lock is released; only the dict mutation needs guarding.
    if existing is not None:
        logger.debug(
            "Browser provider '%s' re-registered (was %r)",
            key, type(existing).__name__,
        )
    else:
        logger.debug(
            "Registered browser provider '%s' (%s)",
            key, type(provider).__name__,
        )
|
||||
|
||||
|
||||
def list_providers() -> List[BrowserProvider]:
    """Return every registered provider, ordered by ``name``."""
    # Copy under the lock, sort outside it.
    with _lock:
        registered = [*_providers.values()]
    registered.sort(key=lambda provider: provider.name)
    return registered
|
||||
|
||||
|
||||
def get_provider(name: str) -> Optional[BrowserProvider]:
    """Look up a provider by name; ``None`` if absent or *name* is not a string.

    Surrounding whitespace in *name* is ignored.
    """
    if isinstance(name, str):
        key = name.strip()
        with _lock:
            return _providers.get(key)
    return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Active-provider resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Legacy auto-detect order — used when no ``browser.cloud_provider`` is set.
|
||||
# Matches the pre-migration walk in :func:`tools.browser_tool._get_cloud_provider`.
|
||||
# Firecrawl is intentionally absent so users with ``FIRECRAWL_API_KEY`` set
|
||||
# for web-extract don't get silently routed to a paid cloud browser. See
|
||||
# :func:`_resolve` for the full rationale.
|
||||
# Order matters: earlier names are tried first during auto-detection.
_LEGACY_PREFERENCE = ("browser-use", "browserbase")
|
||||
|
||||
|
||||
def _resolve(configured: Optional[str]) -> Optional[BrowserProvider]:
    """Choose which registered provider, if any, backs cloud-browser mode.

    *configured* is the ``browser.cloud_provider`` setting, or ``None`` /
    empty when the user has not set one.  Decisions are made in this order:

    1. **"local"** — return ``None``; the dispatcher disables cloud mode
       entirely.  Mirrors the legacy short-circuit in
       :func:`tools.browser_tool._get_cloud_provider`.
    2. **Explicitly configured and registered** — return that provider
       *without* consulting :meth:`is_available`, so the dispatcher can
       surface a precise "X_API_KEY is not set" error instead of silently
       routing somewhere else.
    3. **Auto-detect** — walk :data:`_LEGACY_PREFERENCE`
       (``browser-use`` → ``browserbase``) and return the first registered
       provider whose ``is_available()`` is truthy.  This step is also
       reached when *configured* names a provider that is not registered
       (logged at debug level).

    There is deliberately no "only one eligible provider, so use it"
    shortcut (unlike :func:`agent.web_search_registry._resolve`).  Before
    the migration, auto-detect only ever considered Browser Use and
    Browserbase; Firecrawl required an explicit
    ``browser.cloud_provider: firecrawl``.  Firecrawl shares its API key
    with the *web* extract plugin (``plugins/web/firecrawl/``), so a user
    who set ``FIRECRAWL_API_KEY`` for web extract must NOT be silently
    routed to a paid cloud browser on a fresh install.  Third-party
    plugins under ``~/.hermes/plugins/browser/<vendor>/`` are held to the
    same gate: they only take effect when explicitly configured.

    Returns ``None`` when nothing is configured and no legacy-preference
    provider is available; the dispatcher then falls back to local browser
    mode.
    """
    # Copy the registry under the lock, then decide on the copy.
    with _lock:
        registry = dict(_providers)

    # Rule 1: explicit "local" turns cloud mode off.
    if configured == "local":
        return None

    # Rule 2: an explicit, registered choice wins regardless of availability
    # so the user gets a precise downstream error rather than a silent
    # backend switch.  Matches _get_cloud_provider() in browser_tool.py.
    if configured:
        explicit = registry.get(configured)
        if explicit is not None:
            return explicit
        logger.debug(
            "browser cloud_provider '%s' configured but not registered; "
            "falling back to auto-detect",
            configured,
        )

    def _usable(candidate: BrowserProvider) -> bool:
        """Report availability, treating a raising ``is_available()`` as False."""
        try:
            return bool(candidate.is_available())
        except Exception as exc:  # noqa: BLE001
            logger.warning(
                "Browser provider %s.is_available() raised %s — treating as unavailable",
                candidate.name, exc, exc_info=True,
            )
            return False

    # Rule 3: only names in _LEGACY_PREFERENCE are auto-eligible.  The
    # generator is lazy, so availability probes stop at the first hit.
    legacy_candidates = (registry.get(name) for name in _LEGACY_PREFERENCE)
    return next(
        (
            candidate
            for candidate in legacy_candidates
            if candidate is not None and _usable(candidate)
        ),
        None,
    )
|
||||
|
||||
|
||||
def get_active_browser_provider() -> Optional[BrowserProvider]:
    """Return the cloud browser provider that is active right now.

    Reads ``browser.cloud_provider`` from config.yaml, normalises it, and
    hands it to :func:`_resolve`; the fallback rules are described in the
    module docstring.  Any failure while reading or normalising the
    setting is logged at debug level and treated as "not configured".
    Returns ``None`` for local mode or when no provider is available.
    """
    browser_cfg = {}
    try:
        from hermes_cli.config import read_raw_config

        browser_cfg = read_raw_config().get("browser", {})
    except Exception as exc:
        logger.debug("Could not read browser config: %s", exc)

    has_setting = isinstance(browser_cfg, dict) and "cloud_provider" in browser_cfg
    if not has_setting:
        return _resolve(None)

    try:
        from tools.tool_backend_helpers import normalize_browser_cloud_provider

        raw_value = browser_cfg.get("cloud_provider")
        configured = normalize_browser_cloud_provider(raw_value)
    except Exception as exc:
        logger.debug("normalize_browser_cloud_provider failed: %s", exc)
        configured = None

    return _resolve(configured)
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
    """Clear the registry. **Test-only.**

    Empties the module-level ``_providers`` mapping while holding ``_lock``,
    so every previously registered provider is forgotten.
    """
    with _lock:
        _providers.clear()
|
||||
2066
agent/chat_completion_helpers.py
Normal file
2066
agent/chat_completion_helpers.py
Normal file
File diff suppressed because it is too large
Load diff
448
agent/codex_runtime.py
Normal file
448
agent/codex_runtime.py
Normal file
|
|
@ -0,0 +1,448 @@
|
|||
"""Codex API runtime — App Server and Responses-API streaming paths.
|
||||
|
||||
Extracted from :class:`AIAgent` to keep the agent loop file focused.
|
||||
Each function takes the parent ``AIAgent`` as its first argument
|
||||
(``agent``). AIAgent keeps thin forwarder methods for backward
|
||||
compatibility.
|
||||
|
||||
* ``run_codex_app_server_turn`` — drives one turn through the
|
||||
``codex_app_server`` subprocess client (used when a Codex CLI install
|
||||
is the active provider).
|
||||
* ``run_codex_stream`` — streams a Codex Responses API call (the
|
||||
``codex_responses`` api_mode).
|
||||
* ``run_codex_create_stream_fallback`` — recovery path when the
|
||||
Responses ``stream=True`` initial create fails.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_codex_app_server_turn(
    agent,
    *,
    user_message: str,
    original_user_message: Any,
    messages: List[Dict[str, Any]],
    effective_task_id: str,
    should_review_memory: bool = False,
) -> Dict[str, Any]:
    """Run one turn through a ``codex app-server`` subprocess session.

    The whole turn is delegated to a :class:`CodexAppServerSession`; the
    messages it projects are appended to *messages* so memory/skill review
    keep working.  Called from ``run_conversation()`` when
    ``agent.api_mode == "codex_app_server"``.

    Args:
        agent: The parent ``AIAgent``.  Its ``_codex_session`` attribute
            caches the session across turns.
        user_message: Text handed to the session's ``run_turn``.
        original_user_message: Forwarded to the external-memory sync.
        messages: Conversation list; extended in place with the turn's
            projected messages.
        effective_task_id: Accepted for signature parity; not read here.
        should_review_memory: Whether the caller's memory-review trigger
            tripped for this turn.

    Returns:
        A result dict in the same shape as the chat_completions path
        (``final_response``, ``messages``, ``api_calls``, ``completed``,
        ``partial``, ``error``), plus ``codex_thread_id`` and
        ``codex_turn_id`` when the turn ran.
    """
    from agent.transports.codex_app_server_session import CodexAppServerSession

    def _discard_session() -> None:
        """Best-effort close of the cached session, then forget it."""
        try:
            agent._codex_session.close()
        except Exception:
            pass
        agent._codex_session = None

    # One session per AIAgent: spawned lazily on the first turn, reused
    # afterwards, closed at AIAgent shutdown (see _cleanup hook).
    if getattr(agent, "_codex_session", None) is None:
        working_dir = getattr(agent, "session_cwd", None) or os.getcwd()
        # Use Hermes' standard approval prompt when a CLI thread installed
        # one; gateway / cron contexts get the codex-side fail-closed default.
        try:
            from tools.terminal_tool import _get_approval_callback
            on_approval = _get_approval_callback()
        except Exception:
            on_approval = None
        agent._codex_session = CodexAppServerSession(
            cwd=working_dir,
            approval_callback=on_approval,
        )

    # NOTE: run_conversation() has ALREADY appended the user message to
    # ``messages`` before delegating here — appending again would duplicate it.

    try:
        turn = agent._codex_session.run_turn(user_input=user_message)
    except Exception as exc:
        logger.exception("codex app-server turn failed")
        # A crash always drops the session so the next turn respawns from
        # scratch instead of reusing a dead client.
        _discard_session()
        failure_text = (
            f"Codex app-server turn failed: {exc}. "
            f"Fall back to default runtime with `/codex-runtime auto`."
        )
        return {
            "final_response": failure_text,
            "messages": messages,
            "api_calls": 0,
            "completed": False,
            "partial": True,
            "error": str(exc),
        }

    # The turn can flag the underlying client as wedged (deadline blown,
    # post-tool watchdog tripped, OAuth refresh died, subprocess exited).
    # Retire the session so the next turn respawns codex rather than riding
    # the broken process.  Mirrors openclaw beta.8's "retire timed-out
    # app-server clients" fix.
    if getattr(turn, "should_retire", False):
        logger.warning(
            "codex app-server session retired (turn error: %s)",
            turn.error,
        )
        _discard_session()

    # The projector emits standard {role, content, tool_calls, tool_call_id}
    # entries — exactly what curator.py / the sessions DB expect.
    if turn.projected_messages:
        messages.extend(turn.projected_messages)

    # _turns_since_memory and _user_turn_count were already incremented by
    # run_conversation()'s pre-loop block, so they are left alone here.
    # _iters_since_skill is normally bumped per tool iteration by the
    # chat_completions loop, which this path bypasses — bump it explicitly.
    agent._iters_since_skill = (
        getattr(agent, "_iters_since_skill", 0) + turn.tool_iterations
    )

    # Evaluate the skill nudge only AFTER the counter was bumped — same
    # pattern as the chat_completions path.
    nudge_every = agent._skill_nudge_interval
    should_review_skills = bool(
        nudge_every > 0
        and agent._iters_since_skill >= nudge_every
        and "skill_manage" in agent.valid_tool_names
    )
    if should_review_skills:
        agent._iters_since_skill = 0

    # External memory sync is skipped on interrupt/error so partial
    # transcripts are never fed to memory.
    if not (turn.interrupted or turn.error is not None):
        try:
            agent._sync_external_memory_for_turn(
                original_user_message=original_user_message,
                final_response=turn.final_text,
                interrupted=False,
            )
        except Exception:
            logger.debug("external memory sync raised", exc_info=True)

    # Background review fork — same cadence and signature as the default
    # path.  Fires only when a trigger tripped AND there is a real final
    # response.
    review_requested = should_review_memory or should_review_skills
    if turn.final_text and not turn.interrupted and review_requested:
        try:
            agent._spawn_background_review(
                messages_snapshot=list(messages),
                review_memory=should_review_memory,
                review_skills=should_review_skills,
            )
        except Exception:
            logger.debug("background review spawn raised", exc_info=True)

    return {
        "final_response": turn.final_text,
        "messages": messages,
        "api_calls": 1,  # one app-server "turn" maps to one logical API call
        "completed": not turn.interrupted and turn.error is None,
        "partial": turn.interrupted or turn.error is not None,
        "error": turn.error,
        "codex_thread_id": turn.thread_id,
        "codex_turn_id": turn.turn_id,
    }
|
||||
|
||||
|
||||
|
||||
|
||||
def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
    """Execute one streaming Responses API request and return the final response.

    Args:
        agent: The parent ``AIAgent``.  Supplies the interrupt flag, activity
            pings, text/reasoning delta callbacks, log context, and the
            ``_run_codex_create_stream_fallback`` recovery path.
        api_kwargs: Keyword arguments passed unchanged to
            ``client.responses.stream``.
        client: Client to use; when falsy, one is obtained from
            ``agent._ensure_primary_openai_client``.
        on_first_delta: Optional zero-argument callable invoked once, just
            before the first text delta is forwarded.  Exceptions it raises
            are swallowed.

    Returns:
        The object from ``stream.get_final_response()`` — with ``output``
        backfilled from stream events when the SDK returned an empty list —
        or whatever the create-stream fallback returns.

    Raises:
        InterruptedError: ``agent._interrupt_requested`` was set at the start
            of an attempt.
        RuntimeError: Re-raised when the SDK error is neither a missing
            ``response.completed`` nor a ``response.created`` prelude error.
    """
    import httpx as _httpx

    active_client = client or agent._ensure_primary_openai_client(reason="codex_stream_direct")
    # One retry: at most two attempts through responses.stream().
    max_stream_retries = 1
    has_tool_calls = False
    first_delta_fired = False
    # Accumulate streamed text so we can recover if get_final_response()
    # returns empty output (e.g. chatgpt.com backend-api sends
    # response.incomplete instead of response.completed).
    # NOTE(review): this accumulator, has_tool_calls and first_delta_fired
    # are initialised once, outside the retry loop, whereas
    # collected_output_items is reset per attempt — so text from a failed
    # first attempt is still present on retry.  Confirm this is intended.
    agent._codex_streamed_text_parts: list = []
    for attempt in range(max_stream_retries + 1):
        if agent._interrupt_requested:
            raise InterruptedError("Agent interrupted before Codex stream retry")
        collected_output_items: list = []
        try:
            with active_client.responses.stream(**api_kwargs) as stream:
                for event in stream:
                    agent._touch_activity("receiving stream response")
                    if agent._interrupt_requested:
                        break
                    event_type = getattr(event, "type", "")
                    # Fire callbacks on text content deltas (suppress during tool calls).
                    # The equality test is already covered by the substring test.
                    if "output_text.delta" in event_type or event_type == "response.output_text.delta":
                        delta_text = getattr(event, "delta", "")
                        if delta_text:
                            # Always recorded, even when forwarding is suppressed.
                            agent._codex_streamed_text_parts.append(delta_text)
                        if delta_text and not has_tool_calls:
                            if not first_delta_fired:
                                first_delta_fired = True
                                if on_first_delta:
                                    try:
                                        on_first_delta()
                                    except Exception:
                                        pass
                            agent._fire_stream_delta(delta_text)
                    # Track tool calls to suppress text streaming
                    elif "function_call" in event_type:
                        has_tool_calls = True
                    # Fire reasoning callbacks
                    elif "reasoning" in event_type and "delta" in event_type:
                        reasoning_text = getattr(event, "delta", "")
                        if reasoning_text:
                            agent._fire_reasoning_delta(reasoning_text)
                    # Collect completed output items — some backends
                    # (chatgpt.com/backend-api/codex) stream valid items
                    # via response.output_item.done but the SDK's
                    # get_final_response() returns an empty output list.
                    elif event_type == "response.output_item.done":
                        done_item = getattr(event, "item", None)
                        if done_item is not None:
                            collected_output_items.append(done_item)
                    # Log non-completed terminal events for diagnostics
                    elif event_type in {"response.incomplete", "response.failed"}:
                        resp_obj = getattr(event, "response", None)
                        status = getattr(resp_obj, "status", None) if resp_obj else None
                        incomplete_details = getattr(resp_obj, "incomplete_details", None) if resp_obj else None
                        logger.warning(
                            "Codex Responses stream received terminal event %s "
                            "(status=%s, incomplete_details=%s, streamed_chars=%d). %s",
                            event_type, status, incomplete_details,
                            sum(len(p) for p in agent._codex_streamed_text_parts),
                            agent._client_log_context(),
                        )
                final_response = stream.get_final_response()
                # PATCH: ChatGPT Codex backend streams valid output items
                # but get_final_response() can return an empty output list.
                # Backfill from collected items or synthesize from deltas.
                _out = getattr(final_response, "output", None)
                if isinstance(_out, list) and not _out:
                    if collected_output_items:
                        final_response.output = list(collected_output_items)
                        logger.debug(
                            "Codex stream: backfilled %d output items from stream events",
                            len(collected_output_items),
                        )
                    elif agent._codex_streamed_text_parts and not has_tool_calls:
                        # No items were collected: wrap the streamed text in
                        # a single assistant message holding one output_text part.
                        assembled = "".join(agent._codex_streamed_text_parts)
                        final_response.output = [SimpleNamespace(
                            type="message",
                            role="assistant",
                            status="completed",
                            content=[SimpleNamespace(type="output_text", text=assembled)],
                        )]
                        logger.debug(
                            "Codex stream: synthesized output from %d text deltas (%d chars)",
                            len(agent._codex_streamed_text_parts), len(assembled),
                        )
                return final_response
        except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc:
            # Transport failure: retry while attempts remain, then fall back
            # to the create(stream=True) path.
            if attempt < max_stream_retries:
                logger.debug(
                    "Codex Responses stream transport failed (attempt %s/%s); retrying. %s error=%s",
                    attempt + 1,
                    max_stream_retries + 1,
                    agent._client_log_context(),
                    exc,
                )
                continue
            logger.debug(
                "Codex Responses stream transport failed; falling back to create(stream=True). %s error=%s",
                agent._client_log_context(),
                exc,
            )
            return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
        except RuntimeError as exc:
            err_text = str(exc)
            missing_completed = "response.completed" in err_text
            # The OpenAI SDK's Responses streaming state machine raises
            # ``RuntimeError("Expected to have received `response.created`
            # before `<event-type>`")`` when the first SSE event from the
            # server is anything other than ``response.created`` — and it
            # discards the event's payload before we can read it.  Three
            # real-world backends emit a different first frame:
            #
            #   * xAI on grok-4.x OAuth — sends ``error`` (issues
            #     reported around the May 2026 SuperGrok rollout when
            #     multi-turn conversations replay encrypted reasoning
            #     content the OAuth tier rejects)
            #   * codex-lb relays — send ``codex.rate_limits`` (#14634)
            #   * custom Responses relays — send ``response.in_progress``
            #     (#8133)
            #
            # In all three cases the underlying byte stream is still
            # readable: a non-stream ``responses.create(stream=True)``
            # fallback succeeds and surfaces the real provider error as
            # a normal exception with body+status_code attached, which
            # ``_summarize_api_error`` can then translate into a useful
            # user-facing line.  Treat ``response.created`` prelude
            # errors the same way we already treat ``response.completed``
            # postlude errors.
            prelude_error = (
                "Expected to have received `response.created`" in err_text
                or "Expected to have received \"response.created\"" in err_text
            )
            if (missing_completed or prelude_error) and attempt < max_stream_retries:
                logger.debug(
                    "Responses stream %s (attempt %s/%s); retrying. %s",
                    "prelude rejected" if prelude_error else "closed before completion",
                    attempt + 1,
                    max_stream_retries + 1,
                    agent._client_log_context(),
                )
                continue
            if missing_completed or prelude_error:
                logger.debug(
                    "Responses stream %s; falling back to create(stream=True). %s err=%s",
                    "rejected before response.created" if prelude_error else "did not emit response.completed",
                    agent._client_log_context(),
                    err_text,
                )
                return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
            # Any other RuntimeError is not a known stream-shape problem.
            raise
|
||||
|
||||
|
||||
|
||||
def run_codex_create_stream_fallback(agent, api_kwargs: dict, client: Any = None):
    """Fallback path for stream completion edge cases on Codex-style Responses backends.

    Issues ``responses.create(stream=True)`` and consumes the raw event
    stream by hand, returning the response carried by the first terminal
    event (``response.completed`` / ``response.incomplete`` /
    ``response.failed``).

    Args:
        agent: The parent ``AIAgent``; supplies the client, the transport's
            ``preflight_kwargs`` hook, and activity pings.
        api_kwargs: Request kwargs.  Copied before ``stream=True`` is set,
            so the caller's dict is not modified by this function.
        client: Client to use; when falsy, one is obtained from
            ``agent._ensure_primary_openai_client``.

    Returns:
        The terminal response.  When its ``output`` is an empty list it is
        backfilled from ``response.output_item.done`` items, or failing
        that synthesized from the collected text deltas.  If ``create``
        returns a concrete response (has ``output``) or a non-iterable,
        that object is returned unchanged.

    Raises:
        run_agent._StreamErrorEvent: The stream emitted an ``error`` frame.
        RuntimeError: The stream ended without a usable terminal response.
    """

    def _field(event, key, default=None):
        """Read *key* from an SDK event object, or from a plain-dict event."""
        value = getattr(event, key, default)
        if not value and isinstance(event, dict):
            value = event.get(key, default)
        return value

    active_client = client or agent._ensure_primary_openai_client(reason="codex_create_stream_fallback")
    fallback_kwargs = dict(api_kwargs)
    fallback_kwargs["stream"] = True
    fallback_kwargs = agent._get_transport().preflight_kwargs(fallback_kwargs, allow_stream=True)
    stream_or_response = active_client.responses.create(**fallback_kwargs)

    # Compatibility shim for mocks or providers that still return a concrete
    # response (or anything else that cannot be iterated as an event stream).
    if hasattr(stream_or_response, "output") or not hasattr(stream_or_response, "__iter__"):
        return stream_or_response

    collected_output_items: list = []
    collected_text_deltas: list = []
    try:
        for event in stream_or_response:
            agent._touch_activity("receiving stream response")
            event_type = _field(event, "type")

            # ``error`` SSE frames carry the provider's real failure reason
            # (subscription / quota / model-not-available /
            # rejected-reasoning-replay) but are not in the terminal set,
            # so without this branch they would be consumed silently and
            # the call would end with "did not emit a terminal response".
            # xAI in particular emits ``type=error`` as the FIRST frame for
            # OAuth accounts whose Grok subscription is missing/exhausted;
            # the SDK stream helper then raises a RuntimeError which the
            # caller routes here, expecting this fallback to surface the
            # message.  Raise an APIError-shaped exception so
            # ``_summarize_api_error`` and the credential-pool entitlement
            # detector see the real text.
            if event_type == "error":
                raw_message = _field(event, "message")
                # Coerce to str first: a structured (non-string) message
                # must not crash on ``.strip()``; a blank message falls
                # back to the generic text instead of an empty error.
                err_message = str(raw_message).strip() if raw_message else ""
                err_code = _field(event, "code")
                err_param = _field(event, "param")
                from run_agent import _StreamErrorEvent
                raise _StreamErrorEvent(
                    err_message or "stream emitted error event",
                    code=err_code,
                    param=err_param,
                )

            # Collect output items and text deltas for backfill.
            if event_type == "response.output_item.done":
                done_item = _field(event, "item")
                if done_item is not None:
                    collected_output_items.append(done_item)
            elif event_type == "response.output_text.delta":
                delta = _field(event, "delta", "")
                if delta:
                    collected_text_deltas.append(delta)

            if event_type not in {"response.completed", "response.incomplete", "response.failed"}:
                continue

            terminal_response = _field(event, "response")
            if terminal_response is None:
                # Terminal frame without a payload: keep reading in case a
                # later frame carries one.
                continue

            # Backfill empty output from collected stream events.
            _out = getattr(terminal_response, "output", None)
            if isinstance(_out, list) and not _out:
                if collected_output_items:
                    terminal_response.output = list(collected_output_items)
                    logger.debug(
                        "Codex fallback stream: backfilled %d output items",
                        len(collected_output_items),
                    )
                elif collected_text_deltas:
                    assembled = "".join(collected_text_deltas)
                    terminal_response.output = [SimpleNamespace(
                        type="message", role="assistant",
                        status="completed",
                        content=[SimpleNamespace(type="output_text", text=assembled)],
                    )]
                    logger.debug(
                        "Codex fallback stream: synthesized from %d deltas (%d chars)",
                        len(collected_text_deltas), len(assembled),
                    )
            return terminal_response
    finally:
        # Always release the stream, including on the early return above.
        close_fn = getattr(stream_or_response, "close", None)
        if callable(close_fn):
            try:
                close_fn()
            except Exception:
                # Best effort: a failed close must not mask the result.
                logger.debug("Codex fallback stream: close() raised", exc_info=True)

    # Reaching this point means the loop ended without returning, i.e. no
    # terminal event carried a response.
    raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")
|
||||
|
||||
|
||||
|
||||
# Public API of this module: the three runtime entry points defined above.
__all__ = [
    "run_codex_app_server_turn",
    "run_codex_stream",
    "run_codex_create_stream_fallback",
]
|
||||
556
agent/conversation_compression.py
Normal file
556
agent/conversation_compression.py
Normal file
|
|
@ -0,0 +1,556 @@
|
|||
"""Context compression — extract the AIAgent methods that drive summarisation.
|
||||
|
||||
Four concerns live here:
|
||||
|
||||
* :func:`check_compression_model_feasibility` — startup probe of the
|
||||
configured auxiliary compression model. Warns when the aux context
|
||||
window can't fit the main model's compression threshold; auto-lowers
|
||||
the session threshold when possible; hard-rejects auxes below
|
||||
``MINIMUM_CONTEXT_LENGTH``.
|
||||
|
||||
* :func:`replay_compression_warning` — re-emit a stored warning through
|
||||
the gateway ``status_callback`` once it's wired up (the callback is
|
||||
set after :class:`AIAgent` construction).
|
||||
|
||||
* :func:`compress_context` — the actual compression call. Runs the
|
||||
configured compressor, splits the SQLite session, rotates the
|
||||
session_id, notifies plugin context engines / memory providers, and
|
||||
returns the compressed message list and freshly-built system prompt.
|
||||
|
||||
* :func:`try_shrink_image_parts_in_messages` — image-too-large recovery
|
||||
helper that re-encodes ``data:image/...;base64,...`` parts at a smaller
|
||||
size so retries can fit under provider ceilings (Anthropic's 5 MB).
|
||||
|
||||
``run_agent`` keeps thin wrappers for each so existing call sites
|
||||
(``self._compress_context(...)``) keep working. Tests that exercise
|
||||
these paths see no behavioural change.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def check_compression_model_feasibility(agent: Any) -> None:
    """Warn at session start if the auxiliary compression model's context
    window is smaller than the main model's compression threshold.

    When the auxiliary model cannot fit the content that needs summarising,
    compression will either fail outright (the LLM call errors) or produce
    a severely truncated summary.

    Called during ``AIAgent.__init__`` so CLI users see the warning
    immediately (via ``_vprint``).  The gateway sets ``status_callback``
    *after* construction, so :func:`replay_compression_warning` re-sends
    the stored warning through the callback on the first
    ``run_conversation()`` call.

    Side effects on *agent*:
        * ``_compression_warning`` is set and ``_emit_status`` is called
          whenever a warning is produced.
        * ``context_compressor.threshold_tokens`` (and, when the main
          context length is known, ``threshold_percent``) are lowered when
          the aux context window is below the current threshold.

    Raises:
        ValueError: The aux model's context window is known and below
            ``MINIMUM_CONTEXT_LENGTH``.  Every other exception is logged
            at debug level and swallowed.
    """
    if not agent.compression_enabled:
        return
    try:
        from agent.auxiliary_client import (
            _resolve_task_provider_model,
            get_text_auxiliary_client,
        )
        from agent.model_metadata import (
            MINIMUM_CONTEXT_LENGTH,
            get_model_context_length,
        )

        client, aux_model = get_text_auxiliary_client(
            "compression",
            main_runtime=agent._current_main_runtime(),
        )
        # Best-effort aux provider label for the warning message.  The
        # configured provider may be "auto", in which case we fall back
        # to the client's base_url hostname so the user can still tell
        # where the compression model is actually being called.
        try:
            _aux_cfg_provider, _, _, _, _ = _resolve_task_provider_model("compression")
        except Exception:
            _aux_cfg_provider = ""
        if client is None or not aux_model:
            # No usable aux client/model: warn, remember the warning for
            # later replay, and stop — nothing to size-check.
            if _aux_cfg_provider and _aux_cfg_provider != "auto":
                msg = (
                    "⚠ Configured auxiliary compression provider "
                    f"'{_aux_cfg_provider}' is unavailable — context "
                    "compression will drop middle turns without a summary. "
                    "Check auxiliary.compression in config.yaml and "
                    "reauthenticate that provider."
                )
            else:
                msg = (
                    "⚠ No auxiliary LLM provider configured — context "
                    "compression will drop middle turns without a summary. "
                    "Run `hermes setup` or set OPENROUTER_API_KEY."
                )
            agent._compression_warning = msg
            agent._emit_status(msg)
            logger.warning(
                "No auxiliary LLM provider for compression — "
                "summaries will be unavailable."
            )
            return

        aux_base_url = str(getattr(client, "base_url", ""))
        aux_api_key = str(getattr(client, "api_key", ""))

        aux_context = get_model_context_length(
            aux_model,
            base_url=aux_base_url,
            api_key=aux_api_key,
            config_context_length=getattr(agent, "_aux_compression_context_length_config", None),
            # Each model must be resolved with its own provider so that
            # provider-specific paths (e.g. Bedrock static table, OpenRouter API)
            # are invoked for the correct client, not inherited from the main model.
            provider=(_aux_cfg_provider if _aux_cfg_provider and _aux_cfg_provider != "auto" else getattr(agent, "provider", "")),
            custom_providers=agent._custom_providers,
        )

        # Hard floor: the auxiliary compression model must have at least
        # MINIMUM_CONTEXT_LENGTH (64K) tokens of context.  The main model
        # is already required to meet this floor (checked earlier in
        # __init__), so the compression model must too — otherwise it
        # cannot summarise a full threshold-sized window of main-model
        # content.  Mirrors the main-model rejection pattern.
        if aux_context and aux_context < MINIMUM_CONTEXT_LENGTH:
            raise ValueError(
                f"Auxiliary compression model {aux_model} has a context "
                f"window of {aux_context:,} tokens, which is below the "
                f"minimum {MINIMUM_CONTEXT_LENGTH:,} required by Hermes "
                f"Agent. Choose a compression model with at least "
                f"{MINIMUM_CONTEXT_LENGTH // 1000}K context (set "
                f"auxiliary.compression.model in config.yaml), or set "
                f"auxiliary.compression.context_length to override the "
                f"detected value if it is wrong."
            )

        threshold = agent.context_compressor.threshold_tokens
        # NOTE(review): unlike the floor check above, this comparison is not
        # guarded against a falsy ``aux_context``.  A ``None`` would raise
        # TypeError here (swallowed by the generic handler below) and a 0
        # would lower the threshold to 0 — confirm that
        # get_model_context_length() always returns a positive int.
        if aux_context < threshold:
            # Auto-correct: lower the live session threshold so
            # compression actually works this session.  The hard floor
            # above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
            # so the new threshold is always >= 64K.
            #
            # The compression summariser sends a single user-role
            # prompt (no system prompt, no tools) to the aux model, so
            # new_threshold == aux_context is safe: the request is
            # the raw messages plus a small summarisation instruction.
            old_threshold = threshold
            new_threshold = aux_context
            agent.context_compressor.threshold_tokens = new_threshold
            # Keep threshold_percent in sync so future main-model
            # context_length changes (update_model) re-derive from a
            # sensible number rather than the original too-high value.
            main_ctx = agent.context_compressor.context_length
            if main_ctx:
                agent.context_compressor.threshold_percent = (
                    new_threshold / main_ctx
                )
            # Whole-number percentage suggested in the config hint below;
            # 50 is the placeholder when the main context length is unknown.
            safe_pct = int((aux_context / main_ctx) * 100) if main_ctx else 50
            # Build human-readable "model (provider)" labels for both
            # the main model and the compression model so users can
            # tell at a glance which provider each side is actually
            # using.  When the configured provider is empty or "auto",
            # fall back to the client's base_url hostname.
            _main_model = getattr(agent, "model", "") or "?"
            _main_provider = getattr(agent, "provider", "") or ""
            _aux_provider_label = (
                _aux_cfg_provider
                if _aux_cfg_provider and _aux_cfg_provider != "auto"
                else ""
            )
            if not _aux_provider_label:
                try:
                    from urllib.parse import urlparse
                    _aux_provider_label = (
                        urlparse(aux_base_url).hostname or aux_base_url
                    )
                except Exception:
                    _aux_provider_label = aux_base_url or "auto"
            _main_label = (
                f"{_main_model} ({_main_provider})"
                if _main_provider
                else _main_model
            )
            _aux_label = f"{aux_model} ({_aux_provider_label})"
            msg = (
                f"⚠ Compression model {_aux_label} context is "
                f"{aux_context:,} tokens, but the main model "
                f"{_main_label}'s compression threshold was "
                f"{old_threshold:,} tokens. "
                f"Auto-lowered this session's threshold to "
                f"{new_threshold:,} tokens so compression can run.\n"
                f" To make this permanent, edit config.yaml — either:\n"
                f" 1. Use a larger compression model:\n"
                f" auxiliary:\n"
                f" compression:\n"
                f" model: <model-with-{old_threshold:,}+-context>\n"
                f" 2. Lower the compression threshold:\n"
                f" compression:\n"
                f" threshold: 0.{safe_pct:02d}"
            )
            # Stored so the warning can be re-sent once a status callback
            # is wired up.
            agent._compression_warning = msg
            agent._emit_status(msg)
            logger.warning(
                "Auxiliary compression model %s has %d token context, "
                "below the main model's compression threshold of %d "
                "tokens — auto-lowered session threshold to %d to "
                "keep compression working.",
                aux_model,
                aux_context,
                old_threshold,
                new_threshold,
            )
    except ValueError:
        # Hard rejections (aux below minimum context) must propagate
        # so the session refuses to start.
        raise
    except Exception as exc:
        logger.debug(
            "Compression feasibility check failed (non-fatal): %s", exc
        )
|
||||
|
||||
|
||||
def replay_compression_warning(agent: Any) -> None:
    """Deliver a stored compression warning via ``agent.status_callback``.

    The warning text is read from ``agent._compression_warning``.  Per the
    original design note, the gateway's ``status_callback`` is not wired
    during ``__init__``, so the first emission only reaches the CLI; this
    function is meant to be called at the start of the first
    ``run_conversation()`` so gateway platforms receive the warning too.

    Nothing happens when there is no stored warning or no callback.  Any
    exception raised by the callback is deliberately swallowed — replaying
    a warning is best-effort and must not break the conversation.
    """
    pending = getattr(agent, "_compression_warning", None)
    if not pending:
        return
    callback = agent.status_callback
    if not callback:
        return
    try:
        callback("lifecycle", pending)
    except Exception:
        # Best-effort notification only.
        pass
|
||||
|
||||
|
||||
def compress_context(
    agent: Any,
    messages: list,
    system_message: str,
    *,
    approx_tokens: Optional[int] = None,
    task_id: str = "default",
    focus_topic: Optional[str] = None,
) -> Tuple[list, str]:
    """Compress conversation context and split the session in SQLite.

    Besides producing the compressed message list, this function:

    * rebuilds and caches the system prompt on ``agent``;
    * when a session DB is attached, ends the current session, rotates
      ``agent.session_id`` (also exported via ``HERMES_SESSION_ID``), and
      creates a child session whose parent is the old one;
    * notifies the context engine and the memory manager of the rotation;
    * refreshes the compressor's token estimate and clears the file-read
      dedup cache for ``task_id``.

    Args:
        agent: The owning :class:`AIAgent`.
        messages: Current message history (will be summarised).
        system_message: Current system prompt; rebuilt after compression.
        approx_tokens: Pre-compression token estimate, logged for ops.
        task_id: Tool task scope (used for clearing file-read dedup state).
        focus_topic: Optional focus string for guided compression — the
            summariser will prioritise preserving information related to
            this topic. Inspired by Claude Code's ``/compact <focus>``.

    Returns:
        ``(compressed_messages, new_system_prompt)`` tuple.
    """
    _pre_msg_count = len(messages)
    logger.info(
        "context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r",
        agent.session_id or "none", _pre_msg_count,
        f"{approx_tokens:,}" if approx_tokens else "unknown", agent.model,
        focus_topic,
    )
    agent._emit_status(
        "🗜️ Compacting context — summarizing earlier conversation so I can continue..."
    )

    # Notify external memory provider before compression discards context.
    # Best-effort: a failing provider must not block compression, but the
    # failure is logged so it can be diagnosed.
    if agent._memory_manager:
        try:
            agent._memory_manager.on_pre_compress(messages)
        except Exception as _pre_err:
            logger.debug("memory manager on_pre_compress (compression): %s", _pre_err)

    try:
        compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
    except TypeError:
        # Plugin context engine with strict signature that doesn't accept
        # focus_topic — fall back to calling without it.
        compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens)

    summary_error = getattr(agent.context_compressor, "_last_summary_error", None)
    if summary_error:
        # Dedup: only warn when the error differs from the last one reported.
        if getattr(agent, "_last_compression_summary_warning", None) != summary_error:
            agent._last_compression_summary_warning = summary_error
            agent._emit_warning(
                f"⚠ Compression summary failed: {summary_error}. "
                "Inserted a fallback context marker."
            )
    else:
        # No hard failure — but did the configured aux model error out
        # and get recovered by retrying on main? Surface that so users
        # know their auxiliary.compression.model setting is broken even
        # though compression succeeded.
        _aux_fail_model = getattr(agent.context_compressor, "_last_aux_model_failure_model", None)
        _aux_fail_err = getattr(agent.context_compressor, "_last_aux_model_failure_error", None)
        if _aux_fail_model:
            # Dedup on (model, error) so we don't spam on every compaction
            _aux_key = (_aux_fail_model, _aux_fail_err)
            if getattr(agent, "_last_aux_fallback_warning_key", None) != _aux_key:
                agent._last_aux_fallback_warning_key = _aux_key
                agent._emit_warning(
                    f"ℹ Configured compression model '{_aux_fail_model}' failed "
                    f"({_aux_fail_err or 'unknown error'}). Recovered using main model — "
                    "check auxiliary.compression.model in config.yaml."
                )

    # Re-inject the current todo list so it survives the summarisation.
    todo_snapshot = agent._todo_store.format_for_injection()
    if todo_snapshot:
        compressed.append({"role": "user", "content": todo_snapshot})

    agent._invalidate_system_prompt()
    new_system_prompt = agent._build_system_prompt(system_message)
    agent._cached_system_prompt = new_system_prompt

    # Stays None unless the session DB split below gets far enough to end
    # the old session; the notification hooks further down key off it.
    old_session_id = None
    if agent._session_db:
        try:
            # Propagate title to the new session with auto-numbering
            old_title = agent._session_db.get_session_title(agent.session_id)
            # Trigger memory extraction on the old session before it rotates.
            agent.commit_memory_session(messages)
            agent._session_db.end_session(agent.session_id, "compression")
            old_session_id = agent.session_id
            agent.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
            os.environ["HERMES_SESSION_ID"] = agent.session_id
            try:
                from gateway.session_context import _SESSION_ID
                _SESSION_ID.set(agent.session_id)
            except Exception:
                # Best-effort: the gateway session context may be unavailable.
                pass
            # Update session_log_file to point to the new session's JSON file
            agent.session_log_file = agent.logs_dir / f"session_{agent.session_id}.json"
            agent._session_db_created = False
            agent._session_db.create_session(
                session_id=agent.session_id,
                source=agent.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
                model=agent.model,
                model_config=agent._session_init_model_config,
                parent_session_id=old_session_id,
            )
            agent._session_db_created = True
            # Auto-number the title for the continuation session
            if old_title:
                try:
                    new_title = agent._session_db.get_next_title_in_lineage(old_title)
                    agent._session_db.set_session_title(agent.session_id, new_title)
                except Exception as e:
                    logger.debug("Could not propagate title on compression: %s", e)
            agent._session_db.update_system_prompt(agent.session_id, new_system_prompt)
            # Reset flush cursor — new session starts with no messages written
            agent._last_flushed_db_idx = 0
        except Exception as e:
            logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)

    # Notify the context engine that the session_id rotated because of
    # compression (not a fresh /new). Plugin engines (e.g. hermes-lcm) use
    # boundary_reason="compression" to preserve DAG lineage across the
    # rollover instead of re-initializing fresh per-session state.
    # See hermes-lcm#68. Built-in ContextCompressor ignores kwargs.
    try:
        if old_session_id and hasattr(agent.context_compressor, "on_session_start"):
            agent.context_compressor.on_session_start(
                agent.session_id or "",
                boundary_reason="compression",
                old_session_id=old_session_id,
            )
    except Exception as _ce_err:
        logger.debug("context engine on_session_start (compression): %s", _ce_err)

    # Notify memory providers of the compression-driven session_id rotation
    # so provider-cached per-session state (Hindsight's _document_id,
    # accumulated turn buffers, counters) refreshes. reset=False because
    # the logical conversation continues; only the id and DB row rolled
    # over. See #6672.
    try:
        if old_session_id and agent._memory_manager:
            agent._memory_manager.on_session_switch(
                agent.session_id or "",
                parent_session_id=old_session_id,
                reset=False,
                reason="compression",
            )
    except Exception as _me_err:
        logger.debug("memory manager on_session_switch (compression): %s", _me_err)

    # Warn on repeated compressions (quality degrades with each pass)
    _cc = agent.context_compressor.compression_count
    if _cc >= 2:
        agent._vprint(
            f"{agent.log_prefix}⚠️ Session compressed {_cc} times — "
            "accuracy may degrade. Consider /new to start fresh.",
            force=True,
        )

    # Update token estimate after compaction so pressure calculations
    # use the post-compression count, not the stale pre-compression one.
    # Use estimate_request_tokens_rough() so tool schemas are included —
    # with 50+ tools enabled, schemas alone can add 20-30K tokens, and
    # omitting them delays the next compression cycle far past the
    # configured threshold (issue #14695).
    _compressed_est = estimate_request_tokens_rough(
        compressed,
        system_prompt=new_system_prompt or "",
        tools=agent.tools or None,
    )
    agent.context_compressor.last_prompt_tokens = _compressed_est
    agent.context_compressor.last_completion_tokens = 0

    # Clear the file-read dedup cache. After compression the original
    # read content is summarised away — if the model re-reads the same
    # file it needs the full content, not a "file unchanged" stub.
    try:
        from tools.file_tools import reset_file_dedup
        reset_file_dedup(task_id)
    except Exception as _dedup_err:
        # Best-effort, but leave a trace for debugging.
        logger.debug("file-read dedup reset after compression failed: %s", _dedup_err)

    logger.info(
        "context compression done: session=%s messages=%d->%d tokens=~%s",
        agent.session_id or "none", _pre_msg_count, len(compressed),
        f"{_compressed_est:,}",
    )
    return compressed, new_system_prompt
|
||||
|
||||
|
||||
def try_shrink_image_parts_in_messages(api_messages: list) -> bool:
    """Re-encode all native image parts at a smaller size to recover from
    image-too-large errors (Anthropic 5 MB, unknown other providers).

    Mutates ``api_messages`` in place. Returns True if any image part was
    actually replaced, False if there were no image parts to shrink or
    Pillow couldn't help (caller should surface the original error).

    Strategy: look for ``image_url`` / ``input_image`` parts carrying a
    ``data:image/...;base64,...`` payload. For each one whose encoded
    size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
    ceiling with header overhead), write the base64 to a tempfile, call
    ``vision_tools._resize_image_for_vision`` to produce a smaller data
    URL, and substitute it in place.

    Non-data-URL images (http/https URLs) are not touched — the provider
    fetches those itself and the size limit is different.

    Args:
        api_messages: OpenAI-format message dicts; entries that are not
            dicts, or whose ``content`` is not a list, are skipped.

    Returns:
        True when at least one image part was replaced, otherwise False.
    """
    if not api_messages:
        return False

    try:
        from tools.vision_tools import _resize_image_for_vision
    except Exception as exc:
        logger.warning("image-shrink recovery: vision_tools unavailable — %s", exc)
        return False

    # 4 MB target leaves comfortable headroom under Anthropic's 5 MB.
    # Non-Anthropic providers we haven't observed rejecting are fine with
    # much larger; shrinking to 4 MB here loses quality but only fires
    # after a confirmed provider rejection, so the alternative is failure.
    target_bytes = 4 * 1024 * 1024
    changed_count = 0

    def _shrink_data_url(url: str) -> Optional[str]:
        """Return a smaller data URL, or None if shrink can't help."""
        if not isinstance(url, str) or not url.startswith("data:"):
            return None
        if len(url) <= target_bytes:
            # This specific image wasn't the oversized one.
            return None
        try:
            header, _, data = url.partition(",")
            # Default to JPEG unless the header names another image type.
            mime = "image/jpeg"
            if header.startswith("data:"):
                mime_part = header[len("data:"):].split(";", 1)[0].strip()
                if mime_part.startswith("image/"):
                    mime = mime_part
            import base64 as _b64
            raw = _b64.b64decode(data)
            suffix = {
                "image/png": ".png", "image/gif": ".gif", "image/webp": ".webp",
                "image/jpeg": ".jpg", "image/jpg": ".jpg", "image/bmp": ".bmp",
            }.get(mime, ".jpg")
            tmp = tempfile.NamedTemporaryFile(
                prefix="hermes_shrink_", suffix=suffix, delete=False,
            )
            try:
                # ``with`` closes the handle even when write() raises, so
                # the descriptor never leaks and the unlink below can
                # succeed on platforms that refuse to delete open files.
                with tmp:
                    tmp.write(raw)
                resized = _resize_image_for_vision(
                    Path(tmp.name),
                    mime_type=mime,
                    max_base64_bytes=target_bytes,
                )
            finally:
                try:
                    Path(tmp.name).unlink(missing_ok=True)
                except Exception:
                    # Best-effort cleanup of the scratch file.
                    pass
            if not resized or len(resized) >= len(url):
                # Shrink didn't help (or made it bigger — corrupt input?).
                return None
            return resized
        except Exception as exc:
            logger.warning("image-shrink recovery: re-encode failed — %s", exc)
            return None

    for msg in api_messages:
        if not isinstance(msg, dict):
            continue
        content = msg.get("content")
        if not isinstance(content, list):
            continue
        for part in content:
            if not isinstance(part, dict):
                continue
            ptype = part.get("type")
            if ptype not in {"image_url", "input_image"}:
                continue
            image_value = part.get("image_url")
            # OpenAI chat.completions: {"image_url": {"url": "data:..."}}
            # OpenAI Responses: {"image_url": "data:..."}
            if isinstance(image_value, dict):
                url = image_value.get("url", "")
                resized = _shrink_data_url(url)
                if resized:
                    image_value["url"] = resized
                    changed_count += 1
            elif isinstance(image_value, str):
                resized = _shrink_data_url(image_value)
                if resized:
                    part["image_url"] = resized
                    changed_count += 1

    if changed_count:
        logger.info(
            "image-shrink recovery: re-encoded %d image part(s) to fit under %.0f MB",
            changed_count, target_bytes / (1024 * 1024),
        )
    return changed_count > 0
|
||||
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = [
    "check_compression_model_feasibility",
    "replay_compression_warning",
    "compress_context",
    "try_shrink_image_parts_in_messages",
]
|
||||
4018
agent/conversation_loop.py
Normal file
4018
agent/conversation_loop.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -166,6 +166,8 @@ class PooledCredential:
|
|||
@property
def runtime_api_key(self) -> str:
    """Return the credential string to use for runtime API calls.

    For every provider the fallback is ``access_token``.  For the
    ``"nous"`` provider ``agent_key`` takes precedence when set: Nous
    stores the runtime inference credential there for compatibility, and
    it may be a NAS invoke JWT or a legacy opaque key.  A missing value
    yields the empty string.
    """
    token = self.access_token
    if self.provider == "nous":
        token = self.agent_key or token
    return str(token or "")
|
||||
|
||||
|
|
@ -621,18 +623,35 @@ class CredentialPool:
|
|||
return entry
|
||||
store_refresh = state.get("refresh_token", "")
|
||||
store_access = state.get("access_token", "")
|
||||
if store_refresh and store_refresh != entry.refresh_token:
|
||||
comparable_updates = {
|
||||
"access_token": store_access,
|
||||
"refresh_token": store_refresh,
|
||||
"expires_at": state.get("expires_at"),
|
||||
"agent_key": state.get("agent_key"),
|
||||
"agent_key_expires_at": state.get("agent_key_expires_at"),
|
||||
"inference_base_url": state.get("inference_base_url"),
|
||||
}
|
||||
should_sync = any(
|
||||
value not in (None, "") and getattr(entry, key, None) != value
|
||||
for key, value in comparable_updates.items()
|
||||
)
|
||||
if should_sync:
|
||||
logger.debug(
|
||||
"Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)",
|
||||
"Pool entry %s: syncing Nous state from auth.json",
|
||||
entry.id,
|
||||
)
|
||||
field_updates: Dict[str, Any] = {
|
||||
"access_token": store_access,
|
||||
"refresh_token": store_refresh,
|
||||
"last_status": None,
|
||||
"last_status_at": None,
|
||||
"last_error_code": None,
|
||||
"last_error_reason": None,
|
||||
"last_error_message": None,
|
||||
"last_error_reset_at": None,
|
||||
}
|
||||
if store_access:
|
||||
field_updates["access_token"] = store_access
|
||||
if store_refresh:
|
||||
field_updates["refresh_token"] = store_refresh
|
||||
if state.get("expires_at"):
|
||||
field_updates["expires_at"] = state["expires_at"]
|
||||
if state.get("agent_key"):
|
||||
|
|
@ -811,36 +830,15 @@ class CredentialPool:
|
|||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
nous_state = {
|
||||
"access_token": entry.access_token,
|
||||
"refresh_token": entry.refresh_token,
|
||||
"client_id": entry.client_id,
|
||||
"portal_base_url": entry.portal_base_url,
|
||||
"inference_base_url": entry.inference_base_url,
|
||||
"token_type": entry.token_type,
|
||||
"scope": entry.scope,
|
||||
"obtained_at": entry.obtained_at,
|
||||
"expires_at": entry.expires_at,
|
||||
"agent_key": entry.agent_key,
|
||||
"agent_key_expires_at": entry.agent_key_expires_at,
|
||||
"tls": entry.tls,
|
||||
}
|
||||
refreshed = auth_mod.refresh_nous_oauth_from_state(
|
||||
nous_state,
|
||||
auth_mod.resolve_nous_runtime_credentials(
|
||||
min_key_ttl_seconds=DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
|
||||
force_refresh=force,
|
||||
force_mint=force,
|
||||
inference_auth_mode=(
|
||||
auth_mod.NOUS_INFERENCE_AUTH_MODE_LEGACY
|
||||
if force
|
||||
else auth_mod.NOUS_INFERENCE_AUTH_MODE_AUTO
|
||||
),
|
||||
)
|
||||
# Apply returned fields: dataclass fields via replace, extras via dict update
|
||||
field_updates = {}
|
||||
extra_updates = dict(entry.extra)
|
||||
_field_names = {f.name for f in fields(entry)}
|
||||
for k, v in refreshed.items():
|
||||
if k in _field_names:
|
||||
field_updates[k] = v
|
||||
elif k in _EXTRA_KEYS:
|
||||
extra_updates[k] = v
|
||||
updated = replace(entry, extra=extra_updates, **field_updates)
|
||||
updated = self._sync_nous_entry_from_auth_store(entry)
|
||||
else:
|
||||
return entry
|
||||
except Exception as exc:
|
||||
|
|
@ -929,6 +927,49 @@ class CredentialPool:
|
|||
self._persist()
|
||||
self._sync_device_code_entry_to_auth_store(updated)
|
||||
return updated
|
||||
if auth_mod._is_terminal_nous_refresh_error(exc):
|
||||
logger.debug("Nous refresh token is terminally invalid; clearing local token state")
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "nous") or {
|
||||
"client_id": entry.client_id,
|
||||
"portal_base_url": entry.portal_base_url,
|
||||
"inference_base_url": entry.inference_base_url,
|
||||
"token_type": entry.token_type,
|
||||
"scope": entry.scope,
|
||||
"tls": entry.tls,
|
||||
}
|
||||
store_refresh = str(state.get("refresh_token") or "").strip()
|
||||
entry_refresh = str(entry.refresh_token or "").strip()
|
||||
if not store_refresh or store_refresh == entry_refresh:
|
||||
auth_mod._quarantine_nous_oauth_state(
|
||||
state,
|
||||
exc,
|
||||
reason="credential_pool_refresh_failure",
|
||||
)
|
||||
auth_mod._quarantine_nous_pool_entries(
|
||||
auth_store,
|
||||
exc,
|
||||
reason="credential_pool_refresh_failure",
|
||||
)
|
||||
_save_provider_state(auth_store, "nous", state)
|
||||
_save_auth_store(auth_store)
|
||||
except Exception as clear_exc:
|
||||
logger.debug("Failed to clear terminal Nous OAuth state: %s", clear_exc)
|
||||
|
||||
singleton_sources = {
|
||||
auth_mod.NOUS_DEVICE_CODE_SOURCE,
|
||||
f"manual:{auth_mod.NOUS_DEVICE_CODE_SOURCE}",
|
||||
}
|
||||
self._entries = [
|
||||
item for item in self._entries
|
||||
if item.source not in singleton_sources
|
||||
]
|
||||
if self._current_id == entry.id:
|
||||
self._current_id = None
|
||||
self._persist()
|
||||
return None
|
||||
self._mark_exhausted(entry, None)
|
||||
return None
|
||||
|
||||
|
|
@ -1365,7 +1406,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
|
||||
elif provider == "nous":
|
||||
state = _load_provider_state(auth_store, "nous")
|
||||
if state and not _is_suppressed(provider, "device_code"):
|
||||
has_runtime_material = bool(
|
||||
isinstance(state, dict)
|
||||
and (
|
||||
str(state.get("access_token") or "").strip()
|
||||
or str(state.get("agent_key") or "").strip()
|
||||
)
|
||||
)
|
||||
if state and not has_runtime_material:
|
||||
retained = [
|
||||
entry for entry in entries
|
||||
if entry.source not in {"device_code", "manual:device_code"}
|
||||
]
|
||||
if len(retained) != len(entries):
|
||||
entries[:] = retained
|
||||
changed = True
|
||||
if state and has_runtime_material and not _is_suppressed(provider, "device_code"):
|
||||
active_sources.add("device_code")
|
||||
# Prefer a user-supplied label embedded in the singleton state
|
||||
# (set by persist_nous_credentials(label=...) when the user ran
|
||||
|
|
|
|||
62
agent/iteration_budget.py
Normal file
62
agent/iteration_budget.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
"""Per-agent iteration budget — thread-safe consume/refund counter.
|
||||
|
||||
Extracted from ``run_agent.py``. Each ``AIAgent`` instance (parent or
|
||||
subagent) holds an :class:`IterationBudget`; the parent's cap comes from
|
||||
``max_iterations`` (default 90), each subagent's cap comes from
|
||||
``delegation.max_iterations`` (default 50).
|
||||
|
||||
``run_agent`` re-exports ``IterationBudget`` so existing
|
||||
``from run_agent import IterationBudget`` imports keep working unchanged.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
|
||||
|
||||
class IterationBudget:
    """Lock-protected counter limiting how many iterations an agent may run.

    Every agent — parent or subagent — owns its own budget.  The parent's
    cap is ``max_iterations`` (default 90); each subagent's independent cap
    is ``delegation.max_iterations`` (default 50, configurable in
    config.yaml).  Because the budgets are independent, the combined
    iteration count of a parent and its subagents can exceed the parent's
    cap.

    Iterations spent on ``execute_code`` (programmatic tool calling) are
    handed back through :meth:`refund` so they do not count against the
    budget.
    """

    def __init__(self, max_total: int):
        self._lock = threading.Lock()
        self._used = 0
        self.max_total = max_total

    def consume(self) -> bool:
        """Spend one iteration; return False when the budget is exhausted."""
        with self._lock:
            allowed = self._used < self.max_total
            if allowed:
                self._used += 1
            return allowed

    def refund(self) -> None:
        """Return one spent iteration (never drops the count below zero)."""
        with self._lock:
            if self._used > 0:
                self._used -= 1

    @property
    def used(self) -> int:
        """Number of iterations currently consumed."""
        with self._lock:
            return self._used

    @property
    def remaining(self) -> int:
        """Iterations still available, clamped at zero."""
        with self._lock:
            left = self.max_total - self._used
            return left if left > 0 else 0
|
||||
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = ["IterationBudget"]
|
||||
|
|
@ -232,7 +232,7 @@ class LSPClient:
|
|||
the process is killed and the client is left in state
|
||||
``"error"`` — re-call ``start()`` to retry.
|
||||
"""
|
||||
if self._state in ("running", "starting"):
|
||||
if self._state in {"running", "starting"}:
|
||||
return
|
||||
self._state = "starting"
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -151,7 +151,7 @@ def try_install(pkg: str, strategy: str = "auto") -> Optional[str]:
|
|||
same path (or ``None``) without reinstalling. Concurrent calls
|
||||
are serialized.
|
||||
"""
|
||||
if strategy not in ("auto",):
|
||||
if strategy not in {"auto",}:
|
||||
# Only ``auto`` triggers an actual install. In manual/off,
|
||||
# we still check whether the binary already exists.
|
||||
recipe = INSTALL_RECIPES.get(pkg, {})
|
||||
|
|
|
|||
|
|
@ -162,7 +162,7 @@ class LSPService:
|
|||
idle_timeout: float = DEFAULT_IDLE_TIMEOUT,
|
||||
) -> None:
|
||||
self._enabled = enabled
|
||||
self._wait_mode = wait_mode if wait_mode in ("document", "full") else "document"
|
||||
self._wait_mode = wait_mode if wait_mode in {"document", "full"} else "document"
|
||||
self._wait_timeout = wait_timeout
|
||||
self._install_strategy = install_strategy
|
||||
self._binary_overrides = binary_overrides or {}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ def format_diagnostic(d: Dict[str, Any]) -> str:
|
|||
col = int(start.get("character", 0)) + 1
|
||||
msg = str(d.get("message") or "").rstrip()
|
||||
code = d.get("code")
|
||||
code_part = f" [{code}]" if code not in (None, "") else ""
|
||||
code_part = f" [{code}]" if code not in {None, ""} else ""
|
||||
source = d.get("source")
|
||||
source_part = f" ({source})" if source else ""
|
||||
return f"{sev} [{line}:{col}] {msg}{code_part}{source_part}"
|
||||
|
|
|
|||
|
|
@ -237,7 +237,7 @@ def _spawn_pyright(root: str, ctx: ServerContext) -> Optional[SpawnSpec]:
|
|||
return None
|
||||
# If we got the cli ``pyright``, the langserver is its sibling.
|
||||
base = os.path.basename(bin_path)
|
||||
if base in ("pyright", "pyright.exe"):
|
||||
if base in {"pyright", "pyright.exe"}:
|
||||
sibling = os.path.join(os.path.dirname(bin_path), "pyright-langserver")
|
||||
if os.path.exists(sibling):
|
||||
bin_path = sibling
|
||||
|
|
|
|||
444
agent/message_sanitization.py
Normal file
444
agent/message_sanitization.py
Normal file
|
|
@ -0,0 +1,444 @@
|
|||
"""Message and tool-payload sanitization helpers.
|
||||
|
||||
Pure functions extracted from ``run_agent.py`` so the AIAgent module can
|
||||
stay focused on the conversation loop. These walk OpenAI-format message
|
||||
lists and structured payloads, repairing or stripping problematic
|
||||
characters that would otherwise crash ``json.dumps`` inside the OpenAI
|
||||
SDK or be rejected by upstream APIs.
|
||||
|
||||
All helpers are stateless and side-effect-free except for in-place
|
||||
mutation of their input (where documented). Backward-compatible
|
||||
re-exports from ``run_agent`` remain in place so existing imports
|
||||
``from run_agent import _sanitize_surrogates`` keep working.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Lone surrogate code points are invalid in UTF-8 and crash json.dumps
|
||||
# inside the OpenAI SDK. Used by every surrogate-sanitization helper
|
||||
# below as well as by run_agent and the CLI for paste-from-clipboard
|
||||
# scrubbing.
|
||||
_SURROGATE_RE = re.compile(r'[\ud800-\udfff]')
|
||||
|
||||
|
||||
def _sanitize_surrogates(text: str) -> str:
|
||||
"""Replace lone surrogate code points with U+FFFD (replacement character).
|
||||
|
||||
Surrogates are invalid in UTF-8 and will crash ``json.dumps()`` inside the
|
||||
OpenAI SDK. This is a fast no-op when the text contains no surrogates.
|
||||
"""
|
||||
if _SURROGATE_RE.search(text):
|
||||
return _SURROGATE_RE.sub('\ufffd', text)
|
||||
return text
|
||||
|
||||
|
||||
def _sanitize_structure_surrogates(payload: Any) -> bool:
|
||||
"""Replace surrogate code points in nested dict/list payloads in-place.
|
||||
|
||||
Mirror of ``_sanitize_structure_non_ascii`` but for surrogate recovery.
|
||||
Used to scrub nested structured fields (e.g. ``reasoning_details`` — an
|
||||
array of dicts with ``summary``/``text`` strings) that flat per-field
|
||||
checks don't reach. Returns True if any surrogates were replaced.
|
||||
"""
|
||||
found = False
|
||||
|
||||
def _walk(node):
|
||||
nonlocal found
|
||||
if isinstance(node, dict):
|
||||
for key, value in node.items():
|
||||
if isinstance(value, str):
|
||||
if _SURROGATE_RE.search(value):
|
||||
node[key] = _SURROGATE_RE.sub('\ufffd', value)
|
||||
found = True
|
||||
elif isinstance(value, (dict, list)):
|
||||
_walk(value)
|
||||
elif isinstance(node, list):
|
||||
for idx, value in enumerate(node):
|
||||
if isinstance(value, str):
|
||||
if _SURROGATE_RE.search(value):
|
||||
node[idx] = _SURROGATE_RE.sub('\ufffd', value)
|
||||
found = True
|
||||
elif isinstance(value, (dict, list)):
|
||||
_walk(value)
|
||||
|
||||
_walk(payload)
|
||||
return found
|
||||
|
||||
|
||||
def _sanitize_messages_surrogates(messages: list) -> bool:
    """Replace lone surrogate code points throughout a chat ``messages`` list.

    Each dict entry of *messages* is scrubbed in place; non-dict entries are
    skipped.  Wherever a string matches ``_SURROGATE_RE`` the matched
    characters are replaced with U+FFFD.  Fields covered:

    * ``content`` — a plain string, or the ``text`` of every dict part when
      ``content`` is a list.
    * ``name``.
    * ``tool_calls`` — each call's ``id`` plus ``function.name`` and
      ``function.arguments``.
    * every remaining key except ``role`` — strings are scrubbed directly
      and dict/list values are handed to ``_sanitize_structure_surrogates``.
      This is what catches ``reasoning``, ``reasoning_content``,
      ``reasoning_details`` and similar fields: byte-level reasoning models
      (xiaomi/mimo, kimi, glm) can emit lone surrogates there, which flow
      into the next turn and crash ``json.dumps`` inside the OpenAI SDK.

    Returns:
        True if at least one replacement was made, False otherwise.
    """

    def _scrub(holder: dict, key: str) -> bool:
        # Rewrite holder[key] when it is a string containing a surrogate.
        current = holder.get(key)
        if isinstance(current, str) and _SURROGATE_RE.search(current):
            holder[key] = _SURROGATE_RE.sub('\ufffd', current)
            return True
        return False

    dirty = False
    for message in messages:
        if not isinstance(message, dict):
            continue

        body = message.get("content")
        if isinstance(body, list):
            # Multi-part content: only the "text" of dict parts is scrubbed.
            for piece in body:
                if isinstance(piece, dict) and _scrub(piece, "text"):
                    dirty = True
        elif _scrub(message, "content"):
            dirty = True

        dirty |= _scrub(message, "name")

        calls = message.get("tool_calls")
        for call in calls if isinstance(calls, list) else ():
            if not isinstance(call, dict):
                continue
            dirty |= _scrub(call, "id")
            func = call.get("function")
            if isinstance(func, dict):
                dirty |= _scrub(func, "name")
                dirty |= _scrub(func, "arguments")

        # Everything not handled field-by-field above (reasoning,
        # reasoning_content, reasoning_details, ...).  Assigning to an
        # existing key while iterating is safe: the dict size never changes.
        for field, payload in message.items():
            if field in {"content", "name", "tool_calls", "role"}:
                continue
            if isinstance(payload, str):
                if _SURROGATE_RE.search(payload):
                    message[field] = _SURROGATE_RE.sub('\ufffd', payload)
                    dirty = True
            elif isinstance(payload, (dict, list)):
                if _sanitize_structure_surrogates(payload):
                    dirty = True
    return dirty
|
||||
|
||||
|
||||
def _escape_invalid_chars_in_json_strings(raw: str) -> str:
|
||||
"""Escape unescaped control chars inside JSON string values.
|
||||
|
||||
Walks the raw JSON character-by-character, tracking whether we are
|
||||
inside a double-quoted string. Inside strings, replaces literal
|
||||
control characters (0x00-0x1F) that aren't already part of an escape
|
||||
sequence with their ``\\uXXXX`` equivalents. Pass-through for everything
|
||||
else.
|
||||
|
||||
Ported from #12093 — complements the other repair passes in
|
||||
``_repair_tool_call_arguments`` when ``json.loads(strict=False)`` is
|
||||
not enough (e.g. llama.cpp backends that emit literal apostrophes or
|
||||
tabs alongside other malformations).
|
||||
"""
|
||||
out: list[str] = []
|
||||
in_string = False
|
||||
i = 0
|
||||
n = len(raw)
|
||||
while i < n:
|
||||
ch = raw[i]
|
||||
if in_string:
|
||||
if ch == "\\" and i + 1 < n:
|
||||
# Already-escaped char — pass through as-is
|
||||
out.append(ch)
|
||||
out.append(raw[i + 1])
|
||||
i += 2
|
||||
continue
|
||||
if ch == '"':
|
||||
in_string = False
|
||||
out.append(ch)
|
||||
elif ord(ch) < 0x20:
|
||||
out.append(f"\\u{ord(ch):04x}")
|
||||
else:
|
||||
out.append(ch)
|
||||
else:
|
||||
if ch == '"':
|
||||
in_string = True
|
||||
out.append(ch)
|
||||
i += 1
|
||||
return "".join(out)
|
||||
|
||||
|
||||
def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
    """Best-effort repair of a malformed tool_call ``arguments`` JSON string.

    Models like GLM-5.1 via Ollama can emit truncated JSON, trailing commas,
    Python ``None`` and similar, which the API proxy rejects with HTTP 400
    "invalid tool call arguments".  Repairs are tried in order and the first
    result that parses is returned:

    0. ``json.loads(strict=False)`` followed by compact re-serialisation.
    1. Strip trailing commas before ``}`` / ``]``.
    2. Append missing closing braces / brackets.
    3. Trim surplus trailing closers (at most 50 rounds).
    4. Escape raw control characters inside strings and re-parse.

    Args:
        raw_args: The raw arguments text; anything that is not a ``str`` is
            treated as empty.
        tool_name: Tool name, used only in log messages.

    Returns:
        A JSON string.  ``"{}"`` when the input is empty, is the literal
        ``None``, or cannot be repaired — so the request still succeeds
        instead of crashing the session.  Every repair logs at WARNING.
    """
    text = raw_args.strip() if isinstance(raw_args, str) else ""

    # Fast-path: empty / whitespace-only -> empty object
    if not text:
        logger.warning("Sanitized empty tool_call arguments for %s", tool_name)
        return "{}"

    # Python-literal None -> normalise to {}
    if text == "None":
        logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name)
        return "{}"

    # Repair pass 0: llama.cpp backends sometimes emit literal control
    # characters (tabs, newlines) inside JSON string values.  A lenient
    # parse followed by re-serialisation yields wire-valid JSON without any
    # string surgery.  Most common local-model repair case (#12068).
    try:
        compact = json.dumps(json.loads(text, strict=False), separators=(",", ":"))
    except (json.JSONDecodeError, TypeError, ValueError):
        pass
    else:
        if compact != text:
            logger.warning(
                "Repaired unescaped control chars in tool_call arguments for %s",
                tool_name,
            )
        return compact

    # 1. Strip trailing commas before } or ]
    candidate = re.sub(r',\s*([}\]])', r'\1', text)

    # 2. Close unclosed structures.  Both deficits are measured before
    #    anything is appended; a non-positive deficit appends nothing.
    missing_curly = candidate.count('{') - candidate.count('}')
    missing_square = candidate.count('[') - candidate.count(']')
    candidate += '}' * max(missing_curly, 0)
    candidate += ']' * max(missing_square, 0)

    # 3. Remove excess closing braces/brackets (bounded to 50 iterations)
    rounds = 0
    while rounds < 50:
        rounds += 1
        try:
            json.loads(candidate)
        except json.JSONDecodeError:
            tail = candidate[-1:]
            surplus = (
                tail == '}' and candidate.count('}') > candidate.count('{')
            ) or (
                tail == ']' and candidate.count(']') > candidate.count('[')
            )
            if not surplus:
                break
            candidate = candidate[:-1]
        else:
            break

    try:
        json.loads(candidate)
    except json.JSONDecodeError:
        pass
    else:
        logger.warning(
            "Repaired malformed tool_call arguments for %s: %s → %s",
            tool_name, text[:80], candidate[:80],
        )
        return candidate

    # Repair pass 4: escape unescaped control chars inside JSON strings,
    # then retry.  Catches cases where strict=False alone fails because
    # other malformations are present too.
    try:
        escaped = _escape_invalid_chars_in_json_strings(candidate)
        if escaped != candidate:
            json.loads(escaped)
            logger.warning(
                "Repaired control-char-laced tool_call arguments for %s: %s → %s",
                tool_name, text[:80], escaped[:80],
            )
            return escaped
    except (json.JSONDecodeError, TypeError, ValueError):
        pass

    # Last resort: replace with empty object so the API request doesn't
    # crash the entire session.
    logger.warning(
        "Unrepairable tool_call arguments for %s — "
        "replaced with empty object (was: %s)",
        tool_name, text[:80],
    )
    return "{}"
|
||||
|
||||
|
||||
def _strip_non_ascii(text: str) -> str:
|
||||
"""Remove non-ASCII characters, replacing with closest ASCII equivalent or removing.
|
||||
|
||||
Used as a last resort when the system encoding is ASCII and can't handle
|
||||
any non-ASCII characters (e.g. LANG=C on Chromebooks).
|
||||
"""
|
||||
return text.encode('ascii', errors='ignore').decode('ascii')
|
||||
|
||||
|
||||
def _sanitize_messages_non_ascii(messages: list) -> bool:
    """Strip non-ASCII characters from all string content in a messages list.

    This is a last-resort recovery for systems with ASCII-only encoding
    (LANG=C, Chromebooks, minimal containers).  Message dicts are mutated
    in place; non-dict entries are skipped.  Fields covered:

    * ``content`` — a plain string, or the ``text`` of every dict part when
      ``content`` is a list.
    * ``name``.
    * ``tool_calls`` — the whole nested structure (``id``, ``function.name``,
      ``function.arguments``, ...).  The ``id`` must be stripped too: the
      matching ``tool_call_id`` on the tool message is a top-level string
      and is stripped by the generic pass below, so leaving the id
      untouched would break the id pairing.
    * every remaining key except ``role`` — strings are stripped directly
      and dict/list values (e.g. ``reasoning_details``) are walked via
      ``_sanitize_structure_non_ascii``, mirroring the coverage of
      ``_sanitize_messages_surrogates``.

    Returns:
        True if any non-ASCII content was found and removed.
    """
    found = False
    for msg in messages:
        if not isinstance(msg, dict):
            continue

        # Sanitize content (string or list of parts)
        content = msg.get("content")
        if isinstance(content, str):
            sanitized = _strip_non_ascii(content)
            if sanitized != content:
                msg["content"] = sanitized
                found = True
        elif isinstance(content, list):
            for part in content:
                if not isinstance(part, dict):
                    continue
                text = part.get("text")
                if isinstance(text, str):
                    sanitized = _strip_non_ascii(text)
                    if sanitized != text:
                        part["text"] = sanitized
                        found = True

        # Sanitize name field (can contain non-ASCII in tool results)
        name = msg.get("name")
        if isinstance(name, str):
            sanitized = _strip_non_ascii(name)
            if sanitized != name:
                msg["name"] = sanitized
                found = True

        # Sanitize tool_calls: walk the full structure so ids, function
        # names and arguments are all covered.
        tool_calls = msg.get("tool_calls")
        if isinstance(tool_calls, list) and _sanitize_structure_non_ascii(tool_calls):
            found = True

        # Sanitize any additional fields (e.g. reasoning_content,
        # reasoning_details, tool_call_id).  Assigning to an existing key
        # while iterating is safe: the dict size never changes.
        for key, value in msg.items():
            if key in {"content", "name", "tool_calls", "role"}:
                continue
            if isinstance(value, str):
                sanitized = _strip_non_ascii(value)
                if sanitized != value:
                    msg[key] = sanitized
                    found = True
            elif isinstance(value, (dict, list)):
                if _sanitize_structure_non_ascii(value):
                    found = True
    return found
|
||||
|
||||
|
||||
def _sanitize_tools_non_ascii(tools: list) -> bool:
    """Strip non-ASCII characters from tool payloads in-place.

    Thin wrapper that delegates to ``_sanitize_structure_non_ascii`` and
    returns its result unchanged.
    """
    return _sanitize_structure_non_ascii(tools)
|
||||
|
||||
|
||||
def _strip_images_from_messages(messages: list) -> bool:
|
||||
"""Remove image_url content parts from all messages in-place.
|
||||
|
||||
Called when a server signals it does not support images (e.g.
|
||||
"Only 'text' content type is supported."). Mutates messages so the
|
||||
next API call sends text only.
|
||||
|
||||
Preserves message alternation invariants:
|
||||
* ``tool``-role messages whose content was entirely images are replaced
|
||||
with a plaintext placeholder, NOT deleted — deleting them would leave
|
||||
the paired ``tool_call_id`` on the prior assistant message unmatched,
|
||||
which providers reject with HTTP 400.
|
||||
* Non-tool messages whose content becomes empty are dropped. In
|
||||
practice this only hits synthetic image-only user messages appended
|
||||
for attachment delivery; real user turns always include text.
|
||||
|
||||
Returns True if any image parts were removed.
|
||||
"""
|
||||
found = False
|
||||
to_delete = []
|
||||
for i, msg in enumerate(messages):
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, list):
|
||||
continue
|
||||
new_parts = []
|
||||
for part in content:
|
||||
if isinstance(part, dict) and part.get("type") in {"image_url", "image", "input_image"}:
|
||||
found = True
|
||||
else:
|
||||
new_parts.append(part)
|
||||
if len(new_parts) < len(content):
|
||||
if new_parts:
|
||||
msg["content"] = new_parts
|
||||
elif msg.get("role") == "tool":
|
||||
# Preserve tool_call_id linkage — providers require every
|
||||
# assistant tool_call to have a matching tool response.
|
||||
msg["content"] = "[image content removed — server does not support images]"
|
||||
else:
|
||||
# Synthetic image-only user/assistant message with no text;
|
||||
# safe to drop.
|
||||
to_delete.append(i)
|
||||
for i in reversed(to_delete):
|
||||
del messages[i]
|
||||
return found
|
||||
|
||||
|
||||
def _sanitize_structure_non_ascii(payload: Any) -> bool:
    """Strip non-ASCII characters from every string in a nested payload.

    Recursively descends through dicts and lists, replacing each string
    value (dict values and list items; dict keys are left alone) with the
    result of ``_strip_non_ascii``.  The payload is modified in place.
    Anything that is neither a dict nor a list is ignored.

    Returns:
        True if at least one string was changed, False otherwise.
    """
    changed = False

    def _visit(container):
        nonlocal changed
        # Treat dicts and lists uniformly as (slot, item) pairs.
        if isinstance(container, dict):
            entries = container.items()
        elif isinstance(container, list):
            entries = enumerate(container)
        else:
            return
        for slot, item in entries:
            if isinstance(item, str):
                cleaned = _strip_non_ascii(item)
                if cleaned != item:
                    container[slot] = cleaned
                    changed = True
            elif isinstance(item, (dict, list)):
                _visit(item)

    _visit(payload)
    return changed
|
||||
|
||||
|
||||
# Explicit export list.  Every name here is underscore-prefixed, so a
# star-import would skip all of them without this ``__all__``.
__all__ = [
    "_SURROGATE_RE",
    "_sanitize_surrogates",
    "_sanitize_structure_surrogates",
    "_sanitize_messages_surrogates",
    "_escape_invalid_chars_in_json_strings",
    "_repair_tool_call_arguments",
    "_strip_non_ascii",
    "_sanitize_messages_non_ascii",
    "_sanitize_tools_non_ascii",
    "_strip_images_from_messages",
    "_sanitize_structure_non_ascii",
]
|
||||
|
|
@ -194,6 +194,7 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
"llama": 131072,
|
||||
# Qwen — specific model families before the catch-all.
|
||||
# Official docs: https://help.aliyun.com/zh/model-studio/developer-reference/
|
||||
"qwen3.6-plus": 1048576, # 1M context (DashScope/Alibaba & OpenRouter)
|
||||
"qwen3-coder-plus": 1000000, # 1M context
|
||||
"qwen3-coder": 262144, # 256K context
|
||||
"qwen": 131072,
|
||||
|
|
|
|||
167
agent/process_bootstrap.py
Normal file
167
agent/process_bootstrap.py
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
"""Process-level bootstrap helpers for ``run_agent``.
|
||||
|
||||
Three concerns, all tied to ``AIAgent`` boot-time / runtime IO setup:
|
||||
|
||||
1. **Lazy OpenAI SDK import** — ``_load_openai_cls`` + ``_OpenAIProxy``
|
||||
defer the 240ms-ish ``from openai import OpenAI`` cost until first use,
|
||||
while preserving ``isinstance(client, OpenAI)`` checks and
|
||||
``patch("run_agent.OpenAI", ...)`` test patterns.
|
||||
|
||||
2. **Crash-resistant stdio** — ``_SafeWriter`` wraps stdout/stderr so
|
||||
``OSError: Input/output error`` from broken pipes (systemd, Docker,
|
||||
thread teardown races) cannot crash the agent. ``_install_safe_stdio``
|
||||
applies the wrapper.
|
||||
|
||||
3. **HTTP proxy resolution** — ``_get_proxy_from_env`` reads
|
||||
``HTTPS_PROXY`` / ``HTTP_PROXY`` / ``ALL_PROXY``;
|
||||
``_get_proxy_for_base_url`` respects ``NO_PROXY`` for the given base URL.
|
||||
|
||||
``run_agent`` re-exports every name so existing
|
||||
``from run_agent import _get_proxy_from_env`` imports keep working
|
||||
unchanged.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
from typing import Optional
|
||||
|
||||
from utils import base_url_hostname, normalize_proxy_url
|
||||
|
||||
|
||||
# Holds the real ``openai.OpenAI`` class once it has been imported, so the
# SDK import cost is paid at most once per process.
_OPENAI_CLS_CACHE = None


def _load_openai_cls() -> type:
    """Return ``openai.OpenAI``, importing it on the first call only."""
    global _OPENAI_CLS_CACHE
    cached = _OPENAI_CLS_CACHE
    if cached is not None:
        return cached
    # Deferred import: this is the expensive step being postponed.
    from openai import OpenAI as _cls
    _OPENAI_CLS_CACHE = _cls
    return _cls
|
||||
|
||||
|
||||
class _OpenAIProxy:
|
||||
"""Module-level proxy that looks like ``openai.OpenAI`` but imports lazily."""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
return _load_openai_cls()(*args, **kwargs)
|
||||
|
||||
def __instancecheck__(self, obj):
|
||||
return isinstance(obj, _load_openai_cls())
|
||||
|
||||
def __repr__(self):
|
||||
return "<lazy openai.OpenAI proxy>"
|
||||
|
||||
|
||||
class _SafeWriter:
|
||||
"""Transparent stdio wrapper that catches OSError/ValueError from broken pipes.
|
||||
|
||||
When hermes-agent runs as a systemd service, Docker container, or headless
|
||||
daemon, the stdout/stderr pipe can become unavailable (idle timeout, buffer
|
||||
exhaustion, socket reset). Any print() call then raises
|
||||
``OSError: [Errno 5] Input/output error``, which can crash agent setup or
|
||||
run_conversation() — especially via double-fault when an except handler
|
||||
also tries to print.
|
||||
|
||||
Additionally, when subagents run in ThreadPoolExecutor threads, the shared
|
||||
stdout handle can close between thread teardown and cleanup, raising
|
||||
``ValueError: I/O operation on closed file`` instead of OSError.
|
||||
|
||||
This wrapper delegates all writes to the underlying stream and silently
|
||||
catches both OSError and ValueError. It is transparent when the wrapped
|
||||
stream is healthy.
|
||||
"""
|
||||
|
||||
__slots__ = ("_inner",)
|
||||
|
||||
def __init__(self, inner):
|
||||
object.__setattr__(self, "_inner", inner)
|
||||
|
||||
def write(self, data):
|
||||
try:
|
||||
return self._inner.write(data)
|
||||
except (OSError, ValueError):
|
||||
return len(data) if isinstance(data, str) else 0
|
||||
|
||||
def flush(self):
|
||||
try:
|
||||
self._inner.flush()
|
||||
except (OSError, ValueError):
|
||||
pass
|
||||
|
||||
def fileno(self):
|
||||
return self._inner.fileno()
|
||||
|
||||
def isatty(self):
|
||||
try:
|
||||
return self._inner.isatty()
|
||||
except (OSError, ValueError):
|
||||
return False
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._inner, name)
|
||||
|
||||
|
||||
def _get_proxy_from_env() -> Optional[str]:
|
||||
"""Read proxy URL from environment variables.
|
||||
|
||||
Checks HTTPS_PROXY, HTTP_PROXY, ALL_PROXY (and lowercase variants) in order.
|
||||
Returns the first valid proxy URL found, or None if no proxy is configured.
|
||||
"""
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = os.environ.get(key, "").strip()
|
||||
if value:
|
||||
return normalize_proxy_url(value)
|
||||
return None
|
||||
|
||||
|
||||
def _get_proxy_for_base_url(base_url: Optional[str]) -> Optional[str]:
    """Return the env-configured proxy unless the bypass rules exclude *base_url*.

    The proxy comes from ``_get_proxy_from_env``.  When both a proxy and a
    base URL are present and a hostname can be extracted from the URL,
    ``urllib.request.proxy_bypass_environment`` decides whether that host
    should skip the proxy; if so None is returned.  Any error raised by the
    bypass check is ignored and the proxy is used.
    """
    resolved = _get_proxy_from_env()
    if resolved and base_url:
        hostname = base_url_hostname(base_url)
        if hostname:
            try:
                bypass = urllib.request.proxy_bypass_environment(hostname)
            except Exception:
                # Best effort: a failing bypass lookup must not block requests.
                bypass = False
            if bypass:
                return None
    return resolved
|
||||
|
||||
|
||||
def _install_safe_stdio() -> None:
    """Replace ``sys.stdout`` / ``sys.stderr`` with ``_SafeWriter`` wrappers.

    Streams that are missing (``None``) or already wrapped are left as they
    are, so calling this more than once does not stack wrappers.
    """
    for attr in ("stdout", "stderr"):
        current = getattr(sys, attr, None)
        if current is None or isinstance(current, _SafeWriter):
            continue
        setattr(sys, attr, _SafeWriter(current))
|
||||
|
||||
|
||||
# Module-level proxy instance — drops in for ``openai.OpenAI``.  Imported as
# ``from agent.process_bootstrap import OpenAI`` (or re-exported via
# ``run_agent`` for legacy tests).
OpenAI = _OpenAIProxy()


# Explicit export list.  Most of these names are underscore-prefixed and
# would be skipped by a star-import without it.
__all__ = [
    "OpenAI",
    "_OpenAIProxy",
    "_load_openai_cls",
    "_SafeWriter",
    "_install_safe_stdio",
    "_get_proxy_from_env",
    "_get_proxy_for_base_url",
]
|
||||
|
|
@ -83,6 +83,7 @@ logger = logging.getLogger(__name__)
|
|||
DEFAULT_TIMEOUT_SECONDS = 60
|
||||
MAX_TIMEOUT_SECONDS = 300
|
||||
ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
|
||||
_DEFAULT_BLOCK_MESSAGE = "Blocked by shell hook."
|
||||
|
||||
# (event, matcher, command) triples that have been wired to the plugin
|
||||
# manager in the current process. Matcher is part of the key because
|
||||
|
|
@ -481,6 +482,17 @@ def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
|
|||
return json.dumps(payload, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
def _block_message(primary: Any, secondary: Any) -> str:
|
||||
"""Return a validated string block message, falling back to the default.
|
||||
|
||||
Accepts two candidate fields (primary wins over secondary) so callers
|
||||
can express field-priority differences between the two hook wire formats
|
||||
without duplicating the type-check logic.
|
||||
"""
|
||||
raw = primary or secondary
|
||||
return raw if isinstance(raw, str) and raw else _DEFAULT_BLOCK_MESSAGE
|
||||
|
||||
|
||||
def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
|
||||
"""Translate stdout JSON into a Hermes wire-shape dict.
|
||||
|
||||
|
|
@ -515,13 +527,9 @@ def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
|
|||
|
||||
if event == "pre_tool_call":
|
||||
if data.get("action") == "block":
|
||||
message = data.get("message") or data.get("reason") or ""
|
||||
if isinstance(message, str) and message:
|
||||
return {"action": "block", "message": message}
|
||||
return {"action": "block", "message": _block_message(data.get("message"), data.get("reason"))}
|
||||
if data.get("decision") == "block":
|
||||
message = data.get("reason") or data.get("message") or ""
|
||||
if isinstance(message, str) and message:
|
||||
return {"action": "block", "message": message}
|
||||
return {"action": "block", "message": _block_message(data.get("reason"), data.get("message"))}
|
||||
return None
|
||||
|
||||
context = data.get("context")
|
||||
|
|
|
|||
280
agent/stream_diag.py
Normal file
280
agent/stream_diag.py
Normal file
|
|
@ -0,0 +1,280 @@
|
|||
"""Stream diagnostics — per-attempt counters, exception chains, retry logging.
|
||||
|
||||
When a streaming chat-completions request dies mid-response, we want to
|
||||
know why: which Cloudflare edge served the request, which OpenRouter
|
||||
downstream provider answered, how many bytes/chunks we got before the
|
||||
drop, the HTTP status, the underlying httpx error class. These helpers
|
||||
collect that info and emit it both to ``agent.log`` (full detail) and to
|
||||
the user-facing status line (compact).
|
||||
|
||||
All helpers are extracted from :class:`AIAgent` for cleanliness.
|
||||
``run_agent`` keeps thin forwarder methods so existing call sites and
|
||||
tests that patch ``run_agent.<helper>`` keep working.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Response headers snapshotted for each streaming attempt.  Names are kept
# lowercase: the response's header object is expected to do
# case-insensitive lookups (NOTE(review): confirm against the httpx headers
# type), while the captured values are stored in a plain dict keyed by
# these exact names for post-hoc analysis in agent.log.
STREAM_DIAG_HEADERS = (
    "cf-ray",
    "cf-cache-status",
    "x-openrouter-provider",
    "x-openrouter-model",
    "x-openrouter-id",
    "x-request-id",
    "x-vercel-id",
    "via",
    "server",
    "x-forwarded-for",
)
|
||||
|
||||
|
||||
def stream_diag_init() -> Dict[str, Any]:
    """Create the diagnostic record for one streaming attempt.

    The streaming functions update the dict in place and the retry block
    reads it when a stream dies.  It lives on ``request_client_holder`` so
    it survives across the closure boundary.

    Returns:
        A new dict with ``started_at`` set to the current wall-clock time,
        zeroed ``chunks`` / ``bytes`` counters, an empty ``headers`` dict,
        and ``first_chunk_at`` / ``http_status`` unset (None).
    """
    diag: Dict[str, Any] = {"started_at": time.time()}
    diag.update(
        first_chunk_at=None,
        chunks=0,
        bytes=0,
        headers={},
        http_status=None,
    )
    return diag
|
||||
|
||||
|
||||
def stream_diag_capture_response(agent: Any, diag: Dict[str, Any], http_response: Any) -> None:
    """Record HTTP status and selected headers of a live stream into *diag*.

    Called once at stream open (before iterating chunks) so the metadata
    survives even if the stream dies before any chunk arrives.  Every step
    is best-effort: failures are swallowed and leave *diag* as it was.

    Args:
        agent: Agent object; may carry a ``_STREAM_DIAG_HEADERS`` attribute
            overriding the module-level header list (back-compat).
        diag: Per-attempt diagnostic dict; ignored unless it is a dict.
        http_response: Response object exposing ``status_code`` and
            ``headers``; ignored when None.
    """
    if http_response is None or not isinstance(diag, dict):
        return

    try:
        diag["http_status"] = getattr(http_response, "status_code", None)
    except Exception:
        pass

    try:
        available = getattr(http_response, "headers", None) or {}
        wanted = getattr(agent, "_STREAM_DIAG_HEADERS", STREAM_DIAG_HEADERS)
        snapshot: Dict[str, str] = {}
        for header_name in wanted:
            try:
                raw_value = available.get(header_name)
                if not raw_value:
                    continue
                # Cap each value so log lines stay bounded.
                snapshot[header_name] = str(raw_value)[:120]
            except Exception:
                continue
        diag["headers"] = snapshot
    except Exception:
        pass
|
||||
|
||||
|
||||
def flatten_exception_chain(error: BaseException) -> str:
    """Render an exception and its causes as ``Outer(msg) <- Inner(msg) <- ...``.

    The OpenAI SDK wraps httpx errors as ``APIConnectionError`` /
    ``APIError``, so only the wrapper class is visible at the catch site,
    while the underlying ``RemoteProtocolError`` / ``ConnectError`` /
    ``ReadError`` is what explains WHY the stream died.

    Starting at *error*, each step follows ``__cause__`` when set, otherwise
    ``__context__``.  The walk stops after four links, on a repeated
    exception, or when there is nothing further.  Each link is shown as its
    class name plus its message (newlines flattened to spaces, truncated to
    140 characters); a link with an empty message shows the class name only.
    """
    chain: List[BaseException] = []
    current: Optional[BaseException] = error
    while current is not None and len(chain) < 4 and current not in chain:
        chain.append(current)
        successor = getattr(current, "__cause__", None) or getattr(
            current, "__context__", None
        )
        if successor is current:
            break
        current = successor

    def _render(exc: BaseException) -> str:
        label = type(exc).__name__
        text = str(exc).strip().replace("\n", " ")
        if len(text) > 140:
            text = text[:140] + "…"
        return f"{label}({text})" if text else label

    rendered = [_render(exc) for exc in chain]
    if not rendered:
        return type(error).__name__
    return " <- ".join(rendered)
|
||||
|
||||
|
||||
def log_stream_retry(
    agent: Any,
    *,
    kind: str,
    error: BaseException,
    attempt: int,
    max_attempts: int,
    mid_tool_call: bool,
    diag: Optional[Dict[str, Any]] = None,
) -> None:
    """Write a structured WARNING to ``agent.log`` for a stream drop + retry.

    The record is emitted regardless of UI verbosity so there is always a
    breadcrumb: provider, base_url, subagent id and depth, error class, a
    summary capped at 240 characters, and the flattened cause chain.

    When *diag* (the per-attempt dict from :func:`stream_diag_init`) is
    given, the record also carries HTTP status, bytes and chunks received,
    elapsed time, time to first chunk, and the captured upstream headers
    (cf-ray, x-openrouter-provider, ...) — enough to tell whether one CF
    edge / one downstream provider is responsible or the drops are random.

    Best-effort throughout: a failure while building or emitting the record
    is only noted at DEBUG level.
    """
    try:
        # Short description; fall back to str() if the agent hook fails.
        try:
            summary = agent._summarize_api_error(error)
        except Exception:
            summary = str(error)
        if summary and len(summary) > 240:
            summary = summary[:240] + "…"

        # Inner-cause chain (httpx errors hide under openai.APIError).
        try:
            chain = flatten_exception_chain(error)
        except Exception:
            chain = type(error).__name__

        # Per-attempt counters and upstream headers.  Defaults stay in place
        # for any value after the first one that fails to parse.
        now = time.time()
        byte_count = 0
        chunk_count = 0
        elapsed = 0.0
        ttfb = None
        upstream = "-"
        status_text = "-"
        if isinstance(diag, dict):
            try:
                byte_count = int(diag.get("bytes") or 0)
                chunk_count = int(diag.get("chunks") or 0)
                started = float(diag.get("started_at") or now)
                elapsed = max(0.0, now - started)
                first_chunk = diag.get("first_chunk_at")
                if first_chunk is not None:
                    ttfb = max(0.0, float(first_chunk) - started)
                captured = diag.get("headers") or {}
                if isinstance(captured, dict) and captured:
                    upstream = " ".join(
                        f"{key}={val}" for key, val in captured.items()
                    )
                if diag.get("http_status") is not None:
                    status_text = str(diag.get("http_status"))
            except Exception:
                pass

        ttfb_text = f"{ttfb:.2f}s" if ttfb is not None else "-"
        logger.warning(
            "Stream %s on attempt %s/%s — retrying. "
            "subagent_id=%s depth=%s provider=%s base_url=%s "
            "error_type=%s error=%s "
            "chain=%s "
            "http_status=%s bytes=%d chunks=%d elapsed=%.2fs ttfb=%s "
            "upstream=[%s]",
            kind,
            attempt,
            max_attempts,
            getattr(agent, "_subagent_id", None) or "-",
            getattr(agent, "_delegate_depth", 0),
            agent.provider or "-",
            agent.base_url or "-",
            type(error).__name__,
            summary,
            chain,
            status_text,
            byte_count,
            chunk_count,
            elapsed,
            ttfb_text,
            upstream,
            extra={"mid_tool_call": mid_tool_call},
        )
    except Exception:
        logger.debug("stream-retry log emit failed", exc_info=True)
|
||||
|
||||
|
||||
def emit_stream_drop(
    agent: Any,
    *,
    error: BaseException,
    attempt: int,
    max_attempts: int,
    mid_tool_call: bool,
    diag: Optional[Dict[str, Any]] = None,
) -> None:
    """Announce a stream drop + retry to the user and to ``agent.log``.

    First writes the full structured WARNING via :func:`log_stream_retry`
    (subagent_id, provider, base_url, error_type, cf-ray,
    x-openrouter-provider, bytes/chunks, elapsed), then emits one compact
    status line through ``agent._emit_status`` and bumps
    ``agent._touch_activity``.  Top-level agents and subagents both announce
    drops; the parent prefixes subagent lines with ``[subagent-N]`` via
    ``log_prefix``.

    The status line carries provider, error class, attempt N/M and — when
    *diag* has a start time — an ``after Xs`` suffix.  Full detail goes to
    ``agent.log`` only; inspect it with
    ``hermes logs --level WARNING | grep "Stream drop"``.

    Failures while emitting the status line are ignored.
    """
    if mid_tool_call:
        kind = "drop mid tool-call"
    else:
        kind = "drop"

    log_stream_retry(
        agent,
        kind=kind,
        error=error,
        attempt=attempt,
        max_attempts=max_attempts,
        mid_tool_call=mid_tool_call,
        diag=diag,
    )

    provider = agent.provider or "provider"

    # "after Xs" lets the user tell "couldn't connect" (0s) apart from
    # "died after 30s of streaming" (likely upstream idle-kill or proxy
    # timeout).
    timing = ""
    if isinstance(diag, dict):
        try:
            began = diag.get("started_at")
            if began is not None:
                timing = f" after {max(0.0, time.time() - float(began)):.1f}s"
        except Exception:
            pass

    error_name = type(error).__name__
    try:
        agent._emit_status(
            f"⚠️ {provider} stream {kind} ({error_name}){timing} "
            f"— reconnecting, retry {attempt}/{max_attempts}"
        )
        agent._touch_activity(
            f"stream retry {attempt}/{max_attempts} "
            f"after {error_name}"
        )
    except Exception:
        pass
|
||||
|
||||
|
||||
# Names exported by this module.
__all__ = [
    "STREAM_DIAG_HEADERS",
    "stream_diag_init",
    "stream_diag_capture_response",
    "flatten_exception_chain",
    "log_stream_retry",
    "emit_stream_drop",
]
|
||||
333
agent/system_prompt.py
Normal file
333
agent/system_prompt.py
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
"""System-prompt assembly for :class:`AIAgent`.
|
||||
|
||||
The agent's system prompt is built once per session and reused across all
|
||||
turns — only context compression triggers a rebuild. This keeps the
|
||||
upstream prefix cache warm. See ``hermes-agent-dev``'s
|
||||
``references/system-prompt-invariant.md`` for the invariants and
|
||||
``references/self-improvement-loop.md`` for how the background-review
|
||||
fork inherits the cached prompt verbatim.
|
||||
|
||||
Three tiers are joined with ``\\n\\n``:
|
||||
|
||||
* ``stable`` — identity (SOUL.md or DEFAULT_AGENT_IDENTITY), tool
|
||||
guidance, computer-use guidance, nous subscription block, tool-use
|
||||
enforcement guidance + per-model operational guidance, skills prompt,
|
||||
alibaba model-name workaround, environment hints, platform hints.
|
||||
* ``context`` — caller-supplied ``system_message`` plus context files
|
||||
(AGENTS.md / .cursorrules / etc.) discovered under ``TERMINAL_CWD``.
|
||||
* ``volatile`` — memory snapshot, USER.md profile, external memory
|
||||
provider block, timestamp/session/model/provider line.
|
||||
|
||||
Pure helpers that read the agent's state. AIAgent keeps thin forwarders.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.prompt_builder import (
|
||||
DEFAULT_AGENT_IDENTITY,
|
||||
GOOGLE_MODEL_OPERATIONAL_GUIDANCE,
|
||||
HERMES_AGENT_HELP_GUIDANCE,
|
||||
KANBAN_GUIDANCE,
|
||||
MEMORY_GUIDANCE,
|
||||
OPENAI_MODEL_EXECUTION_GUIDANCE,
|
||||
PLATFORM_HINTS,
|
||||
SESSION_SEARCH_GUIDANCE,
|
||||
SKILLS_GUIDANCE,
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE,
|
||||
TOOL_USE_ENFORCEMENT_MODELS,
|
||||
)
|
||||
|
||||
|
||||
def _ra():
|
||||
"""Lazy reference to the ``run_agent`` module.
|
||||
|
||||
Helpers like ``load_soul_md``, ``build_environment_hints``,
|
||||
``build_context_files_prompt``, ``build_nous_subscription_prompt``,
|
||||
``build_skills_system_prompt`` and ``get_toolset_for_tool`` are
|
||||
imported into ``run_agent``'s namespace. Many tests
|
||||
``patch("run_agent.load_soul_md", ...)``; if we imported them
|
||||
directly here those patches would not reach us. Looking them up
|
||||
through ``run_agent`` on every call preserves the patch contract.
|
||||
"""
|
||||
import run_agent
|
||||
return run_agent
|
||||
|
||||
|
||||
def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) -> Dict[str, str]:
    """Assemble the system prompt as three ordered parts.

    Returns a dict with three keys:
      * ``stable`` — identity, tool guidance, skills prompt,
        environment hints, platform hints, model-family operational
        guidance.
      * ``context`` — context files (AGENTS.md, .cursorrules, etc.)
        and caller-supplied system_message.
      * ``volatile`` — memory snapshot, user profile, external
        memory provider block, timestamp line.

    Joined into a single string by :func:`build_system_prompt` and
    cached on ``agent._cached_system_prompt`` for the lifetime of the
    AIAgent.  Hermes never re-renders parts of this string mid-
    session — that's the only way to keep upstream prompt caches
    warm across turns.

    Args:
        agent: Agent whose state is read (tool names, model, provider,
            platform, memory handles and the context/identity flags).
        system_message: Optional caller-supplied text; when not ``None`` it
            is placed first in the ``context`` tier.

    Returns:
        A dict with the keys ``stable``, ``context`` and ``volatile``. Each
        value is the tier's non-blank pieces, stripped and joined with a
        blank line, or ``""`` when the tier has no content.
    """
    # Tests patch ``run_agent.get_toolset_for_tool`` and similar helpers, so
    # we resolve them through ``_ra()`` on every call to honor those patches.
    _r = _ra()

    # ── Stable tier ────────────────────────────────────────────────
    stable_parts: List[str] = []

    # Try SOUL.md as primary identity unless the caller explicitly skipped it.
    # Some execution modes (cron) still want HERMES_HOME persona while keeping
    # cwd project instructions disabled.
    _soul_loaded = False
    if agent.load_soul_identity or not agent.skip_context_files:
        _soul_content = _r.load_soul_md()
        if _soul_content:
            stable_parts.append(_soul_content)
            _soul_loaded = True

    if not _soul_loaded:
        # Fallback to hardcoded identity
        stable_parts.append(DEFAULT_AGENT_IDENTITY)

    # Pointer to the hermes-agent skill + docs for user questions about Hermes itself.
    stable_parts.append(HERMES_AGENT_HELP_GUIDANCE)

    # Tool-aware behavioral guidance: only inject when the tools are loaded
    tool_guidance = []
    if "memory" in agent.valid_tool_names:
        tool_guidance.append(MEMORY_GUIDANCE)
    if "session_search" in agent.valid_tool_names:
        tool_guidance.append(SESSION_SEARCH_GUIDANCE)
    if "skill_manage" in agent.valid_tool_names:
        tool_guidance.append(SKILLS_GUIDANCE)
    # Kanban worker/orchestrator lifecycle — only present when the
    # dispatcher spawned this process (kanban_show check_fn gates on
    # HERMES_KANBAN_TASK env var).  Normal chat sessions never see
    # this block.
    if "kanban_show" in agent.valid_tool_names:
        tool_guidance.append(KANBAN_GUIDANCE)
    if tool_guidance:
        stable_parts.append(" ".join(tool_guidance))

    # Computer-use (macOS) — goes in as its own block rather than being
    # merged into tool_guidance because the content is multi-paragraph.
    if "computer_use" in agent.valid_tool_names:
        from agent.prompt_builder import COMPUTER_USE_GUIDANCE
        stable_parts.append(COMPUTER_USE_GUIDANCE)

    nous_subscription_prompt = _r.build_nous_subscription_prompt(agent.valid_tool_names)
    if nous_subscription_prompt:
        stable_parts.append(nous_subscription_prompt)

    # Tool-use enforcement: tells the model to actually call tools instead
    # of describing intended actions.  Controlled by config.yaml
    # agent.tool_use_enforcement:
    #   "auto" (default) — matches TOOL_USE_ENFORCEMENT_MODELS
    #   true  — always inject (all models)
    #   false — never inject
    #   list  — custom model-name substrings to match
    if agent.valid_tool_names:
        _enforce = agent._tool_use_enforcement
        # Lower-cased once; shared by the substring matches below.
        _model_lower = (agent.model or "").lower()
        if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in {"true", "always", "yes", "on"}):
            _inject = True
        elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in {"false", "never", "no", "off"}):
            _inject = False
        elif isinstance(_enforce, list):
            _inject = any(p.lower() in _model_lower for p in _enforce if isinstance(p, str))
        else:
            # "auto" or any unrecognised value — use hardcoded defaults
            _inject = any(p in _model_lower for p in TOOL_USE_ENFORCEMENT_MODELS)
        if _inject:
            stable_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
            # Google model operational guidance (conciseness, absolute
            # paths, parallel tool calls, verify-before-edit, etc.)
            if "gemini" in _model_lower or "gemma" in _model_lower:
                stable_parts.append(GOOGLE_MODEL_OPERATIONAL_GUIDANCE)
            # OpenAI GPT/Codex execution discipline (tool persistence,
            # prerequisite checks, verification, anti-hallucination).
            if "gpt" in _model_lower or "codex" in _model_lower:
                stable_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)

    has_skills_tools = any(name in agent.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
    if has_skills_tools:
        avail_toolsets = {
            toolset
            for toolset in (
                _r.get_toolset_for_tool(tool_name) for tool_name in agent.valid_tool_names
            )
            if toolset
        }
        skills_prompt = _r.build_skills_system_prompt(
            available_tools=agent.valid_tool_names,
            available_toolsets=avail_toolsets,
        )
    else:
        skills_prompt = ""
    if skills_prompt:
        stable_parts.append(skills_prompt)

    # Alibaba Coding Plan API always returns "glm-4.7" as model name regardless
    # of the requested model. Inject explicit model identity into the system prompt
    # so the agent can correctly report which model it is (workaround for API bug).
    # Stable for the lifetime of an agent instance — model and provider are fixed
    # at construction time.
    # The ``agent.model`` guard mirrors the ``(agent.model or "")`` handling
    # used above: an unset model must not raise here, and there is no
    # identity to report in that case.
    if agent.provider == "alibaba" and agent.model:
        # ``split`` returns the whole string when there is no "/".
        _model_short = agent.model.split("/")[-1]
        stable_parts.append(
            f"You are powered by the model named {_model_short}. "
            f"The exact model ID is {agent.model}. "
            f"When asked what model you are, always answer based on this information, "
            f"not on any model name returned by the API."
        )

    # Environment hints (WSL, Termux, etc.) — tell the agent about the
    # execution environment so it can translate paths and adapt behavior.
    # Stable for the lifetime of the process.
    _env_hints = _r.build_environment_hints()
    if _env_hints:
        stable_parts.append(_env_hints)

    platform_key = (agent.platform or "").lower().strip()
    if platform_key in PLATFORM_HINTS:
        stable_parts.append(PLATFORM_HINTS[platform_key])
    elif platform_key:
        # Check plugin registry for platform-specific LLM guidance
        try:
            from gateway.platform_registry import platform_registry
            _entry = platform_registry.get(platform_key)
            if _entry and _entry.platform_hint:
                stable_parts.append(_entry.platform_hint)
        except Exception:
            # Best effort: a missing or failing registry only costs the hint.
            pass

    # ── Context tier (cwd-dependent, may change between sessions) ─
    context_parts: List[str] = []

    # Note: ephemeral_system_prompt is NOT included here. It's injected at
    # API-call time only so it stays out of the cached/stored system prompt.
    if system_message is not None:
        context_parts.append(system_message)

    if not agent.skip_context_files:
        # Use TERMINAL_CWD for context file discovery when set (gateway
        # mode).  The gateway process runs from the hermes-agent install
        # dir, so os.getcwd() would pick up the repo's AGENTS.md and
        # other dev files — inflating token usage by ~10k for no benefit.
        _context_cwd = os.getenv("TERMINAL_CWD") or None
        context_files_prompt = _r.build_context_files_prompt(
            cwd=_context_cwd, skip_soul=_soul_loaded)
        if context_files_prompt:
            context_parts.append(context_files_prompt)

    # ── Volatile tier (session-specific content, kept last) ───────
    volatile_parts: List[str] = []

    if agent._memory_store:
        if agent._memory_enabled:
            mem_block = agent._memory_store.format_for_system_prompt("memory")
            if mem_block:
                volatile_parts.append(mem_block)
        # USER.md is always included when enabled.
        if agent._user_profile_enabled:
            user_block = agent._memory_store.format_for_system_prompt("user")
            if user_block:
                volatile_parts.append(user_block)

    # External memory provider system prompt block (additive to built-in)
    if agent._memory_manager:
        try:
            _ext_mem_block = agent._memory_manager.build_system_prompt()
            if _ext_mem_block:
                volatile_parts.append(_ext_mem_block)
        except Exception:
            # Best effort: a failing external provider must not block the prompt.
            pass

    from hermes_time import now as _hermes_now
    now = _hermes_now()
    timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
    if agent.pass_session_id and agent.session_id:
        timestamp_line += f"\nSession ID: {agent.session_id}"
    if agent.model:
        timestamp_line += f"\nModel: {agent.model}"
    if agent.provider:
        timestamp_line += f"\nProvider: {agent.provider}"
    volatile_parts.append(timestamp_line)

    return {
        "stable": "\n\n".join(p.strip() for p in stable_parts if p and p.strip()),
        "context": "\n\n".join(p.strip() for p in context_parts if p and p.strip()),
        "volatile": "\n\n".join(p.strip() for p in volatile_parts if p and p.strip()),
    }
|
||||
|
||||
|
||||
def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str:
    """Return the complete system prompt as one string.

    The three tiers produced by :func:`build_system_prompt_parts` are
    concatenated in the order stable, context, volatile, separated by a
    blank line; tiers that came back empty are left out.

    The result is meant to be built once per session, stored on
    ``agent._cached_system_prompt`` and rebuilt only after a context
    compression event. Putting the stable identity/guidance first, then the
    session-stable context files, then the volatile content (memory, USER
    profile, timestamp) keeps the leading part of the prompt unchanged, and
    never re-rendering pieces of it mid-session is what keeps upstream
    prompt caches warm across turns.
    """
    tiers = build_system_prompt_parts(agent, system_message=system_message)
    ordered = (tiers["stable"], tiers["context"], tiers["volatile"])
    populated = [tier for tier in ordered if tier]
    return "\n\n".join(populated)
|
||||
|
||||
|
||||
def invalidate_system_prompt(agent: Any) -> None:
    """Drop the cached system prompt so the next turn rebuilds it.

    Intended for use after context compression. When the agent has a memory
    store, the store is also reloaded from disk so the rebuilt prompt
    reflects anything written during this session.
    """
    agent._cached_system_prompt = None

    store = agent._memory_store
    if not store:
        return
    store.load_from_disk()
|
||||
|
||||
|
||||
def format_tools_for_system_message(agent: Any) -> str:
    """Serialize the agent's tool definitions in the trajectory format.

    Each entry of ``agent.tools`` contributes its ``function`` mapping's
    ``name``, ``description`` (default ``""``) and ``parameters`` (default
    ``{}``), plus a ``required`` key that is always ``None``.

    Returns:
        str: JSON array of the converted tools (non-ASCII characters kept
        as-is), or ``"[]"`` when the agent has no tools.
    """
    tool_defs = agent.tools
    if not tool_defs:
        return "[]"

    trajectory_tools = [
        {
            "name": spec["name"],
            "description": spec.get("description", ""),
            "parameters": spec.get("parameters", {}),
            # Always null, to match the trajectory example format.
            "required": None,
        }
        for spec in (entry["function"] for entry in tool_defs)
    ]
    return json.dumps(trajectory_tools, ensure_ascii=False)
|
||||
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = [
    "build_system_prompt_parts",
    "build_system_prompt",
    "invalidate_system_prompt",
    "format_tools_for_system_message",
]
|
||||
336
agent/tool_dispatch_helpers.py
Normal file
336
agent/tool_dispatch_helpers.py
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
"""Tool-dispatch helpers — parallelism gating, multimodal envelopes, mutation tracking.
|
||||
|
||||
Pure module-level utilities extracted from ``run_agent.py``:
|
||||
|
||||
* ``_is_destructive_command`` — terminal-command heuristic used to gate
|
||||
parallel batch dispatch.
|
||||
* ``_should_parallelize_tool_batch`` / ``_extract_parallel_scope_path`` /
|
||||
``_paths_overlap`` — the rules engine deciding when a multi-tool batch
|
||||
can run concurrently.
|
||||
* ``_is_multimodal_tool_result`` / ``_multimodal_text_summary`` /
|
||||
``_append_subdir_hint_to_multimodal`` — envelope helpers for the
|
||||
``{"_multimodal": True, "content": [...], "text_summary": ...}`` dict
|
||||
shape returned by tools like ``computer_use``.
|
||||
* ``_extract_file_mutation_targets`` / ``_extract_error_preview`` —
|
||||
per-turn file-mutation verifier inputs.
|
||||
* ``_trajectory_normalize_msg`` — strip image blobs from a message for
|
||||
trajectory saving.
|
||||
|
||||
All helpers are stateless. ``run_agent`` re-exports each name so existing
|
||||
``from run_agent import ...`` imports in tests and other modules keep
|
||||
working unchanged.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.tool_result_classification import (
|
||||
FILE_MUTATING_TOOL_NAMES as _FILE_MUTATING_TOOLS,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Tools that must never run concurrently (interactive / user-facing).
# When any of these appear in a batch, we fall back to sequential execution.
_NEVER_PARALLEL_TOOLS = frozenset({"clarify"})

# Read-only tools with no shared mutable session state.
_PARALLEL_SAFE_TOOLS = frozenset({
    "ha_get_state",
    "ha_list_entities",
    "ha_list_services",
    "read_file",
    "search_files",
    "session_search",
    "skill_view",
    "skills_list",
    "vision_analyze",
    "web_extract",
    "web_search",
})

# File tools can run concurrently when they target independent paths.
# ``read_file`` is also listed in ``_PARALLEL_SAFE_TOOLS``.
_PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})

# Patterns that indicate a terminal command may modify/delete files.
# Compiled in verbose mode, so literal whitespace inside the pattern is
# ignored. A listed command only matches at the start of the string or right
# after whitespace, ``&&``, ``||``, ``;`` or a backtick.
_DESTRUCTIVE_PATTERNS = re.compile(
    r"""(?:^|\s|&&|\|\||;|`)(?:
        rm\s|rmdir\s|
        cp\s|install\s|
        mv\s|
        sed\s+-i|
        truncate\s|
        dd\s|
        shred\s|
        git\s+(?:reset|clean|checkout)\s
    )""",
    re.VERBOSE,
)

# Output redirects that overwrite files (> but not >>)
# NOTE(review): the pattern only requires the characters around ``>`` to be
# something other than ``>``, so descriptor redirects such as ``2>&1`` match
# as well, while a ``>`` that is the last character of the command does not.
# Confirm both are intended.
_REDIRECT_OVERWRITE = re.compile(r'[^>]>[^>]|^>[^>]')
|
||||
|
||||
|
||||
def _is_destructive_command(cmd: str) -> bool:
    """Heuristically decide whether a terminal command modifies or deletes files.

    An empty command is never destructive. Otherwise the command is flagged
    when it matches either the destructive-command pattern or the
    overwrite-redirect pattern.
    """
    if not cmd:
        return False
    checks = (_DESTRUCTIVE_PATTERNS, _REDIRECT_OVERWRITE)
    return any(pattern.search(cmd) for pattern in checks)
|
||||
|
||||
|
||||
def _is_mcp_tool_parallel_safe(tool_name: str) -> bool:
|
||||
"""Check if an MCP tool comes from a server with parallel tool calls enabled.
|
||||
|
||||
Lazy-imports from ``tools.mcp_tool`` to avoid circular dependencies.
|
||||
Returns False if the MCP module is not available.
|
||||
"""
|
||||
try:
|
||||
from tools.mcp_tool import is_mcp_tool_parallel_safe
|
||||
return is_mcp_tool_parallel_safe(tool_name)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _should_parallelize_tool_batch(tool_calls) -> bool:
    """Return True when a tool-call batch is safe to run concurrently.

    A batch runs sequentially (``False``) when any of these hold:

    * it contains at most one call;
    * any call names a tool in ``_NEVER_PARALLEL_TOOLS``;
    * any call's arguments are not JSON, or decode to something other than
      an object;
    * a path-scoped tool has no usable ``path`` argument, or its path
      overlaps a path already claimed by an earlier call in the batch;
    * a tool is neither path-scoped, listed in ``_PARALLEL_SAFE_TOOLS``, nor
      an MCP tool whose server opted into parallel calls.

    Args:
        tool_calls: Sequence of tool-call objects exposing ``function.name``
            and ``function.arguments`` (the raw JSON argument text).

    Returns:
        ``True`` only if every call in the batch passes the checks above.
    """
    if len(tool_calls) <= 1:
        return False

    if any(tc.function.name in _NEVER_PARALLEL_TOOLS for tc in tool_calls):
        return False

    reserved_paths: list[Path] = []
    for tool_call in tool_calls:
        tool_name = tool_call.function.name
        raw_args = tool_call.function.arguments
        try:
            function_args = json.loads(raw_args)
        except Exception:
            # ``raw_args`` may not be a string at all (e.g. ``None``), which
            # is one of the ways ``json.loads`` fails. Stringify before
            # slicing so the log call cannot raise from inside this handler.
            logger.debug(
                "Could not parse args for %s — defaulting to sequential; raw=%s",
                tool_name,
                str(raw_args)[:200],
            )
            return False
        if not isinstance(function_args, dict):
            logger.debug(
                "Non-dict args for %s (%s) — defaulting to sequential",
                tool_name,
                type(function_args).__name__,
            )
            return False

        if tool_name in _PATH_SCOPED_TOOLS:
            scoped_path = _extract_parallel_scope_path(tool_name, function_args)
            if scoped_path is None:
                return False
            if any(_paths_overlap(scoped_path, existing) for existing in reserved_paths):
                return False
            reserved_paths.append(scoped_path)
            continue

        if tool_name not in _PARALLEL_SAFE_TOOLS:
            # Check if it's an MCP tool from a server that opted into parallel calls.
            if not _is_mcp_tool_parallel_safe(tool_name):
                return False

    return True
|
||||
|
||||
|
||||
def _extract_parallel_scope_path(tool_name: str, function_args: dict) -> Optional[Path]:
    """Return the normalized file target for path-scoped tools.

    Args:
        tool_name: Name of the tool being called.
        function_args: Decoded argument mapping of the call; its ``path``
            entry is the target.

    Returns:
        An absolute, normalized ``Path`` (``~`` expanded, relative paths
        anchored at the current working directory). ``None`` when the tool
        is not path-scoped, when ``path`` is missing, blank or not a string,
        or when a ``~user`` prefix cannot be resolved to a home directory.
    """
    if tool_name not in _PATH_SCOPED_TOOLS:
        return None

    raw_path = function_args.get("path")
    if not isinstance(raw_path, str) or not raw_path.strip():
        return None

    try:
        expanded = Path(raw_path).expanduser()
    except RuntimeError:
        # pathlib raises RuntimeError when the home directory cannot be
        # determined. Report "no usable target" rather than propagating an
        # error caused by a model-supplied path.
        return None

    if expanded.is_absolute():
        return Path(os.path.abspath(str(expanded)))

    # Avoid resolve(); the file may not exist yet.
    return Path(os.path.abspath(str(Path.cwd() / expanded)))
|
||||
|
||||
|
||||
def _paths_overlap(left: Path, right: Path) -> bool:
|
||||
"""Return True when two paths may refer to the same subtree."""
|
||||
left_parts = left.parts
|
||||
right_parts = right.parts
|
||||
if not left_parts or not right_parts:
|
||||
# Empty paths shouldn't reach here (guarded upstream), but be safe.
|
||||
return bool(left_parts) == bool(right_parts) and bool(left_parts)
|
||||
common_len = min(len(left_parts), len(right_parts))
|
||||
return left_parts[:common_len] == right_parts[:common_len]
|
||||
|
||||
|
||||
def _is_multimodal_tool_result(value: Any) -> bool:
|
||||
"""True if the value is a multimodal tool result envelope.
|
||||
|
||||
Multimodal handlers (e.g. tools/computer_use) return a dict with
|
||||
`_multimodal=True`, a `content` key holding OpenAI-style content
|
||||
parts, and an optional `text_summary` for string-only fallbacks.
|
||||
"""
|
||||
return (
|
||||
isinstance(value, dict)
|
||||
and value.get("_multimodal") is True
|
||||
and isinstance(value.get("content"), list)
|
||||
)
|
||||
|
||||
|
||||
def _multimodal_text_summary(value: Any) -> str:
    """Produce a plain-text view of a tool result.

    For callers that need a string: logging, previews, persistence size
    heuristics, and fallback content for providers without multipart tool
    messages.

    * Multimodal envelope: its ``text_summary`` when truthy; otherwise the
      text parts of ``content`` joined by newlines; otherwise the
      placeholder ``"[multimodal tool result]"``.
    * Plain string: returned unchanged.
    * Anything else: its JSON encoding, with ``str()`` as the fallback when
      encoding fails.
    """
    if not _is_multimodal_tool_result(value):
        if isinstance(value, str):
            return value
        try:
            return json.dumps(value, default=str)
        except Exception:
            return str(value)

    summary = value.get("text_summary")
    if summary:
        return str(summary)

    text_chunks = [
        str(part.get("text", ""))
        for part in (value.get("content") or [])
        if isinstance(part, dict) and part.get("type") == "text"
    ]
    return "\n".join(text_chunks) if text_chunks else "[multimodal tool result]"
|
||||
|
||||
|
||||
def _append_subdir_hint_to_multimodal(value: Dict[str, Any], hint: str) -> None:
    """Append a subdir hint to a multimodal tool-result envelope, in place.

    The hint is appended to the first text part so the model sees it. When
    there is no text part, a new one holding only the hint is inserted at
    the front. Image parts are never touched. A string ``text_summary`` gets
    the hint appended too, for string-fallback callers. Values that are not
    multimodal envelopes are left unchanged.
    """
    if not _is_multimodal_tool_result(value):
        return

    content_parts = value.get("content") or []
    first_text = next(
        (
            part
            for part in content_parts
            if isinstance(part, dict) and part.get("type") == "text"
        ),
        None,
    )
    if first_text is None:
        content_parts.insert(0, {"type": "text", "text": hint})
        # Re-assign: ``content_parts`` is a fresh list when the original was empty.
        value["content"] = content_parts
    else:
        first_text["text"] = str(first_text.get("text", "")) + hint

    summary = value.get("text_summary")
    if isinstance(summary, str):
        value["text_summary"] = summary + hint
|
||||
|
||||
|
||||
def _extract_file_mutation_targets(tool_name: str, args: Dict[str, Any]) -> List[str]:
    """List the file paths that a file-mutating tool call targets.

    * Tools outside the file-mutating set yield ``[]``.
    * ``write_file``, and ``patch`` in replace mode (the default when
      ``mode`` is missing or falsy), yield ``args["path"]`` as a one-item
      list, or ``[]`` when it is missing or falsy.
    * ``patch`` in V4A patch mode yields every path named by a
      ``*** Update File:`` / ``*** Add File:`` / ``*** Delete File:`` header
      in ``args["patch"]``, in order, so the verifier can track each file of
      a multi-file patch separately.
    * Any other mode yields ``[]``.
    """
    if tool_name not in _FILE_MUTATING_TOOLS:
        return []

    def _single_path() -> List[str]:
        # Shared by write_file and replace-mode patch: one optional path.
        target = args.get("path")
        return [str(target)] if target else []

    if tool_name == "write_file":
        return _single_path()

    # Every remaining tool name is handled as ``patch``.
    mode = args.get("mode") or "replace"
    if mode == "replace":
        return _single_path()
    if mode != "patch":
        return []

    body = args.get("patch") or ""
    if not isinstance(body, str) or not body:
        return []

    headers = re.finditer(
        r'^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$',
        body,
        re.MULTILINE,
    )
    candidates = (header.group(1).strip() for header in headers)
    return [target for target in candidates if target]
|
||||
|
||||
|
||||
def _extract_error_preview(result: Any, max_len: int = 180) -> str:
    """Build a one-line error summary from a tool result for footer display.

    The result is first reduced to text (``None`` becomes ``""``). If that
    text is a JSON object with a string ``error`` field — tool handlers
    return ``{"success": false, "error": "..."}`` — the field replaces the
    text; when parsing fails the raw text is kept. Whitespace runs are then
    collapsed to single spaces, and anything longer than ``max_len`` is cut
    to ``max_len - 1`` characters followed by an ellipsis.
    """
    text = "" if result is None else _multimodal_text_summary(result)
    if not isinstance(text, str):
        try:
            text = str(text)
        except Exception:
            return ""

    candidate = text.strip()
    if candidate.startswith("{"):
        try:
            payload = json.loads(candidate)
        except Exception:
            payload = None
        if isinstance(payload, dict) and isinstance(payload.get("error"), str):
            text = payload["error"]

    flattened = " ".join(text.split())
    if len(flattened) <= max_len:
        return flattened
    return flattened[: max_len - 1] + "…"
|
||||
|
||||
|
||||
def _trajectory_normalize_msg(msg: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of a message with image blobs removed, for trajectory saving.

    * Non-dict input is returned unchanged.
    * Content that is a multimodal tool-result envelope is replaced by its
      text summary.
    * In list content, parts of type ``image``, ``image_url`` or
      ``input_image`` become ``[screenshot]`` text placeholders; other parts
      are kept as they are.
    * Any other content leaves the message untouched, and the same object is
      returned.

    Copies are shallow, so the message schema is otherwise intact.
    """
    if not isinstance(msg, dict):
        return msg

    content = msg.get("content")
    if _is_multimodal_tool_result(content):
        return {**msg, "content": _multimodal_text_summary(content)}
    if not isinstance(content, list):
        return msg

    image_types = {"image", "image_url", "input_image"}
    scrubbed = [
        {"type": "text", "text": "[screenshot]"}
        if isinstance(part, dict) and part.get("type") in image_types
        else part
        for part in content
    ]
    return {**msg, "content": scrubbed}
|
||||
|
||||
|
||||
# Names exported by a star-import of this module. Underscore-prefixed names
# are only included in a star-import when listed here.
__all__ = [
    "_NEVER_PARALLEL_TOOLS",
    "_PARALLEL_SAFE_TOOLS",
    "_PATH_SCOPED_TOOLS",
    "_DESTRUCTIVE_PATTERNS",
    "_REDIRECT_OVERWRITE",
    "_is_destructive_command",
    "_should_parallelize_tool_batch",
    "_extract_parallel_scope_path",
    "_paths_overlap",
    "_is_multimodal_tool_result",
    "_multimodal_text_summary",
    "_append_subdir_hint_to_multimodal",
    "_extract_file_mutation_targets",
    "_extract_error_preview",
    "_trajectory_normalize_msg",
]
|
||||
920
agent/tool_executor.py
Normal file
920
agent/tool_executor.py
Normal file
|
|
@ -0,0 +1,920 @@
|
|||
"""Tool-call execution — sequential and concurrent dispatch.
|
||||
|
||||
Both AIAgent methods (``_execute_tool_calls_sequential`` and
|
||||
``_execute_tool_calls_concurrent``) live here as module-level
|
||||
functions that take the parent ``AIAgent`` as their first argument.
|
||||
|
||||
``run_agent`` keeps thin wrappers so existing call sites work; tests
|
||||
that patch ``run_agent._set_interrupt`` are honored because the
|
||||
extracted functions reach back through the ``run_agent`` module via
|
||||
``_ra()`` for that symbol.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import contextvars
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
from agent.display import (
|
||||
KawaiiSpinner,
|
||||
build_tool_preview as _build_tool_preview,
|
||||
get_cute_tool_message as _get_cute_tool_message_impl,
|
||||
get_tool_emoji as _get_tool_emoji,
|
||||
_detect_tool_failure,
|
||||
)
|
||||
from agent.tool_guardrails import ToolGuardrailDecision
|
||||
from agent.tool_dispatch_helpers import (
|
||||
_is_destructive_command,
|
||||
_is_multimodal_tool_result,
|
||||
_multimodal_text_summary,
|
||||
_append_subdir_hint_to_multimodal,
|
||||
)
|
||||
from tools.terminal_tool import (
|
||||
_get_approval_callback,
|
||||
_get_sudo_password_callback,
|
||||
set_approval_callback as _set_approval_callback,
|
||||
set_sudo_password_callback as _set_sudo_password_callback,
|
||||
get_active_env,
|
||||
)
|
||||
from tools.tool_result_storage import (
|
||||
maybe_persist_tool_result,
|
||||
enforce_turn_budget,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Maximum number of concurrent worker threads for parallel tool execution.
|
||||
# Mirrors the constant in ``run_agent`` for tests/imports that look here.
|
||||
_MAX_TOOL_WORKERS = 8
|
||||
|
||||
|
||||
def _ra():
|
||||
"""Lazy reference to ``run_agent`` so patches like ``run_agent._set_interrupt`` work."""
|
||||
import run_agent
|
||||
return run_agent
|
||||
|
||||
|
||||
def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||
"""Execute multiple tool calls concurrently using a thread pool.
|
||||
|
||||
Results are collected in the original tool-call order and appended to
|
||||
messages so the API sees them in the expected sequence.
|
||||
"""
|
||||
tool_calls = assistant_message.tool_calls
|
||||
num_tools = len(tool_calls)
|
||||
|
||||
# ── Pre-flight: interrupt check ──────────────────────────────────
|
||||
if agent._interrupt_requested:
|
||||
print(f"{agent.log_prefix}⚡ Interrupt: skipping {num_tools} tool call(s)")
|
||||
for tc in tool_calls:
|
||||
messages.append({
|
||||
"role": "tool",
|
||||
"name": tc.function.name,
|
||||
"content": f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]",
|
||||
"tool_call_id": tc.id,
|
||||
})
|
||||
return
|
||||
|
||||
# ── Parse args + pre-execution bookkeeping ───────────────────────
|
||||
parsed_calls = [] # list of (tool_call, function_name, function_args)
|
||||
for tool_call in tool_calls:
|
||||
function_name = tool_call.function.name
|
||||
|
||||
# Reset nudge counters
|
||||
if function_name == "memory":
|
||||
agent._turns_since_memory = 0
|
||||
elif function_name == "skill_manage":
|
||||
agent._iters_since_skill = 0
|
||||
|
||||
try:
|
||||
function_args = json.loads(tool_call.function.arguments)
|
||||
except json.JSONDecodeError:
|
||||
function_args = {}
|
||||
if not isinstance(function_args, dict):
|
||||
function_args = {}
|
||||
|
||||
# Checkpoint for file-mutating tools
|
||||
if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
|
||||
try:
|
||||
file_path = function_args.get("path", "")
|
||||
if file_path:
|
||||
work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
|
||||
agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Checkpoint before destructive terminal commands
|
||||
if function_name == "terminal" and agent._checkpoint_mgr.enabled:
|
||||
try:
|
||||
cmd = function_args.get("command", "")
|
||||
if _is_destructive_command(cmd):
|
||||
cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
|
||||
agent._checkpoint_mgr.ensure_checkpoint(
|
||||
cwd, f"before terminal: {cmd[:60]}"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
block_result = None
|
||||
blocked_by_guardrail = False
|
||||
try:
|
||||
from hermes_cli.plugins import get_pre_tool_call_block_message
|
||||
block_message = get_pre_tool_call_block_message(
|
||||
function_name, function_args, task_id=effective_task_id or "",
|
||||
)
|
||||
except Exception:
|
||||
block_message = None
|
||||
|
||||
if block_message is not None:
|
||||
block_result = json.dumps({"error": block_message}, ensure_ascii=False)
|
||||
else:
|
||||
guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
|
||||
if not guardrail_decision.allows_execution:
|
||||
block_result = agent._guardrail_block_result(guardrail_decision)
|
||||
blocked_by_guardrail = True
|
||||
|
||||
parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail))
|
||||
|
||||
# ── Logging / callbacks ──────────────────────────────────────────
|
||||
tool_names_str = ", ".join(name for _, name, _, _, _ in parsed_calls)
|
||||
if not agent.quiet_mode:
|
||||
print(f" ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
|
||||
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls, 1):
|
||||
args_str = json.dumps(args, ensure_ascii=False)
|
||||
if agent.verbose_logging:
|
||||
print(f" 📞 Tool {i}: {name}({list(args.keys())})")
|
||||
print(agent._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False)))
|
||||
else:
|
||||
args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
|
||||
print(f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
|
||||
|
||||
for tc, name, args, block_result, blocked_by_guardrail in parsed_calls:
|
||||
if block_result is not None:
|
||||
continue
|
||||
if agent.tool_progress_callback:
|
||||
try:
|
||||
preview = _build_tool_preview(name, args)
|
||||
agent.tool_progress_callback("tool.started", name, preview, args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
for tc, name, args, block_result, blocked_by_guardrail in parsed_calls:
|
||||
if block_result is not None:
|
||||
continue
|
||||
if agent.tool_start_callback:
|
||||
try:
|
||||
agent.tool_start_callback(tc.id, name, args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool start callback error: {cb_err}")
|
||||
|
||||
# ── Concurrent execution ─────────────────────────────────────────
|
||||
# Each slot holds (function_name, function_args, function_result, duration, error_flag, blocked_flag)
|
||||
results = [None] * num_tools
|
||||
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls):
|
||||
if block_result is not None:
|
||||
results[i] = (name, args, block_result, 0.0, True, True)
|
||||
|
||||
# Touch activity before launching workers so the gateway knows
|
||||
# we're executing tools (not stuck).
|
||||
agent._current_tool = tool_names_str
|
||||
agent._touch_activity(f"executing {num_tools} tools concurrently: {tool_names_str}")
|
||||
|
||||
# Capture CLI callbacks from the agent thread so worker threads can
|
||||
# register them locally. Without this, _get_approval_callback() in
|
||||
# terminal_tool returns None in ThreadPoolExecutor workers, causing
|
||||
# the dangerous-command prompt to fall back to input() — which
|
||||
# deadlocks against prompt_toolkit's raw terminal mode (#13617).
|
||||
_parent_approval_cb = _get_approval_callback()
|
||||
_parent_sudo_cb = _get_sudo_password_callback()
|
||||
|
||||
def _run_tool(index, tool_call, function_name, function_args):
    """Worker function executed in a thread.

    Invokes one tool call through ``agent._invoke_tool`` and stores the
    outcome in ``results[index]`` as
    ``(function_name, function_args, result, duration, is_error, blocked)``
    with ``blocked`` always ``False`` (blocked calls never reach a worker).

    Args:
        index: Slot in the enclosing ``results`` list to fill.
        tool_call: Tool-call object; only its ``id`` is read here.
        function_name: Name of the tool to invoke.
        function_args: Parsed argument dict for the tool.

    Exceptions raised by ``agent._invoke_tool`` are converted into an
    error-string result. All per-thread state set up here (worker-tid
    registration, interrupt bit, approval/sudo callbacks) is torn down in
    a ``finally`` block so it cannot leak onto a recycled pool thread.
    """
    # Register this worker tid so the agent can fan out an interrupt
    # to it — see AIAgent.interrupt(). Must happen first thing, and
    # is paired with discard + clear in the finally block below.
    _worker_tid = threading.current_thread().ident
    with agent._tool_worker_threads_lock:
        agent._tool_worker_threads.add(_worker_tid)
    try:
        # Race: if the agent was interrupted between fan-out (which
        # snapshotted an empty/earlier set) and our registration, apply
        # the interrupt to our own tid now so is_interrupted() inside
        # the tool returns True on the next poll.
        if agent._interrupt_requested:
            try:
                _ra()._set_interrupt(True, _worker_tid)
            except Exception:
                pass
        # Set the activity callback on THIS worker thread so
        # _wait_for_process (terminal commands) can fire heartbeats.
        # The callback is thread-local; the main thread's callback
        # is invisible to worker threads.
        try:
            from tools.environments.base import set_activity_callback
            set_activity_callback(agent._touch_activity)
        except Exception:
            pass
        # Propagate approval/sudo callbacks to this worker thread.
        # Mirrors cli.py run_agent() pattern (GHSA-qg5c-hvr5-hjgr).
        if _parent_approval_cb is not None:
            try:
                _set_approval_callback(_parent_approval_cb)
            except Exception:
                pass
        if _parent_sudo_cb is not None:
            try:
                _set_sudo_password_callback(_parent_sudo_cb)
            except Exception:
                pass
        start = time.time()
        try:
            result = agent._invoke_tool(
                function_name,
                function_args,
                effective_task_id,
                tool_call.id,
                messages=messages,
                pre_tool_block_checked=True,
            )
        except Exception as tool_error:
            result = f"Error executing tool '{function_name}': {tool_error}"
            logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
        duration = time.time() - start
        is_error, _ = _detect_tool_failure(function_name, result)
        # Record the outcome before logging so a logging problem can never
        # cause the caller to see this slot as "thread did not return".
        results[index] = (function_name, function_args, result, duration, is_error, False)
        # The result is not guaranteed to be a string (the post-execution
        # code handles multimodal results), so stringify before slicing or
        # measuring; for plain strings this is identical to the raw value.
        _result_text = str(result)
        if is_error:
            logger.info("tool %s failed (%.2fs): %s", function_name, duration, _result_text[:200])
        else:
            logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(_result_text))
    finally:
        # Tear down worker-tid tracking. Clear any interrupt bit we may
        # have set so the next task scheduled onto this recycled tid
        # starts with a clean slate.
        with agent._tool_worker_threads_lock:
            agent._tool_worker_threads.discard(_worker_tid)
        try:
            _ra()._set_interrupt(False, _worker_tid)
        except Exception:
            pass
        # Clear thread-local callbacks so a recycled worker thread
        # doesn't hold stale references to a disposed CLI instance.
        try:
            _set_approval_callback(None)
            _set_sudo_password_callback(None)
        except Exception:
            pass
|
||||
|
||||
# Start spinner for CLI mode (skip when TUI handles tool progress)
|
||||
spinner = None
|
||||
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
|
||||
try:
|
||||
runnable_calls = [
|
||||
(i, tc, name, args)
|
||||
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls)
|
||||
if block_result is None
|
||||
]
|
||||
futures = []
|
||||
if runnable_calls:
|
||||
max_workers = min(len(runnable_calls), _MAX_TOOL_WORKERS)
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
for i, tc, name, args in runnable_calls:
|
||||
# Propagate ContextVars (e.g. _approval_session_key); mirrors asyncio.to_thread.
|
||||
ctx = contextvars.copy_context()
|
||||
f = executor.submit(ctx.run, _run_tool, i, tc, name, args)
|
||||
futures.append(f)
|
||||
|
||||
# Wait for all to complete with periodic heartbeats so the
|
||||
# gateway's inactivity monitor doesn't kill us during long
|
||||
# concurrent tool batches. Also check for user interrupts
|
||||
# so we don't block indefinitely when the user sends /stop
|
||||
# or a new message during concurrent tool execution.
|
||||
_conc_start = time.time()
|
||||
_interrupt_logged = False
|
||||
while True:
|
||||
done, not_done = concurrent.futures.wait(
|
||||
futures, timeout=5.0,
|
||||
)
|
||||
if not not_done:
|
||||
break
|
||||
|
||||
# Check for interrupt — the per-thread interrupt signal
|
||||
# already causes individual tools (terminal, execute_code)
|
||||
# to abort, but tools without interrupt checks (web_search,
|
||||
# read_file) will run to completion. Cancel any futures
|
||||
# that haven't started yet so we don't block on them.
|
||||
if agent._interrupt_requested:
|
||||
if not _interrupt_logged:
|
||||
_interrupt_logged = True
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}⚡ Interrupt: cancelling "
|
||||
f"{len(not_done)} pending concurrent tool(s)",
|
||||
force=True,
|
||||
)
|
||||
for f in not_done:
|
||||
f.cancel()
|
||||
# Give already-running tools a moment to notice the
|
||||
# per-thread interrupt signal and exit gracefully.
|
||||
concurrent.futures.wait(not_done, timeout=3.0)
|
||||
break
|
||||
|
||||
_conc_elapsed = int(time.time() - _conc_start)
|
||||
# Heartbeat every ~30s (6 × 5s poll intervals)
|
||||
if _conc_elapsed > 0 and _conc_elapsed % 30 < 6:
|
||||
_still_running = [
|
||||
parsed_calls[futures.index(f)][1]
|
||||
for f in not_done
|
||||
if f in futures
|
||||
]
|
||||
agent._touch_activity(
|
||||
f"concurrent tools running ({_conc_elapsed}s, "
|
||||
f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
|
||||
)
|
||||
finally:
|
||||
if spinner:
|
||||
# Build a summary message for the spinner stop
|
||||
completed = sum(1 for r in results if r is not None)
|
||||
total_dur = sum(r[3] for r in results if r is not None)
|
||||
spinner.stop(f"⚡ {completed}/{num_tools} tools completed in {total_dur:.1f}s total")
|
||||
|
||||
# ── Post-execution: display per-tool results ─────────────────────
|
||||
for i, (tc, name, args, block_result, blocked_by_guardrail) in enumerate(parsed_calls):
|
||||
r = results[i]
|
||||
blocked = False
|
||||
if r is None:
|
||||
# Tool was cancelled (interrupt) or thread didn't return
|
||||
if agent._interrupt_requested:
|
||||
function_result = f"[Tool execution cancelled — {name} was skipped due to user interrupt]"
|
||||
else:
|
||||
function_result = f"Error executing tool '{name}': thread did not return a result"
|
||||
tool_duration = 0.0
|
||||
else:
|
||||
function_name, function_args, function_result, tool_duration, is_error, blocked = r
|
||||
|
||||
if not blocked:
|
||||
function_result = agent._append_guardrail_observation(
|
||||
function_name,
|
||||
function_args,
|
||||
function_result,
|
||||
failed=is_error,
|
||||
)
|
||||
|
||||
if is_error:
|
||||
_err_text = _multimodal_text_summary(function_result)
|
||||
result_preview = _err_text[:200] if len(_err_text) > 200 else _err_text
|
||||
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
|
||||
|
||||
# Track file-mutation outcome for the turn-end verifier.
|
||||
# `blocked` calls never actually ran — don't let a guardrail
|
||||
# block count as either a failure or a success.
|
||||
if not blocked:
|
||||
try:
|
||||
agent._record_file_mutation_result(
|
||||
function_name, function_args, function_result, is_error,
|
||||
)
|
||||
except Exception as _ver_err:
|
||||
logging.debug("file-mutation verifier record failed: %s", _ver_err)
|
||||
|
||||
if not blocked and agent.tool_progress_callback:
|
||||
try:
|
||||
agent.tool_progress_callback(
|
||||
"tool.completed", function_name, None, None,
|
||||
duration=tool_duration, is_error=is_error,
|
||||
)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
if agent.verbose_logging:
|
||||
logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
|
||||
logging.debug(f"Tool result ({len(function_result)} chars): {function_result}")
|
||||
|
||||
# Print cute message per tool
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result)
|
||||
agent._safe_print(f" {cute_msg}")
|
||||
elif not agent.quiet_mode:
|
||||
_preview_str = _multimodal_text_summary(function_result)
|
||||
if agent.verbose_logging:
|
||||
print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s")
|
||||
print(agent._wrap_verbose("Result: ", _preview_str))
|
||||
else:
|
||||
response_preview = _preview_str[:agent.log_prefix_chars] + "..." if len(_preview_str) > agent.log_prefix_chars else _preview_str
|
||||
print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}")
|
||||
|
||||
agent._current_tool = None
|
||||
agent._touch_activity(f"tool completed: {name} ({tool_duration:.1f}s)")
|
||||
|
||||
if not blocked and agent.tool_complete_callback:
|
||||
try:
|
||||
agent.tool_complete_callback(tc.id, name, args, function_result)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool complete callback error: {cb_err}")
|
||||
|
||||
function_result = maybe_persist_tool_result(
|
||||
content=function_result,
|
||||
tool_name=name,
|
||||
tool_use_id=tc.id,
|
||||
env=get_active_env(effective_task_id),
|
||||
) if not _is_multimodal_tool_result(function_result) else function_result
|
||||
|
||||
subdir_hints = agent._subdirectory_hints.check_tool_call(name, args)
|
||||
if subdir_hints:
|
||||
if _is_multimodal_tool_result(function_result):
|
||||
# Append the hint to the text summary part so the model
|
||||
# still sees it; don't touch the image blocks.
|
||||
_append_subdir_hint_to_multimodal(function_result, subdir_hints)
|
||||
else:
|
||||
function_result += subdir_hints
|
||||
|
||||
# Unwrap _multimodal dicts to an OpenAI-style content list so any
|
||||
# vision-capable provider receives [{type:text},{type:image_url}]
|
||||
# rather than a raw Python dict. The Anthropic adapter already
|
||||
# accepts content lists; vision-capable OpenAI-compatible servers
|
||||
# (mlx-vlm, GPT-4o, …) accept image_url in tool messages natively.
|
||||
# Text-only servers get a string-safe fallback here so a rejected
|
||||
# image tool result never poisons canonical session history.
|
||||
# String results pass through unchanged.
|
||||
_tool_content = agent._tool_result_content_for_active_model(name, function_result)
|
||||
tool_msg = {
|
||||
"role": "tool",
|
||||
"name": name,
|
||||
"content": _tool_content,
|
||||
"tool_call_id": tc.id,
|
||||
}
|
||||
messages.append(tool_msg)
|
||||
|
||||
# ── Per-tool /steer drain ───────────────────────────────────
|
||||
# Same as the sequential path: drain between each collected
|
||||
# result so the steer lands as early as possible.
|
||||
agent._apply_pending_steer_to_tool_results(messages, 1)
|
||||
|
||||
# ── Per-turn aggregate budget enforcement ─────────────────────────
|
||||
num_tools = len(parsed_calls)
|
||||
if num_tools > 0:
|
||||
turn_tool_msgs = messages[-num_tools:]
|
||||
enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id))
|
||||
|
||||
# ── /steer injection ──────────────────────────────────────────────
|
||||
# Append any pending user steer text to the last tool result so the
|
||||
# agent sees it on its next iteration. Runs AFTER budget enforcement
|
||||
# so the steer marker is never truncated. See steer() for details.
|
||||
if num_tools > 0:
|
||||
agent._apply_pending_steer_to_tool_results(messages, num_tools)
|
||||
|
||||
|
||||
|
||||
def execute_tool_calls_sequential(agent, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||
"""Execute tool calls sequentially (original behavior). Used for single calls or interactive tools."""
|
||||
for i, tool_call in enumerate(assistant_message.tool_calls, 1):
|
||||
# SAFETY: check interrupt BEFORE starting each tool.
|
||||
# If the user sent "stop" during a previous tool's execution,
|
||||
# do NOT start any more tools -- skip them all immediately.
|
||||
if agent._interrupt_requested:
|
||||
remaining_calls = assistant_message.tool_calls[i-1:]
|
||||
if remaining_calls:
|
||||
agent._vprint(f"{agent.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)", force=True)
|
||||
for skipped_tc in remaining_calls:
|
||||
skipped_name = skipped_tc.function.name
|
||||
skip_msg = {
|
||||
"role": "tool",
|
||||
"name": skipped_name,
|
||||
"content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
|
||||
"tool_call_id": skipped_tc.id,
|
||||
}
|
||||
messages.append(skip_msg)
|
||||
break
|
||||
|
||||
function_name = tool_call.function.name
|
||||
|
||||
try:
|
||||
function_args = json.loads(tool_call.function.arguments)
|
||||
except json.JSONDecodeError as e:
|
||||
logging.warning(f"Unexpected JSON error after validation: {e}")
|
||||
function_args = {}
|
||||
if not isinstance(function_args, dict):
|
||||
function_args = {}
|
||||
|
||||
# Check plugin hooks for a block directive before executing.
|
||||
_block_msg: Optional[str] = None
|
||||
try:
|
||||
from hermes_cli.plugins import get_pre_tool_call_block_message
|
||||
_block_msg = get_pre_tool_call_block_message(
|
||||
function_name, function_args, task_id=effective_task_id or "",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_guardrail_block_decision: ToolGuardrailDecision | None = None
|
||||
if _block_msg is None:
|
||||
guardrail_decision = agent._tool_guardrails.before_call(function_name, function_args)
|
||||
if not guardrail_decision.allows_execution:
|
||||
_guardrail_block_decision = guardrail_decision
|
||||
|
||||
_execution_blocked = _block_msg is not None or _guardrail_block_decision is not None
|
||||
|
||||
if _execution_blocked:
|
||||
# Tool blocked by plugin or guardrail policy — skip counters,
|
||||
# callbacks, checkpointing, activity mutation, and real execution.
|
||||
pass
|
||||
# Reset nudge counters when the relevant tool is actually used
|
||||
elif function_name == "memory":
|
||||
agent._turns_since_memory = 0
|
||||
elif function_name == "skill_manage":
|
||||
agent._iters_since_skill = 0
|
||||
|
||||
if not agent.quiet_mode:
|
||||
args_str = json.dumps(function_args, ensure_ascii=False)
|
||||
if agent.verbose_logging:
|
||||
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})")
|
||||
print(agent._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False)))
|
||||
else:
|
||||
args_preview = args_str[:agent.log_prefix_chars] + "..." if len(args_str) > agent.log_prefix_chars else args_str
|
||||
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
|
||||
|
||||
if not _execution_blocked:
|
||||
agent._current_tool = function_name
|
||||
agent._touch_activity(f"executing tool: {function_name}")
|
||||
|
||||
# Set activity callback for long-running tool execution (terminal
|
||||
# commands, etc.) so the gateway's inactivity monitor doesn't kill
|
||||
# the agent while a command is running.
|
||||
if not _execution_blocked:
|
||||
try:
|
||||
from tools.environments.base import set_activity_callback
|
||||
set_activity_callback(agent._touch_activity)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not _execution_blocked and agent.tool_progress_callback:
|
||||
try:
|
||||
preview = _build_tool_preview(function_name, function_args)
|
||||
agent.tool_progress_callback("tool.started", function_name, preview, function_args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
if not _execution_blocked and agent.tool_start_callback:
|
||||
try:
|
||||
agent.tool_start_callback(tool_call.id, function_name, function_args)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool start callback error: {cb_err}")
|
||||
|
||||
# Checkpoint: snapshot working dir before file-mutating tools
|
||||
if not _execution_blocked and function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
|
||||
try:
|
||||
file_path = function_args.get("path", "")
|
||||
if file_path:
|
||||
work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
|
||||
agent._checkpoint_mgr.ensure_checkpoint(
|
||||
work_dir, f"before {function_name}"
|
||||
)
|
||||
except Exception:
|
||||
pass # never block tool execution
|
||||
|
||||
# Checkpoint before destructive terminal commands
|
||||
if not _execution_blocked and function_name == "terminal" and agent._checkpoint_mgr.enabled:
|
||||
try:
|
||||
cmd = function_args.get("command", "")
|
||||
if _is_destructive_command(cmd):
|
||||
cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
|
||||
agent._checkpoint_mgr.ensure_checkpoint(
|
||||
cwd, f"before terminal: {cmd[:60]}"
|
||||
)
|
||||
except Exception:
|
||||
pass # never block tool execution
|
||||
|
||||
tool_start_time = time.time()
|
||||
|
||||
if _block_msg is not None:
|
||||
# Tool blocked by plugin policy — return error without executing.
|
||||
function_result = json.dumps({"error": _block_msg}, ensure_ascii=False)
|
||||
tool_duration = 0.0
|
||||
elif _guardrail_block_decision is not None:
|
||||
# Tool blocked by tool-loop guardrail — synthesize exactly one
|
||||
# tool result for the original tool_call_id without executing.
|
||||
function_result = agent._guardrail_block_result(_guardrail_block_decision)
|
||||
tool_duration = 0.0
|
||||
elif function_name == "todo":
|
||||
from tools.todo_tool import todo_tool as _todo_tool
|
||||
function_result = _todo_tool(
|
||||
todos=function_args.get("todos"),
|
||||
merge=function_args.get("merge", False),
|
||||
store=agent._todo_store,
|
||||
)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
|
||||
elif function_name == "session_search":
|
||||
session_db = agent._get_session_db_for_recall()
|
||||
if not session_db:
|
||||
from hermes_state import format_session_db_unavailable
|
||||
function_result = json.dumps({"success": False, "error": format_session_db_unavailable()})
|
||||
else:
|
||||
from tools.session_search_tool import session_search as _session_search
|
||||
function_result = _session_search(
|
||||
query=function_args.get("query", ""),
|
||||
role_filter=function_args.get("role_filter"),
|
||||
limit=function_args.get("limit", 3),
|
||||
db=session_db,
|
||||
current_session_id=agent.session_id,
|
||||
)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
|
||||
elif function_name == "memory":
|
||||
target = function_args.get("target", "memory")
|
||||
from tools.memory_tool import memory_tool as _memory_tool
|
||||
function_result = _memory_tool(
|
||||
action=function_args.get("action"),
|
||||
target=target,
|
||||
content=function_args.get("content"),
|
||||
old_text=function_args.get("old_text"),
|
||||
store=agent._memory_store,
|
||||
)
|
||||
# Bridge: notify external memory provider of built-in memory writes
|
||||
if agent._memory_manager and function_args.get("action") in {"add", "replace"}:
|
||||
try:
|
||||
agent._memory_manager.on_memory_write(
|
||||
function_args.get("action", ""),
|
||||
target,
|
||||
function_args.get("content", ""),
|
||||
metadata=agent._build_memory_write_metadata(
|
||||
task_id=effective_task_id,
|
||||
tool_call_id=getattr(tool_call, "id", None),
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
|
||||
elif function_name == "clarify":
|
||||
from tools.clarify_tool import clarify_tool as _clarify_tool
|
||||
function_result = _clarify_tool(
|
||||
question=function_args.get("question", ""),
|
||||
choices=function_args.get("choices"),
|
||||
callback=agent.clarify_callback,
|
||||
)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
|
||||
elif function_name == "delegate_task":
|
||||
tasks_arg = function_args.get("tasks")
|
||||
if tasks_arg and isinstance(tasks_arg, list):
|
||||
spinner_label = f"🔀 delegating {len(tasks_arg)} tasks"
|
||||
else:
|
||||
goal_preview = (function_args.get("goal") or "")[:30]
|
||||
spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
|
||||
spinner = None
|
||||
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
agent._delegate_spinner = spinner
|
||||
_delegate_result = None
|
||||
try:
|
||||
function_result = agent._dispatch_delegate_task(function_args)
|
||||
_delegate_result = function_result
|
||||
finally:
|
||||
agent._delegate_spinner = None
|
||||
tool_duration = time.time() - tool_start_time
|
||||
cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result)
|
||||
if spinner:
|
||||
spinner.stop(cute_msg)
|
||||
elif agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {cute_msg}")
|
||||
elif agent._context_engine_tool_names and function_name in agent._context_engine_tool_names:
|
||||
# Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.)
|
||||
spinner = None
|
||||
if agent._should_emit_quiet_tool_messages():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
emoji = _get_tool_emoji(function_name)
|
||||
preview = _build_tool_preview(function_name, function_args) or function_name
|
||||
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
_ce_result = None
|
||||
try:
|
||||
function_result = agent.context_compressor.handle_tool_call(function_name, function_args, messages=messages)
|
||||
_ce_result = function_result
|
||||
except Exception as tool_error:
|
||||
function_result = json.dumps({"error": f"Context engine tool '{function_name}' failed: {tool_error}"})
|
||||
logger.error("context_engine.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
||||
finally:
|
||||
tool_duration = time.time() - tool_start_time
|
||||
cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result)
|
||||
if spinner:
|
||||
spinner.stop(cute_msg)
|
||||
elif agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {cute_msg}")
|
||||
elif agent._memory_manager and agent._memory_manager.has_tool(function_name):
|
||||
# Memory provider tools (hindsight_retain, honcho_search, etc.)
|
||||
# These are not in the tool registry — route through MemoryManager.
|
||||
spinner = None
|
||||
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
emoji = _get_tool_emoji(function_name)
|
||||
preview = _build_tool_preview(function_name, function_args) or function_name
|
||||
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
_mem_result = None
|
||||
try:
|
||||
function_result = agent._memory_manager.handle_tool_call(function_name, function_args)
|
||||
_mem_result = function_result
|
||||
except Exception as tool_error:
|
||||
function_result = json.dumps({"error": f"Memory tool '{function_name}' failed: {tool_error}"})
|
||||
logger.error("memory_manager.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
||||
finally:
|
||||
tool_duration = time.time() - tool_start_time
|
||||
cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_mem_result)
|
||||
if spinner:
|
||||
spinner.stop(cute_msg)
|
||||
elif agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {cute_msg}")
|
||||
elif agent.quiet_mode:
|
||||
spinner = None
|
||||
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
|
||||
face = random.choice(KawaiiSpinner.get_waiting_faces())
|
||||
emoji = _get_tool_emoji(function_name)
|
||||
preview = _build_tool_preview(function_name, function_args) or function_name
|
||||
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
|
||||
spinner.start()
|
||||
_spinner_result = None
|
||||
try:
|
||||
function_result = _ra().handle_function_call(
|
||||
function_name, function_args, effective_task_id,
|
||||
tool_call_id=tool_call.id,
|
||||
session_id=agent.session_id or "",
|
||||
enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
|
||||
skip_pre_tool_call_hook=True,
|
||||
)
|
||||
_spinner_result = function_result
|
||||
except Exception as tool_error:
|
||||
function_result = f"Error executing tool '{function_name}': {tool_error}"
|
||||
logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
||||
finally:
|
||||
tool_duration = time.time() - tool_start_time
|
||||
cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
|
||||
if spinner:
|
||||
spinner.stop(cute_msg)
|
||||
elif agent._should_emit_quiet_tool_messages():
|
||||
agent._vprint(f" {cute_msg}")
|
||||
else:
|
||||
try:
|
||||
function_result = _ra().handle_function_call(
|
||||
function_name, function_args, effective_task_id,
|
||||
tool_call_id=tool_call.id,
|
||||
session_id=agent.session_id or "",
|
||||
enabled_tools=list(agent.valid_tool_names) if agent.valid_tool_names else None,
|
||||
skip_pre_tool_call_hook=True,
|
||||
)
|
||||
except Exception as tool_error:
|
||||
function_result = f"Error executing tool '{function_name}': {tool_error}"
|
||||
logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
|
||||
if isinstance(function_result, str):
|
||||
result_preview = function_result if agent.verbose_logging else (
|
||||
function_result[:200] if len(function_result) > 200 else function_result
|
||||
)
|
||||
_result_len = len(function_result)
|
||||
else:
|
||||
# Multimodal dict result (_multimodal=True) — not sliceable as string
|
||||
result_preview = function_result
|
||||
_result_len = len(str(function_result))
|
||||
|
||||
# Log tool errors to the persistent error log so [error] tags
|
||||
# in the UI always have a corresponding detailed entry on disk.
|
||||
_is_error_result, _ = _detect_tool_failure(function_name, function_result)
|
||||
if not _execution_blocked:
|
||||
function_result = agent._append_guardrail_observation(
|
||||
function_name,
|
||||
function_args,
|
||||
function_result,
|
||||
failed=_is_error_result,
|
||||
)
|
||||
result_preview = function_result if agent.verbose_logging else (
|
||||
function_result[:200] if len(function_result) > 200 else function_result
|
||||
)
|
||||
if _is_error_result:
|
||||
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
|
||||
else:
|
||||
logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, _result_len)
|
||||
|
||||
# Track file-mutation outcome for the turn-end verifier. See
|
||||
# the concurrent path for the rationale; both paths must feed
|
||||
# the same state so the footer reflects every tool call in the
|
||||
# turn, not just the parallel ones.
|
||||
if not _execution_blocked:
|
||||
try:
|
||||
agent._record_file_mutation_result(
|
||||
function_name, function_args, function_result, _is_error_result,
|
||||
)
|
||||
except Exception as _ver_err:
|
||||
logging.debug("file-mutation verifier record failed: %s", _ver_err)
|
||||
|
||||
if not _execution_blocked and agent.tool_progress_callback:
|
||||
try:
|
||||
agent.tool_progress_callback(
|
||||
"tool.completed", function_name, None, None,
|
||||
duration=tool_duration, is_error=_is_error_result,
|
||||
)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool progress callback error: {cb_err}")
|
||||
|
||||
agent._current_tool = None
|
||||
agent._touch_activity(f"tool completed: {function_name} ({tool_duration:.1f}s)")
|
||||
|
||||
if agent.verbose_logging:
|
||||
logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
|
||||
_log_result = _multimodal_text_summary(function_result)
|
||||
logging.debug(f"Tool result ({len(_log_result)} chars): {_log_result}")
|
||||
|
||||
if not _execution_blocked and agent.tool_complete_callback:
|
||||
try:
|
||||
agent.tool_complete_callback(tool_call.id, function_name, function_args, function_result)
|
||||
except Exception as cb_err:
|
||||
logging.debug(f"Tool complete callback error: {cb_err}")
|
||||
|
||||
function_result = maybe_persist_tool_result(
|
||||
content=function_result,
|
||||
tool_name=function_name,
|
||||
tool_use_id=tool_call.id,
|
||||
env=get_active_env(effective_task_id),
|
||||
) if not _is_multimodal_tool_result(function_result) else function_result
|
||||
|
||||
# Discover subdirectory context files from tool arguments
|
||||
subdir_hints = agent._subdirectory_hints.check_tool_call(function_name, function_args)
|
||||
if subdir_hints:
|
||||
if _is_multimodal_tool_result(function_result):
|
||||
_append_subdir_hint_to_multimodal(function_result, subdir_hints)
|
||||
else:
|
||||
function_result += subdir_hints
|
||||
|
||||
# Unwrap _multimodal dicts to an OpenAI-style content list
|
||||
# (see parallel path for rationale). String results pass through.
|
||||
_tool_content = agent._tool_result_content_for_active_model(function_name, function_result)
|
||||
tool_msg = {
|
||||
"role": "tool",
|
||||
"name": function_name,
|
||||
"content": _tool_content,
|
||||
"tool_call_id": tool_call.id
|
||||
}
|
||||
messages.append(tool_msg)
|
||||
|
||||
# ── Per-tool /steer drain ───────────────────────────────────
|
||||
# Drain pending steer BETWEEN individual tool calls so the
|
||||
# injection lands as soon as a tool finishes — not after the
|
||||
# entire batch. The model sees it on the next API iteration.
|
||||
agent._apply_pending_steer_to_tool_results(messages, 1)
|
||||
|
||||
if not agent.quiet_mode:
|
||||
if agent.verbose_logging:
|
||||
print(f" ✅ Tool {i} completed in {tool_duration:.2f}s")
|
||||
print(agent._wrap_verbose("Result: ", function_result))
|
||||
else:
|
||||
_fr_str = function_result if isinstance(function_result, str) else str(function_result)
|
||||
response_preview = _fr_str[:agent.log_prefix_chars] + "..." if len(_fr_str) > agent.log_prefix_chars else _fr_str
|
||||
print(f" ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")
|
||||
|
||||
if agent._interrupt_requested and i < len(assistant_message.tool_calls):
|
||||
remaining = len(assistant_message.tool_calls) - i
|
||||
agent._vprint(f"{agent.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)", force=True)
|
||||
for skipped_tc in assistant_message.tool_calls[i:]:
|
||||
skipped_name = skipped_tc.function.name
|
||||
skip_msg = {
|
||||
"role": "tool",
|
||||
"name": skipped_name,
|
||||
"content": f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]",
|
||||
"tool_call_id": skipped_tc.id
|
||||
}
|
||||
messages.append(skip_msg)
|
||||
break
|
||||
|
||||
if agent.tool_delay > 0 and i < len(assistant_message.tool_calls):
|
||||
time.sleep(agent.tool_delay)
|
||||
|
||||
# ── Per-turn aggregate budget enforcement ─────────────────────────
|
||||
num_tools_seq = len(assistant_message.tool_calls)
|
||||
if num_tools_seq > 0:
|
||||
enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id))
|
||||
|
||||
# ── /steer injection ──────────────────────────────────────────────
|
||||
# See _execute_tool_calls_parallel for the rationale. Same hook,
|
||||
# applied to sequential execution as well.
|
||||
if num_tools_seq > 0:
|
||||
agent._apply_pending_steer_to_tool_results(messages, num_tools_seq)
|
||||
|
||||
|
||||
|
||||
|
||||
__all__ = [
|
||||
"execute_tool_calls_concurrent",
|
||||
"execute_tool_calls_sequential",
|
||||
]
|
||||
|
|
@ -74,12 +74,43 @@ class CodexAppServerClient:
|
|||
env: Optional[dict[str, str]] = None,
|
||||
) -> None:
|
||||
self._codex_bin = codex_bin
|
||||
cmd = [codex_bin, "app-server"] + list(extra_args or [])
|
||||
spawn_env = os.environ.copy()
|
||||
if env:
|
||||
spawn_env.update(env)
|
||||
if codex_home:
|
||||
spawn_env["CODEX_HOME"] = codex_home
|
||||
|
||||
app_server_args = list(extra_args or [])
|
||||
# Kanban workers must be able to write their handoff/status back to
|
||||
# the board DB, which lives outside the per-task workspace. Keep the
|
||||
# Codex sandbox on, but add the Kanban root as the only extra writable
|
||||
# root. Without this, codex-runtime workers finish their actual work
|
||||
# but crash/block when kanban_complete/kanban_block writes SQLite.
|
||||
if spawn_env.get("HERMES_KANBAN_TASK"):
|
||||
kanban_db = spawn_env.get("HERMES_KANBAN_DB")
|
||||
kanban_root = (
|
||||
os.path.dirname(kanban_db)
|
||||
if kanban_db
|
||||
else spawn_env.get(
|
||||
"HERMES_KANBAN_ROOT",
|
||||
os.path.join(
|
||||
spawn_env.get("HERMES_HOME", os.path.expanduser("~/.hermes")),
|
||||
"kanban",
|
||||
),
|
||||
)
|
||||
)
|
||||
app_server_args.extend(
|
||||
[
|
||||
"-c",
|
||||
'sandbox_mode="workspace-write"',
|
||||
"-c",
|
||||
f'sandbox_workspace_write.writable_roots=["{kanban_root}"]',
|
||||
"-c",
|
||||
"sandbox_workspace_write.network_access=false",
|
||||
]
|
||||
)
|
||||
|
||||
cmd = [codex_bin, "app-server"] + app_server_args
|
||||
# Codex emits tracing to stderr; default WARN keeps it quiet for users.
|
||||
spawn_env.setdefault("RUST_LOG", "warn")
|
||||
|
||||
|
|
|
|||
|
|
@ -404,7 +404,7 @@ class CodexAppServerSession:
|
|||
return result
|
||||
|
||||
result.turn_id = (ts.get("turn") or {}).get("id")
|
||||
deadline = time.time() + turn_timeout
|
||||
deadline = time.monotonic() + turn_timeout
|
||||
turn_complete = False
|
||||
# Post-tool watchdog state. last_tool_completion_at is set whenever
|
||||
# a tool-shaped item completes; if no further notification arrives
|
||||
|
|
@ -412,7 +412,7 @@ class CodexAppServerSession:
|
|||
# fast-fail and retire the session.
|
||||
last_tool_completion_at: Optional[float] = None
|
||||
|
||||
while time.time() < deadline and not turn_complete:
|
||||
while time.monotonic() < deadline and not turn_complete:
|
||||
if self._interrupt_event.is_set():
|
||||
self._issue_interrupt(result.turn_id)
|
||||
result.interrupted = True
|
||||
|
|
@ -440,7 +440,7 @@ class CodexAppServerSession:
|
|||
# up on this turn instead of waiting for the outer deadline.
|
||||
if (
|
||||
last_tool_completion_at is not None
|
||||
and (time.time() - last_tool_completion_at)
|
||||
and (time.monotonic() - last_tool_completion_at)
|
||||
> post_tool_quiet_timeout
|
||||
):
|
||||
self._issue_interrupt(result.turn_id)
|
||||
|
|
@ -471,7 +471,7 @@ class CodexAppServerSession:
|
|||
result.projected_messages.extend(proj.messages)
|
||||
if proj.is_tool_iteration:
|
||||
result.tool_iterations += 1
|
||||
last_tool_completion_at = time.time()
|
||||
last_tool_completion_at = time.monotonic()
|
||||
if proj.final_text is not None:
|
||||
result.final_text = proj.final_text
|
||||
if _has_turn_aborted_marker(proj.final_text):
|
||||
|
|
@ -514,7 +514,7 @@ class CodexAppServerSession:
|
|||
result.tool_iterations += 1
|
||||
# Arm/refresh the post-tool quiet watchdog whenever a
|
||||
# tool-shaped item completes.
|
||||
last_tool_completion_at = time.time()
|
||||
last_tool_completion_at = time.monotonic()
|
||||
else:
|
||||
# Any non-tool projected activity (assistant message,
|
||||
# status update, etc.) means codex is still producing
|
||||
|
|
@ -541,7 +541,7 @@ class CodexAppServerSession:
|
|||
turn_status = (
|
||||
(note.get("params") or {}).get("turn") or {}
|
||||
).get("status")
|
||||
if turn_status and turn_status not in ("completed", "interrupted"):
|
||||
if turn_status and turn_status not in {"completed", "interrupted"}:
|
||||
err_obj = (
|
||||
(note.get("params") or {}).get("turn") or {}
|
||||
).get("error")
|
||||
|
|
@ -775,9 +775,9 @@ def _approval_choice_to_codex_decision(choice: str) -> str:
|
|||
(verified against codex-rs/app-server-protocol/src/protocol/v2/item.rs
|
||||
on codex 0.130.0).
|
||||
"""
|
||||
if choice in ("once",):
|
||||
if choice in {"once",}:
|
||||
return "accept"
|
||||
if choice in ("session", "always"):
|
||||
if choice in {"session", "always"}:
|
||||
return "acceptForSession"
|
||||
return "decline"
|
||||
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import { Card } from "@/components/ui/card";
|
|||
import { ModelPickerDialog } from "@/components/ModelPickerDialog";
|
||||
import { ToolCall, type ToolEntry } from "@/components/ToolCall";
|
||||
import { GatewayClient, type ConnectionState } from "@/lib/gatewayClient";
|
||||
import { HERMES_BASE_PATH } from "@/lib/api";
|
||||
|
||||
import { cn } from "@/lib/utils";
|
||||
import { AlertCircle, ChevronDown, RefreshCw } from "lucide-react";
|
||||
|
|
@ -160,7 +161,7 @@ export function ChatSidebar({ channel, className }: ChatSidebarProps) {
|
|||
const proto = window.location.protocol === "https:" ? "wss:" : "ws:";
|
||||
const qs = new URLSearchParams({ token, channel });
|
||||
const ws = new WebSocket(
|
||||
`${proto}//${window.location.host}/api/events?${qs.toString()}`,
|
||||
`${proto}//${window.location.host}${HERMES_BASE_PATH}/api/events?${qs.toString()}`,
|
||||
);
|
||||
|
||||
// `unmounting` suppresses the banner during cleanup — `ws.close()`
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ import {
|
|||
type GatewayEventName,
|
||||
} from "@hermes/shared";
|
||||
|
||||
import { HERMES_BASE_PATH } from "@/lib/api";
|
||||
|
||||
export type { ConnectionState, GatewayEvent, GatewayEventName };
|
||||
|
||||
/**
|
||||
|
|
@ -24,7 +26,7 @@ export class GatewayClient extends JsonRpcGatewayClient {
|
|||
|
||||
const scheme = location.protocol === "https:" ? "wss:" : "ws:";
|
||||
await super.connect(
|
||||
`${scheme}//${location.host}/api/ws?token=${encodeURIComponent(resolved)}`,
|
||||
`${scheme}//${location.host}${HERMES_BASE_PATH}/api/ws?token=${encodeURIComponent(resolved)}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ import { Terminal } from "@xterm/xterm";
|
|||
import "@xterm/xterm/css/xterm.css";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Typography } from "@/components/NouiTypography";
|
||||
import { HERMES_BASE_PATH } from "@/lib/api";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { Copy, PanelRight, X } from "lucide-react";
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
||||
|
|
@ -44,7 +45,7 @@ function buildWsUrl(
|
|||
const proto = window.location.protocol === "https:" ? "wss:" : "ws:";
|
||||
const qs = new URLSearchParams({ token, channel });
|
||||
if (resume) qs.set("resume", resume);
|
||||
return `${proto}//${window.location.host}/api/pty?${qs.toString()}`;
|
||||
return `${proto}//${window.location.host}${HERMES_BASE_PATH}/api/pty?${qs.toString()}`;
|
||||
}
|
||||
|
||||
// Channel id ties this chat tab's PTY child (publisher) to its sidebar
|
||||
|
|
@ -286,6 +287,17 @@ export default function ChatPage({ isActive = true }: { isActive?: boolean }) {
|
|||
fontWeight: "400",
|
||||
fontWeightBold: "700",
|
||||
macOptionIsMeta: true,
|
||||
// Hold Option (Alt on Linux/Windows) to force native text selection
|
||||
// even when the inner Hermes TUI has enabled xterm mouse-events
|
||||
// mode (CSI ?1000h family). Without this, click-and-drag in the
|
||||
// chat canvas selects nothing and Cmd+C falls back to copying the
|
||||
// entire visible buffer, which is rarely what the user wants.
|
||||
// See #25720.
|
||||
macOptionClickForcesSelection: true,
|
||||
// Right-click selects the word under the pointer. xterm.js default
|
||||
// is false; enabling it gives users a single-action selection
|
||||
// path on top of the modifier-based bypass above.
|
||||
rightClickSelectsWord: true,
|
||||
// Single-scroll-system experiment:
|
||||
// let the inner Hermes TUI own transcript history/scroll behavior.
|
||||
// The outer browser xterm should act as a display/input bridge only.
|
||||
|
|
|
|||
43
cli.py
43
cli.py
|
|
@ -1396,7 +1396,7 @@ def _detect_light_mode() -> bool:
|
|||
last = cfgbg.split(";")[-1] if ";" in cfgbg else cfgbg
|
||||
if last.isdigit():
|
||||
bg = int(last)
|
||||
if bg in (7, 15):
|
||||
if bg in {7, 15}:
|
||||
result = True
|
||||
_LIGHT_MODE_CACHE = result
|
||||
return result
|
||||
|
|
@ -2412,6 +2412,7 @@ def _looks_like_slash_command(text: str) -> bool:
|
|||
|
||||
from agent.skill_commands import (
|
||||
scan_skill_commands,
|
||||
get_skill_commands,
|
||||
build_skill_invocation_message,
|
||||
build_preloaded_skills_prompt,
|
||||
)
|
||||
|
|
@ -2824,6 +2825,11 @@ class HermesCLI:
|
|||
# turn (which would make Ctrl+C feel like it did nothing).
|
||||
self._last_turn_interrupted = False
|
||||
self._should_exit = False
|
||||
# /exit --delete: when True, the current session's SQLite history and
|
||||
# on-disk transcripts are deleted during shutdown. Set by
|
||||
# process_command() when the user runs /exit --delete or /quit --delete.
|
||||
# Ported from google-gemini/gemini-cli#19332.
|
||||
self._delete_session_on_exit = False
|
||||
self._last_ctrl_c_time = 0
|
||||
self._clarify_state = None
|
||||
self._clarify_freetext = False
|
||||
|
|
@ -7653,6 +7659,16 @@ class HermesCLI:
|
|||
canonical = _cmd_def.name if _cmd_def else _base_word
|
||||
|
||||
if canonical in {"quit", "exit"}:
|
||||
# Parse --delete flag: /exit --delete also removes the current
|
||||
# session's transcripts + SQLite history. Ported from
|
||||
# google-gemini/gemini-cli#19332.
|
||||
_rest = cmd_original.split(None, 1)
|
||||
_args = (_rest[1] if len(_rest) > 1 else "").strip().lower()
|
||||
if _args in {"--delete", "-d"}:
|
||||
self._delete_session_on_exit = True
|
||||
elif _args:
|
||||
_cprint(f" {_DIM}✗ Unknown argument: {_escape(_args)}. Use /exit --delete to also remove session history.{_RST}")
|
||||
return True
|
||||
return False
|
||||
elif canonical == "help":
|
||||
self.show_help()
|
||||
|
|
@ -9598,12 +9614,18 @@ class HermesCLI:
|
|||
prompt caching intact.
|
||||
"""
|
||||
try:
|
||||
from agent.skill_commands import reload_skills
|
||||
from agent.skill_commands import reload_skills, get_skill_commands
|
||||
|
||||
if not self._command_running:
|
||||
print("🔄 Reloading skills...")
|
||||
|
||||
result = reload_skills()
|
||||
|
||||
# Sync cli.py's module-level _skill_commands so all consumers
|
||||
# (help display, command dispatch, Tab-completion lambda) see the
|
||||
# updated dict without needing to restart the session.
|
||||
global _skill_commands
|
||||
_skill_commands = get_skill_commands()
|
||||
added = result.get("added", []) # [{"name", "description"}, ...]
|
||||
removed = result.get("removed", []) # [{"name", "description"}, ...]
|
||||
total = result.get("total", 0)
|
||||
|
|
@ -12609,7 +12631,7 @@ class HermesCLI:
|
|||
|
||||
|
||||
_completer = SlashCommandCompleter(
|
||||
skill_commands_provider=lambda: _skill_commands,
|
||||
skill_commands_provider=lambda: get_skill_commands(),
|
||||
command_filter=cli_ref._command_available,
|
||||
)
|
||||
input_area = TextArea(
|
||||
|
|
@ -13777,7 +13799,7 @@ class HermesCLI:
|
|||
if _errno == errno.EIO:
|
||||
pass # suppress broken-stdout I/O errors on interrupt (#13710)
|
||||
elif (
|
||||
_errno in (errno.EINVAL, errno.EBADF)
|
||||
_errno in {errno.EINVAL, errno.EBADF}
|
||||
or "is not registered" in _msg
|
||||
or "Bad file descriptor" in _msg
|
||||
or "Invalid argument" in _msg
|
||||
|
|
@ -13824,6 +13846,19 @@ class HermesCLI:
|
|||
self._session_db.end_session(self.agent.session_id, "cli_close")
|
||||
except (Exception, KeyboardInterrupt) as e:
|
||||
logger.debug("Could not close session in DB: %s", e)
|
||||
# /exit --delete: also remove the current session's transcripts
|
||||
# and SQLite history. Ported from google-gemini/gemini-cli#19332.
|
||||
if getattr(self, '_delete_session_on_exit', False):
|
||||
try:
|
||||
from hermes_constants import get_hermes_home as _ghh
|
||||
_sessions_dir = _ghh() / "sessions"
|
||||
_sid = self.agent.session_id
|
||||
if self._session_db.delete_session(_sid, sessions_dir=_sessions_dir):
|
||||
_cprint(f" {_DIM}✓ Session {_escape(_sid)} deleted{_RST}")
|
||||
else:
|
||||
_cprint(f" {_DIM}✗ Session {_escape(_sid)} not found for deletion{_RST}")
|
||||
except (Exception, KeyboardInterrupt) as e:
|
||||
logger.debug("Could not delete session on exit: %s", e)
|
||||
# Plugin hook: on_session_end — safety net for interrupted exits.
|
||||
# run_conversation() already fires this per-turn on normal completion,
|
||||
# so only fire here if the agent was mid-turn (_agent_running) when
|
||||
|
|
|
|||
|
|
@ -1802,7 +1802,12 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
|||
for job in parallel_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
|
||||
_results.extend(f.result() for f in _futures)
|
||||
for f in concurrent.futures.as_completed(_futures, timeout=600):
|
||||
try:
|
||||
_results.append(f.result())
|
||||
except Exception as exc:
|
||||
logger.error("Parallel cron job future failed: %s", exc)
|
||||
_results.append(False)
|
||||
|
||||
# Best-effort sweep of MCP stdio subprocesses that survived their
|
||||
# session teardown during this tick. Runs AFTER every job has
|
||||
|
|
|
|||
|
|
@ -71,6 +71,35 @@ def _coerce_port(value: Any, default: int = DEFAULT_PORT) -> int:
|
|||
return default
|
||||
|
||||
|
||||
_TRUE_REQUEST_BOOL_STRINGS = frozenset({"1", "true", "yes", "on"})
|
||||
_FALSE_REQUEST_BOOL_STRINGS = frozenset({"0", "false", "no", "off"})
|
||||
|
||||
|
||||
def _coerce_request_bool(value: Any, default: bool = False) -> bool:
|
||||
"""Normalize boolean-like API payload values.
|
||||
|
||||
External clients should send real JSON booleans, but some OpenAI-compatible
|
||||
frontends and middleware serialize flags like ``stream`` as strings. Using
|
||||
Python truthiness on those values misroutes requests because ``"false"`` is
|
||||
still truthy. Treat only explicit bool-ish scalars as booleans; everything
|
||||
else falls back to the caller's default.
|
||||
"""
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if value is None:
|
||||
return default
|
||||
if isinstance(value, str):
|
||||
normalized = value.strip().lower()
|
||||
if normalized in _TRUE_REQUEST_BOOL_STRINGS:
|
||||
return True
|
||||
if normalized in _FALSE_REQUEST_BOOL_STRINGS:
|
||||
return False
|
||||
return default
|
||||
if isinstance(value, (int, float)):
|
||||
return bool(value)
|
||||
return default
|
||||
|
||||
|
||||
def _normalize_chat_content(
|
||||
content: Any, *, _max_depth: int = 10, _depth: int = 0,
|
||||
) -> str:
|
||||
|
|
@ -481,7 +510,12 @@ else:
|
|||
body_limit_middleware = None # type: ignore[assignment]
|
||||
|
||||
_SECURITY_HEADERS = {
|
||||
"Content-Security-Policy": "default-src 'none'; frame-ancestors 'none'",
|
||||
"Permissions-Policy": "camera=(), microphone=(), geolocation=()",
|
||||
"Strict-Transport-Security": "max-age=31536000; includeSubDomains",
|
||||
"X-Content-Type-Options": "nosniff",
|
||||
"X-Frame-Options": "DENY",
|
||||
"X-XSS-Protection": "0",
|
||||
"Referrer-Policy": "no-referrer",
|
||||
}
|
||||
|
||||
|
|
@ -1005,7 +1039,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
status=400,
|
||||
)
|
||||
|
||||
stream = body.get("stream", False)
|
||||
stream = _coerce_request_bool(body.get("stream"), default=False)
|
||||
|
||||
# Extract system message (becomes ephemeral system prompt layered ON TOP of core)
|
||||
system_prompt = None
|
||||
|
|
@ -2082,7 +2116,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
instructions = body.get("instructions")
|
||||
previous_response_id = body.get("previous_response_id")
|
||||
conversation = body.get("conversation")
|
||||
store = body.get("store", True)
|
||||
store = _coerce_request_bool(body.get("store"), default=True)
|
||||
|
||||
# conversation and previous_response_id are mutually exclusive
|
||||
if conversation and previous_response_id:
|
||||
|
|
@ -2165,7 +2199,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
# groups the entire conversation under one session entry.
|
||||
session_id = stored_session_id or str(uuid.uuid4())
|
||||
|
||||
stream = bool(body.get("stream", False))
|
||||
stream = _coerce_request_bool(body.get("stream"), default=False)
|
||||
if stream:
|
||||
# Streaming branch — emit OpenAI Responses SSE events as the
|
||||
# agent runs so frontends can render text deltas and tool
|
||||
|
|
@ -3228,7 +3262,10 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
status=409,
|
||||
)
|
||||
|
||||
resolve_all = bool(body.get("all") or body.get("resolve_all"))
|
||||
resolve_all = (
|
||||
_coerce_request_bool(body.get("all"), default=False)
|
||||
or _coerce_request_bool(body.get("resolve_all"), default=False)
|
||||
)
|
||||
try:
|
||||
from tools.approval import resolve_gateway_approval
|
||||
|
||||
|
|
|
|||
|
|
@ -2014,6 +2014,13 @@ class BasePlatformAdapter(ABC):
|
|||
text = f"{caption}\n{text}"
|
||||
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to, metadata=metadata)
|
||||
|
||||
def prepare_tts_text(self, text: str) -> str:
    """Return *text* cleaned up for speech synthesis.

    Subclasses may override this hook to filter tool output, code, and so on.
    The base implementation removes markdown punctuation characters, keeps
    the first 4000 characters of what remains, and trims surrounding
    whitespace.
    """
    # Order matters: strip markdown first, then truncate, then trim, so the
    # 4000-character cap applies to the already-cleaned text.
    without_markdown = re.sub(r'[*_`#\[\]()]', '', text)
    clipped = without_markdown[:4000]
    return clipped.strip()
|
||||
|
||||
async def play_tts(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
|
@ -3144,7 +3151,7 @@ class BasePlatformAdapter(ABC):
|
|||
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
|
||||
if check_tts_requirements():
|
||||
import json as _json
|
||||
speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000].strip()
|
||||
speech_text = self.prepare_tts_text(text_content)
|
||||
if not speech_text:
|
||||
raise ValueError("Empty text after markdown cleanup")
|
||||
tts_result_str = await asyncio.to_thread(
|
||||
|
|
|
|||
|
|
@ -3639,18 +3639,18 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
configured = self.config.extra.get("thread_require_mention")
|
||||
if configured is not None:
|
||||
if isinstance(configured, str):
|
||||
return configured.lower() not in ("false", "0", "no", "off")
|
||||
return configured.lower() not in {"false", "0", "no", "off"}
|
||||
return bool(configured)
|
||||
return os.getenv("DISCORD_THREAD_REQUIRE_MENTION", "false").lower() in ("true", "1", "yes", "on")
|
||||
return os.getenv("DISCORD_THREAD_REQUIRE_MENTION", "false").lower() in {"true", "1", "yes", "on"}
|
||||
|
||||
def _discord_history_backfill(self) -> bool:
|
||||
"""Return whether history backfill is enabled for shared sessions."""
|
||||
configured = self.config.extra.get("history_backfill")
|
||||
if configured is not None:
|
||||
if isinstance(configured, str):
|
||||
return configured.lower() not in ("false", "0", "no", "off")
|
||||
return configured.lower() not in {"false", "0", "no", "off"}
|
||||
return bool(configured)
|
||||
return os.getenv("DISCORD_HISTORY_BACKFILL", "true").lower() in ("true", "1", "yes")
|
||||
return os.getenv("DISCORD_HISTORY_BACKFILL", "true").lower() in {"true", "1", "yes"}
|
||||
|
||||
def _discord_history_backfill_limit(self) -> int:
|
||||
"""Return the max number of messages to scan backwards for context.
|
||||
|
|
@ -3737,7 +3737,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
break
|
||||
|
||||
# Skip system messages (pins, joins, thread renames, etc.)
|
||||
if msg.type not in (discord.MessageType.default, discord.MessageType.reply):
|
||||
if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
|
||||
continue
|
||||
|
||||
# Respect DISCORD_ALLOW_BOTS for other bots.
|
||||
|
|
|
|||
|
|
@ -168,8 +168,8 @@ class TextBatchAggregator:
|
|||
# Pre-compiled regexes for performance
|
||||
_RE_BOLD = re.compile(r"\*\*(.+?)\*\*", re.DOTALL)
|
||||
_RE_ITALIC_STAR = re.compile(r"\*(.+?)\*", re.DOTALL)
|
||||
_RE_BOLD_UNDER = re.compile(r"__(.+?)__", re.DOTALL)
|
||||
_RE_ITALIC_UNDER = re.compile(r"_(.+?)_", re.DOTALL)
|
||||
_RE_BOLD_UNDER = re.compile(r"\b__(?![\s_])(.+?)(?<![\s_])__\b", re.DOTALL)
|
||||
_RE_ITALIC_UNDER = re.compile(r"\b_(?![\s_])(.+?)(?<![\s_])_\b", re.DOTALL)
|
||||
_RE_CODE_BLOCK = re.compile(r"```[a-zA-Z0-9_+-]*\n?")
|
||||
_RE_INLINE_CODE = re.compile(r"`(.+?)`")
|
||||
_RE_HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)
|
||||
|
|
|
|||
|
|
@ -348,6 +348,17 @@ class MatrixAdapter(BasePlatformAdapter):
|
|||
self._sync_task: Optional[asyncio.Task] = None
|
||||
self._closing = False
|
||||
self._startup_ts: float = 0.0
|
||||
# Clock-skew detection: count grace-check drops that happen well
|
||||
# after startup (i.e. not initial-sync backfill). If the host's
|
||||
# system clock is set ahead of real time, the startup grace check
|
||||
# `event_ts < startup_ts - 5` silently drops every live message.
|
||||
# See #12614 — the symptom is "bot joins rooms but never replies".
|
||||
# Drops only count when their skew matches the first sampled drop
|
||||
# (within 60s), so varied-age backfill from freshly-invited rooms
|
||||
# doesn't trip the heuristic.
|
||||
self._late_grace_drops: int = 0
|
||||
self._late_grace_skew: float = 0.0
|
||||
self._clock_skew_warned: bool = False
|
||||
|
||||
# Cache: room_id → bool (is DM)
|
||||
self._dm_rooms: Dict[str, bool] = {}
|
||||
|
|
@ -842,6 +853,11 @@ class MatrixAdapter(BasePlatformAdapter):
|
|||
|
||||
# Initial sync to catch up, then start background sync.
|
||||
self._startup_ts = time.time()
|
||||
# Reset clock-skew detector for each connect cycle so a reconnect
|
||||
# after the user fixes NTP doesn't inherit stale counters.
|
||||
self._late_grace_drops = 0
|
||||
self._late_grace_skew = 0.0
|
||||
self._clock_skew_warned = False
|
||||
self._closing = False
|
||||
|
||||
try:
|
||||
|
|
@ -1542,6 +1558,49 @@ class MatrixAdapter(BasePlatformAdapter):
|
|||
)
|
||||
event_ts = raw_ts / 1000.0 if raw_ts else 0.0
|
||||
if event_ts and event_ts < self._startup_ts - _STARTUP_GRACE_SECONDS:
|
||||
# If we are well past startup but events are still being dropped
|
||||
# by the grace check, the host clock is probably set ahead of
|
||||
# real time — every live event then looks "older than startup".
|
||||
# Warn once so users can fix NTP instead of chasing a ghost.
|
||||
# See #12614 (Schnurzel700, April 2026).
|
||||
#
|
||||
# Filter out backfill (events legitimately old) by requiring:
|
||||
# - we are >30s past startup (initial-sync replay window closed)
|
||||
# - the skew is *consistent* across consecutive drops, which is
|
||||
# the signature of a constant clock offset rather than a
|
||||
# variable-age room history. Backfill from a freshly invited
|
||||
# room can deliver events spanning hours/days — those skews
|
||||
# will be all over the place and reset the counter.
|
||||
if not self._clock_skew_warned and (
|
||||
time.time() - self._startup_ts > 30
|
||||
):
|
||||
skew = self._startup_ts - event_ts
|
||||
# Sanity bound: malformed events with negative or absurd
|
||||
# timestamps shouldn't count.
|
||||
if 5 < skew < 86400:
|
||||
if self._late_grace_drops == 0:
|
||||
self._late_grace_skew = skew
|
||||
self._late_grace_drops = 1
|
||||
elif abs(skew - self._late_grace_skew) < 60:
|
||||
# Consistent offset → likely real clock skew.
|
||||
self._late_grace_drops += 1
|
||||
else:
|
||||
# Varied skew → likely backfill, restart sampling.
|
||||
self._late_grace_skew = skew
|
||||
self._late_grace_drops = 1
|
||||
if self._late_grace_drops >= 3:
|
||||
logger.warning(
|
||||
"Matrix: dropped %d consecutive live events as "
|
||||
"'too old' more than 30s after startup (skew "
|
||||
"≈ %.0fs). The host system clock is likely set "
|
||||
"ahead of real time, which causes the startup "
|
||||
"grace filter to silently discard every incoming "
|
||||
"message. Run `timedatectl set-ntp true` (or "
|
||||
"sync NTP) and restart the bot.",
|
||||
self._late_grace_drops,
|
||||
skew,
|
||||
)
|
||||
self._clock_skew_warned = True
|
||||
return
|
||||
|
||||
# Extract content from the event.
|
||||
|
|
|
|||
|
|
@ -482,7 +482,7 @@ class SlackAdapter(BasePlatformAdapter):
|
|||
"text": text,
|
||||
}
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with aiohttp.ClientSession(trust_env=True) as session:
|
||||
async with session.post(
|
||||
ctx["response_url"],
|
||||
json=payload,
|
||||
|
|
|
|||
|
|
@ -128,6 +128,7 @@ class SmsAdapter(BasePlatformAdapter):
|
|||
await site.start()
|
||||
self._http_session = aiohttp.ClientSession(
|
||||
timeout=aiohttp.ClientTimeout(total=30),
|
||||
trust_env=True,
|
||||
)
|
||||
self._running = True
|
||||
|
||||
|
|
@ -169,6 +170,7 @@ class SmsAdapter(BasePlatformAdapter):
|
|||
|
||||
session = self._http_session or aiohttp.ClientSession(
|
||||
timeout=aiohttp.ClientTimeout(total=30),
|
||||
trust_env=True,
|
||||
)
|
||||
try:
|
||||
for chunk in chunks:
|
||||
|
|
|
|||
|
|
@ -1663,7 +1663,17 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
continue
|
||||
raise
|
||||
message_ids.append(str(msg.message_id))
|
||||
|
||||
|
||||
# Re-trigger typing indicator after sending a message.
|
||||
# Telegram clears the typing state when a new message is delivered,
|
||||
# so without this the "...typing" bubble disappears mid-response
|
||||
# (especially noticeable when the agent sends intermediate progress
|
||||
# messages like "Checking:" before running tools).
|
||||
try:
|
||||
await self.send_typing(chat_id, metadata=metadata)
|
||||
except Exception:
|
||||
pass # Typing failures are non-fatal
|
||||
|
||||
return SendResult(
|
||||
success=True,
|
||||
message_id=message_ids[0] if message_ids else None,
|
||||
|
|
|
|||
123
gateway/run.py
123
gateway/run.py
|
|
@ -4763,11 +4763,106 @@ class GatewayRunner:
|
|||
pass
|
||||
return False
|
||||
|
||||
# Auto-decompose: turn fresh triage tasks into ready workgraphs
|
||||
# before the dispatcher fans out workers. Gated by
|
||||
# ``kanban.auto_decompose`` (default True). Capped by
|
||||
# ``kanban.auto_decompose_per_tick`` (default 3) so a bulk-load
|
||||
# of triage tasks doesn't burst-spend the aux LLM in one tick;
|
||||
# remainder defers to subsequent ticks.
|
||||
auto_decompose_enabled = bool(kanban_cfg.get("auto_decompose", True))
|
||||
try:
|
||||
auto_decompose_per_tick = int(
|
||||
kanban_cfg.get("auto_decompose_per_tick", 3) or 3
|
||||
)
|
||||
except (TypeError, ValueError):
|
||||
auto_decompose_per_tick = 3
|
||||
if auto_decompose_per_tick < 1:
|
||||
auto_decompose_per_tick = 1
|
||||
|
||||
def _auto_decompose_tick() -> int:
    """Auto-decompose a bounded number of triage tasks across all boards.

    At most ``auto_decompose_per_tick`` tasks are attempted per call,
    counted across boards. Returns how many of those attempts reported
    ``outcome.ok`` (decomposed into children or specified as a single
    task).
    """
    try:
        from hermes_cli import kanban_decompose as _decomp
    except Exception as exc:  # pragma: no cover
        logger.warning(
            "kanban auto-decompose: import failed (%s); skipping", exc,
        )
        return 0

    try:
        board_entries = _kb.list_boards(include_archived=False)
    except Exception:
        # Board listing failed: fall back to the default board only.
        board_entries = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)]

    env_key = "HERMES_KANBAN_BOARD"
    budget_left = auto_decompose_per_tick
    decomposed = 0

    for entry in board_entries:
        board_slug = entry.get("slug") or _kb.DEFAULT_BOARD
        if budget_left <= 0:
            break

        # Pin this board for the duration of the call — the decomposer
        # module connects with no board kwarg and relies on the env var.
        # The previous value is restored in ``finally`` below.
        saved_board = os.environ.get(env_key)
        try:
            os.environ[env_key] = board_slug

            try:
                pending_ids = _decomp.list_triage_ids()
            except Exception as exc:
                logger.debug(
                    "kanban auto-decompose: list_triage_ids failed on board %s (%s)",
                    board_slug, exc,
                )
                pending_ids = []

            for task_id in pending_ids:
                if budget_left <= 0:
                    break
                # A crashed attempt still consumes budget.
                budget_left -= 1

                try:
                    outcome = _decomp.decompose_task(
                        task_id, author="auto-decomposer",
                    )
                except Exception:
                    logger.exception(
                        "kanban auto-decompose: decompose_task crashed on %s",
                        task_id,
                    )
                    continue

                if not outcome.ok:
                    # Common no-op reasons (no aux client configured)
                    # shouldn't spam logs every tick. Log at debug.
                    logger.debug(
                        "kanban auto-decompose [%s]: %s skipped: %s",
                        board_slug, task_id, outcome.reason,
                    )
                    continue

                decomposed += 1
                if outcome.fanout and outcome.child_ids:
                    logger.info(
                        "kanban auto-decompose [%s]: %s → %d children",
                        board_slug, task_id, len(outcome.child_ids),
                    )
                else:
                    logger.info(
                        "kanban auto-decompose [%s]: %s → single task (no fanout)",
                        board_slug, task_id,
                    )
        finally:
            if saved_board is not None:
                os.environ[env_key] = saved_board
            else:
                os.environ.pop(env_key, None)

    return decomposed
|
||||
|
||||
logger.info(
|
||||
"kanban dispatcher: embedded in gateway (interval=%.1fs)", interval
|
||||
)
|
||||
while self._running:
|
||||
try:
|
||||
if auto_decompose_enabled:
|
||||
await asyncio.to_thread(_auto_decompose_tick)
|
||||
results = await asyncio.to_thread(_tick_once)
|
||||
any_spawned = False
|
||||
for slug, res in (results or []):
|
||||
|
|
@ -8845,7 +8940,7 @@ class GatewayRunner:
|
|||
lines.append("Failed/paused: (none)")
|
||||
return "\n".join(lines)
|
||||
|
||||
if action in ("pause", "resume"):
|
||||
if action in {"pause", "resume"}:
|
||||
if not target:
|
||||
return f"Usage: /platform {action} <name>"
|
||||
platform = _resolve_platform(target)
|
||||
|
|
@ -8953,13 +9048,15 @@ class GatewayRunner:
|
|||
logger.debug("Failed to write restart dedup marker: %s", e)
|
||||
|
||||
active_agents = self._running_agent_count()
|
||||
# When running under a service manager (systemd/launchd), use the
|
||||
# service restart path: exit with code 75 so the service manager
|
||||
# restarts us. The detached subprocess approach (setsid + bash)
|
||||
# doesn't work under systemd because KillMode=mixed kills all
|
||||
# processes in the cgroup, including the detached helper.
|
||||
# When running under a service manager (systemd/launchd) or inside a
|
||||
# Docker/Podman container, use the service restart path: exit with
|
||||
# code 75 so the service manager / container restart policy restarts
|
||||
# us. The detached subprocess approach (setsid + bash) doesn't work
|
||||
# under systemd (KillMode=mixed kills the cgroup) or Docker (tini
|
||||
# exits when the gateway dies, taking the detached helper with it).
|
||||
_under_service = bool(os.environ.get("INVOCATION_ID")) # systemd sets this
|
||||
if _under_service:
|
||||
_in_container = os.path.exists("/.dockerenv") or os.path.exists("/run/.containerenv")
|
||||
if _under_service or _in_container:
|
||||
self.request_restart(detached=False, via_service=True)
|
||||
else:
|
||||
self.request_restart(detached=True, via_service=False)
|
||||
|
|
@ -12528,6 +12625,12 @@ class GatewayRunner:
|
|||
and getattr(source, "chat_type", None) == "dm"
|
||||
):
|
||||
metadata["telegram_dm_topic_reply_fallback"] = True
|
||||
# Telegram DM topic lanes need direct_messages_topic_id in metadata
|
||||
# so synthetic/queued messages (goal continuations, status notices)
|
||||
# route to the correct topic even when reply anchor is unavailable.
|
||||
tid = str(thread_id)
|
||||
if tid and tid not in {"", "1"}:
|
||||
metadata["direct_messages_topic_id"] = tid
|
||||
anchor = reply_to_message_id or getattr(source, "message_id", None)
|
||||
if anchor is not None:
|
||||
metadata["telegram_reply_to_message_id"] = str(anchor)
|
||||
|
|
@ -12813,7 +12916,11 @@ class GatewayRunner:
|
|||
update_cmd = (
|
||||
f"PYTHONUNBUFFERED=1 {hermes_cmd_str} update --gateway"
|
||||
f" > {shlex.quote(str(output_path))} 2>&1; "
|
||||
f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}"
|
||||
# Avoid `status=$?`: `status` is a read-only special parameter
|
||||
# in zsh, and this command string is copied/reused in macOS/zsh
|
||||
# operator wrappers. Keep the template zsh-safe even though this
|
||||
# specific subprocess currently runs under bash.
|
||||
f"rc=$?; printf '%s' \"$rc\" > {shlex.quote(str(exit_code_path))}"
|
||||
)
|
||||
setsid_bin = shutil.which("setsid")
|
||||
if setsid_bin:
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -48,9 +48,9 @@ def parse_args(arg_string: str) -> tuple[Optional[str], list[str]]:
|
|||
if not raw:
|
||||
return None, []
|
||||
# Accept human-friendly synonyms
|
||||
if raw in ("on", "codex", "enable"):
|
||||
if raw in {"on", "codex", "enable"}:
|
||||
return "codex_app_server", []
|
||||
if raw in ("off", "default", "disable", "hermes"):
|
||||
if raw in {"off", "default", "disable", "hermes"}:
|
||||
return "auto", []
|
||||
if raw in VALID_RUNTIMES:
|
||||
return raw, []
|
||||
|
|
|
|||
|
|
@ -123,7 +123,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
CommandDef("model", "Switch model for this session", "Configuration",
|
||||
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("codex-runtime", "Toggle codex app-server runtime for OpenAI/Codex models",
|
||||
"Configuration", args_hint="[auto|codex_app_server]"),
|
||||
"Configuration", aliases=("codex_runtime",),
|
||||
args_hint="[auto|codex_app_server]"),
|
||||
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
|
||||
cli_only=True),
|
||||
|
||||
|
|
|
|||
|
|
@ -926,6 +926,31 @@ DEFAULT_CONFIG = {
|
|||
"timeout": 120,
|
||||
"extra_body": {},
|
||||
},
|
||||
# Kanban decomposer — decomposes a triage task into a graph of
|
||||
# child tasks routed to specialist profiles by description.
|
||||
# Invoked by ``hermes kanban decompose`` and the kanban
|
||||
# auto-decompose dispatcher tick. Returns a JSON task graph;
|
||||
# uses more tokens than the specifier so allow more headroom.
|
||||
"kanban_decomposer": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 180,
|
||||
"extra_body": {},
|
||||
},
|
||||
# Profile describer — auto-generates a 1-2 sentence description
|
||||
# of what a profile is good at. Invoked by
|
||||
# ``hermes profile describe <name> --auto`` and the dashboard's
|
||||
# auto-generate button. Short, cheap call.
|
||||
"profile_describer": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 60,
|
||||
"extra_body": {},
|
||||
},
|
||||
# Curator — skill-usage review fork. Timeout is generous because the
|
||||
# review pass can take several minutes on reasoning models (umbrella
|
||||
# building over hundreds of candidate skills). "auto" = use main chat
|
||||
|
|
@ -1473,6 +1498,25 @@ DEFAULT_CONFIG = {
|
|||
# same task/profile (spawn_failed, timed_out, or crashed). Reassignment
|
||||
# resets the streak for the new profile.
|
||||
"failure_limit": 2,
|
||||
# Profile that decomposes tasks in the Triage column. When unset,
|
||||
# falls back to the default profile (the one `hermes` launches with
|
||||
# no -p flag). Set this to a dedicated 'orchestrator' profile if you
|
||||
# want decomposition to use a different model/skills from your main
|
||||
# working profile.
|
||||
"orchestrator_profile": "",
|
||||
# Where a child task lands if the orchestrator can't match an
|
||||
# assignee to any installed profile. When unset, falls back to the
|
||||
# default profile. A task never ends up with assignee=None.
|
||||
"default_assignee": "",
|
||||
# When true, the kanban dispatcher auto-runs the decomposer on
|
||||
# tasks that land in Triage (every dispatcher tick). When false,
|
||||
# decomposition is manual via `hermes kanban decompose <id>` or
|
||||
# the dashboard's Decompose button.
|
||||
"auto_decompose": True,
|
||||
# Max triage tasks to decompose per dispatcher tick. Prevents a
|
||||
# large bulk-load of triage tasks from spending a burst of aux
|
||||
# LLM calls in one tick. Excess tasks defer to the next tick.
|
||||
"auto_decompose_per_tick": 3,
|
||||
},
|
||||
|
||||
# execute_code settings — controls the tool used for programmatic tool calls.
|
||||
|
|
@ -2913,6 +2957,7 @@ def _normalize_custom_provider_entry(
|
|||
"api_mode", "transport", "model", "default_model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
"request_timeout_seconds", "stale_timeout_seconds",
|
||||
"discover_models",
|
||||
}
|
||||
for camel, snake in _CAMEL_ALIASES.items():
|
||||
if camel in entry and snake not in entry:
|
||||
|
|
@ -3003,6 +3048,10 @@ def _normalize_custom_provider_entry(
|
|||
if isinstance(rate_limit_delay, (int, float)) and rate_limit_delay >= 0:
|
||||
normalized["rate_limit_delay"] = rate_limit_delay
|
||||
|
||||
discover_models = entry.get("discover_models")
|
||||
if isinstance(discover_models, bool):
|
||||
normalized["discover_models"] = discover_models
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ def ensure_dependency(dep: str, interactive: bool = True) -> bool:
|
|||
reply = input(f"{desc} is not installed. Install now? [Y/n] ").strip().lower()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
return False
|
||||
if reply not in ("", "y", "yes"):
|
||||
if reply not in {"", "y", "yes"}:
|
||||
return False
|
||||
|
||||
result = subprocess.run(
|
||||
|
|
|
|||
|
|
@ -160,19 +160,25 @@ def _has_healthy_oauth_fallback_for_apikey_provider(provider_label: str) -> bool
|
|||
still show a failed API-key connectivity row, but it should not promote
|
||||
that direct-key problem into the final blocking summary.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.auth import (
|
||||
get_gemini_oauth_auth_status,
|
||||
get_minimax_oauth_auth_status,
|
||||
)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
normalized = (provider_label or "").strip().lower()
|
||||
if normalized in {"google / gemini", "gemini"}:
|
||||
return bool((get_gemini_oauth_auth_status() or {}).get("logged_in"))
|
||||
try:
|
||||
from hermes_cli.auth import get_gemini_oauth_auth_status
|
||||
return bool((get_gemini_oauth_auth_status() or {}).get("logged_in"))
|
||||
except Exception:
|
||||
return False
|
||||
if normalized == "minimax":
|
||||
return bool((get_minimax_oauth_auth_status() or {}).get("logged_in"))
|
||||
try:
|
||||
from hermes_cli.auth import get_minimax_oauth_auth_status
|
||||
return bool((get_minimax_oauth_auth_status() or {}).get("logged_in"))
|
||||
except Exception:
|
||||
return False
|
||||
if normalized == "xai":
|
||||
try:
|
||||
from hermes_cli.auth import get_xai_oauth_auth_status
|
||||
return bool((get_xai_oauth_auth_status() or {}).get("logged_in"))
|
||||
except Exception:
|
||||
return False
|
||||
return False
|
||||
|
||||
|
||||
|
|
@ -645,31 +651,41 @@ def run_doctor(args):
|
|||
|
||||
# Check credentials for the configured provider.
|
||||
# Limit to API-key providers in PROVIDER_REGISTRY — other provider
|
||||
# types (OAuth, SDK, openrouter/anthropic/custom/auto) have their
|
||||
# own env-var checks elsewhere in doctor, and get_auth_status()
|
||||
# returns a bare {logged_in: False} for anything it doesn't
|
||||
# explicitly dispatch, which would produce false positives.
|
||||
if runtime_provider and runtime_provider not in {"auto", "custom", "openrouter"}:
|
||||
# types (OAuth, SDK, anthropic/custom/auto) have their own env-var
|
||||
# checks elsewhere in doctor, and get_auth_status() returns a bare
|
||||
# {logged_in: False} for anything it doesn't explicitly dispatch,
|
||||
# which would produce false positives.
|
||||
if runtime_provider and runtime_provider not in ("auto", "custom"):
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
|
||||
pconfig = PROVIDER_REGISTRY.get(runtime_provider)
|
||||
if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
|
||||
status = get_auth_status(runtime_provider) or {}
|
||||
if runtime_provider == "openrouter":
|
||||
from hermes_cli.config import get_env_value
|
||||
|
||||
configured = bool(
|
||||
status.get("configured")
|
||||
or status.get("logged_in")
|
||||
or status.get("api_key")
|
||||
str(get_env_value("OPENROUTER_API_KEY") or "").strip()
|
||||
or str(get_env_value("OPENAI_API_KEY") or "").strip()
|
||||
)
|
||||
if not configured:
|
||||
check_fail(
|
||||
f"model.provider '{runtime_provider}' is set but no API key is configured",
|
||||
"(check ~/.hermes/.env or run 'hermes setup')",
|
||||
)
|
||||
issues.append(
|
||||
f"No credentials found for provider '{runtime_provider}'. "
|
||||
f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
|
||||
f"or switch providers with 'hermes config set model.provider <name>'"
|
||||
else:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
|
||||
|
||||
pconfig = PROVIDER_REGISTRY.get(runtime_provider)
|
||||
configured = True
|
||||
if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
|
||||
status = get_auth_status(runtime_provider) or {}
|
||||
configured = bool(
|
||||
status.get("configured")
|
||||
or status.get("logged_in")
|
||||
or status.get("api_key")
|
||||
)
|
||||
if not configured:
|
||||
check_fail(
|
||||
f"model.provider '{runtime_provider}' is set but no API key is configured",
|
||||
"(check ~/.hermes/.env or run 'hermes setup')",
|
||||
)
|
||||
issues.append(
|
||||
f"No credentials found for provider '{runtime_provider}'. "
|
||||
f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
|
||||
f"or switch providers with 'hermes config set model.provider <name>'"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -817,6 +833,20 @@ def run_doctor(args):
|
|||
except Exception as e:
|
||||
check_warn("Auth provider status", f"(could not check: {e})")
|
||||
|
||||
# xAI OAuth — separate try/except so an import failure here cannot
|
||||
# disrupt the already-printed Nous/Codex/Gemini/MiniMax rows above.
|
||||
try:
|
||||
from hermes_cli.auth import get_xai_oauth_auth_status
|
||||
xai_oauth_status = get_xai_oauth_auth_status() or {}
|
||||
if xai_oauth_status.get("logged_in"):
|
||||
check_ok("xAI OAuth", "(logged in)")
|
||||
else:
|
||||
check_warn("xAI OAuth", "(not logged in)")
|
||||
if xai_oauth_status.get("error"):
|
||||
check_info(xai_oauth_status["error"])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if _safe_which("codex"):
|
||||
check_ok("codex CLI")
|
||||
else:
|
||||
|
|
@ -1073,10 +1103,20 @@ def run_doctor(args):
|
|||
if terminal_env == "ssh":
|
||||
ssh_host = os.getenv("TERMINAL_SSH_HOST")
|
||||
if ssh_host:
|
||||
ssh_user = os.getenv("TERMINAL_SSH_USER")
|
||||
ssh_port = os.getenv("TERMINAL_SSH_PORT")
|
||||
ssh_key = os.getenv("TERMINAL_SSH_KEY")
|
||||
target = f"{ssh_user}@{ssh_host}" if ssh_user else ssh_host
|
||||
cmd = ["ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes"]
|
||||
if ssh_port:
|
||||
cmd += ["-p", ssh_port]
|
||||
if ssh_key:
|
||||
cmd += ["-i", os.path.expanduser(ssh_key)]
|
||||
cmd += [target, "echo ok"]
|
||||
# Try to connect
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes", ssh_host, "echo ok"],
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15
|
||||
|
|
@ -1474,6 +1514,15 @@ def run_doctor(args):
|
|||
}
|
||||
if base_url_host_matches(base, "api.kimi.com"):
|
||||
headers["User-Agent"] = "claude-code/0.1.0"
|
||||
# Google's Generative Language API (generativelanguage.googleapis.com)
|
||||
# rejects ``Authorization: Bearer <api-key>`` with 401
|
||||
# ``ACCESS_TOKEN_TYPE_UNSUPPORTED`` — that header is reserved for
|
||||
# OAuth 2 access tokens, not plain API keys. Plain keys use
|
||||
# ``x-goog-api-key`` (or ``?key=``). Without this, a perfectly valid
|
||||
# GOOGLE_API_KEY/GEMINI_API_KEY always shows red in ``hermes doctor``.
|
||||
if url and base_url_host_matches(url, "generativelanguage.googleapis.com"):
|
||||
headers.pop("Authorization", None)
|
||||
headers["x-goog-api-key"] = key
|
||||
r = httpx.get(url, headers=headers, timeout=10)
|
||||
if (
|
||||
pname == "Alibaba/DashScope"
|
||||
|
|
|
|||
|
|
@ -2110,24 +2110,30 @@ def _build_service_path_dirs(project_root: Path | None = None) -> list[str]:
|
|||
if project_root is None:
|
||||
project_root = PROJECT_ROOT
|
||||
|
||||
def _is_dir(path: Path) -> bool:
|
||||
try:
|
||||
return path.is_dir()
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
candidates = []
|
||||
|
||||
venv_bin = project_root / "venv" / "bin"
|
||||
if venv_bin.is_dir():
|
||||
if _is_dir(venv_bin):
|
||||
candidates.append(str(venv_bin))
|
||||
elif sys.prefix != sys.base_prefix:
|
||||
candidates.append(str(Path(sys.prefix) / "bin"))
|
||||
|
||||
node_bin = project_root / "node_modules" / ".bin"
|
||||
if node_bin.is_dir():
|
||||
if _is_dir(node_bin):
|
||||
candidates.append(str(node_bin))
|
||||
|
||||
hermes_home = get_hermes_home()
|
||||
hermes_node = hermes_home / "node" / "bin"
|
||||
if hermes_node.is_dir():
|
||||
if _is_dir(hermes_node):
|
||||
candidates.append(str(hermes_node))
|
||||
hermes_nm = hermes_home / "node_modules" / ".bin"
|
||||
if hermes_nm.is_dir():
|
||||
if _is_dir(hermes_nm):
|
||||
candidates.append(str(hermes_nm))
|
||||
|
||||
return candidates
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import logging
|
|||
import re
|
||||
import time
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -110,6 +111,7 @@ JUDGE_SYSTEM_PROMPT = (
|
|||
JUDGE_USER_PROMPT_TEMPLATE = (
|
||||
"Goal:\n{goal}\n\n"
|
||||
"Agent's most recent response:\n{response}\n\n"
|
||||
"Current time: {current_time}\n\n"
|
||||
"Is the goal satisfied?"
|
||||
)
|
||||
|
||||
|
|
@ -120,6 +122,7 @@ JUDGE_USER_PROMPT_WITH_SUBGOALS_TEMPLATE = (
|
|||
"Additional criteria the user added mid-loop (all must also be "
|
||||
"satisfied for the goal to be DONE):\n{subgoals_block}\n\n"
|
||||
"Agent's most recent response:\n{response}\n\n"
|
||||
"Current time: {current_time}\n\n"
|
||||
"Decision: For each numbered criterion above, find concrete "
|
||||
"evidence in the agent's response that the criterion is "
|
||||
"satisfied. Do not accept generic phrases like 'all requirements "
|
||||
|
|
@ -415,6 +418,7 @@ def judge_goal(
|
|||
|
||||
# Build the prompt — pick the with-subgoals variant when applicable.
|
||||
clean_subgoals = [s.strip() for s in (subgoals or []) if s and s.strip()]
|
||||
current_time = datetime.now(tz=timezone.utc).astimezone().strftime("%Y-%m-%d %H:%M:%S %Z")
|
||||
if clean_subgoals:
|
||||
subgoals_block = "\n".join(
|
||||
f"- {i}. {text}" for i, text in enumerate(clean_subgoals, start=1)
|
||||
|
|
@ -423,11 +427,13 @@ def judge_goal(
|
|||
goal=_truncate(goal, 2000),
|
||||
subgoals_block=_truncate(subgoals_block, 2000),
|
||||
response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
|
||||
current_time=current_time,
|
||||
)
|
||||
else:
|
||||
prompt = JUDGE_USER_PROMPT_TEMPLATE.format(
|
||||
goal=_truncate(goal, 2000),
|
||||
response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
|
||||
current_time=current_time,
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -610,6 +610,43 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu
|
|||
help="Emit one JSON object per task on stdout",
|
||||
)
|
||||
|
||||
# --- decompose --- (triage → fan-out via auxiliary LLM + orchestrator)
|
||||
p_decompose = sub.add_parser(
|
||||
"decompose",
|
||||
help="Decompose a triage-column task into a graph of child tasks "
|
||||
"routed to specialist profiles by description. Falls back to "
|
||||
"specify-style single-task promotion when the task doesn't "
|
||||
"benefit from fan-out. Uses auxiliary.kanban_decomposer.",
|
||||
)
|
||||
p_decompose.add_argument(
|
||||
"task_id",
|
||||
nargs="?",
|
||||
default=None,
|
||||
help="Task id to decompose (required unless --all is given)",
|
||||
)
|
||||
p_decompose.add_argument(
|
||||
"--all",
|
||||
dest="all_triage",
|
||||
action="store_true",
|
||||
help="Decompose every task currently in the triage column",
|
||||
)
|
||||
p_decompose.add_argument(
|
||||
"--tenant",
|
||||
default=None,
|
||||
help="When used with --all, restrict the sweep to this tenant",
|
||||
)
|
||||
p_decompose.add_argument(
|
||||
"--author",
|
||||
default=None,
|
||||
help="Author name recorded on the audit comment "
|
||||
"(default: $HERMES_PROFILE or 'decomposer')",
|
||||
)
|
||||
p_decompose.add_argument(
|
||||
"--json",
|
||||
action="store_true",
|
||||
help="Emit one JSON object per task on stdout",
|
||||
)
|
||||
|
||||
# --- gc ---
|
||||
p_gc = sub.add_parser(
|
||||
"gc", help="Garbage-collect archived-task workspaces, old events, and old logs",
|
||||
|
|
@ -740,6 +777,7 @@ def kanban_command(args: argparse.Namespace) -> int:
|
|||
"notify-unsubscribe": _cmd_notify_unsubscribe,
|
||||
"context": _cmd_context,
|
||||
"specify": _cmd_specify,
|
||||
"decompose": _cmd_decompose,
|
||||
"gc": _cmd_gc,
|
||||
}
|
||||
handler = handlers.get(action)
|
||||
|
|
@ -2115,6 +2153,87 @@ def _cmd_specify(args: argparse.Namespace) -> int:
|
|||
return 0 if (ok_count > 0 or not ids) else 1
|
||||
|
||||
|
||||
def _cmd_decompose(args: argparse.Namespace) -> int:
    """Handle ``kanban decompose``: fan out one triage task, or all of them.

    Thin wrapper over ``kanban_decompose``. Exit codes: ``2`` for a usage
    error (both a task id and ``--all``, or neither); ``0`` when the
    single task succeeded, when at least one task of an ``--all`` sweep
    succeeded, or when the sweep found nothing to do; ``1`` otherwise.
    """
    from hermes_cli import kanban_decompose as decomp

    sweep_all = bool(getattr(args, "all_triage", False))
    tenant = getattr(args, "tenant", None)
    author = getattr(args, "author", None) or _profile_author()
    as_json = bool(getattr(args, "json", False))
    single_id = args.task_id

    # Usage errors first: exactly one of <task_id> / --all must be given.
    if single_id and sweep_all:
        print(
            "kanban: pass either a task id OR --all, not both",
            file=sys.stderr,
        )
        return 2
    if not single_id and not sweep_all:
        print(
            "kanban: decompose requires a task id or --all",
            file=sys.stderr,
        )
        return 2

    if sweep_all:
        ids = decomp.list_triage_ids(tenant=tenant)
        if not ids:
            if as_json:
                print(json.dumps({"decomposed": 0, "total": 0}))
            else:
                scope = f" for tenant {tenant!r}" if tenant else ""
                print(f"No triage tasks{scope}.")
            return 0
    else:
        ids = [single_id]

    succeeded = 0
    for tid in ids:
        outcome = decomp.decompose_task(tid, author=author)
        if outcome.ok:
            succeeded += 1

        if as_json:
            # One JSON object per task, regardless of success.
            record = {
                "task_id": outcome.task_id,
                "ok": outcome.ok,
                "reason": outcome.reason,
                "fanout": outcome.fanout,
                "child_ids": outcome.child_ids,
                "new_title": outcome.new_title,
            }
            print(json.dumps(record))
            continue

        if not outcome.ok:
            print(
                f"kanban: decompose {outcome.task_id}: {outcome.reason}",
                file=sys.stderr,
            )
            continue

        if outcome.fanout and outcome.child_ids:
            child_summary = ", ".join(outcome.child_ids)
            print(
                f"Decomposed {outcome.task_id} → {len(outcome.child_ids)} "
                f"children ({child_summary}); root promoted to todo"
            )
        else:
            title_suffix = ""
            if outcome.new_title:
                title_suffix = f" — retitled: {outcome.new_title!r}"
            print(
                f"Specified {outcome.task_id} → todo "
                f"(no fanout){title_suffix}"
            )

    if sweep_all:
        success = succeeded > 0 or not ids
    else:
        success = succeeded == 1
    return 0 if success else 1
|
||||
|
||||
|
||||
def _cmd_gc(args: argparse.Namespace) -> int:
|
||||
"""Remove scratch workspaces of archived tasks, prune old events, and
|
||||
delete old worker logs."""
|
||||
|
|
|
|||
|
|
@ -93,6 +93,7 @@ from toolsets import get_toolset_names
|
|||
VALID_STATUSES = {"triage", "todo", "ready", "running", "blocked", "done", "archived"}
|
||||
VALID_WORKSPACE_KINDS = {"scratch", "worktree", "dir"}
|
||||
KNOWN_TOOLSET_NAMES = frozenset(name.casefold() for name in get_toolset_names())
|
||||
_IS_WINDOWS = sys.platform == "win32"
|
||||
|
||||
# A running task's claim is valid for 15 minutes; after that the next
|
||||
# dispatcher tick reclaims it. Workers that outlive this window should call
|
||||
|
|
@ -2776,6 +2777,180 @@ def specify_triage_task(
|
|||
return True
|
||||
|
||||
|
||||
def decompose_triage_task(
    conn: sqlite3.Connection,
    task_id: str,
    *,
    root_assignee: Optional[str],
    children: list[dict],
    author: Optional[str] = None,
) -> Optional[list[str]]:
    """Fan a triage task out into child tasks and promote the root to ``todo``.

    The root task stays alive and becomes the parent of every child —
    when all children reach ``done``, the root promotes to ``ready`` and
    its assignee (typically the orchestrator profile) wakes back up to
    judge completion or spawn more work.

    ``children`` is a list of dicts, each shaped like::

        {
            "title": "...",
            "body": "...",              # optional
            "assignee": "profile-name", # optional, None -> default fallback
            "parents": [0, 2],          # indices into this same children list
        }

    Returns the list of created child task ids (in input order) on
    success. Returns ``None`` when:
      - ``children`` is empty
      - The sibling ``parents`` indices form a cycle (caller built a bad
        graph); nothing is written in that case
      - The root task does not exist
      - The root task is not in ``triage``

    Raises ``ValueError`` when an entry is malformed: not a dict, missing
    or blank ``title``, ``parents`` not a list, or a parent index that is
    not an in-range integer or points at the child itself.

    Shape and cycle validation run BEFORE the transaction, so bad input
    never touches the DB. All inserts, links, and the root status flip
    then happen inside a single ``write_txn`` so the decomposition is
    all-or-nothing (no orphan children).
    """
    # Function-scope import: only this function needs the stdlib
    # topological sorter, used purely for cycle detection.
    from graphlib import CycleError, TopologicalSorter

    if not children:
        return None
    if root_assignee is not None:
        root_assignee = _canonical_assignee(root_assignee)

    # Pre-validate the children list shape outside the txn. Cheap checks
    # that don't need DB access. Bad input aborts before we touch the DB.
    for idx, child in enumerate(children):
        if not isinstance(child, dict):
            raise ValueError(f"child[{idx}] is not a dict")
        title = child.get("title")
        if not isinstance(title, str) or not title.strip():
            raise ValueError(f"child[{idx}].title is required")
        parents_idx = child.get("parents") or []
        if not isinstance(parents_idx, list):
            raise ValueError(f"child[{idx}].parents must be a list")
        for p in parents_idx:
            # bool is a subclass of int; reject it explicitly so a stray
            # JSON ``true``/``false`` is not silently read as index 1/0.
            if (
                isinstance(p, bool)
                or not isinstance(p, int)
                or p < 0
                or p >= len(children)
            ):
                raise ValueError(
                    f"child[{idx}].parents[{p}] is not a valid index into children"
                )
            if p == idx:
                raise ValueError(f"child[{idx}] cannot list itself as a parent")

    # Reject multi-node cycles (e.g. 0 -> 1 -> 0). Children on a cycle
    # each wait on one another, so they could never all complete and the
    # root — which is linked under every child — would never wake.
    sibling_graph = {
        idx: set(child.get("parents") or [])
        for idx, child in enumerate(children)
    }
    try:
        TopologicalSorter(sibling_graph).prepare()
    except CycleError:
        return None

    # We do the full decomposition in a SINGLE write_txn so it's
    # atomic: either every child is created AND the root flips to
    # ``todo``, or nothing changes. We deliberately do NOT call any
    # kb helper that opens its own write_txn (create_task, link_tasks,
    # add_comment) from inside this block — see architecture.md
    # write_txn pitfalls. Instead we inline the INSERTs and
    # _append_event calls.
    now = int(time.time())
    child_ids: list[str] = []
    with write_txn(conn):
        root_row = conn.execute(
            "SELECT id, status, tenant FROM tasks WHERE id = ?", (task_id,)
        ).fetchone()
        if root_row is None:
            return None
        if root_row["status"] != "triage":
            return None
        tenant = root_row["tenant"]

        # Create children. Status is 'todo' regardless of parents — we
        # link them under the root AFTER creation so the dispatcher
        # sees a coherent state, and recompute_ready() at the end
        # promotes parent-free children to 'ready'.
        for child in children:
            new_id = _new_task_id()
            title = child["title"].strip()
            body = child.get("body")
            assignee = _canonical_assignee(child.get("assignee"))
            conn.execute(
                "INSERT INTO tasks "
                "(id, title, body, assignee, status, workspace_kind, "
                " tenant, created_at, created_by) "
                "VALUES (?, ?, ?, ?, 'todo', 'scratch', ?, ?, ?)",
                (
                    new_id,
                    title,
                    body if isinstance(body, str) else None,
                    assignee,
                    tenant,
                    now,
                    (author or "decomposer"),
                ),
            )
            _append_event(
                conn, new_id, "created",
                {"by": author or "decomposer", "from_decompose_of": task_id},
            )
            child_ids.append(new_id)

        # Link children to their sibling parents (within the decomposed graph).
        for idx, child in enumerate(children):
            for p_idx in child.get("parents") or []:
                parent_id = child_ids[p_idx]
                child_id = child_ids[idx]
                conn.execute(
                    "INSERT OR IGNORE INTO task_links (parent_id, child_id) "
                    "VALUES (?, ?)",
                    (parent_id, child_id),
                )
                _append_event(
                    conn, child_id, "linked",
                    {"parent": parent_id, "child": child_id},
                )

        # Make the ROOT wait for the whole graph. Simpler than computing
        # leaves: link root under every child. Cycle-free because the
        # root is only ever a child here, never a parent of children.
        for cid in child_ids:
            conn.execute(
                "INSERT OR IGNORE INTO task_links (parent_id, child_id) "
                "VALUES (?, ?)",
                (cid, task_id),
            )

        # Flip the root: triage -> todo, set assignee to the orchestrator.
        sets = ["status = 'todo'"]
        params: list[Any] = []
        if root_assignee is not None:
            sets.append("assignee = ?")
            params.append(root_assignee)
        params.append(task_id)
        conn.execute(
            f"UPDATE tasks SET {', '.join(sets)} WHERE id = ?",
            tuple(params),
        )

        # Audit comment + event on the root so the timeline shows the fan-out.
        # The comment is only written when an author was supplied; the
        # event is always recorded.
        if author and author.strip():
            conn.execute(
                "INSERT INTO task_comments (task_id, author, body, created_at) "
                "VALUES (?, ?, ?, ?)",
                (
                    task_id,
                    author.strip(),
                    "Decomposed into "
                    + ", ".join(child_ids)
                    + ". Root will wake when all children complete.",
                    now,
                ),
            )
        _append_event(
            conn, task_id, "decomposed",
            {
                "child_ids": child_ids,
                "root_assignee": root_assignee,
            },
        )

    # Outside the write_txn: promote parent-free children to 'ready'
    # so the dispatcher picks them up on its next tick. Same pattern
    # specify_triage_task uses.
    recompute_ready(conn)
    return child_ids
|
||||
|
||||
|
||||
def archive_task(conn: sqlite3.Connection, task_id: str) -> bool:
|
||||
with write_txn(conn):
|
||||
cur = conn.execute(
|
||||
|
|
@ -4024,6 +4199,7 @@ def _default_spawn(
|
|||
stderr=subprocess.STDOUT,
|
||||
env=env,
|
||||
start_new_session=True,
|
||||
creationflags=subprocess.CREATE_NO_WINDOW if _IS_WINDOWS else 0,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
log_f.close()
|
||||
|
|
|
|||
440
hermes_cli/kanban_decompose.py
Normal file
440
hermes_cli/kanban_decompose.py
Normal file
|
|
@ -0,0 +1,440 @@
|
|||
"""Kanban decomposer — fan a triage task out into a graph of child tasks.
|
||||
|
||||
Invoked by ``hermes kanban decompose [task_id | --all]`` and the
|
||||
auto-decompose path in the gateway dispatcher loop. Reads the user's
|
||||
profile roster (with descriptions) and asks the auxiliary LLM to
|
||||
return a task graph in JSON. Then atomically creates the children,
|
||||
links them under the root, and flips the root ``triage -> todo``.
|
||||
|
||||
The root task stays alive and is linked as a dependent of every child
(each child is recorded as a ``task_links`` parent of the root), so when
the whole graph completes the root wakes back up — its assignee (the
orchestrator profile) gets a chance to judge completion and add more
tasks if the work isn't done yet.
|
||||
|
||||
Design notes
|
||||
------------
|
||||
|
||||
* Mirrors the shape of ``hermes_cli/kanban_specify.py``: lazy aux
|
||||
client import inside the function, lenient response parse, never
|
||||
raises on expected failure modes.
|
||||
|
||||
* The system prompt sees the *configured* profile roster — names plus
|
||||
descriptions plus the default fallback. Profiles without a
|
||||
description are still listed (with a note) so the orchestrator can
|
||||
match on name as a fallback, but the user has an obvious incentive
|
||||
to describe them.
|
||||
|
||||
* ``fanout=false`` collapses to the same effect as ``kanban specify``:
|
||||
we tighten the body and flip ``triage -> todo`` as a single task,
|
||||
no children created. This makes ``decompose`` a strict superset of
|
||||
``specify`` from the user's perspective.
|
||||
|
||||
* If the LLM picks an assignee that doesn't exist as a profile, we
|
||||
rewrite it to the configured ``default_assignee`` (or the default
|
||||
profile if unset). A child task NEVER ends up with ``assignee=None``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from hermes_cli import kanban_db as kb
|
||||
from hermes_cli import profiles as profiles_mod
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_SYSTEM_PROMPT = """You are the Kanban decomposer for the Hermes Agent board.
|
||||
|
||||
A user dropped a rough idea into the Triage column. Your job is to break it
|
||||
into a small graph of concrete child tasks and route each one to the best-
|
||||
matching profile from the available roster.
|
||||
|
||||
You will be given:
|
||||
- The original task title and body
|
||||
- The list of available profiles (each with name + description)
|
||||
- The fallback "default_assignee" used when no profile fits
|
||||
|
||||
Output a single JSON object with this exact shape:
|
||||
|
||||
{
|
||||
"fanout": true,
|
||||
"rationale": "<one sentence on why this decomposition>",
|
||||
"tasks": [
|
||||
{
|
||||
"title": "<concrete task title, imperative voice, <= 80 chars>",
|
||||
"body": "<detailed spec for the worker on this child task>",
|
||||
"assignee": "<profile name from the roster, or null for default>",
|
||||
"parents": [<int>, ...]
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
|
||||
Rules:
|
||||
- "parents" is a list of INDICES (0-based) into this same "tasks" list,
|
||||
expressing actual data dependencies. Tasks with no parents run in
|
||||
PARALLEL. Tasks with parents wait until every parent completes.
|
||||
- Prefer parallelism. If two tasks can be done independently, give
|
||||
them no parents so the dispatcher fans them out at once.
|
||||
- Use 2-6 tasks for normal work. Don't create 20 tiny tasks. Don't
|
||||
cram everything into 1 task.
|
||||
- Pick assignees from the roster by matching the task to the profile's
|
||||
DESCRIPTION (not just the name). When nothing matches well, use null
|
||||
and the system will route to the default_assignee.
|
||||
- Each child task body is what a fresh worker will read with no other
|
||||
context — be specific about goal, approach, and acceptance criteria.
|
||||
|
||||
When the task is genuinely a single unit of work (no useful decomposition),
|
||||
return:
|
||||
|
||||
{
|
||||
"fanout": false,
|
||||
"rationale": "<one sentence>",
|
||||
"title": "<tightened title>",
|
||||
"body": "<concrete spec for a single worker>"
|
||||
}
|
||||
|
||||
In that case the task stays as one work item, just with a tightened spec.
|
||||
|
||||
No preamble, no closing remarks, no code fences. Output only the JSON object.
|
||||
"""
|
||||
|
||||
|
||||
_USER_TEMPLATE = """Task id: {task_id}
|
||||
Title: {title}
|
||||
Body:
|
||||
{body}
|
||||
|
||||
Available profiles (assignees you may pick from):
|
||||
{roster}
|
||||
|
||||
Default assignee (used when no profile fits a task): {default_assignee}
|
||||
"""
|
||||
|
||||
|
||||
_FENCE_RE = re.compile(r"^```(?:json)?\s*|\s*```$", re.MULTILINE)
|
||||
|
||||
|
||||
@dataclass
class DecomposeOutcome:
    """Outcome record returned by ``decompose_task`` for one triage task.

    ``ok`` tells the caller whether the task was promoted; ``reason`` is a
    short human-readable explanation in either case.
    """

    # Id of the triage task the decomposition was attempted on.
    task_id: str
    # True when the task left triage (fan-out or single-task promotion).
    ok: bool
    # Short explanation of what happened; empty by default.
    reason: str = ""
    # True when child tasks were created, False for a single-task promotion.
    fanout: bool = False
    # Ids of the created children (only set on a successful fan-out).
    child_ids: Optional[list[str]] = None
    # Tightened title applied on the fanout=false path, if any.
    new_title: Optional[str] = None
|
||||
|
||||
|
||||
def _truncate(text: str, limit: int) -> str:
    """Return *text* shortened to at most *limit* characters.

    Text that already fits is returned unchanged. Otherwise the result is
    the first ``limit - 1`` characters followed by a single ``…`` marker.
    A non-positive *limit* yields an empty string.
    """
    if len(text) <= limit:
        return text
    if limit <= 0:
        # Without this guard ``text[: limit - 1]`` would slice with a
        # negative index and return nearly the whole string.
        return ""
    return text[: limit - 1] + "…"
|
||||
|
||||
|
||||
def _extract_json_blob(raw: str) -> Optional[dict]:
    """Leniently pull a single JSON object out of an LLM response.

    Code fences are stripped with ``_FENCE_RE``, then one JSON value is
    decoded starting at the first ``{``. Any text before that brace or
    after the decoded object is ignored, so a trailing remark that
    happens to contain braces no longer invalidates a good object.

    Returns the decoded dict, or ``None`` when the input is empty, has no
    ``{``, does not decode, or decodes to something other than a dict.
    """
    if not raw:
        return None
    stripped = _FENCE_RE.sub("", raw.strip())
    start = stripped.find("{")
    if start == -1:
        return None
    try:
        # raw_decode stops at the end of the first complete value instead
        # of requiring the whole remaining string to be valid JSON. Only
        # the FIRST brace is tried on purpose: scanning later braces could
        # pick a nested object out of a truncated response.
        val, _end = json.JSONDecoder().raw_decode(stripped, start)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    return val if isinstance(val, dict) else None
|
||||
|
||||
|
||||
def _profile_author() -> str:
    """Pick the author name used for audit comments.

    Mirror of ``hermes_cli.kanban._profile_author``: the first non-empty
    value of ``$HERMES_PROFILE`` then ``$USER`` wins; otherwise the
    literal ``"decomposer"`` is returned.
    """
    for env_var in ("HERMES_PROFILE", "USER"):
        candidate = os.environ.get(env_var)
        if candidate:
            return candidate
    return "decomposer"
|
||||
|
||||
|
||||
def _load_config() -> dict:
    """Load the Hermes config, falling back to ``{}`` on any failure.

    The import is done lazily inside the function; an import error, a
    loader exception, or a falsy loader result all yield an empty dict.
    """
    try:
        from hermes_cli.config import load_config

        loaded = load_config() or {}
    except Exception:
        # Best-effort: callers treat a missing config as "use defaults".
        loaded = {}
    return loaded
|
||||
|
||||
|
||||
def _resolve_orchestrator_profile(cfg: dict) -> str:
    """Resolve which profile owns decomposition.

    Uses ``kanban.orchestrator_profile`` from *cfg* when it is a non-empty
    string naming an existing profile. Otherwise falls back to the active
    profile name, and finally to ``"default"``, so a task is never
    stranded for lack of an orchestrator.

    Tolerates malformed config: a non-dict *cfg*, a ``kanban`` key that is
    present but null (an empty ``kanban:`` section) or not a mapping, and
    a non-string ``orchestrator_profile`` are all treated as "unset".
    """
    kanban_cfg = cfg.get("kanban") if isinstance(cfg, dict) else None
    if not isinstance(kanban_cfg, dict):
        # ``cfg.get("kanban", {})`` would hand back None for a null key.
        kanban_cfg = {}
    raw = kanban_cfg.get("orchestrator_profile")
    explicit = raw.strip() if isinstance(raw, str) else ""
    if explicit:
        try:
            if profiles_mod.profile_exists(explicit):
                return explicit
        except Exception:
            # Deliberately best-effort: an unreadable profile store just
            # means we use the fallback below.
            pass
    # Fall back to the active default profile.
    try:
        return profiles_mod.get_active_profile_name() or "default"
    except Exception:
        return "default"
|
||||
|
||||
|
||||
def _resolve_default_assignee(cfg: dict) -> str:
    """Resolve which profile catches child tasks the orchestrator can't route.

    Uses ``kanban.default_assignee`` from *cfg* when it is a non-empty
    string naming an existing profile. Otherwise falls back to the active
    profile name, and finally to ``"default"``.

    Tolerates malformed config: a non-dict *cfg*, a ``kanban`` key that is
    present but null (an empty ``kanban:`` section) or not a mapping, and
    a non-string ``default_assignee`` are all treated as "unset".
    """
    kanban_cfg = cfg.get("kanban") if isinstance(cfg, dict) else None
    if not isinstance(kanban_cfg, dict):
        # ``cfg.get("kanban", {})`` would hand back None for a null key.
        kanban_cfg = {}
    raw = kanban_cfg.get("default_assignee")
    explicit = raw.strip() if isinstance(raw, str) else ""
    if explicit:
        try:
            if profiles_mod.profile_exists(explicit):
                return explicit
        except Exception:
            # Deliberately best-effort: fall through to the active profile.
            pass
    try:
        return profiles_mod.get_active_profile_name() or "default"
    except Exception:
        return "default"
|
||||
|
||||
|
||||
def _build_roster() -> tuple[list[dict], set[str]]:
    """Collect the profile roster shown to the LLM plus the valid-name set.

    Returns ``(roster_for_prompt, valid_assignee_names)``. Every roster
    entry is a dict with ``name``, ``description`` and ``has_description``;
    profiles with a blank description get a placeholder text instead.
    The name set is used after the LLM responds to rewrite invalid
    assignees to the default fallback.

    If listing profiles fails, a warning is logged and both collections
    come back empty.
    """
    try:
        profiles = profiles_mod.list_profiles()
    except Exception as exc:
        logger.warning("decompose: failed to list profiles: %s", exc)
        return [], set()

    entries: list[dict] = []
    names: set[str] = set()
    for prof in profiles:
        text = (prof.description or "").strip()
        if text:
            shown = text
        else:
            shown = f"(no description; profile named {prof.name!r})"
        entries.append(
            {
                "name": prof.name,
                "description": shown,
                "has_description": bool(text),
            }
        )
        names.add(prof.name)
    return entries, names
|
||||
|
||||
|
||||
def _format_roster(roster: list[dict]) -> str:
    """Render the roster as one bullet line per profile for the user prompt.

    Entries whose ``has_description`` is falsy are tagged as undescribed.
    An empty roster renders as a single explanatory line.
    """
    if not roster:
        return " (no profiles installed — decomposer cannot route work)"

    def _render(item: dict) -> str:
        marker = " ⚠ undescribed" if not item["has_description"] else ""
        return f" - {item['name']}{marker}: {item['description']}"

    return "\n".join(_render(item) for item in roster)
|
||||
|
||||
|
||||
def _normalize_children(
    raw_tasks: list,
    *,
    valid_names: set[str],
    default_assignee: str,
    task_id: str,
) -> tuple[list[dict], Optional[str]]:
    """Validate the LLM's ``tasks`` list and coerce it into child specs.

    Returns ``(children, None)`` on success, or ``([], reason)`` when an
    entry is structurally unusable (not an object, or no title).

    Per entry:
    * a non-string body becomes ``""``;
    * the assignee is stripped before being checked against
      *valid_names*; a blank or unknown assignee is rewritten to
      *default_assignee*, so a child never ends up unassigned;
    * parent indices keep only real integers (JSON booleans excluded)
      that are in range, are not the entry's own index, and have not
      already been listed.
    """
    total = len(raw_tasks)
    children: list[dict] = []
    for idx, entry in enumerate(raw_tasks):
        if not isinstance(entry, dict):
            return [], f"tasks[{idx}] is not an object"
        title = entry.get("title")
        if not isinstance(title, str) or not title.strip():
            return [], f"tasks[{idx}].title is missing or empty"
        body = entry.get("body")
        if not isinstance(body, str):
            body = ""

        assignee = entry.get("assignee")
        # Match on the stripped name: the emptiness check already ignores
        # surrounding whitespace, so the roster lookup must as well or a
        # padded-but-valid name would be rerouted to the default.
        candidate = assignee.strip() if isinstance(assignee, str) else ""
        if not candidate:
            chosen = default_assignee
        elif candidate not in valid_names:
            logger.info(
                "decompose: task %s child %d picked unknown assignee %r — "
                "routing to default_assignee %r",
                task_id, idx, assignee, default_assignee,
            )
            chosen = default_assignee
        else:
            chosen = candidate

        parents = entry.get("parents") or []
        if not isinstance(parents, list):
            parents = []
        # Clean parent indices: drop non-int, out-of-range, self-references
        # and duplicates. bool is a subclass of int, so JSON true/false
        # must be rejected explicitly or they would read as index 1/0.
        clean_parents: list[int] = []
        for p in parents:
            if isinstance(p, bool) or not isinstance(p, int):
                continue
            if 0 <= p < total and p != idx and p not in clean_parents:
                clean_parents.append(p)

        children.append({
            "title": title.strip()[:200],
            "body": body.strip(),
            "assignee": chosen,
            "parents": clean_parents,
        })
    return children, None


def decompose_task(
    task_id: str,
    *,
    author: Optional[str] = None,
    timeout: Optional[int] = None,
) -> DecomposeOutcome:
    """Decompose a triage task into a graph of child tasks.

    Asks the auxiliary LLM for a task graph, validates it, and hands it to
    ``kb.decompose_triage_task``. When the LLM answers ``fanout=false`` the
    task is promoted as a single item via ``kb.specify_triage_task``.

    Args:
        task_id: Id of the task to decompose; it must be in ``triage``.
        author: Name recorded on the audit comment; defaults to
            ``_profile_author()``.
        timeout: LLM request timeout in seconds; defaults to 180.

    Returns an outcome describing what happened. Never raises for
    expected failure modes (task not in triage, no aux client
    configured, API error, malformed response, decomposer returned
    fanout=true with empty task list) — those surface via ``ok=False``.
    """
    # Read the task in its own short-lived connection so nothing is held
    # open across the (slow) LLM call below.
    with kb.connect() as conn:
        task = kb.get_task(conn, task_id)
    if task is None:
        return DecomposeOutcome(task_id, False, "unknown task id")
    if task.status != "triage":
        return DecomposeOutcome(
            task_id, False, f"task is not in triage (status={task.status!r})"
        )

    cfg = _load_config()
    orchestrator = _resolve_orchestrator_profile(cfg)
    default_assignee = _resolve_default_assignee(cfg)
    roster, valid_names = _build_roster()

    # Lazy import: the auxiliary client is optional.
    try:
        from agent.auxiliary_client import (  # type: ignore
            get_auxiliary_extra_body,
            get_text_auxiliary_client,
        )
    except Exception as exc:
        logger.debug("decompose: auxiliary client import failed: %s", exc)
        return DecomposeOutcome(task_id, False, "auxiliary client unavailable")

    try:
        client, model = get_text_auxiliary_client("kanban_decomposer")
    except Exception as exc:
        logger.debug("decompose: get_text_auxiliary_client failed: %s", exc)
        return DecomposeOutcome(task_id, False, "auxiliary client unavailable")

    if client is None or not model:
        return DecomposeOutcome(task_id, False, "no auxiliary client configured")

    user_msg = _USER_TEMPLATE.format(
        task_id=task.id,
        title=_truncate(task.title or "", 400),
        body=_truncate(task.body or "(no body)", 4000),
        roster=_format_roster(roster),
        default_assignee=default_assignee,
    )

    try:
        resp = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            temperature=0.3,
            max_tokens=4000,
            timeout=timeout or 180,
            extra_body=get_auxiliary_extra_body() or None,
        )
    except Exception as exc:
        logger.info(
            "decompose: API call failed for %s (%s)", task_id, exc,
        )
        return DecomposeOutcome(task_id, False, f"LLM error: {type(exc).__name__}")

    # An unexpected response shape is treated like an empty reply and is
    # reported as malformed JSON below.
    try:
        raw = resp.choices[0].message.content or ""
    except Exception:
        raw = ""

    parsed = _extract_json_blob(raw)
    if parsed is None:
        return DecomposeOutcome(task_id, False, "LLM returned malformed JSON")

    fanout = bool(parsed.get("fanout"))
    audit_author = author or _profile_author()

    if not fanout:
        # Fall back to single-task spec promotion (same effect as specify).
        new_title = parsed.get("title")
        new_body = parsed.get("body")
        title_val = new_title.strip() if isinstance(new_title, str) and new_title.strip() else None
        body_val = new_body if isinstance(new_body, str) and new_body.strip() else None
        if title_val is None and body_val is None:
            return DecomposeOutcome(
                task_id, False, "decomposer returned fanout=false with no title/body",
            )
        with kb.connect() as conn:
            ok = kb.specify_triage_task(
                conn,
                task_id,
                title=title_val,
                body=body_val,
                author=audit_author,
            )
        if not ok:
            return DecomposeOutcome(
                task_id, False, "task moved out of triage before promotion",
            )
        return DecomposeOutcome(
            task_id, True, "single task (no fanout)",
            fanout=False, new_title=title_val,
        )

    raw_tasks = parsed.get("tasks") or []
    if not isinstance(raw_tasks, list) or not raw_tasks:
        return DecomposeOutcome(
            task_id, False, "decomposer returned fanout=true with empty tasks list",
        )

    # Rewrite invalid assignees to the default fallback. Never leave a
    # task with assignee=None — the user explicitly does not want that.
    children, problem = _normalize_children(
        raw_tasks,
        valid_names=valid_names,
        default_assignee=default_assignee,
        task_id=task_id,
    )
    if problem is not None:
        return DecomposeOutcome(task_id, False, problem)

    try:
        with kb.connect() as conn:
            child_ids = kb.decompose_triage_task(
                conn,
                task_id,
                root_assignee=orchestrator,
                children=children,
                author=audit_author,
            )
    except ValueError as exc:
        return DecomposeOutcome(task_id, False, f"DB rejected graph: {exc}")
    except Exception as exc:
        logger.exception("decompose: DB error on task %s", task_id)
        return DecomposeOutcome(task_id, False, f"DB error: {type(exc).__name__}")

    if child_ids is None:
        return DecomposeOutcome(
            task_id, False, "task moved out of triage before decomposition",
        )

    return DecomposeOutcome(
        task_id, True, f"decomposed into {len(child_ids)} children",
        fanout=True, child_ids=child_ids,
    )
|
||||
|
||||
|
||||
def list_triage_ids(*, tenant: Optional[str] = None) -> list[str]:
    """Return the ids of tasks currently sitting in the triage column.

    ``tenant`` is forwarded to ``kb.list_tasks`` unchanged. The query is
    issued with ``limit=1000``, so at most that many ids come back.
    """
    with kb.connect() as conn:
        triage_rows = kb.list_tasks(
            conn, status="triage", tenant=tenant, limit=1000,
        )
        ids: list[str] = []
        for row in triage_rows:
            ids.append(row.id)
        return ids
|
||||
|
|
@ -9082,6 +9082,7 @@ def cmd_profile(args):
|
|||
clone_config=clone,
|
||||
no_alias=no_alias,
|
||||
no_skills=no_skills,
|
||||
description=getattr(args, "description", None),
|
||||
)
|
||||
print(f"\nProfile '{name}' created at {profile_dir}")
|
||||
|
||||
|
|
@ -9181,6 +9182,107 @@ def cmd_profile(args):
|
|||
print(f"Error: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
elif action == "describe":
|
||||
# Read or write a profile's description. The description is
|
||||
# consumed by the kanban decomposer to route tasks based on
|
||||
# role instead of name alone.
|
||||
from hermes_cli import profiles as _profiles_mod
|
||||
|
||||
all_flag = bool(getattr(args, "all_missing", False))
|
||||
auto_flag = bool(getattr(args, "auto", False))
|
||||
overwrite_flag = bool(getattr(args, "overwrite", False))
|
||||
text_value = getattr(args, "text", None)
|
||||
name = getattr(args, "profile_name", None)
|
||||
|
||||
if all_flag and not auto_flag:
|
||||
print("profile describe: --all requires --auto", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
if all_flag and (text_value or name):
|
||||
print(
|
||||
"profile describe: --all is mutually exclusive with a profile name / --text",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(2)
|
||||
if not all_flag and not name:
|
||||
print("profile describe: profile name is required (or --all --auto)", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
if text_value and auto_flag:
|
||||
print(
|
||||
"profile describe: --text is mutually exclusive with --auto",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(2)
|
||||
|
||||
# Show current description if no operation requested.
|
||||
if name and not text_value and not auto_flag:
|
||||
try:
|
||||
if _profiles_mod.normalize_profile_name(name) == "default":
|
||||
from hermes_constants import get_hermes_home as _hh
|
||||
profile_dir = Path(_hh())
|
||||
else:
|
||||
profile_dir = _profiles_mod.get_profile_dir(name)
|
||||
except Exception as exc:
|
||||
print(f"Error: {exc}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
if not profile_dir.is_dir():
|
||||
print(f"Error: profile '{name}' not found", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
meta = _profiles_mod.read_profile_meta(profile_dir)
|
||||
desc = meta.get("description") or ""
|
||||
if not desc:
|
||||
print(f"(no description set for '{name}')")
|
||||
else:
|
||||
tag = "[auto] " if meta.get("description_auto") else ""
|
||||
print(f"{tag}{desc}")
|
||||
sys.exit(0)
|
||||
|
||||
# --text path: just write the user-authored description.
|
||||
if text_value:
|
||||
try:
|
||||
if _profiles_mod.normalize_profile_name(name) == "default":
|
||||
from hermes_constants import get_hermes_home as _hh
|
||||
profile_dir = Path(_hh())
|
||||
else:
|
||||
profile_dir = _profiles_mod.get_profile_dir(name)
|
||||
_profiles_mod.write_profile_meta(
|
||||
profile_dir,
|
||||
description=text_value,
|
||||
description_auto=False,
|
||||
)
|
||||
print(f"Description updated for '{name}'.")
|
||||
except Exception as exc:
|
||||
print(f"Error: {exc}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
sys.exit(0)
|
||||
|
||||
# --auto path: invoke the LLM describer.
|
||||
from hermes_cli import profile_describer as _pd
|
||||
|
||||
if all_flag:
|
||||
targets = _pd.list_describable_profiles(missing_only=True)
|
||||
if not targets:
|
||||
print("All profiles already have descriptions.")
|
||||
sys.exit(0)
|
||||
else:
|
||||
targets = [name]
|
||||
|
||||
ok_count = 0
|
||||
fail_count = 0
|
||||
for tgt in targets:
|
||||
outcome = _pd.describe_profile(tgt, overwrite=overwrite_flag)
|
||||
if outcome.ok:
|
||||
ok_count += 1
|
||||
print(f"Described '{outcome.profile_name}': {outcome.description}")
|
||||
else:
|
||||
fail_count += 1
|
||||
print(
|
||||
f"profile describe {outcome.profile_name}: {outcome.reason}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
if not all_flag:
|
||||
sys.exit(0 if ok_count == 1 else 1)
|
||||
sys.exit(0 if ok_count > 0 else 1)
|
||||
|
||||
elif action == "show":
|
||||
name = args.profile_name
|
||||
from hermes_cli.profiles import (
|
||||
|
|
@ -9684,8 +9786,8 @@ _BUILTIN_SUBCOMMANDS = frozenset(
|
|||
"config", "cron", "curator", "dashboard", "debug", "doctor",
|
||||
"dump", "fallback", "gateway", "hooks", "import", "insights",
|
||||
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory",
|
||||
"model", "pairing", "plugins", "postinstall", "profile", "proxy", "send",
|
||||
"sessions", "setup",
|
||||
"model", "pairing", "plugins", "postinstall", "profile", "proxy",
|
||||
"send", "sessions", "setup",
|
||||
"skills", "slack", "status", "tools", "uninstall", "update",
|
||||
"version", "webhook", "whatsapp", "chat",
|
||||
# Help-ish invocations — plugin commands not being listed in
|
||||
|
|
@ -12076,6 +12178,13 @@ Examples:
|
|||
action="store_true",
|
||||
help="Create an empty profile with no bundled skills (opts out of `hermes update` skill sync)",
|
||||
)
|
||||
profile_create.add_argument(
|
||||
"--description",
|
||||
default=None,
|
||||
help="One- or two-sentence description of what this profile is good at. "
|
||||
"Used by the kanban decomposer to route tasks based on role instead "
|
||||
"of profile name alone. Skip and add later via `hermes profile describe`.",
|
||||
)
|
||||
|
||||
profile_delete = profile_subparsers.add_parser("delete", help="Delete a profile")
|
||||
profile_delete.add_argument("profile_name", help="Profile to delete")
|
||||
|
|
@ -12083,6 +12192,40 @@ Examples:
|
|||
"-y", "--yes", action="store_true", help="Skip confirmation prompt"
|
||||
)
|
||||
|
||||
profile_describe = profile_subparsers.add_parser(
|
||||
"describe",
|
||||
help="Read or set a profile's description (used by the kanban orchestrator)",
|
||||
)
|
||||
profile_describe.add_argument(
|
||||
"profile_name",
|
||||
nargs="?",
|
||||
default=None,
|
||||
help="Profile to describe (omit + use --all --auto to sweep)",
|
||||
)
|
||||
profile_describe.add_argument(
|
||||
"--text",
|
||||
default=None,
|
||||
help="Set description to this exact text (overwrites any existing description)",
|
||||
)
|
||||
profile_describe.add_argument(
|
||||
"--auto",
|
||||
action="store_true",
|
||||
help="Auto-generate description via the auxiliary LLM "
|
||||
"(uses auxiliary.profile_describer)",
|
||||
)
|
||||
profile_describe.add_argument(
|
||||
"--overwrite",
|
||||
action="store_true",
|
||||
help="With --auto, replace user-authored descriptions too (default: only "
|
||||
"fill in missing or previously-auto descriptions)",
|
||||
)
|
||||
profile_describe.add_argument(
|
||||
"--all",
|
||||
dest="all_missing",
|
||||
action="store_true",
|
||||
help="With --auto, run on every profile missing a description",
|
||||
)
|
||||
|
||||
profile_show = profile_subparsers.add_parser("show", help="Show profile details")
|
||||
profile_show.add_argument("profile_name", help="Profile to show")
|
||||
|
||||
|
|
|
|||
|
|
@ -1688,7 +1688,26 @@ def list_authenticated_providers(
|
|||
continue
|
||||
# Live model discovery from custom provider endpoints (matches
|
||||
# Section 3 behavior for user ``providers:`` entries).
|
||||
if api_url and api_key:
|
||||
# Also probes when no api_key is set (e.g. local llama.cpp /
|
||||
# Ollama servers) — the /models endpoint often works without
|
||||
# auth. The CLI's _model_flow_named_custom always probes, so
|
||||
# the Telegram/Discord picker should do the same for parity.
|
||||
# Live-discovery policy:
|
||||
# - With an api_key, the user has explicitly opted into the
|
||||
# endpoint and live /models is the source of truth — replace
|
||||
# the (possibly partial) ``models:`` subset configured for
|
||||
# context-length overrides with the full live catalog.
|
||||
# This is the Bifrost / aggregator-gateway case.
|
||||
# - Without an api_key but with an explicit ``models:`` list
|
||||
# (or top-level ``model:``), the user is narrowing a public
|
||||
# endpoint to a specific subset (e.g. ollama.com /v1/models
|
||||
# returns 35 models but the user only wants 4). Preserve the
|
||||
# explicit list and skip live discovery.
|
||||
# - Without an api_key AND no explicit models, fall through to
|
||||
# live discovery so bare-endpoint custom providers (local
|
||||
# llama.cpp / Ollama servers) still appear populated.
|
||||
should_probe = bool(api_url) and (bool(api_key) or not grp["models"])
|
||||
if should_probe:
|
||||
try:
|
||||
from hermes_cli.models import fetch_api_models
|
||||
|
||||
|
|
|
|||
|
|
@ -608,6 +608,38 @@ class PluginContext:
|
|||
self.manifest.name, provider.name,
|
||||
)
|
||||
|
||||
# -- browser provider registration ---------------------------------------
|
||||
|
||||
def register_browser_provider(self, provider) -> None:
    """Register a cloud browser backend supplied by this plugin.

    ``provider`` must be an instance of
    :class:`agent.browser_provider.BrowserProvider`; anything else is
    ignored with a warning. The ``provider.name`` attribute is what
    ``browser.cloud_provider`` in ``config.yaml`` matches against when
    routing cloud-mode ``browser_*`` tool calls.

    Mirrors :meth:`register_web_search_provider` — same registration
    shape, same gating, same logging. The browser subsystem's dispatcher
    (:func:`tools.browser_tool._get_cloud_provider`) consults the
    registry built up by these calls.
    """
    from agent.browser_provider import BrowserProvider
    from agent.browser_registry import register_provider as _register_browser_provider

    if isinstance(provider, BrowserProvider):
        _register_browser_provider(provider)
        logger.info(
            "Plugin '%s' registered browser provider: %s",
            self.manifest.name, provider.name,
        )
        return

    # Wrong type: refuse the registration but keep loading the plugin.
    logger.warning(
        "Plugin '%s' tried to register a browser provider that does "
        "not inherit from BrowserProvider. Ignoring.",
        self.manifest.name,
    )
|
||||
|
||||
# -- platform adapter registration ---------------------------------------
|
||||
|
||||
def register_platform(
|
||||
|
|
|
|||
299
hermes_cli/profile_describer.py
Normal file
299
hermes_cli/profile_describer.py
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
"""Profile describer — auto-generate ``description`` for a profile.
|
||||
|
||||
Used by ``hermes profile describe <name> --auto`` and the dashboard's
"auto-generate description" button. Reads the profile's installed
skills, model+provider, and name (memory is deliberately not read —
see the design notes below), then asks the auxiliary LLM to produce a
1-2 sentence description of what the profile is good at.
|
||||
|
||||
Result is written to ``<profile_dir>/profile.yaml`` with
|
||||
``description_auto: true`` so the dashboard can surface a "review"
|
||||
badge. User can edit afterward to confirm.
|
||||
|
||||
Design notes
|
||||
------------
|
||||
- Mirrors the shape of ``hermes_cli/kanban_specify.py``: lazy aux
|
||||
client import inside the function, lenient response parse, never
|
||||
raises on expected failure modes.
|
||||
- Reads at most ``MAX_SKILLS_FOR_PROMPT`` skill names to keep the
|
||||
prompt bounded. No skill body — names + categories are enough
|
||||
signal and avoid blowing context on profiles with 100+ skills.
|
||||
- Memory is intentionally NOT read here. Memories are personal and
|
||||
the orchestrator routes work to a *role* not a *biography*. If we
|
||||
find later that memory adds signal we can wire it; for now,
|
||||
skills + name + model is plenty.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_cli import profiles as profiles_mod
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cap on how many skill names we feed the LLM. Profiles with 200+
# skills (uncommon but possible) would blow context otherwise. The cap
# applies to the combined list across all categories; when it is
# exceeded, _collect_skills samples evenly-spaced names from the sorted list.
|
||||
MAX_SKILLS_FOR_PROMPT = 60
|
||||
|
||||
|
||||
_SYSTEM_PROMPT = """You are a profile-describer for the Hermes Agent kanban board.
|
||||
|
||||
A user runs multiple "profiles" — distinct agent identities, each with their
|
||||
own skills, model, and configuration. The kanban board's orchestrator routes
|
||||
work to whichever profile best fits each task. To do that well, every
|
||||
profile needs a short, concrete description of what it's good at.
|
||||
|
||||
You are given a profile's:
|
||||
- Name
|
||||
- Model / provider
|
||||
- List of installed skill names (a strong signal of role / domain)
|
||||
|
||||
Produce a single JSON object with exactly one key:
|
||||
|
||||
{
|
||||
"description": "<1-2 sentence description, plain prose, no preamble>"
|
||||
}
|
||||
|
||||
Rules:
|
||||
- The description is what an orchestrator will read to decide whether to
|
||||
route a task here. Lead with the profile's strongest capability.
|
||||
- Stay concrete. Bad: "an AI agent that helps users."
|
||||
Good: "Reads and modifies Python codebases — runs tests,
|
||||
refactors functions, opens GitHub PRs."
|
||||
- 1-2 sentences, <= 280 characters total.
|
||||
- Never invent capabilities the skills don't suggest.
|
||||
- Never write "Hermes Agent profile" or other meta-narration.
|
||||
- No code fences, no preamble, no closing remarks. Output only JSON.
|
||||
"""
|
||||
|
||||
|
||||
_USER_TEMPLATE = """Profile name: {name}
|
||||
Default model: {model}
|
||||
Provider: {provider}
|
||||
Installed skill count: {skill_count}
|
||||
Notable skills (up to {skill_cap}):
|
||||
{skill_list}
|
||||
"""
|
||||
|
||||
|
||||
_FENCE_RE = re.compile(r"^```(?:json)?\s*|\s*```$", re.MULTILINE)
|
||||
|
||||
|
||||
@dataclass
class DescribeOutcome:
    """Outcome record returned by ``describe_profile`` for one profile.

    ``ok`` signals success; ``reason`` carries a short explanation.
    """

    # Name of the profile the describer was run against.
    profile_name: str
    # True when the describe attempt succeeded.
    ok: bool
    # Short explanation of the result; empty by default.
    reason: str = ""
    # The generated description text, when one is available.
    description: Optional[str] = None
|
||||
|
||||
|
||||
def _collect_skills(profile_dir: Path) -> list[str]:
    """Return a stable, capped list of skill names for the prompt.

    Every ``SKILL.md`` found under ``<profile_dir>/skills`` contributes
    one name. Format: ``category/skill_name`` where category is the
    top-level subdir under ``skills/`` (e.g. ``devops``, ``research``) and
    skill_name is the directory holding ``SKILL.md``. Skills whose
    directory sits directly under ``skills/`` show as bare ``skill_name``.
    A ``SKILL.md`` placed directly in ``skills/`` is ignored.

    Anything inside a ``.hub`` or ``.git`` directory below ``skills/`` is
    skipped. The check compares path components relative to ``skills/``,
    so it works with any path separator and is not affected by
    directories above the profile.

    The sorted list is returned whole when it has at most
    ``MAX_SKILLS_FOR_PROMPT`` entries; otherwise that many evenly-spaced
    entries are sampled from it. Returns ``[]`` when there is no
    ``skills`` directory.
    """
    skills_dir = profile_dir / "skills"
    if not skills_dir.is_dir():
        return []
    names: list[str] = []
    for md in skills_dir.rglob("SKILL.md"):
        try:
            rel = md.relative_to(skills_dir)
        except ValueError:
            continue
        parts = rel.parts[:-1]  # drop SKILL.md filename
        if not parts:
            continue
        # Component comparison instead of a "/.hub/" substring test: the
        # substring form silently stops filtering when the OS uses
        # backslash separators.
        if ".hub" in parts or ".git" in parts:
            continue
        # parts[-1] is the skill dir name; parts[0] is the category.
        if len(parts) == 1:
            names.append(parts[0])
        else:
            names.append(f"{parts[0]}/{parts[-1]}")
    names.sort()
    # Keep within prompt budget. Skills earlier in the alphabet aren't
    # more important, so pick evenly-spaced entries instead of the head;
    # a profile with skills A..Z shouldn't be described as "starts with A".
    if len(names) <= MAX_SKILLS_FOR_PROMPT:
        return names
    step = len(names) / MAX_SKILLS_FOR_PROMPT
    return [names[int(i * step)] for i in range(MAX_SKILLS_FOR_PROMPT)]
|
||||
|
||||
|
||||
def _extract_json_blob(raw: str) -> Optional[dict]:
    """Pull a JSON object out of an LLM reply, or return ``None``.

    Code fences are removed first, then everything from the first ``{``
    to the last ``}`` is parsed as JSON.  ``None`` is returned for empty
    input, when no such brace pair exists, when the slice is not valid
    JSON, or when the parsed value is not a dict.
    """
    if not raw:
        return None
    text = _FENCE_RE.sub("", raw.strip())
    open_at = text.find("{")
    if open_at == -1:
        return None
    close_at = text.rfind("}")
    # Also covers "no closing brace at all" (rfind gives -1).
    if close_at <= open_at:
        return None
    try:
        payload = json.loads(text[open_at : close_at + 1])
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    return payload if isinstance(payload, dict) else None
|
||||
|
||||
|
||||
def describe_profile(
    profile_name: str,
    *,
    overwrite: bool = False,
    timeout: Optional[int] = None,
) -> DescribeOutcome:
    """Auto-generate a description for one profile.

    Returns an outcome describing what happened. Never raises for
    expected failure modes (profile missing, no aux client configured,
    API error, malformed response) — those surface via ``ok=False`` so
    a sweep can continue past individual failures.

    ``overwrite`` controls whether an existing user-authored description
    is replaced. By default we refuse to overwrite a description with
    ``description_auto: false`` to protect curated text. Auto-generated
    descriptions (``description_auto: true``) are always replaceable.

    Args:
        profile_name: Name of the profile; it is normalized before use.
        overwrite: Replace a user-authored description when True.
        timeout: Timeout passed to the chat-completion call; a falsy
            value (``None`` or ``0``) falls back to 60.

    Returns:
        A ``DescribeOutcome``. On success ``ok`` is True, ``reason`` is
        ``"described"`` and ``description`` holds the stored text (at
        most 280 characters).
    """
    canon = profiles_mod.normalize_profile_name(profile_name)
    if not profiles_mod.profile_exists(canon):
        # Special case: "default" exists as a virtual profile name
        # mapped to the default home dir. profile_exists() handles it.
        return DescribeOutcome(canon, False, "profile not found")

    try:
        if canon == "default":
            from hermes_constants import get_hermes_home  # type: ignore
            profile_dir = Path(get_hermes_home())
        else:
            profile_dir = profiles_mod.get_profile_dir(canon)
    except Exception as exc:
        return DescribeOutcome(canon, False, f"cannot resolve profile dir: {exc}")

    # Honor curated descriptions unless --overwrite.
    existing = profiles_mod.read_profile_meta(profile_dir)
    if existing.get("description") and not existing.get("description_auto") and not overwrite:
        return DescribeOutcome(
            canon,
            False,
            "profile already has a user-authored description "
            "(use --overwrite to replace)",
        )

    skill_names = _collect_skills(profile_dir)
    skill_list = "\n".join(f" - {n}" for n in skill_names) or " (no skills installed)"
    # Count every SKILL.md outside .hub/.git so the prompt reports the full
    # total even when ``skill_names`` is only a capped sample.
    # NOTE(review): the "/.hub/" substring test assumes "/" path separators.
    skill_count = sum(
        1 for _ in (profile_dir / "skills").rglob("SKILL.md")
        if "/.hub/" not in str(_) and "/.git/" not in str(_)
    ) if (profile_dir / "skills").is_dir() else 0

    # Read model + provider from the profile's config.
    try:
        model, provider = profiles_mod._read_config_model(profile_dir)
    except Exception:
        # Unreadable config is not fatal; the prompt shows "(unset)" instead.
        model, provider = None, None

    # Imported lazily so a missing/broken auxiliary client becomes an
    # ``ok=False`` outcome rather than an import-time failure.
    try:
        from agent.auxiliary_client import (  # type: ignore
            get_auxiliary_extra_body,
            get_text_auxiliary_client,
        )
    except Exception as exc:
        logger.debug("describe: auxiliary client import failed: %s", exc)
        return DescribeOutcome(canon, False, "auxiliary client unavailable")

    try:
        client, aux_model = get_text_auxiliary_client("profile_describer")
    except Exception as exc:
        logger.debug("describe: get_text_auxiliary_client failed: %s", exc)
        return DescribeOutcome(canon, False, "auxiliary client unavailable")

    if client is None or not aux_model:
        return DescribeOutcome(canon, False, "no auxiliary client configured")

    user_msg = _USER_TEMPLATE.format(
        name=canon,
        model=(model or "(unset)"),
        provider=(provider or "(unset)"),
        skill_count=skill_count,
        skill_cap=MAX_SKILLS_FOR_PROMPT,
        skill_list=skill_list,
    )

    try:
        resp = client.chat.completions.create(
            model=aux_model,
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            temperature=0.3,
            max_tokens=400,
            timeout=timeout or 60,
            extra_body=get_auxiliary_extra_body() or None,
        )
    except Exception as exc:
        # Only the exception type goes into the outcome; details are logged.
        logger.info("describe: API call failed for %s (%s)", canon, exc)
        return DescribeOutcome(canon, False, f"LLM error: {type(exc).__name__}")

    # A response without the expected choices/message shape is treated as empty.
    try:
        raw = resp.choices[0].message.content or ""
    except Exception:
        raw = ""

    parsed = _extract_json_blob(raw)
    if parsed is None:
        # Fall back: take the raw text trimmed to one paragraph.
        text = raw.strip().split("\n\n", 1)[0]
        if not text:
            return DescribeOutcome(canon, False, "LLM returned an empty response")
        description = text[:280]
    else:
        val = parsed.get("description")
        if not isinstance(val, str) or not val.strip():
            return DescribeOutcome(
                canon, False, "LLM response missing 'description' field"
            )
        description = val.strip()[:280]

    # Persist with description_auto=True so the text stays replaceable by a
    # later run without --overwrite.
    try:
        profiles_mod.write_profile_meta(
            profile_dir,
            description=description,
            description_auto=True,
        )
    except Exception as exc:
        return DescribeOutcome(canon, False, f"failed to write profile.yaml: {exc}")

    return DescribeOutcome(canon, True, "described", description=description)
|
||||
|
||||
|
||||
def list_describable_profiles(*, missing_only: bool = True) -> list[str]:
    """Return the names of profiles that can be described.

    With ``missing_only=True`` (the default) a profile is left out only
    when it has a non-blank description that is not flagged as
    auto-generated; profiles with no description, or with an
    auto-generated one, are returned.  With ``missing_only=False`` every
    profile is returned.
    """

    def _has_curated_text(info) -> bool:
        # Non-blank description that is not marked as auto-generated.
        return bool((info.description or "").strip()) and not info.description_auto

    return [
        info.name
        for info in profiles_mod.list_profiles()
        if not (missing_only and _has_curated_text(info))
    ]
|
||||
|
|
@ -412,6 +412,17 @@ class ProfileInfo:
|
|||
distribution_name: Optional[str] = None
|
||||
distribution_version: Optional[str] = None
|
||||
distribution_source: Optional[str] = None
|
||||
# Free-form description (1-2 sentences) of what this profile is good
|
||||
# at. Persisted in ``<profile_dir>/profile.yaml``. Empty when the
|
||||
# user has not described the profile (legacy profiles, fresh
|
||||
# installs). Surfaced to the kanban decomposer so it can route work
|
||||
# to the right profile based on role rather than name alone.
|
||||
description: str = ""
|
||||
# When True, ``description`` was auto-generated by the LLM
|
||||
# describer and has not been confirmed by the user. The dashboard
|
||||
# surfaces a "review" badge in this case so the user can edit or
|
||||
# accept.
|
||||
description_auto: bool = False
|
||||
|
||||
|
||||
def _read_distribution_meta(profile_dir: Path) -> tuple:
|
||||
|
|
@ -479,6 +490,82 @@ def _count_skills(profile_dir: Path) -> int:
|
|||
return count
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# profile.yaml — per-profile metadata (description, role, etc.)
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# We keep this file deliberately tiny and separate from the profile's
|
||||
# ``config.yaml``. ``config.yaml`` is the user-facing Hermes config
|
||||
# (~5000 lines of defaults); ``profile.yaml`` is metadata ABOUT the
|
||||
# profile itself (its role, who described it). Mixing them makes both
|
||||
# harder to read.
|
||||
#
|
||||
# Missing file -> empty defaults; never an error. The kanban decomposer
|
||||
# tolerates empty descriptions and just falls back to the profile name.
|
||||
|
||||
|
||||
def _profile_yaml_path(profile_dir: Path) -> Path:
    """Return the path of the ``profile.yaml`` file inside *profile_dir*."""
    return profile_dir.joinpath("profile.yaml")
|
||||
|
||||
|
||||
def read_profile_meta(profile_dir: Path) -> dict:
    """Load the metadata kept in ``<profile_dir>/profile.yaml``.

    The result always has exactly two keys: ``description`` (a stripped
    string) and ``description_auto`` (a bool).  When the file is missing,
    cannot be read or parsed, or does not contain a mapping, the defaults
    ``{"description": "", "description_auto": False}`` are returned
    instead of raising — a corrupt profile.yaml on an unrelated profile
    must not break ``hermes profile list``.
    """
    meta_file = _profile_yaml_path(profile_dir)
    loaded = None
    if meta_file.is_file():
        try:
            import yaml
            with open(meta_file, "r", encoding="utf-8") as handle:
                loaded = yaml.safe_load(handle) or {}
        except Exception:
            # Unreadable or malformed file: fall through to the defaults.
            loaded = None
    if not isinstance(loaded, dict):
        return {"description": "", "description_auto": False}
    description = str(loaded.get("description") or "").strip()
    is_auto = bool(loaded.get("description_auto", False))
    return {"description": description, "description_auto": is_auto}
|
||||
|
||||
|
||||
def write_profile_meta(
    profile_dir: Path,
    *,
    description: Optional[str] = None,
    description_auto: Optional[bool] = None,
) -> None:
    """Merge the given fields into ``<profile_dir>/profile.yaml``.

    Fields passed as ``None`` are left as they are in the existing file;
    the file is created when it does not exist yet.  An existing file
    that cannot be loaded as a mapping is treated as empty.

    Raises:
        FileNotFoundError: if *profile_dir* is not an existing directory.
    """
    if not profile_dir.is_dir():
        raise FileNotFoundError(f"profile directory does not exist: {profile_dir}")
    import yaml

    target = _profile_yaml_path(profile_dir)
    merged: dict = {}
    if target.is_file():
        try:
            with open(target, "r", encoding="utf-8") as src:
                previous = yaml.safe_load(src) or {}
        except Exception:
            previous = {}
        if isinstance(previous, dict):
            merged = previous

    if description is not None:
        merged["description"] = description.strip()
    if description_auto is not None:
        merged["description_auto"] = bool(description_auto)

    # sort_keys=False keeps the keys in the order they already had.
    with open(target, "w", encoding="utf-8") as dst:
        yaml.safe_dump(merged, dst, sort_keys=False, default_flow_style=False)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CRUD operations
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -493,6 +580,7 @@ def list_profiles() -> List[ProfileInfo]:
|
|||
if default_home.is_dir():
|
||||
model, provider = _read_config_model(default_home)
|
||||
dist_name, dist_version, dist_source = _read_distribution_meta(default_home)
|
||||
meta = read_profile_meta(default_home)
|
||||
profiles.append(ProfileInfo(
|
||||
name="default",
|
||||
path=default_home,
|
||||
|
|
@ -505,6 +593,8 @@ def list_profiles() -> List[ProfileInfo]:
|
|||
distribution_name=dist_name,
|
||||
distribution_version=dist_version,
|
||||
distribution_source=dist_source,
|
||||
description=meta.get("description", ""),
|
||||
description_auto=meta.get("description_auto", False),
|
||||
))
|
||||
|
||||
# Named profiles
|
||||
|
|
@ -519,6 +609,7 @@ def list_profiles() -> List[ProfileInfo]:
|
|||
model, provider = _read_config_model(entry)
|
||||
alias_path = wrapper_dir / name
|
||||
dist_name, dist_version, dist_source = _read_distribution_meta(entry)
|
||||
meta = read_profile_meta(entry)
|
||||
profiles.append(ProfileInfo(
|
||||
name=name,
|
||||
path=entry,
|
||||
|
|
@ -532,6 +623,8 @@ def list_profiles() -> List[ProfileInfo]:
|
|||
distribution_name=dist_name,
|
||||
distribution_version=dist_version,
|
||||
distribution_source=dist_source,
|
||||
description=meta.get("description", ""),
|
||||
description_auto=meta.get("description_auto", False),
|
||||
))
|
||||
|
||||
return profiles
|
||||
|
|
@ -544,6 +637,7 @@ def create_profile(
|
|||
clone_config: bool = False,
|
||||
no_alias: bool = False,
|
||||
no_skills: bool = False,
|
||||
description: Optional[str] = None,
|
||||
) -> Path:
|
||||
"""Create a new profile directory.
|
||||
|
||||
|
|
@ -667,6 +761,19 @@ def create_profile(
|
|||
except OSError:
|
||||
pass # best-effort — the feature still works via the empty skills/ dir
|
||||
|
||||
# Persist description if the caller provided one. Done last so a
|
||||
# partial-create failure doesn't strand a description file in an
|
||||
# incomplete profile.
|
||||
if description and description.strip():
|
||||
try:
|
||||
write_profile_meta(
|
||||
profile_dir,
|
||||
description=description.strip(),
|
||||
description_auto=False,
|
||||
)
|
||||
except Exception:
|
||||
pass # non-fatal — user can describe later with `hermes profile describe`
|
||||
|
||||
return profile_dir
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -81,6 +81,21 @@ class UpstreamAdapter(ABC):
|
|||
refresh fails. The proxy will return 401 to the client.
|
||||
"""
|
||||
|
||||
def get_retry_credential(
    self,
    *,
    failed_credential: UpstreamCredential,
    status_code: int,
) -> Optional[UpstreamCredential]:
    """Offer a replacement credential after the upstream refused one.

    This base implementation never retries and always returns ``None``.
    Adapters may override it to provide a one-shot fallback, e.g. moving
    from a preferred token type to a legacy bearer once the upstream has
    rejected the first request.
    """
    # Both arguments exist for overriding adapters; the default ignores them.
    del failed_credential, status_code
    return None
|
||||
|
||||
def describe(self) -> str:
|
||||
"""One-line status summary for ``proxy status``."""
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -1,12 +1,13 @@
|
|||
"""Nous Portal upstream adapter.
|
||||
|
||||
Reads the user's Nous OAuth state from ``~/.hermes/auth.json``, refreshes
|
||||
the access token and mints a fresh agent key when needed, and exposes the
|
||||
upstream base URL plus minted bearer for the proxy server to forward to.
|
||||
Reads the user's Nous OAuth state from ``~/.hermes/auth.json`` through the
|
||||
shared runtime resolver, refreshes the access token and resolves the
|
||||
``agent_key`` compatibility credential when needed, then exposes the upstream
|
||||
base URL plus bearer for the proxy server to forward to.
|
||||
|
||||
The minted ``agent_key`` (not the OAuth ``access_token``) is what
|
||||
``inference-api.nousresearch.com`` accepts as a bearer. The refresh helper
|
||||
already handles both — see :func:`hermes_cli.auth.refresh_nous_oauth_from_state`.
|
||||
The ``agent_key`` field may hold either a NAS invoke JWT or the legacy
|
||||
opaque session key. The refresh helper handles both — see
|
||||
:func:`hermes_cli.auth.resolve_nous_runtime_credentials`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -16,11 +17,18 @@ import threading
|
|||
from typing import Any, Dict, FrozenSet, Optional
|
||||
|
||||
from hermes_cli.auth import (
|
||||
AuthError,
|
||||
DEFAULT_NOUS_INFERENCE_URL,
|
||||
NOUS_INFERENCE_AUTH_MODE_AUTO,
|
||||
NOUS_INFERENCE_AUTH_MODE_LEGACY,
|
||||
_load_auth_store,
|
||||
_auth_store_lock,
|
||||
_is_terminal_nous_refresh_error,
|
||||
_quarantine_nous_oauth_state,
|
||||
_quarantine_nous_pool_entries,
|
||||
_save_auth_store,
|
||||
_write_shared_nous_state,
|
||||
refresh_nous_oauth_from_state,
|
||||
resolve_nous_runtime_credentials,
|
||||
)
|
||||
from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential
|
||||
|
||||
|
|
@ -43,9 +51,8 @@ class NousPortalAdapter(UpstreamAdapter):
|
|||
"""Proxy upstream for the Nous Portal inference API."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
# Lock guards _load → refresh → _save against parallel proxy requests
|
||||
# racing to refresh expired tokens. Refresh itself is HTTP, so we
|
||||
# hold the lock across the network call (brief; OAuth refresh is fast).
|
||||
# Serialize proxy requests in this process; cross-process token refresh
|
||||
# and persistence are handled by resolve_nous_runtime_credentials().
|
||||
self._lock = threading.Lock()
|
||||
|
||||
@property
|
||||
|
|
@ -72,6 +79,26 @@ class NousPortalAdapter(UpstreamAdapter):
|
|||
)
|
||||
|
||||
def get_credential(self) -> UpstreamCredential:
    """Resolve the credential for a first-attempt upstream request.

    Delegates to ``_get_credential`` using the ``auto`` inference auth mode.
    """
    preferred_mode = NOUS_INFERENCE_AUTH_MODE_AUTO
    return self._get_credential(inference_auth_mode=preferred_mode)
|
||||
|
||||
def get_retry_credential(
    self,
    *,
    failed_credential: UpstreamCredential,
    status_code: int,
) -> Optional[UpstreamCredential]:
    """Fall back to the legacy session key after a 401 from the upstream.

    A retry credential is resolved only when the upstream answered 401
    *and* the rejected bearer contains exactly two dots (three
    dot-separated segments — presumably the invoke-JWT form of the
    ``agent_key``).  In every other case ``None`` is returned and no
    retry takes place.
    """
    # The status check comes first so the bearer is only inspected on a 401.
    if status_code != 401 or failed_credential.bearer.count(".") != 2:
        return None
    logger.info("proxy: Nous upstream rejected bearer; retrying with legacy session key")
    legacy_mode = NOUS_INFERENCE_AUTH_MODE_LEGACY
    return self._get_credential(inference_auth_mode=legacy_mode)
|
||||
|
||||
def _get_credential(self, *, inference_auth_mode: str) -> UpstreamCredential:
|
||||
with self._lock:
|
||||
state = self._read_state()
|
||||
if state is None:
|
||||
|
|
@ -80,28 +107,43 @@ class NousPortalAdapter(UpstreamAdapter):
|
|||
)
|
||||
|
||||
try:
|
||||
refreshed = refresh_nous_oauth_from_state(state)
|
||||
refreshed = resolve_nous_runtime_credentials(
|
||||
inference_auth_mode=inference_auth_mode,
|
||||
)
|
||||
except AuthError as exc:
|
||||
if _is_terminal_nous_refresh_error(exc):
|
||||
_quarantine_nous_oauth_state(
|
||||
state,
|
||||
exc,
|
||||
reason="proxy_refresh_failure",
|
||||
)
|
||||
self._save_state(
|
||||
state,
|
||||
quarantine_error=exc,
|
||||
quarantine_reason="proxy_refresh_failure",
|
||||
)
|
||||
raise RuntimeError(
|
||||
f"Failed to refresh Nous Portal credentials: {exc}"
|
||||
) from exc
|
||||
except Exception as exc:
|
||||
raise RuntimeError(
|
||||
f"Failed to refresh Nous Portal credentials: {exc}"
|
||||
) from exc
|
||||
|
||||
self._save_state(refreshed)
|
||||
|
||||
agent_key = refreshed.get("agent_key")
|
||||
agent_key = refreshed.get("api_key")
|
||||
if not agent_key:
|
||||
raise RuntimeError(
|
||||
"Nous Portal refresh did not return a usable agent_key. "
|
||||
"Try `hermes login nous` to re-authenticate."
|
||||
)
|
||||
|
||||
base_url = refreshed.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL
|
||||
base_url = refreshed.get("base_url") or DEFAULT_NOUS_INFERENCE_URL
|
||||
base_url = base_url.rstrip("/")
|
||||
|
||||
return UpstreamCredential(
|
||||
bearer=agent_key,
|
||||
base_url=base_url,
|
||||
expires_at=refreshed.get("agent_key_expires_at"),
|
||||
expires_at=refreshed.get("expires_at"),
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
|
@ -111,7 +153,8 @@ class NousPortalAdapter(UpstreamAdapter):
|
|||
|
||||
def _read_state(self) -> Optional[Dict[str, Any]]:
|
||||
try:
|
||||
store = _load_auth_store()
|
||||
with _auth_store_lock():
|
||||
store = _load_auth_store()
|
||||
except Exception as exc:
|
||||
logger.warning("proxy: failed to load auth store: %s", exc)
|
||||
return None
|
||||
|
|
@ -121,17 +164,28 @@ class NousPortalAdapter(UpstreamAdapter):
|
|||
return None
|
||||
return dict(state) # copy so the refresh helper can mutate freely
|
||||
|
||||
def _save_state(self, state: Dict[str, Any]) -> None:
|
||||
def _save_state(
|
||||
self,
|
||||
state: Dict[str, Any],
|
||||
*,
|
||||
quarantine_error: Optional[AuthError] = None,
|
||||
quarantine_reason: Optional[str] = None,
|
||||
) -> None:
|
||||
try:
|
||||
store = _load_auth_store()
|
||||
providers = store.setdefault("providers", {})
|
||||
providers["nous"] = state
|
||||
_save_auth_store(store)
|
||||
with _auth_store_lock():
|
||||
store = _load_auth_store()
|
||||
if quarantine_error is not None and quarantine_reason:
|
||||
_quarantine_nous_pool_entries(
|
||||
store,
|
||||
quarantine_error,
|
||||
reason=quarantine_reason,
|
||||
)
|
||||
providers = store.setdefault("providers", {})
|
||||
providers["nous"] = state
|
||||
_save_auth_store(store)
|
||||
_write_shared_nous_state(state)
|
||||
except Exception as exc:
|
||||
# Best effort — we still return the fresh credential. The next
|
||||
# request just won't see cached state, which means another refresh.
|
||||
logger.warning("proxy: failed to persist refreshed Nous state: %s", exc)
|
||||
logger.warning("proxy: failed to persist Nous quarantine state: %s", exc)
|
||||
|
||||
|
||||
__all__ = ["NousPortalAdapter"]
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ def cmd_proxy(args: Any) -> int:
|
|||
return cmd_proxy_start(args)
|
||||
if sub == "status":
|
||||
return cmd_proxy_status(args)
|
||||
if sub in ("providers", "list"):
|
||||
if sub in {"providers", "list"}:
|
||||
return cmd_proxy_list_providers(args)
|
||||
# No subcommand → print short help.
|
||||
print(
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ except ImportError:
|
|||
web = None # type: ignore[assignment]
|
||||
AIOHTTP_AVAILABLE = False
|
||||
|
||||
from hermes_cli.proxy.adapters.base import UpstreamAdapter
|
||||
from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ def _filter_response_headers(headers) -> dict:
|
|||
if key.lower() in _HOP_BY_HOP_HEADERS:
|
||||
continue
|
||||
# aiohttp recomputes Content-Encoding/Content-Length on stream — let it.
|
||||
if key.lower() in ("content-encoding", "content-length"):
|
||||
if key.lower() in {"content-encoding", "content-length"}:
|
||||
continue
|
||||
out[key] = value
|
||||
return out
|
||||
|
|
@ -136,50 +136,93 @@ def create_app(adapter: UpstreamAdapter) -> "web.Application":
|
|||
logger.warning("proxy: credential resolution failed: %s", exc)
|
||||
return _json_error(401, str(exc), code="upstream_auth_failed")
|
||||
|
||||
upstream_url = f"{cred.base_url.rstrip('/')}{rel_path}"
|
||||
# Preserve query string verbatim.
|
||||
if request.query_string:
|
||||
upstream_url = f"{upstream_url}?{request.query_string}"
|
||||
|
||||
# Forward body verbatim. Read into memory once — request bodies for
|
||||
# chat/completions/embeddings are small (<1MB typically). If we ever
|
||||
# need to forward large multipart uploads we'll switch to streaming
|
||||
# the request body too.
|
||||
body = await request.read()
|
||||
|
||||
fwd_headers = _filter_request_headers(request.headers)
|
||||
fwd_headers["Authorization"] = f"{cred.token_type} {cred.bearer}"
|
||||
|
||||
logger.debug(
|
||||
"proxy: forwarding %s %s -> %s (body=%d bytes)",
|
||||
request.method, rel_path, upstream_url, len(body),
|
||||
)
|
||||
|
||||
# Use a per-request session so connection state doesn't leak between
|
||||
# clients. Could be optimized to a shared session later.
|
||||
timeout = aiohttp.ClientTimeout(total=None, sock_connect=15, sock_read=300)
|
||||
try:
|
||||
session = aiohttp.ClientSession(timeout=timeout)
|
||||
except Exception as exc: # pragma: no cover - aiohttp setup issue
|
||||
return _json_error(500, f"proxy session init failed: {exc}")
|
||||
|
||||
try:
|
||||
upstream_resp = await session.request(
|
||||
request.method,
|
||||
upstream_url,
|
||||
data=body if body else None,
|
||||
headers=fwd_headers,
|
||||
allow_redirects=False,
|
||||
async def _send_upstream(active_cred: UpstreamCredential):
|
||||
upstream_url = f"{active_cred.base_url.rstrip('/')}{rel_path}"
|
||||
# Preserve query string verbatim.
|
||||
if request.query_string:
|
||||
upstream_url = f"{upstream_url}?{request.query_string}"
|
||||
|
||||
fwd_headers = _filter_request_headers(request.headers)
|
||||
fwd_headers["Authorization"] = f"{active_cred.token_type} {active_cred.bearer}"
|
||||
|
||||
logger.debug(
|
||||
"proxy: forwarding %s %s -> %s (body=%d bytes)",
|
||||
request.method, rel_path, upstream_url, len(body),
|
||||
)
|
||||
except aiohttp.ClientError as exc:
|
||||
await session.close()
|
||||
logger.warning("proxy: upstream connection failed: %s", exc)
|
||||
return _json_error(502, f"upstream connection failed: {exc}",
|
||||
code="upstream_unreachable")
|
||||
except asyncio.TimeoutError:
|
||||
await session.close()
|
||||
return _json_error(504, "upstream request timed out",
|
||||
code="upstream_timeout")
|
||||
|
||||
try:
|
||||
session = aiohttp.ClientSession(timeout=timeout)
|
||||
except Exception as exc: # pragma: no cover - aiohttp setup issue
|
||||
raise RuntimeError(f"proxy session init failed: {exc}") from exc
|
||||
|
||||
try:
|
||||
upstream_resp = await session.request(
|
||||
request.method,
|
||||
upstream_url,
|
||||
data=body if body else None,
|
||||
headers=fwd_headers,
|
||||
allow_redirects=False,
|
||||
)
|
||||
except Exception:
|
||||
await session.close()
|
||||
raise
|
||||
return session, upstream_resp
|
||||
|
||||
async def _open_upstream(active_cred: UpstreamCredential):
|
||||
try:
|
||||
return await _send_upstream(active_cred)
|
||||
except RuntimeError as exc:
|
||||
return _json_error(500, str(exc)), None
|
||||
except aiohttp.ClientError as exc:
|
||||
logger.warning("proxy: upstream connection failed: %s", exc)
|
||||
return (
|
||||
_json_error(
|
||||
502,
|
||||
f"upstream connection failed: {exc}",
|
||||
code="upstream_unreachable",
|
||||
),
|
||||
None,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
return (
|
||||
_json_error(
|
||||
504,
|
||||
"upstream request timed out",
|
||||
code="upstream_timeout",
|
||||
),
|
||||
None,
|
||||
)
|
||||
|
||||
session_or_response, upstream_resp = await _open_upstream(cred)
|
||||
if upstream_resp is None:
|
||||
return session_or_response
|
||||
session = session_or_response
|
||||
|
||||
if upstream_resp.status == 401:
|
||||
try:
|
||||
retry_cred = adapter.get_retry_credential(
|
||||
failed_credential=cred,
|
||||
status_code=upstream_resp.status,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("proxy: retry credential resolution failed: %s", exc)
|
||||
retry_cred = None
|
||||
|
||||
if retry_cred is not None:
|
||||
upstream_resp.release()
|
||||
await session.close()
|
||||
session_or_response, upstream_resp = await _open_upstream(retry_cred)
|
||||
if upstream_resp is None:
|
||||
return session_or_response
|
||||
session = session_or_response
|
||||
|
||||
# Stream response back. Headers first, then chunked body.
|
||||
resp = web.StreamResponse(
|
||||
|
|
|
|||
|
|
@ -209,7 +209,7 @@ def _maybe_apply_codex_app_server_runtime(
|
|||
Returns the (possibly-rewritten) api_mode."""
|
||||
if not model_cfg:
|
||||
return api_mode
|
||||
if provider not in ("openai", "openai-codex"):
|
||||
if provider not in {"openai", "openai-codex"}:
|
||||
return api_mode
|
||||
runtime = str(model_cfg.get("openai_runtime") or "").strip().lower()
|
||||
if runtime == "codex_app_server":
|
||||
|
|
@ -875,10 +875,9 @@ def _resolve_explicit_runtime(
|
|||
explicit_base_url
|
||||
or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
|
||||
)
|
||||
# Only use agent_key for inference — access_token is an OAuth token for the
|
||||
# portal API (minting keys, refreshing tokens), not for the inference API.
|
||||
# Falling back to access_token sends an OAuth bearer token to the inference
|
||||
# endpoint, which returns 404 because it is not a valid inference credential.
|
||||
# Only use the agent_key compatibility field for inference. It may be
|
||||
# either a NAS invoke JWT or a legacy opaque session key; raw OAuth
|
||||
# access_token fallback is handled by resolve_nous_runtime_credentials().
|
||||
api_key = explicit_api_key or str(state.get("agent_key") or "").strip()
|
||||
expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
|
||||
if not api_key:
|
||||
|
|
@ -1069,17 +1068,19 @@ def resolve_runtime_provider(
|
|||
getattr(entry, "runtime_api_key", None)
|
||||
or getattr(entry, "access_token", "")
|
||||
)
|
||||
# For Nous, the pool entry's runtime_api_key is the agent_key — a
|
||||
# short-lived inference credential (~30 min TTL). The pool doesn't
|
||||
# For Nous, the pool entry's runtime_api_key is the agent_key
|
||||
# compatibility field: either an invoke JWT or legacy opaque key.
|
||||
# The pool doesn't
|
||||
# refresh it during selection (that would trigger network calls in
|
||||
# non-runtime contexts like `hermes auth list`). If the key is
|
||||
# expired, clear pool_api_key so we fall through to
|
||||
# resolve_nous_runtime_credentials() which handles refresh + mint.
|
||||
# resolve_nous_runtime_credentials() which handles refresh + fallback.
|
||||
if provider == "nous" and entry is not None and pool_api_key:
|
||||
min_ttl = max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
|
||||
nous_state = {
|
||||
"agent_key": getattr(entry, "agent_key", None),
|
||||
"agent_key_expires_at": getattr(entry, "agent_key_expires_at", None),
|
||||
"scope": getattr(entry, "scope", None),
|
||||
}
|
||||
if not _agent_key_is_usable(nous_state, min_ttl):
|
||||
logger.debug("Nous pool entry agent_key expired/missing, falling through to runtime resolution")
|
||||
|
|
|
|||
|
|
@ -171,7 +171,7 @@ def _recent_window(
|
|||
cut = 0
|
||||
for i in range(len(messages) - 1, -1, -1):
|
||||
msg = messages[i]
|
||||
if isinstance(msg, Mapping) and msg.get("role") in ("user", "assistant"):
|
||||
if isinstance(msg, Mapping) and msg.get("role") in {"user", "assistant"}:
|
||||
count += 1
|
||||
if count >= window:
|
||||
cut = i
|
||||
|
|
|
|||
|
|
@ -259,6 +259,27 @@ def show_status(args):
|
|||
if minimax_status.get("error") and not minimax_logged_in:
|
||||
print(f" Error: {minimax_status.get('error')}")
|
||||
|
||||
# xAI OAuth — separate try/except so an import failure here cannot
|
||||
# disrupt the already-printed Nous/Codex/Qwen/MiniMax rows above.
|
||||
try:
|
||||
from hermes_cli.auth import get_xai_oauth_auth_status
|
||||
xai_oauth_status = get_xai_oauth_auth_status() or {}
|
||||
except Exception:
|
||||
xai_oauth_status = {}
|
||||
|
||||
xai_oauth_logged_in = bool(xai_oauth_status.get("logged_in"))
|
||||
print(
|
||||
f" {'xAI OAuth':<12} {check_mark(xai_oauth_logged_in)} "
|
||||
f"{'logged in' if xai_oauth_logged_in else 'not logged in (run: hermes auth add xai-oauth)'}"
|
||||
)
|
||||
xai_auth_file = xai_oauth_status.get("auth_store")
|
||||
if xai_auth_file:
|
||||
print(f" Auth file: {xai_auth_file}")
|
||||
if xai_oauth_status.get("last_refresh"):
|
||||
print(f" Refreshed: {_format_iso_timestamp(xai_oauth_status.get('last_refresh'))}")
|
||||
if xai_oauth_status.get("error") and not xai_oauth_logged_in:
|
||||
print(f" Error: {xai_oauth_status.get('error')}")
|
||||
|
||||
# =========================================================================
|
||||
# Nous Subscription Features
|
||||
# =========================================================================
|
||||
|
|
|
|||
|
|
@ -88,12 +88,40 @@ CONFIGURABLE_TOOLSETS = [
|
|||
# who want it opt in via `hermes tools` → Video Generation, which walks
|
||||
# them through provider + model selection.
|
||||
#
|
||||
# X search is off by default — gated on xAI credentials (SuperGrok OAuth
|
||||
# or XAI_API_KEY). Users opt in via `hermes tools` → X (Twitter) Search,
|
||||
# which walks them through credential setup. The tool's check_fn means
|
||||
# the schema won't appear to the model even if enabled without credentials.
|
||||
# X search is off by default for users without xAI credentials, but
|
||||
# auto-enables when SuperGrok OAuth tokens are stored OR XAI_API_KEY is
|
||||
# set — mirroring the HASS_TOKEN → homeassistant auto-enable below. The
|
||||
# `hermes tools` → X (Twitter) Search setup walks users through credential
|
||||
# setup. The tool's check_fn means the schema still won't appear to the
|
||||
# model if the credential later goes missing or expires.
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "spotify", "discord", "discord_admin", "video", "video_gen", "x_search"}
|
||||
|
||||
|
||||
def _xai_credentials_present() -> bool:
    """Report whether xAI credentials appear to be available.

    Three sources are consulted in order, and the first hit wins:

    1. ``hermes_cli.auth._read_xai_oauth_tokens()`` completing without
       raising.
    2. A non-blank ``XAI_API_KEY`` as reported by
       ``tools.xai_http.get_env_value``.
    3. A non-blank ``XAI_API_KEY`` in the process environment.

    Import errors and any other exception from the first two probes are
    swallowed on purpose so this check never raises.  It is used to
    auto-enable the ``x_search`` toolset; the tool's runtime ``check_fn``
    still gates schema registration if creds later expire or get revoked.
    """
    try:
        from hermes_cli.auth import _read_xai_oauth_tokens as read_oauth_tokens

        read_oauth_tokens()
    except Exception:
        pass  # no stored OAuth tokens (or auth module unavailable)
    else:
        return True

    try:
        from tools.xai_http import get_env_value

        configured_key = str(get_env_value("XAI_API_KEY") or "").strip()
    except Exception:
        configured_key = ""
    if configured_key:
        return True

    return bool(os.environ.get("XAI_API_KEY", "").strip())
|
||||
|
||||
# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
|
||||
# these platforms, and only resolve/save for these platforms. A toolset
|
||||
# absent from this map is available on every platform (current behaviour).
|
||||
|
|
@ -350,6 +378,17 @@ TOOL_CATEGORIES = {
|
|||
"browser": {
|
||||
"name": "Browser Automation",
|
||||
"icon": "🌐",
|
||||
# Per-provider rows for Browserbase, Browser Use, and Firecrawl are
|
||||
# injected at runtime from plugins.browser.<vendor>.provider via
|
||||
# _plugin_browser_providers() in _visible_providers(). Only
|
||||
# non-provider UX setup-flow rows remain here:
|
||||
# - "Nous Subscription (Browser Use cloud)" — managed Browser Use
|
||||
# billed via Nous subscription (requires_nous_auth +
|
||||
# override_env_vars). Uses the browser-use plugin as the
|
||||
# underlying backend but has a distinct setup UX.
|
||||
# - "Local Browser" — non-cloud option, no CloudBrowserProvider.
|
||||
# - "Camofox" — anti-detection local Firefox; short-circuits the
|
||||
# cloud-provider dispatch path via _is_camofox_mode().
|
||||
"providers": [
|
||||
{
|
||||
"name": "Nous Subscription (Browser Use cloud)",
|
||||
|
|
@ -370,37 +409,6 @@ TOOL_CATEGORIES = {
|
|||
"browser_provider": "local",
|
||||
"post_setup": "agent_browser",
|
||||
},
|
||||
{
|
||||
"name": "Browserbase",
|
||||
"badge": "paid",
|
||||
"tag": "Cloud browser with stealth and proxies",
|
||||
"env_vars": [
|
||||
{"key": "BROWSERBASE_API_KEY", "prompt": "Browserbase API key", "url": "https://browserbase.com"},
|
||||
{"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"},
|
||||
],
|
||||
"browser_provider": "browserbase",
|
||||
"post_setup": "agent_browser",
|
||||
},
|
||||
{
|
||||
"name": "Browser Use",
|
||||
"badge": "paid",
|
||||
"tag": "Cloud browser with remote execution",
|
||||
"env_vars": [
|
||||
{"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"},
|
||||
],
|
||||
"browser_provider": "browser-use",
|
||||
"post_setup": "agent_browser",
|
||||
},
|
||||
{
|
||||
"name": "Firecrawl",
|
||||
"badge": "paid",
|
||||
"tag": "Cloud browser with remote execution",
|
||||
"env_vars": [
|
||||
{"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
|
||||
],
|
||||
"browser_provider": "firecrawl",
|
||||
"post_setup": "agent_browser",
|
||||
},
|
||||
{
|
||||
"name": "Camofox",
|
||||
"badge": "free · local",
|
||||
|
|
@ -1170,6 +1178,23 @@ def _get_platform_tools(
|
|||
if ts_tools and ts_tools.issubset(all_tool_names):
|
||||
enabled_toolsets.add(ts_key)
|
||||
|
||||
# Auto-enable ``x_search`` when xAI credentials are configured.
|
||||
# Unlike ``homeassistant`` (whose ``ha_*`` tools live inside the
|
||||
# platform composite and thus pass the subset check above),
|
||||
# ``x_search`` is its own one-tool toolset that the composite does
|
||||
# NOT include, so the subset loop never picks it up. Inject it
|
||||
# directly here, mirroring the HASS_TOKEN → ``homeassistant`` rule
|
||||
# below: once you have working creds, you don't have to also click
|
||||
# through ``hermes tools`` to flip the toolset on. Only fires when
|
||||
# the user has not yet saved an explicit toolset list — once they
|
||||
# do, the saved list is authoritative.
|
||||
x_search_auto_enabled = (
|
||||
_toolset_allowed_for_platform("x_search", platform)
|
||||
and _xai_credentials_present()
|
||||
)
|
||||
if x_search_auto_enabled:
|
||||
enabled_toolsets.add("x_search")
|
||||
|
||||
default_off = set(_DEFAULT_OFF_TOOLSETS)
|
||||
# Legacy safety: if the platform's own name matches a default-off
|
||||
# toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
|
||||
|
|
@ -1187,6 +1212,11 @@ def _get_platform_tools(
|
|||
# regressed after #14798 made cron honor per-platform tool config.
|
||||
if "homeassistant" in default_off and os.getenv("HASS_TOKEN"):
|
||||
default_off.remove("homeassistant")
|
||||
# Symmetric carve-out for x_search auto-enable (see the inject
|
||||
# block above). Without this, the default_off subtraction would
|
||||
# strip the entry we just added.
|
||||
if x_search_auto_enabled and "x_search" in default_off:
|
||||
default_off.remove("x_search")
|
||||
enabled_toolsets -= default_off
|
||||
|
||||
# Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
|
||||
|
|
@ -1653,6 +1683,61 @@ def _plugin_web_search_providers() -> list[dict]:
|
|||
return rows
|
||||
|
||||
|
||||
# Mirror of _plugin_web_search_providers for cloud browser backends. After
|
||||
# PR #25214, Browserbase / Browser Use / Firecrawl live as plugins under
|
||||
# plugins/browser/<vendor>/; this helper is the sole source of provider rows
|
||||
# for those three in the "Browser Automation" picker. The hardcoded
|
||||
# ``TOOL_CATEGORIES["browser"]`` entries that drove the category before
|
||||
# were deleted in the same PR; only non-provider UX setup-flow rows remain
|
||||
# ("Nous Subscription", "Local Browser", "Camofox") — see the comment block
|
||||
# in ``TOOL_CATEGORIES["browser"]`` for why each one stays hardcoded.
|
||||
def _plugin_browser_providers() -> list[dict]:
    """Build picker-row dicts from plugin-registered cloud browser providers.

    Each returned dict mirrors the legacy ``TOOL_CATEGORIES["browser"]``
    schema (``name`` / ``badge`` / ``tag`` / ``env_vars`` /
    ``browser_provider`` / ``post_setup``) so the picker behaves identically
    whether a provider was hardcoded or plugin-registered.

    Populates ``browser_provider`` (the legacy config key written to
    ``browser.cloud_provider``) and a ``browser_plugin_name`` marker so
    setup / write paths can route through the registry when they want to.

    Returns:
        One row per usable provider, in registry order. An empty list is
        returned when the registry or plugin discovery cannot be imported
        or raises. Providers with no ``name``, whose ``get_setup_schema()``
        raises, or whose schema is not a dict are skipped.
    """
    try:
        from agent.browser_registry import list_providers as _list_browser_providers
        from hermes_cli.plugins import _ensure_plugins_discovered

        _ensure_plugins_discovered()
        providers = _list_browser_providers()
    except Exception:
        # Best-effort: a broken registry must not take the picker down.
        return []

    rows: list[dict] = []
    for provider in providers:
        name = getattr(provider, "name", None)
        if not name:
            continue
        try:
            schema = provider.get_setup_schema()
        except Exception:
            continue
        if not isinstance(schema, dict):
            continue
        # Resolve the fallback label defensively: the default argument of
        # ``dict.get`` is evaluated eagerly, so a bare ``provider.display_name``
        # would raise for a provider lacking that attribute even when the
        # schema supplies its own ``name``. Fall back to the registry name.
        fallback_label = getattr(provider, "display_name", name)
        row = {
            "name": schema.get("name", fallback_label),
            "badge": schema.get("badge", ""),
            "tag": schema.get("tag", ""),
            "env_vars": schema.get("env_vars", []),
            "browser_provider": name,
            "browser_plugin_name": name,
        }
        # Pass-through optional fields the schema can opt into.
        if schema.get("post_setup"):
            row["post_setup"] = schema["post_setup"]
        rows.append(row)
    return rows
|
||||
|
||||
|
||||
def _visible_providers(cat: dict, config: dict) -> list[dict]:
|
||||
"""Return provider entries visible for the current auth/config state."""
|
||||
features = get_nous_subscription_features(config)
|
||||
|
|
@ -1682,6 +1767,14 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
|
|||
if cat.get("name") == "Web Search & Extract":
|
||||
visible.extend(_plugin_web_search_providers())
|
||||
|
||||
# Inject plugin-registered cloud browser backends. After PR #25214,
|
||||
# Browserbase / Browser Use / Firecrawl are the plugin-supplied rows;
|
||||
# the hardcoded "Nous Subscription" / "Local Browser" / "Camofox" rows
|
||||
# stay because they're non-provider UX setup flows (subscription auth,
|
||||
# local fallback, and the REST-API anti-detection backend respectively).
|
||||
if cat.get("name") == "Browser Automation":
|
||||
visible.extend(_plugin_browser_providers())
|
||||
|
||||
return visible
|
||||
|
||||
|
||||
|
|
@ -2590,6 +2683,9 @@ def _reconfigure_provider(provider: dict, config: dict):
|
|||
else:
|
||||
_print_info(" Kept current")
|
||||
|
||||
if provider.get("post_setup"):
|
||||
_run_post_setup(provider["post_setup"])
|
||||
|
||||
# Imagegen backends prompt for model selection on reconfig too.
|
||||
plugin_name = provider.get("image_gen_plugin_name")
|
||||
if plugin_name:
|
||||
|
|
|
|||
|
|
@ -2609,7 +2609,11 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
|
|||
so the UI can render the verification page link + user code.
|
||||
"""
|
||||
if provider_id == "nous":
|
||||
from hermes_cli.auth import _request_device_code, PROVIDER_REGISTRY
|
||||
from hermes_cli.auth import (
|
||||
_nous_device_scope_with_env_override,
|
||||
_request_nous_device_code_with_scope_fallback,
|
||||
PROVIDER_REGISTRY,
|
||||
)
|
||||
import httpx
|
||||
pconfig = PROVIDER_REGISTRY["nous"]
|
||||
portal_base_url = (
|
||||
|
|
@ -2618,22 +2622,34 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
|
|||
or pconfig.portal_base_url
|
||||
).rstrip("/")
|
||||
client_id = pconfig.client_id
|
||||
scope = pconfig.scope
|
||||
scope, explicit_scope = _nous_device_scope_with_env_override(
|
||||
None,
|
||||
default_scope=pconfig.scope,
|
||||
)
|
||||
|
||||
def _do_nous_device_request():
|
||||
with httpx.Client(timeout=httpx.Timeout(15.0), headers={"Accept": "application/json"}) as client:
|
||||
return _request_device_code(
|
||||
with httpx.Client(
|
||||
timeout=httpx.Timeout(15.0),
|
||||
headers={"Accept": "application/json"},
|
||||
) as client:
|
||||
return _request_nous_device_code_with_scope_fallback(
|
||||
client=client,
|
||||
portal_base_url=portal_base_url,
|
||||
client_id=client_id,
|
||||
scope=scope,
|
||||
allow_legacy_fallback=not explicit_scope,
|
||||
)
|
||||
device_data = await asyncio.get_running_loop().run_in_executor(None, _do_nous_device_request)
|
||||
|
||||
device_data, effective_scope = await asyncio.get_running_loop().run_in_executor(
|
||||
None, _do_nous_device_request
|
||||
)
|
||||
sid, sess = _new_oauth_session("nous", "device_code")
|
||||
sess["device_code"] = str(device_data["device_code"])
|
||||
sess["interval"] = int(device_data["interval"])
|
||||
sess["expires_at"] = time.time() + int(device_data["expires_in"])
|
||||
sess["portal_base_url"] = portal_base_url
|
||||
sess["client_id"] = client_id
|
||||
sess["scope"] = effective_scope
|
||||
threading.Thread(
|
||||
target=_nous_poller, args=(sid,), daemon=True, name=f"oauth-poll-{sid[:6]}"
|
||||
).start()
|
||||
|
|
@ -2762,7 +2778,11 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
|
|||
|
||||
def _nous_poller(session_id: str) -> None:
|
||||
"""Background poller that drives a Nous device-code flow to completion."""
|
||||
from hermes_cli.auth import _poll_for_token, refresh_nous_oauth_from_state
|
||||
from hermes_cli.auth import (
|
||||
NOUS_INFERENCE_AUTH_MODE_FRESH,
|
||||
_poll_for_token,
|
||||
refresh_nous_oauth_from_state,
|
||||
)
|
||||
from datetime import datetime, timezone
|
||||
import httpx
|
||||
with _oauth_sessions_lock:
|
||||
|
|
@ -2773,6 +2793,7 @@ def _nous_poller(session_id: str) -> None:
|
|||
client_id = sess["client_id"]
|
||||
device_code = sess["device_code"]
|
||||
interval = sess["interval"]
|
||||
scope = sess.get("scope")
|
||||
expires_in = max(60, int(sess["expires_at"] - time.time()))
|
||||
try:
|
||||
with httpx.Client(timeout=httpx.Timeout(15.0), headers={"Accept": "application/json"}) as client:
|
||||
|
|
@ -2791,7 +2812,7 @@ def _nous_poller(session_id: str) -> None:
|
|||
"portal_base_url": portal_base_url,
|
||||
"inference_base_url": token_data.get("inference_base_url"),
|
||||
"client_id": client_id,
|
||||
"scope": token_data.get("scope"),
|
||||
"scope": token_data.get("scope") or scope,
|
||||
"token_type": token_data.get("token_type", "Bearer"),
|
||||
"access_token": token_data["access_token"],
|
||||
"refresh_token": token_data.get("refresh_token"),
|
||||
|
|
@ -2803,8 +2824,11 @@ def _nous_poller(session_id: str) -> None:
|
|||
"expires_in": token_ttl,
|
||||
}
|
||||
full_state = refresh_nous_oauth_from_state(
|
||||
auth_state, min_key_ttl_seconds=300, timeout_seconds=15.0,
|
||||
force_refresh=False, force_mint=True,
|
||||
auth_state,
|
||||
min_key_ttl_seconds=300,
|
||||
timeout_seconds=15.0,
|
||||
force_refresh=False,
|
||||
inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_FRESH,
|
||||
)
|
||||
from hermes_cli.auth import persist_nous_credentials
|
||||
persist_nous_credentials(full_state)
|
||||
|
|
@ -5381,4 +5405,7 @@ def start_server(
|
|||
open_browser,
|
||||
)
|
||||
print(f" Hermes Web UI → http://{host}:{port}")
|
||||
uvicorn.run(app, host=host, port=port, log_level="warning")
|
||||
# proxy_headers=False so _ws_client_is_allowed sees the real connection peer
|
||||
# rather than X-Forwarded-For's rewritten value (which would defeat the
|
||||
# loopback gate when behind a reverse proxy).
|
||||
uvicorn.run(app, host=host, port=port, log_level="warning", proxy_headers=False)
|
||||
|
|
|
|||
|
|
@ -358,7 +358,7 @@ def generate_meme(template_id: str, texts: list[str], output_path: str) -> str:
|
|||
img = _overlay_on_image(img, texts, fields)
|
||||
|
||||
output = Path(output_path)
|
||||
if output.suffix.lower() in (".jpg", ".jpeg"):
|
||||
if output.suffix.lower() in {".jpg", ".jpeg"}:
|
||||
img = img.convert("RGB")
|
||||
img.save(str(output), quality=95)
|
||||
return str(output)
|
||||
|
|
@ -378,7 +378,7 @@ def generate_from_image(
|
|||
result = _overlay_on_image(img, texts, fields)
|
||||
|
||||
output = Path(output_path)
|
||||
if output.suffix.lower() in (".jpg", ".jpeg"):
|
||||
if output.suffix.lower() in {".jpg", ".jpeg"}:
|
||||
result = result.convert("RGB")
|
||||
result.save(str(output), quality=95)
|
||||
return str(output)
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ def _parse_feed(xml_bytes: bytes):
|
|||
entries = []
|
||||
for item in root.iter():
|
||||
tag = _strip_ns(item.tag)
|
||||
if tag not in ("item", "entry"):
|
||||
if tag not in {"item", "entry"}:
|
||||
continue
|
||||
# ElementTree Elements without children are *falsy* — use `is not None`.
|
||||
children = {_strip_ns(c.tag): c for c in item}
|
||||
|
|
|
|||
|
|
@ -125,7 +125,7 @@ def fetch_url(url: str, headers: dict | None = None, retries: int = MAX_RETRIES)
|
|||
return json.loads(raw.decode("utf-8", errors="replace"))
|
||||
except urllib.error.HTTPError as e:
|
||||
last_err = e
|
||||
if e.code in (404, 400):
|
||||
if e.code in {404, 400}:
|
||||
break # no point retrying
|
||||
wait = BACKOFF_BASE ** attempt
|
||||
time.sleep(wait)
|
||||
|
|
|
|||
|
|
@ -95,11 +95,11 @@ def one_rep_max(weight, reps):
|
|||
|
||||
def macros(tdee_kcal, goal):
|
||||
goal = goal.lower()
|
||||
if goal in ("cut", "lose", "deficit"):
|
||||
if goal in {"cut", "lose", "deficit"}:
|
||||
cals = tdee_kcal - 500
|
||||
p, f, c = 0.40, 0.30, 0.30
|
||||
label = "Fat Loss (-500 kcal)"
|
||||
elif goal in ("bulk", "gain", "surplus"):
|
||||
elif goal in {"bulk", "gain", "surplus"}:
|
||||
cals = tdee_kcal + 400
|
||||
p, f, c = 0.30, 0.25, 0.45
|
||||
label = "Lean Bulk (+400 kcal)"
|
||||
|
|
@ -184,7 +184,7 @@ def main():
|
|||
int(sys.argv[4]), sys.argv[5], int(sys.argv[6]),
|
||||
)
|
||||
|
||||
elif cmd in ("1rm", "orm"):
|
||||
elif cmd in {"1rm", "orm"}:
|
||||
one_rep_max(float(sys.argv[2]), int(sys.argv[3]))
|
||||
|
||||
elif cmd == "macros":
|
||||
|
|
|
|||
|
|
@ -610,7 +610,7 @@ def _is_secret_key(key: str) -> bool:
|
|||
normalized = _normalize_secret_key(key)
|
||||
if normalized == "token" or normalized.endswith("token"):
|
||||
return True
|
||||
if normalized in ("auth", "authorization"):
|
||||
if normalized in {"auth", "authorization"}:
|
||||
return True
|
||||
return any(marker in normalized for marker in _SECRET_KEY_MARKERS)
|
||||
|
||||
|
|
@ -831,7 +831,7 @@ class Migrator:
|
|||
# Flip the config-block flag when a conflict/error occurs on a
|
||||
# config.yaml write. Later config-mutating options will skip rather
|
||||
# than attempting a partial write.
|
||||
if status in (STATUS_CONFLICT, STATUS_ERROR) and destination is not None:
|
||||
if status in {STATUS_CONFLICT, STATUS_ERROR} and destination is not None:
|
||||
dest_str = str(destination)
|
||||
if dest_str.endswith("config.yaml") or dest_str.endswith("config.yml"):
|
||||
self._config_apply_blocked = True
|
||||
|
|
@ -1526,7 +1526,7 @@ class Migrator:
|
|||
api_key = resolve_secret_input(raw_key, openclaw_env)
|
||||
if not api_key:
|
||||
# Warn if a SecretRef with file/exec source was silently unresolvable
|
||||
if isinstance(raw_key, dict) and raw_key.get("source") in ("file", "exec"):
|
||||
if isinstance(raw_key, dict) and raw_key.get("source") in {"file", "exec"}:
|
||||
self.record(
|
||||
"provider-keys",
|
||||
self.source_root / "openclaw.json",
|
||||
|
|
@ -1736,7 +1736,7 @@ class Migrator:
|
|||
tts_data: Dict[str, Any] = {}
|
||||
|
||||
provider = tts.get("provider")
|
||||
if isinstance(provider, str) and provider in ("elevenlabs", "openai", "edge", "microsoft"):
|
||||
if isinstance(provider, str) and provider in {"elevenlabs", "openai", "edge", "microsoft"}:
|
||||
# OpenClaw renamed "edge" to "microsoft"; Hermes still uses "edge"
|
||||
tts_data["provider"] = "edge" if provider == "microsoft" else provider
|
||||
|
||||
|
|
@ -2304,11 +2304,11 @@ class Migrator:
|
|||
if defaults.get("thinkingDefault"):
|
||||
# Map OpenClaw thinking -> Hermes reasoning_effort
|
||||
thinking = defaults["thinkingDefault"]
|
||||
if thinking in ("always", "high", "xhigh"):
|
||||
if thinking in {"always", "high", "xhigh"}:
|
||||
agent_cfg["reasoning_effort"] = "high"
|
||||
elif thinking in ("auto", "medium", "adaptive"):
|
||||
elif thinking in {"auto", "medium", "adaptive"}:
|
||||
agent_cfg["reasoning_effort"] = "medium"
|
||||
elif thinking in ("off", "low", "none", "minimal"):
|
||||
elif thinking in {"off", "low", "none", "minimal"}:
|
||||
agent_cfg["reasoning_effort"] = "low"
|
||||
changes = True
|
||||
|
||||
|
|
@ -2626,8 +2626,8 @@ class Migrator:
|
|||
if not isinstance(ch_cfg, dict):
|
||||
continue
|
||||
complex_keys = {k: v for k, v in ch_cfg.items()
|
||||
if k not in ("botToken", "appToken", "allowFrom", "enabled")
|
||||
and v and k not in ("requireMention", "autoThread")}
|
||||
if k not in {"botToken", "appToken", "allowFrom", "enabled"}
|
||||
and v and k not in {"requireMention", "autoThread"}}
|
||||
if complex_keys:
|
||||
complex_archive[ch_name] = complex_keys
|
||||
|
||||
|
|
@ -2671,7 +2671,7 @@ class Migrator:
|
|||
|
||||
# Archive remaining browser settings
|
||||
advanced = {k: v for k, v in browser.items()
|
||||
if k not in ("cdpUrl", "headless") and v}
|
||||
if k not in {"cdpUrl", "headless"} and v}
|
||||
if advanced and self.archive_dir:
|
||||
if self.execute:
|
||||
self.archive_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ def _config_lookup(*paths: tuple[str, ...], default: str = "") -> str:
|
|||
node = None
|
||||
break
|
||||
node = node.get(key)
|
||||
if node not in (None, "") and not isinstance(node, dict):
|
||||
if node not in {None, ""} and not isinstance(node, dict):
|
||||
return str(node)
|
||||
return default
|
||||
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ def main() -> int:
|
|||
field = args.field
|
||||
if field is None:
|
||||
for k, v in vars(org).items():
|
||||
if isinstance(v, str) and not k.startswith("_") and k not in ("id",):
|
||||
if isinstance(v, str) and not k.startswith("_") and k not in {"id",}:
|
||||
field = k
|
||||
break
|
||||
val = getattr(org, field, None) if field else None
|
||||
|
|
|
|||
|
|
@ -185,7 +185,7 @@ def whois_lookup(domain):
|
|||
for key, pat in patterns.items():
|
||||
matches = re.findall(pat, raw, re.IGNORECASE)
|
||||
if matches:
|
||||
if key in ("name_servers", "status"):
|
||||
if key in {"name_servers", "status"}:
|
||||
result[key] = list(dict.fromkeys(m.strip().lower() for m in matches))
|
||||
else:
|
||||
result[key] = matches[0].strip()
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ def get(
|
|||
f"HTTP 429 rate-limited by {urllib.parse.urlsplit(url).netloc}. "
|
||||
f"Slow down or supply a real API key. Body: {body[:300]}"
|
||||
) from e
|
||||
if e.code in (500, 502, 503, 504) and attempt < max_retries:
|
||||
if e.code in {500, 502, 503, 504} and attempt < max_retries:
|
||||
retry_after = e.headers.get("Retry-After") if e.headers else None
|
||||
wait = float(retry_after) if (retry_after and retry_after.isdigit()) else backoff ** (attempt + 1)
|
||||
time.sleep(wait)
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ def fetch(
|
|||
|
||||
with zipfile.ZipFile(zip_path) as zf:
|
||||
for node_type, csv_substring in targets:
|
||||
relevant_needles = [n for (k, n) in needles if k in (node_type, "Entity", "Officer")] or []
|
||||
relevant_needles = [n for (k, n) in needles if k in {node_type, "Entity", "Officer"}] or []
|
||||
# Only scan a CSV if we have a needle that could plausibly match it,
|
||||
# or if we have ONLY a jurisdiction filter.
|
||||
applicable_needles = [n for (k, n) in needles if k == node_type]
|
||||
|
|
|
|||
14
plugins/browser/browser_use/__init__.py
Normal file
14
plugins/browser/browser_use/__init__.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
"""Browser Use cloud browser plugin — bundled, auto-loaded.
|
||||
|
||||
Mirrors the ``plugins/web/<vendor>/`` layout: ``provider.py`` holds the
|
||||
provider class; ``__init__.py::register`` instantiates and registers it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from plugins.browser.browser_use.provider import BrowserUseBrowserProvider
|
||||
|
||||
|
||||
def register(ctx) -> None:
    """Plugin entry point: register a Browser Use provider instance.

    Creates a new ``BrowserUseBrowserProvider`` and passes it to
    ``ctx.register_browser_provider``.
    """
    provider = BrowserUseBrowserProvider()
    ctx.register_browser_provider(provider)
|
||||
7
plugins/browser/browser_use/plugin.yaml
Normal file
7
plugins/browser/browser_use/plugin.yaml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
name: browser-browser-use
|
||||
version: 1.0.0
|
||||
description: "Browser Use (https://browser-use.com) cloud browser backend. Supports both direct BROWSER_USE_API_KEY and the managed Nous tool gateway. Also powers the 'Nous Subscription' UX flow that bills usage to a Nous subscription."
|
||||
author: NousResearch
|
||||
kind: backend
|
||||
provides_browser_providers:
|
||||
- browser-use
|
||||
|
|
@ -1,4 +1,32 @@
|
|||
"""Browser Use cloud browser provider."""
|
||||
"""Browser Use cloud browser provider — plugin form.
|
||||
|
||||
Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
|
||||
ABC introduced in PR #25214). The legacy in-tree module
|
||||
``tools.browser_providers.browser_use`` was removed in the same PR; this file
|
||||
is now the canonical implementation.
|
||||
|
||||
Browser Use is the only browser backend with dual auth: a direct
|
||||
``BROWSER_USE_API_KEY`` for self-billed users, or the managed Nous tool
|
||||
gateway (which Hermes uses to bill Browser Use sessions to a Nous
|
||||
subscription). The dispatch order — direct API key first, managed gateway
|
||||
second — preserves the pre-migration behaviour in
|
||||
``tools.browser_providers.browser_use.BrowserUseProvider._get_config_or_none``.
|
||||
|
||||
Config keys this provider responds to::
|
||||
|
||||
browser:
|
||||
cloud_provider: "browser-use" # explicit selection
|
||||
tool_gateway:
|
||||
browser: "gateway" # optional: prefer managed gateway
|
||||
# even when BROWSER_USE_API_KEY is set
|
||||
|
||||
Auth env vars (one of)::
|
||||
|
||||
BROWSER_USE_API_KEY=... # https://browser-use.com
|
||||
# OR a managed Nous gateway entry (configured via 'hermes setup')
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
|
@ -8,11 +36,14 @@ from typing import Any, Dict, Optional
|
|||
|
||||
import requests
|
||||
|
||||
from tools.browser_providers.base import CloudBrowserProvider
|
||||
from tools.managed_tool_gateway import resolve_managed_tool_gateway
|
||||
from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway
|
||||
from agent.browser_provider import BrowserProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Idempotency tracking for managed-mode session creation. The managed Nous
|
||||
# gateway returns 409 "already in progress" on retried POSTs; we forward the
|
||||
# original idempotency key so the gateway can deduplicate. Cleared on
|
||||
# success or terminal failure.
|
||||
_pending_create_keys: Dict[str, str] = {}
|
||||
_pending_create_keys_lock = threading.Lock()
|
||||
|
||||
|
|
@ -38,6 +69,16 @@ def _clear_pending_create_key(task_id: str) -> None:
|
|||
|
||||
|
||||
def _should_preserve_pending_create_key(response: requests.Response) -> bool:
|
||||
"""Decide whether to keep the idempotency key after a failed create.
|
||||
|
||||
Preserve the key when the failure looks retryable (5xx) OR when the
|
||||
gateway reports the original request is still in flight (409 "already
|
||||
in progress") — in either case, retrying with the same key lets the
|
||||
gateway deduplicate.
|
||||
|
||||
Drop the key on any other 4xx (auth failure, bad request, etc.) — those
|
||||
won't succeed by being retried.
|
||||
"""
|
||||
if response.status_code >= 500:
|
||||
return True
|
||||
|
||||
|
|
@ -60,13 +101,24 @@ def _should_preserve_pending_create_key(response: requests.Response) -> bool:
|
|||
return "already in progress" in message
|
||||
|
||||
|
||||
class BrowserUseProvider(CloudBrowserProvider):
|
||||
"""Browser Use (https://browser-use.com) cloud browser backend."""
|
||||
class BrowserUseBrowserProvider(BrowserProvider):
|
||||
"""Browser Use (https://browser-use.com) cloud browser backend.
|
||||
|
||||
def provider_name(self) -> str:
|
||||
Dual auth: prefers a direct BROWSER_USE_API_KEY when set, falling back
|
||||
to the managed Nous tool gateway when ``tool_gateway.browser`` config
|
||||
routes through it. Setting ``tool_gateway.browser: gateway`` flips the
|
||||
order so managed billing wins even when BROWSER_USE_API_KEY is present.
|
||||
"""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "browser-use"
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
return "Browser Use"
|
||||
|
||||
def is_configured(self) -> bool:
|
||||
def is_available(self) -> bool:
|
||||
return self._get_config_or_none() is not None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
|
@ -74,6 +126,14 @@ class BrowserUseProvider(CloudBrowserProvider):
|
|||
# ------------------------------------------------------------------
|
||||
|
||||
def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
|
||||
# Import here to avoid a hard dependency at module-import time —
|
||||
# managed_tool_gateway pulls in the Nous auth stack which can be
|
||||
# heavy and is not needed for direct-API-key users.
|
||||
from tools.managed_tool_gateway import resolve_managed_tool_gateway
|
||||
from tools.tool_backend_helpers import prefers_gateway
|
||||
|
||||
# Direct API key wins unless the user has explicitly opted into the
|
||||
# managed Nous gateway via ``tool_gateway.browser: gateway``.
|
||||
api_key = os.environ.get("BROWSER_USE_API_KEY")
|
||||
if api_key and not prefers_gateway("browser"):
|
||||
return {
|
||||
|
|
@ -93,6 +153,8 @@ class BrowserUseProvider(CloudBrowserProvider):
|
|||
}
|
||||
|
||||
def _get_config(self) -> Dict[str, Any]:
|
||||
from tools.tool_backend_helpers import managed_nous_tools_enabled
|
||||
|
||||
config = self._get_config_or_none()
|
||||
if config is None:
|
||||
message = (
|
||||
|
|
@ -111,11 +173,10 @@ class BrowserUseProvider(CloudBrowserProvider):
|
|||
# ------------------------------------------------------------------
|
||||
|
||||
def _headers(self, config: Dict[str, Any]) -> Dict[str, str]:
|
||||
headers = {
|
||||
return {
|
||||
"Content-Type": "application/json",
|
||||
"X-Browser-Use-API-Key": config["api_key"],
|
||||
}
|
||||
return headers
|
||||
|
||||
def create_session(self, task_id: str) -> Dict[str, object]:
|
||||
config = self._get_config()
|
||||
|
|
@ -166,7 +227,9 @@ class BrowserUseProvider(CloudBrowserProvider):
|
|||
if managed_mode:
|
||||
_clear_pending_create_key(task_id)
|
||||
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
|
||||
external_call_id = response.headers.get("x-external-call-id") if managed_mode else None
|
||||
external_call_id = (
|
||||
response.headers.get("x-external-call-id") if managed_mode else None
|
||||
)
|
||||
|
||||
logger.info("Created Browser Use session %s", session_name)
|
||||
|
||||
|
|
@ -184,7 +247,9 @@ class BrowserUseProvider(CloudBrowserProvider):
|
|||
try:
|
||||
config = self._get_config()
|
||||
except ValueError:
|
||||
logger.warning("Cannot close Browser Use session %s — missing credentials", session_id)
|
||||
logger.warning(
|
||||
"Cannot close Browser Use session %s — missing credentials", session_id
|
||||
)
|
||||
return False
|
||||
|
||||
try:
|
||||
|
|
@ -212,7 +277,10 @@ class BrowserUseProvider(CloudBrowserProvider):
|
|||
def emergency_cleanup(self, session_id: str) -> None:
|
||||
config = self._get_config_or_none()
|
||||
if config is None:
|
||||
logger.warning("Cannot emergency-cleanup Browser Use session %s — missing credentials", session_id)
|
||||
logger.warning(
|
||||
"Cannot emergency-cleanup Browser Use session %s — missing credentials",
|
||||
session_id,
|
||||
)
|
||||
return
|
||||
try:
|
||||
requests.patch(
|
||||
|
|
@ -222,4 +290,21 @@ class BrowserUseProvider(CloudBrowserProvider):
|
|||
timeout=5,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Emergency cleanup failed for Browser Use session %s: %s", session_id, e)
|
||||
logger.debug(
|
||||
"Emergency cleanup failed for Browser Use session %s: %s", session_id, e
|
||||
)
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
    """Return the static setup description for the Browser Use backend.

    The result carries the display ``name``, a ``badge`` and ``tag`` line,
    the list of environment variables to prompt for (a single
    ``BROWSER_USE_API_KEY`` entry with its prompt text and sign-up URL),
    and the ``post_setup`` step identifier. A fresh dict is built on every
    call; no instance state is read.
    """
    api_key_entry = {
        "key": "BROWSER_USE_API_KEY",
        "prompt": "Browser Use API key",
        "url": "https://browser-use.com",
    }
    schema: Dict[str, Any] = {}
    schema["name"] = "Browser Use"
    schema["badge"] = "paid"
    schema["tag"] = "Cloud browser with remote execution"
    schema["env_vars"] = [api_key_entry]
    schema["post_setup"] = "agent_browser"
    return schema
|
||||
15
plugins/browser/browserbase/__init__.py
Normal file
15
plugins/browser/browserbase/__init__.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
"""Browserbase cloud browser plugin — bundled, auto-loaded.
|
||||
|
||||
Mirrors the ``plugins/web/<vendor>/`` and ``plugins/image_gen/openai/``
|
||||
layout: ``provider.py`` holds the provider class; ``__init__.py::register``
|
||||
instantiates and registers it via the plugin context.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from plugins.browser.browserbase.provider import BrowserbaseBrowserProvider
|
||||
|
||||
|
||||
def register(ctx) -> None:
    """Plugin entry point: register a Browserbase provider instance.

    Creates a new ``BrowserbaseBrowserProvider`` and passes it to
    ``ctx.register_browser_provider``.
    """
    provider = BrowserbaseBrowserProvider()
    ctx.register_browser_provider(provider)
|
||||
7
plugins/browser/browserbase/plugin.yaml
Normal file
7
plugins/browser/browserbase/plugin.yaml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
name: browser-browserbase
|
||||
version: 1.0.0
|
||||
description: "Browserbase (https://browserbase.com) cloud browser backend. Requires BROWSERBASE_API_KEY + BROWSERBASE_PROJECT_ID. Supports stealth, proxies, and keep-alive sessions; auto-falls-back when paid features are unavailable."
|
||||
author: NousResearch
|
||||
kind: backend
|
||||
provides_browser_providers:
|
||||
- browserbase
|
||||
|
|
@ -1,4 +1,35 @@
|
|||
"""Browserbase cloud browser provider (direct credentials only)."""
|
||||
"""Browserbase cloud browser provider — plugin form.
|
||||
|
||||
Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
|
||||
ABC introduced in PR #25214). The legacy in-tree module
|
||||
``tools.browser_providers.browserbase`` was removed in the same PR; this file
|
||||
is now the canonical implementation.
|
||||
|
||||
Browserbase requires direct ``BROWSERBASE_API_KEY`` and ``BROWSERBASE_PROJECT_ID``
|
||||
credentials. Managed Nous gateway support has been removed — the Nous
|
||||
subscription now routes through Browser Use instead (see
|
||||
``plugins/browser/browser_use/``).
|
||||
|
||||
Config keys this provider responds to::
|
||||
|
||||
browser:
|
||||
cloud_provider: "browserbase"
|
||||
|
||||
Auth env vars::
|
||||
|
||||
BROWSERBASE_API_KEY=... # https://browserbase.com
|
||||
BROWSERBASE_PROJECT_ID=...
|
||||
|
||||
Optional feature knobs::
|
||||
|
||||
BROWSERBASE_BASE_URL=... # default https://api.browserbase.com
|
||||
BROWSERBASE_PROXIES=true # default true
|
||||
BROWSERBASE_ADVANCED_STEALTH=false
|
||||
BROWSERBASE_KEEP_ALIVE=true # default true
|
||||
BROWSERBASE_SESSION_TIMEOUT=... (ms, integer)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
|
@ -7,27 +38,31 @@ from typing import Any, Dict, Optional
|
|||
|
||||
import requests
|
||||
|
||||
from tools.browser_providers.base import CloudBrowserProvider
|
||||
from agent.browser_provider import BrowserProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BrowserbaseProvider(CloudBrowserProvider):
|
||||
class BrowserbaseBrowserProvider(BrowserProvider):
|
||||
"""Browserbase (https://browserbase.com) cloud browser backend.
|
||||
|
||||
This provider requires direct BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID
|
||||
credentials. Managed Nous gateway support has been removed — the Nous
|
||||
subscription now routes through Browser Use instead.
|
||||
Direct credentials only — managed-Nous-gateway support lives on the
|
||||
Browser Use provider now.
|
||||
"""
|
||||
|
||||
def provider_name(self) -> str:
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "browserbase"
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
return "Browserbase"
|
||||
|
||||
def is_configured(self) -> bool:
|
||||
def is_available(self) -> bool:
|
||||
return self._get_config_or_none() is not None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Session lifecycle
|
||||
# Config resolution
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
|
||||
|
|
@ -37,7 +72,9 @@ class BrowserbaseProvider(CloudBrowserProvider):
|
|||
return {
|
||||
"api_key": api_key,
|
||||
"project_id": project_id,
|
||||
"base_url": os.environ.get("BROWSERBASE_BASE_URL", "https://api.browserbase.com").rstrip("/"),
|
||||
"base_url": os.environ.get(
|
||||
"BROWSERBASE_BASE_URL", "https://api.browserbase.com"
|
||||
).rstrip("/"),
|
||||
}
|
||||
return None
|
||||
|
||||
|
|
@ -50,13 +87,21 @@ class BrowserbaseProvider(CloudBrowserProvider):
|
|||
)
|
||||
return config
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Session lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def create_session(self, task_id: str) -> Dict[str, object]:
|
||||
config = self._get_config()
|
||||
|
||||
# Optional env-var knobs
|
||||
enable_proxies = os.environ.get("BROWSERBASE_PROXIES", "true").lower() != "false"
|
||||
enable_advanced_stealth = os.environ.get("BROWSERBASE_ADVANCED_STEALTH", "false").lower() == "true"
|
||||
enable_keep_alive = os.environ.get("BROWSERBASE_KEEP_ALIVE", "true").lower() != "false"
|
||||
enable_advanced_stealth = (
|
||||
os.environ.get("BROWSERBASE_ADVANCED_STEALTH", "false").lower() == "true"
|
||||
)
|
||||
enable_keep_alive = (
|
||||
os.environ.get("BROWSERBASE_KEEP_ALIVE", "true").lower() != "false"
|
||||
)
|
||||
custom_timeout_ms = os.environ.get("BROWSERBASE_SESSION_TIMEOUT")
|
||||
|
||||
features_enabled = {
|
||||
|
|
@ -78,7 +123,9 @@ class BrowserbaseProvider(CloudBrowserProvider):
|
|||
if timeout_val > 0:
|
||||
session_config["timeout"] = timeout_val
|
||||
except ValueError:
|
||||
logger.warning("Invalid BROWSERBASE_SESSION_TIMEOUT value: %s", custom_timeout_ms)
|
||||
logger.warning(
|
||||
"Invalid BROWSERBASE_SESSION_TIMEOUT value: %s", custom_timeout_ms
|
||||
)
|
||||
|
||||
if enable_proxies:
|
||||
session_config["proxies"] = True
|
||||
|
|
@ -156,7 +203,9 @@ class BrowserbaseProvider(CloudBrowserProvider):
|
|||
features_enabled["custom_timeout"] = True
|
||||
|
||||
feature_str = ", ".join(k for k, v in features_enabled.items() if v)
|
||||
logger.info("Created Browserbase session %s with features: %s", session_name, feature_str)
|
||||
logger.info(
|
||||
"Created Browserbase session %s with features: %s", session_name, feature_str
|
||||
)
|
||||
|
||||
return {
|
||||
"session_name": session_name,
|
||||
|
|
@ -169,7 +218,9 @@ class BrowserbaseProvider(CloudBrowserProvider):
|
|||
try:
|
||||
config = self._get_config()
|
||||
except ValueError:
|
||||
logger.warning("Cannot close Browserbase session %s — missing credentials", session_id)
|
||||
logger.warning(
|
||||
"Cannot close Browserbase session %s — missing credentials", session_id
|
||||
)
|
||||
return False
|
||||
|
||||
try:
|
||||
|
|
@ -203,7 +254,10 @@ class BrowserbaseProvider(CloudBrowserProvider):
|
|||
def emergency_cleanup(self, session_id: str) -> None:
|
||||
config = self._get_config_or_none()
|
||||
if config is None:
|
||||
logger.warning("Cannot emergency-cleanup Browserbase session %s — missing credentials", session_id)
|
||||
logger.warning(
|
||||
"Cannot emergency-cleanup Browserbase session %s — missing credentials",
|
||||
session_id,
|
||||
)
|
||||
return
|
||||
try:
|
||||
requests.post(
|
||||
|
|
@ -219,4 +273,25 @@ class BrowserbaseProvider(CloudBrowserProvider):
|
|||
timeout=5,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Emergency cleanup failed for Browserbase session %s: %s", session_id, e)
|
||||
logger.debug(
|
||||
"Emergency cleanup failed for Browserbase session %s: %s", session_id, e
|
||||
)
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
    """Return the static setup metadata for the Browserbase provider.

    The mapping contains:

    * ``name`` / ``badge`` / ``tag`` -- display strings for the provider.
    * ``env_vars`` -- the two credentials to collect, in order: the API key
      (with a ``url``) and the project ID (no ``url`` entry).
    * ``post_setup`` -- identifier of the follow-up setup step.

    A new dict is built on every call, so callers may mutate the result
    without affecting later calls.
    """
    # NOTE(review): presumably consumed by the interactive setup flow --
    # confirm against the caller before renaming any key.
    api_key_entry = {
        "key": "BROWSERBASE_API_KEY",
        "prompt": "Browserbase API key",
        "url": "https://browserbase.com",
    }
    project_id_entry = {
        "key": "BROWSERBASE_PROJECT_ID",
        "prompt": "Browserbase project ID",
    }
    return {
        "name": "Browserbase",
        "badge": "paid",
        "tag": "Cloud browser with stealth and proxies",
        "env_vars": [api_key_entry, project_id_entry],
        "post_setup": "agent_browser",
    }
|
||||
16
plugins/browser/firecrawl/__init__.py
Normal file
16
plugins/browser/firecrawl/__init__.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
"""Firecrawl cloud browser plugin — bundled, auto-loaded.
|
||||
|
||||
Distinct from ``plugins/web/firecrawl/`` (the web search/extract/crawl
|
||||
plugin); both share the FIRECRAWL_API_KEY but speak to different endpoints
|
||||
(``/v2/browser`` here vs ``/v2/search`` / ``/v2/scrape`` / ``/v2/crawl``
|
||||
over there).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from plugins.browser.firecrawl.provider import FirecrawlBrowserProvider
|
||||
|
||||
|
||||
def register(ctx) -> None:
    """Register the Firecrawl cloud-browser provider with the plugin context.

    Instantiates :class:`FirecrawlBrowserProvider` and hands the instance to
    ``ctx.register_browser_provider``.
    """
    provider = FirecrawlBrowserProvider()
    ctx.register_browser_provider(provider)
|
||||
7
plugins/browser/firecrawl/plugin.yaml
Normal file
7
plugins/browser/firecrawl/plugin.yaml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
name: browser-firecrawl
|
||||
version: 1.0.0
|
||||
description: "Firecrawl (https://firecrawl.dev) cloud browser backend. Requires FIRECRAWL_API_KEY. Distinct from the firecrawl WEB search/extract plugin — the two share an API key but operate on different endpoints."
|
||||
author: NousResearch
|
||||
kind: backend
|
||||
provides_browser_providers:
|
||||
- firecrawl
|
||||
|
|
@ -1,26 +1,61 @@
|
|||
"""Firecrawl cloud browser provider."""
|
||||
"""Firecrawl cloud browser provider — plugin form.
|
||||
|
||||
Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
|
||||
ABC introduced in PR #25214). The legacy in-tree module
|
||||
``tools.browser_providers.firecrawl`` was removed in the same PR; this file
|
||||
is now the canonical implementation.
|
||||
|
||||
This is the cloud-browser path — distinct from the firecrawl WEB plugin at
|
||||
``plugins/web/firecrawl/`` which handles search/extract/crawl on
|
||||
``/v2/search`` / ``/v2/scrape`` / ``/v2/crawl``. The two plugins share the
|
||||
``FIRECRAWL_API_KEY`` env var but talk to different endpoints (this one
|
||||
hits ``/v2/browser``).
|
||||
|
||||
Config keys this provider responds to::
|
||||
|
||||
browser:
|
||||
cloud_provider: "firecrawl" # explicit selection only — not in the
|
||||
# legacy auto-detect walk
|
||||
|
||||
Auth env vars::
|
||||
|
||||
FIRECRAWL_API_KEY=... # https://firecrawl.dev
|
||||
FIRECRAWL_API_URL=... # optional override (default https://api.firecrawl.dev)
|
||||
FIRECRAWL_BROWSER_TTL=... # optional, default 300 seconds
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from typing import Dict
|
||||
from typing import Any, Dict
|
||||
|
||||
import requests
|
||||
|
||||
from tools.browser_providers.base import CloudBrowserProvider
|
||||
from agent.browser_provider import BrowserProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BASE_URL = "https://api.firecrawl.dev"
|
||||
|
||||
|
||||
class FirecrawlProvider(CloudBrowserProvider):
|
||||
"""Firecrawl (https://firecrawl.dev) cloud browser backend."""
|
||||
class FirecrawlBrowserProvider(BrowserProvider):
|
||||
"""Firecrawl (https://firecrawl.dev) cloud browser backend.
|
||||
|
||||
def provider_name(self) -> str:
|
||||
Cloud-browser path only — search/extract/crawl live in the separate
|
||||
``plugins/web/firecrawl/`` plugin.
|
||||
"""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "firecrawl"
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
return "Firecrawl"
|
||||
|
||||
def is_configured(self) -> bool:
|
||||
def is_available(self) -> bool:
|
||||
return bool(os.environ.get("FIRECRAWL_API_KEY"))
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
|
@ -100,13 +135,34 @@ class FirecrawlProvider(CloudBrowserProvider):
|
|||
return False
|
||||
|
||||
def emergency_cleanup(self, session_id: str) -> None:
    """Best-effort deletion of a Firecrawl browser session.

    Args:
        session_id: Identifier of the session to delete; interpolated into
            the ``/v2/browser/<session_id>`` path.

    Returns:
        None. The method never raises: when ``is_available()`` is false it
        logs a warning and returns without making a request, and any
        exception from building or sending the request is logged at debug
        level and swallowed.
    """
    if not self.is_available():
        logger.warning(
            "Cannot emergency-cleanup Firecrawl session %s — missing credentials",
            session_id,
        )
        return
    try:
        requests.delete(
            f"{self._api_url()}/v2/browser/{session_id}",
            headers=self._headers(),
            timeout=5,
        )
    except Exception as e:
        # Deliberately broad: this is a last-chance cleanup path, so a
        # failure here is recorded but must not propagate to the caller.
        logger.debug(
            "Emergency cleanup failed for Firecrawl session %s: %s", session_id, e
        )
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
    """Return the static setup metadata for the Firecrawl browser provider.

    The mapping contains:

    * ``name`` / ``badge`` / ``tag`` -- display strings for the provider.
    * ``env_vars`` -- one entry per environment variable to collect, each
      with its ``key``, a human-readable ``prompt``, and a ``url``.
    * ``post_setup`` -- identifier of the follow-up setup step.

    A new dict is built on every call, so callers may mutate the result
    without affecting later calls.
    """
    # NOTE(review): presumably consumed by the interactive setup flow --
    # confirm against the caller before renaming any key.
    api_key_entry = {
        "key": "FIRECRAWL_API_KEY",
        "prompt": "Firecrawl API key",
        "url": "https://firecrawl.dev",
    }
    return {
        "name": "Firecrawl",
        "badge": "paid",
        "tag": "Cloud browser with remote execution",
        "env_vars": [api_key_entry],
        "post_setup": "agent_browser",
    }
|
||||
|
|
@ -222,7 +222,7 @@ def _fmt_summary(summary: Dict[str, Any]) -> str:
|
|||
|
||||
def _handle_slash(raw_args: str) -> Optional[str]:
|
||||
argv = raw_args.strip().split()
|
||||
if not argv or argv[0] in ("help", "-h", "--help"):
|
||||
if not argv or argv[0] in {"help", "-h", "--help"}:
|
||||
return _HELP_TEXT
|
||||
|
||||
sub = argv[0]
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ def register(ctx) -> None:
|
|||
# tested path there and guest-join Chromium is flakier. Refuse to register
|
||||
# rather than half-working.
|
||||
system = platform.system().lower()
|
||||
if system not in ("linux", "darwin"):
|
||||
if system not in {"linux", "darwin"}:
|
||||
logger.info(
|
||||
"google_meet plugin: platform=%s not supported (linux/macos only)",
|
||||
system,
|
||||
|
|
|
|||
|
|
@ -159,7 +159,7 @@ def _cmd_setup() -> int:
|
|||
print("---------------------")
|
||||
|
||||
system = _p.system()
|
||||
system_ok = system in ("Linux", "Darwin")
|
||||
system_ok = system in {"Linux", "Darwin"}
|
||||
print(f" platform : {system} [{'ok' if system_ok else 'unsupported'}]")
|
||||
|
||||
try:
|
||||
|
|
@ -231,7 +231,7 @@ def _cmd_install(*, realtime: bool, assume_yes: bool) -> int:
|
|||
import subprocess as _sp
|
||||
|
||||
system = _p.system()
|
||||
if system not in ("Linux", "Darwin"):
|
||||
if system not in {"Linux", "Darwin"}:
|
||||
print(f"google_meet install: {system} is not supported (linux/macos only)")
|
||||
return 1
|
||||
|
||||
|
|
@ -242,7 +242,7 @@ def _cmd_install(*, realtime: bool, assume_yes: bool) -> int:
|
|||
ans = input(f"{prompt} [y/N] ").strip().lower()
|
||||
except EOFError:
|
||||
return False
|
||||
return ans in ("y", "yes")
|
||||
return ans in {"y", "yes"}
|
||||
|
||||
print("google_meet install")
|
||||
print("-------------------")
|
||||
|
|
|
|||
|
|
@ -447,7 +447,7 @@ def _mac_audio_device_index(device_name: str) -> str:
|
|||
def run_bot() -> int: # noqa: C901 — orchestration, explicit branches
|
||||
url = os.environ.get("HERMES_MEET_URL", "").strip()
|
||||
out_dir_env = os.environ.get("HERMES_MEET_OUT_DIR", "").strip()
|
||||
headed = os.environ.get("HERMES_MEET_HEADED", "").lower() in ("1", "true", "yes")
|
||||
headed = os.environ.get("HERMES_MEET_HEADED", "").lower() in {"1", "true", "yes"}
|
||||
auth_state = os.environ.get("HERMES_MEET_AUTH_STATE", "").strip()
|
||||
guest_name = os.environ.get("HERMES_MEET_GUEST_NAME", "Hermes Agent")
|
||||
duration_s = _parse_duration(os.environ.get("HERMES_MEET_DURATION", ""))
|
||||
|
|
@ -808,7 +808,7 @@ def _looks_like_human_speaker(speaker: str, bot_guest_name: str) -> bool:
|
|||
if not speaker or not speaker.strip():
|
||||
return False
|
||||
spk = speaker.strip().lower()
|
||||
if spk in ("unknown", "you", bot_guest_name.strip().lower()):
|
||||
if spk in {"unknown", "you", bot_guest_name.strip().lower()}:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ def node_command(args: argparse.Namespace) -> int:
|
|||
print(f"removed {args.name!r}" if ok else f"no such node: {args.name!r}")
|
||||
return 0 if ok else 1
|
||||
|
||||
if cmd in ("status", "ping"):
|
||||
if cmd in {"status", "ping"}:
|
||||
entry = reg.get(args.name)
|
||||
if entry is None:
|
||||
print(f"no such node: {args.name!r}", file=sys.stderr)
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ class RealtimeSession:
|
|||
rid = (frame.get("response") or {}).get("id")
|
||||
if rid:
|
||||
self._last_response_id = rid
|
||||
elif ftype in ("response.done", "response.completed", "response.cancelled"):
|
||||
elif ftype in {"response.done", "response.completed", "response.cancelled"}:
|
||||
break
|
||||
elif ftype == "error":
|
||||
err = frame.get("error") or frame
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ def check_meet_requirements() -> bool:
|
|||
handlers relax the requirement when a node is addressed.
|
||||
"""
|
||||
import platform as _p
|
||||
if _p.system().lower() not in ("linux", "darwin"):
|
||||
if _p.system().lower() not in {"linux", "darwin"}:
|
||||
return False
|
||||
try:
|
||||
import playwright # noqa: F401
|
||||
|
|
@ -238,7 +238,7 @@ def handle_meet_join(args: Dict[str, Any], **_kw) -> str:
|
|||
if not url:
|
||||
return _err("url is required")
|
||||
mode = (args.get("mode") or "transcribe").strip().lower()
|
||||
if mode not in ("transcribe", "realtime"):
|
||||
if mode not in {"transcribe", "realtime"}:
|
||||
return _err(f"mode must be 'transcribe' or 'realtime' (got {mode!r})")
|
||||
|
||||
node = args.get("node")
|
||||
|
|
|
|||
359
plugins/kanban/dashboard/dist/index.js
vendored
359
plugins/kanban/dashboard/dist/index.js
vendored
|
|
@ -908,6 +908,7 @@
|
|||
return createNewBoard(payload).then(function () { setShowNewBoard(false); });
|
||||
},
|
||||
}) : null,
|
||||
h(OrchestrationPanel, null),
|
||||
h(AttentionStrip, {
|
||||
boardData,
|
||||
onOpen: setSelectedTaskId,
|
||||
|
|
@ -1386,6 +1387,288 @@
|
|||
}, "?");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// OrchestrationPanel — collapsible settings panel for the kanban
|
||||
// orchestrator (orchestrator profile picker, default assignee picker,
|
||||
// auto-decompose toggle, plus per-profile description editing with
|
||||
// auto-generate). Backed by /orchestration + /profiles endpoints.
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
function OrchestrationPanel() {
  // Collapsible settings panel for the kanban orchestrator.
  //
  // Collapsed: renders the Auto/Manual mode pill plus an "expand" link.
  // Expanded: renders the orchestrator-profile picker, default-assignee
  // picker, auto-decompose checkbox and one ProfileDescriptionRow per
  // profile. Data comes from GET `${API}/orchestration` and
  // GET `${API}/profiles`; writes go through PUT `${API}/orchestration`,
  // PATCH `${API}/profiles/:name` and POST `${API}/profiles/:name/describe-auto`.
  const [expanded, setExpanded] = useState(false);
  const [settings, setSettings] = useState(null); // null = not loaded yet
  const [profiles, setProfiles] = useState([]);
  const [busy, setBusy] = useState({}); // profile name -> "save" | "auto"
  const [msg, setMsg] = useState(null); // { ok: boolean, text: string } | null

  // Turn any rejection value into a printable string (tolerates null /
  // non-Error rejections, which would otherwise throw on `.message`).
  const describeError = function (err) {
    return (err && err.message) || String(err);
  };

  // Fetch settings and profiles together. The returned promise never
  // rejects: it resolves to true when both requests succeeded and to false
  // when either failed (after surfacing the failure in `msg`). Callers that
  // want to show their own success message must wait for this promise,
  // because a successful load clears `msg`.
  const loadAll = useCallback(function () {
    return Promise.all([
      SDK.fetchJSON(`${API}/orchestration`),
      SDK.fetchJSON(`${API}/profiles`),
    ]).then(function (results) {
      setSettings(results[0] || null);
      setProfiles((results[1] && results[1].profiles) || []);
      setMsg(null);
      return true;
    }).catch(function (err) {
      setMsg({ ok: false, text: "Failed to load: " + describeError(err) });
      return false;
    });
  }, []);

  useEffect(function () {
    // Load on mount so the collapsed pill shows the real mode without
    // requiring the user to expand the panel first. Braces keep the effect
    // from returning the promise (React treats a return value as cleanup).
    if (settings === null) {
      loadAll();
    }
  }, [settings, loadAll]);

  // Per-profile busy flag helpers (shared by save and auto-generate).
  const markBusy = function (name, kind) {
    setBusy(function (b) { return Object.assign({}, b, { [name]: kind }); });
  };
  const clearBusy = function (name) {
    setBusy(function (b) {
      const next = Object.assign({}, b);
      delete next[name];
      return next;
    });
  };

  // PUT a partial settings patch; the response replaces local settings.
  // Resolves to the response on success and to undefined on failure.
  const saveSettings = function (patch) {
    setMsg(null);
    return SDK.fetchJSON(`${API}/orchestration`, {
      method: "PUT",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(patch),
    }).then(function (res) {
      setSettings(res);
      setMsg({ ok: true, text: "Settings saved." });
      return res;
    }).catch(function (err) {
      setMsg({ ok: false, text: "Save failed: " + describeError(err) });
    });
  };

  // PATCH a profile description, then reload. The success message is set
  // only after the reload settles; setting it earlier would let loadAll's
  // `setMsg(null)` erase it. If the reload itself failed, its
  // "Failed to load" message is left in place.
  const saveProfileDescription = function (name, description) {
    markBusy(name, "save");
    return SDK.fetchJSON(`${API}/profiles/${encodeURIComponent(name)}`, {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ description: description }),
    }).then(function () {
      return loadAll();
    }).then(function (reloaded) {
      if (reloaded) {
        setMsg({ ok: true, text: `Description saved for ${name}.` });
      }
    }).catch(function (err) {
      setMsg({ ok: false, text: "Save failed: " + describeError(err) });
    }).then(function () {
      // Runs on both paths (the catch above never rethrows).
      clearBusy(name);
    });
  };

  // POST describe-auto for a profile. A response without `ok` is reported
  // as a failure using its `reason`; on success the list is reloaded and
  // the success message is set after the reload (see saveProfileDescription).
  const autoGenerateDescription = function (name, overwrite) {
    markBusy(name, "auto");
    return SDK.fetchJSON(`${API}/profiles/${encodeURIComponent(name)}/describe-auto`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ overwrite: !!overwrite }),
    }).then(function (res) {
      if (!(res && res.ok)) {
        setMsg({
          ok: false,
          text: "Auto-generate failed: " + ((res && res.reason) || "unknown error"),
        });
        return undefined;
      }
      return loadAll().then(function (reloaded) {
        if (reloaded) {
          setMsg({ ok: true, text: `Auto-generated description for ${name}.` });
        }
      });
    }).catch(function (err) {
      setMsg({ ok: false, text: "Auto-generate failed: " + describeError(err) });
    }).then(function () {
      clearBusy(name);
    });
  };

  const headerLabel = expanded
    ? "▾ Orchestration settings"
    : "▸ Orchestration settings";

  // Mode pill — always visible (collapsed or expanded). One click flips
  // between Auto and Manual. Auto = dispatcher decomposes new triage tasks
  // every tick. Manual = pre-PR behavior, the user clicks ⚗ Decompose on
  // each triage card (or runs `hermes kanban decompose <id>`) and tasks
  // stay in triage until then.
  const autoOn = !!(settings && settings.auto_decompose);
  const modePillTitle = settings === null
    ? "Loading mode…"
    : (autoOn
      ? "Orchestration: Auto — the dispatcher decomposes new triage tasks automatically every tick. Click to switch to Manual (pre-PR behavior)."
      : "Orchestration: Manual — triage tasks stay in triage until you click ⚗ Decompose on each card. Click to switch to Auto.");
  const modePill = h("button", {
    type: "button",
    onClick: function () {
      if (settings === null) return; // not loaded yet
      saveSettings({ auto_decompose: !autoOn });
    },
    disabled: settings === null,
    title: modePillTitle,
    className: "inline-flex items-center gap-1 rounded-full border px-2 py-0.5 "
      + "text-xs font-medium "
      + (autoOn
        ? "border-emerald-500/40 bg-emerald-500/10 text-emerald-700 dark:text-emerald-300"
        : "border-muted-foreground/30 bg-muted/30 text-muted-foreground"),
  },
    "Orchestration: ",
    h("span", { className: "ml-1 font-semibold" },
      settings === null ? "…" : (autoOn ? "Auto" : "Manual"))
  );

  if (!expanded) {
    return h("div", { className: "flex items-center gap-3 text-xs" },
      modePill,
      h("button", {
        type: "button",
        onClick: function () { setExpanded(true); },
        className: "underline text-muted-foreground hover:text-foreground",
        title: "Configure the kanban orchestrator (profile picker, default assignee, auto-decompose, profile descriptions)",
      }, headerLabel)
    );
  }

  const profileOptions = profiles.map(function (p) {
    const tag = p.is_default ? " (default)" : "";
    return h(SelectOption, { key: p.name, value: p.name }, p.name + tag);
  });

  // Select components may hand us either a DOM-style event or the raw
  // value; normalise both to a string ("" = use the default).
  const selectedValue = function (e) {
    return (e && e.target ? e.target.value : e) || "";
  };

  return h(Card, { className: "p-3" },
    h(CardContent, { className: "p-2 flex flex-col gap-3" },
      h("div", { className: "flex items-center justify-between" },
        h("button", {
          type: "button",
          onClick: function () { setExpanded(false); },
          className: "text-sm font-medium underline-offset-2 hover:underline",
        }, headerLabel),
        modePill,
        h(Button, { onClick: loadAll, size: "sm" }, "Reload")
      ),
      msg ? h("div", {
        className: msg.ok ? "hermes-kanban-msg-ok" : "hermes-kanban-msg-err",
      }, msg.text) : null,

      settings ? h("div", { className: "grid gap-3 sm:grid-cols-3" },
        h("div", { className: "flex flex-col gap-1" },
          h(Label, { className: "text-xs text-muted-foreground" },
            "Orchestrator profile"),
          h(Select, {
            value: settings.orchestrator_profile || "",
            className: "h-8",
            onChange: function (e) {
              saveSettings({ orchestrator_profile: selectedValue(e) });
            },
          },
            h(SelectOption, { value: "" },
              "(default: " + (settings.active_profile || "default") + ")"),
            profileOptions
          ),
          h("div", { className: "text-[10px] text-muted-foreground" },
            "Resolved: " + (settings.resolved_orchestrator_profile || "default"))
        ),
        h("div", { className: "flex flex-col gap-1" },
          h(Label, { className: "text-xs text-muted-foreground" },
            "Default assignee"),
          h(Select, {
            value: settings.default_assignee || "",
            className: "h-8",
            onChange: function (e) {
              saveSettings({ default_assignee: selectedValue(e) });
            },
          },
            h(SelectOption, { value: "" },
              "(default: " + (settings.active_profile || "default") + ")"),
            profileOptions
          ),
          h("div", { className: "text-[10px] text-muted-foreground" },
            "Resolved: " + (settings.resolved_default_assignee || "default"))
        ),
        h("div", { className: "flex flex-col gap-1" },
          h(Label, { className: "text-xs text-muted-foreground" },
            "Orchestration mode"),
          h("label", { className: "flex items-center gap-2 text-xs h-8" },
            h("input", {
              type: "checkbox",
              checked: !!settings.auto_decompose,
              onChange: function (e) {
                saveSettings({ auto_decompose: !!e.target.checked });
              },
            }),
            settings.auto_decompose ? "Auto (default)" : "Manual"
          ),
          h("div", { className: "text-[10px] text-muted-foreground" },
            "When on, the dispatcher decomposes new triage tasks automatically.")
        )
      ) : h("div", { className: "text-xs text-muted-foreground" },
        "Loading…"),

      h("div", { className: "border-t pt-3" },
        h(Label, { className: "text-xs text-muted-foreground" },
          "Profile descriptions"),
        h("div", { className: "text-[10px] text-muted-foreground pb-2" },
          "Descriptions guide the orchestrator's routing. Click ⚗ to auto-generate, or edit and save."),
        profiles.length === 0
          ? h("div", { className: "text-xs text-muted-foreground" }, "No profiles installed.")
          : h("div", { className: "flex flex-col gap-2" },
            profiles.map(function (p) {
              return h(ProfileDescriptionRow, {
                key: p.name,
                profile: p,
                busy: busy[p.name] || null,
                onSave: saveProfileDescription,
                onAuto: autoGenerateDescription,
              });
            })
          )
      )
    )
  );
}
|
||||
|
||||
function ProfileDescriptionRow(props) {
  // One editable row in the "Profile descriptions" list.
  //
  // Props:
  //   profile — object read for `name`, `description`, `description_auto`
  //             and `is_default`.
  //   busy    — "save", "auto", or a falsy value when the row is idle.
  //   onSave  — called as onSave(name, draft) when Save is clicked.
  //   onAuto  — called as onAuto(name, true) when ⚗ Auto is clicked
  //             (the second argument requests overwrite).
  const p = props.profile;
  const busy = props.busy;
  const saved = p.description || "";
  const [draft, setDraft] = useState(saved);

  // Re-sync the local draft if the server-side description changes (e.g.
  // after auto-generate). Cheap because re-runs only happen on prop change.
  useEffect(function () {
    setDraft(p.description || "");
  }, [p.description]);

  // An auto-generated, non-empty description is flagged for human review.
  const needsReview = p.description_auto && p.description;

  return h("div", {
    className: "flex flex-col gap-1 border-l-2 pl-2",
    // Left border doubles as a status hint: grey when described, yellow when not.
    style: { borderColor: p.description ? "#888" : "#cc6" },
  },
    h("div", { className: "flex items-center gap-2 text-xs" },
      h("span", { className: "font-medium" }, p.name),
      p.is_default
        ? h("span", { className: "text-[10px] text-muted-foreground" }, "(default)")
        : null,
      needsReview
        ? h("span", { className: "text-[10px] text-yellow-600" }, "auto — review")
        : null,
      !p.description
        ? h("span", { className: "text-[10px] text-yellow-600" }, "⚠ no description")
        : null
    ),
    h("div", { className: "flex items-center gap-2" },
      h(Input, {
        value: draft,
        onChange: function (e) { setDraft(e.target.value); },
        placeholder: "What is this profile good at?",
        className: "h-7 text-xs flex-1",
      }),
      h(Button, {
        onClick: function () { props.onSave(p.name, draft); },
        size: "sm",
        // Nothing to save while a request is in flight or the draft is unchanged.
        disabled: !!busy || draft === saved,
        title: "Save the description above as user-authored",
      }, busy === "save" ? "Saving…" : "Save"),
      h(Button, {
        onClick: function () { props.onAuto(p.name, true); },
        size: "sm",
        disabled: !!busy,
        title: "Auto-generate a description from this profile's skills and model",
      }, busy === "auto" ? "Generating…" : "⚗ Auto")
    )
  );
}
|
||||
|
||||
function BoardSwitcher(props) {
|
||||
const { t } = useI18n();
|
||||
const list = props.boardList || [];
|
||||
|
|
@ -2395,6 +2678,25 @@
|
|||
});
|
||||
};
|
||||
|
||||
// POST /tasks/:id/decompose — fan a triage task out into a graph
|
||||
// of child tasks routed to specialist profiles by description.
|
||||
// Refreshes both the drawer (so the user sees the root flip to
|
||||
// todo) and the board (so the new children appear in the columns).
|
||||
const doDecompose = function () {
|
||||
return SDK.fetchJSON(
|
||||
withBoard(`${API}/tasks/${encodeURIComponent(props.taskId)}/decompose`, boardSlug),
|
||||
{
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({}),
|
||||
}
|
||||
).then(function (res) {
|
||||
load();
|
||||
props.onRefresh();
|
||||
return res;
|
||||
});
|
||||
};
|
||||
|
||||
const addLink = function (parentId) {
|
||||
return SDK.fetchJSON(withBoard(`${API}/links`, boardSlug), {
|
||||
method: "POST",
|
||||
|
|
@ -2486,6 +2788,7 @@
|
|||
boardSlug: boardSlug,
|
||||
onPatch: doPatch,
|
||||
onSpecify: doSpecify,
|
||||
onDecompose: doDecompose,
|
||||
onAddParent: addLink,
|
||||
onRemoveParent: removeLink,
|
||||
onAddChild: addChild,
|
||||
|
|
@ -2559,6 +2862,7 @@
|
|||
task: t,
|
||||
onPatch: props.onPatch,
|
||||
onSpecify: props.onSpecify,
|
||||
onDecompose: props.onDecompose,
|
||||
}),
|
||||
h(DiagnosticsSection, {
|
||||
task: t,
|
||||
|
|
@ -3023,6 +3327,8 @@
|
|||
const task = props.task;
|
||||
const [specifyBusy, setSpecifyBusy] = useState(false);
|
||||
const [specifyMsg, setSpecifyMsg] = useState(null);
|
||||
const [decomposeBusy, setDecomposeBusy] = useState(false);
|
||||
const [decomposeMsg, setDecomposeMsg] = useState(null);
|
||||
const b = function (label, patch, enabled, confirmMsg) {
|
||||
return h(Button, {
|
||||
onClick: function () { if (enabled !== false) props.onPatch(patch, { confirm: confirmMsg }); },
|
||||
|
|
@ -3067,9 +3373,57 @@
|
|||
}, specifyBusy ? "Specifying…" : "✨ Specify")
|
||||
: null;
|
||||
|
||||
// "Decompose" is the orchestrator-driven fan-out. Like Specify, only
|
||||
// makes sense on triage-column tasks — elsewhere the backend short-
|
||||
// circuits with ok:false. When the orchestrator returns fanout:false
|
||||
// we render the same single-task message as Specify; when it fans
|
||||
// out we report the child count for quick at-a-glance verification.
|
||||
const decomposeButton = (task.status === "triage" && props.onDecompose)
|
||||
? h(Button, {
|
||||
onClick: function () {
|
||||
if (decomposeBusy) return;
|
||||
setDecomposeBusy(true);
|
||||
setDecomposeMsg(null);
|
||||
props.onDecompose().then(function (res) {
|
||||
if (res && res.ok) {
|
||||
if (res.fanout && res.child_ids && res.child_ids.length) {
|
||||
setDecomposeMsg({
|
||||
ok: true,
|
||||
text: `Decomposed into ${res.child_ids.length} children: ${res.child_ids.join(", ")}`,
|
||||
});
|
||||
} else {
|
||||
const suffix = res.new_title
|
||||
? ` — retitled: ${res.new_title}`
|
||||
: "";
|
||||
setDecomposeMsg({
|
||||
ok: true,
|
||||
text: `Single task (no fanout)${suffix}`,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
setDecomposeMsg({
|
||||
ok: false,
|
||||
text: "Decompose failed: " + ((res && res.reason) || "unknown error"),
|
||||
});
|
||||
}
|
||||
}).catch(function (err) {
|
||||
setDecomposeMsg({
|
||||
ok: false,
|
||||
text: "Decompose failed: " + (err.message || String(err)),
|
||||
});
|
||||
}).then(function () {
|
||||
setDecomposeBusy(false);
|
||||
});
|
||||
},
|
||||
disabled: decomposeBusy,
|
||||
size: "sm",
|
||||
}, decomposeBusy ? "Decomposing…" : "⚗ Decompose")
|
||||
: null;
|
||||
|
||||
return h("div", null,
|
||||
h("div", { className: "hermes-kanban-actions" },
|
||||
specifyButton,
|
||||
decomposeButton,
|
||||
b("→ triage", { status: "triage" }, task.status !== "triage"),
|
||||
b("→ ready", { status: "ready" }, task.status !== "ready"),
|
||||
// No direct → running button: /tasks/:id PATCH rejects status=running
|
||||
|
|
@ -3091,6 +3445,11 @@
|
|||
? "hermes-kanban-msg-ok"
|
||||
: "hermes-kanban-msg-err",
|
||||
}, specifyMsg.text) : null,
|
||||
decomposeMsg ? h("div", {
|
||||
className: decomposeMsg.ok
|
||||
? "hermes-kanban-msg-ok"
|
||||
: "hermes-kanban-msg-err",
|
||||
}, decomposeMsg.text) : null,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -628,7 +628,7 @@ def update_task(task_id: str, payload: UpdateTaskBody, board: Optional[str] = Qu
|
|||
status_code=400,
|
||||
detail="Cannot set status to 'running' directly; use the dispatcher/claim path",
|
||||
)
|
||||
elif s in ("todo", "triage"):
|
||||
elif s in {"todo", "triage"}:
|
||||
ok = _set_status_direct(conn, task_id, s)
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail=f"unknown status: {s}")
|
||||
|
|
@ -742,7 +742,7 @@ def _set_status_direct(
|
|||
(task_id, run_id, json.dumps({"status": new_status}), int(time.time())),
|
||||
)
|
||||
# If we re-opened something, children may have gone stale.
|
||||
if new_status in ("done", "ready"):
|
||||
if new_status in {"done", "ready"}:
|
||||
kanban_db.recompute_ready(conn)
|
||||
return True
|
||||
|
||||
|
|
@ -868,7 +868,7 @@ def bulk_update(payload: BulkTaskBody, board: Optional[str] = Query(None)):
|
|||
ok = kanban_db.unblock_task(conn, tid)
|
||||
else:
|
||||
ok = _set_status_direct(conn, tid, "ready")
|
||||
elif s in ("todo", "running", "triage"):
|
||||
elif s in {"todo", "running", "triage"}:
|
||||
ok = _set_status_direct(conn, tid, s)
|
||||
else:
|
||||
entry.update(ok=False, error=f"unknown status {s!r}")
|
||||
|
|
@ -1535,6 +1535,279 @@ def switch_board(slug: str):
|
|||
_EVENT_POLL_SECONDS = 0.3
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Profile metadata & description editing (consumed by the kanban orchestrator)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Request body for PATCH /profiles/{profile_name}.
class DescribeBody(BaseModel):
    # Explicit user-authored text. The PATCH handler treats a missing or
    # empty value as "clear the description".
    description: Optional[str] = None  # explicit user-authored text
|
||||
|
||||
|
||||
# Request body for POST /profiles/{profile_name}/describe-auto.
class DescribeAutoBody(BaseModel):
    # Forwarded as ``overwrite=`` to ``profile_describer.describe_profile``.
    overwrite: bool = False
|
||||
|
||||
|
||||
@router.get("/profiles")
def list_profile_roster():
    """List every installed profile together with its description.

    Feeds the dashboard's settings panel (orchestrator picker) and the
    profile-description editor. A profile with no description is still
    listed: it remains routable by name, only less precisely.

    Raises:
        HTTPException: 500 when the profiles module cannot be imported
            or the profile listing itself fails.
    """
    try:
        from hermes_cli import profiles as profiles_mod
        installed = profiles_mod.list_profiles()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"failed to list profiles: {exc}")

    # Normalise every field so the JSON payload never carries None.
    roster = []
    for entry in installed:
        roster.append({
            "name": entry.name,
            "is_default": bool(entry.is_default),
            "model": entry.model or "",
            "provider": entry.provider or "",
            "description": entry.description or "",
            "description_auto": bool(entry.description_auto),
            "skill_count": int(entry.skill_count or 0),
        })
    return {"profiles": roster}
|
||||
|
||||
|
||||
@router.patch("/profiles/{profile_name}")
def update_profile_description(profile_name: str, payload: DescribeBody):
    """Store or clear a profile's description.

    The submitted text is stripped; an empty result clears the
    description. Whatever is written is flagged as user-authored
    (``description_auto: false``) so an auto-describer sweep will not
    replace it unless run with ``--overwrite``.

    Raises:
        HTTPException: 404 when the profile directory does not exist,
            500 for any other failure while resolving or writing.
    """
    try:
        from hermes_cli import profiles as profiles_mod

        canon = profiles_mod.normalize_profile_name(profile_name)
        if canon != "default":
            target_dir = profiles_mod.get_profile_dir(canon)
        else:
            # The default profile lives directly in the Hermes home dir.
            from hermes_constants import get_hermes_home  # type: ignore
            from pathlib import Path as _Path
            target_dir = _Path(get_hermes_home())

        if not target_dir.is_dir():
            raise HTTPException(status_code=404, detail=f"profile '{profile_name}' not found")

        cleaned = (payload.description or "").strip()
        profiles_mod.write_profile_meta(
            target_dir,
            description=cleaned,
            description_auto=False,
        )
    except HTTPException:
        # Let the deliberate 404 through untouched.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"failed to update profile: {exc}")

    response = {"ok": True, "profile": canon, "description": cleaned}
    return response
|
||||
|
||||
|
||||
@router.post("/profiles/{profile_name}/describe-auto")
def auto_describe_profile(profile_name: str, payload: DescribeAutoBody):
    """Ask the auxiliary LLM (``auxiliary.profile_describer``) to write a
    description for the named profile.

    The result is persisted with ``description_auto: true`` so the
    dashboard can show a "review" badge. This is the HTTP twin of
    ``hermes profile describe <name> --auto``.

    A non-OK outcome is returned as a normal response, not an HTTP
    error: the UI shows the reason inline (e.g. "no auxiliary client
    configured") so the operator can fix config and retry without a
    page reload.

    Raises:
        HTTPException: 500 only when the describer itself raises.
    """
    try:
        from hermes_cli import profile_describer  # noqa: WPS433 (intentional)

        result = profile_describer.describe_profile(
            profile_name,
            overwrite=bool(payload.overwrite),
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"describer crashed: {exc}")

    body = {
        "ok": bool(result.ok),
        "profile": result.profile_name,
        "reason": result.reason,
        "description": result.description,
    }
    return body
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Decompose endpoint (orchestrator-driven fan-out)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Request body for POST /tasks/{task_id}/decompose.
class DecomposeBody(BaseModel):
    # Forwarded as ``author=`` to ``kanban_decompose.decompose_task``;
    # an empty string is passed on as None.
    author: Optional[str] = None
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/decompose")
def decompose_task_endpoint(
    task_id: str,
    payload: DecomposeBody,
    board: Optional[str] = Query(None),
):
    """Fan a triage-column task out into a graph of child tasks via the
    auxiliary LLM, routed to specialist profiles by description. Maps
    1:1 to ``hermes kanban decompose <task_id>``.

    Returns the outcome shape used by the CLI: ``{ok, task_id, reason,
    fanout, child_ids, new_title}``. A non-OK outcome is NOT an HTTP
    error — the UI renders the reason inline.

    Runs in FastAPI's threadpool (sync ``def``) because the LLM call
    can take minutes on reasoning models.

    Raises:
        HTTPException: 500 with a ``decomposer crashed: ...`` detail when
            the decomposer itself raises, mirroring the describe-auto
            endpoint so the UI gets a reason instead of a bare 500.
    """
    board = _resolve_board(board)
    # The decomposer is pointed at the board through the environment.
    # NOTE(review): os.environ is process-wide, and this handler runs in a
    # threadpool — two concurrent decompose calls for different boards can
    # observe each other's value. Passing the board explicitly would be
    # safer if decompose_task can accept it — confirm.
    prev_env = os.environ.get("HERMES_KANBAN_BOARD")
    try:
        os.environ["HERMES_KANBAN_BOARD"] = board or kanban_db.DEFAULT_BOARD
        from hermes_cli import kanban_decompose  # noqa: WPS433 (intentional)

        outcome = kanban_decompose.decompose_task(
            task_id,
            author=(payload.author or None),
        )
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(
            status_code=500,
            detail=f"decomposer crashed: {exc}",
        ) from exc
    finally:
        # Always restore the caller's environment, even on failure.
        if prev_env is None:
            os.environ.pop("HERMES_KANBAN_BOARD", None)
        else:
            os.environ["HERMES_KANBAN_BOARD"] = prev_env

    return {
        "ok": bool(outcome.ok),
        "task_id": outcome.task_id,
        "reason": outcome.reason,
        "fanout": bool(outcome.fanout),
        "child_ids": outcome.child_ids or [],
        "new_title": outcome.new_title,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Orchestration settings (kanban.orchestrator_profile / default_assignee /
|
||||
# auto_decompose) — surfaced to the dashboard's settings panel
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Request body for PUT /orchestration. The handler only writes fields
# that are not None, so an omitted field leaves the stored value alone.
class OrchestrationSettingsBody(BaseModel):
    # Profile name; an empty string clears the override.
    orchestrator_profile: Optional[str] = None
    # Profile name; an empty string clears the override.
    default_assignee: Optional[str] = None
    # Stored as a bool under kanban.auto_decompose.
    auto_decompose: Optional[bool] = None
|
||||
|
||||
|
||||
@router.get("/orchestration")
def get_orchestration_settings():
    """Return the current kanban orchestration knobs from config.yaml
    plus the resolved effective values (filling in fallbacks).

    The read path is deliberately forgiving: an unreadable config, a
    ``kanban`` section that is not a mapping, or a non-string profile
    value all degrade to defaults rather than failing the request.

    Returns:
        dict with the explicit settings (``orchestrator_profile``,
        ``default_assignee``, ``auto_decompose``), their resolved
        counterparts (``resolved_*``) and ``active_profile``.
    """
    try:
        from hermes_cli.config import load_config
        cfg = load_config() or {}
    except Exception:
        cfg = {}

    # A hand-edited config may hold a scalar or list under ``kanban``;
    # treat anything that is not a mapping as "no settings".
    kanban_cfg = cfg.get("kanban") if isinstance(cfg, dict) else None
    if not isinstance(kanban_cfg, dict):
        kanban_cfg = {}

    # str() guards against YAML scalars that parse as non-strings.
    explicit_orch = str(kanban_cfg.get("orchestrator_profile") or "").strip()
    explicit_default = str(kanban_cfg.get("default_assignee") or "").strip()
    auto_decompose = bool(kanban_cfg.get("auto_decompose", True))

    # Resolve fallbacks the same way the decomposer does.
    resolved_orch = explicit_orch
    resolved_default = explicit_default
    try:
        from hermes_cli import profiles as profiles_mod
        active_default = profiles_mod.get_active_profile_name() or "default"
        if not resolved_orch or not profiles_mod.profile_exists(resolved_orch):
            resolved_orch = active_default
        if not resolved_default or not profiles_mod.profile_exists(resolved_default):
            resolved_default = active_default
    except Exception:
        # Profile lookup unavailable: keep explicit names as-is and only
        # fill the blanks with the literal "default".
        active_default = "default"
        if not resolved_orch:
            resolved_orch = active_default
        if not resolved_default:
            resolved_default = active_default

    return {
        "orchestrator_profile": explicit_orch,
        "default_assignee": explicit_default,
        "auto_decompose": auto_decompose,
        "resolved_orchestrator_profile": resolved_orch,
        "resolved_default_assignee": resolved_default,
        "active_profile": active_default,
    }
|
||||
|
||||
|
||||
def _validate_orchestration_profile(profiles_mod, raw_name):
    """Strip *raw_name* and reject it with a 400 if the profile is unknown.

    An empty name (clears the override) or an unavailable profiles module
    skips the check. If the existence lookup itself errors, the name is
    accepted unchanged (fail open).
    """
    name = (raw_name or "").strip()
    if not name or profiles_mod is None:
        return name
    try:
        exists = profiles_mod.profile_exists(name)
    except HTTPException:
        raise
    except Exception:
        return name  # fail open if the lookup itself errors
    if not exists:
        raise HTTPException(
            status_code=400,
            detail=f"profile '{name}' does not exist",
        )
    return name


@router.put("/orchestration")
def set_orchestration_settings(payload: OrchestrationSettingsBody):
    """Update the kanban orchestration knobs in ~/.hermes/config.yaml.

    Each field is optional — only fields explicitly passed are
    written. ``orchestrator_profile`` / ``default_assignee`` accept
    empty strings to clear the override and fall back to the default
    profile.

    Raises:
        HTTPException: 400 when a non-empty profile name does not exist,
            500 when the config cannot be loaded or saved.
    """
    try:
        from hermes_cli.config import load_config, save_config
        cfg = load_config() or {}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"failed to load config: {exc}") from exc

    kanban_section = cfg.setdefault("kanban", {})
    if not isinstance(kanban_section, dict):
        # Replace a malformed (non-mapping) section with a fresh one.
        kanban_section = {}
        cfg["kanban"] = kanban_section

    # Validate any non-empty profile names exist before saving.
    try:
        from hermes_cli import profiles as profiles_mod
    except Exception:
        profiles_mod = None  # type: ignore

    if payload.orchestrator_profile is not None:
        kanban_section["orchestrator_profile"] = _validate_orchestration_profile(
            profiles_mod, payload.orchestrator_profile
        )

    if payload.default_assignee is not None:
        kanban_section["default_assignee"] = _validate_orchestration_profile(
            profiles_mod, payload.default_assignee
        )

    if payload.auto_decompose is not None:
        kanban_section["auto_decompose"] = bool(payload.auto_decompose)

    try:
        save_config(cfg)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"failed to save config: {exc}") from exc

    # Echo back the resolved state (callers usually re-render from it).
    return get_orchestration_settings()
|
||||
|
||||
|
||||
@router.websocket("/events")
|
||||
async def stream_events(ws: WebSocket):
|
||||
# Enforce the dashboard session token as a query param — browsers can't
|
||||
|
|
|
|||
|
|
@ -263,7 +263,7 @@ class ByteRoverMemoryProvider(MemoryProvider):
|
|||
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
"""Mirror built-in memory writes to ByteRover."""
|
||||
if action not in ("add", "replace") or not content:
|
||||
if action not in {"add", "replace"} or not content:
|
||||
return
|
||||
|
||||
def _write():
|
||||
|
|
@ -289,7 +289,7 @@ class ByteRoverMemoryProvider(MemoryProvider):
|
|||
for msg in messages[-10:]: # last 10 messages
|
||||
role = msg.get("role", "")
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, str) and content.strip() and role in ("user", "assistant"):
|
||||
if isinstance(content, str) and content.strip() and role in {"user", "assistant"}:
|
||||
parts.append(f"{role}: {content[:500]}")
|
||||
|
||||
if not parts:
|
||||
|
|
|
|||
|
|
@ -416,7 +416,7 @@ def _build_embedded_profile_env(config: dict[str, Any], *, llm_api_key: str | No
|
|||
current_base_url = config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "")
|
||||
|
||||
# The embedded daemon expects OpenAI wire format for these providers.
|
||||
daemon_provider = "openai" if current_provider in ("openai_compatible", "openrouter") else current_provider
|
||||
daemon_provider = "openai" if current_provider in {"openai_compatible", "openrouter"} else current_provider
|
||||
|
||||
env_values = {
|
||||
"HINDSIGHT_API_LLM_PROVIDER": str(daemon_provider),
|
||||
|
|
@ -596,7 +596,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
try:
|
||||
cfg = _load_config()
|
||||
mode = cfg.get("mode", "cloud")
|
||||
if mode in ("local", "local_embedded"):
|
||||
if mode in {"local", "local_embedded"}:
|
||||
available, _ = _check_local_runtime()
|
||||
return available
|
||||
if mode == "local_external":
|
||||
|
|
@ -888,7 +888,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
from hindsight import HindsightEmbedded
|
||||
HindsightEmbedded.__del__ = lambda self: None
|
||||
llm_provider = self._config.get("llm_provider", "")
|
||||
if llm_provider in ("openai_compatible", "openrouter"):
|
||||
if llm_provider in {"openai_compatible", "openrouter"}:
|
||||
llm_provider = "openai"
|
||||
logger.debug("Creating HindsightEmbedded client (profile=%s, provider=%s)",
|
||||
self._config.get("profile", "hermes"), llm_provider)
|
||||
|
|
@ -1132,7 +1132,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
self._mode = "disabled"
|
||||
return
|
||||
self._api_key = self._config.get("apiKey") or self._config.get("api_key") or os.environ.get("HINDSIGHT_API_KEY", "")
|
||||
default_url = _DEFAULT_LOCAL_URL if self._mode in ("local_embedded", "local_external") else _DEFAULT_API_URL
|
||||
default_url = _DEFAULT_LOCAL_URL if self._mode in {"local_embedded", "local_external"} else _DEFAULT_API_URL
|
||||
self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)
|
||||
self._llm_base_url = self._config.get("llm_base_url", "")
|
||||
|
||||
|
|
@ -1152,10 +1152,10 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
self._budget = budget if budget in _VALID_BUDGETS else "mid"
|
||||
|
||||
memory_mode = self._config.get("memory_mode", "hybrid")
|
||||
self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid"
|
||||
self._memory_mode = memory_mode if memory_mode in {"context", "tools", "hybrid"} else "hybrid"
|
||||
|
||||
prefetch_method = self._config.get("recall_prefetch_method") or self._config.get("prefetch_method", "recall")
|
||||
self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall"
|
||||
self._prefetch_method = prefetch_method if prefetch_method in {"recall", "reflect"} else "recall"
|
||||
|
||||
# Bank options
|
||||
self._bank_mission = self._config.get("bank_mission", "")
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue