mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor
# Conflicts: # gateway/platforms/base.py # gateway/run.py # tests/gateway/test_command_bypass_active_session.py
This commit is contained in:
commit
b04248f4d5
319 changed files with 25283 additions and 7048 deletions
|
|
@ -6,14 +6,15 @@ ENV PYTHONUNBUFFERED=1
|
|||
# Install system dependencies in one layer, clear APT cache
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev && \
|
||||
build-essential nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev procps && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY . /opt/hermes
|
||||
WORKDIR /opt/hermes
|
||||
|
||||
# Install Python and Node dependencies in one layer, no cache
|
||||
RUN pip install --no-cache-dir -e ".[all]" --break-system-packages && \
|
||||
RUN pip install --no-cache-dir uv --break-system-packages && \
|
||||
uv pip install --system --break-system-packages --no-cache -e ".[all]" && \
|
||||
npm install --prefer-offline --no-audit && \
|
||||
npx playwright install --with-deps chromium --only-shell && \
|
||||
cd /opt/hermes/scripts/whatsapp-bridge && \
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ from acp.schema import (
|
|||
SessionCapabilities,
|
||||
SessionForkCapabilities,
|
||||
SessionListCapabilities,
|
||||
SessionResumeCapabilities,
|
||||
SessionInfo,
|
||||
TextContentBlock,
|
||||
UnstructuredCommandInput,
|
||||
|
|
@ -245,9 +246,11 @@ class HermesACPAgent(acp.Agent):
|
|||
protocol_version=acp.PROTOCOL_VERSION,
|
||||
agent_info=Implementation(name="hermes-agent", version=HERMES_VERSION),
|
||||
agent_capabilities=AgentCapabilities(
|
||||
load_session=True,
|
||||
session_capabilities=SessionCapabilities(
|
||||
fork=SessionForkCapabilities(),
|
||||
list=SessionListCapabilities(),
|
||||
resume=SessionResumeCapabilities(),
|
||||
),
|
||||
),
|
||||
auth_methods=auth_methods,
|
||||
|
|
@ -451,14 +454,13 @@ class HermesACPAgent(acp.Agent):
|
|||
await conn.session_update(session_id, update)
|
||||
|
||||
usage = None
|
||||
usage_data = result.get("usage")
|
||||
if usage_data and isinstance(usage_data, dict):
|
||||
if any(result.get(key) is not None for key in ("prompt_tokens", "completion_tokens", "total_tokens")):
|
||||
usage = Usage(
|
||||
input_tokens=usage_data.get("prompt_tokens", 0),
|
||||
output_tokens=usage_data.get("completion_tokens", 0),
|
||||
total_tokens=usage_data.get("total_tokens", 0),
|
||||
thought_tokens=usage_data.get("reasoning_tokens"),
|
||||
cached_read_tokens=usage_data.get("cached_tokens"),
|
||||
input_tokens=result.get("prompt_tokens", 0),
|
||||
output_tokens=result.get("completion_tokens", 0),
|
||||
total_tokens=result.get("total_tokens", 0),
|
||||
thought_tokens=result.get("reasoning_tokens"),
|
||||
cached_read_tokens=result.get("cache_read_tokens"),
|
||||
)
|
||||
|
||||
stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn"
|
||||
|
|
|
|||
|
|
@ -60,6 +60,8 @@ _ANTHROPIC_OUTPUT_LIMITS = {
|
|||
"claude-3-opus": 4_096,
|
||||
"claude-3-sonnet": 4_096,
|
||||
"claude-3-haiku": 4_096,
|
||||
# Third-party Anthropic-compatible providers
|
||||
"minimax": 131_072,
|
||||
}
|
||||
|
||||
# For any model not in the table, assume the highest current limit.
|
||||
|
|
@ -74,8 +76,11 @@ def _get_anthropic_max_output(model: str) -> int:
|
|||
model IDs (claude-sonnet-4-5-20250929) and variant suffixes (:1m, :fast)
|
||||
resolve correctly. Longest-prefix match wins to avoid e.g. "claude-3-5"
|
||||
matching before "claude-3-5-sonnet".
|
||||
|
||||
Normalizes dots to hyphens so that model names like
|
||||
``anthropic/claude-opus-4.6`` match the ``claude-opus-4-6`` table key.
|
||||
"""
|
||||
m = model.lower()
|
||||
m = model.lower().replace(".", "-")
|
||||
best_key = ""
|
||||
best_val = _ANTHROPIC_DEFAULT_OUTPUT_LIMIT
|
||||
for key, val in _ANTHROPIC_OUTPUT_LIMITS.items():
|
||||
|
|
@ -95,6 +100,15 @@ _COMMON_BETAS = [
|
|||
"interleaved-thinking-2025-05-14",
|
||||
"fine-grained-tool-streaming-2025-05-14",
|
||||
]
|
||||
# MiniMax's Anthropic-compatible endpoints fail tool-use requests when
|
||||
# the fine-grained tool streaming beta is present. Omit it so tool calls
|
||||
# fall back to the provider's default response path.
|
||||
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
||||
|
||||
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
|
||||
# significantly higher output token throughput on Opus 4.6 (~2.5x).
|
||||
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
|
||||
_FAST_MODE_BETA = "fast-mode-2026-02-01"
|
||||
|
||||
# Additional beta headers required for OAuth/subscription auth.
|
||||
# Matches what Claude Code (and pi-ai / OpenCode) send.
|
||||
|
|
@ -149,18 +163,27 @@ def _get_claude_code_version() -> str:
|
|||
|
||||
|
||||
def _is_oauth_token(key: str) -> bool:
|
||||
"""Check if the key is an OAuth/setup token (not a regular Console API key).
|
||||
"""Check if the key is an Anthropic OAuth/setup token.
|
||||
|
||||
Regular API keys start with 'sk-ant-api'. Everything else (setup-tokens
|
||||
starting with 'sk-ant-oat', managed keys, JWTs, etc.) needs Bearer auth.
|
||||
Positively identifies Anthropic OAuth tokens by their key format:
|
||||
- ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
|
||||
- ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
|
||||
|
||||
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern
|
||||
and correctly return False.
|
||||
"""
|
||||
if not key:
|
||||
return False
|
||||
# Regular Console API keys use x-api-key header
|
||||
# Regular Anthropic Console API keys — x-api-key auth, never OAuth
|
||||
if key.startswith("sk-ant-api"):
|
||||
return False
|
||||
# Everything else (setup-tokens, managed keys, JWTs) uses Bearer auth
|
||||
return True
|
||||
# Anthropic-issued tokens (setup-tokens sk-ant-oat-*, managed keys)
|
||||
if key.startswith("sk-ant-"):
|
||||
return True
|
||||
# JWTs from Anthropic OAuth flow
|
||||
if key.startswith("eyJ"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _normalize_base_url_text(base_url) -> str:
|
||||
|
|
@ -204,6 +227,19 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
|
|||
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
|
||||
|
||||
|
||||
def _common_betas_for_base_url(base_url: str | None) -> list[str]:
|
||||
"""Return the beta headers that are safe for the configured endpoint.
|
||||
|
||||
MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
|
||||
that include Anthropic's ``fine-grained-tool-streaming`` beta — every
|
||||
tool-use message triggers a connection error. Strip that beta for
|
||||
Bearer-auth endpoints while keeping all other betas intact.
|
||||
"""
|
||||
if _requires_bearer_auth(base_url):
|
||||
return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA]
|
||||
return _COMMON_BETAS
|
||||
|
||||
|
||||
def build_anthropic_client(api_key: str, base_url: str = None):
|
||||
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
|
||||
|
||||
|
|
@ -222,6 +258,7 @@ def build_anthropic_client(api_key: str, base_url: str = None):
|
|||
}
|
||||
if normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(normalized_base_url)
|
||||
|
||||
if _requires_bearer_auth(normalized_base_url):
|
||||
# Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
|
||||
|
|
@ -231,21 +268,21 @@ def build_anthropic_client(api_key: str, base_url: str = None):
|
|||
# not use Anthropic's sk-ant-api prefix and would otherwise be misread as
|
||||
# Anthropic OAuth/setup tokens.
|
||||
kwargs["auth_token"] = api_key
|
||||
if _COMMON_BETAS:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}
|
||||
if common_betas:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
|
||||
elif _is_third_party_anthropic_endpoint(base_url):
|
||||
# Third-party proxies (Azure AI Foundry, AWS Bedrock, etc.) use their
|
||||
# own API keys with x-api-key auth. Skip OAuth detection — their keys
|
||||
# don't follow Anthropic's sk-ant-* prefix convention and would be
|
||||
# misclassified as OAuth tokens.
|
||||
kwargs["api_key"] = api_key
|
||||
if _COMMON_BETAS:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}
|
||||
if common_betas:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
|
||||
elif _is_oauth_token(api_key):
|
||||
# OAuth access token / setup-token → Bearer auth + Claude Code identity.
|
||||
# Anthropic routes OAuth requests based on user-agent and headers;
|
||||
# without Claude Code's fingerprint, requests get intermittent 500s.
|
||||
all_betas = _COMMON_BETAS + _OAUTH_ONLY_BETAS
|
||||
all_betas = common_betas + _OAUTH_ONLY_BETAS
|
||||
kwargs["auth_token"] = api_key
|
||||
kwargs["default_headers"] = {
|
||||
"anthropic-beta": ",".join(all_betas),
|
||||
|
|
@ -255,8 +292,8 @@ def build_anthropic_client(api_key: str, base_url: str = None):
|
|||
else:
|
||||
# Regular API key → x-api-key header + common betas
|
||||
kwargs["api_key"] = api_key
|
||||
if _COMMON_BETAS:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}
|
||||
if common_betas:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
|
||||
|
||||
return _anthropic_sdk.Anthropic(**kwargs)
|
||||
|
||||
|
|
@ -485,35 +522,6 @@ def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[s
|
|||
return None
|
||||
|
||||
|
||||
def get_anthropic_token_source(token: Optional[str] = None) -> str:
|
||||
"""Best-effort source classification for an Anthropic credential token."""
|
||||
token = (token or "").strip()
|
||||
if not token:
|
||||
return "none"
|
||||
|
||||
env_token = os.getenv("ANTHROPIC_TOKEN", "").strip()
|
||||
if env_token and env_token == token:
|
||||
return "anthropic_token_env"
|
||||
|
||||
cc_env_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
|
||||
if cc_env_token and cc_env_token == token:
|
||||
return "claude_code_oauth_token_env"
|
||||
|
||||
creds = read_claude_code_credentials()
|
||||
if creds and creds.get("accessToken") == token:
|
||||
return str(creds.get("source") or "claude_code_credentials")
|
||||
|
||||
managed_key = read_claude_managed_key()
|
||||
if managed_key and managed_key == token:
|
||||
return "claude_json_primary_api_key"
|
||||
|
||||
api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
if api_key and api_key == token:
|
||||
return "anthropic_api_key_env"
|
||||
|
||||
return "unknown"
|
||||
|
||||
|
||||
def resolve_anthropic_token() -> Optional[str]:
|
||||
"""Resolve an Anthropic token from all available sources.
|
||||
|
||||
|
|
@ -720,21 +728,6 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
|
|||
}
|
||||
|
||||
|
||||
def _save_hermes_oauth_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None:
|
||||
"""Save OAuth credentials to ~/.hermes/.anthropic_oauth.json."""
|
||||
data = {
|
||||
"accessToken": access_token,
|
||||
"refreshToken": refresh_token,
|
||||
"expiresAt": expires_at_ms,
|
||||
}
|
||||
try:
|
||||
_HERMES_OAUTH_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
_HERMES_OAUTH_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
||||
_HERMES_OAUTH_FILE.chmod(0o600)
|
||||
except (OSError, IOError) as e:
|
||||
logger.debug("Failed to save Hermes OAuth credentials: %s", e)
|
||||
|
||||
|
||||
def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
|
||||
"""Read Hermes-managed OAuth credentials from ~/.hermes/.anthropic_oauth.json."""
|
||||
if _HERMES_OAUTH_FILE.exists():
|
||||
|
|
@ -783,39 +776,6 @@ def _sanitize_tool_id(tool_id: str) -> str:
|
|||
return sanitized or "tool_0"
|
||||
|
||||
|
||||
def _convert_openai_image_part_to_anthropic(part: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""Convert an OpenAI-style image block to Anthropic's image source format."""
|
||||
image_data = part.get("image_url", {})
|
||||
url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data)
|
||||
if not isinstance(url, str) or not url.strip():
|
||||
return None
|
||||
url = url.strip()
|
||||
|
||||
if url.startswith("data:"):
|
||||
header, sep, data = url.partition(",")
|
||||
if sep and ";base64" in header:
|
||||
media_type = header[5:].split(";", 1)[0] or "image/png"
|
||||
return {
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": media_type,
|
||||
"data": data,
|
||||
},
|
||||
}
|
||||
|
||||
if url.startswith(("http://", "https://")):
|
||||
return {
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "url",
|
||||
"url": url,
|
||||
},
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
||||
"""Convert OpenAI tool definitions to Anthropic format."""
|
||||
if not tools:
|
||||
|
|
@ -1235,6 +1195,7 @@ def build_anthropic_kwargs(
|
|||
preserve_dots: bool = False,
|
||||
context_length: Optional[int] = None,
|
||||
base_url: str | None = None,
|
||||
fast_mode: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build kwargs for anthropic.messages.create().
|
||||
|
||||
|
|
@ -1268,6 +1229,10 @@ def build_anthropic_kwargs(
|
|||
|
||||
When *base_url* points to a third-party Anthropic-compatible endpoint,
|
||||
thinking block signatures are stripped (they are Anthropic-proprietary).
|
||||
|
||||
When *fast_mode* is True, adds ``speed: "fast"`` and the fast-mode beta
|
||||
header for ~2.5x faster output throughput on Opus 4.6. Currently only
|
||||
supported on native Anthropic endpoints (not third-party compatible ones).
|
||||
"""
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
|
||||
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
||||
|
|
@ -1350,9 +1315,10 @@ def build_anthropic_kwargs(
|
|||
# Map reasoning_config to Anthropic's thinking parameter.
|
||||
# Claude 4.6 models use adaptive thinking + output_config.effort.
|
||||
# Older models use manual thinking with budget_tokens.
|
||||
# Haiku and MiniMax models do NOT support extended thinking — skip entirely.
|
||||
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
|
||||
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower() and "minimax" not in model.lower():
|
||||
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
|
||||
effort = str(reasoning_config.get("effort", "medium")).lower()
|
||||
budget = THINKING_BUDGET.get(effort, 8000)
|
||||
if _supports_adaptive_thinking(model):
|
||||
|
|
@ -1366,6 +1332,20 @@ def build_anthropic_kwargs(
|
|||
kwargs["temperature"] = 1
|
||||
kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
|
||||
|
||||
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
|
||||
# Adds speed:"fast" + the fast-mode beta header for ~2.5x output speed.
|
||||
# Only for native Anthropic endpoints — third-party providers would
|
||||
# reject the unknown beta header and speed parameter.
|
||||
if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
|
||||
kwargs["speed"] = "fast"
|
||||
# Build extra_headers with ALL applicable betas (the per-request
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
betas = list(_common_betas_for_base_url(base_url))
|
||||
if is_oauth:
|
||||
betas.extend(_OAUTH_ONLY_BETAS)
|
||||
betas.append(_FAST_MODE_BETA)
|
||||
kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
|
||||
|
||||
return kwargs
|
||||
|
||||
|
||||
|
|
@ -1427,4 +1407,4 @@ def normalize_anthropic_response(
|
|||
reasoning_details=reasoning_details or None,
|
||||
),
|
||||
finish_reason,
|
||||
)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -59,6 +59,9 @@ from hermes_constants import OPENROUTER_BASE_URL
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
|
||||
_stale_base_url_warned = False
|
||||
|
||||
_PROVIDER_ALIASES = {
|
||||
"google": "gemini",
|
||||
"google-gemini": "gemini",
|
||||
|
|
@ -687,6 +690,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
if pconfig.auth_type != "api_key":
|
||||
continue
|
||||
if provider_id == "anthropic":
|
||||
# Only try anthropic when the user has explicitly configured it.
|
||||
# Without this gate, Claude Code credentials get silently used
|
||||
# as auxiliary fallback when the user's primary provider fails.
|
||||
try:
|
||||
from hermes_cli.auth import is_provider_explicitly_configured
|
||||
if not is_provider_explicitly_configured("anthropic"):
|
||||
continue
|
||||
except ImportError:
|
||||
pass
|
||||
return _try_anthropic()
|
||||
|
||||
pool_present, entry = _select_pool_entry(provider_id)
|
||||
|
|
@ -698,11 +710,13 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
base_url = _to_openai_base_url(
|
||||
_pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
|
||||
)
|
||||
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
|
||||
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
|
||||
if model is None:
|
||||
continue # skip provider if we don't know a valid aux model
|
||||
logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
|
||||
extra = {}
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
|
||||
elif "api.githubcopilot.com" in base_url.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
|
|
@ -717,11 +731,13 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
base_url = _to_openai_base_url(
|
||||
str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
|
||||
)
|
||||
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
|
||||
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
|
||||
if model is None:
|
||||
continue # skip provider if we don't know a valid aux model
|
||||
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
|
||||
extra = {}
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
|
||||
elif "api.githubcopilot.com" in base_url.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
|
|
@ -848,7 +864,7 @@ def _read_main_provider() -> str:
|
|||
return ""
|
||||
|
||||
|
||||
def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
|
||||
def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[str]]:
|
||||
"""Resolve the active custom/main endpoint the same way the main CLI does.
|
||||
|
||||
This covers both env-driven OPENAI_BASE_URL setups and config-saved custom
|
||||
|
|
@ -861,18 +877,29 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
|
|||
runtime = resolve_runtime_provider(requested="custom")
|
||||
except Exception as exc:
|
||||
logger.debug("Auxiliary client: custom runtime resolution failed: %s", exc)
|
||||
return None, None
|
||||
runtime = None
|
||||
|
||||
if not isinstance(runtime, dict):
|
||||
openai_base = os.getenv("OPENAI_BASE_URL", "").strip().rstrip("/")
|
||||
openai_key = os.getenv("OPENAI_API_KEY", "").strip()
|
||||
if not openai_base:
|
||||
return None, None, None
|
||||
runtime = {
|
||||
"base_url": openai_base,
|
||||
"api_key": openai_key,
|
||||
}
|
||||
|
||||
custom_base = runtime.get("base_url")
|
||||
custom_key = runtime.get("api_key")
|
||||
custom_mode = runtime.get("api_mode")
|
||||
if not isinstance(custom_base, str) or not custom_base.strip():
|
||||
return None, None
|
||||
return None, None, None
|
||||
|
||||
custom_base = custom_base.strip().rstrip("/")
|
||||
if "openrouter.ai" in custom_base.lower():
|
||||
# requested='custom' falls back to OpenRouter when no custom endpoint is
|
||||
# configured. Treat that as "no custom endpoint" for auxiliary routing.
|
||||
return None, None
|
||||
return None, None, None
|
||||
|
||||
# Local servers (Ollama, llama.cpp, vLLM, LM Studio) don't require auth.
|
||||
# Use a placeholder key — the OpenAI SDK requires a non-empty string but
|
||||
|
|
@ -881,20 +908,33 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
|
|||
if not isinstance(custom_key, str) or not custom_key.strip():
|
||||
custom_key = "no-key-required"
|
||||
|
||||
return custom_base, custom_key.strip()
|
||||
if not isinstance(custom_mode, str) or not custom_mode.strip():
|
||||
custom_mode = None
|
||||
|
||||
return custom_base, custom_key.strip(), custom_mode
|
||||
|
||||
|
||||
def _current_custom_base_url() -> str:
|
||||
custom_base, _ = _resolve_custom_runtime()
|
||||
custom_base, _, _ = _resolve_custom_runtime()
|
||||
return custom_base or ""
|
||||
|
||||
|
||||
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
custom_base, custom_key = _resolve_custom_runtime()
|
||||
runtime = _resolve_custom_runtime()
|
||||
if len(runtime) == 2:
|
||||
custom_base, custom_key = runtime
|
||||
custom_mode = None
|
||||
else:
|
||||
custom_base, custom_key, custom_mode = runtime
|
||||
if not custom_base or not custom_key:
|
||||
return None, None
|
||||
if custom_base.lower().startswith(_CODEX_AUX_BASE_URL.lower()):
|
||||
return None, None
|
||||
model = _read_main_model() or "gpt-4o-mini"
|
||||
logger.debug("Auxiliary client: custom endpoint (%s)", model)
|
||||
logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
|
||||
if custom_mode == "codex_responses":
|
||||
real_client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
|
||||
|
||||
|
|
@ -967,40 +1007,6 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
|
|||
return AnthropicAuxiliaryClient(real_client, model, token, base_url, is_oauth=is_oauth), model
|
||||
|
||||
|
||||
def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Resolve a specific forced provider. Returns (None, None) if creds missing."""
|
||||
if forced == "openrouter":
|
||||
client, model = _try_openrouter()
|
||||
if client is None:
|
||||
logger.warning("auxiliary.provider=openrouter but OPENROUTER_API_KEY not set")
|
||||
return client, model
|
||||
|
||||
if forced == "nous":
|
||||
client, model = _try_nous()
|
||||
if client is None:
|
||||
logger.warning("auxiliary.provider=nous but Nous Portal not configured (run: hermes auth)")
|
||||
return client, model
|
||||
|
||||
if forced == "codex":
|
||||
client, model = _try_codex()
|
||||
if client is None:
|
||||
logger.warning("auxiliary.provider=codex but no Codex OAuth token found (run: hermes model)")
|
||||
return client, model
|
||||
|
||||
if forced == "main":
|
||||
# "main" = skip OpenRouter/Nous, use the main chat model's credentials.
|
||||
for try_fn in (_try_custom_endpoint, _try_codex, _resolve_api_key_provider):
|
||||
client, model = try_fn()
|
||||
if client is not None:
|
||||
return client, model
|
||||
logger.warning("auxiliary.provider=main but no main endpoint credentials found")
|
||||
return None, None
|
||||
|
||||
# Unknown provider name — fall through to auto
|
||||
logger.warning("Unknown auxiliary.provider=%r, falling back to auto", forced)
|
||||
return None, None
|
||||
|
||||
|
||||
_AUTO_PROVIDER_LABELS = {
|
||||
"_try_openrouter": "openrouter",
|
||||
"_try_nous": "nous",
|
||||
|
|
@ -1076,11 +1082,12 @@ def _is_connection_error(exc: Exception) -> bool:
|
|||
def _try_payment_fallback(
|
||||
failed_provider: str,
|
||||
task: str = None,
|
||||
reason: str = "payment error",
|
||||
) -> Tuple[Optional[Any], Optional[str], str]:
|
||||
"""Try alternative providers after a payment/credit error.
|
||||
"""Try alternative providers after a payment/credit or connection error.
|
||||
|
||||
Iterates the standard auto-detection chain, skipping the provider that
|
||||
returned a payment error.
|
||||
failed.
|
||||
|
||||
Returns:
|
||||
(client, model, provider_label) or (None, None, "") if no fallback.
|
||||
|
|
@ -1106,15 +1113,15 @@ def _try_payment_fallback(
|
|||
client, model = try_fn()
|
||||
if client is not None:
|
||||
logger.info(
|
||||
"Auxiliary %s: payment error on %s — falling back to %s (%s)",
|
||||
task or "call", failed_provider, label, model or "default",
|
||||
"Auxiliary %s: %s on %s — falling back to %s (%s)",
|
||||
task or "call", reason, failed_provider, label, model or "default",
|
||||
)
|
||||
return client, model, label
|
||||
tried.append(label)
|
||||
|
||||
logger.warning(
|
||||
"Auxiliary %s: payment error on %s and no fallback available (tried: %s)",
|
||||
task or "call", failed_provider, ", ".join(tried),
|
||||
"Auxiliary %s: %s on %s and no fallback available (tried: %s)",
|
||||
task or "call", reason, failed_provider, ", ".join(tried),
|
||||
)
|
||||
return None, None, ""
|
||||
|
||||
|
|
@ -1129,9 +1136,28 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
provider they already have credentials for — no OpenRouter key needed.
|
||||
2. OpenRouter → Nous → custom → Codex → API-key providers (original chain).
|
||||
"""
|
||||
global auxiliary_is_nous
|
||||
global auxiliary_is_nous, _stale_base_url_warned
|
||||
auxiliary_is_nous = False # Reset — _try_nous() will set True if it wins
|
||||
|
||||
# ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named
|
||||
# provider (not 'custom'). This catches the common "env poisoning"
|
||||
# scenario where a user switches providers via `hermes model` but the
|
||||
# old OPENAI_BASE_URL lingers in ~/.hermes/.env. ──
|
||||
if not _stale_base_url_warned:
|
||||
_env_base = os.getenv("OPENAI_BASE_URL", "").strip()
|
||||
_cfg_provider = _read_main_provider()
|
||||
if (_env_base and _cfg_provider
|
||||
and _cfg_provider != "custom"
|
||||
and not _cfg_provider.startswith("custom:")):
|
||||
logger.warning(
|
||||
"OPENAI_BASE_URL is set (%s) but model.provider is '%s'. "
|
||||
"Auxiliary clients may route to the wrong endpoint. "
|
||||
"Run: hermes model to reconfigure, or remove "
|
||||
"OPENAI_BASE_URL from ~/.hermes/.env",
|
||||
_env_base, _cfg_provider,
|
||||
)
|
||||
_stale_base_url_warned = True
|
||||
|
||||
# ── Step 1: non-aggregator main provider → use main model directly ──
|
||||
main_provider = _read_main_provider()
|
||||
main_model = _read_main_model()
|
||||
|
|
@ -1195,10 +1221,22 @@ def _to_async_client(sync_client, model: str):
|
|||
|
||||
async_kwargs["default_headers"] = copilot_default_headers()
|
||||
elif "api.kimi.com" in base_lower:
|
||||
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
|
||||
return AsyncOpenAI(**async_kwargs), model
|
||||
|
||||
|
||||
def _normalize_resolved_model(model_name: Optional[str], provider: str) -> Optional[str]:
|
||||
"""Normalize a resolved model for the provider that will receive it."""
|
||||
if not model_name:
|
||||
return model_name
|
||||
try:
|
||||
from hermes_cli.model_normalize import normalize_model_for_provider
|
||||
|
||||
return normalize_model_for_provider(model_name, provider)
|
||||
except Exception:
|
||||
return model_name
|
||||
|
||||
|
||||
def resolve_provider_client(
|
||||
provider: str,
|
||||
model: str = None,
|
||||
|
|
@ -1206,6 +1244,7 @@ def resolve_provider_client(
|
|||
raw_codex: bool = False,
|
||||
explicit_base_url: str = None,
|
||||
explicit_api_key: str = None,
|
||||
api_mode: str = None,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Central router: given a provider name and optional model, return a
|
||||
configured client with the correct auth, base URL, and API format.
|
||||
|
|
@ -1229,6 +1268,10 @@ def resolve_provider_client(
|
|||
the main agent loop).
|
||||
explicit_base_url: Optional direct OpenAI-compatible endpoint.
|
||||
explicit_api_key: Optional API key paired with explicit_base_url.
|
||||
api_mode: API mode override. One of "chat_completions",
|
||||
"codex_responses", or None (auto-detect). When set to
|
||||
"codex_responses", the client is wrapped in
|
||||
CodexAuxiliaryClient to route through the Responses API.
|
||||
|
||||
Returns:
|
||||
(client, resolved_model) or (None, None) if auth is unavailable.
|
||||
|
|
@ -1236,6 +1279,40 @@ def resolve_provider_client(
|
|||
# Normalise aliases
|
||||
provider = _normalize_aux_provider(provider)
|
||||
|
||||
def _needs_codex_wrap(client_obj, base_url_str: str, model_str: str) -> bool:
|
||||
"""Decide if a plain OpenAI client should be wrapped for Responses API.
|
||||
|
||||
Returns True when api_mode is explicitly "codex_responses", or when
|
||||
auto-detection (api.openai.com + codex-family model) suggests it.
|
||||
Already-wrapped clients (CodexAuxiliaryClient) are skipped.
|
||||
"""
|
||||
if isinstance(client_obj, CodexAuxiliaryClient):
|
||||
return False
|
||||
if raw_codex:
|
||||
return False
|
||||
if api_mode == "codex_responses":
|
||||
return True
|
||||
# Auto-detect: api.openai.com + codex model name pattern
|
||||
if api_mode and api_mode != "codex_responses":
|
||||
return False # explicit non-codex mode
|
||||
normalized_base = (base_url_str or "").strip().lower()
|
||||
if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
|
||||
model_lower = (model_str or "").lower()
|
||||
if "codex" in model_lower:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
|
||||
"""Wrap a plain OpenAI client in CodexAuxiliaryClient if Responses API is needed."""
|
||||
if _needs_codex_wrap(client_obj, base_url_str, final_model_str):
|
||||
logger.debug(
|
||||
"resolve_provider_client: wrapping client in CodexAuxiliaryClient "
|
||||
"(api_mode=%s, model=%s, base_url=%s)",
|
||||
api_mode or "auto-detected", final_model_str,
|
||||
base_url_str[:60] if base_url_str else "")
|
||||
return CodexAuxiliaryClient(client_obj, final_model_str)
|
||||
return client_obj
|
||||
|
||||
# ── Auto: try all providers in priority order ────────────────────
|
||||
if provider == "auto":
|
||||
client, resolved = _resolve_auto()
|
||||
|
|
@ -1261,7 +1338,7 @@ def resolve_provider_client(
|
|||
logger.warning("resolve_provider_client: openrouter requested "
|
||||
"but OPENROUTER_API_KEY not set")
|
||||
return None, None
|
||||
final_model = model or default
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
|
|
@ -1272,7 +1349,7 @@ def resolve_provider_client(
|
|||
logger.warning("resolve_provider_client: nous requested "
|
||||
"but Nous Portal not configured (run: hermes auth)")
|
||||
return None, None
|
||||
final_model = model or default
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
|
|
@ -1286,7 +1363,7 @@ def resolve_provider_client(
|
|||
logger.warning("resolve_provider_client: openai-codex requested "
|
||||
"but no Codex OAuth token found (run: hermes model)")
|
||||
return None, None
|
||||
final_model = model or _CODEX_AUX_MODEL
|
||||
final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider)
|
||||
raw_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
|
||||
return (raw_client, final_model)
|
||||
# Standard path: wrap in CodexAuxiliaryClient adapter
|
||||
|
|
@ -1295,7 +1372,7 @@ def resolve_provider_client(
|
|||
logger.warning("resolve_provider_client: openai-codex requested "
|
||||
"but no Codex OAuth token found (run: hermes model)")
|
||||
return None, None
|
||||
final_model = model or default
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
|
|
@ -1314,14 +1391,18 @@ def resolve_provider_client(
|
|||
"but base_url is empty"
|
||||
)
|
||||
return None, None
|
||||
final_model = model or _read_main_model() or "gpt-4o-mini"
|
||||
final_model = _normalize_resolved_model(
|
||||
model or _read_main_model() or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
extra = {}
|
||||
if "api.kimi.com" in custom_base.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
|
||||
elif "api.githubcopilot.com" in custom_base.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
# Try custom first, then codex, then API-key providers
|
||||
|
|
@ -1329,7 +1410,9 @@ def resolve_provider_client(
|
|||
_resolve_api_key_provider):
|
||||
client, default = try_fn()
|
||||
if client is not None:
|
||||
final_model = model or default
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
_cbase = str(getattr(client, "base_url", "") or "")
|
||||
client = _wrap_if_needed(client, final_model, _cbase)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning("resolve_provider_client: custom/main requested "
|
||||
|
|
@ -1344,8 +1427,12 @@ def resolve_provider_client(
|
|||
custom_base = custom_entry.get("base_url", "").strip()
|
||||
custom_key = custom_entry.get("api_key", "").strip() or "no-key-required"
|
||||
if custom_base:
|
||||
final_model = model or _read_main_model() or "gpt-4o-mini"
|
||||
final_model = _normalize_resolved_model(
|
||||
model or _read_main_model() or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
logger.debug(
|
||||
"resolve_provider_client: named custom provider %r (%s)",
|
||||
provider, final_model)
|
||||
|
|
@ -1376,7 +1463,7 @@ def resolve_provider_client(
|
|||
if client is None:
|
||||
logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
|
||||
return None, None
|
||||
final_model = model or default_model
|
||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||
return (_to_async_client(client, final_model) if async_mode else (client, final_model))
|
||||
|
||||
creds = resolve_api_key_provider_credentials(provider)
|
||||
|
|
@ -1395,12 +1482,12 @@ def resolve_provider_client(
|
|||
)
|
||||
|
||||
default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
|
||||
final_model = model or default_model
|
||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||
|
||||
# Provider-specific headers
|
||||
headers = {}
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
headers["User-Agent"] = "KimiCLI/1.3"
|
||||
headers["User-Agent"] = "KimiCLI/1.30.0"
|
||||
elif "api.githubcopilot.com" in base_url.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
|
|
@ -1408,6 +1495,28 @@ def resolve_provider_client(
|
|||
|
||||
client = OpenAI(api_key=api_key, base_url=base_url,
|
||||
**({"default_headers": headers} if headers else {}))
|
||||
|
||||
# Copilot GPT-5+ models (except gpt-5-mini) require the Responses
|
||||
# API — they are not accessible via /chat/completions. Wrap the
|
||||
# plain client in CodexAuxiliaryClient so call_llm() transparently
|
||||
# routes through responses.stream().
|
||||
if provider == "copilot" and final_model and not raw_codex:
|
||||
try:
|
||||
from hermes_cli.models import _should_use_copilot_responses_api
|
||||
if _should_use_copilot_responses_api(final_model):
|
||||
logger.debug(
|
||||
"resolve_provider_client: copilot model %s needs "
|
||||
"Responses API — wrapping with CodexAuxiliaryClient",
|
||||
final_model)
|
||||
client = CodexAuxiliaryClient(client, final_model)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# Honor api_mode for any API-key provider (e.g. direct OpenAI with
|
||||
# codex-family models). The copilot-specific wrapping above handles
|
||||
# copilot; this covers the general case (#6800).
|
||||
client = _wrap_if_needed(client, final_model, base_url)
|
||||
|
||||
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
|
@ -1440,12 +1549,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
|
|||
Callers may override the returned model with a per-task env var
|
||||
(e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
|
||||
"""
|
||||
provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
|
||||
provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(task or None)
|
||||
return resolve_provider_client(
|
||||
provider,
|
||||
model=model,
|
||||
explicit_base_url=base_url,
|
||||
explicit_api_key=api_key,
|
||||
api_mode=api_mode,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -1456,13 +1566,14 @@ def get_async_text_auxiliary_client(task: str = ""):
|
|||
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
|
||||
Returns (None, None) when no provider is available.
|
||||
"""
|
||||
provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
|
||||
provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(task or None)
|
||||
return resolve_provider_client(
|
||||
provider,
|
||||
model=model,
|
||||
async_mode=True,
|
||||
explicit_base_url=base_url,
|
||||
explicit_api_key=api_key,
|
||||
api_mode=api_mode,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -1495,22 +1606,6 @@ def _strict_vision_backend_available(provider: str) -> bool:
|
|||
return _resolve_strict_vision_backend(provider)[0] is not None
|
||||
|
||||
|
||||
def _preferred_main_vision_provider() -> Optional[str]:
|
||||
"""Return the selected main provider when it is also a supported vision backend."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
config = load_config()
|
||||
model_cfg = config.get("model", {})
|
||||
if isinstance(model_cfg, dict):
|
||||
provider = _normalize_vision_provider(model_cfg.get("provider", ""))
|
||||
if provider in _VISION_AUTO_PROVIDER_ORDER:
|
||||
return provider
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def get_available_vision_backends() -> List[str]:
|
||||
"""Return the currently available vision backends in auto-selection order.
|
||||
|
||||
|
|
@ -1551,7 +1646,7 @@ def resolve_vision_provider_client(
|
|||
backends, so users can intentionally force experimental providers. Auto mode
|
||||
stays conservative and only tries vision backends known to work today.
|
||||
"""
|
||||
requested, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
|
||||
requested, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
|
||||
"vision", provider, model, base_url, api_key
|
||||
)
|
||||
requested = _normalize_vision_provider(requested)
|
||||
|
|
@ -1624,18 +1719,6 @@ def resolve_vision_provider_client(
|
|||
return requested, client, final_model
|
||||
|
||||
|
||||
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Return (client, default_model_slug) for vision/multimodal auxiliary tasks."""
|
||||
_, client, final_model = resolve_vision_provider_client(async_mode=False)
|
||||
return client, final_model
|
||||
|
||||
|
||||
def get_async_vision_auxiliary_client():
|
||||
"""Return (async_client, model_slug) for async vision consumers."""
|
||||
_, client, final_model = resolve_vision_provider_client(async_mode=True)
|
||||
return client, final_model
|
||||
|
||||
|
||||
def get_auxiliary_extra_body() -> dict:
|
||||
"""Return extra_body kwargs for auxiliary API calls.
|
||||
|
||||
|
|
@ -1779,12 +1862,30 @@ def cleanup_stale_async_clients() -> None:
|
|||
del _client_cache[key]
|
||||
|
||||
|
||||
def _is_openrouter_client(client: Any) -> bool:
|
||||
for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)):
|
||||
if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower():
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
|
||||
"""Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.
|
||||
|
||||
Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
|
||||
"""
|
||||
if model and "/" in model and not _is_openrouter_client(client):
|
||||
return cached_default
|
||||
return model or cached_default
|
||||
|
||||
|
||||
def _get_cached_client(
|
||||
provider: str,
|
||||
model: str = None,
|
||||
async_mode: bool = False,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
api_mode: str = None,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Get or create a cached client for the given provider.
|
||||
|
||||
|
|
@ -1808,7 +1909,7 @@ def _get_cached_client(
|
|||
loop_id = id(current_loop)
|
||||
except RuntimeError:
|
||||
pass
|
||||
cache_key = (provider, async_mode, base_url or "", api_key or "", loop_id)
|
||||
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id)
|
||||
with _client_cache_lock:
|
||||
if cache_key in _client_cache:
|
||||
cached_client, cached_default, cached_loop = _client_cache[cache_key]
|
||||
|
|
@ -1820,9 +1921,11 @@ def _get_cached_client(
|
|||
_force_close_async_httpx(cached_client)
|
||||
del _client_cache[cache_key]
|
||||
else:
|
||||
return cached_client, model or cached_default
|
||||
effective = _compat_model(cached_client, model, cached_default)
|
||||
return cached_client, effective
|
||||
else:
|
||||
return cached_client, model or cached_default
|
||||
effective = _compat_model(cached_client, model, cached_default)
|
||||
return cached_client, effective
|
||||
# Build outside the lock
|
||||
client, default_model = resolve_provider_client(
|
||||
provider,
|
||||
|
|
@ -1830,6 +1933,7 @@ def _get_cached_client(
|
|||
async_mode,
|
||||
explicit_base_url=base_url,
|
||||
explicit_api_key=api_key,
|
||||
api_mode=api_mode,
|
||||
)
|
||||
if client is not None:
|
||||
# For async clients, remember which loop they were created on so we
|
||||
|
|
@ -1849,7 +1953,7 @@ def _resolve_task_provider_model(
|
|||
model: str = None,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
|
||||
) -> Tuple[str, Optional[str], Optional[str], Optional[str], Optional[str]]:
|
||||
"""Determine provider + model for a call.
|
||||
|
||||
Priority:
|
||||
|
|
@ -1858,15 +1962,17 @@ def _resolve_task_provider_model(
|
|||
3. Config file (auxiliary.{task}.* or compression.*)
|
||||
4. "auto" (full auto-detection chain)
|
||||
|
||||
Returns (provider, model, base_url, api_key) where model may be None
|
||||
(use provider default). When base_url is set, provider is forced to
|
||||
"custom" and the task uses that direct endpoint.
|
||||
Returns (provider, model, base_url, api_key, api_mode) where model may
|
||||
be None (use provider default). When base_url is set, provider is forced
|
||||
to "custom" and the task uses that direct endpoint. api_mode is one of
|
||||
"chat_completions", "codex_responses", or None (auto-detect).
|
||||
"""
|
||||
config = {}
|
||||
cfg_provider = None
|
||||
cfg_model = None
|
||||
cfg_base_url = None
|
||||
cfg_api_key = None
|
||||
cfg_api_mode = None
|
||||
|
||||
if task:
|
||||
try:
|
||||
|
|
@ -1883,6 +1989,7 @@ def _resolve_task_provider_model(
|
|||
cfg_model = str(task_config.get("model", "")).strip() or None
|
||||
cfg_base_url = str(task_config.get("base_url", "")).strip() or None
|
||||
cfg_api_key = str(task_config.get("api_key", "")).strip() or None
|
||||
cfg_api_mode = str(task_config.get("api_mode", "")).strip() or None
|
||||
|
||||
# Backwards compat: compression section has its own keys.
|
||||
# The auxiliary.compression defaults to provider="auto", so treat
|
||||
|
|
@ -1896,30 +2003,32 @@ def _resolve_task_provider_model(
|
|||
cfg_base_url = cfg_base_url or _sbu.strip() or None
|
||||
|
||||
env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
|
||||
env_api_mode = _get_auxiliary_env_override(task, "API_MODE") if task else None
|
||||
resolved_model = model or env_model or cfg_model
|
||||
resolved_api_mode = env_api_mode or cfg_api_mode
|
||||
|
||||
if base_url:
|
||||
return "custom", resolved_model, base_url, api_key
|
||||
return "custom", resolved_model, base_url, api_key, resolved_api_mode
|
||||
if provider:
|
||||
return provider, resolved_model, base_url, api_key
|
||||
return provider, resolved_model, base_url, api_key, resolved_api_mode
|
||||
|
||||
if task:
|
||||
env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
|
||||
env_api_key = _get_auxiliary_env_override(task, "API_KEY")
|
||||
if env_base_url:
|
||||
return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key
|
||||
return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key, resolved_api_mode
|
||||
|
||||
env_provider = _get_auxiliary_provider(task)
|
||||
if env_provider != "auto":
|
||||
return env_provider, resolved_model, None, None
|
||||
return env_provider, resolved_model, None, None, resolved_api_mode
|
||||
|
||||
if cfg_base_url:
|
||||
return "custom", resolved_model, cfg_base_url, cfg_api_key
|
||||
return "custom", resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
|
||||
if cfg_provider and cfg_provider != "auto":
|
||||
return cfg_provider, resolved_model, None, None
|
||||
return "auto", resolved_model, None, None
|
||||
return cfg_provider, resolved_model, None, None, resolved_api_mode
|
||||
return "auto", resolved_model, None, None, resolved_api_mode
|
||||
|
||||
return "auto", resolved_model, None, None
|
||||
return "auto", resolved_model, None, None, resolved_api_mode
|
||||
|
||||
|
||||
_DEFAULT_AUX_TIMEOUT = 30.0
|
||||
|
|
@ -1991,6 +2100,37 @@ def _build_call_kwargs(
|
|||
return kwargs
|
||||
|
||||
|
||||
def _validate_llm_response(response: Any, task: str = None) -> Any:
|
||||
"""Validate that an LLM response has the expected .choices[0].message shape.
|
||||
|
||||
Fails fast with a clear error instead of letting malformed payloads
|
||||
propagate to downstream consumers where they crash with misleading
|
||||
AttributeError (e.g. "'str' object has no attribute 'choices'").
|
||||
|
||||
See #7264.
|
||||
"""
|
||||
if response is None:
|
||||
raise RuntimeError(
|
||||
f"Auxiliary {task or 'call'}: LLM returned None response"
|
||||
)
|
||||
# Allow SimpleNamespace responses from adapters (CodexAuxiliaryClient,
|
||||
# AnthropicAuxiliaryClient) — they have .choices[0].message.
|
||||
try:
|
||||
choices = response.choices
|
||||
if not choices or not hasattr(choices[0], "message"):
|
||||
raise AttributeError("missing choices[0].message")
|
||||
except (AttributeError, TypeError, IndexError) as exc:
|
||||
response_type = type(response).__name__
|
||||
response_preview = str(response)[:120]
|
||||
raise RuntimeError(
|
||||
f"Auxiliary {task or 'call'}: LLM returned invalid response "
|
||||
f"(type={response_type}): {response_preview!r}. "
|
||||
f"Expected object with .choices[0].message — check provider "
|
||||
f"adapter or custom endpoint compatibility."
|
||||
) from exc
|
||||
return response
|
||||
|
||||
|
||||
def call_llm(
|
||||
task: str = None,
|
||||
*,
|
||||
|
|
@ -2029,7 +2169,7 @@ def call_llm(
|
|||
Raises:
|
||||
RuntimeError: If no provider is configured.
|
||||
"""
|
||||
resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
|
||||
resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
|
||||
task, provider, model, base_url, api_key)
|
||||
|
||||
if task == "vision":
|
||||
|
|
@ -2062,6 +2202,7 @@ def call_llm(
|
|||
resolved_model,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
)
|
||||
if client is None:
|
||||
# When the user explicitly chose a non-OpenRouter provider but no
|
||||
|
|
@ -2105,18 +2246,20 @@ def call_llm(
|
|||
|
||||
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
|
||||
try:
|
||||
return client.chat.completions.create(**kwargs)
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
err_str = str(first_err)
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
return client.chat.completions.create(**kwargs)
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as retry_err:
|
||||
# If the max_tokens retry also hits a payment error,
|
||||
# fall through to the payment fallback below.
|
||||
if not _is_payment_error(retry_err):
|
||||
# If the max_tokens retry also hits a payment or connection
|
||||
# error, fall through to the fallback chain below.
|
||||
if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
|
||||
raise
|
||||
first_err = retry_err
|
||||
|
||||
|
|
@ -2133,19 +2276,24 @@ def call_llm(
|
|||
# and providers the user never configured that got picked up by
|
||||
# the auto-detection chain.
|
||||
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
if should_fallback:
|
||||
# Only try alternative providers when the user didn't explicitly
|
||||
# configure this task's provider. Explicit provider = hard constraint;
|
||||
# auto (the default) = best-effort fallback chain. (#7559)
|
||||
is_auto = resolved_provider in ("auto", "", None)
|
||||
if should_fallback and is_auto:
|
||||
reason = "payment error" if _is_payment_error(first_err) else "connection error"
|
||||
logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
|
||||
task or "call", reason, resolved_provider, first_err)
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task)
|
||||
resolved_provider, task, reason=reason)
|
||||
if fb_client is not None:
|
||||
fb_kwargs = _build_call_kwargs(
|
||||
fb_label, fb_model, messages,
|
||||
temperature=temperature, max_tokens=max_tokens,
|
||||
tools=tools, timeout=effective_timeout,
|
||||
extra_body=extra_body)
|
||||
return fb_client.chat.completions.create(**fb_kwargs)
|
||||
return _validate_llm_response(
|
||||
fb_client.chat.completions.create(**fb_kwargs), task)
|
||||
raise
|
||||
|
||||
|
||||
|
|
@ -2223,7 +2371,7 @@ async def async_call_llm(
|
|||
|
||||
Same as call_llm() but async. See call_llm() for full documentation.
|
||||
"""
|
||||
resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
|
||||
resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
|
||||
task, provider, model, base_url, api_key)
|
||||
|
||||
if task == "vision":
|
||||
|
|
@ -2257,6 +2405,7 @@ async def async_call_llm(
|
|||
async_mode=True,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
)
|
||||
if client is None:
|
||||
_explicit = (resolved_provider or "").strip().lower()
|
||||
|
|
@ -2267,11 +2416,9 @@ async def async_call_llm(
|
|||
f"variable, or switch to a different provider with `hermes model`."
|
||||
)
|
||||
if not resolved_base_url:
|
||||
logger.warning("Provider %s unavailable, falling back to openrouter",
|
||||
resolved_provider)
|
||||
client, final_model = _get_cached_client(
|
||||
"openrouter", resolved_model or _OPENROUTER_MODEL,
|
||||
async_mode=True)
|
||||
logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
|
||||
task or "call", resolved_provider)
|
||||
client, final_model = _get_cached_client("auto", async_mode=True)
|
||||
if client is None:
|
||||
raise RuntimeError(
|
||||
f"No LLM provider configured for task={task} provider={resolved_provider}. "
|
||||
|
|
@ -2286,11 +2433,42 @@ async def async_call_llm(
|
|||
base_url=resolved_base_url)
|
||||
|
||||
try:
|
||||
return await client.chat.completions.create(**kwargs)
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
err_str = str(first_err)
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
return await client.chat.completions.create(**kwargs)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as retry_err:
|
||||
# If the max_tokens retry also hits a payment or connection
|
||||
# error, fall through to the fallback chain below.
|
||||
if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
|
||||
raise
|
||||
first_err = retry_err
|
||||
|
||||
# ── Payment / connection fallback (mirrors sync call_llm) ─────
|
||||
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
is_auto = resolved_provider in ("auto", "", None)
|
||||
if should_fallback and is_auto:
|
||||
reason = "payment error" if _is_payment_error(first_err) else "connection error"
|
||||
logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
|
||||
task or "call", reason, resolved_provider, first_err)
|
||||
fb_client, fb_model, fb_label = _try_payment_fallback(
|
||||
resolved_provider, task, reason=reason)
|
||||
if fb_client is not None:
|
||||
fb_kwargs = _build_call_kwargs(
|
||||
fb_label, fb_model, messages,
|
||||
temperature=temperature, max_tokens=max_tokens,
|
||||
tools=tools, timeout=effective_timeout,
|
||||
extra_body=extra_body)
|
||||
# Convert sync fallback client to async
|
||||
async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
|
||||
if async_fb_model and async_fb_model != fb_kwargs.get("model"):
|
||||
fb_kwargs["model"] = async_fb_model
|
||||
return _validate_llm_response(
|
||||
await async_fb.chat.completions.create(**fb_kwargs), task)
|
||||
raise
|
||||
|
|
|
|||
|
|
@ -1,114 +0,0 @@
|
|||
"""BuiltinMemoryProvider — wraps MEMORY.md / USER.md as a MemoryProvider.
|
||||
|
||||
Always registered as the first provider. Cannot be disabled or removed.
|
||||
This is the existing Hermes memory system exposed through the provider
|
||||
interface for compatibility with the MemoryManager.
|
||||
|
||||
The actual storage logic lives in tools/memory_tool.py (MemoryStore).
|
||||
This provider is a thin adapter that delegates to MemoryStore and
|
||||
exposes the memory tool schema.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from agent.memory_provider import MemoryProvider
|
||||
from tools.registry import tool_error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BuiltinMemoryProvider(MemoryProvider):
|
||||
"""Built-in file-backed memory (MEMORY.md + USER.md).
|
||||
|
||||
Always active, never disabled by other providers. The `memory` tool
|
||||
is handled by run_agent.py's agent-level tool interception (not through
|
||||
the normal registry), so get_tool_schemas() returns an empty list —
|
||||
the memory tool is already wired separately.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
memory_store=None,
|
||||
memory_enabled: bool = False,
|
||||
user_profile_enabled: bool = False,
|
||||
):
|
||||
self._store = memory_store
|
||||
self._memory_enabled = memory_enabled
|
||||
self._user_profile_enabled = user_profile_enabled
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "builtin"
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""Built-in memory is always available."""
|
||||
return True
|
||||
|
||||
def initialize(self, session_id: str, **kwargs) -> None:
|
||||
"""Load memory from disk if not already loaded."""
|
||||
if self._store is not None:
|
||||
self._store.load_from_disk()
|
||||
|
||||
def system_prompt_block(self) -> str:
|
||||
"""Return MEMORY.md and USER.md content for the system prompt.
|
||||
|
||||
Uses the frozen snapshot captured at load time. This ensures the
|
||||
system prompt stays stable throughout a session (preserving the
|
||||
prompt cache), even though the live entries may change via tool calls.
|
||||
"""
|
||||
if not self._store:
|
||||
return ""
|
||||
|
||||
parts = []
|
||||
if self._memory_enabled:
|
||||
mem_block = self._store.format_for_system_prompt("memory")
|
||||
if mem_block:
|
||||
parts.append(mem_block)
|
||||
if self._user_profile_enabled:
|
||||
user_block = self._store.format_for_system_prompt("user")
|
||||
if user_block:
|
||||
parts.append(user_block)
|
||||
|
||||
return "\n\n".join(parts)
|
||||
|
||||
def prefetch(self, query: str, *, session_id: str = "") -> str:
|
||||
"""Built-in memory doesn't do query-based recall — it's injected via system_prompt_block."""
|
||||
return ""
|
||||
|
||||
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
|
||||
"""Built-in memory doesn't auto-sync turns — writes happen via the memory tool."""
|
||||
|
||||
def get_tool_schemas(self) -> List[Dict[str, Any]]:
|
||||
"""Return empty list.
|
||||
|
||||
The `memory` tool is an agent-level intercepted tool, handled
|
||||
specially in run_agent.py before normal tool dispatch. It's not
|
||||
part of the standard tool registry. We don't duplicate it here.
|
||||
"""
|
||||
return []
|
||||
|
||||
def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
|
||||
"""Not used — the memory tool is intercepted in run_agent.py."""
|
||||
return tool_error("Built-in memory tool is handled by the agent loop")
|
||||
|
||||
def shutdown(self) -> None:
|
||||
"""No cleanup needed — files are saved on every write."""
|
||||
|
||||
# -- Property access for backward compatibility --------------------------
|
||||
|
||||
@property
|
||||
def store(self):
|
||||
"""Access the underlying MemoryStore for legacy code paths."""
|
||||
return self._store
|
||||
|
||||
@property
|
||||
def memory_enabled(self) -> bool:
|
||||
return self._memory_enabled
|
||||
|
||||
@property
|
||||
def user_profile_enabled(self) -> bool:
|
||||
return self._user_profile_enabled
|
||||
|
|
@ -18,6 +18,7 @@ import time
|
|||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.auxiliary_client import call_llm
|
||||
from agent.context_engine import ContextEngine
|
||||
from agent.model_metadata import (
|
||||
get_model_context_length,
|
||||
estimate_messages_tokens_rough,
|
||||
|
|
@ -50,8 +51,8 @@ _CHARS_PER_TOKEN = 4
|
|||
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
|
||||
|
||||
|
||||
class ContextCompressor:
|
||||
"""Compresses conversation context when approaching the model's context limit.
|
||||
class ContextCompressor(ContextEngine):
|
||||
"""Default context engine — compresses conversation context via lossy summarization.
|
||||
|
||||
Algorithm:
|
||||
1. Prune old tool results (cheap, no LLM call)
|
||||
|
|
@ -61,6 +62,33 @@ class ContextCompressor:
|
|||
5. On subsequent compactions, iteratively update the previous summary
|
||||
"""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "compressor"
|
||||
|
||||
def on_session_reset(self) -> None:
|
||||
"""Reset all per-session state for /new or /reset."""
|
||||
super().on_session_reset()
|
||||
self._context_probed = False
|
||||
self._context_probe_persistable = False
|
||||
self._previous_summary = None
|
||||
|
||||
def update_model(
|
||||
self,
|
||||
model: str,
|
||||
context_length: int,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
provider: str = "",
|
||||
) -> None:
|
||||
"""Update model info after a model switch or fallback activation."""
|
||||
self.model = model
|
||||
self.base_url = base_url
|
||||
self.api_key = api_key
|
||||
self.provider = provider
|
||||
self.context_length = context_length
|
||||
self.threshold_tokens = int(context_length * self.threshold_percent)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: str,
|
||||
|
|
@ -114,7 +142,6 @@ class ContextCompressor:
|
|||
|
||||
self.last_prompt_tokens = 0
|
||||
self.last_completion_tokens = 0
|
||||
self.last_total_tokens = 0
|
||||
|
||||
self.summary_model = summary_model_override or ""
|
||||
|
||||
|
|
@ -126,28 +153,12 @@ class ContextCompressor:
|
|||
"""Update tracked token usage from API response."""
|
||||
self.last_prompt_tokens = usage.get("prompt_tokens", 0)
|
||||
self.last_completion_tokens = usage.get("completion_tokens", 0)
|
||||
self.last_total_tokens = usage.get("total_tokens", 0)
|
||||
|
||||
def should_compress(self, prompt_tokens: int = None) -> bool:
|
||||
"""Check if context exceeds the compression threshold."""
|
||||
tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
|
||||
return tokens >= self.threshold_tokens
|
||||
|
||||
def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Quick pre-flight check using rough estimate (before API call)."""
|
||||
rough_estimate = estimate_messages_tokens_rough(messages)
|
||||
return rough_estimate >= self.threshold_tokens
|
||||
|
||||
def get_status(self) -> Dict[str, Any]:
|
||||
"""Get current compression status for display/logging."""
|
||||
return {
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"threshold_tokens": self.threshold_tokens,
|
||||
"context_length": self.context_length,
|
||||
"usage_percent": min(100, (self.last_prompt_tokens / self.context_length * 100)) if self.context_length else 0,
|
||||
"compression_count": self.compression_count,
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tool output pruning (cheap pre-pass, no LLM call)
|
||||
# ------------------------------------------------------------------
|
||||
|
|
|
|||
184
agent/context_engine.py
Normal file
184
agent/context_engine.py
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
"""Abstract base class for pluggable context engines.
|
||||
|
||||
A context engine controls how conversation context is managed when
|
||||
approaching the model's token limit. The built-in ContextCompressor
|
||||
is the default implementation. Third-party engines (e.g. LCM) can
|
||||
replace it via the plugin system or by being placed in the
|
||||
``plugins/context_engine/<name>/`` directory.
|
||||
|
||||
Selection is config-driven: ``context.engine`` in config.yaml.
|
||||
Default is ``"compressor"`` (the built-in). Only one engine is active.
|
||||
|
||||
The engine is responsible for:
|
||||
- Deciding when compaction should fire
|
||||
- Performing compaction (summarization, DAG construction, etc.)
|
||||
- Optionally exposing tools the agent can call (e.g. lcm_grep)
|
||||
- Tracking token usage from API responses
|
||||
|
||||
Lifecycle:
|
||||
1. Engine is instantiated and registered (plugin register() or default)
|
||||
2. on_session_start() called when a conversation begins
|
||||
3. update_from_response() called after each API response with usage data
|
||||
4. should_compress() checked after each turn
|
||||
5. compress() called when should_compress() returns True
|
||||
6. on_session_end() called at real session boundaries (CLI exit, /reset,
|
||||
gateway session expiry) — NOT per-turn
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
class ContextEngine(ABC):
    """Base class all context engines must implement."""

    # -- Identity ----------------------------------------------------------

    @property
    @abstractmethod
    def name(self) -> str:
        """Short identifier (e.g. 'compressor', 'lcm')."""

    # -- Token state (read by run_agent.py for display/logging) ------------
    #
    # Engines MUST maintain these. run_agent.py reads them directly.

    last_prompt_tokens: int = 0
    last_completion_tokens: int = 0
    last_total_tokens: int = 0
    threshold_tokens: int = 0
    context_length: int = 0
    compression_count: int = 0

    # -- Compaction parameters (read by run_agent.py for preflight) --------
    #
    # These control the preflight compression check. Subclasses may
    # override via __init__ or property; defaults are sensible for most
    # engines.

    threshold_percent: float = 0.75
    protect_first_n: int = 3
    protect_last_n: int = 6

    # -- Core interface ----------------------------------------------------

    @abstractmethod
    def update_from_response(self, usage: Dict[str, Any]) -> None:
        """Update tracked token usage from an API response.

        Called after every LLM call with the usage dict from the response.
        """

    @abstractmethod
    def should_compress(self, prompt_tokens: Optional[int] = None) -> bool:
        # PEP 484: an implicit-Optional default (``int = None``) is rejected
        # by strict type checkers — the annotation must be Optional[int].
        """Return True if compaction should fire this turn."""

    @abstractmethod
    def compress(
        self,
        messages: List[Dict[str, Any]],
        current_tokens: Optional[int] = None,
    ) -> List[Dict[str, Any]]:
        """Compact the message list and return the new message list.

        This is the main entry point. The engine receives the full message
        list and returns a (possibly shorter) list that fits within the
        context budget. The implementation is free to summarize, build a
        DAG, or do anything else — as long as the returned list is a valid
        OpenAI-format message sequence.
        """

    # -- Optional: pre-flight check ----------------------------------------

    def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
        """Quick rough check before the API call (no real token count yet).

        Default returns False (skip pre-flight). Override if your engine
        can do a cheap estimate.
        """
        return False

    # -- Optional: session lifecycle ---------------------------------------

    def on_session_start(self, session_id: str, **kwargs) -> None:
        """Called when a new conversation session begins.

        Use this to load persisted state (DAG, store) for the session.
        kwargs may include hermes_home, platform, model, etc.
        """

    def on_session_end(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
        """Called at real session boundaries (CLI exit, /reset, gateway expiry).

        Use this to flush state, close DB connections, etc.
        NOT called per-turn — only when the session truly ends.
        """

    def on_session_reset(self) -> None:
        """Called on /new or /reset. Reset per-session state.

        Default resets compression_count and token tracking.
        """
        self.last_prompt_tokens = 0
        self.last_completion_tokens = 0
        self.last_total_tokens = 0
        self.compression_count = 0

    # -- Optional: tools ---------------------------------------------------

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """Return tool schemas this engine provides to the agent.

        Default returns empty list (no tools). LCM would return schemas
        for lcm_grep, lcm_describe, lcm_expand here.
        """
        return []

    def handle_tool_call(self, name: str, args: Dict[str, Any], **kwargs) -> str:
        """Handle a tool call from the agent.

        Only called for tool names returned by get_tool_schemas().
        Must return a JSON string.

        kwargs may include:
            messages: the current in-memory message list (for live ingestion)
        """
        import json
        return json.dumps({"error": f"Unknown context engine tool: {name}"})

    # -- Optional: status / display ----------------------------------------

    def get_status(self) -> Dict[str, Any]:
        """Return status dict for display/logging.

        Default returns the standard fields run_agent.py expects.
        """
        return {
            "last_prompt_tokens": self.last_prompt_tokens,
            "threshold_tokens": self.threshold_tokens,
            "context_length": self.context_length,
            "usage_percent": (
                min(100, self.last_prompt_tokens / self.context_length * 100)
                if self.context_length else 0
            ),
            "compression_count": self.compression_count,
        }

    # -- Optional: model switch support ------------------------------------

    def update_model(
        self,
        model: str,
        context_length: int,
        base_url: str = "",
        api_key: str = "",
        provider: str = "",
    ) -> None:
        """Called when the user switches models or on fallback activation.

        Default updates context_length and recalculates threshold_tokens
        from threshold_percent. Override if your engine needs more
        (e.g. recalculate DAG budgets, switch summary models).
        """
        self.context_length = context_length
        self.threshold_tokens = int(context_length * self.threshold_percent)
|
||||
|
|
@ -13,8 +13,9 @@ from typing import Awaitable, Callable
|
|||
|
||||
from agent.model_metadata import estimate_tokens_rough
|
||||
|
||||
_QUOTED_REFERENCE_VALUE = r'(?:`[^`\n]+`|"[^"\n]+"|\'[^\'\n]+\')'
|
||||
REFERENCE_PATTERN = re.compile(
|
||||
r"(?<![\w/])@(?:(?P<simple>diff|staged)\b|(?P<kind>file|folder|git|url):(?P<value>\S+))"
|
||||
rf"(?<![\w/])@(?:(?P<simple>diff|staged)\b|(?P<kind>file|folder|git|url):(?P<value>{_QUOTED_REFERENCE_VALUE}(?::\d+(?:-\d+)?)?|\S+))"
|
||||
)
|
||||
TRAILING_PUNCTUATION = ",.;!?"
|
||||
_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube", ".docker", ".azure", ".config/gh")
|
||||
|
|
@ -81,14 +82,10 @@ def parse_context_references(message: str) -> list[ContextReference]:
|
|||
value = _strip_trailing_punctuation(match.group("value") or "")
|
||||
line_start = None
|
||||
line_end = None
|
||||
target = value
|
||||
target = _strip_reference_wrappers(value)
|
||||
|
||||
if kind == "file":
|
||||
range_match = re.match(r"^(?P<path>.+?):(?P<start>\d+)(?:-(?P<end>\d+))?$", value)
|
||||
if range_match:
|
||||
target = range_match.group("path")
|
||||
line_start = int(range_match.group("start"))
|
||||
line_end = int(range_match.group("end") or range_match.group("start"))
|
||||
target, line_start, line_end = _parse_file_reference_value(value)
|
||||
|
||||
refs.append(
|
||||
ContextReference(
|
||||
|
|
@ -375,6 +372,38 @@ def _strip_trailing_punctuation(value: str) -> str:
|
|||
return stripped
|
||||
|
||||
|
||||
def _strip_reference_wrappers(value: str) -> str:
|
||||
if len(value) >= 2 and value[0] == value[-1] and value[0] in "`\"'":
|
||||
return value[1:-1]
|
||||
return value
|
||||
|
||||
|
||||
def _parse_file_reference_value(value: str) -> tuple[str, int | None, int | None]:
|
||||
quoted_match = re.match(
|
||||
r'^(?P<quote>`|"|\')(?P<path>.+?)(?P=quote)(?::(?P<start>\d+)(?:-(?P<end>\d+))?)?$',
|
||||
value,
|
||||
)
|
||||
if quoted_match:
|
||||
line_start = quoted_match.group("start")
|
||||
line_end = quoted_match.group("end")
|
||||
return (
|
||||
quoted_match.group("path"),
|
||||
int(line_start) if line_start is not None else None,
|
||||
int(line_end or line_start) if line_start is not None else None,
|
||||
)
|
||||
|
||||
range_match = re.match(r"^(?P<path>.+?):(?P<start>\d+)(?:-(?P<end>\d+))?$", value)
|
||||
if range_match:
|
||||
line_start = int(range_match.group("start"))
|
||||
return (
|
||||
range_match.group("path"),
|
||||
line_start,
|
||||
int(range_match.group("end") or range_match.group("start")),
|
||||
)
|
||||
|
||||
return _strip_reference_wrappers(value), None, None
|
||||
|
||||
|
||||
def _remove_reference_tokens(message: str, refs: list[ContextReference]) -> str:
|
||||
pieces: list[str] = []
|
||||
cursor = 0
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ from hermes_cli.auth import (
|
|||
DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
|
||||
KIMI_CODE_BASE_URL,
|
||||
PROVIDER_REGISTRY,
|
||||
_auth_store_lock,
|
||||
_codex_access_token_is_expiring,
|
||||
_decode_jwt_claims,
|
||||
_import_codex_cli_tokens,
|
||||
|
|
@ -27,6 +28,8 @@ from hermes_cli.auth import (
|
|||
_load_provider_state,
|
||||
_resolve_kimi_base_url,
|
||||
_resolve_zai_base_url,
|
||||
_save_auth_store,
|
||||
_save_provider_state,
|
||||
read_credential_pool,
|
||||
write_credential_pool,
|
||||
)
|
||||
|
|
@ -479,6 +482,67 @@ class CredentialPool:
|
|||
logger.debug("Failed to sync from ~/.codex/auth.json: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
    """Write refreshed pool entry tokens back to auth.json providers.

    After a pool-level refresh, the pool entry has fresh tokens but
    auth.json's ``providers.<id>`` still holds the pre-refresh state.
    On the next ``load_pool()``, ``_seed_from_singletons()`` reads that
    stale state and can overwrite the fresh pool entry — potentially
    re-seeding a consumed single-use refresh token.

    Applies to any OAuth provider whose singleton lives in auth.json
    (currently Nous and OpenAI Codex).
    """
    if entry.source != "device_code":
        return
    try:
        with _auth_store_lock():
            store = _load_auth_store()

            if self.provider == "nous":
                state = _load_provider_state(store, "nous")
                if state is None:
                    return
                state["access_token"] = entry.access_token
                # Only overwrite optional fields when the entry carries them.
                for field_name, field_value in (
                    ("refresh_token", entry.refresh_token),
                    ("expires_at", entry.expires_at),
                    ("agent_key", entry.agent_key),
                    ("agent_key_expires_at", entry.agent_key_expires_at),
                ):
                    if field_value:
                        state[field_name] = field_value
                for extra_key in ("obtained_at", "expires_in", "agent_key_id",
                                  "agent_key_expires_in", "agent_key_reused",
                                  "agent_key_obtained_at"):
                    extra_val = entry.extra.get(extra_key)
                    if extra_val is not None:
                        state[extra_key] = extra_val
                if entry.inference_base_url:
                    state["inference_base_url"] = entry.inference_base_url
                _save_provider_state(store, "nous", state)

            elif self.provider == "openai-codex":
                state = _load_provider_state(store, "openai-codex")
                if not isinstance(state, dict):
                    return
                tokens = state.get("tokens")
                if not isinstance(tokens, dict):
                    return
                tokens["access_token"] = entry.access_token
                if entry.refresh_token:
                    tokens["refresh_token"] = entry.refresh_token
                if entry.last_refresh:
                    state["last_refresh"] = entry.last_refresh
                _save_provider_state(store, "openai-codex", state)

            else:
                # Other providers keep their singleton elsewhere — nothing to do.
                return

            _save_auth_store(store)
    except Exception as exc:
        logger.debug("Failed to sync %s pool entry back to auth store: %s", self.provider, exc)
|
||||
|
||||
def _refresh_entry(self, entry: PooledCredential, *, force: bool) -> Optional[PooledCredential]:
|
||||
if entry.auth_type != AUTH_TYPE_OAUTH or not entry.refresh_token:
|
||||
if force:
|
||||
|
|
@ -513,6 +577,13 @@ class CredentialPool:
|
|||
except Exception as wexc:
|
||||
logger.debug("Failed to write refreshed token to credentials file: %s", wexc)
|
||||
elif self.provider == "openai-codex":
|
||||
# Proactively sync from ~/.codex/auth.json before refresh.
|
||||
# The Codex CLI (or another Hermes profile) may have already
|
||||
# consumed our refresh_token. Syncing first avoids a
|
||||
# "refresh_token_reused" error when the CLI has a newer pair.
|
||||
synced = self._sync_codex_entry_from_cli(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
refreshed = auth_mod.refresh_codex_oauth_pure(
|
||||
entry.access_token,
|
||||
entry.refresh_token,
|
||||
|
|
@ -598,6 +669,37 @@ class CredentialPool:
|
|||
# Credentials file had a valid (non-expired) token — use it directly
|
||||
logger.debug("Credentials file has valid token, using without refresh")
|
||||
return synced
|
||||
# For openai-codex: the refresh_token may have been consumed by
|
||||
# the Codex CLI between our proactive sync and the refresh call.
|
||||
# Re-sync and retry once.
|
||||
if self.provider == "openai-codex":
|
||||
synced = self._sync_codex_entry_from_cli(entry)
|
||||
if synced.refresh_token != entry.refresh_token:
|
||||
logger.debug("Retrying Codex refresh with synced token from ~/.codex/auth.json")
|
||||
try:
|
||||
refreshed = auth_mod.refresh_codex_oauth_pure(
|
||||
synced.access_token,
|
||||
synced.refresh_token,
|
||||
)
|
||||
updated = replace(
|
||||
synced,
|
||||
access_token=refreshed["access_token"],
|
||||
refresh_token=refreshed["refresh_token"],
|
||||
last_refresh=refreshed.get("last_refresh"),
|
||||
last_status=STATUS_OK,
|
||||
last_status_at=None,
|
||||
last_error_code=None,
|
||||
)
|
||||
self._replace_entry(synced, updated)
|
||||
self._persist()
|
||||
self._sync_device_code_entry_to_auth_store(updated)
|
||||
return updated
|
||||
except Exception as retry_exc:
|
||||
logger.debug("Codex retry refresh also failed: %s", retry_exc)
|
||||
elif not self._entry_needs_refresh(synced):
|
||||
logger.debug("Codex CLI has valid token, using without refresh")
|
||||
self._sync_device_code_entry_to_auth_store(synced)
|
||||
return synced
|
||||
self._mark_exhausted(entry, None)
|
||||
return None
|
||||
|
||||
|
|
@ -612,6 +714,10 @@ class CredentialPool:
|
|||
)
|
||||
self._replace_entry(entry, updated)
|
||||
self._persist()
|
||||
# Sync refreshed tokens back to auth.json providers so that
|
||||
# _seed_from_singletons() on the next load_pool() sees fresh state
|
||||
# instead of re-seeding stale/consumed tokens.
|
||||
self._sync_device_code_entry_to_auth_store(updated)
|
||||
return updated
|
||||
|
||||
def _entry_needs_refresh(self, entry: PooledCredential) -> bool:
|
||||
|
|
@ -633,17 +739,6 @@ class CredentialPool:
|
|||
return False
|
||||
return False
|
||||
|
||||
def mark_used(self, entry_id: Optional[str] = None) -> None:
    """Increment request_count for tracking. Used by least_used strategy."""
    target = entry_id if entry_id else self._current_id
    if not target:
        return
    with self._lock:
        for position, candidate in enumerate(self._entries):
            if candidate.id != target:
                continue
            bumped = replace(candidate, request_count=candidate.request_count + 1)
            self._entries[position] = bumped
            return

def select(self) -> Optional[PooledCredential]:
    """Pick a credential while holding the pool lock."""
    with self._lock:
        return self._select_unlocked()
|
||||
|
|
@ -805,11 +900,6 @@ class CredentialPool:
|
|||
else:
|
||||
self._active_leases[credential_id] = count - 1
|
||||
|
||||
def active_lease_count(self, credential_id: str) -> int:
    """Return the number of active leases for a credential."""
    with self._lock:
        count = self._active_leases.get(credential_id, 0)
    return count

def try_refresh_current(self) -> Optional[PooledCredential]:
    """Attempt a refresh of the current credential under the pool lock."""
    with self._lock:
        return self._try_refresh_current_unlocked()
|
||||
|
|
@ -969,6 +1059,17 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
auth_store = _load_auth_store()
|
||||
|
||||
if provider == "anthropic":
|
||||
# Only auto-discover external credentials (Claude Code, Hermes PKCE)
|
||||
# when the user has explicitly configured anthropic as their provider.
|
||||
# Without this gate, auxiliary client fallback chains silently read
|
||||
# ~/.claude/.credentials.json without user consent. See PR #4210.
|
||||
try:
|
||||
from hermes_cli.auth import is_provider_explicitly_configured
|
||||
if not is_provider_explicitly_configured("anthropic"):
|
||||
return changed, active_sources
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
from agent.anthropic_adapter import read_claude_code_credentials, read_hermes_oauth_credentials
|
||||
|
||||
for source_name, creds in (
|
||||
|
|
@ -976,6 +1077,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
("claude_code", read_claude_code_credentials()),
|
||||
):
|
||||
if creds and creds.get("accessToken"):
|
||||
# Check if user explicitly removed this source
|
||||
try:
|
||||
from hermes_cli.auth import is_source_suppressed
|
||||
if is_source_suppressed(provider, source_name):
|
||||
continue
|
||||
except ImportError:
|
||||
pass
|
||||
active_sources.add(source_name)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
|
|
|
|||
160
agent/display.py
160
agent/display.py
|
|
@ -21,11 +21,73 @@ _RESET = "\033[0m"
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
_ANSI_RESET = "\033[0m"
|
||||
_ANSI_DIM = "\033[38;2;150;150;150m"
|
||||
_ANSI_FILE = "\033[38;2;180;160;255m"
|
||||
_ANSI_HUNK = "\033[38;2;120;120;140m"
|
||||
_ANSI_MINUS = "\033[38;2;255;255;255;48;2;120;20;20m"
|
||||
_ANSI_PLUS = "\033[38;2;255;255;255;48;2;20;90;20m"
|
||||
|
||||
# Diff colors — resolved lazily from the skin engine so they adapt
|
||||
# to light/dark themes. Falls back to sensible defaults on import
|
||||
# failure. We cache after first resolution for performance.
|
||||
_diff_colors_cached: dict[str, str] | None = None
|
||||
|
||||
|
||||
def _diff_ansi() -> dict[str, str]:
    """Return ANSI escapes for diff display, resolved from the active skin."""
    global _diff_colors_cached
    if _diff_colors_cached is not None:
        return _diff_colors_cached

    # Defaults that work on dark terminals
    palette = {
        "dim": "\033[38;2;150;150;150m",
        "file": "\033[38;2;180;160;255m",
        "hunk": "\033[38;2;120;120;140m",
        "minus": "\033[38;2;255;255;255;48;2;120;20;20m",
        "plus": "\033[38;2;255;255;255;48;2;20;90;20m",
    }

    try:
        from hermes_cli.skin_engine import get_active_skin
        skin = get_active_skin()

        def _fg(key: str, fallback_rgb: tuple[int, int, int]) -> str:
            # Foreground escape from a "#rrggbb" skin color, or the fallback.
            spec = skin.get_color(key, "")
            if spec and len(spec) == 7 and spec[0] == "#":
                r, g, b = int(spec[1:3], 16), int(spec[3:5], 16), int(spec[5:7], 16)
            else:
                r, g, b = fallback_rgb
            return f"\033[38;2;{r};{g};{b}m"

        palette["dim"] = _fg("banner_dim", (150, 150, 150))
        palette["file"] = _fg("session_label", (180, 160, 255))
        palette["hunk"] = _fg("session_border", (120, 120, 140))
        # minus/plus use background colors — derive from ui_error/ui_ok
        err_hex = skin.get_color("ui_error", "#ef5350")
        ok_hex = skin.get_color("ui_ok", "#4caf50")
        if err_hex and len(err_hex) == 7:
            er, eg, eb = int(err_hex[1:3], 16), int(err_hex[3:5], 16), int(err_hex[5:7], 16)
            # Use a dark tinted version as background
            palette["minus"] = f"\033[38;2;255;255;255;48;2;{max(er//2,20)};{max(eg//4,10)};{max(eb//4,10)}m"
        if ok_hex and len(ok_hex) == 7:
            okr, okg, okb = int(ok_hex[1:3], 16), int(ok_hex[3:5], 16), int(ok_hex[5:7], 16)
            palette["plus"] = f"\033[38;2;255;255;255;48;2;{max(okr//4,10)};{max(okg//2,20)};{max(okb//4,10)}m"
    except Exception:
        # Skin engine unavailable (or malformed colors) — keep the defaults.
        pass

    _diff_colors_cached = palette
    return _diff_colors_cached
|
||||
|
||||
|
||||
def reset_diff_colors() -> None:
|
||||
"""Reset cached diff colors (call after /skin switch)."""
|
||||
global _diff_colors_cached
|
||||
_diff_colors_cached = None
|
||||
|
||||
|
||||
# Module-level helpers — each call resolves from the active skin lazily.
|
||||
def _diff_dim(): return _diff_ansi()["dim"]
|
||||
def _diff_file(): return _diff_ansi()["file"]
|
||||
def _diff_hunk(): return _diff_ansi()["hunk"]
|
||||
def _diff_minus(): return _diff_ansi()["minus"]
|
||||
def _diff_plus(): return _diff_ansi()["plus"]
|
||||
_MAX_INLINE_DIFF_FILES = 6
|
||||
_MAX_INLINE_DIFF_LINES = 80
|
||||
|
||||
|
|
@ -67,26 +129,6 @@ def _get_skin():
|
|||
return None
|
||||
|
||||
|
||||
def get_skin_faces(key: str, default: list) -> list:
    """Get spinner face list from active skin, falling back to default."""
    skin = _get_skin()
    if not skin:
        return default
    faces = skin.get_spinner_list(key)
    return faces if faces else default


def get_skin_verbs() -> list:
    """Get thinking verbs from active skin."""
    skin = _get_skin()
    if skin:
        verbs = skin.get_spinner_list("thinking_verbs")
        if verbs:
            return verbs
    # No skin (or skin has no verbs) — use the built-in defaults.
    return KawaiiSpinner.THINKING_VERBS
|
||||
|
||||
|
||||
def get_skin_tool_prefix() -> str:
|
||||
"""Get tool output prefix character from active skin."""
|
||||
skin = _get_skin()
|
||||
|
|
@ -423,19 +465,19 @@ def _render_inline_unified_diff(diff: str) -> list[str]:
|
|||
if raw_line.startswith("+++ "):
|
||||
to_file = raw_line[4:].strip()
|
||||
if from_file or to_file:
|
||||
rendered.append(f"{_ANSI_FILE}{from_file or 'a/?'} → {to_file or 'b/?'}{_ANSI_RESET}")
|
||||
rendered.append(f"{_diff_file()}{from_file or 'a/?'} → {to_file or 'b/?'}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line.startswith("@@"):
|
||||
rendered.append(f"{_ANSI_HUNK}{raw_line}{_ANSI_RESET}")
|
||||
rendered.append(f"{_diff_hunk()}{raw_line}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line.startswith("-"):
|
||||
rendered.append(f"{_ANSI_MINUS}{raw_line}{_ANSI_RESET}")
|
||||
rendered.append(f"{_diff_minus()}{raw_line}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line.startswith("+"):
|
||||
rendered.append(f"{_ANSI_PLUS}{raw_line}{_ANSI_RESET}")
|
||||
rendered.append(f"{_diff_plus()}{raw_line}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line.startswith(" "):
|
||||
rendered.append(f"{_ANSI_DIM}{raw_line}{_ANSI_RESET}")
|
||||
rendered.append(f"{_diff_dim()}{raw_line}{_ANSI_RESET}")
|
||||
continue
|
||||
if raw_line:
|
||||
rendered.append(raw_line)
|
||||
|
|
@ -501,7 +543,7 @@ def _summarize_rendered_diff_sections(
|
|||
summary = f"… omitted {omitted_lines} diff line(s)"
|
||||
if omitted_files:
|
||||
summary += f" across {omitted_files} additional file(s)/section(s)"
|
||||
rendered.append(f"{_ANSI_HUNK}{summary}{_ANSI_RESET}")
|
||||
rendered.append(f"{_diff_hunk()}{summary}{_ANSI_RESET}")
|
||||
|
||||
return rendered
|
||||
|
||||
|
|
@ -723,46 +765,6 @@ class KawaiiSpinner:
|
|||
return False
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Kawaii face arrays (used by AIAgent._execute_tool_calls for spinner text)
|
||||
# =========================================================================
|
||||
|
||||
KAWAII_SEARCH = [
|
||||
"♪(´ε` )", "(。◕‿◕。)", "ヾ(^∇^)", "(◕ᴗ◕✿)", "( ˘▽˘)っ",
|
||||
"٩(◕‿◕。)۶", "(✿◠‿◠)", "♪~(´ε` )", "(ノ´ヮ`)ノ*:・゚✧", "\(◎o◎)/",
|
||||
]
|
||||
KAWAII_READ = [
|
||||
"φ(゜▽゜*)♪", "( ˘▽˘)っ", "(⌐■_■)", "٩(。•́‿•̀。)۶", "(◕‿◕✿)",
|
||||
"ヾ(@⌒ー⌒@)ノ", "(✧ω✧)", "♪(๑ᴖ◡ᴖ๑)♪", "(≧◡≦)", "( ´ ▽ ` )ノ",
|
||||
]
|
||||
KAWAII_TERMINAL = [
|
||||
"ヽ(>∀<☆)ノ", "(ノ°∀°)ノ", "٩(^ᴗ^)۶", "ヾ(⌐■_■)ノ♪", "(•̀ᴗ•́)و",
|
||||
"┗(^0^)┓", "(`・ω・´)", "\( ̄▽ ̄)/", "(ง •̀_•́)ง", "ヽ(´▽`)/",
|
||||
]
|
||||
KAWAII_BROWSER = [
|
||||
"(ノ°∀°)ノ", "(☞゚ヮ゚)☞", "( ͡° ͜ʖ ͡°)", "┌( ಠ_ಠ)┘", "(⊙_⊙)?",
|
||||
"ヾ(•ω•`)o", "( ̄ω ̄)", "( ˇωˇ )", "(ᵔᴥᵔ)", "\(◎o◎)/",
|
||||
]
|
||||
KAWAII_CREATE = [
|
||||
"✧*。٩(ˊᗜˋ*)و✧", "(ノ◕ヮ◕)ノ*:・゚✧", "ヽ(>∀<☆)ノ", "٩(♡ε♡)۶", "(◕‿◕)♡",
|
||||
"✿◕ ‿ ◕✿", "(*≧▽≦)", "ヾ(^-^)ノ", "(☆▽☆)", "°˖✧◝(⁰▿⁰)◜✧˖°",
|
||||
]
|
||||
KAWAII_SKILL = [
|
||||
"ヾ(@⌒ー⌒@)ノ", "(๑˃ᴗ˂)ﻭ", "٩(◕‿◕。)۶", "(✿╹◡╹)", "ヽ(・∀・)ノ",
|
||||
"(ノ´ヮ`)ノ*:・゚✧", "♪(๑ᴖ◡ᴖ๑)♪", "(◠‿◠)", "٩(ˊᗜˋ*)و", "(^▽^)",
|
||||
"ヾ(^∇^)", "(★ω★)/", "٩(。•́‿•̀。)۶", "(◕ᴗ◕✿)", "\(◎o◎)/",
|
||||
"(✧ω✧)", "ヽ(>∀<☆)ノ", "( ˘▽˘)っ", "(≧◡≦) ♡", "ヾ( ̄▽ ̄)",
|
||||
]
|
||||
KAWAII_THINK = [
|
||||
"(っ°Д°;)っ", "(;′⌒`)", "(・_・ヾ", "( ´_ゝ`)", "( ̄ヘ ̄)",
|
||||
"(。-`ω´-)", "( ˘︹˘ )", "(¬_¬)", "ヽ(ー_ー )ノ", "(;一_一)",
|
||||
]
|
||||
KAWAII_GENERIC = [
|
||||
"♪(´ε` )", "(◕‿◕✿)", "ヾ(^∇^)", "٩(◕‿◕。)۶", "(✿◠‿◠)",
|
||||
"(ノ´ヮ`)ノ*:・゚✧", "ヽ(>∀<☆)ノ", "(☆▽☆)", "( ˘▽˘)っ", "(≧◡≦)",
|
||||
]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Cute tool message (completion line that replaces the spinner)
|
||||
# =========================================================================
|
||||
|
|
@ -970,22 +972,6 @@ _SKY_BLUE = "\033[38;5;117m"
|
|||
_ANSI_RESET = "\033[0m"
|
||||
|
||||
|
||||
def honcho_session_url(workspace: str, session_name: str) -> str:
    """Build a Honcho app URL for a session."""
    from urllib.parse import quote
    # Percent-encode everything (safe='') so slashes don't break the query.
    ws = quote(workspace, safe="")
    session = quote(session_name, safe="")
    return f"https://app.honcho.dev/explore?workspace={ws}&view=sessions&session={session}"
|
||||
|
||||
|
||||
def _osc8_link(url: str, text: str) -> str:
|
||||
"""OSC 8 terminal hyperlink (clickable in iTerm2, Ghostty, WezTerm, etc.)."""
|
||||
return f"\033]8;;{url}\033\\{text}\033]8;;\033\\"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Context pressure display (CLI user-facing warnings)
|
||||
# =========================================================================
|
||||
|
|
|
|||
|
|
@ -82,16 +82,6 @@ class ClassifiedError:
|
|||
def is_auth(self) -> bool:
|
||||
return self.reason in (FailoverReason.auth, FailoverReason.auth_permanent)
|
||||
|
||||
@property
def is_transient(self) -> bool:
    """Error is expected to resolve on retry (with or without backoff)."""
    transient_reasons = (
        FailoverReason.rate_limit,
        FailoverReason.overloaded,
        FailoverReason.server_error,
        FailoverReason.timeout,
        FailoverReason.unknown,
    )
    return self.reason in transient_reasons
||||
|
||||
|
||||
# ── Provider-specific patterns ──────────────────────────────────────────
|
||||
|
|
@ -122,6 +112,7 @@ _RATE_LIMIT_PATTERNS = [
|
|||
"try again in",
|
||||
"please retry after",
|
||||
"resource_exhausted",
|
||||
"rate increased too quickly", # Alibaba/DashScope throttling
|
||||
]
|
||||
|
||||
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
|
||||
|
|
@ -725,11 +716,16 @@ def _classify_by_message(
|
|||
)
|
||||
|
||||
# Auth patterns
|
||||
# Auth errors should NOT be retried directly — the credential is invalid and
|
||||
# retrying with the same key will always fail. Set retryable=False so the
|
||||
# caller triggers credential rotation (should_rotate_credential=True) or
|
||||
# provider fallback rather than an immediate retry loop.
|
||||
if any(p in error_msg for p in _AUTH_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.auth,
|
||||
retryable=True,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Model not found patterns
|
||||
|
|
|
|||
|
|
@ -39,15 +39,6 @@ def _has_known_pricing(model_name: str, provider: str = None, base_url: str = No
|
|||
return has_known_pricing(model_name, provider=provider, base_url=base_url)
|
||||
|
||||
|
||||
def _get_pricing(model_name: str) -> Dict[str, float]:
|
||||
"""Look up pricing for a model. Uses fuzzy matching on model name.
|
||||
|
||||
Returns _DEFAULT_PRICING (zero cost) for unknown/custom models —
|
||||
we can't assume costs for self-hosted endpoints, local inference, etc.
|
||||
"""
|
||||
return get_pricing(model_name)
|
||||
|
||||
|
||||
def _estimate_cost(
|
||||
session_or_model: Dict[str, Any] | str,
|
||||
input_tokens: int = 0,
|
||||
|
|
|
|||
49
agent/manual_compression_feedback.py
Normal file
49
agent/manual_compression_feedback.py
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
"""User-facing summaries for manual compression commands."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Sequence
|
||||
|
||||
|
||||
def summarize_manual_compression(
    before_messages: Sequence[dict[str, Any]],
    after_messages: Sequence[dict[str, Any]],
    before_tokens: int,
    after_tokens: int,
) -> dict[str, Any]:
    """Return consistent user-facing feedback for manual compression."""
    n_before = len(before_messages)
    n_after = len(after_messages)
    unchanged = list(after_messages) == list(before_messages)

    # Shared before→after token line used by every non-trivial outcome.
    delta_line = (
        f"Rough transcript estimate: ~{before_tokens:,} → "
        f"~{after_tokens:,} tokens"
    )

    if unchanged:
        headline = f"No changes from compression: {n_before} messages"
        if after_tokens == before_tokens:
            token_line = (
                f"Rough transcript estimate: ~{before_tokens:,} tokens (unchanged)"
            )
        else:
            token_line = delta_line
    else:
        headline = f"Compressed: {n_before} → {n_after} messages"
        token_line = delta_line

    note = None
    if not unchanged and n_after < n_before and after_tokens > before_tokens:
        note = (
            "Note: fewer messages can still raise this rough transcript estimate "
            "when compression rewrites the transcript into denser summaries."
        )

    return {
        "noop": unchanged,
        "headline": headline,
        "token_line": token_line,
        "note": note,
    }
|
||||
|
|
@ -134,11 +134,6 @@ class MemoryManager:
|
|||
"""All registered providers in order."""
|
||||
return list(self._providers)
|
||||
|
||||
@property
|
||||
def provider_names(self) -> List[str]:
|
||||
"""Names of all registered providers."""
|
||||
return [p.name for p in self._providers]
|
||||
|
||||
def get_provider(self, name: str) -> Optional[MemoryProvider]:
|
||||
"""Get a provider by name, or None if not registered."""
|
||||
for p in self._providers:
|
||||
|
|
|
|||
|
|
@ -113,19 +113,31 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
"deepseek": 128000,
|
||||
# Meta
|
||||
"llama": 131072,
|
||||
# Qwen
|
||||
# Qwen — specific model families before the catch-all.
|
||||
# Official docs: https://help.aliyun.com/zh/model-studio/developer-reference/
|
||||
"qwen3-coder-plus": 1000000, # 1M context
|
||||
"qwen3-coder": 262144, # 256K context
|
||||
"qwen": 131072,
|
||||
# MiniMax (lowercase — lookup lowercases model names at line 973)
|
||||
"minimax-m1-256k": 1000000,
|
||||
"minimax-m1-128k": 1000000,
|
||||
"minimax-m1-80k": 1000000,
|
||||
"minimax-m1-40k": 1000000,
|
||||
"minimax-m1": 1000000,
|
||||
"minimax-m2.5": 1048576,
|
||||
"minimax-m2.7": 1048576,
|
||||
"minimax": 1048576,
|
||||
# MiniMax — official docs: 204,800 context for all models
|
||||
# https://platform.minimax.io/docs/api-reference/text-anthropic-api
|
||||
"minimax": 204800,
|
||||
# GLM
|
||||
"glm": 202752,
|
||||
# xAI Grok — xAI /v1/models does not return context_length metadata,
|
||||
# so these hardcoded fallbacks prevent Hermes from probing-down to
|
||||
# the default 128k when the user points at https://api.x.ai/v1
|
||||
# via a custom provider. Values sourced from models.dev (2026-04).
|
||||
# Keys use substring matching (longest-first), so e.g. "grok-4.20"
|
||||
# matches "grok-4.20-0309-reasoning" / "-non-reasoning" / "-multi-agent-0309".
|
||||
"grok-code-fast": 256000, # grok-code-fast-1
|
||||
"grok-4-1-fast": 2000000, # grok-4-1-fast-(non-)reasoning
|
||||
"grok-2-vision": 8192, # grok-2-vision, -1212, -latest
|
||||
"grok-4-fast": 2000000, # grok-4-fast-(non-)reasoning
|
||||
"grok-4.20": 2000000, # grok-4.20-0309-(non-)reasoning, -multi-agent-0309
|
||||
"grok-4": 256000, # grok-4, grok-4-0709
|
||||
"grok-3": 131072, # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast
|
||||
"grok-2": 131072, # grok-2, grok-2-1212, grok-2-latest
|
||||
"grok": 131072, # catch-all (grok-beta, unknown grok-*)
|
||||
# Kimi
|
||||
"kimi": 262144,
|
||||
# Arcee
|
||||
|
|
@ -136,7 +148,7 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
"deepseek-ai/DeepSeek-V3.2": 65536,
|
||||
"moonshotai/Kimi-K2.5": 262144,
|
||||
"moonshotai/Kimi-K2-Thinking": 262144,
|
||||
"MiniMaxAI/MiniMax-M2.5": 1048576,
|
||||
"MiniMaxAI/MiniMax-M2.5": 204800,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 32768,
|
||||
"mimo-v2-pro": 1048576,
|
||||
"mimo-v2-omni": 1048576,
|
||||
|
|
@ -198,6 +210,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
|||
"models.github.ai": "copilot",
|
||||
"api.fireworks.ai": "fireworks",
|
||||
"opencode.ai": "opencode-go",
|
||||
"api.x.ai": "xai",
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -135,9 +135,6 @@ class ProviderInfo:
|
|||
doc: str = "" # documentation URL
|
||||
model_count: int = 0
|
||||
|
||||
def has_api_url(self) -> bool:
|
||||
return bool(self.api)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider ID mapping: Hermes ↔ models.dev
|
||||
|
|
@ -634,43 +631,6 @@ def get_provider_info(provider_id: str) -> Optional[ProviderInfo]:
|
|||
return _parse_provider_info(mdev_id, raw)
|
||||
|
||||
|
||||
def list_all_providers() -> Dict[str, ProviderInfo]:
|
||||
"""Return all providers from models.dev as {provider_id: ProviderInfo}.
|
||||
|
||||
Returns the full catalog — 109+ providers. For providers that have
|
||||
a Hermes alias, both the models.dev ID and the Hermes ID are included.
|
||||
"""
|
||||
data = fetch_models_dev()
|
||||
result: Dict[str, ProviderInfo] = {}
|
||||
|
||||
for pid, pdata in data.items():
|
||||
if isinstance(pdata, dict):
|
||||
info = _parse_provider_info(pid, pdata)
|
||||
result[pid] = info
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_providers_for_env_var(env_var: str) -> List[str]:
|
||||
"""Reverse lookup: find all providers that use a given env var.
|
||||
|
||||
Useful for auto-detection: "user has ANTHROPIC_API_KEY set, which
|
||||
providers does that enable?"
|
||||
|
||||
Returns list of models.dev provider IDs.
|
||||
"""
|
||||
data = fetch_models_dev()
|
||||
matches: List[str] = []
|
||||
|
||||
for pid, pdata in data.items():
|
||||
if isinstance(pdata, dict):
|
||||
env = pdata.get("env", [])
|
||||
if isinstance(env, list) and env_var in env:
|
||||
matches.append(pid)
|
||||
|
||||
return matches
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model-level queries (rich ModelInfo)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -708,74 +668,3 @@ def get_model_info(
|
|||
return None
|
||||
|
||||
|
||||
def get_model_info_any_provider(model_id: str) -> Optional[ModelInfo]:
|
||||
"""Search all providers for a model by ID.
|
||||
|
||||
Useful when you have a full slug like "anthropic/claude-sonnet-4.6" or
|
||||
a bare name and want to find it anywhere. Checks Hermes-mapped providers
|
||||
first, then falls back to all models.dev providers.
|
||||
"""
|
||||
data = fetch_models_dev()
|
||||
|
||||
# Try Hermes-mapped providers first (more likely what the user wants)
|
||||
for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
|
||||
pdata = data.get(mdev_id)
|
||||
if not isinstance(pdata, dict):
|
||||
continue
|
||||
models = pdata.get("models", {})
|
||||
if not isinstance(models, dict):
|
||||
continue
|
||||
|
||||
raw = models.get(model_id)
|
||||
if isinstance(raw, dict):
|
||||
return _parse_model_info(model_id, raw, mdev_id)
|
||||
|
||||
# Case-insensitive
|
||||
model_lower = model_id.lower()
|
||||
for mid, mdata in models.items():
|
||||
if mid.lower() == model_lower and isinstance(mdata, dict):
|
||||
return _parse_model_info(mid, mdata, mdev_id)
|
||||
|
||||
# Fall back to ALL providers
|
||||
for pid, pdata in data.items():
|
||||
if pid in _get_reverse_mapping():
|
||||
continue # already checked
|
||||
if not isinstance(pdata, dict):
|
||||
continue
|
||||
models = pdata.get("models", {})
|
||||
if not isinstance(models, dict):
|
||||
continue
|
||||
|
||||
raw = models.get(model_id)
|
||||
if isinstance(raw, dict):
|
||||
return _parse_model_info(model_id, raw, pid)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def list_provider_model_infos(provider_id: str) -> List[ModelInfo]:
|
||||
"""Return all models for a provider as ModelInfo objects.
|
||||
|
||||
Filters out deprecated models by default.
|
||||
"""
|
||||
mdev_id = PROVIDER_TO_MODELS_DEV.get(provider_id, provider_id)
|
||||
|
||||
data = fetch_models_dev()
|
||||
pdata = data.get(mdev_id)
|
||||
if not isinstance(pdata, dict):
|
||||
return []
|
||||
|
||||
models = pdata.get("models", {})
|
||||
if not isinstance(models, dict):
|
||||
return []
|
||||
|
||||
result: List[ModelInfo] = []
|
||||
for mid, mdata in models.items():
|
||||
if not isinstance(mdata, dict):
|
||||
continue
|
||||
status = mdata.get("status", "")
|
||||
if status == "deprecated":
|
||||
continue
|
||||
result.append(_parse_model_info(mid, mdata, mdev_id))
|
||||
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ _CONTEXT_THREAT_PATTERNS = [
|
|||
(r'disregard\s+(your|all|any)\s+(instructions|rules|guidelines)', "disregard_rules"),
|
||||
(r'act\s+as\s+(if|though)\s+you\s+(have\s+no|don\'t\s+have)\s+(restrictions|limits|rules)', "bypass_restrictions"),
|
||||
(r'<!--[^>]*(?:ignore|override|system|secret|hidden)[^>]*-->', "html_comment_injection"),
|
||||
(r'<\s*div\s+style\s*=\s*["\'].*display\s*:\s*none', "hidden_div"),
|
||||
(r'<\s*div\s+style\s*=\s*["\'][\s\S]*?display\s*:\s*none', "hidden_div"),
|
||||
(r'translate\s+.*\s+into\s+.*\s+and\s+(execute|run|eval)', "translate_execute"),
|
||||
(r'curl\s+[^\n]*\$\{?\w*(KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|API)', "exfil_curl"),
|
||||
(r'cat\s+[^\n]*(\.env|credentials|\.netrc|\.pgpass)', "read_secrets"),
|
||||
|
|
@ -356,6 +356,14 @@ PLATFORM_HINTS = {
|
|||
"MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
|
||||
".heic) appear as photos and other files arrive as attachments."
|
||||
),
|
||||
"weixin": (
|
||||
"You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
|
||||
"it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
|
||||
"include MEDIA:/absolute/path/to/file in your response. Images are sent as native "
|
||||
"photos, videos play inline when supported, and other files arrive as downloadable "
|
||||
"documents. You can also include image URLs in markdown format  and they "
|
||||
"will be downloaded and sent as native media when possible."
|
||||
),
|
||||
}
|
||||
|
||||
CONTEXT_FILE_MAX_CHARS = 20_000
|
||||
|
|
@ -479,7 +487,7 @@ def _parse_skill_file(skill_file: Path) -> tuple[bool, dict, str]:
|
|||
(True, {}, "") to err on the side of showing the skill.
|
||||
"""
|
||||
try:
|
||||
raw = skill_file.read_text(encoding="utf-8")[:2000]
|
||||
raw = skill_file.read_text(encoding="utf-8")
|
||||
frontmatter, _ = parse_frontmatter(raw)
|
||||
|
||||
if not skill_matches_platform(frontmatter):
|
||||
|
|
@ -487,21 +495,10 @@ def _parse_skill_file(skill_file: Path) -> tuple[bool, dict, str]:
|
|||
|
||||
return True, frontmatter, extract_skill_description(frontmatter)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to parse skill file %s: %s", skill_file, e)
|
||||
logger.warning("Failed to parse skill file %s: %s", skill_file, e)
|
||||
return True, {}, ""
|
||||
|
||||
|
||||
def _read_skill_conditions(skill_file: Path) -> dict:
|
||||
"""Extract conditional activation fields from SKILL.md frontmatter."""
|
||||
try:
|
||||
raw = skill_file.read_text(encoding="utf-8")[:2000]
|
||||
frontmatter, _ = parse_frontmatter(raw)
|
||||
return extract_skill_conditions(frontmatter)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to read skill conditions from %s: %s", skill_file, e)
|
||||
return {}
|
||||
|
||||
|
||||
def _skill_should_show(
|
||||
conditions: dict,
|
||||
available_tools: "set[str] | None",
|
||||
|
|
@ -561,9 +558,10 @@ def build_skills_system_prompt(
|
|||
# ── Layer 1: in-process LRU cache ─────────────────────────────────
|
||||
# Include the resolved platform so per-platform disabled-skill lists
|
||||
# produce distinct cache entries (gateway serves multiple platforms).
|
||||
from gateway.session_context import get_session_env
|
||||
_platform_hint = (
|
||||
os.environ.get("HERMES_PLATFORM")
|
||||
or os.environ.get("HERMES_SESSION_PLATFORM")
|
||||
or get_session_env("HERMES_SESSION_PLATFORM")
|
||||
or ""
|
||||
)
|
||||
cache_key = (
|
||||
|
|
|
|||
|
|
@ -97,8 +97,12 @@ def parse_rate_limit_headers(
|
|||
|
||||
Returns None if no rate limit headers are present.
|
||||
"""
|
||||
# Normalize to lowercase so lookups work regardless of how the server
|
||||
# capitalises headers (HTTP header names are case-insensitive per RFC 7230).
|
||||
lowered = {k.lower(): v for k, v in headers.items()}
|
||||
|
||||
# Quick check: at least one rate limit header must exist
|
||||
has_any = any(k.lower().startswith("x-ratelimit-") for k in headers)
|
||||
has_any = any(k.startswith("x-ratelimit-") for k in lowered)
|
||||
if not has_any:
|
||||
return None
|
||||
|
||||
|
|
@ -109,9 +113,9 @@ def parse_rate_limit_headers(
|
|||
# resource="tokens", suffix="-1h" -> per-hour
|
||||
tag = f"{resource}{suffix}"
|
||||
return RateLimitBucket(
|
||||
limit=_safe_int(headers.get(f"x-ratelimit-limit-{tag}")),
|
||||
remaining=_safe_int(headers.get(f"x-ratelimit-remaining-{tag}")),
|
||||
reset_seconds=_safe_float(headers.get(f"x-ratelimit-reset-{tag}")),
|
||||
limit=_safe_int(lowered.get(f"x-ratelimit-limit-{tag}")),
|
||||
remaining=_safe_int(lowered.get(f"x-ratelimit-remaining-{tag}")),
|
||||
reset_seconds=_safe_float(lowered.get(f"x-ratelimit-reset-{tag}")),
|
||||
captured_at=now,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -168,7 +168,7 @@ def _build_skill_message(
|
|||
subdir_path = skill_dir / subdir
|
||||
if subdir_path.exists():
|
||||
for f in sorted(subdir_path.rglob("*")):
|
||||
if f.is_file():
|
||||
if f.is_file() and not f.is_symlink():
|
||||
rel = str(f.relative_to(skill_dir))
|
||||
supporting.append(rel)
|
||||
|
||||
|
|
|
|||
|
|
@ -145,10 +145,11 @@ def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
|
|||
if not isinstance(skills_cfg, dict):
|
||||
return set()
|
||||
|
||||
from gateway.session_context import get_session_env
|
||||
resolved_platform = (
|
||||
platform
|
||||
or os.getenv("HERMES_PLATFORM")
|
||||
or os.getenv("HERMES_SESSION_PLATFORM")
|
||||
or get_session_env("HERMES_SESSION_PLATFORM")
|
||||
)
|
||||
if resolved_platform:
|
||||
platform_disabled = (skills_cfg.get("platform_disabled") or {}).get(
|
||||
|
|
|
|||
|
|
@ -181,6 +181,7 @@ def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any
|
|||
"api_mode": runtime.get("api_mode"),
|
||||
"command": runtime.get("command"),
|
||||
"args": list(runtime.get("args") or []),
|
||||
"credential_pool": runtime.get("credential_pool"),
|
||||
},
|
||||
"label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
|
||||
"signature": (
|
||||
|
|
|
|||
|
|
@ -595,30 +595,6 @@ def get_pricing(
|
|||
}
|
||||
|
||||
|
||||
def estimate_cost_usd(
|
||||
model: str,
|
||||
input_tokens: int,
|
||||
output_tokens: int,
|
||||
*,
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
) -> float:
|
||||
"""Backward-compatible helper for legacy callers.
|
||||
|
||||
This uses non-cached input/output only. New code should call
|
||||
`estimate_usage_cost()` with canonical usage buckets.
|
||||
"""
|
||||
result = estimate_usage_cost(
|
||||
model,
|
||||
CanonicalUsage(input_tokens=input_tokens, output_tokens=output_tokens),
|
||||
provider=provider,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
)
|
||||
return float(result.amount_usd or _ZERO)
|
||||
|
||||
|
||||
def format_duration_compact(seconds: float) -> str:
|
||||
if seconds < 60:
|
||||
return f"{seconds:.0f}s"
|
||||
|
|
|
|||
|
|
@ -480,6 +480,12 @@ agent:
|
|||
# Fires once per run when inactivity reaches this threshold (seconds).
|
||||
# Set to 0 to disable the warning.
|
||||
# gateway_timeout_warning: 900
|
||||
|
||||
# Graceful drain timeout for gateway stop/restart (seconds).
|
||||
# The gateway stops accepting new work, waits for in-flight agents to
|
||||
# finish, then interrupts anything still running after this timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
# restart_drain_timeout: 60
|
||||
|
||||
# Enable verbose logging
|
||||
verbose: false
|
||||
|
|
@ -582,7 +588,7 @@ platform_toolsets:
|
|||
# skills_hub - skill_hub (search/install/manage from online registries — user-driven only)
|
||||
# moa - mixture_of_agents (requires OPENROUTER_API_KEY)
|
||||
# todo - todo (in-memory task planning, no deps)
|
||||
# tts - text_to_speech (Edge TTS free, or ELEVENLABS/OPENAI/MINIMAX key)
|
||||
# tts - text_to_speech (Edge TTS free, or ELEVENLABS/OPENAI/MINIMAX/MISTRAL key)
|
||||
# cronjob - cronjob (create/list/update/pause/resume/run/remove scheduled tasks)
|
||||
# rl - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
|
||||
#
|
||||
|
|
@ -611,7 +617,7 @@ platform_toolsets:
|
|||
# todo - Task planning and tracking for multi-step work
|
||||
# memory - Persistent memory across sessions (personal notes + user profile)
|
||||
# session_search - Search and recall past conversations (FTS5 + Gemini Flash summarization)
|
||||
# tts - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI, MiniMax)
|
||||
# tts - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI, MiniMax, Mistral)
|
||||
# cronjob - Schedule and manage automated tasks (CLI-only)
|
||||
# rl - RL training tools (Tinker-Atropos)
|
||||
#
|
||||
|
|
@ -684,7 +690,11 @@ platform_toolsets:
|
|||
stt:
|
||||
enabled: true
|
||||
# provider: "local" # auto-detected if omitted
|
||||
model: "whisper-1" # whisper-1 (cheapest) | gpt-4o-mini-transcribe | gpt-4o-transcribe
|
||||
local:
|
||||
model: "base" # tiny | base | small | medium | large-v3 | turbo
|
||||
# language: "" # auto-detect; set to "en", "es", "fr", etc. to force
|
||||
openai:
|
||||
model: "whisper-1" # whisper-1 | gpt-4o-mini-transcribe | gpt-4o-transcribe
|
||||
# mistral:
|
||||
# model: "voxtral-mini-latest" # voxtral-mini-latest | voxtral-mini-2602
|
||||
|
||||
|
|
|
|||
17
cron/jobs.py
17
cron/jobs.py
|
|
@ -31,7 +31,7 @@ except ImportError:
|
|||
# Configuration
|
||||
# =============================================================================
|
||||
|
||||
HERMES_DIR = get_hermes_home()
|
||||
HERMES_DIR = get_hermes_home().resolve()
|
||||
CRON_DIR = HERMES_DIR / "cron"
|
||||
JOBS_FILE = CRON_DIR / "jobs.json"
|
||||
OUTPUT_DIR = CRON_DIR / "output"
|
||||
|
|
@ -338,10 +338,12 @@ def load_jobs() -> List[Dict[str, Any]]:
|
|||
save_jobs(jobs)
|
||||
logger.warning("Auto-repaired jobs.json (had invalid control characters)")
|
||||
return jobs
|
||||
except Exception:
|
||||
return []
|
||||
except IOError:
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error("Failed to auto-repair jobs.json: %s", e)
|
||||
raise RuntimeError(f"Cron database corrupted and unrepairable: {e}") from e
|
||||
except IOError as e:
|
||||
logger.error("IOError reading jobs.json: %s", e)
|
||||
raise RuntimeError(f"Failed to read cron database: {e}") from e
|
||||
|
||||
|
||||
def save_jobs(jobs: List[Dict[str, Any]]):
|
||||
|
|
@ -452,6 +454,7 @@ def create_job(
|
|||
"last_run_at": None,
|
||||
"last_status": None,
|
||||
"last_error": None,
|
||||
"last_delivery_error": None,
|
||||
# Delivery configuration
|
||||
"deliver": deliver,
|
||||
"origin": origin, # Tracks where job was created for "origin" delivery
|
||||
|
|
@ -620,8 +623,8 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
|
|||
|
||||
save_jobs(jobs)
|
||||
return
|
||||
|
||||
save_jobs(jobs)
|
||||
|
||||
logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
|
||||
|
||||
|
||||
def advance_next_run(job_id: str) -> bool:
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ logger = logging.getLogger(__name__)
|
|||
_KNOWN_DELIVERY_PLATFORMS = frozenset({
|
||||
"telegram", "discord", "slack", "whatsapp", "signal",
|
||||
"matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
|
||||
"wecom", "sms", "email", "webhook", "bluebubbles",
|
||||
"wecom", "weixin", "sms", "email", "webhook", "bluebubbles",
|
||||
})
|
||||
|
||||
from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run
|
||||
|
|
@ -234,6 +234,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
|||
"dingtalk": Platform.DINGTALK,
|
||||
"feishu": Platform.FEISHU,
|
||||
"wecom": Platform.WECOM,
|
||||
"weixin": Platform.WEIXIN,
|
||||
"email": Platform.EMAIL,
|
||||
"sms": Platform.SMS,
|
||||
"bluebubbles": Platform.BLUEBUBBLES,
|
||||
|
|
@ -346,7 +347,42 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
|||
return None
|
||||
|
||||
|
||||
_SCRIPT_TIMEOUT = 120 # seconds
|
||||
_DEFAULT_SCRIPT_TIMEOUT = 120 # seconds
|
||||
# Backward-compatible module override used by tests and emergency monkeypatches.
|
||||
_SCRIPT_TIMEOUT = _DEFAULT_SCRIPT_TIMEOUT
|
||||
|
||||
|
||||
def _get_script_timeout() -> int:
|
||||
"""Resolve cron pre-run script timeout from module/env/config with a safe default."""
|
||||
if _SCRIPT_TIMEOUT != _DEFAULT_SCRIPT_TIMEOUT:
|
||||
try:
|
||||
timeout = int(float(_SCRIPT_TIMEOUT))
|
||||
if timeout > 0:
|
||||
return timeout
|
||||
except Exception:
|
||||
logger.warning("Invalid patched _SCRIPT_TIMEOUT=%r; using env/config/default", _SCRIPT_TIMEOUT)
|
||||
|
||||
env_value = os.getenv("HERMES_CRON_SCRIPT_TIMEOUT", "").strip()
|
||||
if env_value:
|
||||
try:
|
||||
timeout = int(float(env_value))
|
||||
if timeout > 0:
|
||||
return timeout
|
||||
except Exception:
|
||||
logger.warning("Invalid HERMES_CRON_SCRIPT_TIMEOUT=%r; using config/default", env_value)
|
||||
|
||||
try:
|
||||
cfg = load_config() or {}
|
||||
cron_cfg = cfg.get("cron", {}) if isinstance(cfg, dict) else {}
|
||||
configured = cron_cfg.get("script_timeout_seconds")
|
||||
if configured is not None:
|
||||
timeout = int(float(configured))
|
||||
if timeout > 0:
|
||||
return timeout
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to load cron script timeout from config: %s", exc)
|
||||
|
||||
return _DEFAULT_SCRIPT_TIMEOUT
|
||||
|
||||
|
||||
def _run_job_script(script_path: str) -> tuple[bool, str]:
|
||||
|
|
@ -393,17 +429,27 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
|
|||
if not path.is_file():
|
||||
return False, f"Script path is not a file: {path}"
|
||||
|
||||
script_timeout = _get_script_timeout()
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[sys.executable, str(path)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=_SCRIPT_TIMEOUT,
|
||||
timeout=script_timeout,
|
||||
cwd=str(path.parent),
|
||||
)
|
||||
stdout = (result.stdout or "").strip()
|
||||
stderr = (result.stderr or "").strip()
|
||||
|
||||
# Redact secrets from both stdout and stderr before any return path.
|
||||
try:
|
||||
from agent.redact import redact_sensitive_text
|
||||
stdout = redact_sensitive_text(stdout)
|
||||
stderr = redact_sensitive_text(stderr)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if result.returncode != 0:
|
||||
parts = [f"Script exited with code {result.returncode}"]
|
||||
if stderr:
|
||||
|
|
@ -412,17 +458,10 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
|
|||
parts.append(f"stdout:\n{stdout}")
|
||||
return False, "\n".join(parts)
|
||||
|
||||
# Redact any secrets that may appear in script output before
|
||||
# they are injected into the LLM prompt context.
|
||||
try:
|
||||
from agent.redact import redact_sensitive_text
|
||||
stdout = redact_sensitive_text(stdout)
|
||||
except Exception:
|
||||
pass
|
||||
return True, stdout
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
return False, f"Script timed out after {_SCRIPT_TIMEOUT}s: {path}"
|
||||
return False, f"Script timed out after {script_timeout}s: {path}"
|
||||
except Exception as exc:
|
||||
return False, f"Script execution failed: {exc}"
|
||||
|
||||
|
|
@ -646,6 +685,24 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
},
|
||||
)
|
||||
|
||||
fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None
|
||||
credential_pool = None
|
||||
runtime_provider = str(turn_route["runtime"].get("provider") or "").strip().lower()
|
||||
if runtime_provider:
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
pool = load_pool(runtime_provider)
|
||||
if pool.has_credentials():
|
||||
credential_pool = pool
|
||||
logger.info(
|
||||
"Job '%s': loaded credential pool for provider %s with %d entries",
|
||||
job_id,
|
||||
runtime_provider,
|
||||
len(pool.entries()),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Job '%s': failed to load credential pool for %s: %s", job_id, runtime_provider, e)
|
||||
|
||||
agent = AIAgent(
|
||||
model=turn_route["model"],
|
||||
api_key=turn_route["runtime"].get("api_key"),
|
||||
|
|
@ -657,6 +714,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
max_iterations=max_iterations,
|
||||
reasoning_config=reasoning_config,
|
||||
prefill_messages=prefill_messages,
|
||||
fallback_model=fallback_model,
|
||||
credential_pool=credential_pool,
|
||||
providers_allowed=pr.get("only"),
|
||||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
|
|
@ -711,7 +770,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
_cron_pool.shutdown(wait=False, cancel_futures=True)
|
||||
raise
|
||||
finally:
|
||||
_cron_pool.shutdown(wait=False)
|
||||
_cron_pool.shutdown(wait=False, cancel_futures=True)
|
||||
|
||||
if _inactivity_timeout:
|
||||
# Build diagnostic summary from the agent's activity tracker.
|
||||
|
|
|
|||
|
|
@ -9,7 +9,10 @@ INSTALL_DIR="/opt/hermes"
|
|||
# (cache/images, cache/audio, platforms/whatsapp, etc.) are created on
|
||||
# demand by the application — don't pre-create them here so new installs
|
||||
# get the consolidated layout from get_hermes_dir().
|
||||
mkdir -p "$HERMES_HOME"/{cron,sessions,logs,hooks,memories,skills}
|
||||
# The "home/" subdirectory is a per-profile HOME for subprocesses (git,
|
||||
# ssh, gh, npm …). Without it those tools write to /root which is
|
||||
# ephemeral and shared across profiles. See issue #4426.
|
||||
mkdir -p "$HERMES_HOME"/{cron,sessions,logs,hooks,memories,skills,skins,plans,workspace,home}
|
||||
|
||||
# .env
|
||||
if [ ! -f "$HERMES_HOME/.env" ]; then
|
||||
|
|
|
|||
|
|
@ -49,6 +49,8 @@ class HermesToolCallParser(ToolCallParser):
|
|||
continue
|
||||
|
||||
tc_data = json.loads(raw_json)
|
||||
if "name" not in tc_data:
|
||||
continue
|
||||
tool_calls.append(
|
||||
ChatCompletionMessageToolCall(
|
||||
id=f"call_{uuid.uuid4().hex[:8]}",
|
||||
|
|
|
|||
|
|
@ -89,6 +89,8 @@ class MistralToolCallParser(ToolCallParser):
|
|||
parsed = [parsed]
|
||||
|
||||
for tc in parsed:
|
||||
if "name" not in tc:
|
||||
continue
|
||||
args = tc.get("arguments", {})
|
||||
if isinstance(args, dict):
|
||||
args = json.dumps(args, ensure_ascii=False)
|
||||
|
|
|
|||
|
|
@ -76,10 +76,15 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
|||
except Exception as e:
|
||||
logger.warning("Channel directory: failed to build %s: %s", platform.value, e)
|
||||
|
||||
# Telegram, WhatsApp & Signal can't enumerate chats -- pull from session history
|
||||
for plat_name in ("telegram", "whatsapp", "signal", "email", "sms", "bluebubbles"):
|
||||
if plat_name not in platforms:
|
||||
platforms[plat_name] = _build_from_sessions(plat_name)
|
||||
# Platforms that don't support direct channel enumeration get session-based
|
||||
# discovery automatically. Skip infrastructure entries that aren't messaging
|
||||
# platforms — everything else falls through to _build_from_sessions().
|
||||
_SKIP_SESSION_DISCOVERY = frozenset({"local", "api_server", "webhook"})
|
||||
for plat in Platform:
|
||||
plat_name = plat.value
|
||||
if plat_name in _SKIP_SESSION_DISCOVERY or plat_name in platforms:
|
||||
continue
|
||||
platforms[plat_name] = _build_from_sessions(plat_name)
|
||||
|
||||
directory = {
|
||||
"updated_at": datetime.now().isoformat(),
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ class Platform(Enum):
|
|||
WEBHOOK = "webhook"
|
||||
FEISHU = "feishu"
|
||||
WECOM = "wecom"
|
||||
WEIXIN = "weixin"
|
||||
BLUEBUBBLES = "bluebubbles"
|
||||
|
||||
|
||||
|
|
@ -261,6 +262,11 @@ class GatewayConfig:
|
|||
for platform, config in self.platforms.items():
|
||||
if not config.enabled:
|
||||
continue
|
||||
# Weixin requires both a token and an account_id
|
||||
if platform == Platform.WEIXIN:
|
||||
if config.extra.get("account_id") and (config.token or config.extra.get("token")):
|
||||
connected.append(platform)
|
||||
continue
|
||||
# Platforms that use token/api_key auth
|
||||
if config.token or config.api_key:
|
||||
connected.append(platform)
|
||||
|
|
@ -536,6 +542,8 @@ def load_gateway_config() -> GatewayConfig:
|
|||
bridged["free_response_channels"] = platform_cfg["free_response_channels"]
|
||||
if "mention_patterns" in platform_cfg:
|
||||
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
|
||||
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
|
||||
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
|
||||
if not bridged:
|
||||
continue
|
||||
plat_data = platforms_data.setdefault(plat.value, {})
|
||||
|
|
@ -581,6 +589,12 @@ def load_gateway_config() -> GatewayConfig:
|
|||
if isinstance(ic, list):
|
||||
ic = ",".join(str(v) for v in ic)
|
||||
os.environ["DISCORD_IGNORED_CHANNELS"] = str(ic)
|
||||
# allowed_channels: if set, bot ONLY responds in these channels (whitelist)
|
||||
ac = discord_cfg.get("allowed_channels")
|
||||
if ac is not None and not os.getenv("DISCORD_ALLOWED_CHANNELS"):
|
||||
if isinstance(ac, list):
|
||||
ac = ",".join(str(v) for v in ac)
|
||||
os.environ["DISCORD_ALLOWED_CHANNELS"] = str(ac)
|
||||
# no_thread_channels: channels where bot responds directly without creating thread
|
||||
ntc = discord_cfg.get("no_thread_channels")
|
||||
if ntc is not None and not os.getenv("DISCORD_NO_THREAD_CHANNELS"):
|
||||
|
|
@ -628,6 +642,8 @@ def load_gateway_config() -> GatewayConfig:
|
|||
os.environ["MATRIX_FREE_RESPONSE_ROOMS"] = str(frc)
|
||||
if "auto_thread" in matrix_cfg and not os.getenv("MATRIX_AUTO_THREAD"):
|
||||
os.environ["MATRIX_AUTO_THREAD"] = str(matrix_cfg["auto_thread"]).lower()
|
||||
if "dm_mention_threads" in matrix_cfg and not os.getenv("MATRIX_DM_MENTION_THREADS"):
|
||||
os.environ["MATRIX_DM_MENTION_THREADS"] = str(matrix_cfg["dm_mention_threads"]).lower()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
|
|
@ -666,6 +682,7 @@ def load_gateway_config() -> GatewayConfig:
|
|||
Platform.SLACK: "SLACK_BOT_TOKEN",
|
||||
Platform.MATTERMOST: "MATTERMOST_TOKEN",
|
||||
Platform.MATRIX: "MATRIX_ACCESS_TOKEN",
|
||||
Platform.WEIXIN: "WEIXIN_TOKEN",
|
||||
}
|
||||
for platform, pconfig in config.platforms.items():
|
||||
if not pconfig.enabled:
|
||||
|
|
@ -970,6 +987,44 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
name=os.getenv("WECOM_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
|
||||
# Weixin (personal WeChat via iLink Bot API)
|
||||
weixin_token = os.getenv("WEIXIN_TOKEN")
|
||||
weixin_account_id = os.getenv("WEIXIN_ACCOUNT_ID")
|
||||
if weixin_token or weixin_account_id:
|
||||
if Platform.WEIXIN not in config.platforms:
|
||||
config.platforms[Platform.WEIXIN] = PlatformConfig()
|
||||
config.platforms[Platform.WEIXIN].enabled = True
|
||||
if weixin_token:
|
||||
config.platforms[Platform.WEIXIN].token = weixin_token
|
||||
extra = config.platforms[Platform.WEIXIN].extra
|
||||
if weixin_account_id:
|
||||
extra["account_id"] = weixin_account_id
|
||||
weixin_base_url = os.getenv("WEIXIN_BASE_URL", "").strip()
|
||||
if weixin_base_url:
|
||||
extra["base_url"] = weixin_base_url.rstrip("/")
|
||||
weixin_cdn_base_url = os.getenv("WEIXIN_CDN_BASE_URL", "").strip()
|
||||
if weixin_cdn_base_url:
|
||||
extra["cdn_base_url"] = weixin_cdn_base_url.rstrip("/")
|
||||
weixin_dm_policy = os.getenv("WEIXIN_DM_POLICY", "").strip().lower()
|
||||
if weixin_dm_policy:
|
||||
extra["dm_policy"] = weixin_dm_policy
|
||||
weixin_group_policy = os.getenv("WEIXIN_GROUP_POLICY", "").strip().lower()
|
||||
if weixin_group_policy:
|
||||
extra["group_policy"] = weixin_group_policy
|
||||
weixin_allowed_users = os.getenv("WEIXIN_ALLOWED_USERS", "").strip()
|
||||
if weixin_allowed_users:
|
||||
extra["allow_from"] = weixin_allowed_users
|
||||
weixin_group_allowed_users = os.getenv("WEIXIN_GROUP_ALLOWED_USERS", "").strip()
|
||||
if weixin_group_allowed_users:
|
||||
extra["group_allow_from"] = weixin_group_allowed_users
|
||||
weixin_home = os.getenv("WEIXIN_HOME_CHANNEL", "").strip()
|
||||
if weixin_home:
|
||||
config.platforms[Platform.WEIXIN].home_channel = HomeChannel(
|
||||
platform=Platform.WEIXIN,
|
||||
chat_id=weixin_home,
|
||||
name=os.getenv("WEIXIN_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
|
||||
# BlueBubbles (iMessage)
|
||||
bluebubbles_server_url = os.getenv("BLUEBUBBLES_SERVER_URL")
|
||||
bluebubbles_password = os.getenv("BLUEBUBBLES_PASSWORD")
|
||||
|
|
|
|||
|
|
@ -124,53 +124,6 @@ class DeliveryRouter:
|
|||
self.adapters = adapters or {}
|
||||
self.output_dir = get_hermes_home() / "cron" / "output"
|
||||
|
||||
def resolve_targets(
|
||||
self,
|
||||
deliver: Union[str, List[str]],
|
||||
origin: Optional[SessionSource] = None
|
||||
) -> List[DeliveryTarget]:
|
||||
"""
|
||||
Resolve delivery specification to concrete targets.
|
||||
|
||||
Args:
|
||||
deliver: Delivery spec - "origin", "telegram", ["local", "discord"], etc.
|
||||
origin: The source where the request originated (for "origin" target)
|
||||
|
||||
Returns:
|
||||
List of resolved delivery targets
|
||||
"""
|
||||
if isinstance(deliver, str):
|
||||
deliver = [deliver]
|
||||
|
||||
targets = []
|
||||
seen_platforms = set()
|
||||
|
||||
for target_str in deliver:
|
||||
target = DeliveryTarget.parse(target_str, origin)
|
||||
|
||||
# Resolve home channel if needed
|
||||
if target.chat_id is None and target.platform != Platform.LOCAL:
|
||||
home = self.config.get_home_channel(target.platform)
|
||||
if home:
|
||||
target.chat_id = home.chat_id
|
||||
else:
|
||||
# No home channel configured, skip this platform
|
||||
continue
|
||||
|
||||
# Deduplicate
|
||||
key = (target.platform, target.chat_id, target.thread_id)
|
||||
if key not in seen_platforms:
|
||||
seen_platforms.add(key)
|
||||
targets.append(target)
|
||||
|
||||
# Always include local if configured
|
||||
if self.config.always_log_local:
|
||||
local_key = (Platform.LOCAL, None, None)
|
||||
if local_key not in seen_platforms:
|
||||
targets.append(DeliveryTarget(platform=Platform.LOCAL))
|
||||
|
||||
return targets
|
||||
|
||||
async def deliver(
|
||||
self,
|
||||
content: str,
|
||||
|
|
@ -299,19 +252,5 @@ class DeliveryRouter:
|
|||
return await adapter.send(target.chat_id, content, metadata=send_metadata or None)
|
||||
|
||||
|
||||
def parse_deliver_spec(
|
||||
deliver: Optional[Union[str, List[str]]],
|
||||
origin: Optional[SessionSource] = None,
|
||||
default: str = "origin"
|
||||
) -> Union[str, List[str]]:
|
||||
"""
|
||||
Normalize a delivery specification.
|
||||
|
||||
If None or empty, returns the default.
|
||||
"""
|
||||
if not deliver:
|
||||
return default
|
||||
return deliver
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -20,10 +20,13 @@ Requires:
|
|||
"""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import socket as _socket
|
||||
import re
|
||||
import sqlite3
|
||||
import time
|
||||
import uuid
|
||||
|
|
@ -40,6 +43,7 @@ from gateway.config import Platform, PlatformConfig
|
|||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
SendResult,
|
||||
is_network_accessible,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -282,6 +286,24 @@ def _make_request_fingerprint(body: Dict[str, Any], keys: List[str]) -> str:
|
|||
return sha256(repr(subset).encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def _derive_chat_session_id(
|
||||
system_prompt: Optional[str],
|
||||
first_user_message: str,
|
||||
) -> str:
|
||||
"""Derive a stable session ID from the conversation's first user message.
|
||||
|
||||
OpenAI-compatible frontends (Open WebUI, LibreChat, etc.) send the full
|
||||
conversation history with every request. The system prompt and first user
|
||||
message are constant across all turns of the same conversation, so hashing
|
||||
them produces a deterministic session ID that lets the API server reuse
|
||||
the same Hermes session (and therefore the same Docker container sandbox
|
||||
directory) across turns.
|
||||
"""
|
||||
seed = f"{system_prompt or ''}\n{first_user_message}"
|
||||
digest = hashlib.sha256(seed.encode("utf-8")).hexdigest()[:16]
|
||||
return f"api-{digest}"
|
||||
|
||||
|
||||
class APIServerAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
OpenAI-compatible HTTP API server adapter.
|
||||
|
|
@ -386,7 +408,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
Validate Bearer token from Authorization header.
|
||||
|
||||
Returns None if auth is OK, or a 401 web.Response on failure.
|
||||
If no API key is configured, all requests are allowed.
|
||||
If no API key is configured, all requests are allowed (only when API
|
||||
server is local).
|
||||
"""
|
||||
if not self._api_key:
|
||||
return None # No key configured — allow all (local-only use)
|
||||
|
|
@ -554,8 +577,32 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
|
||||
# Allow caller to continue an existing session by passing X-Hermes-Session-Id.
|
||||
# When provided, history is loaded from state.db instead of from the request body.
|
||||
#
|
||||
# Security: session continuation exposes conversation history, so it is
|
||||
# only allowed when the API key is configured and the request is
|
||||
# authenticated. Without this gate, any unauthenticated client could
|
||||
# read arbitrary session history by guessing/enumerating session IDs.
|
||||
provided_session_id = request.headers.get("X-Hermes-Session-Id", "").strip()
|
||||
if provided_session_id:
|
||||
if not self._api_key:
|
||||
logger.warning(
|
||||
"Session continuation via X-Hermes-Session-Id rejected: "
|
||||
"no API key configured. Set API_SERVER_KEY to enable "
|
||||
"session continuity."
|
||||
)
|
||||
return web.json_response(
|
||||
_openai_error(
|
||||
"Session continuation requires API key authentication. "
|
||||
"Configure API_SERVER_KEY to enable this feature."
|
||||
),
|
||||
status=403,
|
||||
)
|
||||
# Sanitize: reject control characters that could enable header injection.
|
||||
if re.search(r'[\r\n\x00]', provided_session_id):
|
||||
return web.json_response(
|
||||
{"error": {"message": "Invalid session ID", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
)
|
||||
session_id = provided_session_id
|
||||
try:
|
||||
db = self._ensure_session_db()
|
||||
|
|
@ -565,7 +612,16 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
logger.warning("Failed to load session history for %s: %s", session_id, e)
|
||||
history = []
|
||||
else:
|
||||
session_id = str(uuid.uuid4())
|
||||
# Derive a stable session ID from the conversation fingerprint so
|
||||
# that consecutive messages from the same Open WebUI (or similar)
|
||||
# conversation map to the same Hermes session. The first user
|
||||
# message + system prompt are constant across all turns.
|
||||
first_user = ""
|
||||
for cm in conversation_messages:
|
||||
if cm.get("role") == "user":
|
||||
first_user = cm.get("content", "")
|
||||
break
|
||||
session_id = _derive_chat_session_id(system_prompt, first_user)
|
||||
# history already set from request body above
|
||||
|
||||
completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
|
||||
|
|
@ -588,15 +644,35 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
_stream_q.put(delta)
|
||||
|
||||
def _on_tool_progress(event_type, name, preview, args, **kwargs):
|
||||
"""Inject tool progress into the SSE stream for Open WebUI."""
|
||||
"""Send tool progress as a separate SSE event.
|
||||
|
||||
Previously, progress markers like ``⏰ list`` were injected
|
||||
directly into ``delta.content``. OpenAI-compatible frontends
|
||||
(Open WebUI, LobeChat, …) store ``delta.content`` verbatim as
|
||||
the assistant message and send it back on subsequent requests.
|
||||
After enough turns the model learns to *emit* the markers as
|
||||
plain text instead of issuing real tool calls — silently
|
||||
hallucinating tool results. See #6972.
|
||||
|
||||
The fix: push a tagged tuple ``("__tool_progress__", payload)``
|
||||
onto the stream queue. The SSE writer emits it as a custom
|
||||
``event: hermes.tool.progress`` line that compliant frontends
|
||||
can render for UX but will *not* persist into conversation
|
||||
history. Clients that don't understand the custom event type
|
||||
silently ignore it per the SSE specification.
|
||||
"""
|
||||
if event_type != "tool.started":
|
||||
return # Only show tool start events in chat stream
|
||||
return
|
||||
if name.startswith("_"):
|
||||
return # Skip internal events (_thinking)
|
||||
return
|
||||
from agent.display import get_tool_emoji
|
||||
emoji = get_tool_emoji(name)
|
||||
label = preview or name
|
||||
_stream_q.put(f"\n`{emoji} {label}`\n")
|
||||
_stream_q.put(("__tool_progress__", {
|
||||
"tool": name,
|
||||
"emoji": emoji,
|
||||
"label": label,
|
||||
}))
|
||||
|
||||
# Start agent in background. agent_ref is a mutable container
|
||||
# so the SSE writer can interrupt the agent on client disconnect.
|
||||
|
|
@ -707,6 +783,29 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
}
|
||||
await response.write(f"data: {json.dumps(role_chunk)}\n\n".encode())
|
||||
|
||||
# Helper — route a queue item to the correct SSE event.
|
||||
async def _emit(item):
|
||||
"""Write a single queue item to the SSE stream.
|
||||
|
||||
Plain strings are sent as normal ``delta.content`` chunks.
|
||||
Tagged tuples ``("__tool_progress__", payload)`` are sent
|
||||
as a custom ``event: hermes.tool.progress`` SSE event so
|
||||
frontends can display them without storing the markers in
|
||||
conversation history. See #6972.
|
||||
"""
|
||||
if isinstance(item, tuple) and len(item) == 2 and item[0] == "__tool_progress__":
|
||||
event_data = json.dumps(item[1])
|
||||
await response.write(
|
||||
f"event: hermes.tool.progress\ndata: {event_data}\n\n".encode()
|
||||
)
|
||||
else:
|
||||
content_chunk = {
|
||||
"id": completion_id, "object": "chat.completion.chunk",
|
||||
"created": created, "model": model,
|
||||
"choices": [{"index": 0, "delta": {"content": item}, "finish_reason": None}],
|
||||
}
|
||||
await response.write(f"data: {json.dumps(content_chunk)}\n\n".encode())
|
||||
|
||||
# Stream content chunks as they arrive from the agent
|
||||
loop = asyncio.get_event_loop()
|
||||
while True:
|
||||
|
|
@ -720,12 +819,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
delta = stream_q.get_nowait()
|
||||
if delta is None:
|
||||
break
|
||||
content_chunk = {
|
||||
"id": completion_id, "object": "chat.completion.chunk",
|
||||
"created": created, "model": model,
|
||||
"choices": [{"index": 0, "delta": {"content": delta}, "finish_reason": None}],
|
||||
}
|
||||
await response.write(f"data: {json.dumps(content_chunk)}\n\n".encode())
|
||||
await _emit(delta)
|
||||
except _q.Empty:
|
||||
break
|
||||
break
|
||||
|
|
@ -734,12 +828,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
if delta is None: # End of stream sentinel
|
||||
break
|
||||
|
||||
content_chunk = {
|
||||
"id": completion_id, "object": "chat.completion.chunk",
|
||||
"created": created, "model": model,
|
||||
"choices": [{"index": 0, "delta": {"content": delta}, "finish_reason": None}],
|
||||
}
|
||||
await response.write(f"data: {json.dumps(content_chunk)}\n\n".encode())
|
||||
await _emit(delta)
|
||||
|
||||
# Get usage from completed agent
|
||||
usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
|
||||
|
|
@ -1341,6 +1430,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
result = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
conversation_history=conversation_history,
|
||||
task_id="default",
|
||||
)
|
||||
usage = {
|
||||
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
|
||||
|
|
@ -1507,6 +1597,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
r = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
conversation_history=conversation_history,
|
||||
task_id="default",
|
||||
)
|
||||
u = {
|
||||
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
|
||||
|
|
@ -1658,8 +1749,16 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
if hasattr(sweep_task, "add_done_callback"):
|
||||
sweep_task.add_done_callback(self._background_tasks.discard)
|
||||
|
||||
# Refuse to start network-accessible without authentication
|
||||
if is_network_accessible(self._host) and not self._api_key:
|
||||
logger.error(
|
||||
"[%s] Refusing to start: binding to %s requires API_SERVER_KEY. "
|
||||
"Set API_SERVER_KEY or use the default 127.0.0.1.",
|
||||
self.name, self._host,
|
||||
)
|
||||
return False
|
||||
|
||||
# Port conflict detection — fail fast if port is already in use
|
||||
import socket as _socket
|
||||
try:
|
||||
with _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM) as _s:
|
||||
_s.settimeout(1)
|
||||
|
|
@ -1675,6 +1774,14 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
await self._site.start()
|
||||
|
||||
self._mark_connected()
|
||||
if not self._api_key:
|
||||
logger.warning(
|
||||
"[%s] ⚠️ No API key configured (API_SERVER_KEY / platforms.api_server.key). "
|
||||
"All requests will be accepted without authentication. "
|
||||
"Set an API key for production deployments to prevent "
|
||||
"unauthorized access to sessions, responses, and cron jobs.",
|
||||
self.name,
|
||||
)
|
||||
logger.info(
|
||||
"[%s] API server listening on http://%s:%d (model: %s)",
|
||||
self.name, self._host, self._port, self._model_name,
|
||||
|
|
|
|||
|
|
@ -6,10 +6,12 @@ and implement the required methods.
|
|||
"""
|
||||
|
||||
import asyncio
|
||||
import ipaddress
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import socket as _socket
|
||||
import subprocess
|
||||
import sys
|
||||
import uuid
|
||||
|
|
@ -19,6 +21,41 @@ from urllib.parse import urlsplit
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def is_network_accessible(host: str) -> bool:
|
||||
"""Return True if *host* would expose the server beyond loopback.
|
||||
|
||||
Loopback addresses (127.0.0.1, ::1, IPv4-mapped ::ffff:127.0.0.1)
|
||||
are local-only. Unspecified addresses (0.0.0.0, ::) bind all
|
||||
interfaces. Hostnames are resolved; DNS failure fails closed.
|
||||
"""
|
||||
try:
|
||||
addr = ipaddress.ip_address(host)
|
||||
if addr.is_loopback:
|
||||
return False
|
||||
# ::ffff:127.0.0.1 — Python reports is_loopback=False for mapped
|
||||
# addresses, so check the underlying IPv4 explicitly.
|
||||
if getattr(addr, "ipv4_mapped", None) and addr.ipv4_mapped.is_loopback:
|
||||
return False
|
||||
return True
|
||||
except ValueError:
|
||||
# when host variable is a hostname, we should try to resolve below
|
||||
pass
|
||||
|
||||
try:
|
||||
resolved = _socket.getaddrinfo(
|
||||
host, None, _socket.AF_UNSPEC, _socket.SOCK_STREAM,
|
||||
)
|
||||
# if the hostname resolves into at least one non-loopback address,
|
||||
# then we consider it to be network accessible
|
||||
for _family, _type, _proto, _canonname, sockaddr in resolved:
|
||||
addr = ipaddress.ip_address(sockaddr[0])
|
||||
if not addr.is_loopback:
|
||||
return True
|
||||
return False
|
||||
except (_socket.gaierror, OSError):
|
||||
return True
|
||||
|
||||
|
||||
def _detect_macos_system_proxy() -> str | None:
|
||||
"""Read the macOS system HTTP(S) proxy via ``scutil --proxy``.
|
||||
|
||||
|
|
@ -160,7 +197,7 @@ GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE = (
|
|||
)
|
||||
|
||||
|
||||
def _safe_url_for_log(url: str, max_len: int = 80) -> str:
|
||||
def safe_url_for_log(url: str, max_len: int = 80) -> str:
|
||||
"""Return a URL string safe for logs (no query/fragment/userinfo)."""
|
||||
if max_len <= 0:
|
||||
return ""
|
||||
|
|
@ -197,6 +234,23 @@ def _safe_url_for_log(url: str, max_len: int = 80) -> str:
|
|||
return f"{safe[:max_len - 3]}..."
|
||||
|
||||
|
||||
async def _ssrf_redirect_guard(response):
|
||||
"""Re-validate each redirect target to prevent redirect-based SSRF.
|
||||
|
||||
Without this, an attacker can host a public URL that 302-redirects to
|
||||
http://169.254.169.254/ and bypass the pre-flight is_safe_url() check.
|
||||
|
||||
Must be async because httpx.AsyncClient awaits response event hooks.
|
||||
"""
|
||||
if response.is_redirect and response.next_request:
|
||||
redirect_url = str(response.next_request.url)
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(redirect_url):
|
||||
raise ValueError(
|
||||
f"Blocked redirect to private/internal address: {safe_url_for_log(redirect_url)}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Image cache utilities
|
||||
#
|
||||
|
|
@ -216,6 +270,23 @@ def get_image_cache_dir() -> Path:
|
|||
return IMAGE_CACHE_DIR
|
||||
|
||||
|
||||
def _looks_like_image(data: bytes) -> bool:
|
||||
"""Return True if *data* starts with a known image magic-byte sequence."""
|
||||
if len(data) < 4:
|
||||
return False
|
||||
if data[:8] == b"\x89PNG\r\n\x1a\n":
|
||||
return True
|
||||
if data[:3] == b"\xff\xd8\xff":
|
||||
return True
|
||||
if data[:6] in (b"GIF87a", b"GIF89a"):
|
||||
return True
|
||||
if data[:2] == b"BM":
|
||||
return True
|
||||
if data[:4] == b"RIFF" and len(data) >= 12 and data[8:12] == b"WEBP":
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
|
||||
"""
|
||||
Save raw image bytes to the cache and return the absolute file path.
|
||||
|
|
@ -226,7 +297,17 @@ def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
|
|||
|
||||
Returns:
|
||||
Absolute path to the cached image file as a string.
|
||||
|
||||
Raises:
|
||||
ValueError: If *data* does not look like a valid image (e.g. an HTML
|
||||
error page returned by the upstream server).
|
||||
"""
|
||||
if not _looks_like_image(data):
|
||||
snippet = data[:80].decode("utf-8", errors="replace")
|
||||
raise ValueError(
|
||||
f"Refusing to cache non-image data as {ext} "
|
||||
f"(starts with: {snippet!r})"
|
||||
)
|
||||
cache_dir = get_image_cache_dir()
|
||||
filename = f"img_{uuid.uuid4().hex[:12]}{ext}"
|
||||
filepath = cache_dir / filename
|
||||
|
|
@ -254,7 +335,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
|
|||
"""
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(url):
|
||||
raise ValueError(f"Blocked unsafe URL (SSRF protection): {_safe_url_for_log(url)}")
|
||||
raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
|
|
@ -262,7 +343,11 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
|
|||
_log = _logging.getLogger(__name__)
|
||||
|
||||
last_exc = None
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
follow_redirects=True,
|
||||
event_hooks={"response": [_ssrf_redirect_guard]},
|
||||
) as client:
|
||||
for attempt in range(retries + 1):
|
||||
try:
|
||||
response = await client.get(
|
||||
|
|
@ -284,7 +369,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
|
|||
"Media cache retry %d/%d for %s (%.1fs): %s",
|
||||
attempt + 1,
|
||||
retries,
|
||||
_safe_url_for_log(url),
|
||||
safe_url_for_log(url),
|
||||
wait,
|
||||
exc,
|
||||
)
|
||||
|
|
@ -369,7 +454,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
|
|||
"""
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(url):
|
||||
raise ValueError(f"Blocked unsafe URL (SSRF protection): {_safe_url_for_log(url)}")
|
||||
raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
|
|
@ -377,7 +462,11 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
|
|||
_log = _logging.getLogger(__name__)
|
||||
|
||||
last_exc = None
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
follow_redirects=True,
|
||||
event_hooks={"response": [_ssrf_redirect_guard]},
|
||||
) as client:
|
||||
for attempt in range(retries + 1):
|
||||
try:
|
||||
response = await client.get(
|
||||
|
|
@ -399,7 +488,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
|
|||
"Audio cache retry %d/%d for %s (%.1fs): %s",
|
||||
attempt + 1,
|
||||
retries,
|
||||
_safe_url_for_log(url),
|
||||
safe_url_for_log(url),
|
||||
wait,
|
||||
exc,
|
||||
)
|
||||
|
|
@ -502,6 +591,14 @@ class MessageType(Enum):
|
|||
COMMAND = "command" # /command style
|
||||
|
||||
|
||||
class ProcessingOutcome(Enum):
|
||||
"""Result classification for message-processing lifecycle hooks."""
|
||||
|
||||
SUCCESS = "success"
|
||||
FAILURE = "failure"
|
||||
CANCELLED = "cancelled"
|
||||
|
||||
|
||||
@dataclass
|
||||
class MessageEvent:
|
||||
"""
|
||||
|
|
@ -529,8 +626,9 @@ class MessageEvent:
|
|||
reply_to_message_id: Optional[str] = None
|
||||
reply_to_text: Optional[str] = None # Text of the replied-to message (for context injection)
|
||||
|
||||
# Auto-loaded skill for topic/channel bindings (e.g., Telegram DM Topics)
|
||||
auto_skill: Optional[str] = None
|
||||
# Auto-loaded skill(s) for topic/channel bindings (e.g., Telegram DM Topics,
|
||||
# Discord channel_skill_bindings). A single name or ordered list.
|
||||
auto_skill: Optional[str | list[str]] = None
|
||||
|
||||
# Internal flag — set for synthetic events (e.g. background process
|
||||
# completion notifications) that must bypass user authorization checks.
|
||||
|
|
@ -552,6 +650,9 @@ class MessageEvent:
|
|||
raw = parts[0][1:].lower() if parts else None
|
||||
if raw and "@" in raw:
|
||||
raw = raw.split("@", 1)[0]
|
||||
# Reject file paths: valid command names never contain /
|
||||
if raw and "/" in raw:
|
||||
return None
|
||||
return raw
|
||||
|
||||
def get_command_args(self) -> str:
|
||||
|
|
@ -572,6 +673,32 @@ class SendResult:
|
|||
retryable: bool = False # True for transient connection errors — base will retry automatically
|
||||
|
||||
|
||||
def merge_pending_message_event(
|
||||
pending_messages: Dict[str, MessageEvent],
|
||||
session_key: str,
|
||||
event: MessageEvent,
|
||||
) -> None:
|
||||
"""Store or merge a pending event for a session.
|
||||
|
||||
Photo bursts/albums often arrive as multiple near-simultaneous PHOTO
|
||||
events. Merge those into the existing queued event so the next turn sees
|
||||
the whole burst, while non-photo follow-ups still replace the pending
|
||||
event normally.
|
||||
"""
|
||||
existing = pending_messages.get(session_key)
|
||||
if (
|
||||
existing
|
||||
and getattr(existing, "message_type", None) == MessageType.PHOTO
|
||||
and event.message_type == MessageType.PHOTO
|
||||
):
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
existing.media_types.extend(event.media_types)
|
||||
if event.text:
|
||||
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
|
||||
return
|
||||
pending_messages[session_key] = event
|
||||
|
||||
|
||||
# Error substrings that indicate a transient *connection* failure worth retrying.
|
||||
# "timeout" / "timed out" / "readtimeout" / "writetimeout" are intentionally
|
||||
# excluded: a read/write timeout on a non-idempotent call (e.g. send_message)
|
||||
|
|
@ -625,6 +752,8 @@ class BasePlatformAdapter(ABC):
|
|||
# Gateway shutdown cancels these so an old gateway instance doesn't keep
|
||||
# working on a task after --replace or manual restarts.
|
||||
self._background_tasks: set[asyncio.Task] = set()
|
||||
self._expected_cancelled_tasks: set[asyncio.Task] = set()
|
||||
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
|
||||
# Chats where auto-TTS on voice input is disabled (set by /voice off)
|
||||
self._auto_tts_disabled_chats: set = set()
|
||||
# Chats where typing indicator is paused (e.g. during approval waits).
|
||||
|
|
@ -713,6 +842,10 @@ class BasePlatformAdapter(ABC):
|
|||
an optional response string.
|
||||
"""
|
||||
self._message_handler = handler
|
||||
|
||||
def set_busy_session_handler(self, handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]]) -> None:
|
||||
"""Set an optional handler for messages arriving during active sessions."""
|
||||
self._busy_session_handler = handler
|
||||
|
||||
def set_session_store(self, session_store: Any) -> None:
|
||||
"""
|
||||
|
|
@ -1133,7 +1266,7 @@ class BasePlatformAdapter(ABC):
|
|||
async def on_processing_start(self, event: MessageEvent) -> None:
|
||||
"""Hook called when background processing begins."""
|
||||
|
||||
async def on_processing_complete(self, event: MessageEvent, success: bool) -> None:
|
||||
async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
|
||||
"""Hook called when background processing completes."""
|
||||
|
||||
async def _run_processing_hook(self, hook_name: str, *args: Any, **kwargs: Any) -> None:
|
||||
|
|
@ -1294,7 +1427,18 @@ class BasePlatformAdapter(ABC):
|
|||
# session lifecycle and its cleanup races with the running task
|
||||
# (see PR #4926).
|
||||
cmd = event.get_command()
|
||||
if cmd in ("approve", "deny", "status", "agents", "tasks", "stop", "new", "reset"):
|
||||
if cmd in (
|
||||
"approve",
|
||||
"deny",
|
||||
"status",
|
||||
"agents",
|
||||
"tasks",
|
||||
"stop",
|
||||
"new",
|
||||
"reset",
|
||||
"background",
|
||||
"restart",
|
||||
):
|
||||
logger.debug(
|
||||
"[%s] Command '/%s' bypassing active-session guard for %s",
|
||||
self.name, cmd, session_key,
|
||||
|
|
@ -1313,19 +1457,19 @@ class BasePlatformAdapter(ABC):
|
|||
logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
|
||||
return
|
||||
|
||||
if self._busy_session_handler is not None:
|
||||
try:
|
||||
if await self._busy_session_handler(event, session_key):
|
||||
return
|
||||
except Exception as e:
|
||||
logger.error("[%s] Busy-session handler failed: %s", self.name, e, exc_info=True)
|
||||
|
||||
# Special case: photo bursts/albums frequently arrive as multiple near-
|
||||
# simultaneous messages. Queue them without interrupting the active run,
|
||||
# then process them immediately after the current task finishes.
|
||||
if event.message_type == MessageType.PHOTO:
|
||||
logger.debug("[%s] Queuing photo follow-up for session %s without interrupt", self.name, session_key)
|
||||
existing = self._pending_messages.get(session_key)
|
||||
if existing and existing.message_type == MessageType.PHOTO:
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
existing.media_types.extend(event.media_types)
|
||||
if event.text:
|
||||
existing.text = self._merge_caption(existing.text, event.text)
|
||||
else:
|
||||
self._pending_messages[session_key] = event
|
||||
merge_pending_message_event(self._pending_messages, session_key, event)
|
||||
return # Don't interrupt now - will run after current task completes
|
||||
|
||||
# Default behavior for non-photo follow-ups: interrupt the running agent
|
||||
|
|
@ -1352,6 +1496,7 @@ class BasePlatformAdapter(ABC):
|
|||
return
|
||||
if hasattr(task, "add_done_callback"):
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
task.add_done_callback(self._expected_cancelled_tasks.discard)
|
||||
|
||||
@staticmethod
|
||||
def _get_human_delay() -> float:
|
||||
|
|
@ -1488,7 +1633,7 @@ class BasePlatformAdapter(ABC):
|
|||
logger.info(
|
||||
"[%s] Sending image: %s (alt=%s)",
|
||||
self.name,
|
||||
_safe_url_for_log(image_url),
|
||||
safe_url_for_log(image_url),
|
||||
alt_text[:30] if alt_text else "",
|
||||
)
|
||||
# Route animated GIFs through send_animation for proper playback
|
||||
|
|
@ -1580,7 +1725,11 @@ class BasePlatformAdapter(ABC):
|
|||
|
||||
# Determine overall success for the processing hook
|
||||
processing_ok = delivery_succeeded if delivery_attempted else not bool(response)
|
||||
await self._run_processing_hook("on_processing_complete", event, processing_ok)
|
||||
await self._run_processing_hook(
|
||||
"on_processing_complete",
|
||||
event,
|
||||
ProcessingOutcome.SUCCESS if processing_ok else ProcessingOutcome.FAILURE,
|
||||
)
|
||||
|
||||
# Check if there's a pending message that was queued during our processing
|
||||
if session_key in self._pending_messages:
|
||||
|
|
@ -1599,10 +1748,14 @@ class BasePlatformAdapter(ABC):
|
|||
return # Already cleaned up
|
||||
|
||||
except asyncio.CancelledError:
|
||||
await self._run_processing_hook("on_processing_complete", event, False)
|
||||
current_task = asyncio.current_task()
|
||||
outcome = ProcessingOutcome.CANCELLED
|
||||
if current_task is None or current_task not in self._expected_cancelled_tasks:
|
||||
outcome = ProcessingOutcome.FAILURE
|
||||
await self._run_processing_hook("on_processing_complete", event, outcome)
|
||||
raise
|
||||
except Exception as e:
|
||||
await self._run_processing_hook("on_processing_complete", event, False)
|
||||
await self._run_processing_hook("on_processing_complete", event, ProcessingOutcome.FAILURE)
|
||||
logger.error("[%s] Error handling message: %s", self.name, e, exc_info=True)
|
||||
# Send the error to the user so they aren't left with radio silence
|
||||
try:
|
||||
|
|
@ -1646,10 +1799,12 @@ class BasePlatformAdapter(ABC):
|
|||
"""
|
||||
tasks = [task for task in self._background_tasks if not task.done()]
|
||||
for task in tasks:
|
||||
self._expected_cancelled_tasks.add(task)
|
||||
task.cancel()
|
||||
if tasks:
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
self._background_tasks.clear()
|
||||
self._expected_cancelled_tasks.clear()
|
||||
self._pending_messages.clear()
|
||||
self._active_sessions.clear()
|
||||
|
||||
|
|
|
|||
|
|
@ -207,9 +207,17 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
|||
self.webhook_port,
|
||||
self.webhook_path,
|
||||
)
|
||||
|
||||
# Register webhook with BlueBubbles server
|
||||
# This is required for the server to know where to send events
|
||||
await self._register_webhook()
|
||||
|
||||
return True
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
# Unregister webhook before cleaning up
|
||||
await self._unregister_webhook()
|
||||
|
||||
if self.client:
|
||||
await self.client.aclose()
|
||||
self.client = None
|
||||
|
|
@ -218,6 +226,105 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
|||
self._runner = None
|
||||
self._mark_disconnected()
|
||||
|
||||
@property
|
||||
def _webhook_url(self) -> str:
|
||||
"""Compute the external webhook URL for BlueBubbles registration."""
|
||||
host = self.webhook_host
|
||||
if host in ("0.0.0.0", "127.0.0.1", "localhost", "::"):
|
||||
host = "localhost"
|
||||
return f"http://{host}:{self.webhook_port}{self.webhook_path}"
|
||||
|
||||
async def _find_registered_webhooks(self, url: str) -> list:
|
||||
"""Return list of BB webhook entries matching *url*."""
|
||||
try:
|
||||
res = await self._api_get("/api/v1/webhook")
|
||||
data = res.get("data")
|
||||
if isinstance(data, list):
|
||||
return [wh for wh in data if wh.get("url") == url]
|
||||
except Exception:
|
||||
pass
|
||||
return []
|
||||
|
||||
async def _register_webhook(self) -> bool:
|
||||
"""Register this webhook URL with the BlueBubbles server.
|
||||
|
||||
BlueBubbles requires webhooks to be registered via API before
|
||||
it will send events. Checks for an existing registration first
|
||||
to avoid duplicates (e.g. after a crash without clean shutdown).
|
||||
"""
|
||||
if not self.client:
|
||||
return False
|
||||
|
||||
webhook_url = self._webhook_url
|
||||
|
||||
# Crash resilience — reuse an existing registration if present
|
||||
existing = await self._find_registered_webhooks(webhook_url)
|
||||
if existing:
|
||||
logger.info(
|
||||
"[bluebubbles] webhook already registered: %s", webhook_url
|
||||
)
|
||||
return True
|
||||
|
||||
payload = {
|
||||
"url": webhook_url,
|
||||
"events": ["new-message", "updated-message", "message"],
|
||||
}
|
||||
|
||||
try:
|
||||
res = await self._api_post("/api/v1/webhook", payload)
|
||||
status = res.get("status", 0)
|
||||
if 200 <= status < 300:
|
||||
logger.info(
|
||||
"[bluebubbles] webhook registered with server: %s",
|
||||
webhook_url,
|
||||
)
|
||||
return True
|
||||
else:
|
||||
logger.warning(
|
||||
"[bluebubbles] webhook registration returned status %s: %s",
|
||||
status,
|
||||
res.get("message"),
|
||||
)
|
||||
return False
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"[bluebubbles] failed to register webhook with server: %s",
|
||||
exc,
|
||||
)
|
||||
return False
|
||||
|
||||
async def _unregister_webhook(self) -> bool:
|
||||
"""Unregister this webhook URL from the BlueBubbles server.
|
||||
|
||||
Removes *all* matching registrations to clean up any duplicates
|
||||
left by prior crashes.
|
||||
"""
|
||||
if not self.client:
|
||||
return False
|
||||
|
||||
webhook_url = self._webhook_url
|
||||
removed = False
|
||||
|
||||
try:
|
||||
for wh in await self._find_registered_webhooks(webhook_url):
|
||||
wh_id = wh.get("id")
|
||||
if wh_id:
|
||||
res = await self.client.delete(
|
||||
self._api_url(f"/api/v1/webhook/{wh_id}")
|
||||
)
|
||||
res.raise_for_status()
|
||||
removed = True
|
||||
if removed:
|
||||
logger.info(
|
||||
"[bluebubbles] webhook unregistered: %s", webhook_url
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"[bluebubbles] failed to unregister webhook (non-critical): %s",
|
||||
exc,
|
||||
)
|
||||
return removed
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Chat GUID resolution
|
||||
# ------------------------------------------------------------------
|
||||
|
|
@ -826,3 +933,4 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
|||
asyncio.create_task(self.mark_read(session_chat_id))
|
||||
|
||||
return web.Response(text="ok")
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ Configuration in config.yaml:
|
|||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
|
@ -54,6 +55,8 @@ MAX_MESSAGE_LENGTH = 20000
|
|||
DEDUP_WINDOW_SECONDS = 300
|
||||
DEDUP_MAX_SIZE = 1000
|
||||
RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
|
||||
_SESSION_WEBHOOKS_MAX = 500
|
||||
_DINGTALK_WEBHOOK_RE = re.compile(r'^https://api\.dingtalk\.com/')
|
||||
|
||||
|
||||
def check_dingtalk_requirements() -> bool:
|
||||
|
|
@ -195,9 +198,15 @@ class DingTalkAdapter(BasePlatformAdapter):
|
|||
chat_id = conversation_id or sender_id
|
||||
chat_type = "group" if is_group else "dm"
|
||||
|
||||
# Store session webhook for reply routing
|
||||
# Store session webhook for reply routing (validate origin to prevent SSRF)
|
||||
session_webhook = getattr(message, "session_webhook", None) or ""
|
||||
if session_webhook and chat_id:
|
||||
if session_webhook and chat_id and _DINGTALK_WEBHOOK_RE.match(session_webhook):
|
||||
if len(self._session_webhooks) >= _SESSION_WEBHOOKS_MAX:
|
||||
# Evict oldest entry to cap memory growth
|
||||
try:
|
||||
self._session_webhooks.pop(next(iter(self._session_webhooks)))
|
||||
except StopIteration:
|
||||
pass
|
||||
self._session_webhooks[chat_id] = session_webhook
|
||||
|
||||
source = self.build_source(
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ from gateway.platforms.base import (
|
|||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
ProcessingOutcome,
|
||||
SendResult,
|
||||
cache_image_from_url,
|
||||
cache_audio_from_url,
|
||||
|
|
@ -422,6 +423,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
|
||||
# Discord message limits
|
||||
MAX_MESSAGE_LENGTH = 2000
|
||||
_SPLIT_THRESHOLD = 1900 # near the 2000-char split point
|
||||
|
||||
# Auto-disconnect from voice channel after this many seconds of inactivity
|
||||
VOICE_TIMEOUT = 300
|
||||
|
|
@ -433,6 +435,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
self._allowed_user_ids: set = set() # For button approval authorization
|
||||
# Voice channel state (per-guild)
|
||||
self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient
|
||||
# Text batching: merge rapid successive messages (Telegram-style)
|
||||
self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6"))
|
||||
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
|
||||
self._pending_text_batches: Dict[str, MessageEvent] = {}
|
||||
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
|
||||
self._voice_text_channels: Dict[int, int] = {} # guild_id -> text_channel_id
|
||||
self._voice_timeout_tasks: Dict[int, asyncio.Task] = {} # guild_id -> timeout task
|
||||
# Phase 2: voice listening
|
||||
|
|
@ -599,22 +606,35 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
if not self._client.user or self._client.user not in message.mentions:
|
||||
return
|
||||
# "all" falls through to handle_message
|
||||
|
||||
# If the message @mentions other users but NOT the bot, the
|
||||
# sender is talking to someone else — stay silent. Only
|
||||
# applies in server channels; in DMs the user is always
|
||||
# talking to the bot (mentions are just references).
|
||||
# Controlled by DISCORD_IGNORE_NO_MENTION (default: true).
|
||||
_ignore_no_mention = os.getenv(
|
||||
"DISCORD_IGNORE_NO_MENTION", "true"
|
||||
).lower() in ("true", "1", "yes")
|
||||
if _ignore_no_mention and message.mentions and not isinstance(message.channel, discord.DMChannel):
|
||||
_bot_mentioned = (
|
||||
|
||||
# Multi-agent filtering: if the message mentions specific bots
|
||||
# but NOT this bot, the sender is talking to another agent —
|
||||
# stay silent. Messages with no bot mentions (general chat)
|
||||
# still fall through to _handle_message for the existing
|
||||
# DISCORD_REQUIRE_MENTION check.
|
||||
#
|
||||
# This replaces the older DISCORD_IGNORE_NO_MENTION logic
|
||||
# with bot-aware filtering that works correctly when multiple
|
||||
# agents share a channel.
|
||||
if not isinstance(message.channel, discord.DMChannel) and message.mentions:
|
||||
_self_mentioned = (
|
||||
self._client.user is not None
|
||||
and self._client.user in message.mentions
|
||||
)
|
||||
if not _bot_mentioned:
|
||||
return # Talking to someone else, don't interrupt
|
||||
_other_bots_mentioned = any(
|
||||
m.bot and m != self._client.user
|
||||
for m in message.mentions
|
||||
)
|
||||
# If other bots are mentioned but we're not → not for us
|
||||
if _other_bots_mentioned and not _self_mentioned:
|
||||
return
|
||||
# If humans are mentioned but we're not → not for us
|
||||
# (preserves old DISCORD_IGNORE_NO_MENTION=true behavior)
|
||||
_ignore_no_mention = os.getenv(
|
||||
"DISCORD_IGNORE_NO_MENTION", "true"
|
||||
).lower() in ("true", "1", "yes")
|
||||
if _ignore_no_mention and not _self_mentioned and not _other_bots_mentioned:
|
||||
return
|
||||
|
||||
await self._handle_message(message)
|
||||
|
||||
|
|
@ -748,14 +768,17 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
if hasattr(message, "add_reaction"):
|
||||
await self._add_reaction(message, "👀")
|
||||
|
||||
async def on_processing_complete(self, event: MessageEvent, success: bool) -> None:
|
||||
async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
|
||||
"""Swap the in-progress reaction for a final success/failure reaction."""
|
||||
if not self._reactions_enabled():
|
||||
return
|
||||
message = event.raw_message
|
||||
if hasattr(message, "add_reaction"):
|
||||
await self._remove_reaction(message, "👀")
|
||||
await self._add_reaction(message, "✅" if success else "❌")
|
||||
if outcome == ProcessingOutcome.SUCCESS:
|
||||
await self._add_reaction(message, "✅")
|
||||
elif outcome == ProcessingOutcome.FAILURE:
|
||||
await self._add_reaction(message, "❌")
|
||||
|
||||
async def send(
|
||||
self,
|
||||
|
|
@ -764,18 +787,34 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
) -> SendResult:
|
||||
"""Send a message to a Discord channel."""
|
||||
"""Send a message to a Discord channel or thread.
|
||||
|
||||
When metadata contains a thread_id, the message is sent to that
|
||||
thread instead of the parent channel identified by chat_id.
|
||||
"""
|
||||
if not self._client:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
# Get the channel
|
||||
channel = self._client.get_channel(int(chat_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(chat_id))
|
||||
# Determine target channel: thread_id in metadata takes precedence.
|
||||
thread_id = None
|
||||
if metadata and metadata.get("thread_id"):
|
||||
thread_id = metadata["thread_id"]
|
||||
|
||||
if not channel:
|
||||
return SendResult(success=False, error=f"Channel {chat_id} not found")
|
||||
if thread_id:
|
||||
# Fetch the thread directly — threads are addressed by their own ID.
|
||||
channel = self._client.get_channel(int(thread_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(thread_id))
|
||||
if not channel:
|
||||
return SendResult(success=False, error=f"Thread {thread_id} not found")
|
||||
else:
|
||||
# Get the parent channel
|
||||
channel = self._client.get_channel(int(chat_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(chat_id))
|
||||
if not channel:
|
||||
return SendResult(success=False, error=f"Channel {chat_id} not found")
|
||||
|
||||
# Format and split message if needed
|
||||
formatted = self.format_message(content)
|
||||
|
|
@ -1238,9 +1277,8 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
try:
|
||||
await asyncio.to_thread(VoiceReceiver.pcm_to_wav, pcm_data, wav_path)
|
||||
|
||||
from tools.transcription_tools import transcribe_audio, get_stt_model_from_config
|
||||
stt_model = get_stt_model_from_config()
|
||||
result = await asyncio.to_thread(transcribe_audio, wav_path, model=stt_model)
|
||||
from tools.transcription_tools import transcribe_audio
|
||||
result = await asyncio.to_thread(transcribe_audio, wav_path)
|
||||
|
||||
if not result.get("success"):
|
||||
return
|
||||
|
|
@ -1867,14 +1905,42 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
chat_topic=chat_topic,
|
||||
)
|
||||
|
||||
_parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "")
|
||||
_skills = self._resolve_channel_skills(thread_id, _parent_id or None)
|
||||
event = MessageEvent(
|
||||
text=text,
|
||||
message_type=MessageType.TEXT,
|
||||
source=source,
|
||||
raw_message=interaction,
|
||||
auto_skill=_skills,
|
||||
)
|
||||
await self.handle_message(event)
|
||||
|
||||
def _resolve_channel_skills(self, channel_id: str, parent_id: str | None = None) -> list[str] | None:
|
||||
"""Look up auto-skill bindings for a Discord channel/forum thread.
|
||||
|
||||
Config format (in platform extra):
|
||||
channel_skill_bindings:
|
||||
- id: "123456"
|
||||
skills: ["skill-a", "skill-b"]
|
||||
Also checks parent_id so forum threads inherit the forum's bindings.
|
||||
"""
|
||||
bindings = self.config.extra.get("channel_skill_bindings", [])
|
||||
if not bindings:
|
||||
return None
|
||||
ids_to_check = {channel_id}
|
||||
if parent_id:
|
||||
ids_to_check.add(parent_id)
|
||||
for entry in bindings:
|
||||
entry_id = str(entry.get("id", ""))
|
||||
if entry_id in ids_to_check:
|
||||
skills = entry.get("skills") or entry.get("skill")
|
||||
if isinstance(skills, str):
|
||||
return [skills]
|
||||
if isinstance(skills, list) and skills:
|
||||
return list(dict.fromkeys(skills)) # dedup, preserve order
|
||||
return None
|
||||
|
||||
def _thread_parent_channel(self, channel: Any) -> Any:
|
||||
"""Return the parent text channel when invoked from a thread."""
|
||||
return getattr(channel, "parent", None) or channel
|
||||
|
|
@ -2228,6 +2294,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
# discord.require_mention: Require @mention in server channels (default: true)
|
||||
# discord.free_response_channels: Channel IDs where bot responds without mention
|
||||
# discord.ignored_channels: Channel IDs where bot NEVER responds (even when mentioned)
|
||||
# discord.allowed_channels: If set, bot ONLY responds in these channels (whitelist)
|
||||
# discord.no_thread_channels: Channel IDs where bot responds directly without creating thread
|
||||
# discord.auto_thread: Auto-create thread on @mention in channels (default: true)
|
||||
|
||||
|
|
@ -2239,12 +2306,21 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
parent_channel_id = self._get_parent_channel_id(message.channel)
|
||||
|
||||
if not isinstance(message.channel, discord.DMChannel):
|
||||
# Check ignored channels first - never respond even when mentioned
|
||||
ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
|
||||
ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()}
|
||||
channel_ids = {str(message.channel.id)}
|
||||
if parent_channel_id:
|
||||
channel_ids.add(parent_channel_id)
|
||||
|
||||
# Check allowed channels - if set, only respond in these channels
|
||||
allowed_channels_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
|
||||
if allowed_channels_raw:
|
||||
allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()}
|
||||
if not (channel_ids & allowed_channels):
|
||||
logger.debug("[%s] Ignoring message in non-allowed channel: %s", self.name, channel_ids)
|
||||
return
|
||||
|
||||
# Check ignored channels - never respond even when mentioned
|
||||
ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
|
||||
ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()}
|
||||
if channel_ids & ignored_channels:
|
||||
logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids)
|
||||
return
|
||||
|
|
@ -2449,6 +2525,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
if not event_text or not event_text.strip():
|
||||
event_text = "(The user sent a message with no text content)"
|
||||
|
||||
_chan = message.channel
|
||||
_parent_id = str(getattr(_chan, "parent_id", "") or "")
|
||||
_chan_id = str(getattr(_chan, "id", ""))
|
||||
_skills = self._resolve_channel_skills(_chan_id, _parent_id or None)
|
||||
event = MessageEvent(
|
||||
text=event_text,
|
||||
message_type=msg_type,
|
||||
|
|
@ -2459,6 +2539,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
media_types=media_types,
|
||||
reply_to_message_id=str(message.reference.message_id) if message.reference else None,
|
||||
timestamp=message.created_at,
|
||||
auto_skill=_skills,
|
||||
)
|
||||
|
||||
# Track thread participation so the bot won't require @mention for
|
||||
|
|
@ -2466,7 +2547,80 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
if thread_id:
|
||||
self._track_thread(thread_id)
|
||||
|
||||
await self.handle_message(event)
|
||||
# Only batch plain text messages — commands, media, etc. dispatch
|
||||
# immediately since they won't be split by the Discord client.
|
||||
if msg_type == MessageType.TEXT and self._text_batch_delay_seconds > 0:
|
||||
self._enqueue_text_event(event)
|
||||
else:
|
||||
await self.handle_message(event)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text message aggregation (handles Discord client-side splits)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _text_batch_key(self, event: MessageEvent) -> str:
|
||||
"""Session-scoped key for text message batching."""
|
||||
from gateway.session import build_session_key
|
||||
return build_session_key(
|
||||
event.source,
|
||||
group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
|
||||
thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
|
||||
)
|
||||
|
||||
def _enqueue_text_event(self, event: MessageEvent) -> None:
|
||||
"""Buffer a text event and reset the flush timer.
|
||||
|
||||
When Discord splits a long user message at 2000 chars, the chunks
|
||||
arrive within a few hundred milliseconds. This merges them into
|
||||
a single event before dispatching.
|
||||
"""
|
||||
key = self._text_batch_key(event)
|
||||
existing = self._pending_text_batches.get(key)
|
||||
chunk_len = len(event.text or "")
|
||||
if existing is None:
|
||||
event._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
self._pending_text_batches[key] = event
|
||||
else:
|
||||
if event.text:
|
||||
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
|
||||
existing._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
if event.media_urls:
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
existing.media_types.extend(event.media_types)
|
||||
|
||||
prior_task = self._pending_text_batch_tasks.get(key)
|
||||
if prior_task and not prior_task.done():
|
||||
prior_task.cancel()
|
||||
self._pending_text_batch_tasks[key] = asyncio.create_task(
|
||||
self._flush_text_batch(key)
|
||||
)
|
||||
|
||||
async def _flush_text_batch(self, key: str) -> None:
|
||||
"""Wait for the quiet period then dispatch the aggregated text.
|
||||
|
||||
Uses a longer delay when the latest chunk is near Discord's 2000-char
|
||||
split point, since a continuation chunk is almost certain.
|
||||
"""
|
||||
current_task = asyncio.current_task()
|
||||
try:
|
||||
pending = self._pending_text_batches.get(key)
|
||||
last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
|
||||
if last_len >= self._SPLIT_THRESHOLD:
|
||||
delay = self._text_batch_split_delay_seconds
|
||||
else:
|
||||
delay = self._text_batch_delay_seconds
|
||||
await asyncio.sleep(delay)
|
||||
event = self._pending_text_batches.pop(key, None)
|
||||
if not event:
|
||||
return
|
||||
logger.info(
|
||||
"[Discord] Flushing text batch %s (%d chars)",
|
||||
key, len(event.text or ""),
|
||||
)
|
||||
await self.handle_message(event)
|
||||
finally:
|
||||
if self._pending_text_batch_tasks.get(key) is current_task:
|
||||
self._pending_text_batch_tasks.pop(key, None)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -195,7 +195,11 @@ def _extract_attachments(
|
|||
|
||||
ext = Path(filename).suffix.lower()
|
||||
if ext in _IMAGE_EXTS:
|
||||
cached_path = cache_image_from_bytes(payload, ext)
|
||||
try:
|
||||
cached_path = cache_image_from_bytes(payload, ext)
|
||||
except ValueError:
|
||||
logger.debug("Skipping non-image attachment %s (invalid magic bytes)", filename)
|
||||
continue
|
||||
attachments.append({
|
||||
"path": cached_path,
|
||||
"filename": filename,
|
||||
|
|
|
|||
|
|
@ -264,6 +264,7 @@ class FeishuAdapterSettings:
|
|||
bot_name: str
|
||||
dedup_cache_size: int
|
||||
text_batch_delay_seconds: float
|
||||
text_batch_split_delay_seconds: float
|
||||
text_batch_max_messages: int
|
||||
text_batch_max_chars: int
|
||||
media_batch_delay_seconds: float
|
||||
|
|
@ -972,7 +973,8 @@ def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
|
|||
return await original_connect(*args, **kwargs)
|
||||
|
||||
def _configure_with_overrides(conf: Any) -> Any:
|
||||
assert original_configure is not None
|
||||
if original_configure is None:
|
||||
raise RuntimeError("Feishu _configure_with_overrides called but original_configure is None")
|
||||
result = original_configure(conf)
|
||||
_apply_runtime_ws_overrides()
|
||||
return result
|
||||
|
|
@ -1014,6 +1016,10 @@ class FeishuAdapter(BasePlatformAdapter):
|
|||
"""Feishu/Lark bot adapter."""
|
||||
|
||||
MAX_MESSAGE_LENGTH = 8000
|
||||
# Threshold for detecting Feishu client-side message splits.
|
||||
# When a chunk is near the ~4096-char practical limit, a continuation
|
||||
# is almost certain.
|
||||
_SPLIT_THRESHOLD = 4000
|
||||
|
||||
# =========================================================================
|
||||
# Lifecycle — init / settings / connect / disconnect
|
||||
|
|
@ -1105,6 +1111,9 @@ class FeishuAdapter(BasePlatformAdapter):
|
|||
text_batch_delay_seconds=float(
|
||||
os.getenv("HERMES_FEISHU_TEXT_BATCH_DELAY_SECONDS", str(_DEFAULT_TEXT_BATCH_DELAY_SECONDS))
|
||||
),
|
||||
text_batch_split_delay_seconds=float(
|
||||
os.getenv("HERMES_FEISHU_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0")
|
||||
),
|
||||
text_batch_max_messages=max(
|
||||
1,
|
||||
int(os.getenv("HERMES_FEISHU_TEXT_BATCH_MAX_MESSAGES", str(_DEFAULT_TEXT_BATCH_MAX_MESSAGES))),
|
||||
|
|
@ -1152,6 +1161,7 @@ class FeishuAdapter(BasePlatformAdapter):
|
|||
self._bot_name = settings.bot_name
|
||||
self._dedup_cache_size = settings.dedup_cache_size
|
||||
self._text_batch_delay_seconds = settings.text_batch_delay_seconds
|
||||
self._text_batch_split_delay_seconds = settings.text_batch_split_delay_seconds
|
||||
self._text_batch_max_messages = settings.text_batch_max_messages
|
||||
self._text_batch_max_chars = settings.text_batch_max_chars
|
||||
self._media_batch_delay_seconds = settings.media_batch_delay_seconds
|
||||
|
|
@ -1180,6 +1190,8 @@ class FeishuAdapter(BasePlatformAdapter):
|
|||
lambda data: self._on_reaction_event("im.message.reaction.deleted_v1", data)
|
||||
)
|
||||
.register_p2_card_action_trigger(self._on_card_action_trigger)
|
||||
.register_p2_im_chat_member_bot_added_v1(self._on_bot_added_to_chat)
|
||||
.register_p2_im_chat_member_bot_deleted_v1(self._on_bot_removed_from_chat)
|
||||
.build()
|
||||
)
|
||||
|
||||
|
|
@ -1570,13 +1582,18 @@ class FeishuAdapter(BasePlatformAdapter):
|
|||
return SendResult(success=False, error=f"Image file not found: {image_path}")
|
||||
|
||||
try:
|
||||
with open(image_path, "rb") as image_file:
|
||||
body = self._build_image_upload_body(
|
||||
image_type=_FEISHU_IMAGE_UPLOAD_TYPE,
|
||||
image=image_file,
|
||||
)
|
||||
request = self._build_image_upload_request(body)
|
||||
upload_response = await asyncio.to_thread(self._client.im.v1.image.create, request)
|
||||
import io as _io
|
||||
with open(image_path, "rb") as f:
|
||||
image_bytes = f.read()
|
||||
# Wrap in BytesIO so lark SDK's MultipartEncoder can read .name and .tell()
|
||||
image_file = _io.BytesIO(image_bytes)
|
||||
image_file.name = os.path.basename(image_path)
|
||||
body = self._build_image_upload_body(
|
||||
image_type=_FEISHU_IMAGE_UPLOAD_TYPE,
|
||||
image=image_file,
|
||||
)
|
||||
request = self._build_image_upload_request(body)
|
||||
upload_response = await asyncio.to_thread(self._client.im.v1.image.create, request)
|
||||
image_key = self._extract_response_field(upload_response, "image_key")
|
||||
if not image_key:
|
||||
return self._response_error_result(
|
||||
|
|
@ -2478,8 +2495,10 @@ class FeishuAdapter(BasePlatformAdapter):
|
|||
async def _enqueue_text_event(self, event: MessageEvent) -> None:
|
||||
"""Debounce rapid Feishu text bursts into a single MessageEvent."""
|
||||
key = self._text_batch_key(event)
|
||||
chunk_len = len(event.text or "")
|
||||
existing = self._pending_text_batches.get(key)
|
||||
if existing is None:
|
||||
event._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
self._pending_text_batches[key] = event
|
||||
self._pending_text_batch_counts[key] = 1
|
||||
self._schedule_text_batch_flush(key)
|
||||
|
|
@ -2504,6 +2523,7 @@ class FeishuAdapter(BasePlatformAdapter):
|
|||
return
|
||||
|
||||
existing.text = next_text
|
||||
existing._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
existing.timestamp = event.timestamp
|
||||
if event.message_id:
|
||||
existing.message_id = event.message_id
|
||||
|
|
@ -2530,10 +2550,22 @@ class FeishuAdapter(BasePlatformAdapter):
|
|||
task_map[key] = asyncio.create_task(flush_fn(key))
|
||||
|
||||
async def _flush_text_batch(self, key: str) -> None:
|
||||
"""Flush a pending text batch after the quiet period."""
|
||||
"""Flush a pending text batch after the quiet period.
|
||||
|
||||
Uses a longer delay when the latest chunk is near Feishu's ~4096-char
|
||||
split point, since a continuation chunk is almost certain.
|
||||
"""
|
||||
current_task = asyncio.current_task()
|
||||
try:
|
||||
await asyncio.sleep(self._text_batch_delay_seconds)
|
||||
# Adaptive delay: if the latest chunk is near the split threshold,
|
||||
# a continuation is almost certain — wait longer.
|
||||
pending = self._pending_text_batches.get(key)
|
||||
last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
|
||||
if last_len >= self._SPLIT_THRESHOLD:
|
||||
delay = self._text_batch_split_delay_seconds
|
||||
else:
|
||||
delay = self._text_batch_delay_seconds
|
||||
await asyncio.sleep(delay)
|
||||
await self._flush_text_batch_now(key)
|
||||
finally:
|
||||
if self._pending_text_batch_tasks.get(key) is current_task:
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -39,6 +39,7 @@ from gateway.platforms.base import (
|
|||
MessageType,
|
||||
SendResult,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
safe_url_for_log,
|
||||
cache_document_from_bytes,
|
||||
)
|
||||
|
||||
|
|
@ -656,8 +657,19 @@ class SlackAdapter(BasePlatformAdapter):
|
|||
try:
|
||||
import httpx
|
||||
|
||||
async def _ssrf_redirect_guard(response):
|
||||
"""Re-check redirect targets so public URLs cannot bounce into private IPs."""
|
||||
if response.is_redirect and response.next_request:
|
||||
redirect_url = str(response.next_request.url)
|
||||
if not is_safe_url(redirect_url):
|
||||
raise ValueError("Blocked redirect to private/internal address")
|
||||
|
||||
# Download the image first
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
follow_redirects=True,
|
||||
event_hooks={"response": [_ssrf_redirect_guard]},
|
||||
) as client:
|
||||
response = await client.get(image_url)
|
||||
response.raise_for_status()
|
||||
|
||||
|
|
@ -674,7 +686,7 @@ class SlackAdapter(BasePlatformAdapter):
|
|||
except Exception as e: # pragma: no cover - defensive logging
|
||||
logger.warning(
|
||||
"[Slack] Failed to upload image from URL %s, falling back to text: %s",
|
||||
image_url,
|
||||
safe_url_for_log(image_url),
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
|
|
@ -1596,6 +1608,18 @@ class SlackAdapter(BasePlatformAdapter):
|
|||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Slack may return an HTML sign-in/redirect page
|
||||
# instead of actual media bytes (e.g. expired token,
|
||||
# restricted file access). Detect this early so we
|
||||
# don't cache bogus data and confuse downstream tools.
|
||||
ct = response.headers.get("content-type", "")
|
||||
if "text/html" in ct:
|
||||
raise ValueError(
|
||||
"Slack returned HTML instead of media "
|
||||
f"(content-type: {ct}); "
|
||||
"check bot token scopes and file permissions"
|
||||
)
|
||||
|
||||
if audio:
|
||||
from gateway.platforms.base import cache_audio_from_bytes
|
||||
return cache_audio_from_bytes(response.content, ext)
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ from gateway.platforms.base import (
|
|||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
ProcessingOutcome,
|
||||
SendResult,
|
||||
cache_image_from_bytes,
|
||||
cache_audio_from_bytes,
|
||||
|
|
@ -121,6 +122,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
|
||||
# Telegram message limits
|
||||
MAX_MESSAGE_LENGTH = 4096
|
||||
# Threshold for detecting Telegram client-side message splits.
|
||||
# When a chunk is near this limit, a continuation is almost certain.
|
||||
_SPLIT_THRESHOLD = 4000
|
||||
MEDIA_GROUP_WAIT_SECONDS = 0.8
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
|
|
@ -140,6 +144,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
# Buffer rapid text messages so Telegram client-side splits of long
|
||||
# messages are aggregated into a single MessageEvent.
|
||||
self._text_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_TEXT_BATCH_DELAY_SECONDS", "0.6"))
|
||||
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_TELEGRAM_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
|
||||
self._pending_text_batches: Dict[str, MessageEvent] = {}
|
||||
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
|
||||
self._token_lock_identity: Optional[str] = None
|
||||
|
|
@ -513,6 +518,45 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
|
||||
# Build the application
|
||||
builder = Application.builder().token(self.config.token)
|
||||
custom_base_url = self.config.extra.get("base_url")
|
||||
if custom_base_url:
|
||||
builder = builder.base_url(custom_base_url)
|
||||
builder = builder.base_file_url(
|
||||
self.config.extra.get("base_file_url", custom_base_url)
|
||||
)
|
||||
logger.info(
|
||||
"[%s] Using custom Telegram base_url: %s",
|
||||
self.name, custom_base_url,
|
||||
)
|
||||
|
||||
# PTB defaults (pool_timeout=1s) are too aggressive on flaky networks and
|
||||
# can trigger "Pool timeout: All connections in the connection pool are occupied"
|
||||
# during reconnect/bootstrap. Use safer defaults and allow env overrides.
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
try:
|
||||
return int(os.getenv(name, str(default)))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
|
||||
try:
|
||||
return float(os.getenv(name, str(default)))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
request_kwargs = {
|
||||
"connection_pool_size": _env_int("HERMES_TELEGRAM_HTTP_POOL_SIZE", 512),
|
||||
"pool_timeout": _env_float("HERMES_TELEGRAM_HTTP_POOL_TIMEOUT", 8.0),
|
||||
"connect_timeout": _env_float("HERMES_TELEGRAM_HTTP_CONNECT_TIMEOUT", 10.0),
|
||||
"read_timeout": _env_float("HERMES_TELEGRAM_HTTP_READ_TIMEOUT", 20.0),
|
||||
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
|
||||
}
|
||||
|
||||
proxy_configured = any(
|
||||
(os.getenv(k) or "").strip()
|
||||
for k in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy")
|
||||
)
|
||||
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
|
||||
fallback_ips = self._fallback_ips()
|
||||
if not fallback_ips:
|
||||
fallback_ips = await discover_fallback_ips()
|
||||
|
|
@ -521,16 +565,32 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
self.name,
|
||||
", ".join(fallback_ips),
|
||||
)
|
||||
if fallback_ips:
|
||||
|
||||
if fallback_ips and not proxy_configured and not disable_fallback:
|
||||
logger.info(
|
||||
"[%s] Telegram fallback IPs active: %s",
|
||||
self.name,
|
||||
", ".join(fallback_ips),
|
||||
)
|
||||
transport = TelegramFallbackTransport(fallback_ips)
|
||||
request = HTTPXRequest(httpx_kwargs={"transport": transport})
|
||||
get_updates_request = HTTPXRequest(httpx_kwargs={"transport": transport})
|
||||
builder = builder.request(request).get_updates_request(get_updates_request)
|
||||
# Keep request/update pools separate to reduce contention during
|
||||
# polling reconnect + bot API bootstrap/delete_webhook calls.
|
||||
request = HTTPXRequest(
|
||||
**request_kwargs,
|
||||
httpx_kwargs={"transport": TelegramFallbackTransport(fallback_ips)},
|
||||
)
|
||||
get_updates_request = HTTPXRequest(
|
||||
**request_kwargs,
|
||||
httpx_kwargs={"transport": TelegramFallbackTransport(fallback_ips)},
|
||||
)
|
||||
else:
|
||||
if proxy_configured:
|
||||
logger.info("[%s] Proxy configured; skipping Telegram fallback-IP transport", self.name)
|
||||
elif disable_fallback:
|
||||
logger.info("[%s] Telegram fallback-IP transport disabled via env", self.name)
|
||||
request = HTTPXRequest(**request_kwargs)
|
||||
get_updates_request = HTTPXRequest(**request_kwargs)
|
||||
|
||||
builder = builder.request(request).get_updates_request(get_updates_request)
|
||||
self._app = builder.build()
|
||||
self._bot = self._app.bot
|
||||
|
||||
|
|
@ -2160,12 +2220,15 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
"""
|
||||
key = self._text_batch_key(event)
|
||||
existing = self._pending_text_batches.get(key)
|
||||
chunk_len = len(event.text or "")
|
||||
if existing is None:
|
||||
event._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
self._pending_text_batches[key] = event
|
||||
else:
|
||||
# Append text from the follow-up chunk
|
||||
if event.text:
|
||||
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
|
||||
existing._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
# Merge any media that might be attached
|
||||
if event.media_urls:
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
|
|
@ -2180,10 +2243,22 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
)
|
||||
|
||||
async def _flush_text_batch(self, key: str) -> None:
|
||||
"""Wait for the quiet period then dispatch the aggregated text."""
|
||||
"""Wait for the quiet period then dispatch the aggregated text.
|
||||
|
||||
Uses a longer delay when the latest chunk is near Telegram's 4096-char
|
||||
split point, since a continuation chunk is almost certain.
|
||||
"""
|
||||
current_task = asyncio.current_task()
|
||||
try:
|
||||
await asyncio.sleep(self._text_batch_delay_seconds)
|
||||
# Adaptive delay: if the latest chunk is near Telegram's 4096-char
|
||||
# split point, a continuation is almost certain — wait longer.
|
||||
pending = self._pending_text_batches.get(key)
|
||||
last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
|
||||
if last_len >= self._SPLIT_THRESHOLD:
|
||||
delay = self._text_batch_split_delay_seconds
|
||||
else:
|
||||
delay = self._text_batch_delay_seconds
|
||||
await asyncio.sleep(delay)
|
||||
event = self._pending_text_batches.pop(key, None)
|
||||
if not event:
|
||||
return
|
||||
|
|
@ -2713,7 +2788,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
if chat_id and message_id:
|
||||
await self._set_reaction(chat_id, message_id, "\U0001f440")
|
||||
|
||||
async def on_processing_complete(self, event: MessageEvent, success: bool) -> None:
|
||||
async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
|
||||
"""Swap the in-progress reaction for a final success/failure reaction.
|
||||
|
||||
Unlike Discord (additive reactions), Telegram's set_message_reaction
|
||||
|
|
@ -2723,5 +2798,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
return
|
||||
chat_id = getattr(event.source, "chat_id", None)
|
||||
message_id = getattr(event, "message_id", None)
|
||||
if chat_id and message_id:
|
||||
await self._set_reaction(chat_id, message_id, "\u2705" if success else "\u274c")
|
||||
if chat_id and message_id and outcome != ProcessingOutcome.CANCELLED:
|
||||
await self._set_reaction(
|
||||
chat_id,
|
||||
message_id,
|
||||
"\U0001f44d" if outcome == ProcessingOutcome.SUCCESS else "\U0001f44e",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -110,7 +110,8 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
|||
logger.warning("[Telegram] Fallback IP %s failed: %s", ip, exc)
|
||||
continue
|
||||
|
||||
assert last_error is not None
|
||||
if last_error is None:
|
||||
raise RuntimeError("All Telegram fallback IPs exhausted but no error was recorded")
|
||||
raise last_error
|
||||
|
||||
async def aclose(self) -> None:
|
||||
|
|
|
|||
|
|
@ -186,13 +186,23 @@ class WebhookAdapter(BasePlatformAdapter):
|
|||
if deliver_type == "github_comment":
|
||||
return await self._deliver_github_comment(content, delivery)
|
||||
|
||||
# Cross-platform delivery (telegram, discord, etc.)
|
||||
# Cross-platform delivery — any platform with a gateway adapter
|
||||
if self.gateway_runner and deliver_type in (
|
||||
"telegram",
|
||||
"discord",
|
||||
"slack",
|
||||
"signal",
|
||||
"sms",
|
||||
"whatsapp",
|
||||
"matrix",
|
||||
"mattermost",
|
||||
"homeassistant",
|
||||
"email",
|
||||
"dingtalk",
|
||||
"feishu",
|
||||
"wecom",
|
||||
"weixin",
|
||||
"bluebubbles",
|
||||
):
|
||||
return await self._deliver_cross_platform(
|
||||
deliver_type, content, delivery
|
||||
|
|
@ -262,7 +272,7 @@ class WebhookAdapter(BasePlatformAdapter):
|
|||
", ".join(self._dynamic_routes.keys()) or "(none)",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("[webhook] Failed to reload dynamic routes: %s", e)
|
||||
logger.error("[webhook] Failed to reload dynamic routes: %s", e)
|
||||
|
||||
async def _handle_webhook(self, request: "web.Request") -> "web.Response":
|
||||
"""POST /webhooks/{route_name} — receive and process a webhook event."""
|
||||
|
|
|
|||
|
|
@ -143,6 +143,9 @@ class WeComAdapter(BasePlatformAdapter):
|
|||
"""WeCom AI Bot adapter backed by a persistent WebSocket connection."""
|
||||
|
||||
MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH
|
||||
# Threshold for detecting WeCom client-side message splits.
|
||||
# When a chunk is near the 4000-char limit, a continuation is almost certain.
|
||||
_SPLIT_THRESHOLD = 3900
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.WECOM)
|
||||
|
|
@ -172,6 +175,13 @@ class WeComAdapter(BasePlatformAdapter):
|
|||
self._seen_messages: Dict[str, float] = {}
|
||||
self._reply_req_ids: Dict[str, str] = {}
|
||||
|
||||
# Text batching: merge rapid successive messages (Telegram-style).
|
||||
# WeCom clients split long messages around 4000 chars.
|
||||
self._text_batch_delay_seconds = float(os.getenv("HERMES_WECOM_TEXT_BATCH_DELAY_SECONDS", "0.6"))
|
||||
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_WECOM_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
|
||||
self._pending_text_batches: Dict[str, MessageEvent] = {}
|
||||
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Connection lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
|
@ -519,7 +529,82 @@ class WeComAdapter(BasePlatformAdapter):
|
|||
timestamp=datetime.now(tz=timezone.utc),
|
||||
)
|
||||
|
||||
await self.handle_message(event)
|
||||
# Only batch plain text messages — commands, media, etc. dispatch
|
||||
# immediately since they won't be split by the WeCom client.
|
||||
if message_type == MessageType.TEXT and self._text_batch_delay_seconds > 0:
|
||||
self._enqueue_text_event(event)
|
||||
else:
|
||||
await self.handle_message(event)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text message aggregation (handles WeCom client-side splits)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _text_batch_key(self, event: MessageEvent) -> str:
|
||||
"""Session-scoped key for text message batching."""
|
||||
from gateway.session import build_session_key
|
||||
return build_session_key(
|
||||
event.source,
|
||||
group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
|
||||
thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
|
||||
)
|
||||
|
||||
def _enqueue_text_event(self, event: MessageEvent) -> None:
|
||||
"""Buffer a text event and reset the flush timer.
|
||||
|
||||
When WeCom splits a long user message at 4000 chars, the chunks
|
||||
arrive within a few hundred milliseconds. This merges them into
|
||||
a single event before dispatching.
|
||||
"""
|
||||
key = self._text_batch_key(event)
|
||||
existing = self._pending_text_batches.get(key)
|
||||
chunk_len = len(event.text or "")
|
||||
if existing is None:
|
||||
event._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
self._pending_text_batches[key] = event
|
||||
else:
|
||||
if event.text:
|
||||
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
|
||||
existing._last_chunk_len = chunk_len # type: ignore[attr-defined]
|
||||
# Merge any media that might be attached
|
||||
if event.media_urls:
|
||||
existing.media_urls.extend(event.media_urls)
|
||||
existing.media_types.extend(event.media_types)
|
||||
|
||||
# Cancel any pending flush and restart the timer
|
||||
prior_task = self._pending_text_batch_tasks.get(key)
|
||||
if prior_task and not prior_task.done():
|
||||
prior_task.cancel()
|
||||
self._pending_text_batch_tasks[key] = asyncio.create_task(
|
||||
self._flush_text_batch(key)
|
||||
)
|
||||
|
||||
async def _flush_text_batch(self, key: str) -> None:
|
||||
"""Wait for the quiet period then dispatch the aggregated text.
|
||||
|
||||
Uses a longer delay when the latest chunk is near WeCom's 4000-char
|
||||
split point, since a continuation chunk is almost certain.
|
||||
"""
|
||||
current_task = asyncio.current_task()
|
||||
try:
|
||||
pending = self._pending_text_batches.get(key)
|
||||
last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
|
||||
if last_len >= self._SPLIT_THRESHOLD:
|
||||
delay = self._text_batch_split_delay_seconds
|
||||
else:
|
||||
delay = self._text_batch_delay_seconds
|
||||
await asyncio.sleep(delay)
|
||||
event = self._pending_text_batches.pop(key, None)
|
||||
if not event:
|
||||
return
|
||||
logger.info(
|
||||
"[WeCom] Flushing text batch %s (%d chars)",
|
||||
key, len(event.text or ""),
|
||||
)
|
||||
await self.handle_message(event)
|
||||
finally:
|
||||
if self._pending_text_batch_tasks.get(key) is current_task:
|
||||
self._pending_text_batch_tasks.pop(key, None)
|
||||
|
||||
@staticmethod
|
||||
def _extract_text(body: Dict[str, Any]) -> Tuple[str, Optional[str]]:
|
||||
|
|
@ -611,7 +696,11 @@ class WeComAdapter(BasePlatformAdapter):
|
|||
|
||||
if kind == "image":
|
||||
ext = self._detect_image_ext(raw)
|
||||
return cache_image_from_bytes(raw, ext), self._mime_for_ext(ext, fallback="image/jpeg")
|
||||
try:
|
||||
return cache_image_from_bytes(raw, ext), self._mime_for_ext(ext, fallback="image/jpeg")
|
||||
except ValueError as exc:
|
||||
logger.warning("[%s] Rejected non-image bytes: %s", self.name, exc)
|
||||
return None
|
||||
|
||||
filename = str(media.get("filename") or media.get("name") or "wecom_file")
|
||||
return cache_document_from_bytes(raw, filename), mimetypes.guess_type(filename)[0] or "application/octet-stream"
|
||||
|
|
@ -637,7 +726,11 @@ class WeComAdapter(BasePlatformAdapter):
|
|||
content_type = str(headers.get("content-type") or "").split(";", 1)[0].strip() or "application/octet-stream"
|
||||
if kind == "image":
|
||||
ext = self._guess_extension(url, content_type, fallback=self._detect_image_ext(raw))
|
||||
return cache_image_from_bytes(raw, ext), content_type or self._mime_for_ext(ext, fallback="image/jpeg")
|
||||
try:
|
||||
return cache_image_from_bytes(raw, ext), content_type or self._mime_for_ext(ext, fallback="image/jpeg")
|
||||
except ValueError as exc:
|
||||
logger.warning("[%s] Rejected non-image bytes from %s: %s", self.name, url, exc)
|
||||
return None
|
||||
|
||||
filename = self._guess_filename(url, headers.get("content-disposition"), content_type)
|
||||
return cache_document_from_bytes(raw, filename), content_type
|
||||
|
|
|
|||
1669
gateway/platforms/weixin.py
Normal file
1669
gateway/platforms/weixin.py
Normal file
File diff suppressed because it is too large
Load diff
20
gateway/restart.py
Normal file
20
gateway/restart.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
"""Shared gateway restart constants and parsing helpers."""
|
||||
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
# EX_TEMPFAIL from sysexits.h — used to ask the service manager to restart
|
||||
# the gateway after a graceful drain/reload path completes.
|
||||
GATEWAY_SERVICE_RESTART_EXIT_CODE = 75
|
||||
|
||||
DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT = float(
|
||||
DEFAULT_CONFIG["agent"]["restart_drain_timeout"]
|
||||
)
|
||||
|
||||
|
||||
def parse_restart_drain_timeout(raw: object) -> float:
|
||||
"""Parse a configured drain timeout, falling back to the shared default."""
|
||||
try:
|
||||
value = float(raw) if str(raw or "").strip() else DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
|
||||
except (TypeError, ValueError):
|
||||
return DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
|
||||
return max(0.0, value)
|
||||
1157
gateway/run.py
1157
gateway/run.py
File diff suppressed because it is too large
Load diff
|
|
@ -32,9 +32,6 @@ def _now() -> datetime:
|
|||
# PII redaction helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PHONE_RE = re.compile(r"^\+?\d[\d\-\s]{6,}$")
|
||||
|
||||
|
||||
def _hash_id(value: str) -> str:
|
||||
"""Deterministic 12-char hex hash of an identifier."""
|
||||
return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]
|
||||
|
|
@ -58,10 +55,6 @@ def _hash_chat_id(value: str) -> str:
|
|||
return _hash_id(value)
|
||||
|
||||
|
||||
def _looks_like_phone(value: str) -> bool:
|
||||
"""Return True if *value* looks like a phone number (E.164 or similar)."""
|
||||
return bool(_PHONE_RE.match(value.strip()))
|
||||
|
||||
from .config import (
|
||||
Platform,
|
||||
GatewayConfig,
|
||||
|
|
@ -144,15 +137,6 @@ class SessionSource:
|
|||
chat_id_alt=data.get("chat_id_alt"),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def local_cli(cls) -> "SessionSource":
|
||||
"""Create a source representing the local CLI."""
|
||||
return cls(
|
||||
platform=Platform.LOCAL,
|
||||
chat_id="cli",
|
||||
chat_name="CLI terminal",
|
||||
chat_type="dm",
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -510,8 +494,7 @@ class SessionStore:
|
|||
"""
|
||||
|
||||
def __init__(self, sessions_dir: Path, config: GatewayConfig,
|
||||
has_active_processes_fn=None,
|
||||
on_auto_reset=None):
|
||||
has_active_processes_fn=None):
|
||||
self.sessions_dir = sessions_dir
|
||||
self.config = config
|
||||
self._entries: Dict[str, SessionEntry] = {}
|
||||
|
|
@ -770,41 +753,6 @@ class SessionStore:
|
|||
except Exception as e:
|
||||
print(f"[gateway] Warning: Failed to create SQLite session: {e}")
|
||||
|
||||
# Seed new DM thread sessions with parent DM session history.
|
||||
# When a bot reply creates a Slack thread and the user responds in it,
|
||||
# the thread gets a new session (keyed by thread_ts). Without seeding,
|
||||
# the thread session starts with zero context — the user's original
|
||||
# question and the bot's answer are invisible. Fix: copy the parent
|
||||
# DM session's transcript into the new thread session so context carries
|
||||
# over while still keeping threads isolated from each other.
|
||||
if (
|
||||
source.chat_type == "dm"
|
||||
and source.thread_id
|
||||
and entry.created_at == entry.updated_at # brand-new session
|
||||
and not was_auto_reset
|
||||
):
|
||||
parent_source = SessionSource(
|
||||
platform=source.platform,
|
||||
chat_id=source.chat_id,
|
||||
chat_type="dm",
|
||||
user_id=source.user_id,
|
||||
# no thread_id — this is the parent DM session
|
||||
)
|
||||
parent_key = self._generate_session_key(parent_source)
|
||||
with self._lock:
|
||||
parent_entry = self._entries.get(parent_key)
|
||||
if parent_entry and parent_entry.session_id != entry.session_id:
|
||||
try:
|
||||
parent_history = self.load_transcript(parent_entry.session_id)
|
||||
if parent_history:
|
||||
self.rewrite_transcript(entry.session_id, parent_history)
|
||||
logger.info(
|
||||
"[Session] Seeded DM thread session %s with %d messages from parent %s",
|
||||
entry.session_id, len(parent_history), parent_entry.session_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("[Session] Failed to seed thread session: %s", e)
|
||||
|
||||
return entry
|
||||
|
||||
def update_session(
|
||||
|
|
|
|||
113
gateway/session_context.py
Normal file
113
gateway/session_context.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
"""
|
||||
Session-scoped context variables for the Hermes gateway.
|
||||
|
||||
Replaces the previous ``os.environ``-based session state
|
||||
(``HERMES_SESSION_PLATFORM``, ``HERMES_SESSION_CHAT_ID``, etc.) with
|
||||
Python's ``contextvars.ContextVar``.
|
||||
|
||||
**Why this matters**
|
||||
|
||||
The gateway processes messages concurrently via ``asyncio``. When two
|
||||
messages arrive at the same time the old code did:
|
||||
|
||||
os.environ["HERMES_SESSION_THREAD_ID"] = str(context.source.thread_id)
|
||||
|
||||
Because ``os.environ`` is *process-global*, Message A's value was
|
||||
silently overwritten by Message B before Message A's agent finished
|
||||
running. Background-task notifications and tool calls therefore routed
|
||||
to the wrong thread.
|
||||
|
||||
``contextvars.ContextVar`` values are *task-local*: each ``asyncio``
|
||||
task (and any ``run_in_executor`` thread it spawns) gets its own copy,
|
||||
so concurrent messages never interfere.
|
||||
|
||||
**Backward compatibility**
|
||||
|
||||
The public helper ``get_session_env(name, default="")`` mirrors the old
|
||||
``os.getenv("HERMES_SESSION_*", ...)`` calls. Existing tool code only
|
||||
needs to replace the import + call site:
|
||||
|
||||
# before
|
||||
import os
|
||||
platform = os.getenv("HERMES_SESSION_PLATFORM", "")
|
||||
|
||||
# after
|
||||
from gateway.session_context import get_session_env
|
||||
platform = get_session_env("HERMES_SESSION_PLATFORM", "")
|
||||
"""
|
||||
|
||||
from contextvars import ContextVar
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-task session variables
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_SESSION_PLATFORM: ContextVar[str] = ContextVar("HERMES_SESSION_PLATFORM", default="")
|
||||
_SESSION_CHAT_ID: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_ID", default="")
|
||||
_SESSION_CHAT_NAME: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_NAME", default="")
|
||||
_SESSION_THREAD_ID: ContextVar[str] = ContextVar("HERMES_SESSION_THREAD_ID", default="")
|
||||
|
||||
_VAR_MAP = {
|
||||
"HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
|
||||
"HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID,
|
||||
"HERMES_SESSION_CHAT_NAME": _SESSION_CHAT_NAME,
|
||||
"HERMES_SESSION_THREAD_ID": _SESSION_THREAD_ID,
|
||||
}
|
||||
|
||||
|
||||
def set_session_vars(
|
||||
platform: str = "",
|
||||
chat_id: str = "",
|
||||
chat_name: str = "",
|
||||
thread_id: str = "",
|
||||
) -> list:
|
||||
"""Set all session context variables and return reset tokens.
|
||||
|
||||
Call ``clear_session_vars(tokens)`` in a ``finally`` block to restore
|
||||
the previous values when the handler exits.
|
||||
|
||||
Returns a list of ``Token`` objects (one per variable) that can be
|
||||
passed to ``clear_session_vars``.
|
||||
"""
|
||||
tokens = [
|
||||
_SESSION_PLATFORM.set(platform),
|
||||
_SESSION_CHAT_ID.set(chat_id),
|
||||
_SESSION_CHAT_NAME.set(chat_name),
|
||||
_SESSION_THREAD_ID.set(thread_id),
|
||||
]
|
||||
return tokens
|
||||
|
||||
|
||||
def clear_session_vars(tokens: list) -> None:
|
||||
"""Restore session context variables to their pre-handler values."""
|
||||
if not tokens:
|
||||
return
|
||||
vars_in_order = [
|
||||
_SESSION_PLATFORM,
|
||||
_SESSION_CHAT_ID,
|
||||
_SESSION_CHAT_NAME,
|
||||
_SESSION_THREAD_ID,
|
||||
]
|
||||
for var, token in zip(vars_in_order, tokens):
|
||||
var.reset(token)
|
||||
|
||||
|
||||
def get_session_env(name: str, default: str = "") -> str:
|
||||
"""Read a session context variable by its legacy ``HERMES_SESSION_*`` name.
|
||||
|
||||
Drop-in replacement for ``os.getenv("HERMES_SESSION_*", default)``.
|
||||
|
||||
Resolution order:
|
||||
1. Context variable (set by the gateway for concurrency-safe access)
|
||||
2. ``os.environ`` (used by CLI, cron scheduler, and tests)
|
||||
3. *default*
|
||||
"""
|
||||
import os
|
||||
|
||||
var = _VAR_MAP.get(name)
|
||||
if var is not None:
|
||||
value = var.get()
|
||||
if value:
|
||||
return value
|
||||
# Fall back to os.environ for CLI, cron, and test compatibility
|
||||
return os.getenv(name, default)
|
||||
|
|
@ -14,6 +14,8 @@ concurrently under distinct configurations).
|
|||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
|
@ -23,6 +25,7 @@ from typing import Any, Optional
|
|||
_GATEWAY_KIND = "hermes-gateway"
|
||||
_RUNTIME_STATUS_FILE = "gateway_state.json"
|
||||
_LOCKS_DIRNAME = "gateway-locks"
|
||||
_IS_WINDOWS = sys.platform == "win32"
|
||||
|
||||
|
||||
def _get_pid_path() -> Path:
|
||||
|
|
@ -49,6 +52,33 @@ def _utc_now_iso() -> str:
|
|||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def terminate_pid(pid: int, *, force: bool = False) -> None:
|
||||
"""Terminate a PID with platform-appropriate force semantics.
|
||||
|
||||
POSIX uses SIGTERM/SIGKILL. Windows uses taskkill /T /F for true force-kill
|
||||
because os.kill(..., SIGTERM) is not equivalent to a tree-killing hard stop.
|
||||
"""
|
||||
if force and _IS_WINDOWS:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["taskkill", "/PID", str(pid), "/T", "/F"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
return
|
||||
|
||||
if result.returncode != 0:
|
||||
details = (result.stderr or result.stdout or "").strip()
|
||||
raise OSError(details or f"taskkill failed for PID {pid}")
|
||||
return
|
||||
|
||||
sig = signal.SIGTERM if not force else getattr(signal, "SIGKILL", signal.SIGTERM)
|
||||
os.kill(pid, sig)
|
||||
|
||||
|
||||
def _scope_hash(identity: str) -> str:
|
||||
return hashlib.sha256(identity.encode("utf-8")).hexdigest()[:16]
|
||||
|
||||
|
|
@ -128,6 +158,8 @@ def _build_runtime_status_record() -> dict[str, Any]:
|
|||
payload.update({
|
||||
"gateway_state": "starting",
|
||||
"exit_reason": None,
|
||||
"restart_requested": False,
|
||||
"active_agents": 0,
|
||||
"platforms": {},
|
||||
"updated_at": _utc_now_iso(),
|
||||
})
|
||||
|
|
@ -188,6 +220,8 @@ def write_runtime_status(
|
|||
*,
|
||||
gateway_state: Optional[str] = None,
|
||||
exit_reason: Optional[str] = None,
|
||||
restart_requested: Optional[bool] = None,
|
||||
active_agents: Optional[int] = None,
|
||||
platform: Optional[str] = None,
|
||||
platform_state: Optional[str] = None,
|
||||
error_code: Optional[str] = None,
|
||||
|
|
@ -206,6 +240,10 @@ def write_runtime_status(
|
|||
payload["gateway_state"] = gateway_state
|
||||
if exit_reason is not None:
|
||||
payload["exit_reason"] = exit_reason
|
||||
if restart_requested is not None:
|
||||
payload["restart_requested"] = bool(restart_requested)
|
||||
if active_agents is not None:
|
||||
payload["active_agents"] = max(0, int(active_agents))
|
||||
|
||||
if platform is not None:
|
||||
platform_payload = payload["platforms"].get(platform, {})
|
||||
|
|
|
|||
|
|
@ -205,11 +205,20 @@ class GatewayStreamConsumer:
|
|||
await self._send_or_edit(self._accumulated)
|
||||
return
|
||||
|
||||
# Tool boundary: the should_edit block above already flushed
|
||||
# accumulated text without a cursor. Reset state so the next
|
||||
# text chunk creates a fresh message below any tool-progress
|
||||
# messages the gateway sent in between.
|
||||
if got_segment_break:
|
||||
# Tool boundary: reset message state so the next text chunk
|
||||
# creates a fresh message below any tool-progress messages.
|
||||
#
|
||||
# Exception: when _message_id is "__no_edit__" the platform
|
||||
# never returned a real message ID (e.g. Signal, webhook with
|
||||
# github_comment delivery). Resetting to None would re-enter
|
||||
# the "first send" path on every tool boundary and post one
|
||||
# platform message per tool call — that is what caused 155
|
||||
# comments under a single PR. Instead, keep all state so the
|
||||
# full continuation is delivered once via _send_fallback_final.
|
||||
# (When editing fails mid-stream due to flood control the id is
|
||||
# a real string like "msg_1", not "__no_edit__", so that case
|
||||
# still resets and creates a fresh segment as intended.)
|
||||
if got_segment_break and self._message_id != "__no_edit__":
|
||||
self._message_id = None
|
||||
self._accumulated = ""
|
||||
self._last_sent_text = ""
|
||||
|
|
|
|||
|
|
@ -70,7 +70,6 @@ DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
|||
DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
|
||||
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
|
||||
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
|
||||
DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai"
|
||||
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
|
||||
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
|
||||
|
|
@ -199,6 +198,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
|||
api_key_env_vars=("DEEPSEEK_API_KEY",),
|
||||
base_url_env_var="DEEPSEEK_BASE_URL",
|
||||
),
|
||||
"xai": ProviderConfig(
|
||||
id="xai",
|
||||
name="xAI",
|
||||
auth_type="api_key",
|
||||
inference_base_url="https://api.x.ai/v1",
|
||||
api_key_env_vars=("XAI_API_KEY",),
|
||||
base_url_env_var="XAI_BASE_URL",
|
||||
),
|
||||
"ai-gateway": ProviderConfig(
|
||||
id="ai-gateway",
|
||||
name="AI Gateway",
|
||||
|
|
@ -705,6 +712,27 @@ def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Pa
|
|||
return _save_auth_store(auth_store)
|
||||
|
||||
|
||||
def suppress_credential_source(provider_id: str, source: str) -> None:
|
||||
"""Mark a credential source as suppressed so it won't be re-seeded."""
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
suppressed = auth_store.setdefault("suppressed_sources", {})
|
||||
provider_list = suppressed.setdefault(provider_id, [])
|
||||
if source not in provider_list:
|
||||
provider_list.append(source)
|
||||
_save_auth_store(auth_store)
|
||||
|
||||
|
||||
def is_source_suppressed(provider_id: str, source: str) -> bool:
|
||||
"""Check if a credential source has been suppressed by the user."""
|
||||
try:
|
||||
auth_store = _load_auth_store()
|
||||
suppressed = auth_store.get("suppressed_sources", {})
|
||||
return source in suppressed.get(provider_id, [])
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Return persisted auth state for a provider, or None."""
|
||||
auth_store = _load_auth_store()
|
||||
|
|
@ -717,6 +745,57 @@ def get_active_provider() -> Optional[str]:
|
|||
return auth_store.get("active_provider")
|
||||
|
||||
|
||||
def is_provider_explicitly_configured(provider_id: str) -> bool:
|
||||
"""Return True only if the user has explicitly configured this provider.
|
||||
|
||||
Checks:
|
||||
1. active_provider in auth.json matches
|
||||
2. model.provider in config.yaml matches
|
||||
3. Provider-specific env vars are set (e.g. ANTHROPIC_API_KEY)
|
||||
|
||||
This is used to gate auto-discovery of external credentials (e.g.
|
||||
Claude Code's ~/.claude/.credentials.json) so they are never used
|
||||
without the user's explicit choice. See PR #4210 for the same
|
||||
pattern applied to the setup wizard gate.
|
||||
"""
|
||||
normalized = (provider_id or "").strip().lower()
|
||||
|
||||
# 1. Check auth.json active_provider
|
||||
try:
|
||||
auth_store = _load_auth_store()
|
||||
active = (auth_store.get("active_provider") or "").strip().lower()
|
||||
if active and active == normalized:
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 2. Check config.yaml model.provider
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
model_cfg = cfg.get("model")
|
||||
if isinstance(model_cfg, dict):
|
||||
cfg_provider = (model_cfg.get("provider") or "").strip().lower()
|
||||
if cfg_provider == normalized:
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 3. Check provider-specific env vars
|
||||
# Exclude CLAUDE_CODE_OAUTH_TOKEN — it's set by Claude Code itself,
|
||||
# not by the user explicitly configuring anthropic in Hermes.
|
||||
_IMPLICIT_ENV_VARS = {"CLAUDE_CODE_OAUTH_TOKEN"}
|
||||
pconfig = PROVIDER_REGISTRY.get(normalized)
|
||||
if pconfig and pconfig.auth_type == "api_key":
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
if env_var in _IMPLICIT_ENV_VARS:
|
||||
continue
|
||||
if has_usable_secret(os.getenv(env_var, "")):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def clear_provider_auth(provider_id: Optional[str] = None) -> bool:
|
||||
"""
|
||||
Clear auth state for a provider. Used by `hermes logout`.
|
||||
|
|
@ -819,7 +898,7 @@ def resolve_provider(
|
|||
_PROVIDER_ALIASES = {
|
||||
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
|
||||
"google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
|
||||
"kimi": "kimi-coding", "moonshot": "kimi-coding",
|
||||
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
|
||||
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
|
||||
"claude": "anthropic", "claude-code": "anthropic",
|
||||
"github": "copilot", "github-copilot": "copilot",
|
||||
|
|
@ -1442,7 +1521,15 @@ def _resolve_verify(
|
|||
if effective_insecure:
|
||||
return False
|
||||
if effective_ca:
|
||||
return str(effective_ca)
|
||||
ca_path = str(effective_ca)
|
||||
if not os.path.isfile(ca_path):
|
||||
import logging
|
||||
logging.getLogger("hermes.auth").warning(
|
||||
"CA bundle path does not exist: %s — falling back to default certificates",
|
||||
ca_path,
|
||||
)
|
||||
return True
|
||||
return ca_path
|
||||
return True
|
||||
|
||||
|
||||
|
|
@ -2342,33 +2429,6 @@ def resolve_external_process_provider_credentials(provider_id: str) -> Dict[str,
|
|||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# External credential detection
|
||||
# =============================================================================
|
||||
|
||||
def detect_external_credentials() -> List[Dict[str, Any]]:
|
||||
"""Scan for credentials from other CLI tools that Hermes can reuse.
|
||||
|
||||
Returns a list of dicts, each with:
|
||||
- provider: str -- Hermes provider id (e.g. "openai-codex")
|
||||
- path: str -- filesystem path where creds were found
|
||||
- label: str -- human-friendly description for the setup UI
|
||||
"""
|
||||
found: List[Dict[str, Any]] = []
|
||||
|
||||
# Codex CLI: ~/.codex/auth.json (importable, not shared)
|
||||
cli_tokens = _import_codex_cli_tokens()
|
||||
if cli_tokens:
|
||||
codex_path = Path.home() / ".codex" / "auth.json"
|
||||
found.append({
|
||||
"provider": "openai-codex",
|
||||
"path": str(codex_path),
|
||||
"label": f"Codex CLI credentials found ({codex_path}) — run `hermes auth` to create a separate session",
|
||||
})
|
||||
|
||||
return found
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CLI Commands — login / logout
|
||||
# =============================================================================
|
||||
|
|
@ -2572,6 +2632,8 @@ def _prompt_model_selection(
|
|||
title=effective_title,
|
||||
)
|
||||
idx = menu.show()
|
||||
from hermes_cli.curses_ui import flush_stdin
|
||||
flush_stdin()
|
||||
if idx is None:
|
||||
return None
|
||||
print()
|
||||
|
|
@ -2581,7 +2643,7 @@ def _prompt_model_selection(
|
|||
custom = input("Enter model name: ").strip()
|
||||
return custom if custom else None
|
||||
return None
|
||||
except (ImportError, NotImplementedError):
|
||||
except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
|
||||
pass
|
||||
|
||||
# Fallback: numbered list
|
||||
|
|
|
|||
|
|
@ -347,8 +347,11 @@ def auth_remove_command(args) -> None:
|
|||
print("Cleared Hermes Anthropic OAuth credentials")
|
||||
|
||||
elif removed.source == "claude_code" and provider == "anthropic":
|
||||
print("Note: Claude Code credentials live in ~/.claude/.credentials.json")
|
||||
print(" Remove them manually if you want to deauthorize Claude Code.")
|
||||
from hermes_cli.auth import suppress_credential_source
|
||||
suppress_credential_source(provider, "claude_code")
|
||||
print("Suppressed claude_code credential — it will not be re-seeded.")
|
||||
print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
|
||||
print("Run `hermes auth add anthropic` to re-enable if needed.")
|
||||
|
||||
|
||||
def auth_reset_command(args) -> None:
|
||||
|
|
|
|||
|
|
@ -90,12 +90,6 @@ HERMES_CADUCEUS = """[#CD7F32]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣀⡀⠀⣀⣀
|
|||
[#B8860B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠳⠈⣡⠞⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#B8860B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]"""
|
||||
|
||||
COMPACT_BANNER = """
|
||||
[bold #FFD700]╔══════════════════════════════════════════════════════════════╗[/]
|
||||
[bold #FFD700]║[/] [#FFBF00]⚕ NOUS HERMES[/] [dim #B8860B]- AI Agent Framework[/] [bold #FFD700]║[/]
|
||||
[bold #FFD700]║[/] [#CD7F32]Messenger of the Digital Gods[/] [dim #B8860B]Nous Research[/] [bold #FFD700]║[/]
|
||||
[bold #FFD700]╚══════════════════════════════════════════════════════════════╝[/]
|
||||
"""
|
||||
|
||||
|
||||
# =========================================================================
|
||||
|
|
|
|||
|
|
@ -1,140 +0,0 @@
|
|||
"""Shared curses-based multi-select checklist for Hermes CLI.
|
||||
|
||||
Used by both ``hermes tools`` and ``hermes skills`` to present a
|
||||
toggleable list of items. Falls back to a numbered text UI when
|
||||
curses is unavailable (Windows without curses, piped stdin, etc.).
|
||||
"""
|
||||
|
||||
import sys
|
||||
from typing import List, Set
|
||||
|
||||
from hermes_cli.colors import Colors, color
|
||||
|
||||
|
||||
def curses_checklist(
|
||||
title: str,
|
||||
items: List[str],
|
||||
pre_selected: Set[int],
|
||||
) -> Set[int]:
|
||||
"""Multi-select checklist. Returns set of **selected** indices.
|
||||
|
||||
Args:
|
||||
title: Header text shown at the top of the checklist.
|
||||
items: Display labels for each row.
|
||||
pre_selected: Indices that start checked.
|
||||
|
||||
Returns:
|
||||
The indices the user confirmed as checked. On cancel (ESC/q),
|
||||
returns ``pre_selected`` unchanged.
|
||||
"""
|
||||
# Safety: return defaults when stdin is not a terminal.
|
||||
if not sys.stdin.isatty():
|
||||
return set(pre_selected)
|
||||
|
||||
try:
|
||||
import curses
|
||||
selected = set(pre_selected)
|
||||
result = [None]
|
||||
|
||||
def _ui(stdscr):
|
||||
curses.curs_set(0)
|
||||
if curses.has_colors():
|
||||
curses.start_color()
|
||||
curses.use_default_colors()
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, 8, -1) # dim gray
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
|
||||
while True:
|
||||
stdscr.clear()
|
||||
max_y, max_x = stdscr.getmaxyx()
|
||||
|
||||
# Header
|
||||
try:
|
||||
hattr = curses.A_BOLD | (curses.color_pair(2) if curses.has_colors() else 0)
|
||||
stdscr.addnstr(0, 0, title, max_x - 1, hattr)
|
||||
stdscr.addnstr(
|
||||
1, 0,
|
||||
" ↑↓ navigate SPACE toggle ENTER confirm ESC cancel",
|
||||
max_x - 1, curses.A_DIM,
|
||||
)
|
||||
except curses.error:
|
||||
pass
|
||||
|
||||
# Scrollable item list
|
||||
visible_rows = max_y - 3
|
||||
if cursor < scroll_offset:
|
||||
scroll_offset = cursor
|
||||
elif cursor >= scroll_offset + visible_rows:
|
||||
scroll_offset = cursor - visible_rows + 1
|
||||
|
||||
for draw_i, i in enumerate(
|
||||
range(scroll_offset, min(len(items), scroll_offset + visible_rows))
|
||||
):
|
||||
y = draw_i + 3
|
||||
if y >= max_y - 1:
|
||||
break
|
||||
check = "✓" if i in selected else " "
|
||||
arrow = "→" if i == cursor else " "
|
||||
line = f" {arrow} [{check}] {items[i]}"
|
||||
|
||||
attr = curses.A_NORMAL
|
||||
if i == cursor:
|
||||
attr = curses.A_BOLD
|
||||
if curses.has_colors():
|
||||
attr |= curses.color_pair(1)
|
||||
try:
|
||||
stdscr.addnstr(y, 0, line, max_x - 1, attr)
|
||||
except curses.error:
|
||||
pass
|
||||
|
||||
stdscr.refresh()
|
||||
key = stdscr.getch()
|
||||
|
||||
if key in (curses.KEY_UP, ord("k")):
|
||||
cursor = (cursor - 1) % len(items)
|
||||
elif key in (curses.KEY_DOWN, ord("j")):
|
||||
cursor = (cursor + 1) % len(items)
|
||||
elif key == ord(" "):
|
||||
selected.symmetric_difference_update({cursor})
|
||||
elif key in (curses.KEY_ENTER, 10, 13):
|
||||
result[0] = set(selected)
|
||||
return
|
||||
elif key in (27, ord("q")):
|
||||
result[0] = set(pre_selected)
|
||||
return
|
||||
|
||||
curses.wrapper(_ui)
|
||||
return result[0] if result[0] is not None else set(pre_selected)
|
||||
|
||||
except Exception:
|
||||
pass # fall through to numbered fallback
|
||||
|
||||
# ── Numbered text fallback ────────────────────────────────────────────
|
||||
selected = set(pre_selected)
|
||||
print(color(f"\n {title}", Colors.YELLOW))
|
||||
print(color(" Toggle by number, Enter to confirm.\n", Colors.DIM))
|
||||
|
||||
while True:
|
||||
for i, label in enumerate(items):
|
||||
check = "✓" if i in selected else " "
|
||||
print(f" {i + 1:3}. [{check}] {label}")
|
||||
print()
|
||||
|
||||
try:
|
||||
raw = input(color(" Number to toggle, 's' to save, 'q' to cancel: ", Colors.DIM)).strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
return set(pre_selected)
|
||||
|
||||
if raw.lower() == "s" or raw == "":
|
||||
return selected
|
||||
if raw.lower() == "q":
|
||||
return set(pre_selected)
|
||||
try:
|
||||
idx = int(raw) - 1
|
||||
if 0 <= idx < len(items):
|
||||
selected.symmetric_difference_update({idx})
|
||||
except ValueError:
|
||||
print(color(" Invalid input", Colors.DIM))
|
||||
|
|
@ -19,10 +19,9 @@ import subprocess
|
|||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from hermes_constants import is_wsl as _is_wsl
|
||||
|
||||
# Cache WSL detection (checked once per process)
|
||||
_wsl_detected: bool | None = None
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def save_clipboard_image(dest: Path) -> bool:
|
||||
|
|
@ -218,19 +217,6 @@ def _windows_save(dest: Path) -> bool:
|
|||
|
||||
# ── Linux ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _is_wsl() -> bool:
|
||||
"""Detect if running inside WSL (1 or 2)."""
|
||||
global _wsl_detected
|
||||
if _wsl_detected is not None:
|
||||
return _wsl_detected
|
||||
try:
|
||||
with open("/proc/version", "r") as f:
|
||||
_wsl_detected = "microsoft" in f.read().lower()
|
||||
except Exception:
|
||||
_wsl_detected = False
|
||||
return _wsl_detected
|
||||
|
||||
|
||||
def _linux_save(dest: Path) -> bool:
|
||||
"""Try clipboard backends in priority order: WSL → Wayland → X11."""
|
||||
if _is_wsl():
|
||||
|
|
|
|||
|
|
@ -16,8 +16,18 @@ from collections.abc import Callable, Mapping
|
|||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from prompt_toolkit.auto_suggest import AutoSuggest, Suggestion
|
||||
from prompt_toolkit.completion import Completer, Completion
|
||||
# prompt_toolkit is an optional CLI dependency — only needed for
|
||||
# SlashCommandCompleter and SlashCommandAutoSuggest. Gateway and test
|
||||
# environments that lack it must still be able to import this module
|
||||
# for resolve_command, gateway_help_lines, and COMMAND_REGISTRY.
|
||||
try:
|
||||
from prompt_toolkit.auto_suggest import AutoSuggest, Suggestion
|
||||
from prompt_toolkit.completion import Completer, Completion
|
||||
except ImportError: # pragma: no cover
|
||||
AutoSuggest = object # type: ignore[assignment,misc]
|
||||
Completer = object # type: ignore[assignment,misc]
|
||||
Suggestion = None # type: ignore[assignment]
|
||||
Completion = None # type: ignore[assignment]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -75,8 +85,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
aliases=("tasks",)),
|
||||
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
|
||||
aliases=("q",), args_hint="<prompt>"),
|
||||
CommandDef("status", "Show session info", "Session",
|
||||
gateway_only=True),
|
||||
CommandDef("status", "Show session info", "Session"),
|
||||
CommandDef("profile", "Show active profile name and home directory", "Info"),
|
||||
CommandDef("sethome", "Set this chat as the home channel", "Session",
|
||||
gateway_only=True, aliases=("set-home",)),
|
||||
|
|
@ -102,6 +111,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
||||
args_hint="[level|show|hide]",
|
||||
subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
||||
CommandDef("fast", "Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode (Normal/Fast)", "Configuration",
|
||||
args_hint="[normal|fast|status]",
|
||||
subcommands=("normal", "fast", "status", "on", "off")),
|
||||
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
||||
args_hint="[name]"),
|
||||
CommandDef("voice", "Toggle voice mode", "Configuration",
|
||||
|
|
@ -130,6 +142,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
CommandDef("commands", "Browse all commands and skills (paginated)", "Info",
|
||||
gateway_only=True, args_hint="[page]"),
|
||||
CommandDef("help", "Show available commands", "Info"),
|
||||
CommandDef("restart", "Gracefully restart the gateway after draining active runs", "Session",
|
||||
gateway_only=True),
|
||||
CommandDef("usage", "Show token usage and rate limits for the current session", "Info"),
|
||||
CommandDef("insights", "Show usage insights and analytics", "Info",
|
||||
args_hint="[days]"),
|
||||
|
|
@ -175,12 +189,6 @@ def resolve_command(name: str) -> CommandDef | None:
|
|||
return _COMMAND_LOOKUP.get(name.lower().lstrip("/"))
|
||||
|
||||
|
||||
def register_plugin_command(cmd: CommandDef) -> None:
|
||||
"""Append a plugin-defined command to the registry and refresh lookups."""
|
||||
COMMAND_REGISTRY.append(cmd)
|
||||
rebuild_lookups()
|
||||
|
||||
|
||||
def rebuild_lookups() -> None:
|
||||
"""Rebuild all derived lookup dicts from the current COMMAND_REGISTRY.
|
||||
|
||||
|
|
@ -643,8 +651,18 @@ class SlashCommandCompleter(Completer):
|
|||
def __init__(
|
||||
self,
|
||||
skill_commands_provider: Callable[[], Mapping[str, dict[str, Any]]] | None = None,
|
||||
command_filter: Callable[[str], bool] | None = None,
|
||||
) -> None:
|
||||
self._skill_commands_provider = skill_commands_provider
|
||||
self._command_filter = command_filter
|
||||
|
||||
def _command_allowed(self, slash_command: str) -> bool:
|
||||
if self._command_filter is None:
|
||||
return True
|
||||
try:
|
||||
return bool(self._command_filter(slash_command))
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
def _iter_skill_commands(self) -> Mapping[str, dict[str, Any]]:
|
||||
if self._skill_commands_provider is None:
|
||||
|
|
@ -943,7 +961,7 @@ class SlashCommandCompleter(Completer):
|
|||
return
|
||||
|
||||
# Static subcommand completions
|
||||
if " " not in sub_text and base_cmd in SUBCOMMANDS:
|
||||
if " " not in sub_text and base_cmd in SUBCOMMANDS and self._command_allowed(base_cmd):
|
||||
for sub in SUBCOMMANDS[base_cmd]:
|
||||
if sub.startswith(sub_lower) and sub != sub_lower:
|
||||
yield Completion(
|
||||
|
|
@ -956,6 +974,8 @@ class SlashCommandCompleter(Completer):
|
|||
word = text[1:]
|
||||
|
||||
for cmd, desc in COMMANDS.items():
|
||||
if not self._command_allowed(cmd):
|
||||
continue
|
||||
cmd_name = cmd[1:]
|
||||
if cmd_name.startswith(word):
|
||||
yield Completion(
|
||||
|
|
@ -1014,6 +1034,8 @@ class SlashCommandAutoSuggest(AutoSuggest):
|
|||
# Still typing the command name: /upd → suggest "ate"
|
||||
word = text[1:].lower()
|
||||
for cmd in COMMANDS:
|
||||
if self._completer is not None and not self._completer._command_allowed(cmd):
|
||||
continue
|
||||
cmd_name = cmd[1:] # strip leading /
|
||||
if cmd_name.startswith(word) and cmd_name != word:
|
||||
return Suggestion(cmd_name[len(word):])
|
||||
|
|
@ -1024,6 +1046,8 @@ class SlashCommandAutoSuggest(AutoSuggest):
|
|||
sub_lower = sub_text.lower()
|
||||
|
||||
# Static subcommands
|
||||
if self._completer is not None and not self._completer._command_allowed(base_cmd):
|
||||
return None
|
||||
if base_cmd in SUBCOMMANDS and SUBCOMMANDS[base_cmd]:
|
||||
if " " not in sub_text:
|
||||
for sub in SUBCOMMANDS[base_cmd]:
|
||||
|
|
|
|||
|
|
@ -39,6 +39,9 @@ _EXTRA_ENV_KEYS = frozenset({
|
|||
"DINGTALK_CLIENT_ID", "DINGTALK_CLIENT_SECRET",
|
||||
"FEISHU_APP_ID", "FEISHU_APP_SECRET", "FEISHU_ENCRYPT_KEY", "FEISHU_VERIFICATION_TOKEN",
|
||||
"WECOM_BOT_ID", "WECOM_SECRET",
|
||||
"WEIXIN_ACCOUNT_ID", "WEIXIN_TOKEN", "WEIXIN_BASE_URL", "WEIXIN_CDN_BASE_URL",
|
||||
"WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
|
||||
"WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
|
||||
"BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
|
||||
"TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
|
||||
"WHATSAPP_MODE", "WHATSAPP_ENABLED",
|
||||
|
|
@ -158,16 +161,27 @@ def get_project_root() -> Path:
|
|||
return Path(__file__).parent.parent.resolve()
|
||||
|
||||
def _secure_dir(path):
|
||||
"""Set directory to owner-only access (0700). No-op on Windows.
|
||||
"""Set directory to owner-only access (0700 by default). No-op on Windows.
|
||||
|
||||
Skipped in managed mode — the NixOS module sets group-readable
|
||||
permissions (0750) so interactive users in the hermes group can
|
||||
share state with the gateway service.
|
||||
|
||||
The mode can be overridden via the HERMES_HOME_MODE environment variable
|
||||
(e.g. HERMES_HOME_MODE=0701) for deployments where a web server (nginx,
|
||||
caddy, etc.) needs to traverse HERMES_HOME to reach a served subdirectory.
|
||||
The execute-only bit on a directory permits cd-through without exposing
|
||||
directory listings.
|
||||
"""
|
||||
if is_managed():
|
||||
return
|
||||
try:
|
||||
os.chmod(path, 0o700)
|
||||
mode_str = os.environ.get("HERMES_HOME_MODE", "").strip()
|
||||
mode = int(mode_str, 8) if mode_str else 0o700
|
||||
except ValueError:
|
||||
mode = 0o700
|
||||
try:
|
||||
os.chmod(path, mode)
|
||||
except (OSError, NotImplementedError):
|
||||
pass
|
||||
|
||||
|
|
@ -255,6 +269,12 @@ DEFAULT_CONFIG = {
|
|||
# tools or receiving API responses. Only fires when the agent has
|
||||
# been completely idle for this duration. 0 = unlimited.
|
||||
"gateway_timeout": 1800,
|
||||
# Graceful drain timeout for gateway stop/restart (seconds).
|
||||
# The gateway stops accepting new work, waits for running agents
|
||||
# to finish, then interrupts any remaining runs after the timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
"restart_drain_timeout": 60,
|
||||
"service_tier": "",
|
||||
# Tool-use enforcement: injects system prompt guidance that tells the
|
||||
# model to actually call tools instead of describing intended actions.
|
||||
# Values: "auto" (default — applies to gpt/codex models), true/false
|
||||
|
|
@ -438,7 +458,7 @@ DEFAULT_CONFIG = {
|
|||
|
||||
# Text-to-speech configuration
|
||||
"tts": {
|
||||
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "neutts" (local)
|
||||
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "minimax" | "mistral" | "neutts" (local)
|
||||
"edge": {
|
||||
"voice": "en-US-AriaNeural",
|
||||
# Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
|
||||
|
|
@ -452,6 +472,10 @@ DEFAULT_CONFIG = {
|
|||
"voice": "alloy",
|
||||
# Voices: alloy, echo, fable, onyx, nova, shimmer
|
||||
},
|
||||
"mistral": {
|
||||
"model": "voxtral-mini-tts-2603",
|
||||
"voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8", # Paul - Neutral
|
||||
},
|
||||
"neutts": {
|
||||
"ref_audio": "", # Path to reference voice audio (empty = bundled default)
|
||||
"ref_text": "", # Path to reference voice transcript (empty = bundled default)
|
||||
|
|
@ -489,6 +513,16 @@ DEFAULT_CONFIG = {
|
|||
"max_ms": 2500,
|
||||
},
|
||||
|
||||
# Context engine -- controls how the context window is managed when
|
||||
# approaching the model's token limit.
|
||||
# "compressor" = built-in lossy summarization (default).
|
||||
# Set to a plugin name to activate an alternative engine (e.g. "lcm"
|
||||
# for Lossless Context Management). The engine must be installed as
|
||||
# a plugin in plugins/context_engine/<name>/ or ~/.hermes/plugins/.
|
||||
"context": {
|
||||
"engine": "compressor",
|
||||
},
|
||||
|
||||
# Persistent memory -- bounded curated memory injected into system prompt
|
||||
"memory": {
|
||||
"memory_enabled": True,
|
||||
|
|
@ -513,6 +547,8 @@ DEFAULT_CONFIG = {
|
|||
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||
"max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget,
|
||||
# independent of the parent's max_iterations)
|
||||
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
|
||||
# "low", "minimal", "none" (empty = inherit parent's level)
|
||||
},
|
||||
|
||||
# Ephemeral prefill messages file — JSON list of {role, content} dicts
|
||||
|
|
@ -540,6 +576,7 @@ DEFAULT_CONFIG = {
|
|||
"discord": {
|
||||
"require_mention": True, # Require @mention to respond in server channels
|
||||
"free_response_channels": "", # Comma-separated channel IDs where bot responds without mention
|
||||
"allowed_channels": "", # If set, bot ONLY responds in these channel IDs (whitelist)
|
||||
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
||||
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
||||
},
|
||||
|
|
@ -599,7 +636,7 @@ DEFAULT_CONFIG = {
|
|||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 13,
|
||||
"_config_version": 14,
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
|
|
@ -983,6 +1020,13 @@ OPTIONAL_ENV_VARS = {
|
|||
"password": True,
|
||||
"category": "tool",
|
||||
},
|
||||
"MISTRAL_API_KEY": {
|
||||
"description": "Mistral API key for Voxtral TTS and transcription (STT)",
|
||||
"prompt": "Mistral API key",
|
||||
"url": "https://console.mistral.ai/",
|
||||
"password": True,
|
||||
"category": "tool",
|
||||
},
|
||||
"GITHUB_TOKEN": {
|
||||
"description": "GitHub token for Skills Hub (higher API rate limits, skill publish)",
|
||||
"prompt": "GitHub Token",
|
||||
|
|
@ -1193,8 +1237,8 @@ OPTIONAL_ENV_VARS = {
|
|||
"advanced": True,
|
||||
},
|
||||
"API_SERVER_KEY": {
|
||||
"description": "Bearer token for API server authentication. If empty, all requests are allowed (local use only).",
|
||||
"prompt": "API server auth key (optional)",
|
||||
"description": "Bearer token for API server authentication. Required for non-loopback binding; server refuses to start without it. On loopback (127.0.0.1), all requests are allowed if empty.",
|
||||
"prompt": "API server auth key (required for network access)",
|
||||
"url": None,
|
||||
"password": True,
|
||||
"category": "messaging",
|
||||
|
|
@ -1209,7 +1253,7 @@ OPTIONAL_ENV_VARS = {
|
|||
"advanced": True,
|
||||
},
|
||||
"API_SERVER_HOST": {
|
||||
"description": "Host/bind address for the API server (default: 127.0.0.1). Use 0.0.0.0 for network access — requires API_SERVER_KEY for security.",
|
||||
"description": "Host/bind address for the API server (default: 127.0.0.1). Use 0.0.0.0 for network access — server refuses to start without API_SERVER_KEY.",
|
||||
"prompt": "API server host",
|
||||
"url": None,
|
||||
"password": False,
|
||||
|
|
@ -1434,7 +1478,7 @@ _KNOWN_ROOT_KEYS = {
|
|||
"_config_version", "model", "providers", "fallback_model",
|
||||
"fallback_providers", "credential_pool_strategies", "toolsets",
|
||||
"agent", "terminal", "display", "compression", "delegation",
|
||||
"auxiliary", "custom_providers", "memory", "gateway",
|
||||
"auxiliary", "custom_providers", "context", "memory", "gateway",
|
||||
}
|
||||
|
||||
# Valid fields inside a custom_providers list entry
|
||||
|
|
@ -1754,6 +1798,56 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# ── Version 13 → 14: migrate legacy flat stt.model to provider section ──
|
||||
# Old configs (and cli-config.yaml.example) had a flat `stt.model` key
|
||||
# that was provider-agnostic. When the provider was "local" this caused
|
||||
# OpenAI model names (e.g. "whisper-1") to be fed to faster-whisper,
|
||||
# crashing with "Invalid model size". Move the value into the correct
|
||||
# provider-specific section and remove the flat key.
|
||||
if current_ver < 14:
|
||||
# Read raw config (no defaults merged) to check what the user actually
|
||||
# wrote, then apply changes to the merged config for saving.
|
||||
raw = read_raw_config()
|
||||
raw_stt = raw.get("stt", {})
|
||||
if isinstance(raw_stt, dict) and "model" in raw_stt:
|
||||
legacy_model = raw_stt["model"]
|
||||
provider = raw_stt.get("provider", "local")
|
||||
config = load_config()
|
||||
stt = config.get("stt", {})
|
||||
# Remove the legacy flat key
|
||||
stt.pop("model", None)
|
||||
# Place it in the appropriate provider section only if the
|
||||
# user didn't already set a model there
|
||||
if provider in ("local", "local_command"):
|
||||
# Don't migrate an OpenAI model name into the local section
|
||||
_local_models = {
|
||||
"tiny.en", "tiny", "base.en", "base", "small.en", "small",
|
||||
"medium.en", "medium", "large-v1", "large-v2", "large-v3",
|
||||
"large", "distil-large-v2", "distil-medium.en",
|
||||
"distil-small.en", "distil-large-v3", "distil-large-v3.5",
|
||||
"large-v3-turbo", "turbo",
|
||||
}
|
||||
if legacy_model in _local_models:
|
||||
# Check raw config — only set if user didn't already
|
||||
# have a nested local.model
|
||||
raw_local = raw_stt.get("local", {})
|
||||
if not isinstance(raw_local, dict) or "model" not in raw_local:
|
||||
local_cfg = stt.setdefault("local", {})
|
||||
local_cfg["model"] = legacy_model
|
||||
# else: drop it — it was an OpenAI model name, local section
|
||||
# already defaults to "base" via DEFAULT_CONFIG
|
||||
else:
|
||||
# Cloud provider — put it in that provider's section only
|
||||
# if user didn't already set a nested model
|
||||
raw_provider = raw_stt.get(provider, {})
|
||||
if not isinstance(raw_provider, dict) or "model" not in raw_provider:
|
||||
provider_cfg = stt.setdefault(provider, {})
|
||||
provider_cfg["model"] = legacy_model
|
||||
config["stt"] = stt
|
||||
save_config(config)
|
||||
if not quiet:
|
||||
print(f" ✓ Migrated legacy stt.model to provider-specific config")
|
||||
|
||||
if current_ver < latest_ver and not quiet:
|
||||
print(f"Config version: {current_ver} → {latest_ver}")
|
||||
|
||||
|
|
@ -2707,6 +2801,10 @@ def set_config_value(key: str, value: str):
|
|||
"terminal.timeout": "TERMINAL_TIMEOUT",
|
||||
"terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR",
|
||||
"terminal.persistent_shell": "TERMINAL_PERSISTENT_SHELL",
|
||||
"terminal.container_cpu": "TERMINAL_CONTAINER_CPU",
|
||||
"terminal.container_memory": "TERMINAL_CONTAINER_MEMORY",
|
||||
"terminal.container_disk": "TERMINAL_CONTAINER_DISK",
|
||||
"terminal.container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
|
||||
}
|
||||
if key in _config_to_env_sync:
|
||||
save_env_value(_config_to_env_sync[key], str(value))
|
||||
|
|
|
|||
|
|
@ -31,13 +31,6 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
# OAuth device code flow constants (same client ID as opencode/Copilot CLI)
|
||||
COPILOT_OAUTH_CLIENT_ID = "Ov23li8tweQw6odWQebz"
|
||||
COPILOT_DEVICE_CODE_URL = "https://github.com/login/device/code"
|
||||
COPILOT_ACCESS_TOKEN_URL = "https://github.com/login/oauth/access_token"
|
||||
|
||||
# Copilot API constants
|
||||
COPILOT_TOKEN_EXCHANGE_URL = "https://api.github.com/copilot_internal/v2/token"
|
||||
COPILOT_API_BASE_URL = "https://api.githubcopilot.com"
|
||||
|
||||
# Token type prefixes
|
||||
_CLASSIC_PAT_PREFIX = "ghp_"
|
||||
_SUPPORTED_PREFIXES = ("gho_", "github_pat_", "ghu_")
|
||||
|
|
@ -50,11 +43,6 @@ _DEVICE_CODE_POLL_INTERVAL = 5 # seconds
|
|||
_DEVICE_CODE_POLL_SAFETY_MARGIN = 3 # seconds
|
||||
|
||||
|
||||
def is_classic_pat(token: str) -> bool:
|
||||
"""Check if a token is a classic PAT (ghp_*), which Copilot doesn't support."""
|
||||
return token.strip().startswith(_CLASSIC_PAT_PREFIX)
|
||||
|
||||
|
||||
def validate_copilot_token(token: str) -> tuple[bool, str]:
|
||||
"""Validate that a token is usable with the Copilot API.
|
||||
|
||||
|
|
@ -285,6 +273,7 @@ def copilot_request_headers(
|
|||
headers: dict[str, str] = {
|
||||
"Editor-Version": "vscode/1.104.1",
|
||||
"User-Agent": "HermesAgent/1.0",
|
||||
"Copilot-Integration-Id": "vscode-chat",
|
||||
"Openai-Intent": "conversation-edits",
|
||||
"x-initiator": "agent" if is_agent_turn else "user",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,6 +10,28 @@ from typing import Callable, List, Optional, Set
|
|||
from hermes_cli.colors import Colors, color
|
||||
|
||||
|
||||
def flush_stdin() -> None:
|
||||
"""Flush any stray bytes from the stdin input buffer.
|
||||
|
||||
Must be called after ``curses.wrapper()`` (or any terminal-mode library
|
||||
like simple_term_menu) returns, **before** the next ``input()`` /
|
||||
``getpass.getpass()`` call. ``curses.endwin()`` restores the terminal
|
||||
but does NOT drain the OS input buffer — leftover escape-sequence bytes
|
||||
(from arrow keys, terminal mode-switch responses, or rapid keypresses)
|
||||
remain buffered and silently get consumed by the next ``input()`` call,
|
||||
corrupting user data (e.g. writing ``^[^[`` into .env files).
|
||||
|
||||
On non-TTY stdin (piped, redirected) or Windows, this is a no-op.
|
||||
"""
|
||||
try:
|
||||
if not sys.stdin.isatty():
|
||||
return
|
||||
import termios
|
||||
termios.tcflush(sys.stdin, termios.TCIFLUSH)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def curses_checklist(
|
||||
title: str,
|
||||
items: List[str],
|
||||
|
|
@ -131,12 +153,140 @@ def curses_checklist(
|
|||
return
|
||||
|
||||
curses.wrapper(_draw)
|
||||
flush_stdin()
|
||||
return result_holder[0] if result_holder[0] is not None else cancel_returns
|
||||
|
||||
except Exception:
|
||||
return _numbered_fallback(title, items, selected, cancel_returns, status_fn)
|
||||
|
||||
|
||||
def curses_radiolist(
|
||||
title: str,
|
||||
items: List[str],
|
||||
selected: int = 0,
|
||||
*,
|
||||
cancel_returns: int | None = None,
|
||||
) -> int:
|
||||
"""Curses single-select radio list. Returns the selected index.
|
||||
|
||||
Args:
|
||||
title: Header line displayed above the list.
|
||||
items: Display labels for each row.
|
||||
selected: Index that starts selected (pre-selected).
|
||||
cancel_returns: Returned on ESC/q. Defaults to the original *selected*.
|
||||
"""
|
||||
if cancel_returns is None:
|
||||
cancel_returns = selected
|
||||
|
||||
if not sys.stdin.isatty():
|
||||
return cancel_returns
|
||||
|
||||
try:
|
||||
import curses
|
||||
result_holder: list = [None]
|
||||
|
||||
def _draw(stdscr):
|
||||
curses.curs_set(0)
|
||||
if curses.has_colors():
|
||||
curses.start_color()
|
||||
curses.use_default_colors()
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
cursor = selected
|
||||
scroll_offset = 0
|
||||
|
||||
while True:
|
||||
stdscr.clear()
|
||||
max_y, max_x = stdscr.getmaxyx()
|
||||
|
||||
# Header
|
||||
try:
|
||||
hattr = curses.A_BOLD
|
||||
if curses.has_colors():
|
||||
hattr |= curses.color_pair(2)
|
||||
stdscr.addnstr(0, 0, title, max_x - 1, hattr)
|
||||
stdscr.addnstr(
|
||||
1, 0,
|
||||
" \u2191\u2193 navigate ENTER/SPACE select ESC cancel",
|
||||
max_x - 1, curses.A_DIM,
|
||||
)
|
||||
except curses.error:
|
||||
pass
|
||||
|
||||
# Scrollable item list
|
||||
visible_rows = max_y - 4
|
||||
if cursor < scroll_offset:
|
||||
scroll_offset = cursor
|
||||
elif cursor >= scroll_offset + visible_rows:
|
||||
scroll_offset = cursor - visible_rows + 1
|
||||
|
||||
for draw_i, i in enumerate(
|
||||
range(scroll_offset, min(len(items), scroll_offset + visible_rows))
|
||||
):
|
||||
y = draw_i + 3
|
||||
if y >= max_y - 1:
|
||||
break
|
||||
radio = "\u25cf" if i == selected else "\u25cb"
|
||||
arrow = "\u2192" if i == cursor else " "
|
||||
line = f" {arrow} ({radio}) {items[i]}"
|
||||
attr = curses.A_NORMAL
|
||||
if i == cursor:
|
||||
attr = curses.A_BOLD
|
||||
if curses.has_colors():
|
||||
attr |= curses.color_pair(1)
|
||||
try:
|
||||
stdscr.addnstr(y, 0, line, max_x - 1, attr)
|
||||
except curses.error:
|
||||
pass
|
||||
|
||||
stdscr.refresh()
|
||||
key = stdscr.getch()
|
||||
|
||||
if key in (curses.KEY_UP, ord("k")):
|
||||
cursor = (cursor - 1) % len(items)
|
||||
elif key in (curses.KEY_DOWN, ord("j")):
|
||||
cursor = (cursor + 1) % len(items)
|
||||
elif key in (ord(" "), curses.KEY_ENTER, 10, 13):
|
||||
result_holder[0] = cursor
|
||||
return
|
||||
elif key in (27, ord("q")):
|
||||
result_holder[0] = cancel_returns
|
||||
return
|
||||
|
||||
curses.wrapper(_draw)
|
||||
flush_stdin()
|
||||
return result_holder[0] if result_holder[0] is not None else cancel_returns
|
||||
|
||||
except Exception:
|
||||
return _radio_numbered_fallback(title, items, selected, cancel_returns)
|
||||
|
||||
|
||||
def _radio_numbered_fallback(
|
||||
title: str,
|
||||
items: List[str],
|
||||
selected: int,
|
||||
cancel_returns: int,
|
||||
) -> int:
|
||||
"""Text-based numbered fallback for radio selection."""
|
||||
print(color(f"\n {title}", Colors.YELLOW))
|
||||
print(color(" Select by number, Enter to confirm.\n", Colors.DIM))
|
||||
|
||||
for i, label in enumerate(items):
|
||||
marker = color("(\u25cf)", Colors.GREEN) if i == selected else "(\u25cb)"
|
||||
print(f" {marker} {i + 1:>2}. {label}")
|
||||
print()
|
||||
try:
|
||||
val = input(color(f" Choice [default {selected + 1}]: ", Colors.DIM)).strip()
|
||||
if not val:
|
||||
return selected
|
||||
idx = int(val) - 1
|
||||
if 0 <= idx < len(items):
|
||||
return idx
|
||||
return selected
|
||||
except (ValueError, KeyboardInterrupt, EOFError):
|
||||
return cancel_returns
|
||||
|
||||
|
||||
def _numbered_fallback(
|
||||
title: str,
|
||||
items: List[str],
|
||||
|
|
|
|||
|
|
@ -722,9 +722,9 @@ def run_doctor(args):
|
|||
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
|
||||
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
|
||||
("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
|
||||
# MiniMax APIs don't support /models endpoint — https://github.com/NousResearch/hermes-agent/issues/811
|
||||
("MiniMax", ("MINIMAX_API_KEY",), None, "MINIMAX_BASE_URL", False),
|
||||
("MiniMax (China)", ("MINIMAX_CN_API_KEY",), None, "MINIMAX_CN_BASE_URL", False),
|
||||
# MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
|
||||
("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
|
||||
("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", True),
|
||||
("AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
|
||||
("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
|
||||
("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True),
|
||||
|
|
@ -749,10 +749,15 @@ def run_doctor(args):
|
|||
# Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
|
||||
if not _base and _key.startswith("sk-kimi-"):
|
||||
_base = "https://api.kimi.com/coding/v1"
|
||||
# Anthropic-compat endpoints (/anthropic) don't support /models.
|
||||
# Rewrite to the OpenAI-compat /v1 surface for health checks.
|
||||
if _base and _base.rstrip("/").endswith("/anthropic"):
|
||||
from agent.auxiliary_client import _to_openai_base_url
|
||||
_base = _to_openai_base_url(_base)
|
||||
_url = (_base.rstrip("/") + "/models") if _base else _default_url
|
||||
_headers = {"Authorization": f"Bearer {_key}"}
|
||||
if "api.kimi.com" in _url.lower():
|
||||
_headers["User-Agent"] = "KimiCLI/1.0"
|
||||
_headers["User-Agent"] = "KimiCLI/1.30.0"
|
||||
_resp = httpx.get(
|
||||
_url,
|
||||
headers=_headers,
|
||||
|
|
|
|||
|
|
@ -32,11 +32,6 @@ def _get_git_commit(project_root: Path) -> str:
|
|||
return "(unknown)"
|
||||
|
||||
|
||||
def _key_present(name: str) -> str:
|
||||
"""Return 'set' or 'not set' for an env var."""
|
||||
return "set" if os.getenv(name) else "not set"
|
||||
|
||||
|
||||
def _redact(value: str) -> str:
|
||||
"""Redact all but first 4 and last 4 chars."""
|
||||
if not value:
|
||||
|
|
@ -124,6 +119,7 @@ def _configured_platforms() -> list[str]:
|
|||
"dingtalk": "DINGTALK_CLIENT_ID",
|
||||
"feishu": "FEISHU_APP_ID",
|
||||
"wecom": "WECOM_BOT_ID",
|
||||
"weixin": "WEIXIN_ACCOUNT_ID",
|
||||
}
|
||||
return [name for name, env in checks.items() if os.getenv(env)]
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,20 @@ from pathlib import Path
|
|||
|
||||
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
|
||||
|
||||
from hermes_cli.config import get_env_value, get_hermes_home, save_env_value, is_managed, managed_error
|
||||
from gateway.status import terminate_pid
|
||||
from gateway.restart import (
|
||||
DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
|
||||
GATEWAY_SERVICE_RESTART_EXIT_CODE,
|
||||
parse_restart_drain_timeout,
|
||||
)
|
||||
from hermes_cli.config import (
|
||||
get_env_value,
|
||||
get_hermes_home,
|
||||
is_managed,
|
||||
managed_error,
|
||||
read_raw_config,
|
||||
save_env_value,
|
||||
)
|
||||
# display_hermes_home is imported lazily at call sites to avoid ImportError
|
||||
# when hermes_constants is cached from a pre-update version during `hermes update`.
|
||||
from hermes_cli.setup import (
|
||||
|
|
@ -91,6 +104,59 @@ def _get_service_pids() -> set:
|
|||
return pids
|
||||
|
||||
|
||||
def _get_parent_pid(pid: int) -> int | None:
|
||||
"""Return the parent PID for ``pid``, or ``None`` when unavailable."""
|
||||
if pid <= 1:
|
||||
return None
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["ps", "-o", "ppid=", "-p", str(pid)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
return None
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
raw = result.stdout.strip()
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
parent_pid = int(raw.splitlines()[-1].strip())
|
||||
except ValueError:
|
||||
return None
|
||||
return parent_pid if parent_pid > 0 else None
|
||||
|
||||
|
||||
def _is_pid_ancestor_of_current_process(target_pid: int) -> bool:
|
||||
"""Return True when ``target_pid`` is this process or one of its ancestors."""
|
||||
if target_pid <= 0:
|
||||
return False
|
||||
|
||||
pid = os.getpid()
|
||||
seen: set[int] = set()
|
||||
while pid and pid not in seen:
|
||||
if pid == target_pid:
|
||||
return True
|
||||
seen.add(pid)
|
||||
pid = _get_parent_pid(pid) or 0
|
||||
return False
|
||||
|
||||
|
||||
def _request_gateway_self_restart(pid: int) -> bool:
|
||||
"""Ask a running gateway ancestor to restart itself asynchronously."""
|
||||
if not hasattr(signal, "SIGUSR1"):
|
||||
return False
|
||||
if not _is_pid_ancestor_of_current_process(pid):
|
||||
return False
|
||||
try:
|
||||
os.kill(pid, signal.SIGUSR1)
|
||||
except (ProcessLookupError, PermissionError, OSError):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def find_gateway_pids(exclude_pids: set | None = None) -> list:
|
||||
"""Find PIDs of running gateway processes.
|
||||
|
||||
|
|
@ -162,7 +228,7 @@ def kill_gateway_processes(force: bool = False, exclude_pids: set | None = None)
|
|||
"""Kill any running gateway processes. Returns count killed.
|
||||
|
||||
Args:
|
||||
force: Use SIGKILL instead of SIGTERM.
|
||||
force: Use the platform's force-kill mechanism instead of graceful terminate.
|
||||
exclude_pids: PIDs to skip (e.g. service-managed PIDs that were just
|
||||
restarted and should not be killed).
|
||||
"""
|
||||
|
|
@ -171,10 +237,7 @@ def kill_gateway_processes(force: bool = False, exclude_pids: set | None = None)
|
|||
|
||||
for pid in pids:
|
||||
try:
|
||||
if force and not is_windows():
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
else:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
terminate_pid(pid, force=force)
|
||||
killed += 1
|
||||
except ProcessLookupError:
|
||||
# Process already gone
|
||||
|
|
@ -182,6 +245,8 @@ def kill_gateway_processes(force: bool = False, exclude_pids: set | None = None)
|
|||
except PermissionError:
|
||||
print(f"⚠ Permission denied to kill PID {pid}")
|
||||
|
||||
except OSError as exc:
|
||||
print(f"Failed to kill PID {pid}: {exc}")
|
||||
return killed
|
||||
|
||||
|
||||
|
|
@ -226,11 +291,33 @@ def is_linux() -> bool:
|
|||
return sys.platform.startswith('linux')
|
||||
|
||||
|
||||
from hermes_constants import is_termux
|
||||
from hermes_constants import is_termux, is_wsl
|
||||
|
||||
|
||||
def _wsl_systemd_operational() -> bool:
|
||||
"""Check if systemd is actually running as PID 1 on WSL.
|
||||
|
||||
WSL2 with ``systemd=true`` in wsl.conf has working systemd.
|
||||
WSL2 without it (or WSL1) does not — systemctl commands fail.
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["systemctl", "is-system-running"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
# "running", "degraded", "starting" all mean systemd is PID 1
|
||||
status = result.stdout.strip().lower()
|
||||
return status in ("running", "degraded", "starting", "initializing")
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
|
||||
return False
|
||||
|
||||
|
||||
def supports_systemd_services() -> bool:
|
||||
return is_linux() and not is_termux()
|
||||
if not is_linux() or is_termux():
|
||||
return False
|
||||
if is_wsl():
|
||||
return _wsl_systemd_operational()
|
||||
return True
|
||||
|
||||
|
||||
def is_macos() -> bool:
|
||||
|
|
@ -251,18 +338,18 @@ SERVICE_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration"
|
|||
def _profile_suffix() -> str:
|
||||
"""Derive a service-name suffix from the current HERMES_HOME.
|
||||
|
||||
Returns ``""`` for the default ``~/.hermes``, the profile name for
|
||||
``~/.hermes/profiles/<name>``, or a short hash for any other custom
|
||||
HERMES_HOME path.
|
||||
Returns ``""`` for the default root, the profile name for
|
||||
``<root>/profiles/<name>``, or a short hash for any other path.
|
||||
Works correctly in Docker (HERMES_HOME=/opt/data) and standard deployments.
|
||||
"""
|
||||
import hashlib
|
||||
import re
|
||||
from pathlib import Path as _Path
|
||||
from hermes_constants import get_default_hermes_root
|
||||
home = get_hermes_home().resolve()
|
||||
default = (_Path.home() / ".hermes").resolve()
|
||||
default = get_default_hermes_root().resolve()
|
||||
if home == default:
|
||||
return ""
|
||||
# Detect ~/.hermes/profiles/<name> pattern → use the profile name
|
||||
# Detect <root>/profiles/<name> pattern → use the profile name
|
||||
profiles_root = (default / "profiles").resolve()
|
||||
try:
|
||||
rel = home.relative_to(profiles_root)
|
||||
|
|
@ -287,9 +374,9 @@ def _profile_arg(hermes_home: str | None = None) -> str:
|
|||
service definition for a different user (e.g. system service).
|
||||
"""
|
||||
import re
|
||||
from pathlib import Path as _Path
|
||||
from hermes_constants import get_default_hermes_root
|
||||
home = Path(hermes_home or str(get_hermes_home())).resolve()
|
||||
default = (_Path.home() / ".hermes").resolve()
|
||||
default = get_default_hermes_root().resolve()
|
||||
if home == default:
|
||||
return ""
|
||||
profiles_root = (default / "profiles").resolve()
|
||||
|
|
@ -316,8 +403,6 @@ def get_service_name() -> str:
|
|||
return f"{_SERVICE_BASE}-{suffix}"
|
||||
|
||||
|
||||
SERVICE_NAME = _SERVICE_BASE # backward-compat for external importers; prefer get_service_name()
|
||||
|
||||
|
||||
def get_systemd_unit_path(system: bool = False) -> Path:
|
||||
name = get_service_name()
|
||||
|
|
@ -591,17 +676,6 @@ def get_python_path() -> str:
|
|||
return str(venv_python)
|
||||
return sys.executable
|
||||
|
||||
def get_hermes_cli_path() -> str:
|
||||
"""Get the path to the hermes CLI."""
|
||||
# Check if installed via pip
|
||||
import shutil
|
||||
hermes_bin = shutil.which("hermes")
|
||||
if hermes_bin:
|
||||
return hermes_bin
|
||||
|
||||
# Fallback to direct module execution
|
||||
return f"{get_python_path()} -m hermes_cli.main"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Systemd (Linux)
|
||||
|
|
@ -618,6 +692,24 @@ def _build_user_local_paths(home: Path, path_entries: list[str]) -> list[str]:
|
|||
return [p for p in candidates if p not in path_entries and Path(p).exists()]
|
||||
|
||||
|
||||
def _remap_path_for_user(path: str, target_home_dir: str) -> str:
|
||||
"""Remap *path* from the current user's home to *target_home_dir*.
|
||||
|
||||
If *path* lives under ``Path.home()`` the corresponding prefix is swapped
|
||||
to *target_home_dir*; otherwise the path is returned unchanged.
|
||||
|
||||
/root/.hermes/hermes-agent -> /home/alice/.hermes/hermes-agent
|
||||
/opt/hermes -> /opt/hermes (kept as-is)
|
||||
"""
|
||||
current_home = Path.home().resolve()
|
||||
resolved = Path(path).resolve()
|
||||
try:
|
||||
relative = resolved.relative_to(current_home)
|
||||
return str(Path(target_home_dir) / relative)
|
||||
except ValueError:
|
||||
return str(resolved)
|
||||
|
||||
|
||||
def _hermes_home_for_target_user(target_home_dir: str) -> str:
|
||||
"""Remap the current HERMES_HOME to the equivalent under a target user's home.
|
||||
|
||||
|
|
@ -660,11 +752,21 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
|
|||
path_entries.append(resolved_node_dir)
|
||||
|
||||
common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]
|
||||
restart_timeout = max(60, int(_get_restart_drain_timeout() or 0))
|
||||
|
||||
if system:
|
||||
username, group_name, home_dir = _system_service_identity(run_as_user)
|
||||
hermes_home = _hermes_home_for_target_user(home_dir)
|
||||
profile_arg = _profile_arg(hermes_home)
|
||||
# Remap all paths that may resolve under the calling user's home
|
||||
# (e.g. /root/) to the target user's home so the service can
|
||||
# actually access them.
|
||||
python_path = _remap_path_for_user(python_path, home_dir)
|
||||
working_dir = _remap_path_for_user(working_dir, home_dir)
|
||||
venv_dir = _remap_path_for_user(venv_dir, home_dir)
|
||||
venv_bin = _remap_path_for_user(venv_bin, home_dir)
|
||||
node_bin = _remap_path_for_user(node_bin, home_dir)
|
||||
path_entries = [_remap_path_for_user(p, home_dir) for p in path_entries]
|
||||
path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries))
|
||||
path_entries.extend(common_bin_paths)
|
||||
sane_path = ":".join(path_entries)
|
||||
|
|
@ -689,9 +791,11 @@ Environment="VIRTUAL_ENV={venv_dir}"
|
|||
Environment="HERMES_HOME={hermes_home}"
|
||||
Restart=on-failure
|
||||
RestartSec=30
|
||||
RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
|
||||
KillMode=mixed
|
||||
KillSignal=SIGTERM
|
||||
TimeoutStopSec=60
|
||||
ExecReload=/bin/kill -USR1 $MAINPID
|
||||
TimeoutStopSec={restart_timeout}
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
|
|
@ -719,9 +823,11 @@ Environment="VIRTUAL_ENV={venv_dir}"
|
|||
Environment="HERMES_HOME={hermes_home}"
|
||||
Restart=on-failure
|
||||
RestartSec=30
|
||||
RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
|
||||
KillMode=mixed
|
||||
KillSignal=SIGTERM
|
||||
TimeoutStopSec=60
|
||||
ExecReload=/bin/kill -USR1 $MAINPID
|
||||
TimeoutStopSec={restart_timeout}
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
|
|
@ -824,6 +930,20 @@ def _select_systemd_scope(system: bool = False) -> bool:
|
|||
return get_systemd_unit_path(system=True).exists() and not get_systemd_unit_path(system=False).exists()
|
||||
|
||||
|
||||
def _get_restart_drain_timeout() -> float:
|
||||
"""Return the configured gateway restart drain timeout in seconds."""
|
||||
raw = os.getenv("HERMES_RESTART_DRAIN_TIMEOUT", "").strip()
|
||||
if not raw:
|
||||
cfg = read_raw_config()
|
||||
agent_cfg = cfg.get("agent", {}) if isinstance(cfg, dict) else {}
|
||||
raw = str(
|
||||
agent_cfg.get(
|
||||
"restart_drain_timeout", DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
|
||||
)
|
||||
)
|
||||
return parse_restart_drain_timeout(raw)
|
||||
|
||||
|
||||
def systemd_install(force: bool = False, system: bool = False, run_as_user: str | None = None):
|
||||
if system:
|
||||
_require_root_for_system_service("install")
|
||||
|
|
@ -909,7 +1029,13 @@ def systemd_restart(system: bool = False):
|
|||
if system:
|
||||
_require_root_for_system_service("restart")
|
||||
refresh_systemd_unit_if_needed(system=system)
|
||||
subprocess.run(_systemctl_cmd(system) + ["restart", get_service_name()], check=True, timeout=90)
|
||||
from gateway.status import get_running_pid
|
||||
|
||||
pid = get_running_pid()
|
||||
if pid is not None and _request_gateway_self_restart(pid):
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service restart requested")
|
||||
return
|
||||
subprocess.run(_systemctl_cmd(system) + ["reload-or-restart", get_service_name()], check=True, timeout=90)
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service restarted")
|
||||
|
||||
|
||||
|
|
@ -1182,10 +1308,22 @@ def launchd_start():
|
|||
|
||||
def launchd_stop():
|
||||
label = get_launchd_label()
|
||||
subprocess.run(["launchctl", "kill", "SIGTERM", f"{_launchd_domain()}/{label}"], check=True, timeout=30)
|
||||
target = f"{_launchd_domain()}/{label}"
|
||||
# bootout unloads the service definition so KeepAlive doesn't respawn
|
||||
# the process. A plain `kill SIGTERM` only signals the process — launchd
|
||||
# immediately restarts it because KeepAlive.SuccessfulExit = false.
|
||||
# `hermes gateway start` re-bootstraps when it detects the job is unloaded.
|
||||
try:
|
||||
subprocess.run(["launchctl", "bootout", target], check=True, timeout=90)
|
||||
except subprocess.CalledProcessError as e:
|
||||
if e.returncode in (3, 113):
|
||||
pass # Already unloaded — nothing to stop.
|
||||
else:
|
||||
raise
|
||||
_wait_for_gateway_exit(timeout=10.0, force_after=5.0)
|
||||
print("✓ Service stopped")
|
||||
|
||||
def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float = 5.0):
|
||||
def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float | None = 5.0) -> bool:
|
||||
"""Wait for the gateway process (by saved PID) to exit.
|
||||
|
||||
Uses the PID from the gateway.pid file — not launchd labels — so this
|
||||
|
|
@ -1194,44 +1332,59 @@ def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float = 5.0):
|
|||
|
||||
Args:
|
||||
timeout: Total seconds to wait before giving up.
|
||||
force_after: Seconds of graceful waiting before sending SIGKILL.
|
||||
force_after: Seconds of graceful waiting before escalating to force-kill.
|
||||
"""
|
||||
import time
|
||||
from gateway.status import get_running_pid
|
||||
|
||||
deadline = time.monotonic() + timeout
|
||||
force_deadline = time.monotonic() + force_after
|
||||
force_deadline = (time.monotonic() + force_after) if force_after is not None else None
|
||||
force_sent = False
|
||||
|
||||
while time.monotonic() < deadline:
|
||||
pid = get_running_pid()
|
||||
if pid is None:
|
||||
return # Process exited cleanly.
|
||||
return True # Process exited cleanly.
|
||||
|
||||
if not force_sent and time.monotonic() >= force_deadline:
|
||||
if force_after is not None and not force_sent and time.monotonic() >= force_deadline:
|
||||
# Grace period expired — force-kill the specific PID.
|
||||
try:
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
terminate_pid(pid, force=True)
|
||||
print(f"⚠ Gateway PID {pid} did not exit gracefully; sent SIGKILL")
|
||||
except (ProcessLookupError, PermissionError):
|
||||
return # Already gone or we can't touch it.
|
||||
except (ProcessLookupError, PermissionError, OSError):
|
||||
return True # Already gone or we can't touch it.
|
||||
force_sent = True
|
||||
|
||||
time.sleep(0.3)
|
||||
|
||||
# Timed out even after SIGKILL.
|
||||
# Timed out even after force-kill.
|
||||
remaining_pid = get_running_pid()
|
||||
if remaining_pid is not None:
|
||||
print(f"⚠ Gateway PID {remaining_pid} still running after {timeout}s — restart may fail")
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def launchd_restart():
|
||||
label = get_launchd_label()
|
||||
target = f"{_launchd_domain()}/{label}"
|
||||
# Use kickstart -k so launchd performs an atomic kill+restart.
|
||||
# A two-step stop/start from inside the gateway's own process tree
|
||||
# would kill the shell before the start command is reached.
|
||||
drain_timeout = _get_restart_drain_timeout()
|
||||
from gateway.status import get_running_pid
|
||||
|
||||
try:
|
||||
pid = get_running_pid()
|
||||
if pid is not None and _request_gateway_self_restart(pid):
|
||||
print("✓ Service restart requested")
|
||||
return
|
||||
if pid is not None:
|
||||
try:
|
||||
terminate_pid(pid, force=False)
|
||||
except (ProcessLookupError, PermissionError, OSError):
|
||||
pid = None
|
||||
if pid is not None:
|
||||
exited = _wait_for_gateway_exit(timeout=drain_timeout, force_after=None)
|
||||
if not exited:
|
||||
print(f"⚠ Gateway drain timed out after {drain_timeout:.0f}s — forcing launchd restart")
|
||||
subprocess.run(["launchctl", "kickstart", "-k", target], check=True, timeout=90)
|
||||
print("✓ Service restarted")
|
||||
except subprocess.CalledProcessError as e:
|
||||
|
|
@ -1416,7 +1569,7 @@ _PLATFORMS = [
|
|||
" Or via API: curl -X POST https://your-server/_matrix/client/v3/login \\",
|
||||
" -d '{\"type\":\"m.login.password\",\"user\":\"@bot:server\",\"password\":\"...\"}'",
|
||||
"4. Alternatively, provide user ID + password and Hermes will log in directly",
|
||||
"5. For E2EE: set MATRIX_ENCRYPTION=true (requires pip install 'matrix-nio[e2e]')",
|
||||
"5. For E2EE: set MATRIX_ENCRYPTION=true (requires pip install 'mautrix[encryption]')",
|
||||
"6. To find your user ID: it's @username:your-server (shown in Element profile)",
|
||||
],
|
||||
"vars": [
|
||||
|
|
@ -1598,6 +1751,12 @@ _PLATFORMS = [
|
|||
"help": "Chat ID for scheduled results and notifications."},
|
||||
],
|
||||
},
|
||||
{
|
||||
"key": "weixin",
|
||||
"label": "Weixin / WeChat",
|
||||
"emoji": "💬",
|
||||
"token_var": "WEIXIN_ACCOUNT_ID",
|
||||
},
|
||||
{
|
||||
"key": "bluebubbles",
|
||||
"label": "BlueBubbles (iMessage)",
|
||||
|
|
@ -1670,6 +1829,13 @@ def _platform_status(platform: dict) -> str:
|
|||
if val or password or homeserver:
|
||||
return "partially configured"
|
||||
return "not configured"
|
||||
if platform.get("key") == "weixin":
|
||||
token = get_env_value("WEIXIN_TOKEN")
|
||||
if val and token:
|
||||
return "configured"
|
||||
if val or token:
|
||||
return "partially configured"
|
||||
return "not configured"
|
||||
if val:
|
||||
return "configured"
|
||||
return "not configured"
|
||||
|
|
@ -1689,6 +1855,8 @@ def _runtime_health_lines() -> list[str]:
|
|||
lines: list[str] = []
|
||||
gateway_state = state.get("gateway_state")
|
||||
exit_reason = state.get("exit_reason")
|
||||
active_agents = state.get("active_agents")
|
||||
restart_requested = state.get("restart_requested")
|
||||
platforms = state.get("platforms", {}) or {}
|
||||
|
||||
for platform, pdata in platforms.items():
|
||||
|
|
@ -1698,6 +1866,10 @@ def _runtime_health_lines() -> list[str]:
|
|||
|
||||
if gateway_state == "startup_failed" and exit_reason:
|
||||
lines.append(f"⚠ Last startup issue: {exit_reason}")
|
||||
elif gateway_state == "draining":
|
||||
action = "restart" if restart_requested else "shutdown"
|
||||
count = int(active_agents or 0)
|
||||
lines.append(f"⏳ Gateway draining for {action} ({count} active agent(s))")
|
||||
elif gateway_state == "stopped" and exit_reason:
|
||||
lines.append(f"⚠ Last shutdown reason: {exit_reason}")
|
||||
|
||||
|
|
@ -1773,7 +1945,7 @@ def _setup_standard_platform(platform: dict):
|
|||
print_warning(" Open access enabled — anyone can use your bot!")
|
||||
elif access_idx == 1:
|
||||
print_success(" DM pairing mode — users will receive a code to request access.")
|
||||
print_info(" Approve with: hermes pairing approve {platform} {code}")
|
||||
print_info(" Approve with: hermes pairing approve <platform> <code>")
|
||||
else:
|
||||
print_info(" Skipped — configure later with 'hermes gateway setup'")
|
||||
continue
|
||||
|
|
@ -1860,6 +2032,133 @@ def _is_service_running() -> bool:
|
|||
return len(find_gateway_pids()) > 0
|
||||
|
||||
|
||||
def _setup_weixin():
|
||||
"""Interactive setup for Weixin / WeChat personal accounts."""
|
||||
print()
|
||||
print(color(" ─── 💬 Weixin / WeChat Setup ───", Colors.CYAN))
|
||||
print()
|
||||
print_info(" 1. Hermes will open Tencent iLink QR login in this terminal.")
|
||||
print_info(" 2. Use WeChat to scan and confirm the QR code.")
|
||||
print_info(" 3. Hermes will store the returned account_id/token in ~/.hermes/.env.")
|
||||
print_info(" 4. This adapter supports native text, image, video, and document delivery.")
|
||||
|
||||
existing_account = get_env_value("WEIXIN_ACCOUNT_ID")
|
||||
existing_token = get_env_value("WEIXIN_TOKEN")
|
||||
if existing_account and existing_token:
|
||||
print()
|
||||
print_success("Weixin is already configured.")
|
||||
if not prompt_yes_no(" Reconfigure Weixin?", False):
|
||||
return
|
||||
|
||||
try:
|
||||
from gateway.platforms.weixin import check_weixin_requirements, qr_login
|
||||
except Exception as exc:
|
||||
print_error(f" Weixin adapter import failed: {exc}")
|
||||
print_info(" Install gateway dependencies first, then retry.")
|
||||
return
|
||||
|
||||
if not check_weixin_requirements():
|
||||
print_error(" Missing dependencies: Weixin needs aiohttp and cryptography.")
|
||||
print_info(" Install them, then rerun `hermes gateway setup`.")
|
||||
return
|
||||
|
||||
print()
|
||||
if not prompt_yes_no(" Start QR login now?", True):
|
||||
print_info(" Cancelled.")
|
||||
return
|
||||
|
||||
import asyncio
|
||||
try:
|
||||
credentials = asyncio.run(qr_login(str(get_hermes_home())))
|
||||
except KeyboardInterrupt:
|
||||
print()
|
||||
print_warning(" Weixin setup cancelled.")
|
||||
return
|
||||
except Exception as exc:
|
||||
print_error(f" QR login failed: {exc}")
|
||||
return
|
||||
|
||||
if not credentials:
|
||||
print_warning(" QR login did not complete.")
|
||||
return
|
||||
|
||||
account_id = credentials.get("account_id", "")
|
||||
token = credentials.get("token", "")
|
||||
base_url = credentials.get("base_url", "")
|
||||
user_id = credentials.get("user_id", "")
|
||||
|
||||
save_env_value("WEIXIN_ACCOUNT_ID", account_id)
|
||||
save_env_value("WEIXIN_TOKEN", token)
|
||||
if base_url:
|
||||
save_env_value("WEIXIN_BASE_URL", base_url)
|
||||
save_env_value("WEIXIN_CDN_BASE_URL", get_env_value("WEIXIN_CDN_BASE_URL") or "https://novac2c.cdn.weixin.qq.com/c2c")
|
||||
|
||||
print()
|
||||
access_choices = [
|
||||
"Use DM pairing approval (recommended)",
|
||||
"Allow all direct messages",
|
||||
"Only allow listed user IDs",
|
||||
"Disable direct messages",
|
||||
]
|
||||
access_idx = prompt_choice(" How should direct messages be authorized?", access_choices, 0)
|
||||
if access_idx == 0:
|
||||
save_env_value("WEIXIN_DM_POLICY", "pairing")
|
||||
save_env_value("WEIXIN_ALLOW_ALL_USERS", "false")
|
||||
save_env_value("WEIXIN_ALLOWED_USERS", "")
|
||||
print_success(" DM pairing enabled.")
|
||||
print_info(" Unknown DM users can request access and you approve them with `hermes pairing approve`.")
|
||||
elif access_idx == 1:
|
||||
save_env_value("WEIXIN_DM_POLICY", "open")
|
||||
save_env_value("WEIXIN_ALLOW_ALL_USERS", "true")
|
||||
save_env_value("WEIXIN_ALLOWED_USERS", "")
|
||||
print_warning(" Open DM access enabled for Weixin.")
|
||||
elif access_idx == 2:
|
||||
default_allow = user_id or ""
|
||||
allowlist = prompt(" Allowed Weixin user IDs (comma-separated)", default_allow, password=False).replace(" ", "")
|
||||
save_env_value("WEIXIN_DM_POLICY", "allowlist")
|
||||
save_env_value("WEIXIN_ALLOW_ALL_USERS", "false")
|
||||
save_env_value("WEIXIN_ALLOWED_USERS", allowlist)
|
||||
print_success(" Weixin allowlist saved.")
|
||||
else:
|
||||
save_env_value("WEIXIN_DM_POLICY", "disabled")
|
||||
save_env_value("WEIXIN_ALLOW_ALL_USERS", "false")
|
||||
save_env_value("WEIXIN_ALLOWED_USERS", "")
|
||||
print_warning(" Direct messages disabled.")
|
||||
|
||||
print()
|
||||
group_choices = [
|
||||
"Disable group chats (recommended)",
|
||||
"Allow all group chats",
|
||||
"Only allow listed group chat IDs",
|
||||
]
|
||||
group_idx = prompt_choice(" How should group chats be handled?", group_choices, 0)
|
||||
if group_idx == 0:
|
||||
save_env_value("WEIXIN_GROUP_POLICY", "disabled")
|
||||
save_env_value("WEIXIN_GROUP_ALLOWED_USERS", "")
|
||||
print_info(" Group chats disabled.")
|
||||
elif group_idx == 1:
|
||||
save_env_value("WEIXIN_GROUP_POLICY", "open")
|
||||
save_env_value("WEIXIN_GROUP_ALLOWED_USERS", "")
|
||||
print_warning(" All group chats enabled.")
|
||||
else:
|
||||
allow_groups = prompt(" Allowed group chat IDs (comma-separated)", "", password=False).replace(" ", "")
|
||||
save_env_value("WEIXIN_GROUP_POLICY", "allowlist")
|
||||
save_env_value("WEIXIN_GROUP_ALLOWED_USERS", allow_groups)
|
||||
print_success(" Group allowlist saved.")
|
||||
|
||||
if user_id:
|
||||
print()
|
||||
if prompt_yes_no(f" Use your Weixin user ID ({user_id}) as the home channel?", True):
|
||||
save_env_value("WEIXIN_HOME_CHANNEL", user_id)
|
||||
print_success(f" Home channel set to {user_id}")
|
||||
|
||||
print()
|
||||
print_success("Weixin configured!")
|
||||
print_info(f" Account ID: {account_id}")
|
||||
if user_id:
|
||||
print_info(f" User ID: {user_id}")
|
||||
|
||||
|
||||
def _setup_signal():
|
||||
"""Interactive setup for Signal messenger."""
|
||||
import shutil
|
||||
|
|
@ -2035,6 +2334,8 @@ def gateway_setup():
|
|||
_setup_whatsapp()
|
||||
elif platform["key"] == "signal":
|
||||
_setup_signal()
|
||||
elif platform["key"] == "weixin":
|
||||
_setup_weixin()
|
||||
else:
|
||||
_setup_standard_platform(platform)
|
||||
|
||||
|
|
@ -2076,7 +2377,8 @@ def gateway_setup():
|
|||
print()
|
||||
if supports_systemd_services() or is_macos():
|
||||
platform_name = "systemd" if supports_systemd_services() else "launchd"
|
||||
if prompt_yes_no(f" Install the gateway as a {platform_name} service? (runs in background, starts on boot)", True):
|
||||
wsl_note = " (note: services may not survive WSL restarts)" if is_wsl() else ""
|
||||
if prompt_yes_no(f" Install the gateway as a {platform_name} service?{wsl_note} (runs in background, starts on boot)", True):
|
||||
try:
|
||||
installed_scope = None
|
||||
did_install = False
|
||||
|
|
@ -2101,16 +2403,21 @@ def gateway_setup():
|
|||
print_info(" You can install later: hermes gateway install")
|
||||
if supports_systemd_services():
|
||||
print_info(" Or as a boot-time service: sudo hermes gateway install --system")
|
||||
print_info(" Or run in foreground: hermes gateway")
|
||||
print_info(" Or run in foreground: hermes gateway run")
|
||||
elif is_wsl():
|
||||
print_info(" WSL detected but systemd is not running.")
|
||||
print_info(" Run in foreground: hermes gateway run")
|
||||
print_info(" For persistence: tmux new -s hermes 'hermes gateway run'")
|
||||
print_info(" To enable systemd: add systemd=true to /etc/wsl.conf, then 'wsl --shutdown'")
|
||||
else:
|
||||
if is_termux():
|
||||
from hermes_constants import display_hermes_home as _dhh
|
||||
print_info(" Termux does not use systemd/launchd services.")
|
||||
print_info(" Run in foreground: hermes gateway")
|
||||
print_info(f" Or start it manually in the background (best effort): nohup hermes gateway >{_dhh()}/logs/gateway.log 2>&1 &")
|
||||
print_info(" Run in foreground: hermes gateway run")
|
||||
print_info(f" Or start it manually in the background (best effort): nohup hermes gateway run >{_dhh()}/logs/gateway.log 2>&1 &")
|
||||
else:
|
||||
print_info(" Service install not supported on this platform.")
|
||||
print_info(" Run in foreground: hermes gateway")
|
||||
print_info(" Run in foreground: hermes gateway run")
|
||||
else:
|
||||
print()
|
||||
print_info("No platforms configured. Run 'hermes gateway setup' when ready.")
|
||||
|
|
@ -2151,9 +2458,23 @@ def gateway_command(args):
|
|||
print("Run manually: hermes gateway")
|
||||
sys.exit(1)
|
||||
if supports_systemd_services():
|
||||
if is_wsl():
|
||||
print_warning("WSL detected — systemd services may not survive WSL restarts.")
|
||||
print_info(" Consider running in foreground instead: hermes gateway run")
|
||||
print_info(" Or use tmux/screen for persistence: tmux new -s hermes 'hermes gateway run'")
|
||||
print()
|
||||
systemd_install(force=force, system=system, run_as_user=run_as_user)
|
||||
elif is_macos():
|
||||
launchd_install(force)
|
||||
elif is_wsl():
|
||||
print("WSL detected but systemd is not running.")
|
||||
print("Either enable systemd (add systemd=true to /etc/wsl.conf and restart WSL)")
|
||||
print("or run the gateway in foreground mode:")
|
||||
print()
|
||||
print(" hermes gateway run # direct foreground")
|
||||
print(" tmux new -s hermes 'hermes gateway run' # persistent via tmux")
|
||||
print(" nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 & # background")
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("Service installation not supported on this platform.")
|
||||
print("Run manually: hermes gateway run")
|
||||
|
|
@ -2186,6 +2507,16 @@ def gateway_command(args):
|
|||
systemd_start(system=system)
|
||||
elif is_macos():
|
||||
launchd_start()
|
||||
elif is_wsl():
|
||||
print("WSL detected but systemd is not available.")
|
||||
print("Run the gateway in foreground mode instead:")
|
||||
print()
|
||||
print(" hermes gateway run # direct foreground")
|
||||
print(" tmux new -s hermes 'hermes gateway run' # persistent via tmux")
|
||||
print(" nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 & # background")
|
||||
print()
|
||||
print("To enable systemd: add systemd=true to /etc/wsl.conf and run 'wsl --shutdown' from PowerShell.")
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("Not supported on this platform.")
|
||||
sys.exit(1)
|
||||
|
|
@ -2320,6 +2651,10 @@ def gateway_command(args):
|
|||
if is_termux():
|
||||
print("Termux note:")
|
||||
print(" Android may stop background jobs when Termux is suspended")
|
||||
elif is_wsl():
|
||||
print("WSL note:")
|
||||
print(" The gateway is running in foreground/manual mode (recommended for WSL).")
|
||||
print(" Use tmux or screen for persistence across terminal closes.")
|
||||
else:
|
||||
print("To install as a service:")
|
||||
print(" hermes gateway install")
|
||||
|
|
@ -2334,9 +2669,12 @@ def gateway_command(args):
|
|||
print(f" {line}")
|
||||
print()
|
||||
print("To start:")
|
||||
print(" hermes gateway # Run in foreground")
|
||||
print(" hermes gateway run # Run in foreground")
|
||||
if is_termux():
|
||||
print(" nohup hermes gateway > ~/.hermes/logs/gateway.log 2>&1 & # Best-effort background start")
|
||||
print(" nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 & # Best-effort background start")
|
||||
elif is_wsl():
|
||||
print(" tmux new -s hermes 'hermes gateway run' # persistent via tmux")
|
||||
print(" nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 & # background")
|
||||
else:
|
||||
print(" hermes gateway install # Install as user service")
|
||||
print(" sudo hermes gateway install --system # Install as boot-time system service")
|
||||
|
|
|
|||
|
|
@ -98,10 +98,11 @@ def _apply_profile_override() -> None:
|
|||
consume = 1
|
||||
break
|
||||
|
||||
# 2. If no flag, check ~/.hermes/active_profile
|
||||
# 2. If no flag, check active_profile in the hermes root
|
||||
if profile_name is None:
|
||||
try:
|
||||
active_path = Path.home() / ".hermes" / "active_profile"
|
||||
from hermes_constants import get_default_hermes_root
|
||||
active_path = get_default_hermes_root() / "active_profile"
|
||||
if active_path.exists():
|
||||
name = active_path.read_text().strip()
|
||||
if name and name != "default":
|
||||
|
|
@ -993,7 +994,6 @@ def cmd_whatsapp(args):
|
|||
|
||||
def cmd_setup(args):
|
||||
"""Interactive setup wizard."""
|
||||
_require_tty("setup")
|
||||
from hermes_cli.setup import run_setup_wizard
|
||||
run_setup_wizard(args)
|
||||
|
||||
|
|
@ -1103,10 +1103,11 @@ def select_provider_and_model(args=None):
|
|||
("alibaba", "Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
|
||||
]
|
||||
|
||||
# Add user-defined custom providers from config.yaml
|
||||
custom_providers_cfg = config.get("custom_providers") or []
|
||||
_custom_provider_map = {} # key → {name, base_url, api_key}
|
||||
if isinstance(custom_providers_cfg, list):
|
||||
def _named_custom_provider_map(cfg) -> dict[str, dict[str, str]]:
|
||||
custom_providers_cfg = cfg.get("custom_providers") or []
|
||||
custom_provider_map = {}
|
||||
if not isinstance(custom_providers_cfg, list):
|
||||
return custom_provider_map
|
||||
for entry in custom_providers_cfg:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
|
|
@ -1115,16 +1116,23 @@ def select_provider_and_model(args=None):
|
|||
if not name or not base_url:
|
||||
continue
|
||||
key = "custom:" + name.lower().replace(" ", "-")
|
||||
short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/")
|
||||
saved_model = entry.get("model", "")
|
||||
model_hint = f" — {saved_model}" if saved_model else ""
|
||||
top_providers.append((key, f"{name} ({short_url}){model_hint}"))
|
||||
_custom_provider_map[key] = {
|
||||
custom_provider_map[key] = {
|
||||
"name": name,
|
||||
"base_url": base_url,
|
||||
"api_key": entry.get("api_key", ""),
|
||||
"model": saved_model,
|
||||
"model": entry.get("model", ""),
|
||||
}
|
||||
return custom_provider_map
|
||||
|
||||
# Add user-defined custom providers from config.yaml
|
||||
_custom_provider_map = _named_custom_provider_map(config) # key → {name, base_url, api_key}
|
||||
for key, provider_info in _custom_provider_map.items():
|
||||
name = provider_info["name"]
|
||||
base_url = provider_info["base_url"]
|
||||
short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/")
|
||||
saved_model = provider_info.get("model", "")
|
||||
model_hint = f" — {saved_model}" if saved_model else ""
|
||||
top_providers.append((key, f"{name} ({short_url}){model_hint}"))
|
||||
|
||||
top_keys = {k for k, _ in top_providers}
|
||||
extended_keys = {k for k, _ in extended_providers}
|
||||
|
|
@ -1189,8 +1197,15 @@ def select_provider_and_model(args=None):
|
|||
_model_flow_copilot(config, current_model)
|
||||
elif selected_provider == "custom":
|
||||
_model_flow_custom(config)
|
||||
elif selected_provider.startswith("custom:") and selected_provider in _custom_provider_map:
|
||||
_model_flow_named_custom(config, _custom_provider_map[selected_provider])
|
||||
elif selected_provider.startswith("custom:"):
|
||||
provider_info = _named_custom_provider_map(load_config()).get(selected_provider)
|
||||
if provider_info is None:
|
||||
print(
|
||||
"Warning: the selected saved custom provider is no longer available. "
|
||||
"It may have been removed from config.yaml. No change."
|
||||
)
|
||||
return
|
||||
_model_flow_named_custom(config, provider_info)
|
||||
elif selected_provider == "remove-custom":
|
||||
_remove_custom_provider(config)
|
||||
elif selected_provider == "anthropic":
|
||||
|
|
@ -1200,6 +1215,42 @@ def select_provider_and_model(args=None):
|
|||
elif selected_provider in ("gemini", "zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface"):
|
||||
_model_flow_api_key_provider(config, selected_provider, current_model)
|
||||
|
||||
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
|
||||
# When the user switches to a named provider (anything except "custom"),
|
||||
# a leftover OPENAI_BASE_URL in ~/.hermes/.env can poison auxiliary
|
||||
# clients that use provider:auto. Clear it proactively. (#5161)
|
||||
if selected_provider not in ("custom", "cancel", "remove-custom") \
|
||||
and not selected_provider.startswith("custom:"):
|
||||
_clear_stale_openai_base_url()
|
||||
|
||||
|
||||
def _clear_stale_openai_base_url():
|
||||
"""Remove OPENAI_BASE_URL from ~/.hermes/.env if the active provider is not 'custom'.
|
||||
|
||||
After a provider switch, a leftover OPENAI_BASE_URL causes auxiliary
|
||||
clients (compression, vision, delegation) with provider:auto to route
|
||||
requests to the old custom endpoint instead of the newly selected
|
||||
provider. See issue #5161.
|
||||
"""
|
||||
from hermes_cli.config import get_env_value, save_env_value, load_config
|
||||
|
||||
cfg = load_config()
|
||||
model_cfg = cfg.get("model", {})
|
||||
if isinstance(model_cfg, dict):
|
||||
provider = (model_cfg.get("provider") or "").strip().lower()
|
||||
else:
|
||||
provider = ""
|
||||
|
||||
if provider == "custom" or not provider:
|
||||
return # custom provider legitimately uses OPENAI_BASE_URL
|
||||
|
||||
stale_url = get_env_value("OPENAI_BASE_URL")
|
||||
if stale_url:
|
||||
save_env_value("OPENAI_BASE_URL", "")
|
||||
print(f"Cleared stale OPENAI_BASE_URL from .env (was: {stale_url[:40]}...)"
|
||||
if len(stale_url) > 40
|
||||
else f"Cleared stale OPENAI_BASE_URL from .env (was: {stale_url})")
|
||||
|
||||
|
||||
def _prompt_provider_choice(choices, *, default=0):
|
||||
"""Show provider selection menu with curses arrow-key navigation.
|
||||
|
|
@ -1263,10 +1314,10 @@ def _model_flow_openrouter(config, current_model=""):
|
|||
print()
|
||||
|
||||
from hermes_cli.models import model_ids, get_pricing_for_provider
|
||||
openrouter_models = model_ids()
|
||||
openrouter_models = model_ids(force_refresh=True)
|
||||
|
||||
# Fetch live pricing (non-blocking — returns empty dict on failure)
|
||||
pricing = get_pricing_for_provider("openrouter")
|
||||
pricing = get_pricing_for_provider("openrouter", force_refresh=True)
|
||||
|
||||
selected = _prompt_model_selection(openrouter_models, current_model=current_model, pricing=pricing)
|
||||
if selected:
|
||||
|
|
@ -1793,8 +1844,10 @@ def _remove_custom_provider(config):
|
|||
title="Select provider to remove:",
|
||||
)
|
||||
idx = menu.show()
|
||||
from hermes_cli.curses_ui import flush_stdin
|
||||
flush_stdin()
|
||||
print()
|
||||
except (ImportError, NotImplementedError):
|
||||
except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
|
||||
for i, c in enumerate(choices, 1):
|
||||
print(f" {i}. {c}")
|
||||
print()
|
||||
|
|
@ -1818,8 +1871,9 @@ def _remove_custom_provider(config):
|
|||
def _model_flow_named_custom(config, provider_info):
|
||||
"""Handle a named custom provider from config.yaml custom_providers list.
|
||||
|
||||
If the entry has a saved model name, activates it immediately.
|
||||
Otherwise probes the endpoint's /models API to let the user pick one.
|
||||
Always probes the endpoint's /models API to let the user pick a model.
|
||||
If a model was previously saved, it is pre-selected in the menu.
|
||||
Falls back to the saved model if probing fails.
|
||||
"""
|
||||
from hermes_cli.auth import _save_model_choice, deactivate_provider
|
||||
from hermes_cli.config import load_config, save_config
|
||||
|
|
@ -1830,54 +1884,46 @@ def _model_flow_named_custom(config, provider_info):
|
|||
api_key = provider_info.get("api_key", "")
|
||||
saved_model = provider_info.get("model", "")
|
||||
|
||||
# If a model is saved, just activate immediately — no probing needed
|
||||
if saved_model:
|
||||
_save_model_choice(saved_model)
|
||||
|
||||
cfg = load_config()
|
||||
model = cfg.get("model")
|
||||
if not isinstance(model, dict):
|
||||
model = {"default": model} if model else {}
|
||||
cfg["model"] = model
|
||||
model["provider"] = "custom"
|
||||
model["base_url"] = base_url
|
||||
if api_key:
|
||||
model["api_key"] = api_key
|
||||
save_config(cfg)
|
||||
deactivate_provider()
|
||||
|
||||
print(f"✅ Switched to: {saved_model}")
|
||||
print(f" Provider: {name} ({base_url})")
|
||||
return
|
||||
|
||||
# No saved model — probe endpoint and let user pick
|
||||
print(f" Provider: {name}")
|
||||
print(f" URL: {base_url}")
|
||||
if saved_model:
|
||||
print(f" Current: {saved_model}")
|
||||
print()
|
||||
print("No model saved for this provider. Fetching available models...")
|
||||
|
||||
print("Fetching available models...")
|
||||
models = fetch_api_models(api_key, base_url, timeout=8.0)
|
||||
|
||||
if models:
|
||||
default_idx = 0
|
||||
if saved_model and saved_model in models:
|
||||
default_idx = models.index(saved_model)
|
||||
|
||||
print(f"Found {len(models)} model(s):\n")
|
||||
try:
|
||||
from simple_term_menu import TerminalMenu
|
||||
menu_items = [f" {m}" for m in models] + [" Cancel"]
|
||||
menu_items = [
|
||||
f" {m} (current)" if m == saved_model else f" {m}"
|
||||
for m in models
|
||||
] + [" Cancel"]
|
||||
menu = TerminalMenu(
|
||||
menu_items, cursor_index=0,
|
||||
menu_items, cursor_index=default_idx,
|
||||
menu_cursor="-> ", menu_cursor_style=("fg_green", "bold"),
|
||||
menu_highlight_style=("fg_green",),
|
||||
cycle_cursor=True, clear_screen=False,
|
||||
title=f"Select model from {name}:",
|
||||
)
|
||||
idx = menu.show()
|
||||
from hermes_cli.curses_ui import flush_stdin
|
||||
flush_stdin()
|
||||
print()
|
||||
if idx is None or idx >= len(models):
|
||||
print("Cancelled.")
|
||||
return
|
||||
model_name = models[idx]
|
||||
except (ImportError, NotImplementedError):
|
||||
except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
|
||||
for i, m in enumerate(models, 1):
|
||||
print(f" {i}. {m}")
|
||||
suffix = " (current)" if m == saved_model else ""
|
||||
print(f" {i}. {m}{suffix}")
|
||||
print(f" {len(models) + 1}. Cancel")
|
||||
print()
|
||||
try:
|
||||
|
|
@ -1893,6 +1939,13 @@ def _model_flow_named_custom(config, provider_info):
|
|||
except (ValueError, KeyboardInterrupt, EOFError):
|
||||
print("\nCancelled.")
|
||||
return
|
||||
elif saved_model:
|
||||
print("Could not fetch models from endpoint.")
|
||||
try:
|
||||
model_name = input(f"Model name [{saved_model}]: ").strip() or saved_model
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print("\nCancelled.")
|
||||
return
|
||||
else:
|
||||
print("Could not fetch models from endpoint. Enter model name manually.")
|
||||
try:
|
||||
|
|
@ -1988,6 +2041,8 @@ def _prompt_reasoning_effort_selection(efforts, current_effort=""):
|
|||
title="Select reasoning effort:",
|
||||
)
|
||||
idx = menu.show()
|
||||
from hermes_cli.curses_ui import flush_stdin
|
||||
flush_stdin()
|
||||
if idx is None:
|
||||
return None
|
||||
print()
|
||||
|
|
@ -1996,7 +2051,7 @@ def _prompt_reasoning_effort_selection(efforts, current_effort=""):
|
|||
if idx == len(ordered):
|
||||
return "none"
|
||||
return None
|
||||
except (ImportError, NotImplementedError):
|
||||
except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
|
||||
pass
|
||||
|
||||
print("Select reasoning effort:")
|
||||
|
|
@ -3157,33 +3212,19 @@ def _restore_stashed_changes(
|
|||
print("\nYour stashed changes are preserved — nothing is lost.")
|
||||
print(f" Stash ref: {stash_ref}")
|
||||
|
||||
# Ask before resetting (if interactive)
|
||||
do_reset = True
|
||||
if prompt_user:
|
||||
print("\nReset working tree to clean state so Hermes can run?")
|
||||
print(" (You can re-apply your changes later with: git stash apply)")
|
||||
print("[Y/n] ", end="", flush=True)
|
||||
response = input().strip().lower()
|
||||
if response not in ("", "y", "yes"):
|
||||
do_reset = False
|
||||
|
||||
if do_reset:
|
||||
subprocess.run(
|
||||
git_cmd + ["reset", "--hard", "HEAD"],
|
||||
cwd=cwd,
|
||||
capture_output=True,
|
||||
)
|
||||
print("Working tree reset to clean state.")
|
||||
else:
|
||||
print("Working tree left as-is (may have conflict markers).")
|
||||
print("Resolve conflicts manually, then run: git stash drop")
|
||||
|
||||
print(f"Restore your changes with: git stash apply {stash_ref}")
|
||||
# In non-interactive mode (gateway /update), don't abort — the code
|
||||
# update itself succeeded, only the stash restore had conflicts.
|
||||
# Aborting would report the entire update as failed.
|
||||
if prompt_user:
|
||||
sys.exit(1)
|
||||
# Always reset to clean state — leaving conflict markers in source
|
||||
# files makes hermes completely unrunnable (SyntaxError on import).
|
||||
# The user's changes are safe in the stash for manual recovery.
|
||||
subprocess.run(
|
||||
git_cmd + ["reset", "--hard", "HEAD"],
|
||||
cwd=cwd,
|
||||
capture_output=True,
|
||||
)
|
||||
print("Working tree reset to clean state.")
|
||||
print(f"Restore your changes later with: git stash apply {stash_ref}")
|
||||
# Don't sys.exit — the code update itself succeeded, only the stash
|
||||
# restore had conflicts. Let cmd_update continue with pip install,
|
||||
# skill sync, and gateway restart.
|
||||
return False
|
||||
|
||||
stash_selector = _resolve_stash_selector(git_cmd, cwd, stash_ref)
|
||||
|
|
@ -3444,10 +3485,11 @@ def _invalidate_update_cache():
|
|||
``hermes update``, every profile is now current.
|
||||
"""
|
||||
homes = []
|
||||
# Default profile home
|
||||
default_home = Path.home() / ".hermes"
|
||||
# Default profile home (Docker-aware — uses /opt/data in Docker)
|
||||
from hermes_constants import get_default_hermes_root
|
||||
default_home = get_default_hermes_root()
|
||||
homes.append(default_home)
|
||||
# Named profiles under ~/.hermes/profiles/
|
||||
# Named profiles under <root>/profiles/
|
||||
profiles_root = default_home / "profiles"
|
||||
if profiles_root.is_dir():
|
||||
for entry in profiles_root.iterdir():
|
||||
|
|
@ -4184,7 +4226,10 @@ def cmd_profile(args):
|
|||
print(f" {name} chat Start chatting")
|
||||
print(f" {name} gateway start Start the messaging gateway")
|
||||
if clone or clone_all:
|
||||
profile_dir_display = f"~/.hermes/profiles/{name}"
|
||||
try:
|
||||
profile_dir_display = "~/" + str(profile_dir.relative_to(Path.home()))
|
||||
except ValueError:
|
||||
profile_dir_display = str(profile_dir)
|
||||
print(f"\n Edit {profile_dir_display}/.env for different API keys")
|
||||
print(f" Edit {profile_dir_display}/SOUL.md for different personality")
|
||||
print()
|
||||
|
|
@ -4585,7 +4630,7 @@ For more help on a command:
|
|||
gateway_subparsers = gateway_parser.add_subparsers(dest="gateway_command")
|
||||
|
||||
# gateway run (default)
|
||||
gateway_run = gateway_subparsers.add_parser("run", help="Run gateway in foreground")
|
||||
gateway_run = gateway_subparsers.add_parser("run", help="Run gateway in foreground (recommended for WSL, Docker, Termux)")
|
||||
gateway_run.add_argument("-v", "--verbose", action="count", default=0,
|
||||
help="Increase stderr log verbosity (-v=INFO, -vv=DEBUG)")
|
||||
gateway_run.add_argument("-q", "--quiet", action="store_true",
|
||||
|
|
@ -4594,7 +4639,7 @@ For more help on a command:
|
|||
help="Replace any existing gateway instance (useful for systemd)")
|
||||
|
||||
# gateway start
|
||||
gateway_start = gateway_subparsers.add_parser("start", help="Start gateway service")
|
||||
gateway_start = gateway_subparsers.add_parser("start", help="Start the installed systemd/launchd background service")
|
||||
gateway_start.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
|
||||
|
||||
# gateway stop
|
||||
|
|
@ -4612,7 +4657,7 @@ For more help on a command:
|
|||
gateway_status.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
|
||||
|
||||
# gateway install
|
||||
gateway_install = gateway_subparsers.add_parser("install", help="Install gateway as service")
|
||||
gateway_install = gateway_subparsers.add_parser("install", help="Install gateway as a systemd/launchd background service")
|
||||
gateway_install.add_argument("--force", action="store_true", help="Force reinstall")
|
||||
gateway_install.add_argument("--system", action="store_true", help="Install as a Linux system-level service (starts at boot)")
|
||||
gateway_install.add_argument("--run-as-user", dest="run_as_user", help="User account the Linux system service should run as")
|
||||
|
|
@ -4633,12 +4678,12 @@ For more help on a command:
|
|||
"setup",
|
||||
help="Interactive setup wizard",
|
||||
description="Configure Hermes Agent with an interactive wizard. "
|
||||
"Run a specific section: hermes setup model|terminal|gateway|tools|agent"
|
||||
"Run a specific section: hermes setup model|tts|terminal|gateway|tools|agent"
|
||||
)
|
||||
setup_parser.add_argument(
|
||||
"section",
|
||||
nargs="?",
|
||||
choices=["model", "terminal", "gateway", "tools", "agent"],
|
||||
choices=["model", "tts", "terminal", "gateway", "tools", "agent"],
|
||||
default=None,
|
||||
help="Run a specific setup section instead of the full wizard"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -76,17 +76,22 @@ _STRIP_VENDOR_ONLY_PROVIDERS: frozenset[str] = frozenset({
|
|||
"copilot-acp",
|
||||
})
|
||||
|
||||
# Providers whose own naming is authoritative -- pass through unchanged.
|
||||
_PASSTHROUGH_PROVIDERS: frozenset[str] = frozenset({
|
||||
# Providers whose native naming is authoritative -- pass through unchanged.
|
||||
_AUTHORITATIVE_NATIVE_PROVIDERS: frozenset[str] = frozenset({
|
||||
"gemini",
|
||||
"huggingface",
|
||||
"openai-codex",
|
||||
})
|
||||
|
||||
# Direct providers that accept bare native names but should repair a matching
|
||||
# provider/ prefix when users copy the aggregator form into config.yaml.
|
||||
_MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
|
||||
"zai",
|
||||
"kimi-coding",
|
||||
"minimax",
|
||||
"minimax-cn",
|
||||
"alibaba",
|
||||
"qwen-oauth",
|
||||
"huggingface",
|
||||
"openai-codex",
|
||||
"custom",
|
||||
})
|
||||
|
||||
|
|
@ -168,6 +173,40 @@ def _dots_to_hyphens(model_name: str) -> str:
|
|||
return model_name.replace(".", "-")
|
||||
|
||||
|
||||
def _normalize_provider_alias(provider_name: str) -> str:
|
||||
"""Resolve provider aliases to Hermes' canonical ids."""
|
||||
raw = (provider_name or "").strip().lower()
|
||||
if not raw:
|
||||
return raw
|
||||
try:
|
||||
from hermes_cli.models import normalize_provider
|
||||
|
||||
return normalize_provider(raw)
|
||||
except Exception:
|
||||
return raw
|
||||
|
||||
|
||||
def _strip_matching_provider_prefix(model_name: str, target_provider: str) -> str:
|
||||
"""Strip ``provider/`` only when the prefix matches the target provider.
|
||||
|
||||
This prevents arbitrary slash-bearing model IDs from being mangled on
|
||||
native providers while still repairing manual config values like
|
||||
``zai/glm-5.1`` for the ``zai`` provider.
|
||||
"""
|
||||
if "/" not in model_name:
|
||||
return model_name
|
||||
|
||||
prefix, remainder = model_name.split("/", 1)
|
||||
if not prefix.strip() or not remainder.strip():
|
||||
return model_name
|
||||
|
||||
normalized_prefix = _normalize_provider_alias(prefix)
|
||||
normalized_target = _normalize_provider_alias(target_provider)
|
||||
if normalized_prefix and normalized_prefix == normalized_target:
|
||||
return remainder.strip()
|
||||
return model_name
|
||||
|
||||
|
||||
def detect_vendor(model_name: str) -> Optional[str]:
|
||||
"""Detect the vendor slug from a bare model name.
|
||||
|
||||
|
|
@ -305,24 +344,37 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
|
|||
if not name:
|
||||
return name
|
||||
|
||||
provider = (target_provider or "").strip().lower()
|
||||
provider = _normalize_provider_alias(target_provider)
|
||||
|
||||
# --- Aggregators: need vendor/model format ---
|
||||
if provider in _AGGREGATOR_PROVIDERS:
|
||||
return _prepend_vendor(name)
|
||||
|
||||
# --- Anthropic / OpenCode: strip vendor, dots -> hyphens ---
|
||||
# --- Anthropic / OpenCode: strip matching provider prefix, dots -> hyphens ---
|
||||
if provider in _DOT_TO_HYPHEN_PROVIDERS:
|
||||
bare = _strip_vendor_prefix(name)
|
||||
bare = _strip_matching_provider_prefix(name, provider)
|
||||
if "/" in bare:
|
||||
return bare
|
||||
return _dots_to_hyphens(bare)
|
||||
|
||||
# --- Copilot: strip vendor, keep dots ---
|
||||
# --- Copilot: strip matching provider prefix, keep dots ---
|
||||
if provider in _STRIP_VENDOR_ONLY_PROVIDERS:
|
||||
return _strip_vendor_prefix(name)
|
||||
return _strip_matching_provider_prefix(name, provider)
|
||||
|
||||
# --- DeepSeek: map to one of two canonical names ---
|
||||
if provider == "deepseek":
|
||||
return _normalize_for_deepseek(name)
|
||||
bare = _strip_matching_provider_prefix(name, provider)
|
||||
if "/" in bare:
|
||||
return bare
|
||||
return _normalize_for_deepseek(bare)
|
||||
|
||||
# --- Direct providers: repair matching provider prefixes only ---
|
||||
if provider in _MATCHING_PREFIX_STRIP_PROVIDERS:
|
||||
return _strip_matching_provider_prefix(name, provider)
|
||||
|
||||
# --- Authoritative native providers: preserve user-facing slugs as-is ---
|
||||
if provider in _AUTHORITATIVE_NATIVE_PROVIDERS:
|
||||
return name
|
||||
|
||||
# --- Custom & all others: pass through as-is ---
|
||||
return name
|
||||
|
|
@ -332,31 +384,3 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
|
|||
# Batch / convenience helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def model_display_name(model_id: str) -> str:
|
||||
"""Return a short, human-readable display name for a model id.
|
||||
|
||||
Strips the vendor prefix (if any) for a cleaner display in menus
|
||||
and status bars, while preserving dots for readability.
|
||||
|
||||
Examples::
|
||||
|
||||
>>> model_display_name("anthropic/claude-sonnet-4.6")
|
||||
'claude-sonnet-4.6'
|
||||
>>> model_display_name("claude-sonnet-4-6")
|
||||
'claude-sonnet-4-6'
|
||||
"""
|
||||
return _strip_vendor_prefix((model_id or "").strip())
|
||||
|
||||
|
||||
def is_aggregator_provider(provider: str) -> bool:
|
||||
"""Check if a provider is an aggregator that needs vendor/model format."""
|
||||
return (provider or "").strip().lower() in _AGGREGATOR_PROVIDERS
|
||||
|
||||
|
||||
def vendor_for_model(model_name: str) -> str:
|
||||
"""Return the vendor slug for a model, or ``""`` if unknown.
|
||||
|
||||
Convenience wrapper around :func:`detect_vendor` that never returns
|
||||
``None``.
|
||||
"""
|
||||
return detect_vendor(model_name) or ""
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ from dataclasses import dataclass
|
|||
from typing import List, NamedTuple, Optional
|
||||
|
||||
from hermes_cli.providers import (
|
||||
custom_provider_slug,
|
||||
determine_api_mode,
|
||||
get_label,
|
||||
is_aggregator,
|
||||
|
|
@ -336,6 +337,7 @@ def resolve_alias(
|
|||
def get_authenticated_provider_slugs(
|
||||
current_provider: str = "",
|
||||
user_providers: dict = None,
|
||||
custom_providers: list | None = None,
|
||||
) -> list[str]:
|
||||
"""Return slugs of providers that have credentials.
|
||||
|
||||
|
|
@ -346,6 +348,7 @@ def get_authenticated_provider_slugs(
|
|||
providers = list_authenticated_providers(
|
||||
current_provider=current_provider,
|
||||
user_providers=user_providers,
|
||||
custom_providers=custom_providers,
|
||||
max_models=0,
|
||||
)
|
||||
return [p["slug"] for p in providers]
|
||||
|
|
@ -383,6 +386,7 @@ def switch_model(
|
|||
is_global: bool = False,
|
||||
explicit_provider: str = "",
|
||||
user_providers: dict = None,
|
||||
custom_providers: list | None = None,
|
||||
) -> ModelSwitchResult:
|
||||
"""Core model-switching pipeline shared between CLI and gateway.
|
||||
|
||||
|
|
@ -416,6 +420,7 @@ def switch_model(
|
|||
is_global: Whether to persist the switch.
|
||||
explicit_provider: From --provider flag (empty = no explicit provider).
|
||||
user_providers: The ``providers:`` dict from config.yaml (for user endpoints).
|
||||
custom_providers: The ``custom_providers:`` list from config.yaml.
|
||||
|
||||
Returns:
|
||||
ModelSwitchResult with all information the caller needs.
|
||||
|
|
@ -436,7 +441,11 @@ def switch_model(
|
|||
# =================================================================
|
||||
if explicit_provider:
|
||||
# Resolve the provider
|
||||
pdef = resolve_provider_full(explicit_provider, user_providers)
|
||||
pdef = resolve_provider_full(
|
||||
explicit_provider,
|
||||
user_providers,
|
||||
custom_providers,
|
||||
)
|
||||
if pdef is None:
|
||||
_switch_err = (
|
||||
f"Unknown provider '{explicit_provider}'. "
|
||||
|
|
@ -516,6 +525,7 @@ def switch_model(
|
|||
authed = get_authenticated_provider_slugs(
|
||||
current_provider=current_provider,
|
||||
user_providers=user_providers,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
fallback_result = _resolve_alias_fallback(raw_input, authed)
|
||||
if fallback_result is not None:
|
||||
|
|
@ -590,6 +600,14 @@ def switch_model(
|
|||
|
||||
provider_changed = target_provider != current_provider
|
||||
provider_label = get_label(target_provider)
|
||||
if target_provider.startswith("custom:"):
|
||||
custom_pdef = resolve_provider_full(
|
||||
target_provider,
|
||||
user_providers,
|
||||
custom_providers,
|
||||
)
|
||||
if custom_pdef is not None:
|
||||
provider_label = custom_pdef.name
|
||||
|
||||
# --- Resolve credentials ---
|
||||
api_key = current_api_key
|
||||
|
|
@ -708,6 +726,7 @@ def switch_model(
|
|||
def list_authenticated_providers(
|
||||
current_provider: str = "",
|
||||
user_providers: dict = None,
|
||||
custom_providers: list | None = None,
|
||||
max_models: int = 8,
|
||||
) -> List[dict]:
|
||||
"""Detect which providers have credentials and list their curated models.
|
||||
|
|
@ -790,42 +809,69 @@ def list_authenticated_providers(
|
|||
})
|
||||
seen_slugs.add(slug)
|
||||
|
||||
# --- 2. Check Hermes-only providers (nous, openai-codex, copilot) ---
|
||||
# --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
|
||||
from hermes_cli.providers import HERMES_OVERLAYS
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY as _auth_registry
|
||||
|
||||
# Build reverse mapping: models.dev ID → Hermes provider ID.
|
||||
# HERMES_OVERLAYS keys may be models.dev IDs (e.g. "github-copilot")
|
||||
# while _PROVIDER_MODELS and config.yaml use Hermes IDs ("copilot").
|
||||
_mdev_to_hermes = {v: k for k, v in PROVIDER_TO_MODELS_DEV.items()}
|
||||
|
||||
for pid, overlay in HERMES_OVERLAYS.items():
|
||||
if pid in seen_slugs:
|
||||
continue
|
||||
|
||||
# Resolve Hermes slug — e.g. "github-copilot" → "copilot"
|
||||
hermes_slug = _mdev_to_hermes.get(pid, pid)
|
||||
if hermes_slug in seen_slugs:
|
||||
continue
|
||||
|
||||
# Check if credentials exist
|
||||
has_creds = False
|
||||
if overlay.extra_env_vars:
|
||||
has_creds = any(os.environ.get(ev) for ev in overlay.extra_env_vars)
|
||||
if overlay.auth_type in ("oauth_device_code", "oauth_external", "external_process"):
|
||||
# Also check api_key_env_vars from PROVIDER_REGISTRY for api_key auth_type
|
||||
if not has_creds and overlay.auth_type == "api_key":
|
||||
for _key in (pid, hermes_slug):
|
||||
pcfg = _auth_registry.get(_key)
|
||||
if pcfg and pcfg.api_key_env_vars:
|
||||
if any(os.environ.get(ev) for ev in pcfg.api_key_env_vars):
|
||||
has_creds = True
|
||||
break
|
||||
if not has_creds and overlay.auth_type in ("oauth_device_code", "oauth_external", "external_process"):
|
||||
# These use auth stores, not env vars — check for auth.json entries
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
if store and (pid in store.get("providers", {}) or pid in store.get("credential_pool", {})):
|
||||
providers_store = store.get("providers", {})
|
||||
pool_store = store.get("credential_pool", {})
|
||||
if store and (
|
||||
pid in providers_store or hermes_slug in providers_store
|
||||
or pid in pool_store or hermes_slug in pool_store
|
||||
):
|
||||
has_creds = True
|
||||
except Exception as exc:
|
||||
logger.debug("Auth store check failed for %s: %s", pid, exc)
|
||||
if not has_creds:
|
||||
continue
|
||||
|
||||
# Use curated list
|
||||
model_ids = curated.get(pid, [])
|
||||
# Use curated list — look up by Hermes slug, fall back to overlay key
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
total = len(model_ids)
|
||||
top = model_ids[:max_models]
|
||||
|
||||
results.append({
|
||||
"slug": pid,
|
||||
"name": get_label(pid),
|
||||
"is_current": pid == current_provider,
|
||||
"slug": hermes_slug,
|
||||
"name": get_label(hermes_slug),
|
||||
"is_current": hermes_slug == current_provider or pid == current_provider,
|
||||
"is_user_defined": False,
|
||||
"models": top,
|
||||
"total_models": total,
|
||||
"source": "hermes",
|
||||
})
|
||||
seen_slugs.add(pid)
|
||||
seen_slugs.add(hermes_slug)
|
||||
|
||||
# --- 3. User-defined endpoints from config ---
|
||||
if user_providers and isinstance(user_providers, dict):
|
||||
|
|
@ -853,80 +899,46 @@ def list_authenticated_providers(
|
|||
"api_url": api_url,
|
||||
})
|
||||
|
||||
# --- 4. Saved custom providers from config ---
|
||||
if custom_providers and isinstance(custom_providers, list):
|
||||
for entry in custom_providers:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
|
||||
display_name = (entry.get("name") or "").strip()
|
||||
api_url = (
|
||||
entry.get("base_url", "")
|
||||
or entry.get("url", "")
|
||||
or entry.get("api", "")
|
||||
or ""
|
||||
).strip()
|
||||
if not display_name or not api_url:
|
||||
continue
|
||||
|
||||
slug = custom_provider_slug(display_name)
|
||||
if slug in seen_slugs:
|
||||
continue
|
||||
|
||||
models_list = []
|
||||
default_model = (entry.get("model") or "").strip()
|
||||
if default_model:
|
||||
models_list.append(default_model)
|
||||
|
||||
results.append({
|
||||
"slug": slug,
|
||||
"name": display_name,
|
||||
"is_current": slug == current_provider,
|
||||
"is_user_defined": True,
|
||||
"models": models_list,
|
||||
"total_models": len(models_list),
|
||||
"source": "user-config",
|
||||
"api_url": api_url,
|
||||
})
|
||||
seen_slugs.add(slug)
|
||||
|
||||
# Sort: current provider first, then by model count descending
|
||||
results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fuzzy suggestions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def suggest_models(raw_input: str, limit: int = 3) -> List[str]:
|
||||
"""Return fuzzy model suggestions for a (possibly misspelled) input."""
|
||||
query = raw_input.strip()
|
||||
if not query:
|
||||
return []
|
||||
|
||||
results = search_models_dev(query, limit=limit)
|
||||
suggestions: list[str] = []
|
||||
for r in results:
|
||||
mid = r.get("model_id", "")
|
||||
if mid:
|
||||
suggestions.append(mid)
|
||||
|
||||
return suggestions[:limit]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Custom provider switch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def switch_to_custom_provider() -> CustomAutoResult:
|
||||
"""Handle bare '/model --provider custom' — resolve endpoint and auto-detect model."""
|
||||
from hermes_cli.runtime_provider import (
|
||||
resolve_runtime_provider,
|
||||
_auto_detect_local_model,
|
||||
)
|
||||
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested="custom")
|
||||
except Exception as e:
|
||||
return CustomAutoResult(
|
||||
success=False,
|
||||
error_message=f"Could not resolve custom endpoint: {e}",
|
||||
)
|
||||
|
||||
cust_base = runtime.get("base_url", "")
|
||||
cust_key = runtime.get("api_key", "")
|
||||
|
||||
if not cust_base or "openrouter.ai" in cust_base:
|
||||
return CustomAutoResult(
|
||||
success=False,
|
||||
error_message=(
|
||||
"No custom endpoint configured. "
|
||||
"Set model.base_url in config.yaml, or set OPENAI_BASE_URL "
|
||||
"in .env, or run: hermes setup -> Custom OpenAI-compatible endpoint"
|
||||
),
|
||||
)
|
||||
|
||||
detected_model = _auto_detect_local_model(cust_base)
|
||||
if not detected_model:
|
||||
return CustomAutoResult(
|
||||
success=False,
|
||||
base_url=cust_base,
|
||||
api_key=cust_key,
|
||||
error_message=(
|
||||
f"Custom endpoint at {cust_base} is reachable but no single "
|
||||
f"model was auto-detected. Specify the model explicitly: "
|
||||
f"/model <model-name> --provider custom"
|
||||
),
|
||||
)
|
||||
|
||||
return CustomAutoResult(
|
||||
success=True,
|
||||
model=detected_model,
|
||||
base_url=cust_base,
|
||||
api_key=cust_key,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -20,22 +20,20 @@ COPILOT_EDITOR_VERSION = "vscode/1.104.1"
|
|||
COPILOT_REASONING_EFFORTS_GPT5 = ["minimal", "low", "medium", "high"]
|
||||
COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
|
||||
|
||||
# Backward-compatible aliases for the earlier GitHub Models-backed Copilot work.
|
||||
GITHUB_MODELS_BASE_URL = COPILOT_BASE_URL
|
||||
GITHUB_MODELS_CATALOG_URL = COPILOT_MODELS_URL
|
||||
|
||||
# Fallback OpenRouter snapshot used when the live catalog is unavailable.
|
||||
# (model_id, display description shown in menus)
|
||||
OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("anthropic/claude-opus-4.6", "recommended"),
|
||||
("anthropic/claude-sonnet-4.6", ""),
|
||||
("qwen/qwen3.6-plus:free", "free"),
|
||||
("qwen/qwen3.6-plus", ""),
|
||||
("anthropic/claude-sonnet-4.5", ""),
|
||||
("anthropic/claude-haiku-4.5", ""),
|
||||
("openai/gpt-5.4", ""),
|
||||
("openai/gpt-5.4-mini", ""),
|
||||
("xiaomi/mimo-v2-pro", ""),
|
||||
("openai/gpt-5.3-codex", ""),
|
||||
("google/gemini-3-pro-preview", ""),
|
||||
("google/gemini-3-pro-image-preview", ""),
|
||||
("google/gemini-3-flash-preview", ""),
|
||||
("google/gemini-3.1-pro-preview", ""),
|
||||
("google/gemini-3.1-flash-lite-preview", ""),
|
||||
|
|
@ -47,7 +45,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
|||
("z-ai/glm-5.1", ""),
|
||||
("z-ai/glm-5-turbo", ""),
|
||||
("moonshotai/kimi-k2.5", ""),
|
||||
("x-ai/grok-4.20-beta", ""),
|
||||
("x-ai/grok-4.20", ""),
|
||||
("nvidia/nemotron-3-super-120b-a12b", ""),
|
||||
("nvidia/nemotron-3-super-120b-a12b:free", "free"),
|
||||
("arcee-ai/trinity-large-preview:free", "free"),
|
||||
|
|
@ -56,6 +54,8 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
|||
("openai/gpt-5.4-nano", ""),
|
||||
]
|
||||
|
||||
_openrouter_catalog_cache: list[tuple[str, str]] | None = None
|
||||
|
||||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"nous": [
|
||||
"anthropic/claude-opus-4.6",
|
||||
|
|
@ -87,6 +87,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"openai/gpt-5.4-nano",
|
||||
],
|
||||
"openai-codex": [
|
||||
"gpt-5.4",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5.3-codex",
|
||||
"gpt-5.2-codex",
|
||||
"gpt-5.1-codex-mini",
|
||||
|
|
@ -129,6 +131,19 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"glm-4.5",
|
||||
"glm-4.5-flash",
|
||||
],
|
||||
"xai": [
|
||||
"grok-4.20-0309-reasoning",
|
||||
"grok-4.20-0309-non-reasoning",
|
||||
"grok-4.20-multi-agent-0309",
|
||||
"grok-4-1-fast-reasoning",
|
||||
"grok-4-1-fast-non-reasoning",
|
||||
"grok-4-fast-reasoning",
|
||||
"grok-4-fast-non-reasoning",
|
||||
"grok-4-0709",
|
||||
"grok-code-fast-1",
|
||||
"grok-3",
|
||||
"grok-3-mini",
|
||||
],
|
||||
"kimi-coding": [
|
||||
"kimi-for-coding",
|
||||
"kimi-k2.5",
|
||||
|
|
@ -144,22 +159,16 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"kimi-k2-0905-preview",
|
||||
],
|
||||
"minimax": [
|
||||
"MiniMax-M1",
|
||||
"MiniMax-M1-40k",
|
||||
"MiniMax-M1-80k",
|
||||
"MiniMax-M1-128k",
|
||||
"MiniMax-M1-256k",
|
||||
"MiniMax-M2.5",
|
||||
"MiniMax-M2.7",
|
||||
"MiniMax-M2.5",
|
||||
"MiniMax-M2.1",
|
||||
"MiniMax-M2",
|
||||
],
|
||||
"minimax-cn": [
|
||||
"MiniMax-M1",
|
||||
"MiniMax-M1-40k",
|
||||
"MiniMax-M1-80k",
|
||||
"MiniMax-M1-128k",
|
||||
"MiniMax-M1-256k",
|
||||
"MiniMax-M2.5",
|
||||
"MiniMax-M2.7",
|
||||
"MiniMax-M2.5",
|
||||
"MiniMax-M2.1",
|
||||
"MiniMax-M2",
|
||||
],
|
||||
"anthropic": [
|
||||
"claude-opus-4-6",
|
||||
|
|
@ -416,12 +425,6 @@ _FREE_TIER_CACHE_TTL: int = 180 # seconds (3 minutes)
|
|||
_free_tier_cache: tuple[bool, float] | None = None # (result, timestamp)
|
||||
|
||||
|
||||
def clear_nous_free_tier_cache() -> None:
|
||||
"""Invalidate the cached free-tier result (e.g. after login/logout)."""
|
||||
global _free_tier_cache
|
||||
_free_tier_cache = None
|
||||
|
||||
|
||||
def check_nous_free_tier() -> bool:
|
||||
"""Check if the current Nous Portal user is on a free (unpaid) tier.
|
||||
|
||||
|
|
@ -530,19 +533,84 @@ _PROVIDER_ALIASES = {
|
|||
}
|
||||
|
||||
|
||||
def model_ids() -> list[str]:
|
||||
def _openrouter_model_is_free(pricing: Any) -> bool:
|
||||
"""Return True when both prompt and completion pricing are zero."""
|
||||
if not isinstance(pricing, dict):
|
||||
return False
|
||||
try:
|
||||
return float(pricing.get("prompt", "0")) == 0 and float(pricing.get("completion", "0")) == 0
|
||||
except (TypeError, ValueError):
|
||||
return False
|
||||
|
||||
|
||||
def fetch_openrouter_models(
|
||||
timeout: float = 8.0,
|
||||
*,
|
||||
force_refresh: bool = False,
|
||||
) -> list[tuple[str, str]]:
|
||||
"""Return the curated OpenRouter picker list, refreshed from the live catalog when possible."""
|
||||
global _openrouter_catalog_cache
|
||||
|
||||
if _openrouter_catalog_cache is not None and not force_refresh:
|
||||
return list(_openrouter_catalog_cache)
|
||||
|
||||
fallback = list(OPENROUTER_MODELS)
|
||||
preferred_ids = [mid for mid, _ in fallback]
|
||||
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
"https://openrouter.ai/api/v1/models",
|
||||
headers={"Accept": "application/json"},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
payload = json.loads(resp.read().decode())
|
||||
except Exception:
|
||||
return list(_openrouter_catalog_cache or fallback)
|
||||
|
||||
live_items = payload.get("data", [])
|
||||
if not isinstance(live_items, list):
|
||||
return list(_openrouter_catalog_cache or fallback)
|
||||
|
||||
live_by_id: dict[str, dict[str, Any]] = {}
|
||||
for item in live_items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
mid = str(item.get("id") or "").strip()
|
||||
if not mid:
|
||||
continue
|
||||
live_by_id[mid] = item
|
||||
|
||||
curated: list[tuple[str, str]] = []
|
||||
for preferred_id in preferred_ids:
|
||||
live_item = live_by_id.get(preferred_id)
|
||||
if live_item is None:
|
||||
continue
|
||||
desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else ""
|
||||
curated.append((preferred_id, desc))
|
||||
|
||||
if not curated:
|
||||
return list(_openrouter_catalog_cache or fallback)
|
||||
|
||||
first_id, _ = curated[0]
|
||||
curated[0] = (first_id, "recommended")
|
||||
_openrouter_catalog_cache = curated
|
||||
return list(curated)
|
||||
|
||||
|
||||
def model_ids(*, force_refresh: bool = False) -> list[str]:
|
||||
"""Return just the OpenRouter model-id strings."""
|
||||
return [mid for mid, _ in OPENROUTER_MODELS]
|
||||
return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
|
||||
|
||||
|
||||
def menu_labels() -> list[str]:
|
||||
def menu_labels(*, force_refresh: bool = False) -> list[str]:
|
||||
"""Return display labels like 'anthropic/claude-opus-4.6 (recommended)'."""
|
||||
labels = []
|
||||
for mid, desc in OPENROUTER_MODELS:
|
||||
for mid, desc in fetch_openrouter_models(force_refresh=force_refresh):
|
||||
labels.append(f"{mid} ({desc})" if desc else mid)
|
||||
return labels
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pricing helpers — fetch live pricing from OpenRouter-compatible /v1/models
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -575,31 +643,6 @@ def _format_price_per_mtok(per_token_str: str) -> str:
|
|||
return f"${per_m:.2f}"
|
||||
|
||||
|
||||
def format_pricing_label(pricing: dict[str, str] | None) -> str:
|
||||
"""Build a compact pricing label like 'in $3 · out $15 · cache $0.30/Mtok'.
|
||||
|
||||
Returns empty string when pricing is unavailable.
|
||||
"""
|
||||
if not pricing:
|
||||
return ""
|
||||
prompt_price = pricing.get("prompt", "")
|
||||
completion_price = pricing.get("completion", "")
|
||||
if not prompt_price and not completion_price:
|
||||
return ""
|
||||
inp = _format_price_per_mtok(prompt_price)
|
||||
out = _format_price_per_mtok(completion_price)
|
||||
if inp == "free" and out == "free":
|
||||
return "free"
|
||||
cache_read = pricing.get("input_cache_read", "")
|
||||
cache_str = _format_price_per_mtok(cache_read) if cache_read else ""
|
||||
if inp == out and not cache_str:
|
||||
return f"{inp}/Mtok"
|
||||
parts = [f"in {inp}", f"out {out}"]
|
||||
if cache_str and cache_str != "?" and cache_str != inp:
|
||||
parts.append(f"cache {cache_str}")
|
||||
return " · ".join(parts) + "/Mtok"
|
||||
|
||||
|
||||
def format_model_pricing_table(
|
||||
models: list[tuple[str, str]],
|
||||
pricing_map: dict[str, dict[str, str]],
|
||||
|
|
@ -727,13 +770,14 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:
|
|||
return ("", "")
|
||||
|
||||
|
||||
def get_pricing_for_provider(provider: str) -> dict[str, dict[str, str]]:
|
||||
def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
|
||||
"""Return live pricing for providers that support it (openrouter, nous)."""
|
||||
normalized = normalize_provider(provider)
|
||||
if normalized == "openrouter":
|
||||
return fetch_models_with_pricing(
|
||||
api_key=_resolve_openrouter_api_key(),
|
||||
base_url="https://openrouter.ai/api",
|
||||
force_refresh=force_refresh,
|
||||
)
|
||||
if normalized == "nous":
|
||||
api_key, base_url = _resolve_nous_pricing_credentials()
|
||||
|
|
@ -746,6 +790,7 @@ def get_pricing_for_provider(provider: str) -> dict[str, dict[str, str]]:
|
|||
return fetch_models_with_pricing(
|
||||
api_key=api_key,
|
||||
base_url=stripped,
|
||||
force_refresh=force_refresh,
|
||||
)
|
||||
return {}
|
||||
|
||||
|
|
@ -854,7 +899,11 @@ def _get_custom_base_url() -> str:
|
|||
return ""
|
||||
|
||||
|
||||
def curated_models_for_provider(provider: Optional[str]) -> list[tuple[str, str]]:
|
||||
def curated_models_for_provider(
|
||||
provider: Optional[str],
|
||||
*,
|
||||
force_refresh: bool = False,
|
||||
) -> list[tuple[str, str]]:
|
||||
"""Return ``(model_id, description)`` tuples for a provider's model list.
|
||||
|
||||
Tries to fetch the live model list from the provider's API first,
|
||||
|
|
@ -863,7 +912,7 @@ def curated_models_for_provider(provider: Optional[str]) -> list[tuple[str, str]
|
|||
"""
|
||||
normalized = normalize_provider(provider)
|
||||
if normalized == "openrouter":
|
||||
return list(OPENROUTER_MODELS)
|
||||
return fetch_openrouter_models(force_refresh=force_refresh)
|
||||
|
||||
# Try live API first (Codex, Nous, etc. all support /models)
|
||||
live = provider_model_ids(normalized)
|
||||
|
|
@ -982,12 +1031,12 @@ def _find_openrouter_slug(model_name: str) -> Optional[str]:
|
|||
return None
|
||||
|
||||
# Exact match (already has provider/ prefix)
|
||||
for mid, _ in OPENROUTER_MODELS:
|
||||
for mid in model_ids():
|
||||
if name_lower == mid.lower():
|
||||
return mid
|
||||
|
||||
# Try matching just the model part (after the /)
|
||||
for mid, _ in OPENROUTER_MODELS:
|
||||
for mid in model_ids():
|
||||
if "/" in mid:
|
||||
_, model_part = mid.split("/", 1)
|
||||
if name_lower == model_part.lower():
|
||||
|
|
@ -1017,6 +1066,79 @@ def provider_label(provider: Optional[str]) -> str:
|
|||
return _PROVIDER_LABELS.get(normalized, original or "OpenRouter")
|
||||
|
||||
|
||||
# Models that support OpenAI Priority Processing (service_tier="priority").
|
||||
# See https://openai.com/api-priority-processing/ for the canonical list.
|
||||
# Only the bare model slug is stored (no vendor prefix).
|
||||
_PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
|
||||
"gpt-5.4",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5.2",
|
||||
"gpt-5.1",
|
||||
"gpt-5",
|
||||
"gpt-5-mini",
|
||||
"gpt-4.1",
|
||||
"gpt-4.1-mini",
|
||||
"gpt-4.1-nano",
|
||||
"gpt-4o",
|
||||
"gpt-4o-mini",
|
||||
"o3",
|
||||
"o4-mini",
|
||||
})
|
||||
|
||||
# Models that support Anthropic Fast Mode (speed="fast").
|
||||
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
|
||||
# Currently only Claude Opus 4.6. Both hyphen and dot variants are stored
|
||||
# to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6).
|
||||
_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
|
||||
"claude-opus-4-6",
|
||||
"claude-opus-4.6",
|
||||
})
|
||||
|
||||
|
||||
def _strip_vendor_prefix(model_id: str) -> str:
|
||||
"""Strip vendor/ prefix from a model ID (e.g. 'anthropic/claude-opus-4-6' -> 'claude-opus-4-6')."""
|
||||
raw = str(model_id or "").strip().lower()
|
||||
if "/" in raw:
|
||||
raw = raw.split("/", 1)[1]
|
||||
return raw
|
||||
|
||||
|
||||
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
|
||||
"""Return whether Hermes should expose the /fast toggle for this model."""
|
||||
raw = _strip_vendor_prefix(str(model_id or ""))
|
||||
if raw in _PRIORITY_PROCESSING_MODELS:
|
||||
return True
|
||||
# Anthropic fast mode — strip date suffixes (e.g. claude-opus-4-6-20260401)
|
||||
# and OpenRouter variant tags (:fast, :beta) for matching.
|
||||
base = raw.split(":")[0]
|
||||
return base in _ANTHROPIC_FAST_MODE_MODELS
|
||||
|
||||
|
||||
def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
|
||||
"""Return True if the model supports Anthropic's fast mode (speed='fast')."""
|
||||
raw = _strip_vendor_prefix(str(model_id or ""))
|
||||
base = raw.split(":")[0]
|
||||
return base in _ANTHROPIC_FAST_MODE_MODELS
|
||||
|
||||
|
||||
def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
|
||||
"""Return request_overrides for fast/priority mode, or None if unsupported.
|
||||
|
||||
Returns provider-appropriate overrides:
|
||||
- OpenAI models: ``{"service_tier": "priority"}`` (Priority Processing)
|
||||
- Anthropic models: ``{"speed": "fast"}`` (Anthropic Fast Mode beta)
|
||||
|
||||
The overrides are injected into the API request kwargs by
|
||||
``_build_api_kwargs`` in run_agent.py — each API path handles its own
|
||||
keys (service_tier for OpenAI/Codex, speed for Anthropic Messages).
|
||||
"""
|
||||
if not model_supports_fast_mode(model_id):
|
||||
return None
|
||||
if _is_anthropic_fast_model(model_id):
|
||||
return {"speed": "fast"}
|
||||
return {"service_tier": "priority"}
|
||||
|
||||
|
||||
def _resolve_copilot_catalog_api_key() -> str:
|
||||
"""Best-effort GitHub token for fetching the Copilot model catalog."""
|
||||
try:
|
||||
|
|
@ -1028,7 +1150,7 @@ def _resolve_copilot_catalog_api_key() -> str:
|
|||
return ""
|
||||
|
||||
|
||||
def provider_model_ids(provider: Optional[str]) -> list[str]:
|
||||
def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) -> list[str]:
|
||||
"""Return the best known model catalog for a provider.
|
||||
|
||||
Tries live API endpoints for providers that support them (Codex, Nous),
|
||||
|
|
@ -1036,7 +1158,7 @@ def provider_model_ids(provider: Optional[str]) -> list[str]:
|
|||
"""
|
||||
normalized = normalize_provider(provider)
|
||||
if normalized == "openrouter":
|
||||
return model_ids()
|
||||
return model_ids(force_refresh=force_refresh)
|
||||
if normalized == "openai-codex":
|
||||
from hermes_cli.codex_models import get_codex_model_ids
|
||||
|
||||
|
|
|
|||
|
|
@ -143,6 +143,7 @@ def _tts_label(current_provider: str) -> str:
|
|||
"openai": "OpenAI TTS",
|
||||
"elevenlabs": "ElevenLabs",
|
||||
"edge": "Edge TTS",
|
||||
"mistral": "Mistral Voxtral TTS",
|
||||
"neutts": "NeuTTS",
|
||||
}
|
||||
return mapping.get(current_provider or "edge", current_provider or "Edge TTS")
|
||||
|
|
@ -309,6 +310,7 @@ def get_nous_subscription_features(
|
|||
tts_current_provider in {"edge", "neutts"}
|
||||
or (tts_current_provider == "openai" and (managed_tts_available or direct_openai_tts))
|
||||
or (tts_current_provider == "elevenlabs" and direct_elevenlabs)
|
||||
or (tts_current_provider == "mistral" and bool(get_env_value("MISTRAL_API_KEY")))
|
||||
)
|
||||
tts_active = bool(tts_tool_enabled and tts_available)
|
||||
|
||||
|
|
|
|||
|
|
@ -201,8 +201,7 @@ class PluginContext:
|
|||
|
||||
The *setup_fn* receives an argparse subparser and should add any
|
||||
arguments/sub-subparsers. If *handler_fn* is provided it is set
|
||||
as the default dispatch function via ``set_defaults(func=...)``.
|
||||
"""
|
||||
as the default dispatch function via ``set_defaults(func=...)``."""
|
||||
self._manager._cli_commands[name] = {
|
||||
"name": name,
|
||||
"help": help,
|
||||
|
|
@ -213,6 +212,38 @@ class PluginContext:
|
|||
}
|
||||
logger.debug("Plugin %s registered CLI command: %s", self.manifest.name, name)
|
||||
|
||||
# -- context engine registration -----------------------------------------
|
||||
|
||||
def register_context_engine(self, engine) -> None:
|
||||
"""Register a context engine to replace the built-in ContextCompressor.
|
||||
|
||||
Only one context engine plugin is allowed. If a second plugin tries
|
||||
to register one, it is rejected with a warning.
|
||||
|
||||
The engine must be an instance of ``agent.context_engine.ContextEngine``.
|
||||
"""
|
||||
if self._manager._context_engine is not None:
|
||||
logger.warning(
|
||||
"Plugin '%s' tried to register a context engine, but one is "
|
||||
"already registered. Only one context engine plugin is allowed.",
|
||||
self.manifest.name,
|
||||
)
|
||||
return
|
||||
# Defer the import to avoid circular deps at module level
|
||||
from agent.context_engine import ContextEngine
|
||||
if not isinstance(engine, ContextEngine):
|
||||
logger.warning(
|
||||
"Plugin '%s' tried to register a context engine that does not "
|
||||
"inherit from ContextEngine. Ignoring.",
|
||||
self.manifest.name,
|
||||
)
|
||||
return
|
||||
self._manager._context_engine = engine
|
||||
logger.info(
|
||||
"Plugin '%s' registered context engine: %s",
|
||||
self.manifest.name, engine.name,
|
||||
)
|
||||
|
||||
# -- hook registration --------------------------------------------------
|
||||
|
||||
def register_hook(self, hook_name: str, callback: Callable) -> None:
|
||||
|
|
@ -245,6 +276,7 @@ class PluginManager:
|
|||
self._hooks: Dict[str, List[Callable]] = {}
|
||||
self._plugin_tool_names: Set[str] = set()
|
||||
self._cli_commands: Dict[str, dict] = {}
|
||||
self._context_engine = None # Set by a plugin via register_context_engine()
|
||||
self._discovered: bool = False
|
||||
self._cli_ref = None # Set by CLI after plugin discovery
|
||||
|
||||
|
|
@ -566,6 +598,11 @@ def get_plugin_cli_commands() -> Dict[str, dict]:
|
|||
return dict(get_plugin_manager()._cli_commands)
|
||||
|
||||
|
||||
def get_plugin_context_engine():
|
||||
"""Return the plugin-registered context engine, or None."""
|
||||
return get_plugin_manager()._context_engine
|
||||
|
||||
|
||||
def get_plugin_toolsets() -> List[tuple]:
|
||||
"""Return plugin toolsets as ``(key, label, description)`` tuples.
|
||||
|
||||
|
|
|
|||
|
|
@ -531,7 +531,7 @@ def cmd_disable(name: str) -> None:
|
|||
|
||||
disabled.add(name)
|
||||
_save_disabled_set(disabled)
|
||||
console.print(f"[yellow]⊘[/yellow] Plugin [bold]{name}[/bold] disabled. Takes effect on next session.")
|
||||
console.print(f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. Takes effect on next session.")
|
||||
|
||||
|
||||
def cmd_list() -> None:
|
||||
|
|
@ -594,8 +594,152 @@ def cmd_list() -> None:
|
|||
console.print("[dim]Enable/disable:[/dim] hermes plugins enable/disable <name>")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider plugin discovery helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _discover_memory_providers() -> list[tuple[str, str]]:
|
||||
"""Return [(name, description), ...] for available memory providers."""
|
||||
try:
|
||||
from plugins.memory import discover_memory_providers
|
||||
return [(name, desc) for name, desc, _avail in discover_memory_providers()]
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
|
||||
def _discover_context_engines() -> list[tuple[str, str]]:
|
||||
"""Return [(name, description), ...] for available context engines."""
|
||||
try:
|
||||
from plugins.context_engine import discover_context_engines
|
||||
return [(name, desc) for name, desc, _avail in discover_context_engines()]
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
|
||||
def _get_current_memory_provider() -> str:
|
||||
"""Return the current memory.provider from config (empty = built-in)."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
return config.get("memory", {}).get("provider", "") or ""
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def _get_current_context_engine() -> str:
|
||||
"""Return the current context.engine from config."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
return config.get("context", {}).get("engine", "compressor") or "compressor"
|
||||
except Exception:
|
||||
return "compressor"
|
||||
|
||||
|
||||
def _save_memory_provider(name: str) -> None:
|
||||
"""Persist memory.provider to config.yaml."""
|
||||
from hermes_cli.config import load_config, save_config
|
||||
config = load_config()
|
||||
if "memory" not in config:
|
||||
config["memory"] = {}
|
||||
config["memory"]["provider"] = name
|
||||
save_config(config)
|
||||
|
||||
|
||||
def _save_context_engine(name: str) -> None:
|
||||
"""Persist context.engine to config.yaml."""
|
||||
from hermes_cli.config import load_config, save_config
|
||||
config = load_config()
|
||||
if "context" not in config:
|
||||
config["context"] = {}
|
||||
config["context"]["engine"] = name
|
||||
save_config(config)
|
||||
|
||||
|
||||
def _configure_memory_provider() -> bool:
|
||||
"""Launch a radio picker for memory providers. Returns True if changed."""
|
||||
from hermes_cli.curses_ui import curses_radiolist
|
||||
|
||||
current = _get_current_memory_provider()
|
||||
providers = _discover_memory_providers()
|
||||
|
||||
# Build items: "built-in" first, then discovered providers
|
||||
items = ["built-in (default)"]
|
||||
names = [""] # empty string = built-in
|
||||
selected = 0
|
||||
|
||||
for name, desc in providers:
|
||||
names.append(name)
|
||||
label = f"{name} \u2014 {desc}" if desc else name
|
||||
items.append(label)
|
||||
if name == current:
|
||||
selected = len(items) - 1
|
||||
|
||||
# If current provider isn't in discovered list, add it
|
||||
if current and current not in names:
|
||||
names.append(current)
|
||||
items.append(f"{current} (not found)")
|
||||
selected = len(items) - 1
|
||||
|
||||
choice = curses_radiolist(
|
||||
title="Memory Provider (select one)",
|
||||
items=items,
|
||||
selected=selected,
|
||||
)
|
||||
|
||||
new_provider = names[choice]
|
||||
if new_provider != current:
|
||||
_save_memory_provider(new_provider)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _configure_context_engine() -> bool:
|
||||
"""Launch a radio picker for context engines. Returns True if changed."""
|
||||
from hermes_cli.curses_ui import curses_radiolist
|
||||
|
||||
current = _get_current_context_engine()
|
||||
engines = _discover_context_engines()
|
||||
|
||||
# Build items: "compressor" first (built-in), then discovered engines
|
||||
items = ["compressor (default)"]
|
||||
names = ["compressor"]
|
||||
selected = 0
|
||||
|
||||
for name, desc in engines:
|
||||
names.append(name)
|
||||
label = f"{name} \u2014 {desc}" if desc else name
|
||||
items.append(label)
|
||||
if name == current:
|
||||
selected = len(items) - 1
|
||||
|
||||
# If current engine isn't in discovered list and isn't compressor, add it
|
||||
if current != "compressor" and current not in names:
|
||||
names.append(current)
|
||||
items.append(f"{current} (not found)")
|
||||
selected = len(items) - 1
|
||||
|
||||
choice = curses_radiolist(
|
||||
title="Context Engine (select one)",
|
||||
items=items,
|
||||
selected=selected,
|
||||
)
|
||||
|
||||
new_engine = names[choice]
|
||||
if new_engine != current:
|
||||
_save_context_engine(new_engine)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Composite plugins UI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def cmd_toggle() -> None:
|
||||
"""Interactive curses checklist to enable/disable installed plugins."""
|
||||
"""Interactive composite UI — general plugins + provider plugin categories."""
|
||||
from rich.console import Console
|
||||
|
||||
try:
|
||||
|
|
@ -606,18 +750,13 @@ def cmd_toggle() -> None:
|
|||
console = Console()
|
||||
plugins_dir = _plugins_dir()
|
||||
|
||||
# -- General plugins discovery --
|
||||
dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir())
|
||||
if not dirs:
|
||||
console.print("[dim]No plugins installed.[/dim]")
|
||||
console.print("[dim]Install with:[/dim] hermes plugins install owner/repo")
|
||||
return
|
||||
|
||||
disabled = _get_disabled_set()
|
||||
|
||||
# Build items list: "name — description" for display
|
||||
names = []
|
||||
labels = []
|
||||
selected = set()
|
||||
plugin_names = []
|
||||
plugin_labels = []
|
||||
plugin_selected = set()
|
||||
|
||||
for i, d in enumerate(dirs):
|
||||
manifest_file = d / "plugin.yaml"
|
||||
|
|
@ -633,36 +772,335 @@ def cmd_toggle() -> None:
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
names.append(name)
|
||||
label = f"{name} — {description}" if description else name
|
||||
labels.append(label)
|
||||
plugin_names.append(name)
|
||||
label = f"{name} \u2014 {description}" if description else name
|
||||
plugin_labels.append(label)
|
||||
|
||||
if name not in disabled and d.name not in disabled:
|
||||
selected.add(i)
|
||||
plugin_selected.add(i)
|
||||
|
||||
from hermes_cli.curses_ui import curses_checklist
|
||||
# -- Provider categories --
|
||||
current_memory = _get_current_memory_provider() or "built-in"
|
||||
current_context = _get_current_context_engine()
|
||||
categories = [
|
||||
("Memory Provider", current_memory, _configure_memory_provider),
|
||||
("Context Engine", current_context, _configure_context_engine),
|
||||
]
|
||||
|
||||
result = curses_checklist(
|
||||
title="Plugins — toggle enabled/disabled",
|
||||
items=labels,
|
||||
selected=selected,
|
||||
)
|
||||
has_plugins = bool(plugin_names)
|
||||
has_categories = bool(categories)
|
||||
|
||||
# Compute new disabled set from deselected items
|
||||
if not has_plugins and not has_categories:
|
||||
console.print("[dim]No plugins installed and no provider categories available.[/dim]")
|
||||
console.print("[dim]Install with:[/dim] hermes plugins install owner/repo")
|
||||
return
|
||||
|
||||
# Non-TTY fallback
|
||||
if not sys.stdin.isatty():
|
||||
console.print("[dim]Interactive mode requires a terminal.[/dim]")
|
||||
return
|
||||
|
||||
# Launch the composite curses UI
|
||||
try:
|
||||
import curses
|
||||
_run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
disabled, categories, console)
|
||||
except ImportError:
|
||||
_run_composite_fallback(plugin_names, plugin_labels, plugin_selected,
|
||||
disabled, categories, console)
|
||||
|
||||
|
||||
def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
disabled, categories, console):
|
||||
"""Custom curses screen with checkboxes + category action rows."""
|
||||
from hermes_cli.curses_ui import flush_stdin
|
||||
|
||||
chosen = set(plugin_selected)
|
||||
n_plugins = len(plugin_names)
|
||||
# Total rows: plugins + separator + categories
|
||||
# separator is not navigable
|
||||
n_categories = len(categories)
|
||||
total_items = n_plugins + n_categories # navigable items
|
||||
|
||||
result_holder = {"plugins_changed": False, "providers_changed": False}
|
||||
|
||||
def _draw(stdscr):
|
||||
curses.curs_set(0)
|
||||
if curses.has_colors():
|
||||
curses.start_color()
|
||||
curses.use_default_colors()
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8, -1) # dim gray
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
|
||||
while True:
|
||||
stdscr.clear()
|
||||
max_y, max_x = stdscr.getmaxyx()
|
||||
|
||||
# Header
|
||||
try:
|
||||
hattr = curses.A_BOLD
|
||||
if curses.has_colors():
|
||||
hattr |= curses.color_pair(2)
|
||||
stdscr.addnstr(0, 0, "Plugins", max_x - 1, hattr)
|
||||
stdscr.addnstr(
|
||||
1, 0,
|
||||
" \u2191\u2193 navigate SPACE toggle ENTER configure/confirm ESC done",
|
||||
max_x - 1, curses.A_DIM,
|
||||
)
|
||||
except curses.error:
|
||||
pass
|
||||
|
||||
# Build display rows
|
||||
# Row layout:
|
||||
# [plugins section header] (not navigable, skipped in scroll math)
|
||||
# plugin checkboxes (navigable, indices 0..n_plugins-1)
|
||||
# [separator] (not navigable)
|
||||
# [categories section header] (not navigable)
|
||||
# category action rows (navigable, indices n_plugins..total_items-1)
|
||||
|
||||
visible_rows = max_y - 4
|
||||
if cursor < scroll_offset:
|
||||
scroll_offset = cursor
|
||||
elif cursor >= scroll_offset + visible_rows:
|
||||
scroll_offset = cursor - visible_rows + 1
|
||||
|
||||
y = 3 # start drawing after header
|
||||
|
||||
# Determine which items are visible based on scroll
|
||||
# We need to map logical cursor positions to screen rows
|
||||
# accounting for non-navigable separator/headers
|
||||
|
||||
draw_row = 0 # tracks navigable item index
|
||||
|
||||
# --- General Plugins section ---
|
||||
if n_plugins > 0:
|
||||
# Section header
|
||||
if y < max_y - 1:
|
||||
try:
|
||||
sattr = curses.A_BOLD
|
||||
if curses.has_colors():
|
||||
sattr |= curses.color_pair(2)
|
||||
stdscr.addnstr(y, 0, " General Plugins", max_x - 1, sattr)
|
||||
except curses.error:
|
||||
pass
|
||||
y += 1
|
||||
|
||||
for i in range(n_plugins):
|
||||
if y >= max_y - 1:
|
||||
break
|
||||
check = "\u2713" if i in chosen else " "
|
||||
arrow = "\u2192" if i == cursor else " "
|
||||
line = f" {arrow} [{check}] {plugin_labels[i]}"
|
||||
attr = curses.A_NORMAL
|
||||
if i == cursor:
|
||||
attr = curses.A_BOLD
|
||||
if curses.has_colors():
|
||||
attr |= curses.color_pair(1)
|
||||
try:
|
||||
stdscr.addnstr(y, 0, line, max_x - 1, attr)
|
||||
except curses.error:
|
||||
pass
|
||||
y += 1
|
||||
|
||||
# --- Separator ---
|
||||
if y < max_y - 1:
|
||||
y += 1 # blank line
|
||||
|
||||
# --- Provider Plugins section ---
|
||||
if n_categories > 0 and y < max_y - 1:
|
||||
try:
|
||||
sattr = curses.A_BOLD
|
||||
if curses.has_colors():
|
||||
sattr |= curses.color_pair(2)
|
||||
stdscr.addnstr(y, 0, " Provider Plugins", max_x - 1, sattr)
|
||||
except curses.error:
|
||||
pass
|
||||
y += 1
|
||||
|
||||
for ci, (cat_name, cat_current, _cat_fn) in enumerate(categories):
|
||||
if y >= max_y - 1:
|
||||
break
|
||||
cat_idx = n_plugins + ci
|
||||
arrow = "\u2192" if cat_idx == cursor else " "
|
||||
line = f" {arrow} {cat_name:<24} \u25b8 {cat_current}"
|
||||
attr = curses.A_NORMAL
|
||||
if cat_idx == cursor:
|
||||
attr = curses.A_BOLD
|
||||
if curses.has_colors():
|
||||
attr |= curses.color_pair(3)
|
||||
try:
|
||||
stdscr.addnstr(y, 0, line, max_x - 1, attr)
|
||||
except curses.error:
|
||||
pass
|
||||
y += 1
|
||||
|
||||
stdscr.refresh()
|
||||
key = stdscr.getch()
|
||||
|
||||
if key in (curses.KEY_UP, ord("k")):
|
||||
if total_items > 0:
|
||||
cursor = (cursor - 1) % total_items
|
||||
elif key in (curses.KEY_DOWN, ord("j")):
|
||||
if total_items > 0:
|
||||
cursor = (cursor + 1) % total_items
|
||||
elif key == ord(" "):
|
||||
if cursor < n_plugins:
|
||||
# Toggle general plugin
|
||||
chosen.symmetric_difference_update({cursor})
|
||||
else:
|
||||
# Provider category — launch sub-screen
|
||||
ci = cursor - n_plugins
|
||||
if 0 <= ci < n_categories:
|
||||
curses.endwin()
|
||||
_cat_name, _cat_cur, cat_fn = categories[ci]
|
||||
changed = cat_fn()
|
||||
if changed:
|
||||
result_holder["providers_changed"] = True
|
||||
# Refresh current values
|
||||
categories[ci] = (
|
||||
_cat_name,
|
||||
_get_current_memory_provider() or "built-in" if ci == 0
|
||||
else _get_current_context_engine(),
|
||||
cat_fn,
|
||||
)
|
||||
# Re-enter curses
|
||||
stdscr = curses.initscr()
|
||||
curses.noecho()
|
||||
curses.cbreak()
|
||||
stdscr.keypad(True)
|
||||
if curses.has_colors():
|
||||
curses.start_color()
|
||||
curses.use_default_colors()
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8, -1)
|
||||
curses.curs_set(0)
|
||||
elif key in (curses.KEY_ENTER, 10, 13):
|
||||
if cursor < n_plugins:
|
||||
# ENTER on a plugin checkbox — confirm and exit
|
||||
result_holder["plugins_changed"] = True
|
||||
return
|
||||
else:
|
||||
# ENTER on a category — same as SPACE, launch sub-screen
|
||||
ci = cursor - n_plugins
|
||||
if 0 <= ci < n_categories:
|
||||
curses.endwin()
|
||||
_cat_name, _cat_cur, cat_fn = categories[ci]
|
||||
changed = cat_fn()
|
||||
if changed:
|
||||
result_holder["providers_changed"] = True
|
||||
categories[ci] = (
|
||||
_cat_name,
|
||||
_get_current_memory_provider() or "built-in" if ci == 0
|
||||
else _get_current_context_engine(),
|
||||
cat_fn,
|
||||
)
|
||||
stdscr = curses.initscr()
|
||||
curses.noecho()
|
||||
curses.cbreak()
|
||||
stdscr.keypad(True)
|
||||
if curses.has_colors():
|
||||
curses.start_color()
|
||||
curses.use_default_colors()
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8, -1)
|
||||
curses.curs_set(0)
|
||||
elif key in (27, ord("q")):
|
||||
# Save plugin changes on exit
|
||||
result_holder["plugins_changed"] = True
|
||||
return
|
||||
|
||||
curses.wrapper(_draw)
|
||||
flush_stdin()
|
||||
|
||||
# Persist general plugin changes
|
||||
new_disabled = set()
|
||||
for i, name in enumerate(names):
|
||||
if i not in result:
|
||||
for i, name in enumerate(plugin_names):
|
||||
if i not in chosen:
|
||||
new_disabled.add(name)
|
||||
|
||||
if new_disabled != disabled:
|
||||
_save_disabled_set(new_disabled)
|
||||
enabled_count = len(names) - len(new_disabled)
|
||||
enabled_count = len(plugin_names) - len(new_disabled)
|
||||
console.print(
|
||||
f"\n[green]✓[/green] {enabled_count} enabled, {len(new_disabled)} disabled. "
|
||||
f"Takes effect on next session."
|
||||
f"\n[green]\u2713[/green] General plugins: {enabled_count} enabled, "
|
||||
f"{len(new_disabled)} disabled."
|
||||
)
|
||||
else:
|
||||
console.print("\n[dim]No changes.[/dim]")
|
||||
elif n_plugins > 0:
|
||||
console.print("\n[dim]General plugins unchanged.[/dim]")
|
||||
|
||||
if result_holder["providers_changed"]:
|
||||
new_memory = _get_current_memory_provider() or "built-in"
|
||||
new_context = _get_current_context_engine()
|
||||
console.print(
|
||||
f"[green]\u2713[/green] Memory provider: [bold]{new_memory}[/bold] "
|
||||
f"Context engine: [bold]{new_context}[/bold]"
|
||||
)
|
||||
|
||||
if n_plugins > 0 or result_holder["providers_changed"]:
|
||||
console.print("[dim]Changes take effect on next session.[/dim]")
|
||||
console.print()
|
||||
|
||||
|
||||
def _run_composite_fallback(plugin_names, plugin_labels, plugin_selected,
|
||||
disabled, categories, console):
|
||||
"""Text-based fallback for the composite plugins UI."""
|
||||
from hermes_cli.colors import Colors, color
|
||||
|
||||
print(color("\n Plugins", Colors.YELLOW))
|
||||
|
||||
# General plugins
|
||||
if plugin_names:
|
||||
chosen = set(plugin_selected)
|
||||
print(color("\n General Plugins", Colors.YELLOW))
|
||||
print(color(" Toggle by number, Enter to confirm.\n", Colors.DIM))
|
||||
|
||||
while True:
|
||||
for i, label in enumerate(plugin_labels):
|
||||
marker = color("[\u2713]", Colors.GREEN) if i in chosen else "[ ]"
|
||||
print(f" {marker} {i + 1:>2}. {label}")
|
||||
print()
|
||||
try:
|
||||
val = input(color(" Toggle # (or Enter to confirm): ", Colors.DIM)).strip()
|
||||
if not val:
|
||||
break
|
||||
idx = int(val) - 1
|
||||
if 0 <= idx < len(plugin_names):
|
||||
chosen.symmetric_difference_update({idx})
|
||||
except (ValueError, KeyboardInterrupt, EOFError):
|
||||
return
|
||||
print()
|
||||
|
||||
new_disabled = set()
|
||||
for i, name in enumerate(plugin_names):
|
||||
if i not in chosen:
|
||||
new_disabled.add(name)
|
||||
if new_disabled != disabled:
|
||||
_save_disabled_set(new_disabled)
|
||||
|
||||
# Provider categories
|
||||
if categories:
|
||||
print(color("\n Provider Plugins", Colors.YELLOW))
|
||||
for ci, (cat_name, cat_current, cat_fn) in enumerate(categories):
|
||||
print(f" {ci + 1}. {cat_name} [{cat_current}]")
|
||||
print()
|
||||
try:
|
||||
val = input(color(" Configure # (or Enter to skip): ", Colors.DIM)).strip()
|
||||
if val:
|
||||
ci = int(val) - 1
|
||||
if 0 <= ci < len(categories):
|
||||
categories[ci][2]() # call the configure function
|
||||
except (ValueError, KeyboardInterrupt, EOFError):
|
||||
pass
|
||||
|
||||
print()
|
||||
|
||||
|
||||
def plugins_command(args) -> None:
|
||||
|
|
|
|||
|
|
@ -42,6 +42,11 @@ _PROFILE_DIRS = [
|
|||
"plans",
|
||||
"workspace",
|
||||
"cron",
|
||||
# Per-profile HOME for subprocesses: isolates system tool configs (git,
|
||||
# ssh, gh, npm …) so credentials don't bleed between profiles. In Docker
|
||||
# this also ensures tool configs land inside the persistent volume.
|
||||
# See hermes_constants.get_subprocess_home() and issue #4426.
|
||||
"home",
|
||||
]
|
||||
|
||||
# Files copied during --clone (if they exist in the source)
|
||||
|
|
@ -115,16 +120,26 @@ _HERMES_SUBCOMMANDS = frozenset({
|
|||
def _get_profiles_root() -> Path:
|
||||
"""Return the directory where named profiles are stored.
|
||||
|
||||
Always ``~/.hermes/profiles/`` — anchored to the user's home,
|
||||
NOT to the current HERMES_HOME (which may itself be a profile).
|
||||
This ensures ``coder profile list`` can see all profiles.
|
||||
Anchored to the hermes root, NOT to the current HERMES_HOME
|
||||
(which may itself be a profile). This ensures ``coder profile list``
|
||||
can see all profiles.
|
||||
|
||||
In Docker/custom deployments where HERMES_HOME points outside
|
||||
``~/.hermes``, profiles live under ``HERMES_HOME/profiles/`` so
|
||||
they persist on the mounted volume.
|
||||
"""
|
||||
return Path.home() / ".hermes" / "profiles"
|
||||
return _get_default_hermes_home() / "profiles"
|
||||
|
||||
|
||||
def _get_default_hermes_home() -> Path:
|
||||
"""Return the default (pre-profile) HERMES_HOME path."""
|
||||
return Path.home() / ".hermes"
|
||||
"""Return the default (pre-profile) HERMES_HOME path.
|
||||
|
||||
In standard deployments this is ``~/.hermes``.
|
||||
In Docker/custom deployments where HERMES_HOME is outside ``~/.hermes``
|
||||
(e.g. ``/opt/data``), returns HERMES_HOME directly.
|
||||
"""
|
||||
from hermes_constants import get_default_hermes_root
|
||||
return get_default_hermes_root()
|
||||
|
||||
|
||||
def _get_active_profile_path() -> Path:
|
||||
|
|
|
|||
|
|
@ -88,11 +88,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
|||
base_url_env_var="KIMI_BASE_URL",
|
||||
),
|
||||
"minimax": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
transport="anthropic_messages",
|
||||
base_url_env_var="MINIMAX_BASE_URL",
|
||||
),
|
||||
"minimax-cn": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
transport="anthropic_messages",
|
||||
base_url_env_var="MINIMAX_CN_BASE_URL",
|
||||
),
|
||||
"deepseek": HermesOverlay(
|
||||
|
|
@ -127,6 +127,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
|||
is_aggregator=True,
|
||||
base_url_env_var="HF_BASE_URL",
|
||||
),
|
||||
"xai": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
base_url_override="https://api.x.ai/v1",
|
||||
base_url_env_var="XAI_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -148,10 +153,6 @@ class ProviderDef:
|
|||
doc: str = ""
|
||||
source: str = "" # "models.dev", "hermes", "user-config"
|
||||
|
||||
@property
|
||||
def is_user_defined(self) -> bool:
|
||||
return self.source == "user-config"
|
||||
|
||||
|
||||
# -- Aliases ------------------------------------------------------------------
|
||||
# Maps human-friendly / legacy names to canonical provider IDs.
|
||||
|
|
@ -167,6 +168,10 @@ ALIASES: Dict[str, str] = {
|
|||
"z.ai": "zai",
|
||||
"zhipu": "zai",
|
||||
|
||||
# xai
|
||||
"x-ai": "xai",
|
||||
"x.ai": "xai",
|
||||
|
||||
# kimi-for-coding (models.dev ID)
|
||||
"kimi": "kimi-for-coding",
|
||||
"kimi-coding": "kimi-for-coding",
|
||||
|
|
@ -262,12 +267,6 @@ def normalize_provider(name: str) -> str:
|
|||
return ALIASES.get(key, key)
|
||||
|
||||
|
||||
def get_overlay(provider_id: str) -> Optional[HermesOverlay]:
|
||||
"""Get Hermes overlay for a provider, if one exists."""
|
||||
canonical = normalize_provider(provider_id)
|
||||
return HERMES_OVERLAYS.get(canonical)
|
||||
|
||||
|
||||
def get_provider(name: str) -> Optional[ProviderDef]:
|
||||
"""Look up a provider by id or alias, merging all data sources.
|
||||
|
||||
|
|
@ -350,36 +349,6 @@ def get_label(provider_id: str) -> str:
|
|||
return canonical
|
||||
|
||||
|
||||
# For direct import compat, expose as module-level dict
|
||||
# Built on demand by get_label() calls
|
||||
LABELS: Dict[str, str] = {
|
||||
# Static entries for backward compat — get_label() is the proper API
|
||||
"openrouter": "OpenRouter",
|
||||
"nous": "Nous Portal",
|
||||
"openai-codex": "OpenAI Codex",
|
||||
"copilot-acp": "GitHub Copilot ACP",
|
||||
"github-copilot": "GitHub Copilot",
|
||||
"anthropic": "Anthropic",
|
||||
"zai": "Z.AI / GLM",
|
||||
"kimi-for-coding": "Kimi / Moonshot",
|
||||
"minimax": "MiniMax",
|
||||
"minimax-cn": "MiniMax (China)",
|
||||
"deepseek": "DeepSeek",
|
||||
"alibaba": "Alibaba Cloud (DashScope)",
|
||||
"vercel": "Vercel AI Gateway",
|
||||
"opencode": "OpenCode Zen",
|
||||
"opencode-go": "OpenCode Go",
|
||||
"kilo": "Kilo Gateway",
|
||||
"huggingface": "Hugging Face",
|
||||
"local": "Local endpoint",
|
||||
"custom": "Custom endpoint",
|
||||
# Legacy Hermes IDs (point to same providers)
|
||||
"ai-gateway": "Vercel AI Gateway",
|
||||
"kilocode": "Kilo Gateway",
|
||||
"copilot": "GitHub Copilot",
|
||||
"kimi-coding": "Kimi / Moonshot",
|
||||
"opencode-zen": "OpenCode Zen",
|
||||
}
|
||||
|
||||
|
||||
def is_aggregator(provider: str) -> bool:
|
||||
|
|
@ -452,9 +421,64 @@ def resolve_user_provider(name: str, user_config: Dict[str, Any]) -> Optional[Pr
|
|||
)
|
||||
|
||||
|
||||
def custom_provider_slug(display_name: str) -> str:
|
||||
"""Build a canonical slug for a custom_providers entry.
|
||||
|
||||
Matches the convention used by runtime_provider and credential_pool
|
||||
(``custom:<normalized-name>``). Centralised here so all call-sites
|
||||
produce identical slugs.
|
||||
"""
|
||||
return "custom:" + display_name.strip().lower().replace(" ", "-")
|
||||
|
||||
|
||||
def resolve_custom_provider(
|
||||
name: str,
|
||||
custom_providers: Optional[List[Dict[str, Any]]],
|
||||
) -> Optional[ProviderDef]:
|
||||
"""Resolve a provider from the user's config.yaml ``custom_providers`` list."""
|
||||
if not custom_providers or not isinstance(custom_providers, list):
|
||||
return None
|
||||
|
||||
requested = (name or "").strip().lower()
|
||||
if not requested:
|
||||
return None
|
||||
|
||||
for entry in custom_providers:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
|
||||
display_name = (entry.get("name") or "").strip()
|
||||
api_url = (
|
||||
entry.get("base_url", "")
|
||||
or entry.get("url", "")
|
||||
or entry.get("api", "")
|
||||
or ""
|
||||
).strip()
|
||||
if not display_name or not api_url:
|
||||
continue
|
||||
|
||||
slug = custom_provider_slug(display_name)
|
||||
if requested not in {display_name.lower(), slug}:
|
||||
continue
|
||||
|
||||
return ProviderDef(
|
||||
id=slug,
|
||||
name=display_name,
|
||||
transport="openai_chat",
|
||||
api_key_env_vars=(),
|
||||
base_url=api_url,
|
||||
is_aggregator=False,
|
||||
auth_type="api_key",
|
||||
source="user-config",
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def resolve_provider_full(
|
||||
name: str,
|
||||
user_providers: Optional[Dict[str, Any]] = None,
|
||||
custom_providers: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> Optional[ProviderDef]:
|
||||
"""Full resolution chain: built-in → models.dev → user config.
|
||||
|
||||
|
|
@ -463,6 +487,7 @@ def resolve_provider_full(
|
|||
Args:
|
||||
name: Provider name or alias.
|
||||
user_providers: The ``providers:`` dict from config.yaml (optional).
|
||||
custom_providers: The ``custom_providers:`` list from config.yaml (optional).
|
||||
|
||||
Returns:
|
||||
ProviderDef if found, else None.
|
||||
|
|
@ -485,6 +510,11 @@ def resolve_provider_full(
|
|||
if user_pdef is not None:
|
||||
return user_pdef
|
||||
|
||||
# 2b. Saved custom providers from config
|
||||
custom_pdef = resolve_custom_provider(name, custom_providers)
|
||||
if custom_pdef is not None:
|
||||
return custom_pdef
|
||||
|
||||
# 3. Try models.dev directly (for providers not in our ALIASES)
|
||||
try:
|
||||
from agent.models_dev import get_provider_info as _mdev_provider
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from hermes_cli.auth import (
|
|||
DEFAULT_CODEX_BASE_URL,
|
||||
DEFAULT_QWEN_BASE_URL,
|
||||
PROVIDER_REGISTRY,
|
||||
_agent_key_is_usable,
|
||||
format_auth_error,
|
||||
resolve_provider,
|
||||
resolve_nous_runtime_credentials,
|
||||
|
|
@ -644,6 +645,21 @@ def resolve_runtime_provider(
|
|||
getattr(entry, "runtime_api_key", None)
|
||||
or getattr(entry, "access_token", "")
|
||||
)
|
||||
# For Nous, the pool entry's runtime_api_key is the agent_key — a
|
||||
# short-lived inference credential (~30 min TTL). The pool doesn't
|
||||
# refresh it during selection (that would trigger network calls in
|
||||
# non-runtime contexts like `hermes auth list`). If the key is
|
||||
# expired, clear pool_api_key so we fall through to
|
||||
# resolve_nous_runtime_credentials() which handles refresh + mint.
|
||||
if provider == "nous" and entry is not None and pool_api_key:
|
||||
min_ttl = max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
|
||||
nous_state = {
|
||||
"agent_key": getattr(entry, "agent_key", None),
|
||||
"agent_key_expires_at": getattr(entry, "agent_key_expires_at", None),
|
||||
}
|
||||
if not _agent_key_is_usable(nous_state, min_ttl):
|
||||
logger.debug("Nous pool entry agent_key expired/missing, falling through to runtime resolution")
|
||||
pool_api_key = ""
|
||||
if entry is not None and pool_api_key:
|
||||
return _resolve_runtime_from_pool_entry(
|
||||
provider=provider,
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import logging
|
|||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import copy
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
|
|
@ -105,8 +106,8 @@ _DEFAULT_PROVIDER_MODELS = {
|
|||
],
|
||||
"zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
|
||||
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
|
||||
"minimax": ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"],
|
||||
"minimax-cn": ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"],
|
||||
"minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
|
||||
"minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
|
||||
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
|
||||
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
|
||||
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
|
||||
|
|
@ -172,150 +173,10 @@ def _setup_copilot_reasoning_selection(
|
|||
_set_reasoning_effort(config, "none")
|
||||
|
||||
|
||||
def _setup_provider_model_selection(config, provider_id, current_model, prompt_choice, prompt_fn):
|
||||
"""Model selection for API-key providers with live /models detection.
|
||||
|
||||
Tries the provider's /models endpoint first. Falls back to a
|
||||
hardcoded default list with a warning if the endpoint is unreachable.
|
||||
Always offers a 'Custom model' escape hatch.
|
||||
"""
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY, resolve_api_key_provider_credentials
|
||||
from hermes_cli.config import get_env_value
|
||||
from hermes_cli.models import (
|
||||
copilot_model_api_mode,
|
||||
fetch_api_models,
|
||||
fetch_github_model_catalog,
|
||||
normalize_copilot_model_id,
|
||||
normalize_opencode_model_id,
|
||||
opencode_model_api_mode,
|
||||
)
|
||||
|
||||
pconfig = PROVIDER_REGISTRY[provider_id]
|
||||
is_copilot_catalog_provider = provider_id in {"copilot", "copilot-acp"}
|
||||
|
||||
# Resolve API key and base URL for the probe
|
||||
if is_copilot_catalog_provider:
|
||||
api_key = ""
|
||||
if provider_id == "copilot":
|
||||
creds = resolve_api_key_provider_credentials(provider_id)
|
||||
api_key = creds.get("api_key", "")
|
||||
base_url = creds.get("base_url", "") or pconfig.inference_base_url
|
||||
else:
|
||||
try:
|
||||
creds = resolve_api_key_provider_credentials("copilot")
|
||||
api_key = creds.get("api_key", "")
|
||||
except Exception:
|
||||
pass
|
||||
base_url = pconfig.inference_base_url
|
||||
catalog = fetch_github_model_catalog(api_key)
|
||||
current_model = normalize_copilot_model_id(
|
||||
current_model,
|
||||
catalog=catalog,
|
||||
api_key=api_key,
|
||||
) or current_model
|
||||
else:
|
||||
api_key = ""
|
||||
for ev in pconfig.api_key_env_vars:
|
||||
api_key = get_env_value(ev) or os.getenv(ev, "")
|
||||
if api_key:
|
||||
break
|
||||
base_url_env = pconfig.base_url_env_var or ""
|
||||
base_url = (get_env_value(base_url_env) if base_url_env else "") or pconfig.inference_base_url
|
||||
catalog = None
|
||||
|
||||
# Try live /models endpoint
|
||||
if is_copilot_catalog_provider and catalog:
|
||||
live_models = [item.get("id", "") for item in catalog if item.get("id")]
|
||||
else:
|
||||
live_models = fetch_api_models(api_key, base_url)
|
||||
|
||||
if live_models:
|
||||
provider_models = live_models
|
||||
print_info(f"Found {len(live_models)} model(s) from {pconfig.name} API")
|
||||
else:
|
||||
fallback_provider_id = "copilot" if provider_id == "copilot-acp" else provider_id
|
||||
provider_models = _DEFAULT_PROVIDER_MODELS.get(fallback_provider_id, [])
|
||||
if provider_models:
|
||||
print_warning(
|
||||
f"Could not auto-detect models from {pconfig.name} API — showing defaults.\n"
|
||||
f" Use \"Custom model\" if the model you expect isn't listed."
|
||||
)
|
||||
|
||||
if provider_id in {"opencode-zen", "opencode-go"}:
|
||||
provider_models = [normalize_opencode_model_id(provider_id, mid) for mid in provider_models]
|
||||
current_model = normalize_opencode_model_id(provider_id, current_model)
|
||||
provider_models = list(dict.fromkeys(mid for mid in provider_models if mid))
|
||||
|
||||
model_choices = list(provider_models)
|
||||
model_choices.append("Custom model")
|
||||
model_choices.append(f"Keep current ({current_model})")
|
||||
|
||||
keep_idx = len(model_choices) - 1
|
||||
model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
|
||||
|
||||
selected_model = current_model
|
||||
|
||||
if model_idx < len(provider_models):
|
||||
selected_model = provider_models[model_idx]
|
||||
if is_copilot_catalog_provider:
|
||||
selected_model = normalize_copilot_model_id(
|
||||
selected_model,
|
||||
catalog=catalog,
|
||||
api_key=api_key,
|
||||
) or selected_model
|
||||
elif provider_id in {"opencode-zen", "opencode-go"}:
|
||||
selected_model = normalize_opencode_model_id(provider_id, selected_model)
|
||||
_set_default_model(config, selected_model)
|
||||
elif model_idx == len(provider_models):
|
||||
custom = prompt_fn("Enter model name")
|
||||
if custom:
|
||||
if is_copilot_catalog_provider:
|
||||
selected_model = normalize_copilot_model_id(
|
||||
custom,
|
||||
catalog=catalog,
|
||||
api_key=api_key,
|
||||
) or custom
|
||||
elif provider_id in {"opencode-zen", "opencode-go"}:
|
||||
selected_model = normalize_opencode_model_id(provider_id, custom)
|
||||
else:
|
||||
selected_model = custom
|
||||
_set_default_model(config, selected_model)
|
||||
else:
|
||||
# "Keep current" selected — validate it's compatible with the new
|
||||
# provider. OpenRouter-formatted names (containing "/") won't work
|
||||
# on direct-API providers and would silently break the gateway.
|
||||
if "/" in (current_model or "") and provider_models:
|
||||
print_warning(
|
||||
f"Current model \"{current_model}\" looks like an OpenRouter model "
|
||||
f"and won't work with {pconfig.name}. "
|
||||
f"Switching to {provider_models[0]}."
|
||||
)
|
||||
selected_model = provider_models[0]
|
||||
_set_default_model(config, provider_models[0])
|
||||
|
||||
if provider_id == "copilot" and selected_model:
|
||||
model_cfg = _model_config_dict(config)
|
||||
model_cfg["api_mode"] = copilot_model_api_mode(
|
||||
selected_model,
|
||||
catalog=catalog,
|
||||
api_key=api_key,
|
||||
)
|
||||
config["model"] = model_cfg
|
||||
_setup_copilot_reasoning_selection(
|
||||
config,
|
||||
selected_model,
|
||||
prompt_choice,
|
||||
catalog=catalog,
|
||||
api_key=api_key,
|
||||
)
|
||||
elif provider_id in {"opencode-zen", "opencode-go"} and selected_model:
|
||||
model_cfg = _model_config_dict(config)
|
||||
model_cfg["api_mode"] = opencode_model_api_mode(provider_id, selected_model)
|
||||
config["model"] = model_cfg
|
||||
|
||||
|
||||
# Import config helpers
|
||||
from hermes_cli.config import (
|
||||
DEFAULT_CONFIG,
|
||||
get_hermes_home,
|
||||
get_config_path,
|
||||
get_env_path,
|
||||
|
|
@ -477,6 +338,8 @@ def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int
|
|||
return
|
||||
|
||||
curses.wrapper(_curses_menu)
|
||||
from hermes_cli.curses_ui import flush_stdin
|
||||
flush_stdin()
|
||||
return result_holder[0]
|
||||
except Exception:
|
||||
return -1
|
||||
|
|
@ -694,6 +557,8 @@ def _print_setup_summary(config: dict, hermes_home):
|
|||
tool_status.append(("Text-to-Speech (OpenAI)", True, None))
|
||||
elif tts_provider == "minimax" and get_env_value("MINIMAX_API_KEY"):
|
||||
tool_status.append(("Text-to-Speech (MiniMax)", True, None))
|
||||
elif tts_provider == "mistral" and get_env_value("MISTRAL_API_KEY"):
|
||||
tool_status.append(("Text-to-Speech (Mistral Voxtral)", True, None))
|
||||
elif tts_provider == "neutts":
|
||||
try:
|
||||
import importlib.util
|
||||
|
|
@ -921,8 +786,10 @@ def setup_model_provider(config: dict, *, quick: bool = False):
|
|||
# changes with stale values (#4172).
|
||||
_refreshed = load_config()
|
||||
config["model"] = _refreshed.get("model", config.get("model"))
|
||||
if _refreshed.get("custom_providers"):
|
||||
if "custom_providers" in _refreshed:
|
||||
config["custom_providers"] = _refreshed["custom_providers"]
|
||||
else:
|
||||
config.pop("custom_providers", None)
|
||||
|
||||
# Derive the selected provider for downstream steps (vision setup).
|
||||
selected_provider = None
|
||||
|
|
@ -1006,8 +873,6 @@ def setup_model_provider(config: dict, *, quick: bool = False):
|
|||
strategy_value = ["fill_first", "round_robin", "random"][strategy_idx]
|
||||
_set_credential_pool_strategy(config, selected_provider, strategy_value)
|
||||
print_success(f"Saved {selected_provider} rotation strategy: {strategy_value}")
|
||||
else:
|
||||
_set_credential_pool_strategy(config, selected_provider, "fill_first")
|
||||
except Exception as exc:
|
||||
logger.debug("Could not configure same-provider fallback in setup: %s", exc)
|
||||
|
||||
|
|
@ -1181,6 +1046,7 @@ def _setup_tts_provider(config: dict):
|
|||
"elevenlabs": "ElevenLabs",
|
||||
"openai": "OpenAI TTS",
|
||||
"minimax": "MiniMax TTS",
|
||||
"mistral": "Mistral Voxtral TTS",
|
||||
"neutts": "NeuTTS",
|
||||
}
|
||||
current_label = provider_labels.get(current_provider, current_provider)
|
||||
|
|
@ -1201,10 +1067,11 @@ def _setup_tts_provider(config: dict):
|
|||
"ElevenLabs (premium quality, needs API key)",
|
||||
"OpenAI TTS (good quality, needs API key)",
|
||||
"MiniMax TTS (high quality with voice cloning, needs API key)",
|
||||
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
|
||||
"NeuTTS (local on-device, free, ~300MB model download)",
|
||||
]
|
||||
)
|
||||
providers.extend(["edge", "elevenlabs", "openai", "minimax", "neutts"])
|
||||
providers.extend(["edge", "elevenlabs", "openai", "minimax", "mistral", "neutts"])
|
||||
choices.append(f"Keep current ({current_label})")
|
||||
keep_current_idx = len(choices) - 1
|
||||
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
|
||||
|
|
@ -1282,6 +1149,18 @@ def _setup_tts_provider(config: dict):
|
|||
print_warning("No API key provided. Falling back to Edge TTS.")
|
||||
selected = "edge"
|
||||
|
||||
elif selected == "mistral":
|
||||
existing = get_env_value("MISTRAL_API_KEY")
|
||||
if not existing:
|
||||
print()
|
||||
api_key = prompt("Mistral API key for TTS", password=True)
|
||||
if api_key:
|
||||
save_env_value("MISTRAL_API_KEY", api_key)
|
||||
print_success("Mistral TTS API key saved")
|
||||
else:
|
||||
print_warning("No API key provided. Falling back to Edge TTS.")
|
||||
selected = "edge"
|
||||
|
||||
# Save the selection
|
||||
if "tts" not in config:
|
||||
config["tts"] = {}
|
||||
|
|
@ -2062,9 +1941,9 @@ def _setup_matrix():
|
|||
save_env_value("MATRIX_ENCRYPTION", "true")
|
||||
print_success("E2EE enabled")
|
||||
|
||||
matrix_pkg = "matrix-nio[e2e]" if want_e2ee else "matrix-nio"
|
||||
matrix_pkg = "mautrix[encryption]" if want_e2ee else "mautrix"
|
||||
try:
|
||||
__import__("nio")
|
||||
__import__("mautrix")
|
||||
except ImportError:
|
||||
print_info(f"Installing {matrix_pkg}...")
|
||||
import subprocess
|
||||
|
|
@ -2167,6 +2046,12 @@ def _setup_whatsapp():
|
|||
print_info("or personal self-chat) and pair via QR code.")
|
||||
|
||||
|
||||
def _setup_weixin():
|
||||
"""Configure Weixin (personal WeChat) via iLink Bot API QR login."""
|
||||
from hermes_cli.gateway import _setup_weixin as _gateway_setup_weixin
|
||||
_gateway_setup_weixin()
|
||||
|
||||
|
||||
def _setup_bluebubbles():
|
||||
"""Configure BlueBubbles iMessage gateway."""
|
||||
print_header("BlueBubbles (iMessage)")
|
||||
|
|
@ -2286,6 +2171,7 @@ _GATEWAY_PLATFORMS = [
|
|||
("Matrix", "MATRIX_ACCESS_TOKEN", _setup_matrix),
|
||||
("Mattermost", "MATTERMOST_TOKEN", _setup_mattermost),
|
||||
("WhatsApp", "WHATSAPP_ENABLED", _setup_whatsapp),
|
||||
("Weixin (WeChat)", "WEIXIN_ACCOUNT_ID", _setup_weixin),
|
||||
("BlueBubbles (iMessage)", "BLUEBUBBLES_SERVER_URL", _setup_bluebubbles),
|
||||
("Webhooks (GitHub, GitLab, etc.)", "WEBHOOK_ENABLED", _setup_webhooks),
|
||||
]
|
||||
|
|
@ -2844,6 +2730,7 @@ def run_setup_wizard(args):
|
|||
Supports full, quick, and section-specific setup:
|
||||
hermes setup — full or quick (auto-detected)
|
||||
hermes setup model — just model/provider
|
||||
hermes setup tts — just text-to-speech
|
||||
hermes setup terminal — just terminal backend
|
||||
hermes setup gateway — just messaging platforms
|
||||
hermes setup tools — just tool configuration
|
||||
|
|
@ -2855,6 +2742,11 @@ def run_setup_wizard(args):
|
|||
return
|
||||
ensure_hermes_home()
|
||||
|
||||
reset_requested = bool(getattr(args, "reset", False))
|
||||
if reset_requested:
|
||||
save_config(copy.deepcopy(DEFAULT_CONFIG))
|
||||
print_success("Configuration reset to defaults.")
|
||||
|
||||
config = load_config()
|
||||
hermes_home = get_hermes_home()
|
||||
|
||||
|
|
@ -2955,18 +2847,13 @@ def run_setup_wizard(args):
|
|||
menu_choices = [
|
||||
"Quick Setup - configure missing items only",
|
||||
"Full Setup - reconfigure everything",
|
||||
"---",
|
||||
"Model & Provider",
|
||||
"Terminal Backend",
|
||||
"Messaging Platforms (Gateway)",
|
||||
"Tools",
|
||||
"Agent Settings",
|
||||
"---",
|
||||
"Exit",
|
||||
]
|
||||
|
||||
# Separator indices (not selectable, but prompt_choice doesn't filter them,
|
||||
# so we handle them below)
|
||||
choice = prompt_choice("What would you like to do?", menu_choices, 0)
|
||||
|
||||
if choice == 0:
|
||||
|
|
@ -2976,18 +2863,14 @@ def run_setup_wizard(args):
|
|||
elif choice == 1:
|
||||
# Full setup — fall through to run all sections
|
||||
pass
|
||||
elif choice in (2, 8):
|
||||
# Separator — treat as exit
|
||||
elif choice == 7:
|
||||
print_info("Exiting. Run 'hermes setup' again when ready.")
|
||||
return
|
||||
elif choice == 9:
|
||||
print_info("Exiting. Run 'hermes setup' again when ready.")
|
||||
return
|
||||
elif 3 <= choice <= 7:
|
||||
elif 2 <= choice <= 6:
|
||||
# Individual section — map by key, not by position.
|
||||
# SETUP_SECTIONS includes TTS but the returning-user menu skips it,
|
||||
# so positional indexing (choice - 3) would dispatch the wrong section.
|
||||
section_key = RETURNING_USER_MENU_SECTION_KEYS[choice - 3]
|
||||
# so positional indexing (choice - 2) would dispatch the wrong section.
|
||||
section_key = RETURNING_USER_MENU_SECTION_KEYS[choice - 2]
|
||||
section = next((s for s in SETUP_SECTIONS if s[0] == section_key), None)
|
||||
if section:
|
||||
_, label, func = section
|
||||
|
|
@ -3055,19 +2938,33 @@ def run_setup_wizard(args):
|
|||
_offer_launch_chat()
|
||||
|
||||
|
||||
def _resolve_hermes_chat_argv() -> Optional[list[str]]:
|
||||
"""Resolve argv for launching ``hermes chat`` in a fresh process."""
|
||||
hermes_bin = shutil.which("hermes")
|
||||
if hermes_bin:
|
||||
return [hermes_bin, "chat"]
|
||||
|
||||
try:
|
||||
if importlib.util.find_spec("hermes_cli") is not None:
|
||||
return [sys.executable, "-m", "hermes_cli.main", "chat"]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _offer_launch_chat():
|
||||
"""Prompt the user to jump straight into chat after setup."""
|
||||
print()
|
||||
if prompt_yes_no("Launch hermes chat now?", True):
|
||||
from hermes_cli.main import cmd_chat
|
||||
from types import SimpleNamespace
|
||||
cmd_chat(SimpleNamespace(
|
||||
query=None, resume=None, continue_last=None, model=None,
|
||||
provider=None, effort=None, skin=None, oneshot=False,
|
||||
quiet=False, verbose=False, toolsets=None, skills=None,
|
||||
yolo=False, source=None, worktree=False, checkpoints=False,
|
||||
pass_session_id=False, max_turns=None,
|
||||
))
|
||||
if not prompt_yes_no("Launch hermes chat now?", True):
|
||||
return
|
||||
|
||||
chat_argv = _resolve_hermes_chat_argv()
|
||||
if not chat_argv:
|
||||
print_info("Could not relaunch Hermes automatically. Run 'hermes chat' manually.")
|
||||
return
|
||||
|
||||
os.execvp(chat_argv[0], chat_argv)
|
||||
|
||||
|
||||
def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ PLATFORMS = {
|
|||
"dingtalk": "💬 DingTalk",
|
||||
"feishu": "🪽 Feishu",
|
||||
"wecom": "💬 WeCom",
|
||||
"weixin": "💬 Weixin",
|
||||
"webhook": "🔗 Webhook",
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -151,7 +151,8 @@ def do_search(query: str, source: str = "all", limit: int = 10,
|
|||
|
||||
auth = GitHubAuth()
|
||||
sources = create_source_router(auth)
|
||||
results = unified_search(query, sources, source_filter=source, limit=limit)
|
||||
with c.status("[bold]Searching registries..."):
|
||||
results = unified_search(query, sources, source_filter=source, limit=limit)
|
||||
|
||||
if not results:
|
||||
c.print("[dim]No skills found matching your query.[/]\n")
|
||||
|
|
@ -187,7 +188,7 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
|
|||
Official skills are always shown first, regardless of source filter.
|
||||
"""
|
||||
from tools.skills_hub import (
|
||||
GitHubAuth, create_source_router,
|
||||
GitHubAuth, create_source_router, parallel_search_sources,
|
||||
)
|
||||
|
||||
# Clamp page_size to safe range
|
||||
|
|
@ -198,27 +199,23 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
|
|||
auth = GitHubAuth()
|
||||
sources = create_source_router(auth)
|
||||
|
||||
# Collect results from all (or filtered) sources
|
||||
# Use empty query to get everything; per-source limits prevent overload
|
||||
# Collect results from all (or filtered) sources in parallel.
|
||||
# Per-source limits are generous — parallelism + 30s timeout cap prevents hangs.
|
||||
_TRUST_RANK = {"builtin": 3, "trusted": 2, "community": 1}
|
||||
_PER_SOURCE_LIMIT = {"official": 100, "skills-sh": 100, "well-known": 25, "github": 100, "clawhub": 50,
|
||||
"claude-marketplace": 50, "lobehub": 50}
|
||||
_PER_SOURCE_LIMIT = {
|
||||
"official": 200, "skills-sh": 200, "well-known": 50,
|
||||
"github": 200, "clawhub": 500, "claude-marketplace": 100,
|
||||
"lobehub": 500,
|
||||
}
|
||||
|
||||
all_results: list = []
|
||||
source_counts: dict = {}
|
||||
|
||||
for src in sources:
|
||||
sid = src.source_id()
|
||||
if source != "all" and sid != source and sid != "official":
|
||||
# Always include official source for the "first" placement
|
||||
continue
|
||||
try:
|
||||
limit = _PER_SOURCE_LIMIT.get(sid, 50)
|
||||
results = src.search("", limit=limit)
|
||||
source_counts[sid] = len(results)
|
||||
all_results.extend(results)
|
||||
except Exception:
|
||||
continue
|
||||
with c.status("[bold]Fetching skills from registries..."):
|
||||
all_results, source_counts, timed_out = parallel_search_sources(
|
||||
sources,
|
||||
query="",
|
||||
per_source_limits=_PER_SOURCE_LIMIT,
|
||||
source_filter=source,
|
||||
overall_timeout=30,
|
||||
)
|
||||
|
||||
if not all_results:
|
||||
c.print("[dim]No skills found in the Skills Hub.[/]\n")
|
||||
|
|
@ -252,8 +249,11 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
|
|||
|
||||
# Build header
|
||||
source_label = f"— {source}" if source != "all" else "— all sources"
|
||||
loaded_label = f"{total} skills loaded"
|
||||
if timed_out:
|
||||
loaded_label += f", {len(timed_out)} source(s) still loading"
|
||||
c.print(f"\n[bold]Skills Hub — Browse {source_label}[/]"
|
||||
f" [dim]({total} skills, page {page}/{total_pages})[/]")
|
||||
f" [dim]({loaded_label}, page {page}/{total_pages})[/]")
|
||||
if official_count > 0 and page == 1:
|
||||
c.print(f"[bright_cyan]★ {official_count} official optional skill(s) from Nous Research[/]")
|
||||
c.print()
|
||||
|
|
@ -300,8 +300,11 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
|
|||
parts = [f"{sid}: {ct}" for sid, ct in sorted(source_counts.items())]
|
||||
c.print(f" [dim]Sources: {', '.join(parts)}[/]")
|
||||
|
||||
c.print("[dim]Use: hermes skills inspect <identifier> to preview, "
|
||||
"hermes skills install <identifier> to install[/]\n")
|
||||
if timed_out:
|
||||
c.print(f" [yellow]⚡ Slow sources skipped: {', '.join(timed_out)} "
|
||||
f"— run again for cached results[/]")
|
||||
|
||||
c.print("[dim]Tip: 'hermes skills search <query>' searches deeper across all registries[/]\n")
|
||||
|
||||
|
||||
def do_install(identifier: str, category: str = "", force: bool = False,
|
||||
|
|
|
|||
|
|
@ -305,6 +305,7 @@ def show_status(args):
|
|||
"DingTalk": ("DINGTALK_CLIENT_ID", None),
|
||||
"Feishu": ("FEISHU_APP_ID", "FEISHU_HOME_CHANNEL"),
|
||||
"WeCom": ("WECOM_BOT_ID", "WECOM_HOME_CHANNEL"),
|
||||
"Weixin": ("WEIXIN_ACCOUNT_ID", "WEIXIN_HOME_CHANNEL"),
|
||||
"BlueBubbles": ("BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_HOME_CHANNEL"),
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -133,6 +133,7 @@ PLATFORMS = {
|
|||
"dingtalk": {"label": "💬 DingTalk", "default_toolset": "hermes-dingtalk"},
|
||||
"feishu": {"label": "🪽 Feishu", "default_toolset": "hermes-feishu"},
|
||||
"wecom": {"label": "💬 WeCom", "default_toolset": "hermes-wecom"},
|
||||
"weixin": {"label": "💬 Weixin", "default_toolset": "hermes-weixin"},
|
||||
"api_server": {"label": "🌐 API Server", "default_toolset": "hermes-api-server"},
|
||||
"mattermost": {"label": "💬 Mattermost", "default_toolset": "hermes-mattermost"},
|
||||
"webhook": {"label": "🔗 Webhook", "default_toolset": "hermes-webhook"},
|
||||
|
|
@ -180,6 +181,14 @@ TOOL_CATEGORIES = {
|
|||
],
|
||||
"tts_provider": "elevenlabs",
|
||||
},
|
||||
{
|
||||
"name": "Mistral (Voxtral TTS)",
|
||||
"tag": "Multilingual, native Opus, needs MISTRAL_API_KEY",
|
||||
"env_vars": [
|
||||
{"key": "MISTRAL_API_KEY", "prompt": "Mistral API key", "url": "https://console.mistral.ai/"},
|
||||
],
|
||||
"tts_provider": "mistral",
|
||||
},
|
||||
],
|
||||
},
|
||||
"web": {
|
||||
|
|
@ -500,6 +509,10 @@ def _get_platform_tools(
|
|||
default_ts = PLATFORMS[platform]["default_toolset"]
|
||||
toolset_names = [default_ts]
|
||||
|
||||
# YAML may parse bare numeric names (e.g. ``12306:``) as int.
|
||||
# Normalise to str so downstream sorted() never mixes types.
|
||||
toolset_names = [str(ts) for ts in toolset_names]
|
||||
|
||||
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
|
||||
# If the saved list contains any configurable keys directly, the user
|
||||
|
|
@ -558,7 +571,7 @@ def _get_platform_tools(
|
|||
# Special sentinel: "no_mcp" in the toolset list disables all MCP servers.
|
||||
mcp_servers = config.get("mcp_servers") or {}
|
||||
enabled_mcp_servers = {
|
||||
name
|
||||
str(name)
|
||||
for name, server_cfg in mcp_servers.items()
|
||||
if isinstance(server_cfg, dict)
|
||||
and _parse_enabled_flag(server_cfg.get("enabled", True), default=True)
|
||||
|
|
@ -720,6 +733,8 @@ def _prompt_choice(question: str, choices: list, default: int = 0) -> int:
|
|||
return
|
||||
|
||||
curses.wrapper(_curses_menu)
|
||||
from hermes_cli.curses_ui import flush_stdin
|
||||
flush_stdin()
|
||||
return result_holder[0]
|
||||
|
||||
except Exception:
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ Provides options for:
|
|||
- Keep data: Remove code but keep ~/.hermes/ (configs, sessions, logs)
|
||||
"""
|
||||
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
|
|
|||
|
|
@ -17,6 +17,45 @@ def get_hermes_home() -> Path:
|
|||
return Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
||||
|
||||
|
||||
def get_default_hermes_root() -> Path:
|
||||
"""Return the root Hermes directory for profile-level operations.
|
||||
|
||||
In standard deployments this is ``~/.hermes``.
|
||||
|
||||
In Docker or custom deployments where ``HERMES_HOME`` points outside
|
||||
``~/.hermes`` (e.g. ``/opt/data``), returns ``HERMES_HOME`` directly
|
||||
— that IS the root.
|
||||
|
||||
In profile mode where ``HERMES_HOME`` is ``<root>/profiles/<name>``,
|
||||
returns ``<root>`` so that ``profile list`` can see all profiles.
|
||||
Works both for standard (``~/.hermes/profiles/coder``) and Docker
|
||||
(``/opt/data/profiles/coder``) layouts.
|
||||
|
||||
Import-safe — no dependencies beyond stdlib.
|
||||
"""
|
||||
native_home = Path.home() / ".hermes"
|
||||
env_home = os.environ.get("HERMES_HOME", "")
|
||||
if not env_home:
|
||||
return native_home
|
||||
env_path = Path(env_home)
|
||||
try:
|
||||
env_path.resolve().relative_to(native_home.resolve())
|
||||
# HERMES_HOME is under ~/.hermes (normal or profile mode)
|
||||
return native_home
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
# Docker / custom deployment.
|
||||
# Check if this is a profile path: <root>/profiles/<name>
|
||||
# If the immediate parent dir is named "profiles", the root is
|
||||
# the grandparent — this covers Docker profiles correctly.
|
||||
if env_path.parent.name == "profiles":
|
||||
return env_path.parent.parent
|
||||
|
||||
# Not a profile path — HERMES_HOME itself is the root
|
||||
return env_path
|
||||
|
||||
|
||||
def get_optional_skills_dir(default: Path | None = None) -> Path:
|
||||
"""Return the optional-skills directory, honoring package-manager wrappers.
|
||||
|
||||
|
|
@ -72,6 +111,32 @@ def display_hermes_home() -> str:
|
|||
return str(home)
|
||||
|
||||
|
||||
def get_subprocess_home() -> str | None:
|
||||
"""Return a per-profile HOME directory for subprocesses, or None.
|
||||
|
||||
When ``{HERMES_HOME}/home/`` exists on disk, subprocesses should use it
|
||||
as ``HOME`` so system tools (git, ssh, gh, npm …) write their configs
|
||||
inside the Hermes data directory instead of the OS-level ``/root`` or
|
||||
``~/``. This provides:
|
||||
|
||||
* **Docker persistence** — tool configs land inside the persistent volume.
|
||||
* **Profile isolation** — each profile gets its own git identity, SSH
|
||||
keys, gh tokens, etc.
|
||||
|
||||
The Python process's own ``os.environ["HOME"]`` and ``Path.home()`` are
|
||||
**never** modified — only subprocess environments should inject this value.
|
||||
Activation is directory-based: if the ``home/`` subdirectory doesn't
|
||||
exist, returns ``None`` and behavior is unchanged.
|
||||
"""
|
||||
hermes_home = os.getenv("HERMES_HOME")
|
||||
if not hermes_home:
|
||||
return None
|
||||
profile_home = os.path.join(hermes_home, "home")
|
||||
if os.path.isdir(profile_home):
|
||||
return profile_home
|
||||
return None
|
||||
|
||||
|
||||
VALID_REASONING_EFFORTS = ("minimal", "low", "medium", "high", "xhigh")
|
||||
|
||||
|
||||
|
|
@ -103,13 +168,30 @@ def is_termux() -> bool:
|
|||
return bool(os.getenv("TERMUX_VERSION") or "com.termux/files/usr" in prefix)
|
||||
|
||||
|
||||
_wsl_detected: bool | None = None
|
||||
|
||||
|
||||
def is_wsl() -> bool:
|
||||
"""Return True when running inside WSL (Windows Subsystem for Linux).
|
||||
|
||||
Checks ``/proc/version`` for the ``microsoft`` marker that both WSL1
|
||||
and WSL2 inject. Result is cached for the process lifetime.
|
||||
Import-safe — no heavy deps.
|
||||
"""
|
||||
global _wsl_detected
|
||||
if _wsl_detected is not None:
|
||||
return _wsl_detected
|
||||
try:
|
||||
with open("/proc/version", "r") as f:
|
||||
_wsl_detected = "microsoft" in f.read().lower()
|
||||
except Exception:
|
||||
_wsl_detected = False
|
||||
return _wsl_detected
|
||||
|
||||
|
||||
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
|
||||
OPENROUTER_MODELS_URL = f"{OPENROUTER_BASE_URL}/models"
|
||||
OPENROUTER_CHAT_URL = f"{OPENROUTER_BASE_URL}/chat/completions"
|
||||
|
||||
AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh/v1"
|
||||
AI_GATEWAY_MODELS_URL = f"{AI_GATEWAY_BASE_URL}/models"
|
||||
AI_GATEWAY_CHAT_URL = f"{AI_GATEWAY_BASE_URL}/chat/completions"
|
||||
|
||||
NOUS_API_BASE_URL = "https://inference-api.nousresearch.com/v1"
|
||||
NOUS_API_CHAT_URL = f"{NOUS_API_BASE_URL}/chat/completions"
|
||||
|
|
|
|||
|
|
@ -520,72 +520,6 @@ class SessionDB:
|
|||
)
|
||||
self._execute_write(_do)
|
||||
|
||||
def set_token_counts(
|
||||
self,
|
||||
session_id: str,
|
||||
input_tokens: int = 0,
|
||||
output_tokens: int = 0,
|
||||
model: str = None,
|
||||
cache_read_tokens: int = 0,
|
||||
cache_write_tokens: int = 0,
|
||||
reasoning_tokens: int = 0,
|
||||
estimated_cost_usd: Optional[float] = None,
|
||||
actual_cost_usd: Optional[float] = None,
|
||||
cost_status: Optional[str] = None,
|
||||
cost_source: Optional[str] = None,
|
||||
pricing_version: Optional[str] = None,
|
||||
billing_provider: Optional[str] = None,
|
||||
billing_base_url: Optional[str] = None,
|
||||
billing_mode: Optional[str] = None,
|
||||
) -> None:
|
||||
"""Set token counters to absolute values (not increment).
|
||||
|
||||
Use this when the caller provides cumulative totals from a completed
|
||||
conversation run (e.g. the gateway, where the cached agent's
|
||||
session_prompt_tokens already reflects the running total).
|
||||
"""
|
||||
def _do(conn):
|
||||
conn.execute(
|
||||
"""UPDATE sessions SET
|
||||
input_tokens = ?,
|
||||
output_tokens = ?,
|
||||
cache_read_tokens = ?,
|
||||
cache_write_tokens = ?,
|
||||
reasoning_tokens = ?,
|
||||
estimated_cost_usd = ?,
|
||||
actual_cost_usd = CASE
|
||||
WHEN ? IS NULL THEN actual_cost_usd
|
||||
ELSE ?
|
||||
END,
|
||||
cost_status = COALESCE(?, cost_status),
|
||||
cost_source = COALESCE(?, cost_source),
|
||||
pricing_version = COALESCE(?, pricing_version),
|
||||
billing_provider = COALESCE(billing_provider, ?),
|
||||
billing_base_url = COALESCE(billing_base_url, ?),
|
||||
billing_mode = COALESCE(billing_mode, ?),
|
||||
model = COALESCE(model, ?)
|
||||
WHERE id = ?""",
|
||||
(
|
||||
input_tokens,
|
||||
output_tokens,
|
||||
cache_read_tokens,
|
||||
cache_write_tokens,
|
||||
reasoning_tokens,
|
||||
estimated_cost_usd,
|
||||
actual_cost_usd,
|
||||
actual_cost_usd,
|
||||
cost_status,
|
||||
cost_source,
|
||||
pricing_version,
|
||||
billing_provider,
|
||||
billing_base_url,
|
||||
billing_mode,
|
||||
model,
|
||||
session_id,
|
||||
),
|
||||
)
|
||||
self._execute_write(_do)
|
||||
|
||||
def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get a session by ID."""
|
||||
with self._lock:
|
||||
|
|
|
|||
|
|
@ -89,13 +89,6 @@ def get_timezone() -> Optional[ZoneInfo]:
|
|||
return _cached_tz
|
||||
|
||||
|
||||
def get_timezone_name() -> str:
|
||||
"""Return the IANA name of the configured timezone, or empty string."""
|
||||
if not _cache_resolved:
|
||||
get_timezone() # populates cache
|
||||
return _cached_tz_name or ""
|
||||
|
||||
|
||||
def now() -> datetime:
|
||||
"""
|
||||
Return the current time as a timezone-aware datetime.
|
||||
|
|
@ -110,9 +103,3 @@ def now() -> datetime:
|
|||
return datetime.now().astimezone()
|
||||
|
||||
|
||||
def reset_cache() -> None:
|
||||
"""Clear the cached timezone. Used by tests and after config changes."""
|
||||
global _cached_tz, _cached_tz_name, _cache_resolved
|
||||
_cached_tz = None
|
||||
_cached_tz_name = None
|
||||
_cache_resolved = False
|
||||
|
|
|
|||
219
plugins/context_engine/__init__.py
Normal file
219
plugins/context_engine/__init__.py
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
"""Context engine plugin discovery.
|
||||
|
||||
Scans ``plugins/context_engine/<name>/`` directories for context engine
|
||||
plugins. Each subdirectory must contain ``__init__.py`` with a class
|
||||
implementing the ContextEngine ABC.
|
||||
|
||||
Context engines are separate from the general plugin system — they live
|
||||
in the repo and are always available without user installation. Only ONE
|
||||
can be active at a time, selected via ``context.engine`` in config.yaml.
|
||||
The default engine is ``"compressor"`` (the built-in ContextCompressor).
|
||||
|
||||
Usage:
|
||||
from plugins.context_engine import discover_context_engines, load_context_engine
|
||||
|
||||
available = discover_context_engines() # [(name, desc, available), ...]
|
||||
engine = load_context_engine("lcm") # ContextEngine instance
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import importlib.util
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_CONTEXT_ENGINE_PLUGINS_DIR = Path(__file__).parent
|
||||
|
||||
|
||||
def discover_context_engines() -> List[Tuple[str, str, bool]]:
|
||||
"""Scan plugins/context_engine/ for available engines.
|
||||
|
||||
Returns list of (name, description, is_available) tuples.
|
||||
Does NOT import the engines — just reads plugin.yaml for metadata
|
||||
and does a lightweight availability check.
|
||||
"""
|
||||
results = []
|
||||
if not _CONTEXT_ENGINE_PLUGINS_DIR.is_dir():
|
||||
return results
|
||||
|
||||
for child in sorted(_CONTEXT_ENGINE_PLUGINS_DIR.iterdir()):
|
||||
if not child.is_dir() or child.name.startswith(("_", ".")):
|
||||
continue
|
||||
init_file = child / "__init__.py"
|
||||
if not init_file.exists():
|
||||
continue
|
||||
|
||||
# Read description from plugin.yaml if available
|
||||
desc = ""
|
||||
yaml_file = child / "plugin.yaml"
|
||||
if yaml_file.exists():
|
||||
try:
|
||||
import yaml
|
||||
with open(yaml_file) as f:
|
||||
meta = yaml.safe_load(f) or {}
|
||||
desc = meta.get("description", "")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Quick availability check — try loading and calling is_available()
|
||||
available = True
|
||||
try:
|
||||
engine = _load_engine_from_dir(child)
|
||||
if engine is None:
|
||||
available = False
|
||||
elif hasattr(engine, "is_available"):
|
||||
available = engine.is_available()
|
||||
except Exception:
|
||||
available = False
|
||||
|
||||
results.append((child.name, desc, available))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def load_context_engine(name: str) -> Optional["ContextEngine"]:
|
||||
"""Load and return a ContextEngine instance by name.
|
||||
|
||||
Returns None if the engine is not found or fails to load.
|
||||
"""
|
||||
engine_dir = _CONTEXT_ENGINE_PLUGINS_DIR / name
|
||||
if not engine_dir.is_dir():
|
||||
logger.debug("Context engine '%s' not found in %s", name, _CONTEXT_ENGINE_PLUGINS_DIR)
|
||||
return None
|
||||
|
||||
try:
|
||||
engine = _load_engine_from_dir(engine_dir)
|
||||
if engine:
|
||||
return engine
|
||||
logger.warning("Context engine '%s' loaded but no engine instance found", name)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning("Failed to load context engine '%s': %s", name, e)
|
||||
return None
|
||||
|
||||
|
||||
def _load_engine_from_dir(engine_dir: Path) -> Optional["ContextEngine"]:
|
||||
"""Import an engine module and extract the ContextEngine instance.
|
||||
|
||||
The module must have either:
|
||||
- A register(ctx) function (plugin-style) — we simulate a ctx
|
||||
- A top-level class that extends ContextEngine — we instantiate it
|
||||
"""
|
||||
name = engine_dir.name
|
||||
module_name = f"plugins.context_engine.{name}"
|
||||
init_file = engine_dir / "__init__.py"
|
||||
|
||||
if not init_file.exists():
|
||||
return None
|
||||
|
||||
# Check if already loaded
|
||||
if module_name in sys.modules:
|
||||
mod = sys.modules[module_name]
|
||||
else:
|
||||
# Handle relative imports within the plugin
|
||||
# First ensure the parent packages are registered
|
||||
for parent in ("plugins", "plugins.context_engine"):
|
||||
if parent not in sys.modules:
|
||||
parent_path = Path(__file__).parent
|
||||
if parent == "plugins":
|
||||
parent_path = parent_path.parent
|
||||
parent_init = parent_path / "__init__.py"
|
||||
if parent_init.exists():
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
parent, str(parent_init),
|
||||
submodule_search_locations=[str(parent_path)]
|
||||
)
|
||||
if spec:
|
||||
parent_mod = importlib.util.module_from_spec(spec)
|
||||
sys.modules[parent] = parent_mod
|
||||
try:
|
||||
spec.loader.exec_module(parent_mod)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Now load the engine module
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
module_name, str(init_file),
|
||||
submodule_search_locations=[str(engine_dir)]
|
||||
)
|
||||
if not spec:
|
||||
return None
|
||||
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
sys.modules[module_name] = mod
|
||||
|
||||
# Register submodules so relative imports work
|
||||
for sub_file in engine_dir.glob("*.py"):
|
||||
if sub_file.name == "__init__.py":
|
||||
continue
|
||||
sub_name = sub_file.stem
|
||||
full_sub_name = f"{module_name}.{sub_name}"
|
||||
if full_sub_name not in sys.modules:
|
||||
sub_spec = importlib.util.spec_from_file_location(
|
||||
full_sub_name, str(sub_file)
|
||||
)
|
||||
if sub_spec:
|
||||
sub_mod = importlib.util.module_from_spec(sub_spec)
|
||||
sys.modules[full_sub_name] = sub_mod
|
||||
try:
|
||||
sub_spec.loader.exec_module(sub_mod)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to load submodule %s: %s", full_sub_name, e)
|
||||
|
||||
try:
|
||||
spec.loader.exec_module(mod)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to exec_module %s: %s", module_name, e)
|
||||
sys.modules.pop(module_name, None)
|
||||
return None
|
||||
|
||||
# Try register(ctx) pattern first (how plugins are written)
|
||||
if hasattr(mod, "register"):
|
||||
collector = _EngineCollector()
|
||||
try:
|
||||
mod.register(collector)
|
||||
if collector.engine:
|
||||
return collector.engine
|
||||
except Exception as e:
|
||||
logger.debug("register() failed for %s: %s", name, e)
|
||||
|
||||
# Fallback: find a ContextEngine subclass and instantiate it
|
||||
from agent.context_engine import ContextEngine
|
||||
for attr_name in dir(mod):
|
||||
attr = getattr(mod, attr_name, None)
|
||||
if (isinstance(attr, type) and issubclass(attr, ContextEngine)
|
||||
and attr is not ContextEngine):
|
||||
try:
|
||||
return attr()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class _EngineCollector:
|
||||
"""Fake plugin context that captures register_context_engine calls."""
|
||||
|
||||
def __init__(self):
|
||||
self.engine = None
|
||||
|
||||
def register_context_engine(self, engine):
|
||||
self.engine = engine
|
||||
|
||||
# No-op for other registration methods
|
||||
def register_tool(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def register_hook(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def register_cli_command(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def register_memory_provider(self, *args, **kwargs):
|
||||
pass
|
||||
|
|
@ -218,9 +218,11 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
return
|
||||
|
||||
# Override peer_name with gateway user_id for per-user memory scoping.
|
||||
# CLI sessions won't have user_id, so the config default is preserved.
|
||||
# Only when no explicit peerName was configured — an explicit peerName
|
||||
# means the user chose their identity; a raw user_id (e.g. Telegram
|
||||
# chat ID) should not silently replace it.
|
||||
_gw_user_id = kwargs.get("user_id")
|
||||
if _gw_user_id:
|
||||
if _gw_user_id and not cfg.peer_name:
|
||||
cfg.peer_name = _gw_user_id
|
||||
|
||||
self._config = cfg
|
||||
|
|
@ -248,6 +250,12 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
|
||||
# ----- Port #1957: lazy session init for tools-only mode -----
|
||||
if self._recall_mode == "tools":
|
||||
if cfg.init_on_session_start:
|
||||
# Eager init: create session now so sync_turn() works from turn 1.
|
||||
# Does NOT enable auto-injection — prefetch() still returns empty.
|
||||
logger.debug("Honcho tools-only mode — eager session init (initOnSessionStart=true)")
|
||||
self._do_session_init(cfg, session_id, **kwargs)
|
||||
return
|
||||
# Defer actual session creation until first tool call
|
||||
self._lazy_init_kwargs = kwargs
|
||||
self._lazy_init_session_id = session_id
|
||||
|
|
|
|||
|
|
@ -189,6 +189,11 @@ class HonchoClientConfig:
|
|||
# "context" — auto-injected context only, Honcho tools removed
|
||||
# "tools" — Honcho tools only, no auto-injected context
|
||||
recall_mode: str = "hybrid"
|
||||
# When True and recallMode is "tools", create the Honcho session eagerly
|
||||
# during initialize() instead of deferring to the first tool call.
|
||||
# This ensures sync_turn() can write from the very first turn.
|
||||
# Does NOT enable automatic context injection — only changes init timing.
|
||||
init_on_session_start: bool = False
|
||||
# Observation mode: legacy string shorthand ("directional" or "unified").
|
||||
# Kept for backward compat; granular per-peer booleans below are preferred.
|
||||
observation_mode: str = "directional"
|
||||
|
|
@ -366,6 +371,11 @@ class HonchoClientConfig:
|
|||
or raw.get("recallMode")
|
||||
or "hybrid"
|
||||
),
|
||||
init_on_session_start=_resolve_bool(
|
||||
host_block.get("initOnSessionStart"),
|
||||
raw.get("initOnSessionStart"),
|
||||
default=False,
|
||||
),
|
||||
# Migration guard: existing configs without an explicit
|
||||
# observationMode keep the old "unified" default so users
|
||||
# aren't silently switched to full bidirectional observation.
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ dependencies = [
|
|||
"anthropic>=0.39.0,<1",
|
||||
"python-dotenv>=1.2.1,<2",
|
||||
"fire>=0.7.1,<1",
|
||||
"httpx>=0.28.1,<1",
|
||||
"httpx[socks]>=0.28.1,<1",
|
||||
"rich>=14.3.3,<15",
|
||||
"tenacity>=9.1.4,<10",
|
||||
"pyyaml>=6.0.2,<7",
|
||||
|
|
@ -43,7 +43,7 @@ dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "py
|
|||
messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
|
||||
cron = ["croniter>=6.0.0,<7"]
|
||||
slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
|
||||
matrix = ["matrix-nio[e2e]>=0.24.0,<1", "Markdown>=3.6,<4"]
|
||||
matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4"]
|
||||
cli = ["simple-term-menu>=1.0,<2"]
|
||||
tts-premium = ["elevenlabs>=1.0,<2"]
|
||||
voice = [
|
||||
|
|
@ -88,10 +88,10 @@ all = [
|
|||
"hermes-agent[modal]",
|
||||
"hermes-agent[daytona]",
|
||||
"hermes-agent[messaging]",
|
||||
# matrix excluded: python-olm (required by matrix-nio[e2e]) is upstream-broken
|
||||
# on modern macOS (archived libolm, C++ errors with Clang 21+). Including it
|
||||
# here causes the entire [all] install to fail, dropping all other extras.
|
||||
# Users who need Matrix can install manually: pip install 'hermes-agent[matrix]'
|
||||
# matrix: python-olm (required by matrix-nio[e2e]) is upstream-broken on
|
||||
# modern macOS (archived libolm, C++ errors with Clang 21+). On Linux the
|
||||
# [matrix] extra's own marker pulls in the [e2e] variant automatically.
|
||||
"hermes-agent[matrix]; sys_platform == 'linux'",
|
||||
"hermes-agent[cron]",
|
||||
"hermes-agent[cli]",
|
||||
"hermes-agent[dev]",
|
||||
|
|
|
|||
814
run_agent.py
814
run_agent.py
File diff suppressed because it is too large
Load diff
|
|
@ -249,8 +249,12 @@ def check_config(groq_key, eleven_key):
|
|||
|
||||
if stt_provider == "groq" and not groq_key:
|
||||
warn("STT config says groq but GROQ_API_KEY is missing")
|
||||
if stt_provider == "mistral" and not os.getenv("MISTRAL_API_KEY"):
|
||||
warn("STT config says mistral but MISTRAL_API_KEY is missing")
|
||||
if tts_provider == "elevenlabs" and not eleven_key:
|
||||
warn("TTS config says elevenlabs but ELEVENLABS_API_KEY is missing")
|
||||
if tts_provider == "mistral" and not os.getenv("MISTRAL_API_KEY"):
|
||||
warn("TTS config says mistral but MISTRAL_API_KEY is missing")
|
||||
except Exception as e:
|
||||
warn("config.yaml", f"parse error: {e}")
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -1082,10 +1082,19 @@ install_node_deps() {
|
|||
log_success "Node.js dependencies installed"
|
||||
|
||||
# Install Playwright browser + system dependencies.
|
||||
# Playwright's install-deps only supports apt/dnf/zypper natively.
|
||||
# Playwright's --with-deps only supports apt-based systems natively.
|
||||
# For Arch/Manjaro we install the system libs via pacman first.
|
||||
# Other systems must install Chromium dependencies manually.
|
||||
log_info "Installing browser engine (Playwright Chromium)..."
|
||||
case "$DISTRO" in
|
||||
ubuntu|debian|raspbian|pop|linuxmint|elementary|zorin|kali|parrot)
|
||||
log_info "Playwright may request sudo to install browser system dependencies (shared libraries)."
|
||||
log_info "This is standard Playwright setup — Hermes itself does not require root access."
|
||||
cd "$INSTALL_DIR" && npx playwright install --with-deps chromium 2>/dev/null || {
|
||||
log_warn "Playwright browser installation failed — browser tools will not work."
|
||||
log_warn "Try running manually: cd $INSTALL_DIR && npx playwright install --with-deps chromium"
|
||||
}
|
||||
;;
|
||||
arch|manjaro)
|
||||
if command -v pacman &> /dev/null; then
|
||||
log_info "Arch/Manjaro detected — installing Chromium system dependencies via pacman..."
|
||||
|
|
@ -1100,15 +1109,35 @@ install_node_deps() {
|
|||
log_warn " sudo pacman -S nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib"
|
||||
fi
|
||||
fi
|
||||
cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || true
|
||||
cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || {
|
||||
log_warn "Playwright browser installation failed — browser tools will not work."
|
||||
}
|
||||
;;
|
||||
fedora|rhel|centos|rocky|alma)
|
||||
log_warn "Playwright does not support automatic dependency installation on RPM-based systems."
|
||||
log_info "Install Chromium system dependencies manually before using browser tools:"
|
||||
log_info " sudo dnf install nss atk at-spi2-core cups-libs libdrm libxkbcommon mesa-libgbm pango cairo alsa-lib"
|
||||
cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || {
|
||||
log_warn "Playwright browser installation failed — install dependencies above and retry."
|
||||
}
|
||||
;;
|
||||
opensuse*|sles)
|
||||
log_warn "Playwright does not support automatic dependency installation on zypper-based systems."
|
||||
log_info "Install Chromium system dependencies manually before using browser tools:"
|
||||
log_info " sudo zypper install mozilla-nss libatk-1_0-0 at-spi2-core cups-libs libdrm2 libxkbcommon0 Mesa-libgbm1 pango cairo libasound2"
|
||||
cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || {
|
||||
log_warn "Playwright browser installation failed — install dependencies above and retry."
|
||||
}
|
||||
;;
|
||||
*)
|
||||
log_info "Playwright may request sudo to install browser system dependencies (shared libraries)."
|
||||
log_info "This is standard Playwright setup — Hermes itself does not require root access."
|
||||
cd "$INSTALL_DIR" && npx playwright install --with-deps chromium 2>/dev/null || true
|
||||
log_warn "Playwright does not support automatic dependency installation on $DISTRO."
|
||||
log_info "Install Chromium/browser system dependencies for your distribution, then run:"
|
||||
log_info " cd $INSTALL_DIR && npx playwright install chromium"
|
||||
log_info "Browser tools will not work until dependencies are installed."
|
||||
cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || true
|
||||
;;
|
||||
esac
|
||||
log_success "Browser engine installed"
|
||||
log_success "Browser engine setup complete"
|
||||
fi
|
||||
|
||||
# Install TUI dependencies
|
||||
|
|
|
|||
|
|
@ -203,3 +203,30 @@ For segmented videos (quotes, scenes, chapters), render each as a separate clip
|
|||
| `references/inputs.md` | Audio analysis (FFT, bands, beats), video sampling, image conversion, text/lyrics, TTS integration (ElevenLabs, voice assignment, audio mixing) |
|
||||
| `references/optimization.md` | Hardware detection, quality profiles, vectorized patterns, parallel rendering, memory management, performance budgets |
|
||||
| `references/troubleshooting.md` | NumPy broadcasting traps, blend mode pitfalls, multiprocessing/pickling, brightness diagnostics, ffmpeg issues, font problems, common mistakes |
|
||||
|
||||
---
|
||||
|
||||
## Creative Divergence (use only when user requests experimental/creative/unique output)
|
||||
|
||||
If the user asks for creative, experimental, surprising, or unconventional output, select the strategy that best fits and reason through its steps BEFORE generating code.
|
||||
|
||||
- **Forced Connections** — when the user wants cross-domain inspiration ("make it look organic," "industrial aesthetic")
|
||||
- **Conceptual Blending** — when the user names two things to combine ("ocean meets music," "space + calligraphy")
|
||||
- **Oblique Strategies** — when the user is maximally open ("surprise me," "something I've never seen")
|
||||
|
||||
### Forced Connections
|
||||
1. Pick a domain unrelated to the visual goal (weather systems, microbiology, architecture, fluid dynamics, textile weaving)
|
||||
2. List its core visual/structural elements (erosion → gradual reveal; mitosis → splitting duplication; weaving → interlocking patterns)
|
||||
3. Map those elements onto ASCII characters and animation patterns
|
||||
4. Synthesize — what does "erosion" or "crystallization" look like in a character grid?
|
||||
|
||||
### Conceptual Blending
|
||||
1. Name two distinct visual/conceptual spaces (e.g., ocean waves + sheet music)
|
||||
2. Map correspondences (crests = high notes, troughs = rests, foam = staccato)
|
||||
3. Blend selectively — keep the most interesting mappings, discard forced ones
|
||||
4. Develop emergent properties that exist only in the blend
|
||||
|
||||
### Oblique Strategies
|
||||
1. Draw one: "Honor thy error as a hidden intention" / "Use an old idea" / "What would your closest friend do?" / "Emphasize the flaws" / "Turn it upside down" / "Only a part, not the whole" / "Reverse"
|
||||
2. Interpret the directive against the current ASCII animation challenge
|
||||
3. Apply the lateral insight to the visual design before writing code
|
||||
|
|
|
|||
147
skills/creative/creative-ideation/SKILL.md
Normal file
147
skills/creative/creative-ideation/SKILL.md
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
---
|
||||
name: ideation
|
||||
title: Creative Ideation — Constraint-Driven Project Generation
|
||||
description: "Generate project ideas through creative constraints. Use when the user says 'I want to build something', 'give me a project idea', 'I'm bored', 'what should I make', 'inspire me', or any variant of 'I have tools but no direction'. Works for code, art, hardware, writing, tools, and anything that can be made."
|
||||
version: 1.0.0
|
||||
author: SHL0MS
|
||||
license: MIT
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [Creative, Ideation, Projects, Brainstorming, Inspiration]
|
||||
category: creative
|
||||
requires_toolsets: []
|
||||
---
|
||||
|
||||
# Creative Ideation
|
||||
|
||||
Generate project ideas through creative constraints. Constraint + direction = creativity.
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Pick a constraint** from the library below — random, or matched to the user's domain/mood
|
||||
2. **Interpret it broadly** — a coding prompt can become a hardware project, an art prompt can become a CLI tool
|
||||
3. **Generate 3 concrete project ideas** that satisfy the constraint
|
||||
4. **If they pick one, build it** — create the project, write the code, ship it
|
||||
|
||||
## The Rule
|
||||
|
||||
Every prompt is interpreted as broadly as possible. "Does this include X?" → Yes. The prompts provide direction and mild constraint. Without either, there is no creativity.
|
||||
|
||||
## Constraint Library
|
||||
|
||||
### For Developers
|
||||
|
||||
**Solve your own itch:**
|
||||
Build the tool you wished existed this week. Under 50 lines. Ship it today.
|
||||
|
||||
**Automate the annoying thing:**
|
||||
What's the most tedious part of your workflow? Script it away. Two hours to fix a problem that costs you five minutes a day.
|
||||
|
||||
**The CLI tool that should exist:**
|
||||
Think of a command you've wished you could type. `git undo-that-thing-i-just-did`. `docker why-is-this-broken`. `npm explain-yourself`. Now build it.
|
||||
|
||||
**Nothing new except glue:**
|
||||
Make something entirely from existing APIs, libraries, and datasets. The only original contribution is how you connect them.
|
||||
|
||||
**Frankenstein week:**
|
||||
Take something that does X and make it do Y. A git repo that plays music. A Dockerfile that generates poetry. A cron job that sends compliments.
|
||||
|
||||
**Subtract:**
|
||||
How much can you remove from a codebase before it breaks? Strip a tool to its minimum viable function. Delete until only the essence remains.
|
||||
|
||||
**High concept, low effort:**
|
||||
A deep idea, lazily executed. The concept should be brilliant. The implementation should take an afternoon. If it takes longer, you're overthinking it.
|
||||
|
||||
### For Makers & Artists
|
||||
|
||||
**Blatantly copy something:**
|
||||
Pick something you admire — a tool, an artwork, an interface. Recreate it from scratch. The learning is in the gap between your version and theirs.
|
||||
|
||||
**One million of something:**
|
||||
One million is both a lot and not that much. One million pixels is a 1MB photo. One million API calls is a Tuesday. One million of anything becomes interesting at scale.
|
||||
|
||||
**Make something that dies:**
|
||||
A website that loses a feature every day. A chatbot that forgets. A countdown to nothing. An exercise in rot, killing, or letting go.
|
||||
|
||||
**Do a lot of math:**
|
||||
Generative geometry, shader golf, mathematical art, computational origami. Time to re-learn what an arcsin is.
|
||||
|
||||
### For Anyone
|
||||
|
||||
**Text is the universal interface:**
|
||||
Build something where text is the only interface. No buttons, no graphics, just words in and words out. Text can go in and out of almost anything.
|
||||
|
||||
**Start at the punchline:**
|
||||
Think of something that would be a funny sentence. Work backwards to make it real. "I taught my thermostat to gaslight me" → now build it.
|
||||
|
||||
**Hostile UI:**
|
||||
Make something intentionally painful to use. A password field that requires 47 conditions. A form where every label lies. A CLI that judges your commands.
|
||||
|
||||
**Take two:**
|
||||
Remember an old project. Do it again from scratch. No looking at the original. See what changed about how you think.
|
||||
|
||||
See `references/full-prompt-library.md` for 30+ additional constraints across communication, scale, philosophy, transformation, and more.
|
||||
|
||||
## Matching Constraints to Users
|
||||
|
||||
| User says | Pick from |
|
||||
|-----------|-----------|
|
||||
| "I want to build something" (no direction) | Random — any constraint |
|
||||
| "I'm learning [language]" | Blatantly copy something, Automate the annoying thing |
|
||||
| "I want something weird" | Hostile UI, Frankenstein week, Start at the punchline |
|
||||
| "I want something useful" | Solve your own itch, The CLI that should exist, Automate the annoying thing |
|
||||
| "I want something beautiful" | Do a lot of math, One million of something |
|
||||
| "I'm burned out" | High concept low effort, Make something that dies |
|
||||
| "Weekend project" | Nothing new except glue, Start at the punchline |
|
||||
| "I want a challenge" | One million of something, Subtract, Take two |
|
||||
|
||||
## Output Format
|
||||
|
||||
```
|
||||
## Constraint: [Name]
|
||||
> [The constraint, one sentence]
|
||||
|
||||
### Ideas
|
||||
|
||||
1. **[One-line pitch]**
|
||||
[2-3 sentences: what you'd build and why it's interesting]
|
||||
⏱ [weekend / week / month] • 🔧 [stack]
|
||||
|
||||
2. **[One-line pitch]**
|
||||
[2-3 sentences]
|
||||
⏱ ... • 🔧 ...
|
||||
|
||||
3. **[One-line pitch]**
|
||||
[2-3 sentences]
|
||||
⏱ ... • 🔧 ...
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
```
|
||||
## Constraint: The CLI tool that should exist
|
||||
> Think of a command you've wished you could type. Now build it.
|
||||
|
||||
### Ideas
|
||||
|
||||
1. **`git whatsup` — show what happened while you were away**
|
||||
Compares your last active commit to HEAD and summarizes what changed,
|
||||
who committed, and what PRs merged. Like a morning standup from your repo.
|
||||
⏱ weekend • 🔧 Python, GitPython, click
|
||||
|
||||
2. **`explain 503` — HTTP status codes for humans**
|
||||
Pipe any status code or error message and get a plain-English explanation
|
||||
with common causes and fixes. Pulls from a curated database, not an LLM.
|
||||
⏱ weekend • 🔧 Rust or Go, static dataset
|
||||
|
||||
3. **`deps why <package>` — why is this in my dependency tree**
|
||||
Traces a transitive dependency back to the direct dependency that pulled
|
||||
it in. Answers "why do I have 47 copies of lodash" in one command.
|
||||
⏱ weekend • 🔧 Node.js, npm/yarn lockfile parsing
|
||||
```
|
||||
|
||||
After the user picks one, start building — create the project, write the code, iterate.
|
||||
|
||||
## Attribution
|
||||
|
||||
Constraint approach inspired by [wttdotm.com/prompts.html](https://wttdotm.com/prompts.html). Adapted and expanded for software development and general-purpose ideation.
|
||||
|
|
@ -0,0 +1,110 @@
|
|||
# Full Prompt Library
|
||||
|
||||
Extended constraint library beyond the core set in SKILL.md. Load these when the user wants more variety or a specific category.
|
||||
|
||||
## Communication & Connection
|
||||
|
||||
**Create a means of distribution:**
|
||||
The project works when you can use what you made to give something to somebody else.
|
||||
|
||||
**Make a way to communicate:**
|
||||
The project works when you can hold a conversation with someone else using what you created. Not chat — something weirder.
|
||||
|
||||
**Write a love letter:**
|
||||
To a person, a programming language, a game, a place, a tool. On paper, in code, in music, in light. Mail it.
|
||||
|
||||
**Mail chess / Asynchronous games:**
|
||||
Something turn-based played with no time limit. No requirement to be there at the same time. The game happens in the gaps.
|
||||
|
||||
**Twitch plays X:**
|
||||
A group of people share control over something. Collective input, emergent behavior.
|
||||
|
||||
## Screens & Interfaces
|
||||
|
||||
**Something for your desktop:**
|
||||
You spend a lot of time there. Spruce it up. A custom clock, a pet that lives in your terminal, a wallpaper that changes based on your git activity.
|
||||
|
||||
**One screen, two screen, old screen, new screen:**
|
||||
Take something you associate with one screen and put it on a very different one. DOOM on a smart fridge. A spreadsheet on a watch. A terminal in a painting.
|
||||
|
||||
**Make a mirror:**
|
||||
Something that reflects the viewer back at themselves. A website that shows your browsing history. A CLI that prints your git sins.
|
||||
|
||||
## Philosophy & Concept
|
||||
|
||||
**Code as koan, koan as code:**
|
||||
What is the sound of one hand clapping? A program that answers a question it wasn't asked. A function that returns before it's called.
|
||||
|
||||
**The useless tree:**
|
||||
Make something useless. Deliberately, completely, beautifully useless. No utility. No purpose. No point. That's the point.
|
||||
|
||||
**Artificial stupidity:**
|
||||
Make fun of AI by showcasing its faults. Mistrain it. Lie to it. Build the opposite of what AI is supposed to be good at.
|
||||
|
||||
**"I use technology in order to hate it properly":**
|
||||
Make something inspired by the tension between loving and hating your tools.
|
||||
|
||||
**The more things change, the more they stay the same:**
|
||||
Reflect on time, difference, and similarity.
|
||||
|
||||
## Transformation
|
||||
|
||||
**Translate:**
|
||||
Take something meant for one audience and make it understandable by another. A research paper as a children's book. An API as a board game. A song as an architecture diagram.
|
||||
|
||||
**I mean, I GUESS you could store something that way:**
|
||||
The project works when you can save and open something. Store data in DNS caches. Encode a novel in emoji. Write a file system on top of something that isn't a file system.
|
||||
|
||||
**I mean, I GUESS those could be pixels:**
|
||||
The project works when you can display an image. Render anything visual in a medium that wasn't meant for rendering.
|
||||
|
||||
## Identity & Reflection
|
||||
|
||||
**Make a self-portrait:**
|
||||
Be yourself? Be fake? Be real? In code, in data, in sound, in a directory structure.
|
||||
|
||||
**Make a pun:**
|
||||
The stupider the better. Physical, digital, linguistic, visual. The project IS the joke.
|
||||
|
||||
**Doors, walls, borders, barriers, boundaries:**
|
||||
Things that intermediate two places: opening, closing, permeating, excluding, combining.
|
||||
|
||||
## Scale & Repetition
|
||||
|
||||
**Lists!:**
|
||||
Itemizations, taxonomies, exhaustive recountings, iterations. This one. A list of list of lists.
|
||||
|
||||
**Did you mean *recursion*?**
|
||||
Did you mean recursion?
|
||||
|
||||
**Animals:**
|
||||
Lions, and tigers, and bears. Crab logic gates. Fish plays the stock market.
|
||||
|
||||
**Cats:**
|
||||
Where would the internet be without them.
|
||||
|
||||
## Starting Points
|
||||
|
||||
**An idea that comes from a book:**
|
||||
Read something. Make something inspired by it.
|
||||
|
||||
**Go to a museum:**
|
||||
Project ensues.
|
||||
|
||||
**NPC loot:**
|
||||
What do you drop when you die? What do you take on your journey? Build the item.
|
||||
|
||||
**Mythological objects and entities:**
|
||||
Pandora's box, the ocarina of time, the palantir. Build the artifact.
|
||||
|
||||
**69:**
|
||||
Nice. Make something with the joke being the number 69.
|
||||
|
||||
**Office Space printer scene:**
|
||||
Capture the same energy. Channel the catharsis of destroying the thing that frustrates you.
|
||||
|
||||
**Borges week:**
|
||||
Something inspired by the Argentine. The library of babel. The map that is the territory.
|
||||
|
||||
**Lights!:**
|
||||
LED throwies, light installations, illuminated anything. Make something that glows.
|
||||
|
|
@ -239,3 +239,26 @@ Always iterate at `-ql`. Only render `-qh` for final output.
|
|||
| `references/paper-explainer.md` | Turning research papers into animations — workflow, templates, domain patterns |
|
||||
| `references/decorations.md` | SurroundingRectangle, Brace, arrows, DashedLine, Angle, annotation lifecycle |
|
||||
| `references/production-quality.md` | Pre-code, pre-render, post-render checklists, spatial layout, color, tempo |
|
||||
|
||||
---
|
||||
|
||||
## Creative Divergence (use only when user requests experimental/creative/unique output)
|
||||
|
||||
If the user asks for creative, experimental, or unconventional explanatory approaches, select a strategy and reason through it BEFORE designing the animation.
|
||||
|
||||
- **SCAMPER** — when the user wants a fresh take on a standard explanation
|
||||
- **Assumption Reversal** — when the user wants to challenge how something is typically taught
|
||||
|
||||
### SCAMPER Transformation
|
||||
Take a standard mathematical/technical visualization and transform it:
|
||||
- **Substitute**: replace the standard visual metaphor (number line → winding path, matrix → city grid)
|
||||
- **Combine**: merge two explanation approaches (algebraic + geometric simultaneously)
|
||||
- **Reverse**: derive backward — start from the result and deconstruct to axioms
|
||||
- **Modify**: exaggerate a parameter to show why it matters (10x the learning rate, 1000x the sample size)
|
||||
- **Eliminate**: remove all notation — explain purely through animation and spatial relationships
|
||||
|
||||
### Assumption Reversal
|
||||
1. List what's "standard" about how this topic is visualized (left-to-right, 2D, discrete steps, formal notation)
|
||||
2. Pick the most fundamental assumption
|
||||
3. Reverse it (right-to-left derivation, 3D embedding of a 2D concept, continuous morphing instead of steps, zero notation)
|
||||
4. Explore what the reversal reveals that the standard approach hides
|
||||
|
|
|
|||
|
|
@ -511,3 +511,37 @@ When building p5.js sketches:
|
|||
| `references/export-pipeline.md` | `saveCanvas()`, `saveGif()`, `saveFrames()`, deterministic headless capture, ffmpeg frame-to-video, CCapture.js, SVG export, per-clip architecture, platform export (fxhash), video gotchas |
|
||||
| `references/troubleshooting.md` | Performance profiling, per-pixel budgets, common mistakes, browser compatibility, WebGL debugging, font loading issues, pixel density traps, memory leaks, CORS |
|
||||
| `templates/viewer.html` | Interactive viewer template: seed navigation (prev/next/random/jump), parameter sliders, download PNG, responsive canvas. Start from this for explorable generative art |
|
||||
|
||||
---
|
||||
|
||||
## Creative Divergence (use only when user requests experimental/creative/unique output)
|
||||
|
||||
If the user asks for creative, experimental, surprising, or unconventional output, select the strategy that best fits and reason through its steps BEFORE generating code.
|
||||
|
||||
- **Conceptual Blending** — when the user names two things to combine or wants hybrid aesthetics
|
||||
- **SCAMPER** — when the user wants a twist on a known generative art pattern
|
||||
- **Distance Association** — when the user gives a single concept and wants exploration ("make something about time")
|
||||
|
||||
### Conceptual Blending
|
||||
1. Name two distinct visual systems (e.g., particle physics + handwriting)
|
||||
2. Map correspondences (particles = ink drops, forces = pen pressure, fields = letterforms)
|
||||
3. Blend selectively — keep mappings that produce interesting emergent visuals
|
||||
4. Code the blend as a unified system, not two systems side-by-side
|
||||
|
||||
### SCAMPER Transformation
|
||||
Take a known generative pattern (flow field, particle system, L-system, cellular automata) and systematically transform it:
|
||||
- **Substitute**: replace circles with text characters, lines with gradients
|
||||
- **Combine**: merge two patterns (flow field + voronoi)
|
||||
- **Adapt**: apply a 2D pattern to a 3D projection
|
||||
- **Modify**: exaggerate scale, warp the coordinate space
|
||||
- **Purpose**: use a physics sim for typography, a sorting algorithm for color
|
||||
- **Eliminate**: remove the grid, remove color, remove symmetry
|
||||
- **Reverse**: run the simulation backward, invert the parameter space
|
||||
|
||||
### Distance Association
|
||||
1. Anchor on the user's concept (e.g., "loneliness")
|
||||
2. Generate associations at three distances:
|
||||
- Close (obvious): empty room, single figure, silence
|
||||
- Medium (interesting): one fish in a school swimming the wrong way, a phone with no notifications, the gap between subway cars
|
||||
- Far (abstract): prime numbers, asymptotic curves, the color of 3am
|
||||
3. Develop the medium-distance associations — they're specific enough to visualize but unexpected enough to be interesting
|
||||
|
|
|
|||
|
|
@ -68,9 +68,22 @@ class TestInitialize:
|
|||
resp = await agent.initialize(protocol_version=1)
|
||||
caps = resp.agent_capabilities
|
||||
assert isinstance(caps, AgentCapabilities)
|
||||
assert caps.load_session is True
|
||||
assert caps.session_capabilities is not None
|
||||
assert caps.session_capabilities.fork is not None
|
||||
assert caps.session_capabilities.list is not None
|
||||
assert caps.session_capabilities.resume is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_initialize_capabilities_wire_format(self, agent):
|
||||
"""Verify the JSON wire format uses correct aliases so ACP clients see the right keys."""
|
||||
resp = await agent.initialize(protocol_version=1)
|
||||
payload = resp.agent_capabilities.model_dump(by_alias=True, exclude_none=True)
|
||||
assert payload["loadSession"] is True
|
||||
session_caps = payload["sessionCapabilities"]
|
||||
assert "fork" in session_caps
|
||||
assert "list" in session_caps
|
||||
assert "resume" in session_caps
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -410,6 +423,37 @@ class TestPrompt:
|
|||
update = last_call[1].get("update") or last_call[0][1]
|
||||
assert update.session_update == "agent_message_chunk"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_prompt_populates_usage_from_top_level_run_conversation_fields(self, agent):
|
||||
"""ACP should map top-level token fields into PromptResponse.usage."""
|
||||
new_resp = await agent.new_session(cwd=".")
|
||||
state = agent.session_manager.get_session(new_resp.session_id)
|
||||
|
||||
state.agent.run_conversation = MagicMock(return_value={
|
||||
"final_response": "usage attached",
|
||||
"messages": [],
|
||||
"prompt_tokens": 123,
|
||||
"completion_tokens": 45,
|
||||
"total_tokens": 168,
|
||||
"reasoning_tokens": 7,
|
||||
"cache_read_tokens": 11,
|
||||
})
|
||||
|
||||
mock_conn = MagicMock(spec=acp.Client)
|
||||
mock_conn.session_update = AsyncMock()
|
||||
agent._conn = mock_conn
|
||||
|
||||
prompt = [TextContentBlock(type="text", text="show usage")]
|
||||
resp = await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
|
||||
|
||||
assert isinstance(resp, PromptResponse)
|
||||
assert resp.usage is not None
|
||||
assert resp.usage.input_tokens == 123
|
||||
assert resp.usage.output_tokens == 45
|
||||
assert resp.usage.total_tokens == 168
|
||||
assert resp.usage.thought_tokens == 7
|
||||
assert resp.usage.cached_read_tokens == 11
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_prompt_cancelled_returns_cancelled_stop_reason(self, agent):
|
||||
"""If cancel is called during prompt, stop_reason should be 'cancelled'."""
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ from agent.anthropic_adapter import (
|
|||
build_anthropic_kwargs,
|
||||
convert_messages_to_anthropic,
|
||||
convert_tools_to_anthropic,
|
||||
get_anthropic_token_source,
|
||||
is_claude_code_token_valid,
|
||||
normalize_anthropic_response,
|
||||
normalize_model_name,
|
||||
|
|
@ -40,8 +39,13 @@ class TestIsOAuthToken:
|
|||
assert _is_oauth_token("sk-ant-api03-abcdef1234567890") is False
|
||||
|
||||
def test_managed_key(self):
|
||||
# Managed keys from ~/.claude.json are NOT regular API keys
|
||||
assert _is_oauth_token("ou1R1z-ft0A-bDeZ9wAA") is True
|
||||
# Managed keys from ~/.claude.json without a recognisable Anthropic
|
||||
# prefix are not positively identified as OAuth. They enter the system
|
||||
# via diagnostics-only read_claude_managed_key(), not via
|
||||
# resolve_anthropic_token(), so they don't reach the OAuth gate in
|
||||
# practice. Third-party provider keys (MiniMax, Alibaba) also lack
|
||||
# the sk-ant- prefix and must NOT be treated as OAuth.
|
||||
assert _is_oauth_token("ou1R1z-ft0A-bDeZ9wAA") is False
|
||||
|
||||
def test_jwt_token(self):
|
||||
# JWTs from OAuth flow
|
||||
|
|
@ -81,6 +85,9 @@ class TestBuildAnthropicClient:
|
|||
build_anthropic_client("sk-ant-api03-x", base_url="https://custom.api.com")
|
||||
kwargs = mock_sdk.Anthropic.call_args[1]
|
||||
assert kwargs["base_url"] == "https://custom.api.com"
|
||||
assert kwargs["default_headers"] == {
|
||||
"anthropic-beta": "interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
|
||||
}
|
||||
|
||||
def test_minimax_anthropic_endpoint_uses_bearer_auth_for_regular_api_keys(self):
|
||||
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||
|
|
@ -92,7 +99,20 @@ class TestBuildAnthropicClient:
|
|||
assert kwargs["auth_token"] == "minimax-secret-123"
|
||||
assert "api_key" not in kwargs
|
||||
assert kwargs["default_headers"] == {
|
||||
"anthropic-beta": "interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
|
||||
"anthropic-beta": "interleaved-thinking-2025-05-14"
|
||||
}
|
||||
|
||||
def test_minimax_cn_anthropic_endpoint_omits_tool_streaming_beta(self):
|
||||
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||
build_anthropic_client(
|
||||
"minimax-cn-secret-123",
|
||||
base_url="https://api.minimaxi.com/anthropic",
|
||||
)
|
||||
kwargs = mock_sdk.Anthropic.call_args[1]
|
||||
assert kwargs["auth_token"] == "minimax-cn-secret-123"
|
||||
assert "api_key" not in kwargs
|
||||
assert kwargs["default_headers"] == {
|
||||
"anthropic-beta": "interleaved-thinking-2025-05-14"
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -165,15 +185,6 @@ class TestResolveAnthropicToken:
|
|||
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
|
||||
assert resolve_anthropic_token() == "sk-ant-oat01-mytoken"
|
||||
|
||||
def test_reports_claude_json_primary_key_source(self, monkeypatch, tmp_path):
|
||||
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||||
monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
|
||||
monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
|
||||
(tmp_path / ".claude.json").write_text(json.dumps({"primaryApiKey": "sk-ant-api03-primary"}))
|
||||
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
|
||||
|
||||
assert get_anthropic_token_source("sk-ant-api03-primary") == "claude_json_primary_api_key"
|
||||
|
||||
def test_does_not_resolve_primary_api_key_as_native_anthropic_token(self, monkeypatch, tmp_path):
|
||||
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||||
monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
|
||||
|
|
|
|||
|
|
@ -1,26 +1,26 @@
|
|||
"""Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
from unittest.mock import patch, MagicMock, AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.auxiliary_client import (
|
||||
get_text_auxiliary_client,
|
||||
get_vision_auxiliary_client,
|
||||
get_available_vision_backends,
|
||||
resolve_vision_provider_client,
|
||||
resolve_provider_client,
|
||||
auxiliary_max_tokens_param,
|
||||
call_llm,
|
||||
async_call_llm,
|
||||
_read_codex_access_token,
|
||||
_get_auxiliary_provider,
|
||||
_get_provider_chain,
|
||||
_is_payment_error,
|
||||
_try_payment_fallback,
|
||||
_resolve_forced_provider,
|
||||
_resolve_auto,
|
||||
)
|
||||
|
||||
|
|
@ -660,19 +660,23 @@ class TestGetTextAuxiliaryClient:
|
|||
assert client is None
|
||||
assert model is None
|
||||
|
||||
def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self):
|
||||
with patch("agent.auxiliary_client._resolve_custom_runtime",
|
||||
return_value=("https://api.openai.com/v1", "sk-test", "codex_responses")), \
|
||||
patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.3-codex"), \
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_text_auxiliary_client()
|
||||
|
||||
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-5.3-codex"
|
||||
assert mock_openai.call_args.kwargs["base_url"] == "https://api.openai.com/v1"
|
||||
assert mock_openai.call_args.kwargs["api_key"] == "sk-test"
|
||||
|
||||
|
||||
class TestVisionClientFallback:
|
||||
"""Vision client auto mode resolves known-good multimodal backends."""
|
||||
|
||||
def test_vision_returns_none_without_any_credentials(self):
|
||||
with (
|
||||
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
|
||||
patch("agent.auxiliary_client._try_anthropic", return_value=(None, None)),
|
||||
):
|
||||
client, model = get_vision_auxiliary_client()
|
||||
assert client is None
|
||||
assert model is None
|
||||
|
||||
def test_vision_auto_includes_active_provider_when_configured(self, monkeypatch):
|
||||
"""Active provider appears in available backends when credentials exist."""
|
||||
monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
|
||||
|
|
@ -754,6 +758,54 @@ class TestAuxiliaryPoolAwareness:
|
|||
assert call_kwargs["base_url"] == "https://api.githubcopilot.com"
|
||||
assert call_kwargs["default_headers"]["Editor-Version"]
|
||||
|
||||
def test_copilot_responses_api_model_wrapped_in_codex_client(self, monkeypatch):
|
||||
"""Copilot GPT-5+ models (needing Responses API) are wrapped in CodexAuxiliaryClient."""
|
||||
monkeypatch.delenv("GITHUB_TOKEN", raising=False)
|
||||
monkeypatch.delenv("GH_TOKEN", raising=False)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"hermes_cli.auth.resolve_api_key_provider_credentials",
|
||||
return_value={
|
||||
"provider": "copilot",
|
||||
"api_key": "test-token",
|
||||
"base_url": "https://api.githubcopilot.com",
|
||||
"source": "gh auth token",
|
||||
},
|
||||
),
|
||||
patch("agent.auxiliary_client.OpenAI"),
|
||||
):
|
||||
client, model = resolve_provider_client("copilot", model="gpt-5.4-mini")
|
||||
|
||||
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-5.4-mini"
|
||||
|
||||
def test_copilot_chat_completions_model_not_wrapped(self, monkeypatch):
|
||||
"""Copilot models using Chat Completions are returned as plain OpenAI clients."""
|
||||
monkeypatch.delenv("GITHUB_TOKEN", raising=False)
|
||||
monkeypatch.delenv("GH_TOKEN", raising=False)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"hermes_cli.auth.resolve_api_key_provider_credentials",
|
||||
return_value={
|
||||
"provider": "copilot",
|
||||
"api_key": "test-token",
|
||||
"base_url": "https://api.githubcopilot.com",
|
||||
"source": "gh auth token",
|
||||
},
|
||||
),
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
||||
):
|
||||
client, model = resolve_provider_client("copilot", model="gpt-4.1-mini")
|
||||
|
||||
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||
assert not isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-4.1-mini"
|
||||
# Should be the raw mock OpenAI client
|
||||
assert client is mock_openai.return_value
|
||||
|
||||
def test_vision_auto_uses_active_provider_as_fallback(self, monkeypatch):
|
||||
"""When no OpenRouter/Nous available, vision auto falls back to active provider."""
|
||||
monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
|
||||
|
|
@ -800,43 +852,6 @@ class TestAuxiliaryPoolAwareness:
|
|||
assert client is not None
|
||||
assert provider == "custom:local"
|
||||
|
||||
def test_vision_direct_endpoint_override(self, monkeypatch):
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
|
||||
monkeypatch.setenv("AUXILIARY_VISION_API_KEY", "vision-key")
|
||||
monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_vision_auxiliary_client()
|
||||
assert model == "vision-model"
|
||||
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:4567/v1"
|
||||
assert mock_openai.call_args.kwargs["api_key"] == "vision-key"
|
||||
|
||||
def test_vision_direct_endpoint_without_key_uses_placeholder(self, monkeypatch):
|
||||
"""Vision endpoint without API key should use 'no-key-required' placeholder."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
|
||||
monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_vision_auxiliary_client()
|
||||
assert client is not None
|
||||
assert model == "vision-model"
|
||||
assert mock_openai.call_args.kwargs["api_key"] == "no-key-required"
|
||||
|
||||
def test_vision_uses_openrouter_when_available(self, monkeypatch):
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_vision_auxiliary_client()
|
||||
assert model == "google/gemini-3-flash-preview"
|
||||
assert client is not None
|
||||
|
||||
def test_vision_uses_nous_when_available(self, monkeypatch):
|
||||
with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
|
||||
patch("agent.auxiliary_client.OpenAI"):
|
||||
mock_nous.return_value = {"access_token": "nous-tok"}
|
||||
client, model = get_vision_auxiliary_client()
|
||||
assert model == "google/gemini-3-flash-preview"
|
||||
assert client is not None
|
||||
|
||||
def test_vision_config_google_provider_uses_gemini_credentials(self, monkeypatch):
|
||||
config = {
|
||||
"auxiliary": {
|
||||
|
|
@ -862,53 +877,6 @@ class TestAuxiliaryPoolAwareness:
|
|||
assert mock_openai.call_args.kwargs["api_key"] == "gemini-key"
|
||||
assert mock_openai.call_args.kwargs["base_url"] == "https://generativelanguage.googleapis.com/v1beta/openai"
|
||||
|
||||
def test_vision_forced_main_uses_custom_endpoint(self, monkeypatch):
|
||||
"""When explicitly forced to 'main', vision CAN use custom endpoint."""
|
||||
config = {
|
||||
"model": {
|
||||
"provider": "custom",
|
||||
"base_url": "http://localhost:1234/v1",
|
||||
"default": "my-local-model",
|
||||
}
|
||||
}
|
||||
monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "local-key")
|
||||
monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
|
||||
monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_vision_auxiliary_client()
|
||||
assert client is not None
|
||||
assert model == "my-local-model"
|
||||
|
||||
def test_vision_forced_main_returns_none_without_creds(self, monkeypatch):
|
||||
"""Forced main with no credentials still returns None."""
|
||||
monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
|
||||
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
# Clear client cache to avoid stale entries from previous tests
|
||||
from agent.auxiliary_client import _client_cache
|
||||
_client_cache.clear()
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client._read_main_provider", return_value=""), \
|
||||
patch("agent.auxiliary_client._read_main_model", return_value=""), \
|
||||
patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)), \
|
||||
patch("agent.auxiliary_client._resolve_custom_runtime", return_value=(None, None)), \
|
||||
patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
|
||||
patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
|
||||
client, model = get_vision_auxiliary_client()
|
||||
assert client is None
|
||||
assert model is None
|
||||
|
||||
def test_vision_forced_codex(self, monkeypatch, codex_auth_dir):
|
||||
"""When forced to 'codex', vision uses Codex OAuth."""
|
||||
monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "codex")
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client.OpenAI"):
|
||||
client, model = get_vision_auxiliary_client()
|
||||
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-5.2-codex"
|
||||
|
||||
|
||||
class TestGetAuxiliaryProvider:
|
||||
|
|
@ -948,122 +916,6 @@ class TestGetAuxiliaryProvider:
|
|||
assert _get_auxiliary_provider("web_extract") == "main"
|
||||
|
||||
|
||||
class TestResolveForcedProvider:
|
||||
"""Tests for _resolve_forced_provider with explicit provider selection."""
|
||||
|
||||
def test_forced_openrouter(self, monkeypatch):
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = _resolve_forced_provider("openrouter")
|
||||
assert model == "google/gemini-3-flash-preview"
|
||||
assert client is not None
|
||||
|
||||
def test_forced_openrouter_no_key(self, monkeypatch):
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
|
||||
client, model = _resolve_forced_provider("openrouter")
|
||||
assert client is None
|
||||
assert model is None
|
||||
|
||||
def test_forced_nous(self, monkeypatch):
|
||||
with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
|
||||
patch("agent.auxiliary_client.OpenAI"):
|
||||
mock_nous.return_value = {"access_token": "nous-tok"}
|
||||
client, model = _resolve_forced_provider("nous")
|
||||
assert model == "google/gemini-3-flash-preview"
|
||||
assert client is not None
|
||||
|
||||
def test_forced_nous_not_configured(self, monkeypatch):
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
|
||||
client, model = _resolve_forced_provider("nous")
|
||||
assert client is None
|
||||
assert model is None
|
||||
|
||||
def test_forced_main_uses_custom(self, monkeypatch):
|
||||
config = {
|
||||
"model": {
|
||||
"provider": "custom",
|
||||
"base_url": "http://local:8080/v1",
|
||||
"default": "my-local-model",
|
||||
}
|
||||
}
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "local-key")
|
||||
monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
|
||||
monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = _resolve_forced_provider("main")
|
||||
assert model == "my-local-model"
|
||||
|
||||
def test_forced_main_uses_config_saved_custom_endpoint(self, monkeypatch):
|
||||
config = {
|
||||
"model": {
|
||||
"provider": "custom",
|
||||
"base_url": "http://local:8080/v1",
|
||||
"default": "my-local-model",
|
||||
}
|
||||
}
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "local-key")
|
||||
monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
|
||||
monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
|
||||
patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = _resolve_forced_provider("main")
|
||||
assert client is not None
|
||||
assert model == "my-local-model"
|
||||
call_kwargs = mock_openai.call_args
|
||||
assert call_kwargs.kwargs["base_url"] == "http://local:8080/v1"
|
||||
|
||||
def test_forced_main_skips_openrouter_nous(self, monkeypatch):
|
||||
"""Even if OpenRouter key is set, 'main' skips it."""
|
||||
config = {
|
||||
"model": {
|
||||
"provider": "custom",
|
||||
"base_url": "http://local:8080/v1",
|
||||
"default": "my-local-model",
|
||||
}
|
||||
}
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "local-key")
|
||||
monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
|
||||
monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = _resolve_forced_provider("main")
|
||||
# Should use custom endpoint, not OpenRouter
|
||||
assert model == "my-local-model"
|
||||
|
||||
def test_forced_main_falls_to_codex(self, codex_auth_dir, monkeypatch):
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client.OpenAI"):
|
||||
client, model = _resolve_forced_provider("main")
|
||||
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-5.2-codex"
|
||||
|
||||
def test_forced_codex(self, codex_auth_dir, monkeypatch):
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client.OpenAI"):
|
||||
client, model = _resolve_forced_provider("codex")
|
||||
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-5.2-codex"
|
||||
|
||||
def test_forced_codex_no_token(self, monkeypatch):
|
||||
with patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
|
||||
client, model = _resolve_forced_provider("codex")
|
||||
assert client is None
|
||||
assert model is None
|
||||
|
||||
def test_forced_unknown_returns_none(self, monkeypatch):
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
|
||||
client, model = _resolve_forced_provider("invalid-provider")
|
||||
assert client is None
|
||||
assert model is None
|
||||
|
||||
|
||||
class TestTaskSpecificOverrides:
|
||||
"""Integration tests for per-task provider routing via get_text_auxiliary_client(task=...)."""
|
||||
|
||||
|
|
@ -1272,8 +1124,8 @@ class TestCallLlmPaymentFallback:
|
|||
exc.status_code = 402
|
||||
return exc
|
||||
|
||||
def test_402_triggers_fallback(self, monkeypatch):
|
||||
"""When the primary provider returns 402, call_llm tries the next one."""
|
||||
def test_402_triggers_fallback_when_auto(self, monkeypatch):
|
||||
"""When provider is auto and returns 402, call_llm tries the next one."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
||||
primary_client = MagicMock()
|
||||
|
|
@ -1286,7 +1138,7 @@ class TestCallLlmPaymentFallback:
|
|||
with patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(primary_client, "google/gemini-3-flash-preview")), \
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openrouter", "google/gemini-3-flash-preview", None, None)), \
|
||||
return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
|
||||
patch("agent.auxiliary_client._try_payment_fallback",
|
||||
return_value=(fallback_client, "gpt-5.2-codex", "openai-codex")) as mock_fb:
|
||||
result = call_llm(
|
||||
|
|
@ -1295,13 +1147,62 @@ class TestCallLlmPaymentFallback:
|
|||
)
|
||||
|
||||
assert result is fallback_response
|
||||
mock_fb.assert_called_once_with("openrouter", "compression")
|
||||
mock_fb.assert_called_once_with("auto", "compression", reason="payment error")
|
||||
# Fallback call should use the fallback model
|
||||
fb_kwargs = fallback_client.chat.completions.create.call_args.kwargs
|
||||
assert fb_kwargs["model"] == "gpt-5.2-codex"
|
||||
|
||||
def test_402_no_fallback_when_explicit_provider(self, monkeypatch):
|
||||
"""When provider is explicitly configured (not auto), 402 should NOT fallback (#7559)."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
||||
primary_client = MagicMock()
|
||||
primary_client.chat.completions.create.side_effect = self._make_402_error()
|
||||
|
||||
with patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(primary_client, "local-model")), \
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("custom", "local-model", None, None, None)), \
|
||||
patch("agent.auxiliary_client._try_payment_fallback") as mock_fb:
|
||||
with pytest.raises(Exception, match="insufficient credits"):
|
||||
call_llm(
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "hello"}],
|
||||
)
|
||||
|
||||
# Fallback should NOT be attempted when provider is explicit
|
||||
mock_fb.assert_not_called()
|
||||
|
||||
def test_connection_error_triggers_fallback_when_auto(self, monkeypatch):
|
||||
"""Connection errors also trigger fallback when provider is auto."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
||||
primary_client = MagicMock()
|
||||
conn_err = Exception("Connection refused")
|
||||
conn_err.status_code = None
|
||||
primary_client.chat.completions.create.side_effect = conn_err
|
||||
|
||||
fallback_client = MagicMock()
|
||||
fallback_response = MagicMock()
|
||||
fallback_client.chat.completions.create.return_value = fallback_response
|
||||
|
||||
with patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(primary_client, "model")), \
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("auto", "model", None, None, None)), \
|
||||
patch("agent.auxiliary_client._is_connection_error", return_value=True), \
|
||||
patch("agent.auxiliary_client._try_payment_fallback",
|
||||
return_value=(fallback_client, "fb-model", "nous")) as mock_fb:
|
||||
result = call_llm(
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "hello"}],
|
||||
)
|
||||
|
||||
assert result is fallback_response
|
||||
mock_fb.assert_called_once_with("auto", "compression", reason="connection error")
|
||||
|
||||
def test_non_payment_error_not_caught(self, monkeypatch):
|
||||
"""Non-payment errors (500, connection, etc.) should NOT trigger fallback."""
|
||||
"""Non-payment/non-connection errors (500) should NOT trigger fallback."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
||||
primary_client = MagicMock()
|
||||
|
|
@ -1312,7 +1213,7 @@ class TestCallLlmPaymentFallback:
|
|||
with patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(primary_client, "google/gemini-3-flash-preview")), \
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openrouter", "google/gemini-3-flash-preview", None, None)):
|
||||
return_value=("auto", "google/gemini-3-flash-preview", None, None, None)):
|
||||
with pytest.raises(Exception, match="Internal Server Error"):
|
||||
call_llm(
|
||||
task="compression",
|
||||
|
|
@ -1329,7 +1230,7 @@ class TestCallLlmPaymentFallback:
|
|||
with patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(primary_client, "google/gemini-3-flash-preview")), \
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openrouter", "google/gemini-3-flash-preview", None, None)), \
|
||||
return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
|
||||
patch("agent.auxiliary_client._try_payment_fallback",
|
||||
return_value=(None, None, "")):
|
||||
with pytest.raises(Exception, match="insufficient credits"):
|
||||
|
|
@ -1337,3 +1238,325 @@ class TestCallLlmPaymentFallback:
|
|||
task="compression",
|
||||
messages=[{"role": "user", "content": "hello"}],
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gate: _resolve_api_key_provider must skip anthropic when not configured
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_resolve_api_key_provider_skips_unconfigured_anthropic(monkeypatch):
|
||||
"""_resolve_api_key_provider must not try anthropic when user never configured it."""
|
||||
from collections import OrderedDict
|
||||
from hermes_cli.auth import ProviderConfig
|
||||
|
||||
# Build a minimal registry with only "anthropic" so the loop is guaranteed
|
||||
# to reach it without being short-circuited by earlier providers.
|
||||
fake_registry = OrderedDict({
|
||||
"anthropic": ProviderConfig(
|
||||
id="anthropic",
|
||||
name="Anthropic",
|
||||
auth_type="api_key",
|
||||
inference_base_url="https://api.anthropic.com",
|
||||
api_key_env_vars=("ANTHROPIC_API_KEY",),
|
||||
),
|
||||
})
|
||||
|
||||
called = []
|
||||
|
||||
def mock_try_anthropic():
|
||||
called.append("anthropic")
|
||||
return None, None
|
||||
|
||||
monkeypatch.setattr("agent.auxiliary_client._try_anthropic", mock_try_anthropic)
|
||||
monkeypatch.setattr("hermes_cli.auth.PROVIDER_REGISTRY", fake_registry)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.auth.is_provider_explicitly_configured",
|
||||
lambda pid: False,
|
||||
)
|
||||
|
||||
from agent.auxiliary_client import _resolve_api_key_provider
|
||||
_resolve_api_key_provider()
|
||||
|
||||
assert "anthropic" not in called, \
|
||||
"_try_anthropic() should not be called when anthropic is not explicitly configured"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# model="default" elimination (#7512)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestModelDefaultElimination:
|
||||
"""_resolve_api_key_provider must skip providers without known aux models."""
|
||||
|
||||
def test_unknown_provider_skipped(self, monkeypatch):
|
||||
"""Providers not in _API_KEY_PROVIDER_AUX_MODELS are skipped, not sent model='default'."""
|
||||
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
|
||||
|
||||
# Verify our known providers have entries
|
||||
assert "gemini" in _API_KEY_PROVIDER_AUX_MODELS
|
||||
assert "kimi-coding" in _API_KEY_PROVIDER_AUX_MODELS
|
||||
|
||||
# A random provider_id not in the dict should return None
|
||||
assert _API_KEY_PROVIDER_AUX_MODELS.get("totally-unknown-provider") is None
|
||||
|
||||
def test_known_provider_gets_real_model(self):
|
||||
"""Known providers get a real model name, not 'default'."""
|
||||
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
|
||||
|
||||
for provider_id, model in _API_KEY_PROVIDER_AUX_MODELS.items():
|
||||
assert model != "default", f"{provider_id} should not map to 'default'"
|
||||
assert isinstance(model, str) and model.strip(), \
|
||||
f"{provider_id} should have a non-empty model string"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _try_payment_fallback reason parameter (#7512 bug 3)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTryPaymentFallbackReason:
|
||||
"""_try_payment_fallback uses the reason parameter in log messages."""
|
||||
|
||||
def test_reason_parameter_passed_through(self, monkeypatch):
|
||||
"""The reason= parameter is accepted without error."""
|
||||
from agent.auxiliary_client import _try_payment_fallback
|
||||
|
||||
# Mock the provider chain to return nothing
|
||||
monkeypatch.setattr(
|
||||
"agent.auxiliary_client._get_provider_chain",
|
||||
lambda: [],
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"agent.auxiliary_client._read_main_provider",
|
||||
lambda: "",
|
||||
)
|
||||
|
||||
client, model, label = _try_payment_fallback(
|
||||
"openrouter", task="compression", reason="connection error"
|
||||
)
|
||||
assert client is None
|
||||
assert label == ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _is_connection_error coverage
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestIsConnectionError:
|
||||
"""Tests for _is_connection_error detection."""
|
||||
|
||||
def test_connection_refused(self):
|
||||
from agent.auxiliary_client import _is_connection_error
|
||||
err = Exception("Connection refused")
|
||||
assert _is_connection_error(err) is True
|
||||
|
||||
def test_timeout(self):
|
||||
from agent.auxiliary_client import _is_connection_error
|
||||
err = Exception("Request timed out.")
|
||||
assert _is_connection_error(err) is True
|
||||
|
||||
def test_dns_failure(self):
|
||||
from agent.auxiliary_client import _is_connection_error
|
||||
err = Exception("Name or service not known")
|
||||
assert _is_connection_error(err) is True
|
||||
|
||||
def test_normal_api_error_not_connection(self):
|
||||
from agent.auxiliary_client import _is_connection_error
|
||||
err = Exception("Bad Request: invalid model")
|
||||
err.status_code = 400
|
||||
assert _is_connection_error(err) is False
|
||||
|
||||
def test_500_not_connection(self):
|
||||
from agent.auxiliary_client import _is_connection_error
|
||||
err = Exception("Internal Server Error")
|
||||
err.status_code = 500
|
||||
assert _is_connection_error(err) is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# async_call_llm payment / connection fallback (#7512 bug 2)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAsyncCallLlmFallback:
|
||||
"""async_call_llm mirrors call_llm fallback behavior."""
|
||||
|
||||
def _make_402_error(self, msg="Payment Required: insufficient credits"):
|
||||
exc = Exception(msg)
|
||||
exc.status_code = 402
|
||||
return exc
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_402_triggers_async_fallback_when_auto(self, monkeypatch):
|
||||
"""When provider is auto and returns 402, async_call_llm tries fallback."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
||||
primary_client = MagicMock()
|
||||
primary_client.chat.completions.create = AsyncMock(
|
||||
side_effect=self._make_402_error())
|
||||
|
||||
# Fallback client (sync) returned by _try_payment_fallback
|
||||
fb_sync_client = MagicMock()
|
||||
fb_async_client = MagicMock()
|
||||
fb_response = MagicMock()
|
||||
fb_async_client.chat.completions.create = AsyncMock(return_value=fb_response)
|
||||
|
||||
with patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(primary_client, "google/gemini-3-flash-preview")), \
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
|
||||
patch("agent.auxiliary_client._try_payment_fallback",
|
||||
return_value=(fb_sync_client, "gpt-5.2-codex", "openai-codex")) as mock_fb, \
|
||||
patch("agent.auxiliary_client._to_async_client",
|
||||
return_value=(fb_async_client, "gpt-5.2-codex")):
|
||||
result = await async_call_llm(
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "hello"}],
|
||||
)
|
||||
|
||||
assert result is fb_response
|
||||
mock_fb.assert_called_once_with("auto", "compression", reason="payment error")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_402_no_async_fallback_when_explicit(self, monkeypatch):
|
||||
"""When provider is explicit, 402 should NOT trigger async fallback."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
||||
primary_client = MagicMock()
|
||||
primary_client.chat.completions.create = AsyncMock(
|
||||
side_effect=self._make_402_error())
|
||||
|
||||
with patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(primary_client, "local-model")), \
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("custom", "local-model", None, None, None)), \
|
||||
patch("agent.auxiliary_client._try_payment_fallback") as mock_fb:
|
||||
with pytest.raises(Exception, match="insufficient credits"):
|
||||
await async_call_llm(
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "hello"}],
|
||||
)
|
||||
|
||||
mock_fb.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_connection_error_triggers_async_fallback(self, monkeypatch):
|
||||
"""Connection errors trigger async fallback when provider is auto."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
||||
primary_client = MagicMock()
|
||||
conn_err = Exception("Connection refused")
|
||||
conn_err.status_code = None
|
||||
primary_client.chat.completions.create = AsyncMock(side_effect=conn_err)
|
||||
|
||||
fb_sync_client = MagicMock()
|
||||
fb_async_client = MagicMock()
|
||||
fb_response = MagicMock()
|
||||
fb_async_client.chat.completions.create = AsyncMock(return_value=fb_response)
|
||||
|
||||
with patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(primary_client, "model")), \
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("auto", "model", None, None, None)), \
|
||||
patch("agent.auxiliary_client._is_connection_error", return_value=True), \
|
||||
patch("agent.auxiliary_client._try_payment_fallback",
|
||||
return_value=(fb_sync_client, "fb-model", "nous")) as mock_fb, \
|
||||
patch("agent.auxiliary_client._to_async_client",
|
||||
return_value=(fb_async_client, "fb-model")):
|
||||
result = await async_call_llm(
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "hello"}],
|
||||
)
|
||||
|
||||
assert result is fb_response
|
||||
mock_fb.assert_called_once_with("auto", "compression", reason="connection error")
|
||||
class TestStaleBaseUrlWarning:
|
||||
"""_resolve_auto() warns when OPENAI_BASE_URL conflicts with config provider (#5161)."""
|
||||
|
||||
def test_warns_when_openai_base_url_set_with_named_provider(self, monkeypatch, caplog):
|
||||
"""Warning fires when OPENAI_BASE_URL is set but provider is a named provider."""
|
||||
import agent.auxiliary_client as mod
|
||||
# Reset the module-level flag so the warning fires
|
||||
monkeypatch.setattr(mod, "_stale_base_url_warned", False)
|
||||
monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
|
||||
|
||||
with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
|
||||
patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"), \
|
||||
caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
|
||||
_resolve_auto()
|
||||
|
||||
assert any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
|
||||
"Expected a warning about stale OPENAI_BASE_URL"
|
||||
assert mod._stale_base_url_warned is True
|
||||
|
||||
def test_no_warning_when_provider_is_custom(self, monkeypatch, caplog):
|
||||
"""No warning when the provider is 'custom' — OPENAI_BASE_URL is expected."""
|
||||
import agent.auxiliary_client as mod
|
||||
monkeypatch.setattr(mod, "_stale_base_url_warned", False)
|
||||
monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "test-key")
|
||||
|
||||
with patch("agent.auxiliary_client._read_main_provider", return_value="custom"), \
|
||||
patch("agent.auxiliary_client._read_main_model", return_value="llama3"), \
|
||||
patch("agent.auxiliary_client._resolve_custom_runtime",
|
||||
return_value=("http://localhost:11434/v1", "test-key", None)), \
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai, \
|
||||
caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
|
||||
mock_openai.return_value = MagicMock()
|
||||
_resolve_auto()
|
||||
|
||||
assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
|
||||
"Should NOT warn when provider is 'custom'"
|
||||
|
||||
def test_no_warning_when_provider_is_named_custom(self, monkeypatch, caplog):
|
||||
"""No warning when the provider is 'custom:myname' — base_url comes from config."""
|
||||
import agent.auxiliary_client as mod
|
||||
monkeypatch.setattr(mod, "_stale_base_url_warned", False)
|
||||
monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "test-key")
|
||||
|
||||
with patch("agent.auxiliary_client._read_main_provider", return_value="custom:ollama-local"), \
|
||||
patch("agent.auxiliary_client._read_main_model", return_value="llama3"), \
|
||||
patch("agent.auxiliary_client.resolve_provider_client",
|
||||
return_value=(MagicMock(), "llama3")), \
|
||||
caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
|
||||
_resolve_auto()
|
||||
|
||||
assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
|
||||
"Should NOT warn when provider is 'custom:*'"
|
||||
|
||||
def test_no_warning_when_openai_base_url_not_set(self, monkeypatch, caplog):
|
||||
"""No warning when OPENAI_BASE_URL is absent."""
|
||||
import agent.auxiliary_client as mod
|
||||
monkeypatch.setattr(mod, "_stale_base_url_warned", False)
|
||||
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
|
||||
|
||||
with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
|
||||
patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"), \
|
||||
caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
|
||||
_resolve_auto()
|
||||
|
||||
assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
|
||||
"Should NOT warn when OPENAI_BASE_URL is not set"
|
||||
|
||||
def test_warning_only_fires_once(self, monkeypatch, caplog):
|
||||
"""Warning is suppressed after the first invocation."""
|
||||
import agent.auxiliary_client as mod
|
||||
monkeypatch.setattr(mod, "_stale_base_url_warned", False)
|
||||
monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
|
||||
|
||||
with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
|
||||
patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"), \
|
||||
caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
|
||||
_resolve_auto()
|
||||
caplog.clear()
|
||||
_resolve_auto()
|
||||
|
||||
assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
|
||||
"Warning should not fire a second time"
|
||||
|
|
|
|||
|
|
@ -12,6 +12,17 @@ def _isolate(tmp_path, monkeypatch):
|
|||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
for env_var in (
|
||||
"AUXILIARY_VISION_PROVIDER",
|
||||
"AUXILIARY_VISION_MODEL",
|
||||
"AUXILIARY_VISION_BASE_URL",
|
||||
"AUXILIARY_VISION_API_KEY",
|
||||
"CONTEXT_VISION_PROVIDER",
|
||||
"CONTEXT_VISION_MODEL",
|
||||
"CONTEXT_VISION_BASE_URL",
|
||||
"CONTEXT_VISION_API_KEY",
|
||||
):
|
||||
monkeypatch.delenv(env_var, raising=False)
|
||||
# Write a minimal config so load_config doesn't fail
|
||||
(hermes_home / "config.yaml").write_text("model:\n default: test-model\n")
|
||||
|
||||
|
|
@ -149,3 +160,83 @@ class TestResolveProviderClientNamedCustom:
|
|||
# "coffee" doesn't exist in custom_providers
|
||||
client, model = resolve_provider_client("coffee", "test")
|
||||
assert client is None
|
||||
|
||||
|
||||
class TestResolveProviderClientModelNormalization:
|
||||
"""Direct-provider auxiliary routing should normalize models like main runtime."""
|
||||
|
||||
def test_matching_native_prefix_is_stripped_for_main_provider(self, tmp_path):
|
||||
_write_config(tmp_path, {
|
||||
"model": {"default": "zai/glm-5.1", "provider": "zai"},
|
||||
})
|
||||
with (
|
||||
patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={
|
||||
"api_key": "glm-key",
|
||||
"base_url": "https://api.z.ai/api/paas/v4",
|
||||
}),
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
||||
):
|
||||
mock_openai.return_value = MagicMock()
|
||||
from agent.auxiliary_client import resolve_provider_client
|
||||
|
||||
client, model = resolve_provider_client("main", "zai/glm-5.1")
|
||||
|
||||
assert client is not None
|
||||
assert model == "glm-5.1"
|
||||
|
||||
def test_non_matching_prefix_is_preserved_for_direct_provider(self, tmp_path):
|
||||
_write_config(tmp_path, {
|
||||
"model": {"default": "zai/glm-5.1", "provider": "zai"},
|
||||
})
|
||||
with (
|
||||
patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={
|
||||
"api_key": "glm-key",
|
||||
"base_url": "https://api.z.ai/api/paas/v4",
|
||||
}),
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
||||
):
|
||||
mock_openai.return_value = MagicMock()
|
||||
from agent.auxiliary_client import resolve_provider_client
|
||||
|
||||
client, model = resolve_provider_client("zai", "google/gemini-2.5-pro")
|
||||
|
||||
assert client is not None
|
||||
assert model == "google/gemini-2.5-pro"
|
||||
|
||||
def test_aggregator_vendor_slug_is_preserved(self, monkeypatch):
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
mock_openai.return_value = MagicMock()
|
||||
from agent.auxiliary_client import resolve_provider_client
|
||||
|
||||
client, model = resolve_provider_client(
|
||||
"openrouter", "anthropic/claude-sonnet-4.6"
|
||||
)
|
||||
|
||||
assert client is not None
|
||||
assert model == "anthropic/claude-sonnet-4.6"
|
||||
|
||||
|
||||
class TestResolveVisionProviderClientModelNormalization:
|
||||
"""Vision auto-routing should reuse the same provider-specific normalization."""
|
||||
|
||||
def test_vision_auto_strips_matching_main_provider_prefix(self, tmp_path):
|
||||
_write_config(tmp_path, {
|
||||
"model": {"default": "zai/glm-5.1", "provider": "zai"},
|
||||
})
|
||||
with (
|
||||
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
|
||||
patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={
|
||||
"api_key": "glm-key",
|
||||
"base_url": "https://api.z.ai/api/paas/v4",
|
||||
}),
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
||||
):
|
||||
mock_openai.return_value = MagicMock()
|
||||
from agent.auxiliary_client import resolve_vision_provider_client
|
||||
|
||||
provider, client, model = resolve_vision_provider_client()
|
||||
|
||||
assert provider == "zai"
|
||||
assert client is not None
|
||||
assert model == "glm-5.1"
|
||||
|
|
|
|||
|
|
@ -38,16 +38,6 @@ class TestShouldCompress:
|
|||
assert compressor.should_compress(prompt_tokens=50000) is False
|
||||
|
||||
|
||||
class TestShouldCompressPreflight:
|
||||
def test_short_messages(self, compressor):
|
||||
msgs = [{"role": "user", "content": "short"}]
|
||||
assert compressor.should_compress_preflight(msgs) is False
|
||||
|
||||
def test_long_messages(self, compressor):
|
||||
# Each message ~100k chars / 4 = 25k tokens, need >85k threshold
|
||||
msgs = [{"role": "user", "content": "x" * 400000}]
|
||||
assert compressor.should_compress_preflight(msgs) is True
|
||||
|
||||
|
||||
class TestUpdateFromResponse:
|
||||
def test_updates_fields(self, compressor):
|
||||
|
|
@ -58,27 +48,12 @@ class TestUpdateFromResponse:
|
|||
})
|
||||
assert compressor.last_prompt_tokens == 5000
|
||||
assert compressor.last_completion_tokens == 1000
|
||||
assert compressor.last_total_tokens == 6000
|
||||
|
||||
def test_missing_fields_default_zero(self, compressor):
|
||||
compressor.update_from_response({})
|
||||
assert compressor.last_prompt_tokens == 0
|
||||
|
||||
|
||||
class TestGetStatus:
|
||||
def test_returns_expected_keys(self, compressor):
|
||||
status = compressor.get_status()
|
||||
assert "last_prompt_tokens" in status
|
||||
assert "threshold_tokens" in status
|
||||
assert "context_length" in status
|
||||
assert "usage_percent" in status
|
||||
assert "compression_count" in status
|
||||
|
||||
def test_usage_percent_calculation(self, compressor):
|
||||
compressor.last_prompt_tokens = 50000
|
||||
status = compressor.get_status()
|
||||
assert status["usage_percent"] == 50.0
|
||||
|
||||
|
||||
class TestCompress:
|
||||
def _make_messages(self, n):
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue