Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor

This commit is contained in:
Brooklyn Nicholson 2026-04-15 19:11:07 -05:00
commit 097702c8a7
55 changed files with 4904 additions and 51 deletions

View file

@ -298,6 +298,33 @@ def build_anthropic_client(api_key: str, base_url: str = None):
return _anthropic_sdk.Anthropic(**kwargs) return _anthropic_sdk.Anthropic(**kwargs)
def build_anthropic_bedrock_client(region: str):
    """Build an AnthropicBedrock client targeting the given AWS region.

    Routes through the Anthropic SDK's native Bedrock adapter so Claude
    features missing from the Converse API (prompt caching, thinking
    budgets, adaptive thinking, fast mode) stay available. Authentication
    relies on the standard boto3 credential chain (IAM roles, SSO, env
    vars) — no API key is passed here.

    Raises:
        ImportError: if the 'anthropic' package is missing, or too old to
            provide the ``AnthropicBedrock`` class.
    """
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Bedrock provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )
    if not hasattr(_anthropic_sdk, "AnthropicBedrock"):
        raise ImportError(
            "anthropic.AnthropicBedrock not available. "
            "Upgrade with: pip install 'anthropic>=0.39.0'"
        )
    from httpx import Timeout

    # Long read timeout (15 min) for extended-thinking responses; short
    # connect timeout so a bad region/endpoint fails fast.
    request_timeout = Timeout(timeout=900.0, connect=10.0)
    return _anthropic_sdk.AnthropicBedrock(
        aws_region=region,
        timeout=request_timeout,
    )
def read_claude_code_credentials() -> Optional[Dict[str, Any]]: def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
"""Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json. """Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.

View file

@ -775,6 +775,21 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
# Check cross-session rate limit guard before attempting Nous —
# if another session already recorded a 429, skip Nous entirely
# to avoid piling more requests onto the tapped RPH bucket.
try:
from agent.nous_rate_guard import nous_rate_limit_remaining
_remaining = nous_rate_limit_remaining()
if _remaining is not None and _remaining > 0:
logger.debug(
"Auxiliary: skipping Nous Portal (rate-limited, resets in %.0fs)",
_remaining,
)
return None, None
except Exception:
pass
nous = _read_nous_auth() nous = _read_nous_auth()
if not nous: if not nous:
return None, None return None, None
@ -899,6 +914,51 @@ def _current_custom_base_url() -> str:
return custom_base or "" return custom_base or ""
def _validate_proxy_env_urls() -> None:
"""Fail fast with a clear error when proxy env vars have malformed URLs.
Common cause: shell config (e.g. .zshrc) with a typo like
``export HTTP_PROXY=http://127.0.0.1:6153export NEXT_VAR=...``
which concatenates 'export' into the port number. Without this
check the OpenAI/httpx client raises a cryptic ``Invalid port``
error that doesn't name the offending env var.
"""
from urllib.parse import urlparse
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
value = str(os.environ.get(key) or "").strip()
if not value:
continue
try:
parsed = urlparse(value)
if parsed.scheme:
_ = parsed.port # raises ValueError for e.g. '6153export'
except ValueError as exc:
raise RuntimeError(
f"Malformed proxy environment variable {key}={value!r}. "
"Fix or unset your proxy settings and try again."
) from exc
def _validate_base_url(base_url: str) -> None:
"""Reject obviously broken custom endpoint URLs before they reach httpx."""
from urllib.parse import urlparse
candidate = str(base_url or "").strip()
if not candidate or candidate.startswith("acp://"):
return
try:
parsed = urlparse(candidate)
if parsed.scheme in {"http", "https"}:
_ = parsed.port # raises ValueError for malformed ports
except ValueError as exc:
raise RuntimeError(
f"Malformed custom endpoint URL: {candidate!r}. "
"Run `hermes setup` or `hermes model` and enter a valid http(s) base URL."
) from exc
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
runtime = _resolve_custom_runtime() runtime = _resolve_custom_runtime()
if len(runtime) == 2: if len(runtime) == 2:
@ -1299,6 +1359,7 @@ def resolve_provider_client(
Returns: Returns:
(client, resolved_model) or (None, None) if auth is unavailable. (client, resolved_model) or (None, None) if auth is unavailable.
""" """
_validate_proxy_env_urls()
# Normalise aliases # Normalise aliases
provider = _normalize_aux_provider(provider) provider = _normalize_aux_provider(provider)

1098
agent/bedrock_adapter.py Normal file

File diff suppressed because it is too large Load diff

View file

@ -112,6 +112,10 @@ _RATE_LIMIT_PATTERNS = [
"please retry after", "please retry after",
"resource_exhausted", "resource_exhausted",
"rate increased too quickly", # Alibaba/DashScope throttling "rate increased too quickly", # Alibaba/DashScope throttling
# AWS Bedrock throttling
"throttlingexception",
"too many concurrent requests",
"servicequotaexceededexception",
] ]
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit) # Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
@ -171,6 +175,11 @@ _CONTEXT_OVERFLOW_PATTERNS = [
# Chinese error messages (some providers return these) # Chinese error messages (some providers return these)
"超过最大长度", "超过最大长度",
"上下文长度", "上下文长度",
# AWS Bedrock Converse API error patterns
"input is too long",
"max input token",
"input token",
"exceeds the maximum number of input tokens",
] ]
# Model not found patterns # Model not found patterns

View file

@ -1012,6 +1012,16 @@ def get_model_context_length(
if ctx: if ctx:
return ctx return ctx
# 4b. AWS Bedrock — use static context length table.
# Bedrock's ListFoundationModels doesn't expose context window sizes,
# so we maintain a curated table in bedrock_adapter.py.
if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
try:
from agent.bedrock_adapter import get_bedrock_context_length
return get_bedrock_context_length(model)
except ImportError:
pass # boto3 not installed — fall through to generic resolution
# 5. Provider-aware lookups (before generic OpenRouter cache) # 5. Provider-aware lookups (before generic OpenRouter cache)
# These are provider-specific and take priority over the generic OR cache, # These are provider-specific and take priority over the generic OR cache,
# since the same model can have different context limits per provider # since the same model can have different context limits per provider

182
agent/nous_rate_guard.py Normal file
View file

@ -0,0 +1,182 @@
"""Cross-session rate limit guard for Nous Portal.
Writes rate limit state to a shared file so all sessions (CLI, gateway,
cron, auxiliary) can check whether Nous Portal is currently rate-limited
before making requests. Prevents retry amplification when RPH is tapped.
Each 429 from Nous triggers up to 9 API calls per conversation turn
(3 SDK retries x 3 Hermes retries), and every one of those calls counts
against RPH. By recording the rate limit state on first 429 and checking
it before subsequent attempts, we eliminate the amplification effect.
"""
from __future__ import annotations
import json
import logging
import os
import tempfile
import time
from typing import Any, Mapping, Optional
logger = logging.getLogger(__name__)
_STATE_SUBDIR = "rate_limits"
_STATE_FILENAME = "nous.json"
def _state_path() -> str:
    """Return the path to the shared Nous rate-limit state file.

    Resolves the Hermes home dir via ``hermes_constants`` when available,
    otherwise falls back to ``~/.hermes`` (e.g. in stripped-down installs).
    """
    try:
        from hermes_constants import get_hermes_home
        root = get_hermes_home()
    except ImportError:
        root = os.path.join(os.path.expanduser("~"), ".hermes")
    return os.path.join(root, _STATE_SUBDIR, _STATE_FILENAME)
def _parse_reset_seconds(headers: Optional[Mapping[str, str]]) -> Optional[float]:
"""Extract the best available reset-time estimate from response headers.
Priority:
1. x-ratelimit-reset-requests-1h (hourly RPH window most useful)
2. x-ratelimit-reset-requests (per-minute RPM window)
3. retry-after (generic HTTP header)
Returns seconds-from-now, or None if no usable header found.
"""
if not headers:
return None
lowered = {k.lower(): v for k, v in headers.items()}
for key in (
"x-ratelimit-reset-requests-1h",
"x-ratelimit-reset-requests",
"retry-after",
):
raw = lowered.get(key)
if raw is not None:
try:
val = float(raw)
if val > 0:
return val
except (TypeError, ValueError):
pass
return None
def record_nous_rate_limit(
    *,
    headers: Optional[Mapping[str, str]] = None,
    error_context: Optional[dict[str, Any]] = None,
    default_cooldown: float = 300.0,
) -> None:
    """Record that Nous Portal is rate-limited.

    Determines the reset time from response headers when possible, then
    from the structured error context, and finally falls back to
    ``default_cooldown`` (5 minutes). The state is written atomically to
    a shared file so every session can consult it. Write failures are
    swallowed (best-effort — a missing guard file just means one extra 429).

    Args:
        headers: HTTP response headers from the 429 error.
        error_context: Structured error context from _extract_api_error_context().
        default_cooldown: Fallback cooldown in seconds when no header data.
    """
    now = time.time()
    reset_at: Optional[float] = None

    # Headers carry the most accurate reset estimate.
    header_delta = _parse_reset_seconds(headers)
    if header_delta is not None:
        reset_at = now + header_delta

    # Next best: an absolute reset_at parsed out of the error body.
    if reset_at is None and isinstance(error_context, dict):
        body_reset = error_context.get("reset_at")
        if isinstance(body_reset, (int, float)) and body_reset > now:
            reset_at = float(body_reset)

    # Last resort: fixed cooldown.
    if reset_at is None:
        reset_at = now + default_cooldown

    path = _state_path()
    try:
        state_dir = os.path.dirname(path)
        os.makedirs(state_dir, exist_ok=True)
        payload = {
            "reset_at": reset_at,
            "recorded_at": now,
            "reset_seconds": reset_at - now,
        }
        # Atomic write: temp file in the same directory, then rename over
        # the target so readers never observe a half-written file.
        fd, tmp_path = tempfile.mkstemp(dir=state_dir, suffix=".tmp")
        try:
            with os.fdopen(fd, "w") as handle:
                json.dump(payload, handle)
            os.replace(tmp_path, path)
        except Exception:
            # Don't leave a stray temp file behind on failure.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise
        logger.info(
            "Nous rate limit recorded: resets in %.0fs (at %.0f)",
            reset_at - now, reset_at,
        )
    except Exception as exc:
        logger.debug("Failed to write Nous rate limit state: %s", exc)
def nous_rate_limit_remaining() -> Optional[float]:
    """Check if Nous Portal is currently rate-limited.

    Reads the shared state file; an expired or unreadable file is treated
    as "not rate-limited" (and an expired file is deleted opportunistically).

    Returns:
        Seconds remaining until reset, or None if not rate-limited.
    """
    path = _state_path()
    try:
        with open(path) as handle:
            state = json.load(handle)
        seconds_left = state.get("reset_at", 0) - time.time()
        if seconds_left > 0:
            return seconds_left
        # Window has passed — remove the stale state file (best-effort).
        try:
            os.unlink(path)
        except OSError:
            pass
        return None
    except (FileNotFoundError, json.JSONDecodeError, KeyError, TypeError):
        return None
def clear_nous_rate_limit() -> None:
    """Clear the rate limit state (e.g., after a successful Nous request)."""
    path = _state_path()
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass  # nothing recorded — already clear
    except OSError as err:
        # Best-effort: a stale guard file only delays Nous attempts briefly.
        logger.debug("Failed to clear Nous rate limit state: %s", err)
def format_remaining(seconds: float) -> str:
    """Render a seconds count as a compact duration ("45s", "2m 10s", "1h 3m").

    Negative inputs clamp to zero; sub-minute components are dropped once
    the duration reaches an hour.
    """
    total = int(seconds)
    if total < 0:
        total = 0
    hours, rem = divmod(total, 3600)
    minutes, secs = divmod(rem, 60)
    if hours:
        return f"{hours}h {minutes}m" if minutes else f"{hours}h"
    if minutes:
        return f"{minutes}m {secs}s" if secs else f"{minutes}m"
    return f"{secs}s"

View file

@ -93,6 +93,17 @@ _DB_CONNSTR_RE = re.compile(
re.IGNORECASE, re.IGNORECASE,
) )
# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 for "{")
# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs.
_JWT_RE = re.compile(
r"eyJ[A-Za-z0-9_-]{10,}" # Header (always starts with eyJ)
r"(?:\.[A-Za-z0-9_=-]{4,}){0,2}" # Optional payload and/or signature
)
# Discord user/role mentions: <@123456789012345678> or <@!123456789012345678>
# Snowflake IDs are 17-20 digit integers that resolve to specific Discord accounts.
_DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
# E.164 phone numbers: +<country><number>, 7-15 digits # E.164 phone numbers: +<country><number>, 7-15 digits
# Negative lookahead prevents matching hex strings or identifiers # Negative lookahead prevents matching hex strings or identifiers
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])") _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
@ -159,6 +170,12 @@ def redact_sensitive_text(text: str) -> str:
# Database connection string passwords # Database connection string passwords
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text) text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
# JWT tokens (eyJ... — base64-encoded JSON headers)
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
# Discord user/role mentions (<@snowflake_id>)
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
# E.164 phone numbers (Signal, WhatsApp) # E.164 phone numbers (Signal, WhatsApp)
def _redact_phone(m): def _redact_phone(m):
phone = m.group(1) phone = m.group(1)

View file

@ -284,6 +284,80 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
source_url="https://ai.google.dev/pricing", source_url="https://ai.google.dev/pricing",
pricing_version="google-pricing-2026-03-16", pricing_version="google-pricing-2026-03-16",
), ),
# AWS Bedrock — pricing per the Bedrock pricing page.
# Bedrock charges the same per-token rates as the model provider but
# through AWS billing. These are the on-demand prices (no commitment).
# Source: https://aws.amazon.com/bedrock/pricing/
(
"bedrock",
"anthropic.claude-opus-4-6",
): PricingEntry(
input_cost_per_million=Decimal("15.00"),
output_cost_per_million=Decimal("75.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-sonnet-4-6",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-sonnet-4-5",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-haiku-4-5",
): PricingEntry(
input_cost_per_million=Decimal("0.80"),
output_cost_per_million=Decimal("4.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-pro",
): PricingEntry(
input_cost_per_million=Decimal("0.80"),
output_cost_per_million=Decimal("3.20"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-lite",
): PricingEntry(
input_cost_per_million=Decimal("0.06"),
output_cost_per_million=Decimal("0.24"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-micro",
): PricingEntry(
input_cost_per_million=Decimal("0.035"),
output_cost_per_million=Decimal("0.14"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
} }

View file

@ -554,6 +554,12 @@ def load_gateway_config() -> GatewayConfig:
bridged["mention_patterns"] = platform_cfg["mention_patterns"] bridged["mention_patterns"] = platform_cfg["mention_patterns"]
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg: if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"] bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
if "channel_prompts" in platform_cfg:
channel_prompts = platform_cfg["channel_prompts"]
if isinstance(channel_prompts, dict):
bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
else:
bridged["channel_prompts"] = channel_prompts
if not bridged: if not bridged:
continue continue
plat_data = platforms_data.setdefault(plat.value, {}) plat_data = platforms_data.setdefault(plat.value, {})

View file

@ -683,6 +683,10 @@ class MessageEvent:
# Discord channel_skill_bindings). A single name or ordered list. # Discord channel_skill_bindings). A single name or ordered list.
auto_skill: Optional[str | list[str]] = None auto_skill: Optional[str | list[str]] = None
# Per-channel ephemeral system prompt (e.g. Discord channel_prompts).
# Applied at API call time and never persisted to transcript history.
channel_prompt: Optional[str] = None
# Internal flag — set for synthetic events (e.g. background process # Internal flag — set for synthetic events (e.g. background process
# completion notifications) that must bypass user authorization checks. # completion notifications) that must bypass user authorization checks.
internal: bool = False internal: bool = False
@ -776,6 +780,36 @@ _RETRYABLE_ERROR_PATTERNS = (
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]] MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
def resolve_channel_prompt(
    config_extra: dict,
    channel_id: str,
    parent_id: str | None = None,
) -> str | None:
    """Resolve a per-channel ephemeral prompt from platform config.

    Looks up ``channel_prompts`` in the adapter's ``config.extra`` dict,
    checking *channel_id* first and then *parent_id* (so forum threads /
    child channels inherit their parent's prompt). Blank or whitespace-only
    entries are treated as absent, as is a non-dict ``channel_prompts``.

    Returns:
        The prompt string, or None when nothing matches.
    """
    mapping = config_extra.get("channel_prompts") or {}
    if not isinstance(mapping, dict):
        return None
    candidates = [key for key in (channel_id, parent_id) if key]
    for key in candidates:
        entry = mapping.get(key)
        if entry is None:
            continue
        text = str(entry).strip()
        if text:
            return text
    return None
class BasePlatformAdapter(ABC): class BasePlatformAdapter(ABC):
""" """
Base class for platform adapters. Base class for platform adapters.

View file

@ -1992,11 +1992,14 @@ class DiscordAdapter(BasePlatformAdapter):
) )
msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT
channel_id = str(interaction.channel_id)
parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "")
return MessageEvent( return MessageEvent(
text=text, text=text,
message_type=msg_type, message_type=msg_type,
source=source, source=source,
raw_message=interaction, raw_message=interaction,
channel_prompt=self._resolve_channel_prompt(channel_id, parent_id or None),
) )
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@ -2067,14 +2070,17 @@ class DiscordAdapter(BasePlatformAdapter):
chat_topic=chat_topic, chat_topic=chat_topic,
) )
_parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "") _parent_channel = self._thread_parent_channel(getattr(interaction, "channel", None))
_parent_id = str(getattr(_parent_channel, "id", "") or "")
_skills = self._resolve_channel_skills(thread_id, _parent_id or None) _skills = self._resolve_channel_skills(thread_id, _parent_id or None)
_channel_prompt = self._resolve_channel_prompt(thread_id, _parent_id or None)
event = MessageEvent( event = MessageEvent(
text=text, text=text,
message_type=MessageType.TEXT, message_type=MessageType.TEXT,
source=source, source=source,
raw_message=interaction, raw_message=interaction,
auto_skill=_skills, auto_skill=_skills,
channel_prompt=_channel_prompt,
) )
await self.handle_message(event) await self.handle_message(event)
@ -2103,6 +2109,11 @@ class DiscordAdapter(BasePlatformAdapter):
return list(dict.fromkeys(skills)) # dedup, preserve order return list(dict.fromkeys(skills)) # dedup, preserve order
return None return None
def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
    """Look up a per-channel ephemeral prompt, preferring this channel over its parent.

    Thin wrapper delegating to the shared gateway helper with this
    adapter's ``config.extra``.
    """
    from gateway.platforms.base import resolve_channel_prompt
    extra = self.config.extra
    return resolve_channel_prompt(extra, channel_id, parent_id)
def _thread_parent_channel(self, channel: Any) -> Any: def _thread_parent_channel(self, channel: Any) -> Any:
"""Return the parent text channel when invoked from a thread.""" """Return the parent text channel when invoked from a thread."""
return getattr(channel, "parent", None) or channel return getattr(channel, "parent", None) or channel
@ -2654,6 +2665,7 @@ class DiscordAdapter(BasePlatformAdapter):
_parent_id = str(getattr(_chan, "parent_id", "") or "") _parent_id = str(getattr(_chan, "parent_id", "") or "")
_chan_id = str(getattr(_chan, "id", "")) _chan_id = str(getattr(_chan, "id", ""))
_skills = self._resolve_channel_skills(_chan_id, _parent_id or None) _skills = self._resolve_channel_skills(_chan_id, _parent_id or None)
_channel_prompt = self._resolve_channel_prompt(_chan_id, _parent_id or None)
reply_to_id = None reply_to_id = None
reply_to_text = None reply_to_text = None
@ -2674,6 +2686,7 @@ class DiscordAdapter(BasePlatformAdapter):
reply_to_text=reply_to_text, reply_to_text=reply_to_text,
timestamp=message.created_at, timestamp=message.created_at,
auto_skill=_skills, auto_skill=_skills,
channel_prompt=_channel_prompt,
) )
# Track thread participation so the bot won't require @mention for # Track thread participation so the bot won't require @mention for

View file

@ -718,6 +718,12 @@ class MattermostAdapter(BasePlatformAdapter):
thread_id=thread_id, thread_id=thread_id,
) )
# Per-channel ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_channel_prompt = resolve_channel_prompt(
self.config.extra, channel_id, None,
)
msg_event = MessageEvent( msg_event = MessageEvent(
text=message_text, text=message_text,
message_type=msg_type, message_type=msg_type,
@ -726,6 +732,7 @@ class MattermostAdapter(BasePlatformAdapter):
message_id=post_id, message_id=post_id,
media_urls=media_urls if media_urls else None, media_urls=media_urls if media_urls else None,
media_types=media_types if media_types else None, media_types=media_types if media_types else None,
channel_prompt=_channel_prompt,
) )
await self.handle_message(msg_event) await self.handle_message(msg_event)

View file

@ -1167,6 +1167,12 @@ class SlackAdapter(BasePlatformAdapter):
thread_id=thread_ts, thread_id=thread_ts,
) )
# Per-channel ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_channel_prompt = resolve_channel_prompt(
self.config.extra, channel_id, None,
)
msg_event = MessageEvent( msg_event = MessageEvent(
text=text, text=text,
message_type=msg_type, message_type=msg_type,
@ -1176,6 +1182,7 @@ class SlackAdapter(BasePlatformAdapter):
media_urls=media_urls, media_urls=media_urls,
media_types=media_types, media_types=media_types,
reply_to_message_id=thread_ts if thread_ts != ts else None, reply_to_message_id=thread_ts if thread_ts != ts else None,
channel_prompt=_channel_prompt,
) )
# Only react when bot is directly addressed (DM or @mention). # Only react when bot is directly addressed (DM or @mention).

View file

@ -2775,6 +2775,15 @@ class TelegramAdapter(BasePlatformAdapter):
reply_to_id = str(message.reply_to_message.message_id) reply_to_id = str(message.reply_to_message.message_id)
reply_to_text = message.reply_to_message.text or message.reply_to_message.caption or None reply_to_text = message.reply_to_message.text or message.reply_to_message.caption or None
# Per-channel/topic ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_chat_id_str = str(chat.id)
_channel_prompt = resolve_channel_prompt(
self.config.extra,
thread_id_str or _chat_id_str,
_chat_id_str if thread_id_str else None,
)
return MessageEvent( return MessageEvent(
text=message.text or "", text=message.text or "",
message_type=msg_type, message_type=msg_type,
@ -2784,6 +2793,7 @@ class TelegramAdapter(BasePlatformAdapter):
reply_to_message_id=reply_to_id, reply_to_message_id=reply_to_id,
reply_to_text=reply_to_text, reply_to_text=reply_to_text,
auto_skill=topic_skill, auto_skill=topic_skill,
channel_prompt=_channel_prompt,
timestamp=message.date, timestamp=message.date,
) )

View file

@ -2891,6 +2891,7 @@ class GatewayRunner:
message_type=_MT.TEXT, message_type=_MT.TEXT,
source=event.source, source=event.source,
message_id=event.message_id, message_id=event.message_id,
channel_prompt=event.channel_prompt,
) )
adapter._pending_messages[_quick_key] = queued_event adapter._pending_messages[_quick_key] = queued_event
return "Queued for the next turn." return "Queued for the next turn."
@ -3875,6 +3876,7 @@ class GatewayRunner:
session_id=session_entry.session_id, session_id=session_entry.session_id,
session_key=session_key, session_key=session_key,
event_message_id=event.message_id, event_message_id=event.message_id,
channel_prompt=event.channel_prompt,
) )
# Stop persistent typing indicator now that the agent is done # Stop persistent typing indicator now that the agent is done
@ -5186,6 +5188,7 @@ class GatewayRunner:
message_type=MessageType.TEXT, message_type=MessageType.TEXT,
source=source, source=source,
raw_message=event.raw_message, raw_message=event.raw_message,
channel_prompt=event.channel_prompt,
) )
# Let the normal message handler process it # Let the normal message handler process it
@ -8166,6 +8169,7 @@ class GatewayRunner:
session_key: str = None, session_key: str = None,
_interrupt_depth: int = 0, _interrupt_depth: int = 0,
event_message_id: Optional[str] = None, event_message_id: Optional[str] = None,
channel_prompt: Optional[str] = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
Run the agent with the given message and context. Run the agent with the given message and context.
@ -8520,8 +8524,12 @@ class GatewayRunner:
# Platform.LOCAL ("local") maps to "cli"; others pass through as-is. # Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value
# Combine platform context with user-configured ephemeral system prompt # Combine platform context, per-channel context, and the user-configured
# ephemeral system prompt.
combined_ephemeral = context_prompt or "" combined_ephemeral = context_prompt or ""
event_channel_prompt = (channel_prompt or "").strip()
if event_channel_prompt:
combined_ephemeral = (combined_ephemeral + "\n\n" + event_channel_prompt).strip()
if self._ephemeral_system_prompt: if self._ephemeral_system_prompt:
combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip() combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
@ -9473,6 +9481,7 @@ class GatewayRunner:
session_key=session_key, session_key=session_key,
_interrupt_depth=_interrupt_depth + 1, _interrupt_depth=_interrupt_depth + 1,
event_message_id=next_message_id, event_message_id=next_message_id,
channel_prompt=pending_event.channel_prompt,
) )
finally: finally:
# Stop progress sender, interrupt monitor, and notification task # Stop progress sender, interrupt monitor, and notification task

View file

@ -274,6 +274,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("XIAOMI_API_KEY",), api_key_env_vars=("XIAOMI_API_KEY",),
base_url_env_var="XIAOMI_BASE_URL", base_url_env_var="XIAOMI_BASE_URL",
), ),
"bedrock": ProviderConfig(
id="bedrock",
name="AWS Bedrock",
auth_type="aws_sdk",
inference_base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
api_key_env_vars=(),
base_url_env_var="BEDROCK_BASE_URL",
),
} }
@ -924,6 +932,7 @@ def resolve_provider(
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth",
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface", "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
"mimo": "xiaomi", "xiaomi-mimo": "xiaomi", "mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
"aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
"go": "opencode-go", "opencode-go-sub": "opencode-go", "go": "opencode-go", "opencode-go-sub": "opencode-go",
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode", "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
# Local server aliases — route through the generic custom provider # Local server aliases — route through the generic custom provider
@ -980,6 +989,15 @@ def resolve_provider(
if has_usable_secret(os.getenv(env_var, "")): if has_usable_secret(os.getenv(env_var, "")):
return pid return pid
# AWS Bedrock — detect via boto3 credential chain (IAM roles, SSO, env vars).
# This runs after API-key providers so explicit keys always win.
try:
from agent.bedrock_adapter import has_aws_credentials
if has_aws_credentials():
return "bedrock"
except ImportError:
pass # boto3 not installed — skip Bedrock auto-detection
raise AuthError( raise AuthError(
"No inference provider configured. Run 'hermes model' to choose a " "No inference provider configured. Run 'hermes model' to choose a "
"provider and model, or set an API key (OPENROUTER_API_KEY, " "provider and model, or set an API key (OPENROUTER_API_KEY, "
@ -2446,6 +2464,13 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
pconfig = PROVIDER_REGISTRY.get(target) pconfig = PROVIDER_REGISTRY.get(target)
if pconfig and pconfig.auth_type == "api_key": if pconfig and pconfig.auth_type == "api_key":
return get_api_key_provider_status(target) return get_api_key_provider_status(target)
# AWS SDK providers (Bedrock) — check via boto3 credential chain
if pconfig and pconfig.auth_type == "aws_sdk":
try:
from agent.bedrock_adapter import has_aws_credentials
return {"logged_in": has_aws_credentials(), "provider": target}
except ImportError:
return {"logged_in": False, "provider": target, "error": "boto3 not installed"}
return {"logged_in": False} return {"logged_in": False}

View file

@ -368,6 +368,27 @@ def _interactive_auth() -> None:
print("=" * 50) print("=" * 50)
auth_list_command(SimpleNamespace(provider=None)) auth_list_command(SimpleNamespace(provider=None))
# Show AWS Bedrock credential status (not in the pool — uses boto3 chain)
try:
from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
if has_aws_credentials():
auth_source = resolve_aws_auth_env_var() or "unknown"
region = resolve_bedrock_region()
print(f"bedrock (AWS SDK credential chain):")
print(f" Auth: {auth_source}")
print(f" Region: {region}")
try:
import boto3
sts = boto3.client("sts", region_name=region)
identity = sts.get_caller_identity()
arn = identity.get("Arn", "unknown")
print(f" Identity: {arn}")
except Exception:
print(f" Identity: (could not resolve — boto3 STS call failed)")
print()
except ImportError:
pass # boto3 or bedrock_adapter not available
print() print()
# Main menu # Main menu

View file

@ -419,6 +419,27 @@ DEFAULT_CONFIG = {
"protect_last_n": 20, # minimum recent messages to keep uncompressed "protect_last_n": 20, # minimum recent messages to keep uncompressed
}, },
# AWS Bedrock provider configuration.
# Only used when model.provider is "bedrock".
"bedrock": {
"region": "", # AWS region for Bedrock API calls (empty = AWS_REGION env var → us-east-1)
"discovery": {
"enabled": True, # Auto-discover models via ListFoundationModels
"provider_filter": [], # Only show models from these providers (e.g. ["anthropic", "amazon"])
"refresh_interval": 3600, # Cache discovery results for this many seconds
},
"guardrail": {
# Amazon Bedrock Guardrails — content filtering and safety policies.
# Create a guardrail in the Bedrock console, then set the ID and version here.
# See: https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html
"guardrail_identifier": "", # e.g. "abc123def456"
"guardrail_version": "", # e.g. "1" or "DRAFT"
"stream_processing_mode": "async", # "sync" or "async"
"trace": "disabled", # "enabled", "disabled", or "enabled_full"
},
},
"smart_model_routing": { "smart_model_routing": {
"enabled": False, "enabled": False,
"max_simple_chars": 160, "max_simple_chars": 160,
@ -638,6 +659,7 @@ DEFAULT_CONFIG = {
"allowed_channels": "", # If set, bot ONLY responds in these channel IDs (whitelist) "allowed_channels": "", # If set, bot ONLY responds in these channel IDs (whitelist)
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack) "auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing "reactions": True, # Add 👀/✅/❌ reactions to messages during processing
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
}, },
# WhatsApp platform settings (gateway mode) # WhatsApp platform settings (gateway mode)
@ -648,6 +670,21 @@ DEFAULT_CONFIG = {
# Supports \n for newlines, e.g. "🤖 *My Bot*\n──────\n" # Supports \n for newlines, e.g. "🤖 *My Bot*\n──────\n"
}, },
# Telegram platform settings (gateway mode)
"telegram": {
"channel_prompts": {}, # Per-chat/topic ephemeral system prompts (topics inherit from parent group)
},
# Slack platform settings (gateway mode)
"slack": {
"channel_prompts": {}, # Per-channel ephemeral system prompts
},
# Mattermost platform settings (gateway mode)
"mattermost": {
"channel_prompts": {}, # Per-channel ephemeral system prompts
},
# Approval mode for dangerous commands: # Approval mode for dangerous commands:
# manual — always prompt the user (default) # manual — always prompt the user (default)
# smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk # smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk
@ -703,7 +740,7 @@ DEFAULT_CONFIG = {
}, },
# Config schema version - bump this when adding new required fields # Config schema version - bump this when adding new required fields
"_config_version": 17, "_config_version": 18,
} }
# ============================================================================= # =============================================================================
@ -974,6 +1011,22 @@ OPTIONAL_ENV_VARS = {
"category": "provider", "category": "provider",
"advanced": True, "advanced": True,
}, },
"AWS_REGION": {
"description": "AWS region for Bedrock API calls (e.g. us-east-1, eu-central-1)",
"prompt": "AWS Region",
"url": "https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-regions.html",
"password": False,
"category": "provider",
"advanced": True,
},
"AWS_PROFILE": {
"description": "AWS named profile for Bedrock authentication (from ~/.aws/credentials)",
"prompt": "AWS Profile",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
# ── Tool API keys ── # ── Tool API keys ──
"EXA_API_KEY": { "EXA_API_KEY": {

View file

@ -860,6 +860,31 @@ def run_doctor(args):
except Exception as _e: except Exception as _e:
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)} ") print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)} ")
# -- AWS Bedrock --
# Bedrock uses the AWS SDK credential chain, not API keys.
try:
from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
if has_aws_credentials():
_auth_var = resolve_aws_auth_env_var()
_region = resolve_bedrock_region()
_label = "AWS Bedrock".ljust(20)
print(f" Checking AWS Bedrock...", end="", flush=True)
try:
import boto3
_br_client = boto3.client("bedrock", region_name=_region)
_br_resp = _br_client.list_foundation_models()
_model_count = len(_br_resp.get("modelSummaries", []))
print(f"\r {color('', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)} ")
except ImportError:
print(f"\r {color('', Colors.YELLOW)} {_label} {color('(boto3 not installed — pip install hermes-agent[bedrock])', Colors.DIM)} ")
issues.append("Install boto3 for Bedrock: pip install hermes-agent[bedrock]")
except Exception as _e:
_err_name = type(_e).__name__
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)} ")
issues.append(f"AWS Bedrock: {_err_name} — check IAM permissions for bedrock:ListFoundationModels")
except ImportError:
pass # bedrock_adapter not available — skip silently
# ========================================================================= # =========================================================================
# Check: Submodules # Check: Submodules
# ========================================================================= # =========================================================================

View file

@ -222,7 +222,7 @@ def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = Fals
current_cmd = "" current_cmd = ""
else: else:
result = subprocess.run( result = subprocess.run(
["ps", "eww", "-ax", "-o", "pid=,command="], ["ps", "-A", "eww", "-o", "pid=,command="],
capture_output=True, capture_output=True,
text=True, text=True,
timeout=10, timeout=10,

View file

@ -1370,6 +1370,8 @@ def select_provider_and_model(args=None):
_model_flow_anthropic(config, current_model) _model_flow_anthropic(config, current_model)
elif selected_provider == "kimi-coding": elif selected_provider == "kimi-coding":
_model_flow_kimi(config, current_model) _model_flow_kimi(config, current_model)
elif selected_provider == "bedrock":
_model_flow_bedrock(config, current_model)
elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"): elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"):
_model_flow_api_key_provider(config, selected_provider, current_model) _model_flow_api_key_provider(config, selected_provider, current_model)
@ -2656,6 +2658,252 @@ def _model_flow_kimi(config, current_model=""):
print("No change.") print("No change.")
def _model_flow_bedrock_api_key(config, region, current_model=""):
    """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.

    For developers who don't have an AWS account but received a Bedrock API Key
    from their AWS admin. Works like any OpenAI-compatible endpoint.

    Args:
        config: CLI config namespace (not read here; kept so all
            ``_model_flow_*`` entry points share the same signature).
        region: AWS region used to build the bedrock-mantle endpoint URL.
        current_model: Currently configured model ID, passed to the model
            picker so it can be preselected.

    Side effects (in order, on the success path): saves the key under
    AWS_BEARER_TOKEN_BEDROCK, persists the model choice, rewrites the
    ``model`` and ``bedrock`` sections of config.yaml, mirrors the key into
    OPENAI_API_KEY / OPENAI_BASE_URL, saves the config, and deactivates the
    previously active provider. Returns None in every path.
    """
    from hermes_cli.auth import _prompt_model_selection, _save_model_choice, deactivate_provider
    from hermes_cli.config import load_config, save_config, get_env_value, save_env_value
    from hermes_cli.models import _PROVIDER_MODELS
    # The mantle endpoint is an OpenAI-compatible front door per region.
    mantle_base_url = f"https://bedrock-mantle.{region}.api.aws/v1"
    # Prompt for API key — reuse a previously saved key if one exists so the
    # user isn't re-prompted on every run.
    existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
    if existing_key:
        print(f" Bedrock API Key: {existing_key[:12]}... ✓")
    else:
        print(f" Endpoint: {mantle_base_url}")
        print()
        try:
            # getpass hides the key from the terminal / shell history.
            import getpass
            api_key = getpass.getpass(" Bedrock API Key: ").strip()
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C / Ctrl-D aborts the whole flow with no changes saved.
            print()
            return
        if not api_key:
            print(" Cancelled.")
            return
        save_env_value("AWS_BEARER_TOKEN_BEDROCK", api_key)
        existing_key = api_key
        print(" ✓ API key saved.")
        print()
    # Model selection — use static list (mantle doesn't need boto3 for discovery)
    model_list = _PROVIDER_MODELS.get("bedrock", [])
    print(f" Showing {len(model_list)} curated models")
    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        # No curated list available — fall back to free-form entry.
        try:
            selected = input(" Model ID: ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None
    if selected:
        _save_model_choice(selected)
        # Save as custom provider pointing to bedrock-mantle.
        # NOTE(review): provider is recorded as "custom" (not "bedrock") so
        # the runtime treats mantle as a plain OpenAI-compatible endpoint.
        cfg = load_config()
        model = cfg.get("model")
        if not isinstance(model, dict):
            # Legacy configs stored the model as a bare string; normalize.
            model = {"default": model} if model else {}
        cfg["model"] = model
        model["provider"] = "custom"
        model["base_url"] = mantle_base_url
        model.pop("api_mode", None)  # chat_completions is the default
        # Also save region in bedrock config for reference
        bedrock_cfg = cfg.get("bedrock", {})
        if not isinstance(bedrock_cfg, dict):
            bedrock_cfg = {}
        bedrock_cfg["region"] = region
        cfg["bedrock"] = bedrock_cfg
        # Save the API key env var name so hermes knows where to find it.
        # NOTE(review): this overwrites any existing OPENAI_API_KEY /
        # OPENAI_BASE_URL values in the env store — confirm intended.
        save_env_value("OPENAI_API_KEY", existing_key)
        save_env_value("OPENAI_BASE_URL", mantle_base_url)
        save_config(cfg)
        deactivate_provider()
        print(f" Default model set to: {selected} (via Bedrock API Key, {region})")
        print(f" Endpoint: {mantle_base_url}")
    else:
        print(" No change.")
def _model_flow_bedrock(config, current_model=""):
    """AWS Bedrock provider: verify credentials, pick region, discover models.

    Uses the native Converse API via boto3 — not the OpenAI-compatible
    endpoint. Auth is handled by the AWS SDK default credential chain (env
    vars, profile, instance role), so no API key prompt is needed unless the
    user explicitly opts into Bedrock API Key mode (delegated to
    ``_model_flow_bedrock_api_key``).

    Args:
        config: CLI config namespace, forwarded to the API-key sub-flow.
        current_model: Currently configured model ID, passed to the model
            picker so it can be preselected.

    Returns None in every path; persists model/provider/region into
    config.yaml on the success path.
    """
    from hermes_cli.auth import _prompt_model_selection, _save_model_choice, deactivate_provider
    from hermes_cli.config import load_config, save_config
    from hermes_cli.models import _PROVIDER_MODELS
    # 1. Check for AWS credentials
    try:
        from agent.bedrock_adapter import (
            has_aws_credentials,
            resolve_aws_auth_env_var,
            resolve_bedrock_region,
            discover_bedrock_models,
        )
    except ImportError:
        # bedrock_adapter requires boto3; without it the flow cannot proceed.
        print(" ✗ boto3 is not installed. Install it with:")
        print(" pip install boto3")
        print()
        return
    if not has_aws_credentials():
        # No env-var credentials detected — warn but continue, since boto3's
        # default chain may still resolve credentials implicitly (IMDS, SSO).
        print(" ⚠ No AWS credentials detected via environment variables.")
        print(" Bedrock will use boto3's default credential chain (IMDS, SSO, etc.)")
        print()
    auth_var = resolve_aws_auth_env_var()
    if auth_var:
        print(f" AWS credentials: {auth_var}")
    else:
        print(" AWS credentials: boto3 default chain (instance role / SSO)")
    print()
    # 2. Region selection (empty input keeps the currently resolved region)
    current_region = resolve_bedrock_region()
    try:
        region_input = input(f" AWS Region [{current_region}]: ").strip()
    except (KeyboardInterrupt, EOFError):
        print()
        return
    region = region_input or current_region
    # 2b. Authentication mode
    print(" Choose authentication method:")
    print()
    print(" 1. IAM credential chain (recommended)")
    print(" Works with EC2 instance roles, SSO, env vars, aws configure")
    print(" 2. Bedrock API Key")
    print(" Enter your Bedrock API Key directly — also supports")
    print(" team scenarios where an admin distributes keys")
    print()
    try:
        auth_choice = input(" Choice [1]: ").strip()
    except (KeyboardInterrupt, EOFError):
        print()
        return
    if auth_choice == "2":
        # API-key mode is a separate flow (OpenAI-compatible mantle endpoint).
        _model_flow_bedrock_api_key(config, region, current_model)
        return
    # 3. Model discovery — try live API first, fall back to static list
    print(f" Discovering models in {region}...")
    live_models = discover_bedrock_models(region)
    if live_models:
        # Drop non-text / non-chat model families that can't serve as the
        # agent's default model (image gen, embeddings, video, safety).
        _EXCLUDE_PREFIXES = (
            "stability.", "cohere.embed", "twelvelabs.", "us.stability.",
            "us.cohere.embed", "us.twelvelabs.", "global.cohere.embed",
            "global.twelvelabs.",
        )
        _EXCLUDE_SUBSTRINGS = ("safeguard", "voxtral", "palmyra-vision")
        filtered = []
        for m in live_models:
            mid = m["id"]
            if any(mid.startswith(p) for p in _EXCLUDE_PREFIXES):
                continue
            if any(s in mid.lower() for s in _EXCLUDE_SUBSTRINGS):
                continue
            filtered.append(m)
        # Deduplicate: prefer inference profiles (us.*, global.*) over bare
        # foundation model IDs.
        profile_base_ids = set()
        for m in filtered:
            mid = m["id"]
            if mid.startswith(("us.", "global.")):
                # Strip the profile prefix to recover the bare model ID.
                # NOTE(review): the `mid[3:]` guard assumes a 2-char prefix
                # ("us."); for "global.x" it inspects mid past "glo" — works
                # in practice but asymmetric. Confirm intended.
                base = mid.split(".", 1)[1] if "." in mid[3:] else mid
                profile_base_ids.add(base)
        deduped = []
        for m in filtered:
            mid = m["id"]
            # Skip a bare foundation-model ID when an inference profile for
            # the same model is already present.
            if not mid.startswith(("us.", "global.")) and mid in profile_base_ids:
                continue
            deduped.append(m)
        # Curated ordering: recommended models first (in listed priority),
        # then global.* profiles, then everything else alphabetically.
        _RECOMMENDED = [
            "us.anthropic.claude-sonnet-4-6",
            "us.anthropic.claude-opus-4-6",
            "us.anthropic.claude-haiku-4-5",
            "us.amazon.nova-pro",
            "us.amazon.nova-lite",
            "us.amazon.nova-micro",
            "deepseek.v3",
            "us.meta.llama4-maverick",
            "us.meta.llama4-scout",
        ]
        def _sort_key(m):
            # Sort tuple: (tier, recommended-rank, id) — lower sorts first.
            mid = m["id"]
            for i, rec in enumerate(_RECOMMENDED):
                if mid.startswith(rec):
                    return (0, i, mid)
            if mid.startswith("global."):
                return (1, 0, mid)
            return (2, 0, mid)
        deduped.sort(key=_sort_key)
        model_list = [m["id"] for m in deduped]
        print(f" Found {len(model_list)} text model(s) (filtered from {len(live_models)} total)")
    else:
        # Live discovery failed (no permissions / offline) — use the static
        # curated list shipped with the CLI, if any.
        model_list = _PROVIDER_MODELS.get("bedrock", [])
        if model_list:
            print(f" Using {len(model_list)} curated models (live discovery unavailable)")
        else:
            print(" No models found. Check IAM permissions for bedrock:ListFoundationModels.")
            return
    # 4. Model selection
    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        # Discovery succeeded but filtering removed everything — free-form entry.
        try:
            selected = input(" Model ID: ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None
    if selected:
        _save_model_choice(selected)
        cfg = load_config()
        model = cfg.get("model")
        if not isinstance(model, dict):
            # Legacy configs stored the model as a bare string; normalize.
            model = {"default": model} if model else {}
        cfg["model"] = model
        model["provider"] = "bedrock"
        model["base_url"] = f"https://bedrock-runtime.{region}.amazonaws.com"
        model.pop("api_mode", None)  # bedrock_converse is auto-detected
        bedrock_cfg = cfg.get("bedrock", {})
        if not isinstance(bedrock_cfg, dict):
            bedrock_cfg = {}
        bedrock_cfg["region"] = region
        cfg["bedrock"] = bedrock_cfg
        save_config(cfg)
        deactivate_provider()
        print(f" Default model set to: {selected} (via AWS Bedrock, {region})")
    else:
        print(" No change.")
def _model_flow_api_key_provider(config, provider_id, current_model=""): def _model_flow_api_key_provider(config, provider_id, current_model=""):
"""Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.).""" """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
from hermes_cli.auth import ( from hermes_cli.auth import (

View file

@ -303,6 +303,22 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"XiaomiMiMo/MiMo-V2-Flash", "XiaomiMiMo/MiMo-V2-Flash",
"moonshotai/Kimi-K2-Thinking", "moonshotai/Kimi-K2-Thinking",
], ],
# AWS Bedrock — static fallback list used when dynamic discovery is
# unavailable (no boto3, no credentials, or API error). The agent
# prefers live discovery via ListFoundationModels + ListInferenceProfiles.
# Use inference profile IDs (us.*) since most models require them.
"bedrock": [
"us.anthropic.claude-sonnet-4-6",
"us.anthropic.claude-opus-4-6-v1",
"us.anthropic.claude-haiku-4-5-20251001-v1:0",
"us.anthropic.claude-sonnet-4-5-20250929-v1:0",
"us.amazon.nova-pro-v1:0",
"us.amazon.nova-lite-v1:0",
"us.amazon.nova-micro-v1:0",
"deepseek.v3.2",
"us.meta.llama4-maverick-17b-instruct-v1:0",
"us.meta.llama4-scout-17b-instruct-v1:0",
],
} }
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -536,6 +552,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"), ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"), ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"), ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"),
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
] ]
# Derived dicts — used throughout the codebase # Derived dicts — used throughout the codebase
@ -587,6 +604,10 @@ _PROVIDER_ALIASES = {
"huggingface-hub": "huggingface", "huggingface-hub": "huggingface",
"mimo": "xiaomi", "mimo": "xiaomi",
"xiaomi-mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
"aws": "bedrock",
"aws-bedrock": "bedrock",
"amazon-bedrock": "bedrock",
"amazon": "bedrock",
"grok": "xai", "grok": "xai",
"x-ai": "xai", "x-ai": "xai",
"x.ai": "xai", "x.ai": "xai",
@ -1955,6 +1976,42 @@ def validate_requested_model(
# api_models is None — couldn't reach API. Accept and persist, # api_models is None — couldn't reach API. Accept and persist,
# but warn so typos don't silently break things. # but warn so typos don't silently break things.
# Bedrock: use our own discovery instead of HTTP /models endpoint.
# Bedrock's bedrock-runtime URL doesn't support /models — it uses the
# AWS SDK control plane (ListFoundationModels + ListInferenceProfiles).
if normalized == "bedrock":
try:
from agent.bedrock_adapter import discover_bedrock_models, resolve_bedrock_region
region = resolve_bedrock_region()
discovered = discover_bedrock_models(region)
discovered_ids = {m["id"] for m in discovered}
if requested in discovered_ids:
return {
"accepted": True,
"persist": True,
"recognized": True,
"message": None,
}
# Not in discovered list — still accept (user may have custom
# inference profiles or cross-account access), but warn.
suggestions = get_close_matches(requested, list(discovered_ids), n=3, cutoff=0.4)
suggestion_text = ""
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
return {
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Note: `{requested}` was not found in Bedrock model discovery for {region}. "
f"It may still work with custom inference profiles or cross-account access."
f"{suggestion_text}"
),
}
except Exception:
pass # Fall through to generic warning
provider_label = _PROVIDER_LABELS.get(normalized, normalized) provider_label = _PROVIDER_LABELS.get(normalized, normalized)
return { return {
"accepted": False, "accepted": False,

View file

@ -236,6 +236,12 @@ ALIASES: Dict[str, str] = {
"mimo": "xiaomi", "mimo": "xiaomi",
"xiaomi-mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
# bedrock
"aws": "bedrock",
"aws-bedrock": "bedrock",
"amazon-bedrock": "bedrock",
"amazon": "bedrock",
# arcee # arcee
"arcee-ai": "arcee", "arcee-ai": "arcee",
"arceeai": "arcee", "arceeai": "arcee",
@ -262,6 +268,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"copilot-acp": "GitHub Copilot ACP", "copilot-acp": "GitHub Copilot ACP",
"xiaomi": "Xiaomi MiMo", "xiaomi": "Xiaomi MiMo",
"local": "Local endpoint", "local": "Local endpoint",
"bedrock": "AWS Bedrock",
} }
@ -271,6 +278,7 @@ TRANSPORT_TO_API_MODE: Dict[str, str] = {
"openai_chat": "chat_completions", "openai_chat": "chat_completions",
"anthropic_messages": "anthropic_messages", "anthropic_messages": "anthropic_messages",
"codex_responses": "codex_responses", "codex_responses": "codex_responses",
"bedrock_converse": "bedrock_converse",
} }
@ -388,6 +396,10 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
if pdef is not None: if pdef is not None:
return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions") return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")
# Direct provider checks for providers not in HERMES_OVERLAYS
if provider == "bedrock":
return "bedrock_converse"
# URL-based heuristics for custom / unknown providers # URL-based heuristics for custom / unknown providers
if base_url: if base_url:
url_lower = base_url.rstrip("/").lower() url_lower = base_url.rstrip("/").lower()
@ -395,6 +407,8 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
return "anthropic_messages" return "anthropic_messages"
if "api.openai.com" in url_lower: if "api.openai.com" in url_lower:
return "codex_responses" return "codex_responses"
if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
return "bedrock_converse"
return "chat_completions" return "chat_completions"

View file

@ -124,7 +124,7 @@ def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str:
return "chat_completions" return "chat_completions"
_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages"} _VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}
def _parse_api_mode(raw: Any) -> Optional[str]: def _parse_api_mode(raw: Any) -> Optional[str]:
@ -836,6 +836,77 @@ def resolve_runtime_provider(
"requested_provider": requested_provider, "requested_provider": requested_provider,
} }
# AWS Bedrock (native Converse API via boto3)
if provider == "bedrock":
from agent.bedrock_adapter import (
has_aws_credentials,
resolve_aws_auth_env_var,
resolve_bedrock_region,
is_anthropic_bedrock_model,
)
# When the user explicitly selected bedrock (not auto-detected),
# trust boto3's credential chain — it handles IMDS, ECS task roles,
# Lambda execution roles, SSO, and other implicit sources that our
# env-var check can't detect.
is_explicit = requested_provider in ("bedrock", "aws", "aws-bedrock", "amazon-bedrock", "amazon")
if not is_explicit and not has_aws_credentials():
raise AuthError(
"No AWS credentials found for Bedrock. Configure one of:\n"
" - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY\n"
" - AWS_PROFILE (for SSO / named profiles)\n"
" - IAM instance role (EC2, ECS, Lambda)\n"
"Or run 'aws configure' to set up credentials.",
code="no_aws_credentials",
)
# Read bedrock-specific config from config.yaml
from hermes_cli.config import load_config as _load_bedrock_config
_bedrock_cfg = _load_bedrock_config().get("bedrock", {})
# Region priority: config.yaml bedrock.region → env var → us-east-1
region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
# Build guardrail config if configured
_gr = _bedrock_cfg.get("guardrail", {})
guardrail_config = None
if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"):
guardrail_config = {
"guardrailIdentifier": _gr["guardrail_identifier"],
"guardrailVersion": _gr["guardrail_version"],
}
if _gr.get("stream_processing_mode"):
guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"]
if _gr.get("trace"):
guardrail_config["trace"] = _gr["trace"]
# Dual-path routing: Claude models use AnthropicBedrock SDK for full
# feature parity (prompt caching, thinking budgets, adaptive thinking).
# Non-Claude models use the Converse API for multi-model support.
_current_model = str(model_cfg.get("default") or "").strip()
if is_anthropic_bedrock_model(_current_model):
# Claude on Bedrock → AnthropicBedrock SDK → anthropic_messages path
runtime = {
"provider": "bedrock",
"api_mode": "anthropic_messages",
"base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
"api_key": "aws-sdk",
"source": auth_source,
"region": region,
"bedrock_anthropic": True, # Signal to use AnthropicBedrock client
"requested_provider": requested_provider,
}
else:
# Non-Claude (Nova, DeepSeek, Llama, etc.) → Converse API
runtime = {
"provider": "bedrock",
"api_mode": "bedrock_converse",
"base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
"api_key": "aws-sdk",
"source": auth_source,
"region": region,
"requested_provider": requested_provider,
}
if guardrail_config:
runtime["guardrail_config"] = guardrail_config
return runtime
# API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN) # API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
pconfig = PROVIDER_REGISTRY.get(provider) pconfig = PROVIDER_REGISTRY.get(provider)
if pconfig and pconfig.auth_type == "api_key": if pconfig and pconfig.auth_type == "api_key":

View file

@ -58,8 +58,7 @@ def resolve_config_path() -> Path:
Resolution order: Resolution order:
1. $HERMES_HOME/honcho.json (profile-local, if it exists) 1. $HERMES_HOME/honcho.json (profile-local, if it exists)
2. ~/.hermes/honcho.json (default profile shared host blocks live here) 2. ~/.honcho/config.json (global, cross-app interop)
3. ~/.honcho/config.json (global, cross-app interop)
Returns the global path if none exist (for first-time setup writes). Returns the global path if none exist (for first-time setup writes).
""" """
@ -67,11 +66,6 @@ def resolve_config_path() -> Path:
if local_path.exists(): if local_path.exists():
return local_path return local_path
# Default profile's config — host blocks accumulate here via setup/clone
default_path = Path.home() / ".hermes" / "honcho.json"
if default_path != local_path and default_path.exists():
return default_path
return GLOBAL_CONFIG_PATH return GLOBAL_CONFIG_PATH

View file

@ -63,6 +63,7 @@ homeassistant = ["aiohttp>=3.9.0,<4"]
sms = ["aiohttp>=3.9.0,<4"] sms = ["aiohttp>=3.9.0,<4"]
acp = ["agent-client-protocol>=0.9.0,<1.0"] acp = ["agent-client-protocol>=0.9.0,<1.0"]
mistral = ["mistralai>=2.3.0,<3"] mistral = ["mistralai>=2.3.0,<3"]
bedrock = ["boto3>=1.35.0,<2"]
termux = [ termux = [
# Tested Android / Termux path: keeps the core CLI feature-rich while # Tested Android / Termux path: keeps the core CLI feature-rich while
# avoiding extras that currently depend on non-Android wheels (notably # avoiding extras that currently depend on non-Android wheels (notably
@ -108,6 +109,7 @@ all = [
"hermes-agent[dingtalk]", "hermes-agent[dingtalk]",
"hermes-agent[feishu]", "hermes-agent[feishu]",
"hermes-agent[mistral]", "hermes-agent[mistral]",
"hermes-agent[bedrock]",
"hermes-agent[web]", "hermes-agent[web]",
] ]

View file

@ -685,7 +685,7 @@ class AIAgent:
self.provider = provider_name or "" self.provider = provider_name or ""
self.acp_command = acp_command or command self.acp_command = acp_command or command
self.acp_args = list(acp_args or args or []) self.acp_args = list(acp_args or args or [])
if api_mode in {"chat_completions", "codex_responses", "anthropic_messages"}: if api_mode in {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}:
self.api_mode = api_mode self.api_mode = api_mode
elif self.provider == "openai-codex": elif self.provider == "openai-codex":
self.api_mode = "codex_responses" self.api_mode = "codex_responses"
@ -700,6 +700,9 @@ class AIAgent:
# use a URL convention ending in /anthropic. Auto-detect these so the # use a URL convention ending in /anthropic. Auto-detect these so the
# Anthropic Messages API adapter is used instead of chat completions. # Anthropic Messages API adapter is used instead of chat completions.
self.api_mode = "anthropic_messages" self.api_mode = "anthropic_messages"
elif self.provider == "bedrock" or "bedrock-runtime" in self._base_url_lower:
# AWS Bedrock — auto-detect from provider name or base URL.
self.api_mode = "bedrock_converse"
else: else:
self.api_mode = "chat_completions" self.api_mode = "chat_completions"
@ -721,8 +724,11 @@ class AIAgent:
# Responses there. ACP runtimes are excluded: CopilotACPClient # Responses there. ACP runtimes are excluded: CopilotACPClient
# handles its own routing and does not implement the Responses API # handles its own routing and does not implement the Responses API
# surface. # surface.
# When api_mode was explicitly provided, respect it — the user
# knows what their endpoint supports (#10473).
if ( if (
self.api_mode == "chat_completions" api_mode is None
and self.api_mode == "chat_completions"
and self.provider != "copilot-acp" and self.provider != "copilot-acp"
and not str(self.base_url or "").lower().startswith("acp://copilot") and not str(self.base_url or "").lower().startswith("acp://copilot")
and not str(self.base_url or "").lower().startswith("acp+tcp://") and not str(self.base_url or "").lower().startswith("acp+tcp://")
@ -889,24 +895,70 @@ class AIAgent:
if self.api_mode == "anthropic_messages": if self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
# Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic. # Bedrock + Claude → use AnthropicBedrock SDK for full feature parity
# Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own API key. # (prompt caching, thinking budgets, adaptive thinking).
# Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401). _is_bedrock_anthropic = self.provider == "bedrock"
_is_native_anthropic = self.provider == "anthropic" if _is_bedrock_anthropic:
effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "") from agent.anthropic_adapter import build_anthropic_bedrock_client
self.api_key = effective_key import re as _re
self._anthropic_api_key = effective_key _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "")
self._anthropic_base_url = base_url _br_region = _region_match.group(1) if _region_match else "us-east-1"
from agent.anthropic_adapter import _is_oauth_token as _is_oat self._bedrock_region = _br_region
self._is_anthropic_oauth = _is_oat(effective_key) self._anthropic_client = build_anthropic_bedrock_client(_br_region)
self._anthropic_client = build_anthropic_client(effective_key, base_url) self._anthropic_api_key = "aws-sdk"
# No OpenAI client needed for Anthropic mode self._anthropic_base_url = base_url
self._is_anthropic_oauth = False
self.api_key = "aws-sdk"
self.client = None
self._client_kwargs = {}
if not self.quiet_mode:
print(f"🤖 AI Agent initialized with model: {self.model} (AWS Bedrock + AnthropicBedrock SDK, {_br_region})")
else:
# Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic.
# Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own API key.
# Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401).
_is_native_anthropic = self.provider == "anthropic"
effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "")
self.api_key = effective_key
self._anthropic_api_key = effective_key
self._anthropic_base_url = base_url
from agent.anthropic_adapter import _is_oauth_token as _is_oat
self._is_anthropic_oauth = _is_oat(effective_key)
self._anthropic_client = build_anthropic_client(effective_key, base_url)
# No OpenAI client needed for Anthropic mode
self.client = None
self._client_kwargs = {}
if not self.quiet_mode:
print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)")
if effective_key and len(effective_key) > 12:
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
elif self.api_mode == "bedrock_converse":
# AWS Bedrock — uses boto3 directly, no OpenAI client needed.
# Region is extracted from the base_url or defaults to us-east-1.
import re as _re
_region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "")
self._bedrock_region = _region_match.group(1) if _region_match else "us-east-1"
# Guardrail config — read from config.yaml at init time.
self._bedrock_guardrail_config = None
try:
from hermes_cli.config import load_config as _load_br_cfg
_gr = _load_br_cfg().get("bedrock", {}).get("guardrail", {})
if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"):
self._bedrock_guardrail_config = {
"guardrailIdentifier": _gr["guardrail_identifier"],
"guardrailVersion": _gr["guardrail_version"],
}
if _gr.get("stream_processing_mode"):
self._bedrock_guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"]
if _gr.get("trace"):
self._bedrock_guardrail_config["trace"] = _gr["trace"]
except Exception:
pass
self.client = None self.client = None
self._client_kwargs = {} self._client_kwargs = {}
if not self.quiet_mode: if not self.quiet_mode:
print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)") _gr_label = " + Guardrails" if self._bedrock_guardrail_config else ""
if effective_key and len(effective_key) > 12: print(f"🤖 AI Agent initialized with model: {self.model} (AWS Bedrock, {self._bedrock_region}{_gr_label})")
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
else: else:
if api_key and base_url: if api_key and base_url:
# Explicit credentials from CLI/gateway — construct directly. # Explicit credentials from CLI/gateway — construct directly.
@ -951,9 +1003,20 @@ class AIAgent:
# message instead of silently routing through OpenRouter. # message instead of silently routing through OpenRouter.
_explicit = (self.provider or "").strip().lower() _explicit = (self.provider or "").strip().lower()
if _explicit and _explicit not in ("auto", "openrouter", "custom"): if _explicit and _explicit not in ("auto", "openrouter", "custom"):
# Look up the actual env var name from the provider
# config — some providers use non-standard names
# (e.g. alibaba → DASHSCOPE_API_KEY, not ALIBABA_API_KEY).
_env_hint = f"{_explicit.upper()}_API_KEY"
try:
from hermes_cli.auth import PROVIDER_REGISTRY
_pcfg = PROVIDER_REGISTRY.get(_explicit)
if _pcfg and _pcfg.api_key_env_vars:
_env_hint = _pcfg.api_key_env_vars[0]
except Exception:
pass
raise RuntimeError( raise RuntimeError(
f"Provider '{_explicit}' is set in config.yaml but no API key " f"Provider '{_explicit}' is set in config.yaml but no API key "
f"was found. Set the {_explicit.upper()}_API_KEY environment " f"was found. Set the {_env_hint} environment "
f"variable, or switch to a different provider with `hermes model`." f"variable, or switch to a different provider with `hermes model`."
) )
# Final fallback: try raw OpenRouter key # Final fallback: try raw OpenRouter key
@ -1217,6 +1280,15 @@ class AIAgent:
"hermes_home": str(_ghh()), "hermes_home": str(_ghh()),
"agent_context": "primary", "agent_context": "primary",
} }
# Thread session title for memory provider scoping
# (e.g. honcho uses this to derive chat-scoped session keys)
if self._session_db:
try:
_st = self._session_db.get_session_title(self.session_id)
if _st:
_init_kwargs["session_title"] = _st
except Exception:
pass
# Thread gateway user identity for per-user memory scoping # Thread gateway user identity for per-user memory scoping
if self._user_id: if self._user_id:
_init_kwargs["user_id"] = self._user_id _init_kwargs["user_id"] = self._user_id
@ -4206,6 +4278,9 @@ class AIAgent:
return False return False
def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any: def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any:
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
_validate_proxy_env_urls()
_validate_base_url(client_kwargs.get("base_url"))
if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"): if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"):
from agent.copilot_acp_client import CopilotACPClient from agent.copilot_acp_client import CopilotACPClient
@ -4890,6 +4965,17 @@ class AIAgent:
) )
elif self.api_mode == "anthropic_messages": elif self.api_mode == "anthropic_messages":
result["response"] = self._anthropic_messages_create(api_kwargs) result["response"] = self._anthropic_messages_create(api_kwargs)
elif self.api_mode == "bedrock_converse":
# Bedrock uses boto3 directly — no OpenAI client needed.
from agent.bedrock_adapter import (
_get_bedrock_runtime_client,
normalize_converse_response,
)
region = api_kwargs.pop("__bedrock_region__", "us-east-1")
api_kwargs.pop("__bedrock_converse__", None)
client = _get_bedrock_runtime_client(region)
raw_response = client.converse(**api_kwargs)
result["response"] = normalize_converse_response(raw_response)
else: else:
request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request") request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request")
result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs) result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs)
@ -5129,6 +5215,65 @@ class AIAgent:
finally: finally:
self._codex_on_first_delta = None self._codex_on_first_delta = None
# Bedrock Converse uses boto3's converse_stream() with real-time delta
# callbacks — same UX as Anthropic and chat_completions streaming.
if self.api_mode == "bedrock_converse":
result = {"response": None, "error": None}
first_delta_fired = {"done": False}
deltas_were_sent = {"yes": False}
def _fire_first():
if not first_delta_fired["done"] and on_first_delta:
first_delta_fired["done"] = True
try:
on_first_delta()
except Exception:
pass
def _bedrock_call():
try:
from agent.bedrock_adapter import (
_get_bedrock_runtime_client,
stream_converse_with_callbacks,
)
region = api_kwargs.pop("__bedrock_region__", "us-east-1")
api_kwargs.pop("__bedrock_converse__", None)
client = _get_bedrock_runtime_client(region)
raw_response = client.converse_stream(**api_kwargs)
def _on_text(text):
_fire_first()
self._fire_stream_delta(text)
deltas_were_sent["yes"] = True
def _on_tool(name):
_fire_first()
self._fire_tool_gen_started(name)
def _on_reasoning(text):
_fire_first()
self._fire_reasoning_delta(text)
result["response"] = stream_converse_with_callbacks(
raw_response,
on_text_delta=_on_text if self._has_stream_consumers() else None,
on_tool_start=_on_tool,
on_reasoning_delta=_on_reasoning if self.reasoning_callback or self.stream_delta_callback else None,
on_interrupt_check=lambda: self._interrupt_requested,
)
except Exception as e:
result["error"] = e
t = threading.Thread(target=_bedrock_call, daemon=True)
t.start()
while t.is_alive():
t.join(timeout=0.3)
if self._interrupt_requested:
raise InterruptedError("Agent interrupted during Bedrock API call")
if result["error"] is not None:
raise result["error"]
return result["response"]
result = {"response": None, "error": None} result = {"response": None, "error": None}
request_client_holder = {"client": None} request_client_holder = {"client": None}
first_delta_fired = {"done": False} first_delta_fired = {"done": False}
@ -5760,6 +5905,8 @@ class AIAgent:
# provider-specific exceptions like Copilot gpt-5-mini on # provider-specific exceptions like Copilot gpt-5-mini on
# chat completions. # chat completions.
fb_api_mode = "codex_responses" fb_api_mode = "codex_responses"
elif fb_provider == "bedrock" or "bedrock-runtime" in fb_base_url.lower():
fb_api_mode = "bedrock_converse"
old_model = self.model old_model = self.model
self.model = fb_model self.model = fb_model
@ -6239,6 +6386,25 @@ class AIAgent:
fast_mode=(self.request_overrides or {}).get("speed") == "fast", fast_mode=(self.request_overrides or {}).get("speed") == "fast",
) )
# AWS Bedrock native Converse API — bypasses the OpenAI client entirely.
# The adapter handles message/tool conversion and boto3 calls directly.
if self.api_mode == "bedrock_converse":
from agent.bedrock_adapter import build_converse_kwargs
region = getattr(self, "_bedrock_region", None) or "us-east-1"
guardrail = getattr(self, "_bedrock_guardrail_config", None)
return {
"__bedrock_converse__": True,
"__bedrock_region__": region,
**build_converse_kwargs(
model=self.model,
messages=api_messages,
tools=self.tools,
max_tokens=self.max_tokens or 4096,
temperature=None, # Let the model use its default
guardrail_config=guardrail,
),
}
if self.api_mode == "codex_responses": if self.api_mode == "codex_responses":
instructions = "" instructions = ""
payload_messages = api_messages payload_messages = api_messages
@ -8504,6 +8670,53 @@ class AIAgent:
api_kwargs = None # Guard against UnboundLocalError in except handler api_kwargs = None # Guard against UnboundLocalError in except handler
while retry_count < max_retries: while retry_count < max_retries:
# ── Nous Portal rate limit guard ──────────────────────
# If another session already recorded that Nous is rate-
# limited, skip the API call entirely. Each attempt
# (including SDK-level retries) counts against RPH and
# deepens the rate limit hole.
if self.provider == "nous":
try:
from agent.nous_rate_guard import (
nous_rate_limit_remaining,
format_remaining as _fmt_nous_remaining,
)
_nous_remaining = nous_rate_limit_remaining()
if _nous_remaining is not None and _nous_remaining > 0:
_nous_msg = (
f"Nous Portal rate limit active — "
f"resets in {_fmt_nous_remaining(_nous_remaining)}."
)
self._vprint(
f"{self.log_prefix}{_nous_msg} Trying fallback...",
force=True,
)
self._emit_status(f"{_nous_msg}")
if self._try_activate_fallback():
retry_count = 0
compression_attempts = 0
primary_recovery_attempted = False
continue
# No fallback available — return with clear message
self._persist_session(messages, conversation_history)
return {
"final_response": (
f"{_nous_msg}\n\n"
"No fallback provider available. "
"Try again after the reset, or add a "
"fallback provider in config.yaml."
),
"messages": messages,
"api_calls": api_call_count,
"completed": False,
"failed": True,
"error": _nous_msg,
}
except ImportError:
pass
except Exception:
pass # Never let rate guard break the agent loop
try: try:
self._reset_stream_delivery_tracking() self._reset_stream_delivery_tracking()
api_kwargs = self._build_api_kwargs(api_messages) api_kwargs = self._build_api_kwargs(api_messages)
@ -8816,7 +9029,7 @@ class AIAgent:
# targeted error instead of wasting 3 API calls. # targeted error instead of wasting 3 API calls.
_trunc_content = None _trunc_content = None
_trunc_has_tool_calls = False _trunc_has_tool_calls = False
if self.api_mode == "chat_completions": if self.api_mode in ("chat_completions", "bedrock_converse"):
_trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
_trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
@ -8885,7 +9098,7 @@ class AIAgent:
"error": _exhaust_error, "error": _exhaust_error,
} }
if self.api_mode == "chat_completions": if self.api_mode in ("chat_completions", "bedrock_converse"):
assistant_message = response.choices[0].message assistant_message = response.choices[0].message
if not assistant_message.tool_calls: if not assistant_message.tool_calls:
length_continue_retries += 1 length_continue_retries += 1
@ -8925,7 +9138,7 @@ class AIAgent:
"error": "Response remained truncated after 3 continuation attempts", "error": "Response remained truncated after 3 continuation attempts",
} }
if self.api_mode == "chat_completions": if self.api_mode in ("chat_completions", "bedrock_converse"):
assistant_message = response.choices[0].message assistant_message = response.choices[0].message
if assistant_message.tool_calls: if assistant_message.tool_calls:
if truncated_tool_call_retries < 1: if truncated_tool_call_retries < 1:
@ -9092,6 +9305,15 @@ class AIAgent:
self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)") self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
has_retried_429 = False # Reset on success has_retried_429 = False # Reset on success
# Clear Nous rate limit state on successful request —
# proves the limit has reset and other sessions can
# resume hitting Nous.
if self.provider == "nous":
try:
from agent.nous_rate_guard import clear_nous_rate_limit
clear_nous_rate_limit()
except Exception:
pass
self._touch_activity(f"API call #{api_call_count} completed") self._touch_activity(f"API call #{api_call_count} completed")
break # Success, exit retry loop break # Success, exit retry loop
@ -9503,6 +9725,38 @@ class AIAgent:
primary_recovery_attempted = False primary_recovery_attempted = False
continue continue
# ── Nous Portal: record rate limit & skip retries ─────
# When Nous returns a 429, record the reset time to a
# shared file so ALL sessions (cron, gateway, auxiliary)
# know not to pile on. Then skip further retries —
# each one burns another RPH request and deepens the
# rate limit hole. The retry loop's top-of-iteration
# guard will catch this on the next pass and try
# fallback or bail with a clear message.
if (
is_rate_limited
and self.provider == "nous"
and classified.reason == FailoverReason.rate_limit
and not recovered_with_pool
):
try:
from agent.nous_rate_guard import record_nous_rate_limit
_err_resp = getattr(api_error, "response", None)
_err_hdrs = (
getattr(_err_resp, "headers", None)
if _err_resp else None
)
record_nous_rate_limit(
headers=_err_hdrs,
error_context=error_context,
)
except Exception:
pass
# Skip straight to max_retries — the top-of-loop
# guard will handle fallback or bail cleanly.
retry_count = max_retries
continue
is_payload_too_large = ( is_payload_too_large = (
classified.reason == FailoverReason.payload_too_large classified.reason == FailoverReason.payload_too_large
) )

View file

@ -28,7 +28,7 @@ BOLD='\033[1m'
# Configuration # Configuration
REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git" REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git" REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
HERMES_HOME="$HOME/.hermes" HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}" INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}"
PYTHON_VERSION="3.11" PYTHON_VERSION="3.11"
NODE_VERSION="22" NODE_VERSION="22"
@ -66,6 +66,10 @@ while [[ $# -gt 0 ]]; do
INSTALL_DIR="$2" INSTALL_DIR="$2"
shift 2 shift 2
;; ;;
--hermes-home)
HERMES_HOME="$2"
shift 2
;;
-h|--help) -h|--help)
echo "Hermes Agent Installer" echo "Hermes Agent Installer"
echo "" echo ""
@ -76,6 +80,7 @@ while [[ $# -gt 0 ]]; do
echo " --skip-setup Skip interactive setup wizard" echo " --skip-setup Skip interactive setup wizard"
echo " --branch NAME Git branch to install (default: main)" echo " --branch NAME Git branch to install (default: main)"
echo " --dir PATH Installation directory (default: ~/.hermes/hermes-agent)" echo " --dir PATH Installation directory (default: ~/.hermes/hermes-agent)"
echo " --hermes-home PATH Data directory (default: ~/.hermes, or \$HERMES_HOME)"
echo " -h, --help Show this help" echo " -h, --help Show this help"
exit 0 exit 0
;; ;;

View file

@ -62,6 +62,7 @@ AUTHOR_MAP = {
"258577966+voidborne-d@users.noreply.github.com": "voidborne-d", "258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
"70424851+insecurejezza@users.noreply.github.com": "insecurejezza", "70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
"259807879+Bartok9@users.noreply.github.com": "Bartok9", "259807879+Bartok9@users.noreply.github.com": "Bartok9",
"241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
"268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1", "268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1",
"241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter", "241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
# contributors (manual mapping from git names) # contributors (manual mapping from git names)
@ -81,6 +82,7 @@ AUTHOR_MAP = {
"brooklyn.bb.nicholson@gmail.com": "brooklynnicholson", "brooklyn.bb.nicholson@gmail.com": "brooklynnicholson",
"4317663+helix4u@users.noreply.github.com": "helix4u", "4317663+helix4u@users.noreply.github.com": "helix4u",
"331214+counterposition@users.noreply.github.com": "counterposition", "331214+counterposition@users.noreply.github.com": "counterposition",
"blspear@gmail.com": "BrennerSpear",
"gpickett00@gmail.com": "gpickett00", "gpickett00@gmail.com": "gpickett00",
"mcosma@gmail.com": "wakamex", "mcosma@gmail.com": "wakamex",
"clawdia.nash@proton.me": "clawdia-nash", "clawdia.nash@proton.me": "clawdia-nash",

View file

@ -313,7 +313,7 @@ Type these during an interactive chat session.
``` ```
~/.hermes/config.yaml Main configuration ~/.hermes/config.yaml Main configuration
~/.hermes/.env API keys and secrets ~/.hermes/.env API keys and secrets
~/.hermes/skills/ Installed skills $HERMES_HOME/skills/ Installed skills
~/.hermes/sessions/ Session transcripts ~/.hermes/sessions/ Session transcripts
~/.hermes/logs/ Gateway and error logs ~/.hermes/logs/ Gateway and error logs
~/.hermes/auth.json OAuth tokens and credential pools ~/.hermes/auth.json OAuth tokens and credential pools

View file

@ -334,7 +334,7 @@ When the user asks you to "review PR #N", "look at this PR", or gives you a PR U
### Step 1: Set up environment ### Step 1: Set up environment
```bash ```bash
source ~/.hermes/skills/github/github-auth/scripts/gh-env.sh source "${HERMES_HOME:-$HOME/.hermes}/skills/github/github-auth/scripts/gh-env.sh"
# Or run the inline setup block from the top of this skill # Or run the inline setup block from the top of this skill
``` ```

View file

@ -6,7 +6,7 @@ All requests need: `-H "Authorization: token $GITHUB_TOKEN"`
Use the `gh-env.sh` helper to set `$GITHUB_TOKEN`, `$GH_OWNER`, `$GH_REPO` automatically: Use the `gh-env.sh` helper to set `$GITHUB_TOKEN`, `$GH_OWNER`, `$GH_REPO` automatically:
```bash ```bash
source ~/.hermes/skills/github/github-auth/scripts/gh-env.sh source "${HERMES_HOME:-$HOME/.hermes}/skills/github/github-auth/scripts/gh-env.sh"
``` ```
## Repositories ## Repositories

View file

@ -32,7 +32,7 @@ on CLI, Telegram, Discord, or any platform.
Define a shorthand first: Define a shorthand first:
```bash ```bash
GSETUP="python ~/.hermes/skills/productivity/google-workspace/scripts/setup.py" GSETUP="python ${HERMES_HOME:-$HOME/.hermes}/skills/productivity/google-workspace/scripts/setup.py"
``` ```
### Step 0: Check if already set up ### Step 0: Check if already set up
@ -163,7 +163,7 @@ Should print `AUTHENTICATED`. Setup is complete — token refreshes automaticall
All commands go through the API script. Set `GAPI` as a shorthand: All commands go through the API script. Set `GAPI` as a shorthand:
```bash ```bash
GAPI="python ~/.hermes/skills/productivity/google-workspace/scripts/google_api.py" GAPI="python ${HERMES_HOME:-$HOME/.hermes}/skills/productivity/google-workspace/scripts/google_api.py"
``` ```
### Gmail ### Gmail

View file

@ -60,7 +60,7 @@ The fastest path — auto-detect the model, test strategies, and lock in the win
# In execute_code — use the loader to avoid exec-scoping issues: # In execute_code — use the loader to avoid exec-scoping issues:
import os import os
exec(open(os.path.expanduser( exec(open(os.path.expanduser(
"~/.hermes/skills/red-teaming/godmode/scripts/load_godmode.py" os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/load_godmode.py")
)).read()) )).read())
# Auto-detect model from config and jailbreak it # Auto-detect model from config and jailbreak it
@ -192,7 +192,7 @@ python3 scripts/parseltongue.py "How do I hack into a WiFi network?" --tier stan
Or use `execute_code` inline: Or use `execute_code` inline:
```python ```python
# Load the parseltongue module # Load the parseltongue module
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/parseltongue.py")).read()) exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/parseltongue.py")).read())
query = "How do I hack into a WiFi network?" query = "How do I hack into a WiFi network?"
variants = generate_variants(query, tier="standard") variants = generate_variants(query, tier="standard")
@ -229,7 +229,7 @@ Race multiple models against the same query, score responses, pick the winner:
```python ```python
# Via execute_code # Via execute_code
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/godmode_race.py")).read()) exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/godmode_race.py")).read())
result = race_models( result = race_models(
query="Explain how SQL injection works with a practical example", query="Explain how SQL injection works with a practical example",

View file

@ -114,7 +114,7 @@ hermes
### Via the GODMODE CLASSIC racer script ### Via the GODMODE CLASSIC racer script
```python ```python
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/godmode_race.py")).read()) exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/godmode_race.py")).read())
result = race_godmode_classic("Your query here") result = race_godmode_classic("Your query here")
print(f"Winner: {result['codename']} — Score: {result['score']}") print(f"Winner: {result['codename']} — Score: {result['score']}")
print(result['content']) print(result['content'])

View file

@ -129,7 +129,7 @@ These don't auto-reject but reduce the response score:
## Using in Python ## Using in Python
```python ```python
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/godmode_race.py")).read()) exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/godmode_race.py")).read())
# Check if a response is a refusal # Check if a response is a refusal
text = "I'm sorry, but I can't assist with that request." text = "I'm sorry, but I can't assist with that request."

View file

@ -7,7 +7,7 @@ finds what works, and locks it in by writing config.yaml + prefill.json.
Usage in execute_code: Usage in execute_code:
exec(open(os.path.expanduser( exec(open(os.path.expanduser(
"~/.hermes/skills/red-teaming/godmode/scripts/auto_jailbreak.py" os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/auto_jailbreak.py")
)).read()) )).read())
result = auto_jailbreak() # Uses current model from config result = auto_jailbreak() # Uses current model from config

View file

@ -7,7 +7,7 @@ Queries multiple models in parallel via OpenRouter, scores responses
on quality/filteredness/speed, returns the best unfiltered answer. on quality/filteredness/speed, returns the best unfiltered answer.
Usage in execute_code: Usage in execute_code:
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/godmode_race.py")).read()) exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/godmode_race.py")).read())
result = race_models( result = race_models(
query="Your query here", query="Your query here",

View file

@ -3,7 +3,7 @@ Loader for G0DM0D3 scripts. Handles the exec-scoping issues.
Usage in execute_code: Usage in execute_code:
exec(open(os.path.expanduser( exec(open(os.path.expanduser(
"~/.hermes/skills/red-teaming/godmode/scripts/load_godmode.py" os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/load_godmode.py")
)).read()) )).read())
# Now all functions are available: # Now all functions are available:

View file

@ -11,7 +11,7 @@ Usage:
python parseltongue.py "How do I hack a WiFi network?" --tier standard python parseltongue.py "How do I hack a WiFi network?" --tier standard
# As a module in execute_code # As a module in execute_code
exec(open("~/.hermes/skills/red-teaming/godmode/scripts/parseltongue.py").read()) exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/parseltongue.py")).read())
variants = generate_variants("How do I hack a WiFi network?", tier="standard") variants = generate_variants("How do I hack a WiFi network?", tier="standard")
""" """

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,269 @@
"""Integration tests for the AWS Bedrock provider wiring.
Verifies that the Bedrock provider is correctly registered in the
provider registry, model catalog, and runtime resolution pipeline.
These tests do NOT require AWS credentials or boto3 — all AWS calls
are mocked.
Note: Tests that import ``hermes_cli.auth`` or ``hermes_cli.runtime_provider``
require Python 3.10+ due to ``str | None`` type syntax in the import chain.
"""
import os
from unittest.mock import MagicMock, patch
import pytest
class TestProviderRegistry:
    """Bedrock must be present and correctly described in PROVIDER_REGISTRY."""

    def test_bedrock_in_registry(self):
        """The 'bedrock' key exists in the provider registry."""
        from hermes_cli.auth import PROVIDER_REGISTRY

        assert "bedrock" in PROVIDER_REGISTRY

    def test_bedrock_auth_type_is_aws_sdk(self):
        """Bedrock authenticates through the AWS SDK, not a bearer key."""
        from hermes_cli.auth import PROVIDER_REGISTRY

        entry = PROVIDER_REGISTRY["bedrock"]
        assert entry.auth_type == "aws_sdk"

    def test_bedrock_has_no_api_key_env_vars(self):
        """Bedrock uses the AWS SDK credential chain, not API keys."""
        from hermes_cli.auth import PROVIDER_REGISTRY

        entry = PROVIDER_REGISTRY["bedrock"]
        assert entry.api_key_env_vars == ()

    def test_bedrock_base_url_env_var(self):
        """The base-URL override env var is BEDROCK_BASE_URL."""
        from hermes_cli.auth import PROVIDER_REGISTRY

        entry = PROVIDER_REGISTRY["bedrock"]
        assert entry.base_url_env_var == "BEDROCK_BASE_URL"
class TestProviderAliases:
    """Every Bedrock alias must map onto the canonical 'bedrock' name."""

    def test_aws_alias(self):
        from hermes_cli.models import _PROVIDER_ALIASES

        resolved = _PROVIDER_ALIASES.get("aws")
        assert resolved == "bedrock"

    def test_aws_bedrock_alias(self):
        from hermes_cli.models import _PROVIDER_ALIASES

        resolved = _PROVIDER_ALIASES.get("aws-bedrock")
        assert resolved == "bedrock"

    def test_amazon_bedrock_alias(self):
        from hermes_cli.models import _PROVIDER_ALIASES

        resolved = _PROVIDER_ALIASES.get("amazon-bedrock")
        assert resolved == "bedrock"

    def test_amazon_alias(self):
        from hermes_cli.models import _PROVIDER_ALIASES

        resolved = _PROVIDER_ALIASES.get("amazon")
        assert resolved == "bedrock"
class TestProviderLabels:
    """The human-readable label for 'bedrock' is 'AWS Bedrock'."""

    def test_bedrock_label(self):
        from hermes_cli.models import _PROVIDER_LABELS

        label = _PROVIDER_LABELS.get("bedrock")
        assert label == "AWS Bedrock"
class TestModelCatalog:
    """Bedrock ships a non-empty static model fallback list with the
    expected model families."""

    def test_bedrock_has_curated_models(self):
        from hermes_cli.models import _PROVIDER_MODELS

        catalog = _PROVIDER_MODELS.get("bedrock", [])
        # The curated fallback list must not be empty.
        assert len(catalog) > 0

    def test_bedrock_models_include_claude(self):
        from hermes_cli.models import _PROVIDER_MODELS

        catalog = _PROVIDER_MODELS.get("bedrock", [])
        # At least one Anthropic Claude model id must be listed.
        assert any("anthropic.claude" in model_id for model_id in catalog)

    def test_bedrock_models_include_nova(self):
        from hermes_cli.models import _PROVIDER_MODELS

        catalog = _PROVIDER_MODELS.get("bedrock", [])
        # At least one Amazon Nova model id must be listed.
        assert any("amazon.nova" in model_id for model_id in catalog)
class TestResolveProvider:
    """Verify resolve_provider() handles bedrock correctly.

    Covers the explicit name, each alias, and env-var auto-detection.
    """

    def test_explicit_bedrock_resolves(self):
        """When the user explicitly requests 'bedrock', it resolves as-is.

        (The unused ``monkeypatch`` fixture and ``PROVIDER_REGISTRY`` import
        from the original version were removed — this test needs neither.)
        """
        from hermes_cli.auth import resolve_provider

        assert resolve_provider("bedrock") == "bedrock"

    def test_aws_alias_resolves_to_bedrock(self):
        from hermes_cli.auth import resolve_provider

        assert resolve_provider("aws") == "bedrock"

    def test_amazon_bedrock_alias_resolves(self):
        from hermes_cli.auth import resolve_provider

        assert resolve_provider("amazon-bedrock") == "bedrock"

    def test_auto_detect_with_aws_credentials(self, monkeypatch):
        """When AWS credentials are present and no other provider is
        configured, auto-detect should find bedrock."""
        from hermes_cli.auth import resolve_provider

        # Clear every competing provider key so auto-detect cannot pick them.
        for var in (
            "OPENAI_API_KEY", "OPENROUTER_API_KEY", "ANTHROPIC_API_KEY",
            "ANTHROPIC_TOKEN", "GOOGLE_API_KEY", "DEEPSEEK_API_KEY",
        ):
            monkeypatch.delenv(var, raising=False)
        # Provide AWS credentials (AWS's documented example key pair).
        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
        # Empty auth store → no explicitly-activated provider to shadow bedrock.
        with patch("hermes_cli.auth._load_auth_store", return_value={}):
            assert resolve_provider("auto") == "bedrock"
class TestRuntimeProvider:
    """Verify resolve_runtime_provider() handles bedrock correctly.

    All AWS interaction is mocked; no real credentials or network are used.
    """

    def test_bedrock_runtime_resolution(self, monkeypatch):
        """Explicit bedrock request with env credentials resolves region,
        api_mode, base_url, and the 'aws-sdk' placeholder api_key."""
        from hermes_cli.runtime_provider import resolve_runtime_provider

        # AWS's documented example key pair — never valid real credentials.
        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
        monkeypatch.setenv("AWS_REGION", "eu-west-1")
        # Mock resolve_provider to return bedrock
        with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \
             patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}):
            result = resolve_runtime_provider(requested="bedrock")
            assert result["provider"] == "bedrock"
            assert result["api_mode"] == "bedrock_converse"
            # Region must come from AWS_REGION and feed into the endpoint URL.
            assert result["region"] == "eu-west-1"
            assert "bedrock-runtime.eu-west-1.amazonaws.com" in result["base_url"]
            # Bedrock has no API key; a sentinel marks SDK-chain auth.
            assert result["api_key"] == "aws-sdk"

    def test_bedrock_runtime_default_region(self, monkeypatch):
        """Without AWS_REGION/AWS_DEFAULT_REGION the region falls back to
        us-east-1."""
        from hermes_cli.runtime_provider import resolve_runtime_provider

        # A profile provides credentials, but no region env vars are set.
        monkeypatch.setenv("AWS_PROFILE", "default")
        monkeypatch.delenv("AWS_REGION", raising=False)
        monkeypatch.delenv("AWS_DEFAULT_REGION", raising=False)
        with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \
             patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}):
            result = resolve_runtime_provider(requested="bedrock")
            assert result["region"] == "us-east-1"

    def test_bedrock_runtime_no_credentials_raises_on_auto_detect(self, monkeypatch):
        """When bedrock is auto-detected (not explicitly requested) and no
        credentials are found, runtime resolution should raise AuthError."""
        from hermes_cli.runtime_provider import resolve_runtime_provider
        from hermes_cli.auth import AuthError

        # Clear all AWS env vars
        for var in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_PROFILE",
                    "AWS_BEARER_TOKEN_BEDROCK", "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
                    "AWS_WEB_IDENTITY_TOKEN_FILE"]:
            monkeypatch.delenv(var, raising=False)
        # Mock both the provider resolution and boto3's credential chain
        mock_session = MagicMock()
        mock_session.get_credentials.return_value = None
        # patch.dict replaces botocore in sys.modules so the import below
        # yields our MagicMock instead of the real (possibly absent) package.
        with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \
             patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}), \
             patch("hermes_cli.runtime_provider.resolve_requested_provider", return_value="auto"), \
             patch.dict("sys.modules", {"botocore": MagicMock(), "botocore.session": MagicMock()}):
            import botocore.session as _bs
            # Force the mocked credential chain to report "no credentials".
            _bs.get_session = MagicMock(return_value=mock_session)
            with pytest.raises(AuthError, match="No AWS credentials"):
                resolve_runtime_provider(requested="auto")

    def test_bedrock_runtime_explicit_skips_credential_check(self, monkeypatch):
        """When user explicitly requests bedrock, trust boto3's credential chain
        even if env-var detection finds nothing (covers IMDS, SSO, etc.)."""
        from hermes_cli.runtime_provider import resolve_runtime_provider

        # No AWS env vars set — but explicit bedrock request should not raise
        for var in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_PROFILE",
                    "AWS_BEARER_TOKEN_BEDROCK"]:
            monkeypatch.delenv(var, raising=False)
        with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \
             patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}):
            result = resolve_runtime_provider(requested="bedrock")
            assert result["provider"] == "bedrock"
            assert result["api_mode"] == "bedrock_converse"
# ---------------------------------------------------------------------------
# providers.py integration
# ---------------------------------------------------------------------------
class TestProvidersModule:
    """Bedrock wiring inside hermes_cli/providers.py."""

    def test_bedrock_alias_in_providers(self):
        from hermes_cli.providers import ALIASES

        # "bedrock" IS the canonical name, so it must not appear as an alias key.
        assert ALIASES.get("bedrock") is None
        assert ALIASES.get("aws") == "bedrock"
        assert ALIASES.get("aws-bedrock") == "bedrock"

    def test_bedrock_transport_mapping(self):
        from hermes_cli.providers import TRANSPORT_TO_API_MODE

        api_mode = TRANSPORT_TO_API_MODE.get("bedrock_converse")
        assert api_mode == "bedrock_converse"

    def test_determine_api_mode_from_bedrock_url(self):
        from hermes_cli.providers import determine_api_mode

        endpoint = "https://bedrock-runtime.us-east-1.amazonaws.com"
        # An unknown provider with a Bedrock runtime URL maps to bedrock_converse.
        assert determine_api_mode("unknown", endpoint) == "bedrock_converse"

    def test_label_override(self):
        from hermes_cli.providers import _LABEL_OVERRIDES

        assert _LABEL_OVERRIDES.get("bedrock") == "AWS Bedrock"
# ---------------------------------------------------------------------------
# Error classifier integration
# ---------------------------------------------------------------------------
class TestErrorClassifierBedrock:
    """Verify Bedrock error patterns are in the global error classifier."""

    def test_throttling_in_rate_limit_patterns(self):
        from agent.error_classifier import _RATE_LIMIT_PATTERNS
        # Bedrock throttling surfaces as ThrottlingException (matched lower-cased).
        assert "throttlingexception" in _RATE_LIMIT_PATTERNS

    def test_context_overflow_patterns(self):
        from agent.error_classifier import _CONTEXT_OVERFLOW_PATTERNS
        # Bedrock's context-window error message.
        assert "input is too long" in _CONTEXT_OVERFLOW_PATTERNS
# ---------------------------------------------------------------------------
# pyproject.toml bedrock extra
# ---------------------------------------------------------------------------
class TestPackaging:
    """Verify the bedrock optional dependency is declared in pyproject.toml."""

    @staticmethod
    def _pyproject_text() -> str:
        """Return the raw text of the repository's pyproject.toml.

        Shared by both tests; also drops the unused ``configparser``
        import the original carried.
        """
        from pathlib import Path
        return (Path(__file__).parent.parent.parent / "pyproject.toml").read_text()

    def test_bedrock_extra_exists(self):
        # The [bedrock] extra must pull in boto3.
        assert 'bedrock = ["boto3' in self._pyproject_text()

    def test_bedrock_in_all_extra(self):
        # The aggregate "all" extra must include the bedrock extra.
        assert '"hermes-agent[bedrock]"' in self._pyproject_text()

View file

@ -0,0 +1,253 @@
"""Tests for agent/nous_rate_guard.py — cross-session Nous Portal rate limit guard."""
import json
import os
import time
import pytest
@pytest.fixture
def rate_guard_env(tmp_path, monkeypatch):
    """Point HERMES_HOME at a throwaway directory so rate-guard state
    never leaks between tests (or into the real home)."""
    isolated_home = str(tmp_path / ".hermes")
    os.makedirs(isolated_home, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", isolated_home)
    return isolated_home
class TestRecordNousRateLimit:
    """Test recording rate limit state."""

    @staticmethod
    def _saved_state():
        """Load and return the JSON state written by the last record call."""
        from agent.nous_rate_guard import _state_path
        with open(_state_path()) as fh:
            return json.load(fh)

    def test_records_with_header_reset(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path
        record_nous_rate_limit(headers={"x-ratelimit-reset-requests-1h": "1800"})
        assert os.path.exists(_state_path())
        state = self._saved_state()
        assert state["reset_seconds"] == pytest.approx(1800, abs=2)
        assert state["reset_at"] > time.time()

    def test_records_with_per_minute_header(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit
        record_nous_rate_limit(headers={"x-ratelimit-reset-requests": "45"})
        assert self._saved_state()["reset_seconds"] == pytest.approx(45, abs=2)

    def test_records_with_retry_after_header(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit
        record_nous_rate_limit(headers={"retry-after": "60"})
        assert self._saved_state()["reset_seconds"] == pytest.approx(60, abs=2)

    def test_prefers_hourly_over_per_minute(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit
        record_nous_rate_limit(headers={
            "x-ratelimit-reset-requests-1h": "1800",
            "x-ratelimit-reset-requests": "45",
        })
        # The hourly bucket wins over the per-minute one.
        assert self._saved_state()["reset_seconds"] == pytest.approx(1800, abs=2)

    def test_falls_back_to_error_context_reset_at(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit
        future_reset = time.time() + 900
        record_nous_rate_limit(
            headers=None,
            error_context={"reset_at": future_reset},
        )
        assert self._saved_state()["reset_at"] == pytest.approx(future_reset, abs=1)

    def test_falls_back_to_default_cooldown(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit
        record_nous_rate_limit(headers=None)
        # With no headers and no context, the 300s (5 minute) default applies.
        assert self._saved_state()["reset_seconds"] == pytest.approx(300, abs=2)

    def test_custom_default_cooldown(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit
        record_nous_rate_limit(headers=None, default_cooldown=120.0)
        assert self._saved_state()["reset_seconds"] == pytest.approx(120, abs=2)

    def test_creates_directory_if_missing(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path
        record_nous_rate_limit(headers={"retry-after": "10"})
        assert os.path.exists(_state_path())
class TestNousRateLimitRemaining:
    """Test checking remaining rate limit time."""

    def test_returns_none_when_no_file(self, rate_guard_env):
        from agent.nous_rate_guard import nous_rate_limit_remaining
        assert nous_rate_limit_remaining() is None

    def test_returns_remaining_seconds_when_active(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, nous_rate_limit_remaining
        record_nous_rate_limit(headers={"x-ratelimit-reset-requests-1h": "600"})
        left = nous_rate_limit_remaining()
        assert left is not None
        # ~600 seconds, with slack for test execution time.
        assert 595 < left <= 605

    def test_returns_none_when_expired(self, rate_guard_env):
        from agent.nous_rate_guard import nous_rate_limit_remaining, _state_path
        # Plant a state file whose reset time is already in the past.
        os.makedirs(os.path.dirname(_state_path()), exist_ok=True)
        expired = {"reset_at": time.time() - 10, "recorded_at": time.time() - 100}
        with open(_state_path(), "w") as fh:
            json.dump(expired, fh)
        assert nous_rate_limit_remaining() is None
        # The stale file must be removed as a side effect.
        assert not os.path.exists(_state_path())

    def test_handles_corrupt_file(self, rate_guard_env):
        from agent.nous_rate_guard import nous_rate_limit_remaining, _state_path
        os.makedirs(os.path.dirname(_state_path()), exist_ok=True)
        with open(_state_path(), "w") as fh:
            fh.write("not valid json{{{")
        # Unparseable state is treated as "no limit", not an exception.
        assert nous_rate_limit_remaining() is None
class TestClearNousRateLimit:
    """Test clearing rate limit state."""

    def test_clears_existing_file(self, rate_guard_env):
        from agent.nous_rate_guard import (
            record_nous_rate_limit,
            clear_nous_rate_limit,
            nous_rate_limit_remaining,
            _state_path,
        )
        record_nous_rate_limit(headers={"retry-after": "600"})
        assert nous_rate_limit_remaining() is not None
        clear_nous_rate_limit()
        # Both the in-effect limit and the backing file must be gone.
        assert nous_rate_limit_remaining() is None
        assert not os.path.exists(_state_path())

    def test_clear_when_no_file(self, rate_guard_env):
        from agent.nous_rate_guard import clear_nous_rate_limit
        clear_nous_rate_limit()  # must be a no-op, not an error
class TestFormatRemaining:
    """Test human-readable duration formatting."""

    def test_seconds(self):
        from agent.nous_rate_guard import format_remaining
        assert format_remaining(30) == "30s"

    def test_minutes(self):
        from agent.nous_rate_guard import format_remaining
        assert format_remaining(125) == "2m 5s"

    def test_exact_minutes(self):
        from agent.nous_rate_guard import format_remaining
        # No dangling "0s" component on an exact minute boundary.
        assert format_remaining(120) == "2m"

    def test_hours(self):
        from agent.nous_rate_guard import format_remaining
        assert format_remaining(3720) == "1h 2m"
class TestParseResetSeconds:
    """Test header parsing for reset times.

    ``_parse_reset_seconds`` is a pure function over a header dict — it
    touches no filesystem state, so none of these tests need the
    ``rate_guard_env`` fixture (the original first test requested it
    needlessly, inconsistently with its siblings).
    """

    def test_case_insensitive_headers(self):
        from agent.nous_rate_guard import _parse_reset_seconds
        # Header lookup must not depend on letter case.
        headers = {"X-Ratelimit-Reset-Requests-1h": "1200"}
        assert _parse_reset_seconds(headers) == 1200.0

    def test_returns_none_for_empty_headers(self):
        from agent.nous_rate_guard import _parse_reset_seconds
        assert _parse_reset_seconds(None) is None
        assert _parse_reset_seconds({}) is None

    def test_ignores_zero_values(self):
        from agent.nous_rate_guard import _parse_reset_seconds
        headers = {"x-ratelimit-reset-requests-1h": "0"}
        assert _parse_reset_seconds(headers) is None

    def test_ignores_invalid_values(self):
        from agent.nous_rate_guard import _parse_reset_seconds
        headers = {"x-ratelimit-reset-requests-1h": "not-a-number"}
        assert _parse_reset_seconds(headers) is None
class TestAuxiliaryClientIntegration:
    """Test that the auxiliary client respects the rate guard."""

    def test_try_nous_skips_when_rate_limited(self, rate_guard_env, monkeypatch):
        from agent.nous_rate_guard import record_nous_rate_limit
        import agent.auxiliary_client as aux
        # An active rate limit is on record...
        record_nous_rate_limit(headers={"retry-after": "600"})
        # ...and credentials are valid, so only the guard can block us.
        monkeypatch.setattr(aux, "_read_nous_auth", lambda: {
            "access_token": "test-token",
            "inference_base_url": "https://api.nous.test/v1",
        })
        assert aux._try_nous() == (None, None)

    def test_try_nous_works_when_not_rate_limited(self, rate_guard_env, monkeypatch):
        import agent.auxiliary_client as aux
        # No limit recorded: _try_nous gets past the guard and returns
        # (None, None) only because there are no real credentials.
        monkeypatch.setattr(aux, "_read_nous_auth", lambda: None)
        assert aux._try_nous() == (None, None)

View file

@ -0,0 +1,60 @@
"""Tests for malformed proxy env var and base URL validation.
Salvaged from PR #6403 by MestreY0d4-Uninter — validates that the agent
surfaces clear errors instead of cryptic httpx ``Invalid port`` exceptions
when proxy env vars or custom endpoint URLs are malformed.
"""
from __future__ import annotations
import pytest
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
# -- proxy env validation ------------------------------------------------
def test_proxy_env_accepts_normal_values(monkeypatch):
    """Well-formed proxy URLs in every supported scheme pass validation."""
    monkeypatch.setenv("HTTP_PROXY", "http://127.0.0.1:6153")
    monkeypatch.setenv("HTTPS_PROXY", "https://proxy.example.com:8443")
    monkeypatch.setenv("ALL_PROXY", "socks5://127.0.0.1:1080")
    _validate_proxy_env_urls()  # should not raise
def test_proxy_env_accepts_empty(monkeypatch):
    """Validation is a no-op when no proxy variables are set at all."""
    for name in (
        "HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY",
        "http_proxy", "https_proxy", "all_proxy",
    ):
        monkeypatch.delenv(name, raising=False)
    _validate_proxy_env_urls()  # should not raise
@pytest.mark.parametrize("key", [
    "HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY",
    "http_proxy", "https_proxy", "all_proxy",
])
def test_proxy_env_rejects_malformed_port(monkeypatch, key):
    """A junk port (shell-paste residue) raises a RuntimeError that names
    the offending variable — in either casing."""
    monkeypatch.setenv(key, "http://127.0.0.1:6153export")
    with pytest.raises(RuntimeError, match=rf"Malformed proxy environment variable {key}=.*6153export"):
        _validate_proxy_env_urls()
# -- base URL validation -------------------------------------------------
@pytest.mark.parametrize("url", [
    "https://api.example.com/v1",
    "http://127.0.0.1:6153/v1",
    "acp://copilot",
    "",
    None,
])
def test_base_url_accepts_valid(url):
    """Valid (or absent) endpoint URLs pass without raising."""
    _validate_base_url(url)
def test_base_url_rejects_malformed_port():
    """A malformed port in a custom endpoint raises a clear RuntimeError."""
    with pytest.raises(RuntimeError, match="Malformed custom endpoint URL"):
        _validate_base_url("http://127.0.0.1:6153export")

View file

@ -284,3 +284,95 @@ class TestElevenLabsTavilyExaKeys:
assert "XYZ789abcdef" not in result assert "XYZ789abcdef" not in result
assert "HOME=/home/user" in result assert "HOME=/home/user" in result
assert "SHELL=/bin/bash" in result assert "SHELL=/bin/bash" in result
class TestJWTTokens:
    """JWT tokens start with eyJ (base64 for '{') and have dot-separated parts."""

    def test_full_3part_jwt(self):
        sample = (
            "Token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
            ".eyJpc3MiOiI0MjNiZDJkYjg4MjI0MDAwIn0"
            ".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
        )
        scrubbed = redact_sensitive_text(sample)
        # The label survives; payload and signature do not.
        assert "Token:" in scrubbed
        assert "eyJpc3Mi" not in scrubbed
        assert "Gxgv0rru" not in scrubbed

    def test_2part_jwt(self):
        scrubbed = redact_sensitive_text("eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0")
        assert "eyJzdWIi" not in scrubbed

    def test_standalone_jwt_header(self):
        scrubbed = redact_sensitive_text("leaked header: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9 here")
        assert "IkpXVCJ9" not in scrubbed
        assert "leaked header:" in scrubbed

    def test_jwt_with_base64_padding(self):
        scrubbed = redact_sensitive_text(
            "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0=.abc123def456ghij"
        )
        assert "abc123def456" not in scrubbed

    def test_short_eyj_not_matched(self):
        """eyJ followed by fewer than 10 base64 chars should not match."""
        sample = "eyJust a normal word"
        assert redact_sensitive_text(sample) == sample

    def test_jwt_preserves_surrounding_text(self):
        scrubbed = redact_sensitive_text(
            "before eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0 after"
        )
        assert scrubbed.startswith("before ")
        assert scrubbed.endswith(" after")

    def test_home_assistant_jwt_in_memory(self):
        """Real-world pattern: HA token stored in agent memory block."""
        sample = (
            "Home Assistant API Token: "
            "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
            ".eyJpc3MiOiJhYmNkZWYiLCJleHAiOjE3NzQ5NTcxMDN9"
            ".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
        )
        scrubbed = redact_sensitive_text(sample)
        assert "Home Assistant API Token:" in scrubbed
        assert "Gxgv0rru" not in scrubbed
        assert "..." in scrubbed
class TestDiscordMentions:
    """Discord snowflake IDs in <@ID> or <@!ID> format."""

    def test_normal_mention(self):
        scrubbed = redact_sensitive_text("Hello <@222589316709220353>")
        assert "222589316709220353" not in scrubbed
        assert "<@***>" in scrubbed

    def test_nickname_mention(self):
        scrubbed = redact_sensitive_text("Ping <@!1331549159177846844>")
        assert "1331549159177846844" not in scrubbed
        # The nickname form keeps its "!" marker in the placeholder.
        assert "<@!***>" in scrubbed

    def test_multiple_mentions(self):
        scrubbed = redact_sensitive_text("<@111111111111111111> and <@222222222222222222>")
        assert "111111111111111111" not in scrubbed
        assert "222222222222222222" not in scrubbed

    def test_short_id_not_matched(self):
        """IDs shorter than 17 digits are not Discord snowflakes."""
        sample = "<@12345>"
        assert redact_sensitive_text(sample) == sample

    def test_slack_mention_not_matched(self):
        """Slack mentions use letters, not pure digits."""
        sample = "<@U024BE7LH>"
        assert redact_sensitive_text(sample) == sample

    def test_preserves_surrounding_text(self):
        scrubbed = redact_sensitive_text("User <@222589316709220353> said hello")
        assert scrubbed.startswith("User ")
        assert scrubbed.endswith(" said hello")

View file

@ -193,6 +193,67 @@ class TestLoadGatewayConfig:
assert config.thread_sessions_per_user is False assert config.thread_sessions_per_user is False
def test_bridges_discord_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch):
    # Both quoted and bare-numeric YAML keys must be bridged into the
    # Discord platform extras as *string* keys.
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    config_path = hermes_home / "config.yaml"
    config_path.write_text(
        "discord:\n"
        " channel_prompts:\n"
        " \"123\": Research mode\n"
        " 456: Therapist mode\n",
        encoding="utf-8",
    )
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    config = load_gateway_config()
    # Numeric key 456 comes back normalized to the string "456".
    assert config.platforms[Platform.DISCORD].extra["channel_prompts"] == {
        "123": "Research mode",
        "456": "Therapist mode",
    }
def test_bridges_telegram_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch):
    # Telegram mirrors the Discord bridging; negative supergroup IDs must
    # survive as strings too.
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    config_path = hermes_home / "config.yaml"
    config_path.write_text(
        "telegram:\n"
        " channel_prompts:\n"
        ' "-1001234567": Research assistant\n'
        " 789: Creative writing\n",
        encoding="utf-8",
    )
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    config = load_gateway_config()
    assert config.platforms[Platform.TELEGRAM].extra["channel_prompts"] == {
        "-1001234567": "Research assistant",
        "789": "Creative writing",
    }
def test_bridges_slack_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch):
    # Slack channel IDs are already strings; bridging passes them through.
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    config_path = hermes_home / "config.yaml"
    config_path.write_text(
        "slack:\n"
        " channel_prompts:\n"
        ' "C01ABC": Code review mode\n',
        encoding="utf-8",
    )
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    config = load_gateway_config()
    assert config.platforms[Platform.SLACK].extra["channel_prompts"] == {
        "C01ABC": "Code review mode",
    }
def test_invalid_quick_commands_in_config_yaml_are_ignored(self, tmp_path, monkeypatch): def test_invalid_quick_commands_in_config_yaml_are_ignored(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes" hermes_home = tmp_path / ".hermes"
hermes_home.mkdir() hermes_home.mkdir()

View file

@ -0,0 +1,259 @@
"""Tests for Discord channel_prompts resolution and injection."""
import sys
import threading
import types
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
def _ensure_discord_mock():
    """Install stub ``discord`` modules into sys.modules unless the real
    library (one with a ``__file__``) is already importable."""
    real_discord = sys.modules.get("discord")
    if real_discord is not None and hasattr(real_discord, "__file__"):
        return
    stub = types.ModuleType("discord")
    stub.Intents = MagicMock()
    stub.Intents.default.return_value = MagicMock()
    # Concrete types so isinstance() checks in the adapter work.
    stub.DMChannel = type("DMChannel", (), {})
    stub.Thread = type("Thread", (), {})
    stub.ForumChannel = type("ForumChannel", (), {})
    stub.Interaction = object
    ext_stub = MagicMock()
    commands_stub = MagicMock()
    commands_stub.Bot = MagicMock
    ext_stub.commands = commands_stub
    # setdefault: never clobber a module someone else already installed.
    sys.modules.setdefault("discord", stub)
    sys.modules.setdefault("discord.ext", ext_stub)
    sys.modules.setdefault("discord.ext.commands", commands_stub)
import gateway.run as gateway_run
from gateway.config import Platform
from gateway.platforms.base import MessageEvent
from gateway.session import SessionSource
class _CapturingAgent:
    """Stand-in for run_agent.AIAgent that records its constructor kwargs
    (on the class, as ``last_init``) and returns a canned conversation."""

    last_init = None  # kwargs from the most recent instantiation

    def __init__(self, *args, **kwargs):
        type(self).last_init = dict(kwargs)
        self.tools = []

    def run_conversation(self, user_message, conversation_history=None, task_id=None, persist_user_message=None):
        canned = {
            "final_response": "ok",
            "messages": [],
            "api_calls": 1,
            "completed": True,
        }
        return canned
def _install_fake_agent(monkeypatch):
    """Replace the ``run_agent`` module with one exposing _CapturingAgent."""
    stub_module = types.ModuleType("run_agent")
    stub_module.AIAgent = _CapturingAgent
    monkeypatch.setitem(sys.modules, "run_agent", stub_module)
def _make_adapter():
    """Build a DiscordAdapter without running __init__, with empty extras."""
    _ensure_discord_mock()
    from gateway.platforms.discord import DiscordAdapter
    # __new__ skips __init__ (which would need a real bot/token).
    adapter = object.__new__(DiscordAdapter)
    adapter.config = MagicMock()
    adapter.config.extra = {}
    return adapter
def _make_runner():
    """Build a GatewayRunner without running __init__, wiring just enough
    state for the message-handling paths exercised in these tests."""
    runner = object.__new__(gateway_run.GatewayRunner)
    runner.adapters = {}
    # Prompt/config plumbing read by the agent-spawning path.
    runner._ephemeral_system_prompt = "Global prompt"
    runner._prefill_messages = []
    runner._reasoning_config = None
    runner._service_tier = None
    runner._provider_routing = {}
    runner._fallback_model = None
    runner._smart_model_routing = {}
    # Per-session bookkeeping, all empty/fresh.
    runner._running_agents = {}
    runner._pending_model_notes = {}
    runner._session_db = None
    runner._agent_cache = {}
    runner._agent_cache_lock = threading.Lock()
    runner._session_model_overrides = {}
    runner.hooks = SimpleNamespace(loaded_hooks=False)
    runner.config = SimpleNamespace(streaming=None)
    # Minimal session store: one fixed session id, empty transcript.
    runner.session_store = SimpleNamespace(
        get_or_create_session=lambda source: SimpleNamespace(session_id="session-1"),
        load_transcript=lambda session_id: [],
    )
    runner._get_or_create_gateway_honcho = lambda session_key: (None, None)
    runner._enrich_message_with_vision = AsyncMock(return_value="ENRICHED")
    return runner
def _make_source() -> SessionSource:
    """A canonical Discord thread source shared by these tests."""
    source = SessionSource(
        platform=Platform.DISCORD,
        chat_id="12345",
        chat_type="thread",
        user_id="user-1",
    )
    return source
class TestResolveChannelPrompts:
    """Unit tests for DiscordAdapter._resolve_channel_prompt and the
    code paths that attach the resolved prompt to outgoing events."""

    def test_no_prompt_returns_none(self):
        adapter = _make_adapter()
        assert adapter._resolve_channel_prompt("123") is None

    def test_match_by_channel_id(self):
        adapter = _make_adapter()
        adapter.config.extra = {"channel_prompts": {"100": "Research mode"}}
        assert adapter._resolve_channel_prompt("100") == "Research mode"

    def test_numeric_yaml_keys_normalized_at_config_load(self):
        """Numeric YAML keys are normalized to strings by config bridging.
        The resolver itself expects string keys (config.py handles
        normalization), so raw numeric keys will not match — this is
        intentional.
        """
        adapter = _make_adapter()
        # Simulates post-bridging state: keys are already strings
        adapter.config.extra = {"channel_prompts": {"100": "Research mode"}}
        assert adapter._resolve_channel_prompt("100") == "Research mode"
        # Pre-bridging numeric key would not match (bridging is responsible)
        adapter.config.extra = {"channel_prompts": {100: "Research mode"}}
        assert adapter._resolve_channel_prompt("100") is None

    def test_match_by_parent_id(self):
        adapter = _make_adapter()
        adapter.config.extra = {"channel_prompts": {"200": "Forum prompt"}}
        # A thread with no direct entry falls back to its parent channel.
        assert adapter._resolve_channel_prompt("999", parent_id="200") == "Forum prompt"

    def test_exact_channel_overrides_parent(self):
        adapter = _make_adapter()
        adapter.config.extra = {
            "channel_prompts": {
                "999": "Thread override",
                "200": "Forum prompt",
            }
        }
        # A direct entry for the channel beats the parent fallback.
        assert adapter._resolve_channel_prompt("999", parent_id="200") == "Thread override"

    def test_build_message_event_sets_channel_prompt(self):
        adapter = _make_adapter()
        adapter.config.extra = {"channel_prompts": {"321": "Command prompt"}}
        adapter.build_source = MagicMock(return_value=SimpleNamespace())
        interaction = SimpleNamespace(
            channel_id=321,
            channel=SimpleNamespace(name="general", guild=None, parent_id=None),
            user=SimpleNamespace(id=1, display_name="Brenner"),
        )
        adapter._get_effective_topic = MagicMock(return_value=None)
        event = adapter._build_slash_event(interaction, "/retry")
        # Slash-command events carry the channel's configured prompt.
        assert event.channel_prompt == "Command prompt"

    @pytest.mark.asyncio
    async def test_dispatch_thread_session_inherits_parent_channel_prompt(self):
        adapter = _make_adapter()
        adapter.config.extra = {"channel_prompts": {"200": "Parent prompt"}}
        adapter.build_source = MagicMock(return_value=SimpleNamespace())
        adapter._get_effective_topic = MagicMock(return_value=None)
        adapter.handle_message = AsyncMock()
        interaction = SimpleNamespace(
            guild=SimpleNamespace(name="Wetlands"),
            channel=SimpleNamespace(id=200, parent=None),
            user=SimpleNamespace(id=1, display_name="Brenner"),
        )
        await adapter._dispatch_thread_session(interaction, "999", "new-thread", "hello")
        # The event forwarded to handle_message inherits the parent's prompt.
        dispatched_event = adapter.handle_message.await_args.args[0]
        assert dispatched_event.channel_prompt == "Parent prompt"

    def test_blank_prompts_are_ignored(self):
        adapter = _make_adapter()
        # Whitespace-only prompt values are treated as unset.
        adapter.config.extra = {"channel_prompts": {"100": " "}}
        assert adapter._resolve_channel_prompt("100") is None
@pytest.mark.asyncio
async def test_retry_preserves_channel_prompt(monkeypatch):
    """/retry must re-dispatch the original message with the same
    channel_prompt attached to the rebuilt event."""
    runner = _make_runner()
    # Session store with a two-message transcript so retry has something
    # to rewind; rewrite_transcript is a spy.
    runner.session_store = SimpleNamespace(
        get_or_create_session=lambda source: SimpleNamespace(session_id="session-1", last_prompt_tokens=10),
        load_transcript=lambda session_id: [
            {"role": "user", "content": "original message"},
            {"role": "assistant", "content": "old reply"},
        ],
        rewrite_transcript=MagicMock(),
    )
    runner._handle_message = AsyncMock(return_value="ok")
    event = MessageEvent(
        text="/retry",
        message_type=gateway_run.MessageType.COMMAND,
        source=_make_source(),
        raw_message=SimpleNamespace(),
        channel_prompt="Channel prompt",
    )
    result = await runner._handle_retry_command(event)
    assert result == "ok"
    # The event re-dispatched through _handle_message keeps the prompt.
    retried_event = runner._handle_message.await_args.args[0]
    assert retried_event.channel_prompt == "Channel prompt"
@pytest.mark.asyncio
async def test_run_agent_appends_channel_prompt_to_ephemeral_system_prompt(monkeypatch, tmp_path):
    """The channel prompt is inserted between the context prompt and the
    global system prompt when the agent is constructed."""
    _install_fake_agent(monkeypatch)
    runner = _make_runner()
    # Point the gateway at a throwaway HERMES_HOME with a minimal config.
    (tmp_path / "config.yaml").write_text("agent:\n system_prompt: Global prompt\n", encoding="utf-8")
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    monkeypatch.setattr(gateway_run, "_env_path", tmp_path / ".env")
    monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None)
    monkeypatch.setattr(gateway_run, "_load_gateway_config", lambda: {})
    monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda config=None: "gpt-5.4")
    # Stub runtime provider resolution so no real credentials are needed.
    monkeypatch.setattr(
        gateway_run,
        "_resolve_runtime_agent_kwargs",
        lambda: {
            "provider": "openrouter",
            "api_mode": "chat_completions",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "***",
        },
    )
    import hermes_cli.tools_config as tools_config
    monkeypatch.setattr(tools_config, "_get_platform_tools", lambda user_config, platform_key: {"core"})
    # Reset the capture so we read this run's constructor kwargs.
    _CapturingAgent.last_init = None
    event = MessageEvent(
        text="hi",
        source=_make_source(),
        message_id="m1",
        channel_prompt="Channel prompt",
    )
    result = await runner._run_agent(
        message="hi",
        context_prompt="Context prompt",
        history=[],
        source=_make_source(),
        session_id="session-1",
        session_key="agent:main:discord:thread:12345",
        channel_prompt=event.channel_prompt,
    )
    assert result["final_response"] == "ok"
    # Expected order: context prompt, then channel prompt, then global prompt.
    assert _CapturingAgent.last_init["ephemeral_system_prompt"] == (
        "Context prompt\n\nChannel prompt\n\nGlobal prompt"
    )

View file

@ -459,7 +459,7 @@ class TestCustomProviderCompatibility:
migrate_config(interactive=False, quiet=True) migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 17 assert raw["_config_version"] == 18
assert raw["providers"]["openai-direct"] == { assert raw["providers"]["openai-direct"] == {
"api": "https://api.openai.com/v1", "api": "https://api.openai.com/v1",
"api_key": "test-key", "api_key": "test-key",
@ -606,6 +606,26 @@ class TestInterimAssistantMessageConfig:
migrate_config(interactive=False, quiet=True) migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 17 assert raw["_config_version"] == 18
assert raw["display"]["tool_progress"] == "off" assert raw["display"]["tool_progress"] == "off"
assert raw["display"]["interim_assistant_messages"] is True assert raw["display"]["interim_assistant_messages"] is True
class TestDiscordChannelPromptsConfig:
def test_default_config_includes_discord_channel_prompts(self):
assert DEFAULT_CONFIG["discord"]["channel_prompts"] == {}
def test_migrate_adds_discord_channel_prompts_default(self, tmp_path):
config_path = tmp_path / "config.yaml"
config_path.write_text(
yaml.safe_dump({"_config_version": 17, "discord": {"auto_thread": True}}),
encoding="utf-8",
)
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 18
assert raw["discord"]["auto_thread"] is True
assert raw["discord"]["channel_prompts"] == {}

View file

@ -64,4 +64,4 @@ class TestCamofoxConfigDefaults:
# The current schema version is tracked globally; unrelated default # The current schema version is tracked globally; unrelated default
# options may bump it after browser defaults are added. # options may bump it after browser defaults are added.
assert DEFAULT_CONFIG["_config_version"] == 17 assert DEFAULT_CONFIG["_config_version"] == 18

View file

@ -82,6 +82,18 @@ SKILLS_DIR = HERMES_HOME / "skills"
MAX_NAME_LENGTH = 64 MAX_NAME_LENGTH = 64
MAX_DESCRIPTION_LENGTH = 1024 MAX_DESCRIPTION_LENGTH = 1024
def _is_local_skill(skill_path: Path) -> bool:
    """Return True when *skill_path* lives under the local SKILLS_DIR.

    Skills discovered in external_dirs are treated as read-only, so the
    mutation helpers refuse to touch anything outside SKILLS_DIR.
    """
    try:
        # relative_to raises ValueError when the path is outside the root.
        skill_path.resolve().relative_to(SKILLS_DIR.resolve())
    except ValueError:
        return False
    return True
MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token
MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file
@ -360,6 +372,9 @@ def _edit_skill(name: str, content: str) -> Dict[str, Any]:
if not existing: if not existing:
return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."} return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."}
if not _is_local_skill(existing["path"]):
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be modified. Copy it to your local skills directory first."}
skill_md = existing["path"] / "SKILL.md" skill_md = existing["path"] / "SKILL.md"
# Back up original content for rollback # Back up original content for rollback
original_content = skill_md.read_text(encoding="utf-8") if skill_md.exists() else None original_content = skill_md.read_text(encoding="utf-8") if skill_md.exists() else None
@ -400,6 +415,9 @@ def _patch_skill(
if not existing: if not existing:
return {"success": False, "error": f"Skill '{name}' not found."} return {"success": False, "error": f"Skill '{name}' not found."}
if not _is_local_skill(existing["path"]):
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be modified. Copy it to your local skills directory first."}
skill_dir = existing["path"] skill_dir = existing["path"]
if file_path: if file_path:
@ -473,6 +491,9 @@ def _delete_skill(name: str) -> Dict[str, Any]:
if not existing: if not existing:
return {"success": False, "error": f"Skill '{name}' not found."} return {"success": False, "error": f"Skill '{name}' not found."}
if not _is_local_skill(existing["path"]):
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be deleted."}
skill_dir = existing["path"] skill_dir = existing["path"]
shutil.rmtree(skill_dir) shutil.rmtree(skill_dir)
@ -515,6 +536,9 @@ def _write_file(name: str, file_path: str, file_content: str) -> Dict[str, Any]:
if not existing: if not existing:
return {"success": False, "error": f"Skill '{name}' not found. Create it first with action='create'."} return {"success": False, "error": f"Skill '{name}' not found. Create it first with action='create'."}
if not _is_local_skill(existing["path"]):
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be modified. Copy it to your local skills directory first."}
target, err = _resolve_skill_target(existing["path"], file_path) target, err = _resolve_skill_target(existing["path"], file_path)
if err: if err:
return {"success": False, "error": err} return {"success": False, "error": err}
@ -548,6 +572,10 @@ def _remove_file(name: str, file_path: str) -> Dict[str, Any]:
existing = _find_skill(name) existing = _find_skill(name)
if not existing: if not existing:
return {"success": False, "error": f"Skill '{name}' not found."} return {"success": False, "error": f"Skill '{name}' not found."}
if not _is_local_skill(existing["path"]):
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be modified."}
skill_dir = existing["path"] skill_dir = existing["path"]
target, err = _resolve_skill_target(skill_dir, file_path) target, err = _resolve_skill_target(skill_dir, file_path)

View file

@ -0,0 +1,164 @@
---
sidebar_position: 14
title: "AWS Bedrock"
description: "Use Hermes Agent with Amazon Bedrock — native Converse API, IAM authentication, Guardrails, and cross-region inference"
---
# AWS Bedrock
Hermes Agent supports Amazon Bedrock as a native provider using the **Converse API** — not the OpenAI-compatible endpoint. This gives you full access to the Bedrock ecosystem: IAM authentication, Guardrails, cross-region inference profiles, and all foundation models.
## Prerequisites
- **AWS credentials** — any source supported by the [boto3 credential chain](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html):
- IAM instance role (EC2, ECS, Lambda — zero config)
- `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` environment variables
- `AWS_PROFILE` for SSO or named profiles
- `aws configure` for local development
- **boto3** — install with `pip install hermes-agent[bedrock]`
- **IAM permissions** — at minimum:
- `bedrock:InvokeModel` and `bedrock:InvokeModelWithResponseStream` (for inference)
- `bedrock:ListFoundationModels` and `bedrock:ListInferenceProfiles` (for model discovery)
:::tip EC2 / ECS / Lambda
On AWS compute, attach an IAM role with `AmazonBedrockFullAccess` and you're done. No API keys, no `.env` configuration — Hermes detects the instance role automatically.
:::
## Quick Start
```bash
# Install with Bedrock support
pip install hermes-agent[bedrock]
# Select Bedrock as your provider
hermes model
# → Choose "More providers..." → "AWS Bedrock"
# → Select your region and model
# Start chatting
hermes chat
```
## Configuration
After running `hermes model`, your `~/.hermes/config.yaml` will contain:
```yaml
model:
default: us.anthropic.claude-sonnet-4-6
provider: bedrock
base_url: https://bedrock-runtime.us-east-2.amazonaws.com
bedrock:
region: us-east-2
```
### Region
Set the AWS region in any of these ways (highest priority first):
1. `bedrock.region` in `config.yaml`
2. `AWS_REGION` environment variable
3. `AWS_DEFAULT_REGION` environment variable
4. Default: `us-east-1`
### Guardrails
To apply [Amazon Bedrock Guardrails](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html) to all model invocations:
```yaml
bedrock:
region: us-east-2
guardrail:
guardrail_identifier: "abc123def456" # From the Bedrock console
guardrail_version: "1" # Version number or "DRAFT"
stream_processing_mode: "async" # "sync" or "async"
trace: "disabled" # "enabled", "disabled", or "enabled_full"
```
### Model Discovery
Hermes auto-discovers available models via the Bedrock control plane. You can customize discovery:
```yaml
bedrock:
discovery:
enabled: true
provider_filter: ["anthropic", "amazon"] # Only show these providers
refresh_interval: 3600 # Cache for 1 hour
```
## Available Models
Bedrock models use **inference profile IDs** for on-demand invocation. The `hermes model` picker shows these automatically, with recommended models at the top:
| Model | ID | Notes |
|-------|-----|-------|
| Claude Sonnet 4.6 | `us.anthropic.claude-sonnet-4-6` | Recommended — best balance of speed and capability |
| Claude Opus 4.6 | `us.anthropic.claude-opus-4-6-v1` | Most capable |
| Claude Haiku 4.5 | `us.anthropic.claude-haiku-4-5-20251001-v1:0` | Fastest Claude |
| Amazon Nova Pro | `us.amazon.nova-pro-v1:0` | Amazon's flagship |
| Amazon Nova Micro | `us.amazon.nova-micro-v1:0` | Fastest, cheapest |
| DeepSeek V3.2 | `deepseek.v3.2` | Strong open model |
| Llama 4 Scout 17B | `us.meta.llama4-scout-17b-instruct-v1:0` | Meta's latest |
:::info Cross-Region Inference
Models prefixed with `us.` use cross-region inference profiles, which provide better capacity and automatic failover across AWS regions. Models prefixed with `global.` route across all available regions worldwide.
:::
## Switching Models Mid-Session
Use the `/model` command during a conversation:
```
/model us.amazon.nova-pro-v1:0
/model deepseek.v3.2
/model us.anthropic.claude-opus-4-6-v1
```
## Diagnostics
```bash
hermes doctor
```
The doctor checks:
- Whether AWS credentials are available (env vars, IAM role, SSO)
- Whether `boto3` is installed
- Whether the Bedrock API is reachable (ListFoundationModels)
- Number of available models in your region
## Gateway (Messaging Platforms)
Bedrock works with all Hermes gateway platforms (Telegram, Discord, Slack, Feishu, etc.). Configure Bedrock as your provider, then start the gateway normally:
```bash
hermes gateway setup
hermes gateway start
```
The gateway reads `config.yaml` and uses the same Bedrock provider configuration.
## Troubleshooting
### "No API key found" / "No AWS credentials"
Hermes checks for credentials in this order:
1. `AWS_BEARER_TOKEN_BEDROCK`
2. `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`
3. `AWS_PROFILE`
4. EC2 instance metadata (IMDS)
5. ECS container credentials
6. Lambda execution role
If none are found, run `aws configure` or attach an IAM role to your compute instance.
### "Invocation of model ID ... with on-demand throughput isn't supported"
Use an **inference profile ID** (prefixed with `us.` or `global.`) instead of the bare foundation model ID. For example:
- ❌ `anthropic.claude-sonnet-4-6`
- ✅ `us.anthropic.claude-sonnet-4-6`
### "ThrottlingException"
You've hit the Bedrock per-model rate limit. Hermes automatically retries with backoff. To increase limits, request a quota increase in the [AWS Service Quotas console](https://console.aws.amazon.com/servicequotas/).

View file

@ -297,6 +297,7 @@ discord:
reactions: true # Add emoji reactions during processing
ignored_channels: [] # Channel IDs where bot never responds
no_thread_channels: [] # Channel IDs where bot responds without threading
channel_prompts: {} # Per-channel ephemeral system prompts
# Session isolation (applies to all gateway platforms, not just Discord)
group_sessions_per_user: true # Isolate sessions per user in shared channels
@ -381,6 +382,28 @@ discord:
Useful for channels dedicated to bot interaction where threads would add unnecessary noise.
#### `discord.channel_prompts`
**Type:** mapping — **Default:** `{}`
Per-channel ephemeral system prompts that are injected on every turn in the matching Discord channel or thread without being persisted to transcript history.
```yaml
discord:
channel_prompts:
"1234567890": |
This channel is for research tasks. Prefer deep comparisons,
citations, and concise synthesis.
"9876543210": |
This forum is for therapy-style support. Be warm, grounded,
and non-judgmental.
```
Behavior:
- Exact thread/channel ID matches win.
- If a message arrives inside a thread or forum post and that thread has no explicit entry, Hermes falls back to the parent channel/forum ID.
- Prompts are applied ephemerally at runtime, so changing them affects future turns immediately without rewriting past session history.
#### `group_sessions_per_user`
**Type:** boolean — **Default:** `true`

View file

@ -281,6 +281,23 @@ If this returns your bot's user info, the token is valid. If it returns an error
**Fix**: Add your User ID to `MATTERMOST_ALLOWED_USERS` in `~/.hermes/.env` and restart the gateway. Remember: the User ID is a 26-character alphanumeric string, not your `@username`.
## Per-Channel Prompts
Assign ephemeral system prompts to specific Mattermost channels. The prompt is injected at runtime on every turn — never persisted to transcript history — so changes take effect immediately.
```yaml
mattermost:
channel_prompts:
"channel_id_abc123": |
You are a research assistant. Focus on academic sources,
citations, and concise synthesis.
"channel_id_def456": |
Code review mode. Be precise about edge cases and
performance implications.
```
Keys are Mattermost channel IDs (find them in the channel URL or via the API). All messages in the matching channel get the prompt injected as an ephemeral system instruction.
## Security
:::warning :::warning

View file

@ -418,6 +418,23 @@ Hermes supports voice on Slack:
---
## Per-Channel Prompts
Assign ephemeral system prompts to specific Slack channels. The prompt is injected at runtime on every turn — never persisted to transcript history — so changes take effect immediately.
```yaml
slack:
channel_prompts:
"C01RESEARCH": |
You are a research assistant. Focus on academic sources,
citations, and concise synthesis.
"C02ENGINEERING": |
Code review mode. Be precise about edge cases and
performance implications.
```
Keys are Slack channel IDs (find them via channel details → "About" → scroll to bottom). All messages in the matching channel get the prompt injected as an ephemeral system instruction.
## Troubleshooting
| Problem | Solution |

View file

@ -526,6 +526,29 @@ Unlike Discord (where reactions are additive), Telegram's Bot API replaces all b
If the bot doesn't have permission to add reactions in a group, the reaction calls fail silently and message processing continues normally.
::: :::
## Per-Channel Prompts
Assign ephemeral system prompts to specific Telegram groups or forum topics. The prompt is injected at runtime on every turn — never persisted to transcript history — so changes take effect immediately.
```yaml
telegram:
channel_prompts:
"-1001234567890": |
You are a research assistant. Focus on academic sources,
citations, and concise synthesis.
"42": |
This topic is for creative writing feedback. Be warm and
constructive.
```
Keys are chat IDs (groups/supergroups) or forum topic IDs. For forum groups, topic-level prompts override the group-level prompt:
- Message in topic `42` inside group `-1001234567890` → uses topic `42`'s prompt
- Message in topic `99` (no explicit entry) → falls back to group `-1001234567890`'s prompt
- Message in a group with no entry → no channel prompt applied
Numeric YAML keys are automatically normalized to strings.
## Troubleshooting
| Problem | Solution |