Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor

This commit is contained in:
Brooklyn Nicholson 2026-04-15 19:11:07 -05:00
commit 097702c8a7
55 changed files with 4904 additions and 51 deletions

View file

@ -298,6 +298,33 @@ def build_anthropic_client(api_key: str, base_url: str = None):
return _anthropic_sdk.Anthropic(**kwargs) return _anthropic_sdk.Anthropic(**kwargs)
def build_anthropic_bedrock_client(region: str):
    """Build an AnthropicBedrock client targeting the given AWS region.

    Routes through the Anthropic SDK's native Bedrock adapter so Claude
    features missing from the Converse API (prompt caching, thinking
    budgets, adaptive thinking, fast mode) stay available. Authentication
    relies on the standard boto3 credential chain (IAM roles, SSO, env
    vars) — no API key is passed here.

    Raises:
        ImportError: if the 'anthropic' package is missing, or too old to
            provide the ``AnthropicBedrock`` class.
    """
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Bedrock provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )
    if not hasattr(_anthropic_sdk, "AnthropicBedrock"):
        raise ImportError(
            "anthropic.AnthropicBedrock not available. "
            "Upgrade with: pip install 'anthropic>=0.39.0'"
        )
    from httpx import Timeout

    # Long read timeout (15 min) for extended-thinking responses; short
    # connect timeout so a bad region/endpoint fails fast.
    request_timeout = Timeout(timeout=900.0, connect=10.0)
    return _anthropic_sdk.AnthropicBedrock(
        aws_region=region,
        timeout=request_timeout,
    )
def read_claude_code_credentials() -> Optional[Dict[str, Any]]: def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
"""Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json. """Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.

View file

@ -775,6 +775,21 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
# Check cross-session rate limit guard before attempting Nous —
# if another session already recorded a 429, skip Nous entirely
# to avoid piling more requests onto the tapped RPH bucket.
try:
from agent.nous_rate_guard import nous_rate_limit_remaining
_remaining = nous_rate_limit_remaining()
if _remaining is not None and _remaining > 0:
logger.debug(
"Auxiliary: skipping Nous Portal (rate-limited, resets in %.0fs)",
_remaining,
)
return None, None
except Exception:
pass
nous = _read_nous_auth() nous = _read_nous_auth()
if not nous: if not nous:
return None, None return None, None
@ -899,6 +914,51 @@ def _current_custom_base_url() -> str:
return custom_base or "" return custom_base or ""
def _validate_proxy_env_urls() -> None:
"""Fail fast with a clear error when proxy env vars have malformed URLs.
Common cause: shell config (e.g. .zshrc) with a typo like
``export HTTP_PROXY=http://127.0.0.1:6153export NEXT_VAR=...``
which concatenates 'export' into the port number. Without this
check the OpenAI/httpx client raises a cryptic ``Invalid port``
error that doesn't name the offending env var.
"""
from urllib.parse import urlparse
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
value = str(os.environ.get(key) or "").strip()
if not value:
continue
try:
parsed = urlparse(value)
if parsed.scheme:
_ = parsed.port # raises ValueError for e.g. '6153export'
except ValueError as exc:
raise RuntimeError(
f"Malformed proxy environment variable {key}={value!r}. "
"Fix or unset your proxy settings and try again."
) from exc
def _validate_base_url(base_url: str) -> None:
"""Reject obviously broken custom endpoint URLs before they reach httpx."""
from urllib.parse import urlparse
candidate = str(base_url or "").strip()
if not candidate or candidate.startswith("acp://"):
return
try:
parsed = urlparse(candidate)
if parsed.scheme in {"http", "https"}:
_ = parsed.port # raises ValueError for malformed ports
except ValueError as exc:
raise RuntimeError(
f"Malformed custom endpoint URL: {candidate!r}. "
"Run `hermes setup` or `hermes model` and enter a valid http(s) base URL."
) from exc
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
runtime = _resolve_custom_runtime() runtime = _resolve_custom_runtime()
if len(runtime) == 2: if len(runtime) == 2:
@ -1299,6 +1359,7 @@ def resolve_provider_client(
Returns: Returns:
(client, resolved_model) or (None, None) if auth is unavailable. (client, resolved_model) or (None, None) if auth is unavailable.
""" """
_validate_proxy_env_urls()
# Normalise aliases # Normalise aliases
provider = _normalize_aux_provider(provider) provider = _normalize_aux_provider(provider)

1098
agent/bedrock_adapter.py Normal file

File diff suppressed because it is too large Load diff

View file

@ -112,6 +112,10 @@ _RATE_LIMIT_PATTERNS = [
"please retry after", "please retry after",
"resource_exhausted", "resource_exhausted",
"rate increased too quickly", # Alibaba/DashScope throttling "rate increased too quickly", # Alibaba/DashScope throttling
# AWS Bedrock throttling
"throttlingexception",
"too many concurrent requests",
"servicequotaexceededexception",
] ]
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit) # Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
@ -171,6 +175,11 @@ _CONTEXT_OVERFLOW_PATTERNS = [
# Chinese error messages (some providers return these) # Chinese error messages (some providers return these)
"超过最大长度", "超过最大长度",
"上下文长度", "上下文长度",
# AWS Bedrock Converse API error patterns
"input is too long",
"max input token",
"input token",
"exceeds the maximum number of input tokens",
] ]
# Model not found patterns # Model not found patterns

View file

@ -1012,6 +1012,16 @@ def get_model_context_length(
if ctx: if ctx:
return ctx return ctx
# 4b. AWS Bedrock — use static context length table.
# Bedrock's ListFoundationModels doesn't expose context window sizes,
# so we maintain a curated table in bedrock_adapter.py.
if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
try:
from agent.bedrock_adapter import get_bedrock_context_length
return get_bedrock_context_length(model)
except ImportError:
pass # boto3 not installed — fall through to generic resolution
# 5. Provider-aware lookups (before generic OpenRouter cache) # 5. Provider-aware lookups (before generic OpenRouter cache)
# These are provider-specific and take priority over the generic OR cache, # These are provider-specific and take priority over the generic OR cache,
# since the same model can have different context limits per provider # since the same model can have different context limits per provider

182
agent/nous_rate_guard.py Normal file
View file

@ -0,0 +1,182 @@
"""Cross-session rate limit guard for Nous Portal.
Writes rate limit state to a shared file so all sessions (CLI, gateway,
cron, auxiliary) can check whether Nous Portal is currently rate-limited
before making requests. Prevents retry amplification when RPH is tapped.
Each 429 from Nous triggers up to 9 API calls per conversation turn
(3 SDK retries x 3 Hermes retries), and every one of those calls counts
against RPH. By recording the rate limit state on first 429 and checking
it before subsequent attempts, we eliminate the amplification effect.
"""
from __future__ import annotations
import json
import logging
import os
import tempfile
import time
from typing import Any, Mapping, Optional
logger = logging.getLogger(__name__)
_STATE_SUBDIR = "rate_limits"
_STATE_FILENAME = "nous.json"
def _state_path() -> str:
    """Return the path to the shared Nous rate-limit state file.

    Resolves the Hermes home dir via ``hermes_constants`` when available,
    otherwise falls back to ``~/.hermes`` (e.g. in stripped-down installs).
    """
    try:
        from hermes_constants import get_hermes_home
        root = get_hermes_home()
    except ImportError:
        root = os.path.join(os.path.expanduser("~"), ".hermes")
    return os.path.join(root, _STATE_SUBDIR, _STATE_FILENAME)
def _parse_reset_seconds(headers: Optional[Mapping[str, str]]) -> Optional[float]:
"""Extract the best available reset-time estimate from response headers.
Priority:
1. x-ratelimit-reset-requests-1h (hourly RPH window most useful)
2. x-ratelimit-reset-requests (per-minute RPM window)
3. retry-after (generic HTTP header)
Returns seconds-from-now, or None if no usable header found.
"""
if not headers:
return None
lowered = {k.lower(): v for k, v in headers.items()}
for key in (
"x-ratelimit-reset-requests-1h",
"x-ratelimit-reset-requests",
"retry-after",
):
raw = lowered.get(key)
if raw is not None:
try:
val = float(raw)
if val > 0:
return val
except (TypeError, ValueError):
pass
return None
def record_nous_rate_limit(
    *,
    headers: Optional[Mapping[str, str]] = None,
    error_context: Optional[dict[str, Any]] = None,
    default_cooldown: float = 300.0,
) -> None:
    """Record that Nous Portal is rate-limited.

    Determines the reset time from response headers when possible, then
    from the structured error context, and finally falls back to
    ``default_cooldown`` (5 minutes). The state is written atomically to
    a shared file so every session can consult it. Write failures are
    swallowed (best-effort — a missing guard file just means one extra 429).

    Args:
        headers: HTTP response headers from the 429 error.
        error_context: Structured error context from _extract_api_error_context().
        default_cooldown: Fallback cooldown in seconds when no header data.
    """
    now = time.time()
    reset_at: Optional[float] = None

    # Headers carry the most accurate reset estimate.
    header_delta = _parse_reset_seconds(headers)
    if header_delta is not None:
        reset_at = now + header_delta

    # Next best: an absolute reset_at parsed out of the error body.
    if reset_at is None and isinstance(error_context, dict):
        body_reset = error_context.get("reset_at")
        if isinstance(body_reset, (int, float)) and body_reset > now:
            reset_at = float(body_reset)

    # Last resort: fixed cooldown.
    if reset_at is None:
        reset_at = now + default_cooldown

    path = _state_path()
    try:
        state_dir = os.path.dirname(path)
        os.makedirs(state_dir, exist_ok=True)
        payload = {
            "reset_at": reset_at,
            "recorded_at": now,
            "reset_seconds": reset_at - now,
        }
        # Atomic write: temp file in the same directory, then rename over
        # the target so readers never observe a half-written file.
        fd, tmp_path = tempfile.mkstemp(dir=state_dir, suffix=".tmp")
        try:
            with os.fdopen(fd, "w") as handle:
                json.dump(payload, handle)
            os.replace(tmp_path, path)
        except Exception:
            # Don't leave a stray temp file behind on failure.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise
        logger.info(
            "Nous rate limit recorded: resets in %.0fs (at %.0f)",
            reset_at - now, reset_at,
        )
    except Exception as exc:
        logger.debug("Failed to write Nous rate limit state: %s", exc)
def nous_rate_limit_remaining() -> Optional[float]:
    """Check if Nous Portal is currently rate-limited.

    Reads the shared state file; an expired or unreadable file is treated
    as "not rate-limited" (and an expired file is deleted opportunistically).

    Returns:
        Seconds remaining until reset, or None if not rate-limited.
    """
    path = _state_path()
    try:
        with open(path) as handle:
            state = json.load(handle)
        seconds_left = state.get("reset_at", 0) - time.time()
        if seconds_left > 0:
            return seconds_left
        # Window has passed — remove the stale state file (best-effort).
        try:
            os.unlink(path)
        except OSError:
            pass
        return None
    except (FileNotFoundError, json.JSONDecodeError, KeyError, TypeError):
        return None
def clear_nous_rate_limit() -> None:
    """Clear the rate limit state (e.g., after a successful Nous request)."""
    path = _state_path()
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass  # nothing recorded — already clear
    except OSError as err:
        # Best-effort: a stale guard file only delays Nous attempts briefly.
        logger.debug("Failed to clear Nous rate limit state: %s", err)
def format_remaining(seconds: float) -> str:
    """Render a seconds count as a compact duration ("45s", "2m 10s", "1h 3m").

    Negative inputs clamp to zero; sub-minute components are dropped once
    the duration reaches an hour.
    """
    total = int(seconds)
    if total < 0:
        total = 0
    hours, rem = divmod(total, 3600)
    minutes, secs = divmod(rem, 60)
    if hours:
        return f"{hours}h {minutes}m" if minutes else f"{hours}h"
    if minutes:
        return f"{minutes}m {secs}s" if secs else f"{minutes}m"
    return f"{secs}s"

View file

@ -93,6 +93,17 @@ _DB_CONNSTR_RE = re.compile(
re.IGNORECASE, re.IGNORECASE,
) )
# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 for "{")
# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs.
_JWT_RE = re.compile(
r"eyJ[A-Za-z0-9_-]{10,}" # Header (always starts with eyJ)
r"(?:\.[A-Za-z0-9_=-]{4,}){0,2}" # Optional payload and/or signature
)
# Discord user/role mentions: <@123456789012345678> or <@!123456789012345678>
# Snowflake IDs are 17-20 digit integers that resolve to specific Discord accounts.
_DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
# E.164 phone numbers: +<country><number>, 7-15 digits # E.164 phone numbers: +<country><number>, 7-15 digits
# Negative lookahead prevents matching hex strings or identifiers # Negative lookahead prevents matching hex strings or identifiers
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])") _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
@ -159,6 +170,12 @@ def redact_sensitive_text(text: str) -> str:
# Database connection string passwords # Database connection string passwords
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text) text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
# JWT tokens (eyJ... — base64-encoded JSON headers)
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
# Discord user/role mentions (<@snowflake_id>)
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
# E.164 phone numbers (Signal, WhatsApp) # E.164 phone numbers (Signal, WhatsApp)
def _redact_phone(m): def _redact_phone(m):
phone = m.group(1) phone = m.group(1)

View file

@ -284,6 +284,80 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
source_url="https://ai.google.dev/pricing", source_url="https://ai.google.dev/pricing",
pricing_version="google-pricing-2026-03-16", pricing_version="google-pricing-2026-03-16",
), ),
# AWS Bedrock — pricing per the Bedrock pricing page.
# Bedrock charges the same per-token rates as the model provider but
# through AWS billing. These are the on-demand prices (no commitment).
# Source: https://aws.amazon.com/bedrock/pricing/
(
"bedrock",
"anthropic.claude-opus-4-6",
): PricingEntry(
input_cost_per_million=Decimal("15.00"),
output_cost_per_million=Decimal("75.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-sonnet-4-6",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-sonnet-4-5",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-haiku-4-5",
): PricingEntry(
input_cost_per_million=Decimal("0.80"),
output_cost_per_million=Decimal("4.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-pro",
): PricingEntry(
input_cost_per_million=Decimal("0.80"),
output_cost_per_million=Decimal("3.20"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-lite",
): PricingEntry(
input_cost_per_million=Decimal("0.06"),
output_cost_per_million=Decimal("0.24"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-micro",
): PricingEntry(
input_cost_per_million=Decimal("0.035"),
output_cost_per_million=Decimal("0.14"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
} }

View file

@ -554,6 +554,12 @@ def load_gateway_config() -> GatewayConfig:
bridged["mention_patterns"] = platform_cfg["mention_patterns"] bridged["mention_patterns"] = platform_cfg["mention_patterns"]
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg: if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"] bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
if "channel_prompts" in platform_cfg:
channel_prompts = platform_cfg["channel_prompts"]
if isinstance(channel_prompts, dict):
bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
else:
bridged["channel_prompts"] = channel_prompts
if not bridged: if not bridged:
continue continue
plat_data = platforms_data.setdefault(plat.value, {}) plat_data = platforms_data.setdefault(plat.value, {})

View file

@ -683,6 +683,10 @@ class MessageEvent:
# Discord channel_skill_bindings). A single name or ordered list. # Discord channel_skill_bindings). A single name or ordered list.
auto_skill: Optional[str | list[str]] = None auto_skill: Optional[str | list[str]] = None
# Per-channel ephemeral system prompt (e.g. Discord channel_prompts).
# Applied at API call time and never persisted to transcript history.
channel_prompt: Optional[str] = None
# Internal flag — set for synthetic events (e.g. background process # Internal flag — set for synthetic events (e.g. background process
# completion notifications) that must bypass user authorization checks. # completion notifications) that must bypass user authorization checks.
internal: bool = False internal: bool = False
@ -776,6 +780,36 @@ _RETRYABLE_ERROR_PATTERNS = (
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]] MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
def resolve_channel_prompt(
    config_extra: dict,
    channel_id: str,
    parent_id: str | None = None,
) -> str | None:
    """Resolve a per-channel ephemeral prompt from platform config.

    Looks up ``channel_prompts`` in the adapter's ``config.extra`` dict,
    checking *channel_id* first and then *parent_id* (so forum threads /
    child channels inherit their parent's prompt). Blank or whitespace-only
    entries are treated as absent, as is a non-dict ``channel_prompts``.

    Returns:
        The prompt string, or None when nothing matches.
    """
    mapping = config_extra.get("channel_prompts") or {}
    if not isinstance(mapping, dict):
        return None
    candidates = [key for key in (channel_id, parent_id) if key]
    for key in candidates:
        entry = mapping.get(key)
        if entry is None:
            continue
        text = str(entry).strip()
        if text:
            return text
    return None
class BasePlatformAdapter(ABC): class BasePlatformAdapter(ABC):
""" """
Base class for platform adapters. Base class for platform adapters.

View file

@ -1992,11 +1992,14 @@ class DiscordAdapter(BasePlatformAdapter):
) )
msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT
channel_id = str(interaction.channel_id)
parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "")
return MessageEvent( return MessageEvent(
text=text, text=text,
message_type=msg_type, message_type=msg_type,
source=source, source=source,
raw_message=interaction, raw_message=interaction,
channel_prompt=self._resolve_channel_prompt(channel_id, parent_id or None),
) )
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@ -2067,14 +2070,17 @@ class DiscordAdapter(BasePlatformAdapter):
chat_topic=chat_topic, chat_topic=chat_topic,
) )
_parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "") _parent_channel = self._thread_parent_channel(getattr(interaction, "channel", None))
_parent_id = str(getattr(_parent_channel, "id", "") or "")
_skills = self._resolve_channel_skills(thread_id, _parent_id or None) _skills = self._resolve_channel_skills(thread_id, _parent_id or None)
_channel_prompt = self._resolve_channel_prompt(thread_id, _parent_id or None)
event = MessageEvent( event = MessageEvent(
text=text, text=text,
message_type=MessageType.TEXT, message_type=MessageType.TEXT,
source=source, source=source,
raw_message=interaction, raw_message=interaction,
auto_skill=_skills, auto_skill=_skills,
channel_prompt=_channel_prompt,
) )
await self.handle_message(event) await self.handle_message(event)
@ -2103,6 +2109,11 @@ class DiscordAdapter(BasePlatformAdapter):
return list(dict.fromkeys(skills)) # dedup, preserve order return list(dict.fromkeys(skills)) # dedup, preserve order
return None return None
def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
    """Look up a per-channel ephemeral prompt, preferring this channel over its parent.

    Thin wrapper delegating to the shared gateway helper with this
    adapter's ``config.extra``.
    """
    from gateway.platforms.base import resolve_channel_prompt
    extra = self.config.extra
    return resolve_channel_prompt(extra, channel_id, parent_id)
def _thread_parent_channel(self, channel: Any) -> Any: def _thread_parent_channel(self, channel: Any) -> Any:
"""Return the parent text channel when invoked from a thread.""" """Return the parent text channel when invoked from a thread."""
return getattr(channel, "parent", None) or channel return getattr(channel, "parent", None) or channel
@ -2654,6 +2665,7 @@ class DiscordAdapter(BasePlatformAdapter):
_parent_id = str(getattr(_chan, "parent_id", "") or "") _parent_id = str(getattr(_chan, "parent_id", "") or "")
_chan_id = str(getattr(_chan, "id", "")) _chan_id = str(getattr(_chan, "id", ""))
_skills = self._resolve_channel_skills(_chan_id, _parent_id or None) _skills = self._resolve_channel_skills(_chan_id, _parent_id or None)
_channel_prompt = self._resolve_channel_prompt(_chan_id, _parent_id or None)
reply_to_id = None reply_to_id = None
reply_to_text = None reply_to_text = None
@ -2674,6 +2686,7 @@ class DiscordAdapter(BasePlatformAdapter):
reply_to_text=reply_to_text, reply_to_text=reply_to_text,
timestamp=message.created_at, timestamp=message.created_at,
auto_skill=_skills, auto_skill=_skills,
channel_prompt=_channel_prompt,
) )
# Track thread participation so the bot won't require @mention for # Track thread participation so the bot won't require @mention for

View file

@ -718,6 +718,12 @@ class MattermostAdapter(BasePlatformAdapter):
thread_id=thread_id, thread_id=thread_id,
) )
# Per-channel ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_channel_prompt = resolve_channel_prompt(
self.config.extra, channel_id, None,
)
msg_event = MessageEvent( msg_event = MessageEvent(
text=message_text, text=message_text,
message_type=msg_type, message_type=msg_type,
@ -726,6 +732,7 @@ class MattermostAdapter(BasePlatformAdapter):
message_id=post_id, message_id=post_id,
media_urls=media_urls if media_urls else None, media_urls=media_urls if media_urls else None,
media_types=media_types if media_types else None, media_types=media_types if media_types else None,
channel_prompt=_channel_prompt,
) )
await self.handle_message(msg_event) await self.handle_message(msg_event)

View file

@ -1167,6 +1167,12 @@ class SlackAdapter(BasePlatformAdapter):
thread_id=thread_ts, thread_id=thread_ts,
) )
# Per-channel ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_channel_prompt = resolve_channel_prompt(
self.config.extra, channel_id, None,
)
msg_event = MessageEvent( msg_event = MessageEvent(
text=text, text=text,
message_type=msg_type, message_type=msg_type,
@ -1176,6 +1182,7 @@ class SlackAdapter(BasePlatformAdapter):
media_urls=media_urls, media_urls=media_urls,
media_types=media_types, media_types=media_types,
reply_to_message_id=thread_ts if thread_ts != ts else None, reply_to_message_id=thread_ts if thread_ts != ts else None,
channel_prompt=_channel_prompt,
) )
# Only react when bot is directly addressed (DM or @mention). # Only react when bot is directly addressed (DM or @mention).

View file

@ -2775,6 +2775,15 @@ class TelegramAdapter(BasePlatformAdapter):
reply_to_id = str(message.reply_to_message.message_id) reply_to_id = str(message.reply_to_message.message_id)
reply_to_text = message.reply_to_message.text or message.reply_to_message.caption or None reply_to_text = message.reply_to_message.text or message.reply_to_message.caption or None
# Per-channel/topic ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_chat_id_str = str(chat.id)
_channel_prompt = resolve_channel_prompt(
self.config.extra,
thread_id_str or _chat_id_str,
_chat_id_str if thread_id_str else None,
)
return MessageEvent( return MessageEvent(
text=message.text or "", text=message.text or "",
message_type=msg_type, message_type=msg_type,
@ -2784,6 +2793,7 @@ class TelegramAdapter(BasePlatformAdapter):
reply_to_message_id=reply_to_id, reply_to_message_id=reply_to_id,
reply_to_text=reply_to_text, reply_to_text=reply_to_text,
auto_skill=topic_skill, auto_skill=topic_skill,
channel_prompt=_channel_prompt,
timestamp=message.date, timestamp=message.date,
) )

View file

@ -2891,6 +2891,7 @@ class GatewayRunner:
message_type=_MT.TEXT, message_type=_MT.TEXT,
source=event.source, source=event.source,
message_id=event.message_id, message_id=event.message_id,
channel_prompt=event.channel_prompt,
) )
adapter._pending_messages[_quick_key] = queued_event adapter._pending_messages[_quick_key] = queued_event
return "Queued for the next turn." return "Queued for the next turn."
@ -3875,6 +3876,7 @@ class GatewayRunner:
session_id=session_entry.session_id, session_id=session_entry.session_id,
session_key=session_key, session_key=session_key,
event_message_id=event.message_id, event_message_id=event.message_id,
channel_prompt=event.channel_prompt,
) )
# Stop persistent typing indicator now that the agent is done # Stop persistent typing indicator now that the agent is done
@ -5186,6 +5188,7 @@ class GatewayRunner:
message_type=MessageType.TEXT, message_type=MessageType.TEXT,
source=source, source=source,
raw_message=event.raw_message, raw_message=event.raw_message,
channel_prompt=event.channel_prompt,
) )
# Let the normal message handler process it # Let the normal message handler process it
@ -8166,6 +8169,7 @@ class GatewayRunner:
session_key: str = None, session_key: str = None,
_interrupt_depth: int = 0, _interrupt_depth: int = 0,
event_message_id: Optional[str] = None, event_message_id: Optional[str] = None,
channel_prompt: Optional[str] = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
Run the agent with the given message and context. Run the agent with the given message and context.
@ -8520,8 +8524,12 @@ class GatewayRunner:
# Platform.LOCAL ("local") maps to "cli"; others pass through as-is. # Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value
# Combine platform context with user-configured ephemeral system prompt # Combine platform context, per-channel context, and the user-configured
# ephemeral system prompt.
combined_ephemeral = context_prompt or "" combined_ephemeral = context_prompt or ""
event_channel_prompt = (channel_prompt or "").strip()
if event_channel_prompt:
combined_ephemeral = (combined_ephemeral + "\n\n" + event_channel_prompt).strip()
if self._ephemeral_system_prompt: if self._ephemeral_system_prompt:
combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip() combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
@ -9473,6 +9481,7 @@ class GatewayRunner:
session_key=session_key, session_key=session_key,
_interrupt_depth=_interrupt_depth + 1, _interrupt_depth=_interrupt_depth + 1,
event_message_id=next_message_id, event_message_id=next_message_id,
channel_prompt=pending_event.channel_prompt,
) )
finally: finally:
# Stop progress sender, interrupt monitor, and notification task # Stop progress sender, interrupt monitor, and notification task

View file

@ -274,6 +274,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("XIAOMI_API_KEY",), api_key_env_vars=("XIAOMI_API_KEY",),
base_url_env_var="XIAOMI_BASE_URL", base_url_env_var="XIAOMI_BASE_URL",
), ),
"bedrock": ProviderConfig(
id="bedrock",
name="AWS Bedrock",
auth_type="aws_sdk",
inference_base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
api_key_env_vars=(),
base_url_env_var="BEDROCK_BASE_URL",
),
} }
@ -924,6 +932,7 @@ def resolve_provider(
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth",
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface", "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
"mimo": "xiaomi", "xiaomi-mimo": "xiaomi", "mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
"aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
"go": "opencode-go", "opencode-go-sub": "opencode-go", "go": "opencode-go", "opencode-go-sub": "opencode-go",
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode", "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
# Local server aliases — route through the generic custom provider # Local server aliases — route through the generic custom provider
@ -980,6 +989,15 @@ def resolve_provider(
if has_usable_secret(os.getenv(env_var, "")): if has_usable_secret(os.getenv(env_var, "")):
return pid return pid
# AWS Bedrock — detect via boto3 credential chain (IAM roles, SSO, env vars).
# This runs after API-key providers so explicit keys always win.
try:
from agent.bedrock_adapter import has_aws_credentials
if has_aws_credentials():
return "bedrock"
except ImportError:
pass # boto3 not installed — skip Bedrock auto-detection
raise AuthError( raise AuthError(
"No inference provider configured. Run 'hermes model' to choose a " "No inference provider configured. Run 'hermes model' to choose a "
"provider and model, or set an API key (OPENROUTER_API_KEY, " "provider and model, or set an API key (OPENROUTER_API_KEY, "
@ -2446,6 +2464,13 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
pconfig = PROVIDER_REGISTRY.get(target) pconfig = PROVIDER_REGISTRY.get(target)
if pconfig and pconfig.auth_type == "api_key": if pconfig and pconfig.auth_type == "api_key":
return get_api_key_provider_status(target) return get_api_key_provider_status(target)
# AWS SDK providers (Bedrock) — check via boto3 credential chain
if pconfig and pconfig.auth_type == "aws_sdk":
try:
from agent.bedrock_adapter import has_aws_credentials
return {"logged_in": has_aws_credentials(), "provider": target}
except ImportError:
return {"logged_in": False, "provider": target, "error": "boto3 not installed"}
return {"logged_in": False} return {"logged_in": False}

View file

@ -368,6 +368,27 @@ def _interactive_auth() -> None:
print("=" * 50) print("=" * 50)
auth_list_command(SimpleNamespace(provider=None)) auth_list_command(SimpleNamespace(provider=None))
# Show AWS Bedrock credential status (not in the pool — uses boto3 chain)
try:
from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
if has_aws_credentials():
auth_source = resolve_aws_auth_env_var() or "unknown"
region = resolve_bedrock_region()
print(f"bedrock (AWS SDK credential chain):")
print(f" Auth: {auth_source}")
print(f" Region: {region}")
try:
import boto3
sts = boto3.client("sts", region_name=region)
identity = sts.get_caller_identity()
arn = identity.get("Arn", "unknown")
print(f" Identity: {arn}")
except Exception:
print(f" Identity: (could not resolve — boto3 STS call failed)")
print()
except ImportError:
pass # boto3 or bedrock_adapter not available
print() print()
# Main menu # Main menu

View file

@ -419,6 +419,27 @@ DEFAULT_CONFIG = {
"protect_last_n": 20, # minimum recent messages to keep uncompressed "protect_last_n": 20, # minimum recent messages to keep uncompressed
}, },
# AWS Bedrock provider configuration.
# Only used when model.provider is "bedrock".
"bedrock": {
"region": "", # AWS region for Bedrock API calls (empty = AWS_REGION env var → us-east-1)
"discovery": {
"enabled": True, # Auto-discover models via ListFoundationModels
"provider_filter": [], # Only show models from these providers (e.g. ["anthropic", "amazon"])
"refresh_interval": 3600, # Cache discovery results for this many seconds
},
"guardrail": {
# Amazon Bedrock Guardrails — content filtering and safety policies.
# Create a guardrail in the Bedrock console, then set the ID and version here.
# See: https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html
"guardrail_identifier": "", # e.g. "abc123def456"
"guardrail_version": "", # e.g. "1" or "DRAFT"
"stream_processing_mode": "async", # "sync" or "async"
"trace": "disabled", # "enabled", "disabled", or "enabled_full"
},
},
"smart_model_routing": { "smart_model_routing": {
"enabled": False, "enabled": False,
"max_simple_chars": 160, "max_simple_chars": 160,
@ -638,6 +659,7 @@ DEFAULT_CONFIG = {
"allowed_channels": "", # If set, bot ONLY responds in these channel IDs (whitelist) "allowed_channels": "", # If set, bot ONLY responds in these channel IDs (whitelist)
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack) "auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing "reactions": True, # Add 👀/✅/❌ reactions to messages during processing
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
}, },
# WhatsApp platform settings (gateway mode) # WhatsApp platform settings (gateway mode)
@ -648,6 +670,21 @@ DEFAULT_CONFIG = {
# Supports \n for newlines, e.g. "🤖 *My Bot*\n──────\n" # Supports \n for newlines, e.g. "🤖 *My Bot*\n──────\n"
}, },
# Telegram platform settings (gateway mode)
"telegram": {
"channel_prompts": {}, # Per-chat/topic ephemeral system prompts (topics inherit from parent group)
},
# Slack platform settings (gateway mode)
"slack": {
"channel_prompts": {}, # Per-channel ephemeral system prompts
},
# Mattermost platform settings (gateway mode)
"mattermost": {
"channel_prompts": {}, # Per-channel ephemeral system prompts
},
# Approval mode for dangerous commands: # Approval mode for dangerous commands:
# manual — always prompt the user (default) # manual — always prompt the user (default)
# smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk # smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk
@ -703,7 +740,7 @@ DEFAULT_CONFIG = {
}, },
# Config schema version - bump this when adding new required fields # Config schema version - bump this when adding new required fields
"_config_version": 17, "_config_version": 18,
} }
# ============================================================================= # =============================================================================
@ -974,6 +1011,22 @@ OPTIONAL_ENV_VARS = {
"category": "provider", "category": "provider",
"advanced": True, "advanced": True,
}, },
"AWS_REGION": {
"description": "AWS region for Bedrock API calls (e.g. us-east-1, eu-central-1)",
"prompt": "AWS Region",
"url": "https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-regions.html",
"password": False,
"category": "provider",
"advanced": True,
},
"AWS_PROFILE": {
"description": "AWS named profile for Bedrock authentication (from ~/.aws/credentials)",
"prompt": "AWS Profile",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
# ── Tool API keys ── # ── Tool API keys ──
"EXA_API_KEY": { "EXA_API_KEY": {

View file

@ -860,6 +860,31 @@ def run_doctor(args):
except Exception as _e: except Exception as _e:
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)} ") print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)} ")
# -- AWS Bedrock --
# Bedrock uses the AWS SDK credential chain, not API keys.
try:
from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
if has_aws_credentials():
_auth_var = resolve_aws_auth_env_var()
_region = resolve_bedrock_region()
_label = "AWS Bedrock".ljust(20)
print(f" Checking AWS Bedrock...", end="", flush=True)
try:
import boto3
_br_client = boto3.client("bedrock", region_name=_region)
_br_resp = _br_client.list_foundation_models()
_model_count = len(_br_resp.get("modelSummaries", []))
print(f"\r {color('', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)} ")
except ImportError:
print(f"\r {color('', Colors.YELLOW)} {_label} {color('(boto3 not installed — pip install hermes-agent[bedrock])', Colors.DIM)} ")
issues.append("Install boto3 for Bedrock: pip install hermes-agent[bedrock]")
except Exception as _e:
_err_name = type(_e).__name__
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)} ")
issues.append(f"AWS Bedrock: {_err_name} — check IAM permissions for bedrock:ListFoundationModels")
except ImportError:
pass # bedrock_adapter not available — skip silently
# ========================================================================= # =========================================================================
# Check: Submodules # Check: Submodules
# ========================================================================= # =========================================================================

View file

@ -222,7 +222,7 @@ def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = Fals
current_cmd = "" current_cmd = ""
else: else:
result = subprocess.run( result = subprocess.run(
["ps", "eww", "-ax", "-o", "pid=,command="], ["ps", "-A", "eww", "-o", "pid=,command="],
capture_output=True, capture_output=True,
text=True, text=True,
timeout=10, timeout=10,

View file

@ -1370,6 +1370,8 @@ def select_provider_and_model(args=None):
_model_flow_anthropic(config, current_model) _model_flow_anthropic(config, current_model)
elif selected_provider == "kimi-coding": elif selected_provider == "kimi-coding":
_model_flow_kimi(config, current_model) _model_flow_kimi(config, current_model)
elif selected_provider == "bedrock":
_model_flow_bedrock(config, current_model)
elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"): elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"):
_model_flow_api_key_provider(config, selected_provider, current_model) _model_flow_api_key_provider(config, selected_provider, current_model)
@ -2656,6 +2658,252 @@ def _model_flow_kimi(config, current_model=""):
print("No change.") print("No change.")
def _model_flow_bedrock_api_key(config, region, current_model=""):
    """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.

    For developers who don't have an AWS account but received a Bedrock API Key
    from their AWS admin. Works like any OpenAI-compatible endpoint.

    Args:
        config: CLI config namespace (not read here; kept so all
            ``_model_flow_*`` entry points share the same signature).
        region: AWS region used to build the bedrock-mantle endpoint URL.
        current_model: Currently configured model ID, passed to the model
            picker so it can be preselected.

    Side effects (in order, on the success path): saves the key under
    AWS_BEARER_TOKEN_BEDROCK, persists the model choice, rewrites the
    ``model`` and ``bedrock`` sections of config.yaml, mirrors the key into
    OPENAI_API_KEY / OPENAI_BASE_URL, saves the config, and deactivates the
    previously active provider. Returns None in every path.
    """
    from hermes_cli.auth import _prompt_model_selection, _save_model_choice, deactivate_provider
    from hermes_cli.config import load_config, save_config, get_env_value, save_env_value
    from hermes_cli.models import _PROVIDER_MODELS
    # The mantle endpoint is an OpenAI-compatible front door per region.
    mantle_base_url = f"https://bedrock-mantle.{region}.api.aws/v1"
    # Prompt for API key — reuse a previously saved key if one exists so the
    # user isn't re-prompted on every run.
    existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
    if existing_key:
        print(f" Bedrock API Key: {existing_key[:12]}... ✓")
    else:
        print(f" Endpoint: {mantle_base_url}")
        print()
        try:
            # getpass hides the key from the terminal / shell history.
            import getpass
            api_key = getpass.getpass(" Bedrock API Key: ").strip()
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C / Ctrl-D aborts the whole flow with no changes saved.
            print()
            return
        if not api_key:
            print(" Cancelled.")
            return
        save_env_value("AWS_BEARER_TOKEN_BEDROCK", api_key)
        existing_key = api_key
        print(" ✓ API key saved.")
        print()
    # Model selection — use static list (mantle doesn't need boto3 for discovery)
    model_list = _PROVIDER_MODELS.get("bedrock", [])
    print(f" Showing {len(model_list)} curated models")
    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        # No curated list available — fall back to free-form entry.
        try:
            selected = input(" Model ID: ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None
    if selected:
        _save_model_choice(selected)
        # Save as custom provider pointing to bedrock-mantle.
        # NOTE(review): provider is recorded as "custom" (not "bedrock") so
        # the runtime treats mantle as a plain OpenAI-compatible endpoint.
        cfg = load_config()
        model = cfg.get("model")
        if not isinstance(model, dict):
            # Legacy configs stored the model as a bare string; normalize.
            model = {"default": model} if model else {}
        cfg["model"] = model
        model["provider"] = "custom"
        model["base_url"] = mantle_base_url
        model.pop("api_mode", None)  # chat_completions is the default
        # Also save region in bedrock config for reference
        bedrock_cfg = cfg.get("bedrock", {})
        if not isinstance(bedrock_cfg, dict):
            bedrock_cfg = {}
        bedrock_cfg["region"] = region
        cfg["bedrock"] = bedrock_cfg
        # Save the API key env var name so hermes knows where to find it.
        # NOTE(review): this overwrites any existing OPENAI_API_KEY /
        # OPENAI_BASE_URL values in the env store — confirm intended.
        save_env_value("OPENAI_API_KEY", existing_key)
        save_env_value("OPENAI_BASE_URL", mantle_base_url)
        save_config(cfg)
        deactivate_provider()
        print(f" Default model set to: {selected} (via Bedrock API Key, {region})")
        print(f" Endpoint: {mantle_base_url}")
    else:
        print(" No change.")
def _model_flow_bedrock(config, current_model=""):
    """AWS Bedrock provider: verify credentials, pick region, discover models.

    Uses the native Converse API via boto3 — not the OpenAI-compatible
    endpoint. Auth is handled by the AWS SDK default credential chain (env
    vars, profile, instance role), so no API key prompt is needed unless the
    user explicitly opts into Bedrock API Key mode (delegated to
    ``_model_flow_bedrock_api_key``).

    Args:
        config: CLI config namespace, forwarded to the API-key sub-flow.
        current_model: Currently configured model ID, passed to the model
            picker so it can be preselected.

    Returns None in every path; persists model/provider/region into
    config.yaml on the success path.
    """
    from hermes_cli.auth import _prompt_model_selection, _save_model_choice, deactivate_provider
    from hermes_cli.config import load_config, save_config
    from hermes_cli.models import _PROVIDER_MODELS
    # 1. Check for AWS credentials
    try:
        from agent.bedrock_adapter import (
            has_aws_credentials,
            resolve_aws_auth_env_var,
            resolve_bedrock_region,
            discover_bedrock_models,
        )
    except ImportError:
        # bedrock_adapter requires boto3; without it the flow cannot proceed.
        print(" ✗ boto3 is not installed. Install it with:")
        print(" pip install boto3")
        print()
        return
    if not has_aws_credentials():
        # No env-var credentials detected — warn but continue, since boto3's
        # default chain may still resolve credentials implicitly (IMDS, SSO).
        print(" ⚠ No AWS credentials detected via environment variables.")
        print(" Bedrock will use boto3's default credential chain (IMDS, SSO, etc.)")
        print()
    auth_var = resolve_aws_auth_env_var()
    if auth_var:
        print(f" AWS credentials: {auth_var}")
    else:
        print(" AWS credentials: boto3 default chain (instance role / SSO)")
    print()
    # 2. Region selection (empty input keeps the currently resolved region)
    current_region = resolve_bedrock_region()
    try:
        region_input = input(f" AWS Region [{current_region}]: ").strip()
    except (KeyboardInterrupt, EOFError):
        print()
        return
    region = region_input or current_region
    # 2b. Authentication mode
    print(" Choose authentication method:")
    print()
    print(" 1. IAM credential chain (recommended)")
    print(" Works with EC2 instance roles, SSO, env vars, aws configure")
    print(" 2. Bedrock API Key")
    print(" Enter your Bedrock API Key directly — also supports")
    print(" team scenarios where an admin distributes keys")
    print()
    try:
        auth_choice = input(" Choice [1]: ").strip()
    except (KeyboardInterrupt, EOFError):
        print()
        return
    if auth_choice == "2":
        # API-key mode is a separate flow (OpenAI-compatible mantle endpoint).
        _model_flow_bedrock_api_key(config, region, current_model)
        return
    # 3. Model discovery — try live API first, fall back to static list
    print(f" Discovering models in {region}...")
    live_models = discover_bedrock_models(region)
    if live_models:
        # Drop non-text / non-chat model families that can't serve as the
        # agent's default model (image gen, embeddings, video, safety).
        _EXCLUDE_PREFIXES = (
            "stability.", "cohere.embed", "twelvelabs.", "us.stability.",
            "us.cohere.embed", "us.twelvelabs.", "global.cohere.embed",
            "global.twelvelabs.",
        )
        _EXCLUDE_SUBSTRINGS = ("safeguard", "voxtral", "palmyra-vision")
        filtered = []
        for m in live_models:
            mid = m["id"]
            if any(mid.startswith(p) for p in _EXCLUDE_PREFIXES):
                continue
            if any(s in mid.lower() for s in _EXCLUDE_SUBSTRINGS):
                continue
            filtered.append(m)
        # Deduplicate: prefer inference profiles (us.*, global.*) over bare
        # foundation model IDs.
        profile_base_ids = set()
        for m in filtered:
            mid = m["id"]
            if mid.startswith(("us.", "global.")):
                # Strip the profile prefix to recover the bare model ID.
                # NOTE(review): the `mid[3:]` guard assumes a 2-char prefix
                # ("us."); for "global.x" it inspects mid past "glo" — works
                # in practice but asymmetric. Confirm intended.
                base = mid.split(".", 1)[1] if "." in mid[3:] else mid
                profile_base_ids.add(base)
        deduped = []
        for m in filtered:
            mid = m["id"]
            # Skip a bare foundation-model ID when an inference profile for
            # the same model is already present.
            if not mid.startswith(("us.", "global.")) and mid in profile_base_ids:
                continue
            deduped.append(m)
        # Curated ordering: recommended models first (in listed priority),
        # then global.* profiles, then everything else alphabetically.
        _RECOMMENDED = [
            "us.anthropic.claude-sonnet-4-6",
            "us.anthropic.claude-opus-4-6",
            "us.anthropic.claude-haiku-4-5",
            "us.amazon.nova-pro",
            "us.amazon.nova-lite",
            "us.amazon.nova-micro",
            "deepseek.v3",
            "us.meta.llama4-maverick",
            "us.meta.llama4-scout",
        ]
        def _sort_key(m):
            # Sort tuple: (tier, recommended-rank, id) — lower sorts first.
            mid = m["id"]
            for i, rec in enumerate(_RECOMMENDED):
                if mid.startswith(rec):
                    return (0, i, mid)
            if mid.startswith("global."):
                return (1, 0, mid)
            return (2, 0, mid)
        deduped.sort(key=_sort_key)
        model_list = [m["id"] for m in deduped]
        print(f" Found {len(model_list)} text model(s) (filtered from {len(live_models)} total)")
    else:
        # Live discovery failed (no permissions / offline) — use the static
        # curated list shipped with the CLI, if any.
        model_list = _PROVIDER_MODELS.get("bedrock", [])
        if model_list:
            print(f" Using {len(model_list)} curated models (live discovery unavailable)")
        else:
            print(" No models found. Check IAM permissions for bedrock:ListFoundationModels.")
            return
    # 4. Model selection
    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        # Discovery succeeded but filtering removed everything — free-form entry.
        try:
            selected = input(" Model ID: ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None
    if selected:
        _save_model_choice(selected)
        cfg = load_config()
        model = cfg.get("model")
        if not isinstance(model, dict):
            # Legacy configs stored the model as a bare string; normalize.
            model = {"default": model} if model else {}
        cfg["model"] = model
        model["provider"] = "bedrock"
        model["base_url"] = f"https://bedrock-runtime.{region}.amazonaws.com"
        model.pop("api_mode", None)  # bedrock_converse is auto-detected
        bedrock_cfg = cfg.get("bedrock", {})
        if not isinstance(bedrock_cfg, dict):
            bedrock_cfg = {}
        bedrock_cfg["region"] = region
        cfg["bedrock"] = bedrock_cfg
        save_config(cfg)
        deactivate_provider()
        print(f" Default model set to: {selected} (via AWS Bedrock, {region})")
    else:
        print(" No change.")
def _model_flow_api_key_provider(config, provider_id, current_model=""): def _model_flow_api_key_provider(config, provider_id, current_model=""):
"""Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.).""" """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
from hermes_cli.auth import ( from hermes_cli.auth import (

View file

@ -303,6 +303,22 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"XiaomiMiMo/MiMo-V2-Flash", "XiaomiMiMo/MiMo-V2-Flash",
"moonshotai/Kimi-K2-Thinking", "moonshotai/Kimi-K2-Thinking",
], ],
# AWS Bedrock — static fallback list used when dynamic discovery is
# unavailable (no boto3, no credentials, or API error). The agent
# prefers live discovery via ListFoundationModels + ListInferenceProfiles.
# Use inference profile IDs (us.*) since most models require them.
"bedrock": [
"us.anthropic.claude-sonnet-4-6",
"us.anthropic.claude-opus-4-6-v1",
"us.anthropic.claude-haiku-4-5-20251001-v1:0",
"us.anthropic.claude-sonnet-4-5-20250929-v1:0",
"us.amazon.nova-pro-v1:0",
"us.amazon.nova-lite-v1:0",
"us.amazon.nova-micro-v1:0",
"deepseek.v3.2",
"us.meta.llama4-maverick-17b-instruct-v1:0",
"us.meta.llama4-scout-17b-instruct-v1:0",
],
} }
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -536,6 +552,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"), ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"), ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"), ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"),
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
] ]
# Derived dicts — used throughout the codebase # Derived dicts — used throughout the codebase
@ -587,6 +604,10 @@ _PROVIDER_ALIASES = {
"huggingface-hub": "huggingface", "huggingface-hub": "huggingface",
"mimo": "xiaomi", "mimo": "xiaomi",
"xiaomi-mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
"aws": "bedrock",
"aws-bedrock": "bedrock",
"amazon-bedrock": "bedrock",
"amazon": "bedrock",
"grok": "xai", "grok": "xai",
"x-ai": "xai", "x-ai": "xai",
"x.ai": "xai", "x.ai": "xai",
@ -1955,6 +1976,42 @@ def validate_requested_model(
# api_models is None — couldn't reach API. Accept and persist, # api_models is None — couldn't reach API. Accept and persist,
# but warn so typos don't silently break things. # but warn so typos don't silently break things.
# Bedrock: use our own discovery instead of HTTP /models endpoint.
# Bedrock's bedrock-runtime URL doesn't support /models — it uses the
# AWS SDK control plane (ListFoundationModels + ListInferenceProfiles).
if normalized == "bedrock":
try:
from agent.bedrock_adapter import discover_bedrock_models, resolve_bedrock_region
region = resolve_bedrock_region()
discovered = discover_bedrock_models(region)
discovered_ids = {m["id"] for m in discovered}
if requested in discovered_ids:
return {
"accepted": True,
"persist": True,
"recognized": True,
"message": None,
}
# Not in discovered list — still accept (user may have custom
# inference profiles or cross-account access), but warn.
suggestions = get_close_matches(requested, list(discovered_ids), n=3, cutoff=0.4)
suggestion_text = ""
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
return {
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Note: `{requested}` was not found in Bedrock model discovery for {region}. "
f"It may still work with custom inference profiles or cross-account access."
f"{suggestion_text}"
),
}
except Exception:
pass # Fall through to generic warning
provider_label = _PROVIDER_LABELS.get(normalized, normalized) provider_label = _PROVIDER_LABELS.get(normalized, normalized)
return { return {
"accepted": False, "accepted": False,

View file

@ -236,6 +236,12 @@ ALIASES: Dict[str, str] = {
"mimo": "xiaomi", "mimo": "xiaomi",
"xiaomi-mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
# bedrock
"aws": "bedrock",
"aws-bedrock": "bedrock",
"amazon-bedrock": "bedrock",
"amazon": "bedrock",
# arcee # arcee
"arcee-ai": "arcee", "arcee-ai": "arcee",
"arceeai": "arcee", "arceeai": "arcee",
@ -262,6 +268,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"copilot-acp": "GitHub Copilot ACP", "copilot-acp": "GitHub Copilot ACP",
"xiaomi": "Xiaomi MiMo", "xiaomi": "Xiaomi MiMo",
"local": "Local endpoint", "local": "Local endpoint",
"bedrock": "AWS Bedrock",
} }
@ -271,6 +278,7 @@ TRANSPORT_TO_API_MODE: Dict[str, str] = {
"openai_chat": "chat_completions", "openai_chat": "chat_completions",
"anthropic_messages": "anthropic_messages", "anthropic_messages": "anthropic_messages",
"codex_responses": "codex_responses", "codex_responses": "codex_responses",
"bedrock_converse": "bedrock_converse",
} }
@ -388,6 +396,10 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
if pdef is not None: if pdef is not None:
return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions") return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")
# Direct provider checks for providers not in HERMES_OVERLAYS
if provider == "bedrock":
return "bedrock_converse"
# URL-based heuristics for custom / unknown providers # URL-based heuristics for custom / unknown providers
if base_url: if base_url:
url_lower = base_url.rstrip("/").lower() url_lower = base_url.rstrip("/").lower()
@ -395,6 +407,8 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
return "anthropic_messages" return "anthropic_messages"
if "api.openai.com" in url_lower: if "api.openai.com" in url_lower:
return "codex_responses" return "codex_responses"
if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
return "bedrock_converse"
return "chat_completions" return "chat_completions"

View file

@ -124,7 +124,7 @@ def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str:
return "chat_completions" return "chat_completions"
_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages"} _VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}
def _parse_api_mode(raw: Any) -> Optional[str]: def _parse_api_mode(raw: Any) -> Optional[str]:
@ -836,6 +836,77 @@ def resolve_runtime_provider(
"requested_provider": requested_provider, "requested_provider": requested_provider,
} }
# AWS Bedrock (native Converse API via boto3)
if provider == "bedrock":
from agent.bedrock_adapter import (
has_aws_credentials,
resolve_aws_auth_env_var,
resolve_bedrock_region,
is_anthropic_bedrock_model,
)
# When the user explicitly selected bedrock (not auto-detected),
# trust boto3's credential chain — it handles IMDS, ECS task roles,
# Lambda execution roles, SSO, and other implicit sources that our
# env-var check can't detect.
is_explicit = requested_provider in ("bedrock", "aws", "aws-bedrock", "amazon-bedrock", "amazon")
if not is_explicit and not has_aws_credentials():
raise AuthError(
"No AWS credentials found for Bedrock. Configure one of:\n"
" - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY\n"
" - AWS_PROFILE (for SSO / named profiles)\n"
" - IAM instance role (EC2, ECS, Lambda)\n"
"Or run 'aws configure' to set up credentials.",
code="no_aws_credentials",
)
# Read bedrock-specific config from config.yaml
from hermes_cli.config import load_config as _load_bedrock_config
_bedrock_cfg = _load_bedrock_config().get("bedrock", {})
# Region priority: config.yaml bedrock.region → env var → us-east-1
region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
# Build guardrail config if configured
_gr = _bedrock_cfg.get("guardrail", {})
guardrail_config = None
if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"):
guardrail_config = {
"guardrailIdentifier": _gr["guardrail_identifier"],
"guardrailVersion": _gr["guardrail_version"],
}
if _gr.get("stream_processing_mode"):
guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"]
if _gr.get("trace"):
guardrail_config["trace"] = _gr["trace"]
# Dual-path routing: Claude models use AnthropicBedrock SDK for full
# feature parity (prompt caching, thinking budgets, adaptive thinking).
# Non-Claude models use the Converse API for multi-model support.
_current_model = str(model_cfg.get("default") or "").strip()
if is_anthropic_bedrock_model(_current_model):
# Claude on Bedrock → AnthropicBedrock SDK → anthropic_messages path
runtime = {
"provider": "bedrock",
"api_mode": "anthropic_messages",
"base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
"api_key": "aws-sdk",
"source": auth_source,
"region": region,
"bedrock_anthropic": True, # Signal to use AnthropicBedrock client
"requested_provider": requested_provider,
}
else:
# Non-Claude (Nova, DeepSeek, Llama, etc.) → Converse API
runtime = {
"provider": "bedrock",
"api_mode": "bedrock_converse",
"base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
"api_key": "aws-sdk",
"source": auth_source,
"region": region,
"requested_provider": requested_provider,
}
if guardrail_config:
runtime["guardrail_config"] = guardrail_config
return runtime
# API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN) # API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
pconfig = PROVIDER_REGISTRY.get(provider) pconfig = PROVIDER_REGISTRY.get(provider)
if pconfig and pconfig.auth_type == "api_key": if pconfig and pconfig.auth_type == "api_key":

View file

@ -58,8 +58,7 @@ def resolve_config_path() -> Path:
Resolution order: Resolution order:
1. $HERMES_HOME/honcho.json (profile-local, if it exists) 1. $HERMES_HOME/honcho.json (profile-local, if it exists)
2. ~/.hermes/honcho.json (default profile shared host blocks live here) 2. ~/.honcho/config.json (global, cross-app interop)
3. ~/.honcho/config.json (global, cross-app interop)
Returns the global path if none exist (for first-time setup writes). Returns the global path if none exist (for first-time setup writes).
""" """
@ -67,11 +66,6 @@ def resolve_config_path() -> Path:
if local_path.exists(): if local_path.exists():
return local_path return local_path
# Default profile's config — host blocks accumulate here via setup/clone
default_path = Path.home() / ".hermes" / "honcho.json"
if default_path != local_path and default_path.exists():
return default_path
return GLOBAL_CONFIG_PATH return GLOBAL_CONFIG_PATH

View file

@ -63,6 +63,7 @@ homeassistant = ["aiohttp>=3.9.0,<4"]
sms = ["aiohttp>=3.9.0,<4"] sms = ["aiohttp>=3.9.0,<4"]
acp = ["agent-client-protocol>=0.9.0,<1.0"] acp = ["agent-client-protocol>=0.9.0,<1.0"]
mistral = ["mistralai>=2.3.0,<3"] mistral = ["mistralai>=2.3.0,<3"]
bedrock = ["boto3>=1.35.0,<2"]
termux = [ termux = [
# Tested Android / Termux path: keeps the core CLI feature-rich while # Tested Android / Termux path: keeps the core CLI feature-rich while
# avoiding extras that currently depend on non-Android wheels (notably # avoiding extras that currently depend on non-Android wheels (notably
@ -108,6 +109,7 @@ all = [
"hermes-agent[dingtalk]", "hermes-agent[dingtalk]",
"hermes-agent[feishu]", "hermes-agent[feishu]",
"hermes-agent[mistral]", "hermes-agent[mistral]",
"hermes-agent[bedrock]",
"hermes-agent[web]", "hermes-agent[web]",
] ]

View file

@ -685,7 +685,7 @@ class AIAgent:
self.provider = provider_name or "" self.provider = provider_name or ""
self.acp_command = acp_command or command self.acp_command = acp_command or command
self.acp_args = list(acp_args or args or []) self.acp_args = list(acp_args or args or [])
if api_mode in {"chat_completions", "codex_responses", "anthropic_messages"}: if api_mode in {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}:
self.api_mode = api_mode self.api_mode = api_mode
elif self.provider == "openai-codex": elif self.provider == "openai-codex":
self.api_mode = "codex_responses" self.api_mode = "codex_responses"
@ -700,6 +700,9 @@ class AIAgent:
# use a URL convention ending in /anthropic. Auto-detect these so the # use a URL convention ending in /anthropic. Auto-detect these so the
# Anthropic Messages API adapter is used instead of chat completions. # Anthropic Messages API adapter is used instead of chat completions.
self.api_mode = "anthropic_messages" self.api_mode = "anthropic_messages"
elif self.provider == "bedrock" or "bedrock-runtime" in self._base_url_lower:
# AWS Bedrock — auto-detect from provider name or base URL.
self.api_mode = "bedrock_converse"
else: else:
self.api_mode = "chat_completions" self.api_mode = "chat_completions"
@ -721,8 +724,11 @@ class AIAgent:
# Responses there. ACP runtimes are excluded: CopilotACPClient # Responses there. ACP runtimes are excluded: CopilotACPClient
# handles its own routing and does not implement the Responses API # handles its own routing and does not implement the Responses API
# surface. # surface.
# When api_mode was explicitly provided, respect it — the user
# knows what their endpoint supports (#10473).
if ( if (
self.api_mode == "chat_completions" api_mode is None
and self.api_mode == "chat_completions"
and self.provider != "copilot-acp" and self.provider != "copilot-acp"
and not str(self.base_url or "").lower().startswith("acp://copilot") and not str(self.base_url or "").lower().startswith("acp://copilot")
and not str(self.base_url or "").lower().startswith("acp+tcp://") and not str(self.base_url or "").lower().startswith("acp+tcp://")
@ -889,24 +895,70 @@ class AIAgent:
if self.api_mode == "anthropic_messages": if self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
# Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic. # Bedrock + Claude → use AnthropicBedrock SDK for full feature parity
# Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own API key. # (prompt caching, thinking budgets, adaptive thinking).
# Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401). _is_bedrock_anthropic = self.provider == "bedrock"
_is_native_anthropic = self.provider == "anthropic" if _is_bedrock_anthropic:
effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "") from agent.anthropic_adapter import build_anthropic_bedrock_client
self.api_key = effective_key import re as _re
self._anthropic_api_key = effective_key _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "")
self._anthropic_base_url = base_url _br_region = _region_match.group(1) if _region_match else "us-east-1"
from agent.anthropic_adapter import _is_oauth_token as _is_oat self._bedrock_region = _br_region
self._is_anthropic_oauth = _is_oat(effective_key) self._anthropic_client = build_anthropic_bedrock_client(_br_region)
self._anthropic_client = build_anthropic_client(effective_key, base_url) self._anthropic_api_key = "aws-sdk"
# No OpenAI client needed for Anthropic mode self._anthropic_base_url = base_url
self._is_anthropic_oauth = False
self.api_key = "aws-sdk"
self.client = None
self._client_kwargs = {}
if not self.quiet_mode:
print(f"🤖 AI Agent initialized with model: {self.model} (AWS Bedrock + AnthropicBedrock SDK, {_br_region})")
else:
# Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic.
# Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own API key.
# Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401).
_is_native_anthropic = self.provider == "anthropic"
effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "")
self.api_key = effective_key
self._anthropic_api_key = effective_key
self._anthropic_base_url = base_url
from agent.anthropic_adapter import _is_oauth_token as _is_oat
self._is_anthropic_oauth = _is_oat(effective_key)
self._anthropic_client = build_anthropic_client(effective_key, base_url)
# No OpenAI client needed for Anthropic mode
self.client = None
self._client_kwargs = {}
if not self.quiet_mode:
print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)")
if effective_key and len(effective_key) > 12:
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
elif self.api_mode == "bedrock_converse":
# AWS Bedrock — uses boto3 directly, no OpenAI client needed.
# Region is extracted from the base_url or defaults to us-east-1.
import re as _re
_region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "")
self._bedrock_region = _region_match.group(1) if _region_match else "us-east-1"
# Guardrail config — read from config.yaml at init time.
self._bedrock_guardrail_config = None
try:
from hermes_cli.config import load_config as _load_br_cfg
_gr = _load_br_cfg().get("bedrock", {}).get("guardrail", {})
if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"):
self._bedrock_guardrail_config = {
"guardrailIdentifier": _gr["guardrail_identifier"],
"guardrailVersion": _gr["guardrail_version"],
}
if _gr.get("stream_processing_mode"):
self._bedrock_guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"]
if _gr.get("trace"):
self._bedrock_guardrail_config["trace"] = _gr["trace"]
except Exception:
pass
self.client = None self.client = None
self._client_kwargs = {} self._client_kwargs = {}
if not self.quiet_mode: if not self.quiet_mode:
print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)") _gr_label = " + Guardrails" if self._bedrock_guardrail_config else ""
if effective_key and len(effective_key) > 12: print(f"🤖 AI Agent initialized with model: {self.model} (AWS Bedrock, {self._bedrock_region}{_gr_label})")
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
else: else:
if api_key and base_url: if api_key and base_url:
# Explicit credentials from CLI/gateway — construct directly. # Explicit credentials from CLI/gateway — construct directly.
@ -951,9 +1003,20 @@ class AIAgent:
# message instead of silently routing through OpenRouter. # message instead of silently routing through OpenRouter.
_explicit = (self.provider or "").strip().lower() _explicit = (self.provider or "").strip().lower()
if _explicit and _explicit not in ("auto", "openrouter", "custom"): if _explicit and _explicit not in ("auto", "openrouter", "custom"):
# Look up the actual env var name from the provider
# config — some providers use non-standard names
# (e.g. alibaba → DASHSCOPE_API_KEY, not ALIBABA_API_KEY).
_env_hint = f"{_explicit.upper()}_API_KEY"
try:
from hermes_cli.auth import PROVIDER_REGISTRY
_pcfg = PROVIDER_REGISTRY.get(_explicit)
if _pcfg and _pcfg.api_key_env_vars:
_env_hint = _pcfg.api_key_env_vars[0]
except Exception:
pass
raise RuntimeError( raise RuntimeError(
f"Provider '{_explicit}' is set in config.yaml but no API key " f"Provider '{_explicit}' is set in config.yaml but no API key "
f"was found. Set the {_explicit.upper()}_API_KEY environment " f"was found. Set the {_env_hint} environment "
f"variable, or switch to a different provider with `hermes model`." f"variable, or switch to a different provider with `hermes model`."
) )
# Final fallback: try raw OpenRouter key # Final fallback: try raw OpenRouter key
@ -1217,6 +1280,15 @@ class AIAgent:
"hermes_home": str(_ghh()), "hermes_home": str(_ghh()),
"agent_context": "primary", "agent_context": "primary",
} }
# Thread session title for memory provider scoping
# (e.g. honcho uses this to derive chat-scoped session keys)
if self._session_db:
try:
_st = self._session_db.get_session_title(self.session_id)
if _st:
_init_kwargs["session_title"] = _st
except Exception:
pass
# Thread gateway user identity for per-user memory scoping # Thread gateway user identity for per-user memory scoping
if self._user_id: if self._user_id:
_init_kwargs["user_id"] = self._user_id _init_kwargs["user_id"] = self._user_id
@ -4206,6 +4278,9 @@ class AIAgent:
return False return False
def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any: def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any:
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
_validate_proxy_env_urls()
_validate_base_url(client_kwargs.get("base_url"))
if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"): if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"):
from agent.copilot_acp_client import CopilotACPClient from agent.copilot_acp_client import CopilotACPClient
@ -4890,6 +4965,17 @@ class AIAgent:
) )
elif self.api_mode == "anthropic_messages": elif self.api_mode == "anthropic_messages":
result["response"] = self._anthropic_messages_create(api_kwargs) result["response"] = self._anthropic_messages_create(api_kwargs)
elif self.api_mode == "bedrock_converse":
# Bedrock uses boto3 directly — no OpenAI client needed.
from agent.bedrock_adapter import (
_get_bedrock_runtime_client,
normalize_converse_response,
)
region = api_kwargs.pop("__bedrock_region__", "us-east-1")
api_kwargs.pop("__bedrock_converse__", None)
client = _get_bedrock_runtime_client(region)
raw_response = client.converse(**api_kwargs)
result["response"] = normalize_converse_response(raw_response)
else: else:
request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request") request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request")
result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs) result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs)
@ -5129,6 +5215,65 @@ class AIAgent:
finally: finally:
self._codex_on_first_delta = None self._codex_on_first_delta = None
# Bedrock Converse uses boto3's converse_stream() with real-time delta
# callbacks — same UX as Anthropic and chat_completions streaming.
if self.api_mode == "bedrock_converse":
result = {"response": None, "error": None}
first_delta_fired = {"done": False}
deltas_were_sent = {"yes": False}
def _fire_first():
if not first_delta_fired["done"] and on_first_delta:
first_delta_fired["done"] = True
try:
on_first_delta()
except Exception:
pass
def _bedrock_call():
try:
from agent.bedrock_adapter import (
_get_bedrock_runtime_client,
stream_converse_with_callbacks,
)
region = api_kwargs.pop("__bedrock_region__", "us-east-1")
api_kwargs.pop("__bedrock_converse__", None)
client = _get_bedrock_runtime_client(region)
raw_response = client.converse_stream(**api_kwargs)
def _on_text(text):
_fire_first()
self._fire_stream_delta(text)
deltas_were_sent["yes"] = True
def _on_tool(name):
_fire_first()
self._fire_tool_gen_started(name)
def _on_reasoning(text):
_fire_first()
self._fire_reasoning_delta(text)
result["response"] = stream_converse_with_callbacks(
raw_response,
on_text_delta=_on_text if self._has_stream_consumers() else None,
on_tool_start=_on_tool,
on_reasoning_delta=_on_reasoning if self.reasoning_callback or self.stream_delta_callback else None,
on_interrupt_check=lambda: self._interrupt_requested,
)
except Exception as e:
result["error"] = e
t = threading.Thread(target=_bedrock_call, daemon=True)
t.start()
while t.is_alive():
t.join(timeout=0.3)
if self._interrupt_requested:
raise InterruptedError("Agent interrupted during Bedrock API call")
if result["error"] is not None:
raise result["error"]
return result["response"]
result = {"response": None, "error": None} result = {"response": None, "error": None}
request_client_holder = {"client": None} request_client_holder = {"client": None}
first_delta_fired = {"done": False} first_delta_fired = {"done": False}
@ -5760,6 +5905,8 @@ class AIAgent:
# provider-specific exceptions like Copilot gpt-5-mini on # provider-specific exceptions like Copilot gpt-5-mini on
# chat completions. # chat completions.
fb_api_mode = "codex_responses" fb_api_mode = "codex_responses"
elif fb_provider == "bedrock" or "bedrock-runtime" in fb_base_url.lower():
fb_api_mode = "bedrock_converse"
old_model = self.model old_model = self.model
self.model = fb_model self.model = fb_model
@ -6239,6 +6386,25 @@ class AIAgent:
fast_mode=(self.request_overrides or {}).get("speed") == "fast", fast_mode=(self.request_overrides or {}).get("speed") == "fast",
) )
# AWS Bedrock native Converse API — bypasses the OpenAI client entirely.
# The adapter handles message/tool conversion and boto3 calls directly.
if self.api_mode == "bedrock_converse":
from agent.bedrock_adapter import build_converse_kwargs
region = getattr(self, "_bedrock_region", None) or "us-east-1"
guardrail = getattr(self, "_bedrock_guardrail_config", None)
return {
"__bedrock_converse__": True,
"__bedrock_region__": region,
**build_converse_kwargs(
model=self.model,
messages=api_messages,
tools=self.tools,
max_tokens=self.max_tokens or 4096,
temperature=None, # Let the model use its default
guardrail_config=guardrail,
),
}
if self.api_mode == "codex_responses": if self.api_mode == "codex_responses":
instructions = "" instructions = ""
payload_messages = api_messages payload_messages = api_messages
@ -8504,6 +8670,53 @@ class AIAgent:
api_kwargs = None # Guard against UnboundLocalError in except handler api_kwargs = None # Guard against UnboundLocalError in except handler
while retry_count < max_retries: while retry_count < max_retries:
# ── Nous Portal rate limit guard ──────────────────────
# If another session already recorded that Nous is rate-
# limited, skip the API call entirely. Each attempt
# (including SDK-level retries) counts against RPH and
# deepens the rate limit hole.
if self.provider == "nous":
try:
from agent.nous_rate_guard import (
nous_rate_limit_remaining,
format_remaining as _fmt_nous_remaining,
)
_nous_remaining = nous_rate_limit_remaining()
if _nous_remaining is not None and _nous_remaining > 0:
_nous_msg = (
f"Nous Portal rate limit active — "
f"resets in {_fmt_nous_remaining(_nous_remaining)}."
)
self._vprint(
f"{self.log_prefix}{_nous_msg} Trying fallback...",
force=True,
)
self._emit_status(f"{_nous_msg}")
if self._try_activate_fallback():
retry_count = 0
compression_attempts = 0
primary_recovery_attempted = False
continue
# No fallback available — return with clear message
self._persist_session(messages, conversation_history)
return {
"final_response": (
f"{_nous_msg}\n\n"
"No fallback provider available. "
"Try again after the reset, or add a "
"fallback provider in config.yaml."
),
"messages": messages,
"api_calls": api_call_count,
"completed": False,
"failed": True,
"error": _nous_msg,
}
except ImportError:
pass
except Exception:
pass # Never let rate guard break the agent loop
try: try:
self._reset_stream_delivery_tracking() self._reset_stream_delivery_tracking()
api_kwargs = self._build_api_kwargs(api_messages) api_kwargs = self._build_api_kwargs(api_messages)
@ -8816,7 +9029,7 @@ class AIAgent:
# targeted error instead of wasting 3 API calls. # targeted error instead of wasting 3 API calls.
_trunc_content = None _trunc_content = None
_trunc_has_tool_calls = False _trunc_has_tool_calls = False
if self.api_mode == "chat_completions": if self.api_mode in ("chat_completions", "bedrock_converse"):
_trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
_trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
@ -8885,7 +9098,7 @@ class AIAgent:
"error": _exhaust_error, "error": _exhaust_error,
} }
if self.api_mode == "chat_completions": if self.api_mode in ("chat_completions", "bedrock_converse"):
assistant_message = response.choices[0].message assistant_message = response.choices[0].message
if not assistant_message.tool_calls: if not assistant_message.tool_calls:
length_continue_retries += 1 length_continue_retries += 1
@ -8925,7 +9138,7 @@ class AIAgent:
"error": "Response remained truncated after 3 continuation attempts", "error": "Response remained truncated after 3 continuation attempts",
} }
if self.api_mode == "chat_completions": if self.api_mode in ("chat_completions", "bedrock_converse"):
assistant_message = response.choices[0].message assistant_message = response.choices[0].message
if assistant_message.tool_calls: if assistant_message.tool_calls:
if truncated_tool_call_retries < 1: if truncated_tool_call_retries < 1:
@ -9092,6 +9305,15 @@ class AIAgent:
self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)") self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
has_retried_429 = False # Reset on success has_retried_429 = False # Reset on success
# Clear Nous rate limit state on successful request —
# proves the limit has reset and other sessions can
# resume hitting Nous.
if self.provider == "nous":
try:
from agent.nous_rate_guard import clear_nous_rate_limit
clear_nous_rate_limit()
except Exception:
pass
self._touch_activity(f"API call #{api_call_count} completed") self._touch_activity(f"API call #{api_call_count} completed")
break # Success, exit retry loop break # Success, exit retry loop
@ -9503,6 +9725,38 @@ class AIAgent:
primary_recovery_attempted = False primary_recovery_attempted = False
continue continue
# ── Nous Portal: record rate limit & skip retries ─────
# When Nous returns a 429, record the reset time to a
# shared file so ALL sessions (cron, gateway, auxiliary)
# know not to pile on. Then skip further retries —
# each one burns another RPH request and deepens the
# rate limit hole. The retry loop's top-of-iteration
# guard will catch this on the next pass and try
# fallback or bail with a clear message.
if (
is_rate_limited
and self.provider == "nous"
and classified.reason == FailoverReason.rate_limit
and not recovered_with_pool
):
try:
from agent.nous_rate_guard import record_nous_rate_limit
_err_resp = getattr(api_error, "response", None)
_err_hdrs = (
getattr(_err_resp, "headers", None)
if _err_resp else None
)
record_nous_rate_limit(
headers=_err_hdrs,
error_context=error_context,
)
except Exception:
pass
# Skip straight to max_retries — the top-of-loop
# guard will handle fallback or bail cleanly.
retry_count = max_retries
continue
is_payload_too_large = ( is_payload_too_large = (
classified.reason == FailoverReason.payload_too_large classified.reason == FailoverReason.payload_too_large
) )

View file

@ -28,7 +28,7 @@ BOLD='\033[1m'
# Configuration # Configuration
REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git" REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git" REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
HERMES_HOME="$HOME/.hermes" HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}" INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}"
PYTHON_VERSION="3.11" PYTHON_VERSION="3.11"
NODE_VERSION="22" NODE_VERSION="22"
@ -66,6 +66,10 @@ while [[ $# -gt 0 ]]; do
INSTALL_DIR="$2" INSTALL_DIR="$2"
shift 2 shift 2
;; ;;
--hermes-home)
HERMES_HOME="$2"
shift 2
;;
-h|--help) -h|--help)
echo "Hermes Agent Installer" echo "Hermes Agent Installer"
echo "" echo ""
@ -76,6 +80,7 @@ while [[ $# -gt 0 ]]; do
echo " --skip-setup Skip interactive setup wizard" echo " --skip-setup Skip interactive setup wizard"
echo " --branch NAME Git branch to install (default: main)" echo " --branch NAME Git branch to install (default: main)"
echo " --dir PATH Installation directory (default: ~/.hermes/hermes-agent)" echo " --dir PATH Installation directory (default: ~/.hermes/hermes-agent)"
echo " --hermes-home PATH Data directory (default: ~/.hermes, or \$HERMES_HOME)"
echo " -h, --help Show this help" echo " -h, --help Show this help"
exit 0 exit 0
;; ;;

View file

@ -62,6 +62,7 @@ AUTHOR_MAP = {
"258577966+voidborne-d@users.noreply.github.com": "voidborne-d", "258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
"70424851+insecurejezza@users.noreply.github.com": "insecurejezza", "70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
"259807879+Bartok9@users.noreply.github.com": "Bartok9", "259807879+Bartok9@users.noreply.github.com": "Bartok9",
"241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
"268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1", "268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1",
"241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter", "241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
# contributors (manual mapping from git names) # contributors (manual mapping from git names)
@ -81,6 +82,7 @@ AUTHOR_MAP = {
"brooklyn.bb.nicholson@gmail.com": "brooklynnicholson", "brooklyn.bb.nicholson@gmail.com": "brooklynnicholson",
"4317663+helix4u@users.noreply.github.com": "helix4u", "4317663+helix4u@users.noreply.github.com": "helix4u",
"331214+counterposition@users.noreply.github.com": "counterposition", "331214+counterposition@users.noreply.github.com": "counterposition",
"blspear@gmail.com": "BrennerSpear",
"gpickett00@gmail.com": "gpickett00", "gpickett00@gmail.com": "gpickett00",
"mcosma@gmail.com": "wakamex", "mcosma@gmail.com": "wakamex",
"clawdia.nash@proton.me": "clawdia-nash", "clawdia.nash@proton.me": "clawdia-nash",

View file

@ -313,7 +313,7 @@ Type these during an interactive chat session.
``` ```
~/.hermes/config.yaml Main configuration ~/.hermes/config.yaml Main configuration
~/.hermes/.env API keys and secrets ~/.hermes/.env API keys and secrets
~/.hermes/skills/ Installed skills $HERMES_HOME/skills/ Installed skills
~/.hermes/sessions/ Session transcripts ~/.hermes/sessions/ Session transcripts
~/.hermes/logs/ Gateway and error logs ~/.hermes/logs/ Gateway and error logs
~/.hermes/auth.json OAuth tokens and credential pools ~/.hermes/auth.json OAuth tokens and credential pools

View file

@ -334,7 +334,7 @@ When the user asks you to "review PR #N", "look at this PR", or gives you a PR U
### Step 1: Set up environment ### Step 1: Set up environment
```bash ```bash
source ~/.hermes/skills/github/github-auth/scripts/gh-env.sh source "${HERMES_HOME:-$HOME/.hermes}/skills/github/github-auth/scripts/gh-env.sh"
# Or run the inline setup block from the top of this skill # Or run the inline setup block from the top of this skill
``` ```

View file

@ -6,7 +6,7 @@ All requests need: `-H "Authorization: token $GITHUB_TOKEN"`
Use the `gh-env.sh` helper to set `$GITHUB_TOKEN`, `$GH_OWNER`, `$GH_REPO` automatically: Use the `gh-env.sh` helper to set `$GITHUB_TOKEN`, `$GH_OWNER`, `$GH_REPO` automatically:
```bash ```bash
source ~/.hermes/skills/github/github-auth/scripts/gh-env.sh source "${HERMES_HOME:-$HOME/.hermes}/skills/github/github-auth/scripts/gh-env.sh"
``` ```
## Repositories ## Repositories

View file

@ -32,7 +32,7 @@ on CLI, Telegram, Discord, or any platform.
Define a shorthand first: Define a shorthand first:
```bash ```bash
GSETUP="python ~/.hermes/skills/productivity/google-workspace/scripts/setup.py" GSETUP="python ${HERMES_HOME:-$HOME/.hermes}/skills/productivity/google-workspace/scripts/setup.py"
``` ```
### Step 0: Check if already set up ### Step 0: Check if already set up
@ -163,7 +163,7 @@ Should print `AUTHENTICATED`. Setup is complete — token refreshes automaticall
All commands go through the API script. Set `GAPI` as a shorthand: All commands go through the API script. Set `GAPI` as a shorthand:
```bash ```bash
GAPI="python ~/.hermes/skills/productivity/google-workspace/scripts/google_api.py" GAPI="python ${HERMES_HOME:-$HOME/.hermes}/skills/productivity/google-workspace/scripts/google_api.py"
``` ```
### Gmail ### Gmail

View file

@ -60,7 +60,7 @@ The fastest path — auto-detect the model, test strategies, and lock in the win
# In execute_code — use the loader to avoid exec-scoping issues: # In execute_code — use the loader to avoid exec-scoping issues:
import os import os
exec(open(os.path.expanduser( exec(open(os.path.expanduser(
"~/.hermes/skills/red-teaming/godmode/scripts/load_godmode.py" os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/load_godmode.py")
)).read()) )).read())
# Auto-detect model from config and jailbreak it # Auto-detect model from config and jailbreak it
@ -192,7 +192,7 @@ python3 scripts/parseltongue.py "How do I hack into a WiFi network?" --tier stan
Or use `execute_code` inline: Or use `execute_code` inline:
```python ```python
# Load the parseltongue module # Load the parseltongue module
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/parseltongue.py")).read()) exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/parseltongue.py")).read())
query = "How do I hack into a WiFi network?" query = "How do I hack into a WiFi network?"
variants = generate_variants(query, tier="standard") variants = generate_variants(query, tier="standard")
@ -229,7 +229,7 @@ Race multiple models against the same query, score responses, pick the winner:
```python ```python
# Via execute_code # Via execute_code
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/godmode_race.py")).read()) exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/godmode_race.py")).read())
result = race_models( result = race_models(
query="Explain how SQL injection works with a practical example", query="Explain how SQL injection works with a practical example",

View file

@ -114,7 +114,7 @@ hermes
### Via the GODMODE CLASSIC racer script ### Via the GODMODE CLASSIC racer script
```python ```python
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/godmode_race.py")).read()) exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/godmode_race.py")).read())
result = race_godmode_classic("Your query here") result = race_godmode_classic("Your query here")
print(f"Winner: {result['codename']} — Score: {result['score']}") print(f"Winner: {result['codename']} — Score: {result['score']}")
print(result['content']) print(result['content'])

View file

@ -129,7 +129,7 @@ These don't auto-reject but reduce the response score:
## Using in Python ## Using in Python
```python ```python
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/godmode_race.py")).read()) exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/godmode_race.py")).read())
# Check if a response is a refusal # Check if a response is a refusal
text = "I'm sorry, but I can't assist with that request." text = "I'm sorry, but I can't assist with that request."

View file

@ -7,7 +7,7 @@ finds what works, and locks it in by writing config.yaml + prefill.json.
Usage in execute_code: Usage in execute_code:
exec(open(os.path.expanduser( exec(open(os.path.expanduser(
"~/.hermes/skills/red-teaming/godmode/scripts/auto_jailbreak.py" os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/auto_jailbreak.py")
)).read()) )).read())
result = auto_jailbreak() # Uses current model from config result = auto_jailbreak() # Uses current model from config

View file

@ -7,7 +7,7 @@ Queries multiple models in parallel via OpenRouter, scores responses
on quality/filteredness/speed, returns the best unfiltered answer. on quality/filteredness/speed, returns the best unfiltered answer.
Usage in execute_code: Usage in execute_code:
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/godmode_race.py")).read()) exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/godmode_race.py")).read())
result = race_models( result = race_models(
query="Your query here", query="Your query here",

View file

@ -3,7 +3,7 @@ Loader for G0DM0D3 scripts. Handles the exec-scoping issues.
Usage in execute_code: Usage in execute_code:
exec(open(os.path.expanduser( exec(open(os.path.expanduser(
"~/.hermes/skills/red-teaming/godmode/scripts/load_godmode.py" os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/load_godmode.py")
)).read()) )).read())
# Now all functions are available: # Now all functions are available:

View file

@ -11,7 +11,7 @@ Usage:
python parseltongue.py "How do I hack a WiFi network?" --tier standard python parseltongue.py "How do I hack a WiFi network?" --tier standard
# As a module in execute_code # As a module in execute_code
exec(open("~/.hermes/skills/red-teaming/godmode/scripts/parseltongue.py").read()) exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/parseltongue.py")).read())
variants = generate_variants("How do I hack a WiFi network?", tier="standard") variants = generate_variants("How do I hack a WiFi network?", tier="standard")
""" """

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,269 @@
"""Integration tests for the AWS Bedrock provider wiring.
Verifies that the Bedrock provider is correctly registered in the
provider registry, model catalog, and runtime resolution pipeline.
These tests do NOT require AWS credentials or boto3 — all AWS calls
are mocked.
Note: Tests that import ``hermes_cli.auth`` or ``hermes_cli.runtime_provider``
require Python 3.10+ due to ``str | None`` type syntax in the import chain.
"""
import os
from unittest.mock import MagicMock, patch
import pytest
class TestProviderRegistry:
    """Bedrock must be present and correctly described in PROVIDER_REGISTRY."""

    def test_bedrock_in_registry(self):
        """The 'bedrock' key exists in the provider registry."""
        from hermes_cli.auth import PROVIDER_REGISTRY

        assert "bedrock" in PROVIDER_REGISTRY

    def test_bedrock_auth_type_is_aws_sdk(self):
        """Bedrock authenticates through the AWS SDK, not a bearer key."""
        from hermes_cli.auth import PROVIDER_REGISTRY

        entry = PROVIDER_REGISTRY["bedrock"]
        assert entry.auth_type == "aws_sdk"

    def test_bedrock_has_no_api_key_env_vars(self):
        """Bedrock uses the AWS SDK credential chain, not API keys."""
        from hermes_cli.auth import PROVIDER_REGISTRY

        entry = PROVIDER_REGISTRY["bedrock"]
        assert entry.api_key_env_vars == ()

    def test_bedrock_base_url_env_var(self):
        """The base-URL override env var is BEDROCK_BASE_URL."""
        from hermes_cli.auth import PROVIDER_REGISTRY

        entry = PROVIDER_REGISTRY["bedrock"]
        assert entry.base_url_env_var == "BEDROCK_BASE_URL"
class TestProviderAliases:
    """Every Bedrock alias must map onto the canonical 'bedrock' name."""

    def test_aws_alias(self):
        from hermes_cli.models import _PROVIDER_ALIASES

        resolved = _PROVIDER_ALIASES.get("aws")
        assert resolved == "bedrock"

    def test_aws_bedrock_alias(self):
        from hermes_cli.models import _PROVIDER_ALIASES

        resolved = _PROVIDER_ALIASES.get("aws-bedrock")
        assert resolved == "bedrock"

    def test_amazon_bedrock_alias(self):
        from hermes_cli.models import _PROVIDER_ALIASES

        resolved = _PROVIDER_ALIASES.get("amazon-bedrock")
        assert resolved == "bedrock"

    def test_amazon_alias(self):
        from hermes_cli.models import _PROVIDER_ALIASES

        resolved = _PROVIDER_ALIASES.get("amazon")
        assert resolved == "bedrock"
class TestProviderLabels:
    """The human-readable label for 'bedrock' is 'AWS Bedrock'."""

    def test_bedrock_label(self):
        from hermes_cli.models import _PROVIDER_LABELS

        label = _PROVIDER_LABELS.get("bedrock")
        assert label == "AWS Bedrock"
class TestModelCatalog:
    """Bedrock ships a non-empty static model fallback list with the
    expected model families."""

    def test_bedrock_has_curated_models(self):
        from hermes_cli.models import _PROVIDER_MODELS

        catalog = _PROVIDER_MODELS.get("bedrock", [])
        # The curated fallback list must not be empty.
        assert len(catalog) > 0

    def test_bedrock_models_include_claude(self):
        from hermes_cli.models import _PROVIDER_MODELS

        catalog = _PROVIDER_MODELS.get("bedrock", [])
        # At least one Anthropic Claude model id must be listed.
        assert any("anthropic.claude" in model_id for model_id in catalog)

    def test_bedrock_models_include_nova(self):
        from hermes_cli.models import _PROVIDER_MODELS

        catalog = _PROVIDER_MODELS.get("bedrock", [])
        # At least one Amazon Nova model id must be listed.
        assert any("amazon.nova" in model_id for model_id in catalog)
class TestResolveProvider:
    """Verify resolve_provider() handles bedrock correctly.

    Covers the explicit name, each alias, and env-var auto-detection.
    """

    def test_explicit_bedrock_resolves(self):
        """When the user explicitly requests 'bedrock', it resolves as-is.

        (The unused ``monkeypatch`` fixture and ``PROVIDER_REGISTRY`` import
        from the original version were removed — this test needs neither.)
        """
        from hermes_cli.auth import resolve_provider

        assert resolve_provider("bedrock") == "bedrock"

    def test_aws_alias_resolves_to_bedrock(self):
        from hermes_cli.auth import resolve_provider

        assert resolve_provider("aws") == "bedrock"

    def test_amazon_bedrock_alias_resolves(self):
        from hermes_cli.auth import resolve_provider

        assert resolve_provider("amazon-bedrock") == "bedrock"

    def test_auto_detect_with_aws_credentials(self, monkeypatch):
        """When AWS credentials are present and no other provider is
        configured, auto-detect should find bedrock."""
        from hermes_cli.auth import resolve_provider

        # Clear every competing provider key so auto-detect cannot pick them.
        for var in (
            "OPENAI_API_KEY", "OPENROUTER_API_KEY", "ANTHROPIC_API_KEY",
            "ANTHROPIC_TOKEN", "GOOGLE_API_KEY", "DEEPSEEK_API_KEY",
        ):
            monkeypatch.delenv(var, raising=False)
        # Provide AWS credentials (AWS's documented example key pair).
        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
        # Empty auth store → no explicitly-activated provider to shadow bedrock.
        with patch("hermes_cli.auth._load_auth_store", return_value={}):
            assert resolve_provider("auto") == "bedrock"
class TestRuntimeProvider:
    """Verify resolve_runtime_provider() handles bedrock correctly.

    All AWS interaction is mocked; no real credentials or network are used.
    """

    def test_bedrock_runtime_resolution(self, monkeypatch):
        """Explicit bedrock request with env credentials resolves region,
        api_mode, base_url, and the 'aws-sdk' placeholder api_key."""
        from hermes_cli.runtime_provider import resolve_runtime_provider

        # AWS's documented example key pair — never valid real credentials.
        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
        monkeypatch.setenv("AWS_REGION", "eu-west-1")
        # Mock resolve_provider to return bedrock
        with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \
             patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}):
            result = resolve_runtime_provider(requested="bedrock")
            assert result["provider"] == "bedrock"
            assert result["api_mode"] == "bedrock_converse"
            # Region must come from AWS_REGION and feed into the endpoint URL.
            assert result["region"] == "eu-west-1"
            assert "bedrock-runtime.eu-west-1.amazonaws.com" in result["base_url"]
            # Bedrock has no API key; a sentinel marks SDK-chain auth.
            assert result["api_key"] == "aws-sdk"

    def test_bedrock_runtime_default_region(self, monkeypatch):
        """Without AWS_REGION/AWS_DEFAULT_REGION the region falls back to
        us-east-1."""
        from hermes_cli.runtime_provider import resolve_runtime_provider

        # A profile provides credentials, but no region env vars are set.
        monkeypatch.setenv("AWS_PROFILE", "default")
        monkeypatch.delenv("AWS_REGION", raising=False)
        monkeypatch.delenv("AWS_DEFAULT_REGION", raising=False)
        with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \
             patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}):
            result = resolve_runtime_provider(requested="bedrock")
            assert result["region"] == "us-east-1"

    def test_bedrock_runtime_no_credentials_raises_on_auto_detect(self, monkeypatch):
        """When bedrock is auto-detected (not explicitly requested) and no
        credentials are found, runtime resolution should raise AuthError."""
        from hermes_cli.runtime_provider import resolve_runtime_provider
        from hermes_cli.auth import AuthError

        # Clear all AWS env vars
        for var in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_PROFILE",
                    "AWS_BEARER_TOKEN_BEDROCK", "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
                    "AWS_WEB_IDENTITY_TOKEN_FILE"]:
            monkeypatch.delenv(var, raising=False)
        # Mock both the provider resolution and boto3's credential chain
        mock_session = MagicMock()
        mock_session.get_credentials.return_value = None
        # patch.dict replaces botocore in sys.modules so the import below
        # yields our MagicMock instead of the real (possibly absent) package.
        with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \
             patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}), \
             patch("hermes_cli.runtime_provider.resolve_requested_provider", return_value="auto"), \
             patch.dict("sys.modules", {"botocore": MagicMock(), "botocore.session": MagicMock()}):
            import botocore.session as _bs
            # Force the mocked credential chain to report "no credentials".
            _bs.get_session = MagicMock(return_value=mock_session)
            with pytest.raises(AuthError, match="No AWS credentials"):
                resolve_runtime_provider(requested="auto")

    def test_bedrock_runtime_explicit_skips_credential_check(self, monkeypatch):
        """When user explicitly requests bedrock, trust boto3's credential chain
        even if env-var detection finds nothing (covers IMDS, SSO, etc.)."""
        from hermes_cli.runtime_provider import resolve_runtime_provider

        # No AWS env vars set — but explicit bedrock request should not raise
        for var in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_PROFILE",
                    "AWS_BEARER_TOKEN_BEDROCK"]:
            monkeypatch.delenv(var, raising=False)
        with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \
             patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}):
            result = resolve_runtime_provider(requested="bedrock")
            assert result["provider"] == "bedrock"
            assert result["api_mode"] == "bedrock_converse"
# ---------------------------------------------------------------------------
# providers.py integration
# ---------------------------------------------------------------------------
class TestProvidersModule:
    """Bedrock wiring inside hermes_cli/providers.py."""

    def test_bedrock_alias_in_providers(self):
        from hermes_cli.providers import ALIASES

        # "bedrock" IS the canonical name, so it must not appear as an alias key.
        assert ALIASES.get("bedrock") is None
        assert ALIASES.get("aws") == "bedrock"
        assert ALIASES.get("aws-bedrock") == "bedrock"

    def test_bedrock_transport_mapping(self):
        from hermes_cli.providers import TRANSPORT_TO_API_MODE

        api_mode = TRANSPORT_TO_API_MODE.get("bedrock_converse")
        assert api_mode == "bedrock_converse"

    def test_determine_api_mode_from_bedrock_url(self):
        from hermes_cli.providers import determine_api_mode

        endpoint = "https://bedrock-runtime.us-east-1.amazonaws.com"
        # An unknown provider with a Bedrock runtime URL maps to bedrock_converse.
        assert determine_api_mode("unknown", endpoint) == "bedrock_converse"

    def test_label_override(self):
        from hermes_cli.providers import _LABEL_OVERRIDES

        assert _LABEL_OVERRIDES.get("bedrock") == "AWS Bedrock"
# ---------------------------------------------------------------------------
# Error classifier integration
# ---------------------------------------------------------------------------
class TestErrorClassifierBedrock:
    """Verify Bedrock error patterns are in the global error classifier."""

    def test_throttling_in_rate_limit_patterns(self):
        from agent.error_classifier import _RATE_LIMIT_PATTERNS
        # Bedrock throttling surfaces as ThrottlingException (matched lower-cased).
        assert "throttlingexception" in _RATE_LIMIT_PATTERNS

    def test_context_overflow_patterns(self):
        from agent.error_classifier import _CONTEXT_OVERFLOW_PATTERNS
        # Bedrock's context-window error message.
        assert "input is too long" in _CONTEXT_OVERFLOW_PATTERNS
# ---------------------------------------------------------------------------
# pyproject.toml bedrock extra
# ---------------------------------------------------------------------------
class TestPackaging:
    """Verify the bedrock optional dependency is declared in pyproject.toml."""

    @staticmethod
    def _pyproject_text() -> str:
        """Return the raw text of the repository's pyproject.toml.

        Shared by both tests; also drops the unused ``configparser``
        import the original carried.
        """
        from pathlib import Path
        return (Path(__file__).parent.parent.parent / "pyproject.toml").read_text()

    def test_bedrock_extra_exists(self):
        # The [bedrock] extra must pull in boto3.
        assert 'bedrock = ["boto3' in self._pyproject_text()

    def test_bedrock_in_all_extra(self):
        # The aggregate "all" extra must include the bedrock extra.
        assert '"hermes-agent[bedrock]"' in self._pyproject_text()

View file

@ -0,0 +1,253 @@
"""Tests for agent/nous_rate_guard.py — cross-session Nous Portal rate limit guard."""
import json
import os
import time
import pytest
@pytest.fixture
def rate_guard_env(tmp_path, monkeypatch):
    """Point HERMES_HOME at a throwaway directory so rate-guard state
    never leaks between tests (or into the real home)."""
    isolated_home = str(tmp_path / ".hermes")
    os.makedirs(isolated_home, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", isolated_home)
    return isolated_home
class TestRecordNousRateLimit:
    """Test recording rate limit state."""

    @staticmethod
    def _saved_state():
        """Load and return the JSON state written by the last record call."""
        from agent.nous_rate_guard import _state_path
        with open(_state_path()) as fh:
            return json.load(fh)

    def test_records_with_header_reset(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path
        record_nous_rate_limit(headers={"x-ratelimit-reset-requests-1h": "1800"})
        assert os.path.exists(_state_path())
        state = self._saved_state()
        assert state["reset_seconds"] == pytest.approx(1800, abs=2)
        assert state["reset_at"] > time.time()

    def test_records_with_per_minute_header(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit
        record_nous_rate_limit(headers={"x-ratelimit-reset-requests": "45"})
        assert self._saved_state()["reset_seconds"] == pytest.approx(45, abs=2)

    def test_records_with_retry_after_header(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit
        record_nous_rate_limit(headers={"retry-after": "60"})
        assert self._saved_state()["reset_seconds"] == pytest.approx(60, abs=2)

    def test_prefers_hourly_over_per_minute(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit
        record_nous_rate_limit(headers={
            "x-ratelimit-reset-requests-1h": "1800",
            "x-ratelimit-reset-requests": "45",
        })
        # The hourly bucket wins over the per-minute one.
        assert self._saved_state()["reset_seconds"] == pytest.approx(1800, abs=2)

    def test_falls_back_to_error_context_reset_at(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit
        future_reset = time.time() + 900
        record_nous_rate_limit(
            headers=None,
            error_context={"reset_at": future_reset},
        )
        assert self._saved_state()["reset_at"] == pytest.approx(future_reset, abs=1)

    def test_falls_back_to_default_cooldown(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit
        record_nous_rate_limit(headers=None)
        # With no headers and no context, the 300s (5 minute) default applies.
        assert self._saved_state()["reset_seconds"] == pytest.approx(300, abs=2)

    def test_custom_default_cooldown(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit
        record_nous_rate_limit(headers=None, default_cooldown=120.0)
        assert self._saved_state()["reset_seconds"] == pytest.approx(120, abs=2)

    def test_creates_directory_if_missing(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, _state_path
        record_nous_rate_limit(headers={"retry-after": "10"})
        assert os.path.exists(_state_path())
class TestNousRateLimitRemaining:
    """Test checking remaining rate limit time."""

    def test_returns_none_when_no_file(self, rate_guard_env):
        from agent.nous_rate_guard import nous_rate_limit_remaining
        assert nous_rate_limit_remaining() is None

    def test_returns_remaining_seconds_when_active(self, rate_guard_env):
        from agent.nous_rate_guard import record_nous_rate_limit, nous_rate_limit_remaining
        record_nous_rate_limit(headers={"x-ratelimit-reset-requests-1h": "600"})
        left = nous_rate_limit_remaining()
        assert left is not None
        # ~600 seconds, with slack for test execution time.
        assert 595 < left <= 605

    def test_returns_none_when_expired(self, rate_guard_env):
        from agent.nous_rate_guard import nous_rate_limit_remaining, _state_path
        # Plant a state file whose reset time is already in the past.
        os.makedirs(os.path.dirname(_state_path()), exist_ok=True)
        expired = {"reset_at": time.time() - 10, "recorded_at": time.time() - 100}
        with open(_state_path(), "w") as fh:
            json.dump(expired, fh)
        assert nous_rate_limit_remaining() is None
        # The stale file must be removed as a side effect.
        assert not os.path.exists(_state_path())

    def test_handles_corrupt_file(self, rate_guard_env):
        from agent.nous_rate_guard import nous_rate_limit_remaining, _state_path
        os.makedirs(os.path.dirname(_state_path()), exist_ok=True)
        with open(_state_path(), "w") as fh:
            fh.write("not valid json{{{")
        # Unparseable state is treated as "no limit", not an exception.
        assert nous_rate_limit_remaining() is None
class TestClearNousRateLimit:
    """Test clearing rate limit state."""

    def test_clears_existing_file(self, rate_guard_env):
        from agent.nous_rate_guard import (
            record_nous_rate_limit,
            clear_nous_rate_limit,
            nous_rate_limit_remaining,
            _state_path,
        )
        record_nous_rate_limit(headers={"retry-after": "600"})
        assert nous_rate_limit_remaining() is not None
        clear_nous_rate_limit()
        # Both the in-effect limit and the backing file must be gone.
        assert nous_rate_limit_remaining() is None
        assert not os.path.exists(_state_path())

    def test_clear_when_no_file(self, rate_guard_env):
        from agent.nous_rate_guard import clear_nous_rate_limit
        clear_nous_rate_limit()  # must be a no-op, not an error
class TestFormatRemaining:
    """Test human-readable duration formatting."""

    def test_seconds(self):
        from agent.nous_rate_guard import format_remaining
        assert format_remaining(30) == "30s"

    def test_minutes(self):
        from agent.nous_rate_guard import format_remaining
        assert format_remaining(125) == "2m 5s"

    def test_exact_minutes(self):
        from agent.nous_rate_guard import format_remaining
        # No dangling "0s" component on an exact minute boundary.
        assert format_remaining(120) == "2m"

    def test_hours(self):
        from agent.nous_rate_guard import format_remaining
        assert format_remaining(3720) == "1h 2m"
class TestParseResetSeconds:
    """Test header parsing for reset times.

    ``_parse_reset_seconds`` is a pure function over a header dict — it
    touches no filesystem state, so none of these tests need the
    ``rate_guard_env`` fixture (the original first test requested it
    needlessly, inconsistently with its siblings).
    """

    def test_case_insensitive_headers(self):
        from agent.nous_rate_guard import _parse_reset_seconds
        # Header lookup must not depend on letter case.
        headers = {"X-Ratelimit-Reset-Requests-1h": "1200"}
        assert _parse_reset_seconds(headers) == 1200.0

    def test_returns_none_for_empty_headers(self):
        from agent.nous_rate_guard import _parse_reset_seconds
        assert _parse_reset_seconds(None) is None
        assert _parse_reset_seconds({}) is None

    def test_ignores_zero_values(self):
        from agent.nous_rate_guard import _parse_reset_seconds
        headers = {"x-ratelimit-reset-requests-1h": "0"}
        assert _parse_reset_seconds(headers) is None

    def test_ignores_invalid_values(self):
        from agent.nous_rate_guard import _parse_reset_seconds
        headers = {"x-ratelimit-reset-requests-1h": "not-a-number"}
        assert _parse_reset_seconds(headers) is None
class TestAuxiliaryClientIntegration:
    """Test that the auxiliary client respects the rate guard."""

    def test_try_nous_skips_when_rate_limited(self, rate_guard_env, monkeypatch):
        from agent.nous_rate_guard import record_nous_rate_limit
        import agent.auxiliary_client as aux
        # An active rate limit is on record...
        record_nous_rate_limit(headers={"retry-after": "600"})
        # ...and credentials are valid, so only the guard can block us.
        monkeypatch.setattr(aux, "_read_nous_auth", lambda: {
            "access_token": "test-token",
            "inference_base_url": "https://api.nous.test/v1",
        })
        assert aux._try_nous() == (None, None)

    def test_try_nous_works_when_not_rate_limited(self, rate_guard_env, monkeypatch):
        import agent.auxiliary_client as aux
        # No limit recorded: _try_nous gets past the guard and returns
        # (None, None) only because there are no real credentials.
        monkeypatch.setattr(aux, "_read_nous_auth", lambda: None)
        assert aux._try_nous() == (None, None)

View file

@ -0,0 +1,60 @@
"""Tests for malformed proxy env var and base URL validation.
Salvaged from PR #6403 by MestreY0d4-Uninter — validates that the agent
surfaces clear errors instead of cryptic httpx ``Invalid port`` exceptions
when proxy env vars or custom endpoint URLs are malformed.
"""
from __future__ import annotations
import pytest
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
# -- proxy env validation ------------------------------------------------
def test_proxy_env_accepts_normal_values(monkeypatch):
    """Well-formed proxy URLs in every supported scheme pass validation."""
    monkeypatch.setenv("HTTP_PROXY", "http://127.0.0.1:6153")
    monkeypatch.setenv("HTTPS_PROXY", "https://proxy.example.com:8443")
    monkeypatch.setenv("ALL_PROXY", "socks5://127.0.0.1:1080")
    _validate_proxy_env_urls()  # should not raise
def test_proxy_env_accepts_empty(monkeypatch):
    """Validation is a no-op when no proxy variables are set at all."""
    for name in (
        "HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY",
        "http_proxy", "https_proxy", "all_proxy",
    ):
        monkeypatch.delenv(name, raising=False)
    _validate_proxy_env_urls()  # should not raise
@pytest.mark.parametrize("key", [
    "HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY",
    "http_proxy", "https_proxy", "all_proxy",
])
def test_proxy_env_rejects_malformed_port(monkeypatch, key):
    """A junk port (shell-paste residue) raises a RuntimeError that names
    the offending variable — in either casing."""
    monkeypatch.setenv(key, "http://127.0.0.1:6153export")
    with pytest.raises(RuntimeError, match=rf"Malformed proxy environment variable {key}=.*6153export"):
        _validate_proxy_env_urls()
# -- base URL validation -------------------------------------------------
@pytest.mark.parametrize("url", [
    "https://api.example.com/v1",
    "http://127.0.0.1:6153/v1",
    "acp://copilot",
    "",
    None,
])
def test_base_url_accepts_valid(url):
    """Valid (or absent) endpoint URLs pass without raising."""
    _validate_base_url(url)
def test_base_url_rejects_malformed_port():
    """A malformed port in a custom endpoint raises a clear RuntimeError."""
    with pytest.raises(RuntimeError, match="Malformed custom endpoint URL"):
        _validate_base_url("http://127.0.0.1:6153export")

View file

@ -284,3 +284,95 @@ class TestElevenLabsTavilyExaKeys:
assert "XYZ789abcdef" not in result assert "XYZ789abcdef" not in result
assert "HOME=/home/user" in result assert "HOME=/home/user" in result
assert "SHELL=/bin/bash" in result assert "SHELL=/bin/bash" in result
class TestJWTTokens:
    """JWT tokens start with eyJ (base64 for '{') and have dot-separated parts."""

    def test_full_3part_jwt(self):
        sample = (
            "Token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
            ".eyJpc3MiOiI0MjNiZDJkYjg4MjI0MDAwIn0"
            ".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
        )
        scrubbed = redact_sensitive_text(sample)
        # The label survives; payload and signature do not.
        assert "Token:" in scrubbed
        assert "eyJpc3Mi" not in scrubbed
        assert "Gxgv0rru" not in scrubbed

    def test_2part_jwt(self):
        scrubbed = redact_sensitive_text("eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0")
        assert "eyJzdWIi" not in scrubbed

    def test_standalone_jwt_header(self):
        scrubbed = redact_sensitive_text("leaked header: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9 here")
        assert "IkpXVCJ9" not in scrubbed
        assert "leaked header:" in scrubbed

    def test_jwt_with_base64_padding(self):
        scrubbed = redact_sensitive_text(
            "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0=.abc123def456ghij"
        )
        assert "abc123def456" not in scrubbed

    def test_short_eyj_not_matched(self):
        """eyJ followed by fewer than 10 base64 chars should not match."""
        sample = "eyJust a normal word"
        assert redact_sensitive_text(sample) == sample

    def test_jwt_preserves_surrounding_text(self):
        scrubbed = redact_sensitive_text(
            "before eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0 after"
        )
        assert scrubbed.startswith("before ")
        assert scrubbed.endswith(" after")

    def test_home_assistant_jwt_in_memory(self):
        """Real-world pattern: HA token stored in agent memory block."""
        sample = (
            "Home Assistant API Token: "
            "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
            ".eyJpc3MiOiJhYmNkZWYiLCJleHAiOjE3NzQ5NTcxMDN9"
            ".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
        )
        scrubbed = redact_sensitive_text(sample)
        assert "Home Assistant API Token:" in scrubbed
        assert "Gxgv0rru" not in scrubbed
        assert "..." in scrubbed
class TestDiscordMentions:
    """Discord snowflake IDs in <@ID> or <@!ID> format."""

    def test_normal_mention(self):
        scrubbed = redact_sensitive_text("Hello <@222589316709220353>")
        assert "222589316709220353" not in scrubbed
        assert "<@***>" in scrubbed

    def test_nickname_mention(self):
        scrubbed = redact_sensitive_text("Ping <@!1331549159177846844>")
        assert "1331549159177846844" not in scrubbed
        # The nickname form keeps its "!" marker in the placeholder.
        assert "<@!***>" in scrubbed

    def test_multiple_mentions(self):
        scrubbed = redact_sensitive_text("<@111111111111111111> and <@222222222222222222>")
        assert "111111111111111111" not in scrubbed
        assert "222222222222222222" not in scrubbed

    def test_short_id_not_matched(self):
        """IDs shorter than 17 digits are not Discord snowflakes."""
        sample = "<@12345>"
        assert redact_sensitive_text(sample) == sample

    def test_slack_mention_not_matched(self):
        """Slack mentions use letters, not pure digits."""
        sample = "<@U024BE7LH>"
        assert redact_sensitive_text(sample) == sample

    def test_preserves_surrounding_text(self):
        scrubbed = redact_sensitive_text("User <@222589316709220353> said hello")
        assert scrubbed.startswith("User ")
        assert scrubbed.endswith(" said hello")

View file

@ -193,6 +193,67 @@ class TestLoadGatewayConfig:
assert config.thread_sessions_per_user is False assert config.thread_sessions_per_user is False
def test_bridges_discord_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch):
    # Both quoted and bare-numeric YAML keys must be bridged into the
    # Discord platform extras as *string* keys.
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    config_path = hermes_home / "config.yaml"
    config_path.write_text(
        "discord:\n"
        " channel_prompts:\n"
        " \"123\": Research mode\n"
        " 456: Therapist mode\n",
        encoding="utf-8",
    )
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    config = load_gateway_config()
    # Numeric key 456 comes back normalized to the string "456".
    assert config.platforms[Platform.DISCORD].extra["channel_prompts"] == {
        "123": "Research mode",
        "456": "Therapist mode",
    }
def test_bridges_telegram_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch):
    # Telegram mirrors the Discord bridging; negative supergroup IDs must
    # survive as strings too.
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    config_path = hermes_home / "config.yaml"
    config_path.write_text(
        "telegram:\n"
        " channel_prompts:\n"
        ' "-1001234567": Research assistant\n'
        " 789: Creative writing\n",
        encoding="utf-8",
    )
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    config = load_gateway_config()
    assert config.platforms[Platform.TELEGRAM].extra["channel_prompts"] == {
        "-1001234567": "Research assistant",
        "789": "Creative writing",
    }
def test_bridges_slack_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch):
    # Slack channel IDs are already strings; bridging passes them through.
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    config_path = hermes_home / "config.yaml"
    config_path.write_text(
        "slack:\n"
        " channel_prompts:\n"
        ' "C01ABC": Code review mode\n',
        encoding="utf-8",
    )
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    config = load_gateway_config()
    assert config.platforms[Platform.SLACK].extra["channel_prompts"] == {
        "C01ABC": "Code review mode",
    }
def test_invalid_quick_commands_in_config_yaml_are_ignored(self, tmp_path, monkeypatch): def test_invalid_quick_commands_in_config_yaml_are_ignored(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes" hermes_home = tmp_path / ".hermes"
hermes_home.mkdir() hermes_home.mkdir()

View file

@ -0,0 +1,259 @@
"""Tests for Discord channel_prompts resolution and injection."""
import sys
import threading
import types
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
def _ensure_discord_mock():
    """Install stub ``discord`` modules into sys.modules unless the real
    library (one with a ``__file__``) is already importable."""
    real_discord = sys.modules.get("discord")
    if real_discord is not None and hasattr(real_discord, "__file__"):
        return
    stub = types.ModuleType("discord")
    stub.Intents = MagicMock()
    stub.Intents.default.return_value = MagicMock()
    # Concrete types so isinstance() checks in the adapter work.
    stub.DMChannel = type("DMChannel", (), {})
    stub.Thread = type("Thread", (), {})
    stub.ForumChannel = type("ForumChannel", (), {})
    stub.Interaction = object
    ext_stub = MagicMock()
    commands_stub = MagicMock()
    commands_stub.Bot = MagicMock
    ext_stub.commands = commands_stub
    # setdefault: never clobber a module someone else already installed.
    sys.modules.setdefault("discord", stub)
    sys.modules.setdefault("discord.ext", ext_stub)
    sys.modules.setdefault("discord.ext.commands", commands_stub)
import gateway.run as gateway_run
from gateway.config import Platform
from gateway.platforms.base import MessageEvent
from gateway.session import SessionSource
class _CapturingAgent:
    """Stand-in for run_agent.AIAgent that records its constructor kwargs
    (on the class, as ``last_init``) and returns a canned conversation."""

    last_init = None  # kwargs from the most recent instantiation

    def __init__(self, *args, **kwargs):
        type(self).last_init = dict(kwargs)
        self.tools = []

    def run_conversation(self, user_message, conversation_history=None, task_id=None, persist_user_message=None):
        canned = {
            "final_response": "ok",
            "messages": [],
            "api_calls": 1,
            "completed": True,
        }
        return canned
def _install_fake_agent(monkeypatch):
    """Replace the ``run_agent`` module with one exposing _CapturingAgent."""
    stub_module = types.ModuleType("run_agent")
    stub_module.AIAgent = _CapturingAgent
    monkeypatch.setitem(sys.modules, "run_agent", stub_module)
def _make_adapter():
    """Build a DiscordAdapter without running __init__, with empty extras."""
    _ensure_discord_mock()
    from gateway.platforms.discord import DiscordAdapter
    # __new__ skips __init__ (which would need a real bot/token).
    adapter = object.__new__(DiscordAdapter)
    adapter.config = MagicMock()
    adapter.config.extra = {}
    return adapter
def _make_runner():
    """Build a GatewayRunner without running __init__, wiring just enough
    state for the message-handling paths exercised in these tests."""
    runner = object.__new__(gateway_run.GatewayRunner)
    runner.adapters = {}
    # Prompt/config plumbing read by the agent-spawning path.
    runner._ephemeral_system_prompt = "Global prompt"
    runner._prefill_messages = []
    runner._reasoning_config = None
    runner._service_tier = None
    runner._provider_routing = {}
    runner._fallback_model = None
    runner._smart_model_routing = {}
    # Per-session bookkeeping, all empty/fresh.
    runner._running_agents = {}
    runner._pending_model_notes = {}
    runner._session_db = None
    runner._agent_cache = {}
    runner._agent_cache_lock = threading.Lock()
    runner._session_model_overrides = {}
    runner.hooks = SimpleNamespace(loaded_hooks=False)
    runner.config = SimpleNamespace(streaming=None)
    # Minimal session store: one fixed session id, empty transcript.
    runner.session_store = SimpleNamespace(
        get_or_create_session=lambda source: SimpleNamespace(session_id="session-1"),
        load_transcript=lambda session_id: [],
    )
    runner._get_or_create_gateway_honcho = lambda session_key: (None, None)
    runner._enrich_message_with_vision = AsyncMock(return_value="ENRICHED")
    return runner
def _make_source() -> SessionSource:
    """A canonical Discord thread source shared by these tests."""
    source = SessionSource(
        platform=Platform.DISCORD,
        chat_id="12345",
        chat_type="thread",
        user_id="user-1",
    )
    return source
class TestResolveChannelPrompts:
    """Unit tests for DiscordAdapter._resolve_channel_prompt and the
    code paths that attach the resolved prompt to outgoing events."""

    def test_no_prompt_returns_none(self):
        adapter = _make_adapter()
        assert adapter._resolve_channel_prompt("123") is None

    def test_match_by_channel_id(self):
        adapter = _make_adapter()
        adapter.config.extra = {"channel_prompts": {"100": "Research mode"}}
        assert adapter._resolve_channel_prompt("100") == "Research mode"

    def test_numeric_yaml_keys_normalized_at_config_load(self):
        """Numeric YAML keys are normalized to strings by config bridging.
        The resolver itself expects string keys (config.py handles
        normalization), so raw numeric keys will not match — this is
        intentional.
        """
        adapter = _make_adapter()
        # Simulates post-bridging state: keys are already strings
        adapter.config.extra = {"channel_prompts": {"100": "Research mode"}}
        assert adapter._resolve_channel_prompt("100") == "Research mode"
        # Pre-bridging numeric key would not match (bridging is responsible)
        adapter.config.extra = {"channel_prompts": {100: "Research mode"}}
        assert adapter._resolve_channel_prompt("100") is None

    def test_match_by_parent_id(self):
        adapter = _make_adapter()
        adapter.config.extra = {"channel_prompts": {"200": "Forum prompt"}}
        # A thread with no direct entry falls back to its parent channel.
        assert adapter._resolve_channel_prompt("999", parent_id="200") == "Forum prompt"

    def test_exact_channel_overrides_parent(self):
        adapter = _make_adapter()
        adapter.config.extra = {
            "channel_prompts": {
                "999": "Thread override",
                "200": "Forum prompt",
            }
        }
        # A direct entry for the channel beats the parent fallback.
        assert adapter._resolve_channel_prompt("999", parent_id="200") == "Thread override"

    def test_build_message_event_sets_channel_prompt(self):
        adapter = _make_adapter()
        adapter.config.extra = {"channel_prompts": {"321": "Command prompt"}}
        adapter.build_source = MagicMock(return_value=SimpleNamespace())
        interaction = SimpleNamespace(
            channel_id=321,
            channel=SimpleNamespace(name="general", guild=None, parent_id=None),
            user=SimpleNamespace(id=1, display_name="Brenner"),
        )
        adapter._get_effective_topic = MagicMock(return_value=None)
        event = adapter._build_slash_event(interaction, "/retry")
        # Slash-command events carry the channel's configured prompt.
        assert event.channel_prompt == "Command prompt"

    @pytest.mark.asyncio
    async def test_dispatch_thread_session_inherits_parent_channel_prompt(self):
        adapter = _make_adapter()
        adapter.config.extra = {"channel_prompts": {"200": "Parent prompt"}}
        adapter.build_source = MagicMock(return_value=SimpleNamespace())
        adapter._get_effective_topic = MagicMock(return_value=None)
        adapter.handle_message = AsyncMock()
        interaction = SimpleNamespace(
            guild=SimpleNamespace(name="Wetlands"),
            channel=SimpleNamespace(id=200, parent=None),
            user=SimpleNamespace(id=1, display_name="Brenner"),
        )
        await adapter._dispatch_thread_session(interaction, "999", "new-thread", "hello")
        # The event forwarded to handle_message inherits the parent's prompt.
        dispatched_event = adapter.handle_message.await_args.args[0]
        assert dispatched_event.channel_prompt == "Parent prompt"

    def test_blank_prompts_are_ignored(self):
        adapter = _make_adapter()
        # Whitespace-only prompt values are treated as unset.
        adapter.config.extra = {"channel_prompts": {"100": " "}}
        assert adapter._resolve_channel_prompt("100") is None
@pytest.mark.asyncio
async def test_retry_preserves_channel_prompt(monkeypatch):
    """/retry must re-dispatch the original message with the same
    channel_prompt attached to the rebuilt event."""
    runner = _make_runner()
    # Session store with a two-message transcript so retry has something
    # to rewind; rewrite_transcript is a spy.
    runner.session_store = SimpleNamespace(
        get_or_create_session=lambda source: SimpleNamespace(session_id="session-1", last_prompt_tokens=10),
        load_transcript=lambda session_id: [
            {"role": "user", "content": "original message"},
            {"role": "assistant", "content": "old reply"},
        ],
        rewrite_transcript=MagicMock(),
    )
    runner._handle_message = AsyncMock(return_value="ok")
    event = MessageEvent(
        text="/retry",
        message_type=gateway_run.MessageType.COMMAND,
        source=_make_source(),
        raw_message=SimpleNamespace(),
        channel_prompt="Channel prompt",
    )
    result = await runner._handle_retry_command(event)
    assert result == "ok"
    # The event re-dispatched through _handle_message keeps the prompt.
    retried_event = runner._handle_message.await_args.args[0]
    assert retried_event.channel_prompt == "Channel prompt"
@pytest.mark.asyncio
async def test_run_agent_appends_channel_prompt_to_ephemeral_system_prompt(monkeypatch, tmp_path):
    """The channel prompt is inserted between the context prompt and the
    global system prompt when the agent is constructed."""
    _install_fake_agent(monkeypatch)
    runner = _make_runner()
    # Point the gateway at a throwaway HERMES_HOME with a minimal config.
    (tmp_path / "config.yaml").write_text("agent:\n system_prompt: Global prompt\n", encoding="utf-8")
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    monkeypatch.setattr(gateway_run, "_env_path", tmp_path / ".env")
    monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None)
    monkeypatch.setattr(gateway_run, "_load_gateway_config", lambda: {})
    monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda config=None: "gpt-5.4")
    # Stub runtime provider resolution so no real credentials are needed.
    monkeypatch.setattr(
        gateway_run,
        "_resolve_runtime_agent_kwargs",
        lambda: {
            "provider": "openrouter",
            "api_mode": "chat_completions",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "***",
        },
    )
    import hermes_cli.tools_config as tools_config
    monkeypatch.setattr(tools_config, "_get_platform_tools", lambda user_config, platform_key: {"core"})
    # Reset the capture so we read this run's constructor kwargs.
    _CapturingAgent.last_init = None
    event = MessageEvent(
        text="hi",
        source=_make_source(),
        message_id="m1",
        channel_prompt="Channel prompt",
    )
    result = await runner._run_agent(
        message="hi",
        context_prompt="Context prompt",
        history=[],
        source=_make_source(),
        session_id="session-1",
        session_key="agent:main:discord:thread:12345",
        channel_prompt=event.channel_prompt,
    )
    assert result["final_response"] == "ok"
    # Expected order: context prompt, then channel prompt, then global prompt.
    assert _CapturingAgent.last_init["ephemeral_system_prompt"] == (
        "Context prompt\n\nChannel prompt\n\nGlobal prompt"
    )

View file

@ -459,7 +459,7 @@ class TestCustomProviderCompatibility:
migrate_config(interactive=False, quiet=True) migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 17 assert raw["_config_version"] == 18
assert raw["providers"]["openai-direct"] == { assert raw["providers"]["openai-direct"] == {
"api": "https://api.openai.com/v1", "api": "https://api.openai.com/v1",
"api_key": "test-key", "api_key": "test-key",
@ -606,6 +606,26 @@ class TestInterimAssistantMessageConfig:
migrate_config(interactive=False, quiet=True) migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 17 assert raw["_config_version"] == 18
assert raw["display"]["tool_progress"] == "off" assert raw["display"]["tool_progress"] == "off"
assert raw["display"]["interim_assistant_messages"] is True assert raw["display"]["interim_assistant_messages"] is True
class TestDiscordChannelPromptsConfig:
def test_default_config_includes_discord_channel_prompts(self):
assert DEFAULT_CONFIG["discord"]["channel_prompts"] == {}
def test_migrate_adds_discord_channel_prompts_default(self, tmp_path):
config_path = tmp_path / "config.yaml"
config_path.write_text(
yaml.safe_dump({"_config_version": 17, "discord": {"auto_thread": True}}),
encoding="utf-8",
)
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 18
assert raw["discord"]["auto_thread"] is True
assert raw["discord"]["channel_prompts"] == {}

View file

@ -64,4 +64,4 @@ class TestCamofoxConfigDefaults:
# The current schema version is tracked globally; unrelated default # The current schema version is tracked globally; unrelated default
# options may bump it after browser defaults are added. # options may bump it after browser defaults are added.
assert DEFAULT_CONFIG["_config_version"] == 17 assert DEFAULT_CONFIG["_config_version"] == 18

View file

@ -82,6 +82,18 @@ SKILLS_DIR = HERMES_HOME / "skills"
MAX_NAME_LENGTH = 64 MAX_NAME_LENGTH = 64
MAX_DESCRIPTION_LENGTH = 1024 MAX_DESCRIPTION_LENGTH = 1024
def _is_local_skill(skill_path: Path) -> bool:
    """Return True when *skill_path* lives under the local SKILLS_DIR.

    Skills discovered in external_dirs are treated as read-only, so the
    mutation helpers refuse to touch anything outside SKILLS_DIR.
    """
    try:
        # relative_to raises ValueError when the path is outside the root.
        skill_path.resolve().relative_to(SKILLS_DIR.resolve())
    except ValueError:
        return False
    return True
MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token
MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file
@ -360,6 +372,9 @@ def _edit_skill(name: str, content: str) -> Dict[str, Any]:
if not existing: if not existing:
return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."} return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."}
if not _is_local_skill(existing["path"]):
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be modified. Copy it to your local skills directory first."}
skill_md = existing["path"] / "SKILL.md" skill_md = existing["path"] / "SKILL.md"
# Back up original content for rollback # Back up original content for rollback
original_content = skill_md.read_text(encoding="utf-8") if skill_md.exists() else None original_content = skill_md.read_text(encoding="utf-8") if skill_md.exists() else None
@ -400,6 +415,9 @@ def _patch_skill(
if not existing: if not existing:
return {"success": False, "error": f"Skill '{name}' not found."} return {"success": False, "error": f"Skill '{name}' not found."}
if not _is_local_skill(existing["path"]):
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be modified. Copy it to your local skills directory first."}
skill_dir = existing["path"] skill_dir = existing["path"]
if file_path: if file_path:
@ -473,6 +491,9 @@ def _delete_skill(name: str) -> Dict[str, Any]:
if not existing: if not existing:
return {"success": False, "error": f"Skill '{name}' not found."} return {"success": False, "error": f"Skill '{name}' not found."}
if not _is_local_skill(existing["path"]):
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be deleted."}
skill_dir = existing["path"] skill_dir = existing["path"]
shutil.rmtree(skill_dir) shutil.rmtree(skill_dir)
@ -515,6 +536,9 @@ def _write_file(name: str, file_path: str, file_content: str) -> Dict[str, Any]:
if not existing: if not existing:
return {"success": False, "error": f"Skill '{name}' not found. Create it first with action='create'."} return {"success": False, "error": f"Skill '{name}' not found. Create it first with action='create'."}
if not _is_local_skill(existing["path"]):
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be modified. Copy it to your local skills directory first."}
target, err = _resolve_skill_target(existing["path"], file_path) target, err = _resolve_skill_target(existing["path"], file_path)
if err: if err:
return {"success": False, "error": err} return {"success": False, "error": err}
@ -548,6 +572,10 @@ def _remove_file(name: str, file_path: str) -> Dict[str, Any]:
existing = _find_skill(name) existing = _find_skill(name)
if not existing: if not existing:
return {"success": False, "error": f"Skill '{name}' not found."} return {"success": False, "error": f"Skill '{name}' not found."}
if not _is_local_skill(existing["path"]):
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be modified."}
skill_dir = existing["path"] skill_dir = existing["path"]
target, err = _resolve_skill_target(skill_dir, file_path) target, err = _resolve_skill_target(skill_dir, file_path)

View file

@ -0,0 +1,164 @@
---
sidebar_position: 14
title: "AWS Bedrock"
description: "Use Hermes Agent with Amazon Bedrock — native Converse API, IAM authentication, Guardrails, and cross-region inference"
---
# AWS Bedrock
Hermes Agent supports Amazon Bedrock as a native provider using the **Converse API** — not the OpenAI-compatible endpoint. This gives you full access to the Bedrock ecosystem: IAM authentication, Guardrails, cross-region inference profiles, and all foundation models.
## Prerequisites
- **AWS credentials** — any source supported by the [boto3 credential chain](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html):
- IAM instance role (EC2, ECS, Lambda — zero config)
- `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` environment variables
- `AWS_PROFILE` for SSO or named profiles
- `aws configure` for local development
- **boto3** — install with `pip install hermes-agent[bedrock]`
- **IAM permissions** — at minimum:
- `bedrock:InvokeModel` and `bedrock:InvokeModelWithResponseStream` (for inference)
- `bedrock:ListFoundationModels` and `bedrock:ListInferenceProfiles` (for model discovery)
:::tip EC2 / ECS / Lambda
On AWS compute, attach an IAM role with `AmazonBedrockFullAccess` and you're done. No API keys, no `.env` configuration — Hermes detects the instance role automatically.
:::
## Quick Start
```bash
# Install with Bedrock support
pip install hermes-agent[bedrock]
# Select Bedrock as your provider
hermes model
# → Choose "More providers..." → "AWS Bedrock"
# → Select your region and model
# Start chatting
hermes chat
```
## Configuration
After running `hermes model`, your `~/.hermes/config.yaml` will contain:
```yaml
model:
default: us.anthropic.claude-sonnet-4-6
provider: bedrock
base_url: https://bedrock-runtime.us-east-2.amazonaws.com
bedrock:
region: us-east-2
```
### Region
Set the AWS region in any of these ways (highest priority first):
1. `bedrock.region` in `config.yaml`
2. `AWS_REGION` environment variable
3. `AWS_DEFAULT_REGION` environment variable
4. Default: `us-east-1`
### Guardrails
To apply [Amazon Bedrock Guardrails](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html) to all model invocations:
```yaml
bedrock:
region: us-east-2
guardrail:
guardrail_identifier: "abc123def456" # From the Bedrock console
guardrail_version: "1" # Version number or "DRAFT"
stream_processing_mode: "async" # "sync" or "async"
trace: "disabled" # "enabled", "disabled", or "enabled_full"
```
### Model Discovery
Hermes auto-discovers available models via the Bedrock control plane. You can customize discovery:
```yaml
bedrock:
discovery:
enabled: true
provider_filter: ["anthropic", "amazon"] # Only show these providers
refresh_interval: 3600 # Cache for 1 hour
```
## Available Models
Bedrock models use **inference profile IDs** for on-demand invocation. The `hermes model` picker shows these automatically, with recommended models at the top:
| Model | ID | Notes |
|-------|-----|-------|
| Claude Sonnet 4.6 | `us.anthropic.claude-sonnet-4-6` | Recommended — best balance of speed and capability |
| Claude Opus 4.6 | `us.anthropic.claude-opus-4-6-v1` | Most capable |
| Claude Haiku 4.5 | `us.anthropic.claude-haiku-4-5-20251001-v1:0` | Fastest Claude |
| Amazon Nova Pro | `us.amazon.nova-pro-v1:0` | Amazon's flagship |
| Amazon Nova Micro | `us.amazon.nova-micro-v1:0` | Fastest, cheapest |
| DeepSeek V3.2 | `deepseek.v3.2` | Strong open model |
| Llama 4 Scout 17B | `us.meta.llama4-scout-17b-instruct-v1:0` | Meta's latest |
:::info Cross-Region Inference
Models prefixed with `us.` use cross-region inference profiles, which provide better capacity and automatic failover across AWS regions. Models prefixed with `global.` route across all available regions worldwide.
:::
## Switching Models Mid-Session
Use the `/model` command during a conversation:
```
/model us.amazon.nova-pro-v1:0
/model deepseek.v3.2
/model us.anthropic.claude-opus-4-6-v1
```
## Diagnostics
```bash
hermes doctor
```
The doctor checks:
- Whether AWS credentials are available (env vars, IAM role, SSO)
- Whether `boto3` is installed
- Whether the Bedrock API is reachable (ListFoundationModels)
- Number of available models in your region
## Gateway (Messaging Platforms)
Bedrock works with all Hermes gateway platforms (Telegram, Discord, Slack, Feishu, etc.). Configure Bedrock as your provider, then start the gateway normally:
```bash
hermes gateway setup
hermes gateway start
```
The gateway reads `config.yaml` and uses the same Bedrock provider configuration.
## Troubleshooting
### "No API key found" / "No AWS credentials"
Hermes checks for credentials in this order:
1. `AWS_BEARER_TOKEN_BEDROCK`
2. `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`
3. `AWS_PROFILE`
4. EC2 instance metadata (IMDS)
5. ECS container credentials
6. Lambda execution role
If none are found, run `aws configure` or attach an IAM role to your compute instance.
### "Invocation of model ID ... with on-demand throughput isn't supported"
Use an **inference profile ID** (prefixed with `us.` or `global.`) instead of the bare foundation model ID. For example:
- ❌ `anthropic.claude-sonnet-4-6`
- ✅ `us.anthropic.claude-sonnet-4-6`
### "ThrottlingException"
You've hit the Bedrock per-model rate limit. Hermes automatically retries with backoff. To increase limits, request a quota increase in the [AWS Service Quotas console](https://console.aws.amazon.com/servicequotas/).

View file

@ -297,6 +297,7 @@ discord:
reactions: true # Add emoji reactions during processing
ignored_channels: [] # Channel IDs where bot never responds
no_thread_channels: [] # Channel IDs where bot responds without threading
channel_prompts: {} # Per-channel ephemeral system prompts
# Session isolation (applies to all gateway platforms, not just Discord)
group_sessions_per_user: true # Isolate sessions per user in shared channels
@ -381,6 +382,28 @@ discord:
Useful for channels dedicated to bot interaction where threads would add unnecessary noise.
#### `discord.channel_prompts`
**Type:** mapping — **Default:** `{}`
Per-channel ephemeral system prompts that are injected on every turn in the matching Discord channel or thread without being persisted to transcript history.
```yaml
discord:
channel_prompts:
"1234567890": |
This channel is for research tasks. Prefer deep comparisons,
citations, and concise synthesis.
"9876543210": |
This forum is for therapy-style support. Be warm, grounded,
and non-judgmental.
```
Behavior:
- Exact thread/channel ID matches win.
- If a message arrives inside a thread or forum post and that thread has no explicit entry, Hermes falls back to the parent channel/forum ID.
- Prompts are applied ephemerally at runtime, so changing them affects future turns immediately without rewriting past session history.
#### `group_sessions_per_user`
**Type:** boolean — **Default:** `true`

View file

@ -281,6 +281,23 @@ If this returns your bot's user info, the token is valid. If it returns an error
**Fix**: Add your User ID to `MATTERMOST_ALLOWED_USERS` in `~/.hermes/.env` and restart the gateway. Remember: the User ID is a 26-character alphanumeric string, not your `@username`.
## Per-Channel Prompts
Assign ephemeral system prompts to specific Mattermost channels. The prompt is injected at runtime on every turn — never persisted to transcript history — so changes take effect immediately.
```yaml
mattermost:
channel_prompts:
"channel_id_abc123": |
You are a research assistant. Focus on academic sources,
citations, and concise synthesis.
"channel_id_def456": |
Code review mode. Be precise about edge cases and
performance implications.
```
Keys are Mattermost channel IDs (find them in the channel URL or via the API). All messages in the matching channel get the prompt injected as an ephemeral system instruction.
## Security
:::warning :::warning

View file

@ -418,6 +418,23 @@ Hermes supports voice on Slack:
---
## Per-Channel Prompts
Assign ephemeral system prompts to specific Slack channels. The prompt is injected at runtime on every turn — never persisted to transcript history — so changes take effect immediately.
```yaml
slack:
channel_prompts:
"C01RESEARCH": |
You are a research assistant. Focus on academic sources,
citations, and concise synthesis.
"C02ENGINEERING": |
Code review mode. Be precise about edge cases and
performance implications.
```
Keys are Slack channel IDs (find them via channel details → "About" → scroll to bottom). All messages in the matching channel get the prompt injected as an ephemeral system instruction.
## Troubleshooting
| Problem | Solution |

View file

@ -526,6 +526,29 @@ Unlike Discord (where reactions are additive), Telegram's Bot API replaces all b
If the bot doesn't have permission to add reactions in a group, the reaction calls fail silently and message processing continues normally.
::: :::
## Per-Channel Prompts
Assign ephemeral system prompts to specific Telegram groups or forum topics. The prompt is injected at runtime on every turn — never persisted to transcript history — so changes take effect immediately.
```yaml
telegram:
channel_prompts:
"-1001234567890": |
You are a research assistant. Focus on academic sources,
citations, and concise synthesis.
"42": |
This topic is for creative writing feedback. Be warm and
constructive.
```
Keys are chat IDs (groups/supergroups) or forum topic IDs. For forum groups, topic-level prompts override the group-level prompt:
- Message in topic `42` inside group `-1001234567890` → uses topic `42`'s prompt
- Message in topic `99` (no explicit entry) → falls back to group `-1001234567890`'s prompt
- Message in a group with no entry → no channel prompt applied
Numeric YAML keys are automatically normalized to strings.
## Troubleshooting
| Problem | Solution |