Merge 6e6d7c48c8 into 00c3d848d8

2026-04-25 00:51:20 +00:00 · 2026-04-24 22:53:38 +00:00 · 2026-04-24 22:53:38 +00:00 · 0d6cc3bc70
commit 0d6cc3bc70
parent 00c3d848d8 6e6d7c48c8
18 changed files with 868 additions and 45 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -1303,13 +1303,47 @@ def _is_payment_error(exc: Exception) -> bool:
    status = getattr(exc, "status_code", None)
    if status == 402:
        return True
-    err_lower = str(exc).lower()
+
+    parts = [str(exc).lower()]
+    body = getattr(exc, "body", None)
+    if isinstance(body, dict):
+        try:
+            parts.append(json.dumps(body, ensure_ascii=False).lower())
+        except Exception:
+            parts.append(str(body).lower())
+        err_obj = body.get("error", {}) if isinstance(body.get("error"), dict) else {}
+        body_msg = (err_obj.get("message") or body.get("message") or "").lower()
+        if body_msg:
+            parts.append(body_msg)
+        error_code = (err_obj.get("code") or err_obj.get("type") or body.get("code") or body.get("type") or "")
+        if error_code:
+            parts.append(str(error_code).lower())
+        metadata = err_obj.get("metadata", {}) if isinstance(err_obj, dict) else {}
+        raw_json = metadata.get("raw") if isinstance(metadata, dict) else None
+        if isinstance(raw_json, str) and raw_json.strip():
+            try:
+                inner = json.loads(raw_json)
+                if isinstance(inner, dict):
+                    inner_err = inner.get("error", {}) if isinstance(inner.get("error"), dict) else {}
+                    inner_msg = (inner_err.get("message") or inner.get("message") or "").lower()
+                    if inner_msg:
+                        parts.append(inner_msg)
+            except Exception:
+                parts.append(raw_json.lower())
+    elif body is not None:
+        parts.append(str(body).lower())
+
+    err_lower = " ".join(p for p in parts if p)
    # OpenRouter and other providers include "credits" or "afford" in 402 bodies,
    # but sometimes wrap them in 429 or other codes.
    if status in (402, 429, None):
-        if any(kw in err_lower for kw in ("credits", "insufficient funds",
-                                           "can only afford", "billing",
-                                           "payment required")):
+        if any(kw in err_lower for kw in (
+            "credits", "insufficient funds", "insufficient balance",
+            "insufficient_balance", "insufficient_quota",
+            "can only afford", "billing",
+            "payment required", "payment_required",
+            "top up your credits",
+        )):
            return True
    return False

--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@ -89,10 +89,13 @@ class ClassifiedError:
 # Patterns that indicate billing exhaustion (not transient rate limit)
 _BILLING_PATTERNS = [
    "insufficient credits",
+    "insufficient balance",
+    "insufficient_balance",
    "insufficient_quota",
    "credit balance",
    "credits have been exhausted",
    "top up your credits",
+    "can only afford",
    "payment required",
    "billing hard limit",
    "exceeded your current quota",
@ -589,6 +592,20 @@ def _classify_by_status(
        )

    if status_code == 429:
+        # Some providers surface billing exhaustion as 429 instead of 402 and may
+        # only expose the billing signal through a structured error code.
+        if error_code:
+            classified = _classify_by_error_code(error_code, error_msg, result_fn)
+            if classified is not None:
+                return classified
+        # Others embed the billing signal only in free-text messages.
+        if any(p in error_msg for p in _BILLING_PATTERNS):
+            return result_fn(
+                FailoverReason.billing,
+                retryable=False,
+                should_rotate_credential=True,
+                should_fallback=True,
+            )
        # Already checked long_context_tier above; this is a normal rate limit
        return result_fn(
            FailoverReason.rate_limit,
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -923,6 +923,10 @@ DEFAULT_CONFIG = {
            "domains": [],
            "shared_files": [],
        },
+        "tool_governance": {
+            "skill_allowed_tools": False,
+            "channel_tool_review": False,
+        },
    },

    "cron": {
@ -3763,6 +3767,8 @@ def show_config():
    
    print(f"  Telegram:     {'configured' if telegram_token else color('not configured', Colors.DIM)}")
    print(f"  Discord:      {'configured' if discord_token else color('not configured', Colors.DIM)}")
+
+    _print_tool_governance_section(config)
    
    # Skill config
    try:
@ -3821,6 +3827,78 @@ def edit_config():
    subprocess.run([editor, str(config_path)])


+def _print_tool_governance_section(config: dict, *, show_commands: bool = False) -> None:
+    """Print the tool-governance toggles found in ``config``.
+
+    Each toggle is printed twice: once under its short name and once under
+    its full dotted config key, both followed by ``on`` or a dimmed ``off``.
+
+    Args:
+        config: Configuration mapping; ``security.tool_governance`` is read
+            from it. A missing or null section is treated as "all off".
+        show_commands: When true, also print the preset, enable and disable
+            command hints.
+    """
+    print()
+    print(color("◆ Tool Governance", Colors.CYAN, Colors.BOLD))
+    # A key that is present but null would make ``.get(key, {})`` return
+    # None, so fall back with ``or {}`` (same guard the doctor report uses).
+    tool_governance = (config.get('security') or {}).get('tool_governance') or {}
+    skill_allowed_tools = tool_governance.get('skill_allowed_tools', False)
+    channel_tool_review = tool_governance.get('channel_tool_review', False)
+
+    # Build each status label once; it is reused for both spellings below.
+    skill_status = 'on' if skill_allowed_tools else color('off', Colors.DIM)
+    channel_status = 'on' if channel_tool_review else color('off', Colors.DIM)
+
+    print(f"  skill_allowed_tools: {skill_status}")
+    print(f"  channel_tool_review: {channel_status}")
+    print(f"  security.tool_governance.skill_allowed_tools  {skill_status}")
+    print(f"  security.tool_governance.channel_tool_review  {channel_status}")
+
+    if show_commands:
+        print()
+        print("  Presets:")
+        print("    hermes config governance --preset messaging-safe")
+        print("    hermes config governance --preset skill-safe")
+        print("    hermes config governance --preset balanced")
+        print()
+        print("  Enable with:")
+        print("    hermes config set security.tool_governance.skill_allowed_tools true")
+        print("    hermes config set security.tool_governance.channel_tool_review true")
+        print()
+        print("  Disable with:")
+        print("    hermes config set security.tool_governance.skill_allowed_tools false")
+        print("    hermes config set security.tool_governance.channel_tool_review false")
+
+
+
+def _apply_tool_governance_toggles(args) -> None:
+    """Write the governance toggles requested on the command line.
+
+    Reads the ``enable_*`` / ``disable_*`` flags and the optional ``preset``
+    from ``args`` (absent attributes count as unset), folds the preset and
+    the ``*_all`` flags into per-toggle requests, and stores each requested
+    state through ``set_config_value``. When a toggle is asked to be both
+    enabled and disabled, enabling wins. A toggle with no request is left
+    untouched.
+    """
+    def _flag(name: str) -> bool:
+        return bool(getattr(args, name, False))
+
+    all_on = _flag('enable_all')
+    all_off = _flag('disable_all')
+    skill_on = _flag('enable_skill_allowed_tools')
+    skill_off = _flag('disable_skill_allowed_tools')
+    channel_on = _flag('enable_channel_review')
+    channel_off = _flag('disable_channel_review')
+    preset = getattr(args, 'preset', None)
+
+    # Fold the preset and the blanket flags into the per-toggle requests.
+    skill_on = skill_on or all_on or preset in ('skill-safe', 'balanced')
+    skill_off = skill_off or all_off or preset == 'messaging-safe'
+    channel_on = channel_on or all_on or preset in ('messaging-safe', 'balanced')
+    channel_off = channel_off or all_off or preset == 'skill-safe'
+
+    requests = (
+        ("security.tool_governance.skill_allowed_tools", skill_on, skill_off),
+        ("security.tool_governance.channel_tool_review", channel_on, channel_off),
+    )
+    for config_key, turn_on, turn_off in requests:
+        if turn_on:
+            set_config_value(config_key, "true")
+        elif turn_off:
+            set_config_value(config_key, "false")
+
+
 def set_config_value(key: str, value: str):
    """Set a configuration value."""
    if is_managed():
@ -3931,6 +4009,8 @@ def config_command(args):
            print("Examples:")
            print("  hermes config set model anthropic/claude-sonnet-4")
            print("  hermes config set terminal.backend docker")
+            print("  hermes config set security.tool_governance.skill_allowed_tools true")
+            print("  hermes config set security.tool_governance.channel_tool_review true")
            print("  hermes config set OPENROUTER_API_KEY sk-or-...")
            sys.exit(1)
        set_config_value(key, value)
@ -4032,8 +4112,17 @@ def config_command(args):
            print()
            print(color(f"  {len(missing_config)} new config option(s) available", Colors.YELLOW))
            print("    Run 'hermes config migrate' to add them")
+
+        _print_tool_governance_section(load_config())
        
        print()
+
+    elif subcmd == "governance":
+        print()
+        print(color("🛡 Tool Governance", Colors.CYAN, Colors.BOLD))
+        _apply_tool_governance_toggles(args)
+        _print_tool_governance_section(load_config(), show_commands=True)
+        print()
    
    else:
        print(f"Unknown config command: {subcmd}")
@ -4043,6 +4132,7 @@ def config_command(args):
        print("  hermes config edit      Open config in editor")
        print("  hermes config set <key> <value>   Set a config value")
        print("  hermes config check     Check for missing/outdated config")
+        print("  hermes config governance Show tool governance settings")
        print("  hermes config migrate   Update config with new options")
        print("  hermes config path      Show config file path")
        print("  hermes config env-path  Show .env file path")
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@ -130,6 +130,27 @@ def check_info(text: str):
    print(f"    {color('→', Colors.CYAN)} {text}")


+def _report_tool_governance(config: dict) -> None:
+    """Print a "Tool Governance" section describing both policy toggles.
+
+    Reads ``security.tool_governance`` from ``config`` (missing or null
+    sections count as empty), reports each policy as enabled or disabled via
+    ``check_ok``, and adds an informational note when both are off.
+    """
+    security = config.get("security") or {}
+    policies = security.get("tool_governance") or {}
+
+    print()
+    print(color("◆ Tool Governance", Colors.CYAN, Colors.BOLD))
+
+    skill_enabled = policies.get("skill_allowed_tools", False)
+    channel_enabled = policies.get("channel_tool_review", False)
+    report_rows = (
+        ("Skill allowed-tools enforcement", skill_enabled),
+        ("Channel tool review", channel_enabled),
+    )
+    for label, enabled in report_rows:
+        state = "enabled" if enabled else "disabled"
+        check_ok(f"{label}: {state}", "(config.yaml)")
+
+    if not (skill_enabled or channel_enabled):
+        check_info("Both governance policies are advisory/off by default until explicitly enabled")
+
+
 def _check_gateway_service_linger(issues: list[str]) -> None:
    """Warn when a systemd user gateway service will stop after logout."""
    try:
@ -465,6 +486,14 @@ def run_doctor(args):
        except Exception:
            pass

+        try:
+            import yaml
+            with open(config_path, encoding="utf-8") as f:
+                raw_config = yaml.safe_load(f) or {}
+            _report_tool_governance(raw_config)
+        except Exception:
+            pass
+
    # =========================================================================
    # Check: Auth providers
    # =========================================================================
@ -937,9 +966,10 @@ def run_doctor(args):
        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
        ("NVIDIA NIM",       ("NVIDIA_API_KEY",),                             "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                         "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
-        # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
-        ("MiniMax",          ("MINIMAX_API_KEY",),                            "https://api.minimax.io/v1/models",    "MINIMAX_BASE_URL", True),
-        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         "https://api.minimaxi.com/v1/models",  "MINIMAX_CN_BASE_URL", True),
+        # MiniMax providers do not expose a shared /models endpoint; skip live health
+        # probes here and rely on runtime requests / static catalog validation instead.
+        ("MiniMax",          ("MINIMAX_API_KEY",),                            None,                                  "MINIMAX_BASE_URL", False),
+        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         None,                                  "MINIMAX_CN_BASE_URL", False),
        ("Vercel AI Gateway",       ("AI_GATEWAY_API_KEY",),                          "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
        ("Kilo Code",        ("KILOCODE_API_KEY",),                            "https://api.kilo.ai/api/gateway/models",  "KILOCODE_BASE_URL", True),
        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                        "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -7256,12 +7256,12 @@ For more help on a command:
        "setup",
        help="Interactive setup wizard",
        description="Configure Hermes Agent with an interactive wizard. "
-        "Run a specific section: hermes setup model|tts|terminal|gateway|tools|agent",
+        "Run a specific section: hermes setup model|tts|terminal|gateway|tools|agent|security",
    )
    setup_parser.add_argument(
        "section",
        nargs="?",
-        choices=["model", "tts", "terminal", "gateway", "tools", "agent"],
+        choices=["model", "tts", "terminal", "gateway", "tools", "agent", "security"],
        default=None,
        help="Run a specific setup section instead of the full wizard",
    )
@ -7823,6 +7823,36 @@ Examples:
    # config check
    config_subparsers.add_parser("check", help="Check for missing/outdated config")

+    # config governance
+    config_governance = config_subparsers.add_parser("governance", help="Show tool governance settings")
+    config_governance.add_argument("--enable-all", action="store_true", help="Enable all tool governance policies")
+    config_governance.add_argument("--disable-all", action="store_true", help="Disable all tool governance policies")
+    config_governance.add_argument(
+        "--preset",
+        choices=["messaging-safe", "skill-safe", "balanced"],
+        help="Apply a governance preset",
+    )
+    config_governance.add_argument(
+        "--enable-skill-allowed-tools",
+        action="store_true",
+        help="Enable skill allowed-tools enforcement",
+    )
+    config_governance.add_argument(
+        "--disable-skill-allowed-tools",
+        action="store_true",
+        help="Disable skill allowed-tools enforcement",
+    )
+    config_governance.add_argument(
+        "--enable-channel-review",
+        action="store_true",
+        help="Enable channel tool review",
+    )
+    config_governance.add_argument(
+        "--disable-channel-review",
+        action="store_true",
+        help="Disable channel tool review",
+    )
+
    # config migrate
    config_subparsers.add_parser("migrate", help="Update config with new options")

--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@ -596,6 +596,8 @@ def _print_setup_summary(config: dict, hermes_home):
    )
    print(f"   {color('hermes config set <key> <value>', Colors.GREEN)}")
    print("                          Set a specific value")
+    print("                          e.g. security.tool_governance.skill_allowed_tools true")
+    print("                               security.tool_governance.channel_tool_review true")
    print()
    print("   Or edit the files directly:")
    print(f"   {color(f'nano {get_config_path()}', Colors.DIM)}")
@ -612,6 +614,49 @@ def _print_setup_summary(config: dict, hermes_home):
    print()


+def _prompt_tool_governance_defaults(config: dict) -> None:
+    """Interactively offer the optional tool-governance presets.
+
+    Returns without touching ``config`` when the user declines. Otherwise
+    ensures ``config["security"]["tool_governance"]`` exists with both
+    toggles present (default ``False``) and applies the chosen preset in
+    place. Saving the config is left to the caller.
+    """
+    print()
+    print_header("Tool Governance (optional)")
+    print_info("Optional low-risk guardrails for messaging and skill-driven sessions.")
+    print_info("Defaults stay off unless you enable them here.")
+    print()
+
+    wants_governance = prompt_yes_no("Configure tool governance defaults now?", False)
+    if not wants_governance:
+        return
+
+    preset_labels = [
+        "Messaging-safe — enable channel tool review",
+        "Skill-safe — enforce skill allowed-tools",
+        "Enable both low-risk policies",
+    ]
+    choice = prompt_choice("Which governance preset would you like?", preset_labels, 0)
+
+    security = config.setdefault("security", {})
+    tool_governance = security.setdefault("tool_governance", {})
+    tool_governance.setdefault("skill_allowed_tools", False)
+    tool_governance.setdefault("channel_tool_review", False)
+
+    # (menu index, channel_tool_review, skill_allowed_tools)
+    preset_values = (
+        (0, True, False),
+        (1, False, True),
+        (2, True, True),
+    )
+    for index, channel_review, skill_tools in preset_values:
+        if choice == index:
+            tool_governance["channel_tool_review"] = channel_review
+            tool_governance["skill_allowed_tools"] = skill_tools
+            break
+
+    print_success("Tool governance defaults updated")
+    print_info("You can change these later with 'hermes config governance'.")
+
+
+def setup_security(config: dict):
+    """Configure security and governance defaults.
+
+    Delegates to ``_prompt_tool_governance_defaults``, passing ``config``
+    through unchanged. This function does not save the config itself.
+    """
+    _prompt_tool_governance_defaults(config)
+
+
 def _prompt_container_resources(config: dict):
    """Prompt for container resource settings (Docker, Singularity, Modal, Daytona)."""
    terminal = config.setdefault("terminal", {})
@ -2861,6 +2906,7 @@ SETUP_SECTIONS = [
    ("gateway", "Messaging Platforms (Gateway)", setup_gateway),
    ("tools", "Tools", setup_tools),
    ("agent", "Agent Settings", setup_agent_settings),
+    ("security", "Security & Governance", setup_security),
 ]

 # The returning-user menu intentionally omits standalone TTS because model setup
@ -2872,6 +2918,7 @@ RETURNING_USER_MENU_SECTION_KEYS = [
    "gateway",
    "tools",
    "agent",
+    "security",
 ]


@ -2886,6 +2933,7 @@ def run_setup_wizard(args):
      hermes setup gateway   — just messaging platforms
      hermes setup tools     — just tool configuration
      hermes setup agent     — just agent settings
+      hermes setup security  — security/governance defaults
    """
    from hermes_cli.config import is_managed, managed_error
    if is_managed():
@ -3003,6 +3051,7 @@ def run_setup_wizard(args):
            "Messaging Platforms (Gateway)",
            "Tools",
            "Agent Settings",
+            "Security & Governance",
            "Exit",
        ]
        choice = prompt_choice("What would you like to do?", menu_choices, 0)
@ -3014,10 +3063,10 @@ def run_setup_wizard(args):
        elif choice == 1:
            # Full setup — fall through to run all sections
            pass
-        elif choice == 7:
+        elif choice == 8:
            print_info("Exiting. Run 'hermes setup' again when ready.")
            return
-        elif 2 <= choice <= 6:
+        elif 2 <= choice <= 7:
            # Individual section — map by key, not by position.
            # SETUP_SECTIONS includes TTS but the returning-user menu skips it,
            # so positional indexing (choice - 2) would dispatch the wrong section.
@ -3082,6 +3131,8 @@ def run_setup_wizard(args):
    if not (migration_ran and _skip_configured_section(config, "tools", "Tools")):
        setup_tools(config, first_install=not is_existing)

+    _prompt_tool_governance_defaults(config)
+
    # Save and show summary
    save_config(config)
    _print_setup_summary(config, hermes_home)
@ -3149,6 +3200,8 @@ def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
        setup_gateway(config)
        save_config(config)

+    _prompt_tool_governance_defaults(config)
+
    print()
    print_success("Setup complete! You're ready to go.")
    print()
--- a/hermes_cli/tips.py
+++ b/hermes_cli/tips.py
@ -82,6 +82,8 @@ TIPS = [
    "hermes config set KEY VALUE auto-routes secrets to .env and everything else to config.yaml.",
    "hermes config edit opens config.yaml in your default editor.",
    "hermes config check scans for missing or stale configuration options.",
+    "hermes config governance shows tool-governance status and supports quick presets like --preset messaging-safe.",
+    "hermes setup security opens the dedicated setup section for tool-governance defaults.",
    "hermes sessions browse opens an interactive session picker with search.",
    "hermes sessions stats shows session counts by platform and database size.",
    "hermes sessions prune --older-than 30 cleans up old sessions.",
@ -127,7 +129,7 @@ TIPS = [

    # --- Tools & Capabilities ---
    "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
-    "delegate_task spawns up to 3 concurrent sub-agents by default (delegation.max_concurrent_children) with isolated contexts for parallel work.",
+    "delegate_task spawns up to 3 sub-agents by default (see delegation.max_concurrent_children) with isolated contexts for parallel work.",
    "web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
    "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
    "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",
--- a/run_agent.py
+++ b/run_agent.py
@ -2292,6 +2292,36 @@ class AIAgent:
            and getattr(self, "platform", "") == "cli"
        )

+    def _should_suppress_gateway_lifecycle_status(self, message: str) -> bool:
+        """Decide whether a lifecycle status should be hidden from chat gateways.
+
+        Nothing is suppressed when the platform is unset or ``"cli"``, or when
+        the message is empty. On any other platform, retry/fallback chatter
+        matching the known noisy prefixes below is reported as suppressible.
+
+        Returns:
+            True when the message should not be forwarded, False otherwise.
+        """
+        platform_attr = getattr(self, "platform", "") or ""
+        # The platform may be an enum-like object; prefer its ``value``.
+        platform_name = str(getattr(platform_attr, "value", platform_attr)).strip().lower()
+        if platform_name in ("", "cli"):
+            return False
+
+        status_text = (message or "").strip()
+        if not status_text:
+            return False
+
+        suppressed_prefixes = (
+            "⚠️ Rate limited — switching to fallback provider...",
+            "💸 Provider credits/balance exhausted — switching to fallback provider...",
+            "⚠️ Empty/malformed response — switching to fallback...",
+            "🔄 Primary model failed — switching to fallback:",
+            "⏱️ Rate limit reached. Waiting",
+            "❌ Rate limited after ",
+        )
+        if status_text.startswith(suppressed_prefixes):
+            return True
+
+        # "Max retries" notices are only noise when they announce a fallback.
+        return (
+            status_text.startswith("⚠️ Max retries (")
+            and "trying fallback" in status_text.lower()
+        )
+
    def _emit_status(self, message: str) -> None:
        """Emit a lifecycle status message to both CLI and gateway channels.

@ -2307,6 +2337,8 @@ class AIAgent:
        except Exception:
            pass
        if self.status_callback:
+            if self._should_suppress_gateway_lifecycle_status(message):
+                return
            try:
                self.status_callback("lifecycle", message)
            except Exception:
@ -2544,19 +2576,20 @@ class AIAgent:

        return 300.0, True

-    def _compute_non_stream_stale_timeout(self, messages: list[dict[str, Any]]) -> float:
+    def _compute_non_stream_stale_timeout(self, api_kwargs: dict[str, Any]) -> float:
        """Compute the effective non-stream stale timeout for this request."""
        stale_base, uses_implicit_default = self._resolved_api_call_stale_timeout_base()
        base_url = getattr(self, "_base_url", None) or self.base_url or ""
        if uses_implicit_default and base_url and is_local_endpoint(base_url):
            return float("inf")

-        est_tokens = sum(len(str(v)) for v in messages) // 4
-        if est_tokens > 100_000:
-            return max(stale_base, 600.0)
-        if est_tokens > 50_000:
-            return max(stale_base, 450.0)
-        return stale_base
+        est_tokens = self._estimate_request_context_tokens(api_kwargs)
+        return self._scale_stale_timeout_for_context(
+            stale_base,
+            est_tokens,
+            medium_timeout=450.0,
+            large_timeout=600.0,
+        )

    def _is_openrouter_url(self) -> bool:
        """Return True when the base URL targets OpenRouter."""
@ -5634,6 +5667,36 @@ class AIAgent:
                timeout=get_provider_request_timeout(self.provider, self.model),
            )

+    @staticmethod
+    def _rough_payload_chars(value: Any) -> int:
+        """Return the approximate character length of ``value`` once serialized.
+
+        ``None`` counts as zero. Other values are measured as compact JSON
+        (non-ASCII kept as-is); anything ``json.dumps`` rejects is measured by
+        its ``str()`` form instead.
+        """
+        if value is None:
+            return 0
+        try:
+            rendered = json.dumps(value, ensure_ascii=False, separators=(",", ":"))
+        except Exception:
+            # Not JSON-serializable: fall back to the plain string form.
+            rendered = str(value)
+        return len(rendered)
+
+    def _estimate_request_context_tokens(self, api_kwargs: dict) -> int:
+        """Roughly estimate the request size in tokens.
+
+        Sums the serialized length of the ``system``, ``messages``, ``input``,
+        ``instructions`` and ``tools`` entries of ``api_kwargs`` (covering both
+        chat-completions and Responses payloads) and assumes about four
+        characters per token, rounding up.
+        """
+        payload_fields = ("system", "messages", "input", "instructions", "tools")
+        char_count = sum(
+            self._rough_payload_chars(api_kwargs.get(field))
+            for field in payload_fields
+        )
+        # Ceiling division by 4; a zero character count yields zero tokens.
+        return (char_count + 3) // 4
+
+    @staticmethod
+    def _scale_stale_timeout_for_context(
+        base_timeout: float,
+        est_tokens: int,
+        *,
+        medium_timeout: float,
+        large_timeout: float,
+    ) -> float:
+        """Raise the stale timeout for large request contexts.
+
+        Above 100,000 estimated tokens the result is at least
+        ``large_timeout``; above 50,000 it is at least ``medium_timeout``;
+        otherwise ``base_timeout`` is returned unchanged. The base timeout is
+        never lowered.
+        """
+        if est_tokens > 100_000:
+            minimum = large_timeout
+        elif est_tokens > 50_000:
+            minimum = medium_timeout
+        else:
+            return base_timeout
+        return max(base_timeout, minimum)
+
    def _interruptible_api_call(self, api_kwargs: dict):
        """
        Run the API call in a background thread so the main conversation loop
@ -5701,9 +5764,7 @@ class AIAgent:
        # httpx timeout (default 1800s) with zero feedback.  The stale
        # detector kills the connection early so the main retry loop can
        # apply richer recovery (credential rotation, provider fallback).
-        _stale_timeout = self._compute_non_stream_stale_timeout(
-            api_kwargs.get("messages", [])
-        )
+        _stale_timeout = self._compute_non_stream_stale_timeout(api_kwargs)

        _call_start = time.time()
        self._touch_activity("waiting for non-streaming API response")
@ -5727,7 +5788,7 @@ class AIAgent:
            # arrives within the configured timeout.
            _elapsed = time.time() - _call_start
            if _elapsed > _stale_timeout:
-                _est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
+                _est_ctx = self._estimate_request_context_tokens(api_kwargs)
                logger.warning(
                    "Non-streaming API call stale for %.0fs (threshold %.0fs). "
                    "model=%s context=~%s tokens. Killing connection.",
@ -6561,13 +6622,13 @@ class AIAgent:
            # when the context is large.  Without this, the stale detector kills
            # healthy connections during the model's thinking phase, producing
            # spurious RemoteProtocolError ("peer closed connection").
-            _est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
-            if _est_tokens > 100_000:
-                _stream_stale_timeout = max(_stream_stale_timeout_base, 300.0)
-            elif _est_tokens > 50_000:
-                _stream_stale_timeout = max(_stream_stale_timeout_base, 240.0)
-            else:
-                _stream_stale_timeout = _stream_stale_timeout_base
+            _est_tokens = self._estimate_request_context_tokens(api_kwargs)
+            _stream_stale_timeout = self._scale_stale_timeout_for_context(
+                _stream_stale_timeout_base,
+                _est_tokens,
+                medium_timeout=240.0,
+                large_timeout=300.0,
+            )

        t = threading.Thread(target=_call, daemon=True)
        t.start()
@ -6597,7 +6658,7 @@ class AIAgent:
            # inner retry loop can start a fresh connection.
            _stale_elapsed = time.time() - last_chunk_time["t"]
            if _stale_elapsed > _stream_stale_timeout:
-                _est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
+                _est_ctx = self._estimate_request_context_tokens(api_kwargs)
                logger.warning(
                    "Stream stale for %.0fs (threshold %.0fs) — no chunks received. "
                    "model=%s context=~%s tokens. Killing connection.",
@ -11081,7 +11142,10 @@ class AIAgent:
                            self._credential_pool
                        )
                        if not pool_may_recover:
-                            self._emit_status("⚠️ Rate limited — switching to fallback provider...")
+                            if classified.reason == FailoverReason.billing:
+                                self._emit_status("💸 Provider credits/balance exhausted — switching to fallback provider...")
+                            else:
+                                self._emit_status("⚠️ Rate limited — switching to fallback provider...")
                            if self._try_activate_fallback(reason=classified.reason):
                                retry_count = 0
                                compression_attempts = 0
@ -11363,7 +11427,6 @@ class AIAgent:
                            and not classified.should_compress
                            and classified.reason not in (
                                FailoverReason.rate_limit,
-                                FailoverReason.billing,
                                FailoverReason.overloaded,
                                FailoverReason.context_overflow,
                                FailoverReason.payload_too_large,
@ -11394,7 +11457,7 @@ class AIAgent:
                        self._vprint(f"{self.log_prefix}   🔌 Provider: {_provider}  Model: {_model}", force=True)
                        self._vprint(f"{self.log_prefix}   🌐 Endpoint: {_base}", force=True)
                        # Actionable guidance for common auth errors
-                        if classified.is_auth or classified.reason == FailoverReason.billing:
+                        if classified.is_auth:
                            if _provider == "openai-codex" and status_code == 401:
                                self._vprint(f"{self.log_prefix}   💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True)
                                self._vprint(f"{self.log_prefix}      refreshed by another client (Codex CLI, VS Code). To fix:", force=True)
@ -11406,6 +11469,12 @@ class AIAgent:
                                self._vprint(f"{self.log_prefix}      • Does your account have access to {_model}?", force=True)
                                if base_url_host_matches(str(_base), "openrouter.ai"):
                                    self._vprint(f"{self.log_prefix}      • Check credits: https://openrouter.ai/settings/credits", force=True)
+                        elif classified.reason == FailoverReason.billing:
+                            self._vprint(f"{self.log_prefix}   💡 Provider balance/credits appear exhausted for this request.", force=True)
+                            if "openrouter" in str(_base).lower():
+                                self._vprint(f"{self.log_prefix}      • Top up credits: https://openrouter.ai/settings/credits", force=True)
+                            elif _provider == "minimax":
+                                self._vprint(f"{self.log_prefix}      • Check MiniMax account balance / billing before retrying.", force=True)
                        else:
                            self._vprint(f"{self.log_prefix}   💡 This type of error won't be fixed by retrying.", force=True)
                        logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@ -664,6 +664,23 @@ class TestIsPaymentError:
        exc.status_code = 429
        assert _is_payment_error(exc) is True

+    def test_429_with_insufficient_balance_message(self):
+        """A 429 whose message mentions an insufficient balance counts as a payment error."""
+        error = Exception("HTTP 429: insufficient balance (1008)")
+        setattr(error, "status_code", 429)
+        verdict = _is_payment_error(error)
+        assert verdict is True
+
+    def test_429_with_billing_message_in_structured_body(self):
+        """The billing hint is only in ``body["error"]["message"]``, not in ``str(exc)``."""
+        structured_body = {"error": {"message": "insufficient balance (1008)"}}
+        error = Exception("provider error")
+        error.body = structured_body
+        error.status_code = 429
+        verdict = _is_payment_error(error)
+        assert verdict is True
+
+    def test_429_with_billing_error_code_in_structured_body(self):
+        """Only the ``payment_required`` code in the body marks this 429 as a payment error."""
+        structured_body = {"error": {"code": "payment_required", "message": "provider error"}}
+        error = Exception("provider error")
+        error.body = structured_body
+        error.status_code = 429
+        verdict = _is_payment_error(error)
+        assert verdict is True
+
    def test_429_without_credits_message_is_not_payment(self):
        """Normal rate limits should NOT be treated as payment errors."""
        exc = Exception("Rate limit exceeded, try again in 2 seconds")
--- a/tests/agent/test_error_classifier.py
+++ b/tests/agent/test_error_classifier.py
@ -250,6 +250,24 @@ class TestClassifyApiError:
        assert result.reason == FailoverReason.rate_limit
        assert result.should_fallback is True

+    def test_429_insufficient_balance_classified_as_billing(self):
+        """An insufficient-balance 429 is classified with the billing reason."""
+        api_error = MockAPIError("HTTP 429: insufficient balance (1008)", status_code=429)
+        classified = classify_api_error(api_error, provider="minimax")
+        assert classified.reason == FailoverReason.billing
+        # The classification must be non-retryable while still allowing
+        # credential rotation and provider fallback.
+        assert classified.retryable is False
+        assert classified.should_rotate_credential is True
+        assert classified.should_fallback is True
+
+    def test_429_payment_required_error_code_classified_as_billing(self):
+        """A ``payment_required`` code in the structured body yields a billing classification."""
+        error_body = {"error": {"code": "payment_required", "message": "provider error"}}
+        api_error = MockAPIError("provider error", status_code=429, body=error_body)
+        classified = classify_api_error(api_error, provider="openrouter")
+        assert classified.reason == FailoverReason.billing
+        assert classified.retryable is False
+
    def test_alibaba_rate_increased_too_quickly(self):
        """Alibaba/DashScope returns a unique throttling message.

--- a/tests/hermes_cli/test_doctor.py
+++ b/tests/hermes_cli/test_doctor.py
@ -399,22 +399,23 @@ def test_run_doctor_kimi_cn_env_is_detected_and_probe_is_null_safe(monkeypatch,


@pytest.mark.parametrize("base_url", [None, "https://opencode.ai/zen/go/v1"])
-def test_run_doctor_opencode_go_skips_invalid_models_probe(monkeypatch, tmp_path, base_url):
+def _run_doctor_with_single_api_key_provider(monkeypatch, tmp_path, env_var, env_value, provider_name, base_env=None, base_url=None):
    home = tmp_path / ".hermes"
    home.mkdir(parents=True, exist_ok=True)
    (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
-    (home / ".env").write_text("OPENCODE_GO_API_KEY=***\n", encoding="utf-8")
+    (home / ".env").write_text(f"{env_var}=***\n", encoding="utf-8")
    project = tmp_path / "project"
    project.mkdir(exist_ok=True)

    monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
    monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
    monkeypatch.setattr(doctor_mod, "_DHH", str(home))
-    monkeypatch.setenv("OPENCODE_GO_API_KEY", "sk-test")
-    if base_url:
-        monkeypatch.setenv("OPENCODE_GO_BASE_URL", base_url)
-    else:
-        monkeypatch.delenv("OPENCODE_GO_BASE_URL", raising=False)
+    monkeypatch.setenv(env_var, env_value)
+    if base_env:
+        if base_url:
+            monkeypatch.setenv(base_env, base_url)
+        else:
+            monkeypatch.delenv(base_env, raising=False)

    fake_model_tools = types.SimpleNamespace(
        check_tool_availability=lambda *a, **kw: ([], []),
@ -438,15 +439,89 @@ def test_run_doctor_opencode_go_skips_invalid_models_probe(monkeypatch, tmp_path
    import httpx
    monkeypatch.setattr(httpx, "get", fake_get)

-    import io, contextlib
    buf = io.StringIO()
    with contextlib.redirect_stdout(buf):
        doctor_mod.run_doctor(Namespace(fix=False))
    out = buf.getvalue()

    assert any(
-        "OpenCode Go" in line and "(key configured)" in line
+        provider_name in line and "(key configured)" in line
        for line in out.splitlines()
    )
+    return out, calls
+
+
+@pytest.mark.parametrize("base_url", [None, "https://opencode.ai/zen/go/v1"])
+def test_run_doctor_opencode_go_skips_invalid_models_probe(monkeypatch, tmp_path, base_url):
+    """Run doctor with only an OpenCode Go key set and check no models URL was recorded.
+
+    Parametrized over ``OPENCODE_GO_BASE_URL`` being unset and being the
+    explicit ``https://opencode.ai/zen/go/v1`` endpoint. The recorded calls
+    must contain neither that endpoint's ``/models`` URL nor any URL that
+    mentions both "opencode" and "models".
+    """
+    _, calls = _run_doctor_with_single_api_key_provider(
+        monkeypatch,
+        tmp_path,
+        env_var="OPENCODE_GO_API_KEY",
+        env_value="sk-test",
+        provider_name="OpenCode Go",
+        base_env="OPENCODE_GO_BASE_URL",
+        base_url=base_url,
+    )
+
    assert not any(url == "https://opencode.ai/zen/go/v1/models" for url, _, _ in calls)
    assert not any("opencode" in url.lower() and "models" in url.lower() for url, _, _ in calls)
+
+
+@pytest.mark.parametrize("base_url", [None, "https://api.minimax.io/anthropic"])
+def test_run_doctor_minimax_skips_invalid_models_probe(monkeypatch, tmp_path, base_url):
+    """Run doctor with only a MiniMax key set; no recorded URL may mention minimax and models."""
+    helper_kwargs = {
+        "env_var": "MINIMAX_API_KEY",
+        "env_value": "sk-test",
+        "provider_name": "MiniMax",
+        "base_env": "MINIMAX_BASE_URL",
+        "base_url": base_url,
+    }
+    _output, recorded = _run_doctor_with_single_api_key_provider(
+        monkeypatch, tmp_path, **helper_kwargs
+    )
+
+    offending = [
+        url
+        for url, _, _ in recorded
+        if "minimax" in url.lower() and "models" in url.lower()
+    ]
+    assert not offending
+
+
+def test_run_doctor_reports_tool_governance_settings(monkeypatch, tmp_path):
+    """Doctor output should include a Tool Governance section reflecting config.yaml.
+
+    The config written here turns ``skill_allowed_tools`` on and
+    ``channel_tool_review`` off; the captured stdout must report them as
+    "enabled" and "disabled" respectively.
+    """
+    import contextlib
+    import io
+
+    hermes_home = tmp_path / ".hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    config_text = (
+        "memory: {}\n"
+        "security:\n"
+        "  tool_governance:\n"
+        "    skill_allowed_tools: true\n"
+        "    channel_tool_review: false\n"
+    )
+    (hermes_home / "config.yaml").write_text(config_text, encoding="utf-8")
+    project_root = tmp_path / "project"
+    project_root.mkdir(exist_ok=True)
+
+    for attr_name, value in (
+        ("HERMES_HOME", hermes_home),
+        ("PROJECT_ROOT", project_root),
+        ("_DHH", str(hermes_home)),
+    ):
+        monkeypatch.setattr(doctor_mod, attr_name, value)
+
+    stub_model_tools = types.SimpleNamespace(
+        TOOLSET_REQUIREMENTS={},
+        check_tool_availability=lambda *a, **kw: ([], []),
+    )
+    monkeypatch.setitem(sys.modules, "model_tools", stub_model_tools)
+
+    # Best-effort: stub the auth status getters; any failure here is ignored.
+    with contextlib.suppress(Exception):
+        from hermes_cli import auth as _auth_mod
+        for getter_name in ("get_nous_auth_status", "get_codex_auth_status"):
+            monkeypatch.setattr(_auth_mod, getter_name, lambda: {})
+
+    captured = io.StringIO()
+    with contextlib.redirect_stdout(captured):
+        doctor_mod.run_doctor(Namespace(fix=False))
+    report = captured.getvalue()
+
+    for expected in (
+        "Tool Governance",
+        "Skill allowed-tools enforcement: enabled",
+        "Channel tool review: disabled",
+    ):
+        assert expected in report
--- a/tests/hermes_cli/test_placeholder_usage.py
+++ b/tests/hermes_cli/test_placeholder_usage.py
@ -2,6 +2,7 @@

 import os
 from argparse import Namespace
+from pathlib import Path
 from unittest.mock import patch

 import pytest
@ -40,9 +41,154 @@ def test_show_config_marks_placeholders(tmp_path, capsys):
    assert "hermes config set <key> <value>" in out


+def test_show_config_surfaces_tool_governance_settings(tmp_path, capsys):
+    """``show_config`` output names the Tool Governance section and both of its config keys."""
+    env_override = {"HERMES_HOME": str(tmp_path)}
+    with patch.dict(os.environ, env_override):
+        show_config()
+
+    rendered = capsys.readouterr().out
+    for fragment in (
+        "Tool Governance",
+        "security.tool_governance.skill_allowed_tools",
+        "security.tool_governance.channel_tool_review",
+    ):
+        assert fragment in rendered
+
+
def test_setup_summary_marks_placeholders(tmp_path, capsys):
    """Setup summary output should include the ``hermes config set <key> <value>`` usage hint."""
    with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
        _print_setup_summary({"tts": {"provider": "edge"}}, tmp_path)

    out = capsys.readouterr().out
    assert "hermes config set <key> <value>" in out
+
+
+def test_setup_summary_mentions_tool_governance_examples(tmp_path, capsys):
+    """The setup summary text should name both tool-governance config keys."""
+    summary_config = {"tts": {"provider": "edge"}}
+    env_override = {"HERMES_HOME": str(tmp_path)}
+    with patch.dict(os.environ, env_override):
+        _print_setup_summary(summary_config, tmp_path)
+
+    rendered = capsys.readouterr().out
+    for config_key in (
+        "security.tool_governance.skill_allowed_tools",
+        "security.tool_governance.channel_tool_review",
+    ):
+        assert config_key in rendered
+
+
+def test_config_check_surfaces_tool_governance_settings(tmp_path, capsys):
+    """``config check`` reports the governance flags stored in config.yaml as on/off."""
+    yaml_text = (
+        "security:\n"
+        "  tool_governance:\n"
+        "    skill_allowed_tools: true\n"
+        "    channel_tool_review: false\n"
+    )
+    (Path(tmp_path) / "config.yaml").write_text(yaml_text, encoding="utf-8")
+
+    env_override = {"HERMES_HOME": str(tmp_path)}
+    with patch.dict(os.environ, env_override):
+        config_command(Namespace(config_command="check"))
+
+    rendered = capsys.readouterr().out
+    for fragment in (
+        "Tool Governance",
+        "skill_allowed_tools: on",
+        "channel_tool_review: off",
+    ):
+        assert fragment in rendered
+
+
+def test_config_governance_command_shows_focus_view(tmp_path, capsys):
+    """``config governance`` with no flags prints current states plus the enable commands."""
+    yaml_text = (
+        "security:\n"
+        "  tool_governance:\n"
+        "    skill_allowed_tools: false\n"
+        "    channel_tool_review: true\n"
+    )
+    (Path(tmp_path) / "config.yaml").write_text(yaml_text, encoding="utf-8")
+
+    env_override = {"HERMES_HOME": str(tmp_path)}
+    with patch.dict(os.environ, env_override):
+        config_command(Namespace(config_command="governance"))
+
+    rendered = capsys.readouterr().out
+    for fragment in (
+        "Tool Governance",
+        "skill_allowed_tools: off",
+        "channel_tool_review: on",
+        "hermes config set security.tool_governance.skill_allowed_tools true",
+        "hermes config set security.tool_governance.channel_tool_review true",
+    ):
+        assert fragment in rendered
+
+
+def test_config_governance_enable_all_updates_config(tmp_path, capsys):
+    """``config governance`` with ``enable_all`` writes both flags as true and prints them as on.
+
+    No config.yaml exists beforehand; the test reads the file the command
+    saved under the temporary HERMES_HOME.
+    """
+    with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+        config_command(
+            Namespace(
+                config_command="governance",
+                enable_all=True,
+                disable_all=False,
+                enable_skill_allowed_tools=False,
+                disable_skill_allowed_tools=False,
+                enable_channel_review=False,
+                disable_channel_review=False,
+                # Supply the same attribute set as the sibling governance tests.
+                preset=None,
+            )
+        )
+
+    out = capsys.readouterr().out
+    saved = (Path(tmp_path) / "config.yaml").read_text(encoding="utf-8")
+    assert "skill_allowed_tools: true" in saved
+    assert "channel_tool_review: true" in saved
+    assert "skill_allowed_tools: on" in out
+    assert "channel_tool_review: on" in out
+
+
+def test_config_governance_can_toggle_individual_policy(tmp_path, capsys):
+    """Disabling only ``skill_allowed_tools`` leaves ``channel_tool_review`` enabled."""
+    config_file = Path(tmp_path) / "config.yaml"
+    yaml_text = (
+        "security:\n"
+        "  tool_governance:\n"
+        "    skill_allowed_tools: true\n"
+        "    channel_tool_review: true\n"
+    )
+    config_file.write_text(yaml_text, encoding="utf-8")
+
+    # Only the "disable skill allowed-tools" switch is set.
+    flags = {
+        "enable_all": False,
+        "disable_all": False,
+        "enable_skill_allowed_tools": False,
+        "disable_skill_allowed_tools": True,
+        "enable_channel_review": False,
+        "disable_channel_review": False,
+        "preset": None,
+    }
+    with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+        config_command(Namespace(config_command="governance", **flags))
+
+    rendered = capsys.readouterr().out
+    persisted = config_file.read_text(encoding="utf-8")
+    assert "skill_allowed_tools: false" in persisted
+    assert "channel_tool_review: true" in persisted
+    assert "skill_allowed_tools: off" in rendered
+    assert "channel_tool_review: on" in rendered
+
+
+def test_config_governance_preset_messaging_safe_updates_config(tmp_path, capsys):
+    """The ``messaging-safe`` preset saves allowed-tools off and channel review on."""
+    flags = {
+        "enable_all": False,
+        "disable_all": False,
+        "enable_skill_allowed_tools": False,
+        "disable_skill_allowed_tools": False,
+        "enable_channel_review": False,
+        "disable_channel_review": False,
+        "preset": "messaging-safe",
+    }
+    with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+        config_command(Namespace(config_command="governance", **flags))
+
+    rendered = capsys.readouterr().out
+    persisted = (Path(tmp_path) / "config.yaml").read_text(encoding="utf-8")
+    assert "skill_allowed_tools: false" in persisted
+    assert "channel_tool_review: true" in persisted
+    assert "skill_allowed_tools: off" in rendered
+    assert "channel_tool_review: on" in rendered
--- a/tests/hermes_cli/test_setup.py
+++ b/tests/hermes_cli/test_setup.py
@ -2,6 +2,7 @@
 import json
 import sys
 import types
+from argparse import Namespace

 import pytest

@ -223,6 +224,86 @@ def test_setup_gateway_in_container_shows_docker_guidance(monkeypatch, capsys):
    assert "restart" in out.lower()


+def test_prompt_tool_governance_defaults_can_enable_both(tmp_path, monkeypatch):
+    """With a scripted yes answer and choice 2, both governance flags end up True."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    cfg = load_config()
+
+    # Each prompt is scripted for exactly one call; a second call raises StopIteration.
+    scripted_confirmations = iter([True])
+    scripted_choices = iter([2])
+
+    def fake_yes_no(*args, **kwargs):
+        return next(scripted_confirmations)
+
+    def fake_choice(*args, **kwargs):
+        return next(scripted_choices)
+
+    monkeypatch.setattr(setup_mod, "prompt_yes_no", fake_yes_no)
+    monkeypatch.setattr(setup_mod, "prompt_choice", fake_choice)
+
+    setup_mod._prompt_tool_governance_defaults(cfg)
+
+    assert cfg["security"]["tool_governance"]["skill_allowed_tools"] is True
+    assert cfg["security"]["tool_governance"]["channel_tool_review"] is True
+
+
+def test_run_first_time_quick_setup_offers_tool_governance(tmp_path, monkeypatch):
+    """The quick first-time setup flow must call the tool-governance prompt."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    config = load_config()
+    calls = []
+
+    def note(step, detail=None):
+        calls.append((step, detail))
+
+    # Stub every collaborator so only the call sequence is recorded.
+    stubs = {
+        "setup_model_provider": lambda cfg, quick=True: note("model", quick),
+        "setup_gateway": lambda cfg: note("gateway"),
+        "_prompt_tool_governance_defaults": lambda cfg: note("governance"),
+        "_print_setup_summary": lambda cfg, home: note("summary"),
+        "_offer_launch_chat": lambda: note("chat"),
+        "prompt_choice": lambda *args, **kwargs: 1,
+    }
+    for attr_name, stub in stubs.items():
+        monkeypatch.setattr(setup_mod, attr_name, stub)
+
+    setup_mod._run_first_time_quick_setup(config, tmp_path, is_existing=False)
+
+    assert ("governance", None) in calls
+
+
+def test_run_setup_wizard_full_setup_offers_tool_governance(tmp_path, monkeypatch):
+    """The wizard, run without a section, must reach the tool-governance prompt.
+
+    Every setup step is stubbed to record its name in ``calls``;
+    ``prompt_choice`` is scripted to answer 1 exactly once (presumably
+    selecting the full-setup path; confirm against ``run_setup_wizard``).
+    """
+    # Redirect HERMES_HOME before load_config() so the baseline config is not
+    # read from the real home directory.
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    args = Namespace(non_interactive=False, section=None, reset=False)
+    config = load_config()
+    calls = []
+
+    monkeypatch.setattr(setup_mod, "ensure_hermes_home", lambda: None)
+    monkeypatch.setattr(setup_mod, "load_config", lambda: config)
+    monkeypatch.setattr(setup_mod, "get_hermes_home", lambda: tmp_path)
+    monkeypatch.setattr(setup_mod, "is_interactive_stdin", lambda: True)
+    monkeypatch.setattr(setup_mod, "get_env_value", lambda key: "")
+    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
+    monkeypatch.setattr(setup_mod, "_offer_openclaw_migration", lambda home: False)
+    choices = iter([1])
+    monkeypatch.setattr(setup_mod, "prompt_choice", lambda *args, **kwargs: next(choices))
+    monkeypatch.setattr(setup_mod, "setup_model_provider", lambda cfg: calls.append("model"))
+    monkeypatch.setattr(setup_mod, "setup_terminal_backend", lambda cfg: calls.append("terminal"))
+    monkeypatch.setattr(setup_mod, "setup_agent_settings", lambda cfg: calls.append("agent"))
+    monkeypatch.setattr(setup_mod, "setup_gateway", lambda cfg: calls.append("gateway"))
+    monkeypatch.setattr(setup_mod, "setup_tools", lambda cfg, first_install=True: calls.append("tools"))
+    monkeypatch.setattr(setup_mod, "_prompt_tool_governance_defaults", lambda cfg: calls.append("governance"))
+    monkeypatch.setattr(setup_mod, "_print_setup_summary", lambda cfg, home: calls.append("summary"))
+    monkeypatch.setattr(setup_mod, "_offer_launch_chat", lambda: calls.append("chat"))
+    monkeypatch.setattr(setup_mod, "save_config", lambda cfg: calls.append("save"))
+
+    setup_mod.run_setup_wizard(args)
+
+    assert "governance" in calls
+
+
+def test_run_setup_wizard_security_section_dispatches_governance(tmp_path, monkeypatch):
+    """``section="security"`` must run the governance prompt and then save, and nothing else.
+
+    Only the governance prompt and ``save_config`` record into ``calls``, and
+    the test asserts the exact sequence ``["governance", "save"]``.
+    """
+    # Redirect HERMES_HOME before load_config() so the baseline config is not
+    # read from the real home directory.
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    args = Namespace(non_interactive=False, section="security", reset=False)
+    config = load_config()
+    calls = []
+
+    monkeypatch.setattr(setup_mod, "ensure_hermes_home", lambda: None)
+    monkeypatch.setattr(setup_mod, "load_config", lambda: config)
+    monkeypatch.setattr(setup_mod, "get_hermes_home", lambda: tmp_path)
+    monkeypatch.setattr(setup_mod, "is_interactive_stdin", lambda: True)
+    monkeypatch.setattr(setup_mod, "_prompt_tool_governance_defaults", lambda cfg: calls.append("governance"))
+    monkeypatch.setattr(setup_mod, "save_config", lambda cfg: calls.append("save"))
+
+    setup_mod.run_setup_wizard(args)
+
+    assert calls == ["governance", "save"]
+
+
 def test_setup_syncs_custom_provider_removal_from_disk(tmp_path, monkeypatch):
    """Removing the last custom provider in model setup should persist."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
--- a/tests/hermes_cli/test_setup_noninteractive.py
+++ b/tests/hermes_cli/test_setup_noninteractive.py
@ -226,6 +226,7 @@ class TestNonInteractiveSetup:
            "Messaging Platforms (Gateway)",
            "Tools",
            "Agent Settings",
+            "Security & Governance",
            "Exit",
        ]

@ -244,3 +245,19 @@ class TestNonInteractiveSetup:
        main_mod.main()

        assert received["section"] == "tts"
+
+    def test_main_accepts_security_setup_section(self, monkeypatch):
+        """With argv ``hermes setup security``, ``cmd_setup`` receives ``section == "security"``."""
+        from hermes_cli import main as main_mod
+
+        captured = {}
+
+        def record_section(args):
+            captured["section"] = args.section
+
+        monkeypatch.setattr("sys.argv", ["hermes", "setup", "security"])
+        monkeypatch.setattr(main_mod, "cmd_setup", record_section)
+
+        main_mod.main()
+
+        assert captured["section"] == "security"
--- a/tests/hermes_cli/test_timeouts.py
+++ b/tests/hermes_cli/test_timeouts.py
@ -285,7 +285,7 @@ def test_default_non_stream_stale_timeout_auto_disables_for_local_endpoints(monk
        platform="cli",
    )

-    assert agent._compute_non_stream_stale_timeout([]) == float("inf")
+    assert agent._compute_non_stream_stale_timeout({"messages": []}) == float("inf")


 def test_explicit_non_stream_stale_timeout_is_honored_for_local_endpoints(monkeypatch, tmp_path):
@ -305,4 +305,4 @@ def test_explicit_non_stream_stale_timeout_is_honored_for_local_endpoints(monkey
        platform="cli",
    )

-    assert agent._compute_non_stream_stale_timeout([]) == 300.0
+    assert agent._compute_non_stream_stale_timeout({"messages": []}) == 300.0
--- a/tests/hermes_cli/test_tips.py
+++ b/tests/hermes_cli/test_tips.py
@ -70,3 +70,11 @@ class TestTipIntegrationInCLI:
        # Should not contain nested/broken Rich tags
        assert markup.count("[/]") == 1
        assert "[dim #B8860B]" in markup
+
+
+class TestToolGovernanceTips:
+    """TIPS must mention the tool-governance CLI entry points."""
+
+    def test_has_config_governance_tip(self):
+        """At least one tip mentions ``hermes config governance``."""
+        matching = [tip for tip in TIPS if "hermes config governance" in tip]
+        assert matching
+
+    def test_has_setup_security_tip(self):
+        """At least one tip mentions ``hermes setup security``."""
+        matching = [tip for tip in TIPS if "hermes setup security" in tip]
+        assert matching
--- a/tests/run_agent/test_provider_parity.py
+++ b/tests/run_agent/test_provider_parity.py
@ -438,6 +438,80 @@ class TestBuildApiKwargsCodex:
        assert "function" not in tools[0]


+class TestEstimateRequestContextTokens:
+    """Checks ``_estimate_request_context_tokens`` against three request payload shapes."""
+
+    def test_chat_completions_counts_messages_and_tools(self, monkeypatch):
+        """Two 4000-character messages plus two tool definitions estimate above 2000."""
+        agent = _make_agent(monkeypatch, "openrouter")
+        conversation = [
+            {"role": "system", "content": "a" * 4000},
+            {"role": "user", "content": "b" * 4000},
+        ]
+        request = {
+            "model": "anthropic/claude-sonnet-4-20250514",
+            "messages": conversation,
+            "tools": _tool_defs("web_search", "terminal"),
+        }
+
+        estimate = agent._estimate_request_context_tokens(request)
+
+        assert estimate > 2000
+
+    def test_codex_responses_counts_input_and_instructions(self, monkeypatch):
+        """A payload with ``input``/``instructions`` and no ``messages`` key is still counted."""
+        agent = _make_agent(
+            monkeypatch,
+            "openai-codex",
+            api_mode="codex_responses",
+            base_url="https://chatgpt.com/backend-api/codex",
+        )
+        search_tool = {
+            "type": "function",
+            "name": "web_search",
+            "description": "search",
+            "parameters": {"type": "object", "properties": {}},
+        }
+        request = {
+            "model": "gpt-5.4",
+            "instructions": "system:" + ("y" * 10000),
+            "input": [{"role": "user", "content": "x" * 410000}],
+            "tools": [search_tool],
+        }
+
+        assert "messages" not in request
+        estimate = agent._estimate_request_context_tokens(request)
+
+        assert estimate > 100000
+        # For this estimate both scaling calls must return the large_timeout value.
+        scaled_from_300 = agent._scale_stale_timeout_for_context(
+            300.0,
+            estimate,
+            medium_timeout=450.0,
+            large_timeout=600.0,
+        )
+        assert scaled_from_300 == 600.0
+        scaled_from_180 = agent._scale_stale_timeout_for_context(
+            180.0,
+            estimate,
+            medium_timeout=240.0,
+            large_timeout=300.0,
+        )
+        assert scaled_from_180 == 300.0
+
+    def test_anthropic_counts_top_level_system(self, monkeypatch):
+        """A large top-level ``system`` string is counted even with a tiny message list."""
+        agent = _make_agent(monkeypatch, "anthropic", api_mode="anthropic_messages")
+        request = {
+            "model": "claude-sonnet-4.6",
+            "system": "policy:" + ("s" * 240000),
+            "messages": [{"role": "user", "content": "hi"}],
+            "tools": _tool_defs("web_search"),
+        }
+
+        estimate = agent._estimate_request_context_tokens(request)
+
+        assert estimate > 60000
+        # For this estimate the scaling call must return the medium_timeout value.
+        scaled = agent._scale_stale_timeout_for_context(
+            300.0,
+            estimate,
+            medium_timeout=450.0,
+            large_timeout=600.0,
+        )
+        assert scaled == 450.0
+
+
 # ── Message conversion tests ────────────────────────────────────────────────

 class TestChatMessagesToResponsesInput:
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@ -22,6 +22,7 @@ import run_agent
 from run_agent import AIAgent
 from agent.error_classifier import FailoverReason
 from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
+from gateway.config import Platform


 # ---------------------------------------------------------------------------
@ -1629,6 +1630,67 @@ class TestExecuteToolCalls:
        assert "API call failed" not in output
        assert "Rate limit reached" not in output

+    def test_emit_status_suppresses_noisy_gateway_fallback_messages(self, agent):
+        """On platform "telegram" the fallback notice is printed but never reaches the callback."""
+        noisy_message = "⚠️ Rate limited — switching to fallback provider..."
+        agent.status_callback = MagicMock()
+        agent.platform = "telegram"
+
+        with patch.object(agent, "_vprint") as vprint_spy:
+            agent._emit_status(noisy_message)
+
+        vprint_spy.assert_called_once()
+        agent.status_callback.assert_not_called()
+
+    def test_emit_status_forwards_non_noisy_gateway_messages(self, agent):
+        """A context-reduction notice is printed and forwarded to the callback as "lifecycle"."""
+        lifecycle_message = "🗜️ Context reduced to 120,000 tokens (was 240,000), retrying..."
+        agent.status_callback = MagicMock()
+        agent.platform = "telegram"
+
+        with patch.object(agent, "_vprint") as vprint_spy:
+            agent._emit_status(lifecycle_message)
+
+        vprint_spy.assert_called_once()
+        agent.status_callback.assert_called_once_with("lifecycle", lifecycle_message)
+
+    def test_emit_status_handles_platform_enum_for_gateway_suppression(self, agent):
+        """Suppression also applies when ``platform`` is ``Platform.TELEGRAM`` rather than a string."""
+        noisy_message = "⚠️ Rate limited — switching to fallback provider..."
+        agent.status_callback = MagicMock()
+        agent.platform = Platform.TELEGRAM
+
+        with patch.object(agent, "_vprint") as vprint_spy:
+            agent._emit_status(noisy_message)
+
+        vprint_spy.assert_called_once()
+        agent.status_callback.assert_not_called()
+
+    def test_billing_429_does_not_emit_rate_limit_backoff_status(self, agent):
+        """A billing-flavoured 429 ends the run after one API call with no backoff status.
+
+        The API call is patched to raise an error with ``status_code == 429``
+        and an "insufficient balance (1008)" message. The conversation must
+        finish incomplete, surface that message in ``error``, and never emit
+        a "Rate limit reached. Waiting" status.
+        """
+
+        class _Billing429Error(Exception):
+            status_code = 429
+
+            def __str__(self):
+                return "HTTP 429: insufficient balance (1008)"
+
+        emitted = []
+        agent.base_url = "https://api.minimax.io/v1/"
+        agent.save_trajectories = False
+        agent.compression_enabled = False
+        agent.tool_delay = 0
+        agent._use_prompt_caching = False
+        agent._cached_system_prompt = "You are helpful."
+
+        with (
+            patch.object(agent, "_interruptible_api_call", side_effect=_Billing429Error()),
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+            patch.object(agent, "_emit_status", side_effect=emitted.append),
+            patch("run_agent.time.sleep", return_value=None),
+        ):
+            outcome = agent.run_conversation("hello")
+
+        assert outcome["completed"] is False
+        assert outcome["final_response"] is None
+        assert "insufficient balance (1008)" in outcome["error"]
+        assert outcome["api_calls"] == 1
+        backoff_notices = [msg for msg in emitted if "Rate limit reached. Waiting" in msg]
+        assert not backoff_notices
+

 class TestConcurrentToolExecution:
    """Tests for _execute_tool_calls_concurrent and dispatch logic."""