From 7e3845ac508eec504f0e0f56185751d3d131698a Mon Sep 17 00:00:00 2001
From: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
Date: Thu, 16 Apr 2026 09:22:04 -0700
Subject: [PATCH 01/14] chore: add bare noreply email for kshitijk4poor to
 AUTHOR_MAP (#11120)

The numbered form (82637225+kshitijk4poor@) was already mapped but
the bare form (kshitijk4poor@users.noreply.github.com) used by
cherry-pick commits was missing, causing check-attribution CI to fail.

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
---
 scripts/release.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/release.py b/scripts/release.py
index 30b72a9c7..66fd04987 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -46,6 +46,7 @@ AUTHOR_MAP = {
     # contributors (from noreply pattern)
     "35742124+0xbyt4@users.noreply.github.com": "0xbyt4",
     "82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
+    "kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
     "16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
     "185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
     "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",

From 4377d7da0d1475f390096ed6efb0650eb6f734a3 Mon Sep 17 00:00:00 2001
From: ogzerber <ogzerber@users.noreply.github.com>
Date: Thu, 16 Apr 2026 18:41:12 +0530
Subject: [PATCH 02/14] fix(honcho): improve conclude descriptions and add
 exactly-one validation

Improve honcho_conclude tool descriptions to explicitly tell the model
not to send both params together. Add runtime validation that rejects
calls with both or neither of conclusion/delete_id. Add schema
regression test and both-params rejection test.

Consolidates #10847 by @ygd58, #10864 by @cola-runner,
#10870 by @vominh1919, and #10952 by @ogzerber.
The anyOf removal itself was already merged; this adds the
runtime validation and tests those PRs contributed.

Co-authored-by: ygd58 <ygd58@users.noreply.github.com>
Co-authored-by: cola-runner <cola-runner@users.noreply.github.com>
Co-authored-by: vominh1919 <vominh1919@users.noreply.github.com>
---
 plugins/memory/honcho/__init__.py   | 16 ++++++++++------
 tests/honcho_plugin/test_session.py | 29 ++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py
index 6f7e52f0b..fcd64b881 100644
--- a/plugins/memory/honcho/__init__.py
+++ b/plugins/memory/honcho/__init__.py
@@ -160,11 +160,11 @@ CONCLUDE_SCHEMA = {
         "properties": {
             "conclusion": {
                 "type": "string",
-                "description": "A factual statement to persist. Required when not using delete_id.",
+                "description": "A factual statement to persist. Provide this when creating a conclusion. Do not send it together with delete_id.",
             },
             "delete_id": {
                 "type": "string",
-                "description": "Conclusion ID to delete (for PII removal). Required when not using conclusion.",
+                "description": "Conclusion ID to delete for PII removal. Provide this when deleting a conclusion. Do not send it together with conclusion.",
             },
             "peer": {
                 "type": "string",
@@ -1009,15 +1009,19 @@ class HonchoMemoryProvider(MemoryProvider):
 
             elif tool_name == "honcho_conclude":
                 delete_id = args.get("delete_id")
+                conclusion = args.get("conclusion", "")
                 peer = args.get("peer", "user")
-                if delete_id:
+
+                has_delete_id = bool(delete_id)
+                has_conclusion = bool(conclusion)
+                if has_delete_id == has_conclusion:
+                    return tool_error("Exactly one of conclusion or delete_id must be provided.")
+
+                if has_delete_id:
                     ok = self._manager.delete_conclusion(self._session_key, delete_id, peer=peer)
                     if ok:
                         return json.dumps({"result": f"Conclusion {delete_id} deleted."})
                     return tool_error(f"Failed to delete conclusion {delete_id}.")
-                conclusion = args.get("conclusion", "")
-                if not conclusion:
-                    return tool_error("Missing required parameter: conclusion or delete_id")
                 ok = self._manager.create_conclusion(self._session_key, conclusion, peer=peer)
                 if ok:
                     return json.dumps({"result": f"Conclusion saved for {peer}: {conclusion}"})
diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py
index 69c024efe..404f82120 100644
--- a/tests/honcho_plugin/test_session.py
+++ b/tests/honcho_plugin/test_session.py
@@ -366,6 +366,17 @@ class TestPeerLookupHelpers:
 
 
 class TestConcludeToolDispatch:
+    def test_conclude_schema_has_no_anyof(self):
+        """anyOf/oneOf/allOf breaks Anthropic and Fireworks APIs — schema must be plain object."""
+        from plugins.memory.honcho import CONCLUDE_SCHEMA
+        params = CONCLUDE_SCHEMA["parameters"]
+        assert params["type"] == "object"
+        assert "conclusion" in params["properties"]
+        assert "delete_id" in params["properties"]
+        assert "anyOf" not in params
+        assert "oneOf" not in params
+        assert "allOf" not in params
+
     def test_honcho_conclude_defaults_to_user_peer(self):
         provider = HonchoMemoryProvider()
         provider._session_initialized = True
@@ -470,7 +481,23 @@ class TestConcludeToolDispatch:
         result = provider.handle_tool_call("honcho_conclude", {})
 
         parsed = json.loads(result)
-        assert "error" in parsed or "Missing required" in parsed.get("result", "")
+        assert parsed == {"error": "Exactly one of conclusion or delete_id must be provided."}
+        provider._manager.create_conclusion.assert_not_called()
+        provider._manager.delete_conclusion.assert_not_called()
+
+    def test_honcho_conclude_rejects_both_params_at_once(self):
+        """Sending both conclusion and delete_id should be rejected."""
+        import json
+        provider = HonchoMemoryProvider()
+        provider._session_initialized = True
+        provider._session_key = "telegram:123"
+        provider._manager = MagicMock()
+        result = provider.handle_tool_call(
+            "honcho_conclude",
+            {"conclusion": "User prefers dark mode", "delete_id": "conc-123"},
+        )
+        parsed = json.loads(result)
+        assert parsed == {"error": "Exactly one of conclusion or delete_id must be provided."}
         provider._manager.create_conclusion.assert_not_called()
         provider._manager.delete_conclusion.assert_not_called()
 

From fe3e68f5728b04b5b66cfaf62558c36e5852b40d Mon Sep 17 00:00:00 2001
From: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
Date: Thu, 16 Apr 2026 22:19:54 +0530
Subject: [PATCH 03/14] fix(honcho): strip whitespace from conclusion and
 delete_id inputs

Models may send whitespace-only strings like {"conclusion": " "} which
pass bool() but create meaningless conclusions. Strip both inputs so
whitespace-only values are treated as empty.

Adds tests for whitespace-only conclusion and delete_id.

Reviewed-by: @erosika
---
 plugins/memory/honcho/__init__.py   |  4 ++--
 tests/honcho_plugin/test_session.py | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py
index fcd64b881..ca44ce601 100644
--- a/plugins/memory/honcho/__init__.py
+++ b/plugins/memory/honcho/__init__.py
@@ -1008,8 +1008,8 @@ class HonchoMemoryProvider(MemoryProvider):
                 return json.dumps({"result": "\n\n".join(parts) or "No context available."})
 
             elif tool_name == "honcho_conclude":
-                delete_id = args.get("delete_id")
-                conclusion = args.get("conclusion", "")
+                delete_id = (args.get("delete_id") or "").strip()
+                conclusion = (args.get("conclusion") or "").strip()  # "or" also covers an explicit null, same as delete_id
                 peer = args.get("peer", "user")
 
                 has_delete_id = bool(delete_id)
diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py
index 404f82120..9784959d3 100644
--- a/tests/honcho_plugin/test_session.py
+++ b/tests/honcho_plugin/test_session.py
@@ -501,6 +501,30 @@ class TestConcludeToolDispatch:
         provider._manager.create_conclusion.assert_not_called()
         provider._manager.delete_conclusion.assert_not_called()
 
+    def test_honcho_conclude_rejects_whitespace_only_conclusion(self):
+        """Whitespace-only conclusion should be treated as empty."""
+        import json
+        provider = HonchoMemoryProvider()
+        provider._session_initialized = True
+        provider._session_key = "telegram:123"
+        provider._manager = MagicMock()
+        result = provider.handle_tool_call("honcho_conclude", {"conclusion": "   "})
+        parsed = json.loads(result)
+        assert parsed == {"error": "Exactly one of conclusion or delete_id must be provided."}
+        provider._manager.create_conclusion.assert_not_called()
+
+    def test_honcho_conclude_rejects_whitespace_only_delete_id(self):
+        """Whitespace-only delete_id should be treated as empty."""
+        import json
+        provider = HonchoMemoryProvider()
+        provider._session_initialized = True
+        provider._session_key = "telegram:123"
+        provider._manager = MagicMock()
+        result = provider.handle_tool_call("honcho_conclude", {"delete_id": "  "})
+        parsed = json.loads(result)
+        assert parsed == {"error": "Exactly one of conclusion or delete_id must be provided."}
+        provider._manager.delete_conclusion.assert_not_called()
+
 
 # ---------------------------------------------------------------------------
 # Message chunking

From 59d3939173f49093b72460cb02909f2cc0c04c06 Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Tue, 14 Apr 2026 12:52:23 -0600
Subject: [PATCH 04/14] docs(update): remove unsupported --check command

---
 website/docs/getting-started/updating.md | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md
index 16bb0ce47..b0e34e07d 100644
--- a/website/docs/getting-started/updating.md
+++ b/website/docs/getting-started/updating.md
@@ -65,11 +65,7 @@ If `git status --short` shows unexpected changes after `hermes update`, stop and
 hermes version
 ```
 
-Compare against the latest release at the [GitHub releases page](https://github.com/NousResearch/hermes-agent/releases) or check for available updates:
-
-```bash
-hermes update --check
-```
+Compare against the latest release at the [GitHub releases page](https://github.com/NousResearch/hermes-agent/releases).
 
 ### Updating from Messaging Platforms
 

From a99516afcfdb67acc946565d46cded1b4ac8d40d Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Wed, 15 Apr 2026 20:13:51 -0600
Subject: [PATCH 05/14] docs(nix): clarify SOUL.md location

---
 website/docs/getting-started/nix-setup.md | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/website/docs/getting-started/nix-setup.md b/website/docs/getting-started/nix-setup.md
index 858315329..e2bcd9dd6 100644
--- a/website/docs/getting-started/nix-setup.md
+++ b/website/docs/getting-started/nix-setup.md
@@ -267,7 +267,6 @@ Run `nix build .#configKeys && cat result` to see every leaf config key extracte
 
     # ── Documents ──────────────────────────────────────────────────────
     documents = {
-      "SOUL.md" = builtins.readFile /home/user/.hermes/SOUL.md;
       "USER.md" = ./documents/USER.md;
     };
 
@@ -316,7 +315,7 @@ Quick reference for the most common things Nix users want to customize:
 | Change the LLM model | `settings.model.default` | `"anthropic/claude-sonnet-4"` |
 | Use a different provider endpoint | `settings.model.base_url` | `"https://openrouter.ai/api/v1"` |
 | Add API keys | `environmentFiles` | `[ config.sops.secrets."hermes-env".path ]` |
-| Give the agent a personality | `documents."SOUL.md"` | `builtins.readFile ./my-soul.md` |
+| Give the agent a personality | `${services.hermes-agent.stateDir}/.hermes/SOUL.md` | manage the file directly |
 | Add MCP tool servers | `mcpServers.<name>` | See [MCP Servers](#mcp-servers) |
 | Mount host directories into container | `container.extraVolumes` | `[ "/data:/data:rw" ]` |
 | Pass GPU access to container | `container.extraOptions` | `[ "--gpus" "all" ]` |
@@ -397,17 +396,14 @@ The file is only copied if `auth.json` doesn't already exist (unless `authFileFo
 
 The `documents` option installs files into the agent's working directory (the `workingDirectory`, which the agent reads as its workspace). Hermes looks for specific filenames by convention:
 
-- **`SOUL.md`** — the agent's system prompt / personality. Hermes reads this on startup and uses it as persistent instructions that shape its behavior across all conversations.
 - **`USER.md`** — context about the user the agent is interacting with.
 - Any other files you place here are visible to the agent as workspace files.
 
+The agent identity file is separate: Hermes loads its primary `SOUL.md` from `$HERMES_HOME/SOUL.md`, which in the NixOS module is `${services.hermes-agent.stateDir}/.hermes/SOUL.md`. Putting `SOUL.md` in `documents` only creates a workspace file and will not replace the main persona file.
+
 ```nix
 {
   services.hermes-agent.documents = {
-    "SOUL.md" = ''
-      You are a helpful research assistant specializing in NixOS packaging.
-      Always cite sources and prefer reproducible solutions.
-    '';
     "USER.md" = ./documents/USER.md;  # path reference, copied from Nix store
   };
 }

From 1ccd0637864ddede3603007c7a3c68c8d16e67ce Mon Sep 17 00:00:00 2001
From: helix4u <4317663+helix4u@users.noreply.github.com>
Date: Sun, 12 Apr 2026 11:33:15 -0600
Subject: [PATCH 06/14] fix(cli): route /yolo toggle through TUI-safe renderer

---
 cli.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/cli.py b/cli.py
index 12cb72014..fc426a28d 100644
--- a/cli.py
+++ b/cli.py
@@ -6220,13 +6220,21 @@ class HermesCLI:
     def _toggle_yolo(self):
         """Toggle YOLO mode — skip all dangerous command approval prompts."""
         import os
+        from hermes_cli.colors import Colors as _Colors
+
         current = bool(os.environ.get("HERMES_YOLO_MODE"))
         if current:
             os.environ.pop("HERMES_YOLO_MODE", None)
-            self.console.print("  ⚠ YOLO mode [bold red]OFF[/] — dangerous commands will require approval.")
+            _cprint(
+                f"  ⚠ YOLO mode {_Colors.BOLD}{_Colors.RED}OFF{_Colors.RESET}"
+                " — dangerous commands will require approval."
+            )
         else:
             os.environ["HERMES_YOLO_MODE"] = "1"
-            self.console.print("  ⚡ YOLO mode [bold green]ON[/] — all commands auto-approved. Use with caution.")
+            _cprint(
+                f"  ⚡ YOLO mode {_Colors.BOLD}{_Colors.GREEN}ON{_Colors.RESET}"
+                " — all commands auto-approved. Use with caution."
+            )
 
     def _handle_reasoning_command(self, cmd: str):
         """Handle /reasoning — manage effort level and display toggle.

From 0517ac3e9325a0548c3f5878185a926921be9311 Mon Sep 17 00:00:00 2001
From: trevthefoolish <trevmanthony@gmail.com>
Date: Thu, 16 Apr 2026 12:35:43 -0500
Subject: [PATCH 07/14] fix(agent): complete Claude Opus 4.7 API migration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Claude Opus 4.7 introduced several breaking API changes that the current
codebase partially handled but not completely. This patch finishes the
migration per the official migration guide at
https://platform.claude.com/docs/en/about-claude/models/migration-guide

Fixes NousResearch/hermes-agent#11137

Breaking-change coverage:

1. Adaptive thinking + output_config.effort — 4.7 is now recognized by
   _supports_adaptive_thinking() (extends previous 4.6-only gate).

2. Sampling parameter stripping — 4.7 returns 400 for any non-default
   temperature / top_p / top_k. build_anthropic_kwargs drops them as a
   safety net; the OpenAI-protocol auxiliary path (_build_call_kwargs)
   and _AnthropicCompletionsAdapter.create() both skip setting
   temperature for 4.7+ models. This keeps flush_memories and
   structured-JSON aux paths that hardcode temperature from 400ing
   when the aux model is flipped to 4.7.

3. thinking.display = "summarized" — 4.7 defaults display to "omitted",
   which silently hides reasoning text from Hermes's CLI activity feed
   during long tool runs. Restoring "summarized" preserves 4.6 UX.

4. Effort level mapping — xhigh now maps to xhigh (was xhigh→max, which
   silently over-efforted every coding/agentic request). max is now a
   distinct ceiling per Anthropic's 5-level effort model.

5. New stop_reason values — refusal and model_context_window_exceeded
   were missing from the adapter's stop_reason_map and so fell through
   to its default of "stop" (end_turn). Now mapped to "content_filter"
   and "length" respectively, matching upstream finish-reason handling
   already in bedrock_adapter.

6. Model catalogs — claude-opus-4-7 added to the Anthropic provider
   list, anthropic/claude-opus-4.7 added at top of OpenRouter fallback
   catalog (recommended), claude-opus-4-7 added to model_metadata
   DEFAULT_CONTEXT_LENGTHS (1M, matching 4.6 per migration guide).

7. Prefill docstrings — run_agent.AIAgent and BatchRunner now document
   that Anthropic Sonnet/Opus 4.6+ reject a trailing assistant-role
   prefill (400).

8. Tests — 4 new tests in test_anthropic_adapter covering display
   default, xhigh preservation, max on 4.7, refusal / context-overflow
   stop_reason mapping, plus the sampling-param predicate. test_model_metadata
   accepts 4.7 at 1M context.

Tested on macOS 15.5 (darwin). 119 tests pass in
tests/agent/test_anthropic_adapter.py, 1320 pass in tests/agent/.
---
 agent/anthropic_adapter.py            | 80 +++++++++++++++++++++++----
 agent/auxiliary_client.py             | 16 +++++-
 agent/model_metadata.py               |  2 +
 batch_runner.py                       |  5 +-
 hermes_cli/models.py                  |  4 +-
 run_agent.py                          |  3 +
 tests/agent/test_anthropic_adapter.py | 58 ++++++++++++++++++-
 tests/agent/test_model_metadata.py    |  6 +-
 8 files changed, 155 insertions(+), 19 deletions(-)

diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index f3f08039d..90a3a412e 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -28,19 +28,37 @@ except ImportError:
 logger = logging.getLogger(__name__)
 
 THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}
+# Hermes effort → Anthropic adaptive-thinking effort (output_config.effort).
+# Anthropic exposes 5 levels on 4.7+: low, medium, high, xhigh, max.
+# We preserve xhigh as xhigh (the recommended default for coding/agentic on
+# 4.7) and expose max as a distinct ceiling. "minimal" is a legacy alias that
+# maps to low.  See:
+# https://platform.claude.com/docs/en/about-claude/models/migration-guide
 ADAPTIVE_EFFORT_MAP = {
-    "xhigh": "max",
-    "high": "high",
-    "medium": "medium",
-    "low": "low",
+    "max":     "max",
+    "xhigh":   "xhigh",
+    "high":    "high",
+    "medium":  "medium",
+    "low":     "low",
     "minimal": "low",
 }
 
+# Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
+# is the only supported mode; 4.7 additionally forbids manual thinking entirely
+# and drops temperature/top_p/top_k).
+_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
+
+# Models where temperature/top_p/top_k return 400 if set to non-default values.
+# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
+_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
+
 # ── Max output token limits per Anthropic model ───────────────────────
 # Source: Anthropic docs + Cline model catalog.  Anthropic's API requires
 # max_tokens as a mandatory field.  Previously we hardcoded 16384, which
 # starves thinking-enabled models (thinking tokens count toward the limit).
 _ANTHROPIC_OUTPUT_LIMITS = {
+    # Claude 4.7
+    "claude-opus-4-7":   128_000,
     # Claude 4.6
     "claude-opus-4-6":   128_000,
     "claude-sonnet-4-6":  64_000,
@@ -91,11 +109,26 @@ def _get_anthropic_max_output(model: str) -> int:
 
 
 def _supports_adaptive_thinking(model: str) -> bool:
-    """Return True for Claude 4.6 models that support adaptive thinking."""
-    return any(v in model for v in ("4-6", "4.6"))
+    """Return True for Claude 4.6+ models that support adaptive thinking."""
+    return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
 
 
-# Beta headers for enhanced features (sent with ALL auth types)
+def _forbids_sampling_params(model: str) -> bool:
+    """Return True for models that 400 on any non-default temperature/top_p/top_k.
+
+    Opus 4.7 explicitly rejects sampling parameters; later Claude releases are
+    expected to follow suit.  Callers should omit these fields entirely rather
+    than passing zero/default values (the API rejects anything non-null).
+    """
+    return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
+
+
+# Beta headers for enhanced features (sent with ALL auth types).
+# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
+# beta headers are still accepted (harmless no-op) but not required. Kept
+# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
+# that still gate on the headers continue to get the enhanced features.
+# Migration guide: remove these if you no longer support ≤4.5 models.
 _COMMON_BETAS = [
     "interleaved-thinking-2025-05-14",
     "fine-grained-tool-streaming-2025-05-14",
@@ -1341,18 +1374,26 @@ def build_anthropic_kwargs(
             kwargs["tool_choice"] = {"type": "tool", "name": tool_choice}
 
     # Map reasoning_config to Anthropic's thinking parameter.
-    # Claude 4.6 models use adaptive thinking + output_config.effort.
+    # Claude 4.6+ models use adaptive thinking + output_config.effort.
     # Older models use manual thinking with budget_tokens.
     # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
     # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
+    #
+    # On 4.7+ the `thinking.display` field defaults to "omitted", which
+    # silently hides reasoning text that Hermes surfaces in its CLI. We
+    # request "summarized" so the reasoning blocks stay populated — matching
+    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
     if reasoning_config and isinstance(reasoning_config, dict):
         if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
             effort = str(reasoning_config.get("effort", "medium")).lower()
             budget = THINKING_BUDGET.get(effort, 8000)
             if _supports_adaptive_thinking(model):
-                kwargs["thinking"] = {"type": "adaptive"}
+                kwargs["thinking"] = {
+                    "type": "adaptive",
+                    "display": "summarized",
+                }
                 kwargs["output_config"] = {
-                    "effort": ADAPTIVE_EFFORT_MAP.get(effort, "medium")
+                    "effort": ADAPTIVE_EFFORT_MAP.get(effort, "medium"),
                 }
             else:
                 kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
@@ -1360,6 +1401,15 @@ def build_anthropic_kwargs(
                 kwargs["temperature"] = 1
                 kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
 
+    # ── Strip sampling params on 4.7+ ─────────────────────────────────
+    # Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
+    # Callers (auxiliary_client, flush_memories, etc.) may set these for
+    # older models; drop them here as a safety net so upstream 4.6 → 4.7
+    # migrations don't require coordinated edits everywhere.
+    if _forbids_sampling_params(model):
+        for _sampling_key in ("temperature", "top_p", "top_k"):
+            kwargs.pop(_sampling_key, None)
+
     # ── Fast mode (Opus 4.6 only) ────────────────────────────────────
     # Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
     # output speed. Only for native Anthropic endpoints — third-party
@@ -1417,12 +1467,20 @@ def normalize_anthropic_response(
                 )
             )
 
-    # Map Anthropic stop_reason to OpenAI finish_reason
+    # Map Anthropic stop_reason to OpenAI finish_reason.
+    # Newer stop reasons added in Claude 4.5+ / 4.7:
+    #   - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
+    #   - model_context_window_exceeded: hit context limit (not max_tokens)
+    # Both need distinct handling upstream — a refusal should surface to the
+    # user with a clear message, and a context-window overflow should trigger
+    # compression/truncation rather than be treated as normal end-of-turn.
     stop_reason_map = {
         "end_turn": "stop",
         "tool_use": "tool_calls",
         "max_tokens": "length",
         "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
     }
     finish_reason = stop_reason_map.get(response.stop_reason, "stop")
 
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index c31ff55f9..4f1746166 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -518,8 +518,13 @@ class _AnthropicCompletionsAdapter:
             tool_choice=normalized_tool_choice,
             is_oauth=self._is_oauth,
         )
+        # Opus 4.7+ rejects any non-default temperature/top_p/top_k; only set
+        # temperature for models that still accept it. build_anthropic_kwargs
+        # additionally strips these keys as a safety net — keep both layers.
         if temperature is not None:
-            anthropic_kwargs["temperature"] = temperature
+            from agent.anthropic_adapter import _forbids_sampling_params
+            if not _forbids_sampling_params(model):
+                anthropic_kwargs["temperature"] = temperature
 
         response = self._client.messages.create(**anthropic_kwargs)
         assistant_message, finish_reason = normalize_anthropic_response(response)
@@ -2288,6 +2293,15 @@ def _build_call_kwargs(
         "timeout": timeout,
     }
 
+    # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
+    # drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
+    # flush_memories, 0 on structured-JSON extraction) don't 400 the moment
+    # the aux model is flipped to 4.7.
+    if temperature is not None:
+        from agent.anthropic_adapter import _forbids_sampling_params
+        if _forbids_sampling_params(model):
+            temperature = None
+
     if temperature is not None:
         kwargs["temperature"] = temperature
 
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index db3048941..089fd132a 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -102,6 +102,8 @@ DEFAULT_CONTEXT_LENGTHS = {
     # fuzzy-match collisions (e.g. "anthropic/claude-sonnet-4" is a
     # substring of "anthropic/claude-sonnet-4.6").
     # OpenRouter-prefixed models resolve via OpenRouter live API or models.dev.
+    "claude-opus-4-7": 1000000,
+    "claude-opus-4.7": 1000000,
     "claude-opus-4-6": 1000000,
     "claude-sonnet-4-6": 1000000,
     "claude-opus-4.6": 1000000,
diff --git a/batch_runner.py b/batch_runner.py
index 195452c0a..1a65f473f 100644
--- a/batch_runner.py
+++ b/batch_runner.py
@@ -561,7 +561,10 @@ class BatchRunner:
             provider_sort (str): Sort providers by price/throughput/latency (optional)
             max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
             reasoning_config (Dict): OpenRouter reasoning config override (e.g. {"effort": "none"} to disable thinking)
-            prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming)
+            prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming).
+                NOTE: Anthropic Sonnet 4.6+ and Opus 4.6+ reject a trailing assistant-role prefill
+                (400 error).  For those models use output_config.format or structured-output
+                schemas instead.  Safe here for user-role priming and for older Claude / non-Claude models.
             max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set)
         """
         self.dataset_file = Path(dataset_file)
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 309840aea..48cf6873b 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -26,7 +26,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
 # Fallback OpenRouter snapshot used when the live catalog is unavailable.
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
-    ("anthropic/claude-opus-4.6",       "recommended"),
+    ("anthropic/claude-opus-4.7",       "recommended"),
+    ("anthropic/claude-opus-4.6",       ""),
     ("anthropic/claude-sonnet-4.6",     ""),
     ("qwen/qwen3.6-plus",               ""),
     ("anthropic/claude-sonnet-4.5",     ""),
@@ -181,6 +182,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "MiniMax-M2",
     ],
     "anthropic": [
+        "claude-opus-4-7",
         "claude-opus-4-6",
         "claude-sonnet-4-6",
         "claude-opus-4-5-20251101",
diff --git a/run_agent.py b/run_agent.py
index f6c67b109..920b49c2f 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -641,6 +641,9 @@ class AIAgent:
             prefill_messages (List[Dict]): Messages to prepend to conversation history as prefilled context.
                 Useful for injecting a few-shot example or priming the model's response style.
                 Example: [{"role": "user", "content": "Hi!"}, {"role": "assistant", "content": "Hello!"}]
+                NOTE: Anthropic Sonnet 4.6+ and Opus 4.6+ reject a conversation that ends on an
+                assistant-role message (400 error).  For those models use structured outputs or
+                output_config.format instead of a trailing-assistant prefill.
             platform (str): The interface platform the user is on (e.g. "cli", "telegram", "discord", "whatsapp").
                 Used to inject platform-specific formatting hints into the system prompt.
             skip_context_files (bool): If True, skip auto-injection of SOUL.md, AGENTS.md, and .cursorrules
diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py
index ae78888d8..9d8f3deaa 100644
--- a/tests/agent/test_anthropic_adapter.py
+++ b/tests/agent/test_anthropic_adapter.py
@@ -951,13 +951,19 @@ class TestBuildAnthropicKwargs:
             max_tokens=4096,
             reasoning_config={"enabled": True, "effort": "high"},
         )
-        assert kwargs["thinking"] == {"type": "adaptive"}
+        # Adaptive thinking + display="summarized" keeps reasoning text
+        # populated in the response stream (Opus 4.7 default is "omitted").
+        assert kwargs["thinking"] == {"type": "adaptive", "display": "summarized"}
         assert kwargs["output_config"] == {"effort": "high"}
         assert "budget_tokens" not in kwargs["thinking"]
         assert "temperature" not in kwargs
         assert kwargs["max_tokens"] == 4096
 
-    def test_reasoning_config_maps_xhigh_to_max_effort_for_4_6_models(self):
+    def test_reasoning_config_maps_xhigh_to_xhigh_effort_for_4_6_models(self):
+        # Opus 4.7 added "xhigh" as a distinct effort level (the recommended
+        # default for coding/agentic work). Earlier mapping aliased xhigh→max,
+        # which silently over-efforted every request. 2026-04-16 migration
+        # guide: xhigh and max are distinct levels.
         kwargs = build_anthropic_kwargs(
             model="claude-sonnet-4-6",
             messages=[{"role": "user", "content": "think harder"}],
@@ -965,9 +971,40 @@ class TestBuildAnthropicKwargs:
             max_tokens=4096,
             reasoning_config={"enabled": True, "effort": "xhigh"},
         )
-        assert kwargs["thinking"] == {"type": "adaptive"}
+        assert kwargs["thinking"] == {"type": "adaptive", "display": "summarized"}
+        assert kwargs["output_config"] == {"effort": "xhigh"}
+
+    def test_reasoning_config_maps_max_effort_for_4_7_models(self):
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-7",
+            messages=[{"role": "user", "content": "maximum reasoning please"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config={"enabled": True, "effort": "max"},
+        )
+        assert kwargs["thinking"] == {"type": "adaptive", "display": "summarized"}
         assert kwargs["output_config"] == {"effort": "max"}
 
+    def test_opus_4_7_strips_sampling_params(self):
+        # Opus 4.7 returns 400 on non-default temperature/top_p/top_k.
+        # build_anthropic_kwargs must strip them as a safety net even if an
+        # upstream caller injects them for older-model compatibility.
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-7",
+            messages=[{"role": "user", "content": "hi"}],
+            tools=None,
+            max_tokens=1024,
+            reasoning_config=None,
+        )
+        # build_anthropic_kwargs doesn't currently accept sampling params
+        # through its signature, so nothing can be injected and stripped
+        # end-to-end here. Instead we exercise the guard by calling the
+        # internal predicate directly.
+        from agent.anthropic_adapter import _forbids_sampling_params
+        assert _forbids_sampling_params("claude-opus-4-7") is True
+        assert _forbids_sampling_params("claude-opus-4-6") is False
+        assert _forbids_sampling_params("claude-sonnet-4-5") is False
+
     def test_reasoning_disabled(self):
         kwargs = build_anthropic_kwargs(
             model="claude-sonnet-4-20250514",
@@ -1248,6 +1285,21 @@ class TestNormalizeResponse:
         assert r2 == "tool_calls"
         assert r3 == "length"
 
+    def test_stop_reason_refusal_and_context_exceeded(self):
+        # Claude 4.5+ introduced two new stop_reason values the Messages API
+        # returns.  We map both to OpenAI-style finish_reasons upstream
+        # handlers already understand, instead of silently collapsing to
+        # "stop" (old behavior).
+        block = SimpleNamespace(type="text", text="")
+        _, refusal_reason = normalize_anthropic_response(
+            self._make_response([block], "refusal")
+        )
+        _, overflow_reason = normalize_anthropic_response(
+            self._make_response([block], "model_context_window_exceeded")
+        )
+        assert refusal_reason == "content_filter"
+        assert overflow_reason == "length"
+
     def test_no_text_content(self):
         block = SimpleNamespace(
             type="tool_use", id="tc_1", name="search", input={"q": "hi"}
diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py
index df680fb24..6a0eab151 100644
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -113,8 +113,10 @@ class TestDefaultContextLengths:
         for key, value in DEFAULT_CONTEXT_LENGTHS.items():
             if "claude" not in key:
                 continue
-            # Claude 4.6 models have 1M context
-            if "4.6" in key or "4-6" in key:
+            # Claude 4.6+ models (4.6 and 4.7) have 1M context at standard
+            # API pricing (no long-context premium).  Older Claude 4.x and
+            # 3.x models cap at 200k.
+            if any(tag in key for tag in ("4.6", "4-6", "4.7", "4-7")):
                 assert value == 1000000, f"{key} should be 1000000"
             else:
                 assert value == 200000, f"{key} should be 200000"

From 37913d9109a441db3ed884c668df7002102a1dbe Mon Sep 17 00:00:00 2001
From: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
Date: Thu, 16 Apr 2026 23:17:20 +0530
Subject: [PATCH 08/14] chore: add Opus 4.7 PR contributors to AUTHOR_MAP

Add trevthefoolish, ziliangpeng, centripetal-star for the consolidated
Opus 4.7 salvage PR (#11107, #11145, #11152, #11157).
---
 scripts/release.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/release.py b/scripts/release.py
index 66fd04987..384f2f65e 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -191,6 +191,9 @@ AUTHOR_MAP = {
     "cola-runner@users.noreply.github.com": "cola-runner",
     "ygd58@users.noreply.github.com": "ygd58",
     "vominh1919@users.noreply.github.com": "vominh1919",
+    "trevmanthony@gmail.com": "trevthefoolish",
+    "ziliangpeng@users.noreply.github.com": "ziliangpeng",
+    "centripetal-star@users.noreply.github.com": "centripetal-star",
     "LeonSGP43@users.noreply.github.com": "LeonSGP43",
     "Lubrsy706@users.noreply.github.com": "Lubrsy706",
     "niyant@spicefi.xyz": "spniyant",

From 63d06dd93d6c19f22598d8ffc855788e1fe04714 Mon Sep 17 00:00:00 2001
From: Trev <trevthefoolish@users.noreply.github.com>
Date: Thu, 16 Apr 2026 13:51:42 -0500
Subject: [PATCH 09/14] =?UTF-8?q?fix(agent):=20downgrade=20xhigh=E2=86=92m?=
 =?UTF-8?q?ax=20on=20Anthropic=20pre-4.7=20adaptive=20models?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Regression from #11161 (Claude Opus 4.7 migration, commit 0517ac3e).

The Opus 4.7 migration changed `ADAPTIVE_EFFORT_MAP["xhigh"]` from "max"
(the pre-migration alias) to "xhigh" to preserve the new 4.7 effort level
as distinct from max. This is correct for 4.7, but Opus/Sonnet 4.6 only
expose 4 levels (low/medium/high/max) — sending "xhigh" there now 400s:

    BadRequestError [HTTP 400]: This model does not support effort
    level 'xhigh'. Supported levels: high, low, max, medium.

Users who set reasoning_effort=xhigh as their default (xhigh is the
recommended default for coding/agentic on 4.7 per the Anthropic migration
guide) now 400 every request the moment they switch back to a 4.6 model
via `/model` or config. Verified live against the Anthropic API on
`anthropic==0.94.0`.

Fix: make the mapping model-aware. Add `_supports_xhigh_effort()`
predicate (matches 4-7/4.7 substrings, mirroring the existing
`_supports_adaptive_thinking` / `_forbids_sampling_params` pattern).
On pre-4.7 adaptive models, downgrade xhigh→max (the strongest effort
those models accept, restoring pre-migration behavior). On 4.7+, keep
xhigh as a distinct level.

Per Anthropic's migration guide, xhigh is 4.7-only:
https://platform.claude.com/docs/en/about-claude/models/migration-guide
> Opus 4.7 effort levels: max, xhigh (new), high, medium, low.
> Opus 4.6 effort levels: max, high, medium, low.
SDK typing confirms: `anthropic.types.OutputConfigParam.effort: Literal[
"low", "medium", "high", "max"]` (v0.94.0 not yet updated for xhigh).

## Test plan

Verified live on macOS 15.5 / anthropic==0.94.0:

    claude-opus-4-6 + effort=xhigh → output_config.effort=max  → 200 OK
    claude-opus-4-7 + effort=xhigh → output_config.effort=xhigh → 200 OK
    claude-opus-4-6 + effort=max   → output_config.effort=max  → 200 OK
    claude-opus-4-7 + effort=max   → output_config.effort=max  → 200 OK

`tests/agent/test_anthropic_adapter.py` — 120 pass (replaced 1 bugged
test that asserted the broken behavior, added 1 for 4.7 preservation).

Full adapter suite: 120 passed in 1.05s.
Broader suite (agent + run_agent + cli/gateway reasoning): 2140 passed
(2 pre-existing failures on clean upstream/main, unrelated).

## Platforms

Tested on macOS 15.5. No platform-specific code paths touched.
---
 agent/anthropic_adapter.py            | 32 +++++++++++++++++++++++----
 tests/agent/test_anthropic_adapter.py | 25 ++++++++++++++++-----
 2 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 90a3a412e..64b952251 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -30,9 +30,11 @@ logger = logging.getLogger(__name__)
 THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}
 # Hermes effort → Anthropic adaptive-thinking effort (output_config.effort).
 # Anthropic exposes 5 levels on 4.7+: low, medium, high, xhigh, max.
-# We preserve xhigh as xhigh (the recommended default for coding/agentic on
-# 4.7) and expose max as a distinct ceiling. "minimal" is a legacy alias that
-# maps to low.  See:
+# Opus/Sonnet 4.6 only expose 4 levels: low, medium, high, max — no xhigh.
+# We preserve xhigh as xhigh on 4.7+ (the recommended default for coding/
+# agentic work) and downgrade it to max on pre-4.7 adaptive models (which
+# is the strongest level they accept).  "minimal" is a legacy alias that
+# maps to low on every model.  See:
 # https://platform.claude.com/docs/en/about-claude/models/migration-guide
 ADAPTIVE_EFFORT_MAP = {
     "max":     "max",
@@ -43,6 +45,12 @@ ADAPTIVE_EFFORT_MAP = {
     "minimal": "low",
 }
 
+# Models that accept the "xhigh" output_config.effort level.  Opus 4.7 added
+# xhigh as a distinct level between high and max; older adaptive-thinking
+# models (4.6) reject it with a 400.  Keep this substring list in sync with
+# the Anthropic migration guide as new model families ship.
+_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7")
+
 # Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
 # is the only supported mode; 4.7 additionally forbids manual thinking entirely
 # and drops temperature/top_p/top_k).
@@ -113,6 +121,17 @@ def _supports_adaptive_thinking(model: str) -> bool:
     return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
 
 
+def _supports_xhigh_effort(model: str) -> bool:
+    """Return True for models that accept the 'xhigh' adaptive effort level.
+
+    Opus 4.7 introduced xhigh as a distinct level between high and max.
+    Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept low/medium/high/max
+    and reject xhigh with an HTTP 400. Callers should downgrade xhigh→max
+    when this returns False.
+    """
+    return any(v in model for v in _XHIGH_EFFORT_SUBSTRINGS)
+
+
 def _forbids_sampling_params(model: str) -> bool:
     """Return True for models that 400 on any non-default temperature/top_p/top_k.
 
@@ -1392,8 +1411,13 @@ def build_anthropic_kwargs(
                     "type": "adaptive",
                     "display": "summarized",
                 }
+                adaptive_effort = ADAPTIVE_EFFORT_MAP.get(effort, "medium")
+                # Downgrade xhigh→max on models that don't list xhigh as a
+                # supported level (Opus/Sonnet 4.6). Opus 4.7+ keeps xhigh.
+                if adaptive_effort == "xhigh" and not _supports_xhigh_effort(model):
+                    adaptive_effort = "max"
                 kwargs["output_config"] = {
-                    "effort": ADAPTIVE_EFFORT_MAP.get(effort, "medium"),
+                    "effort": adaptive_effort,
                 }
             else:
                 kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py
index 9d8f3deaa..737db01a3 100644
--- a/tests/agent/test_anthropic_adapter.py
+++ b/tests/agent/test_anthropic_adapter.py
@@ -959,11 +959,13 @@ class TestBuildAnthropicKwargs:
         assert "temperature" not in kwargs
         assert kwargs["max_tokens"] == 4096
 
-    def test_reasoning_config_maps_xhigh_to_xhigh_effort_for_4_6_models(self):
-        # Opus 4.7 added "xhigh" as a distinct effort level (the recommended
-        # default for coding/agentic work). Earlier mapping aliased xhigh→max,
-        # which silently over-efforted every request. 2026-04-16 migration
-        # guide: xhigh and max are distinct levels.
+    def test_reasoning_config_downgrades_xhigh_to_max_for_4_6_models(self):
+        # Opus 4.7 added "xhigh" as a distinct effort level (low/medium/high/
+        # xhigh/max). Opus 4.6 only supports low/medium/high/max — sending
+        # "xhigh" there returns an API 400. Preserve the pre-migration
+        # behavior of aliasing xhigh→max on pre-4.7 adaptive models so users
+        # who prefer xhigh as their default don't 400 every request when
+        # switching back to 4.6.
         kwargs = build_anthropic_kwargs(
             model="claude-sonnet-4-6",
             messages=[{"role": "user", "content": "think harder"}],
@@ -972,6 +974,19 @@ class TestBuildAnthropicKwargs:
             reasoning_config={"enabled": True, "effort": "xhigh"},
         )
         assert kwargs["thinking"] == {"type": "adaptive", "display": "summarized"}
+        assert kwargs["output_config"] == {"effort": "max"}
+
+    def test_reasoning_config_preserves_xhigh_for_4_7_models(self):
+        # On 4.7+ xhigh is a real level and the recommended default for
+        # coding/agentic work — keep it distinct from max.
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-7",
+            messages=[{"role": "user", "content": "think harder"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config={"enabled": True, "effort": "xhigh"},
+        )
+        assert kwargs["thinking"] == {"type": "adaptive", "display": "summarized"}
         assert kwargs["output_config"] == {"effort": "xhigh"}
 
     def test_reasoning_config_maps_max_effort_for_4_7_models(self):

From 25c7b1baa7bbb72113552c40cc16b34d0e9f30f0 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Thu, 16 Apr 2026 12:05:11 -0700
Subject: [PATCH 10/14] fix: handle httpx.Timeout object in CopilotACPClient
 (#11058)

run_agent.py passes httpx.Timeout(connect=30, read=120, write=1800,
pool=30) as the timeout kwarg on the streaming path. The OpenAI SDK
handles this natively, but CopilotACPClient._create_chat_completion()
called float(timeout or default), which raises TypeError because
httpx.Timeout doesn't implement __float__.

Normalize the timeout before passing to _run_prompt: plain floats/ints
pass through, httpx.Timeout objects get their largest component
extracted (write=1800s is the correct wall-clock budget for the ACP
subprocess), and None falls back to the 900s default.
---
 agent/copilot_acp_client.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py
index 235fd9a1a..031c58d70 100644
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -313,9 +313,25 @@ class CopilotACPClient:
             tools=tools,
             tool_choice=tool_choice,
         )
+        # Normalise timeout: run_agent.py may pass an httpx.Timeout object
+        # (used natively by the OpenAI SDK) rather than a plain float.
+        if timeout is None:
+            _effective_timeout = _DEFAULT_TIMEOUT_SECONDS
+        elif isinstance(timeout, (int, float)):
+            _effective_timeout = float(timeout)
+        else:
+            # httpx.Timeout or similar — pick the largest component so the
+            # subprocess has enough wall-clock time for the full response.
+            _candidates = [
+                getattr(timeout, attr, None)
+                for attr in ("read", "write", "connect", "pool", "timeout")
+            ]
+            _numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
+            _effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
+
         response_text, reasoning_text = self._run_prompt(
             prompt_text,
-            timeout_seconds=float(timeout or _DEFAULT_TIMEOUT_SECONDS),
+            timeout_seconds=_effective_timeout,
         )
 
         tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)

From f188ac74f077a91b80bdc20b933511f72f58f66f Mon Sep 17 00:00:00 2001
From: emozilla <emozilla@nousresearch.com>
Date: Thu, 16 Apr 2026 01:59:51 -0400
Subject: [PATCH 11/14] =?UTF-8?q?feat:=20ungate=20Tool=20Gateway=20?=
 =?UTF-8?q?=E2=80=94=20subscription-based=20access=20with=20per-tool=20opt?=
 =?UTF-8?q?-in?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the HERMES_ENABLE_NOUS_MANAGED_TOOLS env-var feature flag with
subscription-based detection. The Tool Gateway is now available to any
paid Nous subscriber without needing a hidden env var.

Core changes:
- managed_nous_tools_enabled() checks get_nous_auth_status() +
  check_nous_free_tier() instead of an env var
- New use_gateway config flag per tool section (web, tts, browser,
  image_gen) records explicit user opt-in and overrides direct API
  keys at runtime
- New prefers_gateway(section) shared helper in tool_backend_helpers.py
  used by all 4 tool runtimes (web, tts, image gen, browser)

UX flow:
- hermes model: after Nous login/model selection, shows a curses
  prompt listing all gateway-eligible tools with current status.
  User chooses to enable all, enable only unconfigured tools, or skip.
  Defaults to Enable for new users, Skip when direct keys exist.
- hermes tools: provider selection now manages use_gateway flag —
  selecting Nous Subscription sets it, selecting any other provider
  clears it
- hermes status: renamed section to Nous Tool Gateway, added
  free-tier upgrade nudge for logged-in free users
- curses_radiolist: new description parameter for multi-line context
  that survives the screen clear

Runtime behavior:
- Each tool runtime (web_tools, tts_tool, image_generation_tool,
  browser_use) checks prefers_gateway() before falling back to
  direct env-var credentials
- get_nous_subscription_features() respects use_gateway flags,
  suppressing direct credential detection when the user opted in

Removed:
- HERMES_ENABLE_NOUS_MANAGED_TOOLS env var and all references
- apply_nous_provider_defaults() silent TTS auto-set
- get_nous_subscription_explainer_lines() static text
- Override env var warnings (use_gateway handles this properly now)
---
 hermes_cli/config.py                          |  11 +-
 hermes_cli/curses_ui.py                       |  29 +-
 hermes_cli/main.py                            |  28 +-
 hermes_cli/nous_subscription.py               | 306 ++++++++++++++++--
 hermes_cli/setup.py                           |  22 +-
 hermes_cli/status.py                          |  14 +-
 hermes_cli/tools_config.py                    |  38 ++-
 tests/agent/test_prompt_builder.py            |   6 +-
 tests/cli/test_cli_provider_resolution.py     |  27 +-
 tests/hermes_cli/test_nous_subscription.py    |   2 +-
 tests/hermes_cli/test_setup.py                |   4 +-
 .../hermes_cli/test_status_model_provider.py  |   8 +-
 tests/hermes_cli/test_tools_config.py         |   7 +-
 .../test_managed_browserbase_and_modal.py     |  10 +-
 tests/tools/test_managed_media_gateways.py    |   5 +-
 tests/tools/test_managed_tool_gateway.py      |  14 +-
 tests/tools/test_terminal_requirements.py     |  18 +-
 .../tools/test_terminal_tool_requirements.py  |   3 +-
 tests/tools/test_tool_backend_helpers.py      |  55 +++-
 tests/tools/test_web_tools_config.py          |  40 ++-
 tools/browser_providers/browser_use.py        |   4 +-
 tools/image_generation_tool.py                |   7 +-
 tools/terminal_tool.py                        |   8 +-
 tools/tool_backend_helpers.py                 |  38 ++-
 tools/tts_tool.py                             |  10 +-
 tools/web_tools.py                            |  17 +-
 26 files changed, 544 insertions(+), 187 deletions(-)

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 33bc325ee..7eae4d479 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -23,7 +23,6 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, Any, Optional, List, Tuple
 
-from tools.tool_backend_helpers import managed_nous_tools_enabled as _managed_nous_tools_enabled
 
 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
@@ -1646,14 +1645,8 @@ OPTIONAL_ENV_VARS = {
     },
 }
 
-if not _managed_nous_tools_enabled():
-    for _hidden_var in (
-        "FIRECRAWL_GATEWAY_URL",
-        "TOOL_GATEWAY_DOMAIN",
-        "TOOL_GATEWAY_SCHEME",
-        "TOOL_GATEWAY_USER_TOKEN",
-    ):
-        OPTIONAL_ENV_VARS.pop(_hidden_var, None)
+# Tool Gateway env vars are always visible — they're useful for
+# self-hosted / custom gateway setups regardless of subscription state.
 
 
 def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]:
diff --git a/hermes_cli/curses_ui.py b/hermes_cli/curses_ui.py
index 4880171fd..b05295f1e 100644
--- a/hermes_cli/curses_ui.py
+++ b/hermes_cli/curses_ui.py
@@ -166,6 +166,7 @@ def curses_radiolist(
     selected: int = 0,
     *,
     cancel_returns: int | None = None,
+    description: str | None = None,
 ) -> int:
     """Curses single-select radio list. Returns the selected index.
 
@@ -174,6 +175,9 @@ def curses_radiolist(
         items: Display labels for each row.
         selected: Index that starts selected (pre-selected).
         cancel_returns: Returned on ESC/q. Defaults to the original *selected*.
+        description: Optional multi-line text shown between the title and
+            the item list.  Useful for context that should survive the
+            curses screen clear.
     """
     if cancel_returns is None:
         cancel_returns = selected
@@ -181,6 +185,10 @@ def curses_radiolist(
     if not sys.stdin.isatty():
         return cancel_returns
 
+    desc_lines: list[str] = []
+    if description:
+        desc_lines = description.splitlines()
+
     try:
         import curses
         result_holder: list = [None]
@@ -199,22 +207,35 @@ def curses_radiolist(
                 stdscr.clear()
                 max_y, max_x = stdscr.getmaxyx()
 
+                row = 0
+
                 # Header
                 try:
                     hattr = curses.A_BOLD
                     if curses.has_colors():
                         hattr |= curses.color_pair(2)
-                    stdscr.addnstr(0, 0, title, max_x - 1, hattr)
+                    stdscr.addnstr(row, 0, title, max_x - 1, hattr)
+                    row += 1
+
+                    # Description lines
+                    for dline in desc_lines:
+                        if row >= max_y - 1:
+                            break
+                        stdscr.addnstr(row, 0, dline, max_x - 1, curses.A_NORMAL)
+                        row += 1
+
                     stdscr.addnstr(
-                        1, 0,
+                        row, 0,
                         "  \u2191\u2193 navigate  ENTER/SPACE select  ESC cancel",
                         max_x - 1, curses.A_DIM,
                     )
+                    row += 1
                 except curses.error:
                     pass
 
                 # Scrollable item list
-                visible_rows = max_y - 4
+                items_start = row + 1
+                visible_rows = max_y - items_start - 1
                 if cursor < scroll_offset:
                     scroll_offset = cursor
                 elif cursor >= scroll_offset + visible_rows:
@@ -223,7 +244,7 @@ def curses_radiolist(
                 for draw_i, i in enumerate(
                     range(scroll_offset, min(len(items), scroll_offset + visible_rows))
                 ):
-                    y = draw_i + 3
+                    y = draw_i + items_start
                     if y >= max_y - 1:
                         break
                     radio = "\u25cf" if i == selected else "\u25cb"
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 3eedcf7fc..33d017d8c 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1277,11 +1277,8 @@ def _model_flow_nous(config, current_model="", args=None):
         AuthError, format_auth_error,
         _login_nous, PROVIDER_REGISTRY,
     )
-    from hermes_cli.config import get_env_value, save_config, save_env_value
-    from hermes_cli.nous_subscription import (
-        apply_nous_provider_defaults,
-        get_nous_subscription_explainer_lines,
-    )
+    from hermes_cli.config import get_env_value, load_config, save_config, save_env_value
+    from hermes_cli.nous_subscription import prompt_enable_tool_gateway
     import argparse
 
     state = get_provider_auth_state("nous")
@@ -1300,9 +1297,12 @@ def _model_flow_nous(config, current_model="", args=None):
                 insecure=bool(getattr(args, "insecure", False)),
             )
             _login_nous(mock_args, PROVIDER_REGISTRY["nous"])
-            print()
-            for line in get_nous_subscription_explainer_lines():
-                print(line)
+            # Offer Tool Gateway enablement for paid subscribers
+            try:
+                _refreshed = load_config() or {}
+                prompt_enable_tool_gateway(_refreshed)
+            except Exception:
+                pass
         except SystemExit:
             print("Login cancelled or failed.")
             return
@@ -1410,18 +1410,10 @@ def _model_flow_nous(config, current_model="", args=None):
         if get_env_value("OPENAI_BASE_URL"):
             save_env_value("OPENAI_BASE_URL", "")
             save_env_value("OPENAI_API_KEY", "")
-        changed_defaults = apply_nous_provider_defaults(config)
         save_config(config)
         print(f"Default model set to: {selected} (via Nous Portal)")
-        if "tts" in changed_defaults:
-            print("TTS provider set to: OpenAI TTS via your Nous subscription")
-        else:
-            current_tts = str(config.get("tts", {}).get("provider") or "edge")
-            if current_tts.lower() not in {"", "edge"}:
-                print(f"Keeping your existing TTS provider: {current_tts}")
-        print()
-        for line in get_nous_subscription_explainer_lines():
-            print(line)
+        # Offer Tool Gateway enablement for paid subscribers
+        prompt_enable_tool_gateway(config)
     else:
         print("No change.")
 
diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py
index e182b37e7..691126a4c 100644
--- a/hermes_cli/nous_subscription.py
+++ b/hermes_cli/nous_subscription.py
@@ -258,6 +258,15 @@ def get_nous_subscription_features(
         terminal_cfg.get("modal_mode")
     )
 
+    # use_gateway flags — when True, the user explicitly opted into the
+    # Tool Gateway (via `hermes model` or `hermes tools`), so direct
+    # credentials should NOT prevent gateway routing.
+    web_use_gateway = bool(web_cfg.get("use_gateway"))
+    tts_use_gateway = bool(tts_cfg.get("use_gateway"))
+    browser_use_gateway = bool(browser_cfg.get("use_gateway"))
+    image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}
+    image_use_gateway = bool(image_gen_cfg.get("use_gateway"))
+
     direct_exa = bool(get_env_value("EXA_API_KEY"))
     direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
     direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
@@ -270,6 +279,21 @@ def get_nous_subscription_features(
     direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY"))
     direct_modal = has_direct_modal_credentials()
 
+    # When use_gateway is set, suppress direct credentials for managed detection
+    if web_use_gateway:
+        direct_firecrawl = False
+        direct_exa = False
+        direct_parallel = False
+        direct_tavily = False
+    if image_use_gateway:
+        direct_fal = False
+    if tts_use_gateway:
+        direct_openai_tts = False
+        direct_elevenlabs = False
+    if browser_use_gateway:
+        direct_browser_use = False
+        direct_browserbase = False
+
     managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
     managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
     managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
@@ -440,37 +464,7 @@ def get_nous_subscription_features(
     )
 
 
-def get_nous_subscription_explainer_lines() -> list[str]:
-    if not managed_nous_tools_enabled():
-        return []
 
-    return [
-        "Nous subscription enables managed web tools, image generation, OpenAI TTS, and browser automation by default.",
-        "Those managed tools bill to your Nous subscription. Modal execution is optional and can bill to your subscription too.",
-        "Change these later with: hermes setup tools, hermes setup terminal, or hermes status.",
-    ]
-
-
-def apply_nous_provider_defaults(config: Dict[str, object]) -> set[str]:
-    """Apply provider-level Nous defaults shared by `hermes setup` and `hermes model`."""
-    if not managed_nous_tools_enabled():
-        return set()
-
-    features = get_nous_subscription_features(config)
-    if not features.provider_is_nous:
-        return set()
-
-    tts_cfg = config.get("tts")
-    if not isinstance(tts_cfg, dict):
-        tts_cfg = {}
-        config["tts"] = tts_cfg
-
-    current_tts = str(tts_cfg.get("provider") or "edge").strip().lower()
-    if current_tts not in {"", "edge"}:
-        return set()
-
-    tts_cfg["provider"] = "openai"
-    return {"tts"}
 
 
 def apply_nous_managed_defaults(
@@ -530,3 +524,255 @@ def apply_nous_managed_defaults(
         changed.add("image_gen")
 
     return changed
+
+
+# ---------------------------------------------------------------------------
+# Tool Gateway offer — single Y/n prompt after model selection
+# ---------------------------------------------------------------------------
+
+_GATEWAY_TOOL_LABELS = {
+    "web": "Web search & extract (Firecrawl)",
+    "image_gen": "Image generation (FAL)",
+    "tts": "Text-to-speech (OpenAI TTS)",
+    "browser": "Browser automation (Browser Use)",
+}
+
+
+def _get_gateway_direct_credentials() -> Dict[str, bool]:
+    """Report, per gateway tool key, whether direct credentials are set.
+
+    Web counts any Firecrawl/Parallel/Tavily/Exa setting; browser needs a
+    Browser Use key, or a Browserbase key together with its project id.
+    """
+    web_vars = (
+        "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "PARALLEL_API_KEY",
+        "TAVILY_API_KEY", "EXA_API_KEY",
+    )
+    has_web = any(get_env_value(name) for name in web_vars)
+    has_image = bool(get_env_value("FAL_KEY"))
+    has_tts = bool(resolve_openai_audio_api_key() or get_env_value("ELEVENLABS_API_KEY"))
+    if get_env_value("BROWSER_USE_API_KEY"):
+        has_browser = True
+    elif get_env_value("BROWSERBASE_API_KEY"):
+        has_browser = bool(get_env_value("BROWSERBASE_PROJECT_ID"))
+    else:
+        has_browser = False
+    return {"web": has_web, "image_gen": has_image, "tts": has_tts, "browser": has_browser}
+
+
+_GATEWAY_DIRECT_LABELS = {
+    "web": "Firecrawl/Exa/Parallel/Tavily key",
+    "image_gen": "FAL key",
+    "tts": "OpenAI/ElevenLabs key",
+    "browser": "Browser Use/Browserbase key",
+}
+
+_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "browser")
+
+
+def get_gateway_eligible_tools(
+    config: Optional[Dict[str, object]] = None,
+) -> tuple[list[str], list[str], list[str]]:
+    """Sort the gateway tools into (unconfigured, has_direct, already_managed).
+
+    - unconfigured: neither opted in nor backed by direct credentials
+    - has_direct: not opted in, but the user's own API keys are present
+    - already_managed: the tool's config section sets a truthy ``use_gateway``
+
+    Every key in ``_ALL_GATEWAY_KEYS`` lands in exactly one list, and an
+    explicit opt-in wins over the presence of direct credentials.
+
+    ``config`` defaults to the result of ``load_config()``.  Three empty lists
+    come back when ``managed_nous_tools_enabled()`` is false or when
+    ``model.provider`` in the config is not ``nous``.
+    """
+    if not managed_nous_tools_enabled():
+        return [], [], []
+
+    if config is None:
+        from hermes_cli.config import load_config
+        config = load_config() or {}
+
+    # Cheap provider check; avoids the heavier get_nous_subscription_features().
+    model_section = config.get("model")
+    provider = ""
+    if isinstance(model_section, dict):
+        provider = str(model_section.get("provider") or "").strip().lower()
+    if provider != "nous":
+        return [], [], []
+
+    credentials = _get_gateway_direct_credentials()
+
+    def _opted_in(tool: str) -> bool:
+        # Only an explicit use_gateway flag counts as opted in; implicit
+        # routing (no direct keys present) must still trigger the prompt.
+        section = config.get(tool)
+        return isinstance(section, dict) and bool(section.get("use_gateway"))
+
+    buckets: dict[str, list[str]] = {"none": [], "direct": [], "managed": []}
+    for tool in _ALL_GATEWAY_KEYS:
+        if _opted_in(tool):
+            buckets["managed"].append(tool)
+        elif credentials.get(tool):
+            buckets["direct"].append(tool)
+        else:
+            buckets["none"].append(tool)
+    return buckets["none"], buckets["direct"], buckets["managed"]
+
+
+def apply_gateway_defaults(
+    config: Dict[str, object],
+    tool_keys: list[str],
+) -> set[str]:
+    """Apply Tool Gateway config for the given tool keys.
+
+    Sets ``use_gateway: true`` in each requested tool's config section so the
+    runtime prefers the gateway even when direct API keys are present, and
+    pins the managed backend for the tools that have one:
+
+    - web: ``backend = "firecrawl"``
+    - tts: ``provider = "openai"``
+    - browser: ``cloud_provider = "browser-use"``
+    - image_gen: no backend key, only ``use_gateway``
+
+    Only the sections of requested tools are touched; a section that is
+    missing or not a dict is replaced with a fresh dict.  Keys in
+    ``tool_keys`` that are not gateway tools are ignored.
+
+    Args:
+        config: Loaded Hermes config mapping; mutated in place, not saved.
+        tool_keys: Tool keys to route through the gateway.
+
+    Returns:
+        The requested gateway tool keys whose sections were written.  A key
+        is reported even if its section already held the same values.
+    """
+    # Backend setting each managed tool must carry next to use_gateway.
+    managed_settings: dict[str, dict[str, object]] = {
+        "web": {"backend": "firecrawl"},
+        "tts": {"provider": "openai"},
+        "browser": {"cloud_provider": "browser-use"},
+        "image_gen": {},
+    }
+
+    changed: set[str] = set()
+    for key, settings in managed_settings.items():
+        if key not in tool_keys:
+            # Leave unrelated sections alone rather than creating empty
+            # ``web``/``tts``/``browser`` entries that would then be saved.
+            continue
+        # A missing or malformed (non-dict) section is replaced wholesale,
+        # discarding whatever non-dict value was stored there.
+        section = config.get(key)
+        if not isinstance(section, dict):
+            section = {}
+            config[key] = section
+        section.update(settings)
+        section["use_gateway"] = True
+        changed.add(key)
+
+    return changed
+
+
+def prompt_enable_tool_gateway(config: Dict[str, object]) -> set[str]:
+    """Offer the Tool Gateway for tools not yet opted in; save on acceptance.
+
+    Shows one prompt_choice() menu; its description lists each gateway
+    tool's current status.  ``config`` is mutated and saved when accepted.
+
+    Returns the set from apply_gateway_defaults(), or an empty set if nothing
+    is eligible, the prompt is unavailable/interrupted, or "Skip" is chosen.
+    """
+    unconfigured, has_direct, already_managed = get_gateway_eligible_tools(config)
+    if not unconfigured and not has_direct:
+        return set()
+
+    try:
+        from hermes_cli.setup import prompt_choice
+    except Exception:  # best-effort: without the prompt helper, make no offer
+        return set()
+
+    # Build description lines showing full status of all gateway tools
+    desc_parts: list[str] = [
+        "",
+        "  The Tool Gateway gives you access to web search, image generation,",
+        "  text-to-speech, and browser automation through your Nous subscription.",
+        "  No need to sign up for separate API keys — just pick the tools you want.",
+        "",
+    ]
+    if already_managed:
+        for k in already_managed:
+            desc_parts.append(f"  ✓ {_GATEWAY_TOOL_LABELS[k]} — using Tool Gateway")
+    if unconfigured:
+        for k in unconfigured:
+            desc_parts.append(f"  ○ {_GATEWAY_TOOL_LABELS[k]} — not configured")
+    if has_direct:
+        for k in has_direct:
+            desc_parts.append(f"  ○ {_GATEWAY_TOOL_LABELS[k]} — using {_GATEWAY_DIRECT_LABELS[k]}")
+
+    # Build short choice labels — detail is in the description above
+    choices: list[str] = []
+    choice_keys: list[str] = []  # parallel to choices: index -> "all" / "unconfigured" / "skip"
+
+    if unconfigured and has_direct:
+        choices.append("Enable for all tools (existing keys kept, not used)")
+        choice_keys.append("all")
+
+        choices.append("Enable only for tools without existing keys")
+        choice_keys.append("unconfigured")
+
+        choices.append("Skip")
+        choice_keys.append("skip")
+
+    elif unconfigured:
+        choices.append("Enable Tool Gateway")
+        choice_keys.append("unconfigured")
+
+        choices.append("Skip")
+        choice_keys.append("skip")
+
+    else:
+        choices.append("Enable Tool Gateway (existing keys kept, not used)")
+        choice_keys.append("all")
+
+        choices.append("Skip")
+        choice_keys.append("skip")
+
+    description = "\n".join(desc_parts) if desc_parts else None
+    # Default to the first ("Enable…") entry when no direct keys exist;
+    # default to the last entry ("Skip") when existing keys would be bypassed.
+    default_idx = 0 if not has_direct else len(choices) - 1
+
+    try:
+        idx = prompt_choice(
+            "Your Nous subscription includes the Tool Gateway.",
+            choices,
+            default_idx,
+            description=description,
+        )
+    except (KeyboardInterrupt, EOFError, OSError, SystemExit):  # aborted prompt counts as declined
+        return set()
+
+    action = choice_keys[idx]
+    if action == "skip":
+        return set()
+
+    if action == "all":
+        # Apply to every gateway tool, so already-managed tools are rewritten
+        # too and end up with the same persisted settings as the new ones.
+        to_apply = list(_ALL_GATEWAY_KEYS)
+    else:
+        to_apply = unconfigured
+
+    changed = apply_gateway_defaults(config, to_apply)
+    if changed:
+        from hermes_cli.config import save_config
+        save_config(config)
+        # Only report the tools that actually switched (not already-managed ones)
+        newly_switched = changed - set(already_managed)
+        for key in sorted(newly_switched):
+            label = _GATEWAY_TOOL_LABELS.get(key, key)
+            print(f"  ✓ {label}: enabled via Nous subscription")
+        if already_managed and not newly_switched:
+            print("  (all tools already using Tool Gateway)")
+    return changed
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index eafe3b633..96ee77112 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -20,10 +20,7 @@ import copy
 from pathlib import Path
 from typing import Optional, Dict, Any
 
-from hermes_cli.nous_subscription import (
-    apply_nous_provider_defaults,
-    get_nous_subscription_features,
-)
+from hermes_cli.nous_subscription import get_nous_subscription_features
 from tools.tool_backend_helpers import managed_nous_tools_enabled
 from hermes_constants import get_optional_skills_dir
 
@@ -213,20 +210,20 @@ def prompt(question: str, default: str = None, password: bool = False) -> str:
         sys.exit(1)
 
 
-def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int:
+def _curses_prompt_choice(question: str, choices: list, default: int = 0, description: str | None = None) -> int:
     """Single-select menu using curses. Delegates to curses_radiolist."""
     from hermes_cli.curses_ui import curses_radiolist
-    return curses_radiolist(question, choices, selected=default, cancel_returns=-1)
+    return curses_radiolist(question, choices, selected=default, cancel_returns=-1, description=description)
 
 
 
-def prompt_choice(question: str, choices: list, default: int = 0) -> int:
+def prompt_choice(question: str, choices: list, default: int = 0, description: str | None = None) -> int:
     """Prompt for a choice from a list with arrow key navigation.
 
     Escape keeps the current default (skips the question).
     Ctrl+C exits the wizard.
     """
-    idx = _curses_prompt_choice(question, choices, default)
+    idx = _curses_prompt_choice(question, choices, default, description=description)
     if idx >= 0:
         if idx == default:
             print_info("  Skipped (keeping current)")
@@ -835,14 +832,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
             print_info("Skipped — add later with 'hermes setup' or configure AUXILIARY_VISION_* settings")
 
 
-    if selected_provider == "nous" and nous_subscription_selected:
-        changed_defaults = apply_nous_provider_defaults(config)
-        current_tts = str(config.get("tts", {}).get("provider") or "edge")
-        if "tts" in changed_defaults:
-            print_success("TTS provider set to: OpenAI TTS via your Nous subscription")
-        else:
-            print_info(f"Keeping your existing TTS provider: {current_tts}")
-
+    # Tool Gateway prompt is already shown by _model_flow_nous() above.
     save_config(config)
 
     if not quick and selected_provider != "nous":
diff --git a/hermes_cli/status.py b/hermes_cli/status.py
index 5ec93f24d..2e34ae9c3 100644
--- a/hermes_cli/status.py
+++ b/hermes_cli/status.py
@@ -212,7 +212,7 @@ def show_status(args):
     if managed_nous_tools_enabled():
         features = get_nous_subscription_features(config)
         print()
-        print(color("◆ Nous Subscription Features", Colors.CYAN, Colors.BOLD))
+        print(color("◆ Nous Tool Gateway", Colors.CYAN, Colors.BOLD))
         if not features.nous_auth_present:
             print("  Nous Portal   ✗ not logged in")
         else:
@@ -230,6 +230,18 @@ def show_status(args):
             else:
                 state = "not configured"
             print(f"  {feature.label:<15} {check_mark(feature.available or feature.active or feature.managed_by_nous)} {state}")
+    elif nous_logged_in:
+        # Logged into Nous but on the free tier — show upgrade nudge
+        print()
+        print(color("◆ Nous Tool Gateway", Colors.CYAN, Colors.BOLD))
+        print("  Your free-tier Nous account does not include Tool Gateway access.")
+        print("  Upgrade your subscription to unlock managed web, image, TTS, and browser tools.")
+        try:
+            portal_url = nous_status.get("portal_base_url", "").rstrip("/")
+            if portal_url:
+                print(f"  Upgrade: {portal_url}")
+        except Exception:
+            pass
 
     # =========================================================================
     # API-Key Providers
diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index 0609e7ff4..fa15fe087 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -954,34 +954,49 @@ def _configure_provider(provider: dict, config: dict):
 
     # Set TTS provider in config if applicable
     if provider.get("tts_provider"):
-        config.setdefault("tts", {})["provider"] = provider["tts_provider"]
+        tts_cfg = config.setdefault("tts", {})
+        tts_cfg["provider"] = provider["tts_provider"]
+        tts_cfg["use_gateway"] = bool(managed_feature)
 
     # Set browser cloud provider in config if applicable
     if "browser_provider" in provider:
         bp = provider["browser_provider"]
+        browser_cfg = config.setdefault("browser", {})
         if bp == "local":
-            config.setdefault("browser", {})["cloud_provider"] = "local"
+            browser_cfg["cloud_provider"] = "local"
             _print_success("  Browser set to local mode")
         elif bp:
-            config.setdefault("browser", {})["cloud_provider"] = bp
+            browser_cfg["cloud_provider"] = bp
             _print_success(f"  Browser cloud provider set to: {bp}")
+        browser_cfg["use_gateway"] = bool(managed_feature)
 
     # Set web search backend in config if applicable
     if provider.get("web_backend"):
-        config.setdefault("web", {})["backend"] = provider["web_backend"]
+        web_cfg = config.setdefault("web", {})
+        web_cfg["backend"] = provider["web_backend"]
+        web_cfg["use_gateway"] = bool(managed_feature)
         _print_success(f"  Web backend set to: {provider['web_backend']}")
 
+    # For tools without a specific config key (e.g. image_gen), still
+    # track use_gateway so the runtime knows the user's intent.
+    if managed_feature and managed_feature not in ("web", "tts", "browser"):
+        config.setdefault(managed_feature, {})["use_gateway"] = True
+    elif not managed_feature:
+        # User picked a non-gateway provider — find which category this
+        # belongs to and clear use_gateway if it was previously set.
+        for cat_key, cat in TOOL_CATEGORIES.items():
+            if provider in cat.get("providers", []):
+                section = config.get(cat_key)
+                if isinstance(section, dict) and section.get("use_gateway"):
+                    section["use_gateway"] = False
+                break
+
     if not env_vars:
         if provider.get("post_setup"):
             _run_post_setup(provider["post_setup"])
         _print_success(f"  {provider['name']} - no configuration needed!")
         if managed_feature:
             _print_info("  Requests for this tool will be billed to your Nous subscription.")
-            override_envs = provider.get("override_env_vars", [])
-            if any(get_env_value(env_var) for env_var in override_envs):
-                _print_warning(
-                    "  Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env."
-                )
         return
 
     # Prompt for each required env var
@@ -1187,11 +1202,6 @@ def _reconfigure_provider(provider: dict, config: dict):
         _print_success(f"  {provider['name']} - no configuration needed!")
         if managed_feature:
             _print_info("  Requests for this tool will be billed to your Nous subscription.")
-            override_envs = provider.get("override_env_vars", [])
-            if any(get_env_value(env_var) for env_var in override_envs):
-                _print_warning(
-                    "  Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env."
-                )
         return
 
     for var in env_vars:
diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py
index 5a222cc38..2b231d2d1 100644
--- a/tests/agent/test_prompt_builder.py
+++ b/tests/agent/test_prompt_builder.py
@@ -413,7 +413,7 @@ class TestBuildSkillsSystemPrompt:
 
 class TestBuildNousSubscriptionPrompt:
     def test_includes_active_subscription_features(self, monkeypatch):
-        monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+        monkeypatch.setattr("tools.tool_backend_helpers.managed_nous_tools_enabled", lambda: True)
         monkeypatch.setattr(
             "hermes_cli.nous_subscription.get_nous_subscription_features",
             lambda config=None: NousSubscriptionFeatures(
@@ -437,7 +437,7 @@ class TestBuildNousSubscriptionPrompt:
         assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys" in prompt
 
     def test_non_subscriber_prompt_includes_relevant_upgrade_guidance(self, monkeypatch):
-        monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+        monkeypatch.setattr("tools.tool_backend_helpers.managed_nous_tools_enabled", lambda: True)
         monkeypatch.setattr(
             "hermes_cli.nous_subscription.get_nous_subscription_features",
             lambda config=None: NousSubscriptionFeatures(
@@ -460,7 +460,7 @@ class TestBuildNousSubscriptionPrompt:
         assert "Do not mention subscription unless" in prompt
 
     def test_feature_flag_off_returns_empty_prompt(self, monkeypatch):
-        monkeypatch.delenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", raising=False)
+        monkeypatch.setattr("tools.tool_backend_helpers.managed_nous_tools_enabled", lambda: False)
 
         prompt = build_nous_subscription_prompt({"web_search"})
 
diff --git a/tests/cli/test_cli_provider_resolution.py b/tests/cli/test_cli_provider_resolution.py
index 624e166a8..fe4153c80 100644
--- a/tests/cli/test_cli_provider_resolution.py
+++ b/tests/cli/test_cli_provider_resolution.py
@@ -308,7 +308,7 @@ def test_codex_provider_replaces_incompatible_default_model(monkeypatch):
 
 
 def test_model_flow_nous_prints_subscription_guidance_without_mutating_explicit_tts(monkeypatch, capsys):
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+    monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)
     config = {
         "model": {"provider": "nous", "default": "claude-opus-4-6"},
         "tts": {"provider": "elevenlabs"},
@@ -333,21 +333,17 @@ def test_model_flow_nous_prints_subscription_guidance_without_mutating_explicit_
     monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None, **kw: "claude-opus-4-6")
     monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: None)
     monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda provider, url: None)
-    monkeypatch.setattr(
-        "hermes_cli.nous_subscription.get_nous_subscription_explainer_lines",
-        lambda: ["Nous subscription enables managed web tools."],
-    )
 
     hermes_main._model_flow_nous(config, current_model="claude-opus-4-6")
 
     out = capsys.readouterr().out
-    assert "Nous subscription enables managed web tools." in out
+    assert "Default model set to:" in out
     assert config["tts"]["provider"] == "elevenlabs"
     assert config["browser"]["cloud_provider"] == "browser-use"
 
 
-def test_model_flow_nous_applies_managed_tts_default_when_unconfigured(monkeypatch, capsys):
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+def test_model_flow_nous_offers_tool_gateway_prompt_when_unconfigured(monkeypatch, capsys):
+    monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)
     config = {
         "model": {"provider": "nous", "default": "claude-opus-4-6"},
         "tts": {"provider": "edge"},
@@ -355,13 +351,13 @@ def test_model_flow_nous_applies_managed_tts_default_when_unconfigured(monkeypat
 
     monkeypatch.setattr(
         "hermes_cli.auth.get_provider_auth_state",
-        lambda provider: {"access_token": "nous-token"},
+        lambda provider: {"access_token": "***"},
     )
     monkeypatch.setattr(
         "hermes_cli.auth.resolve_nous_runtime_credentials",
         lambda *args, **kwargs: {
             "base_url": "https://inference.example.com/v1",
-            "api_key": "nous-key",
+            "api_key": "***",
         },
     )
     monkeypatch.setattr(
@@ -371,17 +367,12 @@ def test_model_flow_nous_applies_managed_tts_default_when_unconfigured(monkeypat
     monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None, **kw: "claude-opus-4-6")
     monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: None)
     monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda provider, url: None)
-    monkeypatch.setattr(
-        "hermes_cli.nous_subscription.get_nous_subscription_explainer_lines",
-        lambda: ["Nous subscription enables managed web tools."],
-    )
-
     hermes_main._model_flow_nous(config, current_model="claude-opus-4-6")
 
     out = capsys.readouterr().out
-    assert "Nous subscription enables managed web tools." in out
-    assert "OpenAI TTS via your Nous subscription" in out
-    assert config["tts"]["provider"] == "openai"
+    # Tool Gateway prompt should be shown (input() raises OSError in pytest
+    # which is caught, so the prompt text appears but nothing is applied)
+    assert "Tool Gateway" in out
 
 
 def test_codex_provider_uses_config_model(monkeypatch):
diff --git a/tests/hermes_cli/test_nous_subscription.py b/tests/hermes_cli/test_nous_subscription.py
index c04276976..b7819cfa8 100644
--- a/tests/hermes_cli/test_nous_subscription.py
+++ b/tests/hermes_cli/test_nous_subscription.py
@@ -24,7 +24,7 @@ def test_get_nous_subscription_features_recognizes_direct_exa_backend(monkeypatc
 
 
 def test_get_nous_subscription_features_prefers_managed_modal_in_auto_mode(monkeypatch):
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+    monkeypatch.setattr("tools.tool_backend_helpers.managed_nous_tools_enabled", lambda: True)
     monkeypatch.setattr(ns, "get_env_value", lambda name: "")
     monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
     monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
diff --git a/tests/hermes_cli/test_setup.py b/tests/hermes_cli/test_setup.py
index 2c07d3d66..150fddab0 100644
--- a/tests/hermes_cli/test_setup.py
+++ b/tests/hermes_cli/test_setup.py
@@ -363,7 +363,7 @@ def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, mon
 
 
 def test_modal_setup_can_use_nous_subscription_without_modal_creds(tmp_path, monkeypatch, capsys):
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+    monkeypatch.setattr("hermes_cli.setup.managed_nous_tools_enabled", lambda: True)
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
     config = load_config()
 
@@ -405,7 +405,7 @@ def test_modal_setup_can_use_nous_subscription_without_modal_creds(tmp_path, mon
 
 
 def test_modal_setup_persists_direct_mode_when_user_chooses_their_own_account(tmp_path, monkeypatch):
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+    monkeypatch.setattr("hermes_cli.setup.managed_nous_tools_enabled", lambda: True)
     monkeypatch.setenv("HERMES_HOME", str(tmp_path))
     monkeypatch.delenv("MODAL_TOKEN_ID", raising=False)
     monkeypatch.delenv("MODAL_TOKEN_SECRET", raising=False)
diff --git a/tests/hermes_cli/test_status_model_provider.py b/tests/hermes_cli/test_status_model_provider.py
index 04221d88f..d9f860153 100644
--- a/tests/hermes_cli/test_status_model_provider.py
+++ b/tests/hermes_cli/test_status_model_provider.py
@@ -64,7 +64,7 @@ def test_show_status_displays_legacy_string_model_and_custom_endpoint(monkeypatc
 
 
 def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path):
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+    monkeypatch.setattr("hermes_cli.status.managed_nous_tools_enabled", lambda: True)
     from hermes_cli import status as status_mod
 
     _patch_common_status_deps(monkeypatch, status_mod, tmp_path)
@@ -98,13 +98,13 @@ def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path
     status_mod.show_status(SimpleNamespace(all=False, deep=False))
 
     out = capsys.readouterr().out
-    assert "Nous Subscription Features" in out
+    assert "Nous Tool Gateway" in out
     assert "Browser automation" in out
     assert "active via Nous subscription" in out
 
 
 def test_show_status_hides_nous_subscription_section_when_feature_flag_is_off(monkeypatch, capsys, tmp_path):
-    monkeypatch.delenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", raising=False)
+    monkeypatch.setattr("hermes_cli.status.managed_nous_tools_enabled", lambda: False)
     from hermes_cli import status as status_mod
 
     _patch_common_status_deps(monkeypatch, status_mod, tmp_path)
@@ -121,4 +121,4 @@ def test_show_status_hides_nous_subscription_section_when_feature_flag_is_off(mo
     status_mod.show_status(SimpleNamespace(all=False, deep=False))
 
     out = capsys.readouterr().out
-    assert "Nous Subscription Features" not in out
+    assert "Nous Tool Gateway" not in out
diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py
index 3ad0be886..09765c440 100644
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@@ -296,7 +296,7 @@ def test_save_platform_tools_still_preserves_mcp_with_platform_default_present()
 
 
 def test_visible_providers_include_nous_subscription_when_logged_in(monkeypatch):
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+    monkeypatch.setattr("hermes_cli.tools_config.managed_nous_tools_enabled", lambda: True)
     config = {"model": {"provider": "nous"}}
 
     monkeypatch.setattr(
@@ -310,7 +310,7 @@ def test_visible_providers_include_nous_subscription_when_logged_in(monkeypatch)
 
 
 def test_visible_providers_hide_nous_subscription_when_feature_flag_is_off(monkeypatch):
-    monkeypatch.delenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", raising=False)
+    monkeypatch.setattr("hermes_cli.tools_config.managed_nous_tools_enabled", lambda: False)
     config = {"model": {"provider": "nous"}}
 
     monkeypatch.setattr(
@@ -338,7 +338,8 @@ def test_local_browser_provider_is_saved_explicitly(monkeypatch):
 
 
 def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+    monkeypatch.setattr("hermes_cli.tools_config.managed_nous_tools_enabled", lambda: True)
+    monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)
     config = {
         "model": {"provider": "nous"},
         "platform_toolsets": {"cli": []},
diff --git a/tests/tools/test_managed_browserbase_and_modal.py b/tests/tools/test_managed_browserbase_and_modal.py
index 5ae24f01a..6c963be62 100644
--- a/tests/tools/test_managed_browserbase_and_modal.py
+++ b/tests/tools/test_managed_browserbase_and_modal.py
@@ -47,7 +47,15 @@ def _restore_tool_and_agent_modules():
 
 @pytest.fixture(autouse=True)
 def _enable_managed_nous_tools(monkeypatch):
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+    """Ensure managed_nous_tools_enabled() returns True even after module reloads.
+
+    The _install_fake_tools_package() helper resets and reimports tool modules,
+    so a simple monkeypatch on tool_backend_helpers doesn't survive.  We patch
+    the *source* modules that the reimported modules will import from — both
+    hermes_cli.auth and hermes_cli.models — so the function body returns True.
+    """
+    monkeypatch.setattr("hermes_cli.auth.get_nous_auth_status", lambda: {"logged_in": True})
+    monkeypatch.setattr("hermes_cli.models.check_nous_free_tier", lambda: False)
 
 
 def _install_fake_tools_package():
diff --git a/tests/tools/test_managed_media_gateways.py b/tests/tools/test_managed_media_gateways.py
index ecbf71c2a..4468dfe94 100644
--- a/tests/tools/test_managed_media_gateways.py
+++ b/tests/tools/test_managed_media_gateways.py
@@ -46,7 +46,10 @@ def _restore_tool_and_agent_modules():
 
 @pytest.fixture(autouse=True)
 def _enable_managed_nous_tools(monkeypatch):
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+    """Patch the source modules so managed_nous_tools_enabled() returns True
+    even after tool modules are dynamically reloaded."""
+    monkeypatch.setattr("hermes_cli.auth.get_nous_auth_status", lambda: {"logged_in": True})
+    monkeypatch.setattr("hermes_cli.models.check_nous_free_tier", lambda: False)
 
 
 def _install_fake_tools_package():
diff --git a/tests/tools/test_managed_tool_gateway.py b/tests/tools/test_managed_tool_gateway.py
index f854732b2..a539fb57c 100644
--- a/tests/tools/test_managed_tool_gateway.py
+++ b/tests/tools/test_managed_tool_gateway.py
@@ -19,11 +19,10 @@ def test_resolve_managed_tool_gateway_derives_vendor_origin_from_shared_domain()
     with patch.dict(
         os.environ,
         {
-            "HERMES_ENABLE_NOUS_MANAGED_TOOLS": "1",
             "TOOL_GATEWAY_DOMAIN": "nousresearch.com",
         },
         clear=False,
-    ):
+    ), patch.object(managed_tool_gateway, "managed_nous_tools_enabled", return_value=True):
         result = resolve_managed_tool_gateway(
             "firecrawl",
             token_reader=lambda: "nous-token",
@@ -39,11 +38,10 @@ def test_resolve_managed_tool_gateway_uses_vendor_specific_override():
     with patch.dict(
         os.environ,
         {
-            "HERMES_ENABLE_NOUS_MANAGED_TOOLS": "1",
             "BROWSER_USE_GATEWAY_URL": "http://browser-use-gateway.localhost:3009/",
         },
         clear=False,
-    ):
+    ), patch.object(managed_tool_gateway, "managed_nous_tools_enabled", return_value=True):
         result = resolve_managed_tool_gateway(
             "browser-use",
             token_reader=lambda: "nous-token",
@@ -57,11 +55,10 @@ def test_resolve_managed_tool_gateway_is_inactive_without_nous_token():
     with patch.dict(
         os.environ,
         {
-            "HERMES_ENABLE_NOUS_MANAGED_TOOLS": "1",
             "TOOL_GATEWAY_DOMAIN": "nousresearch.com",
         },
         clear=False,
-    ):
+    ), patch.object(managed_tool_gateway, "managed_nous_tools_enabled", return_value=True):
         result = resolve_managed_tool_gateway(
             "firecrawl",
             token_reader=lambda: None,
@@ -70,8 +67,9 @@ def test_resolve_managed_tool_gateway_is_inactive_without_nous_token():
     assert result is None
 
 
-def test_resolve_managed_tool_gateway_is_disabled_without_feature_flag():
-    with patch.dict(os.environ, {"TOOL_GATEWAY_DOMAIN": "nousresearch.com"}, clear=False):
+def test_resolve_managed_tool_gateway_is_disabled_without_subscription():
+    with patch.dict(os.environ, {"TOOL_GATEWAY_DOMAIN": "nousresearch.com"}, clear=False), \
+         patch.object(managed_tool_gateway, "managed_nous_tools_enabled", return_value=False):
         result = resolve_managed_tool_gateway(
             "firecrawl",
             token_reader=lambda: "nous-token",
diff --git a/tests/tools/test_terminal_requirements.py b/tests/tools/test_terminal_requirements.py
index aab5c53f5..7859043ab 100644
--- a/tests/tools/test_terminal_requirements.py
+++ b/tests/tools/test_terminal_requirements.py
@@ -7,7 +7,6 @@ terminal_tool_module = importlib.import_module("tools.terminal_tool")
 def _clear_terminal_env(monkeypatch):
     """Remove terminal env vars that could affect requirements checks."""
     keys = [
-        "HERMES_ENABLE_NOUS_MANAGED_TOOLS",
         "TERMINAL_ENV",
         "TERMINAL_MODAL_MODE",
         "TERMINAL_SSH_HOST",
@@ -19,6 +18,11 @@ def _clear_terminal_env(monkeypatch):
     ]
     for key in keys:
         monkeypatch.delenv(key, raising=False)
+    # Default: no Nous subscription — patch both the terminal_tool local
+    # binding and tool_backend_helpers (used by resolve_modal_backend_state).
+    monkeypatch.setattr(terminal_tool_module, "managed_nous_tools_enabled", lambda: False)
+    import tools.tool_backend_helpers as _tbh
+    monkeypatch.setattr(_tbh, "managed_nous_tools_enabled", lambda: False)
 
 
 def test_local_terminal_requirements(monkeypatch, caplog):
@@ -81,7 +85,9 @@ def test_modal_backend_without_token_or_config_logs_specific_error(monkeypatch,
 
 def test_modal_backend_with_managed_gateway_does_not_require_direct_creds_or_minisweagent(monkeypatch, tmp_path):
     _clear_terminal_env(monkeypatch)
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+    monkeypatch.setattr(terminal_tool_module, "managed_nous_tools_enabled", lambda: True)
+    import tools.tool_backend_helpers as _tbh
+    monkeypatch.setattr(_tbh, "managed_nous_tools_enabled", lambda: True)
     monkeypatch.setenv("TERMINAL_ENV", "modal")
     monkeypatch.setenv("HOME", str(tmp_path))
     monkeypatch.setenv("USERPROFILE", str(tmp_path))
@@ -98,7 +104,9 @@ def test_modal_backend_with_managed_gateway_does_not_require_direct_creds_or_min
 
 def test_modal_backend_auto_mode_prefers_managed_gateway_over_direct_creds(monkeypatch, tmp_path):
     _clear_terminal_env(monkeypatch)
-    monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+    monkeypatch.setattr(terminal_tool_module, "managed_nous_tools_enabled", lambda: True)
+    import tools.tool_backend_helpers as _tbh
+    monkeypatch.setattr(_tbh, "managed_nous_tools_enabled", lambda: True)
     monkeypatch.setenv("TERMINAL_ENV", "modal")
     monkeypatch.setenv("MODAL_TOKEN_ID", "tok-id")
     monkeypatch.setenv("MODAL_TOKEN_SECRET", "tok-secret")
@@ -147,7 +155,7 @@ def test_modal_backend_managed_mode_does_not_fall_back_to_direct(monkeypatch, ca
 
     assert ok is False
     assert any(
-        "HERMES_ENABLE_NOUS_MANAGED_TOOLS is not enabled" in record.getMessage()
+        "paid Nous subscription is required" in record.getMessage()
         for record in caplog.records
     )
 
@@ -165,6 +173,6 @@ def test_modal_backend_managed_mode_without_feature_flag_logs_clear_error(monkey
 
     assert ok is False
     assert any(
-        "HERMES_ENABLE_NOUS_MANAGED_TOOLS is not enabled" in record.getMessage()
+        "paid Nous subscription is required" in record.getMessage()
         for record in caplog.records
     )
diff --git a/tests/tools/test_terminal_tool_requirements.py b/tests/tools/test_terminal_tool_requirements.py
index d21e0628f..1fbaef8e3 100644
--- a/tests/tools/test_terminal_tool_requirements.py
+++ b/tests/tools/test_terminal_tool_requirements.py
@@ -28,7 +28,8 @@ class TestTerminalRequirements:
         assert {"read_file", "write_file", "patch", "search_files"}.issubset(names)
 
     def test_terminal_and_execute_code_tools_resolve_for_managed_modal(self, monkeypatch, tmp_path):
-        monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
+        monkeypatch.setattr("tools.tool_backend_helpers.managed_nous_tools_enabled", lambda: True)
+        monkeypatch.setattr(terminal_tool_module, "managed_nous_tools_enabled", lambda: True)
         monkeypatch.setenv("HOME", str(tmp_path))
         monkeypatch.setenv("USERPROFILE", str(tmp_path))
         monkeypatch.delenv("MODAL_TOKEN_ID", raising=False)
diff --git a/tests/tools/test_tool_backend_helpers.py b/tests/tools/test_tool_backend_helpers.py
index faaed9c5e..abe6d7bd1 100644
--- a/tests/tools/test_tool_backend_helpers.py
+++ b/tests/tools/test_tool_backend_helpers.py
@@ -1,7 +1,7 @@
 """Unit tests for tools/tool_backend_helpers.py.
 
 Tests cover:
-- managed_nous_tools_enabled() feature flag
+- managed_nous_tools_enabled() subscription-based gate
 - normalize_browser_cloud_provider() coercion
 - coerce_modal_mode() / normalize_modal_mode() validation
 - has_direct_modal_credentials() detection
@@ -27,24 +27,51 @@ from tools.tool_backend_helpers import (
 )
 
 
+def _raise_import():
+    raise ImportError("simulated missing module")
+
+
 # ---------------------------------------------------------------------------
 # managed_nous_tools_enabled
 # ---------------------------------------------------------------------------
 class TestManagedNousToolsEnabled:
-    """Feature flag driven by HERMES_ENABLE_NOUS_MANAGED_TOOLS."""
+    """Subscription-based gate: True for paid Nous subscribers."""
 
-    def test_disabled_by_default(self, monkeypatch):
-        monkeypatch.delenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", raising=False)
+    def test_disabled_when_not_logged_in(self, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.auth.get_nous_auth_status",
+            lambda: {},
+        )
         assert managed_nous_tools_enabled() is False
 
-    @pytest.mark.parametrize("val", ["1", "true", "True", "yes"])
-    def test_enabled_when_truthy(self, monkeypatch, val):
-        monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", val)
+    def test_disabled_for_free_tier(self, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.auth.get_nous_auth_status",
+            lambda: {"logged_in": True},
+        )
+        monkeypatch.setattr(
+            "hermes_cli.models.check_nous_free_tier",
+            lambda: True,
+        )
+        assert managed_nous_tools_enabled() is False
+
+    def test_enabled_for_paid_subscriber(self, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.auth.get_nous_auth_status",
+            lambda: {"logged_in": True},
+        )
+        monkeypatch.setattr(
+            "hermes_cli.models.check_nous_free_tier",
+            lambda: False,
+        )
         assert managed_nous_tools_enabled() is True
 
-    @pytest.mark.parametrize("val", ["0", "false", "no", ""])
-    def test_disabled_when_falsy(self, monkeypatch, val):
-        monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", val)
+    def test_returns_false_on_exception(self, monkeypatch):
+        """Should never crash — returns False on any exception."""
+        monkeypatch.setattr(
+            "hermes_cli.auth.get_nous_auth_status",
+            _raise_import,
+        )
         assert managed_nous_tools_enabled() is False
 
 
@@ -171,10 +198,10 @@ class TestResolveModalBackendState:
     @staticmethod
     def _resolve(monkeypatch, mode, *, has_direct, managed_ready, nous_enabled=False):
         """Helper to call resolve_modal_backend_state with feature flag control."""
-        if nous_enabled:
-            monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
-        else:
-            monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "")
+        monkeypatch.setattr(
+            "tools.tool_backend_helpers.managed_nous_tools_enabled",
+            lambda: nous_enabled,
+        )
         return resolve_modal_backend_state(
             mode, has_direct=has_direct, managed_ready=managed_ready
         )
diff --git a/tests/tools/test_web_tools_config.py b/tests/tools/test_web_tools_config.py
index 9e33d7445..ff9e0d549 100644
--- a/tests/tools/test_web_tools_config.py
+++ b/tests/tools/test_web_tools_config.py
@@ -26,7 +26,6 @@ class TestFirecrawlClientConfig:
         tools.web_tools._firecrawl_client = None
         tools.web_tools._firecrawl_client_config = None
         for key in (
-            "HERMES_ENABLE_NOUS_MANAGED_TOOLS",
             "FIRECRAWL_API_KEY",
             "FIRECRAWL_API_URL",
             "FIRECRAWL_GATEWAY_URL",
@@ -35,7 +34,15 @@ class TestFirecrawlClientConfig:
             "TOOL_GATEWAY_USER_TOKEN",
         ):
             os.environ.pop(key, None)
-        os.environ["HERMES_ENABLE_NOUS_MANAGED_TOOLS"] = "1"
+        # Enable managed tools by default for these tests — patch both the
+        # local web_tools import and the managed_tool_gateway import so the
+        # full firecrawl client init path sees True.
+        self._managed_patchers = [
+            patch("tools.web_tools.managed_nous_tools_enabled", return_value=True),
+            patch("tools.managed_tool_gateway.managed_nous_tools_enabled", return_value=True),
+        ]
+        for p in self._managed_patchers:
+            p.start()
 
     def teardown_method(self):
         """Reset client after each test."""
@@ -43,7 +50,6 @@ class TestFirecrawlClientConfig:
         tools.web_tools._firecrawl_client = None
         tools.web_tools._firecrawl_client_config = None
         for key in (
-            "HERMES_ENABLE_NOUS_MANAGED_TOOLS",
             "FIRECRAWL_API_KEY",
             "FIRECRAWL_API_URL",
             "FIRECRAWL_GATEWAY_URL",
@@ -52,6 +58,8 @@ class TestFirecrawlClientConfig:
             "TOOL_GATEWAY_USER_TOKEN",
         ):
             os.environ.pop(key, None)
+        for p in self._managed_patchers:
+            p.stop()
 
     # ── Configuration matrix ─────────────────────────────────────────
 
@@ -298,7 +306,6 @@ class TestBackendSelection:
     """
 
     _ENV_KEYS = (
-        "HERMES_ENABLE_NOUS_MANAGED_TOOLS",
         "EXA_API_KEY",
         "PARALLEL_API_KEY",
         "FIRECRAWL_API_KEY",
@@ -311,14 +318,20 @@ class TestBackendSelection:
     )
 
     def setup_method(self):
-        os.environ["HERMES_ENABLE_NOUS_MANAGED_TOOLS"] = "1"
         for key in self._ENV_KEYS:
-            if key != "HERMES_ENABLE_NOUS_MANAGED_TOOLS":
-                os.environ.pop(key, None)
+            os.environ.pop(key, None)
+        self._managed_patchers = [
+            patch("tools.web_tools.managed_nous_tools_enabled", return_value=True),
+            patch("tools.managed_tool_gateway.managed_nous_tools_enabled", return_value=True),
+        ]
+        for p in self._managed_patchers:
+            p.start()
 
     def teardown_method(self):
         for key in self._ENV_KEYS:
             os.environ.pop(key, None)
+        for p in self._managed_patchers:
+            p.stop()
 
     # ── Config-based selection (web.backend in config.yaml) ───────────
 
@@ -523,7 +536,6 @@ class TestCheckWebApiKey:
     """Test suite for check_web_api_key() unified availability check."""
 
     _ENV_KEYS = (
-        "HERMES_ENABLE_NOUS_MANAGED_TOOLS",
         "EXA_API_KEY",
         "PARALLEL_API_KEY",
         "FIRECRAWL_API_KEY",
@@ -536,14 +548,20 @@ class TestCheckWebApiKey:
     )
 
     def setup_method(self):
-        os.environ["HERMES_ENABLE_NOUS_MANAGED_TOOLS"] = "1"
         for key in self._ENV_KEYS:
-            if key != "HERMES_ENABLE_NOUS_MANAGED_TOOLS":
-                os.environ.pop(key, None)
+            os.environ.pop(key, None)
+        self._managed_patchers = [
+            patch("tools.web_tools.managed_nous_tools_enabled", return_value=True),
+            patch("tools.managed_tool_gateway.managed_nous_tools_enabled", return_value=True),
+        ]
+        for p in self._managed_patchers:
+            p.start()
 
     def teardown_method(self):
         for key in self._ENV_KEYS:
             os.environ.pop(key, None)
+        for p in self._managed_patchers:
+            p.stop()
 
     def test_parallel_key_only(self):
         with patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
diff --git a/tools/browser_providers/browser_use.py b/tools/browser_providers/browser_use.py
index 0f12dc440..f8e9a8d9f 100644
--- a/tools/browser_providers/browser_use.py
+++ b/tools/browser_providers/browser_use.py
@@ -10,7 +10,7 @@ import requests
 
 from tools.browser_providers.base import CloudBrowserProvider
 from tools.managed_tool_gateway import resolve_managed_tool_gateway
-from tools.tool_backend_helpers import managed_nous_tools_enabled
+from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway
 
 logger = logging.getLogger(__name__)
 _pending_create_keys: Dict[str, str] = {}
@@ -75,7 +75,7 @@ class BrowserUseProvider(CloudBrowserProvider):
 
     def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
         api_key = os.environ.get("BROWSER_USE_API_KEY")
-        if api_key:
+        if api_key and not prefers_gateway("browser"):
             return {
                 "api_key": api_key,
                 "base_url": _BASE_URL,
diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py
index 487b9b8db..db2c5254e 100644
--- a/tools/image_generation_tool.py
+++ b/tools/image_generation_tool.py
@@ -39,7 +39,7 @@ from urllib.parse import urlencode
 import fal_client
 from tools.debug_helpers import DebugSession
 from tools.managed_tool_gateway import resolve_managed_tool_gateway
-from tools.tool_backend_helpers import managed_nous_tools_enabled
+from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway
 
 logger = logging.getLogger(__name__)
 
@@ -87,8 +87,9 @@ _managed_fal_client_lock = threading.Lock()
 
 
 def _resolve_managed_fal_gateway():
-    """Return managed fal-queue gateway config when direct FAL credentials are absent."""
-    if os.getenv("FAL_KEY"):
+    """Return managed fal-queue gateway config when the user prefers the gateway
+    or direct FAL credentials are absent."""
+    if os.getenv("FAL_KEY") and not prefers_gateway("image_gen"):
         return None
     return resolve_managed_tool_gateway("fal-queue")
 
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index 1aa266522..69832cc1c 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -762,8 +762,8 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
             if modal_state["managed_mode_blocked"]:
                 raise ValueError(
                     "Modal backend is configured for managed mode, but "
-                    "HERMES_ENABLE_NOUS_MANAGED_TOOLS is not enabled and no direct "
-                    "Modal credentials/config were found. Enable the feature flag or "
+                    "a paid Nous subscription is required for the Tool Gateway and no direct "
+                    "Modal credentials/config were found. Log in with `hermes model` or "
                     "choose TERMINAL_MODAL_MODE=direct/auto."
                 )
             if modal_state["mode"] == "managed":
@@ -1577,8 +1577,8 @@ def check_terminal_requirements() -> bool:
                 if modal_state["managed_mode_blocked"]:
                     logger.error(
                         "Modal backend selected with TERMINAL_MODAL_MODE=managed, but "
-                        "HERMES_ENABLE_NOUS_MANAGED_TOOLS is not enabled and no direct "
-                        "Modal credentials/config were found. Enable the feature flag "
+                        "a paid Nous subscription is required for the Tool Gateway and no direct "
+                        "Modal credentials/config were found. Log in with `hermes model` "
                         "or choose TERMINAL_MODAL_MODE=direct/auto."
                     )
                     return False
diff --git a/tools/tool_backend_helpers.py b/tools/tool_backend_helpers.py
index b65e19174..a770fe747 100644
--- a/tools/tool_backend_helpers.py
+++ b/tools/tool_backend_helpers.py
@@ -6,7 +6,6 @@ import os
 from pathlib import Path
 from typing import Any, Dict
 
-from utils import env_var_enabled
 
 _DEFAULT_BROWSER_PROVIDER = "local"
 _DEFAULT_MODAL_MODE = "auto"
@@ -14,8 +13,26 @@ _VALID_MODAL_MODES = {"auto", "direct", "managed"}
 
 
 def managed_nous_tools_enabled() -> bool:
-    """Return True when the hidden Nous-managed tools feature flag is enabled."""
-    return env_var_enabled("HERMES_ENABLE_NOUS_MANAGED_TOOLS")
+    """Return True when the user has an active paid Nous subscription.
+
+    The Tool Gateway is available to any Nous subscriber who is NOT on
+    the free tier.  We intentionally catch all exceptions and return
+    False — never block the agent startup path.
+    """
+    try:
+        from hermes_cli.auth import get_nous_auth_status
+
+        status = get_nous_auth_status()
+        if not status.get("logged_in"):
+            return False
+
+        from hermes_cli.models import check_nous_free_tier
+
+        if check_nous_free_tier():
+            return False  # free-tier users don't get gateway access
+        return True
+    except Exception:
+        return False
 
 
 def normalize_browser_cloud_provider(value: object | None) -> str:
@@ -87,3 +104,18 @@ def resolve_openai_audio_api_key() -> str:
         os.getenv("VOICE_TOOLS_OPENAI_KEY", "")
         or os.getenv("OPENAI_API_KEY", "")
     ).strip()
+
+
+def prefers_gateway(config_section: str) -> bool:
+    """Return True when the user opted into the Tool Gateway for this tool.
+
+    Reads ``<section>.use_gateway`` from config.yaml.  Never raises.
+    """
+    try:
+        from hermes_cli.config import load_config
+        section = (load_config() or {}).get(config_section)
+        if isinstance(section, dict):
+            return bool(section.get("use_gateway"))
+    except Exception:
+        pass
+    return False
diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 65ff725ee..68c0d3c39 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -44,7 +44,7 @@ from hermes_constants import display_hermes_home
 
 logger = logging.getLogger(__name__)
 from tools.managed_tool_gateway import resolve_managed_tool_gateway
-from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key
+from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway, resolve_openai_audio_api_key
 from tools.xai_http import hermes_xai_user_agent
 
 # ---------------------------------------------------------------------------
@@ -823,9 +823,13 @@ def check_tts_requirements() -> bool:
 
 
 def _resolve_openai_audio_client_config() -> tuple[str, str]:
-    """Return direct OpenAI audio config or a managed gateway fallback."""
+    """Return direct OpenAI audio config or a managed gateway fallback.
+
+    When ``tts.use_gateway`` is set in config, the Tool Gateway is preferred
+    even if direct OpenAI credentials are present.
+    """
     direct_api_key = resolve_openai_audio_api_key()
-    if direct_api_key:
+    if direct_api_key and not prefers_gateway("tts"):
         return direct_api_key, DEFAULT_OPENAI_BASE_URL
 
     managed_gateway = resolve_managed_tool_gateway("openai-audio")
diff --git a/tools/web_tools.py b/tools/web_tools.py
index 0f21328ec..c24f1fc38 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -59,7 +59,7 @@ from tools.managed_tool_gateway import (
     read_nous_access_token as _read_nous_access_token,
     resolve_managed_tool_gateway,
 )
-from tools.tool_backend_helpers import managed_nous_tools_enabled
+from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway
 from tools.url_safety import is_safe_url
 from tools.website_policy import check_website_access
 
@@ -165,8 +165,8 @@ def _raise_web_backend_configuration_error() -> None:
     )
     if managed_nous_tools_enabled():
         message += (
-            " If you have the hidden Nous-managed tools flag enabled, you can also login to Nous "
-            "(`hermes model`) and provide FIRECRAWL_GATEWAY_URL or TOOL_GATEWAY_DOMAIN."
+            " With your Nous subscription you can also use the Tool Gateway — "
+            "run `hermes tools` and select Nous Subscription as the web provider."
         )
     raise ValueError(message)
 
@@ -176,8 +176,8 @@ def _firecrawl_backend_help_suffix() -> str:
     if not managed_nous_tools_enabled():
         return ""
     return (
-        ", or, if you have the hidden Nous-managed tools flag enabled, login to Nous and use "
-        "FIRECRAWL_GATEWAY_URL or TOOL_GATEWAY_DOMAIN"
+        ", or use the Nous Tool Gateway via your subscription "
+        "(FIRECRAWL_GATEWAY_URL or TOOL_GATEWAY_DOMAIN)"
     )
 
 
@@ -205,13 +205,14 @@ def _web_requires_env() -> list[str]:
 def _get_firecrawl_client():
     """Get or create Firecrawl client.
 
-    Direct Firecrawl takes precedence when explicitly configured. Otherwise
-    Hermes falls back to the Firecrawl tool-gateway for logged-in Nous Subscribers.
+    When ``web.use_gateway`` is set in config, the Tool Gateway is preferred
+    even if direct Firecrawl credentials are present.  Otherwise direct
+    Firecrawl takes precedence when configured, with the gateway as fallback.
     """
     global _firecrawl_client, _firecrawl_client_config
 
     direct_config = _get_direct_firecrawl_config()
-    if direct_config is not None:
+    if direct_config is not None and not prefers_gateway("web"):
         kwargs, client_config = direct_config
     else:
         managed_gateway = resolve_managed_tool_gateway(

From 10edd288c3ef2bac3c11d70b3bf204e87ea9e698 Mon Sep 17 00:00:00 2001
From: emozilla <emozilla@nousresearch.com>
Date: Thu, 16 Apr 2026 11:21:21 -0400
Subject: [PATCH 12/14] docs: add Nous Tool Gateway documentation

- New page: user-guide/features/tool-gateway.md covering eligibility,
  setup (hermes model, hermes tools, manual config), how use_gateway
  works, precedence, switching back, status checking, self-hosted
  gateway env vars, and FAQ
- Added to sidebar under Features (top-level, before Core category)
- Cross-references from: overview.md, tools.md, browser.md,
  image-generation.md, tts.md, providers.md, environment-variables.md
- Added Nous Tool Gateway subsection to env vars reference with
  TOOL_GATEWAY_DOMAIN, TOOL_GATEWAY_SCHEME, TOOL_GATEWAY_USER_TOKEN,
  and FIRECRAWL_GATEWAY_URL
---
 website/docs/integrations/providers.md        |   4 +
 .../docs/reference/environment-variables.md   |  11 ++
 website/docs/user-guide/features/browser.md   |   4 +
 .../user-guide/features/image-generation.md   |   4 +
 website/docs/user-guide/features/overview.md  |   1 +
 .../docs/user-guide/features/tool-gateway.md  | 183 ++++++++++++++++++
 website/docs/user-guide/features/tools.md     |   4 +
 website/docs/user-guide/features/tts.md       |   4 +
 website/sidebars.ts                           |   1 +
 9 files changed, 216 insertions(+)
 create mode 100644 website/docs/user-guide/features/tool-gateway.md

diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index 22deca638..c0eaf6e62 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -49,6 +49,10 @@ The OpenAI Codex provider authenticates via device code (open a URL, enter a cod
 Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use a separate "auxiliary" model — by default Gemini Flash via OpenRouter. An `OPENROUTER_API_KEY` enables these tools automatically. You can also configure which model and provider these tools use — see [Auxiliary Models](/docs/user-guide/configuration#auxiliary-models).
 :::
 
+:::tip Nous Tool Gateway
+Paid Nous Portal subscribers also get access to the **[Tool Gateway](/docs/user-guide/features/tool-gateway)** — web search, image generation, TTS, and browser automation routed through your subscription. No extra API keys are needed. Hermes offers it automatically during `hermes model` setup, and you can enable it later with `hermes tools`.
+:::
+
 ### Two Commands for Model Management
 
 Hermes has **two** model commands that serve different purposes:
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index aa0acd8c7..c4d4a11fa 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -110,6 +110,17 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
 | `WANDB_API_KEY` | RL training metrics ([wandb.ai](https://wandb.ai/)) |
 | `DAYTONA_API_KEY` | Daytona cloud sandboxes ([daytona.io](https://daytona.io/)) |
 
+### Nous Tool Gateway
+
+These variables configure the [Tool Gateway](/docs/user-guide/features/tool-gateway) for paid Nous subscribers or self-hosted gateway deployments. Most users don't need to set these — the gateway is configured automatically via `hermes model` or `hermes tools`.
+
+| Variable | Description |
+|----------|-------------|
+| `TOOL_GATEWAY_DOMAIN` | Base domain for Tool Gateway routing (default: `nousresearch.com`) |
+| `TOOL_GATEWAY_SCHEME` | HTTP or HTTPS scheme for gateway URLs (default: `https`) |
+| `TOOL_GATEWAY_USER_TOKEN` | Auth token for the Tool Gateway (normally auto-populated from Nous auth) |
+| `FIRECRAWL_GATEWAY_URL` | Override URL for the Firecrawl gateway endpoint specifically |
+
 ## Terminal Backend
 
 | Variable | Description |
diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md
index 016f29f7c..9880965ae 100644
--- a/website/docs/user-guide/features/browser.md
+++ b/website/docs/user-guide/features/browser.md
@@ -33,6 +33,10 @@ Key capabilities:
 
 ## Setup
 
+:::tip Nous Subscribers
+If you have a paid [Nous Portal](https://portal.nousresearch.com) subscription, you can use browser automation through the **[Tool Gateway](tool-gateway.md)** without any separate API keys. Run `hermes model` or `hermes tools` to enable it.
+:::
+
 ### Browserbase cloud mode
 
 To use Browserbase-managed cloud browsers, add:
diff --git a/website/docs/user-guide/features/image-generation.md b/website/docs/user-guide/features/image-generation.md
index a782630b1..eea563c44 100644
--- a/website/docs/user-guide/features/image-generation.md
+++ b/website/docs/user-guide/features/image-generation.md
@@ -11,6 +11,10 @@ Hermes Agent can generate images from text prompts using FAL.ai's **FLUX 2 Pro**
 
 ## Setup
 
+:::tip Nous Subscribers
+If you have a paid [Nous Portal](https://portal.nousresearch.com) subscription, you can use image generation through the **[Tool Gateway](tool-gateway.md)** without a FAL API key. Run `hermes model` or `hermes tools` to enable it.
+:::
+
 ### Get a FAL API Key
 
 1. Sign up at [fal.ai](https://fal.ai/)
diff --git a/website/docs/user-guide/features/overview.md b/website/docs/user-guide/features/overview.md
index 2d26e153a..3838b715b 100644
--- a/website/docs/user-guide/features/overview.md
+++ b/website/docs/user-guide/features/overview.md
@@ -10,6 +10,7 @@ Hermes Agent includes a rich set of capabilities that extend far beyond basic ch
 
 ## Core
 
+- **[Nous Tool Gateway](tool-gateway.md)** — Paid Nous Portal subscribers can route web search, image generation, text-to-speech, and browser automation through the Tool Gateway — no separate API keys needed. Enable it with `hermes model` or `hermes tools`.
 - **[Tools & Toolsets](tools.md)** — Tools are functions that extend the agent's capabilities. They're organized into logical toolsets that can be enabled or disabled per platform, covering web search, terminal execution, file editing, memory, delegation, and more.
 - **[Skills System](skills.md)** — On-demand knowledge documents the agent can load when needed. Skills follow a progressive disclosure pattern to minimize token usage and are compatible with the [agentskills.io](https://agentskills.io/specification) open standard.
 - **[Persistent Memory](memory.md)** — Bounded, curated memory that persists across sessions. Hermes remembers your preferences, projects, environment, and things it has learned via `MEMORY.md` and `USER.md`.
diff --git a/website/docs/user-guide/features/tool-gateway.md b/website/docs/user-guide/features/tool-gateway.md
new file mode 100644
index 000000000..4d549f067
--- /dev/null
+++ b/website/docs/user-guide/features/tool-gateway.md
@@ -0,0 +1,183 @@
+---
+title: "Nous Tool Gateway"
+description: "Route web search, image generation, text-to-speech, and browser automation through your Nous subscription — no extra API keys needed"
+sidebar_label: "Tool Gateway"
+sidebar_position: 2
+---
+
+# Nous Tool Gateway
+
+The **Tool Gateway** lets paid [Nous Portal](https://portal.nousresearch.com) subscribers use web search, image generation, text-to-speech, and browser automation through their existing subscription — no need to sign up for separate API keys from Firecrawl, FAL, OpenAI, or Browser Use.
+
+## What's Included
+
+| Tool | What It Does | Direct Alternative |
+|------|--------------|--------------------|
+| **Web search & extract** | Search the web and extract page content via Firecrawl | `FIRECRAWL_API_KEY`, `EXA_API_KEY`, `PARALLEL_API_KEY`, `TAVILY_API_KEY` |
+| **Image generation** | Generate images via FAL (FLUX 2 Pro + upscaling) | `FAL_KEY` |
+| **Text-to-speech** | Convert text to speech via OpenAI TTS | `VOICE_TOOLS_OPENAI_KEY`, `ELEVENLABS_API_KEY` |
+| **Browser automation** | Control cloud browsers via Browser Use | `BROWSER_USE_API_KEY`, `BROWSERBASE_API_KEY` |
+
+All four tools bill to your Nous subscription. You can enable any combination — for example, use the gateway for web and image generation while keeping your own ElevenLabs key for TTS.
+
+## Eligibility
+
+The Tool Gateway is available to **paid** Nous Portal subscribers. Free-tier accounts do not have access.
+
+To check your status:
+
+```bash
+hermes status
+```
+
+Look for the **Nous Tool Gateway** section. It shows which tools are active via the gateway, which use direct keys, and which aren't configured.
+
+## Enabling the Tool Gateway
+
+### During model setup
+
+When you run `hermes model` and select Nous Portal as your provider, Hermes automatically offers to enable the Tool Gateway:
+
+```
+Your Nous subscription includes the Tool Gateway.
+
+  The Tool Gateway gives you access to web search, image generation,
+  text-to-speech, and browser automation through your Nous subscription.
+  No need to sign up for separate API keys — just pick the tools you want.
+
+  ○ Web search & extract (Firecrawl) — not configured
+  ○ Image generation (FAL) — not configured
+  ○ Text-to-speech (OpenAI TTS) — not configured
+  ○ Browser automation (Browser Use) — not configured
+
+  ● Enable Tool Gateway
+  ○ Skip
+```
+
+Select **Enable Tool Gateway** and you're done.
+
+If you already have direct API keys for some tools, the prompt adapts — you can enable the gateway for all tools (your existing keys are kept in `.env` but not used at runtime), enable it only for unconfigured tools, or skip entirely.
+
+### Via `hermes tools`
+
+You can also enable the gateway tool-by-tool through the interactive tool configuration:
+
+```bash
+hermes tools
+```
+
+Select a tool category (Web, Browser, Image Generation, or TTS), then choose **Nous Subscription** as the provider. This sets `use_gateway: true` for that tool in your config.
+
+### Manual configuration
+
+Set the `use_gateway` flag directly in `~/.hermes/config.yaml`:
+
+```yaml
+web:
+  backend: firecrawl
+  use_gateway: true
+
+image_gen:
+  use_gateway: true
+
+tts:
+  provider: openai
+  use_gateway: true
+
+browser:
+  cloud_provider: browser-use
+  use_gateway: true
+```
+
+## How It Works
+
+When `use_gateway: true` is set for a tool, the runtime routes API calls through the Nous Tool Gateway instead of using direct API keys:
+
+1. **Web tools** — `web_search` and `web_extract` use the gateway's Firecrawl endpoint
+2. **Image generation** — `image_generate` uses the gateway's FAL endpoint
+3. **TTS** — `text_to_speech` uses the gateway's OpenAI Audio endpoint
+4. **Browser** — `browser_navigate` and other browser tools use the gateway's Browser Use endpoint
+
+The gateway authenticates using your Nous Portal credentials (stored in `~/.hermes/auth.json` after you run `hermes model`).
+
+### Precedence
+
+Each tool checks `use_gateway` first:
+
+- **`use_gateway: true`** → route through the gateway, even if direct API keys exist in `.env`
+- **`use_gateway: false`** (or absent) → use direct API keys if available, and fall back to the gateway only when no direct keys exist
+
+This means you can switch between gateway and direct keys at any time without deleting your `.env` credentials.
+
+## Switching Back to Direct Keys
+
+To stop using the gateway for a specific tool:
+
+```bash
+hermes tools    # Select the tool → choose a direct provider
+```
+
+Or set `use_gateway: false` in config:
+
+```yaml
+web:
+  backend: firecrawl
+  use_gateway: false  # Now uses FIRECRAWL_API_KEY from .env
+```
+
+When you select a non-gateway provider in `hermes tools`, the `use_gateway` flag is automatically set to `false` to prevent contradictory config.
+
+## Checking Status
+
+```bash
+hermes status
+```
+
+The **Nous Tool Gateway** section shows:
+
+```
+◆ Nous Tool Gateway
+  Nous Portal     ✓ managed tools available
+  Web tools       ✓ active via Nous subscription
+  Image gen       ✓ active via Nous subscription
+  TTS             ✓ active via Nous subscription
+  Browser         ○ active via Browser Use key
+  Modal           ○ available via subscription (optional)
+```
+
+Tools marked "active via Nous subscription" are routed through the gateway. Tools with their own keys show which provider is active.
+
+## Advanced: Self-Hosted Gateway
+
+For self-hosted or custom gateway deployments, you can override the gateway endpoints via environment variables in `~/.hermes/.env`:
+
+```bash
+TOOL_GATEWAY_DOMAIN=nousresearch.com     # Base domain for gateway routing
+TOOL_GATEWAY_SCHEME=https                 # http or https (default: https)
+TOOL_GATEWAY_USER_TOKEN=your-token        # Auth token (normally auto-populated)
+FIRECRAWL_GATEWAY_URL=https://...         # Override for the Firecrawl endpoint specifically
+```
+
+These env vars are always visible in the configuration regardless of subscription status — they're useful for custom infrastructure setups.
+
+## FAQ
+
+### Do I need to delete my existing API keys?
+
+No. When `use_gateway: true` is set, the runtime skips direct API keys and routes through the gateway. Your keys stay in `.env` untouched. If you later disable the gateway, they'll be used again automatically.
+
+### Can I use the gateway for some tools and direct keys for others?
+
+Yes. The `use_gateway` flag is per-tool. You can mix and match — for example, gateway for web and image generation, your own ElevenLabs key for TTS, and Browserbase for browser automation.
+
+### What if my subscription expires?
+
+Tools that were routed through the gateway will stop working until you renew or switch to direct API keys via `hermes tools`.
+
+### Does the gateway work with the messaging gateway?
+
+Yes. The Tool Gateway routes tool API calls regardless of whether you're using the CLI or a messaging platform such as Telegram or Discord. It operates at the tool runtime level, not the entry-point level.
+
+### Is Modal included?
+
+Modal (serverless terminal backend) is available as an optional add-on through the Nous subscription. It's not enabled by the Tool Gateway prompt — configure it separately via `hermes setup terminal` or in `config.yaml`.
diff --git a/website/docs/user-guide/features/tools.md b/website/docs/user-guide/features/tools.md
index 0adec6f06..2283c16fb 100644
--- a/website/docs/user-guide/features/tools.md
+++ b/website/docs/user-guide/features/tools.md
@@ -31,6 +31,10 @@ High-level categories:
 
 For the authoritative code-derived registry, see [Built-in Tools Reference](/docs/reference/tools-reference) and [Toolsets Reference](/docs/reference/toolsets-reference).
 
+:::tip Nous Tool Gateway
+Paid [Nous Portal](https://portal.nousresearch.com) subscribers can use web search, image generation, TTS, and browser automation through the **[Tool Gateway](tool-gateway.md)** — no separate API keys needed. Run `hermes model` to enable it, or configure individual tools with `hermes tools`.
+:::
+
 ## Using Toolsets
 
 ```bash
diff --git a/website/docs/user-guide/features/tts.md b/website/docs/user-guide/features/tts.md
index 625e25ad9..7d864eddd 100644
--- a/website/docs/user-guide/features/tts.md
+++ b/website/docs/user-guide/features/tts.md
@@ -8,6 +8,10 @@ description: "Text-to-speech and voice message transcription across all platform
 
 Hermes Agent supports both text-to-speech output and voice message transcription across all messaging platforms.
 
+:::tip Nous Subscribers
+If you have a paid [Nous Portal](https://portal.nousresearch.com) subscription, OpenAI TTS is available through the **[Tool Gateway](tool-gateway.md)** without a separate OpenAI API key. Run `hermes model` or `hermes tools` to enable it.
+:::
+
 ## Text-to-Speech
 
 Convert text to speech with six providers:
diff --git a/website/sidebars.ts b/website/sidebars.ts
index c72f3e61d..b1f7fcf59 100644
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@@ -36,6 +36,7 @@ const sidebars: SidebarsConfig = {
       collapsed: true,
       items: [
         'user-guide/features/overview',
+        'user-guide/features/tool-gateway',
         {
           type: 'category',
           label: 'Core',

From 3d8be06bce0c3e5ea9c64d5e8b8e7bfa874ed4c6 Mon Sep 17 00:00:00 2001
From: Jeffrey Quesnelle <emozilla@nousresearch.com>
Date: Thu, 16 Apr 2026 11:26:53 -0400
Subject: [PATCH 13/14] remove tool gateway from core features in docs

---
 website/docs/user-guide/features/overview.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/website/docs/user-guide/features/overview.md b/website/docs/user-guide/features/overview.md
index 3838b715b..2d26e153a 100644
--- a/website/docs/user-guide/features/overview.md
+++ b/website/docs/user-guide/features/overview.md
@@ -10,7 +10,6 @@ Hermes Agent includes a rich set of capabilities that extend far beyond basic ch
 
 ## Core
 
-- **[Nous Tool Gateway](tool-gateway.md)** — Paid Nous Portal subscribers can route web search, image generation, text-to-speech, and browser automation through the Tool Gateway — no separate API keys needed. Enable it with `hermes model` or `hermes tools`.
 - **[Tools & Toolsets](tools.md)** — Tools are functions that extend the agent's capabilities. They're organized into logical toolsets that can be enabled or disabled per platform, covering web search, terminal execution, file editing, memory, delegation, and more.
 - **[Skills System](skills.md)** — On-demand knowledge documents the agent can load when needed. Skills follow a progressive disclosure pattern to minimize token usage and are compatible with the [agentskills.io](https://agentskills.io/specification) open standard.
 - **[Persistent Memory](memory.md)** — Bounded, curated memory that persists across sessions. Hermes remembers your preferences, projects, environment, and things it has learned via `MEMORY.md` and `USER.md`.

From dead2dfd4f40dff4b14bef9af95bfca107c29553 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Thu, 16 Apr 2026 12:48:03 -0700
Subject: [PATCH 14/14] docs: add portal subscription links to tool-gateway
 page (#11208)

---
 website/docs/user-guide/features/tool-gateway.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/website/docs/user-guide/features/tool-gateway.md b/website/docs/user-guide/features/tool-gateway.md
index 4d549f067..e53878949 100644
--- a/website/docs/user-guide/features/tool-gateway.md
+++ b/website/docs/user-guide/features/tool-gateway.md
@@ -7,6 +7,10 @@ sidebar_position: 2
 
 # Nous Tool Gateway
 
+:::tip Get Started
+The Tool Gateway is included with paid Nous Portal subscriptions. **[Manage your subscription →](https://portal.nousresearch.com/manage-subscription)**
+:::
+
 The **Tool Gateway** lets paid [Nous Portal](https://portal.nousresearch.com) subscribers use web search, image generation, text-to-speech, and browser automation through their existing subscription — no need to sign up for separate API keys from Firecrawl, FAL, OpenAI, or Browser Use.
 
 ## What's Included
@@ -22,7 +26,7 @@ All four tools bill to your Nous subscription. You can enable any combination 
 
 ## Eligibility
 
-The Tool Gateway is available to **paid** Nous Portal subscribers. Free-tier accounts do not have access.
+The Tool Gateway is available to **paid** [Nous Portal](https://portal.nousresearch.com/manage-subscription) subscribers. Free-tier accounts do not have access — [upgrade your subscription](https://portal.nousresearch.com/manage-subscription) to unlock it.
 
 To check your status:
 
@@ -172,7 +176,7 @@ Yes. The `use_gateway` flag is per-tool. You can mix and match — for example,
 
 ### What if my subscription expires?
 
-Tools that were routed through the gateway will stop working until you renew or switch to direct API keys via `hermes tools`.
+Tools that were routed through the gateway will stop working until you [renew your subscription](https://portal.nousresearch.com/manage-subscription) or switch to direct API keys via `hermes tools`.
 
 ### Does the gateway work with the messaging gateway?