From 609b19b63086102bbf66deaf9d5fee05c8c8499e Mon Sep 17 00:00:00 2001 From: George Pickett Date: Wed, 25 Feb 2026 18:20:38 -0800 Subject: [PATCH 01/31] Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md) --- .env.example | 15 +- README.md | 16 +- cli-config.yaml.example | 1 + cli.py | 86 ++-- cron/scheduler.py | 21 +- gateway/run.py | 41 +- hermes_cli/auth.py | 204 ++++++++- hermes_cli/doctor.py | 30 ++ hermes_cli/main.py | 74 +++- hermes_cli/runtime_provider.py | 149 +++++++ hermes_cli/setup.py | 63 ++- hermes_cli/status.py | 18 +- run_agent.py | 483 +++++++++++++++++++--- tests/test_auth_codex_provider.py | 114 +++++ tests/test_cli_provider_resolution.py | 187 +++++++++ tests/test_delegate.py | 27 ++ tests/test_run_agent_codex_responses.py | 231 +++++++++++ tests/test_runtime_provider_resolution.py | 95 +++++ tools/delegate_tool.py | 3 + 19 files changed, 1713 insertions(+), 145 deletions(-) create mode 100644 hermes_cli/runtime_provider.py create mode 100644 tests/test_auth_codex_provider.py create mode 100644 tests/test_cli_provider_resolution.py create mode 100644 tests/test_run_agent_codex_responses.py create mode 100644 tests/test_runtime_provider_resolution.py diff --git a/.env.example b/.env.example index ac6a187f34..452f23eb57 100644 --- a/.env.example +++ b/.env.example @@ -2,10 +2,14 @@ # Copy this file to .env and fill in your API keys # ============================================================================= -# LLM PROVIDER (OpenRouter) +# LLM PROVIDER # ============================================================================= -# OpenRouter provides access to many models through one API -# All LLM calls go through OpenRouter - no direct provider keys needed +# Provider selection override: auto | openrouter | nous | openai-codex +# If unset, Hermes auto-detects from auth/config. +# HERMES_INFERENCE_PROVIDER=auto + +# OpenRouter key (required when using OpenRouter directly, and still used by +# some tools even when your primary chat provider is Nous/Codex/custom). # Get your key at: https://openrouter.ai/keys OPENROUTER_API_KEY= @@ -13,6 +17,11 @@ OPENROUTER_API_KEY= # Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus LLM_MODEL=anthropic/claude-opus-4.6 +# OpenAI Codex provider uses Codex CLI auth state: +# hermes login --provider openai-codex +# (reads CODEX_HOME/auth.json, default: ~/.codex/auth.json) +# CODEX_HOME=~/.codex + # ============================================================================= # TOOL API KEYS # ============================================================================= diff --git a/README.md b/README.md index a97e637711..9ddbb3dffd 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ **The fully open-source AI agent that grows with you.** Install it on a machine, give it your messaging accounts, and it becomes a persistent personal agent — learning your projects, building its own skills, running tasks on a schedule, and reaching you wherever you are. An autonomous agent that lives on your server, remembers what it learns, and gets more capable the longer it runs. -Use any model you want — log in with a [Nous Portal](https://portal.nousresearch.com) subscription for zero-config access, connect an [OpenRouter](https://openrouter.ai) key for 200+ models, or point it at your own VLLM/SGLang endpoint. Switch with `hermes model` — no code changes, no lock-in. 
+Use any model you want — log in with [Nous Portal](https://portal.nousresearch.com), log in with OpenAI Codex via `hermes login --provider openai-codex`, connect an [OpenRouter](https://openrouter.ai) key for 200+ models, or point it at your own VLLM/SGLang endpoint. Switch with `hermes model` — no code changes, no lock-in. Built by [Nous Research](https://nousresearch.com). Under the hood, the same architecture powers [batch data generation](#batch-processing) and [RL training environments](#-atropos-rl-environments) for training the next generation of tool-calling models. @@ -121,11 +121,14 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro | Provider | Setup | |----------|-------| | **Nous Portal** | `hermes login` (OAuth, subscription-based) | +| **OpenAI Codex** | `hermes login --provider openai-codex` (uses `CODEX_HOME/auth.json`) | | **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` | | **Custom Endpoint** | `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | **Note:** Even when using Nous Portal or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools. +**Codex note:** The `openai-codex` provider uses Codex CLI auth (`CODEX_HOME/auth.json`, default `~/.codex/auth.json`) and Hermes routes that provider through the Responses API transport. + --- ## Configuration @@ -136,7 +139,7 @@ All your settings are stored in `~/.hermes/` for easy access: ~/.hermes/ ├── config.yaml # Settings (model, terminal, TTS, compression, etc.) ├── .env # API keys and secrets -├── auth.json # OAuth provider credentials (Nous Portal, etc.) +├── auth.json # OAuth provider credentials (Nous Portal, OpenAI Codex) ├── SOUL.md # Optional: global persona (agent embodies this personality) ├── memories/ # Persistent memory (MEMORY.md, USER.md) ├── skills/ # Agent-created skills (managed via skill_manage tool) @@ -335,6 +338,7 @@ hermes chat -q "Hello" # Single query mode # Provider & model management hermes model # Switch provider and model interactively hermes login # Authenticate with Nous Portal (OAuth) +hermes login --provider openai-codex hermes logout # Clear stored OAuth credentials # Configuration @@ -1406,7 +1410,7 @@ All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set t **Provider Auth (OAuth):** | Variable | Description | |----------|-------------| -| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous` (default: `auto`) | +| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex` (default: `auto`) | | `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) | | `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL | | `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) | @@ -1481,7 +1485,7 @@ All variables go in `~/.hermes/.env`. 
Run `hermes config set VAR value` to set t |------|-------------| | `~/.hermes/config.yaml` | Your settings | | `~/.hermes/.env` | API keys and secrets | -| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes login`) | +| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes login`, including Codex metadata) | | `~/.hermes/cron/` | Scheduled jobs data | | `~/.hermes/sessions/` | Gateway session data | | `~/.hermes/hermes-agent/` | Installation directory | @@ -1509,11 +1513,11 @@ hermes config # View current settings Common issues: - **"API key not set"**: Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key` - **"hermes: command not found"**: Reload your shell (`source ~/.bashrc`) or check PATH -- **"Run `hermes login` to re-authenticate"**: Your Nous Portal session expired. Run `hermes login` to refresh. +- **"Run `hermes login` to re-authenticate"**: Your OAuth session expired. Use `hermes login` for Nous or `hermes login --provider openai-codex` for Codex. - **"No active paid subscription"**: Your Nous Portal account needs an active subscription for inference. - **Gateway won't start**: Check `hermes gateway status` and logs - **Missing config after update**: Run `hermes config check` to see what's new, then `hermes config migrate` to add missing options -- **Provider auto-detection wrong**: Force a provider with `hermes chat --provider openrouter` or set `HERMES_INFERENCE_PROVIDER` in `.env` +- **Provider auto-detection wrong**: Force a provider with `hermes chat --provider openrouter` (or `nous` / `openai-codex`) or set `HERMES_INFERENCE_PROVIDER` in `.env` --- diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 0b49368dc5..d42d9db267 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -13,6 +13,7 @@ model: # "auto" - Use Nous Portal if logged in, otherwise OpenRouter/env vars (default) # "openrouter" - Always use OpenRouter API key from OPENROUTER_API_KEY # "nous" - Always use Nous Portal (requires: hermes login) + # "openai-codex" - Always use Codex CLI auth (requires: hermes login --provider openai-codex) # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var. 
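  # Example (illustrative): to pin the Codex provider once you have run
  # `hermes login --provider openai-codex`, replace the line below with:
  #   provider: "openai-codex"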
provider: "auto" diff --git a/cli.py b/cli.py index a09d501621..32a751d154 100755 --- a/cli.py +++ b/cli.py @@ -751,7 +751,7 @@ class HermesCLI: Args: model: Model to use (default: from env or claude-sonnet) toolsets: List of toolsets to enable (default: all) - provider: Inference provider ("auto", "openrouter", "nous") + provider: Inference provider ("auto", "openrouter", "nous", "openai-codex") api_key: API key (default: from environment) base_url: API base URL (default: OpenRouter) max_turns: Maximum tool-calling iterations (default: 60) @@ -766,28 +766,26 @@ class HermesCLI: # Configuration - priority: CLI args > env vars > config file # Model can come from: CLI arg, LLM_MODEL env, OPENAI_MODEL env (custom endpoint), or config self.model = model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or CLI_CONFIG["model"]["default"] - - # Base URL: custom endpoint (OPENAI_BASE_URL) takes precedence over OpenRouter - self.base_url = base_url or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"]) - - # API key: custom endpoint (OPENAI_API_KEY) takes precedence over OpenRouter - self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY") - # Provider resolution: determines whether to use OAuth credentials or env var keys - from hermes_cli.auth import resolve_provider + self._explicit_api_key = api_key + self._explicit_base_url = base_url + + # Provider selection is resolved lazily at use-time via _ensure_runtime_credentials(). self.requested_provider = ( provider or os.getenv("HERMES_INFERENCE_PROVIDER") or CLI_CONFIG["model"].get("provider") or "auto" ) - self.provider = resolve_provider( - self.requested_provider, - explicit_api_key=api_key, - explicit_base_url=base_url, + self._provider_source: Optional[str] = None + self.provider = self.requested_provider + self.api_mode = "chat_completions" + self.base_url = ( + base_url + or os.getenv("OPENAI_BASE_URL") + or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"]) ) - self._nous_key_expires_at: Optional[str] = None - self._nous_key_source: Optional[str] = None + self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY") # Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default if max_turns != 60: # CLI arg was explicitly set self.max_turns = max_turns @@ -844,45 +842,51 @@ class HermesCLI: def _ensure_runtime_credentials(self) -> bool: """ - Ensure OAuth provider credentials are fresh before agent use. - For Nous Portal: checks agent key TTL, refreshes/re-mints as needed. - If the key changed, tears down the agent so it rebuilds with new creds. + Ensure runtime credentials are resolved before agent use. + Re-resolves provider credentials so key rotation and token refresh + are picked up without restarting the CLI. Returns True if credentials are ready, False on auth failure. 
""" - if self.provider != "nous": - return True - - from hermes_cli.auth import format_auth_error, resolve_nous_runtime_credentials + from hermes_cli.runtime_provider import ( + resolve_runtime_provider, + format_runtime_provider_error, + ) try: - credentials = resolve_nous_runtime_credentials( - min_key_ttl_seconds=max( - 60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")) - ), - timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + runtime = resolve_runtime_provider( + requested=self.requested_provider, + explicit_api_key=self._explicit_api_key, + explicit_base_url=self._explicit_base_url, ) except Exception as exc: - message = format_auth_error(exc) + message = format_runtime_provider_error(exc) self.console.print(f"[bold red]{message}[/]") return False - api_key = credentials.get("api_key") - base_url = credentials.get("base_url") + api_key = runtime.get("api_key") + base_url = runtime.get("base_url") + resolved_provider = runtime.get("provider", "openrouter") + resolved_api_mode = runtime.get("api_mode", self.api_mode) if not isinstance(api_key, str) or not api_key: - self.console.print("[bold red]Nous credential resolver returned an empty API key.[/]") + self.console.print("[bold red]Provider resolver returned an empty API key.[/]") return False if not isinstance(base_url, str) or not base_url: - self.console.print("[bold red]Nous credential resolver returned an empty base URL.[/]") + self.console.print("[bold red]Provider resolver returned an empty base URL.[/]") return False credentials_changed = api_key != self.api_key or base_url != self.base_url + routing_changed = ( + resolved_provider != self.provider + or resolved_api_mode != self.api_mode + ) + self.provider = resolved_provider + self.api_mode = resolved_api_mode + self._provider_source = runtime.get("source") self.api_key = api_key self.base_url = base_url - self._nous_key_expires_at = credentials.get("expires_at") - self._nous_key_source = credentials.get("source") # AIAgent/OpenAI client holds auth at init time, so rebuild if key rotated - if credentials_changed and self.agent is not None: + if (credentials_changed or routing_changed) and self.agent is not None: self.agent = None return True @@ -897,7 +901,7 @@ class HermesCLI: if self.agent is not None: return True - if self.provider == "nous" and not self._ensure_runtime_credentials(): + if not self._ensure_runtime_credentials(): return False # Initialize SQLite session store for CLI sessions @@ -913,6 +917,8 @@ class HermesCLI: model=self.model, api_key=self.api_key, base_url=self.base_url, + provider=self.provider, + api_mode=self.api_mode, max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, verbose_logging=self.verbose, @@ -1004,8 +1010,8 @@ class HermesCLI: toolsets_info = f" [dim #B8860B]·[/] [#CD7F32]toolsets: {', '.join(self.enabled_toolsets)}[/]" provider_info = f" [dim #B8860B]·[/] [dim]provider: {self.provider}[/]" - if self.provider == "nous" and self._nous_key_source: - provider_info += f" [dim #B8860B]·[/] [dim]key: {self._nous_key_source}[/]" + if self._provider_source: + provider_info += f" [dim #B8860B]·[/] [dim]auth: {self._provider_source}[/]" self.console.print( f" {api_indicator} [#FFBF00]{model_short}[/] " @@ -1786,8 +1792,8 @@ class HermesCLI: Returns: The agent's response, or None on error """ - # Refresh OAuth credentials if needed (handles key rotation transparently) - if self.provider == "nous" and not self._ensure_runtime_credentials(): + # Refresh provider credentials if needed (handles key 
rotation transparently) + if not self._ensure_runtime_credentials(): return None # Initialize agent if needed diff --git a/cron/scheduler.py b/cron/scheduler.py index 62987cca62..4d45fde1e2 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -170,8 +170,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: load_dotenv(os.path.expanduser("~/.hermes/.env"), override=True, encoding="latin-1") model = os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6") - api_key = os.getenv("OPENROUTER_API_KEY", "") - base_url = os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") try: import yaml @@ -184,14 +182,27 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: model = _model_cfg elif isinstance(_model_cfg, dict): model = _model_cfg.get("default", model) - base_url = _model_cfg.get("base_url", base_url) except Exception: pass + from hermes_cli.runtime_provider import ( + resolve_runtime_provider, + format_runtime_provider_error, + ) + try: + runtime = resolve_runtime_provider( + requested=os.getenv("HERMES_INFERENCE_PROVIDER"), + ) + except Exception as exc: + message = format_runtime_provider_error(exc) + raise RuntimeError(message) from exc + agent = AIAgent( model=model, - api_key=api_key, - base_url=base_url, + api_key=runtime.get("api_key"), + base_url=runtime.get("base_url"), + provider=runtime.get("provider"), + api_mode=runtime.get("api_mode"), quiet_mode=True, session_id=f"cron_{job_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" ) diff --git a/gateway/run.py b/gateway/run.py index 214a026ab9..387f88339f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -83,6 +83,28 @@ from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageTyp logger = logging.getLogger(__name__) +def _resolve_runtime_agent_kwargs() -> dict: + """Resolve provider credentials for gateway-created AIAgent instances.""" + from hermes_cli.runtime_provider import ( + resolve_runtime_provider, + format_runtime_provider_error, + ) + + try: + runtime = resolve_runtime_provider( + requested=os.getenv("HERMES_INFERENCE_PROVIDER"), + ) + except Exception as exc: + raise RuntimeError(format_runtime_provider_error(exc)) from exc + + return { + "api_key": runtime.get("api_key"), + "base_url": runtime.get("base_url"), + "provider": runtime.get("provider"), + "api_mode": runtime.get("api_mode"), + } + + class GatewayRunner: """ Main gateway controller. @@ -768,6 +790,7 @@ class GatewayRunner: def _do_flush(): tmp_agent = AIAgent( model=os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6"), + **_resolve_runtime_agent_kwargs(), max_iterations=5, quiet_mode=True, enabled_toolsets=["memory"], @@ -1378,7 +1401,7 @@ class GatewayRunner: combined_ephemeral = context_prompt or "" if self._ephemeral_system_prompt: combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip() - + # Re-read .env and config for fresh credentials (gateway is long-lived, # keys may change without restart). 
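        # Provider routing (API key, base URL, API mode) is re-resolved below via
        # _resolve_runtime_agent_kwargs(), so refreshed Nous/Codex tokens apply too.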
try: @@ -1388,8 +1411,6 @@ class GatewayRunner: except Exception: pass - api_key = os.getenv("OPENROUTER_API_KEY", "") - base_url = os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") model = os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6") try: @@ -1403,14 +1424,22 @@ class GatewayRunner: model = _model_cfg elif isinstance(_model_cfg, dict): model = _model_cfg.get("default", model) - base_url = _model_cfg.get("base_url", base_url) except Exception: pass + try: + runtime_kwargs = _resolve_runtime_agent_kwargs() + except Exception as exc: + return { + "final_response": f"⚠️ Provider authentication failed: {exc}", + "messages": [], + "api_calls": 0, + "tools": [], + } + agent = AIAgent( model=model, - api_key=api_key, - base_url=base_url, + **runtime_kwargs, max_iterations=max_iterations, quiet_mode=True, enabled_toolsets=enabled_toolsets, diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 0941c6d919..328b84f145 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -18,7 +18,9 @@ from __future__ import annotations import json import logging import os +import shutil import stat +import subprocess import time import webbrowser from contextlib import contextmanager @@ -55,6 +57,7 @@ DEFAULT_NOUS_SCOPE = "inference:mint_agent_key" DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1 # poll at most every 1s +DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex" # ============================================================================= @@ -84,7 +87,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { client_id=DEFAULT_NOUS_CLIENT_ID, scope=DEFAULT_NOUS_SCOPE, ), - # Future: "openai_codex", "anthropic", etc. 
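+    # Codex auth state is owned by the Codex CLI itself; Hermes only reads it,
+    # hence auth_type="oauth_external" with no Hermes-managed OAuth flow.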
+ "openai-codex": ProviderConfig( + id="openai-codex", + name="OpenAI Codex", + auth_type="oauth_external", + inference_base_url=DEFAULT_CODEX_BASE_URL, + ), } @@ -298,12 +306,15 @@ def resolve_provider( """ normalized = (requested or "auto").strip().lower() + if normalized in {"openrouter", "custom"}: + return "openrouter" if normalized in PROVIDER_REGISTRY: return normalized - if normalized == "openrouter": - return "openrouter" if normalized != "auto": - return "openrouter" + raise AuthError( + f"Unknown provider '{normalized}'.", + code="invalid_provider", + ) # Explicit one-off CLI creds always mean openrouter/custom if explicit_api_key or explicit_base_url: @@ -314,8 +325,8 @@ def resolve_provider( auth_store = _load_auth_store() active = auth_store.get("active_provider") if active and active in PROVIDER_REGISTRY: - state = _load_provider_state(auth_store, active) - if state and (state.get("access_token") or state.get("refresh_token")): + status = get_auth_status(active) + if status.get("logged_in"): return active except Exception as e: logger.debug("Could not detect active auth provider: %s", e) @@ -378,6 +389,108 @@ def _is_remote_session() -> bool: return bool(os.getenv("SSH_CLIENT") or os.getenv("SSH_TTY")) +# ============================================================================= +# OpenAI Codex auth file helpers +# ============================================================================= + +def resolve_codex_home_path() -> Path: + """Resolve CODEX_HOME, defaulting to ~/.codex.""" + codex_home = os.getenv("CODEX_HOME", "").strip() + if not codex_home: + codex_home = str(Path.home() / ".codex") + return Path(codex_home).expanduser() + + +def _codex_auth_file_path() -> Path: + return resolve_codex_home_path() / "auth.json" + + +def read_codex_auth_file() -> Dict[str, Any]: + """Read and validate Codex auth.json shape.""" + codex_home = resolve_codex_home_path() + if not codex_home.exists(): + raise AuthError( + f"Codex home directory not found at {codex_home}.", + provider="openai-codex", + code="codex_home_missing", + relogin_required=True, + ) + + auth_path = codex_home / "auth.json" + if not auth_path.exists(): + raise AuthError( + f"Codex auth file not found at {auth_path}.", + provider="openai-codex", + code="codex_auth_missing", + relogin_required=True, + ) + + try: + payload = json.loads(auth_path.read_text()) + except Exception as exc: + raise AuthError( + f"Failed to parse Codex auth file at {auth_path}.", + provider="openai-codex", + code="codex_auth_invalid_json", + relogin_required=True, + ) from exc + + tokens = payload.get("tokens") + if not isinstance(tokens, dict): + raise AuthError( + "Codex auth file is missing a valid 'tokens' object.", + provider="openai-codex", + code="codex_auth_invalid_shape", + relogin_required=True, + ) + + access_token = tokens.get("access_token") + refresh_token = tokens.get("refresh_token") + if not isinstance(access_token, str) or not access_token.strip(): + raise AuthError( + "Codex auth file is missing tokens.access_token.", + provider="openai-codex", + code="codex_auth_missing_access_token", + relogin_required=True, + ) + if not isinstance(refresh_token, str) or not refresh_token.strip(): + raise AuthError( + "Codex auth file is missing tokens.refresh_token.", + provider="openai-codex", + code="codex_auth_missing_refresh_token", + relogin_required=True, + ) + + return { + "payload": payload, + "tokens": tokens, + "auth_path": auth_path, + "codex_home": codex_home, + } + + +def resolve_codex_runtime_credentials() -> 
Dict[str, Any]: + """Resolve runtime credentials from Codex CLI auth state.""" + data = read_codex_auth_file() + payload = data["payload"] + tokens = data["tokens"] + base_url = ( + os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") + or DEFAULT_CODEX_BASE_URL + ) + + return { + "provider": "openai-codex", + "base_url": base_url, + "api_key": tokens["access_token"], + "source": "codex-auth-json", + "last_refresh": payload.get("last_refresh"), + "auth_mode": payload.get("auth_mode"), + "auth_file": str(data["auth_path"]), + "codex_home": str(data["codex_home"]), + } + + # ============================================================================= # TLS verification helper # ============================================================================= @@ -806,11 +919,37 @@ def get_nous_auth_status() -> Dict[str, Any]: } +def get_codex_auth_status() -> Dict[str, Any]: + """Status snapshot for Codex auth.""" + state = get_provider_auth_state("openai-codex") or {} + auth_file = state.get("auth_file") or str(_codex_auth_file_path()) + codex_home = state.get("codex_home") or str(resolve_codex_home_path()) + try: + creds = resolve_codex_runtime_credentials() + return { + "logged_in": True, + "auth_file": creds.get("auth_file"), + "codex_home": creds.get("codex_home"), + "last_refresh": creds.get("last_refresh"), + "auth_mode": creds.get("auth_mode"), + "source": creds.get("source"), + } + except AuthError as exc: + return { + "logged_in": False, + "auth_file": auth_file, + "codex_home": codex_home, + "error": str(exc), + } + + def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: """Generic auth status dispatcher.""" target = provider_id or get_active_provider() if target == "nous": return get_nous_auth_status() + if target == "openai-codex": + return get_codex_auth_status() return {"logged_in": False} @@ -982,11 +1121,64 @@ def login_command(args) -> None: if provider_id == "nous": _login_nous(args, pconfig) + elif provider_id == "openai-codex": + _login_openai_codex(args, pconfig) else: print(f"Login for provider '{provider_id}' is not yet implemented.") raise SystemExit(1) +def _login_openai_codex(args, pconfig: ProviderConfig) -> None: + """OpenAI Codex login flow using Codex CLI auth state.""" + codex_path = shutil.which("codex") + if not codex_path: + print("Codex CLI was not found in PATH.") + print("Install Codex CLI, then retry `hermes login --provider openai-codex`.") + raise SystemExit(1) + + print(f"Starting Hermes login via {pconfig.name}...") + print(f"Using Codex CLI: {codex_path}") + print(f"Codex home: {resolve_codex_home_path()}") + + creds: Dict[str, Any] + try: + creds = resolve_codex_runtime_credentials() + except AuthError: + print("No usable Codex auth found. 
Running `codex login`...") + try: + subprocess.run(["codex", "login"], check=True) + except subprocess.CalledProcessError as exc: + print(f"Codex login failed with exit code {exc.returncode}.") + raise SystemExit(1) + except KeyboardInterrupt: + print("\nLogin cancelled.") + raise SystemExit(130) + try: + creds = resolve_codex_runtime_credentials() + except AuthError as exc: + print(format_auth_error(exc)) + raise SystemExit(1) + + auth_state = { + "auth_file": creds.get("auth_file"), + "codex_home": creds.get("codex_home"), + "last_refresh": creds.get("last_refresh"), + "auth_mode": creds.get("auth_mode"), + "source": creds.get("source"), + } + + with _auth_store_lock(): + auth_store = _load_auth_store() + _save_provider_state(auth_store, "openai-codex", auth_state) + saved_to = _save_auth_store(auth_store) + + config_path = _update_config_for_provider("openai-codex", creds["base_url"]) + print() + print("Login successful!") + print(f" Auth state: {saved_to}") + print(f" Config updated: {config_path} (model.provider=openai-codex)") + + def _login_nous(args, pconfig: ProviderConfig) -> None: """Nous Portal device authorization flow.""" portal_base_url = ( diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index f9156354a1..77ff65d1ec 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -171,6 +171,36 @@ def run_doctor(args): else: check_warn("config.yaml not found", "(using defaults)") + # ========================================================================= + # Check: Auth providers + # ========================================================================= + print() + print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD)) + + try: + from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status + + nous_status = get_nous_auth_status() + if nous_status.get("logged_in"): + check_ok("Nous Portal auth", "(logged in)") + else: + check_warn("Nous Portal auth", "(not logged in)") + + codex_status = get_codex_auth_status() + if codex_status.get("logged_in"): + check_ok("OpenAI Codex auth", "(logged in)") + else: + check_warn("OpenAI Codex auth", "(not logged in)") + if codex_status.get("error"): + check_info(codex_status["error"]) + except Exception as e: + check_warn("Auth provider status", f"(could not check: {e})") + + if shutil.which("codex"): + check_ok("codex CLI") + else: + check_warn("codex CLI not found", "(required for openai-codex login)") + # ========================================================================= # Check: Directory structure # ========================================================================= diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 544932020d..3d1c76c005 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -53,6 +53,7 @@ logger = logging.getLogger(__name__) def _has_any_provider_configured() -> bool: """Check if at least one inference provider is usable.""" from hermes_cli.config import get_env_path, get_hermes_home + from hermes_cli.auth import get_auth_status # Check env vars (may be set by .env or shell) if os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY"): @@ -81,8 +82,8 @@ def _has_any_provider_configured() -> bool: auth = json.loads(auth_file.read_text()) active = auth.get("active_provider") if active: - state = auth.get("providers", {}).get(active, {}) - if state.get("access_token") or state.get("refresh_token"): + status = get_auth_status(active) + if status.get("logged_in"): return True except Exception: pass @@ -145,7 +146,7 @@ def 
cmd_model(args): resolve_provider, get_provider_auth_state, PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice, _update_config_for_provider, resolve_nous_runtime_credentials, fetch_nous_models, AuthError, format_auth_error, - _login_nous, ProviderConfig, + _login_nous, ) from hermes_cli.config import load_config, save_config, get_env_value, save_env_value @@ -168,7 +169,12 @@ def cmd_model(args): or config_provider or "auto" ) - active = resolve_provider(effective_provider) + try: + active = resolve_provider(effective_provider) + except AuthError as exc: + warning = format_auth_error(exc) + print(f"Warning: {warning} Falling back to auto provider detection.") + active = resolve_provider("auto") # Detect custom endpoint if active == "openrouter" and get_env_value("OPENAI_BASE_URL"): @@ -177,6 +183,7 @@ def cmd_model(args): provider_labels = { "openrouter": "OpenRouter", "nous": "Nous Portal", + "openai-codex": "OpenAI Codex", "custom": "Custom endpoint", } active_label = provider_labels.get(active, active) @@ -190,11 +197,12 @@ def cmd_model(args): providers = [ ("openrouter", "OpenRouter (100+ models, pay-per-use)"), ("nous", "Nous Portal (Nous Research subscription)"), + ("openai-codex", "OpenAI Codex (ChatGPT/Codex CLI login)"), ("custom", "Custom endpoint (self-hosted / VLLM / etc.)"), ] # Reorder so the active provider is at the top - active_key = active if active in ("openrouter", "nous") else "custom" + active_key = active if active in ("openrouter", "nous", "openai-codex") else "custom" ordered = [] for key, label in providers: if key == active_key: @@ -215,6 +223,8 @@ def cmd_model(args): _model_flow_openrouter(config, current_model) elif selected_provider == "nous": _model_flow_nous(config, current_model) + elif selected_provider == "openai-codex": + _model_flow_openai_codex(config, current_model) elif selected_provider == "custom": _model_flow_custom(config) @@ -368,6 +378,52 @@ def _model_flow_nous(config, current_model=""): print("No change.") +def _model_flow_openai_codex(config, current_model=""): + """OpenAI Codex provider: ensure logged in, then pick model.""" + from hermes_cli.auth import ( + get_codex_auth_status, _prompt_model_selection, _save_model_choice, + _update_config_for_provider, _login_openai_codex, + PROVIDER_REGISTRY, DEFAULT_CODEX_BASE_URL, + ) + from hermes_cli.config import get_env_value, save_env_value + import argparse + + status = get_codex_auth_status() + if not status.get("logged_in"): + print("Not logged into OpenAI Codex. Starting login...") + print() + try: + mock_args = argparse.Namespace() + _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"]) + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + + # Codex models are not discoverable through /models with this auth path, + # so provide curated IDs with custom fallback. + codex_models = [ + "gpt-5-codex", + "gpt-5.3-codex", + "gpt-5.2-codex", + "gpt-5.1-codex", + ] + + selected = _prompt_model_selection(codex_models, current_model=current_model) + if selected: + _save_model_choice(selected) + _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) + # Clear custom endpoint env vars that would otherwise override Codex. 
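+        # (Runtime resolution treats OPENAI_BASE_URL/OPENAI_API_KEY as a custom
+        # endpoint, so stale values would override the Codex routing.)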
+ if get_env_value("OPENAI_BASE_URL"): + save_env_value("OPENAI_BASE_URL", "") + save_env_value("OPENAI_API_KEY", "") + print(f"Default model set to: {selected} (via OpenAI Codex)") + else: + print("No change.") + + def _model_flow_custom(config): """Custom endpoint: collect URL, API key, and model name.""" from hermes_cli.auth import _save_model_choice, deactivate_provider @@ -678,7 +734,7 @@ For more help on a command: ) chat_parser.add_argument( "--provider", - choices=["auto", "openrouter", "nous"], + choices=["auto", "openrouter", "nous", "openai-codex"], default=None, help="Inference provider (default: auto)" ) @@ -765,9 +821,9 @@ For more help on a command: ) login_parser.add_argument( "--provider", - choices=["nous"], + choices=["nous", "openai-codex"], default=None, - help="Provider to authenticate with (default: interactive selection)" + help="Provider to authenticate with (default: nous)" ) login_parser.add_argument( "--portal-url", @@ -819,7 +875,7 @@ For more help on a command: ) logout_parser.add_argument( "--provider", - choices=["nous"], + choices=["nous", "openai-codex"], default=None, help="Provider to log out from (default: active provider)" ) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py new file mode 100644 index 0000000000..1f070ac22b --- /dev/null +++ b/hermes_cli/runtime_provider.py @@ -0,0 +1,149 @@ +"""Shared runtime provider resolution for CLI, gateway, cron, and helpers.""" + +from __future__ import annotations + +import os +from typing import Any, Dict, Optional + +from hermes_cli.auth import ( + AuthError, + format_auth_error, + resolve_provider, + resolve_nous_runtime_credentials, + resolve_codex_runtime_credentials, +) +from hermes_cli.config import load_config +from hermes_constants import OPENROUTER_BASE_URL + + +def _get_model_config() -> Dict[str, Any]: + config = load_config() + model_cfg = config.get("model") + if isinstance(model_cfg, dict): + return dict(model_cfg) + if isinstance(model_cfg, str) and model_cfg.strip(): + return {"default": model_cfg.strip()} + return {} + + +def resolve_requested_provider(requested: Optional[str] = None) -> str: + """Resolve provider request from explicit arg, env, then config.""" + if requested and requested.strip(): + return requested.strip().lower() + + env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower() + if env_provider: + return env_provider + + model_cfg = _get_model_config() + cfg_provider = model_cfg.get("provider") + if isinstance(cfg_provider, str) and cfg_provider.strip(): + return cfg_provider.strip().lower() + + return "auto" + + +def _resolve_openrouter_runtime( + *, + requested_provider: str, + explicit_api_key: Optional[str] = None, + explicit_base_url: Optional[str] = None, +) -> Dict[str, Any]: + model_cfg = _get_model_config() + cfg_base_url = model_cfg.get("base_url") if isinstance(model_cfg.get("base_url"), str) else "" + cfg_provider = model_cfg.get("provider") if isinstance(model_cfg.get("provider"), str) else "" + requested_norm = (requested_provider or "").strip().lower() + cfg_provider = cfg_provider.strip().lower() + + env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip() + env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip() + + use_config_base_url = False + if requested_norm == "auto": + if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url: + if not cfg_provider or cfg_provider == "auto": + use_config_base_url = True + + base_url = ( + (explicit_base_url or "").strip() + or 
env_openai_base_url + or (cfg_base_url.strip() if use_config_base_url else "") + or env_openrouter_base_url + or OPENROUTER_BASE_URL + ).rstrip("/") + + api_key = ( + explicit_api_key + or os.getenv("OPENAI_API_KEY") + or os.getenv("OPENROUTER_API_KEY") + or "" + ) + + source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config" + + return { + "provider": "openrouter", + "api_mode": "chat_completions", + "base_url": base_url, + "api_key": api_key, + "source": source, + } + + +def resolve_runtime_provider( + *, + requested: Optional[str] = None, + explicit_api_key: Optional[str] = None, + explicit_base_url: Optional[str] = None, +) -> Dict[str, Any]: + """Resolve runtime provider credentials for agent execution.""" + requested_provider = resolve_requested_provider(requested) + + provider = resolve_provider( + requested_provider, + explicit_api_key=explicit_api_key, + explicit_base_url=explicit_base_url, + ) + + if provider == "nous": + creds = resolve_nous_runtime_credentials( + min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + ) + return { + "provider": "nous", + "api_mode": "chat_completions", + "base_url": creds.get("base_url", "").rstrip("/"), + "api_key": creds.get("api_key", ""), + "source": creds.get("source", "portal"), + "expires_at": creds.get("expires_at"), + "requested_provider": requested_provider, + } + + if provider == "openai-codex": + creds = resolve_codex_runtime_credentials() + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": creds.get("base_url", "").rstrip("/"), + "api_key": creds.get("api_key", ""), + "source": creds.get("source", "codex-auth-json"), + "auth_file": creds.get("auth_file"), + "codex_home": creds.get("codex_home"), + "last_refresh": creds.get("last_refresh"), + "requested_provider": requested_provider, + } + + runtime = _resolve_openrouter_runtime( + requested_provider=requested_provider, + explicit_api_key=explicit_api_key, + explicit_base_url=explicit_base_url, + ) + runtime["requested_provider"] = requested_provider + return runtime + + +def format_runtime_provider_error(error: Exception) -> str: + if isinstance(error, AuthError): + return format_auth_error(error) + return str(error) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 95c59213d8..08fd28dddd 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -613,6 +613,7 @@ def run_setup_wizard(args): get_active_provider, get_provider_auth_state, PROVIDER_REGISTRY, format_auth_error, AuthError, fetch_nous_models, resolve_nous_runtime_credentials, _update_config_for_provider, + _login_openai_codex, get_codex_auth_status, DEFAULT_CODEX_BASE_URL, ) existing_custom = get_env_value("OPENAI_BASE_URL") existing_or = get_env_value("OPENROUTER_API_KEY") @@ -633,6 +634,7 @@ def run_setup_wizard(args): provider_choices = [ "Login with Nous Portal (Nous Research subscription)", + "Login with OpenAI Codex (ChatGPT/Codex CLI auth)", "OpenRouter API key (100+ models, pay-per-use)", "Custom OpenAI-compatible endpoint (self-hosted / VLLM / etc.)", ] @@ -640,7 +642,7 @@ def run_setup_wizard(args): provider_choices.append(keep_label) # Default to "Keep current" if a provider exists, otherwise OpenRouter (most common) - default_provider = len(provider_choices) - 1 if has_any_provider else 1 + default_provider = len(provider_choices) - 1 if has_any_provider else 2 if not has_any_provider: print_warning("An inference provider is required 
for Hermes to work.") @@ -649,7 +651,7 @@ def run_setup_wizard(args): provider_idx = prompt_choice("Select your inference provider:", provider_choices, default_provider) # Track which provider was selected for model step - selected_provider = None # "nous", "openrouter", "custom", or None (keep) + selected_provider = None # "nous", "openai-codex", "openrouter", "custom", or None (keep) nous_models = [] # populated if Nous login succeeds if provider_idx == 0: # Nous Portal @@ -692,7 +694,33 @@ def run_setup_wizard(args): print_info("You can try again later with: hermes login") selected_provider = None - elif provider_idx == 1: # OpenRouter + elif provider_idx == 1: # OpenAI Codex + selected_provider = "openai-codex" + print() + print_header("OpenAI Codex Login") + print_info("This uses your Codex CLI auth state from CODEX_HOME/auth.json.") + print_info("If you're not logged in, Hermes will run `codex login`.") + print() + + try: + import argparse + mock_args = argparse.Namespace() + _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"]) + # Clear custom endpoint vars that would override provider routing. + if existing_custom: + save_env_value("OPENAI_BASE_URL", "") + save_env_value("OPENAI_API_KEY", "") + _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) + except SystemExit: + print_warning("OpenAI Codex login was cancelled or failed.") + print_info("You can try again later with: hermes login --provider openai-codex") + selected_provider = None + except Exception as e: + print_error(f"Login failed: {e}") + print_info("You can try again later with: hermes login --provider openai-codex") + selected_provider = None + + elif provider_idx == 2: # OpenRouter selected_provider = "openrouter" print() print_header("OpenRouter API Key") @@ -719,7 +747,7 @@ def run_setup_wizard(args): save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - elif provider_idx == 2: # Custom endpoint + elif provider_idx == 3: # Custom endpoint selected_provider = "custom" print() print_header("Custom OpenAI-Compatible Endpoint") @@ -746,14 +774,14 @@ def run_setup_wizard(args): config['model'] = model_name save_env_value("LLM_MODEL", model_name) print_success("Custom endpoint configured") - # else: provider_idx == 3 (Keep current) — only shown when a provider already exists + # else: provider_idx == 4 (Keep current) — only shown when a provider already exists # ========================================================================= # Step 1b: OpenRouter API Key for tools (if not already set) # ========================================================================= # Tools (vision, web, MoA) use OpenRouter independently of the main provider. # Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen. 
- if selected_provider in ("nous", "custom") and not get_env_value("OPENROUTER_API_KEY"): + if selected_provider in ("nous", "openai-codex", "custom") and not get_env_value("OPENROUTER_API_KEY"): print() print_header("OpenRouter API Key (for tools)") print_info("Tools like vision analysis, web search, and MoA use OpenRouter") @@ -799,6 +827,29 @@ def run_setup_wizard(args): config['model'] = custom save_env_value("LLM_MODEL", custom) # else: keep current + elif selected_provider == "openai-codex": + codex_models = [ + "gpt-5-codex", + "gpt-5.3-codex", + "gpt-5.2-codex", + "gpt-5.1-codex", + ] + model_choices = [f"{m}" for m in codex_models] + model_choices.append("Custom model") + model_choices.append(f"Keep current ({current_model})") + + keep_idx = len(model_choices) - 1 + model_idx = prompt_choice("Select default model:", model_choices, keep_idx) + + if model_idx < len(codex_models): + config['model'] = codex_models[model_idx] + save_env_value("LLM_MODEL", codex_models[model_idx]) + elif model_idx == len(codex_models): + custom = prompt("Enter model name") + if custom: + config['model'] = custom + save_env_value("LLM_MODEL", custom) + _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) else: # Static list for OpenRouter / fallback (from canonical list) from hermes_cli.models import model_ids, menu_labels diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 33ebd4983d..4d542ece68 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -100,10 +100,12 @@ def show_status(args): print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD)) try: - from hermes_cli.auth import get_nous_auth_status + from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status nous_status = get_nous_auth_status() + codex_status = get_codex_auth_status() except Exception: nous_status = {} + codex_status = {} nous_logged_in = bool(nous_status.get("logged_in")) print( @@ -120,6 +122,20 @@ def show_status(args): print(f" Key exp: {key_exp}") print(f" Refresh: {refresh_label}") + codex_logged_in = bool(codex_status.get("logged_in")) + print( + f" {'OpenAI Codex':<12} {check_mark(codex_logged_in)} " + f"{'logged in' if codex_logged_in else 'not logged in (run: hermes login --provider openai-codex)'}" + ) + codex_auth_file = codex_status.get("auth_file") + if codex_auth_file: + print(f" Auth file: {codex_auth_file}") + codex_last_refresh = _format_iso_timestamp(codex_status.get("last_refresh")) + if codex_status.get("last_refresh"): + print(f" Refreshed: {codex_last_refresh}") + if codex_status.get("error") and not codex_logged_in: + print(f" Error: {codex_status.get('error')}") + # ========================================================================= # Terminal Configuration # ========================================================================= diff --git a/run_agent.py b/run_agent.py index beb9d07a1a..d1d3d27e52 100644 --- a/run_agent.py +++ b/run_agent.py @@ -30,6 +30,7 @@ import re import sys import time import threading +from types import SimpleNamespace import uuid from typing import List, Dict, Any, Optional from openai import OpenAI @@ -95,6 +96,8 @@ class AIAgent: self, base_url: str = None, api_key: str = None, + provider: str = None, + api_mode: str = None, model: str = "anthropic/claude-opus-4.6", # OpenRouter format max_iterations: int = 60, # Default tool-calling iterations tool_delay: float = 1.0, @@ -127,6 +130,8 @@ class AIAgent: Args: base_url (str): Base URL for the model API (optional) api_key (str): API key for authentication (optional, uses env 
var if not provided) + provider (str): Provider identifier (optional; used for telemetry/routing hints) + api_mode (str): API mode override: "chat_completions" or "codex_responses" model (str): Model name to use (default: "anthropic/claude-opus-4.6") max_iterations (int): Maximum number of tool calling iterations (default: 60) tool_delay (float): Delay between tool calls in seconds (default: 1.0) @@ -172,6 +177,17 @@ class AIAgent: # Store effective base URL for feature detection (prompt caching, reasoning, etc.) # When no base_url is provided, the client defaults to OpenRouter, so reflect that here. self.base_url = base_url or OPENROUTER_BASE_URL + provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None + self.provider = provider_name or "openrouter" + if api_mode in {"chat_completions", "codex_responses"}: + self.api_mode = api_mode + elif self.provider == "openai-codex": + self.api_mode = "codex_responses" + elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower(): + self.api_mode = "codex_responses" + self.provider = "openai-codex" + else: + self.api_mode = "chat_completions" self.tool_progress_callback = tool_progress_callback self.clarify_callback = clarify_callback self._last_reported_tool = None # Track for "new tool" mode @@ -1122,6 +1138,220 @@ class AIAgent: if self._memory_store: self._memory_store.load_from_disk() + def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]: + """Convert chat-completions tool schemas to Responses function-tool schemas.""" + source_tools = tools if tools is not None else self.tools + if not source_tools: + return None + + converted: List[Dict[str, Any]] = [] + for item in source_tools: + fn = item.get("function", {}) if isinstance(item, dict) else {} + name = fn.get("name") + if not isinstance(name, str) or not name.strip(): + continue + converted.append({ + "type": "function", + "name": name, + "description": fn.get("description", ""), + "parameters": fn.get("parameters", {"type": "object", "properties": {}}), + }) + return converted or None + + def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Convert internal chat-style messages to Responses input items.""" + items: List[Dict[str, Any]] = [] + + for msg in messages: + if not isinstance(msg, dict): + continue + role = msg.get("role") + if role == "system": + continue + + if role in {"user", "assistant"}: + content = msg.get("content", "") + content_text = str(content) if content is not None else "" + + if role == "assistant": + if content_text.strip(): + items.append({"role": "assistant", "content": content_text}) + + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if not isinstance(tc, dict): + continue + fn = tc.get("function", {}) + fn_name = fn.get("name") + if not isinstance(fn_name, str) or not fn_name.strip(): + continue + + call_id = tc.get("id") or tc.get("call_id") + if not isinstance(call_id, str) or not call_id.strip(): + call_id = f"call_{uuid.uuid4().hex[:12]}" + + arguments = fn.get("arguments", "{}") + if isinstance(arguments, dict): + arguments = json.dumps(arguments, ensure_ascii=False) + elif not isinstance(arguments, str): + arguments = str(arguments) + arguments = arguments.strip() or "{}" + + items.append({ + "type": "function_call", + "id": call_id, + "call_id": call_id, + "name": fn_name, + "arguments": arguments, + }) + continue + + 
items.append({"role": role, "content": content_text}) + continue + + if role == "tool": + call_id = msg.get("tool_call_id") + if not isinstance(call_id, str) or not call_id.strip(): + continue + items.append({ + "type": "function_call_output", + "call_id": call_id, + "output": str(msg.get("content", "") or ""), + }) + + return items + + def _extract_responses_message_text(self, item: Any) -> str: + """Extract assistant text from a Responses message output item.""" + content = getattr(item, "content", None) + if not isinstance(content, list): + return "" + + chunks: List[str] = [] + for part in content: + ptype = getattr(part, "type", None) + if ptype not in {"output_text", "text"}: + continue + text = getattr(part, "text", None) + if isinstance(text, str) and text: + chunks.append(text) + return "".join(chunks).strip() + + def _extract_responses_reasoning_text(self, item: Any) -> str: + """Extract a compact reasoning text from a Responses reasoning item.""" + summary = getattr(item, "summary", None) + if isinstance(summary, list): + chunks: List[str] = [] + for part in summary: + text = getattr(part, "text", None) + if isinstance(text, str) and text: + chunks.append(text) + if chunks: + return "\n".join(chunks).strip() + text = getattr(item, "text", None) + if isinstance(text, str) and text: + return text.strip() + return "" + + def _normalize_codex_response(self, response: Any) -> tuple[Any, str]: + """Normalize a Responses API object to an assistant_message-like object.""" + output = getattr(response, "output", None) + if not isinstance(output, list) or not output: + raise RuntimeError("Responses API returned no output items") + + response_status = getattr(response, "status", None) + if isinstance(response_status, str): + response_status = response_status.strip().lower() + else: + response_status = None + + if response_status in {"failed", "cancelled"}: + error_obj = getattr(response, "error", None) + if isinstance(error_obj, dict): + error_msg = error_obj.get("message") or str(error_obj) + else: + error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'" + raise RuntimeError(error_msg) + + content_parts: List[str] = [] + reasoning_parts: List[str] = [] + tool_calls: List[Any] = [] + has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} + + for item in output: + item_type = getattr(item, "type", None) + item_status = getattr(item, "status", None) + if isinstance(item_status, str): + item_status = item_status.strip().lower() + else: + item_status = None + + if item_status in {"queued", "in_progress", "incomplete"}: + has_incomplete_items = True + + if item_type == "message": + message_text = self._extract_responses_message_text(item) + if message_text: + content_parts.append(message_text) + elif item_type == "reasoning": + reasoning_text = self._extract_responses_reasoning_text(item) + if reasoning_text: + reasoning_parts.append(reasoning_text) + elif item_type == "function_call": + if item_status in {"queued", "in_progress", "incomplete"}: + continue + fn_name = getattr(item, "name", "") or "" + arguments = getattr(item, "arguments", "{}") + if not isinstance(arguments, str): + arguments = str(arguments) + call_id = getattr(item, "call_id", None) or getattr(item, "id", None) or f"call_{uuid.uuid4().hex[:12]}" + tool_calls.append(SimpleNamespace( + id=call_id, + type="function", + function=SimpleNamespace(name=fn_name, arguments=arguments), + )) + elif item_type == "custom_tool_call": + fn_name = getattr(item, "name", "") 
or "" + arguments = getattr(item, "input", "{}") + if not isinstance(arguments, str): + arguments = str(arguments) + call_id = getattr(item, "call_id", None) or getattr(item, "id", None) or f"call_{uuid.uuid4().hex[:12]}" + tool_calls.append(SimpleNamespace( + id=call_id, + type="function", + function=SimpleNamespace(name=fn_name, arguments=arguments), + )) + + final_text = "\n".join([p for p in content_parts if p]).strip() + if not final_text and hasattr(response, "output_text"): + out_text = getattr(response, "output_text", "") + if isinstance(out_text, str): + final_text = out_text.strip() + + assistant_message = SimpleNamespace( + content=final_text, + tool_calls=tool_calls, + reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None, + reasoning_content=None, + reasoning_details=None, + ) + + if tool_calls: + finish_reason = "tool_calls" + elif has_incomplete_items: + finish_reason = "incomplete" + else: + finish_reason = "stop" + return assistant_message, finish_reason + + def _run_codex_stream(self, api_kwargs: dict): + """Execute one streaming Responses API request and return the final response.""" + with self.client.responses.stream(**api_kwargs) as stream: + for _ in stream: + pass + return stream.get_final_response() + def _interruptible_api_call(self, api_kwargs: dict): """ Run the API call in a background thread so the main conversation loop @@ -1135,7 +1365,10 @@ class AIAgent: def _call(): try: - result["response"] = self.client.chat.completions.create(**api_kwargs) + if self.api_mode == "codex_responses": + result["response"] = self._run_codex_stream(api_kwargs) + else: + result["response"] = self.client.chat.completions.create(**api_kwargs) except Exception as e: result["error"] = e @@ -1160,7 +1393,24 @@ class AIAgent: return result["response"] def _build_api_kwargs(self, api_messages: list) -> dict: - """Build the keyword arguments dict for the chat completions API call.""" + """Build the keyword arguments dict for the active API mode.""" + if self.api_mode == "codex_responses": + instructions = "" + payload_messages = api_messages + if api_messages and api_messages[0].get("role") == "system": + instructions = str(api_messages[0].get("content") or "").strip() + payload_messages = api_messages[1:] + if not instructions: + instructions = DEFAULT_AGENT_IDENTITY + + return { + "model": self.model, + "instructions": instructions, + "input": self._chat_messages_to_responses_input(payload_messages), + "tools": self._responses_tools(), + "store": False, + } + provider_preferences = {} if self.providers_allowed: provider_preferences["only"] = self.providers_allowed @@ -1308,36 +1558,43 @@ class AIAgent: messages.pop() # remove flush msg return - api_kwargs = { - "model": self.model, - "messages": api_messages, - "tools": [memory_tool_def], - "temperature": 0.3, - "max_tokens": 1024, - } + if self.api_mode == "codex_responses": + codex_kwargs = self._build_api_kwargs(api_messages) + codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) + response = self._run_codex_stream(codex_kwargs) + assistant_message, _ = self._normalize_codex_response(response) + else: + api_kwargs = { + "model": self.model, + "messages": api_messages, + "tools": [memory_tool_def], + "temperature": 0.3, + "max_tokens": 1024, + } + response = self.client.chat.completions.create(**api_kwargs, timeout=30.0) + if not response.choices: + assistant_message = None + else: + assistant_message = response.choices[0].message - response = self.client.chat.completions.create(**api_kwargs, 
timeout=30.0)
-
-        if response.choices:
-            assistant_message = response.choices[0].message
-            if assistant_message.tool_calls:
-                # Execute only memory tool calls
-                for tc in assistant_message.tool_calls:
-                    if tc.function.name == "memory":
-                        try:
-                            args = json.loads(tc.function.arguments)
-                            from tools.memory_tool import memory_tool as _memory_tool
-                            result = _memory_tool(
-                                action=args.get("action"),
-                                target=args.get("target", "memory"),
-                                content=args.get("content"),
-                                old_text=args.get("old_text"),
-                                store=self._memory_store,
-                            )
-                            if not self.quiet_mode:
-                                print(f"   🧠 Memory flush: saved to {args.get('target', 'memory')}")
-                        except Exception as e:
-                            logger.debug("Memory flush tool call failed: %s", e)
+            if assistant_message and assistant_message.tool_calls:
+                # Execute only memory tool calls
+                for tc in assistant_message.tool_calls:
+                    if tc.function.name == "memory":
+                        try:
+                            args = json.loads(tc.function.arguments)
+                            from tools.memory_tool import memory_tool as _memory_tool
+                            _memory_tool(
+                                action=args.get("action"),
+                                target=args.get("target", "memory"),
+                                content=args.get("content"),
+                                old_text=args.get("old_text"),
+                                store=self._memory_store,
+                            )
+                            if not self.quiet_mode:
+                                print(f"   🧠 Memory flush: saved to {args.get('target', 'memory')}")
+                        except Exception as e:
+                            logger.debug("Memory flush tool call failed: %s", e)
         except Exception as e:
             logger.debug("Memory flush API call failed: %s", e)
         finally:
@@ -1628,24 +1885,37 @@ class AIAgent:
                 if _is_nous:
                     summary_extra_body["tags"] = ["product=hermes-agent"]
 
-                summary_kwargs = {
-                    "model": self.model,
-                    "messages": api_messages,
-                }
-                if self.max_tokens is not None:
-                    summary_kwargs["max_tokens"] = self.max_tokens
-                if summary_extra_body:
-                    summary_kwargs["extra_body"] = summary_extra_body
-
-                summary_response = self.client.chat.completions.create(**summary_kwargs)
-
-                if summary_response.choices and summary_response.choices[0].message.content:
-                    final_response = summary_response.choices[0].message.content
+                if self.api_mode == "codex_responses":
+                    summary_kwargs = self._build_api_kwargs(api_messages)
+                    summary_kwargs["tools"] = None
+                    summary_response = self._run_codex_stream(summary_kwargs)
+                    assistant_message, _ = self._normalize_codex_response(summary_response)
+                    final_response = assistant_message.content or ""
                     if "</think>" in final_response:
                         final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
-                    messages.append({"role": "assistant", "content": final_response})
+                    if final_response:
+                        messages.append({"role": "assistant", "content": final_response})
+                    else:
+                        final_response = "I reached the iteration limit and couldn't generate a summary."
                 else:
-                    final_response = "I reached the iteration limit and couldn't generate a summary."
+                    summary_kwargs = {
+                        "model": self.model,
+                        "messages": api_messages,
+                    }
+                    if self.max_tokens is not None:
+                        summary_kwargs["max_tokens"] = self.max_tokens
+                    if summary_extra_body:
+                        summary_kwargs["extra_body"] = summary_extra_body
+
+                    summary_response = self.client.chat.completions.create(**summary_kwargs)
+
+                    if summary_response.choices and summary_response.choices[0].message.content:
+                        final_response = summary_response.choices[0].message.content
+                        if "</think>" in final_response:
+                            final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
+                        messages.append({"role": "assistant", "content": final_response})
+                    else:
+                        final_response = "I reached the iteration limit and couldn't generate a summary."
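
For reference, a minimal runnable sketch of the request shape the `codex_responses` branch above builds: the system prompt travels as top-level `instructions` and the converted history as `input` items, with `store=False` because Hermes replays its own history. The helper name and the standalone constant below are illustrative stand-ins, not part of the patch.

```python
# Illustrative sketch of the Responses-mode kwargs built by _build_api_kwargs;
# not the shipped implementation.
from typing import Any, Dict, List

DEFAULT_AGENT_IDENTITY = "You are Hermes."  # stand-in for the patch's constant


def sketch_codex_kwargs(model: str, api_messages: List[Dict[str, Any]],
                        input_items: List[Dict[str, Any]]) -> Dict[str, Any]:
    # The Responses API carries the system prompt as `instructions`,
    # not as a message, so peel it off the front of the chat history.
    instructions = DEFAULT_AGENT_IDENTITY
    if api_messages and api_messages[0].get("role") == "system":
        instructions = str(api_messages[0].get("content") or "").strip() or instructions
    return {
        "model": model,
        "instructions": instructions,
        "input": input_items,  # converted user/assistant/tool turns
        "store": False,        # stateless: the agent replays its own history
    }


if __name__ == "__main__":
    print(sketch_codex_kwargs(
        "gpt-5-codex",
        [{"role": "system", "content": "You are Hermes."},
         {"role": "user", "content": "Ping"}],
        [{"role": "user", "content": "Ping"}],
    ))
```
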
except Exception as e: logging.warning(f"Failed to get summary response: {e}") @@ -1848,6 +2118,8 @@ class AIAgent: retry_count = 0 max_retries = 6 # Increased to allow longer backoff periods + finish_reason = "stop" + while retry_count <= max_retries: try: api_kwargs = self._build_api_kwargs(api_messages) @@ -1873,8 +2145,33 @@ class AIAgent: resp_model = getattr(response, 'model', 'N/A') if response else 'N/A' logging.debug(f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}") - # Validate response has valid choices before proceeding - if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0: + # Validate response shape before proceeding + response_invalid = False + error_details = [] + if self.api_mode == "codex_responses": + output_items = getattr(response, "output", None) if response is not None else None + if response is None: + response_invalid = True + error_details.append("response is None") + elif not isinstance(output_items, list): + response_invalid = True + error_details.append("response.output is not a list") + elif len(output_items) == 0: + response_invalid = True + error_details.append("response.output is empty") + else: + if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0: + response_invalid = True + if response is None: + error_details.append("response is None") + elif not hasattr(response, 'choices'): + error_details.append("response has no 'choices' attribute") + elif response.choices is None: + error_details.append("response.choices is None") + else: + error_details.append("response.choices is empty") + + if response_invalid: # Stop spinner before printing error messages if thinking_spinner: thinking_spinner.stop(f"(´;ω;`) oops, retrying...") @@ -1882,15 +2179,6 @@ class AIAgent: # This is often rate limiting or provider returning malformed response retry_count += 1 - error_details = [] - if response is None: - error_details.append("response is None") - elif not hasattr(response, 'choices'): - error_details.append("response has no 'choices' attribute") - elif response.choices is None: - error_details.append("response.choices is None") - else: - error_details.append("response.choices is empty") # Check for error field in response (some providers include this) error_msg = "Unknown" @@ -1927,7 +2215,7 @@ class AIAgent: "messages": messages, "completed": False, "api_calls": api_call_count, - "error": f"Invalid API response (choices is None/empty). Likely rate limited by provider.", + "error": "Invalid API response shape. 
Likely rate limited or malformed provider response.", "failed": True # Mark as failure for filtering } @@ -1953,7 +2241,20 @@ class AIAgent: continue # Retry the API call # Check finish_reason before proceeding - finish_reason = response.choices[0].finish_reason + if self.api_mode == "codex_responses": + status = getattr(response, "status", None) + incomplete_details = getattr(response, "incomplete_details", None) + incomplete_reason = None + if isinstance(incomplete_details, dict): + incomplete_reason = incomplete_details.get("reason") + else: + incomplete_reason = getattr(incomplete_details, "reason", None) + if status == "incomplete" and incomplete_reason in {"max_output_tokens", "length"}: + finish_reason = "length" + else: + finish_reason = "stop" + else: + finish_reason = response.choices[0].finish_reason # Handle "length" finish_reason - response was truncated if finish_reason == "length": @@ -1990,10 +2291,21 @@ class AIAgent: # Track actual token usage from response for context management if hasattr(response, 'usage') and response.usage: + if self.api_mode == "codex_responses": + prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0 + completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0 + total_tokens = ( + getattr(response.usage, 'total_tokens', None) + or (prompt_tokens + completion_tokens) + ) + else: + prompt_tokens = getattr(response.usage, 'prompt_tokens', 0) or 0 + completion_tokens = getattr(response.usage, 'completion_tokens', 0) or 0 + total_tokens = getattr(response.usage, 'total_tokens', 0) or 0 usage_dict = { - "prompt_tokens": getattr(response.usage, 'prompt_tokens', 0), - "completion_tokens": getattr(response.usage, 'completion_tokens', 0), - "total_tokens": getattr(response.usage, 'total_tokens', 0), + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": total_tokens, } self.context_compressor.update_from_response(usage_dict) @@ -2145,7 +2457,10 @@ class AIAgent: break try: - assistant_message = response.choices[0].message + if self.api_mode == "codex_responses": + assistant_message, finish_reason = self._normalize_codex_response(response) + else: + assistant_message = response.choices[0].message # Handle assistant response if assistant_message.content and not self.quiet_mode: @@ -2185,6 +2500,48 @@ class AIAgent: # Reset incomplete scratchpad counter on clean response if hasattr(self, '_incomplete_scratchpad_retries'): self._incomplete_scratchpad_retries = 0 + + if self.api_mode == "codex_responses" and finish_reason == "incomplete": + if not hasattr(self, "_codex_incomplete_retries"): + self._codex_incomplete_retries = 0 + self._codex_incomplete_retries += 1 + + interim_msg = self._build_assistant_message(assistant_message, finish_reason) + interim_has_content = bool(interim_msg.get("content", "").strip()) + interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False + + if interim_has_content or interim_has_reasoning: + last_msg = messages[-1] if messages else None + duplicate_interim = ( + isinstance(last_msg, dict) + and last_msg.get("role") == "assistant" + and last_msg.get("finish_reason") == "incomplete" + and (last_msg.get("content") or "") == (interim_msg.get("content") or "") + and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "") + ) + if not duplicate_interim: + messages.append(interim_msg) + self._log_msg_to_db(interim_msg) + + if self._codex_incomplete_retries < 3: + if not self.quiet_mode: + 
print(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)") + self._session_messages = messages + self._save_session_log(messages) + continue + + self._codex_incomplete_retries = 0 + self._persist_session(messages, conversation_history) + return { + "final_response": None, + "messages": messages, + "api_calls": api_call_count, + "completed": False, + "partial": True, + "error": "Codex response remained incomplete after 3 continuation attempts", + } + elif hasattr(self, "_codex_incomplete_retries"): + self._codex_incomplete_retries = 0 # Check for tool calls if assistant_message.tool_calls: diff --git a/tests/test_auth_codex_provider.py b/tests/test_auth_codex_provider.py new file mode 100644 index 0000000000..eaca52aac9 --- /dev/null +++ b/tests/test_auth_codex_provider.py @@ -0,0 +1,114 @@ +import json +from pathlib import Path +from types import SimpleNamespace + +import pytest +import yaml + +from hermes_cli.auth import ( + AuthError, + DEFAULT_CODEX_BASE_URL, + PROVIDER_REGISTRY, + _login_openai_codex, + login_command, + get_codex_auth_status, + get_provider_auth_state, + read_codex_auth_file, + resolve_codex_runtime_credentials, + resolve_provider, +) + + +def _write_codex_auth(codex_home: Path, *, access_token: str = "access", refresh_token: str = "refresh") -> Path: + codex_home.mkdir(parents=True, exist_ok=True) + auth_file = codex_home / "auth.json" + auth_file.write_text( + json.dumps( + { + "auth_mode": "oauth", + "last_refresh": "2026-02-26T00:00:00Z", + "tokens": { + "access_token": access_token, + "refresh_token": refresh_token, + }, + } + ) + ) + return auth_file + + +def test_read_codex_auth_file_success(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + auth_file = _write_codex_auth(codex_home) + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + payload = read_codex_auth_file() + + assert payload["auth_path"] == auth_file + assert payload["tokens"]["access_token"] == "access" + assert payload["tokens"]["refresh_token"] == "refresh" + + +def test_resolve_codex_runtime_credentials_missing_access_token(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + _write_codex_auth(codex_home, access_token="") + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + with pytest.raises(AuthError) as exc: + resolve_codex_runtime_credentials() + + assert exc.value.code == "codex_auth_missing_access_token" + assert exc.value.relogin_required is True + + +def test_resolve_provider_explicit_codex_does_not_fallback(monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + assert resolve_provider("openai-codex") == "openai-codex" + + +def test_get_codex_auth_status_not_logged_in(tmp_path, monkeypatch): + monkeypatch.setenv("CODEX_HOME", str(tmp_path / "missing-codex-home")) + status = get_codex_auth_status() + assert status["logged_in"] is False + assert "error" in status + + +def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes-home" + codex_home = tmp_path / "codex-home" + _write_codex_auth(codex_home) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + monkeypatch.setattr("hermes_cli.auth.shutil.which", lambda _: "/usr/local/bin/codex") + monkeypatch.setattr("hermes_cli.auth.subprocess.run", lambda *a, **k: None) + + _login_openai_codex(SimpleNamespace(), PROVIDER_REGISTRY["openai-codex"]) + + state = 
get_provider_auth_state("openai-codex") + assert state is not None + assert state["source"] == "codex-auth-json" + assert state["auth_file"].endswith("auth.json") + + config_path = hermes_home / "config.yaml" + config = yaml.safe_load(config_path.read_text()) + assert config["model"]["provider"] == "openai-codex" + assert config["model"]["base_url"] == DEFAULT_CODEX_BASE_URL + + +def test_login_command_defaults_to_nous(monkeypatch): + calls = {"nous": 0, "codex": 0} + + def _fake_nous(args, pconfig): + calls["nous"] += 1 + + def _fake_codex(args, pconfig): + calls["codex"] += 1 + + monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_nous) + monkeypatch.setattr("hermes_cli.auth._login_openai_codex", _fake_codex) + + login_command(SimpleNamespace()) + + assert calls["nous"] == 1 + assert calls["codex"] == 0 diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py new file mode 100644 index 0000000000..3c8fe14a5e --- /dev/null +++ b/tests/test_cli_provider_resolution.py @@ -0,0 +1,187 @@ +import importlib +import sys +import types +from contextlib import nullcontext +from types import SimpleNamespace + +from hermes_cli.auth import AuthError +from hermes_cli import main as hermes_main + + +def _install_prompt_toolkit_stubs(): + class _Dummy: + def __init__(self, *args, **kwargs): + pass + + class _Condition: + def __init__(self, func): + self.func = func + + def __bool__(self): + return bool(self.func()) + + class _ANSI(str): + pass + + root = types.ModuleType("prompt_toolkit") + history = types.ModuleType("prompt_toolkit.history") + styles = types.ModuleType("prompt_toolkit.styles") + patch_stdout = types.ModuleType("prompt_toolkit.patch_stdout") + application = types.ModuleType("prompt_toolkit.application") + layout = types.ModuleType("prompt_toolkit.layout") + processors = types.ModuleType("prompt_toolkit.layout.processors") + filters = types.ModuleType("prompt_toolkit.filters") + dimension = types.ModuleType("prompt_toolkit.layout.dimension") + menus = types.ModuleType("prompt_toolkit.layout.menus") + widgets = types.ModuleType("prompt_toolkit.widgets") + key_binding = types.ModuleType("prompt_toolkit.key_binding") + completion = types.ModuleType("prompt_toolkit.completion") + formatted_text = types.ModuleType("prompt_toolkit.formatted_text") + + history.FileHistory = _Dummy + styles.Style = _Dummy + patch_stdout.patch_stdout = lambda *args, **kwargs: nullcontext() + application.Application = _Dummy + layout.Layout = _Dummy + layout.HSplit = _Dummy + layout.Window = _Dummy + layout.FormattedTextControl = _Dummy + layout.ConditionalContainer = _Dummy + processors.Processor = _Dummy + processors.Transformation = _Dummy + processors.PasswordProcessor = _Dummy + processors.ConditionalProcessor = _Dummy + filters.Condition = _Condition + dimension.Dimension = _Dummy + menus.CompletionsMenu = _Dummy + widgets.TextArea = _Dummy + key_binding.KeyBindings = _Dummy + completion.Completer = _Dummy + completion.Completion = _Dummy + formatted_text.ANSI = _ANSI + root.print_formatted_text = lambda *args, **kwargs: None + + sys.modules.setdefault("prompt_toolkit", root) + sys.modules.setdefault("prompt_toolkit.history", history) + sys.modules.setdefault("prompt_toolkit.styles", styles) + sys.modules.setdefault("prompt_toolkit.patch_stdout", patch_stdout) + sys.modules.setdefault("prompt_toolkit.application", application) + sys.modules.setdefault("prompt_toolkit.layout", layout) + sys.modules.setdefault("prompt_toolkit.layout.processors", processors) + 
sys.modules.setdefault("prompt_toolkit.filters", filters) + sys.modules.setdefault("prompt_toolkit.layout.dimension", dimension) + sys.modules.setdefault("prompt_toolkit.layout.menus", menus) + sys.modules.setdefault("prompt_toolkit.widgets", widgets) + sys.modules.setdefault("prompt_toolkit.key_binding", key_binding) + sys.modules.setdefault("prompt_toolkit.completion", completion) + sys.modules.setdefault("prompt_toolkit.formatted_text", formatted_text) + + +def _import_cli(): + try: + importlib.import_module("prompt_toolkit") + except ModuleNotFoundError: + _install_prompt_toolkit_stubs() + return importlib.import_module("cli") + + +def test_hermes_cli_init_does_not_eagerly_resolve_runtime_provider(monkeypatch): + cli = _import_cli() + calls = {"count": 0} + + def _unexpected_runtime_resolve(**kwargs): + calls["count"] += 1 + raise AssertionError("resolve_runtime_provider should not be called in HermesCLI.__init__") + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _unexpected_runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + + shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1) + + assert shell is not None + assert calls["count"] == 0 + + +def test_runtime_resolution_failure_is_not_sticky(monkeypatch): + cli = _import_cli() + calls = {"count": 0} + + def _runtime_resolve(**kwargs): + calls["count"] += 1 + if calls["count"] == 1: + raise RuntimeError("temporary auth failure") + return { + "provider": "openrouter", + "api_mode": "chat_completions", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "test-key", + "source": "env/config", + } + + class _DummyAgent: + def __init__(self, *args, **kwargs): + self.kwargs = kwargs + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + monkeypatch.setattr(cli, "AIAgent", _DummyAgent) + + shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1) + + assert shell._init_agent() is False + assert shell._init_agent() is True + assert calls["count"] == 2 + assert shell.agent is not None + + +def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch): + cli = _import_cli() + + def _runtime_resolve(**kwargs): + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://same-endpoint.example/v1", + "api_key": "same-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + + shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1) + shell.provider = "openrouter" + shell.api_mode = "chat_completions" + shell.base_url = "https://same-endpoint.example/v1" + shell.api_key = "same-key" + shell.agent = object() + + assert shell._ensure_runtime_credentials() is True + assert shell.agent is None + assert shell.provider == "openai-codex" + assert shell.api_mode == "codex_responses" + + +def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys): + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"model": {"default": "gpt-5", "provider": "invalid-provider"}}, + ) + monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None) + monkeypatch.setattr("hermes_cli.config.get_env_value", lambda key: "") + 
monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: None) + + def _resolve_provider(requested, **kwargs): + if requested == "invalid-provider": + raise AuthError("Unknown provider 'invalid-provider'.", code="invalid_provider") + return "openrouter" + + monkeypatch.setattr("hermes_cli.auth.resolve_provider", _resolve_provider) + monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: len(choices) - 1) + + hermes_main.cmd_model(SimpleNamespace()) + output = capsys.readouterr().out + + assert "Warning:" in output + assert "falling back to auto provider detection" in output.lower() + assert "No change." in output diff --git a/tests/test_delegate.py b/tests/test_delegate.py index 811940a026..8fb16be61e 100644 --- a/tests/test_delegate.py +++ b/tests/test_delegate.py @@ -33,6 +33,9 @@ def _make_mock_parent(depth=0): """Create a mock parent agent with the fields delegate_task expects.""" parent = MagicMock() parent.base_url = "https://openrouter.ai/api/v1" + parent.api_key = "parent-key" + parent.provider = "openrouter" + parent.api_mode = "chat_completions" parent.model = "anthropic/claude-sonnet-4" parent.platform = "cli" parent.providers_allowed = None @@ -221,6 +224,30 @@ class TestDelegateTask(unittest.TestCase): delegate_task(goal="Test tracking", parent_agent=parent) self.assertEqual(len(parent._active_children), 0) + def test_child_inherits_runtime_credentials(self): + parent = _make_mock_parent(depth=0) + parent.base_url = "https://chatgpt.com/backend-api/codex" + parent.api_key = "codex-token" + parent.provider = "openai-codex" + parent.api_mode = "codex_responses" + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + mock_child.run_conversation.return_value = { + "final_response": "ok", + "completed": True, + "api_calls": 1, + } + MockAgent.return_value = mock_child + + delegate_task(goal="Test runtime inheritance", parent_agent=parent) + + _, kwargs = MockAgent.call_args + self.assertEqual(kwargs["base_url"], parent.base_url) + self.assertEqual(kwargs["api_key"], parent.api_key) + self.assertEqual(kwargs["provider"], parent.provider) + self.assertEqual(kwargs["api_mode"], parent.api_mode) + class TestBlockedTools(unittest.TestCase): def test_blocked_tools_constant(self): diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py new file mode 100644 index 0000000000..846d9c1c0a --- /dev/null +++ b/tests/test_run_agent_codex_responses.py @@ -0,0 +1,231 @@ +import sys +import types +from types import SimpleNamespace + + +sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) +sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) +sys.modules.setdefault("fal_client", types.SimpleNamespace()) + +import run_agent + + +def _patch_agent_bootstrap(monkeypatch): + monkeypatch.setattr( + run_agent, + "get_tool_definitions", + lambda **kwargs: [ + { + "type": "function", + "function": { + "name": "terminal", + "description": "Run shell commands.", + "parameters": {"type": "object", "properties": {}}, + }, + } + ], + ) + monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {}) + + +def _build_agent(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + + agent = run_agent.AIAgent( + model="gpt-5-codex", + base_url="https://chatgpt.com/backend-api/codex", + api_key="codex-token", + quiet_mode=True, + max_iterations=4, + skip_context_files=True, + skip_memory=True, + ) + agent._cleanup_task_resources = lambda task_id: None + 
agent._persist_session = lambda messages, history=None: None + agent._save_trajectory = lambda messages, user_message, completed: None + agent._save_session_log = lambda messages: None + return agent + + +def _codex_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8), + status="completed", + model="gpt-5-codex", + ) + + +def _codex_tool_call_response(): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="function_call", + id="call_1", + call_id="call_1", + name="terminal", + arguments="{}", + ) + ], + usage=SimpleNamespace(input_tokens=12, output_tokens=4, total_tokens=16), + status="completed", + model="gpt-5-codex", + ) + + +def _codex_incomplete_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + status="in_progress", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="in_progress", + model="gpt-5-codex", + ) + + +def test_api_mode_uses_explicit_provider_when_codex(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + agent = run_agent.AIAgent( + model="gpt-5-codex", + base_url="https://openrouter.ai/api/v1", + provider="openai-codex", + api_key="codex-token", + quiet_mode=True, + max_iterations=1, + skip_context_files=True, + skip_memory=True, + ) + assert agent.api_mode == "codex_responses" + assert agent.provider == "openai-codex" + + +def test_api_mode_normalizes_provider_case(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + agent = run_agent.AIAgent( + model="gpt-5-codex", + base_url="https://openrouter.ai/api/v1", + provider="OpenAI-Codex", + api_key="codex-token", + quiet_mode=True, + max_iterations=1, + skip_context_files=True, + skip_memory=True, + ) + assert agent.provider == "openai-codex" + assert agent.api_mode == "codex_responses" + + +def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + agent = run_agent.AIAgent( + model="gpt-5-codex", + base_url="https://chatgpt.com/backend-api/codex", + provider="openrouter", + api_key="test-token", + quiet_mode=True, + max_iterations=1, + skip_context_files=True, + skip_memory=True, + ) + assert agent.api_mode == "chat_completions" + assert agent.provider == "openrouter" + + +def test_build_api_kwargs_codex(monkeypatch): + agent = _build_agent(monkeypatch) + kwargs = agent._build_api_kwargs( + [ + {"role": "system", "content": "You are Hermes."}, + {"role": "user", "content": "Ping"}, + ] + ) + + assert kwargs["model"] == "gpt-5-codex" + assert kwargs["instructions"] == "You are Hermes." 
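
As context for the assertions in this test, here is a self-contained sketch of the tool-schema flattening that the converted `tools` payload reflects: Chat Completions nests the spec under a `function` key, while the Responses API expects it flat at the top level. The helper name is illustrative only.

```python
# Sketch of the Chat Completions -> Responses tool-definition flattening;
# illustrative helper, not the patch's code.
def to_responses_tool(chat_tool: dict) -> dict:
    fn = chat_tool.get("function", {})
    return {
        "type": "function",
        "name": fn.get("name", ""),
        "description": fn.get("description", ""),
        "parameters": fn.get("parameters", {"type": "object", "properties": {}}),
    }


chat_tool = {
    "type": "function",
    "function": {
        "name": "terminal",
        "description": "Run shell commands.",
        "parameters": {"type": "object", "properties": {}},
    },
}
flat = to_responses_tool(chat_tool)
assert flat["name"] == "terminal"
assert "function" not in flat
```
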
+ assert kwargs["store"] is False + assert isinstance(kwargs["input"], list) + assert kwargs["input"][0]["role"] == "user" + assert kwargs["tools"][0]["type"] == "function" + assert kwargs["tools"][0]["name"] == "terminal" + assert "function" not in kwargs["tools"][0] + + +def test_run_conversation_codex_plain_text(monkeypatch): + agent = _build_agent(monkeypatch) + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: _codex_message_response("OK")) + + result = agent.run_conversation("Say OK") + + assert result["completed"] is True + assert result["final_response"] == "OK" + assert result["messages"][-1]["role"] == "assistant" + assert result["messages"][-1]["content"] == "OK" + + +def test_run_conversation_codex_tool_round_trip(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [_codex_tool_call_response(), _codex_message_response("done")] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("run a command") + + assert result["completed"] is True + assert result["final_response"] == "done" + assert any(msg.get("tool_calls") for msg in result["messages"] if msg.get("role") == "assistant") + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_run_conversation_codex_continues_after_incomplete_interim_message(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_incomplete_message_response("I'll inspect the repo structure first."), + _codex_tool_call_response(), + _codex_message_response("Architecture summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("analyze repo") + + assert result["completed"] is True + assert result["final_response"] == "Architecture summary complete." 
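
For context on the round trip this test exercises, a small sketch of the replayed Responses input items: the assistant's `function_call` and the tool's `function_call_output` are matched purely by a shared `call_id`. The literal ids and payloads below are made up for illustration.

```python
# Replay items in the shape the conversion above produces; ids illustrative.
replayed_input = [
    {"role": "user", "content": "run a command"},
    {
        "type": "function_call",
        "call_id": "call_1",        # must match the output item below
        "name": "terminal",
        "arguments": "{}",
    },
    {
        "type": "function_call_output",
        "call_id": "call_1",        # pairs the result with its call
        "output": '{"ok":true}',
    },
]
assert replayed_input[1]["call_id"] == replayed_input[2]["call_id"]
```
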
+ assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "inspect the repo structure" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py new file mode 100644 index 0000000000..af69140925 --- /dev/null +++ b/tests/test_runtime_provider_resolution.py @@ -0,0 +1,95 @@ +from hermes_cli import runtime_provider as rp + + +def test_resolve_runtime_provider_codex(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex") + monkeypatch.setattr( + rp, + "resolve_codex_runtime_credentials", + lambda: { + "provider": "openai-codex", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "codex-token", + "source": "codex-auth-json", + "auth_file": "/tmp/auth.json", + "codex_home": "/tmp/codex", + "last_refresh": "2026-02-26T00:00:00Z", + }, + ) + + resolved = rp.resolve_runtime_provider(requested="openai-codex") + + assert resolved["provider"] == "openai-codex" + assert resolved["api_mode"] == "codex_responses" + assert resolved["base_url"] == "https://chatgpt.com/backend-api/codex" + assert resolved["api_key"] == "codex-token" + assert resolved["requested_provider"] == "openai-codex" + + +def test_resolve_runtime_provider_openrouter_explicit(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr(rp, "_get_model_config", lambda: {}) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider( + requested="openrouter", + explicit_api_key="test-key", + explicit_base_url="https://example.com/v1/", + ) + + assert resolved["provider"] == "openrouter" + assert resolved["api_mode"] == "chat_completions" + assert resolved["api_key"] == "test-key" + assert resolved["base_url"] == "https://example.com/v1" + assert resolved["source"] == "explicit" + + +def test_resolve_runtime_provider_openrouter_ignores_codex_config_base_url(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "openai-codex", + "base_url": "https://chatgpt.com/backend-api/codex", + }, + ) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider(requested="openrouter") + + assert resolved["provider"] == "openrouter" + assert resolved["base_url"] == rp.OPENROUTER_BASE_URL + + +def test_resolve_runtime_provider_auto_uses_custom_config_base_url(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "auto", + "base_url": "https://custom.example/v1/", + }, + ) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider(requested="auto") + + assert 
resolved["provider"] == "openrouter"
+    assert resolved["base_url"] == "https://custom.example/v1"
+
+
+def test_resolve_requested_provider_precedence(monkeypatch):
+    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous")
+    monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "openai-codex"})
+    assert rp.resolve_requested_provider("openrouter") == "openrouter"
diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py
index 111beb33a2..db72a5f1a2 100644
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -101,7 +101,10 @@ def _run_single_child(
     try:
         child = AIAgent(
             base_url=parent_agent.base_url,
+            api_key=getattr(parent_agent, "api_key", None),
             model=model or parent_agent.model,
+            provider=getattr(parent_agent, "provider", None),
+            api_mode=getattr(parent_agent, "api_mode", None),
             max_iterations=max_iterations,
             enabled_toolsets=child_toolsets,
             quiet_mode=True,

From ce175d73722dd8a38184df77a9d8722101671bbd Mon Sep 17 00:00:00 2001
From: George Pickett
Date: Wed, 25 Feb 2026 18:11:26 -0800
Subject: [PATCH 02/31] Fix Codex Responses continuation and schema parity

---
 run_agent.py                            | 234 ++++++++++++++++++++++--
 tests/test_run_agent_codex_responses.py | 212 ++++++++++++++++++++-
 2 files changed, 432 insertions(+), 14 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index d1d3d27e52..7a06823850 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -21,6 +21,7 @@ Usage:
 """
 import copy
+import hashlib
 import json
 import logging
 logger = logging.getLogger(__name__)
@@ -482,6 +483,54 @@ class AIAgent:
         if not content:
             return ""
         return re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
+
+    def _looks_like_codex_intermediate_ack(
+        self,
+        user_message: str,
+        assistant_content: str,
+        messages: List[Dict[str, Any]],
+    ) -> bool:
+        """Detect a planning/ack message that should continue instead of ending the turn."""
+        if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
+            return False
+
+        assistant_text = self._strip_think_blocks(assistant_content or "").strip().lower()
+        if not assistant_text:
+            return False
+        if len(assistant_text) > 1200:
+            return False
+
+        has_future_ack = bool(
+            re.search(r"\b(i['’]ll|i will|let me|i can do that|i can help with that)\b", assistant_text)
+        )
+        if not has_future_ack:
+            return False
+
+        action_markers = (
+            "look into",
+            "inspect",
+            "scan",
+            "check",
+            "analyz",
+            "review",
+            "explore",
+            "read",
+            "open",
+            "run",
+            "test",
+            "fix",
+            "debug",
+            "search",
+            "find",
+            "walkthrough",
+            "report back",
+            "summarize",
+        )
+
+        user_text = (user_message or "").strip().lower()
+        user_requests_action = any(marker in user_text for marker in action_markers) or "~/" in user_text or "/" in user_text
+        assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
+        return user_requests_action and assistant_mentions_action
 
 
     def _extract_reasoning(self, assistant_message) -> Optional[str]:
@@ -1154,10 +1203,57 @@ class AIAgent:
                 "type": "function",
                 "name": name,
                 "description": fn.get("description", ""),
+                "strict": False,
                 "parameters": fn.get("parameters", {"type": "object", "properties": {}}),
             })
         return converted or None
+
+    @staticmethod
+    def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]:
+        """Split a stored tool id into (call_id, response_item_id)."""
+        if not isinstance(raw_id, str):
+            return None, None
+        value = raw_id.strip()
+        if not value:
+            return None, None
+        if "|" in value:
+            call_id, response_item_id = value.split("|", 1)
+            call_id = call_id.strip() or None
+            
response_item_id = response_item_id.strip() or None + return call_id, response_item_id + if value.startswith("fc_"): + return None, value + return value, None + + def _derive_responses_function_call_id( + self, + call_id: str, + response_item_id: Optional[str] = None, + ) -> str: + """Build a valid Responses `function_call.id` (must start with `fc_`).""" + if isinstance(response_item_id, str): + candidate = response_item_id.strip() + if candidate.startswith("fc_"): + return candidate + + source = (call_id or "").strip() + if source.startswith("fc_"): + return source + if source.startswith("call_") and len(source) > len("call_"): + return f"fc_{source[len('call_'):]}" + + sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source) + if sanitized.startswith("fc_"): + return sanitized + if sanitized.startswith("call_") and len(sanitized) > len("call_"): + return f"fc_{sanitized[len('call_'):]}" + if sanitized: + return f"fc_{sanitized[:48]}" + + seed = source or str(response_item_id or "") or uuid.uuid4().hex + digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24] + return f"fc_{digest}" + def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Convert internal chat-style messages to Responses input items.""" items: List[Dict[str, Any]] = [] @@ -1187,9 +1283,32 @@ class AIAgent: if not isinstance(fn_name, str) or not fn_name.strip(): continue - call_id = tc.get("id") or tc.get("call_id") + embedded_call_id, embedded_response_item_id = self._split_responses_tool_id( + tc.get("id") + ) + call_id = tc.get("call_id") if not isinstance(call_id, str) or not call_id.strip(): - call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + if ( + isinstance(embedded_response_item_id, str) + and embedded_response_item_id.startswith("fc_") + and len(embedded_response_item_id) > len("fc_") + ): + call_id = f"call_{embedded_response_item_id[len('fc_'):]}" + else: + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + + response_item_id = tc.get("response_item_id") + if not isinstance(response_item_id, str) or not response_item_id.strip(): + response_item_id = tc.get("responses_item_id") + if not isinstance(response_item_id, str) or not response_item_id.strip(): + response_item_id = embedded_response_item_id + response_item_id = self._derive_responses_function_call_id( + call_id, + response_item_id if isinstance(response_item_id, str) else None, + ) arguments = fn.get("arguments", "{}") if isinstance(arguments, dict): @@ -1200,7 +1319,7 @@ class AIAgent: items.append({ "type": "function_call", - "id": call_id, + "id": response_item_id, "call_id": call_id, "name": fn_name, "arguments": arguments, @@ -1211,7 +1330,11 @@ class AIAgent: continue if role == "tool": - call_id = msg.get("tool_call_id") + raw_tool_call_id = msg.get("tool_call_id") + call_id, _ = self._split_responses_tool_id(raw_tool_call_id) + if not isinstance(call_id, str) or not call_id.strip(): + if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip(): + call_id = raw_tool_call_id.strip() if not isinstance(call_id, str) or not call_id.strip(): continue items.append({ @@ -1278,6 +1401,8 @@ class AIAgent: reasoning_parts: List[str] = [] tool_calls: List[Any] = [] has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} + saw_commentary_phase = False + saw_final_answer_phase = False for item in output: item_type = getattr(item, "type", None) @@ -1291,6 +1416,13 @@ class AIAgent: 
has_incomplete_items = True if item_type == "message": + item_phase = getattr(item, "phase", None) + if isinstance(item_phase, str): + normalized_phase = item_phase.strip().lower() + if normalized_phase in {"commentary", "analysis"}: + saw_commentary_phase = True + elif normalized_phase in {"final_answer", "final"}: + saw_final_answer_phase = True message_text = self._extract_responses_message_text(item) if message_text: content_parts.append(message_text) @@ -1305,9 +1437,19 @@ class AIAgent: arguments = getattr(item, "arguments", "{}") if not isinstance(arguments, str): arguments = str(arguments) - call_id = getattr(item, "call_id", None) or getattr(item, "id", None) or f"call_{uuid.uuid4().hex[:12]}" + raw_call_id = getattr(item, "call_id", None) + raw_item_id = getattr(item, "id", None) + embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) + call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + response_item_id = raw_item_id if isinstance(raw_item_id, str) else None + response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) tool_calls.append(SimpleNamespace( id=call_id, + call_id=call_id, + response_item_id=response_item_id, type="function", function=SimpleNamespace(name=fn_name, arguments=arguments), )) @@ -1316,9 +1458,19 @@ class AIAgent: arguments = getattr(item, "input", "{}") if not isinstance(arguments, str): arguments = str(arguments) - call_id = getattr(item, "call_id", None) or getattr(item, "id", None) or f"call_{uuid.uuid4().hex[:12]}" + raw_call_id = getattr(item, "call_id", None) + raw_item_id = getattr(item, "id", None) + embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) + call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + response_item_id = raw_item_id if isinstance(raw_item_id, str) else None + response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) tool_calls.append(SimpleNamespace( id=call_id, + call_id=call_id, + response_item_id=response_item_id, type="function", function=SimpleNamespace(name=fn_name, arguments=arguments), )) @@ -1339,7 +1491,7 @@ class AIAgent: if tool_calls: finish_reason = "tool_calls" - elif has_incomplete_items: + elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): finish_reason = "incomplete" else: finish_reason = "stop" @@ -1484,17 +1636,42 @@ class AIAgent: ] if assistant_message.tool_calls: - msg["tool_calls"] = [ - { - "id": tool_call.id, + tool_calls = [] + for tool_call in assistant_message.tool_calls: + raw_id = getattr(tool_call, "id", None) + call_id = getattr(tool_call, "call_id", None) + if not isinstance(call_id, str) or not call_id.strip(): + embedded_call_id, _ = self._split_responses_tool_id(raw_id) + call_id = embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + if isinstance(raw_id, str) and raw_id.strip(): + call_id = raw_id.strip() + else: + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + + response_item_id = getattr(tool_call, "response_item_id", None) + if not isinstance(response_item_id, str) or not response_item_id.strip(): + _, embedded_response_item_id = self._split_responses_tool_id(raw_id) + 
response_item_id = embedded_response_item_id + + response_item_id = self._derive_responses_function_call_id( + call_id, + response_item_id if isinstance(response_item_id, str) else None, + ) + + tool_calls.append({ + "id": call_id, + "call_id": call_id, + "response_item_id": response_item_id, "type": tool_call.type, "function": { "name": tool_call.function.name, "arguments": tool_call.function.arguments - } + }, } - for tool_call in assistant_message.tool_calls - ] + ) + msg["tool_calls"] = tool_calls return msg @@ -2021,6 +2198,7 @@ class AIAgent: api_call_count = 0 final_response = None interrupted = False + codex_ack_continuations = 0 # Clear any stale interrupt state at start self.clear_interrupt() @@ -2742,6 +2920,36 @@ class AIAgent: # Reset retry counter on successful content if hasattr(self, '_empty_content_retries'): self._empty_content_retries = 0 + + if ( + self.api_mode == "codex_responses" + and self.valid_tool_names + and codex_ack_continuations < 2 + and self._looks_like_codex_intermediate_ack( + user_message=user_message, + assistant_content=final_response, + messages=messages, + ) + ): + codex_ack_continuations += 1 + interim_msg = self._build_assistant_message(assistant_message, "incomplete") + messages.append(interim_msg) + self._log_msg_to_db(interim_msg) + + continue_msg = { + "role": "user", + "content": ( + "[System: Continue now. Execute the required tool calls and only " + "send your final answer after completing the task.]" + ), + } + messages.append(continue_msg) + self._log_msg_to_db(continue_msg) + self._session_messages = messages + self._save_session_log(messages) + continue + + codex_ack_continuations = 0 final_msg = self._build_assistant_message(assistant_message, finish_reason) diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py index 846d9c1c0a..27723bd676 100644 --- a/tests/test_run_agent_codex_responses.py +++ b/tests/test_run_agent_codex_responses.py @@ -66,7 +66,7 @@ def _codex_tool_call_response(): output=[ SimpleNamespace( type="function_call", - id="call_1", + id="fc_1", call_id="call_1", name="terminal", arguments="{}", @@ -93,6 +93,37 @@ def _codex_incomplete_message_response(text: str): ) +def _codex_commentary_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + phase="commentary", + status="completed", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="completed", + model="gpt-5-codex", + ) + + +def _codex_ack_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + status="completed", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="completed", + model="gpt-5-codex", + ) + + def test_api_mode_uses_explicit_provider_when_codex(monkeypatch): _patch_agent_bootstrap(monkeypatch) agent = run_agent.AIAgent( @@ -157,6 +188,7 @@ def test_build_api_kwargs_codex(monkeypatch): assert kwargs["input"][0]["role"] == "user" assert kwargs["tools"][0]["type"] == "function" assert kwargs["tools"][0]["name"] == "terminal" + assert kwargs["tools"][0]["strict"] is False assert "function" not in kwargs["tools"][0] @@ -197,6 +229,99 @@ def test_run_conversation_codex_tool_round_trip(monkeypatch): assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) +def 
test_chat_messages_to_responses_input_uses_fc_id_for_function_call(monkeypatch): + agent = _build_agent(monkeypatch) + items = agent._chat_messages_to_responses_input( + [ + {"role": "user", "content": "Run terminal"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": {"name": "terminal", "arguments": "{}"}, + } + ], + }, + {"role": "tool", "tool_call_id": "call_abc123", "content": '{"ok":true}'}, + ] + ) + + function_call = next(item for item in items if item.get("type") == "function_call") + function_output = next(item for item in items if item.get("type") == "function_call_output") + + assert function_call["call_id"] == "call_abc123" + assert function_call["id"] == "fc_abc123" + assert function_output["call_id"] == "call_abc123" + + +def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch): + agent = _build_agent(monkeypatch) + items = agent._chat_messages_to_responses_input( + [ + {"role": "user", "content": "Run terminal"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_pair123|fc_pair123", + "type": "function", + "function": {"name": "terminal", "arguments": "{}"}, + } + ], + }, + {"role": "tool", "tool_call_id": "call_pair123|fc_pair123", "content": '{"ok":true}'}, + ] + ) + + function_call = next(item for item in items if item.get("type") == "function_call") + function_output = next(item for item in items if item.get("type") == "function_call_output") + + assert function_call["call_id"] == "call_pair123" + assert function_call["id"] == "fc_pair123" + assert function_output["call_id"] == "call_pair123" + + +def test_run_conversation_codex_replay_payload_keeps_call_id_and_fc_id(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [_codex_tool_call_response(), _codex_message_response("done")] + requests = [] + + def _fake_api_call(api_kwargs): + requests.append(api_kwargs) + return responses.pop(0) + + monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("run a command") + + assert result["completed"] is True + assert result["final_response"] == "done" + assert len(requests) >= 2 + + replay_input = requests[1]["input"] + function_call = next(item for item in replay_input if item.get("type") == "function_call") + function_output = next(item for item in replay_input if item.get("type") == "function_call_output") + assert function_call["call_id"] == "call_1" + assert function_call["id"] == "fc_1" + assert function_output["call_id"] == "call_1" + + def test_run_conversation_codex_continues_after_incomplete_interim_message(monkeypatch): agent = _build_agent(monkeypatch) responses = [ @@ -229,3 +354,88 @@ def test_run_conversation_codex_continues_after_incomplete_interim_message(monke for msg in result["messages"] ) assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch): + agent = _build_agent(monkeypatch) + assistant_message, finish_reason = agent._normalize_codex_response( + _codex_commentary_message_response("I'll inspect the repository first.") 
+ ) + + assert finish_reason == "incomplete" + assert "inspect the repository" in (assistant_message.content or "") + + +def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_commentary_message_response("I'll inspect the repo structure first."), + _codex_tool_call_response(), + _codex_message_response("Architecture summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("analyze repo") + + assert result["completed"] is True + assert result["final_response"] == "Architecture summary complete." + assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "inspect the repo structure" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_run_conversation_codex_continues_after_ack_stop_message(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_ack_message_response( + "Absolutely — I can do that. I'll inspect ~/openclaw-studio and report back with a walkthrough." + ), + _codex_tool_call_response(), + _codex_message_response("Architecture summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("look into ~/openclaw-studio and tell me how it works") + + assert result["completed"] is True + assert result["final_response"] == "Architecture summary complete." + assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "inspect ~/openclaw-studio" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any( + msg.get("role") == "user" + and "Continue now. Execute the required tool calls" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) From 7727a792f2758b30d04a27ec05b9e31c7f4c33a2 Mon Sep 17 00:00:00 2001 From: George Pickett Date: Wed, 25 Feb 2026 18:21:50 -0800 Subject: [PATCH 03/31] Revert README Codex messaging changes --- README.md | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 9ddbb3dffd..a97e637711 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ **The fully open-source AI agent that grows with you.** Install it on a machine, give it your messaging accounts, and it becomes a persistent personal agent — learning your projects, building its own skills, running tasks on a schedule, and reaching you wherever you are. An autonomous agent that lives on your server, remembers what it learns, and gets more capable the longer it runs. 
-Use any model you want — log in with [Nous Portal](https://portal.nousresearch.com), log in with OpenAI Codex via `hermes login --provider openai-codex`, connect an [OpenRouter](https://openrouter.ai) key for 200+ models, or point it at your own VLLM/SGLang endpoint. Switch with `hermes model` — no code changes, no lock-in. +Use any model you want — log in with a [Nous Portal](https://portal.nousresearch.com) subscription for zero-config access, connect an [OpenRouter](https://openrouter.ai) key for 200+ models, or point it at your own VLLM/SGLang endpoint. Switch with `hermes model` — no code changes, no lock-in. Built by [Nous Research](https://nousresearch.com). Under the hood, the same architecture powers [batch data generation](#batch-processing) and [RL training environments](#-atropos-rl-environments) for training the next generation of tool-calling models. @@ -121,14 +121,11 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro | Provider | Setup | |----------|-------| | **Nous Portal** | `hermes login` (OAuth, subscription-based) | -| **OpenAI Codex** | `hermes login --provider openai-codex` (uses `CODEX_HOME/auth.json`) | | **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` | | **Custom Endpoint** | `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | **Note:** Even when using Nous Portal or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools. -**Codex note:** The `openai-codex` provider uses Codex CLI auth (`CODEX_HOME/auth.json`, default `~/.codex/auth.json`) and Hermes routes that provider through the Responses API transport. - --- ## Configuration @@ -139,7 +136,7 @@ All your settings are stored in `~/.hermes/` for easy access: ~/.hermes/ ├── config.yaml # Settings (model, terminal, TTS, compression, etc.) ├── .env # API keys and secrets -├── auth.json # OAuth provider credentials (Nous Portal, OpenAI Codex) +├── auth.json # OAuth provider credentials (Nous Portal, etc.) ├── SOUL.md # Optional: global persona (agent embodies this personality) ├── memories/ # Persistent memory (MEMORY.md, USER.md) ├── skills/ # Agent-created skills (managed via skill_manage tool) @@ -338,7 +335,6 @@ hermes chat -q "Hello" # Single query mode # Provider & model management hermes model # Switch provider and model interactively hermes login # Authenticate with Nous Portal (OAuth) -hermes login --provider openai-codex hermes logout # Clear stored OAuth credentials # Configuration @@ -1410,7 +1406,7 @@ All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set t **Provider Auth (OAuth):** | Variable | Description | |----------|-------------| -| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex` (default: `auto`) | +| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous` (default: `auto`) | | `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) | | `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL | | `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) | @@ -1485,7 +1481,7 @@ All variables go in `~/.hermes/.env`. 
Run `hermes config set VAR value` to set t |------|-------------| | `~/.hermes/config.yaml` | Your settings | | `~/.hermes/.env` | API keys and secrets | -| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes login`, including Codex metadata) | +| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes login`) | | `~/.hermes/cron/` | Scheduled jobs data | | `~/.hermes/sessions/` | Gateway session data | | `~/.hermes/hermes-agent/` | Installation directory | @@ -1513,11 +1509,11 @@ hermes config # View current settings Common issues: - **"API key not set"**: Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key` - **"hermes: command not found"**: Reload your shell (`source ~/.bashrc`) or check PATH -- **"Run `hermes login` to re-authenticate"**: Your OAuth session expired. Use `hermes login` for Nous or `hermes login --provider openai-codex` for Codex. +- **"Run `hermes login` to re-authenticate"**: Your Nous Portal session expired. Run `hermes login` to refresh. - **"No active paid subscription"**: Your Nous Portal account needs an active subscription for inference. - **Gateway won't start**: Check `hermes gateway status` and logs - **Missing config after update**: Run `hermes config check` to see what's new, then `hermes config migrate` to add missing options -- **Provider auto-detection wrong**: Force a provider with `hermes chat --provider openrouter` (or `nous` / `openai-codex`) or set `HERMES_INFERENCE_PROVIDER` in `.env` +- **Provider auto-detection wrong**: Force a provider with `hermes chat --provider openrouter` or set `HERMES_INFERENCE_PROVIDER` in `.env` --- From 3ba8b15f13a9b988357c8d46e7cbd4be03ec3c5c Mon Sep 17 00:00:00 2001 From: George Pickett Date: Wed, 25 Feb 2026 18:25:15 -0800 Subject: [PATCH 04/31] Tone down Codex docs and prompt wording --- .env.example | 15 +++------------ cli-config.yaml.example | 1 - hermes_cli/main.py | 2 +- hermes_cli/setup.py | 4 +--- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/.env.example b/.env.example index 452f23eb57..ac6a187f34 100644 --- a/.env.example +++ b/.env.example @@ -2,14 +2,10 @@ # Copy this file to .env and fill in your API keys # ============================================================================= -# LLM PROVIDER +# LLM PROVIDER (OpenRouter) # ============================================================================= -# Provider selection override: auto | openrouter | nous | openai-codex -# If unset, Hermes auto-detects from auth/config. -# HERMES_INFERENCE_PROVIDER=auto - -# OpenRouter key (required when using OpenRouter directly, and still used by -# some tools even when your primary chat provider is Nous/Codex/custom). 
+# OpenRouter provides access to many models through one API +# All LLM calls go through OpenRouter - no direct provider keys needed # Get your key at: https://openrouter.ai/keys OPENROUTER_API_KEY= @@ -17,11 +13,6 @@ OPENROUTER_API_KEY= # Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus LLM_MODEL=anthropic/claude-opus-4.6 -# OpenAI Codex provider uses Codex CLI auth state: -# hermes login --provider openai-codex -# (reads CODEX_HOME/auth.json, default: ~/.codex/auth.json) -# CODEX_HOME=~/.codex - # ============================================================================= # TOOL API KEYS # ============================================================================= diff --git a/cli-config.yaml.example b/cli-config.yaml.example index d42d9db267..0b49368dc5 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -13,7 +13,6 @@ model: # "auto" - Use Nous Portal if logged in, otherwise OpenRouter/env vars (default) # "openrouter" - Always use OpenRouter API key from OPENROUTER_API_KEY # "nous" - Always use Nous Portal (requires: hermes login) - # "openai-codex" - Always use Codex CLI auth (requires: hermes login --provider openai-codex) # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var. provider: "auto" diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 3d1c76c005..e611f69293 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -197,7 +197,7 @@ def cmd_model(args): providers = [ ("openrouter", "OpenRouter (100+ models, pay-per-use)"), ("nous", "Nous Portal (Nous Research subscription)"), - ("openai-codex", "OpenAI Codex (ChatGPT/Codex CLI login)"), + ("openai-codex", "OpenAI Codex"), ("custom", "Custom endpoint (self-hosted / VLLM / etc.)"), ] diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 08fd28dddd..021c429a6a 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -634,7 +634,7 @@ def run_setup_wizard(args): provider_choices = [ "Login with Nous Portal (Nous Research subscription)", - "Login with OpenAI Codex (ChatGPT/Codex CLI auth)", + "Login with OpenAI Codex", "OpenRouter API key (100+ models, pay-per-use)", "Custom OpenAI-compatible endpoint (self-hosted / VLLM / etc.)", ] @@ -698,8 +698,6 @@ def run_setup_wizard(args): selected_provider = "openai-codex" print() print_header("OpenAI Codex Login") - print_info("This uses your Codex CLI auth state from CODEX_HOME/auth.json.") - print_info("If you're not logged in, Hermes will run `codex login`.") print() try: From e63986b53487b098ad144f8639e20be7d274b98e Mon Sep 17 00:00:00 2001 From: George Pickett Date: Wed, 25 Feb 2026 18:56:06 -0800 Subject: [PATCH 05/31] Harden Codex stream handling and ack continuation --- run_agent.py | 54 ++++++++++-- tests/test_run_agent_codex_responses.py | 112 ++++++++++++++++++++++++ 2 files changed, 160 insertions(+), 6 deletions(-) diff --git a/run_agent.py b/run_agent.py index 7a06823850..ae5c3b5deb 100644 --- a/run_agent.py +++ b/run_agent.py @@ -508,6 +508,7 @@ class AIAgent: action_markers = ( "look into", + "look at", "inspect", "scan", "check", @@ -526,11 +527,33 @@ class AIAgent: "report back", "summarize", ) + workspace_markers = ( + "directory", + "current directory", + "current dir", + "cwd", + "repo", + "repository", + "codebase", + "project", + "folder", + "filesystem", + "file tree", + "files", + "path", + ) user_text = (user_message or "").strip().lower() - user_requests_action = any(marker in user_text for marker in action_markers) or "~/" in user_text or "/" 
in user_text + user_targets_workspace = ( + any(marker in user_text for marker in workspace_markers) + or "~/" in user_text + or "/" in user_text + ) assistant_mentions_action = any(marker in assistant_text for marker in action_markers) - return user_requests_action and assistant_mentions_action + assistant_targets_workspace = any( + marker in assistant_text for marker in workspace_markers + ) + return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action def _extract_reasoning(self, assistant_message) -> Optional[str]: @@ -1499,10 +1522,29 @@ class AIAgent: def _run_codex_stream(self, api_kwargs: dict): """Execute one streaming Responses API request and return the final response.""" - with self.client.responses.stream(**api_kwargs) as stream: - for _ in stream: - pass - return stream.get_final_response() + max_stream_retries = 1 + for attempt in range(max_stream_retries + 1): + try: + with self.client.responses.stream(**api_kwargs) as stream: + for _ in stream: + pass + return stream.get_final_response() + except RuntimeError as exc: + err_text = str(exc) + missing_completed = "response.completed" in err_text + if missing_completed and attempt < max_stream_retries: + logger.debug( + "Responses stream closed before completion (attempt %s/%s); retrying.", + attempt + 1, + max_stream_retries + 1, + ) + continue + if missing_completed: + logger.debug( + "Responses stream did not emit response.completed; falling back to non-stream create." + ) + return self.client.responses.create(**api_kwargs) + raise def _interruptible_api_call(self, api_kwargs: dict): """ diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py index 27723bd676..d4fd75ebf7 100644 --- a/tests/test_run_agent_codex_responses.py +++ b/tests/test_run_agent_codex_responses.py @@ -124,6 +124,26 @@ def _codex_ack_message_response(text: str): ) +class _FakeResponsesStream: + def __init__(self, *, final_response=None, final_error=None): + self._final_response = final_response + self._final_error = final_error + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def __iter__(self): + return iter(()) + + def get_final_response(self): + if self._final_error is not None: + raise self._final_error + return self._final_response + + def test_api_mode_uses_explicit_provider_when_codex(monkeypatch): _patch_agent_bootstrap(monkeypatch) agent = run_agent.AIAgent( @@ -192,6 +212,57 @@ def test_build_api_kwargs_codex(monkeypatch): assert "function" not in kwargs["tools"][0] +def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch): + agent = _build_agent(monkeypatch) + calls = {"stream": 0} + + def _fake_stream(**kwargs): + calls["stream"] += 1 + if calls["stream"] == 1: + return _FakeResponsesStream( + final_error=RuntimeError("Didn't receive a `response.completed` event.") + ) + return _FakeResponsesStream(final_response=_codex_message_response("stream ok")) + + agent.client = SimpleNamespace( + responses=SimpleNamespace( + stream=_fake_stream, + create=lambda **kwargs: _codex_message_response("fallback"), + ) + ) + + response = agent._run_codex_stream({"model": "gpt-5-codex"}) + assert calls["stream"] == 2 + assert response.output[0].content[0].text == "stream ok" + + +def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(monkeypatch): + agent = _build_agent(monkeypatch) + calls = {"stream": 0, "create": 0} + + def _fake_stream(**kwargs): + calls["stream"] += 1 + return 
_FakeResponsesStream( + final_error=RuntimeError("Didn't receive a `response.completed` event.") + ) + + def _fake_create(**kwargs): + calls["create"] += 1 + return _codex_message_response("create fallback ok") + + agent.client = SimpleNamespace( + responses=SimpleNamespace( + stream=_fake_stream, + create=_fake_create, + ) + ) + + response = agent._run_codex_stream({"model": "gpt-5-codex"}) + assert calls["stream"] == 2 + assert calls["create"] == 1 + assert response.output[0].content[0].text == "create fallback ok" + + def test_run_conversation_codex_plain_text(monkeypatch): agent = _build_agent(monkeypatch) monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: _codex_message_response("OK")) @@ -439,3 +510,44 @@ def test_run_conversation_codex_continues_after_ack_stop_message(monkeypatch): for msg in result["messages"] ) assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_run_conversation_codex_continues_after_ack_for_directory_listing_prompt(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_ack_message_response( + "I'll check what's in the current directory and call out 3 notable items." + ), + _codex_tool_call_response(), + _codex_message_response("Directory summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("look at current directory and list 3 notable things") + + assert result["completed"] is True + assert result["final_response"] == "Directory summary complete." + assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "current directory" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any( + msg.get("role") == "user" + and "Continue now. 
Execute the required tool calls" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) From 47f16505d2e099085908aa5a056178d323350b4e Mon Sep 17 00:00:00 2001 From: George Pickett Date: Wed, 25 Feb 2026 19:00:11 -0800 Subject: [PATCH 06/31] Omit optional function_call id in Responses replay input --- run_agent.py | 11 ----------- tests/test_run_agent_codex_responses.py | 10 +++++----- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/run_agent.py b/run_agent.py index ae5c3b5deb..5170178b70 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1323,16 +1323,6 @@ class AIAgent: call_id = f"call_{uuid.uuid4().hex[:12]}" call_id = call_id.strip() - response_item_id = tc.get("response_item_id") - if not isinstance(response_item_id, str) or not response_item_id.strip(): - response_item_id = tc.get("responses_item_id") - if not isinstance(response_item_id, str) or not response_item_id.strip(): - response_item_id = embedded_response_item_id - response_item_id = self._derive_responses_function_call_id( - call_id, - response_item_id if isinstance(response_item_id, str) else None, - ) - arguments = fn.get("arguments", "{}") if isinstance(arguments, dict): arguments = json.dumps(arguments, ensure_ascii=False) @@ -1342,7 +1332,6 @@ class AIAgent: items.append({ "type": "function_call", - "id": response_item_id, "call_id": call_id, "name": fn_name, "arguments": arguments, diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py index d4fd75ebf7..7121cc5720 100644 --- a/tests/test_run_agent_codex_responses.py +++ b/tests/test_run_agent_codex_responses.py @@ -300,7 +300,7 @@ def test_run_conversation_codex_tool_round_trip(monkeypatch): assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) -def test_chat_messages_to_responses_input_uses_fc_id_for_function_call(monkeypatch): +def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeypatch): agent = _build_agent(monkeypatch) items = agent._chat_messages_to_responses_input( [ @@ -324,7 +324,7 @@ def test_chat_messages_to_responses_input_uses_fc_id_for_function_call(monkeypat function_output = next(item for item in items if item.get("type") == "function_call_output") assert function_call["call_id"] == "call_abc123" - assert function_call["id"] == "fc_abc123" + assert "id" not in function_call assert function_output["call_id"] == "call_abc123" @@ -352,11 +352,11 @@ def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch): function_output = next(item for item in items if item.get("type") == "function_call_output") assert function_call["call_id"] == "call_pair123" - assert function_call["id"] == "fc_pair123" + assert "id" not in function_call assert function_output["call_id"] == "call_pair123" -def test_run_conversation_codex_replay_payload_keeps_call_id_and_fc_id(monkeypatch): +def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch): agent = _build_agent(monkeypatch) responses = [_codex_tool_call_response(), _codex_message_response("done")] requests = [] @@ -389,7 +389,7 @@ def test_run_conversation_codex_replay_payload_keeps_call_id_and_fc_id(monkeypat function_call = next(item for item in replay_input if item.get("type") == "function_call") function_output = next(item for item in replay_input if item.get("type") == "function_call_output") assert function_call["call_id"] == "call_1" 
- assert function_call["id"] == "fc_1" + assert "id" not in function_call assert function_output["call_id"] == "call_1" From 91bdb9eb2d8e93a1aa37029b72da14b7b8fceebd Mon Sep 17 00:00:00 2001 From: George Pickett Date: Wed, 25 Feb 2026 19:08:11 -0800 Subject: [PATCH 07/31] Fix Codex stream fallback for Responses completion gaps --- run_agent.py | 42 ++++++++++++++++++++-- tests/test_run_agent_codex_responses.py | 48 +++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) diff --git a/run_agent.py b/run_agent.py index 5170178b70..6d69374012 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1530,11 +1530,49 @@ class AIAgent: continue if missing_completed: logger.debug( - "Responses stream did not emit response.completed; falling back to non-stream create." + "Responses stream did not emit response.completed; falling back to create(stream=True)." ) - return self.client.responses.create(**api_kwargs) + return self._run_codex_create_stream_fallback(api_kwargs) raise + def _run_codex_create_stream_fallback(self, api_kwargs: dict): + """Fallback path for stream completion edge cases on Codex-style Responses backends.""" + fallback_kwargs = dict(api_kwargs) + fallback_kwargs["stream"] = True + stream_or_response = self.client.responses.create(**fallback_kwargs) + + # Compatibility shim for mocks or providers that still return a concrete response. + if hasattr(stream_or_response, "output"): + return stream_or_response + if not hasattr(stream_or_response, "__iter__"): + return stream_or_response + + terminal_response = None + try: + for event in stream_or_response: + event_type = getattr(event, "type", None) + if not event_type and isinstance(event, dict): + event_type = event.get("type") + if event_type not in {"response.completed", "response.incomplete", "response.failed"}: + continue + + terminal_response = getattr(event, "response", None) + if terminal_response is None and isinstance(event, dict): + terminal_response = event.get("response") + if terminal_response is not None: + return terminal_response + finally: + close_fn = getattr(stream_or_response, "close", None) + if callable(close_fn): + try: + close_fn() + except Exception: + pass + + if terminal_response is not None: + return terminal_response + raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.") + def _interruptible_api_call(self, api_kwargs: dict): """ Run the API call in a background thread so the main conversation loop diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py index 7121cc5720..fc7c619802 100644 --- a/tests/test_run_agent_codex_responses.py +++ b/tests/test_run_agent_codex_responses.py @@ -144,6 +144,18 @@ class _FakeResponsesStream: return self._final_response +class _FakeCreateStream: + def __init__(self, events): + self._events = list(events) + self.closed = False + + def __iter__(self): + return iter(self._events) + + def close(self): + self.closed = True + + def test_api_mode_uses_explicit_provider_when_codex(monkeypatch): _patch_agent_bootstrap(monkeypatch) agent = run_agent.AIAgent( @@ -263,6 +275,42 @@ def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(mon assert response.output[0].content[0].text == "create fallback ok" +def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch): + agent = _build_agent(monkeypatch) + calls = {"stream": 0, "create": 0} + create_stream = _FakeCreateStream( + [ + SimpleNamespace(type="response.created"), + 
SimpleNamespace(type="response.in_progress"), + SimpleNamespace(type="response.completed", response=_codex_message_response("streamed create ok")), + ] + ) + + def _fake_stream(**kwargs): + calls["stream"] += 1 + return _FakeResponsesStream( + final_error=RuntimeError("Didn't receive a `response.completed` event.") + ) + + def _fake_create(**kwargs): + calls["create"] += 1 + assert kwargs.get("stream") is True + return create_stream + + agent.client = SimpleNamespace( + responses=SimpleNamespace( + stream=_fake_stream, + create=_fake_create, + ) + ) + + response = agent._run_codex_stream({"model": "gpt-5-codex"}) + assert calls["stream"] == 2 + assert calls["create"] == 1 + assert create_stream.closed is True + assert response.output[0].content[0].text == "streamed create ok" + + def test_run_conversation_codex_plain_text(monkeypatch): agent = _build_agent(monkeypatch) monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: _codex_message_response("OK")) From 74c662b63a8c559ebb9638a42bd5195d4fc726f4 Mon Sep 17 00:00:00 2001 From: George Pickett Date: Wed, 25 Feb 2026 19:27:54 -0800 Subject: [PATCH 08/31] Harden Codex auth refresh and responses compatibility --- hermes_cli/auth.py | 227 +++++++++++++++++++++++- hermes_cli/codex_models.py | 91 ++++++++++ hermes_cli/main.py | 10 +- hermes_cli/setup.py | 8 +- run_agent.py | 224 ++++++++++++++++++++++- tests/test_auth_codex_provider.py | 105 +++++++++++ tests/test_codex_execution_paths.py | 175 ++++++++++++++++++ tests/test_codex_models.py | 40 +++++ tests/test_run_agent_codex_responses.py | 138 +++++++++++++- 9 files changed, 996 insertions(+), 22 deletions(-) create mode 100644 hermes_cli/codex_models.py create mode 100644 tests/test_codex_execution_paths.py create mode 100644 tests/test_codex_models.py diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 328b84f145..864916b32f 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -20,6 +20,7 @@ import logging import os import shutil import stat +import base64 import subprocess import time import webbrowser @@ -58,6 +59,9 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1 # poll at most every 1s DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex" +CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" +CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token" +CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # ============================================================================= @@ -380,6 +384,27 @@ def _optional_base_url(value: Any) -> Optional[str]: return cleaned if cleaned else None +def _decode_jwt_claims(token: Any) -> Dict[str, Any]: + if not isinstance(token, str) or token.count(".") != 2: + return {} + payload = token.split(".")[1] + payload += "=" * ((4 - len(payload) % 4) % 4) + try: + raw = base64.urlsafe_b64decode(payload.encode("utf-8")) + claims = json.loads(raw.decode("utf-8")) + except Exception: + return {} + return claims if isinstance(claims, dict) else {} + + +def _codex_access_token_is_expiring(access_token: Any, skew_seconds: int) -> bool: + claims = _decode_jwt_claims(access_token) + exp = claims.get("exp") + if not isinstance(exp, (int, float)): + return False + return float(exp) <= (time.time() + max(0, int(skew_seconds))) + + # ============================================================================= # SSH / remote session detection # 
============================================================================= @@ -405,6 +430,39 @@ def _codex_auth_file_path() -> Path: return resolve_codex_home_path() / "auth.json" +def _codex_auth_lock_path(auth_path: Path) -> Path: + return auth_path.with_suffix(auth_path.suffix + ".lock") + + +@contextmanager +def _codex_auth_file_lock( + auth_path: Path, + timeout_seconds: float = AUTH_LOCK_TIMEOUT_SECONDS, +): + lock_path = _codex_auth_lock_path(auth_path) + lock_path.parent.mkdir(parents=True, exist_ok=True) + + with lock_path.open("a+") as lock_file: + if fcntl is None: + yield + return + + deadline = time.time() + max(1.0, timeout_seconds) + while True: + try: + fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) + break + except BlockingIOError: + if time.time() >= deadline: + raise TimeoutError(f"Timed out waiting for Codex auth lock: {lock_path}") + time.sleep(0.05) + + try: + yield + finally: + fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN) + + def read_codex_auth_file() -> Dict[str, Any]: """Read and validate Codex auth.json shape.""" codex_home = resolve_codex_home_path() @@ -469,11 +527,172 @@ def read_codex_auth_file() -> Dict[str, Any]: } -def resolve_codex_runtime_credentials() -> Dict[str, Any]: +def _persist_codex_auth_payload( + auth_path: Path, + payload: Dict[str, Any], + *, + lock_held: bool = False, +) -> None: + auth_path.parent.mkdir(parents=True, exist_ok=True) + + def _write() -> None: + serialized = json.dumps(payload, indent=2, ensure_ascii=False) + "\n" + tmp_path = auth_path.parent / f".{auth_path.name}.{os.getpid()}.{time.time_ns()}.tmp" + try: + with tmp_path.open("w", encoding="utf-8") as tmp_file: + tmp_file.write(serialized) + tmp_file.flush() + os.fsync(tmp_file.fileno()) + os.replace(tmp_path, auth_path) + finally: + if tmp_path.exists(): + try: + tmp_path.unlink() + except OSError: + pass + + try: + auth_path.chmod(stat.S_IRUSR | stat.S_IWUSR) + except OSError: + pass + + if lock_held: + _write() + return + + with _codex_auth_file_lock(auth_path): + _write() + + +def _refresh_codex_auth_tokens( + *, + payload: Dict[str, Any], + auth_path: Path, + timeout_seconds: float, + lock_held: bool = False, +) -> Dict[str, Any]: + tokens = payload.get("tokens") + if not isinstance(tokens, dict): + raise AuthError( + "Codex auth file is missing a valid 'tokens' object.", + provider="openai-codex", + code="codex_auth_invalid_shape", + relogin_required=True, + ) + + refresh_token = tokens.get("refresh_token") + if not isinstance(refresh_token, str) or not refresh_token.strip(): + raise AuthError( + "Codex auth file is missing tokens.refresh_token.", + provider="openai-codex", + code="codex_auth_missing_refresh_token", + relogin_required=True, + ) + + timeout = httpx.Timeout(max(5.0, float(timeout_seconds))) + with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}) as client: + response = client.post( + CODEX_OAUTH_TOKEN_URL, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + data={ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": CODEX_OAUTH_CLIENT_ID, + }, + ) + + if response.status_code != 200: + code = "codex_refresh_failed" + message = f"Codex token refresh failed with status {response.status_code}." 
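+        # Default to a retryable failure; only the OAuth error codes checked
+        # below (invalid_grant / invalid_token / invalid_request) mean the
+        # refresh token itself is bad and a fresh login is required.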
+ relogin_required = False + try: + err = response.json() + if isinstance(err, dict): + err_code = err.get("error") + if isinstance(err_code, str) and err_code.strip(): + code = err_code.strip() + err_desc = err.get("error_description") or err.get("message") + if isinstance(err_desc, str) and err_desc.strip(): + message = f"Codex token refresh failed: {err_desc.strip()}" + except Exception: + pass + if code in {"invalid_grant", "invalid_token", "invalid_request"}: + relogin_required = True + raise AuthError( + message, + provider="openai-codex", + code=code, + relogin_required=relogin_required, + ) + + try: + refresh_payload = response.json() + except Exception as exc: + raise AuthError( + "Codex token refresh returned invalid JSON.", + provider="openai-codex", + code="codex_refresh_invalid_json", + relogin_required=True, + ) from exc + + access_token = refresh_payload.get("access_token") + if not isinstance(access_token, str) or not access_token.strip(): + raise AuthError( + "Codex token refresh response was missing access_token.", + provider="openai-codex", + code="codex_refresh_missing_access_token", + relogin_required=True, + ) + + updated_tokens = dict(tokens) + updated_tokens["access_token"] = access_token.strip() + next_refresh = refresh_payload.get("refresh_token") + if isinstance(next_refresh, str) and next_refresh.strip(): + updated_tokens["refresh_token"] = next_refresh.strip() + payload["tokens"] = updated_tokens + payload["last_refresh"] = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + _persist_codex_auth_payload(auth_path, payload, lock_held=lock_held) + return updated_tokens + + +def resolve_codex_runtime_credentials( + *, + force_refresh: bool = False, + refresh_if_expiring: bool = True, + refresh_skew_seconds: int = CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, +) -> Dict[str, Any]: """Resolve runtime credentials from Codex CLI auth state.""" data = read_codex_auth_file() payload = data["payload"] - tokens = data["tokens"] + tokens = dict(data["tokens"]) + auth_path = data["auth_path"] + access_token = str(tokens.get("access_token", "") or "").strip() + refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20")) + + should_refresh = bool(force_refresh) + if (not should_refresh) and refresh_if_expiring: + should_refresh = _codex_access_token_is_expiring(access_token, refresh_skew_seconds) + if should_refresh: + lock_timeout = max(float(AUTH_LOCK_TIMEOUT_SECONDS), refresh_timeout_seconds + 5.0) + with _codex_auth_file_lock(auth_path, timeout_seconds=lock_timeout): + data = read_codex_auth_file() + payload = data["payload"] + tokens = dict(data["tokens"]) + access_token = str(tokens.get("access_token", "") or "").strip() + + should_refresh = bool(force_refresh) + if (not should_refresh) and refresh_if_expiring: + should_refresh = _codex_access_token_is_expiring(access_token, refresh_skew_seconds) + + if should_refresh: + tokens = _refresh_codex_auth_tokens( + payload=payload, + auth_path=auth_path, + timeout_seconds=refresh_timeout_seconds, + lock_held=True, + ) + access_token = str(tokens.get("access_token", "") or "").strip() + base_url = ( os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") or DEFAULT_CODEX_BASE_URL @@ -482,11 +701,11 @@ def resolve_codex_runtime_credentials() -> Dict[str, Any]: return { "provider": "openai-codex", "base_url": base_url, - "api_key": tokens["access_token"], + "api_key": access_token, "source": "codex-auth-json", "last_refresh": payload.get("last_refresh"), "auth_mode": 
payload.get("auth_mode"), - "auth_file": str(data["auth_path"]), + "auth_file": str(auth_path), "codex_home": str(data["codex_home"]), } diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py new file mode 100644 index 0000000000..ed1009c546 --- /dev/null +++ b/hermes_cli/codex_models.py @@ -0,0 +1,91 @@ +"""Codex model discovery from local Codex CLI cache/config.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import List, Optional + +from hermes_cli.auth import resolve_codex_home_path + +DEFAULT_CODEX_MODELS: List[str] = [ + "gpt-5-codex", + "gpt-5.3-codex", + "gpt-5.2-codex", + "gpt-5.1-codex", +] + + +def _read_default_model(codex_home: Path) -> Optional[str]: + config_path = codex_home / "config.toml" + if not config_path.exists(): + return None + try: + import tomllib + except Exception: + return None + try: + payload = tomllib.loads(config_path.read_text(encoding="utf-8")) + except Exception: + return None + model = payload.get("model") if isinstance(payload, dict) else None + if isinstance(model, str) and model.strip(): + return model.strip() + return None + + +def _read_cache_models(codex_home: Path) -> List[str]: + cache_path = codex_home / "models_cache.json" + if not cache_path.exists(): + return [] + try: + raw = json.loads(cache_path.read_text(encoding="utf-8")) + except Exception: + return [] + + entries = raw.get("models") if isinstance(raw, dict) else None + sortable = [] + if isinstance(entries, list): + for item in entries: + if not isinstance(item, dict): + continue + slug = item.get("slug") + if not isinstance(slug, str) or not slug.strip(): + continue + slug = slug.strip() + if "codex" not in slug.lower(): + continue + if item.get("supported_in_api") is False: + continue + visibility = item.get("visibility") + if isinstance(visibility, str) and visibility.strip().lower() == "hidden": + continue + priority = item.get("priority") + rank = int(priority) if isinstance(priority, (int, float)) else 10_000 + sortable.append((rank, slug)) + + sortable.sort(key=lambda item: (item[0], item[1])) + deduped: List[str] = [] + for _, slug in sortable: + if slug not in deduped: + deduped.append(slug) + return deduped + + +def get_codex_model_ids() -> List[str]: + codex_home = resolve_codex_home_path() + ordered: List[str] = [] + + default_model = _read_default_model(codex_home) + if default_model: + ordered.append(default_model) + + for model_id in _read_cache_models(codex_home): + if model_id not in ordered: + ordered.append(model_id) + + for model_id in DEFAULT_CODEX_MODELS: + if model_id not in ordered: + ordered.append(model_id) + + return ordered diff --git a/hermes_cli/main.py b/hermes_cli/main.py index e611f69293..0fe8116210 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -385,6 +385,7 @@ def _model_flow_openai_codex(config, current_model=""): _update_config_for_provider, _login_openai_codex, PROVIDER_REGISTRY, DEFAULT_CODEX_BASE_URL, ) + from hermes_cli.codex_models import get_codex_model_ids from hermes_cli.config import get_env_value, save_env_value import argparse @@ -402,14 +403,7 @@ def _model_flow_openai_codex(config, current_model=""): print(f"Login failed: {exc}") return - # Codex models are not discoverable through /models with this auth path, - # so provide curated IDs with custom fallback. 
- codex_models = [ - "gpt-5-codex", - "gpt-5.3-codex", - "gpt-5.2-codex", - "gpt-5.1-codex", - ] + codex_models = get_codex_model_ids() selected = _prompt_model_selection(codex_models, current_model=current_model) if selected: diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 021c429a6a..035d5167cd 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -826,12 +826,8 @@ def run_setup_wizard(args): save_env_value("LLM_MODEL", custom) # else: keep current elif selected_provider == "openai-codex": - codex_models = [ - "gpt-5-codex", - "gpt-5.3-codex", - "gpt-5.2-codex", - "gpt-5.1-codex", - ] + from hermes_cli.codex_models import get_codex_model_ids + codex_models = get_codex_model_ids() model_choices = [f"{m}" for m in codex_models] model_choices.append("Custom model") model_choices.append(f"Keep current ({current_model})") diff --git a/run_agent.py b/run_agent.py index 6d69374012..6adc5b1a89 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1357,6 +1357,175 @@ class AIAgent: return items + def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]: + if not isinstance(raw_items, list): + raise ValueError("Codex Responses input must be a list of input items.") + + normalized: List[Dict[str, Any]] = [] + for idx, item in enumerate(raw_items): + if not isinstance(item, dict): + raise ValueError(f"Codex Responses input[{idx}] must be an object.") + + item_type = item.get("type") + if item_type == "function_call": + call_id = item.get("call_id") + name = item.get("name") + if not isinstance(call_id, str) or not call_id.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.") + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.") + + arguments = item.get("arguments", "{}") + if isinstance(arguments, dict): + arguments = json.dumps(arguments, ensure_ascii=False) + elif not isinstance(arguments, str): + arguments = str(arguments) + arguments = arguments.strip() or "{}" + + normalized.append( + { + "type": "function_call", + "call_id": call_id.strip(), + "name": name.strip(), + "arguments": arguments, + } + ) + continue + + if item_type == "function_call_output": + call_id = item.get("call_id") + if not isinstance(call_id, str) or not call_id.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.") + output = item.get("output", "") + if output is None: + output = "" + if not isinstance(output, str): + output = str(output) + + normalized.append( + { + "type": "function_call_output", + "call_id": call_id.strip(), + "output": output, + } + ) + continue + + role = item.get("role") + if role in {"user", "assistant"}: + content = item.get("content", "") + if content is None: + content = "" + if not isinstance(content, str): + content = str(content) + + normalized.append({"role": role, "content": content}) + continue + + raise ValueError( + f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})." 
+ ) + + return normalized + + def _preflight_codex_api_kwargs( + self, + api_kwargs: Any, + *, + allow_stream: bool = False, + ) -> Dict[str, Any]: + if not isinstance(api_kwargs, dict): + raise ValueError("Codex Responses request must be a dict.") + + required = {"model", "instructions", "input"} + missing = [key for key in required if key not in api_kwargs] + if missing: + raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.") + + model = api_kwargs.get("model") + if not isinstance(model, str) or not model.strip(): + raise ValueError("Codex Responses request 'model' must be a non-empty string.") + model = model.strip() + + instructions = api_kwargs.get("instructions") + if instructions is None: + instructions = "" + if not isinstance(instructions, str): + instructions = str(instructions) + instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY + + normalized_input = self._preflight_codex_input_items(api_kwargs.get("input")) + + tools = api_kwargs.get("tools") + normalized_tools = None + if tools is not None: + if not isinstance(tools, list): + raise ValueError("Codex Responses request 'tools' must be a list when provided.") + normalized_tools = [] + for idx, tool in enumerate(tools): + if not isinstance(tool, dict): + raise ValueError(f"Codex Responses tools[{idx}] must be an object.") + if tool.get("type") != "function": + raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.") + + name = tool.get("name") + parameters = tool.get("parameters") + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.") + if not isinstance(parameters, dict): + raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.") + + description = tool.get("description", "") + if description is None: + description = "" + if not isinstance(description, str): + description = str(description) + + strict = tool.get("strict", False) + if not isinstance(strict, bool): + strict = bool(strict) + + normalized_tools.append( + { + "type": "function", + "name": name.strip(), + "description": description, + "strict": strict, + "parameters": parameters, + } + ) + + store = api_kwargs.get("store", False) + if store is not False: + raise ValueError("Codex Responses contract requires 'store' to be false.") + + allowed_keys = {"model", "instructions", "input", "tools", "store"} + normalized: Dict[str, Any] = { + "model": model, + "instructions": instructions, + "input": normalized_input, + "tools": normalized_tools, + "store": False, + } + + if allow_stream: + stream = api_kwargs.get("stream") + if stream is not None and stream is not True: + raise ValueError("Codex Responses 'stream' must be true when set.") + if stream is True: + normalized["stream"] = True + allowed_keys.add("stream") + elif "stream" in api_kwargs: + raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.") + + unexpected = sorted(key for key in api_kwargs.keys() if key not in allowed_keys) + if unexpected: + raise ValueError( + f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}." 
+ ) + + return normalized + def _extract_responses_message_text(self, item: Any) -> str: """Extract assistant text from a Responses message output item.""" content = getattr(item, "content", None) @@ -1511,6 +1680,7 @@ class AIAgent: def _run_codex_stream(self, api_kwargs: dict): """Execute one streaming Responses API request and return the final response.""" + api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) max_stream_retries = 1 for attempt in range(max_stream_retries + 1): try: @@ -1539,6 +1709,7 @@ class AIAgent: """Fallback path for stream completion edge cases on Codex-style Responses backends.""" fallback_kwargs = dict(api_kwargs) fallback_kwargs["stream"] = True + fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True) stream_or_response = self.client.responses.create(**fallback_kwargs) # Compatibility shim for mocks or providers that still return a concrete response. @@ -1573,6 +1744,43 @@ class AIAgent: return terminal_response raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.") + def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool: + if self.api_mode != "codex_responses" or self.provider != "openai-codex": + return False + + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + + creds = resolve_codex_runtime_credentials(force_refresh=force) + except Exception as exc: + logger.debug("Codex credential refresh failed: %s", exc) + return False + + api_key = creds.get("api_key") + base_url = creds.get("base_url") + if not isinstance(api_key, str) or not api_key.strip(): + return False + if not isinstance(base_url, str) or not base_url.strip(): + return False + + self.api_key = api_key.strip() + self.base_url = base_url.strip().rstrip("/") + self._client_kwargs["api_key"] = self.api_key + self._client_kwargs["base_url"] = self.base_url + + try: + self.client.close() + except Exception: + pass + + try: + self.client = OpenAI(**self._client_kwargs) + except Exception as exc: + logger.warning("Failed to rebuild OpenAI client after Codex refresh: %s", exc) + return False + + return True + def _interruptible_api_call(self, api_kwargs: dict): """ Run the API call in a background thread so the main conversation loop @@ -2364,12 +2572,15 @@ class AIAgent: api_start_time = time.time() retry_count = 0 max_retries = 6 # Increased to allow longer backoff periods + codex_auth_retry_attempted = False finish_reason = "stop" while retry_count <= max_retries: try: api_kwargs = self._build_api_kwargs(api_messages) + if self.api_mode == "codex_responses": + api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {"1", "true", "yes", "on"}: self._dump_api_request_debug(api_kwargs, reason="preflight") @@ -2586,6 +2797,18 @@ class AIAgent: if thinking_spinner: thinking_spinner.stop(f"(╥_╥) error, retrying...") thinking_spinner = None + + status_code = getattr(api_error, "status_code", None) + if ( + self.api_mode == "codex_responses" + and self.provider == "openai-codex" + and status_code == 401 + and not codex_auth_retry_attempted + ): + codex_auth_retry_attempted = True + if self._try_refresh_codex_client_credentials(force=True): + print(f"{self.log_prefix}🔐 Codex auth refreshed after 401. 
Retrying request...") + continue retry_count += 1 elapsed_time = time.time() - api_start_time @@ -2614,7 +2837,6 @@ class AIAgent: # Check for non-retryable client errors (4xx HTTP status codes). # These indicate a problem with the request itself (bad model ID, # invalid API key, forbidden, etc.) and will never succeed on retry. - status_code = getattr(api_error, "status_code", None) is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 is_client_error = is_client_status_error or any(phrase in error_msg for phrase in [ 'error code: 400', 'error code: 401', 'error code: 403', diff --git a/tests/test_auth_codex_provider.py b/tests/test_auth_codex_provider.py index eaca52aac9..de490754c2 100644 --- a/tests/test_auth_codex_provider.py +++ b/tests/test_auth_codex_provider.py @@ -1,4 +1,7 @@ import json +import time +import base64 +from contextlib import contextmanager from pathlib import Path from types import SimpleNamespace @@ -9,6 +12,7 @@ from hermes_cli.auth import ( AuthError, DEFAULT_CODEX_BASE_URL, PROVIDER_REGISTRY, + _persist_codex_auth_payload, _login_openai_codex, login_command, get_codex_auth_status, @@ -37,6 +41,12 @@ def _write_codex_auth(codex_home: Path, *, access_token: str = "access", refresh return auth_file +def _jwt_with_exp(exp_epoch: int) -> str: + payload = {"exp": exp_epoch} + encoded = base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8")).rstrip(b"=").decode("utf-8") + return f"h.{encoded}.s" + + def test_read_codex_auth_file_success(tmp_path, monkeypatch): codex_home = tmp_path / "codex-home" auth_file = _write_codex_auth(codex_home) @@ -61,12 +71,107 @@ def test_resolve_codex_runtime_credentials_missing_access_token(tmp_path, monkey assert exc.value.relogin_required is True +def test_resolve_codex_runtime_credentials_refreshes_expiring_token(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + expiring_token = _jwt_with_exp(int(time.time()) - 10) + _write_codex_auth(codex_home, access_token=expiring_token, refresh_token="refresh-old") + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + called = {"count": 0} + + def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False): + called["count"] += 1 + assert auth_path == codex_home / "auth.json" + assert lock_held is True + return {"access_token": "access-new", "refresh_token": "refresh-new"} + + monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh) + + resolved = resolve_codex_runtime_credentials() + + assert called["count"] == 1 + assert resolved["api_key"] == "access-new" + + +def test_resolve_codex_runtime_credentials_force_refresh(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + _write_codex_auth(codex_home, access_token="access-current", refresh_token="refresh-old") + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + called = {"count": 0} + + def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False): + called["count"] += 1 + assert lock_held is True + return {"access_token": "access-forced", "refresh_token": "refresh-new"} + + monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh) + + resolved = resolve_codex_runtime_credentials(force_refresh=True, refresh_if_expiring=False) + + assert called["count"] == 1 + assert resolved["api_key"] == "access-forced" + + +def test_resolve_codex_runtime_credentials_uses_file_lock_on_refresh(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + _write_codex_auth(codex_home, access_token="access-current", 
refresh_token="refresh-old") + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + lock_calls = {"enter": 0, "exit": 0} + + @contextmanager + def _fake_lock(auth_path, timeout_seconds=15.0): + assert auth_path == codex_home / "auth.json" + lock_calls["enter"] += 1 + try: + yield + finally: + lock_calls["exit"] += 1 + + refresh_calls = {"count": 0} + + def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False): + refresh_calls["count"] += 1 + assert lock_held is True + return {"access_token": "access-updated", "refresh_token": "refresh-updated"} + + monkeypatch.setattr("hermes_cli.auth._codex_auth_file_lock", _fake_lock) + monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh) + + resolved = resolve_codex_runtime_credentials(force_refresh=True, refresh_if_expiring=False) + + assert refresh_calls["count"] == 1 + assert lock_calls["enter"] == 1 + assert lock_calls["exit"] == 1 + assert resolved["api_key"] == "access-updated" + + def test_resolve_provider_explicit_codex_does_not_fallback(monkeypatch): monkeypatch.delenv("OPENAI_API_KEY", raising=False) monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) assert resolve_provider("openai-codex") == "openai-codex" +def test_persist_codex_auth_payload_writes_atomically(tmp_path): + auth_path = tmp_path / "auth.json" + auth_path.write_text('{"stale":true}\n') + payload = { + "auth_mode": "oauth", + "tokens": { + "access_token": "next-access", + "refresh_token": "next-refresh", + }, + "last_refresh": "2026-02-26T00:00:00Z", + } + + _persist_codex_auth_payload(auth_path, payload) + + stored = json.loads(auth_path.read_text()) + assert stored == payload + assert list(tmp_path.glob(".auth.json.*.tmp")) == [] + + def test_get_codex_auth_status_not_logged_in(tmp_path, monkeypatch): monkeypatch.setenv("CODEX_HOME", str(tmp_path / "missing-codex-home")) status = get_codex_auth_status() diff --git a/tests/test_codex_execution_paths.py b/tests/test_codex_execution_paths.py new file mode 100644 index 0000000000..13ce5d7acf --- /dev/null +++ b/tests/test_codex_execution_paths.py @@ -0,0 +1,175 @@ +import asyncio +import sys +import types +from types import SimpleNamespace + + +sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) +sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) +sys.modules.setdefault("fal_client", types.SimpleNamespace()) + +import cron.scheduler as cron_scheduler +import gateway.run as gateway_run +import run_agent +from gateway.config import Platform +from gateway.session import SessionSource + + +def _patch_agent_bootstrap(monkeypatch): + monkeypatch.setattr( + run_agent, + "get_tool_definitions", + lambda **kwargs: [ + { + "type": "function", + "function": { + "name": "terminal", + "description": "Run shell commands.", + "parameters": {"type": "object", "properties": {}}, + }, + } + ], + ) + monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {}) + + +def _codex_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8), + status="completed", + model="gpt-5-codex", + ) + + +class _UnauthorizedError(RuntimeError): + def __init__(self): + super().__init__("Error code: 401 - unauthorized") + self.status_code = 401 + + +class _FakeOpenAI: + def __init__(self, **kwargs): + self.kwargs = kwargs + + def close(self): + return None + + +class 
_Codex401ThenSuccessAgent(run_agent.AIAgent): + refresh_attempts = 0 + last_init = {} + + def __init__(self, *args, **kwargs): + kwargs.setdefault("skip_context_files", True) + kwargs.setdefault("skip_memory", True) + kwargs.setdefault("max_iterations", 4) + type(self).last_init = dict(kwargs) + super().__init__(*args, **kwargs) + self._cleanup_task_resources = lambda task_id: None + self._persist_session = lambda messages, history=None: None + self._save_trajectory = lambda messages, user_message, completed: None + self._save_session_log = lambda messages: None + + def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool: + type(self).refresh_attempts += 1 + return True + + def run_conversation(self, user_message: str, conversation_history=None): + calls = {"api": 0} + + def _fake_api_call(api_kwargs): + calls["api"] += 1 + if calls["api"] == 1: + raise _UnauthorizedError() + return _codex_message_response("Recovered via refresh") + + self._interruptible_api_call = _fake_api_call + return super().run_conversation(user_message, conversation_history=conversation_history) + + +def test_cron_run_job_codex_path_handles_internal_401_refresh(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI) + monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent) + monkeypatch.setattr( + "hermes_cli.runtime_provider.resolve_runtime_provider", + lambda requested=None: { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "codex-token", + }, + ) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + + _Codex401ThenSuccessAgent.refresh_attempts = 0 + _Codex401ThenSuccessAgent.last_init = {} + + success, output, final_response, error = cron_scheduler.run_job( + {"id": "job-1", "name": "Codex Refresh Test", "prompt": "ping"} + ) + + assert success is True + assert error is None + assert final_response == "Recovered via refresh" + assert "Recovered via refresh" in output + assert _Codex401ThenSuccessAgent.refresh_attempts == 1 + assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex" + assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses" + + +def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI) + monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent) + monkeypatch.setattr( + gateway_run, + "_resolve_runtime_agent_kwargs", + lambda: { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "codex-token", + }, + ) + monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false") + + _Codex401ThenSuccessAgent.refresh_attempts = 0 + _Codex401ThenSuccessAgent.last_init = {} + + runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner) + runner.adapters = {} + runner._ephemeral_system_prompt = "" + runner._prefill_messages = [] + runner._reasoning_config = None + runner._running_agents = {} + + source = SessionSource( + platform=Platform.LOCAL, + chat_id="cli", + chat_name="CLI", + chat_type="dm", + user_id="user-1", + ) + + result = asyncio.run( + runner._run_agent( + message="ping", + context_prompt="", + history=[], + source=source, + session_id="session-1", + session_key="agent:main:local:dm", + ) + ) + + assert result["final_response"] == "Recovered 
via refresh" + assert _Codex401ThenSuccessAgent.refresh_attempts == 1 + assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex" + assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses" diff --git a/tests/test_codex_models.py b/tests/test_codex_models.py new file mode 100644 index 0000000000..e6cc2fdec0 --- /dev/null +++ b/tests/test_codex_models.py @@ -0,0 +1,40 @@ +import json + +from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, get_codex_model_ids + + +def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + codex_home.mkdir(parents=True, exist_ok=True) + (codex_home / "config.toml").write_text('model = "gpt-5.2-codex"\n') + (codex_home / "models_cache.json").write_text( + json.dumps( + { + "models": [ + {"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True}, + {"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True}, + {"slug": "gpt-4o", "priority": 1, "supported_in_api": True}, + {"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"}, + ] + } + ) + ) + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + models = get_codex_model_ids() + + assert models[0] == "gpt-5.2-codex" + assert "gpt-5.1-codex" in models + assert "gpt-5.3-codex" in models + assert "gpt-4o" not in models + assert "gpt-5-hidden-codex" not in models + + +def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + codex_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + models = get_codex_model_ids() + + assert models[: len(DEFAULT_CODEX_MODELS)] == DEFAULT_CODEX_MODELS diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py index fc7c619802..b3d3f552f0 100644 --- a/tests/test_run_agent_codex_responses.py +++ b/tests/test_run_agent_codex_responses.py @@ -2,6 +2,8 @@ import sys import types from types import SimpleNamespace +import pytest + sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) @@ -156,6 +158,16 @@ class _FakeCreateStream: self.closed = True +def _codex_request_kwargs(): + return { + "model": "gpt-5-codex", + "instructions": "You are Hermes.", + "input": [{"role": "user", "content": "Ping"}], + "tools": None, + "store": False, + } + + def test_api_mode_uses_explicit_provider_when_codex(monkeypatch): _patch_agent_bootstrap(monkeypatch) agent = run_agent.AIAgent( @@ -222,6 +234,10 @@ def test_build_api_kwargs_codex(monkeypatch): assert kwargs["tools"][0]["name"] == "terminal" assert kwargs["tools"][0]["strict"] is False assert "function" not in kwargs["tools"][0] + assert kwargs["store"] is False + assert "timeout" not in kwargs + assert "max_tokens" not in kwargs + assert "extra_body" not in kwargs def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch): @@ -243,7 +259,7 @@ def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch): ) ) - response = agent._run_codex_stream({"model": "gpt-5-codex"}) + response = agent._run_codex_stream(_codex_request_kwargs()) assert calls["stream"] == 2 assert response.output[0].content[0].text == "stream ok" @@ -269,7 +285,7 @@ def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(mon ) ) - response = agent._run_codex_stream({"model": "gpt-5-codex"}) + response = agent._run_codex_stream(_codex_request_kwargs()) 
assert calls["stream"] == 2 assert calls["create"] == 1 assert response.output[0].content[0].text == "create fallback ok" @@ -304,7 +320,7 @@ def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch): ) ) - response = agent._run_codex_stream({"model": "gpt-5-codex"}) + response = agent._run_codex_stream(_codex_request_kwargs()) assert calls["stream"] == 2 assert calls["create"] == 1 assert create_stream.closed is True @@ -323,6 +339,72 @@ def test_run_conversation_codex_plain_text(monkeypatch): assert result["messages"][-1]["content"] == "OK" +def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch): + agent = _build_agent(monkeypatch) + calls = {"api": 0, "refresh": 0} + + class _UnauthorizedError(RuntimeError): + def __init__(self): + super().__init__("Error code: 401 - unauthorized") + self.status_code = 401 + + def _fake_api_call(api_kwargs): + calls["api"] += 1 + if calls["api"] == 1: + raise _UnauthorizedError() + return _codex_message_response("Recovered after refresh") + + def _fake_refresh(*, force=True): + calls["refresh"] += 1 + assert force is True + return True + + monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call) + monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh) + + result = agent.run_conversation("Say OK") + + assert calls["api"] == 2 + assert calls["refresh"] == 1 + assert result["completed"] is True + assert result["final_response"] == "Recovered after refresh" + + +def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch): + agent = _build_agent(monkeypatch) + closed = {"value": False} + rebuilt = {"kwargs": None} + + class _ExistingClient: + def close(self): + closed["value"] = True + + class _RebuiltClient: + pass + + def _fake_openai(**kwargs): + rebuilt["kwargs"] = kwargs + return _RebuiltClient() + + monkeypatch.setattr( + "hermes_cli.auth.resolve_codex_runtime_credentials", + lambda force_refresh=True: { + "api_key": "new-codex-token", + "base_url": "https://chatgpt.com/backend-api/codex", + }, + ) + monkeypatch.setattr(run_agent, "OpenAI", _fake_openai) + + agent.client = _ExistingClient() + ok = agent._try_refresh_codex_client_credentials(force=True) + + assert ok is True + assert closed["value"] is True + assert rebuilt["kwargs"]["api_key"] == "new-codex-token" + assert rebuilt["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex" + assert isinstance(agent.client, _RebuiltClient) + + def test_run_conversation_codex_tool_round_trip(monkeypatch): agent = _build_agent(monkeypatch) responses = [_codex_tool_call_response(), _codex_message_response("done")] @@ -404,6 +486,56 @@ def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch): assert function_output["call_id"] == "call_pair123" +def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch): + agent = _build_agent(monkeypatch) + preflight = agent._preflight_codex_api_kwargs( + { + "model": "gpt-5-codex", + "instructions": "You are Hermes.", + "input": [ + {"role": "user", "content": "hi"}, + { + "type": "function_call", + "id": "call_bad", + "call_id": "call_good", + "name": "terminal", + "arguments": "{}", + }, + ], + "tools": [], + "store": False, + } + ) + + fn_call = next(item for item in preflight["input"] if item.get("type") == "function_call") + assert fn_call["call_id"] == "call_good" + assert "id" not in fn_call + + +def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id(monkeypatch): + agent = 
_build_agent(monkeypatch)
+
+    with pytest.raises(ValueError, match="function_call_output is missing call_id"):
+        agent._preflight_codex_api_kwargs(
+            {
+                "model": "gpt-5-codex",
+                "instructions": "You are Hermes.",
+                "input": [{"type": "function_call_output", "output": "{}"}],
+                "tools": [],
+                "store": False,
+            }
+        )
+
+
+def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch):
+    agent = _build_agent(monkeypatch)
+    kwargs = _codex_request_kwargs()
+    kwargs["temperature"] = 0
+
+    with pytest.raises(ValueError, match="unsupported field"):
+        agent._preflight_codex_api_kwargs(kwargs)
+
+
 def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
     agent = _build_agent(monkeypatch)
     responses = [_codex_tool_call_response(), _codex_message_response("done")]

From 1e463a8e39a8c0ae827ad646b6779f2454a7de6d Mon Sep 17 00:00:00 2001
From: Bartok9
Date: Sat, 28 Feb 2026 03:06:20 -0500
Subject: [PATCH 09/31] fix: strip <think> blocks from final response to users

Fixes #149

The _strip_think_blocks() method existed but was not applied to the
final_response in the normal completion path. This caused <think>...</think>
XML tags to leak into user-facing responses on all platforms (CLI, Telegram,
Discord, Slack, WhatsApp).

Changes:
- Strip <think> blocks from final_response before returning in the normal
  path (line ~2600)
- Strip <think> blocks from fallback content when salvaging from a prior
  tool_calls turn

Notes:
- The raw content with <think> blocks is preserved in messages[] for
  trajectory export - this only affects the user-facing final_response
- The _has_content_after_think_block() check still uses raw content
  before stripping, which is correct for detecting think-only responses
---
 run_agent.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/run_agent.py b/run_agent.py
index 59a547f0df..c32d92d7eb 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2567,7 +2567,8 @@ class AIAgent:
                                 tool_names.append(fn.get("name", "unknown"))
                         msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
                         break
-                final_response = fallback
+                # Strip <think> blocks from fallback content for user display
+                final_response = self._strip_think_blocks(fallback).strip()
                 break

             # No fallback -- append the empty message as-is
@@ -2596,6 +2597,9 @@ class AIAgent:
             if hasattr(self, '_empty_content_retries'):
                 self._empty_content_retries = 0

+            # Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
+            final_response = self._strip_think_blocks(final_response).strip()
+
             final_msg = self._build_assistant_message(assistant_message, finish_reason)
             messages.append(final_msg)

From 35655298e691726f725feb0c30a2b53e0834d915 Mon Sep 17 00:00:00 2001
From: Bartok9
Date: Sat, 28 Feb 2026 03:38:27 -0500
Subject: [PATCH 10/31] fix(gateway): prevent TTS voice messages from
 accumulating across turns

Fixes #160

The issue was that MEDIA tags were being extracted from ALL messages in
the conversation history, not just messages from the current turn. This
caused TTS voice messages generated in earlier turns to be re-attached
to every subsequent reply.

The fix:
- Track history_len before calling run_conversation
- Only scan messages AFTER history_len for MEDIA tags
- Add comprehensive tests to prevent regression

This ensures each voice message is sent exactly once, when it's
generated, not on every subsequent message in the session.
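The mechanics reduce to a length snapshot plus a slice. A minimal sketch of the idea, reusing the same regex and cleanup as the gateway code (the helper name here is illustrative, not from the codebase):

```python
import re

def extract_new_media_tags(messages, history_len):
    """Collect MEDIA:<path> tags, but only from messages appended after history_len."""
    tags = []
    for msg in messages[history_len:]:  # skip everything that existed before this turn
        if msg.get("role") in ("tool", "function"):
            for match in re.finditer(r"MEDIA:(\S+)", msg.get("content", "")):
                path = match.group(1).strip().rstrip('",}')
                if path:
                    tags.append(f"MEDIA:{path}")
    return tags

history = [{"role": "tool", "content": "MEDIA:/audio/old.ogg"}]
turn = history + [{"role": "tool", "content": "MEDIA:/audio/new.ogg"}]
assert extract_new_media_tags(turn, len(history)) == ["MEDIA:/audio/new.ogg"]
```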
--- gateway/run.py | 12 +- tests/gateway/test_media_extraction.py | 184 +++++++++++++++++++++++++ 2 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 tests/gateway/test_media_extraction.py diff --git a/gateway/run.py b/gateway/run.py index bcd2457b93..0b87949240 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1701,6 +1701,9 @@ class GatewayRunner: content = f"[Delivered from {mirror_src}] {content}" agent_history.append({"role": role, "content": content}) + # Track history length to only scan NEW messages for MEDIA tags + history_len = len(agent_history) + result = agent.run_conversation(message, conversation_history=agent_history) result_holder[0] = result @@ -1721,10 +1724,17 @@ class GatewayRunner: # doesn't include them. We collect unique tags from tool results and # append any that aren't already present in the final response, so the # adapter's extract_media() can find and deliver the files exactly once. + # + # IMPORTANT: Only scan messages from the CURRENT turn (after history_len), + # not the full history. This prevents TTS voice messages from earlier + # turns being re-attached to every subsequent reply. (Fixes #160) if "MEDIA:" not in final_response: media_tags = [] has_voice_directive = False - for msg in result.get("messages", []): + all_messages = result.get("messages", []) + # Only process new messages from this turn + new_messages = all_messages[history_len:] if len(all_messages) > history_len else [] + for msg in new_messages: if msg.get("role") == "tool" or msg.get("role") == "function": content = msg.get("content", "") if "MEDIA:" in content: diff --git a/tests/gateway/test_media_extraction.py b/tests/gateway/test_media_extraction.py new file mode 100644 index 0000000000..20f7d73a8f --- /dev/null +++ b/tests/gateway/test_media_extraction.py @@ -0,0 +1,184 @@ +""" +Tests for MEDIA tag extraction from tool results. + +Verifies that MEDIA tags (e.g., from TTS tool) are only extracted from +messages in the CURRENT turn, not from the full conversation history. +This prevents voice messages from accumulating and being sent multiple +times per reply. (Regression test for #160) +""" + +import pytest +import re + + +def extract_media_tags_fixed(result_messages, history_len): + """ + Extract MEDIA tags from tool results, but ONLY from new messages + (those added after history_len). This is the fixed behavior. + + Args: + result_messages: Full list of messages including history + new + history_len: Length of history before this turn + + Returns: + Tuple of (media_tags list, has_voice_directive bool) + """ + media_tags = [] + has_voice_directive = False + + # Only process new messages from this turn + new_messages = result_messages[history_len:] if len(result_messages) > history_len else [] + + for msg in new_messages: + if msg.get("role") == "tool" or msg.get("role") == "function": + content = msg.get("content", "") + if "MEDIA:" in content: + for match in re.finditer(r'MEDIA:(\S+)', content): + path = match.group(1).strip().rstrip('",}') + if path: + media_tags.append(f"MEDIA:{path}") + if "[[audio_as_voice]]" in content: + has_voice_directive = True + + return media_tags, has_voice_directive + + +def extract_media_tags_broken(result_messages): + """ + The BROKEN behavior: extract MEDIA tags from ALL messages including history. + This causes TTS voice messages to accumulate and be re-sent on every reply. 
+ """ + media_tags = [] + has_voice_directive = False + + for msg in result_messages: + if msg.get("role") == "tool" or msg.get("role") == "function": + content = msg.get("content", "") + if "MEDIA:" in content: + for match in re.finditer(r'MEDIA:(\S+)', content): + path = match.group(1).strip().rstrip('",}') + if path: + media_tags.append(f"MEDIA:{path}") + if "[[audio_as_voice]]" in content: + has_voice_directive = True + + return media_tags, has_voice_directive + + +class TestMediaExtraction: + """Tests for MEDIA tag extraction from tool results.""" + + def test_media_tags_not_extracted_from_history(self): + """MEDIA tags from previous turns should NOT be extracted again.""" + # Simulate conversation history with a TTS call from a previous turn + history = [ + {"role": "user", "content": "Say hello as audio"}, + {"role": "assistant", "content": None, "tool_calls": [{"id": "1", "function": {"name": "text_to_speech"}}]}, + {"role": "tool", "tool_call_id": "1", "content": '{"success": true, "media_tag": "[[audio_as_voice]]\\nMEDIA:/path/to/audio1.ogg"}'}, + {"role": "assistant", "content": "I've said hello for you!"}, + ] + + # New turn: user asks a simple question + new_messages = [ + {"role": "user", "content": "What time is it?"}, + {"role": "assistant", "content": "It's 3:30 AM."}, + ] + + all_messages = history + new_messages + history_len = len(history) + + # Fixed behavior: should extract NO media tags (none in new messages) + tags, voice_directive = extract_media_tags_fixed(all_messages, history_len) + assert tags == [], "Fixed extraction should not find tags in history" + assert voice_directive is False + + # Broken behavior: would incorrectly extract the old media tag + broken_tags, broken_voice = extract_media_tags_broken(all_messages) + assert len(broken_tags) == 1, "Broken extraction finds tags in history" + assert "audio1.ogg" in broken_tags[0] + + def test_media_tags_extracted_from_current_turn(self): + """MEDIA tags from the current turn SHOULD be extracted.""" + # History without TTS + history = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + ] + + # New turn with TTS call + new_messages = [ + {"role": "user", "content": "Say goodbye as audio"}, + {"role": "assistant", "content": None, "tool_calls": [{"id": "2", "function": {"name": "text_to_speech"}}]}, + {"role": "tool", "tool_call_id": "2", "content": '{"success": true, "media_tag": "[[audio_as_voice]]\\nMEDIA:/path/to/audio2.ogg"}'}, + {"role": "assistant", "content": "I've said goodbye!"}, + ] + + all_messages = history + new_messages + history_len = len(history) + + # Fixed behavior: should extract the new media tag + tags, voice_directive = extract_media_tags_fixed(all_messages, history_len) + assert len(tags) == 1, "Should extract media tag from current turn" + assert "audio2.ogg" in tags[0] + assert voice_directive is True + + def test_multiple_tts_calls_in_history_not_accumulated(self): + """Multiple TTS calls in history should NOT accumulate in new responses.""" + # History with multiple TTS calls + history = [ + {"role": "user", "content": "Say hello"}, + {"role": "tool", "tool_call_id": "1", "content": 'MEDIA:/audio/hello.ogg'}, + {"role": "assistant", "content": "Done!"}, + {"role": "user", "content": "Say goodbye"}, + {"role": "tool", "tool_call_id": "2", "content": 'MEDIA:/audio/goodbye.ogg'}, + {"role": "assistant", "content": "Done!"}, + {"role": "user", "content": "Say thanks"}, + {"role": "tool", "tool_call_id": "3", "content": 'MEDIA:/audio/thanks.ogg'}, + 
{"role": "assistant", "content": "Done!"}, + ] + + # New turn: no TTS + new_messages = [ + {"role": "user", "content": "What time is it?"}, + {"role": "assistant", "content": "3 PM"}, + ] + + all_messages = history + new_messages + history_len = len(history) + + # Fixed: no tags + tags, _ = extract_media_tags_fixed(all_messages, history_len) + assert tags == [], "Should not accumulate tags from history" + + # Broken: would have 3 tags (all the old ones) + broken_tags, _ = extract_media_tags_broken(all_messages) + assert len(broken_tags) == 3, "Broken version accumulates all history tags" + + def test_deduplication_within_current_turn(self): + """Multiple MEDIA tags in current turn should be deduplicated.""" + history = [] + + # Current turn with multiple tool calls producing same media + new_messages = [ + {"role": "user", "content": "Multiple TTS"}, + {"role": "tool", "tool_call_id": "1", "content": 'MEDIA:/audio/same.ogg'}, + {"role": "tool", "tool_call_id": "2", "content": 'MEDIA:/audio/same.ogg'}, # duplicate + {"role": "tool", "tool_call_id": "3", "content": 'MEDIA:/audio/different.ogg'}, + {"role": "assistant", "content": "Done!"}, + ] + + all_messages = history + new_messages + + tags, _ = extract_media_tags_fixed(all_messages, 0) + # Even though same.ogg appears twice, deduplication happens after extraction + # The extraction itself should get both, then caller deduplicates + assert len(tags) == 3 # Raw extraction gets all + + # Deduplication as done in the actual code: + seen = set() + unique = [t for t in tags if t not in seen and not seen.add(t)] + assert len(unique) == 2 # After dedup: same.ogg and different.ogg + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From f213620c8bea56ccf9f46750bf3dffee40a31268 Mon Sep 17 00:00:00 2001 From: Aayush Chaudhary Date: Sat, 28 Feb 2026 14:28:18 +0530 Subject: [PATCH 11/31] fix(install): ignore commented lines when checking for existing PATH configuration --- scripts/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install.sh b/scripts/install.sh index 4f8108bb82..81978e8f04 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -723,7 +723,7 @@ setup_path() { PATH_LINE='export PATH="$HOME/.local/bin:$PATH"' for SHELL_CONFIG in "${SHELL_CONFIGS[@]}"; do - if ! grep -q '\.local/bin' "$SHELL_CONFIG" 2>/dev/null; then + if ! 
grep -v '^[[:space:]]*#' "$SHELL_CONFIG" 2>/dev/null | grep -qE 'PATH=.*\.local/bin'; then echo "" >> "$SHELL_CONFIG" echo "# Hermes Agent — ensure ~/.local/bin is on PATH" >> "$SHELL_CONFIG" echo "$PATH_LINE" >> "$SHELL_CONFIG" From 6366177118ec1a30622e695bba07103067d71936 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 04:46:35 -0800 Subject: [PATCH 12/31] refactor: update context compression configuration to use config.yaml and improve model handling --- .env.example | 5 +++-- agent/context_compressor.py | 4 +++- gateway/run.py | 10 ++++++++++ run_agent.py | 4 +++- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/.env.example b/.env.example index 78549212fb..2693931e01 100644 --- a/.env.example +++ b/.env.example @@ -10,7 +10,7 @@ OPENROUTER_API_KEY= # Default model to use (OpenRouter format: provider/model) -# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus +# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-3-flash-preview, zhipuai/glm-4-plus LLM_MODEL=anthropic/claude-opus-4.6 # ============================================================================= @@ -200,9 +200,10 @@ IMAGE_TOOLS_DEBUG=false # When conversation approaches model's context limit, middle turns are # automatically summarized to free up space. # +# Context compression is configured in ~/.hermes/config.yaml under compression: # CONTEXT_COMPRESSION_ENABLED=true # Enable auto-compression (default: true) # CONTEXT_COMPRESSION_THRESHOLD=0.85 # Compress at 85% of context limit -# CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001 # Fast model for summaries +# Model is set via compression.summary_model in config.yaml (default: google/gemini-3-flash-preview) # ============================================================================= # RL TRAINING (Tinker + Atropos) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 329fd9680d..6f9ce3c01b 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -33,6 +33,7 @@ class ContextCompressor: protect_last_n: int = 4, summary_target_tokens: int = 500, quiet_mode: bool = False, + summary_model_override: str = None, ): self.model = model self.threshold_percent = threshold_percent @@ -49,7 +50,8 @@ class ContextCompressor: self.last_completion_tokens = 0 self.last_total_tokens = 0 - self.client, self.summary_model = get_text_auxiliary_client() + self.client, default_model = get_text_auxiliary_client() + self.summary_model = summary_model_override or default_model def update_from_response(self, usage: Dict[str, Any]): """Update tracked token usage from API response.""" diff --git a/gateway/run.py b/gateway/run.py index bcd2457b93..4f4a81badc 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -78,6 +78,16 @@ if _config_path.exists(): for _cfg_key, _env_var in _terminal_env_map.items(): if _cfg_key in _terminal_cfg: os.environ[_env_var] = str(_terminal_cfg[_cfg_key]) + _compression_cfg = _cfg.get("compression", {}) + if _compression_cfg and isinstance(_compression_cfg, dict): + _compression_env_map = { + "enabled": "CONTEXT_COMPRESSION_ENABLED", + "threshold": "CONTEXT_COMPRESSION_THRESHOLD", + "summary_model": "CONTEXT_COMPRESSION_MODEL", + } + for _cfg_key, _env_var in _compression_env_map.items(): + if _cfg_key in _compression_cfg: + os.environ[_env_var] = str(_compression_cfg[_cfg_key]) except Exception: pass # Non-fatal; gateway can still run with .env values diff --git a/run_agent.py b/run_agent.py index 59a547f0df..91db7cc2ad 100644 
--- a/run_agent.py +++ b/run_agent.py @@ -479,9 +479,10 @@ class AIAgent: # Initialize context compressor for automatic context management # Compresses conversation when approaching model's context limit - # Configuration via environment variables (can be set in .env or cli-config.yaml) + # Configuration via config.yaml (compression section) or environment variables compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85")) compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes") + compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None self.context_compressor = ContextCompressor( model=self.model, @@ -489,6 +490,7 @@ class AIAgent: protect_first_n=3, protect_last_n=4, summary_target_tokens=500, + summary_model_override=compression_summary_model, quiet_mode=self.quiet_mode, ) self.compression_enabled = compression_enabled From 1ddf8c26f50d49719a502fd0cf9b47d30a136a46 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 10:35:49 -0800 Subject: [PATCH 13/31] refactor(cli): update max turns configuration precedence and enhance documentation --- README.md | 13 +++++++++++++ cli.py | 6 +++--- gateway/run.py | 4 ++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3cb1d65989..4b407c2673 100644 --- a/README.md +++ b/README.md @@ -161,6 +161,19 @@ hermes config set terminal.backend docker hermes config set OPENROUTER_API_KEY sk-or-... # Saves to .env ``` +### Configuration Precedence + +Settings are resolved in this order (highest priority first): + +1. **CLI arguments** — `hermes chat --max-turns 100` (per-invocation override) +2. **`~/.hermes/config.yaml`** — the primary config file for all non-secret settings +3. **`~/.hermes/.env`** — fallback for env vars; **required** for secrets (API keys, tokens, passwords) +4. **Built-in defaults** — hardcoded safe defaults when nothing else is set + +**Rule of thumb:** Secrets (API keys, bot tokens, passwords) go in `.env`. Everything else (model, terminal backend, compression settings, memory limits, toolsets) goes in `config.yaml`. When both are set, `config.yaml` wins for non-secret settings. + +The `hermes config set` command automatically routes values to the right file — API keys are saved to `.env`, everything else to `config.yaml`. 
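For a concrete picture of the order, here is roughly how `max_turns` would be resolved under these rules (a sketch only; the real logic lives in `HermesCLI.__init__`, shown in the cli.py diff below):

```python
import os

def resolve_max_turns(cli_arg=None, config=None):
    """Resolve agent.max_turns using the documented precedence:
    CLI argument > config.yaml > environment (.env) > built-in default."""
    config = config or {}
    if cli_arg is not None:                        # 1. per-invocation override
        return cli_arg
    if config.get("agent", {}).get("max_turns"):   # 2. ~/.hermes/config.yaml
        return config["agent"]["max_turns"]
    if os.getenv("HERMES_MAX_ITERATIONS"):         # 3. env var loaded from ~/.hermes/.env
        return int(os.environ["HERMES_MAX_ITERATIONS"])
    return 60                                      # 4. hardcoded safe default
```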
+ ### Optional API Keys | Feature | Provider | Env Variable | diff --git a/cli.py b/cli.py index ea9c3e6308..89aa463d94 100755 --- a/cli.py +++ b/cli.py @@ -822,15 +822,15 @@ class HermesCLI: ) self._nous_key_expires_at: Optional[str] = None self._nous_key_source: Optional[str] = None - # Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default + # Max turns priority: CLI arg > config file > env var > default if max_turns is not None: self.max_turns = max_turns - elif os.getenv("HERMES_MAX_ITERATIONS"): - self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS")) elif CLI_CONFIG["agent"].get("max_turns"): self.max_turns = CLI_CONFIG["agent"]["max_turns"] elif CLI_CONFIG.get("max_turns"): # Backwards compat: root-level max_turns self.max_turns = CLI_CONFIG["max_turns"] + elif os.getenv("HERMES_MAX_ITERATIONS"): + self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS")) else: self.max_turns = 60 diff --git a/gateway/run.py b/gateway/run.py index 4f4a81badc..c5d283a190 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -88,6 +88,10 @@ if _config_path.exists(): for _cfg_key, _env_var in _compression_env_map.items(): if _cfg_key in _compression_cfg: os.environ[_env_var] = str(_compression_cfg[_cfg_key]) + _agent_cfg = _cfg.get("agent", {}) + if _agent_cfg and isinstance(_agent_cfg, dict): + if "max_turns" in _agent_cfg: + os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"]) except Exception: pass # Non-fatal; gateway can still run with .env values From 2205b22409f2590069a1f37841dd31417f9faf7a Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 10:38:49 -0800 Subject: [PATCH 14/31] fix(headers): update X-OpenRouter-Categories to include 'productivity' --- agent/auxiliary_client.py | 2 +- run_agent.py | 2 +- tools/openrouter_client.py | 2 +- tools/session_search_tool.py | 2 +- tools/vision_tools.py | 2 +- tools/web_tools.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index ef179c4103..04ac41a564 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) _OR_HEADERS = { "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", } # Nous Portal extra_body for product attribution. 
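Each of these call sites attaches the same dict as client-level default headers, so the attribution rides along on every request the client makes. A minimal sketch of the pattern (the base URL and key here are placeholders):

```python
from openai import OpenAI

_OR_HEADERS = {
    "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
    "X-OpenRouter-Title": "Hermes Agent",
    "X-OpenRouter-Categories": "productivity,cli-agent",  # now includes 'productivity'
}

# default_headers are merged into every request issued by this client.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key="sk-or-...",  # placeholder key
    default_headers=_OR_HEADERS,
)
```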
diff --git a/run_agent.py b/run_agent.py index 91db7cc2ad..8e10dc6763 100644 --- a/run_agent.py +++ b/run_agent.py @@ -297,7 +297,7 @@ class AIAgent: client_kwargs["default_headers"] = { "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", } self._client_kwargs = client_kwargs # stored for rebuilding after interrupt diff --git a/tools/openrouter_client.py b/tools/openrouter_client.py index 7d30e6eec2..343cf1021d 100644 --- a/tools/openrouter_client.py +++ b/tools/openrouter_client.py @@ -31,7 +31,7 @@ def get_async_client() -> AsyncOpenAI: default_headers={ "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", }, ) return _client diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index bcfbfdf2ab..bbba7b3855 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -39,7 +39,7 @@ if _aux_client is not None: _async_kwargs["default_headers"] = { "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", } _async_aux_client = AsyncOpenAI(**_async_kwargs) MAX_SESSION_CHARS = 100_000 diff --git a/tools/vision_tools.py b/tools/vision_tools.py index 39413d5b0a..f3744e95f1 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -54,7 +54,7 @@ if _aux_sync_client is not None: _async_kwargs["default_headers"] = { "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", } _aux_async_client = AsyncOpenAI(**_async_kwargs) diff --git a/tools/web_tools.py b/tools/web_tools.py index 0e5baaa29e..7ec08fc02f 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -79,7 +79,7 @@ if _aux_sync_client is not None: _async_kwargs["default_headers"] = { "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", } _aux_async_client = AsyncOpenAI(**_async_kwargs) From 8e0c48e6d25b0a31ef6f809f64afe1d28180d97f Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 11:18:50 -0800 Subject: [PATCH 15/31] feat(skills): implement dynamic skill slash commands for CLI and gateway --- AGENTS.md | 14 +++++ README.md | 18 +++++++ agent/skill_commands.py | 114 ++++++++++++++++++++++++++++++++++++++++ cli.py | 67 +++++++++++++++++------ gateway/run.py | 52 +++++++++++++----- 5 files changed, 235 insertions(+), 30 deletions(-) create mode 100644 agent/skill_commands.py diff --git a/AGENTS.md b/AGENTS.md index f729bde98f..d88fbf7ff0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -179,6 +179,7 @@ The interactive CLI uses: Key components: - `HermesCLI` class - Main CLI controller with commands and conversation loop - `SlashCommandCompleter` - Autocomplete dropdown for `/commands` (type `/` to see all) +- `agent/skill_commands.py` - Scans skills and builds invocation messages (shared with gateway) - `load_cli_config()` - Loads config, sets environment variables for terminal - `build_welcome_banner()` - Displays ASCII art logo, tools, and skills summary @@ -191,9 +192,22 @@ CLI 
UX notes:
- Pasting 5+ lines auto-saves to `~/.hermes/pastes/` and collapses to a reference
- Multi-line input via Alt+Enter or Ctrl+J
- `/commands` - Process user commands like `/help`, `/clear`, `/personality`, etc.
+- `/skill-name` - Invoke installed skills directly (e.g., `/axolotl`, `/gif-search`)

 CLI uses `quiet_mode=True` when creating AIAgent to suppress verbose logging.

+### Skill Slash Commands
+
+Every installed skill in `~/.hermes/skills/` is automatically registered as a slash command.
+The skill name (from frontmatter or folder name) becomes the command: `axolotl` → `/axolotl`.
+
+Implementation (`agent/skill_commands.py`, shared between CLI and gateway):
+1. `scan_skill_commands()` scans all SKILL.md files at startup
+2. `build_skill_invocation_message()` loads the SKILL.md content and builds a user-turn message
+3. The message includes the full skill content, a list of supporting files (not loaded), and the user's instruction
+4. Supporting files can be loaded on demand via the `skill_view` tool
+5. Injected as a **user message** (not system prompt) to preserve prompt caching
+
 ### Adding CLI Commands

 1. Add to `COMMANDS` dict with description
diff --git a/README.md b/README.md
index 4b407c2673..1403c03b58 100644
--- a/README.md
+++ b/README.md
@@ -291,6 +291,7 @@ See [docs/messaging.md](docs/messaging.md) for advanced WhatsApp configuration.
 | `/stop` | Stop the running agent |
 | `/sethome` | Set this chat as the home channel |
 | `/help` | Show available commands |
+| `/<skill-name>` | Invoke any installed skill (e.g., `/axolotl`, `/gif-search`) |

 ### DM Pairing (Alternative to Allowlists)
@@ -421,6 +422,7 @@ Type `/` to see an autocomplete dropdown of all commands.
 | `/skills` | Search, install, inspect, or manage skills from registries |
 | `/platforms` | Show gateway/messaging platform status |
 | `/quit` | Exit (also: `/exit`, `/q`) |
+| `/<skill-name>` | Invoke any installed skill (e.g., `/axolotl`, `/gif-search`) |

 **Keybindings:**
 - `Enter` — send message
@@ -820,6 +822,22 @@ Skills are on-demand knowledge documents the agent can load when needed. They fo
 All skills live in **`~/.hermes/skills/`** -- a single directory that is the source of truth. On fresh install, bundled skills are copied there from the repo. Hub-installed skills and agent-created skills also go here. The agent can modify or delete any skill. `hermes update` adds only genuinely new bundled skills (via a manifest) without overwriting your changes or re-adding skills you deleted.

 **Using Skills:**
+
+Every installed skill is automatically available as a slash command — type `/<skill-name>` to invoke it directly:
+
+```bash
+# In the CLI or any messaging platform (Telegram, Discord, Slack, WhatsApp):
+/gif-search funny cats
+/axolotl help me fine-tune Llama 3 on my dataset
+/github-pr-workflow create a PR for the auth refactor
+
+# Just the skill name (no prompt) loads the skill and lets the agent ask what you need:
+/excalidraw
+```
+
+The skill's full instructions (SKILL.md) are loaded into the conversation, and any supporting files (references, templates, scripts) are listed for the agent to pull on demand via the `skill_view` tool. Type `/help` to see all available skill commands.
+
+You can also use skills through natural conversation:
 ```bash
 hermes --toolsets skills -q "What skills do you have?"
hermes --toolsets skills -q "Show me the axolotl skill" diff --git a/agent/skill_commands.py b/agent/skill_commands.py new file mode 100644 index 0000000000..fc11c53125 --- /dev/null +++ b/agent/skill_commands.py @@ -0,0 +1,114 @@ +"""Skill slash commands — scan installed skills and build invocation messages. + +Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces +can invoke skills via /skill-name commands. +""" + +import logging +from pathlib import Path +from typing import Any, Dict, Optional + +logger = logging.getLogger(__name__) + +_skill_commands: Dict[str, Dict[str, Any]] = {} + + +def scan_skill_commands() -> Dict[str, Dict[str, Any]]: + """Scan ~/.hermes/skills/ and return a mapping of /command -> skill info. + + Returns: + Dict mapping "/skill-name" to {name, description, skill_md_path, skill_dir}. + """ + global _skill_commands + _skill_commands = {} + try: + from tools.skills_tool import SKILLS_DIR, _parse_frontmatter + if not SKILLS_DIR.exists(): + return _skill_commands + for skill_md in SKILLS_DIR.rglob("SKILL.md"): + path_str = str(skill_md) + if '/.git/' in path_str or '/.github/' in path_str or '/.hub/' in path_str: + continue + try: + content = skill_md.read_text(encoding='utf-8') + frontmatter, body = _parse_frontmatter(content) + name = frontmatter.get('name', skill_md.parent.name) + description = frontmatter.get('description', '') + if not description: + for line in body.strip().split('\n'): + line = line.strip() + if line and not line.startswith('#'): + description = line[:80] + break + cmd_name = name.lower().replace(' ', '-').replace('_', '-') + _skill_commands[f"/{cmd_name}"] = { + "name": name, + "description": description or f"Invoke the {name} skill", + "skill_md_path": str(skill_md), + "skill_dir": str(skill_md.parent), + } + except Exception: + continue + except Exception: + pass + return _skill_commands + + +def get_skill_commands() -> Dict[str, Dict[str, Any]]: + """Return the current skill commands mapping (scan first if empty).""" + if not _skill_commands: + scan_skill_commands() + return _skill_commands + + +def build_skill_invocation_message(cmd_key: str, user_instruction: str = "") -> Optional[str]: + """Build the user message content for a skill slash command invocation. + + Args: + cmd_key: The command key including leading slash (e.g., "/gif-search"). + user_instruction: Optional text the user typed after the command. + + Returns: + The formatted message string, or None if the skill wasn't found. + """ + commands = get_skill_commands() + skill_info = commands.get(cmd_key) + if not skill_info: + return None + + skill_md_path = Path(skill_info["skill_md_path"]) + skill_dir = Path(skill_info["skill_dir"]) + skill_name = skill_info["name"] + + try: + content = skill_md_path.read_text(encoding='utf-8') + except Exception: + return f"[Failed to load skill: {skill_name}]" + + parts = [ + f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. 
The full skill content is loaded below.]',
+        "",
+        content.strip(),
+    ]
+
+    supporting = []
+    for subdir in ("references", "templates", "scripts", "assets"):
+        subdir_path = skill_dir / subdir
+        if subdir_path.exists():
+            for f in sorted(subdir_path.rglob("*")):
+                if f.is_file():
+                    rel = str(f.relative_to(skill_dir))
+                    supporting.append(rel)
+
+    if supporting:
+        parts.append("")
+        parts.append("[This skill has supporting files you can load with the skill_view tool:]")
+        for sf in supporting:
+            parts.append(f"- {sf}")
+        parts.append(f'\nTo view any of these, use: skill_view(name="{skill_name}", file="<path>")')
+
+    if user_instruction:
+        parts.append("")
+        parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}")
+
+    return "\n".join(parts)
diff --git a/cli.py b/cli.py
index 89aa463d94..a0ccdf55bd 100755
--- a/cli.py
+++ b/cli.py
@@ -682,17 +682,27 @@ COMMANDS = {
 }


+# ============================================================================
+# Skill Slash Commands — dynamic commands generated from installed skills
+# ============================================================================
+
+from agent.skill_commands import scan_skill_commands, get_skill_commands, build_skill_invocation_message
+
+_skill_commands = scan_skill_commands()
+
+
 class SlashCommandCompleter(Completer):
-    """Autocomplete for /commands in the input area."""
+    """Autocomplete for /commands and /skill-name in the input area."""

     def get_completions(self, document, complete_event):
         text = document.text_before_cursor
-        # Only complete at the start of input, after /
         if not text.startswith("/"):
             return
         word = text[1:]  # strip the leading /
+
+        # Built-in commands
         for cmd, desc in COMMANDS.items():
-            cmd_name = cmd[1:]  # strip leading / from key
+            cmd_name = cmd[1:]
             if cmd_name.startswith(word):
                 yield Completion(
                     cmd_name,
@@ -701,6 +711,17 @@ class SlashCommandCompleter(Completer):
                     display_meta=desc,
                 )

+        # Skill commands
+        for cmd, info in _skill_commands.items():
+            cmd_name = cmd[1:]
+            if cmd_name.startswith(word):
+                yield Completion(
+                    cmd_name,
+                    start_position=-len(word),
+                    display=cmd,
+                    display_meta=f"⚡ {info['description'][:50]}",
+                )
+

 def save_config_value(key_path: str, value: any) -> bool:
     """
@@ -1082,20 +1103,21 @@ class HermesCLI:
     )

     def show_help(self):
-        """Display help information with kawaii ASCII art."""
-        print()
-        print("+" + "-" * 50 + "+")
-        print("|" + " " * 14 + "(^_^)? Available Commands" + " " * 10 + "|")
-        print("+" + "-" * 50 + "+")
-        print()
+        """Display help information."""
+        _cprint(f"\n{_BOLD}+{'-' * 50}+{_RST}")
+        _cprint(f"{_BOLD}|{' ' * 14}(^_^)? 
Available Commands{' ' * 10}|{_RST}") + _cprint(f"{_BOLD}+{'-' * 50}+{_RST}\n") for cmd, desc in COMMANDS.items(): - print(f" {cmd:<15} - {desc}") + _cprint(f" {_GOLD}{cmd:<15}{_RST} {_DIM}-{_RST} {desc}") - print() - print(" Tip: Just type your message to chat with Hermes!") - print(" Multi-line: Alt+Enter for a new line") - print() + if _skill_commands: + _cprint(f"\n ⚡ {_BOLD}Skill Commands{_RST} ({len(_skill_commands)} installed):") + for cmd, info in sorted(_skill_commands.items()): + _cprint(f" {_GOLD}{cmd:<22}{_RST} {_DIM}-{_RST} {info['description']}") + + _cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}") + _cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}\n") def show_tools(self): """Display available tools with kawaii ASCII art.""" @@ -1693,8 +1715,21 @@ class HermesCLI: elif cmd_lower == "/verbose": self._toggle_verbose() else: - self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]") - self.console.print("[dim #B8860B]Type /help for available commands[/]") + # Check for skill slash commands (/gif-search, /axolotl, etc.) + base_cmd = cmd_lower.split()[0] + if base_cmd in _skill_commands: + user_instruction = cmd_original[len(base_cmd):].strip() + msg = build_skill_invocation_message(base_cmd, user_instruction) + if msg: + skill_name = _skill_commands[base_cmd]["name"] + print(f"\n⚡ Loading skill: {skill_name}") + if hasattr(self, '_pending_input'): + self._pending_input.put(msg) + else: + self.console.print(f"[bold red]Failed to load skill for {base_cmd}[/]") + else: + self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]") + self.console.print("[dim #B8860B]Type /help for available commands[/]") return True diff --git a/gateway/run.py b/gateway/run.py index c5d283a190..0fa76cde10 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -636,6 +636,21 @@ class GatewayRunner: if command in ["sethome", "set-home"]: return await self._handle_set_home_command(event) + # Skill slash commands: /skill-name loads the skill and sends to agent + if command: + try: + from agent.skill_commands import get_skill_commands, build_skill_invocation_message + skill_cmds = get_skill_commands() + cmd_key = f"/{command}" + if cmd_key in skill_cmds: + user_instruction = event.get_command_args().strip() + msg = build_skill_invocation_message(cmd_key, user_instruction) + if msg: + event.text = msg + # Fall through to normal message processing with skill content + except Exception as e: + logger.debug("Skill command check failed (non-fatal): %s", e) + # Check for pending exec approval responses if source.chat_type != "dm": session_key_preview = f"agent:main:{source.platform.value}:{source.chat_type}:{source.chat_id}" @@ -1000,20 +1015,29 @@ class GatewayRunner: async def _handle_help_command(self, event: MessageEvent) -> str: """Handle /help command - list available commands.""" - return ( - "📖 **Hermes Commands**\n" - "\n" - "`/new` — Start a new conversation\n" - "`/reset` — Reset conversation history\n" - "`/status` — Show session info\n" - "`/stop` — Interrupt the running agent\n" - "`/model [name]` — Show or change the model\n" - "`/personality [name]` — Set a personality\n" - "`/retry` — Retry your last message\n" - "`/undo` — Remove the last exchange\n" - "`/sethome` — Set this chat as the home channel\n" - "`/help` — Show this message" - ) + lines = [ + "📖 **Hermes Commands**\n", + "`/new` — Start a new conversation", + "`/reset` — Reset conversation history", + "`/status` — Show session info", + "`/stop` — Interrupt the running agent", + "`/model 
[name]` — Show or change the model", + "`/personality [name]` — Set a personality", + "`/retry` — Retry your last message", + "`/undo` — Remove the last exchange", + "`/sethome` — Set this chat as the home channel", + "`/help` — Show this message", + ] + try: + from agent.skill_commands import get_skill_commands + skill_cmds = get_skill_commands() + if skill_cmds: + lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):") + for cmd in sorted(skill_cmds): + lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}") + except Exception: + pass + return "\n".join(lines) async def _handle_model_command(self, event: MessageEvent) -> str: """Handle /model command - show or change the current model.""" From 7b23dbfe6841002328f96e8d97980e1d11410db5 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 11:25:44 -0800 Subject: [PATCH 16/31] feat(animation): add support for sending animated GIFs in BasePlatformAdapter and TelegramAdapter --- gateway/platforms/base.py | 40 ++++++++++++++++++++++++++++++----- gateway/platforms/telegram.py | 24 +++++++++++++++++++++ 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 2e818b4ea3..dcd97f309d 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -425,6 +425,28 @@ class BasePlatformAdapter(ABC): text = f"{caption}\n{image_url}" if caption else image_url return await self.send(chat_id=chat_id, content=text, reply_to=reply_to) + async def send_animation( + self, + chat_id: str, + animation_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """ + Send an animated GIF natively via the platform API. + + Override in subclasses to send GIFs as proper animations + (e.g., Telegram send_animation) so they auto-play inline. + Default falls back to send_image. 
+ """ + return await self.send_image(chat_id=chat_id, image_url=animation_url, caption=caption, reply_to=reply_to) + + @staticmethod + def _is_animation_url(url: str) -> bool: + """Check if a URL points to an animated GIF (vs a static image).""" + lower = url.lower().split('?')[0] # Strip query params + return lower.endswith('.gif') + @staticmethod def extract_images(content: str) -> Tuple[List[Tuple[str, str]], str]: """ @@ -636,11 +658,19 @@ class BasePlatformAdapter(ABC): if human_delay > 0: await asyncio.sleep(human_delay) try: - img_result = await self.send_image( - chat_id=event.source.chat_id, - image_url=image_url, - caption=alt_text if alt_text else None, - ) + # Route animated GIFs through send_animation for proper playback + if self._is_animation_url(image_url): + img_result = await self.send_animation( + chat_id=event.source.chat_id, + animation_url=image_url, + caption=alt_text if alt_text else None, + ) + else: + img_result = await self.send_image( + chat_id=event.source.chat_id, + image_url=image_url, + caption=alt_text if alt_text else None, + ) if not img_result.success: print(f"[{self.name}] Failed to send image: {img_result.error}") except Exception as img_err: diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index c37fde42ca..076e97ff54 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -272,6 +272,30 @@ class TelegramAdapter(BasePlatformAdapter): # Fallback: send as text link return await super().send_image(chat_id, image_url, caption, reply_to) + async def send_animation( + self, + chat_id: str, + animation_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """Send an animated GIF natively as a Telegram animation (auto-plays inline).""" + if not self._bot: + return SendResult(success=False, error="Not connected") + + try: + msg = await self._bot.send_animation( + chat_id=int(chat_id), + animation=animation_url, + caption=caption[:1024] if caption else None, + reply_to_message_id=int(reply_to) if reply_to else None, + ) + return SendResult(success=True, message_id=str(msg.message_id)) + except Exception as e: + print(f"[{self.name}] Failed to send animation, falling back to photo: {e}") + # Fallback: try as a regular photo + return await self.send_image(chat_id, animation_url, caption, reply_to) + async def send_typing(self, chat_id: str) -> None: """Send typing indicator.""" if self._bot: From bf52468a913ebbdea89bb20ad979bfa610631d82 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 16:49:49 -0800 Subject: [PATCH 17/31] fix(gateway): improve MEDIA tag handling to prevent duplication across turns Refactor the extraction of MEDIA paths to collect them from the history before processing the current turn's messages. This change ensures that MEDIA tags are deduplicated based on previously seen paths, preventing TTS voice messages from being re-attached in subsequent replies. This addresses the issue outlined in #160. 
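A condensed sketch of the path-set approach, assuming the same MEDIA regex the gateway uses (helper name illustrative):

```python
import re

MEDIA_RE = re.compile(r"MEDIA:(\S+)")

def media_paths(messages):
    """Yield media file paths found in tool/function results."""
    for msg in messages:
        if msg.get("role") in ("tool", "function"):
            for m in MEDIA_RE.finditer(msg.get("content", "")):
                yield m.group(1).rstrip('",}')

history = [{"role": "tool", "content": "MEDIA:/audio/old.ogg"}]
seen_before = set(media_paths(history))  # snapshot taken BEFORE run_conversation

after_run = history + [{"role": "tool", "content": "MEDIA:/audio/new.ogg"}]
new_tags = [f"MEDIA:{p}" for p in media_paths(after_run) if p not in seen_before]
assert new_tags == ["MEDIA:/audio/new.ogg"]  # old path excluded even if indices shifted
```

Unlike index slicing, the set survives context compression shrinking the message list, since membership is keyed on the path itself rather than on position.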
--- gateway/run.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index cf2188a9e2..ccd02bc5f7 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1739,8 +1739,18 @@ class GatewayRunner: content = f"[Delivered from {mirror_src}] {content}" agent_history.append({"role": role, "content": content}) - # Track history length to only scan NEW messages for MEDIA tags - history_len = len(agent_history) + # Collect MEDIA paths already in history so we can exclude them + # from the current turn's extraction. This is compression-safe: + # even if the message list shrinks, we know which paths are old. + _history_media_paths: set = set() + for _hm in agent_history: + if _hm.get("role") in ("tool", "function"): + _hc = _hm.get("content", "") + if "MEDIA:" in _hc: + for _match in re.finditer(r'MEDIA:(\S+)', _hc): + _p = _match.group(1).strip().rstrip('",}') + if _p: + _history_media_paths.add(_p) result = agent.run_conversation(message, conversation_history=agent_history) result_holder[0] = result @@ -1763,28 +1773,24 @@ class GatewayRunner: # append any that aren't already present in the final response, so the # adapter's extract_media() can find and deliver the files exactly once. # - # IMPORTANT: Only scan messages from the CURRENT turn (after history_len), - # not the full history. This prevents TTS voice messages from earlier - # turns being re-attached to every subsequent reply. (Fixes #160) + # Uses path-based deduplication against _history_media_paths (collected + # before run_conversation) instead of index slicing. This is safe even + # when context compression shrinks the message list. (Fixes #160) if "MEDIA:" not in final_response: media_tags = [] has_voice_directive = False - all_messages = result.get("messages", []) - # Only process new messages from this turn - new_messages = all_messages[history_len:] if len(all_messages) > history_len else [] - for msg in new_messages: - if msg.get("role") == "tool" or msg.get("role") == "function": + for msg in result.get("messages", []): + if msg.get("role") in ("tool", "function"): content = msg.get("content", "") if "MEDIA:" in content: for match in re.finditer(r'MEDIA:(\S+)', content): path = match.group(1).strip().rstrip('",}') - if path: + if path and path not in _history_media_paths: media_tags.append(f"MEDIA:{path}") if "[[audio_as_voice]]" in content: has_voice_directive = True if media_tags: - # Deduplicate while preserving order seen = set() unique_tags = [] for tag in media_tags: From 7f7643cf632c43c36d19cbb8c83911a0c06074f1 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 17:09:26 -0800 Subject: [PATCH 18/31] feat(hooks): introduce event hooks system for lifecycle management Add a new hooks system allowing users to run custom code at key lifecycle points in the agent's operation. This includes support for events such as `gateway:startup`, `session:start`, `agent:step`, and more. Documentation for creating hooks and available events has been added to `README.md` and a new `hooks.md` file. Additionally, integrate step callbacks in the agent to facilitate hook execution during tool-calling iterations. 
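The registry class itself is not shown in this patch; only `HookRegistry.discover_and_load()`, `emit()`, and the `loaded_hooks` attribute are referenced. A minimal sketch of what wildcard dispatch could look like under those assumptions (the class shape is illustrative):

```python
import asyncio
import fnmatch

class HookRegistry:
    """Sketch: maps event patterns to handlers discovered from ~/.hermes/hooks/."""

    def __init__(self):
        self.loaded_hooks = []  # (pattern, handler) pairs

    def register(self, pattern, handler):
        self.loaded_hooks.append((pattern, handler))

    async def emit(self, event_type, context):
        for pattern, handler in self.loaded_hooks:
            if fnmatch.fnmatch(event_type, pattern):  # "command:*" matches "command:model"
                try:
                    result = handler(event_type, context)
                    if asyncio.iscoroutine(result):   # handlers may be sync or async
                        await result
                except Exception as exc:              # a broken hook never crashes the agent
                    print(f"hook error (ignored): {exc}")
```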
--- README.md | 15 +++++ docs/hooks.md | 174 +++++++++++++++++++++++++++++++++++++++++++++++++ gateway/run.py | 45 +++++++++++++ run_agent.py | 18 +++++ 4 files changed, 252 insertions(+) create mode 100644 docs/hooks.md diff --git a/README.md b/README.md index 1403c03b58..57ec3d4274 100644 --- a/README.md +++ b/README.md @@ -709,6 +709,21 @@ hermes cron status # Check if gateway is running Even if no messaging platforms are configured, the gateway stays running for cron. A file lock prevents duplicate execution if multiple processes overlap. +### 🪝 Event Hooks + +Run custom code at key lifecycle points — log activity, send alerts, post to webhooks. Hooks are Python handlers that fire automatically during gateway operation. + +``` +~/.hermes/hooks/ +└── my-hook/ + ├── HOOK.yaml # name + events to subscribe to + └── handler.py # async def handle(event_type, context) +``` + +**Available events:** `gateway:startup`, `session:start`, `session:reset`, `agent:start`, `agent:step`, `agent:end`, `command:*` (wildcard — fires for any slash command). + +Hooks are non-blocking — errors are caught and logged, never crashing the agent. See [docs/hooks.md](docs/hooks.md) for the full event reference, context keys, and examples. + ### 🛡️ Exec Approval (Messaging Platforms) When the agent tries to run a potentially dangerous command (`rm -rf`, `chmod 777`, etc.) on Telegram/Discord/WhatsApp, instead of blocking it silently, it asks the user for approval: diff --git a/docs/hooks.md b/docs/hooks.md new file mode 100644 index 0000000000..3746eb3e46 --- /dev/null +++ b/docs/hooks.md @@ -0,0 +1,174 @@ +# Event Hooks + +The hooks system lets you run custom code at key points in the agent lifecycle — session creation, slash commands, each tool-calling step, and more. Hooks are discovered automatically from `~/.hermes/hooks/` and fire without blocking the main agent pipeline. + +## Creating a Hook + +Each hook is a directory under `~/.hermes/hooks/` containing two files: + +``` +~/.hermes/hooks/ +└── my-hook/ + ├── HOOK.yaml # Declares which events to listen for + └── handler.py # Python handler function +``` + +### HOOK.yaml + +```yaml +name: my-hook +description: Log all agent activity to a file +events: + - agent:start + - agent:end + - agent:step +``` + +The `events` list determines which events trigger your handler. You can subscribe to any combination of events, including wildcards like `command:*`. + +### handler.py + +```python +import json +from datetime import datetime +from pathlib import Path + +LOG_FILE = Path.home() / ".hermes" / "hooks" / "my-hook" / "activity.log" + +async def handle(event_type: str, context: dict): + """Called for each subscribed event. 
Must be named 'handle'.""" + entry = { + "timestamp": datetime.now().isoformat(), + "event": event_type, + **context, + } + with open(LOG_FILE, "a") as f: + f.write(json.dumps(entry) + "\n") +``` + +The handler function: +- Must be named `handle` +- Receives `event_type` (string) and `context` (dict) +- Can be `async def` or regular `def` — both work +- Errors are caught and logged, never crashing the agent + +## Available Events + +| Event | When it fires | Context keys | +|-------|---------------|--------------| +| `gateway:startup` | Gateway process starts | `platforms` (list of active platform names) | +| `session:start` | New messaging session created | `platform`, `user_id`, `session_id`, `session_key` | +| `session:reset` | User ran `/new` or `/reset` | `platform`, `user_id`, `session_key` | +| `agent:start` | Agent begins processing a message | `platform`, `user_id`, `session_id`, `message` | +| `agent:step` | Each iteration of the tool-calling loop | `platform`, `user_id`, `session_id`, `iteration`, `tool_names` | +| `agent:end` | Agent finishes processing | `platform`, `user_id`, `session_id`, `message`, `response` | +| `command:*` | Any slash command executed | `platform`, `user_id`, `command`, `args` | + +### Wildcard Matching + +Handlers registered for `command:*` fire for any `command:` event (`command:model`, `command:reset`, etc.). This lets you monitor all slash commands with a single subscription. + +## Examples + +### Telegram Notification on Long Tasks + +Send yourself a Telegram message when the agent takes more than 10 tool-calling steps: + +```yaml +# ~/.hermes/hooks/long-task-alert/HOOK.yaml +name: long-task-alert +description: Alert when agent is taking many steps +events: + - agent:step +``` + +```python +# ~/.hermes/hooks/long-task-alert/handler.py +import os +import httpx + +THRESHOLD = 10 +BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN") +CHAT_ID = os.getenv("TELEGRAM_HOME_CHANNEL") + +async def handle(event_type: str, context: dict): + iteration = context.get("iteration", 0) + if iteration == THRESHOLD and BOT_TOKEN and CHAT_ID: + tools = ", ".join(context.get("tool_names", [])) + text = f"⚠️ Agent has been running for {iteration} steps. 
Last tools: {tools}" + async with httpx.AsyncClient() as client: + await client.post( + f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage", + json={"chat_id": CHAT_ID, "text": text}, + ) +``` + +### Command Usage Logger + +Track which slash commands are used and how often: + +```yaml +# ~/.hermes/hooks/command-logger/HOOK.yaml +name: command-logger +description: Log slash command usage +events: + - command:* +``` + +```python +# ~/.hermes/hooks/command-logger/handler.py +import json +from datetime import datetime +from pathlib import Path + +LOG = Path.home() / ".hermes" / "logs" / "command_usage.jsonl" + +def handle(event_type: str, context: dict): + LOG.parent.mkdir(parents=True, exist_ok=True) + entry = { + "ts": datetime.now().isoformat(), + "command": context.get("command"), + "args": context.get("args"), + "platform": context.get("platform"), + "user": context.get("user_id"), + } + with open(LOG, "a") as f: + f.write(json.dumps(entry) + "\n") +``` + +### Session Start Webhook + +POST to an external service whenever a new session starts: + +```yaml +# ~/.hermes/hooks/session-webhook/HOOK.yaml +name: session-webhook +description: Notify external service on new sessions +events: + - session:start + - session:reset +``` + +```python +# ~/.hermes/hooks/session-webhook/handler.py +import httpx + +WEBHOOK_URL = "https://your-service.example.com/hermes-events" + +async def handle(event_type: str, context: dict): + async with httpx.AsyncClient() as client: + await client.post(WEBHOOK_URL, json={ + "event": event_type, + **context, + }, timeout=5) +``` + +## How It Works + +1. On gateway startup, `HookRegistry.discover_and_load()` scans `~/.hermes/hooks/` +2. Each subdirectory with `HOOK.yaml` + `handler.py` is loaded dynamically +3. Handlers are registered for their declared events +4. At each lifecycle point, `hooks.emit()` fires all matching handlers +5. Errors in any handler are caught and logged — a broken hook never crashes the agent + +Hooks only fire in the **gateway** (Telegram, Discord, Slack, WhatsApp). The CLI does not currently load hooks. The `agent:step` event bridges from the sync agent thread to the async hook system via `asyncio.run_coroutine_threadsafe`. 
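A minimal sketch of that thread-to-loop bridge, assuming a hooks object exposing an async `emit()` (the factory function here is illustrative; the real wiring appears in the gateway diff below):

```python
import asyncio

def make_step_callback(loop, hooks, session_id):
    """Return a sync callback that is safe to call from the agent's worker thread."""
    def step_callback(iteration, tool_names):
        # Schedule the async emit on the gateway's event loop without blocking
        # the (synchronous) agent thread that invoked us.
        asyncio.run_coroutine_threadsafe(
            hooks.emit("agent:step", {
                "session_id": session_id,
                "iteration": iteration,
                "tool_names": tool_names,
            }),
            loop,
        )
    return step_callback
```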
diff --git a/gateway/run.py b/gateway/run.py index ccd02bc5f7..8ed487ffe5 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -609,6 +609,18 @@ class GatewayRunner: # Check for commands command = event.get_command() + + # Emit command:* hook for any recognized slash command + _known_commands = {"new", "reset", "help", "status", "stop", "model", + "personality", "retry", "undo", "sethome", "set-home"} + if command and command in _known_commands: + await self.hooks.emit(f"command:{command}", { + "platform": source.platform.value if source.platform else "", + "user_id": source.user_id, + "command": command, + "args": event.get_command_args().strip(), + }) + if command in ["new", "reset"]: return await self._handle_reset_command(event) @@ -679,6 +691,19 @@ class GatewayRunner: session_entry = self.session_store.get_or_create_session(source) session_key = session_entry.session_key + # Emit session:start for new or auto-reset sessions + _is_new_session = ( + session_entry.created_at == session_entry.updated_at + or getattr(session_entry, "was_auto_reset", False) + ) + if _is_new_session: + await self.hooks.emit("session:start", { + "platform": source.platform.value if source.platform else "", + "user_id": source.user_id, + "session_id": session_entry.session_id, + "session_key": session_key, + }) + # Build session context context = build_session_context(source, self.config, session_entry) @@ -1618,6 +1643,25 @@ class GatewayRunner: result_holder = [None] # Mutable container for the result tools_holder = [None] # Mutable container for the tool definitions + # Bridge sync step_callback → async hooks.emit for agent:step events + _loop_for_step = asyncio.get_event_loop() + _hooks_ref = self.hooks + + def _step_callback_sync(iteration: int, tool_names: list) -> None: + try: + asyncio.run_coroutine_threadsafe( + _hooks_ref.emit("agent:step", { + "platform": source.platform.value if source.platform else "", + "user_id": source.user_id, + "session_id": session_id, + "iteration": iteration, + "tool_names": tool_names, + }), + _loop_for_step, + ) + except Exception as _e: + logger.debug("agent:step hook error: %s", _e) + def run_sync(): # Pass session_key to process registry via env var so background # processes can be mapped back to this gateway session @@ -1687,6 +1731,7 @@ class GatewayRunner: reasoning_config=self._reasoning_config, session_id=session_id, tool_progress_callback=progress_callback if tool_progress_enabled else None, + step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None, platform=platform_key, honcho_session_key=session_key, session_db=self._session_db, diff --git a/run_agent.py b/run_agent.py index 2f6de6cdb3..61c9669f70 100644 --- a/run_agent.py +++ b/run_agent.py @@ -124,6 +124,7 @@ class AIAgent: session_id: str = None, tool_progress_callback: callable = None, clarify_callback: callable = None, + step_callback: callable = None, max_tokens: int = None, reasoning_config: Dict[str, Any] = None, prefill_messages: List[Dict[str, Any]] = None, @@ -195,6 +196,7 @@ class AIAgent: ) self.tool_progress_callback = tool_progress_callback self.clarify_callback = clarify_callback + self.step_callback = step_callback self._last_reported_tool = None # Track for "new tool" mode # Interrupt mechanism for breaking out of tool loops @@ -1936,6 +1938,22 @@ class AIAgent: api_call_count += 1 + # Fire step_callback for gateway hooks (agent:step event) + if self.step_callback is not None: + try: + prev_tools = [] + for _m in reversed(messages): + if _m.get("role") == "assistant" and 
_m.get("tool_calls"): + prev_tools = [ + tc["function"]["name"] + for tc in _m["tool_calls"] + if isinstance(tc, dict) + ] + break + self.step_callback(api_call_count, prev_tools) + except Exception as _step_err: + logger.debug("step_callback error (iteration %s): %s", api_call_count, _step_err) + # Track tool-calling iterations for skill nudge. # Counter resets whenever skill_manage is actually used. if (self._skill_nudge_interval > 0 From 500f0eab4a0ad2d6590fed37256b8e4a128ad451 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 21:47:51 -0800 Subject: [PATCH 19/31] refactor(cli): Finalize OpenAI Codex Integration with OAuth - Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults. - Updated the context compressor's summary target tokens to 2500 for improved performance. - Added external credential detection for Codex CLI to streamline authentication. - Refactored various components to ensure consistent handling of authentication and model selection across the application. --- README.md | 13 +- agent/auxiliary_client.py | 250 ++++++++++- agent/context_compressor.py | 2 +- cli.py | 6 +- docs/cli.md | 4 +- gateway/run.py | 17 +- hermes_cli/auth.py | 271 ++++++++++-- hermes_cli/codex_models.py | 61 ++- hermes_cli/main.py | 15 +- hermes_cli/setup.py | 30 +- hermes_cli/status.py | 4 +- run_agent.py | 236 +++++++--- tests/agent/test_auxiliary_client.py | 168 +++++++ tests/test_auth_codex_provider.py | 27 +- tests/test_cli_init.py | 80 ++++ tests/test_codex_execution_paths.py | 5 + tests/test_external_credential_detection.py | 51 +++ tests/test_flush_memories_codex.py | 225 ++++++++++ tests/test_provider_parity.py | 460 ++++++++++++++++++++ tests/test_run_agent_codex_responses.py | 17 +- tools/session_search_tool.py | 23 +- tools/web_tools.py | 26 +- 22 files changed, 1784 insertions(+), 207 deletions(-) create mode 100644 tests/agent/test_auxiliary_client.py create mode 100644 tests/test_cli_init.py create mode 100644 tests/test_external_credential_detection.py create mode 100644 tests/test_flush_memories_codex.py create mode 100644 tests/test_provider_parity.py diff --git a/README.md b/README.md index 57ec3d4274..f63bb2f1ae 100644 --- a/README.md +++ b/README.md @@ -121,11 +121,14 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro | Provider | Setup | |----------|-------| -| **Nous Portal** | `hermes login` (OAuth, subscription-based) | +| **Nous Portal** | `hermes model` (OAuth, subscription-based) | +| **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) | | **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` | | **Custom Endpoint** | `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | -**Note:** Even when using Nous Portal or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools. +**Codex note:** The OpenAI Codex provider authenticates via device code (open a URL, enter a code). Credentials are stored at `~/.codex/auth.json` and auto-refresh. No Codex CLI installation required. + +**Note:** Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools. 
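Putting the pieces together, the runtime wiring implied by the tests looks roughly like this (`resolve_codex_runtime_credentials` and the returned keys come from the test fixtures; the wrapper function is illustrative, not the actual implementation):

```python
from openai import OpenAI
from hermes_cli.auth import resolve_codex_runtime_credentials

def build_codex_client(force_refresh: bool = False) -> OpenAI:
    """Build an OpenAI client pointed at the Codex Responses backend."""
    creds = resolve_codex_runtime_credentials(force_refresh=force_refresh)
    return OpenAI(
        api_key=creds["api_key"],    # OAuth access token refreshed from ~/.codex/auth.json
        base_url=creds["base_url"],  # e.g. https://chatgpt.com/backend-api/codex
    )
```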
---

@@ -368,7 +371,6 @@ hermes --resume            # Resume a specific session (-r)
 
 # Provider & model management
 hermes model               # Switch provider and model interactively
-hermes login               # Authenticate with Nous Portal (OAuth)
 hermes logout              # Clear stored OAuth credentials
 
 # Configuration
@@ -1638,7 +1641,7 @@ All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set t
 |------|-------------|
 | `~/.hermes/config.yaml` | Your settings |
 | `~/.hermes/.env` | API keys and secrets |
-| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes login`) |
+| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes model`) |
 | `~/.hermes/cron/` | Scheduled jobs data |
 | `~/.hermes/sessions/` | Gateway session data |
 | `~/.hermes/hermes-agent/` | Installation directory |
@@ -1666,7 +1669,7 @@ hermes config                  # View current settings
 
 Common issues:
 - **"API key not set"**: Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key`
 - **"hermes: command not found"**: Reload your shell (`source ~/.bashrc`) or check PATH
-- **"Run `hermes login` to re-authenticate"**: Your Nous Portal session expired. Run `hermes login` to refresh.
+- **"Run `hermes setup` to re-authenticate"**: Your Nous Portal session expired. Run `hermes setup` or `hermes model` to refresh.
 - **"No active paid subscription"**: Your Nous Portal account needs an active subscription for inference.
 - **Gateway won't start**: Check `hermes gateway status` and logs
 - **Missing config after update**: Run `hermes config check` to see what's new, then `hermes config migrate` to add missing options
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 04ac41a564..4fb879414e 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -8,7 +8,9 @@ Resolution order for text tasks:
   1. OpenRouter (OPENROUTER_API_KEY)
   2. Nous Portal (~/.hermes/auth.json active provider)
   3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
-  4. None
+  4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
+     wrapped to look like a chat.completions client)
+  5. None
 
 Resolution order for vision/multimodal tasks:
   1. OpenRouter
@@ -20,7 +22,8 @@
 import json
 import logging
 import os
 from pathlib import Path
-from typing import Optional, Tuple
+from types import SimpleNamespace
+from typing import Any, Dict, List, Optional, Tuple
 
 from openai import OpenAI
 
@@ -49,6 +52,188 @@ _NOUS_MODEL = "gemini-3-flash"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
 
+# Codex fallback: uses the Responses API (the only endpoint the Codex
+# OAuth token can access) with a fast model for auxiliary tasks.
+_CODEX_AUX_MODEL = "gpt-5.3-codex"
+_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
+
+
+# ── Codex Responses → chat.completions adapter ─────────────────────────────
+# All auxiliary consumers call client.chat.completions.create(**kwargs) and
+# read response.choices[0].message.content. This adapter translates those
+# calls to the Codex Responses API so callers don't need any changes.
+
+class _CodexCompletionsAdapter:
+    """Drop-in shim that accepts chat.completions.create() kwargs and
+    routes them through the Codex Responses streaming API."""
+
+    def __init__(self, real_client: OpenAI, model: str):
+        self._client = real_client
+        self._model = model
+
+    def create(self, **kwargs) -> Any:
+        messages = kwargs.get("messages", [])
+        model = kwargs.get("model", self._model)
+        temperature = kwargs.get("temperature")
+
+        # Separate system/instructions from conversation messages
+        instructions = "You are a helpful assistant."
+        input_msgs: List[Dict[str, Any]] = []
+        for msg in messages:
+            role = msg.get("role", "user")
+            content = msg.get("content", "")
+            if role == "system":
+                instructions = content
+            else:
+                input_msgs.append({"role": role, "content": content})
+
+        resp_kwargs: Dict[str, Any] = {
+            "model": model,
+            "instructions": instructions,
+            "input": input_msgs or [{"role": "user", "content": ""}],
+            "store": False,
+        }
+
+        max_tokens = kwargs.get("max_output_tokens") or kwargs.get("max_completion_tokens") or kwargs.get("max_tokens")
+        if max_tokens is not None:
+            resp_kwargs["max_output_tokens"] = int(max_tokens)
+        if temperature is not None:
+            resp_kwargs["temperature"] = temperature
+
+        # Tools support for flush_memories and similar callers
+        tools = kwargs.get("tools")
+        if tools:
+            converted = []
+            for t in tools:
+                fn = t.get("function", {}) if isinstance(t, dict) else {}
+                name = fn.get("name")
+                if not name:
+                    continue
+                converted.append({
+                    "type": "function",
+                    "name": name,
+                    "description": fn.get("description", ""),
+                    "parameters": fn.get("parameters", {}),
+                })
+            if converted:
+                resp_kwargs["tools"] = converted
+
+        # Stream and collect the response (responses.stream() always
+        # streams; passing an explicit stream=True kwarg would be rejected)
+        text_parts: List[str] = []
+        tool_calls_raw: List[Any] = []
+        usage = None
+
+        try:
+            with self._client.responses.stream(**resp_kwargs) as stream:
+                for _event in stream:
+                    pass
+                final = stream.get_final_response()
+
+            # Extract text and tool calls from the Responses output
+            for item in getattr(final, "output", []):
+                item_type = getattr(item, "type", None)
+                if item_type == "message":
+                    for part in getattr(item, "content", []):
+                        ptype = getattr(part, "type", None)
+                        if ptype in ("output_text", "text"):
+                            text_parts.append(getattr(part, "text", ""))
+                elif item_type == "function_call":
+                    tool_calls_raw.append(SimpleNamespace(
+                        id=getattr(item, "call_id", ""),
+                        type="function",
+                        function=SimpleNamespace(
+                            name=getattr(item, "name", ""),
+                            arguments=getattr(item, "arguments", "{}"),
+                        ),
+                    ))
+
+            resp_usage = getattr(final, "usage", None)
+            if resp_usage:
+                usage = SimpleNamespace(
+                    prompt_tokens=getattr(resp_usage, "input_tokens", 0),
+                    completion_tokens=getattr(resp_usage, "output_tokens", 0),
+                    total_tokens=getattr(resp_usage, "total_tokens", 0),
+                )
+        except Exception as exc:
+            logger.debug("Codex auxiliary Responses API call failed: %s", exc)
+            raise
+
+        content = "".join(text_parts).strip() or None
+
+        # Build a response that looks like chat.completions
+        message = SimpleNamespace(
+            role="assistant",
+            content=content,
+            tool_calls=tool_calls_raw or None,
+        )
+        choice = SimpleNamespace(
+            index=0,
+            message=message,
+            finish_reason="stop" if not tool_calls_raw else "tool_calls",
+        )
+        return SimpleNamespace(
+            choices=[choice],
+            model=model,
+            usage=usage,
+        )
+
+
+class _CodexChatShim:
+    """Wraps the adapter to provide client.chat.completions.create()."""
+
+    def __init__(self, adapter: _CodexCompletionsAdapter):
+        self.completions = adapter
+
+
+class CodexAuxiliaryClient:
+    
"""OpenAI-client-compatible wrapper that routes through Codex Responses API. + + Consumers can call client.chat.completions.create(**kwargs) as normal. + Also exposes .api_key and .base_url for introspection by async wrappers. + """ + + def __init__(self, real_client: OpenAI, model: str): + self._real_client = real_client + adapter = _CodexCompletionsAdapter(real_client, model) + self.chat = _CodexChatShim(adapter) + self.api_key = real_client.api_key + self.base_url = real_client.base_url + + def close(self): + self._real_client.close() + + +class _AsyncCodexCompletionsAdapter: + """Async version of the Codex Responses adapter. + + Wraps the sync adapter via asyncio.to_thread() so async consumers + (web_tools, session_search) can await it as normal. + """ + + def __init__(self, sync_adapter: _CodexCompletionsAdapter): + self._sync = sync_adapter + + async def create(self, **kwargs) -> Any: + import asyncio + return await asyncio.to_thread(self._sync.create, **kwargs) + + +class _AsyncCodexChatShim: + def __init__(self, adapter: _AsyncCodexCompletionsAdapter): + self.completions = adapter + + +class AsyncCodexAuxiliaryClient: + """Async-compatible wrapper matching AsyncOpenAI.chat.completions.create().""" + + def __init__(self, sync_wrapper: "CodexAuxiliaryClient"): + sync_adapter = sync_wrapper.chat.completions + async_adapter = _AsyncCodexCompletionsAdapter(sync_adapter) + self.chat = _AsyncCodexChatShim(async_adapter) + self.api_key = sync_wrapper.api_key + self.base_url = sync_wrapper.base_url + def _read_nous_auth() -> Optional[dict]: """Read and validate ~/.hermes/auth.json for an active Nous provider. @@ -82,12 +267,31 @@ def _nous_base_url() -> str: return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL) +def _read_codex_access_token() -> Optional[str]: + """Read a valid Codex OAuth access token from ~/.codex/auth.json.""" + try: + codex_auth = Path.home() / ".codex" / "auth.json" + if not codex_auth.is_file(): + return None + data = json.loads(codex_auth.read_text()) + tokens = data.get("tokens") + if not isinstance(tokens, dict): + return None + access_token = tokens.get("access_token") + if isinstance(access_token, str) and access_token.strip(): + return access_token.strip() + return None + except Exception as exc: + logger.debug("Could not read Codex auth for auxiliary client: %s", exc) + return None + + # ── Public API ────────────────────────────────────────────────────────────── def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: """Return (client, model_slug) for text-only auxiliary tasks. - Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None). + Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth -> (None, None). """ # 1. OpenRouter or_key = os.getenv("OPENROUTER_API_KEY") @@ -115,11 +319,44 @@ def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: logger.debug("Auxiliary text client: custom endpoint (%s)", model) return OpenAI(api_key=custom_key, base_url=custom_base), model - # 4. Nothing available + # 4. Codex OAuth -- uses the Responses API (only endpoint the token + # can access), wrapped to look like a chat.completions client. + codex_token = _read_codex_access_token() + if codex_token: + logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) + real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) + return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL + + # 5. 
Nothing available logger.debug("Auxiliary text client: none available") return None, None +def get_async_text_auxiliary_client(): + """Return (async_client, model_slug) for async consumers. + + For standard providers returns (AsyncOpenAI, model). For Codex returns + (AsyncCodexAuxiliaryClient, model) which wraps the Responses API. + Returns (None, None) when no provider is available. + """ + from openai import AsyncOpenAI + + sync_client, model = get_text_auxiliary_client() + if sync_client is None: + return None, None + + if isinstance(sync_client, CodexAuxiliaryClient): + return AsyncCodexAuxiliaryClient(sync_client), model + + async_kwargs = { + "api_key": sync_client.api_key, + "base_url": str(sync_client.base_url), + } + if "openrouter" in str(sync_client.base_url).lower(): + async_kwargs["default_headers"] = dict(_OR_HEADERS) + return AsyncOpenAI(**async_kwargs), model + + def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: """Return (client, model_slug) for vision/multimodal auxiliary tasks. @@ -161,11 +398,12 @@ def auxiliary_max_tokens_param(value: int) -> dict: OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'. + The Codex adapter translates max_tokens internally, so we use max_tokens + for it as well. """ custom_base = os.getenv("OPENAI_BASE_URL", "") or_key = os.getenv("OPENROUTER_API_KEY") - # Only use max_completion_tokens when the auxiliary client resolved to - # direct OpenAI (no OpenRouter key, no Nous auth, custom endpoint is api.openai.com) + # Only use max_completion_tokens for direct OpenAI custom endpoints if (not or_key and _read_nous_auth() is None and "api.openai.com" in custom_base.lower()): diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 6f9ce3c01b..034eb8f997 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -31,7 +31,7 @@ class ContextCompressor: threshold_percent: float = 0.85, protect_first_n: int = 3, protect_last_n: int = 4, - summary_target_tokens: int = 500, + summary_target_tokens: int = 2500, quiet_mode: bool = False, summary_model_override: str = None, ): diff --git a/cli.py b/cli.py index b85edc6b7d..16ce554ee2 100755 --- a/cli.py +++ b/cli.py @@ -841,12 +841,10 @@ class HermesCLI: or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"]) ) self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY") - # Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default - if max_turns != 60: # CLI arg was explicitly set self._nous_key_expires_at: Optional[str] = None self._nous_key_source: Optional[str] = None - # Max turns priority: CLI arg > config file > env var > default - if max_turns is not None: + # Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default + if max_turns is not None: # CLI arg was explicitly set self.max_turns = max_turns elif CLI_CONFIG["agent"].get("max_turns"): self.max_turns = CLI_CONFIG["agent"]["max_turns"] diff --git a/docs/cli.md b/docs/cli.md index a9257024c3..6c1abc399a 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -12,7 +12,7 @@ hermes hermes --model "anthropic/claude-sonnet-4" # With specific provider -hermes --provider nous # Use Nous Portal (requires: hermes login) +hermes --provider nous # Use Nous Portal (requires: hermes model) hermes --provider openrouter # Force OpenRouter # With specific toolsets @@ -93,7 +93,7 @@ model: ``` 
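The async wrappers added above reduce to one pattern: hand the sync adapter's blocking `create()` to a worker thread so the event loop stays free. A self-contained sketch of that bridge, assuming only the standard library (the class name here is hypothetical, not part of the patch):

```python
import asyncio
from typing import Any, Callable


class AsyncCreateShim:
    """Awaitable facade over a blocking create() callable."""

    def __init__(self, sync_create: Callable[..., Any]):
        self._sync_create = sync_create

    async def create(self, **kwargs: Any) -> Any:
        # asyncio.to_thread runs the blocking call in a worker thread,
        # keeping the event loop responsive for other gateway tasks.
        return await asyncio.to_thread(self._sync_create, **kwargs)


async def _demo() -> None:
    shim = AsyncCreateShim(lambda **kw: {"echo": kw})
    print(await shim.create(model="gpt-5.3-codex"))


if __name__ == "__main__":
    asyncio.run(_demo())
```

Because `to_thread` forwards keyword arguments unchanged, async consumers such as `web_tools` and `session_search` can await the Codex path with the exact call signature they already use for `AsyncOpenAI`.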
**Provider selection** (`provider` field): -- `auto` (default): Uses Nous Portal if logged in (`hermes login`), otherwise falls back to OpenRouter/env vars. +- `auto` (default): Uses Nous Portal if logged in (`hermes model`), otherwise falls back to OpenRouter/env vars. - `openrouter`: Always uses `OPENROUTER_API_KEY` from `.env`. - `nous`: Always uses Nous Portal OAuth credentials from `auth.json`. diff --git a/gateway/run.py b/gateway/run.py index 942c72bbc3..c21cf8b9ec 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -214,17 +214,12 @@ class GatewayRunner: return from run_agent import AIAgent - _flush_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "") - _flush_base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") - _flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL", "anthropic/claude-opus-4.6") - - if not _flush_api_key: + runtime_kwargs = _resolve_runtime_agent_kwargs() + if not runtime_kwargs.get("api_key"): return tmp_agent = AIAgent( - model=_flush_model, - api_key=_flush_api_key, - base_url=_flush_base_url, + **runtime_kwargs, max_iterations=8, quiet_mode=True, enabled_toolsets=["memory", "skills"], @@ -979,12 +974,10 @@ class GatewayRunner: if old_history: from run_agent import AIAgent loop = asyncio.get_event_loop() - # Resolve credentials so the flush agent can reach the LLM - _flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" + _flush_kwargs = _resolve_runtime_agent_kwargs() def _do_flush(): tmp_agent = AIAgent( - model=_flush_model, - **_resolve_runtime_agent_kwargs(), + **_flush_kwargs, max_iterations=5, quiet_mode=True, enabled_toolsets=["memory"], diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 864916b32f..098b7620cc 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -10,7 +10,7 @@ Architecture: - Auth store (auth.json) holds per-provider credential state - resolve_provider() picks the active provider via priority chain - resolve_*_runtime_credentials() handles token refresh and key minting -- login_command() / logout_command() are the CLI entry points +- logout_command() is the CLI entry point for clearing auth """ from __future__ import annotations @@ -127,7 +127,7 @@ def format_auth_error(error: Exception) -> str: return str(error) if error.relogin_required: - return f"{error} Run `hermes login` to re-authenticate." + return f"{error} Run `hermes model` to re-authenticate." if error.code == "subscription_required": return ( @@ -1172,6 +1172,39 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: return {"logged_in": False} +# ============================================================================= +# External credential detection +# ============================================================================= + +def detect_external_credentials() -> List[Dict[str, Any]]: + """Scan for credentials from other CLI tools that Hermes can reuse. + + Returns a list of dicts, each with: + - provider: str -- Hermes provider id (e.g. 
"openai-codex") + - path: str -- filesystem path where creds were found + - label: str -- human-friendly description for the setup UI + """ + found: List[Dict[str, Any]] = [] + + # Codex CLI: ~/.codex/auth.json (or $CODEX_HOME/auth.json) + try: + codex_home = resolve_codex_home_path() + codex_auth = codex_home / "auth.json" + if codex_auth.is_file(): + data = json.loads(codex_auth.read_text()) + tokens = data.get("tokens", {}) + if isinstance(tokens, dict) and tokens.get("access_token"): + found.append({ + "provider": "openai-codex", + "path": str(codex_auth), + "label": f"Codex CLI credentials found ({codex_auth})", + }) + except Exception: + pass + + return found + + # ============================================================================= # CLI Commands — login / logout # ============================================================================= @@ -1328,56 +1361,43 @@ def _save_model_choice(model_id: str) -> None: def login_command(args) -> None: - """Run OAuth device code login for the selected provider.""" - provider_id = getattr(args, "provider", None) or "nous" - - if provider_id not in PROVIDER_REGISTRY: - print(f"Unknown provider: {provider_id}") - print(f"Available: {', '.join(PROVIDER_REGISTRY.keys())}") - raise SystemExit(1) - - pconfig = PROVIDER_REGISTRY[provider_id] - - if provider_id == "nous": - _login_nous(args, pconfig) - elif provider_id == "openai-codex": - _login_openai_codex(args, pconfig) - else: - print(f"Login for provider '{provider_id}' is not yet implemented.") - raise SystemExit(1) + """Deprecated: use 'hermes model' or 'hermes setup' instead.""" + print("The 'hermes login' command has been removed.") + print("Use 'hermes model' to select a provider and model,") + print("or 'hermes setup' for full interactive setup.") + raise SystemExit(0) def _login_openai_codex(args, pconfig: ProviderConfig) -> None: - """OpenAI Codex login flow using Codex CLI auth state.""" - codex_path = shutil.which("codex") - if not codex_path: - print("Codex CLI was not found in PATH.") - print("Install Codex CLI, then retry `hermes login --provider openai-codex`.") - raise SystemExit(1) + """OpenAI Codex login via device code flow (no Codex CLI required).""" + codex_home = resolve_codex_home_path() - print(f"Starting Hermes login via {pconfig.name}...") - print(f"Using Codex CLI: {codex_path}") - print(f"Codex home: {resolve_codex_home_path()}") - - creds: Dict[str, Any] + # Check for existing valid credentials first try: - creds = resolve_codex_runtime_credentials() + existing = resolve_codex_runtime_credentials() + print(f"Existing Codex credentials found at {codex_home / 'auth.json'}") + try: + reuse = input("Use existing credentials? [Y/n]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + reuse = "y" + if reuse in ("", "y", "yes"): + creds = existing + _save_codex_provider_state(creds) + return except AuthError: - print("No usable Codex auth found. 
Running `codex login`...") - try: - subprocess.run(["codex", "login"], check=True) - except subprocess.CalledProcessError as exc: - print(f"Codex login failed with exit code {exc.returncode}.") - raise SystemExit(1) - except KeyboardInterrupt: - print("\nLogin cancelled.") - raise SystemExit(130) - try: - creds = resolve_codex_runtime_credentials() - except AuthError as exc: - print(format_auth_error(exc)) - raise SystemExit(1) + pass + # No existing creds (or user declined) -- run device code flow + print() + print("Signing in to OpenAI Codex...") + print() + + creds = _codex_device_code_login() + _save_codex_provider_state(creds) + + +def _save_codex_provider_state(creds: Dict[str, Any]) -> None: + """Persist Codex provider state to auth store and config.""" auth_state = { "auth_file": creds.get("auth_file"), "codex_home": creds.get("codex_home"), @@ -1391,13 +1411,170 @@ def _login_openai_codex(args, pconfig: ProviderConfig) -> None: _save_provider_state(auth_store, "openai-codex", auth_state) saved_to = _save_auth_store(auth_store) - config_path = _update_config_for_provider("openai-codex", creds["base_url"]) + config_path = _update_config_for_provider("openai-codex", creds.get("base_url", DEFAULT_CODEX_BASE_URL)) print() print("Login successful!") print(f" Auth state: {saved_to}") print(f" Config updated: {config_path} (model.provider=openai-codex)") +def _codex_device_code_login() -> Dict[str, Any]: + """Run the OpenAI device code login flow and return credentials dict.""" + import time as _time + + issuer = "https://auth.openai.com" + client_id = CODEX_OAUTH_CLIENT_ID + + # Step 1: Request device code + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + resp = client.post( + f"{issuer}/api/accounts/deviceauth/usercode", + json={"client_id": client_id}, + headers={"Content-Type": "application/json"}, + ) + except Exception as exc: + raise AuthError( + f"Failed to request device code: {exc}", + provider="openai-codex", code="device_code_request_failed", + ) + + if resp.status_code != 200: + raise AuthError( + f"Device code request returned status {resp.status_code}.", + provider="openai-codex", code="device_code_request_error", + ) + + device_data = resp.json() + user_code = device_data.get("user_code", "") + device_auth_id = device_data.get("device_auth_id", "") + poll_interval = max(3, int(device_data.get("interval", "5"))) + + if not user_code or not device_auth_id: + raise AuthError( + "Device code response missing required fields.", + provider="openai-codex", code="device_code_incomplete", + ) + + # Step 2: Show user the code + print("To continue, follow these steps:\n") + print(f" 1. Open this URL in your browser:") + print(f" \033[94m{issuer}/codex/device\033[0m\n") + print(f" 2. Enter this code:") + print(f" \033[94m{user_code}\033[0m\n") + print("Waiting for sign-in... 
(press Ctrl+C to cancel)") + + # Step 3: Poll for authorization code + max_wait = 15 * 60 # 15 minutes + start = _time.monotonic() + code_resp = None + + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + while _time.monotonic() - start < max_wait: + _time.sleep(poll_interval) + poll_resp = client.post( + f"{issuer}/api/accounts/deviceauth/token", + json={"device_auth_id": device_auth_id, "user_code": user_code}, + headers={"Content-Type": "application/json"}, + ) + + if poll_resp.status_code == 200: + code_resp = poll_resp.json() + break + elif poll_resp.status_code in (403, 404): + continue # User hasn't completed login yet + else: + raise AuthError( + f"Device auth polling returned status {poll_resp.status_code}.", + provider="openai-codex", code="device_code_poll_error", + ) + except KeyboardInterrupt: + print("\nLogin cancelled.") + raise SystemExit(130) + + if code_resp is None: + raise AuthError( + "Login timed out after 15 minutes.", + provider="openai-codex", code="device_code_timeout", + ) + + # Step 4: Exchange authorization code for tokens + authorization_code = code_resp.get("authorization_code", "") + code_verifier = code_resp.get("code_verifier", "") + redirect_uri = f"{issuer}/deviceauth/callback" + + if not authorization_code or not code_verifier: + raise AuthError( + "Device auth response missing authorization_code or code_verifier.", + provider="openai-codex", code="device_code_incomplete_exchange", + ) + + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + token_resp = client.post( + CODEX_OAUTH_TOKEN_URL, + data={ + "grant_type": "authorization_code", + "code": authorization_code, + "redirect_uri": redirect_uri, + "client_id": client_id, + "code_verifier": code_verifier, + }, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + except Exception as exc: + raise AuthError( + f"Token exchange failed: {exc}", + provider="openai-codex", code="token_exchange_failed", + ) + + if token_resp.status_code != 200: + raise AuthError( + f"Token exchange returned status {token_resp.status_code}.", + provider="openai-codex", code="token_exchange_error", + ) + + tokens = token_resp.json() + access_token = tokens.get("access_token", "") + refresh_token = tokens.get("refresh_token", "") + + if not access_token: + raise AuthError( + "Token exchange did not return an access_token.", + provider="openai-codex", code="token_exchange_no_access_token", + ) + + # Step 5: Persist tokens to ~/.codex/auth.json + codex_home = resolve_codex_home_path() + codex_home.mkdir(parents=True, exist_ok=True) + auth_path = codex_home / "auth.json" + + payload = { + "tokens": { + "access_token": access_token, + "refresh_token": refresh_token, + }, + "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + } + _persist_codex_auth_payload(auth_path, payload, lock_held=False) + + base_url = ( + os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") + or DEFAULT_CODEX_BASE_URL + ) + + return { + "api_key": access_token, + "base_url": base_url, + "auth_file": str(auth_path), + "codex_home": str(codex_home), + "last_refresh": payload["last_refresh"], + "auth_mode": "chatgpt", + "source": "device-code", + } + + def _login_nous(args, pconfig: ProviderConfig) -> None: """Nous Portal device authorization flow.""" portal_base_url = ( @@ -1579,6 +1756,6 @@ def logout_command(args) -> None: if os.getenv("OPENROUTER_API_KEY"): print("Hermes will use OpenRouter for inference.") else: - print("Run `hermes login` or configure an API key to use 
Hermes.") + print("Run `hermes model` or configure an API key to use Hermes.") else: print(f"No auth state found for {provider_name}.") diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py index ed1009c546..75559396f2 100644 --- a/hermes_cli/codex_models.py +++ b/hermes_cli/codex_models.py @@ -1,21 +1,62 @@ -"""Codex model discovery from local Codex CLI cache/config.""" +"""Codex model discovery from API, local cache, and config.""" from __future__ import annotations import json +import logging from pathlib import Path from typing import List, Optional from hermes_cli.auth import resolve_codex_home_path +logger = logging.getLogger(__name__) + DEFAULT_CODEX_MODELS: List[str] = [ - "gpt-5-codex", "gpt-5.3-codex", "gpt-5.2-codex", - "gpt-5.1-codex", + "gpt-5.1-codex-max", + "gpt-5.1-codex-mini", ] +def _fetch_models_from_api(access_token: str) -> List[str]: + """Fetch available models from the Codex API. Returns visible models sorted by priority.""" + try: + import httpx + resp = httpx.get( + "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0", + headers={"Authorization": f"Bearer {access_token}"}, + timeout=10, + ) + if resp.status_code != 200: + return [] + data = resp.json() + entries = data.get("models", []) if isinstance(data, dict) else [] + except Exception as exc: + logger.debug("Failed to fetch Codex models from API: %s", exc) + return [] + + sortable = [] + for item in entries: + if not isinstance(item, dict): + continue + slug = item.get("slug") + if not isinstance(slug, str) or not slug.strip(): + continue + slug = slug.strip() + if item.get("supported_in_api") is False: + continue + visibility = item.get("visibility", "") + if isinstance(visibility, str) and visibility.strip().lower() == "hide": + continue + priority = item.get("priority") + rank = int(priority) if isinstance(priority, (int, float)) else 10_000 + sortable.append((rank, slug)) + + sortable.sort(key=lambda x: (x[0], x[1])) + return [slug for _, slug in sortable] + + def _read_default_model(codex_home: Path) -> Optional[str]: config_path = codex_home / "config.toml" if not config_path.exists(): @@ -72,10 +113,22 @@ def _read_cache_models(codex_home: Path) -> List[str]: return deduped -def get_codex_model_ids() -> List[str]: +def get_codex_model_ids(access_token: Optional[str] = None) -> List[str]: + """Return available Codex model IDs, trying API first, then local sources. + + Resolution order: API (live, if token provided) > config.toml default > + local cache > hardcoded defaults. 
+ """ codex_home = resolve_codex_home_path() ordered: List[str] = [] + # Try live API if we have a token + if access_token: + api_models = _fetch_models_from_api(access_token) + if api_models: + return api_models + + # Fall back to local sources default_model = _read_default_model(codex_home) if default_model: ordered.append(default_model) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index b0965e5472..2bc391aadd 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -12,7 +12,6 @@ Usage: hermes gateway install # Install gateway service hermes gateway uninstall # Uninstall gateway service hermes setup # Interactive setup wizard - hermes login # Authenticate with Nous Portal (or other providers) hermes logout # Clear stored authentication hermes status # Show status of all components hermes cron # Manage cron jobs @@ -547,7 +546,14 @@ def _model_flow_openai_codex(config, current_model=""): print(f"Login failed: {exc}") return - codex_models = get_codex_model_ids() + _codex_token = None + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + _codex_creds = resolve_codex_runtime_credentials() + _codex_token = _codex_creds.get("api_key") + except Exception: + pass + codex_models = get_codex_model_ids(access_token=_codex_token) selected = _prompt_model_selection(codex_models, current_model=current_model) if selected: @@ -827,8 +833,8 @@ def cmd_update(args): pass # No systemd (macOS, WSL1, etc.) — skip silently print() - print("Tip: You can now log in with Nous Portal for inference:") - print(" hermes login # Authenticate with Nous Portal") + print("Tip: You can now select a provider and model:") + print(" hermes model # Select provider and model") except subprocess.CalledProcessError as e: print(f"✗ Update failed: {e}") @@ -848,7 +854,6 @@ Examples: hermes --continue Resume the most recent session hermes --resume Resume a specific session hermes setup Run setup wizard - hermes login Authenticate with an inference provider hermes logout Clear stored authentication hermes model Select default model hermes config View configuration diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index d11cb4b7af..fa4dcebb4f 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -621,11 +621,23 @@ def run_setup_wizard(args): format_auth_error, AuthError, fetch_nous_models, resolve_nous_runtime_credentials, _update_config_for_provider, _login_openai_codex, get_codex_auth_status, DEFAULT_CODEX_BASE_URL, + detect_external_credentials, ) existing_custom = get_env_value("OPENAI_BASE_URL") existing_or = get_env_value("OPENROUTER_API_KEY") active_oauth = get_active_provider() + # Detect credentials from other CLI tools + detected_creds = detect_external_credentials() + if detected_creds: + print_info("Detected existing credentials:") + for cred in detected_creds: + if cred["provider"] == "openai-codex": + print_success(f" * {cred['label']} -- select \"OpenAI Codex\" to use it") + else: + print_info(f" * {cred['label']}") + print() + # Detect if any provider is already configured has_any_provider = bool(active_oauth or existing_custom or existing_or) @@ -694,11 +706,11 @@ def run_setup_wizard(args): except SystemExit: print_warning("Nous Portal login was cancelled or failed.") - print_info("You can try again later with: hermes login") + print_info("You can try again later with: hermes model") selected_provider = None except Exception as e: print_error(f"Login failed: {e}") - print_info("You can try again later with: hermes login") + print_info("You can try again later with: 
hermes model") selected_provider = None elif provider_idx == 1: # OpenAI Codex @@ -718,11 +730,11 @@ def run_setup_wizard(args): _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) except SystemExit: print_warning("OpenAI Codex login was cancelled or failed.") - print_info("You can try again later with: hermes login --provider openai-codex") + print_info("You can try again later with: hermes model") selected_provider = None except Exception as e: print_error(f"Login failed: {e}") - print_info("You can try again later with: hermes login --provider openai-codex") + print_info("You can try again later with: hermes model") selected_provider = None elif provider_idx == 2: # OpenRouter @@ -834,7 +846,15 @@ def run_setup_wizard(args): # else: keep current elif selected_provider == "openai-codex": from hermes_cli.codex_models import get_codex_model_ids - codex_models = get_codex_model_ids() + # Try to get the access token for live model discovery + _codex_token = None + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + _codex_creds = resolve_codex_runtime_credentials() + _codex_token = _codex_creds.get("api_key") + except Exception: + pass + codex_models = get_codex_model_ids(access_token=_codex_token) model_choices = [f"{m}" for m in codex_models] model_choices.append("Custom model") model_choices.append(f"Keep current ({current_model})") diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 81b55cab78..f1d3a7edf6 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -111,7 +111,7 @@ def show_status(args): nous_logged_in = bool(nous_status.get("logged_in")) print( f" {'Nous Portal':<12} {check_mark(nous_logged_in)} " - f"{'logged in' if nous_logged_in else 'not logged in (run: hermes login)'}" + f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}" ) if nous_logged_in: portal_url = nous_status.get("portal_base_url") or "(unknown)" @@ -126,7 +126,7 @@ def show_status(args): codex_logged_in = bool(codex_status.get("logged_in")) print( f" {'OpenAI Codex':<12} {check_mark(codex_logged_in)} " - f"{'logged in' if codex_logged_in else 'not logged in (run: hermes login --provider openai-codex)'}" + f"{'logged in' if codex_logged_in else 'not logged in (run: hermes model)'}" ) codex_auth_file = codex_status.get("auth_file") if codex_auth_file: diff --git a/run_agent.py b/run_agent.py index ec634b7ab9..f025e6b981 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1432,6 +1432,14 @@ class AIAgent: content_text = str(content) if content is not None else "" if role == "assistant": + # Replay encrypted reasoning items from previous turns + # so the API can maintain coherent reasoning chains. 
+ codex_reasoning = msg.get("codex_reasoning_items") + if isinstance(codex_reasoning, list): + for ri in codex_reasoning: + if isinstance(ri, dict) and ri.get("encrypted_content"): + items.append(ri) + if content_text.strip(): items.append({"role": "assistant", "content": content_text}) @@ -1638,7 +1646,10 @@ class AIAgent: if store is not False: raise ValueError("Codex Responses contract requires 'store' to be false.") - allowed_keys = {"model", "instructions", "input", "tools", "store"} + allowed_keys = { + "model", "instructions", "input", "tools", "store", + "reasoning", "include", "max_output_tokens", "temperature", + } normalized: Dict[str, Any] = { "model": model, "instructions": instructions, @@ -1647,6 +1658,22 @@ class AIAgent: "store": False, } + # Pass through reasoning config + reasoning = api_kwargs.get("reasoning") + if isinstance(reasoning, dict): + normalized["reasoning"] = reasoning + include = api_kwargs.get("include") + if isinstance(include, list): + normalized["include"] = include + + # Pass through max_output_tokens and temperature + max_output_tokens = api_kwargs.get("max_output_tokens") + if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0: + normalized["max_output_tokens"] = int(max_output_tokens) + temperature = api_kwargs.get("temperature") + if isinstance(temperature, (int, float)): + normalized["temperature"] = float(temperature) + if allow_stream: stream = api_kwargs.get("stream") if stream is not None and stream is not True: @@ -1719,6 +1746,7 @@ class AIAgent: content_parts: List[str] = [] reasoning_parts: List[str] = [] + reasoning_items_raw: List[Dict[str, Any]] = [] tool_calls: List[Any] = [] has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} saw_commentary_phase = False @@ -1750,6 +1778,16 @@ class AIAgent: reasoning_text = self._extract_responses_reasoning_text(item) if reasoning_text: reasoning_parts.append(reasoning_text) + # Capture the full reasoning item for multi-turn continuity. + # encrypted_content is an opaque blob the API needs back on + # subsequent turns to maintain coherent reasoning chains. 
+ encrypted = getattr(item, "encrypted_content", None) + if isinstance(encrypted, str) and encrypted: + raw_item = {"type": "reasoning", "encrypted_content": encrypted} + item_id = getattr(item, "id", None) + if isinstance(item_id, str) and item_id: + raw_item["id"] = item_id + reasoning_items_raw.append(raw_item) elif item_type == "function_call": if item_status in {"queued", "in_progress", "incomplete"}: continue @@ -1807,6 +1845,7 @@ class AIAgent: reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None, reasoning_content=None, reasoning_details=None, + codex_reasoning_items=reasoning_items_raw or None, ) if tool_calls: @@ -1819,7 +1858,6 @@ class AIAgent: def _run_codex_stream(self, api_kwargs: dict): """Execute one streaming Responses API request and return the final response.""" - api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) max_stream_retries = 1 for attempt in range(max_stream_retries + 1): try: @@ -1971,14 +2009,29 @@ class AIAgent: if not instructions: instructions = DEFAULT_AGENT_IDENTITY - return { + kwargs = { "model": self.model, "instructions": instructions, "input": self._chat_messages_to_responses_input(payload_messages), "tools": self._responses_tools(), "store": False, + "reasoning": {"effort": "medium", "summary": "auto"}, + "include": ["reasoning.encrypted_content"], } + # Apply reasoning effort from config if set + if self.reasoning_config and isinstance(self.reasoning_config, dict): + if self.reasoning_config.get("enabled") is False: + kwargs.pop("reasoning", None) + kwargs["include"] = [] + elif self.reasoning_config.get("effort"): + kwargs["reasoning"]["effort"] = self.reasoning_config["effort"] + + if self.max_tokens is not None: + kwargs["max_output_tokens"] = self.max_tokens + + return kwargs + provider_preferences = {} if self.providers_allowed: provider_preferences["only"] = self.providers_allowed @@ -2045,11 +2098,27 @@ class AIAgent: } if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details: - msg["reasoning_details"] = [ - {"type": d.get("type"), "text": d.get("text"), "signature": d.get("signature")} - for d in assistant_message.reasoning_details - if isinstance(d, dict) - ] + # Pass reasoning_details back unmodified so providers (OpenRouter, + # Anthropic, OpenAI) can maintain reasoning continuity across turns. + # Each provider may include opaque fields (signature, encrypted_content) + # that must be preserved exactly. + raw_details = assistant_message.reasoning_details + preserved = [] + for d in raw_details: + if isinstance(d, dict): + preserved.append(d) + elif hasattr(d, "__dict__"): + preserved.append(d.__dict__) + elif hasattr(d, "model_dump"): + preserved.append(d.model_dump()) + if preserved: + msg["reasoning_details"] = preserved + + # Codex Responses API: preserve encrypted reasoning items for + # multi-turn continuity. These get replayed as input on the next turn. + codex_items = getattr(assistant_message, "codex_reasoning_items", None) + if codex_items: + msg["codex_reasoning_items"] = codex_items if assistant_message.tool_calls: tool_calls = [] @@ -2152,40 +2221,68 @@ class AIAgent: messages.pop() # remove flush msg return - api_kwargs = { - "model": self.model, - "messages": api_messages, - "tools": [memory_tool_def], - "temperature": 0.3, - **self._max_tokens_param(1024), - } + # Use auxiliary client for the flush call when available -- + # it's cheaper and avoids Codex Responses API incompatibility. 
+ from agent.auxiliary_client import get_text_auxiliary_client + aux_client, aux_model = get_text_auxiliary_client() - response = self.client.chat.completions.create(**api_kwargs, timeout=30.0) + if aux_client: + api_kwargs = { + "model": aux_model, + "messages": api_messages, + "tools": [memory_tool_def], + "temperature": 0.3, + "max_tokens": 5120, + } + response = aux_client.chat.completions.create(**api_kwargs, timeout=30.0) + elif self.api_mode == "codex_responses": + # No auxiliary client -- use the Codex Responses path directly + codex_kwargs = self._build_api_kwargs(api_messages) + codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) + codex_kwargs["temperature"] = 0.3 + if "max_output_tokens" in codex_kwargs: + codex_kwargs["max_output_tokens"] = 5120 + response = self._run_codex_stream(codex_kwargs) + else: + api_kwargs = { + "model": self.model, + "messages": api_messages, + "tools": [memory_tool_def], + "temperature": 0.3, + **self._max_tokens_param(5120), + } + response = self.client.chat.completions.create(**api_kwargs, timeout=30.0) - if response.choices: + # Extract tool calls from the response, handling both API formats + tool_calls = [] + if self.api_mode == "codex_responses" and not aux_client: + assistant_msg, _ = self._normalize_codex_response(response) + if assistant_msg and assistant_msg.tool_calls: + tool_calls = assistant_msg.tool_calls + elif hasattr(response, "choices") and response.choices: assistant_message = response.choices[0].message if assistant_message.tool_calls: - # Execute only memory tool calls - for tc in assistant_message.tool_calls: - if tc.function.name == "memory": - try: - args = json.loads(tc.function.arguments) - flush_target = args.get("target", "memory") - from tools.memory_tool import memory_tool as _memory_tool - result = _memory_tool( - action=args.get("action"), - target=flush_target, - content=args.get("content"), - old_text=args.get("old_text"), - store=self._memory_store, - ) - # Also send user observations to Honcho when active - if self._honcho and flush_target == "user" and args.get("action") == "add": - self._honcho_save_user_observation(args.get("content", "")) - if not self.quiet_mode: - print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}") - except Exception as e: - logger.debug("Memory flush tool call failed: %s", e) + tool_calls = assistant_message.tool_calls + + for tc in tool_calls: + if tc.function.name == "memory": + try: + args = json.loads(tc.function.arguments) + flush_target = args.get("target", "memory") + from tools.memory_tool import memory_tool as _memory_tool + result = _memory_tool( + action=args.get("action"), + target=flush_target, + content=args.get("content"), + old_text=args.get("old_text"), + store=self._memory_store, + ) + if self._honcho and flush_target == "user" and args.get("action") == "add": + self._honcho_save_user_observation(args.get("content", "")) + if not self.quiet_mode: + print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}") + except Exception as e: + logger.debug("Memory flush tool call failed: %s", e) except Exception as e: logger.debug("Memory flush API call failed: %s", e) finally: @@ -2493,32 +2590,19 @@ class AIAgent: if _is_nous: summary_extra_body["tags"] = ["product=hermes-agent"] - summary_kwargs = { - "model": self.model, - "messages": api_messages, - } - if self.max_tokens is not None: - summary_kwargs.update(self._max_tokens_param(self.max_tokens)) - if summary_extra_body: - summary_kwargs["extra_body"] = summary_extra_body - - 
summary_response = self.client.chat.completions.create(**summary_kwargs)
-
-            if summary_response.choices and summary_response.choices[0].message.content:
-                final_response = summary_response.choices[0].message.content
-                if "</think>" in final_response:
-                    final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
-                if final_response:
-                    messages.append({"role": "assistant", "content": final_response})
-                else:
-                    final_response = "I reached the iteration limit and couldn't generate a summary."
+            if self.api_mode == "codex_responses":
+                codex_kwargs = self._build_api_kwargs(api_messages)
+                codex_kwargs["tools"] = None
+                summary_response = self._run_codex_stream(codex_kwargs)
+                assistant_message, _ = self._normalize_codex_response(summary_response)
+                final_response = (assistant_message.content or "").strip() if assistant_message else ""
             else:
                 summary_kwargs = {
                     "model": self.model,
                     "messages": api_messages,
                 }
                 if self.max_tokens is not None:
-                    summary_kwargs["max_tokens"] = self.max_tokens
+                    summary_kwargs.update(self._max_tokens_param(self.max_tokens))
                 if summary_extra_body:
                     summary_kwargs["extra_body"] = summary_extra_body
@@ -2526,6 +2610,42 @@ class AIAgent:
 
                 if summary_response.choices and summary_response.choices[0].message.content:
                     final_response = summary_response.choices[0].message.content
+                else:
+                    final_response = ""
+
+            if final_response:
+                if "</think>" in final_response:
+                    final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
+                if final_response:
+                    messages.append({"role": "assistant", "content": final_response})
+                else:
+                    final_response = "I reached the iteration limit and couldn't generate a summary."
+            else:
+                # Retry summary generation
+                if self.api_mode == "codex_responses":
+                    codex_kwargs = self._build_api_kwargs(api_messages)
+                    codex_kwargs["tools"] = None
+                    retry_response = self._run_codex_stream(codex_kwargs)
+                    retry_msg, _ = self._normalize_codex_response(retry_response)
+                    final_response = (retry_msg.content or "").strip() if retry_msg else ""
+                else:
+                    summary_kwargs = {
+                        "model": self.model,
+                        "messages": api_messages,
+                    }
+                    if self.max_tokens is not None:
+                        summary_kwargs["max_tokens"] = self.max_tokens
+                    if summary_extra_body:
+                        summary_kwargs["extra_body"] = summary_extra_body
+
+                    summary_response = self.client.chat.completions.create(**summary_kwargs)
+
+                    if summary_response.choices and summary_response.choices[0].message.content:
+                        final_response = summary_response.choices[0].message.content
+                    else:
+                        final_response = ""
+
+            if final_response:
                 if "</think>" in final_response:
                     final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
                 messages.append({"role": "assistant", "content": final_response})
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
new file mode 100644
index 0000000000..efcbce29f2
--- /dev/null
+++ b/tests/agent/test_auxiliary_client.py
@@ -0,0 +1,168 @@
+"""Tests for agent.auxiliary_client resolution chain, especially the Codex fallback."""
+
+import json
+import os
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+from agent.auxiliary_client import (
+    get_text_auxiliary_client,
+    get_vision_auxiliary_client,
+    auxiliary_max_tokens_param,
+    _read_codex_access_token,
+)
+
+
+@pytest.fixture(autouse=True)
+def _clean_env(monkeypatch):
+    """Strip provider env vars so each test starts clean."""
+    for key in (
+        "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
+        "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
+    ):
+        
monkeypatch.delenv(key, raising=False) + + +@pytest.fixture +def codex_auth_dir(tmp_path, monkeypatch): + """Provide a writable ~/.codex/ directory with a valid auth.json.""" + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + auth_file = codex_dir / "auth.json" + auth_file.write_text(json.dumps({ + "tokens": { + "access_token": "codex-test-token-abc123", + "refresh_token": "codex-refresh-xyz", + } + })) + monkeypatch.setattr( + "agent.auxiliary_client._read_codex_access_token", + lambda: "codex-test-token-abc123", + ) + return codex_dir + + +class TestReadCodexAccessToken: + def test_valid_auth_file(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + auth = codex_dir / "auth.json" + auth.write_text(json.dumps({ + "tokens": {"access_token": "tok-123", "refresh_token": "r-456"} + })) + with patch("agent.auxiliary_client.Path.home", return_value=tmp_path): + result = _read_codex_access_token() + assert result == "tok-123" + + def test_missing_file_returns_none(self, tmp_path): + with patch("agent.auxiliary_client.Path.home", return_value=tmp_path): + result = _read_codex_access_token() + assert result is None + + def test_empty_token_returns_none(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + auth = codex_dir / "auth.json" + auth.write_text(json.dumps({"tokens": {"access_token": " "}})) + with patch("agent.auxiliary_client.Path.home", return_value=tmp_path): + result = _read_codex_access_token() + assert result is None + + def test_malformed_json_returns_none(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "auth.json").write_text("{bad json") + with patch("agent.auxiliary_client.Path.home", return_value=tmp_path): + result = _read_codex_access_token() + assert result is None + + def test_missing_tokens_key_returns_none(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "auth.json").write_text(json.dumps({"other": "data"})) + with patch("agent.auxiliary_client.Path.home", return_value=tmp_path): + result = _read_codex_access_token() + assert result is None + + +class TestGetTextAuxiliaryClient: + """Test the full resolution chain for get_text_auxiliary_client.""" + + def test_openrouter_takes_priority(self, monkeypatch, codex_auth_dir): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + with patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_text_auxiliary_client() + assert model == "google/gemini-3-flash-preview" + mock_openai.assert_called_once() + call_kwargs = mock_openai.call_args + assert call_kwargs.kwargs["api_key"] == "or-key" + + def test_nous_takes_priority_over_codex(self, monkeypatch, codex_auth_dir): + with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + mock_nous.return_value = {"access_token": "nous-tok"} + client, model = get_text_auxiliary_client() + assert model == "gemini-3-flash" + + def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir): + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1") + monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key") + # Override the autouse monkeypatch for codex + monkeypatch.setattr( + "agent.auxiliary_client._read_codex_access_token", + lambda: "codex-test-token-abc123", + ) + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_text_auxiliary_client() + assert model == "gpt-4o-mini" + 
call_kwargs = mock_openai.call_args + assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1" + + def test_codex_fallback_when_nothing_else(self, codex_auth_dir): + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_text_auxiliary_client() + assert model == "gpt-5.3-codex" + # Returns a CodexAuxiliaryClient wrapper, not a raw OpenAI client + from agent.auxiliary_client import CodexAuxiliaryClient + assert isinstance(client, CodexAuxiliaryClient) + + def test_returns_none_when_nothing_available(self): + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value=None): + client, model = get_text_auxiliary_client() + assert client is None + assert model is None + + +class TestCodexNotInVisionClient: + """Codex fallback should NOT apply to vision tasks.""" + + def test_vision_returns_none_without_openrouter_nous(self): + with patch("agent.auxiliary_client._read_nous_auth", return_value=None): + client, model = get_vision_auxiliary_client() + assert client is None + assert model is None + + +class TestAuxiliaryMaxTokensParam: + def test_codex_fallback_uses_max_tokens(self, monkeypatch): + """Codex adapter translates max_tokens internally, so we return max_tokens.""" + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value="tok"): + result = auxiliary_max_tokens_param(1024) + assert result == {"max_tokens": 1024} + + def test_openrouter_uses_max_tokens(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + result = auxiliary_max_tokens_param(1024) + assert result == {"max_tokens": 1024} + + def test_no_provider_uses_max_tokens(self): + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value=None): + result = auxiliary_max_tokens_param(1024) + assert result == {"max_tokens": 1024} diff --git a/tests/test_auth_codex_provider.py b/tests/test_auth_codex_provider.py index de490754c2..7d30768074 100644 --- a/tests/test_auth_codex_provider.py +++ b/tests/test_auth_codex_provider.py @@ -185,8 +185,8 @@ def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch): _write_codex_auth(codex_home) monkeypatch.setenv("HERMES_HOME", str(hermes_home)) monkeypatch.setenv("CODEX_HOME", str(codex_home)) - monkeypatch.setattr("hermes_cli.auth.shutil.which", lambda _: "/usr/local/bin/codex") - monkeypatch.setattr("hermes_cli.auth.subprocess.run", lambda *a, **k: None) + # Mock input() to accept existing credentials + monkeypatch.setattr("builtins.input", lambda _: "y") _login_openai_codex(SimpleNamespace(), PROVIDER_REGISTRY["openai-codex"]) @@ -201,19 +201,10 @@ def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch): assert config["model"]["base_url"] == DEFAULT_CODEX_BASE_URL -def test_login_command_defaults_to_nous(monkeypatch): - calls = {"nous": 0, "codex": 0} - - def _fake_nous(args, pconfig): - calls["nous"] += 1 - - def _fake_codex(args, pconfig): - calls["codex"] += 1 - - monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_nous) - monkeypatch.setattr("hermes_cli.auth._login_openai_codex", _fake_codex) - - login_command(SimpleNamespace()) - - assert calls["nous"] == 1 - assert calls["codex"] == 0 +def test_login_command_shows_deprecation(monkeypatch, capsys): + 
"""login_command is deprecated and directs users to hermes model.""" + with pytest.raises(SystemExit) as exc_info: + login_command(SimpleNamespace()) + assert exc_info.value.code == 0 + captured = capsys.readouterr() + assert "hermes model" in captured.out diff --git a/tests/test_cli_init.py b/tests/test_cli_init.py new file mode 100644 index 0000000000..90ce05c723 --- /dev/null +++ b/tests/test_cli_init.py @@ -0,0 +1,80 @@ +"""Tests for HermesCLI initialization -- catches configuration bugs +that only manifest at runtime (not in mocked unit tests).""" + +import os +import sys +from unittest.mock import patch, MagicMock + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + + +def _make_cli(**kwargs): + """Create a HermesCLI instance with minimal mocking.""" + from cli import HermesCLI + with patch("cli.get_tool_definitions", return_value=[]): + return HermesCLI(**kwargs) + + +class TestMaxTurnsResolution: + """max_turns must always resolve to a positive integer, never None.""" + + def test_default_max_turns_is_integer(self): + cli = _make_cli() + assert isinstance(cli.max_turns, int) + assert cli.max_turns > 0 + + def test_explicit_max_turns_honored(self): + cli = _make_cli(max_turns=25) + assert cli.max_turns == 25 + + def test_none_max_turns_gets_default(self): + cli = _make_cli(max_turns=None) + assert isinstance(cli.max_turns, int) + assert cli.max_turns > 0 + + def test_env_var_max_turns(self, monkeypatch): + """Env var is used when config file doesn't set max_turns.""" + monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42") + import cli as cli_module + original = cli_module.CLI_CONFIG["agent"].get("max_turns") + cli_module.CLI_CONFIG["agent"]["max_turns"] = None + try: + cli_obj = _make_cli() + assert cli_obj.max_turns == 42 + finally: + if original is not None: + cli_module.CLI_CONFIG["agent"]["max_turns"] = original + + def test_max_turns_never_none_for_agent(self): + """The value passed to AIAgent must never be None (causes TypeError in run_conversation).""" + cli = _make_cli() + assert cli.max_turns is not None + + +class TestVerboseAndToolProgress: + def test_default_verbose_is_bool(self): + cli = _make_cli() + assert isinstance(cli.verbose, bool) + + def test_tool_progress_mode_is_string(self): + cli = _make_cli() + assert isinstance(cli.tool_progress_mode, str) + assert cli.tool_progress_mode in ("off", "new", "all", "verbose") + + +class TestProviderResolution: + def test_api_key_is_string_or_none(self): + cli = _make_cli() + assert cli.api_key is None or isinstance(cli.api_key, str) + + def test_base_url_is_string(self): + cli = _make_cli() + assert isinstance(cli.base_url, str) + assert cli.base_url.startswith("http") + + def test_model_is_string(self): + cli = _make_cli() + assert isinstance(cli.model, str) + assert len(cli.model) > 0 diff --git a/tests/test_codex_execution_paths.py b/tests/test_codex_execution_paths.py index 13ce5d7acf..ef24f02b53 100644 --- a/tests/test_codex_execution_paths.py +++ b/tests/test_codex_execution_paths.py @@ -149,6 +149,11 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch): runner._prefill_messages = [] runner._reasoning_config = None runner._running_agents = {} + from unittest.mock import MagicMock, AsyncMock + runner.hooks = MagicMock() + runner.hooks.emit = AsyncMock() + runner.hooks.loaded_hooks = [] + runner._session_db = None source = SessionSource( platform=Platform.LOCAL, diff --git a/tests/test_external_credential_detection.py 
b/tests/test_external_credential_detection.py new file mode 100644 index 0000000000..a1fe2a2f9f --- /dev/null +++ b/tests/test_external_credential_detection.py @@ -0,0 +1,51 @@ +"""Tests for detect_external_credentials() -- Phase 2 credential sync.""" + +import json +from pathlib import Path +from unittest.mock import patch + +import pytest + +from hermes_cli.auth import detect_external_credentials + + +class TestDetectCodexCLI: + def test_detects_valid_codex_auth(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + auth = codex_dir / "auth.json" + auth.write_text(json.dumps({ + "tokens": {"access_token": "tok-123", "refresh_token": "ref-456"} + })) + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir): + result = detect_external_credentials() + codex_hits = [c for c in result if c["provider"] == "openai-codex"] + assert len(codex_hits) == 1 + assert "Codex CLI" in codex_hits[0]["label"] + assert str(auth) == codex_hits[0]["path"] + + def test_skips_codex_without_access_token(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "auth.json").write_text(json.dumps({"tokens": {}})) + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir): + result = detect_external_credentials() + assert not any(c["provider"] == "openai-codex" for c in result) + + def test_skips_missing_codex_dir(self, tmp_path): + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=tmp_path / "nonexistent"): + result = detect_external_credentials() + assert not any(c["provider"] == "openai-codex" for c in result) + + def test_skips_malformed_codex_auth(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "auth.json").write_text("{bad json") + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir): + result = detect_external_credentials() + assert not any(c["provider"] == "openai-codex" for c in result) + + def test_returns_empty_when_nothing_found(self, tmp_path): + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=tmp_path / ".codex"): + result = detect_external_credentials() + assert result == [] diff --git a/tests/test_flush_memories_codex.py b/tests/test_flush_memories_codex.py new file mode 100644 index 0000000000..22eef5ab03 --- /dev/null +++ b/tests/test_flush_memories_codex.py @@ -0,0 +1,225 @@ +"""Tests for flush_memories() working correctly across all provider modes. + +Catches the bug where Codex mode called chat.completions.create on a +Responses-only client, which would fail silently or with a 404. 
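+
+The fixed behavior under test: prefer an auxiliary chat-completions
+client when one is available; otherwise route the flush through the
+Codex Responses path (_build_api_kwargs + _run_codex_stream) instead
+of chat.completions.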
+""" + +import json +import os +import sys +import types +from types import SimpleNamespace +from unittest.mock import patch, MagicMock, call + +import pytest + +sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) +sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) +sys.modules.setdefault("fal_client", types.SimpleNamespace()) + +import run_agent + + +class _FakeOpenAI: + def __init__(self, **kwargs): + self.kwargs = kwargs + self.api_key = kwargs.get("api_key", "test") + self.base_url = kwargs.get("base_url", "http://test") + + def close(self): + pass + + +def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"): + """Build an AIAgent with mocked internals, ready for flush_memories testing.""" + monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kw: [ + { + "type": "function", + "function": { + "name": "memory", + "description": "Manage memories.", + "parameters": { + "type": "object", + "properties": { + "action": {"type": "string"}, + "target": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + }, + }, + ]) + monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {}) + monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI) + + agent = run_agent.AIAgent( + api_key="test-key", + base_url="https://test.example.com/v1", + provider=provider, + api_mode=api_mode, + max_iterations=4, + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + # Give it a valid memory store + agent._memory_store = MagicMock() + agent._memory_flush_min_turns = 1 + agent._user_turn_count = 5 + return agent + + +def _chat_response_with_memory_call(): + """Simulated chat completions response with a memory tool call.""" + return SimpleNamespace( + choices=[SimpleNamespace( + message=SimpleNamespace( + content=None, + tool_calls=[SimpleNamespace( + function=SimpleNamespace( + name="memory", + arguments=json.dumps({ + "action": "add", + "target": "notes", + "content": "User prefers dark mode.", + }), + ), + )], + ), + )], + usage=SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120), + ) + + +class TestFlushMemoriesUsesAuxiliaryClient: + """When an auxiliary client is available, flush_memories should use it + instead of self.client -- especially critical in Codex mode.""" + + def test_flush_uses_auxiliary_when_available(self, monkeypatch): + agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex") + + mock_aux_client = MagicMock() + mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + {"role": "user", "content": "Remember this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: + agent.flush_memories(messages) + + mock_aux_client.chat.completions.create.assert_called_once() + call_kwargs = mock_aux_client.chat.completions.create.call_args + assert call_kwargs.kwargs.get("model") == "gpt-4o-mini" or call_kwargs[1].get("model") == "gpt-4o-mini" + + def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch): + """Non-Codex mode with no auxiliary falls back to self.client.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + agent.client = MagicMock() + agent.client.chat.completions.create.return_value = 
_chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + {"role": "user", "content": "Save this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved."): + agent.flush_memories(messages) + + agent.client.chat.completions.create.assert_called_once() + + def test_flush_executes_memory_tool_calls(self, monkeypatch): + """Verify that memory tool calls from the flush response actually get executed.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + + mock_aux_client = MagicMock() + mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Note this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: + agent.flush_memories(messages) + + mock_memory.assert_called_once() + call_kwargs = mock_memory.call_args + assert call_kwargs.kwargs["action"] == "add" + assert call_kwargs.kwargs["target"] == "notes" + assert "dark mode" in call_kwargs.kwargs["content"] + + def test_flush_strips_artifacts_from_messages(self, monkeypatch): + """After flush, the flush prompt and any response should be removed from messages.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + + mock_aux_client = MagicMock() + mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Remember X"}, + ] + original_len = len(messages) + with patch("tools.memory_tool.memory_tool", return_value="Saved."): + agent.flush_memories(messages) + + # Messages should not grow from the flush + assert len(messages) <= original_len + # No flush sentinel should remain + for msg in messages: + assert "_flush_sentinel" not in msg + + +class TestFlushMemoriesCodexFallback: + """When no auxiliary client exists and we're in Codex mode, flush should + use the Codex Responses API path instead of chat.completions.""" + + def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch): + agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex") + + codex_response = SimpleNamespace( + output=[ + SimpleNamespace( + type="function_call", + call_id="call_1", + name="memory", + arguments=json.dumps({ + "action": "add", + "target": "notes", + "content": "Codex flush test", + }), + ), + ], + usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60), + status="completed", + model="gpt-5-codex", + ) + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \ + patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \ + patch.object(agent, "_build_api_kwargs") as mock_build, \ + patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: + mock_build.return_value = { + "model": "gpt-5-codex", + "instructions": "test", + "input": [], + "tools": [], + "max_output_tokens": 4096, + } + messages = 
[ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Save this"}, + ] + agent.flush_memories(messages) + + mock_stream.assert_called_once() + mock_memory.assert_called_once() + assert mock_memory.call_args.kwargs["content"] == "Codex flush test" diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py new file mode 100644 index 0000000000..82199ac4c6 --- /dev/null +++ b/tests/test_provider_parity.py @@ -0,0 +1,460 @@ +"""Provider parity tests: verify that AIAgent builds correct API kwargs +and handles responses properly for all supported providers. + +Ensures changes to one provider path don't silently break another. +""" + +import json +import os +import sys +import types +from types import SimpleNamespace +from unittest.mock import patch, MagicMock + +import pytest + +sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) +sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) +sys.modules.setdefault("fal_client", types.SimpleNamespace()) + +from run_agent import AIAgent + + +# ── Helpers ────────────────────────────────────────────────────────────────── + +def _tool_defs(*names): + return [ + { + "type": "function", + "function": { + "name": n, + "description": f"{n} tool", + "parameters": {"type": "object", "properties": {}}, + }, + } + for n in names + ] + + +class _FakeOpenAI: + def __init__(self, **kw): + self.api_key = kw.get("api_key", "test") + self.base_url = kw.get("base_url", "http://test") + def close(self): + pass + + +def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"): + monkeypatch.setattr("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal")) + monkeypatch.setattr("run_agent.check_toolset_requirements", lambda: {}) + monkeypatch.setattr("run_agent.OpenAI", _FakeOpenAI) + return AIAgent( + api_key="test-key", + base_url=base_url, + provider=provider, + api_mode=api_mode, + max_iterations=4, + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + +# ── _build_api_kwargs tests ───────────────────────────────────────────────── + +class TestBuildApiKwargsOpenRouter: + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "model" in kwargs + assert kwargs["messages"][-1]["content"] == "hi" + + def test_includes_reasoning_in_extra_body(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert "reasoning" in extra + assert extra["reasoning"]["enabled"] is True + + def test_includes_tools(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "tools" in kwargs + tool_names = [t["function"]["name"] for t in kwargs["tools"]] + assert "web_search" in tool_names + + def test_no_responses_api_fields(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "input" not in kwargs + assert "instructions" not in kwargs + assert "store" not in kwargs + + +class TestBuildApiKwargsNousPortal: + def 
test_includes_nous_product_tags(self, monkeypatch): + agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert extra.get("tags") == ["product=hermes-agent"] + + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "input" not in kwargs + + +class TestBuildApiKwargsCustomEndpoint: + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "input" not in kwargs + + def test_no_openrouter_extra_body(self, monkeypatch): + agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert "reasoning" not in extra + + +class TestBuildApiKwargsCodex: + def test_uses_responses_api_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "input" in kwargs + assert "instructions" in kwargs + assert "messages" not in kwargs + assert kwargs["store"] is False + + def test_includes_reasoning_config(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "reasoning" in kwargs + assert kwargs["reasoning"]["effort"] == "medium" + + def test_includes_encrypted_content_in_include(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "reasoning.encrypted_content" in kwargs.get("include", []) + + def test_tools_converted_to_responses_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + tools = kwargs.get("tools", []) + assert len(tools) > 0 + # Responses format has "name" at top level, not nested under "function" + assert "name" in tools[0] + assert "function" not in tools[0] + + +# ── Message conversion tests ──────────────────────────────────────────────── + +class TestChatMessagesToResponsesInput: + """Verify _chat_messages_to_responses_input for Codex mode.""" + + def test_user_message_passes_through(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hello"}] + items = agent._chat_messages_to_responses_input(messages) + assert items == [{"role": "user", "content": "hello"}] + + def 
test_system_messages_filtered(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "system", "content": "be helpful"}, + {"role": "user", "content": "hello"}, + ] + items = agent._chat_messages_to_responses_input(messages) + assert len(items) == 1 + assert items[0]["role"] == "user" + + def test_assistant_tool_calls_become_function_call_items(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{ + "role": "assistant", + "content": "", + "tool_calls": [{ + "id": "call_abc", + "call_id": "call_abc", + "function": {"name": "web_search", "arguments": '{"query": "test"}'}, + }], + }] + items = agent._chat_messages_to_responses_input(messages) + fc_items = [i for i in items if i.get("type") == "function_call"] + assert len(fc_items) == 1 + assert fc_items[0]["name"] == "web_search" + assert fc_items[0]["call_id"] == "call_abc" + + def test_tool_results_become_function_call_output(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "tool", "tool_call_id": "call_abc", "content": "result here"}] + items = agent._chat_messages_to_responses_input(messages) + assert items[0]["type"] == "function_call_output" + assert items[0]["call_id"] == "call_abc" + assert items[0]["output"] == "result here" + + def test_encrypted_reasoning_replayed(self, monkeypatch): + """Encrypted reasoning items from previous turns must be included in input.""" + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "user", "content": "think about this"}, + { + "role": "assistant", + "content": "I thought about it.", + "codex_reasoning_items": [ + {"type": "reasoning", "id": "rs_abc", "encrypted_content": "gAAAA_test_blob"}, + ], + }, + {"role": "user", "content": "continue"}, + ] + items = agent._chat_messages_to_responses_input(messages) + reasoning_items = [i for i in items if i.get("type") == "reasoning"] + assert len(reasoning_items) == 1 + assert reasoning_items[0]["encrypted_content"] == "gAAAA_test_blob" + + def test_no_reasoning_items_for_non_codex_messages(self, monkeypatch): + """Messages without codex_reasoning_items should not inject anything.""" + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "assistant", "content": "hi"}, + {"role": "user", "content": "hello"}, + ] + items = agent._chat_messages_to_responses_input(messages) + reasoning_items = [i for i in items if i.get("type") == "reasoning"] + assert len(reasoning_items) == 0 + + +# ── Response normalization tests ───────────────────────────────────────────── + +class TestNormalizeCodexResponse: + """Verify _normalize_codex_response extracts all fields correctly.""" + + def _make_codex_agent(self, monkeypatch): + return _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + + def test_text_response(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="Hello!")], + 
phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.content == "Hello!" + assert reason == "stop" + + def test_reasoning_summary_extracted(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="reasoning", + encrypted_content="gAAAA_blob", + summary=[SimpleNamespace(type="summary_text", text="Thinking about math")], + id="rs_123", status=None), + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="42")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.content == "42" + assert "math" in msg.reasoning + assert reason == "stop" + + def test_encrypted_content_captured(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="reasoning", + encrypted_content="gAAAA_secret_blob_123", + summary=[SimpleNamespace(type="summary_text", text="Thinking")], + id="rs_456", status=None), + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="done")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.codex_reasoning_items is not None + assert len(msg.codex_reasoning_items) == 1 + assert msg.codex_reasoning_items[0]["encrypted_content"] == "gAAAA_secret_blob_123" + assert msg.codex_reasoning_items[0]["id"] == "rs_456" + + def test_no_encrypted_content_when_missing(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="no reasoning")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.codex_reasoning_items is None + + def test_tool_calls_extracted(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="function_call", status="completed", + call_id="call_xyz", name="web_search", + arguments='{"query":"test"}', id="fc_xyz"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert reason == "tool_calls" + assert len(msg.tool_calls) == 1 + assert msg.tool_calls[0].function.name == "web_search" + + +# ── Chat completions response handling (OpenRouter/Nous) ───────────────────── + +class TestBuildAssistantMessage: + """Verify _build_assistant_message works for all provider response formats.""" + + def test_openrouter_reasoning_fields(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + msg = SimpleNamespace( + content="answer", + tool_calls=None, + reasoning="I thought about it", + reasoning_content=None, + reasoning_details=None, + ) + result = agent._build_assistant_message(msg, "stop") + assert result["content"] == "answer" + assert result["reasoning"] == "I thought about it" + assert "codex_reasoning_items" not in result + + def test_openrouter_reasoning_details_preserved_unmodified(self, monkeypatch): + """reasoning_details must be passed back exactly as received for + multi-turn continuity (OpenRouter, Anthropic, OpenAI all need this).""" + agent = _make_agent(monkeypatch, "openrouter") + original_detail = { + "type": "thinking", + "thinking": "deep thoughts here", + 
"signature": "sig123_opaque_blob", + "encrypted_content": "some_provider_blob", + "extra_field": "should_not_be_dropped", + } + msg = SimpleNamespace( + content="answer", + tool_calls=None, + reasoning=None, + reasoning_content=None, + reasoning_details=[original_detail], + ) + result = agent._build_assistant_message(msg, "stop") + stored = result["reasoning_details"][0] + # ALL fields must survive, not just type/text/signature + assert stored["signature"] == "sig123_opaque_blob" + assert stored["encrypted_content"] == "some_provider_blob" + assert stored["extra_field"] == "should_not_be_dropped" + assert stored["thinking"] == "deep thoughts here" + + def test_codex_preserves_encrypted_reasoning(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + msg = SimpleNamespace( + content="result", + tool_calls=None, + reasoning="summary text", + reasoning_content=None, + reasoning_details=None, + codex_reasoning_items=[ + {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"}, + ], + ) + result = agent._build_assistant_message(msg, "stop") + assert result["codex_reasoning_items"] == [ + {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"}, + ] + + def test_plain_message_no_codex_items(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + msg = SimpleNamespace( + content="simple", + tool_calls=None, + reasoning=None, + reasoning_content=None, + reasoning_details=None, + ) + result = agent._build_assistant_message(msg, "stop") + assert "codex_reasoning_items" not in result + + +# ── Auxiliary client provider resolution ───────────────────────────────────── + +class TestAuxiliaryClientProviderPriority: + """Verify auxiliary client resolution doesn't break for any provider.""" + + def test_openrouter_always_wins(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + from agent.auxiliary_client import get_text_auxiliary_client + with patch("agent.auxiliary_client.OpenAI") as mock: + client, model = get_text_auxiliary_client() + assert model == "google/gemini-3-flash-preview" + assert "openrouter" in str(mock.call_args.kwargs["base_url"]).lower() + + def test_nous_when_no_openrouter(self, monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + from agent.auxiliary_client import get_text_auxiliary_client + with patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "nous-tok"}), \ + patch("agent.auxiliary_client.OpenAI") as mock: + client, model = get_text_auxiliary_client() + assert model == "gemini-3-flash" + + def test_custom_endpoint_when_no_nous(self, monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1") + monkeypatch.setenv("OPENAI_API_KEY", "local-key") + from agent.auxiliary_client import get_text_auxiliary_client + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI") as mock: + client, model = get_text_auxiliary_client() + assert mock.call_args.kwargs["base_url"] == "http://localhost:1234/v1" + + def test_codex_fallback_last_resort(self, monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + from agent.auxiliary_client import get_text_auxiliary_client, CodexAuxiliaryClient + with 
patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-tok"), \ + patch("agent.auxiliary_client.OpenAI"): + client, model = get_text_auxiliary_client() + assert model == "gpt-5.3-codex" + assert isinstance(client, CodexAuxiliaryClient) diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py index b3d3f552f0..a1e5e817e0 100644 --- a/tests/test_run_agent_codex_responses.py +++ b/tests/test_run_agent_codex_responses.py @@ -530,12 +530,27 @@ def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch): agent = _build_agent(monkeypatch) kwargs = _codex_request_kwargs() - kwargs["temperature"] = 0 + kwargs["some_unknown_field"] = "value" with pytest.raises(ValueError, match="unsupported field"): agent._preflight_codex_api_kwargs(kwargs) +def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch): + agent = _build_agent(monkeypatch) + kwargs = _codex_request_kwargs() + kwargs["reasoning"] = {"effort": "high", "summary": "auto"} + kwargs["include"] = ["reasoning.encrypted_content"] + kwargs["temperature"] = 0.7 + kwargs["max_output_tokens"] = 4096 + + result = agent._preflight_codex_api_kwargs(kwargs) + assert result["reasoning"] == {"effort": "high", "summary": "auto"} + assert result["include"] == ["reasoning.encrypted_content"] + assert result["temperature"] == 0.7 + assert result["max_output_tokens"] == 4096 + + def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch): agent = _build_agent(monkeypatch) responses = [_codex_tool_call_response(), _codex_message_response("done")] diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index bbba7b3855..b11b79fdae 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -24,26 +24,13 @@ from typing import Dict, Any, List, Optional from openai import AsyncOpenAI, OpenAI -from agent.auxiliary_client import get_text_auxiliary_client +from agent.auxiliary_client import get_async_text_auxiliary_client -# Resolve the auxiliary client at import time so we have the model slug. -# We build an AsyncOpenAI from the same credentials for async summarization. -_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client() -_async_aux_client: AsyncOpenAI | None = None -if _aux_client is not None: - _async_kwargs = { - "api_key": _aux_client.api_key, - "base_url": str(_aux_client.base_url), - } - if "openrouter" in str(_aux_client.base_url).lower(): - _async_kwargs["default_headers"] = { - "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", - "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "productivity,cli-agent", - } - _async_aux_client = AsyncOpenAI(**_async_kwargs) +# Resolve the async auxiliary client at import time so we have the model slug. +# Handles Codex Responses API adapter transparently. 
+_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client() MAX_SESSION_CHARS = 100_000 -MAX_SUMMARY_TOKENS = 2000 +MAX_SUMMARY_TOKENS = 10000 def _format_timestamp(ts) -> str: diff --git a/tools/web_tools.py b/tools/web_tools.py index 7ec08fc02f..541404e6d2 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -48,7 +48,7 @@ import asyncio from typing import List, Dict, Any, Optional from firecrawl import Firecrawl from openai import AsyncOpenAI -from agent.auxiliary_client import get_text_auxiliary_client +from agent.auxiliary_client import get_async_text_auxiliary_client from tools.debug_helpers import DebugSession logger = logging.getLogger(__name__) @@ -67,21 +67,9 @@ def _get_firecrawl_client(): DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000 -# Resolve auxiliary text client at module level; build an async wrapper. -_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client() -_aux_async_client: AsyncOpenAI | None = None -if _aux_sync_client is not None: - _async_kwargs = { - "api_key": _aux_sync_client.api_key, - "base_url": str(_aux_sync_client.base_url), - } - if "openrouter" in str(_aux_sync_client.base_url).lower(): - _async_kwargs["default_headers"] = { - "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", - "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "productivity,cli-agent", - } - _aux_async_client = AsyncOpenAI(**_async_kwargs) +# Resolve async auxiliary client at module level. +# Handles Codex Responses API adapter transparently. +_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client() _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG") @@ -174,7 +162,7 @@ async def _call_summarizer_llm( content: str, context_str: str, model: str, - max_tokens: int = 4000, + max_tokens: int = 20000, is_chunk: bool = False, chunk_info: str = "" ) -> Optional[str]: @@ -306,7 +294,7 @@ async def _process_large_content_chunked( chunk_content, context_str, model, - max_tokens=2000, + max_tokens=10000, is_chunk=True, chunk_info=chunk_info ) @@ -374,7 +362,7 @@ Create a single, unified markdown summary.""" {"role": "user", "content": synthesis_prompt} ], temperature=0.1, - **auxiliary_max_tokens_param(4000), + **auxiliary_max_tokens_param(20000), **({} if not _extra else {"extra_body": _extra}), ) final_summary = response.choices[0].message.content.strip() From 95b0610f36a62cfcf3100fa046a2eb7c97c6cc00 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 21:56:05 -0800 Subject: [PATCH 20/31] refactor(cli, auth): Add Codex/OpenAI OAuth Support - finalized - Replace `hermes login` with `hermes model` for selecting providers and managing authentication. - Update documentation and CLI commands to reflect the new provider selection process. - Introduce a new redaction system for logging sensitive information. - Enhance Codex model discovery by integrating API fetching and local cache. - Adjust max turns configuration logic for better clarity and precedence. - Improve error handling and user feedback during authentication processes. --- agent/redact.py | 115 ++++++++++++++++++++++++++++++++++++++++++++++++ gateway/run.py | 3 +- run_agent.py | 8 +++- 3 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 agent/redact.py diff --git a/agent/redact.py b/agent/redact.py new file mode 100644 index 0000000000..22f1a547fb --- /dev/null +++ b/agent/redact.py @@ -0,0 +1,115 @@ +"""Regex-based secret redaction for logs and tool output. 
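+
+Example (illustrative value): "sk-proj-abc123def456ghi789jkl012"
+redacts to "sk-pro...l012".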
+
+Applies pattern matching to mask API keys, tokens, and credentials
+before they reach log files, verbose output, or gateway logs.
+
+Short tokens (< 18 chars) are fully masked. Longer tokens preserve
+the first 6 and last 4 characters for debuggability.
+"""
+
+import logging
+import re
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+# Known API key prefixes -- match the prefix + contiguous token chars
+_PREFIX_PATTERNS = [
+    r"sk-[A-Za-z0-9_-]{10,}",          # OpenAI / OpenRouter
+    r"ghp_[A-Za-z0-9]{10,}",           # GitHub PAT (classic)
+    r"github_pat_[A-Za-z0-9_]{10,}",   # GitHub PAT (fine-grained)
+    r"xox[baprs]-[A-Za-z0-9-]{10,}",   # Slack tokens
+    r"AIza[A-Za-z0-9_-]{30,}",         # Google API keys
+    r"pplx-[A-Za-z0-9]{10,}",          # Perplexity
+    r"fal_[A-Za-z0-9_-]{10,}",         # Fal.ai
+    r"fc-[A-Za-z0-9]{10,}",            # Firecrawl
+    r"bb_live_[A-Za-z0-9_-]{10,}",     # BrowserBase
+    r"gAAAA[A-Za-z0-9_=-]{20,}",       # Codex encrypted tokens
+]
+
+# ENV assignment patterns: KEY=value where KEY contains a secret-like name
+_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
+_ENV_ASSIGN_RE = re.compile(
+    rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+)\2",
+    re.IGNORECASE,
+)
+
+# JSON field patterns: "apiKey": "value", "token": "value", etc.
+_JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer)"
+_JSON_FIELD_RE = re.compile(
+    rf'("{_JSON_KEY_NAMES}")\s*:\s*"([^"]+)"',
+    re.IGNORECASE,
+)
+
+# Authorization headers
+_AUTH_HEADER_RE = re.compile(
+    r"(Authorization:\s*Bearer\s+)(\S+)",
+    re.IGNORECASE,
+)
+
+# Telegram bot tokens: bot<digits>:<token> or <digits>:<token>
+_TELEGRAM_RE = re.compile(
+    r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})",
+)
+
+# Compile known prefix patterns into one alternation
+_PREFIX_RE = re.compile(
+    r"(?<![A-Za-z0-9])(" + "|".join(_PREFIX_PATTERNS) + r")"
+)
+
+
+def _mask_token(token: str) -> str:
+    """Mask a token, preserving prefix for long tokens."""
+    if len(token) < 18:
+        return "***"
+    return f"{token[:6]}...{token[-4:]}"
+
+
+def redact_sensitive_text(text: str) -> str:
+    """Apply all redaction patterns to a block of text.
+
+    Safe to call on any string -- non-matching text passes through unchanged.
+    """
+    if not text:
+        return text
+
+    # Known prefixes (sk-, ghp_, etc.)
+    text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)
+
+    # ENV assignments: OPENAI_API_KEY=sk-abc...
+ def _redact_env(m): + name, quote, value = m.group(1), m.group(2), m.group(3) + return f"{name}={quote}{_mask_token(value)}{quote}" + text = _ENV_ASSIGN_RE.sub(_redact_env, text) + + # JSON fields: "apiKey": "value" + def _redact_json(m): + key, value = m.group(1), m.group(2) + return f'{key}: "{_mask_token(value)}"' + text = _JSON_FIELD_RE.sub(_redact_json, text) + + # Authorization headers + text = _AUTH_HEADER_RE.sub( + lambda m: m.group(1) + _mask_token(m.group(2)), + text, + ) + + # Telegram bot tokens + def _redact_telegram(m): + prefix = m.group(1) or "" + digits = m.group(2) + return f"{prefix}{digits}:***" + text = _TELEGRAM_RE.sub(_redact_telegram, text) + + return text + + +class RedactingFormatter(logging.Formatter): + """Log formatter that redacts secrets from all log messages.""" + + def __init__(self, fmt=None, datefmt=None, style='%', **kwargs): + super().__init__(fmt, datefmt, style, **kwargs) + + def format(self, record: logging.LogRecord) -> str: + original = super().format(record) + return redact_sensitive_text(original) diff --git a/gateway/run.py b/gateway/run.py index c21cf8b9ec..8d8e29e391 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -2027,7 +2027,8 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool: maxBytes=5 * 1024 * 1024, backupCount=3, ) - file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s')) + from agent.redact import RedactingFormatter + file_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s')) logging.getLogger().addHandler(file_handler) logging.getLogger().setLevel(logging.INFO) diff --git a/run_agent.py b/run_agent.py index f025e6b981..b1e1f1bea6 100644 --- a/run_agent.py +++ b/run_agent.py @@ -247,13 +247,19 @@ class AIAgent: self._use_prompt_caching = is_openrouter and is_claude self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost) - # Configure logging + # Configure logging with secret redaction + from agent.redact import RedactingFormatter if self.verbose_logging: logging.basicConfig( level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%H:%M:%S' ) + for handler in logging.getLogger().handlers: + handler.setFormatter(RedactingFormatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt='%H:%M:%S', + )) # Keep third-party libraries at WARNING level to reduce noise # We have our own retry and error logging that's more informative logging.getLogger('openai').setLevel(logging.WARNING) From 70dfec9638ada694580e2a9df6f5ef4b4d664dda Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 21:56:25 -0800 Subject: [PATCH 21/31] test(redact): add sensitive text redaction - Introduce a new test suite for the `redact_sensitive_text` function, covering various sensitive data formats including API keys, tokens, and environment variables. - Ensure that sensitive information is properly masked in logs and outputs while non-sensitive data remains unchanged. - Add tests for different scenarios including JSON fields, authorization headers, and environment variable assignments. - Implement a redacting formatter for logging to enhance security during log output. 
--- tests/agent/test_redact.py | 173 +++++++++++++++++++++++++++++++++++++ tools/terminal_tool.py | 6 +- 2 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 tests/agent/test_redact.py diff --git a/tests/agent/test_redact.py b/tests/agent/test_redact.py new file mode 100644 index 0000000000..52e015ca94 --- /dev/null +++ b/tests/agent/test_redact.py @@ -0,0 +1,173 @@ +"""Tests for agent.redact -- secret masking in logs and output.""" + +import logging + +import pytest + +from agent.redact import redact_sensitive_text, RedactingFormatter + + +class TestKnownPrefixes: + def test_openai_sk_key(self): + text = "Using key sk-proj-abc123def456ghi789jkl012" + result = redact_sensitive_text(text) + assert "sk-pro" in result + assert "abc123def456" not in result + assert "..." in result + + def test_openrouter_sk_key(self): + text = "OPENROUTER_API_KEY=sk-or-v1-abcdefghijklmnopqrstuvwxyz1234567890" + result = redact_sensitive_text(text) + assert "abcdefghijklmnop" not in result + + def test_github_pat_classic(self): + result = redact_sensitive_text("token: ghp_abc123def456ghi789jkl") + assert "abc123def456" not in result + + def test_github_pat_fine_grained(self): + result = redact_sensitive_text("github_pat_abc123def456ghi789jklmno") + assert "abc123def456" not in result + + def test_slack_token(self): + token = "xoxb-" + "0" * 12 + "-" + "a" * 14 + result = redact_sensitive_text(token) + assert "a" * 14 not in result + + def test_google_api_key(self): + result = redact_sensitive_text("AIzaSyB-abc123def456ghi789jklmno012345") + assert "abc123def456" not in result + + def test_perplexity_key(self): + result = redact_sensitive_text("pplx-abcdef123456789012345") + assert "abcdef12345" not in result + + def test_fal_key(self): + result = redact_sensitive_text("fal_abc123def456ghi789jkl") + assert "abc123def456" not in result + + def test_short_token_fully_masked(self): + result = redact_sensitive_text("key=sk-short1234567") + assert "***" in result + + +class TestEnvAssignments: + def test_export_api_key(self): + text = "export OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012" + result = redact_sensitive_text(text) + assert "OPENAI_API_KEY=" in result + assert "abc123def456" not in result + + def test_quoted_value(self): + text = 'MY_SECRET_TOKEN="supersecretvalue123456789"' + result = redact_sensitive_text(text) + assert "MY_SECRET_TOKEN=" in result + assert "supersecretvalue" not in result + + def test_non_secret_env_unchanged(self): + text = "HOME=/home/user" + result = redact_sensitive_text(text) + assert result == text + + def test_path_unchanged(self): + text = "PATH=/usr/local/bin:/usr/bin" + result = redact_sensitive_text(text) + assert result == text + + +class TestJsonFields: + def test_json_api_key(self): + text = '{"apiKey": "sk-proj-abc123def456ghi789jkl012"}' + result = redact_sensitive_text(text) + assert "abc123def456" not in result + + def test_json_token(self): + text = '{"access_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.longtoken.here"}' + result = redact_sensitive_text(text) + assert "eyJhbGciOiJSUzI1NiIs" not in result + + def test_json_non_secret_unchanged(self): + text = '{"name": "John", "model": "gpt-4"}' + result = redact_sensitive_text(text) + assert result == text + + +class TestAuthHeaders: + def test_bearer_token(self): + text = "Authorization: Bearer sk-proj-abc123def456ghi789jkl012" + result = redact_sensitive_text(text) + assert "Authorization: Bearer" in result + assert "abc123def456" not in result + + def test_case_insensitive(self): + text = 
"authorization: bearer mytoken123456789012345678" + result = redact_sensitive_text(text) + assert "mytoken12345" not in result + + +class TestTelegramTokens: + def test_bot_token(self): + text = "bot123456789:ABCDEfghij-KLMNopqrst_UVWXyz12345" + result = redact_sensitive_text(text) + assert "ABCDEfghij" not in result + assert "123456789:***" in result + + def test_raw_token(self): + text = "12345678901:ABCDEfghijKLMNopqrstUVWXyz1234567890" + result = redact_sensitive_text(text) + assert "ABCDEfghij" not in result + + +class TestPassthrough: + def test_empty_string(self): + assert redact_sensitive_text("") == "" + + def test_none_returns_none(self): + assert redact_sensitive_text(None) is None + + def test_normal_text_unchanged(self): + text = "Hello world, this is a normal log message with no secrets." + assert redact_sensitive_text(text) == text + + def test_code_unchanged(self): + text = "def main():\n print('hello')\n return 42" + assert redact_sensitive_text(text) == text + + def test_url_without_key_unchanged(self): + text = "Connecting to https://api.openai.com/v1/chat/completions" + assert redact_sensitive_text(text) == text + + +class TestRedactingFormatter: + def test_formats_and_redacts(self): + formatter = RedactingFormatter("%(message)s") + record = logging.LogRecord( + name="test", level=logging.INFO, pathname="", lineno=0, + msg="Key is sk-proj-abc123def456ghi789jkl012", + args=(), exc_info=None, + ) + result = formatter.format(record) + assert "abc123def456" not in result + assert "sk-pro" in result + + +class TestPrintenvSimulation: + """Simulate what happens when the agent runs `env` or `printenv`.""" + + def test_full_env_dump(self): + env_dump = """HOME=/home/user +PATH=/usr/local/bin:/usr/bin +OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012mno345 +OPENROUTER_API_KEY=sk-or-v1-reallyLongSecretKeyValue12345678 +FIRECRAWL_API_KEY=fc-shortkey123456789012 +TELEGRAM_BOT_TOKEN=bot987654321:ABCDEfghij-KLMNopqrst_UVWXyz12345 +SHELL=/bin/bash +USER=teknium""" + result = redact_sensitive_text(env_dump) + # Secrets should be masked + assert "abc123def456" not in result + assert "reallyLongSecretKey" not in result + assert "ABCDEfghij" not in result + # Non-secrets should survive + assert "HOME=/home/user" in result + assert "SHELL=/bin/bash" in result + assert "USER=teknium" in result diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index cb0d9cd4eb..f758768eb9 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1037,8 +1037,12 @@ def terminal_tool( ) output = output[:head_chars] + truncated_notice + output[-tail_chars:] + # Redact secrets from command output (catches env/printenv leaking keys) + from agent.redact import redact_sensitive_text + output = redact_sensitive_text(output.strip()) if output else "" + return json.dumps({ - "output": output.strip() if output else "", + "output": output, "exit_code": returncode, "error": None }, ensure_ascii=False) From a7c2b9e280939bc93a533f760400cae459808a42 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 22:49:52 -0800 Subject: [PATCH 22/31] fix(display): enhance memory error detection for tool failures - Implement logic to distinguish between "full" memory errors and actual failures in the `_detect_tool_failure` function. - Add JSON parsing to identify specific error messages related to memory limits, improving error handling for memory-related tools. 
--- agent/display.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/agent/display.py b/agent/display.py index 9ef8c5ebc0..6f65c5d356 100644 --- a/agent/display.py +++ b/agent/display.py @@ -283,6 +283,15 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str] pass return False, "" + # Memory-specific: distinguish "full" from real errors + if tool_name == "memory": + try: + data = json.loads(result) + if data.get("success") is False and "exceed the limit" in data.get("error", ""): + return True, " [full]" + except (json.JSONDecodeError, TypeError, AttributeError): + pass + # Generic heuristic for non-terminal tools lower = result[:500].lower() if '"error"' in lower or '"failed"' in lower or result.startswith("Error"): From 23d0b7af6a577c5602d0afa427cbbbd849b77891 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 22:49:58 -0800 Subject: [PATCH 23/31] feat(logging): implement persistent error logging for tool failures - Introduce a separate error log for capturing warnings and errors related to tool execution, ensuring detailed inspection of issues post-failure. - Enhance error handling in the AIAgent class to log exceptions with stack traces for better debugging. - Add a similar error logging mechanism in the gateway to streamline debugging processes. --- gateway/run.py | 10 ++++++++++ run_agent.py | 27 ++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 8d8e29e391..52af62e95b 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -2032,6 +2032,16 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool: logging.getLogger().addHandler(file_handler) logging.getLogger().setLevel(logging.INFO) + # Separate errors-only log for easy debugging + error_handler = RotatingFileHandler( + log_dir / 'errors.log', + maxBytes=2 * 1024 * 1024, + backupCount=2, + ) + error_handler.setLevel(logging.WARNING) + error_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s')) + logging.getLogger().addHandler(error_handler) + runner = GatewayRunner(config) # Set up signal handlers diff --git a/run_agent.py b/run_agent.py index b1e1f1bea6..8cd3b157b3 100644 --- a/run_agent.py +++ b/run_agent.py @@ -89,6 +89,7 @@ from agent.prompt_builder import build_skills_system_prompt, build_context_files from agent.display import ( KawaiiSpinner, build_tool_preview as _build_tool_preview, get_cute_tool_message as _get_cute_tool_message_impl, + _detect_tool_failure, ) from agent.trajectory import ( convert_scratchpad_to_think, has_incomplete_scratchpad, @@ -247,8 +248,22 @@ class AIAgent: self._use_prompt_caching = is_openrouter and is_claude self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost) - # Configure logging with secret redaction + # Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log + # so tool failures, API errors, etc. are inspectable after the fact. 
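+        # (Rotation caps disk usage at ~6 MB: one 2 MB live file plus two backups.)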
from agent.redact import RedactingFormatter + _error_log_dir = Path.home() / ".hermes" / "logs" + _error_log_dir.mkdir(parents=True, exist_ok=True) + _error_log_path = _error_log_dir / "errors.log" + from logging.handlers import RotatingFileHandler + _error_file_handler = RotatingFileHandler( + _error_log_path, maxBytes=2 * 1024 * 1024, backupCount=2, + ) + _error_file_handler.setLevel(logging.WARNING) + _error_file_handler.setFormatter(RedactingFormatter( + '%(asctime)s %(levelname)s %(name)s: %(message)s', + )) + logging.getLogger().addHandler(_error_file_handler) + if self.verbose_logging: logging.basicConfig( level=logging.DEBUG, @@ -2499,7 +2514,7 @@ class AIAgent: _spinner_result = function_result except Exception as tool_error: function_result = f"Error executing tool '{function_name}': {tool_error}" - logger.error("handle_function_call raised for %s: %s", function_name, tool_error) + logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True) finally: tool_duration = time.time() - tool_start_time cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result) @@ -2509,11 +2524,17 @@ class AIAgent: function_result = handle_function_call(function_name, function_args, effective_task_id) except Exception as tool_error: function_result = f"Error executing tool '{function_name}': {tool_error}" - logger.error("handle_function_call raised for %s: %s", function_name, tool_error) + logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True) tool_duration = time.time() - tool_start_time result_preview = function_result[:200] if len(function_result) > 200 else function_result + # Log tool errors to the persistent error log so [error] tags + # in the UI always have a corresponding detailed entry on disk. + _is_error_result, _ = _detect_tool_failure(function_name, function_result) + if _is_error_result: + logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview) + if self.verbose_logging: logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s") logging.debug(f"Tool result preview: {result_preview}...") From 1db559829485a01909c949b3a322b4bfc636d561 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 22:57:58 -0800 Subject: [PATCH 24/31] feat(tests): add live integration tests for file operations and shell noise filtering - Introduce a new test suite in `test_file_tools_live.py` to validate file operations and ensure accurate command execution in a real environment. - Implement assertions to check for shell noise contamination in outputs, enhancing the reliability of command results. - Create fixtures for setting up a local environment and populating directories with known file contents for comprehensive testing. - Refactor shell noise handling in `process_registry.py` and `local.py` to support multiple noise patterns, improving output cleanliness. --- tests/tools/test_file_tools_live.py | 483 ++++++++++++++++++++++++++++ tools/environments/local.py | 24 +- tools/process_registry.py | 18 +- 3 files changed, 507 insertions(+), 18 deletions(-) create mode 100644 tests/tools/test_file_tools_live.py diff --git a/tests/tools/test_file_tools_live.py b/tests/tools/test_file_tools_live.py new file mode 100644 index 0000000000..99627b91a3 --- /dev/null +++ b/tests/tools/test_file_tools_live.py @@ -0,0 +1,483 @@ +"""Live integration tests for file operations and terminal tools. 
+ +These tests run REAL commands through the LocalEnvironment -- no mocks. +They verify that shell noise is properly filtered, commands actually work, +and the tool outputs are EXACTLY what the agent would see. + +Every test with output validates against a known-good value AND +asserts zero contamination from shell noise via _assert_clean(). +""" + +import json +import os +import sys +from pathlib import Path + +import pytest + +sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + +from tools.environments.local import LocalEnvironment, _clean_shell_noise, _SHELL_NOISE_SUBSTRINGS +from tools.file_operations import ShellFileOperations + + +# ── Shared noise detection ─────────────────────────────────────────────── +# Every known shell noise pattern. If ANY of these appear in output that +# isn't explicitly expected, the test fails with a clear message. + +_ALL_NOISE_PATTERNS = list(_SHELL_NOISE_SUBSTRINGS) + [ + "bash: ", + "Inappropriate ioctl", +] + + +def _assert_clean(text: str, context: str = "output"): + """Assert text contains zero shell noise contamination.""" + if not text: + return + for noise in _ALL_NOISE_PATTERNS: + assert noise not in text, ( + f"Shell noise leaked into {context}: found {noise!r} in:\n" + f"{text[:500]}" + ) + + +# ── Fixtures ───────────────────────────────────────────────────────────── + +# Deterministic file content used across tests. Every byte is known, +# so any unexpected text in results is immediately caught. +SIMPLE_CONTENT = "alpha\nbravo\ncharlie\n" +NUMBERED_CONTENT = "\n".join(f"LINE_{i:04d}" for i in range(1, 51)) + "\n" +SPECIAL_CONTENT = "single 'quotes' and \"doubles\" and $VARS and `backticks` and \\backslash\n" +MULTIFILE_A = "def func_alpha():\n return 42\n" +MULTIFILE_B = "def func_bravo():\n return 99\n" +MULTIFILE_C = "nothing relevant here\n" + + +@pytest.fixture +def env(tmp_path): + """A real LocalEnvironment rooted in a temp directory.""" + return LocalEnvironment(cwd=str(tmp_path), timeout=15) + + +@pytest.fixture +def ops(env, tmp_path): + """ShellFileOperations wired to the real local environment.""" + return ShellFileOperations(env, cwd=str(tmp_path)) + + +@pytest.fixture +def populated_dir(tmp_path): + """A temp directory with known files for search/read tests.""" + (tmp_path / "alpha.py").write_text(MULTIFILE_A) + (tmp_path / "bravo.py").write_text(MULTIFILE_B) + (tmp_path / "notes.txt").write_text(MULTIFILE_C) + (tmp_path / "data.csv").write_text("col1,col2\n1,2\n3,4\n") + return tmp_path + + +# ── _clean_shell_noise unit tests ──────────────────────────────────────── + +class TestCleanShellNoise: + def test_single_noise_line(self): + output = "bash: no job control in this shell\nhello world\n" + result = _clean_shell_noise(output) + assert result == "hello world\n" + + def test_double_noise_lines(self): + output = ( + "bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n" + "bash: no job control in this shell\n" + "actual output here\n" + ) + result = _clean_shell_noise(output) + assert result == "actual output here\n" + _assert_clean(result) + + def test_tcsetattr_noise(self): + output = ( + "bash: [12345: 2 (255)] tcsetattr: Inappropriate ioctl for device\n" + "real content\n" + ) + result = _clean_shell_noise(output) + assert result == "real content\n" + _assert_clean(result) + + def test_triple_noise_lines(self): + output = ( + "bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n" + "bash: no job control in this shell\n" + "bash: [999: 2 (255)] tcsetattr: 
Inappropriate ioctl for device\n" + "clean\n" + ) + result = _clean_shell_noise(output) + assert result == "clean\n" + + def test_no_noise_untouched(self): + assert _clean_shell_noise("hello\nworld\n") == "hello\nworld\n" + + def test_empty_string(self): + assert _clean_shell_noise("") == "" + + def test_only_noise_produces_empty(self): + output = "bash: no job control in this shell\n" + result = _clean_shell_noise(output) + _assert_clean(result) + + def test_noise_in_middle_not_stripped(self): + """Only LEADING noise is stripped -- noise in the middle is real output.""" + output = "real\nbash: no job control in this shell\nmore real\n" + result = _clean_shell_noise(output) + assert result == output + + +# ── LocalEnvironment.execute() ─────────────────────────────────────────── + +class TestLocalEnvironmentExecute: + def test_echo_exact_output(self, env): + result = env.execute("echo DETERMINISTIC_OUTPUT_12345") + assert result["returncode"] == 0 + assert result["output"].strip() == "DETERMINISTIC_OUTPUT_12345" + _assert_clean(result["output"]) + + def test_printf_no_trailing_newline(self, env): + result = env.execute("printf 'exact'") + assert result["returncode"] == 0 + assert result["output"] == "exact" + _assert_clean(result["output"]) + + def test_exit_code_propagated(self, env): + result = env.execute("exit 42") + assert result["returncode"] == 42 + + def test_stderr_captured_in_output(self, env): + result = env.execute("echo STDERR_TEST >&2") + assert "STDERR_TEST" in result["output"] + _assert_clean(result["output"]) + + def test_cwd_respected(self, env, tmp_path): + subdir = tmp_path / "subdir_test" + subdir.mkdir() + result = env.execute("pwd", cwd=str(subdir)) + assert result["returncode"] == 0 + assert result["output"].strip() == str(subdir) + _assert_clean(result["output"]) + + def test_multiline_exact(self, env): + result = env.execute("echo AAA; echo BBB; echo CCC") + lines = [l for l in result["output"].strip().split("\n") if l.strip()] + assert lines == ["AAA", "BBB", "CCC"] + _assert_clean(result["output"]) + + def test_env_var_home(self, env): + result = env.execute("echo $HOME") + assert result["returncode"] == 0 + home = result["output"].strip() + assert home == str(Path.home()) + _assert_clean(result["output"]) + + def test_pipe_exact(self, env): + result = env.execute("echo 'one two three' | wc -w") + assert result["returncode"] == 0 + assert result["output"].strip() == "3" + _assert_clean(result["output"]) + + def test_cat_deterministic_content(self, env, tmp_path): + f = tmp_path / "det.txt" + f.write_text(SIMPLE_CONTENT) + result = env.execute(f"cat {f}") + assert result["returncode"] == 0 + assert result["output"] == SIMPLE_CONTENT + _assert_clean(result["output"]) + + +# ── _has_command ───────────────────────────────────────────────────────── + +class TestHasCommand: + def test_finds_echo(self, ops): + assert ops._has_command("echo") is True + + def test_finds_cat(self, ops): + assert ops._has_command("cat") is True + + def test_finds_sed(self, ops): + assert ops._has_command("sed") is True + + def test_finds_wc(self, ops): + assert ops._has_command("wc") is True + + def test_finds_find(self, ops): + assert ops._has_command("find") is True + + def test_missing_command(self, ops): + assert ops._has_command("nonexistent_tool_xyz_abc_999") is False + + def test_rg_or_grep_available(self, ops): + assert ops._has_command("rg") or ops._has_command("grep"), \ + "Neither rg nor grep found -- search_files will break" + + +# ── read_file 
──────────────────────────────────────────────────────────── + +class TestReadFile: + def test_exact_content(self, ops, tmp_path): + f = tmp_path / "exact.txt" + f.write_text(SIMPLE_CONTENT) + result = ops.read_file(str(f)) + assert result.error is None + # Content has line numbers prepended, check the actual text is there + assert "alpha" in result.content + assert "bravo" in result.content + assert "charlie" in result.content + assert result.total_lines == 3 + _assert_clean(result.content) + + def test_absolute_path(self, ops, tmp_path): + f = tmp_path / "abs.txt" + f.write_text("ABSOLUTE_PATH_CONTENT\n") + result = ops.read_file(str(f)) + assert result.error is None + assert "ABSOLUTE_PATH_CONTENT" in result.content + _assert_clean(result.content) + + def test_tilde_expansion(self, ops): + test_path = Path.home() / ".hermes_test_tilde_9f8a7b" + try: + test_path.write_text("TILDE_EXPANSION_OK\n") + result = ops.read_file("~/.hermes_test_tilde_9f8a7b") + assert result.error is None + assert "TILDE_EXPANSION_OK" in result.content + _assert_clean(result.content) + finally: + test_path.unlink(missing_ok=True) + + def test_nonexistent_returns_error(self, ops, tmp_path): + result = ops.read_file(str(tmp_path / "ghost.txt")) + assert result.error is not None + + def test_pagination_exact_window(self, ops, tmp_path): + f = tmp_path / "numbered.txt" + f.write_text(NUMBERED_CONTENT) + result = ops.read_file(str(f), offset=10, limit=5) + assert result.error is None + assert "LINE_0010" in result.content + assert "LINE_0014" in result.content + assert "LINE_0009" not in result.content + assert "LINE_0015" not in result.content + assert result.total_lines == 50 + _assert_clean(result.content) + + def test_no_noise_in_content(self, ops, tmp_path): + f = tmp_path / "noise_check.txt" + f.write_text("ONLY_THIS_CONTENT\n") + result = ops.read_file(str(f)) + assert result.error is None + _assert_clean(result.content) + + +# ── write_file ─────────────────────────────────────────────────────────── + +class TestWriteFile: + def test_write_and_verify(self, ops, tmp_path): + path = str(tmp_path / "written.txt") + result = ops.write_file(path, SIMPLE_CONTENT) + assert result.error is None + assert result.bytes_written == len(SIMPLE_CONTENT.encode()) + assert Path(path).read_text() == SIMPLE_CONTENT + + def test_creates_nested_dirs(self, ops, tmp_path): + path = str(tmp_path / "a" / "b" / "c" / "deep.txt") + result = ops.write_file(path, "DEEP_CONTENT\n") + assert result.error is None + assert result.dirs_created is True + assert Path(path).read_text() == "DEEP_CONTENT\n" + + def test_overwrites_exact(self, ops, tmp_path): + path = str(tmp_path / "overwrite.txt") + Path(path).write_text("OLD_DATA\n") + result = ops.write_file(path, "NEW_DATA\n") + assert result.error is None + assert Path(path).read_text() == "NEW_DATA\n" + + def test_large_content_via_stdin(self, ops, tmp_path): + path = str(tmp_path / "large.txt") + content = "X" * 200_000 + "\n" + result = ops.write_file(path, content) + assert result.error is None + assert Path(path).read_text() == content + + def test_special_characters_preserved(self, ops, tmp_path): + path = str(tmp_path / "special.txt") + result = ops.write_file(path, SPECIAL_CONTENT) + assert result.error is None + assert Path(path).read_text() == SPECIAL_CONTENT + + def test_roundtrip_read_write(self, ops, tmp_path): + """Write -> read back -> verify exact match.""" + path = str(tmp_path / "roundtrip.txt") + ops.write_file(path, SIMPLE_CONTENT) + result = ops.read_file(path) + assert 
result.error is None + assert "alpha" in result.content + assert "charlie" in result.content + _assert_clean(result.content) + + +# ── patch_replace ──────────────────────────────────────────────────────── + +class TestPatchReplace: + def test_exact_replacement(self, ops, tmp_path): + path = str(tmp_path / "patch.txt") + Path(path).write_text("hello world\n") + result = ops.patch_replace(path, "world", "earth") + assert result.error is None + assert Path(path).read_text() == "hello earth\n" + + def test_not_found_error(self, ops, tmp_path): + path = str(tmp_path / "patch2.txt") + Path(path).write_text("hello\n") + result = ops.patch_replace(path, "NONEXISTENT_STRING", "replacement") + assert result.error is not None + assert "Could not find" in result.error + + def test_multiline_patch(self, ops, tmp_path): + path = str(tmp_path / "multi.txt") + Path(path).write_text("line1\nline2\nline3\n") + result = ops.patch_replace(path, "line2", "REPLACED") + assert result.error is None + assert Path(path).read_text() == "line1\nREPLACED\nline3\n" + + +# ── search ─────────────────────────────────────────────────────────────── + +class TestSearch: + def test_content_search_finds_exact_match(self, ops, populated_dir): + result = ops.search("func_alpha", str(populated_dir), target="content") + assert result.error is None + assert result.total_count >= 1 + assert any("func_alpha" in m.content for m in result.matches) + for m in result.matches: + _assert_clean(m.content) + _assert_clean(m.path) + + def test_content_search_no_false_positives(self, ops, populated_dir): + result = ops.search("ZZZZZ_NONEXISTENT", str(populated_dir), target="content") + assert result.error is None + assert result.total_count == 0 + assert len(result.matches) == 0 + + def test_file_search_finds_py_files(self, ops, populated_dir): + result = ops.search("*.py", str(populated_dir), target="files") + assert result.error is None + assert result.total_count >= 2 + # Verify only expected files appear + found_names = set() + for f in result.files: + name = Path(f).name + found_names.add(name) + _assert_clean(f) + assert "alpha.py" in found_names + assert "bravo.py" in found_names + assert "notes.txt" not in found_names + + def test_file_search_no_false_file_entries(self, ops, populated_dir): + """Every entry in the files list must be a real path, not noise.""" + result = ops.search("*.py", str(populated_dir), target="files") + assert result.error is None + for f in result.files: + _assert_clean(f) + assert Path(f).exists(), f"Search returned non-existent path: {f}" + + def test_content_search_with_glob_filter(self, ops, populated_dir): + result = ops.search("return", str(populated_dir), target="content", file_glob="*.py") + assert result.error is None + for m in result.matches: + assert m.path.endswith(".py"), f"Non-py file in results: {m.path}" + _assert_clean(m.content) + _assert_clean(m.path) + + def test_search_output_has_zero_noise(self, ops, populated_dir): + """Dedicated noise check: search must return only real content.""" + result = ops.search("func", str(populated_dir), target="content") + assert result.error is None + for m in result.matches: + _assert_clean(m.content) + _assert_clean(m.path) + + +# ── _expand_path ───────────────────────────────────────────────────────── + +class TestExpandPath: + def test_tilde_exact(self, ops): + result = ops._expand_path("~/test.txt") + expected = f"{Path.home()}/test.txt" + assert result == expected + _assert_clean(result) + + def test_absolute_unchanged(self, ops): + assert 
ops._expand_path("/tmp/test.txt") == "/tmp/test.txt" + + def test_relative_unchanged(self, ops): + assert ops._expand_path("relative/path.txt") == "relative/path.txt" + + def test_bare_tilde(self, ops): + result = ops._expand_path("~") + assert result == str(Path.home()) + _assert_clean(result) + + +# ── Terminal output cleanliness ────────────────────────────────────────── + +class TestTerminalOutputCleanliness: + """Every command the agent might run must produce noise-free output.""" + + def test_echo(self, env): + result = env.execute("echo CLEAN_TEST") + assert result["output"].strip() == "CLEAN_TEST" + _assert_clean(result["output"]) + + def test_cat(self, env, tmp_path): + f = tmp_path / "cat_test.txt" + f.write_text("CAT_CONTENT_EXACT\n") + result = env.execute(f"cat {f}") + assert result["output"] == "CAT_CONTENT_EXACT\n" + _assert_clean(result["output"]) + + def test_ls(self, env, tmp_path): + (tmp_path / "file_a.txt").write_text("") + (tmp_path / "file_b.txt").write_text("") + result = env.execute(f"ls {tmp_path}") + _assert_clean(result["output"]) + assert "file_a.txt" in result["output"] + assert "file_b.txt" in result["output"] + + def test_wc(self, env, tmp_path): + f = tmp_path / "wc_test.txt" + f.write_text("one\ntwo\nthree\n") + result = env.execute(f"wc -l < {f}") + assert result["output"].strip() == "3" + _assert_clean(result["output"]) + + def test_head(self, env, tmp_path): + f = tmp_path / "head_test.txt" + f.write_text(NUMBERED_CONTENT) + result = env.execute(f"head -n 3 {f}") + expected = "LINE_0001\nLINE_0002\nLINE_0003\n" + assert result["output"] == expected + _assert_clean(result["output"]) + + def test_env_var_expansion(self, env): + result = env.execute("echo $HOME") + assert result["output"].strip() == str(Path.home()) + _assert_clean(result["output"]) + + def test_command_substitution(self, env): + result = env.execute("echo $(echo NESTED)") + assert result["output"].strip() == "NESTED" + _assert_clean(result["output"]) + + def test_command_v_detection(self, env): + """This is how _has_command works -- must return clean 'yes'.""" + result = env.execute("command -v cat >/dev/null 2>&1 && echo 'yes'") + assert result["output"].strip() == "yes" + _assert_clean(result["output"]) diff --git a/tools/environments/local.py b/tools/environments/local.py index 6d7e8da3c6..5b70a2707d 100644 --- a/tools/environments/local.py +++ b/tools/environments/local.py @@ -11,20 +11,26 @@ from tools.environments.base import BaseEnvironment # Noise lines emitted by interactive shells when stdin is not a terminal. # Filtered from output to keep tool results clean. -_SHELL_NOISE = frozenset({ +_SHELL_NOISE_SUBSTRINGS = ( + "bash: cannot set terminal process group", "bash: no job control in this shell", - "bash: no job control in this shell\n", "no job control in this shell", - "no job control in this shell\n", -}) + "cannot set terminal process group", + "tcsetattr: Inappropriate ioctl for device", +) def _clean_shell_noise(output: str) -> str: - """Strip shell startup warnings that leak when using -i without a TTY.""" - lines = output.split("\n", 2) # only check first two lines - if lines and lines[0].strip() in _SHELL_NOISE: - return "\n".join(lines[1:]) - return output + """Strip shell startup warnings that leak when using -i without a TTY. + + Removes all leading lines that match known noise patterns, not just the first. + Some environments emit multiple noise lines (e.g. Docker, non-TTY sessions). 
+    """
+    lines = output.split("\n")
+    # Strip all leading noise lines
+    while lines and any(noise in lines[0] for noise in _SHELL_NOISE_SUBSTRINGS):
+        lines.pop(0)
+    return "\n".join(lines)
 
 
 class LocalEnvironment(BaseEnvironment):
diff --git a/tools/process_registry.py b/tools/process_registry.py
index bfdb8cd1df..cbc0dd853e 100644
--- a/tools/process_registry.py
+++ b/tools/process_registry.py
@@ -87,13 +87,13 @@ class ProcessRegistry:
     - Cleanup thread (sandbox reaping coordination)
     """
 
-    # Noise lines emitted by interactive shells when stdin is not a terminal.
-    _SHELL_NOISE = frozenset({
+    _SHELL_NOISE_SUBSTRINGS = (
+        "bash: cannot set terminal process group",
         "bash: no job control in this shell",
-        "bash: no job control in this shell\n",
         "no job control in this shell",
-        "no job control in this shell\n",
-    })
+        "cannot set terminal process group",
+        "tcsetattr: Inappropriate ioctl for device",
+    )
 
     def __init__(self):
         self._running: Dict[str, ProcessSession] = {}
@@ -106,10 +106,10 @@ class ProcessRegistry:
     @staticmethod
     def _clean_shell_noise(text: str) -> str:
         """Strip shell startup warnings from the beginning of output."""
-        lines = text.split("\n", 2)
-        if lines and lines[0].strip() in ProcessRegistry._SHELL_NOISE:
-            return "\n".join(lines[1:])
-        return text
+        lines = text.split("\n")
+        while lines and any(noise in lines[0] for noise in ProcessRegistry._SHELL_NOISE_SUBSTRINGS):
+            lines.pop(0)
+        return "\n".join(lines)
 
     # ----- Spawn -----

From dd69f16c3e06a069d52a4ee8d44963ea2dcd8dbd Mon Sep 17 00:00:00 2001
From: lila <137614867+cutepawss@users.noreply.github.com>
Date: Sun, 1 Mar 2026 10:18:00 +0300
Subject: [PATCH 25/31] feat(gateway): expose subagent tool calls and thinking
 to user (fixes #169) (#186)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When subagents run via delegate_task, the user now sees real-time
progress instead of silence:

CLI: tree-view activity lines print above the delegation spinner

  🔀 Delegating: research quantum computing
  ├─ 💭 "I'll search for papers first..."
  ├─ 🔍 web_search "quantum computing"
  ├─ 📖 read_file "paper.pdf"
  └─ ⠹ working... (18.2s)

Gateway (Telegram/Discord): batched progress summaries sent every
5 tool calls to avoid message spam. Remaining tools flushed on
subagent completion.

Changes:

- agent/display.py: add KawaiiSpinner.print_above() to print status
  lines above an active spinner without disrupting animation. Uses
  captured stdout (self._out) so it works inside the child's
  redirect_stdout(devnull).

- tools/delegate_tool.py: add _build_child_progress_callback() that
  creates a per-child callback relaying tool calls and thinking events
  to the parent's spinner (CLI) or progress queue (gateway). Each child
  gets its own callback instance, so parallel subagents don't share
  state. Includes _flush() for gateway batch completion.

- run_agent.py: fire tool_progress_callback with '_thinking' event when
  the model produces text content. Guarded by _delegate_depth > 0 so
  only subagents fire this (prevents gateway spam from main agent).
  REASONING_SCRATCHPAD/think/reasoning XML tags are stripped before
  display.

Tests: 21 new tests covering print_above, callback builder, thinking
relay, SCRATCHPAD filtering, batching, flush, thread isolation,
delegate_depth guard, and prefix handling.
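The gateway relay contract, as an illustrative sketch (not part of the
diff below; the SimpleNamespace parent is a stand-in for a real AIAgent,
and the printed line mirrors what a gateway progress queue would see):

    from types import SimpleNamespace
    from tools.delegate_tool import _build_child_progress_callback

    parent = SimpleNamespace(
        _delegate_spinner=None,   # no CLI spinner -> gateway batching path
        tool_progress_callback=lambda name, text: print(name, text),
    )
    cb = _build_child_progress_callback(0, parent)
    for i in range(5):       # the 5th call fills the batch and flushes:
        cb(f"tool_{i}")      # -> subagent_progress 🔀 tool_0, ..., tool_4
    cb._flush()              # flushes any remainder; a no-op here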
---
 agent/display.py                      |  15 ++
 run_agent.py                          |  18 ++
 tests/agent/test_subagent_progress.py | 367 ++++++++++++++++++++++++++
 tools/delegate_tool.py                | 111 ++++++--
 4 files changed, 489 insertions(+), 22 deletions(-)
 create mode 100644 tests/agent/test_subagent_progress.py

diff --git a/agent/display.py b/agent/display.py
index 6f65c5d356..19acc67d9c 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -199,6 +199,21 @@ class KawaiiSpinner:
     def update_text(self, new_message: str):
         self.message = new_message
 
+    def print_above(self, text: str):
+        """Print a line above the spinner without disrupting animation.
+
+        Clears the current spinner line, prints the text, and lets the
+        next animation tick redraw the spinner on the line below.
+        Thread-safe: uses the captured stdout reference (self._out).
+        Works inside redirect_stdout(devnull) because _write bypasses
+        sys.stdout and writes to the stdout captured at spinner creation.
+        """
+        if not self.running:
+            self._write(f" {text}", flush=True)
+            return
+        # Clear spinner line, print text above, spinner redraws on next tick
+        self._write(f"\r\033[K {text}", flush=True)
+
     def stop(self, final_message: str = None):
         self.running = False
         if self.thread:
diff --git a/run_agent.py b/run_agent.py
index 8cd3b157b3..32b69489f4 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3312,6 +3312,24 @@ class AIAgent:
             # Handle assistant response
             if assistant_message.content and not self.quiet_mode:
                 print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
+
+            # Notify progress callback of model's thinking (used by subagent
+            # delegation to relay the child's reasoning to the parent display).
+            # Guard: only fire for subagents (_delegate_depth >= 1) to avoid
+            # spamming gateway platforms with the main agent's every thought.
+            if (assistant_message.content and self.tool_progress_callback
+                    and getattr(self, '_delegate_depth', 0) > 0):
+                _think_text = assistant_message.content.strip()
+                # Strip reasoning XML tags that shouldn't leak to parent display
+                _think_text = re.sub(
+                    r'</?(REASONING_SCRATCHPAD|think|reasoning)>', '', _think_text
+                ).strip()
+                first_line = _think_text.split('\n')[0][:80] if _think_text else ""
+                if first_line:
+                    try:
+                        self.tool_progress_callback("_thinking", first_line)
+                    except Exception:
+                        pass
 
             # Check for incomplete <think> (opened but never closed)
             # This means the model ran out of output tokens mid-reasoning — retry up to 2 times
diff --git a/tests/agent/test_subagent_progress.py b/tests/agent/test_subagent_progress.py
new file mode 100644
index 0000000000..0ff4fcb835
--- /dev/null
+++ b/tests/agent/test_subagent_progress.py
@@ -0,0 +1,367 @@
+"""
+Tests for subagent progress relay (issue #169).
+ +Verifies that: +- KawaiiSpinner.print_above() works with and without active spinner +- _build_child_progress_callback handles CLI/gateway/no-display paths +- Thinking events are relayed correctly +- Parallel callbacks don't share state +""" + +import io +import sys +import time +import threading +import pytest +from unittest.mock import MagicMock, patch + +from agent.display import KawaiiSpinner +from tools.delegate_tool import _build_child_progress_callback + + +# ========================================================================= +# KawaiiSpinner.print_above tests +# ========================================================================= + +class TestPrintAbove: + """Tests for KawaiiSpinner.print_above method.""" + + def test_print_above_without_spinner_running(self): + """print_above should write to stdout even when spinner is not running.""" + buf = io.StringIO() + spinner = KawaiiSpinner("test") + spinner._out = buf # Redirect to buffer + + spinner.print_above("hello world") + output = buf.getvalue() + assert "hello world" in output + + def test_print_above_with_spinner_running(self): + """print_above should clear spinner line and print text.""" + buf = io.StringIO() + spinner = KawaiiSpinner("test") + spinner._out = buf + spinner.running = True # Pretend spinner is running (don't start thread) + + spinner.print_above("tool line") + output = buf.getvalue() + assert "tool line" in output + assert "\r\033[K" in output # Should start with line clear + + def test_print_above_uses_captured_stdout(self): + """print_above should use self._out, not sys.stdout. + This ensures it works inside redirect_stdout(devnull).""" + buf = io.StringIO() + spinner = KawaiiSpinner("test") + spinner._out = buf + + # Simulate redirect_stdout(devnull) + old_stdout = sys.stdout + sys.stdout = io.StringIO() + try: + spinner.print_above("should go to buf") + finally: + sys.stdout = old_stdout + + assert "should go to buf" in buf.getvalue() + + +# ========================================================================= +# _build_child_progress_callback tests +# ========================================================================= + +class TestBuildChildProgressCallback: + """Tests for child progress callback builder.""" + + def test_returns_none_when_no_display(self): + """Should return None when parent has no spinner or callback.""" + parent = MagicMock() + parent._delegate_spinner = None + parent.tool_progress_callback = None + + cb = _build_child_progress_callback(0, parent) + assert cb is None + + def test_cli_spinner_tool_event(self): + """Should print tool line above spinner for CLI path.""" + buf = io.StringIO() + spinner = KawaiiSpinner("delegating") + spinner._out = buf + spinner.running = True + + parent = MagicMock() + parent._delegate_spinner = spinner + parent.tool_progress_callback = None + + cb = _build_child_progress_callback(0, parent) + assert cb is not None + + cb("web_search", "quantum computing") + output = buf.getvalue() + assert "web_search" in output + assert "quantum computing" in output + assert "├─" in output + + def test_cli_spinner_thinking_event(self): + """Should print thinking line above spinner for CLI path.""" + buf = io.StringIO() + spinner = KawaiiSpinner("delegating") + spinner._out = buf + spinner.running = True + + parent = MagicMock() + parent._delegate_spinner = spinner + parent.tool_progress_callback = None + + cb = _build_child_progress_callback(0, parent) + cb("_thinking", "I'll search for papers first") + + output = buf.getvalue() + assert "💭" in output 
+ assert "search for papers" in output + + def test_gateway_batched_progress(self): + """Gateway path should batch tool calls and flush at BATCH_SIZE.""" + parent = MagicMock() + parent._delegate_spinner = None + parent_cb = MagicMock() + parent.tool_progress_callback = parent_cb + + cb = _build_child_progress_callback(0, parent) + + # Send 4 tool calls — shouldn't flush yet (BATCH_SIZE = 5) + for i in range(4): + cb(f"tool_{i}", f"arg_{i}") + parent_cb.assert_not_called() + + # 5th call should trigger flush + cb("tool_4", "arg_4") + parent_cb.assert_called_once() + call_args = parent_cb.call_args + assert "tool_0" in call_args[0][1] + assert "tool_4" in call_args[0][1] + + def test_thinking_not_relayed_to_gateway(self): + """Thinking events should NOT be sent to gateway (too noisy).""" + parent = MagicMock() + parent._delegate_spinner = None + parent_cb = MagicMock() + parent.tool_progress_callback = parent_cb + + cb = _build_child_progress_callback(0, parent) + cb("_thinking", "some reasoning text") + + parent_cb.assert_not_called() + + def test_parallel_callbacks_independent(self): + """Each child's callback should have independent batch state.""" + parent = MagicMock() + parent._delegate_spinner = None + parent_cb = MagicMock() + parent.tool_progress_callback = parent_cb + + cb0 = _build_child_progress_callback(0, parent) + cb1 = _build_child_progress_callback(1, parent) + + # Send 3 calls to each — neither should flush (batch size = 5) + for i in range(3): + cb0(f"tool_{i}") + cb1(f"other_{i}") + + parent_cb.assert_not_called() + + def test_task_index_prefix_in_output(self): + """Multi-task mode should show task index prefix.""" + buf = io.StringIO() + spinner = KawaiiSpinner("delegating") + spinner._out = buf + spinner.running = True + + parent = MagicMock() + parent._delegate_spinner = spinner + parent.tool_progress_callback = None + + # task_index > 0 should add prefix + cb = _build_child_progress_callback(2, parent) + cb("web_search", "test") + + output = buf.getvalue() + assert "[2]" in output + + def test_task_index_zero_no_prefix(self): + """Single task (index 0) should not show index prefix.""" + buf = io.StringIO() + spinner = KawaiiSpinner("delegating") + spinner._out = buf + spinner.running = True + + parent = MagicMock() + parent._delegate_spinner = spinner + parent.tool_progress_callback = None + + cb = _build_child_progress_callback(0, parent) + cb("web_search", "test") + + output = buf.getvalue() + assert "[0]" not in output + + +# ========================================================================= +# Integration: thinking callback in run_agent.py +# ========================================================================= + +class TestThinkingCallback: + """Tests for the _thinking callback in AIAgent conversation loop.""" + + def _simulate_thinking_callback(self, content, callback, delegate_depth=1): + """Simulate the exact code path from run_agent.py for the thinking callback. + + delegate_depth: simulates self._delegate_depth. + 0 = main agent (should NOT fire), >=1 = subagent (should fire). 
+        """
+        import re
+        if (content and callback and delegate_depth > 0):
+            _think_text = content.strip()
+            _think_text = re.sub(
+                r'</?(REASONING_SCRATCHPAD|think|reasoning)>', '', _think_text
+            ).strip()
+            first_line = _think_text.split('\n')[0][:80] if _think_text else ""
+            if first_line:
+                try:
+                    callback("_thinking", first_line)
+                except Exception:
+                    pass
+
+    def test_thinking_callback_fires_on_content(self):
+        """tool_progress_callback should receive _thinking event
+        when assistant message has content."""
+        calls = []
+        self._simulate_thinking_callback(
+            "I'll research quantum computing first, then summarize.",
+            lambda name, preview=None: calls.append((name, preview))
+        )
+        assert len(calls) == 1
+        assert calls[0][0] == "_thinking"
+        assert "quantum computing" in calls[0][1]
+
+    def test_thinking_callback_skipped_when_no_content(self):
+        """Should not fire when assistant has no content."""
+        calls = []
+        self._simulate_thinking_callback(
+            None,
+            lambda name, preview=None: calls.append((name, preview))
+        )
+        assert len(calls) == 0
+
+    def test_thinking_callback_truncates_long_content(self):
+        """Should truncate long content to 80 chars."""
+        calls = []
+        self._simulate_thinking_callback(
+            "A" * 200 + "\nSecond line should be ignored",
+            lambda name, preview=None: calls.append((name, preview))
+        )
+        assert len(calls) == 1
+        assert len(calls[0][1]) == 80
+
+    def test_thinking_callback_skipped_for_main_agent(self):
+        """Main agent (delegate_depth=0) should NOT fire thinking events.
+        This prevents gateway spam on Telegram/Discord."""
+        calls = []
+        self._simulate_thinking_callback(
+            "I'll help you with that request.",
+            lambda name, preview=None: calls.append((name, preview)),
+            delegate_depth=0,
+        )
+        assert len(calls) == 0
+
+    def test_thinking_callback_strips_reasoning_scratchpad(self):
+        """<REASONING_SCRATCHPAD> tags should be stripped before display."""
+        calls = []
+        self._simulate_thinking_callback(
+            "<REASONING_SCRATCHPAD>I need to analyze this carefully</REASONING_SCRATCHPAD>",
+            lambda name, preview=None: calls.append((name, preview))
+        )
+        assert len(calls) == 1
+        assert "<REASONING_SCRATCHPAD>" not in calls[0][1]
+        assert "analyze this carefully" in calls[0][1]
+
+    def test_thinking_callback_strips_think_tags(self):
+        """<think> tags should be stripped before display."""
+        calls = []
+        self._simulate_thinking_callback(
+            "<think>Let me think about this problem</think>",
+            lambda name, preview=None: calls.append((name, preview))
+        )
+        assert len(calls) == 1
+        assert "<think>" not in calls[0][1]
+        assert "think about this problem" in calls[0][1]
+
+    def test_thinking_callback_empty_after_strip(self):
+        """Should not fire when content is only XML tags."""
+        calls = []
+        self._simulate_thinking_callback(
+            "<think></think>",
+            lambda name, preview=None: calls.append((name, preview))
+        )
+        assert len(calls) == 0
+
+
+# =========================================================================
+# Gateway batch flush tests
+# =========================================================================
+
+class TestBatchFlush:
+    """Tests for gateway batch flush on subagent completion."""
+
+    def test_flush_sends_remaining_batch(self):
+        """_flush should send remaining tool names to gateway."""
+        parent = MagicMock()
+        parent._delegate_spinner = None
+        parent_cb = MagicMock()
+        parent.tool_progress_callback = parent_cb
+
+        cb = _build_child_progress_callback(0, parent)
+
+        # Send 3 tools (below batch size of 5)
+        cb("web_search", "query1")
+        cb("read_file", "file.txt")
+        cb("write_file", "out.txt")
+        parent_cb.assert_not_called()
+
+        # Flush should send the remaining 3
+        cb._flush()
+        parent_cb.assert_called_once()
+        summary = 
parent_cb.call_args[0][1] + assert "web_search" in summary + assert "write_file" in summary + + def test_flush_noop_when_batch_empty(self): + """_flush should not send anything when batch is empty.""" + parent = MagicMock() + parent._delegate_spinner = None + parent_cb = MagicMock() + parent.tool_progress_callback = parent_cb + + cb = _build_child_progress_callback(0, parent) + cb._flush() + parent_cb.assert_not_called() + + def test_flush_noop_when_no_parent_callback(self): + """_flush should not crash when there's no parent callback.""" + buf = io.StringIO() + spinner = KawaiiSpinner("test") + spinner._out = buf + spinner.running = True + + parent = MagicMock() + parent._delegate_spinner = spinner + parent.tool_progress_callback = None + + cb = _build_child_progress_callback(0, parent) + cb("web_search", "test") + cb._flush() # Should not crash + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) + diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 77659d3c2d..1f5c64b5b4 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -77,6 +77,84 @@ def _strip_blocked_tools(toolsets: List[str]) -> List[str]: return [t for t in toolsets if t not in blocked_toolset_names] +def _build_child_progress_callback(task_index: int, parent_agent) -> Optional[callable]: + """Build a callback that relays child agent tool calls to the parent display. + + Two display paths: + CLI: prints tree-view lines above the parent's delegation spinner + Gateway: batches tool names and relays to parent's progress callback + + Returns None if no display mechanism is available, in which case the + child agent runs with no progress callback (identical to current behavior). + """ + spinner = getattr(parent_agent, '_delegate_spinner', None) + parent_cb = getattr(parent_agent, 'tool_progress_callback', None) + + if not spinner and not parent_cb: + return None # No display → no callback → zero behavior change + + prefix = f"[{task_index}] " if task_index > 0 else "" + + # Gateway: batch tool names, flush periodically + _BATCH_SIZE = 5 + _batch: List[str] = [] + + def _callback(tool_name: str, preview: str = None): + # Special "_thinking" event: model produced text content (reasoning) + if tool_name == "_thinking": + if spinner: + short = (preview[:55] + "...") if preview and len(preview) > 55 else (preview or "") + try: + spinner.print_above(f" {prefix}├─ 💭 \"{short}\"") + except Exception: + pass + # Don't relay thinking to gateway (too noisy for chat) + return + + # Regular tool call event + if spinner: + short = (preview[:35] + "...") if preview and len(preview) > 35 else (preview or "") + tool_emojis = { + "terminal": "💻", "web_search": "🔍", "web_extract": "📄", + "read_file": "📖", "write_file": "✍️", "patch": "🔧", + "search_files": "🔎", "list_directory": "📂", + "browser_navigate": "🌐", "browser_click": "👆", + "text_to_speech": "🔊", "image_generate": "🎨", + "vision_analyze": "👁️", "process": "⚙️", + } + emoji = tool_emojis.get(tool_name, "⚡") + line = f" {prefix}├─ {emoji} {tool_name}" + if short: + line += f" \"{short}\"" + try: + spinner.print_above(line) + except Exception: + pass + + if parent_cb: + _batch.append(tool_name) + if len(_batch) >= _BATCH_SIZE: + summary = ", ".join(_batch) + try: + parent_cb("subagent_progress", f"🔀 {prefix}{summary}") + except Exception: + pass + _batch.clear() + + def _flush(): + """Flush remaining batched tool names to gateway on completion.""" + if parent_cb and _batch: + summary = ", ".join(_batch) + try: + parent_cb("subagent_progress", f"🔀 
{prefix}{summary}") + except Exception: + pass + _batch.clear() + + _callback._flush = _flush + return _callback + + def _run_single_child( task_index: int, goal: str, @@ -98,33 +176,15 @@ def _run_single_child( child_prompt = _build_child_system_prompt(goal, context) - # Build a progress callback that surfaces subagent tool activity. - # CLI: updates the parent's delegate spinner text. - # Gateway: forwards to the parent's progress callback (feeds message queue). - parent_progress_cb = getattr(parent_agent, 'tool_progress_callback', None) - def _child_progress(tool_name: str, preview: str = None): - tag = f"[subagent-{task_index+1}] {tool_name}" - # Update CLI spinner - spinner = getattr(parent_agent, '_delegate_spinner', None) - if spinner: - detail = f'"{preview}"' if preview else "" - try: - spinner.update_text(f"🔀 {tag} {detail}") - except Exception: - pass - # Forward to gateway progress queue - if parent_progress_cb: - try: - parent_progress_cb(tag, preview) - except Exception: - pass - try: # Extract parent's API key so subagents inherit auth (e.g. Nous Portal). parent_api_key = getattr(parent_agent, "api_key", None) if (not parent_api_key) and hasattr(parent_agent, "_client_kwargs"): parent_api_key = parent_agent._client_kwargs.get("api_key") + # Build progress callback to relay tool calls to parent display + child_progress_cb = _build_child_progress_callback(task_index, parent_agent) + child = AIAgent( base_url=parent_agent.base_url, api_key=parent_api_key, @@ -145,7 +205,7 @@ def _run_single_child( providers_ignored=parent_agent.providers_ignored, providers_order=parent_agent.providers_order, provider_sort=parent_agent.provider_sort, - tool_progress_callback=_child_progress, + tool_progress_callback=child_progress_cb, ) # Set delegation depth so children can't spawn grandchildren @@ -160,6 +220,13 @@ def _run_single_child( with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull): result = child.run_conversation(user_message=goal) + # Flush any remaining batched progress to gateway + if child_progress_cb and hasattr(child_progress_cb, '_flush'): + try: + child_progress_cb._flush() + except Exception: + pass + duration = round(time.monotonic() - child_start, 2) summary = result.get("final_response") or "" From 4ec386cc724f8822aa188c72c89c034726bad7aa Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 23:19:23 -0800 Subject: [PATCH 26/31] fix(display): use spaces instead of ANSI \033[K in print_above() for prompt_toolkit compat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit print_above() used \033[K (erase-to-end-of-line) to clear the spinner line before printing text above it. This causes garbled escape codes when prompt_toolkit's patch_stdout is active in CLI mode. Switched to the same spaces-based clearing approach used by stop() — overwrite with blanks, then carriage return back to start of line. Updated test assertion to match the new clearing method. 
--- agent/display.py | 7 +++++-- tests/agent/test_subagent_progress.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/agent/display.py b/agent/display.py index 19acc67d9c..e7f074c4ec 100644 --- a/agent/display.py +++ b/agent/display.py @@ -211,8 +211,11 @@ class KawaiiSpinner: if not self.running: self._write(f" {text}", flush=True) return - # Clear spinner line, print text above, spinner redraws on next tick - self._write(f"\r\033[K {text}", flush=True) + # Clear spinner line with spaces (not \033[K) to avoid garbled escape + # codes when prompt_toolkit's patch_stdout is active — same approach + # as stop(). Then print text; spinner redraws on next tick. + blanks = ' ' * max(self.last_line_len + 5, 40) + self._write(f"\r{blanks}\r {text}", flush=True) def stop(self, final_message: str = None): self.running = False diff --git a/tests/agent/test_subagent_progress.py b/tests/agent/test_subagent_progress.py index 0ff4fcb835..0aa39f4c8e 100644 --- a/tests/agent/test_subagent_progress.py +++ b/tests/agent/test_subagent_progress.py @@ -46,7 +46,7 @@ class TestPrintAbove: spinner.print_above("tool line") output = buf.getvalue() assert "tool line" in output - assert "\r\033[K" in output # Should start with line clear + assert "\r" in output # Should start with carriage return to clear spinner line def test_print_above_uses_captured_stdout(self): """print_above should use self._out, not sys.stdout. From 41d8a802268d7caf2e6a9bdc3e22df7274964f7c Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 28 Feb 2026 23:29:49 -0800 Subject: [PATCH 27/31] fix(display): fix subagent progress tree-view visual nits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes to the subagent progress display from PR #186: 1. Task index prefix: show 1-indexed prefix ([1], [2], ...) for ALL tasks in batch mode (task_count > 1). Single tasks get no prefix. Previously task 0 had no prefix while others did, making batch output confusing. 2. Completion indicator: use spinner.print_above() instead of raw print() for per-task completion lines (✓ [1/2] ...). Raw print collided with the active spinner, mushing the completion text onto the spinner line. Now prints cleanly above. Added task_count parameter to _build_child_progress_callback and _run_single_child. Updated tests accordingly. 
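The prefix rule after this change, distilled into a throwaway helper
(the real code inlines this; task_index is 0-based):

    def _prefix(task_index: int, task_count: int) -> str:
        return f"[{task_index + 1}] " if task_count > 1 else ""

    assert _prefix(0, 1) == ""        # single task: no prefix
    assert _prefix(0, 3) == "[1] "    # batch: first task is [1], not unprefixed
    assert _prefix(2, 3) == "[3] "    # last task in a batch of three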
--- tests/agent/test_subagent_progress.py | 29 +++++++++++++++++---------- tools/delegate_tool.py | 21 ++++++++++++++----- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/tests/agent/test_subagent_progress.py b/tests/agent/test_subagent_progress.py index 0aa39f4c8e..b6e5e7525a 100644 --- a/tests/agent/test_subagent_progress.py +++ b/tests/agent/test_subagent_progress.py @@ -170,8 +170,8 @@ class TestBuildChildProgressCallback: parent_cb.assert_not_called() - def test_task_index_prefix_in_output(self): - """Multi-task mode should show task index prefix.""" + def test_task_index_prefix_in_batch_mode(self): + """Batch mode (task_count > 1) should show 1-indexed prefix for all tasks.""" buf = io.StringIO() spinner = KawaiiSpinner("delegating") spinner._out = buf @@ -181,15 +181,22 @@ class TestBuildChildProgressCallback: parent._delegate_spinner = spinner parent.tool_progress_callback = None - # task_index > 0 should add prefix - cb = _build_child_progress_callback(2, parent) - cb("web_search", "test") - + # task_index=0 in a batch of 3 → prefix "[1]" + cb0 = _build_child_progress_callback(0, parent, task_count=3) + cb0("web_search", "test") output = buf.getvalue() - assert "[2]" in output + assert "[1]" in output - def test_task_index_zero_no_prefix(self): - """Single task (index 0) should not show index prefix.""" + # task_index=2 in a batch of 3 → prefix "[3]" + buf.truncate(0) + buf.seek(0) + cb2 = _build_child_progress_callback(2, parent, task_count=3) + cb2("web_search", "test") + output = buf.getvalue() + assert "[3]" in output + + def test_single_task_no_prefix(self): + """Single task (task_count=1) should not show index prefix.""" buf = io.StringIO() spinner = KawaiiSpinner("delegating") spinner._out = buf @@ -199,11 +206,11 @@ class TestBuildChildProgressCallback: parent._delegate_spinner = spinner parent.tool_progress_callback = None - cb = _build_child_progress_callback(0, parent) + cb = _build_child_progress_callback(0, parent, task_count=1) cb("web_search", "test") output = buf.getvalue() - assert "[0]" not in output + assert "[" not in output # ========================================================================= diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 1f5c64b5b4..c960cc36c4 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -77,7 +77,7 @@ def _strip_blocked_tools(toolsets: List[str]) -> List[str]: return [t for t in toolsets if t not in blocked_toolset_names] -def _build_child_progress_callback(task_index: int, parent_agent) -> Optional[callable]: +def _build_child_progress_callback(task_index: int, parent_agent, task_count: int = 1) -> Optional[callable]: """Build a callback that relays child agent tool calls to the parent display. Two display paths: @@ -93,7 +93,8 @@ def _build_child_progress_callback(task_index: int, parent_agent) -> Optional[ca if not spinner and not parent_cb: return None # No display → no callback → zero behavior change - prefix = f"[{task_index}] " if task_index > 0 else "" + # Show 1-indexed prefix only in batch mode (multiple tasks) + prefix = f"[{task_index + 1}] " if task_count > 1 else "" # Gateway: batch tool names, flush periodically _BATCH_SIZE = 5 @@ -163,6 +164,7 @@ def _run_single_child( model: Optional[str], max_iterations: int, parent_agent, + task_count: int = 1, ) -> Dict[str, Any]: """ Spawn and run a single child agent. Called from within a thread. 
@@ -183,7 +185,7 @@ def _run_single_child( parent_api_key = parent_agent._client_kwargs.get("api_key") # Build progress callback to relay tool calls to parent display - child_progress_cb = _build_child_progress_callback(task_index, parent_agent) + child_progress_cb = _build_child_progress_callback(task_index, parent_agent, task_count) child = AIAgent( base_url=parent_agent.base_url, @@ -344,6 +346,7 @@ def delegate_task( model=model, max_iterations=effective_max_iter, parent_agent=parent_agent, + task_count=1, ) results.append(result) else: @@ -368,6 +371,7 @@ def delegate_task( model=model, max_iterations=effective_max_iter, parent_agent=parent_agent, + task_count=n_tasks, ) futures[future] = i @@ -387,14 +391,21 @@ def delegate_task( results.append(entry) completed_count += 1 - # Print per-task completion line (visible in CLI via patch_stdout) + # Print per-task completion line above the spinner idx = entry["task_index"] label = task_labels[idx] if idx < len(task_labels) else f"Task {idx}" dur = entry.get("duration_seconds", 0) status = entry.get("status", "?") icon = "✓" if status == "completed" else "✗" remaining = n_tasks - completed_count - print(f" {icon} [{idx+1}/{n_tasks}] {label} ({dur}s)") + completion_line = f"{icon} [{idx+1}/{n_tasks}] {label} ({dur}s)" + if spinner_ref: + try: + spinner_ref.print_above(completion_line) + except Exception: + print(f" {completion_line}") + else: + print(f" {completion_line}") # Update spinner text to show remaining count if spinner_ref and remaining > 0: From 30efc263ffca8a67166445a56128c5b604e29ba0 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sun, 1 Mar 2026 00:16:38 -0800 Subject: [PATCH 28/31] feat(cli): add /compress command for manual conversation context compression Introduced a new command "/compress" to the CLI, allowing users to manually trigger context compression on the current conversation. The method checks for sufficient conversation history and active agent status before performing compression, providing feedback on the number of messages and tokens before and after the operation. Updated command documentation accordingly. --- cli.py | 37 +++++++++++++++++++++++++++++++++++++ hermes_cli/commands.py | 1 + 2 files changed, 38 insertions(+) diff --git a/cli.py b/cli.py index 16ce554ee2..7f2b160b1b 100755 --- a/cli.py +++ b/cli.py @@ -1722,6 +1722,8 @@ class HermesCLI: self._show_gateway_status() elif cmd_lower == "/verbose": self._toggle_verbose() + elif cmd_lower == "/compress": + self._manual_compress() else: # Check for skill slash commands (/gif-search, /axolotl, etc.) base_cmd = cmd_lower.split()[0] @@ -1763,6 +1765,41 @@ class HermesCLI: } self.console.print(labels.get(self.tool_progress_mode, "")) + def _manual_compress(self): + """Manually trigger context compression on the current conversation.""" + if not self.conversation_history or len(self.conversation_history) < 4: + print("(._.) Not enough conversation to compress (need at least 4 messages).") + return + + if not self.agent: + print("(._.) No active agent -- send a message first.") + return + + if not self.agent.compression_enabled: + print("(._.) 
Compression is disabled in config.") + return + + original_count = len(self.conversation_history) + try: + from agent.model_metadata import estimate_messages_tokens_rough + approx_tokens = estimate_messages_tokens_rough(self.conversation_history) + print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...") + + compressed, new_system = self.agent._compress_context( + self.conversation_history, + self.agent._cached_system_prompt or "", + approx_tokens=approx_tokens, + ) + self.conversation_history = compressed + new_count = len(self.conversation_history) + new_tokens = estimate_messages_tokens_rough(self.conversation_history) + print( + f" ✅ Compressed: {original_count} → {new_count} messages " + f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)" + ) + except Exception as e: + print(f" ❌ Compression failed: {e}") + if self.verbose: logging.getLogger().setLevel(logging.DEBUG) for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'): diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index b7e5a62131..5de1c6bc63 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -26,6 +26,7 @@ COMMANDS = { "/skills": "Search, install, inspect, or manage skills from online registries", "/platforms": "Show gateway/messaging platform status", "/verbose": "Cycle tool progress display: off → new → all → verbose", + "/compress": "Manually compress conversation context (flush memories + summarize)", "/quit": "Exit the CLI (also: /exit, /q)", } From 177be32b7f9174bca7fceebba097b187aa1d9c5f Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sun, 1 Mar 2026 00:23:19 -0800 Subject: [PATCH 29/31] feat(cli): add /usage command to display session token usage Introduced a new command "/usage" in the CLI to show cumulative token usage for the current session. This includes details on prompt tokens, completion tokens, total tokens, API calls, and context state. Updated command documentation to reflect this addition. Enhanced the AIAgent class to track token usage throughout the session. --- cli.py | 38 ++++++++++++++++++++++++++++++++++++++ hermes_cli/commands.py | 1 + run_agent.py | 11 +++++++++++ 3 files changed, 50 insertions(+) diff --git a/cli.py b/cli.py index 7f2b160b1b..2081c7aaed 100755 --- a/cli.py +++ b/cli.py @@ -1724,6 +1724,8 @@ class HermesCLI: self._toggle_verbose() elif cmd_lower == "/compress": self._manual_compress() + elif cmd_lower == "/usage": + self._show_usage() else: # Check for skill slash commands (/gif-search, /axolotl, etc.) base_cmd = cmd_lower.split()[0] @@ -1800,6 +1802,42 @@ class HermesCLI: except Exception as e: print(f" ❌ Compression failed: {e}") + def _show_usage(self): + """Show cumulative token usage for the current session.""" + if not self.agent: + print("(._.) No active agent -- send a message first.") + return + + agent = self.agent + prompt = agent.session_prompt_tokens + completion = agent.session_completion_tokens + total = agent.session_total_tokens + calls = agent.session_api_calls + + if calls == 0: + print("(._.) 
No API calls made yet in this session.") + return + + # Current context window state + compressor = agent.context_compressor + last_prompt = compressor.last_prompt_tokens + ctx_len = compressor.context_length + pct = (last_prompt / ctx_len * 100) if ctx_len else 0 + compressions = compressor.compression_count + + msg_count = len(self.conversation_history) + + print(f" 📊 Session Token Usage") + print(f" {'─' * 40}") + print(f" Prompt tokens (input): {prompt:>10,}") + print(f" Completion tokens (output): {completion:>9,}") + print(f" Total tokens: {total:>10,}") + print(f" API calls: {calls:>10,}") + print(f" {'─' * 40}") + print(f" Current context: {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)") + print(f" Messages: {msg_count}") + print(f" Compressions: {compressions}") + if self.verbose: logging.getLogger().setLevel(logging.DEBUG) for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'): diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 5de1c6bc63..b091a79053 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -27,6 +27,7 @@ COMMANDS = { "/platforms": "Show gateway/messaging platform status", "/verbose": "Cycle tool progress display: off → new → all → verbose", "/compress": "Manually compress conversation context (flush memories + summarize)", + "/usage": "Show token usage for the current session", "/quit": "Exit the CLI (also: /exit, /q)", } diff --git a/run_agent.py b/run_agent.py index 32b69489f4..65dd3c2f03 100644 --- a/run_agent.py +++ b/run_agent.py @@ -535,6 +535,12 @@ class AIAgent: ) self.compression_enabled = compression_enabled self._user_turn_count = 0 + + # Cumulative token usage for the session + self.session_prompt_tokens = 0 + self.session_completion_tokens = 0 + self.session_total_tokens = 0 + self.session_api_calls = 0 if not self.quiet_mode: if compression_enabled: @@ -3105,6 +3111,11 @@ class AIAgent: "total_tokens": total_tokens, } self.context_compressor.update_from_response(usage_dict) + + self.session_prompt_tokens += prompt_tokens + self.session_completion_tokens += completion_tokens + self.session_total_tokens += total_tokens + self.session_api_calls += 1 if self.verbose_logging: logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}") From 93f5fd80b8b0bd2e5ebbad4355f12388a41a659d Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sun, 1 Mar 2026 00:25:44 -0800 Subject: [PATCH 30/31] feat(gateway): add /compress and /usage commands for conversation management Implemented the /compress command to allow users to manually compress conversation context, ensuring sufficient history is available before execution. The /usage command was also added to display token usage statistics for the current session, including prompt and completion tokens. Updated command documentation to reflect these new features. 
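The no-running-agent fallback in _handle_usage_command reduces to this
condensed sketch (transcript entries are plain dicts; the helper is the
one already used elsewhere in this patch):

    from agent.model_metadata import estimate_messages_tokens_rough

    history = [
        {"role": "user", "content": "hi"},
        {"role": "assistant", "content": "hello!"},
        {"role": "tool", "content": "ignored"},  # non-chat roles are filtered out
    ]
    msgs = [m for m in history
            if m.get("role") in ("user", "assistant") and m.get("content")]
    approx = estimate_messages_tokens_rough(msgs)
    print(f"Messages: {len(msgs)}, estimated context: ~{approx:,} tokens")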
--- gateway/run.py | 100 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/gateway/run.py b/gateway/run.py index 52af62e95b..bc778f103f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -629,7 +629,8 @@ class GatewayRunner: # Emit command:* hook for any recognized slash command _known_commands = {"new", "reset", "help", "status", "stop", "model", - "personality", "retry", "undo", "sethome", "set-home"} + "personality", "retry", "undo", "sethome", "set-home", + "compress", "usage"} if command and command in _known_commands: await self.hooks.emit(f"command:{command}", { "platform": source.platform.value if source.platform else "", @@ -664,6 +665,12 @@ class GatewayRunner: if command in ["sethome", "set-home"]: return await self._handle_set_home_command(event) + + if command == "compress": + return await self._handle_compress_command(event) + + if command == "usage": + return await self._handle_usage_command(event) # Skill slash commands: /skill-name loads the skill and sends to agent if command: @@ -1063,6 +1070,8 @@ class GatewayRunner: "`/retry` — Retry your last message", "`/undo` — Remove the last exchange", "`/sethome` — Set this chat as the home channel", + "`/compress` — Compress conversation context", + "`/usage` — Show token usage for this session", "`/help` — Show this message", ] try: @@ -1267,6 +1276,95 @@ class GatewayRunner: f"Cron jobs and cross-platform messages will be delivered here." ) + async def _handle_compress_command(self, event: MessageEvent) -> str: + """Handle /compress command -- manually compress conversation context.""" + source = event.source + session_entry = self.session_store.get_or_create_session(source) + history = self.session_store.load_transcript(session_entry.session_id) + + if not history or len(history) < 4: + return "Not enough conversation to compress (need at least 4 messages)." + + try: + from run_agent import AIAgent + from agent.model_metadata import estimate_messages_tokens_rough + + runtime_kwargs = _resolve_runtime_agent_kwargs() + if not runtime_kwargs.get("api_key"): + return "No provider configured -- cannot compress." 
+ + msgs = [ + {"role": m.get("role"), "content": m.get("content")} + for m in history + if m.get("role") in ("user", "assistant") and m.get("content") + ] + original_count = len(msgs) + approx_tokens = estimate_messages_tokens_rough(msgs) + + tmp_agent = AIAgent( + **runtime_kwargs, + max_iterations=4, + quiet_mode=True, + enabled_toolsets=["memory"], + session_id=session_entry.session_id, + ) + + loop = asyncio.get_event_loop() + compressed, _ = await loop.run_in_executor( + None, + lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens), + ) + + session_entry.conversation_history = compressed + new_count = len(compressed) + new_tokens = estimate_messages_tokens_rough(compressed) + + return ( + f"🗜️ Compressed: {original_count} → {new_count} messages\n" + f"~{approx_tokens:,} → ~{new_tokens:,} tokens" + ) + except Exception as e: + logger.warning("Manual compress failed: %s", e) + return f"Compression failed: {e}" + + async def _handle_usage_command(self, event: MessageEvent) -> str: + """Handle /usage command -- show token usage for the session's last agent run.""" + source = event.source + session_key = f"agent:main:{source.platform.value}:" + \ + (f"dm" if source.chat_type == "dm" else f"{source.chat_type}:{source.chat_id}") + + agent = self._running_agents.get(session_key) + if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0: + lines = [ + "📊 **Session Token Usage**", + f"Prompt (input): {agent.session_prompt_tokens:,}", + f"Completion (output): {agent.session_completion_tokens:,}", + f"Total: {agent.session_total_tokens:,}", + f"API calls: {agent.session_api_calls}", + ] + ctx = agent.context_compressor + if ctx.last_prompt_tokens: + pct = ctx.last_prompt_tokens / ctx.context_length * 100 if ctx.context_length else 0 + lines.append(f"Context: {ctx.last_prompt_tokens:,} / {ctx.context_length:,} ({pct:.0f}%)") + if ctx.compression_count: + lines.append(f"Compressions: {ctx.compression_count}") + return "\n".join(lines) + + # No running agent -- check session history for a rough count + session_entry = self.session_store.get_or_create_session(source) + history = self.session_store.load_transcript(session_entry.session_id) + if history: + from agent.model_metadata import estimate_messages_tokens_rough + msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")] + approx = estimate_messages_tokens_rough(msgs) + return ( + f"📊 **Session Info**\n" + f"Messages: {len(msgs)}\n" + f"Estimated context: ~{approx:,} tokens\n" + f"_(Detailed usage available during active conversations)_" + ) + return "No usage data available for this session." + def _set_session_env(self, context: SessionContext) -> None: """Set environment variables for the current session.""" os.environ["HERMES_SESSION_PLATFORM"] = context.source.platform.value From 4d6f380bd1c88a93343f3f576c72f43764dbaee5 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sun, 1 Mar 2026 00:28:07 -0800 Subject: [PATCH 31/31] docs: update README and CLI documentation for new commands Enhanced the README and CLI documentation to include the newly added `/compress` and `/usage` commands for managing conversation context and monitoring token usage. Updated log descriptions to clarify the contents of log files and ensured that sensitive information is automatically redacted. This improves user understanding of available features and log management. 
--- README.md | 15 +++++++++++++-- docs/cli.md | 3 +++ docs/messaging.md | 7 +++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f63bb2f1ae..531a3049e2 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ All your settings are stored in `~/.hermes/` for easy access: ├── skills/ # Agent-created skills (managed via skill_manage tool) ├── cron/ # Scheduled jobs ├── sessions/ # Gateway sessions -└── logs/ # Logs +└── logs/ # Logs (errors.log, gateway.log — secrets auto-redacted) ``` ### Managing Configuration @@ -293,6 +293,8 @@ See [docs/messaging.md](docs/messaging.md) for advanced WhatsApp configuration. | `/status` | Show session info | | `/stop` | Stop the running agent | | `/sethome` | Set this chat as the home channel | +| `/compress` | Manually compress conversation context | +| `/usage` | Show token usage for this session | | `/help` | Show available commands | | `/` | Invoke any installed skill (e.g., `/axolotl`, `/gif-search`) | @@ -424,6 +426,9 @@ Type `/` to see an autocomplete dropdown of all commands. | `/cron` | Manage scheduled tasks | | `/skills` | Search, install, inspect, or manage skills from registries | | `/platforms` | Show gateway/messaging platform status | +| `/verbose` | Cycle tool progress display: off → new → all → verbose | +| `/compress` | Manually compress conversation context | +| `/usage` | Show token usage for this session | | `/quit` | Exit (also: `/exit`, `/q`) | | `/` | Invoke any installed skill (e.g., `/axolotl`, `/gif-search`) | @@ -1315,9 +1320,13 @@ Your `~/.hermes/` directory should now look like: ├── skills/ # Agent-created skills (auto-created on first use) ├── cron/ # Scheduled job data ├── sessions/ # Messaging gateway sessions -└── logs/ # Conversation logs +└── logs/ # Logs + ├── gateway.log # Gateway activity log + └── errors.log # Errors from tool calls, API failures, etc. ``` +All log output is automatically redacted -- API keys, tokens, and credentials are masked before they reach disk. + --- ### Step 7: Add Your API Keys @@ -1642,6 +1651,8 @@ All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set t | `~/.hermes/config.yaml` | Your settings | | `~/.hermes/.env` | API keys and secrets | | `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes model`) | +| `~/.hermes/logs/errors.log` | Tool errors, API failures (secrets auto-redacted) | +| `~/.hermes/logs/gateway.log` | Gateway activity log (secrets auto-redacted) | | `~/.hermes/cron/` | Scheduled jobs data | | `~/.hermes/sessions/` | Gateway session data | | `~/.hermes/hermes-agent/` | Installation directory | diff --git a/docs/cli.md b/docs/cli.md index 6c1abc399a..0945b48a12 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -73,6 +73,9 @@ The CLI is implemented in `cli.py` and uses: | `/history` | Show conversation history | | `/save` | Save current conversation to file | | `/config` | Show current configuration | +| `/verbose` | Cycle tool progress display: off → new → all → verbose | +| `/compress` | Manually compress conversation context (flush memories + summarize) | +| `/usage` | Show token usage for the current session | | `/quit` | Exit the CLI (also: `/exit`, `/q`) | ## Configuration diff --git a/docs/messaging.md b/docs/messaging.md index 9963cfe03b..e695308b4a 100644 --- a/docs/messaging.md +++ b/docs/messaging.md @@ -74,6 +74,13 @@ Sessions reset based on configurable policies: Send `/new` or `/reset` as a message to start fresh. 
+### Context Management + +| Command | Description | +|---------|-------------| +| `/compress` | Manually compress conversation context (saves memories, then summarizes) | +| `/usage` | Show token usage and context window status for the current session | + ### Per-Platform Overrides Configure different reset policies per platform: