diff --git a/.env.example b/.env.example index 78549212f..2693931e0 100644 --- a/.env.example +++ b/.env.example @@ -10,7 +10,7 @@ OPENROUTER_API_KEY= # Default model to use (OpenRouter format: provider/model) -# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus +# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-3-flash-preview, zhipuai/glm-4-plus LLM_MODEL=anthropic/claude-opus-4.6 # ============================================================================= @@ -200,9 +200,10 @@ IMAGE_TOOLS_DEBUG=false # When conversation approaches model's context limit, middle turns are # automatically summarized to free up space. # +# Context compression is configured in ~/.hermes/config.yaml under compression: # CONTEXT_COMPRESSION_ENABLED=true # Enable auto-compression (default: true) # CONTEXT_COMPRESSION_THRESHOLD=0.85 # Compress at 85% of context limit -# CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001 # Fast model for summaries +# Model is set via compression.summary_model in config.yaml (default: google/gemini-3-flash-preview) # ============================================================================= # RL TRAINING (Tinker + Atropos) diff --git a/AGENTS.md b/AGENTS.md index f729bde98..d88fbf7ff 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -179,6 +179,7 @@ The interactive CLI uses: Key components: - `HermesCLI` class - Main CLI controller with commands and conversation loop - `SlashCommandCompleter` - Autocomplete dropdown for `/commands` (type `/` to see all) +- `agent/skill_commands.py` - Scans skills and builds invocation messages (shared with gateway) - `load_cli_config()` - Loads config, sets environment variables for terminal - `build_welcome_banner()` - Displays ASCII art logo, tools, and skills summary @@ -191,9 +192,22 @@ CLI UX notes: - Pasting 5+ lines auto-saves to `~/.hermes/pastes/` and collapses to a reference - Multi-line input via Alt+Enter or Ctrl+J - `/commands` - Process user 
commands like `/help`, `/clear`, `/personality`, etc. +- `/skill-name` - Invoke installed skills directly (e.g., `/axolotl`, `/gif-search`) CLI uses `quiet_mode=True` when creating AIAgent to suppress verbose logging. +### Skill Slash Commands + +Every installed skill in `~/.hermes/skills/` is automatically registered as a slash command. +The skill name (from frontmatter or folder name) becomes the command: `axolotl` → `/axolotl`. + +Implementation (`agent/skill_commands.py`, shared between CLI and gateway): +1. `scan_skill_commands()` scans all SKILL.md files at startup +2. `build_skill_invocation_message()` loads the SKILL.md content and builds a user-turn message +3. The message includes the full skill content, a list of supporting files (not loaded), and the user's instruction +4. Supporting files can be loaded on demand via the `skill_view` tool +5. Injected as a **user message** (not system prompt) to preserve prompt caching + ### Adding CLI Commands 1. Add to `COMMANDS` dict with description diff --git a/README.md b/README.md index 3cb1d6598..531a3049e 100644 --- a/README.md +++ b/README.md @@ -121,11 +121,14 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro | Provider | Setup | |----------|-------| -| **Nous Portal** | `hermes login` (OAuth, subscription-based) | +| **Nous Portal** | `hermes model` (OAuth, subscription-based) | +| **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) | | **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` | | **Custom Endpoint** | `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | -**Note:** Even when using Nous Portal or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools. +**Codex note:** The OpenAI Codex provider authenticates via device code (open a URL, enter a code). Credentials are stored at `~/.codex/auth.json` and auto-refresh. No Codex CLI installation required. 
+ +**Note:** Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools. --- @@ -143,7 +146,7 @@ All your settings are stored in `~/.hermes/` for easy access: ├── skills/ # Agent-created skills (managed via skill_manage tool) ├── cron/ # Scheduled jobs ├── sessions/ # Gateway sessions -└── logs/ # Logs +└── logs/ # Logs (errors.log, gateway.log — secrets auto-redacted) ``` ### Managing Configuration @@ -161,6 +164,19 @@ hermes config set terminal.backend docker hermes config set OPENROUTER_API_KEY sk-or-... # Saves to .env ``` +### Configuration Precedence + +Settings are resolved in this order (highest priority first): + +1. **CLI arguments** — `hermes chat --max-turns 100` (per-invocation override) +2. **`~/.hermes/config.yaml`** — the primary config file for all non-secret settings +3. **`~/.hermes/.env`** — fallback for env vars; **required** for secrets (API keys, tokens, passwords) +4. **Built-in defaults** — hardcoded safe defaults when nothing else is set + +**Rule of thumb:** Secrets (API keys, bot tokens, passwords) go in `.env`. Everything else (model, terminal backend, compression settings, memory limits, toolsets) goes in `config.yaml`. When both are set, `config.yaml` wins for non-secret settings. + +The `hermes config set` command automatically routes values to the right file — API keys are saved to `.env`, everything else to `config.yaml`. + ### Optional API Keys | Feature | Provider | Env Variable | @@ -277,7 +293,10 @@ See [docs/messaging.md](docs/messaging.md) for advanced WhatsApp configuration. 
| `/status` | Show session info | | `/stop` | Stop the running agent | | `/sethome` | Set this chat as the home channel | +| `/compress` | Manually compress conversation context | +| `/usage` | Show token usage for this session | | `/help` | Show available commands | +| `/` | Invoke any installed skill (e.g., `/axolotl`, `/gif-search`) | ### DM Pairing (Alternative to Allowlists) @@ -354,7 +373,7 @@ hermes --resume # Resume a specific session (-r) # Provider & model management hermes model # Switch provider and model interactively -hermes login # Authenticate with Nous Portal (OAuth) +hermes model # Select provider and model hermes logout # Clear stored OAuth credentials # Configuration @@ -407,7 +426,11 @@ Type `/` to see an autocomplete dropdown of all commands. | `/cron` | Manage scheduled tasks | | `/skills` | Search, install, inspect, or manage skills from registries | | `/platforms` | Show gateway/messaging platform status | +| `/verbose` | Cycle tool progress display: off → new → all → verbose | +| `/compress` | Manually compress conversation context | +| `/usage` | Show token usage for this session | | `/quit` | Exit (also: `/exit`, `/q`) | +| `/` | Invoke any installed skill (e.g., `/axolotl`, `/gif-search`) | **Keybindings:** - `Enter` — send message @@ -694,6 +717,21 @@ hermes cron status # Check if gateway is running Even if no messaging platforms are configured, the gateway stays running for cron. A file lock prevents duplicate execution if multiple processes overlap. +### 🪝 Event Hooks + +Run custom code at key lifecycle points — log activity, send alerts, post to webhooks. Hooks are Python handlers that fire automatically during gateway operation. 
+ +``` +~/.hermes/hooks/ +└── my-hook/ + ├── HOOK.yaml # name + events to subscribe to + └── handler.py # async def handle(event_type, context) +``` + +**Available events:** `gateway:startup`, `session:start`, `session:reset`, `agent:start`, `agent:step`, `agent:end`, `command:*` (wildcard — fires for any slash command). + +Hooks are non-blocking — errors are caught and logged, never crashing the agent. See [docs/hooks.md](docs/hooks.md) for the full event reference, context keys, and examples. + ### 🛡️ Exec Approval (Messaging Platforms) When the agent tries to run a potentially dangerous command (`rm -rf`, `chmod 777`, etc.) on Telegram/Discord/WhatsApp, instead of blocking it silently, it asks the user for approval: @@ -807,6 +845,22 @@ Skills are on-demand knowledge documents the agent can load when needed. They fo All skills live in **`~/.hermes/skills/`** -- a single directory that is the source of truth. On fresh install, bundled skills are copied there from the repo. Hub-installed skills and agent-created skills also go here. The agent can modify or delete any skill. `hermes update` adds only genuinely new bundled skills (via a manifest) without overwriting your changes or re-adding skills you deleted. **Using Skills:** + +Every installed skill is automatically available as a slash command — type `/` to invoke it directly: + +```bash +# In the CLI or any messaging platform (Telegram, Discord, Slack, WhatsApp): +/gif-search funny cats +/axolotl help me fine-tune Llama 3 on my dataset +/github-pr-workflow create a PR for the auth refactor + +# Just the skill name (no prompt) loads the skill and lets the agent ask what you need: +/excalidraw +``` + +The skill's full instructions (SKILL.md) are loaded into the conversation, and any supporting files (references, templates, scripts) are listed for the agent to pull on demand via the `skill_view` tool. Type `/help` to see all available skill commands. 
+ +You can also use skills through natural conversation: ```bash hermes --toolsets skills -q "What skills do you have?" hermes --toolsets skills -q "Show me the axolotl skill" @@ -1266,9 +1320,13 @@ Your `~/.hermes/` directory should now look like: ├── skills/ # Agent-created skills (auto-created on first use) ├── cron/ # Scheduled job data ├── sessions/ # Messaging gateway sessions -└── logs/ # Conversation logs +└── logs/ # Logs + ├── gateway.log # Gateway activity log + └── errors.log # Errors from tool calls, API failures, etc. ``` +All log output is automatically redacted -- API keys, tokens, and credentials are masked before they reach disk. + --- ### Step 7: Add Your API Keys @@ -1592,7 +1650,9 @@ All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set t |------|-------------| | `~/.hermes/config.yaml` | Your settings | | `~/.hermes/.env` | API keys and secrets | -| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes login`) | +| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes model`) | +| `~/.hermes/logs/errors.log` | Tool errors, API failures (secrets auto-redacted) | +| `~/.hermes/logs/gateway.log` | Gateway activity log (secrets auto-redacted) | | `~/.hermes/cron/` | Scheduled jobs data | | `~/.hermes/sessions/` | Gateway session data | | `~/.hermes/hermes-agent/` | Installation directory | @@ -1620,7 +1680,7 @@ hermes config # View current settings Common issues: - **"API key not set"**: Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key` - **"hermes: command not found"**: Reload your shell (`source ~/.bashrc`) or check PATH -- **"Run `hermes login` to re-authenticate"**: Your Nous Portal session expired. Run `hermes login` to refresh. +- **"Run `hermes setup` to re-authenticate"**: Your Nous Portal session expired. Run `hermes setup` or `hermes model` to refresh. - **"No active paid subscription"**: Your Nous Portal account needs an active subscription for inference. 
- **Gateway won't start**: Check `hermes gateway status` and logs - **Missing config after update**: Run `hermes config check` to see what's new, then `hermes config migrate` to add missing options diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index ef179c410..4fb879414 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -8,7 +8,9 @@ Resolution order for text tasks: 1. OpenRouter (OPENROUTER_API_KEY) 2. Nous Portal (~/.hermes/auth.json active provider) 3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) - 4. None + 4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex, + wrapped to look like a chat.completions client) + 5. None Resolution order for vision/multimodal tasks: 1. OpenRouter @@ -20,7 +22,8 @@ import json import logging import os from pathlib import Path -from typing import Optional, Tuple +from types import SimpleNamespace +from typing import Any, Dict, List, Optional, Tuple from openai import OpenAI @@ -32,7 +35,7 @@ logger = logging.getLogger(__name__) _OR_HEADERS = { "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", } # Nous Portal extra_body for product attribution. @@ -49,6 +52,188 @@ _NOUS_MODEL = "gemini-3-flash" _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json" +# Codex fallback: uses the Responses API (the only endpoint the Codex +# OAuth token can access) with a fast model for auxiliary tasks. +_CODEX_AUX_MODEL = "gpt-5.3-codex" +_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex" + + +# ── Codex Responses → chat.completions adapter ───────────────────────────── +# All auxiliary consumers call client.chat.completions.create(**kwargs) and +# read response.choices[0].message.content. 
This adapter translates those +# calls to the Codex Responses API so callers don't need any changes. + +class _CodexCompletionsAdapter: + """Drop-in shim that accepts chat.completions.create() kwargs and + routes them through the Codex Responses streaming API.""" + + def __init__(self, real_client: OpenAI, model: str): + self._client = real_client + self._model = model + + def create(self, **kwargs) -> Any: + messages = kwargs.get("messages", []) + model = kwargs.get("model", self._model) + temperature = kwargs.get("temperature") + + # Separate system/instructions from conversation messages + instructions = "You are a helpful assistant." + input_msgs: List[Dict[str, Any]] = [] + for msg in messages: + role = msg.get("role", "user") + content = msg.get("content", "") + if role == "system": + instructions = content + else: + input_msgs.append({"role": role, "content": content}) + + resp_kwargs: Dict[str, Any] = { + "model": model, + "instructions": instructions, + "input": input_msgs or [{"role": "user", "content": ""}], + "stream": True, + "store": False, + } + + max_tokens = kwargs.get("max_output_tokens") or kwargs.get("max_completion_tokens") or kwargs.get("max_tokens") + if max_tokens is not None: + resp_kwargs["max_output_tokens"] = int(max_tokens) + if temperature is not None: + resp_kwargs["temperature"] = temperature + + # Tools support for flush_memories and similar callers + tools = kwargs.get("tools") + if tools: + converted = [] + for t in tools: + fn = t.get("function", {}) if isinstance(t, dict) else {} + name = fn.get("name") + if not name: + continue + converted.append({ + "type": "function", + "name": name, + "description": fn.get("description", ""), + "parameters": fn.get("parameters", {}), + }) + if converted: + resp_kwargs["tools"] = converted + + # Stream and collect the response + text_parts: List[str] = [] + tool_calls_raw: List[Any] = [] + usage = None + + try: + with self._client.responses.stream(**resp_kwargs) as stream: + for _event in 
stream: + pass + final = stream.get_final_response() + + # Extract text and tool calls from the Responses output + for item in getattr(final, "output", []): + item_type = getattr(item, "type", None) + if item_type == "message": + for part in getattr(item, "content", []): + ptype = getattr(part, "type", None) + if ptype in ("output_text", "text"): + text_parts.append(getattr(part, "text", "")) + elif item_type == "function_call": + tool_calls_raw.append(SimpleNamespace( + id=getattr(item, "call_id", ""), + type="function", + function=SimpleNamespace( + name=getattr(item, "name", ""), + arguments=getattr(item, "arguments", "{}"), + ), + )) + + resp_usage = getattr(final, "usage", None) + if resp_usage: + usage = SimpleNamespace( + prompt_tokens=getattr(resp_usage, "input_tokens", 0), + completion_tokens=getattr(resp_usage, "output_tokens", 0), + total_tokens=getattr(resp_usage, "total_tokens", 0), + ) + except Exception as exc: + logger.debug("Codex auxiliary Responses API call failed: %s", exc) + raise + + content = "".join(text_parts).strip() or None + + # Build a response that looks like chat.completions + message = SimpleNamespace( + role="assistant", + content=content, + tool_calls=tool_calls_raw or None, + ) + choice = SimpleNamespace( + index=0, + message=message, + finish_reason="stop" if not tool_calls_raw else "tool_calls", + ) + return SimpleNamespace( + choices=[choice], + model=model, + usage=usage, + ) + + +class _CodexChatShim: + """Wraps the adapter to provide client.chat.completions.create().""" + + def __init__(self, adapter: _CodexCompletionsAdapter): + self.completions = adapter + + +class CodexAuxiliaryClient: + """OpenAI-client-compatible wrapper that routes through Codex Responses API. + + Consumers can call client.chat.completions.create(**kwargs) as normal. + Also exposes .api_key and .base_url for introspection by async wrappers. 
+ """ + + def __init__(self, real_client: OpenAI, model: str): + self._real_client = real_client + adapter = _CodexCompletionsAdapter(real_client, model) + self.chat = _CodexChatShim(adapter) + self.api_key = real_client.api_key + self.base_url = real_client.base_url + + def close(self): + self._real_client.close() + + +class _AsyncCodexCompletionsAdapter: + """Async version of the Codex Responses adapter. + + Wraps the sync adapter via asyncio.to_thread() so async consumers + (web_tools, session_search) can await it as normal. + """ + + def __init__(self, sync_adapter: _CodexCompletionsAdapter): + self._sync = sync_adapter + + async def create(self, **kwargs) -> Any: + import asyncio + return await asyncio.to_thread(self._sync.create, **kwargs) + + +class _AsyncCodexChatShim: + def __init__(self, adapter: _AsyncCodexCompletionsAdapter): + self.completions = adapter + + +class AsyncCodexAuxiliaryClient: + """Async-compatible wrapper matching AsyncOpenAI.chat.completions.create().""" + + def __init__(self, sync_wrapper: "CodexAuxiliaryClient"): + sync_adapter = sync_wrapper.chat.completions + async_adapter = _AsyncCodexCompletionsAdapter(sync_adapter) + self.chat = _AsyncCodexChatShim(async_adapter) + self.api_key = sync_wrapper.api_key + self.base_url = sync_wrapper.base_url + def _read_nous_auth() -> Optional[dict]: """Read and validate ~/.hermes/auth.json for an active Nous provider. 
@@ -82,12 +267,31 @@ def _nous_base_url() -> str: return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL) +def _read_codex_access_token() -> Optional[str]: + """Read a valid Codex OAuth access token from ~/.codex/auth.json.""" + try: + codex_auth = Path.home() / ".codex" / "auth.json" + if not codex_auth.is_file(): + return None + data = json.loads(codex_auth.read_text()) + tokens = data.get("tokens") + if not isinstance(tokens, dict): + return None + access_token = tokens.get("access_token") + if isinstance(access_token, str) and access_token.strip(): + return access_token.strip() + return None + except Exception as exc: + logger.debug("Could not read Codex auth for auxiliary client: %s", exc) + return None + + # ── Public API ────────────────────────────────────────────────────────────── def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: """Return (client, model_slug) for text-only auxiliary tasks. - Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None). + Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth -> (None, None). """ # 1. OpenRouter or_key = os.getenv("OPENROUTER_API_KEY") @@ -115,11 +319,44 @@ def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: logger.debug("Auxiliary text client: custom endpoint (%s)", model) return OpenAI(api_key=custom_key, base_url=custom_base), model - # 4. Nothing available + # 4. Codex OAuth -- uses the Responses API (only endpoint the token + # can access), wrapped to look like a chat.completions client. + codex_token = _read_codex_access_token() + if codex_token: + logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) + real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) + return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL + + # 5. 
Nothing available logger.debug("Auxiliary text client: none available") return None, None +def get_async_text_auxiliary_client(): + """Return (async_client, model_slug) for async consumers. + + For standard providers returns (AsyncOpenAI, model). For Codex returns + (AsyncCodexAuxiliaryClient, model) which wraps the Responses API. + Returns (None, None) when no provider is available. + """ + from openai import AsyncOpenAI + + sync_client, model = get_text_auxiliary_client() + if sync_client is None: + return None, None + + if isinstance(sync_client, CodexAuxiliaryClient): + return AsyncCodexAuxiliaryClient(sync_client), model + + async_kwargs = { + "api_key": sync_client.api_key, + "base_url": str(sync_client.base_url), + } + if "openrouter" in str(sync_client.base_url).lower(): + async_kwargs["default_headers"] = dict(_OR_HEADERS) + return AsyncOpenAI(**async_kwargs), model + + def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: """Return (client, model_slug) for vision/multimodal auxiliary tasks. @@ -161,11 +398,12 @@ def auxiliary_max_tokens_param(value: int) -> dict: OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'. + The Codex adapter translates max_tokens internally, so we use max_tokens + for it as well. 
""" custom_base = os.getenv("OPENAI_BASE_URL", "") or_key = os.getenv("OPENROUTER_API_KEY") - # Only use max_completion_tokens when the auxiliary client resolved to - # direct OpenAI (no OpenRouter key, no Nous auth, custom endpoint is api.openai.com) + # Only use max_completion_tokens for direct OpenAI custom endpoints if (not or_key and _read_nous_auth() is None and "api.openai.com" in custom_base.lower()): diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 329fd9680..034eb8f99 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -31,8 +31,9 @@ class ContextCompressor: threshold_percent: float = 0.85, protect_first_n: int = 3, protect_last_n: int = 4, - summary_target_tokens: int = 500, + summary_target_tokens: int = 2500, quiet_mode: bool = False, + summary_model_override: str = None, ): self.model = model self.threshold_percent = threshold_percent @@ -49,7 +50,8 @@ class ContextCompressor: self.last_completion_tokens = 0 self.last_total_tokens = 0 - self.client, self.summary_model = get_text_auxiliary_client() + self.client, default_model = get_text_auxiliary_client() + self.summary_model = summary_model_override or default_model def update_from_response(self, usage: Dict[str, Any]): """Update tracked token usage from API response.""" diff --git a/agent/display.py b/agent/display.py index 9ef8c5ebc..e7f074c4e 100644 --- a/agent/display.py +++ b/agent/display.py @@ -199,6 +199,24 @@ class KawaiiSpinner: def update_text(self, new_message: str): self.message = new_message + def print_above(self, text: str): + """Print a line above the spinner without disrupting animation. + + Clears the current spinner line, prints the text, and lets the + next animation tick redraw the spinner on the line below. + Thread-safe: uses the captured stdout reference (self._out). + Works inside redirect_stdout(devnull) because _write bypasses + sys.stdout and writes to the stdout captured at spinner creation. 
+ """ + if not self.running: + self._write(f" {text}", flush=True) + return + # Clear spinner line with spaces (not \033[K) to avoid garbled escape + # codes when prompt_toolkit's patch_stdout is active — same approach + # as stop(). Then print text; spinner redraws on next tick. + blanks = ' ' * max(self.last_line_len + 5, 40) + self._write(f"\r{blanks}\r {text}", flush=True) + def stop(self, final_message: str = None): self.running = False if self.thread: @@ -283,6 +301,15 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str] pass return False, "" + # Memory-specific: distinguish "full" from real errors + if tool_name == "memory": + try: + data = json.loads(result) + if data.get("success") is False and "exceed the limit" in data.get("error", ""): + return True, " [full]" + except (json.JSONDecodeError, TypeError, AttributeError): + pass + # Generic heuristic for non-terminal tools lower = result[:500].lower() if '"error"' in lower or '"failed"' in lower or result.startswith("Error"): diff --git a/agent/redact.py b/agent/redact.py new file mode 100644 index 000000000..22f1a547f --- /dev/null +++ b/agent/redact.py @@ -0,0 +1,115 @@ +"""Regex-based secret redaction for logs and tool output. + +Applies pattern matching to mask API keys, tokens, and credentials +before they reach log files, verbose output, or gateway logs. + +Short tokens (< 18 chars) are fully masked. Longer tokens preserve +the first 6 and last 4 characters for debuggability. 
+""" + +import logging +import re +from typing import Optional + +logger = logging.getLogger(__name__) + +# Known API key prefixes -- match the prefix + contiguous token chars +_PREFIX_PATTERNS = [ + r"sk-[A-Za-z0-9_-]{10,}", # OpenAI / OpenRouter + r"ghp_[A-Za-z0-9]{10,}", # GitHub PAT (classic) + r"github_pat_[A-Za-z0-9_]{10,}", # GitHub PAT (fine-grained) + r"xox[baprs]-[A-Za-z0-9-]{10,}", # Slack tokens + r"AIza[A-Za-z0-9_-]{30,}", # Google API keys + r"pplx-[A-Za-z0-9]{10,}", # Perplexity + r"fal_[A-Za-z0-9_-]{10,}", # Fal.ai + r"fc-[A-Za-z0-9]{10,}", # Firecrawl + r"bb_live_[A-Za-z0-9_-]{10,}", # BrowserBase + r"gAAAA[A-Za-z0-9_=-]{20,}", # Codex encrypted tokens +] + +# ENV assignment patterns: KEY=value where KEY contains a secret-like name +_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)" +_ENV_ASSIGN_RE = re.compile( + rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+)\2", + re.IGNORECASE, +) + +# JSON field patterns: "apiKey": "value", "token": "value", etc. +_JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer)" +_JSON_FIELD_RE = re.compile( + rf'("{_JSON_KEY_NAMES}")\s*:\s*"([^"]+)"', + re.IGNORECASE, +) + +# Authorization headers +_AUTH_HEADER_RE = re.compile( + r"(Authorization:\s*Bearer\s+)(\S+)", + re.IGNORECASE, +) + +# Telegram bot tokens: bot: or : +_TELEGRAM_RE = re.compile( + r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})", +) + +# Compile known prefix patterns into one alternation +_PREFIX_RE = re.compile( + r"(? str: + """Mask a token, preserving prefix for long tokens.""" + if len(token) < 18: + return "***" + return f"{token[:6]}...{token[-4:]}" + + +def redact_sensitive_text(text: str) -> str: + """Apply all redaction patterns to a block of text. + + Safe to call on any string -- non-matching text passes through unchanged. + """ + if not text: + return text + + # Known prefixes (sk-, ghp_, etc.) 
+ text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text) + + # ENV assignments: OPENAI_API_KEY=sk-abc... + def _redact_env(m): + name, quote, value = m.group(1), m.group(2), m.group(3) + return f"{name}={quote}{_mask_token(value)}{quote}" + text = _ENV_ASSIGN_RE.sub(_redact_env, text) + + # JSON fields: "apiKey": "value" + def _redact_json(m): + key, value = m.group(1), m.group(2) + return f'{key}: "{_mask_token(value)}"' + text = _JSON_FIELD_RE.sub(_redact_json, text) + + # Authorization headers + text = _AUTH_HEADER_RE.sub( + lambda m: m.group(1) + _mask_token(m.group(2)), + text, + ) + + # Telegram bot tokens + def _redact_telegram(m): + prefix = m.group(1) or "" + digits = m.group(2) + return f"{prefix}{digits}:***" + text = _TELEGRAM_RE.sub(_redact_telegram, text) + + return text + + +class RedactingFormatter(logging.Formatter): + """Log formatter that redacts secrets from all log messages.""" + + def __init__(self, fmt=None, datefmt=None, style='%', **kwargs): + super().__init__(fmt, datefmt, style, **kwargs) + + def format(self, record: logging.LogRecord) -> str: + original = super().format(record) + return redact_sensitive_text(original) diff --git a/agent/skill_commands.py b/agent/skill_commands.py new file mode 100644 index 000000000..fc11c5312 --- /dev/null +++ b/agent/skill_commands.py @@ -0,0 +1,114 @@ +"""Skill slash commands — scan installed skills and build invocation messages. + +Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces +can invoke skills via /skill-name commands. +""" + +import logging +from pathlib import Path +from typing import Any, Dict, Optional + +logger = logging.getLogger(__name__) + +_skill_commands: Dict[str, Dict[str, Any]] = {} + + +def scan_skill_commands() -> Dict[str, Dict[str, Any]]: + """Scan ~/.hermes/skills/ and return a mapping of /command -> skill info. + + Returns: + Dict mapping "/skill-name" to {name, description, skill_md_path, skill_dir}. 
+ """ + global _skill_commands + _skill_commands = {} + try: + from tools.skills_tool import SKILLS_DIR, _parse_frontmatter + if not SKILLS_DIR.exists(): + return _skill_commands + for skill_md in SKILLS_DIR.rglob("SKILL.md"): + path_str = str(skill_md) + if '/.git/' in path_str or '/.github/' in path_str or '/.hub/' in path_str: + continue + try: + content = skill_md.read_text(encoding='utf-8') + frontmatter, body = _parse_frontmatter(content) + name = frontmatter.get('name', skill_md.parent.name) + description = frontmatter.get('description', '') + if not description: + for line in body.strip().split('\n'): + line = line.strip() + if line and not line.startswith('#'): + description = line[:80] + break + cmd_name = name.lower().replace(' ', '-').replace('_', '-') + _skill_commands[f"/{cmd_name}"] = { + "name": name, + "description": description or f"Invoke the {name} skill", + "skill_md_path": str(skill_md), + "skill_dir": str(skill_md.parent), + } + except Exception: + continue + except Exception: + pass + return _skill_commands + + +def get_skill_commands() -> Dict[str, Dict[str, Any]]: + """Return the current skill commands mapping (scan first if empty).""" + if not _skill_commands: + scan_skill_commands() + return _skill_commands + + +def build_skill_invocation_message(cmd_key: str, user_instruction: str = "") -> Optional[str]: + """Build the user message content for a skill slash command invocation. + + Args: + cmd_key: The command key including leading slash (e.g., "/gif-search"). + user_instruction: Optional text the user typed after the command. + + Returns: + The formatted message string, or None if the skill wasn't found. 
+ """ + commands = get_skill_commands() + skill_info = commands.get(cmd_key) + if not skill_info: + return None + + skill_md_path = Path(skill_info["skill_md_path"]) + skill_dir = Path(skill_info["skill_dir"]) + skill_name = skill_info["name"] + + try: + content = skill_md_path.read_text(encoding='utf-8') + except Exception: + return f"[Failed to load skill: {skill_name}]" + + parts = [ + f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]', + "", + content.strip(), + ] + + supporting = [] + for subdir in ("references", "templates", "scripts", "assets"): + subdir_path = skill_dir / subdir + if subdir_path.exists(): + for f in sorted(subdir_path.rglob("*")): + if f.is_file(): + rel = str(f.relative_to(skill_dir)) + supporting.append(rel) + + if supporting: + parts.append("") + parts.append("[This skill has supporting files you can load with the skill_view tool:]") + for sf in supporting: + parts.append(f"- {sf}") + parts.append(f'\nTo view any of these, use: skill_view(name="{skill_name}", file="")') + + if user_instruction: + parts.append("") + parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}") + + return "\n".join(parts) diff --git a/cli.py b/cli.py index ea9c3e630..2081c7aae 100755 --- a/cli.py +++ b/cli.py @@ -682,17 +682,27 @@ COMMANDS = { } +# ============================================================================ +# Skill Slash Commands — dynamic commands generated from installed skills +# ============================================================================ + +from agent.skill_commands import scan_skill_commands, get_skill_commands, build_skill_invocation_message + +_skill_commands = scan_skill_commands() + + class SlashCommandCompleter(Completer): - """Autocomplete for /commands in the input area.""" + """Autocomplete for /commands and /skill-name in the input area.""" def 
get_completions(self, document, complete_event): text = document.text_before_cursor - # Only complete at the start of input, after / if not text.startswith("/"): return word = text[1:] # strip the leading / + + # Built-in commands for cmd, desc in COMMANDS.items(): - cmd_name = cmd[1:] # strip leading / from key + cmd_name = cmd[1:] if cmd_name.startswith(word): yield Completion( cmd_name, @@ -701,6 +711,17 @@ class SlashCommandCompleter(Completer): display_meta=desc, ) + # Skill commands + for cmd, info in _skill_commands.items(): + cmd_name = cmd[1:] + if cmd_name.startswith(word): + yield Completion( + cmd_name, + start_position=-len(word), + display=cmd, + display_meta=f"⚡ {info['description'][:50]}", + ) + def save_config_value(key_path: str, value: any) -> bool: """ @@ -782,7 +803,7 @@ class HermesCLI: Args: model: Model to use (default: from env or claude-sonnet) toolsets: List of toolsets to enable (default: all) - provider: Inference provider ("auto", "openrouter", "nous") + provider: Inference provider ("auto", "openrouter", "nous", "openai-codex") api_key: API key (default: from environment) base_url: API base URL (default: OpenRouter) max_turns: Maximum tool-calling iterations (default: 60) @@ -800,37 +821,37 @@ class HermesCLI: # Configuration - priority: CLI args > env vars > config file # Model can come from: CLI arg, LLM_MODEL env, OPENAI_MODEL env (custom endpoint), or config self.model = model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or CLI_CONFIG["model"]["default"] - - # Base URL: custom endpoint (OPENAI_BASE_URL) takes precedence over OpenRouter - self.base_url = base_url or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"]) - - # API key: custom endpoint (OPENAI_API_KEY) takes precedence over OpenRouter - self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY") - # Provider resolution: determines whether to use OAuth credentials or env var keys - from 
hermes_cli.auth import resolve_provider + self._explicit_api_key = api_key + self._explicit_base_url = base_url + + # Provider selection is resolved lazily at use-time via _ensure_runtime_credentials(). self.requested_provider = ( provider or os.getenv("HERMES_INFERENCE_PROVIDER") or CLI_CONFIG["model"].get("provider") or "auto" ) - self.provider = resolve_provider( - self.requested_provider, - explicit_api_key=api_key, - explicit_base_url=base_url, + self._provider_source: Optional[str] = None + self.provider = self.requested_provider + self.api_mode = "chat_completions" + self.base_url = ( + base_url + or os.getenv("OPENAI_BASE_URL") + or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"]) ) + self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY") self._nous_key_expires_at: Optional[str] = None self._nous_key_source: Optional[str] = None # Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default - if max_turns is not None: + if max_turns is not None: # CLI arg was explicitly set self.max_turns = max_turns - elif os.getenv("HERMES_MAX_ITERATIONS"): - self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS")) elif CLI_CONFIG["agent"].get("max_turns"): self.max_turns = CLI_CONFIG["agent"]["max_turns"] elif CLI_CONFIG.get("max_turns"): # Backwards compat: root-level max_turns self.max_turns = CLI_CONFIG["max_turns"] + elif os.getenv("HERMES_MAX_ITERATIONS"): + self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS")) else: self.max_turns = 60 @@ -882,45 +903,51 @@ class HermesCLI: def _ensure_runtime_credentials(self) -> bool: """ - Ensure OAuth provider credentials are fresh before agent use. - For Nous Portal: checks agent key TTL, refreshes/re-mints as needed. - If the key changed, tears down the agent so it rebuilds with new creds. + Ensure runtime credentials are resolved before agent use. 
+ Re-resolves provider credentials so key rotation and token refresh + are picked up without restarting the CLI. Returns True if credentials are ready, False on auth failure. """ - if self.provider != "nous": - return True - - from hermes_cli.auth import format_auth_error, resolve_nous_runtime_credentials + from hermes_cli.runtime_provider import ( + resolve_runtime_provider, + format_runtime_provider_error, + ) try: - credentials = resolve_nous_runtime_credentials( - min_key_ttl_seconds=max( - 60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")) - ), - timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + runtime = resolve_runtime_provider( + requested=self.requested_provider, + explicit_api_key=self._explicit_api_key, + explicit_base_url=self._explicit_base_url, ) except Exception as exc: - message = format_auth_error(exc) + message = format_runtime_provider_error(exc) self.console.print(f"[bold red]{message}[/]") return False - api_key = credentials.get("api_key") - base_url = credentials.get("base_url") + api_key = runtime.get("api_key") + base_url = runtime.get("base_url") + resolved_provider = runtime.get("provider", "openrouter") + resolved_api_mode = runtime.get("api_mode", self.api_mode) if not isinstance(api_key, str) or not api_key: - self.console.print("[bold red]Nous credential resolver returned an empty API key.[/]") + self.console.print("[bold red]Provider resolver returned an empty API key.[/]") return False if not isinstance(base_url, str) or not base_url: - self.console.print("[bold red]Nous credential resolver returned an empty base URL.[/]") + self.console.print("[bold red]Provider resolver returned an empty base URL.[/]") return False credentials_changed = api_key != self.api_key or base_url != self.base_url + routing_changed = ( + resolved_provider != self.provider + or resolved_api_mode != self.api_mode + ) + self.provider = resolved_provider + self.api_mode = resolved_api_mode + self._provider_source = 
runtime.get("source") self.api_key = api_key self.base_url = base_url - self._nous_key_expires_at = credentials.get("expires_at") - self._nous_key_source = credentials.get("source") # AIAgent/OpenAI client holds auth at init time, so rebuild if key rotated - if credentials_changed and self.agent is not None: + if (credentials_changed or routing_changed) and self.agent is not None: self.agent = None return True @@ -936,7 +963,7 @@ class HermesCLI: if self.agent is not None: return True - if self.provider == "nous" and not self._ensure_runtime_credentials(): + if not self._ensure_runtime_credentials(): return False # Initialize SQLite session store for CLI sessions @@ -980,6 +1007,8 @@ class HermesCLI: model=self.model, api_key=self.api_key, base_url=self.base_url, + provider=self.provider, + api_mode=self.api_mode, max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, verbose_logging=self.verbose, @@ -1072,8 +1101,8 @@ class HermesCLI: toolsets_info = f" [dim #B8860B]·[/] [#CD7F32]toolsets: {', '.join(self.enabled_toolsets)}[/]" provider_info = f" [dim #B8860B]·[/] [dim]provider: {self.provider}[/]" - if self.provider == "nous" and self._nous_key_source: - provider_info += f" [dim #B8860B]·[/] [dim]key: {self._nous_key_source}[/]" + if self._provider_source: + provider_info += f" [dim #B8860B]·[/] [dim]auth: {self._provider_source}[/]" self.console.print( f" {api_indicator} [#FFBF00]{model_short}[/] " @@ -1082,20 +1111,21 @@ class HermesCLI: ) def show_help(self): - """Display help information with kawaii ASCII art.""" - print() - print("+" + "-" * 50 + "+") - print("|" + " " * 14 + "(^_^)? Available Commands" + " " * 10 + "|") - print("+" + "-" * 50 + "+") - print() + """Display help information.""" + _cprint(f"\n{_BOLD}+{'-' * 50}+{_RST}") + _cprint(f"{_BOLD}|{' ' * 14}(^_^)? 
Available Commands{' ' * 10}|{_RST}") + _cprint(f"{_BOLD}+{'-' * 50}+{_RST}\n") for cmd, desc in COMMANDS.items(): - print(f" {cmd:<15} - {desc}") + _cprint(f" {_GOLD}{cmd:<15}{_RST} {_DIM}-{_RST} {desc}") - print() - print(" Tip: Just type your message to chat with Hermes!") - print(" Multi-line: Alt+Enter for a new line") - print() + if _skill_commands: + _cprint(f"\n ⚡ {_BOLD}Skill Commands{_RST} ({len(_skill_commands)} installed):") + for cmd, info in sorted(_skill_commands.items()): + _cprint(f" {_GOLD}{cmd:<22}{_RST} {_DIM}-{_RST} {info['description']}") + + _cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}") + _cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}\n") def show_tools(self): """Display available tools with kawaii ASCII art.""" @@ -1692,9 +1722,26 @@ class HermesCLI: self._show_gateway_status() elif cmd_lower == "/verbose": self._toggle_verbose() + elif cmd_lower == "/compress": + self._manual_compress() + elif cmd_lower == "/usage": + self._show_usage() else: - self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]") - self.console.print("[dim #B8860B]Type /help for available commands[/]") + # Check for skill slash commands (/gif-search, /axolotl, etc.) 
+ base_cmd = cmd_lower.split()[0] + if base_cmd in _skill_commands: + user_instruction = cmd_original[len(base_cmd):].strip() + msg = build_skill_invocation_message(base_cmd, user_instruction) + if msg: + skill_name = _skill_commands[base_cmd]["name"] + print(f"\n⚡ Loading skill: {skill_name}") + if hasattr(self, '_pending_input'): + self._pending_input.put(msg) + else: + self.console.print(f"[bold red]Failed to load skill for {base_cmd}[/]") + else: + self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]") + self.console.print("[dim #B8860B]Type /help for available commands[/]") return True @@ -1720,6 +1767,77 @@ class HermesCLI: } self.console.print(labels.get(self.tool_progress_mode, "")) + def _manual_compress(self): + """Manually trigger context compression on the current conversation.""" + if not self.conversation_history or len(self.conversation_history) < 4: + print("(._.) Not enough conversation to compress (need at least 4 messages).") + return + + if not self.agent: + print("(._.) No active agent -- send a message first.") + return + + if not self.agent.compression_enabled: + print("(._.) 
Compression is disabled in config.") + return + + original_count = len(self.conversation_history) + try: + from agent.model_metadata import estimate_messages_tokens_rough + approx_tokens = estimate_messages_tokens_rough(self.conversation_history) + print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...") + + compressed, new_system = self.agent._compress_context( + self.conversation_history, + self.agent._cached_system_prompt or "", + approx_tokens=approx_tokens, + ) + self.conversation_history = compressed + new_count = len(self.conversation_history) + new_tokens = estimate_messages_tokens_rough(self.conversation_history) + print( + f" ✅ Compressed: {original_count} → {new_count} messages " + f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)" + ) + except Exception as e: + print(f" ❌ Compression failed: {e}") + + def _show_usage(self): + """Show cumulative token usage for the current session.""" + if not self.agent: + print("(._.) No active agent -- send a message first.") + return + + agent = self.agent + prompt = agent.session_prompt_tokens + completion = agent.session_completion_tokens + total = agent.session_total_tokens + calls = agent.session_api_calls + + if calls == 0: + print("(._.) 
No API calls made yet in this session.") + return + + # Current context window state + compressor = agent.context_compressor + last_prompt = compressor.last_prompt_tokens + ctx_len = compressor.context_length + pct = (last_prompt / ctx_len * 100) if ctx_len else 0 + compressions = compressor.compression_count + + msg_count = len(self.conversation_history) + + print(f" 📊 Session Token Usage") + print(f" {'─' * 40}") + print(f" Prompt tokens (input): {prompt:>10,}") + print(f" Completion tokens (output): {completion:>9,}") + print(f" Total tokens: {total:>10,}") + print(f" API calls: {calls:>10,}") + print(f" {'─' * 40}") + print(f" Current context: {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)") + print(f" Messages: {msg_count}") + print(f" Compressions: {compressions}") + if self.verbose: logging.getLogger().setLevel(logging.DEBUG) for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'): @@ -1894,8 +2012,8 @@ class HermesCLI: Returns: The agent's response, or None on error """ - # Refresh OAuth credentials if needed (handles key rotation transparently) - if self.provider == "nous" and not self._ensure_runtime_credentials(): + # Refresh provider credentials if needed (handles key rotation transparently) + if not self._ensure_runtime_credentials(): return None # Initialize agent if needed diff --git a/cron/scheduler.py b/cron/scheduler.py index 23cf5cd61..df88e56b7 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -172,10 +172,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: except UnicodeDecodeError: load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1") - model = os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6") - # Custom endpoint (OPENAI_*) takes precedence, matching CLI behavior - api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "") - base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") 
+ model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" try: import yaml @@ -188,24 +185,27 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: model = _model_cfg elif isinstance(_model_cfg, dict): model = _model_cfg.get("default", model) - base_url = _model_cfg.get("base_url", base_url) - # Check if provider is nous — resolve OAuth credentials - provider = _model_cfg.get("provider", "") if isinstance(_model_cfg, dict) else "" - if provider == "nous": - try: - from hermes_cli.auth import resolve_nous_runtime_credentials - creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=5 * 60) - api_key = creds.get("api_key", api_key) - base_url = creds.get("base_url", base_url) - except Exception as nous_err: - logging.warning("Nous Portal credential resolution failed for cron: %s", nous_err) except Exception: pass + from hermes_cli.runtime_provider import ( + resolve_runtime_provider, + format_runtime_provider_error, + ) + try: + runtime = resolve_runtime_provider( + requested=os.getenv("HERMES_INFERENCE_PROVIDER"), + ) + except Exception as exc: + message = format_runtime_provider_error(exc) + raise RuntimeError(message) from exc + agent = AIAgent( model=model, - api_key=api_key, - base_url=base_url, + api_key=runtime.get("api_key"), + base_url=runtime.get("base_url"), + provider=runtime.get("provider"), + api_mode=runtime.get("api_mode"), quiet_mode=True, session_id=f"cron_{job_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" ) diff --git a/docs/cli.md b/docs/cli.md index a9257024c..0945b48a1 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -12,7 +12,7 @@ hermes hermes --model "anthropic/claude-sonnet-4" # With specific provider -hermes --provider nous # Use Nous Portal (requires: hermes login) +hermes --provider nous # Use Nous Portal (requires: hermes model) hermes --provider openrouter # Force OpenRouter # With specific toolsets @@ -73,6 +73,9 @@ The CLI is implemented in `cli.py` and uses: | `/history` | Show 
conversation history | | `/save` | Save current conversation to file | | `/config` | Show current configuration | +| `/verbose` | Cycle tool progress display: off → new → all → verbose | +| `/compress` | Manually compress conversation context (flush memories + summarize) | +| `/usage` | Show token usage for the current session | | `/quit` | Exit the CLI (also: `/exit`, `/q`) | ## Configuration @@ -93,7 +96,7 @@ model: ``` **Provider selection** (`provider` field): -- `auto` (default): Uses Nous Portal if logged in (`hermes login`), otherwise falls back to OpenRouter/env vars. +- `auto` (default): Uses Nous Portal if logged in (`hermes model`), otherwise falls back to OpenRouter/env vars. - `openrouter`: Always uses `OPENROUTER_API_KEY` from `.env`. - `nous`: Always uses Nous Portal OAuth credentials from `auth.json`. diff --git a/docs/hooks.md b/docs/hooks.md new file mode 100644 index 000000000..3746eb3e4 --- /dev/null +++ b/docs/hooks.md @@ -0,0 +1,174 @@ +# Event Hooks + +The hooks system lets you run custom code at key points in the agent lifecycle — session creation, slash commands, each tool-calling step, and more. Hooks are discovered automatically from `~/.hermes/hooks/` and fire without blocking the main agent pipeline. + +## Creating a Hook + +Each hook is a directory under `~/.hermes/hooks/` containing two files: + +``` +~/.hermes/hooks/ +└── my-hook/ + ├── HOOK.yaml # Declares which events to listen for + └── handler.py # Python handler function +``` + +### HOOK.yaml + +```yaml +name: my-hook +description: Log all agent activity to a file +events: + - agent:start + - agent:end + - agent:step +``` + +The `events` list determines which events trigger your handler. You can subscribe to any combination of events, including wildcards like `command:*`. 
+ +### handler.py + +```python +import json +from datetime import datetime +from pathlib import Path + +LOG_FILE = Path.home() / ".hermes" / "hooks" / "my-hook" / "activity.log" + +async def handle(event_type: str, context: dict): + """Called for each subscribed event. Must be named 'handle'.""" + entry = { + "timestamp": datetime.now().isoformat(), + "event": event_type, + **context, + } + with open(LOG_FILE, "a") as f: + f.write(json.dumps(entry) + "\n") +``` + +The handler function: +- Must be named `handle` +- Receives `event_type` (string) and `context` (dict) +- Can be `async def` or regular `def` — both work +- Errors are caught and logged, never crashing the agent + +## Available Events + +| Event | When it fires | Context keys | +|-------|---------------|--------------| +| `gateway:startup` | Gateway process starts | `platforms` (list of active platform names) | +| `session:start` | New messaging session created | `platform`, `user_id`, `session_id`, `session_key` | +| `session:reset` | User ran `/new` or `/reset` | `platform`, `user_id`, `session_key` | +| `agent:start` | Agent begins processing a message | `platform`, `user_id`, `session_id`, `message` | +| `agent:step` | Each iteration of the tool-calling loop | `platform`, `user_id`, `session_id`, `iteration`, `tool_names` | +| `agent:end` | Agent finishes processing | `platform`, `user_id`, `session_id`, `message`, `response` | +| `command:*` | Any slash command executed | `platform`, `user_id`, `command`, `args` | + +### Wildcard Matching + +Handlers registered for `command:*` fire for any `command:` event (`command:model`, `command:reset`, etc.). This lets you monitor all slash commands with a single subscription. 
+ +## Examples + +### Telegram Notification on Long Tasks + +Send yourself a Telegram message when the agent takes more than 10 tool-calling steps: + +```yaml +# ~/.hermes/hooks/long-task-alert/HOOK.yaml +name: long-task-alert +description: Alert when agent is taking many steps +events: + - agent:step +``` + +```python +# ~/.hermes/hooks/long-task-alert/handler.py +import os +import httpx + +THRESHOLD = 10 +BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN") +CHAT_ID = os.getenv("TELEGRAM_HOME_CHANNEL") + +async def handle(event_type: str, context: dict): + iteration = context.get("iteration", 0) + if iteration == THRESHOLD and BOT_TOKEN and CHAT_ID: + tools = ", ".join(context.get("tool_names", [])) + text = f"⚠️ Agent has been running for {iteration} steps. Last tools: {tools}" + async with httpx.AsyncClient() as client: + await client.post( + f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage", + json={"chat_id": CHAT_ID, "text": text}, + ) +``` + +### Command Usage Logger + +Track which slash commands are used and how often: + +```yaml +# ~/.hermes/hooks/command-logger/HOOK.yaml +name: command-logger +description: Log slash command usage +events: + - command:* +``` + +```python +# ~/.hermes/hooks/command-logger/handler.py +import json +from datetime import datetime +from pathlib import Path + +LOG = Path.home() / ".hermes" / "logs" / "command_usage.jsonl" + +def handle(event_type: str, context: dict): + LOG.parent.mkdir(parents=True, exist_ok=True) + entry = { + "ts": datetime.now().isoformat(), + "command": context.get("command"), + "args": context.get("args"), + "platform": context.get("platform"), + "user": context.get("user_id"), + } + with open(LOG, "a") as f: + f.write(json.dumps(entry) + "\n") +``` + +### Session Start Webhook + +POST to an external service whenever a new session starts: + +```yaml +# ~/.hermes/hooks/session-webhook/HOOK.yaml +name: session-webhook +description: Notify external service on new sessions +events: + - session:start + - 
session:reset +``` + +```python +# ~/.hermes/hooks/session-webhook/handler.py +import httpx + +WEBHOOK_URL = "https://your-service.example.com/hermes-events" + +async def handle(event_type: str, context: dict): + async with httpx.AsyncClient() as client: + await client.post(WEBHOOK_URL, json={ + "event": event_type, + **context, + }, timeout=5) +``` + +## How It Works + +1. On gateway startup, `HookRegistry.discover_and_load()` scans `~/.hermes/hooks/` +2. Each subdirectory with `HOOK.yaml` + `handler.py` is loaded dynamically +3. Handlers are registered for their declared events +4. At each lifecycle point, `hooks.emit()` fires all matching handlers +5. Errors in any handler are caught and logged — a broken hook never crashes the agent + +Hooks only fire in the **gateway** (Telegram, Discord, Slack, WhatsApp). The CLI does not currently load hooks. The `agent:step` event bridges from the sync agent thread to the async hook system via `asyncio.run_coroutine_threadsafe`. diff --git a/docs/messaging.md b/docs/messaging.md index 9963cfe03..e695308b4 100644 --- a/docs/messaging.md +++ b/docs/messaging.md @@ -74,6 +74,13 @@ Sessions reset based on configurable policies: Send `/new` or `/reset` as a message to start fresh. 
+### Context Management + +| Command | Description | +|---------|-------------| +| `/compress` | Manually compress conversation context (saves memories, then summarizes) | +| `/usage` | Show token usage and context window status for the current session | + ### Per-Platform Overrides Configure different reset policies per platform: diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 2e818b4ea..dcd97f309 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -425,6 +425,28 @@ class BasePlatformAdapter(ABC): text = f"{caption}\n{image_url}" if caption else image_url return await self.send(chat_id=chat_id, content=text, reply_to=reply_to) + async def send_animation( + self, + chat_id: str, + animation_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """ + Send an animated GIF natively via the platform API. + + Override in subclasses to send GIFs as proper animations + (e.g., Telegram send_animation) so they auto-play inline. + Default falls back to send_image. 
+ """ + return await self.send_image(chat_id=chat_id, image_url=animation_url, caption=caption, reply_to=reply_to) + + @staticmethod + def _is_animation_url(url: str) -> bool: + """Check if a URL points to an animated GIF (vs a static image).""" + lower = url.lower().split('?')[0] # Strip query params + return lower.endswith('.gif') + @staticmethod def extract_images(content: str) -> Tuple[List[Tuple[str, str]], str]: """ @@ -636,11 +658,19 @@ class BasePlatformAdapter(ABC): if human_delay > 0: await asyncio.sleep(human_delay) try: - img_result = await self.send_image( - chat_id=event.source.chat_id, - image_url=image_url, - caption=alt_text if alt_text else None, - ) + # Route animated GIFs through send_animation for proper playback + if self._is_animation_url(image_url): + img_result = await self.send_animation( + chat_id=event.source.chat_id, + animation_url=image_url, + caption=alt_text if alt_text else None, + ) + else: + img_result = await self.send_image( + chat_id=event.source.chat_id, + image_url=image_url, + caption=alt_text if alt_text else None, + ) if not img_result.success: print(f"[{self.name}] Failed to send image: {img_result.error}") except Exception as img_err: diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index c37fde42c..076e97ff5 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -272,6 +272,30 @@ class TelegramAdapter(BasePlatformAdapter): # Fallback: send as text link return await super().send_image(chat_id, image_url, caption, reply_to) + async def send_animation( + self, + chat_id: str, + animation_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """Send an animated GIF natively as a Telegram animation (auto-plays inline).""" + if not self._bot: + return SendResult(success=False, error="Not connected") + + try: + msg = await self._bot.send_animation( + chat_id=int(chat_id), + animation=animation_url, + caption=caption[:1024] if 
caption else None, + reply_to_message_id=int(reply_to) if reply_to else None, + ) + return SendResult(success=True, message_id=str(msg.message_id)) + except Exception as e: + print(f"[{self.name}] Failed to send animation, falling back to photo: {e}") + # Fallback: try as a regular photo + return await self.send_image(chat_id, animation_url, caption, reply_to) + async def send_typing(self, chat_id: str) -> None: """Send typing indicator.""" if self._bot: diff --git a/gateway/run.py b/gateway/run.py index 198629ce3..32f53ba7f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -78,6 +78,20 @@ if _config_path.exists(): for _cfg_key, _env_var in _terminal_env_map.items(): if _cfg_key in _terminal_cfg: os.environ[_env_var] = str(_terminal_cfg[_cfg_key]) + _compression_cfg = _cfg.get("compression", {}) + if _compression_cfg and isinstance(_compression_cfg, dict): + _compression_env_map = { + "enabled": "CONTEXT_COMPRESSION_ENABLED", + "threshold": "CONTEXT_COMPRESSION_THRESHOLD", + "summary_model": "CONTEXT_COMPRESSION_MODEL", + } + for _cfg_key, _env_var in _compression_env_map.items(): + if _cfg_key in _compression_cfg: + os.environ[_env_var] = str(_compression_cfg[_cfg_key]) + _agent_cfg = _cfg.get("agent", {}) + if _agent_cfg and isinstance(_agent_cfg, dict): + if "max_turns" in _agent_cfg: + os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"]) except Exception: pass # Non-fatal; gateway can still run with .env values @@ -111,6 +125,28 @@ from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageTyp logger = logging.getLogger(__name__) +def _resolve_runtime_agent_kwargs() -> dict: + """Resolve provider credentials for gateway-created AIAgent instances.""" + from hermes_cli.runtime_provider import ( + resolve_runtime_provider, + format_runtime_provider_error, + ) + + try: + runtime = resolve_runtime_provider( + requested=os.getenv("HERMES_INFERENCE_PROVIDER"), + ) + except Exception as exc: + raise 
RuntimeError(format_runtime_provider_error(exc)) from exc + + return { + "api_key": runtime.get("api_key"), + "base_url": runtime.get("base_url"), + "provider": runtime.get("provider"), + "api_mode": runtime.get("api_mode"), + } + + class GatewayRunner: """ Main gateway controller. @@ -178,17 +214,12 @@ class GatewayRunner: return from run_agent import AIAgent - _flush_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "") - _flush_base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") - _flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL", "anthropic/claude-opus-4.6") - - if not _flush_api_key: + runtime_kwargs = _resolve_runtime_agent_kwargs() + if not runtime_kwargs.get("api_key"): return tmp_agent = AIAgent( - model=_flush_model, - api_key=_flush_api_key, - base_url=_flush_base_url, + **runtime_kwargs, max_iterations=8, quiet_mode=True, enabled_toolsets=["memory", "skills"], @@ -608,6 +639,19 @@ class GatewayRunner: # Check for commands command = event.get_command() + + # Emit command:* hook for any recognized slash command + _known_commands = {"new", "reset", "help", "status", "stop", "model", + "personality", "retry", "undo", "sethome", "set-home", + "compress", "usage"} + if command and command in _known_commands: + await self.hooks.emit(f"command:{command}", { + "platform": source.platform.value if source.platform else "", + "user_id": source.user_id, + "command": command, + "args": event.get_command_args().strip(), + }) + if command in ["new", "reset"]: return await self._handle_reset_command(event) @@ -634,6 +678,27 @@ class GatewayRunner: if command in ["sethome", "set-home"]: return await self._handle_set_home_command(event) + + if command == "compress": + return await self._handle_compress_command(event) + + if command == "usage": + return await self._handle_usage_command(event) + + # Skill slash commands: /skill-name loads the skill and sends to agent + if command: 
+ try: + from agent.skill_commands import get_skill_commands, build_skill_invocation_message + skill_cmds = get_skill_commands() + cmd_key = f"/{command}" + if cmd_key in skill_cmds: + user_instruction = event.get_command_args().strip() + msg = build_skill_invocation_message(cmd_key, user_instruction) + if msg: + event.text = msg + # Fall through to normal message processing with skill content + except Exception as e: + logger.debug("Skill command check failed (non-fatal): %s", e) # Check for pending exec approval responses if source.chat_type != "dm": @@ -663,6 +728,19 @@ class GatewayRunner: session_entry = self.session_store.get_or_create_session(source) session_key = session_entry.session_key + # Emit session:start for new or auto-reset sessions + _is_new_session = ( + session_entry.created_at == session_entry.updated_at + or getattr(session_entry, "was_auto_reset", False) + ) + if _is_new_session: + await self.hooks.emit("session:start", { + "platform": source.platform.value if source.platform else "", + "user_id": source.user_id, + "session_id": session_entry.session_id, + "session_key": session_key, + }) + # Build session context context = build_session_context(source, self.config, session_entry) @@ -916,15 +994,10 @@ class GatewayRunner: if old_history: from run_agent import AIAgent loop = asyncio.get_event_loop() - # Resolve credentials so the flush agent can reach the LLM - _flush_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "") - _flush_base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") - _flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL", "anthropic/claude-opus-4.6") + _flush_kwargs = _resolve_runtime_agent_kwargs() def _do_flush(): tmp_agent = AIAgent( - model=_flush_model, - api_key=_flush_api_key, - base_url=_flush_base_url, + **_flush_kwargs, max_iterations=5, quiet_mode=True, enabled_toolsets=["memory"], @@ -999,20 +1072,31 @@ class GatewayRunner: 
async def _handle_help_command(self, event: MessageEvent) -> str: """Handle /help command - list available commands.""" - return ( - "📖 **Hermes Commands**\n" - "\n" - "`/new` — Start a new conversation\n" - "`/reset` — Reset conversation history\n" - "`/status` — Show session info\n" - "`/stop` — Interrupt the running agent\n" - "`/model [name]` — Show or change the model\n" - "`/personality [name]` — Set a personality\n" - "`/retry` — Retry your last message\n" - "`/undo` — Remove the last exchange\n" - "`/sethome` — Set this chat as the home channel\n" - "`/help` — Show this message" - ) + lines = [ + "📖 **Hermes Commands**\n", + "`/new` — Start a new conversation", + "`/reset` — Reset conversation history", + "`/status` — Show session info", + "`/stop` — Interrupt the running agent", + "`/model [name]` — Show or change the model", + "`/personality [name]` — Set a personality", + "`/retry` — Retry your last message", + "`/undo` — Remove the last exchange", + "`/sethome` — Set this chat as the home channel", + "`/compress` — Compress conversation context", + "`/usage` — Show token usage for this session", + "`/help` — Show this message", + ] + try: + from agent.skill_commands import get_skill_commands + skill_cmds = get_skill_commands() + if skill_cmds: + lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):") + for cmd in sorted(skill_cmds): + lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}") + except Exception: + pass + return "\n".join(lines) async def _handle_model_command(self, event: MessageEvent) -> str: """Handle /model command - show or change the current model.""" @@ -1205,6 +1289,95 @@ class GatewayRunner: f"Cron jobs and cross-platform messages will be delivered here." 
) + async def _handle_compress_command(self, event: MessageEvent) -> str: + """Handle /compress command -- manually compress conversation context.""" + source = event.source + session_entry = self.session_store.get_or_create_session(source) + history = self.session_store.load_transcript(session_entry.session_id) + + if not history or len(history) < 4: + return "Not enough conversation to compress (need at least 4 messages)." + + try: + from run_agent import AIAgent + from agent.model_metadata import estimate_messages_tokens_rough + + runtime_kwargs = _resolve_runtime_agent_kwargs() + if not runtime_kwargs.get("api_key"): + return "No provider configured -- cannot compress." + + msgs = [ + {"role": m.get("role"), "content": m.get("content")} + for m in history + if m.get("role") in ("user", "assistant") and m.get("content") + ] + original_count = len(msgs) + approx_tokens = estimate_messages_tokens_rough(msgs) + + tmp_agent = AIAgent( + **runtime_kwargs, + max_iterations=4, + quiet_mode=True, + enabled_toolsets=["memory"], + session_id=session_entry.session_id, + ) + + loop = asyncio.get_event_loop() + compressed, _ = await loop.run_in_executor( + None, + lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens), + ) + + session_entry.conversation_history = compressed + new_count = len(compressed) + new_tokens = estimate_messages_tokens_rough(compressed) + + return ( + f"🗜️ Compressed: {original_count} → {new_count} messages\n" + f"~{approx_tokens:,} → ~{new_tokens:,} tokens" + ) + except Exception as e: + logger.warning("Manual compress failed: %s", e) + return f"Compression failed: {e}" + + async def _handle_usage_command(self, event: MessageEvent) -> str: + """Handle /usage command -- show token usage for the session's last agent run.""" + source = event.source + session_key = f"agent:main:{source.platform.value}:" + \ + (f"dm" if source.chat_type == "dm" else f"{source.chat_type}:{source.chat_id}") + + agent = 
self._running_agents.get(session_key) + if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0: + lines = [ + "📊 **Session Token Usage**", + f"Prompt (input): {agent.session_prompt_tokens:,}", + f"Completion (output): {agent.session_completion_tokens:,}", + f"Total: {agent.session_total_tokens:,}", + f"API calls: {agent.session_api_calls}", + ] + ctx = agent.context_compressor + if ctx.last_prompt_tokens: + pct = ctx.last_prompt_tokens / ctx.context_length * 100 if ctx.context_length else 0 + lines.append(f"Context: {ctx.last_prompt_tokens:,} / {ctx.context_length:,} ({pct:.0f}%)") + if ctx.compression_count: + lines.append(f"Compressions: {ctx.compression_count}") + return "\n".join(lines) + + # No running agent -- check session history for a rough count + session_entry = self.session_store.get_or_create_session(source) + history = self.session_store.load_transcript(session_entry.session_id) + if history: + from agent.model_metadata import estimate_messages_tokens_rough + msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")] + approx = estimate_messages_tokens_rough(msgs) + return ( + f"📊 **Session Info**\n" + f"Messages: {len(msgs)}\n" + f"Estimated context: ~{approx:,} tokens\n" + f"_(Detailed usage available during active conversations)_" + ) + return "No usage data available for this session." 
+ def _set_session_env(self, context: SessionContext) -> None: """Set environment variables for the current session.""" os.environ["HERMES_SESSION_PLATFORM"] = context.source.platform.value @@ -1593,6 +1766,25 @@ class GatewayRunner: result_holder = [None] # Mutable container for the result tools_holder = [None] # Mutable container for the tool definitions + # Bridge sync step_callback → async hooks.emit for agent:step events + _loop_for_step = asyncio.get_event_loop() + _hooks_ref = self.hooks + + def _step_callback_sync(iteration: int, tool_names: list) -> None: + try: + asyncio.run_coroutine_threadsafe( + _hooks_ref.emit("agent:step", { + "platform": source.platform.value if source.platform else "", + "user_id": source.user_id, + "session_id": session_id, + "iteration": iteration, + "tool_names": tool_names, + }), + _loop_for_step, + ) + except Exception as _e: + logger.debug("agent:step hook error: %s", _e) + def run_sync(): # Pass session_key to process registry via env var so background # processes can be mapped back to this gateway session @@ -1609,7 +1801,7 @@ class GatewayRunner: combined_ephemeral = context_prompt or "" if self._ephemeral_system_prompt: combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip() - + # Re-read .env and config for fresh credentials (gateway is long-lived, # keys may change without restart). 
try: @@ -1619,9 +1811,6 @@ class GatewayRunner: except Exception: pass - # Custom endpoint (OPENAI_*) takes precedence, matching CLI behavior - api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "") - base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" try: @@ -1635,24 +1824,22 @@ class GatewayRunner: model = _model_cfg elif isinstance(_model_cfg, dict): model = _model_cfg.get("default", model) - base_url = _model_cfg.get("base_url", base_url) - # Check if provider is nous — resolve OAuth credentials - provider = _model_cfg.get("provider", "") if isinstance(_model_cfg, dict) else "" - if provider == "nous": - try: - from hermes_cli.auth import resolve_nous_runtime_credentials - creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=5 * 60) - api_key = creds.get("api_key", api_key) - base_url = creds.get("base_url", base_url) - except Exception as nous_err: - logger.warning("Nous Portal credential resolution failed: %s", nous_err) except Exception: pass + try: + runtime_kwargs = _resolve_runtime_agent_kwargs() + except Exception as exc: + return { + "final_response": f"⚠️ Provider authentication failed: {exc}", + "messages": [], + "api_calls": 0, + "tools": [], + } + agent = AIAgent( model=model, - api_key=api_key, - base_url=base_url, + **runtime_kwargs, max_iterations=max_iterations, quiet_mode=True, verbose_logging=False, @@ -1662,6 +1849,7 @@ class GatewayRunner: reasoning_config=self._reasoning_config, session_id=session_id, tool_progress_callback=progress_callback if tool_progress_enabled else None, + step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None, platform=platform_key, honcho_session_key=session_key, session_db=self._session_db, @@ -1714,6 +1902,19 @@ class GatewayRunner: content = f"[Delivered from {mirror_src}] {content}" agent_history.append({"role": role, 
"content": content}) + # Collect MEDIA paths already in history so we can exclude them + # from the current turn's extraction. This is compression-safe: + # even if the message list shrinks, we know which paths are old. + _history_media_paths: set = set() + for _hm in agent_history: + if _hm.get("role") in ("tool", "function"): + _hc = _hm.get("content", "") + if "MEDIA:" in _hc: + for _match in re.finditer(r'MEDIA:(\S+)', _hc): + _p = _match.group(1).strip().rstrip('",}') + if _p: + _history_media_paths.add(_p) + result = agent.run_conversation(message, conversation_history=agent_history) result_holder[0] = result @@ -1734,22 +1935,25 @@ class GatewayRunner: # doesn't include them. We collect unique tags from tool results and # append any that aren't already present in the final response, so the # adapter's extract_media() can find and deliver the files exactly once. + # + # Uses path-based deduplication against _history_media_paths (collected + # before run_conversation) instead of index slicing. This is safe even + # when context compression shrinks the message list. 
(Fixes #160) if "MEDIA:" not in final_response: media_tags = [] has_voice_directive = False for msg in result.get("messages", []): - if msg.get("role") == "tool" or msg.get("role") == "function": + if msg.get("role") in ("tool", "function"): content = msg.get("content", "") if "MEDIA:" in content: for match in re.finditer(r'MEDIA:(\S+)', content): path = match.group(1).strip().rstrip('",}') - if path: + if path and path not in _history_media_paths: media_tags.append(f"MEDIA:{path}") if "[[audio_as_voice]]" in content: has_voice_directive = True if media_tags: - # Deduplicate while preserving order seen = set() unique_tags = [] for tag in media_tags: @@ -1934,10 +2138,21 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool: maxBytes=5 * 1024 * 1024, backupCount=3, ) - file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s')) + from agent.redact import RedactingFormatter + file_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s')) logging.getLogger().addHandler(file_handler) logging.getLogger().setLevel(logging.INFO) + # Separate errors-only log for easy debugging + error_handler = RotatingFileHandler( + log_dir / 'errors.log', + maxBytes=2 * 1024 * 1024, + backupCount=2, + ) + error_handler.setLevel(logging.WARNING) + error_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s')) + logging.getLogger().addHandler(error_handler) + runner = GatewayRunner(config) # Set up signal handlers diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 0941c6d91..098b7620c 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -10,7 +10,7 @@ Architecture: - Auth store (auth.json) holds per-provider credential state - resolve_provider() picks the active provider via priority chain - resolve_*_runtime_credentials() handles token refresh and key minting -- login_command() / logout_command() are the CLI entry points +- logout_command() is the 
CLI entry point for clearing auth """ from __future__ import annotations @@ -18,7 +18,10 @@ from __future__ import annotations import json import logging import os +import shutil import stat +import base64 +import subprocess import time import webbrowser from contextlib import contextmanager @@ -55,6 +58,10 @@ DEFAULT_NOUS_SCOPE = "inference:mint_agent_key" DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1 # poll at most every 1s +DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex" +CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" +CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token" +CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # ============================================================================= @@ -84,7 +91,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { client_id=DEFAULT_NOUS_CLIENT_ID, scope=DEFAULT_NOUS_SCOPE, ), - # Future: "openai_codex", "anthropic", etc. + "openai-codex": ProviderConfig( + id="openai-codex", + name="OpenAI Codex", + auth_type="oauth_external", + inference_base_url=DEFAULT_CODEX_BASE_URL, + ), } @@ -115,7 +127,7 @@ def format_auth_error(error: Exception) -> str: return str(error) if error.relogin_required: - return f"{error} Run `hermes login` to re-authenticate." + return f"{error} Run `hermes model` to re-authenticate." 
if error.code == "subscription_required": return ( @@ -298,12 +310,15 @@ def resolve_provider( """ normalized = (requested or "auto").strip().lower() + if normalized in {"openrouter", "custom"}: + return "openrouter" if normalized in PROVIDER_REGISTRY: return normalized - if normalized == "openrouter": - return "openrouter" if normalized != "auto": - return "openrouter" + raise AuthError( + f"Unknown provider '{normalized}'.", + code="invalid_provider", + ) # Explicit one-off CLI creds always mean openrouter/custom if explicit_api_key or explicit_base_url: @@ -314,8 +329,8 @@ def resolve_provider( auth_store = _load_auth_store() active = auth_store.get("active_provider") if active and active in PROVIDER_REGISTRY: - state = _load_provider_state(auth_store, active) - if state and (state.get("access_token") or state.get("refresh_token")): + status = get_auth_status(active) + if status.get("logged_in"): return active except Exception as e: logger.debug("Could not detect active auth provider: %s", e) @@ -369,6 +384,27 @@ def _optional_base_url(value: Any) -> Optional[str]: return cleaned if cleaned else None +def _decode_jwt_claims(token: Any) -> Dict[str, Any]: + if not isinstance(token, str) or token.count(".") != 2: + return {} + payload = token.split(".")[1] + payload += "=" * ((4 - len(payload) % 4) % 4) + try: + raw = base64.urlsafe_b64decode(payload.encode("utf-8")) + claims = json.loads(raw.decode("utf-8")) + except Exception: + return {} + return claims if isinstance(claims, dict) else {} + + +def _codex_access_token_is_expiring(access_token: Any, skew_seconds: int) -> bool: + claims = _decode_jwt_claims(access_token) + exp = claims.get("exp") + if not isinstance(exp, (int, float)): + return False + return float(exp) <= (time.time() + max(0, int(skew_seconds))) + + # ============================================================================= # SSH / remote session detection # ============================================================================= @@ 
-378,6 +414,302 @@ def _is_remote_session() -> bool: return bool(os.getenv("SSH_CLIENT") or os.getenv("SSH_TTY")) +# ============================================================================= +# OpenAI Codex auth file helpers +# ============================================================================= + +def resolve_codex_home_path() -> Path: + """Resolve CODEX_HOME, defaulting to ~/.codex.""" + codex_home = os.getenv("CODEX_HOME", "").strip() + if not codex_home: + codex_home = str(Path.home() / ".codex") + return Path(codex_home).expanduser() + + +def _codex_auth_file_path() -> Path: + return resolve_codex_home_path() / "auth.json" + + +def _codex_auth_lock_path(auth_path: Path) -> Path: + return auth_path.with_suffix(auth_path.suffix + ".lock") + + +@contextmanager +def _codex_auth_file_lock( + auth_path: Path, + timeout_seconds: float = AUTH_LOCK_TIMEOUT_SECONDS, +): + lock_path = _codex_auth_lock_path(auth_path) + lock_path.parent.mkdir(parents=True, exist_ok=True) + + with lock_path.open("a+") as lock_file: + if fcntl is None: + yield + return + + deadline = time.time() + max(1.0, timeout_seconds) + while True: + try: + fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) + break + except BlockingIOError: + if time.time() >= deadline: + raise TimeoutError(f"Timed out waiting for Codex auth lock: {lock_path}") + time.sleep(0.05) + + try: + yield + finally: + fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN) + + +def read_codex_auth_file() -> Dict[str, Any]: + """Read and validate Codex auth.json shape.""" + codex_home = resolve_codex_home_path() + if not codex_home.exists(): + raise AuthError( + f"Codex home directory not found at {codex_home}.", + provider="openai-codex", + code="codex_home_missing", + relogin_required=True, + ) + + auth_path = codex_home / "auth.json" + if not auth_path.exists(): + raise AuthError( + f"Codex auth file not found at {auth_path}.", + provider="openai-codex", + code="codex_auth_missing", + relogin_required=True, + 
) + + try: + payload = json.loads(auth_path.read_text()) + except Exception as exc: + raise AuthError( + f"Failed to parse Codex auth file at {auth_path}.", + provider="openai-codex", + code="codex_auth_invalid_json", + relogin_required=True, + ) from exc + + tokens = payload.get("tokens") + if not isinstance(tokens, dict): + raise AuthError( + "Codex auth file is missing a valid 'tokens' object.", + provider="openai-codex", + code="codex_auth_invalid_shape", + relogin_required=True, + ) + + access_token = tokens.get("access_token") + refresh_token = tokens.get("refresh_token") + if not isinstance(access_token, str) or not access_token.strip(): + raise AuthError( + "Codex auth file is missing tokens.access_token.", + provider="openai-codex", + code="codex_auth_missing_access_token", + relogin_required=True, + ) + if not isinstance(refresh_token, str) or not refresh_token.strip(): + raise AuthError( + "Codex auth file is missing tokens.refresh_token.", + provider="openai-codex", + code="codex_auth_missing_refresh_token", + relogin_required=True, + ) + + return { + "payload": payload, + "tokens": tokens, + "auth_path": auth_path, + "codex_home": codex_home, + } + + +def _persist_codex_auth_payload( + auth_path: Path, + payload: Dict[str, Any], + *, + lock_held: bool = False, +) -> None: + auth_path.parent.mkdir(parents=True, exist_ok=True) + + def _write() -> None: + serialized = json.dumps(payload, indent=2, ensure_ascii=False) + "\n" + tmp_path = auth_path.parent / f".{auth_path.name}.{os.getpid()}.{time.time_ns()}.tmp" + try: + with tmp_path.open("w", encoding="utf-8") as tmp_file: + tmp_file.write(serialized) + tmp_file.flush() + os.fsync(tmp_file.fileno()) + os.replace(tmp_path, auth_path) + finally: + if tmp_path.exists(): + try: + tmp_path.unlink() + except OSError: + pass + + try: + auth_path.chmod(stat.S_IRUSR | stat.S_IWUSR) + except OSError: + pass + + if lock_held: + _write() + return + + with _codex_auth_file_lock(auth_path): + _write() + + +def 
_refresh_codex_auth_tokens( + *, + payload: Dict[str, Any], + auth_path: Path, + timeout_seconds: float, + lock_held: bool = False, +) -> Dict[str, Any]: + tokens = payload.get("tokens") + if not isinstance(tokens, dict): + raise AuthError( + "Codex auth file is missing a valid 'tokens' object.", + provider="openai-codex", + code="codex_auth_invalid_shape", + relogin_required=True, + ) + + refresh_token = tokens.get("refresh_token") + if not isinstance(refresh_token, str) or not refresh_token.strip(): + raise AuthError( + "Codex auth file is missing tokens.refresh_token.", + provider="openai-codex", + code="codex_auth_missing_refresh_token", + relogin_required=True, + ) + + timeout = httpx.Timeout(max(5.0, float(timeout_seconds))) + with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}) as client: + response = client.post( + CODEX_OAUTH_TOKEN_URL, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + data={ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": CODEX_OAUTH_CLIENT_ID, + }, + ) + + if response.status_code != 200: + code = "codex_refresh_failed" + message = f"Codex token refresh failed with status {response.status_code}." 
+ relogin_required = False + try: + err = response.json() + if isinstance(err, dict): + err_code = err.get("error") + if isinstance(err_code, str) and err_code.strip(): + code = err_code.strip() + err_desc = err.get("error_description") or err.get("message") + if isinstance(err_desc, str) and err_desc.strip(): + message = f"Codex token refresh failed: {err_desc.strip()}" + except Exception: + pass + if code in {"invalid_grant", "invalid_token", "invalid_request"}: + relogin_required = True + raise AuthError( + message, + provider="openai-codex", + code=code, + relogin_required=relogin_required, + ) + + try: + refresh_payload = response.json() + except Exception as exc: + raise AuthError( + "Codex token refresh returned invalid JSON.", + provider="openai-codex", + code="codex_refresh_invalid_json", + relogin_required=True, + ) from exc + + access_token = refresh_payload.get("access_token") + if not isinstance(access_token, str) or not access_token.strip(): + raise AuthError( + "Codex token refresh response was missing access_token.", + provider="openai-codex", + code="codex_refresh_missing_access_token", + relogin_required=True, + ) + + updated_tokens = dict(tokens) + updated_tokens["access_token"] = access_token.strip() + next_refresh = refresh_payload.get("refresh_token") + if isinstance(next_refresh, str) and next_refresh.strip(): + updated_tokens["refresh_token"] = next_refresh.strip() + payload["tokens"] = updated_tokens + payload["last_refresh"] = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + _persist_codex_auth_payload(auth_path, payload, lock_held=lock_held) + return updated_tokens + + +def resolve_codex_runtime_credentials( + *, + force_refresh: bool = False, + refresh_if_expiring: bool = True, + refresh_skew_seconds: int = CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, +) -> Dict[str, Any]: + """Resolve runtime credentials from Codex CLI auth state.""" + data = read_codex_auth_file() + payload = data["payload"] + tokens = dict(data["tokens"]) + 
auth_path = data["auth_path"] + access_token = str(tokens.get("access_token", "") or "").strip() + refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20")) + + should_refresh = bool(force_refresh) + if (not should_refresh) and refresh_if_expiring: + should_refresh = _codex_access_token_is_expiring(access_token, refresh_skew_seconds) + if should_refresh: + lock_timeout = max(float(AUTH_LOCK_TIMEOUT_SECONDS), refresh_timeout_seconds + 5.0) + with _codex_auth_file_lock(auth_path, timeout_seconds=lock_timeout): + data = read_codex_auth_file() + payload = data["payload"] + tokens = dict(data["tokens"]) + access_token = str(tokens.get("access_token", "") or "").strip() + + should_refresh = bool(force_refresh) + if (not should_refresh) and refresh_if_expiring: + should_refresh = _codex_access_token_is_expiring(access_token, refresh_skew_seconds) + + if should_refresh: + tokens = _refresh_codex_auth_tokens( + payload=payload, + auth_path=auth_path, + timeout_seconds=refresh_timeout_seconds, + lock_held=True, + ) + access_token = str(tokens.get("access_token", "") or "").strip() + + base_url = ( + os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") + or DEFAULT_CODEX_BASE_URL + ) + + return { + "provider": "openai-codex", + "base_url": base_url, + "api_key": access_token, + "source": "codex-auth-json", + "last_refresh": payload.get("last_refresh"), + "auth_mode": payload.get("auth_mode"), + "auth_file": str(auth_path), + "codex_home": str(data["codex_home"]), + } + + # ============================================================================= # TLS verification helper # ============================================================================= @@ -806,14 +1138,73 @@ def get_nous_auth_status() -> Dict[str, Any]: } +def get_codex_auth_status() -> Dict[str, Any]: + """Status snapshot for Codex auth.""" + state = get_provider_auth_state("openai-codex") or {} + auth_file = state.get("auth_file") or str(_codex_auth_file_path()) + 
codex_home = state.get("codex_home") or str(resolve_codex_home_path()) + try: + creds = resolve_codex_runtime_credentials() + return { + "logged_in": True, + "auth_file": creds.get("auth_file"), + "codex_home": creds.get("codex_home"), + "last_refresh": creds.get("last_refresh"), + "auth_mode": creds.get("auth_mode"), + "source": creds.get("source"), + } + except AuthError as exc: + return { + "logged_in": False, + "auth_file": auth_file, + "codex_home": codex_home, + "error": str(exc), + } + + def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: """Generic auth status dispatcher.""" target = provider_id or get_active_provider() if target == "nous": return get_nous_auth_status() + if target == "openai-codex": + return get_codex_auth_status() return {"logged_in": False} +# ============================================================================= +# External credential detection +# ============================================================================= + +def detect_external_credentials() -> List[Dict[str, Any]]: + """Scan for credentials from other CLI tools that Hermes can reuse. + + Returns a list of dicts, each with: + - provider: str -- Hermes provider id (e.g. 
"openai-codex") + - path: str -- filesystem path where creds were found + - label: str -- human-friendly description for the setup UI + """ + found: List[Dict[str, Any]] = [] + + # Codex CLI: ~/.codex/auth.json (or $CODEX_HOME/auth.json) + try: + codex_home = resolve_codex_home_path() + codex_auth = codex_home / "auth.json" + if codex_auth.is_file(): + data = json.loads(codex_auth.read_text()) + tokens = data.get("tokens", {}) + if isinstance(tokens, dict) and tokens.get("access_token"): + found.append({ + "provider": "openai-codex", + "path": str(codex_auth), + "label": f"Codex CLI credentials found ({codex_auth})", + }) + except Exception: + pass + + return found + + # ============================================================================= # CLI Commands — login / logout # ============================================================================= @@ -970,21 +1361,218 @@ def _save_model_choice(model_id: str) -> None: def login_command(args) -> None: - """Run OAuth device code login for the selected provider.""" - provider_id = getattr(args, "provider", None) or "nous" + """Deprecated: use 'hermes model' or 'hermes setup' instead.""" + print("The 'hermes login' command has been removed.") + print("Use 'hermes model' to select a provider and model,") + print("or 'hermes setup' for full interactive setup.") + raise SystemExit(0) - if provider_id not in PROVIDER_REGISTRY: - print(f"Unknown provider: {provider_id}") - print(f"Available: {', '.join(PROVIDER_REGISTRY.keys())}") - raise SystemExit(1) - pconfig = PROVIDER_REGISTRY[provider_id] +def _login_openai_codex(args, pconfig: ProviderConfig) -> None: + """OpenAI Codex login via device code flow (no Codex CLI required).""" + codex_home = resolve_codex_home_path() - if provider_id == "nous": - _login_nous(args, pconfig) - else: - print(f"Login for provider '{provider_id}' is not yet implemented.") - raise SystemExit(1) + # Check for existing valid credentials first + try: + existing = 
resolve_codex_runtime_credentials() + print(f"Existing Codex credentials found at {codex_home / 'auth.json'}") + try: + reuse = input("Use existing credentials? [Y/n]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + reuse = "y" + if reuse in ("", "y", "yes"): + creds = existing + _save_codex_provider_state(creds) + return + except AuthError: + pass + + # No existing creds (or user declined) -- run device code flow + print() + print("Signing in to OpenAI Codex...") + print() + + creds = _codex_device_code_login() + _save_codex_provider_state(creds) + + +def _save_codex_provider_state(creds: Dict[str, Any]) -> None: + """Persist Codex provider state to auth store and config.""" + auth_state = { + "auth_file": creds.get("auth_file"), + "codex_home": creds.get("codex_home"), + "last_refresh": creds.get("last_refresh"), + "auth_mode": creds.get("auth_mode"), + "source": creds.get("source"), + } + + with _auth_store_lock(): + auth_store = _load_auth_store() + _save_provider_state(auth_store, "openai-codex", auth_state) + saved_to = _save_auth_store(auth_store) + + config_path = _update_config_for_provider("openai-codex", creds.get("base_url", DEFAULT_CODEX_BASE_URL)) + print() + print("Login successful!") + print(f" Auth state: {saved_to}") + print(f" Config updated: {config_path} (model.provider=openai-codex)") + + +def _codex_device_code_login() -> Dict[str, Any]: + """Run the OpenAI device code login flow and return credentials dict.""" + import time as _time + + issuer = "https://auth.openai.com" + client_id = CODEX_OAUTH_CLIENT_ID + + # Step 1: Request device code + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + resp = client.post( + f"{issuer}/api/accounts/deviceauth/usercode", + json={"client_id": client_id}, + headers={"Content-Type": "application/json"}, + ) + except Exception as exc: + raise AuthError( + f"Failed to request device code: {exc}", + provider="openai-codex", code="device_code_request_failed", + ) + + if 
resp.status_code != 200: + raise AuthError( + f"Device code request returned status {resp.status_code}.", + provider="openai-codex", code="device_code_request_error", + ) + + device_data = resp.json() + user_code = device_data.get("user_code", "") + device_auth_id = device_data.get("device_auth_id", "") + poll_interval = max(3, int(device_data.get("interval", "5"))) + + if not user_code or not device_auth_id: + raise AuthError( + "Device code response missing required fields.", + provider="openai-codex", code="device_code_incomplete", + ) + + # Step 2: Show user the code + print("To continue, follow these steps:\n") + print(f" 1. Open this URL in your browser:") + print(f" \033[94m{issuer}/codex/device\033[0m\n") + print(f" 2. Enter this code:") + print(f" \033[94m{user_code}\033[0m\n") + print("Waiting for sign-in... (press Ctrl+C to cancel)") + + # Step 3: Poll for authorization code + max_wait = 15 * 60 # 15 minutes + start = _time.monotonic() + code_resp = None + + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + while _time.monotonic() - start < max_wait: + _time.sleep(poll_interval) + poll_resp = client.post( + f"{issuer}/api/accounts/deviceauth/token", + json={"device_auth_id": device_auth_id, "user_code": user_code}, + headers={"Content-Type": "application/json"}, + ) + + if poll_resp.status_code == 200: + code_resp = poll_resp.json() + break + elif poll_resp.status_code in (403, 404): + continue # User hasn't completed login yet + else: + raise AuthError( + f"Device auth polling returned status {poll_resp.status_code}.", + provider="openai-codex", code="device_code_poll_error", + ) + except KeyboardInterrupt: + print("\nLogin cancelled.") + raise SystemExit(130) + + if code_resp is None: + raise AuthError( + "Login timed out after 15 minutes.", + provider="openai-codex", code="device_code_timeout", + ) + + # Step 4: Exchange authorization code for tokens + authorization_code = code_resp.get("authorization_code", "") + code_verifier = 
code_resp.get("code_verifier", "") + redirect_uri = f"{issuer}/deviceauth/callback" + + if not authorization_code or not code_verifier: + raise AuthError( + "Device auth response missing authorization_code or code_verifier.", + provider="openai-codex", code="device_code_incomplete_exchange", + ) + + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + token_resp = client.post( + CODEX_OAUTH_TOKEN_URL, + data={ + "grant_type": "authorization_code", + "code": authorization_code, + "redirect_uri": redirect_uri, + "client_id": client_id, + "code_verifier": code_verifier, + }, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + except Exception as exc: + raise AuthError( + f"Token exchange failed: {exc}", + provider="openai-codex", code="token_exchange_failed", + ) + + if token_resp.status_code != 200: + raise AuthError( + f"Token exchange returned status {token_resp.status_code}.", + provider="openai-codex", code="token_exchange_error", + ) + + tokens = token_resp.json() + access_token = tokens.get("access_token", "") + refresh_token = tokens.get("refresh_token", "") + + if not access_token: + raise AuthError( + "Token exchange did not return an access_token.", + provider="openai-codex", code="token_exchange_no_access_token", + ) + + # Step 5: Persist tokens to ~/.codex/auth.json + codex_home = resolve_codex_home_path() + codex_home.mkdir(parents=True, exist_ok=True) + auth_path = codex_home / "auth.json" + + payload = { + "tokens": { + "access_token": access_token, + "refresh_token": refresh_token, + }, + "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + } + _persist_codex_auth_payload(auth_path, payload, lock_held=False) + + base_url = ( + os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") + or DEFAULT_CODEX_BASE_URL + ) + + return { + "api_key": access_token, + "base_url": base_url, + "auth_file": str(auth_path), + "codex_home": str(codex_home), + "last_refresh": payload["last_refresh"], + 
"auth_mode": "chatgpt", + "source": "device-code", + } def _login_nous(args, pconfig: ProviderConfig) -> None: @@ -1168,6 +1756,6 @@ def logout_command(args) -> None: if os.getenv("OPENROUTER_API_KEY"): print("Hermes will use OpenRouter for inference.") else: - print("Run `hermes login` or configure an API key to use Hermes.") + print("Run `hermes model` or configure an API key to use Hermes.") else: print(f"No auth state found for {provider_name}.") diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py new file mode 100644 index 000000000..75559396f --- /dev/null +++ b/hermes_cli/codex_models.py @@ -0,0 +1,144 @@ +"""Codex model discovery from API, local cache, and config.""" + +from __future__ import annotations + +import json +import logging +from pathlib import Path +from typing import List, Optional + +from hermes_cli.auth import resolve_codex_home_path + +logger = logging.getLogger(__name__) + +DEFAULT_CODEX_MODELS: List[str] = [ + "gpt-5.3-codex", + "gpt-5.2-codex", + "gpt-5.1-codex-max", + "gpt-5.1-codex-mini", +] + + +def _fetch_models_from_api(access_token: str) -> List[str]: + """Fetch available models from the Codex API. 
Returns visible models sorted by priority.""" + try: + import httpx + resp = httpx.get( + "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0", + headers={"Authorization": f"Bearer {access_token}"}, + timeout=10, + ) + if resp.status_code != 200: + return [] + data = resp.json() + entries = data.get("models", []) if isinstance(data, dict) else [] + except Exception as exc: + logger.debug("Failed to fetch Codex models from API: %s", exc) + return [] + + sortable = [] + for item in entries: + if not isinstance(item, dict): + continue + slug = item.get("slug") + if not isinstance(slug, str) or not slug.strip(): + continue + slug = slug.strip() + if item.get("supported_in_api") is False: + continue + visibility = item.get("visibility", "") + if isinstance(visibility, str) and visibility.strip().lower() == "hide": + continue + priority = item.get("priority") + rank = int(priority) if isinstance(priority, (int, float)) else 10_000 + sortable.append((rank, slug)) + + sortable.sort(key=lambda x: (x[0], x[1])) + return [slug for _, slug in sortable] + + +def _read_default_model(codex_home: Path) -> Optional[str]: + config_path = codex_home / "config.toml" + if not config_path.exists(): + return None + try: + import tomllib + except Exception: + return None + try: + payload = tomllib.loads(config_path.read_text(encoding="utf-8")) + except Exception: + return None + model = payload.get("model") if isinstance(payload, dict) else None + if isinstance(model, str) and model.strip(): + return model.strip() + return None + + +def _read_cache_models(codex_home: Path) -> List[str]: + cache_path = codex_home / "models_cache.json" + if not cache_path.exists(): + return [] + try: + raw = json.loads(cache_path.read_text(encoding="utf-8")) + except Exception: + return [] + + entries = raw.get("models") if isinstance(raw, dict) else None + sortable = [] + if isinstance(entries, list): + for item in entries: + if not isinstance(item, dict): + continue + slug = 
item.get("slug") + if not isinstance(slug, str) or not slug.strip(): + continue + slug = slug.strip() + if "codex" not in slug.lower(): + continue + if item.get("supported_in_api") is False: + continue + visibility = item.get("visibility") + if isinstance(visibility, str) and visibility.strip().lower() == "hidden": + continue + priority = item.get("priority") + rank = int(priority) if isinstance(priority, (int, float)) else 10_000 + sortable.append((rank, slug)) + + sortable.sort(key=lambda item: (item[0], item[1])) + deduped: List[str] = [] + for _, slug in sortable: + if slug not in deduped: + deduped.append(slug) + return deduped + + +def get_codex_model_ids(access_token: Optional[str] = None) -> List[str]: + """Return available Codex model IDs, trying API first, then local sources. + + Resolution order: API (live, if token provided) > config.toml default > + local cache > hardcoded defaults. + """ + codex_home = resolve_codex_home_path() + ordered: List[str] = [] + + # Try live API if we have a token + if access_token: + api_models = _fetch_models_from_api(access_token) + if api_models: + return api_models + + # Fall back to local sources + default_model = _read_default_model(codex_home) + if default_model: + ordered.append(default_model) + + for model_id in _read_cache_models(codex_home): + if model_id not in ordered: + ordered.append(model_id) + + for model_id in DEFAULT_CODEX_MODELS: + if model_id not in ordered: + ordered.append(model_id) + + return ordered diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index b7e5a6213..b091a7905 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -26,6 +26,8 @@ COMMANDS = { "/skills": "Search, install, inspect, or manage skills from online registries", "/platforms": "Show gateway/messaging platform status", "/verbose": "Cycle tool progress display: off → new → all → verbose", + "/compress": "Manually compress conversation context (flush memories + summarize)", + "/usage": "Show token usage for 
the current session", "/quit": "Exit the CLI (also: /exit, /q)", } diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 742675d03..031c6eaf8 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -175,6 +175,36 @@ def run_doctor(args): else: check_warn("config.yaml not found", "(using defaults)") + # ========================================================================= + # Check: Auth providers + # ========================================================================= + print() + print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD)) + + try: + from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status + + nous_status = get_nous_auth_status() + if nous_status.get("logged_in"): + check_ok("Nous Portal auth", "(logged in)") + else: + check_warn("Nous Portal auth", "(not logged in)") + + codex_status = get_codex_auth_status() + if codex_status.get("logged_in"): + check_ok("OpenAI Codex auth", "(logged in)") + else: + check_warn("OpenAI Codex auth", "(not logged in)") + if codex_status.get("error"): + check_info(codex_status["error"]) + except Exception as e: + check_warn("Auth provider status", f"(could not check: {e})") + + if shutil.which("codex"): + check_ok("codex CLI") + else: + check_warn("codex CLI not found", "(optional; openai-codex login uses device code, no CLI needed)") + # ========================================================================= + # Check: Directory structure + # ========================================================================= diff --git a/hermes_cli/main.py b/hermes_cli/main.py index b232d5b55..2bc391aad 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -12,7 +12,6 @@ Usage: hermes gateway install # Install gateway service hermes gateway uninstall # Uninstall gateway service hermes setup # Interactive setup wizard - hermes login # Authenticate with Nous Portal (or other providers) hermes logout # Clear stored authentication hermes status # Show status of all components hermes cron # Manage cron jobs @@ -60,6
+59,7 @@ logger = logging.getLogger(__name__) def _has_any_provider_configured() -> bool: """Check if at least one inference provider is usable.""" from hermes_cli.config import get_env_path, get_hermes_home + from hermes_cli.auth import get_auth_status # Check env vars (may be set by .env or shell). # OPENAI_BASE_URL alone counts — local models (vLLM, llama.cpp, etc.) @@ -91,8 +91,8 @@ def _has_any_provider_configured() -> bool: auth = json.loads(auth_file.read_text()) active = auth.get("active_provider") if active: - state = auth.get("providers", {}).get(active, {}) - if state.get("access_token") or state.get("refresh_token"): + status = get_auth_status(active) + if status.get("logged_in"): return True except Exception: pass @@ -289,7 +289,7 @@ def cmd_model(args): resolve_provider, get_provider_auth_state, PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice, _update_config_for_provider, resolve_nous_runtime_credentials, fetch_nous_models, AuthError, format_auth_error, - _login_nous, ProviderConfig, + _login_nous, ) from hermes_cli.config import load_config, save_config, get_env_value, save_env_value @@ -312,7 +312,12 @@ def cmd_model(args): or config_provider or "auto" ) - active = resolve_provider(effective_provider) + try: + active = resolve_provider(effective_provider) + except AuthError as exc: + warning = format_auth_error(exc) + print(f"Warning: {warning} Falling back to auto provider detection.") + active = resolve_provider("auto") # Detect custom endpoint if active == "openrouter" and get_env_value("OPENAI_BASE_URL"): @@ -321,6 +326,7 @@ def cmd_model(args): provider_labels = { "openrouter": "OpenRouter", "nous": "Nous Portal", + "openai-codex": "OpenAI Codex", "custom": "Custom endpoint", } active_label = provider_labels.get(active, active) @@ -334,11 +340,12 @@ def cmd_model(args): providers = [ ("openrouter", "OpenRouter (100+ models, pay-per-use)"), ("nous", "Nous Portal (Nous Research subscription)"), + ("openai-codex", "OpenAI Codex"), 
("custom", "Custom endpoint (self-hosted / VLLM / etc.)"), ] # Reorder so the active provider is at the top - active_key = active if active in ("openrouter", "nous") else "custom" + active_key = active if active in ("openrouter", "nous", "openai-codex") else "custom" ordered = [] for key, label in providers: if key == active_key: @@ -359,6 +366,8 @@ def cmd_model(args): _model_flow_openrouter(config, current_model) elif selected_provider == "nous": _model_flow_nous(config, current_model) + elif selected_provider == "openai-codex": + _model_flow_openai_codex(config, current_model) elif selected_provider == "custom": _model_flow_custom(config) @@ -512,6 +521,53 @@ def _model_flow_nous(config, current_model=""): print("No change.") +def _model_flow_openai_codex(config, current_model=""): + """OpenAI Codex provider: ensure logged in, then pick model.""" + from hermes_cli.auth import ( + get_codex_auth_status, _prompt_model_selection, _save_model_choice, + _update_config_for_provider, _login_openai_codex, + PROVIDER_REGISTRY, DEFAULT_CODEX_BASE_URL, + ) + from hermes_cli.codex_models import get_codex_model_ids + from hermes_cli.config import get_env_value, save_env_value + import argparse + + status = get_codex_auth_status() + if not status.get("logged_in"): + print("Not logged into OpenAI Codex. 
Starting login...") + print() + try: + mock_args = argparse.Namespace() + _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"]) + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + + _codex_token = None + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + _codex_creds = resolve_codex_runtime_credentials() + _codex_token = _codex_creds.get("api_key") + except Exception: + pass + codex_models = get_codex_model_ids(access_token=_codex_token) + + selected = _prompt_model_selection(codex_models, current_model=current_model) + if selected: + _save_model_choice(selected) + _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) + # Clear custom endpoint env vars that would otherwise override Codex. + if get_env_value("OPENAI_BASE_URL"): + save_env_value("OPENAI_BASE_URL", "") + save_env_value("OPENAI_API_KEY", "") + print(f"Default model set to: {selected} (via OpenAI Codex)") + else: + print("No change.") + + def _model_flow_custom(config): """Custom endpoint: collect URL, API key, and model name.""" from hermes_cli.auth import _save_model_choice, deactivate_provider @@ -777,8 +833,8 @@ def cmd_update(args): pass # No systemd (macOS, WSL1, etc.) 
— skip silently print() - print("Tip: You can now log in with Nous Portal for inference:") - print(" hermes login # Authenticate with Nous Portal") + print("Tip: You can now select a provider and model:") + print(" hermes model # Select provider and model") except subprocess.CalledProcessError as e: print(f"✗ Update failed: {e}") @@ -798,7 +854,6 @@ Examples: hermes --continue Resume the most recent session hermes --resume Resume a specific session hermes setup Run setup wizard - hermes login Authenticate with an inference provider hermes logout Clear stored authentication hermes model Select default model hermes config View configuration @@ -857,7 +912,7 @@ For more help on a command: ) chat_parser.add_argument( "--provider", - choices=["auto", "openrouter", "nous"], + choices=["auto", "openrouter", "nous", "openai-codex"], default=None, help="Inference provider (default: auto)" ) @@ -966,9 +1021,9 @@ For more help on a command: ) login_parser.add_argument( "--provider", - choices=["nous"], + choices=["nous", "openai-codex"], default=None, - help="Provider to authenticate with (default: interactive selection)" + help="Provider to authenticate with (default: nous)" ) login_parser.add_argument( "--portal-url", @@ -1020,7 +1075,7 @@ For more help on a command: ) logout_parser.add_argument( "--provider", - choices=["nous"], + choices=["nous", "openai-codex"], default=None, help="Provider to log out from (default: active provider)" ) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py new file mode 100644 index 000000000..1f070ac22 --- /dev/null +++ b/hermes_cli/runtime_provider.py @@ -0,0 +1,149 @@ +"""Shared runtime provider resolution for CLI, gateway, cron, and helpers.""" + +from __future__ import annotations + +import os +from typing import Any, Dict, Optional + +from hermes_cli.auth import ( + AuthError, + format_auth_error, + resolve_provider, + resolve_nous_runtime_credentials, + resolve_codex_runtime_credentials, +) +from 
hermes_cli.config import load_config +from hermes_constants import OPENROUTER_BASE_URL + + +def _get_model_config() -> Dict[str, Any]: + config = load_config() + model_cfg = config.get("model") + if isinstance(model_cfg, dict): + return dict(model_cfg) + if isinstance(model_cfg, str) and model_cfg.strip(): + return {"default": model_cfg.strip()} + return {} + + +def resolve_requested_provider(requested: Optional[str] = None) -> str: + """Resolve provider request from explicit arg, env, then config.""" + if requested and requested.strip(): + return requested.strip().lower() + + env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower() + if env_provider: + return env_provider + + model_cfg = _get_model_config() + cfg_provider = model_cfg.get("provider") + if isinstance(cfg_provider, str) and cfg_provider.strip(): + return cfg_provider.strip().lower() + + return "auto" + + +def _resolve_openrouter_runtime( + *, + requested_provider: str, + explicit_api_key: Optional[str] = None, + explicit_base_url: Optional[str] = None, +) -> Dict[str, Any]: + model_cfg = _get_model_config() + cfg_base_url = model_cfg.get("base_url") if isinstance(model_cfg.get("base_url"), str) else "" + cfg_provider = model_cfg.get("provider") if isinstance(model_cfg.get("provider"), str) else "" + requested_norm = (requested_provider or "").strip().lower() + cfg_provider = cfg_provider.strip().lower() + + env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip() + env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip() + + use_config_base_url = False + if requested_norm == "auto": + if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url: + if not cfg_provider or cfg_provider == "auto": + use_config_base_url = True + + base_url = ( + (explicit_base_url or "").strip() + or env_openai_base_url + or (cfg_base_url.strip() if use_config_base_url else "") + or env_openrouter_base_url + or OPENROUTER_BASE_URL + ).rstrip("/") + + api_key = ( + 
explicit_api_key + or os.getenv("OPENAI_API_KEY") + or os.getenv("OPENROUTER_API_KEY") + or "" + ) + + source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config" + + return { + "provider": "openrouter", + "api_mode": "chat_completions", + "base_url": base_url, + "api_key": api_key, + "source": source, + } + + +def resolve_runtime_provider( + *, + requested: Optional[str] = None, + explicit_api_key: Optional[str] = None, + explicit_base_url: Optional[str] = None, +) -> Dict[str, Any]: + """Resolve runtime provider credentials for agent execution.""" + requested_provider = resolve_requested_provider(requested) + + provider = resolve_provider( + requested_provider, + explicit_api_key=explicit_api_key, + explicit_base_url=explicit_base_url, + ) + + if provider == "nous": + creds = resolve_nous_runtime_credentials( + min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + ) + return { + "provider": "nous", + "api_mode": "chat_completions", + "base_url": creds.get("base_url", "").rstrip("/"), + "api_key": creds.get("api_key", ""), + "source": creds.get("source", "portal"), + "expires_at": creds.get("expires_at"), + "requested_provider": requested_provider, + } + + if provider == "openai-codex": + creds = resolve_codex_runtime_credentials() + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": creds.get("base_url", "").rstrip("/"), + "api_key": creds.get("api_key", ""), + "source": creds.get("source", "codex-auth-json"), + "auth_file": creds.get("auth_file"), + "codex_home": creds.get("codex_home"), + "last_refresh": creds.get("last_refresh"), + "requested_provider": requested_provider, + } + + runtime = _resolve_openrouter_runtime( + requested_provider=requested_provider, + explicit_api_key=explicit_api_key, + explicit_base_url=explicit_base_url, + ) + runtime["requested_provider"] = requested_provider + return 
runtime + + +def format_runtime_provider_error(error: Exception) -> str: + if isinstance(error, AuthError): + return format_auth_error(error) + return str(error) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 6ed9fb64a..fa4dcebb4 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -620,11 +620,24 @@ def run_setup_wizard(args): get_active_provider, get_provider_auth_state, PROVIDER_REGISTRY, format_auth_error, AuthError, fetch_nous_models, resolve_nous_runtime_credentials, _update_config_for_provider, + _login_openai_codex, get_codex_auth_status, DEFAULT_CODEX_BASE_URL, + detect_external_credentials, ) existing_custom = get_env_value("OPENAI_BASE_URL") existing_or = get_env_value("OPENROUTER_API_KEY") active_oauth = get_active_provider() + # Detect credentials from other CLI tools + detected_creds = detect_external_credentials() + if detected_creds: + print_info("Detected existing credentials:") + for cred in detected_creds: + if cred["provider"] == "openai-codex": + print_success(f" * {cred['label']} -- select \"OpenAI Codex\" to use it") + else: + print_info(f" * {cred['label']}") + print() + # Detect if any provider is already configured has_any_provider = bool(active_oauth or existing_custom or existing_or) @@ -640,6 +653,7 @@ def run_setup_wizard(args): provider_choices = [ "Login with Nous Portal (Nous Research subscription)", + "Login with OpenAI Codex", "OpenRouter API key (100+ models, pay-per-use)", "Custom OpenAI-compatible endpoint (self-hosted / VLLM / etc.)", ] @@ -647,7 +661,7 @@ def run_setup_wizard(args): provider_choices.append(keep_label) # Default to "Keep current" if a provider exists, otherwise OpenRouter (most common) - default_provider = len(provider_choices) - 1 if has_any_provider else 1 + default_provider = len(provider_choices) - 1 if has_any_provider else 2 if not has_any_provider: print_warning("An inference provider is required for Hermes to work.") @@ -656,7 +670,7 @@ def run_setup_wizard(args): provider_idx = 
prompt_choice("Select your inference provider:", provider_choices, default_provider) # Track which provider was selected for model step - selected_provider = None # "nous", "openrouter", "custom", or None (keep) + selected_provider = None # "nous", "openai-codex", "openrouter", "custom", or None (keep) nous_models = [] # populated if Nous login succeeds if provider_idx == 0: # Nous Portal @@ -692,14 +706,38 @@ def run_setup_wizard(args): except SystemExit: print_warning("Nous Portal login was cancelled or failed.") - print_info("You can try again later with: hermes login") + print_info("You can try again later with: hermes model") selected_provider = None except Exception as e: print_error(f"Login failed: {e}") - print_info("You can try again later with: hermes login") + print_info("You can try again later with: hermes model") selected_provider = None - elif provider_idx == 1: # OpenRouter + elif provider_idx == 1: # OpenAI Codex + selected_provider = "openai-codex" + print() + print_header("OpenAI Codex Login") + print() + + try: + import argparse + mock_args = argparse.Namespace() + _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"]) + # Clear custom endpoint vars that would override provider routing. 
+ if existing_custom: + save_env_value("OPENAI_BASE_URL", "") + save_env_value("OPENAI_API_KEY", "") + _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) + except SystemExit: + print_warning("OpenAI Codex login was cancelled or failed.") + print_info("You can try again later with: hermes model") + selected_provider = None + except Exception as e: + print_error(f"Login failed: {e}") + print_info("You can try again later with: hermes model") + selected_provider = None + + elif provider_idx == 2: # OpenRouter selected_provider = "openrouter" print() print_header("OpenRouter API Key") @@ -726,7 +764,7 @@ def run_setup_wizard(args): save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - elif provider_idx == 2: # Custom endpoint + elif provider_idx == 3: # Custom endpoint selected_provider = "custom" print() print_header("Custom OpenAI-Compatible Endpoint") @@ -753,14 +791,14 @@ def run_setup_wizard(args): config['model'] = model_name save_env_value("LLM_MODEL", model_name) print_success("Custom endpoint configured") - # else: provider_idx == 3 (Keep current) — only shown when a provider already exists + # else: provider_idx == 4 (Keep current) — only shown when a provider already exists # ========================================================================= # Step 1b: OpenRouter API Key for tools (if not already set) # ========================================================================= # Tools (vision, web, MoA) use OpenRouter independently of the main provider. # Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen. 
- if selected_provider in ("nous", "custom") and not get_env_value("OPENROUTER_API_KEY"): + if selected_provider in ("nous", "openai-codex", "custom") and not get_env_value("OPENROUTER_API_KEY"): print() print_header("OpenRouter API Key (for tools)") print_info("Tools like vision analysis, web search, and MoA use OpenRouter") @@ -806,6 +844,33 @@ def run_setup_wizard(args): config['model'] = custom save_env_value("LLM_MODEL", custom) # else: keep current + elif selected_provider == "openai-codex": + from hermes_cli.codex_models import get_codex_model_ids + # Try to get the access token for live model discovery + _codex_token = None + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + _codex_creds = resolve_codex_runtime_credentials() + _codex_token = _codex_creds.get("api_key") + except Exception: + pass + codex_models = get_codex_model_ids(access_token=_codex_token) + model_choices = [f"{m}" for m in codex_models] + model_choices.append("Custom model") + model_choices.append(f"Keep current ({current_model})") + + keep_idx = len(model_choices) - 1 + model_idx = prompt_choice("Select default model:", model_choices, keep_idx) + + if model_idx < len(codex_models): + config['model'] = codex_models[model_idx] + save_env_value("LLM_MODEL", codex_models[model_idx]) + elif model_idx == len(codex_models): + custom = prompt("Enter model name") + if custom: + config['model'] = custom + save_env_value("LLM_MODEL", custom) + _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) else: # Static list for OpenRouter / fallback (from canonical list) from hermes_cli.models import model_ids, menu_labels diff --git a/hermes_cli/status.py b/hermes_cli/status.py index ec50c6d62..f1d3a7edf 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -101,15 +101,17 @@ def show_status(args): print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD)) try: - from hermes_cli.auth import get_nous_auth_status + from hermes_cli.auth import 
get_nous_auth_status, get_codex_auth_status nous_status = get_nous_auth_status() + codex_status = get_codex_auth_status() except Exception: nous_status = {} + codex_status = {} nous_logged_in = bool(nous_status.get("logged_in")) print( f" {'Nous Portal':<12} {check_mark(nous_logged_in)} " - f"{'logged in' if nous_logged_in else 'not logged in (run: hermes login)'}" + f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}" ) if nous_logged_in: portal_url = nous_status.get("portal_base_url") or "(unknown)" @@ -121,6 +123,20 @@ def show_status(args): print(f" Key exp: {key_exp}") print(f" Refresh: {refresh_label}") + codex_logged_in = bool(codex_status.get("logged_in")) + print( + f" {'OpenAI Codex':<12} {check_mark(codex_logged_in)} " + f"{'logged in' if codex_logged_in else 'not logged in (run: hermes model)'}" + ) + codex_auth_file = codex_status.get("auth_file") + if codex_auth_file: + print(f" Auth file: {codex_auth_file}") + codex_last_refresh = _format_iso_timestamp(codex_status.get("last_refresh")) + if codex_status.get("last_refresh"): + print(f" Refreshed: {codex_last_refresh}") + if codex_status.get("error") and not codex_logged_in: + print(f" Error: {codex_status.get('error')}") + # ========================================================================= # Terminal Configuration # ========================================================================= diff --git a/run_agent.py b/run_agent.py index 3a939d161..669f1899c 100644 --- a/run_agent.py +++ b/run_agent.py @@ -21,6 +21,7 @@ Usage: """ import copy +import hashlib import json import logging logger = logging.getLogger(__name__) @@ -30,6 +31,7 @@ import re import sys import time import threading +from types import SimpleNamespace import uuid from typing import List, Dict, Any, Optional from openai import OpenAI @@ -87,6 +89,7 @@ from agent.prompt_builder import build_skills_system_prompt, build_context_files from agent.display import ( KawaiiSpinner, build_tool_preview as 
_build_tool_preview, get_cute_tool_message as _get_cute_tool_message_impl, + _detect_tool_failure, ) from agent.trajectory import ( convert_scratchpad_to_think, has_incomplete_scratchpad, @@ -106,6 +109,8 @@ class AIAgent: self, base_url: str = None, api_key: str = None, + provider: str = None, + api_mode: str = None, model: str = "anthropic/claude-opus-4.6", # OpenRouter format max_iterations: int = 60, # Default tool-calling iterations tool_delay: float = 1.0, @@ -124,6 +129,7 @@ class AIAgent: session_id: str = None, tool_progress_callback: callable = None, clarify_callback: callable = None, + step_callback: callable = None, max_tokens: int = None, reasoning_config: Dict[str, Any] = None, prefill_messages: List[Dict[str, Any]] = None, @@ -139,6 +145,8 @@ class AIAgent: Args: base_url (str): Base URL for the model API (optional) api_key (str): API key for authentication (optional, uses env var if not provided) + provider (str): Provider identifier (optional; used for telemetry/routing hints) + api_mode (str): API mode override: "chat_completions" or "codex_responses" model (str): Model name to use (default: "anthropic/claude-opus-4.6") max_iterations (int): Maximum number of tool calling iterations (default: 60) tool_delay (float): Delay between tool calls in seconds (default: 1.0) @@ -186,6 +194,17 @@ class AIAgent: # Store effective base URL for feature detection (prompt caching, reasoning, etc.) # When no base_url is provided, the client defaults to OpenRouter, so reflect that here. 
self.base_url = base_url or OPENROUTER_BASE_URL + provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None + self.provider = provider_name or "openrouter" + if api_mode in {"chat_completions", "codex_responses"}: + self.api_mode = api_mode + elif self.provider == "openai-codex": + self.api_mode = "codex_responses" + elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower(): + self.api_mode = "codex_responses" + self.provider = "openai-codex" + else: + self.api_mode = "chat_completions" if base_url and "api.anthropic.com" in base_url.strip().lower(): raise ValueError( "Anthropic's native /v1/messages API is not supported yet (planned for a future release). " @@ -195,6 +214,7 @@ class AIAgent: ) self.tool_progress_callback = tool_progress_callback self.clarify_callback = clarify_callback + self.step_callback = step_callback self._last_reported_tool = None # Track for "new tool" mode # Interrupt mechanism for breaking out of tool loops @@ -228,13 +248,33 @@ class AIAgent: self._use_prompt_caching = is_openrouter and is_claude self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost) - # Configure logging + # Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log + # so tool failures, API errors, etc. are inspectable after the fact. 
+ from agent.redact import RedactingFormatter + _error_log_dir = Path.home() / ".hermes" / "logs" + _error_log_dir.mkdir(parents=True, exist_ok=True) + _error_log_path = _error_log_dir / "errors.log" + from logging.handlers import RotatingFileHandler + _error_file_handler = RotatingFileHandler( + _error_log_path, maxBytes=2 * 1024 * 1024, backupCount=2, + ) + _error_file_handler.setLevel(logging.WARNING) + _error_file_handler.setFormatter(RedactingFormatter( + '%(asctime)s %(levelname)s %(name)s: %(message)s', + )) + logging.getLogger().addHandler(_error_file_handler) + if self.verbose_logging: logging.basicConfig( level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%H:%M:%S' ) + for handler in logging.getLogger().handlers: + handler.setFormatter(RedactingFormatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt='%H:%M:%S', + )) # Keep third-party libraries at WARNING level to reduce noise # We have our own retry and error logging that's more informative logging.getLogger('openai').setLevel(logging.WARNING) @@ -297,7 +337,7 @@ class AIAgent: client_kwargs["default_headers"] = { "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", } self._client_kwargs = client_kwargs # stored for rebuilding after interrupt @@ -479,9 +519,10 @@ class AIAgent: # Initialize context compressor for automatic context management # Compresses conversation when approaching model's context limit - # Configuration via environment variables (can be set in .env or cli-config.yaml) + # Configuration via config.yaml (compression section) or environment variables compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85")) compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes") + compression_summary_model = 
os.getenv("CONTEXT_COMPRESSION_MODEL") or None self.context_compressor = ContextCompressor( model=self.model, @@ -489,10 +530,17 @@ class AIAgent: protect_first_n=3, protect_last_n=4, summary_target_tokens=500, + summary_model_override=compression_summary_model, quiet_mode=self.quiet_mode, ) self.compression_enabled = compression_enabled self._user_turn_count = 0 + + # Cumulative token usage for the session + self.session_prompt_tokens = 0 + self.session_completion_tokens = 0 + self.session_total_tokens = 0 + self.session_api_calls = 0 if not self.quiet_mode: if compression_enabled: @@ -542,6 +590,77 @@ class AIAgent: if not content: return "" return re.sub(r'.*?', '', content, flags=re.DOTALL) + + def _looks_like_codex_intermediate_ack( + self, + user_message: str, + assistant_content: str, + messages: List[Dict[str, Any]], + ) -> bool: + """Detect a planning/ack message that should continue instead of ending the turn.""" + if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages): + return False + + assistant_text = self._strip_think_blocks(assistant_content or "").strip().lower() + if not assistant_text: + return False + if len(assistant_text) > 1200: + return False + + has_future_ack = bool( + re.search(r"\b(i['’]ll|i will|let me|i can do that|i can help with that)\b", assistant_text) + ) + if not has_future_ack: + return False + + action_markers = ( + "look into", + "look at", + "inspect", + "scan", + "check", + "analyz", + "review", + "explore", + "read", + "open", + "run", + "test", + "fix", + "debug", + "search", + "find", + "walkthrough", + "report back", + "summarize", + ) + workspace_markers = ( + "directory", + "current directory", + "current dir", + "cwd", + "repo", + "repository", + "codebase", + "project", + "folder", + "filesystem", + "file tree", + "files", + "path", + ) + + user_text = (user_message or "").strip().lower() + user_targets_workspace = ( + any(marker in user_text for marker in workspace_markers) + or "~/" in 
user_text + or "/" in user_text + ) + assistant_mentions_action = any(marker in assistant_text for marker in action_markers) + assistant_targets_workspace = any( + marker in assistant_text for marker in workspace_markers + ) + return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action def _extract_reasoning(self, assistant_message) -> Optional[str]: @@ -1257,6 +1376,615 @@ class AIAgent: if self._memory_store: self._memory_store.load_from_disk() + def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]: + """Convert chat-completions tool schemas to Responses function-tool schemas.""" + source_tools = tools if tools is not None else self.tools + if not source_tools: + return None + + converted: List[Dict[str, Any]] = [] + for item in source_tools: + fn = item.get("function", {}) if isinstance(item, dict) else {} + name = fn.get("name") + if not isinstance(name, str) or not name.strip(): + continue + converted.append({ + "type": "function", + "name": name, + "description": fn.get("description", ""), + "strict": False, + "parameters": fn.get("parameters", {"type": "object", "properties": {}}), + }) + return converted or None + + @staticmethod + def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]: + """Split a stored tool id into (call_id, response_item_id).""" + if not isinstance(raw_id, str): + return None, None + value = raw_id.strip() + if not value: + return None, None + if "|" in value: + call_id, response_item_id = value.split("|", 1) + call_id = call_id.strip() or None + response_item_id = response_item_id.strip() or None + return call_id, response_item_id + if value.startswith("fc_"): + return None, value + return value, None + + def _derive_responses_function_call_id( + self, + call_id: str, + response_item_id: Optional[str] = None, + ) -> str: + """Build a valid Responses `function_call.id` (must start with `fc_`).""" + if 
isinstance(response_item_id, str): + candidate = response_item_id.strip() + if candidate.startswith("fc_"): + return candidate + + source = (call_id or "").strip() + if source.startswith("fc_"): + return source + if source.startswith("call_") and len(source) > len("call_"): + return f"fc_{source[len('call_'):]}" + + sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source) + if sanitized.startswith("fc_"): + return sanitized + if sanitized.startswith("call_") and len(sanitized) > len("call_"): + return f"fc_{sanitized[len('call_'):]}" + if sanitized: + return f"fc_{sanitized[:48]}" + + seed = source or str(response_item_id or "") or uuid.uuid4().hex + digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24] + return f"fc_{digest}" + + def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Convert internal chat-style messages to Responses input items.""" + items: List[Dict[str, Any]] = [] + + for msg in messages: + if not isinstance(msg, dict): + continue + role = msg.get("role") + if role == "system": + continue + + if role in {"user", "assistant"}: + content = msg.get("content", "") + content_text = str(content) if content is not None else "" + + if role == "assistant": + # Replay encrypted reasoning items from previous turns + # so the API can maintain coherent reasoning chains. 
+ codex_reasoning = msg.get("codex_reasoning_items") + if isinstance(codex_reasoning, list): + for ri in codex_reasoning: + if isinstance(ri, dict) and ri.get("encrypted_content"): + items.append(ri) + + if content_text.strip(): + items.append({"role": "assistant", "content": content_text}) + + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if not isinstance(tc, dict): + continue + fn = tc.get("function", {}) + fn_name = fn.get("name") + if not isinstance(fn_name, str) or not fn_name.strip(): + continue + + embedded_call_id, embedded_response_item_id = self._split_responses_tool_id( + tc.get("id") + ) + call_id = tc.get("call_id") + if not isinstance(call_id, str) or not call_id.strip(): + call_id = embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + if ( + isinstance(embedded_response_item_id, str) + and embedded_response_item_id.startswith("fc_") + and len(embedded_response_item_id) > len("fc_") + ): + call_id = f"call_{embedded_response_item_id[len('fc_'):]}" + else: + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + + arguments = fn.get("arguments", "{}") + if isinstance(arguments, dict): + arguments = json.dumps(arguments, ensure_ascii=False) + elif not isinstance(arguments, str): + arguments = str(arguments) + arguments = arguments.strip() or "{}" + + items.append({ + "type": "function_call", + "call_id": call_id, + "name": fn_name, + "arguments": arguments, + }) + continue + + items.append({"role": role, "content": content_text}) + continue + + if role == "tool": + raw_tool_call_id = msg.get("tool_call_id") + call_id, _ = self._split_responses_tool_id(raw_tool_call_id) + if not isinstance(call_id, str) or not call_id.strip(): + if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip(): + call_id = raw_tool_call_id.strip() + if not isinstance(call_id, str) or not call_id.strip(): + continue + items.append({ + "type": "function_call_output", + "call_id": 
call_id, + "output": str(msg.get("content", "") or ""), + }) + + return items + + def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]: + if not isinstance(raw_items, list): + raise ValueError("Codex Responses input must be a list of input items.") + + normalized: List[Dict[str, Any]] = [] + for idx, item in enumerate(raw_items): + if not isinstance(item, dict): + raise ValueError(f"Codex Responses input[{idx}] must be an object.") + + item_type = item.get("type") + if item_type == "function_call": + call_id = item.get("call_id") + name = item.get("name") + if not isinstance(call_id, str) or not call_id.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.") + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.") + + arguments = item.get("arguments", "{}") + if isinstance(arguments, dict): + arguments = json.dumps(arguments, ensure_ascii=False) + elif not isinstance(arguments, str): + arguments = str(arguments) + arguments = arguments.strip() or "{}" + + normalized.append( + { + "type": "function_call", + "call_id": call_id.strip(), + "name": name.strip(), + "arguments": arguments, + } + ) + continue + + if item_type == "function_call_output": + call_id = item.get("call_id") + if not isinstance(call_id, str) or not call_id.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.") + output = item.get("output", "") + if output is None: + output = "" + if not isinstance(output, str): + output = str(output) + + normalized.append( + { + "type": "function_call_output", + "call_id": call_id.strip(), + "output": output, + } + ) + continue + + role = item.get("role") + if role in {"user", "assistant"}: + content = item.get("content", "") + if content is None: + content = "" + if not isinstance(content, str): + content = str(content) + + normalized.append({"role": role, "content": content}) 
+ continue + + raise ValueError( + f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})." + ) + + return normalized + + def _preflight_codex_api_kwargs( + self, + api_kwargs: Any, + *, + allow_stream: bool = False, + ) -> Dict[str, Any]: + if not isinstance(api_kwargs, dict): + raise ValueError("Codex Responses request must be a dict.") + + required = {"model", "instructions", "input"} + missing = [key for key in required if key not in api_kwargs] + if missing: + raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.") + + model = api_kwargs.get("model") + if not isinstance(model, str) or not model.strip(): + raise ValueError("Codex Responses request 'model' must be a non-empty string.") + model = model.strip() + + instructions = api_kwargs.get("instructions") + if instructions is None: + instructions = "" + if not isinstance(instructions, str): + instructions = str(instructions) + instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY + + normalized_input = self._preflight_codex_input_items(api_kwargs.get("input")) + + tools = api_kwargs.get("tools") + normalized_tools = None + if tools is not None: + if not isinstance(tools, list): + raise ValueError("Codex Responses request 'tools' must be a list when provided.") + normalized_tools = [] + for idx, tool in enumerate(tools): + if not isinstance(tool, dict): + raise ValueError(f"Codex Responses tools[{idx}] must be an object.") + if tool.get("type") != "function": + raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.") + + name = tool.get("name") + parameters = tool.get("parameters") + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.") + if not isinstance(parameters, dict): + raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.") + + description = tool.get("description", "") + if 
description is None: + description = "" + if not isinstance(description, str): + description = str(description) + + strict = tool.get("strict", False) + if not isinstance(strict, bool): + strict = bool(strict) + + normalized_tools.append( + { + "type": "function", + "name": name.strip(), + "description": description, + "strict": strict, + "parameters": parameters, + } + ) + + store = api_kwargs.get("store", False) + if store is not False: + raise ValueError("Codex Responses contract requires 'store' to be false.") + + allowed_keys = { + "model", "instructions", "input", "tools", "store", + "reasoning", "include", "max_output_tokens", "temperature", + } + normalized: Dict[str, Any] = { + "model": model, + "instructions": instructions, + "input": normalized_input, + "tools": normalized_tools, + "store": False, + } + + # Pass through reasoning config + reasoning = api_kwargs.get("reasoning") + if isinstance(reasoning, dict): + normalized["reasoning"] = reasoning + include = api_kwargs.get("include") + if isinstance(include, list): + normalized["include"] = include + + # Pass through max_output_tokens and temperature + max_output_tokens = api_kwargs.get("max_output_tokens") + if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0: + normalized["max_output_tokens"] = int(max_output_tokens) + temperature = api_kwargs.get("temperature") + if isinstance(temperature, (int, float)): + normalized["temperature"] = float(temperature) + + if allow_stream: + stream = api_kwargs.get("stream") + if stream is not None and stream is not True: + raise ValueError("Codex Responses 'stream' must be true when set.") + if stream is True: + normalized["stream"] = True + allowed_keys.add("stream") + elif "stream" in api_kwargs: + raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.") + + unexpected = sorted(key for key in api_kwargs.keys() if key not in allowed_keys) + if unexpected: + raise ValueError( + f"Codex Responses request 
has unsupported field(s): {', '.join(unexpected)}." + ) + + return normalized + + def _extract_responses_message_text(self, item: Any) -> str: + """Extract assistant text from a Responses message output item.""" + content = getattr(item, "content", None) + if not isinstance(content, list): + return "" + + chunks: List[str] = [] + for part in content: + ptype = getattr(part, "type", None) + if ptype not in {"output_text", "text"}: + continue + text = getattr(part, "text", None) + if isinstance(text, str) and text: + chunks.append(text) + return "".join(chunks).strip() + + def _extract_responses_reasoning_text(self, item: Any) -> str: + """Extract a compact reasoning text from a Responses reasoning item.""" + summary = getattr(item, "summary", None) + if isinstance(summary, list): + chunks: List[str] = [] + for part in summary: + text = getattr(part, "text", None) + if isinstance(text, str) and text: + chunks.append(text) + if chunks: + return "\n".join(chunks).strip() + text = getattr(item, "text", None) + if isinstance(text, str) and text: + return text.strip() + return "" + + def _normalize_codex_response(self, response: Any) -> tuple[Any, str]: + """Normalize a Responses API object to an assistant_message-like object.""" + output = getattr(response, "output", None) + if not isinstance(output, list) or not output: + raise RuntimeError("Responses API returned no output items") + + response_status = getattr(response, "status", None) + if isinstance(response_status, str): + response_status = response_status.strip().lower() + else: + response_status = None + + if response_status in {"failed", "cancelled"}: + error_obj = getattr(response, "error", None) + if isinstance(error_obj, dict): + error_msg = error_obj.get("message") or str(error_obj) + else: + error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'" + raise RuntimeError(error_msg) + + content_parts: List[str] = [] + reasoning_parts: List[str] = [] + reasoning_items_raw: 
List[Dict[str, Any]] = [] + tool_calls: List[Any] = [] + has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} + saw_commentary_phase = False + saw_final_answer_phase = False + + for item in output: + item_type = getattr(item, "type", None) + item_status = getattr(item, "status", None) + if isinstance(item_status, str): + item_status = item_status.strip().lower() + else: + item_status = None + + if item_status in {"queued", "in_progress", "incomplete"}: + has_incomplete_items = True + + if item_type == "message": + item_phase = getattr(item, "phase", None) + if isinstance(item_phase, str): + normalized_phase = item_phase.strip().lower() + if normalized_phase in {"commentary", "analysis"}: + saw_commentary_phase = True + elif normalized_phase in {"final_answer", "final"}: + saw_final_answer_phase = True + message_text = self._extract_responses_message_text(item) + if message_text: + content_parts.append(message_text) + elif item_type == "reasoning": + reasoning_text = self._extract_responses_reasoning_text(item) + if reasoning_text: + reasoning_parts.append(reasoning_text) + # Capture the full reasoning item for multi-turn continuity. + # encrypted_content is an opaque blob the API needs back on + # subsequent turns to maintain coherent reasoning chains. 
+ encrypted = getattr(item, "encrypted_content", None) + if isinstance(encrypted, str) and encrypted: + raw_item = {"type": "reasoning", "encrypted_content": encrypted} + item_id = getattr(item, "id", None) + if isinstance(item_id, str) and item_id: + raw_item["id"] = item_id + reasoning_items_raw.append(raw_item) + elif item_type == "function_call": + if item_status in {"queued", "in_progress", "incomplete"}: + continue + fn_name = getattr(item, "name", "") or "" + arguments = getattr(item, "arguments", "{}") + if not isinstance(arguments, str): + arguments = str(arguments) + raw_call_id = getattr(item, "call_id", None) + raw_item_id = getattr(item, "id", None) + embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) + call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + response_item_id = raw_item_id if isinstance(raw_item_id, str) else None + response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) + tool_calls.append(SimpleNamespace( + id=call_id, + call_id=call_id, + response_item_id=response_item_id, + type="function", + function=SimpleNamespace(name=fn_name, arguments=arguments), + )) + elif item_type == "custom_tool_call": + fn_name = getattr(item, "name", "") or "" + arguments = getattr(item, "input", "{}") + if not isinstance(arguments, str): + arguments = str(arguments) + raw_call_id = getattr(item, "call_id", None) + raw_item_id = getattr(item, "id", None) + embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) + call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + response_item_id = raw_item_id if isinstance(raw_item_id, str) else None + 
response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) + tool_calls.append(SimpleNamespace( + id=call_id, + call_id=call_id, + response_item_id=response_item_id, + type="function", + function=SimpleNamespace(name=fn_name, arguments=arguments), + )) + + final_text = "\n".join([p for p in content_parts if p]).strip() + if not final_text and hasattr(response, "output_text"): + out_text = getattr(response, "output_text", "") + if isinstance(out_text, str): + final_text = out_text.strip() + + assistant_message = SimpleNamespace( + content=final_text, + tool_calls=tool_calls, + reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None, + reasoning_content=None, + reasoning_details=None, + codex_reasoning_items=reasoning_items_raw or None, + ) + + if tool_calls: + finish_reason = "tool_calls" + elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): + finish_reason = "incomplete" + else: + finish_reason = "stop" + return assistant_message, finish_reason + + def _run_codex_stream(self, api_kwargs: dict): + """Execute one streaming Responses API request and return the final response.""" + max_stream_retries = 1 + for attempt in range(max_stream_retries + 1): + try: + with self.client.responses.stream(**api_kwargs) as stream: + for _ in stream: + pass + return stream.get_final_response() + except RuntimeError as exc: + err_text = str(exc) + missing_completed = "response.completed" in err_text + if missing_completed and attempt < max_stream_retries: + logger.debug( + "Responses stream closed before completion (attempt %s/%s); retrying.", + attempt + 1, + max_stream_retries + 1, + ) + continue + if missing_completed: + logger.debug( + "Responses stream did not emit response.completed; falling back to create(stream=True)." 
+ ) + return self._run_codex_create_stream_fallback(api_kwargs) + raise + + def _run_codex_create_stream_fallback(self, api_kwargs: dict): + """Fallback path for stream completion edge cases on Codex-style Responses backends.""" + fallback_kwargs = dict(api_kwargs) + fallback_kwargs["stream"] = True + fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True) + stream_or_response = self.client.responses.create(**fallback_kwargs) + + # Compatibility shim for mocks or providers that still return a concrete response. + if hasattr(stream_or_response, "output"): + return stream_or_response + if not hasattr(stream_or_response, "__iter__"): + return stream_or_response + + terminal_response = None + try: + for event in stream_or_response: + event_type = getattr(event, "type", None) + if not event_type and isinstance(event, dict): + event_type = event.get("type") + if event_type not in {"response.completed", "response.incomplete", "response.failed"}: + continue + + terminal_response = getattr(event, "response", None) + if terminal_response is None and isinstance(event, dict): + terminal_response = event.get("response") + if terminal_response is not None: + return terminal_response + finally: + close_fn = getattr(stream_or_response, "close", None) + if callable(close_fn): + try: + close_fn() + except Exception: + pass + + if terminal_response is not None: + return terminal_response + raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.") + + def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool: + if self.api_mode != "codex_responses" or self.provider != "openai-codex": + return False + + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + + creds = resolve_codex_runtime_credentials(force_refresh=force) + except Exception as exc: + logger.debug("Codex credential refresh failed: %s", exc) + return False + + api_key = creds.get("api_key") + base_url = 
creds.get("base_url") + if not isinstance(api_key, str) or not api_key.strip(): + return False + if not isinstance(base_url, str) or not base_url.strip(): + return False + + self.api_key = api_key.strip() + self.base_url = base_url.strip().rstrip("/") + self._client_kwargs["api_key"] = self.api_key + self._client_kwargs["base_url"] = self.base_url + + try: + self.client.close() + except Exception: + pass + + try: + self.client = OpenAI(**self._client_kwargs) + except Exception as exc: + logger.warning("Failed to rebuild OpenAI client after Codex refresh: %s", exc) + return False + + return True + def _interruptible_api_call(self, api_kwargs: dict): """ Run the API call in a background thread so the main conversation loop @@ -1270,7 +1998,10 @@ class AIAgent: def _call(): try: - result["response"] = self.client.chat.completions.create(**api_kwargs) + if self.api_mode == "codex_responses": + result["response"] = self._run_codex_stream(api_kwargs) + else: + result["response"] = self.client.chat.completions.create(**api_kwargs) except Exception as e: result["error"] = e @@ -1295,7 +2026,39 @@ class AIAgent: return result["response"] def _build_api_kwargs(self, api_messages: list) -> dict: - """Build the keyword arguments dict for the chat completions API call.""" + """Build the keyword arguments dict for the active API mode.""" + if self.api_mode == "codex_responses": + instructions = "" + payload_messages = api_messages + if api_messages and api_messages[0].get("role") == "system": + instructions = str(api_messages[0].get("content") or "").strip() + payload_messages = api_messages[1:] + if not instructions: + instructions = DEFAULT_AGENT_IDENTITY + + kwargs = { + "model": self.model, + "instructions": instructions, + "input": self._chat_messages_to_responses_input(payload_messages), + "tools": self._responses_tools(), + "store": False, + "reasoning": {"effort": "medium", "summary": "auto"}, + "include": ["reasoning.encrypted_content"], + } + + # Apply reasoning effort 
from config if set + if self.reasoning_config and isinstance(self.reasoning_config, dict): + if self.reasoning_config.get("enabled") is False: + kwargs.pop("reasoning", None) + kwargs["include"] = [] + elif self.reasoning_config.get("effort"): + kwargs["reasoning"]["effort"] = self.reasoning_config["effort"] + + if self.max_tokens is not None: + kwargs["max_output_tokens"] = self.max_tokens + + return kwargs + provider_preferences = {} if self.providers_allowed: provider_preferences["only"] = self.providers_allowed @@ -1362,34 +2125,73 @@ class AIAgent: } if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details: - msg["reasoning_details"] = [ - {"type": d.get("type"), "text": d.get("text"), "signature": d.get("signature")} - for d in assistant_message.reasoning_details - if isinstance(d, dict) - ] + # Pass reasoning_details back unmodified so providers (OpenRouter, + # Anthropic, OpenAI) can maintain reasoning continuity across turns. + # Each provider may include opaque fields (signature, encrypted_content) + # that must be preserved exactly. + raw_details = assistant_message.reasoning_details + preserved = [] + for d in raw_details: + if isinstance(d, dict): + preserved.append(d) + elif hasattr(d, "__dict__"): + preserved.append(d.__dict__) + elif hasattr(d, "model_dump"): + preserved.append(d.model_dump()) + if preserved: + msg["reasoning_details"] = preserved + + # Codex Responses API: preserve encrypted reasoning items for + # multi-turn continuity. These get replayed as input on the next turn. 
+ codex_items = getattr(assistant_message, "codex_reasoning_items", None) + if codex_items: + msg["codex_reasoning_items"] = codex_items if assistant_message.tool_calls: - tc_list = [] + tool_calls = [] for tool_call in assistant_message.tool_calls: + raw_id = getattr(tool_call, "id", None) + call_id = getattr(tool_call, "call_id", None) + if not isinstance(call_id, str) or not call_id.strip(): + embedded_call_id, _ = self._split_responses_tool_id(raw_id) + call_id = embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + if isinstance(raw_id, str) and raw_id.strip(): + call_id = raw_id.strip() + else: + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + + response_item_id = getattr(tool_call, "response_item_id", None) + if not isinstance(response_item_id, str) or not response_item_id.strip(): + _, embedded_response_item_id = self._split_responses_tool_id(raw_id) + response_item_id = embedded_response_item_id + + response_item_id = self._derive_responses_function_call_id( + call_id, + response_item_id if isinstance(response_item_id, str) else None, + ) + tc_dict = { - "id": tool_call.id, + "id": call_id, + "call_id": call_id, + "response_item_id": response_item_id, "type": tool_call.type, "function": { "name": tool_call.function.name, "arguments": tool_call.function.arguments - } + }, } # Preserve extra_content (e.g. Gemini thought_signature) so it # is sent back on subsequent API calls. Without this, Gemini 3 # thinking models reject the request with a 400 error. 
extra = getattr(tool_call, "extra_content", None) if extra is not None: - # Convert Pydantic models to plain dicts for JSON safety if hasattr(extra, "model_dump"): extra = extra.model_dump() tc_dict["extra_content"] = extra - tc_list.append(tc_dict) - msg["tool_calls"] = tc_list + tool_calls.append(tc_dict) + msg["tool_calls"] = tool_calls return msg @@ -1454,40 +2256,68 @@ class AIAgent: messages.pop() # remove flush msg return - api_kwargs = { - "model": self.model, - "messages": api_messages, - "tools": [memory_tool_def], - "temperature": 0.3, - **self._max_tokens_param(1024), - } + # Use auxiliary client for the flush call when available -- + # it's cheaper and avoids Codex Responses API incompatibility. + from agent.auxiliary_client import get_text_auxiliary_client + aux_client, aux_model = get_text_auxiliary_client() - response = self.client.chat.completions.create(**api_kwargs, timeout=30.0) + if aux_client: + api_kwargs = { + "model": aux_model, + "messages": api_messages, + "tools": [memory_tool_def], + "temperature": 0.3, + "max_tokens": 5120, + } + response = aux_client.chat.completions.create(**api_kwargs, timeout=30.0) + elif self.api_mode == "codex_responses": + # No auxiliary client -- use the Codex Responses path directly + codex_kwargs = self._build_api_kwargs(api_messages) + codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) + codex_kwargs["temperature"] = 0.3 + if "max_output_tokens" in codex_kwargs: + codex_kwargs["max_output_tokens"] = 5120 + response = self._run_codex_stream(codex_kwargs) + else: + api_kwargs = { + "model": self.model, + "messages": api_messages, + "tools": [memory_tool_def], + "temperature": 0.3, + **self._max_tokens_param(5120), + } + response = self.client.chat.completions.create(**api_kwargs, timeout=30.0) - if response.choices: + # Extract tool calls from the response, handling both API formats + tool_calls = [] + if self.api_mode == "codex_responses" and not aux_client: + assistant_msg, _ = 
self._normalize_codex_response(response) + if assistant_msg and assistant_msg.tool_calls: + tool_calls = assistant_msg.tool_calls + elif hasattr(response, "choices") and response.choices: assistant_message = response.choices[0].message if assistant_message.tool_calls: - # Execute only memory tool calls - for tc in assistant_message.tool_calls: - if tc.function.name == "memory": - try: - args = json.loads(tc.function.arguments) - flush_target = args.get("target", "memory") - from tools.memory_tool import memory_tool as _memory_tool - result = _memory_tool( - action=args.get("action"), - target=flush_target, - content=args.get("content"), - old_text=args.get("old_text"), - store=self._memory_store, - ) - # Also send user observations to Honcho when active - if self._honcho and flush_target == "user" and args.get("action") == "add": - self._honcho_save_user_observation(args.get("content", "")) - if not self.quiet_mode: - print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}") - except Exception as e: - logger.debug("Memory flush tool call failed: %s", e) + tool_calls = assistant_message.tool_calls + + for tc in tool_calls: + if tc.function.name == "memory": + try: + args = json.loads(tc.function.arguments) + flush_target = args.get("target", "memory") + from tools.memory_tool import memory_tool as _memory_tool + result = _memory_tool( + action=args.get("action"), + target=flush_target, + content=args.get("content"), + old_text=args.get("old_text"), + store=self._memory_store, + ) + if self._honcho and flush_target == "user" and args.get("action") == "add": + self._honcho_save_user_observation(args.get("content", "")) + if not self.quiet_mode: + print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}") + except Exception as e: + logger.debug("Memory flush tool call failed: %s", e) except Exception as e: logger.debug("Memory flush API call failed: %s", e) finally: @@ -1698,7 +2528,7 @@ class AIAgent: _spinner_result = function_result except Exception 
as tool_error: function_result = f"Error executing tool '{function_name}': {tool_error}" - logger.error("handle_function_call raised for %s: %s", function_name, tool_error) + logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True) finally: tool_duration = time.time() - tool_start_time cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result) @@ -1708,11 +2538,17 @@ class AIAgent: function_result = handle_function_call(function_name, function_args, effective_task_id) except Exception as tool_error: function_result = f"Error executing tool '{function_name}': {tool_error}" - logger.error("handle_function_call raised for %s: %s", function_name, tool_error) + logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True) tool_duration = time.time() - tool_start_time result_preview = function_result[:200] if len(function_result) > 200 else function_result + # Log tool errors to the persistent error log so [error] tags + # in the UI always have a corresponding detailed entry on disk. 
+ _is_error_result, _ = _detect_tool_failure(function_name, function_result) + if _is_error_result: + logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview) + if self.verbose_logging: logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s") logging.debug(f"Tool result preview: {result_preview}...") @@ -1795,24 +2631,67 @@ class AIAgent: if _is_nous: summary_extra_body["tags"] = ["product=hermes-agent"] - summary_kwargs = { - "model": self.model, - "messages": api_messages, - } - if self.max_tokens is not None: - summary_kwargs.update(self._max_tokens_param(self.max_tokens)) - if summary_extra_body: - summary_kwargs["extra_body"] = summary_extra_body + if self.api_mode == "codex_responses": + codex_kwargs = self._build_api_kwargs(api_messages) + codex_kwargs["tools"] = None + summary_response = self._run_codex_stream(codex_kwargs) + assistant_message, _ = self._normalize_codex_response(summary_response) + final_response = (assistant_message.content or "").strip() if assistant_message else "" + else: + summary_kwargs = { + "model": self.model, + "messages": api_messages, + } + if self.max_tokens is not None: + summary_kwargs.update(self._max_tokens_param(self.max_tokens)) + if summary_extra_body: + summary_kwargs["extra_body"] = summary_extra_body - summary_response = self.client.chat.completions.create(**summary_kwargs) + summary_response = self.client.chat.completions.create(**summary_kwargs) - if summary_response.choices and summary_response.choices[0].message.content: - final_response = summary_response.choices[0].message.content + if summary_response.choices and summary_response.choices[0].message.content: + final_response = summary_response.choices[0].message.content + else: + final_response = "" + + if final_response: if "" in final_response: final_response = re.sub(r'.*?\s*', '', final_response, flags=re.DOTALL).strip() - messages.append({"role": "assistant", "content": final_response}) + if 
final_response: + messages.append({"role": "assistant", "content": final_response}) + else: + final_response = "I reached the iteration limit and couldn't generate a summary." else: - final_response = "I reached the iteration limit and couldn't generate a summary." + # Retry summary generation + if self.api_mode == "codex_responses": + codex_kwargs = self._build_api_kwargs(api_messages) + codex_kwargs["tools"] = None + retry_response = self._run_codex_stream(codex_kwargs) + retry_msg, _ = self._normalize_codex_response(retry_response) + final_response = (retry_msg.content or "").strip() if retry_msg else "" + else: + summary_kwargs = { + "model": self.model, + "messages": api_messages, + } + if self.max_tokens is not None: + summary_kwargs["max_tokens"] = self.max_tokens + if summary_extra_body: + summary_kwargs["extra_body"] = summary_extra_body + + summary_response = self.client.chat.completions.create(**summary_kwargs) + + if summary_response.choices and summary_response.choices[0].message.content: + final_response = summary_response.choices[0].message.content + else: + final_response = "" + + if final_response: + if "" in final_response: + final_response = re.sub(r'.*?\s*', '', final_response, flags=re.DOTALL).strip() + messages.append({"role": "assistant", "content": final_response}) + else: + final_response = "I reached the iteration limit and couldn't generate a summary." 
except Exception as e: logging.warning(f"Failed to get summary response: {e}") @@ -1930,6 +2809,7 @@ class AIAgent: api_call_count = 0 final_response = None interrupted = False + codex_ack_continuations = 0 # Clear any stale interrupt state at start self.clear_interrupt() @@ -1944,6 +2824,22 @@ class AIAgent: api_call_count += 1 + # Fire step_callback for gateway hooks (agent:step event) + if self.step_callback is not None: + try: + prev_tools = [] + for _m in reversed(messages): + if _m.get("role") == "assistant" and _m.get("tool_calls"): + prev_tools = [ + tc["function"]["name"] + for tc in _m["tool_calls"] + if isinstance(tc, dict) + ] + break + self.step_callback(api_call_count, prev_tools) + except Exception as _step_err: + logger.debug("step_callback error (iteration %s): %s", api_call_count, _step_err) + # Track tool-calling iterations for skill nudge. # Counter resets whenever skill_manage is actually used. if (self._skill_nudge_interval > 0 @@ -2028,10 +2924,15 @@ class AIAgent: api_start_time = time.time() retry_count = 0 max_retries = 6 # Increased to allow longer backoff periods + codex_auth_retry_attempted = False + + finish_reason = "stop" while retry_count < max_retries: try: api_kwargs = self._build_api_kwargs(api_messages) + if self.api_mode == "codex_responses": + api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {"1", "true", "yes", "on"}: self._dump_api_request_debug(api_kwargs, reason="preflight") @@ -2054,8 +2955,33 @@ class AIAgent: resp_model = getattr(response, 'model', 'N/A') if response else 'N/A' logging.debug(f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}") - # Validate response has valid choices before proceeding - if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0: + # Validate response shape before proceeding + 
response_invalid = False + error_details = [] + if self.api_mode == "codex_responses": + output_items = getattr(response, "output", None) if response is not None else None + if response is None: + response_invalid = True + error_details.append("response is None") + elif not isinstance(output_items, list): + response_invalid = True + error_details.append("response.output is not a list") + elif len(output_items) == 0: + response_invalid = True + error_details.append("response.output is empty") + else: + if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0: + response_invalid = True + if response is None: + error_details.append("response is None") + elif not hasattr(response, 'choices'): + error_details.append("response has no 'choices' attribute") + elif response.choices is None: + error_details.append("response.choices is None") + else: + error_details.append("response.choices is empty") + + if response_invalid: # Stop spinner before printing error messages if thinking_spinner: thinking_spinner.stop(f"(´;ω;`) oops, retrying...") @@ -2063,15 +2989,6 @@ class AIAgent: # This is often rate limiting or provider returning malformed response retry_count += 1 - error_details = [] - if response is None: - error_details.append("response is None") - elif not hasattr(response, 'choices'): - error_details.append("response has no 'choices' attribute") - elif response.choices is None: - error_details.append("response.choices is None") - else: - error_details.append("response.choices is empty") # Check for error field in response (some providers include this) error_msg = "Unknown" @@ -2108,7 +3025,7 @@ class AIAgent: "messages": messages, "completed": False, "api_calls": api_call_count, - "error": f"Invalid API response (choices is None/empty). Likely rate limited by provider.", + "error": "Invalid API response shape. 
Likely rate limited or malformed provider response.", "failed": True # Mark as failure for filtering } @@ -2135,7 +3052,20 @@ class AIAgent: continue # Retry the API call # Check finish_reason before proceeding - finish_reason = response.choices[0].finish_reason + if self.api_mode == "codex_responses": + status = getattr(response, "status", None) + incomplete_details = getattr(response, "incomplete_details", None) + incomplete_reason = None + if isinstance(incomplete_details, dict): + incomplete_reason = incomplete_details.get("reason") + else: + incomplete_reason = getattr(incomplete_details, "reason", None) + if status == "incomplete" and incomplete_reason in {"max_output_tokens", "length"}: + finish_reason = "length" + else: + finish_reason = "stop" + else: + finish_reason = response.choices[0].finish_reason # Handle "length" finish_reason - response was truncated if finish_reason == "length": @@ -2172,12 +3102,28 @@ class AIAgent: # Track actual token usage from response for context management if hasattr(response, 'usage') and response.usage: + if self.api_mode == "codex_responses": + prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0 + completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0 + total_tokens = ( + getattr(response.usage, 'total_tokens', None) + or (prompt_tokens + completion_tokens) + ) + else: + prompt_tokens = getattr(response.usage, 'prompt_tokens', 0) or 0 + completion_tokens = getattr(response.usage, 'completion_tokens', 0) or 0 + total_tokens = getattr(response.usage, 'total_tokens', 0) or 0 usage_dict = { - "prompt_tokens": getattr(response.usage, 'prompt_tokens', 0), - "completion_tokens": getattr(response.usage, 'completion_tokens', 0), - "total_tokens": getattr(response.usage, 'total_tokens', 0), + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": total_tokens, } self.context_compressor.update_from_response(usage_dict) + + self.session_prompt_tokens += prompt_tokens + 
self.session_completion_tokens += completion_tokens + self.session_total_tokens += total_tokens + self.session_api_calls += 1 if self.verbose_logging: logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}") @@ -2209,6 +3155,18 @@ class AIAgent: if thinking_spinner: thinking_spinner.stop(f"(╥_╥) error, retrying...") thinking_spinner = None + + status_code = getattr(api_error, "status_code", None) + if ( + self.api_mode == "codex_responses" + and self.provider == "openai-codex" + and status_code == 401 + and not codex_auth_retry_attempted + ): + codex_auth_retry_attempted = True + if self._try_refresh_codex_client_credentials(force=True): + print(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...") + continue retry_count += 1 elapsed_time = time.time() - api_start_time @@ -2365,11 +3323,32 @@ class AIAgent: break try: - assistant_message = response.choices[0].message + if self.api_mode == "codex_responses": + assistant_message, finish_reason = self._normalize_codex_response(response) + else: + assistant_message = response.choices[0].message # Handle assistant response if assistant_message.content and not self.quiet_mode: print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}") + + # Notify progress callback of model's thinking (used by subagent + # delegation to relay the child's reasoning to the parent display). + # Guard: only fire for subagents (_delegate_depth >= 1) to avoid + # spamming gateway platforms with the main agent's every thought. 
+ if (assistant_message.content and self.tool_progress_callback + and getattr(self, '_delegate_depth', 0) > 0): + _think_text = assistant_message.content.strip() + # Strip reasoning XML tags that shouldn't leak to parent display + _think_text = re.sub( + r'', '', _think_text + ).strip() + first_line = _think_text.split('\n')[0][:80] if _think_text else "" + if first_line: + try: + self.tool_progress_callback("_thinking", first_line) + except Exception: + pass # Check for incomplete (opened but never closed) # This means the model ran out of output tokens mid-reasoning — retry up to 2 times @@ -2405,6 +3384,48 @@ class AIAgent: # Reset incomplete scratchpad counter on clean response if hasattr(self, '_incomplete_scratchpad_retries'): self._incomplete_scratchpad_retries = 0 + + if self.api_mode == "codex_responses" and finish_reason == "incomplete": + if not hasattr(self, "_codex_incomplete_retries"): + self._codex_incomplete_retries = 0 + self._codex_incomplete_retries += 1 + + interim_msg = self._build_assistant_message(assistant_message, finish_reason) + interim_has_content = bool(interim_msg.get("content", "").strip()) + interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False + + if interim_has_content or interim_has_reasoning: + last_msg = messages[-1] if messages else None + duplicate_interim = ( + isinstance(last_msg, dict) + and last_msg.get("role") == "assistant" + and last_msg.get("finish_reason") == "incomplete" + and (last_msg.get("content") or "") == (interim_msg.get("content") or "") + and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "") + ) + if not duplicate_interim: + messages.append(interim_msg) + self._log_msg_to_db(interim_msg) + + if self._codex_incomplete_retries < 3: + if not self.quiet_mode: + print(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)") + self._session_messages = messages + 
self._save_session_log(messages) + continue + + self._codex_incomplete_retries = 0 + self._persist_session(messages, conversation_history) + return { + "final_response": None, + "messages": messages, + "api_calls": api_call_count, + "completed": False, + "partial": True, + "error": "Codex response remained incomplete after 3 continuation attempts", + } + elif hasattr(self, "_codex_incomplete_retries"): + self._codex_incomplete_retries = 0 # Check for tool calls if assistant_message.tool_calls: @@ -2577,7 +3598,8 @@ class AIAgent: tool_names.append(fn.get("name", "unknown")) msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..." break - final_response = fallback + # Strip blocks from fallback content for user display + final_response = self._strip_think_blocks(fallback).strip() break # No fallback -- append the empty message as-is @@ -2605,6 +3627,39 @@ class AIAgent: # Reset retry counter on successful content if hasattr(self, '_empty_content_retries'): self._empty_content_retries = 0 + + if ( + self.api_mode == "codex_responses" + and self.valid_tool_names + and codex_ack_continuations < 2 + and self._looks_like_codex_intermediate_ack( + user_message=user_message, + assistant_content=final_response, + messages=messages, + ) + ): + codex_ack_continuations += 1 + interim_msg = self._build_assistant_message(assistant_message, "incomplete") + messages.append(interim_msg) + self._log_msg_to_db(interim_msg) + + continue_msg = { + "role": "user", + "content": ( + "[System: Continue now. 
Execute the required tool calls and only " + "send your final answer after completing the task.]" + ), + } + messages.append(continue_msg) + self._log_msg_to_db(continue_msg) + self._session_messages = messages + self._save_session_log(messages) + continue + + codex_ack_continuations = 0 + + # Strip blocks from user-facing response (keep raw in messages for trajectory) + final_response = self._strip_think_blocks(final_response).strip() final_msg = self._build_assistant_message(assistant_message, finish_reason) diff --git a/scripts/install.sh b/scripts/install.sh index 4f8108bb8..81978e8f0 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -723,7 +723,7 @@ setup_path() { PATH_LINE='export PATH="$HOME/.local/bin:$PATH"' for SHELL_CONFIG in "${SHELL_CONFIGS[@]}"; do - if ! grep -q '\.local/bin' "$SHELL_CONFIG" 2>/dev/null; then + if ! grep -v '^[[:space:]]*#' "$SHELL_CONFIG" 2>/dev/null | grep -qE 'PATH=.*\.local/bin'; then echo "" >> "$SHELL_CONFIG" echo "# Hermes Agent — ensure ~/.local/bin is on PATH" >> "$SHELL_CONFIG" echo "$PATH_LINE" >> "$SHELL_CONFIG" diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py new file mode 100644 index 000000000..efcbce29f --- /dev/null +++ b/tests/agent/test_auxiliary_client.py @@ -0,0 +1,168 @@ +"""Tests for agent.auxiliary_client resolution chain, especially the Codex fallback.""" + +import json +import os +from pathlib import Path +from unittest.mock import patch, MagicMock + +import pytest + +from agent.auxiliary_client import ( + get_text_auxiliary_client, + get_vision_auxiliary_client, + auxiliary_max_tokens_param, + _read_codex_access_token, +) + + +@pytest.fixture(autouse=True) +def _clean_env(monkeypatch): + """Strip provider env vars so each test starts clean.""" + for key in ( + "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY", + "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL", + ): + monkeypatch.delenv(key, raising=False) + + +@pytest.fixture +def 
codex_auth_dir(tmp_path, monkeypatch):
    """Provide a writable ~/.codex/ directory with a valid auth.json."""
    codex_dir = tmp_path / ".codex"
    codex_dir.mkdir()
    auth_file = codex_dir / "auth.json"
    # Well-formed auth payload so any path-based reader would succeed too.
    auth_file.write_text(json.dumps({
        "tokens": {
            "access_token": "codex-test-token-abc123",
            "refresh_token": "codex-refresh-xyz",
        }
    }))
    # Patch the token reader directly so tests don't depend on Path.home().
    monkeypatch.setattr(
        "agent.auxiliary_client._read_codex_access_token",
        lambda: "codex-test-token-abc123",
    )
    return codex_dir


class TestReadCodexAccessToken:
    """Unit tests for the ~/.codex/auth.json access-token reader."""

    def test_valid_auth_file(self, tmp_path):
        # A well-formed auth.json yields the stored access token.
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        auth = codex_dir / "auth.json"
        auth.write_text(json.dumps({
            "tokens": {"access_token": "tok-123", "refresh_token": "r-456"}
        }))
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
            assert result == "tok-123"

    def test_missing_file_returns_none(self, tmp_path):
        # No ~/.codex/auth.json at all -> graceful None, never an exception.
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
            assert result is None

    def test_empty_token_returns_none(self, tmp_path):
        # Whitespace-only token values are treated as absent.
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        auth = codex_dir / "auth.json"
        auth.write_text(json.dumps({"tokens": {"access_token": " "}}))
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
            assert result is None

    def test_malformed_json_returns_none(self, tmp_path):
        # Corrupt JSON must not propagate a parse error to callers.
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text("{bad json")
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
            assert result is None

    def test_missing_tokens_key_returns_none(self, tmp_path):
        # Valid JSON without the expected "tokens" key is also treated as absent.
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text(json.dumps({"other": "data"}))
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
            assert result is None


class TestGetTextAuxiliaryClient:
    """Test the full resolution chain for get_text_auxiliary_client."""

    def test_openrouter_takes_priority(self, monkeypatch, codex_auth_dir):
        # OPENROUTER_API_KEY set -> OpenRouter wins even though Codex creds exist.
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
            assert model == "google/gemini-3-flash-preview"
            mock_openai.assert_called_once()
            call_kwargs = mock_openai.call_args
            assert call_kwargs.kwargs["api_key"] == "or-key"

    def test_nous_takes_priority_over_codex(self, monkeypatch, codex_auth_dir):
        # Nous Portal credentials outrank the Codex fallback.
        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_nous.return_value = {"access_token": "nous-tok"}
            client, model = get_text_auxiliary_client()
            assert model == "gemini-3-flash"

    def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
        # A custom OPENAI_BASE_URL endpoint outranks the Codex fallback.
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
        # NOTE(review): codex_auth_dir already applies this same setattr and is
        # not autouse — this re-application looks redundant; confirm intent.
        monkeypatch.setattr(
            "agent.auxiliary_client._read_codex_access_token",
            lambda: "codex-test-token-abc123",
        )
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
            assert model == "gpt-4o-mini"
            call_kwargs = mock_openai.call_args
            assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
        # With no OpenRouter / Nous / custom endpoint, Codex creds are used.
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
            assert model == "gpt-5.3-codex"
            # Returns a CodexAuxiliaryClient wrapper, not a raw OpenAI client
            from agent.auxiliary_client import CodexAuxiliaryClient
            assert isinstance(client, CodexAuxiliaryClient)

    def test_returns_none_when_nothing_available(self):
        # End of the chain: no provider at all -> (None, None), no exception.
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            client, model = get_text_auxiliary_client()
            assert client is None
            assert model is None


class TestCodexNotInVisionClient:
    """Codex fallback should NOT apply to vision tasks."""

    def test_vision_returns_none_without_openrouter_nous(self):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            client, model = get_vision_auxiliary_client()
            assert client is None
            assert model is None


class TestAuxiliaryMaxTokensParam:
    """Resolution of the max-token kwarg name per auxiliary provider."""

    def test_codex_fallback_uses_max_tokens(self, monkeypatch):
        """Codex adapter translates max_tokens internally, so we return max_tokens."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value="tok"):
            result = auxiliary_max_tokens_param(1024)
            assert result == {"max_tokens": 1024}

    def test_openrouter_uses_max_tokens(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        result = auxiliary_max_tokens_param(1024)
        assert result == {"max_tokens": 1024}

    def test_no_provider_uses_max_tokens(self):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            result = auxiliary_max_tokens_param(1024)
            assert result == {"max_tokens": 1024}
diff --git a/tests/agent/test_redact.py b/tests/agent/test_redact.py
new file mode 100644
index 000000000..52e015ca9
--- /dev/null
+++ b/tests/agent/test_redact.py
@@ -0,0 +1,173 @@
+"""Tests for agent.redact -- secret masking in logs and output."""
+
+import logging
+
+import pytest
+
+from agent.redact import redact_sensitive_text, RedactingFormatter
+
+
+class TestKnownPrefixes:
    def test_openai_sk_key(self):
        # Prefix survives as an identification hint; the secret tail is masked.
        text = "Using key sk-proj-abc123def456ghi789jkl012"
        result = redact_sensitive_text(text)
        assert "sk-pro" in result
        assert "abc123def456" not in result
        assert "..." in result

    def test_openrouter_sk_key(self):
        text = "OPENROUTER_API_KEY=sk-or-v1-abcdefghijklmnopqrstuvwxyz1234567890"
        result = redact_sensitive_text(text)
        assert "abcdefghijklmnop" not in result

    def test_github_pat_classic(self):
        result = redact_sensitive_text("token: ghp_abc123def456ghi789jkl")
        assert "abc123def456" not in result

    def test_github_pat_fine_grained(self):
        result = redact_sensitive_text("github_pat_abc123def456ghi789jklmno")
        assert "abc123def456" not in result

    def test_slack_token(self):
        # Synthetic xoxb- bot token shaped like Slack's id-and-secret format.
        token = "xoxb-" + "0" * 12 + "-" + "a" * 14
        result = redact_sensitive_text(token)
        assert "a" * 14 not in result

    def test_google_api_key(self):
        result = redact_sensitive_text("AIzaSyB-abc123def456ghi789jklmno012345")
        assert "abc123def456" not in result

    def test_perplexity_key(self):
        result = redact_sensitive_text("pplx-abcdef123456789012345")
        assert "abcdef12345" not in result

    def test_fal_key(self):
        result = redact_sensitive_text("fal_abc123def456ghi789jkl")
        assert "abc123def456" not in result

    def test_short_token_fully_masked(self):
        # Tokens too short to keep a safe prefix are masked entirely.
        result = redact_sensitive_text("key=sk-short1234567")
        assert "***" in result


class TestEnvAssignments:
    """NAME=value assignments: secret-looking names masked, others untouched."""

    def test_export_api_key(self):
        text = "export OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012"
        result = redact_sensitive_text(text)
        # Variable name is preserved for debuggability; value is masked.
        assert "OPENAI_API_KEY=" in result
        assert "abc123def456" not in result

    def test_quoted_value(self):
        text = 'MY_SECRET_TOKEN="supersecretvalue123456789"'
        result = redact_sensitive_text(text)
        assert "MY_SECRET_TOKEN=" in result
        assert "supersecretvalue" not in result

    def test_non_secret_env_unchanged(self):
        text = "HOME=/home/user"
        result = redact_sensitive_text(text)
        assert result == text

    def test_path_unchanged(self):
        text = "PATH=/usr/local/bin:/usr/bin"
        result = redact_sensitive_text(text)
        assert result == text


class TestJsonFields:
    """JSON bodies: secret-named fields masked, ordinary fields untouched."""

    def test_json_api_key(self):
        text = '{"apiKey": "sk-proj-abc123def456ghi789jkl012"}'
        result = redact_sensitive_text(text)
        assert "abc123def456" not in result

    def test_json_token(self):
        text = '{"access_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.longtoken.here"}'
        result = redact_sensitive_text(text)
        assert "eyJhbGciOiJSUzI1NiIs" not in result

    def test_json_non_secret_unchanged(self):
        text = '{"name": "John", "model": "gpt-4"}'
        result = redact_sensitive_text(text)
        assert result == text


class TestAuthHeaders:
    """HTTP Authorization headers."""

    def test_bearer_token(self):
        text = "Authorization: Bearer sk-proj-abc123def456ghi789jkl012"
        result = redact_sensitive_text(text)
        # Header name survives; bearer credential is masked.
        assert "Authorization: Bearer" in result
        assert "abc123def456" not in result

    def test_case_insensitive(self):
        text = "authorization: bearer mytoken123456789012345678"
        result = redact_sensitive_text(text)
        assert "mytoken12345" not in result


class TestTelegramTokens:
    """Telegram bot tokens (botID:secret and raw id:secret forms)."""

    def test_bot_token(self):
        text = "bot123456789:ABCDEfghij-KLMNopqrst_UVWXyz12345"
        result = redact_sensitive_text(text)
        # Numeric bot id is kept; the secret half is replaced with ***.
        assert "ABCDEfghij" not in result
        assert "123456789:***" in result

    def test_raw_token(self):
        text = "12345678901:ABCDEfghijKLMNopqrstUVWXyz1234567890"
        result = redact_sensitive_text(text)
        assert "ABCDEfghij" not in result


class TestPassthrough:
    """Inputs that must come back completely unmodified."""

    def test_empty_string(self):
        assert redact_sensitive_text("") == ""

    def test_none_returns_none(self):
        assert redact_sensitive_text(None) is None

    def test_normal_text_unchanged(self):
        text = "Hello world, this is a normal log message with no secrets."
        assert redact_sensitive_text(text) == text

    def test_code_unchanged(self):
        # Source code with no secrets must pass through byte-for-byte.
        text = "def main():\n    print('hello')\n    return 42"
        result = redact_sensitive_text(text)
        assert redact_sensitive_text(text) == text

    def test_url_without_key_unchanged(self):
        text = "Connecting to https://api.openai.com/v1/chat/completions"
        result = redact_sensitive_text(text)
        assert redact_sensitive_text(text) == text


class TestRedactingFormatter:
    """The logging.Formatter subclass that redacts as it formats."""

    def test_formats_and_redacts(self):
        formatter = RedactingFormatter("%(message)s")
        # Hand-built LogRecord: no logger configuration needed.
        record = logging.LogRecord(
            name="test", level=logging.INFO, pathname="", lineno=0,
            msg="Key is sk-proj-abc123def456ghi789jkl012",
            args=(), exc_info=None,
        )
        result = formatter.format(record)
        assert "abc123def456" not in result
        assert "sk-pro" in result


class TestPrintenvSimulation:
    """Simulate what happens when the agent runs `env` or `printenv`."""

    def test_full_env_dump(self):
        env_dump = """HOME=/home/user
PATH=/usr/local/bin:/usr/bin
OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012mno345
OPENROUTER_API_KEY=sk-or-v1-reallyLongSecretKeyValue12345678
FIRECRAWL_API_KEY=fc-shortkey123456789012
TELEGRAM_BOT_TOKEN=bot987654321:ABCDEfghij-KLMNopqrst_UVWXyz12345
SHELL=/bin/bash
USER=teknium"""
        result = redact_sensitive_text(env_dump)
        # Secrets should be masked
        assert "abc123def456" not in result
        assert "reallyLongSecretKey" not in result
        assert "ABCDEfghij" not in result
        # Non-secrets should survive
        assert "HOME=/home/user" in result
        assert "SHELL=/bin/bash" in result
        assert "USER=teknium" in result
diff --git a/tests/agent/test_subagent_progress.py b/tests/agent/test_subagent_progress.py
new file mode 100644
index 000000000..b6e5e7525
--- /dev/null
+++ b/tests/agent/test_subagent_progress.py
@@ -0,0 +1,374 @@
+"""
+Tests for subagent progress relay (issue #169).

Verifies that:
- KawaiiSpinner.print_above() works with and without active spinner
- _build_child_progress_callback handles CLI/gateway/no-display paths
- Thinking events are relayed correctly
- Parallel callbacks don't share state
"""

import io
import sys
import time
import threading
import pytest
from unittest.mock import MagicMock, patch

from agent.display import KawaiiSpinner
from tools.delegate_tool import _build_child_progress_callback


# =========================================================================
# KawaiiSpinner.print_above tests
# =========================================================================

class TestPrintAbove:
    """Tests for KawaiiSpinner.print_above method."""

    def test_print_above_without_spinner_running(self):
        """print_above should write to stdout even when spinner is not running."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("test")
        spinner._out = buf  # Redirect to buffer

        spinner.print_above("hello world")
        output = buf.getvalue()
        assert "hello world" in output

    def test_print_above_with_spinner_running(self):
        """print_above should clear spinner line and print text."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("test")
        spinner._out = buf
        spinner.running = True  # Pretend spinner is running (don't start thread)

        spinner.print_above("tool line")
        output = buf.getvalue()
        assert "tool line" in output
        assert "\r" in output  # Should start with carriage return to clear spinner line

    def test_print_above_uses_captured_stdout(self):
        """print_above should use self._out, not sys.stdout.

        This ensures it works inside redirect_stdout(devnull)."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("test")
        spinner._out = buf

        # Simulate redirect_stdout(devnull)
        old_stdout = sys.stdout
        sys.stdout = io.StringIO()
        try:
            spinner.print_above("should go to buf")
        finally:
            sys.stdout = old_stdout

        assert "should go to buf" in buf.getvalue()


# =========================================================================
# _build_child_progress_callback tests
# =========================================================================

class TestBuildChildProgressCallback:
    """Tests for child progress callback builder."""

    def test_returns_none_when_no_display(self):
        """Should return None when parent has no spinner or callback."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent)
        assert cb is None

    def test_cli_spinner_tool_event(self):
        """Should print tool line above spinner for CLI path."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = buf
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent)
        assert cb is not None

        cb("web_search", "quantum computing")
        output = buf.getvalue()
        assert "web_search" in output
        assert "quantum computing" in output
        assert "├─" in output

    def test_cli_spinner_thinking_event(self):
        """Should print thinking line above spinner for CLI path."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = buf
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent)
        cb("_thinking", "I'll search for papers first")

        output = buf.getvalue()
        assert "💭" in output
        assert "search for papers" in output

    def test_gateway_batched_progress(self):
        """Gateway path should batch tool calls and flush at BATCH_SIZE."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb = _build_child_progress_callback(0, parent)

        # Send 4 tool calls — shouldn't flush yet (BATCH_SIZE = 5)
        for i in range(4):
            cb(f"tool_{i}", f"arg_{i}")
        parent_cb.assert_not_called()

        # 5th call should trigger flush
        cb("tool_4", "arg_4")
        parent_cb.assert_called_once()
        call_args = parent_cb.call_args
        assert "tool_0" in call_args[0][1]
        assert "tool_4" in call_args[0][1]

    def test_thinking_not_relayed_to_gateway(self):
        """Thinking events should NOT be sent to gateway (too noisy)."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb = _build_child_progress_callback(0, parent)
        cb("_thinking", "some reasoning text")

        parent_cb.assert_not_called()

    def test_parallel_callbacks_independent(self):
        """Each child's callback should have independent batch state."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb0 = _build_child_progress_callback(0, parent)
        cb1 = _build_child_progress_callback(1, parent)

        # Send 3 calls to each — neither should flush (batch size = 5)
        for i in range(3):
            cb0(f"tool_{i}")
            cb1(f"other_{i}")

        parent_cb.assert_not_called()

    def test_task_index_prefix_in_batch_mode(self):
        """Batch mode (task_count > 1) should show 1-indexed prefix for all tasks."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = buf
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        # task_index=0 in a batch of 3 → prefix "[1]"
        cb0 = _build_child_progress_callback(0, parent, task_count=3)
        cb0("web_search", "test")
        output = buf.getvalue()
        assert "[1]" in output

        # task_index=2 in a batch of 3 → prefix "[3]"
        buf.truncate(0)
        buf.seek(0)
        cb2 = _build_child_progress_callback(2, parent, task_count=3)
        cb2("web_search", "test")
        output = buf.getvalue()
        assert "[3]" in output

    def test_single_task_no_prefix(self):
        """Single task (task_count=1) should not show index prefix."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = buf
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent, task_count=1)
        cb("web_search", "test")

        output = buf.getvalue()
        assert "[" not in output


# =========================================================================
# Integration: thinking callback in run_agent.py
# =========================================================================

class TestThinkingCallback:
    """Tests for the _thinking callback in AIAgent conversation loop."""

    def _simulate_thinking_callback(self, content, callback, delegate_depth=1):
        """Simulate the exact code path from run_agent.py for the thinking callback.

        delegate_depth: simulates self._delegate_depth.
        0 = main agent (should NOT fire), >=1 = subagent (should fire).
        """
        import re
        if (content and callback and delegate_depth > 0):
            _think_text = content.strip()
            # NOTE(review): the regex below appears garbled/empty in this dump —
            # production code strips reasoning tags (think / REASONING_SCRATCHPAD)
            # at this point; confirm the real pattern against run_agent.py.
            _think_text = re.sub(
                r'', '', _think_text
            ).strip()
            first_line = _think_text.split('\n')[0][:80] if _think_text else ""
            if first_line:
                try:
                    callback("_thinking", first_line)
                except Exception:
                    pass

    def test_thinking_callback_fires_on_content(self):
        """tool_progress_callback should receive _thinking event
        when assistant message has content."""
        calls = []
        self._simulate_thinking_callback(
            "I'll research quantum computing first, then summarize.",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 1
        assert calls[0][0] == "_thinking"
        assert "quantum computing" in calls[0][1]

    def test_thinking_callback_skipped_when_no_content(self):
        """Should not fire when assistant has no content."""
        calls = []
        self._simulate_thinking_callback(
            None,
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 0

    def test_thinking_callback_truncates_long_content(self):
        """Should truncate long content to 80 chars."""
        calls = []
        self._simulate_thinking_callback(
            "A" * 200 + "\nSecond line should be ignored",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 1
        assert len(calls[0][1]) == 80

    def test_thinking_callback_skipped_for_main_agent(self):
        """Main agent (delegate_depth=0) should NOT fire thinking events.

        This prevents gateway spam on Telegram/Discord."""
        calls = []
        self._simulate_thinking_callback(
            "I'll help you with that request.",
            lambda name, preview=None: calls.append((name, preview)),
            delegate_depth=0,
        )
        assert len(calls) == 0

    def test_thinking_callback_strips_reasoning_scratchpad(self):
        """REASONING_SCRATCHPAD tags should be stripped before display."""
        # NOTE(review): input/assert tag text appears stripped by sanitization
        # in this dump — restore the tagged fixture string from the repo.
        calls = []
        self._simulate_thinking_callback(
            "I need to analyze this carefully",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 1
        assert "" not in calls[0][1]
        assert "analyze this carefully" in calls[0][1]

    def test_thinking_callback_strips_think_tags(self):
        """ tags should be stripped before display."""
        calls = []
        self._simulate_thinking_callback(
            "Let me think about this problem",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 1
        assert "" not in calls[0][1]
        assert "think about this problem" in calls[0][1]

    def test_thinking_callback_empty_after_strip(self):
        """Should not fire when content is only XML tags."""
        calls = []
        self._simulate_thinking_callback(
            "",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 0


# =========================================================================
# Gateway batch flush tests
# =========================================================================

class TestBatchFlush:
    """Tests for gateway batch flush on subagent completion."""

    def test_flush_sends_remaining_batch(self):
        """_flush should send remaining tool names to gateway."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb = _build_child_progress_callback(0, parent)

        # Send 3 tools (below batch size of 5)
        cb("web_search", "query1")
        cb("read_file", "file.txt")
        cb("write_file", "out.txt")
        parent_cb.assert_not_called()

        # Flush should
send the remaining 3 + cb._flush() + parent_cb.assert_called_once() + summary = parent_cb.call_args[0][1] + assert "web_search" in summary + assert "write_file" in summary + + def test_flush_noop_when_batch_empty(self): + """_flush should not send anything when batch is empty.""" + parent = MagicMock() + parent._delegate_spinner = None + parent_cb = MagicMock() + parent.tool_progress_callback = parent_cb + + cb = _build_child_progress_callback(0, parent) + cb._flush() + parent_cb.assert_not_called() + + def test_flush_noop_when_no_parent_callback(self): + """_flush should not crash when there's no parent callback.""" + buf = io.StringIO() + spinner = KawaiiSpinner("test") + spinner._out = buf + spinner.running = True + + parent = MagicMock() + parent._delegate_spinner = spinner + parent.tool_progress_callback = None + + cb = _build_child_progress_callback(0, parent) + cb("web_search", "test") + cb._flush() # Should not crash + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) + diff --git a/tests/gateway/test_media_extraction.py b/tests/gateway/test_media_extraction.py new file mode 100644 index 000000000..20f7d73a8 --- /dev/null +++ b/tests/gateway/test_media_extraction.py @@ -0,0 +1,184 @@ +""" +Tests for MEDIA tag extraction from tool results. + +Verifies that MEDIA tags (e.g., from TTS tool) are only extracted from +messages in the CURRENT turn, not from the full conversation history. +This prevents voice messages from accumulating and being sent multiple +times per reply. (Regression test for #160) +""" + +import pytest +import re + + +def extract_media_tags_fixed(result_messages, history_len): + """ + Extract MEDIA tags from tool results, but ONLY from new messages + (those added after history_len). This is the fixed behavior. 
+ + Args: + result_messages: Full list of messages including history + new + history_len: Length of history before this turn + + Returns: + Tuple of (media_tags list, has_voice_directive bool) + """ + media_tags = [] + has_voice_directive = False + + # Only process new messages from this turn + new_messages = result_messages[history_len:] if len(result_messages) > history_len else [] + + for msg in new_messages: + if msg.get("role") == "tool" or msg.get("role") == "function": + content = msg.get("content", "") + if "MEDIA:" in content: + for match in re.finditer(r'MEDIA:(\S+)', content): + path = match.group(1).strip().rstrip('",}') + if path: + media_tags.append(f"MEDIA:{path}") + if "[[audio_as_voice]]" in content: + has_voice_directive = True + + return media_tags, has_voice_directive + + +def extract_media_tags_broken(result_messages): + """ + The BROKEN behavior: extract MEDIA tags from ALL messages including history. + This causes TTS voice messages to accumulate and be re-sent on every reply. 
+ """ + media_tags = [] + has_voice_directive = False + + for msg in result_messages: + if msg.get("role") == "tool" or msg.get("role") == "function": + content = msg.get("content", "") + if "MEDIA:" in content: + for match in re.finditer(r'MEDIA:(\S+)', content): + path = match.group(1).strip().rstrip('",}') + if path: + media_tags.append(f"MEDIA:{path}") + if "[[audio_as_voice]]" in content: + has_voice_directive = True + + return media_tags, has_voice_directive + + +class TestMediaExtraction: + """Tests for MEDIA tag extraction from tool results.""" + + def test_media_tags_not_extracted_from_history(self): + """MEDIA tags from previous turns should NOT be extracted again.""" + # Simulate conversation history with a TTS call from a previous turn + history = [ + {"role": "user", "content": "Say hello as audio"}, + {"role": "assistant", "content": None, "tool_calls": [{"id": "1", "function": {"name": "text_to_speech"}}]}, + {"role": "tool", "tool_call_id": "1", "content": '{"success": true, "media_tag": "[[audio_as_voice]]\\nMEDIA:/path/to/audio1.ogg"}'}, + {"role": "assistant", "content": "I've said hello for you!"}, + ] + + # New turn: user asks a simple question + new_messages = [ + {"role": "user", "content": "What time is it?"}, + {"role": "assistant", "content": "It's 3:30 AM."}, + ] + + all_messages = history + new_messages + history_len = len(history) + + # Fixed behavior: should extract NO media tags (none in new messages) + tags, voice_directive = extract_media_tags_fixed(all_messages, history_len) + assert tags == [], "Fixed extraction should not find tags in history" + assert voice_directive is False + + # Broken behavior: would incorrectly extract the old media tag + broken_tags, broken_voice = extract_media_tags_broken(all_messages) + assert len(broken_tags) == 1, "Broken extraction finds tags in history" + assert "audio1.ogg" in broken_tags[0] + + def test_media_tags_extracted_from_current_turn(self): + """MEDIA tags from the current turn SHOULD be 
extracted.""" + # History without TTS + history = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + ] + + # New turn with TTS call + new_messages = [ + {"role": "user", "content": "Say goodbye as audio"}, + {"role": "assistant", "content": None, "tool_calls": [{"id": "2", "function": {"name": "text_to_speech"}}]}, + {"role": "tool", "tool_call_id": "2", "content": '{"success": true, "media_tag": "[[audio_as_voice]]\\nMEDIA:/path/to/audio2.ogg"}'}, + {"role": "assistant", "content": "I've said goodbye!"}, + ] + + all_messages = history + new_messages + history_len = len(history) + + # Fixed behavior: should extract the new media tag + tags, voice_directive = extract_media_tags_fixed(all_messages, history_len) + assert len(tags) == 1, "Should extract media tag from current turn" + assert "audio2.ogg" in tags[0] + assert voice_directive is True + + def test_multiple_tts_calls_in_history_not_accumulated(self): + """Multiple TTS calls in history should NOT accumulate in new responses.""" + # History with multiple TTS calls + history = [ + {"role": "user", "content": "Say hello"}, + {"role": "tool", "tool_call_id": "1", "content": 'MEDIA:/audio/hello.ogg'}, + {"role": "assistant", "content": "Done!"}, + {"role": "user", "content": "Say goodbye"}, + {"role": "tool", "tool_call_id": "2", "content": 'MEDIA:/audio/goodbye.ogg'}, + {"role": "assistant", "content": "Done!"}, + {"role": "user", "content": "Say thanks"}, + {"role": "tool", "tool_call_id": "3", "content": 'MEDIA:/audio/thanks.ogg'}, + {"role": "assistant", "content": "Done!"}, + ] + + # New turn: no TTS + new_messages = [ + {"role": "user", "content": "What time is it?"}, + {"role": "assistant", "content": "3 PM"}, + ] + + all_messages = history + new_messages + history_len = len(history) + + # Fixed: no tags + tags, _ = extract_media_tags_fixed(all_messages, history_len) + assert tags == [], "Should not accumulate tags from history" + + # Broken: would have 3 tags (all 
the old ones) + broken_tags, _ = extract_media_tags_broken(all_messages) + assert len(broken_tags) == 3, "Broken version accumulates all history tags" + + def test_deduplication_within_current_turn(self): + """Multiple MEDIA tags in current turn should be deduplicated.""" + history = [] + + # Current turn with multiple tool calls producing same media + new_messages = [ + {"role": "user", "content": "Multiple TTS"}, + {"role": "tool", "tool_call_id": "1", "content": 'MEDIA:/audio/same.ogg'}, + {"role": "tool", "tool_call_id": "2", "content": 'MEDIA:/audio/same.ogg'}, # duplicate + {"role": "tool", "tool_call_id": "3", "content": 'MEDIA:/audio/different.ogg'}, + {"role": "assistant", "content": "Done!"}, + ] + + all_messages = history + new_messages + + tags, _ = extract_media_tags_fixed(all_messages, 0) + # Even though same.ogg appears twice, deduplication happens after extraction + # The extraction itself should get both, then caller deduplicates + assert len(tags) == 3 # Raw extraction gets all + + # Deduplication as done in the actual code: + seen = set() + unique = [t for t in tags if t not in seen and not seen.add(t)] + assert len(unique) == 2 # After dedup: same.ogg and different.ogg + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_auth_codex_provider.py b/tests/test_auth_codex_provider.py new file mode 100644 index 000000000..7d3076807 --- /dev/null +++ b/tests/test_auth_codex_provider.py @@ -0,0 +1,210 @@ +import json +import time +import base64 +from contextlib import contextmanager +from pathlib import Path +from types import SimpleNamespace + +import pytest +import yaml + +from hermes_cli.auth import ( + AuthError, + DEFAULT_CODEX_BASE_URL, + PROVIDER_REGISTRY, + _persist_codex_auth_payload, + _login_openai_codex, + login_command, + get_codex_auth_status, + get_provider_auth_state, + read_codex_auth_file, + resolve_codex_runtime_credentials, + resolve_provider, +) + + +def _write_codex_auth(codex_home: Path, *, 
access_token: str = "access", refresh_token: str = "refresh") -> Path: + codex_home.mkdir(parents=True, exist_ok=True) + auth_file = codex_home / "auth.json" + auth_file.write_text( + json.dumps( + { + "auth_mode": "oauth", + "last_refresh": "2026-02-26T00:00:00Z", + "tokens": { + "access_token": access_token, + "refresh_token": refresh_token, + }, + } + ) + ) + return auth_file + + +def _jwt_with_exp(exp_epoch: int) -> str: + payload = {"exp": exp_epoch} + encoded = base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8")).rstrip(b"=").decode("utf-8") + return f"h.{encoded}.s" + + +def test_read_codex_auth_file_success(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + auth_file = _write_codex_auth(codex_home) + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + payload = read_codex_auth_file() + + assert payload["auth_path"] == auth_file + assert payload["tokens"]["access_token"] == "access" + assert payload["tokens"]["refresh_token"] == "refresh" + + +def test_resolve_codex_runtime_credentials_missing_access_token(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + _write_codex_auth(codex_home, access_token="") + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + with pytest.raises(AuthError) as exc: + resolve_codex_runtime_credentials() + + assert exc.value.code == "codex_auth_missing_access_token" + assert exc.value.relogin_required is True + + +def test_resolve_codex_runtime_credentials_refreshes_expiring_token(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + expiring_token = _jwt_with_exp(int(time.time()) - 10) + _write_codex_auth(codex_home, access_token=expiring_token, refresh_token="refresh-old") + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + called = {"count": 0} + + def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False): + called["count"] += 1 + assert auth_path == codex_home / "auth.json" + assert lock_held is True + return {"access_token": "access-new", "refresh_token": 
"refresh-new"} + + monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh) + + resolved = resolve_codex_runtime_credentials() + + assert called["count"] == 1 + assert resolved["api_key"] == "access-new" + + +def test_resolve_codex_runtime_credentials_force_refresh(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + _write_codex_auth(codex_home, access_token="access-current", refresh_token="refresh-old") + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + called = {"count": 0} + + def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False): + called["count"] += 1 + assert lock_held is True + return {"access_token": "access-forced", "refresh_token": "refresh-new"} + + monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh) + + resolved = resolve_codex_runtime_credentials(force_refresh=True, refresh_if_expiring=False) + + assert called["count"] == 1 + assert resolved["api_key"] == "access-forced" + + +def test_resolve_codex_runtime_credentials_uses_file_lock_on_refresh(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + _write_codex_auth(codex_home, access_token="access-current", refresh_token="refresh-old") + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + lock_calls = {"enter": 0, "exit": 0} + + @contextmanager + def _fake_lock(auth_path, timeout_seconds=15.0): + assert auth_path == codex_home / "auth.json" + lock_calls["enter"] += 1 + try: + yield + finally: + lock_calls["exit"] += 1 + + refresh_calls = {"count": 0} + + def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False): + refresh_calls["count"] += 1 + assert lock_held is True + return {"access_token": "access-updated", "refresh_token": "refresh-updated"} + + monkeypatch.setattr("hermes_cli.auth._codex_auth_file_lock", _fake_lock) + monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh) + + resolved = resolve_codex_runtime_credentials(force_refresh=True, 
refresh_if_expiring=False) + + assert refresh_calls["count"] == 1 + assert lock_calls["enter"] == 1 + assert lock_calls["exit"] == 1 + assert resolved["api_key"] == "access-updated" + + +def test_resolve_provider_explicit_codex_does_not_fallback(monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + assert resolve_provider("openai-codex") == "openai-codex" + + +def test_persist_codex_auth_payload_writes_atomically(tmp_path): + auth_path = tmp_path / "auth.json" + auth_path.write_text('{"stale":true}\n') + payload = { + "auth_mode": "oauth", + "tokens": { + "access_token": "next-access", + "refresh_token": "next-refresh", + }, + "last_refresh": "2026-02-26T00:00:00Z", + } + + _persist_codex_auth_payload(auth_path, payload) + + stored = json.loads(auth_path.read_text()) + assert stored == payload + assert list(tmp_path.glob(".auth.json.*.tmp")) == [] + + +def test_get_codex_auth_status_not_logged_in(tmp_path, monkeypatch): + monkeypatch.setenv("CODEX_HOME", str(tmp_path / "missing-codex-home")) + status = get_codex_auth_status() + assert status["logged_in"] is False + assert "error" in status + + +def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes-home" + codex_home = tmp_path / "codex-home" + _write_codex_auth(codex_home) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + # Mock input() to accept existing credentials + monkeypatch.setattr("builtins.input", lambda _: "y") + + _login_openai_codex(SimpleNamespace(), PROVIDER_REGISTRY["openai-codex"]) + + state = get_provider_auth_state("openai-codex") + assert state is not None + assert state["source"] == "codex-auth-json" + assert state["auth_file"].endswith("auth.json") + + config_path = hermes_home / "config.yaml" + config = yaml.safe_load(config_path.read_text()) + assert config["model"]["provider"] == "openai-codex" + assert 
config["model"]["base_url"] == DEFAULT_CODEX_BASE_URL + + +def test_login_command_shows_deprecation(monkeypatch, capsys): + """login_command is deprecated and directs users to hermes model.""" + with pytest.raises(SystemExit) as exc_info: + login_command(SimpleNamespace()) + assert exc_info.value.code == 0 + captured = capsys.readouterr() + assert "hermes model" in captured.out diff --git a/tests/test_cli_init.py b/tests/test_cli_init.py new file mode 100644 index 000000000..90ce05c72 --- /dev/null +++ b/tests/test_cli_init.py @@ -0,0 +1,80 @@ +"""Tests for HermesCLI initialization -- catches configuration bugs +that only manifest at runtime (not in mocked unit tests).""" + +import os +import sys +from unittest.mock import patch, MagicMock + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + + +def _make_cli(**kwargs): + """Create a HermesCLI instance with minimal mocking.""" + from cli import HermesCLI + with patch("cli.get_tool_definitions", return_value=[]): + return HermesCLI(**kwargs) + + +class TestMaxTurnsResolution: + """max_turns must always resolve to a positive integer, never None.""" + + def test_default_max_turns_is_integer(self): + cli = _make_cli() + assert isinstance(cli.max_turns, int) + assert cli.max_turns > 0 + + def test_explicit_max_turns_honored(self): + cli = _make_cli(max_turns=25) + assert cli.max_turns == 25 + + def test_none_max_turns_gets_default(self): + cli = _make_cli(max_turns=None) + assert isinstance(cli.max_turns, int) + assert cli.max_turns > 0 + + def test_env_var_max_turns(self, monkeypatch): + """Env var is used when config file doesn't set max_turns.""" + monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42") + import cli as cli_module + original = cli_module.CLI_CONFIG["agent"].get("max_turns") + cli_module.CLI_CONFIG["agent"]["max_turns"] = None + try: + cli_obj = _make_cli() + assert cli_obj.max_turns == 42 + finally: + if original is not None: + cli_module.CLI_CONFIG["agent"]["max_turns"] = 
original + + def test_max_turns_never_none_for_agent(self): + """The value passed to AIAgent must never be None (causes TypeError in run_conversation).""" + cli = _make_cli() + assert cli.max_turns is not None + + +class TestVerboseAndToolProgress: + def test_default_verbose_is_bool(self): + cli = _make_cli() + assert isinstance(cli.verbose, bool) + + def test_tool_progress_mode_is_string(self): + cli = _make_cli() + assert isinstance(cli.tool_progress_mode, str) + assert cli.tool_progress_mode in ("off", "new", "all", "verbose") + + +class TestProviderResolution: + def test_api_key_is_string_or_none(self): + cli = _make_cli() + assert cli.api_key is None or isinstance(cli.api_key, str) + + def test_base_url_is_string(self): + cli = _make_cli() + assert isinstance(cli.base_url, str) + assert cli.base_url.startswith("http") + + def test_model_is_string(self): + cli = _make_cli() + assert isinstance(cli.model, str) + assert len(cli.model) > 0 diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py new file mode 100644 index 000000000..3c8fe14a5 --- /dev/null +++ b/tests/test_cli_provider_resolution.py @@ -0,0 +1,187 @@ +import importlib +import sys +import types +from contextlib import nullcontext +from types import SimpleNamespace + +from hermes_cli.auth import AuthError +from hermes_cli import main as hermes_main + + +def _install_prompt_toolkit_stubs(): + class _Dummy: + def __init__(self, *args, **kwargs): + pass + + class _Condition: + def __init__(self, func): + self.func = func + + def __bool__(self): + return bool(self.func()) + + class _ANSI(str): + pass + + root = types.ModuleType("prompt_toolkit") + history = types.ModuleType("prompt_toolkit.history") + styles = types.ModuleType("prompt_toolkit.styles") + patch_stdout = types.ModuleType("prompt_toolkit.patch_stdout") + application = types.ModuleType("prompt_toolkit.application") + layout = types.ModuleType("prompt_toolkit.layout") + processors = 
types.ModuleType("prompt_toolkit.layout.processors") + filters = types.ModuleType("prompt_toolkit.filters") + dimension = types.ModuleType("prompt_toolkit.layout.dimension") + menus = types.ModuleType("prompt_toolkit.layout.menus") + widgets = types.ModuleType("prompt_toolkit.widgets") + key_binding = types.ModuleType("prompt_toolkit.key_binding") + completion = types.ModuleType("prompt_toolkit.completion") + formatted_text = types.ModuleType("prompt_toolkit.formatted_text") + + history.FileHistory = _Dummy + styles.Style = _Dummy + patch_stdout.patch_stdout = lambda *args, **kwargs: nullcontext() + application.Application = _Dummy + layout.Layout = _Dummy + layout.HSplit = _Dummy + layout.Window = _Dummy + layout.FormattedTextControl = _Dummy + layout.ConditionalContainer = _Dummy + processors.Processor = _Dummy + processors.Transformation = _Dummy + processors.PasswordProcessor = _Dummy + processors.ConditionalProcessor = _Dummy + filters.Condition = _Condition + dimension.Dimension = _Dummy + menus.CompletionsMenu = _Dummy + widgets.TextArea = _Dummy + key_binding.KeyBindings = _Dummy + completion.Completer = _Dummy + completion.Completion = _Dummy + formatted_text.ANSI = _ANSI + root.print_formatted_text = lambda *args, **kwargs: None + + sys.modules.setdefault("prompt_toolkit", root) + sys.modules.setdefault("prompt_toolkit.history", history) + sys.modules.setdefault("prompt_toolkit.styles", styles) + sys.modules.setdefault("prompt_toolkit.patch_stdout", patch_stdout) + sys.modules.setdefault("prompt_toolkit.application", application) + sys.modules.setdefault("prompt_toolkit.layout", layout) + sys.modules.setdefault("prompt_toolkit.layout.processors", processors) + sys.modules.setdefault("prompt_toolkit.filters", filters) + sys.modules.setdefault("prompt_toolkit.layout.dimension", dimension) + sys.modules.setdefault("prompt_toolkit.layout.menus", menus) + sys.modules.setdefault("prompt_toolkit.widgets", widgets) + 
sys.modules.setdefault("prompt_toolkit.key_binding", key_binding) + sys.modules.setdefault("prompt_toolkit.completion", completion) + sys.modules.setdefault("prompt_toolkit.formatted_text", formatted_text) + + +def _import_cli(): + try: + importlib.import_module("prompt_toolkit") + except ModuleNotFoundError: + _install_prompt_toolkit_stubs() + return importlib.import_module("cli") + + +def test_hermes_cli_init_does_not_eagerly_resolve_runtime_provider(monkeypatch): + cli = _import_cli() + calls = {"count": 0} + + def _unexpected_runtime_resolve(**kwargs): + calls["count"] += 1 + raise AssertionError("resolve_runtime_provider should not be called in HermesCLI.__init__") + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _unexpected_runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + + shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1) + + assert shell is not None + assert calls["count"] == 0 + + +def test_runtime_resolution_failure_is_not_sticky(monkeypatch): + cli = _import_cli() + calls = {"count": 0} + + def _runtime_resolve(**kwargs): + calls["count"] += 1 + if calls["count"] == 1: + raise RuntimeError("temporary auth failure") + return { + "provider": "openrouter", + "api_mode": "chat_completions", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "test-key", + "source": "env/config", + } + + class _DummyAgent: + def __init__(self, *args, **kwargs): + self.kwargs = kwargs + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + monkeypatch.setattr(cli, "AIAgent", _DummyAgent) + + shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1) + + assert shell._init_agent() is False + assert shell._init_agent() is True + assert calls["count"] == 2 + assert shell.agent is not None + + +def 
test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch): + cli = _import_cli() + + def _runtime_resolve(**kwargs): + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://same-endpoint.example/v1", + "api_key": "same-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + + shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1) + shell.provider = "openrouter" + shell.api_mode = "chat_completions" + shell.base_url = "https://same-endpoint.example/v1" + shell.api_key = "same-key" + shell.agent = object() + + assert shell._ensure_runtime_credentials() is True + assert shell.agent is None + assert shell.provider == "openai-codex" + assert shell.api_mode == "codex_responses" + + +def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys): + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"model": {"default": "gpt-5", "provider": "invalid-provider"}}, + ) + monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None) + monkeypatch.setattr("hermes_cli.config.get_env_value", lambda key: "") + monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: None) + + def _resolve_provider(requested, **kwargs): + if requested == "invalid-provider": + raise AuthError("Unknown provider 'invalid-provider'.", code="invalid_provider") + return "openrouter" + + monkeypatch.setattr("hermes_cli.auth.resolve_provider", _resolve_provider) + monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: len(choices) - 1) + + hermes_main.cmd_model(SimpleNamespace()) + output = capsys.readouterr().out + + assert "Warning:" in output + assert "falling back to auto provider detection" in output.lower() + assert "No change." 
# Stub heavyweight optional deps BEFORE importing the agent modules.
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())

import cron.scheduler as cron_scheduler
import gateway.run as gateway_run
import run_agent
from gateway.config import Platform
from gateway.session import SessionSource


def _patch_agent_bootstrap(monkeypatch):
    """Give AIAgent a single dummy tool and bypass toolset requirement checks."""
    monkeypatch.setattr(
        run_agent,
        "get_tool_definitions",
        lambda **kwargs: [
            {
                "type": "function",
                "function": {
                    "name": "terminal",
                    "description": "Run shell commands.",
                    "parameters": {"type": "object", "properties": {}},
                },
            }
        ],
    )
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})


def _codex_message_response(text: str):
    """Build a fake Codex Responses-API payload carrying a single text message."""
    return SimpleNamespace(
        output=[
            SimpleNamespace(
                type="message",
                content=[SimpleNamespace(type="output_text", text=text)],
            )
        ],
        usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8),
        status="completed",
        model="gpt-5-codex",
    )


class _UnauthorizedError(RuntimeError):
    """Mimics an HTTP 401 error raised by the OpenAI client."""

    def __init__(self):
        super().__init__("Error code: 401 - unauthorized")
        self.status_code = 401


class _FakeOpenAI:
    """Minimal OpenAI client stand-in that records its constructor kwargs."""

    def __init__(self, **kwargs):
        self.kwargs = kwargs

    def close(self):
        return None


class _Codex401ThenSuccessAgent(run_agent.AIAgent):
    """Agent whose first API call 401s, exercising the internal token refresh."""

    refresh_attempts = 0
    last_init = {}

    def __init__(self, *args, **kwargs):
        kwargs.setdefault("skip_context_files", True)
        kwargs.setdefault("skip_memory", True)
        kwargs.setdefault("max_iterations", 4)
        type(self).last_init = dict(kwargs)
        super().__init__(*args, **kwargs)
        # Disable persistence side effects for the test run.
        self._cleanup_task_resources = lambda task_id: None
        self._persist_session = lambda messages, history=None: None
        self._save_trajectory = lambda messages, user_message, completed: None
        self._save_session_log = lambda messages: None

    def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
        type(self).refresh_attempts += 1
        return True

    def run_conversation(self, user_message: str, conversation_history=None):
        calls = {"api": 0}

        def _fake_api_call(api_kwargs):
            calls["api"] += 1
            if calls["api"] == 1:
                raise _UnauthorizedError()
            return _codex_message_response("Recovered via refresh")

        self._interruptible_api_call = _fake_api_call
        return super().run_conversation(user_message, conversation_history=conversation_history)


def test_cron_run_job_codex_path_handles_internal_401_refresh(monkeypatch):
    """cron.run_job on the Codex path recovers from a 401 via one token refresh."""
    _patch_agent_bootstrap(monkeypatch)
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
    monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)
    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider",
        lambda requested=None: {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "codex-token",
        },
    )
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))

    _Codex401ThenSuccessAgent.refresh_attempts = 0
    _Codex401ThenSuccessAgent.last_init = {}

    success, output, final_response, error = cron_scheduler.run_job(
        {"id": "job-1", "name": "Codex Refresh Test", "prompt": "ping"}
    )

    assert success is True
    assert error is None
    assert final_response == "Recovered via refresh"
    assert "Recovered via refresh" in output
    assert _Codex401ThenSuccessAgent.refresh_attempts == 1
    assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex"
    assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses"


def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
    """GatewayRunner._run_agent on the Codex path recovers from a 401 via refresh."""
    _patch_agent_bootstrap(monkeypatch)
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
    monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)
    monkeypatch.setattr(
        gateway_run,
        "_resolve_runtime_agent_kwargs",
        lambda: {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "codex-token",
        },
    )
    monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")

    _Codex401ThenSuccessAgent.refresh_attempts = 0
    _Codex401ThenSuccessAgent.last_init = {}

    # Build a bare runner without running GatewayRunner.__init__ (avoids I/O).
    runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
    runner.adapters = {}
    runner._ephemeral_system_prompt = ""
    runner._prefill_messages = []
    runner._reasoning_config = None
    runner._running_agents = {}
    from unittest.mock import MagicMock, AsyncMock
    runner.hooks = MagicMock()
    runner.hooks.emit = AsyncMock()
    runner.hooks.loaded_hooks = []
    runner._session_db = None

    source = SessionSource(
        platform=Platform.LOCAL,
        chat_id="cli",
        chat_name="CLI",
        chat_type="dm",
        user_id="user-1",
    )

    result = asyncio.run(
        runner._run_agent(
            message="ping",
            context_prompt="",
            history=[],
            source=source,
            session_id="session-1",
            session_key="agent:main:local:dm",
        )
    )

    assert result["final_response"] == "Recovered via refresh"
    assert _Codex401ThenSuccessAgent.refresh_attempts == 1
    assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex"
    assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses"
codex_home = tmp_path / "codex-home" + codex_home.mkdir(parents=True, exist_ok=True) + (codex_home / "config.toml").write_text('model = "gpt-5.2-codex"\n') + (codex_home / "models_cache.json").write_text( + json.dumps( + { + "models": [ + {"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True}, + {"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True}, + {"slug": "gpt-4o", "priority": 1, "supported_in_api": True}, + {"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"}, + ] + } + ) + ) + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + models = get_codex_model_ids() + + assert models[0] == "gpt-5.2-codex" + assert "gpt-5.1-codex" in models + assert "gpt-5.3-codex" in models + assert "gpt-4o" not in models + assert "gpt-5-hidden-codex" not in models + + +def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + codex_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + models = get_codex_model_ids() + + assert models[: len(DEFAULT_CODEX_MODELS)] == DEFAULT_CODEX_MODELS diff --git a/tests/test_external_credential_detection.py b/tests/test_external_credential_detection.py new file mode 100644 index 000000000..a1fe2a2f9 --- /dev/null +++ b/tests/test_external_credential_detection.py @@ -0,0 +1,51 @@ +"""Tests for detect_external_credentials() -- Phase 2 credential sync.""" + +import json +from pathlib import Path +from unittest.mock import patch + +import pytest + +from hermes_cli.auth import detect_external_credentials + + +class TestDetectCodexCLI: + def test_detects_valid_codex_auth(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + auth = codex_dir / "auth.json" + auth.write_text(json.dumps({ + "tokens": {"access_token": "tok-123", "refresh_token": "ref-456"} + })) + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir): + result = detect_external_credentials() + 
codex_hits = [c for c in result if c["provider"] == "openai-codex"] + assert len(codex_hits) == 1 + assert "Codex CLI" in codex_hits[0]["label"] + assert str(auth) == codex_hits[0]["path"] + + def test_skips_codex_without_access_token(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "auth.json").write_text(json.dumps({"tokens": {}})) + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir): + result = detect_external_credentials() + assert not any(c["provider"] == "openai-codex" for c in result) + + def test_skips_missing_codex_dir(self, tmp_path): + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=tmp_path / "nonexistent"): + result = detect_external_credentials() + assert not any(c["provider"] == "openai-codex" for c in result) + + def test_skips_malformed_codex_auth(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "auth.json").write_text("{bad json") + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir): + result = detect_external_credentials() + assert not any(c["provider"] == "openai-codex" for c in result) + + def test_returns_empty_when_nothing_found(self, tmp_path): + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=tmp_path / ".codex"): + result = detect_external_credentials() + assert result == [] diff --git a/tests/test_flush_memories_codex.py b/tests/test_flush_memories_codex.py new file mode 100644 index 000000000..22eef5ab0 --- /dev/null +++ b/tests/test_flush_memories_codex.py @@ -0,0 +1,225 @@ +"""Tests for flush_memories() working correctly across all provider modes. + +Catches the bug where Codex mode called chat.completions.create on a +Responses-only client, which would fail silently or with a 404. 
+""" + +import json +import os +import sys +import types +from types import SimpleNamespace +from unittest.mock import patch, MagicMock, call + +import pytest + +sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) +sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) +sys.modules.setdefault("fal_client", types.SimpleNamespace()) + +import run_agent + + +class _FakeOpenAI: + def __init__(self, **kwargs): + self.kwargs = kwargs + self.api_key = kwargs.get("api_key", "test") + self.base_url = kwargs.get("base_url", "http://test") + + def close(self): + pass + + +def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"): + """Build an AIAgent with mocked internals, ready for flush_memories testing.""" + monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kw: [ + { + "type": "function", + "function": { + "name": "memory", + "description": "Manage memories.", + "parameters": { + "type": "object", + "properties": { + "action": {"type": "string"}, + "target": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + }, + }, + ]) + monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {}) + monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI) + + agent = run_agent.AIAgent( + api_key="test-key", + base_url="https://test.example.com/v1", + provider=provider, + api_mode=api_mode, + max_iterations=4, + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + # Give it a valid memory store + agent._memory_store = MagicMock() + agent._memory_flush_min_turns = 1 + agent._user_turn_count = 5 + return agent + + +def _chat_response_with_memory_call(): + """Simulated chat completions response with a memory tool call.""" + return SimpleNamespace( + choices=[SimpleNamespace( + message=SimpleNamespace( + content=None, + tool_calls=[SimpleNamespace( + function=SimpleNamespace( + name="memory", + arguments=json.dumps({ + "action": "add", + "target": "notes", + 
"content": "User prefers dark mode.", + }), + ), + )], + ), + )], + usage=SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120), + ) + + +class TestFlushMemoriesUsesAuxiliaryClient: + """When an auxiliary client is available, flush_memories should use it + instead of self.client -- especially critical in Codex mode.""" + + def test_flush_uses_auxiliary_when_available(self, monkeypatch): + agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex") + + mock_aux_client = MagicMock() + mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + {"role": "user", "content": "Remember this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: + agent.flush_memories(messages) + + mock_aux_client.chat.completions.create.assert_called_once() + call_kwargs = mock_aux_client.chat.completions.create.call_args + assert call_kwargs.kwargs.get("model") == "gpt-4o-mini" or call_kwargs[1].get("model") == "gpt-4o-mini" + + def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch): + """Non-Codex mode with no auxiliary falls back to self.client.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + agent.client = MagicMock() + agent.client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + {"role": "user", "content": "Save this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved."): + agent.flush_memories(messages) + + 
agent.client.chat.completions.create.assert_called_once() + + def test_flush_executes_memory_tool_calls(self, monkeypatch): + """Verify that memory tool calls from the flush response actually get executed.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + + mock_aux_client = MagicMock() + mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Note this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: + agent.flush_memories(messages) + + mock_memory.assert_called_once() + call_kwargs = mock_memory.call_args + assert call_kwargs.kwargs["action"] == "add" + assert call_kwargs.kwargs["target"] == "notes" + assert "dark mode" in call_kwargs.kwargs["content"] + + def test_flush_strips_artifacts_from_messages(self, monkeypatch): + """After flush, the flush prompt and any response should be removed from messages.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + + mock_aux_client = MagicMock() + mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Remember X"}, + ] + original_len = len(messages) + with patch("tools.memory_tool.memory_tool", return_value="Saved."): + agent.flush_memories(messages) + + # Messages should not grow from the flush + assert len(messages) <= original_len + # No flush sentinel should remain + for msg in messages: + assert "_flush_sentinel" not in msg + + +class 
TestFlushMemoriesCodexFallback: + """When no auxiliary client exists and we're in Codex mode, flush should + use the Codex Responses API path instead of chat.completions.""" + + def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch): + agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex") + + codex_response = SimpleNamespace( + output=[ + SimpleNamespace( + type="function_call", + call_id="call_1", + name="memory", + arguments=json.dumps({ + "action": "add", + "target": "notes", + "content": "Codex flush test", + }), + ), + ], + usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60), + status="completed", + model="gpt-5-codex", + ) + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \ + patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \ + patch.object(agent, "_build_api_kwargs") as mock_build, \ + patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: + mock_build.return_value = { + "model": "gpt-5-codex", + "instructions": "test", + "input": [], + "tools": [], + "max_output_tokens": 4096, + } + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Save this"}, + ] + agent.flush_memories(messages) + + mock_stream.assert_called_once() + mock_memory.assert_called_once() + assert mock_memory.call_args.kwargs["content"] == "Codex flush test" diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py new file mode 100644 index 000000000..82199ac4c --- /dev/null +++ b/tests/test_provider_parity.py @@ -0,0 +1,460 @@ +"""Provider parity tests: verify that AIAgent builds correct API kwargs +and handles responses properly for all supported providers. + +Ensures changes to one provider path don't silently break another. 
+""" + +import json +import os +import sys +import types +from types import SimpleNamespace +from unittest.mock import patch, MagicMock + +import pytest + +sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) +sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) +sys.modules.setdefault("fal_client", types.SimpleNamespace()) + +from run_agent import AIAgent + + +# ── Helpers ────────────────────────────────────────────────────────────────── + +def _tool_defs(*names): + return [ + { + "type": "function", + "function": { + "name": n, + "description": f"{n} tool", + "parameters": {"type": "object", "properties": {}}, + }, + } + for n in names + ] + + +class _FakeOpenAI: + def __init__(self, **kw): + self.api_key = kw.get("api_key", "test") + self.base_url = kw.get("base_url", "http://test") + def close(self): + pass + + +def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"): + monkeypatch.setattr("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal")) + monkeypatch.setattr("run_agent.check_toolset_requirements", lambda: {}) + monkeypatch.setattr("run_agent.OpenAI", _FakeOpenAI) + return AIAgent( + api_key="test-key", + base_url=base_url, + provider=provider, + api_mode=api_mode, + max_iterations=4, + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + +# ── _build_api_kwargs tests ───────────────────────────────────────────────── + +class TestBuildApiKwargsOpenRouter: + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "model" in kwargs + assert kwargs["messages"][-1]["content"] == "hi" + + def test_includes_reasoning_in_extra_body(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", 
"content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert "reasoning" in extra + assert extra["reasoning"]["enabled"] is True + + def test_includes_tools(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "tools" in kwargs + tool_names = [t["function"]["name"] for t in kwargs["tools"]] + assert "web_search" in tool_names + + def test_no_responses_api_fields(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "input" not in kwargs + assert "instructions" not in kwargs + assert "store" not in kwargs + + +class TestBuildApiKwargsNousPortal: + def test_includes_nous_product_tags(self, monkeypatch): + agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert extra.get("tags") == ["product=hermes-agent"] + + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "input" not in kwargs + + +class TestBuildApiKwargsCustomEndpoint: + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "input" not in kwargs + + def test_no_openrouter_extra_body(self, monkeypatch): + agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1") + messages = [{"role": 
"user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert "reasoning" not in extra + + +class TestBuildApiKwargsCodex: + def test_uses_responses_api_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "input" in kwargs + assert "instructions" in kwargs + assert "messages" not in kwargs + assert kwargs["store"] is False + + def test_includes_reasoning_config(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "reasoning" in kwargs + assert kwargs["reasoning"]["effort"] == "medium" + + def test_includes_encrypted_content_in_include(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "reasoning.encrypted_content" in kwargs.get("include", []) + + def test_tools_converted_to_responses_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + tools = kwargs.get("tools", []) + assert len(tools) > 0 + # Responses format has "name" at top level, not nested under "function" + assert "name" in tools[0] + assert "function" not in tools[0] + + +# ── Message conversion tests ──────────────────────────────────────────────── + +class TestChatMessagesToResponsesInput: + """Verify _chat_messages_to_responses_input for Codex 
mode.""" + + def test_user_message_passes_through(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hello"}] + items = agent._chat_messages_to_responses_input(messages) + assert items == [{"role": "user", "content": "hello"}] + + def test_system_messages_filtered(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "system", "content": "be helpful"}, + {"role": "user", "content": "hello"}, + ] + items = agent._chat_messages_to_responses_input(messages) + assert len(items) == 1 + assert items[0]["role"] == "user" + + def test_assistant_tool_calls_become_function_call_items(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{ + "role": "assistant", + "content": "", + "tool_calls": [{ + "id": "call_abc", + "call_id": "call_abc", + "function": {"name": "web_search", "arguments": '{"query": "test"}'}, + }], + }] + items = agent._chat_messages_to_responses_input(messages) + fc_items = [i for i in items if i.get("type") == "function_call"] + assert len(fc_items) == 1 + assert fc_items[0]["name"] == "web_search" + assert fc_items[0]["call_id"] == "call_abc" + + def test_tool_results_become_function_call_output(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "tool", "tool_call_id": "call_abc", "content": "result here"}] + items = agent._chat_messages_to_responses_input(messages) + assert items[0]["type"] == "function_call_output" + assert items[0]["call_id"] == "call_abc" + assert items[0]["output"] == "result here" + + def test_encrypted_reasoning_replayed(self, 
monkeypatch): + """Encrypted reasoning items from previous turns must be included in input.""" + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "user", "content": "think about this"}, + { + "role": "assistant", + "content": "I thought about it.", + "codex_reasoning_items": [ + {"type": "reasoning", "id": "rs_abc", "encrypted_content": "gAAAA_test_blob"}, + ], + }, + {"role": "user", "content": "continue"}, + ] + items = agent._chat_messages_to_responses_input(messages) + reasoning_items = [i for i in items if i.get("type") == "reasoning"] + assert len(reasoning_items) == 1 + assert reasoning_items[0]["encrypted_content"] == "gAAAA_test_blob" + + def test_no_reasoning_items_for_non_codex_messages(self, monkeypatch): + """Messages without codex_reasoning_items should not inject anything.""" + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "assistant", "content": "hi"}, + {"role": "user", "content": "hello"}, + ] + items = agent._chat_messages_to_responses_input(messages) + reasoning_items = [i for i in items if i.get("type") == "reasoning"] + assert len(reasoning_items) == 0 + + +# ── Response normalization tests ───────────────────────────────────────────── + +class TestNormalizeCodexResponse: + """Verify _normalize_codex_response extracts all fields correctly.""" + + def _make_codex_agent(self, monkeypatch): + return _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + + def test_text_response(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="Hello!")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = 
agent._normalize_codex_response(response) + assert msg.content == "Hello!" + assert reason == "stop" + + def test_reasoning_summary_extracted(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="reasoning", + encrypted_content="gAAAA_blob", + summary=[SimpleNamespace(type="summary_text", text="Thinking about math")], + id="rs_123", status=None), + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="42")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.content == "42" + assert "math" in msg.reasoning + assert reason == "stop" + + def test_encrypted_content_captured(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="reasoning", + encrypted_content="gAAAA_secret_blob_123", + summary=[SimpleNamespace(type="summary_text", text="Thinking")], + id="rs_456", status=None), + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="done")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.codex_reasoning_items is not None + assert len(msg.codex_reasoning_items) == 1 + assert msg.codex_reasoning_items[0]["encrypted_content"] == "gAAAA_secret_blob_123" + assert msg.codex_reasoning_items[0]["id"] == "rs_456" + + def test_no_encrypted_content_when_missing(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="no reasoning")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.codex_reasoning_items is None + + def 
test_tool_calls_extracted(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="function_call", status="completed", + call_id="call_xyz", name="web_search", + arguments='{"query":"test"}', id="fc_xyz"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert reason == "tool_calls" + assert len(msg.tool_calls) == 1 + assert msg.tool_calls[0].function.name == "web_search" + + +# ── Chat completions response handling (OpenRouter/Nous) ───────────────────── + +class TestBuildAssistantMessage: + """Verify _build_assistant_message works for all provider response formats.""" + + def test_openrouter_reasoning_fields(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + msg = SimpleNamespace( + content="answer", + tool_calls=None, + reasoning="I thought about it", + reasoning_content=None, + reasoning_details=None, + ) + result = agent._build_assistant_message(msg, "stop") + assert result["content"] == "answer" + assert result["reasoning"] == "I thought about it" + assert "codex_reasoning_items" not in result + + def test_openrouter_reasoning_details_preserved_unmodified(self, monkeypatch): + """reasoning_details must be passed back exactly as received for + multi-turn continuity (OpenRouter, Anthropic, OpenAI all need this).""" + agent = _make_agent(monkeypatch, "openrouter") + original_detail = { + "type": "thinking", + "thinking": "deep thoughts here", + "signature": "sig123_opaque_blob", + "encrypted_content": "some_provider_blob", + "extra_field": "should_not_be_dropped", + } + msg = SimpleNamespace( + content="answer", + tool_calls=None, + reasoning=None, + reasoning_content=None, + reasoning_details=[original_detail], + ) + result = agent._build_assistant_message(msg, "stop") + stored = result["reasoning_details"][0] + # ALL fields must survive, not just type/text/signature + assert stored["signature"] == "sig123_opaque_blob" + 
assert stored["encrypted_content"] == "some_provider_blob" + assert stored["extra_field"] == "should_not_be_dropped" + assert stored["thinking"] == "deep thoughts here" + + def test_codex_preserves_encrypted_reasoning(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + msg = SimpleNamespace( + content="result", + tool_calls=None, + reasoning="summary text", + reasoning_content=None, + reasoning_details=None, + codex_reasoning_items=[ + {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"}, + ], + ) + result = agent._build_assistant_message(msg, "stop") + assert result["codex_reasoning_items"] == [ + {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"}, + ] + + def test_plain_message_no_codex_items(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + msg = SimpleNamespace( + content="simple", + tool_calls=None, + reasoning=None, + reasoning_content=None, + reasoning_details=None, + ) + result = agent._build_assistant_message(msg, "stop") + assert "codex_reasoning_items" not in result + + +# ── Auxiliary client provider resolution ───────────────────────────────────── + +class TestAuxiliaryClientProviderPriority: + """Verify auxiliary client resolution doesn't break for any provider.""" + + def test_openrouter_always_wins(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + from agent.auxiliary_client import get_text_auxiliary_client + with patch("agent.auxiliary_client.OpenAI") as mock: + client, model = get_text_auxiliary_client() + assert model == "google/gemini-3-flash-preview" + assert "openrouter" in str(mock.call_args.kwargs["base_url"]).lower() + + def test_nous_when_no_openrouter(self, monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + from agent.auxiliary_client import get_text_auxiliary_client + with patch("agent.auxiliary_client._read_nous_auth", 
return_value={"access_token": "nous-tok"}), \ + patch("agent.auxiliary_client.OpenAI") as mock: + client, model = get_text_auxiliary_client() + assert model == "gemini-3-flash" + + def test_custom_endpoint_when_no_nous(self, monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1") + monkeypatch.setenv("OPENAI_API_KEY", "local-key") + from agent.auxiliary_client import get_text_auxiliary_client + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI") as mock: + client, model = get_text_auxiliary_client() + assert mock.call_args.kwargs["base_url"] == "http://localhost:1234/v1" + + def test_codex_fallback_last_resort(self, monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + from agent.auxiliary_client import get_text_auxiliary_client, CodexAuxiliaryClient + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-tok"), \ + patch("agent.auxiliary_client.OpenAI"): + client, model = get_text_auxiliary_client() + assert model == "gpt-5.3-codex" + assert isinstance(client, CodexAuxiliaryClient) diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py new file mode 100644 index 000000000..a1e5e817e --- /dev/null +++ b/tests/test_run_agent_codex_responses.py @@ -0,0 +1,748 @@ +import sys +import types +from types import SimpleNamespace + +import pytest + + +sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) +sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) +sys.modules.setdefault("fal_client", types.SimpleNamespace()) + +import run_agent + + +def _patch_agent_bootstrap(monkeypatch): + monkeypatch.setattr( + 
run_agent, + "get_tool_definitions", + lambda **kwargs: [ + { + "type": "function", + "function": { + "name": "terminal", + "description": "Run shell commands.", + "parameters": {"type": "object", "properties": {}}, + }, + } + ], + ) + monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {}) + + +def _build_agent(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + + agent = run_agent.AIAgent( + model="gpt-5-codex", + base_url="https://chatgpt.com/backend-api/codex", + api_key="codex-token", + quiet_mode=True, + max_iterations=4, + skip_context_files=True, + skip_memory=True, + ) + agent._cleanup_task_resources = lambda task_id: None + agent._persist_session = lambda messages, history=None: None + agent._save_trajectory = lambda messages, user_message, completed: None + agent._save_session_log = lambda messages: None + return agent + + +def _codex_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8), + status="completed", + model="gpt-5-codex", + ) + + +def _codex_tool_call_response(): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="function_call", + id="fc_1", + call_id="call_1", + name="terminal", + arguments="{}", + ) + ], + usage=SimpleNamespace(input_tokens=12, output_tokens=4, total_tokens=16), + status="completed", + model="gpt-5-codex", + ) + + +def _codex_incomplete_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + status="in_progress", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="in_progress", + model="gpt-5-codex", + ) + + +def _codex_commentary_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + phase="commentary", + 
status="completed", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="completed", + model="gpt-5-codex", + ) + + +def _codex_ack_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + status="completed", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="completed", + model="gpt-5-codex", + ) + + +class _FakeResponsesStream: + def __init__(self, *, final_response=None, final_error=None): + self._final_response = final_response + self._final_error = final_error + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def __iter__(self): + return iter(()) + + def get_final_response(self): + if self._final_error is not None: + raise self._final_error + return self._final_response + + +class _FakeCreateStream: + def __init__(self, events): + self._events = list(events) + self.closed = False + + def __iter__(self): + return iter(self._events) + + def close(self): + self.closed = True + + +def _codex_request_kwargs(): + return { + "model": "gpt-5-codex", + "instructions": "You are Hermes.", + "input": [{"role": "user", "content": "Ping"}], + "tools": None, + "store": False, + } + + +def test_api_mode_uses_explicit_provider_when_codex(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + agent = run_agent.AIAgent( + model="gpt-5-codex", + base_url="https://openrouter.ai/api/v1", + provider="openai-codex", + api_key="codex-token", + quiet_mode=True, + max_iterations=1, + skip_context_files=True, + skip_memory=True, + ) + assert agent.api_mode == "codex_responses" + assert agent.provider == "openai-codex" + + +def test_api_mode_normalizes_provider_case(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + agent = run_agent.AIAgent( + model="gpt-5-codex", + 
base_url="https://openrouter.ai/api/v1", + provider="OpenAI-Codex", + api_key="codex-token", + quiet_mode=True, + max_iterations=1, + skip_context_files=True, + skip_memory=True, + ) + assert agent.provider == "openai-codex" + assert agent.api_mode == "codex_responses" + + +def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + agent = run_agent.AIAgent( + model="gpt-5-codex", + base_url="https://chatgpt.com/backend-api/codex", + provider="openrouter", + api_key="test-token", + quiet_mode=True, + max_iterations=1, + skip_context_files=True, + skip_memory=True, + ) + assert agent.api_mode == "chat_completions" + assert agent.provider == "openrouter" + + +def test_build_api_kwargs_codex(monkeypatch): + agent = _build_agent(monkeypatch) + kwargs = agent._build_api_kwargs( + [ + {"role": "system", "content": "You are Hermes."}, + {"role": "user", "content": "Ping"}, + ] + ) + + assert kwargs["model"] == "gpt-5-codex" + assert kwargs["instructions"] == "You are Hermes." 
+ assert kwargs["store"] is False + assert isinstance(kwargs["input"], list) + assert kwargs["input"][0]["role"] == "user" + assert kwargs["tools"][0]["type"] == "function" + assert kwargs["tools"][0]["name"] == "terminal" + assert kwargs["tools"][0]["strict"] is False + assert "function" not in kwargs["tools"][0] + assert kwargs["store"] is False + assert "timeout" not in kwargs + assert "max_tokens" not in kwargs + assert "extra_body" not in kwargs + + +def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch): + agent = _build_agent(monkeypatch) + calls = {"stream": 0} + + def _fake_stream(**kwargs): + calls["stream"] += 1 + if calls["stream"] == 1: + return _FakeResponsesStream( + final_error=RuntimeError("Didn't receive a `response.completed` event.") + ) + return _FakeResponsesStream(final_response=_codex_message_response("stream ok")) + + agent.client = SimpleNamespace( + responses=SimpleNamespace( + stream=_fake_stream, + create=lambda **kwargs: _codex_message_response("fallback"), + ) + ) + + response = agent._run_codex_stream(_codex_request_kwargs()) + assert calls["stream"] == 2 + assert response.output[0].content[0].text == "stream ok" + + +def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(monkeypatch): + agent = _build_agent(monkeypatch) + calls = {"stream": 0, "create": 0} + + def _fake_stream(**kwargs): + calls["stream"] += 1 + return _FakeResponsesStream( + final_error=RuntimeError("Didn't receive a `response.completed` event.") + ) + + def _fake_create(**kwargs): + calls["create"] += 1 + return _codex_message_response("create fallback ok") + + agent.client = SimpleNamespace( + responses=SimpleNamespace( + stream=_fake_stream, + create=_fake_create, + ) + ) + + response = agent._run_codex_stream(_codex_request_kwargs()) + assert calls["stream"] == 2 + assert calls["create"] == 1 + assert response.output[0].content[0].text == "create fallback ok" + + +def 
test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch): + agent = _build_agent(monkeypatch) + calls = {"stream": 0, "create": 0} + create_stream = _FakeCreateStream( + [ + SimpleNamespace(type="response.created"), + SimpleNamespace(type="response.in_progress"), + SimpleNamespace(type="response.completed", response=_codex_message_response("streamed create ok")), + ] + ) + + def _fake_stream(**kwargs): + calls["stream"] += 1 + return _FakeResponsesStream( + final_error=RuntimeError("Didn't receive a `response.completed` event.") + ) + + def _fake_create(**kwargs): + calls["create"] += 1 + assert kwargs.get("stream") is True + return create_stream + + agent.client = SimpleNamespace( + responses=SimpleNamespace( + stream=_fake_stream, + create=_fake_create, + ) + ) + + response = agent._run_codex_stream(_codex_request_kwargs()) + assert calls["stream"] == 2 + assert calls["create"] == 1 + assert create_stream.closed is True + assert response.output[0].content[0].text == "streamed create ok" + + +def test_run_conversation_codex_plain_text(monkeypatch): + agent = _build_agent(monkeypatch) + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: _codex_message_response("OK")) + + result = agent.run_conversation("Say OK") + + assert result["completed"] is True + assert result["final_response"] == "OK" + assert result["messages"][-1]["role"] == "assistant" + assert result["messages"][-1]["content"] == "OK" + + +def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch): + agent = _build_agent(monkeypatch) + calls = {"api": 0, "refresh": 0} + + class _UnauthorizedError(RuntimeError): + def __init__(self): + super().__init__("Error code: 401 - unauthorized") + self.status_code = 401 + + def _fake_api_call(api_kwargs): + calls["api"] += 1 + if calls["api"] == 1: + raise _UnauthorizedError() + return _codex_message_response("Recovered after refresh") + + def _fake_refresh(*, force=True): + calls["refresh"] += 1 + assert force 
is True + return True + + monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call) + monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh) + + result = agent.run_conversation("Say OK") + + assert calls["api"] == 2 + assert calls["refresh"] == 1 + assert result["completed"] is True + assert result["final_response"] == "Recovered after refresh" + + +def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch): + agent = _build_agent(monkeypatch) + closed = {"value": False} + rebuilt = {"kwargs": None} + + class _ExistingClient: + def close(self): + closed["value"] = True + + class _RebuiltClient: + pass + + def _fake_openai(**kwargs): + rebuilt["kwargs"] = kwargs + return _RebuiltClient() + + monkeypatch.setattr( + "hermes_cli.auth.resolve_codex_runtime_credentials", + lambda force_refresh=True: { + "api_key": "new-codex-token", + "base_url": "https://chatgpt.com/backend-api/codex", + }, + ) + monkeypatch.setattr(run_agent, "OpenAI", _fake_openai) + + agent.client = _ExistingClient() + ok = agent._try_refresh_codex_client_credentials(force=True) + + assert ok is True + assert closed["value"] is True + assert rebuilt["kwargs"]["api_key"] == "new-codex-token" + assert rebuilt["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex" + assert isinstance(agent.client, _RebuiltClient) + + +def test_run_conversation_codex_tool_round_trip(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [_codex_tool_call_response(), _codex_message_response("done")] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("run a command") + 
+ assert result["completed"] is True + assert result["final_response"] == "done" + assert any(msg.get("tool_calls") for msg in result["messages"] if msg.get("role") == "assistant") + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeypatch): + agent = _build_agent(monkeypatch) + items = agent._chat_messages_to_responses_input( + [ + {"role": "user", "content": "Run terminal"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": {"name": "terminal", "arguments": "{}"}, + } + ], + }, + {"role": "tool", "tool_call_id": "call_abc123", "content": '{"ok":true}'}, + ] + ) + + function_call = next(item for item in items if item.get("type") == "function_call") + function_output = next(item for item in items if item.get("type") == "function_call_output") + + assert function_call["call_id"] == "call_abc123" + assert "id" not in function_call + assert function_output["call_id"] == "call_abc123" + + +def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch): + agent = _build_agent(monkeypatch) + items = agent._chat_messages_to_responses_input( + [ + {"role": "user", "content": "Run terminal"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_pair123|fc_pair123", + "type": "function", + "function": {"name": "terminal", "arguments": "{}"}, + } + ], + }, + {"role": "tool", "tool_call_id": "call_pair123|fc_pair123", "content": '{"ok":true}'}, + ] + ) + + function_call = next(item for item in items if item.get("type") == "function_call") + function_output = next(item for item in items if item.get("type") == "function_call_output") + + assert function_call["call_id"] == "call_pair123" + assert "id" not in function_call + assert function_output["call_id"] == "call_pair123" + + +def 
test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch): + agent = _build_agent(monkeypatch) + preflight = agent._preflight_codex_api_kwargs( + { + "model": "gpt-5-codex", + "instructions": "You are Hermes.", + "input": [ + {"role": "user", "content": "hi"}, + { + "type": "function_call", + "id": "call_bad", + "call_id": "call_good", + "name": "terminal", + "arguments": "{}", + }, + ], + "tools": [], + "store": False, + } + ) + + fn_call = next(item for item in preflight["input"] if item.get("type") == "function_call") + assert fn_call["call_id"] == "call_good" + assert "id" not in fn_call + + +def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id(monkeypatch): + agent = _build_agent(monkeypatch) + + with pytest.raises(ValueError, match="function_call_output is missing call_id"): + agent._preflight_codex_api_kwargs( + { + "model": "gpt-5-codex", + "instructions": "You are Hermes.", + "input": [{"type": "function_call_output", "output": "{}"}], + "tools": [], + "store": False, + } + ) + + +def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch): + agent = _build_agent(monkeypatch) + kwargs = _codex_request_kwargs() + kwargs["some_unknown_field"] = "value" + + with pytest.raises(ValueError, match="unsupported field"): + agent._preflight_codex_api_kwargs(kwargs) + + +def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch): + agent = _build_agent(monkeypatch) + kwargs = _codex_request_kwargs() + kwargs["reasoning"] = {"effort": "high", "summary": "auto"} + kwargs["include"] = ["reasoning.encrypted_content"] + kwargs["temperature"] = 0.7 + kwargs["max_output_tokens"] = 4096 + + result = agent._preflight_codex_api_kwargs(kwargs) + assert result["reasoning"] == {"effort": "high", "summary": "auto"} + assert result["include"] == ["reasoning.encrypted_content"] + assert result["temperature"] == 0.7 + assert result["max_output_tokens"] == 4096 + + +def 
test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [_codex_tool_call_response(), _codex_message_response("done")] + requests = [] + + def _fake_api_call(api_kwargs): + requests.append(api_kwargs) + return responses.pop(0) + + monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("run a command") + + assert result["completed"] is True + assert result["final_response"] == "done" + assert len(requests) >= 2 + + replay_input = requests[1]["input"] + function_call = next(item for item in replay_input if item.get("type") == "function_call") + function_output = next(item for item in replay_input if item.get("type") == "function_call_output") + assert function_call["call_id"] == "call_1" + assert "id" not in function_call + assert function_output["call_id"] == "call_1" + + +def test_run_conversation_codex_continues_after_incomplete_interim_message(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_incomplete_message_response("I'll inspect the repo structure first."), + _codex_tool_call_response(), + _codex_message_response("Architecture summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("analyze repo") + + assert 
result["completed"] is True + assert result["final_response"] == "Architecture summary complete." + assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "inspect the repo structure" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch): + agent = _build_agent(monkeypatch) + assistant_message, finish_reason = agent._normalize_codex_response( + _codex_commentary_message_response("I'll inspect the repository first.") + ) + + assert finish_reason == "incomplete" + assert "inspect the repository" in (assistant_message.content or "") + + +def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_commentary_message_response("I'll inspect the repo structure first."), + _codex_tool_call_response(), + _codex_message_response("Architecture summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("analyze repo") + + assert result["completed"] is True + assert result["final_response"] == "Architecture summary complete." 
+ assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "inspect the repo structure" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_run_conversation_codex_continues_after_ack_stop_message(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_ack_message_response( + "Absolutely — I can do that. I'll inspect ~/openclaw-studio and report back with a walkthrough." + ), + _codex_tool_call_response(), + _codex_message_response("Architecture summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("look into ~/openclaw-studio and tell me how it works") + + assert result["completed"] is True + assert result["final_response"] == "Architecture summary complete." + assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "inspect ~/openclaw-studio" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any( + msg.get("role") == "user" + and "Continue now. 
Execute the required tool calls" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_run_conversation_codex_continues_after_ack_for_directory_listing_prompt(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_ack_message_response( + "I'll check what's in the current directory and call out 3 notable items." + ), + _codex_tool_call_response(), + _codex_message_response("Directory summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("look at current directory and list 3 notable things") + + assert result["completed"] is True + assert result["final_response"] == "Directory summary complete." + assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "current directory" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any( + msg.get("role") == "user" + and "Continue now. 
Execute the required tool calls" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py new file mode 100644 index 000000000..af6914092 --- /dev/null +++ b/tests/test_runtime_provider_resolution.py @@ -0,0 +1,95 @@ +from hermes_cli import runtime_provider as rp + + +def test_resolve_runtime_provider_codex(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex") + monkeypatch.setattr( + rp, + "resolve_codex_runtime_credentials", + lambda: { + "provider": "openai-codex", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "codex-token", + "source": "codex-auth-json", + "auth_file": "/tmp/auth.json", + "codex_home": "/tmp/codex", + "last_refresh": "2026-02-26T00:00:00Z", + }, + ) + + resolved = rp.resolve_runtime_provider(requested="openai-codex") + + assert resolved["provider"] == "openai-codex" + assert resolved["api_mode"] == "codex_responses" + assert resolved["base_url"] == "https://chatgpt.com/backend-api/codex" + assert resolved["api_key"] == "codex-token" + assert resolved["requested_provider"] == "openai-codex" + + +def test_resolve_runtime_provider_openrouter_explicit(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr(rp, "_get_model_config", lambda: {}) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider( + requested="openrouter", + explicit_api_key="test-key", + explicit_base_url="https://example.com/v1/", + ) + + assert resolved["provider"] == "openrouter" + assert resolved["api_mode"] == "chat_completions" + assert 
resolved["api_key"] == "test-key" + assert resolved["base_url"] == "https://example.com/v1" + assert resolved["source"] == "explicit" + + +def test_resolve_runtime_provider_openrouter_ignores_codex_config_base_url(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "openai-codex", + "base_url": "https://chatgpt.com/backend-api/codex", + }, + ) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider(requested="openrouter") + + assert resolved["provider"] == "openrouter" + assert resolved["base_url"] == rp.OPENROUTER_BASE_URL + + +def test_resolve_runtime_provider_auto_uses_custom_config_base_url(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "auto", + "base_url": "https://custom.example/v1/", + }, + ) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider(requested="auto") + + assert resolved["provider"] == "openrouter" + assert resolved["base_url"] == "https://custom.example/v1" + + +def test_resolve_requested_provider_precedence(monkeypatch): + monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous") + monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "openai-codex"}) + assert rp.resolve_requested_provider("openrouter") == "openrouter" diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 5d5bb2c7c..948af4d0f 100644 --- a/tests/tools/test_delegate.py +++ 
b/tests/tools/test_delegate.py @@ -30,6 +30,9 @@ def _make_mock_parent(depth=0): """Create a mock parent agent with the fields delegate_task expects.""" parent = MagicMock() parent.base_url = "https://openrouter.ai/api/v1" + parent.api_key = "parent-key" + parent.provider = "openrouter" + parent.api_mode = "chat_completions" parent.model = "anthropic/claude-sonnet-4" parent.platform = "cli" parent.providers_allowed = None @@ -218,6 +221,30 @@ class TestDelegateTask(unittest.TestCase): delegate_task(goal="Test tracking", parent_agent=parent) self.assertEqual(len(parent._active_children), 0) + def test_child_inherits_runtime_credentials(self): + parent = _make_mock_parent(depth=0) + parent.base_url = "https://chatgpt.com/backend-api/codex" + parent.api_key = "codex-token" + parent.provider = "openai-codex" + parent.api_mode = "codex_responses" + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + mock_child.run_conversation.return_value = { + "final_response": "ok", + "completed": True, + "api_calls": 1, + } + MockAgent.return_value = mock_child + + delegate_task(goal="Test runtime inheritance", parent_agent=parent) + + _, kwargs = MockAgent.call_args + self.assertEqual(kwargs["base_url"], parent.base_url) + self.assertEqual(kwargs["api_key"], parent.api_key) + self.assertEqual(kwargs["provider"], parent.provider) + self.assertEqual(kwargs["api_mode"], parent.api_mode) + class TestBlockedTools(unittest.TestCase): def test_blocked_tools_constant(self): diff --git a/tests/tools/test_file_tools_live.py b/tests/tools/test_file_tools_live.py new file mode 100644 index 000000000..99627b91a --- /dev/null +++ b/tests/tools/test_file_tools_live.py @@ -0,0 +1,483 @@ +"""Live integration tests for file operations and terminal tools. + +These tests run REAL commands through the LocalEnvironment -- no mocks. +They verify that shell noise is properly filtered, commands actually work, +and the tool outputs are EXACTLY what the agent would see. 
+ +Every test with output validates against a known-good value AND +asserts zero contamination from shell noise via _assert_clean(). +""" + +import json +import os +import sys +from pathlib import Path + +import pytest + +sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + +from tools.environments.local import LocalEnvironment, _clean_shell_noise, _SHELL_NOISE_SUBSTRINGS +from tools.file_operations import ShellFileOperations + + +# ── Shared noise detection ─────────────────────────────────────────────── +# Every known shell noise pattern. If ANY of these appear in output that +# isn't explicitly expected, the test fails with a clear message. + +_ALL_NOISE_PATTERNS = list(_SHELL_NOISE_SUBSTRINGS) + [ + "bash: ", + "Inappropriate ioctl", +] + + +def _assert_clean(text: str, context: str = "output"): + """Assert text contains zero shell noise contamination.""" + if not text: + return + for noise in _ALL_NOISE_PATTERNS: + assert noise not in text, ( + f"Shell noise leaked into {context}: found {noise!r} in:\n" + f"{text[:500]}" + ) + + +# ── Fixtures ───────────────────────────────────────────────────────────── + +# Deterministic file content used across tests. Every byte is known, +# so any unexpected text in results is immediately caught. 
+SIMPLE_CONTENT = "alpha\nbravo\ncharlie\n" +NUMBERED_CONTENT = "\n".join(f"LINE_{i:04d}" for i in range(1, 51)) + "\n" +SPECIAL_CONTENT = "single 'quotes' and \"doubles\" and $VARS and `backticks` and \\backslash\n" +MULTIFILE_A = "def func_alpha():\n return 42\n" +MULTIFILE_B = "def func_bravo():\n return 99\n" +MULTIFILE_C = "nothing relevant here\n" + + +@pytest.fixture +def env(tmp_path): + """A real LocalEnvironment rooted in a temp directory.""" + return LocalEnvironment(cwd=str(tmp_path), timeout=15) + + +@pytest.fixture +def ops(env, tmp_path): + """ShellFileOperations wired to the real local environment.""" + return ShellFileOperations(env, cwd=str(tmp_path)) + + +@pytest.fixture +def populated_dir(tmp_path): + """A temp directory with known files for search/read tests.""" + (tmp_path / "alpha.py").write_text(MULTIFILE_A) + (tmp_path / "bravo.py").write_text(MULTIFILE_B) + (tmp_path / "notes.txt").write_text(MULTIFILE_C) + (tmp_path / "data.csv").write_text("col1,col2\n1,2\n3,4\n") + return tmp_path + + +# ── _clean_shell_noise unit tests ──────────────────────────────────────── + +class TestCleanShellNoise: + def test_single_noise_line(self): + output = "bash: no job control in this shell\nhello world\n" + result = _clean_shell_noise(output) + assert result == "hello world\n" + + def test_double_noise_lines(self): + output = ( + "bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n" + "bash: no job control in this shell\n" + "actual output here\n" + ) + result = _clean_shell_noise(output) + assert result == "actual output here\n" + _assert_clean(result) + + def test_tcsetattr_noise(self): + output = ( + "bash: [12345: 2 (255)] tcsetattr: Inappropriate ioctl for device\n" + "real content\n" + ) + result = _clean_shell_noise(output) + assert result == "real content\n" + _assert_clean(result) + + def test_triple_noise_lines(self): + output = ( + "bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n" + 
"bash: no job control in this shell\n" + "bash: [999: 2 (255)] tcsetattr: Inappropriate ioctl for device\n" + "clean\n" + ) + result = _clean_shell_noise(output) + assert result == "clean\n" + + def test_no_noise_untouched(self): + assert _clean_shell_noise("hello\nworld\n") == "hello\nworld\n" + + def test_empty_string(self): + assert _clean_shell_noise("") == "" + + def test_only_noise_produces_empty(self): + output = "bash: no job control in this shell\n" + result = _clean_shell_noise(output) + _assert_clean(result) + + def test_noise_in_middle_not_stripped(self): + """Only LEADING noise is stripped -- noise in the middle is real output.""" + output = "real\nbash: no job control in this shell\nmore real\n" + result = _clean_shell_noise(output) + assert result == output + + +# ── LocalEnvironment.execute() ─────────────────────────────────────────── + +class TestLocalEnvironmentExecute: + def test_echo_exact_output(self, env): + result = env.execute("echo DETERMINISTIC_OUTPUT_12345") + assert result["returncode"] == 0 + assert result["output"].strip() == "DETERMINISTIC_OUTPUT_12345" + _assert_clean(result["output"]) + + def test_printf_no_trailing_newline(self, env): + result = env.execute("printf 'exact'") + assert result["returncode"] == 0 + assert result["output"] == "exact" + _assert_clean(result["output"]) + + def test_exit_code_propagated(self, env): + result = env.execute("exit 42") + assert result["returncode"] == 42 + + def test_stderr_captured_in_output(self, env): + result = env.execute("echo STDERR_TEST >&2") + assert "STDERR_TEST" in result["output"] + _assert_clean(result["output"]) + + def test_cwd_respected(self, env, tmp_path): + subdir = tmp_path / "subdir_test" + subdir.mkdir() + result = env.execute("pwd", cwd=str(subdir)) + assert result["returncode"] == 0 + assert result["output"].strip() == str(subdir) + _assert_clean(result["output"]) + + def test_multiline_exact(self, env): + result = env.execute("echo AAA; echo BBB; echo CCC") + lines = 
[l for l in result["output"].strip().split("\n") if l.strip()] + assert lines == ["AAA", "BBB", "CCC"] + _assert_clean(result["output"]) + + def test_env_var_home(self, env): + result = env.execute("echo $HOME") + assert result["returncode"] == 0 + home = result["output"].strip() + assert home == str(Path.home()) + _assert_clean(result["output"]) + + def test_pipe_exact(self, env): + result = env.execute("echo 'one two three' | wc -w") + assert result["returncode"] == 0 + assert result["output"].strip() == "3" + _assert_clean(result["output"]) + + def test_cat_deterministic_content(self, env, tmp_path): + f = tmp_path / "det.txt" + f.write_text(SIMPLE_CONTENT) + result = env.execute(f"cat {f}") + assert result["returncode"] == 0 + assert result["output"] == SIMPLE_CONTENT + _assert_clean(result["output"]) + + +# ── _has_command ───────────────────────────────────────────────────────── + +class TestHasCommand: + def test_finds_echo(self, ops): + assert ops._has_command("echo") is True + + def test_finds_cat(self, ops): + assert ops._has_command("cat") is True + + def test_finds_sed(self, ops): + assert ops._has_command("sed") is True + + def test_finds_wc(self, ops): + assert ops._has_command("wc") is True + + def test_finds_find(self, ops): + assert ops._has_command("find") is True + + def test_missing_command(self, ops): + assert ops._has_command("nonexistent_tool_xyz_abc_999") is False + + def test_rg_or_grep_available(self, ops): + assert ops._has_command("rg") or ops._has_command("grep"), \ + "Neither rg nor grep found -- search_files will break" + + +# ── read_file ──────────────────────────────────────────────────────────── + +class TestReadFile: + def test_exact_content(self, ops, tmp_path): + f = tmp_path / "exact.txt" + f.write_text(SIMPLE_CONTENT) + result = ops.read_file(str(f)) + assert result.error is None + # Content has line numbers prepended, check the actual text is there + assert "alpha" in result.content + assert "bravo" in result.content + 
assert "charlie" in result.content + assert result.total_lines == 3 + _assert_clean(result.content) + + def test_absolute_path(self, ops, tmp_path): + f = tmp_path / "abs.txt" + f.write_text("ABSOLUTE_PATH_CONTENT\n") + result = ops.read_file(str(f)) + assert result.error is None + assert "ABSOLUTE_PATH_CONTENT" in result.content + _assert_clean(result.content) + + def test_tilde_expansion(self, ops): + test_path = Path.home() / ".hermes_test_tilde_9f8a7b" + try: + test_path.write_text("TILDE_EXPANSION_OK\n") + result = ops.read_file("~/.hermes_test_tilde_9f8a7b") + assert result.error is None + assert "TILDE_EXPANSION_OK" in result.content + _assert_clean(result.content) + finally: + test_path.unlink(missing_ok=True) + + def test_nonexistent_returns_error(self, ops, tmp_path): + result = ops.read_file(str(tmp_path / "ghost.txt")) + assert result.error is not None + + def test_pagination_exact_window(self, ops, tmp_path): + f = tmp_path / "numbered.txt" + f.write_text(NUMBERED_CONTENT) + result = ops.read_file(str(f), offset=10, limit=5) + assert result.error is None + assert "LINE_0010" in result.content + assert "LINE_0014" in result.content + assert "LINE_0009" not in result.content + assert "LINE_0015" not in result.content + assert result.total_lines == 50 + _assert_clean(result.content) + + def test_no_noise_in_content(self, ops, tmp_path): + f = tmp_path / "noise_check.txt" + f.write_text("ONLY_THIS_CONTENT\n") + result = ops.read_file(str(f)) + assert result.error is None + _assert_clean(result.content) + + +# ── write_file ─────────────────────────────────────────────────────────── + +class TestWriteFile: + def test_write_and_verify(self, ops, tmp_path): + path = str(tmp_path / "written.txt") + result = ops.write_file(path, SIMPLE_CONTENT) + assert result.error is None + assert result.bytes_written == len(SIMPLE_CONTENT.encode()) + assert Path(path).read_text() == SIMPLE_CONTENT + + def test_creates_nested_dirs(self, ops, tmp_path): + path = str(tmp_path / 
"a" / "b" / "c" / "deep.txt") + result = ops.write_file(path, "DEEP_CONTENT\n") + assert result.error is None + assert result.dirs_created is True + assert Path(path).read_text() == "DEEP_CONTENT\n" + + def test_overwrites_exact(self, ops, tmp_path): + path = str(tmp_path / "overwrite.txt") + Path(path).write_text("OLD_DATA\n") + result = ops.write_file(path, "NEW_DATA\n") + assert result.error is None + assert Path(path).read_text() == "NEW_DATA\n" + + def test_large_content_via_stdin(self, ops, tmp_path): + path = str(tmp_path / "large.txt") + content = "X" * 200_000 + "\n" + result = ops.write_file(path, content) + assert result.error is None + assert Path(path).read_text() == content + + def test_special_characters_preserved(self, ops, tmp_path): + path = str(tmp_path / "special.txt") + result = ops.write_file(path, SPECIAL_CONTENT) + assert result.error is None + assert Path(path).read_text() == SPECIAL_CONTENT + + def test_roundtrip_read_write(self, ops, tmp_path): + """Write -> read back -> verify exact match.""" + path = str(tmp_path / "roundtrip.txt") + ops.write_file(path, SIMPLE_CONTENT) + result = ops.read_file(path) + assert result.error is None + assert "alpha" in result.content + assert "charlie" in result.content + _assert_clean(result.content) + + +# ── patch_replace ──────────────────────────────────────────────────────── + +class TestPatchReplace: + def test_exact_replacement(self, ops, tmp_path): + path = str(tmp_path / "patch.txt") + Path(path).write_text("hello world\n") + result = ops.patch_replace(path, "world", "earth") + assert result.error is None + assert Path(path).read_text() == "hello earth\n" + + def test_not_found_error(self, ops, tmp_path): + path = str(tmp_path / "patch2.txt") + Path(path).write_text("hello\n") + result = ops.patch_replace(path, "NONEXISTENT_STRING", "replacement") + assert result.error is not None + assert "Could not find" in result.error + + def test_multiline_patch(self, ops, tmp_path): + path = str(tmp_path / 
"multi.txt") + Path(path).write_text("line1\nline2\nline3\n") + result = ops.patch_replace(path, "line2", "REPLACED") + assert result.error is None + assert Path(path).read_text() == "line1\nREPLACED\nline3\n" + + +# ── search ─────────────────────────────────────────────────────────────── + +class TestSearch: + def test_content_search_finds_exact_match(self, ops, populated_dir): + result = ops.search("func_alpha", str(populated_dir), target="content") + assert result.error is None + assert result.total_count >= 1 + assert any("func_alpha" in m.content for m in result.matches) + for m in result.matches: + _assert_clean(m.content) + _assert_clean(m.path) + + def test_content_search_no_false_positives(self, ops, populated_dir): + result = ops.search("ZZZZZ_NONEXISTENT", str(populated_dir), target="content") + assert result.error is None + assert result.total_count == 0 + assert len(result.matches) == 0 + + def test_file_search_finds_py_files(self, ops, populated_dir): + result = ops.search("*.py", str(populated_dir), target="files") + assert result.error is None + assert result.total_count >= 2 + # Verify only expected files appear + found_names = set() + for f in result.files: + name = Path(f).name + found_names.add(name) + _assert_clean(f) + assert "alpha.py" in found_names + assert "bravo.py" in found_names + assert "notes.txt" not in found_names + + def test_file_search_no_false_file_entries(self, ops, populated_dir): + """Every entry in the files list must be a real path, not noise.""" + result = ops.search("*.py", str(populated_dir), target="files") + assert result.error is None + for f in result.files: + _assert_clean(f) + assert Path(f).exists(), f"Search returned non-existent path: {f}" + + def test_content_search_with_glob_filter(self, ops, populated_dir): + result = ops.search("return", str(populated_dir), target="content", file_glob="*.py") + assert result.error is None + for m in result.matches: + assert m.path.endswith(".py"), f"Non-py file in results: 
{m.path}" + _assert_clean(m.content) + _assert_clean(m.path) + + def test_search_output_has_zero_noise(self, ops, populated_dir): + """Dedicated noise check: search must return only real content.""" + result = ops.search("func", str(populated_dir), target="content") + assert result.error is None + for m in result.matches: + _assert_clean(m.content) + _assert_clean(m.path) + + +# ── _expand_path ───────────────────────────────────────────────────────── + +class TestExpandPath: + def test_tilde_exact(self, ops): + result = ops._expand_path("~/test.txt") + expected = f"{Path.home()}/test.txt" + assert result == expected + _assert_clean(result) + + def test_absolute_unchanged(self, ops): + assert ops._expand_path("/tmp/test.txt") == "/tmp/test.txt" + + def test_relative_unchanged(self, ops): + assert ops._expand_path("relative/path.txt") == "relative/path.txt" + + def test_bare_tilde(self, ops): + result = ops._expand_path("~") + assert result == str(Path.home()) + _assert_clean(result) + + +# ── Terminal output cleanliness ────────────────────────────────────────── + +class TestTerminalOutputCleanliness: + """Every command the agent might run must produce noise-free output.""" + + def test_echo(self, env): + result = env.execute("echo CLEAN_TEST") + assert result["output"].strip() == "CLEAN_TEST" + _assert_clean(result["output"]) + + def test_cat(self, env, tmp_path): + f = tmp_path / "cat_test.txt" + f.write_text("CAT_CONTENT_EXACT\n") + result = env.execute(f"cat {f}") + assert result["output"] == "CAT_CONTENT_EXACT\n" + _assert_clean(result["output"]) + + def test_ls(self, env, tmp_path): + (tmp_path / "file_a.txt").write_text("") + (tmp_path / "file_b.txt").write_text("") + result = env.execute(f"ls {tmp_path}") + _assert_clean(result["output"]) + assert "file_a.txt" in result["output"] + assert "file_b.txt" in result["output"] + + def test_wc(self, env, tmp_path): + f = tmp_path / "wc_test.txt" + f.write_text("one\ntwo\nthree\n") + result = env.execute(f"wc -l < 
{f}") + assert result["output"].strip() == "3" + _assert_clean(result["output"]) + + def test_head(self, env, tmp_path): + f = tmp_path / "head_test.txt" + f.write_text(NUMBERED_CONTENT) + result = env.execute(f"head -n 3 {f}") + expected = "LINE_0001\nLINE_0002\nLINE_0003\n" + assert result["output"] == expected + _assert_clean(result["output"]) + + def test_env_var_expansion(self, env): + result = env.execute("echo $HOME") + assert result["output"].strip() == str(Path.home()) + _assert_clean(result["output"]) + + def test_command_substitution(self, env): + result = env.execute("echo $(echo NESTED)") + assert result["output"].strip() == "NESTED" + _assert_clean(result["output"]) + + def test_command_v_detection(self, env): + """This is how _has_command works -- must return clean 'yes'.""" + result = env.execute("command -v cat >/dev/null 2>&1 && echo 'yes'") + assert result["output"].strip() == "yes" + _assert_clean(result["output"]) diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index ad308c2e4..c960cc36c 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -77,6 +77,85 @@ def _strip_blocked_tools(toolsets: List[str]) -> List[str]: return [t for t in toolsets if t not in blocked_toolset_names] +def _build_child_progress_callback(task_index: int, parent_agent, task_count: int = 1) -> Optional[callable]: + """Build a callback that relays child agent tool calls to the parent display. + + Two display paths: + CLI: prints tree-view lines above the parent's delegation spinner + Gateway: batches tool names and relays to parent's progress callback + + Returns None if no display mechanism is available, in which case the + child agent runs with no progress callback (identical to current behavior). 
+ """ + spinner = getattr(parent_agent, '_delegate_spinner', None) + parent_cb = getattr(parent_agent, 'tool_progress_callback', None) + + if not spinner and not parent_cb: + return None # No display → no callback → zero behavior change + + # Show 1-indexed prefix only in batch mode (multiple tasks) + prefix = f"[{task_index + 1}] " if task_count > 1 else "" + + # Gateway: batch tool names, flush periodically + _BATCH_SIZE = 5 + _batch: List[str] = [] + + def _callback(tool_name: str, preview: str = None): + # Special "_thinking" event: model produced text content (reasoning) + if tool_name == "_thinking": + if spinner: + short = (preview[:55] + "...") if preview and len(preview) > 55 else (preview or "") + try: + spinner.print_above(f" {prefix}├─ 💭 \"{short}\"") + except Exception: + pass + # Don't relay thinking to gateway (too noisy for chat) + return + + # Regular tool call event + if spinner: + short = (preview[:35] + "...") if preview and len(preview) > 35 else (preview or "") + tool_emojis = { + "terminal": "💻", "web_search": "🔍", "web_extract": "📄", + "read_file": "📖", "write_file": "✍️", "patch": "🔧", + "search_files": "🔎", "list_directory": "📂", + "browser_navigate": "🌐", "browser_click": "👆", + "text_to_speech": "🔊", "image_generate": "🎨", + "vision_analyze": "👁️", "process": "⚙️", + } + emoji = tool_emojis.get(tool_name, "⚡") + line = f" {prefix}├─ {emoji} {tool_name}" + if short: + line += f" \"{short}\"" + try: + spinner.print_above(line) + except Exception: + pass + + if parent_cb: + _batch.append(tool_name) + if len(_batch) >= _BATCH_SIZE: + summary = ", ".join(_batch) + try: + parent_cb("subagent_progress", f"🔀 {prefix}{summary}") + except Exception: + pass + _batch.clear() + + def _flush(): + """Flush remaining batched tool names to gateway on completion.""" + if parent_cb and _batch: + summary = ", ".join(_batch) + try: + parent_cb("subagent_progress", f"🔀 {prefix}{summary}") + except Exception: + pass + _batch.clear() + + _callback._flush = 
_flush + return _callback + + def _run_single_child( task_index: int, goal: str, @@ -85,6 +164,7 @@ def _run_single_child( model: Optional[str], max_iterations: int, parent_agent, + task_count: int = 1, ) -> Dict[str, Any]: """ Spawn and run a single child agent. Called from within a thread. @@ -98,37 +178,21 @@ def _run_single_child( child_prompt = _build_child_system_prompt(goal, context) - # Build a progress callback that surfaces subagent tool activity. - # CLI: updates the parent's delegate spinner text. - # Gateway: forwards to the parent's progress callback (feeds message queue). - parent_progress_cb = getattr(parent_agent, 'tool_progress_callback', None) - def _child_progress(tool_name: str, preview: str = None): - tag = f"[subagent-{task_index+1}] {tool_name}" - # Update CLI spinner - spinner = getattr(parent_agent, '_delegate_spinner', None) - if spinner: - detail = f'"{preview}"' if preview else "" - try: - spinner.update_text(f"🔀 {tag} {detail}") - except Exception: - pass - # Forward to gateway progress queue - if parent_progress_cb: - try: - parent_progress_cb(tag, preview) - except Exception: - pass - try: - # Extract parent's API key so subagents inherit auth (e.g. Nous Portal) - parent_api_key = None - if hasattr(parent_agent, '_client_kwargs'): + # Extract parent's API key so subagents inherit auth (e.g. Nous Portal). 
+ parent_api_key = getattr(parent_agent, "api_key", None) + if (not parent_api_key) and hasattr(parent_agent, "_client_kwargs"): parent_api_key = parent_agent._client_kwargs.get("api_key") + # Build progress callback to relay tool calls to parent display + child_progress_cb = _build_child_progress_callback(task_index, parent_agent, task_count) + child = AIAgent( base_url=parent_agent.base_url, api_key=parent_api_key, model=model or parent_agent.model, + provider=getattr(parent_agent, "provider", None), + api_mode=getattr(parent_agent, "api_mode", None), max_iterations=max_iterations, enabled_toolsets=child_toolsets, quiet_mode=True, @@ -143,7 +207,7 @@ def _run_single_child( providers_ignored=parent_agent.providers_ignored, providers_order=parent_agent.providers_order, provider_sort=parent_agent.provider_sort, - tool_progress_callback=_child_progress, + tool_progress_callback=child_progress_cb, ) # Set delegation depth so children can't spawn grandchildren @@ -158,6 +222,13 @@ def _run_single_child( with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull): result = child.run_conversation(user_message=goal) + # Flush any remaining batched progress to gateway + if child_progress_cb and hasattr(child_progress_cb, '_flush'): + try: + child_progress_cb._flush() + except Exception: + pass + duration = round(time.monotonic() - child_start, 2) summary = result.get("final_response") or "" @@ -275,6 +346,7 @@ def delegate_task( model=model, max_iterations=effective_max_iter, parent_agent=parent_agent, + task_count=1, ) results.append(result) else: @@ -299,6 +371,7 @@ def delegate_task( model=model, max_iterations=effective_max_iter, parent_agent=parent_agent, + task_count=n_tasks, ) futures[future] = i @@ -318,14 +391,21 @@ def delegate_task( results.append(entry) completed_count += 1 - # Print per-task completion line (visible in CLI via patch_stdout) + # Print per-task completion line above the spinner idx = entry["task_index"] label = task_labels[idx] 
if idx < len(task_labels) else f"Task {idx}" dur = entry.get("duration_seconds", 0) status = entry.get("status", "?") icon = "✓" if status == "completed" else "✗" remaining = n_tasks - completed_count - print(f" {icon} [{idx+1}/{n_tasks}] {label} ({dur}s)") + completion_line = f"{icon} [{idx+1}/{n_tasks}] {label} ({dur}s)" + if spinner_ref: + try: + spinner_ref.print_above(completion_line) + except Exception: + print(f" {completion_line}") + else: + print(f" {completion_line}") # Update spinner text to show remaining count if spinner_ref and remaining > 0: diff --git a/tools/environments/local.py b/tools/environments/local.py index 6d7e8da3c..5b70a2707 100644 --- a/tools/environments/local.py +++ b/tools/environments/local.py @@ -11,20 +11,26 @@ from tools.environments.base import BaseEnvironment # Noise lines emitted by interactive shells when stdin is not a terminal. # Filtered from output to keep tool results clean. -_SHELL_NOISE = frozenset({ +_SHELL_NOISE_SUBSTRINGS = ( + "bash: cannot set terminal process group", "bash: no job control in this shell", - "bash: no job control in this shell\n", "no job control in this shell", - "no job control in this shell\n", -}) + "cannot set terminal process group", + "tcsetattr: Inappropriate ioctl for device", +) def _clean_shell_noise(output: str) -> str: - """Strip shell startup warnings that leak when using -i without a TTY.""" - lines = output.split("\n", 2) # only check first two lines - if lines and lines[0].strip() in _SHELL_NOISE: - return "\n".join(lines[1:]) - return output + """Strip shell startup warnings that leak when using -i without a TTY. + + Removes all leading lines that match known noise patterns, not just the first. + Some environments emit multiple noise lines (e.g. Docker, non-TTY sessions). 
+ """ + lines = output.split("\n") + # Strip all leading noise lines + while lines and any(noise in lines[0] for noise in _SHELL_NOISE_SUBSTRINGS): + lines.pop(0) + return "\n".join(lines) class LocalEnvironment(BaseEnvironment): diff --git a/tools/openrouter_client.py b/tools/openrouter_client.py index 7d30e6eec..343cf1021 100644 --- a/tools/openrouter_client.py +++ b/tools/openrouter_client.py @@ -31,7 +31,7 @@ def get_async_client() -> AsyncOpenAI: default_headers={ "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", }, ) return _client diff --git a/tools/process_registry.py b/tools/process_registry.py index bfdb8cd1d..cbc0dd853 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -87,13 +87,13 @@ class ProcessRegistry: - Cleanup thread (sandbox reaping coordination) """ - # Noise lines emitted by interactive shells when stdin is not a terminal. 
- _SHELL_NOISE = frozenset({ + _SHELL_NOISE_SUBSTRINGS = ( + "bash: cannot set terminal process group", "bash: no job control in this shell", - "bash: no job control in this shell\n", "no job control in this shell", - "no job control in this shell\n", - }) + "cannot set terminal process group", + "tcsetattr: Inappropriate ioctl for device", + ) def __init__(self): self._running: Dict[str, ProcessSession] = {} @@ -106,10 +106,10 @@ class ProcessRegistry: @staticmethod def _clean_shell_noise(text: str) -> str: """Strip shell startup warnings from the beginning of output.""" - lines = text.split("\n", 2) - if lines and lines[0].strip() in ProcessRegistry._SHELL_NOISE: - return "\n".join(lines[1:]) - return text + lines = text.split("\n") + while lines and any(noise in lines[0] for noise in ProcessRegistry._SHELL_NOISE_SUBSTRINGS): + lines.pop(0) + return "\n".join(lines) # ----- Spawn ----- diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index bcfbfdf2a..b11b79fda 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -24,26 +24,13 @@ from typing import Dict, Any, List, Optional from openai import AsyncOpenAI, OpenAI -from agent.auxiliary_client import get_text_auxiliary_client +from agent.auxiliary_client import get_async_text_auxiliary_client -# Resolve the auxiliary client at import time so we have the model slug. -# We build an AsyncOpenAI from the same credentials for async summarization. 
-_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client() -_async_aux_client: AsyncOpenAI | None = None -if _aux_client is not None: - _async_kwargs = { - "api_key": _aux_client.api_key, - "base_url": str(_aux_client.base_url), - } - if "openrouter" in str(_aux_client.base_url).lower(): - _async_kwargs["default_headers"] = { - "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", - "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", - } - _async_aux_client = AsyncOpenAI(**_async_kwargs) +# Resolve the async auxiliary client at import time so we have the model slug. +# Handles Codex Responses API adapter transparently. +_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client() MAX_SESSION_CHARS = 100_000 -MAX_SUMMARY_TOKENS = 2000 +MAX_SUMMARY_TOKENS = 10000 def _format_timestamp(ts) -> str: diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index cb0d9cd4e..f758768eb 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1037,8 +1037,12 @@ def terminal_tool( ) output = output[:head_chars] + truncated_notice + output[-tail_chars:] + # Redact secrets from command output (catches env/printenv leaking keys) + from agent.redact import redact_sensitive_text + output = redact_sensitive_text(output.strip()) if output else "" + return json.dumps({ - "output": output.strip() if output else "", + "output": output, "exit_code": returncode, "error": None }, ensure_ascii=False) diff --git a/tools/vision_tools.py b/tools/vision_tools.py index 39413d5b0..f3744e95f 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -54,7 +54,7 @@ if _aux_sync_client is not None: _async_kwargs["default_headers"] = { "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", } _aux_async_client = AsyncOpenAI(**_async_kwargs) diff --git a/tools/web_tools.py 
b/tools/web_tools.py index 0e5baaa29..541404e6d 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -48,7 +48,7 @@ import asyncio from typing import List, Dict, Any, Optional from firecrawl import Firecrawl from openai import AsyncOpenAI -from agent.auxiliary_client import get_text_auxiliary_client +from agent.auxiliary_client import get_async_text_auxiliary_client from tools.debug_helpers import DebugSession logger = logging.getLogger(__name__) @@ -67,21 +67,9 @@ def _get_firecrawl_client(): DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000 -# Resolve auxiliary text client at module level; build an async wrapper. -_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client() -_aux_async_client: AsyncOpenAI | None = None -if _aux_sync_client is not None: - _async_kwargs = { - "api_key": _aux_sync_client.api_key, - "base_url": str(_aux_sync_client.base_url), - } - if "openrouter" in str(_aux_sync_client.base_url).lower(): - _async_kwargs["default_headers"] = { - "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", - "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", - } - _aux_async_client = AsyncOpenAI(**_async_kwargs) +# Resolve async auxiliary client at module level. +# Handles Codex Responses API adapter transparently. 
+_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client() _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG") @@ -174,7 +162,7 @@ async def _call_summarizer_llm( content: str, context_str: str, model: str, - max_tokens: int = 4000, + max_tokens: int = 20000, is_chunk: bool = False, chunk_info: str = "" ) -> Optional[str]: @@ -306,7 +294,7 @@ async def _process_large_content_chunked( chunk_content, context_str, model, - max_tokens=2000, + max_tokens=10000, is_chunk=True, chunk_info=chunk_info ) @@ -374,7 +362,7 @@ Create a single, unified markdown summary.""" {"role": "user", "content": synthesis_prompt} ], temperature=0.1, - **auxiliary_max_tokens_param(4000), + **auxiliary_max_tokens_param(20000), **({} if not _extra else {"extra_body": _extra}), ) final_summary = response.choices[0].message.content.strip()