Merge remote-tracking branch 'origin/main' into feature/homeassistant-integration

# Conflicts:
#	run_agent.py
This commit is contained in:
0xbyt4 2026-03-01 11:59:12 +03:00
commit 3fdf03390e
50 changed files with 7354 additions and 358 deletions

View file

@ -10,7 +10,7 @@
OPENROUTER_API_KEY=
# Default model to use (OpenRouter format: provider/model)
# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus
# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-3-flash-preview, zhipuai/glm-4-plus
LLM_MODEL=anthropic/claude-opus-4.6
# =============================================================================
@ -200,9 +200,10 @@ IMAGE_TOOLS_DEBUG=false
# When conversation approaches model's context limit, middle turns are
# automatically summarized to free up space.
#
# Context compression is configured in ~/.hermes/config.yaml under compression:
# CONTEXT_COMPRESSION_ENABLED=true # Enable auto-compression (default: true)
# CONTEXT_COMPRESSION_THRESHOLD=0.85 # Compress at 85% of context limit
# CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001 # Fast model for summaries
# Model is set via compression.summary_model in config.yaml (default: google/gemini-3-flash-preview)
# =============================================================================
# RL TRAINING (Tinker + Atropos)

View file

@ -179,6 +179,7 @@ The interactive CLI uses:
Key components:
- `HermesCLI` class - Main CLI controller with commands and conversation loop
- `SlashCommandCompleter` - Autocomplete dropdown for `/commands` (type `/` to see all)
- `agent/skill_commands.py` - Scans skills and builds invocation messages (shared with gateway)
- `load_cli_config()` - Loads config, sets environment variables for terminal
- `build_welcome_banner()` - Displays ASCII art logo, tools, and skills summary
@ -191,9 +192,22 @@ CLI UX notes:
- Pasting 5+ lines auto-saves to `~/.hermes/pastes/` and collapses to a reference
- Multi-line input via Alt+Enter or Ctrl+J
- `/commands` - Process user commands like `/help`, `/clear`, `/personality`, etc.
- `/skill-name` - Invoke installed skills directly (e.g., `/axolotl`, `/gif-search`)
CLI uses `quiet_mode=True` when creating AIAgent to suppress verbose logging.
### Skill Slash Commands
Every installed skill in `~/.hermes/skills/` is automatically registered as a slash command.
The skill name (from frontmatter or folder name) becomes the command: `axolotl` → `/axolotl`.
Implementation (`agent/skill_commands.py`, shared between CLI and gateway):
1. `scan_skill_commands()` scans all SKILL.md files at startup
2. `build_skill_invocation_message()` loads the SKILL.md content and builds a user-turn message
3. The message includes the full skill content, a list of supporting files (not loaded), and the user's instruction
4. Supporting files can be loaded on demand via the `skill_view` tool
5. Injected as a **user message** (not system prompt) to preserve prompt caching
### Adding CLI Commands
1. Add to `COMMANDS` dict with description

View file

@ -121,11 +121,14 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
| Provider | Setup |
|----------|-------|
| **Nous Portal** | `hermes login` (OAuth, subscription-based) |
| **Nous Portal** | `hermes model` (OAuth, subscription-based) |
| **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) |
| **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` |
| **Custom Endpoint** | `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` |
**Note:** Even when using Nous Portal or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools.
**Codex note:** The OpenAI Codex provider authenticates via device code (open a URL, enter a code). Credentials are stored at `~/.codex/auth.json` and auto-refresh. No Codex CLI installation required.
**Note:** Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools.
---
@ -143,7 +146,7 @@ All your settings are stored in `~/.hermes/` for easy access:
├── skills/ # Agent-created skills (managed via skill_manage tool)
├── cron/ # Scheduled jobs
├── sessions/ # Gateway sessions
└── logs/ # Logs
└── logs/ # Logs (errors.log, gateway.log — secrets auto-redacted)
```
### Managing Configuration
@ -161,6 +164,19 @@ hermes config set terminal.backend docker
hermes config set OPENROUTER_API_KEY sk-or-... # Saves to .env
```
### Configuration Precedence
Settings are resolved in this order (highest priority first):
1. **CLI arguments** — `hermes chat --max-turns 100` (per-invocation override)
2. **`~/.hermes/config.yaml`** — the primary config file for all non-secret settings
3. **`~/.hermes/.env`** — fallback for env vars; **required** for secrets (API keys, tokens, passwords)
4. **Built-in defaults** — hardcoded safe defaults when nothing else is set
**Rule of thumb:** Secrets (API keys, bot tokens, passwords) go in `.env`. Everything else (model, terminal backend, compression settings, memory limits, toolsets) goes in `config.yaml`. When both are set, `config.yaml` wins for non-secret settings.
The `hermes config set` command automatically routes values to the right file — API keys are saved to `.env`, everything else to `config.yaml`.
### Optional API Keys
| Feature | Provider | Env Variable |
@ -277,7 +293,10 @@ See [docs/messaging.md](docs/messaging.md) for advanced WhatsApp configuration.
| `/status` | Show session info |
| `/stop` | Stop the running agent |
| `/sethome` | Set this chat as the home channel |
| `/compress` | Manually compress conversation context |
| `/usage` | Show token usage for this session |
| `/help` | Show available commands |
| `/<skill-name>` | Invoke any installed skill (e.g., `/axolotl`, `/gif-search`) |
### DM Pairing (Alternative to Allowlists)
@ -354,7 +373,7 @@ hermes --resume <id> # Resume a specific session (-r)
# Provider & model management
hermes model # Switch provider and model interactively
hermes login # Authenticate with Nous Portal (OAuth)
hermes model # Select provider and model
hermes logout # Clear stored OAuth credentials
# Configuration
@ -407,7 +426,11 @@ Type `/` to see an autocomplete dropdown of all commands.
| `/cron` | Manage scheduled tasks |
| `/skills` | Search, install, inspect, or manage skills from registries |
| `/platforms` | Show gateway/messaging platform status |
| `/verbose` | Cycle tool progress display: off → new → all → verbose |
| `/compress` | Manually compress conversation context |
| `/usage` | Show token usage for this session |
| `/quit` | Exit (also: `/exit`, `/q`) |
| `/<skill-name>` | Invoke any installed skill (e.g., `/axolotl`, `/gif-search`) |
**Keybindings:**
- `Enter` — send message
@ -694,6 +717,21 @@ hermes cron status # Check if gateway is running
Even if no messaging platforms are configured, the gateway stays running for cron. A file lock prevents duplicate execution if multiple processes overlap.
### 🪝 Event Hooks
Run custom code at key lifecycle points — log activity, send alerts, post to webhooks. Hooks are Python handlers that fire automatically during gateway operation.
```
~/.hermes/hooks/
└── my-hook/
├── HOOK.yaml # name + events to subscribe to
└── handler.py # async def handle(event_type, context)
```
**Available events:** `gateway:startup`, `session:start`, `session:reset`, `agent:start`, `agent:step`, `agent:end`, `command:*` (wildcard — fires for any slash command).
Hooks are non-blocking — errors are caught and logged, never crashing the agent. See [docs/hooks.md](docs/hooks.md) for the full event reference, context keys, and examples.
### 🛡️ Exec Approval (Messaging Platforms)
When the agent tries to run a potentially dangerous command (`rm -rf`, `chmod 777`, etc.) on Telegram/Discord/WhatsApp, instead of blocking it silently, it asks the user for approval:
@ -807,6 +845,22 @@ Skills are on-demand knowledge documents the agent can load when needed. They fo
All skills live in **`~/.hermes/skills/`** -- a single directory that is the source of truth. On fresh install, bundled skills are copied there from the repo. Hub-installed skills and agent-created skills also go here. The agent can modify or delete any skill. `hermes update` adds only genuinely new bundled skills (via a manifest) without overwriting your changes or re-adding skills you deleted.
**Using Skills:**
Every installed skill is automatically available as a slash command — type `/<skill-name>` to invoke it directly:
```bash
# In the CLI or any messaging platform (Telegram, Discord, Slack, WhatsApp):
/gif-search funny cats
/axolotl help me fine-tune Llama 3 on my dataset
/github-pr-workflow create a PR for the auth refactor
# Just the skill name (no prompt) loads the skill and lets the agent ask what you need:
/excalidraw
```
The skill's full instructions (SKILL.md) are loaded into the conversation, and any supporting files (references, templates, scripts) are listed for the agent to pull on demand via the `skill_view` tool. Type `/help` to see all available skill commands.
You can also use skills through natural conversation:
```bash
hermes --toolsets skills -q "What skills do you have?"
hermes --toolsets skills -q "Show me the axolotl skill"
@ -1266,9 +1320,13 @@ Your `~/.hermes/` directory should now look like:
├── skills/ # Agent-created skills (auto-created on first use)
├── cron/ # Scheduled job data
├── sessions/ # Messaging gateway sessions
└── logs/ # Conversation logs
└── logs/ # Logs
├── gateway.log # Gateway activity log
└── errors.log # Errors from tool calls, API failures, etc.
```
All log output is automatically redacted -- API keys, tokens, and credentials are masked before they reach disk.
---
### Step 7: Add Your API Keys
@ -1592,7 +1650,9 @@ All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set t
|------|-------------|
| `~/.hermes/config.yaml` | Your settings |
| `~/.hermes/.env` | API keys and secrets |
| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes login`) |
| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes model`) |
| `~/.hermes/logs/errors.log` | Tool errors, API failures (secrets auto-redacted) |
| `~/.hermes/logs/gateway.log` | Gateway activity log (secrets auto-redacted) |
| `~/.hermes/cron/` | Scheduled jobs data |
| `~/.hermes/sessions/` | Gateway session data |
| `~/.hermes/hermes-agent/` | Installation directory |
@ -1620,7 +1680,7 @@ hermes config # View current settings
Common issues:
- **"API key not set"**: Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key`
- **"hermes: command not found"**: Reload your shell (`source ~/.bashrc`) or check PATH
- **"Run `hermes login` to re-authenticate"**: Your Nous Portal session expired. Run `hermes login` to refresh.
- **"Run `hermes setup` to re-authenticate"**: Your Nous Portal session expired. Run `hermes setup` or `hermes model` to refresh.
- **"No active paid subscription"**: Your Nous Portal account needs an active subscription for inference.
- **Gateway won't start**: Check `hermes gateway status` and logs
- **Missing config after update**: Run `hermes config check` to see what's new, then `hermes config migrate` to add missing options

View file

@ -8,7 +8,9 @@ Resolution order for text tasks:
1. OpenRouter (OPENROUTER_API_KEY)
2. Nous Portal (~/.hermes/auth.json active provider)
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
4. None
4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
wrapped to look like a chat.completions client)
5. None
Resolution order for vision/multimodal tasks:
1. OpenRouter
@ -20,7 +22,8 @@ import json
import logging
import os
from pathlib import Path
from typing import Optional, Tuple
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Tuple
from openai import OpenAI
@ -32,7 +35,7 @@ logger = logging.getLogger(__name__)
_OR_HEADERS = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "cli-agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
# Nous Portal extra_body for product attribution.
@ -49,6 +52,188 @@ _NOUS_MODEL = "gemini-3-flash"
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
# Codex fallback: uses the Responses API (the only endpoint the Codex
# OAuth token can access) with a fast model for auxiliary tasks.
_CODEX_AUX_MODEL = "gpt-5.3-codex"
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
# ── Codex Responses → chat.completions adapter ─────────────────────────────
# All auxiliary consumers call client.chat.completions.create(**kwargs) and
# read response.choices[0].message.content. This adapter translates those
# calls to the Codex Responses API so callers don't need any changes.
class _CodexCompletionsAdapter:
    """Expose the Codex Responses API behind a ``chat.completions`` facade.

    Callers pass chat-completions style keyword arguments to ``create()`` and
    read ``response.choices[0].message``; the request is forwarded to
    ``client.responses.stream()`` and the final response is repackaged.
    """

    def __init__(self, real_client: OpenAI, model: str):
        # Model used whenever a call does not name one itself.
        self._model = model
        self._client = real_client

    def create(self, **kwargs) -> Any:
        """Run one chat-completions style request through the Responses API.

        Recognised kwargs: ``messages``, ``model``, ``temperature``, ``tools``
        and a token limit supplied as ``max_output_tokens``,
        ``max_completion_tokens`` or ``max_tokens`` (first truthy one wins).

        Returns:
            A ``SimpleNamespace`` with ``choices`` (one entry), ``model`` and
            ``usage`` (``None`` when the response carried no usage data).

        Raises:
            Whatever the underlying call raises; it is logged at debug level
            and re-raised unchanged.
        """
        model = kwargs.get("model", self._model)

        # System messages become ``instructions`` (a later one replaces an
        # earlier one); every other message is forwarded as conversation input.
        instructions = "You are a helpful assistant."
        input_msgs: List[Dict[str, Any]] = []
        for entry in kwargs.get("messages", []):
            entry_role = entry.get("role", "user")
            entry_content = entry.get("content", "")
            if entry_role == "system":
                instructions = entry_content
                continue
            input_msgs.append({"role": entry_role, "content": entry_content})
        if not input_msgs:
            # Placeholder turn used when the caller supplied no non-system message.
            input_msgs = [{"role": "user", "content": ""}]

        request: Dict[str, Any] = {
            "model": model,
            "instructions": instructions,
            "input": input_msgs,
            "stream": True,
            "store": False,
        }

        # Equivalent to ``a or b or c``: stop at the first truthy limit,
        # otherwise keep whatever the last key produced.
        token_limit = None
        for limit_key in ("max_output_tokens", "max_completion_tokens", "max_tokens"):
            token_limit = kwargs.get(limit_key)
            if token_limit:
                break
        if token_limit is not None:
            request["max_output_tokens"] = int(token_limit)

        temperature = kwargs.get("temperature")
        if temperature is not None:
            request["temperature"] = temperature

        # Tools support for flush_memories and similar callers: flatten the
        # nested chat-completions "function" spec into the Responses layout.
        function_tools = []
        for spec in kwargs.get("tools") or ():
            fn = spec.get("function", {}) if isinstance(spec, dict) else {}
            fn_name = fn.get("name")
            if fn_name:
                function_tools.append({
                    "type": "function",
                    "name": fn_name,
                    "description": fn.get("description", ""),
                    "parameters": fn.get("parameters", {}),
                })
        if function_tools:
            request["tools"] = function_tools

        text_parts: List[str] = []
        tool_calls_raw: List[Any] = []
        usage = None
        try:
            with self._client.responses.stream(**request) as stream:
                # Drain the event stream; only the final response is used.
                for _ in stream:
                    pass
                final = stream.get_final_response()

            # Collect assistant text and function calls from the output items.
            for item in getattr(final, "output", []):
                kind = getattr(item, "type", None)
                if kind == "function_call":
                    tool_calls_raw.append(SimpleNamespace(
                        id=getattr(item, "call_id", ""),
                        type="function",
                        function=SimpleNamespace(
                            name=getattr(item, "name", ""),
                            arguments=getattr(item, "arguments", "{}"),
                        ),
                    ))
                    continue
                if kind != "message":
                    continue
                text_parts.extend(
                    getattr(part, "text", "")
                    for part in getattr(item, "content", [])
                    if getattr(part, "type", None) in ("output_text", "text")
                )

            resp_usage = getattr(final, "usage", None)
            if resp_usage:
                usage = SimpleNamespace(
                    prompt_tokens=getattr(resp_usage, "input_tokens", 0),
                    completion_tokens=getattr(resp_usage, "output_tokens", 0),
                    total_tokens=getattr(resp_usage, "total_tokens", 0),
                )
        except Exception as exc:
            logger.debug("Codex auxiliary Responses API call failed: %s", exc)
            raise

        # Shape the result like a chat.completions response.
        content = "".join(text_parts).strip() or None
        finish_reason = "tool_calls" if tool_calls_raw else "stop"
        assistant_message = SimpleNamespace(
            role="assistant",
            content=content,
            tool_calls=tool_calls_raw or None,
        )
        only_choice = SimpleNamespace(
            index=0,
            message=assistant_message,
            finish_reason=finish_reason,
        )
        return SimpleNamespace(choices=[only_choice], model=model, usage=usage)
class _CodexChatShim:
    """Holder that makes ``client.chat.completions.create()`` resolve.

    The adapter passed in is stored as ``completions``, so the attribute
    chain used by chat-completions callers lands on ``adapter.create``.
    """

    def __init__(self, adapter: _CodexCompletionsAdapter):
        # Published under the attribute name chat-completions callers expect.
        self.completions = adapter
class CodexAuxiliaryClient:
    """Client-shaped wrapper that sends chat-completions calls to Codex.

    ``client.chat.completions.create(**kwargs)`` works as on a regular client
    and is served by a ``_CodexCompletionsAdapter``.  ``api_key`` and
    ``base_url`` are copied from the wrapped client so wrappers (for example
    the async variant) can inspect them.
    """

    def __init__(self, real_client: OpenAI, model: str):
        self._real_client = real_client
        # Mirror the connection details of the wrapped client.
        self.api_key = real_client.api_key
        self.base_url = real_client.base_url
        self.chat = _CodexChatShim(_CodexCompletionsAdapter(real_client, model))

    def close(self):
        """Close the wrapped client."""
        self._real_client.close()
class _AsyncCodexCompletionsAdapter:
    """Awaitable front for the synchronous Codex adapter.

    ``create()`` hands the blocking ``create`` call of the wrapped adapter to
    ``asyncio.to_thread`` so that async consumers can simply ``await`` it.
    """

    def __init__(self, sync_adapter: _CodexCompletionsAdapter):
        self._sync = sync_adapter

    async def create(self, **kwargs) -> Any:
        """Await the wrapped adapter's ``create(**kwargs)`` on a worker thread."""
        import asyncio

        blocking_create = self._sync.create
        return await asyncio.to_thread(blocking_create, **kwargs)
class _AsyncCodexChatShim:
    """Holder that makes ``client.chat.completions.create()`` resolve (async)."""

    def __init__(self, adapter: _AsyncCodexCompletionsAdapter):
        # Published under the attribute name chat-completions callers expect.
        self.completions = adapter
class AsyncCodexAuxiliaryClient:
    """Async counterpart of ``CodexAuxiliaryClient``.

    Reuses the sync wrapper's completions adapter behind an awaitable
    ``chat.completions.create()`` and copies its ``api_key`` / ``base_url``.
    """

    def __init__(self, sync_wrapper: "CodexAuxiliaryClient"):
        self.api_key = sync_wrapper.api_key
        self.base_url = sync_wrapper.base_url
        # Wrap the existing sync adapter rather than building a second one.
        awaitable_adapter = _AsyncCodexCompletionsAdapter(sync_wrapper.chat.completions)
        self.chat = _AsyncCodexChatShim(awaitable_adapter)
def _read_nous_auth() -> Optional[dict]:
"""Read and validate ~/.hermes/auth.json for an active Nous provider.
@ -82,12 +267,31 @@ def _nous_base_url() -> str:
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
def _read_codex_access_token() -> Optional[str]:
    """Return the Codex access token stored in ``~/.codex/auth.json``.

    The token is read from ``tokens.access_token`` and returned with
    surrounding whitespace removed.  ``None`` is returned when the file is
    absent, the expected structure is missing, the token is blank, or
    anything goes wrong while reading (the error is logged at debug level).
    """
    try:
        auth_file = Path.home() / ".codex" / "auth.json"
        if not auth_file.is_file():
            return None
        payload = json.loads(auth_file.read_text())
        token_section = payload.get("tokens")
        raw_token = (
            token_section.get("access_token")
            if isinstance(token_section, dict)
            else None
        )
        # Non-string and whitespace-only values both collapse to "no token".
        cleaned = raw_token.strip() if isinstance(raw_token, str) else ""
        return cleaned or None
    except Exception as exc:
        logger.debug("Could not read Codex auth for auxiliary client: %s", exc)
        return None
# ── Public API ──────────────────────────────────────────────────────────────
def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
"""Return (client, model_slug) for text-only auxiliary tasks.
Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None).
Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth -> (None, None).
"""
# 1. OpenRouter
or_key = os.getenv("OPENROUTER_API_KEY")
@ -115,11 +319,44 @@ def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
logger.debug("Auxiliary text client: custom endpoint (%s)", model)
return OpenAI(api_key=custom_key, base_url=custom_base), model
# 4. Nothing available
# 4. Codex OAuth -- uses the Responses API (only endpoint the token
# can access), wrapped to look like a chat.completions client.
codex_token = _read_codex_access_token()
if codex_token:
logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
# 5. Nothing available
logger.debug("Auxiliary text client: none available")
return None, None
def get_async_text_auxiliary_client():
    """Build the async twin of the client chosen by ``get_text_auxiliary_client()``.

    Returns:
        ``(None, None)`` when no sync client is available;
        ``(AsyncCodexAuxiliaryClient, model)`` when the sync client is a
        ``CodexAuxiliaryClient``; otherwise ``(AsyncOpenAI, model)`` created
        from the sync client's ``api_key`` and ``base_url``.
    """
    from openai import AsyncOpenAI

    sync_client, model = get_text_auxiliary_client()
    if sync_client is None:
        return None, None
    if isinstance(sync_client, CodexAuxiliaryClient):
        return AsyncCodexAuxiliaryClient(sync_client), model

    endpoint = str(sync_client.base_url)
    async_kwargs = {"api_key": sync_client.api_key, "base_url": endpoint}
    # Attribution headers are attached only when the endpoint is OpenRouter.
    if "openrouter" in endpoint.lower():
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
    return AsyncOpenAI(**async_kwargs), model
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
"""Return (client, model_slug) for vision/multimodal auxiliary tasks.
@ -161,11 +398,12 @@ def auxiliary_max_tokens_param(value: int) -> dict:
OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer
models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'.
The Codex adapter translates max_tokens internally, so we use max_tokens
for it as well.
"""
custom_base = os.getenv("OPENAI_BASE_URL", "")
or_key = os.getenv("OPENROUTER_API_KEY")
# Only use max_completion_tokens when the auxiliary client resolved to
# direct OpenAI (no OpenRouter key, no Nous auth, custom endpoint is api.openai.com)
# Only use max_completion_tokens for direct OpenAI custom endpoints
if (not or_key
and _read_nous_auth() is None
and "api.openai.com" in custom_base.lower()):

View file

@ -31,8 +31,9 @@ class ContextCompressor:
threshold_percent: float = 0.85,
protect_first_n: int = 3,
protect_last_n: int = 4,
summary_target_tokens: int = 500,
summary_target_tokens: int = 2500,
quiet_mode: bool = False,
summary_model_override: str = None,
):
self.model = model
self.threshold_percent = threshold_percent
@ -49,7 +50,8 @@ class ContextCompressor:
self.last_completion_tokens = 0
self.last_total_tokens = 0
self.client, self.summary_model = get_text_auxiliary_client()
self.client, default_model = get_text_auxiliary_client()
self.summary_model = summary_model_override or default_model
def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response."""

View file

@ -199,6 +199,24 @@ class KawaiiSpinner:
def update_text(self, new_message: str):
    """Replace the message stored on this spinner with ``new_message``."""
    self.message = new_message
def print_above(self, text: str):
    """Emit ``text`` as its own line without leaving spinner residue behind.

    When the spinner is not running the text is written as-is (with a leading
    space).  While it is running, the current line is first overwritten with
    blanks -- at least 40, or ``last_line_len + 5`` if that is larger -- and
    the cursor is returned to column 0 before the text is written.

    Blanks are used instead of an erase-line escape sequence; the original
    author notes escape codes get garbled under prompt_toolkit's
    ``patch_stdout``.  All output goes through ``self._write``.
    """
    line = f" {text}"
    if self.running:
        # Wipe whatever the last animation frame drew, then rewind.
        pad_width = max(self.last_line_len + 5, 40)
        line = "\r" + " " * pad_width + "\r" + line
    self._write(line, flush=True)
def stop(self, final_message: str = None):
self.running = False
if self.thread:
@ -283,6 +301,15 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
pass
return False, ""
# Memory-specific: distinguish "full" from real errors
if tool_name == "memory":
try:
data = json.loads(result)
if data.get("success") is False and "exceed the limit" in data.get("error", ""):
return True, " [full]"
except (json.JSONDecodeError, TypeError, AttributeError):
pass
# Generic heuristic for non-terminal tools
lower = result[:500].lower()
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):

115
agent/redact.py Normal file
View file

@ -0,0 +1,115 @@
"""Regex-based secret redaction for logs and tool output.
Applies pattern matching to mask API keys, tokens, and credentials
before they reach log files, verbose output, or gateway logs.
Short tokens (< 18 chars) are fully masked. Longer tokens preserve
the first 6 and last 4 characters for debuggability.
"""
import logging
import re
from typing import Optional
logger = logging.getLogger(__name__)
# Known API key prefixes -- each pattern is the literal prefix followed by a
# minimum run of contiguous token characters.
_PREFIX_PATTERNS = [
    r"sk-[A-Za-z0-9_-]{10,}",           # OpenAI / OpenRouter
    r"ghp_[A-Za-z0-9]{10,}",            # GitHub PAT (classic)
    r"github_pat_[A-Za-z0-9_]{10,}",    # GitHub PAT (fine-grained)
    r"xox[baprs]-[A-Za-z0-9-]{10,}",    # Slack tokens
    r"AIza[A-Za-z0-9_-]{30,}",          # Google API keys
    r"pplx-[A-Za-z0-9]{10,}",           # Perplexity
    r"fal_[A-Za-z0-9_-]{10,}",          # Fal.ai
    r"fc-[A-Za-z0-9]{10,}",             # Firecrawl
    r"bb_live_[A-Za-z0-9_-]{10,}",      # BrowserBase
    r"gAAAA[A-Za-z0-9_=-]{20,}",        # Codex encrypted tokens
]

# ENV assignment patterns: KEY=value where KEY contains a secret-like name.
# Groups: 1 = variable name, 2 = opening quote ('' when unquoted), 3 = value.
#
# The value is tried as a whitespace-free run first.  If that cannot reach the
# closing quote (a quoted value containing spaces), the second alternative
# consumes everything up to the next occurrence of the same quote on the line.
# Without it, PASSWORD="a b c" masked only '"a' and left ' b c"' in the output.
# For unquoted values group 2 is empty, so (?!\2) can never succeed and only
# the \S+ alternative applies.
_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
_ENV_ASSIGN_RE = re.compile(
    rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+|(?:(?!\2).)+)\2",
    re.IGNORECASE,
)

# JSON field patterns: "apiKey": "value", "token": "value", etc.
# Groups: 1 = the quoted key, 2 = the string value.
_JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer)"
_JSON_FIELD_RE = re.compile(
    rf'("{_JSON_KEY_NAMES}")\s*:\s*"([^"]+)"',
    re.IGNORECASE,
)

# Authorization headers.  Groups: 1 = "Authorization: Bearer " lead-in,
# 2 = the credential that follows it.
_AUTH_HEADER_RE = re.compile(
    r"(Authorization:\s*Bearer\s+)(\S+)",
    re.IGNORECASE,
)

# Telegram bot tokens: bot<digits>:<token> or <digits>:<alphanum>.
# Groups: 1 = optional "bot" prefix, 2 = numeric id, 3 = secret part.
_TELEGRAM_RE = re.compile(
    r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})",
)

# Compile known prefix patterns into one alternation.  The lookbehind and
# lookahead require the match not to be embedded in a longer run of token
# characters.
_PREFIX_RE = re.compile(
    r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
)
def _mask_token(token: str) -> str:
    """Return a masked rendering of ``token``.

    Tokens of 18 or more characters keep their first six and last four
    characters around an ellipsis; anything shorter becomes ``"***"``.
    """
    if len(token) >= 18:
        head, tail = token[:6], token[-4:]
        return f"{head}...{tail}"
    return "***"
def redact_sensitive_text(text: str) -> str:
    """Mask secrets in ``text`` by running every redaction pattern in turn.

    Passes are applied in this order: known key prefixes, ``NAME=value``
    assignments, JSON string fields, ``Authorization: Bearer`` headers and
    Telegram bot tokens.  Falsy input is returned as-is.
    """
    if not text:
        return text

    def _mask_prefixed(match):
        # Known prefixes (sk-, ghp_, etc.): the whole token is group 1.
        return _mask_token(match.group(1))

    def _mask_env(match):
        # ENV assignments: keep the name and the quoting style, mask the value.
        name, quote, value = match.group(1, 2, 3)
        return f"{name}={quote}{_mask_token(value)}{quote}"

    def _mask_json(match):
        # JSON fields: keep the key, mask the string value.
        key, value = match.group(1, 2)
        return f'{key}: "{_mask_token(value)}"'

    def _mask_bearer(match):
        # Authorization headers: keep the lead-in, mask the credential.
        return match.group(1) + _mask_token(match.group(2))

    def _mask_telegram(match):
        # Telegram bot tokens: keep the optional prefix and numeric id only.
        prefix = match.group(1) or ""
        digits = match.group(2)
        return f"{prefix}{digits}:***"

    passes = (
        (_PREFIX_RE, _mask_prefixed),
        (_ENV_ASSIGN_RE, _mask_env),
        (_JSON_FIELD_RE, _mask_json),
        (_AUTH_HEADER_RE, _mask_bearer),
        (_TELEGRAM_RE, _mask_telegram),
    )
    for pattern, replacer in passes:
        text = pattern.sub(replacer, text)
    return text
class RedactingFormatter(logging.Formatter):
    """``logging.Formatter`` whose output is passed through secret redaction."""

    def __init__(self, fmt=None, datefmt=None, style='%', **kwargs):
        # Straight pass-through to the base formatter.
        super().__init__(fmt, datefmt, style, **kwargs)

    def format(self, record: logging.LogRecord) -> str:
        """Format ``record`` with the base class, then redact the result."""
        return redact_sensitive_text(super().format(record))

114
agent/skill_commands.py Normal file
View file

@ -0,0 +1,114 @@
"""Skill slash commands — scan installed skills and build invocation messages.
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
can invoke skills via /skill-name commands.
"""
import logging
from pathlib import Path
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
# Result of the most recent scan, keyed by "/command-name".
_skill_commands: Dict[str, Dict[str, Any]] = {}


def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
    """Scan the skills directory and rebuild the slash-command mapping.

    Every ``SKILL.md`` found below ``SKILLS_DIR`` (imported from
    ``tools.skills_tool``) becomes one command.  The command name is the
    skill's frontmatter ``name`` -- or its folder name when that is missing
    or empty -- lower-cased with spaces and underscores turned into hyphens.
    The description comes from the frontmatter, falling back to the first
    non-heading line of the body (truncated to 80 characters).

    The scan is best-effort: a skill that cannot be read or parsed is skipped,
    and a failure of the scan itself yields whatever was collected so far.
    Both cases are logged at debug level instead of being dropped silently.

    Returns:
        Dict mapping "/skill-name" to {name, description, skill_md_path,
        skill_dir}.  The module-level cache is replaced by this dict.
    """
    global _skill_commands
    _skill_commands = {}
    # Directories whose contents are never treated as installed skills.
    ignored_dirs = {".git", ".github", ".hub"}
    try:
        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter

        if not SKILLS_DIR.exists():
            return _skill_commands
        # Sorted so a command-name collision resolves the same way on every
        # run; rglob() order is not guaranteed.
        for skill_md in sorted(SKILLS_DIR.rglob("SKILL.md")):
            # Compare path components rather than "/.git/" substrings, which
            # never match when the platform separator is a backslash.
            if ignored_dirs.intersection(skill_md.parts):
                continue
            try:
                content = skill_md.read_text(encoding='utf-8')
                frontmatter, body = _parse_frontmatter(content)
                # str() guards against non-string frontmatter values, which
                # would otherwise make .lower() raise and drop the skill.
                name = str(frontmatter.get('name') or skill_md.parent.name)
                description = str(frontmatter.get('description') or '')
                if not description:
                    for line in body.strip().split('\n'):
                        line = line.strip()
                        if line and not line.startswith('#'):
                            description = line[:80]
                            break
                cmd_name = name.lower().replace(' ', '-').replace('_', '-')
                _skill_commands[f"/{cmd_name}"] = {
                    "name": name,
                    "description": description or f"Invoke the {name} skill",
                    "skill_md_path": str(skill_md),
                    "skill_dir": str(skill_md.parent),
                }
            except Exception:
                logger.debug("Skipping skill %s: could not load it", skill_md, exc_info=True)
                continue
    except Exception:
        logger.debug("Skill command scan failed", exc_info=True)
    return _skill_commands
def get_skill_commands() -> Dict[str, Dict[str, Any]]:
    """Return the cached command mapping, running a scan first when it is empty."""
    if _skill_commands:
        return _skill_commands
    scan_skill_commands()
    # Read the module global again here: the scan rebinds it to a new dict.
    return _skill_commands
def build_skill_invocation_message(cmd_key: str, user_instruction: str = "") -> Optional[str]:
    """Compose the user-turn message for a ``/skill-name`` invocation.

    The message consists of a bracketed notice naming the skill, the stripped
    SKILL.md content, a list of files found under the skill's ``references``,
    ``templates``, ``scripts`` and ``assets`` folders (paths only, contents
    are not loaded), and -- when given -- the user's instruction.

    Args:
        cmd_key: The command key including leading slash (e.g., "/gif-search").
        user_instruction: Optional text the user typed after the command.

    Returns:
        The message string; ``None`` when ``cmd_key`` is not a known skill
        command; a short "[Failed to load skill: ...]" marker when SKILL.md
        cannot be read.
    """
    skill_info = get_skill_commands().get(cmd_key)
    if not skill_info:
        return None

    skill_md_path = Path(skill_info["skill_md_path"])
    skill_dir = Path(skill_info["skill_dir"])
    skill_name = skill_info["name"]

    try:
        content = skill_md_path.read_text(encoding='utf-8')
    except Exception:
        return f"[Failed to load skill: {skill_name}]"

    # Paths are relative to the skill folder, grouped by subfolder and sorted
    # within each one.
    supporting = [
        str(entry.relative_to(skill_dir))
        for subdir in ("references", "templates", "scripts", "assets")
        if (skill_dir / subdir).exists()
        for entry in sorted((skill_dir / subdir).rglob("*"))
        if entry.is_file()
    ]

    parts = [
        f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
        "",
        content.strip(),
    ]
    if supporting:
        parts.extend(["", "[This skill has supporting files you can load with the skill_view tool:]"])
        parts.extend(f"- {sf}" for sf in supporting)
        parts.append(f'\nTo view any of these, use: skill_view(name="{skill_name}", file="<path>")')
    if user_instruction:
        parts.extend([
            "",
            f"The user has provided the following instruction alongside the skill invocation: {user_instruction}",
        ])
    return "\n".join(parts)

232
cli.py
View file

@ -682,17 +682,27 @@ COMMANDS = {
}
# ============================================================================
# Skill Slash Commands — dynamic commands generated from installed skills
# ============================================================================
from agent.skill_commands import scan_skill_commands, get_skill_commands, build_skill_invocation_message
_skill_commands = scan_skill_commands()
class SlashCommandCompleter(Completer):
"""Autocomplete for /commands in the input area."""
"""Autocomplete for /commands and /skill-name in the input area."""
def get_completions(self, document, complete_event):
text = document.text_before_cursor
# Only complete at the start of input, after /
if not text.startswith("/"):
return
word = text[1:] # strip the leading /
# Built-in commands
for cmd, desc in COMMANDS.items():
cmd_name = cmd[1:] # strip leading / from key
cmd_name = cmd[1:]
if cmd_name.startswith(word):
yield Completion(
cmd_name,
@ -701,6 +711,17 @@ class SlashCommandCompleter(Completer):
display_meta=desc,
)
# Skill commands
for cmd, info in _skill_commands.items():
cmd_name = cmd[1:]
if cmd_name.startswith(word):
yield Completion(
cmd_name,
start_position=-len(word),
display=cmd,
display_meta=f"{info['description'][:50]}",
)
def save_config_value(key_path: str, value: any) -> bool:
"""
@ -782,7 +803,7 @@ class HermesCLI:
Args:
model: Model to use (default: from env or claude-sonnet)
toolsets: List of toolsets to enable (default: all)
provider: Inference provider ("auto", "openrouter", "nous")
provider: Inference provider ("auto", "openrouter", "nous", "openai-codex")
api_key: API key (default: from environment)
base_url: API base URL (default: OpenRouter)
max_turns: Maximum tool-calling iterations (default: 60)
@ -800,37 +821,37 @@ class HermesCLI:
# Configuration - priority: CLI args > env vars > config file
# Model can come from: CLI arg, LLM_MODEL env, OPENAI_MODEL env (custom endpoint), or config
self.model = model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or CLI_CONFIG["model"]["default"]
# Base URL: custom endpoint (OPENAI_BASE_URL) takes precedence over OpenRouter
self.base_url = base_url or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"])
# API key: custom endpoint (OPENAI_API_KEY) takes precedence over OpenRouter
self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
# Provider resolution: determines whether to use OAuth credentials or env var keys
from hermes_cli.auth import resolve_provider
self._explicit_api_key = api_key
self._explicit_base_url = base_url
# Provider selection is resolved lazily at use-time via _ensure_runtime_credentials().
self.requested_provider = (
provider
or os.getenv("HERMES_INFERENCE_PROVIDER")
or CLI_CONFIG["model"].get("provider")
or "auto"
)
self.provider = resolve_provider(
self.requested_provider,
explicit_api_key=api_key,
explicit_base_url=base_url,
self._provider_source: Optional[str] = None
self.provider = self.requested_provider
self.api_mode = "chat_completions"
self.base_url = (
base_url
or os.getenv("OPENAI_BASE_URL")
or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"])
)
self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
self._nous_key_expires_at: Optional[str] = None
self._nous_key_source: Optional[str] = None
# Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default
if max_turns is not None:
if max_turns is not None: # CLI arg was explicitly set
self.max_turns = max_turns
elif os.getenv("HERMES_MAX_ITERATIONS"):
self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS"))
elif CLI_CONFIG["agent"].get("max_turns"):
self.max_turns = CLI_CONFIG["agent"]["max_turns"]
elif CLI_CONFIG.get("max_turns"): # Backwards compat: root-level max_turns
self.max_turns = CLI_CONFIG["max_turns"]
elif os.getenv("HERMES_MAX_ITERATIONS"):
self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS"))
else:
self.max_turns = 60
@ -882,45 +903,51 @@ class HermesCLI:
def _ensure_runtime_credentials(self) -> bool:
"""
Ensure OAuth provider credentials are fresh before agent use.
For Nous Portal: checks agent key TTL, refreshes/re-mints as needed.
If the key changed, tears down the agent so it rebuilds with new creds.
Ensure runtime credentials are resolved before agent use.
Re-resolves provider credentials so key rotation and token refresh
are picked up without restarting the CLI.
Returns True if credentials are ready, False on auth failure.
"""
if self.provider != "nous":
return True
from hermes_cli.auth import format_auth_error, resolve_nous_runtime_credentials
from hermes_cli.runtime_provider import (
resolve_runtime_provider,
format_runtime_provider_error,
)
try:
credentials = resolve_nous_runtime_credentials(
min_key_ttl_seconds=max(
60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))
),
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
runtime = resolve_runtime_provider(
requested=self.requested_provider,
explicit_api_key=self._explicit_api_key,
explicit_base_url=self._explicit_base_url,
)
except Exception as exc:
message = format_auth_error(exc)
message = format_runtime_provider_error(exc)
self.console.print(f"[bold red]{message}[/]")
return False
api_key = credentials.get("api_key")
base_url = credentials.get("base_url")
api_key = runtime.get("api_key")
base_url = runtime.get("base_url")
resolved_provider = runtime.get("provider", "openrouter")
resolved_api_mode = runtime.get("api_mode", self.api_mode)
if not isinstance(api_key, str) or not api_key:
self.console.print("[bold red]Nous credential resolver returned an empty API key.[/]")
self.console.print("[bold red]Provider resolver returned an empty API key.[/]")
return False
if not isinstance(base_url, str) or not base_url:
self.console.print("[bold red]Nous credential resolver returned an empty base URL.[/]")
self.console.print("[bold red]Provider resolver returned an empty base URL.[/]")
return False
credentials_changed = api_key != self.api_key or base_url != self.base_url
routing_changed = (
resolved_provider != self.provider
or resolved_api_mode != self.api_mode
)
self.provider = resolved_provider
self.api_mode = resolved_api_mode
self._provider_source = runtime.get("source")
self.api_key = api_key
self.base_url = base_url
self._nous_key_expires_at = credentials.get("expires_at")
self._nous_key_source = credentials.get("source")
# AIAgent/OpenAI client holds auth at init time, so rebuild if key rotated
if credentials_changed and self.agent is not None:
if (credentials_changed or routing_changed) and self.agent is not None:
self.agent = None
return True
@ -936,7 +963,7 @@ class HermesCLI:
if self.agent is not None:
return True
if self.provider == "nous" and not self._ensure_runtime_credentials():
if not self._ensure_runtime_credentials():
return False
# Initialize SQLite session store for CLI sessions
@ -980,6 +1007,8 @@ class HermesCLI:
model=self.model,
api_key=self.api_key,
base_url=self.base_url,
provider=self.provider,
api_mode=self.api_mode,
max_iterations=self.max_turns,
enabled_toolsets=self.enabled_toolsets,
verbose_logging=self.verbose,
@ -1072,8 +1101,8 @@ class HermesCLI:
toolsets_info = f" [dim #B8860B]·[/] [#CD7F32]toolsets: {', '.join(self.enabled_toolsets)}[/]"
provider_info = f" [dim #B8860B]·[/] [dim]provider: {self.provider}[/]"
if self.provider == "nous" and self._nous_key_source:
provider_info += f" [dim #B8860B]·[/] [dim]key: {self._nous_key_source}[/]"
if self._provider_source:
provider_info += f" [dim #B8860B]·[/] [dim]auth: {self._provider_source}[/]"
self.console.print(
f" {api_indicator} [#FFBF00]{model_short}[/] "
@ -1082,20 +1111,21 @@ class HermesCLI:
)
def show_help(self):
"""Display help information with kawaii ASCII art."""
print()
print("+" + "-" * 50 + "+")
print("|" + " " * 14 + "(^_^)? Available Commands" + " " * 10 + "|")
print("+" + "-" * 50 + "+")
print()
"""Display help information."""
_cprint(f"\n{_BOLD}+{'-' * 50}+{_RST}")
_cprint(f"{_BOLD}|{' ' * 14}(^_^)? Available Commands{' ' * 10}|{_RST}")
_cprint(f"{_BOLD}+{'-' * 50}+{_RST}\n")
for cmd, desc in COMMANDS.items():
print(f" {cmd:<15} - {desc}")
_cprint(f" {_GOLD}{cmd:<15}{_RST} {_DIM}-{_RST} {desc}")
print()
print(" Tip: Just type your message to chat with Hermes!")
print(" Multi-line: Alt+Enter for a new line")
print()
if _skill_commands:
_cprint(f"\n{_BOLD}Skill Commands{_RST} ({len(_skill_commands)} installed):")
for cmd, info in sorted(_skill_commands.items()):
_cprint(f" {_GOLD}{cmd:<22}{_RST} {_DIM}-{_RST} {info['description']}")
_cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}")
_cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}\n")
def show_tools(self):
"""Display available tools with kawaii ASCII art."""
@ -1692,9 +1722,26 @@ class HermesCLI:
self._show_gateway_status()
elif cmd_lower == "/verbose":
self._toggle_verbose()
elif cmd_lower == "/compress":
self._manual_compress()
elif cmd_lower == "/usage":
self._show_usage()
else:
self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]")
self.console.print("[dim #B8860B]Type /help for available commands[/]")
# Check for skill slash commands (/gif-search, /axolotl, etc.)
base_cmd = cmd_lower.split()[0]
if base_cmd in _skill_commands:
user_instruction = cmd_original[len(base_cmd):].strip()
msg = build_skill_invocation_message(base_cmd, user_instruction)
if msg:
skill_name = _skill_commands[base_cmd]["name"]
print(f"\n⚡ Loading skill: {skill_name}")
if hasattr(self, '_pending_input'):
self._pending_input.put(msg)
else:
self.console.print(f"[bold red]Failed to load skill for {base_cmd}[/]")
else:
self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]")
self.console.print("[dim #B8860B]Type /help for available commands[/]")
return True
@ -1720,6 +1767,77 @@ class HermesCLI:
}
self.console.print(labels.get(self.tool_progress_mode, ""))
def _manual_compress(self):
"""Manually trigger context compression on the current conversation."""
if not self.conversation_history or len(self.conversation_history) < 4:
print("(._.) Not enough conversation to compress (need at least 4 messages).")
return
if not self.agent:
print("(._.) No active agent -- send a message first.")
return
if not self.agent.compression_enabled:
print("(._.) Compression is disabled in config.")
return
original_count = len(self.conversation_history)
try:
from agent.model_metadata import estimate_messages_tokens_rough
approx_tokens = estimate_messages_tokens_rough(self.conversation_history)
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
compressed, new_system = self.agent._compress_context(
self.conversation_history,
self.agent._cached_system_prompt or "",
approx_tokens=approx_tokens,
)
self.conversation_history = compressed
new_count = len(self.conversation_history)
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
print(
f" ✅ Compressed: {original_count}{new_count} messages "
f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)"
)
except Exception as e:
print(f" ❌ Compression failed: {e}")
def _show_usage(self):
"""Show cumulative token usage for the current session."""
if not self.agent:
print("(._.) No active agent -- send a message first.")
return
agent = self.agent
prompt = agent.session_prompt_tokens
completion = agent.session_completion_tokens
total = agent.session_total_tokens
calls = agent.session_api_calls
if calls == 0:
print("(._.) No API calls made yet in this session.")
return
# Current context window state
compressor = agent.context_compressor
last_prompt = compressor.last_prompt_tokens
ctx_len = compressor.context_length
pct = (last_prompt / ctx_len * 100) if ctx_len else 0
compressions = compressor.compression_count
msg_count = len(self.conversation_history)
print(f" 📊 Session Token Usage")
print(f" {'' * 40}")
print(f" Prompt tokens (input): {prompt:>10,}")
print(f" Completion tokens (output): {completion:>9,}")
print(f" Total tokens: {total:>10,}")
print(f" API calls: {calls:>10,}")
print(f" {'' * 40}")
print(f" Current context: {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)")
print(f" Messages: {msg_count}")
print(f" Compressions: {compressions}")
if self.verbose:
logging.getLogger().setLevel(logging.DEBUG)
for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
@ -1894,8 +2012,8 @@ class HermesCLI:
Returns:
The agent's response, or None on error
"""
# Refresh OAuth credentials if needed (handles key rotation transparently)
if self.provider == "nous" and not self._ensure_runtime_credentials():
# Refresh provider credentials if needed (handles key rotation transparently)
if not self._ensure_runtime_credentials():
return None
# Initialize agent if needed

View file

@ -172,10 +172,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
except UnicodeDecodeError:
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")
model = os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6")
# Custom endpoint (OPENAI_*) takes precedence, matching CLI behavior
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "")
base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
try:
import yaml
@ -188,24 +185,27 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
model = _model_cfg
elif isinstance(_model_cfg, dict):
model = _model_cfg.get("default", model)
base_url = _model_cfg.get("base_url", base_url)
# Check if provider is nous — resolve OAuth credentials
provider = _model_cfg.get("provider", "") if isinstance(_model_cfg, dict) else ""
if provider == "nous":
try:
from hermes_cli.auth import resolve_nous_runtime_credentials
creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=5 * 60)
api_key = creds.get("api_key", api_key)
base_url = creds.get("base_url", base_url)
except Exception as nous_err:
logging.warning("Nous Portal credential resolution failed for cron: %s", nous_err)
except Exception:
pass
from hermes_cli.runtime_provider import (
resolve_runtime_provider,
format_runtime_provider_error,
)
try:
runtime = resolve_runtime_provider(
requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
)
except Exception as exc:
message = format_runtime_provider_error(exc)
raise RuntimeError(message) from exc
agent = AIAgent(
model=model,
api_key=api_key,
base_url=base_url,
api_key=runtime.get("api_key"),
base_url=runtime.get("base_url"),
provider=runtime.get("provider"),
api_mode=runtime.get("api_mode"),
quiet_mode=True,
session_id=f"cron_{job_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
)

View file

@ -12,7 +12,7 @@ hermes
hermes --model "anthropic/claude-sonnet-4"
# With specific provider
hermes --provider nous # Use Nous Portal (requires: hermes login)
hermes --provider nous # Use Nous Portal (requires: hermes model)
hermes --provider openrouter # Force OpenRouter
# With specific toolsets
@ -73,6 +73,9 @@ The CLI is implemented in `cli.py` and uses:
| `/history` | Show conversation history |
| `/save` | Save current conversation to file |
| `/config` | Show current configuration |
| `/verbose` | Cycle tool progress display: off → new → all → verbose |
| `/compress` | Manually compress conversation context (flush memories + summarize) |
| `/usage` | Show token usage for the current session |
| `/quit` | Exit the CLI (also: `/exit`, `/q`) |
## Configuration
@ -93,7 +96,7 @@ model:
```
**Provider selection** (`provider` field):
- `auto` (default): Uses Nous Portal if logged in (`hermes login`), otherwise falls back to OpenRouter/env vars.
- `auto` (default): Uses Nous Portal if logged in (`hermes model`), otherwise falls back to OpenRouter/env vars.
- `openrouter`: Always uses `OPENROUTER_API_KEY` from `.env`.
- `nous`: Always uses Nous Portal OAuth credentials from `auth.json`.

174
docs/hooks.md Normal file
View file

@ -0,0 +1,174 @@
# Event Hooks
The hooks system lets you run custom code at key points in the agent lifecycle — session creation, slash commands, each tool-calling step, and more. Hooks are discovered automatically from `~/.hermes/hooks/` and fire without blocking the main agent pipeline.
## Creating a Hook
Each hook is a directory under `~/.hermes/hooks/` containing two files:
```
~/.hermes/hooks/
└── my-hook/
    ├── HOOK.yaml      # Declares which events to listen for
    └── handler.py     # Python handler function
```
### HOOK.yaml
```yaml
name: my-hook
description: Log all agent activity to a file
events:
- agent:start
- agent:end
- agent:step
```
The `events` list determines which events trigger your handler. You can subscribe to any combination of events, including wildcards like `command:*`.
### handler.py
```python
import json
from datetime import datetime
from pathlib import Path
LOG_FILE = Path.home() / ".hermes" / "hooks" / "my-hook" / "activity.log"
async def handle(event_type: str, context: dict):
"""Called for each subscribed event. Must be named 'handle'."""
entry = {
"timestamp": datetime.now().isoformat(),
"event": event_type,
**context,
}
with open(LOG_FILE, "a") as f:
f.write(json.dumps(entry) + "\n")
```
The handler function:
- Must be named `handle`
- Receives `event_type` (string) and `context` (dict)
- Can be `async def` or regular `def` — both work
- Errors are caught and logged, never crashing the agent
## Available Events
| Event | When it fires | Context keys |
|-------|---------------|--------------|
| `gateway:startup` | Gateway process starts | `platforms` (list of active platform names) |
| `session:start` | New messaging session created | `platform`, `user_id`, `session_id`, `session_key` |
| `session:reset` | User ran `/new` or `/reset` | `platform`, `user_id`, `session_key` |
| `agent:start` | Agent begins processing a message | `platform`, `user_id`, `session_id`, `message` |
| `agent:step` | Each iteration of the tool-calling loop | `platform`, `user_id`, `session_id`, `iteration`, `tool_names` |
| `agent:end` | Agent finishes processing | `platform`, `user_id`, `session_id`, `message`, `response` |
| `command:*` | A recognized built-in slash command is executed (e.g. `/model`, `/reset`) | `platform`, `user_id`, `command`, `args` |
### Wildcard Matching
Handlers registered for `command:*` fire for any `command:` event (`command:model`, `command:reset`, etc.). This lets you monitor all slash commands with a single subscription.
## Examples
### Telegram Notification on Long Tasks
Send yourself a Telegram message when the agent takes more than 10 tool-calling steps:
```yaml
# ~/.hermes/hooks/long-task-alert/HOOK.yaml
name: long-task-alert
description: Alert when agent is taking many steps
events:
- agent:step
```
```python
# ~/.hermes/hooks/long-task-alert/handler.py
import os
import httpx
THRESHOLD = 10
BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
CHAT_ID = os.getenv("TELEGRAM_HOME_CHANNEL")
async def handle(event_type: str, context: dict):
iteration = context.get("iteration", 0)
if iteration == THRESHOLD and BOT_TOKEN and CHAT_ID:
tools = ", ".join(context.get("tool_names", []))
text = f"⚠️ Agent has been running for {iteration} steps. Last tools: {tools}"
async with httpx.AsyncClient() as client:
await client.post(
f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage",
json={"chat_id": CHAT_ID, "text": text},
)
```
### Command Usage Logger
Track which slash commands are used and how often:
```yaml
# ~/.hermes/hooks/command-logger/HOOK.yaml
name: command-logger
description: Log slash command usage
events:
- command:*
```
```python
# ~/.hermes/hooks/command-logger/handler.py
import json
from datetime import datetime
from pathlib import Path
LOG = Path.home() / ".hermes" / "logs" / "command_usage.jsonl"
def handle(event_type: str, context: dict):
LOG.parent.mkdir(parents=True, exist_ok=True)
entry = {
"ts": datetime.now().isoformat(),
"command": context.get("command"),
"args": context.get("args"),
"platform": context.get("platform"),
"user": context.get("user_id"),
}
with open(LOG, "a") as f:
f.write(json.dumps(entry) + "\n")
```
### Session Start Webhook
POST to an external service whenever a new session starts:
```yaml
# ~/.hermes/hooks/session-webhook/HOOK.yaml
name: session-webhook
description: Notify external service on new sessions
events:
- session:start
- session:reset
```
```python
# ~/.hermes/hooks/session-webhook/handler.py
import httpx
WEBHOOK_URL = "https://your-service.example.com/hermes-events"
async def handle(event_type: str, context: dict):
async with httpx.AsyncClient() as client:
await client.post(WEBHOOK_URL, json={
"event": event_type,
**context,
}, timeout=5)
```
## How It Works
1. On gateway startup, `HookRegistry.discover_and_load()` scans `~/.hermes/hooks/`
2. Each subdirectory with `HOOK.yaml` + `handler.py` is loaded dynamically
3. Handlers are registered for their declared events
4. At each lifecycle point, `hooks.emit()` fires all matching handlers
5. Errors in any handler are caught and logged — a broken hook never crashes the agent
Hooks only fire in the **gateway** (Telegram, Discord, Slack, WhatsApp). The CLI does not currently load hooks. The `agent:step` event bridges from the sync agent thread to the async hook system via `asyncio.run_coroutine_threadsafe`.

View file

@ -74,6 +74,13 @@ Sessions reset based on configurable policies:
Send `/new` or `/reset` as a message to start fresh.
### Context Management
| Command | Description |
|---------|-------------|
| `/compress` | Manually compress conversation context (saves memories, then summarizes) |
| `/usage` | Show token usage and context window status for the current session |
### Per-Platform Overrides
Configure different reset policies per platform:

View file

@ -425,6 +425,28 @@ class BasePlatformAdapter(ABC):
text = f"{caption}\n{image_url}" if caption else image_url
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
async def send_animation(
    self,
    chat_id: str,
    animation_url: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """
    Send an animated GIF natively via the platform API.

    Subclasses should override this to deliver GIFs as real animations
    (e.g., Telegram send_animation) so they auto-play inline. This base
    implementation simply forwards the URL to send_image.
    """
    # No native animation support at this level: reuse the image path.
    fallback_result = await self.send_image(
        chat_id=chat_id,
        image_url=animation_url,
        caption=caption,
        reply_to=reply_to,
    )
    return fallback_result
@staticmethod
def _is_animation_url(url: str) -> bool:
"""Check if a URL points to an animated GIF (vs a static image)."""
lower = url.lower().split('?')[0] # Strip query params
return lower.endswith('.gif')
@staticmethod
def extract_images(content: str) -> Tuple[List[Tuple[str, str]], str]:
"""
@ -636,11 +658,19 @@ class BasePlatformAdapter(ABC):
if human_delay > 0:
await asyncio.sleep(human_delay)
try:
img_result = await self.send_image(
chat_id=event.source.chat_id,
image_url=image_url,
caption=alt_text if alt_text else None,
)
# Route animated GIFs through send_animation for proper playback
if self._is_animation_url(image_url):
img_result = await self.send_animation(
chat_id=event.source.chat_id,
animation_url=image_url,
caption=alt_text if alt_text else None,
)
else:
img_result = await self.send_image(
chat_id=event.source.chat_id,
image_url=image_url,
caption=alt_text if alt_text else None,
)
if not img_result.success:
print(f"[{self.name}] Failed to send image: {img_result.error}")
except Exception as img_err:

View file

@ -272,6 +272,30 @@ class TelegramAdapter(BasePlatformAdapter):
# Fallback: send as text link
return await super().send_image(chat_id, image_url, caption, reply_to)
async def send_animation(
    self,
    chat_id: str,
    animation_url: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """Deliver a GIF as a native Telegram animation so it auto-plays inline.

    Returns a failed SendResult when the bot is not connected. If the
    animation call raises for any reason, the error is printed and the URL
    is re-sent through send_image instead.
    """
    bot = self._bot
    if not bot:
        return SendResult(success=False, error="Not connected")

    try:
        target_chat = int(chat_id)
        # Captions are cut to 1024 characters before being sent.
        trimmed_caption = caption[:1024] if caption else None
        reply_id = int(reply_to) if reply_to else None
        sent = await bot.send_animation(
            chat_id=target_chat,
            animation=animation_url,
            caption=trimmed_caption,
            reply_to_message_id=reply_id,
        )
        return SendResult(success=True, message_id=str(sent.message_id))
    except Exception as err:
        print(f"[{self.name}] Failed to send animation, falling back to photo: {err}")
        # Second attempt: treat the URL as an ordinary photo.
        return await self.send_image(chat_id, animation_url, caption, reply_to)
async def send_typing(self, chat_id: str) -> None:
"""Send typing indicator."""
if self._bot:

View file

@ -78,6 +78,20 @@ if _config_path.exists():
for _cfg_key, _env_var in _terminal_env_map.items():
if _cfg_key in _terminal_cfg:
os.environ[_env_var] = str(_terminal_cfg[_cfg_key])
_compression_cfg = _cfg.get("compression", {})
if _compression_cfg and isinstance(_compression_cfg, dict):
_compression_env_map = {
"enabled": "CONTEXT_COMPRESSION_ENABLED",
"threshold": "CONTEXT_COMPRESSION_THRESHOLD",
"summary_model": "CONTEXT_COMPRESSION_MODEL",
}
for _cfg_key, _env_var in _compression_env_map.items():
if _cfg_key in _compression_cfg:
os.environ[_env_var] = str(_compression_cfg[_cfg_key])
_agent_cfg = _cfg.get("agent", {})
if _agent_cfg and isinstance(_agent_cfg, dict):
if "max_turns" in _agent_cfg:
os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
except Exception:
pass # Non-fatal; gateway can still run with .env values
@ -111,6 +125,28 @@ from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageTyp
logger = logging.getLogger(__name__)
def _resolve_runtime_agent_kwargs() -> dict:
"""Resolve provider credentials for gateway-created AIAgent instances."""
from hermes_cli.runtime_provider import (
resolve_runtime_provider,
format_runtime_provider_error,
)
try:
runtime = resolve_runtime_provider(
requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
)
except Exception as exc:
raise RuntimeError(format_runtime_provider_error(exc)) from exc
return {
"api_key": runtime.get("api_key"),
"base_url": runtime.get("base_url"),
"provider": runtime.get("provider"),
"api_mode": runtime.get("api_mode"),
}
class GatewayRunner:
"""
Main gateway controller.
@ -178,17 +214,12 @@ class GatewayRunner:
return
from run_agent import AIAgent
_flush_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "")
_flush_base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
_flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL", "anthropic/claude-opus-4.6")
if not _flush_api_key:
runtime_kwargs = _resolve_runtime_agent_kwargs()
if not runtime_kwargs.get("api_key"):
return
tmp_agent = AIAgent(
model=_flush_model,
api_key=_flush_api_key,
base_url=_flush_base_url,
**runtime_kwargs,
max_iterations=8,
quiet_mode=True,
enabled_toolsets=["memory", "skills"],
@ -608,6 +639,19 @@ class GatewayRunner:
# Check for commands
command = event.get_command()
# Emit command:* hook for any recognized slash command
_known_commands = {"new", "reset", "help", "status", "stop", "model",
"personality", "retry", "undo", "sethome", "set-home",
"compress", "usage"}
if command and command in _known_commands:
await self.hooks.emit(f"command:{command}", {
"platform": source.platform.value if source.platform else "",
"user_id": source.user_id,
"command": command,
"args": event.get_command_args().strip(),
})
if command in ["new", "reset"]:
return await self._handle_reset_command(event)
@ -634,6 +678,27 @@ class GatewayRunner:
if command in ["sethome", "set-home"]:
return await self._handle_set_home_command(event)
if command == "compress":
return await self._handle_compress_command(event)
if command == "usage":
return await self._handle_usage_command(event)
# Skill slash commands: /skill-name loads the skill and sends to agent
if command:
try:
from agent.skill_commands import get_skill_commands, build_skill_invocation_message
skill_cmds = get_skill_commands()
cmd_key = f"/{command}"
if cmd_key in skill_cmds:
user_instruction = event.get_command_args().strip()
msg = build_skill_invocation_message(cmd_key, user_instruction)
if msg:
event.text = msg
# Fall through to normal message processing with skill content
except Exception as e:
logger.debug("Skill command check failed (non-fatal): %s", e)
# Check for pending exec approval responses
if source.chat_type != "dm":
@ -663,6 +728,19 @@ class GatewayRunner:
session_entry = self.session_store.get_or_create_session(source)
session_key = session_entry.session_key
# Emit session:start for new or auto-reset sessions
_is_new_session = (
session_entry.created_at == session_entry.updated_at
or getattr(session_entry, "was_auto_reset", False)
)
if _is_new_session:
await self.hooks.emit("session:start", {
"platform": source.platform.value if source.platform else "",
"user_id": source.user_id,
"session_id": session_entry.session_id,
"session_key": session_key,
})
# Build session context
context = build_session_context(source, self.config, session_entry)
@ -916,15 +994,10 @@ class GatewayRunner:
if old_history:
from run_agent import AIAgent
loop = asyncio.get_event_loop()
# Resolve credentials so the flush agent can reach the LLM
_flush_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "")
_flush_base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
_flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL", "anthropic/claude-opus-4.6")
_flush_kwargs = _resolve_runtime_agent_kwargs()
def _do_flush():
tmp_agent = AIAgent(
model=_flush_model,
api_key=_flush_api_key,
base_url=_flush_base_url,
**_flush_kwargs,
max_iterations=5,
quiet_mode=True,
enabled_toolsets=["memory"],
@ -999,20 +1072,31 @@ class GatewayRunner:
async def _handle_help_command(self, event: MessageEvent) -> str:
"""Handle /help command - list available commands."""
return (
"📖 **Hermes Commands**\n"
"\n"
"`/new` — Start a new conversation\n"
"`/reset` — Reset conversation history\n"
"`/status` — Show session info\n"
"`/stop` — Interrupt the running agent\n"
"`/model [name]` — Show or change the model\n"
"`/personality [name]` — Set a personality\n"
"`/retry` — Retry your last message\n"
"`/undo` — Remove the last exchange\n"
"`/sethome` — Set this chat as the home channel\n"
"`/help` — Show this message"
)
lines = [
"📖 **Hermes Commands**\n",
"`/new` — Start a new conversation",
"`/reset` — Reset conversation history",
"`/status` — Show session info",
"`/stop` — Interrupt the running agent",
"`/model [name]` — Show or change the model",
"`/personality [name]` — Set a personality",
"`/retry` — Retry your last message",
"`/undo` — Remove the last exchange",
"`/sethome` — Set this chat as the home channel",
"`/compress` — Compress conversation context",
"`/usage` — Show token usage for this session",
"`/help` — Show this message",
]
try:
from agent.skill_commands import get_skill_commands
skill_cmds = get_skill_commands()
if skill_cmds:
lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):")
for cmd in sorted(skill_cmds):
lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}")
except Exception:
pass
return "\n".join(lines)
async def _handle_model_command(self, event: MessageEvent) -> str:
"""Handle /model command - show or change the current model."""
@ -1205,6 +1289,95 @@ class GatewayRunner:
f"Cron jobs and cross-platform messages will be delivered here."
)
async def _handle_compress_command(self, event: MessageEvent) -> str:
"""Handle /compress command -- manually compress conversation context."""
source = event.source
session_entry = self.session_store.get_or_create_session(source)
history = self.session_store.load_transcript(session_entry.session_id)
if not history or len(history) < 4:
return "Not enough conversation to compress (need at least 4 messages)."
try:
from run_agent import AIAgent
from agent.model_metadata import estimate_messages_tokens_rough
runtime_kwargs = _resolve_runtime_agent_kwargs()
if not runtime_kwargs.get("api_key"):
return "No provider configured -- cannot compress."
msgs = [
{"role": m.get("role"), "content": m.get("content")}
for m in history
if m.get("role") in ("user", "assistant") and m.get("content")
]
original_count = len(msgs)
approx_tokens = estimate_messages_tokens_rough(msgs)
tmp_agent = AIAgent(
**runtime_kwargs,
max_iterations=4,
quiet_mode=True,
enabled_toolsets=["memory"],
session_id=session_entry.session_id,
)
loop = asyncio.get_event_loop()
compressed, _ = await loop.run_in_executor(
None,
lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens),
)
session_entry.conversation_history = compressed
new_count = len(compressed)
new_tokens = estimate_messages_tokens_rough(compressed)
return (
f"🗜️ Compressed: {original_count}{new_count} messages\n"
f"~{approx_tokens:,} → ~{new_tokens:,} tokens"
)
except Exception as e:
logger.warning("Manual compress failed: %s", e)
return f"Compression failed: {e}"
async def _handle_usage_command(self, event: MessageEvent) -> str:
"""Handle /usage command -- show token usage for the session's last agent run."""
source = event.source
session_key = f"agent:main:{source.platform.value}:" + \
(f"dm" if source.chat_type == "dm" else f"{source.chat_type}:{source.chat_id}")
agent = self._running_agents.get(session_key)
if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
lines = [
"📊 **Session Token Usage**",
f"Prompt (input): {agent.session_prompt_tokens:,}",
f"Completion (output): {agent.session_completion_tokens:,}",
f"Total: {agent.session_total_tokens:,}",
f"API calls: {agent.session_api_calls}",
]
ctx = agent.context_compressor
if ctx.last_prompt_tokens:
pct = ctx.last_prompt_tokens / ctx.context_length * 100 if ctx.context_length else 0
lines.append(f"Context: {ctx.last_prompt_tokens:,} / {ctx.context_length:,} ({pct:.0f}%)")
if ctx.compression_count:
lines.append(f"Compressions: {ctx.compression_count}")
return "\n".join(lines)
# No running agent -- check session history for a rough count
session_entry = self.session_store.get_or_create_session(source)
history = self.session_store.load_transcript(session_entry.session_id)
if history:
from agent.model_metadata import estimate_messages_tokens_rough
msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")]
approx = estimate_messages_tokens_rough(msgs)
return (
f"📊 **Session Info**\n"
f"Messages: {len(msgs)}\n"
f"Estimated context: ~{approx:,} tokens\n"
f"_(Detailed usage available during active conversations)_"
)
return "No usage data available for this session."
def _set_session_env(self, context: SessionContext) -> None:
"""Set environment variables for the current session."""
os.environ["HERMES_SESSION_PLATFORM"] = context.source.platform.value
@ -1593,6 +1766,25 @@ class GatewayRunner:
result_holder = [None] # Mutable container for the result
tools_holder = [None] # Mutable container for the tool definitions
# Bridge sync step_callback → async hooks.emit for agent:step events
_loop_for_step = asyncio.get_event_loop()
_hooks_ref = self.hooks
def _step_callback_sync(iteration: int, tool_names: list) -> None:
try:
asyncio.run_coroutine_threadsafe(
_hooks_ref.emit("agent:step", {
"platform": source.platform.value if source.platform else "",
"user_id": source.user_id,
"session_id": session_id,
"iteration": iteration,
"tool_names": tool_names,
}),
_loop_for_step,
)
except Exception as _e:
logger.debug("agent:step hook error: %s", _e)
def run_sync():
# Pass session_key to process registry via env var so background
# processes can be mapped back to this gateway session
@ -1609,7 +1801,7 @@ class GatewayRunner:
combined_ephemeral = context_prompt or ""
if self._ephemeral_system_prompt:
combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
# Re-read .env and config for fresh credentials (gateway is long-lived,
# keys may change without restart).
try:
@ -1619,9 +1811,6 @@ class GatewayRunner:
except Exception:
pass
# Custom endpoint (OPENAI_*) takes precedence, matching CLI behavior
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "")
base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
try:
@ -1635,24 +1824,22 @@ class GatewayRunner:
model = _model_cfg
elif isinstance(_model_cfg, dict):
model = _model_cfg.get("default", model)
base_url = _model_cfg.get("base_url", base_url)
# Check if provider is nous — resolve OAuth credentials
provider = _model_cfg.get("provider", "") if isinstance(_model_cfg, dict) else ""
if provider == "nous":
try:
from hermes_cli.auth import resolve_nous_runtime_credentials
creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=5 * 60)
api_key = creds.get("api_key", api_key)
base_url = creds.get("base_url", base_url)
except Exception as nous_err:
logger.warning("Nous Portal credential resolution failed: %s", nous_err)
except Exception:
pass
try:
runtime_kwargs = _resolve_runtime_agent_kwargs()
except Exception as exc:
return {
"final_response": f"⚠️ Provider authentication failed: {exc}",
"messages": [],
"api_calls": 0,
"tools": [],
}
agent = AIAgent(
model=model,
api_key=api_key,
base_url=base_url,
**runtime_kwargs,
max_iterations=max_iterations,
quiet_mode=True,
verbose_logging=False,
@ -1662,6 +1849,7 @@ class GatewayRunner:
reasoning_config=self._reasoning_config,
session_id=session_id,
tool_progress_callback=progress_callback if tool_progress_enabled else None,
step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None,
platform=platform_key,
honcho_session_key=session_key,
session_db=self._session_db,
@ -1714,6 +1902,19 @@ class GatewayRunner:
content = f"[Delivered from {mirror_src}] {content}"
agent_history.append({"role": role, "content": content})
# Collect MEDIA paths already in history so we can exclude them
# from the current turn's extraction. This is compression-safe:
# even if the message list shrinks, we know which paths are old.
_history_media_paths: set = set()
for _hm in agent_history:
if _hm.get("role") in ("tool", "function"):
_hc = _hm.get("content", "")
if "MEDIA:" in _hc:
for _match in re.finditer(r'MEDIA:(\S+)', _hc):
_p = _match.group(1).strip().rstrip('",}')
if _p:
_history_media_paths.add(_p)
result = agent.run_conversation(message, conversation_history=agent_history)
result_holder[0] = result
@ -1734,22 +1935,25 @@ class GatewayRunner:
# doesn't include them. We collect unique tags from tool results and
# append any that aren't already present in the final response, so the
# adapter's extract_media() can find and deliver the files exactly once.
#
# Uses path-based deduplication against _history_media_paths (collected
# before run_conversation) instead of index slicing. This is safe even
# when context compression shrinks the message list. (Fixes #160)
if "MEDIA:" not in final_response:
media_tags = []
has_voice_directive = False
for msg in result.get("messages", []):
if msg.get("role") == "tool" or msg.get("role") == "function":
if msg.get("role") in ("tool", "function"):
content = msg.get("content", "")
if "MEDIA:" in content:
for match in re.finditer(r'MEDIA:(\S+)', content):
path = match.group(1).strip().rstrip('",}')
if path:
if path and path not in _history_media_paths:
media_tags.append(f"MEDIA:{path}")
if "[[audio_as_voice]]" in content:
has_voice_directive = True
if media_tags:
# Deduplicate while preserving order
seen = set()
unique_tags = []
for tag in media_tags:
@ -1934,10 +2138,21 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool:
maxBytes=5 * 1024 * 1024,
backupCount=3,
)
file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
from agent.redact import RedactingFormatter
file_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
logging.getLogger().addHandler(file_handler)
logging.getLogger().setLevel(logging.INFO)
# Separate errors-only log for easy debugging
error_handler = RotatingFileHandler(
log_dir / 'errors.log',
maxBytes=2 * 1024 * 1024,
backupCount=2,
)
error_handler.setLevel(logging.WARNING)
error_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
logging.getLogger().addHandler(error_handler)
runner = GatewayRunner(config)
# Set up signal handlers

View file

@ -10,7 +10,7 @@ Architecture:
- Auth store (auth.json) holds per-provider credential state
- resolve_provider() picks the active provider via priority chain
- resolve_*_runtime_credentials() handles token refresh and key minting
- login_command() / logout_command() are the CLI entry points
- logout_command() is the CLI entry point for clearing auth
"""
from __future__ import annotations
@ -18,7 +18,10 @@ from __future__ import annotations
import json
import logging
import os
import shutil
import stat
import base64
import subprocess
import time
import webbrowser
from contextlib import contextmanager
@ -55,6 +58,10 @@ DEFAULT_NOUS_SCOPE = "inference:mint_agent_key"
DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes
ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry
DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1 # poll at most every 1s
DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
# =============================================================================
@ -84,7 +91,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
client_id=DEFAULT_NOUS_CLIENT_ID,
scope=DEFAULT_NOUS_SCOPE,
),
# Future: "openai_codex", "anthropic", etc.
"openai-codex": ProviderConfig(
id="openai-codex",
name="OpenAI Codex",
auth_type="oauth_external",
inference_base_url=DEFAULT_CODEX_BASE_URL,
),
}
@ -115,7 +127,7 @@ def format_auth_error(error: Exception) -> str:
return str(error)
if error.relogin_required:
return f"{error} Run `hermes login` to re-authenticate."
return f"{error} Run `hermes model` to re-authenticate."
if error.code == "subscription_required":
return (
@ -298,12 +310,15 @@ def resolve_provider(
"""
normalized = (requested or "auto").strip().lower()
if normalized in {"openrouter", "custom"}:
return "openrouter"
if normalized in PROVIDER_REGISTRY:
return normalized
if normalized == "openrouter":
return "openrouter"
if normalized != "auto":
return "openrouter"
raise AuthError(
f"Unknown provider '{normalized}'.",
code="invalid_provider",
)
# Explicit one-off CLI creds always mean openrouter/custom
if explicit_api_key or explicit_base_url:
@ -314,8 +329,8 @@ def resolve_provider(
auth_store = _load_auth_store()
active = auth_store.get("active_provider")
if active and active in PROVIDER_REGISTRY:
state = _load_provider_state(auth_store, active)
if state and (state.get("access_token") or state.get("refresh_token")):
status = get_auth_status(active)
if status.get("logged_in"):
return active
except Exception as e:
logger.debug("Could not detect active auth provider: %s", e)
@ -369,6 +384,27 @@ def _optional_base_url(value: Any) -> Optional[str]:
return cleaned if cleaned else None
def _decode_jwt_claims(token: Any) -> Dict[str, Any]:
if not isinstance(token, str) or token.count(".") != 2:
return {}
payload = token.split(".")[1]
payload += "=" * ((4 - len(payload) % 4) % 4)
try:
raw = base64.urlsafe_b64decode(payload.encode("utf-8"))
claims = json.loads(raw.decode("utf-8"))
except Exception:
return {}
return claims if isinstance(claims, dict) else {}
def _codex_access_token_is_expiring(access_token: Any, skew_seconds: int) -> bool:
    """Report whether the token's ``exp`` claim falls within the refresh window.

    Returns True when ``exp`` is at or before now plus ``skew_seconds``
    (negative skew is clamped to zero). A token with no numeric ``exp``
    claim is reported as not expiring.
    """
    expiry = _decode_jwt_claims(access_token).get("exp")
    if isinstance(expiry, (int, float)):
        refresh_deadline = time.time() + max(0, int(skew_seconds))
        return float(expiry) <= refresh_deadline
    return False
# =============================================================================
# SSH / remote session detection
# =============================================================================
@ -378,6 +414,302 @@ def _is_remote_session() -> bool:
return bool(os.getenv("SSH_CLIENT") or os.getenv("SSH_TTY"))
# =============================================================================
# OpenAI Codex auth file helpers
# =============================================================================
def resolve_codex_home_path() -> Path:
    """Return the Codex home directory.

    Uses ``$CODEX_HOME`` when it is set to a non-blank value, otherwise
    ``~/.codex``. A leading ``~`` in the result is expanded.
    """
    configured = os.getenv("CODEX_HOME", "").strip()
    home_dir = configured if configured else str(Path.home() / ".codex")
    return Path(home_dir).expanduser()
def _codex_auth_file_path() -> Path:
    """Return the path of ``auth.json`` inside the resolved Codex home directory."""
    codex_home = resolve_codex_home_path()
    return codex_home / "auth.json"
def _codex_auth_lock_path(auth_path: Path) -> Path:
return auth_path.with_suffix(auth_path.suffix + ".lock")
@contextmanager
def _codex_auth_file_lock(
    auth_path: Path,
    timeout_seconds: float = AUTH_LOCK_TIMEOUT_SECONDS,
):
    """Hold an exclusive advisory lock on the Codex auth lock file.

    The lock file lives next to ``auth_path`` (see ``_codex_auth_lock_path``)
    and its parent directory is created if needed. When ``fcntl`` is not
    available the body runs without any locking.

    Args:
        auth_path: Path of the auth file being protected.
        timeout_seconds: How long to keep retrying before giving up; values
            below one second are raised to one second.

    Raises:
        TimeoutError: If the lock could not be acquired before the deadline.
    """
    lock_path = _codex_auth_lock_path(auth_path)
    lock_path.parent.mkdir(parents=True, exist_ok=True)

    with lock_path.open("a+") as lock_file:
        if fcntl is None:
            yield
            return

        # Use the monotonic clock so wall-clock adjustments (NTP, manual
        # changes) cannot shorten or extend the wait.
        deadline = time.monotonic() + max(1.0, timeout_seconds)
        while True:
            try:
                fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
                break
            except BlockingIOError:
                if time.monotonic() >= deadline:
                    raise TimeoutError(f"Timed out waiting for Codex auth lock: {lock_path}")
                time.sleep(0.05)

        try:
            yield
        finally:
            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
def read_codex_auth_file() -> Dict[str, Any]:
    """Read and validate Codex auth.json shape.

    Returns:
        A dict with ``payload`` (the parsed file), ``tokens`` (its ``tokens``
        object), ``auth_path`` and ``codex_home``.

    Raises:
        AuthError: With ``relogin_required=True`` when the Codex home
            directory or auth file is missing, the file is not parseable
            JSON, or it lacks a ``tokens`` object holding non-empty
            ``access_token`` and ``refresh_token`` strings.
    """
    codex_home = resolve_codex_home_path()
    if not codex_home.exists():
        raise AuthError(
            f"Codex home directory not found at {codex_home}.",
            provider="openai-codex",
            code="codex_home_missing",
            relogin_required=True,
        )

    auth_path = codex_home / "auth.json"
    if not auth_path.exists():
        raise AuthError(
            f"Codex auth file not found at {auth_path}.",
            provider="openai-codex",
            code="codex_auth_missing",
            relogin_required=True,
        )

    try:
        # The writer in this module emits UTF-8; read it back the same way
        # instead of depending on the locale's default encoding.
        payload = json.loads(auth_path.read_text(encoding="utf-8"))
    except Exception as exc:
        raise AuthError(
            f"Failed to parse Codex auth file at {auth_path}.",
            provider="openai-codex",
            code="codex_auth_invalid_json",
            relogin_required=True,
        ) from exc

    # Valid JSON that is not an object (e.g. a list or string) has no
    # ``.get``; report it as an invalid shape rather than an AttributeError.
    tokens = payload.get("tokens") if isinstance(payload, dict) else None
    if not isinstance(tokens, dict):
        raise AuthError(
            "Codex auth file is missing a valid 'tokens' object.",
            provider="openai-codex",
            code="codex_auth_invalid_shape",
            relogin_required=True,
        )

    access_token = tokens.get("access_token")
    refresh_token = tokens.get("refresh_token")
    if not isinstance(access_token, str) or not access_token.strip():
        raise AuthError(
            "Codex auth file is missing tokens.access_token.",
            provider="openai-codex",
            code="codex_auth_missing_access_token",
            relogin_required=True,
        )
    if not isinstance(refresh_token, str) or not refresh_token.strip():
        raise AuthError(
            "Codex auth file is missing tokens.refresh_token.",
            provider="openai-codex",
            code="codex_auth_missing_refresh_token",
            relogin_required=True,
        )

    return {
        "payload": payload,
        "tokens": tokens,
        "auth_path": auth_path,
        "codex_home": codex_home,
    }
def _persist_codex_auth_payload(
auth_path: Path,
payload: Dict[str, Any],
*,
lock_held: bool = False,
) -> None:
auth_path.parent.mkdir(parents=True, exist_ok=True)
def _write() -> None:
serialized = json.dumps(payload, indent=2, ensure_ascii=False) + "\n"
tmp_path = auth_path.parent / f".{auth_path.name}.{os.getpid()}.{time.time_ns()}.tmp"
try:
with tmp_path.open("w", encoding="utf-8") as tmp_file:
tmp_file.write(serialized)
tmp_file.flush()
os.fsync(tmp_file.fileno())
os.replace(tmp_path, auth_path)
finally:
if tmp_path.exists():
try:
tmp_path.unlink()
except OSError:
pass
try:
auth_path.chmod(stat.S_IRUSR | stat.S_IWUSR)
except OSError:
pass
if lock_held:
_write()
return
with _codex_auth_file_lock(auth_path):
_write()
def _refresh_codex_auth_tokens(
    *,
    payload: Dict[str, Any],
    auth_path: Path,
    timeout_seconds: float,
    lock_held: bool = False,
) -> Dict[str, Any]:
    """Exchange the stored refresh token for a new access token and persist it.

    ``payload`` is updated in place (``tokens`` and ``last_refresh``) and
    written back to ``auth_path``.

    Args:
        payload: Parsed auth.json content; must contain a ``tokens`` object
            with a non-empty ``refresh_token``.
        auth_path: File the updated payload is written to.
        timeout_seconds: HTTP timeout; values below 5 seconds are raised to 5.
        lock_held: Forwarded to ``_persist_codex_auth_payload``.

    Returns:
        The updated tokens dict (new ``access_token``; ``refresh_token`` is
        replaced only if the server returned a new one).

    Raises:
        AuthError: If the stored tokens are malformed, the token endpoint
            answers with a non-200 status, or its response is unusable.
    """
    tokens = payload.get("tokens")
    if not isinstance(tokens, dict):
        raise AuthError(
            "Codex auth file is missing a valid 'tokens' object.",
            provider="openai-codex",
            code="codex_auth_invalid_shape",
            relogin_required=True,
        )

    refresh_token = tokens.get("refresh_token")
    if not isinstance(refresh_token, str) or not refresh_token.strip():
        raise AuthError(
            "Codex auth file is missing tokens.refresh_token.",
            provider="openai-codex",
            code="codex_auth_missing_refresh_token",
            relogin_required=True,
        )

    timeout = httpx.Timeout(max(5.0, float(timeout_seconds)))
    with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}) as client:
        response = client.post(
            CODEX_OAUTH_TOKEN_URL,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data={
                "grant_type": "refresh_token",
                "refresh_token": refresh_token,
                "client_id": CODEX_OAUTH_CLIENT_ID,
            },
        )

    if response.status_code != 200:
        code = "codex_refresh_failed"
        message = f"Codex token refresh failed with status {response.status_code}."
        relogin_required = False
        # Prefer the server's own error code/description when the body is
        # a JSON object; otherwise keep the generic status-based message.
        try:
            err = response.json()
            if isinstance(err, dict):
                err_code = err.get("error")
                if isinstance(err_code, str) and err_code.strip():
                    code = err_code.strip()
                err_desc = err.get("error_description") or err.get("message")
                if isinstance(err_desc, str) and err_desc.strip():
                    message = f"Codex token refresh failed: {err_desc.strip()}"
        except Exception:
            pass
        # These error codes are treated as requiring a fresh login.
        if code in {"invalid_grant", "invalid_token", "invalid_request"}:
            relogin_required = True
        raise AuthError(
            message,
            provider="openai-codex",
            code=code,
            relogin_required=relogin_required,
        )

    try:
        refresh_payload = response.json()
    except Exception as exc:
        raise AuthError(
            "Codex token refresh returned invalid JSON.",
            provider="openai-codex",
            code="codex_refresh_invalid_json",
            relogin_required=True,
        ) from exc

    # A JSON array/string/number has no ``.get``; report it as an auth
    # failure instead of letting an AttributeError escape.
    if not isinstance(refresh_payload, dict):
        raise AuthError(
            "Codex token refresh returned an unexpected response shape.",
            provider="openai-codex",
            code="codex_refresh_invalid_shape",
            relogin_required=True,
        )

    access_token = refresh_payload.get("access_token")
    if not isinstance(access_token, str) or not access_token.strip():
        raise AuthError(
            "Codex token refresh response was missing access_token.",
            provider="openai-codex",
            code="codex_refresh_missing_access_token",
            relogin_required=True,
        )

    updated_tokens = dict(tokens)
    updated_tokens["access_token"] = access_token.strip()
    # Keep the existing refresh token unless the server returned a new one.
    next_refresh = refresh_payload.get("refresh_token")
    if isinstance(next_refresh, str) and next_refresh.strip():
        updated_tokens["refresh_token"] = next_refresh.strip()

    payload["tokens"] = updated_tokens
    payload["last_refresh"] = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    _persist_codex_auth_payload(auth_path, payload, lock_held=lock_held)
    return updated_tokens
def resolve_codex_runtime_credentials(
    *,
    force_refresh: bool = False,
    refresh_if_expiring: bool = True,
    refresh_skew_seconds: int = CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
) -> Dict[str, Any]:
    """Resolve runtime credentials from Codex CLI auth state.

    Reads auth.json and, when a refresh is forced or the access token is
    inside the expiry window, refreshes it under the auth file lock. The
    file is re-read after the lock is acquired, and the refresh decision is
    re-evaluated against the freshly read token.

    Args:
        force_refresh: Refresh regardless of the token's expiry.
        refresh_if_expiring: Refresh when the token expires within
            ``refresh_skew_seconds``.
        refresh_skew_seconds: Expiry window used by the check above.

    Returns:
        A dict with ``provider``, ``base_url``, ``api_key`` (the access
        token), ``source``, ``last_refresh``, ``auth_mode``, ``auth_file``
        and ``codex_home``.

    Raises:
        AuthError: Propagated from reading or refreshing the auth file.
        TimeoutError: If the auth file lock cannot be acquired.
    """

    def _needs_refresh(token: str) -> bool:
        # Single definition of the refresh decision, used before and after
        # taking the lock so both checks cannot drift apart.
        if force_refresh:
            return True
        if refresh_if_expiring:
            return _codex_access_token_is_expiring(token, refresh_skew_seconds)
        return False

    data = read_codex_auth_file()
    payload = data["payload"]
    tokens = dict(data["tokens"])
    auth_path = data["auth_path"]
    access_token = str(tokens.get("access_token", "") or "").strip()

    # A malformed override must not crash credential resolution; fall back
    # to the 20 second default.
    try:
        refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20"))
    except (TypeError, ValueError):
        refresh_timeout_seconds = 20.0

    if _needs_refresh(access_token):
        # Wait at least as long as a concurrent refresh may take.
        lock_timeout = max(float(AUTH_LOCK_TIMEOUT_SECONDS), refresh_timeout_seconds + 5.0)
        with _codex_auth_file_lock(auth_path, timeout_seconds=lock_timeout):
            data = read_codex_auth_file()
            payload = data["payload"]
            tokens = dict(data["tokens"])
            access_token = str(tokens.get("access_token", "") or "").strip()

            if _needs_refresh(access_token):
                tokens = _refresh_codex_auth_tokens(
                    payload=payload,
                    auth_path=auth_path,
                    timeout_seconds=refresh_timeout_seconds,
                    lock_held=True,
                )
                access_token = str(tokens.get("access_token", "") or "").strip()

    base_url = (
        os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/")
        or DEFAULT_CODEX_BASE_URL
    )

    return {
        "provider": "openai-codex",
        "base_url": base_url,
        "api_key": access_token,
        "source": "codex-auth-json",
        "last_refresh": payload.get("last_refresh"),
        "auth_mode": payload.get("auth_mode"),
        "auth_file": str(auth_path),
        "codex_home": str(data["codex_home"]),
    }
# =============================================================================
# TLS verification helper
# =============================================================================
@ -806,14 +1138,73 @@ def get_nous_auth_status() -> Dict[str, Any]:
}
def get_codex_auth_status() -> Dict[str, Any]:
    """Return a status snapshot for Codex auth.

    ``logged_in`` is True when runtime credentials resolve without an
    ``AuthError``. On failure the snapshot carries the error text plus the
    auth file and Codex home locations taken from the saved provider state,
    falling back to the resolved default paths.
    """
    saved_state = get_provider_auth_state("openai-codex") or {}
    fallback_auth_file = saved_state.get("auth_file") or str(_codex_auth_file_path())
    fallback_codex_home = saved_state.get("codex_home") or str(resolve_codex_home_path())

    try:
        creds = resolve_codex_runtime_credentials()
    except AuthError as exc:
        return {
            "logged_in": False,
            "auth_file": fallback_auth_file,
            "codex_home": fallback_codex_home,
            "error": str(exc),
        }

    reported_fields = ("auth_file", "codex_home", "last_refresh", "auth_mode", "source")
    snapshot: Dict[str, Any] = {"logged_in": True}
    for field_name in reported_fields:
        snapshot[field_name] = creds.get(field_name)
    return snapshot
def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
    """Return the auth status for ``provider_id`` (default: the active provider).

    Providers without a dedicated status function are reported as not
    logged in.
    """
    target = provider_id or get_active_provider()
    status_lookups = {
        "nous": get_nous_auth_status,
        "openai-codex": get_codex_auth_status,
    }
    lookup = status_lookups.get(target)
    if lookup is None:
        return {"logged_in": False}
    return lookup()
# =============================================================================
# External credential detection
# =============================================================================
def detect_external_credentials() -> List[Dict[str, Any]]:
    """Scan for credentials from other CLI tools that Hermes can reuse.

    Currently checks only the Codex CLI auth file (``auth.json`` in the
    resolved Codex home). Detection is best-effort: any error while probing
    is swallowed and simply yields no entry.

    Returns a list of dicts, each with:
      - provider: str -- Hermes provider id (e.g. "openai-codex")
      - path: str -- filesystem path where creds were found
      - label: str -- human-friendly description for the setup UI
    """
    discovered: List[Dict[str, Any]] = []

    # Codex CLI: ~/.codex/auth.json (or $CODEX_HOME/auth.json)
    try:
        auth_file = resolve_codex_home_path() / "auth.json"
        if auth_file.is_file():
            parsed = json.loads(auth_file.read_text())
            token_block = parsed.get("tokens", {})
            has_access_token = isinstance(token_block, dict) and token_block.get("access_token")
            if has_access_token:
                entry = {
                    "provider": "openai-codex",
                    "path": str(auth_file),
                    "label": f"Codex CLI credentials found ({auth_file})",
                }
                discovered.append(entry)
    except Exception:
        pass

    return discovered
# =============================================================================
# CLI Commands — login / logout
# =============================================================================
@ -970,21 +1361,218 @@ def _save_model_choice(model_id: str) -> None:
def login_command(args) -> None:
"""Run OAuth device code login for the selected provider."""
provider_id = getattr(args, "provider", None) or "nous"
"""Deprecated: use 'hermes model' or 'hermes setup' instead."""
print("The 'hermes login' command has been removed.")
print("Use 'hermes model' to select a provider and model,")
print("or 'hermes setup' for full interactive setup.")
raise SystemExit(0)
if provider_id not in PROVIDER_REGISTRY:
print(f"Unknown provider: {provider_id}")
print(f"Available: {', '.join(PROVIDER_REGISTRY.keys())}")
raise SystemExit(1)
pconfig = PROVIDER_REGISTRY[provider_id]
def _login_openai_codex(args, pconfig: ProviderConfig) -> None:
"""OpenAI Codex login via device code flow (no Codex CLI required)."""
codex_home = resolve_codex_home_path()
if provider_id == "nous":
_login_nous(args, pconfig)
else:
print(f"Login for provider '{provider_id}' is not yet implemented.")
raise SystemExit(1)
# Check for existing valid credentials first
try:
existing = resolve_codex_runtime_credentials()
print(f"Existing Codex credentials found at {codex_home / 'auth.json'}")
try:
reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
reuse = "y"
if reuse in ("", "y", "yes"):
creds = existing
_save_codex_provider_state(creds)
return
except AuthError:
pass
# No existing creds (or user declined) -- run device code flow
print()
print("Signing in to OpenAI Codex...")
print()
creds = _codex_device_code_login()
_save_codex_provider_state(creds)
def _save_codex_provider_state(creds: Dict[str, Any]) -> None:
    """Record Codex as a configured provider and report where state was written.

    Saves the non-secret fields of ``creds`` under the "openai-codex" entry
    of the auth store (while holding the auth store lock), updates the
    config for that provider using the credentials' base URL, and prints a
    confirmation.
    """
    persisted_fields = ("auth_file", "codex_home", "last_refresh", "auth_mode", "source")
    auth_state = {field_name: creds.get(field_name) for field_name in persisted_fields}

    with _auth_store_lock():
        store = _load_auth_store()
        _save_provider_state(store, "openai-codex", auth_state)
        saved_to = _save_auth_store(store)

    inference_url = creds.get("base_url", DEFAULT_CODEX_BASE_URL)
    config_path = _update_config_for_provider("openai-codex", inference_url)

    print()
    print("Login successful!")
    print(f" Auth state: {saved_to}")
    print(f" Config updated: {config_path} (model.provider=openai-codex)")
def _codex_device_code_login() -> Dict[str, Any]:
    """Run the OpenAI device code login flow and return credentials dict.

    Steps: request a device/user code, print instructions, poll until the
    user authorizes (up to 15 minutes), exchange the authorization code for
    tokens, and persist them to ``auth.json`` in the Codex home directory.

    Returns:
        A dict with ``api_key``, ``base_url``, ``auth_file``, ``codex_home``,
        ``last_refresh``, ``auth_mode`` and ``source``.

    Raises:
        AuthError: On any request failure, unexpected status, malformed or
            incomplete response, or timeout.
        SystemExit: With code 130 when the user presses Ctrl+C while polling.
    """
    issuer = "https://auth.openai.com"
    client_id = CODEX_OAUTH_CLIENT_ID

    # Step 1: Request device code
    try:
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
            resp = client.post(
                f"{issuer}/api/accounts/deviceauth/usercode",
                json={"client_id": client_id},
                headers={"Content-Type": "application/json"},
            )
    except Exception as exc:
        raise AuthError(
            f"Failed to request device code: {exc}",
            provider="openai-codex", code="device_code_request_failed",
        ) from exc

    if resp.status_code != 200:
        raise AuthError(
            f"Device code request returned status {resp.status_code}.",
            provider="openai-codex", code="device_code_request_error",
        )

    try:
        device_data = resp.json()
    except Exception as exc:
        raise AuthError(
            "Device code response was not valid JSON.",
            provider="openai-codex", code="device_code_invalid_json",
        ) from exc
    if not isinstance(device_data, dict):
        # Falls through to the "missing required fields" error below.
        device_data = {}

    user_code = device_data.get("user_code", "")
    device_auth_id = device_data.get("device_auth_id", "")
    try:
        poll_interval = max(3, int(device_data.get("interval", "5")))
    except (TypeError, ValueError):
        # Unparseable interval: use the same default as a missing one.
        poll_interval = 5

    if not user_code or not device_auth_id:
        raise AuthError(
            "Device code response missing required fields.",
            provider="openai-codex", code="device_code_incomplete",
        )

    # Step 2: Show user the code
    print("To continue, follow these steps:\n")
    print(" 1. Open this URL in your browser:")
    print(f" \033[94m{issuer}/codex/device\033[0m\n")
    print(" 2. Enter this code:")
    print(f" \033[94m{user_code}\033[0m\n")
    print("Waiting for sign-in... (press Ctrl+C to cancel)")

    # Step 3: Poll for authorization code
    max_wait = 15 * 60  # 15 minutes
    start = time.monotonic()
    code_resp = None

    try:
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
            while time.monotonic() - start < max_wait:
                time.sleep(poll_interval)
                try:
                    poll_resp = client.post(
                        f"{issuer}/api/accounts/deviceauth/token",
                        json={"device_auth_id": device_auth_id, "user_code": user_code},
                        headers={"Content-Type": "application/json"},
                    )
                except httpx.HTTPError as exc:
                    # Report network failures the same way as steps 1 and 4
                    # instead of leaking a raw transport exception.
                    raise AuthError(
                        f"Device auth polling failed: {exc}",
                        provider="openai-codex", code="device_code_poll_failed",
                    ) from exc

                if poll_resp.status_code == 200:
                    try:
                        code_resp = poll_resp.json()
                    except Exception as exc:
                        raise AuthError(
                            "Device auth response was not valid JSON.",
                            provider="openai-codex", code="device_code_poll_invalid_json",
                        ) from exc
                    break
                if poll_resp.status_code in (403, 404):
                    continue  # User hasn't completed login yet
                raise AuthError(
                    f"Device auth polling returned status {poll_resp.status_code}.",
                    provider="openai-codex", code="device_code_poll_error",
                )
    except KeyboardInterrupt:
        print("\nLogin cancelled.")
        raise SystemExit(130)

    if code_resp is None:
        raise AuthError(
            "Login timed out after 15 minutes.",
            provider="openai-codex", code="device_code_timeout",
        )
    if not isinstance(code_resp, dict):
        # Falls through to the "missing authorization_code" error below.
        code_resp = {}

    # Step 4: Exchange authorization code for tokens
    authorization_code = code_resp.get("authorization_code", "")
    code_verifier = code_resp.get("code_verifier", "")
    redirect_uri = f"{issuer}/deviceauth/callback"

    if not authorization_code or not code_verifier:
        raise AuthError(
            "Device auth response missing authorization_code or code_verifier.",
            provider="openai-codex", code="device_code_incomplete_exchange",
        )

    try:
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
            token_resp = client.post(
                CODEX_OAUTH_TOKEN_URL,
                data={
                    "grant_type": "authorization_code",
                    "code": authorization_code,
                    "redirect_uri": redirect_uri,
                    "client_id": client_id,
                    "code_verifier": code_verifier,
                },
                headers={"Content-Type": "application/x-www-form-urlencoded"},
            )
    except Exception as exc:
        raise AuthError(
            f"Token exchange failed: {exc}",
            provider="openai-codex", code="token_exchange_failed",
        ) from exc

    if token_resp.status_code != 200:
        raise AuthError(
            f"Token exchange returned status {token_resp.status_code}.",
            provider="openai-codex", code="token_exchange_error",
        )

    try:
        tokens = token_resp.json()
    except Exception as exc:
        raise AuthError(
            "Token exchange returned invalid JSON.",
            provider="openai-codex", code="token_exchange_invalid_json",
        ) from exc
    if not isinstance(tokens, dict):
        tokens = {}

    access_token = tokens.get("access_token", "")
    refresh_token = tokens.get("refresh_token", "")

    if not access_token:
        raise AuthError(
            "Token exchange did not return an access_token.",
            provider="openai-codex", code="token_exchange_no_access_token",
        )
    if not refresh_token:
        # read_codex_auth_file() rejects an auth.json without a refresh
        # token, so persisting one here would yield a "successful" login
        # that immediately reads back as logged out.
        raise AuthError(
            "Token exchange did not return a refresh_token.",
            provider="openai-codex", code="token_exchange_no_refresh_token",
        )

    # Step 5: Persist tokens to auth.json in the Codex home directory
    codex_home = resolve_codex_home_path()
    codex_home.mkdir(parents=True, exist_ok=True)
    auth_path = codex_home / "auth.json"

    payload = {
        "tokens": {
            "access_token": access_token,
            "refresh_token": refresh_token,
        },
        "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
    }
    _persist_codex_auth_payload(auth_path, payload, lock_held=False)

    base_url = (
        os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/")
        or DEFAULT_CODEX_BASE_URL
    )

    return {
        "api_key": access_token,
        "base_url": base_url,
        "auth_file": str(auth_path),
        "codex_home": str(codex_home),
        "last_refresh": payload["last_refresh"],
        "auth_mode": "chatgpt",
        "source": "device-code",
    }
def _login_nous(args, pconfig: ProviderConfig) -> None:
@ -1168,6 +1756,6 @@ def logout_command(args) -> None:
if os.getenv("OPENROUTER_API_KEY"):
print("Hermes will use OpenRouter for inference.")
else:
print("Run `hermes login` or configure an API key to use Hermes.")
print("Run `hermes model` or configure an API key to use Hermes.")
else:
print(f"No auth state found for {provider_name}.")

144
hermes_cli/codex_models.py Normal file
View file

@ -0,0 +1,144 @@
"""Codex model discovery from API, local cache, and config."""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import List, Optional
from hermes_cli.auth import resolve_codex_home_path
logger = logging.getLogger(__name__)
DEFAULT_CODEX_MODELS: List[str] = [
"gpt-5.3-codex",
"gpt-5.2-codex",
"gpt-5.1-codex-max",
"gpt-5.1-codex-mini",
]
def _fetch_models_from_api(access_token: str) -> List[str]:
    """Fetch available models from the Codex API. Returns visible models sorted by priority.

    Entries are skipped when they have no usable ``slug``, are marked
    ``supported_in_api: false``, or are marked hidden. Remaining slugs are
    ordered by ``priority`` (entries without a numeric priority sort last),
    then by slug.

    Returns an empty list on any request or parsing failure, or when the
    response does not contain a ``models`` list.
    """
    try:
        import httpx

        resp = httpx.get(
            "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
            headers={"Authorization": f"Bearer {access_token}"},
            timeout=10,
        )
        if resp.status_code != 200:
            return []
        data = resp.json()
    except Exception as exc:
        logger.debug("Failed to fetch Codex models from API: %s", exc)
        return []

    # ``{"models": null}`` (or any non-list) must not reach the loop below,
    # which runs outside the try block.
    entries = data.get("models") if isinstance(data, dict) else None
    if not isinstance(entries, list):
        return []

    sortable = []
    for item in entries:
        if not isinstance(item, dict):
            continue
        slug = item.get("slug")
        if not isinstance(slug, str) or not slug.strip():
            continue
        slug = slug.strip()
        if item.get("supported_in_api") is False:
            continue
        # Accept both spellings so this filter agrees with the local
        # cache reader, which checks for "hidden".
        visibility = item.get("visibility", "")
        if isinstance(visibility, str) and visibility.strip().lower() in ("hide", "hidden"):
            continue
        priority = item.get("priority")
        rank = int(priority) if isinstance(priority, (int, float)) else 10_000
        sortable.append((rank, slug))

    sortable.sort(key=lambda x: (x[0], x[1]))
    return [slug for _, slug in sortable]
def _read_default_model(codex_home: Path) -> Optional[str]:
config_path = codex_home / "config.toml"
if not config_path.exists():
return None
try:
import tomllib
except Exception:
return None
try:
payload = tomllib.loads(config_path.read_text(encoding="utf-8"))
except Exception:
return None
model = payload.get("model") if isinstance(payload, dict) else None
if isinstance(model, str) and model.strip():
return model.strip()
return None
def _read_cache_models(codex_home: Path) -> List[str]:
cache_path = codex_home / "models_cache.json"
if not cache_path.exists():
return []
try:
raw = json.loads(cache_path.read_text(encoding="utf-8"))
except Exception:
return []
entries = raw.get("models") if isinstance(raw, dict) else None
sortable = []
if isinstance(entries, list):
for item in entries:
if not isinstance(item, dict):
continue
slug = item.get("slug")
if not isinstance(slug, str) or not slug.strip():
continue
slug = slug.strip()
if "codex" not in slug.lower():
continue
if item.get("supported_in_api") is False:
continue
visibility = item.get("visibility")
if isinstance(visibility, str) and visibility.strip().lower() == "hidden":
continue
priority = item.get("priority")
rank = int(priority) if isinstance(priority, (int, float)) else 10_000
sortable.append((rank, slug))
sortable.sort(key=lambda item: (item[0], item[1]))
deduped: List[str] = []
for _, slug in sortable:
if slug not in deduped:
deduped.append(slug)
return deduped
def get_codex_model_ids(access_token: Optional[str] = None) -> List[str]:
    """Return available Codex model IDs, trying API first, then local sources.

    When ``access_token`` is given and the live API returns a non-empty
    list, that list is returned as-is. Otherwise the result is the
    config.toml default model (if any), followed by models from the local
    cache, followed by the hardcoded defaults, with duplicates removed and
    first occurrence winning.
    """
    codex_home = resolve_codex_home_path()

    # Try live API if we have a token
    if access_token:
        live_models = _fetch_models_from_api(access_token)
        if live_models:
            return live_models

    # Fall back to local sources, highest precedence first
    candidates: List[str] = []
    configured_default = _read_default_model(codex_home)
    if configured_default:
        candidates.append(configured_default)
    candidates.extend(_read_cache_models(codex_home))
    candidates.extend(DEFAULT_CODEX_MODELS)

    # dict.fromkeys keeps first-seen order while dropping duplicates.
    return list(dict.fromkeys(candidates))

View file

@ -26,6 +26,8 @@ COMMANDS = {
"/skills": "Search, install, inspect, or manage skills from online registries",
"/platforms": "Show gateway/messaging platform status",
"/verbose": "Cycle tool progress display: off → new → all → verbose",
"/compress": "Manually compress conversation context (flush memories + summarize)",
"/usage": "Show token usage for the current session",
"/quit": "Exit the CLI (also: /exit, /q)",
}

View file

@ -175,6 +175,36 @@ def run_doctor(args):
else:
check_warn("config.yaml not found", "(using defaults)")
# =========================================================================
# Check: Auth providers
# =========================================================================
print()
print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD))
try:
from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status
nous_status = get_nous_auth_status()
if nous_status.get("logged_in"):
check_ok("Nous Portal auth", "(logged in)")
else:
check_warn("Nous Portal auth", "(not logged in)")
codex_status = get_codex_auth_status()
if codex_status.get("logged_in"):
check_ok("OpenAI Codex auth", "(logged in)")
else:
check_warn("OpenAI Codex auth", "(not logged in)")
if codex_status.get("error"):
check_info(codex_status["error"])
except Exception as e:
check_warn("Auth provider status", f"(could not check: {e})")
if shutil.which("codex"):
check_ok("codex CLI")
else:
check_warn("codex CLI not found", "(required for openai-codex login)")
# =========================================================================
# Check: Directory structure
# =========================================================================

View file

@ -12,7 +12,6 @@ Usage:
hermes gateway install # Install gateway service
hermes gateway uninstall # Uninstall gateway service
hermes setup # Interactive setup wizard
hermes login # Authenticate with Nous Portal (or other providers)
hermes logout # Clear stored authentication
hermes status # Show status of all components
hermes cron # Manage cron jobs
@ -60,6 +59,7 @@ logger = logging.getLogger(__name__)
def _has_any_provider_configured() -> bool:
"""Check if at least one inference provider is usable."""
from hermes_cli.config import get_env_path, get_hermes_home
from hermes_cli.auth import get_auth_status
# Check env vars (may be set by .env or shell).
# OPENAI_BASE_URL alone counts — local models (vLLM, llama.cpp, etc.)
@ -91,8 +91,8 @@ def _has_any_provider_configured() -> bool:
auth = json.loads(auth_file.read_text())
active = auth.get("active_provider")
if active:
state = auth.get("providers", {}).get(active, {})
if state.get("access_token") or state.get("refresh_token"):
status = get_auth_status(active)
if status.get("logged_in"):
return True
except Exception:
pass
@ -289,7 +289,7 @@ def cmd_model(args):
resolve_provider, get_provider_auth_state, PROVIDER_REGISTRY,
_prompt_model_selection, _save_model_choice, _update_config_for_provider,
resolve_nous_runtime_credentials, fetch_nous_models, AuthError, format_auth_error,
_login_nous, ProviderConfig,
_login_nous,
)
from hermes_cli.config import load_config, save_config, get_env_value, save_env_value
@ -312,7 +312,12 @@ def cmd_model(args):
or config_provider
or "auto"
)
active = resolve_provider(effective_provider)
try:
active = resolve_provider(effective_provider)
except AuthError as exc:
warning = format_auth_error(exc)
print(f"Warning: {warning} Falling back to auto provider detection.")
active = resolve_provider("auto")
# Detect custom endpoint
if active == "openrouter" and get_env_value("OPENAI_BASE_URL"):
@ -321,6 +326,7 @@ def cmd_model(args):
provider_labels = {
"openrouter": "OpenRouter",
"nous": "Nous Portal",
"openai-codex": "OpenAI Codex",
"custom": "Custom endpoint",
}
active_label = provider_labels.get(active, active)
@ -334,11 +340,12 @@ def cmd_model(args):
providers = [
("openrouter", "OpenRouter (100+ models, pay-per-use)"),
("nous", "Nous Portal (Nous Research subscription)"),
("openai-codex", "OpenAI Codex"),
("custom", "Custom endpoint (self-hosted / VLLM / etc.)"),
]
# Reorder so the active provider is at the top
active_key = active if active in ("openrouter", "nous") else "custom"
active_key = active if active in ("openrouter", "nous", "openai-codex") else "custom"
ordered = []
for key, label in providers:
if key == active_key:
@ -359,6 +366,8 @@ def cmd_model(args):
_model_flow_openrouter(config, current_model)
elif selected_provider == "nous":
_model_flow_nous(config, current_model)
elif selected_provider == "openai-codex":
_model_flow_openai_codex(config, current_model)
elif selected_provider == "custom":
_model_flow_custom(config)
@ -512,6 +521,53 @@ def _model_flow_nous(config, current_model=""):
print("No change.")
def _model_flow_openai_codex(config, current_model=""):
    """Run the OpenAI Codex model picker, logging the user in first if needed.

    ``config`` is accepted for parity with the other ``_model_flow_*`` helpers
    and is not read here.  ``current_model`` is passed to the selection prompt.
    """
    from hermes_cli.auth import (
        get_codex_auth_status, _prompt_model_selection, _save_model_choice,
        _update_config_for_provider, _login_openai_codex,
        PROVIDER_REGISTRY, DEFAULT_CODEX_BASE_URL,
    )
    from hermes_cli.codex_models import get_codex_model_ids
    from hermes_cli.config import get_env_value, save_env_value
    import argparse

    if not get_codex_auth_status().get("logged_in"):
        print("Not logged into OpenAI Codex. Starting login...")
        print()
        try:
            # The login helper expects an argparse namespace; an empty one is enough.
            login_args = argparse.Namespace()
            _login_openai_codex(login_args, PROVIDER_REGISTRY["openai-codex"])
        except SystemExit:
            print("Login cancelled or failed.")
            return
        except Exception as exc:
            print(f"Login failed: {exc}")
            return

    # Best effort: a token enables live model discovery, but the picker still
    # works from local sources when credentials cannot be resolved.
    try:
        from hermes_cli.auth import resolve_codex_runtime_credentials
        access_token = resolve_codex_runtime_credentials().get("api_key")
    except Exception:
        access_token = None

    available_models = get_codex_model_ids(access_token=access_token)
    selected = _prompt_model_selection(available_models, current_model=current_model)
    if not selected:
        print("No change.")
        return

    _save_model_choice(selected)
    _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
    # Clear custom endpoint env vars that would otherwise override Codex.
    if get_env_value("OPENAI_BASE_URL"):
        save_env_value("OPENAI_BASE_URL", "")
        save_env_value("OPENAI_API_KEY", "")
    print(f"Default model set to: {selected} (via OpenAI Codex)")
def _model_flow_custom(config):
"""Custom endpoint: collect URL, API key, and model name."""
from hermes_cli.auth import _save_model_choice, deactivate_provider
@ -777,8 +833,8 @@ def cmd_update(args):
pass # No systemd (macOS, WSL1, etc.) — skip silently
print()
print("Tip: You can now log in with Nous Portal for inference:")
print(" hermes login # Authenticate with Nous Portal")
print("Tip: You can now select a provider and model:")
print(" hermes model # Select provider and model")
except subprocess.CalledProcessError as e:
print(f"✗ Update failed: {e}")
@ -798,7 +854,6 @@ Examples:
hermes --continue Resume the most recent session
hermes --resume <session_id> Resume a specific session
hermes setup Run setup wizard
hermes login Authenticate with an inference provider
hermes logout Clear stored authentication
hermes model Select default model
hermes config View configuration
@ -857,7 +912,7 @@ For more help on a command:
)
chat_parser.add_argument(
"--provider",
choices=["auto", "openrouter", "nous"],
choices=["auto", "openrouter", "nous", "openai-codex"],
default=None,
help="Inference provider (default: auto)"
)
@ -966,9 +1021,9 @@ For more help on a command:
)
login_parser.add_argument(
"--provider",
choices=["nous"],
choices=["nous", "openai-codex"],
default=None,
help="Provider to authenticate with (default: interactive selection)"
help="Provider to authenticate with (default: nous)"
)
login_parser.add_argument(
"--portal-url",
@ -1020,7 +1075,7 @@ For more help on a command:
)
logout_parser.add_argument(
"--provider",
choices=["nous"],
choices=["nous", "openai-codex"],
default=None,
help="Provider to log out from (default: active provider)"
)

View file

@ -0,0 +1,149 @@
"""Shared runtime provider resolution for CLI, gateway, cron, and helpers."""
from __future__ import annotations
import os
from typing import Any, Dict, Optional
from hermes_cli.auth import (
AuthError,
format_auth_error,
resolve_provider,
resolve_nous_runtime_credentials,
resolve_codex_runtime_credentials,
)
from hermes_cli.config import load_config
from hermes_constants import OPENROUTER_BASE_URL
def _get_model_config() -> Dict[str, Any]:
    """Return the ``model`` section of the loaded config as a fresh dict.

    A mapping is shallow-copied, a non-blank string becomes
    ``{"default": <stripped string>}``, and anything else yields ``{}``.
    """
    section = load_config().get("model")
    if isinstance(section, dict):
        return dict(section)
    if isinstance(section, str):
        model_name = section.strip()
        if model_name:
            return {"default": model_name}
    return {}
def resolve_requested_provider(requested: Optional[str] = None) -> str:
    """Pick the requested provider name, normalised to stripped lowercase.

    Precedence: the explicit ``requested`` argument, then the
    ``HERMES_INFERENCE_PROVIDER`` environment variable, then ``model.provider``
    from the config.  Returns ``"auto"`` when none of them is set.
    """
    explicit = requested.strip().lower() if requested else ""
    if explicit:
        return explicit

    from_env = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
    if from_env:
        return from_env

    from_config = _get_model_config().get("provider")
    if isinstance(from_config, str):
        normalised = from_config.strip().lower()
        if normalised:
            return normalised
    return "auto"
def _resolve_openrouter_runtime(
    *,
    requested_provider: str,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
) -> Dict[str, Any]:
    """Build the runtime settings for the OpenRouter / OpenAI-compatible path.

    The base URL is the first non-empty value among: the explicit argument,
    ``OPENAI_BASE_URL``, the config's ``model.base_url`` (only when eligible,
    see below), ``OPENROUTER_BASE_URL`` from the environment, and the
    ``OPENROUTER_BASE_URL`` constant.  The API key is the first truthy value
    among the explicit argument, ``OPENAI_API_KEY`` and ``OPENROUTER_API_KEY``.
    """
    model_cfg = _get_model_config()

    raw_base_url = model_cfg.get("base_url")
    config_base_url = raw_base_url.strip() if isinstance(raw_base_url, str) else ""
    raw_provider = model_cfg.get("provider")
    config_provider = raw_provider.strip().lower() if isinstance(raw_provider, str) else ""

    requested_norm = (requested_provider or "").strip().lower()
    openai_env_url = os.getenv("OPENAI_BASE_URL", "").strip()
    openrouter_env_url = os.getenv("OPENROUTER_BASE_URL", "").strip()

    # The config base URL is honoured only for an "auto" request, when neither
    # an explicit URL nor OPENAI_BASE_URL is set, and when the config does not
    # name a specific provider.
    config_url_eligible = (
        requested_norm == "auto"
        and bool(config_base_url)
        and not explicit_base_url
        and not openai_env_url
        and config_provider in ("", "auto")
    )

    chosen_url = (
        (explicit_base_url or "").strip()
        or openai_env_url
        or (config_base_url if config_url_eligible else "")
        or openrouter_env_url
        or OPENROUTER_BASE_URL
    )

    chosen_key = (
        explicit_api_key
        or os.getenv("OPENAI_API_KEY")
        or os.getenv("OPENROUTER_API_KEY")
        or ""
    )

    if explicit_api_key or explicit_base_url:
        origin = "explicit"
    else:
        origin = "env/config"

    return {
        "provider": "openrouter",
        "api_mode": "chat_completions",
        "base_url": chosen_url.rstrip("/"),
        "api_key": chosen_key,
        "source": origin,
    }
def resolve_runtime_provider(
    *,
    requested: Optional[str] = None,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
) -> Dict[str, Any]:
    """Resolve the provider and its credentials for an agent run.

    The returned dict always carries ``provider``, ``api_mode``, ``base_url``,
    ``api_key``, ``source`` and ``requested_provider``.  The Nous and Codex
    branches add a few provider-specific fields.
    """
    requested_provider = resolve_requested_provider(requested)
    provider = resolve_provider(
        requested_provider,
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )

    if provider == "nous":
        # The requested key TTL is floored at 60 seconds whatever the env says.
        min_ttl = max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
        timeout = float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15"))
        nous_creds = resolve_nous_runtime_credentials(
            min_key_ttl_seconds=min_ttl,
            timeout_seconds=timeout,
        )
        return {
            "provider": "nous",
            "api_mode": "chat_completions",
            "base_url": nous_creds.get("base_url", "").rstrip("/"),
            "api_key": nous_creds.get("api_key", ""),
            "source": nous_creds.get("source", "portal"),
            "expires_at": nous_creds.get("expires_at"),
            "requested_provider": requested_provider,
        }

    if provider == "openai-codex":
        codex_creds = resolve_codex_runtime_credentials()
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": codex_creds.get("base_url", "").rstrip("/"),
            "api_key": codex_creds.get("api_key", ""),
            "source": codex_creds.get("source", "codex-auth-json"),
            "auth_file": codex_creds.get("auth_file"),
            "codex_home": codex_creds.get("codex_home"),
            "last_refresh": codex_creds.get("last_refresh"),
            "requested_provider": requested_provider,
        }

    # Every other provider goes through the OpenRouter-compatible path.
    openrouter_runtime = _resolve_openrouter_runtime(
        requested_provider=requested_provider,
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
    openrouter_runtime["requested_provider"] = requested_provider
    return openrouter_runtime
def format_runtime_provider_error(error: Exception) -> str:
    """Render ``error`` as text, using ``format_auth_error`` for ``AuthError``."""
    return format_auth_error(error) if isinstance(error, AuthError) else str(error)

View file

@ -620,11 +620,24 @@ def run_setup_wizard(args):
get_active_provider, get_provider_auth_state, PROVIDER_REGISTRY,
format_auth_error, AuthError, fetch_nous_models,
resolve_nous_runtime_credentials, _update_config_for_provider,
_login_openai_codex, get_codex_auth_status, DEFAULT_CODEX_BASE_URL,
detect_external_credentials,
)
existing_custom = get_env_value("OPENAI_BASE_URL")
existing_or = get_env_value("OPENROUTER_API_KEY")
active_oauth = get_active_provider()
# Detect credentials from other CLI tools
detected_creds = detect_external_credentials()
if detected_creds:
print_info("Detected existing credentials:")
for cred in detected_creds:
if cred["provider"] == "openai-codex":
print_success(f" * {cred['label']} -- select \"OpenAI Codex\" to use it")
else:
print_info(f" * {cred['label']}")
print()
# Detect if any provider is already configured
has_any_provider = bool(active_oauth or existing_custom or existing_or)
@ -640,6 +653,7 @@ def run_setup_wizard(args):
provider_choices = [
"Login with Nous Portal (Nous Research subscription)",
"Login with OpenAI Codex",
"OpenRouter API key (100+ models, pay-per-use)",
"Custom OpenAI-compatible endpoint (self-hosted / VLLM / etc.)",
]
@ -647,7 +661,7 @@ def run_setup_wizard(args):
provider_choices.append(keep_label)
# Default to "Keep current" if a provider exists, otherwise OpenRouter (most common)
default_provider = len(provider_choices) - 1 if has_any_provider else 1
default_provider = len(provider_choices) - 1 if has_any_provider else 2
if not has_any_provider:
print_warning("An inference provider is required for Hermes to work.")
@ -656,7 +670,7 @@ def run_setup_wizard(args):
provider_idx = prompt_choice("Select your inference provider:", provider_choices, default_provider)
# Track which provider was selected for model step
selected_provider = None # "nous", "openrouter", "custom", or None (keep)
selected_provider = None # "nous", "openai-codex", "openrouter", "custom", or None (keep)
nous_models = [] # populated if Nous login succeeds
if provider_idx == 0: # Nous Portal
@ -692,14 +706,38 @@ def run_setup_wizard(args):
except SystemExit:
print_warning("Nous Portal login was cancelled or failed.")
print_info("You can try again later with: hermes login")
print_info("You can try again later with: hermes model")
selected_provider = None
except Exception as e:
print_error(f"Login failed: {e}")
print_info("You can try again later with: hermes login")
print_info("You can try again later with: hermes model")
selected_provider = None
elif provider_idx == 1: # OpenRouter
elif provider_idx == 1: # OpenAI Codex
selected_provider = "openai-codex"
print()
print_header("OpenAI Codex Login")
print()
try:
import argparse
mock_args = argparse.Namespace()
_login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"])
# Clear custom endpoint vars that would override provider routing.
if existing_custom:
save_env_value("OPENAI_BASE_URL", "")
save_env_value("OPENAI_API_KEY", "")
_update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
except SystemExit:
print_warning("OpenAI Codex login was cancelled or failed.")
print_info("You can try again later with: hermes model")
selected_provider = None
except Exception as e:
print_error(f"Login failed: {e}")
print_info("You can try again later with: hermes model")
selected_provider = None
elif provider_idx == 2: # OpenRouter
selected_provider = "openrouter"
print()
print_header("OpenRouter API Key")
@ -726,7 +764,7 @@ def run_setup_wizard(args):
save_env_value("OPENAI_BASE_URL", "")
save_env_value("OPENAI_API_KEY", "")
elif provider_idx == 2: # Custom endpoint
elif provider_idx == 3: # Custom endpoint
selected_provider = "custom"
print()
print_header("Custom OpenAI-Compatible Endpoint")
@ -753,14 +791,14 @@ def run_setup_wizard(args):
config['model'] = model_name
save_env_value("LLM_MODEL", model_name)
print_success("Custom endpoint configured")
# else: provider_idx == 3 (Keep current) — only shown when a provider already exists
# else: provider_idx == 4 (Keep current) — only shown when a provider already exists
# =========================================================================
# Step 1b: OpenRouter API Key for tools (if not already set)
# =========================================================================
# Tools (vision, web, MoA) use OpenRouter independently of the main provider.
# Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen.
if selected_provider in ("nous", "custom") and not get_env_value("OPENROUTER_API_KEY"):
if selected_provider in ("nous", "openai-codex", "custom") and not get_env_value("OPENROUTER_API_KEY"):
print()
print_header("OpenRouter API Key (for tools)")
print_info("Tools like vision analysis, web search, and MoA use OpenRouter")
@ -806,6 +844,33 @@ def run_setup_wizard(args):
config['model'] = custom
save_env_value("LLM_MODEL", custom)
# else: keep current
elif selected_provider == "openai-codex":
from hermes_cli.codex_models import get_codex_model_ids
# Try to get the access token for live model discovery
_codex_token = None
try:
from hermes_cli.auth import resolve_codex_runtime_credentials
_codex_creds = resolve_codex_runtime_credentials()
_codex_token = _codex_creds.get("api_key")
except Exception:
pass
codex_models = get_codex_model_ids(access_token=_codex_token)
model_choices = [f"{m}" for m in codex_models]
model_choices.append("Custom model")
model_choices.append(f"Keep current ({current_model})")
keep_idx = len(model_choices) - 1
model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
if model_idx < len(codex_models):
config['model'] = codex_models[model_idx]
save_env_value("LLM_MODEL", codex_models[model_idx])
elif model_idx == len(codex_models):
custom = prompt("Enter model name")
if custom:
config['model'] = custom
save_env_value("LLM_MODEL", custom)
_update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
else:
# Static list for OpenRouter / fallback (from canonical list)
from hermes_cli.models import model_ids, menu_labels

View file

@ -101,15 +101,17 @@ def show_status(args):
print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD))
try:
from hermes_cli.auth import get_nous_auth_status
from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status
nous_status = get_nous_auth_status()
codex_status = get_codex_auth_status()
except Exception:
nous_status = {}
codex_status = {}
nous_logged_in = bool(nous_status.get("logged_in"))
print(
f" {'Nous Portal':<12} {check_mark(nous_logged_in)} "
f"{'logged in' if nous_logged_in else 'not logged in (run: hermes login)'}"
f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}"
)
if nous_logged_in:
portal_url = nous_status.get("portal_base_url") or "(unknown)"
@ -121,6 +123,20 @@ def show_status(args):
print(f" Key exp: {key_exp}")
print(f" Refresh: {refresh_label}")
codex_logged_in = bool(codex_status.get("logged_in"))
print(
f" {'OpenAI Codex':<12} {check_mark(codex_logged_in)} "
f"{'logged in' if codex_logged_in else 'not logged in (run: hermes model)'}"
)
codex_auth_file = codex_status.get("auth_file")
if codex_auth_file:
print(f" Auth file: {codex_auth_file}")
codex_last_refresh = _format_iso_timestamp(codex_status.get("last_refresh"))
if codex_status.get("last_refresh"):
print(f" Refreshed: {codex_last_refresh}")
if codex_status.get("error") and not codex_logged_in:
print(f" Error: {codex_status.get('error')}")
# =========================================================================
# Terminal Configuration
# =========================================================================

File diff suppressed because it is too large Load diff

View file

@ -723,7 +723,7 @@ setup_path() {
PATH_LINE='export PATH="$HOME/.local/bin:$PATH"'
for SHELL_CONFIG in "${SHELL_CONFIGS[@]}"; do
if ! grep -q '\.local/bin' "$SHELL_CONFIG" 2>/dev/null; then
if ! grep -v '^[[:space:]]*#' "$SHELL_CONFIG" 2>/dev/null | grep -qE 'PATH=.*\.local/bin'; then
echo "" >> "$SHELL_CONFIG"
echo "# Hermes Agent — ensure ~/.local/bin is on PATH" >> "$SHELL_CONFIG"
echo "$PATH_LINE" >> "$SHELL_CONFIG"

View file

@ -0,0 +1,168 @@
"""Tests for agent.auxiliary_client resolution chain, especially the Codex fallback."""
import json
import os
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
from agent.auxiliary_client import (
get_text_auxiliary_client,
get_vision_auxiliary_client,
auxiliary_max_tokens_param,
_read_codex_access_token,
)
@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Remove provider-related env vars so no test inherits ambient settings."""
    provider_vars = (
        "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
        "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
    )
    for name in provider_vars:
        monkeypatch.delenv(name, raising=False)
@pytest.fixture
def codex_auth_dir(tmp_path, monkeypatch):
    """Create a ``.codex`` dir with an auth.json and stub the Codex token reader.

    The reader in ``agent.auxiliary_client`` is replaced with a lambda that
    returns the same access token that is written to the file.
    """
    access_token = "codex-test-token-abc123"
    auth_payload = {
        "tokens": {
            "access_token": access_token,
            "refresh_token": "codex-refresh-xyz",
        }
    }
    codex_dir = tmp_path / ".codex"
    codex_dir.mkdir()
    (codex_dir / "auth.json").write_text(json.dumps(auth_payload))
    monkeypatch.setattr(
        "agent.auxiliary_client._read_codex_access_token",
        lambda: "codex-test-token-abc123",
    )
    return codex_dir
class TestReadCodexAccessToken:
    """_read_codex_access_token against different ``.codex/auth.json`` states."""

    @staticmethod
    def _write_auth(home, text):
        """Create ``<home>/.codex/auth.json`` containing ``text``."""
        codex_dir = home / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text(text)

    @staticmethod
    def _read_token(home):
        """Call the reader with ``Path.home`` patched to return ``home``."""
        with patch("agent.auxiliary_client.Path.home", return_value=home):
            return _read_codex_access_token()

    def test_valid_auth_file(self, tmp_path):
        """A well-formed file yields its access token."""
        self._write_auth(tmp_path, json.dumps({
            "tokens": {"access_token": "tok-123", "refresh_token": "r-456"}
        }))
        assert self._read_token(tmp_path) == "tok-123"

    def test_missing_file_returns_none(self, tmp_path):
        """No auth file at all yields None."""
        assert self._read_token(tmp_path) is None

    def test_empty_token_returns_none(self, tmp_path):
        """A whitespace-only access token counts as absent."""
        self._write_auth(tmp_path, json.dumps({"tokens": {"access_token": " "}}))
        assert self._read_token(tmp_path) is None

    def test_malformed_json_returns_none(self, tmp_path):
        """Unparseable JSON yields None."""
        self._write_auth(tmp_path, "{bad json")
        assert self._read_token(tmp_path) is None

    def test_missing_tokens_key_returns_none(self, tmp_path):
        """Valid JSON without a ``tokens`` entry yields None."""
        self._write_auth(tmp_path, json.dumps({"other": "data"}))
        assert self._read_token(tmp_path) is None
class TestGetTextAuxiliaryClient:
    """Provider precedence of get_text_auxiliary_client.

    Codex must be used only when OpenRouter, Nous and a custom endpoint are
    all unavailable; with no provider at all the result is ``(None, None)``.
    """

    def test_openrouter_takes_priority(self, monkeypatch, codex_auth_dir):
        """An OpenRouter key wins even though a Codex token is available."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI") as openai_cls:
            _, model = get_text_auxiliary_client()
        assert model == "google/gemini-3-flash-preview"
        openai_cls.assert_called_once()
        assert openai_cls.call_args.kwargs["api_key"] == "or-key"

    def test_nous_takes_priority_over_codex(self, monkeypatch, codex_auth_dir):
        """Nous auth is preferred over the Codex token."""
        nous_auth = {"access_token": "nous-tok"}
        with patch("agent.auxiliary_client._read_nous_auth", return_value=nous_auth), \
                patch("agent.auxiliary_client.OpenAI"):
            _, model = get_text_auxiliary_client()
        assert model == "gemini-3-flash"

    def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
        """A custom OpenAI-compatible endpoint is preferred over the Codex token."""
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
        # Install the Codex token stub explicitly; the codex_auth_dir fixture
        # sets the same replacement.
        monkeypatch.setattr(
            "agent.auxiliary_client._read_codex_access_token",
            lambda: "codex-test-token-abc123",
        )
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI") as openai_cls:
            _, model = get_text_auxiliary_client()
        assert model == "gpt-4o-mini"
        assert openai_cls.call_args.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
        """With only a Codex token, the Codex wrapper client is returned."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client()
        assert model == "gpt-5.3-codex"
        # The result is the CodexAuxiliaryClient wrapper, not a raw OpenAI client.
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)

    def test_returns_none_when_nothing_available(self):
        """No OpenRouter key, no Nous auth and no Codex token gives (None, None)."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            client, model = get_text_auxiliary_client()
        assert client is None
        assert model is None
class TestCodexNotInVisionClient:
    """Codex fallback should NOT apply to vision tasks."""

    def test_vision_returns_none_without_openrouter_nous(self):
        """Vision resolution yields (None, None) even when a Codex token exists.

        The Codex token reader is patched to return a token so the test
        actually exercises the "Codex is not a vision fallback" rule, and so
        its outcome does not depend on the developer's real Codex auth state.
        """
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token",
                      return_value="codex-test-token-abc123"):
            client, model = get_vision_auxiliary_client()
        assert client is None
        assert model is None
class TestAuxiliaryMaxTokensParam:
    """auxiliary_max_tokens_param returns ``{"max_tokens": n}`` in each provider state covered here."""

    def test_codex_fallback_uses_max_tokens(self, monkeypatch):
        """With only a Codex token available, the plain max_tokens key is used."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value="tok"):
            params = auxiliary_max_tokens_param(1024)
        assert params == {"max_tokens": 1024}

    def test_openrouter_uses_max_tokens(self, monkeypatch):
        """With an OpenRouter key set, the plain max_tokens key is used."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        assert auxiliary_max_tokens_param(1024) == {"max_tokens": 1024}

    def test_no_provider_uses_max_tokens(self):
        """With no provider at all, the plain max_tokens key is still used."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            params = auxiliary_max_tokens_param(1024)
        assert params == {"max_tokens": 1024}

173
tests/agent/test_redact.py Normal file
View file

@ -0,0 +1,173 @@
"""Tests for agent.redact -- secret masking in logs and output."""
import logging
import pytest
from agent.redact import redact_sensitive_text, RedactingFormatter
class TestKnownPrefixes:
    """Tokens recognisable by a vendor prefix must not survive redaction."""

    def test_openai_sk_key(self):
        """An sk- key keeps a short prefix and an ellipsis but loses its body."""
        masked = redact_sensitive_text("Using key sk-proj-abc123def456ghi789jkl012")
        assert "sk-pro" in masked
        assert "abc123def456" not in masked
        assert "..." in masked

    def test_openrouter_sk_key(self):
        """An OpenRouter key in an env assignment is masked."""
        masked = redact_sensitive_text(
            "OPENROUTER_API_KEY=sk-or-v1-abcdefghijklmnopqrstuvwxyz1234567890"
        )
        assert "abcdefghijklmnop" not in masked

    def test_github_pat_classic(self):
        """A classic ghp_ token is masked."""
        masked = redact_sensitive_text("token: ghp_abc123def456ghi789jkl")
        assert "abc123def456" not in masked

    def test_github_pat_fine_grained(self):
        """A fine-grained github_pat_ token is masked."""
        masked = redact_sensitive_text("github_pat_abc123def456ghi789jklmno")
        assert "abc123def456" not in masked

    def test_slack_token(self):
        """An xoxb- Slack token is masked."""
        secret_tail = "a" * 14
        token = "xoxb-" + "0" * 12 + "-" + secret_tail
        assert secret_tail not in redact_sensitive_text(token)

    def test_google_api_key(self):
        """An AIza Google API key is masked."""
        masked = redact_sensitive_text("AIzaSyB-abc123def456ghi789jklmno012345")
        assert "abc123def456" not in masked

    def test_perplexity_key(self):
        """A pplx- key is masked."""
        masked = redact_sensitive_text("pplx-abcdef123456789012345")
        assert "abcdef12345" not in masked

    def test_fal_key(self):
        """A fal_ key is masked."""
        masked = redact_sensitive_text("fal_abc123def456ghi789jkl")
        assert "abc123def456" not in masked

    def test_short_token_fully_masked(self):
        """A short sk- token is replaced by a *** mask."""
        masked = redact_sensitive_text("key=sk-short1234567")
        assert "***" in masked
class TestEnvAssignments:
    """NAME=value lines: secret-looking names are masked, ordinary ones are kept."""

    def test_export_api_key(self):
        """The variable name survives while the key value is masked."""
        line = "export OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012"
        masked = redact_sensitive_text(line)
        assert "OPENAI_API_KEY=" in masked
        assert "abc123def456" not in masked

    def test_quoted_value(self):
        """A double-quoted secret value is masked as well."""
        line = 'MY_SECRET_TOKEN="supersecretvalue123456789"'
        masked = redact_sensitive_text(line)
        assert "MY_SECRET_TOKEN=" in masked
        assert "supersecretvalue" not in masked

    def test_non_secret_env_unchanged(self):
        """HOME is returned untouched."""
        line = "HOME=/home/user"
        assert redact_sensitive_text(line) == line

    def test_path_unchanged(self):
        """PATH is returned untouched."""
        line = "PATH=/usr/local/bin:/usr/bin"
        assert redact_sensitive_text(line) == line
class TestJsonFields:
    """Secret-bearing JSON fields are masked; ordinary JSON is left alone."""

    def test_json_api_key(self):
        """The value of an apiKey field is masked."""
        payload = '{"apiKey": "sk-proj-abc123def456ghi789jkl012"}'
        assert "abc123def456" not in redact_sensitive_text(payload)

    def test_json_token(self):
        """The value of an access_token field is masked."""
        payload = '{"access_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.longtoken.here"}'
        assert "eyJhbGciOiJSUzI1NiIs" not in redact_sensitive_text(payload)

    def test_json_non_secret_unchanged(self):
        """JSON without secret fields comes back byte-for-byte."""
        payload = '{"name": "John", "model": "gpt-4"}'
        assert redact_sensitive_text(payload) == payload
class TestAuthHeaders:
    """Bearer tokens in Authorization headers are masked."""

    def test_bearer_token(self):
        """The header prefix is kept while the token body is masked."""
        header = "Authorization: Bearer sk-proj-abc123def456ghi789jkl012"
        masked = redact_sensitive_text(header)
        assert "Authorization: Bearer" in masked
        assert "abc123def456" not in masked

    def test_case_insensitive(self):
        """A lowercase header is masked too."""
        header = "authorization: bearer mytoken123456789012345678"
        assert "mytoken12345" not in redact_sensitive_text(header)
class TestTelegramTokens:
    """Telegram bot tokens (``<digits>:<secret>``) are masked."""

    def test_bot_token(self):
        """With a ``bot`` prefix, the numeric id stays and the secret becomes ***."""
        masked = redact_sensitive_text("bot123456789:ABCDEfghij-KLMNopqrst_UVWXyz12345")
        assert "ABCDEfghij" not in masked
        assert "123456789:***" in masked

    def test_raw_token(self):
        """A token without the ``bot`` prefix is masked as well."""
        masked = redact_sensitive_text("12345678901:ABCDEfghijKLMNopqrstUVWXyz1234567890")
        assert "ABCDEfghij" not in masked
class TestPassthrough:
    """Inputs without secrets must come back unchanged."""

    def test_empty_string(self):
        """The empty string stays empty."""
        assert redact_sensitive_text("") == ""

    def test_none_returns_none(self):
        """None is passed through as None."""
        assert redact_sensitive_text(None) is None

    def test_normal_text_unchanged(self):
        """An ordinary log sentence is untouched."""
        message = "Hello world, this is a normal log message with no secrets."
        assert redact_sensitive_text(message) == message

    def test_code_unchanged(self):
        """A small code snippet is untouched."""
        snippet = "def main():\n print('hello')\n return 42"
        assert redact_sensitive_text(snippet) == snippet

    def test_url_without_key_unchanged(self):
        """A URL that carries no credential is untouched."""
        message = "Connecting to https://api.openai.com/v1/chat/completions"
        assert redact_sensitive_text(message) == message
class TestRedactingFormatter:
    """RedactingFormatter masks secrets in formatted log records."""

    def test_formats_and_redacts(self):
        """The formatted message keeps the key prefix but not the key body."""
        log_record = logging.LogRecord(
            name="test", level=logging.INFO, pathname="", lineno=0,
            msg="Key is sk-proj-abc123def456ghi789jkl012",
            args=(), exc_info=None,
        )
        rendered = RedactingFormatter("%(message)s").format(log_record)
        assert "abc123def456" not in rendered
        assert "sk-pro" in rendered
class TestPrintenvSimulation:
    """Simulate what happens when the agent runs `env` or `printenv`."""

    def test_full_env_dump(self):
        """Redact a multi-line env dump in one call.

        Secret values must be masked while ordinary variables stay verbatim.
        """
        env_dump = """HOME=/home/user
PATH=/usr/local/bin:/usr/bin
OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012mno345
OPENROUTER_API_KEY=sk-or-v1-reallyLongSecretKeyValue12345678
FIRECRAWL_API_KEY=fc-shortkey123456789012
TELEGRAM_BOT_TOKEN=bot987654321:ABCDEfghij-KLMNopqrst_UVWXyz12345
SHELL=/bin/bash
USER=teknium"""
        result = redact_sensitive_text(env_dump)
        # Secrets should be masked
        assert "abc123def456" not in result
        assert "reallyLongSecretKey" not in result
        assert "ABCDEfghij" not in result
        # Non-secrets should survive
        assert "HOME=/home/user" in result
        assert "SHELL=/bin/bash" in result
        assert "USER=teknium" in result

View file

@ -0,0 +1,374 @@
"""
Tests for subagent progress relay (issue #169).
Verifies that:
- KawaiiSpinner.print_above() works with and without active spinner
- _build_child_progress_callback handles CLI/gateway/no-display paths
- Thinking events are relayed correctly
- Parallel callbacks don't share state
"""
import io
import sys
import time
import threading
import pytest
from unittest.mock import MagicMock, patch
from agent.display import KawaiiSpinner
from tools.delegate_tool import _build_child_progress_callback
# =========================================================================
# KawaiiSpinner.print_above tests
# =========================================================================
class TestPrintAbove:
    """KawaiiSpinner.print_above with the spinner's output stream set to a buffer."""

    def test_print_above_without_spinner_running(self):
        """The text is written even though the spinner was never started."""
        sink = io.StringIO()
        spinner = KawaiiSpinner("test")
        spinner._out = sink  # capture output in the buffer
        spinner.print_above("hello world")
        assert "hello world" in sink.getvalue()

    def test_print_above_with_spinner_running(self):
        """With ``running`` set, the text is written together with a carriage return."""
        sink = io.StringIO()
        spinner = KawaiiSpinner("test")
        spinner._out = sink
        spinner.running = True  # mark as running without starting the thread
        spinner.print_above("tool line")
        written = sink.getvalue()
        assert "tool line" in written
        assert "\r" in written  # carriage return used to clear the spinner line

    def test_print_above_uses_captured_stdout(self):
        """Output goes to ``self._out`` rather than whatever ``sys.stdout`` is now.

        This is what keeps print_above working inside redirect_stdout(devnull).
        """
        sink = io.StringIO()
        spinner = KawaiiSpinner("test")
        spinner._out = sink
        # Stand in for redirect_stdout(devnull) by swapping sys.stdout by hand.
        saved_stdout = sys.stdout
        sys.stdout = io.StringIO()
        try:
            spinner.print_above("should go to buf")
        finally:
            sys.stdout = saved_stdout
        assert "should go to buf" in sink.getvalue()
# =========================================================================
# _build_child_progress_callback tests
# =========================================================================
class TestBuildChildProgressCallback:
    """Tests for child progress callback builder."""

    @staticmethod
    def _cli_parent():
        """Return (parent, buffer): a mock parent with a running spinner and no callback."""
        sink = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = sink
        spinner.running = True
        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None
        return parent, sink

    @staticmethod
    def _gateway_parent():
        """Return (parent, relay): a mock parent with no spinner and a mock callback."""
        parent = MagicMock()
        parent._delegate_spinner = None
        relay = MagicMock()
        parent.tool_progress_callback = relay
        return parent, relay

    def test_returns_none_when_no_display(self):
        """Should return None when parent has no spinner or callback."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent.tool_progress_callback = None
        assert _build_child_progress_callback(0, parent) is None

    def test_cli_spinner_tool_event(self):
        """Should print tool line above spinner for CLI path."""
        parent, sink = self._cli_parent()
        cb = _build_child_progress_callback(0, parent)
        assert cb is not None
        cb("web_search", "quantum computing")
        written = sink.getvalue()
        for expected in ("web_search", "quantum computing", "├─"):
            assert expected in written

    def test_cli_spinner_thinking_event(self):
        """Should print thinking line above spinner for CLI path."""
        parent, sink = self._cli_parent()
        cb = _build_child_progress_callback(0, parent)
        cb("_thinking", "I'll search for papers first")
        written = sink.getvalue()
        assert "💭" in written
        assert "search for papers" in written

    def test_gateway_batched_progress(self):
        """Gateway path should batch tool calls and flush at BATCH_SIZE."""
        parent, relay = self._gateway_parent()
        cb = _build_child_progress_callback(0, parent)

        # Send 4 tool calls — shouldn't flush yet (BATCH_SIZE = 5)
        for idx in range(4):
            cb(f"tool_{idx}", f"arg_{idx}")
        relay.assert_not_called()

        # 5th call should trigger flush
        cb("tool_4", "arg_4")
        relay.assert_called_once()
        summary = relay.call_args[0][1]
        assert "tool_0" in summary
        assert "tool_4" in summary

    def test_thinking_not_relayed_to_gateway(self):
        """Thinking events should NOT be sent to gateway (too noisy)."""
        parent, relay = self._gateway_parent()
        cb = _build_child_progress_callback(0, parent)
        cb("_thinking", "some reasoning text")
        relay.assert_not_called()

    def test_parallel_callbacks_independent(self):
        """Each child's callback should have independent batch state."""
        parent, relay = self._gateway_parent()
        first_child = _build_child_progress_callback(0, parent)
        second_child = _build_child_progress_callback(1, parent)

        # Send 3 calls to each — neither should flush (batch size = 5)
        for idx in range(3):
            first_child(f"tool_{idx}")
            second_child(f"other_{idx}")
        relay.assert_not_called()

    def test_task_index_prefix_in_batch_mode(self):
        """Batch mode (task_count > 1) should show 1-indexed prefix for all tasks."""
        parent, sink = self._cli_parent()

        # task_index=0 in a batch of 3 → prefix "[1]"
        first_of_three = _build_child_progress_callback(0, parent, task_count=3)
        first_of_three("web_search", "test")
        assert "[1]" in sink.getvalue()

        # Reset the buffer so the next assertion only sees new output.
        sink.truncate(0)
        sink.seek(0)

        # task_index=2 in a batch of 3 → prefix "[3]"
        last_of_three = _build_child_progress_callback(2, parent, task_count=3)
        last_of_three("web_search", "test")
        assert "[3]" in sink.getvalue()

    def test_single_task_no_prefix(self):
        """Single task (task_count=1) should not show index prefix."""
        parent, sink = self._cli_parent()
        cb = _build_child_progress_callback(0, parent, task_count=1)
        cb("web_search", "test")
        assert "[" not in sink.getvalue()
# =========================================================================
# Integration: thinking callback in run_agent.py
# =========================================================================
class TestThinkingCallback:
    """Tests for the _thinking callback in AIAgent conversation loop."""

    def _simulate_thinking_callback(self, content, callback, delegate_depth=1):
        """Simulate the exact code path from run_agent.py for the thinking callback.

        delegate_depth: simulates self._delegate_depth.
        0 = main agent (should NOT fire), >=1 = subagent (should fire).

        Tags are stripped from the content and the first line, cut to 80
        characters, is passed to the callback as a "_thinking" event.
        Exceptions raised by the callback are swallowed.
        """
        import re

        if not (content and callback and delegate_depth > 0):
            return
        untagged = re.sub(
            r'</?(?:REASONING_SCRATCHPAD|think|reasoning)>', '', content.strip()
        ).strip()
        if not untagged:
            return
        preview_line = untagged.split('\n')[0][:80]
        if not preview_line:
            return
        try:
            callback("_thinking", preview_line)
        except Exception:
            pass

    @staticmethod
    def _recorder():
        """Return (calls, callback); the callback appends (name, preview) to calls."""
        calls = []
        return calls, (lambda name, preview=None: calls.append((name, preview)))

    def test_thinking_callback_fires_on_content(self):
        """tool_progress_callback should receive _thinking event
        when assistant message has content."""
        calls, record = self._recorder()
        self._simulate_thinking_callback(
            "I'll research quantum computing first, then summarize.", record
        )
        assert len(calls) == 1
        event_name, preview = calls[0]
        assert event_name == "_thinking"
        assert "quantum computing" in preview

    def test_thinking_callback_skipped_when_no_content(self):
        """Should not fire when assistant has no content."""
        calls, record = self._recorder()
        self._simulate_thinking_callback(None, record)
        assert len(calls) == 0

    def test_thinking_callback_truncates_long_content(self):
        """Should truncate long content to 80 chars."""
        calls, record = self._recorder()
        self._simulate_thinking_callback(
            "A" * 200 + "\nSecond line should be ignored", record
        )
        assert len(calls) == 1
        assert len(calls[0][1]) == 80

    def test_thinking_callback_skipped_for_main_agent(self):
        """Main agent (delegate_depth=0) should NOT fire thinking events.

        This prevents gateway spam on Telegram/Discord.
        """
        calls, record = self._recorder()
        self._simulate_thinking_callback(
            "I'll help you with that request.", record, delegate_depth=0
        )
        assert len(calls) == 0

    def test_thinking_callback_strips_reasoning_scratchpad(self):
        """REASONING_SCRATCHPAD tags should be stripped before display."""
        calls, record = self._recorder()
        self._simulate_thinking_callback(
            "<REASONING_SCRATCHPAD>I need to analyze this carefully</REASONING_SCRATCHPAD>",
            record,
        )
        assert len(calls) == 1
        preview = calls[0][1]
        assert "<REASONING_SCRATCHPAD>" not in preview
        assert "analyze this carefully" in preview

    def test_thinking_callback_strips_think_tags(self):
        """<think> tags should be stripped before display."""
        calls, record = self._recorder()
        self._simulate_thinking_callback(
            "<think>Let me think about this problem</think>", record
        )
        assert len(calls) == 1
        preview = calls[0][1]
        assert "<think>" not in preview
        assert "think about this problem" in preview

    def test_thinking_callback_empty_after_strip(self):
        """Should not fire when content is only XML tags."""
        calls, record = self._recorder()
        self._simulate_thinking_callback(
            "<REASONING_SCRATCHPAD></REASONING_SCRATCHPAD>", record
        )
        assert len(calls) == 0
# =========================================================================
# Gateway batch flush tests
# =========================================================================
class TestBatchFlush:
    """Tests for gateway batch flush on subagent completion."""

    @staticmethod
    def _gateway_callback():
        """Return (child_callback, relay) for a mock parent with no spinner."""
        parent = MagicMock()
        parent._delegate_spinner = None
        relay = MagicMock()
        parent.tool_progress_callback = relay
        return _build_child_progress_callback(0, parent), relay

    def test_flush_sends_remaining_batch(self):
        """_flush should send remaining tool names to gateway."""
        cb, relay = self._gateway_callback()

        # Send 3 tools (below batch size of 5)
        for tool_name, preview in (
            ("web_search", "query1"),
            ("read_file", "file.txt"),
            ("write_file", "out.txt"),
        ):
            cb(tool_name, preview)
        relay.assert_not_called()

        # Flush should send the remaining 3
        cb._flush()
        relay.assert_called_once()
        summary = relay.call_args[0][1]
        assert "web_search" in summary
        assert "write_file" in summary

    def test_flush_noop_when_batch_empty(self):
        """_flush should not send anything when batch is empty."""
        cb, relay = self._gateway_callback()
        cb._flush()
        relay.assert_not_called()

    def test_flush_noop_when_no_parent_callback(self):
        """_flush should not crash when there's no parent callback."""
        spinner = KawaiiSpinner("test")
        spinner._out = io.StringIO()
        spinner.running = True
        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent)
        cb("web_search", "test")
        cb._flush()  # Should not crash
# Allow running this test module directly as a script.
if __name__ == "__main__":
    _pytest_args = [__file__, "-v"]
    pytest.main(_pytest_args)

View file

@ -0,0 +1,184 @@
"""
Tests for MEDIA tag extraction from tool results.
Verifies that MEDIA tags (e.g., from TTS tool) are only extracted from
messages in the CURRENT turn, not from the full conversation history.
This prevents voice messages from accumulating and being sent multiple
times per reply. (Regression test for #160)
"""
import pytest
import re
def extract_media_tags_fixed(result_messages, history_len):
    """
    Extract MEDIA tags from tool results, but ONLY from new messages
    (those added after history_len). This is the fixed behavior.

    Tool/function messages whose content is not a string (for example
    ``None``) are skipped instead of raising ``TypeError``.

    Args:
        result_messages: Full list of messages including history + new
        history_len: Length of history before this turn

    Returns:
        Tuple of (media_tags list, has_voice_directive bool)
    """
    media_tags = []
    has_voice_directive = False

    # Only process new messages from this turn. Slicing past the end already
    # yields an empty list, so no explicit length check is needed.
    for msg in result_messages[history_len:]:
        if msg.get("role") not in ("tool", "function"):
            continue
        content = msg.get("content")
        if not isinstance(content, str) or "MEDIA:" not in content:
            continue
        for match in re.finditer(r'MEDIA:(\S+)', content):
            # Drop JSON punctuation that trails the path when the tag is
            # embedded in a serialized tool result.
            path = match.group(1).strip().rstrip('",}')
            if path:
                media_tags.append(f"MEDIA:{path}")
        # NOTE(review): the voice-directive check is assumed to be nested under
        # the MEDIA check (indentation was not visible in review) — confirm.
        if "[[audio_as_voice]]" in content:
            has_voice_directive = True

    return media_tags, has_voice_directive
def extract_media_tags_broken(result_messages):
    """
    The BROKEN behavior: extract MEDIA tags from ALL messages including history.

    This causes TTS voice messages to accumulate and be re-sent on every reply.
    Returns a tuple of (media_tags list, has_voice_directive bool).
    """
    found_tags = []
    voice_requested = False

    for message in result_messages:
        if message.get("role") not in ("tool", "function"):
            continue
        text = message.get("content", "")
        if "MEDIA:" not in text:
            continue
        for hit in re.finditer(r'MEDIA:(\S+)', text):
            media_path = hit.group(1).strip().rstrip('",}')
            if media_path:
                found_tags.append(f"MEDIA:{media_path}")
        # NOTE(review): assumed to be nested under the MEDIA check
        # (indentation was not visible in review) — confirm.
        if "[[audio_as_voice]]" in text:
            voice_requested = True

    return found_tags, voice_requested
class TestMediaExtraction:
    """Tests for MEDIA tag extraction from tool results."""

    def test_media_tags_not_extracted_from_history(self):
        """MEDIA tags from previous turns should NOT be extracted again."""
        # Simulate conversation history with a TTS call from a previous turn
        prior_turns = [
            {"role": "user", "content": "Say hello as audio"},
            {"role": "assistant", "content": None, "tool_calls": [{"id": "1", "function": {"name": "text_to_speech"}}]},
            {"role": "tool", "tool_call_id": "1", "content": '{"success": true, "media_tag": "[[audio_as_voice]]\\nMEDIA:/path/to/audio1.ogg"}'},
            {"role": "assistant", "content": "I've said hello for you!"},
        ]
        # New turn: user asks a simple question
        current_turn = [
            {"role": "user", "content": "What time is it?"},
            {"role": "assistant", "content": "It's 3:30 AM."},
        ]
        conversation = prior_turns + current_turn

        # Fixed behavior: should extract NO media tags (none in new messages)
        tags, voice_directive = extract_media_tags_fixed(conversation, len(prior_turns))
        assert tags == [], "Fixed extraction should not find tags in history"
        assert voice_directive is False

        # Broken behavior: would incorrectly extract the old media tag
        broken_tags, broken_voice = extract_media_tags_broken(conversation)
        assert len(broken_tags) == 1, "Broken extraction finds tags in history"
        assert "audio1.ogg" in broken_tags[0]

    def test_media_tags_extracted_from_current_turn(self):
        """MEDIA tags from the current turn SHOULD be extracted."""
        # History without TTS
        prior_turns = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there!"},
        ]
        # New turn with TTS call
        current_turn = [
            {"role": "user", "content": "Say goodbye as audio"},
            {"role": "assistant", "content": None, "tool_calls": [{"id": "2", "function": {"name": "text_to_speech"}}]},
            {"role": "tool", "tool_call_id": "2", "content": '{"success": true, "media_tag": "[[audio_as_voice]]\\nMEDIA:/path/to/audio2.ogg"}'},
            {"role": "assistant", "content": "I've said goodbye!"},
        ]
        conversation = prior_turns + current_turn

        # Fixed behavior: should extract the new media tag
        tags, voice_directive = extract_media_tags_fixed(conversation, len(prior_turns))
        assert len(tags) == 1, "Should extract media tag from current turn"
        assert "audio2.ogg" in tags[0]
        assert voice_directive is True

    def test_multiple_tts_calls_in_history_not_accumulated(self):
        """Multiple TTS calls in history should NOT accumulate in new responses."""
        # History with multiple TTS calls
        prior_turns = [
            {"role": "user", "content": "Say hello"},
            {"role": "tool", "tool_call_id": "1", "content": 'MEDIA:/audio/hello.ogg'},
            {"role": "assistant", "content": "Done!"},
            {"role": "user", "content": "Say goodbye"},
            {"role": "tool", "tool_call_id": "2", "content": 'MEDIA:/audio/goodbye.ogg'},
            {"role": "assistant", "content": "Done!"},
            {"role": "user", "content": "Say thanks"},
            {"role": "tool", "tool_call_id": "3", "content": 'MEDIA:/audio/thanks.ogg'},
            {"role": "assistant", "content": "Done!"},
        ]
        # New turn: no TTS
        current_turn = [
            {"role": "user", "content": "What time is it?"},
            {"role": "assistant", "content": "3 PM"},
        ]
        conversation = prior_turns + current_turn

        # Fixed: no tags
        tags, _ = extract_media_tags_fixed(conversation, len(prior_turns))
        assert tags == [], "Should not accumulate tags from history"

        # Broken: would have 3 tags (all the old ones)
        broken_tags, _ = extract_media_tags_broken(conversation)
        assert len(broken_tags) == 3, "Broken version accumulates all history tags"

    def test_deduplication_within_current_turn(self):
        """Multiple MEDIA tags in current turn should be deduplicated."""
        prior_turns = []
        # Current turn with multiple tool calls producing same media
        current_turn = [
            {"role": "user", "content": "Multiple TTS"},
            {"role": "tool", "tool_call_id": "1", "content": 'MEDIA:/audio/same.ogg'},
            {"role": "tool", "tool_call_id": "2", "content": 'MEDIA:/audio/same.ogg'},  # duplicate
            {"role": "tool", "tool_call_id": "3", "content": 'MEDIA:/audio/different.ogg'},
            {"role": "assistant", "content": "Done!"},
        ]
        conversation = prior_turns + current_turn

        tags, _ = extract_media_tags_fixed(conversation, 0)
        # Even though same.ogg appears twice, deduplication happens after extraction
        # The extraction itself should get both, then caller deduplicates
        assert len(tags) == 3  # Raw extraction gets all

        # Deduplication as done in the actual code:
        seen = set()
        unique = [t for t in tags if t not in seen and not seen.add(t)]
        assert len(unique) == 2  # After dedup: same.ogg and different.ogg
# Allow running this test module directly as a script.
if __name__ == "__main__":
    _pytest_args = [__file__, "-v"]
    pytest.main(_pytest_args)

View file

@ -0,0 +1,210 @@
import json
import time
import base64
from contextlib import contextmanager
from pathlib import Path
from types import SimpleNamespace
import pytest
import yaml
from hermes_cli.auth import (
AuthError,
DEFAULT_CODEX_BASE_URL,
PROVIDER_REGISTRY,
_persist_codex_auth_payload,
_login_openai_codex,
login_command,
get_codex_auth_status,
get_provider_auth_state,
read_codex_auth_file,
resolve_codex_runtime_credentials,
resolve_provider,
)
def _write_codex_auth(codex_home: Path, *, access_token: str = "access", refresh_token: str = "refresh") -> Path:
    """Create ``codex_home`` if needed and write an OAuth-style ``auth.json`` into it.

    Returns the path of the written file.
    """
    codex_home.mkdir(parents=True, exist_ok=True)
    auth_payload = {
        "auth_mode": "oauth",
        "last_refresh": "2026-02-26T00:00:00Z",
        "tokens": {
            "access_token": access_token,
            "refresh_token": refresh_token,
        },
    }
    auth_file = codex_home / "auth.json"
    auth_file.write_text(json.dumps(auth_payload))
    return auth_file
def _jwt_with_exp(exp_epoch: int) -> str:
    """Build a three-part token string whose middle part encodes ``{"exp": exp_epoch}``.

    The middle part is URL-safe base64 of the JSON with ``=`` padding
    removed; the outer parts are the placeholders ``h`` and ``s``.
    """
    claims_json = json.dumps({"exp": exp_epoch})
    segment = base64.urlsafe_b64encode(claims_json.encode("utf-8"))
    segment_text = segment.rstrip(b"=").decode("utf-8")
    return "h." + segment_text + ".s"
def test_read_codex_auth_file_success(tmp_path, monkeypatch):
    """read_codex_auth_file reports the file path and tokens found under CODEX_HOME."""
    home_dir = tmp_path / "codex-home"
    expected_path = _write_codex_auth(home_dir)
    monkeypatch.setenv("CODEX_HOME", str(home_dir))

    loaded = read_codex_auth_file()

    assert loaded["auth_path"] == expected_path
    tokens = loaded["tokens"]
    assert tokens["access_token"] == "access"
    assert tokens["refresh_token"] == "refresh"
def test_resolve_codex_runtime_credentials_missing_access_token(tmp_path, monkeypatch):
    """An empty access token raises AuthError flagged as requiring re-login."""
    home_dir = tmp_path / "codex-home"
    _write_codex_auth(home_dir, access_token="")
    monkeypatch.setenv("CODEX_HOME", str(home_dir))

    with pytest.raises(AuthError) as raised:
        resolve_codex_runtime_credentials()

    error = raised.value
    assert error.code == "codex_auth_missing_access_token"
    assert error.relogin_required is True
def test_resolve_codex_runtime_credentials_refreshes_expiring_token(tmp_path, monkeypatch):
    """A token whose ``exp`` is 10 seconds in the past is refreshed exactly once."""
    home_dir = tmp_path / "codex-home"
    stale_token = _jwt_with_exp(int(time.time()) - 10)
    _write_codex_auth(home_dir, access_token=stale_token, refresh_token="refresh-old")
    monkeypatch.setenv("CODEX_HOME", str(home_dir))

    refresh_invocations = []

    def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
        refresh_invocations.append(auth_path)
        assert auth_path == home_dir / "auth.json"
        assert lock_held is True
        return {"access_token": "access-new", "refresh_token": "refresh-new"}

    monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)

    resolved = resolve_codex_runtime_credentials()

    assert len(refresh_invocations) == 1
    assert resolved["api_key"] == "access-new"
def test_resolve_codex_runtime_credentials_force_refresh(tmp_path, monkeypatch):
    """force_refresh=True refreshes once and returns the new access token as api_key."""
    home_dir = tmp_path / "codex-home"
    _write_codex_auth(home_dir, access_token="access-current", refresh_token="refresh-old")
    monkeypatch.setenv("CODEX_HOME", str(home_dir))

    refresh_invocations = []

    def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
        refresh_invocations.append(auth_path)
        assert lock_held is True
        return {"access_token": "access-forced", "refresh_token": "refresh-new"}

    monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)

    resolved = resolve_codex_runtime_credentials(force_refresh=True, refresh_if_expiring=False)

    assert len(refresh_invocations) == 1
    assert resolved["api_key"] == "access-forced"
def test_resolve_codex_runtime_credentials_uses_file_lock_on_refresh(tmp_path, monkeypatch):
    """A forced refresh enters and exits the auth-file lock exactly once."""
    home_dir = tmp_path / "codex-home"
    _write_codex_auth(home_dir, access_token="access-current", refresh_token="refresh-old")
    monkeypatch.setenv("CODEX_HOME", str(home_dir))

    lock_events = []
    refresh_invocations = []

    @contextmanager
    def _fake_lock(auth_path, timeout_seconds=15.0):
        assert auth_path == home_dir / "auth.json"
        lock_events.append("enter")
        try:
            yield
        finally:
            lock_events.append("exit")

    def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
        refresh_invocations.append(auth_path)
        assert lock_held is True
        return {"access_token": "access-updated", "refresh_token": "refresh-updated"}

    monkeypatch.setattr("hermes_cli.auth._codex_auth_file_lock", _fake_lock)
    monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)

    resolved = resolve_codex_runtime_credentials(force_refresh=True, refresh_if_expiring=False)

    assert len(refresh_invocations) == 1
    assert lock_events.count("enter") == 1
    assert lock_events.count("exit") == 1
    assert resolved["api_key"] == "access-updated"
def test_resolve_provider_explicit_codex_does_not_fallback(monkeypatch):
    """Requesting "openai-codex" resolves to itself with no API-key env vars set."""
    for env_name in ("OPENAI_API_KEY", "OPENROUTER_API_KEY"):
        monkeypatch.delenv(env_name, raising=False)

    resolved = resolve_provider("openai-codex")
    assert resolved == "openai-codex"
def test_persist_codex_auth_payload_writes_atomically(tmp_path):
    """Persisting replaces stale content and leaves no ``.auth.json.*.tmp`` file behind."""
    target = tmp_path / "auth.json"
    target.write_text('{"stale":true}\n')
    fresh_payload = {
        "auth_mode": "oauth",
        "tokens": {
            "access_token": "next-access",
            "refresh_token": "next-refresh",
        },
        "last_refresh": "2026-02-26T00:00:00Z",
    }

    _persist_codex_auth_payload(target, fresh_payload)

    assert json.loads(target.read_text()) == fresh_payload
    leftover_temp_files = list(tmp_path.glob(".auth.json.*.tmp"))
    assert leftover_temp_files == []
def test_get_codex_auth_status_not_logged_in(tmp_path, monkeypatch):
    """With CODEX_HOME pointing at a missing directory the status is logged-out with an error."""
    missing_home = tmp_path / "missing-codex-home"
    monkeypatch.setenv("CODEX_HOME", str(missing_home))

    auth_status = get_codex_auth_status()

    assert auth_status["logged_in"] is False
    assert "error" in auth_status
def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch):
    """Codex login records provider auth state and writes the model section of config.yaml."""
    hermes_dir = tmp_path / "hermes-home"
    codex_dir = tmp_path / "codex-home"
    _write_codex_auth(codex_dir)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    monkeypatch.setenv("CODEX_HOME", str(codex_dir))
    # Mock input() to accept existing credentials
    monkeypatch.setattr("builtins.input", lambda _: "y")

    _login_openai_codex(SimpleNamespace(), PROVIDER_REGISTRY["openai-codex"])

    provider_state = get_provider_auth_state("openai-codex")
    assert provider_state is not None
    assert provider_state["source"] == "codex-auth-json"
    assert provider_state["auth_file"].endswith("auth.json")

    saved_config = yaml.safe_load((hermes_dir / "config.yaml").read_text())
    model_section = saved_config["model"]
    assert model_section["provider"] == "openai-codex"
    assert model_section["base_url"] == DEFAULT_CODEX_BASE_URL
def test_login_command_shows_deprecation(monkeypatch, capsys):
    """login_command is deprecated and directs users to hermes model."""
    with pytest.raises(SystemExit) as exit_info:
        login_command(SimpleNamespace())

    assert exit_info.value.code == 0
    printed = capsys.readouterr().out
    assert "hermes model" in printed

80
tests/test_cli_init.py Normal file
View file

@ -0,0 +1,80 @@
"""Tests for HermesCLI initialization -- catches configuration bugs
that only manifest at runtime (not in mocked unit tests)."""
import os
import sys
from unittest.mock import patch, MagicMock
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
def _make_cli(**kwargs):
    """Create a HermesCLI instance with minimal mocking.

    ``cli.get_tool_definitions`` is patched to return an empty list while
    the instance is constructed; ``kwargs`` are forwarded to HermesCLI.
    """
    from cli import HermesCLI

    tool_definitions_stub = patch("cli.get_tool_definitions", return_value=[])
    with tool_definitions_stub:
        instance = HermesCLI(**kwargs)
    return instance
class TestMaxTurnsResolution:
    """max_turns must always resolve to a positive integer, never None."""

    def test_default_max_turns_is_integer(self):
        """With no arguments, max_turns is a positive int."""
        shell = _make_cli()
        assert isinstance(shell.max_turns, int)
        assert shell.max_turns > 0

    def test_explicit_max_turns_honored(self):
        """An explicit max_turns argument is used as given."""
        shell = _make_cli(max_turns=25)
        assert shell.max_turns == 25

    def test_none_max_turns_gets_default(self):
        """Passing max_turns=None still yields a positive int."""
        shell = _make_cli(max_turns=None)
        assert isinstance(shell.max_turns, int)
        assert shell.max_turns > 0

    def test_env_var_max_turns(self, monkeypatch):
        """Env var is used when config file doesn't set max_turns."""
        monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42")
        import cli as cli_module

        # monkeypatch.setitem restores the previous value — or removes the key
        # if it was absent — at teardown, so the shared module-level config is
        # never left modified for later tests.
        monkeypatch.setitem(cli_module.CLI_CONFIG["agent"], "max_turns", None)

        shell = _make_cli()
        assert shell.max_turns == 42

    def test_max_turns_never_none_for_agent(self):
        """The value passed to AIAgent must never be None (causes TypeError in run_conversation)."""
        shell = _make_cli()
        assert shell.max_turns is not None
class TestVerboseAndToolProgress:
    """Type checks for the verbose flag and tool-progress mode of a default HermesCLI."""

    def test_default_verbose_is_bool(self):
        """`verbose` is a bool on a default instance."""
        shell = _make_cli()
        assert isinstance(shell.verbose, bool)

    def test_tool_progress_mode_is_string(self):
        """`tool_progress_mode` is a string and one of the four known modes."""
        shell = _make_cli()
        known_modes = ("off", "new", "all", "verbose")
        assert isinstance(shell.tool_progress_mode, str)
        assert shell.tool_progress_mode in known_modes
class TestProviderResolution:
    """Type checks for the provider-related attributes of a default HermesCLI."""

    def test_api_key_is_string_or_none(self):
        """`api_key` is either None or a string."""
        shell = _make_cli()
        assert shell.api_key is None or isinstance(shell.api_key, str)

    def test_base_url_is_string(self):
        """`base_url` is a string beginning with "http"."""
        shell = _make_cli()
        assert isinstance(shell.base_url, str)
        assert shell.base_url.startswith("http")

    def test_model_is_string(self):
        """`model` is a non-empty string."""
        shell = _make_cli()
        assert isinstance(shell.model, str)
        assert len(shell.model) > 0

View file

@ -0,0 +1,187 @@
import importlib
import sys
import types
from contextlib import nullcontext
from types import SimpleNamespace
from hermes_cli.auth import AuthError
from hermes_cli import main as hermes_main
def _install_prompt_toolkit_stubs():
    """Register stand-in ``prompt_toolkit`` modules in ``sys.modules``.

    Each stub module exposes placeholder attributes for the names listed
    below. Entries already present in ``sys.modules`` are left in place
    because registration uses ``setdefault``.
    """

    class _Dummy:
        # Accepts and ignores any constructor arguments.
        def __init__(self, *args, **kwargs):
            pass

    class _Condition:
        # Wraps a callable; truthiness is the callable's result.
        def __init__(self, func):
            self.func = func

        def __bool__(self):
            return bool(self.func())

    class _ANSI(str):
        pass

    # Module name -> attributes to set on the stub, in registration order.
    stub_layout = {
        "prompt_toolkit": {
            "print_formatted_text": lambda *args, **kwargs: None,
        },
        "prompt_toolkit.history": {"FileHistory": _Dummy},
        "prompt_toolkit.styles": {"Style": _Dummy},
        "prompt_toolkit.patch_stdout": {
            "patch_stdout": lambda *args, **kwargs: nullcontext(),
        },
        "prompt_toolkit.application": {"Application": _Dummy},
        "prompt_toolkit.layout": {
            "Layout": _Dummy,
            "HSplit": _Dummy,
            "Window": _Dummy,
            "FormattedTextControl": _Dummy,
            "ConditionalContainer": _Dummy,
        },
        "prompt_toolkit.layout.processors": {
            "Processor": _Dummy,
            "Transformation": _Dummy,
            "PasswordProcessor": _Dummy,
            "ConditionalProcessor": _Dummy,
        },
        "prompt_toolkit.filters": {"Condition": _Condition},
        "prompt_toolkit.layout.dimension": {"Dimension": _Dummy},
        "prompt_toolkit.layout.menus": {"CompletionsMenu": _Dummy},
        "prompt_toolkit.widgets": {"TextArea": _Dummy},
        "prompt_toolkit.key_binding": {"KeyBindings": _Dummy},
        "prompt_toolkit.completion": {
            "Completer": _Dummy,
            "Completion": _Dummy,
        },
        "prompt_toolkit.formatted_text": {"ANSI": _ANSI},
    }

    for module_name, attributes in stub_layout.items():
        stub_module = types.ModuleType(module_name)
        for attr_name, attr_value in attributes.items():
            setattr(stub_module, attr_name, attr_value)
        sys.modules.setdefault(module_name, stub_module)
def _import_cli():
    """Import and return the ``cli`` module.

    If ``prompt_toolkit`` cannot be imported, stub modules are installed
    first via ``_install_prompt_toolkit_stubs``.
    """
    load_module = importlib.import_module
    try:
        load_module("prompt_toolkit")
    except ModuleNotFoundError:
        _install_prompt_toolkit_stubs()
    cli_module = load_module("cli")
    return cli_module
def test_hermes_cli_init_does_not_eagerly_resolve_runtime_provider(monkeypatch):
    """Constructing HermesCLI must not call resolve_runtime_provider."""
    cli_module = _import_cli()
    resolve_attempts = []

    def _unexpected_runtime_resolve(**kwargs):
        resolve_attempts.append(kwargs)
        raise AssertionError("resolve_runtime_provider should not be called in HermesCLI.__init__")

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _unexpected_runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))

    shell = cli_module.HermesCLI(model="gpt-5", compact=True, max_turns=1)

    assert shell is not None
    assert len(resolve_attempts) == 0
def test_runtime_resolution_failure_is_not_sticky(monkeypatch):
    """A failed _init_agent can be retried: the second attempt resolves and builds an agent."""
    cli_module = _import_cli()
    resolve_attempts = []

    def _runtime_resolve(**kwargs):
        resolve_attempts.append(kwargs)
        # Only the very first resolution attempt fails.
        if len(resolve_attempts) == 1:
            raise RuntimeError("temporary auth failure")
        return {
            "provider": "openrouter",
            "api_mode": "chat_completions",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "test-key",
            "source": "env/config",
        }

    class _DummyAgent:
        def __init__(self, *args, **kwargs):
            self.kwargs = kwargs

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    monkeypatch.setattr(cli_module, "AIAgent", _DummyAgent)

    shell = cli_module.HermesCLI(model="gpt-5", compact=True, max_turns=1)

    assert shell._init_agent() is False
    assert shell._init_agent() is True
    assert len(resolve_attempts) == 2
    assert shell.agent is not None
def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch):
    """A provider/api_mode change drops the cached agent even when URL and key are unchanged."""
    cli_module = _import_cli()

    resolved_runtime = {
        "provider": "openai-codex",
        "api_mode": "codex_responses",
        "base_url": "https://same-endpoint.example/v1",
        "api_key": "same-key",
        "source": "env/config",
    }

    def _runtime_resolve(**kwargs):
        # Hand out a fresh dict on every call, as a literal return would.
        return dict(resolved_runtime)

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))

    shell = cli_module.HermesCLI(model="gpt-5", compact=True, max_turns=1)
    # Seed the shell with a different provider/mode but the same endpoint and key.
    shell.provider = "openrouter"
    shell.api_mode = "chat_completions"
    shell.base_url = "https://same-endpoint.example/v1"
    shell.api_key = "same-key"
    shell.agent = object()

    assert shell._ensure_runtime_credentials() is True
    assert shell.agent is None
    assert shell.provider == "openai-codex"
    assert shell.api_mode == "codex_responses"
def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys):
    """cmd_model warns and falls back to auto detection when the configured provider is invalid."""

    def _load_config():
        return {"model": {"default": "gpt-5", "provider": "invalid-provider"}}

    def _resolve_provider(requested, **kwargs):
        if requested == "invalid-provider":
            raise AuthError("Unknown provider 'invalid-provider'.", code="invalid_provider")
        return "openrouter"

    monkeypatch.setattr("hermes_cli.config.load_config", _load_config)
    monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None)
    monkeypatch.setattr("hermes_cli.config.get_env_value", lambda key: "")
    monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: None)
    monkeypatch.setattr("hermes_cli.auth.resolve_provider", _resolve_provider)
    # Always pick the last entry of the provider menu.
    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: len(choices) - 1)

    hermes_main.cmd_model(SimpleNamespace())

    printed = capsys.readouterr().out
    assert "Warning:" in printed
    assert "falling back to auto provider detection" in printed.lower()
    assert "No change." in printed

View file

@ -0,0 +1,180 @@
import asyncio
import sys
import types
from types import SimpleNamespace
# Register minimal stand-ins for "fire", "firecrawl" and "fal_client", but only
# when no entry with that name is already present in sys.modules.
# NOTE(review): presumably this lets the project imports below succeed without
# those packages installed -- confirm against the imported modules.
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
import cron.scheduler as cron_scheduler
import gateway.run as gateway_run
import run_agent
from gateway.config import Platform
from gateway.session import SessionSource
def _patch_agent_bootstrap(monkeypatch):
    """Stub the two ``run_agent`` lookups used by these tests.

    ``get_tool_definitions`` is replaced with a function returning a single
    "terminal" function tool, and ``check_toolset_requirements`` with one
    returning an empty dict.
    """

    def _terminal_tool_only(**kwargs):
        # Built fresh on every call so callers never share one list/dict.
        return [
            {
                "type": "function",
                "function": {
                    "name": "terminal",
                    "description": "Run shell commands.",
                    "parameters": {"type": "object", "properties": {}},
                },
            }
        ]

    def _empty_requirements():
        return {}

    monkeypatch.setattr(run_agent, "get_tool_definitions", _terminal_tool_only)
    monkeypatch.setattr(run_agent, "check_toolset_requirements", _empty_requirements)
def _codex_message_response(text: str):
    """Return a fake completed response holding one ``output_text`` message.

    The object exposes ``output``, ``usage``, ``status`` and ``model``
    attributes; ``text`` becomes the text of the single content part.
    """
    text_part = SimpleNamespace(type="output_text", text=text)
    message_item = SimpleNamespace(type="message", content=[text_part])
    token_usage = SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8)
    return SimpleNamespace(
        output=[message_item],
        usage=token_usage,
        status="completed",
        model="gpt-5-codex",
    )
class _UnauthorizedError(RuntimeError):
    """RuntimeError carrying a fixed 401 message and a ``status_code`` of 401."""

    def __init__(self):
        message = "Error code: 401 - unauthorized"
        super().__init__(message)
        self.status_code = 401
class _FakeOpenAI:
    """Stand-in client that records its constructor keyword arguments."""

    def __init__(self, **client_options):
        # Kept on the instance so tests can inspect what was passed in.
        self.kwargs = client_options

    def close(self):
        """Do nothing and return None."""
        return None
class _Codex401ThenSuccessAgent(run_agent.AIAgent):
    """AIAgent subclass whose first API call raises a 401 and later calls succeed.

    Class-level state lets tests inspect what happened: ``refresh_attempts``
    counts calls to the credential-refresh hook and ``last_init`` holds a copy
    of the keyword arguments of the most recent construction.
    """

    refresh_attempts = 0
    last_init = {}

    def __init__(self, *args, **kwargs):
        # Fill in test defaults without overriding anything the caller passed.
        test_defaults = (
            ("skip_context_files", True),
            ("skip_memory", True),
            ("max_iterations", 4),
        )
        for option, value in test_defaults:
            kwargs.setdefault(option, value)
        type(self).last_init = dict(kwargs)
        super().__init__(*args, **kwargs)

        # Replace these instance hooks with no-ops.
        self._cleanup_task_resources = lambda task_id: None
        self._persist_session = lambda messages, history=None: None
        self._save_trajectory = lambda messages, user_message, completed: None
        self._save_session_log = lambda messages: None

    def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
        """Count the refresh attempt on the class and report success."""
        agent_cls = type(self)
        agent_cls.refresh_attempts = agent_cls.refresh_attempts + 1
        return True

    def run_conversation(self, user_message: str, conversation_history=None):
        """Install a fake API call (401 first, then success) and delegate to the parent."""
        api_call_count = 0

        def _unauthorized_once(api_kwargs):
            nonlocal api_call_count
            api_call_count += 1
            if api_call_count > 1:
                return _codex_message_response("Recovered via refresh")
            raise _UnauthorizedError()

        self._interruptible_api_call = _unauthorized_once
        return super().run_conversation(
            user_message,
            conversation_history=conversation_history,
        )
def test_cron_run_job_codex_path_handles_internal_401_refresh(monkeypatch):
    """Run a cron job through the 401-then-success agent and check the outcome.

    Expects ``run_job`` to report success with the recovered reply, exactly
    one credential refresh, and the Codex provider/api_mode passed to the
    agent constructor.
    """
    _patch_agent_bootstrap(monkeypatch)
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
    monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)

    def _codex_runtime(requested=None):
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "codex-token",
        }

    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider",
        _codex_runtime,
    )
    monkeypatch.setattr(
        "hermes_cli.runtime_provider.format_runtime_provider_error",
        lambda exc: str(exc),
    )

    # Reset the class-level bookkeeping before the run.
    agent_cls = _Codex401ThenSuccessAgent
    agent_cls.refresh_attempts = 0
    agent_cls.last_init = {}

    job = {"id": "job-1", "name": "Codex Refresh Test", "prompt": "ping"}
    succeeded, transcript, reply, failure = cron_scheduler.run_job(job)

    assert succeeded is True
    assert failure is None
    assert reply == "Recovered via refresh"
    assert "Recovered via refresh" in transcript
    assert agent_cls.refresh_attempts == 1
    assert agent_cls.last_init["provider"] == "openai-codex"
    assert agent_cls.last_init["api_mode"] == "codex_responses"
def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
    """Drive ``GatewayRunner._run_agent`` with the 401-then-success agent.

    Expects the recovered reply in the result, exactly one credential
    refresh, and the Codex provider/api_mode passed to the agent constructor.
    """
    from unittest.mock import MagicMock, AsyncMock

    _patch_agent_bootstrap(monkeypatch)
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
    monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)

    def _codex_runtime_kwargs():
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "codex-token",
        }

    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", _codex_runtime_kwargs)
    monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")

    # Reset the class-level bookkeeping before the run.
    agent_cls = _Codex401ThenSuccessAgent
    agent_cls.refresh_attempts = 0
    agent_cls.last_init = {}

    # Allocate the runner without calling __init__ and set its attributes by hand.
    runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
    manual_state = (
        ("adapters", {}),
        ("_ephemeral_system_prompt", ""),
        ("_prefill_messages", []),
        ("_reasoning_config", None),
        ("_running_agents", {}),
    )
    for attribute, value in manual_state:
        setattr(runner, attribute, value)
    runner.hooks = MagicMock()
    runner.hooks.emit = AsyncMock()
    runner.hooks.loaded_hooks = []
    runner._session_db = None

    source = SessionSource(
        platform=Platform.LOCAL,
        chat_id="cli",
        chat_name="CLI",
        chat_type="dm",
        user_id="user-1",
    )
    pending_run = runner._run_agent(
        message="ping",
        context_prompt="",
        history=[],
        source=source,
        session_id="session-1",
        session_key="agent:main:local:dm",
    )
    result = asyncio.run(pending_run)

    assert result["final_response"] == "Recovered via refresh"
    assert agent_cls.refresh_attempts == 1
    assert agent_cls.last_init["provider"] == "openai-codex"
    assert agent_cls.last_init["api_mode"] == "codex_responses"

View file

@ -0,0 +1,40 @@
import json
from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, get_codex_model_ids
def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch):
    """Check model listing against a Codex home with a config and a models cache.

    The configured model must come first, the two cached ``*-codex`` slugs
    must be present, and both the ``gpt-4o`` entry and the entry marked
    ``"visibility": "hidden"`` must be absent.
    """
    codex_home = tmp_path / "codex-home"
    codex_home.mkdir(parents=True, exist_ok=True)

    config_file = codex_home / "config.toml"
    config_file.write_text('model = "gpt-5.2-codex"\n')

    cached_models = {
        "models": [
            {"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True},
            {"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True},
            {"slug": "gpt-4o", "priority": 1, "supported_in_api": True},
            {"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"},
        ]
    }
    cache_file = codex_home / "models_cache.json"
    cache_file.write_text(json.dumps(cached_models))

    monkeypatch.setenv("CODEX_HOME", str(codex_home))
    model_ids = get_codex_model_ids()

    assert model_ids[0] == "gpt-5.2-codex"
    for expected in ("gpt-5.1-codex", "gpt-5.3-codex"):
        assert expected in model_ids
    for excluded in ("gpt-4o", "gpt-5-hidden-codex"):
        assert excluded not in model_ids
def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatch):
    """With an empty Codex home, the listing must start with DEFAULT_CODEX_MODELS."""
    empty_home = tmp_path / "codex-home"
    empty_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("CODEX_HOME", str(empty_home))

    model_ids = get_codex_model_ids()

    default_count = len(DEFAULT_CODEX_MODELS)
    assert model_ids[:default_count] == DEFAULT_CODEX_MODELS

View file

@ -0,0 +1,51 @@
"""Tests for detect_external_credentials() -- Phase 2 credential sync."""
import json
from pathlib import Path
from unittest.mock import patch
import pytest
from hermes_cli.auth import detect_external_credentials
class TestDetectCodexCLI:
    """detect_external_credentials() against different Codex home directory states."""

    @staticmethod
    def _detect_with_codex_home(codex_home):
        """Run detection with ``resolve_codex_home_path`` patched to return *codex_home*."""
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_home):
            return detect_external_credentials()

    @staticmethod
    def _has_codex_entry(found):
        """Tell whether any detected entry names the ``openai-codex`` provider."""
        return any(entry["provider"] == "openai-codex" for entry in found)

    def test_detects_valid_codex_auth(self, tmp_path):
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        auth_file = codex_dir / "auth.json"
        payload = {"tokens": {"access_token": "tok-123", "refresh_token": "ref-456"}}
        auth_file.write_text(json.dumps(payload))

        found = self._detect_with_codex_home(codex_dir)

        codex_entries = [entry for entry in found if entry["provider"] == "openai-codex"]
        assert len(codex_entries) == 1
        only_entry = codex_entries[0]
        assert "Codex CLI" in only_entry["label"]
        assert str(auth_file) == only_entry["path"]

    def test_skips_codex_without_access_token(self, tmp_path):
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text(json.dumps({"tokens": {}}))

        found = self._detect_with_codex_home(codex_dir)

        assert not self._has_codex_entry(found)

    def test_skips_missing_codex_dir(self, tmp_path):
        found = self._detect_with_codex_home(tmp_path / "nonexistent")

        assert not self._has_codex_entry(found)

    def test_skips_malformed_codex_auth(self, tmp_path):
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text("{bad json")

        found = self._detect_with_codex_home(codex_dir)

        assert not self._has_codex_entry(found)

    def test_returns_empty_when_nothing_found(self, tmp_path):
        found = self._detect_with_codex_home(tmp_path / ".codex")

        assert found == []

View file

@ -0,0 +1,225 @@
"""Tests for flush_memories() working correctly across all provider modes.
Catches the bug where Codex mode called chat.completions.create on a
Responses-only client, which would fail silently or with a 404.
"""
import json
import os
import sys
import types
from types import SimpleNamespace
from unittest.mock import patch, MagicMock, call
import pytest
# Register minimal stand-ins for "fire", "firecrawl" and "fal_client", but only
# when no entry with that name is already present in sys.modules.
# NOTE(review): presumably this lets `import run_agent` below succeed without
# those packages installed -- confirm against run_agent's imports.
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
import run_agent
class _FakeOpenAI:
    """Stand-in client that records its constructor keyword arguments.

    ``api_key`` and ``base_url`` are exposed as attributes, defaulting to
    ``"test"`` and ``"http://test"`` when not supplied.
    """

    def __init__(self, **client_options):
        self.kwargs = client_options
        self.api_key = client_options.get("api_key", "test")
        self.base_url = client_options.get("base_url", "http://test")

    def close(self):
        """Do nothing."""
        return None
def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"):
    """Build an AIAgent with mocked internals, ready for flush_memories testing.

    ``run_agent`` is patched to advertise a single "memory" tool, report an
    empty toolset-requirements dict and use ``_FakeOpenAI`` as the client
    class. After construction the agent gets a ``MagicMock`` memory store
    and fixed turn-count attributes.
    """

    def _memory_tool_only(**kw):
        # Built fresh on every call so callers never share one list/dict.
        return [
            {
                "type": "function",
                "function": {
                    "name": "memory",
                    "description": "Manage memories.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "action": {"type": "string"},
                            "target": {"type": "string"},
                            "content": {"type": "string"},
                        },
                    },
                },
            },
        ]

    monkeypatch.setattr(run_agent, "get_tool_definitions", _memory_tool_only)
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)

    agent_options = {
        "api_key": "test-key",
        "base_url": "https://test.example.com/v1",
        "provider": provider,
        "api_mode": api_mode,
        "max_iterations": 4,
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
    }
    agent = run_agent.AIAgent(**agent_options)

    # The agent was built with skip_memory=True; attach a mock store by hand.
    agent._memory_store = MagicMock()
    agent._memory_flush_min_turns = 1
    agent._user_turn_count = 5
    return agent
def _chat_response_with_memory_call():
    """Simulated chat completions response with a memory tool call.

    The single choice carries no text content and one tool call to
    ``memory`` whose JSON arguments add a note about dark mode.
    """
    memory_arguments = json.dumps({
        "action": "add",
        "target": "notes",
        "content": "User prefers dark mode.",
    })
    tool_call = SimpleNamespace(
        function=SimpleNamespace(name="memory", arguments=memory_arguments),
    )
    assistant_message = SimpleNamespace(content=None, tool_calls=[tool_call])
    token_usage = SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120)
    return SimpleNamespace(
        choices=[SimpleNamespace(message=assistant_message)],
        usage=token_usage,
    )
class TestFlushMemoriesUsesAuxiliaryClient:
    """When an auxiliary client is available, flush_memories should use it
    instead of self.client -- especially critical in Codex mode."""

    def test_flush_uses_auxiliary_when_available(self, monkeypatch):
        """Codex-mode agent with an auxiliary client: the auxiliary client is called once
        with the auxiliary model name."""
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")

        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Remember this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)

        mock_aux_client.chat.completions.create.assert_called_once()
        create_call = mock_aux_client.chat.completions.create.call_args
        # call_args.kwargs and call_args[1] are the same mapping, so a single
        # lookup is enough (the sibling tests already rely on .kwargs).
        assert create_call.kwargs.get("model") == "gpt-4o-mini"

    def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
        """Non-Codex mode with no auxiliary falls back to self.client."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        agent.client = MagicMock()
        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Save this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)

        agent.client.chat.completions.create.assert_called_once()

    def test_flush_executes_memory_tool_calls(self, monkeypatch):
        """Verify that memory tool calls from the flush response actually get executed."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")

        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Note this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
                agent.flush_memories(messages)

        mock_memory.assert_called_once()
        memory_call = mock_memory.call_args
        assert memory_call.kwargs["action"] == "add"
        assert memory_call.kwargs["target"] == "notes"
        assert "dark mode" in memory_call.kwargs["content"]

    def test_flush_strips_artifacts_from_messages(self, monkeypatch):
        """After flush, the flush prompt and any response should be removed from messages."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")

        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Remember X"},
            ]
            original_len = len(messages)
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)

        # Messages should not grow from the flush
        assert len(messages) <= original_len
        # No flush sentinel should remain
        for msg in messages:
            assert "_flush_sentinel" not in msg
class TestFlushMemoriesCodexFallback:
    """When no auxiliary client exists and we're in Codex mode, flush should
    use the Codex Responses API path instead of chat.completions."""

    def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch):
        """Without an auxiliary client, the flush must call ``_run_codex_stream`` once
        and execute the memory tool call found in its response."""
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")

        memory_call_item = SimpleNamespace(
            type="function_call",
            call_id="call_1",
            name="memory",
            arguments=json.dumps({
                "action": "add",
                "target": "notes",
                "content": "Codex flush test",
            }),
        )
        codex_response = SimpleNamespace(
            output=[memory_call_item],
            usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
            status="completed",
            model="gpt-5-codex",
        )
        stub_api_kwargs = {
            "model": "gpt-5-codex",
            "instructions": "test",
            "input": [],
            "tools": [],
            "max_output_tokens": 4096,
        }
        conversation = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi"},
            {"role": "user", "content": "Save this"},
        ]

        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \
                patch.object(agent, "_run_codex_stream", return_value=codex_response) as stream_mock, \
                patch.object(agent, "_build_api_kwargs", return_value=stub_api_kwargs), \
                patch("tools.memory_tool.memory_tool", return_value="Saved.") as memory_mock:
            agent.flush_memories(conversation)

        stream_mock.assert_called_once()
        memory_mock.assert_called_once()
        assert memory_mock.call_args.kwargs["content"] == "Codex flush test"

View file

@ -0,0 +1,460 @@
"""Provider parity tests: verify that AIAgent builds correct API kwargs
and handles responses properly for all supported providers.
Ensures changes to one provider path don't silently break another.
"""
import json
import os
import sys
import types
from types import SimpleNamespace
from unittest.mock import patch, MagicMock
import pytest
# Register minimal stand-ins for "fire", "firecrawl" and "fal_client", but only
# when no entry with that name is already present in sys.modules.
# NOTE(review): presumably this lets the `run_agent` import below succeed
# without those packages installed -- confirm against run_agent's imports.
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
from run_agent import AIAgent
# ── Helpers ──────────────────────────────────────────────────────────────────
def _tool_defs(*names):
    """Return one chat-completions style function-tool dict per given name.

    Each tool has the description ``"<name> tool"`` and an empty object
    parameter schema. With no names the result is an empty list.
    """

    def _definition(tool_name):
        return {
            "type": "function",
            "function": {
                "name": tool_name,
                "description": f"{tool_name} tool",
                "parameters": {"type": "object", "properties": {}},
            },
        }

    return [_definition(tool_name) for tool_name in names]
class _FakeOpenAI:
    """Stand-in client exposing only ``api_key``, ``base_url`` and ``close()``.

    Missing constructor options default to ``"test"`` and ``"http://test"``.
    """

    def __init__(self, **kw):
        key = kw.get("api_key", "test")
        url = kw.get("base_url", "http://test")
        self.api_key = key
        self.base_url = url

    def close(self):
        """Do nothing."""
        return None
def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"):
    """Build an AIAgent for *provider* with tool discovery and the client class stubbed.

    ``run_agent`` is patched to advertise the "web_search" and "terminal"
    tools, report an empty toolset-requirements dict and use ``_FakeOpenAI``
    as the client class.
    """
    stubs = (
        ("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal")),
        ("run_agent.check_toolset_requirements", lambda: {}),
        ("run_agent.OpenAI", _FakeOpenAI),
    )
    for target, replacement in stubs:
        monkeypatch.setattr(target, replacement)

    agent_options = {
        "api_key": "test-key",
        "base_url": base_url,
        "provider": provider,
        "api_mode": api_mode,
        "max_iterations": 4,
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
    }
    return AIAgent(**agent_options)
# ── _build_api_kwargs tests ─────────────────────────────────────────────────
class TestBuildApiKwargsOpenRouter:
    """Request kwargs built by an agent created for the ``openrouter`` provider."""

    @staticmethod
    def _kwargs_for_greeting(monkeypatch):
        """Build an OpenRouter agent and return its kwargs for a single "hi" user turn."""
        agent = _make_agent(monkeypatch, "openrouter")
        conversation = [{"role": "user", "content": "hi"}]
        return agent._build_api_kwargs(conversation)

    def test_uses_chat_completions_format(self, monkeypatch):
        api_kwargs = self._kwargs_for_greeting(monkeypatch)
        for required in ("messages", "model"):
            assert required in api_kwargs
        assert api_kwargs["messages"][-1]["content"] == "hi"

    def test_includes_reasoning_in_extra_body(self, monkeypatch):
        api_kwargs = self._kwargs_for_greeting(monkeypatch)
        extra_body = api_kwargs.get("extra_body", {})
        assert "reasoning" in extra_body
        assert extra_body["reasoning"]["enabled"] is True

    def test_includes_tools(self, monkeypatch):
        api_kwargs = self._kwargs_for_greeting(monkeypatch)
        assert "tools" in api_kwargs
        advertised = [tool["function"]["name"] for tool in api_kwargs["tools"]]
        assert "web_search" in advertised

    def test_no_responses_api_fields(self, monkeypatch):
        api_kwargs = self._kwargs_for_greeting(monkeypatch)
        for responses_only in ("input", "instructions", "store"):
            assert responses_only not in api_kwargs
class TestBuildApiKwargsNousPortal:
    """Request kwargs built by an agent created for the ``nous`` provider."""

    @staticmethod
    def _kwargs_for_greeting(monkeypatch):
        """Build a Nous agent and return its kwargs for a single "hi" user turn."""
        agent = _make_agent(
            monkeypatch,
            "nous",
            base_url="https://inference-api.nousresearch.com/v1",
        )
        conversation = [{"role": "user", "content": "hi"}]
        return agent._build_api_kwargs(conversation)

    def test_includes_nous_product_tags(self, monkeypatch):
        api_kwargs = self._kwargs_for_greeting(monkeypatch)
        extra_body = api_kwargs.get("extra_body", {})
        assert extra_body.get("tags") == ["product=hermes-agent"]

    def test_uses_chat_completions_format(self, monkeypatch):
        api_kwargs = self._kwargs_for_greeting(monkeypatch)
        assert "messages" in api_kwargs
        assert "input" not in api_kwargs
class TestBuildApiKwargsCustomEndpoint:
    """Request kwargs built by an agent created for the ``custom`` provider."""

    @staticmethod
    def _kwargs_for_greeting(monkeypatch):
        """Build a custom-endpoint agent and return its kwargs for a single "hi" user turn."""
        agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1")
        conversation = [{"role": "user", "content": "hi"}]
        return agent._build_api_kwargs(conversation)

    def test_uses_chat_completions_format(self, monkeypatch):
        api_kwargs = self._kwargs_for_greeting(monkeypatch)
        assert "messages" in api_kwargs
        assert "input" not in api_kwargs

    def test_no_openrouter_extra_body(self, monkeypatch):
        api_kwargs = self._kwargs_for_greeting(monkeypatch)
        extra_body = api_kwargs.get("extra_body", {})
        assert "reasoning" not in extra_body
class TestBuildApiKwargsCodex:
    """Request kwargs built by an ``openai-codex`` agent in ``codex_responses`` mode."""

    @staticmethod
    def _kwargs_for_greeting(monkeypatch):
        """Build a Codex agent and return its kwargs for a single "hi" user turn."""
        agent = _make_agent(
            monkeypatch,
            "openai-codex",
            api_mode="codex_responses",
            base_url="https://chatgpt.com/backend-api/codex",
        )
        conversation = [{"role": "user", "content": "hi"}]
        return agent._build_api_kwargs(conversation)

    def test_uses_responses_api_format(self, monkeypatch):
        api_kwargs = self._kwargs_for_greeting(monkeypatch)
        for required in ("input", "instructions"):
            assert required in api_kwargs
        assert "messages" not in api_kwargs
        assert api_kwargs["store"] is False

    def test_includes_reasoning_config(self, monkeypatch):
        api_kwargs = self._kwargs_for_greeting(monkeypatch)
        assert "reasoning" in api_kwargs
        assert api_kwargs["reasoning"]["effort"] == "medium"

    def test_includes_encrypted_content_in_include(self, monkeypatch):
        api_kwargs = self._kwargs_for_greeting(monkeypatch)
        requested_extras = api_kwargs.get("include", [])
        assert "reasoning.encrypted_content" in requested_extras

    def test_tools_converted_to_responses_format(self, monkeypatch):
        api_kwargs = self._kwargs_for_greeting(monkeypatch)
        tools = api_kwargs.get("tools", [])
        assert len(tools) > 0
        # Responses format has "name" at top level, not nested under "function"
        first_tool = tools[0]
        assert "name" in first_tool
        assert "function" not in first_tool
# ── Message conversion tests ────────────────────────────────────────────────
class TestChatMessagesToResponsesInput:
    """Verify _chat_messages_to_responses_input for Codex mode."""

    @staticmethod
    def _convert(monkeypatch, messages):
        """Build a Codex agent and run *messages* through the conversion under test."""
        agent = _make_agent(
            monkeypatch,
            "openai-codex",
            api_mode="codex_responses",
            base_url="https://chatgpt.com/backend-api/codex",
        )
        return agent._chat_messages_to_responses_input(messages)

    def test_user_message_passes_through(self, monkeypatch):
        converted = self._convert(monkeypatch, [{"role": "user", "content": "hello"}])
        assert converted == [{"role": "user", "content": "hello"}]

    def test_system_messages_filtered(self, monkeypatch):
        conversation = [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "hello"},
        ]
        converted = self._convert(monkeypatch, conversation)
        assert len(converted) == 1
        assert converted[0]["role"] == "user"

    def test_assistant_tool_calls_become_function_call_items(self, monkeypatch):
        tool_call = {
            "id": "call_abc",
            "call_id": "call_abc",
            "function": {"name": "web_search", "arguments": '{"query": "test"}'},
        }
        assistant_turn = {"role": "assistant", "content": "", "tool_calls": [tool_call]}
        converted = self._convert(monkeypatch, [assistant_turn])
        function_calls = [item for item in converted if item.get("type") == "function_call"]
        assert len(function_calls) == 1
        only_call = function_calls[0]
        assert only_call["name"] == "web_search"
        assert only_call["call_id"] == "call_abc"

    def test_tool_results_become_function_call_output(self, monkeypatch):
        tool_result = {"role": "tool", "tool_call_id": "call_abc", "content": "result here"}
        converted = self._convert(monkeypatch, [tool_result])
        first_item = converted[0]
        assert first_item["type"] == "function_call_output"
        assert first_item["call_id"] == "call_abc"
        assert first_item["output"] == "result here"

    def test_encrypted_reasoning_replayed(self, monkeypatch):
        """Encrypted reasoning items from previous turns must be included in input."""
        conversation = [
            {"role": "user", "content": "think about this"},
            {
                "role": "assistant",
                "content": "I thought about it.",
                "codex_reasoning_items": [
                    {"type": "reasoning", "id": "rs_abc", "encrypted_content": "gAAAA_test_blob"},
                ],
            },
            {"role": "user", "content": "continue"},
        ]
        converted = self._convert(monkeypatch, conversation)
        replayed = [item for item in converted if item.get("type") == "reasoning"]
        assert len(replayed) == 1
        assert replayed[0]["encrypted_content"] == "gAAAA_test_blob"

    def test_no_reasoning_items_for_non_codex_messages(self, monkeypatch):
        """Messages without codex_reasoning_items should not inject anything."""
        conversation = [
            {"role": "assistant", "content": "hi"},
            {"role": "user", "content": "hello"},
        ]
        converted = self._convert(monkeypatch, conversation)
        replayed = [item for item in converted if item.get("type") == "reasoning"]
        assert len(replayed) == 0
# ── Response normalization tests ─────────────────────────────────────────────
class TestNormalizeCodexResponse:
    """Verify _normalize_codex_response extracts all fields correctly."""

    def _make_codex_agent(self, monkeypatch):
        """Build an ``openai-codex`` agent in ``codex_responses`` mode."""
        return _make_agent(
            monkeypatch,
            "openai-codex",
            api_mode="codex_responses",
            base_url="https://chatgpt.com/backend-api/codex",
        )

    @staticmethod
    def _message_item(text):
        """Fake completed ``message`` output item holding one ``output_text`` part."""
        return SimpleNamespace(
            type="message",
            status="completed",
            content=[SimpleNamespace(type="output_text", text=text)],
            phase="final_answer",
        )

    @staticmethod
    def _reasoning_item(item_id, encrypted_content, summary_text):
        """Fake ``reasoning`` output item with one summary part and an encrypted blob."""
        return SimpleNamespace(
            type="reasoning",
            encrypted_content=encrypted_content,
            summary=[SimpleNamespace(type="summary_text", text=summary_text)],
            id=item_id,
            status=None,
        )

    def _normalize(self, monkeypatch, *output_items):
        """Wrap *output_items* in a completed response and normalize it with a Codex agent."""
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(output=list(output_items), status="completed")
        return agent._normalize_codex_response(response)

    def test_text_response(self, monkeypatch):
        message, finish_reason = self._normalize(monkeypatch, self._message_item("Hello!"))
        assert message.content == "Hello!"
        assert finish_reason == "stop"

    def test_reasoning_summary_extracted(self, monkeypatch):
        message, finish_reason = self._normalize(
            monkeypatch,
            self._reasoning_item("rs_123", "gAAAA_blob", "Thinking about math"),
            self._message_item("42"),
        )
        assert message.content == "42"
        assert "math" in message.reasoning
        assert finish_reason == "stop"

    def test_encrypted_content_captured(self, monkeypatch):
        message, _finish_reason = self._normalize(
            monkeypatch,
            self._reasoning_item("rs_456", "gAAAA_secret_blob_123", "Thinking"),
            self._message_item("done"),
        )
        assert message.codex_reasoning_items is not None
        assert len(message.codex_reasoning_items) == 1
        captured = message.codex_reasoning_items[0]
        assert captured["encrypted_content"] == "gAAAA_secret_blob_123"
        assert captured["id"] == "rs_456"

    def test_no_encrypted_content_when_missing(self, monkeypatch):
        message, _finish_reason = self._normalize(
            monkeypatch,
            self._message_item("no reasoning"),
        )
        assert message.codex_reasoning_items is None

    def test_tool_calls_extracted(self, monkeypatch):
        function_call = SimpleNamespace(
            type="function_call",
            status="completed",
            call_id="call_xyz",
            name="web_search",
            arguments='{"query":"test"}',
            id="fc_xyz",
        )
        message, finish_reason = self._normalize(monkeypatch, function_call)
        assert finish_reason == "tool_calls"
        assert len(message.tool_calls) == 1
        assert message.tool_calls[0].function.name == "web_search"
# ── Chat completions response handling (OpenRouter/Nous) ─────────────────────
class TestBuildAssistantMessage:
    """Verify _build_assistant_message works for all provider response formats."""

    @staticmethod
    def _provider_message(content, **fields):
        """Fake provider message; reasoning fields and tool_calls default to None."""
        attributes = {
            "content": content,
            "tool_calls": None,
            "reasoning": None,
            "reasoning_content": None,
            "reasoning_details": None,
        }
        attributes.update(fields)
        return SimpleNamespace(**attributes)

    def test_openrouter_reasoning_fields(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        provider_msg = self._provider_message("answer", reasoning="I thought about it")
        built = agent._build_assistant_message(provider_msg, "stop")
        assert built["content"] == "answer"
        assert built["reasoning"] == "I thought about it"
        assert "codex_reasoning_items" not in built

    def test_openrouter_reasoning_details_preserved_unmodified(self, monkeypatch):
        """reasoning_details must be passed back exactly as received for
        multi-turn continuity (OpenRouter, Anthropic, OpenAI all need this)."""
        agent = _make_agent(monkeypatch, "openrouter")
        original_detail = {
            "type": "thinking",
            "thinking": "deep thoughts here",
            "signature": "sig123_opaque_blob",
            "encrypted_content": "some_provider_blob",
            "extra_field": "should_not_be_dropped",
        }
        provider_msg = self._provider_message("answer", reasoning_details=[original_detail])
        built = agent._build_assistant_message(provider_msg, "stop")
        stored_detail = built["reasoning_details"][0]
        # ALL fields must survive, not just type/text/signature
        assert stored_detail["signature"] == "sig123_opaque_blob"
        assert stored_detail["encrypted_content"] == "some_provider_blob"
        assert stored_detail["extra_field"] == "should_not_be_dropped"
        assert stored_detail["thinking"] == "deep thoughts here"

    def test_codex_preserves_encrypted_reasoning(self, monkeypatch):
        agent = _make_agent(
            monkeypatch,
            "openai-codex",
            api_mode="codex_responses",
            base_url="https://chatgpt.com/backend-api/codex",
        )
        provider_msg = self._provider_message(
            "result",
            reasoning="summary text",
            codex_reasoning_items=[
                {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"},
            ],
        )
        built = agent._build_assistant_message(provider_msg, "stop")
        expected_items = [
            {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"},
        ]
        assert built["codex_reasoning_items"] == expected_items

    def test_plain_message_no_codex_items(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        provider_msg = self._provider_message("simple")
        built = agent._build_assistant_message(provider_msg, "stop")
        assert "codex_reasoning_items" not in built
# ── Auxiliary client provider resolution ─────────────────────────────────────
class TestAuxiliaryClientProviderPriority:
    """Verify auxiliary client resolution doesn't break for any provider."""

    def test_openrouter_always_wins(self, monkeypatch):
        """With OPENROUTER_API_KEY set, the client targets an OpenRouter base URL."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        from agent.auxiliary_client import get_text_auxiliary_client

        with patch("agent.auxiliary_client.OpenAI") as openai_cls:
            _client, model_name = get_text_auxiliary_client()

        assert model_name == "google/gemini-3-flash-preview"
        constructed_url = str(openai_cls.call_args.kwargs["base_url"])
        assert "openrouter" in constructed_url.lower()

    def test_nous_when_no_openrouter(self, monkeypatch):
        """Without an OpenRouter key but with Nous auth, the Nous model name is returned."""
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        from agent.auxiliary_client import get_text_auxiliary_client

        nous_auth = {"access_token": "nous-tok"}
        with patch("agent.auxiliary_client._read_nous_auth", return_value=nous_auth), \
                patch("agent.auxiliary_client.OpenAI"):
            _client, model_name = get_text_auxiliary_client()

        assert model_name == "gemini-3-flash"

    def test_custom_endpoint_when_no_nous(self, monkeypatch):
        """Without OpenRouter or Nous, the client is built with OPENAI_BASE_URL."""
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        from agent.auxiliary_client import get_text_auxiliary_client

        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI") as openai_cls:
            _client, _model_name = get_text_auxiliary_client()

        assert openai_cls.call_args.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_codex_fallback_last_resort(self, monkeypatch):
        """With only a Codex access token available, a CodexAuxiliaryClient is returned."""
        for variable in ("OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY"):
            monkeypatch.delenv(variable, raising=False)
        from agent.auxiliary_client import get_text_auxiliary_client, CodexAuxiliaryClient

        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-tok"), \
                patch("agent.auxiliary_client.OpenAI"):
            aux_client, model_name = get_text_auxiliary_client()

        assert model_name == "gpt-5.3-codex"
        assert isinstance(aux_client, CodexAuxiliaryClient)

View file

@ -0,0 +1,748 @@
import sys
import types
from types import SimpleNamespace
import pytest
# Register lightweight stand-ins for "fire", "firecrawl" and "fal_client" before
# run_agent is imported below. setdefault() leaves an already-imported real
# module untouched.
# NOTE(review): presumably run_agent imports these (directly or transitively)
# and they may be missing in the test environment -- confirm.
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
import run_agent
def _patch_agent_bootstrap(monkeypatch):
    """Patch ``run_agent`` so tool discovery returns one fake ``terminal`` tool.

    ``get_tool_definitions`` is replaced with a function that accepts any
    keyword arguments and returns a fresh single-element list on every call;
    ``check_toolset_requirements`` is replaced with one returning an empty dict.
    """

    def _fake_tool_definitions(**kwargs):
        terminal_tool = {
            "type": "function",
            "function": {
                "name": "terminal",
                "description": "Run shell commands.",
                "parameters": {"type": "object", "properties": {}},
            },
        }
        return [terminal_tool]

    def _no_requirements():
        return {}

    monkeypatch.setattr(run_agent, "get_tool_definitions", _fake_tool_definitions)
    monkeypatch.setattr(run_agent, "check_toolset_requirements", _no_requirements)
def _build_agent(monkeypatch):
    """Return an ``AIAgent`` pointed at the Codex backend URL with persistence stubbed.

    Tool discovery is patched first, then the cleanup/persist/save hooks on the
    instance are replaced with functions that do nothing and return ``None``.
    """
    _patch_agent_bootstrap(monkeypatch)

    codex_settings = {
        "model": "gpt-5-codex",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "api_key": "codex-token",
        "quiet_mode": True,
        "max_iterations": 4,
        "skip_context_files": True,
        "skip_memory": True,
    }
    agent = run_agent.AIAgent(**codex_settings)

    # Each stub keeps the exact signature of the hook it replaces.
    def _skip_cleanup(task_id):
        return None

    def _skip_persist(messages, history=None):
        return None

    def _skip_trajectory(messages, user_message, completed):
        return None

    def _skip_session_log(messages):
        return None

    agent._cleanup_task_resources = _skip_cleanup
    agent._persist_session = _skip_persist
    agent._save_trajectory = _skip_trajectory
    agent._save_session_log = _skip_session_log
    return agent
def _codex_message_response(text: str):
return SimpleNamespace(
output=[
SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text=text)],
)
],
usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8),
status="completed",
model="gpt-5-codex",
)
def _codex_tool_call_response():
return SimpleNamespace(
output=[
SimpleNamespace(
type="function_call",
id="fc_1",
call_id="call_1",
name="terminal",
arguments="{}",
)
],
usage=SimpleNamespace(input_tokens=12, output_tokens=4, total_tokens=16),
status="completed",
model="gpt-5-codex",
)
def _codex_incomplete_message_response(text: str):
return SimpleNamespace(
output=[
SimpleNamespace(
type="message",
status="in_progress",
content=[SimpleNamespace(type="output_text", text=text)],
)
],
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
status="in_progress",
model="gpt-5-codex",
)
def _codex_commentary_message_response(text: str):
return SimpleNamespace(
output=[
SimpleNamespace(
type="message",
phase="commentary",
status="completed",
content=[SimpleNamespace(type="output_text", text=text)],
)
],
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
status="completed",
model="gpt-5-codex",
)
def _codex_ack_message_response(text: str):
return SimpleNamespace(
output=[
SimpleNamespace(
type="message",
status="completed",
content=[SimpleNamespace(type="output_text", text=text)],
)
],
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
status="completed",
model="gpt-5-codex",
)
class _FakeResponsesStream:
def __init__(self, *, final_response=None, final_error=None):
self._final_response = final_response
self._final_error = final_error
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def __iter__(self):
return iter(())
def get_final_response(self):
if self._final_error is not None:
raise self._final_error
return self._final_response
class _FakeCreateStream:
def __init__(self, events):
self._events = list(events)
self.closed = False
def __iter__(self):
return iter(self._events)
def close(self):
self.closed = True
def _codex_request_kwargs():
return {
"model": "gpt-5-codex",
"instructions": "You are Hermes.",
"input": [{"role": "user", "content": "Ping"}],
"tools": None,
"store": False,
}
def test_api_mode_uses_explicit_provider_when_codex(monkeypatch):
    """An explicit ``openai-codex`` provider selects ``codex_responses`` even with an OpenRouter URL."""
    _patch_agent_bootstrap(monkeypatch)
    init_kwargs = {
        "model": "gpt-5-codex",
        "base_url": "https://openrouter.ai/api/v1",
        "provider": "openai-codex",
        "api_key": "codex-token",
        "quiet_mode": True,
        "max_iterations": 1,
        "skip_context_files": True,
        "skip_memory": True,
    }
    agent = run_agent.AIAgent(**init_kwargs)
    assert agent.api_mode == "codex_responses"
    assert agent.provider == "openai-codex"
def test_api_mode_normalizes_provider_case(monkeypatch):
    """A mixed-case provider name is stored lower-cased and still selects ``codex_responses``."""
    _patch_agent_bootstrap(monkeypatch)
    init_kwargs = {
        "model": "gpt-5-codex",
        "base_url": "https://openrouter.ai/api/v1",
        "provider": "OpenAI-Codex",
        "api_key": "codex-token",
        "quiet_mode": True,
        "max_iterations": 1,
        "skip_context_files": True,
        "skip_memory": True,
    }
    agent = run_agent.AIAgent(**init_kwargs)
    assert agent.provider == "openai-codex"
    assert agent.api_mode == "codex_responses"
def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypatch):
    """An explicit ``openrouter`` provider keeps ``chat_completions`` despite a Codex base URL."""
    _patch_agent_bootstrap(monkeypatch)
    init_kwargs = {
        "model": "gpt-5-codex",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "provider": "openrouter",
        "api_key": "test-token",
        "quiet_mode": True,
        "max_iterations": 1,
        "skip_context_files": True,
        "skip_memory": True,
    }
    agent = run_agent.AIAgent(**init_kwargs)
    assert agent.api_mode == "chat_completions"
    assert agent.provider == "openrouter"
def test_build_api_kwargs_codex(monkeypatch):
    """Check the request payload ``_build_api_kwargs`` produces for the Codex agent.

    Given a system message and a user message, the payload must carry the
    system text in ``instructions``, start ``input`` with the user turn, expose
    the tool with ``name`` at the top level (no nested ``function`` key), and
    contain none of ``timeout``, ``max_tokens`` or ``extra_body``.
    """
    agent = _build_agent(monkeypatch)
    kwargs = agent._build_api_kwargs(
        [
            {"role": "system", "content": "You are Hermes."},
            {"role": "user", "content": "Ping"},
        ]
    )

    # Top-level request fields. ``store`` is asserted once; the original
    # repeated this assertion further down.
    assert kwargs["model"] == "gpt-5-codex"
    assert kwargs["instructions"] == "You are Hermes."
    assert kwargs["store"] is False

    # Conversation input.
    assert isinstance(kwargs["input"], list)
    assert kwargs["input"][0]["role"] == "user"

    # Tool schema.
    first_tool = kwargs["tools"][0]
    assert first_tool["type"] == "function"
    assert first_tool["name"] == "terminal"
    assert first_tool["strict"] is False
    assert "function" not in first_tool

    # Fields that must be absent from the payload.
    assert "timeout" not in kwargs
    assert "max_tokens" not in kwargs
    assert "extra_body" not in kwargs
def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
    """A missing ``response.completed`` on the first stream is retried via a second stream."""
    agent = _build_agent(monkeypatch)
    stream_attempts = []

    def _fake_stream(**kwargs):
        stream_attempts.append(kwargs)
        if len(stream_attempts) > 1:
            return _FakeResponsesStream(final_response=_codex_message_response("stream ok"))
        return _FakeResponsesStream(
            final_error=RuntimeError("Didn't receive a `response.completed` event.")
        )

    def _fake_create(**kwargs):
        return _codex_message_response("fallback")

    fake_responses = SimpleNamespace(stream=_fake_stream, create=_fake_create)
    agent.client = SimpleNamespace(responses=fake_responses)

    response = agent._run_codex_stream(_codex_request_kwargs())

    assert len(stream_attempts) == 2
    assert response.output[0].content[0].text == "stream ok"
def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(monkeypatch):
    """After two streams fail with the completion error, ``create`` is called once."""
    agent = _build_agent(monkeypatch)
    stream_attempts = []
    create_attempts = []

    def _fake_stream(**kwargs):
        stream_attempts.append(kwargs)
        completion_error = RuntimeError("Didn't receive a `response.completed` event.")
        return _FakeResponsesStream(final_error=completion_error)

    def _fake_create(**kwargs):
        create_attempts.append(kwargs)
        return _codex_message_response("create fallback ok")

    fake_responses = SimpleNamespace(stream=_fake_stream, create=_fake_create)
    agent.client = SimpleNamespace(responses=fake_responses)

    response = agent._run_codex_stream(_codex_request_kwargs())

    assert len(stream_attempts) == 2
    assert len(create_attempts) == 1
    assert response.output[0].content[0].text == "create fallback ok"
def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
    """The ``create`` fallback streams, takes the ``response.completed`` payload, and closes."""
    agent = _build_agent(monkeypatch)
    stream_attempts = []
    create_attempts = []

    completed_event = SimpleNamespace(
        type="response.completed",
        response=_codex_message_response("streamed create ok"),
    )
    create_stream = _FakeCreateStream(
        [
            SimpleNamespace(type="response.created"),
            SimpleNamespace(type="response.in_progress"),
            completed_event,
        ]
    )

    def _fake_stream(**kwargs):
        stream_attempts.append(kwargs)
        completion_error = RuntimeError("Didn't receive a `response.completed` event.")
        return _FakeResponsesStream(final_error=completion_error)

    def _fake_create(**kwargs):
        create_attempts.append(kwargs)
        # The fallback must explicitly request streaming.
        assert kwargs.get("stream") is True
        return create_stream

    fake_responses = SimpleNamespace(stream=_fake_stream, create=_fake_create)
    agent.client = SimpleNamespace(responses=fake_responses)

    response = agent._run_codex_stream(_codex_request_kwargs())

    assert len(stream_attempts) == 2
    assert len(create_attempts) == 1
    assert create_stream.closed is True
    assert response.output[0].content[0].text == "streamed create ok"
def test_run_conversation_codex_plain_text(monkeypatch):
    """A single text reply completes the conversation and becomes the final assistant message."""
    agent = _build_agent(monkeypatch)

    def _reply_ok(api_kwargs):
        return _codex_message_response("OK")

    monkeypatch.setattr(agent, "_interruptible_api_call", _reply_ok)

    result = agent.run_conversation("Say OK")

    last_message = result["messages"][-1]
    assert result["completed"] is True
    assert result["final_response"] == "OK"
    assert last_message["role"] == "assistant"
    assert last_message["content"] == "OK"
def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
    """A 401 on the first API call triggers one forced credential refresh and one retry."""
    agent = _build_agent(monkeypatch)
    api_calls = []
    refresh_calls = []

    class _UnauthorizedError(RuntimeError):
        """RuntimeError carrying ``status_code = 401``."""

        def __init__(self):
            super().__init__("Error code: 401 - unauthorized")
            self.status_code = 401

    def _fake_api_call(api_kwargs):
        api_calls.append(api_kwargs)
        if len(api_calls) > 1:
            return _codex_message_response("Recovered after refresh")
        raise _UnauthorizedError()

    def _fake_refresh(*, force=True):
        refresh_calls.append(force)
        assert force is True
        return True

    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
    monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh)

    result = agent.run_conversation("Say OK")

    assert len(api_calls) == 2
    assert len(refresh_calls) == 1
    assert result["completed"] is True
    assert result["final_response"] == "Recovered after refresh"
def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch):
    """A refresh closes the old client and builds a new one from the resolved credentials."""
    agent = _build_agent(monkeypatch)
    close_events = []
    constructor_calls = []

    class _ExistingClient:
        def close(self):
            close_events.append(True)

    class _RebuiltClient:
        pass

    def _fake_openai(**kwargs):
        constructor_calls.append(kwargs)
        return _RebuiltClient()

    def _fresh_credentials(force_refresh=True):
        return {
            "api_key": "new-codex-token",
            "base_url": "https://chatgpt.com/backend-api/codex",
        }

    monkeypatch.setattr(
        "hermes_cli.auth.resolve_codex_runtime_credentials",
        _fresh_credentials,
    )
    monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)
    agent.client = _ExistingClient()

    refreshed = agent._try_refresh_codex_client_credentials(force=True)

    assert refreshed is True
    assert bool(close_events) is True
    # Inspect the most recent constructor call.
    latest_kwargs = constructor_calls[-1]
    assert latest_kwargs["api_key"] == "new-codex-token"
    assert latest_kwargs["base_url"] == "https://chatgpt.com/backend-api/codex"
    assert isinstance(agent.client, _RebuiltClient)
def test_run_conversation_codex_tool_round_trip(monkeypatch):
    """A tool call followed by a text reply yields both an assistant tool-call and a tool result."""
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]

    def _next_response(api_kwargs):
        return responses.pop(0)

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        # Answer every requested call with a fixed JSON payload.
        messages.extend(
            {"role": "tool", "tool_call_id": call.id, "content": '{"ok":true}'}
            for call in assistant_message.tool_calls
        )

    monkeypatch.setattr(agent, "_interruptible_api_call", _next_response)
    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("run a command")
    transcript = result["messages"]

    assert result["completed"] is True
    assert result["final_response"] == "done"
    assert any(m.get("tool_calls") for m in transcript if m.get("role") == "assistant")
    assert any(
        m.get("role") == "tool" and m.get("tool_call_id") == "call_1" for m in transcript
    )
def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeypatch):
    """The tool-call id becomes ``call_id`` on both converted items; no ``id`` is emitted."""
    agent = _build_agent(monkeypatch)
    tool_call = {
        "id": "call_abc123",
        "type": "function",
        "function": {"name": "terminal", "arguments": "{}"},
    }
    chat_history = [
        {"role": "user", "content": "Run terminal"},
        {"role": "assistant", "content": "", "tool_calls": [tool_call]},
        {"role": "tool", "tool_call_id": "call_abc123", "content": '{"ok":true}'},
    ]

    items = agent._chat_messages_to_responses_input(chat_history)

    function_call = next(i for i in items if i.get("type") == "function_call")
    function_output = next(i for i in items if i.get("type") == "function_call_output")
    assert function_call["call_id"] == "call_abc123"
    assert "id" not in function_call
    assert function_output["call_id"] == "call_abc123"
def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
    """An id of the form ``call_x|fc_x`` is reduced to its ``call_x`` part on both items."""
    agent = _build_agent(monkeypatch)
    paired_id = "call_pair123|fc_pair123"
    tool_call = {
        "id": paired_id,
        "type": "function",
        "function": {"name": "terminal", "arguments": "{}"},
    }
    chat_history = [
        {"role": "user", "content": "Run terminal"},
        {"role": "assistant", "content": "", "tool_calls": [tool_call]},
        {"role": "tool", "tool_call_id": paired_id, "content": '{"ok":true}'},
    ]

    items = agent._chat_messages_to_responses_input(chat_history)

    function_call = next(i for i in items if i.get("type") == "function_call")
    function_output = next(i for i in items if i.get("type") == "function_call_output")
    assert function_call["call_id"] == "call_pair123"
    assert "id" not in function_call
    assert function_output["call_id"] == "call_pair123"
def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch):
    """Preflight drops ``id`` from a ``function_call`` input item and keeps ``call_id``."""
    agent = _build_agent(monkeypatch)
    call_with_both_ids = {
        "type": "function_call",
        "id": "call_bad",
        "call_id": "call_good",
        "name": "terminal",
        "arguments": "{}",
    }
    request = {
        "model": "gpt-5-codex",
        "instructions": "You are Hermes.",
        "input": [{"role": "user", "content": "hi"}, call_with_both_ids],
        "tools": [],
        "store": False,
    }

    preflight = agent._preflight_codex_api_kwargs(request)

    fn_call = next(i for i in preflight["input"] if i.get("type") == "function_call")
    assert fn_call["call_id"] == "call_good"
    assert "id" not in fn_call
def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id(monkeypatch):
    """Preflight raises ``ValueError`` for a ``function_call_output`` item lacking ``call_id``."""
    agent = _build_agent(monkeypatch)
    orphan_output = {"type": "function_call_output", "output": "{}"}
    request = {
        "model": "gpt-5-codex",
        "instructions": "You are Hermes.",
        "input": [orphan_output],
        "tools": [],
        "store": False,
    }
    with pytest.raises(ValueError, match="function_call_output is missing call_id"):
        agent._preflight_codex_api_kwargs(request)
def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch):
    """Preflight raises ``ValueError`` when the request carries an unknown top-level key."""
    agent = _build_agent(monkeypatch)
    request = {**_codex_request_kwargs(), "some_unknown_field": "value"}
    with pytest.raises(ValueError, match="unsupported field"):
        agent._preflight_codex_api_kwargs(request)
def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch):
    """Preflight passes ``reasoning``, ``include``, ``temperature`` and ``max_output_tokens`` through."""
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    kwargs.update(
        reasoning={"effort": "high", "summary": "auto"},
        include=["reasoning.encrypted_content"],
        temperature=0.7,
        max_output_tokens=4096,
    )

    result = agent._preflight_codex_api_kwargs(kwargs)

    assert result["reasoning"] == {"effort": "high", "summary": "auto"}
    assert result["include"] == ["reasoning.encrypted_content"]
    assert result["temperature"] == 0.7
    assert result["max_output_tokens"] == 4096
def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
    """The second request replays the tool call and its output under the same ``call_id``."""
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]
    requests = []

    def _fake_api_call(api_kwargs):
        # Record each outgoing payload for inspection below.
        requests.append(api_kwargs)
        return responses.pop(0)

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        messages.extend(
            {"role": "tool", "tool_call_id": call.id, "content": '{"ok":true}'}
            for call in assistant_message.tool_calls
        )

    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("run a command")

    assert result["completed"] is True
    assert result["final_response"] == "done"
    assert len(requests) >= 2

    replay_input = requests[1]["input"]
    function_call = next(i for i in replay_input if i.get("type") == "function_call")
    function_output = next(
        i for i in replay_input if i.get("type") == "function_call_output"
    )
    assert function_call["call_id"] == "call_1"
    assert "id" not in function_call
    assert function_output["call_id"] == "call_1"
def test_run_conversation_codex_continues_after_incomplete_interim_message(monkeypatch):
    """An ``in_progress`` interim message is kept as incomplete and the loop continues."""
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_incomplete_message_response("I'll inspect the repo structure first."),
        _codex_tool_call_response(),
        _codex_message_response("Architecture summary complete."),
    ]

    def _next_response(api_kwargs):
        return responses.pop(0)

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        messages.extend(
            {"role": "tool", "tool_call_id": call.id, "content": '{"ok":true}'}
            for call in assistant_message.tool_calls
        )

    monkeypatch.setattr(agent, "_interruptible_api_call", _next_response)
    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("analyze repo")
    transcript = result["messages"]

    assert result["completed"] is True
    assert result["final_response"] == "Architecture summary complete."
    assert any(
        m.get("role") == "assistant"
        and m.get("finish_reason") == "incomplete"
        and "inspect the repo structure" in (m.get("content") or "")
        for m in transcript
    )
    assert any(
        m.get("role") == "tool" and m.get("tool_call_id") == "call_1" for m in transcript
    )
def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch):
    """A response containing only a commentary-phase message normalizes to ``incomplete``."""
    agent = _build_agent(monkeypatch)
    commentary = _codex_commentary_message_response("I'll inspect the repository first.")

    assistant_message, finish_reason = agent._normalize_codex_response(commentary)

    assert finish_reason == "incomplete"
    normalized_text = assistant_message.content or ""
    assert "inspect the repository" in normalized_text
def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch):
    """A commentary-phase message is kept as incomplete and the loop continues."""
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_commentary_message_response("I'll inspect the repo structure first."),
        _codex_tool_call_response(),
        _codex_message_response("Architecture summary complete."),
    ]

    def _next_response(api_kwargs):
        return responses.pop(0)

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        messages.extend(
            {"role": "tool", "tool_call_id": call.id, "content": '{"ok":true}'}
            for call in assistant_message.tool_calls
        )

    monkeypatch.setattr(agent, "_interruptible_api_call", _next_response)
    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("analyze repo")
    transcript = result["messages"]

    assert result["completed"] is True
    assert result["final_response"] == "Architecture summary complete."
    assert any(
        m.get("role") == "assistant"
        and m.get("finish_reason") == "incomplete"
        and "inspect the repo structure" in (m.get("content") or "")
        for m in transcript
    )
    assert any(
        m.get("role") == "tool" and m.get("tool_call_id") == "call_1" for m in transcript
    )
def test_run_conversation_codex_continues_after_ack_stop_message(monkeypatch):
    """An acknowledgement-only reply is marked incomplete and followed by a continue prompt."""
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_ack_message_response(
            "Absolutely — I can do that. I'll inspect ~/openclaw-studio and report back with a walkthrough."
        ),
        _codex_tool_call_response(),
        _codex_message_response("Architecture summary complete."),
    ]

    def _next_response(api_kwargs):
        return responses.pop(0)

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        messages.extend(
            {"role": "tool", "tool_call_id": call.id, "content": '{"ok":true}'}
            for call in assistant_message.tool_calls
        )

    monkeypatch.setattr(agent, "_interruptible_api_call", _next_response)
    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("look into ~/openclaw-studio and tell me how it works")
    transcript = result["messages"]

    assert result["completed"] is True
    assert result["final_response"] == "Architecture summary complete."
    # The acknowledgement stays in the transcript, flagged as incomplete.
    assert any(
        m.get("role") == "assistant"
        and m.get("finish_reason") == "incomplete"
        and "inspect ~/openclaw-studio" in (m.get("content") or "")
        for m in transcript
    )
    # A user-role message telling the model to continue was added.
    assert any(
        m.get("role") == "user"
        and "Continue now. Execute the required tool calls" in (m.get("content") or "")
        for m in transcript
    )
    assert any(
        m.get("role") == "tool" and m.get("tool_call_id") == "call_1" for m in transcript
    )
def test_run_conversation_codex_continues_after_ack_for_directory_listing_prompt(monkeypatch):
    """An acknowledgement to a directory-listing request is marked incomplete and continued."""
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_ack_message_response(
            "I'll check what's in the current directory and call out 3 notable items."
        ),
        _codex_tool_call_response(),
        _codex_message_response("Directory summary complete."),
    ]

    def _next_response(api_kwargs):
        return responses.pop(0)

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        messages.extend(
            {"role": "tool", "tool_call_id": call.id, "content": '{"ok":true}'}
            for call in assistant_message.tool_calls
        )

    monkeypatch.setattr(agent, "_interruptible_api_call", _next_response)
    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("look at current directory and list 3 notable things")
    transcript = result["messages"]

    assert result["completed"] is True
    assert result["final_response"] == "Directory summary complete."
    # The acknowledgement stays in the transcript, flagged as incomplete.
    assert any(
        m.get("role") == "assistant"
        and m.get("finish_reason") == "incomplete"
        and "current directory" in (m.get("content") or "")
        for m in transcript
    )
    # A user-role message telling the model to continue was added.
    assert any(
        m.get("role") == "user"
        and "Continue now. Execute the required tool calls" in (m.get("content") or "")
        for m in transcript
    )
    assert any(
        m.get("role") == "tool" and m.get("tool_call_id") == "call_1" for m in transcript
    )

View file

@ -0,0 +1,95 @@
from hermes_cli import runtime_provider as rp
def test_resolve_runtime_provider_codex(monkeypatch):
    """Resolving ``openai-codex`` returns the Codex credentials and ``codex_responses`` mode."""
    codex_credentials = {
        "provider": "openai-codex",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "api_key": "codex-token",
        "source": "codex-auth-json",
        "auth_file": "/tmp/auth.json",
        "codex_home": "/tmp/codex",
        "last_refresh": "2026-02-26T00:00:00Z",
    }

    def _fake_credentials():
        # Fresh copy per call, as the original lambda built a new dict each time.
        return dict(codex_credentials)

    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex")
    monkeypatch.setattr(rp, "resolve_codex_runtime_credentials", _fake_credentials)

    resolved = rp.resolve_runtime_provider(requested="openai-codex")

    assert resolved["provider"] == "openai-codex"
    assert resolved["api_mode"] == "codex_responses"
    assert resolved["base_url"] == "https://chatgpt.com/backend-api/codex"
    assert resolved["api_key"] == "codex-token"
    assert resolved["requested_provider"] == "openai-codex"
def test_resolve_runtime_provider_openrouter_explicit(monkeypatch):
    """Explicit key and URL are used as given, with the trailing slash removed from the URL."""
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
    for variable in (
        "OPENAI_BASE_URL",
        "OPENROUTER_BASE_URL",
        "OPENAI_API_KEY",
        "OPENROUTER_API_KEY",
    ):
        monkeypatch.delenv(variable, raising=False)

    resolved = rp.resolve_runtime_provider(
        requested="openrouter",
        explicit_api_key="test-key",
        explicit_base_url="https://example.com/v1/",
    )

    assert resolved["provider"] == "openrouter"
    assert resolved["api_mode"] == "chat_completions"
    assert resolved["api_key"] == "test-key"
    assert resolved["base_url"] == "https://example.com/v1"
    assert resolved["source"] == "explicit"
def test_resolve_runtime_provider_openrouter_ignores_codex_config_base_url(monkeypatch):
    """A Codex base URL in the model config is not used when OpenRouter is requested."""
    codex_model_config = {
        "provider": "openai-codex",
        "base_url": "https://chatgpt.com/backend-api/codex",
    }
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
    monkeypatch.setattr(rp, "_get_model_config", lambda: dict(codex_model_config))
    for variable in (
        "OPENAI_BASE_URL",
        "OPENROUTER_BASE_URL",
        "OPENAI_API_KEY",
        "OPENROUTER_API_KEY",
    ):
        monkeypatch.delenv(variable, raising=False)

    resolved = rp.resolve_runtime_provider(requested="openrouter")

    assert resolved["provider"] == "openrouter"
    assert resolved["base_url"] == rp.OPENROUTER_BASE_URL
def test_resolve_runtime_provider_auto_uses_custom_config_base_url(monkeypatch):
    """With ``auto`` requested, the config's custom base URL is used minus its trailing slash."""
    auto_model_config = {
        "provider": "auto",
        "base_url": "https://custom.example/v1/",
    }
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
    monkeypatch.setattr(rp, "_get_model_config", lambda: dict(auto_model_config))
    for variable in (
        "OPENAI_BASE_URL",
        "OPENROUTER_BASE_URL",
        "OPENAI_API_KEY",
        "OPENROUTER_API_KEY",
    ):
        monkeypatch.delenv(variable, raising=False)

    resolved = rp.resolve_runtime_provider(requested="auto")

    assert resolved["provider"] == "openrouter"
    assert resolved["base_url"] == "https://custom.example/v1"
def test_resolve_requested_provider_precedence(monkeypatch):
    """An explicitly passed provider wins over both the env var and the model config."""
    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous")
    monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "openai-codex"})

    chosen = rp.resolve_requested_provider("openrouter")

    assert chosen == "openrouter"

View file

@ -30,6 +30,9 @@ def _make_mock_parent(depth=0):
"""Create a mock parent agent with the fields delegate_task expects."""
parent = MagicMock()
parent.base_url = "https://openrouter.ai/api/v1"
parent.api_key = "parent-key"
parent.provider = "openrouter"
parent.api_mode = "chat_completions"
parent.model = "anthropic/claude-sonnet-4"
parent.platform = "cli"
parent.providers_allowed = None
@ -218,6 +221,30 @@ class TestDelegateTask(unittest.TestCase):
delegate_task(goal="Test tracking", parent_agent=parent)
self.assertEqual(len(parent._active_children), 0)
def test_child_inherits_runtime_credentials(self):
    """The child agent is constructed with the parent's URL, key, provider and API mode."""
    parent = _make_mock_parent(depth=0)
    codex_runtime = {
        "base_url": "https://chatgpt.com/backend-api/codex",
        "api_key": "codex-token",
        "provider": "openai-codex",
        "api_mode": "codex_responses",
    }
    for attribute, value in codex_runtime.items():
        setattr(parent, attribute, value)

    with patch("run_agent.AIAgent") as MockAgent:
        child = MagicMock()
        child.run_conversation.return_value = {
            "final_response": "ok",
            "completed": True,
            "api_calls": 1,
        }
        MockAgent.return_value = child

        delegate_task(goal="Test runtime inheritance", parent_agent=parent)

        _positional, child_kwargs = MockAgent.call_args

    self.assertEqual(child_kwargs["base_url"], parent.base_url)
    self.assertEqual(child_kwargs["api_key"], parent.api_key)
    self.assertEqual(child_kwargs["provider"], parent.provider)
    self.assertEqual(child_kwargs["api_mode"], parent.api_mode)
class TestBlockedTools(unittest.TestCase):
def test_blocked_tools_constant(self):

View file

@ -0,0 +1,483 @@
"""Live integration tests for file operations and terminal tools.
These tests run REAL commands through the LocalEnvironment -- no mocks.
They verify that shell noise is properly filtered, commands actually work,
and the tool outputs are EXACTLY what the agent would see.
Every test with output validates against a known-good value AND
asserts zero contamination from shell noise via _assert_clean().
"""
import json
import os
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
from tools.environments.local import LocalEnvironment, _clean_shell_noise, _SHELL_NOISE_SUBSTRINGS
from tools.file_operations import ShellFileOperations
# ── Shared noise detection ───────────────────────────────────────────────
# Every known shell noise pattern. If ANY of these appear in output that
# isn't explicitly expected, the test fails with a clear message.
_ALL_NOISE_PATTERNS = [
    *_SHELL_NOISE_SUBSTRINGS,
    "bash: ",
    "Inappropriate ioctl",
]
def _assert_clean(text: str, context: str = "output"):
    """Fail if ``text`` contains any entry of ``_ALL_NOISE_PATTERNS``.

    Empty or ``None`` text passes. ``context`` labels the checked value in the
    failure message, which also shows the first 500 characters of ``text``.
    """
    if text:
        for pattern in _ALL_NOISE_PATTERNS:
            assert pattern not in text, (
                f"Shell noise leaked into {context}: found {pattern!r} in:\n"
                f"{text[:500]}"
            )
# ── Fixtures ─────────────────────────────────────────────────────────────
# Deterministic file content used across tests. Every byte is known,
# so any unexpected text in results is immediately caught.
# Three short lines, each newline-terminated.
SIMPLE_CONTENT = "alpha\nbravo\ncharlie\n"
# Fifty newline-terminated lines: LINE_0001 through LINE_0050.
NUMBERED_CONTENT = "".join(f"LINE_{i:04d}\n" for i in range(1, 51))
# One line containing both quote styles, "$", backticks and a backslash.
SPECIAL_CONTENT = "single 'quotes' and \"doubles\" and $VARS and `backticks` and \\backslash\n"
# Two small Python sources plus one text file with unrelated content.
MULTIFILE_A = "def func_alpha():\n return 42\n"
MULTIFILE_B = "def func_bravo():\n return 99\n"
MULTIFILE_C = "nothing relevant here\n"
@pytest.fixture
def env(tmp_path):
    """Provide a ``LocalEnvironment`` with ``tmp_path`` as cwd and a timeout of 15."""
    workdir = str(tmp_path)
    return LocalEnvironment(cwd=workdir, timeout=15)
@pytest.fixture
def ops(env, tmp_path):
    """Provide ``ShellFileOperations`` built on the ``env`` fixture, with ``tmp_path`` as cwd."""
    workdir = str(tmp_path)
    return ShellFileOperations(env, cwd=workdir)
@pytest.fixture
def populated_dir(tmp_path):
    """Fill ``tmp_path`` with four files of known content and return it."""
    seed_files = {
        "alpha.py": MULTIFILE_A,
        "bravo.py": MULTIFILE_B,
        "notes.txt": MULTIFILE_C,
        "data.csv": "col1,col2\n1,2\n3,4\n",
    }
    for filename, body in seed_files.items():
        (tmp_path / filename).write_text(body)
    return tmp_path
# ── _clean_shell_noise unit tests ────────────────────────────────────────
class TestCleanShellNoise:
    """Unit tests for ``_clean_shell_noise`` on hand-written input strings.

    Following the convention in this module's docstring, every test that
    expects noise-free output also passes it through ``_assert_clean``.
    """

    def test_single_noise_line(self):
        """One leading noise line is removed."""
        output = "bash: no job control in this shell\nhello world\n"
        result = _clean_shell_noise(output)
        assert result == "hello world\n"
        _assert_clean(result)

    def test_double_noise_lines(self):
        """Two consecutive leading noise lines are removed."""
        output = (
            "bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n"
            "bash: no job control in this shell\n"
            "actual output here\n"
        )
        result = _clean_shell_noise(output)
        assert result == "actual output here\n"
        _assert_clean(result)

    def test_tcsetattr_noise(self):
        """A leading ``tcsetattr`` warning line is removed."""
        output = (
            "bash: [12345: 2 (255)] tcsetattr: Inappropriate ioctl for device\n"
            "real content\n"
        )
        result = _clean_shell_noise(output)
        assert result == "real content\n"
        _assert_clean(result)

    def test_triple_noise_lines(self):
        """Three consecutive leading noise lines are removed."""
        output = (
            "bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n"
            "bash: no job control in this shell\n"
            "bash: [999: 2 (255)] tcsetattr: Inappropriate ioctl for device\n"
            "clean\n"
        )
        result = _clean_shell_noise(output)
        assert result == "clean\n"
        _assert_clean(result)

    def test_no_noise_untouched(self):
        """Input without noise is returned unchanged."""
        assert _clean_shell_noise("hello\nworld\n") == "hello\nworld\n"

    def test_empty_string(self):
        """The empty string is returned unchanged."""
        assert _clean_shell_noise("") == ""

    def test_only_noise_produces_empty(self):
        """Input consisting only of noise leaves no noise behind.

        ``_assert_clean`` accepts an empty result, so this checks only that the
        noise line is gone, not the exact remaining text.
        """
        output = "bash: no job control in this shell\n"
        result = _clean_shell_noise(output)
        _assert_clean(result)

    def test_noise_in_middle_not_stripped(self):
        """Only LEADING noise is stripped -- noise in the middle is real output."""
        output = "real\nbash: no job control in this shell\nmore real\n"
        result = _clean_shell_noise(output)
        assert result == output
# ── LocalEnvironment.execute() ───────────────────────────────────────────
class TestLocalEnvironmentExecute:
    """Run real shell commands through ``LocalEnvironment.execute()``.

    Each test checks the ``returncode`` and/or ``output`` keys of the result
    dict against a known value, and passes any output through ``_assert_clean``.
    """

    def test_echo_exact_output(self, env):
        """``echo`` output comes back verbatim, ignoring surrounding whitespace."""
        result = env.execute("echo DETERMINISTIC_OUTPUT_12345")
        assert result["returncode"] == 0
        assert result["output"].strip() == "DETERMINISTIC_OUTPUT_12345"
        _assert_clean(result["output"])

    def test_printf_no_trailing_newline(self, env):
        """Output without a trailing newline is returned exactly, with nothing appended."""
        result = env.execute("printf 'exact'")
        assert result["returncode"] == 0
        assert result["output"] == "exact"
        _assert_clean(result["output"])

    def test_exit_code_propagated(self, env):
        """The command's exit status is reported as ``returncode``."""
        result = env.execute("exit 42")
        assert result["returncode"] == 42

    def test_stderr_captured_in_output(self, env):
        """Text written to stderr appears in ``output``."""
        result = env.execute("echo STDERR_TEST >&2")
        assert "STDERR_TEST" in result["output"]
        _assert_clean(result["output"])

    def test_cwd_respected(self, env, tmp_path):
        """A per-call ``cwd`` overrides the environment's default working directory."""
        subdir = tmp_path / "subdir_test"
        subdir.mkdir()
        result = env.execute("pwd", cwd=str(subdir))
        assert result["returncode"] == 0
        assert result["output"].strip() == str(subdir)
        _assert_clean(result["output"])

    def test_multiline_exact(self, env):
        """Three sequential ``echo`` commands yield exactly three non-blank lines."""
        result = env.execute("echo AAA; echo BBB; echo CCC")
        lines = [
            line for line in result["output"].strip().split("\n") if line.strip()
        ]
        assert lines == ["AAA", "BBB", "CCC"]
        _assert_clean(result["output"])

    def test_env_var_home(self, env):
        """``$HOME`` inside the shell matches ``Path.home()``."""
        result = env.execute("echo $HOME")
        assert result["returncode"] == 0
        home = result["output"].strip()
        assert home == str(Path.home())
        _assert_clean(result["output"])

    def test_pipe_exact(self, env):
        """A pipeline runs in the shell and its final output is returned."""
        result = env.execute("echo 'one two three' | wc -w")
        assert result["returncode"] == 0
        assert result["output"].strip() == "3"
        _assert_clean(result["output"])

    def test_cat_deterministic_content(self, env, tmp_path):
        """``cat`` of a known file returns its bytes exactly."""
        import shlex

        f = tmp_path / "det.txt"
        f.write_text(SIMPLE_CONTENT)
        # Quote the path so a temp directory containing spaces or shell
        # metacharacters cannot split or alter the command.
        result = env.execute(f"cat {shlex.quote(str(f))}")
        assert result["returncode"] == 0
        assert result["output"] == SIMPLE_CONTENT
        _assert_clean(result["output"])
# ── _has_command ─────────────────────────────────────────────────────────
class TestHasCommand:
    """Check ``ShellFileOperations._has_command`` for present and absent tools."""

    def test_finds_echo(self, ops):
        """``echo`` is reported as available."""
        found = ops._has_command("echo")
        assert found is True

    def test_finds_cat(self, ops):
        """``cat`` is reported as available."""
        found = ops._has_command("cat")
        assert found is True

    def test_finds_sed(self, ops):
        """``sed`` is reported as available."""
        found = ops._has_command("sed")
        assert found is True

    def test_finds_wc(self, ops):
        """``wc`` is reported as available."""
        found = ops._has_command("wc")
        assert found is True

    def test_finds_find(self, ops):
        """``find`` is reported as available."""
        found = ops._has_command("find")
        assert found is True

    def test_missing_command(self, ops):
        """A made-up command name is reported as unavailable."""
        found = ops._has_command("nonexistent_tool_xyz_abc_999")
        assert found is False

    def test_rg_or_grep_available(self, ops):
        """At least one of ``rg`` and ``grep`` must be present."""
        # The generator short-circuits: grep is only probed when rg is missing.
        has_search_tool = any(ops._has_command(tool) for tool in ("rg", "grep"))
        assert has_search_tool, \
            "Neither rg nor grep found -- search_files will break"
# ── read_file ────────────────────────────────────────────────────────────
class TestReadFile:
    """Checks for ``ops.read_file``: content, paths, errors and pagination."""

    def test_exact_content(self, ops, tmp_path):
        """All three words of the simple fixture appear in the returned text."""
        target = tmp_path / "exact.txt"
        target.write_text(SIMPLE_CONTENT)
        outcome = ops.read_file(str(target))
        assert outcome.error is None
        # The returned content carries line-number prefixes, so look for the
        # payload words rather than comparing whole strings.
        assert "alpha" in outcome.content
        assert "bravo" in outcome.content
        assert "charlie" in outcome.content
        assert outcome.total_lines == 3
        _assert_clean(outcome.content)

    def test_absolute_path(self, ops, tmp_path):
        """An absolute path is read without error."""
        target = tmp_path / "abs.txt"
        target.write_text("ABSOLUTE_PATH_CONTENT\n")
        outcome = ops.read_file(str(target))
        assert outcome.error is None
        assert "ABSOLUTE_PATH_CONTENT" in outcome.content
        _assert_clean(outcome.content)

    def test_tilde_expansion(self, ops):
        """A ``~/``-prefixed path resolves to a file in the home directory."""
        scratch = Path.home() / ".hermes_test_tilde_9f8a7b"
        try:
            scratch.write_text("TILDE_EXPANSION_OK\n")
            outcome = ops.read_file("~/.hermes_test_tilde_9f8a7b")
            assert outcome.error is None
            assert "TILDE_EXPANSION_OK" in outcome.content
            _assert_clean(outcome.content)
        finally:
            # Always remove the scratch file from the real home directory.
            scratch.unlink(missing_ok=True)

    def test_nonexistent_returns_error(self, ops, tmp_path):
        """Reading a missing file populates ``error``."""
        missing = tmp_path / "ghost.txt"
        outcome = ops.read_file(str(missing))
        assert outcome.error is not None

    def test_pagination_exact_window(self, ops, tmp_path):
        """``offset=10, limit=5`` returns lines 10 through 14 only."""
        target = tmp_path / "numbered.txt"
        target.write_text(NUMBERED_CONTENT)
        outcome = ops.read_file(str(target), offset=10, limit=5)
        assert outcome.error is None
        assert "LINE_0010" in outcome.content
        assert "LINE_0014" in outcome.content
        assert "LINE_0009" not in outcome.content
        assert "LINE_0015" not in outcome.content
        assert outcome.total_lines == 50
        _assert_clean(outcome.content)

    def test_no_noise_in_content(self, ops, tmp_path):
        """A one-line file reads back without error and passes the clean check."""
        target = tmp_path / "noise_check.txt"
        target.write_text("ONLY_THIS_CONTENT\n")
        outcome = ops.read_file(str(target))
        assert outcome.error is None
        _assert_clean(outcome.content)
# ── write_file ───────────────────────────────────────────────────────────
class TestWriteFile:
    """Checks for ``ops.write_file``, verified by reading back from disk."""

    def test_write_and_verify(self, ops, tmp_path):
        """The byte count is reported and the file holds the exact content."""
        destination = str(tmp_path / "written.txt")
        outcome = ops.write_file(destination, SIMPLE_CONTENT)
        assert outcome.error is None
        expected_size = len(SIMPLE_CONTENT.encode())
        assert outcome.bytes_written == expected_size
        assert Path(destination).read_text() == SIMPLE_CONTENT

    def test_creates_nested_dirs(self, ops, tmp_path):
        """Missing parent directories are created and flagged in the result."""
        destination = str(tmp_path / "a" / "b" / "c" / "deep.txt")
        outcome = ops.write_file(destination, "DEEP_CONTENT\n")
        assert outcome.error is None
        assert outcome.dirs_created is True
        assert Path(destination).read_text() == "DEEP_CONTENT\n"

    def test_overwrites_exact(self, ops, tmp_path):
        """Writing to an existing file replaces its previous content."""
        destination = str(tmp_path / "overwrite.txt")
        Path(destination).write_text("OLD_DATA\n")
        outcome = ops.write_file(destination, "NEW_DATA\n")
        assert outcome.error is None
        assert Path(destination).read_text() == "NEW_DATA\n"

    def test_large_content_via_stdin(self, ops, tmp_path):
        """A 200,000-character payload is written intact."""
        destination = str(tmp_path / "large.txt")
        payload = "X" * 200_000 + "\n"
        outcome = ops.write_file(destination, payload)
        assert outcome.error is None
        assert Path(destination).read_text() == payload

    def test_special_characters_preserved(self, ops, tmp_path):
        """The special-character fixture round-trips to disk unchanged."""
        destination = str(tmp_path / "special.txt")
        outcome = ops.write_file(destination, SPECIAL_CONTENT)
        assert outcome.error is None
        assert Path(destination).read_text() == SPECIAL_CONTENT

    def test_roundtrip_read_write(self, ops, tmp_path):
        """Write through ``ops``, read back through ``ops``, verify content."""
        destination = str(tmp_path / "roundtrip.txt")
        ops.write_file(destination, SIMPLE_CONTENT)
        read_back = ops.read_file(destination)
        assert read_back.error is None
        assert "alpha" in read_back.content
        assert "charlie" in read_back.content
        _assert_clean(read_back.content)
# ── patch_replace ────────────────────────────────────────────────────────
class TestPatchReplace:
    """Checks for ``ops.patch_replace`` on small on-disk files."""

    def test_exact_replacement(self, ops, tmp_path):
        """A single-word replacement rewrites the file as expected."""
        target = Path(str(tmp_path / "patch.txt"))
        target.write_text("hello world\n")
        outcome = ops.patch_replace(str(target), "world", "earth")
        assert outcome.error is None
        assert target.read_text() == "hello earth\n"

    def test_not_found_error(self, ops, tmp_path):
        """A search string absent from the file yields a descriptive error."""
        target = Path(str(tmp_path / "patch2.txt"))
        target.write_text("hello\n")
        outcome = ops.patch_replace(
            str(target), "NONEXISTENT_STRING", "replacement"
        )
        assert outcome.error is not None
        assert "Could not find" in outcome.error

    def test_multiline_patch(self, ops, tmp_path):
        """Replacing the middle line leaves its neighbours untouched."""
        target = Path(str(tmp_path / "multi.txt"))
        target.write_text("line1\nline2\nline3\n")
        outcome = ops.patch_replace(str(target), "line2", "REPLACED")
        assert outcome.error is None
        assert target.read_text() == "line1\nREPLACED\nline3\n"
# ── search ───────────────────────────────────────────────────────────────
class TestSearch:
    """Checks for ``ops.search`` in content and file modes."""

    def test_content_search_finds_exact_match(self, ops, populated_dir):
        """A known identifier is found, and every match passes the clean check."""
        root = str(populated_dir)
        outcome = ops.search("func_alpha", root, target="content")
        assert outcome.error is None
        assert outcome.total_count >= 1
        assert any("func_alpha" in hit.content for hit in outcome.matches)
        for hit in outcome.matches:
            _assert_clean(hit.content)
            _assert_clean(hit.path)

    def test_content_search_no_false_positives(self, ops, populated_dir):
        """A pattern that occurs nowhere produces zero matches."""
        root = str(populated_dir)
        outcome = ops.search("ZZZZZ_NONEXISTENT", root, target="content")
        assert outcome.error is None
        assert outcome.total_count == 0
        assert len(outcome.matches) == 0

    def test_file_search_finds_py_files(self, ops, populated_dir):
        """A ``*.py`` file search returns the Python files and not ``notes.txt``."""
        root = str(populated_dir)
        outcome = ops.search("*.py", root, target="files")
        assert outcome.error is None
        assert outcome.total_count >= 2
        # Collect base names while checking each returned entry.
        base_names = set()
        for entry in outcome.files:
            base_names.add(Path(entry).name)
            _assert_clean(entry)
        assert "alpha.py" in base_names
        assert "bravo.py" in base_names
        assert "notes.txt" not in base_names

    def test_file_search_no_false_file_entries(self, ops, populated_dir):
        """Every entry in the files list must be a real path, not noise."""
        root = str(populated_dir)
        outcome = ops.search("*.py", root, target="files")
        assert outcome.error is None
        for entry in outcome.files:
            _assert_clean(entry)
            assert Path(entry).exists(), f"Search returned non-existent path: {entry}"

    def test_content_search_with_glob_filter(self, ops, populated_dir):
        """With ``file_glob="*.py"`` every match comes from a ``.py`` file."""
        root = str(populated_dir)
        outcome = ops.search(
            "return", root, target="content", file_glob="*.py"
        )
        assert outcome.error is None
        for hit in outcome.matches:
            assert hit.path.endswith(".py"), f"Non-py file in results: {hit.path}"
            _assert_clean(hit.content)
            _assert_clean(hit.path)

    def test_search_output_has_zero_noise(self, ops, populated_dir):
        """Dedicated noise check: search must return only real content."""
        root = str(populated_dir)
        outcome = ops.search("func", root, target="content")
        assert outcome.error is None
        for hit in outcome.matches:
            _assert_clean(hit.content)
            _assert_clean(hit.path)
# ── _expand_path ─────────────────────────────────────────────────────────
class TestExpandPath:
    """Checks for ``ops._expand_path`` on tilde, absolute and relative paths."""

    def test_tilde_exact(self, ops):
        """``~/test.txt`` expands to the home directory plus ``/test.txt``."""
        expected = f"{Path.home()}/test.txt"
        expanded = ops._expand_path("~/test.txt")
        assert expanded == expected
        _assert_clean(expanded)

    def test_absolute_unchanged(self, ops):
        """An absolute path is returned as-is."""
        expanded = ops._expand_path("/tmp/test.txt")
        assert expanded == "/tmp/test.txt"

    def test_relative_unchanged(self, ops):
        """A relative path without a tilde is returned as-is."""
        expanded = ops._expand_path("relative/path.txt")
        assert expanded == "relative/path.txt"

    def test_bare_tilde(self, ops):
        """A lone ``~`` expands to the home directory itself."""
        expanded = ops._expand_path("~")
        assert expanded == str(Path.home())
        _assert_clean(expanded)
# ── Terminal output cleanliness ──────────────────────────────────────────
class TestTerminalOutputCleanliness:
    """Every command the agent might run must produce noise-free output."""

    def test_echo(self, env):
        """``echo`` output is exactly the echoed token."""
        outcome = env.execute("echo CLEAN_TEST")
        assert outcome["output"].strip() == "CLEAN_TEST"
        _assert_clean(outcome["output"])

    def test_cat(self, env, tmp_path):
        """``cat`` returns the file content including its trailing newline."""
        sample = tmp_path / "cat_test.txt"
        sample.write_text("CAT_CONTENT_EXACT\n")
        outcome = env.execute(f"cat {sample}")
        assert outcome["output"] == "CAT_CONTENT_EXACT\n"
        _assert_clean(outcome["output"])

    def test_ls(self, env, tmp_path):
        """``ls`` lists both created files."""
        for file_name in ("file_a.txt", "file_b.txt"):
            (tmp_path / file_name).write_text("")
        outcome = env.execute(f"ls {tmp_path}")
        _assert_clean(outcome["output"])
        assert "file_a.txt" in outcome["output"]
        assert "file_b.txt" in outcome["output"]

    def test_wc(self, env, tmp_path):
        """``wc -l`` over redirected input reports three lines."""
        sample = tmp_path / "wc_test.txt"
        sample.write_text("one\ntwo\nthree\n")
        outcome = env.execute(f"wc -l < {sample}")
        assert outcome["output"].strip() == "3"
        _assert_clean(outcome["output"])

    def test_head(self, env, tmp_path):
        """``head -n 3`` yields exactly the first three numbered lines."""
        sample = tmp_path / "head_test.txt"
        sample.write_text(NUMBERED_CONTENT)
        first_three = "LINE_0001\nLINE_0002\nLINE_0003\n"
        outcome = env.execute(f"head -n 3 {sample}")
        assert outcome["output"] == first_three
        _assert_clean(outcome["output"])

    def test_env_var_expansion(self, env):
        """``$HOME`` expands to the same value as ``Path.home()``."""
        outcome = env.execute("echo $HOME")
        assert outcome["output"].strip() == str(Path.home())
        _assert_clean(outcome["output"])

    def test_command_substitution(self, env):
        """``$(...)`` substitution yields the inner command's output."""
        outcome = env.execute("echo $(echo NESTED)")
        assert outcome["output"].strip() == "NESTED"
        _assert_clean(outcome["output"])

    def test_command_v_detection(self, env):
        """This is how _has_command works -- must return clean 'yes'."""
        probe = "command -v cat >/dev/null 2>&1 && echo 'yes'"
        outcome = env.execute(probe)
        assert outcome["output"].strip() == "yes"
        _assert_clean(outcome["output"])

View file

@ -77,6 +77,85 @@ def _strip_blocked_tools(toolsets: List[str]) -> List[str]:
return [t for t in toolsets if t not in blocked_toolset_names]
def _build_child_progress_callback(task_index: int, parent_agent, task_count: int = 1) -> Optional[callable]:
"""Build a callback that relays child agent tool calls to the parent display.
Two display paths:
CLI: prints tree-view lines above the parent's delegation spinner
Gateway: batches tool names and relays to parent's progress callback
Returns None if no display mechanism is available, in which case the
child agent runs with no progress callback (identical to current behavior).
"""
spinner = getattr(parent_agent, '_delegate_spinner', None)
parent_cb = getattr(parent_agent, 'tool_progress_callback', None)
if not spinner and not parent_cb:
return None # No display → no callback → zero behavior change
# Show 1-indexed prefix only in batch mode (multiple tasks)
prefix = f"[{task_index + 1}] " if task_count > 1 else ""
# Gateway: batch tool names, flush periodically
_BATCH_SIZE = 5
_batch: List[str] = []
def _callback(tool_name: str, preview: str = None):
# Special "_thinking" event: model produced text content (reasoning)
if tool_name == "_thinking":
if spinner:
short = (preview[:55] + "...") if preview and len(preview) > 55 else (preview or "")
try:
spinner.print_above(f" {prefix}├─ 💭 \"{short}\"")
except Exception:
pass
# Don't relay thinking to gateway (too noisy for chat)
return
# Regular tool call event
if spinner:
short = (preview[:35] + "...") if preview and len(preview) > 35 else (preview or "")
tool_emojis = {
"terminal": "💻", "web_search": "🔍", "web_extract": "📄",
"read_file": "📖", "write_file": "✍️", "patch": "🔧",
"search_files": "🔎", "list_directory": "📂",
"browser_navigate": "🌐", "browser_click": "👆",
"text_to_speech": "🔊", "image_generate": "🎨",
"vision_analyze": "👁️", "process": "⚙️",
}
emoji = tool_emojis.get(tool_name, "")
line = f" {prefix}├─ {emoji} {tool_name}"
if short:
line += f" \"{short}\""
try:
spinner.print_above(line)
except Exception:
pass
if parent_cb:
_batch.append(tool_name)
if len(_batch) >= _BATCH_SIZE:
summary = ", ".join(_batch)
try:
parent_cb("subagent_progress", f"🔀 {prefix}{summary}")
except Exception:
pass
_batch.clear()
def _flush():
"""Flush remaining batched tool names to gateway on completion."""
if parent_cb and _batch:
summary = ", ".join(_batch)
try:
parent_cb("subagent_progress", f"🔀 {prefix}{summary}")
except Exception:
pass
_batch.clear()
_callback._flush = _flush
return _callback
def _run_single_child(
task_index: int,
goal: str,
@ -85,6 +164,7 @@ def _run_single_child(
model: Optional[str],
max_iterations: int,
parent_agent,
task_count: int = 1,
) -> Dict[str, Any]:
"""
Spawn and run a single child agent. Called from within a thread.
@ -98,37 +178,21 @@ def _run_single_child(
child_prompt = _build_child_system_prompt(goal, context)
# Build a progress callback that surfaces subagent tool activity.
# CLI: updates the parent's delegate spinner text.
# Gateway: forwards to the parent's progress callback (feeds message queue).
parent_progress_cb = getattr(parent_agent, 'tool_progress_callback', None)
def _child_progress(tool_name: str, preview: str = None):
tag = f"[subagent-{task_index+1}] {tool_name}"
# Update CLI spinner
spinner = getattr(parent_agent, '_delegate_spinner', None)
if spinner:
detail = f'"{preview}"' if preview else ""
try:
spinner.update_text(f"🔀 {tag} {detail}")
except Exception:
pass
# Forward to gateway progress queue
if parent_progress_cb:
try:
parent_progress_cb(tag, preview)
except Exception:
pass
try:
# Extract parent's API key so subagents inherit auth (e.g. Nous Portal)
parent_api_key = None
if hasattr(parent_agent, '_client_kwargs'):
# Extract parent's API key so subagents inherit auth (e.g. Nous Portal).
parent_api_key = getattr(parent_agent, "api_key", None)
if (not parent_api_key) and hasattr(parent_agent, "_client_kwargs"):
parent_api_key = parent_agent._client_kwargs.get("api_key")
# Build progress callback to relay tool calls to parent display
child_progress_cb = _build_child_progress_callback(task_index, parent_agent, task_count)
child = AIAgent(
base_url=parent_agent.base_url,
api_key=parent_api_key,
model=model or parent_agent.model,
provider=getattr(parent_agent, "provider", None),
api_mode=getattr(parent_agent, "api_mode", None),
max_iterations=max_iterations,
enabled_toolsets=child_toolsets,
quiet_mode=True,
@ -143,7 +207,7 @@ def _run_single_child(
providers_ignored=parent_agent.providers_ignored,
providers_order=parent_agent.providers_order,
provider_sort=parent_agent.provider_sort,
tool_progress_callback=_child_progress,
tool_progress_callback=child_progress_cb,
)
# Set delegation depth so children can't spawn grandchildren
@ -158,6 +222,13 @@ def _run_single_child(
with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
result = child.run_conversation(user_message=goal)
# Flush any remaining batched progress to gateway
if child_progress_cb and hasattr(child_progress_cb, '_flush'):
try:
child_progress_cb._flush()
except Exception:
pass
duration = round(time.monotonic() - child_start, 2)
summary = result.get("final_response") or ""
@ -275,6 +346,7 @@ def delegate_task(
model=model,
max_iterations=effective_max_iter,
parent_agent=parent_agent,
task_count=1,
)
results.append(result)
else:
@ -299,6 +371,7 @@ def delegate_task(
model=model,
max_iterations=effective_max_iter,
parent_agent=parent_agent,
task_count=n_tasks,
)
futures[future] = i
@ -318,14 +391,21 @@ def delegate_task(
results.append(entry)
completed_count += 1
# Print per-task completion line (visible in CLI via patch_stdout)
# Print per-task completion line above the spinner
idx = entry["task_index"]
label = task_labels[idx] if idx < len(task_labels) else f"Task {idx}"
dur = entry.get("duration_seconds", 0)
status = entry.get("status", "?")
icon = "" if status == "completed" else ""
remaining = n_tasks - completed_count
print(f" {icon} [{idx+1}/{n_tasks}] {label} ({dur}s)")
completion_line = f"{icon} [{idx+1}/{n_tasks}] {label} ({dur}s)"
if spinner_ref:
try:
spinner_ref.print_above(completion_line)
except Exception:
print(f" {completion_line}")
else:
print(f" {completion_line}")
# Update spinner text to show remaining count
if spinner_ref and remaining > 0:

View file

@ -11,20 +11,26 @@ from tools.environments.base import BaseEnvironment
# Noise lines emitted by interactive shells when stdin is not a terminal.
# Filtered from output to keep tool results clean.
_SHELL_NOISE = frozenset({
_SHELL_NOISE_SUBSTRINGS = (
"bash: cannot set terminal process group",
"bash: no job control in this shell",
"bash: no job control in this shell\n",
"no job control in this shell",
"no job control in this shell\n",
})
"cannot set terminal process group",
"tcsetattr: Inappropriate ioctl for device",
)
def _clean_shell_noise(output: str) -> str:
"""Strip shell startup warnings that leak when using -i without a TTY."""
lines = output.split("\n", 2) # only check first two lines
if lines and lines[0].strip() in _SHELL_NOISE:
return "\n".join(lines[1:])
return output
"""Strip shell startup warnings that leak when using -i without a TTY.
Removes all leading lines that match known noise patterns, not just the first.
Some environments emit multiple noise lines (e.g. Docker, non-TTY sessions).
"""
lines = output.split("\n")
# Strip all leading noise lines
while lines and any(noise in lines[0] for noise in _SHELL_NOISE_SUBSTRINGS):
lines.pop(0)
return "\n".join(lines)
class LocalEnvironment(BaseEnvironment):

View file

@ -31,7 +31,7 @@ def get_async_client() -> AsyncOpenAI:
default_headers={
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "cli-agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
},
)
return _client

View file

@ -87,13 +87,13 @@ class ProcessRegistry:
- Cleanup thread (sandbox reaping coordination)
"""
# Noise lines emitted by interactive shells when stdin is not a terminal.
_SHELL_NOISE = frozenset({
_SHELL_NOISE_SUBSTRINGS = (
"bash: cannot set terminal process group",
"bash: no job control in this shell",
"bash: no job control in this shell\n",
"no job control in this shell",
"no job control in this shell\n",
})
"cannot set terminal process group",
"tcsetattr: Inappropriate ioctl for device",
)
def __init__(self):
self._running: Dict[str, ProcessSession] = {}
@ -106,10 +106,10 @@ class ProcessRegistry:
@staticmethod
def _clean_shell_noise(text: str) -> str:
"""Strip shell startup warnings from the beginning of output."""
lines = text.split("\n", 2)
if lines and lines[0].strip() in ProcessRegistry._SHELL_NOISE:
return "\n".join(lines[1:])
return text
lines = text.split("\n")
while lines and any(noise in lines[0] for noise in ProcessRegistry._SHELL_NOISE_SUBSTRINGS):
lines.pop(0)
return "\n".join(lines)
# ----- Spawn -----

View file

@ -24,26 +24,13 @@ from typing import Dict, Any, List, Optional
from openai import AsyncOpenAI, OpenAI
from agent.auxiliary_client import get_text_auxiliary_client
from agent.auxiliary_client import get_async_text_auxiliary_client
# Resolve the auxiliary client at import time so we have the model slug.
# We build an AsyncOpenAI from the same credentials for async summarization.
_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client()
_async_aux_client: AsyncOpenAI | None = None
if _aux_client is not None:
_async_kwargs = {
"api_key": _aux_client.api_key,
"base_url": str(_aux_client.base_url),
}
if "openrouter" in str(_aux_client.base_url).lower():
_async_kwargs["default_headers"] = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "cli-agent",
}
_async_aux_client = AsyncOpenAI(**_async_kwargs)
# Resolve the async auxiliary client at import time so we have the model slug.
# Handles Codex Responses API adapter transparently.
_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client()
MAX_SESSION_CHARS = 100_000
MAX_SUMMARY_TOKENS = 2000
MAX_SUMMARY_TOKENS = 10000
def _format_timestamp(ts) -> str:

View file

@ -1037,8 +1037,12 @@ def terminal_tool(
)
output = output[:head_chars] + truncated_notice + output[-tail_chars:]
# Redact secrets from command output (catches env/printenv leaking keys)
from agent.redact import redact_sensitive_text
output = redact_sensitive_text(output.strip()) if output else ""
return json.dumps({
"output": output.strip() if output else "",
"output": output,
"exit_code": returncode,
"error": None
}, ensure_ascii=False)

View file

@ -54,7 +54,7 @@ if _aux_sync_client is not None:
_async_kwargs["default_headers"] = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "cli-agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
_aux_async_client = AsyncOpenAI(**_async_kwargs)

View file

@ -48,7 +48,7 @@ import asyncio
from typing import List, Dict, Any, Optional
from firecrawl import Firecrawl
from openai import AsyncOpenAI
from agent.auxiliary_client import get_text_auxiliary_client
from agent.auxiliary_client import get_async_text_auxiliary_client
from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__)
@ -67,21 +67,9 @@ def _get_firecrawl_client():
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
# Resolve auxiliary text client at module level; build an async wrapper.
_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_async_kwargs = {
"api_key": _aux_sync_client.api_key,
"base_url": str(_aux_sync_client.base_url),
}
if "openrouter" in str(_aux_sync_client.base_url).lower():
_async_kwargs["default_headers"] = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "cli-agent",
}
_aux_async_client = AsyncOpenAI(**_async_kwargs)
# Resolve async auxiliary client at module level.
# Handles Codex Responses API adapter transparently.
_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client()
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
@ -174,7 +162,7 @@ async def _call_summarizer_llm(
content: str,
context_str: str,
model: str,
max_tokens: int = 4000,
max_tokens: int = 20000,
is_chunk: bool = False,
chunk_info: str = ""
) -> Optional[str]:
@ -306,7 +294,7 @@ async def _process_large_content_chunked(
chunk_content,
context_str,
model,
max_tokens=2000,
max_tokens=10000,
is_chunk=True,
chunk_info=chunk_info
)
@ -374,7 +362,7 @@ Create a single, unified markdown summary."""
{"role": "user", "content": synthesis_prompt}
],
temperature=0.1,
**auxiliary_max_tokens_param(4000),
**auxiliary_max_tokens_param(20000),
**({} if not _extra else {"extra_body": _extra}),
)
final_summary = response.choices[0].message.content.strip()