mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge remote-tracking branch 'origin/main' into feature/homeassistant-integration
# Conflicts: # run_agent.py
This commit is contained in:
commit
3fdf03390e
50 changed files with 7354 additions and 358 deletions
|
|
@ -10,7 +10,7 @@
|
|||
OPENROUTER_API_KEY=
|
||||
|
||||
# Default model to use (OpenRouter format: provider/model)
|
||||
# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus
|
||||
# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-3-flash-preview, zhipuai/glm-4-plus
|
||||
LLM_MODEL=anthropic/claude-opus-4.6
|
||||
|
||||
# =============================================================================
|
||||
|
|
@ -200,9 +200,10 @@ IMAGE_TOOLS_DEBUG=false
|
|||
# When conversation approaches model's context limit, middle turns are
|
||||
# automatically summarized to free up space.
|
||||
#
|
||||
# Context compression is configured in ~/.hermes/config.yaml under compression:
|
||||
# CONTEXT_COMPRESSION_ENABLED=true # Enable auto-compression (default: true)
|
||||
# CONTEXT_COMPRESSION_THRESHOLD=0.85 # Compress at 85% of context limit
|
||||
# CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001 # Fast model for summaries
|
||||
# Model is set via compression.summary_model in config.yaml (default: google/gemini-3-flash-preview)
|
||||
|
||||
# =============================================================================
|
||||
# RL TRAINING (Tinker + Atropos)
|
||||
|
|
|
|||
14
AGENTS.md
14
AGENTS.md
|
|
@ -179,6 +179,7 @@ The interactive CLI uses:
|
|||
Key components:
|
||||
- `HermesCLI` class - Main CLI controller with commands and conversation loop
|
||||
- `SlashCommandCompleter` - Autocomplete dropdown for `/commands` (type `/` to see all)
|
||||
- `agent/skill_commands.py` - Scans skills and builds invocation messages (shared with gateway)
|
||||
- `load_cli_config()` - Loads config, sets environment variables for terminal
|
||||
- `build_welcome_banner()` - Displays ASCII art logo, tools, and skills summary
|
||||
|
||||
|
|
@ -191,9 +192,22 @@ CLI UX notes:
|
|||
- Pasting 5+ lines auto-saves to `~/.hermes/pastes/` and collapses to a reference
|
||||
- Multi-line input via Alt+Enter or Ctrl+J
|
||||
- `/commands` - Process user commands like `/help`, `/clear`, `/personality`, etc.
|
||||
- `/skill-name` - Invoke installed skills directly (e.g., `/axolotl`, `/gif-search`)
|
||||
|
||||
CLI uses `quiet_mode=True` when creating AIAgent to suppress verbose logging.
|
||||
|
||||
### Skill Slash Commands
|
||||
|
||||
Every installed skill in `~/.hermes/skills/` is automatically registered as a slash command.
|
||||
The skill name (from frontmatter or folder name) becomes the command: `axolotl` → `/axolotl`.
|
||||
|
||||
Implementation (`agent/skill_commands.py`, shared between CLI and gateway):
|
||||
1. `scan_skill_commands()` scans all SKILL.md files at startup
|
||||
2. `build_skill_invocation_message()` loads the SKILL.md content and builds a user-turn message
|
||||
3. The message includes the full skill content, a list of supporting files (not loaded), and the user's instruction
|
||||
4. Supporting files can be loaded on demand via the `skill_view` tool
|
||||
5. Injected as a **user message** (not system prompt) to preserve prompt caching
|
||||
|
||||
### Adding CLI Commands
|
||||
|
||||
1. Add to `COMMANDS` dict with description
|
||||
|
|
|
|||
74
README.md
74
README.md
|
|
@ -121,11 +121,14 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
|
|||
|
||||
| Provider | Setup |
|
||||
|----------|-------|
|
||||
| **Nous Portal** | `hermes login` (OAuth, subscription-based) |
|
||||
| **Nous Portal** | `hermes model` (OAuth, subscription-based) |
|
||||
| **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) |
|
||||
| **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` |
|
||||
| **Custom Endpoint** | `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` |
|
||||
|
||||
**Note:** Even when using Nous Portal or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools.
|
||||
**Codex note:** The OpenAI Codex provider authenticates via device code (open a URL, enter a code). Credentials are stored at `~/.codex/auth.json` and auto-refresh. No Codex CLI installation required.
|
||||
|
||||
**Note:** Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools.
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -143,7 +146,7 @@ All your settings are stored in `~/.hermes/` for easy access:
|
|||
├── skills/ # Agent-created skills (managed via skill_manage tool)
|
||||
├── cron/ # Scheduled jobs
|
||||
├── sessions/ # Gateway sessions
|
||||
└── logs/ # Logs
|
||||
└── logs/ # Logs (errors.log, gateway.log — secrets auto-redacted)
|
||||
```
|
||||
|
||||
### Managing Configuration
|
||||
|
|
@ -161,6 +164,19 @@ hermes config set terminal.backend docker
|
|||
hermes config set OPENROUTER_API_KEY sk-or-... # Saves to .env
|
||||
```
|
||||
|
||||
### Configuration Precedence
|
||||
|
||||
Settings are resolved in this order (highest priority first):
|
||||
|
||||
1. **CLI arguments** — `hermes chat --max-turns 100` (per-invocation override)
|
||||
2. **`~/.hermes/config.yaml`** — the primary config file for all non-secret settings
|
||||
3. **`~/.hermes/.env`** — fallback for env vars; **required** for secrets (API keys, tokens, passwords)
|
||||
4. **Built-in defaults** — hardcoded safe defaults when nothing else is set
|
||||
|
||||
**Rule of thumb:** Secrets (API keys, bot tokens, passwords) go in `.env`. Everything else (model, terminal backend, compression settings, memory limits, toolsets) goes in `config.yaml`. When both are set, `config.yaml` wins for non-secret settings.
|
||||
|
||||
The `hermes config set` command automatically routes values to the right file — API keys are saved to `.env`, everything else to `config.yaml`.
|
||||
|
||||
### Optional API Keys
|
||||
|
||||
| Feature | Provider | Env Variable |
|
||||
|
|
@ -277,7 +293,10 @@ See [docs/messaging.md](docs/messaging.md) for advanced WhatsApp configuration.
|
|||
| `/status` | Show session info |
|
||||
| `/stop` | Stop the running agent |
|
||||
| `/sethome` | Set this chat as the home channel |
|
||||
| `/compress` | Manually compress conversation context |
|
||||
| `/usage` | Show token usage for this session |
|
||||
| `/help` | Show available commands |
|
||||
| `/<skill-name>` | Invoke any installed skill (e.g., `/axolotl`, `/gif-search`) |
|
||||
|
||||
### DM Pairing (Alternative to Allowlists)
|
||||
|
||||
|
|
@ -354,7 +373,7 @@ hermes --resume <id> # Resume a specific session (-r)
|
|||
|
||||
# Provider & model management
|
||||
hermes model # Switch provider and model interactively
|
||||
hermes login # Authenticate with Nous Portal (OAuth)
|
||||
hermes model # Select provider and model
|
||||
hermes logout # Clear stored OAuth credentials
|
||||
|
||||
# Configuration
|
||||
|
|
@ -407,7 +426,11 @@ Type `/` to see an autocomplete dropdown of all commands.
|
|||
| `/cron` | Manage scheduled tasks |
|
||||
| `/skills` | Search, install, inspect, or manage skills from registries |
|
||||
| `/platforms` | Show gateway/messaging platform status |
|
||||
| `/verbose` | Cycle tool progress display: off → new → all → verbose |
|
||||
| `/compress` | Manually compress conversation context |
|
||||
| `/usage` | Show token usage for this session |
|
||||
| `/quit` | Exit (also: `/exit`, `/q`) |
|
||||
| `/<skill-name>` | Invoke any installed skill (e.g., `/axolotl`, `/gif-search`) |
|
||||
|
||||
**Keybindings:**
|
||||
- `Enter` — send message
|
||||
|
|
@ -694,6 +717,21 @@ hermes cron status # Check if gateway is running
|
|||
|
||||
Even if no messaging platforms are configured, the gateway stays running for cron. A file lock prevents duplicate execution if multiple processes overlap.
|
||||
|
||||
### 🪝 Event Hooks
|
||||
|
||||
Run custom code at key lifecycle points — log activity, send alerts, post to webhooks. Hooks are Python handlers that fire automatically during gateway operation.
|
||||
|
||||
```
|
||||
~/.hermes/hooks/
|
||||
└── my-hook/
|
||||
├── HOOK.yaml # name + events to subscribe to
|
||||
└── handler.py # async def handle(event_type, context)
|
||||
```
|
||||
|
||||
**Available events:** `gateway:startup`, `session:start`, `session:reset`, `agent:start`, `agent:step`, `agent:end`, `command:*` (wildcard — fires for any slash command).
|
||||
|
||||
Hooks are non-blocking — errors are caught and logged, never crashing the agent. See [docs/hooks.md](docs/hooks.md) for the full event reference, context keys, and examples.
|
||||
|
||||
### 🛡️ Exec Approval (Messaging Platforms)
|
||||
|
||||
When the agent tries to run a potentially dangerous command (`rm -rf`, `chmod 777`, etc.) on Telegram/Discord/WhatsApp, instead of blocking it silently, it asks the user for approval:
|
||||
|
|
@ -807,6 +845,22 @@ Skills are on-demand knowledge documents the agent can load when needed. They fo
|
|||
All skills live in **`~/.hermes/skills/`** -- a single directory that is the source of truth. On fresh install, bundled skills are copied there from the repo. Hub-installed skills and agent-created skills also go here. The agent can modify or delete any skill. `hermes update` adds only genuinely new bundled skills (via a manifest) without overwriting your changes or re-adding skills you deleted.
|
||||
|
||||
**Using Skills:**
|
||||
|
||||
Every installed skill is automatically available as a slash command — type `/<skill-name>` to invoke it directly:
|
||||
|
||||
```bash
|
||||
# In the CLI or any messaging platform (Telegram, Discord, Slack, WhatsApp):
|
||||
/gif-search funny cats
|
||||
/axolotl help me fine-tune Llama 3 on my dataset
|
||||
/github-pr-workflow create a PR for the auth refactor
|
||||
|
||||
# Just the skill name (no prompt) loads the skill and lets the agent ask what you need:
|
||||
/excalidraw
|
||||
```
|
||||
|
||||
The skill's full instructions (SKILL.md) are loaded into the conversation, and any supporting files (references, templates, scripts) are listed for the agent to pull on demand via the `skill_view` tool. Type `/help` to see all available skill commands.
|
||||
|
||||
You can also use skills through natural conversation:
|
||||
```bash
|
||||
hermes --toolsets skills -q "What skills do you have?"
|
||||
hermes --toolsets skills -q "Show me the axolotl skill"
|
||||
|
|
@ -1266,9 +1320,13 @@ Your `~/.hermes/` directory should now look like:
|
|||
├── skills/ # Agent-created skills (auto-created on first use)
|
||||
├── cron/ # Scheduled job data
|
||||
├── sessions/ # Messaging gateway sessions
|
||||
└── logs/ # Conversation logs
|
||||
└── logs/ # Logs
|
||||
├── gateway.log # Gateway activity log
|
||||
└── errors.log # Errors from tool calls, API failures, etc.
|
||||
```
|
||||
|
||||
All log output is automatically redacted -- API keys, tokens, and credentials are masked before they reach disk.
|
||||
|
||||
---
|
||||
|
||||
### Step 7: Add Your API Keys
|
||||
|
|
@ -1592,7 +1650,9 @@ All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set t
|
|||
|------|-------------|
|
||||
| `~/.hermes/config.yaml` | Your settings |
|
||||
| `~/.hermes/.env` | API keys and secrets |
|
||||
| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes login`) |
|
||||
| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes model`) |
|
||||
| `~/.hermes/logs/errors.log` | Tool errors, API failures (secrets auto-redacted) |
|
||||
| `~/.hermes/logs/gateway.log` | Gateway activity log (secrets auto-redacted) |
|
||||
| `~/.hermes/cron/` | Scheduled jobs data |
|
||||
| `~/.hermes/sessions/` | Gateway session data |
|
||||
| `~/.hermes/hermes-agent/` | Installation directory |
|
||||
|
|
@ -1620,7 +1680,7 @@ hermes config # View current settings
|
|||
Common issues:
|
||||
- **"API key not set"**: Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key`
|
||||
- **"hermes: command not found"**: Reload your shell (`source ~/.bashrc`) or check PATH
|
||||
- **"Run `hermes login` to re-authenticate"**: Your Nous Portal session expired. Run `hermes login` to refresh.
|
||||
- **"Run `hermes setup` to re-authenticate"**: Your Nous Portal session expired. Run `hermes setup` or `hermes model` to refresh.
|
||||
- **"No active paid subscription"**: Your Nous Portal account needs an active subscription for inference.
|
||||
- **Gateway won't start**: Check `hermes gateway status` and logs
|
||||
- **Missing config after update**: Run `hermes config check` to see what's new, then `hermes config migrate` to add missing options
|
||||
|
|
|
|||
|
|
@ -8,7 +8,9 @@ Resolution order for text tasks:
|
|||
1. OpenRouter (OPENROUTER_API_KEY)
|
||||
2. Nous Portal (~/.hermes/auth.json active provider)
|
||||
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
|
||||
4. None
|
||||
4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
|
||||
wrapped to look like a chat.completions client)
|
||||
5. None
|
||||
|
||||
Resolution order for vision/multimodal tasks:
|
||||
1. OpenRouter
|
||||
|
|
@ -20,7 +22,8 @@ import json
|
|||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
|
|
@ -32,7 +35,7 @@ logger = logging.getLogger(__name__)
|
|||
_OR_HEADERS = {
|
||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "cli-agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
}
|
||||
|
||||
# Nous Portal extra_body for product attribution.
|
||||
|
|
@ -49,6 +52,188 @@ _NOUS_MODEL = "gemini-3-flash"
|
|||
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
|
||||
_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
|
||||
|
||||
# Codex fallback: uses the Responses API (the only endpoint the Codex
|
||||
# OAuth token can access) with a fast model for auxiliary tasks.
|
||||
_CODEX_AUX_MODEL = "gpt-5.3-codex"
|
||||
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
||||
|
||||
|
||||
# ── Codex Responses → chat.completions adapter ─────────────────────────────
|
||||
# All auxiliary consumers call client.chat.completions.create(**kwargs) and
|
||||
# read response.choices[0].message.content. This adapter translates those
|
||||
# calls to the Codex Responses API so callers don't need any changes.
|
||||
|
||||
class _CodexCompletionsAdapter:
    """Drop-in shim that accepts chat.completions.create() kwargs and
    routes them through the Codex Responses streaming API.

    Callers keep reading ``response.choices[0].message.content`` (and
    ``.tool_calls`` / ``.usage``) exactly as they would from a real
    chat.completions response; the translation happens entirely here.
    """

    def __init__(self, real_client: OpenAI, model: str):
        # real_client must point at the Codex Responses endpoint; model is
        # the default used when the caller does not pass one explicitly.
        self._client = real_client
        self._model = model

    def create(self, **kwargs) -> Any:
        """Translate chat.completions kwargs to a Responses API call.

        Accepts the common chat.completions arguments (messages, model,
        temperature, max_tokens variants, tools) and returns a
        SimpleNamespace shaped like a chat.completions response.
        Raises whatever the underlying Responses call raises.
        """
        messages = kwargs.get("messages", [])
        model = kwargs.get("model", self._model)
        temperature = kwargs.get("temperature")

        # Separate system/instructions from conversation messages.
        # Responses takes the system prompt as a top-level `instructions`
        # field; only the last system message wins if several are present.
        instructions = "You are a helpful assistant."
        input_msgs: List[Dict[str, Any]] = []
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if role == "system":
                instructions = content
            else:
                input_msgs.append({"role": role, "content": content})

        resp_kwargs: Dict[str, Any] = {
            "model": model,
            "instructions": instructions,
            # Responses rejects an empty input list; send a blank user turn.
            "input": input_msgs or [{"role": "user", "content": ""}],
            "stream": True,
            "store": False,
        }

        # Accept all three spellings of the token cap used by our callers.
        max_tokens = kwargs.get("max_output_tokens") or kwargs.get("max_completion_tokens") or kwargs.get("max_tokens")
        if max_tokens is not None:
            resp_kwargs["max_output_tokens"] = int(max_tokens)
        if temperature is not None:
            resp_kwargs["temperature"] = temperature

        # Tools support for flush_memories and similar callers.
        # chat.completions nests the spec under "function"; Responses wants
        # the fields at the top level of each tool entry.
        tools = kwargs.get("tools")
        if tools:
            converted = []
            for t in tools:
                fn = t.get("function", {}) if isinstance(t, dict) else {}
                name = fn.get("name")
                if not name:
                    continue
                converted.append({
                    "type": "function",
                    "name": name,
                    "description": fn.get("description", ""),
                    "parameters": fn.get("parameters", {}),
                })
            if converted:
                resp_kwargs["tools"] = converted

        # Stream and collect the response. We drain the event stream
        # (events themselves are ignored) and read the aggregated final
        # response object instead.
        text_parts: List[str] = []
        tool_calls_raw: List[Any] = []
        usage = None

        try:
            with self._client.responses.stream(**resp_kwargs) as stream:
                for _event in stream:
                    pass
                final = stream.get_final_response()

            # Extract text and tool calls from the Responses output.
            for item in getattr(final, "output", []):
                item_type = getattr(item, "type", None)
                if item_type == "message":
                    for part in getattr(item, "content", []):
                        ptype = getattr(part, "type", None)
                        if ptype in ("output_text", "text"):
                            text_parts.append(getattr(part, "text", ""))
                elif item_type == "function_call":
                    # Re-shape into the chat.completions tool_call layout.
                    tool_calls_raw.append(SimpleNamespace(
                        id=getattr(item, "call_id", ""),
                        type="function",
                        function=SimpleNamespace(
                            name=getattr(item, "name", ""),
                            arguments=getattr(item, "arguments", "{}"),
                        ),
                    ))

            # Map Responses token accounting onto chat.completions names.
            resp_usage = getattr(final, "usage", None)
            if resp_usage:
                usage = SimpleNamespace(
                    prompt_tokens=getattr(resp_usage, "input_tokens", 0),
                    completion_tokens=getattr(resp_usage, "output_tokens", 0),
                    total_tokens=getattr(resp_usage, "total_tokens", 0),
                )
        except Exception as exc:
            # Log at debug and re-raise: callers decide how to degrade.
            logger.debug("Codex auxiliary Responses API call failed: %s", exc)
            raise

        # Empty output is surfaced as None, matching chat.completions.
        content = "".join(text_parts).strip() or None

        # Build a response that looks like chat.completions
        message = SimpleNamespace(
            role="assistant",
            content=content,
            tool_calls=tool_calls_raw or None,
        )
        choice = SimpleNamespace(
            index=0,
            message=message,
            finish_reason="stop" if not tool_calls_raw else "tool_calls",
        )
        return SimpleNamespace(
            choices=[choice],
            model=model,
            usage=usage,
        )
|
||||
|
||||
|
||||
class _CodexChatShim:
    """Namespace shim mounting the adapter as ``.completions`` so callers
    can keep writing ``client.chat.completions.create(...)`` unchanged."""

    def __init__(self, adapter: _CodexCompletionsAdapter):
        # The adapter already implements create(); just expose it.
        self.completions = adapter
|
||||
|
||||
|
||||
class CodexAuxiliaryClient:
    """Codex-backed stand-in for an OpenAI client.

    Exposes the familiar ``client.chat.completions.create(**kwargs)``
    surface, but every call is translated to the Codex Responses API by
    the underlying adapter. ``.api_key`` and ``.base_url`` mirror the real
    client so async wrappers can introspect them.
    """

    def __init__(self, real_client: OpenAI, model: str):
        # Keep a handle on the real client so close() can release it.
        self._real_client = real_client
        self.api_key = real_client.api_key
        self.base_url = real_client.base_url
        self.chat = _CodexChatShim(_CodexCompletionsAdapter(real_client, model))

    def close(self):
        """Release the underlying HTTP client's resources."""
        self._real_client.close()
|
||||
|
||||
|
||||
class _AsyncCodexCompletionsAdapter:
    """Async facade over the sync Codex Responses adapter.

    Each call is offloaded to a worker thread via asyncio.to_thread(),
    so async consumers (web_tools, session_search) can simply await it.
    """

    def __init__(self, sync_adapter: _CodexCompletionsAdapter):
        self._sync = sync_adapter

    async def create(self, **kwargs) -> Any:
        """Await the sync adapter's create() on a worker thread."""
        import asyncio

        blocking_call = self._sync.create
        return await asyncio.to_thread(blocking_call, **kwargs)
|
||||
|
||||
|
||||
class _AsyncCodexChatShim:
    """Async twin of _CodexChatShim: mounts the async adapter as ``.completions``."""

    def __init__(self, adapter: _AsyncCodexCompletionsAdapter):
        self.completions = adapter
|
||||
|
||||
|
||||
class AsyncCodexAuxiliaryClient:
    """Async counterpart of CodexAuxiliaryClient.

    Mirrors AsyncOpenAI's ``chat.completions.create()`` surface by
    wrapping the sync wrapper's adapter in the thread-offloading async
    shim, and re-exposes ``.api_key`` / ``.base_url`` for introspection.
    """

    def __init__(self, sync_wrapper: "CodexAuxiliaryClient"):
        wrapped = _AsyncCodexCompletionsAdapter(sync_wrapper.chat.completions)
        self.chat = _AsyncCodexChatShim(wrapped)
        self.api_key = sync_wrapper.api_key
        self.base_url = sync_wrapper.base_url
|
||||
|
||||
|
||||
def _read_nous_auth() -> Optional[dict]:
|
||||
"""Read and validate ~/.hermes/auth.json for an active Nous provider.
|
||||
|
|
@ -82,12 +267,31 @@ def _nous_base_url() -> str:
|
|||
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
|
||||
|
||||
|
||||
def _read_codex_access_token() -> Optional[str]:
|
||||
"""Read a valid Codex OAuth access token from ~/.codex/auth.json."""
|
||||
try:
|
||||
codex_auth = Path.home() / ".codex" / "auth.json"
|
||||
if not codex_auth.is_file():
|
||||
return None
|
||||
data = json.loads(codex_auth.read_text())
|
||||
tokens = data.get("tokens")
|
||||
if not isinstance(tokens, dict):
|
||||
return None
|
||||
access_token = tokens.get("access_token")
|
||||
if isinstance(access_token, str) and access_token.strip():
|
||||
return access_token.strip()
|
||||
return None
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read Codex auth for auxiliary client: %s", exc)
|
||||
return None
|
||||
|
||||
|
||||
# ── Public API ──────────────────────────────────────────────────────────────
|
||||
|
||||
def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Return (client, model_slug) for text-only auxiliary tasks.
|
||||
|
||||
Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None).
|
||||
Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth -> (None, None).
|
||||
"""
|
||||
# 1. OpenRouter
|
||||
or_key = os.getenv("OPENROUTER_API_KEY")
|
||||
|
|
@ -115,11 +319,44 @@ def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
logger.debug("Auxiliary text client: custom endpoint (%s)", model)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
|
||||
# 4. Nothing available
|
||||
# 4. Codex OAuth -- uses the Responses API (only endpoint the token
|
||||
# can access), wrapped to look like a chat.completions client.
|
||||
codex_token = _read_codex_access_token()
|
||||
if codex_token:
|
||||
logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
|
||||
real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
|
||||
return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
|
||||
|
||||
# 5. Nothing available
|
||||
logger.debug("Auxiliary text client: none available")
|
||||
return None, None
|
||||
|
||||
|
||||
def get_async_text_auxiliary_client():
    """Resolve an (async_client, model_slug) pair for async consumers.

    Standard providers come back as an AsyncOpenAI client; a Codex
    auxiliary client is wrapped in AsyncCodexAuxiliaryClient (which
    routes through the Responses API on a worker thread). Returns
    (None, None) when no provider resolves.
    """
    from openai import AsyncOpenAI

    client, model = get_text_auxiliary_client()
    if client is None:
        return None, None

    # Codex clients already speak chat.completions via their adapter;
    # wrap in the async shim instead of constructing an AsyncOpenAI.
    if isinstance(client, CodexAuxiliaryClient):
        return AsyncCodexAuxiliaryClient(client), model

    base_url = str(client.base_url)
    async_kwargs = {
        "api_key": client.api_key,
        "base_url": base_url,
    }
    # OpenRouter gets our attribution headers, same as the sync path.
    if "openrouter" in base_url.lower():
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
    return AsyncOpenAI(**async_kwargs), model
|
||||
|
||||
|
||||
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Return (client, model_slug) for vision/multimodal auxiliary tasks.
|
||||
|
||||
|
|
@ -161,11 +398,12 @@ def auxiliary_max_tokens_param(value: int) -> dict:
|
|||
|
||||
OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer
|
||||
models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'.
|
||||
The Codex adapter translates max_tokens internally, so we use max_tokens
|
||||
for it as well.
|
||||
"""
|
||||
custom_base = os.getenv("OPENAI_BASE_URL", "")
|
||||
or_key = os.getenv("OPENROUTER_API_KEY")
|
||||
# Only use max_completion_tokens when the auxiliary client resolved to
|
||||
# direct OpenAI (no OpenRouter key, no Nous auth, custom endpoint is api.openai.com)
|
||||
# Only use max_completion_tokens for direct OpenAI custom endpoints
|
||||
if (not or_key
|
||||
and _read_nous_auth() is None
|
||||
and "api.openai.com" in custom_base.lower()):
|
||||
|
|
|
|||
|
|
@ -31,8 +31,9 @@ class ContextCompressor:
|
|||
threshold_percent: float = 0.85,
|
||||
protect_first_n: int = 3,
|
||||
protect_last_n: int = 4,
|
||||
summary_target_tokens: int = 500,
|
||||
summary_target_tokens: int = 2500,
|
||||
quiet_mode: bool = False,
|
||||
summary_model_override: str = None,
|
||||
):
|
||||
self.model = model
|
||||
self.threshold_percent = threshold_percent
|
||||
|
|
@ -49,7 +50,8 @@ class ContextCompressor:
|
|||
self.last_completion_tokens = 0
|
||||
self.last_total_tokens = 0
|
||||
|
||||
self.client, self.summary_model = get_text_auxiliary_client()
|
||||
self.client, default_model = get_text_auxiliary_client()
|
||||
self.summary_model = summary_model_override or default_model
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
|
|
|
|||
|
|
@ -199,6 +199,24 @@ class KawaiiSpinner:
|
|||
def update_text(self, new_message: str):
|
||||
self.message = new_message
|
||||
|
||||
def print_above(self, text: str):
|
||||
"""Print a line above the spinner without disrupting animation.
|
||||
|
||||
Clears the current spinner line, prints the text, and lets the
|
||||
next animation tick redraw the spinner on the line below.
|
||||
Thread-safe: uses the captured stdout reference (self._out).
|
||||
Works inside redirect_stdout(devnull) because _write bypasses
|
||||
sys.stdout and writes to the stdout captured at spinner creation.
|
||||
"""
|
||||
if not self.running:
|
||||
self._write(f" {text}", flush=True)
|
||||
return
|
||||
# Clear spinner line with spaces (not \033[K) to avoid garbled escape
|
||||
# codes when prompt_toolkit's patch_stdout is active — same approach
|
||||
# as stop(). Then print text; spinner redraws on next tick.
|
||||
blanks = ' ' * max(self.last_line_len + 5, 40)
|
||||
self._write(f"\r{blanks}\r {text}", flush=True)
|
||||
|
||||
def stop(self, final_message: str = None):
|
||||
self.running = False
|
||||
if self.thread:
|
||||
|
|
@ -283,6 +301,15 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
|
|||
pass
|
||||
return False, ""
|
||||
|
||||
# Memory-specific: distinguish "full" from real errors
|
||||
if tool_name == "memory":
|
||||
try:
|
||||
data = json.loads(result)
|
||||
if data.get("success") is False and "exceed the limit" in data.get("error", ""):
|
||||
return True, " [full]"
|
||||
except (json.JSONDecodeError, TypeError, AttributeError):
|
||||
pass
|
||||
|
||||
# Generic heuristic for non-terminal tools
|
||||
lower = result[:500].lower()
|
||||
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
|
||||
|
|
|
|||
115
agent/redact.py
Normal file
115
agent/redact.py
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
"""Regex-based secret redaction for logs and tool output.
|
||||
|
||||
Applies pattern matching to mask API keys, tokens, and credentials
|
||||
before they reach log files, verbose output, or gateway logs.
|
||||
|
||||
Short tokens (< 18 chars) are fully masked. Longer tokens preserve
|
||||
the first 6 and last 4 characters for debuggability.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Known API key prefixes -- match the prefix + contiguous token chars
_PREFIX_PATTERNS = [
    r"sk-[A-Za-z0-9_-]{10,}",         # OpenAI / OpenRouter
    r"ghp_[A-Za-z0-9]{10,}",          # GitHub PAT (classic)
    r"github_pat_[A-Za-z0-9_]{10,}",  # GitHub PAT (fine-grained)
    r"xox[baprs]-[A-Za-z0-9-]{10,}",  # Slack tokens
    r"AIza[A-Za-z0-9_-]{30,}",        # Google API keys
    r"pplx-[A-Za-z0-9]{10,}",         # Perplexity
    r"fal_[A-Za-z0-9_-]{10,}",        # Fal.ai
    r"fc-[A-Za-z0-9]{10,}",           # Firecrawl
    r"bb_live_[A-Za-z0-9_-]{10,}",    # BrowserBase
    r"gAAAA[A-Za-z0-9_=-]{20,}",      # Codex encrypted tokens
]

# ENV assignment patterns: KEY=value where KEY contains a secret-like name
_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
# The \2 backreference requires the closing quote (if any) to match the
# opening one; unquoted values match with an empty group 2.
_ENV_ASSIGN_RE = re.compile(
    rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+)\2",
    re.IGNORECASE,
)

# JSON field patterns: "apiKey": "value", "token": "value", etc.
_JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer)"
_JSON_FIELD_RE = re.compile(
    rf'("{_JSON_KEY_NAMES}")\s*:\s*"([^"]+)"',
    re.IGNORECASE,
)

# Authorization headers
_AUTH_HEADER_RE = re.compile(
    r"(Authorization:\s*Bearer\s+)(\S+)",
    re.IGNORECASE,
)

# Telegram bot tokens: bot<digits>:<token> or <digits>:<alphanum>
_TELEGRAM_RE = re.compile(
    r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})",
)

# Compile known prefix patterns into one alternation.
# The lookarounds keep matches from starting or ending inside a longer
# token-character run, so substrings of unrelated identifiers don't match.
_PREFIX_RE = re.compile(
    r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
)
|
||||
|
||||
|
||||
def _mask_token(token: str) -> str:
|
||||
"""Mask a token, preserving prefix for long tokens."""
|
||||
if len(token) < 18:
|
||||
return "***"
|
||||
return f"{token[:6]}...{token[-4:]}"
|
||||
|
||||
|
||||
def redact_sensitive_text(text: str) -> str:
    """Mask credentials found anywhere in *text*.

    Runs every redaction pattern in sequence; text without secrets comes
    back unchanged. Safe to call on any string, including empty/None-ish.
    """
    if not text:
        return text

    # 1. Bare tokens with a well-known prefix (sk-, ghp_, xoxb-, ...).
    text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)

    # 2. KEY=value assignments whose name looks secret-bearing,
    #    e.g. OPENAI_API_KEY=sk-abc...
    def _scrub_assignment(match):
        name, quote, value = match.group(1), match.group(2), match.group(3)
        return f"{name}={quote}{_mask_token(value)}{quote}"

    text = _ENV_ASSIGN_RE.sub(_scrub_assignment, text)

    # 3. JSON-style fields such as "apiKey": "value".
    def _scrub_json_field(match):
        key, value = match.group(1), match.group(2)
        return f'{key}: "{_mask_token(value)}"'

    text = _JSON_FIELD_RE.sub(_scrub_json_field, text)

    # 4. Authorization: Bearer <token> headers.
    text = _AUTH_HEADER_RE.sub(
        lambda m: m.group(1) + _mask_token(m.group(2)),
        text,
    )

    # 5. Telegram bot tokens — the numeric bot id stays visible.
    def _scrub_telegram(match):
        bot_prefix = match.group(1) or ""
        return f"{bot_prefix}{match.group(2)}:***"

    text = _TELEGRAM_RE.sub(_scrub_telegram, text)

    return text
|
||||
|
||||
|
||||
class RedactingFormatter(logging.Formatter):
    """Log formatter that redacts secrets from all log messages.

    Drop-in replacement for logging.Formatter: construction and record
    formatting are inherited unchanged, and the fully formatted line is
    passed through redact_sensitive_text() before being returned, so
    credentials never reach the handler's stream.
    """

    # NOTE: the previous explicit __init__ only forwarded its arguments
    # verbatim to logging.Formatter.__init__; it has been removed as
    # redundant — the inherited constructor accepts the same signature.

    def format(self, record: logging.LogRecord) -> str:
        """Format *record* normally, then mask any secrets in the result.

        Redaction runs after formatting so patterns can match values that
        only appear once the record's %-args have been interpolated.
        """
        formatted = super().format(record)
        return redact_sensitive_text(formatted)
|
||||
114
agent/skill_commands.py
Normal file
114
agent/skill_commands.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
"""Skill slash commands — scan installed skills and build invocation messages.
|
||||
|
||||
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
|
||||
can invoke skills via /skill-name commands.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
|
||||
def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
"""Scan ~/.hermes/skills/ and return a mapping of /command -> skill info.
|
||||
|
||||
Returns:
|
||||
Dict mapping "/skill-name" to {name, description, skill_md_path, skill_dir}.
|
||||
"""
|
||||
global _skill_commands
|
||||
_skill_commands = {}
|
||||
try:
|
||||
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter
|
||||
if not SKILLS_DIR.exists():
|
||||
return _skill_commands
|
||||
for skill_md in SKILLS_DIR.rglob("SKILL.md"):
|
||||
path_str = str(skill_md)
|
||||
if '/.git/' in path_str or '/.github/' in path_str or '/.hub/' in path_str:
|
||||
continue
|
||||
try:
|
||||
content = skill_md.read_text(encoding='utf-8')
|
||||
frontmatter, body = _parse_frontmatter(content)
|
||||
name = frontmatter.get('name', skill_md.parent.name)
|
||||
description = frontmatter.get('description', '')
|
||||
if not description:
|
||||
for line in body.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#'):
|
||||
description = line[:80]
|
||||
break
|
||||
cmd_name = name.lower().replace(' ', '-').replace('_', '-')
|
||||
_skill_commands[f"/{cmd_name}"] = {
|
||||
"name": name,
|
||||
"description": description or f"Invoke the {name} skill",
|
||||
"skill_md_path": str(skill_md),
|
||||
"skill_dir": str(skill_md.parent),
|
||||
}
|
||||
except Exception:
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
return _skill_commands
|
||||
|
||||
|
||||
def get_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
"""Return the current skill commands mapping (scan first if empty)."""
|
||||
if not _skill_commands:
|
||||
scan_skill_commands()
|
||||
return _skill_commands
|
||||
|
||||
|
||||
def build_skill_invocation_message(cmd_key: str, user_instruction: str = "") -> Optional[str]:
|
||||
"""Build the user message content for a skill slash command invocation.
|
||||
|
||||
Args:
|
||||
cmd_key: The command key including leading slash (e.g., "/gif-search").
|
||||
user_instruction: Optional text the user typed after the command.
|
||||
|
||||
Returns:
|
||||
The formatted message string, or None if the skill wasn't found.
|
||||
"""
|
||||
commands = get_skill_commands()
|
||||
skill_info = commands.get(cmd_key)
|
||||
if not skill_info:
|
||||
return None
|
||||
|
||||
skill_md_path = Path(skill_info["skill_md_path"])
|
||||
skill_dir = Path(skill_info["skill_dir"])
|
||||
skill_name = skill_info["name"]
|
||||
|
||||
try:
|
||||
content = skill_md_path.read_text(encoding='utf-8')
|
||||
except Exception:
|
||||
return f"[Failed to load skill: {skill_name}]"
|
||||
|
||||
parts = [
|
||||
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
|
||||
"",
|
||||
content.strip(),
|
||||
]
|
||||
|
||||
supporting = []
|
||||
for subdir in ("references", "templates", "scripts", "assets"):
|
||||
subdir_path = skill_dir / subdir
|
||||
if subdir_path.exists():
|
||||
for f in sorted(subdir_path.rglob("*")):
|
||||
if f.is_file():
|
||||
rel = str(f.relative_to(skill_dir))
|
||||
supporting.append(rel)
|
||||
|
||||
if supporting:
|
||||
parts.append("")
|
||||
parts.append("[This skill has supporting files you can load with the skill_view tool:]")
|
||||
for sf in supporting:
|
||||
parts.append(f"- {sf}")
|
||||
parts.append(f'\nTo view any of these, use: skill_view(name="{skill_name}", file="<path>")')
|
||||
|
||||
if user_instruction:
|
||||
parts.append("")
|
||||
parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}")
|
||||
|
||||
return "\n".join(parts)
|
||||
232
cli.py
232
cli.py
|
|
@ -682,17 +682,27 @@ COMMANDS = {
|
|||
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Skill Slash Commands — dynamic commands generated from installed skills
|
||||
# ============================================================================
|
||||
|
||||
from agent.skill_commands import scan_skill_commands, get_skill_commands, build_skill_invocation_message
|
||||
|
||||
_skill_commands = scan_skill_commands()
|
||||
|
||||
|
||||
class SlashCommandCompleter(Completer):
|
||||
"""Autocomplete for /commands in the input area."""
|
||||
"""Autocomplete for /commands and /skill-name in the input area."""
|
||||
|
||||
def get_completions(self, document, complete_event):
|
||||
text = document.text_before_cursor
|
||||
# Only complete at the start of input, after /
|
||||
if not text.startswith("/"):
|
||||
return
|
||||
word = text[1:] # strip the leading /
|
||||
|
||||
# Built-in commands
|
||||
for cmd, desc in COMMANDS.items():
|
||||
cmd_name = cmd[1:] # strip leading / from key
|
||||
cmd_name = cmd[1:]
|
||||
if cmd_name.startswith(word):
|
||||
yield Completion(
|
||||
cmd_name,
|
||||
|
|
@ -701,6 +711,17 @@ class SlashCommandCompleter(Completer):
|
|||
display_meta=desc,
|
||||
)
|
||||
|
||||
# Skill commands
|
||||
for cmd, info in _skill_commands.items():
|
||||
cmd_name = cmd[1:]
|
||||
if cmd_name.startswith(word):
|
||||
yield Completion(
|
||||
cmd_name,
|
||||
start_position=-len(word),
|
||||
display=cmd,
|
||||
display_meta=f"⚡ {info['description'][:50]}",
|
||||
)
|
||||
|
||||
|
||||
def save_config_value(key_path: str, value: any) -> bool:
|
||||
"""
|
||||
|
|
@ -782,7 +803,7 @@ class HermesCLI:
|
|||
Args:
|
||||
model: Model to use (default: from env or claude-sonnet)
|
||||
toolsets: List of toolsets to enable (default: all)
|
||||
provider: Inference provider ("auto", "openrouter", "nous")
|
||||
provider: Inference provider ("auto", "openrouter", "nous", "openai-codex")
|
||||
api_key: API key (default: from environment)
|
||||
base_url: API base URL (default: OpenRouter)
|
||||
max_turns: Maximum tool-calling iterations (default: 60)
|
||||
|
|
@ -800,37 +821,37 @@ class HermesCLI:
|
|||
# Configuration - priority: CLI args > env vars > config file
|
||||
# Model can come from: CLI arg, LLM_MODEL env, OPENAI_MODEL env (custom endpoint), or config
|
||||
self.model = model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or CLI_CONFIG["model"]["default"]
|
||||
|
||||
# Base URL: custom endpoint (OPENAI_BASE_URL) takes precedence over OpenRouter
|
||||
self.base_url = base_url or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"])
|
||||
|
||||
# API key: custom endpoint (OPENAI_API_KEY) takes precedence over OpenRouter
|
||||
self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
|
||||
|
||||
# Provider resolution: determines whether to use OAuth credentials or env var keys
|
||||
from hermes_cli.auth import resolve_provider
|
||||
self._explicit_api_key = api_key
|
||||
self._explicit_base_url = base_url
|
||||
|
||||
# Provider selection is resolved lazily at use-time via _ensure_runtime_credentials().
|
||||
self.requested_provider = (
|
||||
provider
|
||||
or os.getenv("HERMES_INFERENCE_PROVIDER")
|
||||
or CLI_CONFIG["model"].get("provider")
|
||||
or "auto"
|
||||
)
|
||||
self.provider = resolve_provider(
|
||||
self.requested_provider,
|
||||
explicit_api_key=api_key,
|
||||
explicit_base_url=base_url,
|
||||
self._provider_source: Optional[str] = None
|
||||
self.provider = self.requested_provider
|
||||
self.api_mode = "chat_completions"
|
||||
self.base_url = (
|
||||
base_url
|
||||
or os.getenv("OPENAI_BASE_URL")
|
||||
or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"])
|
||||
)
|
||||
self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
|
||||
self._nous_key_expires_at: Optional[str] = None
|
||||
self._nous_key_source: Optional[str] = None
|
||||
# Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default
|
||||
if max_turns is not None:
|
||||
if max_turns is not None: # CLI arg was explicitly set
|
||||
self.max_turns = max_turns
|
||||
elif os.getenv("HERMES_MAX_ITERATIONS"):
|
||||
self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS"))
|
||||
elif CLI_CONFIG["agent"].get("max_turns"):
|
||||
self.max_turns = CLI_CONFIG["agent"]["max_turns"]
|
||||
elif CLI_CONFIG.get("max_turns"): # Backwards compat: root-level max_turns
|
||||
self.max_turns = CLI_CONFIG["max_turns"]
|
||||
elif os.getenv("HERMES_MAX_ITERATIONS"):
|
||||
self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS"))
|
||||
else:
|
||||
self.max_turns = 60
|
||||
|
||||
|
|
@ -882,45 +903,51 @@ class HermesCLI:
|
|||
|
||||
def _ensure_runtime_credentials(self) -> bool:
|
||||
"""
|
||||
Ensure OAuth provider credentials are fresh before agent use.
|
||||
For Nous Portal: checks agent key TTL, refreshes/re-mints as needed.
|
||||
If the key changed, tears down the agent so it rebuilds with new creds.
|
||||
Ensure runtime credentials are resolved before agent use.
|
||||
Re-resolves provider credentials so key rotation and token refresh
|
||||
are picked up without restarting the CLI.
|
||||
Returns True if credentials are ready, False on auth failure.
|
||||
"""
|
||||
if self.provider != "nous":
|
||||
return True
|
||||
|
||||
from hermes_cli.auth import format_auth_error, resolve_nous_runtime_credentials
|
||||
from hermes_cli.runtime_provider import (
|
||||
resolve_runtime_provider,
|
||||
format_runtime_provider_error,
|
||||
)
|
||||
|
||||
try:
|
||||
credentials = resolve_nous_runtime_credentials(
|
||||
min_key_ttl_seconds=max(
|
||||
60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))
|
||||
),
|
||||
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=self.requested_provider,
|
||||
explicit_api_key=self._explicit_api_key,
|
||||
explicit_base_url=self._explicit_base_url,
|
||||
)
|
||||
except Exception as exc:
|
||||
message = format_auth_error(exc)
|
||||
message = format_runtime_provider_error(exc)
|
||||
self.console.print(f"[bold red]{message}[/]")
|
||||
return False
|
||||
|
||||
api_key = credentials.get("api_key")
|
||||
base_url = credentials.get("base_url")
|
||||
api_key = runtime.get("api_key")
|
||||
base_url = runtime.get("base_url")
|
||||
resolved_provider = runtime.get("provider", "openrouter")
|
||||
resolved_api_mode = runtime.get("api_mode", self.api_mode)
|
||||
if not isinstance(api_key, str) or not api_key:
|
||||
self.console.print("[bold red]Nous credential resolver returned an empty API key.[/]")
|
||||
self.console.print("[bold red]Provider resolver returned an empty API key.[/]")
|
||||
return False
|
||||
if not isinstance(base_url, str) or not base_url:
|
||||
self.console.print("[bold red]Nous credential resolver returned an empty base URL.[/]")
|
||||
self.console.print("[bold red]Provider resolver returned an empty base URL.[/]")
|
||||
return False
|
||||
|
||||
credentials_changed = api_key != self.api_key or base_url != self.base_url
|
||||
routing_changed = (
|
||||
resolved_provider != self.provider
|
||||
or resolved_api_mode != self.api_mode
|
||||
)
|
||||
self.provider = resolved_provider
|
||||
self.api_mode = resolved_api_mode
|
||||
self._provider_source = runtime.get("source")
|
||||
self.api_key = api_key
|
||||
self.base_url = base_url
|
||||
self._nous_key_expires_at = credentials.get("expires_at")
|
||||
self._nous_key_source = credentials.get("source")
|
||||
|
||||
# AIAgent/OpenAI client holds auth at init time, so rebuild if key rotated
|
||||
if credentials_changed and self.agent is not None:
|
||||
if (credentials_changed or routing_changed) and self.agent is not None:
|
||||
self.agent = None
|
||||
|
||||
return True
|
||||
|
|
@ -936,7 +963,7 @@ class HermesCLI:
|
|||
if self.agent is not None:
|
||||
return True
|
||||
|
||||
if self.provider == "nous" and not self._ensure_runtime_credentials():
|
||||
if not self._ensure_runtime_credentials():
|
||||
return False
|
||||
|
||||
# Initialize SQLite session store for CLI sessions
|
||||
|
|
@ -980,6 +1007,8 @@ class HermesCLI:
|
|||
model=self.model,
|
||||
api_key=self.api_key,
|
||||
base_url=self.base_url,
|
||||
provider=self.provider,
|
||||
api_mode=self.api_mode,
|
||||
max_iterations=self.max_turns,
|
||||
enabled_toolsets=self.enabled_toolsets,
|
||||
verbose_logging=self.verbose,
|
||||
|
|
@ -1072,8 +1101,8 @@ class HermesCLI:
|
|||
toolsets_info = f" [dim #B8860B]·[/] [#CD7F32]toolsets: {', '.join(self.enabled_toolsets)}[/]"
|
||||
|
||||
provider_info = f" [dim #B8860B]·[/] [dim]provider: {self.provider}[/]"
|
||||
if self.provider == "nous" and self._nous_key_source:
|
||||
provider_info += f" [dim #B8860B]·[/] [dim]key: {self._nous_key_source}[/]"
|
||||
if self._provider_source:
|
||||
provider_info += f" [dim #B8860B]·[/] [dim]auth: {self._provider_source}[/]"
|
||||
|
||||
self.console.print(
|
||||
f" {api_indicator} [#FFBF00]{model_short}[/] "
|
||||
|
|
@ -1082,20 +1111,21 @@ class HermesCLI:
|
|||
)
|
||||
|
||||
def show_help(self):
|
||||
"""Display help information with kawaii ASCII art."""
|
||||
print()
|
||||
print("+" + "-" * 50 + "+")
|
||||
print("|" + " " * 14 + "(^_^)? Available Commands" + " " * 10 + "|")
|
||||
print("+" + "-" * 50 + "+")
|
||||
print()
|
||||
"""Display help information."""
|
||||
_cprint(f"\n{_BOLD}+{'-' * 50}+{_RST}")
|
||||
_cprint(f"{_BOLD}|{' ' * 14}(^_^)? Available Commands{' ' * 10}|{_RST}")
|
||||
_cprint(f"{_BOLD}+{'-' * 50}+{_RST}\n")
|
||||
|
||||
for cmd, desc in COMMANDS.items():
|
||||
print(f" {cmd:<15} - {desc}")
|
||||
_cprint(f" {_GOLD}{cmd:<15}{_RST} {_DIM}-{_RST} {desc}")
|
||||
|
||||
print()
|
||||
print(" Tip: Just type your message to chat with Hermes!")
|
||||
print(" Multi-line: Alt+Enter for a new line")
|
||||
print()
|
||||
if _skill_commands:
|
||||
_cprint(f"\n ⚡ {_BOLD}Skill Commands{_RST} ({len(_skill_commands)} installed):")
|
||||
for cmd, info in sorted(_skill_commands.items()):
|
||||
_cprint(f" {_GOLD}{cmd:<22}{_RST} {_DIM}-{_RST} {info['description']}")
|
||||
|
||||
_cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}")
|
||||
_cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}\n")
|
||||
|
||||
def show_tools(self):
|
||||
"""Display available tools with kawaii ASCII art."""
|
||||
|
|
@ -1692,9 +1722,26 @@ class HermesCLI:
|
|||
self._show_gateway_status()
|
||||
elif cmd_lower == "/verbose":
|
||||
self._toggle_verbose()
|
||||
elif cmd_lower == "/compress":
|
||||
self._manual_compress()
|
||||
elif cmd_lower == "/usage":
|
||||
self._show_usage()
|
||||
else:
|
||||
self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]")
|
||||
self.console.print("[dim #B8860B]Type /help for available commands[/]")
|
||||
# Check for skill slash commands (/gif-search, /axolotl, etc.)
|
||||
base_cmd = cmd_lower.split()[0]
|
||||
if base_cmd in _skill_commands:
|
||||
user_instruction = cmd_original[len(base_cmd):].strip()
|
||||
msg = build_skill_invocation_message(base_cmd, user_instruction)
|
||||
if msg:
|
||||
skill_name = _skill_commands[base_cmd]["name"]
|
||||
print(f"\n⚡ Loading skill: {skill_name}")
|
||||
if hasattr(self, '_pending_input'):
|
||||
self._pending_input.put(msg)
|
||||
else:
|
||||
self.console.print(f"[bold red]Failed to load skill for {base_cmd}[/]")
|
||||
else:
|
||||
self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]")
|
||||
self.console.print("[dim #B8860B]Type /help for available commands[/]")
|
||||
|
||||
return True
|
||||
|
||||
|
|
@ -1720,6 +1767,77 @@ class HermesCLI:
|
|||
}
|
||||
self.console.print(labels.get(self.tool_progress_mode, ""))
|
||||
|
||||
def _manual_compress(self):
|
||||
"""Manually trigger context compression on the current conversation."""
|
||||
if not self.conversation_history or len(self.conversation_history) < 4:
|
||||
print("(._.) Not enough conversation to compress (need at least 4 messages).")
|
||||
return
|
||||
|
||||
if not self.agent:
|
||||
print("(._.) No active agent -- send a message first.")
|
||||
return
|
||||
|
||||
if not self.agent.compression_enabled:
|
||||
print("(._.) Compression is disabled in config.")
|
||||
return
|
||||
|
||||
original_count = len(self.conversation_history)
|
||||
try:
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
approx_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
|
||||
compressed, new_system = self.agent._compress_context(
|
||||
self.conversation_history,
|
||||
self.agent._cached_system_prompt or "",
|
||||
approx_tokens=approx_tokens,
|
||||
)
|
||||
self.conversation_history = compressed
|
||||
new_count = len(self.conversation_history)
|
||||
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||
print(
|
||||
f" ✅ Compressed: {original_count} → {new_count} messages "
|
||||
f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)"
|
||||
)
|
||||
except Exception as e:
|
||||
print(f" ❌ Compression failed: {e}")
|
||||
|
||||
def _show_usage(self):
|
||||
"""Show cumulative token usage for the current session."""
|
||||
if not self.agent:
|
||||
print("(._.) No active agent -- send a message first.")
|
||||
return
|
||||
|
||||
agent = self.agent
|
||||
prompt = agent.session_prompt_tokens
|
||||
completion = agent.session_completion_tokens
|
||||
total = agent.session_total_tokens
|
||||
calls = agent.session_api_calls
|
||||
|
||||
if calls == 0:
|
||||
print("(._.) No API calls made yet in this session.")
|
||||
return
|
||||
|
||||
# Current context window state
|
||||
compressor = agent.context_compressor
|
||||
last_prompt = compressor.last_prompt_tokens
|
||||
ctx_len = compressor.context_length
|
||||
pct = (last_prompt / ctx_len * 100) if ctx_len else 0
|
||||
compressions = compressor.compression_count
|
||||
|
||||
msg_count = len(self.conversation_history)
|
||||
|
||||
print(f" 📊 Session Token Usage")
|
||||
print(f" {'─' * 40}")
|
||||
print(f" Prompt tokens (input): {prompt:>10,}")
|
||||
print(f" Completion tokens (output): {completion:>9,}")
|
||||
print(f" Total tokens: {total:>10,}")
|
||||
print(f" API calls: {calls:>10,}")
|
||||
print(f" {'─' * 40}")
|
||||
print(f" Current context: {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)")
|
||||
print(f" Messages: {msg_count}")
|
||||
print(f" Compressions: {compressions}")
|
||||
|
||||
if self.verbose:
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
|
||||
|
|
@ -1894,8 +2012,8 @@ class HermesCLI:
|
|||
Returns:
|
||||
The agent's response, or None on error
|
||||
"""
|
||||
# Refresh OAuth credentials if needed (handles key rotation transparently)
|
||||
if self.provider == "nous" and not self._ensure_runtime_credentials():
|
||||
# Refresh provider credentials if needed (handles key rotation transparently)
|
||||
if not self._ensure_runtime_credentials():
|
||||
return None
|
||||
|
||||
# Initialize agent if needed
|
||||
|
|
|
|||
|
|
@ -172,10 +172,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
except UnicodeDecodeError:
|
||||
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")
|
||||
|
||||
model = os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6")
|
||||
# Custom endpoint (OPENAI_*) takes precedence, matching CLI behavior
|
||||
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "")
|
||||
base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
|
||||
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
|
||||
|
||||
try:
|
||||
import yaml
|
||||
|
|
@ -188,24 +185,27 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
model = _model_cfg
|
||||
elif isinstance(_model_cfg, dict):
|
||||
model = _model_cfg.get("default", model)
|
||||
base_url = _model_cfg.get("base_url", base_url)
|
||||
# Check if provider is nous — resolve OAuth credentials
|
||||
provider = _model_cfg.get("provider", "") if isinstance(_model_cfg, dict) else ""
|
||||
if provider == "nous":
|
||||
try:
|
||||
from hermes_cli.auth import resolve_nous_runtime_credentials
|
||||
creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=5 * 60)
|
||||
api_key = creds.get("api_key", api_key)
|
||||
base_url = creds.get("base_url", base_url)
|
||||
except Exception as nous_err:
|
||||
logging.warning("Nous Portal credential resolution failed for cron: %s", nous_err)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
from hermes_cli.runtime_provider import (
|
||||
resolve_runtime_provider,
|
||||
format_runtime_provider_error,
|
||||
)
|
||||
try:
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
|
||||
)
|
||||
except Exception as exc:
|
||||
message = format_runtime_provider_error(exc)
|
||||
raise RuntimeError(message) from exc
|
||||
|
||||
agent = AIAgent(
|
||||
model=model,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
api_key=runtime.get("api_key"),
|
||||
base_url=runtime.get("base_url"),
|
||||
provider=runtime.get("provider"),
|
||||
api_mode=runtime.get("api_mode"),
|
||||
quiet_mode=True,
|
||||
session_id=f"cron_{job_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ hermes
|
|||
hermes --model "anthropic/claude-sonnet-4"
|
||||
|
||||
# With specific provider
|
||||
hermes --provider nous # Use Nous Portal (requires: hermes login)
|
||||
hermes --provider nous # Use Nous Portal (requires: hermes model)
|
||||
hermes --provider openrouter # Force OpenRouter
|
||||
|
||||
# With specific toolsets
|
||||
|
|
@ -73,6 +73,9 @@ The CLI is implemented in `cli.py` and uses:
|
|||
| `/history` | Show conversation history |
|
||||
| `/save` | Save current conversation to file |
|
||||
| `/config` | Show current configuration |
|
||||
| `/verbose` | Cycle tool progress display: off → new → all → verbose |
|
||||
| `/compress` | Manually compress conversation context (flush memories + summarize) |
|
||||
| `/usage` | Show token usage for the current session |
|
||||
| `/quit` | Exit the CLI (also: `/exit`, `/q`) |
|
||||
|
||||
## Configuration
|
||||
|
|
@ -93,7 +96,7 @@ model:
|
|||
```
|
||||
|
||||
**Provider selection** (`provider` field):
|
||||
- `auto` (default): Uses Nous Portal if logged in (`hermes login`), otherwise falls back to OpenRouter/env vars.
|
||||
- `auto` (default): Uses Nous Portal if logged in (`hermes model`), otherwise falls back to OpenRouter/env vars.
|
||||
- `openrouter`: Always uses `OPENROUTER_API_KEY` from `.env`.
|
||||
- `nous`: Always uses Nous Portal OAuth credentials from `auth.json`.
|
||||
|
||||
|
|
|
|||
174
docs/hooks.md
Normal file
174
docs/hooks.md
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
# Event Hooks
|
||||
|
||||
The hooks system lets you run custom code at key points in the agent lifecycle — session creation, slash commands, each tool-calling step, and more. Hooks are discovered automatically from `~/.hermes/hooks/` and fire without blocking the main agent pipeline.
|
||||
|
||||
## Creating a Hook
|
||||
|
||||
Each hook is a directory under `~/.hermes/hooks/` containing two files:
|
||||
|
||||
```
|
||||
~/.hermes/hooks/
|
||||
└── my-hook/
|
||||
├── HOOK.yaml # Declares which events to listen for
|
||||
└── handler.py # Python handler function
|
||||
```
|
||||
|
||||
### HOOK.yaml
|
||||
|
||||
```yaml
|
||||
name: my-hook
|
||||
description: Log all agent activity to a file
|
||||
events:
|
||||
- agent:start
|
||||
- agent:end
|
||||
- agent:step
|
||||
```
|
||||
|
||||
The `events` list determines which events trigger your handler. You can subscribe to any combination of events, including wildcards like `command:*`.
|
||||
|
||||
### handler.py
|
||||
|
||||
```python
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
LOG_FILE = Path.home() / ".hermes" / "hooks" / "my-hook" / "activity.log"
|
||||
|
||||
async def handle(event_type: str, context: dict):
|
||||
"""Called for each subscribed event. Must be named 'handle'."""
|
||||
entry = {
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"event": event_type,
|
||||
**context,
|
||||
}
|
||||
with open(LOG_FILE, "a") as f:
|
||||
f.write(json.dumps(entry) + "\n")
|
||||
```
|
||||
|
||||
The handler function:
|
||||
- Must be named `handle`
|
||||
- Receives `event_type` (string) and `context` (dict)
|
||||
- Can be `async def` or regular `def` — both work
|
||||
- Errors are caught and logged, never crashing the agent
|
||||
|
||||
## Available Events
|
||||
|
||||
| Event | When it fires | Context keys |
|
||||
|-------|---------------|--------------|
|
||||
| `gateway:startup` | Gateway process starts | `platforms` (list of active platform names) |
|
||||
| `session:start` | New messaging session created | `platform`, `user_id`, `session_id`, `session_key` |
|
||||
| `session:reset` | User ran `/new` or `/reset` | `platform`, `user_id`, `session_key` |
|
||||
| `agent:start` | Agent begins processing a message | `platform`, `user_id`, `session_id`, `message` |
|
||||
| `agent:step` | Each iteration of the tool-calling loop | `platform`, `user_id`, `session_id`, `iteration`, `tool_names` |
|
||||
| `agent:end` | Agent finishes processing | `platform`, `user_id`, `session_id`, `message`, `response` |
|
||||
| `command:*` | Any slash command executed | `platform`, `user_id`, `command`, `args` |
|
||||
|
||||
### Wildcard Matching
|
||||
|
||||
Handlers registered for `command:*` fire for any `command:` event (`command:model`, `command:reset`, etc.). This lets you monitor all slash commands with a single subscription.
|
||||
|
||||
## Examples
|
||||
|
||||
### Telegram Notification on Long Tasks
|
||||
|
||||
Send yourself a Telegram message when the agent takes more than 10 tool-calling steps:
|
||||
|
||||
```yaml
|
||||
# ~/.hermes/hooks/long-task-alert/HOOK.yaml
|
||||
name: long-task-alert
|
||||
description: Alert when agent is taking many steps
|
||||
events:
|
||||
- agent:step
|
||||
```
|
||||
|
||||
```python
|
||||
# ~/.hermes/hooks/long-task-alert/handler.py
|
||||
import os
|
||||
import httpx
|
||||
|
||||
THRESHOLD = 10
|
||||
BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")
|
||||
CHAT_ID = os.getenv("TELEGRAM_HOME_CHANNEL")
|
||||
|
||||
async def handle(event_type: str, context: dict):
|
||||
iteration = context.get("iteration", 0)
|
||||
if iteration == THRESHOLD and BOT_TOKEN and CHAT_ID:
|
||||
tools = ", ".join(context.get("tool_names", []))
|
||||
text = f"⚠️ Agent has been running for {iteration} steps. Last tools: {tools}"
|
||||
async with httpx.AsyncClient() as client:
|
||||
await client.post(
|
||||
f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage",
|
||||
json={"chat_id": CHAT_ID, "text": text},
|
||||
)
|
||||
```
|
||||
|
||||
### Command Usage Logger
|
||||
|
||||
Track which slash commands are used and how often:
|
||||
|
||||
```yaml
|
||||
# ~/.hermes/hooks/command-logger/HOOK.yaml
|
||||
name: command-logger
|
||||
description: Log slash command usage
|
||||
events:
|
||||
- command:*
|
||||
```
|
||||
|
||||
```python
|
||||
# ~/.hermes/hooks/command-logger/handler.py
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
LOG = Path.home() / ".hermes" / "logs" / "command_usage.jsonl"
|
||||
|
||||
def handle(event_type: str, context: dict):
|
||||
LOG.parent.mkdir(parents=True, exist_ok=True)
|
||||
entry = {
|
||||
"ts": datetime.now().isoformat(),
|
||||
"command": context.get("command"),
|
||||
"args": context.get("args"),
|
||||
"platform": context.get("platform"),
|
||||
"user": context.get("user_id"),
|
||||
}
|
||||
with open(LOG, "a") as f:
|
||||
f.write(json.dumps(entry) + "\n")
|
||||
```
|
||||
|
||||
### Session Start Webhook
|
||||
|
||||
POST to an external service whenever a new session starts:
|
||||
|
||||
```yaml
|
||||
# ~/.hermes/hooks/session-webhook/HOOK.yaml
|
||||
name: session-webhook
|
||||
description: Notify external service on new sessions
|
||||
events:
|
||||
- session:start
|
||||
- session:reset
|
||||
```
|
||||
|
||||
```python
|
||||
# ~/.hermes/hooks/session-webhook/handler.py
|
||||
import httpx
|
||||
|
||||
WEBHOOK_URL = "https://your-service.example.com/hermes-events"
|
||||
|
||||
async def handle(event_type: str, context: dict):
|
||||
async with httpx.AsyncClient() as client:
|
||||
await client.post(WEBHOOK_URL, json={
|
||||
"event": event_type,
|
||||
**context,
|
||||
}, timeout=5)
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
1. On gateway startup, `HookRegistry.discover_and_load()` scans `~/.hermes/hooks/`
|
||||
2. Each subdirectory with `HOOK.yaml` + `handler.py` is loaded dynamically
|
||||
3. Handlers are registered for their declared events
|
||||
4. At each lifecycle point, `hooks.emit()` fires all matching handlers
|
||||
5. Errors in any handler are caught and logged — a broken hook never crashes the agent
|
||||
|
||||
Hooks only fire in the **gateway** (Telegram, Discord, Slack, WhatsApp). The CLI does not currently load hooks. The `agent:step` event bridges from the sync agent thread to the async hook system via `asyncio.run_coroutine_threadsafe`.
|
||||
|
|
@ -74,6 +74,13 @@ Sessions reset based on configurable policies:
|
|||
|
||||
Send `/new` or `/reset` as a message to start fresh.
|
||||
|
||||
### Context Management
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/compress` | Manually compress conversation context (saves memories, then summarizes) |
|
||||
| `/usage` | Show token usage and context window status for the current session |
|
||||
|
||||
### Per-Platform Overrides
|
||||
|
||||
Configure different reset policies per platform:
|
||||
|
|
|
|||
|
|
@ -425,6 +425,28 @@ class BasePlatformAdapter(ABC):
|
|||
text = f"{caption}\n{image_url}" if caption else image_url
|
||||
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
||||
|
||||
async def send_animation(
|
||||
self,
|
||||
chat_id: str,
|
||||
animation_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""
|
||||
Send an animated GIF natively via the platform API.
|
||||
|
||||
Override in subclasses to send GIFs as proper animations
|
||||
(e.g., Telegram send_animation) so they auto-play inline.
|
||||
Default falls back to send_image.
|
||||
"""
|
||||
return await self.send_image(chat_id=chat_id, image_url=animation_url, caption=caption, reply_to=reply_to)
|
||||
|
||||
@staticmethod
|
||||
def _is_animation_url(url: str) -> bool:
|
||||
"""Check if a URL points to an animated GIF (vs a static image)."""
|
||||
lower = url.lower().split('?')[0] # Strip query params
|
||||
return lower.endswith('.gif')
|
||||
|
||||
@staticmethod
|
||||
def extract_images(content: str) -> Tuple[List[Tuple[str, str]], str]:
|
||||
"""
|
||||
|
|
@ -636,11 +658,19 @@ class BasePlatformAdapter(ABC):
|
|||
if human_delay > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
try:
|
||||
img_result = await self.send_image(
|
||||
chat_id=event.source.chat_id,
|
||||
image_url=image_url,
|
||||
caption=alt_text if alt_text else None,
|
||||
)
|
||||
# Route animated GIFs through send_animation for proper playback
|
||||
if self._is_animation_url(image_url):
|
||||
img_result = await self.send_animation(
|
||||
chat_id=event.source.chat_id,
|
||||
animation_url=image_url,
|
||||
caption=alt_text if alt_text else None,
|
||||
)
|
||||
else:
|
||||
img_result = await self.send_image(
|
||||
chat_id=event.source.chat_id,
|
||||
image_url=image_url,
|
||||
caption=alt_text if alt_text else None,
|
||||
)
|
||||
if not img_result.success:
|
||||
print(f"[{self.name}] Failed to send image: {img_result.error}")
|
||||
except Exception as img_err:
|
||||
|
|
|
|||
|
|
@ -272,6 +272,30 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
# Fallback: send as text link
|
||||
return await super().send_image(chat_id, image_url, caption, reply_to)
|
||||
|
||||
async def send_animation(
|
||||
self,
|
||||
chat_id: str,
|
||||
animation_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
) -> SendResult:
|
||||
"""Send an animated GIF natively as a Telegram animation (auto-plays inline)."""
|
||||
if not self._bot:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
msg = await self._bot.send_animation(
|
||||
chat_id=int(chat_id),
|
||||
animation=animation_url,
|
||||
caption=caption[:1024] if caption else None,
|
||||
reply_to_message_id=int(reply_to) if reply_to else None,
|
||||
)
|
||||
return SendResult(success=True, message_id=str(msg.message_id))
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to send animation, falling back to photo: {e}")
|
||||
# Fallback: try as a regular photo
|
||||
return await self.send_image(chat_id, animation_url, caption, reply_to)
|
||||
|
||||
async def send_typing(self, chat_id: str) -> None:
|
||||
"""Send typing indicator."""
|
||||
if self._bot:
|
||||
|
|
|
|||
315
gateway/run.py
315
gateway/run.py
|
|
@ -78,6 +78,20 @@ if _config_path.exists():
|
|||
for _cfg_key, _env_var in _terminal_env_map.items():
|
||||
if _cfg_key in _terminal_cfg:
|
||||
os.environ[_env_var] = str(_terminal_cfg[_cfg_key])
|
||||
_compression_cfg = _cfg.get("compression", {})
|
||||
if _compression_cfg and isinstance(_compression_cfg, dict):
|
||||
_compression_env_map = {
|
||||
"enabled": "CONTEXT_COMPRESSION_ENABLED",
|
||||
"threshold": "CONTEXT_COMPRESSION_THRESHOLD",
|
||||
"summary_model": "CONTEXT_COMPRESSION_MODEL",
|
||||
}
|
||||
for _cfg_key, _env_var in _compression_env_map.items():
|
||||
if _cfg_key in _compression_cfg:
|
||||
os.environ[_env_var] = str(_compression_cfg[_cfg_key])
|
||||
_agent_cfg = _cfg.get("agent", {})
|
||||
if _agent_cfg and isinstance(_agent_cfg, dict):
|
||||
if "max_turns" in _agent_cfg:
|
||||
os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
|
||||
except Exception:
|
||||
pass # Non-fatal; gateway can still run with .env values
|
||||
|
||||
|
|
@ -111,6 +125,28 @@ from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageTyp
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_runtime_agent_kwargs() -> dict:
|
||||
"""Resolve provider credentials for gateway-created AIAgent instances."""
|
||||
from hermes_cli.runtime_provider import (
|
||||
resolve_runtime_provider,
|
||||
format_runtime_provider_error,
|
||||
)
|
||||
|
||||
try:
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
|
||||
)
|
||||
except Exception as exc:
|
||||
raise RuntimeError(format_runtime_provider_error(exc)) from exc
|
||||
|
||||
return {
|
||||
"api_key": runtime.get("api_key"),
|
||||
"base_url": runtime.get("base_url"),
|
||||
"provider": runtime.get("provider"),
|
||||
"api_mode": runtime.get("api_mode"),
|
||||
}
|
||||
|
||||
|
||||
class GatewayRunner:
|
||||
"""
|
||||
Main gateway controller.
|
||||
|
|
@ -178,17 +214,12 @@ class GatewayRunner:
|
|||
return
|
||||
|
||||
from run_agent import AIAgent
|
||||
_flush_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "")
|
||||
_flush_base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
|
||||
_flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL", "anthropic/claude-opus-4.6")
|
||||
|
||||
if not _flush_api_key:
|
||||
runtime_kwargs = _resolve_runtime_agent_kwargs()
|
||||
if not runtime_kwargs.get("api_key"):
|
||||
return
|
||||
|
||||
tmp_agent = AIAgent(
|
||||
model=_flush_model,
|
||||
api_key=_flush_api_key,
|
||||
base_url=_flush_base_url,
|
||||
**runtime_kwargs,
|
||||
max_iterations=8,
|
||||
quiet_mode=True,
|
||||
enabled_toolsets=["memory", "skills"],
|
||||
|
|
@ -608,6 +639,19 @@ class GatewayRunner:
|
|||
|
||||
# Check for commands
|
||||
command = event.get_command()
|
||||
|
||||
# Emit command:* hook for any recognized slash command
|
||||
_known_commands = {"new", "reset", "help", "status", "stop", "model",
|
||||
"personality", "retry", "undo", "sethome", "set-home",
|
||||
"compress", "usage"}
|
||||
if command and command in _known_commands:
|
||||
await self.hooks.emit(f"command:{command}", {
|
||||
"platform": source.platform.value if source.platform else "",
|
||||
"user_id": source.user_id,
|
||||
"command": command,
|
||||
"args": event.get_command_args().strip(),
|
||||
})
|
||||
|
||||
if command in ["new", "reset"]:
|
||||
return await self._handle_reset_command(event)
|
||||
|
||||
|
|
@ -634,6 +678,27 @@ class GatewayRunner:
|
|||
|
||||
if command in ["sethome", "set-home"]:
|
||||
return await self._handle_set_home_command(event)
|
||||
|
||||
if command == "compress":
|
||||
return await self._handle_compress_command(event)
|
||||
|
||||
if command == "usage":
|
||||
return await self._handle_usage_command(event)
|
||||
|
||||
# Skill slash commands: /skill-name loads the skill and sends to agent
|
||||
if command:
|
||||
try:
|
||||
from agent.skill_commands import get_skill_commands, build_skill_invocation_message
|
||||
skill_cmds = get_skill_commands()
|
||||
cmd_key = f"/{command}"
|
||||
if cmd_key in skill_cmds:
|
||||
user_instruction = event.get_command_args().strip()
|
||||
msg = build_skill_invocation_message(cmd_key, user_instruction)
|
||||
if msg:
|
||||
event.text = msg
|
||||
# Fall through to normal message processing with skill content
|
||||
except Exception as e:
|
||||
logger.debug("Skill command check failed (non-fatal): %s", e)
|
||||
|
||||
# Check for pending exec approval responses
|
||||
if source.chat_type != "dm":
|
||||
|
|
@ -663,6 +728,19 @@ class GatewayRunner:
|
|||
session_entry = self.session_store.get_or_create_session(source)
|
||||
session_key = session_entry.session_key
|
||||
|
||||
# Emit session:start for new or auto-reset sessions
|
||||
_is_new_session = (
|
||||
session_entry.created_at == session_entry.updated_at
|
||||
or getattr(session_entry, "was_auto_reset", False)
|
||||
)
|
||||
if _is_new_session:
|
||||
await self.hooks.emit("session:start", {
|
||||
"platform": source.platform.value if source.platform else "",
|
||||
"user_id": source.user_id,
|
||||
"session_id": session_entry.session_id,
|
||||
"session_key": session_key,
|
||||
})
|
||||
|
||||
# Build session context
|
||||
context = build_session_context(source, self.config, session_entry)
|
||||
|
||||
|
|
@ -916,15 +994,10 @@ class GatewayRunner:
|
|||
if old_history:
|
||||
from run_agent import AIAgent
|
||||
loop = asyncio.get_event_loop()
|
||||
# Resolve credentials so the flush agent can reach the LLM
|
||||
_flush_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "")
|
||||
_flush_base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
|
||||
_flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL", "anthropic/claude-opus-4.6")
|
||||
_flush_kwargs = _resolve_runtime_agent_kwargs()
|
||||
def _do_flush():
|
||||
tmp_agent = AIAgent(
|
||||
model=_flush_model,
|
||||
api_key=_flush_api_key,
|
||||
base_url=_flush_base_url,
|
||||
**_flush_kwargs,
|
||||
max_iterations=5,
|
||||
quiet_mode=True,
|
||||
enabled_toolsets=["memory"],
|
||||
|
|
@ -999,20 +1072,31 @@ class GatewayRunner:
|
|||
|
||||
async def _handle_help_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /help command - list available commands."""
|
||||
return (
|
||||
"📖 **Hermes Commands**\n"
|
||||
"\n"
|
||||
"`/new` — Start a new conversation\n"
|
||||
"`/reset` — Reset conversation history\n"
|
||||
"`/status` — Show session info\n"
|
||||
"`/stop` — Interrupt the running agent\n"
|
||||
"`/model [name]` — Show or change the model\n"
|
||||
"`/personality [name]` — Set a personality\n"
|
||||
"`/retry` — Retry your last message\n"
|
||||
"`/undo` — Remove the last exchange\n"
|
||||
"`/sethome` — Set this chat as the home channel\n"
|
||||
"`/help` — Show this message"
|
||||
)
|
||||
lines = [
|
||||
"📖 **Hermes Commands**\n",
|
||||
"`/new` — Start a new conversation",
|
||||
"`/reset` — Reset conversation history",
|
||||
"`/status` — Show session info",
|
||||
"`/stop` — Interrupt the running agent",
|
||||
"`/model [name]` — Show or change the model",
|
||||
"`/personality [name]` — Set a personality",
|
||||
"`/retry` — Retry your last message",
|
||||
"`/undo` — Remove the last exchange",
|
||||
"`/sethome` — Set this chat as the home channel",
|
||||
"`/compress` — Compress conversation context",
|
||||
"`/usage` — Show token usage for this session",
|
||||
"`/help` — Show this message",
|
||||
]
|
||||
try:
|
||||
from agent.skill_commands import get_skill_commands
|
||||
skill_cmds = get_skill_commands()
|
||||
if skill_cmds:
|
||||
lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):")
|
||||
for cmd in sorted(skill_cmds):
|
||||
lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}")
|
||||
except Exception:
|
||||
pass
|
||||
return "\n".join(lines)
|
||||
|
||||
async def _handle_model_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /model command - show or change the current model."""
|
||||
|
|
@ -1205,6 +1289,95 @@ class GatewayRunner:
|
|||
f"Cron jobs and cross-platform messages will be delivered here."
|
||||
)
|
||||
|
||||
async def _handle_compress_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /compress command -- manually compress conversation context."""
|
||||
source = event.source
|
||||
session_entry = self.session_store.get_or_create_session(source)
|
||||
history = self.session_store.load_transcript(session_entry.session_id)
|
||||
|
||||
if not history or len(history) < 4:
|
||||
return "Not enough conversation to compress (need at least 4 messages)."
|
||||
|
||||
try:
|
||||
from run_agent import AIAgent
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
|
||||
runtime_kwargs = _resolve_runtime_agent_kwargs()
|
||||
if not runtime_kwargs.get("api_key"):
|
||||
return "No provider configured -- cannot compress."
|
||||
|
||||
msgs = [
|
||||
{"role": m.get("role"), "content": m.get("content")}
|
||||
for m in history
|
||||
if m.get("role") in ("user", "assistant") and m.get("content")
|
||||
]
|
||||
original_count = len(msgs)
|
||||
approx_tokens = estimate_messages_tokens_rough(msgs)
|
||||
|
||||
tmp_agent = AIAgent(
|
||||
**runtime_kwargs,
|
||||
max_iterations=4,
|
||||
quiet_mode=True,
|
||||
enabled_toolsets=["memory"],
|
||||
session_id=session_entry.session_id,
|
||||
)
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
compressed, _ = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens),
|
||||
)
|
||||
|
||||
session_entry.conversation_history = compressed
|
||||
new_count = len(compressed)
|
||||
new_tokens = estimate_messages_tokens_rough(compressed)
|
||||
|
||||
return (
|
||||
f"🗜️ Compressed: {original_count} → {new_count} messages\n"
|
||||
f"~{approx_tokens:,} → ~{new_tokens:,} tokens"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("Manual compress failed: %s", e)
|
||||
return f"Compression failed: {e}"
|
||||
|
||||
async def _handle_usage_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /usage command -- show token usage for the session's last agent run."""
|
||||
source = event.source
|
||||
session_key = f"agent:main:{source.platform.value}:" + \
|
||||
(f"dm" if source.chat_type == "dm" else f"{source.chat_type}:{source.chat_id}")
|
||||
|
||||
agent = self._running_agents.get(session_key)
|
||||
if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
|
||||
lines = [
|
||||
"📊 **Session Token Usage**",
|
||||
f"Prompt (input): {agent.session_prompt_tokens:,}",
|
||||
f"Completion (output): {agent.session_completion_tokens:,}",
|
||||
f"Total: {agent.session_total_tokens:,}",
|
||||
f"API calls: {agent.session_api_calls}",
|
||||
]
|
||||
ctx = agent.context_compressor
|
||||
if ctx.last_prompt_tokens:
|
||||
pct = ctx.last_prompt_tokens / ctx.context_length * 100 if ctx.context_length else 0
|
||||
lines.append(f"Context: {ctx.last_prompt_tokens:,} / {ctx.context_length:,} ({pct:.0f}%)")
|
||||
if ctx.compression_count:
|
||||
lines.append(f"Compressions: {ctx.compression_count}")
|
||||
return "\n".join(lines)
|
||||
|
||||
# No running agent -- check session history for a rough count
|
||||
session_entry = self.session_store.get_or_create_session(source)
|
||||
history = self.session_store.load_transcript(session_entry.session_id)
|
||||
if history:
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")]
|
||||
approx = estimate_messages_tokens_rough(msgs)
|
||||
return (
|
||||
f"📊 **Session Info**\n"
|
||||
f"Messages: {len(msgs)}\n"
|
||||
f"Estimated context: ~{approx:,} tokens\n"
|
||||
f"_(Detailed usage available during active conversations)_"
|
||||
)
|
||||
return "No usage data available for this session."
|
||||
|
||||
def _set_session_env(self, context: SessionContext) -> None:
|
||||
"""Set environment variables for the current session."""
|
||||
os.environ["HERMES_SESSION_PLATFORM"] = context.source.platform.value
|
||||
|
|
@ -1593,6 +1766,25 @@ class GatewayRunner:
|
|||
result_holder = [None] # Mutable container for the result
|
||||
tools_holder = [None] # Mutable container for the tool definitions
|
||||
|
||||
# Bridge sync step_callback → async hooks.emit for agent:step events
|
||||
_loop_for_step = asyncio.get_event_loop()
|
||||
_hooks_ref = self.hooks
|
||||
|
||||
def _step_callback_sync(iteration: int, tool_names: list) -> None:
|
||||
try:
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
_hooks_ref.emit("agent:step", {
|
||||
"platform": source.platform.value if source.platform else "",
|
||||
"user_id": source.user_id,
|
||||
"session_id": session_id,
|
||||
"iteration": iteration,
|
||||
"tool_names": tool_names,
|
||||
}),
|
||||
_loop_for_step,
|
||||
)
|
||||
except Exception as _e:
|
||||
logger.debug("agent:step hook error: %s", _e)
|
||||
|
||||
def run_sync():
|
||||
# Pass session_key to process registry via env var so background
|
||||
# processes can be mapped back to this gateway session
|
||||
|
|
@ -1609,7 +1801,7 @@ class GatewayRunner:
|
|||
combined_ephemeral = context_prompt or ""
|
||||
if self._ephemeral_system_prompt:
|
||||
combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
|
||||
|
||||
|
||||
# Re-read .env and config for fresh credentials (gateway is long-lived,
|
||||
# keys may change without restart).
|
||||
try:
|
||||
|
|
@ -1619,9 +1811,6 @@ class GatewayRunner:
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Custom endpoint (OPENAI_*) takes precedence, matching CLI behavior
|
||||
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "")
|
||||
base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
|
||||
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
|
||||
|
||||
try:
|
||||
|
|
@ -1635,24 +1824,22 @@ class GatewayRunner:
|
|||
model = _model_cfg
|
||||
elif isinstance(_model_cfg, dict):
|
||||
model = _model_cfg.get("default", model)
|
||||
base_url = _model_cfg.get("base_url", base_url)
|
||||
# Check if provider is nous — resolve OAuth credentials
|
||||
provider = _model_cfg.get("provider", "") if isinstance(_model_cfg, dict) else ""
|
||||
if provider == "nous":
|
||||
try:
|
||||
from hermes_cli.auth import resolve_nous_runtime_credentials
|
||||
creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=5 * 60)
|
||||
api_key = creds.get("api_key", api_key)
|
||||
base_url = creds.get("base_url", base_url)
|
||||
except Exception as nous_err:
|
||||
logger.warning("Nous Portal credential resolution failed: %s", nous_err)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
runtime_kwargs = _resolve_runtime_agent_kwargs()
|
||||
except Exception as exc:
|
||||
return {
|
||||
"final_response": f"⚠️ Provider authentication failed: {exc}",
|
||||
"messages": [],
|
||||
"api_calls": 0,
|
||||
"tools": [],
|
||||
}
|
||||
|
||||
agent = AIAgent(
|
||||
model=model,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
**runtime_kwargs,
|
||||
max_iterations=max_iterations,
|
||||
quiet_mode=True,
|
||||
verbose_logging=False,
|
||||
|
|
@ -1662,6 +1849,7 @@ class GatewayRunner:
|
|||
reasoning_config=self._reasoning_config,
|
||||
session_id=session_id,
|
||||
tool_progress_callback=progress_callback if tool_progress_enabled else None,
|
||||
step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None,
|
||||
platform=platform_key,
|
||||
honcho_session_key=session_key,
|
||||
session_db=self._session_db,
|
||||
|
|
@ -1714,6 +1902,19 @@ class GatewayRunner:
|
|||
content = f"[Delivered from {mirror_src}] {content}"
|
||||
agent_history.append({"role": role, "content": content})
|
||||
|
||||
# Collect MEDIA paths already in history so we can exclude them
|
||||
# from the current turn's extraction. This is compression-safe:
|
||||
# even if the message list shrinks, we know which paths are old.
|
||||
_history_media_paths: set = set()
|
||||
for _hm in agent_history:
|
||||
if _hm.get("role") in ("tool", "function"):
|
||||
_hc = _hm.get("content", "")
|
||||
if "MEDIA:" in _hc:
|
||||
for _match in re.finditer(r'MEDIA:(\S+)', _hc):
|
||||
_p = _match.group(1).strip().rstrip('",}')
|
||||
if _p:
|
||||
_history_media_paths.add(_p)
|
||||
|
||||
result = agent.run_conversation(message, conversation_history=agent_history)
|
||||
result_holder[0] = result
|
||||
|
||||
|
|
@ -1734,22 +1935,25 @@ class GatewayRunner:
|
|||
# doesn't include them. We collect unique tags from tool results and
|
||||
# append any that aren't already present in the final response, so the
|
||||
# adapter's extract_media() can find and deliver the files exactly once.
|
||||
#
|
||||
# Uses path-based deduplication against _history_media_paths (collected
|
||||
# before run_conversation) instead of index slicing. This is safe even
|
||||
# when context compression shrinks the message list. (Fixes #160)
|
||||
if "MEDIA:" not in final_response:
|
||||
media_tags = []
|
||||
has_voice_directive = False
|
||||
for msg in result.get("messages", []):
|
||||
if msg.get("role") == "tool" or msg.get("role") == "function":
|
||||
if msg.get("role") in ("tool", "function"):
|
||||
content = msg.get("content", "")
|
||||
if "MEDIA:" in content:
|
||||
for match in re.finditer(r'MEDIA:(\S+)', content):
|
||||
path = match.group(1).strip().rstrip('",}')
|
||||
if path:
|
||||
if path and path not in _history_media_paths:
|
||||
media_tags.append(f"MEDIA:{path}")
|
||||
if "[[audio_as_voice]]" in content:
|
||||
has_voice_directive = True
|
||||
|
||||
if media_tags:
|
||||
# Deduplicate while preserving order
|
||||
seen = set()
|
||||
unique_tags = []
|
||||
for tag in media_tags:
|
||||
|
|
@ -1934,10 +2138,21 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool:
|
|||
maxBytes=5 * 1024 * 1024,
|
||||
backupCount=3,
|
||||
)
|
||||
file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
|
||||
from agent.redact import RedactingFormatter
|
||||
file_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
|
||||
logging.getLogger().addHandler(file_handler)
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
# Separate errors-only log for easy debugging
|
||||
error_handler = RotatingFileHandler(
|
||||
log_dir / 'errors.log',
|
||||
maxBytes=2 * 1024 * 1024,
|
||||
backupCount=2,
|
||||
)
|
||||
error_handler.setLevel(logging.WARNING)
|
||||
error_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
|
||||
logging.getLogger().addHandler(error_handler)
|
||||
|
||||
runner = GatewayRunner(config)
|
||||
|
||||
# Set up signal handlers
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ Architecture:
|
|||
- Auth store (auth.json) holds per-provider credential state
|
||||
- resolve_provider() picks the active provider via priority chain
|
||||
- resolve_*_runtime_credentials() handles token refresh and key minting
|
||||
- login_command() / logout_command() are the CLI entry points
|
||||
- logout_command() is the CLI entry point for clearing auth
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -18,7 +18,10 @@ from __future__ import annotations
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import stat
|
||||
import base64
|
||||
import subprocess
|
||||
import time
|
||||
import webbrowser
|
||||
from contextlib import contextmanager
|
||||
|
|
@ -55,6 +58,10 @@ DEFAULT_NOUS_SCOPE = "inference:mint_agent_key"
|
|||
DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes
|
||||
ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry
|
||||
DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1 # poll at most every 1s
|
||||
DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
||||
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
|
||||
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
|
@ -84,7 +91,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
|||
client_id=DEFAULT_NOUS_CLIENT_ID,
|
||||
scope=DEFAULT_NOUS_SCOPE,
|
||||
),
|
||||
# Future: "openai_codex", "anthropic", etc.
|
||||
"openai-codex": ProviderConfig(
|
||||
id="openai-codex",
|
||||
name="OpenAI Codex",
|
||||
auth_type="oauth_external",
|
||||
inference_base_url=DEFAULT_CODEX_BASE_URL,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -115,7 +127,7 @@ def format_auth_error(error: Exception) -> str:
|
|||
return str(error)
|
||||
|
||||
if error.relogin_required:
|
||||
return f"{error} Run `hermes login` to re-authenticate."
|
||||
return f"{error} Run `hermes model` to re-authenticate."
|
||||
|
||||
if error.code == "subscription_required":
|
||||
return (
|
||||
|
|
@ -298,12 +310,15 @@ def resolve_provider(
|
|||
"""
|
||||
normalized = (requested or "auto").strip().lower()
|
||||
|
||||
if normalized in {"openrouter", "custom"}:
|
||||
return "openrouter"
|
||||
if normalized in PROVIDER_REGISTRY:
|
||||
return normalized
|
||||
if normalized == "openrouter":
|
||||
return "openrouter"
|
||||
if normalized != "auto":
|
||||
return "openrouter"
|
||||
raise AuthError(
|
||||
f"Unknown provider '{normalized}'.",
|
||||
code="invalid_provider",
|
||||
)
|
||||
|
||||
# Explicit one-off CLI creds always mean openrouter/custom
|
||||
if explicit_api_key or explicit_base_url:
|
||||
|
|
@ -314,8 +329,8 @@ def resolve_provider(
|
|||
auth_store = _load_auth_store()
|
||||
active = auth_store.get("active_provider")
|
||||
if active and active in PROVIDER_REGISTRY:
|
||||
state = _load_provider_state(auth_store, active)
|
||||
if state and (state.get("access_token") or state.get("refresh_token")):
|
||||
status = get_auth_status(active)
|
||||
if status.get("logged_in"):
|
||||
return active
|
||||
except Exception as e:
|
||||
logger.debug("Could not detect active auth provider: %s", e)
|
||||
|
|
@ -369,6 +384,27 @@ def _optional_base_url(value: Any) -> Optional[str]:
|
|||
return cleaned if cleaned else None
|
||||
|
||||
|
||||
def _decode_jwt_claims(token: Any) -> Dict[str, Any]:
|
||||
if not isinstance(token, str) or token.count(".") != 2:
|
||||
return {}
|
||||
payload = token.split(".")[1]
|
||||
payload += "=" * ((4 - len(payload) % 4) % 4)
|
||||
try:
|
||||
raw = base64.urlsafe_b64decode(payload.encode("utf-8"))
|
||||
claims = json.loads(raw.decode("utf-8"))
|
||||
except Exception:
|
||||
return {}
|
||||
return claims if isinstance(claims, dict) else {}
|
||||
|
||||
|
||||
def _codex_access_token_is_expiring(access_token: Any, skew_seconds: int) -> bool:
|
||||
claims = _decode_jwt_claims(access_token)
|
||||
exp = claims.get("exp")
|
||||
if not isinstance(exp, (int, float)):
|
||||
return False
|
||||
return float(exp) <= (time.time() + max(0, int(skew_seconds)))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SSH / remote session detection
|
||||
# =============================================================================
|
||||
|
|
@ -378,6 +414,302 @@ def _is_remote_session() -> bool:
|
|||
return bool(os.getenv("SSH_CLIENT") or os.getenv("SSH_TTY"))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# OpenAI Codex auth file helpers
|
||||
# =============================================================================
|
||||
|
||||
def resolve_codex_home_path() -> Path:
|
||||
"""Resolve CODEX_HOME, defaulting to ~/.codex."""
|
||||
codex_home = os.getenv("CODEX_HOME", "").strip()
|
||||
if not codex_home:
|
||||
codex_home = str(Path.home() / ".codex")
|
||||
return Path(codex_home).expanduser()
|
||||
|
||||
|
||||
def _codex_auth_file_path() -> Path:
|
||||
return resolve_codex_home_path() / "auth.json"
|
||||
|
||||
|
||||
def _codex_auth_lock_path(auth_path: Path) -> Path:
|
||||
return auth_path.with_suffix(auth_path.suffix + ".lock")
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _codex_auth_file_lock(
|
||||
auth_path: Path,
|
||||
timeout_seconds: float = AUTH_LOCK_TIMEOUT_SECONDS,
|
||||
):
|
||||
lock_path = _codex_auth_lock_path(auth_path)
|
||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with lock_path.open("a+") as lock_file:
|
||||
if fcntl is None:
|
||||
yield
|
||||
return
|
||||
|
||||
deadline = time.time() + max(1.0, timeout_seconds)
|
||||
while True:
|
||||
try:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
break
|
||||
except BlockingIOError:
|
||||
if time.time() >= deadline:
|
||||
raise TimeoutError(f"Timed out waiting for Codex auth lock: {lock_path}")
|
||||
time.sleep(0.05)
|
||||
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
||||
|
||||
|
||||
def read_codex_auth_file() -> Dict[str, Any]:
|
||||
"""Read and validate Codex auth.json shape."""
|
||||
codex_home = resolve_codex_home_path()
|
||||
if not codex_home.exists():
|
||||
raise AuthError(
|
||||
f"Codex home directory not found at {codex_home}.",
|
||||
provider="openai-codex",
|
||||
code="codex_home_missing",
|
||||
relogin_required=True,
|
||||
)
|
||||
|
||||
auth_path = codex_home / "auth.json"
|
||||
if not auth_path.exists():
|
||||
raise AuthError(
|
||||
f"Codex auth file not found at {auth_path}.",
|
||||
provider="openai-codex",
|
||||
code="codex_auth_missing",
|
||||
relogin_required=True,
|
||||
)
|
||||
|
||||
try:
|
||||
payload = json.loads(auth_path.read_text())
|
||||
except Exception as exc:
|
||||
raise AuthError(
|
||||
f"Failed to parse Codex auth file at {auth_path}.",
|
||||
provider="openai-codex",
|
||||
code="codex_auth_invalid_json",
|
||||
relogin_required=True,
|
||||
) from exc
|
||||
|
||||
tokens = payload.get("tokens")
|
||||
if not isinstance(tokens, dict):
|
||||
raise AuthError(
|
||||
"Codex auth file is missing a valid 'tokens' object.",
|
||||
provider="openai-codex",
|
||||
code="codex_auth_invalid_shape",
|
||||
relogin_required=True,
|
||||
)
|
||||
|
||||
access_token = tokens.get("access_token")
|
||||
refresh_token = tokens.get("refresh_token")
|
||||
if not isinstance(access_token, str) or not access_token.strip():
|
||||
raise AuthError(
|
||||
"Codex auth file is missing tokens.access_token.",
|
||||
provider="openai-codex",
|
||||
code="codex_auth_missing_access_token",
|
||||
relogin_required=True,
|
||||
)
|
||||
if not isinstance(refresh_token, str) or not refresh_token.strip():
|
||||
raise AuthError(
|
||||
"Codex auth file is missing tokens.refresh_token.",
|
||||
provider="openai-codex",
|
||||
code="codex_auth_missing_refresh_token",
|
||||
relogin_required=True,
|
||||
)
|
||||
|
||||
return {
|
||||
"payload": payload,
|
||||
"tokens": tokens,
|
||||
"auth_path": auth_path,
|
||||
"codex_home": codex_home,
|
||||
}
|
||||
|
||||
|
||||
def _persist_codex_auth_payload(
|
||||
auth_path: Path,
|
||||
payload: Dict[str, Any],
|
||||
*,
|
||||
lock_held: bool = False,
|
||||
) -> None:
|
||||
auth_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def _write() -> None:
|
||||
serialized = json.dumps(payload, indent=2, ensure_ascii=False) + "\n"
|
||||
tmp_path = auth_path.parent / f".{auth_path.name}.{os.getpid()}.{time.time_ns()}.tmp"
|
||||
try:
|
||||
with tmp_path.open("w", encoding="utf-8") as tmp_file:
|
||||
tmp_file.write(serialized)
|
||||
tmp_file.flush()
|
||||
os.fsync(tmp_file.fileno())
|
||||
os.replace(tmp_path, auth_path)
|
||||
finally:
|
||||
if tmp_path.exists():
|
||||
try:
|
||||
tmp_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
try:
|
||||
auth_path.chmod(stat.S_IRUSR | stat.S_IWUSR)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
if lock_held:
|
||||
_write()
|
||||
return
|
||||
|
||||
with _codex_auth_file_lock(auth_path):
|
||||
_write()
|
||||
|
||||
|
||||
def _refresh_codex_auth_tokens(
    *,
    payload: Dict[str, Any],
    auth_path: Path,
    timeout_seconds: float,
    lock_held: bool = False,
) -> Dict[str, Any]:
    """Exchange the stored refresh token for a fresh Codex access token.

    On success the updated tokens are written back to *auth_path* (via
    ``_persist_codex_auth_payload``) and the new tokens dict is returned.
    Raises ``AuthError`` when the stored state is unusable or the OAuth
    endpoint rejects the refresh; ``relogin_required`` is set on errors
    that can only be fixed by running the login flow again.
    """
    tokens = payload.get("tokens")
    if not isinstance(tokens, dict):
        raise AuthError(
            "Codex auth file is missing a valid 'tokens' object.",
            provider="openai-codex",
            code="codex_auth_invalid_shape",
            relogin_required=True,
        )

    refresh_token = tokens.get("refresh_token")
    if not isinstance(refresh_token, str) or not refresh_token.strip():
        raise AuthError(
            "Codex auth file is missing tokens.refresh_token.",
            provider="openai-codex",
            code="codex_auth_missing_refresh_token",
            relogin_required=True,
        )

    # Standard OAuth2 refresh_token grant against the Codex token endpoint.
    # Clamp the timeout to at least 5 seconds.
    timeout = httpx.Timeout(max(5.0, float(timeout_seconds)))
    with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}) as client:
        response = client.post(
            CODEX_OAUTH_TOKEN_URL,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data={
                "grant_type": "refresh_token",
                "refresh_token": refresh_token,
                "client_id": CODEX_OAUTH_CLIENT_ID,
            },
        )

    if response.status_code != 200:
        # Prefer the server's OAuth error code/description when the error
        # body parses as JSON; otherwise fall back to a generic message.
        code = "codex_refresh_failed"
        message = f"Codex token refresh failed with status {response.status_code}."
        relogin_required = False
        try:
            err = response.json()
            if isinstance(err, dict):
                err_code = err.get("error")
                if isinstance(err_code, str) and err_code.strip():
                    code = err_code.strip()
                err_desc = err.get("error_description") or err.get("message")
                if isinstance(err_desc, str) and err_desc.strip():
                    message = f"Codex token refresh failed: {err_desc.strip()}"
        except Exception:
            pass
        # These OAuth error codes mean the refresh token itself is bad, so
        # retrying is pointless -- the user has to log in again.
        if code in {"invalid_grant", "invalid_token", "invalid_request"}:
            relogin_required = True
        raise AuthError(
            message,
            provider="openai-codex",
            code=code,
            relogin_required=relogin_required,
        )

    try:
        refresh_payload = response.json()
    except Exception as exc:
        raise AuthError(
            "Codex token refresh returned invalid JSON.",
            provider="openai-codex",
            code="codex_refresh_invalid_json",
            relogin_required=True,
        ) from exc

    access_token = refresh_payload.get("access_token")
    if not isinstance(access_token, str) or not access_token.strip():
        raise AuthError(
            "Codex token refresh response was missing access_token.",
            provider="openai-codex",
            code="codex_refresh_missing_access_token",
            relogin_required=True,
        )

    # The endpoint may rotate the refresh token; keep the old one unless a
    # non-empty replacement came back.
    updated_tokens = dict(tokens)
    updated_tokens["access_token"] = access_token.strip()
    next_refresh = refresh_payload.get("refresh_token")
    if isinstance(next_refresh, str) and next_refresh.strip():
        updated_tokens["refresh_token"] = next_refresh.strip()
    payload["tokens"] = updated_tokens
    # Timestamp in the Codex CLI's own format (UTC, trailing "Z").
    payload["last_refresh"] = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    _persist_codex_auth_payload(auth_path, payload, lock_held=lock_held)
    return updated_tokens
|
||||
|
||||
|
||||
def resolve_codex_runtime_credentials(
    *,
    force_refresh: bool = False,
    refresh_if_expiring: bool = True,
    refresh_skew_seconds: int = CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
) -> Dict[str, Any]:
    """Resolve runtime credentials from Codex CLI auth state.

    Reads the Codex ``auth.json``, refreshing the access token first when
    ``force_refresh`` is set or (with ``refresh_if_expiring``) when the
    token is within ``refresh_skew_seconds`` of expiry.  Returns a dict
    with provider/base_url/api_key plus auth metadata; may raise
    ``AuthError`` (e.g. when not logged in or the refresh fails).
    """
    data = read_codex_auth_file()
    payload = data["payload"]
    tokens = dict(data["tokens"])
    auth_path = data["auth_path"]
    access_token = str(tokens.get("access_token", "") or "").strip()
    refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20"))

    should_refresh = bool(force_refresh)
    if (not should_refresh) and refresh_if_expiring:
        should_refresh = _codex_access_token_is_expiring(access_token, refresh_skew_seconds)
    if should_refresh:
        # Double-checked refresh: take the cross-process file lock, then
        # re-read the auth file and re-evaluate -- another process may have
        # refreshed the token while we waited for the lock.
        lock_timeout = max(float(AUTH_LOCK_TIMEOUT_SECONDS), refresh_timeout_seconds + 5.0)
        with _codex_auth_file_lock(auth_path, timeout_seconds=lock_timeout):
            data = read_codex_auth_file()
            payload = data["payload"]
            tokens = dict(data["tokens"])
            access_token = str(tokens.get("access_token", "") or "").strip()

            should_refresh = bool(force_refresh)
            if (not should_refresh) and refresh_if_expiring:
                should_refresh = _codex_access_token_is_expiring(access_token, refresh_skew_seconds)

            if should_refresh:
                # lock_held=True so the persist step does not re-acquire
                # the lock we already hold.
                tokens = _refresh_codex_auth_tokens(
                    payload=payload,
                    auth_path=auth_path,
                    timeout_seconds=refresh_timeout_seconds,
                    lock_held=True,
                )
                access_token = str(tokens.get("access_token", "") or "").strip()

    # Allow overriding the API base URL via env; fall back to the default.
    base_url = (
        os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/")
        or DEFAULT_CODEX_BASE_URL
    )

    return {
        "provider": "openai-codex",
        "base_url": base_url,
        "api_key": access_token,
        "source": "codex-auth-json",
        "last_refresh": payload.get("last_refresh"),
        "auth_mode": payload.get("auth_mode"),
        "auth_file": str(auth_path),
        "codex_home": str(data["codex_home"]),
    }
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# TLS verification helper
|
||||
# =============================================================================
|
||||
|
|
@ -806,14 +1138,73 @@ def get_nous_auth_status() -> Dict[str, Any]:
|
|||
}
|
||||
|
||||
|
||||
def get_codex_auth_status() -> Dict[str, Any]:
    """Return a status snapshot for OpenAI Codex authentication."""
    state = get_provider_auth_state("openai-codex") or {}
    fallback_auth_file = state.get("auth_file") or str(_codex_auth_file_path())
    fallback_home = state.get("codex_home") or str(resolve_codex_home_path())

    try:
        creds = resolve_codex_runtime_credentials()
    except AuthError as exc:
        # Not logged in (or refresh failed): report where we looked and why.
        return {
            "logged_in": False,
            "auth_file": fallback_auth_file,
            "codex_home": fallback_home,
            "error": str(exc),
        }

    return {
        "logged_in": True,
        "auth_file": creds.get("auth_file"),
        "codex_home": creds.get("codex_home"),
        "last_refresh": creds.get("last_refresh"),
        "auth_mode": creds.get("auth_mode"),
        "source": creds.get("source"),
    }
|
||||
|
||||
|
||||
def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
    """Dispatch to the provider-specific auth status helper.

    Uses the active provider when *provider_id* is not given; unknown
    providers report not logged in.
    """
    handlers = {
        "nous": get_nous_auth_status,
        "openai-codex": get_codex_auth_status,
    }
    handler = handlers.get(provider_id or get_active_provider())
    return handler() if handler else {"logged_in": False}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# External credential detection
|
||||
# =============================================================================
|
||||
|
||||
def detect_external_credentials() -> List[Dict[str, Any]]:
    """Scan for credentials from other CLI tools that Hermes can reuse.

    Returns a list of dicts, each with:
      - provider: str -- Hermes provider id (e.g. "openai-codex")
      - path: str -- filesystem path where creds were found
      - label: str -- human-friendly description for the setup UI
    """
    discovered: List[Dict[str, Any]] = []

    # Codex CLI: ~/.codex/auth.json (or $CODEX_HOME/auth.json)
    try:
        auth_json = resolve_codex_home_path() / "auth.json"
        if auth_json.is_file():
            parsed = json.loads(auth_json.read_text())
            tokens = parsed.get("tokens", {})
            if isinstance(tokens, dict) and tokens.get("access_token"):
                discovered.append({
                    "provider": "openai-codex",
                    "path": str(auth_json),
                    "label": f"Codex CLI credentials found ({auth_json})",
                })
    except Exception:
        # Unreadable or corrupt files are simply not offered in setup.
        pass

    return discovered
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CLI Commands — login / logout
|
||||
# =============================================================================
|
||||
|
|
@ -970,21 +1361,218 @@ def _save_model_choice(model_id: str) -> None:
|
|||
|
||||
|
||||
def login_command(args) -> None:
    """Deprecated entry point: 'hermes login' has been removed.

    Prints migration guidance and exits with status 0 so scripted callers
    do not treat the deprecation notice as a failure.  The *args* namespace
    is accepted for argparse compatibility and ignored.
    """
    # Merge residue previously left the old docstring and a dead
    # `provider_id = ...` assignment here; the stub needs neither.
    print("The 'hermes login' command has been removed.")
    print("Use 'hermes model' to select a provider and model,")
    print("or 'hermes setup' for full interactive setup.")
    raise SystemExit(0)
|
||||
|
||||
if provider_id not in PROVIDER_REGISTRY:
|
||||
print(f"Unknown provider: {provider_id}")
|
||||
print(f"Available: {', '.join(PROVIDER_REGISTRY.keys())}")
|
||||
raise SystemExit(1)
|
||||
|
||||
pconfig = PROVIDER_REGISTRY[provider_id]
|
||||
def _login_openai_codex(args, pconfig: ProviderConfig) -> None:
    """OpenAI Codex login via device code flow (no Codex CLI required).

    Reuses valid credentials from <codex_home>/auth.json when the user
    agrees (default yes); otherwise runs the device code flow and persists
    the resulting provider state.
    """
    # Diff/merge residue had dead `provider_id`/`_login_nous` branch lines
    # interleaved here; this is the coherent Codex-only flow.
    codex_home = resolve_codex_home_path()

    # Check for existing valid credentials first.
    try:
        existing = resolve_codex_runtime_credentials()
        print(f"Existing Codex credentials found at {codex_home / 'auth.json'}")
        try:
            reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # Non-interactive stdin (or Ctrl+C at the prompt): default to reuse.
            reuse = "y"
        if reuse in ("", "y", "yes"):
            creds = existing
            _save_codex_provider_state(creds)
            return
    except AuthError:
        # No usable stored credentials -- fall through to the login flow.
        pass

    # No existing creds (or user declined) -- run device code flow.
    print()
    print("Signing in to OpenAI Codex...")
    print()

    creds = _codex_device_code_login()
    _save_codex_provider_state(creds)
|
||||
|
||||
|
||||
def _save_codex_provider_state(creds: Dict[str, Any]) -> None:
    """Persist Codex provider state to the auth store and config file."""
    state_keys = ("auth_file", "codex_home", "last_refresh", "auth_mode", "source")
    auth_state = {key: creds.get(key) for key in state_keys}

    # Mutate the shared auth store under its lock.
    with _auth_store_lock():
        store = _load_auth_store()
        _save_provider_state(store, "openai-codex", auth_state)
        saved_to = _save_auth_store(store)

    config_path = _update_config_for_provider("openai-codex", creds.get("base_url", DEFAULT_CODEX_BASE_URL))
    print()
    print("Login successful!")
    print(f" Auth state: {saved_to}")
    print(f" Config updated: {config_path} (model.provider=openai-codex)")
|
||||
|
||||
|
||||
def _codex_device_code_login() -> Dict[str, Any]:
    """Run the OpenAI device code login flow and return credentials dict.

    Steps: request a device/user code, show it to the user, poll until the
    browser sign-in completes, exchange the authorization code for tokens,
    then persist them to <codex_home>/auth.json.  Raises ``AuthError`` on
    any failure; exits with status 130 if the user cancels with Ctrl+C.
    """
    import time as _time

    issuer = "https://auth.openai.com"
    client_id = CODEX_OAUTH_CLIENT_ID

    # Step 1: Request device code
    try:
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
            resp = client.post(
                f"{issuer}/api/accounts/deviceauth/usercode",
                json={"client_id": client_id},
                headers={"Content-Type": "application/json"},
            )
    except Exception as exc:
        raise AuthError(
            f"Failed to request device code: {exc}",
            provider="openai-codex", code="device_code_request_failed",
        )

    if resp.status_code != 200:
        raise AuthError(
            f"Device code request returned status {resp.status_code}.",
            provider="openai-codex", code="device_code_request_error",
        )

    device_data = resp.json()
    user_code = device_data.get("user_code", "")
    device_auth_id = device_data.get("device_auth_id", "")
    # Server-suggested polling interval, clamped to at least 3 seconds.
    poll_interval = max(3, int(device_data.get("interval", "5")))

    if not user_code or not device_auth_id:
        raise AuthError(
            "Device code response missing required fields.",
            provider="openai-codex", code="device_code_incomplete",
        )

    # Step 2: Show user the code
    print("To continue, follow these steps:\n")
    print(f" 1. Open this URL in your browser:")
    print(f" \033[94m{issuer}/codex/device\033[0m\n")
    print(f" 2. Enter this code:")
    print(f" \033[94m{user_code}\033[0m\n")
    print("Waiting for sign-in... (press Ctrl+C to cancel)")

    # Step 3: Poll for authorization code
    max_wait = 15 * 60  # 15 minutes
    start = _time.monotonic()
    code_resp = None

    try:
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
            while _time.monotonic() - start < max_wait:
                # Sleep first so we never poll faster than the server asked.
                _time.sleep(poll_interval)
                poll_resp = client.post(
                    f"{issuer}/api/accounts/deviceauth/token",
                    json={"device_auth_id": device_auth_id, "user_code": user_code},
                    headers={"Content-Type": "application/json"},
                )

                if poll_resp.status_code == 200:
                    code_resp = poll_resp.json()
                    break
                elif poll_resp.status_code in (403, 404):
                    continue  # User hasn't completed login yet
                else:
                    raise AuthError(
                        f"Device auth polling returned status {poll_resp.status_code}.",
                        provider="openai-codex", code="device_code_poll_error",
                    )
    except KeyboardInterrupt:
        print("\nLogin cancelled.")
        # Conventional exit status for SIGINT.
        raise SystemExit(130)

    if code_resp is None:
        raise AuthError(
            "Login timed out after 15 minutes.",
            provider="openai-codex", code="device_code_timeout",
        )

    # Step 4: Exchange authorization code for tokens
    authorization_code = code_resp.get("authorization_code", "")
    code_verifier = code_resp.get("code_verifier", "")
    redirect_uri = f"{issuer}/deviceauth/callback"

    if not authorization_code or not code_verifier:
        raise AuthError(
            "Device auth response missing authorization_code or code_verifier.",
            provider="openai-codex", code="device_code_incomplete_exchange",
        )

    try:
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
            token_resp = client.post(
                CODEX_OAUTH_TOKEN_URL,
                data={
                    "grant_type": "authorization_code",
                    "code": authorization_code,
                    "redirect_uri": redirect_uri,
                    "client_id": client_id,
                    "code_verifier": code_verifier,
                },
                headers={"Content-Type": "application/x-www-form-urlencoded"},
            )
    except Exception as exc:
        raise AuthError(
            f"Token exchange failed: {exc}",
            provider="openai-codex", code="token_exchange_failed",
        )

    if token_resp.status_code != 200:
        raise AuthError(
            f"Token exchange returned status {token_resp.status_code}.",
            provider="openai-codex", code="token_exchange_error",
        )

    tokens = token_resp.json()
    access_token = tokens.get("access_token", "")
    refresh_token = tokens.get("refresh_token", "")

    if not access_token:
        raise AuthError(
            "Token exchange did not return an access_token.",
            provider="openai-codex", code="token_exchange_no_access_token",
        )

    # Step 5: Persist tokens to ~/.codex/auth.json
    codex_home = resolve_codex_home_path()
    codex_home.mkdir(parents=True, exist_ok=True)
    auth_path = codex_home / "auth.json"

    payload = {
        "tokens": {
            "access_token": access_token,
            "refresh_token": refresh_token,
        },
        # Timestamp in the Codex CLI's own format (UTC, trailing "Z").
        "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
    }
    _persist_codex_auth_payload(auth_path, payload, lock_held=False)

    # Allow overriding the API base URL via env; fall back to the default.
    base_url = (
        os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/")
        or DEFAULT_CODEX_BASE_URL
    )

    return {
        "api_key": access_token,
        "base_url": base_url,
        "auth_file": str(auth_path),
        "codex_home": str(codex_home),
        "last_refresh": payload["last_refresh"],
        "auth_mode": "chatgpt",
        "source": "device-code",
    }
|
||||
|
||||
|
||||
def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
|
|
@ -1168,6 +1756,6 @@ def logout_command(args) -> None:
|
|||
if os.getenv("OPENROUTER_API_KEY"):
|
||||
print("Hermes will use OpenRouter for inference.")
|
||||
else:
|
||||
print("Run `hermes login` or configure an API key to use Hermes.")
|
||||
print("Run `hermes model` or configure an API key to use Hermes.")
|
||||
else:
|
||||
print(f"No auth state found for {provider_name}.")
|
||||
|
|
|
|||
144
hermes_cli/codex_models.py
Normal file
144
hermes_cli/codex_models.py
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
"""Codex model discovery from API, local cache, and config."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
from hermes_cli.auth import resolve_codex_home_path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_CODEX_MODELS: List[str] = [
|
||||
"gpt-5.3-codex",
|
||||
"gpt-5.2-codex",
|
||||
"gpt-5.1-codex-max",
|
||||
"gpt-5.1-codex-mini",
|
||||
]
|
||||
|
||||
|
||||
def _fetch_models_from_api(access_token: str) -> List[str]:
    """Fetch available models from the Codex API.

    Returns visible, API-supported model slugs sorted by (priority, slug);
    returns an empty list on any network/HTTP/JSON failure so callers can
    fall back to local sources.
    """
    try:
        import httpx
        resp = httpx.get(
            "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
            headers={"Authorization": f"Bearer {access_token}"},
            timeout=10,
        )
        if resp.status_code != 200:
            return []
        data = resp.json()
        entries = data.get("models", []) if isinstance(data, dict) else []
    except Exception as exc:
        # Degrade silently to "no models"; the caller falls back.
        logger.debug("Failed to fetch Codex models from API: %s", exc)
        return []

    sortable = []
    for item in entries:
        if not isinstance(item, dict):
            continue
        slug = item.get("slug")
        if not isinstance(slug, str) or not slug.strip():
            continue
        slug = slug.strip()
        if item.get("supported_in_api") is False:
            continue
        visibility = item.get("visibility", "")
        # NOTE(review): this compares against "hide" while _read_cache_models
        # compares against "hidden" -- confirm which value the API actually
        # emits; one of the two checks is likely a no-op.
        if isinstance(visibility, str) and visibility.strip().lower() == "hide":
            continue
        priority = item.get("priority")
        # Missing or non-numeric priority sorts last.
        rank = int(priority) if isinstance(priority, (int, float)) else 10_000
        sortable.append((rank, slug))

    sortable.sort(key=lambda x: (x[0], x[1]))
    return [slug for _, slug in sortable]
|
||||
|
||||
|
||||
def _read_default_model(codex_home: Path) -> Optional[str]:
|
||||
config_path = codex_home / "config.toml"
|
||||
if not config_path.exists():
|
||||
return None
|
||||
try:
|
||||
import tomllib
|
||||
except Exception:
|
||||
return None
|
||||
try:
|
||||
payload = tomllib.loads(config_path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return None
|
||||
model = payload.get("model") if isinstance(payload, dict) else None
|
||||
if isinstance(model, str) and model.strip():
|
||||
return model.strip()
|
||||
return None
|
||||
|
||||
|
||||
def _read_cache_models(codex_home: Path) -> List[str]:
|
||||
cache_path = codex_home / "models_cache.json"
|
||||
if not cache_path.exists():
|
||||
return []
|
||||
try:
|
||||
raw = json.loads(cache_path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
entries = raw.get("models") if isinstance(raw, dict) else None
|
||||
sortable = []
|
||||
if isinstance(entries, list):
|
||||
for item in entries:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
slug = item.get("slug")
|
||||
if not isinstance(slug, str) or not slug.strip():
|
||||
continue
|
||||
slug = slug.strip()
|
||||
if "codex" not in slug.lower():
|
||||
continue
|
||||
if item.get("supported_in_api") is False:
|
||||
continue
|
||||
visibility = item.get("visibility")
|
||||
if isinstance(visibility, str) and visibility.strip().lower() == "hidden":
|
||||
continue
|
||||
priority = item.get("priority")
|
||||
rank = int(priority) if isinstance(priority, (int, float)) else 10_000
|
||||
sortable.append((rank, slug))
|
||||
|
||||
sortable.sort(key=lambda item: (item[0], item[1]))
|
||||
deduped: List[str] = []
|
||||
for _, slug in sortable:
|
||||
if slug not in deduped:
|
||||
deduped.append(slug)
|
||||
return deduped
|
||||
|
||||
|
||||
def get_codex_model_ids(access_token: Optional[str] = None) -> List[str]:
    """Return available Codex model IDs, trying the API first.

    Resolution order: live API (when a token is supplied and answers) >
    config.toml default > local cache > hardcoded defaults.
    """
    codex_home = resolve_codex_home_path()

    # A live API answer wins outright.
    if access_token:
        live = _fetch_models_from_api(access_token)
        if live:
            return live

    # Otherwise merge local sources in priority order, de-duplicated.
    merged: List[str] = []
    configured = _read_default_model(codex_home)
    if configured:
        merged.append(configured)
    for candidate in _read_cache_models(codex_home) + DEFAULT_CODEX_MODELS:
        if candidate not in merged:
            merged.append(candidate)
    return merged
|
||||
|
|
@ -26,6 +26,8 @@ COMMANDS = {
|
|||
"/skills": "Search, install, inspect, or manage skills from online registries",
|
||||
"/platforms": "Show gateway/messaging platform status",
|
||||
"/verbose": "Cycle tool progress display: off → new → all → verbose",
|
||||
"/compress": "Manually compress conversation context (flush memories + summarize)",
|
||||
"/usage": "Show token usage for the current session",
|
||||
"/quit": "Exit the CLI (also: /exit, /q)",
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -175,6 +175,36 @@ def run_doctor(args):
|
|||
else:
|
||||
check_warn("config.yaml not found", "(using defaults)")
|
||||
|
||||
# =========================================================================
|
||||
# Check: Auth providers
|
||||
# =========================================================================
|
||||
print()
|
||||
print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD))
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status
|
||||
|
||||
nous_status = get_nous_auth_status()
|
||||
if nous_status.get("logged_in"):
|
||||
check_ok("Nous Portal auth", "(logged in)")
|
||||
else:
|
||||
check_warn("Nous Portal auth", "(not logged in)")
|
||||
|
||||
codex_status = get_codex_auth_status()
|
||||
if codex_status.get("logged_in"):
|
||||
check_ok("OpenAI Codex auth", "(logged in)")
|
||||
else:
|
||||
check_warn("OpenAI Codex auth", "(not logged in)")
|
||||
if codex_status.get("error"):
|
||||
check_info(codex_status["error"])
|
||||
except Exception as e:
|
||||
check_warn("Auth provider status", f"(could not check: {e})")
|
||||
|
||||
if shutil.which("codex"):
|
||||
check_ok("codex CLI")
|
||||
else:
|
||||
check_warn("codex CLI not found", "(required for openai-codex login)")
|
||||
|
||||
# =========================================================================
|
||||
# Check: Directory structure
|
||||
# =========================================================================
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ Usage:
|
|||
hermes gateway install # Install gateway service
|
||||
hermes gateway uninstall # Uninstall gateway service
|
||||
hermes setup # Interactive setup wizard
|
||||
hermes login # Authenticate with Nous Portal (or other providers)
|
||||
hermes logout # Clear stored authentication
|
||||
hermes status # Show status of all components
|
||||
hermes cron # Manage cron jobs
|
||||
|
|
@ -60,6 +59,7 @@ logger = logging.getLogger(__name__)
|
|||
def _has_any_provider_configured() -> bool:
|
||||
"""Check if at least one inference provider is usable."""
|
||||
from hermes_cli.config import get_env_path, get_hermes_home
|
||||
from hermes_cli.auth import get_auth_status
|
||||
|
||||
# Check env vars (may be set by .env or shell).
|
||||
# OPENAI_BASE_URL alone counts — local models (vLLM, llama.cpp, etc.)
|
||||
|
|
@ -91,8 +91,8 @@ def _has_any_provider_configured() -> bool:
|
|||
auth = json.loads(auth_file.read_text())
|
||||
active = auth.get("active_provider")
|
||||
if active:
|
||||
state = auth.get("providers", {}).get(active, {})
|
||||
if state.get("access_token") or state.get("refresh_token"):
|
||||
status = get_auth_status(active)
|
||||
if status.get("logged_in"):
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
|
@ -289,7 +289,7 @@ def cmd_model(args):
|
|||
resolve_provider, get_provider_auth_state, PROVIDER_REGISTRY,
|
||||
_prompt_model_selection, _save_model_choice, _update_config_for_provider,
|
||||
resolve_nous_runtime_credentials, fetch_nous_models, AuthError, format_auth_error,
|
||||
_login_nous, ProviderConfig,
|
||||
_login_nous,
|
||||
)
|
||||
from hermes_cli.config import load_config, save_config, get_env_value, save_env_value
|
||||
|
||||
|
|
@ -312,7 +312,12 @@ def cmd_model(args):
|
|||
or config_provider
|
||||
or "auto"
|
||||
)
|
||||
active = resolve_provider(effective_provider)
|
||||
try:
|
||||
active = resolve_provider(effective_provider)
|
||||
except AuthError as exc:
|
||||
warning = format_auth_error(exc)
|
||||
print(f"Warning: {warning} Falling back to auto provider detection.")
|
||||
active = resolve_provider("auto")
|
||||
|
||||
# Detect custom endpoint
|
||||
if active == "openrouter" and get_env_value("OPENAI_BASE_URL"):
|
||||
|
|
@ -321,6 +326,7 @@ def cmd_model(args):
|
|||
provider_labels = {
|
||||
"openrouter": "OpenRouter",
|
||||
"nous": "Nous Portal",
|
||||
"openai-codex": "OpenAI Codex",
|
||||
"custom": "Custom endpoint",
|
||||
}
|
||||
active_label = provider_labels.get(active, active)
|
||||
|
|
@ -334,11 +340,12 @@ def cmd_model(args):
|
|||
providers = [
|
||||
("openrouter", "OpenRouter (100+ models, pay-per-use)"),
|
||||
("nous", "Nous Portal (Nous Research subscription)"),
|
||||
("openai-codex", "OpenAI Codex"),
|
||||
("custom", "Custom endpoint (self-hosted / VLLM / etc.)"),
|
||||
]
|
||||
|
||||
# Reorder so the active provider is at the top
|
||||
active_key = active if active in ("openrouter", "nous") else "custom"
|
||||
active_key = active if active in ("openrouter", "nous", "openai-codex") else "custom"
|
||||
ordered = []
|
||||
for key, label in providers:
|
||||
if key == active_key:
|
||||
|
|
@ -359,6 +366,8 @@ def cmd_model(args):
|
|||
_model_flow_openrouter(config, current_model)
|
||||
elif selected_provider == "nous":
|
||||
_model_flow_nous(config, current_model)
|
||||
elif selected_provider == "openai-codex":
|
||||
_model_flow_openai_codex(config, current_model)
|
||||
elif selected_provider == "custom":
|
||||
_model_flow_custom(config)
|
||||
|
||||
|
|
@ -512,6 +521,53 @@ def _model_flow_nous(config, current_model=""):
|
|||
print("No change.")
|
||||
|
||||
|
||||
def _model_flow_openai_codex(config, current_model=""):
    """OpenAI Codex provider flow: ensure logged in, then pick a model."""
    import argparse

    from hermes_cli.auth import (
        get_codex_auth_status, _prompt_model_selection, _save_model_choice,
        _update_config_for_provider, _login_openai_codex,
        PROVIDER_REGISTRY, DEFAULT_CODEX_BASE_URL,
    )
    from hermes_cli.codex_models import get_codex_model_ids
    from hermes_cli.config import get_env_value, save_env_value

    if not get_codex_auth_status().get("logged_in"):
        print("Not logged into OpenAI Codex. Starting login...")
        print()
        try:
            _login_openai_codex(argparse.Namespace(), PROVIDER_REGISTRY["openai-codex"])
        except SystemExit:
            print("Login cancelled or failed.")
            return
        except Exception as exc:
            print(f"Login failed: {exc}")
            return

    # Best effort: a live token lets us list models straight from the API.
    token = None
    try:
        from hermes_cli.auth import resolve_codex_runtime_credentials
        token = resolve_codex_runtime_credentials().get("api_key")
    except Exception:
        pass

    choice = _prompt_model_selection(
        get_codex_model_ids(access_token=token), current_model=current_model
    )
    if not choice:
        print("No change.")
        return

    _save_model_choice(choice)
    _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
    # Clear custom endpoint env vars that would otherwise override Codex.
    if get_env_value("OPENAI_BASE_URL"):
        save_env_value("OPENAI_BASE_URL", "")
        save_env_value("OPENAI_API_KEY", "")
    print(f"Default model set to: {choice} (via OpenAI Codex)")
|
||||
|
||||
|
||||
def _model_flow_custom(config):
|
||||
"""Custom endpoint: collect URL, API key, and model name."""
|
||||
from hermes_cli.auth import _save_model_choice, deactivate_provider
|
||||
|
|
@ -777,8 +833,8 @@ def cmd_update(args):
|
|||
pass # No systemd (macOS, WSL1, etc.) — skip silently
|
||||
|
||||
print()
|
||||
print("Tip: You can now log in with Nous Portal for inference:")
|
||||
print(" hermes login # Authenticate with Nous Portal")
|
||||
print("Tip: You can now select a provider and model:")
|
||||
print(" hermes model # Select provider and model")
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"✗ Update failed: {e}")
|
||||
|
|
@ -798,7 +854,6 @@ Examples:
|
|||
hermes --continue Resume the most recent session
|
||||
hermes --resume <session_id> Resume a specific session
|
||||
hermes setup Run setup wizard
|
||||
hermes login Authenticate with an inference provider
|
||||
hermes logout Clear stored authentication
|
||||
hermes model Select default model
|
||||
hermes config View configuration
|
||||
|
|
@ -857,7 +912,7 @@ For more help on a command:
|
|||
)
|
||||
chat_parser.add_argument(
|
||||
"--provider",
|
||||
choices=["auto", "openrouter", "nous"],
|
||||
choices=["auto", "openrouter", "nous", "openai-codex"],
|
||||
default=None,
|
||||
help="Inference provider (default: auto)"
|
||||
)
|
||||
|
|
@ -966,9 +1021,9 @@ For more help on a command:
|
|||
)
|
||||
login_parser.add_argument(
|
||||
"--provider",
|
||||
choices=["nous"],
|
||||
choices=["nous", "openai-codex"],
|
||||
default=None,
|
||||
help="Provider to authenticate with (default: interactive selection)"
|
||||
help="Provider to authenticate with (default: nous)"
|
||||
)
|
||||
login_parser.add_argument(
|
||||
"--portal-url",
|
||||
|
|
@ -1020,7 +1075,7 @@ For more help on a command:
|
|||
)
|
||||
logout_parser.add_argument(
|
||||
"--provider",
|
||||
choices=["nous"],
|
||||
choices=["nous", "openai-codex"],
|
||||
default=None,
|
||||
help="Provider to log out from (default: active provider)"
|
||||
)
|
||||
|
|
|
|||
149
hermes_cli/runtime_provider.py
Normal file
149
hermes_cli/runtime_provider.py
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
"""Shared runtime provider resolution for CLI, gateway, cron, and helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from hermes_cli.auth import (
|
||||
AuthError,
|
||||
format_auth_error,
|
||||
resolve_provider,
|
||||
resolve_nous_runtime_credentials,
|
||||
resolve_codex_runtime_credentials,
|
||||
)
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
|
||||
|
||||
def _get_model_config() -> Dict[str, Any]:
|
||||
config = load_config()
|
||||
model_cfg = config.get("model")
|
||||
if isinstance(model_cfg, dict):
|
||||
return dict(model_cfg)
|
||||
if isinstance(model_cfg, str) and model_cfg.strip():
|
||||
return {"default": model_cfg.strip()}
|
||||
return {}
|
||||
|
||||
|
||||
def resolve_requested_provider(requested: Optional[str] = None) -> str:
|
||||
"""Resolve provider request from explicit arg, env, then config."""
|
||||
if requested and requested.strip():
|
||||
return requested.strip().lower()
|
||||
|
||||
env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
|
||||
if env_provider:
|
||||
return env_provider
|
||||
|
||||
model_cfg = _get_model_config()
|
||||
cfg_provider = model_cfg.get("provider")
|
||||
if isinstance(cfg_provider, str) and cfg_provider.strip():
|
||||
return cfg_provider.strip().lower()
|
||||
|
||||
return "auto"
|
||||
|
||||
|
||||
def _resolve_openrouter_runtime(
|
||||
*,
|
||||
requested_provider: str,
|
||||
explicit_api_key: Optional[str] = None,
|
||||
explicit_base_url: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
model_cfg = _get_model_config()
|
||||
cfg_base_url = model_cfg.get("base_url") if isinstance(model_cfg.get("base_url"), str) else ""
|
||||
cfg_provider = model_cfg.get("provider") if isinstance(model_cfg.get("provider"), str) else ""
|
||||
requested_norm = (requested_provider or "").strip().lower()
|
||||
cfg_provider = cfg_provider.strip().lower()
|
||||
|
||||
env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()
|
||||
env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
|
||||
|
||||
use_config_base_url = False
|
||||
if requested_norm == "auto":
|
||||
if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url:
|
||||
if not cfg_provider or cfg_provider == "auto":
|
||||
use_config_base_url = True
|
||||
|
||||
base_url = (
|
||||
(explicit_base_url or "").strip()
|
||||
or env_openai_base_url
|
||||
or (cfg_base_url.strip() if use_config_base_url else "")
|
||||
or env_openrouter_base_url
|
||||
or OPENROUTER_BASE_URL
|
||||
).rstrip("/")
|
||||
|
||||
api_key = (
|
||||
explicit_api_key
|
||||
or os.getenv("OPENAI_API_KEY")
|
||||
or os.getenv("OPENROUTER_API_KEY")
|
||||
or ""
|
||||
)
|
||||
|
||||
source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config"
|
||||
|
||||
return {
|
||||
"provider": "openrouter",
|
||||
"api_mode": "chat_completions",
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"source": source,
|
||||
}
|
||||
|
||||
|
||||
def resolve_runtime_provider(
|
||||
*,
|
||||
requested: Optional[str] = None,
|
||||
explicit_api_key: Optional[str] = None,
|
||||
explicit_base_url: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Resolve runtime provider credentials for agent execution."""
|
||||
requested_provider = resolve_requested_provider(requested)
|
||||
|
||||
provider = resolve_provider(
|
||||
requested_provider,
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_base_url=explicit_base_url,
|
||||
)
|
||||
|
||||
if provider == "nous":
|
||||
creds = resolve_nous_runtime_credentials(
|
||||
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
|
||||
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
|
||||
)
|
||||
return {
|
||||
"provider": "nous",
|
||||
"api_mode": "chat_completions",
|
||||
"base_url": creds.get("base_url", "").rstrip("/"),
|
||||
"api_key": creds.get("api_key", ""),
|
||||
"source": creds.get("source", "portal"),
|
||||
"expires_at": creds.get("expires_at"),
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
if provider == "openai-codex":
|
||||
creds = resolve_codex_runtime_credentials()
|
||||
return {
|
||||
"provider": "openai-codex",
|
||||
"api_mode": "codex_responses",
|
||||
"base_url": creds.get("base_url", "").rstrip("/"),
|
||||
"api_key": creds.get("api_key", ""),
|
||||
"source": creds.get("source", "codex-auth-json"),
|
||||
"auth_file": creds.get("auth_file"),
|
||||
"codex_home": creds.get("codex_home"),
|
||||
"last_refresh": creds.get("last_refresh"),
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
runtime = _resolve_openrouter_runtime(
|
||||
requested_provider=requested_provider,
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_base_url=explicit_base_url,
|
||||
)
|
||||
runtime["requested_provider"] = requested_provider
|
||||
return runtime
|
||||
|
||||
|
||||
def format_runtime_provider_error(error: Exception) -> str:
|
||||
if isinstance(error, AuthError):
|
||||
return format_auth_error(error)
|
||||
return str(error)
|
||||
|
|
@ -620,11 +620,24 @@ def run_setup_wizard(args):
|
|||
get_active_provider, get_provider_auth_state, PROVIDER_REGISTRY,
|
||||
format_auth_error, AuthError, fetch_nous_models,
|
||||
resolve_nous_runtime_credentials, _update_config_for_provider,
|
||||
_login_openai_codex, get_codex_auth_status, DEFAULT_CODEX_BASE_URL,
|
||||
detect_external_credentials,
|
||||
)
|
||||
existing_custom = get_env_value("OPENAI_BASE_URL")
|
||||
existing_or = get_env_value("OPENROUTER_API_KEY")
|
||||
active_oauth = get_active_provider()
|
||||
|
||||
# Detect credentials from other CLI tools
|
||||
detected_creds = detect_external_credentials()
|
||||
if detected_creds:
|
||||
print_info("Detected existing credentials:")
|
||||
for cred in detected_creds:
|
||||
if cred["provider"] == "openai-codex":
|
||||
print_success(f" * {cred['label']} -- select \"OpenAI Codex\" to use it")
|
||||
else:
|
||||
print_info(f" * {cred['label']}")
|
||||
print()
|
||||
|
||||
# Detect if any provider is already configured
|
||||
has_any_provider = bool(active_oauth or existing_custom or existing_or)
|
||||
|
||||
|
|
@ -640,6 +653,7 @@ def run_setup_wizard(args):
|
|||
|
||||
provider_choices = [
|
||||
"Login with Nous Portal (Nous Research subscription)",
|
||||
"Login with OpenAI Codex",
|
||||
"OpenRouter API key (100+ models, pay-per-use)",
|
||||
"Custom OpenAI-compatible endpoint (self-hosted / VLLM / etc.)",
|
||||
]
|
||||
|
|
@ -647,7 +661,7 @@ def run_setup_wizard(args):
|
|||
provider_choices.append(keep_label)
|
||||
|
||||
# Default to "Keep current" if a provider exists, otherwise OpenRouter (most common)
|
||||
default_provider = len(provider_choices) - 1 if has_any_provider else 1
|
||||
default_provider = len(provider_choices) - 1 if has_any_provider else 2
|
||||
|
||||
if not has_any_provider:
|
||||
print_warning("An inference provider is required for Hermes to work.")
|
||||
|
|
@ -656,7 +670,7 @@ def run_setup_wizard(args):
|
|||
provider_idx = prompt_choice("Select your inference provider:", provider_choices, default_provider)
|
||||
|
||||
# Track which provider was selected for model step
|
||||
selected_provider = None # "nous", "openrouter", "custom", or None (keep)
|
||||
selected_provider = None # "nous", "openai-codex", "openrouter", "custom", or None (keep)
|
||||
nous_models = [] # populated if Nous login succeeds
|
||||
|
||||
if provider_idx == 0: # Nous Portal
|
||||
|
|
@ -692,14 +706,38 @@ def run_setup_wizard(args):
|
|||
|
||||
except SystemExit:
|
||||
print_warning("Nous Portal login was cancelled or failed.")
|
||||
print_info("You can try again later with: hermes login")
|
||||
print_info("You can try again later with: hermes model")
|
||||
selected_provider = None
|
||||
except Exception as e:
|
||||
print_error(f"Login failed: {e}")
|
||||
print_info("You can try again later with: hermes login")
|
||||
print_info("You can try again later with: hermes model")
|
||||
selected_provider = None
|
||||
|
||||
elif provider_idx == 1: # OpenRouter
|
||||
elif provider_idx == 1: # OpenAI Codex
|
||||
selected_provider = "openai-codex"
|
||||
print()
|
||||
print_header("OpenAI Codex Login")
|
||||
print()
|
||||
|
||||
try:
|
||||
import argparse
|
||||
mock_args = argparse.Namespace()
|
||||
_login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"])
|
||||
# Clear custom endpoint vars that would override provider routing.
|
||||
if existing_custom:
|
||||
save_env_value("OPENAI_BASE_URL", "")
|
||||
save_env_value("OPENAI_API_KEY", "")
|
||||
_update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
|
||||
except SystemExit:
|
||||
print_warning("OpenAI Codex login was cancelled or failed.")
|
||||
print_info("You can try again later with: hermes model")
|
||||
selected_provider = None
|
||||
except Exception as e:
|
||||
print_error(f"Login failed: {e}")
|
||||
print_info("You can try again later with: hermes model")
|
||||
selected_provider = None
|
||||
|
||||
elif provider_idx == 2: # OpenRouter
|
||||
selected_provider = "openrouter"
|
||||
print()
|
||||
print_header("OpenRouter API Key")
|
||||
|
|
@ -726,7 +764,7 @@ def run_setup_wizard(args):
|
|||
save_env_value("OPENAI_BASE_URL", "")
|
||||
save_env_value("OPENAI_API_KEY", "")
|
||||
|
||||
elif provider_idx == 2: # Custom endpoint
|
||||
elif provider_idx == 3: # Custom endpoint
|
||||
selected_provider = "custom"
|
||||
print()
|
||||
print_header("Custom OpenAI-Compatible Endpoint")
|
||||
|
|
@ -753,14 +791,14 @@ def run_setup_wizard(args):
|
|||
config['model'] = model_name
|
||||
save_env_value("LLM_MODEL", model_name)
|
||||
print_success("Custom endpoint configured")
|
||||
# else: provider_idx == 3 (Keep current) — only shown when a provider already exists
|
||||
# else: provider_idx == 4 (Keep current) — only shown when a provider already exists
|
||||
|
||||
# =========================================================================
|
||||
# Step 1b: OpenRouter API Key for tools (if not already set)
|
||||
# =========================================================================
|
||||
# Tools (vision, web, MoA) use OpenRouter independently of the main provider.
|
||||
# Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen.
|
||||
if selected_provider in ("nous", "custom") and not get_env_value("OPENROUTER_API_KEY"):
|
||||
if selected_provider in ("nous", "openai-codex", "custom") and not get_env_value("OPENROUTER_API_KEY"):
|
||||
print()
|
||||
print_header("OpenRouter API Key (for tools)")
|
||||
print_info("Tools like vision analysis, web search, and MoA use OpenRouter")
|
||||
|
|
@ -806,6 +844,33 @@ def run_setup_wizard(args):
|
|||
config['model'] = custom
|
||||
save_env_value("LLM_MODEL", custom)
|
||||
# else: keep current
|
||||
elif selected_provider == "openai-codex":
|
||||
from hermes_cli.codex_models import get_codex_model_ids
|
||||
# Try to get the access token for live model discovery
|
||||
_codex_token = None
|
||||
try:
|
||||
from hermes_cli.auth import resolve_codex_runtime_credentials
|
||||
_codex_creds = resolve_codex_runtime_credentials()
|
||||
_codex_token = _codex_creds.get("api_key")
|
||||
except Exception:
|
||||
pass
|
||||
codex_models = get_codex_model_ids(access_token=_codex_token)
|
||||
model_choices = [f"{m}" for m in codex_models]
|
||||
model_choices.append("Custom model")
|
||||
model_choices.append(f"Keep current ({current_model})")
|
||||
|
||||
keep_idx = len(model_choices) - 1
|
||||
model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
|
||||
|
||||
if model_idx < len(codex_models):
|
||||
config['model'] = codex_models[model_idx]
|
||||
save_env_value("LLM_MODEL", codex_models[model_idx])
|
||||
elif model_idx == len(codex_models):
|
||||
custom = prompt("Enter model name")
|
||||
if custom:
|
||||
config['model'] = custom
|
||||
save_env_value("LLM_MODEL", custom)
|
||||
_update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
|
||||
else:
|
||||
# Static list for OpenRouter / fallback (from canonical list)
|
||||
from hermes_cli.models import model_ids, menu_labels
|
||||
|
|
|
|||
|
|
@ -101,15 +101,17 @@ def show_status(args):
|
|||
print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD))
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import get_nous_auth_status
|
||||
from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status
|
||||
nous_status = get_nous_auth_status()
|
||||
codex_status = get_codex_auth_status()
|
||||
except Exception:
|
||||
nous_status = {}
|
||||
codex_status = {}
|
||||
|
||||
nous_logged_in = bool(nous_status.get("logged_in"))
|
||||
print(
|
||||
f" {'Nous Portal':<12} {check_mark(nous_logged_in)} "
|
||||
f"{'logged in' if nous_logged_in else 'not logged in (run: hermes login)'}"
|
||||
f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}"
|
||||
)
|
||||
if nous_logged_in:
|
||||
portal_url = nous_status.get("portal_base_url") or "(unknown)"
|
||||
|
|
@ -121,6 +123,20 @@ def show_status(args):
|
|||
print(f" Key exp: {key_exp}")
|
||||
print(f" Refresh: {refresh_label}")
|
||||
|
||||
codex_logged_in = bool(codex_status.get("logged_in"))
|
||||
print(
|
||||
f" {'OpenAI Codex':<12} {check_mark(codex_logged_in)} "
|
||||
f"{'logged in' if codex_logged_in else 'not logged in (run: hermes model)'}"
|
||||
)
|
||||
codex_auth_file = codex_status.get("auth_file")
|
||||
if codex_auth_file:
|
||||
print(f" Auth file: {codex_auth_file}")
|
||||
codex_last_refresh = _format_iso_timestamp(codex_status.get("last_refresh"))
|
||||
if codex_status.get("last_refresh"):
|
||||
print(f" Refreshed: {codex_last_refresh}")
|
||||
if codex_status.get("error") and not codex_logged_in:
|
||||
print(f" Error: {codex_status.get('error')}")
|
||||
|
||||
# =========================================================================
|
||||
# Terminal Configuration
|
||||
# =========================================================================
|
||||
|
|
|
|||
1213
run_agent.py
1213
run_agent.py
File diff suppressed because it is too large
Load diff
|
|
@ -723,7 +723,7 @@ setup_path() {
|
|||
PATH_LINE='export PATH="$HOME/.local/bin:$PATH"'
|
||||
|
||||
for SHELL_CONFIG in "${SHELL_CONFIGS[@]}"; do
|
||||
if ! grep -q '\.local/bin' "$SHELL_CONFIG" 2>/dev/null; then
|
||||
if ! grep -v '^[[:space:]]*#' "$SHELL_CONFIG" 2>/dev/null | grep -qE 'PATH=.*\.local/bin'; then
|
||||
echo "" >> "$SHELL_CONFIG"
|
||||
echo "# Hermes Agent — ensure ~/.local/bin is on PATH" >> "$SHELL_CONFIG"
|
||||
echo "$PATH_LINE" >> "$SHELL_CONFIG"
|
||||
|
|
|
|||
168
tests/agent/test_auxiliary_client.py
Normal file
168
tests/agent/test_auxiliary_client.py
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
"""Tests for agent.auxiliary_client resolution chain, especially the Codex fallback."""
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.auxiliary_client import (
|
||||
get_text_auxiliary_client,
|
||||
get_vision_auxiliary_client,
|
||||
auxiliary_max_tokens_param,
|
||||
_read_codex_access_token,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _clean_env(monkeypatch):
|
||||
"""Strip provider env vars so each test starts clean."""
|
||||
for key in (
|
||||
"OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
|
||||
"OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
|
||||
):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def codex_auth_dir(tmp_path, monkeypatch):
|
||||
"""Provide a writable ~/.codex/ directory with a valid auth.json."""
|
||||
codex_dir = tmp_path / ".codex"
|
||||
codex_dir.mkdir()
|
||||
auth_file = codex_dir / "auth.json"
|
||||
auth_file.write_text(json.dumps({
|
||||
"tokens": {
|
||||
"access_token": "codex-test-token-abc123",
|
||||
"refresh_token": "codex-refresh-xyz",
|
||||
}
|
||||
}))
|
||||
monkeypatch.setattr(
|
||||
"agent.auxiliary_client._read_codex_access_token",
|
||||
lambda: "codex-test-token-abc123",
|
||||
)
|
||||
return codex_dir
|
||||
|
||||
|
||||
class TestReadCodexAccessToken:
|
||||
def test_valid_auth_file(self, tmp_path):
|
||||
codex_dir = tmp_path / ".codex"
|
||||
codex_dir.mkdir()
|
||||
auth = codex_dir / "auth.json"
|
||||
auth.write_text(json.dumps({
|
||||
"tokens": {"access_token": "tok-123", "refresh_token": "r-456"}
|
||||
}))
|
||||
with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
|
||||
result = _read_codex_access_token()
|
||||
assert result == "tok-123"
|
||||
|
||||
def test_missing_file_returns_none(self, tmp_path):
|
||||
with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
|
||||
result = _read_codex_access_token()
|
||||
assert result is None
|
||||
|
||||
def test_empty_token_returns_none(self, tmp_path):
|
||||
codex_dir = tmp_path / ".codex"
|
||||
codex_dir.mkdir()
|
||||
auth = codex_dir / "auth.json"
|
||||
auth.write_text(json.dumps({"tokens": {"access_token": " "}}))
|
||||
with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
|
||||
result = _read_codex_access_token()
|
||||
assert result is None
|
||||
|
||||
def test_malformed_json_returns_none(self, tmp_path):
|
||||
codex_dir = tmp_path / ".codex"
|
||||
codex_dir.mkdir()
|
||||
(codex_dir / "auth.json").write_text("{bad json")
|
||||
with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
|
||||
result = _read_codex_access_token()
|
||||
assert result is None
|
||||
|
||||
def test_missing_tokens_key_returns_none(self, tmp_path):
|
||||
codex_dir = tmp_path / ".codex"
|
||||
codex_dir.mkdir()
|
||||
(codex_dir / "auth.json").write_text(json.dumps({"other": "data"}))
|
||||
with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
|
||||
result = _read_codex_access_token()
|
||||
assert result is None
|
||||
|
||||
|
||||
class TestGetTextAuxiliaryClient:
|
||||
"""Test the full resolution chain for get_text_auxiliary_client."""
|
||||
|
||||
def test_openrouter_takes_priority(self, monkeypatch, codex_auth_dir):
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_text_auxiliary_client()
|
||||
assert model == "google/gemini-3-flash-preview"
|
||||
mock_openai.assert_called_once()
|
||||
call_kwargs = mock_openai.call_args
|
||||
assert call_kwargs.kwargs["api_key"] == "or-key"
|
||||
|
||||
def test_nous_takes_priority_over_codex(self, monkeypatch, codex_auth_dir):
|
||||
with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
mock_nous.return_value = {"access_token": "nous-tok"}
|
||||
client, model = get_text_auxiliary_client()
|
||||
assert model == "gemini-3-flash"
|
||||
|
||||
def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
|
||||
monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
|
||||
# Override the autouse monkeypatch for codex
|
||||
monkeypatch.setattr(
|
||||
"agent.auxiliary_client._read_codex_access_token",
|
||||
lambda: "codex-test-token-abc123",
|
||||
)
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_text_auxiliary_client()
|
||||
assert model == "gpt-4o-mini"
|
||||
call_kwargs = mock_openai.call_args
|
||||
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
|
||||
|
||||
def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_text_auxiliary_client()
|
||||
assert model == "gpt-5.3-codex"
|
||||
# Returns a CodexAuxiliaryClient wrapper, not a raw OpenAI client
|
||||
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
|
||||
def test_returns_none_when_nothing_available(self):
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
|
||||
client, model = get_text_auxiliary_client()
|
||||
assert client is None
|
||||
assert model is None
|
||||
|
||||
|
||||
class TestCodexNotInVisionClient:
|
||||
"""Codex fallback should NOT apply to vision tasks."""
|
||||
|
||||
def test_vision_returns_none_without_openrouter_nous(self):
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
|
||||
client, model = get_vision_auxiliary_client()
|
||||
assert client is None
|
||||
assert model is None
|
||||
|
||||
|
||||
class TestAuxiliaryMaxTokensParam:
|
||||
def test_codex_fallback_uses_max_tokens(self, monkeypatch):
|
||||
"""Codex adapter translates max_tokens internally, so we return max_tokens."""
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client._read_codex_access_token", return_value="tok"):
|
||||
result = auxiliary_max_tokens_param(1024)
|
||||
assert result == {"max_tokens": 1024}
|
||||
|
||||
def test_openrouter_uses_max_tokens(self, monkeypatch):
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
result = auxiliary_max_tokens_param(1024)
|
||||
assert result == {"max_tokens": 1024}
|
||||
|
||||
def test_no_provider_uses_max_tokens(self):
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
|
||||
result = auxiliary_max_tokens_param(1024)
|
||||
assert result == {"max_tokens": 1024}
|
||||
173
tests/agent/test_redact.py
Normal file
173
tests/agent/test_redact.py
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
"""Tests for agent.redact -- secret masking in logs and output."""
|
||||
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.redact import redact_sensitive_text, RedactingFormatter
|
||||
|
||||
|
||||
class TestKnownPrefixes:
|
||||
def test_openai_sk_key(self):
|
||||
text = "Using key sk-proj-abc123def456ghi789jkl012"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "sk-pro" in result
|
||||
assert "abc123def456" not in result
|
||||
assert "..." in result
|
||||
|
||||
def test_openrouter_sk_key(self):
|
||||
text = "OPENROUTER_API_KEY=sk-or-v1-abcdefghijklmnopqrstuvwxyz1234567890"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "abcdefghijklmnop" not in result
|
||||
|
||||
def test_github_pat_classic(self):
|
||||
result = redact_sensitive_text("token: ghp_abc123def456ghi789jkl")
|
||||
assert "abc123def456" not in result
|
||||
|
||||
def test_github_pat_fine_grained(self):
|
||||
result = redact_sensitive_text("github_pat_abc123def456ghi789jklmno")
|
||||
assert "abc123def456" not in result
|
||||
|
||||
def test_slack_token(self):
|
||||
token = "xoxb-" + "0" * 12 + "-" + "a" * 14
|
||||
result = redact_sensitive_text(token)
|
||||
assert "a" * 14 not in result
|
||||
|
||||
def test_google_api_key(self):
|
||||
result = redact_sensitive_text("AIzaSyB-abc123def456ghi789jklmno012345")
|
||||
assert "abc123def456" not in result
|
||||
|
||||
def test_perplexity_key(self):
|
||||
result = redact_sensitive_text("pplx-abcdef123456789012345")
|
||||
assert "abcdef12345" not in result
|
||||
|
||||
def test_fal_key(self):
|
||||
result = redact_sensitive_text("fal_abc123def456ghi789jkl")
|
||||
assert "abc123def456" not in result
|
||||
|
||||
def test_short_token_fully_masked(self):
|
||||
result = redact_sensitive_text("key=sk-short1234567")
|
||||
assert "***" in result
|
||||
|
||||
|
||||
class TestEnvAssignments:
|
||||
def test_export_api_key(self):
|
||||
text = "export OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "OPENAI_API_KEY=" in result
|
||||
assert "abc123def456" not in result
|
||||
|
||||
def test_quoted_value(self):
|
||||
text = 'MY_SECRET_TOKEN="supersecretvalue123456789"'
|
||||
result = redact_sensitive_text(text)
|
||||
assert "MY_SECRET_TOKEN=" in result
|
||||
assert "supersecretvalue" not in result
|
||||
|
||||
def test_non_secret_env_unchanged(self):
|
||||
text = "HOME=/home/user"
|
||||
result = redact_sensitive_text(text)
|
||||
assert result == text
|
||||
|
||||
def test_path_unchanged(self):
|
||||
text = "PATH=/usr/local/bin:/usr/bin"
|
||||
result = redact_sensitive_text(text)
|
||||
assert result == text
|
||||
|
||||
|
||||
class TestJsonFields:
|
||||
def test_json_api_key(self):
|
||||
text = '{"apiKey": "sk-proj-abc123def456ghi789jkl012"}'
|
||||
result = redact_sensitive_text(text)
|
||||
assert "abc123def456" not in result
|
||||
|
||||
def test_json_token(self):
|
||||
text = '{"access_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.longtoken.here"}'
|
||||
result = redact_sensitive_text(text)
|
||||
assert "eyJhbGciOiJSUzI1NiIs" not in result
|
||||
|
||||
def test_json_non_secret_unchanged(self):
|
||||
text = '{"name": "John", "model": "gpt-4"}'
|
||||
result = redact_sensitive_text(text)
|
||||
assert result == text
|
||||
|
||||
|
||||
class TestAuthHeaders:
|
||||
def test_bearer_token(self):
|
||||
text = "Authorization: Bearer sk-proj-abc123def456ghi789jkl012"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "Authorization: Bearer" in result
|
||||
assert "abc123def456" not in result
|
||||
|
||||
def test_case_insensitive(self):
|
||||
text = "authorization: bearer mytoken123456789012345678"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "mytoken12345" not in result
|
||||
|
||||
|
||||
class TestTelegramTokens:
|
||||
def test_bot_token(self):
|
||||
text = "bot123456789:ABCDEfghij-KLMNopqrst_UVWXyz12345"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "ABCDEfghij" not in result
|
||||
assert "123456789:***" in result
|
||||
|
||||
def test_raw_token(self):
|
||||
text = "12345678901:ABCDEfghijKLMNopqrstUVWXyz1234567890"
|
||||
result = redact_sensitive_text(text)
|
||||
assert "ABCDEfghij" not in result
|
||||
|
||||
|
||||
class TestPassthrough:
|
||||
def test_empty_string(self):
|
||||
assert redact_sensitive_text("") == ""
|
||||
|
||||
def test_none_returns_none(self):
|
||||
assert redact_sensitive_text(None) is None
|
||||
|
||||
def test_normal_text_unchanged(self):
|
||||
text = "Hello world, this is a normal log message with no secrets."
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_code_unchanged(self):
|
||||
text = "def main():\n print('hello')\n return 42"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
def test_url_without_key_unchanged(self):
|
||||
text = "Connecting to https://api.openai.com/v1/chat/completions"
|
||||
assert redact_sensitive_text(text) == text
|
||||
|
||||
|
||||
class TestRedactingFormatter:
|
||||
def test_formats_and_redacts(self):
|
||||
formatter = RedactingFormatter("%(message)s")
|
||||
record = logging.LogRecord(
|
||||
name="test", level=logging.INFO, pathname="", lineno=0,
|
||||
msg="Key is sk-proj-abc123def456ghi789jkl012",
|
||||
args=(), exc_info=None,
|
||||
)
|
||||
result = formatter.format(record)
|
||||
assert "abc123def456" not in result
|
||||
assert "sk-pro" in result
|
||||
|
||||
|
||||
class TestPrintenvSimulation:
|
||||
"""Simulate what happens when the agent runs `env` or `printenv`."""
|
||||
|
||||
def test_full_env_dump(self):
|
||||
env_dump = """HOME=/home/user
|
||||
PATH=/usr/local/bin:/usr/bin
|
||||
OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012mno345
|
||||
OPENROUTER_API_KEY=sk-or-v1-reallyLongSecretKeyValue12345678
|
||||
FIRECRAWL_API_KEY=fc-shortkey123456789012
|
||||
TELEGRAM_BOT_TOKEN=bot987654321:ABCDEfghij-KLMNopqrst_UVWXyz12345
|
||||
SHELL=/bin/bash
|
||||
USER=teknium"""
|
||||
result = redact_sensitive_text(env_dump)
|
||||
# Secrets should be masked
|
||||
assert "abc123def456" not in result
|
||||
assert "reallyLongSecretKey" not in result
|
||||
assert "ABCDEfghij" not in result
|
||||
# Non-secrets should survive
|
||||
assert "HOME=/home/user" in result
|
||||
assert "SHELL=/bin/bash" in result
|
||||
assert "USER=teknium" in result
|
||||
374
tests/agent/test_subagent_progress.py
Normal file
374
tests/agent/test_subagent_progress.py
Normal file
|
|
@ -0,0 +1,374 @@
|
|||
"""
|
||||
Tests for subagent progress relay (issue #169).
|
||||
|
||||
Verifies that:
|
||||
- KawaiiSpinner.print_above() works with and without active spinner
|
||||
- _build_child_progress_callback handles CLI/gateway/no-display paths
|
||||
- Thinking events are relayed correctly
|
||||
- Parallel callbacks don't share state
|
||||
"""
|
||||
|
||||
import io
|
||||
import sys
|
||||
import time
|
||||
import threading
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from agent.display import KawaiiSpinner
|
||||
from tools.delegate_tool import _build_child_progress_callback
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# KawaiiSpinner.print_above tests
|
||||
# =========================================================================
|
||||
|
||||
class TestPrintAbove:
|
||||
"""Tests for KawaiiSpinner.print_above method."""
|
||||
|
||||
def test_print_above_without_spinner_running(self):
|
||||
"""print_above should write to stdout even when spinner is not running."""
|
||||
buf = io.StringIO()
|
||||
spinner = KawaiiSpinner("test")
|
||||
spinner._out = buf # Redirect to buffer
|
||||
|
||||
spinner.print_above("hello world")
|
||||
output = buf.getvalue()
|
||||
assert "hello world" in output
|
||||
|
||||
def test_print_above_with_spinner_running(self):
|
||||
"""print_above should clear spinner line and print text."""
|
||||
buf = io.StringIO()
|
||||
spinner = KawaiiSpinner("test")
|
||||
spinner._out = buf
|
||||
spinner.running = True # Pretend spinner is running (don't start thread)
|
||||
|
||||
spinner.print_above("tool line")
|
||||
output = buf.getvalue()
|
||||
assert "tool line" in output
|
||||
assert "\r" in output # Should start with carriage return to clear spinner line
|
||||
|
||||
def test_print_above_uses_captured_stdout(self):
|
||||
"""print_above should use self._out, not sys.stdout.
|
||||
This ensures it works inside redirect_stdout(devnull)."""
|
||||
buf = io.StringIO()
|
||||
spinner = KawaiiSpinner("test")
|
||||
spinner._out = buf
|
||||
|
||||
# Simulate redirect_stdout(devnull)
|
||||
old_stdout = sys.stdout
|
||||
sys.stdout = io.StringIO()
|
||||
try:
|
||||
spinner.print_above("should go to buf")
|
||||
finally:
|
||||
sys.stdout = old_stdout
|
||||
|
||||
assert "should go to buf" in buf.getvalue()
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# _build_child_progress_callback tests
|
||||
# =========================================================================
|
||||
|
||||
class TestBuildChildProgressCallback:
    """Tests for child progress callback builder.

    Covers both display paths of _build_child_progress_callback:
    the CLI path (a KawaiiSpinner prints lines above itself) and the
    gateway path (tool names are batched and relayed to the parent's
    tool_progress_callback).
    """

    def test_returns_none_when_no_display(self):
        """Should return None when parent has no spinner or callback."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent)
        assert cb is None

    def test_cli_spinner_tool_event(self):
        """Should print tool line above spinner for CLI path."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = buf
        # Spinner must be marked running for print_above to emit output.
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent)
        assert cb is not None

        cb("web_search", "quantum computing")
        output = buf.getvalue()
        assert "web_search" in output
        assert "quantum computing" in output
        # Tree-branch glyph marks a child-tool line in the CLI display.
        assert "├─" in output

    def test_cli_spinner_thinking_event(self):
        """Should print thinking line above spinner for CLI path."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = buf
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent)
        # "_thinking" is the pseudo-tool name for reasoning previews.
        cb("_thinking", "I'll search for papers first")

        output = buf.getvalue()
        assert "💭" in output
        assert "search for papers" in output

    def test_gateway_batched_progress(self):
        """Gateway path should batch tool calls and flush at BATCH_SIZE."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb = _build_child_progress_callback(0, parent)

        # Send 4 tool calls — shouldn't flush yet (BATCH_SIZE = 5)
        for i in range(4):
            cb(f"tool_{i}", f"arg_{i}")
        parent_cb.assert_not_called()

        # 5th call should trigger flush
        cb("tool_4", "arg_4")
        parent_cb.assert_called_once()
        call_args = parent_cb.call_args
        # The flushed summary (second positional arg) must span the batch.
        assert "tool_0" in call_args[0][1]
        assert "tool_4" in call_args[0][1]

    def test_thinking_not_relayed_to_gateway(self):
        """Thinking events should NOT be sent to gateway (too noisy)."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb = _build_child_progress_callback(0, parent)
        cb("_thinking", "some reasoning text")

        parent_cb.assert_not_called()

    def test_parallel_callbacks_independent(self):
        """Each child's callback should have independent batch state."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb0 = _build_child_progress_callback(0, parent)
        cb1 = _build_child_progress_callback(1, parent)

        # Send 3 calls to each — neither should flush (batch size = 5)
        for i in range(3):
            cb0(f"tool_{i}")
            cb1(f"other_{i}")

        parent_cb.assert_not_called()

    def test_task_index_prefix_in_batch_mode(self):
        """Batch mode (task_count > 1) should show 1-indexed prefix for all tasks."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = buf
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        # task_index=0 in a batch of 3 → prefix "[1]"
        cb0 = _build_child_progress_callback(0, parent, task_count=3)
        cb0("web_search", "test")
        output = buf.getvalue()
        assert "[1]" in output

        # task_index=2 in a batch of 3 → prefix "[3]"
        # Reset the buffer so only the second callback's output is checked.
        buf.truncate(0)
        buf.seek(0)
        cb2 = _build_child_progress_callback(2, parent, task_count=3)
        cb2("web_search", "test")
        output = buf.getvalue()
        assert "[3]" in output

    def test_single_task_no_prefix(self):
        """Single task (task_count=1) should not show index prefix."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = buf
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent, task_count=1)
        cb("web_search", "test")

        output = buf.getvalue()
        assert "[" not in output
|
||||
# =========================================================================
|
||||
# Integration: thinking callback in run_agent.py
|
||||
# =========================================================================
|
||||
|
||||
class TestThinkingCallback:
    """Tests for the _thinking callback in AIAgent conversation loop."""

    def _simulate_thinking_callback(self, content, callback, delegate_depth=1):
        """Simulate the exact code path from run_agent.py for the thinking callback.

        delegate_depth: simulates self._delegate_depth.
        0 = main agent (should NOT fire), >=1 = subagent (should fire).
        """
        import re
        # Mirrors run_agent.py: only fire for subagents with non-empty content.
        if (content and callback and delegate_depth > 0):
            _think_text = content.strip()
            # Strip reasoning-wrapper XML tags before displaying a preview.
            _think_text = re.sub(
                r'</?(?:REASONING_SCRATCHPAD|think|reasoning)>', '', _think_text
            ).strip()
            # Preview is the first line only, capped at 80 characters.
            first_line = _think_text.split('\n')[0][:80] if _think_text else ""
            if first_line:
                try:
                    callback("_thinking", first_line)
                except Exception:
                    # Display failures must never break the agent loop.
                    pass

    def test_thinking_callback_fires_on_content(self):
        """tool_progress_callback should receive _thinking event
        when assistant message has content."""
        calls = []
        self._simulate_thinking_callback(
            "I'll research quantum computing first, then summarize.",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 1
        assert calls[0][0] == "_thinking"
        assert "quantum computing" in calls[0][1]

    def test_thinking_callback_skipped_when_no_content(self):
        """Should not fire when assistant has no content."""
        calls = []
        self._simulate_thinking_callback(
            None,
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 0

    def test_thinking_callback_truncates_long_content(self):
        """Should truncate long content to 80 chars."""
        calls = []
        self._simulate_thinking_callback(
            "A" * 200 + "\nSecond line should be ignored",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 1
        assert len(calls[0][1]) == 80

    def test_thinking_callback_skipped_for_main_agent(self):
        """Main agent (delegate_depth=0) should NOT fire thinking events.

        This prevents gateway spam on Telegram/Discord."""
        calls = []
        self._simulate_thinking_callback(
            "I'll help you with that request.",
            lambda name, preview=None: calls.append((name, preview)),
            delegate_depth=0,
        )
        assert len(calls) == 0

    def test_thinking_callback_strips_reasoning_scratchpad(self):
        """REASONING_SCRATCHPAD tags should be stripped before display."""
        calls = []
        self._simulate_thinking_callback(
            "<REASONING_SCRATCHPAD>I need to analyze this carefully</REASONING_SCRATCHPAD>",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 1
        assert "<REASONING_SCRATCHPAD>" not in calls[0][1]
        assert "analyze this carefully" in calls[0][1]

    def test_thinking_callback_strips_think_tags(self):
        """<think> tags should be stripped before display."""
        calls = []
        self._simulate_thinking_callback(
            "<think>Let me think about this problem</think>",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 1
        assert "<think>" not in calls[0][1]
        assert "think about this problem" in calls[0][1]

    def test_thinking_callback_empty_after_strip(self):
        """Should not fire when content is only XML tags."""
        calls = []
        self._simulate_thinking_callback(
            "<REASONING_SCRATCHPAD></REASONING_SCRATCHPAD>",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 0
|
||||
# =========================================================================
|
||||
# Gateway batch flush tests
|
||||
# =========================================================================
|
||||
|
||||
class TestBatchFlush:
    """Tests for gateway batch flush on subagent completion.

    The callback returned by _build_child_progress_callback exposes a
    _flush attribute so the parent can drain any partially filled batch
    when the child finishes.
    """

    def test_flush_sends_remaining_batch(self):
        """_flush should send remaining tool names to gateway."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb = _build_child_progress_callback(0, parent)

        # Send 3 tools (below batch size of 5)
        cb("web_search", "query1")
        cb("read_file", "file.txt")
        cb("write_file", "out.txt")
        parent_cb.assert_not_called()

        # Flush should send the remaining 3
        cb._flush()
        parent_cb.assert_called_once()
        # Second positional arg is the human-readable batch summary.
        summary = parent_cb.call_args[0][1]
        assert "web_search" in summary
        assert "write_file" in summary

    def test_flush_noop_when_batch_empty(self):
        """_flush should not send anything when batch is empty."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb = _build_child_progress_callback(0, parent)
        cb._flush()
        parent_cb.assert_not_called()

    def test_flush_noop_when_no_parent_callback(self):
        """_flush should not crash when there's no parent callback."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("test")
        spinner._out = buf
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent)
        cb("web_search", "test")
        cb._flush()  # Should not crash
|
||||
if __name__ == "__main__":
    # Allow running this file directly, outside the pytest collector.
    pytest.main([__file__, "-v"])
|
||||
|
||||
184
tests/gateway/test_media_extraction.py
Normal file
184
tests/gateway/test_media_extraction.py
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
"""
|
||||
Tests for MEDIA tag extraction from tool results.
|
||||
|
||||
Verifies that MEDIA tags (e.g., from TTS tool) are only extracted from
|
||||
messages in the CURRENT turn, not from the full conversation history.
|
||||
This prevents voice messages from accumulating and being sent multiple
|
||||
times per reply. (Regression test for #160)
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import re
|
||||
|
||||
|
||||
def extract_media_tags_fixed(result_messages, history_len):
    """
    Extract MEDIA tags from tool results, scanning ONLY the messages
    appended during the current turn (index >= history_len).

    This mirrors the fixed gateway behavior: tags produced by tools in
    earlier turns are never collected again, so each media file is sent
    at most once per reply.

    Args:
        result_messages: Full message list (history followed by new turn)
        history_len: Number of messages that existed before this turn

    Returns:
        Tuple of (media_tags list, has_voice_directive bool)
    """
    tags = []
    voice_directive = False

    # Restrict the scan to messages added after the history snapshot.
    current_turn = result_messages[history_len:] if len(result_messages) > history_len else []

    for message in current_turn:
        # Only tool/function results can carry MEDIA tags.
        if message.get("role") not in ("tool", "function"):
            continue
        body = message.get("content", "")
        if "MEDIA:" in body:
            for hit in re.finditer(r'MEDIA:(\S+)', body):
                # Trim trailing JSON punctuation that \S+ may have swallowed.
                candidate = hit.group(1).strip().rstrip('",}')
                if candidate:
                    tags.append(f"MEDIA:{candidate}")
        if "[[audio_as_voice]]" in body:
            voice_directive = True

    return tags, voice_directive
||||
|
||||
|
||||
def extract_media_tags_broken(result_messages):
    """
    The BROKEN behavior kept for regression comparison: scan ALL messages,
    including history, for MEDIA tags.

    Because history is re-scanned on every reply, TTS voice messages
    accumulate and get re-sent each turn — exactly the bug in #160.
    """
    tags = []
    voice_directive = False

    for message in result_messages:
        # Same per-message extraction as the fixed version ...
        if message.get("role") not in ("tool", "function"):
            continue
        body = message.get("content", "")
        if "MEDIA:" in body:
            for hit in re.finditer(r'MEDIA:(\S+)', body):
                candidate = hit.group(1).strip().rstrip('",}')
                if candidate:
                    tags.append(f"MEDIA:{candidate}")
        if "[[audio_as_voice]]" in body:
            voice_directive = True

    # ... but applied over the whole conversation, history included.
    return tags, voice_directive
||||
|
||||
|
||||
class TestMediaExtraction:
    """Tests for MEDIA tag extraction from tool results.

    Compares the fixed (current-turn-only) extractor against the broken
    (full-history) extractor to document the regression in #160.
    """

    def test_media_tags_not_extracted_from_history(self):
        """MEDIA tags from previous turns should NOT be extracted again."""
        # Simulate conversation history with a TTS call from a previous turn
        history = [
            {"role": "user", "content": "Say hello as audio"},
            {"role": "assistant", "content": None, "tool_calls": [{"id": "1", "function": {"name": "text_to_speech"}}]},
            {"role": "tool", "tool_call_id": "1", "content": '{"success": true, "media_tag": "[[audio_as_voice]]\\nMEDIA:/path/to/audio1.ogg"}'},
            {"role": "assistant", "content": "I've said hello for you!"},
        ]

        # New turn: user asks a simple question
        new_messages = [
            {"role": "user", "content": "What time is it?"},
            {"role": "assistant", "content": "It's 3:30 AM."},
        ]

        all_messages = history + new_messages
        history_len = len(history)

        # Fixed behavior: should extract NO media tags (none in new messages)
        tags, voice_directive = extract_media_tags_fixed(all_messages, history_len)
        assert tags == [], "Fixed extraction should not find tags in history"
        assert voice_directive is False

        # Broken behavior: would incorrectly extract the old media tag
        broken_tags, broken_voice = extract_media_tags_broken(all_messages)
        assert len(broken_tags) == 1, "Broken extraction finds tags in history"
        assert "audio1.ogg" in broken_tags[0]

    def test_media_tags_extracted_from_current_turn(self):
        """MEDIA tags from the current turn SHOULD be extracted."""
        # History without TTS
        history = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there!"},
        ]

        # New turn with TTS call
        new_messages = [
            {"role": "user", "content": "Say goodbye as audio"},
            {"role": "assistant", "content": None, "tool_calls": [{"id": "2", "function": {"name": "text_to_speech"}}]},
            {"role": "tool", "tool_call_id": "2", "content": '{"success": true, "media_tag": "[[audio_as_voice]]\\nMEDIA:/path/to/audio2.ogg"}'},
            {"role": "assistant", "content": "I've said goodbye!"},
        ]

        all_messages = history + new_messages
        history_len = len(history)

        # Fixed behavior: should extract the new media tag
        tags, voice_directive = extract_media_tags_fixed(all_messages, history_len)
        assert len(tags) == 1, "Should extract media tag from current turn"
        assert "audio2.ogg" in tags[0]
        assert voice_directive is True

    def test_multiple_tts_calls_in_history_not_accumulated(self):
        """Multiple TTS calls in history should NOT accumulate in new responses."""
        # History with multiple TTS calls
        history = [
            {"role": "user", "content": "Say hello"},
            {"role": "tool", "tool_call_id": "1", "content": 'MEDIA:/audio/hello.ogg'},
            {"role": "assistant", "content": "Done!"},
            {"role": "user", "content": "Say goodbye"},
            {"role": "tool", "tool_call_id": "2", "content": 'MEDIA:/audio/goodbye.ogg'},
            {"role": "assistant", "content": "Done!"},
            {"role": "user", "content": "Say thanks"},
            {"role": "tool", "tool_call_id": "3", "content": 'MEDIA:/audio/thanks.ogg'},
            {"role": "assistant", "content": "Done!"},
        ]

        # New turn: no TTS
        new_messages = [
            {"role": "user", "content": "What time is it?"},
            {"role": "assistant", "content": "3 PM"},
        ]

        all_messages = history + new_messages
        history_len = len(history)

        # Fixed: no tags
        tags, _ = extract_media_tags_fixed(all_messages, history_len)
        assert tags == [], "Should not accumulate tags from history"

        # Broken: would have 3 tags (all the old ones)
        broken_tags, _ = extract_media_tags_broken(all_messages)
        assert len(broken_tags) == 3, "Broken version accumulates all history tags"

    def test_deduplication_within_current_turn(self):
        """Multiple MEDIA tags in current turn should be deduplicated."""
        history = []

        # Current turn with multiple tool calls producing same media
        new_messages = [
            {"role": "user", "content": "Multiple TTS"},
            {"role": "tool", "tool_call_id": "1", "content": 'MEDIA:/audio/same.ogg'},
            {"role": "tool", "tool_call_id": "2", "content": 'MEDIA:/audio/same.ogg'},  # duplicate
            {"role": "tool", "tool_call_id": "3", "content": 'MEDIA:/audio/different.ogg'},
            {"role": "assistant", "content": "Done!"},
        ]

        all_messages = history + new_messages

        tags, _ = extract_media_tags_fixed(all_messages, 0)
        # Even though same.ogg appears twice, deduplication happens after extraction
        # The extraction itself should get both, then caller deduplicates
        assert len(tags) == 3  # Raw extraction gets all

        # Deduplication as done in the actual code:
        # (set.add returns None, so "not seen.add(t)" is always truthy and
        # only the "t not in seen" test decides membership — order preserved)
        seen = set()
        unique = [t for t in tags if t not in seen and not seen.add(t)]
        assert len(unique) == 2  # After dedup: same.ogg and different.ogg
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this file directly, outside the pytest collector.
    pytest.main([__file__, "-v"])
|
||||
210
tests/test_auth_codex_provider.py
Normal file
210
tests/test_auth_codex_provider.py
Normal file
|
|
@ -0,0 +1,210 @@
|
|||
import json
|
||||
import time
|
||||
import base64
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from hermes_cli.auth import (
|
||||
AuthError,
|
||||
DEFAULT_CODEX_BASE_URL,
|
||||
PROVIDER_REGISTRY,
|
||||
_persist_codex_auth_payload,
|
||||
_login_openai_codex,
|
||||
login_command,
|
||||
get_codex_auth_status,
|
||||
get_provider_auth_state,
|
||||
read_codex_auth_file,
|
||||
resolve_codex_runtime_credentials,
|
||||
resolve_provider,
|
||||
)
|
||||
|
||||
|
||||
def _write_codex_auth(codex_home: Path, *, access_token: str = "access", refresh_token: str = "refresh") -> Path:
|
||||
codex_home.mkdir(parents=True, exist_ok=True)
|
||||
auth_file = codex_home / "auth.json"
|
||||
auth_file.write_text(
|
||||
json.dumps(
|
||||
{
|
||||
"auth_mode": "oauth",
|
||||
"last_refresh": "2026-02-26T00:00:00Z",
|
||||
"tokens": {
|
||||
"access_token": access_token,
|
||||
"refresh_token": refresh_token,
|
||||
},
|
||||
}
|
||||
)
|
||||
)
|
||||
return auth_file
|
||||
|
||||
|
||||
def _jwt_with_exp(exp_epoch: int) -> str:
|
||||
payload = {"exp": exp_epoch}
|
||||
encoded = base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8")).rstrip(b"=").decode("utf-8")
|
||||
return f"h.{encoded}.s"
|
||||
|
||||
|
||||
def test_read_codex_auth_file_success(tmp_path, monkeypatch):
    """read_codex_auth_file parses auth.json from CODEX_HOME and reports its path."""
    codex_home = tmp_path / "codex-home"
    auth_file = _write_codex_auth(codex_home)
    # Point the reader at the fixture directory via the env var it honors.
    monkeypatch.setenv("CODEX_HOME", str(codex_home))

    payload = read_codex_auth_file()

    assert payload["auth_path"] == auth_file
    assert payload["tokens"]["access_token"] == "access"
    assert payload["tokens"]["refresh_token"] == "refresh"
||||
|
||||
|
||||
def test_resolve_codex_runtime_credentials_missing_access_token(tmp_path, monkeypatch):
    """An empty access token must raise AuthError and demand a re-login."""
    codex_home = tmp_path / "codex-home"
    # Fixture with a blank access token — structurally valid but unusable.
    _write_codex_auth(codex_home, access_token="")
    monkeypatch.setenv("CODEX_HOME", str(codex_home))

    with pytest.raises(AuthError) as exc:
        resolve_codex_runtime_credentials()

    assert exc.value.code == "codex_auth_missing_access_token"
    assert exc.value.relogin_required is True
||||
|
||||
|
||||
def test_resolve_codex_runtime_credentials_refreshes_expiring_token(tmp_path, monkeypatch):
    """An already-expired access token triggers exactly one refresh under the file lock."""
    codex_home = tmp_path / "codex-home"
    # JWT whose exp is 10 seconds in the past, forcing the refresh path.
    expiring_token = _jwt_with_exp(int(time.time()) - 10)
    _write_codex_auth(codex_home, access_token=expiring_token, refresh_token="refresh-old")
    monkeypatch.setenv("CODEX_HOME", str(codex_home))

    called = {"count": 0}

    def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
        # Stub refresh: verify it is invoked with the lock held and the
        # right auth.json path, then hand back fresh tokens.
        called["count"] += 1
        assert auth_path == codex_home / "auth.json"
        assert lock_held is True
        return {"access_token": "access-new", "refresh_token": "refresh-new"}

    monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)

    resolved = resolve_codex_runtime_credentials()

    assert called["count"] == 1
    assert resolved["api_key"] == "access-new"
||||
|
||||
|
||||
def test_resolve_codex_runtime_credentials_force_refresh(tmp_path, monkeypatch):
    """force_refresh=True refreshes even when the current token is not expiring."""
    codex_home = tmp_path / "codex-home"
    _write_codex_auth(codex_home, access_token="access-current", refresh_token="refresh-old")
    monkeypatch.setenv("CODEX_HOME", str(codex_home))

    called = {"count": 0}

    def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
        called["count"] += 1
        # The refresh must always run with the auth-file lock held.
        assert lock_held is True
        return {"access_token": "access-forced", "refresh_token": "refresh-new"}

    monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)

    # refresh_if_expiring=False isolates the force_refresh code path.
    resolved = resolve_codex_runtime_credentials(force_refresh=True, refresh_if_expiring=False)

    assert called["count"] == 1
    assert resolved["api_key"] == "access-forced"
||||
|
||||
|
||||
def test_resolve_codex_runtime_credentials_uses_file_lock_on_refresh(tmp_path, monkeypatch):
    """A refresh must acquire and release the auth-file lock exactly once."""
    codex_home = tmp_path / "codex-home"
    _write_codex_auth(codex_home, access_token="access-current", refresh_token="refresh-old")
    monkeypatch.setenv("CODEX_HOME", str(codex_home))

    # Track lock acquisition/release so we can assert balanced usage.
    lock_calls = {"enter": 0, "exit": 0}

    @contextmanager
    def _fake_lock(auth_path, timeout_seconds=15.0):
        assert auth_path == codex_home / "auth.json"
        lock_calls["enter"] += 1
        try:
            yield
        finally:
            lock_calls["exit"] += 1

    refresh_calls = {"count": 0}

    def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
        refresh_calls["count"] += 1
        # lock_held=True asserts refresh happens inside the lock context.
        assert lock_held is True
        return {"access_token": "access-updated", "refresh_token": "refresh-updated"}

    monkeypatch.setattr("hermes_cli.auth._codex_auth_file_lock", _fake_lock)
    monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)

    resolved = resolve_codex_runtime_credentials(force_refresh=True, refresh_if_expiring=False)

    assert refresh_calls["count"] == 1
    assert lock_calls["enter"] == 1
    assert lock_calls["exit"] == 1
    assert resolved["api_key"] == "access-updated"
||||
|
||||
|
||||
def test_resolve_provider_explicit_codex_does_not_fallback(monkeypatch):
    """An explicit provider request is honored even with no API keys in the env."""
    for env_var in ("OPENAI_API_KEY", "OPENROUTER_API_KEY"):
        monkeypatch.delenv(env_var, raising=False)
    assert resolve_provider("openai-codex") == "openai-codex"
||||
|
||||
|
||||
def test_persist_codex_auth_payload_writes_atomically(tmp_path):
    """_persist_codex_auth_payload replaces stale content and leaves no temp files."""
    auth_path = tmp_path / "auth.json"
    # Pre-existing stale content that must be fully overwritten.
    auth_path.write_text('{"stale":true}\n')
    payload = {
        "auth_mode": "oauth",
        "tokens": {
            "access_token": "next-access",
            "refresh_token": "next-refresh",
        },
        "last_refresh": "2026-02-26T00:00:00Z",
    }

    _persist_codex_auth_payload(auth_path, payload)

    stored = json.loads(auth_path.read_text())
    assert stored == payload
    # Atomic write implies the temporary file was renamed away, not left behind.
    assert list(tmp_path.glob(".auth.json.*.tmp")) == []
||||
|
||||
|
||||
def test_get_codex_auth_status_not_logged_in(tmp_path, monkeypatch):
    """A nonexistent CODEX_HOME reports logged_in=False with an error message."""
    missing_home = tmp_path / "missing-codex-home"
    monkeypatch.setenv("CODEX_HOME", str(missing_home))

    status = get_codex_auth_status()

    assert status["logged_in"] is False
    assert "error" in status
||||
|
||||
|
||||
def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch):
    """Logging in via existing codex credentials records provider state and config."""
    hermes_home = tmp_path / "hermes-home"
    codex_home = tmp_path / "codex-home"
    _write_codex_auth(codex_home)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setenv("CODEX_HOME", str(codex_home))
    # Mock input() to accept existing credentials
    monkeypatch.setattr("builtins.input", lambda _: "y")

    _login_openai_codex(SimpleNamespace(), PROVIDER_REGISTRY["openai-codex"])

    # Provider auth state must record where the credentials came from.
    state = get_provider_auth_state("openai-codex")
    assert state is not None
    assert state["source"] == "codex-auth-json"
    assert state["auth_file"].endswith("auth.json")

    # The login should also persist provider + base URL into config.yaml.
    config_path = hermes_home / "config.yaml"
    config = yaml.safe_load(config_path.read_text())
    assert config["model"]["provider"] == "openai-codex"
    assert config["model"]["base_url"] == DEFAULT_CODEX_BASE_URL
||||
|
||||
|
||||
def test_login_command_shows_deprecation(monkeypatch, capsys):
    """login_command is deprecated and directs users to hermes model."""
    with pytest.raises(SystemExit) as exc_info:
        login_command(SimpleNamespace())

    # Deprecation is an orderly exit (code 0), not a failure.
    assert exc_info.value.code == 0
    assert "hermes model" in capsys.readouterr().out
||||
80
tests/test_cli_init.py
Normal file
80
tests/test_cli_init.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
"""Tests for HermesCLI initialization -- catches configuration bugs
|
||||
that only manifest at runtime (not in mocked unit tests)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
|
||||
def _make_cli(**kwargs):
    """Create a HermesCLI instance with minimal mocking.

    Only tool discovery is patched out (returns no tools) so that
    construction exercises the real config/env resolution paths.
    kwargs are forwarded unchanged to HermesCLI.
    """
    # Import inside the helper so prompt_toolkit stubs (if any) are in place first.
    from cli import HermesCLI
    with patch("cli.get_tool_definitions", return_value=[]):
        return HermesCLI(**kwargs)
||||
|
||||
|
||||
class TestMaxTurnsResolution:
    """max_turns must always resolve to a positive integer, never None."""

    def test_default_max_turns_is_integer(self):
        cli = _make_cli()
        assert isinstance(cli.max_turns, int)
        assert cli.max_turns > 0

    def test_explicit_max_turns_honored(self):
        cli = _make_cli(max_turns=25)
        assert cli.max_turns == 25

    def test_none_max_turns_gets_default(self):
        # Passing None explicitly must fall back to the default, not propagate.
        cli = _make_cli(max_turns=None)
        assert isinstance(cli.max_turns, int)
        assert cli.max_turns > 0

    def test_env_var_max_turns(self, monkeypatch):
        """Env var is used when config file doesn't set max_turns."""
        monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42")
        import cli as cli_module
        # Temporarily blank the config value so the env var is the only source.
        original = cli_module.CLI_CONFIG["agent"].get("max_turns")
        cli_module.CLI_CONFIG["agent"]["max_turns"] = None
        try:
            cli_obj = _make_cli()
            assert cli_obj.max_turns == 42
        finally:
            # Restore the shared module-level config for other tests.
            if original is not None:
                cli_module.CLI_CONFIG["agent"]["max_turns"] = original

    def test_max_turns_never_none_for_agent(self):
        """The value passed to AIAgent must never be None (causes TypeError in run_conversation)."""
        cli = _make_cli()
        assert cli.max_turns is not None
||||
|
||||
|
||||
class TestVerboseAndToolProgress:
    """Display-related settings resolve to well-typed values at construction."""

    def test_default_verbose_is_bool(self):
        cli = _make_cli()
        assert isinstance(cli.verbose, bool)

    def test_tool_progress_mode_is_string(self):
        cli = _make_cli()
        assert isinstance(cli.tool_progress_mode, str)
        # Only these four modes are valid for the tool-progress display.
        assert cli.tool_progress_mode in ("off", "new", "all", "verbose")
||||
|
||||
|
||||
class TestProviderResolution:
    """Provider-related attributes resolve to sane types/values at construction."""

    def test_api_key_is_string_or_none(self):
        # api_key may legitimately be unset (None) until runtime resolution.
        cli = _make_cli()
        assert cli.api_key is None or isinstance(cli.api_key, str)

    def test_base_url_is_string(self):
        cli = _make_cli()
        assert isinstance(cli.base_url, str)
        assert cli.base_url.startswith("http")

    def test_model_is_string(self):
        cli = _make_cli()
        assert isinstance(cli.model, str)
        assert len(cli.model) > 0
||||
187
tests/test_cli_provider_resolution.py
Normal file
187
tests/test_cli_provider_resolution.py
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
import importlib
|
||||
import sys
|
||||
import types
|
||||
from contextlib import nullcontext
|
||||
from types import SimpleNamespace
|
||||
|
||||
from hermes_cli.auth import AuthError
|
||||
from hermes_cli import main as hermes_main
|
||||
|
||||
|
||||
def _install_prompt_toolkit_stubs():
|
||||
class _Dummy:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
class _Condition:
|
||||
def __init__(self, func):
|
||||
self.func = func
|
||||
|
||||
def __bool__(self):
|
||||
return bool(self.func())
|
||||
|
||||
class _ANSI(str):
|
||||
pass
|
||||
|
||||
root = types.ModuleType("prompt_toolkit")
|
||||
history = types.ModuleType("prompt_toolkit.history")
|
||||
styles = types.ModuleType("prompt_toolkit.styles")
|
||||
patch_stdout = types.ModuleType("prompt_toolkit.patch_stdout")
|
||||
application = types.ModuleType("prompt_toolkit.application")
|
||||
layout = types.ModuleType("prompt_toolkit.layout")
|
||||
processors = types.ModuleType("prompt_toolkit.layout.processors")
|
||||
filters = types.ModuleType("prompt_toolkit.filters")
|
||||
dimension = types.ModuleType("prompt_toolkit.layout.dimension")
|
||||
menus = types.ModuleType("prompt_toolkit.layout.menus")
|
||||
widgets = types.ModuleType("prompt_toolkit.widgets")
|
||||
key_binding = types.ModuleType("prompt_toolkit.key_binding")
|
||||
completion = types.ModuleType("prompt_toolkit.completion")
|
||||
formatted_text = types.ModuleType("prompt_toolkit.formatted_text")
|
||||
|
||||
history.FileHistory = _Dummy
|
||||
styles.Style = _Dummy
|
||||
patch_stdout.patch_stdout = lambda *args, **kwargs: nullcontext()
|
||||
application.Application = _Dummy
|
||||
layout.Layout = _Dummy
|
||||
layout.HSplit = _Dummy
|
||||
layout.Window = _Dummy
|
||||
layout.FormattedTextControl = _Dummy
|
||||
layout.ConditionalContainer = _Dummy
|
||||
processors.Processor = _Dummy
|
||||
processors.Transformation = _Dummy
|
||||
processors.PasswordProcessor = _Dummy
|
||||
processors.ConditionalProcessor = _Dummy
|
||||
filters.Condition = _Condition
|
||||
dimension.Dimension = _Dummy
|
||||
menus.CompletionsMenu = _Dummy
|
||||
widgets.TextArea = _Dummy
|
||||
key_binding.KeyBindings = _Dummy
|
||||
completion.Completer = _Dummy
|
||||
completion.Completion = _Dummy
|
||||
formatted_text.ANSI = _ANSI
|
||||
root.print_formatted_text = lambda *args, **kwargs: None
|
||||
|
||||
sys.modules.setdefault("prompt_toolkit", root)
|
||||
sys.modules.setdefault("prompt_toolkit.history", history)
|
||||
sys.modules.setdefault("prompt_toolkit.styles", styles)
|
||||
sys.modules.setdefault("prompt_toolkit.patch_stdout", patch_stdout)
|
||||
sys.modules.setdefault("prompt_toolkit.application", application)
|
||||
sys.modules.setdefault("prompt_toolkit.layout", layout)
|
||||
sys.modules.setdefault("prompt_toolkit.layout.processors", processors)
|
||||
sys.modules.setdefault("prompt_toolkit.filters", filters)
|
||||
sys.modules.setdefault("prompt_toolkit.layout.dimension", dimension)
|
||||
sys.modules.setdefault("prompt_toolkit.layout.menus", menus)
|
||||
sys.modules.setdefault("prompt_toolkit.widgets", widgets)
|
||||
sys.modules.setdefault("prompt_toolkit.key_binding", key_binding)
|
||||
sys.modules.setdefault("prompt_toolkit.completion", completion)
|
||||
sys.modules.setdefault("prompt_toolkit.formatted_text", formatted_text)
|
||||
|
||||
|
||||
def _import_cli():
    """Import and return the ``cli`` module for testing.

    Attempts a real ``prompt_toolkit`` import first; when the dependency is
    absent, lightweight stub modules are registered so ``cli`` can still load.
    """
    try:
        importlib.import_module("prompt_toolkit")
    except ModuleNotFoundError:
        # Real dependency missing -- register the in-test stand-ins instead.
        _install_prompt_toolkit_stubs()
    return importlib.import_module("cli")
|
||||
|
||||
|
||||
def test_hermes_cli_init_does_not_eagerly_resolve_runtime_provider(monkeypatch):
    """HermesCLI.__init__ must not trigger runtime provider resolution."""
    cli = _import_cli()
    resolve_calls = {"count": 0}

    def _unexpected_runtime_resolve(**kwargs):
        # Any invocation here is a failure: credentials must resolve lazily.
        resolve_calls["count"] += 1
        raise AssertionError("resolve_runtime_provider should not be called in HermesCLI.__init__")

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _unexpected_runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))

    shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1)

    assert shell is not None
    assert resolve_calls["count"] == 0
|
||||
|
||||
|
||||
def test_runtime_resolution_failure_is_not_sticky(monkeypatch):
    """A transient runtime-resolution error must not poison later attempts.

    First resolution raises; the second succeeds and the agent is built.
    """
    cli = _import_cli()
    resolve_calls = {"count": 0}

    def _runtime_resolve(**kwargs):
        resolve_calls["count"] += 1
        if resolve_calls["count"] == 1:
            # Simulate a one-off transient failure on the first attempt.
            raise RuntimeError("temporary auth failure")
        return {
            "provider": "openrouter",
            "api_mode": "chat_completions",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "test-key",
            "source": "env/config",
        }

    class _DummyAgent:
        """Stand-in agent that merely records its construction kwargs."""

        def __init__(self, *args, **kwargs):
            self.kwargs = kwargs

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    monkeypatch.setattr(cli, "AIAgent", _DummyAgent)

    shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1)

    assert shell._init_agent() is False  # first attempt fails
    assert shell._init_agent() is True   # retry succeeds
    assert resolve_calls["count"] == 2
    assert shell.agent is not None
|
||||
|
||||
|
||||
def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch):
    """Changing provider/api_mode must drop the cached agent even when the
    endpoint URL and key are unchanged."""
    cli = _import_cli()

    def _runtime_resolve(**kwargs):
        # Same base_url/api_key as the stale state below, but new routing.
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://same-endpoint.example/v1",
            "api_key": "same-key",
            "source": "env/config",
        }

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))

    shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1)
    # Seed stale routing state with a live (dummy) agent attached.
    shell.provider = "openrouter"
    shell.api_mode = "chat_completions"
    shell.base_url = "https://same-endpoint.example/v1"
    shell.api_key = "same-key"
    shell.agent = object()

    assert shell._ensure_runtime_credentials() is True
    assert shell.agent is None  # stale agent discarded for rebuild
    assert shell.provider == "openai-codex"
    assert shell.api_mode == "codex_responses"
|
||||
|
||||
|
||||
def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys):
    """cmd_model warns and falls back to auto detection when the configured
    provider is unknown, without persisting any change."""
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
        lambda: {"model": {"default": "gpt-5", "provider": "invalid-provider"}},
    )
    monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None)
    monkeypatch.setattr("hermes_cli.config.get_env_value", lambda key: "")
    monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: None)

    def _resolve_provider(requested, **kwargs):
        if requested == "invalid-provider":
            raise AuthError("Unknown provider 'invalid-provider'.", code="invalid_provider")
        return "openrouter"

    monkeypatch.setattr("hermes_cli.auth.resolve_provider", _resolve_provider)
    # Pick the last menu entry, which keeps the current selection.
    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: len(choices) - 1)

    hermes_main.cmd_model(SimpleNamespace())
    output = capsys.readouterr().out

    assert "Warning:" in output
    assert "falling back to auto provider detection" in output.lower()
    assert "No change." in output
|
||||
180
tests/test_codex_execution_paths.py
Normal file
180
tests/test_codex_execution_paths.py
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
import asyncio
|
||||
import sys
|
||||
import types
|
||||
from types import SimpleNamespace
|
||||
|
||||
|
||||
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
|
||||
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
|
||||
sys.modules.setdefault("fal_client", types.SimpleNamespace())
|
||||
|
||||
import cron.scheduler as cron_scheduler
|
||||
import gateway.run as gateway_run
|
||||
import run_agent
|
||||
from gateway.config import Platform
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
def _patch_agent_bootstrap(monkeypatch):
    """Stub out agent bootstrap hooks so AIAgent can be built in isolation.

    Replaces tool discovery with a single dummy ``terminal`` tool and makes
    the toolset requirement check a no-op.
    """
    terminal_tool = {
        "type": "function",
        "function": {
            "name": "terminal",
            "description": "Run shell commands.",
            "parameters": {"type": "object", "properties": {}},
        },
    }
    monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kwargs: [terminal_tool])
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
|
||||
|
||||
|
||||
def _codex_message_response(text: str):
|
||||
return SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text=text)],
|
||||
)
|
||||
],
|
||||
usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8),
|
||||
status="completed",
|
||||
model="gpt-5-codex",
|
||||
)
|
||||
|
||||
|
||||
class _UnauthorizedError(RuntimeError):
|
||||
def __init__(self):
|
||||
super().__init__("Error code: 401 - unauthorized")
|
||||
self.status_code = 401
|
||||
|
||||
|
||||
class _FakeOpenAI:
|
||||
def __init__(self, **kwargs):
|
||||
self.kwargs = kwargs
|
||||
|
||||
def close(self):
|
||||
return None
|
||||
|
||||
|
||||
class _Codex401ThenSuccessAgent(run_agent.AIAgent):
    """AIAgent subclass whose first API call 401s, then recovers.

    Class-level counters let tests assert how often credential refresh ran
    and which kwargs the agent was last constructed with.
    """

    refresh_attempts = 0  # incremented on each credential-refresh attempt
    last_init = {}        # kwargs captured from the most recent __init__

    def __init__(self, *args, **kwargs):
        kwargs.setdefault("skip_context_files", True)
        kwargs.setdefault("skip_memory", True)
        kwargs.setdefault("max_iterations", 4)
        type(self).last_init = dict(kwargs)
        super().__init__(*args, **kwargs)
        # Disable persistence/cleanup side effects for test isolation.
        self._cleanup_task_resources = lambda task_id: None
        self._persist_session = lambda messages, history=None: None
        self._save_trajectory = lambda messages, user_message, completed: None
        self._save_session_log = lambda messages: None

    def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
        # Pretend the refresh always works; just count the attempts.
        type(self).refresh_attempts += 1
        return True

    def run_conversation(self, user_message: str, conversation_history=None):
        api_calls = {"count": 0}

        def _fake_api_call(api_kwargs):
            # First call simulates an expired token; subsequent calls succeed.
            api_calls["count"] += 1
            if api_calls["count"] == 1:
                raise _UnauthorizedError()
            return _codex_message_response("Recovered via refresh")

        self._interruptible_api_call = _fake_api_call
        return super().run_conversation(user_message, conversation_history=conversation_history)
|
||||
|
||||
|
||||
def test_cron_run_job_codex_path_handles_internal_401_refresh(monkeypatch):
    """cron.run_job on the Codex path must survive a mid-run 401 by
    refreshing credentials once and completing successfully."""
    _patch_agent_bootstrap(monkeypatch)
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
    monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)
    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider",
        lambda requested=None: {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "codex-token",
        },
    )
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))

    # Reset class-level counters so this test is order-independent.
    _Codex401ThenSuccessAgent.refresh_attempts = 0
    _Codex401ThenSuccessAgent.last_init = {}

    success, output, final_response, error = cron_scheduler.run_job(
        {"id": "job-1", "name": "Codex Refresh Test", "prompt": "ping"}
    )

    assert success is True
    assert error is None
    assert final_response == "Recovered via refresh"
    assert "Recovered via refresh" in output
    assert _Codex401ThenSuccessAgent.refresh_attempts == 1
    assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex"
    assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses"
|
||||
|
||||
|
||||
def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
    """GatewayRunner._run_agent on the Codex path must recover from a 401
    by refreshing credentials once."""
    from unittest.mock import MagicMock, AsyncMock

    _patch_agent_bootstrap(monkeypatch)
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
    monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)
    monkeypatch.setattr(
        gateway_run,
        "_resolve_runtime_agent_kwargs",
        lambda: {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "codex-token",
        },
    )
    monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")

    # Reset class-level counters so this test is order-independent.
    _Codex401ThenSuccessAgent.refresh_attempts = 0
    _Codex401ThenSuccessAgent.last_init = {}

    # Bypass GatewayRunner.__init__ and wire only the attributes _run_agent reads.
    runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
    runner.adapters = {}
    runner._ephemeral_system_prompt = ""
    runner._prefill_messages = []
    runner._reasoning_config = None
    runner._running_agents = {}
    runner.hooks = MagicMock()
    runner.hooks.emit = AsyncMock()
    runner.hooks.loaded_hooks = []
    runner._session_db = None

    source = SessionSource(
        platform=Platform.LOCAL,
        chat_id="cli",
        chat_name="CLI",
        chat_type="dm",
        user_id="user-1",
    )

    result = asyncio.run(
        runner._run_agent(
            message="ping",
            context_prompt="",
            history=[],
            source=source,
            session_id="session-1",
            session_key="agent:main:local:dm",
        )
    )

    assert result["final_response"] == "Recovered via refresh"
    assert _Codex401ThenSuccessAgent.refresh_attempts == 1
    assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex"
    assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses"
|
||||
40
tests/test_codex_models.py
Normal file
40
tests/test_codex_models.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
import json
|
||||
|
||||
from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, get_codex_model_ids
|
||||
|
||||
|
||||
def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch):
    """The config.toml default model comes first; cached codex models follow,
    excluding non-codex and hidden entries."""
    codex_home = tmp_path / "codex-home"
    codex_home.mkdir(parents=True, exist_ok=True)
    (codex_home / "config.toml").write_text('model = "gpt-5.2-codex"\n')
    cache_payload = {
        "models": [
            {"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True},
            {"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True},
            {"slug": "gpt-4o", "priority": 1, "supported_in_api": True},
            {"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"},
        ]
    }
    (codex_home / "models_cache.json").write_text(json.dumps(cache_payload))
    monkeypatch.setenv("CODEX_HOME", str(codex_home))

    models = get_codex_model_ids()

    assert models[0] == "gpt-5.2-codex"       # configured default leads
    assert "gpt-5.1-codex" in models
    assert "gpt-5.3-codex" in models
    assert "gpt-4o" not in models             # non-codex slug filtered
    assert "gpt-5-hidden-codex" not in models # hidden entries filtered
|
||||
|
||||
|
||||
def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatch):
    """With no config or cache present, the curated default list leads."""
    codex_home = tmp_path / "codex-home"
    codex_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("CODEX_HOME", str(codex_home))

    models = get_codex_model_ids()

    assert models[: len(DEFAULT_CODEX_MODELS)] == DEFAULT_CODEX_MODELS
|
||||
51
tests/test_external_credential_detection.py
Normal file
51
tests/test_external_credential_detection.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
"""Tests for detect_external_credentials() -- Phase 2 credential sync."""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli.auth import detect_external_credentials
|
||||
|
||||
|
||||
class TestDetectCodexCLI:
    """detect_external_credentials() discovery of Codex CLI auth files."""

    def test_detects_valid_codex_auth(self, tmp_path):
        """A well-formed auth.json with an access token is reported once."""
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        auth = codex_dir / "auth.json"
        auth.write_text(json.dumps({
            "tokens": {"access_token": "tok-123", "refresh_token": "ref-456"}
        }))
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir):
            result = detect_external_credentials()
        codex_hits = [c for c in result if c["provider"] == "openai-codex"]
        assert len(codex_hits) == 1
        assert "Codex CLI" in codex_hits[0]["label"]
        assert str(auth) == codex_hits[0]["path"]

    def test_skips_codex_without_access_token(self, tmp_path):
        """An auth.json lacking an access token is ignored."""
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text(json.dumps({"tokens": {}}))
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir):
            result = detect_external_credentials()
        assert not any(c["provider"] == "openai-codex" for c in result)

    def test_skips_missing_codex_dir(self, tmp_path):
        """A nonexistent codex home yields no codex credentials."""
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=tmp_path / "nonexistent"):
            result = detect_external_credentials()
        assert not any(c["provider"] == "openai-codex" for c in result)

    def test_skips_malformed_codex_auth(self, tmp_path):
        """Unparseable auth.json must be skipped, not raise."""
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text("{bad json")
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir):
            result = detect_external_credentials()
        assert not any(c["provider"] == "openai-codex" for c in result)

    def test_returns_empty_when_nothing_found(self, tmp_path):
        """No credential sources at all produces an empty list."""
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=tmp_path / ".codex"):
            result = detect_external_credentials()
        assert result == []
|
||||
225
tests/test_flush_memories_codex.py
Normal file
225
tests/test_flush_memories_codex.py
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
"""Tests for flush_memories() working correctly across all provider modes.
|
||||
|
||||
Catches the bug where Codex mode called chat.completions.create on a
|
||||
Responses-only client, which would fail silently or with a 404.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch, MagicMock, call
|
||||
|
||||
import pytest
|
||||
|
||||
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
|
||||
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
|
||||
sys.modules.setdefault("fal_client", types.SimpleNamespace())
|
||||
|
||||
import run_agent
|
||||
|
||||
|
||||
class _FakeOpenAI:
|
||||
def __init__(self, **kwargs):
|
||||
self.kwargs = kwargs
|
||||
self.api_key = kwargs.get("api_key", "test")
|
||||
self.base_url = kwargs.get("base_url", "http://test")
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
|
||||
def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"):
    """Build an AIAgent with mocked internals, ready for flush_memories testing."""
    memory_tool_def = {
        "type": "function",
        "function": {
            "name": "memory",
            "description": "Manage memories.",
            "parameters": {
                "type": "object",
                "properties": {
                    "action": {"type": "string"},
                    "target": {"type": "string"},
                    "content": {"type": "string"},
                },
            },
        },
    }
    monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kw: [memory_tool_def])
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)

    agent = run_agent.AIAgent(
        api_key="test-key",
        base_url="https://test.example.com/v1",
        provider=provider,
        api_mode=api_mode,
        max_iterations=4,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )
    # Give it a valid memory store and enough turns to allow a flush.
    agent._memory_store = MagicMock()
    agent._memory_flush_min_turns = 1
    agent._user_turn_count = 5
    return agent
|
||||
|
||||
|
||||
def _chat_response_with_memory_call():
|
||||
"""Simulated chat completions response with a memory tool call."""
|
||||
return SimpleNamespace(
|
||||
choices=[SimpleNamespace(
|
||||
message=SimpleNamespace(
|
||||
content=None,
|
||||
tool_calls=[SimpleNamespace(
|
||||
function=SimpleNamespace(
|
||||
name="memory",
|
||||
arguments=json.dumps({
|
||||
"action": "add",
|
||||
"target": "notes",
|
||||
"content": "User prefers dark mode.",
|
||||
}),
|
||||
),
|
||||
)],
|
||||
),
|
||||
)],
|
||||
usage=SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120),
|
||||
)
|
||||
|
||||
|
||||
class TestFlushMemoriesUsesAuxiliaryClient:
    """When an auxiliary client is available, flush_memories should use it
    instead of self.client -- especially critical in Codex mode."""

    def test_flush_uses_auxiliary_when_available(self, monkeypatch):
        """Codex-mode agent routes the flush through the auxiliary client."""
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")

        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Remember this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
                agent.flush_memories(messages)

        mock_aux_client.chat.completions.create.assert_called_once()
        call_kwargs = mock_aux_client.chat.completions.create.call_args
        # Accept either kwargs accessor style for mock call_args.
        assert call_kwargs.kwargs.get("model") == "gpt-4o-mini" or call_kwargs[1].get("model") == "gpt-4o-mini"

    def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
        """Non-Codex mode with no auxiliary falls back to self.client."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        agent.client = MagicMock()
        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Save this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)

        agent.client.chat.completions.create.assert_called_once()

    def test_flush_executes_memory_tool_calls(self, monkeypatch):
        """Verify that memory tool calls from the flush response actually get executed."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")

        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Note this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
                agent.flush_memories(messages)

        mock_memory.assert_called_once()
        call_kwargs = mock_memory.call_args
        assert call_kwargs.kwargs["action"] == "add"
        assert call_kwargs.kwargs["target"] == "notes"
        assert "dark mode" in call_kwargs.kwargs["content"]

    def test_flush_strips_artifacts_from_messages(self, monkeypatch):
        """After flush, the flush prompt and any response should be removed from messages."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")

        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()

        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Remember X"},
            ]
            original_len = len(messages)
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)

        # Messages should not grow from the flush
        assert len(messages) <= original_len
        # No flush sentinel should remain
        for msg in messages:
            assert "_flush_sentinel" not in msg
|
||||
|
||||
|
||||
class TestFlushMemoriesCodexFallback:
    """When no auxiliary client exists and we're in Codex mode, flush should
    use the Codex Responses API path instead of chat.completions."""

    def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch):
        """With no auxiliary client, Codex mode flushes via _run_codex_stream."""
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")

        # Fake Responses-API reply carrying a single memory function_call item.
        codex_response = SimpleNamespace(
            output=[
                SimpleNamespace(
                    type="function_call",
                    call_id="call_1",
                    name="memory",
                    arguments=json.dumps({
                        "action": "add",
                        "target": "notes",
                        "content": "Codex flush test",
                    }),
                ),
            ],
            usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
            status="completed",
            model="gpt-5-codex",
        )

        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \
                patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
                patch.object(agent, "_build_api_kwargs") as mock_build, \
                patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
            mock_build.return_value = {
                "model": "gpt-5-codex",
                "instructions": "test",
                "input": [],
                "tools": [],
                "max_output_tokens": 4096,
            }
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Save this"},
            ]
            agent.flush_memories(messages)

        mock_stream.assert_called_once()
        mock_memory.assert_called_once()
        assert mock_memory.call_args.kwargs["content"] == "Codex flush test"
|
||||
460
tests/test_provider_parity.py
Normal file
460
tests/test_provider_parity.py
Normal file
|
|
@ -0,0 +1,460 @@
|
|||
"""Provider parity tests: verify that AIAgent builds correct API kwargs
|
||||
and handles responses properly for all supported providers.
|
||||
|
||||
Ensures changes to one provider path don't silently break another.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
|
||||
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
|
||||
sys.modules.setdefault("fal_client", types.SimpleNamespace())
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def _tool_defs(*names):
|
||||
return [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": n,
|
||||
"description": f"{n} tool",
|
||||
"parameters": {"type": "object", "properties": {}},
|
||||
},
|
||||
}
|
||||
for n in names
|
||||
]
|
||||
|
||||
|
||||
class _FakeOpenAI:
|
||||
def __init__(self, **kw):
|
||||
self.api_key = kw.get("api_key", "test")
|
||||
self.base_url = kw.get("base_url", "http://test")
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
|
||||
def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"):
    """Construct an AIAgent for the given provider with bootstrap hooks stubbed.

    Tool discovery returns two dummy tools (web_search, terminal); the
    toolset requirement check and the OpenAI client are replaced with fakes.
    """
    monkeypatch.setattr("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal"))
    monkeypatch.setattr("run_agent.check_toolset_requirements", lambda: {})
    monkeypatch.setattr("run_agent.OpenAI", _FakeOpenAI)
    return AIAgent(
        api_key="test-key",
        base_url=base_url,
        provider=provider,
        api_mode=api_mode,
        max_iterations=4,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )
|
||||
|
||||
|
||||
# ── _build_api_kwargs tests ─────────────────────────────────────────────────
|
||||
|
||||
class TestBuildApiKwargsOpenRouter:
    """_build_api_kwargs shape for the OpenRouter provider."""

    def test_uses_chat_completions_format(self, monkeypatch):
        """OpenRouter kwargs carry messages/model in chat-completions form."""
        agent = _make_agent(monkeypatch, "openrouter")
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert "messages" in kwargs
        assert "model" in kwargs
        assert kwargs["messages"][-1]["content"] == "hi"

    def test_includes_reasoning_in_extra_body(self, monkeypatch):
        """Reasoning is enabled via OpenRouter's extra_body channel."""
        agent = _make_agent(monkeypatch, "openrouter")
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        extra = kwargs.get("extra_body", {})
        assert "reasoning" in extra
        assert extra["reasoning"]["enabled"] is True

    def test_includes_tools(self, monkeypatch):
        """The stubbed tool definitions are forwarded in the request."""
        agent = _make_agent(monkeypatch, "openrouter")
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert "tools" in kwargs
        tool_names = [t["function"]["name"] for t in kwargs["tools"]]
        assert "web_search" in tool_names

    def test_no_responses_api_fields(self, monkeypatch):
        """Responses-API-only fields must be absent in chat-completions mode."""
        agent = _make_agent(monkeypatch, "openrouter")
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert "input" not in kwargs
        assert "instructions" not in kwargs
        assert "store" not in kwargs
|
||||
|
||||
class TestBuildApiKwargsNousPortal:
    """_build_api_kwargs shape for the Nous inference portal."""

    def test_includes_nous_product_tags(self, monkeypatch):
        """Nous requests are tagged with the hermes-agent product marker."""
        agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        extra = kwargs.get("extra_body", {})
        assert extra.get("tags") == ["product=hermes-agent"]

    def test_uses_chat_completions_format(self, monkeypatch):
        """Nous uses chat-completions, never the Responses-API input field."""
        agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert "messages" in kwargs
        assert "input" not in kwargs
|
||||
|
||||
class TestBuildApiKwargsCustomEndpoint:
    """_build_api_kwargs shape for a custom OpenAI-compatible endpoint."""

    def test_uses_chat_completions_format(self, monkeypatch):
        """Custom endpoints use chat-completions, not the Responses API."""
        agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1")
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert "messages" in kwargs
        assert "input" not in kwargs

    def test_no_openrouter_extra_body(self, monkeypatch):
        """OpenRouter-only reasoning extras must not leak to custom providers."""
        agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1")
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        extra = kwargs.get("extra_body", {})
        assert "reasoning" not in extra
|
||||
|
||||
|
||||
class TestBuildApiKwargsCodex:
    """_build_api_kwargs shape for the OpenAI Codex Responses-API mode."""

    def _agent(self, monkeypatch):
        # Shared construction: all tests here target the same Codex routing.
        return _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                           base_url="https://chatgpt.com/backend-api/codex")

    def test_uses_responses_api_format(self, monkeypatch):
        """Codex kwargs use input/instructions and disable server-side storage."""
        agent = self._agent(monkeypatch)
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert "input" in kwargs
        assert "instructions" in kwargs
        assert "messages" not in kwargs
        assert kwargs["store"] is False

    def test_includes_reasoning_config(self, monkeypatch):
        """Default reasoning effort for Codex is medium."""
        agent = self._agent(monkeypatch)
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert "reasoning" in kwargs
        assert kwargs["reasoning"]["effort"] == "medium"

    def test_includes_encrypted_content_in_include(self, monkeypatch):
        """Encrypted reasoning content is requested via the include list."""
        agent = self._agent(monkeypatch)
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert "reasoning.encrypted_content" in kwargs.get("include", [])

    def test_tools_converted_to_responses_format(self, monkeypatch):
        """Tool defs are flattened to the Responses format (top-level name)."""
        agent = self._agent(monkeypatch)
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        tools = kwargs.get("tools", [])
        assert len(tools) > 0
        # Responses format has "name" at top level, not nested under "function"
        assert "name" in tools[0]
        assert "function" not in tools[0]
|
||||
|
||||
|
||||
# ── Message conversion tests ────────────────────────────────────────────────
|
||||
|
||||
class TestChatMessagesToResponsesInput:
    """Verify _chat_messages_to_responses_input for Codex mode."""

    @staticmethod
    def _agent(monkeypatch):
        # Every test here needs a Codex-mode agent aimed at the Codex backend.
        return _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                           base_url="https://chatgpt.com/backend-api/codex")

    def test_user_message_passes_through(self, monkeypatch):
        agent = self._agent(monkeypatch)
        items = agent._chat_messages_to_responses_input(
            [{"role": "user", "content": "hello"}]
        )
        assert items == [{"role": "user", "content": "hello"}]

    def test_system_messages_filtered(self, monkeypatch):
        agent = self._agent(monkeypatch)
        items = agent._chat_messages_to_responses_input([
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "hello"},
        ])
        # Only the user turn survives; system messages are not passed as input.
        assert len(items) == 1
        assert items[0]["role"] == "user"

    def test_assistant_tool_calls_become_function_call_items(self, monkeypatch):
        agent = self._agent(monkeypatch)
        items = agent._chat_messages_to_responses_input([{
            "role": "assistant",
            "content": "",
            "tool_calls": [{
                "id": "call_abc",
                "call_id": "call_abc",
                "function": {"name": "web_search", "arguments": '{"query": "test"}'},
            }],
        }])
        calls = [item for item in items if item.get("type") == "function_call"]
        assert len(calls) == 1
        assert calls[0]["name"] == "web_search"
        assert calls[0]["call_id"] == "call_abc"

    def test_tool_results_become_function_call_output(self, monkeypatch):
        agent = self._agent(monkeypatch)
        items = agent._chat_messages_to_responses_input(
            [{"role": "tool", "tool_call_id": "call_abc", "content": "result here"}]
        )
        first = items[0]
        assert first["type"] == "function_call_output"
        assert first["call_id"] == "call_abc"
        assert first["output"] == "result here"

    def test_encrypted_reasoning_replayed(self, monkeypatch):
        """Encrypted reasoning items from previous turns must be included in input."""
        agent = self._agent(monkeypatch)
        items = agent._chat_messages_to_responses_input([
            {"role": "user", "content": "think about this"},
            {
                "role": "assistant",
                "content": "I thought about it.",
                "codex_reasoning_items": [
                    {"type": "reasoning", "id": "rs_abc", "encrypted_content": "gAAAA_test_blob"},
                ],
            },
            {"role": "user", "content": "continue"},
        ])
        reasoning = [item for item in items if item.get("type") == "reasoning"]
        assert len(reasoning) == 1
        assert reasoning[0]["encrypted_content"] == "gAAAA_test_blob"

    def test_no_reasoning_items_for_non_codex_messages(self, monkeypatch):
        """Messages without codex_reasoning_items should not inject anything."""
        agent = self._agent(monkeypatch)
        items = agent._chat_messages_to_responses_input([
            {"role": "assistant", "content": "hi"},
            {"role": "user", "content": "hello"},
        ])
        assert [item for item in items if item.get("type") == "reasoning"] == []
# ── Response normalization tests ─────────────────────────────────────────────
|
||||
|
||||
class TestNormalizeCodexResponse:
    """Verify _normalize_codex_response extracts all fields correctly."""

    def _make_codex_agent(self, monkeypatch):
        # Fresh Codex-mode agent per test.
        return _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                           base_url="https://chatgpt.com/backend-api/codex")

    @staticmethod
    def _final_message(text):
        # Completed final-answer message item, as emitted by the Responses API.
        return SimpleNamespace(
            type="message",
            status="completed",
            content=[SimpleNamespace(type="output_text", text=text)],
            phase="final_answer",
        )

    def test_text_response(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(
            output=[self._final_message("Hello!")],
            status="completed",
        )
        message, finish_reason = agent._normalize_codex_response(response)
        assert message.content == "Hello!"
        assert finish_reason == "stop"

    def test_reasoning_summary_extracted(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        reasoning_item = SimpleNamespace(
            type="reasoning",
            encrypted_content="gAAAA_blob",
            summary=[SimpleNamespace(type="summary_text", text="Thinking about math")],
            id="rs_123",
            status=None,
        )
        response = SimpleNamespace(
            output=[reasoning_item, self._final_message("42")],
            status="completed",
        )
        message, finish_reason = agent._normalize_codex_response(response)
        assert message.content == "42"
        assert "math" in message.reasoning
        assert finish_reason == "stop"

    def test_encrypted_content_captured(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        reasoning_item = SimpleNamespace(
            type="reasoning",
            encrypted_content="gAAAA_secret_blob_123",
            summary=[SimpleNamespace(type="summary_text", text="Thinking")],
            id="rs_456",
            status=None,
        )
        response = SimpleNamespace(
            output=[reasoning_item, self._final_message("done")],
            status="completed",
        )
        message, _ = agent._normalize_codex_response(response)
        # Raw encrypted reasoning must survive for multi-turn replay.
        assert message.codex_reasoning_items is not None
        assert len(message.codex_reasoning_items) == 1
        assert message.codex_reasoning_items[0]["encrypted_content"] == "gAAAA_secret_blob_123"
        assert message.codex_reasoning_items[0]["id"] == "rs_456"

    def test_no_encrypted_content_when_missing(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(
            output=[self._final_message("no reasoning")],
            status="completed",
        )
        message, _ = agent._normalize_codex_response(response)
        assert message.codex_reasoning_items is None

    def test_tool_calls_extracted(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        call_item = SimpleNamespace(
            type="function_call",
            status="completed",
            call_id="call_xyz",
            name="web_search",
            arguments='{"query":"test"}',
            id="fc_xyz",
        )
        response = SimpleNamespace(output=[call_item], status="completed")
        message, finish_reason = agent._normalize_codex_response(response)
        assert finish_reason == "tool_calls"
        assert len(message.tool_calls) == 1
        assert message.tool_calls[0].function.name == "web_search"
# ── Chat completions response handling (OpenRouter/Nous) ─────────────────────
|
||||
|
||||
class TestBuildAssistantMessage:
    """Verify _build_assistant_message works for all provider response formats."""

    @staticmethod
    def _response_message(**overrides):
        # Baseline chat-completions message shape; tests override single fields.
        fields = {
            "content": None,
            "tool_calls": None,
            "reasoning": None,
            "reasoning_content": None,
            "reasoning_details": None,
        }
        fields.update(overrides)
        return SimpleNamespace(**fields)

    def test_openrouter_reasoning_fields(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        msg = self._response_message(content="answer", reasoning="I thought about it")
        result = agent._build_assistant_message(msg, "stop")
        assert result["content"] == "answer"
        assert result["reasoning"] == "I thought about it"
        assert "codex_reasoning_items" not in result

    def test_openrouter_reasoning_details_preserved_unmodified(self, monkeypatch):
        """reasoning_details must be passed back exactly as received for
        multi-turn continuity (OpenRouter, Anthropic, OpenAI all need this)."""
        agent = _make_agent(monkeypatch, "openrouter")
        original_detail = {
            "type": "thinking",
            "thinking": "deep thoughts here",
            "signature": "sig123_opaque_blob",
            "encrypted_content": "some_provider_blob",
            "extra_field": "should_not_be_dropped",
        }
        msg = self._response_message(content="answer", reasoning_details=[original_detail])
        stored = agent._build_assistant_message(msg, "stop")["reasoning_details"][0]
        # ALL fields must survive, not just type/text/signature.
        assert stored["signature"] == "sig123_opaque_blob"
        assert stored["encrypted_content"] == "some_provider_blob"
        assert stored["extra_field"] == "should_not_be_dropped"
        assert stored["thinking"] == "deep thoughts here"

    def test_codex_preserves_encrypted_reasoning(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        reasoning_items = [
            {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"},
        ]
        msg = self._response_message(
            content="result",
            reasoning="summary text",
            codex_reasoning_items=list(reasoning_items),
        )
        result = agent._build_assistant_message(msg, "stop")
        assert result["codex_reasoning_items"] == reasoning_items

    def test_plain_message_no_codex_items(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        msg = self._response_message(content="simple")
        result = agent._build_assistant_message(msg, "stop")
        assert "codex_reasoning_items" not in result
# ── Auxiliary client provider resolution ─────────────────────────────────────
|
||||
|
||||
class TestAuxiliaryClientProviderPriority:
    """Verify auxiliary client resolution doesn't break for any provider."""

    def test_openrouter_always_wins(self, monkeypatch):
        # OpenRouter key present -> OpenRouter is selected unconditionally.
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        from agent.auxiliary_client import get_text_auxiliary_client
        with patch("agent.auxiliary_client.OpenAI") as mock:
            _, model = get_text_auxiliary_client()
            assert model == "google/gemini-3-flash-preview"
            assert "openrouter" in str(mock.call_args.kwargs["base_url"]).lower()

    def test_nous_when_no_openrouter(self, monkeypatch):
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        from agent.auxiliary_client import get_text_auxiliary_client
        with patch("agent.auxiliary_client._read_nous_auth",
                   return_value={"access_token": "nous-tok"}), \
                patch("agent.auxiliary_client.OpenAI"):
            _, model = get_text_auxiliary_client()
            assert model == "gemini-3-flash"

    def test_custom_endpoint_when_no_nous(self, monkeypatch):
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        from agent.auxiliary_client import get_text_auxiliary_client
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI") as mock:
            get_text_auxiliary_client()
            assert mock.call_args.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_codex_fallback_last_resort(self, monkeypatch):
        # No OpenRouter, no Nous, no custom endpoint -> Codex token fallback.
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        from agent.auxiliary_client import get_text_auxiliary_client, CodexAuxiliaryClient
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token",
                      return_value="codex-tok"), \
                patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client()
            assert model == "gpt-5.3-codex"
            assert isinstance(client, CodexAuxiliaryClient)
748
tests/test_run_agent_codex_responses.py
Normal file
748
tests/test_run_agent_codex_responses.py
Normal file
|
|
@ -0,0 +1,748 @@
|
|||
import sys
import types
from types import SimpleNamespace

import pytest

# Register lightweight stand-ins for optional third-party packages before
# importing run_agent, so the import succeeds when they are not installed.
# setdefault leaves any genuinely installed module untouched.
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())

import run_agent
def _patch_agent_bootstrap(monkeypatch):
    """Stub out tool discovery and toolset checks so AIAgent builds offline.

    Replaces get_tool_definitions with a single fake "terminal" tool and
    makes check_toolset_requirements report nothing missing.
    """
    def _stub_tool_definitions(**kwargs):
        # Return a fresh list each call, mirroring the real discovery function.
        return [
            {
                "type": "function",
                "function": {
                    "name": "terminal",
                    "description": "Run shell commands.",
                    "parameters": {"type": "object", "properties": {}},
                },
            }
        ]

    monkeypatch.setattr(run_agent, "get_tool_definitions", _stub_tool_definitions)
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
def _build_agent(monkeypatch):
    """Construct a Codex-mode AIAgent suitable for offline unit tests.

    Tool discovery is stubbed via _patch_agent_bootstrap, and all
    persistence hooks are replaced with no-ops so tests leave no artifacts.
    """
    _patch_agent_bootstrap(monkeypatch)

    agent = run_agent.AIAgent(
        model="gpt-5-codex",
        base_url="https://chatgpt.com/backend-api/codex",
        api_key="codex-token",
        quiet_mode=True,
        max_iterations=4,
        skip_context_files=True,
        skip_memory=True,
    )
    # Neutralize side effects: no resource cleanup, session persistence,
    # trajectory saving, or session logging during tests.
    agent._cleanup_task_resources = lambda task_id: None
    agent._persist_session = lambda messages, history=None: None
    agent._save_trajectory = lambda messages, user_message, completed: None
    agent._save_session_log = lambda messages: None
    return agent
def _codex_message_response(text: str):
|
||||
return SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="message",
|
||||
content=[SimpleNamespace(type="output_text", text=text)],
|
||||
)
|
||||
],
|
||||
usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8),
|
||||
status="completed",
|
||||
model="gpt-5-codex",
|
||||
)
|
||||
|
||||
|
||||
def _codex_tool_call_response():
|
||||
return SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="function_call",
|
||||
id="fc_1",
|
||||
call_id="call_1",
|
||||
name="terminal",
|
||||
arguments="{}",
|
||||
)
|
||||
],
|
||||
usage=SimpleNamespace(input_tokens=12, output_tokens=4, total_tokens=16),
|
||||
status="completed",
|
||||
model="gpt-5-codex",
|
||||
)
|
||||
|
||||
|
||||
def _codex_incomplete_message_response(text: str):
|
||||
return SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="message",
|
||||
status="in_progress",
|
||||
content=[SimpleNamespace(type="output_text", text=text)],
|
||||
)
|
||||
],
|
||||
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
|
||||
status="in_progress",
|
||||
model="gpt-5-codex",
|
||||
)
|
||||
|
||||
|
||||
def _codex_commentary_message_response(text: str):
|
||||
return SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="message",
|
||||
phase="commentary",
|
||||
status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text=text)],
|
||||
)
|
||||
],
|
||||
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
|
||||
status="completed",
|
||||
model="gpt-5-codex",
|
||||
)
|
||||
|
||||
|
||||
def _codex_ack_message_response(text: str):
|
||||
return SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="message",
|
||||
status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text=text)],
|
||||
)
|
||||
],
|
||||
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
|
||||
status="completed",
|
||||
model="gpt-5-codex",
|
||||
)
|
||||
|
||||
|
||||
class _FakeResponsesStream:
|
||||
def __init__(self, *, final_response=None, final_error=None):
|
||||
self._final_response = final_response
|
||||
self._final_error = final_error
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def __iter__(self):
|
||||
return iter(())
|
||||
|
||||
def get_final_response(self):
|
||||
if self._final_error is not None:
|
||||
raise self._final_error
|
||||
return self._final_response
|
||||
|
||||
|
||||
class _FakeCreateStream:
|
||||
def __init__(self, events):
|
||||
self._events = list(events)
|
||||
self.closed = False
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._events)
|
||||
|
||||
def close(self):
|
||||
self.closed = True
|
||||
|
||||
|
||||
def _codex_request_kwargs():
|
||||
return {
|
||||
"model": "gpt-5-codex",
|
||||
"instructions": "You are Hermes.",
|
||||
"input": [{"role": "user", "content": "Ping"}],
|
||||
"tools": None,
|
||||
"store": False,
|
||||
}
|
||||
|
||||
|
||||
def test_api_mode_uses_explicit_provider_when_codex(monkeypatch):
    """Explicit provider="openai-codex" selects codex_responses mode even when
    base_url points at OpenRouter."""
    _patch_agent_bootstrap(monkeypatch)
    agent = run_agent.AIAgent(
        model="gpt-5-codex",
        base_url="https://openrouter.ai/api/v1",
        provider="openai-codex",
        api_key="codex-token",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    assert agent.api_mode == "codex_responses"
    assert agent.provider == "openai-codex"


def test_api_mode_normalizes_provider_case(monkeypatch):
    """Provider names are case-insensitive: "OpenAI-Codex" normalizes to
    "openai-codex" and still selects codex_responses mode."""
    _patch_agent_bootstrap(monkeypatch)
    agent = run_agent.AIAgent(
        model="gpt-5-codex",
        base_url="https://openrouter.ai/api/v1",
        provider="OpenAI-Codex",
        api_key="codex-token",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    assert agent.provider == "openai-codex"
    assert agent.api_mode == "codex_responses"


def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypatch):
    """An explicit openrouter provider wins over a Codex-looking base_url:
    the agent stays in chat_completions mode."""
    _patch_agent_bootstrap(monkeypatch)
    agent = run_agent.AIAgent(
        model="gpt-5-codex",
        base_url="https://chatgpt.com/backend-api/codex",
        provider="openrouter",
        api_key="test-token",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    assert agent.api_mode == "chat_completions"
    assert agent.provider == "openrouter"
def test_build_api_kwargs_codex(monkeypatch):
    """Codex request kwargs: system prompt becomes `instructions`, tools are
    flattened to Responses format, and chat-completions-only fields are absent.

    (Fix: the original asserted `kwargs["store"] is False` twice.)
    """
    agent = _build_agent(monkeypatch)
    kwargs = agent._build_api_kwargs(
        [
            {"role": "system", "content": "You are Hermes."},
            {"role": "user", "content": "Ping"},
        ]
    )

    assert kwargs["model"] == "gpt-5-codex"
    assert kwargs["instructions"] == "You are Hermes."
    assert kwargs["store"] is False
    assert isinstance(kwargs["input"], list)
    assert kwargs["input"][0]["role"] == "user"
    # Responses tool format: top-level name, no nested "function" wrapper.
    assert kwargs["tools"][0]["type"] == "function"
    assert kwargs["tools"][0]["name"] == "terminal"
    assert kwargs["tools"][0]["strict"] is False
    assert "function" not in kwargs["tools"][0]
    # Chat-completions-only knobs must not leak into Responses requests.
    assert "timeout" not in kwargs
    assert "max_tokens" not in kwargs
    assert "extra_body" not in kwargs
def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
    """A missing `response.completed` event triggers exactly one stream retry,
    and the retry's final response is returned."""
    agent = _build_agent(monkeypatch)
    calls = {"stream": 0}

    def _fake_stream(**kwargs):
        calls["stream"] += 1
        if calls["stream"] == 1:
            # First attempt fails the way the SDK does when the stream ends
            # without a terminal event.
            return _FakeResponsesStream(
                final_error=RuntimeError("Didn't receive a `response.completed` event.")
            )
        return _FakeResponsesStream(final_response=_codex_message_response("stream ok"))

    agent.client = SimpleNamespace(
        responses=SimpleNamespace(
            stream=_fake_stream,
            create=lambda **kwargs: _codex_message_response("fallback"),
        )
    )

    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert response.output[0].content[0].text == "stream ok"
def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(monkeypatch):
    """When every stream attempt fails, _run_codex_stream falls back to a
    non-streaming create() call."""
    agent = _build_agent(monkeypatch)
    calls = {"stream": 0, "create": 0}

    def _fake_stream(**kwargs):
        calls["stream"] += 1
        # Every stream attempt ends without a terminal event.
        return _FakeResponsesStream(
            final_error=RuntimeError("Didn't receive a `response.completed` event.")
        )

    def _fake_create(**kwargs):
        calls["create"] += 1
        return _codex_message_response("create fallback ok")

    agent.client = SimpleNamespace(
        responses=SimpleNamespace(
            stream=_fake_stream,
            create=_fake_create,
        )
    )

    response = agent._run_codex_stream(_codex_request_kwargs())
    # Two stream attempts (initial + retry), then one create() fallback.
    assert calls["stream"] == 2
    assert calls["create"] == 1
    assert response.output[0].content[0].text == "create fallback ok"
def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
    """The create() fallback may itself stream; its events must be parsed for
    the completed response and the underlying stream must be closed."""
    agent = _build_agent(monkeypatch)
    calls = {"stream": 0, "create": 0}
    create_stream = _FakeCreateStream(
        [
            SimpleNamespace(type="response.created"),
            SimpleNamespace(type="response.in_progress"),
            SimpleNamespace(type="response.completed", response=_codex_message_response("streamed create ok")),
        ]
    )

    def _fake_stream(**kwargs):
        calls["stream"] += 1
        return _FakeResponsesStream(
            final_error=RuntimeError("Didn't receive a `response.completed` event.")
        )

    def _fake_create(**kwargs):
        calls["create"] += 1
        # The fallback is expected to request a streaming create.
        assert kwargs.get("stream") is True
        return create_stream

    agent.client = SimpleNamespace(
        responses=SimpleNamespace(
            stream=_fake_stream,
            create=_fake_create,
        )
    )

    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert calls["create"] == 1
    assert create_stream.closed is True
    assert response.output[0].content[0].text == "streamed create ok"
def test_run_conversation_codex_plain_text(monkeypatch):
    """A single completed message response ends the conversation cleanly."""
    agent = _build_agent(monkeypatch)
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: _codex_message_response("OK"))

    result = agent.run_conversation("Say OK")

    assert result["completed"] is True
    assert result["final_response"] == "OK"
    assert result["messages"][-1]["role"] == "assistant"
    assert result["messages"][-1]["content"] == "OK"
def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
    """A 401 from the API triggers exactly one forced credential refresh and
    one retried API call, after which the conversation completes."""
    agent = _build_agent(monkeypatch)
    calls = {"api": 0, "refresh": 0}

    class _UnauthorizedError(RuntimeError):
        # Mimics the SDK error surface: 401 message text plus status_code attr.
        def __init__(self):
            super().__init__("Error code: 401 - unauthorized")
            self.status_code = 401

    def _fake_api_call(api_kwargs):
        calls["api"] += 1
        if calls["api"] == 1:
            raise _UnauthorizedError()
        return _codex_message_response("Recovered after refresh")

    def _fake_refresh(*, force=True):
        calls["refresh"] += 1
        # The agent must request a *forced* refresh after a 401.
        assert force is True
        return True

    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
    monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh)

    result = agent.run_conversation("Say OK")

    assert calls["api"] == 2
    assert calls["refresh"] == 1
    assert result["completed"] is True
    assert result["final_response"] == "Recovered after refresh"
def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch):
    """Refreshing credentials closes the old client and rebuilds it with the
    newly resolved token and base URL."""
    agent = _build_agent(monkeypatch)
    closed = {"value": False}
    rebuilt = {"kwargs": None}

    class _ExistingClient:
        def close(self):
            closed["value"] = True

    class _RebuiltClient:
        pass

    def _fake_openai(**kwargs):
        # Capture the kwargs the agent uses to rebuild its OpenAI client.
        rebuilt["kwargs"] = kwargs
        return _RebuiltClient()

    monkeypatch.setattr(
        "hermes_cli.auth.resolve_codex_runtime_credentials",
        lambda force_refresh=True: {
            "api_key": "new-codex-token",
            "base_url": "https://chatgpt.com/backend-api/codex",
        },
    )
    monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)

    agent.client = _ExistingClient()
    ok = agent._try_refresh_codex_client_credentials(force=True)

    assert ok is True
    assert closed["value"] is True
    assert rebuilt["kwargs"]["api_key"] == "new-codex-token"
    assert rebuilt["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
    assert isinstance(agent.client, _RebuiltClient)
def test_run_conversation_codex_tool_round_trip(monkeypatch):
    """Tool-call response then final message: the tool output is threaded back
    into the conversation and the run completes."""
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        # Append a tool result for each requested call, as the real executor would.
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("run a command")

    assert result["completed"] is True
    assert result["final_response"] == "done"
    assert any(msg.get("tool_calls") for msg in result["messages"] if msg.get("role") == "assistant")
    assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeypatch):
    """Replayed function_call / function_call_output items carry call_id and
    never the optional item-level id."""
    agent = _build_agent(monkeypatch)
    items = agent._chat_messages_to_responses_input(
        [
            {"role": "user", "content": "Run terminal"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    {
                        "id": "call_abc123",
                        "type": "function",
                        "function": {"name": "terminal", "arguments": "{}"},
                    }
                ],
            },
            {"role": "tool", "tool_call_id": "call_abc123", "content": '{"ok":true}'},
        ]
    )

    function_call = next(item for item in items if item.get("type") == "function_call")
    function_output = next(item for item in items if item.get("type") == "function_call_output")

    assert function_call["call_id"] == "call_abc123"
    assert "id" not in function_call
    assert function_output["call_id"] == "call_abc123"


def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
    """Composite "call_...|fc_..." ids are split; only the call_ part is used
    as the replayed call_id."""
    agent = _build_agent(monkeypatch)
    items = agent._chat_messages_to_responses_input(
        [
            {"role": "user", "content": "Run terminal"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    {
                        "id": "call_pair123|fc_pair123",
                        "type": "function",
                        "function": {"name": "terminal", "arguments": "{}"},
                    }
                ],
            },
            {"role": "tool", "tool_call_id": "call_pair123|fc_pair123", "content": '{"ok":true}'},
        ]
    )

    function_call = next(item for item in items if item.get("type") == "function_call")
    function_output = next(item for item in items if item.get("type") == "function_call_output")

    assert function_call["call_id"] == "call_pair123"
    assert "id" not in function_call
    assert function_output["call_id"] == "call_pair123"
def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch):
    """Preflight drops the optional item-level "id" on function_call items and
    keeps call_id."""
    agent = _build_agent(monkeypatch)
    preflight = agent._preflight_codex_api_kwargs(
        {
            "model": "gpt-5-codex",
            "instructions": "You are Hermes.",
            "input": [
                {"role": "user", "content": "hi"},
                {
                    "type": "function_call",
                    "id": "call_bad",
                    "call_id": "call_good",
                    "name": "terminal",
                    "arguments": "{}",
                },
            ],
            "tools": [],
            "store": False,
        }
    )

    fn_call = next(item for item in preflight["input"] if item.get("type") == "function_call")
    assert fn_call["call_id"] == "call_good"
    assert "id" not in fn_call


def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id(monkeypatch):
    """A function_call_output item with no call_id is a hard preflight error."""
    agent = _build_agent(monkeypatch)

    with pytest.raises(ValueError, match="function_call_output is missing call_id"):
        agent._preflight_codex_api_kwargs(
            {
                "model": "gpt-5-codex",
                "instructions": "You are Hermes.",
                "input": [{"type": "function_call_output", "output": "{}"}],
                "tools": [],
                "store": False,
            }
        )


def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch):
    """Unknown top-level request fields are rejected before hitting the API."""
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    kwargs["some_unknown_field"] = "value"

    with pytest.raises(ValueError, match="unsupported field"):
        agent._preflight_codex_api_kwargs(kwargs)


def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch):
    """Known optional fields (reasoning, include, temperature,
    max_output_tokens) pass through preflight untouched."""
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    kwargs["reasoning"] = {"effort": "high", "summary": "auto"}
    kwargs["include"] = ["reasoning.encrypted_content"]
    kwargs["temperature"] = 0.7
    kwargs["max_output_tokens"] = 4096

    result = agent._preflight_codex_api_kwargs(kwargs)
    assert result["reasoning"] == {"effort": "high", "summary": "auto"}
    assert result["include"] == ["reasoning.encrypted_content"]
    assert result["temperature"] == 0.7
    assert result["max_output_tokens"] == 4096
def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
    """The follow-up request's input replays the function call keyed by
    call_id (never the item-level id)."""
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]
    requests = []

    def _fake_api_call(api_kwargs):
        # Record each outgoing request so the replay payload can be inspected.
        requests.append(api_kwargs)
        return responses.pop(0)

    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("run a command")

    assert result["completed"] is True
    assert result["final_response"] == "done"
    assert len(requests) >= 2

    replay_input = requests[1]["input"]
    function_call = next(item for item in replay_input if item.get("type") == "function_call")
    function_output = next(item for item in replay_input if item.get("type") == "function_call_output")
    assert function_call["call_id"] == "call_1"
    assert "id" not in function_call
    assert function_output["call_id"] == "call_1"
def test_run_conversation_codex_continues_after_incomplete_interim_message(monkeypatch):
    """An in_progress interim message is recorded with finish_reason
    "incomplete" and the loop keeps going until a completed final answer."""
    agent = _build_agent(monkeypatch)
    responses = [
        _codex_incomplete_message_response("I'll inspect the repo structure first."),
        _codex_tool_call_response(),
        _codex_message_response("Architecture summary complete."),
    ]
    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))

    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
        for call in assistant_message.tool_calls:
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": '{"ok":true}',
                }
            )

    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)

    result = agent.run_conversation("analyze repo")

    assert result["completed"] is True
    assert result["final_response"] == "Architecture summary complete."
    # The interim message must be kept in history, flagged as incomplete.
    assert any(
        msg.get("role") == "assistant"
        and msg.get("finish_reason") == "incomplete"
        and "inspect the repo structure" in (msg.get("content") or "")
        for msg in result["messages"]
    )
    assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch):
|
||||
agent = _build_agent(monkeypatch)
|
||||
assistant_message, finish_reason = agent._normalize_codex_response(
|
||||
_codex_commentary_message_response("I'll inspect the repository first.")
|
||||
)
|
||||
|
||||
assert finish_reason == "incomplete"
|
||||
assert "inspect the repository" in (assistant_message.content or "")
|
||||
|
||||
|
||||
def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch):
|
||||
agent = _build_agent(monkeypatch)
|
||||
responses = [
|
||||
_codex_commentary_message_response("I'll inspect the repo structure first."),
|
||||
_codex_tool_call_response(),
|
||||
_codex_message_response("Architecture summary complete."),
|
||||
]
|
||||
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
|
||||
|
||||
def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
|
||||
for call in assistant_message.tool_calls:
|
||||
messages.append(
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": call.id,
|
||||
"content": '{"ok":true}',
|
||||
}
|
||||
)
|
||||
|
||||
monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)
|
||||
|
||||
result = agent.run_conversation("analyze repo")
|
||||
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "Architecture summary complete."
|
||||
assert any(
|
||||
msg.get("role") == "assistant"
|
||||
and msg.get("finish_reason") == "incomplete"
|
||||
and "inspect the repo structure" in (msg.get("content") or "")
|
||||
for msg in result["messages"]
|
||||
)
|
||||
assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
||||
|
||||
|
||||
def test_run_conversation_codex_continues_after_ack_stop_message(monkeypatch):
|
||||
agent = _build_agent(monkeypatch)
|
||||
responses = [
|
||||
_codex_ack_message_response(
|
||||
"Absolutely — I can do that. I'll inspect ~/openclaw-studio and report back with a walkthrough."
|
||||
),
|
||||
_codex_tool_call_response(),
|
||||
_codex_message_response("Architecture summary complete."),
|
||||
]
|
||||
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
|
||||
|
||||
def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
|
||||
for call in assistant_message.tool_calls:
|
||||
messages.append(
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": call.id,
|
||||
"content": '{"ok":true}',
|
||||
}
|
||||
)
|
||||
|
||||
monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)
|
||||
|
||||
result = agent.run_conversation("look into ~/openclaw-studio and tell me how it works")
|
||||
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "Architecture summary complete."
|
||||
assert any(
|
||||
msg.get("role") == "assistant"
|
||||
and msg.get("finish_reason") == "incomplete"
|
||||
and "inspect ~/openclaw-studio" in (msg.get("content") or "")
|
||||
for msg in result["messages"]
|
||||
)
|
||||
assert any(
|
||||
msg.get("role") == "user"
|
||||
and "Continue now. Execute the required tool calls" in (msg.get("content") or "")
|
||||
for msg in result["messages"]
|
||||
)
|
||||
assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
||||
|
||||
|
||||
def test_run_conversation_codex_continues_after_ack_for_directory_listing_prompt(monkeypatch):
|
||||
agent = _build_agent(monkeypatch)
|
||||
responses = [
|
||||
_codex_ack_message_response(
|
||||
"I'll check what's in the current directory and call out 3 notable items."
|
||||
),
|
||||
_codex_tool_call_response(),
|
||||
_codex_message_response("Directory summary complete."),
|
||||
]
|
||||
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
|
||||
|
||||
def _fake_execute_tool_calls(assistant_message, messages, effective_task_id):
|
||||
for call in assistant_message.tool_calls:
|
||||
messages.append(
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": call.id,
|
||||
"content": '{"ok":true}',
|
||||
}
|
||||
)
|
||||
|
||||
monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)
|
||||
|
||||
result = agent.run_conversation("look at current directory and list 3 notable things")
|
||||
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "Directory summary complete."
|
||||
assert any(
|
||||
msg.get("role") == "assistant"
|
||||
and msg.get("finish_reason") == "incomplete"
|
||||
and "current directory" in (msg.get("content") or "")
|
||||
for msg in result["messages"]
|
||||
)
|
||||
assert any(
|
||||
msg.get("role") == "user"
|
||||
and "Continue now. Execute the required tool calls" in (msg.get("content") or "")
|
||||
for msg in result["messages"]
|
||||
)
|
||||
assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
||||
95
tests/test_runtime_provider_resolution.py
Normal file
95
tests/test_runtime_provider_resolution.py
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
from hermes_cli import runtime_provider as rp
|
||||
|
||||
|
||||
def test_resolve_runtime_provider_codex(monkeypatch):
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex")
|
||||
monkeypatch.setattr(
|
||||
rp,
|
||||
"resolve_codex_runtime_credentials",
|
||||
lambda: {
|
||||
"provider": "openai-codex",
|
||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||
"api_key": "codex-token",
|
||||
"source": "codex-auth-json",
|
||||
"auth_file": "/tmp/auth.json",
|
||||
"codex_home": "/tmp/codex",
|
||||
"last_refresh": "2026-02-26T00:00:00Z",
|
||||
},
|
||||
)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="openai-codex")
|
||||
|
||||
assert resolved["provider"] == "openai-codex"
|
||||
assert resolved["api_mode"] == "codex_responses"
|
||||
assert resolved["base_url"] == "https://chatgpt.com/backend-api/codex"
|
||||
assert resolved["api_key"] == "codex-token"
|
||||
assert resolved["requested_provider"] == "openai-codex"
|
||||
|
||||
|
||||
def test_resolve_runtime_provider_openrouter_explicit(monkeypatch):
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: {})
|
||||
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
|
||||
monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(
|
||||
requested="openrouter",
|
||||
explicit_api_key="test-key",
|
||||
explicit_base_url="https://example.com/v1/",
|
||||
)
|
||||
|
||||
assert resolved["provider"] == "openrouter"
|
||||
assert resolved["api_mode"] == "chat_completions"
|
||||
assert resolved["api_key"] == "test-key"
|
||||
assert resolved["base_url"] == "https://example.com/v1"
|
||||
assert resolved["source"] == "explicit"
|
||||
|
||||
|
||||
def test_resolve_runtime_provider_openrouter_ignores_codex_config_base_url(monkeypatch):
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
|
||||
monkeypatch.setattr(
|
||||
rp,
|
||||
"_get_model_config",
|
||||
lambda: {
|
||||
"provider": "openai-codex",
|
||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||
},
|
||||
)
|
||||
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
|
||||
monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="openrouter")
|
||||
|
||||
assert resolved["provider"] == "openrouter"
|
||||
assert resolved["base_url"] == rp.OPENROUTER_BASE_URL
|
||||
|
||||
|
||||
def test_resolve_runtime_provider_auto_uses_custom_config_base_url(monkeypatch):
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
|
||||
monkeypatch.setattr(
|
||||
rp,
|
||||
"_get_model_config",
|
||||
lambda: {
|
||||
"provider": "auto",
|
||||
"base_url": "https://custom.example/v1/",
|
||||
},
|
||||
)
|
||||
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
|
||||
monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="auto")
|
||||
|
||||
assert resolved["provider"] == "openrouter"
|
||||
assert resolved["base_url"] == "https://custom.example/v1"
|
||||
|
||||
|
||||
def test_resolve_requested_provider_precedence(monkeypatch):
|
||||
monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "openai-codex"})
|
||||
assert rp.resolve_requested_provider("openrouter") == "openrouter"
|
||||
|
|
@ -30,6 +30,9 @@ def _make_mock_parent(depth=0):
|
|||
"""Create a mock parent agent with the fields delegate_task expects."""
|
||||
parent = MagicMock()
|
||||
parent.base_url = "https://openrouter.ai/api/v1"
|
||||
parent.api_key = "parent-key"
|
||||
parent.provider = "openrouter"
|
||||
parent.api_mode = "chat_completions"
|
||||
parent.model = "anthropic/claude-sonnet-4"
|
||||
parent.platform = "cli"
|
||||
parent.providers_allowed = None
|
||||
|
|
@ -218,6 +221,30 @@ class TestDelegateTask(unittest.TestCase):
|
|||
delegate_task(goal="Test tracking", parent_agent=parent)
|
||||
self.assertEqual(len(parent._active_children), 0)
|
||||
|
||||
def test_child_inherits_runtime_credentials(self):
|
||||
parent = _make_mock_parent(depth=0)
|
||||
parent.base_url = "https://chatgpt.com/backend-api/codex"
|
||||
parent.api_key = "codex-token"
|
||||
parent.provider = "openai-codex"
|
||||
parent.api_mode = "codex_responses"
|
||||
|
||||
with patch("run_agent.AIAgent") as MockAgent:
|
||||
mock_child = MagicMock()
|
||||
mock_child.run_conversation.return_value = {
|
||||
"final_response": "ok",
|
||||
"completed": True,
|
||||
"api_calls": 1,
|
||||
}
|
||||
MockAgent.return_value = mock_child
|
||||
|
||||
delegate_task(goal="Test runtime inheritance", parent_agent=parent)
|
||||
|
||||
_, kwargs = MockAgent.call_args
|
||||
self.assertEqual(kwargs["base_url"], parent.base_url)
|
||||
self.assertEqual(kwargs["api_key"], parent.api_key)
|
||||
self.assertEqual(kwargs["provider"], parent.provider)
|
||||
self.assertEqual(kwargs["api_mode"], parent.api_mode)
|
||||
|
||||
|
||||
class TestBlockedTools(unittest.TestCase):
|
||||
def test_blocked_tools_constant(self):
|
||||
|
|
|
|||
483
tests/tools/test_file_tools_live.py
Normal file
483
tests/tools/test_file_tools_live.py
Normal file
|
|
@ -0,0 +1,483 @@
|
|||
"""Live integration tests for file operations and terminal tools.
|
||||
|
||||
These tests run REAL commands through the LocalEnvironment -- no mocks.
|
||||
They verify that shell noise is properly filtered, commands actually work,
|
||||
and the tool outputs are EXACTLY what the agent would see.
|
||||
|
||||
Every test with output validates against a known-good value AND
|
||||
asserts zero contamination from shell noise via _assert_clean().
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
|
||||
|
||||
from tools.environments.local import LocalEnvironment, _clean_shell_noise, _SHELL_NOISE_SUBSTRINGS
|
||||
from tools.file_operations import ShellFileOperations
|
||||
|
||||
|
||||
# ── Shared noise detection ───────────────────────────────────────────────
|
||||
# Every known shell noise pattern. If ANY of these appear in output that
|
||||
# isn't explicitly expected, the test fails with a clear message.
|
||||
|
||||
_ALL_NOISE_PATTERNS = list(_SHELL_NOISE_SUBSTRINGS) + [
|
||||
"bash: ",
|
||||
"Inappropriate ioctl",
|
||||
]
|
||||
|
||||
|
||||
def _assert_clean(text: str, context: str = "output"):
|
||||
"""Assert text contains zero shell noise contamination."""
|
||||
if not text:
|
||||
return
|
||||
for noise in _ALL_NOISE_PATTERNS:
|
||||
assert noise not in text, (
|
||||
f"Shell noise leaked into {context}: found {noise!r} in:\n"
|
||||
f"{text[:500]}"
|
||||
)
|
||||
|
||||
|
||||
# ── Fixtures ─────────────────────────────────────────────────────────────
|
||||
|
||||
# Deterministic file content used across tests. Every byte is known,
|
||||
# so any unexpected text in results is immediately caught.
|
||||
SIMPLE_CONTENT = "alpha\nbravo\ncharlie\n"
|
||||
NUMBERED_CONTENT = "\n".join(f"LINE_{i:04d}" for i in range(1, 51)) + "\n"
|
||||
SPECIAL_CONTENT = "single 'quotes' and \"doubles\" and $VARS and `backticks` and \\backslash\n"
|
||||
MULTIFILE_A = "def func_alpha():\n return 42\n"
|
||||
MULTIFILE_B = "def func_bravo():\n return 99\n"
|
||||
MULTIFILE_C = "nothing relevant here\n"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def env(tmp_path):
|
||||
"""A real LocalEnvironment rooted in a temp directory."""
|
||||
return LocalEnvironment(cwd=str(tmp_path), timeout=15)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def ops(env, tmp_path):
|
||||
"""ShellFileOperations wired to the real local environment."""
|
||||
return ShellFileOperations(env, cwd=str(tmp_path))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def populated_dir(tmp_path):
|
||||
"""A temp directory with known files for search/read tests."""
|
||||
(tmp_path / "alpha.py").write_text(MULTIFILE_A)
|
||||
(tmp_path / "bravo.py").write_text(MULTIFILE_B)
|
||||
(tmp_path / "notes.txt").write_text(MULTIFILE_C)
|
||||
(tmp_path / "data.csv").write_text("col1,col2\n1,2\n3,4\n")
|
||||
return tmp_path
|
||||
|
||||
|
||||
# ── _clean_shell_noise unit tests ────────────────────────────────────────
|
||||
|
||||
class TestCleanShellNoise:
|
||||
def test_single_noise_line(self):
|
||||
output = "bash: no job control in this shell\nhello world\n"
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "hello world\n"
|
||||
|
||||
def test_double_noise_lines(self):
|
||||
output = (
|
||||
"bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n"
|
||||
"bash: no job control in this shell\n"
|
||||
"actual output here\n"
|
||||
)
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "actual output here\n"
|
||||
_assert_clean(result)
|
||||
|
||||
def test_tcsetattr_noise(self):
|
||||
output = (
|
||||
"bash: [12345: 2 (255)] tcsetattr: Inappropriate ioctl for device\n"
|
||||
"real content\n"
|
||||
)
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "real content\n"
|
||||
_assert_clean(result)
|
||||
|
||||
def test_triple_noise_lines(self):
|
||||
output = (
|
||||
"bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n"
|
||||
"bash: no job control in this shell\n"
|
||||
"bash: [999: 2 (255)] tcsetattr: Inappropriate ioctl for device\n"
|
||||
"clean\n"
|
||||
)
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "clean\n"
|
||||
|
||||
def test_no_noise_untouched(self):
|
||||
assert _clean_shell_noise("hello\nworld\n") == "hello\nworld\n"
|
||||
|
||||
def test_empty_string(self):
|
||||
assert _clean_shell_noise("") == ""
|
||||
|
||||
def test_only_noise_produces_empty(self):
|
||||
output = "bash: no job control in this shell\n"
|
||||
result = _clean_shell_noise(output)
|
||||
_assert_clean(result)
|
||||
|
||||
def test_noise_in_middle_not_stripped(self):
|
||||
"""Only LEADING noise is stripped -- noise in the middle is real output."""
|
||||
output = "real\nbash: no job control in this shell\nmore real\n"
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == output
|
||||
|
||||
|
||||
# ── LocalEnvironment.execute() ───────────────────────────────────────────
|
||||
|
||||
class TestLocalEnvironmentExecute:
|
||||
def test_echo_exact_output(self, env):
|
||||
result = env.execute("echo DETERMINISTIC_OUTPUT_12345")
|
||||
assert result["returncode"] == 0
|
||||
assert result["output"].strip() == "DETERMINISTIC_OUTPUT_12345"
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_printf_no_trailing_newline(self, env):
|
||||
result = env.execute("printf 'exact'")
|
||||
assert result["returncode"] == 0
|
||||
assert result["output"] == "exact"
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_exit_code_propagated(self, env):
|
||||
result = env.execute("exit 42")
|
||||
assert result["returncode"] == 42
|
||||
|
||||
def test_stderr_captured_in_output(self, env):
|
||||
result = env.execute("echo STDERR_TEST >&2")
|
||||
assert "STDERR_TEST" in result["output"]
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_cwd_respected(self, env, tmp_path):
|
||||
subdir = tmp_path / "subdir_test"
|
||||
subdir.mkdir()
|
||||
result = env.execute("pwd", cwd=str(subdir))
|
||||
assert result["returncode"] == 0
|
||||
assert result["output"].strip() == str(subdir)
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_multiline_exact(self, env):
|
||||
result = env.execute("echo AAA; echo BBB; echo CCC")
|
||||
lines = [l for l in result["output"].strip().split("\n") if l.strip()]
|
||||
assert lines == ["AAA", "BBB", "CCC"]
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_env_var_home(self, env):
|
||||
result = env.execute("echo $HOME")
|
||||
assert result["returncode"] == 0
|
||||
home = result["output"].strip()
|
||||
assert home == str(Path.home())
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_pipe_exact(self, env):
|
||||
result = env.execute("echo 'one two three' | wc -w")
|
||||
assert result["returncode"] == 0
|
||||
assert result["output"].strip() == "3"
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_cat_deterministic_content(self, env, tmp_path):
|
||||
f = tmp_path / "det.txt"
|
||||
f.write_text(SIMPLE_CONTENT)
|
||||
result = env.execute(f"cat {f}")
|
||||
assert result["returncode"] == 0
|
||||
assert result["output"] == SIMPLE_CONTENT
|
||||
_assert_clean(result["output"])
|
||||
|
||||
|
||||
# ── _has_command ─────────────────────────────────────────────────────────
|
||||
|
||||
class TestHasCommand:
|
||||
def test_finds_echo(self, ops):
|
||||
assert ops._has_command("echo") is True
|
||||
|
||||
def test_finds_cat(self, ops):
|
||||
assert ops._has_command("cat") is True
|
||||
|
||||
def test_finds_sed(self, ops):
|
||||
assert ops._has_command("sed") is True
|
||||
|
||||
def test_finds_wc(self, ops):
|
||||
assert ops._has_command("wc") is True
|
||||
|
||||
def test_finds_find(self, ops):
|
||||
assert ops._has_command("find") is True
|
||||
|
||||
def test_missing_command(self, ops):
|
||||
assert ops._has_command("nonexistent_tool_xyz_abc_999") is False
|
||||
|
||||
def test_rg_or_grep_available(self, ops):
|
||||
assert ops._has_command("rg") or ops._has_command("grep"), \
|
||||
"Neither rg nor grep found -- search_files will break"
|
||||
|
||||
|
||||
# ── read_file ────────────────────────────────────────────────────────────
|
||||
|
||||
class TestReadFile:
|
||||
def test_exact_content(self, ops, tmp_path):
|
||||
f = tmp_path / "exact.txt"
|
||||
f.write_text(SIMPLE_CONTENT)
|
||||
result = ops.read_file(str(f))
|
||||
assert result.error is None
|
||||
# Content has line numbers prepended, check the actual text is there
|
||||
assert "alpha" in result.content
|
||||
assert "bravo" in result.content
|
||||
assert "charlie" in result.content
|
||||
assert result.total_lines == 3
|
||||
_assert_clean(result.content)
|
||||
|
||||
def test_absolute_path(self, ops, tmp_path):
|
||||
f = tmp_path / "abs.txt"
|
||||
f.write_text("ABSOLUTE_PATH_CONTENT\n")
|
||||
result = ops.read_file(str(f))
|
||||
assert result.error is None
|
||||
assert "ABSOLUTE_PATH_CONTENT" in result.content
|
||||
_assert_clean(result.content)
|
||||
|
||||
def test_tilde_expansion(self, ops):
|
||||
test_path = Path.home() / ".hermes_test_tilde_9f8a7b"
|
||||
try:
|
||||
test_path.write_text("TILDE_EXPANSION_OK\n")
|
||||
result = ops.read_file("~/.hermes_test_tilde_9f8a7b")
|
||||
assert result.error is None
|
||||
assert "TILDE_EXPANSION_OK" in result.content
|
||||
_assert_clean(result.content)
|
||||
finally:
|
||||
test_path.unlink(missing_ok=True)
|
||||
|
||||
def test_nonexistent_returns_error(self, ops, tmp_path):
|
||||
result = ops.read_file(str(tmp_path / "ghost.txt"))
|
||||
assert result.error is not None
|
||||
|
||||
def test_pagination_exact_window(self, ops, tmp_path):
|
||||
f = tmp_path / "numbered.txt"
|
||||
f.write_text(NUMBERED_CONTENT)
|
||||
result = ops.read_file(str(f), offset=10, limit=5)
|
||||
assert result.error is None
|
||||
assert "LINE_0010" in result.content
|
||||
assert "LINE_0014" in result.content
|
||||
assert "LINE_0009" not in result.content
|
||||
assert "LINE_0015" not in result.content
|
||||
assert result.total_lines == 50
|
||||
_assert_clean(result.content)
|
||||
|
||||
def test_no_noise_in_content(self, ops, tmp_path):
|
||||
f = tmp_path / "noise_check.txt"
|
||||
f.write_text("ONLY_THIS_CONTENT\n")
|
||||
result = ops.read_file(str(f))
|
||||
assert result.error is None
|
||||
_assert_clean(result.content)
|
||||
|
||||
|
||||
# ── write_file ───────────────────────────────────────────────────────────
|
||||
|
||||
class TestWriteFile:
|
||||
def test_write_and_verify(self, ops, tmp_path):
|
||||
path = str(tmp_path / "written.txt")
|
||||
result = ops.write_file(path, SIMPLE_CONTENT)
|
||||
assert result.error is None
|
||||
assert result.bytes_written == len(SIMPLE_CONTENT.encode())
|
||||
assert Path(path).read_text() == SIMPLE_CONTENT
|
||||
|
||||
def test_creates_nested_dirs(self, ops, tmp_path):
|
||||
path = str(tmp_path / "a" / "b" / "c" / "deep.txt")
|
||||
result = ops.write_file(path, "DEEP_CONTENT\n")
|
||||
assert result.error is None
|
||||
assert result.dirs_created is True
|
||||
assert Path(path).read_text() == "DEEP_CONTENT\n"
|
||||
|
||||
def test_overwrites_exact(self, ops, tmp_path):
|
||||
path = str(tmp_path / "overwrite.txt")
|
||||
Path(path).write_text("OLD_DATA\n")
|
||||
result = ops.write_file(path, "NEW_DATA\n")
|
||||
assert result.error is None
|
||||
assert Path(path).read_text() == "NEW_DATA\n"
|
||||
|
||||
def test_large_content_via_stdin(self, ops, tmp_path):
|
||||
path = str(tmp_path / "large.txt")
|
||||
content = "X" * 200_000 + "\n"
|
||||
result = ops.write_file(path, content)
|
||||
assert result.error is None
|
||||
assert Path(path).read_text() == content
|
||||
|
||||
def test_special_characters_preserved(self, ops, tmp_path):
|
||||
path = str(tmp_path / "special.txt")
|
||||
result = ops.write_file(path, SPECIAL_CONTENT)
|
||||
assert result.error is None
|
||||
assert Path(path).read_text() == SPECIAL_CONTENT
|
||||
|
||||
def test_roundtrip_read_write(self, ops, tmp_path):
|
||||
"""Write -> read back -> verify exact match."""
|
||||
path = str(tmp_path / "roundtrip.txt")
|
||||
ops.write_file(path, SIMPLE_CONTENT)
|
||||
result = ops.read_file(path)
|
||||
assert result.error is None
|
||||
assert "alpha" in result.content
|
||||
assert "charlie" in result.content
|
||||
_assert_clean(result.content)
|
||||
|
||||
|
||||
# ── patch_replace ────────────────────────────────────────────────────────
|
||||
|
||||
class TestPatchReplace:
|
||||
def test_exact_replacement(self, ops, tmp_path):
|
||||
path = str(tmp_path / "patch.txt")
|
||||
Path(path).write_text("hello world\n")
|
||||
result = ops.patch_replace(path, "world", "earth")
|
||||
assert result.error is None
|
||||
assert Path(path).read_text() == "hello earth\n"
|
||||
|
||||
def test_not_found_error(self, ops, tmp_path):
|
||||
path = str(tmp_path / "patch2.txt")
|
||||
Path(path).write_text("hello\n")
|
||||
result = ops.patch_replace(path, "NONEXISTENT_STRING", "replacement")
|
||||
assert result.error is not None
|
||||
assert "Could not find" in result.error
|
||||
|
||||
def test_multiline_patch(self, ops, tmp_path):
|
||||
path = str(tmp_path / "multi.txt")
|
||||
Path(path).write_text("line1\nline2\nline3\n")
|
||||
result = ops.patch_replace(path, "line2", "REPLACED")
|
||||
assert result.error is None
|
||||
assert Path(path).read_text() == "line1\nREPLACED\nline3\n"
|
||||
|
||||
|
||||
# ── search ───────────────────────────────────────────────────────────────
|
||||
|
||||
class TestSearch:
|
||||
def test_content_search_finds_exact_match(self, ops, populated_dir):
|
||||
result = ops.search("func_alpha", str(populated_dir), target="content")
|
||||
assert result.error is None
|
||||
assert result.total_count >= 1
|
||||
assert any("func_alpha" in m.content for m in result.matches)
|
||||
for m in result.matches:
|
||||
_assert_clean(m.content)
|
||||
_assert_clean(m.path)
|
||||
|
||||
def test_content_search_no_false_positives(self, ops, populated_dir):
|
||||
result = ops.search("ZZZZZ_NONEXISTENT", str(populated_dir), target="content")
|
||||
assert result.error is None
|
||||
assert result.total_count == 0
|
||||
assert len(result.matches) == 0
|
||||
|
||||
def test_file_search_finds_py_files(self, ops, populated_dir):
|
||||
result = ops.search("*.py", str(populated_dir), target="files")
|
||||
assert result.error is None
|
||||
assert result.total_count >= 2
|
||||
# Verify only expected files appear
|
||||
found_names = set()
|
||||
for f in result.files:
|
||||
name = Path(f).name
|
||||
found_names.add(name)
|
||||
_assert_clean(f)
|
||||
assert "alpha.py" in found_names
|
||||
assert "bravo.py" in found_names
|
||||
assert "notes.txt" not in found_names
|
||||
|
||||
def test_file_search_no_false_file_entries(self, ops, populated_dir):
|
||||
"""Every entry in the files list must be a real path, not noise."""
|
||||
result = ops.search("*.py", str(populated_dir), target="files")
|
||||
assert result.error is None
|
||||
for f in result.files:
|
||||
_assert_clean(f)
|
||||
assert Path(f).exists(), f"Search returned non-existent path: {f}"
|
||||
|
||||
def test_content_search_with_glob_filter(self, ops, populated_dir):
|
||||
result = ops.search("return", str(populated_dir), target="content", file_glob="*.py")
|
||||
assert result.error is None
|
||||
for m in result.matches:
|
||||
assert m.path.endswith(".py"), f"Non-py file in results: {m.path}"
|
||||
_assert_clean(m.content)
|
||||
_assert_clean(m.path)
|
||||
|
||||
def test_search_output_has_zero_noise(self, ops, populated_dir):
|
||||
"""Dedicated noise check: search must return only real content."""
|
||||
result = ops.search("func", str(populated_dir), target="content")
|
||||
assert result.error is None
|
||||
for m in result.matches:
|
||||
_assert_clean(m.content)
|
||||
_assert_clean(m.path)
|
||||
|
||||
|
||||
# ── _expand_path ─────────────────────────────────────────────────────────
|
||||
|
||||
class TestExpandPath:
|
||||
def test_tilde_exact(self, ops):
|
||||
result = ops._expand_path("~/test.txt")
|
||||
expected = f"{Path.home()}/test.txt"
|
||||
assert result == expected
|
||||
_assert_clean(result)
|
||||
|
||||
def test_absolute_unchanged(self, ops):
|
||||
assert ops._expand_path("/tmp/test.txt") == "/tmp/test.txt"
|
||||
|
||||
def test_relative_unchanged(self, ops):
|
||||
assert ops._expand_path("relative/path.txt") == "relative/path.txt"
|
||||
|
||||
def test_bare_tilde(self, ops):
|
||||
result = ops._expand_path("~")
|
||||
assert result == str(Path.home())
|
||||
_assert_clean(result)
|
||||
|
||||
|
||||
# ── Terminal output cleanliness ──────────────────────────────────────────
|
||||
|
||||
class TestTerminalOutputCleanliness:
|
||||
"""Every command the agent might run must produce noise-free output."""
|
||||
|
||||
def test_echo(self, env):
|
||||
result = env.execute("echo CLEAN_TEST")
|
||||
assert result["output"].strip() == "CLEAN_TEST"
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_cat(self, env, tmp_path):
|
||||
f = tmp_path / "cat_test.txt"
|
||||
f.write_text("CAT_CONTENT_EXACT\n")
|
||||
result = env.execute(f"cat {f}")
|
||||
assert result["output"] == "CAT_CONTENT_EXACT\n"
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_ls(self, env, tmp_path):
|
||||
(tmp_path / "file_a.txt").write_text("")
|
||||
(tmp_path / "file_b.txt").write_text("")
|
||||
result = env.execute(f"ls {tmp_path}")
|
||||
_assert_clean(result["output"])
|
||||
assert "file_a.txt" in result["output"]
|
||||
assert "file_b.txt" in result["output"]
|
||||
|
||||
def test_wc(self, env, tmp_path):
|
||||
f = tmp_path / "wc_test.txt"
|
||||
f.write_text("one\ntwo\nthree\n")
|
||||
result = env.execute(f"wc -l < {f}")
|
||||
assert result["output"].strip() == "3"
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_head(self, env, tmp_path):
|
||||
f = tmp_path / "head_test.txt"
|
||||
f.write_text(NUMBERED_CONTENT)
|
||||
result = env.execute(f"head -n 3 {f}")
|
||||
expected = "LINE_0001\nLINE_0002\nLINE_0003\n"
|
||||
assert result["output"] == expected
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_env_var_expansion(self, env):
|
||||
result = env.execute("echo $HOME")
|
||||
assert result["output"].strip() == str(Path.home())
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_command_substitution(self, env):
|
||||
result = env.execute("echo $(echo NESTED)")
|
||||
assert result["output"].strip() == "NESTED"
|
||||
_assert_clean(result["output"])
|
||||
|
||||
def test_command_v_detection(self, env):
|
||||
"""This is how _has_command works -- must return clean 'yes'."""
|
||||
result = env.execute("command -v cat >/dev/null 2>&1 && echo 'yes'")
|
||||
assert result["output"].strip() == "yes"
|
||||
_assert_clean(result["output"])
|
||||
|
|
@ -77,6 +77,85 @@ def _strip_blocked_tools(toolsets: List[str]) -> List[str]:
|
|||
return [t for t in toolsets if t not in blocked_toolset_names]
|
||||
|
||||
|
||||
def _build_child_progress_callback(task_index: int, parent_agent, task_count: int = 1) -> Optional[callable]:
|
||||
"""Build a callback that relays child agent tool calls to the parent display.
|
||||
|
||||
Two display paths:
|
||||
CLI: prints tree-view lines above the parent's delegation spinner
|
||||
Gateway: batches tool names and relays to parent's progress callback
|
||||
|
||||
Returns None if no display mechanism is available, in which case the
|
||||
child agent runs with no progress callback (identical to current behavior).
|
||||
"""
|
||||
spinner = getattr(parent_agent, '_delegate_spinner', None)
|
||||
parent_cb = getattr(parent_agent, 'tool_progress_callback', None)
|
||||
|
||||
if not spinner and not parent_cb:
|
||||
return None # No display → no callback → zero behavior change
|
||||
|
||||
# Show 1-indexed prefix only in batch mode (multiple tasks)
|
||||
prefix = f"[{task_index + 1}] " if task_count > 1 else ""
|
||||
|
||||
# Gateway: batch tool names, flush periodically
|
||||
_BATCH_SIZE = 5
|
||||
_batch: List[str] = []
|
||||
|
||||
def _callback(tool_name: str, preview: str = None):
|
||||
# Special "_thinking" event: model produced text content (reasoning)
|
||||
if tool_name == "_thinking":
|
||||
if spinner:
|
||||
short = (preview[:55] + "...") if preview and len(preview) > 55 else (preview or "")
|
||||
try:
|
||||
spinner.print_above(f" {prefix}├─ 💭 \"{short}\"")
|
||||
except Exception:
|
||||
pass
|
||||
# Don't relay thinking to gateway (too noisy for chat)
|
||||
return
|
||||
|
||||
# Regular tool call event
|
||||
if spinner:
|
||||
short = (preview[:35] + "...") if preview and len(preview) > 35 else (preview or "")
|
||||
tool_emojis = {
|
||||
"terminal": "💻", "web_search": "🔍", "web_extract": "📄",
|
||||
"read_file": "📖", "write_file": "✍️", "patch": "🔧",
|
||||
"search_files": "🔎", "list_directory": "📂",
|
||||
"browser_navigate": "🌐", "browser_click": "👆",
|
||||
"text_to_speech": "🔊", "image_generate": "🎨",
|
||||
"vision_analyze": "👁️", "process": "⚙️",
|
||||
}
|
||||
emoji = tool_emojis.get(tool_name, "⚡")
|
||||
line = f" {prefix}├─ {emoji} {tool_name}"
|
||||
if short:
|
||||
line += f" \"{short}\""
|
||||
try:
|
||||
spinner.print_above(line)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if parent_cb:
|
||||
_batch.append(tool_name)
|
||||
if len(_batch) >= _BATCH_SIZE:
|
||||
summary = ", ".join(_batch)
|
||||
try:
|
||||
parent_cb("subagent_progress", f"🔀 {prefix}{summary}")
|
||||
except Exception:
|
||||
pass
|
||||
_batch.clear()
|
||||
|
||||
def _flush():
|
||||
"""Flush remaining batched tool names to gateway on completion."""
|
||||
if parent_cb and _batch:
|
||||
summary = ", ".join(_batch)
|
||||
try:
|
||||
parent_cb("subagent_progress", f"🔀 {prefix}{summary}")
|
||||
except Exception:
|
||||
pass
|
||||
_batch.clear()
|
||||
|
||||
_callback._flush = _flush
|
||||
return _callback
|
||||
|
||||
|
||||
def _run_single_child(
|
||||
task_index: int,
|
||||
goal: str,
|
||||
|
|
@ -85,6 +164,7 @@ def _run_single_child(
|
|||
model: Optional[str],
|
||||
max_iterations: int,
|
||||
parent_agent,
|
||||
task_count: int = 1,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Spawn and run a single child agent. Called from within a thread.
|
||||
|
|
@ -98,37 +178,21 @@ def _run_single_child(
|
|||
|
||||
child_prompt = _build_child_system_prompt(goal, context)
|
||||
|
||||
# Build a progress callback that surfaces subagent tool activity.
|
||||
# CLI: updates the parent's delegate spinner text.
|
||||
# Gateway: forwards to the parent's progress callback (feeds message queue).
|
||||
parent_progress_cb = getattr(parent_agent, 'tool_progress_callback', None)
|
||||
def _child_progress(tool_name: str, preview: str = None):
|
||||
tag = f"[subagent-{task_index+1}] {tool_name}"
|
||||
# Update CLI spinner
|
||||
spinner = getattr(parent_agent, '_delegate_spinner', None)
|
||||
if spinner:
|
||||
detail = f'"{preview}"' if preview else ""
|
||||
try:
|
||||
spinner.update_text(f"🔀 {tag} {detail}")
|
||||
except Exception:
|
||||
pass
|
||||
# Forward to gateway progress queue
|
||||
if parent_progress_cb:
|
||||
try:
|
||||
parent_progress_cb(tag, preview)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
# Extract parent's API key so subagents inherit auth (e.g. Nous Portal)
|
||||
parent_api_key = None
|
||||
if hasattr(parent_agent, '_client_kwargs'):
|
||||
# Extract parent's API key so subagents inherit auth (e.g. Nous Portal).
|
||||
parent_api_key = getattr(parent_agent, "api_key", None)
|
||||
if (not parent_api_key) and hasattr(parent_agent, "_client_kwargs"):
|
||||
parent_api_key = parent_agent._client_kwargs.get("api_key")
|
||||
|
||||
# Build progress callback to relay tool calls to parent display
|
||||
child_progress_cb = _build_child_progress_callback(task_index, parent_agent, task_count)
|
||||
|
||||
child = AIAgent(
|
||||
base_url=parent_agent.base_url,
|
||||
api_key=parent_api_key,
|
||||
model=model or parent_agent.model,
|
||||
provider=getattr(parent_agent, "provider", None),
|
||||
api_mode=getattr(parent_agent, "api_mode", None),
|
||||
max_iterations=max_iterations,
|
||||
enabled_toolsets=child_toolsets,
|
||||
quiet_mode=True,
|
||||
|
|
@ -143,7 +207,7 @@ def _run_single_child(
|
|||
providers_ignored=parent_agent.providers_ignored,
|
||||
providers_order=parent_agent.providers_order,
|
||||
provider_sort=parent_agent.provider_sort,
|
||||
tool_progress_callback=_child_progress,
|
||||
tool_progress_callback=child_progress_cb,
|
||||
)
|
||||
|
||||
# Set delegation depth so children can't spawn grandchildren
|
||||
|
|
@ -158,6 +222,13 @@ def _run_single_child(
|
|||
with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
|
||||
result = child.run_conversation(user_message=goal)
|
||||
|
||||
# Flush any remaining batched progress to gateway
|
||||
if child_progress_cb and hasattr(child_progress_cb, '_flush'):
|
||||
try:
|
||||
child_progress_cb._flush()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
duration = round(time.monotonic() - child_start, 2)
|
||||
|
||||
summary = result.get("final_response") or ""
|
||||
|
|
@ -275,6 +346,7 @@ def delegate_task(
|
|||
model=model,
|
||||
max_iterations=effective_max_iter,
|
||||
parent_agent=parent_agent,
|
||||
task_count=1,
|
||||
)
|
||||
results.append(result)
|
||||
else:
|
||||
|
|
@ -299,6 +371,7 @@ def delegate_task(
|
|||
model=model,
|
||||
max_iterations=effective_max_iter,
|
||||
parent_agent=parent_agent,
|
||||
task_count=n_tasks,
|
||||
)
|
||||
futures[future] = i
|
||||
|
||||
|
|
@ -318,14 +391,21 @@ def delegate_task(
|
|||
results.append(entry)
|
||||
completed_count += 1
|
||||
|
||||
# Print per-task completion line (visible in CLI via patch_stdout)
|
||||
# Print per-task completion line above the spinner
|
||||
idx = entry["task_index"]
|
||||
label = task_labels[idx] if idx < len(task_labels) else f"Task {idx}"
|
||||
dur = entry.get("duration_seconds", 0)
|
||||
status = entry.get("status", "?")
|
||||
icon = "✓" if status == "completed" else "✗"
|
||||
remaining = n_tasks - completed_count
|
||||
print(f" {icon} [{idx+1}/{n_tasks}] {label} ({dur}s)")
|
||||
completion_line = f"{icon} [{idx+1}/{n_tasks}] {label} ({dur}s)"
|
||||
if spinner_ref:
|
||||
try:
|
||||
spinner_ref.print_above(completion_line)
|
||||
except Exception:
|
||||
print(f" {completion_line}")
|
||||
else:
|
||||
print(f" {completion_line}")
|
||||
|
||||
# Update spinner text to show remaining count
|
||||
if spinner_ref and remaining > 0:
|
||||
|
|
|
|||
|
|
@ -11,20 +11,26 @@ from tools.environments.base import BaseEnvironment
|
|||
|
||||
# Noise lines emitted by interactive shells when stdin is not a terminal.
|
||||
# Filtered from output to keep tool results clean.
|
||||
_SHELL_NOISE = frozenset({
|
||||
_SHELL_NOISE_SUBSTRINGS = (
|
||||
"bash: cannot set terminal process group",
|
||||
"bash: no job control in this shell",
|
||||
"bash: no job control in this shell\n",
|
||||
"no job control in this shell",
|
||||
"no job control in this shell\n",
|
||||
})
|
||||
"cannot set terminal process group",
|
||||
"tcsetattr: Inappropriate ioctl for device",
|
||||
)
|
||||
|
||||
|
||||
def _clean_shell_noise(output: str) -> str:
|
||||
"""Strip shell startup warnings that leak when using -i without a TTY."""
|
||||
lines = output.split("\n", 2) # only check first two lines
|
||||
if lines and lines[0].strip() in _SHELL_NOISE:
|
||||
return "\n".join(lines[1:])
|
||||
return output
|
||||
"""Strip shell startup warnings that leak when using -i without a TTY.
|
||||
|
||||
Removes all leading lines that match known noise patterns, not just the first.
|
||||
Some environments emit multiple noise lines (e.g. Docker, non-TTY sessions).
|
||||
"""
|
||||
lines = output.split("\n")
|
||||
# Strip all leading noise lines
|
||||
while lines and any(noise in lines[0] for noise in _SHELL_NOISE_SUBSTRINGS):
|
||||
lines.pop(0)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
class LocalEnvironment(BaseEnvironment):
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ def get_async_client() -> AsyncOpenAI:
|
|||
default_headers={
|
||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "cli-agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
},
|
||||
)
|
||||
return _client
|
||||
|
|
|
|||
|
|
@ -87,13 +87,13 @@ class ProcessRegistry:
|
|||
- Cleanup thread (sandbox reaping coordination)
|
||||
"""
|
||||
|
||||
# Noise lines emitted by interactive shells when stdin is not a terminal.
|
||||
_SHELL_NOISE = frozenset({
|
||||
_SHELL_NOISE_SUBSTRINGS = (
|
||||
"bash: cannot set terminal process group",
|
||||
"bash: no job control in this shell",
|
||||
"bash: no job control in this shell\n",
|
||||
"no job control in this shell",
|
||||
"no job control in this shell\n",
|
||||
})
|
||||
"cannot set terminal process group",
|
||||
"tcsetattr: Inappropriate ioctl for device",
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
self._running: Dict[str, ProcessSession] = {}
|
||||
|
|
@ -106,10 +106,10 @@ class ProcessRegistry:
|
|||
@staticmethod
|
||||
def _clean_shell_noise(text: str) -> str:
|
||||
"""Strip shell startup warnings from the beginning of output."""
|
||||
lines = text.split("\n", 2)
|
||||
if lines and lines[0].strip() in ProcessRegistry._SHELL_NOISE:
|
||||
return "\n".join(lines[1:])
|
||||
return text
|
||||
lines = text.split("\n")
|
||||
while lines and any(noise in lines[0] for noise in ProcessRegistry._SHELL_NOISE_SUBSTRINGS):
|
||||
lines.pop(0)
|
||||
return "\n".join(lines)
|
||||
|
||||
# ----- Spawn -----
|
||||
|
||||
|
|
|
|||
|
|
@ -24,26 +24,13 @@ from typing import Dict, Any, List, Optional
|
|||
|
||||
from openai import AsyncOpenAI, OpenAI
|
||||
|
||||
from agent.auxiliary_client import get_text_auxiliary_client
|
||||
from agent.auxiliary_client import get_async_text_auxiliary_client
|
||||
|
||||
# Resolve the auxiliary client at import time so we have the model slug.
|
||||
# We build an AsyncOpenAI from the same credentials for async summarization.
|
||||
_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client()
|
||||
_async_aux_client: AsyncOpenAI | None = None
|
||||
if _aux_client is not None:
|
||||
_async_kwargs = {
|
||||
"api_key": _aux_client.api_key,
|
||||
"base_url": str(_aux_client.base_url),
|
||||
}
|
||||
if "openrouter" in str(_aux_client.base_url).lower():
|
||||
_async_kwargs["default_headers"] = {
|
||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "cli-agent",
|
||||
}
|
||||
_async_aux_client = AsyncOpenAI(**_async_kwargs)
|
||||
# Resolve the async auxiliary client at import time so we have the model slug.
|
||||
# Handles Codex Responses API adapter transparently.
|
||||
_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client()
|
||||
MAX_SESSION_CHARS = 100_000
|
||||
MAX_SUMMARY_TOKENS = 2000
|
||||
MAX_SUMMARY_TOKENS = 10000
|
||||
|
||||
|
||||
def _format_timestamp(ts) -> str:
|
||||
|
|
|
|||
|
|
@ -1037,8 +1037,12 @@ def terminal_tool(
|
|||
)
|
||||
output = output[:head_chars] + truncated_notice + output[-tail_chars:]
|
||||
|
||||
# Redact secrets from command output (catches env/printenv leaking keys)
|
||||
from agent.redact import redact_sensitive_text
|
||||
output = redact_sensitive_text(output.strip()) if output else ""
|
||||
|
||||
return json.dumps({
|
||||
"output": output.strip() if output else "",
|
||||
"output": output,
|
||||
"exit_code": returncode,
|
||||
"error": None
|
||||
}, ensure_ascii=False)
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ if _aux_sync_client is not None:
|
|||
_async_kwargs["default_headers"] = {
|
||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "cli-agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
}
|
||||
_aux_async_client = AsyncOpenAI(**_async_kwargs)
|
||||
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ import asyncio
|
|||
from typing import List, Dict, Any, Optional
|
||||
from firecrawl import Firecrawl
|
||||
from openai import AsyncOpenAI
|
||||
from agent.auxiliary_client import get_text_auxiliary_client
|
||||
from agent.auxiliary_client import get_async_text_auxiliary_client
|
||||
from tools.debug_helpers import DebugSession
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -67,21 +67,9 @@ def _get_firecrawl_client():
|
|||
|
||||
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
|
||||
|
||||
# Resolve auxiliary text client at module level; build an async wrapper.
|
||||
_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
|
||||
_aux_async_client: AsyncOpenAI | None = None
|
||||
if _aux_sync_client is not None:
|
||||
_async_kwargs = {
|
||||
"api_key": _aux_sync_client.api_key,
|
||||
"base_url": str(_aux_sync_client.base_url),
|
||||
}
|
||||
if "openrouter" in str(_aux_sync_client.base_url).lower():
|
||||
_async_kwargs["default_headers"] = {
|
||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "cli-agent",
|
||||
}
|
||||
_aux_async_client = AsyncOpenAI(**_async_kwargs)
|
||||
# Resolve async auxiliary client at module level.
|
||||
# Handles Codex Responses API adapter transparently.
|
||||
_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client()
|
||||
|
||||
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
|
||||
|
||||
|
|
@ -174,7 +162,7 @@ async def _call_summarizer_llm(
|
|||
content: str,
|
||||
context_str: str,
|
||||
model: str,
|
||||
max_tokens: int = 4000,
|
||||
max_tokens: int = 20000,
|
||||
is_chunk: bool = False,
|
||||
chunk_info: str = ""
|
||||
) -> Optional[str]:
|
||||
|
|
@ -306,7 +294,7 @@ async def _process_large_content_chunked(
|
|||
chunk_content,
|
||||
context_str,
|
||||
model,
|
||||
max_tokens=2000,
|
||||
max_tokens=10000,
|
||||
is_chunk=True,
|
||||
chunk_info=chunk_info
|
||||
)
|
||||
|
|
@ -374,7 +362,7 @@ Create a single, unified markdown summary."""
|
|||
{"role": "user", "content": synthesis_prompt}
|
||||
],
|
||||
temperature=0.1,
|
||||
**auxiliary_max_tokens_param(4000),
|
||||
**auxiliary_max_tokens_param(20000),
|
||||
**({} if not _extra else {"extra_body": _extra}),
|
||||
)
|
||||
final_summary = response.choices[0].message.content.strip()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue