diff --git a/.env.example b/.env.example index 78549212f..2693931e0 100644 --- a/.env.example +++ b/.env.example @@ -10,7 +10,7 @@ OPENROUTER_API_KEY= # Default model to use (OpenRouter format: provider/model) -# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus +# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-3-flash-preview, zhipuai/glm-4-plus LLM_MODEL=anthropic/claude-opus-4.6 # ============================================================================= @@ -200,9 +200,10 @@ IMAGE_TOOLS_DEBUG=false # When conversation approaches model's context limit, middle turns are # automatically summarized to free up space. # +# Context compression is configured in ~/.hermes/config.yaml under compression: # CONTEXT_COMPRESSION_ENABLED=true # Enable auto-compression (default: true) # CONTEXT_COMPRESSION_THRESHOLD=0.85 # Compress at 85% of context limit -# CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001 # Fast model for summaries +# Model is set via compression.summary_model in config.yaml (default: google/gemini-3-flash-preview) # ============================================================================= # RL TRAINING (Tinker + Atropos) diff --git a/AGENTS.md b/AGENTS.md index f729bde98..d88fbf7ff 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -179,6 +179,7 @@ The interactive CLI uses: Key components: - `HermesCLI` class - Main CLI controller with commands and conversation loop - `SlashCommandCompleter` - Autocomplete dropdown for `/commands` (type `/` to see all) +- `agent/skill_commands.py` - Scans skills and builds invocation messages (shared with gateway) - `load_cli_config()` - Loads config, sets environment variables for terminal - `build_welcome_banner()` - Displays ASCII art logo, tools, and skills summary @@ -191,9 +192,22 @@ CLI UX notes: - Pasting 5+ lines auto-saves to `~/.hermes/pastes/` and collapses to a reference - Multi-line input via Alt+Enter or Ctrl+J - `/commands` - Process user 
commands like `/help`, `/clear`, `/personality`, etc. +- `/skill-name` - Invoke installed skills directly (e.g., `/axolotl`, `/gif-search`) CLI uses `quiet_mode=True` when creating AIAgent to suppress verbose logging. +### Skill Slash Commands + +Every installed skill in `~/.hermes/skills/` is automatically registered as a slash command. +The skill name (from frontmatter or folder name) becomes the command: `axolotl` → `/axolotl`. + +Implementation (`agent/skill_commands.py`, shared between CLI and gateway): +1. `scan_skill_commands()` scans all SKILL.md files at startup +2. `build_skill_invocation_message()` loads the SKILL.md content and builds a user-turn message +3. The message includes the full skill content, a list of supporting files (not loaded), and the user's instruction +4. Supporting files can be loaded on demand via the `skill_view` tool +5. Injected as a **user message** (not system prompt) to preserve prompt caching + ### Adding CLI Commands 1. Add to `COMMANDS` dict with description diff --git a/README.md b/README.md index 3cb1d6598..531a3049e 100644 --- a/README.md +++ b/README.md @@ -121,11 +121,14 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro | Provider | Setup | |----------|-------| -| **Nous Portal** | `hermes login` (OAuth, subscription-based) | +| **Nous Portal** | `hermes model` (OAuth, subscription-based) | +| **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) | | **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` | | **Custom Endpoint** | `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | -**Note:** Even when using Nous Portal or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools. +**Codex note:** The OpenAI Codex provider authenticates via device code (open a URL, enter a code). Credentials are stored at `~/.codex/auth.json` and auto-refresh. No Codex CLI installation required. 
+ +**Note:** Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools. --- @@ -143,7 +146,7 @@ All your settings are stored in `~/.hermes/` for easy access: ├── skills/ # Agent-created skills (managed via skill_manage tool) ├── cron/ # Scheduled jobs ├── sessions/ # Gateway sessions -└── logs/ # Logs +└── logs/ # Logs (errors.log, gateway.log — secrets auto-redacted) ``` ### Managing Configuration @@ -161,6 +164,19 @@ hermes config set terminal.backend docker hermes config set OPENROUTER_API_KEY sk-or-... # Saves to .env ``` +### Configuration Precedence + +Settings are resolved in this order (highest priority first): + +1. **CLI arguments** — `hermes chat --max-turns 100` (per-invocation override) +2. **`~/.hermes/config.yaml`** — the primary config file for all non-secret settings +3. **`~/.hermes/.env`** — fallback for env vars; **required** for secrets (API keys, tokens, passwords) +4. **Built-in defaults** — hardcoded safe defaults when nothing else is set + +**Rule of thumb:** Secrets (API keys, bot tokens, passwords) go in `.env`. Everything else (model, terminal backend, compression settings, memory limits, toolsets) goes in `config.yaml`. When both are set, `config.yaml` wins for non-secret settings. + +The `hermes config set` command automatically routes values to the right file — API keys are saved to `.env`, everything else to `config.yaml`. + ### Optional API Keys | Feature | Provider | Env Variable | @@ -277,7 +293,10 @@ See [docs/messaging.md](docs/messaging.md) for advanced WhatsApp configuration. 
| `/status` | Show session info | | `/stop` | Stop the running agent | | `/sethome` | Set this chat as the home channel | +| `/compress` | Manually compress conversation context | +| `/usage` | Show token usage for this session | | `/help` | Show available commands | +| `/` | Invoke any installed skill (e.g., `/axolotl`, `/gif-search`) | ### DM Pairing (Alternative to Allowlists) @@ -354,7 +373,7 @@ hermes --resume # Resume a specific session (-r) # Provider & model management hermes model # Switch provider and model interactively -hermes login # Authenticate with Nous Portal (OAuth) +hermes model # Select provider and model hermes logout # Clear stored OAuth credentials # Configuration @@ -407,7 +426,11 @@ Type `/` to see an autocomplete dropdown of all commands. | `/cron` | Manage scheduled tasks | | `/skills` | Search, install, inspect, or manage skills from registries | | `/platforms` | Show gateway/messaging platform status | +| `/verbose` | Cycle tool progress display: off → new → all → verbose | +| `/compress` | Manually compress conversation context | +| `/usage` | Show token usage for this session | | `/quit` | Exit (also: `/exit`, `/q`) | +| `/` | Invoke any installed skill (e.g., `/axolotl`, `/gif-search`) | **Keybindings:** - `Enter` — send message @@ -694,6 +717,21 @@ hermes cron status # Check if gateway is running Even if no messaging platforms are configured, the gateway stays running for cron. A file lock prevents duplicate execution if multiple processes overlap. +### 🪝 Event Hooks + +Run custom code at key lifecycle points — log activity, send alerts, post to webhooks. Hooks are Python handlers that fire automatically during gateway operation. 
+ +``` +~/.hermes/hooks/ +└── my-hook/ + ├── HOOK.yaml # name + events to subscribe to + └── handler.py # async def handle(event_type, context) +``` + +**Available events:** `gateway:startup`, `session:start`, `session:reset`, `agent:start`, `agent:step`, `agent:end`, `command:*` (wildcard — fires for any slash command). + +Hooks are non-blocking — errors are caught and logged, never crashing the agent. See [docs/hooks.md](docs/hooks.md) for the full event reference, context keys, and examples. + ### 🛡️ Exec Approval (Messaging Platforms) When the agent tries to run a potentially dangerous command (`rm -rf`, `chmod 777`, etc.) on Telegram/Discord/WhatsApp, instead of blocking it silently, it asks the user for approval: @@ -807,6 +845,22 @@ Skills are on-demand knowledge documents the agent can load when needed. They fo All skills live in **`~/.hermes/skills/`** -- a single directory that is the source of truth. On fresh install, bundled skills are copied there from the repo. Hub-installed skills and agent-created skills also go here. The agent can modify or delete any skill. `hermes update` adds only genuinely new bundled skills (via a manifest) without overwriting your changes or re-adding skills you deleted. **Using Skills:** + +Every installed skill is automatically available as a slash command — type `/` to invoke it directly: + +```bash +# In the CLI or any messaging platform (Telegram, Discord, Slack, WhatsApp): +/gif-search funny cats +/axolotl help me fine-tune Llama 3 on my dataset +/github-pr-workflow create a PR for the auth refactor + +# Just the skill name (no prompt) loads the skill and lets the agent ask what you need: +/excalidraw +``` + +The skill's full instructions (SKILL.md) are loaded into the conversation, and any supporting files (references, templates, scripts) are listed for the agent to pull on demand via the `skill_view` tool. Type `/help` to see all available skill commands. 
+ +You can also use skills through natural conversation: ```bash hermes --toolsets skills -q "What skills do you have?" hermes --toolsets skills -q "Show me the axolotl skill" @@ -1266,9 +1320,13 @@ Your `~/.hermes/` directory should now look like: ├── skills/ # Agent-created skills (auto-created on first use) ├── cron/ # Scheduled job data ├── sessions/ # Messaging gateway sessions -└── logs/ # Conversation logs +└── logs/ # Logs + ├── gateway.log # Gateway activity log + └── errors.log # Errors from tool calls, API failures, etc. ``` +All log output is automatically redacted -- API keys, tokens, and credentials are masked before they reach disk. + --- ### Step 7: Add Your API Keys @@ -1592,7 +1650,9 @@ All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set t |------|-------------| | `~/.hermes/config.yaml` | Your settings | | `~/.hermes/.env` | API keys and secrets | -| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes login`) | +| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes model`) | +| `~/.hermes/logs/errors.log` | Tool errors, API failures (secrets auto-redacted) | +| `~/.hermes/logs/gateway.log` | Gateway activity log (secrets auto-redacted) | | `~/.hermes/cron/` | Scheduled jobs data | | `~/.hermes/sessions/` | Gateway session data | | `~/.hermes/hermes-agent/` | Installation directory | @@ -1620,7 +1680,7 @@ hermes config # View current settings Common issues: - **"API key not set"**: Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key` - **"hermes: command not found"**: Reload your shell (`source ~/.bashrc`) or check PATH -- **"Run `hermes login` to re-authenticate"**: Your Nous Portal session expired. Run `hermes login` to refresh. +- **"Run `hermes setup` to re-authenticate"**: Your Nous Portal session expired. Run `hermes setup` or `hermes model` to refresh. - **"No active paid subscription"**: Your Nous Portal account needs an active subscription for inference. 
- **Gateway won't start**: Check `hermes gateway status` and logs - **Missing config after update**: Run `hermes config check` to see what's new, then `hermes config migrate` to add missing options diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index ef179c410..4fb879414 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -8,7 +8,9 @@ Resolution order for text tasks: 1. OpenRouter (OPENROUTER_API_KEY) 2. Nous Portal (~/.hermes/auth.json active provider) 3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) - 4. None + 4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex, + wrapped to look like a chat.completions client) + 5. None Resolution order for vision/multimodal tasks: 1. OpenRouter @@ -20,7 +22,8 @@ import json import logging import os from pathlib import Path -from typing import Optional, Tuple +from types import SimpleNamespace +from typing import Any, Dict, List, Optional, Tuple from openai import OpenAI @@ -32,7 +35,7 @@ logger = logging.getLogger(__name__) _OR_HEADERS = { "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", } # Nous Portal extra_body for product attribution. @@ -49,6 +52,188 @@ _NOUS_MODEL = "gemini-3-flash" _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json" +# Codex fallback: uses the Responses API (the only endpoint the Codex +# OAuth token can access) with a fast model for auxiliary tasks. +_CODEX_AUX_MODEL = "gpt-5.3-codex" +_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex" + + +# ── Codex Responses → chat.completions adapter ───────────────────────────── +# All auxiliary consumers call client.chat.completions.create(**kwargs) and +# read response.choices[0].message.content. 
This adapter translates those +# calls to the Codex Responses API so callers don't need any changes. + +class _CodexCompletionsAdapter: + """Drop-in shim that accepts chat.completions.create() kwargs and + routes them through the Codex Responses streaming API.""" + + def __init__(self, real_client: OpenAI, model: str): + self._client = real_client + self._model = model + + def create(self, **kwargs) -> Any: + messages = kwargs.get("messages", []) + model = kwargs.get("model", self._model) + temperature = kwargs.get("temperature") + + # Separate system/instructions from conversation messages + instructions = "You are a helpful assistant." + input_msgs: List[Dict[str, Any]] = [] + for msg in messages: + role = msg.get("role", "user") + content = msg.get("content", "") + if role == "system": + instructions = content + else: + input_msgs.append({"role": role, "content": content}) + + resp_kwargs: Dict[str, Any] = { + "model": model, + "instructions": instructions, + "input": input_msgs or [{"role": "user", "content": ""}], + "stream": True, + "store": False, + } + + max_tokens = kwargs.get("max_output_tokens") or kwargs.get("max_completion_tokens") or kwargs.get("max_tokens") + if max_tokens is not None: + resp_kwargs["max_output_tokens"] = int(max_tokens) + if temperature is not None: + resp_kwargs["temperature"] = temperature + + # Tools support for flush_memories and similar callers + tools = kwargs.get("tools") + if tools: + converted = [] + for t in tools: + fn = t.get("function", {}) if isinstance(t, dict) else {} + name = fn.get("name") + if not name: + continue + converted.append({ + "type": "function", + "name": name, + "description": fn.get("description", ""), + "parameters": fn.get("parameters", {}), + }) + if converted: + resp_kwargs["tools"] = converted + + # Stream and collect the response + text_parts: List[str] = [] + tool_calls_raw: List[Any] = [] + usage = None + + try: + with self._client.responses.stream(**resp_kwargs) as stream: + for _event in 
stream: + pass + final = stream.get_final_response() + + # Extract text and tool calls from the Responses output + for item in getattr(final, "output", []): + item_type = getattr(item, "type", None) + if item_type == "message": + for part in getattr(item, "content", []): + ptype = getattr(part, "type", None) + if ptype in ("output_text", "text"): + text_parts.append(getattr(part, "text", "")) + elif item_type == "function_call": + tool_calls_raw.append(SimpleNamespace( + id=getattr(item, "call_id", ""), + type="function", + function=SimpleNamespace( + name=getattr(item, "name", ""), + arguments=getattr(item, "arguments", "{}"), + ), + )) + + resp_usage = getattr(final, "usage", None) + if resp_usage: + usage = SimpleNamespace( + prompt_tokens=getattr(resp_usage, "input_tokens", 0), + completion_tokens=getattr(resp_usage, "output_tokens", 0), + total_tokens=getattr(resp_usage, "total_tokens", 0), + ) + except Exception as exc: + logger.debug("Codex auxiliary Responses API call failed: %s", exc) + raise + + content = "".join(text_parts).strip() or None + + # Build a response that looks like chat.completions + message = SimpleNamespace( + role="assistant", + content=content, + tool_calls=tool_calls_raw or None, + ) + choice = SimpleNamespace( + index=0, + message=message, + finish_reason="stop" if not tool_calls_raw else "tool_calls", + ) + return SimpleNamespace( + choices=[choice], + model=model, + usage=usage, + ) + + +class _CodexChatShim: + """Wraps the adapter to provide client.chat.completions.create().""" + + def __init__(self, adapter: _CodexCompletionsAdapter): + self.completions = adapter + + +class CodexAuxiliaryClient: + """OpenAI-client-compatible wrapper that routes through Codex Responses API. + + Consumers can call client.chat.completions.create(**kwargs) as normal. + Also exposes .api_key and .base_url for introspection by async wrappers. 
+ """ + + def __init__(self, real_client: OpenAI, model: str): + self._real_client = real_client + adapter = _CodexCompletionsAdapter(real_client, model) + self.chat = _CodexChatShim(adapter) + self.api_key = real_client.api_key + self.base_url = real_client.base_url + + def close(self): + self._real_client.close() + + +class _AsyncCodexCompletionsAdapter: + """Async version of the Codex Responses adapter. + + Wraps the sync adapter via asyncio.to_thread() so async consumers + (web_tools, session_search) can await it as normal. + """ + + def __init__(self, sync_adapter: _CodexCompletionsAdapter): + self._sync = sync_adapter + + async def create(self, **kwargs) -> Any: + import asyncio + return await asyncio.to_thread(self._sync.create, **kwargs) + + +class _AsyncCodexChatShim: + def __init__(self, adapter: _AsyncCodexCompletionsAdapter): + self.completions = adapter + + +class AsyncCodexAuxiliaryClient: + """Async-compatible wrapper matching AsyncOpenAI.chat.completions.create().""" + + def __init__(self, sync_wrapper: "CodexAuxiliaryClient"): + sync_adapter = sync_wrapper.chat.completions + async_adapter = _AsyncCodexCompletionsAdapter(sync_adapter) + self.chat = _AsyncCodexChatShim(async_adapter) + self.api_key = sync_wrapper.api_key + self.base_url = sync_wrapper.base_url + def _read_nous_auth() -> Optional[dict]: """Read and validate ~/.hermes/auth.json for an active Nous provider. 
@@ -82,12 +267,31 @@ def _nous_base_url() -> str: return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL) +def _read_codex_access_token() -> Optional[str]: + """Read a valid Codex OAuth access token from ~/.codex/auth.json.""" + try: + codex_auth = Path.home() / ".codex" / "auth.json" + if not codex_auth.is_file(): + return None + data = json.loads(codex_auth.read_text()) + tokens = data.get("tokens") + if not isinstance(tokens, dict): + return None + access_token = tokens.get("access_token") + if isinstance(access_token, str) and access_token.strip(): + return access_token.strip() + return None + except Exception as exc: + logger.debug("Could not read Codex auth for auxiliary client: %s", exc) + return None + + # ── Public API ────────────────────────────────────────────────────────────── def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: """Return (client, model_slug) for text-only auxiliary tasks. - Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None). + Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth -> (None, None). """ # 1. OpenRouter or_key = os.getenv("OPENROUTER_API_KEY") @@ -115,11 +319,44 @@ def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: logger.debug("Auxiliary text client: custom endpoint (%s)", model) return OpenAI(api_key=custom_key, base_url=custom_base), model - # 4. Nothing available + # 4. Codex OAuth -- uses the Responses API (only endpoint the token + # can access), wrapped to look like a chat.completions client. + codex_token = _read_codex_access_token() + if codex_token: + logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) + real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) + return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL + + # 5. 
Nothing available logger.debug("Auxiliary text client: none available") return None, None +def get_async_text_auxiliary_client(): + """Return (async_client, model_slug) for async consumers. + + For standard providers returns (AsyncOpenAI, model). For Codex returns + (AsyncCodexAuxiliaryClient, model) which wraps the Responses API. + Returns (None, None) when no provider is available. + """ + from openai import AsyncOpenAI + + sync_client, model = get_text_auxiliary_client() + if sync_client is None: + return None, None + + if isinstance(sync_client, CodexAuxiliaryClient): + return AsyncCodexAuxiliaryClient(sync_client), model + + async_kwargs = { + "api_key": sync_client.api_key, + "base_url": str(sync_client.base_url), + } + if "openrouter" in str(sync_client.base_url).lower(): + async_kwargs["default_headers"] = dict(_OR_HEADERS) + return AsyncOpenAI(**async_kwargs), model + + def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: """Return (client, model_slug) for vision/multimodal auxiliary tasks. @@ -161,11 +398,12 @@ def auxiliary_max_tokens_param(value: int) -> dict: OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'. + The Codex adapter translates max_tokens internally, so we use max_tokens + for it as well. 
""" custom_base = os.getenv("OPENAI_BASE_URL", "") or_key = os.getenv("OPENROUTER_API_KEY") - # Only use max_completion_tokens when the auxiliary client resolved to - # direct OpenAI (no OpenRouter key, no Nous auth, custom endpoint is api.openai.com) + # Only use max_completion_tokens for direct OpenAI custom endpoints if (not or_key and _read_nous_auth() is None and "api.openai.com" in custom_base.lower()): diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 329fd9680..034eb8f99 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -31,8 +31,9 @@ class ContextCompressor: threshold_percent: float = 0.85, protect_first_n: int = 3, protect_last_n: int = 4, - summary_target_tokens: int = 500, + summary_target_tokens: int = 2500, quiet_mode: bool = False, + summary_model_override: str = None, ): self.model = model self.threshold_percent = threshold_percent @@ -49,7 +50,8 @@ class ContextCompressor: self.last_completion_tokens = 0 self.last_total_tokens = 0 - self.client, self.summary_model = get_text_auxiliary_client() + self.client, default_model = get_text_auxiliary_client() + self.summary_model = summary_model_override or default_model def update_from_response(self, usage: Dict[str, Any]): """Update tracked token usage from API response.""" diff --git a/agent/display.py b/agent/display.py index 9ef8c5ebc..e7f074c4e 100644 --- a/agent/display.py +++ b/agent/display.py @@ -199,6 +199,24 @@ class KawaiiSpinner: def update_text(self, new_message: str): self.message = new_message + def print_above(self, text: str): + """Print a line above the spinner without disrupting animation. + + Clears the current spinner line, prints the text, and lets the + next animation tick redraw the spinner on the line below. + Thread-safe: uses the captured stdout reference (self._out). + Works inside redirect_stdout(devnull) because _write bypasses + sys.stdout and writes to the stdout captured at spinner creation. 
+ """ + if not self.running: + self._write(f" {text}", flush=True) + return + # Clear spinner line with spaces (not \033[K) to avoid garbled escape + # codes when prompt_toolkit's patch_stdout is active — same approach + # as stop(). Then print text; spinner redraws on next tick. + blanks = ' ' * max(self.last_line_len + 5, 40) + self._write(f"\r{blanks}\r {text}", flush=True) + def stop(self, final_message: str = None): self.running = False if self.thread: @@ -283,6 +301,15 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str] pass return False, "" + # Memory-specific: distinguish "full" from real errors + if tool_name == "memory": + try: + data = json.loads(result) + if data.get("success") is False and "exceed the limit" in data.get("error", ""): + return True, " [full]" + except (json.JSONDecodeError, TypeError, AttributeError): + pass + # Generic heuristic for non-terminal tools lower = result[:500].lower() if '"error"' in lower or '"failed"' in lower or result.startswith("Error"): diff --git a/agent/redact.py b/agent/redact.py new file mode 100644 index 000000000..22f1a547f --- /dev/null +++ b/agent/redact.py @@ -0,0 +1,115 @@ +"""Regex-based secret redaction for logs and tool output. + +Applies pattern matching to mask API keys, tokens, and credentials +before they reach log files, verbose output, or gateway logs. + +Short tokens (< 18 chars) are fully masked. Longer tokens preserve +the first 6 and last 4 characters for debuggability. 
+""" + +import logging +import re +from typing import Optional + +logger = logging.getLogger(__name__) + +# Known API key prefixes -- match the prefix + contiguous token chars +_PREFIX_PATTERNS = [ + r"sk-[A-Za-z0-9_-]{10,}", # OpenAI / OpenRouter + r"ghp_[A-Za-z0-9]{10,}", # GitHub PAT (classic) + r"github_pat_[A-Za-z0-9_]{10,}", # GitHub PAT (fine-grained) + r"xox[baprs]-[A-Za-z0-9-]{10,}", # Slack tokens + r"AIza[A-Za-z0-9_-]{30,}", # Google API keys + r"pplx-[A-Za-z0-9]{10,}", # Perplexity + r"fal_[A-Za-z0-9_-]{10,}", # Fal.ai + r"fc-[A-Za-z0-9]{10,}", # Firecrawl + r"bb_live_[A-Za-z0-9_-]{10,}", # BrowserBase + r"gAAAA[A-Za-z0-9_=-]{20,}", # Codex encrypted tokens +] + +# ENV assignment patterns: KEY=value where KEY contains a secret-like name +_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)" +_ENV_ASSIGN_RE = re.compile( + rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+)\2", + re.IGNORECASE, +) + +# JSON field patterns: "apiKey": "value", "token": "value", etc. +_JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer)" +_JSON_FIELD_RE = re.compile( + rf'("{_JSON_KEY_NAMES}")\s*:\s*"([^"]+)"', + re.IGNORECASE, +) + +# Authorization headers +_AUTH_HEADER_RE = re.compile( + r"(Authorization:\s*Bearer\s+)(\S+)", + re.IGNORECASE, +) + +# Telegram bot tokens: bot: or : +_TELEGRAM_RE = re.compile( + r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})", +) + +# Compile known prefix patterns into one alternation +_PREFIX_RE = re.compile( + r"(? str: + """Mask a token, preserving prefix for long tokens.""" + if len(token) < 18: + return "***" + return f"{token[:6]}...{token[-4:]}" + + +def redact_sensitive_text(text: str) -> str: + """Apply all redaction patterns to a block of text. + + Safe to call on any string -- non-matching text passes through unchanged. + """ + if not text: + return text + + # Known prefixes (sk-, ghp_, etc.) 
+ text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text) + + # ENV assignments: OPENAI_API_KEY=sk-abc... + def _redact_env(m): + name, quote, value = m.group(1), m.group(2), m.group(3) + return f"{name}={quote}{_mask_token(value)}{quote}" + text = _ENV_ASSIGN_RE.sub(_redact_env, text) + + # JSON fields: "apiKey": "value" + def _redact_json(m): + key, value = m.group(1), m.group(2) + return f'{key}: "{_mask_token(value)}"' + text = _JSON_FIELD_RE.sub(_redact_json, text) + + # Authorization headers + text = _AUTH_HEADER_RE.sub( + lambda m: m.group(1) + _mask_token(m.group(2)), + text, + ) + + # Telegram bot tokens + def _redact_telegram(m): + prefix = m.group(1) or "" + digits = m.group(2) + return f"{prefix}{digits}:***" + text = _TELEGRAM_RE.sub(_redact_telegram, text) + + return text + + +class RedactingFormatter(logging.Formatter): + """Log formatter that redacts secrets from all log messages.""" + + def __init__(self, fmt=None, datefmt=None, style='%', **kwargs): + super().__init__(fmt, datefmt, style, **kwargs) + + def format(self, record: logging.LogRecord) -> str: + original = super().format(record) + return redact_sensitive_text(original) diff --git a/agent/skill_commands.py b/agent/skill_commands.py new file mode 100644 index 000000000..fc11c5312 --- /dev/null +++ b/agent/skill_commands.py @@ -0,0 +1,114 @@ +"""Skill slash commands — scan installed skills and build invocation messages. + +Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces +can invoke skills via /skill-name commands. +""" + +import logging +from pathlib import Path +from typing import Any, Dict, Optional + +logger = logging.getLogger(__name__) + +_skill_commands: Dict[str, Dict[str, Any]] = {} + + +def scan_skill_commands() -> Dict[str, Dict[str, Any]]: + """Scan ~/.hermes/skills/ and return a mapping of /command -> skill info. + + Returns: + Dict mapping "/skill-name" to {name, description, skill_md_path, skill_dir}. 
+ """ + global _skill_commands + _skill_commands = {} + try: + from tools.skills_tool import SKILLS_DIR, _parse_frontmatter + if not SKILLS_DIR.exists(): + return _skill_commands + for skill_md in SKILLS_DIR.rglob("SKILL.md"): + path_str = str(skill_md) + if '/.git/' in path_str or '/.github/' in path_str or '/.hub/' in path_str: + continue + try: + content = skill_md.read_text(encoding='utf-8') + frontmatter, body = _parse_frontmatter(content) + name = frontmatter.get('name', skill_md.parent.name) + description = frontmatter.get('description', '') + if not description: + for line in body.strip().split('\n'): + line = line.strip() + if line and not line.startswith('#'): + description = line[:80] + break + cmd_name = name.lower().replace(' ', '-').replace('_', '-') + _skill_commands[f"/{cmd_name}"] = { + "name": name, + "description": description or f"Invoke the {name} skill", + "skill_md_path": str(skill_md), + "skill_dir": str(skill_md.parent), + } + except Exception: + continue + except Exception: + pass + return _skill_commands + + +def get_skill_commands() -> Dict[str, Dict[str, Any]]: + """Return the current skill commands mapping (scan first if empty).""" + if not _skill_commands: + scan_skill_commands() + return _skill_commands + + +def build_skill_invocation_message(cmd_key: str, user_instruction: str = "") -> Optional[str]: + """Build the user message content for a skill slash command invocation. + + Args: + cmd_key: The command key including leading slash (e.g., "/gif-search"). + user_instruction: Optional text the user typed after the command. + + Returns: + The formatted message string, or None if the skill wasn't found. 
+ """ + commands = get_skill_commands() + skill_info = commands.get(cmd_key) + if not skill_info: + return None + + skill_md_path = Path(skill_info["skill_md_path"]) + skill_dir = Path(skill_info["skill_dir"]) + skill_name = skill_info["name"] + + try: + content = skill_md_path.read_text(encoding='utf-8') + except Exception: + return f"[Failed to load skill: {skill_name}]" + + parts = [ + f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]', + "", + content.strip(), + ] + + supporting = [] + for subdir in ("references", "templates", "scripts", "assets"): + subdir_path = skill_dir / subdir + if subdir_path.exists(): + for f in sorted(subdir_path.rglob("*")): + if f.is_file(): + rel = str(f.relative_to(skill_dir)) + supporting.append(rel) + + if supporting: + parts.append("") + parts.append("[This skill has supporting files you can load with the skill_view tool:]") + for sf in supporting: + parts.append(f"- {sf}") + parts.append(f'\nTo view any of these, use: skill_view(name="{skill_name}", file="")') + + if user_instruction: + parts.append("") + parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}") + + return "\n".join(parts) diff --git a/cli.py b/cli.py index ea9c3e630..2081c7aae 100755 --- a/cli.py +++ b/cli.py @@ -682,17 +682,27 @@ COMMANDS = { } +# ============================================================================ +# Skill Slash Commands — dynamic commands generated from installed skills +# ============================================================================ + +from agent.skill_commands import scan_skill_commands, get_skill_commands, build_skill_invocation_message + +_skill_commands = scan_skill_commands() + + class SlashCommandCompleter(Completer): - """Autocomplete for /commands in the input area.""" + """Autocomplete for /commands and /skill-name in the input area.""" def 
get_completions(self, document, complete_event): text = document.text_before_cursor - # Only complete at the start of input, after / if not text.startswith("/"): return word = text[1:] # strip the leading / + + # Built-in commands for cmd, desc in COMMANDS.items(): - cmd_name = cmd[1:] # strip leading / from key + cmd_name = cmd[1:] if cmd_name.startswith(word): yield Completion( cmd_name, @@ -701,6 +711,17 @@ class SlashCommandCompleter(Completer): display_meta=desc, ) + # Skill commands + for cmd, info in _skill_commands.items(): + cmd_name = cmd[1:] + if cmd_name.startswith(word): + yield Completion( + cmd_name, + start_position=-len(word), + display=cmd, + display_meta=f"⚡ {info['description'][:50]}", + ) + def save_config_value(key_path: str, value: any) -> bool: """ @@ -782,7 +803,7 @@ class HermesCLI: Args: model: Model to use (default: from env or claude-sonnet) toolsets: List of toolsets to enable (default: all) - provider: Inference provider ("auto", "openrouter", "nous") + provider: Inference provider ("auto", "openrouter", "nous", "openai-codex") api_key: API key (default: from environment) base_url: API base URL (default: OpenRouter) max_turns: Maximum tool-calling iterations (default: 60) @@ -800,37 +821,37 @@ class HermesCLI: # Configuration - priority: CLI args > env vars > config file # Model can come from: CLI arg, LLM_MODEL env, OPENAI_MODEL env (custom endpoint), or config self.model = model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or CLI_CONFIG["model"]["default"] - - # Base URL: custom endpoint (OPENAI_BASE_URL) takes precedence over OpenRouter - self.base_url = base_url or os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"]) - - # API key: custom endpoint (OPENAI_API_KEY) takes precedence over OpenRouter - self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY") - # Provider resolution: determines whether to use OAuth credentials or env var keys - from 
hermes_cli.auth import resolve_provider + self._explicit_api_key = api_key + self._explicit_base_url = base_url + + # Provider selection is resolved lazily at use-time via _ensure_runtime_credentials(). self.requested_provider = ( provider or os.getenv("HERMES_INFERENCE_PROVIDER") or CLI_CONFIG["model"].get("provider") or "auto" ) - self.provider = resolve_provider( - self.requested_provider, - explicit_api_key=api_key, - explicit_base_url=base_url, + self._provider_source: Optional[str] = None + self.provider = self.requested_provider + self.api_mode = "chat_completions" + self.base_url = ( + base_url + or os.getenv("OPENAI_BASE_URL") + or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"]) ) + self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY") self._nous_key_expires_at: Optional[str] = None self._nous_key_source: Optional[str] = None # Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default - if max_turns is not None: + if max_turns is not None: # CLI arg was explicitly set self.max_turns = max_turns - elif os.getenv("HERMES_MAX_ITERATIONS"): - self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS")) elif CLI_CONFIG["agent"].get("max_turns"): self.max_turns = CLI_CONFIG["agent"]["max_turns"] elif CLI_CONFIG.get("max_turns"): # Backwards compat: root-level max_turns self.max_turns = CLI_CONFIG["max_turns"] + elif os.getenv("HERMES_MAX_ITERATIONS"): + self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS")) else: self.max_turns = 60 @@ -882,45 +903,51 @@ class HermesCLI: def _ensure_runtime_credentials(self) -> bool: """ - Ensure OAuth provider credentials are fresh before agent use. - For Nous Portal: checks agent key TTL, refreshes/re-mints as needed. - If the key changed, tears down the agent so it rebuilds with new creds. + Ensure runtime credentials are resolved before agent use. 
+ Re-resolves provider credentials so key rotation and token refresh + are picked up without restarting the CLI. Returns True if credentials are ready, False on auth failure. """ - if self.provider != "nous": - return True - - from hermes_cli.auth import format_auth_error, resolve_nous_runtime_credentials + from hermes_cli.runtime_provider import ( + resolve_runtime_provider, + format_runtime_provider_error, + ) try: - credentials = resolve_nous_runtime_credentials( - min_key_ttl_seconds=max( - 60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")) - ), - timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + runtime = resolve_runtime_provider( + requested=self.requested_provider, + explicit_api_key=self._explicit_api_key, + explicit_base_url=self._explicit_base_url, ) except Exception as exc: - message = format_auth_error(exc) + message = format_runtime_provider_error(exc) self.console.print(f"[bold red]{message}[/]") return False - api_key = credentials.get("api_key") - base_url = credentials.get("base_url") + api_key = runtime.get("api_key") + base_url = runtime.get("base_url") + resolved_provider = runtime.get("provider", "openrouter") + resolved_api_mode = runtime.get("api_mode", self.api_mode) if not isinstance(api_key, str) or not api_key: - self.console.print("[bold red]Nous credential resolver returned an empty API key.[/]") + self.console.print("[bold red]Provider resolver returned an empty API key.[/]") return False if not isinstance(base_url, str) or not base_url: - self.console.print("[bold red]Nous credential resolver returned an empty base URL.[/]") + self.console.print("[bold red]Provider resolver returned an empty base URL.[/]") return False credentials_changed = api_key != self.api_key or base_url != self.base_url + routing_changed = ( + resolved_provider != self.provider + or resolved_api_mode != self.api_mode + ) + self.provider = resolved_provider + self.api_mode = resolved_api_mode + self._provider_source = 
runtime.get("source") self.api_key = api_key self.base_url = base_url - self._nous_key_expires_at = credentials.get("expires_at") - self._nous_key_source = credentials.get("source") # AIAgent/OpenAI client holds auth at init time, so rebuild if key rotated - if credentials_changed and self.agent is not None: + if (credentials_changed or routing_changed) and self.agent is not None: self.agent = None return True @@ -936,7 +963,7 @@ class HermesCLI: if self.agent is not None: return True - if self.provider == "nous" and not self._ensure_runtime_credentials(): + if not self._ensure_runtime_credentials(): return False # Initialize SQLite session store for CLI sessions @@ -980,6 +1007,8 @@ class HermesCLI: model=self.model, api_key=self.api_key, base_url=self.base_url, + provider=self.provider, + api_mode=self.api_mode, max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, verbose_logging=self.verbose, @@ -1072,8 +1101,8 @@ class HermesCLI: toolsets_info = f" [dim #B8860B]·[/] [#CD7F32]toolsets: {', '.join(self.enabled_toolsets)}[/]" provider_info = f" [dim #B8860B]·[/] [dim]provider: {self.provider}[/]" - if self.provider == "nous" and self._nous_key_source: - provider_info += f" [dim #B8860B]·[/] [dim]key: {self._nous_key_source}[/]" + if self._provider_source: + provider_info += f" [dim #B8860B]·[/] [dim]auth: {self._provider_source}[/]" self.console.print( f" {api_indicator} [#FFBF00]{model_short}[/] " @@ -1082,20 +1111,21 @@ class HermesCLI: ) def show_help(self): - """Display help information with kawaii ASCII art.""" - print() - print("+" + "-" * 50 + "+") - print("|" + " " * 14 + "(^_^)? Available Commands" + " " * 10 + "|") - print("+" + "-" * 50 + "+") - print() + """Display help information.""" + _cprint(f"\n{_BOLD}+{'-' * 50}+{_RST}") + _cprint(f"{_BOLD}|{' ' * 14}(^_^)? 
Available Commands{' ' * 10}|{_RST}") + _cprint(f"{_BOLD}+{'-' * 50}+{_RST}\n") for cmd, desc in COMMANDS.items(): - print(f" {cmd:<15} - {desc}") + _cprint(f" {_GOLD}{cmd:<15}{_RST} {_DIM}-{_RST} {desc}") - print() - print(" Tip: Just type your message to chat with Hermes!") - print(" Multi-line: Alt+Enter for a new line") - print() + if _skill_commands: + _cprint(f"\n ⚡ {_BOLD}Skill Commands{_RST} ({len(_skill_commands)} installed):") + for cmd, info in sorted(_skill_commands.items()): + _cprint(f" {_GOLD}{cmd:<22}{_RST} {_DIM}-{_RST} {info['description']}") + + _cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}") + _cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}\n") def show_tools(self): """Display available tools with kawaii ASCII art.""" @@ -1692,9 +1722,26 @@ class HermesCLI: self._show_gateway_status() elif cmd_lower == "/verbose": self._toggle_verbose() + elif cmd_lower == "/compress": + self._manual_compress() + elif cmd_lower == "/usage": + self._show_usage() else: - self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]") - self.console.print("[dim #B8860B]Type /help for available commands[/]") + # Check for skill slash commands (/gif-search, /axolotl, etc.) 
+ base_cmd = cmd_lower.split()[0] + if base_cmd in _skill_commands: + user_instruction = cmd_original[len(base_cmd):].strip() + msg = build_skill_invocation_message(base_cmd, user_instruction) + if msg: + skill_name = _skill_commands[base_cmd]["name"] + print(f"\n⚡ Loading skill: {skill_name}") + if hasattr(self, '_pending_input'): + self._pending_input.put(msg) + else: + self.console.print(f"[bold red]Failed to load skill for {base_cmd}[/]") + else: + self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]") + self.console.print("[dim #B8860B]Type /help for available commands[/]") return True @@ -1720,6 +1767,77 @@ class HermesCLI: } self.console.print(labels.get(self.tool_progress_mode, "")) + def _manual_compress(self): + """Manually trigger context compression on the current conversation.""" + if not self.conversation_history or len(self.conversation_history) < 4: + print("(._.) Not enough conversation to compress (need at least 4 messages).") + return + + if not self.agent: + print("(._.) No active agent -- send a message first.") + return + + if not self.agent.compression_enabled: + print("(._.) 
Compression is disabled in config.") + return + + original_count = len(self.conversation_history) + try: + from agent.model_metadata import estimate_messages_tokens_rough + approx_tokens = estimate_messages_tokens_rough(self.conversation_history) + print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...") + + compressed, new_system = self.agent._compress_context( + self.conversation_history, + self.agent._cached_system_prompt or "", + approx_tokens=approx_tokens, + ) + self.conversation_history = compressed + new_count = len(self.conversation_history) + new_tokens = estimate_messages_tokens_rough(self.conversation_history) + print( + f" ✅ Compressed: {original_count} → {new_count} messages " + f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)" + ) + except Exception as e: + print(f" ❌ Compression failed: {e}") + + def _show_usage(self): + """Show cumulative token usage for the current session.""" + if not self.agent: + print("(._.) No active agent -- send a message first.") + return + + agent = self.agent + prompt = agent.session_prompt_tokens + completion = agent.session_completion_tokens + total = agent.session_total_tokens + calls = agent.session_api_calls + + if calls == 0: + print("(._.) 
No API calls made yet in this session.") + return + + # Current context window state + compressor = agent.context_compressor + last_prompt = compressor.last_prompt_tokens + ctx_len = compressor.context_length + pct = (last_prompt / ctx_len * 100) if ctx_len else 0 + compressions = compressor.compression_count + + msg_count = len(self.conversation_history) + + print(f" 📊 Session Token Usage") + print(f" {'─' * 40}") + print(f" Prompt tokens (input): {prompt:>10,}") + print(f" Completion tokens (output): {completion:>9,}") + print(f" Total tokens: {total:>10,}") + print(f" API calls: {calls:>10,}") + print(f" {'─' * 40}") + print(f" Current context: {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)") + print(f" Messages: {msg_count}") + print(f" Compressions: {compressions}") + if self.verbose: logging.getLogger().setLevel(logging.DEBUG) for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'): @@ -1894,8 +2012,8 @@ class HermesCLI: Returns: The agent's response, or None on error """ - # Refresh OAuth credentials if needed (handles key rotation transparently) - if self.provider == "nous" and not self._ensure_runtime_credentials(): + # Refresh provider credentials if needed (handles key rotation transparently) + if not self._ensure_runtime_credentials(): return None # Initialize agent if needed diff --git a/cron/scheduler.py b/cron/scheduler.py index 23cf5cd61..df88e56b7 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -172,10 +172,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: except UnicodeDecodeError: load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1") - model = os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6") - # Custom endpoint (OPENAI_*) takes precedence, matching CLI behavior - api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "") - base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") 
+ model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" try: import yaml @@ -188,24 +185,27 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: model = _model_cfg elif isinstance(_model_cfg, dict): model = _model_cfg.get("default", model) - base_url = _model_cfg.get("base_url", base_url) - # Check if provider is nous — resolve OAuth credentials - provider = _model_cfg.get("provider", "") if isinstance(_model_cfg, dict) else "" - if provider == "nous": - try: - from hermes_cli.auth import resolve_nous_runtime_credentials - creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=5 * 60) - api_key = creds.get("api_key", api_key) - base_url = creds.get("base_url", base_url) - except Exception as nous_err: - logging.warning("Nous Portal credential resolution failed for cron: %s", nous_err) except Exception: pass + from hermes_cli.runtime_provider import ( + resolve_runtime_provider, + format_runtime_provider_error, + ) + try: + runtime = resolve_runtime_provider( + requested=os.getenv("HERMES_INFERENCE_PROVIDER"), + ) + except Exception as exc: + message = format_runtime_provider_error(exc) + raise RuntimeError(message) from exc + agent = AIAgent( model=model, - api_key=api_key, - base_url=base_url, + api_key=runtime.get("api_key"), + base_url=runtime.get("base_url"), + provider=runtime.get("provider"), + api_mode=runtime.get("api_mode"), quiet_mode=True, session_id=f"cron_{job_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" ) diff --git a/docs/cli.md b/docs/cli.md index a9257024c..0945b48a1 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -12,7 +12,7 @@ hermes hermes --model "anthropic/claude-sonnet-4" # With specific provider -hermes --provider nous # Use Nous Portal (requires: hermes login) +hermes --provider nous # Use Nous Portal (requires: hermes model) hermes --provider openrouter # Force OpenRouter # With specific toolsets @@ -73,6 +73,9 @@ The CLI is implemented in `cli.py` and uses: | `/history` | Show 
conversation history | | `/save` | Save current conversation to file | | `/config` | Show current configuration | +| `/verbose` | Cycle tool progress display: off → new → all → verbose | +| `/compress` | Manually compress conversation context (flush memories + summarize) | +| `/usage` | Show token usage for the current session | | `/quit` | Exit the CLI (also: `/exit`, `/q`) | ## Configuration @@ -93,7 +96,7 @@ model: ``` **Provider selection** (`provider` field): -- `auto` (default): Uses Nous Portal if logged in (`hermes login`), otherwise falls back to OpenRouter/env vars. +- `auto` (default): Uses Nous Portal if logged in (`hermes model`), otherwise falls back to OpenRouter/env vars. - `openrouter`: Always uses `OPENROUTER_API_KEY` from `.env`. - `nous`: Always uses Nous Portal OAuth credentials from `auth.json`. diff --git a/docs/hooks.md b/docs/hooks.md new file mode 100644 index 000000000..3746eb3e4 --- /dev/null +++ b/docs/hooks.md @@ -0,0 +1,174 @@ +# Event Hooks + +The hooks system lets you run custom code at key points in the agent lifecycle — session creation, slash commands, each tool-calling step, and more. Hooks are discovered automatically from `~/.hermes/hooks/` and fire without blocking the main agent pipeline. + +## Creating a Hook + +Each hook is a directory under `~/.hermes/hooks/` containing two files: + +``` +~/.hermes/hooks/ +└── my-hook/ + ├── HOOK.yaml # Declares which events to listen for + └── handler.py # Python handler function +``` + +### HOOK.yaml + +```yaml +name: my-hook +description: Log all agent activity to a file +events: + - agent:start + - agent:end + - agent:step +``` + +The `events` list determines which events trigger your handler. You can subscribe to any combination of events, including wildcards like `command:*`. 
+ +### handler.py + +```python +import json +from datetime import datetime +from pathlib import Path + +LOG_FILE = Path.home() / ".hermes" / "hooks" / "my-hook" / "activity.log" + +async def handle(event_type: str, context: dict): + """Called for each subscribed event. Must be named 'handle'.""" + entry = { + "timestamp": datetime.now().isoformat(), + "event": event_type, + **context, + } + with open(LOG_FILE, "a") as f: + f.write(json.dumps(entry) + "\n") +``` + +The handler function: +- Must be named `handle` +- Receives `event_type` (string) and `context` (dict) +- Can be `async def` or regular `def` — both work +- Errors are caught and logged, never crashing the agent + +## Available Events + +| Event | When it fires | Context keys | +|-------|---------------|--------------| +| `gateway:startup` | Gateway process starts | `platforms` (list of active platform names) | +| `session:start` | New messaging session created | `platform`, `user_id`, `session_id`, `session_key` | +| `session:reset` | User ran `/new` or `/reset` | `platform`, `user_id`, `session_key` | +| `agent:start` | Agent begins processing a message | `platform`, `user_id`, `session_id`, `message` | +| `agent:step` | Each iteration of the tool-calling loop | `platform`, `user_id`, `session_id`, `iteration`, `tool_names` | +| `agent:end` | Agent finishes processing | `platform`, `user_id`, `session_id`, `message`, `response` | +| `command:*` | Any slash command executed | `platform`, `user_id`, `command`, `args` | + +### Wildcard Matching + +Handlers registered for `command:*` fire for any `command:` event (`command:model`, `command:reset`, etc.). This lets you monitor all slash commands with a single subscription. 
+ +## Examples + +### Telegram Notification on Long Tasks + +Send yourself a Telegram message when the agent takes more than 10 tool-calling steps: + +```yaml +# ~/.hermes/hooks/long-task-alert/HOOK.yaml +name: long-task-alert +description: Alert when agent is taking many steps +events: + - agent:step +``` + +```python +# ~/.hermes/hooks/long-task-alert/handler.py +import os +import httpx + +THRESHOLD = 10 +BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN") +CHAT_ID = os.getenv("TELEGRAM_HOME_CHANNEL") + +async def handle(event_type: str, context: dict): + iteration = context.get("iteration", 0) + if iteration == THRESHOLD and BOT_TOKEN and CHAT_ID: + tools = ", ".join(context.get("tool_names", [])) + text = f"⚠️ Agent has been running for {iteration} steps. Last tools: {tools}" + async with httpx.AsyncClient() as client: + await client.post( + f"https://api.telegram.org/bot{BOT_TOKEN}/sendMessage", + json={"chat_id": CHAT_ID, "text": text}, + ) +``` + +### Command Usage Logger + +Track which slash commands are used and how often: + +```yaml +# ~/.hermes/hooks/command-logger/HOOK.yaml +name: command-logger +description: Log slash command usage +events: + - command:* +``` + +```python +# ~/.hermes/hooks/command-logger/handler.py +import json +from datetime import datetime +from pathlib import Path + +LOG = Path.home() / ".hermes" / "logs" / "command_usage.jsonl" + +def handle(event_type: str, context: dict): + LOG.parent.mkdir(parents=True, exist_ok=True) + entry = { + "ts": datetime.now().isoformat(), + "command": context.get("command"), + "args": context.get("args"), + "platform": context.get("platform"), + "user": context.get("user_id"), + } + with open(LOG, "a") as f: + f.write(json.dumps(entry) + "\n") +``` + +### Session Start Webhook + +POST to an external service whenever a new session starts: + +```yaml +# ~/.hermes/hooks/session-webhook/HOOK.yaml +name: session-webhook +description: Notify external service on new sessions +events: + - session:start + - 
session:reset +``` + +```python +# ~/.hermes/hooks/session-webhook/handler.py +import httpx + +WEBHOOK_URL = "https://your-service.example.com/hermes-events" + +async def handle(event_type: str, context: dict): + async with httpx.AsyncClient() as client: + await client.post(WEBHOOK_URL, json={ + "event": event_type, + **context, + }, timeout=5) +``` + +## How It Works + +1. On gateway startup, `HookRegistry.discover_and_load()` scans `~/.hermes/hooks/` +2. Each subdirectory with `HOOK.yaml` + `handler.py` is loaded dynamically +3. Handlers are registered for their declared events +4. At each lifecycle point, `hooks.emit()` fires all matching handlers +5. Errors in any handler are caught and logged — a broken hook never crashes the agent + +Hooks only fire in the **gateway** (Telegram, Discord, Slack, WhatsApp). The CLI does not currently load hooks. The `agent:step` event bridges from the sync agent thread to the async hook system via `asyncio.run_coroutine_threadsafe`. diff --git a/docs/messaging.md b/docs/messaging.md index 9963cfe03..e695308b4 100644 --- a/docs/messaging.md +++ b/docs/messaging.md @@ -74,6 +74,13 @@ Sessions reset based on configurable policies: Send `/new` or `/reset` as a message to start fresh. 
+### Context Management + +| Command | Description | +|---------|-------------| +| `/compress` | Manually compress conversation context (saves memories, then summarizes) | +| `/usage` | Show token usage and context window status for the current session | + ### Per-Platform Overrides Configure different reset policies per platform: diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 2e818b4ea..dcd97f309 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -425,6 +425,28 @@ class BasePlatformAdapter(ABC): text = f"{caption}\n{image_url}" if caption else image_url return await self.send(chat_id=chat_id, content=text, reply_to=reply_to) + async def send_animation( + self, + chat_id: str, + animation_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """ + Send an animated GIF natively via the platform API. + + Override in subclasses to send GIFs as proper animations + (e.g., Telegram send_animation) so they auto-play inline. + Default falls back to send_image. 
+ """ + return await self.send_image(chat_id=chat_id, image_url=animation_url, caption=caption, reply_to=reply_to) + + @staticmethod + def _is_animation_url(url: str) -> bool: + """Check if a URL points to an animated GIF (vs a static image).""" + lower = url.lower().split('?')[0] # Strip query params + return lower.endswith('.gif') + @staticmethod def extract_images(content: str) -> Tuple[List[Tuple[str, str]], str]: """ @@ -636,11 +658,19 @@ class BasePlatformAdapter(ABC): if human_delay > 0: await asyncio.sleep(human_delay) try: - img_result = await self.send_image( - chat_id=event.source.chat_id, - image_url=image_url, - caption=alt_text if alt_text else None, - ) + # Route animated GIFs through send_animation for proper playback + if self._is_animation_url(image_url): + img_result = await self.send_animation( + chat_id=event.source.chat_id, + animation_url=image_url, + caption=alt_text if alt_text else None, + ) + else: + img_result = await self.send_image( + chat_id=event.source.chat_id, + image_url=image_url, + caption=alt_text if alt_text else None, + ) if not img_result.success: print(f"[{self.name}] Failed to send image: {img_result.error}") except Exception as img_err: diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index c37fde42c..076e97ff5 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -272,6 +272,30 @@ class TelegramAdapter(BasePlatformAdapter): # Fallback: send as text link return await super().send_image(chat_id, image_url, caption, reply_to) + async def send_animation( + self, + chat_id: str, + animation_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """Send an animated GIF natively as a Telegram animation (auto-plays inline).""" + if not self._bot: + return SendResult(success=False, error="Not connected") + + try: + msg = await self._bot.send_animation( + chat_id=int(chat_id), + animation=animation_url, + caption=caption[:1024] if 
caption else None, + reply_to_message_id=int(reply_to) if reply_to else None, + ) + return SendResult(success=True, message_id=str(msg.message_id)) + except Exception as e: + print(f"[{self.name}] Failed to send animation, falling back to photo: {e}") + # Fallback: try as a regular photo + return await self.send_image(chat_id, animation_url, caption, reply_to) + async def send_typing(self, chat_id: str) -> None: """Send typing indicator.""" if self._bot: diff --git a/gateway/run.py b/gateway/run.py index 198629ce3..32f53ba7f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -78,6 +78,20 @@ if _config_path.exists(): for _cfg_key, _env_var in _terminal_env_map.items(): if _cfg_key in _terminal_cfg: os.environ[_env_var] = str(_terminal_cfg[_cfg_key]) + _compression_cfg = _cfg.get("compression", {}) + if _compression_cfg and isinstance(_compression_cfg, dict): + _compression_env_map = { + "enabled": "CONTEXT_COMPRESSION_ENABLED", + "threshold": "CONTEXT_COMPRESSION_THRESHOLD", + "summary_model": "CONTEXT_COMPRESSION_MODEL", + } + for _cfg_key, _env_var in _compression_env_map.items(): + if _cfg_key in _compression_cfg: + os.environ[_env_var] = str(_compression_cfg[_cfg_key]) + _agent_cfg = _cfg.get("agent", {}) + if _agent_cfg and isinstance(_agent_cfg, dict): + if "max_turns" in _agent_cfg: + os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"]) except Exception: pass # Non-fatal; gateway can still run with .env values @@ -111,6 +125,28 @@ from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageTyp logger = logging.getLogger(__name__) +def _resolve_runtime_agent_kwargs() -> dict: + """Resolve provider credentials for gateway-created AIAgent instances.""" + from hermes_cli.runtime_provider import ( + resolve_runtime_provider, + format_runtime_provider_error, + ) + + try: + runtime = resolve_runtime_provider( + requested=os.getenv("HERMES_INFERENCE_PROVIDER"), + ) + except Exception as exc: + raise 
RuntimeError(format_runtime_provider_error(exc)) from exc + + return { + "api_key": runtime.get("api_key"), + "base_url": runtime.get("base_url"), + "provider": runtime.get("provider"), + "api_mode": runtime.get("api_mode"), + } + + class GatewayRunner: """ Main gateway controller. @@ -178,17 +214,12 @@ class GatewayRunner: return from run_agent import AIAgent - _flush_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "") - _flush_base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") - _flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL", "anthropic/claude-opus-4.6") - - if not _flush_api_key: + runtime_kwargs = _resolve_runtime_agent_kwargs() + if not runtime_kwargs.get("api_key"): return tmp_agent = AIAgent( - model=_flush_model, - api_key=_flush_api_key, - base_url=_flush_base_url, + **runtime_kwargs, max_iterations=8, quiet_mode=True, enabled_toolsets=["memory", "skills"], @@ -608,6 +639,19 @@ class GatewayRunner: # Check for commands command = event.get_command() + + # Emit command:* hook for any recognized slash command + _known_commands = {"new", "reset", "help", "status", "stop", "model", + "personality", "retry", "undo", "sethome", "set-home", + "compress", "usage"} + if command and command in _known_commands: + await self.hooks.emit(f"command:{command}", { + "platform": source.platform.value if source.platform else "", + "user_id": source.user_id, + "command": command, + "args": event.get_command_args().strip(), + }) + if command in ["new", "reset"]: return await self._handle_reset_command(event) @@ -634,6 +678,27 @@ class GatewayRunner: if command in ["sethome", "set-home"]: return await self._handle_set_home_command(event) + + if command == "compress": + return await self._handle_compress_command(event) + + if command == "usage": + return await self._handle_usage_command(event) + + # Skill slash commands: /skill-name loads the skill and sends to agent + if command: 
+ try: + from agent.skill_commands import get_skill_commands, build_skill_invocation_message + skill_cmds = get_skill_commands() + cmd_key = f"/{command}" + if cmd_key in skill_cmds: + user_instruction = event.get_command_args().strip() + msg = build_skill_invocation_message(cmd_key, user_instruction) + if msg: + event.text = msg + # Fall through to normal message processing with skill content + except Exception as e: + logger.debug("Skill command check failed (non-fatal): %s", e) # Check for pending exec approval responses if source.chat_type != "dm": @@ -663,6 +728,19 @@ class GatewayRunner: session_entry = self.session_store.get_or_create_session(source) session_key = session_entry.session_key + # Emit session:start for new or auto-reset sessions + _is_new_session = ( + session_entry.created_at == session_entry.updated_at + or getattr(session_entry, "was_auto_reset", False) + ) + if _is_new_session: + await self.hooks.emit("session:start", { + "platform": source.platform.value if source.platform else "", + "user_id": source.user_id, + "session_id": session_entry.session_id, + "session_key": session_key, + }) + # Build session context context = build_session_context(source, self.config, session_entry) @@ -916,15 +994,10 @@ class GatewayRunner: if old_history: from run_agent import AIAgent loop = asyncio.get_event_loop() - # Resolve credentials so the flush agent can reach the LLM - _flush_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "") - _flush_base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") - _flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL", "anthropic/claude-opus-4.6") + _flush_kwargs = _resolve_runtime_agent_kwargs() def _do_flush(): tmp_agent = AIAgent( - model=_flush_model, - api_key=_flush_api_key, - base_url=_flush_base_url, + **_flush_kwargs, max_iterations=5, quiet_mode=True, enabled_toolsets=["memory"], @@ -999,20 +1072,31 @@ class GatewayRunner: 
async def _handle_help_command(self, event: MessageEvent) -> str: """Handle /help command - list available commands.""" - return ( - "📖 **Hermes Commands**\n" - "\n" - "`/new` — Start a new conversation\n" - "`/reset` — Reset conversation history\n" - "`/status` — Show session info\n" - "`/stop` — Interrupt the running agent\n" - "`/model [name]` — Show or change the model\n" - "`/personality [name]` — Set a personality\n" - "`/retry` — Retry your last message\n" - "`/undo` — Remove the last exchange\n" - "`/sethome` — Set this chat as the home channel\n" - "`/help` — Show this message" - ) + lines = [ + "📖 **Hermes Commands**\n", + "`/new` — Start a new conversation", + "`/reset` — Reset conversation history", + "`/status` — Show session info", + "`/stop` — Interrupt the running agent", + "`/model [name]` — Show or change the model", + "`/personality [name]` — Set a personality", + "`/retry` — Retry your last message", + "`/undo` — Remove the last exchange", + "`/sethome` — Set this chat as the home channel", + "`/compress` — Compress conversation context", + "`/usage` — Show token usage for this session", + "`/help` — Show this message", + ] + try: + from agent.skill_commands import get_skill_commands + skill_cmds = get_skill_commands() + if skill_cmds: + lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):") + for cmd in sorted(skill_cmds): + lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}") + except Exception: + pass + return "\n".join(lines) async def _handle_model_command(self, event: MessageEvent) -> str: """Handle /model command - show or change the current model.""" @@ -1205,6 +1289,95 @@ class GatewayRunner: f"Cron jobs and cross-platform messages will be delivered here." 
) + async def _handle_compress_command(self, event: MessageEvent) -> str: + """Handle /compress command -- manually compress conversation context.""" + source = event.source + session_entry = self.session_store.get_or_create_session(source) + history = self.session_store.load_transcript(session_entry.session_id) + + if not history or len(history) < 4: + return "Not enough conversation to compress (need at least 4 messages)." + + try: + from run_agent import AIAgent + from agent.model_metadata import estimate_messages_tokens_rough + + runtime_kwargs = _resolve_runtime_agent_kwargs() + if not runtime_kwargs.get("api_key"): + return "No provider configured -- cannot compress." + + msgs = [ + {"role": m.get("role"), "content": m.get("content")} + for m in history + if m.get("role") in ("user", "assistant") and m.get("content") + ] + original_count = len(msgs) + approx_tokens = estimate_messages_tokens_rough(msgs) + + tmp_agent = AIAgent( + **runtime_kwargs, + max_iterations=4, + quiet_mode=True, + enabled_toolsets=["memory"], + session_id=session_entry.session_id, + ) + + loop = asyncio.get_event_loop() + compressed, _ = await loop.run_in_executor( + None, + lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens), + ) + + session_entry.conversation_history = compressed + new_count = len(compressed) + new_tokens = estimate_messages_tokens_rough(compressed) + + return ( + f"🗜️ Compressed: {original_count} → {new_count} messages\n" + f"~{approx_tokens:,} → ~{new_tokens:,} tokens" + ) + except Exception as e: + logger.warning("Manual compress failed: %s", e) + return f"Compression failed: {e}" + + async def _handle_usage_command(self, event: MessageEvent) -> str: + """Handle /usage command -- show token usage for the session's last agent run.""" + source = event.source + session_key = f"agent:main:{source.platform.value}:" + \ + (f"dm" if source.chat_type == "dm" else f"{source.chat_type}:{source.chat_id}") + + agent = 
self._running_agents.get(session_key) + if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0: + lines = [ + "📊 **Session Token Usage**", + f"Prompt (input): {agent.session_prompt_tokens:,}", + f"Completion (output): {agent.session_completion_tokens:,}", + f"Total: {agent.session_total_tokens:,}", + f"API calls: {agent.session_api_calls}", + ] + ctx = agent.context_compressor + if ctx.last_prompt_tokens: + pct = ctx.last_prompt_tokens / ctx.context_length * 100 if ctx.context_length else 0 + lines.append(f"Context: {ctx.last_prompt_tokens:,} / {ctx.context_length:,} ({pct:.0f}%)") + if ctx.compression_count: + lines.append(f"Compressions: {ctx.compression_count}") + return "\n".join(lines) + + # No running agent -- check session history for a rough count + session_entry = self.session_store.get_or_create_session(source) + history = self.session_store.load_transcript(session_entry.session_id) + if history: + from agent.model_metadata import estimate_messages_tokens_rough + msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")] + approx = estimate_messages_tokens_rough(msgs) + return ( + f"📊 **Session Info**\n" + f"Messages: {len(msgs)}\n" + f"Estimated context: ~{approx:,} tokens\n" + f"_(Detailed usage available during active conversations)_" + ) + return "No usage data available for this session." 
+ def _set_session_env(self, context: SessionContext) -> None: """Set environment variables for the current session.""" os.environ["HERMES_SESSION_PLATFORM"] = context.source.platform.value @@ -1593,6 +1766,25 @@ class GatewayRunner: result_holder = [None] # Mutable container for the result tools_holder = [None] # Mutable container for the tool definitions + # Bridge sync step_callback → async hooks.emit for agent:step events + _loop_for_step = asyncio.get_event_loop() + _hooks_ref = self.hooks + + def _step_callback_sync(iteration: int, tool_names: list) -> None: + try: + asyncio.run_coroutine_threadsafe( + _hooks_ref.emit("agent:step", { + "platform": source.platform.value if source.platform else "", + "user_id": source.user_id, + "session_id": session_id, + "iteration": iteration, + "tool_names": tool_names, + }), + _loop_for_step, + ) + except Exception as _e: + logger.debug("agent:step hook error: %s", _e) + def run_sync(): # Pass session_key to process registry via env var so background # processes can be mapped back to this gateway session @@ -1609,7 +1801,7 @@ class GatewayRunner: combined_ephemeral = context_prompt or "" if self._ephemeral_system_prompt: combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip() - + # Re-read .env and config for fresh credentials (gateway is long-lived, # keys may change without restart). 
try: @@ -1619,9 +1811,6 @@ class GatewayRunner: except Exception: pass - # Custom endpoint (OPENAI_*) takes precedence, matching CLI behavior - api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "") - base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" try: @@ -1635,24 +1824,22 @@ class GatewayRunner: model = _model_cfg elif isinstance(_model_cfg, dict): model = _model_cfg.get("default", model) - base_url = _model_cfg.get("base_url", base_url) - # Check if provider is nous — resolve OAuth credentials - provider = _model_cfg.get("provider", "") if isinstance(_model_cfg, dict) else "" - if provider == "nous": - try: - from hermes_cli.auth import resolve_nous_runtime_credentials - creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=5 * 60) - api_key = creds.get("api_key", api_key) - base_url = creds.get("base_url", base_url) - except Exception as nous_err: - logger.warning("Nous Portal credential resolution failed: %s", nous_err) except Exception: pass + try: + runtime_kwargs = _resolve_runtime_agent_kwargs() + except Exception as exc: + return { + "final_response": f"⚠️ Provider authentication failed: {exc}", + "messages": [], + "api_calls": 0, + "tools": [], + } + agent = AIAgent( model=model, - api_key=api_key, - base_url=base_url, + **runtime_kwargs, max_iterations=max_iterations, quiet_mode=True, verbose_logging=False, @@ -1662,6 +1849,7 @@ class GatewayRunner: reasoning_config=self._reasoning_config, session_id=session_id, tool_progress_callback=progress_callback if tool_progress_enabled else None, + step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None, platform=platform_key, honcho_session_key=session_key, session_db=self._session_db, @@ -1714,6 +1902,19 @@ class GatewayRunner: content = f"[Delivered from {mirror_src}] {content}" agent_history.append({"role": role, 
"content": content}) + # Collect MEDIA paths already in history so we can exclude them + # from the current turn's extraction. This is compression-safe: + # even if the message list shrinks, we know which paths are old. + _history_media_paths: set = set() + for _hm in agent_history: + if _hm.get("role") in ("tool", "function"): + _hc = _hm.get("content", "") + if "MEDIA:" in _hc: + for _match in re.finditer(r'MEDIA:(\S+)', _hc): + _p = _match.group(1).strip().rstrip('",}') + if _p: + _history_media_paths.add(_p) + result = agent.run_conversation(message, conversation_history=agent_history) result_holder[0] = result @@ -1734,22 +1935,25 @@ class GatewayRunner: # doesn't include them. We collect unique tags from tool results and # append any that aren't already present in the final response, so the # adapter's extract_media() can find and deliver the files exactly once. + # + # Uses path-based deduplication against _history_media_paths (collected + # before run_conversation) instead of index slicing. This is safe even + # when context compression shrinks the message list. 
(Fixes #160) if "MEDIA:" not in final_response: media_tags = [] has_voice_directive = False for msg in result.get("messages", []): - if msg.get("role") == "tool" or msg.get("role") == "function": + if msg.get("role") in ("tool", "function"): content = msg.get("content", "") if "MEDIA:" in content: for match in re.finditer(r'MEDIA:(\S+)', content): path = match.group(1).strip().rstrip('",}') - if path: + if path and path not in _history_media_paths: media_tags.append(f"MEDIA:{path}") if "[[audio_as_voice]]" in content: has_voice_directive = True if media_tags: - # Deduplicate while preserving order seen = set() unique_tags = [] for tag in media_tags: @@ -1934,10 +2138,21 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool: maxBytes=5 * 1024 * 1024, backupCount=3, ) - file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s')) + from agent.redact import RedactingFormatter + file_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s')) logging.getLogger().addHandler(file_handler) logging.getLogger().setLevel(logging.INFO) + # Separate errors-only log for easy debugging + error_handler = RotatingFileHandler( + log_dir / 'errors.log', + maxBytes=2 * 1024 * 1024, + backupCount=2, + ) + error_handler.setLevel(logging.WARNING) + error_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s')) + logging.getLogger().addHandler(error_handler) + runner = GatewayRunner(config) # Set up signal handlers diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 0941c6d91..098b7620c 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -10,7 +10,7 @@ Architecture: - Auth store (auth.json) holds per-provider credential state - resolve_provider() picks the active provider via priority chain - resolve_*_runtime_credentials() handles token refresh and key minting -- login_command() / logout_command() are the CLI entry points +- logout_command() is the 
CLI entry point for clearing auth """ from __future__ import annotations @@ -18,7 +18,10 @@ from __future__ import annotations import json import logging import os +import shutil import stat +import base64 +import subprocess import time import webbrowser from contextlib import contextmanager @@ -55,6 +58,10 @@ DEFAULT_NOUS_SCOPE = "inference:mint_agent_key" DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1 # poll at most every 1s +DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex" +CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" +CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token" +CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # ============================================================================= @@ -84,7 +91,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { client_id=DEFAULT_NOUS_CLIENT_ID, scope=DEFAULT_NOUS_SCOPE, ), - # Future: "openai_codex", "anthropic", etc. + "openai-codex": ProviderConfig( + id="openai-codex", + name="OpenAI Codex", + auth_type="oauth_external", + inference_base_url=DEFAULT_CODEX_BASE_URL, + ), } @@ -115,7 +127,7 @@ def format_auth_error(error: Exception) -> str: return str(error) if error.relogin_required: - return f"{error} Run `hermes login` to re-authenticate." + return f"{error} Run `hermes model` to re-authenticate." 
if error.code == "subscription_required": return ( @@ -298,12 +310,15 @@ def resolve_provider( """ normalized = (requested or "auto").strip().lower() + if normalized in {"openrouter", "custom"}: + return "openrouter" if normalized in PROVIDER_REGISTRY: return normalized - if normalized == "openrouter": - return "openrouter" if normalized != "auto": - return "openrouter" + raise AuthError( + f"Unknown provider '{normalized}'.", + code="invalid_provider", + ) # Explicit one-off CLI creds always mean openrouter/custom if explicit_api_key or explicit_base_url: @@ -314,8 +329,8 @@ def resolve_provider( auth_store = _load_auth_store() active = auth_store.get("active_provider") if active and active in PROVIDER_REGISTRY: - state = _load_provider_state(auth_store, active) - if state and (state.get("access_token") or state.get("refresh_token")): + status = get_auth_status(active) + if status.get("logged_in"): return active except Exception as e: logger.debug("Could not detect active auth provider: %s", e) @@ -369,6 +384,27 @@ def _optional_base_url(value: Any) -> Optional[str]: return cleaned if cleaned else None +def _decode_jwt_claims(token: Any) -> Dict[str, Any]: + if not isinstance(token, str) or token.count(".") != 2: + return {} + payload = token.split(".")[1] + payload += "=" * ((4 - len(payload) % 4) % 4) + try: + raw = base64.urlsafe_b64decode(payload.encode("utf-8")) + claims = json.loads(raw.decode("utf-8")) + except Exception: + return {} + return claims if isinstance(claims, dict) else {} + + +def _codex_access_token_is_expiring(access_token: Any, skew_seconds: int) -> bool: + claims = _decode_jwt_claims(access_token) + exp = claims.get("exp") + if not isinstance(exp, (int, float)): + return False + return float(exp) <= (time.time() + max(0, int(skew_seconds))) + + # ============================================================================= # SSH / remote session detection # ============================================================================= @@ 
-378,6 +414,302 @@ def _is_remote_session() -> bool: return bool(os.getenv("SSH_CLIENT") or os.getenv("SSH_TTY")) +# ============================================================================= +# OpenAI Codex auth file helpers +# ============================================================================= + +def resolve_codex_home_path() -> Path: + """Resolve CODEX_HOME, defaulting to ~/.codex.""" + codex_home = os.getenv("CODEX_HOME", "").strip() + if not codex_home: + codex_home = str(Path.home() / ".codex") + return Path(codex_home).expanduser() + + +def _codex_auth_file_path() -> Path: + return resolve_codex_home_path() / "auth.json" + + +def _codex_auth_lock_path(auth_path: Path) -> Path: + return auth_path.with_suffix(auth_path.suffix + ".lock") + + +@contextmanager +def _codex_auth_file_lock( + auth_path: Path, + timeout_seconds: float = AUTH_LOCK_TIMEOUT_SECONDS, +): + lock_path = _codex_auth_lock_path(auth_path) + lock_path.parent.mkdir(parents=True, exist_ok=True) + + with lock_path.open("a+") as lock_file: + if fcntl is None: + yield + return + + deadline = time.time() + max(1.0, timeout_seconds) + while True: + try: + fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) + break + except BlockingIOError: + if time.time() >= deadline: + raise TimeoutError(f"Timed out waiting for Codex auth lock: {lock_path}") + time.sleep(0.05) + + try: + yield + finally: + fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN) + + +def read_codex_auth_file() -> Dict[str, Any]: + """Read and validate Codex auth.json shape.""" + codex_home = resolve_codex_home_path() + if not codex_home.exists(): + raise AuthError( + f"Codex home directory not found at {codex_home}.", + provider="openai-codex", + code="codex_home_missing", + relogin_required=True, + ) + + auth_path = codex_home / "auth.json" + if not auth_path.exists(): + raise AuthError( + f"Codex auth file not found at {auth_path}.", + provider="openai-codex", + code="codex_auth_missing", + relogin_required=True, + 
) + + try: + payload = json.loads(auth_path.read_text()) + except Exception as exc: + raise AuthError( + f"Failed to parse Codex auth file at {auth_path}.", + provider="openai-codex", + code="codex_auth_invalid_json", + relogin_required=True, + ) from exc + + tokens = payload.get("tokens") + if not isinstance(tokens, dict): + raise AuthError( + "Codex auth file is missing a valid 'tokens' object.", + provider="openai-codex", + code="codex_auth_invalid_shape", + relogin_required=True, + ) + + access_token = tokens.get("access_token") + refresh_token = tokens.get("refresh_token") + if not isinstance(access_token, str) or not access_token.strip(): + raise AuthError( + "Codex auth file is missing tokens.access_token.", + provider="openai-codex", + code="codex_auth_missing_access_token", + relogin_required=True, + ) + if not isinstance(refresh_token, str) or not refresh_token.strip(): + raise AuthError( + "Codex auth file is missing tokens.refresh_token.", + provider="openai-codex", + code="codex_auth_missing_refresh_token", + relogin_required=True, + ) + + return { + "payload": payload, + "tokens": tokens, + "auth_path": auth_path, + "codex_home": codex_home, + } + + +def _persist_codex_auth_payload( + auth_path: Path, + payload: Dict[str, Any], + *, + lock_held: bool = False, +) -> None: + auth_path.parent.mkdir(parents=True, exist_ok=True) + + def _write() -> None: + serialized = json.dumps(payload, indent=2, ensure_ascii=False) + "\n" + tmp_path = auth_path.parent / f".{auth_path.name}.{os.getpid()}.{time.time_ns()}.tmp" + try: + with tmp_path.open("w", encoding="utf-8") as tmp_file: + tmp_file.write(serialized) + tmp_file.flush() + os.fsync(tmp_file.fileno()) + os.replace(tmp_path, auth_path) + finally: + if tmp_path.exists(): + try: + tmp_path.unlink() + except OSError: + pass + + try: + auth_path.chmod(stat.S_IRUSR | stat.S_IWUSR) + except OSError: + pass + + if lock_held: + _write() + return + + with _codex_auth_file_lock(auth_path): + _write() + + +def 
_refresh_codex_auth_tokens( + *, + payload: Dict[str, Any], + auth_path: Path, + timeout_seconds: float, + lock_held: bool = False, +) -> Dict[str, Any]: + tokens = payload.get("tokens") + if not isinstance(tokens, dict): + raise AuthError( + "Codex auth file is missing a valid 'tokens' object.", + provider="openai-codex", + code="codex_auth_invalid_shape", + relogin_required=True, + ) + + refresh_token = tokens.get("refresh_token") + if not isinstance(refresh_token, str) or not refresh_token.strip(): + raise AuthError( + "Codex auth file is missing tokens.refresh_token.", + provider="openai-codex", + code="codex_auth_missing_refresh_token", + relogin_required=True, + ) + + timeout = httpx.Timeout(max(5.0, float(timeout_seconds))) + with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}) as client: + response = client.post( + CODEX_OAUTH_TOKEN_URL, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + data={ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": CODEX_OAUTH_CLIENT_ID, + }, + ) + + if response.status_code != 200: + code = "codex_refresh_failed" + message = f"Codex token refresh failed with status {response.status_code}." 
+ relogin_required = False + try: + err = response.json() + if isinstance(err, dict): + err_code = err.get("error") + if isinstance(err_code, str) and err_code.strip(): + code = err_code.strip() + err_desc = err.get("error_description") or err.get("message") + if isinstance(err_desc, str) and err_desc.strip(): + message = f"Codex token refresh failed: {err_desc.strip()}" + except Exception: + pass + if code in {"invalid_grant", "invalid_token", "invalid_request"}: + relogin_required = True + raise AuthError( + message, + provider="openai-codex", + code=code, + relogin_required=relogin_required, + ) + + try: + refresh_payload = response.json() + except Exception as exc: + raise AuthError( + "Codex token refresh returned invalid JSON.", + provider="openai-codex", + code="codex_refresh_invalid_json", + relogin_required=True, + ) from exc + + access_token = refresh_payload.get("access_token") + if not isinstance(access_token, str) or not access_token.strip(): + raise AuthError( + "Codex token refresh response was missing access_token.", + provider="openai-codex", + code="codex_refresh_missing_access_token", + relogin_required=True, + ) + + updated_tokens = dict(tokens) + updated_tokens["access_token"] = access_token.strip() + next_refresh = refresh_payload.get("refresh_token") + if isinstance(next_refresh, str) and next_refresh.strip(): + updated_tokens["refresh_token"] = next_refresh.strip() + payload["tokens"] = updated_tokens + payload["last_refresh"] = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") + _persist_codex_auth_payload(auth_path, payload, lock_held=lock_held) + return updated_tokens + + +def resolve_codex_runtime_credentials( + *, + force_refresh: bool = False, + refresh_if_expiring: bool = True, + refresh_skew_seconds: int = CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, +) -> Dict[str, Any]: + """Resolve runtime credentials from Codex CLI auth state.""" + data = read_codex_auth_file() + payload = data["payload"] + tokens = dict(data["tokens"]) + 
auth_path = data["auth_path"] + access_token = str(tokens.get("access_token", "") or "").strip() + refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20")) + + should_refresh = bool(force_refresh) + if (not should_refresh) and refresh_if_expiring: + should_refresh = _codex_access_token_is_expiring(access_token, refresh_skew_seconds) + if should_refresh: + lock_timeout = max(float(AUTH_LOCK_TIMEOUT_SECONDS), refresh_timeout_seconds + 5.0) + with _codex_auth_file_lock(auth_path, timeout_seconds=lock_timeout): + data = read_codex_auth_file() + payload = data["payload"] + tokens = dict(data["tokens"]) + access_token = str(tokens.get("access_token", "") or "").strip() + + should_refresh = bool(force_refresh) + if (not should_refresh) and refresh_if_expiring: + should_refresh = _codex_access_token_is_expiring(access_token, refresh_skew_seconds) + + if should_refresh: + tokens = _refresh_codex_auth_tokens( + payload=payload, + auth_path=auth_path, + timeout_seconds=refresh_timeout_seconds, + lock_held=True, + ) + access_token = str(tokens.get("access_token", "") or "").strip() + + base_url = ( + os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") + or DEFAULT_CODEX_BASE_URL + ) + + return { + "provider": "openai-codex", + "base_url": base_url, + "api_key": access_token, + "source": "codex-auth-json", + "last_refresh": payload.get("last_refresh"), + "auth_mode": payload.get("auth_mode"), + "auth_file": str(auth_path), + "codex_home": str(data["codex_home"]), + } + + # ============================================================================= # TLS verification helper # ============================================================================= @@ -806,14 +1138,73 @@ def get_nous_auth_status() -> Dict[str, Any]: } +def get_codex_auth_status() -> Dict[str, Any]: + """Status snapshot for Codex auth.""" + state = get_provider_auth_state("openai-codex") or {} + auth_file = state.get("auth_file") or str(_codex_auth_file_path()) + 
codex_home = state.get("codex_home") or str(resolve_codex_home_path()) + try: + creds = resolve_codex_runtime_credentials() + return { + "logged_in": True, + "auth_file": creds.get("auth_file"), + "codex_home": creds.get("codex_home"), + "last_refresh": creds.get("last_refresh"), + "auth_mode": creds.get("auth_mode"), + "source": creds.get("source"), + } + except AuthError as exc: + return { + "logged_in": False, + "auth_file": auth_file, + "codex_home": codex_home, + "error": str(exc), + } + + def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: """Generic auth status dispatcher.""" target = provider_id or get_active_provider() if target == "nous": return get_nous_auth_status() + if target == "openai-codex": + return get_codex_auth_status() return {"logged_in": False} +# ============================================================================= +# External credential detection +# ============================================================================= + +def detect_external_credentials() -> List[Dict[str, Any]]: + """Scan for credentials from other CLI tools that Hermes can reuse. + + Returns a list of dicts, each with: + - provider: str -- Hermes provider id (e.g. 
"openai-codex") + - path: str -- filesystem path where creds were found + - label: str -- human-friendly description for the setup UI + """ + found: List[Dict[str, Any]] = [] + + # Codex CLI: ~/.codex/auth.json (or $CODEX_HOME/auth.json) + try: + codex_home = resolve_codex_home_path() + codex_auth = codex_home / "auth.json" + if codex_auth.is_file(): + data = json.loads(codex_auth.read_text()) + tokens = data.get("tokens", {}) + if isinstance(tokens, dict) and tokens.get("access_token"): + found.append({ + "provider": "openai-codex", + "path": str(codex_auth), + "label": f"Codex CLI credentials found ({codex_auth})", + }) + except Exception: + pass + + return found + + # ============================================================================= # CLI Commands — login / logout # ============================================================================= @@ -970,21 +1361,218 @@ def _save_model_choice(model_id: str) -> None: def login_command(args) -> None: - """Run OAuth device code login for the selected provider.""" - provider_id = getattr(args, "provider", None) or "nous" + """Deprecated: use 'hermes model' or 'hermes setup' instead.""" + print("The 'hermes login' command has been removed.") + print("Use 'hermes model' to select a provider and model,") + print("or 'hermes setup' for full interactive setup.") + raise SystemExit(0) - if provider_id not in PROVIDER_REGISTRY: - print(f"Unknown provider: {provider_id}") - print(f"Available: {', '.join(PROVIDER_REGISTRY.keys())}") - raise SystemExit(1) - pconfig = PROVIDER_REGISTRY[provider_id] +def _login_openai_codex(args, pconfig: ProviderConfig) -> None: + """OpenAI Codex login via device code flow (no Codex CLI required).""" + codex_home = resolve_codex_home_path() - if provider_id == "nous": - _login_nous(args, pconfig) - else: - print(f"Login for provider '{provider_id}' is not yet implemented.") - raise SystemExit(1) + # Check for existing valid credentials first + try: + existing = 
resolve_codex_runtime_credentials() + print(f"Existing Codex credentials found at {codex_home / 'auth.json'}") + try: + reuse = input("Use existing credentials? [Y/n]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + reuse = "y" + if reuse in ("", "y", "yes"): + creds = existing + _save_codex_provider_state(creds) + return + except AuthError: + pass + + # No existing creds (or user declined) -- run device code flow + print() + print("Signing in to OpenAI Codex...") + print() + + creds = _codex_device_code_login() + _save_codex_provider_state(creds) + + +def _save_codex_provider_state(creds: Dict[str, Any]) -> None: + """Persist Codex provider state to auth store and config.""" + auth_state = { + "auth_file": creds.get("auth_file"), + "codex_home": creds.get("codex_home"), + "last_refresh": creds.get("last_refresh"), + "auth_mode": creds.get("auth_mode"), + "source": creds.get("source"), + } + + with _auth_store_lock(): + auth_store = _load_auth_store() + _save_provider_state(auth_store, "openai-codex", auth_state) + saved_to = _save_auth_store(auth_store) + + config_path = _update_config_for_provider("openai-codex", creds.get("base_url", DEFAULT_CODEX_BASE_URL)) + print() + print("Login successful!") + print(f" Auth state: {saved_to}") + print(f" Config updated: {config_path} (model.provider=openai-codex)") + + +def _codex_device_code_login() -> Dict[str, Any]: + """Run the OpenAI device code login flow and return credentials dict.""" + import time as _time + + issuer = "https://auth.openai.com" + client_id = CODEX_OAUTH_CLIENT_ID + + # Step 1: Request device code + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + resp = client.post( + f"{issuer}/api/accounts/deviceauth/usercode", + json={"client_id": client_id}, + headers={"Content-Type": "application/json"}, + ) + except Exception as exc: + raise AuthError( + f"Failed to request device code: {exc}", + provider="openai-codex", code="device_code_request_failed", + ) + + if 
resp.status_code != 200: + raise AuthError( + f"Device code request returned status {resp.status_code}.", + provider="openai-codex", code="device_code_request_error", + ) + + device_data = resp.json() + user_code = device_data.get("user_code", "") + device_auth_id = device_data.get("device_auth_id", "") + poll_interval = max(3, int(device_data.get("interval", "5"))) + + if not user_code or not device_auth_id: + raise AuthError( + "Device code response missing required fields.", + provider="openai-codex", code="device_code_incomplete", + ) + + # Step 2: Show user the code + print("To continue, follow these steps:\n") + print(f" 1. Open this URL in your browser:") + print(f" \033[94m{issuer}/codex/device\033[0m\n") + print(f" 2. Enter this code:") + print(f" \033[94m{user_code}\033[0m\n") + print("Waiting for sign-in... (press Ctrl+C to cancel)") + + # Step 3: Poll for authorization code + max_wait = 15 * 60 # 15 minutes + start = _time.monotonic() + code_resp = None + + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + while _time.monotonic() - start < max_wait: + _time.sleep(poll_interval) + poll_resp = client.post( + f"{issuer}/api/accounts/deviceauth/token", + json={"device_auth_id": device_auth_id, "user_code": user_code}, + headers={"Content-Type": "application/json"}, + ) + + if poll_resp.status_code == 200: + code_resp = poll_resp.json() + break + elif poll_resp.status_code in (403, 404): + continue # User hasn't completed login yet + else: + raise AuthError( + f"Device auth polling returned status {poll_resp.status_code}.", + provider="openai-codex", code="device_code_poll_error", + ) + except KeyboardInterrupt: + print("\nLogin cancelled.") + raise SystemExit(130) + + if code_resp is None: + raise AuthError( + "Login timed out after 15 minutes.", + provider="openai-codex", code="device_code_timeout", + ) + + # Step 4: Exchange authorization code for tokens + authorization_code = code_resp.get("authorization_code", "") + code_verifier = 
code_resp.get("code_verifier", "") + redirect_uri = f"{issuer}/deviceauth/callback" + + if not authorization_code or not code_verifier: + raise AuthError( + "Device auth response missing authorization_code or code_verifier.", + provider="openai-codex", code="device_code_incomplete_exchange", + ) + + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + token_resp = client.post( + CODEX_OAUTH_TOKEN_URL, + data={ + "grant_type": "authorization_code", + "code": authorization_code, + "redirect_uri": redirect_uri, + "client_id": client_id, + "code_verifier": code_verifier, + }, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + except Exception as exc: + raise AuthError( + f"Token exchange failed: {exc}", + provider="openai-codex", code="token_exchange_failed", + ) + + if token_resp.status_code != 200: + raise AuthError( + f"Token exchange returned status {token_resp.status_code}.", + provider="openai-codex", code="token_exchange_error", + ) + + tokens = token_resp.json() + access_token = tokens.get("access_token", "") + refresh_token = tokens.get("refresh_token", "") + + if not access_token: + raise AuthError( + "Token exchange did not return an access_token.", + provider="openai-codex", code="token_exchange_no_access_token", + ) + + # Step 5: Persist tokens to ~/.codex/auth.json + codex_home = resolve_codex_home_path() + codex_home.mkdir(parents=True, exist_ok=True) + auth_path = codex_home / "auth.json" + + payload = { + "tokens": { + "access_token": access_token, + "refresh_token": refresh_token, + }, + "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + } + _persist_codex_auth_payload(auth_path, payload, lock_held=False) + + base_url = ( + os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") + or DEFAULT_CODEX_BASE_URL + ) + + return { + "api_key": access_token, + "base_url": base_url, + "auth_file": str(auth_path), + "codex_home": str(codex_home), + "last_refresh": payload["last_refresh"], + 
"auth_mode": "chatgpt", + "source": "device-code", + } def _login_nous(args, pconfig: ProviderConfig) -> None: @@ -1168,6 +1756,6 @@ def logout_command(args) -> None: if os.getenv("OPENROUTER_API_KEY"): print("Hermes will use OpenRouter for inference.") else: - print("Run `hermes login` or configure an API key to use Hermes.") + print("Run `hermes model` or configure an API key to use Hermes.") else: print(f"No auth state found for {provider_name}.") diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py new file mode 100644 index 000000000..75559396f --- /dev/null +++ b/hermes_cli/codex_models.py @@ -0,0 +1,144 @@ +"""Codex model discovery from API, local cache, and config.""" + +from __future__ import annotations + +import json +import logging +from pathlib import Path +from typing import List, Optional + +from hermes_cli.auth import resolve_codex_home_path + +logger = logging.getLogger(__name__) + +DEFAULT_CODEX_MODELS: List[str] = [ + "gpt-5.3-codex", + "gpt-5.2-codex", + "gpt-5.1-codex-max", + "gpt-5.1-codex-mini", +] + + +def _fetch_models_from_api(access_token: str) -> List[str]: + """Fetch available models from the Codex API. 
Returns visible models sorted by priority.""" + try: + import httpx + resp = httpx.get( + "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0", + headers={"Authorization": f"Bearer {access_token}"}, + timeout=10, + ) + if resp.status_code != 200: + return [] + data = resp.json() + entries = data.get("models", []) if isinstance(data, dict) else [] + except Exception as exc: + logger.debug("Failed to fetch Codex models from API: %s", exc) + return [] + + sortable = [] + for item in entries: + if not isinstance(item, dict): + continue + slug = item.get("slug") + if not isinstance(slug, str) or not slug.strip(): + continue + slug = slug.strip() + if item.get("supported_in_api") is False: + continue + visibility = item.get("visibility", "") + if isinstance(visibility, str) and visibility.strip().lower() == "hide": + continue + priority = item.get("priority") + rank = int(priority) if isinstance(priority, (int, float)) else 10_000 + sortable.append((rank, slug)) + + sortable.sort(key=lambda x: (x[0], x[1])) + return [slug for _, slug in sortable] + + +def _read_default_model(codex_home: Path) -> Optional[str]: + config_path = codex_home / "config.toml" + if not config_path.exists(): + return None + try: + import tomllib + except Exception: + return None + try: + payload = tomllib.loads(config_path.read_text(encoding="utf-8")) + except Exception: + return None + model = payload.get("model") if isinstance(payload, dict) else None + if isinstance(model, str) and model.strip(): + return model.strip() + return None + + +def _read_cache_models(codex_home: Path) -> List[str]: + cache_path = codex_home / "models_cache.json" + if not cache_path.exists(): + return [] + try: + raw = json.loads(cache_path.read_text(encoding="utf-8")) + except Exception: + return [] + + entries = raw.get("models") if isinstance(raw, dict) else None + sortable = [] + if isinstance(entries, list): + for item in entries: + if not isinstance(item, dict): + continue + slug = 
item.get("slug") + if not isinstance(slug, str) or not slug.strip(): + continue + slug = slug.strip() + if "codex" not in slug.lower(): + continue + if item.get("supported_in_api") is False: + continue + visibility = item.get("visibility") + if isinstance(visibility, str) and visibility.strip().lower() == "hidden": + continue + priority = item.get("priority") + rank = int(priority) if isinstance(priority, (int, float)) else 10_000 + sortable.append((rank, slug)) + + sortable.sort(key=lambda item: (item[0], item[1])) + deduped: List[str] = [] + for _, slug in sortable: + if slug not in deduped: + deduped.append(slug) + return deduped + + +def get_codex_model_ids(access_token: Optional[str] = None) -> List[str]: + """Return available Codex model IDs, trying API first, then local sources. + + Resolution order: API (live, if token provided) > config.toml default > + local cache > hardcoded defaults. + """ + codex_home = resolve_codex_home_path() + ordered: List[str] = [] + + # Try live API if we have a token + if access_token: + api_models = _fetch_models_from_api(access_token) + if api_models: + return api_models + + # Fall back to local sources + default_model = _read_default_model(codex_home) + if default_model: + ordered.append(default_model) + + for model_id in _read_cache_models(codex_home): + if model_id not in ordered: + ordered.append(model_id) + + for model_id in DEFAULT_CODEX_MODELS: + if model_id not in ordered: + ordered.append(model_id) + + return ordered diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index b7e5a6213..b091a7905 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -26,6 +26,8 @@ COMMANDS = { "/skills": "Search, install, inspect, or manage skills from online registries", "/platforms": "Show gateway/messaging platform status", "/verbose": "Cycle tool progress display: off → new → all → verbose", + "/compress": "Manually compress conversation context (flush memories + summarize)", + "/usage": "Show token usage for 
the current session", "/quit": "Exit the CLI (also: /exit, /q)", } diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 742675d03..031c6eaf8 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -175,6 +175,36 @@ def run_doctor(args): else: check_warn("config.yaml not found", "(using defaults)") + # ========================================================================= + # Check: Auth providers + # ========================================================================= + print() + print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD)) + + try: + from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status + + nous_status = get_nous_auth_status() + if nous_status.get("logged_in"): + check_ok("Nous Portal auth", "(logged in)") + else: + check_warn("Nous Portal auth", "(not logged in)") + + codex_status = get_codex_auth_status() + if codex_status.get("logged_in"): + check_ok("OpenAI Codex auth", "(logged in)") + else: + check_warn("OpenAI Codex auth", "(not logged in)") + if codex_status.get("error"): + check_info(codex_status["error"]) + except Exception as e: + check_warn("Auth provider status", f"(could not check: {e})") + + if shutil.which("codex"): + check_ok("codex CLI") + else: + check_warn("codex CLI not found", "(optional; openai-codex login uses device code, no CLI needed)") + # ========================================================================= + # Check: Directory structure + # ========================================================================= diff --git a/hermes_cli/main.py b/hermes_cli/main.py index b232d5b55..2bc391aad 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -12,7 +12,6 @@ Usage: hermes gateway install # Install gateway service hermes gateway uninstall # Uninstall gateway service hermes setup # Interactive setup wizard - hermes login # Authenticate with Nous Portal (or other providers) hermes logout # Clear stored authentication hermes status # Show status of all components hermes cron # Manage cron jobs @@ -60,6
+59,7 @@ logger = logging.getLogger(__name__) def _has_any_provider_configured() -> bool: """Check if at least one inference provider is usable.""" from hermes_cli.config import get_env_path, get_hermes_home + from hermes_cli.auth import get_auth_status # Check env vars (may be set by .env or shell). # OPENAI_BASE_URL alone counts — local models (vLLM, llama.cpp, etc.) @@ -91,8 +91,8 @@ def _has_any_provider_configured() -> bool: auth = json.loads(auth_file.read_text()) active = auth.get("active_provider") if active: - state = auth.get("providers", {}).get(active, {}) - if state.get("access_token") or state.get("refresh_token"): + status = get_auth_status(active) + if status.get("logged_in"): return True except Exception: pass @@ -289,7 +289,7 @@ def cmd_model(args): resolve_provider, get_provider_auth_state, PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice, _update_config_for_provider, resolve_nous_runtime_credentials, fetch_nous_models, AuthError, format_auth_error, - _login_nous, ProviderConfig, + _login_nous, ) from hermes_cli.config import load_config, save_config, get_env_value, save_env_value @@ -312,7 +312,12 @@ def cmd_model(args): or config_provider or "auto" ) - active = resolve_provider(effective_provider) + try: + active = resolve_provider(effective_provider) + except AuthError as exc: + warning = format_auth_error(exc) + print(f"Warning: {warning} Falling back to auto provider detection.") + active = resolve_provider("auto") # Detect custom endpoint if active == "openrouter" and get_env_value("OPENAI_BASE_URL"): @@ -321,6 +326,7 @@ def cmd_model(args): provider_labels = { "openrouter": "OpenRouter", "nous": "Nous Portal", + "openai-codex": "OpenAI Codex", "custom": "Custom endpoint", } active_label = provider_labels.get(active, active) @@ -334,11 +340,12 @@ def cmd_model(args): providers = [ ("openrouter", "OpenRouter (100+ models, pay-per-use)"), ("nous", "Nous Portal (Nous Research subscription)"), + ("openai-codex", "OpenAI Codex"), 
("custom", "Custom endpoint (self-hosted / VLLM / etc.)"), ] # Reorder so the active provider is at the top - active_key = active if active in ("openrouter", "nous") else "custom" + active_key = active if active in ("openrouter", "nous", "openai-codex") else "custom" ordered = [] for key, label in providers: if key == active_key: @@ -359,6 +366,8 @@ def cmd_model(args): _model_flow_openrouter(config, current_model) elif selected_provider == "nous": _model_flow_nous(config, current_model) + elif selected_provider == "openai-codex": + _model_flow_openai_codex(config, current_model) elif selected_provider == "custom": _model_flow_custom(config) @@ -512,6 +521,53 @@ def _model_flow_nous(config, current_model=""): print("No change.") +def _model_flow_openai_codex(config, current_model=""): + """OpenAI Codex provider: ensure logged in, then pick model.""" + from hermes_cli.auth import ( + get_codex_auth_status, _prompt_model_selection, _save_model_choice, + _update_config_for_provider, _login_openai_codex, + PROVIDER_REGISTRY, DEFAULT_CODEX_BASE_URL, + ) + from hermes_cli.codex_models import get_codex_model_ids + from hermes_cli.config import get_env_value, save_env_value + import argparse + + status = get_codex_auth_status() + if not status.get("logged_in"): + print("Not logged into OpenAI Codex. 
Starting login...") + print() + try: + mock_args = argparse.Namespace() + _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"]) + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + + _codex_token = None + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + _codex_creds = resolve_codex_runtime_credentials() + _codex_token = _codex_creds.get("api_key") + except Exception: + pass + codex_models = get_codex_model_ids(access_token=_codex_token) + + selected = _prompt_model_selection(codex_models, current_model=current_model) + if selected: + _save_model_choice(selected) + _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) + # Clear custom endpoint env vars that would otherwise override Codex. + if get_env_value("OPENAI_BASE_URL"): + save_env_value("OPENAI_BASE_URL", "") + save_env_value("OPENAI_API_KEY", "") + print(f"Default model set to: {selected} (via OpenAI Codex)") + else: + print("No change.") + + def _model_flow_custom(config): """Custom endpoint: collect URL, API key, and model name.""" from hermes_cli.auth import _save_model_choice, deactivate_provider @@ -777,8 +833,8 @@ def cmd_update(args): pass # No systemd (macOS, WSL1, etc.) 
— skip silently print() - print("Tip: You can now log in with Nous Portal for inference:") - print(" hermes login # Authenticate with Nous Portal") + print("Tip: You can now select a provider and model:") + print(" hermes model # Select provider and model") except subprocess.CalledProcessError as e: print(f"✗ Update failed: {e}") @@ -798,7 +854,6 @@ Examples: hermes --continue Resume the most recent session hermes --resume Resume a specific session hermes setup Run setup wizard - hermes login Authenticate with an inference provider hermes logout Clear stored authentication hermes model Select default model hermes config View configuration @@ -857,7 +912,7 @@ For more help on a command: ) chat_parser.add_argument( "--provider", - choices=["auto", "openrouter", "nous"], + choices=["auto", "openrouter", "nous", "openai-codex"], default=None, help="Inference provider (default: auto)" ) @@ -966,9 +1021,9 @@ For more help on a command: ) login_parser.add_argument( "--provider", - choices=["nous"], + choices=["nous", "openai-codex"], default=None, - help="Provider to authenticate with (default: interactive selection)" + help="Provider to authenticate with (default: nous)" ) login_parser.add_argument( "--portal-url", @@ -1020,7 +1075,7 @@ For more help on a command: ) logout_parser.add_argument( "--provider", - choices=["nous"], + choices=["nous", "openai-codex"], default=None, help="Provider to log out from (default: active provider)" ) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py new file mode 100644 index 000000000..1f070ac22 --- /dev/null +++ b/hermes_cli/runtime_provider.py @@ -0,0 +1,149 @@ +"""Shared runtime provider resolution for CLI, gateway, cron, and helpers.""" + +from __future__ import annotations + +import os +from typing import Any, Dict, Optional + +from hermes_cli.auth import ( + AuthError, + format_auth_error, + resolve_provider, + resolve_nous_runtime_credentials, + resolve_codex_runtime_credentials, +) +from 
hermes_cli.config import load_config +from hermes_constants import OPENROUTER_BASE_URL + + +def _get_model_config() -> Dict[str, Any]: + config = load_config() + model_cfg = config.get("model") + if isinstance(model_cfg, dict): + return dict(model_cfg) + if isinstance(model_cfg, str) and model_cfg.strip(): + return {"default": model_cfg.strip()} + return {} + + +def resolve_requested_provider(requested: Optional[str] = None) -> str: + """Resolve provider request from explicit arg, env, then config.""" + if requested and requested.strip(): + return requested.strip().lower() + + env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower() + if env_provider: + return env_provider + + model_cfg = _get_model_config() + cfg_provider = model_cfg.get("provider") + if isinstance(cfg_provider, str) and cfg_provider.strip(): + return cfg_provider.strip().lower() + + return "auto" + + +def _resolve_openrouter_runtime( + *, + requested_provider: str, + explicit_api_key: Optional[str] = None, + explicit_base_url: Optional[str] = None, +) -> Dict[str, Any]: + model_cfg = _get_model_config() + cfg_base_url = model_cfg.get("base_url") if isinstance(model_cfg.get("base_url"), str) else "" + cfg_provider = model_cfg.get("provider") if isinstance(model_cfg.get("provider"), str) else "" + requested_norm = (requested_provider or "").strip().lower() + cfg_provider = cfg_provider.strip().lower() + + env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip() + env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip() + + use_config_base_url = False + if requested_norm == "auto": + if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url: + if not cfg_provider or cfg_provider == "auto": + use_config_base_url = True + + base_url = ( + (explicit_base_url or "").strip() + or env_openai_base_url + or (cfg_base_url.strip() if use_config_base_url else "") + or env_openrouter_base_url + or OPENROUTER_BASE_URL + ).rstrip("/") + + api_key = ( + 
explicit_api_key + or os.getenv("OPENAI_API_KEY") + or os.getenv("OPENROUTER_API_KEY") + or "" + ) + + source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config" + + return { + "provider": "openrouter", + "api_mode": "chat_completions", + "base_url": base_url, + "api_key": api_key, + "source": source, + } + + +def resolve_runtime_provider( + *, + requested: Optional[str] = None, + explicit_api_key: Optional[str] = None, + explicit_base_url: Optional[str] = None, +) -> Dict[str, Any]: + """Resolve runtime provider credentials for agent execution.""" + requested_provider = resolve_requested_provider(requested) + + provider = resolve_provider( + requested_provider, + explicit_api_key=explicit_api_key, + explicit_base_url=explicit_base_url, + ) + + if provider == "nous": + creds = resolve_nous_runtime_credentials( + min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + ) + return { + "provider": "nous", + "api_mode": "chat_completions", + "base_url": creds.get("base_url", "").rstrip("/"), + "api_key": creds.get("api_key", ""), + "source": creds.get("source", "portal"), + "expires_at": creds.get("expires_at"), + "requested_provider": requested_provider, + } + + if provider == "openai-codex": + creds = resolve_codex_runtime_credentials() + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": creds.get("base_url", "").rstrip("/"), + "api_key": creds.get("api_key", ""), + "source": creds.get("source", "codex-auth-json"), + "auth_file": creds.get("auth_file"), + "codex_home": creds.get("codex_home"), + "last_refresh": creds.get("last_refresh"), + "requested_provider": requested_provider, + } + + runtime = _resolve_openrouter_runtime( + requested_provider=requested_provider, + explicit_api_key=explicit_api_key, + explicit_base_url=explicit_base_url, + ) + runtime["requested_provider"] = requested_provider + return 
runtime + + +def format_runtime_provider_error(error: Exception) -> str: + if isinstance(error, AuthError): + return format_auth_error(error) + return str(error) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 6ed9fb64a..fa4dcebb4 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -620,11 +620,24 @@ def run_setup_wizard(args): get_active_provider, get_provider_auth_state, PROVIDER_REGISTRY, format_auth_error, AuthError, fetch_nous_models, resolve_nous_runtime_credentials, _update_config_for_provider, + _login_openai_codex, get_codex_auth_status, DEFAULT_CODEX_BASE_URL, + detect_external_credentials, ) existing_custom = get_env_value("OPENAI_BASE_URL") existing_or = get_env_value("OPENROUTER_API_KEY") active_oauth = get_active_provider() + # Detect credentials from other CLI tools + detected_creds = detect_external_credentials() + if detected_creds: + print_info("Detected existing credentials:") + for cred in detected_creds: + if cred["provider"] == "openai-codex": + print_success(f" * {cred['label']} -- select \"OpenAI Codex\" to use it") + else: + print_info(f" * {cred['label']}") + print() + # Detect if any provider is already configured has_any_provider = bool(active_oauth or existing_custom or existing_or) @@ -640,6 +653,7 @@ def run_setup_wizard(args): provider_choices = [ "Login with Nous Portal (Nous Research subscription)", + "Login with OpenAI Codex", "OpenRouter API key (100+ models, pay-per-use)", "Custom OpenAI-compatible endpoint (self-hosted / VLLM / etc.)", ] @@ -647,7 +661,7 @@ def run_setup_wizard(args): provider_choices.append(keep_label) # Default to "Keep current" if a provider exists, otherwise OpenRouter (most common) - default_provider = len(provider_choices) - 1 if has_any_provider else 1 + default_provider = len(provider_choices) - 1 if has_any_provider else 2 if not has_any_provider: print_warning("An inference provider is required for Hermes to work.") @@ -656,7 +670,7 @@ def run_setup_wizard(args): provider_idx = 
prompt_choice("Select your inference provider:", provider_choices, default_provider) # Track which provider was selected for model step - selected_provider = None # "nous", "openrouter", "custom", or None (keep) + selected_provider = None # "nous", "openai-codex", "openrouter", "custom", or None (keep) nous_models = [] # populated if Nous login succeeds if provider_idx == 0: # Nous Portal @@ -692,14 +706,38 @@ def run_setup_wizard(args): except SystemExit: print_warning("Nous Portal login was cancelled or failed.") - print_info("You can try again later with: hermes login") + print_info("You can try again later with: hermes model") selected_provider = None except Exception as e: print_error(f"Login failed: {e}") - print_info("You can try again later with: hermes login") + print_info("You can try again later with: hermes model") selected_provider = None - elif provider_idx == 1: # OpenRouter + elif provider_idx == 1: # OpenAI Codex + selected_provider = "openai-codex" + print() + print_header("OpenAI Codex Login") + print() + + try: + import argparse + mock_args = argparse.Namespace() + _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"]) + # Clear custom endpoint vars that would override provider routing. 
+ if existing_custom: + save_env_value("OPENAI_BASE_URL", "") + save_env_value("OPENAI_API_KEY", "") + _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) + except SystemExit: + print_warning("OpenAI Codex login was cancelled or failed.") + print_info("You can try again later with: hermes model") + selected_provider = None + except Exception as e: + print_error(f"Login failed: {e}") + print_info("You can try again later with: hermes model") + selected_provider = None + + elif provider_idx == 2: # OpenRouter selected_provider = "openrouter" print() print_header("OpenRouter API Key") @@ -726,7 +764,7 @@ def run_setup_wizard(args): save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - elif provider_idx == 2: # Custom endpoint + elif provider_idx == 3: # Custom endpoint selected_provider = "custom" print() print_header("Custom OpenAI-Compatible Endpoint") @@ -753,14 +791,14 @@ def run_setup_wizard(args): config['model'] = model_name save_env_value("LLM_MODEL", model_name) print_success("Custom endpoint configured") - # else: provider_idx == 3 (Keep current) — only shown when a provider already exists + # else: provider_idx == 4 (Keep current) — only shown when a provider already exists # ========================================================================= # Step 1b: OpenRouter API Key for tools (if not already set) # ========================================================================= # Tools (vision, web, MoA) use OpenRouter independently of the main provider. # Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen. 
- if selected_provider in ("nous", "custom") and not get_env_value("OPENROUTER_API_KEY"): + if selected_provider in ("nous", "openai-codex", "custom") and not get_env_value("OPENROUTER_API_KEY"): print() print_header("OpenRouter API Key (for tools)") print_info("Tools like vision analysis, web search, and MoA use OpenRouter") @@ -806,6 +844,33 @@ def run_setup_wizard(args): config['model'] = custom save_env_value("LLM_MODEL", custom) # else: keep current + elif selected_provider == "openai-codex": + from hermes_cli.codex_models import get_codex_model_ids + # Try to get the access token for live model discovery + _codex_token = None + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + _codex_creds = resolve_codex_runtime_credentials() + _codex_token = _codex_creds.get("api_key") + except Exception: + pass + codex_models = get_codex_model_ids(access_token=_codex_token) + model_choices = [f"{m}" for m in codex_models] + model_choices.append("Custom model") + model_choices.append(f"Keep current ({current_model})") + + keep_idx = len(model_choices) - 1 + model_idx = prompt_choice("Select default model:", model_choices, keep_idx) + + if model_idx < len(codex_models): + config['model'] = codex_models[model_idx] + save_env_value("LLM_MODEL", codex_models[model_idx]) + elif model_idx == len(codex_models): + custom = prompt("Enter model name") + if custom: + config['model'] = custom + save_env_value("LLM_MODEL", custom) + _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) else: # Static list for OpenRouter / fallback (from canonical list) from hermes_cli.models import model_ids, menu_labels diff --git a/hermes_cli/status.py b/hermes_cli/status.py index ec50c6d62..f1d3a7edf 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -101,15 +101,17 @@ def show_status(args): print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD)) try: - from hermes_cli.auth import get_nous_auth_status + from hermes_cli.auth import 
get_nous_auth_status, get_codex_auth_status nous_status = get_nous_auth_status() + codex_status = get_codex_auth_status() except Exception: nous_status = {} + codex_status = {} nous_logged_in = bool(nous_status.get("logged_in")) print( f" {'Nous Portal':<12} {check_mark(nous_logged_in)} " - f"{'logged in' if nous_logged_in else 'not logged in (run: hermes login)'}" + f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}" ) if nous_logged_in: portal_url = nous_status.get("portal_base_url") or "(unknown)" @@ -121,6 +123,20 @@ def show_status(args): print(f" Key exp: {key_exp}") print(f" Refresh: {refresh_label}") + codex_logged_in = bool(codex_status.get("logged_in")) + print( + f" {'OpenAI Codex':<12} {check_mark(codex_logged_in)} " + f"{'logged in' if codex_logged_in else 'not logged in (run: hermes model)'}" + ) + codex_auth_file = codex_status.get("auth_file") + if codex_auth_file: + print(f" Auth file: {codex_auth_file}") + codex_last_refresh = _format_iso_timestamp(codex_status.get("last_refresh")) + if codex_status.get("last_refresh"): + print(f" Refreshed: {codex_last_refresh}") + if codex_status.get("error") and not codex_logged_in: + print(f" Error: {codex_status.get('error')}") + # ========================================================================= # Terminal Configuration # ========================================================================= diff --git a/run_agent.py b/run_agent.py index 3a939d161..669f1899c 100644 --- a/run_agent.py +++ b/run_agent.py @@ -21,6 +21,7 @@ Usage: """ import copy +import hashlib import json import logging logger = logging.getLogger(__name__) @@ -30,6 +31,7 @@ import re import sys import time import threading +from types import SimpleNamespace import uuid from typing import List, Dict, Any, Optional from openai import OpenAI @@ -87,6 +89,7 @@ from agent.prompt_builder import build_skills_system_prompt, build_context_files from agent.display import ( KawaiiSpinner, build_tool_preview as 
_build_tool_preview, get_cute_tool_message as _get_cute_tool_message_impl, + _detect_tool_failure, ) from agent.trajectory import ( convert_scratchpad_to_think, has_incomplete_scratchpad, @@ -106,6 +109,8 @@ class AIAgent: self, base_url: str = None, api_key: str = None, + provider: str = None, + api_mode: str = None, model: str = "anthropic/claude-opus-4.6", # OpenRouter format max_iterations: int = 60, # Default tool-calling iterations tool_delay: float = 1.0, @@ -124,6 +129,7 @@ class AIAgent: session_id: str = None, tool_progress_callback: callable = None, clarify_callback: callable = None, + step_callback: callable = None, max_tokens: int = None, reasoning_config: Dict[str, Any] = None, prefill_messages: List[Dict[str, Any]] = None, @@ -139,6 +145,8 @@ class AIAgent: Args: base_url (str): Base URL for the model API (optional) api_key (str): API key for authentication (optional, uses env var if not provided) + provider (str): Provider identifier (optional; used for telemetry/routing hints) + api_mode (str): API mode override: "chat_completions" or "codex_responses" model (str): Model name to use (default: "anthropic/claude-opus-4.6") max_iterations (int): Maximum number of tool calling iterations (default: 60) tool_delay (float): Delay between tool calls in seconds (default: 1.0) @@ -186,6 +194,17 @@ class AIAgent: # Store effective base URL for feature detection (prompt caching, reasoning, etc.) # When no base_url is provided, the client defaults to OpenRouter, so reflect that here. 
self.base_url = base_url or OPENROUTER_BASE_URL + provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None + self.provider = provider_name or "openrouter" + if api_mode in {"chat_completions", "codex_responses"}: + self.api_mode = api_mode + elif self.provider == "openai-codex": + self.api_mode = "codex_responses" + elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower(): + self.api_mode = "codex_responses" + self.provider = "openai-codex" + else: + self.api_mode = "chat_completions" if base_url and "api.anthropic.com" in base_url.strip().lower(): raise ValueError( "Anthropic's native /v1/messages API is not supported yet (planned for a future release). " @@ -195,6 +214,7 @@ class AIAgent: ) self.tool_progress_callback = tool_progress_callback self.clarify_callback = clarify_callback + self.step_callback = step_callback self._last_reported_tool = None # Track for "new tool" mode # Interrupt mechanism for breaking out of tool loops @@ -228,13 +248,33 @@ class AIAgent: self._use_prompt_caching = is_openrouter and is_claude self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost) - # Configure logging + # Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log + # so tool failures, API errors, etc. are inspectable after the fact. 
+ from agent.redact import RedactingFormatter + _error_log_dir = Path.home() / ".hermes" / "logs" + _error_log_dir.mkdir(parents=True, exist_ok=True) + _error_log_path = _error_log_dir / "errors.log" + from logging.handlers import RotatingFileHandler + _error_file_handler = RotatingFileHandler( + _error_log_path, maxBytes=2 * 1024 * 1024, backupCount=2, + ) + _error_file_handler.setLevel(logging.WARNING) + _error_file_handler.setFormatter(RedactingFormatter( + '%(asctime)s %(levelname)s %(name)s: %(message)s', + )) + logging.getLogger().addHandler(_error_file_handler) + if self.verbose_logging: logging.basicConfig( level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%H:%M:%S' ) + for handler in logging.getLogger().handlers: + handler.setFormatter(RedactingFormatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt='%H:%M:%S', + )) # Keep third-party libraries at WARNING level to reduce noise # We have our own retry and error logging that's more informative logging.getLogger('openai').setLevel(logging.WARNING) @@ -297,7 +337,7 @@ class AIAgent: client_kwargs["default_headers"] = { "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", } self._client_kwargs = client_kwargs # stored for rebuilding after interrupt @@ -479,9 +519,10 @@ class AIAgent: # Initialize context compressor for automatic context management # Compresses conversation when approaching model's context limit - # Configuration via environment variables (can be set in .env or cli-config.yaml) + # Configuration via config.yaml (compression section) or environment variables compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85")) compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes") + compression_summary_model = 
os.getenv("CONTEXT_COMPRESSION_MODEL") or None self.context_compressor = ContextCompressor( model=self.model, @@ -489,10 +530,17 @@ class AIAgent: protect_first_n=3, protect_last_n=4, summary_target_tokens=500, + summary_model_override=compression_summary_model, quiet_mode=self.quiet_mode, ) self.compression_enabled = compression_enabled self._user_turn_count = 0 + + # Cumulative token usage for the session + self.session_prompt_tokens = 0 + self.session_completion_tokens = 0 + self.session_total_tokens = 0 + self.session_api_calls = 0 if not self.quiet_mode: if compression_enabled: @@ -542,6 +590,77 @@ class AIAgent: if not content: return "" return re.sub(r'.*?', '', content, flags=re.DOTALL) + + def _looks_like_codex_intermediate_ack( + self, + user_message: str, + assistant_content: str, + messages: List[Dict[str, Any]], + ) -> bool: + """Detect a planning/ack message that should continue instead of ending the turn.""" + if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages): + return False + + assistant_text = self._strip_think_blocks(assistant_content or "").strip().lower() + if not assistant_text: + return False + if len(assistant_text) > 1200: + return False + + has_future_ack = bool( + re.search(r"\b(i['’]ll|i will|let me|i can do that|i can help with that)\b", assistant_text) + ) + if not has_future_ack: + return False + + action_markers = ( + "look into", + "look at", + "inspect", + "scan", + "check", + "analyz", + "review", + "explore", + "read", + "open", + "run", + "test", + "fix", + "debug", + "search", + "find", + "walkthrough", + "report back", + "summarize", + ) + workspace_markers = ( + "directory", + "current directory", + "current dir", + "cwd", + "repo", + "repository", + "codebase", + "project", + "folder", + "filesystem", + "file tree", + "files", + "path", + ) + + user_text = (user_message or "").strip().lower() + user_targets_workspace = ( + any(marker in user_text for marker in workspace_markers) + or "~/" in 
user_text + or "/" in user_text + ) + assistant_mentions_action = any(marker in assistant_text for marker in action_markers) + assistant_targets_workspace = any( + marker in assistant_text for marker in workspace_markers + ) + return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action def _extract_reasoning(self, assistant_message) -> Optional[str]: @@ -1257,6 +1376,615 @@ class AIAgent: if self._memory_store: self._memory_store.load_from_disk() + def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]: + """Convert chat-completions tool schemas to Responses function-tool schemas.""" + source_tools = tools if tools is not None else self.tools + if not source_tools: + return None + + converted: List[Dict[str, Any]] = [] + for item in source_tools: + fn = item.get("function", {}) if isinstance(item, dict) else {} + name = fn.get("name") + if not isinstance(name, str) or not name.strip(): + continue + converted.append({ + "type": "function", + "name": name, + "description": fn.get("description", ""), + "strict": False, + "parameters": fn.get("parameters", {"type": "object", "properties": {}}), + }) + return converted or None + + @staticmethod + def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]: + """Split a stored tool id into (call_id, response_item_id).""" + if not isinstance(raw_id, str): + return None, None + value = raw_id.strip() + if not value: + return None, None + if "|" in value: + call_id, response_item_id = value.split("|", 1) + call_id = call_id.strip() or None + response_item_id = response_item_id.strip() or None + return call_id, response_item_id + if value.startswith("fc_"): + return None, value + return value, None + + def _derive_responses_function_call_id( + self, + call_id: str, + response_item_id: Optional[str] = None, + ) -> str: + """Build a valid Responses `function_call.id` (must start with `fc_`).""" + if 
isinstance(response_item_id, str): + candidate = response_item_id.strip() + if candidate.startswith("fc_"): + return candidate + + source = (call_id or "").strip() + if source.startswith("fc_"): + return source + if source.startswith("call_") and len(source) > len("call_"): + return f"fc_{source[len('call_'):]}" + + sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source) + if sanitized.startswith("fc_"): + return sanitized + if sanitized.startswith("call_") and len(sanitized) > len("call_"): + return f"fc_{sanitized[len('call_'):]}" + if sanitized: + return f"fc_{sanitized[:48]}" + + seed = source or str(response_item_id or "") or uuid.uuid4().hex + digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24] + return f"fc_{digest}" + + def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Convert internal chat-style messages to Responses input items.""" + items: List[Dict[str, Any]] = [] + + for msg in messages: + if not isinstance(msg, dict): + continue + role = msg.get("role") + if role == "system": + continue + + if role in {"user", "assistant"}: + content = msg.get("content", "") + content_text = str(content) if content is not None else "" + + if role == "assistant": + # Replay encrypted reasoning items from previous turns + # so the API can maintain coherent reasoning chains. 
+ codex_reasoning = msg.get("codex_reasoning_items") + if isinstance(codex_reasoning, list): + for ri in codex_reasoning: + if isinstance(ri, dict) and ri.get("encrypted_content"): + items.append(ri) + + if content_text.strip(): + items.append({"role": "assistant", "content": content_text}) + + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if not isinstance(tc, dict): + continue + fn = tc.get("function", {}) + fn_name = fn.get("name") + if not isinstance(fn_name, str) or not fn_name.strip(): + continue + + embedded_call_id, embedded_response_item_id = self._split_responses_tool_id( + tc.get("id") + ) + call_id = tc.get("call_id") + if not isinstance(call_id, str) or not call_id.strip(): + call_id = embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + if ( + isinstance(embedded_response_item_id, str) + and embedded_response_item_id.startswith("fc_") + and len(embedded_response_item_id) > len("fc_") + ): + call_id = f"call_{embedded_response_item_id[len('fc_'):]}" + else: + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + + arguments = fn.get("arguments", "{}") + if isinstance(arguments, dict): + arguments = json.dumps(arguments, ensure_ascii=False) + elif not isinstance(arguments, str): + arguments = str(arguments) + arguments = arguments.strip() or "{}" + + items.append({ + "type": "function_call", + "call_id": call_id, + "name": fn_name, + "arguments": arguments, + }) + continue + + items.append({"role": role, "content": content_text}) + continue + + if role == "tool": + raw_tool_call_id = msg.get("tool_call_id") + call_id, _ = self._split_responses_tool_id(raw_tool_call_id) + if not isinstance(call_id, str) or not call_id.strip(): + if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip(): + call_id = raw_tool_call_id.strip() + if not isinstance(call_id, str) or not call_id.strip(): + continue + items.append({ + "type": "function_call_output", + "call_id": 
call_id, + "output": str(msg.get("content", "") or ""), + }) + + return items + + def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]: + if not isinstance(raw_items, list): + raise ValueError("Codex Responses input must be a list of input items.") + + normalized: List[Dict[str, Any]] = [] + for idx, item in enumerate(raw_items): + if not isinstance(item, dict): + raise ValueError(f"Codex Responses input[{idx}] must be an object.") + + item_type = item.get("type") + if item_type == "function_call": + call_id = item.get("call_id") + name = item.get("name") + if not isinstance(call_id, str) or not call_id.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.") + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.") + + arguments = item.get("arguments", "{}") + if isinstance(arguments, dict): + arguments = json.dumps(arguments, ensure_ascii=False) + elif not isinstance(arguments, str): + arguments = str(arguments) + arguments = arguments.strip() or "{}" + + normalized.append( + { + "type": "function_call", + "call_id": call_id.strip(), + "name": name.strip(), + "arguments": arguments, + } + ) + continue + + if item_type == "function_call_output": + call_id = item.get("call_id") + if not isinstance(call_id, str) or not call_id.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.") + output = item.get("output", "") + if output is None: + output = "" + if not isinstance(output, str): + output = str(output) + + normalized.append( + { + "type": "function_call_output", + "call_id": call_id.strip(), + "output": output, + } + ) + continue + + role = item.get("role") + if role in {"user", "assistant"}: + content = item.get("content", "") + if content is None: + content = "" + if not isinstance(content, str): + content = str(content) + + normalized.append({"role": role, "content": content}) 
+ continue + + raise ValueError( + f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})." + ) + + return normalized + + def _preflight_codex_api_kwargs( + self, + api_kwargs: Any, + *, + allow_stream: bool = False, + ) -> Dict[str, Any]: + if not isinstance(api_kwargs, dict): + raise ValueError("Codex Responses request must be a dict.") + + required = {"model", "instructions", "input"} + missing = [key for key in required if key not in api_kwargs] + if missing: + raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.") + + model = api_kwargs.get("model") + if not isinstance(model, str) or not model.strip(): + raise ValueError("Codex Responses request 'model' must be a non-empty string.") + model = model.strip() + + instructions = api_kwargs.get("instructions") + if instructions is None: + instructions = "" + if not isinstance(instructions, str): + instructions = str(instructions) + instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY + + normalized_input = self._preflight_codex_input_items(api_kwargs.get("input")) + + tools = api_kwargs.get("tools") + normalized_tools = None + if tools is not None: + if not isinstance(tools, list): + raise ValueError("Codex Responses request 'tools' must be a list when provided.") + normalized_tools = [] + for idx, tool in enumerate(tools): + if not isinstance(tool, dict): + raise ValueError(f"Codex Responses tools[{idx}] must be an object.") + if tool.get("type") != "function": + raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.") + + name = tool.get("name") + parameters = tool.get("parameters") + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.") + if not isinstance(parameters, dict): + raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.") + + description = tool.get("description", "") + if 
description is None: + description = "" + if not isinstance(description, str): + description = str(description) + + strict = tool.get("strict", False) + if not isinstance(strict, bool): + strict = bool(strict) + + normalized_tools.append( + { + "type": "function", + "name": name.strip(), + "description": description, + "strict": strict, + "parameters": parameters, + } + ) + + store = api_kwargs.get("store", False) + if store is not False: + raise ValueError("Codex Responses contract requires 'store' to be false.") + + allowed_keys = { + "model", "instructions", "input", "tools", "store", + "reasoning", "include", "max_output_tokens", "temperature", + } + normalized: Dict[str, Any] = { + "model": model, + "instructions": instructions, + "input": normalized_input, + "tools": normalized_tools, + "store": False, + } + + # Pass through reasoning config + reasoning = api_kwargs.get("reasoning") + if isinstance(reasoning, dict): + normalized["reasoning"] = reasoning + include = api_kwargs.get("include") + if isinstance(include, list): + normalized["include"] = include + + # Pass through max_output_tokens and temperature + max_output_tokens = api_kwargs.get("max_output_tokens") + if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0: + normalized["max_output_tokens"] = int(max_output_tokens) + temperature = api_kwargs.get("temperature") + if isinstance(temperature, (int, float)): + normalized["temperature"] = float(temperature) + + if allow_stream: + stream = api_kwargs.get("stream") + if stream is not None and stream is not True: + raise ValueError("Codex Responses 'stream' must be true when set.") + if stream is True: + normalized["stream"] = True + allowed_keys.add("stream") + elif "stream" in api_kwargs: + raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.") + + unexpected = sorted(key for key in api_kwargs.keys() if key not in allowed_keys) + if unexpected: + raise ValueError( + f"Codex Responses request 
has unsupported field(s): {', '.join(unexpected)}." + ) + + return normalized + + def _extract_responses_message_text(self, item: Any) -> str: + """Extract assistant text from a Responses message output item.""" + content = getattr(item, "content", None) + if not isinstance(content, list): + return "" + + chunks: List[str] = [] + for part in content: + ptype = getattr(part, "type", None) + if ptype not in {"output_text", "text"}: + continue + text = getattr(part, "text", None) + if isinstance(text, str) and text: + chunks.append(text) + return "".join(chunks).strip() + + def _extract_responses_reasoning_text(self, item: Any) -> str: + """Extract a compact reasoning text from a Responses reasoning item.""" + summary = getattr(item, "summary", None) + if isinstance(summary, list): + chunks: List[str] = [] + for part in summary: + text = getattr(part, "text", None) + if isinstance(text, str) and text: + chunks.append(text) + if chunks: + return "\n".join(chunks).strip() + text = getattr(item, "text", None) + if isinstance(text, str) and text: + return text.strip() + return "" + + def _normalize_codex_response(self, response: Any) -> tuple[Any, str]: + """Normalize a Responses API object to an assistant_message-like object.""" + output = getattr(response, "output", None) + if not isinstance(output, list) or not output: + raise RuntimeError("Responses API returned no output items") + + response_status = getattr(response, "status", None) + if isinstance(response_status, str): + response_status = response_status.strip().lower() + else: + response_status = None + + if response_status in {"failed", "cancelled"}: + error_obj = getattr(response, "error", None) + if isinstance(error_obj, dict): + error_msg = error_obj.get("message") or str(error_obj) + else: + error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'" + raise RuntimeError(error_msg) + + content_parts: List[str] = [] + reasoning_parts: List[str] = [] + reasoning_items_raw: 
List[Dict[str, Any]] = [] + tool_calls: List[Any] = [] + has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} + saw_commentary_phase = False + saw_final_answer_phase = False + + for item in output: + item_type = getattr(item, "type", None) + item_status = getattr(item, "status", None) + if isinstance(item_status, str): + item_status = item_status.strip().lower() + else: + item_status = None + + if item_status in {"queued", "in_progress", "incomplete"}: + has_incomplete_items = True + + if item_type == "message": + item_phase = getattr(item, "phase", None) + if isinstance(item_phase, str): + normalized_phase = item_phase.strip().lower() + if normalized_phase in {"commentary", "analysis"}: + saw_commentary_phase = True + elif normalized_phase in {"final_answer", "final"}: + saw_final_answer_phase = True + message_text = self._extract_responses_message_text(item) + if message_text: + content_parts.append(message_text) + elif item_type == "reasoning": + reasoning_text = self._extract_responses_reasoning_text(item) + if reasoning_text: + reasoning_parts.append(reasoning_text) + # Capture the full reasoning item for multi-turn continuity. + # encrypted_content is an opaque blob the API needs back on + # subsequent turns to maintain coherent reasoning chains. 
+ encrypted = getattr(item, "encrypted_content", None) + if isinstance(encrypted, str) and encrypted: + raw_item = {"type": "reasoning", "encrypted_content": encrypted} + item_id = getattr(item, "id", None) + if isinstance(item_id, str) and item_id: + raw_item["id"] = item_id + reasoning_items_raw.append(raw_item) + elif item_type == "function_call": + if item_status in {"queued", "in_progress", "incomplete"}: + continue + fn_name = getattr(item, "name", "") or "" + arguments = getattr(item, "arguments", "{}") + if not isinstance(arguments, str): + arguments = str(arguments) + raw_call_id = getattr(item, "call_id", None) + raw_item_id = getattr(item, "id", None) + embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) + call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + response_item_id = raw_item_id if isinstance(raw_item_id, str) else None + response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) + tool_calls.append(SimpleNamespace( + id=call_id, + call_id=call_id, + response_item_id=response_item_id, + type="function", + function=SimpleNamespace(name=fn_name, arguments=arguments), + )) + elif item_type == "custom_tool_call": + fn_name = getattr(item, "name", "") or "" + arguments = getattr(item, "input", "{}") + if not isinstance(arguments, str): + arguments = str(arguments) + raw_call_id = getattr(item, "call_id", None) + raw_item_id = getattr(item, "id", None) + embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) + call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + response_item_id = raw_item_id if isinstance(raw_item_id, str) else None + 
response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) + tool_calls.append(SimpleNamespace( + id=call_id, + call_id=call_id, + response_item_id=response_item_id, + type="function", + function=SimpleNamespace(name=fn_name, arguments=arguments), + )) + + final_text = "\n".join([p for p in content_parts if p]).strip() + if not final_text and hasattr(response, "output_text"): + out_text = getattr(response, "output_text", "") + if isinstance(out_text, str): + final_text = out_text.strip() + + assistant_message = SimpleNamespace( + content=final_text, + tool_calls=tool_calls, + reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None, + reasoning_content=None, + reasoning_details=None, + codex_reasoning_items=reasoning_items_raw or None, + ) + + if tool_calls: + finish_reason = "tool_calls" + elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): + finish_reason = "incomplete" + else: + finish_reason = "stop" + return assistant_message, finish_reason + + def _run_codex_stream(self, api_kwargs: dict): + """Execute one streaming Responses API request and return the final response.""" + max_stream_retries = 1 + for attempt in range(max_stream_retries + 1): + try: + with self.client.responses.stream(**api_kwargs) as stream: + for _ in stream: + pass + return stream.get_final_response() + except RuntimeError as exc: + err_text = str(exc) + missing_completed = "response.completed" in err_text + if missing_completed and attempt < max_stream_retries: + logger.debug( + "Responses stream closed before completion (attempt %s/%s); retrying.", + attempt + 1, + max_stream_retries + 1, + ) + continue + if missing_completed: + logger.debug( + "Responses stream did not emit response.completed; falling back to create(stream=True)." 
+ ) + return self._run_codex_create_stream_fallback(api_kwargs) + raise + + def _run_codex_create_stream_fallback(self, api_kwargs: dict): + """Fallback path for stream completion edge cases on Codex-style Responses backends.""" + fallback_kwargs = dict(api_kwargs) + fallback_kwargs["stream"] = True + fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True) + stream_or_response = self.client.responses.create(**fallback_kwargs) + + # Compatibility shim for mocks or providers that still return a concrete response. + if hasattr(stream_or_response, "output"): + return stream_or_response + if not hasattr(stream_or_response, "__iter__"): + return stream_or_response + + terminal_response = None + try: + for event in stream_or_response: + event_type = getattr(event, "type", None) + if not event_type and isinstance(event, dict): + event_type = event.get("type") + if event_type not in {"response.completed", "response.incomplete", "response.failed"}: + continue + + terminal_response = getattr(event, "response", None) + if terminal_response is None and isinstance(event, dict): + terminal_response = event.get("response") + if terminal_response is not None: + return terminal_response + finally: + close_fn = getattr(stream_or_response, "close", None) + if callable(close_fn): + try: + close_fn() + except Exception: + pass + + if terminal_response is not None: + return terminal_response + raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.") + + def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool: + if self.api_mode != "codex_responses" or self.provider != "openai-codex": + return False + + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + + creds = resolve_codex_runtime_credentials(force_refresh=force) + except Exception as exc: + logger.debug("Codex credential refresh failed: %s", exc) + return False + + api_key = creds.get("api_key") + base_url = 
creds.get("base_url") + if not isinstance(api_key, str) or not api_key.strip(): + return False + if not isinstance(base_url, str) or not base_url.strip(): + return False + + self.api_key = api_key.strip() + self.base_url = base_url.strip().rstrip("/") + self._client_kwargs["api_key"] = self.api_key + self._client_kwargs["base_url"] = self.base_url + + try: + self.client.close() + except Exception: + pass + + try: + self.client = OpenAI(**self._client_kwargs) + except Exception as exc: + logger.warning("Failed to rebuild OpenAI client after Codex refresh: %s", exc) + return False + + return True + def _interruptible_api_call(self, api_kwargs: dict): """ Run the API call in a background thread so the main conversation loop @@ -1270,7 +1998,10 @@ class AIAgent: def _call(): try: - result["response"] = self.client.chat.completions.create(**api_kwargs) + if self.api_mode == "codex_responses": + result["response"] = self._run_codex_stream(api_kwargs) + else: + result["response"] = self.client.chat.completions.create(**api_kwargs) except Exception as e: result["error"] = e @@ -1295,7 +2026,39 @@ class AIAgent: return result["response"] def _build_api_kwargs(self, api_messages: list) -> dict: - """Build the keyword arguments dict for the chat completions API call.""" + """Build the keyword arguments dict for the active API mode.""" + if self.api_mode == "codex_responses": + instructions = "" + payload_messages = api_messages + if api_messages and api_messages[0].get("role") == "system": + instructions = str(api_messages[0].get("content") or "").strip() + payload_messages = api_messages[1:] + if not instructions: + instructions = DEFAULT_AGENT_IDENTITY + + kwargs = { + "model": self.model, + "instructions": instructions, + "input": self._chat_messages_to_responses_input(payload_messages), + "tools": self._responses_tools(), + "store": False, + "reasoning": {"effort": "medium", "summary": "auto"}, + "include": ["reasoning.encrypted_content"], + } + + # Apply reasoning effort 
from config if set + if self.reasoning_config and isinstance(self.reasoning_config, dict): + if self.reasoning_config.get("enabled") is False: + kwargs.pop("reasoning", None) + kwargs["include"] = [] + elif self.reasoning_config.get("effort"): + kwargs["reasoning"]["effort"] = self.reasoning_config["effort"] + + if self.max_tokens is not None: + kwargs["max_output_tokens"] = self.max_tokens + + return kwargs + provider_preferences = {} if self.providers_allowed: provider_preferences["only"] = self.providers_allowed @@ -1362,34 +2125,73 @@ class AIAgent: } if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details: - msg["reasoning_details"] = [ - {"type": d.get("type"), "text": d.get("text"), "signature": d.get("signature")} - for d in assistant_message.reasoning_details - if isinstance(d, dict) - ] + # Pass reasoning_details back unmodified so providers (OpenRouter, + # Anthropic, OpenAI) can maintain reasoning continuity across turns. + # Each provider may include opaque fields (signature, encrypted_content) + # that must be preserved exactly. + raw_details = assistant_message.reasoning_details + preserved = [] + for d in raw_details: + if isinstance(d, dict): + preserved.append(d) + elif hasattr(d, "__dict__"): + preserved.append(d.__dict__) + elif hasattr(d, "model_dump"): + preserved.append(d.model_dump()) + if preserved: + msg["reasoning_details"] = preserved + + # Codex Responses API: preserve encrypted reasoning items for + # multi-turn continuity. These get replayed as input on the next turn. 
+ codex_items = getattr(assistant_message, "codex_reasoning_items", None) + if codex_items: + msg["codex_reasoning_items"] = codex_items if assistant_message.tool_calls: - tc_list = [] + tool_calls = [] for tool_call in assistant_message.tool_calls: + raw_id = getattr(tool_call, "id", None) + call_id = getattr(tool_call, "call_id", None) + if not isinstance(call_id, str) or not call_id.strip(): + embedded_call_id, _ = self._split_responses_tool_id(raw_id) + call_id = embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + if isinstance(raw_id, str) and raw_id.strip(): + call_id = raw_id.strip() + else: + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + + response_item_id = getattr(tool_call, "response_item_id", None) + if not isinstance(response_item_id, str) or not response_item_id.strip(): + _, embedded_response_item_id = self._split_responses_tool_id(raw_id) + response_item_id = embedded_response_item_id + + response_item_id = self._derive_responses_function_call_id( + call_id, + response_item_id if isinstance(response_item_id, str) else None, + ) + tc_dict = { - "id": tool_call.id, + "id": call_id, + "call_id": call_id, + "response_item_id": response_item_id, "type": tool_call.type, "function": { "name": tool_call.function.name, "arguments": tool_call.function.arguments - } + }, } # Preserve extra_content (e.g. Gemini thought_signature) so it # is sent back on subsequent API calls. Without this, Gemini 3 # thinking models reject the request with a 400 error. 
extra = getattr(tool_call, "extra_content", None) if extra is not None: - # Convert Pydantic models to plain dicts for JSON safety if hasattr(extra, "model_dump"): extra = extra.model_dump() tc_dict["extra_content"] = extra - tc_list.append(tc_dict) - msg["tool_calls"] = tc_list + tool_calls.append(tc_dict) + msg["tool_calls"] = tool_calls return msg @@ -1454,40 +2256,68 @@ class AIAgent: messages.pop() # remove flush msg return - api_kwargs = { - "model": self.model, - "messages": api_messages, - "tools": [memory_tool_def], - "temperature": 0.3, - **self._max_tokens_param(1024), - } + # Use auxiliary client for the flush call when available -- + # it's cheaper and avoids Codex Responses API incompatibility. + from agent.auxiliary_client import get_text_auxiliary_client + aux_client, aux_model = get_text_auxiliary_client() - response = self.client.chat.completions.create(**api_kwargs, timeout=30.0) + if aux_client: + api_kwargs = { + "model": aux_model, + "messages": api_messages, + "tools": [memory_tool_def], + "temperature": 0.3, + "max_tokens": 5120, + } + response = aux_client.chat.completions.create(**api_kwargs, timeout=30.0) + elif self.api_mode == "codex_responses": + # No auxiliary client -- use the Codex Responses path directly + codex_kwargs = self._build_api_kwargs(api_messages) + codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) + codex_kwargs["temperature"] = 0.3 + if "max_output_tokens" in codex_kwargs: + codex_kwargs["max_output_tokens"] = 5120 + response = self._run_codex_stream(codex_kwargs) + else: + api_kwargs = { + "model": self.model, + "messages": api_messages, + "tools": [memory_tool_def], + "temperature": 0.3, + **self._max_tokens_param(5120), + } + response = self.client.chat.completions.create(**api_kwargs, timeout=30.0) - if response.choices: + # Extract tool calls from the response, handling both API formats + tool_calls = [] + if self.api_mode == "codex_responses" and not aux_client: + assistant_msg, _ = 
self._normalize_codex_response(response) + if assistant_msg and assistant_msg.tool_calls: + tool_calls = assistant_msg.tool_calls + elif hasattr(response, "choices") and response.choices: assistant_message = response.choices[0].message if assistant_message.tool_calls: - # Execute only memory tool calls - for tc in assistant_message.tool_calls: - if tc.function.name == "memory": - try: - args = json.loads(tc.function.arguments) - flush_target = args.get("target", "memory") - from tools.memory_tool import memory_tool as _memory_tool - result = _memory_tool( - action=args.get("action"), - target=flush_target, - content=args.get("content"), - old_text=args.get("old_text"), - store=self._memory_store, - ) - # Also send user observations to Honcho when active - if self._honcho and flush_target == "user" and args.get("action") == "add": - self._honcho_save_user_observation(args.get("content", "")) - if not self.quiet_mode: - print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}") - except Exception as e: - logger.debug("Memory flush tool call failed: %s", e) + tool_calls = assistant_message.tool_calls + + for tc in tool_calls: + if tc.function.name == "memory": + try: + args = json.loads(tc.function.arguments) + flush_target = args.get("target", "memory") + from tools.memory_tool import memory_tool as _memory_tool + result = _memory_tool( + action=args.get("action"), + target=flush_target, + content=args.get("content"), + old_text=args.get("old_text"), + store=self._memory_store, + ) + if self._honcho and flush_target == "user" and args.get("action") == "add": + self._honcho_save_user_observation(args.get("content", "")) + if not self.quiet_mode: + print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}") + except Exception as e: + logger.debug("Memory flush tool call failed: %s", e) except Exception as e: logger.debug("Memory flush API call failed: %s", e) finally: @@ -1698,7 +2528,7 @@ class AIAgent: _spinner_result = function_result except Exception 
as tool_error: function_result = f"Error executing tool '{function_name}': {tool_error}" - logger.error("handle_function_call raised for %s: %s", function_name, tool_error) + logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True) finally: tool_duration = time.time() - tool_start_time cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result) @@ -1708,11 +2538,17 @@ class AIAgent: function_result = handle_function_call(function_name, function_args, effective_task_id) except Exception as tool_error: function_result = f"Error executing tool '{function_name}': {tool_error}" - logger.error("handle_function_call raised for %s: %s", function_name, tool_error) + logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True) tool_duration = time.time() - tool_start_time result_preview = function_result[:200] if len(function_result) > 200 else function_result + # Log tool errors to the persistent error log so [error] tags + # in the UI always have a corresponding detailed entry on disk. 
+ _is_error_result, _ = _detect_tool_failure(function_name, function_result) + if _is_error_result: + logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview) + if self.verbose_logging: logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s") logging.debug(f"Tool result preview: {result_preview}...") @@ -1795,24 +2631,67 @@ class AIAgent: if _is_nous: summary_extra_body["tags"] = ["product=hermes-agent"] - summary_kwargs = { - "model": self.model, - "messages": api_messages, - } - if self.max_tokens is not None: - summary_kwargs.update(self._max_tokens_param(self.max_tokens)) - if summary_extra_body: - summary_kwargs["extra_body"] = summary_extra_body + if self.api_mode == "codex_responses": + codex_kwargs = self._build_api_kwargs(api_messages) + codex_kwargs["tools"] = None + summary_response = self._run_codex_stream(codex_kwargs) + assistant_message, _ = self._normalize_codex_response(summary_response) + final_response = (assistant_message.content or "").strip() if assistant_message else "" + else: + summary_kwargs = { + "model": self.model, + "messages": api_messages, + } + if self.max_tokens is not None: + summary_kwargs.update(self._max_tokens_param(self.max_tokens)) + if summary_extra_body: + summary_kwargs["extra_body"] = summary_extra_body - summary_response = self.client.chat.completions.create(**summary_kwargs) + summary_response = self.client.chat.completions.create(**summary_kwargs) - if summary_response.choices and summary_response.choices[0].message.content: - final_response = summary_response.choices[0].message.content + if summary_response.choices and summary_response.choices[0].message.content: + final_response = summary_response.choices[0].message.content + else: + final_response = "" + + if final_response: if "" in final_response: final_response = re.sub(r'.*?\s*', '', final_response, flags=re.DOTALL).strip() - messages.append({"role": "assistant", "content": final_response}) + if 
final_response: + messages.append({"role": "assistant", "content": final_response}) + else: + final_response = "I reached the iteration limit and couldn't generate a summary." else: - final_response = "I reached the iteration limit and couldn't generate a summary." + # Retry summary generation + if self.api_mode == "codex_responses": + codex_kwargs = self._build_api_kwargs(api_messages) + codex_kwargs["tools"] = None + retry_response = self._run_codex_stream(codex_kwargs) + retry_msg, _ = self._normalize_codex_response(retry_response) + final_response = (retry_msg.content or "").strip() if retry_msg else "" + else: + summary_kwargs = { + "model": self.model, + "messages": api_messages, + } + if self.max_tokens is not None: + summary_kwargs["max_tokens"] = self.max_tokens + if summary_extra_body: + summary_kwargs["extra_body"] = summary_extra_body + + summary_response = self.client.chat.completions.create(**summary_kwargs) + + if summary_response.choices and summary_response.choices[0].message.content: + final_response = summary_response.choices[0].message.content + else: + final_response = "" + + if final_response: + if "" in final_response: + final_response = re.sub(r'.*?\s*', '', final_response, flags=re.DOTALL).strip() + messages.append({"role": "assistant", "content": final_response}) + else: + final_response = "I reached the iteration limit and couldn't generate a summary." 
except Exception as e: logging.warning(f"Failed to get summary response: {e}") @@ -1930,6 +2809,7 @@ class AIAgent: api_call_count = 0 final_response = None interrupted = False + codex_ack_continuations = 0 # Clear any stale interrupt state at start self.clear_interrupt() @@ -1944,6 +2824,22 @@ class AIAgent: api_call_count += 1 + # Fire step_callback for gateway hooks (agent:step event) + if self.step_callback is not None: + try: + prev_tools = [] + for _m in reversed(messages): + if _m.get("role") == "assistant" and _m.get("tool_calls"): + prev_tools = [ + tc["function"]["name"] + for tc in _m["tool_calls"] + if isinstance(tc, dict) + ] + break + self.step_callback(api_call_count, prev_tools) + except Exception as _step_err: + logger.debug("step_callback error (iteration %s): %s", api_call_count, _step_err) + # Track tool-calling iterations for skill nudge. # Counter resets whenever skill_manage is actually used. if (self._skill_nudge_interval > 0 @@ -2028,10 +2924,15 @@ class AIAgent: api_start_time = time.time() retry_count = 0 max_retries = 6 # Increased to allow longer backoff periods + codex_auth_retry_attempted = False + + finish_reason = "stop" while retry_count < max_retries: try: api_kwargs = self._build_api_kwargs(api_messages) + if self.api_mode == "codex_responses": + api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {"1", "true", "yes", "on"}: self._dump_api_request_debug(api_kwargs, reason="preflight") @@ -2054,8 +2955,33 @@ class AIAgent: resp_model = getattr(response, 'model', 'N/A') if response else 'N/A' logging.debug(f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}") - # Validate response has valid choices before proceeding - if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0: + # Validate response shape before proceeding + 
response_invalid = False + error_details = [] + if self.api_mode == "codex_responses": + output_items = getattr(response, "output", None) if response is not None else None + if response is None: + response_invalid = True + error_details.append("response is None") + elif not isinstance(output_items, list): + response_invalid = True + error_details.append("response.output is not a list") + elif len(output_items) == 0: + response_invalid = True + error_details.append("response.output is empty") + else: + if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0: + response_invalid = True + if response is None: + error_details.append("response is None") + elif not hasattr(response, 'choices'): + error_details.append("response has no 'choices' attribute") + elif response.choices is None: + error_details.append("response.choices is None") + else: + error_details.append("response.choices is empty") + + if response_invalid: # Stop spinner before printing error messages if thinking_spinner: thinking_spinner.stop(f"(´;ω;`) oops, retrying...") @@ -2063,15 +2989,6 @@ class AIAgent: # This is often rate limiting or provider returning malformed response retry_count += 1 - error_details = [] - if response is None: - error_details.append("response is None") - elif not hasattr(response, 'choices'): - error_details.append("response has no 'choices' attribute") - elif response.choices is None: - error_details.append("response.choices is None") - else: - error_details.append("response.choices is empty") # Check for error field in response (some providers include this) error_msg = "Unknown" @@ -2108,7 +3025,7 @@ class AIAgent: "messages": messages, "completed": False, "api_calls": api_call_count, - "error": f"Invalid API response (choices is None/empty). Likely rate limited by provider.", + "error": "Invalid API response shape. 
Likely rate limited or malformed provider response.", "failed": True # Mark as failure for filtering } @@ -2135,7 +3052,20 @@ class AIAgent: continue # Retry the API call # Check finish_reason before proceeding - finish_reason = response.choices[0].finish_reason + if self.api_mode == "codex_responses": + status = getattr(response, "status", None) + incomplete_details = getattr(response, "incomplete_details", None) + incomplete_reason = None + if isinstance(incomplete_details, dict): + incomplete_reason = incomplete_details.get("reason") + else: + incomplete_reason = getattr(incomplete_details, "reason", None) + if status == "incomplete" and incomplete_reason in {"max_output_tokens", "length"}: + finish_reason = "length" + else: + finish_reason = "stop" + else: + finish_reason = response.choices[0].finish_reason # Handle "length" finish_reason - response was truncated if finish_reason == "length": @@ -2172,12 +3102,28 @@ class AIAgent: # Track actual token usage from response for context management if hasattr(response, 'usage') and response.usage: + if self.api_mode == "codex_responses": + prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0 + completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0 + total_tokens = ( + getattr(response.usage, 'total_tokens', None) + or (prompt_tokens + completion_tokens) + ) + else: + prompt_tokens = getattr(response.usage, 'prompt_tokens', 0) or 0 + completion_tokens = getattr(response.usage, 'completion_tokens', 0) or 0 + total_tokens = getattr(response.usage, 'total_tokens', 0) or 0 usage_dict = { - "prompt_tokens": getattr(response.usage, 'prompt_tokens', 0), - "completion_tokens": getattr(response.usage, 'completion_tokens', 0), - "total_tokens": getattr(response.usage, 'total_tokens', 0), + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": total_tokens, } self.context_compressor.update_from_response(usage_dict) + + self.session_prompt_tokens += prompt_tokens + 
self.session_completion_tokens += completion_tokens + self.session_total_tokens += total_tokens + self.session_api_calls += 1 if self.verbose_logging: logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}") @@ -2209,6 +3155,18 @@ class AIAgent: if thinking_spinner: thinking_spinner.stop(f"(╥_╥) error, retrying...") thinking_spinner = None + + status_code = getattr(api_error, "status_code", None) + if ( + self.api_mode == "codex_responses" + and self.provider == "openai-codex" + and status_code == 401 + and not codex_auth_retry_attempted + ): + codex_auth_retry_attempted = True + if self._try_refresh_codex_client_credentials(force=True): + print(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...") + continue retry_count += 1 elapsed_time = time.time() - api_start_time @@ -2365,11 +3323,32 @@ class AIAgent: break try: - assistant_message = response.choices[0].message + if self.api_mode == "codex_responses": + assistant_message, finish_reason = self._normalize_codex_response(response) + else: + assistant_message = response.choices[0].message # Handle assistant response if assistant_message.content and not self.quiet_mode: print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}") + + # Notify progress callback of model's thinking (used by subagent + # delegation to relay the child's reasoning to the parent display). + # Guard: only fire for subagents (_delegate_depth >= 1) to avoid + # spamming gateway platforms with the main agent's every thought. 
+ if (assistant_message.content and self.tool_progress_callback + and getattr(self, '_delegate_depth', 0) > 0): + _think_text = assistant_message.content.strip() + # Strip reasoning XML tags that shouldn't leak to parent display + _think_text = re.sub( + r'', '', _think_text + ).strip() + first_line = _think_text.split('\n')[0][:80] if _think_text else "" + if first_line: + try: + self.tool_progress_callback("_thinking", first_line) + except Exception: + pass # Check for incomplete (opened but never closed) # This means the model ran out of output tokens mid-reasoning — retry up to 2 times @@ -2405,6 +3384,48 @@ class AIAgent: # Reset incomplete scratchpad counter on clean response if hasattr(self, '_incomplete_scratchpad_retries'): self._incomplete_scratchpad_retries = 0 + + if self.api_mode == "codex_responses" and finish_reason == "incomplete": + if not hasattr(self, "_codex_incomplete_retries"): + self._codex_incomplete_retries = 0 + self._codex_incomplete_retries += 1 + + interim_msg = self._build_assistant_message(assistant_message, finish_reason) + interim_has_content = bool(interim_msg.get("content", "").strip()) + interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False + + if interim_has_content or interim_has_reasoning: + last_msg = messages[-1] if messages else None + duplicate_interim = ( + isinstance(last_msg, dict) + and last_msg.get("role") == "assistant" + and last_msg.get("finish_reason") == "incomplete" + and (last_msg.get("content") or "") == (interim_msg.get("content") or "") + and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "") + ) + if not duplicate_interim: + messages.append(interim_msg) + self._log_msg_to_db(interim_msg) + + if self._codex_incomplete_retries < 3: + if not self.quiet_mode: + print(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)") + self._session_messages = messages + 
self._save_session_log(messages) + continue + + self._codex_incomplete_retries = 0 + self._persist_session(messages, conversation_history) + return { + "final_response": None, + "messages": messages, + "api_calls": api_call_count, + "completed": False, + "partial": True, + "error": "Codex response remained incomplete after 3 continuation attempts", + } + elif hasattr(self, "_codex_incomplete_retries"): + self._codex_incomplete_retries = 0 # Check for tool calls if assistant_message.tool_calls: @@ -2577,7 +3598,8 @@ class AIAgent: tool_names.append(fn.get("name", "unknown")) msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..." break - final_response = fallback + # Strip blocks from fallback content for user display + final_response = self._strip_think_blocks(fallback).strip() break # No fallback -- append the empty message as-is @@ -2605,6 +3627,39 @@ class AIAgent: # Reset retry counter on successful content if hasattr(self, '_empty_content_retries'): self._empty_content_retries = 0 + + if ( + self.api_mode == "codex_responses" + and self.valid_tool_names + and codex_ack_continuations < 2 + and self._looks_like_codex_intermediate_ack( + user_message=user_message, + assistant_content=final_response, + messages=messages, + ) + ): + codex_ack_continuations += 1 + interim_msg = self._build_assistant_message(assistant_message, "incomplete") + messages.append(interim_msg) + self._log_msg_to_db(interim_msg) + + continue_msg = { + "role": "user", + "content": ( + "[System: Continue now. 
Execute the required tool calls and only " + "send your final answer after completing the task.]" + ), + } + messages.append(continue_msg) + self._log_msg_to_db(continue_msg) + self._session_messages = messages + self._save_session_log(messages) + continue + + codex_ack_continuations = 0 + + # Strip blocks from user-facing response (keep raw in messages for trajectory) + final_response = self._strip_think_blocks(final_response).strip() final_msg = self._build_assistant_message(assistant_message, finish_reason) diff --git a/scripts/install.sh b/scripts/install.sh index 4f8108bb8..81978e8f0 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -723,7 +723,7 @@ setup_path() { PATH_LINE='export PATH="$HOME/.local/bin:$PATH"' for SHELL_CONFIG in "${SHELL_CONFIGS[@]}"; do - if ! grep -q '\.local/bin' "$SHELL_CONFIG" 2>/dev/null; then + if ! grep -v '^[[:space:]]*#' "$SHELL_CONFIG" 2>/dev/null | grep -qE 'PATH=.*\.local/bin'; then echo "" >> "$SHELL_CONFIG" echo "# Hermes Agent — ensure ~/.local/bin is on PATH" >> "$SHELL_CONFIG" echo "$PATH_LINE" >> "$SHELL_CONFIG" diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py new file mode 100644 index 000000000..efcbce29f --- /dev/null +++ b/tests/agent/test_auxiliary_client.py @@ -0,0 +1,168 @@ +"""Tests for agent.auxiliary_client resolution chain, especially the Codex fallback.""" + +import json +import os +from pathlib import Path +from unittest.mock import patch, MagicMock + +import pytest + +from agent.auxiliary_client import ( + get_text_auxiliary_client, + get_vision_auxiliary_client, + auxiliary_max_tokens_param, + _read_codex_access_token, +) + + +@pytest.fixture(autouse=True) +def _clean_env(monkeypatch): + """Strip provider env vars so each test starts clean.""" + for key in ( + "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY", + "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL", + ): + monkeypatch.delenv(key, raising=False) + + +@pytest.fixture +def 
codex_auth_dir(tmp_path, monkeypatch):
    """Provide a writable ~/.codex/ directory with a valid auth.json."""
    codex_dir = tmp_path / ".codex"
    codex_dir.mkdir()
    auth_file = codex_dir / "auth.json"
    # Well-formed auth payload so any path-based reader would succeed too.
    auth_file.write_text(json.dumps({
        "tokens": {
            "access_token": "codex-test-token-abc123",
            "refresh_token": "codex-refresh-xyz",
        }
    }))
    # Patch the token reader directly so tests don't depend on Path.home().
    monkeypatch.setattr(
        "agent.auxiliary_client._read_codex_access_token",
        lambda: "codex-test-token-abc123",
    )
    return codex_dir


class TestReadCodexAccessToken:
    """Unit tests for the ~/.codex/auth.json access-token reader."""

    def test_valid_auth_file(self, tmp_path):
        # A well-formed auth.json yields the stored access token.
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        auth = codex_dir / "auth.json"
        auth.write_text(json.dumps({
            "tokens": {"access_token": "tok-123", "refresh_token": "r-456"}
        }))
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
            assert result == "tok-123"

    def test_missing_file_returns_none(self, tmp_path):
        # No ~/.codex/auth.json at all -> graceful None, never an exception.
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
            assert result is None

    def test_empty_token_returns_none(self, tmp_path):
        # Whitespace-only token values are treated as absent.
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        auth = codex_dir / "auth.json"
        auth.write_text(json.dumps({"tokens": {"access_token": " "}}))
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
            assert result is None

    def test_malformed_json_returns_none(self, tmp_path):
        # Corrupt JSON must not propagate a parse error to callers.
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text("{bad json")
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
            assert result is None

    def test_missing_tokens_key_returns_none(self, tmp_path):
        # Valid JSON without the expected "tokens" key is also treated as absent.
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text(json.dumps({"other": "data"}))
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
            assert result is None


class TestGetTextAuxiliaryClient:
    """Test the full resolution chain for get_text_auxiliary_client."""

    def test_openrouter_takes_priority(self, monkeypatch, codex_auth_dir):
        # OPENROUTER_API_KEY set -> OpenRouter wins even though Codex creds exist.
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
            assert model == "google/gemini-3-flash-preview"
            mock_openai.assert_called_once()
            call_kwargs = mock_openai.call_args
            assert call_kwargs.kwargs["api_key"] == "or-key"

    def test_nous_takes_priority_over_codex(self, monkeypatch, codex_auth_dir):
        # Nous Portal credentials outrank the Codex fallback.
        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_nous.return_value = {"access_token": "nous-tok"}
            client, model = get_text_auxiliary_client()
            assert model == "gemini-3-flash"

    def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
        # A custom OPENAI_BASE_URL endpoint outranks the Codex fallback.
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
        # NOTE(review): codex_auth_dir already applies this same setattr and is
        # not autouse — this re-application looks redundant; confirm intent.
        monkeypatch.setattr(
            "agent.auxiliary_client._read_codex_access_token",
            lambda: "codex-test-token-abc123",
        )
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
            assert model == "gpt-4o-mini"
            call_kwargs = mock_openai.call_args
            assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
        # With no OpenRouter / Nous / custom endpoint, Codex creds are used.
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
            assert model == "gpt-5.3-codex"
            # Returns a CodexAuxiliaryClient wrapper, not a raw OpenAI client
            from agent.auxiliary_client import CodexAuxiliaryClient
            assert isinstance(client, CodexAuxiliaryClient)

    def test_returns_none_when_nothing_available(self):
        # End of the chain: no provider at all -> (None, None), no exception.
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            client, model = get_text_auxiliary_client()
            assert client is None
            assert model is None


class TestCodexNotInVisionClient:
    """Codex fallback should NOT apply to vision tasks."""

    def test_vision_returns_none_without_openrouter_nous(self):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            client, model = get_vision_auxiliary_client()
            assert client is None
            assert model is None


class TestAuxiliaryMaxTokensParam:
    """Resolution of the max-token kwarg name per auxiliary provider."""

    def test_codex_fallback_uses_max_tokens(self, monkeypatch):
        """Codex adapter translates max_tokens internally, so we return max_tokens."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value="tok"):
            result = auxiliary_max_tokens_param(1024)
            assert result == {"max_tokens": 1024}

    def test_openrouter_uses_max_tokens(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        result = auxiliary_max_tokens_param(1024)
        assert result == {"max_tokens": 1024}

    def test_no_provider_uses_max_tokens(self):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            result = auxiliary_max_tokens_param(1024)
            assert result == {"max_tokens": 1024}
diff --git a/tests/agent/test_redact.py b/tests/agent/test_redact.py
new file mode 100644
index 000000000..52e015ca9
--- /dev/null
+++ b/tests/agent/test_redact.py
@@ -0,0 +1,173 @@
+"""Tests for agent.redact -- secret masking in logs and output."""
+
+import logging
+
+import pytest
+
+from agent.redact import redact_sensitive_text, RedactingFormatter
+
+
+class TestKnownPrefixes:
    def test_openai_sk_key(self):
        # Prefix survives as an identification hint; the secret tail is masked.
        text = "Using key sk-proj-abc123def456ghi789jkl012"
        result = redact_sensitive_text(text)
        assert "sk-pro" in result
        assert "abc123def456" not in result
        assert "..." in result

    def test_openrouter_sk_key(self):
        text = "OPENROUTER_API_KEY=sk-or-v1-abcdefghijklmnopqrstuvwxyz1234567890"
        result = redact_sensitive_text(text)
        assert "abcdefghijklmnop" not in result

    def test_github_pat_classic(self):
        result = redact_sensitive_text("token: ghp_abc123def456ghi789jkl")
        assert "abc123def456" not in result

    def test_github_pat_fine_grained(self):
        result = redact_sensitive_text("github_pat_abc123def456ghi789jklmno")
        assert "abc123def456" not in result

    def test_slack_token(self):
        # Synthetic xoxb- bot token shaped like Slack's id-and-secret format.
        token = "xoxb-" + "0" * 12 + "-" + "a" * 14
        result = redact_sensitive_text(token)
        assert "a" * 14 not in result

    def test_google_api_key(self):
        result = redact_sensitive_text("AIzaSyB-abc123def456ghi789jklmno012345")
        assert "abc123def456" not in result

    def test_perplexity_key(self):
        result = redact_sensitive_text("pplx-abcdef123456789012345")
        assert "abcdef12345" not in result

    def test_fal_key(self):
        result = redact_sensitive_text("fal_abc123def456ghi789jkl")
        assert "abc123def456" not in result

    def test_short_token_fully_masked(self):
        # Tokens too short to keep a safe prefix are masked entirely.
        result = redact_sensitive_text("key=sk-short1234567")
        assert "***" in result


class TestEnvAssignments:
    """NAME=value assignments: secret-looking names masked, others untouched."""

    def test_export_api_key(self):
        text = "export OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012"
        result = redact_sensitive_text(text)
        # Variable name is preserved for debuggability; value is masked.
        assert "OPENAI_API_KEY=" in result
        assert "abc123def456" not in result

    def test_quoted_value(self):
        text = 'MY_SECRET_TOKEN="supersecretvalue123456789"'
        result = redact_sensitive_text(text)
        assert "MY_SECRET_TOKEN=" in result
        assert "supersecretvalue" not in result

    def test_non_secret_env_unchanged(self):
        text = "HOME=/home/user"
        result = redact_sensitive_text(text)
        assert result == text

    def test_path_unchanged(self):
        text = "PATH=/usr/local/bin:/usr/bin"
        result = redact_sensitive_text(text)
        assert result == text


class TestJsonFields:
    """JSON bodies: secret-named fields masked, ordinary fields untouched."""

    def test_json_api_key(self):
        text = '{"apiKey": "sk-proj-abc123def456ghi789jkl012"}'
        result = redact_sensitive_text(text)
        assert "abc123def456" not in result

    def test_json_token(self):
        text = '{"access_token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.longtoken.here"}'
        result = redact_sensitive_text(text)
        assert "eyJhbGciOiJSUzI1NiIs" not in result

    def test_json_non_secret_unchanged(self):
        text = '{"name": "John", "model": "gpt-4"}'
        result = redact_sensitive_text(text)
        assert result == text


class TestAuthHeaders:
    """HTTP Authorization headers."""

    def test_bearer_token(self):
        text = "Authorization: Bearer sk-proj-abc123def456ghi789jkl012"
        result = redact_sensitive_text(text)
        # Header name survives; bearer credential is masked.
        assert "Authorization: Bearer" in result
        assert "abc123def456" not in result

    def test_case_insensitive(self):
        text = "authorization: bearer mytoken123456789012345678"
        result = redact_sensitive_text(text)
        assert "mytoken12345" not in result


class TestTelegramTokens:
    """Telegram bot tokens (botID:secret and raw id:secret forms)."""

    def test_bot_token(self):
        text = "bot123456789:ABCDEfghij-KLMNopqrst_UVWXyz12345"
        result = redact_sensitive_text(text)
        # Numeric bot id is kept; the secret half is replaced with ***.
        assert "ABCDEfghij" not in result
        assert "123456789:***" in result

    def test_raw_token(self):
        text = "12345678901:ABCDEfghijKLMNopqrstUVWXyz1234567890"
        result = redact_sensitive_text(text)
        assert "ABCDEfghij" not in result


class TestPassthrough:
    """Inputs that must come back completely unmodified."""

    def test_empty_string(self):
        assert redact_sensitive_text("") == ""

    def test_none_returns_none(self):
        assert redact_sensitive_text(None) is None

    def test_normal_text_unchanged(self):
        text = "Hello world, this is a normal log message with no secrets."
        assert redact_sensitive_text(text) == text

    def test_code_unchanged(self):
        # Source code with no secrets must pass through byte-for-byte.
        text = "def main():\n    print('hello')\n    return 42"
        result = redact_sensitive_text(text)
        assert redact_sensitive_text(text) == text

    def test_url_without_key_unchanged(self):
        text = "Connecting to https://api.openai.com/v1/chat/completions"
        result = redact_sensitive_text(text)
        assert redact_sensitive_text(text) == text


class TestRedactingFormatter:
    """The logging.Formatter subclass that redacts as it formats."""

    def test_formats_and_redacts(self):
        formatter = RedactingFormatter("%(message)s")
        # Hand-built LogRecord: no logger configuration needed.
        record = logging.LogRecord(
            name="test", level=logging.INFO, pathname="", lineno=0,
            msg="Key is sk-proj-abc123def456ghi789jkl012",
            args=(), exc_info=None,
        )
        result = formatter.format(record)
        assert "abc123def456" not in result
        assert "sk-pro" in result


class TestPrintenvSimulation:
    """Simulate what happens when the agent runs `env` or `printenv`."""

    def test_full_env_dump(self):
        env_dump = """HOME=/home/user
PATH=/usr/local/bin:/usr/bin
OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012mno345
OPENROUTER_API_KEY=sk-or-v1-reallyLongSecretKeyValue12345678
FIRECRAWL_API_KEY=fc-shortkey123456789012
TELEGRAM_BOT_TOKEN=bot987654321:ABCDEfghij-KLMNopqrst_UVWXyz12345
SHELL=/bin/bash
USER=teknium"""
        result = redact_sensitive_text(env_dump)
        # Secrets should be masked
        assert "abc123def456" not in result
        assert "reallyLongSecretKey" not in result
        assert "ABCDEfghij" not in result
        # Non-secrets should survive
        assert "HOME=/home/user" in result
        assert "SHELL=/bin/bash" in result
        assert "USER=teknium" in result
diff --git a/tests/agent/test_subagent_progress.py b/tests/agent/test_subagent_progress.py
new file mode 100644
index 000000000..b6e5e7525
--- /dev/null
+++ b/tests/agent/test_subagent_progress.py
@@ -0,0 +1,374 @@
+"""
+Tests for subagent progress relay (issue #169).

Verifies that:
- KawaiiSpinner.print_above() works with and without active spinner
- _build_child_progress_callback handles CLI/gateway/no-display paths
- Thinking events are relayed correctly
- Parallel callbacks don't share state
"""

import io
import sys
import time
import threading
import pytest
from unittest.mock import MagicMock, patch

from agent.display import KawaiiSpinner
from tools.delegate_tool import _build_child_progress_callback


# =========================================================================
# KawaiiSpinner.print_above tests
# =========================================================================

class TestPrintAbove:
    """Tests for KawaiiSpinner.print_above method."""

    def test_print_above_without_spinner_running(self):
        """print_above should write to stdout even when spinner is not running."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("test")
        spinner._out = buf  # Redirect to buffer

        spinner.print_above("hello world")
        output = buf.getvalue()
        assert "hello world" in output

    def test_print_above_with_spinner_running(self):
        """print_above should clear spinner line and print text."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("test")
        spinner._out = buf
        spinner.running = True  # Pretend spinner is running (don't start thread)

        spinner.print_above("tool line")
        output = buf.getvalue()
        assert "tool line" in output
        assert "\r" in output  # Should start with carriage return to clear spinner line

    def test_print_above_uses_captured_stdout(self):
        """print_above should use self._out, not sys.stdout.

        This ensures it works inside redirect_stdout(devnull)."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("test")
        spinner._out = buf

        # Simulate redirect_stdout(devnull)
        old_stdout = sys.stdout
        sys.stdout = io.StringIO()
        try:
            spinner.print_above("should go to buf")
        finally:
            sys.stdout = old_stdout

        assert "should go to buf" in buf.getvalue()


# =========================================================================
# _build_child_progress_callback tests
# =========================================================================

class TestBuildChildProgressCallback:
    """Tests for child progress callback builder."""

    def test_returns_none_when_no_display(self):
        """Should return None when parent has no spinner or callback."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent)
        assert cb is None

    def test_cli_spinner_tool_event(self):
        """Should print tool line above spinner for CLI path."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = buf
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent)
        assert cb is not None

        cb("web_search", "quantum computing")
        output = buf.getvalue()
        assert "web_search" in output
        assert "quantum computing" in output
        assert "├─" in output

    def test_cli_spinner_thinking_event(self):
        """Should print thinking line above spinner for CLI path."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = buf
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent)
        cb("_thinking", "I'll search for papers first")

        output = buf.getvalue()
        assert "💭" in output
        assert "search for papers" in output

    def test_gateway_batched_progress(self):
        """Gateway path should batch tool calls and flush at BATCH_SIZE."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb = _build_child_progress_callback(0, parent)

        # Send 4 tool calls — shouldn't flush yet (BATCH_SIZE = 5)
        for i in range(4):
            cb(f"tool_{i}", f"arg_{i}")
        parent_cb.assert_not_called()

        # 5th call should trigger flush
        cb("tool_4", "arg_4")
        parent_cb.assert_called_once()
        call_args = parent_cb.call_args
        assert "tool_0" in call_args[0][1]
        assert "tool_4" in call_args[0][1]

    def test_thinking_not_relayed_to_gateway(self):
        """Thinking events should NOT be sent to gateway (too noisy)."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb = _build_child_progress_callback(0, parent)
        cb("_thinking", "some reasoning text")

        parent_cb.assert_not_called()

    def test_parallel_callbacks_independent(self):
        """Each child's callback should have independent batch state."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb0 = _build_child_progress_callback(0, parent)
        cb1 = _build_child_progress_callback(1, parent)

        # Send 3 calls to each — neither should flush (batch size = 5)
        for i in range(3):
            cb0(f"tool_{i}")
            cb1(f"other_{i}")

        parent_cb.assert_not_called()

    def test_task_index_prefix_in_batch_mode(self):
        """Batch mode (task_count > 1) should show 1-indexed prefix for all tasks."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = buf
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        # task_index=0 in a batch of 3 → prefix "[1]"
        cb0 = _build_child_progress_callback(0, parent, task_count=3)
        cb0("web_search", "test")
        output = buf.getvalue()
        assert "[1]" in output

        # task_index=2 in a batch of 3 → prefix "[3]"
        buf.truncate(0)
        buf.seek(0)
        cb2 = _build_child_progress_callback(2, parent, task_count=3)
        cb2("web_search", "test")
        output = buf.getvalue()
        assert "[3]" in output

    def test_single_task_no_prefix(self):
        """Single task (task_count=1) should not show index prefix."""
        buf = io.StringIO()
        spinner = KawaiiSpinner("delegating")
        spinner._out = buf
        spinner.running = True

        parent = MagicMock()
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, parent, task_count=1)
        cb("web_search", "test")

        output = buf.getvalue()
        assert "[" not in output


# =========================================================================
# Integration: thinking callback in run_agent.py
# =========================================================================

class TestThinkingCallback:
    """Tests for the _thinking callback in AIAgent conversation loop."""

    def _simulate_thinking_callback(self, content, callback, delegate_depth=1):
        """Simulate the exact code path from run_agent.py for the thinking callback.

        delegate_depth: simulates self._delegate_depth.
        0 = main agent (should NOT fire), >=1 = subagent (should fire).
        """
        import re
        if (content and callback and delegate_depth > 0):
            _think_text = content.strip()
            # NOTE(review): the regex below appears garbled/empty in this dump —
            # production code strips reasoning tags (think / REASONING_SCRATCHPAD)
            # at this point; confirm the real pattern against run_agent.py.
            _think_text = re.sub(
                r'', '', _think_text
            ).strip()
            first_line = _think_text.split('\n')[0][:80] if _think_text else ""
            if first_line:
                try:
                    callback("_thinking", first_line)
                except Exception:
                    pass

    def test_thinking_callback_fires_on_content(self):
        """tool_progress_callback should receive _thinking event
        when assistant message has content."""
        calls = []
        self._simulate_thinking_callback(
            "I'll research quantum computing first, then summarize.",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 1
        assert calls[0][0] == "_thinking"
        assert "quantum computing" in calls[0][1]

    def test_thinking_callback_skipped_when_no_content(self):
        """Should not fire when assistant has no content."""
        calls = []
        self._simulate_thinking_callback(
            None,
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 0

    def test_thinking_callback_truncates_long_content(self):
        """Should truncate long content to 80 chars."""
        calls = []
        self._simulate_thinking_callback(
            "A" * 200 + "\nSecond line should be ignored",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 1
        assert len(calls[0][1]) == 80

    def test_thinking_callback_skipped_for_main_agent(self):
        """Main agent (delegate_depth=0) should NOT fire thinking events.

        This prevents gateway spam on Telegram/Discord."""
        calls = []
        self._simulate_thinking_callback(
            "I'll help you with that request.",
            lambda name, preview=None: calls.append((name, preview)),
            delegate_depth=0,
        )
        assert len(calls) == 0

    def test_thinking_callback_strips_reasoning_scratchpad(self):
        """REASONING_SCRATCHPAD tags should be stripped before display."""
        # NOTE(review): input/assert tag text appears stripped by sanitization
        # in this dump — restore the tagged fixture string from the repo.
        calls = []
        self._simulate_thinking_callback(
            "I need to analyze this carefully",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 1
        assert "" not in calls[0][1]
        assert "analyze this carefully" in calls[0][1]

    def test_thinking_callback_strips_think_tags(self):
        """ tags should be stripped before display."""
        calls = []
        self._simulate_thinking_callback(
            "Let me think about this problem",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 1
        assert "" not in calls[0][1]
        assert "think about this problem" in calls[0][1]

    def test_thinking_callback_empty_after_strip(self):
        """Should not fire when content is only XML tags."""
        calls = []
        self._simulate_thinking_callback(
            "",
            lambda name, preview=None: calls.append((name, preview))
        )
        assert len(calls) == 0


# =========================================================================
# Gateway batch flush tests
# =========================================================================

class TestBatchFlush:
    """Tests for gateway batch flush on subagent completion."""

    def test_flush_sends_remaining_batch(self):
        """_flush should send remaining tool names to gateway."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

        cb = _build_child_progress_callback(0, parent)

        # Send 3 tools (below batch size of 5)
        cb("web_search", "query1")
        cb("read_file", "file.txt")
        cb("write_file", "out.txt")
        parent_cb.assert_not_called()

        # Flush should
send the remaining 3 + cb._flush() + parent_cb.assert_called_once() + summary = parent_cb.call_args[0][1] + assert "web_search" in summary + assert "write_file" in summary + + def test_flush_noop_when_batch_empty(self): + """_flush should not send anything when batch is empty.""" + parent = MagicMock() + parent._delegate_spinner = None + parent_cb = MagicMock() + parent.tool_progress_callback = parent_cb + + cb = _build_child_progress_callback(0, parent) + cb._flush() + parent_cb.assert_not_called() + + def test_flush_noop_when_no_parent_callback(self): + """_flush should not crash when there's no parent callback.""" + buf = io.StringIO() + spinner = KawaiiSpinner("test") + spinner._out = buf + spinner.running = True + + parent = MagicMock() + parent._delegate_spinner = spinner + parent.tool_progress_callback = None + + cb = _build_child_progress_callback(0, parent) + cb("web_search", "test") + cb._flush() # Should not crash + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) + diff --git a/tests/gateway/test_media_extraction.py b/tests/gateway/test_media_extraction.py new file mode 100644 index 000000000..20f7d73a8 --- /dev/null +++ b/tests/gateway/test_media_extraction.py @@ -0,0 +1,184 @@ +""" +Tests for MEDIA tag extraction from tool results. + +Verifies that MEDIA tags (e.g., from TTS tool) are only extracted from +messages in the CURRENT turn, not from the full conversation history. +This prevents voice messages from accumulating and being sent multiple +times per reply. (Regression test for #160) +""" + +import pytest +import re + + +def extract_media_tags_fixed(result_messages, history_len): + """ + Extract MEDIA tags from tool results, but ONLY from new messages + (those added after history_len). This is the fixed behavior. 
+ + Args: + result_messages: Full list of messages including history + new + history_len: Length of history before this turn + + Returns: + Tuple of (media_tags list, has_voice_directive bool) + """ + media_tags = [] + has_voice_directive = False + + # Only process new messages from this turn + new_messages = result_messages[history_len:] if len(result_messages) > history_len else [] + + for msg in new_messages: + if msg.get("role") == "tool" or msg.get("role") == "function": + content = msg.get("content", "") + if "MEDIA:" in content: + for match in re.finditer(r'MEDIA:(\S+)', content): + path = match.group(1).strip().rstrip('",}') + if path: + media_tags.append(f"MEDIA:{path}") + if "[[audio_as_voice]]" in content: + has_voice_directive = True + + return media_tags, has_voice_directive + + +def extract_media_tags_broken(result_messages): + """ + The BROKEN behavior: extract MEDIA tags from ALL messages including history. + This causes TTS voice messages to accumulate and be re-sent on every reply. 
+ """ + media_tags = [] + has_voice_directive = False + + for msg in result_messages: + if msg.get("role") == "tool" or msg.get("role") == "function": + content = msg.get("content", "") + if "MEDIA:" in content: + for match in re.finditer(r'MEDIA:(\S+)', content): + path = match.group(1).strip().rstrip('",}') + if path: + media_tags.append(f"MEDIA:{path}") + if "[[audio_as_voice]]" in content: + has_voice_directive = True + + return media_tags, has_voice_directive + + +class TestMediaExtraction: + """Tests for MEDIA tag extraction from tool results.""" + + def test_media_tags_not_extracted_from_history(self): + """MEDIA tags from previous turns should NOT be extracted again.""" + # Simulate conversation history with a TTS call from a previous turn + history = [ + {"role": "user", "content": "Say hello as audio"}, + {"role": "assistant", "content": None, "tool_calls": [{"id": "1", "function": {"name": "text_to_speech"}}]}, + {"role": "tool", "tool_call_id": "1", "content": '{"success": true, "media_tag": "[[audio_as_voice]]\\nMEDIA:/path/to/audio1.ogg"}'}, + {"role": "assistant", "content": "I've said hello for you!"}, + ] + + # New turn: user asks a simple question + new_messages = [ + {"role": "user", "content": "What time is it?"}, + {"role": "assistant", "content": "It's 3:30 AM."}, + ] + + all_messages = history + new_messages + history_len = len(history) + + # Fixed behavior: should extract NO media tags (none in new messages) + tags, voice_directive = extract_media_tags_fixed(all_messages, history_len) + assert tags == [], "Fixed extraction should not find tags in history" + assert voice_directive is False + + # Broken behavior: would incorrectly extract the old media tag + broken_tags, broken_voice = extract_media_tags_broken(all_messages) + assert len(broken_tags) == 1, "Broken extraction finds tags in history" + assert "audio1.ogg" in broken_tags[0] + + def test_media_tags_extracted_from_current_turn(self): + """MEDIA tags from the current turn SHOULD be 
extracted.""" + # History without TTS + history = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + ] + + # New turn with TTS call + new_messages = [ + {"role": "user", "content": "Say goodbye as audio"}, + {"role": "assistant", "content": None, "tool_calls": [{"id": "2", "function": {"name": "text_to_speech"}}]}, + {"role": "tool", "tool_call_id": "2", "content": '{"success": true, "media_tag": "[[audio_as_voice]]\\nMEDIA:/path/to/audio2.ogg"}'}, + {"role": "assistant", "content": "I've said goodbye!"}, + ] + + all_messages = history + new_messages + history_len = len(history) + + # Fixed behavior: should extract the new media tag + tags, voice_directive = extract_media_tags_fixed(all_messages, history_len) + assert len(tags) == 1, "Should extract media tag from current turn" + assert "audio2.ogg" in tags[0] + assert voice_directive is True + + def test_multiple_tts_calls_in_history_not_accumulated(self): + """Multiple TTS calls in history should NOT accumulate in new responses.""" + # History with multiple TTS calls + history = [ + {"role": "user", "content": "Say hello"}, + {"role": "tool", "tool_call_id": "1", "content": 'MEDIA:/audio/hello.ogg'}, + {"role": "assistant", "content": "Done!"}, + {"role": "user", "content": "Say goodbye"}, + {"role": "tool", "tool_call_id": "2", "content": 'MEDIA:/audio/goodbye.ogg'}, + {"role": "assistant", "content": "Done!"}, + {"role": "user", "content": "Say thanks"}, + {"role": "tool", "tool_call_id": "3", "content": 'MEDIA:/audio/thanks.ogg'}, + {"role": "assistant", "content": "Done!"}, + ] + + # New turn: no TTS + new_messages = [ + {"role": "user", "content": "What time is it?"}, + {"role": "assistant", "content": "3 PM"}, + ] + + all_messages = history + new_messages + history_len = len(history) + + # Fixed: no tags + tags, _ = extract_media_tags_fixed(all_messages, history_len) + assert tags == [], "Should not accumulate tags from history" + + # Broken: would have 3 tags (all 
the old ones) + broken_tags, _ = extract_media_tags_broken(all_messages) + assert len(broken_tags) == 3, "Broken version accumulates all history tags" + + def test_deduplication_within_current_turn(self): + """Multiple MEDIA tags in current turn should be deduplicated.""" + history = [] + + # Current turn with multiple tool calls producing same media + new_messages = [ + {"role": "user", "content": "Multiple TTS"}, + {"role": "tool", "tool_call_id": "1", "content": 'MEDIA:/audio/same.ogg'}, + {"role": "tool", "tool_call_id": "2", "content": 'MEDIA:/audio/same.ogg'}, # duplicate + {"role": "tool", "tool_call_id": "3", "content": 'MEDIA:/audio/different.ogg'}, + {"role": "assistant", "content": "Done!"}, + ] + + all_messages = history + new_messages + + tags, _ = extract_media_tags_fixed(all_messages, 0) + # Even though same.ogg appears twice, deduplication happens after extraction + # The extraction itself should get both, then caller deduplicates + assert len(tags) == 3 # Raw extraction gets all + + # Deduplication as done in the actual code: + seen = set() + unique = [t for t in tags if t not in seen and not seen.add(t)] + assert len(unique) == 2 # After dedup: same.ogg and different.ogg + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_auth_codex_provider.py b/tests/test_auth_codex_provider.py new file mode 100644 index 000000000..7d3076807 --- /dev/null +++ b/tests/test_auth_codex_provider.py @@ -0,0 +1,210 @@ +import json +import time +import base64 +from contextlib import contextmanager +from pathlib import Path +from types import SimpleNamespace + +import pytest +import yaml + +from hermes_cli.auth import ( + AuthError, + DEFAULT_CODEX_BASE_URL, + PROVIDER_REGISTRY, + _persist_codex_auth_payload, + _login_openai_codex, + login_command, + get_codex_auth_status, + get_provider_auth_state, + read_codex_auth_file, + resolve_codex_runtime_credentials, + resolve_provider, +) + + +def _write_codex_auth(codex_home: Path, *, 
access_token: str = "access", refresh_token: str = "refresh") -> Path: + codex_home.mkdir(parents=True, exist_ok=True) + auth_file = codex_home / "auth.json" + auth_file.write_text( + json.dumps( + { + "auth_mode": "oauth", + "last_refresh": "2026-02-26T00:00:00Z", + "tokens": { + "access_token": access_token, + "refresh_token": refresh_token, + }, + } + ) + ) + return auth_file + + +def _jwt_with_exp(exp_epoch: int) -> str: + payload = {"exp": exp_epoch} + encoded = base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8")).rstrip(b"=").decode("utf-8") + return f"h.{encoded}.s" + + +def test_read_codex_auth_file_success(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + auth_file = _write_codex_auth(codex_home) + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + payload = read_codex_auth_file() + + assert payload["auth_path"] == auth_file + assert payload["tokens"]["access_token"] == "access" + assert payload["tokens"]["refresh_token"] == "refresh" + + +def test_resolve_codex_runtime_credentials_missing_access_token(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + _write_codex_auth(codex_home, access_token="") + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + with pytest.raises(AuthError) as exc: + resolve_codex_runtime_credentials() + + assert exc.value.code == "codex_auth_missing_access_token" + assert exc.value.relogin_required is True + + +def test_resolve_codex_runtime_credentials_refreshes_expiring_token(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + expiring_token = _jwt_with_exp(int(time.time()) - 10) + _write_codex_auth(codex_home, access_token=expiring_token, refresh_token="refresh-old") + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + called = {"count": 0} + + def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False): + called["count"] += 1 + assert auth_path == codex_home / "auth.json" + assert lock_held is True + return {"access_token": "access-new", "refresh_token": 
"refresh-new"} + + monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh) + + resolved = resolve_codex_runtime_credentials() + + assert called["count"] == 1 + assert resolved["api_key"] == "access-new" + + +def test_resolve_codex_runtime_credentials_force_refresh(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + _write_codex_auth(codex_home, access_token="access-current", refresh_token="refresh-old") + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + called = {"count": 0} + + def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False): + called["count"] += 1 + assert lock_held is True + return {"access_token": "access-forced", "refresh_token": "refresh-new"} + + monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh) + + resolved = resolve_codex_runtime_credentials(force_refresh=True, refresh_if_expiring=False) + + assert called["count"] == 1 + assert resolved["api_key"] == "access-forced" + + +def test_resolve_codex_runtime_credentials_uses_file_lock_on_refresh(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + _write_codex_auth(codex_home, access_token="access-current", refresh_token="refresh-old") + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + lock_calls = {"enter": 0, "exit": 0} + + @contextmanager + def _fake_lock(auth_path, timeout_seconds=15.0): + assert auth_path == codex_home / "auth.json" + lock_calls["enter"] += 1 + try: + yield + finally: + lock_calls["exit"] += 1 + + refresh_calls = {"count": 0} + + def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False): + refresh_calls["count"] += 1 + assert lock_held is True + return {"access_token": "access-updated", "refresh_token": "refresh-updated"} + + monkeypatch.setattr("hermes_cli.auth._codex_auth_file_lock", _fake_lock) + monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh) + + resolved = resolve_codex_runtime_credentials(force_refresh=True, 
refresh_if_expiring=False) + + assert refresh_calls["count"] == 1 + assert lock_calls["enter"] == 1 + assert lock_calls["exit"] == 1 + assert resolved["api_key"] == "access-updated" + + +def test_resolve_provider_explicit_codex_does_not_fallback(monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + assert resolve_provider("openai-codex") == "openai-codex" + + +def test_persist_codex_auth_payload_writes_atomically(tmp_path): + auth_path = tmp_path / "auth.json" + auth_path.write_text('{"stale":true}\n') + payload = { + "auth_mode": "oauth", + "tokens": { + "access_token": "next-access", + "refresh_token": "next-refresh", + }, + "last_refresh": "2026-02-26T00:00:00Z", + } + + _persist_codex_auth_payload(auth_path, payload) + + stored = json.loads(auth_path.read_text()) + assert stored == payload + assert list(tmp_path.glob(".auth.json.*.tmp")) == [] + + +def test_get_codex_auth_status_not_logged_in(tmp_path, monkeypatch): + monkeypatch.setenv("CODEX_HOME", str(tmp_path / "missing-codex-home")) + status = get_codex_auth_status() + assert status["logged_in"] is False + assert "error" in status + + +def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes-home" + codex_home = tmp_path / "codex-home" + _write_codex_auth(codex_home) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + # Mock input() to accept existing credentials + monkeypatch.setattr("builtins.input", lambda _: "y") + + _login_openai_codex(SimpleNamespace(), PROVIDER_REGISTRY["openai-codex"]) + + state = get_provider_auth_state("openai-codex") + assert state is not None + assert state["source"] == "codex-auth-json" + assert state["auth_file"].endswith("auth.json") + + config_path = hermes_home / "config.yaml" + config = yaml.safe_load(config_path.read_text()) + assert config["model"]["provider"] == "openai-codex" + assert 
config["model"]["base_url"] == DEFAULT_CODEX_BASE_URL + + +def test_login_command_shows_deprecation(monkeypatch, capsys): + """login_command is deprecated and directs users to hermes model.""" + with pytest.raises(SystemExit) as exc_info: + login_command(SimpleNamespace()) + assert exc_info.value.code == 0 + captured = capsys.readouterr() + assert "hermes model" in captured.out diff --git a/tests/test_cli_init.py b/tests/test_cli_init.py new file mode 100644 index 000000000..90ce05c72 --- /dev/null +++ b/tests/test_cli_init.py @@ -0,0 +1,80 @@ +"""Tests for HermesCLI initialization -- catches configuration bugs +that only manifest at runtime (not in mocked unit tests).""" + +import os +import sys +from unittest.mock import patch, MagicMock + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + + +def _make_cli(**kwargs): + """Create a HermesCLI instance with minimal mocking.""" + from cli import HermesCLI + with patch("cli.get_tool_definitions", return_value=[]): + return HermesCLI(**kwargs) + + +class TestMaxTurnsResolution: + """max_turns must always resolve to a positive integer, never None.""" + + def test_default_max_turns_is_integer(self): + cli = _make_cli() + assert isinstance(cli.max_turns, int) + assert cli.max_turns > 0 + + def test_explicit_max_turns_honored(self): + cli = _make_cli(max_turns=25) + assert cli.max_turns == 25 + + def test_none_max_turns_gets_default(self): + cli = _make_cli(max_turns=None) + assert isinstance(cli.max_turns, int) + assert cli.max_turns > 0 + + def test_env_var_max_turns(self, monkeypatch): + """Env var is used when config file doesn't set max_turns.""" + monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42") + import cli as cli_module + original = cli_module.CLI_CONFIG["agent"].get("max_turns") + cli_module.CLI_CONFIG["agent"]["max_turns"] = None + try: + cli_obj = _make_cli() + assert cli_obj.max_turns == 42 + finally: + if original is not None: + cli_module.CLI_CONFIG["agent"]["max_turns"] = 
original + + def test_max_turns_never_none_for_agent(self): + """The value passed to AIAgent must never be None (causes TypeError in run_conversation).""" + cli = _make_cli() + assert cli.max_turns is not None + + +class TestVerboseAndToolProgress: + def test_default_verbose_is_bool(self): + cli = _make_cli() + assert isinstance(cli.verbose, bool) + + def test_tool_progress_mode_is_string(self): + cli = _make_cli() + assert isinstance(cli.tool_progress_mode, str) + assert cli.tool_progress_mode in ("off", "new", "all", "verbose") + + +class TestProviderResolution: + def test_api_key_is_string_or_none(self): + cli = _make_cli() + assert cli.api_key is None or isinstance(cli.api_key, str) + + def test_base_url_is_string(self): + cli = _make_cli() + assert isinstance(cli.base_url, str) + assert cli.base_url.startswith("http") + + def test_model_is_string(self): + cli = _make_cli() + assert isinstance(cli.model, str) + assert len(cli.model) > 0 diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py new file mode 100644 index 000000000..3c8fe14a5 --- /dev/null +++ b/tests/test_cli_provider_resolution.py @@ -0,0 +1,187 @@ +import importlib +import sys +import types +from contextlib import nullcontext +from types import SimpleNamespace + +from hermes_cli.auth import AuthError +from hermes_cli import main as hermes_main + + +def _install_prompt_toolkit_stubs(): + class _Dummy: + def __init__(self, *args, **kwargs): + pass + + class _Condition: + def __init__(self, func): + self.func = func + + def __bool__(self): + return bool(self.func()) + + class _ANSI(str): + pass + + root = types.ModuleType("prompt_toolkit") + history = types.ModuleType("prompt_toolkit.history") + styles = types.ModuleType("prompt_toolkit.styles") + patch_stdout = types.ModuleType("prompt_toolkit.patch_stdout") + application = types.ModuleType("prompt_toolkit.application") + layout = types.ModuleType("prompt_toolkit.layout") + processors = 
types.ModuleType("prompt_toolkit.layout.processors") + filters = types.ModuleType("prompt_toolkit.filters") + dimension = types.ModuleType("prompt_toolkit.layout.dimension") + menus = types.ModuleType("prompt_toolkit.layout.menus") + widgets = types.ModuleType("prompt_toolkit.widgets") + key_binding = types.ModuleType("prompt_toolkit.key_binding") + completion = types.ModuleType("prompt_toolkit.completion") + formatted_text = types.ModuleType("prompt_toolkit.formatted_text") + + history.FileHistory = _Dummy + styles.Style = _Dummy + patch_stdout.patch_stdout = lambda *args, **kwargs: nullcontext() + application.Application = _Dummy + layout.Layout = _Dummy + layout.HSplit = _Dummy + layout.Window = _Dummy + layout.FormattedTextControl = _Dummy + layout.ConditionalContainer = _Dummy + processors.Processor = _Dummy + processors.Transformation = _Dummy + processors.PasswordProcessor = _Dummy + processors.ConditionalProcessor = _Dummy + filters.Condition = _Condition + dimension.Dimension = _Dummy + menus.CompletionsMenu = _Dummy + widgets.TextArea = _Dummy + key_binding.KeyBindings = _Dummy + completion.Completer = _Dummy + completion.Completion = _Dummy + formatted_text.ANSI = _ANSI + root.print_formatted_text = lambda *args, **kwargs: None + + sys.modules.setdefault("prompt_toolkit", root) + sys.modules.setdefault("prompt_toolkit.history", history) + sys.modules.setdefault("prompt_toolkit.styles", styles) + sys.modules.setdefault("prompt_toolkit.patch_stdout", patch_stdout) + sys.modules.setdefault("prompt_toolkit.application", application) + sys.modules.setdefault("prompt_toolkit.layout", layout) + sys.modules.setdefault("prompt_toolkit.layout.processors", processors) + sys.modules.setdefault("prompt_toolkit.filters", filters) + sys.modules.setdefault("prompt_toolkit.layout.dimension", dimension) + sys.modules.setdefault("prompt_toolkit.layout.menus", menus) + sys.modules.setdefault("prompt_toolkit.widgets", widgets) + 
sys.modules.setdefault("prompt_toolkit.key_binding", key_binding) + sys.modules.setdefault("prompt_toolkit.completion", completion) + sys.modules.setdefault("prompt_toolkit.formatted_text", formatted_text) + + +def _import_cli(): + try: + importlib.import_module("prompt_toolkit") + except ModuleNotFoundError: + _install_prompt_toolkit_stubs() + return importlib.import_module("cli") + + +def test_hermes_cli_init_does_not_eagerly_resolve_runtime_provider(monkeypatch): + cli = _import_cli() + calls = {"count": 0} + + def _unexpected_runtime_resolve(**kwargs): + calls["count"] += 1 + raise AssertionError("resolve_runtime_provider should not be called in HermesCLI.__init__") + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _unexpected_runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + + shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1) + + assert shell is not None + assert calls["count"] == 0 + + +def test_runtime_resolution_failure_is_not_sticky(monkeypatch): + cli = _import_cli() + calls = {"count": 0} + + def _runtime_resolve(**kwargs): + calls["count"] += 1 + if calls["count"] == 1: + raise RuntimeError("temporary auth failure") + return { + "provider": "openrouter", + "api_mode": "chat_completions", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "test-key", + "source": "env/config", + } + + class _DummyAgent: + def __init__(self, *args, **kwargs): + self.kwargs = kwargs + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + monkeypatch.setattr(cli, "AIAgent", _DummyAgent) + + shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1) + + assert shell._init_agent() is False + assert shell._init_agent() is True + assert calls["count"] == 2 + assert shell.agent is not None + + +def 
test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch): + cli = _import_cli() + + def _runtime_resolve(**kwargs): + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://same-endpoint.example/v1", + "api_key": "same-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + + shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1) + shell.provider = "openrouter" + shell.api_mode = "chat_completions" + shell.base_url = "https://same-endpoint.example/v1" + shell.api_key = "same-key" + shell.agent = object() + + assert shell._ensure_runtime_credentials() is True + assert shell.agent is None + assert shell.provider == "openai-codex" + assert shell.api_mode == "codex_responses" + + +def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys): + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"model": {"default": "gpt-5", "provider": "invalid-provider"}}, + ) + monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None) + monkeypatch.setattr("hermes_cli.config.get_env_value", lambda key: "") + monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: None) + + def _resolve_provider(requested, **kwargs): + if requested == "invalid-provider": + raise AuthError("Unknown provider 'invalid-provider'.", code="invalid_provider") + return "openrouter" + + monkeypatch.setattr("hermes_cli.auth.resolve_provider", _resolve_provider) + monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: len(choices) - 1) + + hermes_main.cmd_model(SimpleNamespace()) + output = capsys.readouterr().out + + assert "Warning:" in output + assert "falling back to auto provider detection" in output.lower() + assert "No change." 
# Stub heavyweight optional deps BEFORE importing the agent modules.
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())

import cron.scheduler as cron_scheduler
import gateway.run as gateway_run
import run_agent
from gateway.config import Platform
from gateway.session import SessionSource


def _patch_agent_bootstrap(monkeypatch):
    """Give AIAgent a single dummy tool and bypass toolset requirement checks."""
    monkeypatch.setattr(
        run_agent,
        "get_tool_definitions",
        lambda **kwargs: [
            {
                "type": "function",
                "function": {
                    "name": "terminal",
                    "description": "Run shell commands.",
                    "parameters": {"type": "object", "properties": {}},
                },
            }
        ],
    )
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})


def _codex_message_response(text: str):
    """Build a fake Codex Responses-API payload carrying a single text message."""
    return SimpleNamespace(
        output=[
            SimpleNamespace(
                type="message",
                content=[SimpleNamespace(type="output_text", text=text)],
            )
        ],
        usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8),
        status="completed",
        model="gpt-5-codex",
    )


class _UnauthorizedError(RuntimeError):
    """Mimics an HTTP 401 error raised by the OpenAI client."""

    def __init__(self):
        super().__init__("Error code: 401 - unauthorized")
        self.status_code = 401


class _FakeOpenAI:
    """Minimal OpenAI client stand-in that records its constructor kwargs."""

    def __init__(self, **kwargs):
        self.kwargs = kwargs

    def close(self):
        return None


class _Codex401ThenSuccessAgent(run_agent.AIAgent):
    """Agent whose first API call 401s, exercising the internal token refresh."""

    refresh_attempts = 0
    last_init = {}

    def __init__(self, *args, **kwargs):
        kwargs.setdefault("skip_context_files", True)
        kwargs.setdefault("skip_memory", True)
        kwargs.setdefault("max_iterations", 4)
        type(self).last_init = dict(kwargs)
        super().__init__(*args, **kwargs)
        # Disable persistence side effects for the test run.
        self._cleanup_task_resources = lambda task_id: None
        self._persist_session = lambda messages, history=None: None
        self._save_trajectory = lambda messages, user_message, completed: None
        self._save_session_log = lambda messages: None

    def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
        type(self).refresh_attempts += 1
        return True

    def run_conversation(self, user_message: str, conversation_history=None):
        calls = {"api": 0}

        def _fake_api_call(api_kwargs):
            calls["api"] += 1
            if calls["api"] == 1:
                raise _UnauthorizedError()
            return _codex_message_response("Recovered via refresh")

        self._interruptible_api_call = _fake_api_call
        return super().run_conversation(user_message, conversation_history=conversation_history)


def test_cron_run_job_codex_path_handles_internal_401_refresh(monkeypatch):
    """cron.run_job on the Codex path recovers from a 401 via one token refresh."""
    _patch_agent_bootstrap(monkeypatch)
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
    monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)
    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider",
        lambda requested=None: {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "codex-token",
        },
    )
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))

    _Codex401ThenSuccessAgent.refresh_attempts = 0
    _Codex401ThenSuccessAgent.last_init = {}

    success, output, final_response, error = cron_scheduler.run_job(
        {"id": "job-1", "name": "Codex Refresh Test", "prompt": "ping"}
    )

    assert success is True
    assert error is None
    assert final_response == "Recovered via refresh"
    assert "Recovered via refresh" in output
    assert _Codex401ThenSuccessAgent.refresh_attempts == 1
    assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex"
    assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses"


def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
    """GatewayRunner._run_agent on the Codex path recovers from a 401 via refresh."""
    _patch_agent_bootstrap(monkeypatch)
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
    monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)
    monkeypatch.setattr(
        gateway_run,
        "_resolve_runtime_agent_kwargs",
        lambda: {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "codex-token",
        },
    )
    monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")

    _Codex401ThenSuccessAgent.refresh_attempts = 0
    _Codex401ThenSuccessAgent.last_init = {}

    # Build a bare runner without running GatewayRunner.__init__ (avoids I/O).
    runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
    runner.adapters = {}
    runner._ephemeral_system_prompt = ""
    runner._prefill_messages = []
    runner._reasoning_config = None
    runner._running_agents = {}
    from unittest.mock import MagicMock, AsyncMock
    runner.hooks = MagicMock()
    runner.hooks.emit = AsyncMock()
    runner.hooks.loaded_hooks = []
    runner._session_db = None

    source = SessionSource(
        platform=Platform.LOCAL,
        chat_id="cli",
        chat_name="CLI",
        chat_type="dm",
        user_id="user-1",
    )

    result = asyncio.run(
        runner._run_agent(
            message="ping",
            context_prompt="",
            history=[],
            source=source,
            session_id="session-1",
            session_key="agent:main:local:dm",
        )
    )

    assert result["final_response"] == "Recovered via refresh"
    assert _Codex401ThenSuccessAgent.refresh_attempts == 1
    assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex"
    assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses"
codex_home = tmp_path / "codex-home" + codex_home.mkdir(parents=True, exist_ok=True) + (codex_home / "config.toml").write_text('model = "gpt-5.2-codex"\n') + (codex_home / "models_cache.json").write_text( + json.dumps( + { + "models": [ + {"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True}, + {"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True}, + {"slug": "gpt-4o", "priority": 1, "supported_in_api": True}, + {"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"}, + ] + } + ) + ) + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + models = get_codex_model_ids() + + assert models[0] == "gpt-5.2-codex" + assert "gpt-5.1-codex" in models + assert "gpt-5.3-codex" in models + assert "gpt-4o" not in models + assert "gpt-5-hidden-codex" not in models + + +def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatch): + codex_home = tmp_path / "codex-home" + codex_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("CODEX_HOME", str(codex_home)) + + models = get_codex_model_ids() + + assert models[: len(DEFAULT_CODEX_MODELS)] == DEFAULT_CODEX_MODELS diff --git a/tests/test_external_credential_detection.py b/tests/test_external_credential_detection.py new file mode 100644 index 000000000..a1fe2a2f9 --- /dev/null +++ b/tests/test_external_credential_detection.py @@ -0,0 +1,51 @@ +"""Tests for detect_external_credentials() -- Phase 2 credential sync.""" + +import json +from pathlib import Path +from unittest.mock import patch + +import pytest + +from hermes_cli.auth import detect_external_credentials + + +class TestDetectCodexCLI: + def test_detects_valid_codex_auth(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + auth = codex_dir / "auth.json" + auth.write_text(json.dumps({ + "tokens": {"access_token": "tok-123", "refresh_token": "ref-456"} + })) + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir): + result = detect_external_credentials() + 
codex_hits = [c for c in result if c["provider"] == "openai-codex"] + assert len(codex_hits) == 1 + assert "Codex CLI" in codex_hits[0]["label"] + assert str(auth) == codex_hits[0]["path"] + + def test_skips_codex_without_access_token(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "auth.json").write_text(json.dumps({"tokens": {}})) + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir): + result = detect_external_credentials() + assert not any(c["provider"] == "openai-codex" for c in result) + + def test_skips_missing_codex_dir(self, tmp_path): + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=tmp_path / "nonexistent"): + result = detect_external_credentials() + assert not any(c["provider"] == "openai-codex" for c in result) + + def test_skips_malformed_codex_auth(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "auth.json").write_text("{bad json") + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir): + result = detect_external_credentials() + assert not any(c["provider"] == "openai-codex" for c in result) + + def test_returns_empty_when_nothing_found(self, tmp_path): + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=tmp_path / ".codex"): + result = detect_external_credentials() + assert result == [] diff --git a/tests/test_flush_memories_codex.py b/tests/test_flush_memories_codex.py new file mode 100644 index 000000000..22eef5ab0 --- /dev/null +++ b/tests/test_flush_memories_codex.py @@ -0,0 +1,225 @@ +"""Tests for flush_memories() working correctly across all provider modes. + +Catches the bug where Codex mode called chat.completions.create on a +Responses-only client, which would fail silently or with a 404. 
+""" + +import json +import os +import sys +import types +from types import SimpleNamespace +from unittest.mock import patch, MagicMock, call + +import pytest + +sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) +sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) +sys.modules.setdefault("fal_client", types.SimpleNamespace()) + +import run_agent + + +class _FakeOpenAI: + def __init__(self, **kwargs): + self.kwargs = kwargs + self.api_key = kwargs.get("api_key", "test") + self.base_url = kwargs.get("base_url", "http://test") + + def close(self): + pass + + +def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"): + """Build an AIAgent with mocked internals, ready for flush_memories testing.""" + monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kw: [ + { + "type": "function", + "function": { + "name": "memory", + "description": "Manage memories.", + "parameters": { + "type": "object", + "properties": { + "action": {"type": "string"}, + "target": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + }, + }, + ]) + monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {}) + monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI) + + agent = run_agent.AIAgent( + api_key="test-key", + base_url="https://test.example.com/v1", + provider=provider, + api_mode=api_mode, + max_iterations=4, + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + # Give it a valid memory store + agent._memory_store = MagicMock() + agent._memory_flush_min_turns = 1 + agent._user_turn_count = 5 + return agent + + +def _chat_response_with_memory_call(): + """Simulated chat completions response with a memory tool call.""" + return SimpleNamespace( + choices=[SimpleNamespace( + message=SimpleNamespace( + content=None, + tool_calls=[SimpleNamespace( + function=SimpleNamespace( + name="memory", + arguments=json.dumps({ + "action": "add", + "target": "notes", + 
"content": "User prefers dark mode.", + }), + ), + )], + ), + )], + usage=SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120), + ) + + +class TestFlushMemoriesUsesAuxiliaryClient: + """When an auxiliary client is available, flush_memories should use it + instead of self.client -- especially critical in Codex mode.""" + + def test_flush_uses_auxiliary_when_available(self, monkeypatch): + agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex") + + mock_aux_client = MagicMock() + mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + {"role": "user", "content": "Remember this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: + agent.flush_memories(messages) + + mock_aux_client.chat.completions.create.assert_called_once() + call_kwargs = mock_aux_client.chat.completions.create.call_args + assert call_kwargs.kwargs.get("model") == "gpt-4o-mini" or call_kwargs[1].get("model") == "gpt-4o-mini" + + def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch): + """Non-Codex mode with no auxiliary falls back to self.client.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + agent.client = MagicMock() + agent.client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + {"role": "user", "content": "Save this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved."): + agent.flush_memories(messages) + + 
agent.client.chat.completions.create.assert_called_once() + + def test_flush_executes_memory_tool_calls(self, monkeypatch): + """Verify that memory tool calls from the flush response actually get executed.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + + mock_aux_client = MagicMock() + mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Note this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: + agent.flush_memories(messages) + + mock_memory.assert_called_once() + call_kwargs = mock_memory.call_args + assert call_kwargs.kwargs["action"] == "add" + assert call_kwargs.kwargs["target"] == "notes" + assert "dark mode" in call_kwargs.kwargs["content"] + + def test_flush_strips_artifacts_from_messages(self, monkeypatch): + """After flush, the flush prompt and any response should be removed from messages.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + + mock_aux_client = MagicMock() + mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Remember X"}, + ] + original_len = len(messages) + with patch("tools.memory_tool.memory_tool", return_value="Saved."): + agent.flush_memories(messages) + + # Messages should not grow from the flush + assert len(messages) <= original_len + # No flush sentinel should remain + for msg in messages: + assert "_flush_sentinel" not in msg + + +class 
TestFlushMemoriesCodexFallback: + """When no auxiliary client exists and we're in Codex mode, flush should + use the Codex Responses API path instead of chat.completions.""" + + def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch): + agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex") + + codex_response = SimpleNamespace( + output=[ + SimpleNamespace( + type="function_call", + call_id="call_1", + name="memory", + arguments=json.dumps({ + "action": "add", + "target": "notes", + "content": "Codex flush test", + }), + ), + ], + usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60), + status="completed", + model="gpt-5-codex", + ) + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \ + patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \ + patch.object(agent, "_build_api_kwargs") as mock_build, \ + patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: + mock_build.return_value = { + "model": "gpt-5-codex", + "instructions": "test", + "input": [], + "tools": [], + "max_output_tokens": 4096, + } + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Save this"}, + ] + agent.flush_memories(messages) + + mock_stream.assert_called_once() + mock_memory.assert_called_once() + assert mock_memory.call_args.kwargs["content"] == "Codex flush test" diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py new file mode 100644 index 000000000..82199ac4c --- /dev/null +++ b/tests/test_provider_parity.py @@ -0,0 +1,460 @@ +"""Provider parity tests: verify that AIAgent builds correct API kwargs +and handles responses properly for all supported providers. + +Ensures changes to one provider path don't silently break another. 
+""" + +import json +import os +import sys +import types +from types import SimpleNamespace +from unittest.mock import patch, MagicMock + +import pytest + +sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) +sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) +sys.modules.setdefault("fal_client", types.SimpleNamespace()) + +from run_agent import AIAgent + + +# ── Helpers ────────────────────────────────────────────────────────────────── + +def _tool_defs(*names): + return [ + { + "type": "function", + "function": { + "name": n, + "description": f"{n} tool", + "parameters": {"type": "object", "properties": {}}, + }, + } + for n in names + ] + + +class _FakeOpenAI: + def __init__(self, **kw): + self.api_key = kw.get("api_key", "test") + self.base_url = kw.get("base_url", "http://test") + def close(self): + pass + + +def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"): + monkeypatch.setattr("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal")) + monkeypatch.setattr("run_agent.check_toolset_requirements", lambda: {}) + monkeypatch.setattr("run_agent.OpenAI", _FakeOpenAI) + return AIAgent( + api_key="test-key", + base_url=base_url, + provider=provider, + api_mode=api_mode, + max_iterations=4, + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + +# ── _build_api_kwargs tests ───────────────────────────────────────────────── + +class TestBuildApiKwargsOpenRouter: + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "model" in kwargs + assert kwargs["messages"][-1]["content"] == "hi" + + def test_includes_reasoning_in_extra_body(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", 
"content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert "reasoning" in extra + assert extra["reasoning"]["enabled"] is True + + def test_includes_tools(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "tools" in kwargs + tool_names = [t["function"]["name"] for t in kwargs["tools"]] + assert "web_search" in tool_names + + def test_no_responses_api_fields(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "input" not in kwargs + assert "instructions" not in kwargs + assert "store" not in kwargs + + +class TestBuildApiKwargsNousPortal: + def test_includes_nous_product_tags(self, monkeypatch): + agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert extra.get("tags") == ["product=hermes-agent"] + + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "input" not in kwargs + + +class TestBuildApiKwargsCustomEndpoint: + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "input" not in kwargs + + def test_no_openrouter_extra_body(self, monkeypatch): + agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1") + messages = [{"role": 
"user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert "reasoning" not in extra + + +class TestBuildApiKwargsCodex: + def test_uses_responses_api_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "input" in kwargs + assert "instructions" in kwargs + assert "messages" not in kwargs + assert kwargs["store"] is False + + def test_includes_reasoning_config(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "reasoning" in kwargs + assert kwargs["reasoning"]["effort"] == "medium" + + def test_includes_encrypted_content_in_include(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "reasoning.encrypted_content" in kwargs.get("include", []) + + def test_tools_converted_to_responses_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + tools = kwargs.get("tools", []) + assert len(tools) > 0 + # Responses format has "name" at top level, not nested under "function" + assert "name" in tools[0] + assert "function" not in tools[0] + + +# ── Message conversion tests ──────────────────────────────────────────────── + +class TestChatMessagesToResponsesInput: + """Verify _chat_messages_to_responses_input for Codex 
mode.""" + + def test_user_message_passes_through(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hello"}] + items = agent._chat_messages_to_responses_input(messages) + assert items == [{"role": "user", "content": "hello"}] + + def test_system_messages_filtered(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "system", "content": "be helpful"}, + {"role": "user", "content": "hello"}, + ] + items = agent._chat_messages_to_responses_input(messages) + assert len(items) == 1 + assert items[0]["role"] == "user" + + def test_assistant_tool_calls_become_function_call_items(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{ + "role": "assistant", + "content": "", + "tool_calls": [{ + "id": "call_abc", + "call_id": "call_abc", + "function": {"name": "web_search", "arguments": '{"query": "test"}'}, + }], + }] + items = agent._chat_messages_to_responses_input(messages) + fc_items = [i for i in items if i.get("type") == "function_call"] + assert len(fc_items) == 1 + assert fc_items[0]["name"] == "web_search" + assert fc_items[0]["call_id"] == "call_abc" + + def test_tool_results_become_function_call_output(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "tool", "tool_call_id": "call_abc", "content": "result here"}] + items = agent._chat_messages_to_responses_input(messages) + assert items[0]["type"] == "function_call_output" + assert items[0]["call_id"] == "call_abc" + assert items[0]["output"] == "result here" + + def test_encrypted_reasoning_replayed(self, 
monkeypatch): + """Encrypted reasoning items from previous turns must be included in input.""" + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "user", "content": "think about this"}, + { + "role": "assistant", + "content": "I thought about it.", + "codex_reasoning_items": [ + {"type": "reasoning", "id": "rs_abc", "encrypted_content": "gAAAA_test_blob"}, + ], + }, + {"role": "user", "content": "continue"}, + ] + items = agent._chat_messages_to_responses_input(messages) + reasoning_items = [i for i in items if i.get("type") == "reasoning"] + assert len(reasoning_items) == 1 + assert reasoning_items[0]["encrypted_content"] == "gAAAA_test_blob" + + def test_no_reasoning_items_for_non_codex_messages(self, monkeypatch): + """Messages without codex_reasoning_items should not inject anything.""" + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "assistant", "content": "hi"}, + {"role": "user", "content": "hello"}, + ] + items = agent._chat_messages_to_responses_input(messages) + reasoning_items = [i for i in items if i.get("type") == "reasoning"] + assert len(reasoning_items) == 0 + + +# ── Response normalization tests ───────────────────────────────────────────── + +class TestNormalizeCodexResponse: + """Verify _normalize_codex_response extracts all fields correctly.""" + + def _make_codex_agent(self, monkeypatch): + return _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + + def test_text_response(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="Hello!")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = 
agent._normalize_codex_response(response) + assert msg.content == "Hello!" + assert reason == "stop" + + def test_reasoning_summary_extracted(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="reasoning", + encrypted_content="gAAAA_blob", + summary=[SimpleNamespace(type="summary_text", text="Thinking about math")], + id="rs_123", status=None), + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="42")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.content == "42" + assert "math" in msg.reasoning + assert reason == "stop" + + def test_encrypted_content_captured(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="reasoning", + encrypted_content="gAAAA_secret_blob_123", + summary=[SimpleNamespace(type="summary_text", text="Thinking")], + id="rs_456", status=None), + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="done")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.codex_reasoning_items is not None + assert len(msg.codex_reasoning_items) == 1 + assert msg.codex_reasoning_items[0]["encrypted_content"] == "gAAAA_secret_blob_123" + assert msg.codex_reasoning_items[0]["id"] == "rs_456" + + def test_no_encrypted_content_when_missing(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="no reasoning")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.codex_reasoning_items is None + + def 
test_tool_calls_extracted(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="function_call", status="completed", + call_id="call_xyz", name="web_search", + arguments='{"query":"test"}', id="fc_xyz"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert reason == "tool_calls" + assert len(msg.tool_calls) == 1 + assert msg.tool_calls[0].function.name == "web_search" + + +# ── Chat completions response handling (OpenRouter/Nous) ───────────────────── + +class TestBuildAssistantMessage: + """Verify _build_assistant_message works for all provider response formats.""" + + def test_openrouter_reasoning_fields(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + msg = SimpleNamespace( + content="answer", + tool_calls=None, + reasoning="I thought about it", + reasoning_content=None, + reasoning_details=None, + ) + result = agent._build_assistant_message(msg, "stop") + assert result["content"] == "answer" + assert result["reasoning"] == "I thought about it" + assert "codex_reasoning_items" not in result + + def test_openrouter_reasoning_details_preserved_unmodified(self, monkeypatch): + """reasoning_details must be passed back exactly as received for + multi-turn continuity (OpenRouter, Anthropic, OpenAI all need this).""" + agent = _make_agent(monkeypatch, "openrouter") + original_detail = { + "type": "thinking", + "thinking": "deep thoughts here", + "signature": "sig123_opaque_blob", + "encrypted_content": "some_provider_blob", + "extra_field": "should_not_be_dropped", + } + msg = SimpleNamespace( + content="answer", + tool_calls=None, + reasoning=None, + reasoning_content=None, + reasoning_details=[original_detail], + ) + result = agent._build_assistant_message(msg, "stop") + stored = result["reasoning_details"][0] + # ALL fields must survive, not just type/text/signature + assert stored["signature"] == "sig123_opaque_blob" + 
assert stored["encrypted_content"] == "some_provider_blob" + assert stored["extra_field"] == "should_not_be_dropped" + assert stored["thinking"] == "deep thoughts here" + + def test_codex_preserves_encrypted_reasoning(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + msg = SimpleNamespace( + content="result", + tool_calls=None, + reasoning="summary text", + reasoning_content=None, + reasoning_details=None, + codex_reasoning_items=[ + {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"}, + ], + ) + result = agent._build_assistant_message(msg, "stop") + assert result["codex_reasoning_items"] == [ + {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"}, + ] + + def test_plain_message_no_codex_items(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + msg = SimpleNamespace( + content="simple", + tool_calls=None, + reasoning=None, + reasoning_content=None, + reasoning_details=None, + ) + result = agent._build_assistant_message(msg, "stop") + assert "codex_reasoning_items" not in result + + +# ── Auxiliary client provider resolution ───────────────────────────────────── + +class TestAuxiliaryClientProviderPriority: + """Verify auxiliary client resolution doesn't break for any provider.""" + + def test_openrouter_always_wins(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + from agent.auxiliary_client import get_text_auxiliary_client + with patch("agent.auxiliary_client.OpenAI") as mock: + client, model = get_text_auxiliary_client() + assert model == "google/gemini-3-flash-preview" + assert "openrouter" in str(mock.call_args.kwargs["base_url"]).lower() + + def test_nous_when_no_openrouter(self, monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + from agent.auxiliary_client import get_text_auxiliary_client + with patch("agent.auxiliary_client._read_nous_auth", 
return_value={"access_token": "nous-tok"}), \ + patch("agent.auxiliary_client.OpenAI") as mock: + client, model = get_text_auxiliary_client() + assert model == "gemini-3-flash" + + def test_custom_endpoint_when_no_nous(self, monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1") + monkeypatch.setenv("OPENAI_API_KEY", "local-key") + from agent.auxiliary_client import get_text_auxiliary_client + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI") as mock: + client, model = get_text_auxiliary_client() + assert mock.call_args.kwargs["base_url"] == "http://localhost:1234/v1" + + def test_codex_fallback_last_resort(self, monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + from agent.auxiliary_client import get_text_auxiliary_client, CodexAuxiliaryClient + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-tok"), \ + patch("agent.auxiliary_client.OpenAI"): + client, model = get_text_auxiliary_client() + assert model == "gpt-5.3-codex" + assert isinstance(client, CodexAuxiliaryClient) diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py new file mode 100644 index 000000000..a1e5e817e --- /dev/null +++ b/tests/test_run_agent_codex_responses.py @@ -0,0 +1,748 @@ +import sys +import types +from types import SimpleNamespace + +import pytest + + +sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) +sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) +sys.modules.setdefault("fal_client", types.SimpleNamespace()) + +import run_agent + + +def _patch_agent_bootstrap(monkeypatch): + monkeypatch.setattr( + 
run_agent, + "get_tool_definitions", + lambda **kwargs: [ + { + "type": "function", + "function": { + "name": "terminal", + "description": "Run shell commands.", + "parameters": {"type": "object", "properties": {}}, + }, + } + ], + ) + monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {}) + + +def _build_agent(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + + agent = run_agent.AIAgent( + model="gpt-5-codex", + base_url="https://chatgpt.com/backend-api/codex", + api_key="codex-token", + quiet_mode=True, + max_iterations=4, + skip_context_files=True, + skip_memory=True, + ) + agent._cleanup_task_resources = lambda task_id: None + agent._persist_session = lambda messages, history=None: None + agent._save_trajectory = lambda messages, user_message, completed: None + agent._save_session_log = lambda messages: None + return agent + + +def _codex_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8), + status="completed", + model="gpt-5-codex", + ) + + +def _codex_tool_call_response(): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="function_call", + id="fc_1", + call_id="call_1", + name="terminal", + arguments="{}", + ) + ], + usage=SimpleNamespace(input_tokens=12, output_tokens=4, total_tokens=16), + status="completed", + model="gpt-5-codex", + ) + + +def _codex_incomplete_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + status="in_progress", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="in_progress", + model="gpt-5-codex", + ) + + +def _codex_commentary_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + phase="commentary", + 
status="completed", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="completed", + model="gpt-5-codex", + ) + + +def _codex_ack_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + status="completed", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="completed", + model="gpt-5-codex", + ) + + +class _FakeResponsesStream: + def __init__(self, *, final_response=None, final_error=None): + self._final_response = final_response + self._final_error = final_error + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def __iter__(self): + return iter(()) + + def get_final_response(self): + if self._final_error is not None: + raise self._final_error + return self._final_response + + +class _FakeCreateStream: + def __init__(self, events): + self._events = list(events) + self.closed = False + + def __iter__(self): + return iter(self._events) + + def close(self): + self.closed = True + + +def _codex_request_kwargs(): + return { + "model": "gpt-5-codex", + "instructions": "You are Hermes.", + "input": [{"role": "user", "content": "Ping"}], + "tools": None, + "store": False, + } + + +def test_api_mode_uses_explicit_provider_when_codex(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + agent = run_agent.AIAgent( + model="gpt-5-codex", + base_url="https://openrouter.ai/api/v1", + provider="openai-codex", + api_key="codex-token", + quiet_mode=True, + max_iterations=1, + skip_context_files=True, + skip_memory=True, + ) + assert agent.api_mode == "codex_responses" + assert agent.provider == "openai-codex" + + +def test_api_mode_normalizes_provider_case(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + agent = run_agent.AIAgent( + model="gpt-5-codex", + 
base_url="https://openrouter.ai/api/v1", + provider="OpenAI-Codex", + api_key="codex-token", + quiet_mode=True, + max_iterations=1, + skip_context_files=True, + skip_memory=True, + ) + assert agent.provider == "openai-codex" + assert agent.api_mode == "codex_responses" + + +def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypatch): + _patch_agent_bootstrap(monkeypatch) + agent = run_agent.AIAgent( + model="gpt-5-codex", + base_url="https://chatgpt.com/backend-api/codex", + provider="openrouter", + api_key="test-token", + quiet_mode=True, + max_iterations=1, + skip_context_files=True, + skip_memory=True, + ) + assert agent.api_mode == "chat_completions" + assert agent.provider == "openrouter" + + +def test_build_api_kwargs_codex(monkeypatch): + agent = _build_agent(monkeypatch) + kwargs = agent._build_api_kwargs( + [ + {"role": "system", "content": "You are Hermes."}, + {"role": "user", "content": "Ping"}, + ] + ) + + assert kwargs["model"] == "gpt-5-codex" + assert kwargs["instructions"] == "You are Hermes." 
+ assert kwargs["store"] is False + assert isinstance(kwargs["input"], list) + assert kwargs["input"][0]["role"] == "user" + assert kwargs["tools"][0]["type"] == "function" + assert kwargs["tools"][0]["name"] == "terminal" + assert kwargs["tools"][0]["strict"] is False + assert "function" not in kwargs["tools"][0] + assert kwargs["store"] is False + assert "timeout" not in kwargs + assert "max_tokens" not in kwargs + assert "extra_body" not in kwargs + + +def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch): + agent = _build_agent(monkeypatch) + calls = {"stream": 0} + + def _fake_stream(**kwargs): + calls["stream"] += 1 + if calls["stream"] == 1: + return _FakeResponsesStream( + final_error=RuntimeError("Didn't receive a `response.completed` event.") + ) + return _FakeResponsesStream(final_response=_codex_message_response("stream ok")) + + agent.client = SimpleNamespace( + responses=SimpleNamespace( + stream=_fake_stream, + create=lambda **kwargs: _codex_message_response("fallback"), + ) + ) + + response = agent._run_codex_stream(_codex_request_kwargs()) + assert calls["stream"] == 2 + assert response.output[0].content[0].text == "stream ok" + + +def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(monkeypatch): + agent = _build_agent(monkeypatch) + calls = {"stream": 0, "create": 0} + + def _fake_stream(**kwargs): + calls["stream"] += 1 + return _FakeResponsesStream( + final_error=RuntimeError("Didn't receive a `response.completed` event.") + ) + + def _fake_create(**kwargs): + calls["create"] += 1 + return _codex_message_response("create fallback ok") + + agent.client = SimpleNamespace( + responses=SimpleNamespace( + stream=_fake_stream, + create=_fake_create, + ) + ) + + response = agent._run_codex_stream(_codex_request_kwargs()) + assert calls["stream"] == 2 + assert calls["create"] == 1 + assert response.output[0].content[0].text == "create fallback ok" + + +def 
test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch): + agent = _build_agent(monkeypatch) + calls = {"stream": 0, "create": 0} + create_stream = _FakeCreateStream( + [ + SimpleNamespace(type="response.created"), + SimpleNamespace(type="response.in_progress"), + SimpleNamespace(type="response.completed", response=_codex_message_response("streamed create ok")), + ] + ) + + def _fake_stream(**kwargs): + calls["stream"] += 1 + return _FakeResponsesStream( + final_error=RuntimeError("Didn't receive a `response.completed` event.") + ) + + def _fake_create(**kwargs): + calls["create"] += 1 + assert kwargs.get("stream") is True + return create_stream + + agent.client = SimpleNamespace( + responses=SimpleNamespace( + stream=_fake_stream, + create=_fake_create, + ) + ) + + response = agent._run_codex_stream(_codex_request_kwargs()) + assert calls["stream"] == 2 + assert calls["create"] == 1 + assert create_stream.closed is True + assert response.output[0].content[0].text == "streamed create ok" + + +def test_run_conversation_codex_plain_text(monkeypatch): + agent = _build_agent(monkeypatch) + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: _codex_message_response("OK")) + + result = agent.run_conversation("Say OK") + + assert result["completed"] is True + assert result["final_response"] == "OK" + assert result["messages"][-1]["role"] == "assistant" + assert result["messages"][-1]["content"] == "OK" + + +def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch): + agent = _build_agent(monkeypatch) + calls = {"api": 0, "refresh": 0} + + class _UnauthorizedError(RuntimeError): + def __init__(self): + super().__init__("Error code: 401 - unauthorized") + self.status_code = 401 + + def _fake_api_call(api_kwargs): + calls["api"] += 1 + if calls["api"] == 1: + raise _UnauthorizedError() + return _codex_message_response("Recovered after refresh") + + def _fake_refresh(*, force=True): + calls["refresh"] += 1 + assert force 
is True + return True + + monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call) + monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh) + + result = agent.run_conversation("Say OK") + + assert calls["api"] == 2 + assert calls["refresh"] == 1 + assert result["completed"] is True + assert result["final_response"] == "Recovered after refresh" + + +def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch): + agent = _build_agent(monkeypatch) + closed = {"value": False} + rebuilt = {"kwargs": None} + + class _ExistingClient: + def close(self): + closed["value"] = True + + class _RebuiltClient: + pass + + def _fake_openai(**kwargs): + rebuilt["kwargs"] = kwargs + return _RebuiltClient() + + monkeypatch.setattr( + "hermes_cli.auth.resolve_codex_runtime_credentials", + lambda force_refresh=True: { + "api_key": "new-codex-token", + "base_url": "https://chatgpt.com/backend-api/codex", + }, + ) + monkeypatch.setattr(run_agent, "OpenAI", _fake_openai) + + agent.client = _ExistingClient() + ok = agent._try_refresh_codex_client_credentials(force=True) + + assert ok is True + assert closed["value"] is True + assert rebuilt["kwargs"]["api_key"] == "new-codex-token" + assert rebuilt["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex" + assert isinstance(agent.client, _RebuiltClient) + + +def test_run_conversation_codex_tool_round_trip(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [_codex_tool_call_response(), _codex_message_response("done")] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("run a command") + 
+ assert result["completed"] is True + assert result["final_response"] == "done" + assert any(msg.get("tool_calls") for msg in result["messages"] if msg.get("role") == "assistant") + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeypatch): + agent = _build_agent(monkeypatch) + items = agent._chat_messages_to_responses_input( + [ + {"role": "user", "content": "Run terminal"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": {"name": "terminal", "arguments": "{}"}, + } + ], + }, + {"role": "tool", "tool_call_id": "call_abc123", "content": '{"ok":true}'}, + ] + ) + + function_call = next(item for item in items if item.get("type") == "function_call") + function_output = next(item for item in items if item.get("type") == "function_call_output") + + assert function_call["call_id"] == "call_abc123" + assert "id" not in function_call + assert function_output["call_id"] == "call_abc123" + + +def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch): + agent = _build_agent(monkeypatch) + items = agent._chat_messages_to_responses_input( + [ + {"role": "user", "content": "Run terminal"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_pair123|fc_pair123", + "type": "function", + "function": {"name": "terminal", "arguments": "{}"}, + } + ], + }, + {"role": "tool", "tool_call_id": "call_pair123|fc_pair123", "content": '{"ok":true}'}, + ] + ) + + function_call = next(item for item in items if item.get("type") == "function_call") + function_output = next(item for item in items if item.get("type") == "function_call_output") + + assert function_call["call_id"] == "call_pair123" + assert "id" not in function_call + assert function_output["call_id"] == "call_pair123" + + +def 
test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch): + agent = _build_agent(monkeypatch) + preflight = agent._preflight_codex_api_kwargs( + { + "model": "gpt-5-codex", + "instructions": "You are Hermes.", + "input": [ + {"role": "user", "content": "hi"}, + { + "type": "function_call", + "id": "call_bad", + "call_id": "call_good", + "name": "terminal", + "arguments": "{}", + }, + ], + "tools": [], + "store": False, + } + ) + + fn_call = next(item for item in preflight["input"] if item.get("type") == "function_call") + assert fn_call["call_id"] == "call_good" + assert "id" not in fn_call + + +def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id(monkeypatch): + agent = _build_agent(monkeypatch) + + with pytest.raises(ValueError, match="function_call_output is missing call_id"): + agent._preflight_codex_api_kwargs( + { + "model": "gpt-5-codex", + "instructions": "You are Hermes.", + "input": [{"type": "function_call_output", "output": "{}"}], + "tools": [], + "store": False, + } + ) + + +def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch): + agent = _build_agent(monkeypatch) + kwargs = _codex_request_kwargs() + kwargs["some_unknown_field"] = "value" + + with pytest.raises(ValueError, match="unsupported field"): + agent._preflight_codex_api_kwargs(kwargs) + + +def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch): + agent = _build_agent(monkeypatch) + kwargs = _codex_request_kwargs() + kwargs["reasoning"] = {"effort": "high", "summary": "auto"} + kwargs["include"] = ["reasoning.encrypted_content"] + kwargs["temperature"] = 0.7 + kwargs["max_output_tokens"] = 4096 + + result = agent._preflight_codex_api_kwargs(kwargs) + assert result["reasoning"] == {"effort": "high", "summary": "auto"} + assert result["include"] == ["reasoning.encrypted_content"] + assert result["temperature"] == 0.7 + assert result["max_output_tokens"] == 4096 + + +def 
test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [_codex_tool_call_response(), _codex_message_response("done")] + requests = [] + + def _fake_api_call(api_kwargs): + requests.append(api_kwargs) + return responses.pop(0) + + monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("run a command") + + assert result["completed"] is True + assert result["final_response"] == "done" + assert len(requests) >= 2 + + replay_input = requests[1]["input"] + function_call = next(item for item in replay_input if item.get("type") == "function_call") + function_output = next(item for item in replay_input if item.get("type") == "function_call_output") + assert function_call["call_id"] == "call_1" + assert "id" not in function_call + assert function_output["call_id"] == "call_1" + + +def test_run_conversation_codex_continues_after_incomplete_interim_message(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_incomplete_message_response("I'll inspect the repo structure first."), + _codex_tool_call_response(), + _codex_message_response("Architecture summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("analyze repo") + + assert 
result["completed"] is True + assert result["final_response"] == "Architecture summary complete." + assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "inspect the repo structure" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch): + agent = _build_agent(monkeypatch) + assistant_message, finish_reason = agent._normalize_codex_response( + _codex_commentary_message_response("I'll inspect the repository first.") + ) + + assert finish_reason == "incomplete" + assert "inspect the repository" in (assistant_message.content or "") + + +def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_commentary_message_response("I'll inspect the repo structure first."), + _codex_tool_call_response(), + _codex_message_response("Architecture summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("analyze repo") + + assert result["completed"] is True + assert result["final_response"] == "Architecture summary complete." 
+ assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "inspect the repo structure" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_run_conversation_codex_continues_after_ack_stop_message(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_ack_message_response( + "Absolutely — I can do that. I'll inspect ~/openclaw-studio and report back with a walkthrough." + ), + _codex_tool_call_response(), + _codex_message_response("Architecture summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("look into ~/openclaw-studio and tell me how it works") + + assert result["completed"] is True + assert result["final_response"] == "Architecture summary complete." + assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "inspect ~/openclaw-studio" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any( + msg.get("role") == "user" + and "Continue now. 
Execute the required tool calls" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_run_conversation_codex_continues_after_ack_for_directory_listing_prompt(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_ack_message_response( + "I'll check what's in the current directory and call out 3 notable items." + ), + _codex_tool_call_response(), + _codex_message_response("Directory summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("look at current directory and list 3 notable things") + + assert result["completed"] is True + assert result["final_response"] == "Directory summary complete." + assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "current directory" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any( + msg.get("role") == "user" + and "Continue now. 
Execute the required tool calls" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py new file mode 100644 index 000000000..af6914092 --- /dev/null +++ b/tests/test_runtime_provider_resolution.py @@ -0,0 +1,95 @@ +from hermes_cli import runtime_provider as rp + + +def test_resolve_runtime_provider_codex(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex") + monkeypatch.setattr( + rp, + "resolve_codex_runtime_credentials", + lambda: { + "provider": "openai-codex", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "codex-token", + "source": "codex-auth-json", + "auth_file": "/tmp/auth.json", + "codex_home": "/tmp/codex", + "last_refresh": "2026-02-26T00:00:00Z", + }, + ) + + resolved = rp.resolve_runtime_provider(requested="openai-codex") + + assert resolved["provider"] == "openai-codex" + assert resolved["api_mode"] == "codex_responses" + assert resolved["base_url"] == "https://chatgpt.com/backend-api/codex" + assert resolved["api_key"] == "codex-token" + assert resolved["requested_provider"] == "openai-codex" + + +def test_resolve_runtime_provider_openrouter_explicit(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr(rp, "_get_model_config", lambda: {}) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider( + requested="openrouter", + explicit_api_key="test-key", + explicit_base_url="https://example.com/v1/", + ) + + assert resolved["provider"] == "openrouter" + assert resolved["api_mode"] == "chat_completions" + assert 
resolved["api_key"] == "test-key" + assert resolved["base_url"] == "https://example.com/v1" + assert resolved["source"] == "explicit" + + +def test_resolve_runtime_provider_openrouter_ignores_codex_config_base_url(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "openai-codex", + "base_url": "https://chatgpt.com/backend-api/codex", + }, + ) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider(requested="openrouter") + + assert resolved["provider"] == "openrouter" + assert resolved["base_url"] == rp.OPENROUTER_BASE_URL + + +def test_resolve_runtime_provider_auto_uses_custom_config_base_url(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "auto", + "base_url": "https://custom.example/v1/", + }, + ) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider(requested="auto") + + assert resolved["provider"] == "openrouter" + assert resolved["base_url"] == "https://custom.example/v1" + + +def test_resolve_requested_provider_precedence(monkeypatch): + monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous") + monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "openai-codex"}) + assert rp.resolve_requested_provider("openrouter") == "openrouter" diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 5d5bb2c7c..948af4d0f 100644 --- a/tests/tools/test_delegate.py +++ 
b/tests/tools/test_delegate.py @@ -30,6 +30,9 @@ def _make_mock_parent(depth=0): """Create a mock parent agent with the fields delegate_task expects.""" parent = MagicMock() parent.base_url = "https://openrouter.ai/api/v1" + parent.api_key = "parent-key" + parent.provider = "openrouter" + parent.api_mode = "chat_completions" parent.model = "anthropic/claude-sonnet-4" parent.platform = "cli" parent.providers_allowed = None @@ -218,6 +221,30 @@ class TestDelegateTask(unittest.TestCase): delegate_task(goal="Test tracking", parent_agent=parent) self.assertEqual(len(parent._active_children), 0) + def test_child_inherits_runtime_credentials(self): + parent = _make_mock_parent(depth=0) + parent.base_url = "https://chatgpt.com/backend-api/codex" + parent.api_key = "codex-token" + parent.provider = "openai-codex" + parent.api_mode = "codex_responses" + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + mock_child.run_conversation.return_value = { + "final_response": "ok", + "completed": True, + "api_calls": 1, + } + MockAgent.return_value = mock_child + + delegate_task(goal="Test runtime inheritance", parent_agent=parent) + + _, kwargs = MockAgent.call_args + self.assertEqual(kwargs["base_url"], parent.base_url) + self.assertEqual(kwargs["api_key"], parent.api_key) + self.assertEqual(kwargs["provider"], parent.provider) + self.assertEqual(kwargs["api_mode"], parent.api_mode) + class TestBlockedTools(unittest.TestCase): def test_blocked_tools_constant(self): diff --git a/tests/tools/test_file_tools_live.py b/tests/tools/test_file_tools_live.py new file mode 100644 index 000000000..99627b91a --- /dev/null +++ b/tests/tools/test_file_tools_live.py @@ -0,0 +1,483 @@ +"""Live integration tests for file operations and terminal tools. + +These tests run REAL commands through the LocalEnvironment -- no mocks. +They verify that shell noise is properly filtered, commands actually work, +and the tool outputs are EXACTLY what the agent would see. 
+ +Every test with output validates against a known-good value AND +asserts zero contamination from shell noise via _assert_clean(). +""" + +import json +import os +import sys +from pathlib import Path + +import pytest + +sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + +from tools.environments.local import LocalEnvironment, _clean_shell_noise, _SHELL_NOISE_SUBSTRINGS +from tools.file_operations import ShellFileOperations + + +# ── Shared noise detection ─────────────────────────────────────────────── +# Every known shell noise pattern. If ANY of these appear in output that +# isn't explicitly expected, the test fails with a clear message. + +_ALL_NOISE_PATTERNS = list(_SHELL_NOISE_SUBSTRINGS) + [ + "bash: ", + "Inappropriate ioctl", +] + + +def _assert_clean(text: str, context: str = "output"): + """Assert text contains zero shell noise contamination.""" + if not text: + return + for noise in _ALL_NOISE_PATTERNS: + assert noise not in text, ( + f"Shell noise leaked into {context}: found {noise!r} in:\n" + f"{text[:500]}" + ) + + +# ── Fixtures ───────────────────────────────────────────────────────────── + +# Deterministic file content used across tests. Every byte is known, +# so any unexpected text in results is immediately caught. 
+SIMPLE_CONTENT = "alpha\nbravo\ncharlie\n" +NUMBERED_CONTENT = "\n".join(f"LINE_{i:04d}" for i in range(1, 51)) + "\n" +SPECIAL_CONTENT = "single 'quotes' and \"doubles\" and $VARS and `backticks` and \\backslash\n" +MULTIFILE_A = "def func_alpha():\n return 42\n" +MULTIFILE_B = "def func_bravo():\n return 99\n" +MULTIFILE_C = "nothing relevant here\n" + + +@pytest.fixture +def env(tmp_path): + """A real LocalEnvironment rooted in a temp directory.""" + return LocalEnvironment(cwd=str(tmp_path), timeout=15) + + +@pytest.fixture +def ops(env, tmp_path): + """ShellFileOperations wired to the real local environment.""" + return ShellFileOperations(env, cwd=str(tmp_path)) + + +@pytest.fixture +def populated_dir(tmp_path): + """A temp directory with known files for search/read tests.""" + (tmp_path / "alpha.py").write_text(MULTIFILE_A) + (tmp_path / "bravo.py").write_text(MULTIFILE_B) + (tmp_path / "notes.txt").write_text(MULTIFILE_C) + (tmp_path / "data.csv").write_text("col1,col2\n1,2\n3,4\n") + return tmp_path + + +# ── _clean_shell_noise unit tests ──────────────────────────────────────── + +class TestCleanShellNoise: + def test_single_noise_line(self): + output = "bash: no job control in this shell\nhello world\n" + result = _clean_shell_noise(output) + assert result == "hello world\n" + + def test_double_noise_lines(self): + output = ( + "bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n" + "bash: no job control in this shell\n" + "actual output here\n" + ) + result = _clean_shell_noise(output) + assert result == "actual output here\n" + _assert_clean(result) + + def test_tcsetattr_noise(self): + output = ( + "bash: [12345: 2 (255)] tcsetattr: Inappropriate ioctl for device\n" + "real content\n" + ) + result = _clean_shell_noise(output) + assert result == "real content\n" + _assert_clean(result) + + def test_triple_noise_lines(self): + output = ( + "bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n" + 
"bash: no job control in this shell\n" + "bash: [999: 2 (255)] tcsetattr: Inappropriate ioctl for device\n" + "clean\n" + ) + result = _clean_shell_noise(output) + assert result == "clean\n" + + def test_no_noise_untouched(self): + assert _clean_shell_noise("hello\nworld\n") == "hello\nworld\n" + + def test_empty_string(self): + assert _clean_shell_noise("") == "" + + def test_only_noise_produces_empty(self): + output = "bash: no job control in this shell\n" + result = _clean_shell_noise(output) + _assert_clean(result) + + def test_noise_in_middle_not_stripped(self): + """Only LEADING noise is stripped -- noise in the middle is real output.""" + output = "real\nbash: no job control in this shell\nmore real\n" + result = _clean_shell_noise(output) + assert result == output + + +# ── LocalEnvironment.execute() ─────────────────────────────────────────── + +class TestLocalEnvironmentExecute: + def test_echo_exact_output(self, env): + result = env.execute("echo DETERMINISTIC_OUTPUT_12345") + assert result["returncode"] == 0 + assert result["output"].strip() == "DETERMINISTIC_OUTPUT_12345" + _assert_clean(result["output"]) + + def test_printf_no_trailing_newline(self, env): + result = env.execute("printf 'exact'") + assert result["returncode"] == 0 + assert result["output"] == "exact" + _assert_clean(result["output"]) + + def test_exit_code_propagated(self, env): + result = env.execute("exit 42") + assert result["returncode"] == 42 + + def test_stderr_captured_in_output(self, env): + result = env.execute("echo STDERR_TEST >&2") + assert "STDERR_TEST" in result["output"] + _assert_clean(result["output"]) + + def test_cwd_respected(self, env, tmp_path): + subdir = tmp_path / "subdir_test" + subdir.mkdir() + result = env.execute("pwd", cwd=str(subdir)) + assert result["returncode"] == 0 + assert result["output"].strip() == str(subdir) + _assert_clean(result["output"]) + + def test_multiline_exact(self, env): + result = env.execute("echo AAA; echo BBB; echo CCC") + lines = 
[l for l in result["output"].strip().split("\n") if l.strip()] + assert lines == ["AAA", "BBB", "CCC"] + _assert_clean(result["output"]) + + def test_env_var_home(self, env): + result = env.execute("echo $HOME") + assert result["returncode"] == 0 + home = result["output"].strip() + assert home == str(Path.home()) + _assert_clean(result["output"]) + + def test_pipe_exact(self, env): + result = env.execute("echo 'one two three' | wc -w") + assert result["returncode"] == 0 + assert result["output"].strip() == "3" + _assert_clean(result["output"]) + + def test_cat_deterministic_content(self, env, tmp_path): + f = tmp_path / "det.txt" + f.write_text(SIMPLE_CONTENT) + result = env.execute(f"cat {f}") + assert result["returncode"] == 0 + assert result["output"] == SIMPLE_CONTENT + _assert_clean(result["output"]) + + +# ── _has_command ───────────────────────────────────────────────────────── + +class TestHasCommand: + def test_finds_echo(self, ops): + assert ops._has_command("echo") is True + + def test_finds_cat(self, ops): + assert ops._has_command("cat") is True + + def test_finds_sed(self, ops): + assert ops._has_command("sed") is True + + def test_finds_wc(self, ops): + assert ops._has_command("wc") is True + + def test_finds_find(self, ops): + assert ops._has_command("find") is True + + def test_missing_command(self, ops): + assert ops._has_command("nonexistent_tool_xyz_abc_999") is False + + def test_rg_or_grep_available(self, ops): + assert ops._has_command("rg") or ops._has_command("grep"), \ + "Neither rg nor grep found -- search_files will break" + + +# ── read_file ──────────────────────────────────────────────────────────── + +class TestReadFile: + def test_exact_content(self, ops, tmp_path): + f = tmp_path / "exact.txt" + f.write_text(SIMPLE_CONTENT) + result = ops.read_file(str(f)) + assert result.error is None + # Content has line numbers prepended, check the actual text is there + assert "alpha" in result.content + assert "bravo" in result.content + 
assert "charlie" in result.content + assert result.total_lines == 3 + _assert_clean(result.content) + + def test_absolute_path(self, ops, tmp_path): + f = tmp_path / "abs.txt" + f.write_text("ABSOLUTE_PATH_CONTENT\n") + result = ops.read_file(str(f)) + assert result.error is None + assert "ABSOLUTE_PATH_CONTENT" in result.content + _assert_clean(result.content) + + def test_tilde_expansion(self, ops): + test_path = Path.home() / ".hermes_test_tilde_9f8a7b" + try: + test_path.write_text("TILDE_EXPANSION_OK\n") + result = ops.read_file("~/.hermes_test_tilde_9f8a7b") + assert result.error is None + assert "TILDE_EXPANSION_OK" in result.content + _assert_clean(result.content) + finally: + test_path.unlink(missing_ok=True) + + def test_nonexistent_returns_error(self, ops, tmp_path): + result = ops.read_file(str(tmp_path / "ghost.txt")) + assert result.error is not None + + def test_pagination_exact_window(self, ops, tmp_path): + f = tmp_path / "numbered.txt" + f.write_text(NUMBERED_CONTENT) + result = ops.read_file(str(f), offset=10, limit=5) + assert result.error is None + assert "LINE_0010" in result.content + assert "LINE_0014" in result.content + assert "LINE_0009" not in result.content + assert "LINE_0015" not in result.content + assert result.total_lines == 50 + _assert_clean(result.content) + + def test_no_noise_in_content(self, ops, tmp_path): + f = tmp_path / "noise_check.txt" + f.write_text("ONLY_THIS_CONTENT\n") + result = ops.read_file(str(f)) + assert result.error is None + _assert_clean(result.content) + + +# ── write_file ─────────────────────────────────────────────────────────── + +class TestWriteFile: + def test_write_and_verify(self, ops, tmp_path): + path = str(tmp_path / "written.txt") + result = ops.write_file(path, SIMPLE_CONTENT) + assert result.error is None + assert result.bytes_written == len(SIMPLE_CONTENT.encode()) + assert Path(path).read_text() == SIMPLE_CONTENT + + def test_creates_nested_dirs(self, ops, tmp_path): + path = str(tmp_path / 
"a" / "b" / "c" / "deep.txt") + result = ops.write_file(path, "DEEP_CONTENT\n") + assert result.error is None + assert result.dirs_created is True + assert Path(path).read_text() == "DEEP_CONTENT\n" + + def test_overwrites_exact(self, ops, tmp_path): + path = str(tmp_path / "overwrite.txt") + Path(path).write_text("OLD_DATA\n") + result = ops.write_file(path, "NEW_DATA\n") + assert result.error is None + assert Path(path).read_text() == "NEW_DATA\n" + + def test_large_content_via_stdin(self, ops, tmp_path): + path = str(tmp_path / "large.txt") + content = "X" * 200_000 + "\n" + result = ops.write_file(path, content) + assert result.error is None + assert Path(path).read_text() == content + + def test_special_characters_preserved(self, ops, tmp_path): + path = str(tmp_path / "special.txt") + result = ops.write_file(path, SPECIAL_CONTENT) + assert result.error is None + assert Path(path).read_text() == SPECIAL_CONTENT + + def test_roundtrip_read_write(self, ops, tmp_path): + """Write -> read back -> verify exact match.""" + path = str(tmp_path / "roundtrip.txt") + ops.write_file(path, SIMPLE_CONTENT) + result = ops.read_file(path) + assert result.error is None + assert "alpha" in result.content + assert "charlie" in result.content + _assert_clean(result.content) + + +# ── patch_replace ──────────────────────────────────────────────────────── + +class TestPatchReplace: + def test_exact_replacement(self, ops, tmp_path): + path = str(tmp_path / "patch.txt") + Path(path).write_text("hello world\n") + result = ops.patch_replace(path, "world", "earth") + assert result.error is None + assert Path(path).read_text() == "hello earth\n" + + def test_not_found_error(self, ops, tmp_path): + path = str(tmp_path / "patch2.txt") + Path(path).write_text("hello\n") + result = ops.patch_replace(path, "NONEXISTENT_STRING", "replacement") + assert result.error is not None + assert "Could not find" in result.error + + def test_multiline_patch(self, ops, tmp_path): + path = str(tmp_path / 
"multi.txt") + Path(path).write_text("line1\nline2\nline3\n") + result = ops.patch_replace(path, "line2", "REPLACED") + assert result.error is None + assert Path(path).read_text() == "line1\nREPLACED\nline3\n" + + +# ── search ─────────────────────────────────────────────────────────────── + +class TestSearch: + def test_content_search_finds_exact_match(self, ops, populated_dir): + result = ops.search("func_alpha", str(populated_dir), target="content") + assert result.error is None + assert result.total_count >= 1 + assert any("func_alpha" in m.content for m in result.matches) + for m in result.matches: + _assert_clean(m.content) + _assert_clean(m.path) + + def test_content_search_no_false_positives(self, ops, populated_dir): + result = ops.search("ZZZZZ_NONEXISTENT", str(populated_dir), target="content") + assert result.error is None + assert result.total_count == 0 + assert len(result.matches) == 0 + + def test_file_search_finds_py_files(self, ops, populated_dir): + result = ops.search("*.py", str(populated_dir), target="files") + assert result.error is None + assert result.total_count >= 2 + # Verify only expected files appear + found_names = set() + for f in result.files: + name = Path(f).name + found_names.add(name) + _assert_clean(f) + assert "alpha.py" in found_names + assert "bravo.py" in found_names + assert "notes.txt" not in found_names + + def test_file_search_no_false_file_entries(self, ops, populated_dir): + """Every entry in the files list must be a real path, not noise.""" + result = ops.search("*.py", str(populated_dir), target="files") + assert result.error is None + for f in result.files: + _assert_clean(f) + assert Path(f).exists(), f"Search returned non-existent path: {f}" + + def test_content_search_with_glob_filter(self, ops, populated_dir): + result = ops.search("return", str(populated_dir), target="content", file_glob="*.py") + assert result.error is None + for m in result.matches: + assert m.path.endswith(".py"), f"Non-py file in results: 
{m.path}" + _assert_clean(m.content) + _assert_clean(m.path) + + def test_search_output_has_zero_noise(self, ops, populated_dir): + """Dedicated noise check: search must return only real content.""" + result = ops.search("func", str(populated_dir), target="content") + assert result.error is None + for m in result.matches: + _assert_clean(m.content) + _assert_clean(m.path) + + +# ── _expand_path ───────────────────────────────────────────────────────── + +class TestExpandPath: + def test_tilde_exact(self, ops): + result = ops._expand_path("~/test.txt") + expected = f"{Path.home()}/test.txt" + assert result == expected + _assert_clean(result) + + def test_absolute_unchanged(self, ops): + assert ops._expand_path("/tmp/test.txt") == "/tmp/test.txt" + + def test_relative_unchanged(self, ops): + assert ops._expand_path("relative/path.txt") == "relative/path.txt" + + def test_bare_tilde(self, ops): + result = ops._expand_path("~") + assert result == str(Path.home()) + _assert_clean(result) + + +# ── Terminal output cleanliness ────────────────────────────────────────── + +class TestTerminalOutputCleanliness: + """Every command the agent might run must produce noise-free output.""" + + def test_echo(self, env): + result = env.execute("echo CLEAN_TEST") + assert result["output"].strip() == "CLEAN_TEST" + _assert_clean(result["output"]) + + def test_cat(self, env, tmp_path): + f = tmp_path / "cat_test.txt" + f.write_text("CAT_CONTENT_EXACT\n") + result = env.execute(f"cat {f}") + assert result["output"] == "CAT_CONTENT_EXACT\n" + _assert_clean(result["output"]) + + def test_ls(self, env, tmp_path): + (tmp_path / "file_a.txt").write_text("") + (tmp_path / "file_b.txt").write_text("") + result = env.execute(f"ls {tmp_path}") + _assert_clean(result["output"]) + assert "file_a.txt" in result["output"] + assert "file_b.txt" in result["output"] + + def test_wc(self, env, tmp_path): + f = tmp_path / "wc_test.txt" + f.write_text("one\ntwo\nthree\n") + result = env.execute(f"wc -l < 
{f}") + assert result["output"].strip() == "3" + _assert_clean(result["output"]) + + def test_head(self, env, tmp_path): + f = tmp_path / "head_test.txt" + f.write_text(NUMBERED_CONTENT) + result = env.execute(f"head -n 3 {f}") + expected = "LINE_0001\nLINE_0002\nLINE_0003\n" + assert result["output"] == expected + _assert_clean(result["output"]) + + def test_env_var_expansion(self, env): + result = env.execute("echo $HOME") + assert result["output"].strip() == str(Path.home()) + _assert_clean(result["output"]) + + def test_command_substitution(self, env): + result = env.execute("echo $(echo NESTED)") + assert result["output"].strip() == "NESTED" + _assert_clean(result["output"]) + + def test_command_v_detection(self, env): + """This is how _has_command works -- must return clean 'yes'.""" + result = env.execute("command -v cat >/dev/null 2>&1 && echo 'yes'") + assert result["output"].strip() == "yes" + _assert_clean(result["output"]) diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index ad308c2e4..c960cc36c 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -77,6 +77,85 @@ def _strip_blocked_tools(toolsets: List[str]) -> List[str]: return [t for t in toolsets if t not in blocked_toolset_names] +def _build_child_progress_callback(task_index: int, parent_agent, task_count: int = 1) -> Optional[callable]: + """Build a callback that relays child agent tool calls to the parent display. + + Two display paths: + CLI: prints tree-view lines above the parent's delegation spinner + Gateway: batches tool names and relays to parent's progress callback + + Returns None if no display mechanism is available, in which case the + child agent runs with no progress callback (identical to current behavior). 
+ """ + spinner = getattr(parent_agent, '_delegate_spinner', None) + parent_cb = getattr(parent_agent, 'tool_progress_callback', None) + + if not spinner and not parent_cb: + return None # No display → no callback → zero behavior change + + # Show 1-indexed prefix only in batch mode (multiple tasks) + prefix = f"[{task_index + 1}] " if task_count > 1 else "" + + # Gateway: batch tool names, flush periodically + _BATCH_SIZE = 5 + _batch: List[str] = [] + + def _callback(tool_name: str, preview: str = None): + # Special "_thinking" event: model produced text content (reasoning) + if tool_name == "_thinking": + if spinner: + short = (preview[:55] + "...") if preview and len(preview) > 55 else (preview or "") + try: + spinner.print_above(f" {prefix}├─ 💭 \"{short}\"") + except Exception: + pass + # Don't relay thinking to gateway (too noisy for chat) + return + + # Regular tool call event + if spinner: + short = (preview[:35] + "...") if preview and len(preview) > 35 else (preview or "") + tool_emojis = { + "terminal": "💻", "web_search": "🔍", "web_extract": "📄", + "read_file": "📖", "write_file": "✍️", "patch": "🔧", + "search_files": "🔎", "list_directory": "📂", + "browser_navigate": "🌐", "browser_click": "👆", + "text_to_speech": "🔊", "image_generate": "🎨", + "vision_analyze": "👁️", "process": "⚙️", + } + emoji = tool_emojis.get(tool_name, "⚡") + line = f" {prefix}├─ {emoji} {tool_name}" + if short: + line += f" \"{short}\"" + try: + spinner.print_above(line) + except Exception: + pass + + if parent_cb: + _batch.append(tool_name) + if len(_batch) >= _BATCH_SIZE: + summary = ", ".join(_batch) + try: + parent_cb("subagent_progress", f"🔀 {prefix}{summary}") + except Exception: + pass + _batch.clear() + + def _flush(): + """Flush remaining batched tool names to gateway on completion.""" + if parent_cb and _batch: + summary = ", ".join(_batch) + try: + parent_cb("subagent_progress", f"🔀 {prefix}{summary}") + except Exception: + pass + _batch.clear() + + _callback._flush = 
_flush + return _callback + + def _run_single_child( task_index: int, goal: str, @@ -85,6 +164,7 @@ def _run_single_child( model: Optional[str], max_iterations: int, parent_agent, + task_count: int = 1, ) -> Dict[str, Any]: """ Spawn and run a single child agent. Called from within a thread. @@ -98,37 +178,21 @@ def _run_single_child( child_prompt = _build_child_system_prompt(goal, context) - # Build a progress callback that surfaces subagent tool activity. - # CLI: updates the parent's delegate spinner text. - # Gateway: forwards to the parent's progress callback (feeds message queue). - parent_progress_cb = getattr(parent_agent, 'tool_progress_callback', None) - def _child_progress(tool_name: str, preview: str = None): - tag = f"[subagent-{task_index+1}] {tool_name}" - # Update CLI spinner - spinner = getattr(parent_agent, '_delegate_spinner', None) - if spinner: - detail = f'"{preview}"' if preview else "" - try: - spinner.update_text(f"🔀 {tag} {detail}") - except Exception: - pass - # Forward to gateway progress queue - if parent_progress_cb: - try: - parent_progress_cb(tag, preview) - except Exception: - pass - try: - # Extract parent's API key so subagents inherit auth (e.g. Nous Portal) - parent_api_key = None - if hasattr(parent_agent, '_client_kwargs'): + # Extract parent's API key so subagents inherit auth (e.g. Nous Portal). 
+ parent_api_key = getattr(parent_agent, "api_key", None) + if (not parent_api_key) and hasattr(parent_agent, "_client_kwargs"): parent_api_key = parent_agent._client_kwargs.get("api_key") + # Build progress callback to relay tool calls to parent display + child_progress_cb = _build_child_progress_callback(task_index, parent_agent, task_count) + child = AIAgent( base_url=parent_agent.base_url, api_key=parent_api_key, model=model or parent_agent.model, + provider=getattr(parent_agent, "provider", None), + api_mode=getattr(parent_agent, "api_mode", None), max_iterations=max_iterations, enabled_toolsets=child_toolsets, quiet_mode=True, @@ -143,7 +207,7 @@ def _run_single_child( providers_ignored=parent_agent.providers_ignored, providers_order=parent_agent.providers_order, provider_sort=parent_agent.provider_sort, - tool_progress_callback=_child_progress, + tool_progress_callback=child_progress_cb, ) # Set delegation depth so children can't spawn grandchildren @@ -158,6 +222,13 @@ def _run_single_child( with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull): result = child.run_conversation(user_message=goal) + # Flush any remaining batched progress to gateway + if child_progress_cb and hasattr(child_progress_cb, '_flush'): + try: + child_progress_cb._flush() + except Exception: + pass + duration = round(time.monotonic() - child_start, 2) summary = result.get("final_response") or "" @@ -275,6 +346,7 @@ def delegate_task( model=model, max_iterations=effective_max_iter, parent_agent=parent_agent, + task_count=1, ) results.append(result) else: @@ -299,6 +371,7 @@ def delegate_task( model=model, max_iterations=effective_max_iter, parent_agent=parent_agent, + task_count=n_tasks, ) futures[future] = i @@ -318,14 +391,21 @@ def delegate_task( results.append(entry) completed_count += 1 - # Print per-task completion line (visible in CLI via patch_stdout) + # Print per-task completion line above the spinner idx = entry["task_index"] label = task_labels[idx] 
if idx < len(task_labels) else f"Task {idx}" dur = entry.get("duration_seconds", 0) status = entry.get("status", "?") icon = "✓" if status == "completed" else "✗" remaining = n_tasks - completed_count - print(f" {icon} [{idx+1}/{n_tasks}] {label} ({dur}s)") + completion_line = f"{icon} [{idx+1}/{n_tasks}] {label} ({dur}s)" + if spinner_ref: + try: + spinner_ref.print_above(completion_line) + except Exception: + print(f" {completion_line}") + else: + print(f" {completion_line}") # Update spinner text to show remaining count if spinner_ref and remaining > 0: diff --git a/tools/environments/local.py b/tools/environments/local.py index 6d7e8da3c..5b70a2707 100644 --- a/tools/environments/local.py +++ b/tools/environments/local.py @@ -11,20 +11,26 @@ from tools.environments.base import BaseEnvironment # Noise lines emitted by interactive shells when stdin is not a terminal. # Filtered from output to keep tool results clean. -_SHELL_NOISE = frozenset({ +_SHELL_NOISE_SUBSTRINGS = ( + "bash: cannot set terminal process group", "bash: no job control in this shell", - "bash: no job control in this shell\n", "no job control in this shell", - "no job control in this shell\n", -}) + "cannot set terminal process group", + "tcsetattr: Inappropriate ioctl for device", +) def _clean_shell_noise(output: str) -> str: - """Strip shell startup warnings that leak when using -i without a TTY.""" - lines = output.split("\n", 2) # only check first two lines - if lines and lines[0].strip() in _SHELL_NOISE: - return "\n".join(lines[1:]) - return output + """Strip shell startup warnings that leak when using -i without a TTY. + + Removes all leading lines that match known noise patterns, not just the first. + Some environments emit multiple noise lines (e.g. Docker, non-TTY sessions). 
+ """ + lines = output.split("\n") + # Strip all leading noise lines + while lines and any(noise in lines[0] for noise in _SHELL_NOISE_SUBSTRINGS): + lines.pop(0) + return "\n".join(lines) class LocalEnvironment(BaseEnvironment): diff --git a/tools/openrouter_client.py b/tools/openrouter_client.py index 7d30e6eec..343cf1021 100644 --- a/tools/openrouter_client.py +++ b/tools/openrouter_client.py @@ -31,7 +31,7 @@ def get_async_client() -> AsyncOpenAI: default_headers={ "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", }, ) return _client diff --git a/tools/process_registry.py b/tools/process_registry.py index bfdb8cd1d..cbc0dd853 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -87,13 +87,13 @@ class ProcessRegistry: - Cleanup thread (sandbox reaping coordination) """ - # Noise lines emitted by interactive shells when stdin is not a terminal. 
- _SHELL_NOISE = frozenset({ + _SHELL_NOISE_SUBSTRINGS = ( + "bash: cannot set terminal process group", "bash: no job control in this shell", - "bash: no job control in this shell\n", "no job control in this shell", - "no job control in this shell\n", - }) + "cannot set terminal process group", + "tcsetattr: Inappropriate ioctl for device", + ) def __init__(self): self._running: Dict[str, ProcessSession] = {} @@ -106,10 +106,10 @@ class ProcessRegistry: @staticmethod def _clean_shell_noise(text: str) -> str: """Strip shell startup warnings from the beginning of output.""" - lines = text.split("\n", 2) - if lines and lines[0].strip() in ProcessRegistry._SHELL_NOISE: - return "\n".join(lines[1:]) - return text + lines = text.split("\n") + while lines and any(noise in lines[0] for noise in ProcessRegistry._SHELL_NOISE_SUBSTRINGS): + lines.pop(0) + return "\n".join(lines) # ----- Spawn ----- diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index bcfbfdf2a..b11b79fda 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -24,26 +24,13 @@ from typing import Dict, Any, List, Optional from openai import AsyncOpenAI, OpenAI -from agent.auxiliary_client import get_text_auxiliary_client +from agent.auxiliary_client import get_async_text_auxiliary_client -# Resolve the auxiliary client at import time so we have the model slug. -# We build an AsyncOpenAI from the same credentials for async summarization. 
-_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client() -_async_aux_client: AsyncOpenAI | None = None -if _aux_client is not None: - _async_kwargs = { - "api_key": _aux_client.api_key, - "base_url": str(_aux_client.base_url), - } - if "openrouter" in str(_aux_client.base_url).lower(): - _async_kwargs["default_headers"] = { - "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", - "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", - } - _async_aux_client = AsyncOpenAI(**_async_kwargs) +# Resolve the async auxiliary client at import time so we have the model slug. +# Handles Codex Responses API adapter transparently. +_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client() MAX_SESSION_CHARS = 100_000 -MAX_SUMMARY_TOKENS = 2000 +MAX_SUMMARY_TOKENS = 10000 def _format_timestamp(ts) -> str: diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index cb0d9cd4e..f758768eb 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1037,8 +1037,12 @@ def terminal_tool( ) output = output[:head_chars] + truncated_notice + output[-tail_chars:] + # Redact secrets from command output (catches env/printenv leaking keys) + from agent.redact import redact_sensitive_text + output = redact_sensitive_text(output.strip()) if output else "" + return json.dumps({ - "output": output.strip() if output else "", + "output": output, "exit_code": returncode, "error": None }, ensure_ascii=False) diff --git a/tools/vision_tools.py b/tools/vision_tools.py index 39413d5b0..f3744e95f 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -54,7 +54,7 @@ if _aux_sync_client is not None: _async_kwargs["default_headers"] = { "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", + "X-OpenRouter-Categories": "productivity,cli-agent", } _aux_async_client = AsyncOpenAI(**_async_kwargs) diff --git a/tools/web_tools.py 
b/tools/web_tools.py index 0e5baaa29..541404e6d 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -48,7 +48,7 @@ import asyncio from typing import List, Dict, Any, Optional from firecrawl import Firecrawl from openai import AsyncOpenAI -from agent.auxiliary_client import get_text_auxiliary_client +from agent.auxiliary_client import get_async_text_auxiliary_client from tools.debug_helpers import DebugSession logger = logging.getLogger(__name__) @@ -67,21 +67,9 @@ def _get_firecrawl_client(): DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000 -# Resolve auxiliary text client at module level; build an async wrapper. -_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client() -_aux_async_client: AsyncOpenAI | None = None -if _aux_sync_client is not None: - _async_kwargs = { - "api_key": _aux_sync_client.api_key, - "base_url": str(_aux_sync_client.base_url), - } - if "openrouter" in str(_aux_sync_client.base_url).lower(): - _async_kwargs["default_headers"] = { - "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", - "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "cli-agent", - } - _aux_async_client = AsyncOpenAI(**_async_kwargs) +# Resolve async auxiliary client at module level. +# Handles Codex Responses API adapter transparently. 
+_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client() _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG") @@ -174,7 +162,7 @@ async def _call_summarizer_llm( content: str, context_str: str, model: str, - max_tokens: int = 4000, + max_tokens: int = 20000, is_chunk: bool = False, chunk_info: str = "" ) -> Optional[str]: @@ -306,7 +294,7 @@ async def _process_large_content_chunked( chunk_content, context_str, model, - max_tokens=2000, + max_tokens=10000, is_chunk=True, chunk_info=chunk_info ) @@ -374,7 +362,7 @@ Create a single, unified markdown summary.""" {"role": "user", "content": synthesis_prompt} ], temperature=0.1, - **auxiliary_max_tokens_param(4000), + **auxiliary_max_tokens_param(20000), **({} if not _extra else {"extra_body": _extra}), ) final_summary = response.choices[0].message.content.strip()