diff --git a/.env.example b/.env.example index e43f5a9b6..c4c684cde 100644 --- a/.env.example +++ b/.env.example @@ -24,10 +24,14 @@ GLM_API_KEY= # ============================================================================= # LLM PROVIDER (Kimi / Moonshot) # ============================================================================= -# Kimi/Moonshot provides access to Moonshot AI coding models -# Get your key at: https://platform.moonshot.ai +# Kimi Code provides access to Moonshot AI coding models (kimi-k2.5, etc.) +# Get your key at: https://platform.kimi.ai (Kimi Code console) +# Keys prefixed sk-kimi- use the Kimi Code API (api.kimi.com) by default. +# Legacy keys from platform.moonshot.ai need KIMI_BASE_URL override below. KIMI_API_KEY= -# KIMI_BASE_URL=https://api.moonshot.ai/v1 # Override default base URL +# KIMI_BASE_URL=https://api.kimi.com/coding/v1 # Default for sk-kimi- keys +# KIMI_BASE_URL=https://api.moonshot.ai/v1 # For legacy Moonshot keys +# KIMI_BASE_URL=https://api.moonshot.cn/v1 # For Moonshot China keys # ============================================================================= # LLM PROVIDER (MiniMax) diff --git a/.gitignore b/.gitignore index af9d9e750..78a382942 100644 --- a/.gitignore +++ b/.gitignore @@ -47,4 +47,5 @@ cli-config.yaml # Skills Hub state (lives in ~/.hermes/skills/.hub/ at runtime, but just in case) skills/.hub/ -ignored/ \ No newline at end of file +ignored/ +.worktrees/ diff --git a/AGENTS.md b/AGENTS.md index 2fb9b0989..a7318fd33 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -58,6 +58,7 @@ hermes-agent/ ├── skills/ # Bundled skill sources ├── optional-skills/ # Official optional skills (not activated by default) ├── cli.py # Interactive CLI orchestrator (HermesCLI class) +├── hermes_state.py # SessionDB — SQLite session store (schema, titles, FTS5 search) ├── run_agent.py # AIAgent class (core conversation loop) ├── model_tools.py # Tool orchestration (thin layer over tools/registry.py) ├── toolsets.py # Tool 
groupings @@ -98,7 +99,7 @@ The main agent is implemented in `run_agent.py`: class AIAgent: def __init__( self, - model: str = "anthropic/claude-sonnet-4", + model: str = "anthropic/claude-sonnet-4.6", api_key: str = None, base_url: str = "https://openrouter.ai/api/v1", max_iterations: int = 60, # Max tool-calling loops @@ -226,6 +227,10 @@ The unified `hermes` command provides all functionality: |---------|-------------| | `hermes` | Interactive chat (default) | | `hermes chat -q "..."` | Single query mode | +| `hermes -c` / `hermes --continue` | Resume the most recent session | +| `hermes -c "my project"` | Resume a session by name (latest in lineage) | +| `hermes --resume <id-or-title>` | Resume a specific session by ID or title | +| `hermes -w` / `hermes --worktree` | Start in isolated git worktree (for parallel agents) | | `hermes setup` | Configure API keys and settings | | `hermes config` | View current configuration | | `hermes config edit` | Open config in editor | @@ -239,6 +244,8 @@ The unified `hermes` command provides all functionality: | `hermes gateway` | Start gateway (messaging + cron scheduler) | | `hermes gateway setup` | Configure messaging platforms interactively | | `hermes gateway install` | Install gateway as system service | +| `hermes sessions list` | List past sessions (title, preview, last active) | +| `hermes sessions rename <id> <new-title>` | Rename/title a session | | `hermes cron list` | View scheduled jobs | | `hermes cron status` | Check if cron scheduler is running | | `hermes version` | Show version info | @@ -678,6 +685,28 @@ Key files: --- +## Known Pitfalls + +### DO NOT use `simple_term_menu` for interactive menus + +`simple_term_menu` has rendering bugs in tmux, iTerm2, and other non-standard terminals. When the user scrolls with arrow keys, previously highlighted items "ghost" — duplicating upward and corrupting the display. 
This happens because the library uses ANSI cursor-up codes to redraw in place, and tmux/iTerm miscalculate positions when the menu is near the bottom of the viewport. + +**Rule:** All interactive menus in `hermes_cli/` must use `curses` (Python stdlib) instead. See `tools_config.py` for the pattern — both `_prompt_choice()` (single-select) and `_prompt_toolset_checklist()` (multi-select with space toggle) use `curses.wrapper()`. The numbered-input fallback handles Windows where curses isn't available. + +### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code + +The ANSI escape `\033[K` leaks as literal `?[K` text when `prompt_toolkit`'s `patch_stdout` is active. Use space-padding instead to clear lines: `f"\r{line}{' ' * pad}"`. See `agent/display.py` `KawaiiSpinner`. + +### `_last_resolved_tool_names` is a process-global in `model_tools.py` + +The `execute_code` sandbox uses `_last_resolved_tool_names` (set by `get_tool_definitions()`) to decide which tool stubs to generate. When subagents run with restricted toolsets, they overwrite this global. After delegation returns to the parent, `execute_code` may see the child's restricted list instead of the parent's full list. This is a known bug — `execute_code` calls after delegation may fail with `ImportError: cannot import name 'patch' from 'hermes_tools'`. + +### Tests must not write to `~/.hermes/` + +The `autouse` fixture `_isolate_hermes_home` in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Every test runs in isolation. If you add a test that creates `AIAgent` instances or writes session logs, the fixture handles cleanup automatically. Never hardcode `~/.hermes/` paths in tests. 
+ +--- + ## Testing Changes After making changes: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9679d79d1..6ed6c833e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -118,7 +118,7 @@ hermes-agent/ ├── cli.py # HermesCLI class — interactive TUI, prompt_toolkit integration ├── model_tools.py # Tool orchestration (thin layer over tools/registry.py) ├── toolsets.py # Tool groupings and presets (hermes-cli, hermes-telegram, etc.) -├── hermes_state.py # SQLite session database with FTS5 full-text search +├── hermes_state.py # SQLite session database with FTS5 full-text search, session titles ├── batch_runner.py # Parallel batch processing for trajectory generation │ ├── agent/ # Agent internals (extracted modules) @@ -218,7 +218,7 @@ User message → AIAgent._run_agent_loop() - **Self-registering tools**: Each tool file calls `registry.register()` at import time. `model_tools.py` triggers discovery by importing all tool modules. - **Toolset grouping**: Tools are grouped into toolsets (`web`, `terminal`, `file`, `browser`, etc.) that can be enabled/disabled per platform. -- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search. JSON logs go to `~/.hermes/sessions/`. +- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. JSON logs go to `~/.hermes/sessions/`. - **Ephemeral injection**: System prompts and prefill messages are injected at API call time, never persisted to the database or logs. - **Provider abstraction**: The agent works with any OpenAI-compatible API. Provider resolution happens at init time (Nous Portal OAuth, OpenRouter API key, or custom endpoint). - **Provider routing**: When using OpenRouter, `provider_routing` in config.yaml controls provider selection (sort by throughput/latency/price, allow/ignore specific providers, data retention policies). These are injected as `extra_body.provider` in API requests. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..75410e733 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Nous Research + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 21510cbfa..841bb6166 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -317,14 +317,22 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: if not api_key: continue # Resolve base URL (with optional env-var override) - base_url = pconfig.inference_base_url + # Kimi Code keys (sk-kimi-) need api.kimi.com/coding/v1 + env_url = "" if pconfig.base_url_env_var: env_url = os.getenv(pconfig.base_url_env_var, "").strip() - if env_url: - base_url = env_url.rstrip("/") + if env_url: + base_url = env_url.rstrip("/") + elif provider_id == "kimi-coding" and api_key.startswith("sk-kimi-"): + base_url = "https://api.kimi.com/coding/v1" + else: + base_url = pconfig.inference_base_url model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default") logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model) - return OpenAI(api_key=api_key, base_url=base_url), model + extra = {} + if "api.kimi.com" in base_url.lower(): + extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"} + return OpenAI(api_key=api_key, base_url=base_url, **extra), model return None, None @@ -403,6 +411,8 @@ def get_async_text_auxiliary_client(): } if "openrouter" in str(sync_client.base_url).lower(): async_kwargs["default_headers"] = dict(_OR_HEADERS) + elif "api.kimi.com" in str(sync_client.base_url).lower(): + async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"} return AsyncOpenAI(**async_kwargs), model diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 798536fba..35897cccd 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -7,7 +7,7 @@ protecting head and tail context. 
import logging import os -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from agent.auxiliary_client import get_text_auxiliary_client from agent.model_metadata import ( @@ -82,11 +82,14 @@ class ContextCompressor: "compression_count": self.compression_count, } - def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> str: - """Generate a concise summary of conversation turns using a fast model.""" - if not self.client: - return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed to save space. The assistant performed various actions and received responses." + def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]: + """Generate a concise summary of conversation turns. + Tries the auxiliary model first, then falls back to the user's main + model. Returns None if all attempts fail — the caller should drop + the middle turns without a summary rather than inject a useless + placeholder. + """ parts = [] for msg in turns_to_summarize: role = msg.get("role", "unknown") @@ -117,28 +120,28 @@ TURNS TO SUMMARIZE: Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" - try: - return self._call_summary_model(self.client, self.summary_model, prompt) - except Exception as e: - logging.warning(f"Failed to generate context summary with auxiliary model: {e}") + # 1. Try the auxiliary model (cheap/fast) + if self.client: + try: + return self._call_summary_model(self.client, self.summary_model, prompt) + except Exception as e: + logging.warning(f"Failed to generate context summary with auxiliary model: {e}") - # Fallback: try the main model's endpoint. This handles the common - # case where the user switched providers (e.g. OpenRouter → local LLM) - # but a stale API key causes the auxiliary client to pick the old - # provider which then fails (402, auth error, etc.). 
- fallback_client, fallback_model = self._get_fallback_client() - if fallback_client is not None: - try: - logger.info("Retrying context summary with fallback client (%s)", fallback_model) - summary = self._call_summary_model(fallback_client, fallback_model, prompt) - # Success — swap in the working client for future compressions - self.client = fallback_client - self.summary_model = fallback_model - return summary - except Exception as fallback_err: - logging.warning(f"Fallback summary model also failed: {fallback_err}") + # 2. Fallback: try the user's main model endpoint + fallback_client, fallback_model = self._get_fallback_client() + if fallback_client is not None: + try: + logger.info("Retrying context summary with main model (%s)", fallback_model) + summary = self._call_summary_model(fallback_client, fallback_model, prompt) + self.client = fallback_client + self.summary_model = fallback_model + return summary + except Exception as fallback_err: + logging.warning(f"Main model summary also failed: {fallback_err}") - return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed. The assistant performed tool calls and received responses." + # 3. All models failed — return None so the caller drops turns without a summary + logging.warning("Context compression: no model available for summary. Middle turns will be dropped without summary.") + return None def _call_summary_model(self, client, model: str, prompt: str) -> str: """Make the actual LLM call to generate a summary. 
Raises on failure.""" @@ -326,25 +329,6 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)") print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})") - # Truncation fallback when no auxiliary model is available - if self.client is None: - print("⚠️ Context compression: no auxiliary model available. Falling back to message truncation.") - # Keep system message(s) at the front and the protected tail; - # simply drop the oldest non-system messages until under threshold. - kept = [] - for msg in messages: - if msg.get("role") == "system": - kept.append(msg.copy()) - else: - break - tail = messages[-self.protect_last_n:] - kept.extend(m.copy() for m in tail) - self.compression_count += 1 - kept = self._sanitize_tool_pairs(kept) - if not self.quiet_mode: - print(f" ✂️ Truncated: {len(messages)} → {len(kept)} messages (dropped middle turns)") - return kept - if not self.quiet_mode: print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)") @@ -357,7 +341,11 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]" compressed.append(msg) - compressed.append({"role": "user", "content": summary}) + if summary: + compressed.append({"role": "user", "content": summary}) + else: + if not self.quiet_mode: + print(" ⚠️ No summary model available — middle turns dropped without summary") for i in range(compress_end, n_messages): compressed.append(messages[i].copy()) diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index c576b55c1..c933ffe67 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -66,7 +66,8 @@ DEFAULT_AGENT_IDENTITY = ( "range of tasks including 
answering questions, writing and editing code, " "analyzing information, creative work, and executing actions via your tools. " "You communicate clearly, admit uncertainty when appropriate, and prioritize " - "being genuinely useful over being verbose unless otherwise directed below." + "being genuinely useful over being verbose unless otherwise directed below. " + "Be targeted and efficient in your exploration and investigations." ) MEMORY_GUIDANCE = ( @@ -102,12 +103,24 @@ PLATFORM_HINTS = { "You are on a text messaging communication platform, Telegram. " "Please do not use markdown as it does not render. " "You can send media files natively: to deliver a file to the user, " - "include MEDIA:/absolute/path/to/file in your response. Audio " - "(.ogg) sends as voice bubbles. You can also include image URLs " - "in markdown format ![alt](url) and they will be sent as native photos." + "include MEDIA:/absolute/path/to/file in your response. Images " + "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice " + "bubbles, and videos (.mp4) play inline. You can also include image " + "URLs in markdown format ![alt](url) and they will be sent as native photos." ), "discord": ( - "You are in a Discord server or group chat communicating with your user." + "You are in a Discord server or group chat communicating with your user. " + "You can send media files natively: include MEDIA:/absolute/path/to/file " + "in your response. Images (.png, .jpg, .webp) are sent as photo " + "attachments, audio as file attachments. You can also include image URLs " + "in markdown format ![alt](url) and they will be sent as attachments." + ), + "slack": ( + "You are in a Slack workspace communicating with your user. " + "You can send media files natively: include MEDIA:/absolute/path/to/file " + "in your response. Images (.png, .jpg, .webp) are uploaded as photo " + "attachments, audio as file attachments. 
You can also include image URLs " + "in markdown format ![alt](url) and they will be uploaded as attachments." ), "cli": ( "You are a CLI AI Agent. Try not to use markdown but simple text " diff --git a/batch_runner.py b/batch_runner.py index b95a5cc82..a4c402ffd 100644 --- a/batch_runner.py +++ b/batch_runner.py @@ -1112,7 +1112,7 @@ def main( batch_size: int = None, run_name: str = None, distribution: str = "default", - model: str = "anthropic/claude-sonnet-4-20250514", + model: str = "anthropic/claude-sonnet-4.6", api_key: str = None, base_url: str = "https://openrouter.ai/api/v1", max_turns: int = 10, @@ -1155,7 +1155,7 @@ def main( providers_order (str): Comma-separated list of OpenRouter providers to try in order (e.g. "anthropic,openai,google") provider_sort (str): Sort providers by "price", "throughput", or "latency" (OpenRouter only) max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set) - reasoning_effort (str): OpenRouter reasoning effort level: "xhigh", "high", "medium", "low", "minimal", "none" (default: "xhigh") + reasoning_effort (str): OpenRouter reasoning effort level: "xhigh", "high", "medium", "low", "minimal", "none" (default: "medium") reasoning_disabled (bool): Completely disable reasoning/thinking tokens (default: False) prefill_messages_file (str): Path to JSON file containing prefill messages (list of {role, content} dicts) max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set) @@ -1216,7 +1216,7 @@ def main( providers_order_list = [p.strip() for p in providers_order.split(",")] if providers_order else None # Build reasoning_config from CLI flags - # --reasoning_disabled takes priority, then --reasoning_effort, then default (xhigh) + # --reasoning_disabled takes priority, then --reasoning_effort, then default (medium) reasoning_config = None if reasoning_disabled: # Completely disable reasoning/thinking tokens diff --git a/cli-config.yaml.example 
b/cli-config.yaml.example index d8489d95b..dfbaeee6b 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -50,6 +50,16 @@ model: # # Data policy: "allow" (default) or "deny" to exclude providers that may store data # # data_collection: "deny" +# ============================================================================= +# Git Worktree Isolation +# ============================================================================= +# When enabled, each CLI session creates an isolated git worktree so multiple +# agents can work on the same repo concurrently without file collisions. +# Equivalent to always passing --worktree / -w on the command line. +# +# worktree: true # Always create a worktree when in a git repo +# worktree: false # Default — only create when -w flag is passed + # ============================================================================= # Terminal Tool Configuration # ============================================================================= @@ -285,7 +295,7 @@ agent: # Reasoning effort level (OpenRouter and Nous Portal) # Controls how much "thinking" the model does before responding. 
# Options: "xhigh" (max), "high", "medium", "low", "minimal", "none" (disable) - reasoning_effort: "xhigh" + reasoning_effort: "medium" # Predefined personalities (use with /personality command) personalities: diff --git a/cli.py b/cli.py index 4d1941f81..937966b05 100755 --- a/cli.py +++ b/cli.py @@ -43,7 +43,6 @@ from prompt_toolkit.layout.dimension import Dimension from prompt_toolkit.layout.menus import CompletionsMenu from prompt_toolkit.widgets import TextArea from prompt_toolkit.key_binding import KeyBindings -from prompt_toolkit.completion import Completer, Completion from prompt_toolkit import print_formatted_text as _pt_print from prompt_toolkit.formatted_text import ANSI as _PT_ANSI import threading @@ -108,7 +107,7 @@ def _parse_reasoning_config(effort: str) -> dict | None: """Parse a reasoning effort level into an OpenRouter reasoning config dict. Valid levels: "xhigh", "high", "medium", "low", "minimal", "none". - Returns None to use the default (xhigh), or a config dict to override. + Returns None to use the default (medium), or a config dict to override. 
""" if not effort or not effort.strip(): return None @@ -118,7 +117,7 @@ def _parse_reasoning_config(effort: str) -> dict | None: valid = ("xhigh", "high", "medium", "low", "minimal") if effort in valid: return {"enabled": True, "effort": effort} - logger.warning("Unknown reasoning_effort '%s', using default (xhigh)", effort) + logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort) return None @@ -297,6 +296,7 @@ def load_cli_config() -> Dict[str, Any]: "container_disk": "TERMINAL_CONTAINER_DISK", "container_persistent": "TERMINAL_CONTAINER_PERSISTENT", "docker_volumes": "TERMINAL_DOCKER_VOLUMES", + "sandbox_dir": "TERMINAL_SANDBOX_DIR", # Sudo support (works with all backends) "sudo_password": "SUDO_PASSWORD", } @@ -395,6 +395,228 @@ def _run_cleanup(): except Exception: pass + +# ============================================================================= +# Git Worktree Isolation (#652) +# ============================================================================= + +# Tracks the active worktree for cleanup on exit +_active_worktree: Optional[Dict[str, str]] = None + + +def _git_repo_root() -> Optional[str]: + """Return the git repo root for CWD, or None if not in a repo.""" + import subprocess + try: + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, text=True, timeout=5, + ) + if result.returncode == 0: + return result.stdout.strip() + except Exception: + pass + return None + + +def _setup_worktree(repo_root: str = None) -> Optional[Dict[str, str]]: + """Create an isolated git worktree for this CLI session. + + Returns a dict with worktree metadata on success, None on failure. + The dict contains: path, branch, repo_root. 
+ """ + import subprocess + + repo_root = repo_root or _git_repo_root() + if not repo_root: + print("\033[31m✗ --worktree requires being inside a git repository.\033[0m") + print(" cd into your project repo first, then run hermes -w") + return None + + short_id = uuid.uuid4().hex[:8] + wt_name = f"hermes-{short_id}" + branch_name = f"hermes/{wt_name}" + + worktrees_dir = Path(repo_root) / ".worktrees" + worktrees_dir.mkdir(parents=True, exist_ok=True) + + wt_path = worktrees_dir / wt_name + + # Ensure .worktrees/ is in .gitignore + gitignore = Path(repo_root) / ".gitignore" + _ignore_entry = ".worktrees/" + try: + existing = gitignore.read_text() if gitignore.exists() else "" + if _ignore_entry not in existing.splitlines(): + with open(gitignore, "a") as f: + if existing and not existing.endswith("\n"): + f.write("\n") + f.write(f"{_ignore_entry}\n") + except Exception as e: + logger.debug("Could not update .gitignore: %s", e) + + # Create the worktree + try: + result = subprocess.run( + ["git", "worktree", "add", str(wt_path), "-b", branch_name, "HEAD"], + capture_output=True, text=True, timeout=30, cwd=repo_root, + ) + if result.returncode != 0: + print(f"\033[31m✗ Failed to create worktree: {result.stderr.strip()}\033[0m") + return None + except Exception as e: + print(f"\033[31m✗ Failed to create worktree: {e}\033[0m") + return None + + # Copy files listed in .worktreeinclude (gitignored files the agent needs) + include_file = Path(repo_root) / ".worktreeinclude" + if include_file.exists(): + try: + for line in include_file.read_text().splitlines(): + entry = line.strip() + if not entry or entry.startswith("#"): + continue + src = Path(repo_root) / entry + dst = wt_path / entry + if src.is_file(): + dst.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(str(src), str(dst)) + elif src.is_dir(): + # Symlink directories (faster, saves disk) + if not dst.exists(): + dst.parent.mkdir(parents=True, exist_ok=True) + os.symlink(str(src.resolve()), str(dst)) + 
except Exception as e: + logger.debug("Error copying .worktreeinclude entries: %s", e) + + info = { + "path": str(wt_path), + "branch": branch_name, + "repo_root": repo_root, + } + + print(f"\033[32m✓ Worktree created:\033[0m {wt_path}") + print(f" Branch: {branch_name}") + + return info + + +def _cleanup_worktree(info: Dict[str, str] = None) -> None: + """Remove a worktree and its branch on exit. + + If the worktree has uncommitted changes, warn and keep it. + """ + global _active_worktree + info = info or _active_worktree + if not info: + return + + import subprocess + + wt_path = info["path"] + branch = info["branch"] + repo_root = info["repo_root"] + + if not Path(wt_path).exists(): + return + + # Check for uncommitted changes + try: + status = subprocess.run( + ["git", "status", "--porcelain"], + capture_output=True, text=True, timeout=10, cwd=wt_path, + ) + has_changes = bool(status.stdout.strip()) + except Exception: + has_changes = True # Assume dirty on error — don't delete + + if has_changes: + print(f"\n\033[33m⚠ Worktree has uncommitted changes, keeping: {wt_path}\033[0m") + print(f" To clean up manually: git worktree remove {wt_path}") + _active_worktree = None + return + + # Remove worktree + try: + subprocess.run( + ["git", "worktree", "remove", wt_path, "--force"], + capture_output=True, text=True, timeout=15, cwd=repo_root, + ) + except Exception as e: + logger.debug("Failed to remove worktree: %s", e) + + # Delete the branch (only if it was never pushed / has no upstream) + try: + subprocess.run( + ["git", "branch", "-D", branch], + capture_output=True, text=True, timeout=10, cwd=repo_root, + ) + except Exception as e: + logger.debug("Failed to delete branch %s: %s", branch, e) + + _active_worktree = None + print(f"\033[32m✓ Worktree cleaned up: {wt_path}\033[0m") + + +def _prune_stale_worktrees(repo_root: str, max_age_hours: int = 24) -> None: + """Remove worktrees older than max_age_hours that have no uncommitted changes. 
+ + Runs silently on startup to clean up after crashed/killed sessions. + """ + import subprocess + import time + + worktrees_dir = Path(repo_root) / ".worktrees" + if not worktrees_dir.exists(): + return + + now = time.time() + cutoff = now - (max_age_hours * 3600) + + for entry in worktrees_dir.iterdir(): + if not entry.is_dir() or not entry.name.startswith("hermes-"): + continue + + # Check age + try: + mtime = entry.stat().st_mtime + if mtime > cutoff: + continue # Too recent — skip + except Exception: + continue + + # Check for uncommitted changes + try: + status = subprocess.run( + ["git", "status", "--porcelain"], + capture_output=True, text=True, timeout=5, cwd=str(entry), + ) + if status.stdout.strip(): + continue # Has changes — skip + except Exception: + continue # Can't check — skip + + # Safe to remove + try: + branch_result = subprocess.run( + ["git", "branch", "--show-current"], + capture_output=True, text=True, timeout=5, cwd=str(entry), + ) + branch = branch_result.stdout.strip() + + subprocess.run( + ["git", "worktree", "remove", str(entry), "--force"], + capture_output=True, text=True, timeout=15, cwd=repo_root, + ) + if branch: + subprocess.run( + ["git", "branch", "-D", branch], + capture_output=True, text=True, timeout=10, cwd=repo_root, + ) + logger.debug("Pruned stale worktree: %s", entry.name) + except Exception as e: + logger.debug("Failed to prune worktree %s: %s", entry.name, e) + # ============================================================================ # ASCII Art & Branding # ============================================================================ @@ -684,34 +906,6 @@ def build_welcome_banner(console: Console, model: str, cwd: str, tools: List[dic console.print(outer_panel) -# ============================================================================ -# CLI Commands -# ============================================================================ - -COMMANDS = { - "/help": "Show this help message", - "/tools": "List available 
tools", - "/toolsets": "List available toolsets", - "/model": "Show or change the current model", - "/prompt": "View/set custom system prompt", - "/personality": "Set a predefined personality", - "/clear": "Clear screen and reset conversation (fresh start)", - "/history": "Show conversation history", - "/new": "Start a new conversation (reset history)", - "/reset": "Reset conversation only (keep screen)", - "/retry": "Retry the last message (resend to agent)", - "/undo": "Remove the last user/assistant exchange", - "/save": "Save the current conversation", - "/config": "Show current configuration", - "/cron": "Manage scheduled tasks (list, add, remove)", - "/skills": "Search, install, inspect, or manage skills from online registries", - "/platforms": "Show gateway/messaging platform status", - "/paste": "Check clipboard for an image and attach it", - "/reload-mcp": "Reload MCP servers from config.yaml", - "/quit": "Exit the CLI (also: /exit, /q)", -} - - # ============================================================================ # Skill Slash Commands — dynamic commands generated from installed skills # ============================================================================ @@ -721,38 +915,6 @@ from agent.skill_commands import scan_skill_commands, get_skill_commands, build_ _skill_commands = scan_skill_commands() -class SlashCommandCompleter(Completer): - """Autocomplete for /commands and /skill-name in the input area.""" - - def get_completions(self, document, complete_event): - text = document.text_before_cursor - if not text.startswith("/"): - return - word = text[1:] # strip the leading / - - # Built-in commands - for cmd, desc in COMMANDS.items(): - cmd_name = cmd[1:] - if cmd_name.startswith(word): - yield Completion( - cmd_name, - start_position=-len(word), - display=cmd, - display_meta=desc, - ) - - # Skill commands - for cmd, info in _skill_commands.items(): - cmd_name = cmd[1:] - if cmd_name.startswith(word): - yield Completion( - cmd_name, - 
start_position=-len(word), - display=cmd, - display_meta=f"⚡ {info['description'][:50]}{'...' if len(info['description']) > 50 else ''}", - ) - - def save_config_value(key_path: str, value: any) -> bool: """ Save a value to the active config file at the specified key path. @@ -851,6 +1013,10 @@ class HermesCLI: # Configuration - priority: CLI args > env vars > config file # Model can come from: CLI arg, LLM_MODEL env, OPENAI_MODEL env (custom endpoint), or config self.model = model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or CLI_CONFIG["model"]["default"] + # Track whether model was explicitly chosen by the user or fell back + # to the global default. Provider-specific normalisation may override + # the default silently but should warn when overriding an explicit choice. + self._model_is_default = not (model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL")) self._explicit_api_key = api_key self._explicit_base_url = base_url @@ -933,6 +1099,16 @@ class HermesCLI: self.conversation_history: List[Dict[str, Any]] = [] self.session_start = datetime.now() self._resumed = False + # Initialize SQLite session store early so /title works before first message + self._session_db = None + try: + from hermes_state import SessionDB + self._session_db = SessionDB() + except Exception: + pass + + # Deferred title: stored in memory until the session is created in the DB + self._pending_title: Optional[str] = None # Session ID: reuse existing one when resuming, otherwise generate fresh if resume: @@ -955,6 +1131,63 @@ class HermesCLI: self._last_invalidate = now self._app.invalidate() + def _normalize_model_for_provider(self, resolved_provider: str) -> bool: + """Normalize obviously incompatible model/provider pairings. + + When the resolved provider is ``openai-codex``, the Codex Responses API + only accepts Codex-compatible model slugs (e.g. ``gpt-5.3-codex``). + If the active model is incompatible (e.g. 
the OpenRouter default + ``anthropic/claude-opus-4.6``), swap it for the best available Codex + model. Also strips provider prefixes the API does not accept + (``openai/gpt-5.3-codex`` → ``gpt-5.3-codex``). + + Returns True when the active model was changed. + """ + if resolved_provider != "openai-codex": + return False + + current_model = (self.model or "").strip() + current_slug = current_model.split("/")[-1] if current_model else "" + + # Keep explicit Codex models, but strip any provider prefix that the + # Codex Responses API does not accept. + if current_slug and "codex" in current_slug.lower(): + if current_slug != current_model: + self.model = current_slug + if not self._model_is_default: + self.console.print( + f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; " + f"using '{current_slug}' for OpenAI Codex.[/]" + ) + return True + return False + + # Model is not Codex-compatible — replace with the best available + fallback_model = "gpt-5.3-codex" + try: + from hermes_cli.codex_models import get_codex_model_ids + + codex_models = get_codex_model_ids( + access_token=self.api_key if self.api_key else None, + ) + fallback_model = next( + (mid for mid in codex_models if "codex" in mid.lower()), + fallback_model, + ) + except Exception: + pass + + if current_model != fallback_model: + if not self._model_is_default: + self.console.print( + f"[yellow]⚠️ Model '{current_model}' is not supported with " + f"OpenAI Codex; switching to '{fallback_model}'.[/]" + ) + self.model = fallback_model + return True + + return False + def _ensure_runtime_credentials(self) -> bool: """ Ensure runtime credentials are resolved before agent use. @@ -1000,8 +1233,13 @@ class HermesCLI: self.api_key = api_key self.base_url = base_url - # AIAgent/OpenAI client holds auth at init time, so rebuild if key rotated - if (credentials_changed or routing_changed) and self.agent is not None: + # Normalize model for the resolved provider (e.g. 
swap non-Codex + # models when provider is openai-codex). Fixes #651. + model_changed = self._normalize_model_for_provider(resolved_provider) + + # AIAgent/OpenAI client holds auth at init time, so rebuild if key, + # routing, or the effective model changed. + if (credentials_changed or routing_changed or model_changed) and self.agent is not None: self.agent = None return True @@ -1020,13 +1258,13 @@ class HermesCLI: if not self._ensure_runtime_credentials(): return False - # Initialize SQLite session store for CLI sessions - self._session_db = None - try: - from hermes_state import SessionDB - self._session_db = SessionDB() - except Exception as e: - logger.debug("SQLite session store not available: %s", e) + # Initialize SQLite session store for CLI sessions (if not already done in __init__) + if self._session_db is None: + try: + from hermes_state import SessionDB + self._session_db = SessionDB() + except Exception as e: + logger.debug("SQLite session store not available: %s", e) # If resuming, validate the session exists and load its history if self._resumed and self._session_db: @@ -1039,8 +1277,11 @@ class HermesCLI: if restored: self.conversation_history = restored msg_count = len([m for m in restored if m.get("role") == "user"]) + title_part = "" + if session_meta.get("title"): + title_part = f" \"{session_meta['title']}\"" _cprint( - f"{_GOLD}↻ Resumed session {_BOLD}{self.session_id}{_RST}{_GOLD} " + f"{_GOLD}↻ Resumed session {_BOLD}{self.session_id}{_RST}{_GOLD}{title_part} " f"({msg_count} user message{'s' if msg_count != 1 else ''}, " f"{len(restored)} total messages){_RST}" ) @@ -1082,6 +1323,15 @@ class HermesCLI: clarify_callback=self._clarify_callback, honcho_session_key=self.session_id, ) + # Apply any pending title now that the session exists in the DB + if self._pending_title and self._session_db: + try: + self._session_db.set_session_title(self.session_id, self._pending_title) + _cprint(f" Session title applied: {self._pending_title}") + 
self._pending_title = None + except (ValueError, Exception) as e: + _cprint(f" Could not apply pending title: {e}") + self._pending_title = None return True except Exception as e: self.console.print(f"[bold red]Failed to initialize agent: {e}[/]") @@ -1158,32 +1408,68 @@ class HermesCLI: else: _cprint(f" {_DIM}(._.) No image found in clipboard{_RST}") - def _build_multimodal_content(self, text: str, images: list) -> list: - """Convert text + image paths into OpenAI vision multimodal content. + def _preprocess_images_with_vision(self, text: str, images: list) -> str: + """Analyze attached images via the vision tool and return enriched text. - Returns a list of content parts suitable for the ``content`` field - of a ``user`` message. + Instead of embedding raw base64 ``image_url`` content parts in the + conversation (which only works with vision-capable models), this + pre-processes each image through the auxiliary vision model (Gemini + Flash) and prepends the descriptions to the user's message — the + same approach the messaging gateway uses. + + The local file path is included so the agent can re-examine the + image later with ``vision_analyze`` if needed. """ - import base64 as _b64 + import asyncio as _asyncio + import json as _json + from tools.vision_tools import vision_analyze_tool - content_parts = [] - text_part = text if isinstance(text, str) and text else "What do you see in this image?" - content_parts.append({"type": "text", "text": text_part}) + analysis_prompt = ( + "Describe everything visible in this image in thorough detail. " + "Include any text, code, data, objects, people, layout, colors, " + "and any other notable visual information." 
+ ) - _MIME = { - "png": "image/png", "jpg": "image/jpeg", "jpeg": "image/jpeg", - "gif": "image/gif", "webp": "image/webp", - } + enriched_parts = [] for img_path in images: - if img_path.exists(): - data = _b64.b64encode(img_path.read_bytes()).decode() - ext = img_path.suffix.lower().lstrip(".") - mime = _MIME.get(ext, "image/png") - content_parts.append({ - "type": "image_url", - "image_url": {"url": f"data:{mime};base64,{data}"} - }) - return content_parts + if not img_path.exists(): + continue + size_kb = img_path.stat().st_size // 1024 + _cprint(f" {_DIM}👁️ analyzing {img_path.name} ({size_kb}KB)...{_RST}") + try: + result_json = _asyncio.run( + vision_analyze_tool(image_url=str(img_path), user_prompt=analysis_prompt) + ) + result = _json.loads(result_json) + if result.get("success"): + description = result.get("analysis", "") + enriched_parts.append( + f"[The user attached an image. Here's what it contains:\n{description}]\n" + f"[If you need a closer look, use vision_analyze with " + f"image_url: {img_path}]" + ) + _cprint(f" {_DIM}✓ image analyzed{_RST}") + else: + enriched_parts.append( + f"[The user attached an image but it couldn't be analyzed. " + f"You can try examining it with vision_analyze using " + f"image_url: {img_path}]" + ) + _cprint(f" {_DIM}⚠ vision analysis failed — path included for retry{_RST}") + except Exception as e: + enriched_parts.append( + f"[The user attached an image but analysis failed ({e}). " + f"You can try examining it with vision_analyze using " + f"image_url: {img_path}]" + ) + _cprint(f" {_DIM}⚠ vision analysis error — path included for retry{_RST}") + + # Combine: vision descriptions first, then the user's original text + user_text = text if isinstance(text, str) and text else "" + if enriched_parts: + prefix = "\n\n".join(enriched_parts) + return f"{prefix}\n\n{user_text}" if user_text else prefix + return user_text or "What do you see in this image?" 
def _show_tool_availability_warnings(self): """Show warnings about disabled tools due to missing API keys.""" @@ -1385,24 +1671,65 @@ class HermesCLI: if not self.conversation_history: print("(._.) No conversation history yet.") return - + + preview_limit = 400 + visible_index = 0 + hidden_tool_messages = 0 + + def flush_tool_summary(): + nonlocal hidden_tool_messages + if not hidden_tool_messages: + return + + noun = "message" if hidden_tool_messages == 1 else "messages" + print("\n [Tools]") + print(f" ({hidden_tool_messages} tool {noun} hidden)") + hidden_tool_messages = 0 + print() print("+" + "-" * 50 + "+") print("|" + " " * 12 + "(^_^) Conversation History" + " " * 11 + "|") print("+" + "-" * 50 + "+") - - for i, msg in enumerate(self.conversation_history, 1): + + for msg in self.conversation_history: role = msg.get("role", "unknown") - content = msg.get("content") or "" - + + if role == "tool": + hidden_tool_messages += 1 + continue + + if role not in {"user", "assistant"}: + continue + + flush_tool_summary() + visible_index += 1 + + content = msg.get("content") + content_text = "" if content is None else str(content) + if role == "user": - print(f"\n [You #{i}]") - print(f" {content[:200]}{'...' if len(content) > 200 else ''}") - elif role == "assistant": - print(f"\n [Hermes #{i}]") - preview = content[:200] if content else "(tool calls)" - print(f" {preview}{'...' if len(str(content)) > 200 else ''}") - + print(f"\n [You #{visible_index}]") + print( + f" {content_text[:preview_limit]}{'...' if len(content_text) > preview_limit else ''}" + ) + continue + + print(f"\n [Hermes #{visible_index}]") + tool_calls = msg.get("tool_calls") or [] + if content_text: + preview = content_text[:preview_limit] + suffix = "..." 
if len(content_text) > preview_limit else "" + elif tool_calls: + tool_count = len(tool_calls) + noun = "call" if tool_count == 1 else "calls" + preview = f"(requested {tool_count} tool {noun})" + suffix = "" + else: + preview = "(no text response)" + suffix = "" + print(f" {preview}{suffix}") + + flush_tool_summary() print() def reset_conversation(self): @@ -1811,32 +2138,234 @@ class HermesCLI: self.agent.flush_memories(self.conversation_history) except Exception: pass - # Clear terminal screen using Rich (portable, no shell needed) - self.console.clear() + # Clear terminal screen. Inside the TUI, Rich's console.clear() + # goes through patch_stdout's StdoutProxy which swallows the + # screen-clear escape sequences. Use prompt_toolkit's output + # object directly to actually clear the terminal. + if self._app: + out = self._app.output + out.erase_screen() + out.cursor_goto(0, 0) + out.flush() + else: + self.console.clear() # Reset conversation self.conversation_history = [] - # Show fresh banner - self.show_banner() - print(" ✨ (◕‿◕)✨ Fresh start! Screen cleared and conversation reset.\n") + # Show fresh banner. Inside the TUI we must route Rich output + # through ChatConsole (which uses prompt_toolkit's native ANSI + # renderer) instead of self.console (which writes raw to stdout + # and gets mangled by patch_stdout). + if self._app: + cc = ChatConsole() + if self.compact: + cc.print(COMPACT_BANNER) + else: + tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets, quiet_mode=True) + cwd = os.getenv("TERMINAL_CWD", os.getcwd()) + ctx_len = None + if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'context_compressor'): + ctx_len = self.agent.context_compressor.context_length + build_welcome_banner( + console=cc, + model=self.model, + cwd=cwd, + tools=tools, + enabled_toolsets=self.enabled_toolsets, + session_id=self.session_id, + context_length=ctx_len, + ) + _cprint(" ✨ (◕‿◕)✨ Fresh start! 
Screen cleared and conversation reset.\n") + else: + self.show_banner() + print(" ✨ (◕‿◕)✨ Fresh start! Screen cleared and conversation reset.\n") elif cmd_lower == "/history": self.show_history() + elif cmd_lower.startswith("/title"): + parts = cmd_original.split(maxsplit=1) + if len(parts) > 1: + raw_title = parts[1].strip() + if raw_title: + if self._session_db: + # Sanitize the title early so feedback matches what gets stored + try: + from hermes_state import SessionDB + new_title = SessionDB.sanitize_title(raw_title) + except ValueError as e: + _cprint(f" {e}") + new_title = None + if not new_title: + _cprint(" Title is empty after cleanup. Please use printable characters.") + elif self._session_db.get_session(self.session_id): + # Session exists in DB — set title directly + try: + if self._session_db.set_session_title(self.session_id, new_title): + _cprint(f" Session title set: {new_title}") + else: + _cprint(" Session not found in database.") + except ValueError as e: + _cprint(f" {e}") + else: + # Session not created yet — defer the title + # Check uniqueness proactively with the sanitized title + existing = self._session_db.get_session_by_title(new_title) + if existing: + _cprint(f" Title '{new_title}' is already in use by session {existing['id']}") + else: + self._pending_title = new_title + _cprint(f" Session title queued: {new_title} (will be saved on first message)") + else: + _cprint(" Session database not available.") + else: + _cprint(" Usage: /title <your session title>") + else: + # Show current title if no argument given + if self._session_db: + session = self._session_db.get_session(self.session_id) + if session and session.get("title"): + _cprint(f" Session title: {session['title']}") + elif self._pending_title: + _cprint(f" Session title (pending): {self._pending_title}") + else: + _cprint(f" No title set. 
Usage: /title <your session title>") + else: + _cprint(" Session database not available.") elif cmd_lower in ("/reset", "/new"): self.reset_conversation() elif cmd_lower.startswith("/model"): # Use original case so model names like "Anthropic/Claude-Opus-4" are preserved parts = cmd_original.split(maxsplit=1) if len(parts) > 1: - new_model = parts[1] - self.model = new_model - self.agent = None # Force re-init - # Save to config - if save_config_value("model.default", new_model): - print(f"(^_^)b Model changed to: {new_model} (saved to config)") + from hermes_cli.auth import resolve_provider + from hermes_cli.models import ( + parse_model_input, + validate_requested_model, + _PROVIDER_LABELS, + ) + + raw_input = parts[1].strip() + + # Parse provider:model syntax (e.g. "openrouter:anthropic/claude-sonnet-4.5") + current_provider = self.provider or self.requested_provider or "openrouter" + target_provider, new_model = parse_model_input(raw_input, current_provider) + provider_changed = target_provider != current_provider + + # If provider is changing, re-resolve credentials for the new provider + api_key_for_probe = self.api_key + base_url_for_probe = self.base_url + if provider_changed: + try: + from hermes_cli.runtime_provider import resolve_runtime_provider + runtime = resolve_runtime_provider(requested=target_provider) + api_key_for_probe = runtime.get("api_key", "") + base_url_for_probe = runtime.get("base_url", "") + except Exception as e: + provider_label = _PROVIDER_LABELS.get(target_provider, target_provider) + print(f"(>_<) Could not resolve credentials for provider '{provider_label}': {e}") + print(f"(^_^) Current model unchanged: {self.model}") + return True + + try: + validation = validate_requested_model( + new_model, + target_provider, + api_key=api_key_for_probe, + base_url=base_url_for_probe, + ) + except Exception: + validation = {"accepted": True, "persist": True, "recognized": False, "message": None} + + if not validation.get("accepted"): + 
print(f"(>_<) {validation.get('message')}") + print(f" Model unchanged: {self.model}") + if "Did you mean" not in (validation.get("message") or ""): + print(" Tip: Use /model to see available models, /provider to see providers") else: - print(f"(^_^) Model changed to: {new_model} (session only)") + self.model = new_model + self.agent = None # Force re-init + + if provider_changed: + self.requested_provider = target_provider + self.provider = target_provider + self.api_key = api_key_for_probe + self.base_url = base_url_for_probe + + provider_label = _PROVIDER_LABELS.get(target_provider, target_provider) + provider_note = f" [provider: {provider_label}]" if provider_changed else "" + + if validation.get("persist"): + saved_model = save_config_value("model.default", new_model) + if provider_changed: + save_config_value("model.provider", target_provider) + if saved_model: + print(f"(^_^)b Model changed to: {new_model}{provider_note} (saved to config)") + else: + print(f"(^_^) Model changed to: {new_model}{provider_note} (this session only)") + else: + message = validation.get("message") or "" + print(f"(^_^) Model changed to: {new_model}{provider_note} (this session only)") + if message: + print(f" Reason: {message}") + print(" Note: Model will revert on restart. 
Use a verified model to save to config.") else: - print(f"Current model: {self.model}") - print(" Usage: /model <model-name> to change") + from hermes_cli.models import curated_models_for_provider, normalize_provider, _PROVIDER_LABELS + from hermes_cli.auth import resolve_provider as _resolve_provider + # Resolve "auto" to the actual provider using credential detection + raw_provider = normalize_provider(self.provider) + if raw_provider == "auto": + try: + display_provider = _resolve_provider( + self.requested_provider, + explicit_api_key=self._explicit_api_key, + explicit_base_url=self._explicit_base_url, + ) + except Exception: + display_provider = "openrouter" + else: + display_provider = raw_provider + provider_label = _PROVIDER_LABELS.get(display_provider, display_provider) + print(f"\n Current model: {self.model}") + print(f" Current provider: {provider_label}") + print() + curated = curated_models_for_provider(display_provider) + if curated: + print(f" Available models ({provider_label}):") + for mid, desc in curated: + marker = " ←" if mid == self.model else "" + label = f" {desc}" if desc else "" + print(f" {mid}{label}{marker}") + print() + print(" Usage: /model <model-name>") + print(" /model provider:model-name (to switch provider)") + print(" Example: /model openrouter:anthropic/claude-sonnet-4.5") + print(" See /provider for available providers") + elif cmd_lower == "/provider": + from hermes_cli.models import list_available_providers, normalize_provider, _PROVIDER_LABELS + from hermes_cli.auth import resolve_provider as _resolve_provider + # Resolve current provider + raw_provider = normalize_provider(self.provider) + if raw_provider == "auto": + try: + current = _resolve_provider( + self.requested_provider, + explicit_api_key=self._explicit_api_key, + explicit_base_url=self._explicit_base_url, + ) + except Exception: + current = "openrouter" + else: + current = raw_provider + current_label = _PROVIDER_LABELS.get(current, current) + print(f"\n 
Current provider: {current_label} ({current})\n") + providers = list_available_providers() + print(" Available providers:") + for p in providers: + marker = " ← active" if p["id"] == current else "" + auth = "✓" if p["authenticated"] else "✗" + aliases = f" (also: {', '.join(p['aliases'])})" if p["aliases"] else "" + print(f" [{auth}] {p['id']:<14} {p['label']}{aliases}{marker}") + print() + print(" Switch: /model provider:model-name") + print(" Setup: hermes setup") elif cmd_lower.startswith("/prompt"): # Use original case so prompt text isn't lowercased self._handle_prompt_command(cmd_original) @@ -2272,14 +2801,13 @@ class HermesCLI: if not self._init_agent(): return None - # Convert attached images to OpenAI vision multimodal content + # Pre-process images through the vision tool (Gemini Flash) so the + # main model receives text descriptions instead of raw base64 image + # content — works with any model, not just vision-capable ones. if images: - message = self._build_multimodal_content( + message = self._preprocess_images_with_vision( message if isinstance(message, str) else "", images ) - for img_path in images: - if img_path.exists(): - _cprint(f" {_DIM}📎 attached {img_path.name} ({img_path.stat().st_size // 1024}KB){_RST}") # Add user message to history self.conversation_history.append({"role": "user", "content": message}) @@ -2729,7 +3257,7 @@ class HermesCLI: multiline=True, wrap_lines=True, history=FileHistory(str(self._history_file)), - completer=SlashCommandCompleter(), + completer=SlashCommandCompleter(skill_commands_provider=lambda: _skill_commands), complete_while_typing=True, ) @@ -3220,6 +3748,8 @@ def main( list_toolsets: bool = False, gateway: bool = False, resume: str = None, + worktree: bool = False, + w: bool = False, ): """ Hermes Agent CLI - Interactive AI Assistant @@ -3238,6 +3768,8 @@ def main( list_tools: List available tools and exit list_toolsets: List available toolsets and exit resume: Resume a previous session by its ID (e.g., 
20260225_143052_a1b2c3) + worktree: Run in an isolated git worktree (for parallel agents). Alias: -w + w: Shorthand for --worktree Examples: python cli.py # Start interactive mode @@ -3245,7 +3777,11 @@ def main( python cli.py -q "What is Python?" # Single query mode python cli.py --list-tools # List tools and exit python cli.py --resume 20260225_143052_a1b2c3 # Resume session + python cli.py -w # Start in isolated git worktree + python cli.py -w -q "Fix issue #123" # Single query in worktree """ + global _active_worktree + # Signal to terminal_tool that we're in interactive mode # This enables interactive sudo password prompts with timeout os.environ["HERMES_INTERACTIVE"] = "1" @@ -3257,6 +3793,30 @@ def main( print("Starting Hermes Gateway (messaging platforms)...") asyncio.run(start_gateway()) return + + # Skip worktree for list commands (they exit immediately) + if not list_tools and not list_toolsets: + # ── Git worktree isolation (#652) ── + # Create an isolated worktree so this agent instance doesn't collide + # with other agents working on the same repo. + use_worktree = worktree or w or CLI_CONFIG.get("worktree", False) + wt_info = None + if use_worktree: + # Prune stale worktrees from crashed/killed sessions + _repo = _git_repo_root() + if _repo: + _prune_stale_worktrees(_repo) + wt_info = _setup_worktree() + if wt_info: + _active_worktree = wt_info + os.environ["TERMINAL_CWD"] = wt_info["path"] + atexit.register(_cleanup_worktree, wt_info) + else: + # Worktree was explicitly requested but setup failed — + # don't silently run without isolation. + return + else: + wt_info = None # Handle query shorthand query = query or q @@ -3295,6 +3855,17 @@ def main( compact=compact, resume=resume, ) + + # Inject worktree context into agent's system prompt + if wt_info: + wt_note = ( + f"\n\n[System note: You are working in an isolated git worktree at " + f"{wt_info['path']}. Your branch is `{wt_info['branch']}`. 
" + f"Changes here do not affect the main working tree or other agents. " + f"Remember to commit and push your changes, and create a PR if appropriate. " + f"The original repo is at {wt_info['repo_root']}.]" + ) + cli.system_prompt = (cli.system_prompt or "") + wt_note # Handle list commands (don't init agent for these) if list_tools: diff --git a/cron/scheduler.py b/cron/scheduler.py index 6a2b33477..4dfc91e09 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -176,6 +176,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" + # Load config.yaml for model, reasoning, prefill, toolsets, provider routing + _cfg = {} try: import yaml _cfg_path = str(_hermes_home / "config.yaml") @@ -190,6 +192,41 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: except Exception: pass + # Reasoning config from env or config.yaml + reasoning_config = None + effort = os.getenv("HERMES_REASONING_EFFORT", "") + if not effort: + effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip() + if effort and effort.lower() != "none": + valid = ("xhigh", "high", "medium", "low", "minimal") + if effort.lower() in valid: + reasoning_config = {"enabled": True, "effort": effort.lower()} + elif effort.lower() == "none": + reasoning_config = {"enabled": False} + + # Prefill messages from env or config.yaml + prefill_messages = None + prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "") + if prefill_file: + import json as _json + pfpath = Path(prefill_file).expanduser() + if not pfpath.is_absolute(): + pfpath = _hermes_home / pfpath + if pfpath.exists(): + try: + with open(pfpath, "r", encoding="utf-8") as _pf: + prefill_messages = _json.load(_pf) + if not isinstance(prefill_messages, list): + prefill_messages = None + except Exception: + prefill_messages = None + + # Max iterations + max_iterations = 
_cfg.get("agent", {}).get("max_turns") or _cfg.get("max_turns") or 90 + + # Provider routing + pr = _cfg.get("provider_routing", {}) + from hermes_cli.runtime_provider import ( resolve_runtime_provider, format_runtime_provider_error, @@ -208,6 +245,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: base_url=runtime.get("base_url"), provider=runtime.get("provider"), api_mode=runtime.get("api_mode"), + max_iterations=max_iterations, + reasoning_config=reasoning_config, + prefill_messages=prefill_messages, + providers_allowed=pr.get("only"), + providers_ignored=pr.get("ignore"), + providers_order=pr.get("order"), + provider_sort=pr.get("sort"), quiet_mode=True, session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}" ) diff --git a/docs/send_file_integration_map.md b/docs/send_file_integration_map.md index 1ef4ed826..e0b1ca769 100644 --- a/docs/send_file_integration_map.md +++ b/docs/send_file_integration_map.md @@ -115,8 +115,9 @@ - `edit_message(chat_id, message_id, content)` — edit sent messages ### What's missing: -- **Telegram:** No override for `send_document` or `send_image_file` — falls back to text! -- **Discord:** No override for `send_document` — falls back to text! +- **Telegram:** No override for `send_document` — falls back to text! (`send_image_file` ✅ added) +- **Discord:** No override for `send_document` — falls back to text! (`send_image_file` ✅ added) +- **Slack:** No override for `send_document` — falls back to text! (`send_image_file` ✅ added) - **WhatsApp:** Has `send_document` and `send_image_file` via bridge — COMPLETE. - The base class defaults just send "📎 File: /path" as text — useless for actual file delivery. 
@@ -126,13 +127,13 @@ - `send()` — MarkdownV2 text with fallback to plain - `send_voice()` — `.ogg`/`.opus` as `send_voice()`, others as `send_audio()` - `send_image()` — URL-based via `send_photo()` +- `send_image_file()` — local file via `send_photo(photo=open(path, 'rb'))` ✅ - `send_animation()` — GIF via `send_animation()` - `send_typing()` — "typing" chat action - `edit_message()` — edit text messages ### MISSING: - **`send_document()` NOT overridden** — Need to add `self._bot.send_document(chat_id, document=open(file_path, 'rb'), ...)` -- **`send_image_file()` NOT overridden** — Need to add `self._bot.send_photo(chat_id, photo=open(path, 'rb'), ...)` - **`send_video()` NOT overridden** — Need to add `self._bot.send_video(...)` ## 8. gateway/platforms/discord.py — Send Method Analysis @@ -141,12 +142,12 @@ - `send()` — text messages with chunking - `send_voice()` — discord.File attachment - `send_image()` — downloads URL, creates discord.File attachment +- `send_image_file()` — local file via discord.File attachment ✅ - `send_typing()` — channel.typing() - `edit_message()` — edit text messages ### MISSING: - **`send_document()` NOT overridden** — Need to add discord.File attachment -- **`send_image_file()` NOT overridden** — Need to add discord.File from local path - **`send_video()` NOT overridden** — Need to add discord.File attachment ## 9. 
gateway/run.py — User File Attachment Handling diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index c06bb6f9e..d787cc939 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -701,6 +701,8 @@ class BasePlatformAdapter(ABC): # Extract image URLs and send them as native platform attachments images, text_content = self.extract_images(response) + if images: + logger.info("[%s] extract_images found %d image(s) in response (%d chars)", self.name, len(images), len(response)) # Send the text portion first (if any remains after extractions) if text_content: @@ -727,10 +729,13 @@ class BasePlatformAdapter(ABC): human_delay = self._get_human_delay() # Send extracted images as native attachments + if images: + logger.info("[%s] Extracted %d image(s) to send as attachments", self.name, len(images)) for image_url, alt_text in images: if human_delay > 0: await asyncio.sleep(human_delay) try: + logger.info("[%s] Sending image: %s (alt=%s)", self.name, image_url[:80], alt_text[:30] if alt_text else "") # Route animated GIFs through send_animation for proper playback if self._is_animation_url(image_url): img_result = await self.send_animation( @@ -745,9 +750,9 @@ class BasePlatformAdapter(ABC): caption=alt_text if alt_text else None, ) if not img_result.success: - print(f"[{self.name}] Failed to send image: {img_result.error}") + logger.error("[%s] Failed to send image: %s", self.name, img_result.error) except Exception as img_err: - print(f"[{self.name}] Error sending image: {img_err}") + logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True) # Send extracted media files — route by file type _AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'} diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 5d7397114..905e20d6f 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -267,6 +267,43 @@ class DiscordAdapter(BasePlatformAdapter): print(f"[{self.name}] Failed to 
send audio: {e}") return await super().send_voice(chat_id, audio_path, caption, reply_to) + async def send_image_file( + self, + chat_id: str, + image_path: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """Send a local image file natively as a Discord file attachment.""" + if not self._client: + return SendResult(success=False, error="Not connected") + + try: + import io + + channel = self._client.get_channel(int(chat_id)) + if not channel: + channel = await self._client.fetch_channel(int(chat_id)) + if not channel: + return SendResult(success=False, error=f"Channel {chat_id} not found") + + if not os.path.exists(image_path): + return SendResult(success=False, error=f"Image file not found: {image_path}") + + filename = os.path.basename(image_path) + + with open(image_path, "rb") as f: + file = discord.File(io.BytesIO(f.read()), filename=filename) + msg = await channel.send( + content=caption if caption else None, + file=file, + ) + return SendResult(success=True, message_id=str(msg.id)) + + except Exception as e: + print(f"[{self.name}] Failed to send local image: {e}") + return await super().send_image_file(chat_id, image_path, caption, reply_to) + async def send_image( self, chat_id: str, @@ -555,6 +592,89 @@ class DiscordAdapter(BasePlatformAdapter): except Exception as e: logger.debug("Discord followup failed: %s", e) + @tree.command(name="compress", description="Compress conversation context") + async def slash_compress(interaction: discord.Interaction): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, "/compress") + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="title", description="Set or show the session title") + @discord.app_commands.describe(name="Session title. 
Leave empty to show current.") + async def slash_title(interaction: discord.Interaction, name: str = ""): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, f"/title {name}".strip()) + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="resume", description="Resume a previously-named session") + @discord.app_commands.describe(name="Session name to resume. Leave empty to list sessions.") + async def slash_resume(interaction: discord.Interaction, name: str = ""): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, f"/resume {name}".strip()) + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="usage", description="Show token usage for this session") + async def slash_usage(interaction: discord.Interaction): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, "/usage") + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="provider", description="Show available providers") + async def slash_provider(interaction: discord.Interaction): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, "/provider") + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="help", description="Show available commands") + async def slash_help(interaction: discord.Interaction): + await interaction.response.defer(ephemeral=True) + event = 
self._build_slash_event(interaction, "/help") + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="insights", description="Show usage insights and analytics") + @discord.app_commands.describe(days="Number of days to analyze (default: 7)") + async def slash_insights(interaction: discord.Interaction, days: int = 7): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, f"/insights {days}") + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="reload-mcp", description="Reload MCP servers from config") + async def slash_reload_mcp(interaction: discord.Interaction): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, "/reload-mcp") + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + @tree.command(name="update", description="Update Hermes Agent to the latest version") async def slash_update(interaction: discord.Interaction): await interaction.response.defer(ephemeral=True) diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index 85562cbb6..11a73461e 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -179,6 +179,35 @@ class SlackAdapter(BasePlatformAdapter): """Slack doesn't have a direct typing indicator API for bots.""" pass + async def send_image_file( + self, + chat_id: str, + image_path: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """Send a local image file to Slack by uploading it.""" + if not self._app: + return SendResult(success=False, error="Not connected") + + try: + import 
os + if not os.path.exists(image_path): + return SendResult(success=False, error=f"Image file not found: {image_path}") + + result = await self._app.client.files_upload_v2( + channel=chat_id, + file=image_path, + filename=os.path.basename(image_path), + initial_comment=caption or "", + thread_ts=reply_to, + ) + return SendResult(success=True, raw_response=result) + + except Exception as e: + print(f"[{self.name}] Failed to send local image: {e}") + return await super().send_image_file(chat_id, image_path, caption, reply_to) + async def send_image( self, chat_id: str, diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 9ed47a394..c49155d0a 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -155,6 +155,14 @@ class TelegramAdapter(BasePlatformAdapter): BotCommand("status", "Show session info"), BotCommand("stop", "Stop the running agent"), BotCommand("sethome", "Set this chat as the home channel"), + BotCommand("compress", "Compress conversation context"), + BotCommand("title", "Set or show the session title"), + BotCommand("resume", "Resume a previously-named session"), + BotCommand("usage", "Show token usage for this session"), + BotCommand("provider", "Show available providers"), + BotCommand("insights", "Show usage insights and analytics"), + BotCommand("update", "Update Hermes to the latest version"), + BotCommand("reload_mcp", "Reload MCP servers from config"), BotCommand("help", "Show available commands"), ]) except Exception as e: @@ -306,6 +314,34 @@ class TelegramAdapter(BasePlatformAdapter): print(f"[{self.name}] Failed to send voice/audio: {e}") return await super().send_voice(chat_id, audio_path, caption, reply_to) + async def send_image_file( + self, + chat_id: str, + image_path: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """Send a local image file natively as a Telegram photo.""" + if not self._bot: + return SendResult(success=False, error="Not 
connected") + + try: + import os + if not os.path.exists(image_path): + return SendResult(success=False, error=f"Image file not found: {image_path}") + + with open(image_path, "rb") as image_file: + msg = await self._bot.send_photo( + chat_id=int(chat_id), + photo=image_file, + caption=caption[:1024] if caption else None, + reply_to_message_id=int(reply_to) if reply_to else None, + ) + return SendResult(success=True, message_id=str(msg.message_id)) + except Exception as e: + print(f"[{self.name}] Failed to send local image: {e}") + return await super().send_image_file(chat_id, image_path, caption, reply_to) + async def send_image( self, chat_id: str, @@ -313,12 +349,16 @@ class TelegramAdapter(BasePlatformAdapter): caption: Optional[str] = None, reply_to: Optional[str] = None, ) -> SendResult: - """Send an image natively as a Telegram photo.""" + """Send an image natively as a Telegram photo. + + Tries URL-based send first (fast, works for <5MB images). + Falls back to downloading and uploading as file (supports up to 10MB). 
+ """ if not self._bot: return SendResult(success=False, error="Not connected") try: - # Telegram can send photos directly from URLs + # Telegram can send photos directly from URLs (up to ~5MB) msg = await self._bot.send_photo( chat_id=int(chat_id), photo=image_url, @@ -327,9 +367,26 @@ class TelegramAdapter(BasePlatformAdapter): ) return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e: - print(f"[{self.name}] Failed to send photo, falling back to URL: {e}") - # Fallback: send as text link - return await super().send_image(chat_id, image_url, caption, reply_to) + logger.warning("[%s] URL-based send_photo failed (%s), trying file upload", self.name, e) + # Fallback: download and upload as file (supports up to 10MB) + try: + import httpx + async with httpx.AsyncClient(timeout=30.0) as client: + resp = await client.get(image_url) + resp.raise_for_status() + image_data = resp.content + + msg = await self._bot.send_photo( + chat_id=int(chat_id), + photo=image_data, + caption=caption[:1024] if caption else None, + reply_to_message_id=int(reply_to) if reply_to else None, + ) + return SendResult(success=True, message_id=str(msg.message_id)) + except Exception as e2: + logger.error("[%s] File upload send_photo also failed: %s", self.name, e2) + # Final fallback: send URL as text + return await super().send_image(chat_id, image_url, caption, reply_to) async def send_animation( self, diff --git a/gateway/run.py b/gateway/run.py index 99fd2443f..b32f2d2d0 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -75,6 +75,7 @@ if _config_path.exists(): "container_memory": "TERMINAL_CONTAINER_MEMORY", "container_disk": "TERMINAL_CONTAINER_DISK", "container_persistent": "TERMINAL_CONTAINER_PERSISTENT", + "sandbox_dir": "TERMINAL_SANDBOX_DIR", } for _cfg_key, _env_var in _terminal_env_map.items(): if _cfg_key in _terminal_cfg: @@ -107,11 +108,13 @@ os.environ["HERMES_QUIET"] = "1" # Enable interactive exec approval for dangerous commands on messaging 
platforms os.environ["HERMES_EXEC_ASK"] = "1" -# Set terminal working directory for messaging platforms -# Uses MESSAGING_CWD if set, otherwise defaults to home directory -# This is separate from CLI which uses the directory where `hermes` is run -messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home()) -os.environ["TERMINAL_CWD"] = messaging_cwd +# Set terminal working directory for messaging platforms. +# If the user set an explicit path in config.yaml (not "." or "auto"), +# respect it. Otherwise use MESSAGING_CWD or default to home directory. +_configured_cwd = os.environ.get("TERMINAL_CWD", "") +if not _configured_cwd or _configured_cwd in (".", "auto", "cwd"): + messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home()) + os.environ["TERMINAL_CWD"] = messaging_cwd from gateway.config import ( Platform, @@ -178,7 +181,6 @@ class GatewayRunner: self.session_store = SessionStore( self.config.sessions_dir, self.config, has_active_processes_fn=lambda key: process_registry.has_active_for_session(key), - on_auto_reset=self._flush_memories_before_reset, ) self.delivery_router = DeliveryRouter(self.config) self._running = False @@ -209,15 +211,14 @@ class GatewayRunner: from gateway.hooks import HookRegistry self.hooks = HookRegistry() - def _flush_memories_before_reset(self, old_entry): - """Prompt the agent to save memories/skills before an auto-reset. - - Called synchronously by SessionStore before destroying an expired session. - Loads the transcript, gives the agent a real turn with memory + skills - tools, and explicitly asks it to preserve anything worth keeping. + def _flush_memories_for_session(self, old_session_id: str): + """Prompt the agent to save memories/skills before context is lost. + + Synchronous worker — meant to be called via run_in_executor from + an async context so it doesn't block the event loop. 
""" try: - history = self.session_store.load_transcript(old_entry.session_id) + history = self.session_store.load_transcript(old_session_id) if not history or len(history) < 4: return @@ -231,7 +232,7 @@ class GatewayRunner: max_iterations=8, quiet_mode=True, enabled_toolsets=["memory", "skills"], - session_id=old_entry.session_id, + session_id=old_session_id, ) # Build conversation history from transcript @@ -260,9 +261,14 @@ class GatewayRunner: user_message=flush_prompt, conversation_history=msgs, ) - logger.info("Pre-reset save completed for session %s", old_entry.session_id) + logger.info("Pre-reset memory flush completed for session %s", old_session_id) except Exception as e: - logger.debug("Pre-reset save failed for session %s: %s", old_entry.session_id, e) + logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e) + + async def _async_flush_memories(self, old_session_id: str): + """Run the sync memory flush in a thread pool so it won't block the event loop.""" + loop = asyncio.get_event_loop() + await loop.run_in_executor(None, self._flush_memories_for_session, old_session_id) @staticmethod def _load_prefill_messages() -> List[Dict[str, Any]]: @@ -330,7 +336,7 @@ class GatewayRunner: Checks HERMES_REASONING_EFFORT env var first, then agent.reasoning_effort in config.yaml. Valid: "xhigh", "high", "medium", "low", "minimal", "none". - Returns None to use default (xhigh). + Returns None to use default (medium). 
""" effort = os.getenv("HERMES_REASONING_EFFORT", "") if not effort: @@ -351,7 +357,7 @@ class GatewayRunner: valid = ("xhigh", "high", "medium", "low", "minimal") if effort in valid: return {"enabled": True, "effort": effort} - logger.warning("Unknown reasoning_effort '%s', using default (xhigh)", effort) + logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort) return None @staticmethod @@ -464,10 +470,50 @@ class GatewayRunner: # Check if we're restarting after a /update command await self._send_update_notification() + # Start background session expiry watcher for proactive memory flushing + asyncio.create_task(self._session_expiry_watcher()) + logger.info("Press Ctrl+C to stop") return True + async def _session_expiry_watcher(self, interval: int = 300): + """Background task that proactively flushes memories for expired sessions. + + Runs every `interval` seconds (default 5 min). For each session that + has expired according to its reset policy, flushes memories in a thread + pool and marks the session so it won't be flushed again. + + This means memories are already saved by the time the user sends their + next message, so there's no blocking delay. 
+ """ + await asyncio.sleep(60) # initial delay — let the gateway fully start + while self._running: + try: + self.session_store._ensure_loaded() + for key, entry in list(self.session_store._entries.items()): + if entry.session_id in self.session_store._pre_flushed_sessions: + continue # already flushed this session + if not self.session_store._is_session_expired(entry): + continue # session still active + # Session has expired — flush memories in the background + logger.info( + "Session %s expired (key=%s), flushing memories proactively", + entry.session_id, key, + ) + try: + await self._async_flush_memories(entry.session_id) + self.session_store._pre_flushed_sessions.add(entry.session_id) + except Exception as e: + logger.debug("Proactive memory flush failed for %s: %s", entry.session_id, e) + except Exception as e: + logger.debug("Session expiry watcher error: %s", e) + # Sleep in small increments so we can stop quickly + for _ in range(interval): + if not self._running: + break + await asyncio.sleep(1) + async def stop(self) -> None: """Stop the gateway and disconnect all adapters.""" logger.info("Stopping gateway...") @@ -664,7 +710,8 @@ class GatewayRunner: # Emit command:* hook for any recognized slash command _known_commands = {"new", "reset", "help", "status", "stop", "model", "personality", "retry", "undo", "sethome", "set-home", - "compress", "usage", "insights", "reload-mcp", "update"} + "compress", "usage", "insights", "reload-mcp", "reload_mcp", + "update", "title", "resume", "provider"} if command and command in _known_commands: await self.hooks.emit(f"command:{command}", { "platform": source.platform.value if source.platform else "", @@ -688,6 +735,9 @@ class GatewayRunner: if command == "model": return await self._handle_model_command(event) + if command == "provider": + return await self._handle_provider_command(event) + if command == "personality": return await self._handle_personality_command(event) @@ -709,11 +759,17 @@ class GatewayRunner: if 
command == "insights": return await self._handle_insights_command(event) - if command == "reload-mcp": + if command in ("reload-mcp", "reload_mcp"): return await self._handle_reload_mcp_command(event) if command == "update": return await self._handle_update_command(event) + + if command == "title": + return await self._handle_title_command(event) + + if command == "resume": + return await self._handle_resume_command(event) # Skill slash commands: /skill-name loads the skill and sends to agent if command: @@ -788,6 +844,167 @@ class GatewayRunner: # Load conversation history from transcript history = self.session_store.load_transcript(session_entry.session_id) + # ----------------------------------------------------------------- + # Session hygiene: auto-compress pathologically large transcripts + # + # Long-lived gateway sessions can accumulate enough history that + # every new message rehydrates an oversized transcript, causing + # repeated truncation/context failures. Detect this early and + # compress proactively — before the agent even starts. 
(#628) + # ----------------------------------------------------------------- + if history and len(history) >= 4: + from agent.model_metadata import estimate_messages_tokens_rough + + # Read thresholds from config.yaml → session_hygiene section + _hygiene_cfg = {} + try: + _hyg_cfg_path = _hermes_home / "config.yaml" + if _hyg_cfg_path.exists(): + import yaml as _hyg_yaml + with open(_hyg_cfg_path) as _hyg_f: + _hyg_data = _hyg_yaml.safe_load(_hyg_f) or {} + _hygiene_cfg = _hyg_data.get("session_hygiene", {}) + if not isinstance(_hygiene_cfg, dict): + _hygiene_cfg = {} + except Exception: + pass + + _compress_token_threshold = int( + _hygiene_cfg.get("auto_compress_tokens", 100_000) + ) + _compress_msg_threshold = int( + _hygiene_cfg.get("auto_compress_messages", 200) + ) + _warn_token_threshold = int( + _hygiene_cfg.get("warn_tokens", 200_000) + ) + + _msg_count = len(history) + _approx_tokens = estimate_messages_tokens_rough(history) + + _needs_compress = ( + _approx_tokens >= _compress_token_threshold + or _msg_count >= _compress_msg_threshold + ) + + if _needs_compress: + logger.info( + "Session hygiene: %s messages, ~%s tokens — auto-compressing " + "(thresholds: %s msgs / %s tokens)", + _msg_count, f"{_approx_tokens:,}", + _compress_msg_threshold, f"{_compress_token_threshold:,}", + ) + + _hyg_adapter = self.adapters.get(source.platform) + if _hyg_adapter: + try: + await _hyg_adapter.send( + source.chat_id, + f"🗜️ Session is large ({_msg_count} messages, " + f"~{_approx_tokens:,} tokens). Auto-compressing..." 
+ ) + except Exception: + pass + + try: + from run_agent import AIAgent + + _hyg_runtime = _resolve_runtime_agent_kwargs() + if _hyg_runtime.get("api_key"): + _hyg_msgs = [ + {"role": m.get("role"), "content": m.get("content")} + for m in history + if m.get("role") in ("user", "assistant") + and m.get("content") + ] + + if len(_hyg_msgs) >= 4: + _hyg_agent = AIAgent( + **_hyg_runtime, + max_iterations=4, + quiet_mode=True, + enabled_toolsets=["memory"], + session_id=session_entry.session_id, + ) + + loop = asyncio.get_event_loop() + _compressed, _ = await loop.run_in_executor( + None, + lambda: _hyg_agent._compress_context( + _hyg_msgs, "", + approx_tokens=_approx_tokens, + ), + ) + + self.session_store.rewrite_transcript( + session_entry.session_id, _compressed + ) + history = _compressed + _new_count = len(_compressed) + _new_tokens = estimate_messages_tokens_rough( + _compressed + ) + + logger.info( + "Session hygiene: compressed %s → %s msgs, " + "~%s → ~%s tokens", + _msg_count, _new_count, + f"{_approx_tokens:,}", f"{_new_tokens:,}", + ) + + if _hyg_adapter: + try: + await _hyg_adapter.send( + source.chat_id, + f"🗜️ Compressed: {_msg_count} → " + f"{_new_count} messages, " + f"~{_approx_tokens:,} → " + f"~{_new_tokens:,} tokens" + ) + except Exception: + pass + + # Still too large after compression — warn user + if _new_tokens >= _warn_token_threshold: + logger.warning( + "Session hygiene: still ~%s tokens after " + "compression — suggesting /reset", + f"{_new_tokens:,}", + ) + if _hyg_adapter: + try: + await _hyg_adapter.send( + source.chat_id, + "⚠️ Session is still very large " + "after compression " + f"(~{_new_tokens:,} tokens). " + "Consider using /reset to start " + "fresh if you experience issues." 
+ ) + except Exception: + pass + + except Exception as e: + logger.warning( + "Session hygiene auto-compress failed: %s", e + ) + # Compression failed and session is dangerously large + if _approx_tokens >= _warn_token_threshold: + _hyg_adapter = self.adapters.get(source.platform) + if _hyg_adapter: + try: + await _hyg_adapter.send( + source.chat_id, + f"⚠️ Session is very large " + f"({_msg_count} messages, " + f"~{_approx_tokens:,} tokens) and " + "auto-compression failed. Consider " + "using /compress or /reset to avoid " + "issues." + ) + except Exception: + pass + # First-message onboarding -- only on the very first interaction ever if not history and not self.session_store.has_any_sessions(): context_prompt += ( @@ -1012,33 +1229,12 @@ class GatewayRunner: # Get existing session key session_key = self.session_store._generate_session_key(source) - # Memory flush before reset: load the old transcript and let a - # temporary agent save memories before the session is wiped. + # Flush memories in the background (fire-and-forget) so the user + # gets the "Session reset!" response immediately. 
try: old_entry = self.session_store._entries.get(session_key) if old_entry: - old_history = self.session_store.load_transcript(old_entry.session_id) - if old_history: - from run_agent import AIAgent - loop = asyncio.get_event_loop() - _flush_kwargs = _resolve_runtime_agent_kwargs() - def _do_flush(): - tmp_agent = AIAgent( - **_flush_kwargs, - max_iterations=5, - quiet_mode=True, - enabled_toolsets=["memory"], - session_id=old_entry.session_id, - ) - # Build simple message list from transcript - msgs = [] - for m in old_history: - role = m.get("role") - content = m.get("content") - if role in ("user", "assistant") and content: - msgs.append({"role": role, "content": content}) - tmp_agent.flush_memories(msgs) - await loop.run_in_executor(None, _do_flush) + asyncio.create_task(self._async_flush_memories(old_entry.session_id)) except Exception as e: logger.debug("Gateway memory flush on reset failed: %s", e) @@ -1105,12 +1301,15 @@ class GatewayRunner: "`/reset` — Reset conversation history", "`/status` — Show session info", "`/stop` — Interrupt the running agent", - "`/model [name]` — Show or change the model", + "`/model [provider:model]` — Show/change model (or switch provider)", + "`/provider` — Show available providers and auth status", "`/personality [name]` — Set a personality", "`/retry` — Retry your last message", "`/undo` — Remove the last exchange", "`/sethome` — Set this chat as the home channel", "`/compress` — Compress conversation context", + "`/title [name]` — Set or show the session title", + "`/resume [name]` — Resume a previously-named session", "`/usage` — Show token usage for this session", "`/insights [days]` — Show usage insights and analytics", "`/reload-mcp` — Reload MCP servers from config", @@ -1131,13 +1330,20 @@ class GatewayRunner: async def _handle_model_command(self, event: MessageEvent) -> str: """Handle /model command - show or change the current model.""" import yaml + from hermes_cli.models import ( + parse_model_input, + 
validate_requested_model, + curated_models_for_provider, + normalize_provider, + _PROVIDER_LABELS, + ) args = event.get_command_args().strip() config_path = _hermes_home / 'config.yaml' - # Resolve current model the same way the agent init does: - # env vars first, then config.yaml always overrides. + # Resolve current model and provider from config current = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" + current_provider = "openrouter" try: if config_path.exists(): with open(config_path) as f: @@ -1147,39 +1353,164 @@ class GatewayRunner: current = model_cfg elif isinstance(model_cfg, dict): current = model_cfg.get("default", current) + current_provider = model_cfg.get("provider", current_provider) except Exception: pass + # Resolve "auto" to the actual provider using credential detection + current_provider = normalize_provider(current_provider) + if current_provider == "auto": + try: + from hermes_cli.auth import resolve_provider as _resolve_provider + current_provider = _resolve_provider(current_provider) + except Exception: + current_provider = "openrouter" + if not args: - return f"🤖 **Current model:** `{current}`\n\nTo change: `/model provider/model-name`" + provider_label = _PROVIDER_LABELS.get(current_provider, current_provider) + lines = [ + f"🤖 **Current model:** `{current}`", + f"**Provider:** {provider_label}", + "", + ] + curated = curated_models_for_provider(current_provider) + if curated: + lines.append(f"**Available models ({provider_label}):**") + for mid, desc in curated: + marker = " ←" if mid == current else "" + label = f" _{desc}_" if desc else "" + lines.append(f"• `{mid}`{label}{marker}") + lines.append("") + lines.append("To change: `/model model-name`") + lines.append("Switch provider: `/model provider:model-name`") + return "\n".join(lines) - if "/" not in args: - return ( - f"🤖 Invalid model format: `{args}`\n\n" - f"Use `provider/model-name` format, e.g.:\n" - f"• `anthropic/claude-sonnet-4`\n" - 
f"• `google/gemini-2.5-pro`\n" - f"• `openai/gpt-4o`" - ) + # Parse provider:model syntax + target_provider, new_model = parse_model_input(args, current_provider) + provider_changed = target_provider != current_provider - # Write to config.yaml (source of truth), same pattern as CLI save_config_value. + # Resolve credentials for the target provider (for API probe) + api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") or "" + base_url = "https://openrouter.ai/api/v1" + if provider_changed: + try: + from hermes_cli.runtime_provider import resolve_runtime_provider + runtime = resolve_runtime_provider(requested=target_provider) + api_key = runtime.get("api_key", "") + base_url = runtime.get("base_url", "") + except Exception as e: + provider_label = _PROVIDER_LABELS.get(target_provider, target_provider) + return f"⚠️ Could not resolve credentials for provider '{provider_label}': {e}" + else: + # Use current provider's base_url from config or registry + try: + from hermes_cli.runtime_provider import resolve_runtime_provider + runtime = resolve_runtime_provider(requested=current_provider) + api_key = runtime.get("api_key", "") + base_url = runtime.get("base_url", "") + except Exception: + pass + + # Validate the model against the live API + try: + validation = validate_requested_model( + new_model, + target_provider, + api_key=api_key, + base_url=base_url, + ) + except Exception: + validation = {"accepted": True, "persist": True, "recognized": False, "message": None} + + if not validation.get("accepted"): + msg = validation.get("message", "Invalid model") + tip = "\n\nUse `/model` to see available models, `/provider` to see providers" if "Did you mean" not in msg else "" + return f"⚠️ {msg}{tip}" + + # Persist to config only if validation approves + if validation.get("persist"): + try: + user_config = {} + if config_path.exists(): + with open(config_path) as f: + user_config = yaml.safe_load(f) or {} + if "model" not in user_config or not 
isinstance(user_config["model"], dict): + user_config["model"] = {} + user_config["model"]["default"] = new_model + if provider_changed: + user_config["model"]["provider"] = target_provider + with open(config_path, 'w') as f: + yaml.dump(user_config, f, default_flow_style=False, sort_keys=False) + except Exception as e: + return f"⚠️ Failed to save model change: {e}" + + # Set env vars so the next agent run picks up the change + os.environ["HERMES_MODEL"] = new_model + if provider_changed: + os.environ["HERMES_INFERENCE_PROVIDER"] = target_provider + + provider_label = _PROVIDER_LABELS.get(target_provider, target_provider) + provider_note = f"\n**Provider:** {provider_label}" if provider_changed else "" + + warning = "" + if validation.get("message"): + warning = f"\n⚠️ {validation['message']}" + + if validation.get("persist"): + persist_note = "saved to config" + else: + persist_note = "this session only — will revert on restart" + return f"🤖 Model changed to `{new_model}` ({persist_note}){provider_note}{warning}\n_(takes effect on next message)_" + + async def _handle_provider_command(self, event: MessageEvent) -> str: + """Handle /provider command - show available providers.""" + import yaml + from hermes_cli.models import ( + list_available_providers, + normalize_provider, + _PROVIDER_LABELS, + ) + + # Resolve current provider from config + current_provider = "openrouter" + config_path = _hermes_home / 'config.yaml' try: - user_config = {} if config_path.exists(): with open(config_path) as f: - user_config = yaml.safe_load(f) or {} - if "model" not in user_config or not isinstance(user_config["model"], dict): - user_config["model"] = {} - user_config["model"]["default"] = args - with open(config_path, 'w') as f: - yaml.dump(user_config, f, default_flow_style=False, sort_keys=False) - except Exception as e: - return f"⚠️ Failed to save model change: {e}" + cfg = yaml.safe_load(f) or {} + model_cfg = cfg.get("model", {}) + if isinstance(model_cfg, dict): + 
current_provider = model_cfg.get("provider", current_provider) + except Exception: + pass - # Also set env var so code reading it before the next agent init sees the update. - os.environ["HERMES_MODEL"] = args + current_provider = normalize_provider(current_provider) + if current_provider == "auto": + try: + from hermes_cli.auth import resolve_provider as _resolve_provider + current_provider = _resolve_provider(current_provider) + except Exception: + current_provider = "openrouter" - return f"🤖 Model changed to `{args}`\n_(takes effect on next message)_" + current_label = _PROVIDER_LABELS.get(current_provider, current_provider) + + lines = [ + f"🔌 **Current provider:** {current_label} (`{current_provider}`)", + "", + "**Available providers:**", + ] + + providers = list_available_providers() + for p in providers: + marker = " ← active" if p["id"] == current_provider else "" + auth = "✅" if p["authenticated"] else "❌" + aliases = f" _(also: {', '.join(p['aliases'])})_" if p["aliases"] else "" + lines.append(f"{auth} `{p['id']}` — {p['label']}{aliases}{marker}") + + lines.append("") + lines.append("Switch: `/model provider:model-name`") + lines.append("Setup: `hermes setup`") + return "\n".join(lines) async def _handle_personality_command(self, event: MessageEvent) -> str: """Handle /personality command - list or set a personality.""" @@ -1369,6 +1700,113 @@ class GatewayRunner: logger.warning("Manual compress failed: %s", e) return f"Compression failed: {e}" + async def _handle_title_command(self, event: MessageEvent) -> str: + """Handle /title command — set or show the current session's title.""" + source = event.source + session_entry = self.session_store.get_or_create_session(source) + session_id = session_entry.session_id + + if not self._session_db: + return "Session database not available." 
+ + title_arg = event.get_command_args().strip() + if title_arg: + # Sanitize the title before setting + try: + sanitized = self._session_db.sanitize_title(title_arg) + except ValueError as e: + return f"⚠️ {e}" + if not sanitized: + return "⚠️ Title is empty after cleanup. Please use printable characters." + # Set the title + try: + if self._session_db.set_session_title(session_id, sanitized): + return f"✏️ Session title set: **{sanitized}**" + else: + return "Session not found in database." + except ValueError as e: + return f"⚠️ {e}" + else: + # Show the current title + title = self._session_db.get_session_title(session_id) + if title: + return f"📌 Session title: **{title}**" + else: + return "No title set. Usage: `/title My Session Name`" + + async def _handle_resume_command(self, event: MessageEvent) -> str: + """Handle /resume command — switch to a previously-named session.""" + if not self._session_db: + return "Session database not available." + + source = event.source + session_key = build_session_key(source) + name = event.get_command_args().strip() + + if not name: + # List recent titled sessions for this user/platform + try: + user_source = source.platform.value if source.platform else None + sessions = self._session_db.list_sessions_rich( + source=user_source, limit=10 + ) + titled = [s for s in sessions if s.get("title")] + if not titled: + return ( + "No named sessions found.\n" + "Use `/title My Session` to name your current session, " + "then `/resume My Session` to return to it later." 
+ ) + lines = ["📋 **Named Sessions**\n"] + for s in titled[:10]: + title = s["title"] + preview = s.get("preview", "")[:40] + preview_part = f" — _{preview}_" if preview else "" + lines.append(f"• **{title}**{preview_part}") + lines.append("\nUsage: `/resume <session name>`") + return "\n".join(lines) + except Exception as e: + logger.debug("Failed to list titled sessions: %s", e) + return f"Could not list sessions: {e}" + + # Resolve the name to a session ID + target_id = self._session_db.resolve_session_by_title(name) + if not target_id: + return ( + f"No session found matching '**{name}**'.\n" + "Use `/resume` with no arguments to see available sessions." + ) + + # Check if already on that session + current_entry = self.session_store.get_or_create_session(source) + if current_entry.session_id == target_id: + return f"📌 Already on session **{name}**." + + # Flush memories for current session before switching + try: + asyncio.create_task(self._async_flush_memories(current_entry.session_id)) + except Exception as e: + logger.debug("Memory flush on resume failed: %s", e) + + # Clear any running agent for this session key + if session_key in self._running_agents: + del self._running_agents[session_key] + + # Switch the session entry to point at the old session + new_entry = self.session_store.switch_session(session_key, target_id) + if not new_entry: + return "Failed to switch session." + + # Get the title for confirmation + title = self._session_db.get_session_title(target_id) or name + + # Count messages for context + history = self.session_store.load_transcript(target_id) + msg_count = len([m for m in history if m.get("role") == "user"]) if history else 0 + msg_part = f" ({msg_count} message{'s' if msg_count != 1 else ''})" if msg_count else "" + + return f"↻ Resumed session **{title}**{msg_part}. Conversation restored." 
+ async def _handle_usage_command(self, event: MessageEvent) -> str: """Handle /usage command -- show token usage for the session's last agent run.""" source = event.source @@ -2437,34 +2875,77 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, interval: int logger.info("Cron ticker stopped") -async def start_gateway(config: Optional[GatewayConfig] = None) -> bool: +async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = False) -> bool: """ Start the gateway and run until interrupted. This is the main entry point for running the gateway. Returns True if the gateway ran successfully, False if it failed to start. A False return causes a non-zero exit code so systemd can auto-restart. + + Args: + config: Optional gateway configuration override. + replace: If True, kill any existing gateway instance before starting. + Useful for systemd services to avoid restart-loop deadlocks + when the previous process hasn't fully exited yet. """ # ── Duplicate-instance guard ────────────────────────────────────── # Prevent two gateways from running under the same HERMES_HOME. # The PID file is scoped to HERMES_HOME, so future multi-profile # setups (each profile using a distinct HERMES_HOME) will naturally # allow concurrent instances without tripping this guard. - from gateway.status import get_running_pid + import time as _time + from gateway.status import get_running_pid, remove_pid_file existing_pid = get_running_pid() if existing_pid is not None and existing_pid != os.getpid(): - hermes_home = os.getenv("HERMES_HOME", "~/.hermes") - logger.error( - "Another gateway instance is already running (PID %d, HERMES_HOME=%s). 
" - "Use 'hermes gateway restart' to replace it, or 'hermes gateway stop' first.", - existing_pid, hermes_home, - ) - print( - f"\n❌ Gateway already running (PID {existing_pid}).\n" - f" Use 'hermes gateway restart' to replace it,\n" - f" or 'hermes gateway stop' to kill it first.\n" - ) - return False + if replace: + logger.info( + "Replacing existing gateway instance (PID %d) with --replace.", + existing_pid, + ) + try: + os.kill(existing_pid, signal.SIGTERM) + except ProcessLookupError: + pass # Already gone + except PermissionError: + logger.error( + "Permission denied killing PID %d. Cannot replace.", + existing_pid, + ) + return False + # Wait up to 10 seconds for the old process to exit + for _ in range(20): + try: + os.kill(existing_pid, 0) + _time.sleep(0.5) + except (ProcessLookupError, PermissionError): + break # Process is gone + else: + # Still alive after 10s — force kill + logger.warning( + "Old gateway (PID %d) did not exit after SIGTERM, sending SIGKILL.", + existing_pid, + ) + try: + os.kill(existing_pid, signal.SIGKILL) + _time.sleep(0.5) + except (ProcessLookupError, PermissionError): + pass + remove_pid_file() + else: + hermes_home = os.getenv("HERMES_HOME", "~/.hermes") + logger.error( + "Another gateway instance is already running (PID %d, HERMES_HOME=%s). 
" + "Use 'hermes gateway restart' to replace it, or 'hermes gateway stop' first.", + existing_pid, hermes_home, + ) + print( + f"\n❌ Gateway already running (PID {existing_pid}).\n" + f" Use 'hermes gateway restart' to replace it,\n" + f" or 'hermes gateway stop' to kill it first.\n" + f" Or use 'hermes gateway run --replace' to auto-replace.\n" + ) + return False # Sync bundled skills on gateway start (fast -- skips unchanged) try: diff --git a/gateway/session.py b/gateway/session.py index 091cb46a1..3113e2e6a 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -311,7 +311,9 @@ class SessionStore: self._entries: Dict[str, SessionEntry] = {} self._loaded = False self._has_active_processes_fn = has_active_processes_fn - self._on_auto_reset = on_auto_reset # callback(old_entry) before auto-reset + # on_auto_reset is deprecated — memory flush now runs proactively + # via the background session expiry watcher in GatewayRunner. + self._pre_flushed_sessions: set = set() # session_ids already flushed by watcher # Initialize SQLite session database self._db = None @@ -353,6 +355,44 @@ class SessionStore: """Generate a session key from a source.""" return build_session_key(source) + def _is_session_expired(self, entry: SessionEntry) -> bool: + """Check if a session has expired based on its reset policy. + + Works from the entry alone — no SessionSource needed. + Used by the background expiry watcher to proactively flush memories. + Sessions with active background processes are never considered expired. 
+ """ + if self._has_active_processes_fn: + if self._has_active_processes_fn(entry.session_key): + return False + + policy = self.config.get_reset_policy( + platform=entry.platform, + session_type=entry.chat_type, + ) + + if policy.mode == "none": + return False + + now = datetime.now() + + if policy.mode in ("idle", "both"): + idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes) + if now > idle_deadline: + return True + + if policy.mode in ("daily", "both"): + today_reset = now.replace( + hour=policy.at_hour, + minute=0, second=0, microsecond=0, + ) + if now.hour < policy.at_hour: + today_reset -= timedelta(days=1) + if entry.updated_at < today_reset: + return True + + return False + def _should_reset(self, entry: SessionEntry, source: SessionSource) -> bool: """ Check if a session should be reset based on policy. @@ -439,13 +479,11 @@ class SessionStore: self._save() return entry else: - # Session is being auto-reset — flush memories before destroying + # Session is being auto-reset. The background expiry watcher + # should have already flushed memories proactively; discard + # the marker so it doesn't accumulate. was_auto_reset = True - if self._on_auto_reset: - try: - self._on_auto_reset(entry) - except Exception as e: - logger.debug("Auto-reset callback failed: %s", e) + self._pre_flushed_sessions.discard(entry.session_id) if self._db: try: self._db.end_session(entry.session_id, "session_reset") @@ -555,7 +593,49 @@ class SessionStore: logger.debug("Session DB operation failed: %s", e) return new_entry - + + def switch_session(self, session_key: str, target_session_id: str) -> Optional[SessionEntry]: + """Switch a session key to point at an existing session ID. + + Used by ``/resume`` to restore a previously-named session. + Ends the current session in SQLite (like reset), but instead of + generating a fresh session ID, re-uses ``target_session_id`` so the + old transcript is loaded on the next message. 
+ """ + self._ensure_loaded() + + if session_key not in self._entries: + return None + + old_entry = self._entries[session_key] + + # Don't switch if already on that session + if old_entry.session_id == target_session_id: + return old_entry + + # End the current session in SQLite + if self._db: + try: + self._db.end_session(old_entry.session_id, "session_switch") + except Exception as e: + logger.debug("Session DB end_session failed: %s", e) + + now = datetime.now() + new_entry = SessionEntry( + session_key=session_key, + session_id=target_session_id, + created_at=now, + updated_at=now, + origin=old_entry.origin, + display_name=old_entry.display_name, + platform=old_entry.platform, + chat_type=old_entry.chat_type, + ) + + self._entries[session_key] = new_entry + self._save() + return new_entry + def list_sessions(self, active_minutes: Optional[int] = None) -> List[SessionEntry]: """List all sessions, optionally filtered by activity.""" self._ensure_loaded() diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 440fc2b6f..209f72959 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -138,6 +138,83 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { } +# ============================================================================= +# Kimi Code Endpoint Detection +# ============================================================================= + +# Kimi Code (platform.kimi.ai) issues keys prefixed "sk-kimi-" that only work +# on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on +# api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set +# KIMI_BASE_URL explicitly. +KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1" + + +def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str: + """Return the correct Kimi base URL based on the API key prefix. + + If the user has explicitly set KIMI_BASE_URL, that always wins. + Otherwise, sk-kimi- prefixed keys route to api.kimi.com/coding/v1. 
+ """ + if env_override: + return env_override + if api_key.startswith("sk-kimi-"): + return KIMI_CODE_BASE_URL + return default_url + + +# ============================================================================= +# Z.AI Endpoint Detection +# ============================================================================= + +# Z.AI has separate billing for general vs coding plans, and global vs China +# endpoints. A key that works on one may return "Insufficient balance" on +# another. We probe at setup time and store the working endpoint. + +ZAI_ENDPOINTS = [ + # (id, base_url, default_model, label) + ("global", "https://api.z.ai/api/paas/v4", "glm-5", "Global"), + ("cn", "https://open.bigmodel.cn/api/paas/v4", "glm-5", "China"), + ("coding-global", "https://api.z.ai/api/coding/paas/v4", "glm-4.7", "Global (Coding Plan)"), + ("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", "glm-4.7", "China (Coding Plan)"), +] + + +def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str, str]]: + """Probe z.ai endpoints to find one that accepts this API key. + + Returns {"id": ..., "base_url": ..., "model": ..., "label": ...} for the + first working endpoint, or None if all fail. 
+ """ + for ep_id, base_url, model, label in ZAI_ENDPOINTS: + try: + resp = httpx.post( + f"{base_url}/chat/completions", + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json={ + "model": model, + "stream": False, + "max_tokens": 1, + "messages": [{"role": "user", "content": "ping"}], + }, + timeout=timeout, + ) + if resp.status_code == 200: + logger.debug("Z.AI endpoint probe: %s (%s) OK", ep_id, base_url) + return { + "id": ep_id, + "base_url": base_url, + "model": model, + "label": label, + } + logger.debug("Z.AI endpoint probe: %s returned %s", ep_id, resp.status_code) + except Exception as exc: + logger.debug("Z.AI endpoint probe: %s failed: %s", ep_id, exc) + return None + + # ============================================================================= # Error Types # ============================================================================= @@ -1298,11 +1375,16 @@ def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]: key_source = env_var break - base_url = pconfig.inference_base_url + env_url = "" if pconfig.base_url_env_var: env_url = os.getenv(pconfig.base_url_env_var, "").strip() - if env_url: - base_url = env_url + + if provider_id == "kimi-coding": + base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url) + elif env_url: + base_url = env_url + else: + base_url = pconfig.inference_base_url return { "configured": bool(api_key), @@ -1350,11 +1432,16 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]: key_source = env_var break - base_url = pconfig.inference_base_url + env_url = "" if pconfig.base_url_env_var: env_url = os.getenv(pconfig.base_url_env_var, "").strip() - if env_url: - base_url = env_url.rstrip("/") + + if provider_id == "kimi-coding": + base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url) + elif env_url: + base_url = env_url.rstrip("/") + else: + base_url = pconfig.inference_base_url return { 
"provider": provider_id, diff --git a/hermes_cli/clipboard.py b/hermes_cli/clipboard.py index 893a84d3e..6fda33bc6 100644 --- a/hermes_cli/clipboard.py +++ b/hermes_cli/clipboard.py @@ -300,11 +300,11 @@ def _convert_to_png(path: Path) -> bool: tmp.rename(path) except FileNotFoundError: logger.debug("ImageMagick not installed — cannot convert BMP to PNG") - if not path.exists() and tmp.exists(): + if tmp.exists() and not path.exists(): tmp.rename(path) except Exception as e: logger.debug("ImageMagick BMP→PNG conversion failed: %s", e) - if not path.exists() and tmp.exists(): + if tmp.exists() and not path.exists(): tmp.rename(path) # Can't convert — BMP is still usable as-is for most APIs diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 887476339..20f01b174 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -1,9 +1,15 @@ """Slash command definitions and autocomplete for the Hermes CLI. -Contains the COMMANDS dict and the SlashCommandCompleter class. -These are pure data/UI with no HermesCLI state dependency. +Contains the shared built-in ``COMMANDS`` dict and ``SlashCommandCompleter``. +The completer can optionally include dynamic skill slash commands supplied by the +interactive CLI. 
""" +from __future__ import annotations + +from collections.abc import Callable, Mapping +from typing import Any + from prompt_toolkit.completion import Completer, Completion @@ -12,6 +18,7 @@ COMMANDS = { "/tools": "List available tools", "/toolsets": "List available toolsets", "/model": "Show or change the current model", + "/provider": "Show available providers and current provider", "/prompt": "View/set custom system prompt", "/personality": "Set a predefined personality", "/clear": "Clear screen and reset conversation (fresh start)", @@ -27,26 +34,68 @@ COMMANDS = { "/platforms": "Show gateway/messaging platform status", "/verbose": "Cycle tool progress display: off → new → all → verbose", "/compress": "Manually compress conversation context (flush memories + summarize)", + "/title": "Set a title for the current session (usage: /title My Session Name)", "/usage": "Show token usage for the current session", "/insights": "Show usage insights and analytics (last 30 days)", + "/paste": "Check clipboard for an image and attach it", + "/reload-mcp": "Reload MCP servers from config.yaml", "/quit": "Exit the CLI (also: /exit, /q)", } class SlashCommandCompleter(Completer): - """Autocomplete for /commands in the input area.""" + """Autocomplete for built-in slash commands and optional skill commands.""" + + def __init__( + self, + skill_commands_provider: Callable[[], Mapping[str, dict[str, Any]]] | None = None, + ) -> None: + self._skill_commands_provider = skill_commands_provider + + def _iter_skill_commands(self) -> Mapping[str, dict[str, Any]]: + if self._skill_commands_provider is None: + return {} + try: + return self._skill_commands_provider() or {} + except Exception: + return {} + + @staticmethod + def _completion_text(cmd_name: str, word: str) -> str: + """Return replacement text for a completion. + + When the user has already typed the full command exactly (``/help``), + returning ``help`` would be a no-op and prompt_toolkit suppresses the + menu. 
Appending a trailing space keeps the dropdown visible and makes + backspacing retrigger it naturally. + """ + return f"{cmd_name} " if cmd_name == word else cmd_name def get_completions(self, document, complete_event): text = document.text_before_cursor if not text.startswith("/"): return + word = text[1:] + for cmd, desc in COMMANDS.items(): cmd_name = cmd[1:] if cmd_name.startswith(word): yield Completion( - cmd_name, + self._completion_text(cmd_name, word), start_position=-len(word), display=cmd, display_meta=desc, ) + + for cmd, info in self._iter_skill_commands().items(): + cmd_name = cmd[1:] + if cmd_name.startswith(word): + description = str(info.get("description", "Skill command")) + short_desc = description[:50] + ("..." if len(description) > 50 else "") + yield Completion( + self._completion_text(cmd_name, word), + start_position=-len(word), + display=cmd, + display_meta=f"⚡ {short_desc}", + ) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 6724c1d7d..0e6f51c1a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -156,6 +156,15 @@ DEFAULT_CONFIG = { # Config Migration System # ============================================================================= +# Track which env vars were introduced in each config version. +# Migration only mentions vars new since the user's previous version. +ENV_VARS_BY_VERSION: Dict[int, List[str]] = { + 3: ["FIRECRAWL_API_KEY", "BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID", "FAL_KEY"], + 4: ["VOICE_TOOLS_OPENAI_KEY", "ELEVENLABS_API_KEY"], + 5: ["WHATSAPP_ENABLED", "WHATSAPP_MODE", "WHATSAPP_ALLOWED_USERS", + "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"], +} + # Required environment variables with metadata for migration prompts. 
# LLM provider is required but handled in the setup wizard's provider # selection step (Nous Portal / OpenRouter / Custom endpoint), so this @@ -625,34 +634,47 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A if v["name"] not in required_names and not v.get("advanced") ] - if interactive and missing_optional: - print(" Would you like to configure any optional keys now?") - try: - answer = input(" Configure optional keys? [y/N]: ").strip().lower() - except (EOFError, KeyboardInterrupt): - answer = "n" - - if answer in ("y", "yes"): + # Only offer to configure env vars that are NEW since the user's previous version + new_var_names = set() + for ver in range(current_ver + 1, latest_ver + 1): + new_var_names.update(ENV_VARS_BY_VERSION.get(ver, [])) + + if new_var_names and interactive and not quiet: + new_and_unset = [ + (name, OPTIONAL_ENV_VARS[name]) + for name in sorted(new_var_names) + if not get_env_value(name) and name in OPTIONAL_ENV_VARS + ] + if new_and_unset: + print(f"\n {len(new_and_unset)} new optional key(s) in this update:") + for name, info in new_and_unset: + print(f" • {name} — {info.get('description', '')}") print() - for var in missing_optional: - desc = var.get("description", "") - if var.get("url"): - print(f" {desc}") - print(f" Get your key at: {var['url']}") - else: - print(f" {desc}") - - if var.get("password"): - import getpass - value = getpass.getpass(f" {var['prompt']} (Enter to skip): ") - else: - value = input(f" {var['prompt']} (Enter to skip): ").strip() - - if value: - save_env_value(var["name"], value) - results["env_added"].append(var["name"]) - print(f" ✓ Saved {var['name']}") + try: + answer = input(" Configure new keys? 
[y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + answer = "n" + + if answer in ("y", "yes"): print() + for name, info in new_and_unset: + if info.get("url"): + print(f" {info.get('description', name)}") + print(f" Get your key at: {info['url']}") + else: + print(f" {info.get('description', name)}") + if info.get("password"): + import getpass + value = getpass.getpass(f" {info.get('prompt', name)} (Enter to skip): ") + else: + value = input(f" {info.get('prompt', name)} (Enter to skip): ").strip() + if value: + save_env_value(name, value) + results["env_added"].append(name) + print(f" ✓ Saved {name}") + print() + else: + print(" Set later with: hermes config set KEY VALUE") # Check for missing config fields missing_config = get_missing_config_fields() @@ -1004,6 +1026,7 @@ def set_config_value(key: str, value: str): "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE", "terminal.cwd": "TERMINAL_CWD", "terminal.timeout": "TERMINAL_TIMEOUT", + "terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR", } if key in _config_to_env_sync: save_env_value(_config_to_env_sync[key], str(value)) diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index a76a6b390..de55bdff9 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -33,6 +33,26 @@ os.environ.setdefault("MSWEA_SILENT_STARTUP", "1") from hermes_cli.colors import Colors, color from hermes_constants import OPENROUTER_MODELS_URL + +_PROVIDER_ENV_HINTS = ( + "OPENROUTER_API_KEY", + "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", + "OPENAI_BASE_URL", + "GLM_API_KEY", + "ZAI_API_KEY", + "Z_AI_API_KEY", + "KIMI_API_KEY", + "MINIMAX_API_KEY", + "MINIMAX_CN_API_KEY", +) + + +def _has_provider_env_config(content: str) -> bool: + """Return True when ~/.hermes/.env contains provider auth/base URL settings.""" + return any(key in content for key in _PROVIDER_ENV_HINTS) + + def check_ok(text: str, detail: str = ""): print(f" {color('✓', Colors.GREEN)} {text}" + (f" {color(detail, Colors.DIM)}" if detail else "")) @@ 
-132,12 +152,8 @@ def run_doctor(args): # Check for common issues content = env_path.read_text() - if any(k in content for k in ( - "OPENROUTER_API_KEY", "ANTHROPIC_API_KEY", - "GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY", - "KIMI_API_KEY", "MINIMAX_API_KEY", "MINIMAX_CN_API_KEY", - )): - check_ok("API key configured") + if _has_provider_env_config(content): + check_ok("API key or custom endpoint configured") else: check_warn("No API key found in ~/.hermes/.env") issues.append("Run 'hermes setup' to configure API keys") @@ -492,10 +508,16 @@ def run_doctor(args): try: import httpx _base = os.getenv(_base_env, "") + # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com + if not _base and _key.startswith("sk-kimi-"): + _base = "https://api.kimi.com/coding/v1" _url = (_base.rstrip("/") + "/models") if _base else _default_url + _headers = {"Authorization": f"Bearer {_key}"} + if "api.kimi.com" in _url.lower(): + _headers["User-Agent"] = "KimiCLI/1.0" _resp = httpx.get( _url, - headers={"Authorization": f"Bearer {_key}"}, + headers=_headers, timeout=10, ) if _resp.status_code == 200: diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 3cc4941ab..b89db974c 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -154,19 +154,33 @@ def get_hermes_cli_path() -> str: # ============================================================================= def generate_systemd_unit() -> str: + import shutil python_path = get_python_path() working_dir = str(PROJECT_ROOT) + venv_dir = str(PROJECT_ROOT / "venv") + venv_bin = str(PROJECT_ROOT / "venv" / "bin") + node_bin = str(PROJECT_ROOT / "node_modules" / ".bin") + + # Build a PATH that includes the venv, node_modules, and standard system dirs + sane_path = f"{venv_bin}:{node_bin}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + hermes_cli = shutil.which("hermes") or f"{python_path} -m hermes_cli.main" return f"""[Unit] Description={SERVICE_DESCRIPTION} After=network.target [Service] Type=simple 
-ExecStart={python_path} -m hermes_cli.main gateway run +ExecStart={python_path} -m hermes_cli.main gateway run --replace +ExecStop={hermes_cli} gateway stop WorkingDirectory={working_dir} +Environment="PATH={sane_path}" +Environment="VIRTUAL_ENV={venv_dir}" Restart=on-failure RestartSec=10 +KillMode=mixed +KillSignal=SIGTERM +TimeoutStopSec=15 StandardOutput=journal StandardError=journal @@ -377,8 +391,15 @@ def launchd_status(deep: bool = False): # Gateway Runner # ============================================================================= -def run_gateway(verbose: bool = False): - """Run the gateway in foreground.""" +def run_gateway(verbose: bool = False, replace: bool = False): + """Run the gateway in foreground. + + Args: + verbose: Enable verbose logging output. + replace: If True, kill any existing gateway instance before starting. + This prevents systemd restart loops when the old process + hasn't fully exited yet. + """ sys.path.insert(0, str(PROJECT_ROOT)) from gateway.run import start_gateway @@ -393,7 +414,7 @@ def run_gateway(verbose: bool = False): # Exit with code 1 if gateway fails to connect any platform, # so systemd Restart=on-failure will retry on transient errors - success = asyncio.run(start_gateway()) + success = asyncio.run(start_gateway(replace=replace)) if not success: sys.exit(1) @@ -765,7 +786,8 @@ def gateway_command(args): # Default to run if no subcommand if subcmd is None or subcmd == "run": verbose = getattr(args, 'verbose', False) - run_gateway(verbose) + replace = getattr(args, 'replace', False) + run_gateway(verbose, replace=replace) return if subcmd == "setup": diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 78c50468d..49f271f79 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -120,16 +120,63 @@ def _resolve_last_cli_session() -> Optional[str]: return None +def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]: + """Resolve a session name (title) or ID to a session ID. 
+ + - If it looks like a session ID (contains underscore + hex), try direct lookup first. + - Otherwise, treat it as a title and use resolve_session_by_title (auto-latest). + - Falls back to the other method if the first doesn't match. + """ + try: + from hermes_state import SessionDB + db = SessionDB() + + # Try as exact session ID first + session = db.get_session(name_or_id) + if session: + db.close() + return session["id"] + + # Try as title (with auto-latest for lineage) + session_id = db.resolve_session_by_title(name_or_id) + db.close() + return session_id + except Exception: + pass + return None + + def cmd_chat(args): """Run interactive chat CLI.""" - # Resolve --continue into --resume with the latest CLI session - if getattr(args, "continue_last", False) and not getattr(args, "resume", None): - last_id = _resolve_last_cli_session() - if last_id: - args.resume = last_id + # Resolve --continue into --resume with the latest CLI session or by name + continue_val = getattr(args, "continue_last", None) + if continue_val and not getattr(args, "resume", None): + if isinstance(continue_val, str): + # -c "session name" — resolve by title or ID + resolved = _resolve_session_by_name_or_id(continue_val) + if resolved: + args.resume = resolved + else: + print(f"No session found matching '{continue_val}'.") + print("Use 'hermes sessions list' to see available sessions.") + sys.exit(1) else: - print("No previous CLI session found to continue.") - sys.exit(1) + # -c with no argument — continue the most recent session + last_id = _resolve_last_cli_session() + if last_id: + args.resume = last_id + else: + print("No previous CLI session found to continue.") + sys.exit(1) + + # Resolve --resume by title if it's not a direct session ID + resume_val = getattr(args, "resume", None) + if resume_val: + resolved = _resolve_session_by_name_or_id(resume_val) + if resolved: + args.resume = resolved + # If resolution fails, keep the original value — _init_agent will + # report "Session 
not found" with the original input # First-run guard: check if any provider is configured before launching if not _has_any_provider_configured(): @@ -167,6 +214,7 @@ def cmd_chat(args): "verbose": args.verbose, "query": args.query, "resume": getattr(args, "resume", None), + "worktree": getattr(args, "worktree", False), } # Filter out None values kwargs = {k: v for k, v in kwargs.items() if v is not None} @@ -1208,8 +1256,9 @@ def main(): Examples: hermes Start interactive chat hermes chat -q "Hello" Single query mode - hermes --continue Resume the most recent session - hermes --resume <session_id> Resume a specific session + hermes -c Resume the most recent session + hermes -c "my project" Resume a session by name (latest in lineage) + hermes --resume <session_id> Resume a specific session by ID hermes setup Run setup wizard hermes logout Clear stored authentication hermes model Select default model @@ -1217,8 +1266,10 @@ Examples: hermes config edit Edit config in $EDITOR hermes config set model gpt-4 Set a config value hermes gateway Run messaging gateway + hermes -w Start in isolated git worktree hermes gateway install Install as system service hermes sessions list List past sessions + hermes sessions rename ID T Rename/title a session hermes update Update to latest version For more help on a command: @@ -1233,16 +1284,24 @@ For more help on a command: ) parser.add_argument( "--resume", "-r", - metavar="SESSION_ID", + metavar="SESSION", default=None, - help="Resume a previous session by ID (shortcut for: hermes chat --resume ID)" + help="Resume a previous session by ID or title" ) parser.add_argument( "--continue", "-c", dest="continue_last", + nargs="?", + const=True, + default=None, + metavar="SESSION_NAME", + help="Resume a session by name, or the most recent if no name given" + ) + parser.add_argument( + "--worktree", "-w", action="store_true", default=False, - help="Resume the most recent CLI session" + help="Run in an isolated git worktree (for parallel 
agents)" ) subparsers = parser.add_subparsers(dest="command", help="Command to run") @@ -1286,9 +1345,17 @@ For more help on a command: chat_parser.add_argument( "--continue", "-c", dest="continue_last", + nargs="?", + const=True, + default=None, + metavar="SESSION_NAME", + help="Resume a session by name, or the most recent if no name given" + ) + chat_parser.add_argument( + "--worktree", "-w", action="store_true", default=False, - help="Resume the most recent CLI session" + help="Run in an isolated git worktree (for parallel agents on the same repo)" ) chat_parser.set_defaults(func=cmd_chat) @@ -1315,6 +1382,8 @@ For more help on a command: # gateway run (default) gateway_run = gateway_subparsers.add_parser("run", help="Run gateway in foreground") gateway_run.add_argument("-v", "--verbose", action="store_true") + gateway_run.add_argument("--replace", action="store_true", + help="Replace any existing gateway instance (useful for systemd)") # gateway start gateway_start = gateway_subparsers.add_parser("start", help="Start gateway service") @@ -1655,7 +1724,7 @@ For more help on a command: # ========================================================================= sessions_parser = subparsers.add_parser( "sessions", - help="Manage session history (list, export, prune, delete)", + help="Manage session history (list, rename, export, prune, delete)", description="View and manage the SQLite session store" ) sessions_subparsers = sessions_parser.add_subparsers(dest="sessions_action") @@ -1680,6 +1749,10 @@ For more help on a command: sessions_stats = sessions_subparsers.add_parser("stats", help="Show session store statistics") + sessions_rename = sessions_subparsers.add_parser("rename", help="Set or change a session's title") + sessions_rename.add_argument("session_id", help="Session ID to rename") + sessions_rename.add_argument("title", nargs="+", help="New title for the session") + def cmd_sessions(args): import json as _json try: @@ -1692,18 +1765,51 @@ For more help 
on a command: action = args.sessions_action if action == "list": - sessions = db.search_sessions(source=args.source, limit=args.limit) + sessions = db.list_sessions_rich(source=args.source, limit=args.limit) if not sessions: print("No sessions found.") return - print(f"{'ID':<30} {'Source':<12} {'Model':<30} {'Messages':>8} {'Started'}") - print("─" * 100) from datetime import datetime + import time as _time + + def _relative_time(ts): + """Format a timestamp as relative time (e.g., '2h ago', 'yesterday').""" + if not ts: + return "?" + delta = _time.time() - ts + if delta < 60: + return "just now" + elif delta < 3600: + mins = int(delta / 60) + return f"{mins}m ago" + elif delta < 86400: + hours = int(delta / 3600) + return f"{hours}h ago" + elif delta < 172800: + return "yesterday" + elif delta < 604800: + days = int(delta / 86400) + return f"{days}d ago" + else: + return datetime.fromtimestamp(ts).strftime("%Y-%m-%d") + + has_titles = any(s.get("title") for s in sessions) + if has_titles: + print(f"{'Title':<22} {'Preview':<40} {'Last Active':<13} {'ID'}") + print("─" * 100) + else: + print(f"{'Preview':<50} {'Last Active':<13} {'Src':<6} {'ID'}") + print("─" * 90) for s in sessions: - started = datetime.fromtimestamp(s["started_at"]).strftime("%Y-%m-%d %H:%M") if s["started_at"] else "?" 
- model = (s.get("model") or "?")[:28] - ended = " (ended)" if s.get("ended_at") else "" - print(f"{s['id']:<30} {s['source']:<12} {model:<30} {s['message_count']:>8} {started}{ended}") + last_active = _relative_time(s.get("last_active")) + preview = s.get("preview", "")[:38] if has_titles else s.get("preview", "")[:48] + if has_titles: + title = (s.get("title") or "—")[:20] + sid = s["id"][:20] + print(f"{title:<22} {preview:<40} {last_active:<13} {sid}") + else: + sid = s["id"][:20] + print(f"{preview:<50} {last_active:<13} {s['source']:<6} {sid}") elif action == "export": if args.session_id: @@ -1743,6 +1849,16 @@ For more help on a command: count = db.prune_sessions(older_than_days=days, source=args.source) print(f"Pruned {count} session(s).") + elif action == "rename": + title = " ".join(args.title) + try: + if db.set_session_title(args.session_id, title): + print(f"Session '{args.session_id}' renamed to: {title}") + else: + print(f"Session '{args.session_id}' not found.") + except ValueError as e: + print(f"Error: {e}") + elif action == "stats": total = db.session_count() msgs = db.message_count() @@ -1848,6 +1964,8 @@ For more help on a command: args.provider = None args.toolsets = None args.verbose = False + if not hasattr(args, "worktree"): + args.worktree = False cmd_chat(args) return @@ -1859,7 +1977,9 @@ For more help on a command: args.toolsets = None args.verbose = False args.resume = None - args.continue_last = False + args.continue_last = None + if not hasattr(args, "worktree"): + args.worktree = False cmd_chat(args) return diff --git a/hermes_cli/models.py b/hermes_cli/models.py index c94dd855b..723f226ea 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -1,10 +1,18 @@ """ -Canonical list of OpenRouter models offered in CLI and setup wizards. +Canonical model catalogs and lightweight validation helpers. Add, remove, or reorder entries here — both `hermes setup` and `hermes` provider-selection will pick up the change automatically. 
""" +from __future__ import annotations + +import json +import urllib.request +import urllib.error +from difflib import get_close_matches +from typing import Any, Optional + # (model_id, display description shown in menus) OPENROUTER_MODELS: list[tuple[str, str]] = [ ("anthropic/claude-opus-4.6", "recommended"), @@ -14,17 +22,64 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("openai/gpt-5.3-codex", ""), ("google/gemini-3-pro-preview", ""), ("google/gemini-3-flash-preview", ""), - ("qwen/qwen3.5-plus-02-15", ""), - ("qwen/qwen3.5-35b-a3b", ""), + ("qwen/qwen3.5-plus-02-15", ""), + ("qwen/qwen3.5-35b-a3b", ""), ("stepfun/step-3.5-flash", ""), ("z-ai/glm-5", ""), ("moonshotai/kimi-k2.5", ""), ("minimax/minimax-m2.5", ""), ] +_PROVIDER_MODELS: dict[str, list[str]] = { + "zai": [ + "glm-5", + "glm-4.7", + "glm-4.5", + "glm-4.5-flash", + ], + "kimi-coding": [ + "kimi-k2.5", + "kimi-k2-thinking", + "kimi-k2-turbo-preview", + "kimi-k2-0905-preview", + ], + "minimax": [ + "MiniMax-M2.5", + "MiniMax-M2.5-highspeed", + "MiniMax-M2.1", + ], + "minimax-cn": [ + "MiniMax-M2.5", + "MiniMax-M2.5-highspeed", + "MiniMax-M2.1", + ], +} + +_PROVIDER_LABELS = { + "openrouter": "OpenRouter", + "openai-codex": "OpenAI Codex", + "nous": "Nous Portal", + "zai": "Z.AI / GLM", + "kimi-coding": "Kimi / Moonshot", + "minimax": "MiniMax", + "minimax-cn": "MiniMax (China)", + "custom": "custom endpoint", +} + +_PROVIDER_ALIASES = { + "glm": "zai", + "z-ai": "zai", + "z.ai": "zai", + "zhipu": "zai", + "kimi": "kimi-coding", + "moonshot": "kimi-coding", + "minimax-china": "minimax-cn", + "minimax_cn": "minimax-cn", +} + def model_ids() -> list[str]: - """Return just the model-id strings (convenience helper).""" + """Return just the OpenRouter model-id strings.""" return [mid for mid, _ in OPENROUTER_MODELS] @@ -34,3 +89,231 @@ def menu_labels() -> list[str]: for mid, desc in OPENROUTER_MODELS: labels.append(f"{mid} ({desc})" if desc else mid) return labels + + +# All provider IDs and aliases 
that are valid for the provider:model syntax. +_KNOWN_PROVIDER_NAMES: set[str] = ( + set(_PROVIDER_LABELS.keys()) + | set(_PROVIDER_ALIASES.keys()) + | {"openrouter", "custom"} +) + + +def list_available_providers() -> list[dict[str, str]]: + """Return info about all providers the user could use with ``provider:model``. + + Each dict has ``id``, ``label``, and ``aliases``. + Checks which providers have valid credentials configured. + """ + # Canonical providers in display order + _PROVIDER_ORDER = [ + "openrouter", "nous", "openai-codex", + "zai", "kimi-coding", "minimax", "minimax-cn", + ] + # Build reverse alias map + aliases_for: dict[str, list[str]] = {} + for alias, canonical in _PROVIDER_ALIASES.items(): + aliases_for.setdefault(canonical, []).append(alias) + + result = [] + for pid in _PROVIDER_ORDER: + label = _PROVIDER_LABELS.get(pid, pid) + alias_list = aliases_for.get(pid, []) + # Check if this provider has credentials available + has_creds = False + try: + from hermes_cli.runtime_provider import resolve_runtime_provider + runtime = resolve_runtime_provider(requested=pid) + has_creds = bool(runtime.get("api_key")) + except Exception: + pass + result.append({ + "id": pid, + "label": label, + "aliases": alias_list, + "authenticated": has_creds, + }) + return result + + +def parse_model_input(raw: str, current_provider: str) -> tuple[str, str]: + """Parse ``/model`` input into ``(provider, model)``. + + Supports ``provider:model`` syntax to switch providers at runtime:: + + openrouter:anthropic/claude-sonnet-4.5 → ("openrouter", "anthropic/claude-sonnet-4.5") + nous:hermes-3 → ("nous", "hermes-3") + anthropic/claude-sonnet-4.5 → (current_provider, "anthropic/claude-sonnet-4.5") + gpt-5.4 → (current_provider, "gpt-5.4") + + The colon is only treated as a provider delimiter if the left side is a + recognized provider name or alias. This avoids misinterpreting model names + that happen to contain colons (e.g. ``anthropic/claude-3.5-sonnet:beta``). 
+ + Returns ``(provider, model)`` where *provider* is either the explicit + provider from the input or *current_provider* if none was specified. + """ + stripped = raw.strip() + colon = stripped.find(":") + if colon > 0: + provider_part = stripped[:colon].strip().lower() + model_part = stripped[colon + 1:].strip() + if provider_part and model_part and provider_part in _KNOWN_PROVIDER_NAMES: + return (normalize_provider(provider_part), model_part) + return (current_provider, stripped) + + +def curated_models_for_provider(provider: Optional[str]) -> list[tuple[str, str]]: + """Return ``(model_id, description)`` tuples for a provider's curated list.""" + normalized = normalize_provider(provider) + if normalized == "openrouter": + return list(OPENROUTER_MODELS) + models = _PROVIDER_MODELS.get(normalized, []) + return [(m, "") for m in models] + + +def normalize_provider(provider: Optional[str]) -> str: + """Normalize provider aliases to Hermes' canonical provider ids. + + Note: ``"auto"`` passes through unchanged — use + ``hermes_cli.auth.resolve_provider()`` to resolve it to a concrete + provider based on credentials and environment. + """ + normalized = (provider or "openrouter").strip().lower() + return _PROVIDER_ALIASES.get(normalized, normalized) + + +def provider_model_ids(provider: Optional[str]) -> list[str]: + """Return the best known model catalog for a provider.""" + normalized = normalize_provider(provider) + if normalized == "openrouter": + return model_ids() + if normalized == "openai-codex": + from hermes_cli.codex_models import get_codex_model_ids + + return get_codex_model_ids() + return list(_PROVIDER_MODELS.get(normalized, [])) + + +def fetch_api_models( + api_key: Optional[str], + base_url: Optional[str], + timeout: float = 5.0, +) -> Optional[list[str]]: + """Fetch the list of available model IDs from the provider's ``/models`` endpoint. 
+ + Returns a list of model ID strings, or ``None`` if the endpoint could not + be reached (network error, timeout, auth failure, etc.). + """ + if not base_url: + return None + + url = base_url.rstrip("/") + "/models" + headers: dict[str, str] = {} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + + req = urllib.request.Request(url, headers=headers) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + # Standard OpenAI format: {"data": [{"id": "model-name", ...}, ...]} + return [m.get("id", "") for m in data.get("data", [])] + except Exception: + return None + + +def validate_requested_model( + model_name: str, + provider: Optional[str], + *, + api_key: Optional[str] = None, + base_url: Optional[str] = None, +) -> dict[str, Any]: + """ + Validate a ``/model`` value for the active provider. + + Performs format checks first, then probes the live API to confirm + the model actually exists. + + Returns a dict with: + - accepted: whether the CLI should switch to the requested model now + - persist: whether it is safe to save to config + - recognized: whether it matched a known provider catalog + - message: optional warning / guidance for the user + """ + requested = (model_name or "").strip() + normalized = normalize_provider(provider) + if normalized == "openrouter" and base_url and "openrouter.ai" not in base_url: + normalized = "custom" + + if not requested: + return { + "accepted": False, + "persist": False, + "recognized": False, + "message": "Model name cannot be empty.", + } + + if any(ch.isspace() for ch in requested): + return { + "accepted": False, + "persist": False, + "recognized": False, + "message": "Model names cannot contain spaces.", + } + + # Probe the live API to check if the model actually exists + api_models = fetch_api_models(api_key, base_url) + + if api_models is not None: + if requested in set(api_models): + # API confirmed the model exists + return { + "accepted": True, 
+ "persist": True, + "recognized": True, + "message": None, + } + else: + # API responded but model is not listed + suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Did you mean: " + ", ".join(f"`{s}`" for s in suggestions) + + return { + "accepted": False, + "persist": False, + "recognized": False, + "message": ( + f"Error: `{requested}` is not a valid model for this provider." + f"{suggestion_text}" + ), + } + + # api_models is None — couldn't reach API, fall back to catalog check + provider_label = _PROVIDER_LABELS.get(normalized, normalized) + known_models = provider_model_ids(normalized) + + if requested in known_models: + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + + # Can't validate — accept for session only + suggestion = get_close_matches(requested, known_models, n=1, cutoff=0.6) + suggestion_text = f" Did you mean `{suggestion[0]}`?" if suggestion else "" + return { + "accepted": True, + "persist": False, + "recognized": False, + "message": ( + f"Could not validate `{requested}` against the live {provider_label} API. " + "Using it for this session only; config unchanged." + f"{suggestion_text}" + ), + } diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 8bbc70001..b244027be 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -667,16 +667,17 @@ def setup_model_provider(config: dict): print_header("Z.AI / GLM API Key") pconfig = PROVIDER_REGISTRY["zai"] print_info(f"Provider: {pconfig.name}") - print_info(f"Base URL: {pconfig.inference_base_url}") print_info("Get your API key at: https://open.bigmodel.cn/") print() existing_key = get_env_value("GLM_API_KEY") or get_env_value("ZAI_API_KEY") + api_key = existing_key # will be overwritten if user enters a new one if existing_key: print_info(f"Current: {existing_key[:8]}... 
(configured)") if prompt_yes_no("Update API key?", False): - api_key = prompt(" GLM API key", password=True) - if api_key: + new_key = prompt(" GLM API key", password=True) + if new_key: + api_key = new_key save_env_value("GLM_API_KEY", api_key) print_success("GLM API key updated") else: @@ -687,11 +688,32 @@ def setup_model_provider(config: dict): else: print_warning("Skipped - agent won't work without an API key") + # Detect the correct z.ai endpoint for this key. + # Z.AI has separate billing for general vs coding plans and + # global vs China endpoints — we probe to find the right one. + zai_base_url = pconfig.inference_base_url + if api_key: + print() + print_info("Detecting your z.ai endpoint...") + from hermes_cli.auth import detect_zai_endpoint + detected = detect_zai_endpoint(api_key) + if detected: + zai_base_url = detected["base_url"] + print_success(f"Detected: {detected['label']} endpoint") + print_info(f" URL: {detected['base_url']}") + if detected["id"].startswith("coding"): + print_info(f" Note: Coding Plan detected — GLM-5 is not available, using {detected['model']}") + save_env_value("GLM_BASE_URL", zai_base_url) + else: + print_warning("Could not verify any z.ai endpoint with this key.") + print_info(f" Using default: {zai_base_url}") + print_info(" If you get billing errors, check your plan at https://open.bigmodel.cn/") + # Clear custom endpoint vars if switching if existing_custom: save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - _update_config_for_provider("zai", pconfig.inference_base_url) + _update_config_for_provider("zai", zai_base_url) elif provider_idx == 5: # Kimi / Moonshot selected_provider = "kimi-coding" @@ -838,9 +860,18 @@ def setup_model_provider(config: dict): config['model'] = model_name # else: keep current + elif selected_provider == "nous": + # Nous login succeeded but model fetch failed — prompt manually + # instead of falling through to the OpenRouter static list. 
+ print_warning("Could not fetch available models from Nous Portal.") + print_info("Enter a Nous model name manually (e.g., claude-opus-4-6).") + custom = prompt(f" Model name (Enter to keep '{current_model}')") + if custom: + config['model'] = custom + save_env_value("LLM_MODEL", custom) elif selected_provider == "openai-codex": - from hermes_cli.codex_models import get_codex_models - codex_models = get_codex_models() + from hermes_cli.codex_models import get_codex_model_ids + codex_models = get_codex_model_ids() model_choices = codex_models + [f"Keep current ({current_model})"] default_codex = 0 if current_model in codex_models: @@ -859,7 +890,12 @@ def setup_model_provider(config: dict): save_env_value("LLM_MODEL", custom) _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) elif selected_provider == "zai": - zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"] + # Coding Plan endpoints don't have GLM-5 + is_coding_plan = get_env_value("GLM_BASE_URL") and "coding" in (get_env_value("GLM_BASE_URL") or "") + if is_coding_plan: + zai_models = ["glm-4.7", "glm-4.5", "glm-4.5-flash"] + else: + zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"] model_choices = list(zai_models) model_choices.append("Custom model") model_choices.append(f"Keep current ({current_model})") diff --git a/hermes_cli/skills_hub.py b/hermes_cli/skills_hub.py index 32a0bab1b..8b72fe4f4 100644 --- a/hermes_cli/skills_hub.py +++ b/hermes_cli/skills_hub.py @@ -408,10 +408,11 @@ def do_inspect(identifier: str, console: Optional[Console] = None) -> None: def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None: """List installed skills, distinguishing builtins from hub-installed.""" - from tools.skills_hub import HubLockFile, SKILLS_DIR + from tools.skills_hub import HubLockFile, ensure_hub_dirs from tools.skills_tool import _find_all_skills c = console or _console + ensure_hub_dirs() lock = HubLockFile() hub_installed = {e["name"]: e for e 
in lock.list_installed()} diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 9773c697a..7fe88691e 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -308,7 +308,7 @@ def _get_platform_tools(config: dict, platform: str) -> Set[str]: platform_toolsets = config.get("platform_toolsets", {}) toolset_names = platform_toolsets.get(platform) - if not toolset_names or not isinstance(toolset_names, list): + if toolset_names is None or not isinstance(toolset_names, list): default_ts = PLATFORMS[platform]["default_toolset"] toolset_names = [default_ts] @@ -358,46 +358,88 @@ def _toolset_has_keys(ts_key: str) -> bool: # ─── Menu Helpers ───────────────────────────────────────────────────────────── def _prompt_choice(question: str, choices: list, default: int = 0) -> int: - """Single-select menu (arrow keys).""" - print(color(question, Colors.YELLOW)) + """Single-select menu (arrow keys). Uses curses to avoid simple_term_menu + rendering bugs in tmux, iTerm, and other non-standard terminals.""" + # Curses-based single-select — works in tmux, iTerm, and standard terminals try: - from simple_term_menu import TerminalMenu - menu = TerminalMenu( - [f" {c}" for c in choices], - cursor_index=default, - menu_cursor="→ ", - menu_cursor_style=("fg_green", "bold"), - menu_highlight_style=("fg_green",), - cycle_cursor=True, - clear_screen=False, - ) - idx = menu.show() - if idx is None: - return default - print() - return idx - except (ImportError, NotImplementedError): - for i, c in enumerate(choices): - marker = "●" if i == default else "○" - style = Colors.GREEN if i == default else "" - print(color(f" {marker} {c}", style) if style else f" {marker} {c}") - while True: - try: - val = input(color(f" Select [1-{len(choices)}] ({default + 1}): ", Colors.DIM)) - if not val: - return default - idx = int(val) - 1 - if 0 <= idx < len(choices): - return idx - except (ValueError, KeyboardInterrupt, EOFError): - print() + import curses + 
result_holder = [default] + + def _curses_menu(stdscr): + curses.curs_set(0) + if curses.has_colors(): + curses.start_color() + curses.use_default_colors() + curses.init_pair(1, curses.COLOR_GREEN, -1) + curses.init_pair(2, curses.COLOR_YELLOW, -1) + cursor = default + + while True: + stdscr.clear() + max_y, max_x = stdscr.getmaxyx() + try: + stdscr.addnstr(0, 0, question, max_x - 1, + curses.A_BOLD | (curses.color_pair(2) if curses.has_colors() else 0)) + except curses.error: + pass + + for i, c in enumerate(choices): + y = i + 2 + if y >= max_y - 1: + break + arrow = "→" if i == cursor else " " + line = f" {arrow} {c}" + attr = curses.A_NORMAL + if i == cursor: + attr = curses.A_BOLD + if curses.has_colors(): + attr |= curses.color_pair(1) + try: + stdscr.addnstr(y, 0, line, max_x - 1, attr) + except curses.error: + pass + + stdscr.refresh() + key = stdscr.getch() + + if key in (curses.KEY_UP, ord('k')): + cursor = (cursor - 1) % len(choices) + elif key in (curses.KEY_DOWN, ord('j')): + cursor = (cursor + 1) % len(choices) + elif key in (curses.KEY_ENTER, 10, 13): + result_holder[0] = cursor + return + elif key in (27, ord('q')): + return + + curses.wrapper(_curses_menu) + return result_holder[0] + + except Exception: + pass + + # Fallback: numbered input (Windows without curses, etc.) + print(color(question, Colors.YELLOW)) + for i, c in enumerate(choices): + marker = "●" if i == default else "○" + style = Colors.GREEN if i == default else "" + print(color(f" {marker} {i+1}. {c}", style) if style else f" {marker} {i+1}. {c}") + while True: + try: + val = input(color(f" Select [1-{len(choices)}] ({default + 1}): ", Colors.DIM)) + if not val: return default + idx = int(val) - 1 + if 0 <= idx < len(choices): + return idx + except (ValueError, KeyboardInterrupt, EOFError): + print() + return default def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]: """Multi-select checklist of toolsets. 
Returns set of selected toolset keys.""" - import platform as _platform labels = [] for ts_key, ts_label, ts_desc in CONFIGURABLE_TOOLSETS: @@ -411,48 +453,8 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str if ts_key in enabled ] - # simple_term_menu multi-select has rendering bugs on macOS terminals, - # so we use a curses-based fallback there. - use_term_menu = _platform.system() != "Darwin" - - if use_term_menu: - try: - from simple_term_menu import TerminalMenu - - print(color(f"Tools for {platform_label}", Colors.YELLOW)) - print(color(" SPACE to toggle, ENTER to confirm.", Colors.DIM)) - print() - - menu_items = [f" {label}" for label in labels] - menu = TerminalMenu( - menu_items, - multi_select=True, - show_multi_select_hint=False, - multi_select_cursor="[✓] ", - multi_select_select_on_accept=False, - multi_select_empty_ok=True, - preselected_entries=pre_selected_indices if pre_selected_indices else None, - menu_cursor="→ ", - menu_cursor_style=("fg_green", "bold"), - menu_highlight_style=("fg_green",), - cycle_cursor=True, - clear_screen=False, - clear_menu_on_exit=False, - ) - - menu.show() - - if menu.chosen_menu_entries is None: - return enabled - - selected_indices = list(menu.chosen_menu_indices or []) - return {CONFIGURABLE_TOOLSETS[i][0] for i in selected_indices} - - except (ImportError, NotImplementedError): - pass # fall through to curses/numbered fallback - # Curses-based multi-select — arrow keys + space to toggle + enter to confirm. - # Used on macOS (where simple_term_menu ghosts) and as a fallback. + # simple_term_menu has rendering bugs in tmux, iTerm, and other terminals. 
try: import curses selected = set(pre_selected_indices) diff --git a/hermes_state.py b/hermes_state.py index 1d1f951c0..67b4484e7 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -24,7 +24,7 @@ from typing import Dict, Any, List, Optional DEFAULT_DB_PATH = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) / "state.db" -SCHEMA_VERSION = 2 +SCHEMA_VERSION = 4 SCHEMA_SQL = """ CREATE TABLE IF NOT EXISTS schema_version ( @@ -46,6 +46,7 @@ CREATE TABLE IF NOT EXISTS sessions ( tool_call_count INTEGER DEFAULT 0, input_tokens INTEGER DEFAULT 0, output_tokens INTEGER DEFAULT 0, + title TEXT, FOREIGN KEY (parent_session_id) REFERENCES sessions(id) ); @@ -133,7 +134,33 @@ class SessionDB: except sqlite3.OperationalError: pass # Column already exists cursor.execute("UPDATE schema_version SET version = 2") + if current_version < 3: + # v3: add title column to sessions + try: + cursor.execute("ALTER TABLE sessions ADD COLUMN title TEXT") + except sqlite3.OperationalError: + pass # Column already exists + cursor.execute("UPDATE schema_version SET version = 3") + if current_version < 4: + # v4: add unique index on title (NULLs allowed, only non-NULL must be unique) + try: + cursor.execute( + "CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique " + "ON sessions(title) WHERE title IS NOT NULL" + ) + except sqlite3.OperationalError: + pass # Index already exists + cursor.execute("UPDATE schema_version SET version = 4") + # Unique title index — always ensure it exists (safe to run after migrations + # since the title column is guaranteed to exist at this point) + try: + cursor.execute( + "CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique " + "ON sessions(title) WHERE title IS NOT NULL" + ) + except sqlite3.OperationalError: + pass # Index already exists # FTS5 setup (separate because CREATE VIRTUAL TABLE can't be in executescript with IF NOT EXISTS reliably) try: @@ -219,6 +246,210 @@ class SessionDB: row = cursor.fetchone() return dict(row) if row else 
None + # Maximum length for session titles + MAX_TITLE_LENGTH = 100 + + @staticmethod + def sanitize_title(title: Optional[str]) -> Optional[str]: + """Validate and sanitize a session title. + + - Strips leading/trailing whitespace + - Removes ASCII control characters (0x00-0x1F, 0x7F) and problematic + Unicode control chars (zero-width, RTL/LTR overrides, etc.) + - Collapses internal whitespace runs to single spaces + - Normalizes empty/whitespace-only strings to None + - Enforces MAX_TITLE_LENGTH + + Returns the cleaned title string or None. + Raises ValueError if the title exceeds MAX_TITLE_LENGTH after cleaning. + """ + if not title: + return None + + import re + + # Remove ASCII control characters (0x00-0x1F, 0x7F) but keep + # whitespace chars (\t=0x09, \n=0x0A, \r=0x0D) so they can be + # normalized to spaces by the whitespace collapsing step below + cleaned = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', title) + + # Remove problematic Unicode control characters: + # - Zero-width chars (U+200B-U+200F, U+FEFF) + # - Directional overrides (U+202A-U+202E, U+2066-U+2069) + # - Object replacement (U+FFFC), interlinear annotation (U+FFF9-U+FFFB) + cleaned = re.sub( + r'[\u200b-\u200f\u2028-\u202e\u2060-\u2069\ufeff\ufffc\ufff9-\ufffb]', + '', cleaned, + ) + + # Collapse internal whitespace runs and strip + cleaned = re.sub(r'\s+', ' ', cleaned).strip() + + if not cleaned: + return None + + if len(cleaned) > SessionDB.MAX_TITLE_LENGTH: + raise ValueError( + f"Title too long ({len(cleaned)} chars, max {SessionDB.MAX_TITLE_LENGTH})" + ) + + return cleaned + + def set_session_title(self, session_id: str, title: str) -> bool: + """Set or update a session's title. + + Returns True if session was found and title was set. + Raises ValueError if title is already in use by another session, + or if the title fails validation (too long, invalid characters). + Empty/whitespace-only strings are normalized to None (clearing the title). 
+ """ + title = self.sanitize_title(title) + if title: + # Check uniqueness (allow the same session to keep its own title) + cursor = self._conn.execute( + "SELECT id FROM sessions WHERE title = ? AND id != ?", + (title, session_id), + ) + conflict = cursor.fetchone() + if conflict: + raise ValueError( + f"Title '{title}' is already in use by session {conflict['id']}" + ) + cursor = self._conn.execute( + "UPDATE sessions SET title = ? WHERE id = ?", + (title, session_id), + ) + self._conn.commit() + return cursor.rowcount > 0 + + def get_session_title(self, session_id: str) -> Optional[str]: + """Get the title for a session, or None.""" + cursor = self._conn.execute( + "SELECT title FROM sessions WHERE id = ?", (session_id,) + ) + row = cursor.fetchone() + return row["title"] if row else None + + def get_session_by_title(self, title: str) -> Optional[Dict[str, Any]]: + """Look up a session by exact title. Returns session dict or None.""" + cursor = self._conn.execute( + "SELECT * FROM sessions WHERE title = ?", (title,) + ) + row = cursor.fetchone() + return dict(row) if row else None + + def resolve_session_by_title(self, title: str) -> Optional[str]: + """Resolve a title to a session ID, preferring the latest in a lineage. + + If the exact title exists, returns that session's ID. + If not, searches for "title #N" variants and returns the latest one. + If the exact title exists AND numbered variants exist, returns the + latest numbered variant (the most recent continuation). + """ + # First try exact match + exact = self.get_session_by_title(title) + + # Also search for numbered variants: "title #2", "title #3", etc. + # Escape SQL LIKE wildcards (%, _) in the title to prevent false matches + escaped = title.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + cursor = self._conn.execute( + "SELECT id, title, started_at FROM sessions " + "WHERE title LIKE ? 
ESCAPE '\\' ORDER BY started_at DESC", + (f"{escaped} #%",), + ) + numbered = cursor.fetchall() + + if numbered: + # Return the most recent numbered variant + return numbered[0]["id"] + elif exact: + return exact["id"] + return None + + def get_next_title_in_lineage(self, base_title: str) -> str: + """Generate the next title in a lineage (e.g., "my session" → "my session #2"). + + Strips any existing " #N" suffix to find the base name, then finds + the highest existing number and increments. + """ + import re + # Strip existing #N suffix to find the true base + match = re.match(r'^(.*?) #(\d+)$', base_title) + if match: + base = match.group(1) + else: + base = base_title + + # Find all existing numbered variants + # Escape SQL LIKE wildcards (%, _) in the base to prevent false matches + escaped = base.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + cursor = self._conn.execute( + "SELECT title FROM sessions WHERE title = ? OR title LIKE ? ESCAPE '\\'", + (base, f"{escaped} #%"), + ) + existing = [row["title"] for row in cursor.fetchall()] + + if not existing: + return base # No conflict, use the base name as-is + + # Find the highest number + max_num = 1 # The unnumbered original counts as #1 + for t in existing: + m = re.match(r'^.* #(\d+)$', t) + if m: + max_num = max(max_num, int(m.group(1))) + + return f"{base} #{max_num + 1}" + + def list_sessions_rich( + self, + source: str = None, + limit: int = 20, + offset: int = 0, + ) -> List[Dict[str, Any]]: + """List sessions with preview (first user message) and last active timestamp. + + Returns dicts with keys: id, source, model, title, started_at, ended_at, + message_count, preview (first 60 chars of first user message), + last_active (timestamp of last message). + + Uses a single query with correlated subqueries instead of N+2 queries. + """ + source_clause = "WHERE s.source = ?" 
if source else "" + query = f""" + SELECT s.*, + COALESCE( + (SELECT SUBSTR(REPLACE(REPLACE(m.content, X'0A', ' '), X'0D', ' '), 1, 63) + FROM messages m + WHERE m.session_id = s.id AND m.role = 'user' AND m.content IS NOT NULL + ORDER BY m.timestamp, m.id LIMIT 1), + '' + ) AS _preview_raw, + COALESCE( + (SELECT MAX(m2.timestamp) FROM messages m2 WHERE m2.session_id = s.id), + s.started_at + ) AS last_active + FROM sessions s + {source_clause} + ORDER BY s.started_at DESC + LIMIT ? OFFSET ? + """ + params = (source, limit, offset) if source else (limit, offset) + cursor = self._conn.execute(query, params) + sessions = [] + for row in cursor.fetchall(): + s = dict(row) + # Build the preview from the raw substring + raw = s.pop("_preview_raw", "").strip() + if raw: + text = raw[:60] + s["preview"] = text + ("..." if len(raw) > 60 else "") + else: + s["preview"] = "" + sessions.append(s) + + return sessions + # ========================================================================= # Message storage # ========================================================================= diff --git a/mini_swe_runner.py b/mini_swe_runner.py index 6a3871d76..9be7b7348 100644 --- a/mini_swe_runner.py +++ b/mini_swe_runner.py @@ -149,7 +149,7 @@ class MiniSWERunner: def __init__( self, - model: str = "anthropic/claude-sonnet-4-20250514", + model: str = "anthropic/claude-sonnet-4.6", base_url: str = None, api_key: str = None, env_type: str = "local", @@ -200,13 +200,7 @@ class MiniSWERunner: else: client_kwargs["base_url"] = "https://openrouter.ai/api/v1" - if base_url and "api.anthropic.com" in base_url.strip().lower(): - raise ValueError( - "Anthropic's native /v1/messages API is not supported yet (planned for a future release). " - "Hermes currently requires OpenAI-compatible /chat/completions endpoints. " - "To use Claude models now, route through OpenRouter (OPENROUTER_API_KEY) " - "or any OpenAI-compatible proxy that wraps the Anthropic API." 
- ) + # Handle API key - OpenRouter is the primary provider if api_key: diff --git a/optional-skills/research/qmd/SKILL.md b/optional-skills/research/qmd/SKILL.md new file mode 100644 index 000000000..9dce442ed --- /dev/null +++ b/optional-skills/research/qmd/SKILL.md @@ -0,0 +1,441 @@ +--- +name: qmd +description: Search personal knowledge bases, notes, docs, and meeting transcripts locally using qmd — a hybrid retrieval engine with BM25, vector search, and LLM reranking. Supports CLI and MCP integration. +version: 1.0.0 +author: Hermes Agent + Teknium +license: MIT +platforms: [macos, linux] +metadata: + hermes: + tags: [Search, Knowledge-Base, RAG, Notes, MCP, Local-AI] + related_skills: [obsidian, native-mcp, arxiv] +--- + +# QMD — Query Markup Documents + +Local, on-device search engine for personal knowledge bases. Indexes markdown +notes, meeting transcripts, documentation, and any text-based files, then +provides hybrid search combining keyword matching, semantic understanding, and +LLM-powered reranking — all running locally with no cloud dependencies. + +Created by [Tobi Lütke](https://github.com/tobi/qmd). MIT licensed. 
+ +## When to Use + +- User asks to search their notes, docs, knowledge base, or meeting transcripts +- User wants to find something across a large collection of markdown/text files +- User wants semantic search ("find notes about X concept") not just keyword grep +- User has already set up qmd collections and wants to query them +- User asks to set up a local knowledge base or document search system +- Keywords: "search my notes", "find in my docs", "knowledge base", "qmd" + +## Prerequisites + +### Node.js >= 22 (required) + +```bash +# Check version +node --version # must be >= 22 + +# macOS — install or upgrade via Homebrew +brew install node@22 + +# Linux — use NodeSource or nvm +curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash - +sudo apt-get install -y nodejs +# or with nvm: +nvm install 22 && nvm use 22 +``` + +### SQLite with Extension Support (macOS only) + +macOS system SQLite lacks extension loading. Install via Homebrew: + +```bash +brew install sqlite +``` + +### Install qmd + +```bash +npm install -g @tobilu/qmd +# or with Bun: +bun install -g @tobilu/qmd +``` + +First run auto-downloads 3 local GGUF models (~2GB total): + +| Model | Purpose | Size | +|-------|---------|------| +| embeddinggemma-300M-Q8_0 | Vector embeddings | ~300MB | +| qwen3-reranker-0.6b-q8_0 | Result reranking | ~640MB | +| qmd-query-expansion-1.7B | Query expansion | ~1.1GB | + +### Verify Installation + +```bash +qmd --version +qmd status +``` + +## Quick Reference + +| Command | What It Does | Speed | +|---------|-------------|-------| +| `qmd search "query"` | BM25 keyword search (no models) | ~0.2s | +| `qmd vsearch "query"` | Semantic vector search (1 model) | ~3s | +| `qmd query "query"` | Hybrid + reranking (all 3 models) | ~2-3s warm, ~19s cold | +| `qmd get <docid>` | Retrieve full document content | instant | +| `qmd multi-get "glob"` | Retrieve multiple files | instant | +| `qmd collection add <path> --name <n>` | Add a directory as a collection | 
instant | +| `qmd context add <path> "description"` | Add context metadata to improve retrieval | instant | +| `qmd embed` | Generate/update vector embeddings | varies | +| `qmd status` | Show index health and collection info | instant | +| `qmd mcp` | Start MCP server (stdio) | persistent | +| `qmd mcp --http --daemon` | Start MCP server (HTTP, warm models) | persistent | + +## Setup Workflow + +### 1. Add Collections + +Point qmd at directories containing your documents: + +```bash +# Add a notes directory +qmd collection add ~/notes --name notes + +# Add project docs +qmd collection add ~/projects/myproject/docs --name project-docs + +# Add meeting transcripts +qmd collection add ~/meetings --name meetings + +# List all collections +qmd collection list +``` + +### 2. Add Context Descriptions + +Context metadata helps the search engine understand what each collection +contains. This significantly improves retrieval quality: + +```bash +qmd context add qmd://notes "Personal notes, ideas, and journal entries" +qmd context add qmd://project-docs "Technical documentation for the main project" +qmd context add qmd://meetings "Meeting transcripts and action items from team syncs" +``` + +### 3. Generate Embeddings + +```bash +qmd embed +``` + +This processes all documents in all collections and generates vector +embeddings. Re-run after adding new documents or collections. + +### 4. Verify + +```bash +qmd status # shows index health, collection stats, model info +``` + +## Search Patterns + +### Fast Keyword Search (BM25) + +Best for: exact terms, code identifiers, names, known phrases. +No models loaded — near-instant results. + +```bash +qmd search "authentication middleware" +qmd search "handleError async" +``` + +### Semantic Vector Search + +Best for: natural language questions, conceptual queries. +Loads embedding model (~3s first query). 
+ +```bash +qmd vsearch "how does the rate limiter handle burst traffic" +qmd vsearch "ideas for improving onboarding flow" +``` + +### Hybrid Search with Reranking (Best Quality) + +Best for: important queries where quality matters most. +Uses all 3 models — query expansion, parallel BM25+vector, reranking. + +```bash +qmd query "what decisions were made about the database migration" +``` + +### Structured Multi-Mode Queries + +Combine different search types in a single query for precision: + +```bash +# BM25 for exact term + vector for concept +qmd query $'lex: rate limiter\nvec: how does throttling work under load' + +# With query expansion +qmd query $'expand: database migration plan\nlex: "schema change"' +``` + +### Query Syntax (lex/BM25 mode) + +| Syntax | Effect | Example | +|--------|--------|---------| +| `term` | Prefix match | `perf` matches "performance" | +| `"phrase"` | Exact phrase | `"rate limiter"` | +| `-term` | Exclude term | `performance -sports` | + +### HyDE (Hypothetical Document Embeddings) + +For complex topics, write what you expect the answer to look like: + +```bash +qmd query $'hyde: The migration plan involves three phases. First, we add the new columns without dropping the old ones. Then we backfill data. Finally we cut over and remove legacy columns.' +``` + +### Scoping to Collections + +```bash +qmd search "query" --collection notes +qmd query "query" --collection project-docs +``` + +### Output Formats + +```bash +qmd search "query" --json # JSON output (best for parsing) +qmd search "query" --limit 5 # Limit results +qmd get "#abc123" # Get by document ID +qmd get "path/to/file.md" # Get by file path +qmd get "file.md:50" -l 100 # Get specific line range +qmd multi-get "journals/*.md" --json # Batch retrieve by glob +``` + +## MCP Integration (Recommended) + +qmd exposes an MCP server that provides search tools directly to +Hermes Agent via the native MCP client. 
This is the preferred +integration — once configured, the agent gets qmd tools automatically +without needing to load this skill. + +### Option A: Stdio Mode (Simple) + +Add to `~/.hermes/config.yaml`: + +```yaml +mcp_servers: + qmd: + command: "qmd" + args: ["mcp"] + timeout: 30 + connect_timeout: 45 +``` + +This registers tools: `mcp_qmd_search`, `mcp_qmd_vsearch`, +`mcp_qmd_deep_search`, `mcp_qmd_get`, `mcp_qmd_status`. + +**Tradeoff:** Models load on first search call (~19s cold start), +then stay warm for the session. Acceptable for occasional use. + +### Option B: HTTP Daemon Mode (Fast, Recommended for Heavy Use) + +Start the qmd daemon separately — it keeps models warm in memory: + +```bash +# Start daemon (persists across agent restarts) +qmd mcp --http --daemon + +# Runs on http://localhost:8181 by default +``` + +Then configure Hermes Agent to connect via HTTP: + +```yaml +mcp_servers: + qmd: + url: "http://localhost:8181/mcp" + timeout: 30 +``` + +**Tradeoff:** Uses ~2GB RAM while running, but every query is fast +(~2-3s). Best for users who search frequently. 
+ +### Keeping the Daemon Running + +#### macOS (launchd) + +```bash +cat > ~/Library/LaunchAgents/com.qmd.daemon.plist << 'EOF' +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" + "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>Label</key> + <string>com.qmd.daemon</string> + <key>ProgramArguments</key> + <array> + <string>qmd</string> + <string>mcp</string> + <string>--http</string> + <string>--daemon</string> + </array> + <key>RunAtLoad</key> + <true/> + <key>KeepAlive</key> + <true/> + <key>StandardOutPath</key> + <string>/tmp/qmd-daemon.log</string> + <key>StandardErrorPath</key> + <string>/tmp/qmd-daemon.log</string> +</dict> +</plist> +EOF + +launchctl load ~/Library/LaunchAgents/com.qmd.daemon.plist +``` + +#### Linux (systemd user service) + +```bash +mkdir -p ~/.config/systemd/user + +cat > ~/.config/systemd/user/qmd-daemon.service << 'EOF' +[Unit] +Description=QMD MCP Daemon +After=network.target + +[Service] +ExecStart=qmd mcp --http --daemon +Restart=on-failure +RestartSec=10 +Environment=PATH=/usr/local/bin:/usr/bin:/bin + +[Install] +WantedBy=default.target +EOF + +systemctl --user daemon-reload +systemctl --user enable --now qmd-daemon +systemctl --user status qmd-daemon +``` + +### MCP Tools Reference + +Once connected, these tools are available as `mcp_qmd_*`: + +| MCP Tool | Maps To | Description | +|----------|---------|-------------| +| `mcp_qmd_search` | `qmd search` | BM25 keyword search | +| `mcp_qmd_vsearch` | `qmd vsearch` | Semantic vector search | +| `mcp_qmd_deep_search` | `qmd query` | Hybrid search + reranking | +| `mcp_qmd_get` | `qmd get` | Retrieve document by ID or path | +| `mcp_qmd_status` | `qmd status` | Index health and stats | + +The MCP tools accept structured JSON queries for multi-mode search: + +```json +{ + "searches": [ + {"type": "lex", "query": "authentication middleware"}, + {"type": "vec", "query": "how user login is verified"} + ], 
+ "collections": ["project-docs"], + "limit": 10 +} +``` + +## CLI Usage (Without MCP) + +When MCP is not configured, use qmd directly via terminal: + +``` +terminal(command="qmd query 'what was decided about the API redesign' --json", timeout=30) +``` + +For setup and management tasks, always use terminal: + +``` +terminal(command="qmd collection add ~/Documents/notes --name notes") +terminal(command="qmd context add qmd://notes 'Personal research notes and ideas'") +terminal(command="qmd embed") +terminal(command="qmd status") +``` + +## How the Search Pipeline Works + +Understanding the internals helps choose the right search mode: + +1. **Query Expansion** — A fine-tuned 1.7B model generates 2 alternative + queries. The original gets 2x weight in fusion. +2. **Parallel Retrieval** — BM25 (SQLite FTS5) and vector search run + simultaneously across all query variants. +3. **RRF Fusion** — Reciprocal Rank Fusion (k=60) merges results. + Top-rank bonus: #1 gets +0.05, #2-3 get +0.02. +4. **LLM Reranking** — qwen3-reranker scores top 30 candidates (0.0-1.0). +5. **Position-Aware Blending** — Ranks 1-3: 75% retrieval / 25% reranker. + Ranks 4-10: 60/40. Ranks 11+: 40/60 (trusts reranker more for long tail). + +**Smart Chunking:** Documents are split at natural break points (headings, +code blocks, blank lines) targeting ~900 tokens with 15% overlap. Code +blocks are never split mid-block. + +## Best Practices + +1. **Always add context descriptions** — `qmd context add` dramatically + improves retrieval accuracy. Describe what each collection contains. +2. **Re-embed after adding documents** — `qmd embed` must be re-run when + new files are added to collections. +3. **Use `qmd search` for speed** — when you need fast keyword lookup + (code identifiers, exact names), BM25 is instant and needs no models. +4. **Use `qmd query` for quality** — when the question is conceptual or + the user needs the best possible results, use hybrid search. +5. 
**Prefer MCP integration** — once configured, the agent gets native + tools without needing to load this skill each time. +6. **Daemon mode for frequent users** — if the user searches their + knowledge base regularly, recommend the HTTP daemon setup. +7. **First query in structured search gets 2x weight** — put the most + important/certain query first when combining lex and vec. + +## Troubleshooting + +### "Models downloading on first run" +Normal — qmd auto-downloads ~2GB of GGUF models on first use. +This is a one-time operation. + +### Cold start latency (~19s) +This happens when models aren't loaded in memory. Solutions: +- Use HTTP daemon mode (`qmd mcp --http --daemon`) to keep warm +- Use `qmd search` (BM25 only) when models aren't needed +- MCP stdio mode loads models on first search, stays warm for session + +### macOS: "unable to load extension" +Install Homebrew SQLite: `brew install sqlite` +Then ensure it's on PATH before system SQLite. + +### "No collections found" +Run `qmd collection add <path> --name <name>` to add directories, +then `qmd embed` to index them. + +### Embedding model override (CJK/multilingual) +Set `QMD_EMBED_MODEL` environment variable for non-English content: +```bash +export QMD_EMBED_MODEL="your-multilingual-model" +``` + +## Data Storage + +- **Index & vectors:** `~/.cache/qmd/index.sqlite` +- **Models:** Auto-downloaded to local cache on first run +- **No cloud dependencies** — everything runs locally + +## References + +- [GitHub: tobi/qmd](https://github.com/tobi/qmd) +- [QMD Changelog](https://github.com/tobi/qmd/blob/main/CHANGELOG.md) diff --git a/run_agent.py b/run_agent.py index 1806cf8a2..0537dd973 100644 --- a/run_agent.py +++ b/run_agent.py @@ -213,7 +213,7 @@ class AIAgent: Provided by the platform layer (CLI or gateway). If None, the clarify tool returns an error. 
max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set) reasoning_config (Dict): OpenRouter reasoning configuration override (e.g. {"effort": "none"} to disable thinking). - If None, defaults to {"enabled": True, "effort": "xhigh"} for OpenRouter. Set to disable/customize reasoning. + If None, defaults to {"enabled": True, "effort": "medium"} for OpenRouter. Set to disable/customize reasoning. prefill_messages (List[Dict]): Messages to prepend to conversation history as prefilled context. Useful for injecting a few-shot example or priming the model's response style. Example: [{"role": "user", "content": "Hi!"}, {"role": "assistant", "content": "Hello!"}] @@ -253,13 +253,7 @@ class AIAgent: self.provider = "openai-codex" else: self.api_mode = "chat_completions" - if base_url and "api.anthropic.com" in base_url.strip().lower(): - raise ValueError( - "Anthropic's native /v1/messages API is not supported yet (planned for a future release). " - "Hermes currently requires OpenAI-compatible /chat/completions endpoints. " - "To use Claude models now, route through OpenRouter (OPENROUTER_API_KEY) " - "or any OpenAI-compatible proxy that wraps the Anthropic API." - ) + self.tool_progress_callback = tool_progress_callback self.clarify_callback = clarify_callback self.step_callback = step_callback @@ -287,7 +281,7 @@ class AIAgent: # Model response configuration self.max_tokens = max_tokens # None = use model default - self.reasoning_config = reasoning_config # None = use default (xhigh for OpenRouter) + self.reasoning_config = reasoning_config # None = use default (medium for OpenRouter) self.prefill_messages = prefill_messages or [] # Prefilled conversation turns # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter. 
@@ -389,6 +383,12 @@ class AIAgent: "X-OpenRouter-Title": "Hermes Agent", "X-OpenRouter-Categories": "productivity,cli-agent", } + elif "api.kimi.com" in effective_base.lower(): + # Kimi Code API requires a recognized coding-agent User-Agent + # (see https://github.com/MoonshotAI/kimi-cli) + client_kwargs["default_headers"] = { + "User-Agent": "KimiCLI/1.0", + } self._client_kwargs = client_kwargs # stored for rebuilding after interrupt try: @@ -2157,8 +2157,8 @@ class AIAgent: if not instructions: instructions = DEFAULT_AGENT_IDENTITY - # Resolve reasoning effort: config > default (xhigh) - reasoning_effort = "xhigh" + # Resolve reasoning effort: config > default (medium) + reasoning_effort = "medium" reasoning_enabled = True if self.reasoning_config and isinstance(self.reasoning_config, dict): if self.reasoning_config.get("enabled") is False: @@ -2224,7 +2224,7 @@ class AIAgent: else: extra_body["reasoning"] = { "enabled": True, - "effort": "xhigh" + "effort": "medium" } # Nous Portal product attribution @@ -2484,6 +2484,8 @@ class AIAgent: if self._session_db: try: + # Propagate title to the new session with auto-numbering + old_title = self._session_db.get_session_title(self.session_id) self._session_db.end_session(self.session_id, "compression") old_session_id = self.session_id self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}" @@ -2493,6 +2495,13 @@ class AIAgent: model=self.model, parent_session_id=old_session_id, ) + # Auto-number the title for the continuation session + if old_title: + try: + new_title = self._session_db.get_next_title_in_lineage(old_title) + self._session_db.set_session_title(self.session_id, new_title) + except (ValueError, Exception) as e: + logger.debug("Could not propagate title on compression: %s", e) self._session_db.update_system_prompt(self.session_id, new_system_prompt) except Exception as e: logger.debug("Session DB compression split failed: %s", e) @@ -2619,7 +2628,6 @@ class AIAgent: 
context=function_args.get("context"), toolsets=function_args.get("toolsets"), tasks=tasks_arg, - model=function_args.get("model"), max_iterations=function_args.get("max_iterations"), parent_agent=self, ) @@ -2768,7 +2776,7 @@ class AIAgent: else: summary_extra_body["reasoning"] = { "enabled": True, - "effort": "xhigh" + "effort": "medium" } if _is_nous: summary_extra_body["tags"] = ["product=hermes-agent"] @@ -2880,13 +2888,15 @@ class AIAgent: # Generate unique task_id if not provided to isolate VMs between concurrent tasks effective_task_id = task_id or str(uuid.uuid4()) - # Reset retry counters at the start of each conversation to prevent state leakage + # Reset retry counters and iteration budget at the start of each turn + # so subagent usage from a previous turn doesn't eat into the next one. self._invalid_tool_retries = 0 self._invalid_json_retries = 0 self._empty_content_retries = 0 self._last_content_with_tools = None self._turns_since_memory = 0 self._iters_since_skill = 0 + self.iteration_budget = IterationBudget(self.max_iterations) # Initialize conversation (copy to avoid mutating the caller's list) messages = list(conversation_history) if conversation_history else [] @@ -3142,10 +3152,13 @@ class AIAgent: api_start_time = time.time() retry_count = 0 max_retries = 6 # Increased to allow longer backoff periods + compression_attempts = 0 + max_compression_attempts = 3 codex_auth_retry_attempted = False nous_auth_retry_attempted = False finish_reason = "stop" + response = None # Guard against UnboundLocalError if all retries fail while retry_count < max_retries: try: @@ -3441,7 +3454,19 @@ class AIAgent: ) if is_payload_too_large: - print(f"{self.log_prefix}⚠️ Request payload too large (413) - attempting compression...") + compression_attempts += 1 + if compression_attempts > max_compression_attempts: + print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.") + 
logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.") + self._persist_session(messages, conversation_history) + return { + "messages": messages, + "completed": False, + "api_calls": api_call_count, + "error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.", + "partial": True + } + print(f"{self.log_prefix}⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...") original_len = len(messages) messages, active_system_prompt = self._compress_context( @@ -3450,6 +3475,7 @@ class AIAgent: if len(messages) < original_len: print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...") + time.sleep(2) # Brief pause between compression retries continue # Retry with compressed messages else: print(f"{self.log_prefix}❌ Payload too large and cannot compress further.") @@ -3495,6 +3521,20 @@ class AIAgent: else: print(f"{self.log_prefix}⚠️ Context length exceeded at minimum tier — attempting compression...") + compression_attempts += 1 + if compression_attempts > max_compression_attempts: + print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.") + logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.") + self._persist_session(messages, conversation_history) + return { + "messages": messages, + "completed": False, + "api_calls": api_call_count, + "error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.", + "partial": True + } + print(f"{self.log_prefix} 🗜️ Context compression attempt {compression_attempts}/{max_compression_attempts}...") + original_len = len(messages) messages, active_system_prompt = self._compress_context( messages, system_message, approx_tokens=approx_tokens @@ -3503,6 +3543,7 @@ class AIAgent: if len(messages) < original_len or new_ctx and new_ctx < old_ctx: 
if len(messages) < original_len: print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...") + time.sleep(2) # Brief pause between compression retries continue # Retry with compressed messages or new tier else: # Can't compress further and already at minimum tier @@ -3581,6 +3622,14 @@ class AIAgent: if interrupted: break + # Guard: if all retries exhausted without a successful response + # (e.g. repeated context-length errors that exhausted retry_count), + # the `response` variable is still None. Break out cleanly. + if response is None: + print(f"{self.log_prefix}❌ All API retries exhausted with no successful response.") + self._persist_session(messages, conversation_history) + break + try: if self.api_mode == "codex_responses": assistant_message, finish_reason = self._normalize_codex_response(response) @@ -4006,7 +4055,12 @@ class AIAgent: final_response = f"I apologize, but I encountered repeated errors: {error_msg}" break - if api_call_count >= self.max_iterations and final_response is None: + if final_response is None and ( + api_call_count >= self.max_iterations + or self.iteration_budget.remaining <= 0 + ): + if self.iteration_budget.remaining <= 0 and not self.quiet_mode: + print(f"\n⚠️ Session iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} used, including subagents)") final_response = self._handle_max_iterations(messages, api_call_count) # Determine if conversation completed successfully @@ -4077,7 +4131,7 @@ def main( Args: query (str): Natural language query for the agent. Defaults to Python 3.13 example. - model (str): Model name to use (OpenRouter format: provider/model). Defaults to anthropic/claude-sonnet-4-20250514. + model (str): Model name to use (OpenRouter format: provider/model). Defaults to anthropic/claude-sonnet-4.6. api_key (str): API key for authentication. Uses OPENROUTER_API_KEY env var if not provided. base_url (str): Base URL for the model API. 
Defaults to https://openrouter.ai/api/v1 max_turns (int): Maximum number of API call iterations. Defaults to 10. diff --git a/scripts/install.sh b/scripts/install.sh index 5a6f7f736..b4a9716ba 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -829,6 +829,33 @@ install_node_deps() { log_warn "npm install failed (browser tools may not work)" } log_success "Node.js dependencies installed" + + # Install Playwright browser + system dependencies. + # Playwright's install-deps only supports apt/dnf/zypper natively. + # For Arch/Manjaro we install the system libs via pacman first. + log_info "Installing browser engine (Playwright Chromium)..." + case "$DISTRO" in + arch|manjaro) + if command -v pacman &> /dev/null; then + log_info "Arch/Manjaro detected — installing Chromium system dependencies via pacman..." + if command -v sudo &> /dev/null && sudo -n true 2>/dev/null; then + sudo NEEDRESTART_MODE=a pacman -S --noconfirm --needed \ + nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib >/dev/null 2>&1 || true + elif [ "$(id -u)" -eq 0 ]; then + pacman -S --noconfirm --needed \ + nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib >/dev/null 2>&1 || true + else + log_warn "Cannot install browser deps without sudo. 
Run manually:" + log_warn " sudo pacman -S nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib" + fi + fi + cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || true + ;; + *) + cd "$INSTALL_DIR" && npx playwright install --with-deps chromium 2>/dev/null || true + ;; + esac + log_success "Browser engine installed" fi # Install WhatsApp bridge dependencies diff --git a/skills/market-data/polymarket/SKILL.md b/skills/market-data/polymarket/SKILL.md new file mode 100644 index 000000000..d8b0ae7ce --- /dev/null +++ b/skills/market-data/polymarket/SKILL.md @@ -0,0 +1,76 @@ +--- +name: polymarket +description: Query Polymarket prediction market data — search markets, get prices, orderbooks, and price history. Read-only via public REST APIs, no API key needed. +version: 1.0.0 +author: Hermes Agent + Teknium +tags: [polymarket, prediction-markets, market-data, trading] +--- + +# Polymarket — Prediction Market Data + +Query prediction market data from Polymarket using their public REST APIs. +All endpoints are read-only and require zero authentication. + +See `references/api-endpoints.md` for the full endpoint reference with curl examples. + +## When to Use + +- User asks about prediction markets, betting odds, or event probabilities +- User wants to know "what are the odds of X happening?" 
+- User asks about Polymarket specifically +- User wants market prices, orderbook data, or price history +- User asks to monitor or track prediction market movements + +## Key Concepts + +- **Events** contain one or more **Markets** (1:many relationship) +- **Markets** are binary outcomes with Yes/No prices between 0.00 and 1.00 +- Prices ARE probabilities: price 0.65 means the market thinks 65% likely +- `outcomePrices` field: JSON-encoded array like `["0.80", "0.20"]` +- `clobTokenIds` field: JSON-encoded array of two token IDs [Yes, No] for price/book queries +- `conditionId` field: hex string used for price history queries +- Volume is in USDC (US dollars) + +## Three Public APIs + +1. **Gamma API** at `gamma-api.polymarket.com` — Discovery, search, browsing +2. **CLOB API** at `clob.polymarket.com` — Real-time prices, orderbooks, history +3. **Data API** at `data-api.polymarket.com` — Trades, open interest + +## Typical Workflow + +When a user asks about prediction market odds: + +1. **Search** using the Gamma API public-search endpoint with their query +2. **Parse** the response — extract events and their nested markets +3. **Present** market question, current prices as percentages, and volume +4. **Deep dive** if asked — use clobTokenIds for orderbook, conditionId for history + +## Presenting Results + +Format prices as percentages for readability: +- outcomePrices `["0.652", "0.348"]` becomes "Yes: 65.2%, No: 34.8%" +- Always show the market question and probability +- Include volume when available + +Example: `"Will X happen?" — 65.2% Yes ($1.2M volume)` + +## Parsing Double-Encoded Fields + +The Gamma API returns `outcomePrices`, `outcomes`, and `clobTokenIds` as JSON strings +inside JSON responses (double-encoded). When processing with Python, parse them with +`json.loads(market['outcomePrices'])` to get the actual array. 
+ +## Rate Limits + +Generous — unlikely to hit for normal usage: +- Gamma: 4,000 requests per 10 seconds (general) +- CLOB: 9,000 requests per 10 seconds (general) +- Data: 1,000 requests per 10 seconds (general) + +## Limitations + +- This skill is read-only — it does not support placing trades +- Trading requires wallet-based crypto authentication (EIP-712 signatures) +- Some new markets may have empty price history +- Geographic restrictions apply to trading but read-only data is globally accessible diff --git a/skills/market-data/polymarket/references/api-endpoints.md b/skills/market-data/polymarket/references/api-endpoints.md new file mode 100644 index 000000000..d91538fc4 --- /dev/null +++ b/skills/market-data/polymarket/references/api-endpoints.md @@ -0,0 +1,220 @@ +# Polymarket API Endpoints Reference + +All endpoints are public REST (GET), return JSON, and need no authentication. + +## Gamma API — gamma-api.polymarket.com + +### Search Markets + +``` +GET /public-search?q=QUERY +``` + +Response structure: +```json +{ + "events": [ + { + "id": "12345", + "title": "Event title", + "slug": "event-slug", + "volume": 1234567.89, + "markets": [ + { + "question": "Will X happen?", + "outcomePrices": "[\"0.65\", \"0.35\"]", + "outcomes": "[\"Yes\", \"No\"]", + "clobTokenIds": "[\"TOKEN_YES\", \"TOKEN_NO\"]", + "conditionId": "0xabc...", + "volume": 500000 + } + ] + } + ], + "pagination": {"hasMore": true, "totalResults": 100} +} +``` + +### List Events + +``` +GET /events?limit=N&active=true&closed=false&order=volume&ascending=false +``` + +Parameters: +- `limit` — max results (default varies) +- `offset` — pagination offset +- `active` — true/false +- `closed` — true/false +- `order` — sort field: `volume`, `createdAt`, `updatedAt` +- `ascending` — true/false +- `tag` — filter by tag slug +- `slug` — get specific event by slug + +Response: array of event objects. Each event includes a `markets` array. 
+ +Event fields: `id`, `title`, `slug`, `description`, `volume`, `liquidity`, +`openInterest`, `active`, `closed`, `category`, `startDate`, `endDate`, +`markets` (array of market objects). + +### List Markets + +``` +GET /markets?limit=N&active=true&closed=false&order=volume&ascending=false +``` + +Same filter parameters as events, plus: +- `slug` — get specific market by slug + +Market fields: `id`, `question`, `conditionId`, `slug`, `description`, +`outcomes`, `outcomePrices`, `volume`, `liquidity`, `active`, `closed`, +`marketType`, `clobTokenIds`, `endDate`, `category`, `createdAt`. + +Important: `outcomePrices`, `outcomes`, and `clobTokenIds` are JSON strings +(double-encoded). Parse with json.loads() in Python. + +### List Tags + +``` +GET /tags +``` + +Returns array of tag objects: `id`, `label`, `slug`. +Use the `slug` value when filtering events/markets by tag. + +--- + +## CLOB API — clob.polymarket.com + +All CLOB price endpoints use `token_id` from the market's `clobTokenIds` field. +Index 0 = Yes outcome, Index 1 = No outcome. + +### Current Price + +``` +GET /price?token_id=TOKEN_ID&side=buy +``` + +Response: `{"price": "0.650"}` + +The `side` parameter: `buy` or `sell`. + +### Midpoint Price + +``` +GET /midpoint?token_id=TOKEN_ID +``` + +Response: `{"mid": "0.645"}` + +### Spread + +``` +GET /spread?token_id=TOKEN_ID +``` + +Response: `{"spread": "0.02"}` + +### Orderbook + +``` +GET /book?token_id=TOKEN_ID +``` + +Response: +```json +{ + "market": "condition_id", + "asset_id": "token_id", + "bids": [{"price": "0.64", "size": "500"}, ...], + "asks": [{"price": "0.66", "size": "300"}, ...], + "min_order_size": "5", + "tick_size": "0.01", + "last_trade_price": "0.65" +} +``` + +Bids and asks are sorted by price. Size is in shares (USDC-denominated). 
+ +### Price History + +``` +GET /prices-history?market=CONDITION_ID&interval=INTERVAL&fidelity=N +``` + +Parameters: +- `market` — the conditionId (hex string with 0x prefix) +- `interval` — time range: `all`, `1d`, `1w`, `1m`, `3m`, `6m`, `1y` +- `fidelity` — number of data points to return + +Response: +```json +{ + "history": [ + {"t": 1709000000, "p": "0.55"}, + {"t": 1709100000, "p": "0.58"} + ] +} +``` + +`t` is Unix timestamp, `p` is price (probability). + +Note: Very new markets may return empty history. + +### CLOB Markets List + +``` +GET /markets?limit=N +``` + +Response: +```json +{ + "data": [ + { + "condition_id": "0xabc...", + "question": "Will X?", + "tokens": [ + {"token_id": "123...", "outcome": "Yes", "price": 0.65}, + {"token_id": "456...", "outcome": "No", "price": 0.35} + ], + "active": true, + "closed": false + } + ], + "next_cursor": "cursor_string", + "limit": 100, + "count": 1000 +} +``` + +--- + +## Data API — data-api.polymarket.com + +### Recent Trades + +``` +GET /trades?limit=N +GET /trades?market=CONDITION_ID&limit=N +``` + +Trade fields: `side` (BUY/SELL), `size`, `price`, `timestamp`, +`title`, `slug`, `outcome`, `transactionHash`, `conditionId`. + +### Open Interest + +``` +GET /oi?market=CONDITION_ID +``` + +--- + +## Field Cross-Reference + +To go from a Gamma market to CLOB data: + +1. Get market from Gamma: has `clobTokenIds` and `conditionId` +2. Parse `clobTokenIds` (JSON string): `["YES_TOKEN", "NO_TOKEN"]` +3. Use YES_TOKEN with `/price`, `/book`, `/midpoint`, `/spread` +4. Use `conditionId` with `/prices-history` and Data API endpoints diff --git a/skills/market-data/polymarket/scripts/polymarket.py b/skills/market-data/polymarket/scripts/polymarket.py new file mode 100644 index 000000000..417e0b174 --- /dev/null +++ b/skills/market-data/polymarket/scripts/polymarket.py @@ -0,0 +1,284 @@ +#!/usr/bin/env python3 +"""Polymarket CLI helper — query prediction market data. 
+ +Usage: + python3 polymarket.py search "bitcoin" + python3 polymarket.py trending [--limit 10] + python3 polymarket.py market <slug> + python3 polymarket.py event <slug> + python3 polymarket.py price <token_id> + python3 polymarket.py book <token_id> + python3 polymarket.py history <condition_id> [--interval all] [--fidelity 50] + python3 polymarket.py trades [--limit 10] [--market CONDITION_ID] +""" + +import json +import sys +import urllib.request +import urllib.parse +import urllib.error + +GAMMA = "https://gamma-api.polymarket.com" +CLOB = "https://clob.polymarket.com" +DATA = "https://data-api.polymarket.com" + + +def _get(url: str) -> dict | list: + """GET request, return parsed JSON.""" + req = urllib.request.Request(url, headers={"User-Agent": "hermes-agent/1.0"}) + try: + with urllib.request.urlopen(req, timeout=15) as resp: + return json.loads(resp.read().decode()) + except urllib.error.HTTPError as e: + print(f"HTTP {e.code}: {e.reason}", file=sys.stderr) + sys.exit(1) + except urllib.error.URLError as e: + print(f"Connection error: {e.reason}", file=sys.stderr) + sys.exit(1) + + +def _parse_json_field(val): + """Parse double-encoded JSON fields (outcomePrices, outcomes, clobTokenIds).""" + if isinstance(val, str): + try: + return json.loads(val) + except (json.JSONDecodeError, TypeError): + return val + return val + + +def _fmt_pct(price_str: str) -> str: + """Format price string as percentage.""" + try: + return f"{float(price_str) * 100:.1f}%" + except (ValueError, TypeError): + return price_str + + +def _fmt_volume(vol) -> str: + """Format volume as human-readable.""" + try: + v = float(vol) + if v >= 1_000_000: + return f"${v / 1_000_000:.1f}M" + if v >= 1_000: + return f"${v / 1_000:.1f}K" + return f"${v:.0f}" + except (ValueError, TypeError): + return str(vol) + + +def _print_market(m: dict, indent: str = ""): + """Print a market summary.""" + question = m.get("question", "?") + prices = _parse_json_field(m.get("outcomePrices", "[]")) + outcomes 
= _parse_json_field(m.get("outcomes", "[]")) + vol = _fmt_volume(m.get("volume", 0)) + closed = m.get("closed", False) + status = " [CLOSED]" if closed else "" + + if isinstance(prices, list) and len(prices) >= 2: + outcome_labels = outcomes if isinstance(outcomes, list) else ["Yes", "No"] + price_str = " / ".join( + f"{outcome_labels[i]}: {_fmt_pct(prices[i])}" + for i in range(min(len(prices), len(outcome_labels))) + ) + print(f"{indent}{question}{status}") + print(f"{indent} {price_str} | Volume: {vol}") + else: + print(f"{indent}{question}{status} | Volume: {vol}") + + slug = m.get("slug", "") + if slug: + print(f"{indent} slug: {slug}") + + +def cmd_search(query: str): + """Search for markets.""" + q = urllib.parse.quote(query) + data = _get(f"{GAMMA}/public-search?q={q}") + events = data.get("events", []) + total = data.get("pagination", {}).get("totalResults", len(events)) + print(f"Found {total} results for \"{query}\":\n") + for evt in events[:10]: + print(f"=== {evt['title']} ===") + print(f" Volume: {_fmt_volume(evt.get('volume', 0))} | slug: {evt.get('slug', '')}") + markets = evt.get("markets", []) + for m in markets[:5]: + _print_market(m, indent=" ") + if len(markets) > 5: + print(f" ... and {len(markets) - 5} more markets") + print() + + +def cmd_trending(limit: int = 10): + """Show trending events by volume.""" + events = _get(f"{GAMMA}/events?limit={limit}&active=true&closed=false&order=volume&ascending=false") + print(f"Top {len(events)} trending events:\n") + for i, evt in enumerate(events, 1): + print(f"{i}. {evt['title']}") + print(f" Volume: {_fmt_volume(evt.get('volume', 0))} | Markets: {len(evt.get('markets', []))}") + print(f" slug: {evt.get('slug', '')}") + markets = evt.get("markets", []) + for m in markets[:3]: + _print_market(m, indent=" ") + if len(markets) > 3: + print(f" ... 
and {len(markets) - 3} more markets") + print() + + +def cmd_market(slug: str): + """Get market details by slug.""" + markets = _get(f"{GAMMA}/markets?slug={urllib.parse.quote(slug)}") + if not markets: + print(f"No market found with slug: {slug}") + return + m = markets[0] + print(f"Market: {m.get('question', '?')}") + print(f"Status: {'CLOSED' if m.get('closed') else 'ACTIVE'}") + _print_market(m) + print(f"\n conditionId: {m.get('conditionId', 'N/A')}") + tokens = _parse_json_field(m.get("clobTokenIds", "[]")) + if isinstance(tokens, list): + outcomes = _parse_json_field(m.get("outcomes", "[]")) + for i, t in enumerate(tokens): + label = outcomes[i] if isinstance(outcomes, list) and i < len(outcomes) else f"Outcome {i}" + print(f" token ({label}): {t}") + desc = m.get("description", "") + if desc: + print(f"\n Description: {desc[:500]}") + + +def cmd_event(slug: str): + """Get event details by slug.""" + events = _get(f"{GAMMA}/events?slug={urllib.parse.quote(slug)}") + if not events: + print(f"No event found with slug: {slug}") + return + evt = events[0] + print(f"Event: {evt['title']}") + print(f"Volume: {_fmt_volume(evt.get('volume', 0))}") + print(f"Status: {'CLOSED' if evt.get('closed') else 'ACTIVE'}") + print(f"Markets: {len(evt.get('markets', []))}\n") + for m in evt.get("markets", []): + _print_market(m, indent=" ") + print() + + +def cmd_price(token_id: str): + """Get current price for a token.""" + buy = _get(f"{CLOB}/price?token_id={token_id}&side=buy") + mid = _get(f"{CLOB}/midpoint?token_id={token_id}") + spread = _get(f"{CLOB}/spread?token_id={token_id}") + print(f"Token: {token_id[:30]}...") + print(f" Buy price: {_fmt_pct(buy.get('price', '?'))}") + print(f" Midpoint: {_fmt_pct(mid.get('mid', '?'))}") + print(f" Spread: {spread.get('spread', '?')}") + + +def cmd_book(token_id: str): + """Get orderbook for a token.""" + book = _get(f"{CLOB}/book?token_id={token_id}") + bids = book.get("bids", []) + asks = book.get("asks", []) + last = 
book.get("last_trade_price", "?") + print(f"Orderbook for {token_id[:30]}...") + print(f"Last trade: {_fmt_pct(last)} | Tick size: {book.get('tick_size', '?')}") + print(f"\n Top bids ({len(bids)} total):") + # Show bids sorted by price descending (best bids first) + sorted_bids = sorted(bids, key=lambda x: float(x.get("price", 0)), reverse=True) + for b in sorted_bids[:10]: + print(f" {_fmt_pct(b['price']):>7} | Size: {float(b['size']):>10.2f}") + print(f"\n Top asks ({len(asks)} total):") + sorted_asks = sorted(asks, key=lambda x: float(x.get("price", 0))) + for a in sorted_asks[:10]: + print(f" {_fmt_pct(a['price']):>7} | Size: {float(a['size']):>10.2f}") + + +def cmd_history(condition_id: str, interval: str = "all", fidelity: int = 50): + """Get price history for a market.""" + data = _get(f"{CLOB}/prices-history?market={condition_id}&interval={interval}&fidelity={fidelity}") + history = data.get("history", []) + if not history: + print("No price history available for this market.") + return + print(f"Price history ({len(history)} points, interval={interval}):\n") + from datetime import datetime, timezone + for pt in history: + ts = datetime.fromtimestamp(pt["t"], tz=timezone.utc).strftime("%Y-%m-%d %H:%M") + price = _fmt_pct(pt["p"]) + bar = "█" * int(float(pt["p"]) * 40) + print(f" {ts} {price:>7} {bar}") + + +def cmd_trades(limit: int = 10, market: str = None): + """Get recent trades.""" + url = f"{DATA}/trades?limit={limit}" + if market: + url += f"&market={market}" + trades = _get(url) + if not isinstance(trades, list): + print(f"Unexpected response: {trades}") + return + print(f"Recent trades ({len(trades)}):\n") + for t in trades: + side = t.get("side", "?") + price = _fmt_pct(t.get("price", "?")) + size = t.get("size", "?") + outcome = t.get("outcome", "?") + title = t.get("title", "?")[:50] + ts = t.get("timestamp", "") + print(f" {side:4} {price:>7} x{float(size):>8.2f} [{outcome}] {title}") + + +def main(): + args = sys.argv[1:] + if not args or 
args[0] in ("-h", "--help", "help"): + print(__doc__) + return + + cmd = args[0] + + if cmd == "search" and len(args) >= 2: + cmd_search(" ".join(args[1:])) + elif cmd == "trending": + limit = 10 + if "--limit" in args: + idx = args.index("--limit") + limit = int(args[idx + 1]) if idx + 1 < len(args) else 10 + cmd_trending(limit) + elif cmd == "market" and len(args) >= 2: + cmd_market(args[1]) + elif cmd == "event" and len(args) >= 2: + cmd_event(args[1]) + elif cmd == "price" and len(args) >= 2: + cmd_price(args[1]) + elif cmd == "book" and len(args) >= 2: + cmd_book(args[1]) + elif cmd == "history" and len(args) >= 2: + interval = "all" + fidelity = 50 + if "--interval" in args: + idx = args.index("--interval") + interval = args[idx + 1] if idx + 1 < len(args) else "all" + if "--fidelity" in args: + idx = args.index("--fidelity") + fidelity = int(args[idx + 1]) if idx + 1 < len(args) else 50 + cmd_history(args[1], interval, fidelity) + elif cmd == "trades": + limit = 10 + market = None + if "--limit" in args: + idx = args.index("--limit") + limit = int(args[idx + 1]) if idx + 1 < len(args) else 10 + if "--market" in args: + idx = args.index("--market") + market = args[idx + 1] if idx + 1 < len(args) else None + cmd_trades(limit, market) + else: + print(f"Unknown command: {cmd}") + print(__doc__) + + +if __name__ == "__main__": + main() diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 393e48204..29b49fd18 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -176,3 +176,93 @@ class TestCompressWithClient: contents = [m.get("content", "") for m in result] assert any("CONTEXT SUMMARY" in c for c in contents) assert len(result) < len(msgs) + + def test_summarization_does_not_split_tool_call_pairs(self): + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: 
compressed middle" + mock_client.chat.completions.create.return_value = mock_response + + with patch("agent.context_compressor.get_model_context_length", return_value=100000), \ + patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")): + c = ContextCompressor( + model="test", + quiet_mode=True, + protect_first_n=3, + protect_last_n=4, + ) + + msgs = [ + {"role": "user", "content": "Could you address the reviewer comments in PR#71"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "call_a", "type": "function", "function": {"name": "skill_view", "arguments": "{}"}}, + {"id": "call_b", "type": "function", "function": {"name": "skill_view", "arguments": "{}"}}, + ], + }, + {"role": "tool", "tool_call_id": "call_a", "content": "output a"}, + {"role": "tool", "tool_call_id": "call_b", "content": "output b"}, + {"role": "user", "content": "later 1"}, + {"role": "assistant", "content": "later 2"}, + {"role": "tool", "tool_call_id": "call_x", "content": "later output"}, + {"role": "assistant", "content": "later 3"}, + {"role": "user", "content": "later 4"}, + ] + + result = c.compress(msgs) + + answered_ids = { + msg.get("tool_call_id") + for msg in result + if msg.get("role") == "tool" and msg.get("tool_call_id") + } + for msg in result: + if msg.get("role") == "assistant" and msg.get("tool_calls"): + for tc in msg["tool_calls"]: + assert tc["id"] in answered_ids + + def test_summarization_does_not_start_tail_with_tool_outputs(self): + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle" + mock_client.chat.completions.create.return_value = mock_response + + with patch("agent.context_compressor.get_model_context_length", return_value=100000), \ + patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")): + c = ContextCompressor( + 
model="test", + quiet_mode=True, + protect_first_n=2, + protect_last_n=3, + ) + + msgs = [ + {"role": "user", "content": "earlier 1"}, + {"role": "assistant", "content": "earlier 2"}, + {"role": "user", "content": "earlier 3"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "call_c", "type": "function", "function": {"name": "search_files", "arguments": "{}"}}, + ], + }, + {"role": "tool", "tool_call_id": "call_c", "content": "output c"}, + {"role": "user", "content": "latest user"}, + ] + + result = c.compress(msgs) + + called_ids = { + tc["id"] + for msg in result + if msg.get("role") == "assistant" and msg.get("tool_calls") + for tc in msg["tool_calls"] + } + for msg in result: + if msg.get("role") == "tool" and msg.get("tool_call_id"): + assert msg["tool_call_id"] in called_ids diff --git a/tests/gateway/test_async_memory_flush.py b/tests/gateway/test_async_memory_flush.py new file mode 100644 index 000000000..675746920 --- /dev/null +++ b/tests/gateway/test_async_memory_flush.py @@ -0,0 +1,180 @@ +"""Tests for proactive memory flush on session expiry. + +Verifies that: +1. _is_session_expired() works from a SessionEntry alone (no source needed) +2. The sync callback is no longer called in get_or_create_session +3. _pre_flushed_sessions tracking works correctly +4. 
"""Tests for proactive memory flush on session expiry.

Verifies that:
1. _is_session_expired() works from a SessionEntry alone (no source needed)
2. The sync callback is no longer called in get_or_create_session
3. _pre_flushed_sessions tracking works correctly
4. The background watcher can detect expired sessions
"""

import pytest
from datetime import datetime, timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock

from gateway.config import Platform, GatewayConfig, SessionResetPolicy
from gateway.session import SessionSource, SessionStore, SessionEntry


def _bare_store(tmp_path, policy):
    """Build a SessionStore with disk loading patched out and a given policy."""
    config = GatewayConfig(default_reset_policy=policy)
    with patch("gateway.session.SessionStore._ensure_loaded"):
        store = SessionStore(sessions_dir=tmp_path, config=config)
        store._db = None
        store._loaded = True
    return store


@pytest.fixture()
def idle_store(tmp_path):
    """SessionStore with a 60-minute idle reset policy."""
    return _bare_store(tmp_path, SessionResetPolicy(mode="idle", idle_minutes=60))


@pytest.fixture()
def no_reset_store(tmp_path):
    """SessionStore with no reset policy (mode=none)."""
    return _bare_store(tmp_path, SessionResetPolicy(mode="none"))


def _entry(session_id, *, age, idle=None):
    """Build a telegram DM SessionEntry created `age` ago and idle for `idle`."""
    now = datetime.now()
    return SessionEntry(
        session_key="agent:main:telegram:dm",
        session_id=session_id,
        created_at=now - age,
        updated_at=now - (idle if idle is not None else age),
        platform=Platform.TELEGRAM,
        chat_type="dm",
    )


class TestIsSessionExpired:
    """_is_session_expired should detect expiry from entry alone."""

    def test_idle_session_expired(self, idle_store):
        entry = _entry("sid_1", age=timedelta(hours=3), idle=timedelta(minutes=120))
        assert idle_store._is_session_expired(entry) is True

    def test_active_session_not_expired(self, idle_store):
        entry = _entry("sid_2", age=timedelta(hours=1), idle=timedelta(minutes=10))
        assert idle_store._is_session_expired(entry) is False

    def test_none_mode_never_expires(self, no_reset_store):
        entry = _entry("sid_3", age=timedelta(days=30))
        assert no_reset_store._is_session_expired(entry) is False

    def test_active_processes_prevent_expiry(self, idle_store):
        """Sessions with active background processes should never expire."""
        idle_store._has_active_processes_fn = lambda key: True
        entry = _entry("sid_4", age=timedelta(hours=5))
        assert idle_store._is_session_expired(entry) is False

    def test_daily_mode_expired(self, tmp_path):
        """Daily mode should expire sessions from before today's reset hour."""
        store = _bare_store(tmp_path, SessionResetPolicy(mode="daily", at_hour=4))
        entry = _entry("sid_5", age=timedelta(days=2))
        assert store._is_session_expired(entry) is True


class TestGetOrCreateSessionNoCallback:
    """get_or_create_session should NOT call a sync flush callback."""

    def test_auto_reset_cleans_pre_flushed_marker(self, idle_store):
        """When a session auto-resets, the pre_flushed marker should be discarded."""
        source = SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="123",
            chat_type="dm",
        )
        # Create initial session
        first = idle_store.get_or_create_session(source)
        old_sid = first.session_id

        # Simulate the watcher having flushed it
        idle_store._pre_flushed_sessions.add(old_sid)

        # Simulate the session going idle
        first.updated_at = datetime.now() - timedelta(minutes=120)
        idle_store._save()

        # Next call should auto-reset
        second = idle_store.get_or_create_session(source)
        assert second.session_id != old_sid
        assert second.was_auto_reset is True

        # The old session_id should be removed from pre_flushed
        assert old_sid not in idle_store._pre_flushed_sessions

    def test_no_sync_callback_invoked(self, idle_store):
        """No synchronous callback should block during auto-reset."""
        source = SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="123",
            chat_type="dm",
        )
        first = idle_store.get_or_create_session(source)
        first.updated_at = datetime.now() - timedelta(minutes=120)
        idle_store._save()

        # Verify no _on_auto_reset attribute
        assert not hasattr(idle_store, '_on_auto_reset')

        # This should NOT block (no sync LLM call)
        second = idle_store.get_or_create_session(source)
        assert second.was_auto_reset is True


class TestPreFlushedSessionsTracking:
    """The _pre_flushed_sessions set should prevent double-flushing."""

    def test_starts_empty(self, idle_store):
        assert len(idle_store._pre_flushed_sessions) == 0

    def test_add_and_check(self, idle_store):
        idle_store._pre_flushed_sessions.add("sid_old")
        assert "sid_old" in idle_store._pre_flushed_sessions
        assert "sid_other" not in idle_store._pre_flushed_sessions

    def test_discard_on_reset(self, idle_store):
        """discard should remove without raising if not present."""
        idle_store._pre_flushed_sessions.add("sid_a")
        idle_store._pre_flushed_sessions.discard("sid_a")
        assert "sid_a" not in idle_store._pre_flushed_sessions
        # discard on non-existent should not raise
        idle_store._pre_flushed_sessions.discard("sid_nonexistent")
"""Tests for /resume gateway slash command.

Tests the _handle_resume_command handler (switch to a previously-named session)
across gateway messenger platforms.
"""

from unittest.mock import MagicMock, AsyncMock

import pytest

from gateway.config import Platform
from gateway.platforms.base import MessageEvent
from gateway.session import SessionSource, build_session_key


def _make_event(text="/resume", platform=Platform.TELEGRAM,
                user_id="12345", chat_id="67890"):
    """Build a MessageEvent for testing."""
    source = SessionSource(
        platform=platform,
        user_id=user_id,
        chat_id=chat_id,
        user_name="testuser",
    )
    return MessageEvent(text=text, source=source)


def _session_key_for_event(event):
    """Get the session key that build_session_key produces for an event."""
    return build_session_key(event.source)


def _make_runner(session_db=None, current_session_id="current_session_001",
                 event=None):
    """Create a bare GatewayRunner with a mock session_store and optional session_db."""
    from gateway.run import GatewayRunner
    runner = object.__new__(GatewayRunner)
    runner.adapters = {}
    runner._session_db = session_db
    runner._running_agents = {}

    # Derive the real session key when an event is supplied; otherwise a
    # stable placeholder key is good enough for the tests that don't care.
    if event is not None:
        session_key = build_session_key(event.source)
    else:
        session_key = "agent:main:telegram:dm"

    # Fake session_store: always hands back one entry with a known session_id.
    session_entry = MagicMock()
    session_entry.session_id = current_session_id
    session_entry.session_key = session_key

    store = MagicMock()
    store.get_or_create_session.return_value = session_entry
    store.load_transcript.return_value = []
    store.switch_session.return_value = session_entry
    runner.session_store = store

    # Stub out memory flushing so nothing asynchronous actually runs.
    runner._async_flush_memories = AsyncMock()

    return runner


# ---------------------------------------------------------------------------
# _handle_resume_command
# ---------------------------------------------------------------------------


class TestHandleResumeCommand:
    """Tests for GatewayRunner._handle_resume_command."""

    @pytest.mark.asyncio
    async def test_no_session_db(self):
        """Returns error when session database is unavailable."""
        runner = _make_runner(session_db=None)
        event = _make_event(text="/resume My Project")
        reply = await runner._handle_resume_command(event)
        assert "not available" in reply.lower()

    @pytest.mark.asyncio
    async def test_list_named_sessions_when_no_arg(self, tmp_path):
        """With no argument, lists recently titled sessions."""
        from hermes_state import SessionDB
        db = SessionDB(db_path=tmp_path / "state.db")
        db.create_session("sess_001", "telegram")
        db.create_session("sess_002", "telegram")
        db.set_session_title("sess_001", "Research")
        db.set_session_title("sess_002", "Coding")

        event = _make_event(text="/resume")
        runner = _make_runner(session_db=db, event=event)
        reply = await runner._handle_resume_command(event)
        assert "Research" in reply
        assert "Coding" in reply
        assert "Named Sessions" in reply
        db.close()

    @pytest.mark.asyncio
    async def test_list_shows_usage_when_no_titled(self, tmp_path):
        """With no arg and no titled sessions, shows instructions."""
        from hermes_state import SessionDB
        db = SessionDB(db_path=tmp_path / "state.db")
        db.create_session("sess_001", "telegram")  # No title

        event = _make_event(text="/resume")
        runner = _make_runner(session_db=db, event=event)
        reply = await runner._handle_resume_command(event)
        assert "No named sessions" in reply
        assert "/title" in reply
        db.close()

    @pytest.mark.asyncio
    async def test_resume_by_name(self, tmp_path):
        """Resolves a title and switches to that session."""
        from hermes_state import SessionDB
        db = SessionDB(db_path=tmp_path / "state.db")
        db.create_session("old_session_abc", "telegram")
        db.set_session_title("old_session_abc", "My Project")
        db.create_session("current_session_001", "telegram")

        event = _make_event(text="/resume My Project")
        runner = _make_runner(session_db=db, current_session_id="current_session_001",
                              event=event)
        reply = await runner._handle_resume_command(event)

        assert "Resumed" in reply
        assert "My Project" in reply
        # Verify switch_session was called with the old session ID
        runner.session_store.switch_session.assert_called_once()
        call_args = runner.session_store.switch_session.call_args
        assert call_args[0][1] == "old_session_abc"
        db.close()

    @pytest.mark.asyncio
    async def test_resume_nonexistent_name(self, tmp_path):
        """Returns error for unknown session name."""
        from hermes_state import SessionDB
        db = SessionDB(db_path=tmp_path / "state.db")
        db.create_session("current_session_001", "telegram")

        event = _make_event(text="/resume Nonexistent Session")
        runner = _make_runner(session_db=db, event=event)
        reply = await runner._handle_resume_command(event)
        assert "No session found" in reply
        db.close()

    @pytest.mark.asyncio
    async def test_resume_already_on_session(self, tmp_path):
        """Returns friendly message when already on the requested session."""
        from hermes_state import SessionDB
        db = SessionDB(db_path=tmp_path / "state.db")
        db.create_session("current_session_001", "telegram")
        db.set_session_title("current_session_001", "Active Project")

        event = _make_event(text="/resume Active Project")
        runner = _make_runner(session_db=db, current_session_id="current_session_001",
                              event=event)
        reply = await runner._handle_resume_command(event)
        assert "Already on session" in reply
        db.close()

    @pytest.mark.asyncio
    async def test_resume_auto_lineage(self, tmp_path):
        """Asking for 'My Project' when 'My Project #2' exists gets the latest."""
        from hermes_state import SessionDB
        db = SessionDB(db_path=tmp_path / "state.db")
        db.create_session("sess_v1", "telegram")
        db.set_session_title("sess_v1", "My Project")
        db.create_session("sess_v2", "telegram")
        db.set_session_title("sess_v2", "My Project #2")
        db.create_session("current_session_001", "telegram")

        event = _make_event(text="/resume My Project")
        runner = _make_runner(session_db=db, current_session_id="current_session_001",
                              event=event)
        reply = await runner._handle_resume_command(event)

        assert "Resumed" in reply
        # Should resolve to #2 (latest in lineage)
        call_args = runner.session_store.switch_session.call_args
        assert call_args[0][1] == "sess_v2"
        db.close()

    @pytest.mark.asyncio
    async def test_resume_clears_running_agent(self, tmp_path):
        """Switching sessions clears any cached running agent."""
        from hermes_state import SessionDB
        db = SessionDB(db_path=tmp_path / "state.db")
        db.create_session("old_session", "telegram")
        db.set_session_title("old_session", "Old Work")
        db.create_session("current_session_001", "telegram")

        event = _make_event(text="/resume Old Work")
        runner = _make_runner(session_db=db, current_session_id="current_session_001",
                              event=event)
        # Simulate a running agent using the real session key
        real_key = _session_key_for_event(event)
        runner._running_agents[real_key] = MagicMock()

        await runner._handle_resume_command(event)

        assert real_key not in runner._running_agents
        db.close()
+""" + +import asyncio +import os +import sys +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from gateway.config import PlatformConfig +from gateway.platforms.base import BasePlatformAdapter, SendResult + + +# --------------------------------------------------------------------------- +# MEDIA: extraction tests for image files +# --------------------------------------------------------------------------- + + +class TestExtractMediaImages: + """Test that MEDIA: tags with image extensions are correctly extracted.""" + + def test_png_image_extracted(self): + content = "Here is the screenshot:\nMEDIA:/home/user/.hermes/browser_screenshots/shot.png" + media, cleaned = BasePlatformAdapter.extract_media(content) + assert len(media) == 1 + assert media[0][0] == "/home/user/.hermes/browser_screenshots/shot.png" + assert "MEDIA:" not in cleaned + assert "Here is the screenshot" in cleaned + + def test_jpg_image_extracted(self): + content = "MEDIA:/tmp/photo.jpg" + media, cleaned = BasePlatformAdapter.extract_media(content) + assert len(media) == 1 + assert media[0][0] == "/tmp/photo.jpg" + + def test_webp_image_extracted(self): + content = "MEDIA:/tmp/image.webp" + media, _ = BasePlatformAdapter.extract_media(content) + assert len(media) == 1 + + def test_mixed_audio_and_image(self): + content = "MEDIA:/audio.ogg\nMEDIA:/screenshot.png" + media, _ = BasePlatformAdapter.extract_media(content) + assert len(media) == 2 + paths = [m[0] for m in media] + assert "/audio.ogg" in paths + assert "/screenshot.png" in paths + + +# --------------------------------------------------------------------------- +# Telegram send_image_file tests +# --------------------------------------------------------------------------- + + +def _ensure_telegram_mock(): + """Install mock telegram modules so TelegramAdapter can be imported.""" + if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"): + return + + telegram_mod = MagicMock() + 
telegram_mod.ext.ContextTypes.DEFAULT_TYPE = type(None) + telegram_mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2" + telegram_mod.constants.ChatType.GROUP = "group" + telegram_mod.constants.ChatType.SUPERGROUP = "supergroup" + telegram_mod.constants.ChatType.CHANNEL = "channel" + telegram_mod.constants.ChatType.PRIVATE = "private" + + for name in ("telegram", "telegram.ext", "telegram.constants"): + sys.modules.setdefault(name, telegram_mod) + + +_ensure_telegram_mock() + +from gateway.platforms.telegram import TelegramAdapter # noqa: E402 + + +class TestTelegramSendImageFile: + @pytest.fixture + def adapter(self): + config = PlatformConfig(enabled=True, token="fake-token") + a = TelegramAdapter(config) + a._bot = MagicMock() + return a + + def test_sends_local_image_as_photo(self, adapter, tmp_path): + """send_image_file should call bot.send_photo with the opened file.""" + img = tmp_path / "screenshot.png" + img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 100) # Minimal PNG-like + + mock_msg = MagicMock() + mock_msg.message_id = 42 + adapter._bot.send_photo = AsyncMock(return_value=mock_msg) + + result = asyncio.get_event_loop().run_until_complete( + adapter.send_image_file(chat_id="12345", image_path=str(img)) + ) + assert result.success + assert result.message_id == "42" + adapter._bot.send_photo.assert_awaited_once() + + # Verify photo arg was a file object (opened in rb mode) + call_kwargs = adapter._bot.send_photo.call_args + assert call_kwargs.kwargs["chat_id"] == 12345 + + def test_returns_error_when_file_missing(self, adapter): + """send_image_file should return error for nonexistent file.""" + result = asyncio.get_event_loop().run_until_complete( + adapter.send_image_file(chat_id="12345", image_path="/nonexistent/image.png") + ) + assert not result.success + assert "not found" in result.error + + def test_returns_error_when_not_connected(self, adapter): + """send_image_file should return error when bot is None.""" + adapter._bot = None + result = 
asyncio.get_event_loop().run_until_complete( + adapter.send_image_file(chat_id="12345", image_path="/tmp/img.png") + ) + assert not result.success + assert "Not connected" in result.error + + def test_caption_truncated_to_1024(self, adapter, tmp_path): + """Telegram captions have a 1024 char limit.""" + img = tmp_path / "shot.png" + img.write_bytes(b"\x89PNG" + b"\x00" * 50) + + mock_msg = MagicMock() + mock_msg.message_id = 1 + adapter._bot.send_photo = AsyncMock(return_value=mock_msg) + + long_caption = "A" * 2000 + asyncio.get_event_loop().run_until_complete( + adapter.send_image_file(chat_id="12345", image_path=str(img), caption=long_caption) + ) + + call_kwargs = adapter._bot.send_photo.call_args.kwargs + assert len(call_kwargs["caption"]) == 1024 + + +# --------------------------------------------------------------------------- +# Discord send_image_file tests +# --------------------------------------------------------------------------- + + +def _ensure_discord_mock(): + """Install mock discord module so DiscordAdapter can be imported.""" + if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"): + return + + discord_mod = MagicMock() + discord_mod.Intents.default.return_value = MagicMock() + discord_mod.Client = MagicMock + discord_mod.File = MagicMock + + for name in ("discord", "discord.ext", "discord.ext.commands"): + sys.modules.setdefault(name, discord_mod) + + +_ensure_discord_mock() + +import discord as discord_mod_ref # noqa: E402 +from gateway.platforms.discord import DiscordAdapter # noqa: E402 + + +class TestDiscordSendImageFile: + @pytest.fixture + def adapter(self): + config = PlatformConfig(enabled=True, token="fake-token") + a = DiscordAdapter(config) + a._client = MagicMock() + return a + + def test_sends_local_image_as_attachment(self, adapter, tmp_path): + """send_image_file should create discord.File and send to channel.""" + img = tmp_path / "screenshot.png" + img.write_bytes(b"\x89PNG" + b"\x00" * 50) + + 
mock_channel = MagicMock() + mock_msg = MagicMock() + mock_msg.id = 99 + mock_channel.send = AsyncMock(return_value=mock_msg) + adapter._client.get_channel = MagicMock(return_value=mock_channel) + + result = asyncio.get_event_loop().run_until_complete( + adapter.send_image_file(chat_id="67890", image_path=str(img)) + ) + assert result.success + assert result.message_id == "99" + mock_channel.send.assert_awaited_once() + + def test_returns_error_when_file_missing(self, adapter): + result = asyncio.get_event_loop().run_until_complete( + adapter.send_image_file(chat_id="67890", image_path="/nonexistent.png") + ) + assert not result.success + assert "not found" in result.error + + def test_returns_error_when_not_connected(self, adapter): + adapter._client = None + result = asyncio.get_event_loop().run_until_complete( + adapter.send_image_file(chat_id="67890", image_path="/tmp/img.png") + ) + assert not result.success + assert "Not connected" in result.error + + def test_handles_missing_channel(self, adapter): + adapter._client.get_channel = MagicMock(return_value=None) + adapter._client.fetch_channel = AsyncMock(return_value=None) + + result = asyncio.get_event_loop().run_until_complete( + adapter.send_image_file(chat_id="99999", image_path="/tmp/img.png") + ) + assert not result.success + assert "not found" in result.error + + +# --------------------------------------------------------------------------- +# Slack send_image_file tests +# --------------------------------------------------------------------------- + + +def _ensure_slack_mock(): + """Install mock slack_bolt module so SlackAdapter can be imported.""" + if "slack_bolt" in sys.modules and hasattr(sys.modules["slack_bolt"], "__file__"): + return + + slack_mod = MagicMock() + for name in ("slack_bolt", "slack_bolt.async_app", "slack_sdk", "slack_sdk.web.async_client"): + sys.modules.setdefault(name, slack_mod) + + +_ensure_slack_mock() + +from gateway.platforms.slack import SlackAdapter # noqa: E402 + + 
class TestSlackSendImageFile:
    @pytest.fixture
    def adapter(self):
        cfg = PlatformConfig(enabled=True, token="xoxb-fake")
        slack_adapter = SlackAdapter(cfg)
        slack_adapter._app = MagicMock()
        return slack_adapter

    def test_sends_local_image_via_upload(self, adapter, tmp_path):
        """send_image_file should call files_upload_v2 with the local path."""
        img = tmp_path / "screenshot.png"
        img.write_bytes(b"\x89PNG" + b"\x00" * 50)

        adapter._app.client.files_upload_v2 = AsyncMock(return_value=MagicMock())

        result = asyncio.get_event_loop().run_until_complete(
            adapter.send_image_file(chat_id="C12345", image_path=str(img))
        )
        assert result.success
        adapter._app.client.files_upload_v2.assert_awaited_once()

        upload_kwargs = adapter._app.client.files_upload_v2.call_args.kwargs
        assert upload_kwargs["file"] == str(img)
        assert upload_kwargs["filename"] == "screenshot.png"
        assert upload_kwargs["channel"] == "C12345"

    def test_returns_error_when_file_missing(self, adapter):
        result = asyncio.get_event_loop().run_until_complete(
            adapter.send_image_file(chat_id="C12345", image_path="/nonexistent.png")
        )
        assert not result.success
        assert "not found" in result.error

    def test_returns_error_when_not_connected(self, adapter):
        adapter._app = None
        result = asyncio.get_event_loop().run_until_complete(
            adapter.send_image_file(chat_id="C12345", image_path="/tmp/img.png")
        )
        assert not result.success
        assert "Not connected" in result.error


# ---------------------------------------------------------------------------
# browser_vision screenshot cleanup tests
# ---------------------------------------------------------------------------


class TestScreenshotCleanup:
    def test_cleanup_removes_old_screenshots(self, tmp_path):
        """_cleanup_old_screenshots should remove files older than max_age_hours."""
        import time
        from tools.browser_tool import _cleanup_old_screenshots

        # A file touched just now must survive the sweep.
        fresh = tmp_path / "browser_screenshot_fresh.png"
        fresh.write_bytes(b"new")

        # Backdate a second file past the 24h cutoff.
        old = tmp_path / "browser_screenshot_old.png"
        old.write_bytes(b"old")
        stale_mtime = time.time() - (25 * 3600)  # 25 hours ago
        os.utime(str(old), (stale_mtime, stale_mtime))

        _cleanup_old_screenshots(tmp_path, max_age_hours=24)

        assert fresh.exists(), "Fresh screenshot should not be removed"
        assert not old.exists(), "Old screenshot should be removed"

    def test_cleanup_ignores_non_screenshot_files(self, tmp_path):
        """Only files matching browser_screenshot_*.png should be cleaned."""
        import time
        from tools.browser_tool import _cleanup_old_screenshots

        bystander = tmp_path / "important_data.txt"
        bystander.write_bytes(b"keep me")
        stale_mtime = time.time() - (48 * 3600)
        os.utime(str(bystander), (stale_mtime, stale_mtime))

        _cleanup_old_screenshots(tmp_path, max_age_hours=24)

        assert bystander.exists(), "Non-screenshot files should not be touched"

    def test_cleanup_handles_empty_dir(self, tmp_path):
        """Cleanup should not fail on empty directory."""
        from tools.browser_tool import _cleanup_old_screenshots
        _cleanup_old_screenshots(tmp_path, max_age_hours=24)  # Should not raise

    def test_cleanup_handles_nonexistent_dir(self):
        """Cleanup should not fail if directory doesn't exist."""
        from pathlib import Path
        from tools.browser_tool import _cleanup_old_screenshots
        _cleanup_old_screenshots(Path("/nonexistent/dir"), max_age_hours=24)  # Should not raise
"""Tests for gateway session hygiene — auto-compression of large sessions.

Verifies that the gateway detects pathologically large transcripts and
triggers auto-compression before running the agent. (#628)
"""

import pytest
from unittest.mock import patch, MagicMock, AsyncMock
from agent.model_metadata import estimate_messages_tokens_rough


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _make_history(n_messages: int, content_size: int = 100) -> list:
    """Build a fake transcript of alternating user/assistant messages."""
    filler = "x" * content_size
    return [
        {
            "role": "user" if i % 2 == 0 else "assistant",
            "content": filler,
            "timestamp": f"t{i}",
        }
        for i in range(n_messages)
    ]


def _make_large_history_tokens(target_tokens: int) -> list:
    """Build a history that estimates to roughly target_tokens tokens."""
    # estimate_messages_tokens_rough ≈ total chars of str(msg) // 4, and each
    # message dict carries roughly 60 chars of key/format overhead on top of
    # its content — so N tokens needs about N * 4 chars spread over messages.
    target_chars = target_tokens * 4
    msg_overhead = 60
    n_msgs = 50  # spread the payload over a fixed number of messages
    content_size = max(10, (target_chars // n_msgs) - msg_overhead)
    return _make_history(n_msgs, content_size=content_size)


# ---------------------------------------------------------------------------
# Detection threshold tests
# ---------------------------------------------------------------------------

class TestSessionHygieneThresholds:
    """The threshold logic must correctly identify large sessions."""

    def test_small_session_below_thresholds(self):
        """A 10-message session should not trigger compression."""
        history = _make_history(10)
        token_limit, msg_limit = 100_000, 200

        triggered = (
            estimate_messages_tokens_rough(history) >= token_limit
            or len(history) >= msg_limit
        )
        assert not triggered

    def test_large_message_count_triggers(self):
        """200+ messages should trigger compression even if tokens are low."""
        history = _make_history(250, content_size=10)
        assert len(history) >= 200

    def test_large_token_count_triggers(self):
        """High token count should trigger compression even if message count is low."""
        # 50 messages with huge content to exceed 100K tokens.
        history = _make_history(50, content_size=10_000)
        assert estimate_messages_tokens_rough(history) >= 100_000

    def test_under_both_thresholds_no_trigger(self):
        """Session under both thresholds should not trigger."""
        history = _make_history(100, content_size=100)
        token_limit, msg_limit = 100_000, 200

        triggered = (
            estimate_messages_tokens_rough(history) >= token_limit
            or len(history) >= msg_limit
        )
        assert not triggered

    def test_custom_thresholds(self):
        """Custom thresholds from config should be respected."""
        history = _make_history(60, content_size=100)

        # A lowered message threshold flags the session...
        assert len(history) >= 50
        # ...while a raised one does not.
        assert not len(history) >= 100

    def test_minimum_message_guard(self):
        """Sessions with fewer than 4 messages should never trigger."""
        # Even with enormous content, the gateway skips anything shorter than
        # 4 messages (it checks `len(history) >= 4` before evaluating).
        history = _make_history(3, content_size=100_000)
        assert len(history) < 4


class TestSessionHygieneWarnThreshold:
    """The post-compression warning threshold."""

    def test_warn_when_still_large(self):
        """If compressed result is still above warn_tokens, should warn."""
        warn_tokens = 200_000
        assert 250_000 >= warn_tokens  # simulated post-compression size

    def test_no_warn_when_under(self):
        """If compressed result is under warn_tokens, no warning."""
        warn_tokens = 200_000
        assert 150_000 < warn_tokens


class TestTokenEstimation:
    """Rough token estimation behaves as the hygiene checks expect."""

    def test_empty_history(self):
        assert estimate_messages_tokens_rough([]) == 0

    def test_proportional_to_content(self):
        small = _make_history(10, content_size=100)
        large = _make_history(10, content_size=10_000)
        assert estimate_messages_tokens_rough(large) > estimate_messages_tokens_rough(small)

    def test_proportional_to_count(self):
        few = _make_history(10, content_size=1000)
        many = _make_history(100, content_size=1000)
        assert estimate_messages_tokens_rough(many) > estimate_messages_tokens_rough(few)

    def test_pathological_session_detected(self):
        """The reported pathological case: 648 messages, ~299K tokens."""
        # Simulate ~460 tokens per message across 648 messages — must land
        # well above both default thresholds.
        history = _make_history(648, content_size=1800)
        assert estimate_messages_tokens_rough(history) > 100_000
        assert len(history) > 200
"""Tests for /title gateway slash command.

Tests the _handle_title_command handler (set/show session titles)
across all gateway messenger platforms.
"""

import os
from unittest.mock import MagicMock, patch

import pytest

from gateway.config import Platform
from gateway.platforms.base import MessageEvent
from gateway.session import SessionSource


def _make_event(text="/title", platform=Platform.TELEGRAM,
                user_id="12345", chat_id="67890"):
    """Build a MessageEvent for testing."""
    return MessageEvent(
        text=text,
        source=SessionSource(
            platform=platform,
            user_id=user_id,
            chat_id=chat_id,
            user_name="testuser",
        ),
    )


def _make_runner(session_db=None):
    """Create a bare GatewayRunner with a mock session_store and optional session_db."""
    from gateway.run import GatewayRunner

    runner = object.__new__(GatewayRunner)  # skip __init__ — no adapters, no I/O
    runner.adapters = {}
    runner._session_db = session_db

    # The store always resolves to one well-known session entry.
    entry = MagicMock()
    entry.session_id = "test_session_123"
    entry.session_key = "telegram:12345:67890"
    store = MagicMock()
    store.get_or_create_session.return_value = entry
    runner.session_store = store

    return runner


# ---------------------------------------------------------------------------
# _handle_title_command
# ---------------------------------------------------------------------------


class TestHandleTitleCommand:
    """Tests for GatewayRunner._handle_title_command."""

    @pytest.mark.asyncio
    async def test_set_title(self, tmp_path):
        """Setting a title returns confirmation."""
        from hermes_state import SessionDB
        state = SessionDB(db_path=tmp_path / "state.db")
        state.create_session("test_session_123", "telegram")

        runner = _make_runner(session_db=state)
        reply = await runner._handle_title_command(
            _make_event(text="/title My Research Project")
        )
        assert "My Research Project" in reply
        assert "✏️" in reply

        # The title must be persisted in the DB as well.
        assert state.get_session_title("test_session_123") == "My Research Project"
        state.close()

    @pytest.mark.asyncio
    async def test_show_title_when_set(self, tmp_path):
        """Showing title when one is set returns the title."""
        from hermes_state import SessionDB
        state = SessionDB(db_path=tmp_path / "state.db")
        state.create_session("test_session_123", "telegram")
        state.set_session_title("test_session_123", "Existing Title")

        runner = _make_runner(session_db=state)
        reply = await runner._handle_title_command(_make_event(text="/title"))
        assert "Existing Title" in reply
        assert "📌" in reply
        state.close()

    @pytest.mark.asyncio
    async def test_show_title_when_not_set(self, tmp_path):
        """Showing title when none is set returns usage hint."""
        from hermes_state import SessionDB
        state = SessionDB(db_path=tmp_path / "state.db")
        state.create_session("test_session_123", "telegram")

        runner = _make_runner(session_db=state)
        reply = await runner._handle_title_command(_make_event(text="/title"))
        assert "No title set" in reply
        assert "/title" in reply
        state.close()

    @pytest.mark.asyncio
    async def test_title_conflict(self, tmp_path):
        """Setting a title already used by another session returns error."""
        from hermes_state import SessionDB
        state = SessionDB(db_path=tmp_path / "state.db")
        state.create_session("other_session", "telegram")
        state.set_session_title("other_session", "Taken Title")
        state.create_session("test_session_123", "telegram")

        runner = _make_runner(session_db=state)
        reply = await runner._handle_title_command(_make_event(text="/title Taken Title"))
        assert "already in use" in reply
        assert "⚠️" in reply
        state.close()

    @pytest.mark.asyncio
    async def test_no_session_db(self):
        """Returns error when session database is not available."""
        runner = _make_runner(session_db=None)
        reply = await runner._handle_title_command(_make_event(text="/title My Title"))
        assert "not available" in reply

    @pytest.mark.asyncio
    async def test_title_too_long(self, tmp_path):
        """Setting a title that exceeds max length returns error."""
        from hermes_state import SessionDB
        state = SessionDB(db_path=tmp_path / "state.db")
        state.create_session("test_session_123", "telegram")

        runner = _make_runner(session_db=state)
        reply = await runner._handle_title_command(
            _make_event(text=f"/title {'A' * 150}")
        )
        assert "too long" in reply
        assert "⚠️" in reply
        state.close()

    @pytest.mark.asyncio
    async def test_title_control_chars_sanitized(self, tmp_path):
        """Control characters are stripped and sanitized title is stored."""
        from hermes_state import SessionDB
        state = SessionDB(db_path=tmp_path / "state.db")
        state.create_session("test_session_123", "telegram")

        runner = _make_runner(session_db=state)
        reply = await runner._handle_title_command(
            _make_event(text="/title hello\x00world")
        )
        assert "helloworld" in reply
        assert state.get_session_title("test_session_123") == "helloworld"
        state.close()

    @pytest.mark.asyncio
    async def test_title_only_control_chars(self, tmp_path):
        """Title with only control chars returns empty error."""
        from hermes_state import SessionDB
        state = SessionDB(db_path=tmp_path / "state.db")
        state.create_session("test_session_123", "telegram")

        runner = _make_runner(session_db=state)
        reply = await runner._handle_title_command(
            _make_event(text="/title \x00\x01\x02")
        )
        assert "empty after cleanup" in reply
        state.close()

    @pytest.mark.asyncio
    async def test_works_across_platforms(self, tmp_path):
        """The /title command works for Discord, Slack, and WhatsApp too."""
        from hermes_state import SessionDB
        for platform in [Platform.DISCORD, Platform.TELEGRAM]:
            state = SessionDB(db_path=tmp_path / f"state_{platform.value}.db")
            state.create_session("test_session_123", platform.value)

            runner = _make_runner(session_db=state)
            reply = await runner._handle_title_command(
                _make_event(text="/title Cross-Platform Test", platform=platform)
            )
            assert "Cross-Platform Test" in reply
            assert state.get_session_title("test_session_123") == "Cross-Platform Test"
            state.close()


# ---------------------------------------------------------------------------
# /title in help and known_commands
# ---------------------------------------------------------------------------


class TestTitleInHelp:
    """Verify /title appears in help text and known commands."""

    @pytest.mark.asyncio
    async def test_title_in_help_output(self):
        """The /help output includes /title."""
        from gateway.hooks import HookRegistry

        runner = _make_runner()
        runner.hooks = HookRegistry()  # /help rendering needs the hook registry
        reply = await runner._handle_help_command(_make_event(text="/help"))
        assert "/title" in reply

    def test_title_is_known_command(self):
        """The /title command is in the _known_commands set."""
        import inspect
        from gateway.run import GatewayRunner

        assert '"title"' in inspect.getsource(GatewayRunner._handle_message)
"""Tests for shared slash command definitions and autocomplete."""

from prompt_toolkit.completion import CompleteEvent
from prompt_toolkit.document import Document

from hermes_cli.commands import COMMANDS, SlashCommandCompleter


# All commands that must be present in the shared COMMANDS dict.
EXPECTED_COMMANDS = {
    "/help", "/tools", "/toolsets", "/model", "/provider", "/prompt",
    "/personality", "/clear", "/history", "/new", "/reset", "/retry",
    "/undo", "/save", "/config", "/cron", "/skills", "/platforms",
    "/verbose", "/compress", "/title", "/usage", "/insights", "/paste",
    "/reload-mcp", "/quit",
}


def _completions(completer: SlashCommandCompleter, text: str):
    """Drive the completer as prompt_toolkit would and collect the results."""
    event = CompleteEvent(completion_requested=True)
    return list(completer.get_completions(Document(text=text), event))


class TestCommands:
    def test_shared_commands_include_cli_specific_entries(self):
        """Entries that previously only existed in cli.py are now in the shared dict."""
        assert COMMANDS["/paste"] == "Check clipboard for an image and attach it"
        assert COMMANDS["/reload-mcp"] == "Reload MCP servers from config.yaml"

    def test_all_expected_commands_present(self):
        """Regression guard — every known command must appear in the shared dict."""
        assert set(COMMANDS.keys()) == EXPECTED_COMMANDS

    def test_every_command_has_nonempty_description(self):
        for cmd, desc in COMMANDS.items():
            assert isinstance(desc, str) and len(desc) > 0, f"{cmd} has empty description"


class TestSlashCommandCompleter:
    # -- basic prefix completion -----------------------------------------

    def test_builtin_prefix_completion_uses_shared_registry(self):
        texts = {c.text for c in _completions(SlashCommandCompleter(), "/re")}

        assert "reset" in texts
        assert "retry" in texts
        assert "reload-mcp" in texts

    def test_builtin_completion_display_meta_shows_description(self):
        items = _completions(SlashCommandCompleter(), "/help")
        assert len(items) == 1
        assert items[0].display_meta_text == "Show this help message"

    # -- exact-match trailing space --------------------------------------

    def test_exact_match_completion_adds_trailing_space(self):
        items = _completions(SlashCommandCompleter(), "/help")

        assert [c.text for c in items] == ["help "]

    def test_partial_match_does_not_add_trailing_space(self):
        items = _completions(SlashCommandCompleter(), "/hel")

        assert [c.text for c in items] == ["help"]

    # -- non-slash input returns nothing ---------------------------------

    def test_no_completions_for_non_slash_input(self):
        assert _completions(SlashCommandCompleter(), "help") == []

    def test_no_completions_for_empty_input(self):
        assert _completions(SlashCommandCompleter(), "") == []

    # -- skill commands via provider ------------------------------------

    def test_skill_commands_are_completed_from_provider(self):
        completer = SlashCommandCompleter(
            skill_commands_provider=lambda: {
                "/gif-search": {"description": "Search for GIFs across providers"},
            }
        )

        items = _completions(completer, "/gif")

        assert len(items) == 1
        assert items[0].text == "gif-search"
        assert items[0].display_text == "/gif-search"
        assert items[0].display_meta_text == "⚡ Search for GIFs across providers"

    def test_skill_exact_match_adds_trailing_space(self):
        completer = SlashCommandCompleter(
            skill_commands_provider=lambda: {
                "/gif-search": {"description": "Search for GIFs"},
            }
        )

        items = _completions(completer, "/gif-search")

        assert len(items) == 1
        assert items[0].text == "gif-search "

    def test_no_skill_provider_means_no_skill_completions(self):
        """Default (None) provider should not blow up or add completions."""
        # /gif doesn't match any builtin command either.
        assert _completions(SlashCommandCompleter(), "/gif") == []

    def test_skill_provider_exception_is_swallowed(self):
        """A broken provider should not crash autocomplete."""
        completer = SlashCommandCompleter(
            skill_commands_provider=lambda: (_ for _ in ()).throw(RuntimeError("boom")),
        )
        # Builtin matches still come back, and nothing raises.
        texts = {c.text for c in _completions(completer, "/he")}
        assert "help" in texts

    def test_skill_description_truncated_at_50_chars(self):
        completer = SlashCommandCompleter(
            skill_commands_provider=lambda: {
                "/long-skill": {"description": "A" * 80},
            }
        )
        items = _completions(completer, "/long")
        assert len(items) == 1
        # "⚡ " prefix + first 50 chars + "..."
        assert items[0].display_meta_text == f"⚡ {'A' * 50}..."

    def test_skill_missing_description_uses_fallback(self):
        completer = SlashCommandCompleter(
            skill_commands_provider=lambda: {
                "/no-desc": {},
            }
        )
        items = _completions(completer, "/no-desc")
        assert len(items) == 1
        assert "Skill command" in items[0].display_meta_text


# ---------------------------------------------------------------------------
# hermes doctor helpers (tests/hermes_cli/test_doctor.py)
# ---------------------------------------------------------------------------

from hermes_cli.doctor import _has_provider_env_config


class TestProviderEnvDetection:
    def test_detects_openai_api_key(self):
        content = "OPENAI_BASE_URL=http://localhost:1234/v1\nOPENAI_API_KEY=sk-test-key\n"
        assert _has_provider_env_config(content)

    def test_detects_custom_endpoint_without_openrouter_key(self):
        content = "OPENAI_BASE_URL=http://localhost:8080/v1\n"
        assert _has_provider_env_config(content)

    def test_returns_false_when_no_provider_settings(self):
        content = "TERMINAL_ENV=local\n"
        assert not _has_provider_env_config(content)
"""Tests for provider-aware `/model` validation in hermes_cli.models."""

from unittest.mock import patch

from hermes_cli.models import (
    curated_models_for_provider,
    fetch_api_models,
    normalize_provider,
    parse_model_input,
    provider_model_ids,
    validate_requested_model,
)


# -- helpers -----------------------------------------------------------------

FAKE_API_MODELS = [
    "anthropic/claude-opus-4.6",
    "anthropic/claude-sonnet-4.5",
    "openai/gpt-5.4-pro",
    "openai/gpt-5.4",
    "google/gemini-3-pro-preview",
]


def _validate(model, provider="openrouter", api_models=FAKE_API_MODELS, **kw):
    """Shortcut: call validate_requested_model with the API listing mocked out."""
    with patch("hermes_cli.models.fetch_api_models", return_value=api_models):
        return validate_requested_model(model, provider, **kw)


# -- parse_model_input -------------------------------------------------------

class TestParseModelInput:
    def test_plain_model_keeps_current_provider(self):
        prov, name = parse_model_input("anthropic/claude-sonnet-4.5", "openrouter")
        assert prov == "openrouter"
        assert name == "anthropic/claude-sonnet-4.5"

    def test_provider_colon_model_switches_provider(self):
        prov, name = parse_model_input("openrouter:anthropic/claude-sonnet-4.5", "nous")
        assert prov == "openrouter"
        assert name == "anthropic/claude-sonnet-4.5"

    def test_provider_alias_resolved(self):
        prov, name = parse_model_input("glm:glm-5", "openrouter")
        assert prov == "zai"
        assert name == "glm-5"

    def test_no_slash_no_colon_keeps_provider(self):
        prov, name = parse_model_input("gpt-5.4", "openrouter")
        assert prov == "openrouter"
        assert name == "gpt-5.4"

    def test_nous_provider_switch(self):
        prov, name = parse_model_input("nous:hermes-3", "openrouter")
        assert prov == "nous"
        assert name == "hermes-3"

    def test_empty_model_after_colon_keeps_current(self):
        prov, name = parse_model_input("openrouter:", "nous")
        assert prov == "nous"
        assert name == "openrouter:"

    def test_colon_at_start_keeps_current(self):
        prov, name = parse_model_input(":something", "openrouter")
        assert prov == "openrouter"
        assert name == ":something"

    def test_unknown_prefix_colon_not_treated_as_provider(self):
        """Colons are only provider delimiters if the left side is a known provider."""
        prov, name = parse_model_input("anthropic/claude-3.5-sonnet:beta", "openrouter")
        assert prov == "openrouter"
        assert name == "anthropic/claude-3.5-sonnet:beta"

    def test_http_url_not_treated_as_provider(self):
        prov, name = parse_model_input("http://localhost:8080/model", "openrouter")
        assert prov == "openrouter"
        assert name == "http://localhost:8080/model"


# -- curated_models_for_provider ---------------------------------------------

class TestCuratedModelsForProvider:
    def test_openrouter_returns_curated_list(self):
        curated = curated_models_for_provider("openrouter")
        assert len(curated) > 0
        assert any("claude" in entry[0] for entry in curated)

    def test_zai_returns_glm_models(self):
        curated = curated_models_for_provider("zai")
        assert any("glm" in entry[0] for entry in curated)

    def test_unknown_provider_returns_empty(self):
        assert curated_models_for_provider("totally-unknown") == []


# -- normalize_provider ------------------------------------------------------

class TestNormalizeProvider:
    def test_defaults_to_openrouter(self):
        assert normalize_provider(None) == "openrouter"
        assert normalize_provider("") == "openrouter"

    def test_known_aliases(self):
        assert normalize_provider("glm") == "zai"
        assert normalize_provider("kimi") == "kimi-coding"
        assert normalize_provider("moonshot") == "kimi-coding"

    def test_case_insensitive(self):
        assert normalize_provider("OpenRouter") == "openrouter"


# -- provider_model_ids ------------------------------------------------------

class TestProviderModelIds:
    def test_openrouter_returns_curated_list(self):
        ids = provider_model_ids("openrouter")
        assert len(ids) > 0
        assert all("/" in model_id for model_id in ids)

    def test_unknown_provider_returns_empty(self):
        assert provider_model_ids("some-unknown-provider") == []

    def test_zai_returns_glm_models(self):
        assert "glm-5" in provider_model_ids("zai")


# -- fetch_api_models --------------------------------------------------------

class TestFetchApiModels:
    def test_returns_none_when_no_base_url(self):
        assert fetch_api_models("key", None) is None

    def test_returns_none_on_network_error(self):
        with patch("hermes_cli.models.urllib.request.urlopen", side_effect=Exception("timeout")):
            assert fetch_api_models("key", "https://example.com/v1") is None


# -- validate — format checks -----------------------------------------------

class TestValidateFormatChecks:
    def test_empty_model_rejected(self):
        outcome = _validate("")
        assert outcome["accepted"] is False
        assert "empty" in outcome["message"]

    def test_whitespace_only_rejected(self):
        assert _validate(" ")["accepted"] is False

    def test_model_with_spaces_rejected(self):
        assert _validate("anthropic/ claude-opus")["accepted"] is False

    def test_no_slash_model_still_probes_api(self):
        outcome = _validate("gpt-5.4", api_models=["gpt-5.4", "gpt-5.4-pro"])
        assert outcome["accepted"] is True
        assert outcome["persist"] is True

    def test_no_slash_model_rejected_if_not_in_api(self):
        assert _validate("gpt-5.4", api_models=["openai/gpt-5.4"])["accepted"] is False


# -- validate — API found ----------------------------------------------------

class TestValidateApiFound:
    def test_model_found_in_api(self):
        outcome = _validate("anthropic/claude-opus-4.6")
        assert outcome["accepted"] is True
        assert outcome["persist"] is True
        assert outcome["recognized"] is True

    def test_model_found_for_custom_endpoint(self):
        outcome = _validate(
            "my-model", provider="openrouter",
            api_models=["my-model"], base_url="http://localhost:11434/v1",
        )
        assert outcome["accepted"] is True
        assert outcome["persist"] is True


# -- validate — API not found ------------------------------------------------

class TestValidateApiNotFound:
    def test_model_not_in_api_rejected(self):
        outcome = _validate("anthropic/claude-nonexistent")
        assert outcome["accepted"] is False
        assert "not a valid model" in outcome["message"]

    def test_rejection_includes_suggestions(self):
        outcome = _validate("anthropic/claude-opus-4.5")
        assert outcome["accepted"] is False
        assert "Did you mean" in outcome["message"]


# -- validate — API unreachable (fallback) -----------------------------------

class TestValidateApiFallback:
    def test_known_catalog_model_accepted_when_api_down(self):
        outcome = _validate("anthropic/claude-opus-4.6", api_models=None)
        assert outcome["accepted"] is True
        assert outcome["persist"] is True

    def test_unknown_model_session_only_when_api_down(self):
        outcome = _validate("anthropic/claude-next-gen", api_models=None)
        assert outcome["accepted"] is True
        assert outcome["persist"] is False
        assert "session only" in outcome["message"].lower()

    def test_zai_known_model_accepted_when_api_down(self):
        outcome = _validate("glm-5", provider="zai", api_models=None)
        assert outcome["accepted"] is True
        assert outcome["persist"] is True

    def test_unknown_provider_session_only_when_api_down(self):
        outcome = _validate("some-model", provider="totally-unknown", api_models=None)
        assert outcome["accepted"] is True
        assert outcome["persist"] is False
# ---------------------------------------------------------------------------
# Skills hub bootstrap (tests/hermes_cli/test_skills_hub.py)
# ---------------------------------------------------------------------------

from io import StringIO

from rich.console import Console

from hermes_cli.skills_hub import do_list


def test_do_list_initializes_hub_dir(monkeypatch, tmp_path):
    """`hermes skills list` must lazily create the hub directory scaffolding."""
    import tools.skills_hub as hub
    import tools.skills_tool as skills_tool

    # Point every hub path constant into the temp tree so the test never
    # touches the real ~/.hermes state.
    hub_dir = tmp_path / "skills" / ".hub"
    monkeypatch.setattr(hub, "SKILLS_DIR", tmp_path / "skills")
    monkeypatch.setattr(hub, "HUB_DIR", hub_dir)
    monkeypatch.setattr(hub, "LOCK_FILE", hub_dir / "lock.json")
    monkeypatch.setattr(hub, "QUARANTINE_DIR", hub_dir / "quarantine")
    monkeypatch.setattr(hub, "AUDIT_LOG", hub_dir / "audit.log")
    monkeypatch.setattr(hub, "TAPS_FILE", hub_dir / "taps.json")
    monkeypatch.setattr(hub, "INDEX_CACHE_DIR", hub_dir / "index-cache")
    monkeypatch.setattr(skills_tool, "_find_all_skills", lambda: [])

    console = Console(file=StringIO(), force_terminal=False, color_system=None)

    assert not hub_dir.exists()

    do_list(console=console)

    assert hub_dir.exists()
    assert (hub_dir / "lock.json").exists()
    assert (hub_dir / "quarantine").is_dir()
    assert (hub_dir / "index-cache").is_dir()


# ---------------------------------------------------------------------------
# Platform tool persistence (tests/hermes_cli/test_tools_config.py)
# ---------------------------------------------------------------------------

from hermes_cli.tools_config import _get_platform_tools


def test_get_platform_tools_uses_default_when_platform_not_configured():
    """An unconfigured platform falls back to a non-empty default toolset."""
    enabled = _get_platform_tools({}, "cli")
    assert enabled


def test_get_platform_tools_preserves_explicit_empty_selection():
    """An explicit empty selection must not be replaced by the default."""
    enabled = _get_platform_tools({"platform_toolsets": {"cli": []}}, "cli")
    assert enabled == set()


# ---------------------------------------------------------------------------
# Kimi Code auto-detection (additions to tests/test_api_key_providers.py;
# relies on KIMI_CODE_BASE_URL, _resolve_kimi_base_url,
# get_api_key_provider_status and resolve_api_key_provider_credentials
# imported from hermes_cli.auth at the top of that file)
# ---------------------------------------------------------------------------

MOONSHOT_DEFAULT_URL = "https://api.moonshot.ai/v1"


class TestResolveKimiBaseUrl:
    """Test _resolve_kimi_base_url() helper for key-prefix auto-detection."""

    def test_sk_kimi_prefix_routes_to_kimi_code(self):
        url = _resolve_kimi_base_url("sk-kimi-abc123", MOONSHOT_DEFAULT_URL, "")
        assert url == KIMI_CODE_BASE_URL

    def test_legacy_key_uses_default(self):
        url = _resolve_kimi_base_url("sk-abc123", MOONSHOT_DEFAULT_URL, "")
        assert url == MOONSHOT_DEFAULT_URL

    def test_empty_key_uses_default(self):
        url = _resolve_kimi_base_url("", MOONSHOT_DEFAULT_URL, "")
        assert url == MOONSHOT_DEFAULT_URL

    def test_env_override_wins_over_sk_kimi(self):
        """KIMI_BASE_URL env var should always take priority."""
        custom = "https://custom.example.com/v1"
        url = _resolve_kimi_base_url("sk-kimi-abc123", MOONSHOT_DEFAULT_URL, custom)
        assert url == custom

    def test_env_override_wins_over_legacy(self):
        custom = "https://custom.example.com/v1"
        url = _resolve_kimi_base_url("sk-abc123", MOONSHOT_DEFAULT_URL, custom)
        assert url == custom


class TestKimiCodeStatusAutoDetect:
    """Test that get_api_key_provider_status auto-detects sk-kimi- keys."""

    def test_sk_kimi_key_gets_kimi_code_url(self, monkeypatch):
        # FIX: clear any KIMI_BASE_URL inherited from the developer's shell —
        # the env override always wins, so a leaked value would defeat the
        # prefix auto-detection and make this test fail spuriously.
        monkeypatch.delenv("KIMI_BASE_URL", raising=False)
        monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-test-key-123")
        status = get_api_key_provider_status("kimi-coding")
        assert status["configured"] is True
        assert status["base_url"] == KIMI_CODE_BASE_URL

    def test_legacy_key_gets_moonshot_url(self, monkeypatch):
        monkeypatch.delenv("KIMI_BASE_URL", raising=False)  # isolate from dev env
        monkeypatch.setenv("KIMI_API_KEY", "sk-legacy-test-key")
        status = get_api_key_provider_status("kimi-coding")
        assert status["configured"] is True
        assert status["base_url"] == MOONSHOT_DEFAULT_URL

    def test_env_override_wins(self, monkeypatch):
        monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-test-key")
        monkeypatch.setenv("KIMI_BASE_URL", "https://override.example/v1")
        status = get_api_key_provider_status("kimi-coding")
        assert status["base_url"] == "https://override.example/v1"


class TestKimiCodeCredentialAutoDetect:
    """Test that resolve_api_key_provider_credentials auto-detects sk-kimi- keys."""

    def test_sk_kimi_key_gets_kimi_code_url(self, monkeypatch):
        monkeypatch.delenv("KIMI_BASE_URL", raising=False)  # isolate from dev env
        monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-secret-key")
        creds = resolve_api_key_provider_credentials("kimi-coding")
        assert creds["api_key"] == "sk-kimi-secret-key"
        assert creds["base_url"] == KIMI_CODE_BASE_URL

    def test_legacy_key_gets_moonshot_url(self, monkeypatch):
        monkeypatch.delenv("KIMI_BASE_URL", raising=False)  # isolate from dev env
        monkeypatch.setenv("KIMI_API_KEY", "sk-legacy-secret-key")
        creds = resolve_api_key_provider_credentials("kimi-coding")
        assert creds["api_key"] == "sk-legacy-secret-key"
        assert creds["base_url"] == MOONSHOT_DEFAULT_URL

    def test_env_override_wins(self, monkeypatch):
        monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-secret-key")
        monkeypatch.setenv("KIMI_BASE_URL", "https://override.example/v1")
        creds = resolve_api_key_provider_credentials("kimi-coding")
        assert creds["base_url"] == "https://override.example/v1"

    def test_non_kimi_providers_unaffected(self, monkeypatch):
        """Ensure the auto-detect logic doesn't leak to other providers."""
        monkeypatch.setenv("GLM_API_KEY", "sk-kimi-looks-like-kimi-but-isnt")
        creds = resolve_api_key_provider_credentials("zai")
        assert creds["base_url"] == "https://api.z.ai/api/paas/v4"
b/tests/test_cli_init.py index f679d7706..2e6d7f583 100644 --- a/tests/test_cli_init.py +++ b/tests/test_cli_init.py @@ -3,9 +3,7 @@ that only manifest at runtime (not in mocked unit tests).""" import os import sys -from unittest.mock import patch, MagicMock - -import pytest +from unittest.mock import patch sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) @@ -72,6 +70,38 @@ class TestVerboseAndToolProgress: assert cli.tool_progress_mode in ("off", "new", "all", "verbose") +class TestHistoryDisplay: + def test_history_numbers_only_visible_messages_and_summarizes_tools(self, capsys): + cli = _make_cli() + cli.conversation_history = [ + {"role": "system", "content": "system prompt"}, + {"role": "user", "content": "Hello"}, + { + "role": "assistant", + "content": None, + "tool_calls": [{"id": "call_1"}, {"id": "call_2"}], + }, + {"role": "tool", "content": "tool output 1"}, + {"role": "tool", "content": "tool output 2"}, + {"role": "assistant", "content": "All set."}, + {"role": "user", "content": "A" * 250}, + ] + + cli.show_history() + output = capsys.readouterr().out + + assert "[You #1]" in output + assert "[Hermes #2]" in output + assert "(requested 2 tool calls)" in output + assert "[Tools]" in output + assert "(2 tool messages hidden)" in output + assert "[Hermes #3]" in output + assert "[You #4]" in output + assert "[You #5]" not in output + assert "A" * 250 in output + assert "A" * 250 + "..." 
not in output + + class TestProviderResolution: def test_api_key_is_string_or_none(self): cli = _make_cli() diff --git a/tests/test_cli_model_command.py b/tests/test_cli_model_command.py new file mode 100644 index 000000000..b8b8e8d2d --- /dev/null +++ b/tests/test_cli_model_command.py @@ -0,0 +1,133 @@ +"""Regression tests for the `/model` slash command in the interactive CLI.""" + +from unittest.mock import patch, MagicMock + +from cli import HermesCLI + + +class TestModelCommand: + def _make_cli(self): + cli_obj = HermesCLI.__new__(HermesCLI) + cli_obj.model = "anthropic/claude-opus-4.6" + cli_obj.agent = object() + cli_obj.provider = "openrouter" + cli_obj.requested_provider = "openrouter" + cli_obj.base_url = "https://openrouter.ai/api/v1" + cli_obj.api_key = "test-key" + cli_obj._explicit_api_key = None + cli_obj._explicit_base_url = None + return cli_obj + + def test_valid_model_from_api_saved_to_config(self, capsys): + cli_obj = self._make_cli() + + with patch("hermes_cli.models.fetch_api_models", + return_value=["anthropic/claude-sonnet-4.5", "openai/gpt-5.4"]), \ + patch("cli.save_config_value", return_value=True) as save_mock: + cli_obj.process_command("/model anthropic/claude-sonnet-4.5") + + output = capsys.readouterr().out + assert "saved to config" in output + assert cli_obj.model == "anthropic/claude-sonnet-4.5" + save_mock.assert_called_once_with("model.default", "anthropic/claude-sonnet-4.5") + + def test_invalid_model_from_api_is_rejected(self, capsys): + cli_obj = self._make_cli() + + with patch("hermes_cli.models.fetch_api_models", + return_value=["anthropic/claude-opus-4.6"]), \ + patch("cli.save_config_value") as save_mock: + cli_obj.process_command("/model anthropic/fake-model") + + output = capsys.readouterr().out + assert "not a valid model" in output + assert "Model unchanged" in output + assert cli_obj.model == "anthropic/claude-opus-4.6" + save_mock.assert_not_called() + + def test_api_unreachable_falls_back_session_only(self, capsys): 
+ cli_obj = self._make_cli() + + with patch("hermes_cli.models.fetch_api_models", return_value=None), \ + patch("cli.save_config_value") as save_mock: + cli_obj.process_command("/model anthropic/claude-sonnet-next") + + output = capsys.readouterr().out + assert "session only" in output + assert "will revert on restart" in output + assert cli_obj.model == "anthropic/claude-sonnet-next" + save_mock.assert_not_called() + + def test_no_slash_model_probes_api_and_rejects(self, capsys): + cli_obj = self._make_cli() + + with patch("hermes_cli.models.fetch_api_models", + return_value=["openai/gpt-5.4"]) as fetch_mock, \ + patch("cli.save_config_value") as save_mock: + cli_obj.process_command("/model gpt-5.4") + + output = capsys.readouterr().out + assert "not a valid model" in output + assert "Model unchanged" in output + assert cli_obj.model == "anthropic/claude-opus-4.6" # unchanged + assert cli_obj.agent is not None # not reset + save_mock.assert_not_called() + + def test_validation_crash_falls_back_to_save(self, capsys): + cli_obj = self._make_cli() + + with patch("hermes_cli.models.validate_requested_model", + side_effect=RuntimeError("boom")), \ + patch("cli.save_config_value", return_value=True) as save_mock: + cli_obj.process_command("/model anthropic/claude-sonnet-4.5") + + output = capsys.readouterr().out + assert "saved to config" in output + assert cli_obj.model == "anthropic/claude-sonnet-4.5" + save_mock.assert_called_once() + + def test_show_model_when_no_argument(self, capsys): + cli_obj = self._make_cli() + cli_obj.process_command("/model") + + output = capsys.readouterr().out + assert "anthropic/claude-opus-4.6" in output + assert "OpenRouter" in output + assert "Available models" in output + assert "provider:model-name" in output + + # -- provider switching tests ------------------------------------------- + + def test_provider_colon_model_switches_provider(self, capsys): + cli_obj = self._make_cli() + + with 
patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value={ + "provider": "zai", + "api_key": "zai-key", + "base_url": "https://api.z.ai/api/paas/v4", + }), \ + patch("hermes_cli.models.fetch_api_models", + return_value=["glm-5", "glm-4.7"]), \ + patch("cli.save_config_value", return_value=True) as save_mock: + cli_obj.process_command("/model zai:glm-5") + + output = capsys.readouterr().out + assert "glm-5" in output + assert "provider:" in output.lower() or "Z.AI" in output + assert cli_obj.model == "glm-5" + assert cli_obj.provider == "zai" + assert cli_obj.base_url == "https://api.z.ai/api/paas/v4" + # Both model and provider should be saved + assert save_mock.call_count == 2 + + def test_provider_switch_fails_on_bad_credentials(self, capsys): + cli_obj = self._make_cli() + + with patch("hermes_cli.runtime_provider.resolve_runtime_provider", + side_effect=Exception("No API key found")): + cli_obj.process_command("/model nous:hermes-3") + + output = capsys.readouterr().out + assert "Could not resolve credentials" in output + assert cli_obj.model == "anthropic/claude-opus-4.6" # unchanged + assert cli_obj.provider == "openrouter" # unchanged diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 3c8fe14a5..cdae01d0c 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -162,6 +162,128 @@ def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch): assert shell.api_mode == "codex_responses" +def test_codex_provider_replaces_incompatible_default_model(monkeypatch): + """When provider resolves to openai-codex and no model was explicitly + chosen, the global config default (e.g. anthropic/claude-opus-4.6) must + be replaced with a Codex-compatible model. 
Fixes #651.""" + cli = _import_cli() + + monkeypatch.delenv("LLM_MODEL", raising=False) + monkeypatch.delenv("OPENAI_MODEL", raising=False) + + def _runtime_resolve(**kwargs): + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "test-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + monkeypatch.setattr( + "hermes_cli.codex_models.get_codex_model_ids", + lambda access_token=None: ["gpt-5.2-codex", "gpt-5.1-codex-mini"], + ) + + shell = cli.HermesCLI(compact=True, max_turns=1) + + assert shell._model_is_default is True + assert shell._ensure_runtime_credentials() is True + assert shell.provider == "openai-codex" + assert "anthropic" not in shell.model + assert "claude" not in shell.model + assert shell.model == "gpt-5.2-codex" + + +def test_codex_provider_replaces_incompatible_envvar_model(monkeypatch): + """Exact scenario from #651: LLM_MODEL is set to a non-Codex model and + provider resolves to openai-codex. 
The model must be replaced and a + warning printed since the user explicitly chose it.""" + cli = _import_cli() + + monkeypatch.setenv("LLM_MODEL", "claude-opus-4-6") + monkeypatch.delenv("OPENAI_MODEL", raising=False) + + def _runtime_resolve(**kwargs): + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "test-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + monkeypatch.setattr( + "hermes_cli.codex_models.get_codex_model_ids", + lambda access_token=None: ["gpt-5.2-codex", "gpt-5.1-codex-mini"], + ) + + shell = cli.HermesCLI(compact=True, max_turns=1) + + assert shell._model_is_default is False + assert shell._ensure_runtime_credentials() is True + assert shell.provider == "openai-codex" + assert "claude" not in shell.model + assert shell.model == "gpt-5.2-codex" + + +def test_codex_provider_preserves_explicit_codex_model(monkeypatch): + """If the user explicitly passes a Codex-compatible model, it must be + preserved even when the provider resolves to openai-codex.""" + cli = _import_cli() + + monkeypatch.delenv("LLM_MODEL", raising=False) + monkeypatch.delenv("OPENAI_MODEL", raising=False) + + def _runtime_resolve(**kwargs): + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "test-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + + shell = cli.HermesCLI(model="gpt-5.1-codex-mini", compact=True, max_turns=1) + + assert shell._model_is_default is False + assert shell._ensure_runtime_credentials() is True + 
assert shell.model == "gpt-5.1-codex-mini" + + +def test_codex_provider_strips_provider_prefix_from_model(monkeypatch): + """openai/gpt-5.3-codex should become gpt-5.3-codex — the Codex + Responses API does not accept provider-prefixed model slugs.""" + cli = _import_cli() + + monkeypatch.delenv("LLM_MODEL", raising=False) + monkeypatch.delenv("OPENAI_MODEL", raising=False) + + def _runtime_resolve(**kwargs): + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "test-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + + shell = cli.HermesCLI(model="openai/gpt-5.3-codex", compact=True, max_turns=1) + + assert shell._ensure_runtime_credentials() is True + assert shell.model == "gpt-5.3-codex" + + def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys): monkeypatch.setattr( "hermes_cli.config.load_config", diff --git a/tests/test_codex_models.py b/tests/test_codex_models.py index e6cc2fdec..40a447a19 100644 --- a/tests/test_codex_models.py +++ b/tests/test_codex_models.py @@ -30,6 +30,14 @@ def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch assert "gpt-5-hidden-codex" not in models +def test_setup_wizard_codex_import_resolves(): + """Regression test for #712: setup.py must import the correct function name.""" + # This mirrors the exact import used in hermes_cli/setup.py line 873. + # A prior bug had 'get_codex_models' (wrong) instead of 'get_codex_model_ids'. 
+ from hermes_cli.codex_models import get_codex_model_ids as setup_import + assert callable(setup_import) + + def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatch): codex_home = tmp_path / "codex-home" codex_home.mkdir(parents=True, exist_ok=True) diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py index 734db494f..fcbaf2196 100644 --- a/tests/test_hermes_state.py +++ b/tests/test_hermes_state.py @@ -351,6 +351,173 @@ class TestPruneSessions: # Schema and WAL mode # ========================================================================= +# ========================================================================= +# Session title +# ========================================================================= + +class TestSessionTitle: + def test_set_and_get_title(self, db): + db.create_session(session_id="s1", source="cli") + assert db.set_session_title("s1", "My Session") is True + + session = db.get_session("s1") + assert session["title"] == "My Session" + + def test_set_title_nonexistent_session(self, db): + assert db.set_session_title("nonexistent", "Title") is False + + def test_title_initially_none(self, db): + db.create_session(session_id="s1", source="cli") + session = db.get_session("s1") + assert session["title"] is None + + def test_update_title(self, db): + db.create_session(session_id="s1", source="cli") + db.set_session_title("s1", "First Title") + db.set_session_title("s1", "Updated Title") + + session = db.get_session("s1") + assert session["title"] == "Updated Title" + + def test_title_in_search_sessions(self, db): + db.create_session(session_id="s1", source="cli") + db.set_session_title("s1", "Debugging Auth") + db.create_session(session_id="s2", source="cli") + + sessions = db.search_sessions() + titled = [s for s in sessions if s.get("title") == "Debugging Auth"] + assert len(titled) == 1 + assert titled[0]["id"] == "s1" + + def test_title_in_export(self, db): + db.create_session(session_id="s1", 
source="cli") + db.set_session_title("s1", "Export Test") + db.append_message("s1", role="user", content="Hello") + + export = db.export_session("s1") + assert export["title"] == "Export Test" + + def test_title_with_special_characters(self, db): + db.create_session(session_id="s1", source="cli") + title = "PR #438 — fixing the 'auth' middleware" + db.set_session_title("s1", title) + + session = db.get_session("s1") + assert session["title"] == title + + def test_title_empty_string_normalized_to_none(self, db): + """Empty strings are normalized to None (clearing the title).""" + db.create_session(session_id="s1", source="cli") + db.set_session_title("s1", "My Title") + # Setting to empty string should clear the title (normalize to None) + db.set_session_title("s1", "") + + session = db.get_session("s1") + assert session["title"] is None + + def test_multiple_empty_titles_no_conflict(self, db): + """Multiple sessions can have empty-string (normalized to NULL) titles.""" + db.create_session(session_id="s1", source="cli") + db.create_session(session_id="s2", source="cli") + db.set_session_title("s1", "") + db.set_session_title("s2", "") + # Both should be None, no uniqueness conflict + assert db.get_session("s1")["title"] is None + assert db.get_session("s2")["title"] is None + + def test_title_survives_end_session(self, db): + db.create_session(session_id="s1", source="cli") + db.set_session_title("s1", "Before End") + db.end_session("s1", end_reason="user_exit") + + session = db.get_session("s1") + assert session["title"] == "Before End" + assert session["ended_at"] is not None + + +class TestSanitizeTitle: + """Tests for SessionDB.sanitize_title() validation and cleaning.""" + + def test_normal_title_unchanged(self): + assert SessionDB.sanitize_title("My Project") == "My Project" + + def test_strips_whitespace(self): + assert SessionDB.sanitize_title(" hello world ") == "hello world" + + def test_collapses_internal_whitespace(self): + assert 
SessionDB.sanitize_title("hello world") == "hello world" + + def test_tabs_and_newlines_collapsed(self): + assert SessionDB.sanitize_title("hello\t\nworld") == "hello world" + + def test_none_returns_none(self): + assert SessionDB.sanitize_title(None) is None + + def test_empty_string_returns_none(self): + assert SessionDB.sanitize_title("") is None + + def test_whitespace_only_returns_none(self): + assert SessionDB.sanitize_title(" \t\n ") is None + + def test_control_chars_stripped(self): + # Null byte, bell, backspace, etc. + assert SessionDB.sanitize_title("hello\x00world") == "helloworld" + assert SessionDB.sanitize_title("\x07\x08test\x1b") == "test" + + def test_del_char_stripped(self): + assert SessionDB.sanitize_title("hello\x7fworld") == "helloworld" + + def test_zero_width_chars_stripped(self): + # Zero-width space (U+200B), zero-width joiner (U+200D) + assert SessionDB.sanitize_title("hello\u200bworld") == "helloworld" + assert SessionDB.sanitize_title("hello\u200dworld") == "helloworld" + + def test_rtl_override_stripped(self): + # Right-to-left override (U+202E) — used in filename spoofing attacks + assert SessionDB.sanitize_title("hello\u202eworld") == "helloworld" + + def test_bom_stripped(self): + # Byte order mark (U+FEFF) + assert SessionDB.sanitize_title("\ufeffhello") == "hello" + + def test_only_control_chars_returns_none(self): + assert SessionDB.sanitize_title("\x00\x01\x02\u200b\ufeff") is None + + def test_max_length_allowed(self): + title = "A" * 100 + assert SessionDB.sanitize_title(title) == title + + def test_exceeds_max_length_raises(self): + title = "A" * 101 + with pytest.raises(ValueError, match="too long"): + SessionDB.sanitize_title(title) + + def test_unicode_emoji_allowed(self): + assert SessionDB.sanitize_title("🚀 My Project 🎉") == "🚀 My Project 🎉" + + def test_cjk_characters_allowed(self): + assert SessionDB.sanitize_title("我的项目") == "我的项目" + + def test_accented_characters_allowed(self): + assert 
SessionDB.sanitize_title("Résumé éditing") == "Résumé éditing" + + def test_special_punctuation_allowed(self): + title = "PR #438 — fixing the 'auth' middleware" + assert SessionDB.sanitize_title(title) == title + + def test_sanitize_applied_in_set_session_title(self, db): + """set_session_title applies sanitize_title internally.""" + db.create_session("s1", "cli") + db.set_session_title("s1", " hello\x00 world ") + assert db.get_session("s1")["title"] == "hello world" + + def test_too_long_title_rejected_by_set(self, db): + """set_session_title raises ValueError for overly long titles.""" + db.create_session("s1", "cli") + with pytest.raises(ValueError, match="too long"): + db.set_session_title("s1", "X" * 150) + + class TestSchemaInit: def test_wal_mode(self, db): cursor = db._conn.execute("PRAGMA journal_mode") @@ -373,4 +540,297 @@ class TestSchemaInit: def test_schema_version(self, db): cursor = db._conn.execute("SELECT version FROM schema_version") version = cursor.fetchone()[0] - assert version == 2 + assert version == 4 + + def test_title_column_exists(self, db): + """Verify the title column was created in the sessions table.""" + cursor = db._conn.execute("PRAGMA table_info(sessions)") + columns = {row[1] for row in cursor.fetchall()} + assert "title" in columns + + def test_migration_from_v2(self, tmp_path): + """Simulate a v2 database and verify migration adds title column.""" + import sqlite3 + + db_path = tmp_path / "migrate_test.db" + conn = sqlite3.connect(str(db_path)) + # Create v2 schema (without title column) + conn.executescript(""" + CREATE TABLE schema_version (version INTEGER NOT NULL); + INSERT INTO schema_version (version) VALUES (2); + + CREATE TABLE sessions ( + id TEXT PRIMARY KEY, + source TEXT NOT NULL, + user_id TEXT, + model TEXT, + model_config TEXT, + system_prompt TEXT, + parent_session_id TEXT, + started_at REAL NOT NULL, + ended_at REAL, + end_reason TEXT, + message_count INTEGER DEFAULT 0, + tool_call_count INTEGER DEFAULT 0, + 
input_tokens INTEGER DEFAULT 0, + output_tokens INTEGER DEFAULT 0 + ); + + CREATE TABLE messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + role TEXT NOT NULL, + content TEXT, + tool_call_id TEXT, + tool_calls TEXT, + tool_name TEXT, + timestamp REAL NOT NULL, + token_count INTEGER, + finish_reason TEXT + ); + """) + conn.execute( + "INSERT INTO sessions (id, source, started_at) VALUES (?, ?, ?)", + ("existing", "cli", 1000.0), + ) + conn.commit() + conn.close() + + # Open with SessionDB — should migrate to v4 + migrated_db = SessionDB(db_path=db_path) + + # Verify migration + cursor = migrated_db._conn.execute("SELECT version FROM schema_version") + assert cursor.fetchone()[0] == 4 + + # Verify title column exists and is NULL for existing sessions + session = migrated_db.get_session("existing") + assert session is not None + assert session["title"] is None + + # Verify we can set title on migrated session + assert migrated_db.set_session_title("existing", "Migrated Title") is True + session = migrated_db.get_session("existing") + assert session["title"] == "Migrated Title" + + migrated_db.close() + + +class TestTitleUniqueness: + """Tests for unique title enforcement and title-based lookups.""" + + def test_duplicate_title_raises(self, db): + """Setting a title already used by another session raises ValueError.""" + db.create_session("s1", "cli") + db.create_session("s2", "cli") + db.set_session_title("s1", "my project") + with pytest.raises(ValueError, match="already in use"): + db.set_session_title("s2", "my project") + + def test_same_session_can_keep_title(self, db): + """A session can re-set its own title without error.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + # Should not raise — it's the same session + assert db.set_session_title("s1", "my project") is True + + def test_null_titles_not_unique(self, db): + """Multiple sessions can have NULL titles (no constraint violation).""" + 
db.create_session("s1", "cli") + db.create_session("s2", "cli") + # Both have NULL titles — no error + assert db.get_session("s1")["title"] is None + assert db.get_session("s2")["title"] is None + + def test_get_session_by_title(self, db): + db.create_session("s1", "cli") + db.set_session_title("s1", "refactoring auth") + result = db.get_session_by_title("refactoring auth") + assert result is not None + assert result["id"] == "s1" + + def test_get_session_by_title_not_found(self, db): + assert db.get_session_by_title("nonexistent") is None + + def test_get_session_title(self, db): + db.create_session("s1", "cli") + assert db.get_session_title("s1") is None + db.set_session_title("s1", "my title") + assert db.get_session_title("s1") == "my title" + + def test_get_session_title_nonexistent(self, db): + assert db.get_session_title("nonexistent") is None + + +class TestTitleLineage: + """Tests for title lineage resolution and auto-numbering.""" + + def test_resolve_exact_title(self, db): + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + assert db.resolve_session_by_title("my project") == "s1" + + def test_resolve_returns_latest_numbered(self, db): + """When numbered variants exist, return the most recent one.""" + import time + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + time.sleep(0.01) + db.create_session("s2", "cli") + db.set_session_title("s2", "my project #2") + time.sleep(0.01) + db.create_session("s3", "cli") + db.set_session_title("s3", "my project #3") + # Resolving "my project" should return s3 (latest numbered variant) + assert db.resolve_session_by_title("my project") == "s3" + + def test_resolve_exact_numbered(self, db): + """Resolving an exact numbered title returns that specific session.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + db.create_session("s2", "cli") + db.set_session_title("s2", "my project #2") + # Resolving "my project #2" exactly should return 
s2 + assert db.resolve_session_by_title("my project #2") == "s2" + + def test_resolve_nonexistent_title(self, db): + assert db.resolve_session_by_title("nonexistent") is None + + def test_next_title_no_existing(self, db): + """With no existing sessions, base title is returned as-is.""" + assert db.get_next_title_in_lineage("my project") == "my project" + + def test_next_title_first_continuation(self, db): + """First continuation after the original gets #2.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + assert db.get_next_title_in_lineage("my project") == "my project #2" + + def test_next_title_increments(self, db): + """Each continuation increments the number.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + db.create_session("s2", "cli") + db.set_session_title("s2", "my project #2") + db.create_session("s3", "cli") + db.set_session_title("s3", "my project #3") + assert db.get_next_title_in_lineage("my project") == "my project #4" + + def test_next_title_strips_existing_number(self, db): + """Passing a numbered title strips the number and finds the base.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + db.create_session("s2", "cli") + db.set_session_title("s2", "my project #2") + # Even when called with "my project #2", it should return #3 + assert db.get_next_title_in_lineage("my project #2") == "my project #3" + + +class TestTitleSqlWildcards: + """Titles containing SQL LIKE wildcards (%, _) must not cause false matches.""" + + def test_resolve_title_with_underscore(self, db): + """A title like 'test_project' should not match 'testXproject #2'.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "test_project") + db.create_session("s2", "cli") + db.set_session_title("s2", "testXproject #2") + # Resolving "test_project" should return s1 (exact), not s2 + assert db.resolve_session_by_title("test_project") == "s1" + + def 
test_resolve_title_with_percent(self, db): + """A title with '%' should not wildcard-match unrelated sessions.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "100% done") + db.create_session("s2", "cli") + db.set_session_title("s2", "100X done #2") + # Should resolve to s1 (exact), not s2 + assert db.resolve_session_by_title("100% done") == "s1" + + def test_next_lineage_with_underscore(self, db): + """get_next_title_in_lineage with underscores doesn't match wrong sessions.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "test_project") + db.create_session("s2", "cli") + db.set_session_title("s2", "testXproject #2") + # Only "test_project" exists, so next should be "test_project #2" + assert db.get_next_title_in_lineage("test_project") == "test_project #2" + + +class TestListSessionsRich: + """Tests for enhanced session listing with preview and last_active.""" + + def test_preview_from_first_user_message(self, db): + db.create_session("s1", "cli") + db.append_message("s1", "system", "You are a helpful assistant.") + db.append_message("s1", "user", "Help me refactor the auth module please") + db.append_message("s1", "assistant", "Sure, let me look at it.") + sessions = db.list_sessions_rich() + assert len(sessions) == 1 + assert "Help me refactor the auth module" in sessions[0]["preview"] + + def test_preview_truncated_at_60(self, db): + db.create_session("s1", "cli") + long_msg = "A" * 100 + db.append_message("s1", "user", long_msg) + sessions = db.list_sessions_rich() + assert len(sessions[0]["preview"]) == 63 # 60 chars + "..." 
+ assert sessions[0]["preview"].endswith("...") + + def test_preview_empty_when_no_user_messages(self, db): + db.create_session("s1", "cli") + db.append_message("s1", "system", "System prompt") + sessions = db.list_sessions_rich() + assert sessions[0]["preview"] == "" + + def test_last_active_from_latest_message(self, db): + import time + db.create_session("s1", "cli") + db.append_message("s1", "user", "Hello") + time.sleep(0.01) + db.append_message("s1", "assistant", "Hi there!") + sessions = db.list_sessions_rich() + # last_active should be close to now (the assistant message) + assert sessions[0]["last_active"] > sessions[0]["started_at"] + + def test_last_active_fallback_to_started_at(self, db): + db.create_session("s1", "cli") + sessions = db.list_sessions_rich() + # No messages, so last_active falls back to started_at + assert sessions[0]["last_active"] == sessions[0]["started_at"] + + def test_rich_list_includes_title(self, db): + db.create_session("s1", "cli") + db.set_session_title("s1", "refactoring auth") + sessions = db.list_sessions_rich() + assert sessions[0]["title"] == "refactoring auth" + + def test_rich_list_source_filter(self, db): + db.create_session("s1", "cli") + db.create_session("s2", "telegram") + sessions = db.list_sessions_rich(source="cli") + assert len(sessions) == 1 + assert sessions[0]["id"] == "s1" + + def test_preview_newlines_collapsed(self, db): + db.create_session("s1", "cli") + db.append_message("s1", "user", "Line one\nLine two\nLine three") + sessions = db.list_sessions_rich() + assert "\n" not in sessions[0]["preview"] + assert "Line one Line two" in sessions[0]["preview"] + + +class TestResolveSessionByNameOrId: + """Tests for the main.py helper that resolves names or IDs.""" + + def test_resolve_by_id(self, db): + db.create_session("test-id-123", "cli") + session = db.get_session("test-id-123") + assert session is not None + assert session["id"] == "test-id-123" + + def test_resolve_by_title_falls_back(self, db): + 
db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + result = db.resolve_session_by_title("my project") + assert result == "s1" diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py index 00fc4dd9b..2ee313144 100644 --- a/tests/test_provider_parity.py +++ b/tests/test_provider_parity.py @@ -145,7 +145,7 @@ class TestBuildApiKwargsCodex: messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) assert "reasoning" in kwargs - assert kwargs["reasoning"]["effort"] == "xhigh" + assert kwargs["reasoning"]["effort"] == "medium" def test_includes_encrypted_content_in_include(self, monkeypatch): agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", @@ -596,19 +596,19 @@ class TestCodexReasoningPreflight: # ── Reasoning effort consistency tests ─────────────────────────────────────── class TestReasoningEffortDefaults: - """Verify reasoning effort defaults to xhigh across all provider paths.""" + """Verify reasoning effort defaults to medium across all provider paths.""" - def test_openrouter_default_xhigh(self, monkeypatch): + def test_openrouter_default_medium(self, monkeypatch): agent = _make_agent(monkeypatch, "openrouter") kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) reasoning = kwargs["extra_body"]["reasoning"] - assert reasoning["effort"] == "xhigh" + assert reasoning["effort"] == "medium" - def test_codex_default_xhigh(self, monkeypatch): + def test_codex_default_medium(self, monkeypatch): agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", base_url="https://chatgpt.com/backend-api/codex") kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) - assert kwargs["reasoning"]["effort"] == "xhigh" + assert kwargs["reasoning"]["effort"] == "medium" def test_codex_reasoning_disabled(self, monkeypatch): agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", diff --git a/tests/test_run_agent.py 
b/tests/test_run_agent.py index ae7924d45..55f96f942 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -280,22 +280,21 @@ class TestMaskApiKey: class TestInit: - def test_anthropic_base_url_fails_fast(self): - """Anthropic native endpoints should error before building an OpenAI client.""" + def test_anthropic_base_url_accepted(self): + """Anthropic base URLs should be accepted (OpenAI-compatible endpoint).""" with ( patch("run_agent.get_tool_definitions", return_value=[]), patch("run_agent.check_toolset_requirements", return_value={}), patch("run_agent.OpenAI") as mock_openai, ): - with pytest.raises(ValueError, match="not supported yet"): - AIAgent( - api_key="test-key-1234567890", - base_url="https://api.anthropic.com/v1/messages", - quiet_mode=True, - skip_context_files=True, - skip_memory=True, - ) - mock_openai.assert_not_called() + AIAgent( + api_key="test-key-1234567890", + base_url="https://api.anthropic.com/v1/", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + mock_openai.assert_called_once() def test_prompt_caching_claude_openrouter(self): """Claude model via OpenRouter should enable prompt caching.""" @@ -498,12 +497,12 @@ class TestBuildApiKwargs: assert kwargs["extra_body"]["provider"]["only"] == ["Anthropic"] def test_reasoning_config_default_openrouter(self, agent): - """Default reasoning config for OpenRouter should be xhigh.""" + """Default reasoning config for OpenRouter should be medium.""" messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) reasoning = kwargs["extra_body"]["reasoning"] assert reasoning["enabled"] is True - assert reasoning["effort"] == "xhigh" + assert reasoning["effort"] == "medium" def test_reasoning_config_custom(self, agent): agent.reasoning_config = {"enabled": False} diff --git a/tests/test_worktree.py b/tests/test_worktree.py new file mode 100644 index 000000000..f545baa39 --- /dev/null +++ b/tests/test_worktree.py @@ -0,0 +1,635 @@ 
+"""Tests for git worktree isolation (CLI --worktree / -w flag). + +Verifies worktree creation, cleanup, .worktreeinclude handling, +.gitignore management, and integration with the CLI. (#652) +""" + +import os +import shutil +import subprocess +import pytest +from pathlib import Path +from unittest.mock import patch, MagicMock + + +@pytest.fixture +def git_repo(tmp_path): + """Create a temporary git repo for testing.""" + repo = tmp_path / "test-repo" + repo.mkdir() + subprocess.run(["git", "init"], cwd=repo, capture_output=True) + subprocess.run( + ["git", "config", "user.email", "test@test.com"], + cwd=repo, capture_output=True, + ) + subprocess.run( + ["git", "config", "user.name", "Test"], + cwd=repo, capture_output=True, + ) + # Create initial commit (worktrees need at least one commit) + (repo / "README.md").write_text("# Test Repo\n") + subprocess.run(["git", "add", "."], cwd=repo, capture_output=True) + subprocess.run( + ["git", "commit", "-m", "Initial commit"], + cwd=repo, capture_output=True, + ) + return repo + + +# --------------------------------------------------------------------------- +# Lightweight reimplementations for testing (avoid importing cli.py) +# --------------------------------------------------------------------------- + +def _git_repo_root(cwd=None): + """Test version of _git_repo_root.""" + try: + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, text=True, timeout=5, + cwd=cwd, + ) + if result.returncode == 0: + return result.stdout.strip() + except Exception: + pass + return None + + +def _setup_worktree(repo_root): + """Test version of _setup_worktree — creates a worktree.""" + import uuid + short_id = uuid.uuid4().hex[:8] + wt_name = f"hermes-{short_id}" + branch_name = f"hermes/{wt_name}" + + worktrees_dir = Path(repo_root) / ".worktrees" + worktrees_dir.mkdir(parents=True, exist_ok=True) + wt_path = worktrees_dir / wt_name + + result = subprocess.run( + ["git", "worktree", "add", 
str(wt_path), "-b", branch_name, "HEAD"], + capture_output=True, text=True, timeout=30, cwd=repo_root, + ) + if result.returncode != 0: + return None + + return { + "path": str(wt_path), + "branch": branch_name, + "repo_root": repo_root, + } + + +def _cleanup_worktree(info): + """Test version of _cleanup_worktree.""" + wt_path = info["path"] + branch = info["branch"] + repo_root = info["repo_root"] + + if not Path(wt_path).exists(): + return + + # Check for uncommitted changes + status = subprocess.run( + ["git", "status", "--porcelain"], + capture_output=True, text=True, timeout=10, cwd=wt_path, + ) + has_changes = bool(status.stdout.strip()) + + if has_changes: + return False # Did not clean up + + subprocess.run( + ["git", "worktree", "remove", wt_path, "--force"], + capture_output=True, text=True, timeout=15, cwd=repo_root, + ) + subprocess.run( + ["git", "branch", "-D", branch], + capture_output=True, text=True, timeout=10, cwd=repo_root, + ) + return True # Cleaned up + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +class TestGitRepoDetection: + """Test git repo root detection.""" + + def test_detects_git_repo(self, git_repo): + root = _git_repo_root(cwd=str(git_repo)) + assert root is not None + assert Path(root).resolve() == git_repo.resolve() + + def test_detects_subdirectory(self, git_repo): + subdir = git_repo / "src" / "lib" + subdir.mkdir(parents=True) + root = _git_repo_root(cwd=str(subdir)) + assert root is not None + assert Path(root).resolve() == git_repo.resolve() + + def test_returns_none_outside_repo(self, tmp_path): + # tmp_path itself is not a git repo + bare_dir = tmp_path / "not-a-repo" + bare_dir.mkdir() + root = _git_repo_root(cwd=str(bare_dir)) + assert root is None + + +class TestWorktreeCreation: + """Test worktree setup.""" + + def test_creates_worktree(self, git_repo): + info = _setup_worktree(str(git_repo)) + 
assert info is not None + assert Path(info["path"]).exists() + assert info["branch"].startswith("hermes/hermes-") + assert info["repo_root"] == str(git_repo) + + # Verify it's a valid git worktree + result = subprocess.run( + ["git", "rev-parse", "--is-inside-work-tree"], + capture_output=True, text=True, cwd=info["path"], + ) + assert result.stdout.strip() == "true" + + def test_worktree_has_own_branch(self, git_repo): + info = _setup_worktree(str(git_repo)) + assert info is not None + + # Check branch name in worktree + result = subprocess.run( + ["git", "branch", "--show-current"], + capture_output=True, text=True, cwd=info["path"], + ) + assert result.stdout.strip() == info["branch"] + + def test_worktree_is_independent(self, git_repo): + """Two worktrees from the same repo are independent.""" + info1 = _setup_worktree(str(git_repo)) + info2 = _setup_worktree(str(git_repo)) + assert info1 is not None + assert info2 is not None + assert info1["path"] != info2["path"] + assert info1["branch"] != info2["branch"] + + # Create a file in worktree 1 + (Path(info1["path"]) / "only-in-wt1.txt").write_text("hello") + + # It should NOT appear in worktree 2 + assert not (Path(info2["path"]) / "only-in-wt1.txt").exists() + + def test_worktrees_dir_created(self, git_repo): + info = _setup_worktree(str(git_repo)) + assert info is not None + assert (git_repo / ".worktrees").is_dir() + + def test_worktree_has_repo_files(self, git_repo): + """Worktree should contain the repo's tracked files.""" + info = _setup_worktree(str(git_repo)) + assert info is not None + assert (Path(info["path"]) / "README.md").exists() + + +class TestWorktreeCleanup: + """Test worktree cleanup on exit.""" + + def test_clean_worktree_removed(self, git_repo): + info = _setup_worktree(str(git_repo)) + assert info is not None + assert Path(info["path"]).exists() + + result = _cleanup_worktree(info) + assert result is True + assert not Path(info["path"]).exists() + + def test_dirty_worktree_kept(self, 
git_repo): + info = _setup_worktree(str(git_repo)) + assert info is not None + + # Make uncommitted changes + (Path(info["path"]) / "new-file.txt").write_text("uncommitted") + subprocess.run( + ["git", "add", "new-file.txt"], + cwd=info["path"], capture_output=True, + ) + + result = _cleanup_worktree(info) + assert result is False + assert Path(info["path"]).exists() # Still there + + def test_branch_deleted_on_cleanup(self, git_repo): + info = _setup_worktree(str(git_repo)) + branch = info["branch"] + + _cleanup_worktree(info) + + # Branch should be gone + result = subprocess.run( + ["git", "branch", "--list", branch], + capture_output=True, text=True, cwd=str(git_repo), + ) + assert branch not in result.stdout + + def test_cleanup_nonexistent_worktree(self, git_repo): + """Cleanup should handle already-removed worktrees gracefully.""" + info = { + "path": str(git_repo / ".worktrees" / "nonexistent"), + "branch": "hermes/nonexistent", + "repo_root": str(git_repo), + } + # Should not raise + _cleanup_worktree(info) + + +class TestWorktreeInclude: + """Test .worktreeinclude file handling.""" + + def test_copies_included_files(self, git_repo): + """Files listed in .worktreeinclude should be copied to the worktree.""" + # Create a .env file (gitignored) + (git_repo / ".env").write_text("SECRET=abc123") + (git_repo / ".gitignore").write_text(".env\n.worktrees/\n") + subprocess.run( + ["git", "add", ".gitignore"], + cwd=str(git_repo), capture_output=True, + ) + subprocess.run( + ["git", "commit", "-m", "Add gitignore"], + cwd=str(git_repo), capture_output=True, + ) + + # Create .worktreeinclude + (git_repo / ".worktreeinclude").write_text(".env\n") + + # Import and use the real _setup_worktree logic for include handling + info = _setup_worktree(str(git_repo)) + assert info is not None + + # Manually copy .worktreeinclude entries (mirrors cli.py logic) + import shutil + include_file = git_repo / ".worktreeinclude" + wt_path = Path(info["path"]) + for line in 
include_file.read_text().splitlines(): + entry = line.strip() + if not entry or entry.startswith("#"): + continue + src = git_repo / entry + dst = wt_path / entry + if src.is_file(): + dst.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(str(src), str(dst)) + + # Verify .env was copied + assert (wt_path / ".env").exists() + assert (wt_path / ".env").read_text() == "SECRET=abc123" + + def test_ignores_comments_and_blanks(self, git_repo): + """Comments and blank lines in .worktreeinclude should be skipped.""" + (git_repo / ".worktreeinclude").write_text( + "# This is a comment\n" + "\n" + " # Another comment\n" + ) + info = _setup_worktree(str(git_repo)) + assert info is not None + # Should not crash — just skip all lines + + +class TestGitignoreManagement: + """Test that .worktrees/ is added to .gitignore.""" + + def test_adds_to_gitignore(self, git_repo): + """Creating a worktree should add .worktrees/ to .gitignore.""" + # Remove any existing .gitignore + gitignore = git_repo / ".gitignore" + if gitignore.exists(): + gitignore.unlink() + + info = _setup_worktree(str(git_repo)) + assert info is not None + + # Now manually add .worktrees/ to .gitignore (mirrors cli.py logic) + _ignore_entry = ".worktrees/" + existing = gitignore.read_text() if gitignore.exists() else "" + if _ignore_entry not in existing.splitlines(): + with open(gitignore, "a") as f: + if existing and not existing.endswith("\n"): + f.write("\n") + f.write(f"{_ignore_entry}\n") + + content = gitignore.read_text() + assert ".worktrees/" in content + + def test_does_not_duplicate_gitignore_entry(self, git_repo): + """If .worktrees/ is already in .gitignore, don't add again.""" + gitignore = git_repo / ".gitignore" + gitignore.write_text(".worktrees/\n") + + # The check should see it's already there + existing = gitignore.read_text() + assert ".worktrees/" in existing.splitlines() + + +class TestMultipleWorktrees: + """Test running multiple worktrees concurrently (the core use case).""" + + def 
test_ten_concurrent_worktrees(self, git_repo): + """Create 10 worktrees — simulating 10 parallel agents.""" + worktrees = [] + for _ in range(10): + info = _setup_worktree(str(git_repo)) + assert info is not None + worktrees.append(info) + + # All should exist and be independent + paths = [info["path"] for info in worktrees] + assert len(set(paths)) == 10 # All unique + + # Each should have the repo files + for info in worktrees: + assert (Path(info["path"]) / "README.md").exists() + + # Edit a file in one worktree + (Path(worktrees[0]["path"]) / "README.md").write_text("Modified in wt0") + + # Others should be unaffected + for info in worktrees[1:]: + assert (Path(info["path"]) / "README.md").read_text() == "# Test Repo\n" + + # List worktrees via git + result = subprocess.run( + ["git", "worktree", "list"], + capture_output=True, text=True, cwd=str(git_repo), + ) + # Should have 11 entries: main + 10 worktrees + lines = [l for l in result.stdout.strip().splitlines() if l.strip()] + assert len(lines) == 11 + + # Cleanup all + for info in worktrees: + # Discard changes first so cleanup works + subprocess.run( + ["git", "checkout", "--", "."], + cwd=info["path"], capture_output=True, + ) + _cleanup_worktree(info) + + # All should be removed + for info in worktrees: + assert not Path(info["path"]).exists() + + +class TestWorktreeDirectorySymlink: + """Test .worktreeinclude with directories (symlinked).""" + + def test_symlinks_directory(self, git_repo): + """Directories in .worktreeinclude should be symlinked.""" + # Create a .venv directory + venv_dir = git_repo / ".venv" / "lib" + venv_dir.mkdir(parents=True) + (venv_dir / "marker.txt").write_text("venv marker") + (git_repo / ".gitignore").write_text(".venv/\n.worktrees/\n") + subprocess.run( + ["git", "add", ".gitignore"], cwd=str(git_repo), capture_output=True + ) + subprocess.run( + ["git", "commit", "-m", "gitignore"], cwd=str(git_repo), capture_output=True + ) + + (git_repo / 
".worktreeinclude").write_text(".venv/\n") + + info = _setup_worktree(str(git_repo)) + assert info is not None + + wt_path = Path(info["path"]) + src = git_repo / ".venv" + dst = wt_path / ".venv" + + # Manually symlink (mirrors cli.py logic) + if not dst.exists(): + dst.parent.mkdir(parents=True, exist_ok=True) + os.symlink(str(src.resolve()), str(dst)) + + assert dst.is_symlink() + assert (dst / "lib" / "marker.txt").read_text() == "venv marker" + + +class TestStaleWorktreePruning: + """Test _prune_stale_worktrees garbage collection.""" + + def test_prunes_old_clean_worktree(self, git_repo): + """Old clean worktrees should be removed on prune.""" + import time + + info = _setup_worktree(str(git_repo)) + assert info is not None + assert Path(info["path"]).exists() + + # Make the worktree look old (set mtime to 25h ago) + old_time = time.time() - (25 * 3600) + os.utime(info["path"], (old_time, old_time)) + + # Reimplementation of prune logic (matches cli.py) + worktrees_dir = git_repo / ".worktrees" + cutoff = time.time() - (24 * 3600) + + for entry in worktrees_dir.iterdir(): + if not entry.is_dir() or not entry.name.startswith("hermes-"): + continue + try: + mtime = entry.stat().st_mtime + if mtime > cutoff: + continue + except Exception: + continue + + status = subprocess.run( + ["git", "status", "--porcelain"], + capture_output=True, text=True, timeout=5, cwd=str(entry), + ) + if status.stdout.strip(): + continue + + branch_result = subprocess.run( + ["git", "branch", "--show-current"], + capture_output=True, text=True, timeout=5, cwd=str(entry), + ) + branch = branch_result.stdout.strip() + subprocess.run( + ["git", "worktree", "remove", str(entry), "--force"], + capture_output=True, text=True, timeout=15, cwd=str(git_repo), + ) + if branch: + subprocess.run( + ["git", "branch", "-D", branch], + capture_output=True, text=True, timeout=10, cwd=str(git_repo), + ) + + assert not Path(info["path"]).exists() + + def test_keeps_recent_worktree(self, git_repo): + 
"""Recent worktrees should NOT be pruned.""" + import time + + info = _setup_worktree(str(git_repo)) + assert info is not None + + # Don't modify mtime — it's recent + worktrees_dir = git_repo / ".worktrees" + cutoff = time.time() - (24 * 3600) + + pruned = False + for entry in worktrees_dir.iterdir(): + if not entry.is_dir() or not entry.name.startswith("hermes-"): + continue + mtime = entry.stat().st_mtime + if mtime > cutoff: + continue # Too recent + pruned = True + + assert not pruned + assert Path(info["path"]).exists() + + def test_keeps_dirty_old_worktree(self, git_repo): + """Old worktrees with uncommitted changes should NOT be pruned.""" + import time + + info = _setup_worktree(str(git_repo)) + assert info is not None + + # Make it dirty + (Path(info["path"]) / "dirty.txt").write_text("uncommitted") + subprocess.run( + ["git", "add", "dirty.txt"], + cwd=info["path"], capture_output=True, + ) + + # Make it old + old_time = time.time() - (25 * 3600) + os.utime(info["path"], (old_time, old_time)) + + # Check if it would be pruned + status = subprocess.run( + ["git", "status", "--porcelain"], + capture_output=True, text=True, cwd=info["path"], + ) + has_changes = bool(status.stdout.strip()) + assert has_changes # Should be dirty → not pruned + assert Path(info["path"]).exists() + + +class TestEdgeCases: + """Test edge cases for robustness.""" + + def test_no_commits_repo(self, tmp_path): + """Worktree creation should fail gracefully on a repo with no commits.""" + repo = tmp_path / "empty-repo" + repo.mkdir() + subprocess.run(["git", "init"], cwd=str(repo), capture_output=True) + + info = _setup_worktree(str(repo)) + assert info is None # Should fail gracefully + + def test_not_a_git_repo(self, tmp_path): + """Repo detection should return None for non-git directories.""" + bare = tmp_path / "not-git" + bare.mkdir() + root = _git_repo_root(cwd=str(bare)) + assert root is None + + def test_worktrees_dir_already_exists(self, git_repo): + """Should work fine if 
.worktrees/ already exists.""" + (git_repo / ".worktrees").mkdir(exist_ok=True) + info = _setup_worktree(str(git_repo)) + assert info is not None + assert Path(info["path"]).exists() + + +class TestCLIFlagLogic: + """Test the flag/config OR logic from main().""" + + def test_worktree_flag_triggers(self): + """--worktree flag should trigger worktree creation.""" + worktree = True + w = False + config_worktree = False + use_worktree = worktree or w or config_worktree + assert use_worktree + + def test_w_flag_triggers(self): + """-w flag should trigger worktree creation.""" + worktree = False + w = True + config_worktree = False + use_worktree = worktree or w or config_worktree + assert use_worktree + + def test_config_triggers(self): + """worktree: true in config should trigger worktree creation.""" + worktree = False + w = False + config_worktree = True + use_worktree = worktree or w or config_worktree + assert use_worktree + + def test_none_set_no_trigger(self): + """No flags and no config should not trigger.""" + worktree = False + w = False + config_worktree = False + use_worktree = worktree or w or config_worktree + assert not use_worktree + + +class TestTerminalCWDIntegration: + """Test that TERMINAL_CWD is correctly set to the worktree path.""" + + def test_terminal_cwd_set(self, git_repo): + """After worktree setup, TERMINAL_CWD should point to the worktree.""" + info = _setup_worktree(str(git_repo)) + assert info is not None + + # This is what main() does: + os.environ["TERMINAL_CWD"] = info["path"] + assert os.environ["TERMINAL_CWD"] == info["path"] + assert Path(os.environ["TERMINAL_CWD"]).exists() + + # Clean up env + del os.environ["TERMINAL_CWD"] + + def test_terminal_cwd_is_valid_git_repo(self, git_repo): + """The TERMINAL_CWD worktree should be a valid git working tree.""" + info = _setup_worktree(str(git_repo)) + assert info is not None + + result = subprocess.run( + ["git", "rev-parse", "--is-inside-work-tree"], + capture_output=True, text=True, 
cwd=info["path"], + ) + assert result.stdout.strip() == "true" + + +class TestSystemPromptInjection: + """Test that the agent gets worktree context in its system prompt.""" + + def test_prompt_note_format(self, git_repo): + """Verify the system prompt note contains all required info.""" + info = _setup_worktree(str(git_repo)) + assert info is not None + + # This is what main() does: + wt_note = ( + f"\n\n[System note: You are working in an isolated git worktree at " + f"{info['path']}. Your branch is `{info['branch']}`. " + f"Changes here do not affect the main working tree or other agents. " + f"Remember to commit and push your changes, and create a PR if appropriate. " + f"The original repo is at {info['repo_root']}.]" + ) + + assert info["path"] in wt_note + assert info["branch"] in wt_note + assert info["repo_root"] in wt_note + assert "isolated git worktree" in wt_note + assert "commit and push" in wt_note diff --git a/tests/tools/test_clipboard.py b/tests/tools/test_clipboard.py index dc064e6ca..19be40125 100644 --- a/tests/tools/test_clipboard.py +++ b/tests/tools/test_clipboard.py @@ -550,14 +550,13 @@ class TestConvertToPng: """BMP file should still be reported as success if no converter available.""" dest = tmp_path / "img.png" dest.write_bytes(FAKE_BMP) # it's a BMP but named .png - # Both Pillow and ImageMagick fail - with patch("hermes_cli.clipboard.subprocess.run", side_effect=FileNotFoundError): - # Pillow import fails - with pytest.raises(Exception): - from PIL import Image # noqa — this may or may not work - # The function should still return True if file exists and has content - # (raw BMP is better than nothing) - assert dest.exists() and dest.stat().st_size > 0 + # Both Pillow and ImageMagick unavailable + with patch.dict(sys.modules, {"PIL": None, "PIL.Image": None}): + with patch("hermes_cli.clipboard.subprocess.run", side_effect=FileNotFoundError): + result = _convert_to_png(dest) + # Raw BMP is better than nothing — function should return 
True + assert result is True + assert dest.exists() and dest.stat().st_size > 0 def test_imagemagick_failure_preserves_original(self, tmp_path): """When ImageMagick convert fails, the original file must not be lost.""" @@ -647,11 +646,11 @@ class TestHasClipboardImage: # ═════════════════════════════════════════════════════════════════════════ -# Level 2: _build_multimodal_content — image → OpenAI vision format +# Level 2: _preprocess_images_with_vision — image → text via vision tool # ═════════════════════════════════════════════════════════════════════════ -class TestBuildMultimodalContent: - """Test the extracted _build_multimodal_content method directly.""" +class TestPreprocessImagesWithVision: + """Test vision-based image pre-processing for the CLI.""" @pytest.fixture def cli(self): @@ -682,55 +681,81 @@ class TestBuildMultimodalContent: img.write_bytes(content) return img + def _mock_vision_success(self, description="A test image with colored pixels."): + """Return an async mock that simulates a successful vision_analyze_tool call.""" + import json + async def _fake_vision(**kwargs): + return json.dumps({"success": True, "analysis": description}) + return _fake_vision + + def _mock_vision_failure(self): + """Return an async mock that simulates a failed vision_analyze_tool call.""" + import json + async def _fake_vision(**kwargs): + return json.dumps({"success": False, "analysis": "Error"}) + return _fake_vision + def test_single_image_with_text(self, cli, tmp_path): img = self._make_image(tmp_path) - result = cli._build_multimodal_content("Describe this", [img]) + with patch("tools.vision_tools.vision_analyze_tool", side_effect=self._mock_vision_success()): + result = cli._preprocess_images_with_vision("Describe this", [img]) - assert len(result) == 2 - assert result[0] == {"type": "text", "text": "Describe this"} - assert result[1]["type"] == "image_url" - url = result[1]["image_url"]["url"] - assert url.startswith("data:image/png;base64,") - # Verify the 
base64 actually decodes to our image - b64_data = url.split(",", 1)[1] - assert base64.b64decode(b64_data) == FAKE_PNG + assert isinstance(result, str) + assert "A test image with colored pixels." in result + assert "Describe this" in result + assert str(img) in result + assert "base64," not in result # no raw base64 image content def test_multiple_images(self, cli, tmp_path): imgs = [self._make_image(tmp_path, f"img{i}.png") for i in range(3)] - result = cli._build_multimodal_content("Compare", imgs) - assert len(result) == 4 # 1 text + 3 images - assert all(r["type"] == "image_url" for r in result[1:]) + with patch("tools.vision_tools.vision_analyze_tool", side_effect=self._mock_vision_success()): + result = cli._preprocess_images_with_vision("Compare", imgs) + + assert isinstance(result, str) + assert "Compare" in result + # Each image path should be referenced + for img in imgs: + assert str(img) in result def test_empty_text_gets_default_question(self, cli, tmp_path): img = self._make_image(tmp_path) - result = cli._build_multimodal_content("", [img]) - assert result[0]["text"] == "What do you see in this image?" 
- - def test_jpeg_mime_type(self, cli, tmp_path): - img = self._make_image(tmp_path, "photo.jpg", b"\xff\xd8\xff\x00" * 20) - result = cli._build_multimodal_content("test", [img]) - assert "image/jpeg" in result[1]["image_url"]["url"] - - def test_webp_mime_type(self, cli, tmp_path): - img = self._make_image(tmp_path, "img.webp", b"RIFF\x00\x00" * 10) - result = cli._build_multimodal_content("test", [img]) - assert "image/webp" in result[1]["image_url"]["url"] - - def test_unknown_extension_defaults_to_png(self, cli, tmp_path): - img = self._make_image(tmp_path, "data.bmp", b"\x00" * 50) - result = cli._build_multimodal_content("test", [img]) - assert "image/png" in result[1]["image_url"]["url"] + with patch("tools.vision_tools.vision_analyze_tool", side_effect=self._mock_vision_success()): + result = cli._preprocess_images_with_vision("", [img]) + assert isinstance(result, str) + assert "A test image with colored pixels." in result def test_missing_image_skipped(self, cli, tmp_path): missing = tmp_path / "gone.png" - result = cli._build_multimodal_content("test", [missing]) - assert len(result) == 1 # only text + with patch("tools.vision_tools.vision_analyze_tool", side_effect=self._mock_vision_success()): + result = cli._preprocess_images_with_vision("test", [missing]) + # No images analyzed, falls back to default + assert result == "test" def test_mix_of_existing_and_missing(self, cli, tmp_path): real = self._make_image(tmp_path, "real.png") missing = tmp_path / "gone.png" - result = cli._build_multimodal_content("test", [real, missing]) - assert len(result) == 2 # text + 1 real image + with patch("tools.vision_tools.vision_analyze_tool", side_effect=self._mock_vision_success()): + result = cli._preprocess_images_with_vision("test", [real, missing]) + assert str(real) in result + assert str(missing) not in result + assert "test" in result + + def test_vision_failure_includes_path(self, cli, tmp_path): + img = self._make_image(tmp_path) + with 
patch("tools.vision_tools.vision_analyze_tool", side_effect=self._mock_vision_failure()): + result = cli._preprocess_images_with_vision("check this", [img]) + assert isinstance(result, str) + assert str(img) in result # path still included for retry + assert "check this" in result + + def test_vision_exception_includes_path(self, cli, tmp_path): + img = self._make_image(tmp_path) + async def _explode(**kwargs): + raise RuntimeError("API down") + with patch("tools.vision_tools.vision_analyze_tool", side_effect=_explode): + result = cli._preprocess_images_with_vision("check this", [img]) + assert isinstance(result, str) + assert str(img) in result # path still included for retry # ═════════════════════════════════════════════════════════════════════════ diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 948af4d0f..aea7b127c 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -56,7 +56,6 @@ class TestDelegateRequirements(unittest.TestCase): self.assertIn("tasks", props) self.assertIn("context", props) self.assertIn("toolsets", props) - self.assertIn("model", props) self.assertIn("max_iterations", props) self.assertEqual(props["tasks"]["maxItems"], 3) diff --git a/tests/tools/test_file_operations.py b/tests/tools/test_file_operations.py index b427826e5..0db3fb43b 100644 --- a/tests/tools/test_file_operations.py +++ b/tests/tools/test_file_operations.py @@ -259,6 +259,70 @@ class TestShellFileOpsHelpers: assert ops.cwd == "/" +class TestSearchPathValidation: + """Test that search() returns an error for non-existent paths.""" + + def test_search_nonexistent_path_returns_error(self, mock_env): + """search() should return an error when the path doesn't exist.""" + def side_effect(command, **kwargs): + if "test -e" in command: + return {"output": "not_found", "returncode": 1} + if "command -v" in command: + return {"output": "yes", "returncode": 0} + return {"output": "", "returncode": 0} + mock_env.execute.side_effect 
= side_effect + ops = ShellFileOperations(mock_env) + result = ops.search("pattern", path="/nonexistent/path") + assert result.error is not None + assert "not found" in result.error.lower() or "Path not found" in result.error + + def test_search_nonexistent_path_files_mode(self, mock_env): + """search(target='files') should also return error for bad paths.""" + def side_effect(command, **kwargs): + if "test -e" in command: + return {"output": "not_found", "returncode": 1} + if "command -v" in command: + return {"output": "yes", "returncode": 0} + return {"output": "", "returncode": 0} + mock_env.execute.side_effect = side_effect + ops = ShellFileOperations(mock_env) + result = ops.search("*.py", path="/nonexistent/path", target="files") + assert result.error is not None + assert "not found" in result.error.lower() or "Path not found" in result.error + + def test_search_existing_path_proceeds(self, mock_env): + """search() should proceed normally when the path exists.""" + def side_effect(command, **kwargs): + if "test -e" in command: + return {"output": "exists", "returncode": 0} + if "command -v" in command: + return {"output": "yes", "returncode": 0} + # rg returns exit 1 (no matches) with empty output + return {"output": "", "returncode": 1} + mock_env.execute.side_effect = side_effect + ops = ShellFileOperations(mock_env) + result = ops.search("pattern", path="/existing/path") + assert result.error is None + assert result.total_count == 0 # No matches but no error + + def test_search_rg_error_exit_code(self, mock_env): + """search() should report error when rg returns exit code 2.""" + call_count = {"n": 0} + def side_effect(command, **kwargs): + call_count["n"] += 1 + if "test -e" in command: + return {"output": "exists", "returncode": 0} + if "command -v" in command: + return {"output": "yes", "returncode": 0} + # rg returns exit 2 (error) with empty output + return {"output": "", "returncode": 2} + mock_env.execute.side_effect = side_effect + ops = 
ShellFileOperations(mock_env) + result = ops.search("pattern", path="/some/path") + assert result.error is not None + assert "search failed" in result.error.lower() or "Search error" in result.error + + class TestShellFileOpsWriteDenied: def test_write_file_denied_path(self, file_ops): result = file_ops.write_file("~/.ssh/authorized_keys", "evil key") diff --git a/tools/browser_tool.py b/tools/browser_tool.py index fc7ee69e5..e1bd32239 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -424,7 +424,7 @@ BROWSER_TOOL_SCHEMAS = [ }, { "name": "browser_vision", - "description": "Take a screenshot of the current page and analyze it with vision AI. Use this when you need to visually understand what's on the page - especially useful for CAPTCHAs, visual verification challenges, complex layouts, or when the text snapshot doesn't capture important visual information. Requires browser_navigate to be called first.", + "description": "Take a screenshot of the current page and analyze it with vision AI. Use this when you need to visually understand what's on the page - especially useful for CAPTCHAs, visual verification challenges, complex layouts, or when the text snapshot doesn't capture important visual information. Returns both the AI analysis and a screenshot_path that you can share with the user by including MEDIA:<screenshot_path> in your response. 
Requires browser_navigate to be called first.", "parameters": { "type": "object", "properties": { @@ -795,10 +795,12 @@ def _run_browser_command( ) os.makedirs(task_socket_dir, exist_ok=True) - browser_env = { - **os.environ, - "AGENT_BROWSER_SOCKET_DIR": task_socket_dir, - } + browser_env = {**os.environ} + # Ensure PATH includes standard dirs (systemd services may have minimal PATH) + _SANE_PATH = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + if "/usr/bin" not in browser_env.get("PATH", "").split(":"): + browser_env["PATH"] = f"{browser_env.get('PATH', '')}:{_SANE_PATH}" + browser_env["AGENT_BROWSER_SOCKET_DIR"] = task_socket_dir result = subprocess.run( cmd_parts, @@ -808,10 +810,18 @@ def _run_browser_command( env=browser_env, ) - # Log stderr for diagnostics (agent-browser may emit warnings there) + # Log stderr for diagnostics — use warning level on failure so it's visible if result.stderr and result.stderr.strip(): - logger.debug("stderr from '%s': %s", command, result.stderr.strip()[:200]) + level = logging.WARNING if result.returncode != 0 else logging.DEBUG + logger.log(level, "browser '%s' stderr: %s", command, result.stderr.strip()[:500]) + # Log empty output as warning — common sign of broken agent-browser + if not result.stdout.strip() and result.returncode == 0: + logger.warning("browser '%s' returned empty stdout with rc=0. " + "cmd=%s stderr=%s", + command, " ".join(cmd_parts[:4]) + "...", + (result.stderr or "")[:200]) + # Parse JSON output if result.stdout.strip(): try: @@ -1289,15 +1299,17 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str: text-based snapshot may not capture (CAPTCHAs, verification challenges, images, complex layouts, etc.). + The screenshot is saved persistently and its file path is returned alongside + the analysis, so it can be shared with users via MEDIA:<path> in the response. 
+ Args: question: What you want to know about the page visually task_id: Task identifier for session isolation Returns: - JSON string with vision analysis results + JSON string with vision analysis results and screenshot_path """ import base64 - import tempfile import uuid as uuid_mod from pathlib import Path @@ -1311,11 +1323,17 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str: "Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision." }, ensure_ascii=False) - # Create a temporary file for the screenshot - temp_dir = Path(tempfile.gettempdir()) - screenshot_path = temp_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png" + # Save screenshot to persistent location so it can be shared with users + hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) + screenshots_dir = hermes_home / "browser_screenshots" + screenshot_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png" try: + screenshots_dir.mkdir(parents=True, exist_ok=True) + + # Prune old screenshots (older than 24 hours) to prevent unbounded disk growth + _cleanup_old_screenshots(screenshots_dir, max_age_hours=24) + # Take screenshot using agent-browser result = _run_browser_command( effective_task_id, @@ -1372,21 +1390,35 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str: return json.dumps({ "success": True, "analysis": analysis, + "screenshot_path": str(screenshot_path), }, ensure_ascii=False) except Exception as e: - return json.dumps({ - "success": False, - "error": f"Error during vision analysis: {str(e)}" - }, ensure_ascii=False) - - finally: - # Clean up screenshot file + # Clean up screenshot on failure if screenshot_path.exists(): try: screenshot_path.unlink() except Exception: pass + return json.dumps({ + "success": False, + "error": f"Error during vision analysis: {str(e)}" + }, ensure_ascii=False) + + +def _cleanup_old_screenshots(screenshots_dir, max_age_hours=24): + """Remove browser 
screenshots older than max_age_hours to prevent disk bloat.""" + import time + try: + cutoff = time.time() - (max_age_hours * 3600) + for f in screenshots_dir.glob("browser_screenshot_*.png"): + try: + if f.stat().st_mtime < cutoff: + f.unlink() + except Exception: + pass + except Exception: + pass # Non-critical — don't fail the screenshot operation # ============================================================================ diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 9f0b658ca..0d3f17609 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -78,7 +78,7 @@ _TOOL_STUBS = { "web_extract": ( "web_extract", "urls: list", - '"""Extract content from URLs. Returns dict with results list of {url, content, error}."""', + '"""Extract content from URLs. Returns dict with results list of {url, title, content, error}."""', '{"urls": urls}', ), "read_file": ( @@ -605,7 +605,7 @@ _TOOL_DOC_LINES = [ " Returns {\"data\": {\"web\": [{\"url\", \"title\", \"description\"}, ...]}}"), ("web_extract", " web_extract(urls: list[str]) -> dict\n" - " Returns {\"results\": [{\"url\", \"content\", \"error\"}, ...]} where content is markdown"), + " Returns {\"results\": [{\"url\", \"title\", \"content\", \"error\"}, ...]} where content is markdown"), ("read_file", " read_file(path: str, offset: int = 1, limit: int = 500) -> dict\n" " Lines are 1-indexed. 
Returns {\"content\": \"...\", \"total_lines\": N}"), diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index e219259ea..c8de97225 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -205,6 +205,9 @@ def _run_single_child( provider=getattr(parent_agent, "provider", None), api_mode=getattr(parent_agent, "api_mode", None), max_iterations=max_iterations, + max_tokens=getattr(parent_agent, "max_tokens", None), + reasoning_config=getattr(parent_agent, "reasoning_config", None), + prefill_messages=getattr(parent_agent, "prefill_messages", None), enabled_toolsets=child_toolsets, quiet_mode=True, ephemeral_system_prompt=child_prompt, @@ -293,7 +296,6 @@ def delegate_task( context: Optional[str] = None, toolsets: Optional[List[str]] = None, tasks: Optional[List[Dict[str, Any]]] = None, - model: Optional[str] = None, max_iterations: Optional[int] = None, parent_agent=None, ) -> str: @@ -355,7 +357,7 @@ def delegate_task( goal=t["goal"], context=t.get("context"), toolsets=t.get("toolsets") or toolsets, - model=model, + model=None, max_iterations=effective_max_iter, parent_agent=parent_agent, task_count=1, @@ -380,7 +382,7 @@ def delegate_task( goal=t["goal"], context=t.get("context"), toolsets=t.get("toolsets") or toolsets, - model=model, + model=None, max_iterations=effective_max_iter, parent_agent=parent_agent, task_count=n_tasks, @@ -533,13 +535,6 @@ DELEGATE_TASK_SCHEMA = { "When provided, top-level goal/context/toolsets are ignored." ), }, - "model": { - "type": "string", - "description": ( - "Model override for the subagent(s). Omit to use your " - "same model. Use a cheaper/faster model for simple subtasks." 
- ), - }, "max_iterations": { "type": "integer", "description": ( @@ -565,7 +560,6 @@ registry.register( context=args.get("context"), toolsets=args.get("toolsets"), tasks=args.get("tasks"), - model=args.get("model"), max_iterations=args.get("max_iterations"), parent_agent=kw.get("parent_agent")), check_fn=check_delegate_requirements, diff --git a/tools/environments/local.py b/tools/environments/local.py index 78be54c7c..e1df97b4c 100644 --- a/tools/environments/local.py +++ b/tools/environments/local.py @@ -17,15 +17,21 @@ from tools.environments.base import BaseEnvironment _OUTPUT_FENCE = "__HERMES_FENCE_a9f7b3__" -def _find_shell() -> str: - """Find the best shell for command execution. +def _find_bash() -> str: + """Find bash for command execution. - On Unix: uses $SHELL, falls back to bash. + The fence wrapper uses bash syntax (semicolons, $?, printf), so we + must use bash — not the user's $SHELL which could be fish/zsh/etc. On Windows: uses Git Bash (bundled with Git for Windows). - Raises RuntimeError if no suitable shell is found on Windows. """ if not _IS_WINDOWS: - return os.environ.get("SHELL") or shutil.which("bash") or "/bin/bash" + return ( + shutil.which("bash") + or ("/usr/bin/bash" if os.path.isfile("/usr/bin/bash") else None) + or ("/bin/bash" if os.path.isfile("/bin/bash") else None) + or os.environ.get("SHELL") # last resort: whatever they have + or "/bin/sh" + ) # Windows: look for Git Bash (installed with Git for Windows). # Allow override via env var (same pattern as Claude Code). @@ -53,6 +59,11 @@ def _find_shell() -> str: "Or set HERMES_GIT_BASH_PATH to your bash.exe location." ) + +# Backward compat — process_registry.py imports this name +_find_shell = _find_bash + + # Noise lines emitted by interactive shells when stdin is not a terminal. # Used as a fallback when output fence markers are missing. 
_SHELL_NOISE_SUBSTRINGS = ( @@ -153,13 +164,11 @@ class LocalEnvironment(BaseEnvironment): exec_command = self._prepare_command(command) try: - # Use the user's shell as an interactive login shell (-lic) so - # that ALL rc files are sourced — including content after the - # interactive guard in .bashrc (case $- in *i*)..esac) where - # tools like nvm, pyenv, and cargo install their init scripts. - # -l alone isn't enough: .profile sources .bashrc, but the guard - # returns early because the shell isn't interactive. - user_shell = _find_shell() + # The fence wrapper uses bash syntax (semicolons, $?, printf). + # Always use bash for the wrapper — NOT $SHELL which could be + # fish, zsh, or another shell with incompatible syntax. + # The -lic flags source rc files so tools like nvm/pyenv work. + user_shell = _find_bash() # Wrap with output fences so we can later extract the real # command output and discard shell init/exit noise. fenced_cmd = ( @@ -169,11 +178,19 @@ class LocalEnvironment(BaseEnvironment): f" printf '{_OUTPUT_FENCE}';" f" exit $__hermes_rc" ) + # Ensure PATH always includes standard dirs — systemd services + # and some terminal multiplexers inherit a minimal PATH. 
+ _SANE_PATH = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + run_env = dict(os.environ | self.env) + existing_path = run_env.get("PATH", "") + if "/usr/bin" not in existing_path.split(":"): + run_env["PATH"] = f"{existing_path}:{_SANE_PATH}" if existing_path else _SANE_PATH + proc = subprocess.Popen( [user_shell, "-lic", fenced_cmd], text=True, cwd=work_dir, - env=os.environ | self.env, + env=run_env, encoding="utf-8", errors="replace", stdout=subprocess.PIPE, diff --git a/tools/file_operations.py b/tools/file_operations.py index 182d35f5f..3f72c5fdb 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -819,6 +819,14 @@ class ShellFileOperations(FileOperations): # Expand ~ and other shell paths path = self._expand_path(path) + # Validate that the path exists before searching + check = self._exec(f"test -e {self._escape_shell_arg(path)} && echo exists || echo not_found") + if "not_found" in check.stdout: + return SearchResult( + error=f"Path not found: {path}. 
Verify the path exists (use 'terminal' to check).", + total_count=0 + ) + if target == "files": return self._search_files(pattern, path, limit, offset) else: @@ -919,6 +927,11 @@ class ShellFileOperations(FileOperations): cmd = " ".join(cmd_parts) result = self._exec(cmd, timeout=60) + # rg exit codes: 0=matches found, 1=no matches, 2=error + if result.exit_code == 2 and not result.stdout.strip(): + error_msg = result.stderr.strip() if hasattr(result, 'stderr') and result.stderr else "Search error" + return SearchResult(error=f"Search failed: {error_msg}", total_count=0) + # Parse results based on output mode if output_mode == "files_only": all_files = [f for f in result.stdout.strip().split('\n') if f] @@ -1013,6 +1026,11 @@ class ShellFileOperations(FileOperations): cmd = " ".join(cmd_parts) result = self._exec(cmd, timeout=60) + # grep exit codes: 0=matches found, 1=no matches, 2=error + if result.exit_code == 2 and not result.stdout.strip(): + error_msg = result.stderr.strip() if hasattr(result, 'stderr') and result.stderr else "Search error" + return SearchResult(error=f"Search failed: {error_msg}", total_count=0) + if output_mode == "files_only": all_files = [f for f in result.stdout.strip().split('\n') if f] total = len(all_files) diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py index 151b6eccb..3789f38e7 100644 --- a/tools/image_generation_tool.py +++ b/tools/image_generation_tool.py @@ -31,7 +31,6 @@ Usage: import json import logging import os -import asyncio import datetime from typing import Dict, Any, Optional, Union import fal_client @@ -153,10 +152,13 @@ def _validate_parameters( return validated -async def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]: +def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]: """ Upscale an image using FAL.ai's Clarity Upscaler. + Uses the synchronous fal_client API to avoid event loop lifecycle issues + when called from threaded contexts (e.g. 
gateway thread pool). + Args: image_url (str): URL of the image to upscale original_prompt (str): Original prompt used to generate the image @@ -180,14 +182,17 @@ async def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any] "enable_safety_checker": UPSCALER_SAFETY_CHECKER } - # Submit upscaler request - handler = await fal_client.submit_async( + # Use sync API — fal_client.submit() uses httpx.Client (no event loop). + # The async API (submit_async) caches a global httpx.AsyncClient via + # @cached_property, which breaks when asyncio.run() destroys the loop + # between calls (gateway thread-pool pattern). + handler = fal_client.submit( UPSCALER_MODEL, arguments=upscaler_arguments ) - # Get the upscaled result - result = await handler.get() + # Get the upscaled result (sync — blocks until done) + result = handler.get() if result and "image" in result: upscaled_image = result["image"] @@ -208,7 +213,7 @@ async def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any] return None -async def image_generate_tool( +def image_generate_tool( prompt: str, aspect_ratio: str = DEFAULT_ASPECT_RATIO, num_inference_steps: int = DEFAULT_NUM_INFERENCE_STEPS, @@ -220,10 +225,10 @@ async def image_generate_tool( """ Generate images from text prompts using FAL.ai's FLUX 2 Pro model with automatic upscaling. - This tool uses FAL.ai's FLUX 2 Pro model for high-quality text-to-image generation - with extensive customization options. Generated images are automatically upscaled 2x - using FAL.ai's Clarity Upscaler for enhanced quality. The final upscaled images are - returned as URLs that can be displayed using <img src="{URL}"></img> tags. + Uses the synchronous fal_client API to avoid event loop lifecycle issues. + The async API's global httpx.AsyncClient (cached via @cached_property) breaks + when asyncio.run() destroys and recreates event loops between calls, which + happens in the gateway's thread-pool pattern. 
Args: prompt (str): The text prompt describing the desired image @@ -306,14 +311,14 @@ async def image_generate_tool( logger.info(" Steps: %s", validated_params['num_inference_steps']) logger.info(" Guidance: %s", validated_params['guidance_scale']) - # Submit request to FAL.ai - handler = await fal_client.submit_async( + # Submit request to FAL.ai using sync API (avoids cached event loop issues) + handler = fal_client.submit( DEFAULT_MODEL, arguments=arguments ) - # Get the result - result = await handler.get() + # Get the result (sync — blocks until done) + result = handler.get() generation_time = (datetime.datetime.now() - start_time).total_seconds() @@ -336,7 +341,7 @@ async def image_generate_tool( } # Attempt to upscale the image - upscaled_image = await _upscale_image(img["url"], prompt.strip()) + upscaled_image = _upscale_image(img["url"], prompt.strip()) if upscaled_image: # Use upscaled image if successful @@ -552,5 +557,5 @@ registry.register( handler=_handle_image_generate, check_fn=check_image_generation_requirements, requires_env=["FAL_KEY"], - is_async=True, + is_async=False, # Switched to sync fal_client API to fix "Event loop is closed" in gateway ) diff --git a/tools/skills_guard.py b/tools/skills_guard.py index 34a4294e8..0b6d7fee7 100644 --- a/tools/skills_guard.py +++ b/tools/skills_guard.py @@ -946,6 +946,11 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult, client = OpenAI( base_url=OPENROUTER_BASE_URL, api_key=api_key, + default_headers={ + "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", + "X-OpenRouter-Title": "Hermes Agent", + "X-OpenRouter-Categories": "productivity,cli-agent", + }, ) response = client.chat.completions.create( model=model, diff --git a/tools/web_tools.py b/tools/web_tools.py index 5bf223425..0fd0f4107 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -787,6 +787,7 @@ async def web_extract_tool( # Trim output to minimal fields per entry: title, content, error trimmed_results = [ { 
+ "url": r.get("url", ""), "title": r.get("title", ""), "content": r.get("content", ""), "error": r.get("error"), diff --git a/trajectory_compressor.py b/trajectory_compressor.py index dedae1ade..3f49c617b 100644 --- a/trajectory_compressor.py +++ b/trajectory_compressor.py @@ -351,16 +351,27 @@ class TrajectoryCompressor: from openai import OpenAI, AsyncOpenAI + # OpenRouter app attribution headers (only for OpenRouter endpoints) + extra = {} + if "openrouter" in self.config.base_url.lower(): + extra["default_headers"] = { + "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", + "X-OpenRouter-Title": "Hermes Agent", + "X-OpenRouter-Categories": "productivity,cli-agent", + } + # Sync client (for backwards compatibility) self.client = OpenAI( api_key=api_key, - base_url=self.config.base_url + base_url=self.config.base_url, + **extra, ) # Async client for parallel processing self.async_client = AsyncOpenAI( api_key=api_key, - base_url=self.config.base_url + base_url=self.config.base_url, + **extra, ) print(f"✅ Initialized OpenRouter client: {self.config.summarization_model}") diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index d142bb4bf..3613e97a7 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -17,11 +17,13 @@ These are commands you run from your shell. 
| `hermes` | Start interactive chat (default) | | `hermes chat -q "Hello"` | Single query mode (non-interactive) | | `hermes chat --continue` / `-c` | Resume the most recent session | -| `hermes chat --resume <id>` / `-r <id>` | Resume a specific session | +| `hermes chat -c "my project"` | Resume a session by name (latest in lineage) | +| `hermes chat --resume <id>` / `-r <id>` | Resume a specific session by ID or title | | `hermes chat --model <name>` | Use a specific model | | `hermes chat --provider <name>` | Force a provider (`nous`, `openrouter`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`) | | `hermes chat --toolsets "web,terminal"` / `-t` | Use specific toolsets | | `hermes chat --verbose` | Enable verbose/debug output | +| `hermes --worktree` / `-w` | Start in an isolated git worktree (for parallel agents) | ### Provider & Model Management @@ -102,7 +104,8 @@ These are commands you run from your shell. | Command | Description | |---------|-------------| -| `hermes sessions list` | Browse past sessions | +| `hermes sessions list` | Browse past sessions (shows title, preview, last active) | +| `hermes sessions rename <id> <title>` | Set or change a session's title | | `hermes sessions export <id>` | Export a session | | `hermes sessions delete <id>` | Delete a specific session | | `hermes sessions prune` | Remove old sessions | @@ -138,7 +141,8 @@ Type `/` in the interactive CLI to see an autocomplete dropdown. 
|---------|-------------| | `/tools` | List all available tools | | `/toolsets` | List available toolsets | -| `/model [name]` | Show or change the current model | +| `/model [provider:model]` | Show or change the current model (supports `provider:model` syntax to switch providers) | +| `/provider` | Show available providers with auth status | | `/config` | Show current configuration | | `/prompt [text]` | View/set custom system prompt | | `/personality [name]` | Set a predefined personality | @@ -152,9 +156,26 @@ Type `/` in the interactive CLI to see an autocomplete dropdown. | `/undo` | Remove the last user/assistant exchange | | `/save` | Save the current conversation | | `/compress` | Manually compress conversation context | +| `/title [name]` | Set or show the current session's title | | `/usage` | Show token usage for this session | | `/insights [--days N]` | Show usage insights and analytics (last 30 days) | +#### /compress + +Manually triggers context compression on the current conversation. This summarizes middle turns of the conversation while preserving the first 3 and last 4 turns, significantly reducing token count. Useful when: + +- The conversation is getting long and you want to reduce costs +- You're approaching the model's context limit +- You want to continue the conversation without starting fresh + +Requirements: at least 4 messages in the conversation. The configured model (or `compression.summary_model` from config) is used to generate the summary. After compression, the session continues seamlessly with the compressed history. + +Reports the result as: `Compressed: X → Y messages, ~N → ~M tokens`. + +:::tip +Compression also happens automatically when approaching context limits (configurable via `compression.threshold` in `config.yaml`). Use `/compress` when you want to trigger it early. 
+::: + ### Media & Input | Command | Description | diff --git a/website/docs/user-guide/cli.md b/website/docs/user-guide/cli.md index df07739c2..314fc326e 100644 --- a/website/docs/user-guide/cli.md +++ b/website/docs/user-guide/cli.md @@ -33,6 +33,10 @@ hermes --resume <session_id> # Resume a specific session by ID (-r) # Verbose mode (debug output) hermes chat --verbose + +# Isolated git worktree (for running multiple agents in parallel) +hermes -w # Interactive mode in worktree +hermes -w -q "Fix issue #123" # Single query in worktree ``` ## Interface Layout @@ -91,7 +95,8 @@ Type `/` to see an autocomplete dropdown of all available commands. |---------|-------------| | `/tools` | List all available tools grouped by toolset | | `/toolsets` | List available toolsets with descriptions | -| `/model [name]` | Show or change the current model | +| `/model [provider:model]` | Show or change the current model (supports `provider:model` syntax) | +| `/provider` | Show available providers with auth status | | `/config` | Show current configuration | | `/prompt [text]` | View/set/clear custom system prompt | | `/personality [name]` | Set a predefined personality | @@ -224,13 +229,15 @@ Resume options: ```bash hermes --continue # Resume the most recent CLI session hermes -c # Short form +hermes -c "my project" # Resume a named session (latest in lineage) hermes --resume 20260225_143052_a1b2c3 # Resume a specific session by ID +hermes --resume "refactoring auth" # Resume by title hermes -r 20260225_143052_a1b2c3 # Short form ``` Resuming restores the full conversation history from SQLite. The agent sees all previous messages, tool calls, and responses — just as if you never left. -Use `hermes sessions list` to browse past sessions. +Use `/title My Session Name` inside a chat to name the current session, or `hermes sessions rename <id> <title>` from the command line. Use `hermes sessions list` to browse past sessions. 
### Session Logging diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 6d6897794..07096a189 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -407,6 +407,26 @@ memory: user_char_limit: 1375 # ~500 tokens ``` +## Git Worktree Isolation + +Enable isolated git worktrees for running multiple agents in parallel on the same repo: + +```yaml +worktree: true # Always create a worktree (same as hermes -w) +# worktree: false # Default — only when -w flag is passed +``` + +When enabled, each CLI session creates a fresh worktree under `.worktrees/` with its own branch. Agents can edit files, commit, push, and create PRs without interfering with each other. Clean worktrees are removed on exit; dirty ones are kept for manual recovery. + +You can also list gitignored files to copy into worktrees via `.worktreeinclude` in your repo root: + +``` +# .worktreeinclude +.env +.venv/ +node_modules/ +``` + ## Context Compression ```yaml @@ -421,10 +441,10 @@ Control how much "thinking" the model does before responding: ```yaml agent: - reasoning_effort: "" # empty = use model default. Options: xhigh (max), high, medium, low, minimal, none + reasoning_effort: "" # empty = medium (default). Options: xhigh (max), high, medium, low, minimal, none ``` -When unset (default), the model's own default reasoning level is used. Setting a value overrides it — higher reasoning effort gives better results on complex tasks at the cost of more tokens and latency. +When unset (default), reasoning effort defaults to "medium" — a balanced level that works well for most tasks. Setting a value overrides it — higher reasoning effort gives better results on complex tasks at the cost of more tokens and latency. 
## TTS Configuration diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md index 523631eb0..70201100b 100644 --- a/website/docs/user-guide/features/browser.md +++ b/website/docs/user-guide/features/browser.md @@ -134,10 +134,14 @@ List all images on the current page with their URLs and alt text. Useful for fin Take a screenshot and analyze it with vision AI. Use this when text snapshots don't capture important visual information — especially useful for CAPTCHAs, complex layouts, or visual verification challenges. +The screenshot is saved persistently and the file path is returned alongside the AI analysis. On messaging platforms (Telegram, Discord, Slack, WhatsApp), you can ask the agent to share the screenshot — it will be sent as a native photo attachment via the `MEDIA:` mechanism. + ``` What does the chart on this page show? ``` +Screenshots are stored in `~/.hermes/browser_screenshots/` and automatically cleaned up after 24 hours. + ### `browser_close` Close the browser session and release resources. Call this when done to free up Browserbase session quota. 
diff --git a/website/docs/user-guide/messaging/index.md b/website/docs/user-guide/messaging/index.md index 12fec3fd2..f93275c86 100644 --- a/website/docs/user-guide/messaging/index.md +++ b/website/docs/user-guide/messaging/index.md @@ -63,7 +63,8 @@ hermes gateway status # Check service status | Command | Description | |---------|-------------| | `/new` or `/reset` | Start fresh conversation | -| `/model [name]` | Show or change the model | +| `/model [provider:model]` | Show or change the model (supports `provider:model` syntax) | +| `/provider` | Show available providers with auth status | | `/personality [name]` | Set a personality | | `/retry` | Retry the last message | | `/undo` | Remove the last exchange | diff --git a/website/docs/user-guide/sessions.md b/website/docs/user-guide/sessions.md index 92f6e1218..e99a725d4 100644 --- a/website/docs/user-guide/sessions.md +++ b/website/docs/user-guide/sessions.md @@ -17,6 +17,7 @@ Every conversation — whether from the CLI, Telegram, Discord, WhatsApp, or Sla The SQLite database stores: - Session ID, source platform, user ID +- **Session title** (unique, human-readable name) - Model name and configuration - System prompt snapshot - Full message history (role, content, tool calls, tool results) @@ -54,6 +55,19 @@ hermes chat -c This looks up the most recent `cli` session from the SQLite database and loads its full conversation history. 
+### Resume by Name + +If you've given a session a title (see [Session Naming](#session-naming) below), you can resume it by name: + +```bash +# Resume a named session +hermes -c "my project" + +# If there are lineage variants (my project, my project #2, my project #3), +# this automatically resumes the most recent one +hermes -c "my project" # → resumes "my project #3" +``` + ### Resume Specific Session ```bash @@ -61,6 +75,9 @@ This looks up the most recent `cli` session from the SQLite database and loads i hermes --resume 20250305_091523_a1b2c3d4 hermes -r 20250305_091523_a1b2c3d4 +# Resume by title +hermes --resume "refactoring auth" + # Or with the chat subcommand hermes chat --resume 20250305_091523_a1b2c3d4 ``` @@ -68,9 +85,53 @@ hermes chat --resume 20250305_091523_a1b2c3d4 Session IDs are shown when you exit a CLI session, and can be found with `hermes sessions list`. :::tip -Session IDs follow the format `YYYYMMDD_HHMMSS_<8-char-hex>`, e.g. `20250305_091523_a1b2c3d4`. You only need to provide enough of the ID to be unique. +Session IDs follow the format `YYYYMMDD_HHMMSS_<8-char-hex>`, e.g. `20250305_091523_a1b2c3d4`. You can resume by ID or by title — both work with `-c` and `-r`. ::: +## Session Naming + +Give sessions human-readable titles so you can find and resume them easily. + +### Setting a Title + +Use the `/title` slash command inside any chat session (CLI or gateway): + +``` +/title my research project +``` + +The title is applied immediately. If the session hasn't been created in the database yet (e.g., you run `/title` before sending your first message), it's queued and applied once the session starts. 
+ +You can also rename existing sessions from the command line: + +```bash +hermes sessions rename 20250305_091523_a1b2c3d4 "refactoring auth module" +``` + +### Title Rules + +- **Unique** — no two sessions can share the same title +- **Max 100 characters** — keeps listing output clean +- **Sanitized** — control characters, zero-width chars, and RTL overrides are stripped automatically +- **Normal Unicode is fine** — emoji, CJK, accented characters all work + +### Auto-Lineage on Compression + +When a session's context is compressed (manually via `/compress` or automatically), Hermes creates a new continuation session. If the original had a title, the new session automatically gets a numbered title: + +``` +"my project" → "my project #2" → "my project #3" +``` + +When you resume by name (`hermes -c "my project"`), it automatically picks the most recent session in the lineage. + +### /title in Messaging Platforms + +The `/title` command works in all gateway platforms (Telegram, Discord, Slack, WhatsApp): + +- `/title My Research` — set the session title +- `/title` — show the current title + ## Session Management Commands Hermes provides a full set of session management commands via `hermes sessions`: @@ -88,13 +149,23 @@ hermes sessions list --source telegram hermes sessions list --limit 50 ``` -Output format: +When sessions have titles, the output shows titles, previews, and relative timestamps: ``` -ID Source Model Messages Started +Title Preview Last Active ID ──────────────────────────────────────────────────────────────────────────────────────────────── -20250305_091523_a1b2c3d4 cli anthropic/claude-opus-4.6 24 2025-03-05 09:15 -20250304_143022_e5f6g7h8 telegram anthropic/claude-opus-4.6 12 2025-03-04 14:30 (ended) +refactoring auth Help me refactor the auth module please 2h ago 20250305_091523_a +my project #3 Can you check the test failures? yesterday 20250304_143022_e +— What's the weather in Las Vegas? 
3d ago 20250303_101500_f +``` + +When no sessions have titles, a simpler format is used: + +``` +Preview Last Active Src ID +────────────────────────────────────────────────────────────────────────────────────── +Help me refactor the auth module please 2h ago cli 20250305_091523_a +What's the weather in Las Vegas? 3d ago tele 20250303_101500_f ``` ### Export Sessions @@ -122,6 +193,18 @@ hermes sessions delete 20250305_091523_a1b2c3d4 hermes sessions delete 20250305_091523_a1b2c3d4 --yes ``` +### Rename a Session + +```bash +# Set or change a session's title +hermes sessions rename 20250305_091523_a1b2c3d4 "debugging auth flow" + +# Multi-word titles don't need quotes in the CLI +hermes sessions rename 20250305_091523_a1b2c3d4 debugging auth flow +``` + +If the title is already in use by another session, an error is shown. + ### Prune Old Sessions ```bash @@ -233,7 +316,7 @@ The SQLite database uses WAL mode for concurrent readers and a single writer, wh Key tables in `state.db`: -- **sessions** — session metadata (id, source, user_id, model, timestamps, token counts) +- **sessions** — session metadata (id, source, user_id, model, title, timestamps, token counts). Titles have a unique index (NULL titles allowed, only non-NULL must be unique). - **messages** — full message history (role, content, tool_calls, tool_name, token_count) - **messages_fts** — FTS5 virtual table for full-text search across message content