Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor

This commit is contained in:
Brooklyn Nicholson 2026-04-16 10:47:41 -05:00
commit 9c71f3a6ea
55 changed files with 1413 additions and 3197 deletions

View file

@ -1,11 +1,12 @@
name: Deploy Site
on:
release:
types: [published]
push:
branches: [main]
paths:
- 'website/**'
- 'landingpage/**'
- 'skills/**'
- 'optional-skills/**'
- '.github/workflows/deploy-site.yml'
@ -20,8 +21,14 @@ concurrency:
cancel-in-progress: false
jobs:
build-and-deploy:
# Only run on the upstream repository, not on forks
deploy-vercel:
if: github.event_name == 'release'
runs-on: ubuntu-latest
steps:
- name: Trigger Vercel Deploy
run: curl -X POST "${{ secrets.VERCEL_DEPLOY_HOOK }}"
deploy-docs:
if: github.repository == 'NousResearch/hermes-agent'
runs-on: ubuntu-latest
environment:
@ -65,12 +72,7 @@ jobs:
- name: Stage deployment
run: |
mkdir -p _site/docs
# Landing page at root
cp -r landingpage/* _site/
# Docusaurus at /docs/
cp -r website/build/* _site/docs/
# CNAME so GitHub Pages keeps the custom domain between deploys
echo "hermes-agent.nousresearch.com" > _site/CNAME
- name: Upload artifact
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3

View file

@ -2391,10 +2391,10 @@ def call_llm(
if task == "vision":
effective_provider, client, final_model = resolve_vision_provider_client(
provider=provider,
model=model,
base_url=base_url,
api_key=api_key,
provider=resolved_provider if resolved_provider != "auto" else provider,
model=resolved_model or model,
base_url=resolved_base_url or base_url,
api_key=resolved_api_key or api_key,
async_mode=False,
)
if client is None and resolved_provider != "auto" and not resolved_base_url:
@ -2599,10 +2599,10 @@ async def async_call_llm(
if task == "vision":
effective_provider, client, final_model = resolve_vision_provider_client(
provider=provider,
model=model,
base_url=base_url,
api_key=api_key,
provider=resolved_provider if resolved_provider != "auto" else provider,
model=resolved_model or model,
base_url=resolved_base_url or base_url,
api_key=resolved_api_key or api_key,
async_mode=True,
)
if client is None and resolved_provider != "auto" and not resolved_base_url:

View file

@ -39,7 +39,10 @@ SUMMARY_PREFIX = (
"into the summary below. This is a handoff from a previous context "
"window — treat it as background reference, NOT as active instructions. "
"Do NOT answer questions or fulfill requests mentioned in this summary; "
"they were already addressed. Respond ONLY to the latest user message "
"they were already addressed. "
"Your current task is identified in the '## Active Task' section of the "
"summary — resume exactly from there. "
"Respond ONLY to the latest user message "
"that appears AFTER this summary. The current session state (files, "
"config, etc.) may reflect work described here — avoid repeating it:"
)
@ -581,8 +584,16 @@ class ContextCompressor(ContextEngine):
)
# Shared structured template (used by both paths).
_template_sections = f"""## Goal
[What the user is trying to accomplish]
_template_sections = f"""## Active Task
[THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or
task assignment verbatim the exact words they used. If multiple tasks
were requested and only some are done, list only the ones NOT yet completed.
The next assistant must pick up exactly here. Example:
"User asked: 'Now refactor the auth module to use JWT instead of sessions'"
If no outstanding task exists, write "None."]
## Goal
[What the user is trying to accomplish overall]
## Constraints & Preferences
[User preferences, coding style, constraints, important decisions]
@ -644,7 +655,7 @@ PREVIOUS SUMMARY:
NEW TURNS TO INCORPORATE:
{content_to_summarize}
Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete.
Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled request — this is the most important field for task continuity.
{_template_sections}"""
else:
@ -862,6 +873,62 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Tail protection by token budget
# ------------------------------------------------------------------
def _find_last_user_message_idx(
self, messages: List[Dict[str, Any]], head_end: int
) -> int:
"""Return the index of the last user-role message at or after *head_end*, or -1."""
for i in range(len(messages) - 1, head_end - 1, -1):
if messages[i].get("role") == "user":
return i
return -1
def _ensure_last_user_message_in_tail(
self,
messages: List[Dict[str, Any]],
cut_idx: int,
head_end: int,
) -> int:
"""Guarantee the most recent user message is in the protected tail.
Context compressor bug (#10896): ``_align_boundary_backward`` can pull
``cut_idx`` past a user message when it tries to keep tool_call/result
groups together. If the last user message ends up in the *compressed*
middle region the LLM summariser writes it into "Pending User Asks",
but ``SUMMARY_PREFIX`` tells the next model to respond only to user
messages *after* the summary so the task effectively disappears from
the active context, causing the agent to stall, repeat completed work,
or silently drop the user's latest request.
Fix: if the last user-role message is not already in the tail
(``messages[cut_idx:]``), walk ``cut_idx`` back to include it. We
then re-align backward one more time to avoid splitting any
tool_call/result group that immediately precedes the user message.
"""
last_user_idx = self._find_last_user_message_idx(messages, head_end)
if last_user_idx < 0:
# No user message found beyond head — nothing to anchor.
return cut_idx
if last_user_idx >= cut_idx:
# Already in the tail; nothing to do.
return cut_idx
# The last user message is in the middle (compressed) region.
# Pull cut_idx back to it directly — a user message is already a
# clean boundary (no tool_call/result splitting risk), so there is no
# need to call _align_boundary_backward here; doing so would
# unnecessarily pull the cut further back into the preceding
# assistant + tool_calls group.
if not self.quiet_mode:
logger.debug(
"Anchoring tail cut to last user message at index %d "
"(was %d) to prevent active-task loss after compression",
last_user_idx,
cut_idx,
)
# Safety: never go back into the head region.
return max(last_user_idx, head_end + 1)
def _find_tail_cut_by_tokens(
self, messages: List[Dict[str, Any]], head_end: int,
token_budget: int | None = None,
@ -879,7 +946,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
read, etc.). If even the minimum 3 messages exceed 1.5x the budget
the cut is placed right after the head so compression still runs.
Never cuts inside a tool_call/result group.
Never cuts inside a tool_call/result group. Always ensures the most
recent user message is in the tail (see ``_ensure_last_user_message_in_tail``).
"""
if token_budget is None:
token_budget = self.tail_token_budget
@ -918,6 +986,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Align to avoid splitting tool groups
cut_idx = self._align_boundary_backward(messages, cut_idx)
# Ensure the most recent user message is always in the tail so the
# active task is never lost to compression (fixes #10896).
cut_idx = self._ensure_last_user_message_in_tail(messages, cut_idx, head_end)
return max(cut_idx, head_end + 1)
# ------------------------------------------------------------------

View file

@ -993,84 +993,4 @@ def get_cute_tool_message(
# Honcho session line (one-liner with clickable OSC 8 hyperlink)
# =========================================================================
_DIM = "\033[2m"
_SKY_BLUE = "\033[38;5;117m"
_ANSI_RESET = "\033[0m"
# =========================================================================
# Context pressure display (CLI user-facing warnings)
# =========================================================================
# ANSI color codes for context pressure tiers
_CYAN = "\033[36m"
_YELLOW = "\033[33m"
_BOLD = "\033[1m"
_DIM_ANSI = "\033[2m"
# Bar characters
_BAR_FILLED = ""
_BAR_EMPTY = ""
_BAR_WIDTH = 20
def format_context_pressure(
    compaction_progress: float,
    threshold_tokens: int,
    threshold_percent: float,
    compression_enabled: bool = True,
) -> str:
    """Render the CLI context-pressure status line.

    The bar and percentage track progress toward the compaction threshold
    (100% means compaction fires), NOT raw context-window usage.

    Args:
        compaction_progress: Progress toward compaction, 0.0–1.0 (1.0 = fires).
        threshold_tokens: Compaction threshold in tokens.
        threshold_percent: Threshold as a fraction of the context window.
        compression_enabled: Whether auto-compression is active.
    """
    percent_shown = min(100, int(compaction_progress * 100))
    filled_cells = min(_BAR_WIDTH, int(compaction_progress * _BAR_WIDTH))
    bar = _BAR_FILLED * filled_cells + _BAR_EMPTY * (_BAR_WIDTH - filled_cells)
    # Abbreviate the token threshold (e.g. 120000 -> "120k").
    if threshold_tokens >= 1000:
        threshold_k = f"{threshold_tokens // 1000}k"
    else:
        threshold_k = str(threshold_tokens)
    threshold_pct_int = int(threshold_percent * 100)
    color = f"{_BOLD}{_YELLOW}"
    icon = ""
    hint = "compaction approaching" if compression_enabled else "no auto-compaction"
    return (
        f" {color}{icon} context {bar} {percent_shown}% to compaction{_ANSI_RESET}"
        f" {_DIM_ANSI}{threshold_k} threshold ({threshold_pct_int}%) · {hint}{_ANSI_RESET}"
    )
def format_context_pressure_gateway(
    compaction_progress: float,
    threshold_percent: float,
    compression_enabled: bool = True,
) -> str:
    """Render a plain-text context-pressure notification for messaging platforms.

    No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc.
    The percentage tracks progress toward the compaction threshold.
    """
    percent_shown = min(100, int(compaction_progress * 100))
    filled_cells = min(_BAR_WIDTH, int(compaction_progress * _BAR_WIDTH))
    bar = _BAR_FILLED * filled_cells + _BAR_EMPTY * (_BAR_WIDTH - filled_cells)
    threshold_pct_int = int(threshold_percent * 100)
    icon = "⚠️"
    hint = (
        f"Context compaction approaching (threshold: {threshold_pct_int}% of window)."
        if compression_enabled
        else "Auto-compaction is disabled — context may be truncated."
    )
    return f"{icon} Context: {bar} {percent_shown}% to compaction\n{hint}"

27
cli.py
View file

@ -439,14 +439,27 @@ def load_cli_config() -> Dict[str, Any]:
# filesystem is directly accessible. For ALL remote/container backends
# (ssh, docker, modal, singularity), the host path doesn't exist on the
# target -- remove the key so terminal_tool.py uses its per-backend default.
if terminal_config.get("cwd") in (".", "auto", "cwd"):
effective_backend = terminal_config.get("env_type", "local")
if effective_backend == "local":
terminal_config["cwd"] = os.getcwd()
defaults["terminal"]["cwd"] = terminal_config["cwd"]
#
# GUARD: If TERMINAL_CWD is already set to a real absolute path (by the
# gateway's config bridge earlier in the process), don't clobber it.
# This prevents a lazy import of cli.py during gateway runtime from
# rewriting TERMINAL_CWD to the service's working directory.
# See issue #10817.
_CWD_PLACEHOLDERS = (".", "auto", "cwd")
if terminal_config.get("cwd") in _CWD_PLACEHOLDERS:
_existing_cwd = os.environ.get("TERMINAL_CWD", "")
if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd):
# Gateway (or earlier startup) already resolved a real path — keep it
terminal_config["cwd"] = _existing_cwd
defaults["terminal"]["cwd"] = _existing_cwd
else:
# Remove so TERMINAL_CWD stays unset → tool picks backend default
terminal_config.pop("cwd", None)
effective_backend = terminal_config.get("env_type", "local")
if effective_backend == "local":
terminal_config["cwd"] = os.getcwd()
defaults["terminal"]["cwd"] = terminal_config["cwd"]
else:
# Remove so TERMINAL_CWD stays unset → tool picks backend default
terminal_config.pop("cwd", None)
env_mappings = {
"env_type": "TERMINAL_ENV",

View file

@ -979,6 +979,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
delivery_error = str(de)
logger.error("Delivery failed for job %s: %s", job["id"], de)
# Treat empty final_response as a soft failure so last_status
# is not "ok" — the agent ran but produced nothing useful.
# (issue #8585)
if success and not final_response:
success = False
error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"
mark_job_run(job["id"], success, error, delivery_error=delivery_error)
executed += 1

View file

@ -11,6 +11,7 @@ import asyncio
import json
import logging
import os
import html as _html
import re
from typing import Dict, List, Optional, Any
@ -1129,13 +1130,10 @@ class TelegramAdapter(BasePlatformAdapter):
try:
cmd_preview = command[:3800] + "..." if len(command) > 3800 else command
# Escape backticks that would break Markdown v1 inline code parsing
safe_cmd = cmd_preview.replace("`", "'")
safe_desc = description.replace("`", "'").replace("*", "")
text = (
f"⚠️ *Command Approval Required*\n\n"
f"`{safe_cmd}`\n\n"
f"Reason: {safe_desc}"
f"⚠️ <b>Command Approval Required</b>\n\n"
f"<pre>{_html.escape(cmd_preview)}</pre>\n\n"
f"Reason: {_html.escape(description)}"
)
# Resolve thread context for thread replies
@ -1163,7 +1161,7 @@ class TelegramAdapter(BasePlatformAdapter):
kwargs: Dict[str, Any] = {
"chat_id": int(chat_id),
"text": text,
"parse_mode": ParseMode.MARKDOWN,
"parse_mode": ParseMode.HTML,
"reply_markup": keyboard,
**self._link_preview_kwargs(),
}

View file

@ -131,6 +131,12 @@ if _config_path.exists():
for _cfg_key, _env_var in _terminal_env_map.items():
if _cfg_key in _terminal_cfg:
_val = _terminal_cfg[_cfg_key]
# Skip cwd placeholder values (".", "auto", "cwd") — the
# gateway resolves these to Path.home() later (line ~255).
# Writing the raw placeholder here would just be noise.
# Only bridge explicit absolute paths from config.yaml.
if _cfg_key == "cwd" and str(_val) in (".", "auto", "cwd"):
continue
if isinstance(_val, list):
os.environ[_env_var] = json.dumps(_val)
else:
@ -225,6 +231,13 @@ try:
except Exception:
pass
# Warn if user has deprecated MESSAGING_CWD / TERMINAL_CWD in .env
try:
from hermes_cli.config import warn_deprecated_cwd_env_vars
warn_deprecated_cwd_env_vars()
except Exception:
pass
# Gateway runs in quiet mode - suppress debug output and use cwd directly (no temp dirs)
os.environ["HERMES_QUIET"] = "1"
@ -232,12 +245,14 @@ os.environ["HERMES_QUIET"] = "1"
os.environ["HERMES_EXEC_ASK"] = "1"
# Set terminal working directory for messaging platforms.
# If the user set an explicit path in config.yaml (not "." or "auto"),
# respect it. Otherwise use MESSAGING_CWD or default to home directory.
# config.yaml terminal.cwd is the canonical source (bridged to TERMINAL_CWD
# by the config bridge above). When it's unset or a placeholder, default
# to home directory. MESSAGING_CWD is accepted as a backward-compat
# fallback (deprecated — the warning above tells users to migrate).
_configured_cwd = os.environ.get("TERMINAL_CWD", "")
if not _configured_cwd or _configured_cwd in (".", "auto", "cwd"):
messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home())
os.environ["TERMINAL_CWD"] = messaging_cwd
_fallback = os.getenv("MESSAGING_CWD") or str(Path.home())
os.environ["TERMINAL_CWD"] = _fallback
from gateway.config import (
Platform,
@ -762,69 +777,72 @@ class GatewayRunner:
enabled_toolsets=["memory", "skills"],
session_id=old_session_id,
)
# Fully silence the flush agent — quiet_mode only suppresses init
# messages; tool call output still leaks to the terminal through
# _safe_print → _print_fn. Set a no-op to prevent that.
tmp_agent._print_fn = lambda *a, **kw: None
# Build conversation history from transcript
msgs = [
{"role": m.get("role"), "content": m.get("content")}
for m in history
if m.get("role") in ("user", "assistant") and m.get("content")
]
# Read live memory state from disk so the flush agent can see
# what's already saved and avoid overwriting newer entries.
_current_memory = ""
try:
from tools.memory_tool import get_memory_dir
_mem_dir = get_memory_dir()
for fname, label in [
("MEMORY.md", "MEMORY (your personal notes)"),
("USER.md", "USER PROFILE (who the user is)"),
]:
fpath = _mem_dir / fname
if fpath.exists():
content = fpath.read_text(encoding="utf-8").strip()
if content:
_current_memory += f"\n\n## Current {label}:\n{content}"
except Exception:
pass # Non-fatal — flush still works, just without the guard
# Fully silence the flush agent — quiet_mode only suppresses init
# messages; tool call output still leaks to the terminal through
# _safe_print → _print_fn. Set a no-op to prevent that.
tmp_agent._print_fn = lambda *a, **kw: None
# Give the agent a real turn to think about what to save
flush_prompt = (
"[System: This session is about to be automatically reset due to "
"inactivity or a scheduled daily reset. The conversation context "
"will be cleared after this turn.\n\n"
"Review the conversation above and:\n"
"1. Save any important facts, preferences, or decisions to memory "
"(user profile or your notes) that would be useful in future sessions.\n"
"2. If you discovered a reusable workflow or solved a non-trivial "
"problem, consider saving it as a skill.\n"
"3. If nothing is worth saving, that's fine — just skip.\n\n"
)
# Build conversation history from transcript
msgs = [
{"role": m.get("role"), "content": m.get("content")}
for m in history
if m.get("role") in ("user", "assistant") and m.get("content")
]
if _current_memory:
flush_prompt += (
"IMPORTANT — here is the current live state of memory. Other "
"sessions, cron jobs, or the user may have updated it since this "
"conversation ended. Do NOT overwrite or remove entries unless "
"the conversation above reveals something that genuinely "
"supersedes them. Only add new information that is not already "
"captured below."
f"{_current_memory}\n\n"
# Read live memory state from disk so the flush agent can see
# what's already saved and avoid overwriting newer entries.
_current_memory = ""
try:
from tools.memory_tool import get_memory_dir
_mem_dir = get_memory_dir()
for fname, label in [
("MEMORY.md", "MEMORY (your personal notes)"),
("USER.md", "USER PROFILE (who the user is)"),
]:
fpath = _mem_dir / fname
if fpath.exists():
content = fpath.read_text(encoding="utf-8").strip()
if content:
_current_memory += f"\n\n## Current {label}:\n{content}"
except Exception:
pass # Non-fatal — flush still works, just without the guard
# Give the agent a real turn to think about what to save
flush_prompt = (
"[System: This session is about to be automatically reset due to "
"inactivity or a scheduled daily reset. The conversation context "
"will be cleared after this turn.\n\n"
"Review the conversation above and:\n"
"1. Save any important facts, preferences, or decisions to memory "
"(user profile or your notes) that would be useful in future sessions.\n"
"2. If you discovered a reusable workflow or solved a non-trivial "
"problem, consider saving it as a skill.\n"
"3. If nothing is worth saving, that's fine — just skip.\n\n"
)
flush_prompt += (
"Do NOT respond to the user. Just use the memory and skill_manage "
"tools if needed, then stop.]"
)
if _current_memory:
flush_prompt += (
"IMPORTANT — here is the current live state of memory. Other "
"sessions, cron jobs, or the user may have updated it since this "
"conversation ended. Do NOT overwrite or remove entries unless "
"the conversation above reveals something that genuinely "
"supersedes them. Only add new information that is not already "
"captured below."
f"{_current_memory}\n\n"
)
tmp_agent.run_conversation(
user_message=flush_prompt,
conversation_history=msgs,
)
flush_prompt += (
"Do NOT respond to the user. Just use the memory and skill_manage "
"tools if needed, then stop.]"
)
tmp_agent.run_conversation(
user_message=flush_prompt,
conversation_history=msgs,
)
finally:
self._cleanup_agent_resources(tmp_agent)
logger.info("Pre-reset memory flush completed for session %s", old_session_id)
except Exception as e:
logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)
@ -1562,19 +1580,25 @@ class GatewayRunner:
)
except Exception:
pass
try:
if hasattr(agent, "shutdown_memory_provider"):
agent.shutdown_memory_provider()
except Exception:
pass
# Close tool resources (terminal sandboxes, browser daemons,
# background processes, httpx clients) to prevent zombie
# process accumulation.
try:
if hasattr(agent, 'close'):
agent.close()
except Exception:
pass
self._cleanup_agent_resources(agent)
def _cleanup_agent_resources(self, agent: Any) -> None:
"""Best-effort cleanup for temporary or cached agent instances."""
if agent is None:
return
try:
if hasattr(agent, "shutdown_memory_provider"):
agent.shutdown_memory_provider()
except Exception:
pass
# Close tool resources (terminal sandboxes, browser daemons,
# background processes, httpx clients) to prevent zombie
# process accumulation.
try:
if hasattr(agent, "close"):
agent.close()
except Exception:
pass
_STUCK_LOOP_THRESHOLD = 3 # restarts while active before auto-suspend
_STUCK_LOOP_FILE = ".restart_failure_counts"
@ -2077,16 +2101,7 @@ class GatewayRunner:
if _cached_agent is None:
_cached_agent = self._running_agents.get(key)
if _cached_agent and _cached_agent is not _AGENT_PENDING_SENTINEL:
try:
if hasattr(_cached_agent, 'shutdown_memory_provider'):
_cached_agent.shutdown_memory_provider()
except Exception:
pass
try:
if hasattr(_cached_agent, 'close'):
_cached_agent.close()
except Exception:
pass
self._cleanup_agent_resources(_cached_agent)
# Mark as flushed and persist to disk so the flag
# survives gateway restarts.
with self.session_store._lock:
@ -3410,7 +3425,7 @@ class GatewayRunner:
from agent.context_references import preprocess_context_references_async
from agent.model_metadata import get_model_context_length
_msg_cwd = os.environ.get("MESSAGING_CWD", os.path.expanduser("~"))
_msg_cwd = os.environ.get("TERMINAL_CWD", os.path.expanduser("~"))
_msg_ctx_len = get_model_context_length(
self._model,
base_url=self._base_url or "",
@ -3782,51 +3797,54 @@ class GatewayRunner:
enabled_toolsets=["memory"],
session_id=session_entry.session_id,
)
_hyg_agent._print_fn = lambda *a, **kw: None
try:
_hyg_agent._print_fn = lambda *a, **kw: None
loop = asyncio.get_running_loop()
_compressed, _ = await loop.run_in_executor(
None,
lambda: _hyg_agent._compress_context(
_hyg_msgs, "",
approx_tokens=_approx_tokens,
),
)
# _compress_context ends the old session and creates
# a new session_id. Write compressed messages into
# the NEW session so the old transcript stays intact
# and searchable via session_search.
_hyg_new_sid = _hyg_agent.session_id
if _hyg_new_sid != session_entry.session_id:
session_entry.session_id = _hyg_new_sid
self.session_store._save()
self.session_store.rewrite_transcript(
session_entry.session_id, _compressed
)
# Reset stored token count — transcript was rewritten
session_entry.last_prompt_tokens = 0
history = _compressed
_new_count = len(_compressed)
_new_tokens = estimate_messages_tokens_rough(
_compressed
)
logger.info(
"Session hygiene: compressed %s%s msgs, "
"~%s → ~%s tokens",
_msg_count, _new_count,
f"{_approx_tokens:,}", f"{_new_tokens:,}",
)
if _new_tokens >= _warn_token_threshold:
logger.warning(
"Session hygiene: still ~%s tokens after "
"compression",
f"{_new_tokens:,}",
loop = asyncio.get_running_loop()
_compressed, _ = await loop.run_in_executor(
None,
lambda: _hyg_agent._compress_context(
_hyg_msgs, "",
approx_tokens=_approx_tokens,
),
)
# _compress_context ends the old session and creates
# a new session_id. Write compressed messages into
# the NEW session so the old transcript stays intact
# and searchable via session_search.
_hyg_new_sid = _hyg_agent.session_id
if _hyg_new_sid != session_entry.session_id:
session_entry.session_id = _hyg_new_sid
self.session_store._save()
self.session_store.rewrite_transcript(
session_entry.session_id, _compressed
)
# Reset stored token count — transcript was rewritten
session_entry.last_prompt_tokens = 0
history = _compressed
_new_count = len(_compressed)
_new_tokens = estimate_messages_tokens_rough(
_compressed
)
logger.info(
"Session hygiene: compressed %s%s msgs, "
"~%s → ~%s tokens",
_msg_count, _new_count,
f"{_approx_tokens:,}", f"{_new_tokens:,}",
)
if _new_tokens >= _warn_token_threshold:
logger.warning(
"Session hygiene: still ~%s tokens after "
"compression",
f"{_new_tokens:,}",
)
finally:
self._cleanup_agent_resources(_hyg_agent)
except Exception as e:
logger.warning(
"Session hygiene auto-compress failed: %s", e
@ -4344,16 +4362,7 @@ class GatewayRunner:
_cached = self._agent_cache.get(session_key)
_old_agent = _cached[0] if isinstance(_cached, tuple) else _cached if _cached else None
if _old_agent is not None:
try:
if hasattr(_old_agent, "shutdown_memory_provider"):
_old_agent.shutdown_memory_provider()
except Exception:
pass
try:
if hasattr(_old_agent, "close"):
_old_agent.close()
except Exception:
pass
self._cleanup_agent_resources(_old_agent)
self._evict_cached_agent(session_key)
try:
@ -5717,7 +5726,7 @@ class GatewayRunner:
max_snapshots=cp_cfg.get("max_snapshots", 50),
)
cwd = os.getenv("MESSAGING_CWD", str(Path.home()))
cwd = os.getenv("TERMINAL_CWD", str(Path.home()))
arg = event.get_command_args().strip()
if not arg:
@ -5838,11 +5847,13 @@ class GatewayRunner:
session_db=self._session_db,
fallback_model=self._fallback_model,
)
return agent.run_conversation(
user_message=prompt,
task_id=task_id,
)
try:
return agent.run_conversation(
user_message=prompt,
task_id=task_id,
)
finally:
self._cleanup_agent_resources(agent)
result = await self._run_in_executor_with_context(run_sync)
@ -6020,11 +6031,14 @@ class GatewayRunner:
skip_context_files=True,
persist_session=False,
)
return agent.run_conversation(
user_message=btw_prompt,
conversation_history=history_snapshot,
task_id=task_id,
)
try:
return agent.run_conversation(
user_message=btw_prompt,
conversation_history=history_snapshot,
task_id=task_id,
)
finally:
self._cleanup_agent_resources(agent)
result = await self._run_in_executor_with_context(run_sync)
@ -6353,42 +6367,45 @@ class GatewayRunner:
enabled_toolsets=["memory"],
session_id=session_entry.session_id,
)
tmp_agent._print_fn = lambda *a, **kw: None
try:
tmp_agent._print_fn = lambda *a, **kw: None
compressor = tmp_agent.context_compressor
compress_start = compressor.protect_first_n
compress_start = compressor._align_boundary_forward(msgs, compress_start)
compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start)
if compress_start >= compress_end:
return "Nothing to compress yet (the transcript is still all protected context)."
compressor = tmp_agent.context_compressor
compress_start = compressor.protect_first_n
compress_start = compressor._align_boundary_forward(msgs, compress_start)
compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start)
if compress_start >= compress_end:
return "Nothing to compress yet (the transcript is still all protected context)."
loop = asyncio.get_running_loop()
compressed, _ = await loop.run_in_executor(
None,
lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic)
)
loop = asyncio.get_running_loop()
compressed, _ = await loop.run_in_executor(
None,
lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic)
)
# _compress_context already calls end_session() on the old session
# (preserving its full transcript in SQLite) and creates a new
# session_id for the continuation. Write the compressed messages
# into the NEW session so the original history stays searchable.
new_session_id = tmp_agent.session_id
if new_session_id != session_entry.session_id:
session_entry.session_id = new_session_id
self.session_store._save()
# _compress_context already calls end_session() on the old session
# (preserving its full transcript in SQLite) and creates a new
# session_id for the continuation. Write the compressed messages
# into the NEW session so the original history stays searchable.
new_session_id = tmp_agent.session_id
if new_session_id != session_entry.session_id:
session_entry.session_id = new_session_id
self.session_store._save()
self.session_store.rewrite_transcript(new_session_id, compressed)
# Reset stored token count — transcript changed, old value is stale
self.session_store.update_session(
session_entry.session_key, last_prompt_tokens=0
)
new_tokens = estimate_messages_tokens_rough(compressed)
summary = summarize_manual_compression(
msgs,
compressed,
approx_tokens,
new_tokens,
)
self.session_store.rewrite_transcript(new_session_id, compressed)
# Reset stored token count — transcript changed, old value is stale
self.session_store.update_session(
session_entry.session_key, last_prompt_tokens=0
)
new_tokens = estimate_messages_tokens_rough(compressed)
summary = summarize_manual_compression(
msgs,
compressed,
approx_tokens,
new_tokens,
)
finally:
self._cleanup_agent_resources(tmp_agent)
lines = [f"🗜️ {summary['headline']}"]
if focus_topic:
lines.append(f"Focus: \"{focus_topic}\"")
@ -8896,7 +8913,7 @@ class GatewayRunner:
# false positives from MagicMock auto-attribute creation in tests.
if getattr(type(_status_adapter), "send_exec_approval", None) is not None:
try:
asyncio.run_coroutine_threadsafe(
_approval_result = asyncio.run_coroutine_threadsafe(
_status_adapter.send_exec_approval(
chat_id=_status_chat_id,
command=cmd,
@ -8906,7 +8923,12 @@ class GatewayRunner:
),
_loop_for_step,
).result(timeout=15)
return
if _approval_result.success:
return
logger.warning(
"Button-based approval failed (send returned error), falling back to text: %s",
_approval_result.error,
)
except Exception as _e:
logger.warning(
"Button-based approval failed, falling back to text: %s", _e
@ -9536,6 +9558,7 @@ class GatewayRunner:
next_source = source
next_message = pending
next_message_id = None
next_channel_prompt = None
if pending_event is not None:
next_source = getattr(pending_event, "source", None) or source
next_message = await self._prepare_inbound_message_text(
@ -9546,6 +9569,7 @@ class GatewayRunner:
if next_message is None:
return result
next_message_id = getattr(pending_event, "message_id", None)
next_channel_prompt = getattr(pending_event, "channel_prompt", None)
# Restart typing indicator so the user sees activity while
# the follow-up turn runs. The outer _process_message_background
@ -9569,7 +9593,7 @@ class GatewayRunner:
session_key=session_key,
_interrupt_depth=_interrupt_depth + 1,
event_message_id=next_message_id,
channel_prompt=pending_event.channel_prompt,
channel_prompt=next_channel_prompt,
)
finally:
# Stop progress sender, interrupt monitor, and notification task

View file

@ -515,9 +515,17 @@ class GatewayStreamConsumer:
self._fallback_final_send = False
if not continuation.strip():
# Nothing new to send — the visible partial already matches final text.
self._already_sent = True
self._final_response_sent = True
return
# BUT: if final_text itself has meaningful content (e.g. a timeout
# message after a long tool call), the prefix-based continuation
# calculation may wrongly conclude "already shown" because the
# streamed prefix was from a *previous* segment (before the tool
# boundary). In that case, send the full final_text as-is (#10807).
if final_text.strip() and final_text != self._visible_prefix():
continuation = final_text
else:
self._already_sent = True
self._final_response_sent = True
return
raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096)
safe_limit = max(500, raw_limit - 100)

View file

@ -1597,13 +1597,8 @@ OPTIONAL_ENV_VARS = {
},
# ── Agent settings ──
"MESSAGING_CWD": {
"description": "Working directory for terminal commands via messaging",
"prompt": "Messaging working directory (default: home)",
"url": None,
"password": False,
"category": "setting",
},
# NOTE: MESSAGING_CWD was removed here — use terminal.cwd in config.yaml
# instead. The gateway reads TERMINAL_CWD (bridged from terminal.cwd).
"SUDO_PASSWORD": {
"description": "Sudo password for terminal commands requiring root access; set to an explicit empty string to try empty without prompting",
"prompt": "Sudo password",
@ -2082,6 +2077,52 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
sys.stderr.write("\n".join(lines) + "\n\n")
def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> None:
    """Warn if MESSAGING_CWD or TERMINAL_CWD is set in .env instead of config.yaml.

    These env vars are deprecated — the canonical setting is ``terminal.cwd``
    in config.yaml. Prints a migration hint to stderr.

    Args:
        config: Parsed config dict. When ``None``, the config is loaded via
            ``load_config()``; if that fails the function silently returns
            (best-effort warning only — never block startup).
    """
    import os
    import sys

    messaging_cwd = os.environ.get("MESSAGING_CWD")
    terminal_cwd_env = os.environ.get("TERMINAL_CWD")

    if config is None:
        try:
            config = load_config()
        except Exception:
            # Without a config we cannot tell whether TERMINAL_CWD came from
            # the config bridge or from .env, so stay quiet.
            return

    terminal_cfg = config.get("terminal", {})
    config_cwd = terminal_cfg.get("cwd", ".") if isinstance(terminal_cfg, dict) else "."
    # Only warn if config.yaml doesn't have an explicit path
    config_has_explicit_cwd = config_cwd not in (".", "auto", "cwd", "")

    lines: list[str] = []
    if messaging_cwd:
        lines.append(
            f" \033[33m⚠\033[0m MESSAGING_CWD={messaging_cwd} found in .env — "
            f"this is deprecated."
        )
    if terminal_cwd_env and not config_has_explicit_cwd:
        # TERMINAL_CWD in env but not from config bridge — likely from .env
        lines.append(
            f" \033[33m⚠\033[0m TERMINAL_CWD={terminal_cwd_env} found in .env — "
            f"this is deprecated."
        )

    if lines:
        hint_path = os.environ.get("HERMES_HOME", "~/.hermes")
        lines.insert(0, "\033[33m⚠ Deprecated .env settings detected:\033[0m")
        # NOTE: the literal "\\n" below is intentional in the original and is
        # preserved — it prints a backslash-n, not a newline.
        lines.append(
            " \033[2mMove to config.yaml instead: "
            "terminal:\\n cwd: /your/project/path\033[0m"
        )
        lines.append(
            f" \033[2mThen remove the old entries from {hint_path}/.env\033[0m"
        )
        sys.stderr.write("\n".join(lines) + "\n\n")
def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, Any]:
"""
Migrate config to latest version, prompting for new required fields.

View file

@ -5998,6 +5998,18 @@ Examples:
memory_sub.add_parser("setup", help="Interactive provider selection and configuration")
memory_sub.add_parser("status", help="Show current memory provider config")
memory_sub.add_parser("off", help="Disable external provider (built-in only)")
_reset_parser = memory_sub.add_parser(
"reset",
help="Erase all built-in memory (MEMORY.md and USER.md)",
)
_reset_parser.add_argument(
"--yes", "-y", action="store_true",
help="Skip confirmation prompt",
)
_reset_parser.add_argument(
"--target", choices=["all", "memory", "user"], default="all",
help="Which store to reset: 'all' (default), 'memory', or 'user'",
)
def cmd_memory(args):
sub = getattr(args, "memory_command", None)
@ -6010,6 +6022,44 @@ Examples:
save_config(config)
print("\n ✓ Memory provider: built-in only")
print(" Saved to config.yaml\n")
elif sub == "reset":
from hermes_constants import get_hermes_home, display_hermes_home
mem_dir = get_hermes_home() / "memories"
target = getattr(args, "target", "all")
files_to_reset = []
if target in ("all", "memory"):
files_to_reset.append(("MEMORY.md", "agent notes"))
if target in ("all", "user"):
files_to_reset.append(("USER.md", "user profile"))
# Check what exists
existing = [(f, desc) for f, desc in files_to_reset if (mem_dir / f).exists()]
if not existing:
print(f"\n Nothing to reset — no memory files found in {display_hermes_home()}/memories/\n")
return
print(f"\n This will permanently erase the following memory files:")
for f, desc in existing:
path = mem_dir / f
size = path.stat().st_size
print(f"{f} ({desc}) — {size:,} bytes")
if not getattr(args, "yes", False):
try:
answer = input("\n Type 'yes' to confirm: ").strip().lower()
except (EOFError, KeyboardInterrupt):
print("\n Cancelled.\n")
return
if answer != "yes":
print(" Cancelled.\n")
return
for f, desc in existing:
(mem_dir / f).unlink()
print(f" ✓ Deleted {f} ({desc})")
print(f"\n Memory reset complete. New sessions will start with a blank slate.")
print(f" Files were in: {display_hermes_home()}/memories/\n")
else:
from hermes_cli.memory_setup import memory_command
memory_command(args)

View file

@ -807,6 +807,10 @@ def list_authenticated_providers(
# "nous" shares OpenRouter's curated list if not separately defined
if "nous" not in curated:
curated["nous"] = curated["openrouter"]
# Ollama Cloud uses dynamic discovery (no static curated list)
if "ollama-cloud" not in curated:
from hermes_cli.models import fetch_ollama_cloud_models
curated["ollama-cloud"] = fetch_ollama_cloud_models()
# --- 1. Check Hermes-mapped providers ---
for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():

View file

@ -1044,7 +1044,7 @@ def detect_provider_for_model(
return (resolved_provider, default_models[0])
# Aggregators list other providers' models — never auto-switch TO them
_AGGREGATORS = {"nous", "openrouter"}
_AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
# If the model belongs to the current provider's catalog, don't suggest switching
current_models = _PROVIDER_MODELS.get(current_provider, [])
@ -1286,6 +1286,10 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
live = _fetch_ai_gateway_models()
if live:
return live
if normalized == "ollama-cloud":
live = fetch_ollama_cloud_models(force_refresh=force_refresh)
if live:
return live
if normalized == "custom":
base_url = _get_custom_base_url()
if base_url:

View file

@ -708,7 +708,9 @@ def init_skin_from_config(config: dict) -> None:
Call this once during CLI init with the loaded config dict.
"""
display = config.get("display", {})
display = config.get("display") or {}
if not isinstance(display, dict):
display = {}
skin_name = display.get("skin", "default")
if isinstance(skin_name, str) and skin_name.strip():
set_active_skin(skin_name.strip())

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 870 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 134 KiB

View file

@ -1,665 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Hermes Agent — An Agent That Grows With You</title>
<meta
name="description"
content="An open-source agent that grows with you — learns your projects, builds its own skills, and reaches you wherever you are. By Nous Research."
/>
<meta name="theme-color" content="#0A0E1A" />
<meta property="og:title" content="Hermes Agent — AI Agent Framework" />
<meta
property="og:description"
content="An open-source agent that grows with you. Install it, give it your messaging accounts, and it becomes a persistent personal agent — learning your projects, building its own skills, and reaching you wherever you are."
/>
<meta property="og:type" content="website" />
<meta property="og:url" content="https://hermes-agent.nousresearch.com" />
<meta
property="og:image"
content="https://hermes-agent.nousresearch.com/hermes-agent-banner.png"
/>
<link rel="preconnect" href="https://fonts.googleapis.com" />
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
<link
href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap"
rel="stylesheet"
/>
<script
src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/three.min.js"
defer
></script>
<link rel="stylesheet" href="style.css" />
<link rel="icon" type="image/x-icon" href="favicon.ico" />
<link rel="icon" type="image/png" sizes="32x32" href="favicon-32x32.png" />
<link rel="icon" type="image/png" sizes="16x16" href="favicon-16x16.png" />
<link rel="apple-touch-icon" sizes="180x180" href="apple-touch-icon.png" />
</head>
<body>
<canvas id="noise-overlay"></canvas>
<div class="ambient-glow glow-1"></div>
<div class="ambient-glow glow-2"></div>
<nav class="nav">
<div class="nav-inner">
<a href="#" class="nav-logo">
<img src="nous-logo.png" alt="Nous Research" class="nav-nous-logo" />
<span class="nav-brand"
>Hermes Agent <span class="nav-by">by Nous Research</span></span
>
</a>
<div class="nav-links">
<a href="#install">Install</a>
<a href="#features">Features</a>
<a href="/docs/">Docs</a>
<a
href="https://github.com/NousResearch/hermes-agent"
target="_blank"
rel="noopener"
>GitHub</a
>
<a
href="https://discord.gg/NousResearch"
target="_blank"
rel="noopener"
>Discord</a
>
</div>
<button
class="nav-hamburger"
id="nav-hamburger"
onclick="toggleMobileNav()"
aria-label="Toggle menu"
>
<span class="hamburger-bar"></span>
<span class="hamburger-bar"></span>
<span class="hamburger-bar"></span>
</button>
<div class="nav-mobile" id="nav-mobile">
<a href="#install" onclick="toggleMobileNav()">Install</a>
<a href="#features" onclick="toggleMobileNav()">Features</a>
<a href="/docs/">Docs</a>
<a
href="https://github.com/NousResearch/hermes-agent"
target="_blank"
rel="noopener"
>GitHub</a
>
<a
href="https://discord.gg/NousResearch"
target="_blank"
rel="noopener"
>Discord</a
>
</div>
</div>
</nav>
<section class="hero">
<div class="hero-content">
<div class="hero-badge">
<span class="badge-dot"></span>
Open Source &bull; MIT License
</div>
<!-- prettier-ignore -->
<pre class="hero-ascii" aria-hidden="true" style="font-family: monospace; line-height: 1.1">
██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗
██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝
███████║█████╗ ██████╔╝██╔████╔██║█████╗ ███████╗ ███████║██║ ███╗█████╗ ██╔██╗ ██║ ██║
██╔══██║██╔══╝ ██╔══██╗██║╚██╔╝██║██╔══╝ ╚════██║ ██╔══██║██║ ██║██╔══╝ ██║╚██╗██║ ██║
██║ ██║███████╗██║ ██║██║ ╚═╝ ██║███████╗███████║ ██║ ██║╚██████╔╝███████╗██║ ╚████║ ██║
╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝ ╚═╝ ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═══╝ ╚═╝
</pre>
<h1 class="hero-title">
An agent that<br />
<span class="hero-gradient">grows with you.</span>
</h1>
<p class="hero-subtitle">
It's not a coding copilot tethered to an IDE or a chatbot wrapper
around a single API. It's an <strong>autonomous agent</strong> that
lives on your server, remembers what it learns, and gets more capable
the longer it runs.
</p>
<div class="hero-install">
<div class="install-widget">
<div class="install-widget-header">
<div class="install-dots">
<span class="dot dot-red"></span>
<span class="dot dot-yellow"></span>
<span class="dot dot-green"></span>
</div>
<div class="install-tabs">
<button
class="install-tab active"
data-platform="linux"
onclick="switchPlatform('linux')"
>
Linux / macOS / WSL
</button>
</div>
</div>
<div class="install-widget-body">
<span class="install-prompt" id="install-prompt">$</span>
<code id="install-command"
>curl -fsSL
https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh
| bash</code
>
<button
class="copy-btn"
onclick="copyInstall()"
title="Copy to clipboard"
>
<svg
width="16"
height="16"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
>
<rect x="9" y="9" width="13" height="13" rx="2" ry="2" />
<path
d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"
/>
</svg>
<span class="copy-text">Copy</span>
</button>
</div>
</div>
<p class="install-note" id="install-note">
Works on Linux, macOS & WSL2 · No prerequisites · Installs
everything automatically
</p>
</div>
<div class="hero-links">
<a
href="https://portal.nousresearch.com"
class="btn btn-primary"
target="_blank"
rel="noopener"
>
<svg
width="20"
height="20"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
>
<path d="M15 3h4a2 2 0 0 1 2 2v14a2 2 0 0 1-2 2h-4" />
<polyline points="10 17 15 12 10 7" />
<line x1="15" y1="12" x2="3" y2="12" />
</svg>
Sign Up on Nous Portal
</a>
</div>
</div>
</section>
<section class="section section-install" id="install">
<div class="container">
<div class="section-header">
<h2>Get started in 60 seconds</h2>
</div>
<div class="install-steps">
<div class="install-step">
<div class="step-number">1</div>
<div class="step-content">
<h4>Install</h4>
<div class="code-block">
<div class="code-header">
<div class="code-tabs">
<button
class="code-tab active"
data-platform="linux"
onclick="switchStepPlatform('linux')"
>
Linux / macOS / WSL
</button>
</div>
<button
class="copy-btn"
id="step1-copy"
onclick="copyText(this)"
data-text="curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash"
>
Copy
</button>
</div>
<pre><code id="step1-command">curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash</code></pre>
</div>
<p class="step-note" id="step1-note">
Installs uv, Python 3.11, clones the repo, sets up everything.
No sudo needed.
</p>
</div>
</div>
<div class="install-step">
<div class="step-number">2</div>
<div class="step-content">
<h4>Configure</h4>
<div class="code-block">
<div class="code-header">
<span>bash</span>
<button
class="copy-btn"
onclick="copyText(this)"
data-text="hermes setup"
>
Copy
</button>
</div>
<pre><code><span class="code-comment"># Interactive setup wizard</span>
hermes setup
<span class="code-comment"># Or choose your model</span>
hermes model</code></pre>
</div>
<p class="step-note">
Connect to Nous Portal (OAuth), OpenRouter (API key), or your
own endpoint.
</p>
</div>
</div>
<div class="install-step">
<div class="step-number">3</div>
<div class="step-content">
<h4>Start chatting</h4>
<div class="code-block">
<div class="code-header">
<span>bash</span>
<button
class="copy-btn"
onclick="copyText(this)"
data-text="hermes"
>
Copy
</button>
</div>
<pre><code>hermes</code></pre>
</div>
<p class="step-note">
That's it. Full interactive CLI with tools, memory, and skills.
</p>
</div>
</div>
<div class="install-step">
<div class="step-number">4</div>
<div class="step-content">
<h4>
Go multi-platform <span class="step-optional">(optional)</span>
</h4>
<div class="code-block">
<div class="code-header">
<span>bash</span>
<button
class="copy-btn"
onclick="copyText(this)"
data-text="hermes gateway setup"
>
Copy
</button>
</div>
<pre><code><span class="code-comment"># Interactive gateway setup wizard</span>
hermes gateway setup
<span class="code-comment"># Start the messaging gateway</span>
hermes gateway
<span class="code-comment"># Install as a system service</span>
hermes gateway install</code></pre>
</div>
<p class="step-note">
Walk through connecting Telegram, Discord, Slack, or WhatsApp.
Runs as a systemd service.
</p>
</div>
</div>
<div class="install-step">
<div class="step-number">5</div>
<div class="step-content">
<h4>Keep it up to date</h4>
<div class="code-block">
<div class="code-header">
<span>bash</span>
<button
class="copy-btn"
onclick="copyText(this)"
data-text="hermes update"
>
Copy
</button>
</div>
<pre><code>hermes update</code></pre>
</div>
<p class="step-note">
Pulls the latest changes and reinstalls dependencies. Run
anytime to get new features and fixes.
</p>
</div>
</div>
</div>
<div class="install-windows">
<p>
Native Windows support is extremely experimental and unsupported.
Please install
<a
href="https://learn.microsoft.com/en-us/windows/wsl/install"
target="_blank"
rel="noopener"
>WSL2</a
>
and run Hermes Agent from there.
</p>
</div>
</div>
</section>
<!-- Terminal Demo -->
<section class="section section-demo" id="demo">
<div class="container">
<div class="section-header">
<h2>See it in action</h2>
</div>
<div class="terminal-window">
<div class="terminal-header">
<div class="terminal-dots">
<span class="dot dot-red"></span>
<span class="dot dot-yellow"></span>
<span class="dot dot-green"></span>
</div>
<span class="terminal-title">hermes</span>
</div>
<div class="terminal-body" id="terminal-demo"></div>
</div>
</div>
</section>
<!-- Features + Specs -->
<section class="section" id="features">
<div class="container">
<div class="section-header">
<h2>Features</h2>
</div>
<div class="features-grid">
<div class="feature-card">
<div class="feature-header">
<div class="feature-icon">
<svg
width="20"
height="20"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="1.5"
stroke-linecap="round"
stroke-linejoin="round"
>
<path
d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"
/>
</svg>
</div>
<h3>Lives Where You Do</h3>
</div>
<p>
Telegram, Discord, Slack, WhatsApp, and CLI from a single gateway
— start on one, pick up on another.
</p>
</div>
<div class="feature-card">
<div class="feature-header">
<div class="feature-icon">
<svg
width="20"
height="20"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="1.5"
stroke-linecap="round"
stroke-linejoin="round"
>
<polyline points="22 7 13.5 15.5 8.5 10.5 2 17" />
<polyline points="16 7 22 7 22 13" />
</svg>
</div>
<h3>Grows the Longer It Runs</h3>
</div>
<p>
Persistent memory and auto-generated skills — it learns your
projects and never forgets how it solved a problem.
</p>
</div>
<div class="feature-card">
<div class="feature-header">
<div class="feature-icon">
<svg
width="20"
height="20"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="1.5"
stroke-linecap="round"
stroke-linejoin="round"
>
<circle cx="12" cy="12" r="10" />
<polyline points="12 6 12 12 16 14" />
</svg>
</div>
<h3>Scheduled Automations</h3>
</div>
<p>
Natural language cron scheduling for reports, backups, and
briefings — running unattended through the gateway.
</p>
</div>
<div class="feature-card">
<div class="feature-header">
<div class="feature-icon">
<svg
width="20"
height="20"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="1.5"
stroke-linecap="round"
stroke-linejoin="round"
>
<circle cx="18" cy="18" r="3" />
<circle cx="6" cy="6" r="3" />
<path d="M6 21V9a9 9 0 0 0 9 9" />
<path d="M18 3v12a9 9 0 0 1-9-9" />
</svg>
</div>
<h3>Delegates & Parallelizes</h3>
</div>
<p>
Isolated subagents with their own conversations, terminals, and
Python RPC scripts for zero-context-cost pipelines.
</p>
</div>
<div class="feature-card">
<div class="feature-header">
<div class="feature-icon">
<svg
width="20"
height="20"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="1.5"
stroke-linecap="round"
stroke-linejoin="round"
>
<rect x="3" y="11" width="18" height="11" rx="2" ry="2" />
<path d="M7 11V7a5 5 0 0 1 10 0v4" />
</svg>
</div>
<h3>Real Sandboxing</h3>
</div>
<p>
Five backends — local, Docker, SSH, Singularity, Modal — with
container hardening and namespace isolation.
</p>
</div>
<div class="feature-card">
<div class="feature-header">
<div class="feature-icon">
<svg
width="20"
height="20"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="1.5"
stroke-linecap="round"
stroke-linejoin="round"
>
<circle cx="12" cy="12" r="10" />
<line x1="2" y1="12" x2="22" y2="12" />
<path
d="M12 2a15.3 15.3 0 0 1 4 10 15.3 15.3 0 0 1-4 10 15.3 15.3 0 0 1-4-10 15.3 15.3 0 0 1 4-10z"
/>
</svg>
</div>
<h3>Full Web & Browser Control</h3>
</div>
<p>
Web search, browser automation, vision, image generation,
text-to-speech, and multi-model reasoning.
</p>
</div>
</div>
<div class="features-more">
<button class="more-toggle" onclick="toggleSpecs()" id="specs-toggle">
<span class="toggle-label">More details</span>
<svg
class="more-chevron"
width="16"
height="16"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
>
<polyline points="6 9 12 15 18 9" />
</svg>
</button>
</div>
<div class="specs-wrapper" id="specs-wrapper">
<div class="specs-list">
<div class="spec-row">
<h3 class="spec-label">Tools</h3>
<p class="spec-value">
40+ built-in — web search, terminal, file system, browser
automation, vision, image generation, text-to-speech, code
execution, subagent delegation, memory, task planning, cron
scheduling, multi-model reasoning, and more.
</p>
</div>
<div class="spec-row">
<h3 class="spec-label">Platforms</h3>
<p class="spec-value">
Telegram, Discord, Slack, WhatsApp, Signal, Email, and CLI — all
from a single gateway. Connect to
<a
href="https://portal.nousresearch.com"
target="_blank"
rel="noopener"
>Nous Portal</a
>, OpenRouter, or any OpenAI-compatible API.
</p>
</div>
<div class="spec-row">
<h3 class="spec-label">Environments</h3>
<p class="spec-value">
Run locally, in Docker, over SSH, on Modal, Daytona, or
Singularity. Container hardening with read-only root, dropped
capabilities, and namespace isolation.
</p>
</div>
<div class="spec-row">
<h3 class="spec-label">Skills</h3>
<p class="spec-value">
40+ bundled skills covering MLOps, GitHub workflows, research,
and more. The agent creates new skills on the fly and shares
them via the open
<a href="https://agentskills.io" target="_blank" rel="noopener"
>agentskills.io</a
>
format. Install community skills from
<a href="https://clawhub.ai" target="_blank" rel="noopener"
>ClawHub</a
>,
<a href="https://lobehub.com" target="_blank" rel="noopener"
>LobeHub</a
>, and GitHub.
</p>
</div>
<div class="spec-row">
<h3 class="spec-label">Research</h3>
<p class="spec-value">
Batch trajectory generation with parallel workers and
checkpointing. Atropos integration for RL training. Export to
ShareGPT for fine-tuning with trajectory compression.
</p>
</div>
</div>
</div>
</div>
</section>
<footer class="footer">
<div class="container">
<p class="footer-copy">
Built by
<a href="https://nousresearch.com" target="_blank" rel="noopener"
>Nous Research</a
>
&middot; MIT License &middot; 2026
</p>
</div>
</footer>
<script src="script.js"></script>
</body>
</html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

View file

@ -1,521 +0,0 @@
// =========================================================================
// Hermes Agent Landing Page — Interactions
// =========================================================================
// --- Platform install commands ---
// Per-platform copy for the install widgets: shell command, prompt glyph,
// hero note, and step-1 note. Only one unified POSIX entry exists today;
// the map shape is kept so OS-specific tabs can be added later.
const PLATFORMS = {
  linux: {
    command:
      "curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash",
    prompt: "$",
    note: "Works on Linux, macOS & WSL2 · No prerequisites · Installs everything automatically",
    stepNote:
      "Installs uv, Python 3.11, clones the repo, sets up everything. No sudo needed.",
  },
};
// All supported platforms currently share one install command, so detection
// simply reports the unified "linux" entry of PLATFORMS.
function detectPlatform() {
  const unified = "linux";
  return unified;
}
// Apply a platform's install command/prompt/note to the hero widget, mark the
// matching hero tab active, and keep the step section in sync.
function switchPlatform(platform) {
  const cfg = PLATFORMS[platform];
  if (!cfg) return;

  // Hero install widget text (same order as before: command, prompt, note).
  const heroUpdates = [
    ["install-command", cfg.command],
    ["install-prompt", cfg.prompt],
    ["install-note", cfg.note],
  ];
  for (const [id, text] of heroUpdates) {
    const el = document.getElementById(id);
    if (el) el.textContent = text;
  }

  // Highlight the hero tab that matches this platform.
  for (const tab of document.querySelectorAll(".install-tab")) {
    tab.classList.toggle("active", tab.dataset.platform === platform);
  }

  // Mirror the change in the step-by-step section.
  switchStepPlatform(platform);
}
// Apply a platform's install command to step 1 of the install section:
// the command text, the copy button's payload, and the explanatory note.
function switchStepPlatform(platform) {
  const cfg = PLATFORMS[platform];
  if (!cfg) return;

  const cmdNode = document.getElementById("step1-command");
  if (cmdNode) cmdNode.textContent = cfg.command;

  const copyButton = document.getElementById("step1-copy");
  if (copyButton) copyButton.setAttribute("data-text", cfg.command);

  const noteNode = document.getElementById("step1-note");
  if (noteNode) noteNode.textContent = cfg.stepNote;

  // Highlight the matching tab in the step section.
  for (const tab of document.querySelectorAll(".code-tab")) {
    tab.classList.toggle("active", tab.dataset.platform === platform);
  }
}
// Open/close the mobile nav drawer and animate the hamburger icon in step.
function toggleMobileNav() {
  ["nav-mobile", "nav-hamburger"].forEach((id) => {
    document.getElementById(id).classList.toggle("open");
  });
}
// Expand/collapse the "More details" specs section with an animated
// max-height transition, updating the toggle button's label and open state.
function toggleSpecs() {
  const wrapper = document.getElementById("specs-wrapper");
  const btn = document.getElementById("specs-toggle");
  const label = btn.querySelector(".toggle-label");
  const isOpen = wrapper.classList.contains("open");
  if (isOpen) {
    // Collapse: pin max-height to the current pixel height first, then drop
    // it to 0 on the next frame so the CSS transition has a concrete start
    // value (transitioning from "none" would snap instead of animating).
    wrapper.style.maxHeight = wrapper.scrollHeight + "px";
    requestAnimationFrame(() => {
      wrapper.style.maxHeight = "0";
    });
    wrapper.classList.remove("open");
    btn.classList.remove("open");
    if (label) label.textContent = "More details";
  } else {
    // Expand: animate up to the measured content height, then release the
    // constraint to "none" once the transition ends so the content can
    // reflow freely afterwards (e.g. on window resize).
    wrapper.classList.add("open");
    wrapper.style.maxHeight = wrapper.scrollHeight + "px";
    btn.classList.add("open");
    if (label) label.textContent = "Less";
    wrapper.addEventListener(
      "transitionend",
      () => {
        // Only release if still open — a quick re-toggle may have started
        // a collapse before this fires.
        if (wrapper.classList.contains("open")) {
          wrapper.style.maxHeight = "none";
        }
      },
      { once: true }
    );
  }
}
// --- Copy to clipboard ---
// Copy the hero install command to the clipboard and flash "Copied!" on the
// widget's copy button for two seconds.
function copyInstall() {
  const command = document.getElementById("install-command").textContent;
  navigator.clipboard.writeText(command).then(() => {
    const btn = document.querySelector(".install-widget-body .copy-btn");
    const labelNode = btn.querySelector(".copy-text");
    const previous = labelNode.textContent;
    labelNode.textContent = "Copied!";
    btn.style.color = "var(--primary-light)";
    setTimeout(() => {
      labelNode.textContent = previous;
      btn.style.color = "";
    }, 2000);
  });
}
// Copy the button's data-text payload to the clipboard and flash "Copied!"
// as the button label for two seconds before restoring it.
function copyText(btn) {
  const payload = btn.getAttribute("data-text");
  navigator.clipboard.writeText(payload).then(() => {
    const previousLabel = btn.textContent;
    btn.textContent = "Copied!";
    btn.style.color = "var(--primary-light)";
    setTimeout(() => {
      btn.textContent = previousLabel;
      btn.style.color = "";
    }, 2000);
  });
}
// --- Scroll-triggered fade-in ---
// Fade page sections in as they scroll into view. Each element gets the
// "fade-in" class immediately and "visible" once it intersects the viewport;
// siblings inside the same grid are staggered by 60 ms per index.
function initScrollAnimations() {
  const animated = document.querySelectorAll(
    ".feature-card, .install-step, " +
      ".section-header, .terminal-window",
  );
  for (const el of animated) el.classList.add("fade-in");

  const observer = new IntersectionObserver(
    (entries) => {
      for (const entry of entries) {
        if (!entry.isIntersecting) continue;
        const parent = entry.target.parentElement;
        if (parent) {
          // Stagger children within grids by their position among siblings.
          const siblings = Array.from(parent.querySelectorAll(".fade-in"));
          const position = Math.max(0, siblings.indexOf(entry.target));
          setTimeout(() => {
            entry.target.classList.add("visible");
          }, position * 60);
        } else {
          entry.target.classList.add("visible");
        }
        // Each element animates only once.
        observer.unobserve(entry.target);
      }
    },
    { threshold: 0.1, rootMargin: "0px 0px -40px 0px" },
  );
  for (const el of animated) observer.observe(el);
}
// --- Terminal Demo ---
// HTML snippet appended after the buffer on every render to simulate a
// blinking terminal cursor.
const CURSOR = '<span class="terminal-cursor">█</span>';
// Script replayed by TerminalDemo: three looping scenes (research, PR
// review, memory recall). Step shapes:
//   { type: "prompt", text }        — print the shell prompt
//   { type: "type", text, delay }   — type text, one char per `delay` ms
//   { type: "output", lines }       — print pre-rendered HTML lines
//   { type: "pause", ms }           — sleep `ms` milliseconds
//   { type: "clear" }               — wipe the terminal
const demoSequence = [
  // Scene 1: research task driven by web tools.
  { type: "prompt", text: " " },
  {
    type: "type",
    text: "Research the latest approaches to GRPO training and write a summary",
    delay: 30,
  },
  { type: "pause", ms: 600 },
  {
    type: "output",
    lines: [
      "",
      '<span class="t-dim"> web_search "GRPO reinforcement learning 2026" 1.2s</span>',
    ],
  },
  { type: "pause", ms: 400 },
  {
    type: "output",
    lines: [
      '<span class="t-dim"> web_extract arxiv.org/abs/2402.03300 3.1s</span>',
    ],
  },
  { type: "pause", ms: 400 },
  {
    type: "output",
    lines: [
      '<span class="t-dim"> web_search "GRPO vs PPO ablation results" 0.9s</span>',
    ],
  },
  { type: "pause", ms: 400 },
  {
    type: "output",
    lines: [
      '<span class="t-dim"> web_extract huggingface.co/blog/grpo 2.8s</span>',
    ],
  },
  { type: "pause", ms: 400 },
  {
    type: "output",
    lines: [
      '<span class="t-dim"> write_file ~/research/grpo-summary.md 0.1s</span>',
    ],
  },
  { type: "pause", ms: 500 },
  {
    type: "output",
    lines: [
      "",
      '<span class="t-text">Done! I\'ve written a summary covering:</span>',
      "",
      '<span class="t-text"> <span class="t-green">✓</span> GRPO\'s group-relative advantage (no critic model needed)</span>',
      '<span class="t-text"> <span class="t-green">✓</span> Comparison with PPO/DPO on reasoning benchmarks</span>',
      '<span class="t-text"> <span class="t-green">✓</span> Implementation notes for Axolotl and TRL</span>',
      "",
      '<span class="t-text">Saved to</span> <span class="t-accent">~/research/grpo-summary.md</span>',
    ],
  },
  { type: "pause", ms: 2500 },
  { type: "clear" },
  // Scene 2: PR review with delegation, patching, and tests.
  { type: "prompt", text: " " },
  {
    type: "type",
    text: "Review the PR at NousResearch/hermes-agent#42 and fix any issues",
    delay: 30,
  },
  { type: "pause", ms: 600 },
  {
    type: "output",
    lines: [
      "",
      '<span class="t-dim"> delegate_task "review PR #42 changes" 2.1s</span>',
    ],
  },
  { type: "pause", ms: 500 },
  {
    type: "output",
    lines: [
      '<span class="t-dim"> git diff main..pr-42 0.4s</span>',
    ],
  },
  { type: "pause", ms: 400 },
  {
    type: "output",
    lines: [
      '<span class="t-dim"> patch tools/registry.py 0.1s</span>',
    ],
  },
  { type: "pause", ms: 400 },
  {
    type: "output",
    lines: [
      '<span class="t-dim"> python -m pytest tests/ -x 3.2s</span>',
    ],
  },
  { type: "pause", ms: 400 },
  {
    type: "output",
    lines: [
      '<span class="t-dim"> git commit -m "fix: handle empty tool schemas" 0.3s</span>',
    ],
  },
  { type: "pause", ms: 500 },
  {
    type: "output",
    lines: [
      "",
      '<span class="t-text">Found 2 issues in the PR and fixed both:</span>',
      "",
      '<span class="t-text"> <span class="t-green">✓</span> Empty tool schema crash in registry.py — added guard</span>',
      '<span class="t-text"> <span class="t-green">✓</span> Missing error handling in delegate_tool.py — added try/except</span>',
      "",
      '<span class="t-text">Tests pass. Committed the fix and pushed to the PR branch.</span>',
      '<span class="t-text">I also saved a</span> <span class="t-accent">skill</span> <span class="t-text">for this PR review pattern.</span>',
    ],
  },
  { type: "pause", ms: 2500 },
  { type: "clear" },
  // Scene 3: recalling a past fix via session search.
  { type: "prompt", text: " " },
  {
    type: "type",
    text: "How did we fix that Docker networking issue?",
    delay: 35,
  },
  { type: "pause", ms: 500 },
  {
    type: "output",
    lines: [
      "",
      '<span class="t-dim"> session_search "Docker networking" 1.4s</span>',
    ],
  },
  { type: "pause", ms: 500 },
  {
    type: "output",
    lines: [
      "",
      '<span class="t-text">Found it — from a session on February 12th:</span>',
      "",
      '<span class="t-text">The containers couldn\'t reach each other because the compose</span>',
      '<span class="t-text">file was using the default bridge network. We switched to a</span>',
      '<span class="t-text">custom network with</span> <span class="t-accent">driver: overlay</span><span class="t-text">, added explicit</span>',
      '<span class="t-text">aliases, and set</span> <span class="t-accent">dns: 8.8.8.8</span> <span class="t-text">as a fallback.</span>',
      "",
      '<span class="t-text">The fix was committed in</span> <span class="t-accent">docker-compose.prod.yml</span><span class="t-text">.</span>',
    ],
  },
  { type: "pause", ms: 3000 },
];
// Replays `demoSequence` inside a container element, typing commands
// character-by-character and printing tool output, looping until stopped.
class TerminalDemo {
  constructor(container) {
    // Element whose innerHTML is rewritten on every render.
    this.container = container;
    // Loop guard: start() no-ops while true; stop() clears it.
    this.running = false;
    // Accumulated HTML for the current scene (cursor added at render time).
    this.content = "";
  }
  // Play the sequence in an endless loop. Re-entrant calls are ignored.
  async start() {
    if (this.running) return;
    this.running = true;
    while (this.running) {
      for (const step of demoSequence) {
        // Check cancellation between every step so stop() takes effect fast.
        if (!this.running) return;
        await this.execute(step);
      }
      this.clear();
      await this.sleep(1000);
    }
  }
  // Request the loop to end; honored at the next cancellation check.
  stop() {
    this.running = false;
  }
  // Perform one step of the script (see demoSequence for step shapes).
  async execute(step) {
    switch (step.type) {
      case "prompt":
        this.append(`<span class="t-prompt">${step.text}</span>`);
        break;
      case "type":
        // Simulated typing: one character per tick, abortable mid-word.
        for (const char of step.text) {
          if (!this.running) return;
          this.append(`<span class="t-cmd">${char}</span>`);
          await this.sleep(step.delay || 30);
        }
        break;
      case "output":
        // Print pre-rendered lines 50 ms apart for a streaming feel.
        for (const line of step.lines) {
          if (!this.running) return;
          this.append("\n" + line);
          await this.sleep(50);
        }
        break;
      case "pause":
        await this.sleep(step.ms);
        break;
      case "clear":
        this.clear();
        break;
    }
  }
  // Append raw HTML to the buffer and re-render with the cursor.
  append(html) {
    this.content += html;
    this.render();
  }
  // Paint buffer + cursor and keep the view pinned to the bottom.
  render() {
    this.container.innerHTML = this.content + CURSOR;
    this.container.scrollTop = this.container.scrollHeight;
  }
  // Wipe both the buffer and the on-screen terminal.
  clear() {
    this.content = "";
    this.container.innerHTML = "";
  }
  // Promise-based delay helper.
  sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}
// --- Noise Overlay (ported from hermes-chat NoiseOverlay) ---
// Draws a full-viewport animated grain on the #noise-overlay canvas via THREE.js.
function initNoiseOverlay() {
  // Accessibility: skip the animation entirely for reduced-motion users.
  if (window.matchMedia("(prefers-reduced-motion: reduce)").matches) return;
  // THREE is loaded from a separate script tag; degrade gracefully if absent.
  if (typeof THREE === "undefined") return;
  const canvas = document.getElementById("noise-overlay");
  if (!canvas) return;
  // Pass-through vertex shader: forward UVs, standard projection.
  const vertexShader = `
    varying vec2 vUv;
    void main() {
      vUv = uv;
      gl_Position = projectionMatrix * modelViewMatrix * vec4(position, 1.0);
    }
  `;
  // Fragment shader: hash-based white noise per grain cell; uDensity sets the
  // fraction of lit cells (via step threshold), uOpacity scales the result,
  // uSize is the grain cell size in device pixels.
  const fragmentShader = `
    uniform vec2 uRes;
    uniform float uDpr, uSize, uDensity, uOpacity;
    uniform vec3 uColor;
    varying vec2 vUv;
    float hash(vec2 p) {
      vec3 p3 = fract(vec3(p.xyx) * 0.1031);
      p3 += dot(p3, p3.yzx + 33.33);
      return fract((p3.x + p3.y) * p3.z);
    }
    void main() {
      float n = hash(floor(vUv * uRes / (uSize * uDpr)));
      gl_FragColor = vec4(uColor, step(1.0 - uDensity, n)) * uOpacity;
    }
  `;
  // "#RRGGBB" → THREE.Vector3 with 0..1 channels (shader uniform format).
  function hexToVec3(hex) {
    const c = hex.replace("#", "");
    return new THREE.Vector3(
      parseInt(c.substring(0, 2), 16) / 255,
      parseInt(c.substring(2, 4), 16) / 255,
      parseInt(c.substring(4, 6), 16) / 255,
    );
  }
  // Transparent renderer so the page shows through between grain pixels.
  const renderer = new THREE.WebGLRenderer({
    alpha: true,
    canvas,
    premultipliedAlpha: false,
  });
  renderer.setClearColor(0x000000, 0);
  const scene = new THREE.Scene();
  // Full-screen quad: orthographic camera over a 2x2 plane in clip space.
  const camera = new THREE.OrthographicCamera(-1, 1, 1, -1, 0, 1);
  const geo = new THREE.PlaneGeometry(2, 2);
  const mat = new THREE.ShaderMaterial({
    vertexShader,
    fragmentShader,
    transparent: true,
    uniforms: {
      uColor: { value: hexToVec3("#8090BB") },
      uDensity: { value: 0.1 },
      uDpr: { value: 1 },
      uOpacity: { value: 0.4 },
      uRes: { value: new THREE.Vector2() },
      uSize: { value: 1.0 },
    },
  });
  scene.add(new THREE.Mesh(geo, mat));
  // Keep the drawing buffer and shader resolution in sync with the viewport.
  function resize() {
    const dpr = window.devicePixelRatio;
    const w = window.innerWidth;
    const h = window.innerHeight;
    renderer.setSize(w, h);
    renderer.setPixelRatio(dpr);
    mat.uniforms.uRes.value.set(w * dpr, h * dpr);
    mat.uniforms.uDpr.value = dpr;
  }
  resize();
  window.addEventListener("resize", resize);
  // Render loop runs for the page lifetime — the overlay persists as long as
  // the document does, so no teardown path is provided.
  function loop() {
    requestAnimationFrame(loop);
    renderer.render(scene, camera);
  }
  loop();
}
// --- Initialize ---
// Page bootstrap: platform detection, scroll animations, noise overlay,
// terminal demo (visibility-driven), and the nav border tint on scroll.
document.addEventListener("DOMContentLoaded", () => {
  const detectedPlatform = detectPlatform();
  switchPlatform(detectedPlatform);
  initScrollAnimations();
  initNoiseOverlay();

  // Terminal demo: only animate while the widget is actually on screen.
  const terminalEl = document.getElementById("terminal-demo");
  if (terminalEl) {
    const demo = new TerminalDemo(terminalEl);
    const observer = new IntersectionObserver(
      (entries) => {
        entries.forEach((entry) => {
          if (entry.isIntersecting) {
            demo.start();
          } else {
            demo.stop();
          }
        });
      },
      { threshold: 0.3 },
    );
    // Fix: the original unconditionally observed `.terminal-window`, which
    // throws a TypeError when that wrapper is absent even though only
    // #terminal-demo was checked above. Fall back to the demo element itself.
    const observeTarget = document.querySelector(".terminal-window") || terminalEl;
    observer.observe(observeTarget);
  }

  // Nav border tint on scroll, throttled to one update per animation frame.
  // Fix: guard against a missing .nav element — the handler previously
  // dereferenced `nav.style` on every scroll and would throw repeatedly.
  const nav = document.querySelector(".nav");
  if (nav) {
    let ticking = false;
    window.addEventListener("scroll", () => {
      if (!ticking) {
        requestAnimationFrame(() => {
          if (window.scrollY > 50) {
            nav.style.borderBottomColor = "rgba(48, 80, 255, 0.15)";
          } else {
            nav.style.borderBottomColor = "";
          }
          ticking = false;
        });
        ticking = true;
      }
    });
  }
});

File diff suppressed because it is too large Load diff

View file

@ -540,13 +540,6 @@ class AIAgent:
for AI models that support function calling.
"""
# ── Class-level context pressure dedup (survives across instances) ──
# The gateway creates a new AIAgent per message, so instance-level flags
# reset every time. This dict tracks {session_id: (warn_level, timestamp)}
# to suppress duplicate warnings within a cooldown window.
_context_pressure_last_warned: dict = {}
_CONTEXT_PRESSURE_COOLDOWN = 300 # seconds between re-warning same session
@property
def base_url(self) -> str:
return self._base_url
@ -826,12 +819,6 @@ class AIAgent:
self._budget_exhausted_injected = False
self._budget_grace_call = False
# Context pressure warnings: notify the USER (not the LLM) as context
# fills up. Purely informational — displayed in CLI output and sent via
# status_callback for gateway platforms. Does NOT inject into messages.
# Tiered: fires at 85% and again at 95% of compaction threshold.
self._context_pressure_warned_at = 0.0 # highest tier already shown
# Activity tracking — updated on each API call, tool execution, and
# stream chunk. Used by the gateway timeout handler to report what the
# agent was doing when it was killed, and by the "still working"
@ -4353,6 +4340,15 @@ class AIAgent:
def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any:
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
# Treat client_kwargs as read-only. Callers pass self._client_kwargs (or shallow
# copies of it) in; any in-place mutation leaks back into the stored dict and is
# reused on subsequent requests. #10933 hit this by injecting an httpx.Client
# transport that was torn down after the first request, so the next request
# wrapped a closed transport and raised "Cannot send a request, as the client
# has been closed" on every retry. The revert resolved that specific path; this
# copy locks the contract so future transport/keepalive work can't reintroduce
# the same class of bug.
client_kwargs = dict(client_kwargs)
_validate_proxy_env_urls()
_validate_base_url(client_kwargs.get("base_url"))
if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"):
@ -7221,20 +7217,6 @@ class AIAgent:
self.context_compressor.last_prompt_tokens = _compressed_est
self.context_compressor.last_completion_tokens = 0
# Only reset the pressure warning if compression actually brought
# us below the warning level (85% of threshold). When compression
# can't reduce enough (e.g. threshold is very low, or system prompt
# alone exceeds the warning level), keep the tier set to prevent
# spamming the user with repeated warnings every loop iteration.
if self.context_compressor.threshold_tokens > 0:
_post_progress = _compressed_est / self.context_compressor.threshold_tokens
if _post_progress < 0.85:
self._context_pressure_warned_at = 0.0
# Clear class-level dedup for this session so a fresh
# warning cycle can start if context grows again.
_sid = self.session_id or "default"
AIAgent._context_pressure_last_warned.pop(_sid, None)
# Clear the file-read dedup cache. After compression the original
# read content is summarised away — if the model re-reads the same
# file it needs the full content, not a "file unchanged" stub.
@ -8034,45 +8016,6 @@ class AIAgent:
def _emit_context_pressure(self, compaction_progress: float, compressor) -> None:
"""Notify the user that context is approaching the compaction threshold.
Args:
compaction_progress: How close to compaction (0.01.0, where 1.0 = fires).
compressor: The ContextCompressor instance (for threshold/context info).
Purely user-facing does NOT modify the message stream.
For CLI: prints a formatted line with a progress bar.
For gateway: fires status_callback so the platform can send a chat message.
"""
from agent.display import format_context_pressure, format_context_pressure_gateway
threshold_pct = compressor.threshold_tokens / compressor.context_length if compressor.context_length else 0.5
# CLI output — always shown (these are user-facing status notifications,
# not verbose debug output, so they bypass quiet_mode).
# Gateway users also get the callback below.
if self.platform in (None, "cli"):
line = format_context_pressure(
compaction_progress=compaction_progress,
threshold_tokens=compressor.threshold_tokens,
threshold_percent=threshold_pct,
compression_enabled=self.compression_enabled,
)
self._safe_print(line)
# Gateway / external consumers
if self.status_callback:
try:
msg = format_context_pressure_gateway(
compaction_progress=compaction_progress,
threshold_percent=threshold_pct,
compression_enabled=self.compression_enabled,
)
self.status_callback("context_pressure", msg)
except Exception:
logger.debug("status_callback error in context pressure", exc_info=True)
def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
"""Request a summary when max iterations are reached. Returns the final response text."""
print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary...")
@ -10800,38 +10743,6 @@ class AIAgent:
else:
_real_tokens = estimate_messages_tokens_rough(messages)
# ── Context pressure warnings (user-facing only) ──────────
# Notify the user (NOT the LLM) as context approaches the
# compaction threshold. Thresholds are relative to where
# compaction fires, not the raw context window.
# Does not inject into messages — just prints to CLI output
# and fires status_callback for gateway platforms.
# Tiered: 85% (orange) and 95% (red/critical).
if _compressor.threshold_tokens > 0:
_compaction_progress = _real_tokens / _compressor.threshold_tokens
# Determine the warning tier for this progress level
_warn_tier = 0.0
if _compaction_progress >= 0.95:
_warn_tier = 0.95
elif _compaction_progress >= 0.85:
_warn_tier = 0.85
if _warn_tier > self._context_pressure_warned_at:
# Class-level dedup: check if this session was already
# warned at this tier within the cooldown window.
_sid = self.session_id or "default"
_last = AIAgent._context_pressure_last_warned.get(_sid)
_now = time.time()
if _last is None or _last[0] < _warn_tier or (_now - _last[1]) >= self._CONTEXT_PRESSURE_COOLDOWN:
self._context_pressure_warned_at = _warn_tier
AIAgent._context_pressure_last_warned[_sid] = (_warn_tier, _now)
self._emit_context_pressure(_compaction_progress, _compressor)
# Evict stale entries (older than 2x cooldown)
_cutoff = _now - self._CONTEXT_PRESSURE_COOLDOWN * 2
AIAgent._context_pressure_last_warned = {
k: v for k, v in AIAgent._context_pressure_last_warned.items()
if v[1] > _cutoff
}
if self.compression_enabled and _compressor.should_compress(_real_tokens):
self._safe_print(" ⟳ compacting context…")
messages, active_system_prompt = self._compress_context(

View file

@ -180,6 +180,18 @@ AUTHOR_MAP = {
"lisicheng168@gmail.com": "lesterli",
"mingjwan@microsoft.com": "MagicRay1217",
"orangeko@gmail.com": "GenKoKo",
"82095453+iacker@users.noreply.github.com": "iacker",
"sontianye@users.noreply.github.com": "sontianye",
"jackjin1997@users.noreply.github.com": "jackjin1997",
"danieldoderlein@users.noreply.github.com": "danieldoderlein",
"lrawnsley@users.noreply.github.com": "lrawnsley",
"taeuk178@users.noreply.github.com": "taeuk178",
"ogzerber@users.noreply.github.com": "ogzerber",
"cola-runner@users.noreply.github.com": "cola-runner",
"ygd58@users.noreply.github.com": "ygd58",
"vominh1919@users.noreply.github.com": "vominh1919",
"LeonSGP43@users.noreply.github.com": "LeonSGP43",
"Lubrsy706@users.noreply.github.com": "Lubrsy706",
"niyant@spicefi.xyz": "spniyant",
"olafthiele@gmail.com": "olafthiele",
"oncuevtv@gmail.com": "sprmn24",

View file

@ -0,0 +1,40 @@
"""Test that call_llm vision path passes resolved provider args, not raw ones."""
from unittest.mock import patch, MagicMock
def test_vision_call_uses_resolved_provider_args():
    """Resolved provider/model/key/url from config must reach resolve_vision_provider_client."""
    from agent.auxiliary_client import call_llm

    # Stub OpenAI-style client: one choice with fixed content and usage counts.
    stub_client = MagicMock()
    stub_client.chat.completions.create.return_value = MagicMock(
        choices=[MagicMock(message=MagicMock(content="description"))],
        usage=MagicMock(prompt_tokens=10, completion_tokens=5),
    )
    resolved = (
        "my-resolved-provider",
        "my-resolved-model",
        "http://resolved",
        "resolved-key",
        "chat_completions",
    )
    with (
        patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=resolved,
        ),
        patch(
            "agent.auxiliary_client.resolve_vision_provider_client",
            return_value=("my-resolved-provider", stub_client, "my-resolved-model"),
        ) as vision_mock,
    ):
        call_llm(
            "vision",
            provider="raw-provider",
            model="raw-model",
            base_url="http://raw",
            api_key="raw-key",
            messages=[{"role": "user", "content": "describe this"}],
        )

    # The raw call_llm arguments must have been replaced by the resolved ones.
    kwargs = vision_mock.call_args.kwargs
    assert kwargs["provider"] == "my-resolved-provider"
    assert kwargs["model"] == "my-resolved-model"
    assert kwargs["base_url"] == "http://resolved"
    assert kwargs["api_key"] == "resolved-key"

View file

@ -578,7 +578,7 @@ def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys):
# After the probe detects a single model ("llm"), the flow asks
# "Use this model? [Y/n]:" — confirm with Enter, then context length,
# then display name.
answers = iter(["http://localhost:8000", "local-key", "", "", ""])
answers = iter(["http://localhost:8000", "local-key", "", "", "", ""])
monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers))
monkeypatch.setattr("getpass.getpass", lambda _prompt="": next(answers))

View file

@ -0,0 +1,107 @@
"""Tests that load_cli_config() guards against lazy-import TERMINAL_CWD clobbering.
When the gateway resolves TERMINAL_CWD at startup and cli.py is later
imported lazily (via delegate_tool CLI_CONFIG), load_cli_config() must
not overwrite the already-resolved value with os.getcwd().
config.yaml terminal.cwd is the canonical source of truth.
.env TERMINAL_CWD and MESSAGING_CWD are deprecated.
See issue #10817.
"""
import os
import pytest
# The sentinel values that mean "resolve at runtime"
_CWD_PLACEHOLDERS = (".", "auto", "cwd")
def _resolve_terminal_cwd(terminal_config: dict, defaults: dict, env: dict):
"""Simulate the CWD resolution logic from load_cli_config().
This mirrors the code in cli.py that checks for a pre-resolved
TERMINAL_CWD before falling back to os.getcwd().
"""
if terminal_config.get("cwd") in _CWD_PLACEHOLDERS:
_existing_cwd = env.get("TERMINAL_CWD", "")
if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd):
terminal_config["cwd"] = _existing_cwd
defaults["terminal"]["cwd"] = _existing_cwd
else:
effective_backend = terminal_config.get("env_type", "local")
if effective_backend == "local":
terminal_config["cwd"] = "/fake/getcwd" # stand-in for os.getcwd()
defaults["terminal"]["cwd"] = terminal_config["cwd"]
else:
terminal_config.pop("cwd", None)
# Simulate the bridging loop: write terminal_config["cwd"] to env
_file_has_terminal = defaults.get("_file_has_terminal", False)
if "cwd" in terminal_config:
if _file_has_terminal or "TERMINAL_CWD" not in env:
env["TERMINAL_CWD"] = str(terminal_config["cwd"])
return env.get("TERMINAL_CWD", "")
class TestLazyImportGuard:
    """TERMINAL_CWD resolved by gateway must survive a lazy cli.py import."""

    def test_gateway_resolved_cwd_survives(self):
        """Gateway set TERMINAL_CWD → lazy cli import must not clobber."""
        env = {"TERMINAL_CWD": "/home/user/workspace"}
        cfg = {"cwd": ".", "env_type": "local"}
        defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False}
        assert _resolve_terminal_cwd(cfg, defaults, env) == "/home/user/workspace"

    def test_gateway_resolved_cwd_survives_with_file_terminal(self):
        """Even when config.yaml has a terminal: section, resolved CWD survives."""
        env = {"TERMINAL_CWD": "/home/user/workspace"}
        cfg = {"cwd": ".", "env_type": "local"}
        defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": True}
        assert _resolve_terminal_cwd(cfg, defaults, env) == "/home/user/workspace"
class TestConfigCwdResolution:
    """config.yaml terminal.cwd is the canonical source of truth."""

    def test_explicit_config_cwd_wins(self):
        """terminal.cwd: /explicit/path always wins."""
        outcome = _resolve_terminal_cwd(
            {"cwd": "/explicit/path"},
            {"terminal": {"cwd": "/explicit/path"}, "_file_has_terminal": True},
            {"TERMINAL_CWD": "/old/gateway/value"},
        )
        assert outcome == "/explicit/path"

    def test_dot_cwd_resolves_to_getcwd_when_no_prior(self):
        """With no pre-set TERMINAL_CWD, "." resolves to os.getcwd()."""
        outcome = _resolve_terminal_cwd(
            {"cwd": "."},
            {"terminal": {"cwd": "."}, "_file_has_terminal": False},
            {},
        )
        assert outcome == "/fake/getcwd"

    def test_remote_backend_pops_cwd(self):
        """Remote backend + placeholder cwd → popped for backend default."""
        outcome = _resolve_terminal_cwd(
            {"cwd": ".", "env_type": "docker"},
            {"terminal": {"cwd": "."}, "_file_has_terminal": False},
            {},
        )
        # cwd was popped and no env var written, so the lookup comes back empty.
        assert outcome == ""

    def test_remote_backend_with_prior_cwd_preserves(self):
        """Remote backend + pre-resolved TERMINAL_CWD → adopted."""
        outcome = _resolve_terminal_cwd(
            {"cwd": ".", "env_type": "docker"},
            {"terminal": {"cwd": "."}, "_file_has_terminal": False},
            {"TERMINAL_CWD": "/project"},
        )
        assert outcome == "/project"

View file

@ -138,7 +138,7 @@ class TestRunConversationSurrogateSanitization:
mock_stream.return_value = mock_response
mock_api.return_value = mock_response
agent = AIAgent(model="test/model", quiet_mode=True, skip_memory=True, skip_context_files=True)
agent = AIAgent(model="test/model", api_key="test-key", base_url="http://localhost:1234/v1", quiet_mode=True, skip_memory=True, skip_context_files=True)
agent.client = MagicMock()
# Pass a message with surrogates

View file

@ -675,7 +675,7 @@ class TestRunJobSessionPersistence:
def test_run_job_empty_response_returns_empty_not_placeholder(self, tmp_path):
"""Empty final_response should stay empty for delivery logic (issue #2234).
The placeholder '(No response generated)' should only appear in the
output log, not in the returned final_response that's used for delivery.
"""
@ -693,7 +693,7 @@ class TestRunJobSessionPersistence:
patch(
"hermes_cli.runtime_provider.resolve_runtime_provider",
return_value={
"api_key": "test-key",
"api_key": "***",
"base_url": "https://example.invalid/v1",
"provider": "openrouter",
"api_mode": "chat_completions",
@ -714,6 +714,43 @@ class TestRunJobSessionPersistence:
# But the output log should show the placeholder
assert "(No response generated)" in output
def test_tick_marks_empty_response_as_error(self, tmp_path):
"""When run_job returns success=True but final_response is empty,
tick() should mark the job as error so last_status != 'ok'.
(issue #8585)
"""
from cron.scheduler import tick
from cron.jobs import load_jobs, save_jobs
job = {
"id": "empty-job",
"name": "empty-test",
"prompt": "do something",
"schedule": "every 1h",
"enabled": True,
"next_run_at": "2020-01-01T00:00:00",
"deliver": "local",
"last_status": None,
}
fake_db = MagicMock()
with patch("cron.scheduler._hermes_home", tmp_path), \
patch("cron.scheduler.get_due_jobs", return_value=[job]), \
patch("cron.scheduler.advance_next_run"), \
patch("cron.scheduler.mark_job_run") as mock_mark, \
patch("cron.scheduler.save_job_output", return_value="/tmp/out.md"), \
patch("cron.scheduler._resolve_origin", return_value=None), \
patch("cron.scheduler.run_job", return_value=(True, "output", "", None)):
tick(verbose=False)
# Should be called with success=False because final_response is empty
mock_mark.assert_called_once()
call_args = mock_mark.call_args
assert call_args[0][0] == "empty-job"
assert call_args[0][1] is False # success should be False
assert "empty" in call_args[0][2].lower() # error should mention empty
def test_run_job_sets_auto_delivery_env_from_dotenv_home_channel(self, tmp_path, monkeypatch):
job = {
"id": "test-job",

View file

@ -62,5 +62,86 @@ def _ensure_telegram_mock() -> None:
sys.modules["telegram.error"] = mod.error
def _ensure_discord_mock() -> None:
"""Install a comprehensive discord mock in sys.modules.
Idempotent skips when the real library is already imported.
Uses ``sys.modules[name] = mod`` (overwrite) instead of
``setdefault`` so it wins even if a partial/broken import already
cached the module.
This mock is comprehensive it includes **all** attributes needed by
every gateway discord test file. Individual test files should call
this function (it short-circuits when already present) rather than
maintaining their own mock setup.
"""
if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"):
return # Real library is installed — nothing to mock
from types import SimpleNamespace
discord_mod = MagicMock()
discord_mod.Intents.default.return_value = MagicMock()
discord_mod.Client = MagicMock
discord_mod.File = MagicMock
discord_mod.DMChannel = type("DMChannel", (), {})
discord_mod.Thread = type("Thread", (), {})
discord_mod.ForumChannel = type("ForumChannel", (), {})
discord_mod.Interaction = object
discord_mod.Embed = MagicMock
discord_mod.ui = SimpleNamespace(
View=object,
button=lambda *a, **k: (lambda fn: fn),
Button=object,
)
discord_mod.ButtonStyle = SimpleNamespace(
success=1, primary=2, secondary=2, danger=3,
green=1, grey=2, blurple=2, red=3,
)
discord_mod.Color = SimpleNamespace(
orange=lambda: 1, green=lambda: 2, blue=lambda: 3,
red=lambda: 4, purple=lambda: 5,
)
# app_commands — needed by _register_slash_commands auto-registration
class _FakeGroup:
def __init__(self, *, name, description, parent=None):
self.name = name
self.description = description
self.parent = parent
self._children: dict = {}
if parent is not None:
parent.add_command(self)
def add_command(self, cmd):
self._children[cmd.name] = cmd
class _FakeCommand:
def __init__(self, *, name, description, callback, parent=None):
self.name = name
self.description = description
self.callback = callback
self.parent = parent
discord_mod.app_commands = SimpleNamespace(
describe=lambda **kwargs: (lambda fn: fn),
choices=lambda **kwargs: (lambda fn: fn),
Choice=lambda **kwargs: SimpleNamespace(**kwargs),
Group=_FakeGroup,
Command=_FakeCommand,
)
ext_mod = MagicMock()
commands_mod = MagicMock()
commands_mod.Bot = MagicMock
ext_mod.commands = commands_mod
for name in ("discord", "discord.ext", "discord.ext.commands"):
sys.modules[name] = discord_mod
sys.modules["discord.ext"] = ext_mod
sys.modules["discord.ext.commands"] = commands_mod
# Run at collection time — before any test file's module-level imports — so
# every gateway test sees the mocked telegram/discord modules in sys.modules.
_ensure_telegram_mock()
_ensure_discord_mock()

View file

@ -220,6 +220,8 @@ class TestRunBackgroundTask:
with patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}), \
patch("run_agent.AIAgent") as MockAgent:
mock_agent_instance = MagicMock()
mock_agent_instance.shutdown_memory_provider = MagicMock()
mock_agent_instance.close = MagicMock()
mock_agent_instance.run_conversation.return_value = mock_result
MockAgent.return_value = mock_agent_instance
@ -231,6 +233,37 @@ class TestRunBackgroundTask:
content = call_args[1].get("content", call_args[0][1] if len(call_args[0]) > 1 else "")
assert "Background task complete" in content
assert "Hello from background!" in content
mock_agent_instance.shutdown_memory_provider.assert_called_once()
mock_agent_instance.close.assert_called_once()
@pytest.mark.asyncio
async def test_agent_cleanup_runs_when_background_agent_raises(self):
"""Temporary background agents must be cleaned up on error paths too."""
runner = _make_runner()
mock_adapter = AsyncMock()
mock_adapter.send = AsyncMock()
runner.adapters[Platform.TELEGRAM] = mock_adapter
source = SessionSource(
platform=Platform.TELEGRAM,
user_id="12345",
chat_id="67890",
user_name="testuser",
)
with patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}), \
patch("run_agent.AIAgent") as MockAgent:
mock_agent_instance = MagicMock()
mock_agent_instance.shutdown_memory_provider = MagicMock()
mock_agent_instance.close = MagicMock()
mock_agent_instance.run_conversation.side_effect = RuntimeError("boom")
MockAgent.return_value = mock_agent_instance
await runner._run_background_task("say hello", source, "bg_test")
mock_adapter.send.assert_called_once()
mock_agent_instance.shutdown_memory_provider.assert_called_once()
mock_agent_instance.close.assert_called_once()
@pytest.mark.asyncio
async def test_exception_sends_error_message(self):

View file

@ -62,6 +62,8 @@ async def test_compress_command_reports_noop_without_success_banner():
history = _make_history()
runner = _make_runner(history)
agent_instance = MagicMock()
agent_instance.shutdown_memory_provider = MagicMock()
agent_instance.close = MagicMock()
agent_instance.context_compressor.protect_first_n = 0
agent_instance.context_compressor._align_boundary_forward.return_value = 0
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
@ -83,6 +85,8 @@ async def test_compress_command_reports_noop_without_success_banner():
assert "No changes from compression" in result
assert "Compressed:" not in result
assert "Rough transcript estimate: ~100 tokens (unchanged)" in result
agent_instance.shutdown_memory_provider.assert_called_once()
agent_instance.close.assert_called_once()
@pytest.mark.asyncio
@ -95,6 +99,8 @@ async def test_compress_command_explains_when_token_estimate_rises():
]
runner = _make_runner(history)
agent_instance = MagicMock()
agent_instance.shutdown_memory_provider = MagicMock()
agent_instance.close = MagicMock()
agent_instance.context_compressor.protect_first_n = 0
agent_instance.context_compressor._align_boundary_forward.return_value = 0
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
@ -119,3 +125,5 @@ async def test_compress_command_explains_when_token_estimate_rises():
assert "Compressed: 4 → 3 messages" in result
assert "Rough transcript estimate: ~100 → ~120 tokens" in result
assert "denser summaries" in result
agent_instance.shutdown_memory_provider.assert_called_once()
agent_instance.close.assert_called_once()

View file

@ -37,6 +37,10 @@ def _simulate_config_bridge(cfg: dict, initial_env: dict | None = None):
for cfg_key, env_var in terminal_env_map.items():
if cfg_key in terminal_cfg:
val = terminal_cfg[cfg_key]
# Skip cwd placeholder values — don't overwrite already-resolved
# TERMINAL_CWD. Mirrors the fix in gateway/run.py.
if cfg_key == "cwd" and str(val) in (".", "auto", "cwd"):
continue
if isinstance(val, list):
env[env_var] = json.dumps(val)
else:
@ -146,3 +150,58 @@ class TestTopLevelCwdAlias:
cfg = {"cwd": "/from/config"}
result = _simulate_config_bridge(cfg, {"MESSAGING_CWD": "/from/env"})
assert result["TERMINAL_CWD"] == "/from/config"
class TestNestedTerminalCwdPlaceholderSkip:
"""terminal.cwd placeholder values must not clobber TERMINAL_CWD.
When config.yaml has terminal.cwd: "." (or "auto"/"cwd"), the gateway
config bridge should NOT write that placeholder to TERMINAL_CWD.
This prevents .env or MESSAGING_CWD values from being overwritten.
See issues #10225, #4672, #10817.
"""
def test_terminal_dot_cwd_does_not_clobber_env(self):
"""terminal.cwd: '.' should not overwrite a pre-set TERMINAL_CWD."""
cfg = {"terminal": {"cwd": "."}}
result = _simulate_config_bridge(cfg, {"TERMINAL_CWD": "/my/project"})
assert result["TERMINAL_CWD"] == "/my/project"
def test_terminal_auto_cwd_does_not_clobber_env(self):
cfg = {"terminal": {"cwd": "auto"}}
result = _simulate_config_bridge(cfg, {"TERMINAL_CWD": "/my/project"})
assert result["TERMINAL_CWD"] == "/my/project"
def test_terminal_cwd_keyword_does_not_clobber_env(self):
cfg = {"terminal": {"cwd": "cwd"}}
result = _simulate_config_bridge(cfg, {"TERMINAL_CWD": "/my/project"})
assert result["TERMINAL_CWD"] == "/my/project"
def test_terminal_explicit_cwd_does_override(self):
"""terminal.cwd: '/explicit/path' SHOULD override TERMINAL_CWD."""
cfg = {"terminal": {"cwd": "/explicit/path"}}
result = _simulate_config_bridge(cfg, {"TERMINAL_CWD": "/old/value"})
assert result["TERMINAL_CWD"] == "/explicit/path"
def test_terminal_dot_cwd_falls_back_to_messaging_cwd(self):
"""terminal.cwd: '.' with no TERMINAL_CWD should fall to MESSAGING_CWD."""
cfg = {"terminal": {"cwd": "."}}
result = _simulate_config_bridge(cfg, {"MESSAGING_CWD": "/from/env"})
assert result["TERMINAL_CWD"] == "/from/env"
def test_terminal_dot_cwd_and_messaging_cwd_both_set(self):
"""Pre-set TERMINAL_CWD from .env wins over terminal.cwd: '.'."""
cfg = {"terminal": {"cwd": ".", "backend": "local"}}
result = _simulate_config_bridge(cfg, {
"TERMINAL_CWD": "/my/project",
"MESSAGING_CWD": "/fallback",
})
assert result["TERMINAL_CWD"] == "/my/project"
def test_non_cwd_terminal_keys_still_bridge(self):
"""Other terminal config keys (backend, timeout) should still bridge normally."""
cfg = {"terminal": {"cwd": ".", "backend": "docker", "timeout": "300"}}
result = _simulate_config_bridge(cfg, {"MESSAGING_CWD": "/from/env"})
assert result["TERMINAL_ENV"] == "docker"
assert result["TERMINAL_TIMEOUT"] == "300"
assert result["TERMINAL_CWD"] == "/from/env"

View file

@ -284,9 +284,20 @@ class TestEnvVarOverride:
# Tests for reply_to_text extraction in _handle_message
# ------------------------------------------------------------------
class FakeDMChannel:
# Build FakeDMChannel on top of the real discord.DMChannel when the library
# is installed — this guarantees isinstance() checks pass in production code
# regardless of test ordering or monkeypatch state. Fall back to a plain
# object base when discord is unavailable (or lacks the attribute).
try:
    import discord as _discord_lib
except ImportError:
    _DMChannelBase = object
else:
    _DMChannelBase = getattr(_discord_lib, "DMChannel", object)


class FakeDMChannel(_DMChannelBase):
    """Minimal DM channel stub (skips mention / channel-allow checks)."""

    def __init__(self, channel_id: int = 100, name: str = "dm"):
        # Intentionally no super().__init__(): the real DMChannel requires a
        # connection State object we do not have in tests.
        self.id = channel_id
        self.name = name
@ -309,10 +320,6 @@ def _make_message(*, content: str = "hi", reference=None):
@pytest.fixture
def reply_text_adapter(monkeypatch):
"""DiscordAdapter wired for _handle_message → handle_message capture."""
import gateway.platforms.discord as discord_platform
monkeypatch.setattr(discord_platform.discord, "DMChannel", FakeDMChannel, raising=False)
config = PlatformConfig(enabled=True, token="fake-token")
adapter = DiscordAdapter(config)
adapter._client = SimpleNamespace(user=SimpleNamespace(id=999))

View file

@ -202,6 +202,22 @@ class TestFlushAgentSilenced:
sys.stdout = old_stdout
assert buf.getvalue() == "", "no-op print_fn spinner must not write to stdout"
def test_flush_agent_closes_resources_after_run(self, monkeypatch):
"""Memory flush should close temporary agent resources after the turn."""
runner, tmp_agent, _ = _make_flush_context(monkeypatch)
tmp_agent.shutdown_memory_provider = MagicMock()
tmp_agent.close = MagicMock()
with (
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: Path("/nonexistent"))}),
):
runner._flush_memories_for_session("session_cleanup")
tmp_agent.shutdown_memory_provider.assert_called_once()
tmp_agent.close.assert_called_once()
class TestFlushPromptStructure:
"""Verify the flush prompt retains its core instructions."""

View file

@ -0,0 +1,42 @@
"""Tests for the pending_event None guard in recursive _run_agent calls.
When pending_event is None (Path B: pending comes from interrupt_message),
accessing pending_event.channel_prompt previously raised AttributeError.
This verifies the fix: channel_prompt is captured inside the
`if pending_event is not None:` block and falls back to None otherwise.
"""
from types import SimpleNamespace
def _extract_channel_prompt(pending_event):
"""Reproduce the fixed logic from gateway/run.py.
Mirrors the variable-capture pattern used before the recursive
_run_agent call so we can test both paths without a full runner.
"""
next_channel_prompt = None
if pending_event is not None:
next_channel_prompt = getattr(pending_event, "channel_prompt", None)
return next_channel_prompt
class TestPendingEventNoneChannelPrompt:
    """Guard against AttributeError when pending_event is None."""

    def test_none_pending_event_returns_none_channel_prompt(self):
        """Path B: pending_event is None — must not raise AttributeError."""
        assert _extract_channel_prompt(None) is None

    def test_pending_event_with_channel_prompt_passes_through(self):
        """Path A: pending_event present — channel_prompt is forwarded."""
        evt = SimpleNamespace(channel_prompt="You are a helpful bot.")
        assert _extract_channel_prompt(evt) == "You are a helpful bot."

    def test_pending_event_without_channel_prompt_returns_none(self):
        """Path A: pending_event present but has no channel_prompt attribute."""
        assert _extract_channel_prompt(SimpleNamespace()) is None

View file

@ -305,10 +305,15 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t
monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
class FakeCompressAgent:
last_instance = None
def __init__(self, **kwargs):
self.model = kwargs.get("model")
self.session_id = kwargs.get("session_id", "fake-session")
self._print_fn = None
self.shutdown_memory_provider = MagicMock()
self.close = MagicMock()
type(self).last_instance = self
def _compress_context(self, messages, *_args, **_kwargs):
# Simulate real _compress_context: create a new session_id
@ -385,3 +390,6 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t
# Compression warnings are no longer sent to users — compression
# happens silently with server-side logging only.
assert len(adapter.sent) == 0
assert FakeCompressAgent.last_instance is not None
FakeCompressAgent.last_instance.shutdown_memory_provider.assert_called_once()
FakeCompressAgent.last_instance.close.assert_called_once()

View file

@ -606,6 +606,56 @@ class TestSegmentBreakOnToolBoundary:
assert sent_texts[0].startswith(prefix)
assert sum(len(t) for t in sent_texts[1:]) == len(tail)
    @pytest.mark.asyncio
    async def test_fallback_final_sends_full_text_at_tool_boundary(self):
        """After a tool call, the streamed prefix is stale (from the pre-tool
        segment). _send_fallback_final must still send the post-tool response
        even when continuation_text calculates as empty (#10807)."""
        adapter = MagicMock()
        adapter.send = AsyncMock(
            return_value=SimpleNamespace(success=True, message_id="msg_1"),
        )
        adapter.edit_message = AsyncMock(
            return_value=SimpleNamespace(success=True),
        )
        adapter.MAX_MESSAGE_LENGTH = 4096
        # Short edit interval / small buffer so the consumer flushes quickly.
        config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
        consumer = GatewayStreamConsumer(adapter, "chat_123", config)
        # Simulate a pre-tool streamed segment that becomes the visible prefix
        pre_tool_text = "I'll run that code now."
        consumer.on_delta(pre_tool_text)
        task = asyncio.create_task(consumer.run())
        await asyncio.sleep(0.05)
        # After the tool call, the model returns a SHORT final response that
        # does NOT start with the pre-tool prefix. The continuation calculator
        # would return empty (no prefix match → full text returned, but if the
        # streaming edit already showed pre_tool_text, the prefix-based logic
        # wrongly matches). Simulate this by setting _last_sent_text to the
        # pre-tool content, then finishing with different post-tool content.
        consumer._last_sent_text = pre_tool_text
        post_tool_response = "⏰ Script timed out after 30s and was killed."
        consumer.finish()
        await task
        # The fallback should send the post-tool response via
        # _send_fallback_final.
        await consumer._send_fallback_final(post_tool_response)
        # Verify the final text was sent (not silently dropped)
        sent = False
        for call in adapter.send.call_args_list:
            # Content may arrive as the `content=` kwarg or first positional arg.
            content = call[1].get("content", call[0][0] if call[0] else "")
            if "timed out" in str(content):
                sent = True
                break
        assert sent, (
            "Post-tool timeout response was silently dropped by "
            "_send_fallback_final — the #10807 fix should prevent this"
        )
class TestInterimCommentaryMessages:
@pytest.mark.asyncio

View file

@ -322,7 +322,7 @@ class TestFallbackTransportInit:
seen_kwargs.append(kwargs.copy())
return FakeTransport([], {})
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"):
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy", "TELEGRAM_PROXY"):
monkeypatch.delenv(key, raising=False)
monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
monkeypatch.setattr(tnet.httpx, "AsyncHTTPTransport", factory)

View file

@ -0,0 +1,64 @@
"""Tests for warn_deprecated_cwd_env_vars() migration warning."""
import os
import pytest
class TestDeprecatedCwdWarning:
    """Warn when MESSAGING_CWD or TERMINAL_CWD is set in .env."""

    def test_messaging_cwd_triggers_warning(self, monkeypatch, capsys):
        """MESSAGING_CWD alone must produce a deprecation warning on stderr."""
        monkeypatch.setenv("MESSAGING_CWD", "/some/path")
        monkeypatch.delenv("TERMINAL_CWD", raising=False)
        from hermes_cli.config import warn_deprecated_cwd_env_vars
        warn_deprecated_cwd_env_vars(config={})
        captured = capsys.readouterr()
        # The warning must name the variable, say it is deprecated, and
        # point at config.yaml as the replacement.
        assert "MESSAGING_CWD" in captured.err
        assert "deprecated" in captured.err.lower()
        assert "config.yaml" in captured.err

    def test_terminal_cwd_triggers_warning_when_config_placeholder(self, monkeypatch, capsys):
        """TERMINAL_CWD warns when the config cwd is only the '.' placeholder."""
        monkeypatch.setenv("TERMINAL_CWD", "/project")
        monkeypatch.delenv("MESSAGING_CWD", raising=False)
        from hermes_cli.config import warn_deprecated_cwd_env_vars
        # config has placeholder cwd → TERMINAL_CWD likely from .env
        warn_deprecated_cwd_env_vars(config={"terminal": {"cwd": "."}})
        captured = capsys.readouterr()
        assert "TERMINAL_CWD" in captured.err
        assert "deprecated" in captured.err.lower()

    def test_no_warning_when_config_has_explicit_cwd(self, monkeypatch, capsys):
        """No warning when config already carries an explicit cwd value."""
        monkeypatch.setenv("TERMINAL_CWD", "/project")
        monkeypatch.delenv("MESSAGING_CWD", raising=False)
        from hermes_cli.config import warn_deprecated_cwd_env_vars
        # config has explicit cwd → TERMINAL_CWD could be from config bridge
        warn_deprecated_cwd_env_vars(config={"terminal": {"cwd": "/project"}})
        captured = capsys.readouterr()
        assert "TERMINAL_CWD" not in captured.err

    def test_no_warning_when_env_clean(self, monkeypatch, capsys):
        """With neither variable set, stderr must stay completely empty."""
        monkeypatch.delenv("MESSAGING_CWD", raising=False)
        monkeypatch.delenv("TERMINAL_CWD", raising=False)
        from hermes_cli.config import warn_deprecated_cwd_env_vars
        warn_deprecated_cwd_env_vars(config={})
        captured = capsys.readouterr()
        assert captured.err == ""

    def test_both_deprecated_vars_warn(self, monkeypatch, capsys):
        """Both variables set → both must be named in the warning output."""
        monkeypatch.setenv("MESSAGING_CWD", "/msg/path")
        monkeypatch.setenv("TERMINAL_CWD", "/term/path")
        from hermes_cli.config import warn_deprecated_cwd_env_vars
        warn_deprecated_cwd_env_vars(config={})
        captured = capsys.readouterr()
        assert "MESSAGING_CWD" in captured.err
        assert "TERMINAL_CWD" in captured.err

View file

@ -0,0 +1,157 @@
"""Tests for the `hermes memory reset` CLI command.
Covers:
- Reset both stores (MEMORY.md + USER.md)
- Reset individual stores (--target memory / --target user)
- Skip confirmation with --yes
- Graceful handling when no memory files exist
- Profile-scoped reset (uses HERMES_HOME)
"""
import os
import pytest
from argparse import Namespace
from pathlib import Path
@pytest.fixture
def memory_env(tmp_path, monkeypatch):
    """Set up a fake HERMES_HOME with memory files."""
    home = tmp_path / ".hermes"
    mem_dir = home / "memories"
    mem_dir.mkdir(parents=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    # Seed both stores with representative §-delimited entries.
    samples = {
        "MEMORY.md": "§\nHermes repo is at ~/.hermes/hermes-agent\n§\nUser prefers dark themes",
        "USER.md": "§\nUser is Teknium\n§\nTimezone: US Pacific",
    }
    for filename, body in samples.items():
        (mem_dir / filename).write_text(body, encoding="utf-8")
    return home, mem_dir
def _run_memory_reset(target="all", yes=False, monkeypatch=None, confirm_input="no"):
    """Invoke the memory reset logic from cmd_memory in main.py.

    Simulates what happens when `hermes memory reset` is run.

    Args:
        target: "all", "memory" (MEMORY.md only) or "user" (USER.md only).
        yes: when True, skip the interactive confirmation entirely.
        monkeypatch: unused; kept so existing call sites stay valid.
        confirm_input: simulated interactive answer used when ``yes`` is False.

    Returns:
        "nothing" when no matching files exist, "cancelled" when the
        confirmation is declined, "deleted" after the files are removed.
    """
    # NOTE: the previously imported display_hermes_home was never used.
    from hermes_constants import get_hermes_home

    mem_dir = get_hermes_home() / "memories"
    files_to_reset = []
    if target in ("all", "memory"):
        files_to_reset.append(("MEMORY.md", "agent notes"))
    if target in ("all", "user"):
        files_to_reset.append(("USER.md", "user profile"))
    existing = [(f, desc) for f, desc in files_to_reset if (mem_dir / f).exists()]
    if not existing:
        return "nothing"
    # Without --yes, only an explicit "yes" answer proceeds.
    if not yes and confirm_input != "yes":
        return "cancelled"
    for filename, _desc in existing:
        (mem_dir / filename).unlink()
    return "deleted"
class TestMemoryReset:
    """Tests for `hermes memory reset` subcommand."""

    def test_reset_all_with_yes_flag(self, memory_env):
        """--yes flag should skip confirmation and delete both files."""
        hermes_home, memories = memory_env
        # Precondition: the fixture seeded both stores.
        assert (memories / "MEMORY.md").exists()
        assert (memories / "USER.md").exists()
        result = _run_memory_reset(target="all", yes=True)
        assert result == "deleted"
        assert not (memories / "MEMORY.md").exists()
        assert not (memories / "USER.md").exists()

    def test_reset_memory_only(self, memory_env):
        """--target memory should only delete MEMORY.md."""
        hermes_home, memories = memory_env
        result = _run_memory_reset(target="memory", yes=True)
        assert result == "deleted"
        assert not (memories / "MEMORY.md").exists()
        # USER.md must survive a memory-only reset.
        assert (memories / "USER.md").exists()

    def test_reset_user_only(self, memory_env):
        """--target user should only delete USER.md."""
        hermes_home, memories = memory_env
        result = _run_memory_reset(target="user", yes=True)
        assert result == "deleted"
        # MEMORY.md must survive a user-only reset.
        assert (memories / "MEMORY.md").exists()
        assert not (memories / "USER.md").exists()

    def test_reset_no_files_exist(self, tmp_path, monkeypatch):
        """Should return 'nothing' when no memory files exist."""
        hermes_home = tmp_path / ".hermes"
        # memories dir exists but contains no files.
        (hermes_home / "memories").mkdir(parents=True)
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        result = _run_memory_reset(target="all", yes=True)
        assert result == "nothing"

    def test_reset_confirmation_denied(self, memory_env):
        """Without --yes and without typing 'yes', should be cancelled."""
        hermes_home, memories = memory_env
        result = _run_memory_reset(target="all", yes=False, confirm_input="no")
        assert result == "cancelled"
        # Files should still exist
        assert (memories / "MEMORY.md").exists()
        assert (memories / "USER.md").exists()

    def test_reset_confirmation_accepted(self, memory_env):
        """Typing 'yes' should proceed with deletion."""
        hermes_home, memories = memory_env
        result = _run_memory_reset(target="all", yes=False, confirm_input="yes")
        assert result == "deleted"
        assert not (memories / "MEMORY.md").exists()
        assert not (memories / "USER.md").exists()

    def test_reset_profile_scoped(self, tmp_path, monkeypatch):
        """Reset should work on the active profile's HERMES_HOME."""
        # Point HERMES_HOME at a profile directory rather than ~/.hermes.
        profile_home = tmp_path / "profiles" / "myprofile"
        memories = profile_home / "memories"
        memories.mkdir(parents=True)
        (memories / "MEMORY.md").write_text("profile memory", encoding="utf-8")
        (memories / "USER.md").write_text("profile user", encoding="utf-8")
        monkeypatch.setenv("HERMES_HOME", str(profile_home))
        result = _run_memory_reset(target="all", yes=True)
        assert result == "deleted"
        assert not (memories / "MEMORY.md").exists()
        assert not (memories / "USER.md").exists()

    def test_reset_partial_files(self, memory_env):
        """Reset should work when only one memory file exists."""
        hermes_home, memories = memory_env
        # Remove USER.md so only MEMORY.md remains before the reset.
        (memories / "USER.md").unlink()
        result = _run_memory_reset(target="all", yes=True)
        assert result == "deleted"
        assert not (memories / "MEMORY.md").exists()

    def test_reset_empty_memories_dir(self, tmp_path, monkeypatch):
        """No memories dir at all should report nothing."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir(parents=True)
        # No memories dir
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        # The memories dir won't exist; get_hermes_home() / "memories" won't have files
        result = _run_memory_reset(target="all", yes=True)
        assert result == "nothing"

View file

@ -114,6 +114,65 @@ class TestOllamaCloudModelCatalog:
assert "ollama-cloud" in _PROVIDER_LABELS
assert _PROVIDER_LABELS["ollama-cloud"] == "Ollama Cloud"
    def test_provider_model_ids_returns_dynamic_models(self, tmp_path, monkeypatch):
        """provider_model_ids('ollama-cloud') should call fetch_ollama_cloud_models()."""
        from hermes_cli.models import provider_model_ids
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        # Fake models.dev metadata: provider slug → {"models": {id: flags}}.
        mock_mdev = {
            "ollama-cloud": {
                "models": {
                    "qwen3.5:397b": {"tool_call": True},
                    "glm-5": {"tool_call": True},
                }
            }
        }
        # Patch both the live API listing and the models.dev catalog so no
        # network access happens; force_refresh bypasses any on-disk cache.
        with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.5:397b"]), \
             patch("agent.models_dev.fetch_models_dev", return_value=mock_mdev):
            result = provider_model_ids("ollama-cloud", force_refresh=True)
        assert len(result) > 0
        assert "qwen3.5:397b" in result
# ── Model Picker (list_authenticated_providers) ──
class TestOllamaCloudModelPicker:
    """Provider-picker entries for Ollama Cloud via list_authenticated_providers()."""

    def test_ollama_cloud_shows_model_count(self, tmp_path, monkeypatch):
        """Ollama Cloud should show non-zero model count in provider picker."""
        from hermes_cli.model_switch import list_authenticated_providers
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        # Fake models.dev metadata: provider slug → {"models": {id: flags}}.
        mock_mdev = {
            "ollama-cloud": {
                "models": {
                    "qwen3.5:397b": {"tool_call": True},
                    "glm-5": {"tool_call": True},
                }
            }
        }
        # Stub out both remote sources so the picker runs fully offline.
        with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.5:397b"]), \
             patch("agent.models_dev.fetch_models_dev", return_value=mock_mdev):
            providers = list_authenticated_providers(current_provider="ollama-cloud")
        ollama = next((p for p in providers if p["slug"] == "ollama-cloud"), None)
        assert ollama is not None, "ollama-cloud should appear when OLLAMA_API_KEY is set"
        assert ollama["total_models"] > 0, "ollama-cloud should show non-zero model count"

    def test_ollama_cloud_not_shown_without_creds(self, monkeypatch):
        """Ollama Cloud should not appear without credentials."""
        from hermes_cli.model_switch import list_authenticated_providers
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
        providers = list_authenticated_providers(current_provider="openrouter")
        ollama = next((p for p in providers if p["slug"] == "ollama-cloud"), None)
        assert ollama is None, "ollama-cloud should not appear without OLLAMA_API_KEY"
# ── Merged Model Discovery ──

View file

@ -152,6 +152,24 @@ class TestSkinManagement:
init_skin_from_config({})
assert get_active_skin_name() == "default"
    def test_init_skin_from_null_display(self):
        """display: null should fall back to default, not crash."""
        from hermes_cli.skin_engine import init_skin_from_config, get_active_skin_name
        # A bare `display:` key in YAML parses to None — must not raise.
        init_skin_from_config({"display": None})
        assert get_active_skin_name() == "default"
    def test_init_skin_from_non_dict_display(self):
        """display: <non-dict> should fall back to default."""
        from hermes_cli.skin_engine import init_skin_from_config, get_active_skin_name
        # Each malformed value type (str, int, list) must degrade to "default".
        init_skin_from_config({"display": "invalid"})
        assert get_active_skin_name() == "default"
        init_skin_from_config({"display": 42})
        assert get_active_skin_name() == "default"
        init_skin_from_config({"display": []})
        assert get_active_skin_name() == "default"
class TestUserSkins:
def test_load_user_skin_from_yaml(self, tmp_path, monkeypatch):

View file

@ -1,361 +0,0 @@
"""Tests for context pressure warnings (user-facing, not injected into messages).
Covers:
- Display formatting (CLI and gateway variants)
- Flag tracking and threshold logic on AIAgent
- Flag reset after compression
- status_callback invocation
"""
import json
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
from agent.display import format_context_pressure, format_context_pressure_gateway
from run_agent import AIAgent
# ---------------------------------------------------------------------------
# Display formatting tests
# ---------------------------------------------------------------------------
class TestFormatContextPressure:
"""CLI context pressure display (agent/display.py).
The bar shows progress toward the compaction threshold, not the
raw context window. 60% = 60% of the way to compaction.
"""
def test_80_percent_uses_warning_icon(self):
line = format_context_pressure(0.80, 100_000, 0.50)
assert "" in line
assert "80% to compaction" in line
def test_90_percent_uses_warning_icon(self):
line = format_context_pressure(0.90, 100_000, 0.50)
assert "" in line
assert "90% to compaction" in line
def test_bar_length_scales_with_progress(self):
line_80 = format_context_pressure(0.80, 100_000, 0.50)
line_95 = format_context_pressure(0.95, 100_000, 0.50)
assert line_95.count("") > line_80.count("")
def test_shows_threshold_tokens(self):
line = format_context_pressure(0.80, 100_000, 0.50)
assert "100k" in line
def test_small_threshold(self):
line = format_context_pressure(0.80, 500, 0.50)
assert "500" in line
def test_shows_threshold_percent(self):
line = format_context_pressure(0.80, 100_000, 0.50)
assert "50%" in line
def test_approaching_hint(self):
line = format_context_pressure(0.80, 100_000, 0.50)
assert "compaction approaching" in line
def test_no_compaction_when_disabled(self):
line = format_context_pressure(0.85, 100_000, 0.50, compression_enabled=False)
assert "no auto-compaction" in line
def test_returns_string(self):
result = format_context_pressure(0.65, 128_000, 0.50)
assert isinstance(result, str)
def test_over_100_percent_capped(self):
"""Progress > 1.0 should cap both bar and percentage text at 100%."""
line = format_context_pressure(1.05, 100_000, 0.50)
assert "" in line
assert line.count("") == 20
assert "100%" in line
assert "105%" not in line
class TestFormatContextPressureGateway:
"""Gateway (plain text) context pressure display."""
def test_80_percent_warning(self):
msg = format_context_pressure_gateway(0.80, 0.50)
assert "80% to compaction" in msg
assert "50%" in msg
def test_90_percent_warning(self):
msg = format_context_pressure_gateway(0.90, 0.50)
assert "90% to compaction" in msg
assert "approaching" in msg
def test_no_compaction_warning(self):
msg = format_context_pressure_gateway(0.85, 0.50, compression_enabled=False)
assert "disabled" in msg
def test_no_ansi_codes(self):
msg = format_context_pressure_gateway(0.80, 0.50)
assert "\033[" not in msg
def test_has_progress_bar(self):
msg = format_context_pressure_gateway(0.80, 0.50)
assert "" in msg
def test_over_100_percent_capped(self):
"""Progress > 1.0 should cap percentage text at 100%."""
msg = format_context_pressure_gateway(1.09, 0.50)
assert "100% to compaction" in msg
assert "109%" not in msg
assert msg.count("") == 20
# ---------------------------------------------------------------------------
# AIAgent context pressure flag tests
# ---------------------------------------------------------------------------
def _make_tool_defs(*names):
return [
{
"type": "function",
"function": {
"name": n,
"description": f"{n} tool",
"parameters": {"type": "object", "properties": {}},
},
}
for n in names
]
@pytest.fixture()
def agent():
"""Minimal AIAgent with mocked internals."""
with (
patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
patch("run_agent.check_toolset_requirements", return_value={}),
patch("run_agent.OpenAI"),
):
a = AIAgent(
api_key="test-key-1234567890",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
)
a.client = MagicMock()
return a
class TestContextPressureFlags:
"""Context pressure warning flag tracking on AIAgent."""
def test_flag_initialized_zero(self, agent):
assert agent._context_pressure_warned_at == 0.0
def test_emit_calls_status_callback(self, agent):
"""status_callback should be invoked with event type and message."""
cb = MagicMock()
agent.status_callback = cb
compressor = MagicMock()
compressor.context_length = 200_000
compressor.threshold_tokens = 100_000 # 50%
agent._emit_context_pressure(0.85, compressor)
cb.assert_called_once()
args = cb.call_args[0]
assert args[0] == "context_pressure"
assert "85% to compaction" in args[1]
def test_emit_no_callback_no_crash(self, agent):
"""No status_callback set — should not crash."""
agent.status_callback = None
compressor = MagicMock()
compressor.context_length = 200_000
compressor.threshold_tokens = 100_000
# Should not raise
agent._emit_context_pressure(0.60, compressor)
def test_emit_prints_for_cli_platform(self, agent, capsys):
"""CLI platform should always print context pressure, even in quiet_mode."""
agent.quiet_mode = True
agent.platform = "cli"
agent.status_callback = None
compressor = MagicMock()
compressor.context_length = 200_000
compressor.threshold_tokens = 100_000
agent._emit_context_pressure(0.85, compressor)
captured = capsys.readouterr()
assert "" in captured.out
assert "to compaction" in captured.out
def test_emit_skips_print_for_gateway_platform(self, agent, capsys):
"""Gateway platforms get the callback, not CLI print."""
agent.platform = "telegram"
agent.status_callback = None
compressor = MagicMock()
compressor.context_length = 200_000
compressor.threshold_tokens = 100_000
agent._emit_context_pressure(0.85, compressor)
captured = capsys.readouterr()
assert "" not in captured.out
def test_flag_reset_on_compression(self, agent):
"""After _compress_context, context pressure flag should reset."""
agent._context_pressure_warned_at = 0.85
agent.compression_enabled = True
agent.context_compressor = MagicMock()
agent.context_compressor.compress.return_value = [
{"role": "user", "content": "Summary of conversation so far."}
]
agent.context_compressor.context_length = 200_000
agent.context_compressor.threshold_tokens = 100_000
agent.context_compressor.compression_count = 1
agent._todo_store = MagicMock()
agent._todo_store.format_for_injection.return_value = None
agent._build_system_prompt = MagicMock(return_value="system prompt")
agent._cached_system_prompt = "old system prompt"
agent._session_db = None
messages = [
{"role": "user", "content": "hello"},
{"role": "assistant", "content": "hi there"},
]
agent._compress_context(messages, "system prompt")
assert agent._context_pressure_warned_at == 0.0
def test_emit_callback_error_handled(self, agent):
"""If status_callback raises, it should be caught gracefully."""
cb = MagicMock(side_effect=RuntimeError("callback boom"))
agent.status_callback = cb
compressor = MagicMock()
compressor.context_length = 200_000
compressor.threshold_tokens = 100_000
# Should not raise
agent._emit_context_pressure(0.85, compressor)
def test_tiered_reemits_at_95(self, agent):
"""Warning fires at 85%, then fires again when crossing 95%."""
agent._context_pressure_warned_at = 0.85
# Simulate crossing 95%: the tier (0.95) > warned_at (0.85)
assert 0.95 > agent._context_pressure_warned_at
# After emission at 95%, the tier should update
agent._context_pressure_warned_at = 0.95
assert agent._context_pressure_warned_at == 0.95
def test_tiered_no_double_emit_at_same_level(self, agent):
"""Once warned at 85%, further 85%+ readings don't re-warn."""
agent._context_pressure_warned_at = 0.85
# At 88%, tier is 0.85, which is NOT > warned_at (0.85)
_warn_tier = 0.85 if 0.88 >= 0.85 else 0.0
assert not (_warn_tier > agent._context_pressure_warned_at)
def test_flag_not_reset_when_compression_insufficient(self, agent):
"""When compression can't drop below 85%, keep the flag set."""
agent._context_pressure_warned_at = 0.85
agent.compression_enabled = True
agent.context_compressor = MagicMock()
agent.context_compressor.compress.return_value = [
{"role": "user", "content": "Summary of conversation so far."}
]
agent.context_compressor.context_length = 200
# Use a small threshold so the tiny compressed output still
# represents >= 85% of it (prevents flag reset).
agent.context_compressor.threshold_tokens = 10
agent.context_compressor.compression_count = 1
agent.context_compressor.last_prompt_tokens = 0
agent._todo_store = MagicMock()
agent._todo_store.format_for_injection.return_value = None
agent._build_system_prompt = MagicMock(return_value="system prompt")
agent._cached_system_prompt = "old system prompt"
agent._session_db = None
messages = [
{"role": "user", "content": "hello"},
{"role": "assistant", "content": "hi there"},
]
agent._compress_context(messages, "system prompt")
# Post-compression is ~90% of threshold — flag should NOT reset
assert agent._context_pressure_warned_at == 0.85
class TestContextPressureGatewayDedup:
"""Class-level dedup prevents warning spam across AIAgent instances."""
def setup_method(self):
"""Clear class-level dedup state between tests."""
AIAgent._context_pressure_last_warned.clear()
def test_second_instance_within_cooldown_suppressed(self):
"""Same session, same tier, within cooldown — should be suppressed."""
import time
sid = "test_session_dedup"
# Simulate first warning
AIAgent._context_pressure_last_warned[sid] = (0.85, time.time())
# Second instance checking same tier within cooldown
_last = AIAgent._context_pressure_last_warned.get(sid)
_should_warn = _last is None or _last[0] < 0.85 or (time.time() - _last[1]) >= AIAgent._CONTEXT_PRESSURE_COOLDOWN
assert not _should_warn
def test_higher_tier_fires_despite_cooldown(self):
"""Same session, higher tier — should fire even within cooldown."""
import time
sid = "test_session_tier"
AIAgent._context_pressure_last_warned[sid] = (0.85, time.time())
_last = AIAgent._context_pressure_last_warned.get(sid)
# 0.95 > 0.85 stored tier → should warn
_should_warn = _last is None or _last[0] < 0.95 or (time.time() - _last[1]) >= AIAgent._CONTEXT_PRESSURE_COOLDOWN
assert _should_warn
def test_warning_fires_after_cooldown_expires(self):
"""Same session, same tier, after cooldown — should fire again."""
import time
sid = "test_session_expired"
# Set a timestamp far in the past
AIAgent._context_pressure_last_warned[sid] = (0.85, time.time() - AIAgent._CONTEXT_PRESSURE_COOLDOWN - 1)
_last = AIAgent._context_pressure_last_warned.get(sid)
_should_warn = _last is None or _last[0] < 0.85 or (time.time() - _last[1]) >= AIAgent._CONTEXT_PRESSURE_COOLDOWN
assert _should_warn
def test_compression_clears_dedup(self):
"""After compression drops below 85%, dedup entry should be cleared."""
import time
sid = "test_session_clear"
AIAgent._context_pressure_last_warned[sid] = (0.85, time.time())
assert sid in AIAgent._context_pressure_last_warned
# Simulate what _compress_context does on reset
AIAgent._context_pressure_last_warned.pop(sid, None)
assert sid not in AIAgent._context_pressure_last_warned
def test_eviction_removes_stale_entries(self):
"""Stale entries older than 2x cooldown should be evicted."""
import time
_now = time.time()
AIAgent._context_pressure_last_warned = {
"fresh": (0.85, _now),
"stale": (0.85, _now - AIAgent._CONTEXT_PRESSURE_COOLDOWN * 3),
}
_cutoff = _now - AIAgent._CONTEXT_PRESSURE_COOLDOWN * 2
AIAgent._context_pressure_last_warned = {
k: v for k, v in AIAgent._context_pressure_last_warned.items()
if v[1] > _cutoff
}
assert "fresh" in AIAgent._context_pressure_last_warned
assert "stale" not in AIAgent._context_pressure_last_warned

View file

@ -59,7 +59,7 @@ def _make_agent(monkeypatch, api_mode, provider, response_fn):
self._disable_streaming = True
return super().run_conversation(msg, conversation_history=conversation_history, task_id=task_id)
return _A(model="test-model", api_key="test-key", provider=provider, api_mode=api_mode)
return _A(model="test-model", api_key="test-key", base_url="http://localhost:1234/v1", provider=provider, api_mode=api_mode)
def _anthropic_resp(input_tok, output_tok, cache_read=0, cache_creation=0):

View file

@ -0,0 +1,35 @@
"""Guardrail: _create_openai_client must not mutate its input kwargs.
#10933 injected an httpx.Client directly into the caller's ``client_kwargs``.
When the dict was ``self._client_kwargs``, the shared transport was torn down
after the first request_complete close and subsequent request-scoped clients
wrapped a closed transport, raising ``APIConnectionError('Connection error.')``
with cause ``RuntimeError: Cannot send a request, as the client has been closed``
on every retry. That PR has since been reverted, but the underlying issue
(#10324, connections hanging in CLOSE-WAIT) is still open, so another transport
tweak inside this function is likely. This test pins the contract that the
function must treat its input dict as read-only.
"""
from unittest.mock import MagicMock, patch
from run_agent import AIAgent
@patch("run_agent.OpenAI")
def test_create_openai_client_does_not_mutate_input_kwargs(mock_openai):
mock_openai.return_value = MagicMock()
agent = AIAgent(
model="test/model",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
)
kwargs = {"api_key": "test-key", "base_url": "https://api.example.com/v1"}
snapshot = dict(kwargs)
agent._create_openai_client(kwargs, reason="test", shared=False)
assert kwargs == snapshot, (
f"_create_openai_client mutated input kwargs; expected {snapshot}, got {kwargs}"
)

View file

@ -4115,8 +4115,8 @@ class TestMemoryNudgeCounterPersistence:
"""Counters must exist on the agent after __init__."""
with patch("run_agent.get_tool_definitions", return_value=[]):
a = AIAgent(
model="test", api_key="test-key", provider="openrouter",
skip_context_files=True, skip_memory=True,
model="test", api_key="test-key", base_url="http://localhost:1234/v1",
provider="openrouter", skip_context_files=True, skip_memory=True,
)
assert hasattr(a, "_turns_since_memory")
assert hasattr(a, "_iters_since_skill")

View file

@ -279,6 +279,10 @@ raise RuntimeError("deliberate crash")
))
self.assertEqual(result["status"], "timeout")
self.assertIn("timed out", result.get("error", ""))
# The timeout message must also appear in output so the LLM always
# surfaces it to the user (#10807).
self.assertIn("timed out", result.get("output", ""))
self.assertIn("\u23f0", result.get("output", ""))
def test_web_search_tool(self):
"""Script calls web_search and processes results."""

View file

@ -871,7 +871,18 @@ def _execute_remote(
}
if status == "timeout":
result["error"] = f"Script timed out after {timeout}s and was killed."
timeout_msg = f"Script timed out after {timeout}s and was killed."
result["error"] = timeout_msg
# Include timeout message in output so the LLM always surfaces it
# to the user (see local path comment — same reasoning, #10807).
if stdout_text:
result["output"] = stdout_text + f"\n\n{timeout_msg}"
else:
result["output"] = f"{timeout_msg}"
logger.warning(
"execute_code (remote) timed out after %ss (limit %ss) with %d tool calls",
duration, timeout, tool_call_counter[0],
)
elif status == "interrupted":
result["output"] = (
stdout_text + "\n[execution interrupted — user sent a new message]"
@ -1117,6 +1128,10 @@ def execute_code(
stderr_reader.start()
status = "success"
_activity_state = {
"last_touch": time.monotonic(),
"start": exec_start,
}
while proc.poll() is None:
if _is_interrupted():
_kill_process_group(proc)
@ -1126,6 +1141,13 @@ def execute_code(
_kill_process_group(proc, escalate=True)
status = "timeout"
break
# Periodic activity touch so the gateway's inactivity timeout
# doesn't kill the agent during long code execution (#10807).
try:
from tools.environments.base import touch_activity_if_due
touch_activity_if_due(_activity_state, "execute_code running")
except Exception:
pass
time.sleep(0.2)
# Wait for readers to finish draining
@ -1179,7 +1201,20 @@ def execute_code(
}
if status == "timeout":
result["error"] = f"Script timed out after {timeout}s and was killed."
timeout_msg = f"Script timed out after {timeout}s and was killed."
result["error"] = timeout_msg
# Include timeout message in output so the LLM always surfaces it
# to the user. When output is empty, models often treat the result
# as "nothing happened" and produce an empty response, which the
# gateway stream consumer silently drops (#10807).
if stdout_text:
result["output"] = stdout_text + f"\n\n{timeout_msg}"
else:
result["output"] = f"{timeout_msg}"
logger.warning(
"execute_code timed out after %ss (limit %ss) with %d tool calls",
duration, timeout, tool_call_counter[0],
)
elif status == "interrupted":
result["output"] = stdout_text + "\n[execution interrupted — user sent a new message]"
elif exit_code != 0:

View file

@@ -863,28 +863,28 @@ def delegate_task(
results.append(entry)
completed_count += 1
# Print per-task completion line above the spinner
idx = entry["task_index"]
label = task_labels[idx] if idx < len(task_labels) else f"Task {idx}"
dur = entry.get("duration_seconds", 0)
status = entry.get("status", "?")
icon = "" if status == "completed" else ""
remaining = n_tasks - completed_count
completion_line = f"{icon} [{idx+1}/{n_tasks}] {label} ({dur}s)"
if spinner_ref:
try:
spinner_ref.print_above(completion_line)
except Exception:
# Print per-task completion line above the spinner
idx = entry["task_index"]
label = task_labels[idx] if idx < len(task_labels) else f"Task {idx}"
dur = entry.get("duration_seconds", 0)
status = entry.get("status", "?")
icon = "" if status == "completed" else ""
remaining = n_tasks - completed_count
completion_line = f"{icon} [{idx+1}/{n_tasks}] {label} ({dur}s)"
if spinner_ref:
try:
spinner_ref.print_above(completion_line)
except Exception:
print(f" {completion_line}")
else:
print(f" {completion_line}")
else:
print(f" {completion_line}")
# Update spinner text to show remaining count
if spinner_ref and remaining > 0:
try:
spinner_ref.update_text(f"🔀 {remaining} task{'s' if remaining != 1 else ''} remaining")
except Exception as e:
logger.debug("Spinner update_text failed: %s", e)
# Update spinner text to show remaining count
if spinner_ref and remaining > 0:
try:
spinner_ref.update_text(f"🔀 {remaining} task{'s' if remaining != 1 else ''} remaining")
except Exception as e:
logger.debug("Spinner update_text failed: %s", e)
# Sort by task_index so results match input order
results.sort(key=lambda r: r["task_index"])

View file

@@ -37,6 +37,32 @@ def _get_activity_callback() -> Callable[[str], None] | None:
return getattr(_activity_callback_local, "callback", None)
def touch_activity_if_due(
    state: dict,
    label: str,
) -> None:
    """Fire the activity callback at most once every ``state['interval']`` seconds.

    *state* must contain ``last_touch`` (monotonic timestamp) and ``start``
    (monotonic timestamp of the operation start). An optional ``interval``
    key overrides the default 10 s cadence. ``last_touch`` is updated in
    place whenever a touch fires.

    Swallows all exceptions so callers don't need their own try/except.
    """
    # The entire body sits inside the try so the docstring's "swallows all
    # exceptions" contract actually holds: previously a malformed *state*
    # (missing "last_touch"/"start") raised KeyError to the caller, which is
    # why some call sites defensively wrapped this helper in their own
    # try/except.
    try:
        now = time.monotonic()
        interval = state.get("interval", 10.0)
        if now - state["last_touch"] < interval:
            return
        # Advance the timestamp before invoking the callback so a raising
        # callback doesn't retry on every subsequent poll iteration.
        state["last_touch"] = now
        cb = _get_activity_callback()
        if cb:
            elapsed = int(now - state["start"])
            cb(f"{label} ({elapsed}s elapsed)")
    except Exception:
        # Best-effort by contract: never break the caller's polling loop.
        pass
def get_sandbox_dir() -> Path:
"""Return the host-side root for all sandbox storage (Docker workspaces,
Singularity overlays/SIF cache, etc.).
@@ -405,8 +431,11 @@ class BaseEnvironment(ABC):
drain_thread = threading.Thread(target=_drain, daemon=True)
drain_thread.start()
deadline = time.monotonic() + timeout
_last_activity_touch = time.monotonic()
_ACTIVITY_INTERVAL = 10.0 # seconds between activity touches
_now = time.monotonic()
_activity_state = {
"last_touch": _now,
"start": _now,
}
while proc.poll() is None:
if is_interrupted():
@@ -428,16 +457,7 @@ class BaseEnvironment(ABC):
"returncode": 124,
}
# Periodic activity touch so the gateway knows we're alive
_now = time.monotonic()
if _now - _last_activity_touch >= _ACTIVITY_INTERVAL:
_last_activity_touch = _now
_cb = _get_activity_callback()
if _cb:
try:
_elapsed = int(_now - (deadline - timeout))
_cb(f"terminal command running ({_elapsed}s elapsed)")
except Exception:
pass
touch_activity_if_due(_activity_state, "terminal command running")
time.sleep(0.2)
drain_thread.join(timeout=5)

View file

@@ -105,9 +105,11 @@ class BaseModalExecutionEnvironment(BaseEnvironment):
if self._client_timeout_grace_seconds is not None:
deadline = time.monotonic() + prepared.timeout + self._client_timeout_grace_seconds
_last_activity_touch = time.monotonic()
_modal_exec_start = time.monotonic()
_ACTIVITY_INTERVAL = 10.0 # match _wait_for_process cadence
_now = time.monotonic()
_activity_state = {
"last_touch": _now,
"start": _now,
}
while True:
if is_interrupted():
@@ -133,20 +135,11 @@ class BaseModalExecutionEnvironment(BaseEnvironment):
return self._timeout_result_for_modal(prepared.timeout)
# Periodic activity touch so the gateway knows we're alive
_now = time.monotonic()
if _now - _last_activity_touch >= _ACTIVITY_INTERVAL:
_last_activity_touch = _now
try:
from tools.environments.base import _get_activity_callback
_cb = _get_activity_callback()
except Exception:
_cb = None
if _cb:
try:
_elapsed = int(_now - _modal_exec_start)
_cb(f"modal command running ({_elapsed}s elapsed)")
except Exception:
pass
try:
from tools.environments.base import touch_activity_if_due
touch_activity_if_due(_activity_state, "modal command running")
except Exception:
pass
time.sleep(self._poll_interval_seconds)