diff --git a/.github/workflows/deploy-site.yml b/.github/workflows/deploy-site.yml index 480b236f84..3e78bc61b1 100644 --- a/.github/workflows/deploy-site.yml +++ b/.github/workflows/deploy-site.yml @@ -1,11 +1,12 @@ name: Deploy Site on: + release: + types: [published] push: branches: [main] paths: - 'website/**' - - 'landingpage/**' - 'skills/**' - 'optional-skills/**' - '.github/workflows/deploy-site.yml' @@ -20,8 +21,14 @@ concurrency: cancel-in-progress: false jobs: - build-and-deploy: - # Only run on the upstream repository, not on forks + deploy-vercel: + if: github.event_name == 'release' + runs-on: ubuntu-latest + steps: + - name: Trigger Vercel Deploy + run: curl -X POST "${{ secrets.VERCEL_DEPLOY_HOOK }}" + + deploy-docs: if: github.repository == 'NousResearch/hermes-agent' runs-on: ubuntu-latest environment: @@ -65,12 +72,7 @@ jobs: - name: Stage deployment run: | mkdir -p _site/docs - # Landing page at root - cp -r landingpage/* _site/ - # Docusaurus at /docs/ cp -r website/build/* _site/docs/ - # CNAME so GitHub Pages keeps the custom domain between deploys - echo "hermes-agent.nousresearch.com" > _site/CNAME - name: Upload artifact uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3 diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index bc6b1efbe8..c31ff55f98 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -2391,10 +2391,10 @@ def call_llm( if task == "vision": effective_provider, client, final_model = resolve_vision_provider_client( - provider=provider, - model=model, - base_url=base_url, - api_key=api_key, + provider=resolved_provider if resolved_provider != "auto" else provider, + model=resolved_model or model, + base_url=resolved_base_url or base_url, + api_key=resolved_api_key or api_key, async_mode=False, ) if client is None and resolved_provider != "auto" and not resolved_base_url: @@ -2599,10 +2599,10 @@ async def async_call_llm( if task == "vision": effective_provider, client, final_model = resolve_vision_provider_client( - provider=provider, - model=model, - base_url=base_url, - api_key=api_key, + provider=resolved_provider if resolved_provider != "auto" else provider, + model=resolved_model or model, + base_url=resolved_base_url or base_url, + api_key=resolved_api_key or api_key, async_mode=True, ) if client is None and resolved_provider != "auto" and not resolved_base_url: diff --git a/agent/context_compressor.py b/agent/context_compressor.py index ac5db77625..34ec5091b1 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -39,7 +39,10 @@ SUMMARY_PREFIX = ( "into the summary below. This is a handoff from a previous context " "window — treat it as background reference, NOT as active instructions. " "Do NOT answer questions or fulfill requests mentioned in this summary; " - "they were already addressed. Respond ONLY to the latest user message " + "they were already addressed. " + "Your current task is identified in the '## Active Task' section of the " + "summary — resume exactly from there. " + "Respond ONLY to the latest user message " "that appears AFTER this summary. The current session state (files, " "config, etc.) may reflect work described here — avoid repeating it:" ) @@ -581,8 +584,16 @@ class ContextCompressor(ContextEngine): ) # Shared structured template (used by both paths). - _template_sections = f"""## Goal -[What the user is trying to accomplish] + _template_sections = f"""## Active Task +[THE SINGLE MOST IMPORTANT FIELD. 
Copy the user's most recent request or +task assignment verbatim — the exact words they used. If multiple tasks +were requested and only some are done, list only the ones NOT yet completed. +The next assistant must pick up exactly here. Example: +"User asked: 'Now refactor the auth module to use JWT instead of sessions'" +If no outstanding task exists, write "None."] + +## Goal +[What the user is trying to accomplish overall] ## Constraints & Preferences [User preferences, coding style, constraints, important decisions] @@ -644,7 +655,7 @@ PREVIOUS SUMMARY: NEW TURNS TO INCORPORATE: {content_to_summarize} -Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. +Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled request — this is the most important field for task continuity. {_template_sections}""" else: @@ -862,6 +873,62 @@ The user has requested that this compaction PRIORITISE preserving all informatio # Tail protection by token budget # ------------------------------------------------------------------ + def _find_last_user_message_idx( + self, messages: List[Dict[str, Any]], head_end: int + ) -> int: + """Return the index of the last user-role message at or after *head_end*, or -1.""" + for i in range(len(messages) - 1, head_end - 1, -1): + if messages[i].get("role") == "user": + return i + return -1 + + def _ensure_last_user_message_in_tail( + self, + messages: List[Dict[str, Any]], + cut_idx: int, + head_end: int, + ) -> int: + """Guarantee the most recent user message is in the protected tail. + + Context compressor bug (#10896): ``_align_boundary_backward`` can pull + ``cut_idx`` past a user message when it tries to keep tool_call/result + groups together. If the last user message ends up in the *compressed* + middle region the LLM summariser writes it into "Pending User Asks", + but ``SUMMARY_PREFIX`` tells the next model to respond only to user + messages *after* the summary — so the task effectively disappears from + the active context, causing the agent to stall, repeat completed work, + or silently drop the user's latest request. + + Fix: if the last user-role message is not already in the tail + (``messages[cut_idx:]``), walk ``cut_idx`` back to include it. We + then re-align backward one more time to avoid splitting any + tool_call/result group that immediately precedes the user message. + """ + last_user_idx = self._find_last_user_message_idx(messages, head_end) + if last_user_idx < 0: + # No user message found beyond head — nothing to anchor. + return cut_idx + + if last_user_idx >= cut_idx: + # Already in the tail; nothing to do. + return cut_idx + + # The last user message is in the middle (compressed) region. 
+ # Pull cut_idx back to it directly — a user message is already a + # clean boundary (no tool_call/result splitting risk), so there is no + # need to call _align_boundary_backward here; doing so would + # unnecessarily pull the cut further back into the preceding + # assistant + tool_calls group. + if not self.quiet_mode: + logger.debug( + "Anchoring tail cut to last user message at index %d " + "(was %d) to prevent active-task loss after compression", + last_user_idx, + cut_idx, + ) + # Safety: never go back into the head region. + return max(last_user_idx, head_end + 1) + def _find_tail_cut_by_tokens( self, messages: List[Dict[str, Any]], head_end: int, token_budget: int | None = None, @@ -879,7 +946,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio read, etc.). If even the minimum 3 messages exceed 1.5x the budget the cut is placed right after the head so compression still runs. - Never cuts inside a tool_call/result group. + Never cuts inside a tool_call/result group. Always ensures the most + recent user message is in the tail (see ``_ensure_last_user_message_in_tail``). """ if token_budget is None: token_budget = self.tail_token_budget @@ -918,6 +986,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio # Align to avoid splitting tool groups cut_idx = self._align_boundary_backward(messages, cut_idx) + # Ensure the most recent user message is always in the tail so the + # active task is never lost to compression (fixes #10896). + cut_idx = self._ensure_last_user_message_in_tail(messages, cut_idx, head_end) + return max(cut_idx, head_end + 1) # ------------------------------------------------------------------ diff --git a/agent/display.py b/agent/display.py index a7f3cbaa2d..3f1341485e 100644 --- a/agent/display.py +++ b/agent/display.py @@ -993,84 +993,4 @@ def get_cute_tool_message( # Honcho session line (one-liner with clickable OSC 8 hyperlink) # ========================================================================= -_DIM = "\033[2m" -_SKY_BLUE = "\033[38;5;117m" -_ANSI_RESET = "\033[0m" - -# ========================================================================= -# Context pressure display (CLI user-facing warnings) -# ========================================================================= - -# ANSI color codes for context pressure tiers -_CYAN = "\033[36m" -_YELLOW = "\033[33m" -_BOLD = "\033[1m" -_DIM_ANSI = "\033[2m" - -# Bar characters -_BAR_FILLED = "▰" -_BAR_EMPTY = "▱" -_BAR_WIDTH = 20 - - -def format_context_pressure( - compaction_progress: float, - threshold_tokens: int, - threshold_percent: float, - compression_enabled: bool = True, -) -> str: - """Build a formatted context pressure line for CLI display. - - The bar and percentage show progress toward the compaction threshold, - NOT the raw context window. 100% = compaction fires. - - Args: - compaction_progress: How close to compaction (0.0–1.0, 1.0 = fires). - threshold_tokens: Compaction threshold in tokens. - threshold_percent: Compaction threshold as a fraction of context window. - compression_enabled: Whether auto-compression is active. 
- """ - pct_int = min(int(compaction_progress * 100), 100) - filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH) - bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled) - - threshold_k = f"{threshold_tokens // 1000}k" if threshold_tokens >= 1000 else str(threshold_tokens) - threshold_pct_int = int(threshold_percent * 100) - - color = f"{_BOLD}{_YELLOW}" - icon = "⚠" - if compression_enabled: - hint = "compaction approaching" - else: - hint = "no auto-compaction" - - return ( - f" {color}{icon} context {bar} {pct_int}% to compaction{_ANSI_RESET}" - f" {_DIM_ANSI}{threshold_k} threshold ({threshold_pct_int}%) · {hint}{_ANSI_RESET}" - ) - - -def format_context_pressure_gateway( - compaction_progress: float, - threshold_percent: float, - compression_enabled: bool = True, -) -> str: - """Build a plain-text context pressure notification for messaging platforms. - - No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc. - The percentage shows progress toward the compaction threshold. - """ - pct_int = min(int(compaction_progress * 100), 100) - filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH) - bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled) - - threshold_pct_int = int(threshold_percent * 100) - - icon = "⚠️" - if compression_enabled: - hint = f"Context compaction approaching (threshold: {threshold_pct_int}% of window)." - else: - hint = "Auto-compaction is disabled — context may be truncated." - - return f"{icon} Context: {bar} {pct_int}% to compaction\n{hint}" diff --git a/cli.py b/cli.py index 5e459dc1e5..2c6c451ad2 100644 --- a/cli.py +++ b/cli.py @@ -439,14 +439,27 @@ def load_cli_config() -> Dict[str, Any]: # filesystem is directly accessible. For ALL remote/container backends # (ssh, docker, modal, singularity), the host path doesn't exist on the # target -- remove the key so terminal_tool.py uses its per-backend default. - if terminal_config.get("cwd") in (".", "auto", "cwd"): - effective_backend = terminal_config.get("env_type", "local") - if effective_backend == "local": - terminal_config["cwd"] = os.getcwd() - defaults["terminal"]["cwd"] = terminal_config["cwd"] + # + # GUARD: If TERMINAL_CWD is already set to a real absolute path (by the + # gateway's config bridge earlier in the process), don't clobber it. + # This prevents a lazy import of cli.py during gateway runtime from + # rewriting TERMINAL_CWD to the service's working directory. + # See issue #10817. 
+ _CWD_PLACEHOLDERS = (".", "auto", "cwd") + if terminal_config.get("cwd") in _CWD_PLACEHOLDERS: + _existing_cwd = os.environ.get("TERMINAL_CWD", "") + if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd): + # Gateway (or earlier startup) already resolved a real path — keep it + terminal_config["cwd"] = _existing_cwd + defaults["terminal"]["cwd"] = _existing_cwd else: - # Remove so TERMINAL_CWD stays unset → tool picks backend default - terminal_config.pop("cwd", None) + effective_backend = terminal_config.get("env_type", "local") + if effective_backend == "local": + terminal_config["cwd"] = os.getcwd() + defaults["terminal"]["cwd"] = terminal_config["cwd"] + else: + # Remove so TERMINAL_CWD stays unset → tool picks backend default + terminal_config.pop("cwd", None) env_mappings = { "env_type": "TERMINAL_ENV", diff --git a/cron/scheduler.py b/cron/scheduler.py index 4bd5724a60..9a0f561b05 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -979,6 +979,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int: delivery_error = str(de) logger.error("Delivery failed for job %s: %s", job["id"], de) + # Treat empty final_response as a soft failure so last_status + # is not "ok" — the agent ran but produced nothing useful. + # (issue #8585) + if success and not final_response: + success = False + error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)" + mark_job_run(job["id"], success, error, delivery_error=delivery_error) executed += 1 diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index d5578961c4..2f4ec93294 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -11,6 +11,7 @@ import asyncio import json import logging import os +import html as _html import re from typing import Dict, List, Optional, Any @@ -1129,13 +1130,10 @@ class TelegramAdapter(BasePlatformAdapter): try: cmd_preview = command[:3800] + "..." if len(command) > 3800 else command - # Escape backticks that would break Markdown v1 inline code parsing - safe_cmd = cmd_preview.replace("`", "'") - safe_desc = description.replace("`", "'").replace("*", "∗") text = ( - f"⚠️ *Command Approval Required*\n\n" - f"`{safe_cmd}`\n\n" - f"Reason: {safe_desc}" + f"⚠️ Command Approval Required\n\n" + f"
<pre>{_html.escape(cmd_preview)}</pre>
\n\n" + f"Reason: {_html.escape(description)}" ) # Resolve thread context for thread replies @@ -1163,7 +1161,7 @@ class TelegramAdapter(BasePlatformAdapter): kwargs: Dict[str, Any] = { "chat_id": int(chat_id), "text": text, - "parse_mode": ParseMode.MARKDOWN, + "parse_mode": ParseMode.HTML, "reply_markup": keyboard, **self._link_preview_kwargs(), } diff --git a/gateway/run.py b/gateway/run.py index 3d86f1632f..31ae5988ae 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -131,6 +131,12 @@ if _config_path.exists(): for _cfg_key, _env_var in _terminal_env_map.items(): if _cfg_key in _terminal_cfg: _val = _terminal_cfg[_cfg_key] + # Skip cwd placeholder values (".", "auto", "cwd") — the + # gateway resolves these to Path.home() later (line ~255). + # Writing the raw placeholder here would just be noise. + # Only bridge explicit absolute paths from config.yaml. + if _cfg_key == "cwd" and str(_val) in (".", "auto", "cwd"): + continue if isinstance(_val, list): os.environ[_env_var] = json.dumps(_val) else: @@ -225,6 +231,13 @@ try: except Exception: pass +# Warn if user has deprecated MESSAGING_CWD / TERMINAL_CWD in .env +try: + from hermes_cli.config import warn_deprecated_cwd_env_vars + warn_deprecated_cwd_env_vars() +except Exception: + pass + # Gateway runs in quiet mode - suppress debug output and use cwd directly (no temp dirs) os.environ["HERMES_QUIET"] = "1" @@ -232,12 +245,14 @@ os.environ["HERMES_QUIET"] = "1" os.environ["HERMES_EXEC_ASK"] = "1" # Set terminal working directory for messaging platforms. -# If the user set an explicit path in config.yaml (not "." or "auto"), -# respect it. Otherwise use MESSAGING_CWD or default to home directory. +# config.yaml terminal.cwd is the canonical source (bridged to TERMINAL_CWD +# by the config bridge above). When it's unset or a placeholder, default +# to home directory. MESSAGING_CWD is accepted as a backward-compat +# fallback (deprecated — the warning above tells users to migrate). _configured_cwd = os.environ.get("TERMINAL_CWD", "") if not _configured_cwd or _configured_cwd in (".", "auto", "cwd"): - messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home()) - os.environ["TERMINAL_CWD"] = messaging_cwd + _fallback = os.getenv("MESSAGING_CWD") or str(Path.home()) + os.environ["TERMINAL_CWD"] = _fallback from gateway.config import ( Platform, @@ -762,69 +777,72 @@ class GatewayRunner: enabled_toolsets=["memory", "skills"], session_id=old_session_id, ) - # Fully silence the flush agent — quiet_mode only suppresses init - # messages; tool call output still leaks to the terminal through - # _safe_print → _print_fn. Set a no-op to prevent that. - tmp_agent._print_fn = lambda *a, **kw: None - - # Build conversation history from transcript - msgs = [ - {"role": m.get("role"), "content": m.get("content")} - for m in history - if m.get("role") in ("user", "assistant") and m.get("content") - ] - - # Read live memory state from disk so the flush agent can see - # what's already saved and avoid overwriting newer entries. 
- _current_memory = "" try: - from tools.memory_tool import get_memory_dir - _mem_dir = get_memory_dir() - for fname, label in [ - ("MEMORY.md", "MEMORY (your personal notes)"), - ("USER.md", "USER PROFILE (who the user is)"), - ]: - fpath = _mem_dir / fname - if fpath.exists(): - content = fpath.read_text(encoding="utf-8").strip() - if content: - _current_memory += f"\n\n## Current {label}:\n{content}" - except Exception: - pass # Non-fatal — flush still works, just without the guard + # Fully silence the flush agent — quiet_mode only suppresses init + # messages; tool call output still leaks to the terminal through + # _safe_print → _print_fn. Set a no-op to prevent that. + tmp_agent._print_fn = lambda *a, **kw: None - # Give the agent a real turn to think about what to save - flush_prompt = ( - "[System: This session is about to be automatically reset due to " - "inactivity or a scheduled daily reset. The conversation context " - "will be cleared after this turn.\n\n" - "Review the conversation above and:\n" - "1. Save any important facts, preferences, or decisions to memory " - "(user profile or your notes) that would be useful in future sessions.\n" - "2. If you discovered a reusable workflow or solved a non-trivial " - "problem, consider saving it as a skill.\n" - "3. If nothing is worth saving, that's fine — just skip.\n\n" - ) + # Build conversation history from transcript + msgs = [ + {"role": m.get("role"), "content": m.get("content")} + for m in history + if m.get("role") in ("user", "assistant") and m.get("content") + ] - if _current_memory: - flush_prompt += ( - "IMPORTANT — here is the current live state of memory. Other " - "sessions, cron jobs, or the user may have updated it since this " - "conversation ended. Do NOT overwrite or remove entries unless " - "the conversation above reveals something that genuinely " - "supersedes them. Only add new information that is not already " - "captured below." - f"{_current_memory}\n\n" + # Read live memory state from disk so the flush agent can see + # what's already saved and avoid overwriting newer entries. + _current_memory = "" + try: + from tools.memory_tool import get_memory_dir + _mem_dir = get_memory_dir() + for fname, label in [ + ("MEMORY.md", "MEMORY (your personal notes)"), + ("USER.md", "USER PROFILE (who the user is)"), + ]: + fpath = _mem_dir / fname + if fpath.exists(): + content = fpath.read_text(encoding="utf-8").strip() + if content: + _current_memory += f"\n\n## Current {label}:\n{content}" + except Exception: + pass # Non-fatal — flush still works, just without the guard + + # Give the agent a real turn to think about what to save + flush_prompt = ( + "[System: This session is about to be automatically reset due to " + "inactivity or a scheduled daily reset. The conversation context " + "will be cleared after this turn.\n\n" + "Review the conversation above and:\n" + "1. Save any important facts, preferences, or decisions to memory " + "(user profile or your notes) that would be useful in future sessions.\n" + "2. If you discovered a reusable workflow or solved a non-trivial " + "problem, consider saving it as a skill.\n" + "3. If nothing is worth saving, that's fine — just skip.\n\n" ) - flush_prompt += ( - "Do NOT respond to the user. Just use the memory and skill_manage " - "tools if needed, then stop.]" - ) + if _current_memory: + flush_prompt += ( + "IMPORTANT — here is the current live state of memory. Other " + "sessions, cron jobs, or the user may have updated it since this " + "conversation ended. 
Do NOT overwrite or remove entries unless " + "the conversation above reveals something that genuinely " + "supersedes them. Only add new information that is not already " + "captured below." + f"{_current_memory}\n\n" + ) - tmp_agent.run_conversation( - user_message=flush_prompt, - conversation_history=msgs, - ) + flush_prompt += ( + "Do NOT respond to the user. Just use the memory and skill_manage " + "tools if needed, then stop.]" + ) + + tmp_agent.run_conversation( + user_message=flush_prompt, + conversation_history=msgs, + ) + finally: + self._cleanup_agent_resources(tmp_agent) logger.info("Pre-reset memory flush completed for session %s", old_session_id) except Exception as e: logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e) @@ -1562,19 +1580,25 @@ class GatewayRunner: ) except Exception: pass - try: - if hasattr(agent, "shutdown_memory_provider"): - agent.shutdown_memory_provider() - except Exception: - pass - # Close tool resources (terminal sandboxes, browser daemons, - # background processes, httpx clients) to prevent zombie - # process accumulation. - try: - if hasattr(agent, 'close'): - agent.close() - except Exception: - pass + self._cleanup_agent_resources(agent) + + def _cleanup_agent_resources(self, agent: Any) -> None: + """Best-effort cleanup for temporary or cached agent instances.""" + if agent is None: + return + try: + if hasattr(agent, "shutdown_memory_provider"): + agent.shutdown_memory_provider() + except Exception: + pass + # Close tool resources (terminal sandboxes, browser daemons, + # background processes, httpx clients) to prevent zombie + # process accumulation. + try: + if hasattr(agent, "close"): + agent.close() + except Exception: + pass _STUCK_LOOP_THRESHOLD = 3 # restarts while active before auto-suspend _STUCK_LOOP_FILE = ".restart_failure_counts" @@ -2077,16 +2101,7 @@ class GatewayRunner: if _cached_agent is None: _cached_agent = self._running_agents.get(key) if _cached_agent and _cached_agent is not _AGENT_PENDING_SENTINEL: - try: - if hasattr(_cached_agent, 'shutdown_memory_provider'): - _cached_agent.shutdown_memory_provider() - except Exception: - pass - try: - if hasattr(_cached_agent, 'close'): - _cached_agent.close() - except Exception: - pass + self._cleanup_agent_resources(_cached_agent) # Mark as flushed and persist to disk so the flag # survives gateway restarts. with self.session_store._lock: @@ -3410,7 +3425,7 @@ class GatewayRunner: from agent.context_references import preprocess_context_references_async from agent.model_metadata import get_model_context_length - _msg_cwd = os.environ.get("MESSAGING_CWD", os.path.expanduser("~")) + _msg_cwd = os.environ.get("TERMINAL_CWD", os.path.expanduser("~")) _msg_ctx_len = get_model_context_length( self._model, base_url=self._base_url or "", @@ -3782,51 +3797,54 @@ class GatewayRunner: enabled_toolsets=["memory"], session_id=session_entry.session_id, ) - _hyg_agent._print_fn = lambda *a, **kw: None + try: + _hyg_agent._print_fn = lambda *a, **kw: None - loop = asyncio.get_running_loop() - _compressed, _ = await loop.run_in_executor( - None, - lambda: _hyg_agent._compress_context( - _hyg_msgs, "", - approx_tokens=_approx_tokens, - ), - ) - - # _compress_context ends the old session and creates - # a new session_id. Write compressed messages into - # the NEW session so the old transcript stays intact - # and searchable via session_search. 
- _hyg_new_sid = _hyg_agent.session_id - if _hyg_new_sid != session_entry.session_id: - session_entry.session_id = _hyg_new_sid - self.session_store._save() - - self.session_store.rewrite_transcript( - session_entry.session_id, _compressed - ) - # Reset stored token count — transcript was rewritten - session_entry.last_prompt_tokens = 0 - history = _compressed - _new_count = len(_compressed) - _new_tokens = estimate_messages_tokens_rough( - _compressed - ) - - logger.info( - "Session hygiene: compressed %s → %s msgs, " - "~%s → ~%s tokens", - _msg_count, _new_count, - f"{_approx_tokens:,}", f"{_new_tokens:,}", - ) - - if _new_tokens >= _warn_token_threshold: - logger.warning( - "Session hygiene: still ~%s tokens after " - "compression", - f"{_new_tokens:,}", + loop = asyncio.get_running_loop() + _compressed, _ = await loop.run_in_executor( + None, + lambda: _hyg_agent._compress_context( + _hyg_msgs, "", + approx_tokens=_approx_tokens, + ), ) + # _compress_context ends the old session and creates + # a new session_id. Write compressed messages into + # the NEW session so the old transcript stays intact + # and searchable via session_search. + _hyg_new_sid = _hyg_agent.session_id + if _hyg_new_sid != session_entry.session_id: + session_entry.session_id = _hyg_new_sid + self.session_store._save() + + self.session_store.rewrite_transcript( + session_entry.session_id, _compressed + ) + # Reset stored token count — transcript was rewritten + session_entry.last_prompt_tokens = 0 + history = _compressed + _new_count = len(_compressed) + _new_tokens = estimate_messages_tokens_rough( + _compressed + ) + + logger.info( + "Session hygiene: compressed %s → %s msgs, " + "~%s → ~%s tokens", + _msg_count, _new_count, + f"{_approx_tokens:,}", f"{_new_tokens:,}", + ) + + if _new_tokens >= _warn_token_threshold: + logger.warning( + "Session hygiene: still ~%s tokens after " + "compression", + f"{_new_tokens:,}", + ) + finally: + self._cleanup_agent_resources(_hyg_agent) + except Exception as e: logger.warning( "Session hygiene auto-compress failed: %s", e @@ -4344,16 +4362,7 @@ class GatewayRunner: _cached = self._agent_cache.get(session_key) _old_agent = _cached[0] if isinstance(_cached, tuple) else _cached if _cached else None if _old_agent is not None: - try: - if hasattr(_old_agent, "shutdown_memory_provider"): - _old_agent.shutdown_memory_provider() - except Exception: - pass - try: - if hasattr(_old_agent, "close"): - _old_agent.close() - except Exception: - pass + self._cleanup_agent_resources(_old_agent) self._evict_cached_agent(session_key) try: @@ -5717,7 +5726,7 @@ class GatewayRunner: max_snapshots=cp_cfg.get("max_snapshots", 50), ) - cwd = os.getenv("MESSAGING_CWD", str(Path.home())) + cwd = os.getenv("TERMINAL_CWD", str(Path.home())) arg = event.get_command_args().strip() if not arg: @@ -5838,11 +5847,13 @@ class GatewayRunner: session_db=self._session_db, fallback_model=self._fallback_model, ) - - return agent.run_conversation( - user_message=prompt, - task_id=task_id, - ) + try: + return agent.run_conversation( + user_message=prompt, + task_id=task_id, + ) + finally: + self._cleanup_agent_resources(agent) result = await self._run_in_executor_with_context(run_sync) @@ -6020,11 +6031,14 @@ class GatewayRunner: skip_context_files=True, persist_session=False, ) - return agent.run_conversation( - user_message=btw_prompt, - conversation_history=history_snapshot, - task_id=task_id, - ) + try: + return agent.run_conversation( + user_message=btw_prompt, + conversation_history=history_snapshot, + 
task_id=task_id, + ) + finally: + self._cleanup_agent_resources(agent) result = await self._run_in_executor_with_context(run_sync) @@ -6353,42 +6367,45 @@ class GatewayRunner: enabled_toolsets=["memory"], session_id=session_entry.session_id, ) - tmp_agent._print_fn = lambda *a, **kw: None + try: + tmp_agent._print_fn = lambda *a, **kw: None - compressor = tmp_agent.context_compressor - compress_start = compressor.protect_first_n - compress_start = compressor._align_boundary_forward(msgs, compress_start) - compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start) - if compress_start >= compress_end: - return "Nothing to compress yet (the transcript is still all protected context)." + compressor = tmp_agent.context_compressor + compress_start = compressor.protect_first_n + compress_start = compressor._align_boundary_forward(msgs, compress_start) + compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start) + if compress_start >= compress_end: + return "Nothing to compress yet (the transcript is still all protected context)." - loop = asyncio.get_running_loop() - compressed, _ = await loop.run_in_executor( - None, - lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic) - ) + loop = asyncio.get_running_loop() + compressed, _ = await loop.run_in_executor( + None, + lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic) + ) - # _compress_context already calls end_session() on the old session - # (preserving its full transcript in SQLite) and creates a new - # session_id for the continuation. Write the compressed messages - # into the NEW session so the original history stays searchable. - new_session_id = tmp_agent.session_id - if new_session_id != session_entry.session_id: - session_entry.session_id = new_session_id - self.session_store._save() + # _compress_context already calls end_session() on the old session + # (preserving its full transcript in SQLite) and creates a new + # session_id for the continuation. Write the compressed messages + # into the NEW session so the original history stays searchable. + new_session_id = tmp_agent.session_id + if new_session_id != session_entry.session_id: + session_entry.session_id = new_session_id + self.session_store._save() - self.session_store.rewrite_transcript(new_session_id, compressed) - # Reset stored token count — transcript changed, old value is stale - self.session_store.update_session( - session_entry.session_key, last_prompt_tokens=0 - ) - new_tokens = estimate_messages_tokens_rough(compressed) - summary = summarize_manual_compression( - msgs, - compressed, - approx_tokens, - new_tokens, - ) + self.session_store.rewrite_transcript(new_session_id, compressed) + # Reset stored token count — transcript changed, old value is stale + self.session_store.update_session( + session_entry.session_key, last_prompt_tokens=0 + ) + new_tokens = estimate_messages_tokens_rough(compressed) + summary = summarize_manual_compression( + msgs, + compressed, + approx_tokens, + new_tokens, + ) + finally: + self._cleanup_agent_resources(tmp_agent) lines = [f"🗜️ {summary['headline']}"] if focus_topic: lines.append(f"Focus: \"{focus_topic}\"") @@ -8896,7 +8913,7 @@ class GatewayRunner: # false positives from MagicMock auto-attribute creation in tests. 
if getattr(type(_status_adapter), "send_exec_approval", None) is not None: try: - asyncio.run_coroutine_threadsafe( + _approval_result = asyncio.run_coroutine_threadsafe( _status_adapter.send_exec_approval( chat_id=_status_chat_id, command=cmd, @@ -8906,7 +8923,12 @@ class GatewayRunner: ), _loop_for_step, ).result(timeout=15) - return + if _approval_result.success: + return + logger.warning( + "Button-based approval failed (send returned error), falling back to text: %s", + _approval_result.error, + ) except Exception as _e: logger.warning( "Button-based approval failed, falling back to text: %s", _e @@ -9536,6 +9558,7 @@ class GatewayRunner: next_source = source next_message = pending next_message_id = None + next_channel_prompt = None if pending_event is not None: next_source = getattr(pending_event, "source", None) or source next_message = await self._prepare_inbound_message_text( @@ -9546,6 +9569,7 @@ class GatewayRunner: if next_message is None: return result next_message_id = getattr(pending_event, "message_id", None) + next_channel_prompt = getattr(pending_event, "channel_prompt", None) # Restart typing indicator so the user sees activity while # the follow-up turn runs. The outer _process_message_background @@ -9569,7 +9593,7 @@ class GatewayRunner: session_key=session_key, _interrupt_depth=_interrupt_depth + 1, event_message_id=next_message_id, - channel_prompt=pending_event.channel_prompt, + channel_prompt=next_channel_prompt, ) finally: # Stop progress sender, interrupt monitor, and notification task diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index a644547e6a..853b159034 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -515,9 +515,17 @@ class GatewayStreamConsumer: self._fallback_final_send = False if not continuation.strip(): # Nothing new to send — the visible partial already matches final text. - self._already_sent = True - self._final_response_sent = True - return + # BUT: if final_text itself has meaningful content (e.g. a timeout + # message after a long tool call), the prefix-based continuation + # calculation may wrongly conclude "already shown" because the + # streamed prefix was from a *previous* segment (before the tool + # boundary). In that case, send the full final_text as-is (#10807). + if final_text.strip() and final_text != self._visible_prefix(): + continuation = final_text + else: + self._already_sent = True + self._final_response_sent = True + return raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096) safe_limit = max(500, raw_limit - 100) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index a9f55f4c57..33bc325ee3 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1597,13 +1597,8 @@ OPTIONAL_ENV_VARS = { }, # ── Agent settings ── - "MESSAGING_CWD": { - "description": "Working directory for terminal commands via messaging", - "prompt": "Messaging working directory (default: home)", - "url": None, - "password": False, - "category": "setting", - }, + # NOTE: MESSAGING_CWD was removed here — use terminal.cwd in config.yaml + # instead. The gateway reads TERMINAL_CWD (bridged from terminal.cwd). 
"SUDO_PASSWORD": { "description": "Sudo password for terminal commands requiring root access; set to an explicit empty string to try empty without prompting", "prompt": "Sudo password", @@ -2082,6 +2077,52 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None: sys.stderr.write("\n".join(lines) + "\n\n") +def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> None: + """Warn if MESSAGING_CWD or TERMINAL_CWD is set in .env instead of config.yaml. + + These env vars are deprecated — the canonical setting is terminal.cwd + in config.yaml. Prints a migration hint to stderr. + """ + import os, sys + messaging_cwd = os.environ.get("MESSAGING_CWD") + terminal_cwd_env = os.environ.get("TERMINAL_CWD") + + if config is None: + try: + config = load_config() + except Exception: + return + + terminal_cfg = config.get("terminal", {}) + config_cwd = terminal_cfg.get("cwd", ".") if isinstance(terminal_cfg, dict) else "." + # Only warn if config.yaml doesn't have an explicit path + config_has_explicit_cwd = config_cwd not in (".", "auto", "cwd", "") + + lines: list[str] = [] + if messaging_cwd: + lines.append( + f" \033[33m⚠\033[0m MESSAGING_CWD={messaging_cwd} found in .env — " + f"this is deprecated." + ) + if terminal_cwd_env and not config_has_explicit_cwd: + # TERMINAL_CWD in env but not from config bridge — likely from .env + lines.append( + f" \033[33m⚠\033[0m TERMINAL_CWD={terminal_cwd_env} found in .env — " + f"this is deprecated." + ) + if lines: + hint_path = os.environ.get("HERMES_HOME", "~/.hermes") + lines.insert(0, "\033[33m⚠ Deprecated .env settings detected:\033[0m") + lines.append( + f" \033[2mMove to config.yaml instead: " + f"terminal:\\n cwd: /your/project/path\033[0m" + ) + lines.append( + f" \033[2mThen remove the old entries from {hint_path}/.env\033[0m" + ) + sys.stderr.write("\n".join(lines) + "\n\n") + + def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, Any]: """ Migrate config to latest version, prompting for new required fields. 
diff --git a/hermes_cli/main.py b/hermes_cli/main.py index a9ad311877..15a20edf14 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -5998,6 +5998,18 @@ Examples: memory_sub.add_parser("setup", help="Interactive provider selection and configuration") memory_sub.add_parser("status", help="Show current memory provider config") memory_sub.add_parser("off", help="Disable external provider (built-in only)") + _reset_parser = memory_sub.add_parser( + "reset", + help="Erase all built-in memory (MEMORY.md and USER.md)", + ) + _reset_parser.add_argument( + "--yes", "-y", action="store_true", + help="Skip confirmation prompt", + ) + _reset_parser.add_argument( + "--target", choices=["all", "memory", "user"], default="all", + help="Which store to reset: 'all' (default), 'memory', or 'user'", + ) def cmd_memory(args): sub = getattr(args, "memory_command", None) @@ -6010,6 +6022,44 @@ Examples: save_config(config) print("\n ✓ Memory provider: built-in only") print(" Saved to config.yaml\n") + elif sub == "reset": + from hermes_constants import get_hermes_home, display_hermes_home + mem_dir = get_hermes_home() / "memories" + target = getattr(args, "target", "all") + files_to_reset = [] + if target in ("all", "memory"): + files_to_reset.append(("MEMORY.md", "agent notes")) + if target in ("all", "user"): + files_to_reset.append(("USER.md", "user profile")) + + # Check what exists + existing = [(f, desc) for f, desc in files_to_reset if (mem_dir / f).exists()] + if not existing: + print(f"\n Nothing to reset — no memory files found in {display_hermes_home()}/memories/\n") + return + + print(f"\n This will permanently erase the following memory files:") + for f, desc in existing: + path = mem_dir / f + size = path.stat().st_size + print(f" ◆ {f} ({desc}) — {size:,} bytes") + + if not getattr(args, "yes", False): + try: + answer = input("\n Type 'yes' to confirm: ").strip().lower() + except (EOFError, KeyboardInterrupt): + print("\n Cancelled.\n") + return + if answer != "yes": + print(" Cancelled.\n") + return + + for f, desc in existing: + (mem_dir / f).unlink() + print(f" ✓ Deleted {f} ({desc})") + + print(f"\n Memory reset complete. New sessions will start with a blank slate.") + print(f" Files were in: {display_hermes_home()}/memories/\n") else: from hermes_cli.memory_setup import memory_command memory_command(args) diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 8ab00b2145..fe56506e28 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -807,6 +807,10 @@ def list_authenticated_providers( # "nous" shares OpenRouter's curated list if not separately defined if "nous" not in curated: curated["nous"] = curated["openrouter"] + # Ollama Cloud uses dynamic discovery (no static curated list) + if "ollama-cloud" not in curated: + from hermes_cli.models import fetch_ollama_cloud_models + curated["ollama-cloud"] = fetch_ollama_cloud_models() # --- 1. 
Check Hermes-mapped providers --- for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items(): diff --git a/hermes_cli/models.py b/hermes_cli/models.py index c6b2db571b..284cb0a39c 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -1044,7 +1044,7 @@ def detect_provider_for_model( return (resolved_provider, default_models[0]) # Aggregators list other providers' models — never auto-switch TO them - _AGGREGATORS = {"nous", "openrouter"} + _AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"} # If the model belongs to the current provider's catalog, don't suggest switching current_models = _PROVIDER_MODELS.get(current_provider, []) @@ -1286,6 +1286,10 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) live = _fetch_ai_gateway_models() if live: return live + if normalized == "ollama-cloud": + live = fetch_ollama_cloud_models(force_refresh=force_refresh) + if live: + return live if normalized == "custom": base_url = _get_custom_base_url() if base_url: diff --git a/hermes_cli/skin_engine.py b/hermes_cli/skin_engine.py index 5b406f1f56..4222a966ef 100644 --- a/hermes_cli/skin_engine.py +++ b/hermes_cli/skin_engine.py @@ -708,7 +708,9 @@ def init_skin_from_config(config: dict) -> None: Call this once during CLI init with the loaded config dict. """ - display = config.get("display", {}) + display = config.get("display") or {} + if not isinstance(display, dict): + display = {} skin_name = display.get("skin", "default") if isinstance(skin_name, str) and skin_name.strip(): set_active_skin(skin_name.strip()) diff --git a/landingpage/apple-touch-icon.png b/landingpage/apple-touch-icon.png deleted file mode 100644 index c5da175f8e..0000000000 Binary files a/landingpage/apple-touch-icon.png and /dev/null differ diff --git a/landingpage/favicon-16x16.png b/landingpage/favicon-16x16.png deleted file mode 100644 index 5bc67ef224..0000000000 Binary files a/landingpage/favicon-16x16.png and /dev/null differ diff --git a/landingpage/favicon-32x32.png b/landingpage/favicon-32x32.png deleted file mode 100644 index 8db2977a5b..0000000000 Binary files a/landingpage/favicon-32x32.png and /dev/null differ diff --git a/landingpage/favicon.ico b/landingpage/favicon.ico deleted file mode 100644 index 8586c395f6..0000000000 Binary files a/landingpage/favicon.ico and /dev/null differ diff --git a/landingpage/hermes-agent-banner.png b/landingpage/hermes-agent-banner.png deleted file mode 100644 index 2c4a160ceb..0000000000 Binary files a/landingpage/hermes-agent-banner.png and /dev/null differ diff --git a/landingpage/icon-192.png b/landingpage/icon-192.png deleted file mode 100644 index 126a395793..0000000000 Binary files a/landingpage/icon-192.png and /dev/null differ diff --git a/landingpage/icon-512.png b/landingpage/icon-512.png deleted file mode 100644 index c5b4c63a57..0000000000 Binary files a/landingpage/icon-512.png and /dev/null differ diff --git a/landingpage/index.html b/landingpage/index.html deleted file mode 100644 index e24ed11c48..0000000000 --- a/landingpage/index.html +++ /dev/null @@ -1,665 +0,0 @@ - - - - - - Hermes Agent — An Agent That Grows With You - - - - - - - - - - - - - - - - - - - - - - - -
-  [landingpage/index.html removed: 665 lines of landing-page markup — hero ("An agent that grows with you"),
-   curl install widget, five-step quick start (install, configure, start chatting, gateway setup, update),
-   animated terminal demo, feature grid (multi-platform gateway, persistent memory and skills, scheduled
-   automations, subagent delegation, sandboxing backends, web/browser control), and the
-   Tools / Platforms / Environments / Skills / Research spec sections]
- - - - - - diff --git a/landingpage/nous-logo.png b/landingpage/nous-logo.png deleted file mode 100644 index cfea9a6613..0000000000 Binary files a/landingpage/nous-logo.png and /dev/null differ diff --git a/landingpage/script.js b/landingpage/script.js deleted file mode 100644 index 4cd097bdb2..0000000000 --- a/landingpage/script.js +++ /dev/null @@ -1,521 +0,0 @@ -// ========================================================================= -// Hermes Agent Landing Page — Interactions -// ========================================================================= - -// --- Platform install commands --- -const PLATFORMS = { - linux: { - command: - "curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash", - prompt: "$", - note: "Works on Linux, macOS & WSL2 · No prerequisites · Installs everything automatically", - stepNote: - "Installs uv, Python 3.11, clones the repo, sets up everything. No sudo needed.", - }, -}; - -function detectPlatform() { - return "linux"; -} - -function switchPlatform(platform) { - const cfg = PLATFORMS[platform]; - if (!cfg) return; - - // Update hero install widget - const commandEl = document.getElementById("install-command"); - const promptEl = document.getElementById("install-prompt"); - const noteEl = document.getElementById("install-note"); - - if (commandEl) commandEl.textContent = cfg.command; - if (promptEl) promptEl.textContent = cfg.prompt; - if (noteEl) noteEl.textContent = cfg.note; - - // Update active tab in hero - document.querySelectorAll(".install-tab").forEach((tab) => { - tab.classList.toggle("active", tab.dataset.platform === platform); - }); - - // Sync the step section tabs too - switchStepPlatform(platform); -} - -function switchStepPlatform(platform) { - const cfg = PLATFORMS[platform]; - if (!cfg) return; - - const commandEl = document.getElementById("step1-command"); - const copyBtn = document.getElementById("step1-copy"); - const noteEl = document.getElementById("step1-note"); - - if (commandEl) commandEl.textContent = cfg.command; - if (copyBtn) copyBtn.setAttribute("data-text", cfg.command); - if (noteEl) noteEl.textContent = cfg.stepNote; - - // Update active tab in step section - document.querySelectorAll(".code-tab").forEach((tab) => { - tab.classList.toggle("active", tab.dataset.platform === platform); - }); -} - -function toggleMobileNav() { - document.getElementById("nav-mobile").classList.toggle("open"); - document.getElementById("nav-hamburger").classList.toggle("open"); -} - -function toggleSpecs() { - const wrapper = document.getElementById("specs-wrapper"); - const btn = document.getElementById("specs-toggle"); - const label = btn.querySelector(".toggle-label"); - const isOpen = wrapper.classList.contains("open"); - - if (isOpen) { - wrapper.style.maxHeight = wrapper.scrollHeight + "px"; - requestAnimationFrame(() => { - wrapper.style.maxHeight = "0"; - }); - wrapper.classList.remove("open"); - btn.classList.remove("open"); - if (label) label.textContent = "More details"; - } else { - wrapper.classList.add("open"); - wrapper.style.maxHeight = wrapper.scrollHeight + "px"; - btn.classList.add("open"); - if (label) label.textContent = "Less"; - wrapper.addEventListener( - "transitionend", - () => { - if (wrapper.classList.contains("open")) { - wrapper.style.maxHeight = "none"; - } - }, - { once: true } - ); - } -} - -// --- Copy to clipboard --- -function copyInstall() { - const text = document.getElementById("install-command").textContent; - 
navigator.clipboard.writeText(text).then(() => { - const btn = document.querySelector(".install-widget-body .copy-btn"); - const original = btn.querySelector(".copy-text").textContent; - btn.querySelector(".copy-text").textContent = "Copied!"; - btn.style.color = "var(--primary-light)"; - setTimeout(() => { - btn.querySelector(".copy-text").textContent = original; - btn.style.color = ""; - }, 2000); - }); -} - -function copyText(btn) { - const text = btn.getAttribute("data-text"); - navigator.clipboard.writeText(text).then(() => { - const original = btn.textContent; - btn.textContent = "Copied!"; - btn.style.color = "var(--primary-light)"; - setTimeout(() => { - btn.textContent = original; - btn.style.color = ""; - }, 2000); - }); -} - -// --- Scroll-triggered fade-in --- -function initScrollAnimations() { - const elements = document.querySelectorAll( - ".feature-card, .install-step, " + - ".section-header, .terminal-window", - ); - - elements.forEach((el) => el.classList.add("fade-in")); - - const observer = new IntersectionObserver( - (entries) => { - entries.forEach((entry) => { - if (entry.isIntersecting) { - // Stagger children within grids - const parent = entry.target.parentElement; - if (parent) { - const siblings = parent.querySelectorAll(".fade-in"); - let idx = Array.from(siblings).indexOf(entry.target); - if (idx < 0) idx = 0; - setTimeout(() => { - entry.target.classList.add("visible"); - }, idx * 60); - } else { - entry.target.classList.add("visible"); - } - observer.unobserve(entry.target); - } - }); - }, - { threshold: 0.1, rootMargin: "0px 0px -40px 0px" }, - ); - - elements.forEach((el) => observer.observe(el)); -} - -// --- Terminal Demo --- -const CURSOR = ''; - -const demoSequence = [ - { type: "prompt", text: "❯ " }, - { - type: "type", - text: "Research the latest approaches to GRPO training and write a summary", - delay: 30, - }, - { type: "pause", ms: 600 }, - { - type: "output", - lines: [ - "", - ' web_search "GRPO reinforcement learning 2026" 1.2s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' web_extract arxiv.org/abs/2402.03300 3.1s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' web_search "GRPO vs PPO ablation results" 0.9s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' web_extract huggingface.co/blog/grpo 2.8s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' write_file ~/research/grpo-summary.md 0.1s', - ], - }, - { type: "pause", ms: 500 }, - { - type: "output", - lines: [ - "", - 'Done! 
I\'ve written a summary covering:', - "", - ' GRPO\'s group-relative advantage (no critic model needed)', - ' Comparison with PPO/DPO on reasoning benchmarks', - ' Implementation notes for Axolotl and TRL', - "", - 'Saved to ~/research/grpo-summary.md', - ], - }, - { type: "pause", ms: 2500 }, - - { type: "clear" }, - { type: "prompt", text: "❯ " }, - { - type: "type", - text: "Review the PR at NousResearch/hermes-agent#42 and fix any issues", - delay: 30, - }, - { type: "pause", ms: 600 }, - { - type: "output", - lines: [ - "", - ' delegate_task "review PR #42 changes" 2.1s', - ], - }, - { type: "pause", ms: 500 }, - { - type: "output", - lines: [ - ' git diff main..pr-42 0.4s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' patch tools/registry.py 0.1s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' python -m pytest tests/ -x 3.2s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' git commit -m "fix: handle empty tool schemas" 0.3s', - ], - }, - { type: "pause", ms: 500 }, - { - type: "output", - lines: [ - "", - 'Found 2 issues in the PR and fixed both:', - "", - ' Empty tool schema crash in registry.py — added guard', - ' Missing error handling in delegate_tool.py — added try/except', - "", - 'Tests pass. Committed the fix and pushed to the PR branch.', - 'I also saved a skill for this PR review pattern.', - ], - }, - { type: "pause", ms: 2500 }, - - { type: "clear" }, - { type: "prompt", text: "❯ " }, - { - type: "type", - text: "How did we fix that Docker networking issue?", - delay: 35, - }, - { type: "pause", ms: 500 }, - { - type: "output", - lines: [ - "", - ' session_search "Docker networking" 1.4s', - ], - }, - { type: "pause", ms: 500 }, - { - type: "output", - lines: [ - "", - 'Found it — from a session on February 12th:', - "", - 'The containers couldn\'t reach each other because the compose', - 'file was using the default bridge network. 
We switched to a', - 'custom network with driver: overlay, added explicit', - 'aliases, and set dns: 8.8.8.8 as a fallback.', - "", - 'The fix was committed in docker-compose.prod.yml.', - ], - }, - { type: "pause", ms: 3000 }, -]; - -class TerminalDemo { - constructor(container) { - this.container = container; - this.running = false; - this.content = ""; - } - - async start() { - if (this.running) return; - this.running = true; - - while (this.running) { - for (const step of demoSequence) { - if (!this.running) return; - await this.execute(step); - } - this.clear(); - await this.sleep(1000); - } - } - - stop() { - this.running = false; - } - - async execute(step) { - switch (step.type) { - case "prompt": - this.append(`${step.text}`); - break; - case "type": - for (const char of step.text) { - if (!this.running) return; - this.append(`${char}`); - await this.sleep(step.delay || 30); - } - break; - case "output": - for (const line of step.lines) { - if (!this.running) return; - this.append("\n" + line); - await this.sleep(50); - } - break; - case "pause": - await this.sleep(step.ms); - break; - case "clear": - this.clear(); - break; - } - } - - append(html) { - this.content += html; - this.render(); - } - - render() { - this.container.innerHTML = this.content + CURSOR; - this.container.scrollTop = this.container.scrollHeight; - } - - clear() { - this.content = ""; - this.container.innerHTML = ""; - } - - sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); - } -} - -// --- Noise Overlay (ported from hermes-chat NoiseOverlay) --- -function initNoiseOverlay() { - if (window.matchMedia("(prefers-reduced-motion: reduce)").matches) return; - if (typeof THREE === "undefined") return; - - const canvas = document.getElementById("noise-overlay"); - if (!canvas) return; - - const vertexShader = ` - varying vec2 vUv; - void main() { - vUv = uv; - gl_Position = projectionMatrix * modelViewMatrix * vec4(position, 1.0); - } - `; - - const fragmentShader = ` - uniform vec2 uRes; - uniform float uDpr, uSize, uDensity, uOpacity; - uniform vec3 uColor; - varying vec2 vUv; - - float hash(vec2 p) { - vec3 p3 = fract(vec3(p.xyx) * 0.1031); - p3 += dot(p3, p3.yzx + 33.33); - return fract((p3.x + p3.y) * p3.z); - } - - void main() { - float n = hash(floor(vUv * uRes / (uSize * uDpr))); - gl_FragColor = vec4(uColor, step(1.0 - uDensity, n)) * uOpacity; - } - `; - - function hexToVec3(hex) { - const c = hex.replace("#", ""); - return new THREE.Vector3( - parseInt(c.substring(0, 2), 16) / 255, - parseInt(c.substring(2, 4), 16) / 255, - parseInt(c.substring(4, 6), 16) / 255, - ); - } - - const renderer = new THREE.WebGLRenderer({ - alpha: true, - canvas, - premultipliedAlpha: false, - }); - renderer.setClearColor(0x000000, 0); - - const scene = new THREE.Scene(); - const camera = new THREE.OrthographicCamera(-1, 1, 1, -1, 0, 1); - const geo = new THREE.PlaneGeometry(2, 2); - - const mat = new THREE.ShaderMaterial({ - vertexShader, - fragmentShader, - transparent: true, - uniforms: { - uColor: { value: hexToVec3("#8090BB") }, - uDensity: { value: 0.1 }, - uDpr: { value: 1 }, - uOpacity: { value: 0.4 }, - uRes: { value: new THREE.Vector2() }, - uSize: { value: 1.0 }, - }, - }); - - scene.add(new THREE.Mesh(geo, mat)); - - function resize() { - const dpr = window.devicePixelRatio; - const w = window.innerWidth; - const h = window.innerHeight; - renderer.setSize(w, h); - renderer.setPixelRatio(dpr); - mat.uniforms.uRes.value.set(w * dpr, h * dpr); - mat.uniforms.uDpr.value = dpr; - } - - resize(); - 
window.addEventListener("resize", resize); - - function loop() { - requestAnimationFrame(loop); - renderer.render(scene, camera); - } - loop(); -} - -// --- Initialize --- -document.addEventListener("DOMContentLoaded", () => { - const detectedPlatform = detectPlatform(); - switchPlatform(detectedPlatform); - - initScrollAnimations(); - initNoiseOverlay(); - - const terminalEl = document.getElementById("terminal-demo"); - - if (terminalEl) { - const demo = new TerminalDemo(terminalEl); - - const observer = new IntersectionObserver( - (entries) => { - entries.forEach((entry) => { - if (entry.isIntersecting) { - demo.start(); - } else { - demo.stop(); - } - }); - }, - { threshold: 0.3 }, - ); - - observer.observe(document.querySelector(".terminal-window")); - } - - const nav = document.querySelector(".nav"); - let ticking = false; - window.addEventListener("scroll", () => { - if (!ticking) { - requestAnimationFrame(() => { - if (window.scrollY > 50) { - nav.style.borderBottomColor = "rgba(48, 80, 255, 0.15)"; - } else { - nav.style.borderBottomColor = ""; - } - ticking = false; - }); - ticking = true; - } - }); -}); diff --git a/landingpage/style.css b/landingpage/style.css deleted file mode 100644 index 30334df0d0..0000000000 --- a/landingpage/style.css +++ /dev/null @@ -1,1178 +0,0 @@ -/* ========================================================================= - Hermes Agent Landing Page - Colors: Nous Blue (#3050FF) palette - ========================================================================= */ - -/* --- Reset & Base --- */ -*, *::before, *::after { - margin: 0; - padding: 0; - box-sizing: border-box; -} - -:root { - --primary: #3050FF; - --primary-light: #5070FF; - --primary-dim: #2040CC; - --primary-dark: #1E30AA; - --bg: #0A0E1A; - --bg-card: #12182A; - --bg-card-hover: #1A2240; - --border: rgba(48, 80, 255, 0.1); - --border-hover: rgba(48, 80, 255, 0.22); - --text: #E8ECFF; - --text-dim: #8090BB; - --text-muted: #506090; - --font-sans: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; - --font-mono: 'JetBrains Mono', 'Fira Code', 'Cascadia Code', monospace; - --container: 1080px; - --radius: 12px; - --radius-sm: 8px; - - --ease-in-quad: cubic-bezier(.55, .085, .68, .53); - --ease-in-cubic: cubic-bezier(.550, .055, .675, .19); - --ease-in-quart: cubic-bezier(.895, .03, .685, .22); - --ease-in-quint: cubic-bezier(.755, .05, .855, .06); - --ease-in-expo: cubic-bezier(.95, .05, .795, .035); - --ease-in-circ: cubic-bezier(.6, .04, .98, .335); - - --ease-out-quad: cubic-bezier(.25, .46, .45, .94); - --ease-out-cubic: cubic-bezier(.215, .61, .355, 1); - --ease-out-quart: cubic-bezier(.165, .84, .44, 1); - --ease-out-quint: cubic-bezier(.23, 1, .32, 1); - --ease-out-expo: cubic-bezier(.19, 1, .22, 1); - --ease-out-circ: cubic-bezier(.075, .82, .165, 1); - - --ease-in-out-quad: cubic-bezier(.455, .03, .515, .955); - --ease-in-out-cubic: cubic-bezier(.645, .045, .355, 1); - --ease-in-out-quart: cubic-bezier(.77, 0, .175, 1); - --ease-in-out-quint: cubic-bezier(.86, 0, .07, 1); - --ease-in-out-expo: cubic-bezier(1, 0, 0, 1); - --ease-in-out-circ: cubic-bezier(.785, .135, .15, .86); -} - -html { - scroll-behavior: smooth; - -webkit-font-smoothing: antialiased; - -moz-osx-font-smoothing: grayscale; - overflow-x: hidden; -} - -body { - font-family: var(--font-sans); - background: var(--bg); - color: var(--text); - line-height: 1.6; - overflow-x: hidden; - width: 100%; - max-width: 100vw; - background-image: radial-gradient(rgba(48, 80, 255, 0.04) 1px, transparent 1px); - 
background-size: 32px 32px; -} - -a { - color: var(--primary); - text-decoration: none; - transition: color 0.2s var(--ease-out-quad); -} -a:hover { - color: var(--primary-light); -} - -strong { - color: #fff; - font-weight: 600; -} - -/* --- Noise Overlay --- */ -#noise-overlay { - position: fixed; - inset: 0; - width: 100%; - height: 100%; - z-index: 50; - pointer-events: none; - mix-blend-mode: soft-light; -} - -/* --- Ambient Glow --- */ -.ambient-glow { - position: fixed; - pointer-events: none; - z-index: 0; - border-radius: 50%; - filter: blur(120px); - opacity: 0.15; -} -.glow-1 { - width: 600px; - height: 600px; - background: var(--primary); - top: -200px; - left: -200px; - opacity: 0.08; -} -.glow-2 { - width: 500px; - height: 500px; - background: var(--primary-dim); - bottom: 20%; - right: -150px; - opacity: 0.06; -} - -/* --- Container --- */ -.container { - max-width: var(--container); - margin: 0 auto; - padding: 0 24px; -} - -/* --- Navigation --- */ -.nav { - position: fixed; - top: 0; - left: 0; - right: 0; - z-index: 100; - background: rgba(7, 7, 13, 0.8); - backdrop-filter: blur(20px); - -webkit-backdrop-filter: blur(20px); - border-bottom: 1px solid var(--border); - transition: border-bottom-color 0.3s var(--ease-out-quad); -} - -.nav-inner { - max-width: var(--container); - margin: 0 auto; - padding: 0 24px; - height: 60px; - display: flex; - align-items: center; - justify-content: space-between; -} - -.nav-logo { - display: flex; - align-items: center; - gap: 10px; - color: var(--text); - font-weight: 600; - font-size: 15px; - transition: color 0.2s var(--ease-out-quad); -} -.nav-logo:hover { color: var(--primary-light); } - -.nav-nous-logo { - width: 22px; - height: 22px; - border-radius: 4px; -} - -.nav-by { - font-weight: 400; - color: var(--text-muted); - font-size: 13px; -} - -.nav-links { - display: flex; - align-items: center; - gap: 28px; -} - -.nav-links a { - color: var(--text-dim); - font-size: 14px; - font-weight: 500; - display: flex; - align-items: center; - gap: 4px; - transition: color 0.2s var(--ease-out-quad); -} -.nav-links a:hover { color: #fff; } - -.external-icon { opacity: 0.4; } - -/* --- Hamburger & Mobile Nav --- */ -.nav-hamburger { - display: none; - background: none; - border: none; - cursor: pointer; - padding: 6px; - width: 34px; - height: 34px; - flex-direction: column; - justify-content: center; - gap: 5px; -} - -.hamburger-bar { - display: block; - width: 20px; - height: 2px; - background: var(--text-dim); - border-radius: 1px; - transition: transform 0.25s var(--ease-out-quint), opacity 0.2s var(--ease-out-quad); - transform-origin: center; -} - -.nav-hamburger.open .hamburger-bar:nth-child(1) { - transform: translateY(7px) rotate(45deg); -} - -.nav-hamburger.open .hamburger-bar:nth-child(2) { - opacity: 0; -} - -.nav-hamburger.open .hamburger-bar:nth-child(3) { - transform: translateY(-7px) rotate(-45deg); -} - -.nav-mobile { - display: none; -} - -.nav-mobile.open { - display: flex; - flex-direction: column; - position: absolute; - top: 60px; - left: 0; - right: 0; - background: rgba(7, 7, 13, 0.95); - backdrop-filter: blur(20px); - -webkit-backdrop-filter: blur(20px); - border-bottom: 1px solid var(--border); - padding: 16px 24px; - gap: 16px; -} - -.nav-mobile a { - color: var(--text-dim); - font-size: 15px; - font-weight: 500; - padding: 4px 0; - transition: color 0.2s var(--ease-out-quad); -} - -.nav-mobile a:hover { - color: #fff; -} - -/* --- Hero --- */ -.hero { - position: relative; - z-index: 1; - min-height: 100vh; - 
display: flex; - align-items: center; - justify-content: center; - padding: 120px 24px 80px; - text-align: center; -} - -.hero-content { - max-width: 760px; -} - -.hero-badge { - display: inline-flex; - align-items: center; - gap: 8px; - padding: 6px 16px; - background: rgba(48, 80, 255, 0.08); - border: 1px solid rgba(48, 80, 255, 0.18); - border-radius: 100px; - font-size: 13px; - color: var(--text-dim); - margin-bottom: 32px; - font-weight: 450; -} - -.badge-dot { - width: 6px; - height: 6px; - border-radius: 50%; - background: var(--primary); - display: inline-block; - animation: pulse-dot 2s var(--ease-in-out-quad) infinite; -} - -@keyframes pulse-dot { - 0%, 100% { opacity: 1; } - 50% { opacity: 0.3; } -} - -.hero-ascii { - margin-bottom: 28px; - font-family: 'JetBrains Mono', monospace; - font-variant-ligatures: none; - font-size: clamp(4px, 0.95vw, 11px); - line-height: 1.15; - color: var(--primary-light); - text-align: center; - text-shadow: 0 0 20px rgba(48, 80, 255, 0.3); - opacity: 0.85; - transition: opacity 0.3s var(--ease-out-cubic); - overflow-x: auto; - white-space: pre; -} - -.hero-ascii:hover { - opacity: 1; -} - -.hero-title { - font-size: clamp(36px, 6vw, 56px); - font-weight: 700; - line-height: 1.15; - letter-spacing: -0.03em; - margin-bottom: 20px; - color: #fff; -} - -.hero-gradient { - background: linear-gradient(135deg, var(--primary), var(--primary-light), #90B0FF); - -webkit-background-clip: text; - -webkit-text-fill-color: transparent; - background-clip: text; -} - -.hero-subtitle { - font-size: 17px; - line-height: 1.7; - color: var(--text-dim); - max-width: 620px; - margin: 0 auto 36px; -} - -.hero-install { - margin-bottom: 32px; -} - -/* --- Install Widget (hero tabbed installer) --- */ -.install-widget { - max-width: 740px; - margin: 0 auto; - background: var(--bg-card); - border: 1px solid var(--border); - border-radius: var(--radius); - overflow: hidden; - transition: border-color 0.3s var(--ease-out-quad); -} - -.install-widget:hover { - border-color: var(--border-hover); -} - -.install-widget-header { - display: flex; - align-items: center; - gap: 16px; - padding: 10px 16px; - background: rgba(255, 255, 255, 0.02); - border-bottom: 1px solid var(--border); -} - -.install-dots { - display: flex; - gap: 6px; - flex-shrink: 0; -} - -.install-dots .dot { - width: 10px; - height: 10px; - border-radius: 50%; -} - -.install-tabs { - display: flex; - gap: 4px; - flex-wrap: wrap; -} - -.install-tab { - display: inline-flex; - align-items: center; - gap: 6px; - padding: 5px 14px; - border: none; - border-radius: 6px; - font-family: var(--font-sans); - font-size: 12px; - font-weight: 500; - cursor: pointer; - transition: color 0.2s var(--ease-out-quad), background 0.2s var(--ease-out-quad); - background: transparent; - color: var(--text-muted); -} - -.install-tab:hover { - color: var(--text-dim); - background: rgba(255, 255, 255, 0.04); -} - -.install-tab.active { - background: rgba(48, 80, 255, 0.14); - color: var(--primary-light); -} - -.install-tab svg { - flex-shrink: 0; -} - -.install-widget-body { - display: flex; - align-items: center; - gap: 10px; - padding: 14px 16px; - font-family: var(--font-mono); - font-size: 13px; - color: var(--text); - overflow-x: auto; -} - -.install-prompt { - color: var(--primary-light); - font-weight: 600; - flex-shrink: 0; - opacity: 0.7; -} - -.install-widget-body code { - flex: 1; - white-space: nowrap; - overflow: hidden; - text-overflow: ellipsis; - text-align: left; - transition: opacity 0.15s var(--ease-out-quad); -} - 
-/* --- Code block tabs (install step section) --- */ -.code-tabs { - display: flex; - gap: 2px; -} - -.code-tab { - padding: 3px 10px; - border: none; - border-radius: 4px; - font-family: var(--font-mono); - font-size: 11px; - font-weight: 500; - cursor: pointer; - transition: color 0.2s var(--ease-out-quad), background 0.2s var(--ease-out-quad); - background: transparent; - color: var(--text-muted); -} - -.code-tab:hover { - color: var(--text-dim); - background: rgba(255, 255, 255, 0.04); -} - -.code-tab.active { - background: rgba(48, 80, 255, 0.12); - color: var(--primary-light); -} - -.copy-btn { - flex-shrink: 0; - display: flex; - align-items: center; - gap: 6px; - background: none; - border: none; - color: var(--text-dim); - cursor: pointer; - padding: 4px 8px; - border-radius: 6px; - font-family: var(--font-sans); - font-size: 12px; - transition: color 0.2s var(--ease-out-quad), background 0.2s var(--ease-out-quad); -} -.copy-btn:hover { - color: var(--primary-light); - background: rgba(48, 80, 255, 0.1); -} -.copy-btn:active { - transform: scale(0.95); -} - -.install-note { - font-size: 13px; - color: var(--text-muted); - margin-top: 12px; -} - -.hero-links { - display: flex; - gap: 12px; - justify-content: center; - flex-wrap: wrap; -} - -.btn { - display: inline-flex; - align-items: center; - gap: 8px; - padding: 11px 24px; - border-radius: var(--radius); - font-size: 14px; - font-weight: 550; - transition: background 0.25s var(--ease-out-quint), border-color 0.25s var(--ease-out-quad), color 0.2s var(--ease-out-quad), transform 0.25s var(--ease-out-quint); - border: 1px solid transparent; - will-change: transform; -} - -.btn-primary { - background: rgba(48, 80, 255, 0.12); - color: var(--primary-light); - border-color: rgba(48, 80, 255, 0.25); -} -.btn-primary:hover { - background: rgba(48, 80, 255, 0.22); - border-color: rgba(48, 80, 255, 0.4); - color: #fff; -} - -@media (hover: hover) and (pointer: fine) { - .btn-primary:hover { - transform: translateY(-1px); - } -} -.btn:active { - transform: scale(0.97); -} - -/* --- Sections --- */ -.section { - position: relative; - z-index: 1; - padding: 80px 0; -} - -.section-header { - display: flex; - align-items: center; - justify-content: center; - gap: 12px; - margin-bottom: 48px; -} - -.section-header h2 { - font-size: 28px; - font-weight: 650; - color: #fff; - letter-spacing: -0.02em; -} - -.section-desc { - color: var(--text-dim); - font-size: 16px; - line-height: 1.7; - max-width: 640px; - margin: 0 auto 40px; - text-align: center; -} - -/* --- Features Grid --- */ -.features-grid { - display: grid; - grid-template-columns: repeat(3, 1fr); - gap: 16px; -} - -.feature-card { - background: var(--bg-card); - border: 1px solid var(--border); - border-radius: var(--radius); - padding: 20px; - transition: border-color 0.3s var(--ease-out-quad), background 0.3s var(--ease-out-quad), transform 0.3s var(--ease-out-quint); - will-change: transform; -} - -.feature-card:hover { - border-color: var(--border-hover); - background: var(--bg-card-hover); -} - -@media (hover: hover) and (pointer: fine) { - .feature-card:hover { - transform: translateY(-2px); - } -} - -.feature-header { - display: flex; - align-items: center; - gap: 10px; - margin-bottom: 10px; -} - -.feature-icon { - color: var(--primary-light); - opacity: 0.85; - flex-shrink: 0; - display: flex; - line-height: 0; -} - -.feature-card h3 { - font-size: 15px; - font-weight: 600; - color: #fff; - letter-spacing: -0.01em; -} - -.feature-card p { - font-size: 14px; - color: 
var(--text-dim); - line-height: 1.65; -} - -/* --- Terminal Demo --- */ -.section-demo { - padding-bottom: 60px; - border-top: 1px solid var(--border); - border-bottom: 1px solid var(--border); -} - -.terminal-window { - background: #0c0c14; - border: 1px solid var(--border); - border-radius: var(--radius); - overflow: hidden; - max-width: 800px; - margin: 0 auto; -} - -.terminal-header { - display: flex; - align-items: center; - padding: 12px 16px; - background: rgba(255, 255, 255, 0.02); - border-bottom: 1px solid var(--border); - gap: 12px; -} - -.terminal-dots { - display: flex; - gap: 6px; -} - -.dot { - width: 10px; - height: 10px; - border-radius: 50%; -} -.dot-red { background: #ff5f57; } -.dot-yellow { background: #febc2e; } -.dot-green { background: #28c840; } - -.terminal-title { - font-family: var(--font-mono); - font-size: 12px; - color: var(--text-muted); -} - -.terminal-body { - padding: 20px 24px; - height: 340px; - font-family: var(--font-mono); - font-size: 13px; - line-height: 1.7; - white-space: pre-wrap; - overflow-y: auto; - overflow-x: hidden; -} - -.terminal-cursor { - animation: blink 1s step-end infinite; - color: var(--primary-light); - opacity: 0.8; -} - -@keyframes blink { - 0%, 100% { opacity: 0.8; } - 50% { opacity: 0; } -} - -/* Terminal demo colors */ -.t-prompt { color: var(--primary-light); } -.t-cmd { color: #fff; } -.t-dim { color: var(--text-muted); } -.t-text { color: var(--text-dim); } -.t-green { color: #4ade80; } -.t-blue { color: #60a5fa; } -.t-accent { color: var(--primary-light); } -.t-highlight { color: #90B0FF; } -.t-tool { color: var(--text-muted); } - -/* --- Specs Toggle --- */ -.features-more { - text-align: center; - margin-top: 32px; -} - -.more-toggle { - background: none; - border: 1px solid var(--border); - color: var(--text-dim); - font-size: 14px; - font-family: inherit; - padding: 8px 20px; - border-radius: 6px; - cursor: pointer; - display: inline-flex; - align-items: center; - gap: 6px; - transition: color 0.2s var(--ease-out-quad), border-color 0.2s var(--ease-out-quad); -} - -.more-toggle:hover { - color: var(--primary-light); - border-color: var(--primary-light); -} -.more-toggle:active { - transform: scale(0.97); -} - -.more-chevron { - transition: transform 0.3s var(--ease-in-out-cubic); -} - -.more-toggle.open .more-chevron { - transform: rotate(180deg); -} - -.specs-wrapper { - max-height: 0; - overflow: hidden; - transition: max-height 0.4s var(--ease-out-quart), opacity 0.3s var(--ease-out-quad); - opacity: 0; -} - -.specs-wrapper.open { - opacity: 1; -} - -/* --- Specs --- */ -.section-specs { -} - -.specs-list { - max-width: 720px; - margin: 0 auto; - padding-top: 24px; -} - -.spec-row { - display: grid; - grid-template-columns: 120px 1fr; - gap: 24px; - padding: 24px 0; - border-bottom: 1px solid var(--border); -} - -.spec-row:last-child { - border-bottom: none; -} - -.spec-label { - font-size: 14px; - font-weight: 600; - color: var(--primary-light); - padding-top: 2px; -} - -.spec-value { - font-size: 15px; - color: var(--text-dim); - line-height: 1.7; -} - -.spec-value a { - color: var(--text); - border-bottom: 1px solid var(--border-hover); - transition: border-color 0.2s var(--ease-out-quad), color 0.2s var(--ease-out-quad); -} - -.spec-value a:hover { - color: var(--primary-light); - border-color: var(--primary-light); -} - -/* --- Install Section --- */ -.section-install { - border-top: 1px solid var(--border); -} - -.install-steps { - display: grid; - gap: 28px; - max-width: 640px; - margin: 0 auto; -} - 
-.install-step { - display: flex; - gap: 20px; -} - -.step-number { - flex-shrink: 0; - width: 32px; - height: 32px; - display: flex; - align-items: center; - justify-content: center; - background: rgba(48, 80, 255, 0.1); - border: 1px solid rgba(48, 80, 255, 0.2); - border-radius: 50%; - font-size: 14px; - font-weight: 600; - color: var(--primary-light); - margin-top: 2px; -} - -.step-content { - flex: 1; - min-width: 0; -} - -.step-content h4 { - font-size: 16px; - font-weight: 600; - color: #fff; - margin-bottom: 10px; -} - -.step-optional { - font-size: 12px; - font-weight: 400; - color: var(--text-muted); -} - -.step-note { - font-size: 13px; - color: var(--text-muted); - margin-top: 8px; -} - -.code-block { - background: #0c0c14; - border: 1px solid var(--border); - border-radius: var(--radius-sm); - overflow: hidden; -} - -.code-block-sm { - max-width: 640px; -} - -.code-header { - display: flex; - justify-content: space-between; - align-items: center; - padding: 8px 14px; - background: rgba(255, 255, 255, 0.02); - border-bottom: 1px solid var(--border); - font-family: var(--font-mono); - font-size: 11px; - color: var(--text-muted); -} - -.code-block pre { - padding: 14px 16px; - font-family: var(--font-mono); - font-size: 13px; - line-height: 1.6; - color: var(--text); - overflow-x: auto; - white-space: pre-wrap; - word-break: break-all; -} - -.code-comment { - color: var(--text-muted); -} - -.install-windows { - margin-top: 48px; - padding-top: 32px; - border-top: 1px solid var(--border); - max-width: 640px; - margin-left: auto; - margin-right: auto; -} - -.install-windows p { - font-size: 14px; - color: var(--text-dim); - margin-bottom: 12px; -} - -/* --- Footer --- */ -.footer { - position: relative; - z-index: 1; - padding: 40px 0 32px; - border-top: 1px solid var(--border); -} - -.footer-copy { - text-align: center; - font-size: 13px; - color: var(--text-muted); -} - -.footer-copy a { - color: var(--text-dim); - transition: color 0.2s var(--ease-out-quad); -} - -.footer-copy a:hover { - color: var(--primary-light); -} - -/* --- Scroll Animations --- */ -.fade-in { - opacity: 0; - transform: translateY(20px); - transition: opacity 0.6s var(--ease-out-quart), transform 0.6s var(--ease-out-quart); - will-change: transform, opacity; -} - -.fade-in.visible { - opacity: 1; - transform: translateY(0); -} - -/* --- Responsive --- */ - -/* Clamp ambient glows so they can't cause horizontal scroll */ -@media (max-width: 900px) { - .ambient-glow { display: none; } - - .features-grid { - grid-template-columns: repeat(2, 1fr); - } - -} - -@media (max-width: 640px) { - /* --- Global mobile --- */ - .container { - padding: 0 16px; - } - - .section { - padding: 50px 0; - } - - .section-header { - margin-bottom: 32px; - } - - .section-header h2 { - font-size: 20px; - } - - .section-desc { - font-size: 14px; - } - - /* --- Nav --- */ - .nav-inner { - padding: 0 16px; - } - - .nav-links { - display: none; - } - - .nav-hamburger { - display: flex; - } - - /* --- Hero --- */ - .hero { - padding: 90px 16px 50px; - min-height: auto; - } - - .hero-content { - max-width: 100%; - } - - .hero-badge { - font-size: 11px; - padding: 5px 12px; - margin-bottom: 24px; - } - - .hero-ascii { - font-size: 3.5px; - } - - .hero-title { - font-size: 26px; - margin-bottom: 14px; - } - - .hero-subtitle { - font-size: 14px; - line-height: 1.6; - margin: 0 auto 28px; - } - - .install-widget-body { - font-size: 10px; - padding: 10px 12px; - } - - .install-widget-body code { - overflow: hidden; - text-overflow: ellipsis; 
- display: block; - } - - .install-widget-header { - padding: 8px 12px; - gap: 10px; - } - - .install-tabs { - gap: 2px; - } - - .install-tab { - padding: 4px 10px; - font-size: 11px; - } - - .install-tab svg { - display: none; - } - - .copy-btn { - padding: 3px 6px; - } - - .copy-btn .copy-text { display: none; } - - .install-note { - font-size: 11px; - } - - .hero-links { - flex-direction: column; - align-items: stretch; - } - - .hero-links .btn { - justify-content: center; - } - - /* --- Grids → single column --- */ - .features-grid { - grid-template-columns: 1fr; - } - - .spec-row { - grid-template-columns: 1fr; - gap: 6px; - padding: 18px 0; - } - - .feature-card { - padding: 16px 18px; - } - - .feature-card p { - font-size: 13px; - line-height: 1.5; - } - - /* --- Terminal demo --- */ - .terminal-body { - font-size: 11px; - padding: 14px; - height: 260px; - } - - /* --- Install steps --- */ - .install-steps { - max-width: 100%; - } - - .install-step { - gap: 14px; - } - - .step-number { - width: 28px; - height: 28px; - font-size: 13px; - } - - .code-block pre { - font-size: 11px; - word-break: break-all; - } - - .install-windows { - max-width: 100%; - } - - /* --- Footer --- */ - .footer { - padding: 32px 0 24px; - } - -} - -/* --- Reduced Motion --- */ -@media (prefers-reduced-motion: reduce) { - *, *::before, *::after { - animation-duration: 0.01ms !important; - animation-iteration-count: 1 !important; - transition-duration: 0.01ms !important; - } - - .fade-in { - opacity: 1; - transform: none; - } - - .hero-ascii { - opacity: 0.85; - } -} - -/* --- Selection --- */ -::selection { - background: rgba(48, 80, 255, 0.25); - color: #fff; -} - -/* --- Scrollbar --- */ -::-webkit-scrollbar { - width: 6px; - height: 6px; -} -::-webkit-scrollbar-track { - background: var(--bg); -} -::-webkit-scrollbar-thumb { - background: var(--border-hover); - border-radius: 3px; -} -::-webkit-scrollbar-thumb:hover { - background: var(--primary-dim); -} diff --git a/run_agent.py b/run_agent.py index a880bc4db4..a70b9c3c75 100644 --- a/run_agent.py +++ b/run_agent.py @@ -540,13 +540,6 @@ class AIAgent: for AI models that support function calling. """ - # ── Class-level context pressure dedup (survives across instances) ── - # The gateway creates a new AIAgent per message, so instance-level flags - # reset every time. This dict tracks {session_id: (warn_level, timestamp)} - # to suppress duplicate warnings within a cooldown window. - _context_pressure_last_warned: dict = {} - _CONTEXT_PRESSURE_COOLDOWN = 300 # seconds between re-warning same session - @property def base_url(self) -> str: return self._base_url @@ -826,12 +819,6 @@ class AIAgent: self._budget_exhausted_injected = False self._budget_grace_call = False - # Context pressure warnings: notify the USER (not the LLM) as context - # fills up. Purely informational — displayed in CLI output and sent via - # status_callback for gateway platforms. Does NOT inject into messages. - # Tiered: fires at 85% and again at 95% of compaction threshold. - self._context_pressure_warned_at = 0.0 # highest tier already shown - # Activity tracking — updated on each API call, tool execution, and # stream chunk. 
Used by the gateway timeout handler to report what the # agent was doing when it was killed, and by the "still working" @@ -4353,6 +4340,15 @@ class AIAgent: def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any: from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls + # Treat client_kwargs as read-only. Callers pass self._client_kwargs (or shallow + # copies of it) in; any in-place mutation leaks back into the stored dict and is + # reused on subsequent requests. #10933 hit this by injecting an httpx.Client + # transport that was torn down after the first request, so the next request + # wrapped a closed transport and raised "Cannot send a request, as the client + # has been closed" on every retry. The revert resolved that specific path; this + # copy locks the contract so future transport/keepalive work can't reintroduce + # the same class of bug. + client_kwargs = dict(client_kwargs) _validate_proxy_env_urls() _validate_base_url(client_kwargs.get("base_url")) if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"): @@ -7221,20 +7217,6 @@ class AIAgent: self.context_compressor.last_prompt_tokens = _compressed_est self.context_compressor.last_completion_tokens = 0 - # Only reset the pressure warning if compression actually brought - # us below the warning level (85% of threshold). When compression - # can't reduce enough (e.g. threshold is very low, or system prompt - # alone exceeds the warning level), keep the tier set to prevent - # spamming the user with repeated warnings every loop iteration. - if self.context_compressor.threshold_tokens > 0: - _post_progress = _compressed_est / self.context_compressor.threshold_tokens - if _post_progress < 0.85: - self._context_pressure_warned_at = 0.0 - # Clear class-level dedup for this session so a fresh - # warning cycle can start if context grows again. - _sid = self.session_id or "default" - AIAgent._context_pressure_last_warned.pop(_sid, None) - # Clear the file-read dedup cache. After compression the original # read content is summarised away — if the model re-reads the same # file it needs the full content, not a "file unchanged" stub. @@ -8034,45 +8016,6 @@ class AIAgent: - def _emit_context_pressure(self, compaction_progress: float, compressor) -> None: - """Notify the user that context is approaching the compaction threshold. - - Args: - compaction_progress: How close to compaction (0.0–1.0, where 1.0 = fires). - compressor: The ContextCompressor instance (for threshold/context info). - - Purely user-facing — does NOT modify the message stream. - For CLI: prints a formatted line with a progress bar. - For gateway: fires status_callback so the platform can send a chat message. - """ - from agent.display import format_context_pressure, format_context_pressure_gateway - - threshold_pct = compressor.threshold_tokens / compressor.context_length if compressor.context_length else 0.5 - - # CLI output — always shown (these are user-facing status notifications, - # not verbose debug output, so they bypass quiet_mode). - # Gateway users also get the callback below. 
- if self.platform in (None, "cli"): - line = format_context_pressure( - compaction_progress=compaction_progress, - threshold_tokens=compressor.threshold_tokens, - threshold_percent=threshold_pct, - compression_enabled=self.compression_enabled, - ) - self._safe_print(line) - - # Gateway / external consumers - if self.status_callback: - try: - msg = format_context_pressure_gateway( - compaction_progress=compaction_progress, - threshold_percent=threshold_pct, - compression_enabled=self.compression_enabled, - ) - self.status_callback("context_pressure", msg) - except Exception: - logger.debug("status_callback error in context pressure", exc_info=True) - def _handle_max_iterations(self, messages: list, api_call_count: int) -> str: """Request a summary when max iterations are reached. Returns the final response text.""" print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary...") @@ -10800,38 +10743,6 @@ class AIAgent: else: _real_tokens = estimate_messages_tokens_rough(messages) - # ── Context pressure warnings (user-facing only) ────────── - # Notify the user (NOT the LLM) as context approaches the - # compaction threshold. Thresholds are relative to where - # compaction fires, not the raw context window. - # Does not inject into messages — just prints to CLI output - # and fires status_callback for gateway platforms. - # Tiered: 85% (orange) and 95% (red/critical). - if _compressor.threshold_tokens > 0: - _compaction_progress = _real_tokens / _compressor.threshold_tokens - # Determine the warning tier for this progress level - _warn_tier = 0.0 - if _compaction_progress >= 0.95: - _warn_tier = 0.95 - elif _compaction_progress >= 0.85: - _warn_tier = 0.85 - if _warn_tier > self._context_pressure_warned_at: - # Class-level dedup: check if this session was already - # warned at this tier within the cooldown window. 
- _sid = self.session_id or "default" - _last = AIAgent._context_pressure_last_warned.get(_sid) - _now = time.time() - if _last is None or _last[0] < _warn_tier or (_now - _last[1]) >= self._CONTEXT_PRESSURE_COOLDOWN: - self._context_pressure_warned_at = _warn_tier - AIAgent._context_pressure_last_warned[_sid] = (_warn_tier, _now) - self._emit_context_pressure(_compaction_progress, _compressor) - # Evict stale entries (older than 2x cooldown) - _cutoff = _now - self._CONTEXT_PRESSURE_COOLDOWN * 2 - AIAgent._context_pressure_last_warned = { - k: v for k, v in AIAgent._context_pressure_last_warned.items() - if v[1] > _cutoff - } - if self.compression_enabled and _compressor.should_compress(_real_tokens): self._safe_print(" ⟳ compacting context…") messages, active_system_prompt = self._compress_context( diff --git a/scripts/release.py b/scripts/release.py index 4d3a7ac3c8..30b72a9c7a 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -180,6 +180,18 @@ AUTHOR_MAP = { "lisicheng168@gmail.com": "lesterli", "mingjwan@microsoft.com": "MagicRay1217", "orangeko@gmail.com": "GenKoKo", + "82095453+iacker@users.noreply.github.com": "iacker", + "sontianye@users.noreply.github.com": "sontianye", + "jackjin1997@users.noreply.github.com": "jackjin1997", + "danieldoderlein@users.noreply.github.com": "danieldoderlein", + "lrawnsley@users.noreply.github.com": "lrawnsley", + "taeuk178@users.noreply.github.com": "taeuk178", + "ogzerber@users.noreply.github.com": "ogzerber", + "cola-runner@users.noreply.github.com": "cola-runner", + "ygd58@users.noreply.github.com": "ygd58", + "vominh1919@users.noreply.github.com": "vominh1919", + "LeonSGP43@users.noreply.github.com": "LeonSGP43", + "Lubrsy706@users.noreply.github.com": "Lubrsy706", "niyant@spicefi.xyz": "spniyant", "olafthiele@gmail.com": "olafthiele", "oncuevtv@gmail.com": "sprmn24", diff --git a/tests/agent/test_vision_resolved_args.py b/tests/agent/test_vision_resolved_args.py new file mode 100644 index 0000000000..aace435784 --- /dev/null +++ b/tests/agent/test_vision_resolved_args.py @@ -0,0 +1,40 @@ +"""Test that call_llm vision path passes resolved provider args, not raw ones.""" + +from unittest.mock import patch, MagicMock + + +def test_vision_call_uses_resolved_provider_args(): + """Resolved provider/model/key/url from config must reach resolve_vision_provider_client.""" + from agent.auxiliary_client import call_llm + + fake_client = MagicMock() + fake_client.chat.completions.create.return_value = MagicMock( + choices=[MagicMock(message=MagicMock(content="description"))], + usage=MagicMock(prompt_tokens=10, completion_tokens=5), + ) + + with ( + patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=("my-resolved-provider", "my-resolved-model", "http://resolved", "resolved-key", "chat_completions"), + ), + patch( + "agent.auxiliary_client.resolve_vision_provider_client", + return_value=("my-resolved-provider", fake_client, "my-resolved-model"), + ) as mock_vision, + ): + call_llm( + "vision", + provider="raw-provider", + model="raw-model", + base_url="http://raw", + api_key="raw-key", + messages=[{"role": "user", "content": "describe this"}], + ) + + # The resolved values must be passed, not the raw call_llm arguments + call_args = mock_vision.call_args + assert call_args.kwargs["provider"] == "my-resolved-provider" + assert call_args.kwargs["model"] == "my-resolved-model" + assert call_args.kwargs["base_url"] == "http://resolved" + assert call_args.kwargs["api_key"] == "resolved-key" diff --git 
a/tests/cli/test_cli_provider_resolution.py b/tests/cli/test_cli_provider_resolution.py index 9c5bf0cca4..624e166a87 100644 --- a/tests/cli/test_cli_provider_resolution.py +++ b/tests/cli/test_cli_provider_resolution.py @@ -578,7 +578,7 @@ def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys): # After the probe detects a single model ("llm"), the flow asks # "Use this model? [Y/n]:" — confirm with Enter, then context length, # then display name. - answers = iter(["http://localhost:8000", "local-key", "", "", ""]) + answers = iter(["http://localhost:8000", "local-key", "", "", "", ""]) monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) monkeypatch.setattr("getpass.getpass", lambda _prompt="": next(answers)) diff --git a/tests/cli/test_cwd_env_respect.py b/tests/cli/test_cwd_env_respect.py new file mode 100644 index 0000000000..e9f3341d2a --- /dev/null +++ b/tests/cli/test_cwd_env_respect.py @@ -0,0 +1,107 @@ +"""Tests that load_cli_config() guards against lazy-import TERMINAL_CWD clobbering. + +When the gateway resolves TERMINAL_CWD at startup and cli.py is later +imported lazily (via delegate_tool → CLI_CONFIG), load_cli_config() must +not overwrite the already-resolved value with os.getcwd(). + +config.yaml terminal.cwd is the canonical source of truth. +.env TERMINAL_CWD and MESSAGING_CWD are deprecated. +See issue #10817. +""" + +import os +import pytest + + +# The sentinel values that mean "resolve at runtime" +_CWD_PLACEHOLDERS = (".", "auto", "cwd") + + +def _resolve_terminal_cwd(terminal_config: dict, defaults: dict, env: dict): + """Simulate the CWD resolution logic from load_cli_config(). + + This mirrors the code in cli.py that checks for a pre-resolved + TERMINAL_CWD before falling back to os.getcwd(). 
+ """ + if terminal_config.get("cwd") in _CWD_PLACEHOLDERS: + _existing_cwd = env.get("TERMINAL_CWD", "") + if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd): + terminal_config["cwd"] = _existing_cwd + defaults["terminal"]["cwd"] = _existing_cwd + else: + effective_backend = terminal_config.get("env_type", "local") + if effective_backend == "local": + terminal_config["cwd"] = "/fake/getcwd" # stand-in for os.getcwd() + defaults["terminal"]["cwd"] = terminal_config["cwd"] + else: + terminal_config.pop("cwd", None) + + # Simulate the bridging loop: write terminal_config["cwd"] to env + _file_has_terminal = defaults.get("_file_has_terminal", False) + if "cwd" in terminal_config: + if _file_has_terminal or "TERMINAL_CWD" not in env: + env["TERMINAL_CWD"] = str(terminal_config["cwd"]) + + return env.get("TERMINAL_CWD", "") + + +class TestLazyImportGuard: + """TERMINAL_CWD resolved by gateway must survive a lazy cli.py import.""" + + def test_gateway_resolved_cwd_survives(self): + """Gateway set TERMINAL_CWD → lazy cli import must not clobber.""" + env = {"TERMINAL_CWD": "/home/user/workspace"} + terminal_config = {"cwd": ".", "env_type": "local"} + defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False} + + result = _resolve_terminal_cwd(terminal_config, defaults, env) + assert result == "/home/user/workspace" + + def test_gateway_resolved_cwd_survives_with_file_terminal(self): + """Even when config.yaml has a terminal: section, resolved CWD survives.""" + env = {"TERMINAL_CWD": "/home/user/workspace"} + terminal_config = {"cwd": ".", "env_type": "local"} + defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": True} + + result = _resolve_terminal_cwd(terminal_config, defaults, env) + assert result == "/home/user/workspace" + + +class TestConfigCwdResolution: + """config.yaml terminal.cwd is the canonical source of truth.""" + + def test_explicit_config_cwd_wins(self): + """terminal.cwd: /explicit/path always wins.""" + env = {"TERMINAL_CWD": "/old/gateway/value"} + terminal_config = {"cwd": "/explicit/path"} + defaults = {"terminal": {"cwd": "/explicit/path"}, "_file_has_terminal": True} + + result = _resolve_terminal_cwd(terminal_config, defaults, env) + assert result == "/explicit/path" + + def test_dot_cwd_resolves_to_getcwd_when_no_prior(self): + """With no pre-set TERMINAL_CWD, "." 
resolves to os.getcwd().""" + env = {} + terminal_config = {"cwd": "."} + defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False} + + result = _resolve_terminal_cwd(terminal_config, defaults, env) + assert result == "/fake/getcwd" + + def test_remote_backend_pops_cwd(self): + """Remote backend + placeholder cwd → popped for backend default.""" + env = {} + terminal_config = {"cwd": ".", "env_type": "docker"} + defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False} + + result = _resolve_terminal_cwd(terminal_config, defaults, env) + assert result == "" # cwd popped, no env var set + + def test_remote_backend_with_prior_cwd_preserves(self): + """Remote backend + pre-resolved TERMINAL_CWD → adopted.""" + env = {"TERMINAL_CWD": "/project"} + terminal_config = {"cwd": ".", "env_type": "docker"} + defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False} + + result = _resolve_terminal_cwd(terminal_config, defaults, env) + assert result == "/project" diff --git a/tests/cli/test_surrogate_sanitization.py b/tests/cli/test_surrogate_sanitization.py index defad587ee..43af7fe16c 100644 --- a/tests/cli/test_surrogate_sanitization.py +++ b/tests/cli/test_surrogate_sanitization.py @@ -138,7 +138,7 @@ class TestRunConversationSurrogateSanitization: mock_stream.return_value = mock_response mock_api.return_value = mock_response - agent = AIAgent(model="test/model", quiet_mode=True, skip_memory=True, skip_context_files=True) + agent = AIAgent(model="test/model", api_key="test-key", base_url="http://localhost:1234/v1", quiet_mode=True, skip_memory=True, skip_context_files=True) agent.client = MagicMock() # Pass a message with surrogates diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index a1cc2e1277..160b55efc6 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -675,7 +675,7 @@ class TestRunJobSessionPersistence: def test_run_job_empty_response_returns_empty_not_placeholder(self, tmp_path): """Empty final_response should stay empty for delivery logic (issue #2234). - + The placeholder '(No response generated)' should only appear in the output log, not in the returned final_response that's used for delivery. """ @@ -693,7 +693,7 @@ class TestRunJobSessionPersistence: patch( "hermes_cli.runtime_provider.resolve_runtime_provider", return_value={ - "api_key": "test-key", + "api_key": "***", "base_url": "https://example.invalid/v1", "provider": "openrouter", "api_mode": "chat_completions", @@ -714,6 +714,43 @@ class TestRunJobSessionPersistence: # But the output log should show the placeholder assert "(No response generated)" in output + def test_tick_marks_empty_response_as_error(self, tmp_path): + """When run_job returns success=True but final_response is empty, + tick() should mark the job as error so last_status != 'ok'. 
+ (issue #8585) + """ + from cron.scheduler import tick + from cron.jobs import load_jobs, save_jobs + + job = { + "id": "empty-job", + "name": "empty-test", + "prompt": "do something", + "schedule": "every 1h", + "enabled": True, + "next_run_at": "2020-01-01T00:00:00", + "deliver": "local", + "last_status": None, + } + + fake_db = MagicMock() + + with patch("cron.scheduler._hermes_home", tmp_path), \ + patch("cron.scheduler.get_due_jobs", return_value=[job]), \ + patch("cron.scheduler.advance_next_run"), \ + patch("cron.scheduler.mark_job_run") as mock_mark, \ + patch("cron.scheduler.save_job_output", return_value="/tmp/out.md"), \ + patch("cron.scheduler._resolve_origin", return_value=None), \ + patch("cron.scheduler.run_job", return_value=(True, "output", "", None)): + tick(verbose=False) + + # Should be called with success=False because final_response is empty + mock_mark.assert_called_once() + call_args = mock_mark.call_args + assert call_args[0][0] == "empty-job" + assert call_args[0][1] is False # success should be False + assert "empty" in call_args[0][2].lower() # error should mention empty + def test_run_job_sets_auto_delivery_env_from_dotenv_home_channel(self, tmp_path, monkeypatch): job = { "id": "test-job", diff --git a/tests/gateway/conftest.py b/tests/gateway/conftest.py index 5fd8d86fee..d2f55ff9f6 100644 --- a/tests/gateway/conftest.py +++ b/tests/gateway/conftest.py @@ -62,5 +62,86 @@ def _ensure_telegram_mock() -> None: sys.modules["telegram.error"] = mod.error +def _ensure_discord_mock() -> None: + """Install a comprehensive discord mock in sys.modules. + + Idempotent — skips when the real library is already imported. + Uses ``sys.modules[name] = mod`` (overwrite) instead of + ``setdefault`` so it wins even if a partial/broken import already + cached the module. + + This mock is comprehensive — it includes **all** attributes needed by + every gateway discord test file. Individual test files should call + this function (it short-circuits when already present) rather than + maintaining their own mock setup. 
+ """ + if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"): + return # Real library is installed — nothing to mock + + from types import SimpleNamespace + + discord_mod = MagicMock() + discord_mod.Intents.default.return_value = MagicMock() + discord_mod.Client = MagicMock + discord_mod.File = MagicMock + discord_mod.DMChannel = type("DMChannel", (), {}) + discord_mod.Thread = type("Thread", (), {}) + discord_mod.ForumChannel = type("ForumChannel", (), {}) + discord_mod.Interaction = object + discord_mod.Embed = MagicMock + discord_mod.ui = SimpleNamespace( + View=object, + button=lambda *a, **k: (lambda fn: fn), + Button=object, + ) + discord_mod.ButtonStyle = SimpleNamespace( + success=1, primary=2, secondary=2, danger=3, + green=1, grey=2, blurple=2, red=3, + ) + discord_mod.Color = SimpleNamespace( + orange=lambda: 1, green=lambda: 2, blue=lambda: 3, + red=lambda: 4, purple=lambda: 5, + ) + + # app_commands — needed by _register_slash_commands auto-registration + class _FakeGroup: + def __init__(self, *, name, description, parent=None): + self.name = name + self.description = description + self.parent = parent + self._children: dict = {} + if parent is not None: + parent.add_command(self) + + def add_command(self, cmd): + self._children[cmd.name] = cmd + + class _FakeCommand: + def __init__(self, *, name, description, callback, parent=None): + self.name = name + self.description = description + self.callback = callback + self.parent = parent + + discord_mod.app_commands = SimpleNamespace( + describe=lambda **kwargs: (lambda fn: fn), + choices=lambda **kwargs: (lambda fn: fn), + Choice=lambda **kwargs: SimpleNamespace(**kwargs), + Group=_FakeGroup, + Command=_FakeCommand, + ) + + ext_mod = MagicMock() + commands_mod = MagicMock() + commands_mod.Bot = MagicMock + ext_mod.commands = commands_mod + + for name in ("discord", "discord.ext", "discord.ext.commands"): + sys.modules[name] = discord_mod + sys.modules["discord.ext"] = ext_mod + sys.modules["discord.ext.commands"] = commands_mod + + # Run at collection time — before any test file's module-level imports. _ensure_telegram_mock() +_ensure_discord_mock() diff --git a/tests/gateway/test_background_command.py b/tests/gateway/test_background_command.py index 90303c41c6..559c04ea79 100644 --- a/tests/gateway/test_background_command.py +++ b/tests/gateway/test_background_command.py @@ -220,6 +220,8 @@ class TestRunBackgroundTask: with patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}), \ patch("run_agent.AIAgent") as MockAgent: mock_agent_instance = MagicMock() + mock_agent_instance.shutdown_memory_provider = MagicMock() + mock_agent_instance.close = MagicMock() mock_agent_instance.run_conversation.return_value = mock_result MockAgent.return_value = mock_agent_instance @@ -231,6 +233,37 @@ class TestRunBackgroundTask: content = call_args[1].get("content", call_args[0][1] if len(call_args[0]) > 1 else "") assert "Background task complete" in content assert "Hello from background!" 
in content + mock_agent_instance.shutdown_memory_provider.assert_called_once() + mock_agent_instance.close.assert_called_once() + + @pytest.mark.asyncio + async def test_agent_cleanup_runs_when_background_agent_raises(self): + """Temporary background agents must be cleaned up on error paths too.""" + runner = _make_runner() + mock_adapter = AsyncMock() + mock_adapter.send = AsyncMock() + runner.adapters[Platform.TELEGRAM] = mock_adapter + + source = SessionSource( + platform=Platform.TELEGRAM, + user_id="12345", + chat_id="67890", + user_name="testuser", + ) + + with patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}), \ + patch("run_agent.AIAgent") as MockAgent: + mock_agent_instance = MagicMock() + mock_agent_instance.shutdown_memory_provider = MagicMock() + mock_agent_instance.close = MagicMock() + mock_agent_instance.run_conversation.side_effect = RuntimeError("boom") + MockAgent.return_value = mock_agent_instance + + await runner._run_background_task("say hello", source, "bg_test") + + mock_adapter.send.assert_called_once() + mock_agent_instance.shutdown_memory_provider.assert_called_once() + mock_agent_instance.close.assert_called_once() @pytest.mark.asyncio async def test_exception_sends_error_message(self): diff --git a/tests/gateway/test_compress_command.py b/tests/gateway/test_compress_command.py index edeb1f47c9..021e98773d 100644 --- a/tests/gateway/test_compress_command.py +++ b/tests/gateway/test_compress_command.py @@ -62,6 +62,8 @@ async def test_compress_command_reports_noop_without_success_banner(): history = _make_history() runner = _make_runner(history) agent_instance = MagicMock() + agent_instance.shutdown_memory_provider = MagicMock() + agent_instance.close = MagicMock() agent_instance.context_compressor.protect_first_n = 0 agent_instance.context_compressor._align_boundary_forward.return_value = 0 agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2 @@ -83,6 +85,8 @@ async def test_compress_command_reports_noop_without_success_banner(): assert "No changes from compression" in result assert "Compressed:" not in result assert "Rough transcript estimate: ~100 tokens (unchanged)" in result + agent_instance.shutdown_memory_provider.assert_called_once() + agent_instance.close.assert_called_once() @pytest.mark.asyncio @@ -95,6 +99,8 @@ async def test_compress_command_explains_when_token_estimate_rises(): ] runner = _make_runner(history) agent_instance = MagicMock() + agent_instance.shutdown_memory_provider = MagicMock() + agent_instance.close = MagicMock() agent_instance.context_compressor.protect_first_n = 0 agent_instance.context_compressor._align_boundary_forward.return_value = 0 agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2 @@ -119,3 +125,5 @@ async def test_compress_command_explains_when_token_estimate_rises(): assert "Compressed: 4 → 3 messages" in result assert "Rough transcript estimate: ~100 → ~120 tokens" in result assert "denser summaries" in result + agent_instance.shutdown_memory_provider.assert_called_once() + agent_instance.close.assert_called_once() diff --git a/tests/gateway/test_config_cwd_bridge.py b/tests/gateway/test_config_cwd_bridge.py index 1b7a1d78b3..7f6a757500 100644 --- a/tests/gateway/test_config_cwd_bridge.py +++ b/tests/gateway/test_config_cwd_bridge.py @@ -37,6 +37,10 @@ def _simulate_config_bridge(cfg: dict, initial_env: dict | None = None): for cfg_key, env_var in terminal_env_map.items(): if cfg_key in terminal_cfg: val = terminal_cfg[cfg_key] + # Skip 
cwd placeholder values — don't overwrite already-resolved + # TERMINAL_CWD. Mirrors the fix in gateway/run.py. + if cfg_key == "cwd" and str(val) in (".", "auto", "cwd"): + continue if isinstance(val, list): env[env_var] = json.dumps(val) else: @@ -146,3 +150,58 @@ class TestTopLevelCwdAlias: cfg = {"cwd": "/from/config"} result = _simulate_config_bridge(cfg, {"MESSAGING_CWD": "/from/env"}) assert result["TERMINAL_CWD"] == "/from/config" + + +class TestNestedTerminalCwdPlaceholderSkip: + """terminal.cwd placeholder values must not clobber TERMINAL_CWD. + + When config.yaml has terminal.cwd: "." (or "auto"/"cwd"), the gateway + config bridge should NOT write that placeholder to TERMINAL_CWD. + This prevents .env or MESSAGING_CWD values from being overwritten. + See issues #10225, #4672, #10817. + """ + + def test_terminal_dot_cwd_does_not_clobber_env(self): + """terminal.cwd: '.' should not overwrite a pre-set TERMINAL_CWD.""" + cfg = {"terminal": {"cwd": "."}} + result = _simulate_config_bridge(cfg, {"TERMINAL_CWD": "/my/project"}) + assert result["TERMINAL_CWD"] == "/my/project" + + def test_terminal_auto_cwd_does_not_clobber_env(self): + cfg = {"terminal": {"cwd": "auto"}} + result = _simulate_config_bridge(cfg, {"TERMINAL_CWD": "/my/project"}) + assert result["TERMINAL_CWD"] == "/my/project" + + def test_terminal_cwd_keyword_does_not_clobber_env(self): + cfg = {"terminal": {"cwd": "cwd"}} + result = _simulate_config_bridge(cfg, {"TERMINAL_CWD": "/my/project"}) + assert result["TERMINAL_CWD"] == "/my/project" + + def test_terminal_explicit_cwd_does_override(self): + """terminal.cwd: '/explicit/path' SHOULD override TERMINAL_CWD.""" + cfg = {"terminal": {"cwd": "/explicit/path"}} + result = _simulate_config_bridge(cfg, {"TERMINAL_CWD": "/old/value"}) + assert result["TERMINAL_CWD"] == "/explicit/path" + + def test_terminal_dot_cwd_falls_back_to_messaging_cwd(self): + """terminal.cwd: '.' with no TERMINAL_CWD should fall to MESSAGING_CWD.""" + cfg = {"terminal": {"cwd": "."}} + result = _simulate_config_bridge(cfg, {"MESSAGING_CWD": "/from/env"}) + assert result["TERMINAL_CWD"] == "/from/env" + + def test_terminal_dot_cwd_and_messaging_cwd_both_set(self): + """Pre-set TERMINAL_CWD from .env wins over terminal.cwd: '.'.""" + cfg = {"terminal": {"cwd": ".", "backend": "local"}} + result = _simulate_config_bridge(cfg, { + "TERMINAL_CWD": "/my/project", + "MESSAGING_CWD": "/fallback", + }) + assert result["TERMINAL_CWD"] == "/my/project" + + def test_non_cwd_terminal_keys_still_bridge(self): + """Other terminal config keys (backend, timeout) should still bridge normally.""" + cfg = {"terminal": {"cwd": ".", "backend": "docker", "timeout": "300"}} + result = _simulate_config_bridge(cfg, {"MESSAGING_CWD": "/from/env"}) + assert result["TERMINAL_ENV"] == "docker" + assert result["TERMINAL_TIMEOUT"] == "300" + assert result["TERMINAL_CWD"] == "/from/env" diff --git a/tests/gateway/test_discord_reply_mode.py b/tests/gateway/test_discord_reply_mode.py index 8a3b440bbf..0203bfab61 100644 --- a/tests/gateway/test_discord_reply_mode.py +++ b/tests/gateway/test_discord_reply_mode.py @@ -284,9 +284,20 @@ class TestEnvVarOverride: # Tests for reply_to_text extraction in _handle_message # ------------------------------------------------------------------ -class FakeDMChannel: +# Build FakeDMChannel as a subclass of the real discord.DMChannel when the +# library is installed — this guarantees isinstance() checks pass in +# production code regardless of test ordering or monkeypatch state. 
+try: + import discord as _discord_lib + _DMChannelBase = _discord_lib.DMChannel +except (ImportError, AttributeError): + _DMChannelBase = object + + +class FakeDMChannel(_DMChannelBase): """Minimal DM channel stub (skips mention / channel-allow checks).""" def __init__(self, channel_id: int = 100, name: str = "dm"): + # Do NOT call super().__init__() — real DMChannel requires State self.id = channel_id self.name = name @@ -309,10 +320,6 @@ def _make_message(*, content: str = "hi", reference=None): @pytest.fixture def reply_text_adapter(monkeypatch): """DiscordAdapter wired for _handle_message → handle_message capture.""" - import gateway.platforms.discord as discord_platform - - monkeypatch.setattr(discord_platform.discord, "DMChannel", FakeDMChannel, raising=False) - config = PlatformConfig(enabled=True, token="fake-token") adapter = DiscordAdapter(config) adapter._client = SimpleNamespace(user=SimpleNamespace(id=999)) diff --git a/tests/gateway/test_flush_memory_stale_guard.py b/tests/gateway/test_flush_memory_stale_guard.py index 6a43817cee..c4e4e1fb6d 100644 --- a/tests/gateway/test_flush_memory_stale_guard.py +++ b/tests/gateway/test_flush_memory_stale_guard.py @@ -202,6 +202,22 @@ class TestFlushAgentSilenced: sys.stdout = old_stdout assert buf.getvalue() == "", "no-op print_fn spinner must not write to stdout" + def test_flush_agent_closes_resources_after_run(self, monkeypatch): + """Memory flush should close temporary agent resources after the turn.""" + runner, tmp_agent, _ = _make_flush_context(monkeypatch) + tmp_agent.shutdown_memory_provider = MagicMock() + tmp_agent.close = MagicMock() + + with ( + patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}), + patch("gateway.run._resolve_gateway_model", return_value="test-model"), + patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: Path("/nonexistent"))}), + ): + runner._flush_memories_for_session("session_cleanup") + + tmp_agent.shutdown_memory_provider.assert_called_once() + tmp_agent.close.assert_called_once() + class TestFlushPromptStructure: """Verify the flush prompt retains its core instructions.""" diff --git a/tests/gateway/test_pending_event_none.py b/tests/gateway/test_pending_event_none.py new file mode 100644 index 0000000000..b2e1356fa1 --- /dev/null +++ b/tests/gateway/test_pending_event_none.py @@ -0,0 +1,42 @@ +"""Tests for the pending_event None guard in recursive _run_agent calls. + +When pending_event is None (Path B: pending comes from interrupt_message), +accessing pending_event.channel_prompt previously raised AttributeError. +This verifies the fix: channel_prompt is captured inside the +`if pending_event is not None:` block and falls back to None otherwise. +""" + +from types import SimpleNamespace + + +def _extract_channel_prompt(pending_event): + """Reproduce the fixed logic from gateway/run.py. + + Mirrors the variable-capture pattern used before the recursive + _run_agent call so we can test both paths without a full runner. 
+ """ + next_channel_prompt = None + if pending_event is not None: + next_channel_prompt = getattr(pending_event, "channel_prompt", None) + return next_channel_prompt + + +class TestPendingEventNoneChannelPrompt: + """Guard against AttributeError when pending_event is None.""" + + def test_none_pending_event_returns_none_channel_prompt(self): + """Path B: pending_event is None — must not raise AttributeError.""" + result = _extract_channel_prompt(None) + assert result is None + + def test_pending_event_with_channel_prompt_passes_through(self): + """Path A: pending_event present — channel_prompt is forwarded.""" + event = SimpleNamespace(channel_prompt="You are a helpful bot.") + result = _extract_channel_prompt(event) + assert result == "You are a helpful bot." + + def test_pending_event_without_channel_prompt_returns_none(self): + """Path A: pending_event present but has no channel_prompt attribute.""" + event = SimpleNamespace() + result = _extract_channel_prompt(event) + assert result is None diff --git a/tests/gateway/test_session_hygiene.py b/tests/gateway/test_session_hygiene.py index 325c24facf..f2e343441b 100644 --- a/tests/gateway/test_session_hygiene.py +++ b/tests/gateway/test_session_hygiene.py @@ -305,10 +305,15 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv) class FakeCompressAgent: + last_instance = None + def __init__(self, **kwargs): self.model = kwargs.get("model") self.session_id = kwargs.get("session_id", "fake-session") self._print_fn = None + self.shutdown_memory_provider = MagicMock() + self.close = MagicMock() + type(self).last_instance = self def _compress_context(self, messages, *_args, **_kwargs): # Simulate real _compress_context: create a new session_id @@ -385,3 +390,6 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t # Compression warnings are no longer sent to users — compression # happens silently with server-side logging only. assert len(adapter.sent) == 0 + assert FakeCompressAgent.last_instance is not None + FakeCompressAgent.last_instance.shutdown_memory_provider.assert_called_once() + FakeCompressAgent.last_instance.close.assert_called_once() diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py index 38532e66be..cdba5f60ed 100644 --- a/tests/gateway/test_stream_consumer.py +++ b/tests/gateway/test_stream_consumer.py @@ -606,6 +606,56 @@ class TestSegmentBreakOnToolBoundary: assert sent_texts[0].startswith(prefix) assert sum(len(t) for t in sent_texts[1:]) == len(tail) + @pytest.mark.asyncio + async def test_fallback_final_sends_full_text_at_tool_boundary(self): + """After a tool call, the streamed prefix is stale (from the pre-tool + segment). _send_fallback_final must still send the post-tool response + even when continuation_text calculates as empty (#10807).""" + adapter = MagicMock() + adapter.send = AsyncMock( + return_value=SimpleNamespace(success=True, message_id="msg_1"), + ) + adapter.edit_message = AsyncMock( + return_value=SimpleNamespace(success=True), + ) + adapter.MAX_MESSAGE_LENGTH = 4096 + + config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5) + consumer = GatewayStreamConsumer(adapter, "chat_123", config) + + # Simulate a pre-tool streamed segment that becomes the visible prefix + pre_tool_text = "I'll run that code now." 
+ consumer.on_delta(pre_tool_text) + task = asyncio.create_task(consumer.run()) + await asyncio.sleep(0.05) + + # After the tool call, the model returns a SHORT final response that + # does NOT start with the pre-tool prefix. The continuation calculator + # would return empty (no prefix match → full text returned, but if the + # streaming edit already showed pre_tool_text, the prefix-based logic + # wrongly matches). Simulate this by setting _last_sent_text to the + # pre-tool content, then finishing with different post-tool content. + consumer._last_sent_text = pre_tool_text + post_tool_response = "⏰ Script timed out after 30s and was killed." + consumer.finish() + await task + + # The fallback should send the post-tool response via + # _send_fallback_final. + await consumer._send_fallback_final(post_tool_response) + + # Verify the final text was sent (not silently dropped) + sent = False + for call in adapter.send.call_args_list: + content = call[1].get("content", call[0][0] if call[0] else "") + if "timed out" in str(content): + sent = True + break + assert sent, ( + "Post-tool timeout response was silently dropped by " + "_send_fallback_final — the #10807 fix should prevent this" + ) + class TestInterimCommentaryMessages: @pytest.mark.asyncio diff --git a/tests/gateway/test_telegram_network.py b/tests/gateway/test_telegram_network.py index 2770211f31..ff74d4c661 100644 --- a/tests/gateway/test_telegram_network.py +++ b/tests/gateway/test_telegram_network.py @@ -322,7 +322,7 @@ class TestFallbackTransportInit: seen_kwargs.append(kwargs.copy()) return FakeTransport([], {}) - for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy", "TELEGRAM_PROXY"): monkeypatch.delenv(key, raising=False) monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080") monkeypatch.setattr(tnet.httpx, "AsyncHTTPTransport", factory) diff --git a/tests/hermes_cli/test_deprecated_cwd_warning.py b/tests/hermes_cli/test_deprecated_cwd_warning.py new file mode 100644 index 0000000000..4b438e7ebf --- /dev/null +++ b/tests/hermes_cli/test_deprecated_cwd_warning.py @@ -0,0 +1,64 @@ +"""Tests for warn_deprecated_cwd_env_vars() migration warning.""" + +import os +import pytest + + +class TestDeprecatedCwdWarning: + """Warn when MESSAGING_CWD or TERMINAL_CWD is set in .env.""" + + def test_messaging_cwd_triggers_warning(self, monkeypatch, capsys): + monkeypatch.setenv("MESSAGING_CWD", "/some/path") + monkeypatch.delenv("TERMINAL_CWD", raising=False) + + from hermes_cli.config import warn_deprecated_cwd_env_vars + warn_deprecated_cwd_env_vars(config={}) + + captured = capsys.readouterr() + assert "MESSAGING_CWD" in captured.err + assert "deprecated" in captured.err.lower() + assert "config.yaml" in captured.err + + def test_terminal_cwd_triggers_warning_when_config_placeholder(self, monkeypatch, capsys): + monkeypatch.setenv("TERMINAL_CWD", "/project") + monkeypatch.delenv("MESSAGING_CWD", raising=False) + + from hermes_cli.config import warn_deprecated_cwd_env_vars + # config has placeholder cwd → TERMINAL_CWD likely from .env + warn_deprecated_cwd_env_vars(config={"terminal": {"cwd": "."}}) + + captured = capsys.readouterr() + assert "TERMINAL_CWD" in captured.err + assert "deprecated" in captured.err.lower() + + def test_no_warning_when_config_has_explicit_cwd(self, monkeypatch, capsys): + monkeypatch.setenv("TERMINAL_CWD", "/project") + monkeypatch.delenv("MESSAGING_CWD", 
raising=False) + + from hermes_cli.config import warn_deprecated_cwd_env_vars + # config has explicit cwd → TERMINAL_CWD could be from config bridge + warn_deprecated_cwd_env_vars(config={"terminal": {"cwd": "/project"}}) + + captured = capsys.readouterr() + assert "TERMINAL_CWD" not in captured.err + + def test_no_warning_when_env_clean(self, monkeypatch, capsys): + monkeypatch.delenv("MESSAGING_CWD", raising=False) + monkeypatch.delenv("TERMINAL_CWD", raising=False) + + from hermes_cli.config import warn_deprecated_cwd_env_vars + warn_deprecated_cwd_env_vars(config={}) + + captured = capsys.readouterr() + assert captured.err == "" + + def test_both_deprecated_vars_warn(self, monkeypatch, capsys): + monkeypatch.setenv("MESSAGING_CWD", "/msg/path") + monkeypatch.setenv("TERMINAL_CWD", "/term/path") + + from hermes_cli.config import warn_deprecated_cwd_env_vars + warn_deprecated_cwd_env_vars(config={}) + + captured = capsys.readouterr() + assert "MESSAGING_CWD" in captured.err + assert "TERMINAL_CWD" in captured.err diff --git a/tests/hermes_cli/test_memory_reset.py b/tests/hermes_cli/test_memory_reset.py new file mode 100644 index 0000000000..3b91326de2 --- /dev/null +++ b/tests/hermes_cli/test_memory_reset.py @@ -0,0 +1,157 @@ +"""Tests for the `hermes memory reset` CLI command. + +Covers: +- Reset both stores (MEMORY.md + USER.md) +- Reset individual stores (--target memory / --target user) +- Skip confirmation with --yes +- Graceful handling when no memory files exist +- Profile-scoped reset (uses HERMES_HOME) +""" + +import os +import pytest +from argparse import Namespace +from pathlib import Path + + +@pytest.fixture +def memory_env(tmp_path, monkeypatch): + """Set up a fake HERMES_HOME with memory files.""" + hermes_home = tmp_path / ".hermes" + memories = hermes_home / "memories" + memories.mkdir(parents=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Create sample memory files + (memories / "MEMORY.md").write_text( + "§\nHermes repo is at ~/.hermes/hermes-agent\n§\nUser prefers dark themes", + encoding="utf-8", + ) + (memories / "USER.md").write_text( + "§\nUser is Teknium\n§\nTimezone: US Pacific", + encoding="utf-8", + ) + return hermes_home, memories + + +def _run_memory_reset(target="all", yes=False, monkeypatch=None, confirm_input="no"): + """Invoke the memory reset logic from cmd_memory in main.py. + + Simulates what happens when `hermes memory reset` is run. 
+ """ + from hermes_constants import get_hermes_home, display_hermes_home + + mem_dir = get_hermes_home() / "memories" + files_to_reset = [] + if target in ("all", "memory"): + files_to_reset.append(("MEMORY.md", "agent notes")) + if target in ("all", "user"): + files_to_reset.append(("USER.md", "user profile")) + + existing = [(f, desc) for f, desc in files_to_reset if (mem_dir / f).exists()] + if not existing: + return "nothing" + + if not yes: + if confirm_input != "yes": + return "cancelled" + + for f, desc in existing: + (mem_dir / f).unlink() + + return "deleted" + + +class TestMemoryReset: + """Tests for `hermes memory reset` subcommand.""" + + def test_reset_all_with_yes_flag(self, memory_env): + """--yes flag should skip confirmation and delete both files.""" + hermes_home, memories = memory_env + assert (memories / "MEMORY.md").exists() + assert (memories / "USER.md").exists() + + result = _run_memory_reset(target="all", yes=True) + assert result == "deleted" + assert not (memories / "MEMORY.md").exists() + assert not (memories / "USER.md").exists() + + def test_reset_memory_only(self, memory_env): + """--target memory should only delete MEMORY.md.""" + hermes_home, memories = memory_env + + result = _run_memory_reset(target="memory", yes=True) + assert result == "deleted" + assert not (memories / "MEMORY.md").exists() + assert (memories / "USER.md").exists() + + def test_reset_user_only(self, memory_env): + """--target user should only delete USER.md.""" + hermes_home, memories = memory_env + + result = _run_memory_reset(target="user", yes=True) + assert result == "deleted" + assert (memories / "MEMORY.md").exists() + assert not (memories / "USER.md").exists() + + def test_reset_no_files_exist(self, tmp_path, monkeypatch): + """Should return 'nothing' when no memory files exist.""" + hermes_home = tmp_path / ".hermes" + (hermes_home / "memories").mkdir(parents=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + result = _run_memory_reset(target="all", yes=True) + assert result == "nothing" + + def test_reset_confirmation_denied(self, memory_env): + """Without --yes and without typing 'yes', should be cancelled.""" + hermes_home, memories = memory_env + + result = _run_memory_reset(target="all", yes=False, confirm_input="no") + assert result == "cancelled" + # Files should still exist + assert (memories / "MEMORY.md").exists() + assert (memories / "USER.md").exists() + + def test_reset_confirmation_accepted(self, memory_env): + """Typing 'yes' should proceed with deletion.""" + hermes_home, memories = memory_env + + result = _run_memory_reset(target="all", yes=False, confirm_input="yes") + assert result == "deleted" + assert not (memories / "MEMORY.md").exists() + assert not (memories / "USER.md").exists() + + def test_reset_profile_scoped(self, tmp_path, monkeypatch): + """Reset should work on the active profile's HERMES_HOME.""" + profile_home = tmp_path / "profiles" / "myprofile" + memories = profile_home / "memories" + memories.mkdir(parents=True) + (memories / "MEMORY.md").write_text("profile memory", encoding="utf-8") + (memories / "USER.md").write_text("profile user", encoding="utf-8") + monkeypatch.setenv("HERMES_HOME", str(profile_home)) + + result = _run_memory_reset(target="all", yes=True) + assert result == "deleted" + assert not (memories / "MEMORY.md").exists() + assert not (memories / "USER.md").exists() + + def test_reset_partial_files(self, memory_env): + """Reset should work when only one memory file exists.""" + hermes_home, memories = memory_env + 
(memories / "USER.md").unlink() + + result = _run_memory_reset(target="all", yes=True) + assert result == "deleted" + assert not (memories / "MEMORY.md").exists() + + def test_reset_empty_memories_dir(self, tmp_path, monkeypatch): + """No memories dir at all should report nothing.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir(parents=True) + # No memories dir + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # The memories dir won't exist; get_hermes_home() / "memories" won't have files + result = _run_memory_reset(target="all", yes=True) + assert result == "nothing" diff --git a/tests/hermes_cli/test_ollama_cloud_provider.py b/tests/hermes_cli/test_ollama_cloud_provider.py index 9dad26092c..f3702a417e 100644 --- a/tests/hermes_cli/test_ollama_cloud_provider.py +++ b/tests/hermes_cli/test_ollama_cloud_provider.py @@ -114,6 +114,65 @@ class TestOllamaCloudModelCatalog: assert "ollama-cloud" in _PROVIDER_LABELS assert _PROVIDER_LABELS["ollama-cloud"] == "Ollama Cloud" + def test_provider_model_ids_returns_dynamic_models(self, tmp_path, monkeypatch): + """provider_model_ids('ollama-cloud') should call fetch_ollama_cloud_models().""" + from hermes_cli.models import provider_model_ids + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("OLLAMA_API_KEY", "test-key") + + mock_mdev = { + "ollama-cloud": { + "models": { + "qwen3.5:397b": {"tool_call": True}, + "glm-5": {"tool_call": True}, + } + } + } + with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.5:397b"]), \ + patch("agent.models_dev.fetch_models_dev", return_value=mock_mdev): + result = provider_model_ids("ollama-cloud", force_refresh=True) + + assert len(result) > 0 + assert "qwen3.5:397b" in result + + +# ── Model Picker (list_authenticated_providers) ── + +class TestOllamaCloudModelPicker: + def test_ollama_cloud_shows_model_count(self, tmp_path, monkeypatch): + """Ollama Cloud should show non-zero model count in provider picker.""" + from hermes_cli.model_switch import list_authenticated_providers + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("OLLAMA_API_KEY", "test-key") + + mock_mdev = { + "ollama-cloud": { + "models": { + "qwen3.5:397b": {"tool_call": True}, + "glm-5": {"tool_call": True}, + } + } + } + with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.5:397b"]), \ + patch("agent.models_dev.fetch_models_dev", return_value=mock_mdev): + providers = list_authenticated_providers(current_provider="ollama-cloud") + + ollama = next((p for p in providers if p["slug"] == "ollama-cloud"), None) + assert ollama is not None, "ollama-cloud should appear when OLLAMA_API_KEY is set" + assert ollama["total_models"] > 0, "ollama-cloud should show non-zero model count" + + def test_ollama_cloud_not_shown_without_creds(self, monkeypatch): + """Ollama Cloud should not appear without credentials.""" + from hermes_cli.model_switch import list_authenticated_providers + + monkeypatch.delenv("OLLAMA_API_KEY", raising=False) + + providers = list_authenticated_providers(current_provider="openrouter") + ollama = next((p for p in providers if p["slug"] == "ollama-cloud"), None) + assert ollama is None, "ollama-cloud should not appear without OLLAMA_API_KEY" + # ── Merged Model Discovery ── diff --git a/tests/hermes_cli/test_skin_engine.py b/tests/hermes_cli/test_skin_engine.py index aadcde3a6f..3ce185b82a 100644 --- a/tests/hermes_cli/test_skin_engine.py +++ b/tests/hermes_cli/test_skin_engine.py @@ -152,6 +152,24 @@ class TestSkinManagement: 
init_skin_from_config({}) assert get_active_skin_name() == "default" + def test_init_skin_from_null_display(self): + """display: null should fall back to default, not crash.""" + from hermes_cli.skin_engine import init_skin_from_config, get_active_skin_name + init_skin_from_config({"display": None}) + assert get_active_skin_name() == "default" + + def test_init_skin_from_non_dict_display(self): + """display: should fall back to default.""" + from hermes_cli.skin_engine import init_skin_from_config, get_active_skin_name + init_skin_from_config({"display": "invalid"}) + assert get_active_skin_name() == "default" + + init_skin_from_config({"display": 42}) + assert get_active_skin_name() == "default" + + init_skin_from_config({"display": []}) + assert get_active_skin_name() == "default" + class TestUserSkins: def test_load_user_skin_from_yaml(self, tmp_path, monkeypatch): diff --git a/tests/run_agent/test_context_pressure.py b/tests/run_agent/test_context_pressure.py deleted file mode 100644 index 4140749c51..0000000000 --- a/tests/run_agent/test_context_pressure.py +++ /dev/null @@ -1,361 +0,0 @@ -"""Tests for context pressure warnings (user-facing, not injected into messages). - -Covers: -- Display formatting (CLI and gateway variants) -- Flag tracking and threshold logic on AIAgent -- Flag reset after compression -- status_callback invocation -""" - -import json -from types import SimpleNamespace -from unittest.mock import MagicMock, patch - -import pytest - -from agent.display import format_context_pressure, format_context_pressure_gateway -from run_agent import AIAgent - - -# --------------------------------------------------------------------------- -# Display formatting tests -# --------------------------------------------------------------------------- - - -class TestFormatContextPressure: - """CLI context pressure display (agent/display.py). - - The bar shows progress toward the compaction threshold, not the - raw context window. 60% = 60% of the way to compaction. 
- """ - - def test_80_percent_uses_warning_icon(self): - line = format_context_pressure(0.80, 100_000, 0.50) - assert "⚠" in line - assert "80% to compaction" in line - - def test_90_percent_uses_warning_icon(self): - line = format_context_pressure(0.90, 100_000, 0.50) - assert "⚠" in line - assert "90% to compaction" in line - - def test_bar_length_scales_with_progress(self): - line_80 = format_context_pressure(0.80, 100_000, 0.50) - line_95 = format_context_pressure(0.95, 100_000, 0.50) - assert line_95.count("▰") > line_80.count("▰") - - def test_shows_threshold_tokens(self): - line = format_context_pressure(0.80, 100_000, 0.50) - assert "100k" in line - - def test_small_threshold(self): - line = format_context_pressure(0.80, 500, 0.50) - assert "500" in line - - def test_shows_threshold_percent(self): - line = format_context_pressure(0.80, 100_000, 0.50) - assert "50%" in line - - def test_approaching_hint(self): - line = format_context_pressure(0.80, 100_000, 0.50) - assert "compaction approaching" in line - - def test_no_compaction_when_disabled(self): - line = format_context_pressure(0.85, 100_000, 0.50, compression_enabled=False) - assert "no auto-compaction" in line - - def test_returns_string(self): - result = format_context_pressure(0.65, 128_000, 0.50) - assert isinstance(result, str) - - def test_over_100_percent_capped(self): - """Progress > 1.0 should cap both bar and percentage text at 100%.""" - line = format_context_pressure(1.05, 100_000, 0.50) - assert "▰" in line - assert line.count("▰") == 20 - assert "100%" in line - assert "105%" not in line - - -class TestFormatContextPressureGateway: - """Gateway (plain text) context pressure display.""" - - def test_80_percent_warning(self): - msg = format_context_pressure_gateway(0.80, 0.50) - assert "80% to compaction" in msg - assert "50%" in msg - - def test_90_percent_warning(self): - msg = format_context_pressure_gateway(0.90, 0.50) - assert "90% to compaction" in msg - assert "approaching" in msg - - def test_no_compaction_warning(self): - msg = format_context_pressure_gateway(0.85, 0.50, compression_enabled=False) - assert "disabled" in msg - - def test_no_ansi_codes(self): - msg = format_context_pressure_gateway(0.80, 0.50) - assert "\033[" not in msg - - def test_has_progress_bar(self): - msg = format_context_pressure_gateway(0.80, 0.50) - assert "▰" in msg - - def test_over_100_percent_capped(self): - """Progress > 1.0 should cap percentage text at 100%.""" - msg = format_context_pressure_gateway(1.09, 0.50) - assert "100% to compaction" in msg - assert "109%" not in msg - assert msg.count("▰") == 20 - - -# --------------------------------------------------------------------------- -# AIAgent context pressure flag tests -# --------------------------------------------------------------------------- - - -def _make_tool_defs(*names): - return [ - { - "type": "function", - "function": { - "name": n, - "description": f"{n} tool", - "parameters": {"type": "object", "properties": {}}, - }, - } - for n in names - ] - - -@pytest.fixture() -def agent(): - """Minimal AIAgent with mocked internals.""" - with ( - patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), - patch("run_agent.check_toolset_requirements", return_value={}), - patch("run_agent.OpenAI"), - ): - a = AIAgent( - api_key="test-key-1234567890", - quiet_mode=True, - skip_context_files=True, - skip_memory=True, - ) - a.client = MagicMock() - return a - - -class TestContextPressureFlags: - """Context pressure warning flag tracking on 
AIAgent.""" - - def test_flag_initialized_zero(self, agent): - assert agent._context_pressure_warned_at == 0.0 - - def test_emit_calls_status_callback(self, agent): - """status_callback should be invoked with event type and message.""" - cb = MagicMock() - agent.status_callback = cb - - compressor = MagicMock() - compressor.context_length = 200_000 - compressor.threshold_tokens = 100_000 # 50% - - agent._emit_context_pressure(0.85, compressor) - - cb.assert_called_once() - args = cb.call_args[0] - assert args[0] == "context_pressure" - assert "85% to compaction" in args[1] - - def test_emit_no_callback_no_crash(self, agent): - """No status_callback set — should not crash.""" - agent.status_callback = None - - compressor = MagicMock() - compressor.context_length = 200_000 - compressor.threshold_tokens = 100_000 - - # Should not raise - agent._emit_context_pressure(0.60, compressor) - - def test_emit_prints_for_cli_platform(self, agent, capsys): - """CLI platform should always print context pressure, even in quiet_mode.""" - agent.quiet_mode = True - agent.platform = "cli" - agent.status_callback = None - - compressor = MagicMock() - compressor.context_length = 200_000 - compressor.threshold_tokens = 100_000 - - agent._emit_context_pressure(0.85, compressor) - captured = capsys.readouterr() - assert "▰" in captured.out - assert "to compaction" in captured.out - - def test_emit_skips_print_for_gateway_platform(self, agent, capsys): - """Gateway platforms get the callback, not CLI print.""" - agent.platform = "telegram" - agent.status_callback = None - - compressor = MagicMock() - compressor.context_length = 200_000 - compressor.threshold_tokens = 100_000 - - agent._emit_context_pressure(0.85, compressor) - captured = capsys.readouterr() - assert "▰" not in captured.out - - def test_flag_reset_on_compression(self, agent): - """After _compress_context, context pressure flag should reset.""" - agent._context_pressure_warned_at = 0.85 - agent.compression_enabled = True - - agent.context_compressor = MagicMock() - agent.context_compressor.compress.return_value = [ - {"role": "user", "content": "Summary of conversation so far."} - ] - agent.context_compressor.context_length = 200_000 - agent.context_compressor.threshold_tokens = 100_000 - agent.context_compressor.compression_count = 1 - - agent._todo_store = MagicMock() - agent._todo_store.format_for_injection.return_value = None - - agent._build_system_prompt = MagicMock(return_value="system prompt") - agent._cached_system_prompt = "old system prompt" - agent._session_db = None - - messages = [ - {"role": "user", "content": "hello"}, - {"role": "assistant", "content": "hi there"}, - ] - agent._compress_context(messages, "system prompt") - - assert agent._context_pressure_warned_at == 0.0 - - def test_emit_callback_error_handled(self, agent): - """If status_callback raises, it should be caught gracefully.""" - cb = MagicMock(side_effect=RuntimeError("callback boom")) - agent.status_callback = cb - - compressor = MagicMock() - compressor.context_length = 200_000 - compressor.threshold_tokens = 100_000 - - # Should not raise - agent._emit_context_pressure(0.85, compressor) - - def test_tiered_reemits_at_95(self, agent): - """Warning fires at 85%, then fires again when crossing 95%.""" - agent._context_pressure_warned_at = 0.85 - # Simulate crossing 95%: the tier (0.95) > warned_at (0.85) - assert 0.95 > agent._context_pressure_warned_at - # After emission at 95%, the tier should update - agent._context_pressure_warned_at = 0.95 - assert 
agent._context_pressure_warned_at == 0.95 - - def test_tiered_no_double_emit_at_same_level(self, agent): - """Once warned at 85%, further 85%+ readings don't re-warn.""" - agent._context_pressure_warned_at = 0.85 - # At 88%, tier is 0.85, which is NOT > warned_at (0.85) - _warn_tier = 0.85 if 0.88 >= 0.85 else 0.0 - assert not (_warn_tier > agent._context_pressure_warned_at) - - def test_flag_not_reset_when_compression_insufficient(self, agent): - """When compression can't drop below 85%, keep the flag set.""" - agent._context_pressure_warned_at = 0.85 - agent.compression_enabled = True - - agent.context_compressor = MagicMock() - agent.context_compressor.compress.return_value = [ - {"role": "user", "content": "Summary of conversation so far."} - ] - agent.context_compressor.context_length = 200 - # Use a small threshold so the tiny compressed output still - # represents >= 85% of it (prevents flag reset). - agent.context_compressor.threshold_tokens = 10 - agent.context_compressor.compression_count = 1 - agent.context_compressor.last_prompt_tokens = 0 - - agent._todo_store = MagicMock() - agent._todo_store.format_for_injection.return_value = None - agent._build_system_prompt = MagicMock(return_value="system prompt") - agent._cached_system_prompt = "old system prompt" - agent._session_db = None - - messages = [ - {"role": "user", "content": "hello"}, - {"role": "assistant", "content": "hi there"}, - ] - agent._compress_context(messages, "system prompt") - - # Post-compression is ~90% of threshold — flag should NOT reset - assert agent._context_pressure_warned_at == 0.85 - - -class TestContextPressureGatewayDedup: - """Class-level dedup prevents warning spam across AIAgent instances.""" - - def setup_method(self): - """Clear class-level dedup state between tests.""" - AIAgent._context_pressure_last_warned.clear() - - def test_second_instance_within_cooldown_suppressed(self): - """Same session, same tier, within cooldown — should be suppressed.""" - import time - sid = "test_session_dedup" - # Simulate first warning - AIAgent._context_pressure_last_warned[sid] = (0.85, time.time()) - # Second instance checking same tier within cooldown - _last = AIAgent._context_pressure_last_warned.get(sid) - _should_warn = _last is None or _last[0] < 0.85 or (time.time() - _last[1]) >= AIAgent._CONTEXT_PRESSURE_COOLDOWN - assert not _should_warn - - def test_higher_tier_fires_despite_cooldown(self): - """Same session, higher tier — should fire even within cooldown.""" - import time - sid = "test_session_tier" - AIAgent._context_pressure_last_warned[sid] = (0.85, time.time()) - _last = AIAgent._context_pressure_last_warned.get(sid) - # 0.95 > 0.85 stored tier → should warn - _should_warn = _last is None or _last[0] < 0.95 or (time.time() - _last[1]) >= AIAgent._CONTEXT_PRESSURE_COOLDOWN - assert _should_warn - - def test_warning_fires_after_cooldown_expires(self): - """Same session, same tier, after cooldown — should fire again.""" - import time - sid = "test_session_expired" - # Set a timestamp far in the past - AIAgent._context_pressure_last_warned[sid] = (0.85, time.time() - AIAgent._CONTEXT_PRESSURE_COOLDOWN - 1) - _last = AIAgent._context_pressure_last_warned.get(sid) - _should_warn = _last is None or _last[0] < 0.85 or (time.time() - _last[1]) >= AIAgent._CONTEXT_PRESSURE_COOLDOWN - assert _should_warn - - def test_compression_clears_dedup(self): - """After compression drops below 85%, dedup entry should be cleared.""" - import time - sid = "test_session_clear" - 
AIAgent._context_pressure_last_warned[sid] = (0.85, time.time()) - assert sid in AIAgent._context_pressure_last_warned - # Simulate what _compress_context does on reset - AIAgent._context_pressure_last_warned.pop(sid, None) - assert sid not in AIAgent._context_pressure_last_warned - - def test_eviction_removes_stale_entries(self): - """Stale entries older than 2x cooldown should be evicted.""" - import time - _now = time.time() - AIAgent._context_pressure_last_warned = { - "fresh": (0.85, _now), - "stale": (0.85, _now - AIAgent._CONTEXT_PRESSURE_COOLDOWN * 3), - } - _cutoff = _now - AIAgent._CONTEXT_PRESSURE_COOLDOWN * 2 - AIAgent._context_pressure_last_warned = { - k: v for k, v in AIAgent._context_pressure_last_warned.items() - if v[1] > _cutoff - } - assert "fresh" in AIAgent._context_pressure_last_warned - assert "stale" not in AIAgent._context_pressure_last_warned diff --git a/tests/run_agent/test_context_token_tracking.py b/tests/run_agent/test_context_token_tracking.py index b924448b64..6800a2b491 100644 --- a/tests/run_agent/test_context_token_tracking.py +++ b/tests/run_agent/test_context_token_tracking.py @@ -59,7 +59,7 @@ def _make_agent(monkeypatch, api_mode, provider, response_fn): self._disable_streaming = True return super().run_conversation(msg, conversation_history=conversation_history, task_id=task_id) - return _A(model="test-model", api_key="test-key", provider=provider, api_mode=api_mode) + return _A(model="test-model", api_key="test-key", base_url="http://localhost:1234/v1", provider=provider, api_mode=api_mode) def _anthropic_resp(input_tok, output_tok, cache_read=0, cache_creation=0): diff --git a/tests/run_agent/test_create_openai_client_kwargs_isolation.py b/tests/run_agent/test_create_openai_client_kwargs_isolation.py new file mode 100644 index 0000000000..506e1486ce --- /dev/null +++ b/tests/run_agent/test_create_openai_client_kwargs_isolation.py @@ -0,0 +1,35 @@ +"""Guardrail: _create_openai_client must not mutate its input kwargs. + +#10933 injected an httpx.Client directly into the caller's ``client_kwargs``. +When the dict was ``self._client_kwargs``, the shared transport was torn down +after the first request_complete close and subsequent request-scoped clients +wrapped a closed transport, raising ``APIConnectionError('Connection error.')`` +with cause ``RuntimeError: Cannot send a request, as the client has been closed`` +on every retry. That PR has since been reverted, but the underlying issue +(#10324, connections hanging in CLOSE-WAIT) is still open, so another transport +tweak inside this function is likely. This test pins the contract that the +function must treat its input dict as read-only. 
+""" +from unittest.mock import MagicMock, patch + +from run_agent import AIAgent + + +@patch("run_agent.OpenAI") +def test_create_openai_client_does_not_mutate_input_kwargs(mock_openai): + mock_openai.return_value = MagicMock() + agent = AIAgent( + model="test/model", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + kwargs = {"api_key": "test-key", "base_url": "https://api.example.com/v1"} + snapshot = dict(kwargs) + + agent._create_openai_client(kwargs, reason="test", shared=False) + + assert kwargs == snapshot, ( + f"_create_openai_client mutated input kwargs; expected {snapshot}, got {kwargs}" + ) diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 49ef1dc8fe..46eec2cf71 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -4115,8 +4115,8 @@ class TestMemoryNudgeCounterPersistence: """Counters must exist on the agent after __init__.""" with patch("run_agent.get_tool_definitions", return_value=[]): a = AIAgent( - model="test", api_key="test-key", provider="openrouter", - skip_context_files=True, skip_memory=True, + model="test", api_key="test-key", base_url="http://localhost:1234/v1", + provider="openrouter", skip_context_files=True, skip_memory=True, ) assert hasattr(a, "_turns_since_memory") assert hasattr(a, "_iters_since_skill") diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py index d2fbc7c103..15f8faa9bb 100644 --- a/tests/tools/test_code_execution.py +++ b/tests/tools/test_code_execution.py @@ -279,6 +279,10 @@ raise RuntimeError("deliberate crash") )) self.assertEqual(result["status"], "timeout") self.assertIn("timed out", result.get("error", "")) + # The timeout message must also appear in output so the LLM always + # surfaces it to the user (#10807). + self.assertIn("timed out", result.get("output", "")) + self.assertIn("\u23f0", result.get("output", "")) def test_web_search_tool(self): """Script calls web_search and processes results.""" diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 8cffeda804..3e7e3f925b 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -871,7 +871,18 @@ def _execute_remote( } if status == "timeout": - result["error"] = f"Script timed out after {timeout}s and was killed." + timeout_msg = f"Script timed out after {timeout}s and was killed." + result["error"] = timeout_msg + # Include timeout message in output so the LLM always surfaces it + # to the user (see local path comment — same reasoning, #10807). + if stdout_text: + result["output"] = stdout_text + f"\n\n⏰ {timeout_msg}" + else: + result["output"] = f"⏰ {timeout_msg}" + logger.warning( + "execute_code (remote) timed out after %ss (limit %ss) with %d tool calls", + duration, timeout, tool_call_counter[0], + ) elif status == "interrupted": result["output"] = ( stdout_text + "\n[execution interrupted — user sent a new message]" @@ -1117,6 +1128,10 @@ def execute_code( stderr_reader.start() status = "success" + _activity_state = { + "last_touch": time.monotonic(), + "start": exec_start, + } while proc.poll() is None: if _is_interrupted(): _kill_process_group(proc) @@ -1126,6 +1141,13 @@ def execute_code( _kill_process_group(proc, escalate=True) status = "timeout" break + # Periodic activity touch so the gateway's inactivity timeout + # doesn't kill the agent during long code execution (#10807). 
+ try: + from tools.environments.base import touch_activity_if_due + touch_activity_if_due(_activity_state, "execute_code running") + except Exception: + pass time.sleep(0.2) # Wait for readers to finish draining @@ -1179,7 +1201,20 @@ def execute_code( } if status == "timeout": - result["error"] = f"Script timed out after {timeout}s and was killed." + timeout_msg = f"Script timed out after {timeout}s and was killed." + result["error"] = timeout_msg + # Include timeout message in output so the LLM always surfaces it + # to the user. When output is empty, models often treat the result + # as "nothing happened" and produce an empty response, which the + # gateway stream consumer silently drops (#10807). + if stdout_text: + result["output"] = stdout_text + f"\n\n⏰ {timeout_msg}" + else: + result["output"] = f"⏰ {timeout_msg}" + logger.warning( + "execute_code timed out after %ss (limit %ss) with %d tool calls", + duration, timeout, tool_call_counter[0], + ) elif status == "interrupted": result["output"] = stdout_text + "\n[execution interrupted — user sent a new message]" elif exit_code != 0: diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index fed1f93683..22b132f2c4 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -863,28 +863,28 @@ def delegate_task( results.append(entry) completed_count += 1 - # Print per-task completion line above the spinner - idx = entry["task_index"] - label = task_labels[idx] if idx < len(task_labels) else f"Task {idx}" - dur = entry.get("duration_seconds", 0) - status = entry.get("status", "?") - icon = "✓" if status == "completed" else "✗" - remaining = n_tasks - completed_count - completion_line = f"{icon} [{idx+1}/{n_tasks}] {label} ({dur}s)" - if spinner_ref: - try: - spinner_ref.print_above(completion_line) - except Exception: + # Print per-task completion line above the spinner + idx = entry["task_index"] + label = task_labels[idx] if idx < len(task_labels) else f"Task {idx}" + dur = entry.get("duration_seconds", 0) + status = entry.get("status", "?") + icon = "✓" if status == "completed" else "✗" + remaining = n_tasks - completed_count + completion_line = f"{icon} [{idx+1}/{n_tasks}] {label} ({dur}s)" + if spinner_ref: + try: + spinner_ref.print_above(completion_line) + except Exception: + print(f" {completion_line}") + else: print(f" {completion_line}") - else: - print(f" {completion_line}") - # Update spinner text to show remaining count - if spinner_ref and remaining > 0: - try: - spinner_ref.update_text(f"🔀 {remaining} task{'s' if remaining != 1 else ''} remaining") - except Exception as e: - logger.debug("Spinner update_text failed: %s", e) + # Update spinner text to show remaining count + if spinner_ref and remaining > 0: + try: + spinner_ref.update_text(f"🔀 {remaining} task{'s' if remaining != 1 else ''} remaining") + except Exception as e: + logger.debug("Spinner update_text failed: %s", e) # Sort by task_index so results match input order results.sort(key=lambda r: r["task_index"]) diff --git a/tools/environments/base.py b/tools/environments/base.py index 19c3bf024e..8e99079236 100644 --- a/tools/environments/base.py +++ b/tools/environments/base.py @@ -37,6 +37,32 @@ def _get_activity_callback() -> Callable[[str], None] | None: return getattr(_activity_callback_local, "callback", None) +def touch_activity_if_due( + state: dict, + label: str, +) -> None: + """Fire the activity callback at most once every ``state['interval']`` seconds. 
+ + *state* must contain ``last_touch`` (monotonic timestamp) and ``start`` + (monotonic timestamp of the operation start). An optional ``interval`` + key overrides the default 10 s cadence. + + Swallows all exceptions so callers don't need their own try/except. + """ + now = time.monotonic() + interval = state.get("interval", 10.0) + if now - state["last_touch"] < interval: + return + state["last_touch"] = now + try: + cb = _get_activity_callback() + if cb: + elapsed = int(now - state["start"]) + cb(f"{label} ({elapsed}s elapsed)") + except Exception: + pass + + def get_sandbox_dir() -> Path: """Return the host-side root for all sandbox storage (Docker workspaces, Singularity overlays/SIF cache, etc.). @@ -405,8 +431,11 @@ class BaseEnvironment(ABC): drain_thread = threading.Thread(target=_drain, daemon=True) drain_thread.start() deadline = time.monotonic() + timeout - _last_activity_touch = time.monotonic() - _ACTIVITY_INTERVAL = 10.0 # seconds between activity touches + _now = time.monotonic() + _activity_state = { + "last_touch": _now, + "start": _now, + } while proc.poll() is None: if is_interrupted(): @@ -428,16 +457,7 @@ class BaseEnvironment(ABC): "returncode": 124, } # Periodic activity touch so the gateway knows we're alive - _now = time.monotonic() - if _now - _last_activity_touch >= _ACTIVITY_INTERVAL: - _last_activity_touch = _now - _cb = _get_activity_callback() - if _cb: - try: - _elapsed = int(_now - (deadline - timeout)) - _cb(f"terminal command running ({_elapsed}s elapsed)") - except Exception: - pass + touch_activity_if_due(_activity_state, "terminal command running") time.sleep(0.2) drain_thread.join(timeout=5) diff --git a/tools/environments/modal_utils.py b/tools/environments/modal_utils.py index 161aad261d..4d68399e41 100644 --- a/tools/environments/modal_utils.py +++ b/tools/environments/modal_utils.py @@ -105,9 +105,11 @@ class BaseModalExecutionEnvironment(BaseEnvironment): if self._client_timeout_grace_seconds is not None: deadline = time.monotonic() + prepared.timeout + self._client_timeout_grace_seconds - _last_activity_touch = time.monotonic() - _modal_exec_start = time.monotonic() - _ACTIVITY_INTERVAL = 10.0 # match _wait_for_process cadence + _now = time.monotonic() + _activity_state = { + "last_touch": _now, + "start": _now, + } while True: if is_interrupted(): @@ -133,20 +135,11 @@ class BaseModalExecutionEnvironment(BaseEnvironment): return self._timeout_result_for_modal(prepared.timeout) # Periodic activity touch so the gateway knows we're alive - _now = time.monotonic() - if _now - _last_activity_touch >= _ACTIVITY_INTERVAL: - _last_activity_touch = _now - try: - from tools.environments.base import _get_activity_callback - _cb = _get_activity_callback() - except Exception: - _cb = None - if _cb: - try: - _elapsed = int(_now - _modal_exec_start) - _cb(f"modal command running ({_elapsed}s elapsed)") - except Exception: - pass + try: + from tools.environments.base import touch_activity_if_due + touch_activity_if_due(_activity_state, "modal command running") + except Exception: + pass time.sleep(self._poll_interval_seconds)