fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
This commit is contained in:
Teknium 2026-03-17 01:50:59 -07:00 committed by GitHub
parent 2d36819503
commit 96dac22194
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 436 additions and 32 deletions

View file

@ -1869,11 +1869,31 @@ class GatewayRunner:
# Surface error details when the agent failed silently (final_response=None)
if not response and agent_result.get("failed"):
error_detail = agent_result.get("error", "unknown error")
response = (
f"The request failed: {str(error_detail)[:300]}\n"
"Try again or use /reset to start a fresh session."
error_str = str(error_detail).lower()
# Detect context-overflow failures and give specific guidance.
# Generic 400 "Error" from Anthropic with large sessions is the
# most common cause of this (#1630).
_is_ctx_fail = any(p in error_str for p in (
"context", "token", "too large", "too long",
"exceed", "payload",
)) or (
"400" in error_str
and len(history) > 50
)
if _is_ctx_fail:
response = (
"⚠️ Session too large for the model's context window.\n"
"Use /compact to compress the conversation, or "
"/reset to start fresh."
)
else:
response = (
f"The request failed: {str(error_detail)[:300]}\n"
"Try again or use /reset to start a fresh session."
)
# If the agent's session_id changed during compression, update
# session_entry so transcript writes below go to the right session.
if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
@ -1920,12 +1940,30 @@ class GatewayRunner:
# This preserves the complete agent loop (tool_calls, tool results,
# intermediate reasoning) so sessions can be resumed with full context
# and transcripts are useful for debugging and training data.
#
# IMPORTANT: When the agent failed before producing any response
# (e.g. context-overflow 400), do NOT persist the user's message.
# Persisting it would make the session even larger, causing the
# same failure on the next attempt — an infinite loop. (#1630)
agent_failed_early = (
agent_result.get("failed")
and not agent_result.get("final_response")
)
if agent_failed_early:
logger.info(
"Skipping transcript persistence for failed request in "
"session %s to prevent session growth loop.",
session_entry.session_id,
)
ts = datetime.now().isoformat()
# If this is a fresh session (no history), write the full tool
# definitions as the first entry so the transcript is self-describing
# -- the same list of dicts sent as tools=[...] in the API request.
if not history:
if agent_failed_early:
pass # Skip all transcript writes — don't grow a broken session
elif not history:
tool_defs = agent_result.get("tools", [])
self.session_store.append_to_transcript(
session_entry.session_id,
@ -1942,36 +1980,37 @@ class GatewayRunner:
# Use the filtered history length (history_offset) that was actually
# passed to the agent, not len(history) which includes session_meta
# entries that were stripped before the agent saw them.
history_len = agent_result.get("history_offset", len(history))
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
# If no new messages found (edge case), fall back to simple user/assistant
if not new_messages:
self.session_store.append_to_transcript(
session_entry.session_id,
{"role": "user", "content": message_text, "timestamp": ts}
)
if response:
if not agent_failed_early:
history_len = agent_result.get("history_offset", len(history))
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
# If no new messages found (edge case), fall back to simple user/assistant
if not new_messages:
self.session_store.append_to_transcript(
session_entry.session_id,
{"role": "assistant", "content": response, "timestamp": ts}
)
else:
# The agent already persisted these messages to SQLite via
# _flush_messages_to_session_db(), so skip the DB write here
# to prevent the duplicate-write bug (#860). We still write
# to JSONL for backward compatibility and as a backup.
agent_persisted = self._session_db is not None
for msg in new_messages:
# Skip system messages (they're rebuilt each run)
if msg.get("role") == "system":
continue
# Add timestamp to each message for debugging
entry = {**msg, "timestamp": ts}
self.session_store.append_to_transcript(
session_entry.session_id, entry,
skip_db=agent_persisted,
{"role": "user", "content": message_text, "timestamp": ts}
)
if response:
self.session_store.append_to_transcript(
session_entry.session_id,
{"role": "assistant", "content": response, "timestamp": ts}
)
else:
# The agent already persisted these messages to SQLite via
# _flush_messages_to_session_db(), so skip the DB write here
# to prevent the duplicate-write bug (#860). We still write
# to JSONL for backward compatibility and as a backup.
agent_persisted = self._session_db is not None
for msg in new_messages:
# Skip system messages (they're rebuilt each run)
if msg.get("role") == "system":
continue
# Add timestamp to each message for debugging
entry = {**msg, "timestamp": ts}
self.session_store.append_to_transcript(
session_entry.session_id, entry,
skip_db=agent_persisted,
)
# Update session with actual prompt token count and model from the agent
self.session_store.update_session(
@ -2005,6 +2044,18 @@ class GatewayRunner:
status_hint = " You are being rate-limited. Please wait a moment and try again."
elif status_code == 529:
status_hint = " The API is temporarily overloaded. Please try again shortly."
elif status_code == 400:
# 400 with a large session is almost always a context overflow.
# Give specific guidance instead of a generic error. (#1630)
_hist_len = len(history) if 'history' in locals() else 0
if _hist_len > 50:
return (
"⚠️ Session too large for the model's context window.\n"
"Use /compact to compress the conversation, or "
"/reset to start fresh."
)
else:
status_hint = " The request was rejected by the API."
return (
f"Sorry, I encountered an error ({error_type}).\n"
f"{error_detail}\n"

View file

@ -5492,6 +5492,27 @@ class AIAgent:
'request entity too large', # OpenRouter/Nous 413 safety net
'prompt is too long', # Anthropic: "prompt is too long: N tokens > M maximum"
])
# Fallback heuristic: Anthropic sometimes returns a generic
# 400 invalid_request_error with just "Error" as the message
# when the context is too large. If the error message is very
# short/generic AND the session is large, treat it as a
# probable context-length error and attempt compression rather
# than aborting. This prevents an infinite failure loop where
# each failed message gets persisted, making the session even
# larger. (#1630)
if not is_context_length_error and status_code == 400:
ctx_len = getattr(getattr(self, 'context_compressor', None), 'context_length', 200000)
is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
is_generic_error = len(error_msg.strip()) < 30 # e.g. just "error"
if is_large_session and is_generic_error:
is_context_length_error = True
self._vprint(
f"{self.log_prefix}⚠️ Generic 400 with large session "
f"(~{approx_tokens:,} tokens, {len(api_messages)} msgs) — "
f"treating as probable context overflow.",
force=True,
)
if is_context_length_error:
compressor = self.context_compressor
@ -5591,7 +5612,19 @@ class AIAgent:
self._vprint(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.", force=True)
self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True)
logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
self._persist_session(messages, conversation_history)
# Skip session persistence when the error is likely
# context-overflow related (status 400 + large session).
# Persisting the failed user message would make the
# session even larger, causing the same failure on the
# next attempt. (#1630)
if status_code == 400 and (approx_tokens > 50000 or len(api_messages) > 80):
self._vprint(
f"{self.log_prefix}⚠️ Skipping session persistence "
f"for large failed session to prevent growth loop.",
force=True,
)
else:
self._persist_session(messages, conversation_history)
return {
"final_response": None,
"messages": messages,

View file

@ -0,0 +1,268 @@
"""Tests for #1630 — gateway infinite 400 failure loop prevention.
Verifies that:
1. Generic 400 errors with large sessions are treated as context-length errors
and trigger compression instead of aborting.
2. The gateway does not persist messages when the agent fails early, preventing
the session from growing on each failure.
3. Context-overflow failures produce helpful error messages suggesting /compact.
"""
import pytest
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
# ---------------------------------------------------------------------------
# Test 1: Agent heuristic — generic 400 with large session → compression
# ---------------------------------------------------------------------------
class TestGeneric400Heuristic:
    """The agent should treat a generic 400 with a large session as a
    probable context-length error and trigger compression, not abort.

    These tests replicate the heuristic's logic inline (phrase matching
    plus the large-session/generic-message fallback) rather than driving
    the full AIAgent error path.
    """

    def _make_agent(self):
        """Create a minimal AIAgent for testing error handling.

        NOTE(review): currently unused by the tests in this class; kept
        as a fixture for future integration tests of the real error path.
        """
        with (
            patch("run_agent.get_tool_definitions", return_value=[]),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
        ):
            from run_agent import AIAgent
            a = AIAgent(
                api_key="test-key-12345",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
            )
            a.client = MagicMock()
            a._cached_system_prompt = "You are helpful."
            a._use_prompt_caching = False
            a.tool_delay = 0
            a.compression_enabled = False
            return a

    def test_generic_400_with_small_session_is_client_error(self):
        """A generic 400 with a small session should still be treated
        as a non-retryable client error (not context overflow)."""
        error_msg = "error"
        approx_tokens = 1000  # Small session
        api_messages = [{"role": "user", "content": "hi"}]
        # Phrase matching must not fire on the generic message.
        is_context_length_error = any(phrase in error_msg for phrase in [
            'context length', 'context size', 'maximum context',
            'token limit', 'too many tokens', 'reduce the length',
            'exceeds the limit', 'context window',
            'request entity too large',
            'prompt is too long',
        ])
        assert not is_context_length_error
        # The heuristic should NOT trigger for small sessions.
        ctx_len = 200000
        is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
        is_generic_error = len(error_msg.strip()) < 30
        assert not is_large_session  # Small session → heuristic doesn't fire
        assert is_generic_error  # Message IS generic, but size gate blocks it

    def test_generic_400_with_large_token_count_triggers_heuristic(self):
        """A generic 400 with high token count should be treated as
        probable context overflow."""
        error_msg = "error"
        ctx_len = 200000
        approx_tokens = 100000  # > 40% of 200k
        api_messages = [{"role": "user", "content": "hi"}] * 20
        is_context_length_error = any(phrase in error_msg for phrase in [
            'context length', 'context size', 'maximum context',
        ])
        assert not is_context_length_error
        # Heuristic check: both conditions true → treated as context overflow.
        is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
        is_generic_error = len(error_msg.strip()) < 30
        assert is_large_session
        assert is_generic_error

    def test_generic_400_with_many_messages_triggers_heuristic(self):
        """A generic 400 with >80 messages should trigger the heuristic
        even if estimated tokens are low."""
        error_msg = "error"
        ctx_len = 200000
        approx_tokens = 5000  # Low token estimate
        api_messages = [{"role": "user", "content": "x"}] * 100  # > 80 messages
        is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
        is_generic_error = len(error_msg.strip()) < 30
        assert is_large_session
        assert is_generic_error

    def test_specific_error_message_bypasses_heuristic(self):
        """A 400 with a specific, long error message should NOT trigger
        the heuristic even with a large session."""
        error_msg = "invalid model: anthropic/claude-nonexistent-model is not available"
        ctx_len = 200000
        approx_tokens = 100000
        is_large_session = approx_tokens > ctx_len * 0.4
        is_generic_error = len(error_msg.strip()) < 30
        assert is_large_session  # Session IS large...
        assert not is_generic_error  # ...but the specific message blocks the heuristic

    def test_descriptive_context_error_caught_by_phrases(self):
        """Descriptive context-length errors should still be caught by
        the existing phrase matching (not the heuristic)."""
        error_msg = "prompt is too long: 250000 tokens > 200000 maximum"
        is_context_length_error = any(phrase in error_msg for phrase in [
            'context length', 'context size', 'maximum context',
            'token limit', 'too many tokens', 'reduce the length',
            'exceeds the limit', 'context window',
            'request entity too large',
            'prompt is too long',
        ])
        assert is_context_length_error
# ---------------------------------------------------------------------------
# Test 2: Gateway skips persistence on failed agent results
# ---------------------------------------------------------------------------
class TestGatewaySkipsPersistenceOnFailure:
    """When the agent returns failed=True with no final_response,
    the gateway should NOT persist messages to the transcript."""

    @staticmethod
    def _failed_early(agent_result):
        """Mirror of the gateway's agent_failed_early computation."""
        return bool(
            agent_result.get("failed")
            and not agent_result.get("final_response")
        )

    def test_agent_failed_early_detected(self):
        """failed=True with no final_response counts as failed-early."""
        agent_result = {
            "failed": True,
            "final_response": None,
            "messages": [],
            "error": "Non-retryable client error",
        }
        assert self._failed_early(agent_result)

    def test_agent_with_response_not_failed_early(self):
        """A final_response means the run is not failed-early, even
        when failed=True."""
        agent_result = {
            "failed": True,
            "final_response": "Here is a partial response",
            "messages": [],
        }
        assert not self._failed_early(agent_result)

    def test_successful_agent_not_failed_early(self):
        """A successful agent result should not trigger the skip."""
        agent_result = {
            "final_response": "Hello!",
            "messages": [{"role": "assistant", "content": "Hello!"}],
        }
        assert not self._failed_early(agent_result)
# ---------------------------------------------------------------------------
# Test 3: Context-overflow error messages
# ---------------------------------------------------------------------------
class TestContextOverflowErrorMessages:
    """The gateway should produce helpful error messages when the failure
    looks like a context overflow."""

    # Substrings the gateway scans for in lowercased error text.
    _CTX_MARKERS = (
        "context", "token", "too large", "too long",
        "exceed", "payload",
    )

    def _is_ctx_fail(self, error_str, history_len):
        """Mirror of the gateway's context-failure detection."""
        lowered = error_str.lower()
        if any(marker in lowered for marker in self._CTX_MARKERS):
            return True
        # Generic 400 combined with a long session also counts.
        return "400" in lowered and history_len > 50

    def test_detects_context_keywords(self):
        """Error messages containing context-related keywords should be
        identified as context failures."""
        keywords = [
            "context length exceeded",
            "too many tokens in the prompt",
            "request entity too large",
            "payload too large for model",
            "context window exceeded",
        ]
        for error_str in keywords:
            assert self._is_ctx_fail(error_str, 0), f"Should detect: {error_str}"

    def test_detects_generic_400_with_large_history(self):
        """A generic 400 error code in the string with a large history
        should be flagged as context failure."""
        error_str = "error code: 400 - {'type': 'error', 'message': 'Error'}"
        assert self._is_ctx_fail(error_str, 100)

    def test_unrelated_error_not_flagged(self):
        """Unrelated errors should not be flagged as context failures."""
        error_str = "invalid api key: authentication failed"
        assert not self._is_ctx_fail(error_str, 10)
# ---------------------------------------------------------------------------
# Test 4: Agent skips persistence for large failed sessions
# ---------------------------------------------------------------------------
class TestAgentSkipsPersistenceForLargeFailedSessions:
    """When a 400 error occurs and the session is large, the agent
    should skip persisting to prevent the growth loop."""

    @staticmethod
    def _should_skip(status_code, approx_tokens, message_count):
        """Mirror of the agent's skip-persistence condition."""
        if status_code != 400:
            return False
        return approx_tokens > 50000 or message_count > 80

    def test_large_session_400_skips_persistence(self):
        """Status 400 + high token count should skip persistence."""
        api_messages = [{"role": "user", "content": "x"}] * 10
        # 60k estimated tokens is over the 50k threshold.
        assert self._should_skip(400, 60000, len(api_messages))

    def test_small_session_400_persists_normally(self):
        """Status 400 + small session should still persist."""
        api_messages = [{"role": "user", "content": "x"}] * 10  # < 80
        assert not self._should_skip(400, 5000, len(api_messages))

    def test_non_400_error_persists_normally(self):
        """Non-400 errors should always persist normally."""
        api_messages = [{"role": "user", "content": "x"}] * 100
        # Large session, but a 401 auth error is not a context overflow.
        assert not self._should_skip(401, 100000, len(api_messages))

View file

@ -169,6 +169,27 @@ def clear_file_ops_cache(task_id: str = None):
def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "default") -> str:
"""Read a file with pagination and line numbers."""
try:
# Security: block direct reads of internal Hermes cache/index files
# to prevent prompt injection via catalog or hub metadata files.
import pathlib as _pathlib
_resolved = _pathlib.Path(path).expanduser().resolve()
_hermes_home = _pathlib.Path("~/.hermes").expanduser().resolve()
_blocked_dirs = [
_hermes_home / "skills" / ".hub" / "index-cache",
_hermes_home / "skills" / ".hub",
]
for _blocked in _blocked_dirs:
try:
_resolved.relative_to(_blocked)
return json.dumps({
"error": (
f"Access denied: {path} is an internal Hermes cache file "
"and cannot be read directly to prevent prompt injection. "
"Use the skills_list or skill_view tools instead."
)
})
except ValueError:
pass
file_ops = _get_file_ops(task_id)
result = file_ops.read_file(path, offset, limit)
if result.content:

View file

@ -873,6 +873,37 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
ensure_ascii=False,
)
# Security: warn if skill is loaded from outside the trusted skills directory
try:
skill_md.resolve().relative_to(SKILLS_DIR.resolve())
_outside_skills_dir = False
except ValueError:
_outside_skills_dir = True
# Security: detect common prompt injection patterns
_INJECTION_PATTERNS = [
"ignore previous instructions",
"ignore all previous",
"you are now",
"disregard your",
"forget your instructions",
"new instructions:",
"system prompt:",
"<system>",
"]]>",
]
_content_lower = content.lower()
_injection_detected = any(p in _content_lower for p in _INJECTION_PATTERNS)
if _outside_skills_dir or _injection_detected:
_warnings = []
if _outside_skills_dir:
_warnings.append(f"skill file is outside the trusted skills directory (~/.hermes/skills/): {skill_md}")
if _injection_detected:
_warnings.append("skill content contains patterns that may indicate prompt injection")
import logging as _logging
_logging.getLogger(__name__).warning("Skill security warning for '%s': %s", name, "; ".join(_warnings))
parsed_frontmatter: Dict[str, Any] = {}
try:
parsed_frontmatter, _ = _parse_frontmatter(content)