fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)

* fix: prevent infinite 400 failure loop on context overflow (#1630)

When a gateway session exceeds the model's context window, Anthropic may
return a generic 400 invalid_request_error with just 'Error' as the
message.  This bypassed the phrase-based context-length detection,
causing the agent to treat it as a non-retryable client error.  Worse,
the failed user message was still persisted to the transcript, making
the session even larger on each attempt — creating an infinite loop.

Three-layer fix:

1. run_agent.py — Fallback heuristic: when a 400 error has a very short
   generic message AND the session is large (>40% of context or >80
   messages), treat it as a probable context overflow and trigger
   compression instead of aborting.

2. run_agent.py + gateway/run.py — Don't persist failed messages:
   when the agent returns failed=True before generating any response,
   skip writing the user's message to the transcript/DB. This prevents
   the session from growing on each failure.

3. gateway/run.py — Smarter error messages: detect context-overflow
   failures and suggest /compact or /reset specifically, instead of a
   generic 'try again' that will fail identically.

* fix(skills): detect prompt injection patterns and block cache file reads

Adds two security layers to prevent prompt injection via skills hub
cache files (#1558):

1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory
   (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json
   was the original injection vector — untrusted skill descriptions
   in the catalog contained adversarial text that the model executed.

2. skill_view: warns when skills are loaded from outside the trusted
   ~/.hermes/skills/ directory, and detects common injection patterns
   in skill content ("ignore previous instructions", "<system>", etc.).

Cherry-picked from PR #1562 by ygd58.

---------

Co-authored-by: buray <ygd58@users.noreply.github.com>
This commit is contained in:
Teknium 2026-03-17 01:50:59 -07:00 committed by GitHub
parent 2d36819503
commit 96dac22194
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 436 additions and 32 deletions

View file

@ -1869,11 +1869,31 @@ class GatewayRunner:
# Surface error details when the agent failed silently (final_response=None)
if not response and agent_result.get("failed"):
error_detail = agent_result.get("error", "unknown error")
response = (
f"The request failed: {str(error_detail)[:300]}\n"
"Try again or use /reset to start a fresh session."
error_str = str(error_detail).lower()
# Detect context-overflow failures and give specific guidance.
# Generic 400 "Error" from Anthropic with large sessions is the
# most common cause of this (#1630).
_is_ctx_fail = any(p in error_str for p in (
"context", "token", "too large", "too long",
"exceed", "payload",
)) or (
"400" in error_str
and len(history) > 50
)
if _is_ctx_fail:
response = (
"⚠️ Session too large for the model's context window.\n"
"Use /compact to compress the conversation, or "
"/reset to start fresh."
)
else:
response = (
f"The request failed: {str(error_detail)[:300]}\n"
"Try again or use /reset to start a fresh session."
)
# If the agent's session_id changed during compression, update
# session_entry so transcript writes below go to the right session.
if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
@ -1920,12 +1940,30 @@ class GatewayRunner:
# This preserves the complete agent loop (tool_calls, tool results,
# intermediate reasoning) so sessions can be resumed with full context
# and transcripts are useful for debugging and training data.
#
# IMPORTANT: When the agent failed before producing any response
# (e.g. context-overflow 400), do NOT persist the user's message.
# Persisting it would make the session even larger, causing the
# same failure on the next attempt — an infinite loop. (#1630)
agent_failed_early = (
agent_result.get("failed")
and not agent_result.get("final_response")
)
if agent_failed_early:
logger.info(
"Skipping transcript persistence for failed request in "
"session %s to prevent session growth loop.",
session_entry.session_id,
)
ts = datetime.now().isoformat()
# If this is a fresh session (no history), write the full tool
# definitions as the first entry so the transcript is self-describing
# -- the same list of dicts sent as tools=[...] in the API request.
if not history:
if agent_failed_early:
pass # Skip all transcript writes — don't grow a broken session
elif not history:
tool_defs = agent_result.get("tools", [])
self.session_store.append_to_transcript(
session_entry.session_id,
@ -1942,36 +1980,37 @@ class GatewayRunner:
# Use the filtered history length (history_offset) that was actually
# passed to the agent, not len(history) which includes session_meta
# entries that were stripped before the agent saw them.
history_len = agent_result.get("history_offset", len(history))
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
# If no new messages found (edge case), fall back to simple user/assistant
if not new_messages:
self.session_store.append_to_transcript(
session_entry.session_id,
{"role": "user", "content": message_text, "timestamp": ts}
)
if response:
if not agent_failed_early:
history_len = agent_result.get("history_offset", len(history))
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
# If no new messages found (edge case), fall back to simple user/assistant
if not new_messages:
self.session_store.append_to_transcript(
session_entry.session_id,
{"role": "assistant", "content": response, "timestamp": ts}
)
else:
# The agent already persisted these messages to SQLite via
# _flush_messages_to_session_db(), so skip the DB write here
# to prevent the duplicate-write bug (#860). We still write
# to JSONL for backward compatibility and as a backup.
agent_persisted = self._session_db is not None
for msg in new_messages:
# Skip system messages (they're rebuilt each run)
if msg.get("role") == "system":
continue
# Add timestamp to each message for debugging
entry = {**msg, "timestamp": ts}
self.session_store.append_to_transcript(
session_entry.session_id, entry,
skip_db=agent_persisted,
{"role": "user", "content": message_text, "timestamp": ts}
)
if response:
self.session_store.append_to_transcript(
session_entry.session_id,
{"role": "assistant", "content": response, "timestamp": ts}
)
else:
# The agent already persisted these messages to SQLite via
# _flush_messages_to_session_db(), so skip the DB write here
# to prevent the duplicate-write bug (#860). We still write
# to JSONL for backward compatibility and as a backup.
agent_persisted = self._session_db is not None
for msg in new_messages:
# Skip system messages (they're rebuilt each run)
if msg.get("role") == "system":
continue
# Add timestamp to each message for debugging
entry = {**msg, "timestamp": ts}
self.session_store.append_to_transcript(
session_entry.session_id, entry,
skip_db=agent_persisted,
)
# Update session with actual prompt token count and model from the agent
self.session_store.update_session(
@ -2005,6 +2044,18 @@ class GatewayRunner:
status_hint = " You are being rate-limited. Please wait a moment and try again."
elif status_code == 529:
status_hint = " The API is temporarily overloaded. Please try again shortly."
elif status_code == 400:
# 400 with a large session is almost always a context overflow.
# Give specific guidance instead of a generic error. (#1630)
_hist_len = len(history) if 'history' in locals() else 0
if _hist_len > 50:
return (
"⚠️ Session too large for the model's context window.\n"
"Use /compact to compress the conversation, or "
"/reset to start fresh."
)
else:
status_hint = " The request was rejected by the API."
return (
f"Sorry, I encountered an error ({error_type}).\n"
f"{error_detail}\n"

View file

@ -5492,6 +5492,27 @@ class AIAgent:
'request entity too large', # OpenRouter/Nous 413 safety net
'prompt is too long', # Anthropic: "prompt is too long: N tokens > M maximum"
])
# Fallback heuristic: Anthropic sometimes returns a generic
# 400 invalid_request_error with just "Error" as the message
# when the context is too large. If the error message is very
# short/generic AND the session is large, treat it as a
# probable context-length error and attempt compression rather
# than aborting. This prevents an infinite failure loop where
# each failed message gets persisted, making the session even
# larger. (#1630)
if not is_context_length_error and status_code == 400:
ctx_len = getattr(getattr(self, 'context_compressor', None), 'context_length', 200000)
is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
is_generic_error = len(error_msg.strip()) < 30 # e.g. just "error"
if is_large_session and is_generic_error:
is_context_length_error = True
self._vprint(
f"{self.log_prefix}⚠️ Generic 400 with large session "
f"(~{approx_tokens:,} tokens, {len(api_messages)} msgs) — "
f"treating as probable context overflow.",
force=True,
)
if is_context_length_error:
compressor = self.context_compressor
@ -5591,7 +5612,19 @@ class AIAgent:
self._vprint(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.", force=True)
self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True)
logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
self._persist_session(messages, conversation_history)
# Skip session persistence when the error is likely
# context-overflow related (status 400 + large session).
# Persisting the failed user message would make the
# session even larger, causing the same failure on the
# next attempt. (#1630)
if status_code == 400 and (approx_tokens > 50000 or len(api_messages) > 80):
self._vprint(
f"{self.log_prefix}⚠️ Skipping session persistence "
f"for large failed session to prevent growth loop.",
force=True,
)
else:
self._persist_session(messages, conversation_history)
return {
"final_response": None,
"messages": messages,

View file

@ -0,0 +1,268 @@
"""Tests for #1630 — gateway infinite 400 failure loop prevention.
Verifies that:
1. Generic 400 errors with large sessions are treated as context-length errors
and trigger compression instead of aborting.
2. The gateway does not persist messages when the agent fails early, preventing
the session from growing on each failure.
3. Context-overflow failures produce helpful error messages suggesting /compact.
"""
import pytest
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
# ---------------------------------------------------------------------------
# Test 1: Agent heuristic — generic 400 with large session → compression
# ---------------------------------------------------------------------------
class TestGeneric400Heuristic:
    """The agent should treat a generic 400 with a large session as a
    probable context-length error and trigger compression, not abort.

    These tests replicate the heuristic's logic inline (phrase matching
    plus the large-session/generic-message fallback) rather than driving
    the full AIAgent error path.
    """

    def _make_agent(self):
        """Create a minimal AIAgent for testing error handling.

        NOTE(review): currently unused by the tests in this class; kept
        as a fixture for future integration tests of the real error path.
        """
        with (
            patch("run_agent.get_tool_definitions", return_value=[]),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
        ):
            from run_agent import AIAgent
            a = AIAgent(
                api_key="test-key-12345",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
            )
            a.client = MagicMock()
            a._cached_system_prompt = "You are helpful."
            a._use_prompt_caching = False
            a.tool_delay = 0
            a.compression_enabled = False
            return a

    def test_generic_400_with_small_session_is_client_error(self):
        """A generic 400 with a small session should still be treated
        as a non-retryable client error (not context overflow)."""
        error_msg = "error"
        approx_tokens = 1000  # Small session
        api_messages = [{"role": "user", "content": "hi"}]
        # Phrase matching must not fire on the generic message.
        is_context_length_error = any(phrase in error_msg for phrase in [
            'context length', 'context size', 'maximum context',
            'token limit', 'too many tokens', 'reduce the length',
            'exceeds the limit', 'context window',
            'request entity too large',
            'prompt is too long',
        ])
        assert not is_context_length_error
        # The heuristic should NOT trigger for small sessions.
        ctx_len = 200000
        is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
        is_generic_error = len(error_msg.strip()) < 30
        assert not is_large_session  # Small session → heuristic doesn't fire
        assert is_generic_error  # Message IS generic, but size gate blocks it

    def test_generic_400_with_large_token_count_triggers_heuristic(self):
        """A generic 400 with high token count should be treated as
        probable context overflow."""
        error_msg = "error"
        ctx_len = 200000
        approx_tokens = 100000  # > 40% of 200k
        api_messages = [{"role": "user", "content": "hi"}] * 20
        is_context_length_error = any(phrase in error_msg for phrase in [
            'context length', 'context size', 'maximum context',
        ])
        assert not is_context_length_error
        # Heuristic check: both conditions true → treated as context overflow.
        is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
        is_generic_error = len(error_msg.strip()) < 30
        assert is_large_session
        assert is_generic_error

    def test_generic_400_with_many_messages_triggers_heuristic(self):
        """A generic 400 with >80 messages should trigger the heuristic
        even if estimated tokens are low."""
        error_msg = "error"
        ctx_len = 200000
        approx_tokens = 5000  # Low token estimate
        api_messages = [{"role": "user", "content": "x"}] * 100  # > 80 messages
        is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
        is_generic_error = len(error_msg.strip()) < 30
        assert is_large_session
        assert is_generic_error

    def test_specific_error_message_bypasses_heuristic(self):
        """A 400 with a specific, long error message should NOT trigger
        the heuristic even with a large session."""
        error_msg = "invalid model: anthropic/claude-nonexistent-model is not available"
        ctx_len = 200000
        approx_tokens = 100000
        is_large_session = approx_tokens > ctx_len * 0.4
        is_generic_error = len(error_msg.strip()) < 30
        assert is_large_session  # Session IS large...
        assert not is_generic_error  # ...but the specific message blocks the heuristic

    def test_descriptive_context_error_caught_by_phrases(self):
        """Descriptive context-length errors should still be caught by
        the existing phrase matching (not the heuristic)."""
        error_msg = "prompt is too long: 250000 tokens > 200000 maximum"
        is_context_length_error = any(phrase in error_msg for phrase in [
            'context length', 'context size', 'maximum context',
            'token limit', 'too many tokens', 'reduce the length',
            'exceeds the limit', 'context window',
            'request entity too large',
            'prompt is too long',
        ])
        assert is_context_length_error
# ---------------------------------------------------------------------------
# Test 2: Gateway skips persistence on failed agent results
# ---------------------------------------------------------------------------
class TestGatewaySkipsPersistenceOnFailure:
    """When the agent returns failed=True with no final_response,
    the gateway should NOT persist messages to the transcript."""

    @staticmethod
    def _failed_early(agent_result):
        """Mirror of the gateway's agent_failed_early computation."""
        return bool(
            agent_result.get("failed")
            and not agent_result.get("final_response")
        )

    def test_agent_failed_early_detected(self):
        """failed=True with no final_response counts as failed-early."""
        agent_result = {
            "failed": True,
            "final_response": None,
            "messages": [],
            "error": "Non-retryable client error",
        }
        assert self._failed_early(agent_result)

    def test_agent_with_response_not_failed_early(self):
        """A final_response means the run is not failed-early, even
        when failed=True."""
        agent_result = {
            "failed": True,
            "final_response": "Here is a partial response",
            "messages": [],
        }
        assert not self._failed_early(agent_result)

    def test_successful_agent_not_failed_early(self):
        """A successful agent result should not trigger the skip."""
        agent_result = {
            "final_response": "Hello!",
            "messages": [{"role": "assistant", "content": "Hello!"}],
        }
        assert not self._failed_early(agent_result)
# ---------------------------------------------------------------------------
# Test 3: Context-overflow error messages
# ---------------------------------------------------------------------------
class TestContextOverflowErrorMessages:
    """The gateway should produce helpful error messages when the failure
    looks like a context overflow."""

    # Substrings the gateway scans for in lowercased error text.
    _CTX_MARKERS = (
        "context", "token", "too large", "too long",
        "exceed", "payload",
    )

    def _is_ctx_fail(self, error_str, history_len):
        """Mirror of the gateway's context-failure detection."""
        lowered = error_str.lower()
        if any(marker in lowered for marker in self._CTX_MARKERS):
            return True
        # Generic 400 combined with a long session also counts.
        return "400" in lowered and history_len > 50

    def test_detects_context_keywords(self):
        """Error messages containing context-related keywords should be
        identified as context failures."""
        keywords = [
            "context length exceeded",
            "too many tokens in the prompt",
            "request entity too large",
            "payload too large for model",
            "context window exceeded",
        ]
        for error_str in keywords:
            assert self._is_ctx_fail(error_str, 0), f"Should detect: {error_str}"

    def test_detects_generic_400_with_large_history(self):
        """A generic 400 error code in the string with a large history
        should be flagged as context failure."""
        error_str = "error code: 400 - {'type': 'error', 'message': 'Error'}"
        assert self._is_ctx_fail(error_str, 100)

    def test_unrelated_error_not_flagged(self):
        """Unrelated errors should not be flagged as context failures."""
        error_str = "invalid api key: authentication failed"
        assert not self._is_ctx_fail(error_str, 10)
# ---------------------------------------------------------------------------
# Test 4: Agent skips persistence for large failed sessions
# ---------------------------------------------------------------------------
class TestAgentSkipsPersistenceForLargeFailedSessions:
    """When a 400 error occurs and the session is large, the agent
    should skip persisting to prevent the growth loop."""

    @staticmethod
    def _should_skip(status_code, approx_tokens, message_count):
        """Mirror of the agent's skip-persistence condition."""
        if status_code != 400:
            return False
        return approx_tokens > 50000 or message_count > 80

    def test_large_session_400_skips_persistence(self):
        """Status 400 + high token count should skip persistence."""
        api_messages = [{"role": "user", "content": "x"}] * 10
        # 60k estimated tokens is over the 50k threshold.
        assert self._should_skip(400, 60000, len(api_messages))

    def test_small_session_400_persists_normally(self):
        """Status 400 + small session should still persist."""
        api_messages = [{"role": "user", "content": "x"}] * 10  # < 80
        assert not self._should_skip(400, 5000, len(api_messages))

    def test_non_400_error_persists_normally(self):
        """Non-400 errors should always persist normally."""
        api_messages = [{"role": "user", "content": "x"}] * 100
        # Large session, but a 401 auth error is not a context overflow.
        assert not self._should_skip(401, 100000, len(api_messages))

View file

@ -169,6 +169,27 @@ def clear_file_ops_cache(task_id: str = None):
def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "default") -> str:
"""Read a file with pagination and line numbers."""
try:
# Security: block direct reads of internal Hermes cache/index files
# to prevent prompt injection via catalog or hub metadata files.
import pathlib as _pathlib
_resolved = _pathlib.Path(path).expanduser().resolve()
_hermes_home = _pathlib.Path("~/.hermes").expanduser().resolve()
_blocked_dirs = [
_hermes_home / "skills" / ".hub" / "index-cache",
_hermes_home / "skills" / ".hub",
]
for _blocked in _blocked_dirs:
try:
_resolved.relative_to(_blocked)
return json.dumps({
"error": (
f"Access denied: {path} is an internal Hermes cache file "
"and cannot be read directly to prevent prompt injection. "
"Use the skills_list or skill_view tools instead."
)
})
except ValueError:
pass
file_ops = _get_file_ops(task_id)
result = file_ops.read_file(path, offset, limit)
if result.content:

View file

@ -873,6 +873,37 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
ensure_ascii=False,
)
# Security: warn if skill is loaded from outside the trusted skills directory
try:
skill_md.resolve().relative_to(SKILLS_DIR.resolve())
_outside_skills_dir = False
except ValueError:
_outside_skills_dir = True
# Security: detect common prompt injection patterns
_INJECTION_PATTERNS = [
"ignore previous instructions",
"ignore all previous",
"you are now",
"disregard your",
"forget your instructions",
"new instructions:",
"system prompt:",
"<system>",
"]]>",
]
_content_lower = content.lower()
_injection_detected = any(p in _content_lower for p in _INJECTION_PATTERNS)
if _outside_skills_dir or _injection_detected:
_warnings = []
if _outside_skills_dir:
_warnings.append(f"skill file is outside the trusted skills directory (~/.hermes/skills/): {skill_md}")
if _injection_detected:
_warnings.append("skill content contains patterns that may indicate prompt injection")
import logging as _logging
_logging.getLogger(__name__).warning("Skill security warning for '%s': %s", name, "; ".join(_warnings))
parsed_frontmatter: Dict[str, Any] = {}
try:
parsed_frontmatter, _ = _parse_frontmatter(content)