mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)
* fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. --------- Co-authored-by: buray <ygd58@users.noreply.github.com>
This commit is contained in:
parent
2d36819503
commit
96dac22194
5 changed files with 436 additions and 32 deletions
113
gateway/run.py
113
gateway/run.py
|
|
@ -1869,11 +1869,31 @@ class GatewayRunner:
|
|||
# Surface error details when the agent failed silently (final_response=None)
|
||||
if not response and agent_result.get("failed"):
|
||||
error_detail = agent_result.get("error", "unknown error")
|
||||
response = (
|
||||
f"The request failed: {str(error_detail)[:300]}\n"
|
||||
"Try again or use /reset to start a fresh session."
|
||||
error_str = str(error_detail).lower()
|
||||
|
||||
# Detect context-overflow failures and give specific guidance.
|
||||
# Generic 400 "Error" from Anthropic with large sessions is the
|
||||
# most common cause of this (#1630).
|
||||
_is_ctx_fail = any(p in error_str for p in (
|
||||
"context", "token", "too large", "too long",
|
||||
"exceed", "payload",
|
||||
)) or (
|
||||
"400" in error_str
|
||||
and len(history) > 50
|
||||
)
|
||||
|
||||
if _is_ctx_fail:
|
||||
response = (
|
||||
"⚠️ Session too large for the model's context window.\n"
|
||||
"Use /compact to compress the conversation, or "
|
||||
"/reset to start fresh."
|
||||
)
|
||||
else:
|
||||
response = (
|
||||
f"The request failed: {str(error_detail)[:300]}\n"
|
||||
"Try again or use /reset to start a fresh session."
|
||||
)
|
||||
|
||||
# If the agent's session_id changed during compression, update
|
||||
# session_entry so transcript writes below go to the right session.
|
||||
if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
|
||||
|
|
@ -1920,12 +1940,30 @@ class GatewayRunner:
|
|||
# This preserves the complete agent loop (tool_calls, tool results,
|
||||
# intermediate reasoning) so sessions can be resumed with full context
|
||||
# and transcripts are useful for debugging and training data.
|
||||
#
|
||||
# IMPORTANT: When the agent failed before producing any response
|
||||
# (e.g. context-overflow 400), do NOT persist the user's message.
|
||||
# Persisting it would make the session even larger, causing the
|
||||
# same failure on the next attempt — an infinite loop. (#1630)
|
||||
agent_failed_early = (
|
||||
agent_result.get("failed")
|
||||
and not agent_result.get("final_response")
|
||||
)
|
||||
if agent_failed_early:
|
||||
logger.info(
|
||||
"Skipping transcript persistence for failed request in "
|
||||
"session %s to prevent session growth loop.",
|
||||
session_entry.session_id,
|
||||
)
|
||||
|
||||
ts = datetime.now().isoformat()
|
||||
|
||||
# If this is a fresh session (no history), write the full tool
|
||||
# definitions as the first entry so the transcript is self-describing
|
||||
# -- the same list of dicts sent as tools=[...] in the API request.
|
||||
if not history:
|
||||
if agent_failed_early:
|
||||
pass # Skip all transcript writes — don't grow a broken session
|
||||
elif not history:
|
||||
tool_defs = agent_result.get("tools", [])
|
||||
self.session_store.append_to_transcript(
|
||||
session_entry.session_id,
|
||||
|
|
@ -1942,36 +1980,37 @@ class GatewayRunner:
|
|||
# Use the filtered history length (history_offset) that was actually
|
||||
# passed to the agent, not len(history) which includes session_meta
|
||||
# entries that were stripped before the agent saw them.
|
||||
history_len = agent_result.get("history_offset", len(history))
|
||||
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
|
||||
|
||||
# If no new messages found (edge case), fall back to simple user/assistant
|
||||
if not new_messages:
|
||||
self.session_store.append_to_transcript(
|
||||
session_entry.session_id,
|
||||
{"role": "user", "content": message_text, "timestamp": ts}
|
||||
)
|
||||
if response:
|
||||
if not agent_failed_early:
|
||||
history_len = agent_result.get("history_offset", len(history))
|
||||
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
|
||||
|
||||
# If no new messages found (edge case), fall back to simple user/assistant
|
||||
if not new_messages:
|
||||
self.session_store.append_to_transcript(
|
||||
session_entry.session_id,
|
||||
{"role": "assistant", "content": response, "timestamp": ts}
|
||||
)
|
||||
else:
|
||||
# The agent already persisted these messages to SQLite via
|
||||
# _flush_messages_to_session_db(), so skip the DB write here
|
||||
# to prevent the duplicate-write bug (#860). We still write
|
||||
# to JSONL for backward compatibility and as a backup.
|
||||
agent_persisted = self._session_db is not None
|
||||
for msg in new_messages:
|
||||
# Skip system messages (they're rebuilt each run)
|
||||
if msg.get("role") == "system":
|
||||
continue
|
||||
# Add timestamp to each message for debugging
|
||||
entry = {**msg, "timestamp": ts}
|
||||
self.session_store.append_to_transcript(
|
||||
session_entry.session_id, entry,
|
||||
skip_db=agent_persisted,
|
||||
{"role": "user", "content": message_text, "timestamp": ts}
|
||||
)
|
||||
if response:
|
||||
self.session_store.append_to_transcript(
|
||||
session_entry.session_id,
|
||||
{"role": "assistant", "content": response, "timestamp": ts}
|
||||
)
|
||||
else:
|
||||
# The agent already persisted these messages to SQLite via
|
||||
# _flush_messages_to_session_db(), so skip the DB write here
|
||||
# to prevent the duplicate-write bug (#860). We still write
|
||||
# to JSONL for backward compatibility and as a backup.
|
||||
agent_persisted = self._session_db is not None
|
||||
for msg in new_messages:
|
||||
# Skip system messages (they're rebuilt each run)
|
||||
if msg.get("role") == "system":
|
||||
continue
|
||||
# Add timestamp to each message for debugging
|
||||
entry = {**msg, "timestamp": ts}
|
||||
self.session_store.append_to_transcript(
|
||||
session_entry.session_id, entry,
|
||||
skip_db=agent_persisted,
|
||||
)
|
||||
|
||||
# Update session with actual prompt token count and model from the agent
|
||||
self.session_store.update_session(
|
||||
|
|
@ -2005,6 +2044,18 @@ class GatewayRunner:
|
|||
status_hint = " You are being rate-limited. Please wait a moment and try again."
|
||||
elif status_code == 529:
|
||||
status_hint = " The API is temporarily overloaded. Please try again shortly."
|
||||
elif status_code == 400:
|
||||
# 400 with a large session is almost always a context overflow.
|
||||
# Give specific guidance instead of a generic error. (#1630)
|
||||
_hist_len = len(history) if 'history' in locals() else 0
|
||||
if _hist_len > 50:
|
||||
return (
|
||||
"⚠️ Session too large for the model's context window.\n"
|
||||
"Use /compact to compress the conversation, or "
|
||||
"/reset to start fresh."
|
||||
)
|
||||
else:
|
||||
status_hint = " The request was rejected by the API."
|
||||
return (
|
||||
f"Sorry, I encountered an error ({error_type}).\n"
|
||||
f"{error_detail}\n"
|
||||
|
|
|
|||
35
run_agent.py
35
run_agent.py
|
|
@ -5492,6 +5492,27 @@ class AIAgent:
|
|||
'request entity too large', # OpenRouter/Nous 413 safety net
|
||||
'prompt is too long', # Anthropic: "prompt is too long: N tokens > M maximum"
|
||||
])
|
||||
|
||||
# Fallback heuristic: Anthropic sometimes returns a generic
|
||||
# 400 invalid_request_error with just "Error" as the message
|
||||
# when the context is too large. If the error message is very
|
||||
# short/generic AND the session is large, treat it as a
|
||||
# probable context-length error and attempt compression rather
|
||||
# than aborting. This prevents an infinite failure loop where
|
||||
# each failed message gets persisted, making the session even
|
||||
# larger. (#1630)
|
||||
if not is_context_length_error and status_code == 400:
|
||||
ctx_len = getattr(getattr(self, 'context_compressor', None), 'context_length', 200000)
|
||||
is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
|
||||
is_generic_error = len(error_msg.strip()) < 30 # e.g. just "error"
|
||||
if is_large_session and is_generic_error:
|
||||
is_context_length_error = True
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ Generic 400 with large session "
|
||||
f"(~{approx_tokens:,} tokens, {len(api_messages)} msgs) — "
|
||||
f"treating as probable context overflow.",
|
||||
force=True,
|
||||
)
|
||||
|
||||
if is_context_length_error:
|
||||
compressor = self.context_compressor
|
||||
|
|
@ -5591,7 +5612,19 @@ class AIAgent:
|
|||
self._vprint(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.", force=True)
|
||||
self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True)
|
||||
logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
|
||||
self._persist_session(messages, conversation_history)
|
||||
# Skip session persistence when the error is likely
|
||||
# context-overflow related (status 400 + large session).
|
||||
# Persisting the failed user message would make the
|
||||
# session even larger, causing the same failure on the
|
||||
# next attempt. (#1630)
|
||||
if status_code == 400 and (approx_tokens > 50000 or len(api_messages) > 80):
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ Skipping session persistence "
|
||||
f"for large failed session to prevent growth loop.",
|
||||
force=True,
|
||||
)
|
||||
else:
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"final_response": None,
|
||||
"messages": messages,
|
||||
|
|
|
|||
268
tests/test_1630_context_overflow_loop.py
Normal file
268
tests/test_1630_context_overflow_loop.py
Normal file
|
|
@ -0,0 +1,268 @@
|
|||
"""Tests for #1630 — gateway infinite 400 failure loop prevention.
|
||||
|
||||
Verifies that:
|
||||
1. Generic 400 errors with large sessions are treated as context-length errors
|
||||
and trigger compression instead of aborting.
|
||||
2. The gateway does not persist messages when the agent fails early, preventing
|
||||
the session from growing on each failure.
|
||||
3. Context-overflow failures produce helpful error messages suggesting /compact.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: Agent heuristic — generic 400 with large session → compression
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGeneric400Heuristic:
    """The agent should treat a generic 400 with a large session as a
    probable context-length error and trigger compression, not abort."""

    # Phrases the agent's phrase-based detection matches against.
    # NOTE(review): duplicated from run_agent.py's detection list — keep in
    # sync if the production set changes.
    _CTX_PHRASES = [
        'context length', 'context size', 'maximum context',
        'token limit', 'too many tokens', 'reduce the length',
        'exceeds the limit', 'context window',
        'request entity too large',
        'prompt is too long',
    ]

    def _make_agent(self):
        """Create a minimal AIAgent for testing error handling.

        NOTE(review): not used by the current tests, which exercise the
        heuristic logic directly; kept as a fixture for future tests that
        need a live agent instance.
        """
        with (
            patch("run_agent.get_tool_definitions", return_value=[]),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
        ):
            from run_agent import AIAgent
            agent = AIAgent(
                api_key="test-key-12345",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=True,
            )
        agent.client = MagicMock()
        agent._cached_system_prompt = "You are helpful."
        agent._use_prompt_caching = False
        agent.tool_delay = 0
        agent.compression_enabled = False
        return agent

    def test_generic_400_with_small_session_is_client_error(self):
        """A generic 400 with a small session should still be treated
        as a non-retryable client error (not context overflow)."""
        error_msg = "error"
        approx_tokens = 1000  # small session
        api_messages = [{"role": "user", "content": "hi"}]

        # Phrase matching must not fire on a bare "error" message.
        assert not any(phrase in error_msg for phrase in self._CTX_PHRASES)

        # The fallback heuristic requires BOTH a large session and a
        # generic message; the message here IS generic, but the small
        # session keeps the heuristic from firing.
        ctx_len = 200000
        is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
        is_generic_error = len(error_msg.strip()) < 30
        assert is_generic_error
        assert not is_large_session
        assert not (is_large_session and is_generic_error)

    def test_generic_400_with_large_token_count_triggers_heuristic(self):
        """A generic 400 with high token count should be treated as
        probable context overflow."""
        error_msg = "error"
        ctx_len = 200000
        approx_tokens = 100000  # > 40% of 200k
        api_messages = [{"role": "user", "content": "hi"}] * 20

        # Phrase matching misses the generic message entirely.
        assert not any(phrase in error_msg for phrase in self._CTX_PHRASES)

        # Heuristic: both conditions hold, so the agent should treat
        # this as a context overflow and compress instead of aborting.
        is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
        is_generic_error = len(error_msg.strip()) < 30
        assert is_large_session
        assert is_generic_error
        assert is_large_session and is_generic_error

    def test_generic_400_with_many_messages_triggers_heuristic(self):
        """A generic 400 with >80 messages should trigger the heuristic
        even if estimated tokens are low."""
        error_msg = "error"
        ctx_len = 200000
        approx_tokens = 5000  # low token estimate
        api_messages = [{"role": "user", "content": "x"}] * 100  # > 80 messages

        is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
        is_generic_error = len(error_msg.strip()) < 30
        assert is_large_session
        assert is_generic_error
        assert is_large_session and is_generic_error

    def test_specific_error_message_bypasses_heuristic(self):
        """A 400 with a specific, long error message should NOT trigger
        the heuristic even with a large session."""
        error_msg = "invalid model: anthropic/claude-nonexistent-model is not available"

        # A long, specific message fails the "generic" test, so the
        # heuristic never fires regardless of session size.
        is_generic_error = len(error_msg.strip()) < 30
        assert not is_generic_error

    def test_descriptive_context_error_caught_by_phrases(self):
        """Descriptive context-length errors should still be caught by
        the existing phrase matching (not the heuristic)."""
        error_msg = "prompt is too long: 250000 tokens > 200000 maximum"
        assert any(phrase in error_msg for phrase in self._CTX_PHRASES)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: Gateway skips persistence on failed agent results
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestGatewaySkipsPersistenceOnFailure:
    """When the agent returns failed=True with no final_response,
    the gateway should NOT persist messages to the transcript."""

    @staticmethod
    def _failed_early(agent_result):
        # Mirrors the gateway's check: the failed flag is set AND the
        # agent produced nothing worth persisting.
        return bool(
            agent_result.get("failed")
            and not agent_result.get("final_response")
        )

    def test_agent_failed_early_detected(self):
        """The agent_failed_early flag is True when failed=True and
        no final_response."""
        result = {
            "failed": True,
            "final_response": None,
            "messages": [],
            "error": "Non-retryable client error",
        }
        assert self._failed_early(result)

    def test_agent_with_response_not_failed_early(self):
        """When the agent has a final_response, it's not a failed-early
        scenario even if failed=True."""
        result = {
            "failed": True,
            "final_response": "Here is a partial response",
            "messages": [],
        }
        assert not self._failed_early(result)

    def test_successful_agent_not_failed_early(self):
        """A successful agent result should not trigger skip."""
        result = {
            "final_response": "Hello!",
            "messages": [{"role": "assistant", "content": "Hello!"}],
        }
        assert not self._failed_early(result)
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: Context-overflow error messages
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestContextOverflowErrorMessages:
    """The gateway should produce helpful error messages when the failure
    looks like a context overflow."""

    # Substrings the gateway scans for in lowercased error text.
    _CTX_MARKERS = (
        "context", "token", "too large", "too long",
        "exceed", "payload",
    )

    @classmethod
    def _is_ctx_fail(cls, error_str, history_len=0):
        # Mirrors the gateway's detection: a context-related keyword, or
        # a bare 400 combined with a large session history.
        lowered = error_str.lower()
        if any(marker in lowered for marker in cls._CTX_MARKERS):
            return True
        return "400" in lowered and history_len > 50

    def test_detects_context_keywords(self):
        """Error messages containing context-related keywords should be
        identified as context failures."""
        samples = (
            "context length exceeded",
            "too many tokens in the prompt",
            "request entity too large",
            "payload too large for model",
            "context window exceeded",
        )
        for error_str in samples:
            assert self._is_ctx_fail(error_str), f"Should detect: {error_str}"

    def test_detects_generic_400_with_large_history(self):
        """A generic 400 error code in the string with a large history
        should be flagged as context failure."""
        error_str = "error code: 400 - {'type': 'error', 'message': 'Error'}"
        assert self._is_ctx_fail(error_str, history_len=100)

    def test_unrelated_error_not_flagged(self):
        """Unrelated errors should not be flagged as context failures."""
        error_str = "invalid api key: authentication failed"
        assert not self._is_ctx_fail(error_str, history_len=10)
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: Agent skips persistence for large failed sessions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAgentSkipsPersistenceForLargeFailedSessions:
    """When a 400 error occurs and the session is large, the agent
    should skip persisting to prevent the growth loop."""

    @staticmethod
    def _should_skip(status_code, approx_tokens, message_count):
        # Mirrors the agent's skip condition: only a 400 with a large
        # session (by tokens or message count) suppresses persistence.
        return status_code == 400 and (approx_tokens > 50000 or message_count > 80)

    def test_large_session_400_skips_persistence(self):
        """Status 400 + high token count should skip persistence."""
        messages = [{"role": "user", "content": "x"}] * 10
        # 60000 tokens exceeds the 50000 threshold.
        assert self._should_skip(400, 60000, len(messages))

    def test_small_session_400_persists_normally(self):
        """Status 400 + small session should still persist."""
        messages = [{"role": "user", "content": "x"}] * 10  # < 80
        # 5000 tokens is under the threshold, and the message count is low.
        assert not self._should_skip(400, 5000, len(messages))

    def test_non_400_error_persists_normally(self):
        """Non-400 errors should always persist normally."""
        messages = [{"role": "user", "content": "x"}] * 100
        # Large session, but 401 (auth error) never triggers the skip.
        assert not self._should_skip(401, 100000, len(messages))
||||
|
|
@ -169,6 +169,27 @@ def clear_file_ops_cache(task_id: str = None):
|
|||
def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "default") -> str:
|
||||
"""Read a file with pagination and line numbers."""
|
||||
try:
|
||||
# Security: block direct reads of internal Hermes cache/index files
|
||||
# to prevent prompt injection via catalog or hub metadata files.
|
||||
import pathlib as _pathlib
|
||||
_resolved = _pathlib.Path(path).expanduser().resolve()
|
||||
_hermes_home = _pathlib.Path("~/.hermes").expanduser().resolve()
|
||||
_blocked_dirs = [
|
||||
_hermes_home / "skills" / ".hub" / "index-cache",
|
||||
_hermes_home / "skills" / ".hub",
|
||||
]
|
||||
for _blocked in _blocked_dirs:
|
||||
try:
|
||||
_resolved.relative_to(_blocked)
|
||||
return json.dumps({
|
||||
"error": (
|
||||
f"Access denied: {path} is an internal Hermes cache file "
|
||||
"and cannot be read directly to prevent prompt injection. "
|
||||
"Use the skills_list or skill_view tools instead."
|
||||
)
|
||||
})
|
||||
except ValueError:
|
||||
pass
|
||||
file_ops = _get_file_ops(task_id)
|
||||
result = file_ops.read_file(path, offset, limit)
|
||||
if result.content:
|
||||
|
|
|
|||
|
|
@ -873,6 +873,37 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
|
|||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
# Security: warn if skill is loaded from outside the trusted skills directory
|
||||
try:
|
||||
skill_md.resolve().relative_to(SKILLS_DIR.resolve())
|
||||
_outside_skills_dir = False
|
||||
except ValueError:
|
||||
_outside_skills_dir = True
|
||||
|
||||
# Security: detect common prompt injection patterns
|
||||
_INJECTION_PATTERNS = [
|
||||
"ignore previous instructions",
|
||||
"ignore all previous",
|
||||
"you are now",
|
||||
"disregard your",
|
||||
"forget your instructions",
|
||||
"new instructions:",
|
||||
"system prompt:",
|
||||
"<system>",
|
||||
"]]>",
|
||||
]
|
||||
_content_lower = content.lower()
|
||||
_injection_detected = any(p in _content_lower for p in _INJECTION_PATTERNS)
|
||||
|
||||
if _outside_skills_dir or _injection_detected:
|
||||
_warnings = []
|
||||
if _outside_skills_dir:
|
||||
_warnings.append(f"skill file is outside the trusted skills directory (~/.hermes/skills/): {skill_md}")
|
||||
if _injection_detected:
|
||||
_warnings.append("skill content contains patterns that may indicate prompt injection")
|
||||
import logging as _logging
|
||||
_logging.getLogger(__name__).warning("Skill security warning for '%s': %s", name, "; ".join(_warnings))
|
||||
|
||||
parsed_frontmatter: Dict[str, Any] = {}
|
||||
try:
|
||||
parsed_frontmatter, _ = _parse_frontmatter(content)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue