fix(approval): harden YOLO bypass, LLM parsing, auto-approve audit, pipe pattern (#23835)

* fix(approval): harden YOLO bypass, LLM parsing, auto-approve audit, pipe pattern

- BUG-009 (CRITICAL): freeze HERMES_YOLO_MODE at module import via
  _YOLO_MODE_FROZEN; prevents skills or prompt-injected code from setting
  os.environ["HERMES_YOLO_MODE"]="true" at runtime to bypass all checks
- BUG-002 (HIGH): replace the substring check "APPROVE" in answer with the
  exact comparison answer == "APPROVE" (and likewise for "DENY") in
  _smart_approve; the prompt already requests exactly one word, and the
  substring match was exploitable via verbose LLM responses
- BUG-001 (MEDIUM): add logger.warning for every dangerous command that
  is auto-approved in a non-interactive, non-gateway context; makes silent
  approvals visible in audit logs without breaking script behavior
- BUG-008 (LOW): expand curl/wget pipe pattern to cover | /bin/bash and
  | bash -c variants, not just | sh / | bash

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(approval): add missing is_truthy_value import + fix yolo test patches

_YOLO_MODE_FROZEN uses is_truthy_value() from utils — import was missing.
Tests that set HERMES_YOLO_MODE via monkeypatch.setenv() no longer work
because the value is frozen at import time; update them to patch the
module-level flag directly via monkeypatch.setattr().

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Rodrigo 2026-05-25 07:35:33 -03:00 committed by GitHub
parent 3ab7e2aa91
commit 4cb3eb03c7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 20 additions and 10 deletions

View file

@ -55,8 +55,8 @@ class TestYoloMode:
assert not result["approved"]
def test_dangerous_command_approved_in_yolo_mode(self, monkeypatch):
"""With HERMES_YOLO_MODE, dangerous (non-hardline) commands are auto-approved."""
monkeypatch.setenv("HERMES_YOLO_MODE", "1")
"""With HERMES_YOLO_MODE, dangerous commands are auto-approved."""
monkeypatch.setattr(approval_module, "_YOLO_MODE_FROZEN", True)
monkeypatch.setenv("HERMES_INTERACTIVE", "1")
monkeypatch.setenv("HERMES_SESSION_KEY", "test-session")
@ -68,8 +68,8 @@ class TestYoloMode:
assert result["message"] is None
def test_yolo_mode_works_for_all_patterns(self, monkeypatch):
"""Yolo mode bypasses dangerous patterns (except the hardline floor)."""
monkeypatch.setenv("HERMES_YOLO_MODE", "1")
"""Yolo mode bypasses all dangerous patterns, not just some."""
monkeypatch.setattr(approval_module, "_YOLO_MODE_FROZEN", True)
monkeypatch.setenv("HERMES_INTERACTIVE", "1")
# Dangerous but recoverable — yolo should bypass.
@ -90,7 +90,7 @@ class TestYoloMode:
def test_combined_guard_bypasses_yolo_mode(self, monkeypatch):
"""The new combined guard should preserve yolo bypass semantics."""
monkeypatch.setenv("HERMES_YOLO_MODE", "1")
monkeypatch.setattr(approval_module, "_YOLO_MODE_FROZEN", True)
monkeypatch.setenv("HERMES_INTERACTIVE", "1")
called = {"value": False}

View file

@ -23,6 +23,11 @@ from utils import env_var_enabled, is_truthy_value
logger = logging.getLogger(__name__)
# Freeze YOLO mode at module import time. Reading os.environ on every call
# would allow any skill running inside the process to set this variable and
# instantly bypass all approval checks — a prompt-injection escalation path.
_YOLO_MODE_FROZEN: bool = is_truthy_value(os.getenv("HERMES_YOLO_MODE", ""))
# Per-thread/per-task gateway session identity.
# Gateway runs agent turns concurrently in executor threads, so reading a
# process-global env var for session identity is racy. Keep env fallback for
@ -344,7 +349,7 @@ DANGEROUS_PATTERNS = [
# Any shell invocation via -c or combined flags like -lc, -ic, etc.
(r'\b(bash|sh|zsh|ksh)\s+-[^\s]*c(\s+|$)', "shell command via -c/-lc flag"),
(r'\b(python[23]?|perl|ruby|node)\s+-[ec]\s+', "script execution via -e/-c flag"),
(r'\b(curl|wget)\b.*\|\s*(ba)?sh\b', "pipe remote content to shell"),
(r'\b(curl|wget)\b.*\|\s*(?:[/\w]*/)?(?:ba)?sh(?:\s|$|-c)', "pipe remote content to shell"),
(r'\b(bash|sh|zsh|ksh)\s+<\s*<?\s*\(\s*(curl|wget)\b', "execute remote script via process substitution"),
(rf'\btee\b.*["\']?{_SENSITIVE_WRITE_TARGET}', "overwrite system file via tee"),
(rf'>>?\s*["\']?{_SENSITIVE_WRITE_TARGET}', "overwrite system file via redirection"),
@ -898,9 +903,9 @@ Respond with exactly one word: APPROVE, DENY, or ESCALATE"""
answer = (response.choices[0].message.content or "").strip().upper()
if "APPROVE" in answer:
if answer == "APPROVE":
return "approve"
elif "DENY" in answer:
elif answer == "DENY":
return "deny"
else:
return "escalate"
@ -940,7 +945,7 @@ def check_dangerous_command(command: str, env_type: str,
# --yolo: bypass all approval prompts. Gateway /yolo is session-scoped;
# CLI --yolo remains process-scoped via the env var for local use.
if is_truthy_value(os.getenv("HERMES_YOLO_MODE")) or is_current_session_yolo_enabled():
if _YOLO_MODE_FROZEN or is_current_session_yolo_enabled():
return {"approved": True, "message": None}
is_dangerous, pattern_key, description = detect_dangerous_command(command)
@ -968,6 +973,11 @@ def check_dangerous_command(command: str, env_type: str,
"approvals.cron_mode: approve in config.yaml."
),
}
logger.warning(
"AUTO-APPROVED dangerous command in non-interactive non-gateway context "
"(pattern: %s): %s — set HERMES_INTERACTIVE or HERMES_GATEWAY_SESSION to require approval.",
description, command[:200],
)
return {"approved": True, "message": None}
if is_gateway or env_var_enabled("HERMES_EXEC_ASK"):
@ -1076,7 +1086,7 @@ def check_all_command_guards(command: str, env_type: str,
# --yolo or approvals.mode=off: bypass all approval prompts.
# Gateway /yolo is session-scoped; CLI --yolo remains process-scoped.
approval_mode = _get_approval_mode()
if is_truthy_value(os.getenv("HERMES_YOLO_MODE")) or is_current_session_yolo_enabled() or approval_mode == "off":
if _YOLO_MODE_FROZEN or is_current_session_yolo_enabled() or approval_mode == "off":
return {"approved": True, "message": None}
is_cli = env_var_enabled("HERMES_INTERACTIVE")