mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(code-exec): restore approval context in execute_code RPC threads + guard entry
Wrap both execute_code RPC threads (local UDS + remote file-RPC) with propagate_context_to_thread so gateway sessions no longer fall into check_dangerous_command's non-interactive auto-approve branch and the CLI approval prompt stays reachable. Add check_execute_code_guard: one-shot fail-closed approval of the whole script in gateway/ask/cron-deny before the child spawns (skips isolated backends; command-string built only past the early returns). Drop the broad HERMES_ env passthrough for an explicit operational allowlist plus DSN/WEBHOOK secret substrings, and update the POSIX-equivalence oracle. Refs #4146, #27303, #30882, #33057
This commit is contained in:
parent
21aeefe5fd
commit
1083977261
3 changed files with 354 additions and 114 deletions
|
|
@ -253,20 +253,24 @@ class TestWindowsSocketSmokeTest:
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def _legacy_posix_scrubber(source_env, is_passthrough):
|
def _legacy_posix_scrubber(source_env, is_passthrough):
|
||||||
"""Verbatim copy of the pre-Windows-fix inline scrubbing logic.
|
"""Independent oracle for TestPosixEquivalence — a from-scratch reimpl of
|
||||||
|
_scrub_child_env's POSIX behavior, used to prove the production helper does
|
||||||
|
what we think it does.
|
||||||
|
|
||||||
This is the oracle used by TestPosixEquivalence to prove the refactor
|
Deliberately updated for #27303 (the broad ``HERMES_`` prefix was dropped
|
||||||
did not change POSIX behavior. DO NOT edit this to "match" a future
|
in favor of an explicit operational allowlist, and DSN/WEBHOOK were added
|
||||||
production change — if _scrub_child_env's POSIX behavior legitimately
|
to the secret substrings). The original docstring said: if POSIX behavior
|
||||||
needs to evolve, delete this function and adjust the equivalence test
|
legitimately needs to evolve, adjust this oracle on purpose so the churn is
|
||||||
on purpose, so the churn is visible in review.
|
visible in review — that is what this change is.
|
||||||
"""
|
"""
|
||||||
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
|
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
|
||||||
"TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
|
"TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
|
||||||
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA",
|
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA")
|
||||||
"HERMES_")
|
|
||||||
_SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
|
_SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
|
||||||
"PASSWD", "AUTH")
|
"PASSWD", "AUTH", "DSN", "WEBHOOK")
|
||||||
|
_HERMES_CHILD_ALLOWED = frozenset({
|
||||||
|
"HERMES_HOME", "HERMES_PROFILE", "HERMES_CONFIG", "HERMES_ENV",
|
||||||
|
})
|
||||||
out = {}
|
out = {}
|
||||||
for k, v in source_env.items():
|
for k, v in source_env.items():
|
||||||
if is_passthrough(k):
|
if is_passthrough(k):
|
||||||
|
|
@ -276,6 +280,9 @@ def _legacy_posix_scrubber(source_env, is_passthrough):
|
||||||
continue
|
continue
|
||||||
if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
|
if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
|
||||||
out[k] = v
|
out[k] = v
|
||||||
|
continue
|
||||||
|
if k in _HERMES_CHILD_ALLOWED:
|
||||||
|
out[k] = v
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -308,13 +315,20 @@ class TestPosixEquivalence:
|
||||||
"PYTHONPATH": "/opt/lib",
|
"PYTHONPATH": "/opt/lib",
|
||||||
"VIRTUAL_ENV": "/home/alice/.venv",
|
"VIRTUAL_ENV": "/home/alice/.venv",
|
||||||
"CONDA_PREFIX": "/opt/conda",
|
"CONDA_PREFIX": "/opt/conda",
|
||||||
"HERMES_HOME": "/home/alice/.hermes",
|
# HERMES_* handling (#27303): only the operational allowlist passes;
|
||||||
"HERMES_INTERACTIVE": "1",
|
# every other HERMES_* is dropped (the broad prefix was removed).
|
||||||
|
"HERMES_HOME": "/home/alice/.hermes", # allowlisted → kept
|
||||||
|
"HERMES_PROFILE": "default", # allowlisted → kept
|
||||||
|
"HERMES_INTERACTIVE": "1", # not allowlisted → dropped
|
||||||
|
"HERMES_BASE_URL": "https://api.internal", # not allowlisted → dropped
|
||||||
|
"HERMES_KANBAN_DB": "postgres://u:p@h/db", # not allowlisted → dropped
|
||||||
# Secret-substring blocks
|
# Secret-substring blocks
|
||||||
"OPENAI_API_KEY": "sk-xxx",
|
"OPENAI_API_KEY": "sk-xxx",
|
||||||
"GITHUB_TOKEN": "ghp_xxx",
|
"GITHUB_TOKEN": "ghp_xxx",
|
||||||
"AWS_SECRET_ACCESS_KEY": "yyy",
|
"AWS_SECRET_ACCESS_KEY": "yyy",
|
||||||
"MY_PASSWORD": "hunter2",
|
"MY_PASSWORD": "hunter2",
|
||||||
|
"SENTRY_DSN": "https://abc@sentry.io/1", # DSN substring → blocked
|
||||||
|
"SLACK_WEBHOOK": "https://hooks.slack/x", # WEBHOOK substring → blocked
|
||||||
# Uncategorized — must be dropped
|
# Uncategorized — must be dropped
|
||||||
"RANDOM_UNKNOWN": "drop-me",
|
"RANDOM_UNKNOWN": "drop-me",
|
||||||
"DISPLAY": ":0",
|
"DISPLAY": ":0",
|
||||||
|
|
|
||||||
|
|
@ -1057,6 +1057,107 @@ def _format_tirith_description(tirith_result: dict) -> str:
|
||||||
return "Security scan — " + "; ".join(parts)
|
return "Security scan — " + "; ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def _await_gateway_decision(session_key: str, notify_cb, approval_data: dict,
                            *, surface: str = "gateway") -> dict:
    """Enqueue *approval_data*, notify the user, and wait for a decision.

    Blocks the calling agent thread until the queued request is resolved or
    the configured gateway approval timeout elapses, firing the
    ``pre_approval_request`` / ``post_approval_response`` hooks around the
    wait. Shared by the terminal command guard
    (``check_all_command_guards``) and the execute_code guard
    (``check_execute_code_guard``) so the polling wait loop lives in one
    place.

    Args:
        session_key: Key of the gateway session whose queue receives the
            request.
        notify_cb: Callable invoked with *approval_data* to surface the
            request to the user.
        approval_data: Request payload; the ``command``, ``description``,
            ``pattern_key`` and ``pattern_keys`` entries are read here and
            forwarded to the hooks.
        surface: Label forwarded to both hooks (keyword-only).

    Returns:
        ``{"resolved": bool, "choice": str | None}`` once the wait ends, or
        ``{"resolved": False, "choice": None, "notify_failed": True}`` if
        *notify_cb* raised. Persisting an approved choice and building the
        tool-facing result dict remain the caller's responsibility.
    """
    command = approval_data.get("command", "")
    description = approval_data.get("description", "")
    primary_key = approval_data.get("pattern_key", "")
    all_keys = approval_data.get("pattern_keys", [primary_key])

    entry = _ApprovalEntry(approval_data)
    with _lock:
        _gateway_queues.setdefault(session_key, []).append(entry)

    def _drop_entry() -> None:
        """Remove this request from the session queue; drop an empty queue."""
        with _lock:
            queue = _gateway_queues.get(session_key, [])
            if entry in queue:
                queue.remove(entry)
            if not queue:
                _gateway_queues.pop(session_key, None)

    resolved = False
    # Everything between enqueue and dequeue runs under try/finally so the
    # entry never outlives this call -- including when a hook or the
    # heartbeat helper raises. A leaked entry would sit in the session queue
    # where a later user response could resolve it instead of a live request.
    try:
        # Notify plugins that an approval is being requested. Fires before
        # the gateway notify callback so observers see the event first.
        _fire_approval_hook(
            "pre_approval_request",
            command=command,
            description=description,
            pattern_key=primary_key,
            pattern_keys=list(all_keys),
            session_key=session_key,
            surface=surface,
        )

        # Notify the user (bridges sync agent thread -> async gateway).
        try:
            notify_cb(approval_data)
        except Exception as exc:
            logger.warning("Gateway approval notify failed: %s", exc)
            return {"resolved": False, "choice": None, "notify_failed": True}

        # Block until the user responds or the timeout elapses (default
        # 300 s). Poll in short slices so an activity heartbeat can be sent
        # between slices; per the original note, without it the gateway
        # inactivity watchdog can kill the agent while the user is still
        # deciding.
        timeout = _get_approval_config().get("gateway_timeout", 300)
        try:
            timeout = int(timeout)
        except (ValueError, TypeError):
            timeout = 300

        try:
            from tools.environments.base import touch_activity_if_due
        except Exception:  # pragma: no cover
            touch_activity_if_due = None

        _now = time.monotonic()
        _deadline = _now + max(timeout, 0)
        _activity_state = {"last_touch": _now, "start": _now}
        while True:
            _remaining = _deadline - time.monotonic()
            if _remaining <= 0:
                break
            # The event is set as soon as the request is resolved, so the
            # 1 s slice bounds heartbeat cadence, not responsiveness.
            if entry.event.wait(timeout=min(1.0, _remaining)):
                resolved = True
                break
            if touch_activity_if_due is not None:
                touch_activity_if_due(_activity_state, "waiting for user approval")
    finally:
        _drop_entry()

    choice = entry.result
    # Normalize outcome for the post hook. Unresolved (timeout) and None both
    # mean the user never responded; report that explicitly so plugins can
    # distinguish timeout from explicit deny.
    _outcome = "timeout" if not resolved else (choice if choice else "timeout")
    _fire_approval_hook(
        "post_approval_response",
        command=command,
        description=description,
        pattern_key=primary_key,
        pattern_keys=list(all_keys),
        session_key=session_key,
        surface=surface,
        choice=_outcome,
    )
    return {"resolved": resolved, "choice": choice}
|
||||||
|
|
||||||
|
|
||||||
def check_all_command_guards(command: str, env_type: str,
|
def check_all_command_guards(command: str, env_type: str,
|
||||||
approval_callback=None) -> dict:
|
approval_callback=None) -> dict:
|
||||||
"""Run all pre-exec security checks and return a single approval decision.
|
"""Run all pre-exec security checks and return a single approval decision.
|
||||||
|
|
@ -1207,113 +1308,27 @@ def check_all_command_guards(command: str, env_type: str,
|
||||||
|
|
||||||
if notify_cb is not None:
|
if notify_cb is not None:
|
||||||
# --- Blocking gateway approval (queue-based) ---
|
# --- Blocking gateway approval (queue-based) ---
|
||||||
# Each call gets its own _ApprovalEntry so parallel subagents
|
# Block the agent thread until the user responds; the notify +
|
||||||
# and execute_code threads can block concurrently.
|
# heartbeat wait loop is shared with check_execute_code_guard via
|
||||||
|
# _await_gateway_decision().
|
||||||
approval_data = {
|
approval_data = {
|
||||||
"command": command,
|
"command": command,
|
||||||
"pattern_key": primary_key,
|
"pattern_key": primary_key,
|
||||||
"pattern_keys": all_keys,
|
"pattern_keys": all_keys,
|
||||||
"description": combined_desc,
|
"description": combined_desc,
|
||||||
}
|
}
|
||||||
entry = _ApprovalEntry(approval_data)
|
decision = _await_gateway_decision(
|
||||||
with _lock:
|
session_key, notify_cb, approval_data, surface="gateway"
|
||||||
_gateway_queues.setdefault(session_key, []).append(entry)
|
|
||||||
|
|
||||||
# Notify plugins that an approval is being requested. Fires before
|
|
||||||
# the gateway notify callback so observers (e.g. macOS notifier
|
|
||||||
# plugins, audit logs, Slack alerts) get the event in real time.
|
|
||||||
_fire_approval_hook(
|
|
||||||
"pre_approval_request",
|
|
||||||
command=command,
|
|
||||||
description=combined_desc,
|
|
||||||
pattern_key=primary_key,
|
|
||||||
pattern_keys=list(all_keys),
|
|
||||||
session_key=session_key,
|
|
||||||
surface="gateway",
|
|
||||||
)
|
)
|
||||||
|
if decision.get("notify_failed"):
|
||||||
# Notify the user (bridges sync agent thread → async gateway)
|
|
||||||
try:
|
|
||||||
notify_cb(approval_data)
|
|
||||||
except Exception as exc:
|
|
||||||
logger.warning("Gateway approval notify failed: %s", exc)
|
|
||||||
with _lock:
|
|
||||||
queue = _gateway_queues.get(session_key, [])
|
|
||||||
if entry in queue:
|
|
||||||
queue.remove(entry)
|
|
||||||
if not queue:
|
|
||||||
_gateway_queues.pop(session_key, None)
|
|
||||||
return {
|
return {
|
||||||
"approved": False,
|
"approved": False,
|
||||||
"message": "BLOCKED: Failed to send approval request to user. Do NOT retry.",
|
"message": "BLOCKED: Failed to send approval request to user. Do NOT retry.",
|
||||||
"pattern_key": primary_key,
|
"pattern_key": primary_key,
|
||||||
"description": combined_desc,
|
"description": combined_desc,
|
||||||
}
|
}
|
||||||
|
resolved = decision["resolved"]
|
||||||
# Block until the user responds or timeout (default 5 min).
|
choice = decision["choice"]
|
||||||
# Poll in short slices so we can fire activity heartbeats every
|
|
||||||
# ~10s to the agent's inactivity tracker. Without this, the
|
|
||||||
# blocking event.wait() never touches activity, and the
|
|
||||||
# gateway's inactivity watchdog (agent.gateway_timeout, default
|
|
||||||
# 1800s) kills the agent while the user is still responding to
|
|
||||||
# the approval prompt. Mirrors the _wait_for_process() cadence
|
|
||||||
# in tools/environments/base.py.
|
|
||||||
timeout = _get_approval_config().get("gateway_timeout", 300)
|
|
||||||
try:
|
|
||||||
timeout = int(timeout)
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
timeout = 300
|
|
||||||
|
|
||||||
try:
|
|
||||||
from tools.environments.base import touch_activity_if_due
|
|
||||||
except Exception: # pragma: no cover
|
|
||||||
touch_activity_if_due = None
|
|
||||||
|
|
||||||
_now = time.monotonic()
|
|
||||||
_deadline = _now + max(timeout, 0)
|
|
||||||
_activity_state = {"last_touch": _now, "start": _now}
|
|
||||||
resolved = False
|
|
||||||
while True:
|
|
||||||
_remaining = _deadline - time.monotonic()
|
|
||||||
if _remaining <= 0:
|
|
||||||
break
|
|
||||||
# 1s poll slice — the event is set immediately when the
|
|
||||||
# user responds, so slice length only controls heartbeat
|
|
||||||
# cadence, not user-visible responsiveness.
|
|
||||||
if entry.event.wait(timeout=min(1.0, _remaining)):
|
|
||||||
resolved = True
|
|
||||||
break
|
|
||||||
if touch_activity_if_due is not None:
|
|
||||||
touch_activity_if_due(
|
|
||||||
_activity_state, "waiting for user approval"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Clean up this entry from the queue
|
|
||||||
with _lock:
|
|
||||||
queue = _gateway_queues.get(session_key, [])
|
|
||||||
if entry in queue:
|
|
||||||
queue.remove(entry)
|
|
||||||
if not queue:
|
|
||||||
_gateway_queues.pop(session_key, None)
|
|
||||||
|
|
||||||
choice = entry.result
|
|
||||||
# Normalize outcome for the post hook. Unresolved (timeout) and
|
|
||||||
# None both mean the user never responded; report that explicitly
|
|
||||||
# so plugins can distinguish timeout from explicit deny.
|
|
||||||
_outcome = (
|
|
||||||
"timeout" if not resolved
|
|
||||||
else (choice if choice else "timeout")
|
|
||||||
)
|
|
||||||
_fire_approval_hook(
|
|
||||||
"post_approval_response",
|
|
||||||
command=command,
|
|
||||||
description=combined_desc,
|
|
||||||
pattern_key=primary_key,
|
|
||||||
pattern_keys=list(all_keys),
|
|
||||||
session_key=session_key,
|
|
||||||
surface="gateway",
|
|
||||||
choice=_outcome,
|
|
||||||
)
|
|
||||||
|
|
||||||
if not resolved or choice is None or choice == "deny":
|
if not resolved or choice is None or choice == "deny":
|
||||||
# Consent contract: silence is NOT consent, and an explicit
|
# Consent contract: silence is NOT consent, and an explicit
|
||||||
|
|
@ -1437,5 +1452,173 @@ def check_all_command_guards(command: str, env_type: str,
|
||||||
"user_approved": True, "description": combined_desc}
|
"user_approved": True, "description": combined_desc}
|
||||||
|
|
||||||
|
|
||||||
|
def check_execute_code_guard(code: str, env_type: str) -> dict:
    """Approve an execute_code script before its child process is spawned.

    execute_code runs arbitrary local Python -- the script can call
    ``subprocess``, ``os.system``, ``ctypes``, or other process/file APIs
    directly, none of which pass through ``terminal()`` /
    ``DANGEROUS_PATTERNS``. In gateway/ask contexts we fail closed by
    approving the script as a whole before it runs (#30882).

    Scope (documented limitation, #30882): in a purely local non-interactive
    non-gateway session (no TTY, not gateway, not cron-deny) this returns
    approved -- matching the existing terminal auto-approve contract. Running
    arbitrary code headlessly without any approval surface is
    trusted-by-config (set a gateway/ask surface or ``approvals.cron_mode``
    to require approval).

    Args:
        code: Source of the script about to be executed.
        env_type: Execution backend name; isolated backends skip the guard.

    Returns:
        A dict following the ``check_all_command_guards`` contract: always
        ``approved`` and ``message``, plus context keys (``pattern_key``,
        ``description``, ``outcome``, ``user_consent``, ...) on the
        blocked/pending paths.
    """
    pattern_key = "execute_code"
    description = (
        "execute_code script execution. The script can spawn subprocesses or "
        "mutate files without passing through terminal command approval; "
        "approval is one-shot for this run."
    )

    # Isolated backends already sandbox the child -- matches the container
    # skip in check_all_command_guards / check_dangerous_command.
    if env_type in {"docker", "singularity", "modal", "daytona", "vercel_sandbox"}:
        return {"approved": True, "message": None}

    # --yolo or approvals.mode=off: bypass (session- or process-scoped).
    approval_mode = _get_approval_mode()
    if _YOLO_MODE_FROZEN or is_current_session_yolo_enabled() or approval_mode == "off":
        return {"approved": True, "message": None}

    is_gateway = _is_gateway_approval_context()
    is_ask = env_var_enabled("HERMES_EXEC_ASK")

    # Cron: no user is present to approve arbitrary code.
    if env_var_enabled("HERMES_CRON_SESSION"):
        if _get_cron_approval_mode() == "deny":
            return {
                "approved": False,
                "message": (
                    "BLOCKED: execute_code runs arbitrary local Python "
                    "(including subprocess calls that bypass shell-string "
                    "approval checks). Cron jobs run without a user present "
                    "to approve it. Use normal tools instead, or set "
                    "approvals.cron_mode: approve only if this cron profile "
                    "is intentionally trusted."
                ),
                "pattern_key": pattern_key,
                "description": description,
                "outcome": "blocked",
                "user_consent": False,
            }
        return {"approved": True, "message": None}

    # Only gateway/ask contexts get the one-shot whole-script approval.
    #   * CLI interactive: the script's terminal() calls are guarded per-call
    #     (context now propagates into the RPC thread, #33057); a
    #     whole-script prompt would fire on every execute_code call.
    #   * Local non-interactive non-gateway: documented limitation above.
    if not is_gateway and not is_ask:
        return {"approved": True, "message": None}

    session_key = get_current_session_key()
    # Built only now (past the early-return gates) so the common non-approval
    # paths don't pay to copy a potentially-large script into this string.
    command = f"execute_code <<'PY'\n{code}\nPY"

    # Smart mode: ask the aux LLM about the whole script. An APPROVE here
    # only suppresses the redundant whole-script prompt; the per-call
    # terminal() guards (restored by context propagation) still run
    # independently.
    if approval_mode == "smart":
        verdict = _smart_approve(command, description)
        if verdict == "approve":
            logger.debug("Smart approval: auto-approved execute_code for session %s",
                         session_key)
            return {"approved": True, "message": None,
                    "smart_approved": True, "description": description}
        if verdict == "deny":
            return {
                "approved": False,
                "message": ("BLOCKED by smart approval: execute_code script "
                            "execution was assessed as genuinely dangerous. "
                            "Do NOT retry."),
                "smart_denied": True,
                "pattern_key": pattern_key,
                "description": description,
                "outcome": "denied",
                "user_consent": False,
            }
        # verdict == "escalate" -> fall through to manual approval

    with _lock:
        notify_cb = _gateway_notify_cbs.get(session_key)

    if notify_cb is None:
        # No gateway callback registered (e.g. ask-mode without a notifier):
        # surface a pending approval for backward compatibility.
        submit_pending(session_key, {
            "command": command,
            "pattern_key": pattern_key,
            "pattern_keys": [pattern_key],
            "description": description,
        })
        return {
            "approved": False,
            "pattern_key": pattern_key,
            "status": "pending_approval",
            "approval_pending": True,
            "command": command,
            "description": description,
            "message": (
                # description already ends with a period; strip it here so
                # the sentence does not render as "...this run.. Asking".
                f"⚠️ {description.rstrip('.')}. Asking the user for approval.\n\n"
                f"**Code:**\n```python\n{code}\n```"
            ),
        }

    approval_data = {
        "command": command,
        "pattern_key": pattern_key,
        "pattern_keys": [pattern_key],
        "description": description,
    }
    decision = _await_gateway_decision(
        session_key, notify_cb, approval_data, surface="gateway"
    )
    if decision.get("notify_failed"):
        return {
            "approved": False,
            "message": ("BLOCKED: Failed to send execute_code approval request "
                        "to user. Do NOT retry."),
            "pattern_key": pattern_key,
            "description": description,
            "outcome": "notify_failed",
            "user_consent": False,
        }

    resolved = decision["resolved"]
    choice = decision["choice"]

    if not resolved or choice is None or choice == "deny":
        reason = "timed out without user response" if not resolved else "denied by user"
        addendum = " Silence is not consent." if not resolved else ""
        return {
            "approved": False,
            "message": (
                f"BLOCKED: execute_code script {reason}. The user has NOT "
                f"consented to running this code. Do NOT retry, do NOT rephrase "
                f"the script, and do NOT attempt the same outcome via a "
                f"different tool.{addendum}"
            ),
            "pattern_key": pattern_key,
            "description": description,
            "outcome": "timeout" if not resolved else "denied",
            "user_consent": False,
        }

    # Approved -- one-shot only. Deliberately NO approve_session /
    # approve_permanent: each execute_code script is distinct arbitrary code,
    # so approval never persists to future scripts.
    return {"approved": True, "message": None,
            "user_approved": True, "description": description}
|
||||||
|
|
||||||
|
|
||||||
# Load permanent allowlist from config on module import
|
# Load permanent allowlist from config on module import
|
||||||
load_permanent_allowlist()
|
load_permanent_allowlist()
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,8 @@ import uuid
|
||||||
_IS_WINDOWS = platform.system() == "Windows"
|
_IS_WINDOWS = platform.system() == "Windows"
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from tools.thread_context import propagate_context_to_thread
|
||||||
|
|
||||||
# Availability gate. On Windows we fall back to loopback TCP for the
|
# Availability gate. On Windows we fall back to loopback TCP for the
|
||||||
# sandbox RPC transport (AF_UNIX is unreliable on Windows Python) — see
|
# sandbox RPC transport (AF_UNIX is unreliable on Windows Python) — see
|
||||||
# ``_use_tcp_rpc`` in ``_execute_local`` below. That makes execute_code
|
# ``_use_tcp_rpc`` in ``_execute_local`` below. That makes execute_code
|
||||||
|
|
@ -74,13 +76,30 @@ MAX_STDERR_BYTES = 10_000 # 10 KB
|
||||||
|
|
||||||
# Environment variable scrubbing rules (shared between the local + remote
|
# Environment variable scrubbing rules (shared between the local + remote
|
||||||
# backends). Secret-substring block is applied first; anything left must
|
# backends). Secret-substring block is applied first; anything left must
|
||||||
# match either a safe prefix or, on Windows, an OS-essential name.
|
# match a safe prefix, the operational HERMES_ allowlist, or (on Windows) an
|
||||||
|
# OS-essential name.
|
||||||
|
#
|
||||||
|
# NB: the broad "HERMES_" prefix was deliberately removed (#27303) — it leaked
|
||||||
|
# HERMES_*-named config that lacks a secret substring (e.g. HERMES_BASE_URL,
|
||||||
|
# HERMES_KANBAN_DB, HERMES_*_WEBHOOK). The child only needs the few
|
||||||
|
# location/profile vars in _HERMES_CHILD_ALLOWED below; HERMES_RPC_SOCKET /
|
||||||
|
# HERMES_RPC_DIR / TZ / HOME are injected explicitly after scrubbing.
|
||||||
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
|
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
|
||||||
"TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
|
"TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
|
||||||
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA",
|
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA")
|
||||||
"HERMES_")
|
|
||||||
_SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
|
_SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
|
||||||
"PASSWD", "AUTH")
|
"PASSWD", "AUTH", "DSN", "WEBHOOK")
|
||||||
|
|
||||||
|
# Operational HERMES_* vars the child legitimately needs by exact name — these
|
||||||
|
# are non-secret runtime-location flags (the same set hermes_cli treats as the
|
||||||
|
# runtime location) that repo-root modules a sandbox script imports may read at
|
||||||
|
# import time. None match _SECRET_SUBSTRINGS.
|
||||||
|
_HERMES_CHILD_ALLOWED = frozenset({
|
||||||
|
"HERMES_HOME",
|
||||||
|
"HERMES_PROFILE",
|
||||||
|
"HERMES_CONFIG",
|
||||||
|
"HERMES_ENV",
|
||||||
|
})
|
||||||
|
|
||||||
# Windows-only: a handful of variables are required by the OS/CRT itself.
|
# Windows-only: a handful of variables are required by the OS/CRT itself.
|
||||||
# Without them, even stdlib calls like ``socket.socket()`` fail with
|
# Without them, even stdlib calls like ``socket.socket()`` fail with
|
||||||
|
|
@ -119,9 +138,10 @@ def _scrub_child_env(source_env, is_passthrough=None, is_windows=None):
|
||||||
|
|
||||||
Rules (order matters):
|
Rules (order matters):
|
||||||
1. Passthrough vars (skill- or config-declared) always pass.
|
1. Passthrough vars (skill- or config-declared) always pass.
|
||||||
2. Secret-substring names (KEY/TOKEN/etc.) are blocked.
|
2. Secret-substring names (KEY/TOKEN/DSN/WEBHOOK/etc.) are blocked.
|
||||||
3. Names matching a safe prefix pass.
|
3. Names matching a safe prefix pass.
|
||||||
4. On Windows, a small OS-essential allowlist passes by exact name
|
4. Operational HERMES_* vars (_HERMES_CHILD_ALLOWED) pass by exact name.
|
||||||
|
5. On Windows, a small OS-essential allowlist passes by exact name
|
||||||
— without these the child can't even create a socket or spawn a
|
— without these the child can't even create a socket or spawn a
|
||||||
subprocess.
|
subprocess.
|
||||||
|
|
||||||
|
|
@ -147,6 +167,9 @@ def _scrub_child_env(source_env, is_passthrough=None, is_windows=None):
|
||||||
if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
|
if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
|
||||||
scrubbed[k] = v
|
scrubbed[k] = v
|
||||||
continue
|
continue
|
||||||
|
if k in _HERMES_CHILD_ALLOWED:
|
||||||
|
scrubbed[k] = v
|
||||||
|
continue
|
||||||
if is_windows and k.upper() in _WINDOWS_ESSENTIAL_ENV_VARS:
|
if is_windows and k.upper() in _WINDOWS_ESSENTIAL_ENV_VARS:
|
||||||
scrubbed[k] = v
|
scrubbed[k] = v
|
||||||
return scrubbed
|
return scrubbed
|
||||||
|
|
@ -887,9 +910,11 @@ def _execute_remote(
|
||||||
_ship_file_to_remote(env, f"{sandbox_dir}/hermes_tools.py", tools_src)
|
_ship_file_to_remote(env, f"{sandbox_dir}/hermes_tools.py", tools_src)
|
||||||
_ship_file_to_remote(env, f"{sandbox_dir}/script.py", code)
|
_ship_file_to_remote(env, f"{sandbox_dir}/script.py", code)
|
||||||
|
|
||||||
# Start RPC polling thread
|
# Wrapped so the thread inherits the turn's approval context + callbacks
|
||||||
|
# (see tools.thread_context) — else sandbox RPC tool calls lose approval
|
||||||
|
# routing (#33057).
|
||||||
rpc_thread = threading.Thread(
|
rpc_thread = threading.Thread(
|
||||||
target=_rpc_poll_loop,
|
target=propagate_context_to_thread(_rpc_poll_loop),
|
||||||
args=(
|
args=(
|
||||||
env, f"{sandbox_dir}/rpc", effective_task_id,
|
env, f"{sandbox_dir}/rpc", effective_task_id,
|
||||||
tool_call_log, tool_call_counter, max_tool_calls,
|
tool_call_log, tool_call_counter, max_tool_calls,
|
||||||
|
|
@ -1049,6 +1074,21 @@ def execute_code(
|
||||||
# Dispatch: remote backends use file-based RPC, local uses UDS
|
# Dispatch: remote backends use file-based RPC, local uses UDS
|
||||||
from tools.terminal_tool import _get_env_config
|
from tools.terminal_tool import _get_env_config
|
||||||
env_type = _get_env_config()["env_type"]
|
env_type = _get_env_config()["env_type"]
|
||||||
|
|
||||||
|
# execute_code runs arbitrary Python (subprocess/os.system/...) that never
|
||||||
|
# passes through terminal()/DANGEROUS_PATTERNS, so guard the whole script
|
||||||
|
# here before either dispatch path spawns it. Runs synchronously in the
|
||||||
|
# caller (tool-executor) thread, which holds the session context (#30882).
|
||||||
|
from tools.approval import check_execute_code_guard
|
||||||
|
_guard = check_execute_code_guard(code, env_type)
|
||||||
|
if not _guard.get("approved", False):
|
||||||
|
return json.dumps({
|
||||||
|
"status": "error",
|
||||||
|
"error": _guard.get("message") or "execute_code blocked by approval guard.",
|
||||||
|
"tool_calls_made": 0,
|
||||||
|
"duration_seconds": 0,
|
||||||
|
}, ensure_ascii=False)
|
||||||
|
|
||||||
if env_type != "local":
|
if env_type != "local":
|
||||||
return _execute_remote(code, task_id, enabled_tools)
|
return _execute_remote(code, task_id, enabled_tools)
|
||||||
|
|
||||||
|
|
@ -1135,8 +1175,11 @@ def execute_code(
|
||||||
os.chmod(sock_path, 0o600)
|
os.chmod(sock_path, 0o600)
|
||||||
server_sock.listen(1)
|
server_sock.listen(1)
|
||||||
|
|
||||||
|
# Wrapped so the thread inherits the turn's approval context + callbacks
|
||||||
|
# (see tools.thread_context) — else gateway sandbox tool calls silently
|
||||||
|
# auto-approve dangerous commands (#33057, #30882).
|
||||||
rpc_thread = threading.Thread(
|
rpc_thread = threading.Thread(
|
||||||
target=_rpc_server_loop,
|
target=propagate_context_to_thread(_rpc_server_loop),
|
||||||
args=(
|
args=(
|
||||||
server_sock, task_id, tool_call_log,
|
server_sock, task_id, tool_call_log,
|
||||||
tool_call_counter, max_tool_calls, sandbox_tools,
|
tool_call_counter, max_tool_calls, sandbox_tools,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue