mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-03 02:11:48 +00:00
fix(agent): add tool-call loop guardrails
This commit is contained in:
parent
8d7500d80d
commit
58b89965c8
5 changed files with 944 additions and 108 deletions
142
tests/agent/test_tool_guardrails.py
Normal file
142
tests/agent/test_tool_guardrails.py
Normal file
|
|
@@ -0,0 +1,142 @@
|
|||
"""Pure tool-call guardrail primitive tests."""
|
||||
|
||||
import json
|
||||
|
||||
from agent.tool_guardrails import (
|
||||
ToolCallGuardrailConfig,
|
||||
ToolCallGuardrailController,
|
||||
ToolCallSignature,
|
||||
canonical_tool_args,
|
||||
)
|
||||
|
||||
|
||||
def test_tool_call_signature_hashes_canonical_nested_unicode_args_without_exposing_raw_args():
    """Check that key order does not affect the signature and raw args stay out of metadata.

    Two argument payloads with the same nested content but different key
    ordering must canonicalize identically and yield equal signatures. The
    signature metadata must contain only the tool name and a 64-character
    args hash, and none of the raw argument values.
    """
    args_a = {
        "z": [{"β": "☤", "a": 1}],
        "a": {"y": 2, "x": "secret-token-value"},
    }
    args_b = {
        "a": {"x": "secret-token-value", "y": 2},
        "z": [{"a": 1, "β": "☤"}],
    }

    assert canonical_tool_args(args_a) == canonical_tool_args(args_b)
    sig_a = ToolCallSignature.from_call("web_search", args_a)
    sig_b = ToolCallSignature.from_call("web_search", args_b)

    assert sig_a == sig_b
    assert len(sig_a.args_hash) == 64
    metadata = sig_a.to_metadata()
    assert metadata == {"tool_name": "web_search", "args_hash": sig_a.args_hash}

    # Serialize with ensure_ascii=False: the default escapes non-ASCII
    # characters (e.g. "☤" becomes "\u2624"), which would make the raw-glyph
    # check below pass vacuously even if the value had leaked.
    serialized = json.dumps(metadata, ensure_ascii=False)
    assert "secret-token-value" not in serialized
    assert "☤" not in serialized
|
||||
|
||||
|
||||
def test_repeated_identical_failed_call_warns_then_blocks_before_third_execution():
    """Two identical failures produce a warning, then the next attempt is blocked.

    Warn and block thresholds for exact-signature failures are both 2; the
    same-tool halt threshold is set to 99 so it does not interfere.
    """
    config = ToolCallGuardrailConfig(
        exact_failure_warn_after=2,
        exact_failure_block_after=2,
        same_tool_failure_halt_after=99,
    )
    guard = ToolCallGuardrailController(config)
    call_args = {"query": "same"}

    # First attempt: allowed before and after the failure.
    assert guard.before_call("web_search", call_args).action == "allow"
    outcome_one = guard.after_call("web_search", call_args, '{"error":"boom"}', failed=True)
    assert outcome_one.action == "allow"

    # Second attempt: still allowed to run, but the failure triggers a warning.
    assert guard.before_call("web_search", call_args).action == "allow"
    outcome_two = guard.after_call("web_search", call_args, '{"error":"boom"}', failed=True)
    assert outcome_two.action == "warn"
    assert outcome_two.code == "repeated_exact_failure_warning"
    assert outcome_two.count == 2

    # Third attempt: rejected before execution.
    verdict = guard.before_call("web_search", call_args)
    assert verdict.action == "block"
    assert verdict.code == "repeated_exact_failure_block"
    assert verdict.tool_name == "web_search"
    assert verdict.count == 2
|
||||
|
||||
|
||||
def test_success_resets_exact_signature_failure_streak():
    """A successful call between two failures keeps the call from being blocked.

    With a block threshold of 2, the sequence fail, succeed, fail must leave
    ``before_call`` answering "allow" for the same tool and arguments.
    """
    guard = ToolCallGuardrailController(
        ToolCallGuardrailConfig(exact_failure_block_after=2, same_tool_failure_halt_after=99)
    )
    call_args = {"query": "same"}

    def record_failure():
        # Report one failed execution of the identical call.
        guard.after_call("web_search", call_args, '{"error":"boom"}', failed=True)

    record_failure()
    guard.after_call("web_search", call_args, '{"ok":true}', failed=False)
    assert guard.before_call("web_search", call_args).action == "allow"

    # One more failure after the success must not reach the threshold of 2.
    record_failure()
    assert guard.before_call("web_search", call_args).action == "allow"
|
||||
|
||||
|
||||
def test_same_tool_varying_args_failure_streak_warns_then_halts_independent_of_exact_streak():
    """Failures of one tool with different args warn at 2 and halt at 3.

    The exact-signature block threshold is set to 99 so only the per-tool
    failure streak can produce the warn and halt decisions asserted here.
    """
    config = ToolCallGuardrailConfig(
        exact_failure_block_after=99,
        same_tool_failure_warn_after=2,
        same_tool_failure_halt_after=3,
    )
    guard = ToolCallGuardrailController(config)

    def fail_terminal(command):
        # Report a failed "terminal" call whose only argument is the command.
        return guard.after_call("terminal", {"command": command}, '{"exit_code":1}', failed=True)

    outcome_one = fail_terminal("cmd-1")
    assert outcome_one.action == "allow"

    outcome_two = fail_terminal("cmd-2")
    assert outcome_two.action == "warn"
    assert outcome_two.code == "same_tool_failure_warning"

    outcome_three = fail_terminal("cmd-3")
    assert outcome_three.action == "halt"
    assert outcome_three.code == "same_tool_failure_halt"
    assert outcome_three.count == 3
|
||||
|
||||
|
||||
def test_idempotent_no_progress_repeated_result_warns_then_blocks_future_repeat():
    """A ``read_file`` call returning the same result twice warns, then is blocked.

    No-progress warn and block thresholds are both 2: the second identical
    successful result yields a warning, and the next ``before_call`` for the
    same arguments is blocked.
    """
    guard = ToolCallGuardrailController(
        ToolCallGuardrailConfig(no_progress_warn_after=2, no_progress_block_after=2)
    )
    call_args = {"path": "/tmp/same.txt"}
    contents = "same file contents"

    # First read: nothing to compare against yet.
    assert guard.before_call("read_file", call_args).action == "allow"
    assert guard.after_call("read_file", call_args, contents, failed=False).action == "allow"

    # Second read with an identical result triggers the warning.
    assert guard.before_call("read_file", call_args).action == "allow"
    repeat_outcome = guard.after_call("read_file", call_args, contents, failed=False)
    assert repeat_outcome.action == "warn"
    assert repeat_outcome.code == "idempotent_no_progress_warning"

    # A further identical attempt is refused up front.
    verdict = guard.before_call("read_file", call_args)
    assert verdict.action == "block"
    assert verdict.code == "idempotent_no_progress_block"
|
||||
|
||||
|
||||
def test_mutating_or_unknown_tools_are_not_blocked_for_repeated_identical_success_output_by_default():
    """Repeated identical successes of ``write_file`` and ``custom_tool`` stay allowed.

    Even with no-progress thresholds of 2, three rounds of identical
    successful calls to each tool must be answered with "allow" both before
    and after execution.
    """
    guard = ToolCallGuardrailController(
        ToolCallGuardrailConfig(no_progress_warn_after=2, no_progress_block_after=2)
    )
    repeated_calls = (
        ("write_file", {"path": "/tmp/x", "content": "x"}),
        ("custom_tool", {"x": 1}),
    )

    for _ in range(3):
        # Within each round: write_file first, then custom_tool.
        for tool_name, tool_args in repeated_calls:
            assert guard.before_call(tool_name, tool_args).action == "allow"
            assert guard.after_call(tool_name, tool_args, "ok", failed=False).action == "allow"
|
||||
|
||||
|
||||
def test_reset_for_turn_clears_bounded_guardrail_state():
    """``reset_for_turn`` turns previously blocked calls back into allowed ones.

    Two repeated failures and two repeated identical reads push both calls
    into the "block" state; after the reset both must be allowed again.
    """
    guard = ToolCallGuardrailController(
        ToolCallGuardrailConfig(exact_failure_block_after=2, no_progress_block_after=2)
    )
    failing_call = ("web_search", {"query": "same"})
    stalled_call = ("read_file", {"path": "/tmp/x"})

    # Build up both streaks to their block thresholds.
    for _ in range(2):
        guard.after_call(*failing_call, '{"error":"boom"}', failed=True)
    for _ in range(2):
        guard.after_call(*stalled_call, "same", failed=False)

    assert guard.before_call(*failing_call).action == "block"
    assert guard.before_call(*stalled_call).action == "block"

    guard.reset_for_turn()

    assert guard.before_call(*failing_call).action == "allow"
    assert guard.before_call(*stalled_call).action == "allow"
|
||||
Loading…
Add table
Add a link
Reference in a new issue