mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-01 07:01:41 +00:00
fix(computer-use): skip capture_after when action failed (ok=False)
_maybe_follow_capture() issued a follow-up screenshot unconditionally when capture_after=True, even when res.ok=False. The model then received a normal-looking screenshot alongside an error message, and in practice it often ignored ok=False and proceeded as if the action had succeeded. Fix: return _text_response(res) early when res.ok is False so the model receives only the error and can decide how to recover. Tests added: - test_capture_after_skipped_when_action_failed: patches click to return ok=False and asserts no capture call is issued. - test_capture_after_fires_when_action_succeeds: ensures the happy path still triggers the follow-up capture.
This commit is contained in:
parent
07b7cf6fe4
commit
fbdca64f73
2 changed files with 38 additions and 0 deletions
|
|
@ -240,6 +240,39 @@ class TestDispatch:
|
|||
out = handle_computer_use({"action": "set_value"})
|
||||
parsed = json.loads(out)
|
||||
assert "error" in parsed
|
||||
def test_capture_after_skipped_when_action_failed(self, noop_backend):
|
||||
"""capture_after must not fire when res.ok=False (regression guard).
|
||||
|
||||
A follow-up screenshot after a failed action shows the screen in a
|
||||
normal state, misleading the model into thinking the action succeeded.
|
||||
"""
|
||||
from unittest.mock import patch
|
||||
from tools.computer_use.backend import ActionResult
|
||||
from tools.computer_use.tool import handle_computer_use
|
||||
|
||||
# Make click() return a failure.
|
||||
with patch.object(noop_backend, "click",
|
||||
return_value=ActionResult(ok=False, action="click",
|
||||
message="element not found")):
|
||||
out = handle_computer_use({"action": "click", "element": 99,
|
||||
"capture_after": True})
|
||||
|
||||
parsed = json.loads(out)
|
||||
# Should return the error, not a multimodal capture.
|
||||
assert parsed.get("ok") is False
|
||||
assert parsed.get("action") == "click"
|
||||
# No follow-up capture should have been issued.
|
||||
capture_calls = [c for c in noop_backend.calls if c[0] == "capture"]
|
||||
assert len(capture_calls) == 0, "capture must not be called after a failed action"
|
||||
|
||||
def test_capture_after_fires_when_action_succeeds(self, noop_backend):
|
||||
"""capture_after must trigger for successful actions."""
|
||||
from tools.computer_use.tool import handle_computer_use
|
||||
out = handle_computer_use({"action": "click", "element": 1,
|
||||
"capture_after": True})
|
||||
# Noop backend returns ok=True, so capture should have been called.
|
||||
capture_calls = [c for c in noop_backend.calls if c[0] == "capture"]
|
||||
assert len(capture_calls) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -675,6 +675,11 @@ def _maybe_follow_capture(
|
|||
) -> Any:
|
||||
if not do_capture:
|
||||
return _text_response(res)
|
||||
# Skip the follow-up capture when the action itself failed: showing a
|
||||
# normal-looking screenshot after a failure misleads the model into thinking
|
||||
# the action succeeded. Return the error text instead.
|
||||
if not res.ok:
|
||||
return _text_response(res)
|
||||
try:
|
||||
# Preserve the app context established by the preceding capture/focus_app so
|
||||
# that capture_after=True re-captures the same app rather than the frontmost
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue