fix(computer-use): skip capture_after when action failed (ok=False)

_maybe_follow_capture() issued a follow-up screenshot unconditionally
when capture_after=True, even when res.ok=False. The model then received
a normal-looking screenshot alongside an error message, and in practice
it often ignored ok=False and proceeded as if the action had succeeded.

Fix: return _text_response(res) early when res.ok is False so the model
receives only the error and can decide how to recover.

Tests added:
- test_capture_after_skipped_when_action_failed: patches click to return
  ok=False and asserts no capture call is issued.
- test_capture_after_fires_when_action_succeeds: ensures the happy path
  still triggers the follow-up capture.
This commit is contained in:
Rodrigo 2026-05-09 15:19:26 -03:00 committed by Teknium
parent 07b7cf6fe4
commit fbdca64f73
2 changed files with 38 additions and 0 deletions

View file

@ -240,6 +240,39 @@ class TestDispatch:
out = handle_computer_use({"action": "set_value"})
parsed = json.loads(out)
assert "error" in parsed
def test_capture_after_skipped_when_action_failed(self, noop_backend):
"""capture_after must not fire when res.ok=False (regression guard).
A follow-up screenshot after a failed action shows the screen in a
normal state, misleading the model into thinking the action succeeded.
"""
from unittest.mock import patch
from tools.computer_use.backend import ActionResult
from tools.computer_use.tool import handle_computer_use
# Make click() return a failure.
with patch.object(noop_backend, "click",
return_value=ActionResult(ok=False, action="click",
message="element not found")):
out = handle_computer_use({"action": "click", "element": 99,
"capture_after": True})
parsed = json.loads(out)
# Should return the error, not a multimodal capture.
assert parsed.get("ok") is False
assert parsed.get("action") == "click"
# No follow-up capture should have been issued.
capture_calls = [c for c in noop_backend.calls if c[0] == "capture"]
assert len(capture_calls) == 0, "capture must not be called after a failed action"
def test_capture_after_fires_when_action_succeeds(self, noop_backend):
"""capture_after must trigger for successful actions."""
from tools.computer_use.tool import handle_computer_use
out = handle_computer_use({"action": "click", "element": 1,
"capture_after": True})
# Noop backend returns ok=True, so capture should have been called.
capture_calls = [c for c in noop_backend.calls if c[0] == "capture"]
assert len(capture_calls) == 1
# ---------------------------------------------------------------------------

View file

@ -675,6 +675,11 @@ def _maybe_follow_capture(
) -> Any:
if not do_capture:
return _text_response(res)
# Skip the follow-up capture when the action itself failed: showing a
# normal-looking screenshot after a failure misleads the model into thinking
# the action succeeded. Return the error text instead.
if not res.ok:
return _text_response(res)
try:
# Preserve the app context established by the preceding capture/focus_app so
# that capture_after=True re-captures the same app rather than the frontmost