diff --git a/run_agent.py b/run_agent.py
index b60f6c43ce6..906f706d08a 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -9324,6 +9324,46 @@ class AIAgent:
             )
         return transformed
 
+    def _tool_result_content_for_active_model(self, tool_name: str, result: Any) -> Any:
+        """Pick the tool-message content the active model can accept.
+
+        Results that are not multimodal are returned untouched.  Multimodal
+        results unwrap to their OpenAI-style content parts whenever those parts
+        carry no image or the model supports vision.  Otherwise a text-only
+        fallback is returned, so image parts a text-only provider would reject
+        never land in the canonical history and break the following turns.
+        """
+        if not _is_multimodal_tool_result(result):
+            return result
+
+        parts = result.get("content") or []
+        # Short-circuit: the vision check only runs for image-bearing results.
+        if not self._content_has_image_parts(parts) or self._model_supports_vision():
+            return parts
+
+        text_summary = _multimodal_text_summary(result)
+        if tool_name != "computer_use":
+            logger.warning(
+                "Tool %s returned image content for non-vision model %s/%s; "
+                "falling back to text summary",
+                tool_name,
+                self.provider,
+                self.model,
+            )
+            return text_summary
+
+        # computer_use gets an explicit error payload (serialized as a JSON
+        # string) that carries the text summary alongside the guidance.
+        image_error = (
+            "computer_use returned screenshot/image content, but the active "
+            "model/provider does not support image input. Switch to a "
+            "vision-capable model for desktop computer use, or use browser "
+            "tools for browser tasks."
+        )
+        return json.dumps(
+            {"error": image_error, "text_summary": text_summary}
+        )
+
     def _try_shrink_image_parts_in_messages(self, api_messages: list) -> bool:
         """Re-encode all native image parts at a smaller size to recover from
         image-too-large errors (Anthropic 5 MB, unknown other providers).
@@ -11096,14 +11136,10 @@ class AIAgent:
             # rather than a raw Python dict.  The Anthropic adapter already
             # accepts content lists; vision-capable OpenAI-compatible servers
             # (mlx-vlm, GPT-4o, …) accept image_url in tool messages natively.
-            # Text-only servers that reject images are handled by the adaptive
-            # _vision_supported recovery in the API retry loop.
+            # Text-only servers get a string-safe fallback here so a rejected
+            # image tool result never poisons canonical session history.
             # String results pass through unchanged.
-            _tool_content = (
-                function_result["content"]
-                if _is_multimodal_tool_result(function_result)
-                else function_result
-            )
+            _tool_content = self._tool_result_content_for_active_model(name, function_result)
             tool_msg = {
                 "role": "tool",
                 "name": name,
@@ -11518,11 +11554,7 @@ class AIAgent:
 
             # Unwrap _multimodal dicts to an OpenAI-style content list
             # (see parallel path for rationale). String results pass through.
-            _tool_content = (
-                function_result["content"]
-                if _is_multimodal_tool_result(function_result)
-                else function_result
-            )
+            _tool_content = self._tool_result_content_for_active_model(function_name, function_result)
             tool_msg = {
                 "role": "tool",
                 "name": function_name,
@@ -13535,6 +13567,11 @@ class AIAgent:
                         # we don't false-trip on other URL validation
                         # errors. (issue #23570)
                         "image_url'. expected",
+                        # DeepSeek's OpenAI-compatible API is text-only and
+                        # rejects image content parts with the error:
+                        # "unknown variant `image_url`, expected `text`".
+                        "unknown variant `image_url`, expected `text`",
+                        "unknown variant image_url, expected text",
                     )
                     _err_lower = _err_body.lower()
                     _looks_like_image_rejection = any(
diff --git a/tests/tools/test_computer_use.py b/tests/tools/test_computer_use.py
index 58700dcaaf2..5b035950348 100644
--- a/tests/tools/test_computer_use.py
+++ b/tests/tools/test_computer_use.py
@@ -591,6 +591,67 @@ class TestRunAgentMultimodalHelpers:
             for p in cleaned["content"]
         )
 
+    def test_computer_use_image_result_becomes_error_for_text_only_model(self):
+        """Text-only model: computer_use image output turns into a JSON error."""
+        from run_agent import AIAgent
+
+        text_part = {"type": "text", "text": "screen captured"}
+        image_part = {"type": "image_url", "image_url": {"url": "data:image/png;base64,x"}}
+        tool_result = {
+            "_multimodal": True,
+            "content": [text_part, image_part],
+            "text_summary": "screen captured",
+        }
+        # Bare instance: object.__new__ skips AIAgent.__init__ entirely.
+        agent = object.__new__(AIAgent)
+        agent.provider, agent.model = "deepseek", "deepseek-v4-pro"
+
+        with patch.object(agent, "_model_supports_vision", return_value=False):
+            content = agent._tool_result_content_for_active_model("computer_use", tool_result)
+
+        payload = json.loads(content)
+        assert "image_url" not in content
+        assert payload["text_summary"] == "screen captured"
+        assert "computer_use returned screenshot/image content" in payload["error"]
+
+    def test_computer_use_image_result_preserved_for_vision_model(self):
+        """Vision model: the original content list comes back untouched."""
+        from run_agent import AIAgent
+
+        parts = [
+            {"type": "text", "text": "screen captured"},
+            {"type": "image_url", "image_url": {"url": "data:image/png;base64,x"}},
+        ]
+        tool_result = {"_multimodal": True, "content": parts}
+        agent = object.__new__(AIAgent)
+
+        with patch.object(agent, "_model_supports_vision", return_value=True):
+            content = agent._tool_result_content_for_active_model("computer_use", tool_result)
+
+        # Identity, not equality: the very same list object must be returned.
+        assert content is parts
+        assert any(part.get("type") == "image_url" for part in content)
+
+    def test_other_multimodal_tool_uses_text_summary_for_text_only_model(self):
+        """Text-only model: a non-computer_use tool yields its text_summary."""
+        from run_agent import AIAgent
+
+        tool_result = {
+            "_multimodal": True,
+            "content": [
+                {"type": "text", "text": "analysis text"},
+                {"type": "image_url", "image_url": {"url": "data:image/png;base64,x"}},
+            ],
+            "text_summary": "analysis summary",
+        }
+        agent = object.__new__(AIAgent)
+        agent.provider, agent.model = "custom", "text-only"
+
+        with patch.object(agent, "_model_supports_vision", return_value=False):
+            fallback = agent._tool_result_content_for_active_model("vision_analyze", tool_result)
+
+        assert fallback == "analysis summary"
+
 
 # ---------------------------------------------------------------------------
 # Universality: does the schema work without Anthropic?