diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py
index fca2e3bd005..257eece57bd 100644
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@@ -1936,6 +1936,20 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                     ),
                 ))
 
+        # Empty-stream guard: no finish_reason and no content, reasoning, or tool-call
+        # data was accumulated — a provider/upstream error or malformed SSE, not a
+        # legitimate empty completion. Raise so the retry machinery handles it instead of fabricating a successful turn.
+        if (
+            finish_reason is None
+            and not content_parts
+            and not reasoning_parts
+            and not tool_calls_acc
+        ):
+            raise RuntimeError(
+                "Provider returned an empty stream with no finish_reason "
+                "(possible upstream error or malformed SSE response)."
+            )
+
         effective_finish_reason = finish_reason or "stop"
         if has_truncated_tool_args:
             effective_finish_reason = "length"
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 0ce9d0c6361..1080256e0ac 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -964,6 +964,10 @@ def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
     is_output_cap_error = (
         "max_tokens" in error_lower
         and ("available_tokens" in error_lower or "available tokens" in error_lower)
+    ) or (
+        # OpenRouter/Nous phrasing of the same condition.
+        "in the output" in error_lower
+        and "maximum context length" in error_lower
     )
     if not is_output_cap_error:
         return None
@@ -982,6 +986,19 @@ def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
             tokens = int(match.group(1))
             if tokens >= 1:
                 return tokens
+
+    # OpenRouter/Nous format: "maximum context length is N … (A of text input,
+    # B of tool input, C in the output)". Available output = N - A - B (C is not subtracted).
+    _m_ctx = re.search(r'maximum context length is (\d+)', error_lower)
+    _m_parts = re.search(
+        r'\((\d+)\s+of text input,\s*(\d+)\s+of tool input,\s*(\d+)\s+in the output\)',
+        error_lower,
+    )
+    if _m_ctx and _m_parts:
+        _available = int(_m_ctx.group(1)) - int(_m_parts.group(1)) - int(_m_parts.group(2))
+        if _available >= 1:
+            return _available
+
     return None
 
 
diff --git a/tests/test_output_cap_parsing.py b/tests/test_output_cap_parsing.py
new file mode 100644
index 00000000000..4f989622b14
--- /dev/null
+++ b/tests/test_output_cap_parsing.py
@@ -0,0 +1,27 @@
+import pytest
+from agent.model_metadata import parse_available_output_tokens_from_error
+
+
+class TestParseOpenRouterOutputCap:
+    """OpenRouter/Nous phrase the output-cap error as a context breakdown."""
+
+    def test_openrouter_breakdown_format(self):
+        msg = ("This endpoint's maximum context length is 200000 tokens. "
+               "However, you requested about 195000 tokens "
+               "(150000 of text input, 40000 of tool input, 5000 in the output).")
+        # context - text input - tool input = 200000 - 150000 - 40000 = 10000 (output count unused)
+        assert parse_available_output_tokens_from_error(msg) == 10000
+
+    def test_anthropic_format_still_works(self):
+        msg = ("max_tokens: 32768 > context_window: 200000 - "
+               "input_tokens: 190000 = available_tokens: 10000")
+        assert parse_available_output_tokens_from_error(msg) == 10000
+
+    def test_non_output_cap_error_returns_none(self):
+        assert parse_available_output_tokens_from_error("some unrelated 400 error") is None
+
+    def test_breakdown_with_no_room_returns_none(self):
+        # 1000 - 900 - 200 = -100, which is below the minimum cap of 1 -> None
+        msg = ("maximum context length is 1000 tokens "
+               "(900 of text input, 200 of tool input, 0 in the output)")
+        assert parse_available_output_tokens_from_error(msg) is None