diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py
index 8be763513ff..d01d5d4a844 100644
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -2720,6 +2720,61 @@ def run_conversation(
                 # compress history and retry, not abort immediately.
                 status_code = getattr(api_error, "status_code", None)
 
+                # ── Respect disabled auto-compaction on overflow ──────
+                # Ported from anomalyco/opencode#30749.  When the user has
+                # turned auto-compaction off (``compression.enabled: false``),
+                # NO automatic compaction trigger may fire — including the
+                # provider/request-size overflow recovery paths below
+                # (long-context-tier 429, 413 payload-too-large, and
+                # context-overflow).  Without this guard the proactive
+                # threshold path correctly honours the setting (see the
+                # preflight check and the post-response ``should_compress``
+                # gate) but a provider overflow error would still silently
+                # compress + rotate the session, bypassing the user's
+                # explicit choice.  Surface a terminal error instead so the
+                # user can compact manually (``/compress``), start fresh
+                # (``/new``), switch to a larger-context model, or reduce
+                # attachments.  Forced compaction via ``/compress``
+                # (``force=True``) is unaffected — it never reaches this loop.
+                _overflow_reasons = {
+                    FailoverReason.long_context_tier,
+                    FailoverReason.payload_too_large,
+                    FailoverReason.context_overflow,
+                }
+                if (
+                    classified.reason in _overflow_reasons
+                    and not getattr(agent, "compression_enabled", True)
+                ):
+                    agent._flush_status_buffer()
+                    agent._vprint(
+                        f"{agent.log_prefix}❌ Context overflow, but auto-compaction is disabled "
+                        f"(compression.enabled: false).",
+                        force=True,
+                    )
+                    agent._vprint(
+                        f"{agent.log_prefix}   💡 Run /compress to compact manually, /new to start fresh, "
+                        f"switch to a larger-context model, or reduce attachments.",
+                        force=True,
+                    )
+                    logger.error(
+                        f"{agent.log_prefix}Context overflow ({classified.reason.value}) with "
+                        f"auto-compaction disabled — not compressing."
+                    )
+                    agent._persist_session(messages, conversation_history)
+                    return {
+                        "messages": messages,
+                        "completed": False,
+                        "api_calls": api_call_count,
+                        "error": (
+                            "Context overflow and auto-compaction is disabled "
+                            "(compression.enabled: false). Run /compress to compact manually, "
+                            "/new to start fresh, or switch to a larger-context model."
+                        ),
+                        "partial": True,
+                        "failed": True,
+                        "compaction_disabled": True,
+                    }
+
                 # ── Anthropic Sonnet long-context tier gate ───────────
                 # Anthropic returns HTTP 429 "Extra usage is required for
                 # long context requests" when a Claude Max (or similar)
diff --git a/agent/gemini_native_adapter.py b/agent/gemini_native_adapter.py
index b0d903372cd..a0f8e9df548 100644
--- a/agent/gemini_native_adapter.py
+++ b/agent/gemini_native_adapter.py
@@ -33,6 +33,13 @@ logger = logging.getLogger(__name__)
 
 DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
 
+# Published max output-token ceiling shared by every current Gemini text model
+# (2.5 + 3.x: flash, flash-lite, pro). build_gemini_request sends it as
+# maxOutputTokens when the caller passes max_tokens=None, because Gemini's
+# native API otherwise applies a low internal default and truncates output
+# (unlike OpenAI-compat endpoints where an omitted limit means full budget).
+GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65535
+
 
 def is_native_gemini_base_url(base_url: str) -> bool:
     """Return True when the endpoint speaks Gemini's native REST API."""
@@ -414,6 +421,18 @@ def build_gemini_request(
         generation_config["temperature"] = temperature
     if max_tokens is not None:
         generation_config["maxOutputTokens"] = max_tokens
+    else:
+        # Gemini's native generateContent does NOT treat an omitted
+        # maxOutputTokens as "use the model's full output budget" — it applies
+        # a low internal default and the model stops early with
+        # finishReason=MAX_TOKENS, truncating tool calls mid-stream (Hermes
+        # then retries 3× and refuses the incomplete call). Every current
+        # Gemini text model (2.5 + 3.x, flash / flash-lite / pro) caps at
+        # 65,535 output tokens, so default to that ceiling when the caller
+        # passes None ("unlimited"). On the OpenAI-compat path omitting the
+        # field genuinely means full budget; that assumption does not hold
+        # on the native API.
+        generation_config["maxOutputTokens"] = GEMINI_DEFAULT_MAX_OUTPUT_TOKENS
     if top_p is not None:
         generation_config["topP"] = top_p
     if stop:
diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py
index 7b1935528d9..0c17e309a8b 100644
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -571,7 +571,28 @@ class ChatCompletionsTransport(ProviderTransport):
                     api_kwargs[k] = v
 
         if extra_body:
-            api_kwargs["extra_body"] = extra_body
+            # Native Gemini (generativelanguage.googleapis.com, non-/openai)
+            # speaks Google's REST schema, not OpenAI's. OpenAI-style extra_body
+            # keys (tags, reasoning, provider, plugins, …) are unknown fields
+            # there and Gemini rejects the whole request with a non-retryable
+            # HTTP 400 ("Invalid JSON payload received. Unknown name 'tags'").
+            # This happens when a profile that emits extra_body (e.g. the Nous
+            # profile's portal `tags`) is active but the resolved endpoint is a
+            # Gemini base_url — typical when only Google credentials are set and
+            # a fallback/aux call lands on Gemini. The native client only reads
+            # thinking_config from extra_body, so drop everything else here.
+            try:
+                from agent.gemini_native_adapter import is_native_gemini_base_url
+                _native_gemini = is_native_gemini_base_url(params.get("base_url"))
+            except Exception:
+                _native_gemini = False
+            if _native_gemini:
+                extra_body = {
+                    k: v for k, v in extra_body.items()
+                    if k in ("thinking_config", "thinkingConfig")
+                }
+            if extra_body:
+                api_kwargs["extra_body"] = extra_body
 
         return api_kwargs
 
diff --git a/tests/agent/test_gemini_native_adapter.py b/tests/agent/test_gemini_native_adapter.py
index 4b066b4f454..4f894c512a6 100644
--- a/tests/agent/test_gemini_native_adapter.py
+++ b/tests/agent/test_gemini_native_adapter.py
@@ -326,3 +326,27 @@ def test_stream_event_translation_keeps_identical_calls_in_distinct_parts():
     assert tool_chunks[0].choices[0].delta.tool_calls[0].index == 0
     assert tool_chunks[1].choices[0].delta.tool_calls[0].index == 1
     assert tool_chunks[0].choices[0].delta.tool_calls[0].id != tool_chunks[1].choices[0].delta.tool_calls[0].id
+
+
+def test_max_tokens_none_defaults_to_gemini_output_ceiling():
+    """An unset limit is translated to the output ceiling, never dropped.
+
+    The native generateContent API falls back to a low internal default when
+    maxOutputTokens is missing, which cuts tool calls off mid-stream. Hermes
+    uses max_tokens=None for "unlimited", so the request must carry the
+    published 65,535 ceiling explicitly.
+    """
+    import agent.gemini_native_adapter as adapter
+
+    ceiling = adapter.GEMINI_DEFAULT_MAX_OUTPUT_TOKENS
+    user_turn = {"role": "user", "content": "hi"}
+    payload = adapter.build_gemini_request(messages=[user_turn], max_tokens=None)
+    assert payload["generationConfig"]["maxOutputTokens"] == ceiling
+    assert ceiling == 65535
+
+
+def test_explicit_max_tokens_is_respected():
+    """A caller-supplied max_tokens is forwarded verbatim as maxOutputTokens."""
+    from agent.gemini_native_adapter import build_gemini_request as build
+    config = build(messages=[{"role": "user", "content": "hi"}], max_tokens=4096)["generationConfig"]
+    assert config["maxOutputTokens"] == 4096
diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py
index 255d46f4381..0b54fe47059 100644
--- a/tests/agent/transports/test_chat_completions.py
+++ b/tests/agent/transports/test_chat_completions.py
@@ -859,3 +859,53 @@ class TestChatCompletionsCacheStats:
         r = SimpleNamespace(usage=SimpleNamespace(prompt_tokens_details=details))
         result = transport.extract_cache_stats(r)
         assert result == {"cached_tokens": 500, "creation_tokens": 100}
+
+
+class TestChatCompletionsGeminiNativeExtraBodyStrip:
+    """Extra-body keys from a provider profile (e.g. the Nous portal ``tags``)
+    must not be sent to a native Gemini endpoint, because Google's REST API
+    rejects unknown fields with HTTP 400.
+    """
+
+    @staticmethod
+    def _extra_body_for(transport, model, base_url):
+        """Build kwargs under the Nous profile; return their ``extra_body``."""
+        from providers import get_provider_profile
+
+        built = transport.build_kwargs(
+            model,
+            [{"role": "user", "content": "hi"}],
+            None,
+            provider_profile=get_provider_profile("nous"),
+            base_url=base_url,
+            session_id="s1",
+            max_tokens=None,
+        )
+        return built.get("extra_body")
+
+    def test_tags_stripped_when_endpoint_is_native_gemini(self, transport):
+        """Native base_url: no extra_body at all, or one without ``tags``."""
+        body = self._extra_body_for(
+            transport,
+            "anthropic/claude-sonnet-4.6",
+            "https://generativelanguage.googleapis.com/v1beta",
+        )
+        assert not body or "tags" not in body
+
+    def test_tags_preserved_on_nous_endpoint(self, transport):
+        """Nous inference endpoint: ``tags`` is still forwarded."""
+        body = self._extra_body_for(
+            transport,
+            "hermes-3-405b",
+            "https://inference.nousresearch.com/v1",
+        )
+        assert body and "tags" in body
+
+    def test_tags_pass_through_on_gemini_openai_compat(self, transport):
+        """The /openai compat endpoint is not "native" — unchanged behavior."""
+        body = self._extra_body_for(
+            transport,
+            "anthropic/claude-sonnet-4.6",
+            "https://generativelanguage.googleapis.com/v1beta/openai",
+        )
+        assert body and "tags" in body
diff --git a/tests/run_agent/test_413_compression.py b/tests/run_agent/test_413_compression.py
index cadb26c449b..2b8c32e297b 100644
--- a/tests/run_agent/test_413_compression.py
+++ b/tests/run_agent/test_413_compression.py
@@ -94,7 +94,11 @@ def agent():
         a._cached_system_prompt = "You are helpful."
         a._use_prompt_caching = False
         a.tool_delay = 0
-        a.compression_enabled = False
+        # Default matches production (`compression.enabled` defaults to True).
+        # Overflow-recovery tests below verify that 413 / context-overflow
+        # errors DO trigger compression; the disabled-path behavior is
+        # covered explicitly by TestOverflowWithCompactionDisabled.
+        a.compression_enabled = True
         a.save_trajectories = False
         return a
 
@@ -415,6 +419,13 @@ class TestPreflightCompression:
 
     def test_compress_context_emits_lifecycle_status_before_work(self, agent):
         """Direct context compression should tell gateway users why the turn paused."""
+        # This test calls _compress_context directly and asserts the FIRST
+        # status event is the lifecycle "Compacting context" message. With
+        # compaction enabled the lazy feasibility probe would emit an
+        # aux-provider warning first (no aux key in the hermetic test env),
+        # displacing events[0]. The flag value is irrelevant to what this
+        # test asserts, so disable it to suppress the probe.
+        agent.compression_enabled = False
         events = []
         agent.status_callback = lambda ev, msg: events.append((ev, msg))
 
@@ -802,3 +813,95 @@ class TestToolResultPreflightCompression:
 
         mock_compress.assert_called_once()
         assert result["completed"] is True
+
+
+# ---------------------------------------------------------------------------
+# Disabled auto-compaction on overflow (port of anomalyco/opencode#30749)
+# ---------------------------------------------------------------------------
+
+class TestOverflowWithCompactionDisabled:
+    """When ``compression.enabled`` is False, NO automatic compaction may
+    fire — including the provider/request-size overflow recovery paths.
+
+    Ported from anomalyco/opencode#30749: the proactive token-threshold
+    path already honoured the setting, but provider overflow errors
+    (413 payload-too-large, context-overflow, long-context-tier 429) still
+    silently compressed + rotated the session. The fix surfaces a terminal
+    error so the user can compact manually, start fresh, or switch models.
+    """
+
+    @staticmethod
+    def _prefill():  # prior user/assistant exchange passed as conversation_history
+        return [
+            {"role": "user", "content": "previous question"},
+            {"role": "assistant", "content": "previous answer"},
+        ]
+
+    def test_413_does_not_compress_when_disabled(self, agent):
+        """413 must NOT call _compress_context when compaction is disabled."""
+        agent.compression_enabled = False
+        err_413 = _make_413_error()
+        # If the guard fails, a second (success) response would be consumed.
+        agent.client.chat.completions.create.side_effect = [err_413, _mock_response()]
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session") as mock_persist,
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("hello", conversation_history=self._prefill())
+        # Guard path: no compression, session persisted, terminal error flagged.
+        mock_compress.assert_not_called()
+        mock_persist.assert_called()
+        assert result.get("failed") is True
+        assert result.get("compaction_disabled") is True
+        assert "auto-compaction is disabled" in result["error"]
+
+    def test_context_overflow_does_not_compress_when_disabled(self, agent):
+        """400 'prompt is too long' must NOT compress when compaction disabled."""
+        agent.compression_enabled = False
+        err_400 = Exception(
+            "Error code: 400 - {'type': 'error', 'error': {'type': "
+            "'invalid_request_error', 'message': 'prompt is too long: "
+            "233153 tokens > 200000 maximum'}}"
+        )
+        err_400.status_code = 400
+        agent.client.chat.completions.create.side_effect = [err_400, _mock_response()]
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("hello", conversation_history=self._prefill())
+        # The "compaction_disabled" flag is set by the disabled-compaction guard's return.
+        mock_compress.assert_not_called()
+        assert result.get("compaction_disabled") is True
+
+    def test_413_still_compresses_when_enabled(self, agent):
+        """Control: with compaction enabled, 413 still triggers compression.
+
+        Guards against the disabled-path guard accidentally swallowing the
+        enabled path.
+        """
+        agent.compression_enabled = True
+        err_413 = _make_413_error()
+        ok_resp = _mock_response(content="Recovered", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [err_413, ok_resp]
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.return_value = (
+                [{"role": "user", "content": "hello"}], "compressed",
+            )
+            result = agent.run_conversation("hello", conversation_history=self._prefill())
+        # Enabled path: exactly one compression, then the retry completes normally.
+        mock_compress.assert_called_once()
+        assert result["completed"] is True
+        assert result.get("compaction_disabled") is not True
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index e9e7011dd1e..ea66789fdd3 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -3903,6 +3903,7 @@ class TestRunConversation:
     def test_glm_prompt_exceeds_max_length_triggers_compression(self, agent):
         """GLM/Z.AI uses 'Prompt exceeds max length' for context overflow."""
         self._setup_agent(agent)
+        agent.compression_enabled = True  # this test verifies overflow→compression fires
         err_400 = Exception(
             "Error code: 400 - {'error': {'code': '1261', 'message': 'Prompt exceeds max length'}}"
         )
@@ -3937,6 +3938,7 @@ class TestRunConversation:
         to the generic 128K fallback tier.
         """
         self._setup_agent(agent)
+        agent.compression_enabled = True  # this test verifies overflow→compression fires
         agent.provider = "minimax"
         agent.model = "MiniMax-M2.7-highspeed"
         agent.base_url = "https://api.minimax.io/anthropic"
@@ -3982,6 +3984,7 @@ class TestRunConversation:
         rely on compression — see #33669 / PR #33826.
         """
         self._setup_agent(agent)
+        agent.compression_enabled = True  # this test verifies overflow→compression fires
         agent.provider = "openrouter"
         agent.model = "some/unknown-model"
         agent.base_url = "https://openrouter.ai/api/v1"