diff --git a/agent/agent_init.py b/agent/agent_init.py
index a5d27c0b73d..e0846291ad6 100644
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -1466,7 +1466,13 @@ def init_agent(
     # Gateway status_callback is not yet wired, so any warning is stored
     # in _compression_warning and replayed in the first run_conversation().
     agent._compression_warning = None
-    agent._check_compression_model_feasibility()
+    # Lazy feasibility check: deferred to the first compression attempt.
+    # Running it eagerly here costs ~400ms cold (network probe of the
+    # auxiliary provider chain + /models lookup) on every agent init,
+    # including short ``chat -q`` runs that never reach the threshold.
+    # The guard at the top of ``compress_context`` runs it at most once per
+    # agent, then flips ``_compression_feasibility_checked`` to True.
+    agent._compression_feasibility_checked = False
 
     # Snapshot primary runtime for per-turn restoration.  When fallback
     # activates during a turn, the next turn restores these values so the
diff --git a/agent/conversation_compression.py b/agent/conversation_compression.py
index 3f6a1ecbfac..a3a9ba1d6fb 100644
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@@ -281,6 +281,19 @@ def compress_context(
         prompt — the session is NOT rotated.  Callers should detect the
         no-op via ``len(returned) == len(input)`` and stop the retry loop.
     """
+    # Lazy feasibility check — run the auxiliary-provider probe + context
+    # length lookup just-in-time on the first compression attempt instead of
+    # at AIAgent.__init__. Saves ~400ms cold off every short session that
+    # never reaches the threshold (the vast majority of ``chat -q`` runs).
+    # The check itself sets ``agent._compression_warning`` so the
+    # status-callback replay machinery still emits the warning to the user
+    # the first time it would matter.
+    if not getattr(agent, "_compression_feasibility_checked", True):
+        try:
+            check_compression_model_feasibility(agent)
+        finally:
+            agent._compression_feasibility_checked = True
+
     _pre_msg_count = len(messages)
     logger.info(
         "context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r",
diff --git a/tests/run_agent/test_compression_feasibility.py b/tests/run_agent/test_compression_feasibility.py
index 3e23f3eb5d3..3be0f0235a3 100644
--- a/tests/run_agent/test_compression_feasibility.py
+++ b/tests/run_agent/test_compression_feasibility.py
@@ -222,7 +222,14 @@ def test_feasibility_check_ignores_invalid_context_length(mock_get_client, mock_
 
 
 def test_init_feasibility_check_uses_aux_context_override_from_config():
-    """Real AIAgent init should cache and forward auxiliary.compression.context_length."""
+    """Lazy feasibility check should cache and forward auxiliary.compression.context_length.
+
+    NB: the feasibility check is deferred from AIAgent.__init__ to the
+    first actual compression attempt (saves ~400ms cold startup on short
+    sessions that never trigger compression). This test drives the check
+    explicitly via ``agent._check_compression_model_feasibility()`` to
+    assert that the config override is threaded through to the lookup.
+    """
 
     class _StubCompressor:
         def __init__(self, *args, **kwargs):
@@ -264,7 +271,15 @@ def test_init_feasibility_check_uses_aux_context_override_from_config():
             skip_memory=True,
         )
 
-    assert agent._aux_compression_context_length_config == 1_000_000
+        # Config override is captured eagerly in __init__ (still needed
+        # because the threshold-derivation logic at construction time
+        # consults it).
+        assert agent._aux_compression_context_length_config == 1_000_000
+
+        # The expensive feasibility probe is deferred. Drive it manually
+        # to validate the call shape still forwards the override correctly.
+        agent._check_compression_model_feasibility()
+
     mock_ctx_len.assert_called_once_with(
         "custom/big-model",
         base_url="http://custom-endpoint:8080/v1",