diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py
index 409d2d6e4..c98f7fd9b 100644
--- a/gateway/platforms/matrix.py
+++ b/gateway/platforms/matrix.py
@@ -418,6 +418,11 @@ class MatrixAdapter(BasePlatformAdapter):
             if isinstance(sync_data, dict):
                 rooms_join = sync_data.get("rooms", {}).get("join", {})
                 self._joined_rooms = set(rooms_join.keys())
+                # Store the next_batch token so incremental syncs start
+                # from where the initial sync left off.
+                nb = sync_data.get("next_batch")
+                if nb:
+                    await client.sync_store.put_next_batch(nb)
                 logger.info(
                     "Matrix: initial sync complete, joined %d rooms",
                     len(self._joined_rooms),
@@ -809,19 +814,40 @@ class MatrixAdapter(BasePlatformAdapter):
 
     async def _sync_loop(self) -> None:
         """Continuously sync with the homeserver."""
+        client = self._client
+        # Resume from the token stored during the initial sync.
+        next_batch = await client.sync_store.get_next_batch()
         while not self._closing:
             try:
-                sync_data = await self._client.sync(timeout=30000)
+                sync_data = await client.sync(
+                    since=next_batch, timeout=30000,
+                )
                 if isinstance(sync_data, dict):
                     # Update joined rooms from sync response.
                     rooms_join = sync_data.get("rooms", {}).get("join", {})
                     if rooms_join:
                         self._joined_rooms.update(rooms_join.keys())
 
-                # Share keys periodically if E2EE is enabled.
-                if self._encryption and getattr(self._client, "crypto", None):
+                    # Advance the sync token so the next request is
+                    # incremental instead of a full initial sync.
+                    nb = sync_data.get("next_batch")
+                    if nb:
+                        next_batch = nb
+                        await client.sync_store.put_next_batch(nb)
+
+                    # Dispatch events to registered handlers so that
+                    # _on_room_message / _on_reaction / _on_invite fire.
                     try:
-                        await self._client.crypto.share_keys()
+                        tasks = client.handle_sync(sync_data)
+                        if tasks:
+                            await asyncio.gather(*tasks)
+                    except Exception as exc:
+                        logger.warning("Matrix: sync event dispatch error: %s", exc)
+
+                # Share keys periodically if E2EE is enabled.
+                if self._encryption and getattr(client, "crypto", None):
+                    try:
+                        await client.crypto.share_keys()
                     except Exception as exc:
                         logger.warning("Matrix: E2EE key share failed: %s", exc)
 
diff --git a/run_agent.py b/run_agent.py
index 2901ef10a..59555ced3 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -700,10 +700,14 @@ class AIAgent:
         except Exception:
             pass
 
-        # Direct OpenAI sessions use the Responses API path.  GPT-5.x tool
-        # calls with reasoning are rejected on /v1/chat/completions, and
-        # Hermes is a tool-using client by default.
-        if self.api_mode == "chat_completions" and self._is_direct_openai_url():
+        # GPT-5.x models require the Responses API path — they are rejected
+        # on /v1/chat/completions by both OpenAI and OpenRouter.  Also
+        # auto-upgrade for direct OpenAI URLs (api.openai.com) since all
+        # newer tool-calling models prefer Responses there.
+        if self.api_mode == "chat_completions" and (
+            self._is_direct_openai_url()
+            or self._model_requires_responses_api(self.model)
+        ):
             self.api_mode = "codex_responses"
 
         # Pre-warm OpenRouter model metadata cache in a background thread.
@@ -1702,6 +1706,21 @@ class AIAgent:
         """Return True when the base URL targets OpenRouter."""
         return "openrouter" in self._base_url_lower
 
+    @staticmethod
+    def _model_requires_responses_api(model: str) -> bool:
+        """Return True for models that require the Responses API path.
+
+        GPT-5.x models are rejected on /v1/chat/completions by both
+        OpenAI and OpenRouter (error: ``unsupported_api_for_model``).
+        Detect these so the correct api_mode is set regardless of
+        which provider is serving the model.
+        """
+        m = model.lower()
+        # Strip vendor prefix (e.g. "openai/gpt-5.4" → "gpt-5.4")
+        if "/" in m:
+            m = m.rsplit("/", 1)[-1]
+        return m.startswith("gpt-5")
+
     def _max_tokens_param(self, value: int) -> dict:
         """Return the correct max tokens kwarg for the current provider.
         
@@ -5251,7 +5270,7 @@ class AIAgent:
             except Exception:
                 pass
 
-            # Determine api_mode from provider / base URL
+            # Determine api_mode from provider / base URL / model
             fb_api_mode = "chat_completions"
             fb_base_url = str(fb_client.base_url)
             if fb_provider == "openai-codex":
@@ -5260,6 +5279,10 @@ class AIAgent:
                 fb_api_mode = "anthropic_messages"
             elif self._is_direct_openai_url(fb_base_url):
                 fb_api_mode = "codex_responses"
+            elif self._model_requires_responses_api(fb_model):
+                # GPT-5.x models need Responses API on every provider
+                # (OpenRouter, Copilot, direct OpenAI, etc.)
+                fb_api_mode = "codex_responses"
 
             old_model = self.model
             self.model = fb_model
@@ -5348,8 +5371,8 @@ class AIAgent:
         to the fallback provider for every subsequent turn.  Calling this at
         the top of ``run_conversation()`` makes fallback turn-scoped.
 
-        The gateway creates a fresh agent per message so this is a no-op
-        there (``_fallback_activated`` is always False at turn start).
+        The gateway caches agents across messages (``_agent_cache`` in
+        ``gateway/run.py``), so this restoration IS needed there too.
         """
         if not self._fallback_activated:
             return False
diff --git a/tests/gateway/test_matrix.py b/tests/gateway/test_matrix.py
index 469bae030..81eea4676 100644
--- a/tests/gateway/test_matrix.py
+++ b/tests/gateway/test_matrix.py
@@ -1041,20 +1041,28 @@ class TestMatrixSyncLoop:
             call_count += 1
             if call_count >= 1:
                 adapter._closing = True
-            return {"rooms": {"join": {"!room:example.org": {}}}}
+            return {"rooms": {"join": {"!room:example.org": {}}}, "next_batch": "s1234"}
 
         mock_crypto = MagicMock()
         mock_crypto.share_keys = AsyncMock()
 
+        mock_sync_store = MagicMock()
+        mock_sync_store.get_next_batch = AsyncMock(return_value=None)
+        mock_sync_store.put_next_batch = AsyncMock()
+
         fake_client = MagicMock()
         fake_client.sync = AsyncMock(side_effect=_sync_once)
         fake_client.crypto = mock_crypto
+        fake_client.sync_store = mock_sync_store
+        fake_client.handle_sync = MagicMock(return_value=[])
         adapter._client = fake_client
 
         await adapter._sync_loop()
 
         fake_client.sync.assert_awaited_once()
         mock_crypto.share_keys.assert_awaited_once()
+        fake_client.handle_sync.assert_called_once()
+        mock_sync_store.put_next_batch.assert_awaited_once_with("s1234")
 
 
 class TestMatrixEncryptedSendFallback:
diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
index 635c75fcf..6756ed6fd 100644
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -222,6 +222,12 @@ def test_api_mode_normalizes_provider_case(monkeypatch):
 
 
 def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypatch):
+    """GPT-5.x models need codex_responses even on OpenRouter.
+
+    OpenRouter rejects GPT-5 models on /v1/chat/completions with
+    ``unsupported_api_for_model``.  The model-level check overrides
+    the provider default.
+    """
     _patch_agent_bootstrap(monkeypatch)
     agent = run_agent.AIAgent(
         model="gpt-5-codex",
@@ -233,7 +239,7 @@ def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypat
         skip_context_files=True,
         skip_memory=True,
     )
-    assert agent.api_mode == "chat_completions"
+    assert agent.api_mode == "codex_responses"
     assert agent.provider == "openrouter"
 
 
diff --git a/website/docs/user-guide/messaging/feishu.md b/website/docs/user-guide/messaging/feishu.md
index 5a7e06b72..ac4bad239 100644
--- a/website/docs/user-guide/messaging/feishu.md
+++ b/website/docs/user-guide/messaging/feishu.md
@@ -212,7 +212,24 @@ When users click buttons or interact with interactive cards sent by the bot, the
 
 Card action events are dispatched with `MessageType.COMMAND`, so they flow through the normal command processing pipeline.
 
-To use this feature, enable the **Interactive Card** event in your Feishu app's event subscriptions (`card.action.trigger`).
+This is also how **command approval** works — when the agent needs to run a dangerous command, it sends an interactive card with Allow Once / Session / Always / Deny buttons. The user clicks a button, and the card action callback delivers the approval decision back to the agent.
+
+### Required Feishu App Configuration
+
+Interactive cards require **three** configuration steps in the Feishu Developer Console. Missing any of them causes error **200340** when users click card buttons.
+
+1. **Subscribe to the card action event:**
+   In **Event Subscriptions**, add `card.action.trigger` to your subscribed events.
+
+2. **Enable the Interactive Card capability:**
+   In **App Features > Bot**, ensure the **Interactive Card** toggle is enabled. This tells Feishu that your app can receive card action callbacks.
+
+3. **Configure the Card Request URL (webhook mode only):**
+   In **App Features > Bot > Message Card Request URL**, set the URL to the same endpoint as your event webhook (e.g. `https://your-server:8765/feishu/webhook`). In WebSocket mode this is handled automatically by the SDK.
+
+:::warning
+Without all three steps, Feishu will successfully *send* interactive cards (sending only requires `im:message:send` permission), but clicking any button will return error 200340. The card appears to work — the error only surfaces when a user interacts with it.
+:::
 
 ## Media Support
 
@@ -412,6 +429,7 @@ WebSocket and per-group ACL settings are configured via `config.yaml` under `pla
 | Post messages show as plain text | The Feishu API rejected the post payload; this is normal fallback behavior. Check logs for details. |
 | Images/files not received by bot | Grant `im:message` and `im:resource` permission scopes to your Feishu app |
 | Bot identity not auto-detected | Grant `admin:app.info:readonly` scope, or set `FEISHU_BOT_OPEN_ID` / `FEISHU_BOT_NAME` manually |
+| Error 200340 when clicking approval buttons | Enable **Interactive Card** capability and configure **Card Request URL** in the Feishu Developer Console. See [Required Feishu App Configuration](#required-feishu-app-configuration) above. |
 | `Webhook rate limit exceeded` | More than 120 requests/minute from the same IP. This is usually a misconfiguration or loop. |
 
 ## Toolset