fix: resolve lazy session creation regressions (#18370 fallout) (#20363)

Fix three regressions introduced by PR #18370 (lazy session creation): 1. _finalize_session() uses stale session_key after compression (#20001) 2. session_key not synced after auto-compression in run_conversation (#20001) 3. pending_title ValueError leaves title wedged forever (#19029) 4. Gateway silently swallows null responses when agent did work (#18765) 5. One-time cleanup for accumulated ghost compression continuations (#20001) Changes: - tui_gateway/server.py: _finalize_session() now uses agent.session_id (falls back to session_key when agent is None). Refactor _sync_session_key_after_compress() with clear_pending_title and restart_slash_worker policy flags. Call it post-run_conversation() to sync session_key after auto-compression. Add ValueError handler to pending_title flush. - gateway/run.py: Extract _normalize_empty_agent_response() helper that consolidates failed/partial/null response handling. Surfaces user-facing error when agent did work (api_calls > 0) but returned no text. - hermes_state.py: Add finalize_orphaned_compression_sessions() — marks ghost continuation sessions as ended (non-destructive, preserves data). - cli.py: One-time startup migration for orphaned compression sessions. Test changes: - tests/test_tui_gateway_server.py: Update pending_title ValueError test for post-#18370 architecture (title applied post-message, not at create). - tests/test_lazy_session_regressions.py: 14 new regression tests covering all fixed paths.
2026-05-08 03:01:47 +00:00 · 2026-05-06 01:11:49 +05:30 · 2026-05-06 01:11:49 +05:30 · 3b750715a3
commit 3b750715a3
parent 0397be5939
6 changed files with 809 additions and 74 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@ -939,6 +939,52 @@ import weakref as _weakref
 _gateway_runner_ref: _weakref.ref = lambda: None


+def _normalize_empty_agent_response(
+    agent_result: dict,
+    response: str,
+    *,
+    history_len: int = 0,
+) -> str:
+    """Normalize empty/None agent responses into user-facing messages.
+
+    Consolidates the existing ``failed`` handler and adds a catch-all for
+    the case where the agent did work (api_calls > 0) but returned no text.
+    Fix for #18765.
+    """
+    if response:
+        return response
+
+    if agent_result.get("failed"):
+        error_detail = agent_result.get("error", "unknown error")
+        error_str = str(error_detail).lower()
+        is_context_failure = any(
+            p in error_str
+            for p in ("context", "token", "too large", "too long", "exceed", "payload")
+        ) or ("400" in error_str and history_len > 50)
+        if is_context_failure:
+            return (
+                "⚠️ Session too large for the model's context window.\n"
+                "Use /compact to compress the conversation, or "
+                "/reset to start fresh."
+            )
+        return (
+            f"The request failed: {str(error_detail)[:300]}\n"
+            "Try again or use /reset to start a fresh session."
+        )
+
+    api_calls = int(agent_result.get("api_calls", 0) or 0)
+    if api_calls > 0 and not agent_result.get("interrupted"):
+        if agent_result.get("partial"):
+            err = agent_result.get("error", "processing incomplete")
+            return f"⚠️ Processing stopped: {str(err)[:200]}. Try again."
+        return (
+            "⚠️ Processing completed but no response was generated. "
+            "This may be a transient error — try sending your message again."
+        )
+
+    return response
+
+
 class GatewayRunner:
    """
    Main gateway controller.
@ -6439,33 +6485,11 @@ class GatewayRunner:
                        session_key, _e,
                    )

-            # Surface error details when the agent failed silently (final_response=None)
-            if not response and agent_result.get("failed"):
-                error_detail = agent_result.get("error", "unknown error")
-                error_str = str(error_detail).lower()
-
-                # Detect context-overflow failures and give specific guidance.
-                # Generic 400 "Error" from Anthropic with large sessions is the
-                # most common cause of this (#1630).
-                _is_ctx_fail = any(p in error_str for p in (
-                    "context", "token", "too large", "too long",
-                    "exceed", "payload",
-                )) or (
-                    "400" in error_str
-                    and len(history) > 50
-                )
-
-                if _is_ctx_fail:
-                    response = (
-                        "⚠️ Session too large for the model's context window.\n"
-                        "Use /compact to compress the conversation, or "
-                        "/reset to start fresh."
-                    )
-                else:
-                    response = (
-                        f"The request failed: {str(error_detail)[:300]}\n"
-                        "Try again or use /reset to start a fresh session."
-                    )
+            # Normalize empty responses: surface errors, partial failures, and
+            # the case where agent did work but returned no text. Fix for #18765.
+            response = _normalize_empty_agent_response(
+                agent_result, response, history_len=len(history),
+            )

            # If the agent's session_id changed during compression, update
            # session_entry so transcript writes below go to the right session.