fix(tui): reject history-mutating commands while session is running (#12416)

Fixes silent data loss in the TUI when /undo, /compress, /retry, or rollback.restore runs during an in-flight agent turn. The version- guard at prompt.submit:1449 would fail the version check and silently skip writing the agent's result — UI showed the assistant reply but DB / backend history never received it, causing UI↔backend desync that persisted across session resume. Changes (tui_gateway/server.py): - session.undo, session.compress, /retry, rollback.restore (full-history only — file-scoped rollbacks still allowed): reject with 4009 when session.running is True. Users can /interrupt first. - prompt.submit: on history_version mismatch (defensive backstop), attach a 'warning' field to message.complete and log to stderr instead of silently dropping the agent's output. The UI can surface the warning to the user; the operator can spot it in logs. Tests (tests/test_tui_gateway_server.py): 6 new cases. - test_session_undo_rejects_while_running - test_session_undo_allowed_when_idle (regression guard) - test_session_compress_rejects_while_running - test_rollback_restore_rejects_full_history_while_running - test_prompt_submit_history_version_mismatch_surfaces_warning - test_prompt_submit_history_version_match_persists_normally (regression) Validated: against unpatched server.py the three 'rejects_while_running' tests fail and the version-mismatch test fails (no 'warning' field). With the fix, all 6 pass, all 33 tests in the file pass, 74 TUI tests in total pass. Live E2E against the live Python environment confirmed all 5 patches present and guards enforce 4009 exactly as designed.
2026-06-12 08:51:53 +00:00 · 2026-04-18 22:30:10 -07:00 · 2026-04-18 22:30:10 -07:00 · bf5d7462ba
commit bf5d7462ba
parent 3a6351454b
2 changed files with 208 additions and 1 deletions
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@ -546,3 +546,169 @@ def test_session_info_includes_mcp_servers(monkeypatch):

    assert info["mcp_servers"] == fake_status

+
+# ---------------------------------------------------------------------------
+# History-mutating commands must reject while session.running is True.
+# Without these guards, prompt.submit's post-run history write either
+# clobbers the mutation (version matches) or silently drops the agent's
+# output (version mismatch) — both produce UI<->backend state desync.
+# ---------------------------------------------------------------------------
+
+
+def test_session_undo_rejects_while_running():
+    """Fix for TUI silent-drop #1: /undo must not mutate history
+    while the agent is mid-turn — would either clobber the undo or
+    cause prompt.submit to silently drop the agent's response."""
+    server._sessions["sid"] = _session(running=True, history=[
+        {"role": "user", "content": "hi"},
+        {"role": "assistant", "content": "hello"},
+    ])
+    try:
+        resp = server.handle_request(
+            {"id": "1", "method": "session.undo", "params": {"session_id": "sid"}}
+        )
+        assert resp.get("error"), "session.undo should reject while running"
+        assert resp["error"]["code"] == 4009
+        assert "session busy" in resp["error"]["message"]
+        # History must be unchanged
+        assert len(server._sessions["sid"]["history"]) == 2
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_session_undo_allowed_when_idle():
+    """Regression guard: when not running, /undo still works."""
+    server._sessions["sid"] = _session(running=False, history=[
+        {"role": "user", "content": "hi"},
+        {"role": "assistant", "content": "hello"},
+    ])
+    try:
+        resp = server.handle_request(
+            {"id": "1", "method": "session.undo", "params": {"session_id": "sid"}}
+        )
+        assert resp.get("result"), f"got error: {resp.get('error')}"
+        assert resp["result"]["removed"] == 2
+        assert server._sessions["sid"]["history"] == []
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_session_compress_rejects_while_running(monkeypatch):
+    server._sessions["sid"] = _session(running=True)
+    try:
+        resp = server.handle_request(
+            {"id": "1", "method": "session.compress", "params": {"session_id": "sid"}}
+        )
+        assert resp.get("error")
+        assert resp["error"]["code"] == 4009
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_rollback_restore_rejects_full_history_while_running(monkeypatch):
+    """Full-history rollback must reject; file-scoped rollback still allowed."""
+    server._sessions["sid"] = _session(running=True)
+    try:
+        resp = server.handle_request(
+            {"id": "1", "method": "rollback.restore", "params": {"session_id": "sid", "hash": "abc"}}
+        )
+        assert resp.get("error"), "full-history rollback should reject while running"
+        assert resp["error"]["code"] == 4009
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_prompt_submit_history_version_mismatch_surfaces_warning(monkeypatch):
+    """Fix for TUI silent-drop #2: the defensive backstop at prompt.submit
+    must attach a 'warning' to message.complete when history was
+    mutated externally during the turn (instead of silently dropping
+    the agent's output)."""
+    # Agent bumps history_version itself mid-run to simulate an external
+    # mutation slipping past the guards.
+    session_ref = {"s": None}
+
+    class _RacyAgent:
+        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+            # Simulate: something external bumped history_version
+            # while we were running.
+            with session_ref["s"]["history_lock"]:
+                session_ref["s"]["history_version"] += 1
+            return {"final_response": "agent reply", "messages": [{"role": "assistant", "content": "agent reply"}]}
+
+    class _ImmediateThread:
+        def __init__(self, target=None, daemon=None):
+            self._target = target
+
+        def start(self):
+            self._target()
+
+    server._sessions["sid"] = _session(agent=_RacyAgent())
+    session_ref["s"] = server._sessions["sid"]
+    emits: list[tuple] = []
+    try:
+        monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
+        monkeypatch.setattr(server, "_get_usage", lambda _a: {})
+        monkeypatch.setattr(server, "render_message", lambda _t, _c: "")
+        monkeypatch.setattr(server, "_emit", lambda *a: emits.append(a))
+
+        resp = server.handle_request(
+            {"id": "1", "method": "prompt.submit", "params": {"session_id": "sid", "text": "hi"}}
+        )
+        assert resp.get("result"), f"got error: {resp.get('error')}"
+
+        # History should NOT contain the agent's output (version mismatch)
+        assert server._sessions["sid"]["history"] == []
+
+        # message.complete must carry a 'warning' so the UI / operator
+        # knows the output was not persisted.
+        complete_calls = [a for a in emits if a[0] == "message.complete"]
+        assert len(complete_calls) == 1
+        _, _, payload = complete_calls[0]
+        assert "warning" in payload, (
+            "message.complete must include a 'warning' field on "
+            "history_version mismatch — otherwise the UI silently "
+            "shows output that was never persisted"
+        )
+        assert "not saved" in payload["warning"].lower() or "changed" in payload["warning"].lower()
+    finally:
+        server._sessions.pop("sid", None)
+
+
+def test_prompt_submit_history_version_match_persists_normally(monkeypatch):
+    """Regression guard: the backstop does not affect the happy path."""
+    class _Agent:
+        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+            return {"final_response": "reply", "messages": [{"role": "assistant", "content": "reply"}]}
+
+    class _ImmediateThread:
+        def __init__(self, target=None, daemon=None):
+            self._target = target
+
+        def start(self):
+            self._target()
+
+    server._sessions["sid"] = _session(agent=_Agent())
+    emits: list[tuple] = []
+    try:
+        monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
+        monkeypatch.setattr(server, "_get_usage", lambda _a: {})
+        monkeypatch.setattr(server, "render_message", lambda _t, _c: "")
+        monkeypatch.setattr(server, "_emit", lambda *a: emits.append(a))
+
+        resp = server.handle_request(
+            {"id": "1", "method": "prompt.submit", "params": {"session_id": "sid", "text": "hi"}}
+        )
+        assert resp.get("result")
+
+        # History was written
+        assert server._sessions["sid"]["history"] == [{"role": "assistant", "content": "reply"}]
+        assert server._sessions["sid"]["history_version"] == 1
+
+        # No warning should be attached
+        complete_calls = [a for a in emits if a[0] == "message.complete"]
+        assert len(complete_calls) == 1
+        _, _, payload = complete_calls[0]
+        assert "warning" not in payload
+    finally:
+        server._sessions.pop("sid", None)
+
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -1224,6 +1224,13 @@ def _(rid, params: dict) -> dict:
    session, err = _sess(params, rid)
    if err:
        return err
+    # Reject during an in-flight turn.  If we mutated history while
+    # the agent thread is running, prompt.submit's post-run history
+    # write would either clobber the undo (version matches) or
+    # silently drop the agent's output (version mismatch, see below).
+    # Neither is what the user wants — make them /interrupt first.
+    if session.get("running"):
+        return _err(rid, 4009, "session busy — /interrupt the current turn before /undo")
    removed = 0
    with session["history_lock"]:
        history = session.get("history", [])
@ -1243,6 +1250,8 @@ def _(rid, params: dict) -> dict:
    session, err = _sess(params, rid)
    if err:
        return err
+    if session.get("running"):
+        return _err(rid, 4009, "session busy — /interrupt the current turn before /compress")
    try:
        with session["history_lock"]:
            removed, usage = _compress_session_history(session, str(params.get("focus_topic", "") or "").strip())
@ -1443,12 +1452,33 @@ def _(rid, params: dict) -> dict:
            )

            last_reasoning = None
+            status_note = None
            if isinstance(result, dict):
                if isinstance(result.get("messages"), list):
                    with session["history_lock"]:
-                        if int(session.get("history_version", 0)) == history_version:
+                        current_version = int(session.get("history_version", 0))
+                        if current_version == history_version:
                            session["history"] = result["messages"]
                            session["history_version"] = history_version + 1
+                        else:
+                            # History mutated externally during the turn
+                            # (undo/compress/retry/rollback now guard on
+                            # session.running, but this is the defensive
+                            # backstop for any path that slips past).
+                            # Surface the desync rather than silently
+                            # dropping the agent's output — the UI can
+                            # show the response and warn that it was
+                            # not persisted.
+                            print(
+                                f"[tui_gateway] prompt.submit: history_version mismatch "
+                                f"(expected={history_version} current={current_version}) — "
+                                f"agent output NOT written to session history",
+                                file=sys.stderr,
+                            )
+                            status_note = (
+                                "History changed during this turn — the response above is visible "
+                                "but was not saved to session history."
+                            )
                raw = result.get("final_response", "")
                status = "interrupted" if result.get("interrupted") else "error" if result.get("error") else "complete"
                lr = result.get("last_reasoning")
@ -1461,6 +1491,8 @@ def _(rid, params: dict) -> dict:
            payload = {"text": raw, "usage": _get_usage(agent), "status": status}
            if last_reasoning:
                payload["reasoning"] = last_reasoning
+            if status_note:
+                payload["warning"] = status_note
            rendered = render_message(raw, cols)
            if rendered:
                payload["rendered"] = rendered
@ -2168,6 +2200,8 @@ def _(rid, params: dict) -> dict:
    if name == "retry":
        if not session:
            return _err(rid, 4001, "no active session to retry")
+        if session.get("running"):
+            return _err(rid, 4009, "session busy — /interrupt the current turn before /retry")
        history = session.get("history", [])
        if not history:
            return _err(rid, 4018, "no previous user message to retry")
@ -2578,6 +2612,13 @@ def _(rid, params: dict) -> dict:
    file_path = params.get("file_path", "")
    if not target:
        return _err(rid, 4014, "hash required")
+    # Full-history rollback mutates session history.  Rejecting during
+    # an in-flight turn prevents prompt.submit from silently dropping
+    # the agent's output (version mismatch path) or clobbering the
+    # rollback (version-matches path).  A file-scoped rollback only
+    # touches disk, so we allow it.
+    if not file_path and session.get("running"):
+        return _err(rid, 4009, "session busy — /interrupt the current turn before full rollback.restore")
    try:
        def go(mgr, cwd):
            resolved = _resolve_checkpoint_hash(mgr, cwd, target)