feat(cli): wrap /compress in _busy_command to block input during compression

Before this change, typing during /compress was accepted by the classic CLI prompt and landed in the next prompt after compression finished, effectively consuming a keystroke for a prompt that was about to be replaced. Wrapping the body in self._busy_command('Compressing context...') blocks input rendering for the duration, matching the pattern that /skills install and other slow commands already use. Salvages the useful part of #10303 (@iRonin). The `_compressing` flag added to run_agent.py in the original PR was dead code (set in 3 spots, read nowhere — not by cli.py, not by run_agent.py, and not by the Ink TUI, which doesn't use _busy_command at all) and was dropped.
2026-04-25 00:51:20 +00:00 · 2026-04-24 15:19:44 -07:00 · 2026-04-24 15:19:44 -07:00 · fd3864d8bd
commit fd3864d8bd
parent 8ea389a7f8
1 changed files with 44 additions and 43 deletions
--- a/cli.py
+++ b/cli.py
@ -7011,51 +7011,52 @@ class HermesCLI:
                focus_topic = parts[1].strip()
        original_count = len(self.conversation_history)
-        try:
+        with self._busy_command("Compressing context..."):
-            from agent.model_metadata import estimate_messages_tokens_rough
+            try:
-            from agent.manual_compression_feedback import summarize_manual_compression
+                from agent.model_metadata import estimate_messages_tokens_rough
-            original_history = list(self.conversation_history)
+                from agent.manual_compression_feedback import summarize_manual_compression
-            approx_tokens = estimate_messages_tokens_rough(original_history)
+                original_history = list(self.conversation_history)
-            if focus_topic:
+                approx_tokens = estimate_messages_tokens_rough(original_history)
-                print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens), "
+                if focus_topic:
-                      f"focus: \"{focus_topic}\"...")
+                    print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens), "
-            else:
+                          f"focus: \"{focus_topic}\"...")
-                print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
+                else:
                    print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
-            compressed, _ = self.agent._compress_context(
+                compressed, _ = self.agent._compress_context(
-                original_history,
+                    original_history,
-                self.agent._cached_system_prompt or "",
+                    self.agent._cached_system_prompt or "",
-                approx_tokens=approx_tokens,
+                    approx_tokens=approx_tokens,
-                focus_topic=focus_topic or None,
+                    focus_topic=focus_topic or None,
-            )
+                )
-            self.conversation_history = compressed
+                self.conversation_history = compressed
-            # _compress_context ends the old session and creates a new child
+                # _compress_context ends the old session and creates a new child
-            # session on the agent (run_agent.py::_compress_context). Sync the
+                # session on the agent (run_agent.py::_compress_context). Sync the
-            # CLI's session_id so /status, /resume, exit summary, and title
+                # CLI's session_id so /status, /resume, exit summary, and title
-            # generation all point at the live continuation session, not the
+                # generation all point at the live continuation session, not the
-            # ended parent. Without this, subsequent end_session() calls target
+                # ended parent. Without this, subsequent end_session() calls target
-            # the already-closed parent and the child is orphaned.
+                # the already-closed parent and the child is orphaned.
-            if (
+                if (
-                getattr(self.agent, "session_id", None)
+                    getattr(self.agent, "session_id", None)
-                and self.agent.session_id != self.session_id
+                    and self.agent.session_id != self.session_id
-            ):
+                ):
-                self.session_id = self.agent.session_id
+                    self.session_id = self.agent.session_id
-                self._pending_title = None
+                    self._pending_title = None
-            new_tokens = estimate_messages_tokens_rough(self.conversation_history)
+                new_tokens = estimate_messages_tokens_rough(self.conversation_history)
-            summary = summarize_manual_compression(
+                summary = summarize_manual_compression(
-                original_history,
+                    original_history,
-                self.conversation_history,
+                    self.conversation_history,
-                approx_tokens,
+                    approx_tokens,
-                new_tokens,
+                    new_tokens,
-            )
+                )
-            icon = "🗜️" if summary["noop"] else "✅"
+                icon = "🗜️" if summary["noop"] else "✅"
-            print(f"  {icon} {summary['headline']}")
+                print(f"  {icon} {summary['headline']}")
-            print(f"     {summary['token_line']}")
+                print(f"     {summary['token_line']}")
-            if summary["note"]:
+                if summary["note"]:
-                print(f"     {summary['note']}")
+                    print(f"     {summary['note']}")
-        except Exception as e:
+            except Exception as e:
-            print(f"  ❌ Compression failed: {e}")
+                print(f"  ❌ Compression failed: {e}")
    def _handle_debug_command(self):
        """Handle /debug — upload debug report + logs and print paste URLs."""