feat: add /btw command for ephemeral side questions (#4161)

Adds /btw <question> — ask a quick follow-up using the current session context without interrupting the main conversation.

- Snapshots conversation history, answers with a no-tools agent
- Response is not persisted to session history or DB
- Runs in a background thread (CLI) / async task (gateway)
- Per-session guard prevents concurrent /btw in gateway

Implementation:

- model_tools.py: enabled_toolsets=[] now correctly means "no tools" (was falsy, fell through to default "all tools")
- run_agent.py: persist_session=False gates _persist_session()
- cli.py: _handle_btw_command (background thread, Rich panel output)
- gateway/run.py: _handle_btw_command + _run_btw_task (async task)
- hermes_cli/commands.py: CommandDef for "btw"

Inspired by PR #3504 by areu01or00, reimplemented cleanly on current main with the enabled_toolsets=[] fix and without the __btw_no_tools__ hack.
2026-04-25 00:51:20 +00:00 · 2026-03-30 21:10:05 -07:00 · 2026-03-30 21:10:05 -07:00 · 1bd206ea5d
commit 1bd206ea5d
parent f8e1ee10aa
5 changed files with 289 additions and 1 deletions
--- a/cli.py
+++ b/cli.py
@ -3904,6 +3904,8 @@ class HermesCLI:
            self._handle_stop_command()
        elif canonical == "background":
            self._handle_background_command(cmd_original)
        elif canonical == "btw":
            self._handle_btw_command(cmd_original)
        elif canonical == "queue":
            # Extract prompt after "/queue " or "/q "
            parts = cmd_original.split(None, 1)
@ -4190,6 +4192,121 @@ class HermesCLI:
        self._background_tasks[task_id] = thread
        thread.start()
    def _handle_btw_command(self, cmd: str):
        """Answer an ephemeral ``/btw <question>`` without touching the session.

        A throwaway agent is built with ``enabled_toolsets=[]``,
        ``session_db=None`` and ``persist_session=False`` and is handed a copy
        of ``self.conversation_history``.  The work happens on a daemon thread,
        so this method returns as soon as that thread has been started; the
        answer (or the failure message) is printed from the worker.

        Args:
            cmd: The raw command line as typed, e.g. ``"/btw why is X slow?"``.
        """
        tokens = cmd.strip().split(maxsplit=1)
        question = tokens[1].strip() if len(tokens) > 1 else ""
        if not question:
            _cprint("  Usage: /btw <question>")
            _cprint("  Example: /btw what module owns session title sanitization?")
            _cprint("  Answers using session context. No tools, not persisted.")
            return

        # The id doubles as the agent's session id and the thread name suffix.
        stamp = datetime.now().strftime("%H%M%S")
        task_id = f"btw_{stamp}_{uuid.uuid4().hex[:6]}"

        if not self._ensure_runtime_credentials():
            _cprint("  (>_<) Cannot start /btw: no valid credentials.")
            return

        turn_route = self._resolve_turn_agent_config(question)
        # Copy now so later turns in the main conversation cannot leak in.
        history_snapshot = list(self.conversation_history)

        preview = question[:60]
        if len(question) > 60:
            preview += "..."
        _cprint(f'  💬 /btw: "{preview}"')

        def _settle_tui_and_break_line():
            # Ask the TUI to repaint, give it a moment, then start on a
            # fresh line before any output is written.
            if self._app:
                self._app.invalidate()
                time.sleep(0.05)
            print()

        def _side_question_worker():
            try:
                model_name = turn_route["model"]
                runtime = turn_route["runtime"]
                side_agent = AIAgent(
                    model=model_name,
                    api_key=runtime.get("api_key"),
                    base_url=runtime.get("base_url"),
                    provider=runtime.get("provider"),
                    api_mode=runtime.get("api_mode"),
                    acp_command=runtime.get("command"),
                    acp_args=runtime.get("args"),
                    max_iterations=8,
                    enabled_toolsets=[],
                    quiet_mode=True,
                    verbose_logging=False,
                    session_id=task_id,
                    platform="cli",
                    reasoning_config=self.reasoning_config,
                    providers_allowed=self._providers_only,
                    providers_ignored=self._providers_ignore,
                    providers_order=self._providers_order,
                    provider_sort=self._provider_sort,
                    provider_require_parameters=self._provider_require_params,
                    provider_data_collection=self._provider_data_collection,
                    fallback_model=self._fallback_model,
                    session_db=None,
                    skip_memory=True,
                    skip_context_files=True,
                    persist_session=False,
                )
                btw_prompt = (
                    "[Ephemeral /btw side question. Answer using the conversation "
                    "context. No tools available. Be direct and concise.]\n\n"
                    f"{question}"
                )
                outcome = side_agent.run_conversation(
                    user_message=btw_prompt,
                    conversation_history=history_snapshot,
                    task_id=task_id,
                    sync_honcho=False,
                )

                # Prefer the final answer; fall back to the reported error.
                answer = ""
                if outcome:
                    answer = outcome.get("final_response") or ""
                    if not answer and outcome.get("error"):
                        answer = f"Error: {outcome['error']}"

                _settle_tui_and_break_line()
                if not answer:
                    _cprint("  💬 /btw: (no response)")
                else:
                    # Skin lookup is best-effort; fall back to the default color.
                    try:
                        from hermes_cli.skin_engine import get_active_skin
                        active_skin = get_active_skin()
                        border_color = active_skin.get_color("response_border", "#4F6D4A")
                    except Exception:
                        border_color = "#4F6D4A"
                    answer_panel = Panel(
                        _rich_text_from_ansi(answer),
                        title=f"[{border_color} bold]⚕ /btw[/]",
                        title_align="left",
                        border_style=border_color,
                        box=rich_box.HORIZONTALS,
                        padding=(1, 2),
                    )
                    ChatConsole().print(answer_panel)

                if self.bell_on_complete:
                    sys.stdout.write("\a")
                    sys.stdout.flush()
            except Exception as exc:
                _settle_tui_and_break_line()
                _cprint(f"  ❌ /btw failed: {exc}")
            finally:
                if self._app:
                    self._invalidate(min_interval=0)

        worker = threading.Thread(
            target=_side_question_worker,
            daemon=True,
            name=f"btw-{task_id}",
        )
        worker.start()
    @staticmethod
    def _try_launch_chrome_debug(port: int, system: str) -> bool:
        """Try to launch Chrome/Chromium with remote debugging enabled.
--- a/gateway/run.py
+++ b/gateway/run.py
@ -1962,6 +1962,9 @@ class GatewayRunner:
        if canonical == "background":
            return await self._handle_background_command(event)
        if canonical == "btw":
            return await self._handle_btw_command(event)
        if canonical == "voice":
            return await self._handle_voice_command(event)
@ -4038,6 +4041,167 @@ class GatewayRunner:
            except Exception:
                pass
    async def _handle_btw_command(self, event: MessageEvent) -> str:
        """Start an ephemeral /btw side question for the chat behind *event*.

        Schedules ``_run_btw_task`` as an asyncio task and returns immediately
        with an acknowledgement; the answer itself is delivered by that task.
        Only one /btw task may be in flight per session key.

        Args:
            event: Incoming command event; its command arguments are the question.

        Returns:
            Usage help when no question was given, a "already running" notice
            when a previous /btw for this session has not finished, otherwise
            an acknowledgement containing a preview of the question.
        """
        question = event.get_command_args().strip()
        if not question:
            usage_lines = (
                "Usage: /btw <question>\n"
                "Example: /btw what module owns session title sanitization?\n\n"
                "Answers using session context. No tools, not persisted."
            )
            return usage_lines

        source = event.source
        session_key = self._session_key_for_source(source)

        # The registry is created lazily on first use.
        if not hasattr(self, "_active_btw_tasks"):
            self._active_btw_tasks = {}

        # Guard: one /btw at a time per session
        in_flight = self._active_btw_tasks.get(session_key)
        if in_flight and not in_flight.done():
            return "A /btw is already running for this chat. Wait for it to finish."

        from uuid import uuid4 as _uuid4
        stamp = datetime.now().strftime("%H%M%S")
        task_id = f"btw_{stamp}_{_uuid4().hex[:6]}"

        btw_task = asyncio.create_task(
            self._run_btw_task(question, source, session_key, task_id)
        )
        self._background_tasks.add(btw_task)
        self._active_btw_tasks[session_key] = btw_task

        def _forget(finished):
            self._background_tasks.discard(finished)
            # Only clear the slot if it still points at this very task.
            if self._active_btw_tasks.get(session_key) is finished:
                self._active_btw_tasks.pop(session_key, None)

        btw_task.add_done_callback(_forget)

        preview = question[:60]
        if len(question) > 60:
            preview += "..."
        return f'💬 /btw: "{preview}"\nReply will appear here shortly.'
    async def _run_btw_task(
        self, question: str, source, session_key: str, task_id: str,
    ) -> None:
        """Execute an ephemeral /btw side question and deliver the answer.

        Builds a one-off ``AIAgent`` with ``enabled_toolsets=[]``,
        ``session_db=None`` and ``persist_session=False``, runs it in the
        default executor against a snapshot of the session history, and sends
        the result back through the platform adapter.  Failures are logged and
        reported to the chat; nothing is raised to the caller.

        Args:
            question: The user's side question (already stripped).
            source: Origin of the command; ``platform``, ``chat_id`` and
                ``thread_id`` are read from it.
            session_key: Key used to look up a currently running agent.
            task_id: Identifier used as the agent's session/task id and in logs.
        """
        from run_agent import AIAgent
        adapter = self.adapters.get(source.platform)
        if not adapter:
            logger.warning("No adapter for platform %s in /btw task %s", source.platform, task_id)
            return
        _thread_meta = {"thread_id": source.thread_id} if source.thread_id else None
        try:
            runtime_kwargs = _resolve_runtime_agent_kwargs()
            if not runtime_kwargs.get("api_key"):
                await adapter.send(
                    chat_id=source.chat_id,
                    content="❌ /btw failed: no provider credentials configured.",
                    metadata=_thread_meta,
                )
                return
            user_config = _load_gateway_config()
            model = _resolve_gateway_model(user_config)
            platform_key = _platform_config_key(source.platform)
            reasoning_config = self._load_reasoning_config()
            turn_route = self._resolve_turn_agent_config(question, model, runtime_kwargs)
            pr = self._provider_routing
            # Snapshot history from running agent or stored transcript
            running_agent = self._running_agents.get(session_key)
            if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
                history_snapshot = list(getattr(running_agent, "_session_messages", []) or [])
            else:
                session_entry = self.session_store.get_or_create_session(source)
                # Copy (and tolerate an empty/None result) so the ephemeral
                # agent never works on the store's own transcript object.
                history_snapshot = list(
                    self.session_store.load_transcript(session_entry.session_id) or []
                )
            btw_prompt = (
                "[Ephemeral /btw side question. Answer using the conversation "
                "context. No tools available. Be direct and concise.]\n\n"
                + question
            )

            def run_sync():
                # Runs in a worker thread: the agent API used here is synchronous.
                agent = AIAgent(
                    model=turn_route["model"],
                    **turn_route["runtime"],
                    max_iterations=8,
                    quiet_mode=True,
                    verbose_logging=False,
                    enabled_toolsets=[],
                    reasoning_config=reasoning_config,
                    providers_allowed=pr.get("only"),
                    providers_ignored=pr.get("ignore"),
                    providers_order=pr.get("order"),
                    provider_sort=pr.get("sort"),
                    provider_require_parameters=pr.get("require_parameters", False),
                    provider_data_collection=pr.get("data_collection"),
                    session_id=task_id,
                    platform=platform_key,
                    session_db=None,
                    fallback_model=self._fallback_model,
                    skip_memory=True,
                    skip_context_files=True,
                    persist_session=False,
                )
                return agent.run_conversation(
                    user_message=btw_prompt,
                    conversation_history=history_snapshot,
                    task_id=task_id,
                    sync_honcho=False,
                )

            # We are inside a coroutine, so a loop is guaranteed to be running.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(None, run_sync)
            response = (result.get("final_response") or "") if result else ""
            if not response and result and result.get("error"):
                response = f"Error: {result['error']}"
            if not response:
                response = "(No response generated)"
            media_files, response = adapter.extract_media(response)
            images, text_content = adapter.extract_images(response)
            preview = question[:60] + ("..." if len(question) > 60 else "")
            header = f'💬 /btw: "{preview}"\n\n'
            if text_content:
                await adapter.send(
                    chat_id=source.chat_id,
                    content=header + text_content,
                    metadata=_thread_meta,
                )
            elif not images and not media_files:
                # Nothing left after media/image extraction: still acknowledge.
                await adapter.send(
                    chat_id=source.chat_id,
                    content=header + "(No response generated)",
                    metadata=_thread_meta,
                )
            # Attachments are best-effort: one failed delivery must not stop
            # the rest, but it should leave a trace in the logs.
            for image_url, alt_text in (images or []):
                try:
                    await adapter.send_image(chat_id=source.chat_id, image_url=image_url, caption=alt_text)
                except Exception:
                    logger.warning(
                        "/btw task %s: failed to send image %s",
                        task_id, image_url, exc_info=True,
                    )
            for media_path in (media_files or []):
                try:
                    await adapter.send_file(chat_id=source.chat_id, file_path=media_path)
                except Exception:
                    logger.warning(
                        "/btw task %s: failed to send file %s",
                        task_id, media_path, exc_info=True,
                    )
        except Exception as e:
            logger.exception("/btw task %s failed", task_id)
            try:
                await adapter.send(
                    chat_id=source.chat_id,
                    content=f"❌ /btw failed: {e}",
                    metadata=_thread_meta,
                )
            except Exception:
                # The failure notice itself could not be delivered; the
                # original error is already logged above.
                logger.warning(
                    "/btw task %s: could not deliver failure notice",
                    task_id, exc_info=True,
                )
    async def _handle_reasoning_command(self, event: MessageEvent) -> str:
        """Handle /reasoning command — manage reasoning effort and display toggle.
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@ -67,6 +67,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
               gateway_only=True),
    CommandDef("background", "Run a prompt in the background", "Session",
               aliases=("bg",), args_hint="<prompt>"),
    CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
               args_hint="<question>"),
    CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
               aliases=("q",), args_hint="<prompt>"),
    CommandDef("status", "Show session info", "Session",
--- a/model_tools.py
+++ b/model_tools.py
@ -252,7 +252,7 @@ def get_tool_definitions(
    # Determine which tool names the caller wants
    tools_to_include: set = set()
-    if enabled_toolsets:
+    if enabled_toolsets is not None:
        for toolset_name in enabled_toolsets:
            if validate_toolset(toolset_name):
                resolved = resolve_toolset(toolset_name)
--- a/run_agent.py
+++ b/run_agent.py
@ -508,6 +508,7 @@ class AIAgent:
        checkpoints_enabled: bool = False,
        checkpoint_max_snapshots: int = 50,
        pass_session_id: bool = False,
        persist_session: bool = True,
    ):
        """
        Initialize the AI Agent.
@ -573,6 +574,7 @@ class AIAgent:
        self.background_review_callback = None  # Optional sync callback for gateway delivery
        self.skip_context_files = skip_context_files
        self.pass_session_id = pass_session_id
        self.persist_session = persist_session
        self.log_prefix_chars = log_prefix_chars
        self.log_prefix = f"{log_prefix} " if log_prefix else ""
        # Store effective base URL for feature detection (prompt caching, reasoning, etc.)
@ -1700,7 +1702,10 @@ class AIAgent:
        """Save session state to both JSON log and SQLite on any exit path.
        Ensures conversations are never lost, even on errors or early returns.
        Skipped when ``persist_session=False`` (ephemeral helper flows).
        """
        if not self.persist_session:
            return
        self._apply_persist_user_message_override(messages)
        self._session_messages = messages
        self._save_session_log(messages)