feat: auto-generate session titles after first exchange

After the first user→assistant exchange, Hermes now generates a short descriptive session title via the auxiliary LLM (compression task config). Title generation runs in a background thread so it never delays the user-facing response.

Key behaviors:
- Fires only on the first 1-2 exchanges (checks user message count)
- Skips if a title already exists (user-set titles are never overwritten)
- Uses call_llm with compression task config (cheapest/fastest model)
- Truncates long messages to keep the title generation request small
- Cleans up LLM output: strips quotes, 'Title:' prefixes, enforces 80 char max
- Works in both CLI and gateway (Telegram/Discord/etc.)

Also updates /title (no args) to show the session ID alongside the title in both CLI and gateway.

Implements #1426
2026-04-29 01:31:41 +00:00 · 2026-03-17 04:14:40 -07:00 · 2026-03-17 04:14:40 -07:00 · e5fc916814
commit e5fc916814
parent 7049dba778
4 changed files with 321 additions and 6 deletions
--- a/agent/title_generator.py
+++ b/agent/title_generator.py
@ -0,0 +1,125 @@
 """Auto-generate short session titles from the first user/assistant exchange.
 Runs asynchronously after the first response is delivered so it never
 adds latency to the user-facing reply.
 """
 import logging
 import threading
 from typing import Optional
 from agent.auxiliary_client import call_llm
 logger = logging.getLogger(__name__)
 # System prompt sent to the auxiliary LLM by generate_title(). It asks for a bare
 # 3-7 word title; generate_title() still post-processes the reply because models
 # do not always follow the formatting instructions.
 _TITLE_PROMPT = (
    "Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
    "following exchange. The title should capture the main topic or intent. "
    "Return ONLY the title text, nothing else. No quotes, no punctuation at the end, no prefixes."
 )
 def generate_title(user_message: str, assistant_response: str, timeout: float = 15.0) -> Optional[str]:
    """Generate a session title from the first exchange.
    Sends truncated copies of both messages to the auxiliary LLM through
    ``call_llm`` (reusing the "compression" task config) and normalizes the reply.
    Args:
        user_message: The user's message; only the first 500 characters are sent.
        assistant_response: The assistant's reply; only the first 500 characters are sent.
        timeout: Forwarded unchanged to ``call_llm`` (presumably seconds; confirm
            against ``call_llm``).
    Returns:
        A single-line title of at most 80 characters, or None when the model
        returns nothing usable or the call fails for any reason. Never raises
        for LLM/response errors: title generation is best-effort.
    """
    # Truncate long messages to keep the request small
    user_snippet = user_message[:500] if user_message else ""
    assistant_snippet = assistant_response[:500] if assistant_response else ""
    messages = [
        {"role": "system", "content": _TITLE_PROMPT},
        {"role": "user", "content": f"User: {user_snippet}\n\nAssistant: {assistant_snippet}"},
    ]
    try:
        response = call_llm(
            task="compression",  # reuse compression task config (cheap/fast model)
            messages=messages,
            max_tokens=30,
            temperature=0.3,
            timeout=timeout,
        )
        raw = response.choices[0].message.content or ""
        # Collapse newlines and runs of whitespace so the stored title is one line.
        title = " ".join(raw.split())
        # Strip wrapping quotes, then the "Title:" label, then quotes again, so
        # both '"Title: Foo"' and 'Title: "Foo"' reduce to 'Foo'.
        wrappers = "\"' "
        title = title.strip(wrappers)
        if title.lower().startswith("title:"):
            title = title[len("title:"):].strip(wrappers)
        # The prompt asks for no punctuation at the end; drop trailing periods.
        title = title.rstrip(". ")
        # Enforce reasonable length (77 chars + "..." == 80)
        if len(title) > 80:
            title = title[:77].rstrip() + "..."
        return title or None
    except Exception as e:
        # Best-effort feature: any failure simply means "no title".
        logger.debug("Title generation failed: %s", e)
        return None
 def auto_title_session(
    session_db,
    session_id: str,
    user_message: str,
    assistant_response: str,
 ) -> None:
    """Generate and store a session title unless the session already has one.
    Intended to run in a background thread after the first exchange completes.
    Args:
        session_db: Object providing ``get_session_title(session_id)`` and
            ``set_session_title(session_id, title)``; may be None.
        session_id: Identifier of the session to title.
        user_message: User message passed to ``generate_title``.
        assistant_response: Assistant reply passed to ``generate_title``.
    Silently does nothing when:
    - session_db or session_id is missing
    - the session already has a title (user-set or previously auto-generated),
      checked both before and after the LLM call
    - the title lookup, the title generation, or the write fails
    """
    if not session_db or not session_id:
        return
    # Check if title already exists (user may have set one via /title before first response)
    try:
        if session_db.get_session_title(session_id):
            return
    except Exception as e:
        logger.debug("Could not look up existing session title: %s", e)
        return
    title = generate_title(user_message, assistant_response)
    if not title:
        return
    try:
        # generate_title() blocks on an LLM call, so the user may have set a
        # title in the meantime. Re-check right before writing so a user-set
        # title is not overwritten (this narrows the window; it is not atomic).
        if session_db.get_session_title(session_id):
            return
        session_db.set_session_title(session_id, title)
        logger.debug("Auto-generated session title: %s", title)
    except Exception as e:
        logger.debug("Failed to set auto-generated title: %s", e)
 def maybe_auto_title(
    session_db,
    session_id: str,
    user_message: str,
    assistant_response: str,
    conversation_history: list,
 ) -> None:
    """Kick off background title generation when the conversation has just begun.
    Returns immediately in every case. A daemon thread running
    ``auto_title_session`` is started only when all four of session_db,
    session_id, user_message and assistant_response are truthy and
    conversation_history holds at most two entries whose role is "user".
    Whether a title already exists is checked later, inside the worker.
    """
    required = (session_db, session_id, user_message, assistant_response)
    if not all(required):
        return
    # conversation_history already contains the exchange that just finished, so
    # a first exchange shows one user message. Two are tolerated to be generous;
    # anything beyond that means the conversation is well under way.
    roles = [entry.get("role") for entry in (conversation_history or [])]
    if roles.count("user") > 2:
        return
    threading.Thread(
        target=auto_title_session,
        args=(session_db, session_id, user_message, assistant_response),
        daemon=True,
        name="auto-title",
    ).start()
--- a/cli.py
+++ b/cli.py
@ -3452,13 +3452,14 @@ class HermesCLI:
                else:
                    _cprint("  Usage: /title <your session title>")
            else:
-                # Show current title if no argument given
+                # Show current title and session ID if no argument given
                if self._session_db:
                    _cprint(f"  Session ID: {self.session_id}")
                    session = self._session_db.get_session(self.session_id)
                    if session and session.get("title"):
-                        _cprint(f"  Session title: {session['title']}")
+                        _cprint(f"  Title: {session['title']}")
                    elif self._pending_title:
-                        _cprint(f"  Session title (pending): {self._pending_title}")
+                        _cprint(f"  Title (pending): {self._pending_title}")
                    else:
                        _cprint(f"  No title set. Usage: /title <your session title>")
                else:
@ -5384,6 +5385,20 @@ class HermesCLI:
            # Get the final response
            response = result.get("final_response", "") if result else ""
            # Auto-generate session title after first exchange (non-blocking)
            if response and result and not result.get("failed") and not result.get("partial"):
                try:
                    from agent.title_generator import maybe_auto_title
                    maybe_auto_title(
                        self._session_db,
                        self.session_id,
                        message,
                        response,
                        self.conversation_history,
                    )
                except Exception:
                    pass
            # Handle failed or partial results (e.g., non-retryable errors, rate limits,
            # truncated output, invalid tool calls). Both "failed" and "partial" with
            # an empty final_response mean the agent couldn't produce a usable answer.
--- a/gateway/run.py
+++ b/gateway/run.py
@ -3328,12 +3328,12 @@ class GatewayRunner:
            except ValueError as e:
                return f"⚠️ {e}"
        else:
-            # Show the current title
+            # Show the current title and session ID
            title = self._session_db.get_session_title(session_id)
            if title:
-                return f"📌 Session title: **{title}**"
+                return f"📌 Session: `{session_id}`\nTitle: **{title}**"
            else:
-                return "No title set. Usage: `/title My Session Name`"
+                return f"📌 Session: `{session_id}`\nNo title set. Usage: `/title My Session Name`"
    async def _handle_resume_command(self, event: MessageEvent) -> str:
        """Handle /resume command — switch to a previously-named session."""
@ -4511,6 +4511,21 @@ class GatewayRunner:
            effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id
            # Auto-generate session title after first exchange (non-blocking)
            if final_response and self._session_db:
                try:
                    from agent.title_generator import maybe_auto_title
                    all_msgs = result_holder[0].get("messages", []) if result_holder[0] else []
                    maybe_auto_title(
                        self._session_db,
                        effective_session_id,
                        message,
                        final_response,
                        all_msgs,
                    )
                except Exception:
                    pass
            return {
                "final_response": final_response,
                "last_reasoning": result.get("last_reasoning"),
--- a/tests/agent/test_title_generator.py
+++ b/tests/agent/test_title_generator.py
@ -0,0 +1,160 @@
 """Tests for agent.title_generator — auto-generated session titles."""
 import threading
 from unittest.mock import MagicMock, patch
 import pytest
 from agent.title_generator import (
    generate_title,
    auto_title_session,
    maybe_auto_title,
 )
 class TestGenerateTitle:
    """Behavioral checks for generate_title() with the LLM call patched out."""
    @staticmethod
    def _llm_reply(content):
        """Build a stand-in LLM response whose first choice carries *content*."""
        reply = MagicMock()
        reply.choices = [MagicMock()]
        reply.choices[0].message.content = content
        return reply
    def _title_for(self, llm_output, user_text, assistant_text):
        """Run generate_title() while call_llm returns *llm_output*."""
        canned = self._llm_reply(llm_output)
        with patch("agent.title_generator.call_llm", return_value=canned):
            return generate_title(user_text, assistant_text)
    def test_returns_title_on_success(self):
        result = self._title_for(
            "Debugging Python Import Errors",
            "help me fix this import",
            "Sure, let me check...",
        )
        assert result == "Debugging Python Import Errors"
    def test_strips_quotes(self):
        result = self._title_for(
            '"Setting Up Docker Environment"',
            "how do I set up docker",
            "First install...",
        )
        assert result == "Setting Up Docker Environment"
    def test_strips_title_prefix(self):
        result = self._title_for(
            "Title: Kubernetes Pod Debugging",
            "my pod keeps crashing",
            "Let me look...",
        )
        assert result == "Kubernetes Pod Debugging"
    def test_truncates_long_titles(self):
        result = self._title_for("A" * 100, "question", "answer")
        assert len(result) == 80
        assert result.endswith("...")
    def test_returns_none_on_empty_response(self):
        assert self._title_for("", "question", "answer") is None
    def test_returns_none_on_exception(self):
        failure = RuntimeError("no provider")
        with patch("agent.title_generator.call_llm", side_effect=failure):
            assert generate_title("question", "answer") is None
    def test_truncates_long_messages(self):
        """Oversized user/assistant text must be cut down before it reaches the LLM."""
        canned = self._llm_reply("Short Title")
        with patch("agent.title_generator.call_llm", return_value=canned) as fake_llm:
            generate_title("x" * 1000, "y" * 1000)
        # call_args[1] holds the keyword arguments of the recorded call.
        sent_messages = fake_llm.call_args[1]["messages"]
        assert len(sent_messages[1]["content"]) < 1100  # 500 + 500 + formatting
 class TestAutoTitleSession:
    """Exercise auto_title_session(), the synchronous worker run by the background thread."""
    def test_skips_if_no_session_db(self):
        # A missing database must be a silent no-op; reaching the end is the pass condition.
        auto_title_session(None, "sess-1", "hi", "hello")
    def test_skips_if_title_exists(self):
        store = MagicMock()
        store.get_session_title.return_value = "Existing Title"
        with patch("agent.title_generator.generate_title") as fake_generate:
            auto_title_session(store, "sess-1", "hi", "hello")
        fake_generate.assert_not_called()
    def test_generates_and_sets_title(self):
        store = MagicMock()
        store.get_session_title.return_value = None
        with patch("agent.title_generator.generate_title", return_value="New Title"):
            auto_title_session(store, "sess-1", "hi", "hello")
        store.set_session_title.assert_called_once_with("sess-1", "New Title")
    def test_skips_if_generation_fails(self):
        store = MagicMock()
        store.get_session_title.return_value = None
        with patch("agent.title_generator.generate_title", return_value=None):
            auto_title_session(store, "sess-1", "hi", "hello")
        store.set_session_title.assert_not_called()
 class TestMaybeAutoTitle:
    """Tests for maybe_auto_title() — the fire-and-forget entry point.
    Synchronization is deterministic: "skip" cases assert that no Thread is
    ever constructed, and the "fires" case waits on an Event set by the
    worker instead of sleeping for a fixed interval.
    """
    def test_skips_if_not_first_exchange(self):
        """Should not fire for conversations with more than 2 user messages."""
        db = MagicMock()
        history = [
            {"role": "user", "content": "first"},
            {"role": "assistant", "content": "response 1"},
            {"role": "user", "content": "second"},
            {"role": "assistant", "content": "response 2"},
            {"role": "user", "content": "third"},
            {"role": "assistant", "content": "response 3"},
        ]
        with patch("agent.title_generator.threading.Thread") as mock_thread:
            maybe_auto_title(db, "sess-1", "third", "response 3", history)
        mock_thread.assert_not_called()
    def test_fires_on_first_exchange(self):
        """Should fire a background thread for the first exchange."""
        db = MagicMock()
        db.get_session_title.return_value = None
        history = [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi there"},
        ]
        worker_ran = threading.Event()
        with patch(
            "agent.title_generator.auto_title_session",
            side_effect=lambda *args, **kwargs: worker_ran.set(),
        ) as mock_auto:
            maybe_auto_title(db, "sess-1", "hello", "hi there", history)
            # The mock records the call before running side_effect, so once the
            # event is set the call is guaranteed to be visible to the assertion.
            assert worker_ran.wait(timeout=5.0), "background title thread never ran"
            mock_auto.assert_called_once_with(db, "sess-1", "hello", "hi there")
    def test_skips_if_no_response(self):
        db = MagicMock()
        with patch("agent.title_generator.threading.Thread") as mock_thread:
            maybe_auto_title(db, "sess-1", "hello", "", [])  # empty response
        mock_thread.assert_not_called()
    def test_skips_if_no_session_db(self):
        with patch("agent.title_generator.threading.Thread") as mock_thread:
            maybe_auto_title(None, "sess-1", "hello", "response", [])  # no db
        mock_thread.assert_not_called()