feat: devex help, add Makefile, ruff, pre-commit, and modernize CI

2026-04-28 01:21:43 +00:00 · 2026-03-09 20:36:51 -05:00 · 2026-03-09 20:36:51 -05:00 · f4d7e6a29e
commit f4d7e6a29e
parent 172a38c344
111 changed files with 11655 additions and 10200 deletions
--- a/tools/session_search_tool.py
+++ b/tools/session_search_tool.py
@ -18,11 +18,8 @@ Flow:
 import asyncio
 import concurrent.futures
 import json
-import os
 import logging
-from typing import Dict, Any, List, Optional, Union
-
-from openai import AsyncOpenAI, OpenAI
+from typing import Any

 from agent.auxiliary_client import get_async_text_auxiliary_client

@ -33,7 +30,7 @@ MAX_SESSION_CHARS = 100_000
 MAX_SUMMARY_TOKENS = 10000


-def _format_timestamp(ts: Union[int, float, str, None]) -> str:
+def _format_timestamp(ts: int | float | str | None) -> str:
    """Convert a Unix timestamp (float/int) or ISO string to a human-readable date.

    Returns "unknown" for None, str(ts) if conversion fails.
@ -43,11 +40,13 @@ def _format_timestamp(ts: Union[int, float, str, None]) -> str:
    try:
        if isinstance(ts, (int, float)):
            from datetime import datetime
+
            dt = datetime.fromtimestamp(ts)
            return dt.strftime("%B %d, %Y at %I:%M %p")
        if isinstance(ts, str):
            if ts.replace(".", "").replace("-", "").isdigit():
                from datetime import datetime
+
                dt = datetime.fromtimestamp(float(ts))
                return dt.strftime("%B %d, %Y at %I:%M %p")
            return ts
@ -59,7 +58,7 @@ def _format_timestamp(ts: Union[int, float, str, None]) -> str:
    return str(ts)


-def _format_conversation(messages: List[Dict[str, Any]]) -> str:
+def _format_conversation(messages: list[dict[str, Any]]) -> str:
    """Format session messages into a readable transcript for summarization."""
    parts = []
    for msg in messages:
@ -93,9 +92,7 @@ def _format_conversation(messages: List[Dict[str, Any]]) -> str:
    return "\n\n".join(parts)


-def _truncate_around_matches(
-    full_text: str, query: str, max_chars: int = MAX_SESSION_CHARS
-) -> str:
+def _truncate_around_matches(full_text: str, query: str, max_chars: int = MAX_SESSION_CHARS) -> str:
    """
    Truncate a conversation transcript to max_chars, centered around
    where the query terms appear. Keeps content near matches, trims the edges.
@ -129,9 +126,7 @@ def _truncate_around_matches(
    return prefix + truncated + suffix


-async def _summarize_session(
-    conversation_text: str, query: str, session_meta: Dict[str, Any]
-) -> Optional[str]:
+async def _summarize_session(conversation_text: str, query: str, session_meta: dict[str, Any]) -> str | None:
    """Summarize a single session conversation focused on the search query."""
    system_prompt = (
        "You are reviewing a past conversation transcript to help recall what happened. "
@ -163,7 +158,8 @@ async def _summarize_session(
    max_retries = 3
    for attempt in range(max_retries):
        try:
-            from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
+            from agent.auxiliary_client import auxiliary_max_tokens_param, get_auxiliary_extra_body
+
            _extra = get_auxiliary_extra_body()
            response = await _async_aux_client.chat.completions.create(
                model=_SUMMARIZER_MODEL,
@ -221,13 +217,16 @@ def session_search(
        )

        if not raw_results:
-            return json.dumps({
-                "success": True,
-                "query": query,
-                "results": [],
-                "count": 0,
-                "message": "No matching sessions found.",
-            }, ensure_ascii=False)
+            return json.dumps(
+                {
+                    "success": True,
+                    "query": query,
+                    "results": [],
+                    "count": 0,
+                    "message": "No matching sessions found.",
+                },
+                ensure_ascii=False,
+            )

        # Resolve child sessions to their parent — delegation stores detailed
        # content in child sessions, but the user's conversation is the parent.
@ -283,12 +282,9 @@ def session_search(
                logging.warning(f"Failed to prepare session {session_id}: {e}")

        # Summarize all sessions in parallel
-        async def _summarize_all() -> List[Union[str, Exception]]:
+        async def _summarize_all() -> list[str | Exception]:
            """Summarize all sessions in parallel."""
-            coros = [
-                _summarize_session(text, query, meta)
-                for _, _, text, meta in tasks
-            ]
+            coros = [_summarize_session(text, query, meta) for _, _, text, meta in tasks]
            return await asyncio.gather(*coros, return_exceptions=True)

        try:
@ -300,10 +296,13 @@ def session_search(
            results = asyncio.run(_summarize_all())
        except concurrent.futures.TimeoutError:
            logging.warning("Session summarization timed out after 60 seconds")
-            return json.dumps({
-                "success": False,
-                "error": "Session summarization timed out. Try a more specific query or reduce the limit.",
-            }, ensure_ascii=False)
+            return json.dumps(
+                {
+                    "success": False,
+                    "error": "Session summarization timed out. Try a more specific query or reduce the limit.",
+                },
+                ensure_ascii=False,
+            )

        summaries = []
        for (session_id, match_info, _, _), result in zip(tasks, results):
@ -311,21 +310,26 @@ def session_search(
                logging.warning(f"Failed to summarize session {session_id}: {result}")
                continue
            if result:
-                summaries.append({
-                    "session_id": session_id,
-                    "when": _format_timestamp(match_info.get("session_started")),
-                    "source": match_info.get("source", "unknown"),
-                    "model": match_info.get("model"),
-                    "summary": result,
-                })
+                summaries.append(
+                    {
+                        "session_id": session_id,
+                        "when": _format_timestamp(match_info.get("session_started")),
+                        "source": match_info.get("source", "unknown"),
+                        "model": match_info.get("model"),
+                        "summary": result,
+                    }
+                )

-        return json.dumps({
-            "success": True,
-            "query": query,
-            "results": summaries,
-            "count": len(summaries),
-            "sessions_searched": len(seen_sessions),
-        }, ensure_ascii=False)
+        return json.dumps(
+            {
+                "success": True,
+                "query": query,
+                "results": summaries,
+                "count": len(summaries),
+                "sessions_searched": len(seen_sessions),
+            },
+            ensure_ascii=False,
+        )

    except Exception as e:
        return json.dumps({"success": False, "error": f"Search failed: {str(e)}"}, ensure_ascii=False)
@ -337,6 +341,7 @@ def check_session_search_requirements() -> bool:
        return False
    try:
        from hermes_state import DEFAULT_DB_PATH
+
        return DEFAULT_DB_PATH.parent.exists()
    except ImportError:
        return False
@ -356,7 +361,7 @@ SESSION_SEARCH_SCHEMA = {
        "Don't hesitate to search -- it's fast and cheap. Better to search and confirm "
        "than to guess or ask the user to repeat themselves.\n\n"
        "Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
-        "phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). "
+        'phrases for exact match ("docker networking"), boolean (python NOT java), prefix (deploy*). '
        "IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "
        "sessions that only mention some terms. If a broad OR query returns nothing, try individual "
        "keyword searches in parallel. Returns summaries of the top matching sessions."
@ -395,6 +400,7 @@ registry.register(
        role_filter=args.get("role_filter"),
        limit=args.get("limit", 3),
        db=kw.get("db"),
-        current_session_id=kw.get("current_session_id")),
+        current_session_id=kw.get("current_session_id"),
+    ),
    check_fn=check_session_search_requirements,
 )