refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, falling back to the local cache and then to built-in defaults (see the sketch after this list).
- Updated the context compressor's summary token target to 2500 for improved performance.
- Added external credential detection for the Codex CLI to streamline authentication.
- Refactored authentication and model-selection handling so it is consistent across the application.
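
For illustration, a minimal sketch of that discovery-with-fallback flow. The helper name, cache path, and default model list below are hypothetical, not taken from this diff:

```python
# Illustrative sketch only: discover_codex_models, CACHE_PATH, and
# DEFAULT_MODELS are hypothetical names, not the repo's actual API.
import json
from pathlib import Path

from openai import OpenAI

CACHE_PATH = Path.home() / ".hermes" / "codex_models.json"
DEFAULT_MODELS = ["gpt-5-codex"]  # placeholder default, not from the diff

def discover_codex_models(client: OpenAI) -> list[str]:
    """Return model IDs from the API, else the local cache, else defaults."""
    try:
        models = [m.id for m in client.models.list().data]
        CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
        CACHE_PATH.write_text(json.dumps(models))  # refresh the cache
        return models
    except Exception:
        if CACHE_PATH.exists():  # offline or auth failure: reuse last listing
            return json.loads(CACHE_PATH.read_text())
        return DEFAULT_MODELS
```

Caching the last successful listing keeps real model names available when the API is unreachable or OAuth tokens have expired.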
This commit is contained in:
teknium1 2026-02-28 21:47:51 -08:00
parent 86b1db0598
commit 500f0eab4a
22 changed files with 1784 additions and 207 deletions

@@ -48,7 +48,7 @@ import asyncio
 from typing import List, Dict, Any, Optional
 from firecrawl import Firecrawl
 from openai import AsyncOpenAI
-from agent.auxiliary_client import get_text_auxiliary_client
+from agent.auxiliary_client import get_async_text_auxiliary_client
 from tools.debug_helpers import DebugSession

 logger = logging.getLogger(__name__)
@@ -67,21 +67,9 @@ def _get_firecrawl_client():
 DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000

-# Resolve auxiliary text client at module level; build an async wrapper.
-_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
-_aux_async_client: AsyncOpenAI | None = None
-if _aux_sync_client is not None:
-    _async_kwargs = {
-        "api_key": _aux_sync_client.api_key,
-        "base_url": str(_aux_sync_client.base_url),
-    }
-    if "openrouter" in str(_aux_sync_client.base_url).lower():
-        _async_kwargs["default_headers"] = {
-            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-            "X-OpenRouter-Title": "Hermes Agent",
-            "X-OpenRouter-Categories": "productivity,cli-agent",
-        }
-    _aux_async_client = AsyncOpenAI(**_async_kwargs)
+# Resolve async auxiliary client at module level.
+# Handles Codex Responses API adapter transparently.
+_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client()

 _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
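
The deleted inline block above suggests what the new helper must do. A plausible sketch of `get_async_text_auxiliary_client`, reconstructed from that block; the actual implementation lives in `agent/auxiliary_client.py` and, per the new comments, also wraps the Codex Responses API adapter, which this sketch omits:

```python
# Plausible sketch, as it might appear in agent/auxiliary_client.py next to
# the existing get_text_auxiliary_client(); reconstructed from the deleted
# inline block, minus the Codex Responses adapter handling.
from openai import AsyncOpenAI

def get_async_text_auxiliary_client() -> tuple[AsyncOpenAI | None, str]:
    sync_client, model = get_text_auxiliary_client()
    if sync_client is None:
        return None, model
    kwargs: dict = {
        "api_key": sync_client.api_key,
        "base_url": str(sync_client.base_url),
    }
    if "openrouter" in str(sync_client.base_url).lower():
        # Same OpenRouter attribution headers the deleted block set.
        kwargs["default_headers"] = {
            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
            "X-OpenRouter-Title": "Hermes Agent",
            "X-OpenRouter-Categories": "productivity,cli-agent",
        }
    return AsyncOpenAI(**kwargs), model
```

Centralizing this in one helper means every call site picks up the Codex adapter and OpenRouter headers without duplicating the branching.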
@@ -174,7 +162,7 @@ async def _call_summarizer_llm(
     content: str,
     context_str: str,
     model: str,
-    max_tokens: int = 4000,
+    max_tokens: int = 20000,
     is_chunk: bool = False,
     chunk_info: str = ""
 ) -> Optional[str]:
@@ -306,7 +294,7 @@ async def _process_large_content_chunked(
         chunk_content,
         context_str,
         model,
-        max_tokens=2000,
+        max_tokens=10000,
         is_chunk=True,
         chunk_info=chunk_info
     )
@@ -374,7 +362,7 @@ Create a single, unified markdown summary."""
             {"role": "user", "content": synthesis_prompt}
         ],
         temperature=0.1,
-        **auxiliary_max_tokens_param(4000),
+        **auxiliary_max_tokens_param(20000),
         **({} if not _extra else {"extra_body": _extra}),
     )
     final_summary = response.choices[0].message.content.strip()
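
`auxiliary_max_tokens_param` is used but not defined in this hunk. A hedged guess at its shape, assuming its job is to emit whichever token-limit keyword the backing endpoint accepts; the model-prefix check is illustrative, not the repo's logic:

```python
# Hypothetical sketch; the real helper is defined elsewhere in the repo.
def auxiliary_max_tokens_param(n: int, model: str = "") -> dict[str, int]:
    # Newer OpenAI reasoning/Codex models reject `max_tokens` on chat
    # completions and require `max_completion_tokens`; most other
    # OpenAI-compatible backends still take `max_tokens`.
    if model.startswith(("o1", "o3", "gpt-5")):
        return {"max_completion_tokens": n}
    return {"max_tokens": n}
```

Returning a dict and splatting it with `**` keeps the call site agnostic to which keyword is in play.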