refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, falling back to the local cache and then to built-in defaults (see the sketch after this list).
- Updated the context compressor's summary token target to 2500 for improved performance.
- Added external credential detection for the Codex CLI to streamline authentication.
- Refactored authentication and model-selection handling so it is consistent across the application.
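
For illustration, a minimal sketch of that discovery-with-fallback flow. The helper name, cache path, and default model list below are hypothetical, not taken from this diff:

```python
# Illustrative sketch only: discover_codex_models, CACHE_PATH, and
# DEFAULT_MODELS are hypothetical names, not the repo's actual API.
import json
from pathlib import Path

from openai import OpenAI

CACHE_PATH = Path.home() / ".hermes" / "codex_models.json"
DEFAULT_MODELS = ["gpt-5-codex"]  # placeholder default, not from the diff

def discover_codex_models(client: OpenAI) -> list[str]:
    """Return model IDs from the API, else the local cache, else defaults."""
    try:
        models = [m.id for m in client.models.list().data]
        CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
        CACHE_PATH.write_text(json.dumps(models))  # refresh the cache
        return models
    except Exception:
        if CACHE_PATH.exists():  # offline or auth failure: reuse last listing
            return json.loads(CACHE_PATH.read_text())
        return DEFAULT_MODELS
```

Caching the last successful listing keeps real model names available when the API is unreachable or OAuth tokens have expired.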
This commit is contained in:
teknium1 2026-02-28 21:47:51 -08:00
parent 86b1db0598
commit 500f0eab4a
22 changed files with 1784 additions and 207 deletions

@@ -48,7 +48,7 @@ import asyncio
 from typing import List, Dict, Any, Optional
 from firecrawl import Firecrawl
 from openai import AsyncOpenAI
-from agent.auxiliary_client import get_text_auxiliary_client
+from agent.auxiliary_client import get_async_text_auxiliary_client
 from tools.debug_helpers import DebugSession

 logger = logging.getLogger(__name__)
@@ -67,21 +67,9 @@ def _get_firecrawl_client():
 DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000

-# Resolve auxiliary text client at module level; build an async wrapper.
-_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
-_aux_async_client: AsyncOpenAI | None = None
-if _aux_sync_client is not None:
-    _async_kwargs = {
-        "api_key": _aux_sync_client.api_key,
-        "base_url": str(_aux_sync_client.base_url),
-    }
-    if "openrouter" in str(_aux_sync_client.base_url).lower():
-        _async_kwargs["default_headers"] = {
-            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-            "X-OpenRouter-Title": "Hermes Agent",
-            "X-OpenRouter-Categories": "productivity,cli-agent",
-        }
-    _aux_async_client = AsyncOpenAI(**_async_kwargs)
+# Resolve async auxiliary client at module level.
+# Handles Codex Responses API adapter transparently.
+_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client()

 _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
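
The deleted inline block above suggests what the new helper must do. A plausible sketch of `get_async_text_auxiliary_client`, reconstructed from that block; the actual implementation lives in `agent/auxiliary_client.py` and, per the new comments, also wraps the Codex Responses API adapter, which this sketch omits:

```python
# Plausible sketch, as it might appear in agent/auxiliary_client.py next to
# the existing get_text_auxiliary_client(); reconstructed from the deleted
# inline block, minus the Codex Responses adapter handling.
from openai import AsyncOpenAI

def get_async_text_auxiliary_client() -> tuple[AsyncOpenAI | None, str]:
    sync_client, model = get_text_auxiliary_client()
    if sync_client is None:
        return None, model
    kwargs: dict = {
        "api_key": sync_client.api_key,
        "base_url": str(sync_client.base_url),
    }
    if "openrouter" in str(sync_client.base_url).lower():
        # Same OpenRouter attribution headers the deleted block set.
        kwargs["default_headers"] = {
            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
            "X-OpenRouter-Title": "Hermes Agent",
            "X-OpenRouter-Categories": "productivity,cli-agent",
        }
    return AsyncOpenAI(**kwargs), model
```

Centralizing this in one helper means every call site picks up the Codex adapter and OpenRouter headers without duplicating the branching.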
@@ -174,7 +162,7 @@ async def _call_summarizer_llm(
     content: str,
     context_str: str,
     model: str,
-    max_tokens: int = 4000,
+    max_tokens: int = 20000,
     is_chunk: bool = False,
     chunk_info: str = ""
 ) -> Optional[str]:
@@ -306,7 +294,7 @@ async def _process_large_content_chunked(
         chunk_content,
         context_str,
         model,
-        max_tokens=2000,
+        max_tokens=10000,
         is_chunk=True,
         chunk_info=chunk_info
     )
@@ -374,7 +362,7 @@ Create a single, unified markdown summary."""
             {"role": "user", "content": synthesis_prompt}
         ],
         temperature=0.1,
-        **auxiliary_max_tokens_param(4000),
+        **auxiliary_max_tokens_param(20000),
         **({} if not _extra else {"extra_body": _extra}),
     )
     final_summary = response.choices[0].message.content.strip()
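
`auxiliary_max_tokens_param` is used but not defined in this hunk. A hedged guess at its shape, assuming its job is to emit whichever token-limit keyword the backing endpoint accepts; the model-prefix check is illustrative, not the repo's logic:

```python
# Hypothetical sketch; the real helper is defined elsewhere in the repo.
def auxiliary_max_tokens_param(n: int, model: str = "") -> dict[str, int]:
    # Newer OpenAI reasoning/Codex models reject `max_tokens` on chat
    # completions and require `max_completion_tokens`; most other
    # OpenAI-compatible backends still take `max_tokens`.
    if model.startswith(("o1", "o3", "gpt-5")):
        return {"max_completion_tokens": n}
    return {"max_tokens": n}
```

Returning a dict and splatting it with `**` keeps the call site agnostic to which keyword is in play.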