fix(auxiliary): custom provider URL rewrite + main_runtime model for title gen

- auxiliary_client: apply _to_openai_base_url() to the explicit custom base_url
  (fixes the missing /anthropic → /v1 rewrite when provider="custom")
- auxiliary_client: use main_runtime.get("model") instead of _read_main_model()
  so auxiliary tasks follow system default model changes
- title_generator: thread main_runtime through maybe_auto_title → auto_title_session → generate_title
- cli.py / gateway/run.py: pass main_runtime to maybe_auto_title
- tests: update mock assertions for new main_runtime parameter
This commit is contained in:
crayfish-ai 2026-04-28 12:14:36 +08:00 committed by Teknium
parent 20b49b71cd
commit f3371c39a4
5 changed files with 26 additions and 7 deletions

View file

@ -1834,7 +1834,7 @@ def resolve_provider_client(
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
if provider == "custom":
if explicit_base_url:
custom_base = explicit_base_url.strip()
custom_base = _to_openai_base_url(explicit_base_url).strip()
custom_key = (
(explicit_api_key or "").strip()
or os.getenv("OPENAI_API_KEY", "").strip()
@ -1847,7 +1847,7 @@ def resolve_provider_client(
)
return None, None
final_model = _normalize_resolved_model(
model or _read_main_model() or "gpt-4o-mini",
model or (main_runtime.get("model") if main_runtime else None) or "gpt-4o-mini",
provider,
)
extra = {}

View file

@ -30,10 +30,12 @@ def generate_title(
assistant_response: str,
timeout: float = 30.0,
failure_callback: Optional[FailureCallback] = None,
main_runtime: dict = None,
) -> Optional[str]:
"""Generate a session title from the first exchange.
Uses the auxiliary LLM client (cheapest/fastest available model).
Uses the main runtime's model when available, falling back to the
auxiliary LLM client (cheapest/fastest available model).
Returns the title string or None on failure.
``failure_callback`` is invoked with ``(task, exception)`` when the
@ -57,6 +59,7 @@ def generate_title(
max_tokens=500,
temperature=0.3,
timeout=timeout,
main_runtime=main_runtime,
)
title = (response.choices[0].message.content or "").strip()
# Clean up: remove quotes, trailing punctuation, prefixes like "Title: "
@ -86,6 +89,7 @@ def auto_title_session(
user_message: str,
assistant_response: str,
failure_callback: Optional[FailureCallback] = None,
main_runtime: dict = None,
) -> None:
"""Generate and set a session title if one doesn't already exist.
@ -107,7 +111,7 @@ def auto_title_session(
return
title = generate_title(
user_message, assistant_response, failure_callback=failure_callback
user_message, assistant_response, failure_callback=failure_callback, main_runtime=main_runtime
)
if not title:
return
@ -126,6 +130,7 @@ def maybe_auto_title(
assistant_response: str,
conversation_history: list,
failure_callback: Optional[FailureCallback] = None,
main_runtime: dict = None,
) -> None:
"""Fire-and-forget title generation after the first exchange.
@ -147,7 +152,7 @@ def maybe_auto_title(
thread = threading.Thread(
target=auto_title_session,
args=(session_db, session_id, user_message, assistant_response),
kwargs={"failure_callback": failure_callback},
kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
daemon=True,
name="auto-title",
)

7
cli.py
View file

@ -8835,6 +8835,13 @@ class HermesCLI:
response,
self.conversation_history,
failure_callback=_title_failure_cb,
main_runtime={
"model": self.model,
"provider": self.provider,
"base_url": self.base_url,
"api_key": self.api_key,
"api_mode": self.api_mode,
},
)
except Exception:
pass

View file

@ -10700,6 +10700,13 @@ class GatewayRunner:
final_response,
all_msgs,
failure_callback=_title_failure_cb,
main_runtime={
"model": getattr(agent, "model", None),
"provider": getattr(agent, "provider", None),
"base_url": getattr(agent, "base_url", None),
"api_key": getattr(agent, "api_key", None),
"api_mode": getattr(agent, "api_mode", None),
} if agent else None,
)
except Exception:
pass

View file

@ -182,7 +182,7 @@ class TestMaybeAutoTitle:
import time
time.sleep(0.3)
mock_auto.assert_called_once_with(
db, "sess-1", "hello", "hi there", failure_callback=None
db, "sess-1", "hello", "hi there", failure_callback=None, main_runtime=None
)
def test_forwards_failure_callback_to_worker(self):
@ -202,7 +202,7 @@ class TestMaybeAutoTitle:
import time
time.sleep(0.3)
mock_auto.assert_called_once_with(
db, "sess-1", "hello", "hi there", failure_callback=_cb
db, "sess-1", "hello", "hi there", failure_callback=_cb, main_runtime=None
)
def test_skips_if_no_response(self):