mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: allow custom endpoints to use responses API via api_mode override (#1651)
Add HERMES_API_MODE env var and model.api_mode config field to let custom OpenAI-compatible endpoints opt into codex_responses mode without requiring the OpenAI Codex OAuth provider path. - _get_configured_api_mode() reads HERMES_API_MODE env (precedence) then model.api_mode from config.yaml; validates against whitelist - Applied in both _resolve_openrouter_runtime() and _resolve_named_custom_runtime() (original PR only covered openrouter) - Fix _dump_api_request_debug() to show /responses URL when in codex_responses mode instead of always showing /chat/completions - Tests for config override, env override, invalid values, named custom providers, and debug dump URL for both API modes Inspired by PR #1041 by @mxyhi. Co-authored-by: mxyhi <mxyhi@users.noreply.github.com>
This commit is contained in:
parent
68fbcdaa06
commit
f2414bfd45
4 changed files with 131 additions and 4 deletions
|
|
@ -33,6 +33,24 @@ def _get_model_config() -> Dict[str, Any]:
|
|||
return {}
|
||||
|
||||
|
||||
def _get_configured_api_mode(model_cfg: Optional[Dict[str, Any]] = None) -> Optional[str]:
|
||||
"""Return an optional API mode override from env or config.
|
||||
|
||||
Allows custom OpenAI-compatible endpoints to opt into codex_responses
|
||||
mode via HERMES_API_MODE env var or model.api_mode in config.yaml,
|
||||
without requiring the OpenAI Codex OAuth provider path.
|
||||
"""
|
||||
candidate = os.getenv("HERMES_API_MODE", "").strip().lower()
|
||||
if not candidate:
|
||||
cfg = model_cfg if isinstance(model_cfg, dict) else _get_model_config()
|
||||
raw = cfg.get("api_mode")
|
||||
if isinstance(raw, str):
|
||||
candidate = raw.strip().lower()
|
||||
if candidate in {"chat_completions", "codex_responses"}:
|
||||
return candidate
|
||||
return None
|
||||
|
||||
|
||||
def resolve_requested_provider(requested: Optional[str] = None) -> str:
|
||||
"""Resolve provider request from explicit arg, config, then env."""
|
||||
if requested and requested.strip():
|
||||
|
|
@ -121,7 +139,7 @@ def _resolve_named_custom_runtime(
|
|||
|
||||
return {
|
||||
"provider": "openrouter",
|
||||
"api_mode": "chat_completions",
|
||||
"api_mode": _get_configured_api_mode() or "chat_completions",
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"source": f"custom_provider:{custom_provider.get('name', requested_provider)}",
|
||||
|
|
@ -190,10 +208,11 @@ def _resolve_openrouter_runtime(
|
|||
)
|
||||
|
||||
source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config"
|
||||
api_mode = _get_configured_api_mode(model_cfg) or "chat_completions"
|
||||
|
||||
return {
|
||||
"provider": "openrouter",
|
||||
"api_mode": "chat_completions",
|
||||
"api_mode": api_mode,
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"source": source,
|
||||
|
|
|
|||
|
|
@ -1351,7 +1351,7 @@ class AIAgent:
|
|||
error: Optional[Exception] = None,
|
||||
) -> Optional[Path]:
|
||||
"""
|
||||
Dump a debug-friendly HTTP request record for chat.completions.create().
|
||||
Dump a debug-friendly HTTP request record for the active inference API.
|
||||
|
||||
Captures the request body from api_kwargs (excluding transport-only keys
|
||||
like timeout). Intended for debugging provider-side 4xx failures where
|
||||
|
|
@ -1374,7 +1374,7 @@ class AIAgent:
|
|||
"reason": reason,
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": f"{self.base_url.rstrip('/')}/chat/completions",
|
||||
"url": f"{self.base_url.rstrip('/')}{'/responses' if self.api_mode == 'codex_responses' else '/chat/completions'}",
|
||||
"headers": {
|
||||
"Authorization": f"Bearer {self._mask_api_key_for_logs(api_key)}",
|
||||
"Content-Type": "application/json",
|
||||
|
|
|
|||
|
|
@ -750,3 +750,40 @@ def test_run_conversation_codex_continues_after_ack_for_directory_listing_prompt
|
|||
for msg in result["messages"]
|
||||
)
|
||||
assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])
|
||||
|
||||
|
||||
def test_dump_api_request_debug_uses_responses_url(monkeypatch, tmp_path):
    """Debug dumps should show /responses URL when in codex_responses mode."""
    import json

    agent = _build_agent(monkeypatch)
    agent.base_url = "http://127.0.0.1:9208/v1"
    agent.logs_dir = tmp_path

    # Dump a codex-style request and inspect the recorded URL.
    record_path = agent._dump_api_request_debug(_codex_request_kwargs(), reason="preflight")
    record = json.loads(record_path.read_text())

    assert record["request"]["url"] == "http://127.0.0.1:9208/v1/responses"
|
||||
|
||||
|
||||
def test_dump_api_request_debug_uses_chat_completions_url(monkeypatch, tmp_path):
    """Debug dumps should show /chat/completions URL for chat_completions mode."""
    import json

    _patch_agent_bootstrap(monkeypatch)
    # Default agent construction stays in chat_completions mode.
    agent = run_agent.AIAgent(
        model="gpt-4o",
        base_url="http://127.0.0.1:9208/v1",
        api_key="test-key",
        quiet_mode=True,
        max_iterations=1,
        skip_context_files=True,
        skip_memory=True,
    )
    agent.logs_dir = tmp_path

    request_body = {"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]}
    record_path = agent._dump_api_request_debug(request_body, reason="preflight")
    record = json.loads(record_path.read_text())

    assert record["request"]["url"] == "http://127.0.0.1:9208/v1/chat/completions"
|
||||
|
|
|
|||
|
|
@ -326,3 +326,74 @@ def test_resolve_requested_provider_precedence(monkeypatch):
|
|||
|
||||
monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)
|
||||
assert rp.resolve_requested_provider() == "auto"
|
||||
|
||||
|
||||
# ── api_mode override tests ─────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_custom_endpoint_api_mode_from_config(monkeypatch):
    """model.api_mode in config.yaml should override the default chat_completions."""
    model_cfg = {
        "provider": "custom",
        "base_url": "http://127.0.0.1:9208/v1",
        "api_mode": "codex_responses",
    }
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
    monkeypatch.setattr(rp, "_get_model_config", lambda: model_cfg)

    # Ensure only the OPENAI_* credentials are visible and no env override leaks in.
    monkeypatch.setenv("OPENAI_BASE_URL", "http://127.0.0.1:9208/v1")
    monkeypatch.setenv("OPENAI_API_KEY", "test-key")
    for var in ("OPENROUTER_BASE_URL", "OPENROUTER_API_KEY", "HERMES_API_MODE"):
        monkeypatch.delenv(var, raising=False)

    runtime = rp.resolve_runtime_provider(requested="custom")

    assert runtime["api_mode"] == "codex_responses"
    assert runtime["base_url"] == "http://127.0.0.1:9208/v1"
|
||||
|
||||
|
||||
def test_env_api_mode_overrides_config(monkeypatch):
    """HERMES_API_MODE env var takes precedence over config."""
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
    # Config says chat_completions; the env var below must win.
    monkeypatch.setattr(rp, "_get_model_config", lambda: {"api_mode": "chat_completions"})

    monkeypatch.setenv("OPENAI_BASE_URL", "http://127.0.0.1:9208/v1")
    monkeypatch.setenv("OPENAI_API_KEY", "test-key")
    monkeypatch.setenv("HERMES_API_MODE", "codex_responses")
    for var in ("OPENROUTER_BASE_URL", "OPENROUTER_API_KEY"):
        monkeypatch.delenv(var, raising=False)

    runtime = rp.resolve_runtime_provider(requested="custom")

    assert runtime["api_mode"] == "codex_responses"
|
||||
|
||||
|
||||
def test_invalid_api_mode_ignored(monkeypatch):
    """Invalid api_mode values should fall back to chat_completions."""
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
    # A value outside the supported whitelist must be ignored entirely.
    monkeypatch.setattr(rp, "_get_model_config", lambda: {"api_mode": "bogus_mode"})

    monkeypatch.setenv("OPENAI_BASE_URL", "http://127.0.0.1:9208/v1")
    monkeypatch.setenv("OPENAI_API_KEY", "test-key")
    for var in ("HERMES_API_MODE", "OPENROUTER_BASE_URL", "OPENROUTER_API_KEY"):
        monkeypatch.delenv(var, raising=False)

    runtime = rp.resolve_runtime_provider(requested="custom")

    assert runtime["api_mode"] == "chat_completions"
|
||||
|
||||
|
||||
def test_named_custom_provider_respects_api_mode(monkeypatch):
    """Named custom providers should also pick up api_mode overrides."""
    provider_entry = {"name": "my-server", "base_url": "http://localhost:8000/v1", "api_key": "sk-test"}
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "my-server")
    monkeypatch.setattr(rp, "_get_named_custom_provider", lambda p: provider_entry)
    monkeypatch.setenv("HERMES_API_MODE", "codex_responses")

    runtime = rp.resolve_runtime_provider(requested="my-server")

    assert runtime["api_mode"] == "codex_responses"
    assert runtime["base_url"] == "http://localhost:8000/v1"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue