From c6a992e3e3cb99d935da3d059093b5b1f839738c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 20 May 2026 20:05:50 -0700 Subject: [PATCH] fix(security): derive _API_KEY from host as final credential fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After #28660's host-gating fix, users with provider=custom and base_url pointed at a commercial endpoint (DeepSeek, Groq, Mistral, …) hit no-key-required even when they had the vendor-named env var set (DEEPSEEK_API_KEY, GROQ_API_KEY, …). The issue author flagged this as 'what users intuitively expect'. Adds _host_derived_api_key() to derive an env var name from the base URL host using the *registrable* label (second-to-last). Appended to all three api_key_candidates chains (_resolve_named_custom_runtime direct-alias path, named-custom path, _resolve_openrouter_runtime non-openrouter branch). Lookalike resistance: api.deepseek.com.attacker.test resolves to vendor label 'attacker', NOT 'deepseek' — DEEPSEEK_API_KEY stays put. IPs and loopback yield no vendor label. Already-handled vendors (OPENAI/OPENROUTER/ OLLAMA) are filtered to prevent bypass of the explicit host-gated paths. Adds 6 tests covering positive paths (DeepSeek, Groq), the lookalike attack, loopback rejection, the already-handled-vendor filter, and direct helper unit tests. Also adds erhnysr to AUTHOR_MAP. --- hermes_cli/runtime_provider.py | 69 ++++++++ scripts/release.py | 1 + .../test_runtime_provider_resolution.py | 160 ++++++++++++++++++ 3 files changed, 230 insertions(+) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 1b6e66b6c6d..73aa5c45571 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -100,6 +100,63 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]: return None +def _host_derived_api_key(base_url: str) -> str: + """Look up `_API_KEY` in the env, derived from the base URL host. + + Examples: + https://api.deepseek.com/v1 → DEEPSEEK_API_KEY + https://api.groq.com/openai/v1 → GROQ_API_KEY + https://api.mistral.ai/v1 → MISTRAL_API_KEY + https://generativelanguage.googleapis.com/v1beta/openai/ → GOOGLEAPIS_API_KEY + + Returns the env value (stripped) or "". Never returns env vars whose names + are already explicitly checked elsewhere — those are handled by their own + host-gated paths (OPENAI/OPENROUTER/OLLAMA). + + The vendor label is the *registrable* portion of the hostname: strip + ``api.`` / ``www.`` prefixes, then take the second-to-last label + (``api.deepseek.com`` → ``deepseek``). Falls back to "" for hostnames + that don't yield a usable vendor label (IPs, loopback, single-label + hosts). + """ + hostname = base_url_hostname(base_url) + if not hostname: + return "" + # Reject IPv4 / IPv6 / loopback — no meaningful vendor label. + if any(ch.isdigit() for ch in hostname.split(".")[-1]): + # Last label starts with a digit → likely IP. (TLDs are never numeric.) + return "" + if hostname in ("localhost",) or ":" in hostname: + return "" + labels = [lbl for lbl in hostname.split(".") if lbl] + # Strip common API/CDN prefixes. + while labels and labels[0] in ("api", "www"): + labels.pop(0) + if len(labels) < 2: + return "" + # Take the *registrable* label (second-to-last). For typical provider + # hosts this is what users intuitively call "the vendor": + # deepseek.com → labels[-2] = "deepseek" ✓ + # api.groq.com → groq.com → labels[-2] = "groq" ✓ + # api.mistral.ai → labels[-2] = "mistral" ✓ + # Crucially, lookalike hosts pick the ATTACKER's label, not the spoofed + # vendor: + # api.deepseek.com.attacker.test → labels[-2] = "attacker" + # so DEEPSEEK_API_KEY stays put and the chain falls through to + # no-key-required. This mirrors how `base_url_host_matches` resists the + # same lookalike attack for explicit hosts. + vendor = labels[-2] + # Sanitize to env var charset: A-Z, 0-9, underscore. + sanitized = "".join(ch if ch.isalnum() else "_" for ch in vendor).upper() + if not sanitized or not sanitized[0].isalpha(): + return "" + # Don't re-derive env vars already handled by explicit host-gated paths. + if sanitized in ("OPENAI", "OPENROUTER", "OLLAMA"): + return "" + env_name = f"{sanitized}_API_KEY" + return (os.getenv(env_name, "") or "").strip() + + def _auto_detect_local_model(base_url: str) -> str: """Query a local server for its model name when only one model is loaded.""" if not base_url: @@ -589,6 +646,10 @@ def _resolve_named_custom_runtime( # Gate env key fallbacks on authoritative hosts (#28660) (os.getenv("OPENAI_API_KEY", "").strip() if _da_is_openai_url else ""), (os.getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter else ""), + # Bonus (#28660): derive `_API_KEY` from the host so users + # who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the + # intuitive match without configuring `custom_providers` first. + _host_derived_api_key(base_url), ] api_key = next( (c for c in api_key_candidates if has_usable_secret(c)), @@ -634,6 +695,9 @@ def _resolve_named_custom_runtime( # OPENAI_API_KEY to a local-llm endpoint leaks credentials (#28660). (os.getenv("OPENAI_API_KEY", "").strip() if _cp_is_openai_url else ""), (os.getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter else ""), + # Bonus (#28660): derive `_API_KEY` from the host as a final + # fallback when key_env wasn't set explicitly. + _host_derived_api_key(base_url), ] api_key = next((candidate for candidate in api_key_candidates if has_usable_secret(candidate)), "") @@ -749,6 +813,11 @@ def _resolve_openrouter_runtime( (os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""), (os.getenv("OPENAI_API_KEY") if (_is_openai_url or _is_openai_azure) else ""), (os.getenv("OPENROUTER_API_KEY") if _is_openrouter_url else ""), + # Bonus (#28660): derive `_API_KEY` from the host so users + # who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the + # intuitive match. Helper returns "" for IPs/loopback and for env + # vars already handled by the explicit host-gated paths above. + _host_derived_api_key(base_url), ] api_key = next( (str(candidate or "").strip() for candidate in api_key_candidates if has_usable_secret(candidate)), diff --git a/scripts/release.py b/scripts/release.py index ff4d2c8fc6a..13d68bdae61 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -47,6 +47,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json" AUTHOR_MAP = { # teknium (multiple emails) "teknium1@gmail.com": "teknium1", + "erhanyasarx@gmail.com": "erhnysr", "30366221+WorldWriter@users.noreply.github.com": "WorldWriter", "dafeng@DafengdeMacBook-Pro.local": "WorldWriter", "anadi.jaggia@gmail.com": "Jaggia", diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py index 5b89863959e..3adffabb461 100644 --- a/tests/hermes_cli/test_runtime_provider_resolution.py +++ b/tests/hermes_cli/test_runtime_provider_resolution.py @@ -2466,3 +2466,163 @@ def test_openrouter_key_reaches_openrouter_host(monkeypatch): resolved = rp.resolve_runtime_provider(requested="openrouter") assert resolved["api_key"] == "or-secret" + + +# ---------------------------------------------------------------------- +# Issue #28660 — bonus: `_API_KEY` derivation from host. +# After the host-gating fix, users with a `DEEPSEEK_API_KEY` set and +# `base_url: https://api.deepseek.com/v1` should get the key picked up +# without needing to configure custom_providers.key_env first. +# ---------------------------------------------------------------------- + + +def test_host_derived_key_picked_up_for_deepseek(monkeypatch): + """DEEPSEEK_API_KEY env var must be forwarded to api.deepseek.com.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "custom", + "base_url": "https://api.deepseek.com/v1", + }, + ) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-deepseek-secret") + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["api_key"] == "sk-deepseek-secret" + + +def test_host_derived_key_picked_up_for_groq(monkeypatch): + """GROQ_API_KEY env var must be forwarded to api.groq.com.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "custom", + "base_url": "https://api.groq.com/openai/v1", + }, + ) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.setenv("GROQ_API_KEY", "gsk-groq-secret") + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["api_key"] == "gsk-groq-secret" + + +def test_host_derived_key_does_not_leak_to_lookalike_host(monkeypatch): + """DEEPSEEK_API_KEY must NOT be sent to an attacker-controlled lookalike + host (e.g. api.deepseek.com.attacker.test). The host-derive helper uses + proper hostname parsing so it picks the *attacker's* vendor label, not + DEEPSEEK — and any real DEEPSEEK_API_KEY stays put.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "custom", + "base_url": "https://api.deepseek.com.attacker.test/v1", + }, + ) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-deepseek-secret") + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert "sk-deepseek-secret" not in (resolved["api_key"] or "") + # No ATTACKER_API_KEY is set, so the chain falls through to no-key-required. + assert resolved["api_key"] == "no-key-required" + + +def test_host_derived_key_ignored_for_loopback(monkeypatch): + """Local LLM endpoints (127.0.0.1, localhost) must not derive any host + env var — there's no meaningful vendor label.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "custom", + "base_url": "http://127.0.0.1:1234/v1", + }, + ) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + # Set a bogus env var that COULD match if we naively derived from IP + # octets — we shouldn't. + monkeypatch.setenv("LOCALHOST_API_KEY", "should-not-be-used") + monkeypatch.setenv("_API_KEY", "should-not-be-used") + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["api_key"] == "no-key-required" + + +def test_host_derived_key_skips_already_handled_vendors(monkeypatch): + """The host-derive helper must not double-resolve OPENAI / OPENROUTER / + OLLAMA env vars — those are owned by their explicit host-gated paths. + Specifically, OPENAI_API_KEY must not leak to a non-openai host via the + `openai` label in a path or subdomain.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "custom", + # Hosts like proxy.openai.evil should derive nothing — but even + # if "openai" were the registrable label, the explicit + # OPENAI/OPENROUTER/OLLAMA filter blocks it. + "base_url": "https://api.example.com/v1", + }, + ) + monkeypatch.setenv("OPENAI_API_KEY", "sk-openai-secret") + monkeypatch.setenv("OPENROUTER_API_KEY", "or-secret") + + resolved = rp.resolve_runtime_provider(requested="custom") + + # example.com has no EXAMPLE_API_KEY set, and OPENAI/OPENROUTER are gated + # on their own hosts — chain falls through to no-key-required. + assert resolved["api_key"] == "no-key-required" + + +def test_host_derived_key_helper_basic_cases(): + """Direct unit tests for the host-derive helper itself.""" + # Standard provider hosts → derives correctly. + import os as _os + + _os.environ.pop("DEEPSEEK_API_KEY", None) + _os.environ.pop("GROQ_API_KEY", None) + _os.environ.pop("MISTRAL_API_KEY", None) + + _os.environ["DEEPSEEK_API_KEY"] = "dk" + assert rp._host_derived_api_key("https://api.deepseek.com/v1") == "dk" + + _os.environ["GROQ_API_KEY"] = "gk" + assert rp._host_derived_api_key("https://api.groq.com/openai/v1") == "gk" + + _os.environ["MISTRAL_API_KEY"] = "mk" + assert rp._host_derived_api_key("https://api.mistral.ai/v1") == "mk" + + # IPs and loopback → empty. + assert rp._host_derived_api_key("http://127.0.0.1:1234/v1") == "" + assert rp._host_derived_api_key("http://192.168.0.103:8080/v1") == "" + assert rp._host_derived_api_key("http://localhost:1234") == "" + + # Empty / malformed → empty. + assert rp._host_derived_api_key("") == "" + assert rp._host_derived_api_key("not a url") == "" + + # Already-handled vendors → empty (guards against bypass of host-gate). + _os.environ["OPENAI_API_KEY"] = "should-not-leak" + assert rp._host_derived_api_key("https://api.openai.com/v1") == "" + _os.environ["OPENROUTER_API_KEY"] = "should-not-leak" + assert rp._host_derived_api_key("https://openrouter.ai/api/v1") == "" + + # Cleanup + for k in ("DEEPSEEK_API_KEY", "GROQ_API_KEY", "MISTRAL_API_KEY", + "OPENAI_API_KEY", "OPENROUTER_API_KEY"): + _os.environ.pop(k, None)