fix(security): derive <VENDOR>_API_KEY from host as final credential fallback

After #28660's host-gating fix, users with provider=custom and base_url
pointed at a commercial endpoint (DeepSeek, Groq, Mistral, …) hit
no-key-required even when they had the vendor-named env var set
(DEEPSEEK_API_KEY, GROQ_API_KEY, …). The issue author flagged this as
'what users intuitively expect'.

Adds _host_derived_api_key() to derive an env var name from the base URL
host using the *registrable* label (second-to-last). Appended to all three
api_key_candidates chains (_resolve_named_custom_runtime direct-alias path,
named-custom path, _resolve_openrouter_runtime non-openrouter branch).

Lookalike resistance: api.deepseek.com.attacker.test resolves to vendor
label 'attacker', NOT 'deepseek' — DEEPSEEK_API_KEY stays put. IPs and
loopback yield no vendor label. Already-handled vendors (OPENAI/OPENROUTER/
OLLAMA) are filtered to prevent bypass of the explicit host-gated paths.

Adds 6 tests covering positive paths (DeepSeek, Groq), the lookalike attack,
loopback rejection, the already-handled-vendor filter, and direct helper
unit tests.

Also adds erhnysr to AUTHOR_MAP.
This commit is contained in:
Teknium 2026-05-20 20:05:50 -07:00
parent 9514ddbee2
commit c6a992e3e3
3 changed files with 230 additions and 0 deletions

View file

@ -100,6 +100,63 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
return None
def _host_derived_api_key(base_url: str) -> str:
"""Look up `<VENDOR>_API_KEY` in the env, derived from the base URL host.
Examples:
https://api.deepseek.com/v1 DEEPSEEK_API_KEY
https://api.groq.com/openai/v1 GROQ_API_KEY
https://api.mistral.ai/v1 MISTRAL_API_KEY
https://generativelanguage.googleapis.com/v1beta/openai/ GOOGLEAPIS_API_KEY
Returns the env value (stripped) or "". Never returns env vars whose names
are already explicitly checked elsewhere those are handled by their own
host-gated paths (OPENAI/OPENROUTER/OLLAMA).
The vendor label is the *registrable* portion of the hostname: strip
``api.`` / ``www.`` prefixes, then take the second-to-last label
(``api.deepseek.com`` ``deepseek``). Falls back to "" for hostnames
that don't yield a usable vendor label (IPs, loopback, single-label
hosts).
"""
hostname = base_url_hostname(base_url)
if not hostname:
return ""
# Reject IPv4 / IPv6 / loopback — no meaningful vendor label.
if any(ch.isdigit() for ch in hostname.split(".")[-1]):
# Last label starts with a digit → likely IP. (TLDs are never numeric.)
return ""
if hostname in ("localhost",) or ":" in hostname:
return ""
labels = [lbl for lbl in hostname.split(".") if lbl]
# Strip common API/CDN prefixes.
while labels and labels[0] in ("api", "www"):
labels.pop(0)
if len(labels) < 2:
return ""
# Take the *registrable* label (second-to-last). For typical provider
# hosts this is what users intuitively call "the vendor":
# deepseek.com → labels[-2] = "deepseek" ✓
# api.groq.com → groq.com → labels[-2] = "groq" ✓
# api.mistral.ai → labels[-2] = "mistral" ✓
# Crucially, lookalike hosts pick the ATTACKER's label, not the spoofed
# vendor:
# api.deepseek.com.attacker.test → labels[-2] = "attacker"
# so DEEPSEEK_API_KEY stays put and the chain falls through to
# no-key-required. This mirrors how `base_url_host_matches` resists the
# same lookalike attack for explicit hosts.
vendor = labels[-2]
# Sanitize to env var charset: A-Z, 0-9, underscore.
sanitized = "".join(ch if ch.isalnum() else "_" for ch in vendor).upper()
if not sanitized or not sanitized[0].isalpha():
return ""
# Don't re-derive env vars already handled by explicit host-gated paths.
if sanitized in ("OPENAI", "OPENROUTER", "OLLAMA"):
return ""
env_name = f"{sanitized}_API_KEY"
return (os.getenv(env_name, "") or "").strip()
def _auto_detect_local_model(base_url: str) -> str:
"""Query a local server for its model name when only one model is loaded."""
if not base_url:
@ -589,6 +646,10 @@ def _resolve_named_custom_runtime(
# Gate env key fallbacks on authoritative hosts (#28660)
(os.getenv("OPENAI_API_KEY", "").strip() if _da_is_openai_url else ""),
(os.getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter else ""),
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
# who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
# intuitive match without configuring `custom_providers` first.
_host_derived_api_key(base_url),
]
api_key = next(
(c for c in api_key_candidates if has_usable_secret(c)),
@ -634,6 +695,9 @@ def _resolve_named_custom_runtime(
# OPENAI_API_KEY to a local-llm endpoint leaks credentials (#28660).
(os.getenv("OPENAI_API_KEY", "").strip() if _cp_is_openai_url else ""),
(os.getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter else ""),
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host as a final
# fallback when key_env wasn't set explicitly.
_host_derived_api_key(base_url),
]
api_key = next((candidate for candidate in api_key_candidates if has_usable_secret(candidate)), "")
@ -749,6 +813,11 @@ def _resolve_openrouter_runtime(
(os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
(os.getenv("OPENAI_API_KEY") if (_is_openai_url or _is_openai_azure) else ""),
(os.getenv("OPENROUTER_API_KEY") if _is_openrouter_url else ""),
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
# who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
# intuitive match. Helper returns "" for IPs/loopback and for env
# vars already handled by the explicit host-gated paths above.
_host_derived_api_key(base_url),
]
api_key = next(
(str(candidate or "").strip() for candidate in api_key_candidates if has_usable_secret(candidate)),

View file

@ -47,6 +47,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
AUTHOR_MAP = {
# teknium (multiple emails)
"teknium1@gmail.com": "teknium1",
"erhanyasarx@gmail.com": "erhnysr",
"30366221+WorldWriter@users.noreply.github.com": "WorldWriter",
"dafeng@DafengdeMacBook-Pro.local": "WorldWriter",
"anadi.jaggia@gmail.com": "Jaggia",

View file

@ -2466,3 +2466,163 @@ def test_openrouter_key_reaches_openrouter_host(monkeypatch):
resolved = rp.resolve_runtime_provider(requested="openrouter")
assert resolved["api_key"] == "or-secret"
# ----------------------------------------------------------------------
# Issue #28660 — bonus: `<VENDOR>_API_KEY` derivation from host.
# After the host-gating fix, users with a `DEEPSEEK_API_KEY` set and
# `base_url: https://api.deepseek.com/v1` should get the key picked up
# without needing to configure custom_providers.key_env first.
# ----------------------------------------------------------------------
def test_host_derived_key_picked_up_for_deepseek(monkeypatch):
"""DEEPSEEK_API_KEY env var must be forwarded to api.deepseek.com."""
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
monkeypatch.setattr(
rp,
"_get_model_config",
lambda: {
"provider": "custom",
"base_url": "https://api.deepseek.com/v1",
},
)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-deepseek-secret")
resolved = rp.resolve_runtime_provider(requested="custom")
assert resolved["api_key"] == "sk-deepseek-secret"
def test_host_derived_key_picked_up_for_groq(monkeypatch):
"""GROQ_API_KEY env var must be forwarded to api.groq.com."""
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
monkeypatch.setattr(
rp,
"_get_model_config",
lambda: {
"provider": "custom",
"base_url": "https://api.groq.com/openai/v1",
},
)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.setenv("GROQ_API_KEY", "gsk-groq-secret")
resolved = rp.resolve_runtime_provider(requested="custom")
assert resolved["api_key"] == "gsk-groq-secret"
def test_host_derived_key_does_not_leak_to_lookalike_host(monkeypatch):
"""DEEPSEEK_API_KEY must NOT be sent to an attacker-controlled lookalike
host (e.g. api.deepseek.com.attacker.test). The host-derive helper uses
proper hostname parsing so it picks the *attacker's* vendor label, not
DEEPSEEK and any real DEEPSEEK_API_KEY stays put."""
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
monkeypatch.setattr(
rp,
"_get_model_config",
lambda: {
"provider": "custom",
"base_url": "https://api.deepseek.com.attacker.test/v1",
},
)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-deepseek-secret")
resolved = rp.resolve_runtime_provider(requested="custom")
assert "sk-deepseek-secret" not in (resolved["api_key"] or "")
# No ATTACKER_API_KEY is set, so the chain falls through to no-key-required.
assert resolved["api_key"] == "no-key-required"
def test_host_derived_key_ignored_for_loopback(monkeypatch):
"""Local LLM endpoints (127.0.0.1, localhost) must not derive any host
env var there's no meaningful vendor label."""
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
monkeypatch.setattr(
rp,
"_get_model_config",
lambda: {
"provider": "custom",
"base_url": "http://127.0.0.1:1234/v1",
},
)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
# Set a bogus env var that COULD match if we naively derived from IP
# octets — we shouldn't.
monkeypatch.setenv("LOCALHOST_API_KEY", "should-not-be-used")
monkeypatch.setenv("_API_KEY", "should-not-be-used")
resolved = rp.resolve_runtime_provider(requested="custom")
assert resolved["api_key"] == "no-key-required"
def test_host_derived_key_skips_already_handled_vendors(monkeypatch):
"""The host-derive helper must not double-resolve OPENAI / OPENROUTER /
OLLAMA env vars those are owned by their explicit host-gated paths.
Specifically, OPENAI_API_KEY must not leak to a non-openai host via the
`openai` label in a path or subdomain."""
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
monkeypatch.setattr(
rp,
"_get_model_config",
lambda: {
"provider": "custom",
# Hosts like proxy.openai.evil should derive nothing — but even
# if "openai" were the registrable label, the explicit
# OPENAI/OPENROUTER/OLLAMA filter blocks it.
"base_url": "https://api.example.com/v1",
},
)
monkeypatch.setenv("OPENAI_API_KEY", "sk-openai-secret")
monkeypatch.setenv("OPENROUTER_API_KEY", "or-secret")
resolved = rp.resolve_runtime_provider(requested="custom")
# example.com has no EXAMPLE_API_KEY set, and OPENAI/OPENROUTER are gated
# on their own hosts — chain falls through to no-key-required.
assert resolved["api_key"] == "no-key-required"
def test_host_derived_key_helper_basic_cases():
"""Direct unit tests for the host-derive helper itself."""
# Standard provider hosts → derives correctly.
import os as _os
_os.environ.pop("DEEPSEEK_API_KEY", None)
_os.environ.pop("GROQ_API_KEY", None)
_os.environ.pop("MISTRAL_API_KEY", None)
_os.environ["DEEPSEEK_API_KEY"] = "dk"
assert rp._host_derived_api_key("https://api.deepseek.com/v1") == "dk"
_os.environ["GROQ_API_KEY"] = "gk"
assert rp._host_derived_api_key("https://api.groq.com/openai/v1") == "gk"
_os.environ["MISTRAL_API_KEY"] = "mk"
assert rp._host_derived_api_key("https://api.mistral.ai/v1") == "mk"
# IPs and loopback → empty.
assert rp._host_derived_api_key("http://127.0.0.1:1234/v1") == ""
assert rp._host_derived_api_key("http://192.168.0.103:8080/v1") == ""
assert rp._host_derived_api_key("http://localhost:1234") == ""
# Empty / malformed → empty.
assert rp._host_derived_api_key("") == ""
assert rp._host_derived_api_key("not a url") == ""
# Already-handled vendors → empty (guards against bypass of host-gate).
_os.environ["OPENAI_API_KEY"] = "should-not-leak"
assert rp._host_derived_api_key("https://api.openai.com/v1") == ""
_os.environ["OPENROUTER_API_KEY"] = "should-not-leak"
assert rp._host_derived_api_key("https://openrouter.ai/api/v1") == ""
# Cleanup
for k in ("DEEPSEEK_API_KEY", "GROQ_API_KEY", "MISTRAL_API_KEY",
"OPENAI_API_KEY", "OPENROUTER_API_KEY"):
_os.environ.pop(k, None)