mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-03 07:21:54 +00:00
fix(security): derive <VENDOR>_API_KEY from host as final credential fallback
After #28660's host-gating fix, users with provider=custom and base_url pointed at a commercial endpoint (DeepSeek, Groq, Mistral, …) hit no-key-required even when they had the vendor-named env var set (DEEPSEEK_API_KEY, GROQ_API_KEY, …). The issue author flagged this as 'what users intuitively expect'. Adds _host_derived_api_key() to derive an env var name from the base URL host using the *registrable* label (second-to-last). Appended to all three api_key_candidates chains (_resolve_named_custom_runtime direct-alias path, named-custom path, _resolve_openrouter_runtime non-openrouter branch). Lookalike resistance: api.deepseek.com.attacker.test resolves to vendor label 'attacker', NOT 'deepseek' — DEEPSEEK_API_KEY stays put. IPs and loopback yield no vendor label. Already-handled vendors (OPENAI/OPENROUTER/ OLLAMA) are filtered to prevent bypass of the explicit host-gated paths. Adds 6 tests covering positive paths (DeepSeek, Groq), the lookalike attack, loopback rejection, the already-handled-vendor filter, and direct helper unit tests. Also adds erhnysr to AUTHOR_MAP.
This commit is contained in:
parent
9514ddbee2
commit
c6a992e3e3
3 changed files with 230 additions and 0 deletions
|
|
@ -100,6 +100,63 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
def _host_derived_api_key(base_url: str) -> str:
|
||||
"""Look up `<VENDOR>_API_KEY` in the env, derived from the base URL host.
|
||||
|
||||
Examples:
|
||||
https://api.deepseek.com/v1 → DEEPSEEK_API_KEY
|
||||
https://api.groq.com/openai/v1 → GROQ_API_KEY
|
||||
https://api.mistral.ai/v1 → MISTRAL_API_KEY
|
||||
https://generativelanguage.googleapis.com/v1beta/openai/ → GOOGLEAPIS_API_KEY
|
||||
|
||||
Returns the env value (stripped) or "". Never returns env vars whose names
|
||||
are already explicitly checked elsewhere — those are handled by their own
|
||||
host-gated paths (OPENAI/OPENROUTER/OLLAMA).
|
||||
|
||||
The vendor label is the *registrable* portion of the hostname: strip
|
||||
``api.`` / ``www.`` prefixes, then take the second-to-last label
|
||||
(``api.deepseek.com`` → ``deepseek``). Falls back to "" for hostnames
|
||||
that don't yield a usable vendor label (IPs, loopback, single-label
|
||||
hosts).
|
||||
"""
|
||||
hostname = base_url_hostname(base_url)
|
||||
if not hostname:
|
||||
return ""
|
||||
# Reject IPv4 / IPv6 / loopback — no meaningful vendor label.
|
||||
if any(ch.isdigit() for ch in hostname.split(".")[-1]):
|
||||
# Last label starts with a digit → likely IP. (TLDs are never numeric.)
|
||||
return ""
|
||||
if hostname in ("localhost",) or ":" in hostname:
|
||||
return ""
|
||||
labels = [lbl for lbl in hostname.split(".") if lbl]
|
||||
# Strip common API/CDN prefixes.
|
||||
while labels and labels[0] in ("api", "www"):
|
||||
labels.pop(0)
|
||||
if len(labels) < 2:
|
||||
return ""
|
||||
# Take the *registrable* label (second-to-last). For typical provider
|
||||
# hosts this is what users intuitively call "the vendor":
|
||||
# deepseek.com → labels[-2] = "deepseek" ✓
|
||||
# api.groq.com → groq.com → labels[-2] = "groq" ✓
|
||||
# api.mistral.ai → labels[-2] = "mistral" ✓
|
||||
# Crucially, lookalike hosts pick the ATTACKER's label, not the spoofed
|
||||
# vendor:
|
||||
# api.deepseek.com.attacker.test → labels[-2] = "attacker"
|
||||
# so DEEPSEEK_API_KEY stays put and the chain falls through to
|
||||
# no-key-required. This mirrors how `base_url_host_matches` resists the
|
||||
# same lookalike attack for explicit hosts.
|
||||
vendor = labels[-2]
|
||||
# Sanitize to env var charset: A-Z, 0-9, underscore.
|
||||
sanitized = "".join(ch if ch.isalnum() else "_" for ch in vendor).upper()
|
||||
if not sanitized or not sanitized[0].isalpha():
|
||||
return ""
|
||||
# Don't re-derive env vars already handled by explicit host-gated paths.
|
||||
if sanitized in ("OPENAI", "OPENROUTER", "OLLAMA"):
|
||||
return ""
|
||||
env_name = f"{sanitized}_API_KEY"
|
||||
return (os.getenv(env_name, "") or "").strip()
|
||||
|
||||
|
||||
def _auto_detect_local_model(base_url: str) -> str:
|
||||
"""Query a local server for its model name when only one model is loaded."""
|
||||
if not base_url:
|
||||
|
|
@ -589,6 +646,10 @@ def _resolve_named_custom_runtime(
|
|||
# Gate env key fallbacks on authoritative hosts (#28660)
|
||||
(os.getenv("OPENAI_API_KEY", "").strip() if _da_is_openai_url else ""),
|
||||
(os.getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter else ""),
|
||||
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
|
||||
# who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
|
||||
# intuitive match without configuring `custom_providers` first.
|
||||
_host_derived_api_key(base_url),
|
||||
]
|
||||
api_key = next(
|
||||
(c for c in api_key_candidates if has_usable_secret(c)),
|
||||
|
|
@ -634,6 +695,9 @@ def _resolve_named_custom_runtime(
|
|||
# OPENAI_API_KEY to a local-llm endpoint leaks credentials (#28660).
|
||||
(os.getenv("OPENAI_API_KEY", "").strip() if _cp_is_openai_url else ""),
|
||||
(os.getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter else ""),
|
||||
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host as a final
|
||||
# fallback when key_env wasn't set explicitly.
|
||||
_host_derived_api_key(base_url),
|
||||
]
|
||||
api_key = next((candidate for candidate in api_key_candidates if has_usable_secret(candidate)), "")
|
||||
|
||||
|
|
@ -749,6 +813,11 @@ def _resolve_openrouter_runtime(
|
|||
(os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
|
||||
(os.getenv("OPENAI_API_KEY") if (_is_openai_url or _is_openai_azure) else ""),
|
||||
(os.getenv("OPENROUTER_API_KEY") if _is_openrouter_url else ""),
|
||||
# Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
|
||||
# who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
|
||||
# intuitive match. Helper returns "" for IPs/loopback and for env
|
||||
# vars already handled by the explicit host-gated paths above.
|
||||
_host_derived_api_key(base_url),
|
||||
]
|
||||
api_key = next(
|
||||
(str(candidate or "").strip() for candidate in api_key_candidates if has_usable_secret(candidate)),
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
|
|||
AUTHOR_MAP = {
|
||||
# teknium (multiple emails)
|
||||
"teknium1@gmail.com": "teknium1",
|
||||
"erhanyasarx@gmail.com": "erhnysr",
|
||||
"30366221+WorldWriter@users.noreply.github.com": "WorldWriter",
|
||||
"dafeng@DafengdeMacBook-Pro.local": "WorldWriter",
|
||||
"anadi.jaggia@gmail.com": "Jaggia",
|
||||
|
|
|
|||
|
|
@ -2466,3 +2466,163 @@ def test_openrouter_key_reaches_openrouter_host(monkeypatch):
|
|||
resolved = rp.resolve_runtime_provider(requested="openrouter")
|
||||
|
||||
assert resolved["api_key"] == "or-secret"
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Issue #28660 — bonus: `<VENDOR>_API_KEY` derivation from host.
|
||||
# After the host-gating fix, users with a `DEEPSEEK_API_KEY` set and
|
||||
# `base_url: https://api.deepseek.com/v1` should get the key picked up
|
||||
# without needing to configure custom_providers.key_env first.
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_host_derived_key_picked_up_for_deepseek(monkeypatch):
|
||||
"""DEEPSEEK_API_KEY env var must be forwarded to api.deepseek.com."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
|
||||
monkeypatch.setattr(
|
||||
rp,
|
||||
"_get_model_config",
|
||||
lambda: {
|
||||
"provider": "custom",
|
||||
"base_url": "https://api.deepseek.com/v1",
|
||||
},
|
||||
)
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-deepseek-secret")
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="custom")
|
||||
|
||||
assert resolved["api_key"] == "sk-deepseek-secret"
|
||||
|
||||
|
||||
def test_host_derived_key_picked_up_for_groq(monkeypatch):
|
||||
"""GROQ_API_KEY env var must be forwarded to api.groq.com."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
|
||||
monkeypatch.setattr(
|
||||
rp,
|
||||
"_get_model_config",
|
||||
lambda: {
|
||||
"provider": "custom",
|
||||
"base_url": "https://api.groq.com/openai/v1",
|
||||
},
|
||||
)
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
monkeypatch.setenv("GROQ_API_KEY", "gsk-groq-secret")
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="custom")
|
||||
|
||||
assert resolved["api_key"] == "gsk-groq-secret"
|
||||
|
||||
|
||||
def test_host_derived_key_does_not_leak_to_lookalike_host(monkeypatch):
|
||||
"""DEEPSEEK_API_KEY must NOT be sent to an attacker-controlled lookalike
|
||||
host (e.g. api.deepseek.com.attacker.test). The host-derive helper uses
|
||||
proper hostname parsing so it picks the *attacker's* vendor label, not
|
||||
DEEPSEEK — and any real DEEPSEEK_API_KEY stays put."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
|
||||
monkeypatch.setattr(
|
||||
rp,
|
||||
"_get_model_config",
|
||||
lambda: {
|
||||
"provider": "custom",
|
||||
"base_url": "https://api.deepseek.com.attacker.test/v1",
|
||||
},
|
||||
)
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-deepseek-secret")
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="custom")
|
||||
|
||||
assert "sk-deepseek-secret" not in (resolved["api_key"] or "")
|
||||
# No ATTACKER_API_KEY is set, so the chain falls through to no-key-required.
|
||||
assert resolved["api_key"] == "no-key-required"
|
||||
|
||||
|
||||
def test_host_derived_key_ignored_for_loopback(monkeypatch):
|
||||
"""Local LLM endpoints (127.0.0.1, localhost) must not derive any host
|
||||
env var — there's no meaningful vendor label."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
|
||||
monkeypatch.setattr(
|
||||
rp,
|
||||
"_get_model_config",
|
||||
lambda: {
|
||||
"provider": "custom",
|
||||
"base_url": "http://127.0.0.1:1234/v1",
|
||||
},
|
||||
)
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
# Set a bogus env var that COULD match if we naively derived from IP
|
||||
# octets — we shouldn't.
|
||||
monkeypatch.setenv("LOCALHOST_API_KEY", "should-not-be-used")
|
||||
monkeypatch.setenv("_API_KEY", "should-not-be-used")
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="custom")
|
||||
|
||||
assert resolved["api_key"] == "no-key-required"
|
||||
|
||||
|
||||
def test_host_derived_key_skips_already_handled_vendors(monkeypatch):
|
||||
"""The host-derive helper must not double-resolve OPENAI / OPENROUTER /
|
||||
OLLAMA env vars — those are owned by their explicit host-gated paths.
|
||||
Specifically, OPENAI_API_KEY must not leak to a non-openai host via the
|
||||
`openai` label in a path or subdomain."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
|
||||
monkeypatch.setattr(
|
||||
rp,
|
||||
"_get_model_config",
|
||||
lambda: {
|
||||
"provider": "custom",
|
||||
# Hosts like proxy.openai.evil should derive nothing — but even
|
||||
# if "openai" were the registrable label, the explicit
|
||||
# OPENAI/OPENROUTER/OLLAMA filter blocks it.
|
||||
"base_url": "https://api.example.com/v1",
|
||||
},
|
||||
)
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "sk-openai-secret")
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-secret")
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="custom")
|
||||
|
||||
# example.com has no EXAMPLE_API_KEY set, and OPENAI/OPENROUTER are gated
|
||||
# on their own hosts — chain falls through to no-key-required.
|
||||
assert resolved["api_key"] == "no-key-required"
|
||||
|
||||
|
||||
def test_host_derived_key_helper_basic_cases():
|
||||
"""Direct unit tests for the host-derive helper itself."""
|
||||
# Standard provider hosts → derives correctly.
|
||||
import os as _os
|
||||
|
||||
_os.environ.pop("DEEPSEEK_API_KEY", None)
|
||||
_os.environ.pop("GROQ_API_KEY", None)
|
||||
_os.environ.pop("MISTRAL_API_KEY", None)
|
||||
|
||||
_os.environ["DEEPSEEK_API_KEY"] = "dk"
|
||||
assert rp._host_derived_api_key("https://api.deepseek.com/v1") == "dk"
|
||||
|
||||
_os.environ["GROQ_API_KEY"] = "gk"
|
||||
assert rp._host_derived_api_key("https://api.groq.com/openai/v1") == "gk"
|
||||
|
||||
_os.environ["MISTRAL_API_KEY"] = "mk"
|
||||
assert rp._host_derived_api_key("https://api.mistral.ai/v1") == "mk"
|
||||
|
||||
# IPs and loopback → empty.
|
||||
assert rp._host_derived_api_key("http://127.0.0.1:1234/v1") == ""
|
||||
assert rp._host_derived_api_key("http://192.168.0.103:8080/v1") == ""
|
||||
assert rp._host_derived_api_key("http://localhost:1234") == ""
|
||||
|
||||
# Empty / malformed → empty.
|
||||
assert rp._host_derived_api_key("") == ""
|
||||
assert rp._host_derived_api_key("not a url") == ""
|
||||
|
||||
# Already-handled vendors → empty (guards against bypass of host-gate).
|
||||
_os.environ["OPENAI_API_KEY"] = "should-not-leak"
|
||||
assert rp._host_derived_api_key("https://api.openai.com/v1") == ""
|
||||
_os.environ["OPENROUTER_API_KEY"] = "should-not-leak"
|
||||
assert rp._host_derived_api_key("https://openrouter.ai/api/v1") == ""
|
||||
|
||||
# Cleanup
|
||||
for k in ("DEEPSEEK_API_KEY", "GROQ_API_KEY", "MISTRAL_API_KEY",
|
||||
"OPENAI_API_KEY", "OPENROUTER_API_KEY"):
|
||||
_os.environ.pop(k, None)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue