"""Regression tests for Nous Portal inference_base_url host-allowlist validation. A poisoned ``inference_base_url`` from the Portal refresh / agent-key-mint response (network MITM, malicious response injection) would otherwise be persisted to auth.json and forwarded the user's legitimate agent_key bearer on every subsequent proxy request, exfiltrating their inference budget and opening a response-injection channel into the IDE / chat client. ``_validate_nous_inference_url_from_network()`` blocks any URL outside the allowlist at the source. These tests verify: 1. The validator's host + scheme rules. 2. Each of the five NETWORK call sites in ``auth.py`` calls the validator rather than the unrestricted ``_optional_base_url`` helper. 3. The proxy adapter applies the validator as belt-and-suspenders. 4. The env-var override path (``NOUS_INFERENCE_BASE_URL``) is NOT gated by the validator — that's the documented dev/staging escape hatch. """ from __future__ import annotations import logging import pytest from hermes_cli.auth import ( DEFAULT_NOUS_INFERENCE_URL, _ALLOWED_NOUS_INFERENCE_HOSTS, _validate_nous_inference_url_from_network, ) class TestValidatorRules: def test_allowlisted_https_host_returned(self): url = "https://inference-api.nousresearch.com/v1" assert _validate_nous_inference_url_from_network(url) == url def test_trailing_slash_stripped(self): url = "https://inference-api.nousresearch.com/v1/" assert _validate_nous_inference_url_from_network(url) == url.rstrip("/") def test_attacker_host_rejected(self, caplog): with caplog.at_level(logging.WARNING, logger="hermes_cli.auth"): assert ( _validate_nous_inference_url_from_network("https://attacker.com/v1") is None ) assert any("attacker.com" in rec.message for rec in caplog.records) def test_subdomain_of_allowlist_host_rejected(self): """*.nousresearch.com is NOT in the allowlist — exact hostname only. A subdomain takeover or DNS hijack of *.nousresearch.com would otherwise pass — keep the gate tight. """ assert ( _validate_nous_inference_url_from_network( "https://evil.inference-api.nousresearch.com/v1" ) is None ) def test_http_scheme_rejected(self, caplog): with caplog.at_level(logging.WARNING, logger="hermes_cli.auth"): assert ( _validate_nous_inference_url_from_network( "http://inference-api.nousresearch.com/v1" ) is None ) assert any("non-https" in rec.message for rec in caplog.records) def test_file_scheme_rejected(self): assert ( _validate_nous_inference_url_from_network("file:///etc/passwd") is None ) def test_javascript_scheme_rejected(self): assert ( _validate_nous_inference_url_from_network( "javascript:alert(document.cookie)" ) is None ) def test_empty_string_rejected(self): assert _validate_nous_inference_url_from_network("") is None def test_whitespace_only_rejected(self): assert _validate_nous_inference_url_from_network(" ") is None def test_none_rejected(self): assert _validate_nous_inference_url_from_network(None) is None def test_non_string_rejected(self): assert _validate_nous_inference_url_from_network(12345) is None # type: ignore[arg-type] assert _validate_nous_inference_url_from_network({"url": "x"}) is None # type: ignore[arg-type] def test_malformed_url_rejected(self): """Even garbled input must fall back safely, not raise.""" assert ( _validate_nous_inference_url_from_network("not://a real url at all") is None ) def test_default_inference_url_is_in_allowlist(self): """Sanity check: DEFAULT_NOUS_INFERENCE_URL must itself validate. If anyone retargets the default away from ``inference-api.nousresearch.com``, they MUST update the allowlist in the same change — otherwise the allowlist would reject the Portal's own legitimate default and break every install. """ assert ( _validate_nous_inference_url_from_network(DEFAULT_NOUS_INFERENCE_URL) == DEFAULT_NOUS_INFERENCE_URL.rstrip("/") ) def test_allowlist_contains_inference_api_host(self): """The default's host must be in the allowlist set.""" from urllib.parse import urlparse host = urlparse(DEFAULT_NOUS_INFERENCE_URL).hostname assert host in _ALLOWED_NOUS_INFERENCE_HOSTS class TestCallSiteWiring: """Verify the validator is actually wired into all 5 NETWORK call sites. These are not behaviour-end-to-end tests (the surrounding code is several hundred lines per site with extensive HTTP mocking requirements). They're text-grep contracts: if anyone replaces ``_validate_nous_inference_url_from_network`` with the un-validated ``_optional_base_url`` again, the test catches it. Each site lives inside ``resolve_nous_runtime_credentials`` and one helper (``_extend_state_from_refresh``). The shape we guard against is ``_url = _optional_base_url(.get("inference_base_url"))`` — that's what the unsafe pre-fix code looked like, and the only semantic difference between the safe and unsafe helpers is the host-allowlist check. """ def _read_auth_source(self): import hermes_cli.auth as _auth_mod from pathlib import Path return Path(_auth_mod.__file__).read_text(encoding="utf-8") def test_no_unvalidated_inference_base_url_assignments_remain(self): """No remaining ``_optional_base_url(...inference_base_url...)`` reads from Portal payloads. If you see a failure here, you've either added a new NETWORK site that needs validation, or downgraded an existing one back to the unsafe helper.""" source = self._read_auth_source() for needle in ( '_optional_base_url(refreshed.get("inference_base_url"))', '_optional_base_url(mint_payload.get("inference_base_url"))', ): assert needle not in source, ( f"Found unvalidated network read: {needle!r}. " f"Use _validate_nous_inference_url_from_network() instead." ) def test_validator_wired_at_all_known_call_sites(self): """All 5 known NETWORK sites use the validator. If this count drops, someone removed protection; if it grows, audit the new site to be sure validation is appropriate.""" source = self._read_auth_source() refresh_count = source.count( '_validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))' ) mint_count = source.count( '_validate_nous_inference_url_from_network(mint_payload.get("inference_base_url"))' ) assert refresh_count == 3, f"expected 3 refresh sites, found {refresh_count}" assert mint_count == 2, f"expected 2 mint sites, found {mint_count}" def test_proxy_adapter_also_validates(self): """The Nous proxy adapter applies the validator as defense-in-depth even though auth.py already validates at the source, so a future bypass at the source layer still gets caught at the forward boundary.""" from pathlib import Path import hermes_cli.proxy.adapters.nous_portal as _nous_adapter source = Path(_nous_adapter.__file__).read_text(encoding="utf-8") assert "_validate_nous_inference_url_from_network" in source class TestEnvOverrideNotGated: """The documented dev/staging env-var override must keep working. ``NOUS_INFERENCE_BASE_URL`` is read by ``resolve_nous_runtime_credentials`` via ``os.getenv`` — that path doesn't pass through the validator (env values are trusted because the user set them themselves). Verify the env-var read site does NOT consult the validator, so a user running against a non-allowlisted staging host via env is not inadvertently broken by this fix. """ def test_env_override_path_does_not_call_validator(self): """In resolve_nous_runtime_credentials, the env override is read via os.getenv directly, not via the validator. Grep the source to confirm: the env line should NOT mention the validator.""" import hermes_cli.auth as _auth_mod from pathlib import Path source = Path(_auth_mod.__file__).read_text(encoding="utf-8") # Find the env-override read line. for line in source.splitlines(): if "NOUS_INFERENCE_BASE_URL" in line and "os.getenv" in line: assert "_validate_nous_inference_url_from_network" not in line, ( "env override path must not gate through the network " "validator — it would break documented dev/staging use." )