mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-23 05:31:23 +00:00
Use nous portal as model metadata authority (#24502)
* nous portal metadata resolver * minor fixes
This commit is contained in:
parent
c594a23047
commit
2863e9484a
2 changed files with 306 additions and 22 deletions
|
|
@ -473,6 +473,240 @@ class TestCodexOAuthContextLength:
|
|||
assert ctx == 1_000_000, "Non-codex 1M cache entries must be respected"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Nous Portal context-window resolution (provider="nous")
|
||||
# =========================================================================
|
||||
|
||||
class TestNousPortalContextResolution:
    """Nous Portal /v1/models is authoritative for what Nous infra enforces
    and may diverge from the OpenRouter catalog.

    Invariants this class pins down:
      1. Portal value wins over the OR fallback.
      2. Portal-derived values are persisted to disk.
      3. OR-fallback values are NEVER persisted — otherwise a single portal
         blip would freeze the wrong value in via step-1 cache short-circuit.
      4. Pre-fix persistent-cache entries (seeded from the OR catalog) are
         bypassed at step 1 and overwritten once the portal responds.
      5. Pre-fix persistent-cache entries SURVIVE on disk when the portal
         is unreachable — no opportunistic invalidation that loses the only
         value we have.

    Mock wiring used by every test: ``@patch`` decorators are applied
    bottom-up, so ``mock_or`` replaces ``fetch_model_metadata`` (the
    OpenRouter catalog) and ``mock_portal`` replaces
    ``fetch_endpoint_model_metadata`` (the portal's /v1/models lookup).
    Each test also redirects ``_get_context_cache_path`` to a file under
    ``tmp_path`` so the real on-disk cache is never touched.
    """

    def setup_method(self):
        """Clear the in-memory endpoint metadata cache before each test."""
        import agent.model_metadata as mm
        mm._endpoint_model_metadata_cache.clear()
        mm._endpoint_model_metadata_cache_time.clear()

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_portal_value_wins_over_openrouter_catalog(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """The motivating case: OR catalog says 1M for qwen3.6-plus, but
        the Nous portal correctly enforces 262144. Portal must win."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }

        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url="https://inference-api.nousresearch.com/v1",
            api_key="fake-token",
            provider="nous",
        )
        assert ctx == 262_144, (
            f"Portal must override OR catalog; got {ctx} (OR leak?)"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_portal_value_is_persisted_to_disk(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Portal-derived value should land in the persistent cache so
        cross-process callers (e.g. child agents) see the same value."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}

        base_url = "https://inference-api.nousresearch.com/v1"
        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        assert ctx == 262_144
        persisted = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
        assert persisted.get(f"qwen3.6-plus@{base_url}") == 262_144, (
            "Portal-derived value should be persisted to disk"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_openrouter_fallback_is_not_persisted(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """When the portal can't resolve a model (network blip, auth glitch,
        model not yet listed) we fall back to the OR catalog so the agent
        keeps working — but we must NOT write the OR value to disk. Once
        cached on disk, step-1 short-circuits forever and the user is stuck
        with the wrong number until they manually clear the cache."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        mock_portal.return_value = {}  # portal unreachable / model unknown
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }

        base_url = "https://inference-api.nousresearch.com/v1"
        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        assert ctx == 1_000_000, "OR fallback should still serve the request"

        # yaml.safe_load returns None for an empty document; treat a missing
        # file, an empty file, and an empty mapping all as "nothing persisted"
        # instead of crashing with AttributeError on None.get().
        persisted = {}
        if cache_file.exists():
            loaded = yaml.safe_load(cache_file.read_text()) or {}
            persisted = loaded.get("context_lengths", {})
        assert not persisted, (
            "OR-fallback values must NOT be persisted — a single portal blip "
            "would otherwise freeze the wrong value in via step-1 cache hit"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_stale_cache_is_bypassed_and_overwritten_by_portal(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Users upgrading from pre-fix builds have ``qwen3.6-plus@…nous… =
        1000000`` (OR-derived) sitting in their cache file. Step 1 must
        NOT short-circuit on that entry — step 5b reconciles against the
        portal and overwrites the persistent value with 262144."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        base_url = "https://inference-api.nousresearch.com/v1"
        stale_key = f"qwen3.6-plus@{base_url}"
        other_key = "other-model@https://api.openai.com/v1"
        cache_file.write_text(yaml.dump({"context_lengths": {
            stale_key: 1_000_000,  # pre-fix OR-derived value
            other_key: 128_000,    # unrelated, must survive
        }}))

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}

        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        assert ctx == 262_144, (
            f"Stale OR-derived cache entry should not have leaked through; got {ctx}"
        )

        remaining = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
        assert remaining.get(stale_key) == 262_144, (
            "Portal value should have overwritten the stale entry on disk"
        )
        assert remaining.get(other_key) == 128_000, (
            "Unrelated cache entries must not be touched"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_stale_cache_survives_when_portal_unreachable(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """When the portal is unreachable AND we have a (potentially stale)
        on-disk cache entry, the entry must survive untouched — we don't
        want a transient outage to delete the only value we have. The
        request itself still gets served via OR fallback for this call."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        base_url = "https://inference-api.nousresearch.com/v1"
        existing_key = f"qwen3.6-plus@{base_url}"
        cache_file.write_text(yaml.dump({"context_lengths": {
            existing_key: 1_000_000,
        }}))

        mock_portal.return_value = {}  # portal unreachable
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }

        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        # Pin the "request is still served" half of the contract as well;
        # both the OR mock and the on-disk entry hold this same value.
        assert ctx == 1_000_000, "OR fallback should still serve the request"

        remaining = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
        assert remaining.get(existing_key) == 1_000_000, (
            "Persistent cache entry must survive a transient portal outage"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_bypass_keyed_on_url_not_provider_string(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Some call sites pass ``provider=""`` or ``provider="openrouter"``
        when the user is really on Nous Portal (e.g. cred-pool fallback).
        The Nous-URL bypass must trigger off the URL host, not the provider
        string, so the portal-first resolver still runs in that case."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        base_url = "https://inference-api.nousresearch.com/v1"
        stale_key = f"qwen3.6-plus@{base_url}"

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}

        for provider_arg in ("", "openrouter", "custom"):
            # Re-seed the stale entry on EVERY iteration. A successful run
            # overwrites it with the portal value, so seeding only once would
            # let later provider strings pass via a plain step-1 cache hit
            # even if URL-based detection were broken for them.
            cache_file.write_text(yaml.dump({"context_lengths": {
                stale_key: 1_000_000,  # stale
            }}))
            mm._endpoint_model_metadata_cache.clear()
            mm._endpoint_model_metadata_cache_time.clear()
            ctx = mm.get_model_context_length(
                model="qwen3.6-plus",
                base_url=base_url,
                api_key="fake",
                provider=provider_arg,
            )
            assert ctx == 262_144, (
                f"URL-based Nous detection must fire for provider={provider_arg!r}; "
                f"got {ctx}"
            )
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# get_model_context_length — resolution order
|
||||
# =========================================================================
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue