mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(model-normalize): pass DeepSeek V-series IDs through instead of folding to deepseek-chat
`_normalize_for_deepseek` was mapping every non-reasoner input into
`deepseek-chat` on the assumption that DeepSeek's API accepts only two
model IDs. That assumption no longer holds — `deepseek-v4-pro` and
`deepseek-v4-flash` are first-class IDs accepted by the direct API,
and on aggregators `deepseek-chat` routes explicitly to V3 (DeepInfra
backend returns `deepseek-chat-v3`). So a user picking V4 Pro through
the model picker was being silently downgraded to V3.
Verified 2026-04-24 against Nous portal's OpenAI-compat surface:
- `deepseek/deepseek-v4-flash` → provider: DeepSeek,
model: deepseek-v4-flash-20260423
- `deepseek/deepseek-chat` → provider: DeepInfra,
model: deepseek/deepseek-chat-v3
Fix:
- Add `deepseek-v4-pro` and `deepseek-v4-flash` to
`_DEEPSEEK_CANONICAL_MODELS` so exact matches pass through.
- Add `_DEEPSEEK_V_SERIES_RE` (`^deepseek-v\d+([-.].+)?$`) so future
V-series IDs (`deepseek-v5-*`, dated variants) keep passing through
without another code change.
- Update docstring + module header to reflect the new rule.
Tests:
- New `TestDeepseekVSeriesPassThrough` — 8 parametrized cases covering
bare, vendor-prefixed, case-variant, dated, and future V-series IDs
plus end-to-end `normalize_model_for_provider(..., "deepseek")`.
- New `TestDeepseekCanonicalAndReasonerMapping` — regression coverage
for canonical pass-through, reasoner-keyword folding, and
fall-back-to-chat behaviour.
- 77/77 pass.
Reported on Discord (Ufonik, Don Piedro): `/model > Deepseek >
deepseek-v4-pro` surfaced
`Normalized 'deepseek-v4-pro' to 'deepseek-chat'`. Picker listing
showed the v4 names, so validation also rejected the post-normalize
`deepseek-chat` as "not in provider listing" — the contradiction
users saw. Normalizer now respects the picker's choice.
This commit is contained in:
parent
acd78a457e
commit
4ac731c841
2 changed files with 100 additions and 8 deletions
|
|
@ -12,8 +12,12 @@ Different LLM providers expect model identifiers in different formats:
|
||||||
model IDs, but Claude still uses hyphenated native names like
|
model IDs, but Claude still uses hyphenated native names like
|
||||||
``claude-sonnet-4-6``.
|
``claude-sonnet-4-6``.
|
||||||
- **OpenCode Go** preserves dots in model names: ``minimax-m2.7``.
|
- **OpenCode Go** preserves dots in model names: ``minimax-m2.7``.
|
||||||
- **DeepSeek** only accepts two model identifiers:
|
- **DeepSeek** accepts ``deepseek-chat`` (V3), ``deepseek-reasoner``
|
||||||
``deepseek-chat`` and ``deepseek-reasoner``.
|
(R1-family), and the first-class V-series IDs (``deepseek-v4-pro``,
|
||||||
|
``deepseek-v4-flash``, and any future ``deepseek-v<N>-*``). Older
|
||||||
|
Hermes revisions folded every non-reasoner input into
|
||||||
|
``deepseek-chat``, which on aggregators routes to V3 — so a user
|
||||||
|
picking V4 Pro was silently downgraded.
|
||||||
- **Custom** and remaining providers pass the name through as-is.
|
- **Custom** and remaining providers pass the name through as-is.
|
||||||
|
|
||||||
This module centralises that translation so callers can simply write::
|
This module centralises that translation so callers can simply write::
|
||||||
|
|
@ -25,6 +29,7 @@ Inspired by Clawdbot's ``normalizeAnthropicModelId`` pattern.
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -124,17 +129,30 @@ _DEEPSEEK_REASONER_KEYWORDS: frozenset[str] = frozenset({
|
||||||
})
|
})
|
||||||
|
|
||||||
_DEEPSEEK_CANONICAL_MODELS: frozenset[str] = frozenset({
|
_DEEPSEEK_CANONICAL_MODELS: frozenset[str] = frozenset({
|
||||||
"deepseek-chat",
|
"deepseek-chat", # V3 on DeepSeek direct and most aggregators
|
||||||
"deepseek-reasoner",
|
"deepseek-reasoner", # R1-family reasoning model
|
||||||
|
"deepseek-v4-pro", # V4 Pro — first-class model ID
|
||||||
|
"deepseek-v4-flash", # V4 Flash — first-class model ID
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# First-class V-series IDs (``deepseek-v4-pro``, ``deepseek-v4-flash``,
|
||||||
|
# future ``deepseek-v5-*``, dated variants like ``deepseek-v4-flash-20260423``).
|
||||||
|
# Verified empirically 2026-04-24: DeepSeek's Chat Completions API returns
|
||||||
|
# ``provider: DeepSeek`` / ``model: deepseek-v4-flash-20260423`` when called
|
||||||
|
# with ``model=deepseek/deepseek-v4-flash``, so these names are not aliases
|
||||||
|
# of ``deepseek-chat`` and must not be folded into it.
|
||||||
|
_DEEPSEEK_V_SERIES_RE = re.compile(r"^deepseek-v\d+([-.].+)?$")
|
||||||
|
|
||||||
|
|
||||||
def _normalize_for_deepseek(model_name: str) -> str:
|
def _normalize_for_deepseek(model_name: str) -> str:
|
||||||
"""Map any model input to one of DeepSeek's two accepted identifiers.
|
"""Map a model input to a DeepSeek-accepted identifier.
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
- Already ``deepseek-chat`` or ``deepseek-reasoner`` -> pass through.
|
- Already a known canonical (``deepseek-chat``/``deepseek-reasoner``/
|
||||||
- Contains any reasoner keyword (r1, think, reasoning, cot, reasoner)
|
``deepseek-v4-pro``/``deepseek-v4-flash``) -> pass through.
|
||||||
|
- Matches the V-series pattern ``deepseek-v<digit>...`` -> pass through
|
||||||
|
(covers future ``deepseek-v5-*`` and dated variants without a release).
|
||||||
|
- Contains a reasoner keyword (r1, think, reasoning, cot, reasoner)
|
||||||
-> ``deepseek-reasoner``.
|
-> ``deepseek-reasoner``.
|
||||||
- Everything else -> ``deepseek-chat``.
|
- Everything else -> ``deepseek-chat``.
|
||||||
|
|
||||||
|
|
@ -142,13 +160,17 @@ def _normalize_for_deepseek(model_name: str) -> str:
|
||||||
model_name: The bare model name (vendor prefix already stripped).
|
model_name: The bare model name (vendor prefix already stripped).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
One of ``"deepseek-chat"`` or ``"deepseek-reasoner"``.
|
A DeepSeek-accepted model identifier.
|
||||||
"""
|
"""
|
||||||
bare = _strip_vendor_prefix(model_name).lower()
|
bare = _strip_vendor_prefix(model_name).lower()
|
||||||
|
|
||||||
if bare in _DEEPSEEK_CANONICAL_MODELS:
|
if bare in _DEEPSEEK_CANONICAL_MODELS:
|
||||||
return bare
|
return bare
|
||||||
|
|
||||||
|
# V-series first-class IDs (v4-pro, v4-flash, future v5-*, dated variants)
|
||||||
|
if _DEEPSEEK_V_SERIES_RE.match(bare):
|
||||||
|
return bare
|
||||||
|
|
||||||
# Check for reasoner-like keywords anywhere in the name
|
# Check for reasoner-like keywords anywhere in the name
|
||||||
for keyword in _DEEPSEEK_REASONER_KEYWORDS:
|
for keyword in _DEEPSEEK_REASONER_KEYWORDS:
|
||||||
if keyword in bare:
|
if keyword in bare:
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from hermes_cli.model_normalize import (
|
||||||
normalize_model_for_provider,
|
normalize_model_for_provider,
|
||||||
_DOT_TO_HYPHEN_PROVIDERS,
|
_DOT_TO_HYPHEN_PROVIDERS,
|
||||||
_AGGREGATOR_PROVIDERS,
|
_AGGREGATOR_PROVIDERS,
|
||||||
|
_normalize_for_deepseek,
|
||||||
detect_vendor,
|
detect_vendor,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -191,3 +192,72 @@ class TestDetectVendor:
|
||||||
])
|
])
|
||||||
def test_detects_known_vendors(self, model, expected):
|
def test_detects_known_vendors(self, model, expected):
|
||||||
assert detect_vendor(model) == expected
|
assert detect_vendor(model) == expected
|
||||||
|
|
||||||
|
|
||||||
|
# ── DeepSeek V-series pass-through (bug: V4 models silently folded to V3) ──
|
||||||
|
|
||||||
|
class TestDeepseekVSeriesPassThrough:
|
||||||
|
"""DeepSeek's V-series IDs (``deepseek-v4-pro``, ``deepseek-v4-flash``,
|
||||||
|
and future ``deepseek-v<N>-*`` variants) are first-class model IDs
|
||||||
|
accepted directly by DeepSeek's Chat Completions API. Earlier code
|
||||||
|
folded every non-reasoner name into ``deepseek-chat``, which on
|
||||||
|
aggregators (Nous portal, OpenRouter via DeepInfra) routes to V3 —
|
||||||
|
silently downgrading users who picked V4.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("model", [
|
||||||
|
"deepseek-v4-pro",
|
||||||
|
"deepseek-v4-flash",
|
||||||
|
"deepseek/deepseek-v4-pro", # vendor-prefixed
|
||||||
|
"deepseek/deepseek-v4-flash",
|
||||||
|
"DeepSeek-V4-Pro", # case-insensitive
|
||||||
|
"deepseek-v4-flash-20260423", # dated variant
|
||||||
|
"deepseek-v5-pro", # future V-series
|
||||||
|
"deepseek-v10-ultra", # double-digit future
|
||||||
|
])
|
||||||
|
def test_v_series_passes_through(self, model):
|
||||||
|
expected = model.split("/", 1)[-1].lower()
|
||||||
|
assert _normalize_for_deepseek(model) == expected
|
||||||
|
|
||||||
|
def test_deepseek_provider_preserves_v4_pro(self):
|
||||||
|
"""End-to-end via normalize_model_for_provider — user selecting
|
||||||
|
V4 Pro must reach DeepSeek's API as V4 Pro, not V3 alias."""
|
||||||
|
result = normalize_model_for_provider("deepseek-v4-pro", "deepseek")
|
||||||
|
assert result == "deepseek-v4-pro"
|
||||||
|
|
||||||
|
def test_deepseek_provider_preserves_v4_flash(self):
|
||||||
|
result = normalize_model_for_provider("deepseek-v4-flash", "deepseek")
|
||||||
|
assert result == "deepseek-v4-flash"
|
||||||
|
|
||||||
|
|
||||||
|
# ── DeepSeek regressions (existing behaviour still holds) ──────────────
|
||||||
|
|
||||||
|
class TestDeepseekCanonicalAndReasonerMapping:
|
||||||
|
"""Canonical pass-through and reasoner-keyword folding stay intact."""
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("model,expected", [
|
||||||
|
("deepseek-chat", "deepseek-chat"),
|
||||||
|
("deepseek-reasoner", "deepseek-reasoner"),
|
||||||
|
("DEEPSEEK-CHAT", "deepseek-chat"),
|
||||||
|
])
|
||||||
|
def test_canonical_models_pass_through(self, model, expected):
|
||||||
|
assert _normalize_for_deepseek(model) == expected
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("model", [
|
||||||
|
"deepseek-r1",
|
||||||
|
"deepseek-r1-0528",
|
||||||
|
"deepseek-think-v3",
|
||||||
|
"deepseek-reasoning-preview",
|
||||||
|
"deepseek-cot-experimental",
|
||||||
|
])
|
||||||
|
def test_reasoner_keywords_map_to_reasoner(self, model):
|
||||||
|
assert _normalize_for_deepseek(model) == "deepseek-reasoner"
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("model", [
|
||||||
|
"deepseek-chat-v3.1", # 'chat' prefix, not V-series pattern
|
||||||
|
"unknown-model",
|
||||||
|
"something-random",
|
||||||
|
"gpt-5", # non-DeepSeek names still fall through
|
||||||
|
])
|
||||||
|
def test_unknown_names_fall_back_to_chat(self, model):
|
||||||
|
assert _normalize_for_deepseek(model) == "deepseek-chat"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue