mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix(agent): comprehensive DeepSeek V4 support — context windows, thinking mode, reasoning replay
Unifies approaches from PRs #14952, #14958, #15325, #15228, #15354 into a single cohesive implementation: - Add 1M context window entries for V4 models (deepseek-v4-pro, deepseek-v4-flash, deepseek-chat, deepseek-reasoner) - Plumb thinking.type toggle and reasoning_effort mapping for native DeepSeek API (only "high" and "max" are valid) - Strip incompatible sampling params when thinking is enabled - Inject reasoning_content="" on all assistant messages for DeepSeek replay (scoped to api.deepseek.com and OpenRouter) - Fix _extract_reasoning isinstance checks for empty strings - Preserve empty-string reasoning_content in normalize_response - Add _copy_reasoning_content_for_api call in _handle_max_iterations Fixes #15353. Supersedes #14952, #14958, #15325, #15228, #15354.
This commit is contained in:
parent
00c3d848d8
commit
1d38b0f888
4 changed files with 336 additions and 10 deletions
|
|
@@ -162,8 +162,12 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
"gemma-4-31b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek
|
||||
"deepseek": 128000,
|
||||
# DeepSeek — V4 family supports 1M context (api.deepseek.com docs)
|
||||
"deepseek-v4-pro": 1000000,
|
||||
"deepseek-v4-flash": 1000000,
|
||||
"deepseek-chat": 1000000,
|
||||
"deepseek-reasoner": 1000000,
|
||||
"deepseek": 128000, # fallback for older/unrecognised DeepSeek models
|
||||
# Meta
|
||||
"llama": 131072,
|
||||
# Qwen — specific model families before the catch-all.
|
||||
|
|
|
|||
|
|
@@ -239,6 +239,30 @@ class ChatCompletionsTransport(ProviderTransport):
|
|||
"type": "enabled" if _kimi_thinking_enabled else "disabled",
|
||||
}
|
||||
|
||||
# DeepSeek: thinking mode toggle and effort mapping
|
||||
is_deepseek = params.get("is_deepseek", False)
|
||||
if is_deepseek:
|
||||
_ds_thinking_enabled = True
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is False:
|
||||
_ds_thinking_enabled = False
|
||||
if _ds_thinking_enabled:
|
||||
# DeepSeek only supports "high" and "max" effort values.
|
||||
# Map low/medium/high → "high", xhigh/max → "max".
|
||||
_ds_effort = "high"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_e = (reasoning_config.get("effort") or "").strip().lower()
|
||||
if _e in ("xhigh", "max"):
|
||||
_ds_effort = "max"
|
||||
extra_body["thinking"] = {"type": "enabled", "budget_tokens": 8192}
|
||||
api_kwargs["reasoning_effort"] = _ds_effort
|
||||
# DeepSeek rejects temperature/top_p/presence_penalty/
|
||||
# frequency_penalty when thinking is enabled.
|
||||
for _k in ("temperature", "top_p", "presence_penalty", "frequency_penalty"):
|
||||
api_kwargs.pop(_k, None)
|
||||
else:
|
||||
extra_body["thinking"] = {"type": "disabled"}
|
||||
|
||||
# Reasoning
|
||||
if params.get("supports_reasoning", False):
|
||||
if is_github_models:
|
||||
|
|
@@ -347,7 +371,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
|||
reasoning_content = getattr(msg, "reasoning_content", None)
|
||||
|
||||
provider_data: Dict[str, Any] = {}
|
||||
if reasoning_content:
|
||||
if reasoning_content is not None:
|
||||
provider_data["reasoning_content"] = reasoning_content
|
||||
rd = getattr(msg, "reasoning_details", None)
|
||||
if rd:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue