mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-15 04:12:25 +00:00
Merge branch 'main' into compaction-secrets-preservation
This commit is contained in:
commit
ef32968408
247 changed files with 13395 additions and 2874 deletions
|
|
@ -1230,9 +1230,10 @@ def build_anthropic_kwargs(
|
|||
When *base_url* points to a third-party Anthropic-compatible endpoint,
|
||||
thinking block signatures are stripped (they are Anthropic-proprietary).
|
||||
|
||||
When *fast_mode* is True, adds ``speed: "fast"`` and the fast-mode beta
|
||||
header for ~2.5x faster output throughput on Opus 4.6. Currently only
|
||||
supported on native Anthropic endpoints (not third-party compatible ones).
|
||||
When *fast_mode* is True, adds ``extra_body["speed"] = "fast"`` and the
|
||||
fast-mode beta header for ~2.5x faster output throughput on Opus 4.6.
|
||||
Currently only supported on native Anthropic endpoints (not third-party
|
||||
compatible ones).
|
||||
"""
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
|
||||
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
||||
|
|
@ -1333,11 +1334,11 @@ def build_anthropic_kwargs(
|
|||
kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
|
||||
|
||||
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
|
||||
# Adds speed:"fast" + the fast-mode beta header for ~2.5x output speed.
|
||||
# Only for native Anthropic endpoints — third-party providers would
|
||||
# reject the unknown beta header and speed parameter.
|
||||
# Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
|
||||
# output speed. Only for native Anthropic endpoints — third-party
|
||||
# providers would reject the unknown beta header and speed parameter.
|
||||
if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
|
||||
kwargs["speed"] = "fast"
|
||||
kwargs.setdefault("extra_body", {})["speed"] = "fast"
|
||||
# Build extra_headers with ALL applicable betas (the per-request
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
betas = list(_common_betas_for_base_url(base_url))
|
||||
|
|
|
|||
|
|
@ -64,6 +64,8 @@ _PROVIDER_ALIASES = {
|
|||
"zhipu": "zai",
|
||||
"kimi": "kimi-coding",
|
||||
"moonshot": "kimi-coding",
|
||||
"kimi-cn": "kimi-coding-cn",
|
||||
"moonshot-cn": "kimi-coding-cn",
|
||||
"minimax-china": "minimax-cn",
|
||||
"minimax_cn": "minimax-cn",
|
||||
"claude": "anthropic",
|
||||
|
|
@ -94,6 +96,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
|||
"gemini": "gemini-3-flash-preview",
|
||||
"zai": "glm-4.5-flash",
|
||||
"kimi-coding": "kimi-k2-turbo-preview",
|
||||
"kimi-coding-cn": "kimi-k2-turbo-preview",
|
||||
"minimax": "MiniMax-M2.7",
|
||||
"minimax-cn": "MiniMax-M2.7",
|
||||
"anthropic": "claude-haiku-4-5-20251001",
|
||||
|
|
@ -1220,6 +1223,12 @@ def _to_async_client(sync_client, model: str):
|
|||
return AsyncCodexAuxiliaryClient(sync_client), model
|
||||
if isinstance(sync_client, AnthropicAuxiliaryClient):
|
||||
return AsyncAnthropicAuxiliaryClient(sync_client), model
|
||||
try:
|
||||
from agent.copilot_acp_client import CopilotACPClient
|
||||
if isinstance(sync_client, CopilotACPClient):
|
||||
return sync_client, model
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
async_kwargs = {
|
||||
"api_key": sync_client.api_key,
|
||||
|
|
@ -1438,10 +1447,14 @@ def resolve_provider_client(
|
|||
custom_entry = _get_named_custom_provider(provider)
|
||||
if custom_entry:
|
||||
custom_base = custom_entry.get("base_url", "").strip()
|
||||
custom_key = custom_entry.get("api_key", "").strip() or "no-key-required"
|
||||
custom_key = custom_entry.get("api_key", "").strip()
|
||||
custom_key_env = custom_entry.get("key_env", "").strip()
|
||||
if not custom_key and custom_key_env:
|
||||
custom_key = os.getenv(custom_key_env, "").strip()
|
||||
custom_key = custom_key or "no-key-required"
|
||||
if custom_base:
|
||||
final_model = _normalize_resolved_model(
|
||||
model or _read_main_model() or "gpt-4o-mini",
|
||||
model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
|
|
@ -1460,7 +1473,11 @@ def resolve_provider_client(
|
|||
|
||||
# ── API-key providers from PROVIDER_REGISTRY ─────────────────────
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY, resolve_api_key_provider_credentials
|
||||
from hermes_cli.auth import (
|
||||
PROVIDER_REGISTRY,
|
||||
resolve_api_key_provider_credentials,
|
||||
resolve_external_process_provider_credentials,
|
||||
)
|
||||
except ImportError:
|
||||
logger.debug("hermes_cli.auth not available for provider %s", provider)
|
||||
return None, None
|
||||
|
|
@ -1534,6 +1551,41 @@ def resolve_provider_client(
|
|||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
if pconfig.auth_type == "external_process":
|
||||
creds = resolve_external_process_provider_credentials(provider)
|
||||
final_model = _normalize_resolved_model(model or _read_main_model(), provider)
|
||||
if provider == "copilot-acp":
|
||||
api_key = str(creds.get("api_key", "")).strip()
|
||||
base_url = str(creds.get("base_url", "")).strip()
|
||||
command = str(creds.get("command", "")).strip() or None
|
||||
args = list(creds.get("args") or [])
|
||||
if not final_model:
|
||||
logger.warning(
|
||||
"resolve_provider_client: copilot-acp requested but no model "
|
||||
"was provided or configured"
|
||||
)
|
||||
return None, None
|
||||
if not api_key or not base_url:
|
||||
logger.warning(
|
||||
"resolve_provider_client: copilot-acp requested but external "
|
||||
"process credentials are incomplete"
|
||||
)
|
||||
return None, None
|
||||
from agent.copilot_acp_client import CopilotACPClient
|
||||
|
||||
client = CopilotACPClient(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
command=command,
|
||||
args=args,
|
||||
)
|
||||
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning("resolve_provider_client: external-process provider %s not "
|
||||
"directly supported", provider)
|
||||
return None, None
|
||||
|
||||
elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
|
||||
# OAuth providers — route through their specific try functions
|
||||
if provider == "nous":
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ Lifecycle:
|
|||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
class ContextEngine(ABC):
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ import hermes_cli.auth as auth_mod
|
|||
from hermes_cli.auth import (
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
|
||||
KIMI_CODE_BASE_URL,
|
||||
PROVIDER_REGISTRY,
|
||||
_auth_store_lock,
|
||||
_codex_access_token_is_expiring,
|
||||
|
|
@ -289,6 +288,14 @@ def _iter_custom_providers(config: Optional[dict] = None):
|
|||
return
|
||||
custom_providers = config.get("custom_providers")
|
||||
if not isinstance(custom_providers, list):
|
||||
# Fall back to the v12+ providers dict via the compatibility layer
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers
|
||||
|
||||
custom_providers = get_compatible_custom_providers(config)
|
||||
except Exception:
|
||||
return
|
||||
if not custom_providers:
|
||||
return
|
||||
for entry in custom_providers:
|
||||
if not isinstance(entry, dict):
|
||||
|
|
|
|||
|
|
@ -77,12 +77,6 @@ def _diff_ansi() -> dict[str, str]:
|
|||
return _diff_colors_cached
|
||||
|
||||
|
||||
def reset_diff_colors() -> None:
|
||||
"""Reset cached diff colors (call after /skin switch)."""
|
||||
global _diff_colors_cached
|
||||
_diff_colors_cached = None
|
||||
|
||||
|
||||
# Module-level helpers — each call resolves from the active skin lazily.
|
||||
def _diff_dim(): return _diff_ansi()["dim"]
|
||||
def _diff_file(): return _diff_ansi()["file"]
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ from __future__ import annotations
|
|||
|
||||
import enum
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
|
@ -157,6 +156,18 @@ _CONTEXT_OVERFLOW_PATTERNS = [
|
|||
"prompt exceeds max length",
|
||||
"max_tokens",
|
||||
"maximum number of tokens",
|
||||
# vLLM / local inference server patterns
|
||||
"exceeds the max_model_len",
|
||||
"max_model_len",
|
||||
"prompt length", # "engine prompt length X exceeds"
|
||||
"input is too long",
|
||||
"maximum model length",
|
||||
# Ollama patterns
|
||||
"context length exceeded",
|
||||
"truncating input",
|
||||
# llama.cpp / llama-server patterns
|
||||
"slot context", # "slot context: N tokens, prompt N tokens"
|
||||
"n_ctx_slot",
|
||||
# Chinese error messages (some providers return these)
|
||||
"超过最大长度",
|
||||
"上下文长度",
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ from agent.usage_pricing import (
|
|||
DEFAULT_PRICING,
|
||||
estimate_usage_cost,
|
||||
format_duration_compact,
|
||||
get_pricing,
|
||||
has_known_pricing,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ Usage in run_agent.py:
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ and run_agent.py for pre-flight context checks.
|
|||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
|
@ -24,17 +23,19 @@ logger = logging.getLogger(__name__)
|
|||
# are preserved so the full model name reaches cache lookups and server queries.
|
||||
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"gemini", "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"gemini", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
||||
"qwen-oauth",
|
||||
"xiaomi",
|
||||
"arcee",
|
||||
"custom", "local",
|
||||
# Common aliases
|
||||
"google", "google-gemini", "google-ai-studio",
|
||||
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
|
||||
"github-models", "kimi", "moonshot", "claude", "deep-seek",
|
||||
"github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
|
||||
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
"mimo", "xiaomi-mimo",
|
||||
"arcee-ai", "arceeai",
|
||||
"qwen-portal",
|
||||
})
|
||||
|
||||
|
|
@ -105,9 +106,15 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
"claude-sonnet-4.6": 1000000,
|
||||
# Catch-all for older Claude models (must sort after specific entries)
|
||||
"claude": 200000,
|
||||
# OpenAI
|
||||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
"gpt-5.3-codex-spark": 128000, # Spark variant has reduced 128k context
|
||||
"gpt-5.1-chat": 128000, # Chat variant has 128k context
|
||||
"gpt-5": 400000, # GPT-5.x base, mini, codex variants (400k)
|
||||
"gpt-4.1": 1047576,
|
||||
"gpt-5": 128000,
|
||||
"gpt-4": 128000,
|
||||
# Google
|
||||
"gemini": 1048576,
|
||||
|
|
@ -149,6 +156,8 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
"kimi": 262144,
|
||||
# Arcee
|
||||
"trinity": 262144,
|
||||
# OpenRouter
|
||||
"elephant": 262144,
|
||||
# Hugging Face Inference Providers — model IDs use org/name format
|
||||
"Qwen/Qwen3.5-397B-A17B": 131072,
|
||||
"Qwen/Qwen3.5-35B-A3B": 131072,
|
||||
|
|
@ -211,7 +220,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
|||
"api.anthropic.com": "anthropic",
|
||||
"api.z.ai": "zai",
|
||||
"api.moonshot.ai": "kimi-coding",
|
||||
"api.moonshot.cn": "kimi-coding-cn",
|
||||
"api.kimi.com": "kimi-coding",
|
||||
"api.arcee.ai": "arcee",
|
||||
"api.minimax": "minimax",
|
||||
"dashscope.aliyuncs.com": "alibaba",
|
||||
"dashscope-intl.aliyuncs.com": "alibaba",
|
||||
|
|
|
|||
|
|
@ -18,10 +18,8 @@ Other modules should import the dataclasses and query functions from here
|
|||
rather than parsing the raw JSON themselves.
|
||||
"""
|
||||
|
||||
import difflib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
|
@ -148,6 +146,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
|||
"openai-codex": "openai",
|
||||
"zai": "zai",
|
||||
"kimi-coding": "kimi-for-coding",
|
||||
"kimi-coding-cn": "kimi-for-coding",
|
||||
"minimax": "minimax",
|
||||
"minimax-cn": "minimax-cn",
|
||||
"deepseek": "deepseek",
|
||||
|
|
@ -176,13 +175,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
|||
_MODELS_DEV_TO_PROVIDER: Optional[Dict[str, str]] = None
|
||||
|
||||
|
||||
def _get_reverse_mapping() -> Dict[str, str]:
|
||||
"""Return models.dev ID → Hermes provider ID mapping."""
|
||||
global _MODELS_DEV_TO_PROVIDER
|
||||
if _MODELS_DEV_TO_PROVIDER is None:
|
||||
_MODELS_DEV_TO_PROVIDER = {v: k for k, v in PROVIDER_TO_MODELS_DEV.items()}
|
||||
return _MODELS_DEV_TO_PROVIDER
|
||||
|
||||
|
||||
def _get_cache_path() -> Path:
|
||||
"""Return path to disk cache file."""
|
||||
|
|
@ -463,93 +455,6 @@ def list_agentic_models(provider: str) -> List[str]:
|
|||
return result
|
||||
|
||||
|
||||
def search_models_dev(
|
||||
query: str, provider: str = None, limit: int = 5
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Fuzzy search across models.dev catalog. Returns matching model entries.
|
||||
|
||||
Args:
|
||||
query: Search string to match against model IDs.
|
||||
provider: Optional Hermes provider ID to restrict search scope.
|
||||
If None, searches across all providers in PROVIDER_TO_MODELS_DEV.
|
||||
limit: Maximum number of results to return.
|
||||
|
||||
Returns:
|
||||
List of dicts, each containing 'provider', 'model_id', and the full
|
||||
model 'entry' from models.dev.
|
||||
"""
|
||||
data = fetch_models_dev()
|
||||
if not data:
|
||||
return []
|
||||
|
||||
# Build list of (provider_id, model_id, entry) candidates
|
||||
candidates: List[tuple] = []
|
||||
|
||||
if provider is not None:
|
||||
# Search only the specified provider
|
||||
mdev_provider_id = PROVIDER_TO_MODELS_DEV.get(provider)
|
||||
if not mdev_provider_id:
|
||||
return []
|
||||
provider_data = data.get(mdev_provider_id, {})
|
||||
if isinstance(provider_data, dict):
|
||||
models = provider_data.get("models", {})
|
||||
if isinstance(models, dict):
|
||||
for mid, mdata in models.items():
|
||||
candidates.append((provider, mid, mdata))
|
||||
else:
|
||||
# Search across all mapped providers
|
||||
for hermes_prov, mdev_prov in PROVIDER_TO_MODELS_DEV.items():
|
||||
provider_data = data.get(mdev_prov, {})
|
||||
if isinstance(provider_data, dict):
|
||||
models = provider_data.get("models", {})
|
||||
if isinstance(models, dict):
|
||||
for mid, mdata in models.items():
|
||||
candidates.append((hermes_prov, mid, mdata))
|
||||
|
||||
if not candidates:
|
||||
return []
|
||||
|
||||
# Use difflib for fuzzy matching — case-insensitive comparison
|
||||
model_ids_lower = [c[1].lower() for c in candidates]
|
||||
query_lower = query.lower()
|
||||
|
||||
# First try exact substring matches (more intuitive than pure edit-distance)
|
||||
substring_matches = []
|
||||
for prov, mid, mdata in candidates:
|
||||
if query_lower in mid.lower():
|
||||
substring_matches.append({"provider": prov, "model_id": mid, "entry": mdata})
|
||||
|
||||
# Then add difflib fuzzy matches for any remaining slots
|
||||
fuzzy_ids = difflib.get_close_matches(
|
||||
query_lower, model_ids_lower, n=limit * 2, cutoff=0.4
|
||||
)
|
||||
|
||||
seen_ids: set = set()
|
||||
results: List[Dict[str, Any]] = []
|
||||
|
||||
# Prioritize substring matches
|
||||
for match in substring_matches:
|
||||
key = (match["provider"], match["model_id"])
|
||||
if key not in seen_ids:
|
||||
seen_ids.add(key)
|
||||
results.append(match)
|
||||
if len(results) >= limit:
|
||||
return results
|
||||
|
||||
# Add fuzzy matches
|
||||
for fid in fuzzy_ids:
|
||||
# Find original-case candidates matching this lowered ID
|
||||
for prov, mid, mdata in candidates:
|
||||
if mid.lower() == fid:
|
||||
key = (prov, mid)
|
||||
if key not in seen_ids:
|
||||
seen_ids.add(key)
|
||||
results.append({"provider": prov, "model_id": mid, "entry": mdata})
|
||||
if len(results) >= limit:
|
||||
return results
|
||||
|
||||
return results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rich dataclass constructors — parse raw models.dev JSON into dataclasses
|
||||
|
|
|
|||
|
|
@ -376,6 +376,12 @@ PLATFORM_HINTS = {
|
|||
"downloaded and sent as native photos. Do NOT tell the user you lack file-sending "
|
||||
"capability — use MEDIA: syntax whenever a file delivery is appropriate."
|
||||
),
|
||||
"qqbot": (
|
||||
"You are on QQ, a popular Chinese messaging platform. QQ supports markdown formatting "
|
||||
"and emoji. You can send media files natively: include MEDIA:/absolute/path/to/file in "
|
||||
"your response. Images are sent as native photos, and other files arrive as downloadable "
|
||||
"documents."
|
||||
),
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ from __future__ import annotations
|
|||
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Mapping, Optional
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
|
|||
|
|
@ -575,25 +575,6 @@ def has_known_pricing(
|
|||
return entry is not None
|
||||
|
||||
|
||||
def get_pricing(
|
||||
model_name: str,
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
) -> Dict[str, float]:
|
||||
"""Backward-compatible thin wrapper for legacy callers.
|
||||
|
||||
Returns only non-cache input/output fields when a pricing entry exists.
|
||||
Unknown routes return zeroes.
|
||||
"""
|
||||
entry = get_pricing_entry(model_name, provider=provider, base_url=base_url, api_key=api_key)
|
||||
if not entry:
|
||||
return {"input": 0.0, "output": 0.0}
|
||||
return {
|
||||
"input": float(entry.input_cost_per_million or _ZERO),
|
||||
"output": float(entry.output_cost_per_million or _ZERO),
|
||||
}
|
||||
|
||||
|
||||
def format_duration_compact(seconds: float) -> str:
|
||||
if seconds < 60:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue