mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge pull request #2110 from NousResearch/hermes/hermes-5d6932ba
fix: session reset + custom provider model switch + honcho base_url
This commit is contained in:
commit
b7b585656b
11 changed files with 214 additions and 28 deletions
|
|
@ -206,11 +206,11 @@ PLATFORM_HINTS = {
|
|||
"contextually appropriate."
|
||||
),
|
||||
"cron": (
|
||||
"You are running as a scheduled cron job. Your final response is automatically "
|
||||
"delivered to the job's configured destination, so do not use send_message to "
|
||||
"send to that same target again. If you want the user to receive something in "
|
||||
"the scheduled destination, put it directly in your final response. Use "
|
||||
"send_message only for additional or different targets."
|
||||
"You are running as a scheduled cron job. There is no user present — you "
|
||||
"cannot ask questions, request clarification, or wait for follow-up. Execute "
|
||||
"the task fully and autonomously, making reasonable decisions where needed. "
|
||||
"Your final response is automatically delivered to the job's configured "
|
||||
"destination — put the primary content directly in your response."
|
||||
),
|
||||
"cli": (
|
||||
"You are a CLI AI Agent. Try not to use markdown but simple text "
|
||||
|
|
|
|||
20
cli.py
20
cli.py
|
|
@ -3517,8 +3517,17 @@ class HermesCLI:
|
|||
# Parse provider:model syntax (e.g. "openrouter:anthropic/claude-sonnet-4.5")
|
||||
current_provider = self.provider or self.requested_provider or "openrouter"
|
||||
target_provider, new_model = parse_model_input(raw_input, current_provider)
|
||||
# Auto-detect provider when no explicit provider:model syntax was used
|
||||
if target_provider == current_provider:
|
||||
# Auto-detect provider when no explicit provider:model syntax was used.
|
||||
# Skip auto-detection for custom providers — the model name might
|
||||
# coincidentally match a known provider's catalog, but the user
|
||||
# intends to use it on their custom endpoint. Require explicit
|
||||
# provider:model syntax (e.g. /model openai-codex:gpt-5.2-codex)
|
||||
# to switch away from a custom endpoint.
|
||||
_base = self.base_url or ""
|
||||
is_custom = current_provider == "custom" or (
|
||||
"localhost" in _base or "127.0.0.1" in _base
|
||||
)
|
||||
if target_provider == current_provider and not is_custom:
|
||||
from hermes_cli.models import detect_provider_for_model
|
||||
detected = detect_provider_for_model(new_model, current_provider)
|
||||
if detected:
|
||||
|
|
@ -3586,6 +3595,13 @@ class HermesCLI:
|
|||
if message:
|
||||
print(f" Reason: {message}")
|
||||
print(" Note: Model will revert on restart. Use a verified model to save to config.")
|
||||
|
||||
# Helpful hint when staying on a custom endpoint
|
||||
if is_custom and not provider_changed:
|
||||
endpoint = self.base_url or "custom endpoint"
|
||||
print(f" Endpoint: {endpoint}")
|
||||
print(f" Tip: To switch providers, use /model provider:model")
|
||||
print(f" e.g. /model openai-codex:gpt-5.2-codex")
|
||||
else:
|
||||
self._show_model_and_providers()
|
||||
elif canonical == "provider":
|
||||
|
|
|
|||
|
|
@ -391,7 +391,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
provider_sort=pr.get("sort"),
|
||||
disabled_toolsets=["cronjob"],
|
||||
disabled_toolsets=["cronjob", "messaging", "clarify"],
|
||||
quiet_mode=True,
|
||||
platform="cron",
|
||||
session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}",
|
||||
|
|
|
|||
|
|
@ -670,6 +670,11 @@ OPTIONAL_ENV_VARS = {
|
|||
"password": True,
|
||||
"category": "tool",
|
||||
},
|
||||
"HONCHO_BASE_URL": {
|
||||
"description": "Base URL for self-hosted Honcho instances (no API key needed)",
|
||||
"prompt": "Honcho base URL (e.g. http://localhost:8000)",
|
||||
"category": "tool",
|
||||
},
|
||||
|
||||
# ── Messaging platforms ──
|
||||
"TELEGRAM_BOT_TOKEN": {
|
||||
|
|
|
|||
|
|
@ -24,6 +24,18 @@ def _normalize_custom_provider_name(value: str) -> str:
|
|||
return value.strip().lower().replace(" ", "-")
|
||||
|
||||
|
||||
def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
    """Auto-detect api_mode from the resolved base URL.

    Direct api.openai.com endpoints need the Responses API for GPT-5.x
    tool calls with reasoning (chat/completions returns 400).

    The decision is made on the URL's host rather than on a substring of the
    whole URL, so an unrelated endpoint that merely mentions
    ``api.openai.com`` in its path, query string, userinfo or as a prefix of
    a longer domain is not mistaken for OpenAI.

    Args:
        base_url: Resolved endpoint URL. May be empty or ``None``; a missing
            scheme (``api.openai.com/v1``) is tolerated.

    Returns:
        ``"codex_responses"`` when the host is api.openai.com or a subdomain
        of it, otherwise ``None`` so the caller can apply its own default.
    """
    from urllib.parse import urlsplit

    normalized = (base_url or "").strip().lower().rstrip("/")
    # Any URL mentioning openrouter is never treated as direct OpenAI.
    if not normalized or "openrouter" in normalized:
        return None

    # urlsplit only recognises a network location after "//"; add it for
    # scheme-less input so "api.openai.com/v1" still resolves to a host.
    has_netloc_marker = "://" in normalized or normalized.startswith("//")
    candidate = normalized if has_netloc_marker else f"//{normalized}"
    try:
        host = urlsplit(candidate).hostname or ""
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): no opinion.
        return None

    if host == "api.openai.com" or host.endswith(".api.openai.com"):
        return "codex_responses"
    return None
|
||||
|
||||
|
||||
def _auto_detect_local_model(base_url: str) -> str:
|
||||
"""Query a local server for its model name when only one model is loaded."""
|
||||
if not base_url:
|
||||
|
|
@ -185,7 +197,9 @@ def _resolve_named_custom_runtime(
|
|||
|
||||
return {
|
||||
"provider": "openrouter",
|
||||
"api_mode": custom_provider.get("api_mode", "chat_completions"),
|
||||
"api_mode": custom_provider.get("api_mode")
|
||||
or _detect_api_mode_for_url(base_url)
|
||||
or "chat_completions",
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"source": f"custom_provider:{custom_provider.get('name', requested_provider)}",
|
||||
|
|
@ -263,7 +277,9 @@ def _resolve_openrouter_runtime(
|
|||
|
||||
return {
|
||||
"provider": "openrouter",
|
||||
"api_mode": _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions",
|
||||
"api_mode": _parse_api_mode(model_cfg.get("api_mode"))
|
||||
or _detect_api_mode_for_url(base_url)
|
||||
or "chat_completions",
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"source": source,
|
||||
|
|
|
|||
|
|
@ -117,11 +117,13 @@ class HonchoClientConfig:
|
|||
def from_env(cls, workspace_id: str = "hermes") -> HonchoClientConfig:
|
||||
"""Create config from environment variables (fallback)."""
|
||||
api_key = os.environ.get("HONCHO_API_KEY")
|
||||
base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
|
||||
return cls(
|
||||
workspace_id=workspace_id,
|
||||
api_key=api_key,
|
||||
environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
|
||||
enabled=bool(api_key),
|
||||
base_url=base_url,
|
||||
enabled=bool(api_key or base_url),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
|
|
@ -171,8 +173,14 @@ class HonchoClientConfig:
|
|||
or raw.get("environment", "production")
|
||||
)
|
||||
|
||||
# Auto-enable when API key is present (unless explicitly disabled)
|
||||
# Host-level enabled wins, then root-level, then auto-enable if key exists.
|
||||
base_url = (
|
||||
raw.get("baseUrl")
|
||||
or os.environ.get("HONCHO_BASE_URL", "").strip()
|
||||
or None
|
||||
)
|
||||
|
||||
# Auto-enable when API key or base_url is present (unless explicitly disabled)
|
||||
# Host-level enabled wins, then root-level, then auto-enable if key/url exists.
|
||||
host_enabled = host_block.get("enabled")
|
||||
root_enabled = raw.get("enabled")
|
||||
if host_enabled is not None:
|
||||
|
|
@ -180,8 +188,8 @@ class HonchoClientConfig:
|
|||
elif root_enabled is not None:
|
||||
enabled = root_enabled
|
||||
else:
|
||||
# Not explicitly set anywhere -> auto-enable if API key exists
|
||||
enabled = bool(api_key)
|
||||
# Not explicitly set anywhere -> auto-enable if API key or base_url exists
|
||||
enabled = bool(api_key or base_url)
|
||||
|
||||
# write_frequency: accept int or string
|
||||
raw_wf = (
|
||||
|
|
@ -214,6 +222,7 @@ class HonchoClientConfig:
|
|||
workspace_id=workspace,
|
||||
api_key=api_key,
|
||||
environment=environment,
|
||||
base_url=base_url,
|
||||
peer_name=host_block.get("peerName") or raw.get("peerName"),
|
||||
ai_peer=ai_peer,
|
||||
linked_hosts=linked_hosts,
|
||||
|
|
@ -348,11 +357,12 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
|||
if config is None:
|
||||
config = HonchoClientConfig.from_global_config()
|
||||
|
||||
if not config.api_key:
|
||||
if not config.api_key and not config.base_url:
|
||||
raise ValueError(
|
||||
"Honcho API key not found. "
|
||||
"Get your API key at https://app.honcho.dev, "
|
||||
"then run 'hermes honcho setup' or set HONCHO_API_KEY."
|
||||
"then run 'hermes honcho setup' or set HONCHO_API_KEY. "
|
||||
"For local instances, set HONCHO_BASE_URL instead."
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
|
|||
24
run_agent.py
24
run_agent.py
|
|
@ -501,6 +501,12 @@ class AIAgent:
|
|||
else:
|
||||
self.api_mode = "chat_completions"
|
||||
|
||||
# Direct OpenAI sessions use the Responses API path. GPT-5.x tool
|
||||
# calls with reasoning are rejected on /v1/chat/completions, and
|
||||
# Hermes is a tool-using client by default.
|
||||
if self.api_mode == "chat_completions" and self._is_direct_openai_url():
|
||||
self.api_mode = "codex_responses"
|
||||
|
||||
# Pre-warm OpenRouter model metadata cache in a background thread.
|
||||
# fetch_model_metadata() is cached for 1 hour; this avoids a blocking
|
||||
# HTTP request on the first API response when pricing is estimated.
|
||||
|
|
@ -1057,6 +1063,9 @@ class AIAgent:
|
|||
if hasattr(self, "context_compressor") and self.context_compressor:
|
||||
self.context_compressor.last_prompt_tokens = 0
|
||||
self.context_compressor.last_completion_tokens = 0
|
||||
self.context_compressor.last_total_tokens = 0
|
||||
self.context_compressor.compression_count = 0
|
||||
self.context_compressor._context_probed = False
|
||||
|
||||
@staticmethod
|
||||
def _safe_print(*args, **kwargs):
|
||||
|
|
@ -1085,6 +1094,11 @@ class AIAgent:
|
|||
return
|
||||
self._safe_print(*args, **kwargs)
|
||||
|
||||
def _is_direct_openai_url(self, base_url: str = None) -> bool:
    """Return True when a base URL targets OpenAI's native API.

    The check is made on the URL's host rather than on a substring of the
    whole URL, so a proxy path, userinfo or lookalike domain that merely
    contains ``api.openai.com`` does not count as a direct OpenAI endpoint.

    Args:
        base_url: URL to inspect. When falsy, the agent's own
            ``self._base_url_lower`` is used instead.

    Returns:
        True if the host is api.openai.com (or a subdomain of it) and the
        URL does not mention openrouter; False otherwise.
    """
    from urllib.parse import urlsplit

    url = (base_url or self._base_url_lower).strip().lower()
    # Any URL mentioning openrouter is never treated as direct OpenAI.
    if not url or "openrouter" in url:
        return False

    # urlsplit needs "//" to recognise a host; tolerate scheme-less URLs.
    candidate = url if ("://" in url or url.startswith("//")) else f"//{url}"
    try:
        host = urlsplit(candidate).hostname or ""
    except ValueError:
        # Malformed URL: treat as not OpenAI rather than raising.
        return False

    return host == "api.openai.com" or host.endswith(".api.openai.com")
|
||||
|
||||
def _max_tokens_param(self, value: int) -> dict:
|
||||
"""Return the correct max tokens kwarg for the current provider.
|
||||
|
||||
|
|
@ -1092,11 +1106,7 @@ class AIAgent:
|
|||
'max_completion_tokens'. OpenRouter, local models, and older
|
||||
OpenAI models use 'max_tokens'.
|
||||
"""
|
||||
_is_direct_openai = (
|
||||
"api.openai.com" in self._base_url_lower
|
||||
and "openrouter" not in self._base_url_lower
|
||||
)
|
||||
if _is_direct_openai:
|
||||
if self._is_direct_openai_url():
|
||||
return {"max_completion_tokens": value}
|
||||
return {"max_tokens": value}
|
||||
|
||||
|
|
@ -3558,13 +3568,15 @@ class AIAgent:
|
|||
fb_provider)
|
||||
return False
|
||||
|
||||
# Determine api_mode from provider
|
||||
# Determine api_mode from provider / base URL
|
||||
fb_api_mode = "chat_completions"
|
||||
fb_base_url = str(fb_client.base_url)
|
||||
if fb_provider == "openai-codex":
|
||||
fb_api_mode = "codex_responses"
|
||||
elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
|
||||
fb_api_mode = "anthropic_messages"
|
||||
elif self._is_direct_openai_url(fb_base_url):
|
||||
fb_api_mode = "codex_responses"
|
||||
|
||||
old_model = self.model
|
||||
self.model = fb_model
|
||||
|
|
|
|||
|
|
@ -60,6 +60,21 @@ class TestFromEnv:
|
|||
config = HonchoClientConfig.from_env(workspace_id="custom")
|
||||
assert config.workspace_id == "custom"
|
||||
|
||||
def test_reads_base_url_from_env(self):
    """from_env() picks up HONCHO_BASE_URL and reports the config as enabled."""
    env_overlay = {"HONCHO_BASE_URL": "http://localhost:8000"}
    with patch.dict(os.environ, env_overlay, clear=False):
        cfg = HonchoClientConfig.from_env()
        assert cfg.base_url == "http://localhost:8000"
        assert cfg.enabled is True
|
||||
|
||||
def test_enabled_without_api_key_when_base_url_set(self):
    """A base URL with no API key is enough to enable a local instance."""
    env_overlay = {"HONCHO_BASE_URL": "http://localhost:8000"}
    with patch.dict(os.environ, env_overlay, clear=False):
        # Remove any ambient key so only the base URL can enable the config.
        os.environ.pop("HONCHO_API_KEY", None)
        cfg = HonchoClientConfig.from_env()
        assert cfg.api_key is None
        assert cfg.base_url == "http://localhost:8000"
        assert cfg.enabled is True
|
||||
|
||||
|
||||
class TestFromGlobalConfig:
|
||||
def test_missing_config_falls_back_to_env(self, tmp_path):
|
||||
|
|
@ -188,6 +203,36 @@ class TestFromGlobalConfig:
|
|||
config = HonchoClientConfig.from_global_config(config_path=config_file)
|
||||
assert config.api_key == "env-key"
|
||||
|
||||
def test_base_url_env_fallback(self, tmp_path):
    """With no baseUrl in the config JSON, HONCHO_BASE_URL supplies it."""
    cfg_path = tmp_path / "config.json"
    cfg_path.write_text(json.dumps({"workspace": "local"}))

    env_overlay = {"HONCHO_BASE_URL": "http://localhost:8000"}
    with patch.dict(os.environ, env_overlay, clear=False):
        cfg = HonchoClientConfig.from_global_config(config_path=cfg_path)
        assert cfg.base_url == "http://localhost:8000"
        assert cfg.enabled is True
|
||||
|
||||
def test_base_url_from_config_root(self, tmp_path):
    """A root-level baseUrl in the config file wins over HONCHO_BASE_URL."""
    cfg_path = tmp_path / "config.json"
    cfg_path.write_text(json.dumps({"baseUrl": "http://config-host:9000"}))

    env_overlay = {"HONCHO_BASE_URL": "http://localhost:8000"}
    with patch.dict(os.environ, env_overlay, clear=False):
        cfg = HonchoClientConfig.from_global_config(config_path=cfg_path)
        assert cfg.base_url == "http://config-host:9000"
|
||||
|
||||
def test_base_url_not_read_from_host_block(self, tmp_path):
    """baseUrl is taken from the config root; a per-host baseUrl is ignored."""
    payload = {
        "baseUrl": "http://root:9000",
        "hosts": {"hermes": {"baseUrl": "http://host-block:9001"}},
    }
    cfg_path = tmp_path / "config.json"
    cfg_path.write_text(json.dumps(payload))

    cfg = HonchoClientConfig.from_global_config(config_path=cfg_path)
    assert cfg.base_url == "http://root:9000"
|
||||
|
||||
|
||||
class TestResolveSessionName:
|
||||
def test_manual_override(self):
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ def _make_cli(env_overrides=None, config_overrides=None, **kwargs):
|
|||
"prompt_toolkit.key_binding": MagicMock(),
|
||||
"prompt_toolkit.completion": MagicMock(),
|
||||
"prompt_toolkit.formatted_text": MagicMock(),
|
||||
"prompt_toolkit.auto_suggest": MagicMock(),
|
||||
}
|
||||
with patch.dict(sys.modules, prompt_toolkit_stubs), \
|
||||
patch.dict("os.environ", clean_env, clear=False):
|
||||
|
|
|
|||
|
|
@ -12,6 +12,17 @@ from hermes_state import SessionDB
|
|||
from tools.todo_tool import TodoStore
|
||||
|
||||
|
||||
class _FakeCompressor:
    """Minimal stand-in for ContextCompressor.

    Every field starts at a non-zero / truthy value so a test can tell
    whether a reset actually cleared it.
    """

    def __init__(self):
        # Token usage recorded for the most recent call.
        self.last_prompt_tokens, self.last_completion_tokens = 500, 200
        self.last_total_tokens = 700
        # Bookkeeping flags that a reset is expected to clear.
        self.compression_count = 3
        self._context_probed = True
|
||||
|
||||
|
||||
class _FakeAgent:
|
||||
def __init__(self, session_id: str, session_start):
|
||||
self.session_id = session_id
|
||||
|
|
@ -25,6 +36,42 @@ class _FakeAgent:
|
|||
self.flush_memories = MagicMock()
|
||||
self._invalidate_system_prompt = MagicMock()
|
||||
|
||||
# Token counters (non-zero to verify reset)
|
||||
self.session_total_tokens = 1000
|
||||
self.session_input_tokens = 600
|
||||
self.session_output_tokens = 400
|
||||
self.session_prompt_tokens = 550
|
||||
self.session_completion_tokens = 350
|
||||
self.session_cache_read_tokens = 100
|
||||
self.session_cache_write_tokens = 50
|
||||
self.session_reasoning_tokens = 80
|
||||
self.session_api_calls = 5
|
||||
self.session_estimated_cost_usd = 0.42
|
||||
self.session_cost_status = "estimated"
|
||||
self.session_cost_source = "openrouter"
|
||||
self.context_compressor = _FakeCompressor()
|
||||
|
||||
def reset_session_state(self):
    """Mirror the real AIAgent.reset_session_state().

    Zeroes the per-session token/API counters, resets the cost fields to
    their "nothing recorded" values, and clears the attached context
    compressor's counters when one is present.
    """
    # Integer counters all restart from zero.
    for counter_name in (
        "session_total_tokens",
        "session_input_tokens",
        "session_output_tokens",
        "session_prompt_tokens",
        "session_completion_tokens",
        "session_cache_read_tokens",
        "session_cache_write_tokens",
        "session_reasoning_tokens",
        "session_api_calls",
    ):
        setattr(self, counter_name, 0)

    # Cost tracking goes back to "nothing recorded".
    self.session_estimated_cost_usd = 0.0
    self.session_cost_status = "unknown"
    self.session_cost_source = "none"

    # The compressor is optional: skip it when missing or falsy.
    compressor = getattr(self, "context_compressor", None)
    if not compressor:
        return
    for field_name in (
        "last_prompt_tokens",
        "last_completion_tokens",
        "last_total_tokens",
        "compression_count",
    ):
        setattr(compressor, field_name, 0)
    compressor._context_probed = False
|
||||
|
||||
|
||||
def _make_cli(env_overrides=None, config_overrides=None, **kwargs):
|
||||
"""Create a HermesCLI instance with minimal mocking."""
|
||||
|
|
@ -58,6 +105,7 @@ def _make_cli(env_overrides=None, config_overrides=None, **kwargs):
|
|||
"prompt_toolkit.key_binding": MagicMock(),
|
||||
"prompt_toolkit.completion": MagicMock(),
|
||||
"prompt_toolkit.formatted_text": MagicMock(),
|
||||
"prompt_toolkit.auto_suggest": MagicMock(),
|
||||
}
|
||||
with patch.dict(sys.modules, prompt_toolkit_stubs), patch.dict(
|
||||
"os.environ", clean_env, clear=False
|
||||
|
|
@ -137,3 +185,38 @@ def test_clear_command_starts_new_session_before_redrawing(tmp_path):
|
|||
cli.console.clear.assert_called_once()
|
||||
cli.show_banner.assert_called_once()
|
||||
assert cli.conversation_history == []
|
||||
|
||||
|
||||
def test_new_session_resets_token_counters(tmp_path):
    """Regression test for #2099: /new must zero all token counters."""
    cli = _prepare_cli_with_active_session(tmp_path)
    agent = cli.agent

    # Precondition: the fixture starts with counters that are not zero.
    for seeded in ("session_total_tokens", "session_api_calls"):
        assert getattr(agent, seeded) > 0
    assert agent.context_compressor.compression_count > 0

    cli.process_command("/new")

    # Every integer counter on the agent must be back at zero.
    for counter_name in (
        "session_total_tokens",
        "session_input_tokens",
        "session_output_tokens",
        "session_prompt_tokens",
        "session_completion_tokens",
        "session_cache_read_tokens",
        "session_cache_write_tokens",
        "session_reasoning_tokens",
        "session_api_calls",
    ):
        assert getattr(agent, counter_name) == 0
    assert agent.session_estimated_cost_usd == 0.0
    assert agent.session_cost_status == "unknown"
    assert agent.session_cost_source == "none"

    # The context compressor's own counters must be cleared as well.
    compressor = agent.context_compressor
    for field_name in (
        "last_prompt_tokens",
        "last_completion_tokens",
        "last_total_tokens",
        "compression_count",
    ):
        assert getattr(compressor, field_name) == 0
    assert compressor._context_probed is False
|
||||
|
|
|
|||
|
|
@ -336,11 +336,9 @@ Jobs run in a fresh session with no current-chat context, so prompts must be sel
|
|||
If skill or skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction.
|
||||
On update, passing skills=[] clears attached skills.
|
||||
|
||||
NOTE: The agent's final response is auto-delivered to the target — do NOT use
|
||||
send_message in the prompt for that same destination. Same-target send_message
|
||||
calls are skipped to avoid duplicate cron deliveries. Put the primary
|
||||
user-facing content in the final response, and use send_message only for
|
||||
additional or different targets.
|
||||
NOTE: The agent's final response is auto-delivered to the target. Put the primary
|
||||
user-facing content in the final response. Cron jobs run autonomously with no user
|
||||
present — they cannot ask questions or request clarification.
|
||||
|
||||
Important safety rule: cron-run sessions should not recursively schedule more cron jobs.""",
|
||||
"parameters": {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue