Merge pull request #2110 from NousResearch/hermes/hermes-5d6932ba

fix: session reset + custom provider model switch + honcho base_url
This commit is contained in:
Teknium 2026-03-20 06:01:44 -07:00 committed by GitHub
commit b7b585656b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 214 additions and 28 deletions

View file

@ -206,11 +206,11 @@ PLATFORM_HINTS = {
"contextually appropriate."
),
"cron": (
"You are running as a scheduled cron job. Your final response is automatically "
"delivered to the job's configured destination, so do not use send_message to "
"send to that same target again. If you want the user to receive something in "
"the scheduled destination, put it directly in your final response. Use "
"send_message only for additional or different targets."
"You are running as a scheduled cron job. There is no user present — you "
"cannot ask questions, request clarification, or wait for follow-up. Execute "
"the task fully and autonomously, making reasonable decisions where needed. "
"Your final response is automatically delivered to the job's configured "
"destination — put the primary content directly in your response."
),
"cli": (
"You are a CLI AI Agent. Try not to use markdown but simple text "

20
cli.py
View file

@ -3517,8 +3517,17 @@ class HermesCLI:
# Parse provider:model syntax (e.g. "openrouter:anthropic/claude-sonnet-4.5")
current_provider = self.provider or self.requested_provider or "openrouter"
target_provider, new_model = parse_model_input(raw_input, current_provider)
# Auto-detect provider when no explicit provider:model syntax was used
if target_provider == current_provider:
# Auto-detect provider when no explicit provider:model syntax was used.
# Skip auto-detection for custom providers — the model name might
# coincidentally match a known provider's catalog, but the user
# intends to use it on their custom endpoint. Require explicit
# provider:model syntax (e.g. /model openai-codex:gpt-5.2-codex)
# to switch away from a custom endpoint.
_base = self.base_url or ""
is_custom = current_provider == "custom" or (
"localhost" in _base or "127.0.0.1" in _base
)
if target_provider == current_provider and not is_custom:
from hermes_cli.models import detect_provider_for_model
detected = detect_provider_for_model(new_model, current_provider)
if detected:
@ -3586,6 +3595,13 @@ class HermesCLI:
if message:
print(f" Reason: {message}")
print(" Note: Model will revert on restart. Use a verified model to save to config.")
# Helpful hint when staying on a custom endpoint
if is_custom and not provider_changed:
endpoint = self.base_url or "custom endpoint"
print(f" Endpoint: {endpoint}")
print(f" Tip: To switch providers, use /model provider:model")
print(f" e.g. /model openai-codex:gpt-5.2-codex")
else:
self._show_model_and_providers()
elif canonical == "provider":

View file

@ -391,7 +391,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
providers_ignored=pr.get("ignore"),
providers_order=pr.get("order"),
provider_sort=pr.get("sort"),
disabled_toolsets=["cronjob"],
disabled_toolsets=["cronjob", "messaging", "clarify"],
quiet_mode=True,
platform="cron",
session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}",

View file

@ -670,6 +670,11 @@ OPTIONAL_ENV_VARS = {
"password": True,
"category": "tool",
},
"HONCHO_BASE_URL": {
"description": "Base URL for self-hosted Honcho instances (no API key needed)",
"prompt": "Honcho base URL (e.g. http://localhost:8000)",
"category": "tool",
},
# ── Messaging platforms ──
"TELEGRAM_BOT_TOKEN": {

View file

@ -24,6 +24,18 @@ def _normalize_custom_provider_name(value: str) -> str:
return value.strip().lower().replace(" ", "-")
def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
"""Auto-detect api_mode from the resolved base URL.
Direct api.openai.com endpoints need the Responses API for GPT-5.x
tool calls with reasoning (chat/completions returns 400).
"""
normalized = (base_url or "").strip().lower().rstrip("/")
if "api.openai.com" in normalized and "openrouter" not in normalized:
return "codex_responses"
return None
def _auto_detect_local_model(base_url: str) -> str:
"""Query a local server for its model name when only one model is loaded."""
if not base_url:
@ -185,7 +197,9 @@ def _resolve_named_custom_runtime(
return {
"provider": "openrouter",
"api_mode": custom_provider.get("api_mode", "chat_completions"),
"api_mode": custom_provider.get("api_mode")
or _detect_api_mode_for_url(base_url)
or "chat_completions",
"base_url": base_url,
"api_key": api_key,
"source": f"custom_provider:{custom_provider.get('name', requested_provider)}",
@ -263,7 +277,9 @@ def _resolve_openrouter_runtime(
return {
"provider": "openrouter",
"api_mode": _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions",
"api_mode": _parse_api_mode(model_cfg.get("api_mode"))
or _detect_api_mode_for_url(base_url)
or "chat_completions",
"base_url": base_url,
"api_key": api_key,
"source": source,

View file

@ -117,11 +117,13 @@ class HonchoClientConfig:
def from_env(cls, workspace_id: str = "hermes") -> HonchoClientConfig:
"""Create config from environment variables (fallback)."""
api_key = os.environ.get("HONCHO_API_KEY")
base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
return cls(
workspace_id=workspace_id,
api_key=api_key,
environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
enabled=bool(api_key),
base_url=base_url,
enabled=bool(api_key or base_url),
)
@classmethod
@ -171,8 +173,14 @@ class HonchoClientConfig:
or raw.get("environment", "production")
)
# Auto-enable when API key is present (unless explicitly disabled)
# Host-level enabled wins, then root-level, then auto-enable if key exists.
base_url = (
raw.get("baseUrl")
or os.environ.get("HONCHO_BASE_URL", "").strip()
or None
)
# Auto-enable when API key or base_url is present (unless explicitly disabled)
# Host-level enabled wins, then root-level, then auto-enable if key/url exists.
host_enabled = host_block.get("enabled")
root_enabled = raw.get("enabled")
if host_enabled is not None:
@ -180,8 +188,8 @@ class HonchoClientConfig:
elif root_enabled is not None:
enabled = root_enabled
else:
# Not explicitly set anywhere -> auto-enable if API key exists
enabled = bool(api_key)
# Not explicitly set anywhere -> auto-enable if API key or base_url exists
enabled = bool(api_key or base_url)
# write_frequency: accept int or string
raw_wf = (
@ -214,6 +222,7 @@ class HonchoClientConfig:
workspace_id=workspace,
api_key=api_key,
environment=environment,
base_url=base_url,
peer_name=host_block.get("peerName") or raw.get("peerName"),
ai_peer=ai_peer,
linked_hosts=linked_hosts,
@ -348,11 +357,12 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
if config is None:
config = HonchoClientConfig.from_global_config()
if not config.api_key:
if not config.api_key and not config.base_url:
raise ValueError(
"Honcho API key not found. "
"Get your API key at https://app.honcho.dev, "
"then run 'hermes honcho setup' or set HONCHO_API_KEY."
"then run 'hermes honcho setup' or set HONCHO_API_KEY. "
"For local instances, set HONCHO_BASE_URL instead."
)
try:

View file

@ -501,6 +501,12 @@ class AIAgent:
else:
self.api_mode = "chat_completions"
# Direct OpenAI sessions use the Responses API path. GPT-5.x tool
# calls with reasoning are rejected on /v1/chat/completions, and
# Hermes is a tool-using client by default.
if self.api_mode == "chat_completions" and self._is_direct_openai_url():
self.api_mode = "codex_responses"
# Pre-warm OpenRouter model metadata cache in a background thread.
# fetch_model_metadata() is cached for 1 hour; this avoids a blocking
# HTTP request on the first API response when pricing is estimated.
@ -1057,6 +1063,9 @@ class AIAgent:
if hasattr(self, "context_compressor") and self.context_compressor:
self.context_compressor.last_prompt_tokens = 0
self.context_compressor.last_completion_tokens = 0
self.context_compressor.last_total_tokens = 0
self.context_compressor.compression_count = 0
self.context_compressor._context_probed = False
@staticmethod
def _safe_print(*args, **kwargs):
@ -1085,6 +1094,11 @@ class AIAgent:
return
self._safe_print(*args, **kwargs)
def _is_direct_openai_url(self, base_url: str = None) -> bool:
"""Return True when a base URL targets OpenAI's native API."""
url = (base_url or self._base_url_lower).lower()
return "api.openai.com" in url and "openrouter" not in url
def _max_tokens_param(self, value: int) -> dict:
"""Return the correct max tokens kwarg for the current provider.
@ -1092,11 +1106,7 @@ class AIAgent:
'max_completion_tokens'. OpenRouter, local models, and older
OpenAI models use 'max_tokens'.
"""
_is_direct_openai = (
"api.openai.com" in self._base_url_lower
and "openrouter" not in self._base_url_lower
)
if _is_direct_openai:
if self._is_direct_openai_url():
return {"max_completion_tokens": value}
return {"max_tokens": value}
@ -3558,13 +3568,15 @@ class AIAgent:
fb_provider)
return False
# Determine api_mode from provider
# Determine api_mode from provider / base URL
fb_api_mode = "chat_completions"
fb_base_url = str(fb_client.base_url)
if fb_provider == "openai-codex":
fb_api_mode = "codex_responses"
elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
fb_api_mode = "anthropic_messages"
elif self._is_direct_openai_url(fb_base_url):
fb_api_mode = "codex_responses"
old_model = self.model
self.model = fb_model

View file

@ -60,6 +60,21 @@ class TestFromEnv:
config = HonchoClientConfig.from_env(workspace_id="custom")
assert config.workspace_id == "custom"
def test_reads_base_url_from_env(self):
    """HONCHO_BASE_URL in the environment populates base_url and enables the client."""
    env = {"HONCHO_BASE_URL": "http://localhost:8000"}
    with patch.dict(os.environ, env, clear=False):
        config = HonchoClientConfig.from_env()
        assert config.base_url == "http://localhost:8000"
        assert config.enabled is True
def test_enabled_without_api_key_when_base_url_set(self):
    """base_url alone (no API key) is sufficient to enable a local instance."""
    with patch.dict(os.environ, {"HONCHO_BASE_URL": "http://localhost:8000"}, clear=False):
        # Drop any ambient key so only the URL can drive enablement.
        os.environ.pop("HONCHO_API_KEY", None)
        config = HonchoClientConfig.from_env()
        assert config.api_key is None
        assert config.enabled is True
        assert config.base_url == "http://localhost:8000"
class TestFromGlobalConfig:
def test_missing_config_falls_back_to_env(self, tmp_path):
@ -188,6 +203,36 @@ class TestFromGlobalConfig:
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.api_key == "env-key"
def test_base_url_env_fallback(self, tmp_path):
    """HONCHO_BASE_URL env var is used when no baseUrl in config JSON."""
    config_file = tmp_path / "config.json"
    config_file.write_text(json.dumps({"workspace": "local"}))
    env = {"HONCHO_BASE_URL": "http://localhost:8000"}
    with patch.dict(os.environ, env, clear=False):
        config = HonchoClientConfig.from_global_config(config_path=config_file)
        assert config.base_url == "http://localhost:8000"
        assert config.enabled is True
def test_base_url_from_config_root(self, tmp_path):
    """A root-level baseUrl in the config JSON beats the HONCHO_BASE_URL env var."""
    config_file = tmp_path / "config.json"
    config_file.write_text(json.dumps({"baseUrl": "http://config-host:9000"}))
    env = {"HONCHO_BASE_URL": "http://localhost:8000"}
    with patch.dict(os.environ, env, clear=False):
        config = HonchoClientConfig.from_global_config(config_path=config_file)
        assert config.base_url == "http://config-host:9000"
def test_base_url_not_read_from_host_block(self, tmp_path):
    """baseUrl is a root-level connection setting, not overridable per-host (consistent with apiKey)."""
    payload = {
        "baseUrl": "http://root:9000",
        "hosts": {"hermes": {"baseUrl": "http://host-block:9001"}},
    }
    config_file = tmp_path / "config.json"
    config_file.write_text(json.dumps(payload))
    config = HonchoClientConfig.from_global_config(config_path=config_file)
    assert config.base_url == "http://root:9000"
class TestResolveSessionName:
def test_manual_override(self):

View file

@ -42,6 +42,7 @@ def _make_cli(env_overrides=None, config_overrides=None, **kwargs):
"prompt_toolkit.key_binding": MagicMock(),
"prompt_toolkit.completion": MagicMock(),
"prompt_toolkit.formatted_text": MagicMock(),
"prompt_toolkit.auto_suggest": MagicMock(),
}
with patch.dict(sys.modules, prompt_toolkit_stubs), \
patch.dict("os.environ", clean_env, clear=False):

View file

@ -12,6 +12,17 @@ from hermes_state import SessionDB
from tools.todo_tool import TodoStore
class _FakeCompressor:
"""Minimal stand-in for ContextCompressor."""
def __init__(self):
self.last_prompt_tokens = 500
self.last_completion_tokens = 200
self.last_total_tokens = 700
self.compression_count = 3
self._context_probed = True
class _FakeAgent:
def __init__(self, session_id: str, session_start):
self.session_id = session_id
@ -25,6 +36,42 @@ class _FakeAgent:
self.flush_memories = MagicMock()
self._invalidate_system_prompt = MagicMock()
# Token counters (non-zero to verify reset)
self.session_total_tokens = 1000
self.session_input_tokens = 600
self.session_output_tokens = 400
self.session_prompt_tokens = 550
self.session_completion_tokens = 350
self.session_cache_read_tokens = 100
self.session_cache_write_tokens = 50
self.session_reasoning_tokens = 80
self.session_api_calls = 5
self.session_estimated_cost_usd = 0.42
self.session_cost_status = "estimated"
self.session_cost_source = "openrouter"
self.context_compressor = _FakeCompressor()
def reset_session_state(self):
    """Mirror the real AIAgent.reset_session_state()."""
    # Integer counters all reset to zero.
    for attr in (
        "session_total_tokens",
        "session_input_tokens",
        "session_output_tokens",
        "session_prompt_tokens",
        "session_completion_tokens",
        "session_cache_read_tokens",
        "session_cache_write_tokens",
        "session_reasoning_tokens",
        "session_api_calls",
    ):
        setattr(self, attr, 0)
    # Cost tracking returns to its pristine "nothing measured" state.
    self.session_estimated_cost_usd = 0.0
    self.session_cost_status = "unknown"
    self.session_cost_source = "none"
    # Clear the compressor's counters too, when one is attached.
    compressor = getattr(self, "context_compressor", None)
    if compressor:
        compressor.last_prompt_tokens = 0
        compressor.last_completion_tokens = 0
        compressor.last_total_tokens = 0
        compressor.compression_count = 0
        compressor._context_probed = False
def _make_cli(env_overrides=None, config_overrides=None, **kwargs):
"""Create a HermesCLI instance with minimal mocking."""
@ -58,6 +105,7 @@ def _make_cli(env_overrides=None, config_overrides=None, **kwargs):
"prompt_toolkit.key_binding": MagicMock(),
"prompt_toolkit.completion": MagicMock(),
"prompt_toolkit.formatted_text": MagicMock(),
"prompt_toolkit.auto_suggest": MagicMock(),
}
with patch.dict(sys.modules, prompt_toolkit_stubs), patch.dict(
"os.environ", clean_env, clear=False
@ -137,3 +185,38 @@ def test_clear_command_starts_new_session_before_redrawing(tmp_path):
cli.console.clear.assert_called_once()
cli.show_banner.assert_called_once()
assert cli.conversation_history == []
def test_new_session_resets_token_counters(tmp_path):
    """Regression test for #2099: /new must zero all token counters."""
    cli = _prepare_cli_with_active_session(tmp_path)
    agent = cli.agent

    # Sanity check: the fake agent starts with non-zero counters, so a
    # passing reset below cannot be a false positive.
    assert agent.session_total_tokens > 0
    assert agent.session_api_calls > 0
    assert agent.context_compressor.compression_count > 0

    cli.process_command("/new")

    # Every integer token counter on the agent must now be zero.
    for counter in (
        "session_total_tokens",
        "session_input_tokens",
        "session_output_tokens",
        "session_prompt_tokens",
        "session_completion_tokens",
        "session_cache_read_tokens",
        "session_cache_write_tokens",
        "session_reasoning_tokens",
        "session_api_calls",
    ):
        assert getattr(agent, counter) == 0, counter
    assert agent.session_estimated_cost_usd == 0.0
    assert agent.session_cost_status == "unknown"
    assert agent.session_cost_source == "none"

    # The context compressor's counters must be cleared as well.
    comp = agent.context_compressor
    for counter in (
        "last_prompt_tokens",
        "last_completion_tokens",
        "last_total_tokens",
        "compression_count",
    ):
        assert getattr(comp, counter) == 0, counter
    assert comp._context_probed is False

View file

@ -336,11 +336,9 @@ Jobs run in a fresh session with no current-chat context, so prompts must be sel
If skill or skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction.
On update, passing skills=[] clears attached skills.
NOTE: The agent's final response is auto-delivered to the target — do NOT use
send_message in the prompt for that same destination. Same-target send_message
calls are skipped to avoid duplicate cron deliveries. Put the primary
user-facing content in the final response, and use send_message only for
additional or different targets.
NOTE: The agent's final response is auto-delivered to the target. Put the primary
user-facing content in the final response. Cron jobs run autonomously with no user
present — they cannot ask questions or request clarification.
Important safety rule: cron-run sessions should not recursively schedule more cron jobs.""",
"parameters": {