mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 01:31:41 +00:00
Three related issues prevented user-defined providers in `providers:` and `model_aliases:` from being reachable through standard CLI flags. Requests silently routed to the configured `model.base_url` instead of the user- intended endpoint. * hermes_cli/model_switch.py — root cause of the silent misrouting: `_ensure_direct_aliases()` rebound `DIRECT_ALIASES` to a freshly-loaded dict, leaving every `from hermes_cli.model_switch import DIRECT_ALIASES` caller stuck on the stale empty original. Switched to `.update()` so module attribute references stay valid. * hermes_cli/main.py — chat subcommand `--provider` had `choices=[...]` hardcoded to built-in providers, rejecting valid keys from user `providers:` config. Dropped the choices list; runtime resolution validates correctly downstream. * hermes_cli/oneshot.py — `-m <alias>` only resolved the model name; the alias's base_url was never propagated. Now consults `DIRECT_ALIASES` before falling through to `detect_provider_for_model`, and threads the alias's base_url to `resolve_runtime_provider(explicit_base_url=...)`. * hermes_cli/runtime_provider.py — `_resolve_named_custom_runtime` now honors `(provider="custom", explicit_base_url=...)` so a base_url propagated from a direct-alias resolution actually builds a runtime instead of falling through to provider-registry handlers that don't know about ad-hoc local endpoints. Verified: `hermes chat --provider <user-key> -m <model> -q "..."` and `hermes -m <user-alias> -z "..."` both route to the user-intended endpoint, observable via the target server's request log. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
219 lines
9.3 KiB
Python
219 lines
9.3 KiB
Python
"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
|
|
|
|
Bypasses cli.py entirely. No banner, no spinner, no session_id line,
|
|
no stderr chatter. Just the agent's final text to stdout.
|
|
|
|
Toolsets = whatever the user has configured for "cli" in `hermes tools`.
|
|
Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
|
|
Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
|
|
Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
|
|
|
|
Model / provider selection mirrors `hermes chat`:
|
|
- Both optional. If omitted, use the user's configured default.
|
|
- If both given, pair them exactly as given.
|
|
- If only --model given, auto-detect the provider that serves it.
|
|
- If only --provider given, error out (ambiguous — caller must pick a model).
|
|
|
|
Env var fallbacks (used when the corresponding arg is not passed):
|
|
- HERMES_INFERENCE_MODEL
|
|
- HERMES_INFERENCE_PROVIDER (already read by resolve_runtime_provider)
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import sys
|
|
from contextlib import redirect_stderr, redirect_stdout
|
|
from typing import Optional
|
|
|
|
|
|
def run_oneshot(
|
|
prompt: str,
|
|
model: Optional[str] = None,
|
|
provider: Optional[str] = None,
|
|
) -> int:
|
|
"""Execute a single prompt and print only the final content block.
|
|
|
|
Args:
|
|
prompt: The user message to send.
|
|
model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
|
|
env var, then config.yaml's model.default / model.model.
|
|
provider: Optional provider override. Falls back to
|
|
HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
|
|
then "auto".
|
|
|
|
Returns the exit code. Caller should sys.exit() with the return.
|
|
"""
|
|
# Silence every stdlib logger for the duration. AIAgent, tools, and
|
|
# provider adapters all log to stderr through the root logger; file
|
|
# handlers added by setup_logging() keep working (they're attached to
|
|
# the root logger's handler list, not affected by level), but no
|
|
# bytes reach the terminal.
|
|
logging.disable(logging.CRITICAL)
|
|
|
|
# --provider without --model is ambiguous: carrying the user's configured
|
|
# model across to a different provider is usually wrong (that provider may
|
|
# not host it), and silently picking the provider's catalog default hides
|
|
# the mismatch. Require the caller to be explicit. Validate BEFORE the
|
|
# stderr redirect so the message actually reaches the terminal.
|
|
env_model_early = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
|
|
if provider and not ((model or "").strip() or env_model_early):
|
|
sys.stderr.write(
|
|
"hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
|
|
"Pass both explicitly, or neither to use your configured defaults.\n"
|
|
)
|
|
return 2
|
|
|
|
# Auto-approve any shell / tool approvals. Non-interactive by
|
|
# definition — a prompt would hang forever.
|
|
os.environ["HERMES_YOLO_MODE"] = "1"
|
|
os.environ["HERMES_ACCEPT_HOOKS"] = "1"
|
|
|
|
# Redirect stderr AND stdout to devnull for the entire call tree.
|
|
# We'll print the final response to the real stdout at the end.
|
|
real_stdout = sys.stdout
|
|
devnull = open(os.devnull, "w")
|
|
|
|
try:
|
|
with redirect_stdout(devnull), redirect_stderr(devnull):
|
|
response = _run_agent(prompt, model=model, provider=provider)
|
|
finally:
|
|
try:
|
|
devnull.close()
|
|
except Exception:
|
|
pass
|
|
|
|
if response:
|
|
real_stdout.write(response)
|
|
if not response.endswith("\n"):
|
|
real_stdout.write("\n")
|
|
real_stdout.flush()
|
|
return 0
|
|
|
|
|
|
def _run_agent(
|
|
prompt: str,
|
|
model: Optional[str] = None,
|
|
provider: Optional[str] = None,
|
|
) -> str:
|
|
"""Build an AIAgent exactly like a normal CLI chat turn would, then
|
|
run a single conversation. Returns the final response string."""
|
|
# Imports are local so they don't run when hermes is invoked for
|
|
# other commands (keeps top-level CLI startup cheap).
|
|
from hermes_cli.config import load_config
|
|
from hermes_cli.models import detect_provider_for_model
|
|
from hermes_cli.runtime_provider import resolve_runtime_provider
|
|
from hermes_cli.tools_config import _get_platform_tools
|
|
from run_agent import AIAgent
|
|
|
|
cfg = load_config()
|
|
|
|
# Resolve effective model: explicit arg → env var → config.
|
|
model_cfg = cfg.get("model") or {}
|
|
if isinstance(model_cfg, str):
|
|
cfg_model = model_cfg
|
|
else:
|
|
cfg_model = model_cfg.get("default") or model_cfg.get("model") or ""
|
|
|
|
env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
|
|
effective_model = (model or "").strip() or env_model or cfg_model
|
|
|
|
# Resolve effective provider: explicit arg → (auto-detect from model if
|
|
# model was explicit) → env / config (handled inside resolve_runtime_provider).
|
|
#
|
|
# When --model is given without --provider, auto-detect the provider that
|
|
# serves that model — same semantic as `/model <name>` in an interactive
|
|
# session. Without this, resolve_runtime_provider() would fall back to
|
|
# the user's configured default provider, which may not host the model
|
|
# the caller just asked for.
|
|
effective_provider = (provider or "").strip() or None
|
|
explicit_base_url_from_alias: Optional[str] = None
|
|
if effective_provider is None and (model or env_model):
|
|
# Only auto-detect when the model was explicitly requested via arg or
|
|
# env var (not when it came from config — that's the "use my defaults"
|
|
# path and the configured provider is already correct).
|
|
explicit_model = (model or "").strip() or env_model
|
|
if explicit_model:
|
|
# First check DIRECT_ALIASES populated from config.yaml `model_aliases:`.
|
|
# These map a user-defined alias to (model, provider, base_url) for
|
|
# endpoints not in any catalog (local servers, custom proxies, etc.).
|
|
try:
|
|
from hermes_cli import model_switch as _ms
|
|
_ms._ensure_direct_aliases()
|
|
direct = _ms.DIRECT_ALIASES.get(explicit_model.strip().lower())
|
|
except Exception:
|
|
direct = None
|
|
if direct is not None:
|
|
effective_model = direct.model
|
|
effective_provider = direct.provider
|
|
if direct.base_url:
|
|
explicit_base_url_from_alias = direct.base_url.rstrip("/")
|
|
else:
|
|
cfg_provider = ""
|
|
if isinstance(model_cfg, dict):
|
|
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
|
current_provider = (
|
|
cfg_provider
|
|
or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
|
|
or "auto"
|
|
)
|
|
detected = detect_provider_for_model(explicit_model, current_provider)
|
|
if detected:
|
|
effective_provider, effective_model = detected
|
|
|
|
runtime = resolve_runtime_provider(
|
|
requested=effective_provider,
|
|
target_model=effective_model or None,
|
|
explicit_base_url=explicit_base_url_from_alias,
|
|
)
|
|
|
|
# Pull in whatever toolsets the user has enabled for "cli".
|
|
# sorted() gives stable ordering; set→list for AIAgent's signature.
|
|
toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
|
|
|
|
agent = AIAgent(
|
|
api_key=runtime.get("api_key"),
|
|
base_url=runtime.get("base_url"),
|
|
provider=runtime.get("provider"),
|
|
api_mode=runtime.get("api_mode"),
|
|
model=effective_model,
|
|
enabled_toolsets=toolsets_list,
|
|
quiet_mode=True,
|
|
platform="cli",
|
|
credential_pool=runtime.get("credential_pool"),
|
|
# Interactive callbacks are intentionally NOT wired beyond this
|
|
# one. In oneshot mode there's no user sitting at a terminal:
|
|
# - clarify → returns a synthetic "pick a default" instruction
|
|
# so the agent continues instead of stalling on
|
|
# the tool's built-in "not available" error
|
|
# - sudo password prompt → terminal_tool gates on
|
|
# HERMES_INTERACTIVE which we never set
|
|
# - shell-hook approval → auto-approved via HERMES_ACCEPT_HOOKS=1
|
|
# (set above); also falls back to deny on non-tty
|
|
# - dangerous-command approval → bypassed via HERMES_YOLO_MODE=1
|
|
# - skill secret capture → returns gracefully when no callback set
|
|
clarify_callback=_oneshot_clarify_callback,
|
|
)
|
|
|
|
# Belt-and-braces: make sure AIAgent doesn't invoke any streaming
|
|
# display callbacks that would bypass our stdout capture.
|
|
agent.suppress_status_output = True
|
|
agent.stream_delta_callback = None
|
|
agent.tool_gen_callback = None
|
|
|
|
return agent.chat(prompt) or ""
|
|
|
|
|
|
def _oneshot_clarify_callback(question: str, choices=None) -> str:
|
|
"""Clarify is disabled in oneshot mode — tell the agent to pick a
|
|
default and proceed instead of stalling or erroring."""
|
|
if choices:
|
|
return (
|
|
f"[oneshot mode: no user available. Pick the best option from "
|
|
f"{choices} using your own judgment and continue.]"
|
|
)
|
|
return (
|
|
"[oneshot mode: no user available. Make the most reasonable "
|
|
"assumption you can and continue.]"
|
|
)
|