mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor
This commit is contained in:
commit
097702c8a7
55 changed files with 4904 additions and 51 deletions
|
|
@ -298,6 +298,33 @@ def build_anthropic_client(api_key: str, base_url: str = None):
|
||||||
return _anthropic_sdk.Anthropic(**kwargs)
|
return _anthropic_sdk.Anthropic(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def build_anthropic_bedrock_client(region: str):
    """Build an AnthropicBedrock client for Bedrock-hosted Claude models.

    Goes through the Anthropic SDK's native Bedrock adapter rather than the
    AWS Converse API, which keeps full Claude feature parity: prompt caching,
    thinking budgets, adaptive thinking, fast mode.

    Credentials come from the boto3 default chain (IAM roles, SSO, env vars).

    Raises:
        ImportError: when the anthropic SDK is missing or too old to ship
            the ``AnthropicBedrock`` adapter.
    """
    sdk = _anthropic_sdk
    if sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Bedrock provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )
    if not hasattr(sdk, "AnthropicBedrock"):
        raise ImportError(
            "anthropic.AnthropicBedrock not available. "
            "Upgrade with: pip install 'anthropic>=0.39.0'"
        )
    from httpx import Timeout

    # Generous 900s total budget for long generations; 10s connect fails fast.
    return sdk.AnthropicBedrock(
        aws_region=region,
        timeout=Timeout(timeout=900.0, connect=10.0),
    )
|
||||||
|
|
||||||
|
|
||||||
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
|
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
|
||||||
"""Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
|
"""Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -775,6 +775,21 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
|
|
||||||
|
|
||||||
def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
|
def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
|
# Check cross-session rate limit guard before attempting Nous —
|
||||||
|
# if another session already recorded a 429, skip Nous entirely
|
||||||
|
# to avoid piling more requests onto the tapped RPH bucket.
|
||||||
|
try:
|
||||||
|
from agent.nous_rate_guard import nous_rate_limit_remaining
|
||||||
|
_remaining = nous_rate_limit_remaining()
|
||||||
|
if _remaining is not None and _remaining > 0:
|
||||||
|
logger.debug(
|
||||||
|
"Auxiliary: skipping Nous Portal (rate-limited, resets in %.0fs)",
|
||||||
|
_remaining,
|
||||||
|
)
|
||||||
|
return None, None
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
nous = _read_nous_auth()
|
nous = _read_nous_auth()
|
||||||
if not nous:
|
if not nous:
|
||||||
return None, None
|
return None, None
|
||||||
|
|
@ -899,6 +914,51 @@ def _current_custom_base_url() -> str:
|
||||||
return custom_base or ""
|
return custom_base or ""
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_proxy_env_urls() -> None:
    """Abort early with an actionable error when proxy env vars hold broken URLs.

    A typical culprit is a shell-config typo such as
    ``export HTTP_PROXY=http://127.0.0.1:6153export NEXT_VAR=...`` which glues
    'export' onto the port number. Left unchecked, the OpenAI/httpx client
    surfaces a cryptic ``Invalid port`` error that never names the offending
    environment variable.

    Raises:
        RuntimeError: naming the malformed variable and its value.
    """
    from urllib.parse import urlparse

    proxy_vars = (
        "HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
        "https_proxy", "http_proxy", "all_proxy",
    )
    for name in proxy_vars:
        raw = str(os.environ.get(name) or "").strip()
        if not raw:
            continue
        try:
            parts = urlparse(raw)
            if parts.scheme:
                # .port raises ValueError on garbage like '6153export'.
                _ = parts.port
        except ValueError as exc:
            raise RuntimeError(
                f"Malformed proxy environment variable {name}={raw!r}. "
                "Fix or unset your proxy settings and try again."
            ) from exc
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_base_url(base_url: str) -> None:
    """Sanity-check a user-supplied endpoint URL before it reaches httpx.

    Empty values and ``acp://`` pseudo-URLs are out of scope here and pass
    through untouched. For http(s) URLs, a malformed port segment raises a
    RuntimeError that tells the user how to re-enter a valid base URL.
    """
    from urllib.parse import urlparse

    target = str(base_url or "").strip()
    if not target or target.startswith("acp://"):
        return
    try:
        parts = urlparse(target)
        if parts.scheme in ("http", "https"):
            # .port raises ValueError when the port is not a valid integer.
            _ = parts.port
    except ValueError as exc:
        raise RuntimeError(
            f"Malformed custom endpoint URL: {target!r}. "
            "Run `hermes setup` or `hermes model` and enter a valid http(s) base URL."
        ) from exc
|
||||||
|
|
||||||
|
|
||||||
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
|
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
runtime = _resolve_custom_runtime()
|
runtime = _resolve_custom_runtime()
|
||||||
if len(runtime) == 2:
|
if len(runtime) == 2:
|
||||||
|
|
@ -1299,6 +1359,7 @@ def resolve_provider_client(
|
||||||
Returns:
|
Returns:
|
||||||
(client, resolved_model) or (None, None) if auth is unavailable.
|
(client, resolved_model) or (None, None) if auth is unavailable.
|
||||||
"""
|
"""
|
||||||
|
_validate_proxy_env_urls()
|
||||||
# Normalise aliases
|
# Normalise aliases
|
||||||
provider = _normalize_aux_provider(provider)
|
provider = _normalize_aux_provider(provider)
|
||||||
|
|
||||||
|
|
|
||||||
1098
agent/bedrock_adapter.py
Normal file
1098
agent/bedrock_adapter.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -112,6 +112,10 @@ _RATE_LIMIT_PATTERNS = [
|
||||||
"please retry after",
|
"please retry after",
|
||||||
"resource_exhausted",
|
"resource_exhausted",
|
||||||
"rate increased too quickly", # Alibaba/DashScope throttling
|
"rate increased too quickly", # Alibaba/DashScope throttling
|
||||||
|
# AWS Bedrock throttling
|
||||||
|
"throttlingexception",
|
||||||
|
"too many concurrent requests",
|
||||||
|
"servicequotaexceededexception",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
|
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
|
||||||
|
|
@ -171,6 +175,11 @@ _CONTEXT_OVERFLOW_PATTERNS = [
|
||||||
# Chinese error messages (some providers return these)
|
# Chinese error messages (some providers return these)
|
||||||
"超过最大长度",
|
"超过最大长度",
|
||||||
"上下文长度",
|
"上下文长度",
|
||||||
|
# AWS Bedrock Converse API error patterns
|
||||||
|
"input is too long",
|
||||||
|
"max input token",
|
||||||
|
"input token",
|
||||||
|
"exceeds the maximum number of input tokens",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Model not found patterns
|
# Model not found patterns
|
||||||
|
|
|
||||||
|
|
@ -1012,6 +1012,16 @@ def get_model_context_length(
|
||||||
if ctx:
|
if ctx:
|
||||||
return ctx
|
return ctx
|
||||||
|
|
||||||
|
# 4b. AWS Bedrock — use static context length table.
|
||||||
|
# Bedrock's ListFoundationModels doesn't expose context window sizes,
|
||||||
|
# so we maintain a curated table in bedrock_adapter.py.
|
||||||
|
if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
|
||||||
|
try:
|
||||||
|
from agent.bedrock_adapter import get_bedrock_context_length
|
||||||
|
return get_bedrock_context_length(model)
|
||||||
|
except ImportError:
|
||||||
|
pass # boto3 not installed — fall through to generic resolution
|
||||||
|
|
||||||
# 5. Provider-aware lookups (before generic OpenRouter cache)
|
# 5. Provider-aware lookups (before generic OpenRouter cache)
|
||||||
# These are provider-specific and take priority over the generic OR cache,
|
# These are provider-specific and take priority over the generic OR cache,
|
||||||
# since the same model can have different context limits per provider
|
# since the same model can have different context limits per provider
|
||||||
|
|
|
||||||
182
agent/nous_rate_guard.py
Normal file
182
agent/nous_rate_guard.py
Normal file
|
|
@ -0,0 +1,182 @@
|
||||||
|
"""Cross-session rate limit guard for Nous Portal.
|
||||||
|
|
||||||
|
Writes rate limit state to a shared file so all sessions (CLI, gateway,
|
||||||
|
cron, auxiliary) can check whether Nous Portal is currently rate-limited
|
||||||
|
before making requests. Prevents retry amplification when RPH is tapped.
|
||||||
|
|
||||||
|
Each 429 from Nous triggers up to 9 API calls per conversation turn
|
||||||
|
(3 SDK retries x 3 Hermes retries), and every one of those calls counts
|
||||||
|
against RPH. By recording the rate limit state on first 429 and checking
|
||||||
|
it before subsequent attempts, we eliminate the amplification effect.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
from typing import Any, Mapping, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_STATE_SUBDIR = "rate_limits"
|
||||||
|
_STATE_FILENAME = "nous.json"
|
||||||
|
|
||||||
|
|
||||||
|
def _state_path() -> str:
    """Return the filesystem path of the shared Nous rate-limit state file."""
    try:
        from hermes_constants import get_hermes_home
        root = get_hermes_home()
    except ImportError:
        # Standalone fallback when hermes_constants is not importable.
        root = os.path.join(os.path.expanduser("~"), ".hermes")
    return os.path.join(root, _STATE_SUBDIR, _STATE_FILENAME)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_reset_seconds(headers: Optional[Mapping[str, str]]) -> Optional[float]:
    """Pull the best reset-time estimate (seconds from now) out of 429 headers.

    Headers are matched case-insensitively, in priority order:

    1. ``x-ratelimit-reset-requests-1h`` — hourly RPH window (most useful)
    2. ``x-ratelimit-reset-requests`` — per-minute RPM window
    3. ``retry-after`` — generic HTTP header

    Returns:
        Positive seconds-from-now, or None when nothing usable is present.
    """
    if not headers:
        return None

    by_name = {name.lower(): value for name, value in headers.items()}

    for candidate in (
        "x-ratelimit-reset-requests-1h",
        "x-ratelimit-reset-requests",
        "retry-after",
    ):
        raw = by_name.get(candidate)
        if raw is None:
            continue
        try:
            seconds = float(raw)
        except (TypeError, ValueError):
            continue
        if seconds > 0:
            return seconds

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def record_nous_rate_limit(
    *,
    headers: Optional[Mapping[str, str]] = None,
    error_context: Optional[dict[str, Any]] = None,
    default_cooldown: float = 300.0,
) -> None:
    """Persist the fact that Nous Portal just returned a 429.

    The reset moment is derived, in order of preference, from the response
    headers, then from ``error_context['reset_at']``, and finally from
    ``default_cooldown`` seconds in the future. State is written atomically
    to the shared file so every session (CLI, gateway, cron) sees it.

    Args:
        headers: HTTP response headers from the 429 error.
        error_context: Structured error context from _extract_api_error_context().
        default_cooldown: Fallback cooldown in seconds when no header data.
    """
    now = time.time()
    reset_at = None

    # Best source: rate-limit headers on the 429 response.
    from_headers = _parse_reset_seconds(headers)
    if from_headers is not None:
        reset_at = now + from_headers

    # Next: an absolute reset_at timestamp parsed out of the error body.
    if reset_at is None and isinstance(error_context, dict):
        body_reset = error_context.get("reset_at")
        if isinstance(body_reset, (int, float)) and body_reset > now:
            reset_at = float(body_reset)

    # Last resort: a fixed cooldown.
    if reset_at is None:
        reset_at = now + default_cooldown

    path = _state_path()
    try:
        state_dir = os.path.dirname(path)
        os.makedirs(state_dir, exist_ok=True)

        payload = {
            "reset_at": reset_at,
            "recorded_at": now,
            "reset_seconds": reset_at - now,
        }

        # Write-to-temp + rename keeps concurrent readers from seeing a torn file.
        fd, tmp_path = tempfile.mkstemp(dir=state_dir, suffix=".tmp")
        try:
            with os.fdopen(fd, "w") as handle:
                json.dump(payload, handle)
            os.replace(tmp_path, path)
        except Exception:
            try:
                os.unlink(tmp_path)  # don't leave the temp file behind
            except OSError:
                pass
            raise

        logger.info(
            "Nous rate limit recorded: resets in %.0fs (at %.0f)",
            reset_at - now, reset_at,
        )
    except Exception as exc:
        # Best-effort: a failed state write must never break the request path.
        logger.debug("Failed to write Nous rate limit state: %s", exc)
|
||||||
|
|
||||||
|
|
||||||
|
def nous_rate_limit_remaining() -> Optional[float]:
    """Report whether Nous Portal is currently rate-limited.

    Returns:
        Seconds remaining until the recorded reset, or None when no active
        rate limit is on file.
    """
    path = _state_path()
    try:
        with open(path) as handle:
            state = json.load(handle)
        secs_left = state.get("reset_at", 0) - time.time()
        if secs_left > 0:
            return secs_left
        # Window has passed — drop the stale state file.
        try:
            os.unlink(path)
        except OSError:
            pass
        return None
    except (FileNotFoundError, json.JSONDecodeError, KeyError, TypeError):
        # Missing or unreadable state means "not rate-limited".
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def clear_nous_rate_limit() -> None:
    """Drop the rate-limit state file (e.g., after a successful Nous request)."""
    target = _state_path()
    try:
        os.unlink(target)
    except FileNotFoundError:
        # Nothing recorded — already clear.
        pass
    except OSError as exc:
        logger.debug("Failed to clear Nous rate limit state: %s", exc)
|
||||||
|
|
||||||
|
|
||||||
|
def format_remaining(seconds: float) -> str:
    """Render a duration in seconds as a compact human-readable string.

    Negative inputs clamp to "0s"; zero components are omitted
    (e.g. 60 -> "1m", 3600 -> "1h", 3725 -> "1h 2m").
    """
    total = int(seconds)
    if total < 0:
        total = 0
    if total < 60:
        return f"{total}s"
    if total < 3600:
        minutes, leftover = divmod(total, 60)
        if leftover:
            return f"{minutes}m {leftover}s"
        return f"{minutes}m"
    hours, leftover = divmod(total, 3600)
    minutes = leftover // 60
    if minutes:
        return f"{hours}h {minutes}m"
    return f"{hours}h"
|
||||||
|
|
@ -93,6 +93,17 @@ _DB_CONNSTR_RE = re.compile(
|
||||||
re.IGNORECASE,
|
re.IGNORECASE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 for "{")
|
||||||
|
# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs.
|
||||||
|
_JWT_RE = re.compile(
|
||||||
|
r"eyJ[A-Za-z0-9_-]{10,}" # Header (always starts with eyJ)
|
||||||
|
r"(?:\.[A-Za-z0-9_=-]{4,}){0,2}" # Optional payload and/or signature
|
||||||
|
)
|
||||||
|
|
||||||
|
# Discord user/role mentions: <@123456789012345678> or <@!123456789012345678>
|
||||||
|
# Snowflake IDs are 17-20 digit integers that resolve to specific Discord accounts.
|
||||||
|
_DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
|
||||||
|
|
||||||
# E.164 phone numbers: +<country><number>, 7-15 digits
|
# E.164 phone numbers: +<country><number>, 7-15 digits
|
||||||
# Negative lookahead prevents matching hex strings or identifiers
|
# Negative lookahead prevents matching hex strings or identifiers
|
||||||
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
|
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
|
||||||
|
|
@ -159,6 +170,12 @@ def redact_sensitive_text(text: str) -> str:
|
||||||
# Database connection string passwords
|
# Database connection string passwords
|
||||||
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
|
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
|
||||||
|
|
||||||
|
# JWT tokens (eyJ... — base64-encoded JSON headers)
|
||||||
|
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
|
||||||
|
|
||||||
|
# Discord user/role mentions (<@snowflake_id>)
|
||||||
|
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
|
||||||
|
|
||||||
# E.164 phone numbers (Signal, WhatsApp)
|
# E.164 phone numbers (Signal, WhatsApp)
|
||||||
def _redact_phone(m):
|
def _redact_phone(m):
|
||||||
phone = m.group(1)
|
phone = m.group(1)
|
||||||
|
|
|
||||||
|
|
@ -284,6 +284,80 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
|
||||||
source_url="https://ai.google.dev/pricing",
|
source_url="https://ai.google.dev/pricing",
|
||||||
pricing_version="google-pricing-2026-03-16",
|
pricing_version="google-pricing-2026-03-16",
|
||||||
),
|
),
|
||||||
|
# AWS Bedrock — pricing per the Bedrock pricing page.
|
||||||
|
# Bedrock charges the same per-token rates as the model provider but
|
||||||
|
# through AWS billing. These are the on-demand prices (no commitment).
|
||||||
|
# Source: https://aws.amazon.com/bedrock/pricing/
|
||||||
|
(
|
||||||
|
"bedrock",
|
||||||
|
"anthropic.claude-opus-4-6",
|
||||||
|
): PricingEntry(
|
||||||
|
input_cost_per_million=Decimal("15.00"),
|
||||||
|
output_cost_per_million=Decimal("75.00"),
|
||||||
|
source="official_docs_snapshot",
|
||||||
|
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||||
|
pricing_version="bedrock-pricing-2026-04",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"bedrock",
|
||||||
|
"anthropic.claude-sonnet-4-6",
|
||||||
|
): PricingEntry(
|
||||||
|
input_cost_per_million=Decimal("3.00"),
|
||||||
|
output_cost_per_million=Decimal("15.00"),
|
||||||
|
source="official_docs_snapshot",
|
||||||
|
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||||
|
pricing_version="bedrock-pricing-2026-04",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"bedrock",
|
||||||
|
"anthropic.claude-sonnet-4-5",
|
||||||
|
): PricingEntry(
|
||||||
|
input_cost_per_million=Decimal("3.00"),
|
||||||
|
output_cost_per_million=Decimal("15.00"),
|
||||||
|
source="official_docs_snapshot",
|
||||||
|
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||||
|
pricing_version="bedrock-pricing-2026-04",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"bedrock",
|
||||||
|
"anthropic.claude-haiku-4-5",
|
||||||
|
): PricingEntry(
|
||||||
|
input_cost_per_million=Decimal("0.80"),
|
||||||
|
output_cost_per_million=Decimal("4.00"),
|
||||||
|
source="official_docs_snapshot",
|
||||||
|
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||||
|
pricing_version="bedrock-pricing-2026-04",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"bedrock",
|
||||||
|
"amazon.nova-pro",
|
||||||
|
): PricingEntry(
|
||||||
|
input_cost_per_million=Decimal("0.80"),
|
||||||
|
output_cost_per_million=Decimal("3.20"),
|
||||||
|
source="official_docs_snapshot",
|
||||||
|
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||||
|
pricing_version="bedrock-pricing-2026-04",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"bedrock",
|
||||||
|
"amazon.nova-lite",
|
||||||
|
): PricingEntry(
|
||||||
|
input_cost_per_million=Decimal("0.06"),
|
||||||
|
output_cost_per_million=Decimal("0.24"),
|
||||||
|
source="official_docs_snapshot",
|
||||||
|
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||||
|
pricing_version="bedrock-pricing-2026-04",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"bedrock",
|
||||||
|
"amazon.nova-micro",
|
||||||
|
): PricingEntry(
|
||||||
|
input_cost_per_million=Decimal("0.035"),
|
||||||
|
output_cost_per_million=Decimal("0.14"),
|
||||||
|
source="official_docs_snapshot",
|
||||||
|
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||||
|
pricing_version="bedrock-pricing-2026-04",
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -554,6 +554,12 @@ def load_gateway_config() -> GatewayConfig:
|
||||||
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
|
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
|
||||||
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
|
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
|
||||||
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
|
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
|
||||||
|
if "channel_prompts" in platform_cfg:
|
||||||
|
channel_prompts = platform_cfg["channel_prompts"]
|
||||||
|
if isinstance(channel_prompts, dict):
|
||||||
|
bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
|
||||||
|
else:
|
||||||
|
bridged["channel_prompts"] = channel_prompts
|
||||||
if not bridged:
|
if not bridged:
|
||||||
continue
|
continue
|
||||||
plat_data = platforms_data.setdefault(plat.value, {})
|
plat_data = platforms_data.setdefault(plat.value, {})
|
||||||
|
|
|
||||||
|
|
@ -683,6 +683,10 @@ class MessageEvent:
|
||||||
# Discord channel_skill_bindings). A single name or ordered list.
|
# Discord channel_skill_bindings). A single name or ordered list.
|
||||||
auto_skill: Optional[str | list[str]] = None
|
auto_skill: Optional[str | list[str]] = None
|
||||||
|
|
||||||
|
# Per-channel ephemeral system prompt (e.g. Discord channel_prompts).
|
||||||
|
# Applied at API call time and never persisted to transcript history.
|
||||||
|
channel_prompt: Optional[str] = None
|
||||||
|
|
||||||
# Internal flag — set for synthetic events (e.g. background process
|
# Internal flag — set for synthetic events (e.g. background process
|
||||||
# completion notifications) that must bypass user authorization checks.
|
# completion notifications) that must bypass user authorization checks.
|
||||||
internal: bool = False
|
internal: bool = False
|
||||||
|
|
@ -776,6 +780,36 @@ _RETRYABLE_ERROR_PATTERNS = (
|
||||||
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
|
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_channel_prompt(
|
||||||
|
config_extra: dict,
|
||||||
|
channel_id: str,
|
||||||
|
parent_id: str | None = None,
|
||||||
|
) -> str | None:
|
||||||
|
"""Resolve a per-channel ephemeral prompt from platform config.
|
||||||
|
|
||||||
|
Looks up ``channel_prompts`` in the adapter's ``config.extra`` dict.
|
||||||
|
Prefers an exact match on *channel_id*; falls back to *parent_id*
|
||||||
|
(useful for forum threads / child channels inheriting a parent prompt).
|
||||||
|
|
||||||
|
Returns the prompt string, or None if no match is found. Blank/whitespace-
|
||||||
|
only prompts are treated as absent.
|
||||||
|
"""
|
||||||
|
prompts = config_extra.get("channel_prompts") or {}
|
||||||
|
if not isinstance(prompts, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
for key in (channel_id, parent_id):
|
||||||
|
if not key:
|
||||||
|
continue
|
||||||
|
prompt = prompts.get(key)
|
||||||
|
if prompt is None:
|
||||||
|
continue
|
||||||
|
prompt = str(prompt).strip()
|
||||||
|
if prompt:
|
||||||
|
return prompt
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class BasePlatformAdapter(ABC):
|
class BasePlatformAdapter(ABC):
|
||||||
"""
|
"""
|
||||||
Base class for platform adapters.
|
Base class for platform adapters.
|
||||||
|
|
|
||||||
|
|
@ -1992,11 +1992,14 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||||
)
|
)
|
||||||
|
|
||||||
msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT
|
msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT
|
||||||
|
channel_id = str(interaction.channel_id)
|
||||||
|
parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "")
|
||||||
return MessageEvent(
|
return MessageEvent(
|
||||||
text=text,
|
text=text,
|
||||||
message_type=msg_type,
|
message_type=msg_type,
|
||||||
source=source,
|
source=source,
|
||||||
raw_message=interaction,
|
raw_message=interaction,
|
||||||
|
channel_prompt=self._resolve_channel_prompt(channel_id, parent_id or None),
|
||||||
)
|
)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
@ -2067,14 +2070,17 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||||
chat_topic=chat_topic,
|
chat_topic=chat_topic,
|
||||||
)
|
)
|
||||||
|
|
||||||
_parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "")
|
_parent_channel = self._thread_parent_channel(getattr(interaction, "channel", None))
|
||||||
|
_parent_id = str(getattr(_parent_channel, "id", "") or "")
|
||||||
_skills = self._resolve_channel_skills(thread_id, _parent_id or None)
|
_skills = self._resolve_channel_skills(thread_id, _parent_id or None)
|
||||||
|
_channel_prompt = self._resolve_channel_prompt(thread_id, _parent_id or None)
|
||||||
event = MessageEvent(
|
event = MessageEvent(
|
||||||
text=text,
|
text=text,
|
||||||
message_type=MessageType.TEXT,
|
message_type=MessageType.TEXT,
|
||||||
source=source,
|
source=source,
|
||||||
raw_message=interaction,
|
raw_message=interaction,
|
||||||
auto_skill=_skills,
|
auto_skill=_skills,
|
||||||
|
channel_prompt=_channel_prompt,
|
||||||
)
|
)
|
||||||
await self.handle_message(event)
|
await self.handle_message(event)
|
||||||
|
|
||||||
|
|
@ -2103,6 +2109,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||||
return list(dict.fromkeys(skills)) # dedup, preserve order
|
return list(dict.fromkeys(skills)) # dedup, preserve order
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
    """Look up a Discord per-channel prompt; an exact channel match wins over its parent."""
    from gateway.platforms.base import resolve_channel_prompt

    extra = self.config.extra
    return resolve_channel_prompt(extra, channel_id, parent_id)
|
||||||
|
|
||||||
def _thread_parent_channel(self, channel: Any) -> Any:
    """When *channel* is a thread, return its parent text channel; otherwise the channel itself."""
    parent = getattr(channel, "parent", None)
    # Falsy parent (no attribute, or None) falls back to the channel we were given.
    return parent or channel
|
return getattr(channel, "parent", None) or channel
|
||||||
|
|
@ -2654,6 +2665,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||||
_parent_id = str(getattr(_chan, "parent_id", "") or "")
|
_parent_id = str(getattr(_chan, "parent_id", "") or "")
|
||||||
_chan_id = str(getattr(_chan, "id", ""))
|
_chan_id = str(getattr(_chan, "id", ""))
|
||||||
_skills = self._resolve_channel_skills(_chan_id, _parent_id or None)
|
_skills = self._resolve_channel_skills(_chan_id, _parent_id or None)
|
||||||
|
_channel_prompt = self._resolve_channel_prompt(_chan_id, _parent_id or None)
|
||||||
|
|
||||||
reply_to_id = None
|
reply_to_id = None
|
||||||
reply_to_text = None
|
reply_to_text = None
|
||||||
|
|
@ -2674,6 +2686,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||||
reply_to_text=reply_to_text,
|
reply_to_text=reply_to_text,
|
||||||
timestamp=message.created_at,
|
timestamp=message.created_at,
|
||||||
auto_skill=_skills,
|
auto_skill=_skills,
|
||||||
|
channel_prompt=_channel_prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Track thread participation so the bot won't require @mention for
|
# Track thread participation so the bot won't require @mention for
|
||||||
|
|
|
||||||
|
|
@ -718,6 +718,12 @@ class MattermostAdapter(BasePlatformAdapter):
|
||||||
thread_id=thread_id,
|
thread_id=thread_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Per-channel ephemeral prompt
|
||||||
|
from gateway.platforms.base import resolve_channel_prompt
|
||||||
|
_channel_prompt = resolve_channel_prompt(
|
||||||
|
self.config.extra, channel_id, None,
|
||||||
|
)
|
||||||
|
|
||||||
msg_event = MessageEvent(
|
msg_event = MessageEvent(
|
||||||
text=message_text,
|
text=message_text,
|
||||||
message_type=msg_type,
|
message_type=msg_type,
|
||||||
|
|
@ -726,6 +732,7 @@ class MattermostAdapter(BasePlatformAdapter):
|
||||||
message_id=post_id,
|
message_id=post_id,
|
||||||
media_urls=media_urls if media_urls else None,
|
media_urls=media_urls if media_urls else None,
|
||||||
media_types=media_types if media_types else None,
|
media_types=media_types if media_types else None,
|
||||||
|
channel_prompt=_channel_prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
await self.handle_message(msg_event)
|
await self.handle_message(msg_event)
|
||||||
|
|
|
||||||
|
|
@ -1167,6 +1167,12 @@ class SlackAdapter(BasePlatformAdapter):
|
||||||
thread_id=thread_ts,
|
thread_id=thread_ts,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Per-channel ephemeral prompt
|
||||||
|
from gateway.platforms.base import resolve_channel_prompt
|
||||||
|
_channel_prompt = resolve_channel_prompt(
|
||||||
|
self.config.extra, channel_id, None,
|
||||||
|
)
|
||||||
|
|
||||||
msg_event = MessageEvent(
|
msg_event = MessageEvent(
|
||||||
text=text,
|
text=text,
|
||||||
message_type=msg_type,
|
message_type=msg_type,
|
||||||
|
|
@ -1176,6 +1182,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||||
media_urls=media_urls,
|
media_urls=media_urls,
|
||||||
media_types=media_types,
|
media_types=media_types,
|
||||||
reply_to_message_id=thread_ts if thread_ts != ts else None,
|
reply_to_message_id=thread_ts if thread_ts != ts else None,
|
||||||
|
channel_prompt=_channel_prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Only react when bot is directly addressed (DM or @mention).
|
# Only react when bot is directly addressed (DM or @mention).
|
||||||
|
|
|
||||||
|
|
@ -2775,6 +2775,15 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||||
reply_to_id = str(message.reply_to_message.message_id)
|
reply_to_id = str(message.reply_to_message.message_id)
|
||||||
reply_to_text = message.reply_to_message.text or message.reply_to_message.caption or None
|
reply_to_text = message.reply_to_message.text or message.reply_to_message.caption or None
|
||||||
|
|
||||||
|
# Per-channel/topic ephemeral prompt
|
||||||
|
from gateway.platforms.base import resolve_channel_prompt
|
||||||
|
_chat_id_str = str(chat.id)
|
||||||
|
_channel_prompt = resolve_channel_prompt(
|
||||||
|
self.config.extra,
|
||||||
|
thread_id_str or _chat_id_str,
|
||||||
|
_chat_id_str if thread_id_str else None,
|
||||||
|
)
|
||||||
|
|
||||||
return MessageEvent(
|
return MessageEvent(
|
||||||
text=message.text or "",
|
text=message.text or "",
|
||||||
message_type=msg_type,
|
message_type=msg_type,
|
||||||
|
|
@ -2784,6 +2793,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||||
reply_to_message_id=reply_to_id,
|
reply_to_message_id=reply_to_id,
|
||||||
reply_to_text=reply_to_text,
|
reply_to_text=reply_to_text,
|
||||||
auto_skill=topic_skill,
|
auto_skill=topic_skill,
|
||||||
|
channel_prompt=_channel_prompt,
|
||||||
timestamp=message.date,
|
timestamp=message.date,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2891,6 +2891,7 @@ class GatewayRunner:
|
||||||
message_type=_MT.TEXT,
|
message_type=_MT.TEXT,
|
||||||
source=event.source,
|
source=event.source,
|
||||||
message_id=event.message_id,
|
message_id=event.message_id,
|
||||||
|
channel_prompt=event.channel_prompt,
|
||||||
)
|
)
|
||||||
adapter._pending_messages[_quick_key] = queued_event
|
adapter._pending_messages[_quick_key] = queued_event
|
||||||
return "Queued for the next turn."
|
return "Queued for the next turn."
|
||||||
|
|
@ -3875,6 +3876,7 @@ class GatewayRunner:
|
||||||
session_id=session_entry.session_id,
|
session_id=session_entry.session_id,
|
||||||
session_key=session_key,
|
session_key=session_key,
|
||||||
event_message_id=event.message_id,
|
event_message_id=event.message_id,
|
||||||
|
channel_prompt=event.channel_prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Stop persistent typing indicator now that the agent is done
|
# Stop persistent typing indicator now that the agent is done
|
||||||
|
|
@ -5186,6 +5188,7 @@ class GatewayRunner:
|
||||||
message_type=MessageType.TEXT,
|
message_type=MessageType.TEXT,
|
||||||
source=source,
|
source=source,
|
||||||
raw_message=event.raw_message,
|
raw_message=event.raw_message,
|
||||||
|
channel_prompt=event.channel_prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Let the normal message handler process it
|
# Let the normal message handler process it
|
||||||
|
|
@ -8166,6 +8169,7 @@ class GatewayRunner:
|
||||||
session_key: str = None,
|
session_key: str = None,
|
||||||
_interrupt_depth: int = 0,
|
_interrupt_depth: int = 0,
|
||||||
event_message_id: Optional[str] = None,
|
event_message_id: Optional[str] = None,
|
||||||
|
channel_prompt: Optional[str] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Run the agent with the given message and context.
|
Run the agent with the given message and context.
|
||||||
|
|
@ -8520,8 +8524,12 @@ class GatewayRunner:
|
||||||
# Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
|
# Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
|
||||||
platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value
|
platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value
|
||||||
|
|
||||||
# Combine platform context with user-configured ephemeral system prompt
|
# Combine platform context, per-channel context, and the user-configured
|
||||||
|
# ephemeral system prompt.
|
||||||
combined_ephemeral = context_prompt or ""
|
combined_ephemeral = context_prompt or ""
|
||||||
|
event_channel_prompt = (channel_prompt or "").strip()
|
||||||
|
if event_channel_prompt:
|
||||||
|
combined_ephemeral = (combined_ephemeral + "\n\n" + event_channel_prompt).strip()
|
||||||
if self._ephemeral_system_prompt:
|
if self._ephemeral_system_prompt:
|
||||||
combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
|
combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
|
||||||
|
|
||||||
|
|
@ -9473,6 +9481,7 @@ class GatewayRunner:
|
||||||
session_key=session_key,
|
session_key=session_key,
|
||||||
_interrupt_depth=_interrupt_depth + 1,
|
_interrupt_depth=_interrupt_depth + 1,
|
||||||
event_message_id=next_message_id,
|
event_message_id=next_message_id,
|
||||||
|
channel_prompt=pending_event.channel_prompt,
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
# Stop progress sender, interrupt monitor, and notification task
|
# Stop progress sender, interrupt monitor, and notification task
|
||||||
|
|
|
||||||
|
|
@ -274,6 +274,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||||
api_key_env_vars=("XIAOMI_API_KEY",),
|
api_key_env_vars=("XIAOMI_API_KEY",),
|
||||||
base_url_env_var="XIAOMI_BASE_URL",
|
base_url_env_var="XIAOMI_BASE_URL",
|
||||||
),
|
),
|
||||||
|
"bedrock": ProviderConfig(
|
||||||
|
id="bedrock",
|
||||||
|
name="AWS Bedrock",
|
||||||
|
auth_type="aws_sdk",
|
||||||
|
inference_base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
|
||||||
|
api_key_env_vars=(),
|
||||||
|
base_url_env_var="BEDROCK_BASE_URL",
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -924,6 +932,7 @@ def resolve_provider(
|
||||||
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth",
|
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth",
|
||||||
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
|
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
|
||||||
"mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
|
"mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
|
||||||
|
"aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
|
||||||
"go": "opencode-go", "opencode-go-sub": "opencode-go",
|
"go": "opencode-go", "opencode-go-sub": "opencode-go",
|
||||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||||
# Local server aliases — route through the generic custom provider
|
# Local server aliases — route through the generic custom provider
|
||||||
|
|
@ -980,6 +989,15 @@ def resolve_provider(
|
||||||
if has_usable_secret(os.getenv(env_var, "")):
|
if has_usable_secret(os.getenv(env_var, "")):
|
||||||
return pid
|
return pid
|
||||||
|
|
||||||
|
# AWS Bedrock — detect via boto3 credential chain (IAM roles, SSO, env vars).
|
||||||
|
# This runs after API-key providers so explicit keys always win.
|
||||||
|
try:
|
||||||
|
from agent.bedrock_adapter import has_aws_credentials
|
||||||
|
if has_aws_credentials():
|
||||||
|
return "bedrock"
|
||||||
|
except ImportError:
|
||||||
|
pass # boto3 not installed — skip Bedrock auto-detection
|
||||||
|
|
||||||
raise AuthError(
|
raise AuthError(
|
||||||
"No inference provider configured. Run 'hermes model' to choose a "
|
"No inference provider configured. Run 'hermes model' to choose a "
|
||||||
"provider and model, or set an API key (OPENROUTER_API_KEY, "
|
"provider and model, or set an API key (OPENROUTER_API_KEY, "
|
||||||
|
|
@ -2446,6 +2464,13 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
|
||||||
pconfig = PROVIDER_REGISTRY.get(target)
|
pconfig = PROVIDER_REGISTRY.get(target)
|
||||||
if pconfig and pconfig.auth_type == "api_key":
|
if pconfig and pconfig.auth_type == "api_key":
|
||||||
return get_api_key_provider_status(target)
|
return get_api_key_provider_status(target)
|
||||||
|
# AWS SDK providers (Bedrock) — check via boto3 credential chain
|
||||||
|
if pconfig and pconfig.auth_type == "aws_sdk":
|
||||||
|
try:
|
||||||
|
from agent.bedrock_adapter import has_aws_credentials
|
||||||
|
return {"logged_in": has_aws_credentials(), "provider": target}
|
||||||
|
except ImportError:
|
||||||
|
return {"logged_in": False, "provider": target, "error": "boto3 not installed"}
|
||||||
return {"logged_in": False}
|
return {"logged_in": False}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -368,6 +368,27 @@ def _interactive_auth() -> None:
|
||||||
print("=" * 50)
|
print("=" * 50)
|
||||||
|
|
||||||
auth_list_command(SimpleNamespace(provider=None))
|
auth_list_command(SimpleNamespace(provider=None))
|
||||||
|
|
||||||
|
# Show AWS Bedrock credential status (not in the pool — uses boto3 chain)
|
||||||
|
try:
|
||||||
|
from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
|
||||||
|
if has_aws_credentials():
|
||||||
|
auth_source = resolve_aws_auth_env_var() or "unknown"
|
||||||
|
region = resolve_bedrock_region()
|
||||||
|
print(f"bedrock (AWS SDK credential chain):")
|
||||||
|
print(f" Auth: {auth_source}")
|
||||||
|
print(f" Region: {region}")
|
||||||
|
try:
|
||||||
|
import boto3
|
||||||
|
sts = boto3.client("sts", region_name=region)
|
||||||
|
identity = sts.get_caller_identity()
|
||||||
|
arn = identity.get("Arn", "unknown")
|
||||||
|
print(f" Identity: {arn}")
|
||||||
|
except Exception:
|
||||||
|
print(f" Identity: (could not resolve — boto3 STS call failed)")
|
||||||
|
print()
|
||||||
|
except ImportError:
|
||||||
|
pass # boto3 or bedrock_adapter not available
|
||||||
print()
|
print()
|
||||||
|
|
||||||
# Main menu
|
# Main menu
|
||||||
|
|
|
||||||
|
|
@ -419,6 +419,27 @@ DEFAULT_CONFIG = {
|
||||||
"protect_last_n": 20, # minimum recent messages to keep uncompressed
|
"protect_last_n": 20, # minimum recent messages to keep uncompressed
|
||||||
|
|
||||||
},
|
},
|
||||||
|
|
||||||
|
# AWS Bedrock provider configuration.
|
||||||
|
# Only used when model.provider is "bedrock".
|
||||||
|
"bedrock": {
|
||||||
|
"region": "", # AWS region for Bedrock API calls (empty = AWS_REGION env var → us-east-1)
|
||||||
|
"discovery": {
|
||||||
|
"enabled": True, # Auto-discover models via ListFoundationModels
|
||||||
|
"provider_filter": [], # Only show models from these providers (e.g. ["anthropic", "amazon"])
|
||||||
|
"refresh_interval": 3600, # Cache discovery results for this many seconds
|
||||||
|
},
|
||||||
|
"guardrail": {
|
||||||
|
# Amazon Bedrock Guardrails — content filtering and safety policies.
|
||||||
|
# Create a guardrail in the Bedrock console, then set the ID and version here.
|
||||||
|
# See: https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html
|
||||||
|
"guardrail_identifier": "", # e.g. "abc123def456"
|
||||||
|
"guardrail_version": "", # e.g. "1" or "DRAFT"
|
||||||
|
"stream_processing_mode": "async", # "sync" or "async"
|
||||||
|
"trace": "disabled", # "enabled", "disabled", or "enabled_full"
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
"smart_model_routing": {
|
"smart_model_routing": {
|
||||||
"enabled": False,
|
"enabled": False,
|
||||||
"max_simple_chars": 160,
|
"max_simple_chars": 160,
|
||||||
|
|
@ -638,6 +659,7 @@ DEFAULT_CONFIG = {
|
||||||
"allowed_channels": "", # If set, bot ONLY responds in these channel IDs (whitelist)
|
"allowed_channels": "", # If set, bot ONLY responds in these channel IDs (whitelist)
|
||||||
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
||||||
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
||||||
|
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
|
||||||
},
|
},
|
||||||
|
|
||||||
# WhatsApp platform settings (gateway mode)
|
# WhatsApp platform settings (gateway mode)
|
||||||
|
|
@ -648,6 +670,21 @@ DEFAULT_CONFIG = {
|
||||||
# Supports \n for newlines, e.g. "🤖 *My Bot*\n──────\n"
|
# Supports \n for newlines, e.g. "🤖 *My Bot*\n──────\n"
|
||||||
},
|
},
|
||||||
|
|
||||||
|
# Telegram platform settings (gateway mode)
|
||||||
|
"telegram": {
|
||||||
|
"channel_prompts": {}, # Per-chat/topic ephemeral system prompts (topics inherit from parent group)
|
||||||
|
},
|
||||||
|
|
||||||
|
# Slack platform settings (gateway mode)
|
||||||
|
"slack": {
|
||||||
|
"channel_prompts": {}, # Per-channel ephemeral system prompts
|
||||||
|
},
|
||||||
|
|
||||||
|
# Mattermost platform settings (gateway mode)
|
||||||
|
"mattermost": {
|
||||||
|
"channel_prompts": {}, # Per-channel ephemeral system prompts
|
||||||
|
},
|
||||||
|
|
||||||
# Approval mode for dangerous commands:
|
# Approval mode for dangerous commands:
|
||||||
# manual — always prompt the user (default)
|
# manual — always prompt the user (default)
|
||||||
# smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk
|
# smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk
|
||||||
|
|
@ -703,7 +740,7 @@ DEFAULT_CONFIG = {
|
||||||
},
|
},
|
||||||
|
|
||||||
# Config schema version - bump this when adding new required fields
|
# Config schema version - bump this when adding new required fields
|
||||||
"_config_version": 17,
|
"_config_version": 18,
|
||||||
}
|
}
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
@ -974,6 +1011,22 @@ OPTIONAL_ENV_VARS = {
|
||||||
"category": "provider",
|
"category": "provider",
|
||||||
"advanced": True,
|
"advanced": True,
|
||||||
},
|
},
|
||||||
|
"AWS_REGION": {
|
||||||
|
"description": "AWS region for Bedrock API calls (e.g. us-east-1, eu-central-1)",
|
||||||
|
"prompt": "AWS Region",
|
||||||
|
"url": "https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-regions.html",
|
||||||
|
"password": False,
|
||||||
|
"category": "provider",
|
||||||
|
"advanced": True,
|
||||||
|
},
|
||||||
|
"AWS_PROFILE": {
|
||||||
|
"description": "AWS named profile for Bedrock authentication (from ~/.aws/credentials)",
|
||||||
|
"prompt": "AWS Profile",
|
||||||
|
"url": None,
|
||||||
|
"password": False,
|
||||||
|
"category": "provider",
|
||||||
|
"advanced": True,
|
||||||
|
},
|
||||||
|
|
||||||
# ── Tool API keys ──
|
# ── Tool API keys ──
|
||||||
"EXA_API_KEY": {
|
"EXA_API_KEY": {
|
||||||
|
|
|
||||||
|
|
@ -860,6 +860,31 @@ def run_doctor(args):
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)} ")
|
print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)} ")
|
||||||
|
|
||||||
|
# -- AWS Bedrock --
|
||||||
|
# Bedrock uses the AWS SDK credential chain, not API keys.
|
||||||
|
try:
|
||||||
|
from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
|
||||||
|
if has_aws_credentials():
|
||||||
|
_auth_var = resolve_aws_auth_env_var()
|
||||||
|
_region = resolve_bedrock_region()
|
||||||
|
_label = "AWS Bedrock".ljust(20)
|
||||||
|
print(f" Checking AWS Bedrock...", end="", flush=True)
|
||||||
|
try:
|
||||||
|
import boto3
|
||||||
|
_br_client = boto3.client("bedrock", region_name=_region)
|
||||||
|
_br_resp = _br_client.list_foundation_models()
|
||||||
|
_model_count = len(_br_resp.get("modelSummaries", []))
|
||||||
|
print(f"\r {color('✓', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)} ")
|
||||||
|
except ImportError:
|
||||||
|
print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color('(boto3 not installed — pip install hermes-agent[bedrock])', Colors.DIM)} ")
|
||||||
|
issues.append("Install boto3 for Bedrock: pip install hermes-agent[bedrock]")
|
||||||
|
except Exception as _e:
|
||||||
|
_err_name = type(_e).__name__
|
||||||
|
print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)} ")
|
||||||
|
issues.append(f"AWS Bedrock: {_err_name} — check IAM permissions for bedrock:ListFoundationModels")
|
||||||
|
except ImportError:
|
||||||
|
pass # bedrock_adapter not available — skip silently
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Check: Submodules
|
# Check: Submodules
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
|
||||||
|
|
@ -222,7 +222,7 @@ def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = Fals
|
||||||
current_cmd = ""
|
current_cmd = ""
|
||||||
else:
|
else:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
["ps", "eww", "-ax", "-o", "pid=,command="],
|
["ps", "-A", "eww", "-o", "pid=,command="],
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
text=True,
|
text=True,
|
||||||
timeout=10,
|
timeout=10,
|
||||||
|
|
|
||||||
|
|
@ -1370,6 +1370,8 @@ def select_provider_and_model(args=None):
|
||||||
_model_flow_anthropic(config, current_model)
|
_model_flow_anthropic(config, current_model)
|
||||||
elif selected_provider == "kimi-coding":
|
elif selected_provider == "kimi-coding":
|
||||||
_model_flow_kimi(config, current_model)
|
_model_flow_kimi(config, current_model)
|
||||||
|
elif selected_provider == "bedrock":
|
||||||
|
_model_flow_bedrock(config, current_model)
|
||||||
elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"):
|
elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"):
|
||||||
_model_flow_api_key_provider(config, selected_provider, current_model)
|
_model_flow_api_key_provider(config, selected_provider, current_model)
|
||||||
|
|
||||||
|
|
@ -2656,6 +2658,252 @@ def _model_flow_kimi(config, current_model=""):
|
||||||
print("No change.")
|
print("No change.")
|
||||||
|
|
||||||
|
|
||||||
|
def _model_flow_bedrock_api_key(config, region, current_model=""):
|
||||||
|
"""Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.
|
||||||
|
|
||||||
|
For developers who don't have an AWS account but received a Bedrock API Key
|
||||||
|
from their AWS admin. Works like any OpenAI-compatible endpoint.
|
||||||
|
"""
|
||||||
|
from hermes_cli.auth import _prompt_model_selection, _save_model_choice, deactivate_provider
|
||||||
|
from hermes_cli.config import load_config, save_config, get_env_value, save_env_value
|
||||||
|
from hermes_cli.models import _PROVIDER_MODELS
|
||||||
|
|
||||||
|
mantle_base_url = f"https://bedrock-mantle.{region}.api.aws/v1"
|
||||||
|
|
||||||
|
# Prompt for API key
|
||||||
|
existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
|
||||||
|
if existing_key:
|
||||||
|
print(f" Bedrock API Key: {existing_key[:12]}... ✓")
|
||||||
|
else:
|
||||||
|
print(f" Endpoint: {mantle_base_url}")
|
||||||
|
print()
|
||||||
|
try:
|
||||||
|
import getpass
|
||||||
|
api_key = getpass.getpass(" Bedrock API Key: ").strip()
|
||||||
|
except (KeyboardInterrupt, EOFError):
|
||||||
|
print()
|
||||||
|
return
|
||||||
|
if not api_key:
|
||||||
|
print(" Cancelled.")
|
||||||
|
return
|
||||||
|
save_env_value("AWS_BEARER_TOKEN_BEDROCK", api_key)
|
||||||
|
existing_key = api_key
|
||||||
|
print(" ✓ API key saved.")
|
||||||
|
print()
|
||||||
|
|
||||||
|
# Model selection — use static list (mantle doesn't need boto3 for discovery)
|
||||||
|
model_list = _PROVIDER_MODELS.get("bedrock", [])
|
||||||
|
print(f" Showing {len(model_list)} curated models")
|
||||||
|
|
||||||
|
if model_list:
|
||||||
|
selected = _prompt_model_selection(model_list, current_model=current_model)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
selected = input(" Model ID: ").strip()
|
||||||
|
except (KeyboardInterrupt, EOFError):
|
||||||
|
selected = None
|
||||||
|
|
||||||
|
if selected:
|
||||||
|
_save_model_choice(selected)
|
||||||
|
|
||||||
|
# Save as custom provider pointing to bedrock-mantle
|
||||||
|
cfg = load_config()
|
||||||
|
model = cfg.get("model")
|
||||||
|
if not isinstance(model, dict):
|
||||||
|
model = {"default": model} if model else {}
|
||||||
|
cfg["model"] = model
|
||||||
|
model["provider"] = "custom"
|
||||||
|
model["base_url"] = mantle_base_url
|
||||||
|
model.pop("api_mode", None) # chat_completions is the default
|
||||||
|
|
||||||
|
# Also save region in bedrock config for reference
|
||||||
|
bedrock_cfg = cfg.get("bedrock", {})
|
||||||
|
if not isinstance(bedrock_cfg, dict):
|
||||||
|
bedrock_cfg = {}
|
||||||
|
bedrock_cfg["region"] = region
|
||||||
|
cfg["bedrock"] = bedrock_cfg
|
||||||
|
|
||||||
|
# Save the API key env var name so hermes knows where to find it
|
||||||
|
save_env_value("OPENAI_API_KEY", existing_key)
|
||||||
|
save_env_value("OPENAI_BASE_URL", mantle_base_url)
|
||||||
|
|
||||||
|
save_config(cfg)
|
||||||
|
deactivate_provider()
|
||||||
|
|
||||||
|
print(f" Default model set to: {selected} (via Bedrock API Key, {region})")
|
||||||
|
print(f" Endpoint: {mantle_base_url}")
|
||||||
|
else:
|
||||||
|
print(" No change.")
|
||||||
|
|
||||||
|
|
||||||
|
def _model_flow_bedrock(config, current_model=""):
|
||||||
|
"""AWS Bedrock provider: verify credentials, pick region, discover models.
|
||||||
|
|
||||||
|
Uses the native Converse API via boto3 — not the OpenAI-compatible endpoint.
|
||||||
|
Auth is handled by the AWS SDK default credential chain (env vars, profile,
|
||||||
|
instance role), so no API key prompt is needed.
|
||||||
|
"""
|
||||||
|
from hermes_cli.auth import _prompt_model_selection, _save_model_choice, deactivate_provider
|
||||||
|
from hermes_cli.config import load_config, save_config
|
||||||
|
from hermes_cli.models import _PROVIDER_MODELS
|
||||||
|
|
||||||
|
# 1. Check for AWS credentials
|
||||||
|
try:
|
||||||
|
from agent.bedrock_adapter import (
|
||||||
|
has_aws_credentials,
|
||||||
|
resolve_aws_auth_env_var,
|
||||||
|
resolve_bedrock_region,
|
||||||
|
discover_bedrock_models,
|
||||||
|
)
|
||||||
|
except ImportError:
|
||||||
|
print(" ✗ boto3 is not installed. Install it with:")
|
||||||
|
print(" pip install boto3")
|
||||||
|
print()
|
||||||
|
return
|
||||||
|
|
||||||
|
if not has_aws_credentials():
|
||||||
|
print(" ⚠ No AWS credentials detected via environment variables.")
|
||||||
|
print(" Bedrock will use boto3's default credential chain (IMDS, SSO, etc.)")
|
||||||
|
print()
|
||||||
|
|
||||||
|
auth_var = resolve_aws_auth_env_var()
|
||||||
|
if auth_var:
|
||||||
|
print(f" AWS credentials: {auth_var} ✓")
|
||||||
|
else:
|
||||||
|
print(" AWS credentials: boto3 default chain (instance role / SSO)")
|
||||||
|
print()
|
||||||
|
|
||||||
|
# 2. Region selection
|
||||||
|
current_region = resolve_bedrock_region()
|
||||||
|
try:
|
||||||
|
region_input = input(f" AWS Region [{current_region}]: ").strip()
|
||||||
|
except (KeyboardInterrupt, EOFError):
|
||||||
|
print()
|
||||||
|
return
|
||||||
|
region = region_input or current_region
|
||||||
|
|
||||||
|
# 2b. Authentication mode
|
||||||
|
print(" Choose authentication method:")
|
||||||
|
print()
|
||||||
|
print(" 1. IAM credential chain (recommended)")
|
||||||
|
print(" Works with EC2 instance roles, SSO, env vars, aws configure")
|
||||||
|
print(" 2. Bedrock API Key")
|
||||||
|
print(" Enter your Bedrock API Key directly — also supports")
|
||||||
|
print(" team scenarios where an admin distributes keys")
|
||||||
|
print()
|
||||||
|
try:
|
||||||
|
auth_choice = input(" Choice [1]: ").strip()
|
||||||
|
except (KeyboardInterrupt, EOFError):
|
||||||
|
print()
|
||||||
|
return
|
||||||
|
|
||||||
|
if auth_choice == "2":
|
||||||
|
_model_flow_bedrock_api_key(config, region, current_model)
|
||||||
|
return
|
||||||
|
|
||||||
|
# 3. Model discovery — try live API first, fall back to static list
|
||||||
|
print(f" Discovering models in {region}...")
|
||||||
|
live_models = discover_bedrock_models(region)
|
||||||
|
|
||||||
|
if live_models:
|
||||||
|
_EXCLUDE_PREFIXES = (
|
||||||
|
"stability.", "cohere.embed", "twelvelabs.", "us.stability.",
|
||||||
|
"us.cohere.embed", "us.twelvelabs.", "global.cohere.embed",
|
||||||
|
"global.twelvelabs.",
|
||||||
|
)
|
||||||
|
_EXCLUDE_SUBSTRINGS = ("safeguard", "voxtral", "palmyra-vision")
|
||||||
|
filtered = []
|
||||||
|
for m in live_models:
|
||||||
|
mid = m["id"]
|
||||||
|
if any(mid.startswith(p) for p in _EXCLUDE_PREFIXES):
|
||||||
|
continue
|
||||||
|
if any(s in mid.lower() for s in _EXCLUDE_SUBSTRINGS):
|
||||||
|
continue
|
||||||
|
filtered.append(m)
|
||||||
|
|
||||||
|
# Deduplicate: prefer inference profiles (us.*, global.*) over bare
|
||||||
|
# foundation model IDs.
|
||||||
|
profile_base_ids = set()
|
||||||
|
for m in filtered:
|
||||||
|
mid = m["id"]
|
||||||
|
if mid.startswith(("us.", "global.")):
|
||||||
|
base = mid.split(".", 1)[1] if "." in mid[3:] else mid
|
||||||
|
profile_base_ids.add(base)
|
||||||
|
|
||||||
|
deduped = []
|
||||||
|
for m in filtered:
|
||||||
|
mid = m["id"]
|
||||||
|
if not mid.startswith(("us.", "global.")) and mid in profile_base_ids:
|
||||||
|
continue
|
||||||
|
deduped.append(m)
|
||||||
|
|
||||||
|
_RECOMMENDED = [
|
||||||
|
"us.anthropic.claude-sonnet-4-6",
|
||||||
|
"us.anthropic.claude-opus-4-6",
|
||||||
|
"us.anthropic.claude-haiku-4-5",
|
||||||
|
"us.amazon.nova-pro",
|
||||||
|
"us.amazon.nova-lite",
|
||||||
|
"us.amazon.nova-micro",
|
||||||
|
"deepseek.v3",
|
||||||
|
"us.meta.llama4-maverick",
|
||||||
|
"us.meta.llama4-scout",
|
||||||
|
]
|
||||||
|
|
||||||
|
def _sort_key(m):
|
||||||
|
mid = m["id"]
|
||||||
|
for i, rec in enumerate(_RECOMMENDED):
|
||||||
|
if mid.startswith(rec):
|
||||||
|
return (0, i, mid)
|
||||||
|
if mid.startswith("global."):
|
||||||
|
return (1, 0, mid)
|
||||||
|
return (2, 0, mid)
|
||||||
|
|
||||||
|
deduped.sort(key=_sort_key)
|
||||||
|
model_list = [m["id"] for m in deduped]
|
||||||
|
print(f" Found {len(model_list)} text model(s) (filtered from {len(live_models)} total)")
|
||||||
|
else:
|
||||||
|
model_list = _PROVIDER_MODELS.get("bedrock", [])
|
||||||
|
if model_list:
|
||||||
|
print(f" Using {len(model_list)} curated models (live discovery unavailable)")
|
||||||
|
else:
|
||||||
|
print(" No models found. Check IAM permissions for bedrock:ListFoundationModels.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# 4. Model selection
|
||||||
|
if model_list:
|
||||||
|
selected = _prompt_model_selection(model_list, current_model=current_model)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
selected = input(" Model ID: ").strip()
|
||||||
|
except (KeyboardInterrupt, EOFError):
|
||||||
|
selected = None
|
||||||
|
|
||||||
|
if selected:
|
||||||
|
_save_model_choice(selected)
|
||||||
|
|
||||||
|
cfg = load_config()
|
||||||
|
model = cfg.get("model")
|
||||||
|
if not isinstance(model, dict):
|
||||||
|
model = {"default": model} if model else {}
|
||||||
|
cfg["model"] = model
|
||||||
|
model["provider"] = "bedrock"
|
||||||
|
model["base_url"] = f"https://bedrock-runtime.{region}.amazonaws.com"
|
||||||
|
model.pop("api_mode", None) # bedrock_converse is auto-detected
|
||||||
|
|
||||||
|
bedrock_cfg = cfg.get("bedrock", {})
|
||||||
|
if not isinstance(bedrock_cfg, dict):
|
||||||
|
bedrock_cfg = {}
|
||||||
|
bedrock_cfg["region"] = region
|
||||||
|
cfg["bedrock"] = bedrock_cfg
|
||||||
|
|
||||||
|
save_config(cfg)
|
||||||
|
deactivate_provider()
|
||||||
|
|
||||||
|
print(f" Default model set to: {selected} (via AWS Bedrock, {region})")
|
||||||
|
else:
|
||||||
|
print(" No change.")
|
||||||
|
|
||||||
|
|
||||||
def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||||
"""Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
|
"""Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
|
||||||
from hermes_cli.auth import (
|
from hermes_cli.auth import (
|
||||||
|
|
|
||||||
|
|
@ -303,6 +303,22 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||||
"XiaomiMiMo/MiMo-V2-Flash",
|
"XiaomiMiMo/MiMo-V2-Flash",
|
||||||
"moonshotai/Kimi-K2-Thinking",
|
"moonshotai/Kimi-K2-Thinking",
|
||||||
],
|
],
|
||||||
|
# AWS Bedrock — static fallback list used when dynamic discovery is
|
||||||
|
# unavailable (no boto3, no credentials, or API error). The agent
|
||||||
|
# prefers live discovery via ListFoundationModels + ListInferenceProfiles.
|
||||||
|
# Use inference profile IDs (us.*) since most models require them.
|
||||||
|
"bedrock": [
|
||||||
|
"us.anthropic.claude-sonnet-4-6",
|
||||||
|
"us.anthropic.claude-opus-4-6-v1",
|
||||||
|
"us.anthropic.claude-haiku-4-5-20251001-v1:0",
|
||||||
|
"us.anthropic.claude-sonnet-4-5-20250929-v1:0",
|
||||||
|
"us.amazon.nova-pro-v1:0",
|
||||||
|
"us.amazon.nova-lite-v1:0",
|
||||||
|
"us.amazon.nova-micro-v1:0",
|
||||||
|
"deepseek.v3.2",
|
||||||
|
"us.meta.llama4-maverick-17b-instruct-v1:0",
|
||||||
|
"us.meta.llama4-scout-17b-instruct-v1:0",
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -536,6 +552,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||||
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
|
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
|
||||||
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
|
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
|
||||||
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"),
|
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"),
|
||||||
|
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Derived dicts — used throughout the codebase
|
# Derived dicts — used throughout the codebase
|
||||||
|
|
@ -587,6 +604,10 @@ _PROVIDER_ALIASES = {
|
||||||
"huggingface-hub": "huggingface",
|
"huggingface-hub": "huggingface",
|
||||||
"mimo": "xiaomi",
|
"mimo": "xiaomi",
|
||||||
"xiaomi-mimo": "xiaomi",
|
"xiaomi-mimo": "xiaomi",
|
||||||
|
"aws": "bedrock",
|
||||||
|
"aws-bedrock": "bedrock",
|
||||||
|
"amazon-bedrock": "bedrock",
|
||||||
|
"amazon": "bedrock",
|
||||||
"grok": "xai",
|
"grok": "xai",
|
||||||
"x-ai": "xai",
|
"x-ai": "xai",
|
||||||
"x.ai": "xai",
|
"x.ai": "xai",
|
||||||
|
|
@ -1955,6 +1976,42 @@ def validate_requested_model(
|
||||||
|
|
||||||
# api_models is None — couldn't reach API. Accept and persist,
|
# api_models is None — couldn't reach API. Accept and persist,
|
||||||
# but warn so typos don't silently break things.
|
# but warn so typos don't silently break things.
|
||||||
|
|
||||||
|
# Bedrock: use our own discovery instead of HTTP /models endpoint.
|
||||||
|
# Bedrock's bedrock-runtime URL doesn't support /models — it uses the
|
||||||
|
# AWS SDK control plane (ListFoundationModels + ListInferenceProfiles).
|
||||||
|
if normalized == "bedrock":
|
||||||
|
try:
|
||||||
|
from agent.bedrock_adapter import discover_bedrock_models, resolve_bedrock_region
|
||||||
|
region = resolve_bedrock_region()
|
||||||
|
discovered = discover_bedrock_models(region)
|
||||||
|
discovered_ids = {m["id"] for m in discovered}
|
||||||
|
if requested in discovered_ids:
|
||||||
|
return {
|
||||||
|
"accepted": True,
|
||||||
|
"persist": True,
|
||||||
|
"recognized": True,
|
||||||
|
"message": None,
|
||||||
|
}
|
||||||
|
# Not in discovered list — still accept (user may have custom
|
||||||
|
# inference profiles or cross-account access), but warn.
|
||||||
|
suggestions = get_close_matches(requested, list(discovered_ids), n=3, cutoff=0.4)
|
||||||
|
suggestion_text = ""
|
||||||
|
if suggestions:
|
||||||
|
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
|
||||||
|
return {
|
||||||
|
"accepted": True,
|
||||||
|
"persist": True,
|
||||||
|
"recognized": False,
|
||||||
|
"message": (
|
||||||
|
f"Note: `{requested}` was not found in Bedrock model discovery for {region}. "
|
||||||
|
f"It may still work with custom inference profiles or cross-account access."
|
||||||
|
f"{suggestion_text}"
|
||||||
|
),
|
||||||
|
}
|
||||||
|
except Exception:
|
||||||
|
pass # Fall through to generic warning
|
||||||
|
|
||||||
provider_label = _PROVIDER_LABELS.get(normalized, normalized)
|
provider_label = _PROVIDER_LABELS.get(normalized, normalized)
|
||||||
return {
|
return {
|
||||||
"accepted": False,
|
"accepted": False,
|
||||||
|
|
|
||||||
|
|
@ -236,6 +236,12 @@ ALIASES: Dict[str, str] = {
|
||||||
"mimo": "xiaomi",
|
"mimo": "xiaomi",
|
||||||
"xiaomi-mimo": "xiaomi",
|
"xiaomi-mimo": "xiaomi",
|
||||||
|
|
||||||
|
# bedrock
|
||||||
|
"aws": "bedrock",
|
||||||
|
"aws-bedrock": "bedrock",
|
||||||
|
"amazon-bedrock": "bedrock",
|
||||||
|
"amazon": "bedrock",
|
||||||
|
|
||||||
# arcee
|
# arcee
|
||||||
"arcee-ai": "arcee",
|
"arcee-ai": "arcee",
|
||||||
"arceeai": "arcee",
|
"arceeai": "arcee",
|
||||||
|
|
@ -262,6 +268,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
|
||||||
"copilot-acp": "GitHub Copilot ACP",
|
"copilot-acp": "GitHub Copilot ACP",
|
||||||
"xiaomi": "Xiaomi MiMo",
|
"xiaomi": "Xiaomi MiMo",
|
||||||
"local": "Local endpoint",
|
"local": "Local endpoint",
|
||||||
|
"bedrock": "AWS Bedrock",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -271,6 +278,7 @@ TRANSPORT_TO_API_MODE: Dict[str, str] = {
|
||||||
"openai_chat": "chat_completions",
|
"openai_chat": "chat_completions",
|
||||||
"anthropic_messages": "anthropic_messages",
|
"anthropic_messages": "anthropic_messages",
|
||||||
"codex_responses": "codex_responses",
|
"codex_responses": "codex_responses",
|
||||||
|
"bedrock_converse": "bedrock_converse",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -388,6 +396,10 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
|
||||||
if pdef is not None:
|
if pdef is not None:
|
||||||
return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")
|
return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")
|
||||||
|
|
||||||
|
# Direct provider checks for providers not in HERMES_OVERLAYS
|
||||||
|
if provider == "bedrock":
|
||||||
|
return "bedrock_converse"
|
||||||
|
|
||||||
# URL-based heuristics for custom / unknown providers
|
# URL-based heuristics for custom / unknown providers
|
||||||
if base_url:
|
if base_url:
|
||||||
url_lower = base_url.rstrip("/").lower()
|
url_lower = base_url.rstrip("/").lower()
|
||||||
|
|
@ -395,6 +407,8 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
|
||||||
return "anthropic_messages"
|
return "anthropic_messages"
|
||||||
if "api.openai.com" in url_lower:
|
if "api.openai.com" in url_lower:
|
||||||
return "codex_responses"
|
return "codex_responses"
|
||||||
|
if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
|
||||||
|
return "bedrock_converse"
|
||||||
|
|
||||||
return "chat_completions"
|
return "chat_completions"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -124,7 +124,7 @@ def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str:
|
||||||
return "chat_completions"
|
return "chat_completions"
|
||||||
|
|
||||||
|
|
||||||
_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages"}
|
_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}
|
||||||
|
|
||||||
|
|
||||||
def _parse_api_mode(raw: Any) -> Optional[str]:
|
def _parse_api_mode(raw: Any) -> Optional[str]:
|
||||||
|
|
@ -836,6 +836,77 @@ def resolve_runtime_provider(
|
||||||
"requested_provider": requested_provider,
|
"requested_provider": requested_provider,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# AWS Bedrock (native Converse API via boto3)
|
||||||
|
if provider == "bedrock":
|
||||||
|
from agent.bedrock_adapter import (
|
||||||
|
has_aws_credentials,
|
||||||
|
resolve_aws_auth_env_var,
|
||||||
|
resolve_bedrock_region,
|
||||||
|
is_anthropic_bedrock_model,
|
||||||
|
)
|
||||||
|
# When the user explicitly selected bedrock (not auto-detected),
|
||||||
|
# trust boto3's credential chain — it handles IMDS, ECS task roles,
|
||||||
|
# Lambda execution roles, SSO, and other implicit sources that our
|
||||||
|
# env-var check can't detect.
|
||||||
|
is_explicit = requested_provider in ("bedrock", "aws", "aws-bedrock", "amazon-bedrock", "amazon")
|
||||||
|
if not is_explicit and not has_aws_credentials():
|
||||||
|
raise AuthError(
|
||||||
|
"No AWS credentials found for Bedrock. Configure one of:\n"
|
||||||
|
" - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY\n"
|
||||||
|
" - AWS_PROFILE (for SSO / named profiles)\n"
|
||||||
|
" - IAM instance role (EC2, ECS, Lambda)\n"
|
||||||
|
"Or run 'aws configure' to set up credentials.",
|
||||||
|
code="no_aws_credentials",
|
||||||
|
)
|
||||||
|
# Read bedrock-specific config from config.yaml
|
||||||
|
from hermes_cli.config import load_config as _load_bedrock_config
|
||||||
|
_bedrock_cfg = _load_bedrock_config().get("bedrock", {})
|
||||||
|
# Region priority: config.yaml bedrock.region → env var → us-east-1
|
||||||
|
region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
|
||||||
|
auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
|
||||||
|
# Build guardrail config if configured
|
||||||
|
_gr = _bedrock_cfg.get("guardrail", {})
|
||||||
|
guardrail_config = None
|
||||||
|
if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"):
|
||||||
|
guardrail_config = {
|
||||||
|
"guardrailIdentifier": _gr["guardrail_identifier"],
|
||||||
|
"guardrailVersion": _gr["guardrail_version"],
|
||||||
|
}
|
||||||
|
if _gr.get("stream_processing_mode"):
|
||||||
|
guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"]
|
||||||
|
if _gr.get("trace"):
|
||||||
|
guardrail_config["trace"] = _gr["trace"]
|
||||||
|
# Dual-path routing: Claude models use AnthropicBedrock SDK for full
|
||||||
|
# feature parity (prompt caching, thinking budgets, adaptive thinking).
|
||||||
|
# Non-Claude models use the Converse API for multi-model support.
|
||||||
|
_current_model = str(model_cfg.get("default") or "").strip()
|
||||||
|
if is_anthropic_bedrock_model(_current_model):
|
||||||
|
# Claude on Bedrock → AnthropicBedrock SDK → anthropic_messages path
|
||||||
|
runtime = {
|
||||||
|
"provider": "bedrock",
|
||||||
|
"api_mode": "anthropic_messages",
|
||||||
|
"base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
|
||||||
|
"api_key": "aws-sdk",
|
||||||
|
"source": auth_source,
|
||||||
|
"region": region,
|
||||||
|
"bedrock_anthropic": True, # Signal to use AnthropicBedrock client
|
||||||
|
"requested_provider": requested_provider,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
# Non-Claude (Nova, DeepSeek, Llama, etc.) → Converse API
|
||||||
|
runtime = {
|
||||||
|
"provider": "bedrock",
|
||||||
|
"api_mode": "bedrock_converse",
|
||||||
|
"base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
|
||||||
|
"api_key": "aws-sdk",
|
||||||
|
"source": auth_source,
|
||||||
|
"region": region,
|
||||||
|
"requested_provider": requested_provider,
|
||||||
|
}
|
||||||
|
if guardrail_config:
|
||||||
|
runtime["guardrail_config"] = guardrail_config
|
||||||
|
return runtime
|
||||||
|
|
||||||
# API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
|
# API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
|
||||||
pconfig = PROVIDER_REGISTRY.get(provider)
|
pconfig = PROVIDER_REGISTRY.get(provider)
|
||||||
if pconfig and pconfig.auth_type == "api_key":
|
if pconfig and pconfig.auth_type == "api_key":
|
||||||
|
|
|
||||||
|
|
@ -58,8 +58,7 @@ def resolve_config_path() -> Path:
|
||||||
|
|
||||||
Resolution order:
|
Resolution order:
|
||||||
1. $HERMES_HOME/honcho.json (profile-local, if it exists)
|
1. $HERMES_HOME/honcho.json (profile-local, if it exists)
|
||||||
2. ~/.hermes/honcho.json (default profile — shared host blocks live here)
|
2. ~/.honcho/config.json (global, cross-app interop)
|
||||||
3. ~/.honcho/config.json (global, cross-app interop)
|
|
||||||
|
|
||||||
Returns the global path if none exist (for first-time setup writes).
|
Returns the global path if none exist (for first-time setup writes).
|
||||||
"""
|
"""
|
||||||
|
|
@ -67,11 +66,6 @@ def resolve_config_path() -> Path:
|
||||||
if local_path.exists():
|
if local_path.exists():
|
||||||
return local_path
|
return local_path
|
||||||
|
|
||||||
# Default profile's config — host blocks accumulate here via setup/clone
|
|
||||||
default_path = Path.home() / ".hermes" / "honcho.json"
|
|
||||||
if default_path != local_path and default_path.exists():
|
|
||||||
return default_path
|
|
||||||
|
|
||||||
return GLOBAL_CONFIG_PATH
|
return GLOBAL_CONFIG_PATH
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,7 @@ homeassistant = ["aiohttp>=3.9.0,<4"]
|
||||||
sms = ["aiohttp>=3.9.0,<4"]
|
sms = ["aiohttp>=3.9.0,<4"]
|
||||||
acp = ["agent-client-protocol>=0.9.0,<1.0"]
|
acp = ["agent-client-protocol>=0.9.0,<1.0"]
|
||||||
mistral = ["mistralai>=2.3.0,<3"]
|
mistral = ["mistralai>=2.3.0,<3"]
|
||||||
|
bedrock = ["boto3>=1.35.0,<2"]
|
||||||
termux = [
|
termux = [
|
||||||
# Tested Android / Termux path: keeps the core CLI feature-rich while
|
# Tested Android / Termux path: keeps the core CLI feature-rich while
|
||||||
# avoiding extras that currently depend on non-Android wheels (notably
|
# avoiding extras that currently depend on non-Android wheels (notably
|
||||||
|
|
@ -108,6 +109,7 @@ all = [
|
||||||
"hermes-agent[dingtalk]",
|
"hermes-agent[dingtalk]",
|
||||||
"hermes-agent[feishu]",
|
"hermes-agent[feishu]",
|
||||||
"hermes-agent[mistral]",
|
"hermes-agent[mistral]",
|
||||||
|
"hermes-agent[bedrock]",
|
||||||
"hermes-agent[web]",
|
"hermes-agent[web]",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
296
run_agent.py
296
run_agent.py
|
|
@ -685,7 +685,7 @@ class AIAgent:
|
||||||
self.provider = provider_name or ""
|
self.provider = provider_name or ""
|
||||||
self.acp_command = acp_command or command
|
self.acp_command = acp_command or command
|
||||||
self.acp_args = list(acp_args or args or [])
|
self.acp_args = list(acp_args or args or [])
|
||||||
if api_mode in {"chat_completions", "codex_responses", "anthropic_messages"}:
|
if api_mode in {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}:
|
||||||
self.api_mode = api_mode
|
self.api_mode = api_mode
|
||||||
elif self.provider == "openai-codex":
|
elif self.provider == "openai-codex":
|
||||||
self.api_mode = "codex_responses"
|
self.api_mode = "codex_responses"
|
||||||
|
|
@ -700,6 +700,9 @@ class AIAgent:
|
||||||
# use a URL convention ending in /anthropic. Auto-detect these so the
|
# use a URL convention ending in /anthropic. Auto-detect these so the
|
||||||
# Anthropic Messages API adapter is used instead of chat completions.
|
# Anthropic Messages API adapter is used instead of chat completions.
|
||||||
self.api_mode = "anthropic_messages"
|
self.api_mode = "anthropic_messages"
|
||||||
|
elif self.provider == "bedrock" or "bedrock-runtime" in self._base_url_lower:
|
||||||
|
# AWS Bedrock — auto-detect from provider name or base URL.
|
||||||
|
self.api_mode = "bedrock_converse"
|
||||||
else:
|
else:
|
||||||
self.api_mode = "chat_completions"
|
self.api_mode = "chat_completions"
|
||||||
|
|
||||||
|
|
@ -721,8 +724,11 @@ class AIAgent:
|
||||||
# Responses there. ACP runtimes are excluded: CopilotACPClient
|
# Responses there. ACP runtimes are excluded: CopilotACPClient
|
||||||
# handles its own routing and does not implement the Responses API
|
# handles its own routing and does not implement the Responses API
|
||||||
# surface.
|
# surface.
|
||||||
|
# When api_mode was explicitly provided, respect it — the user
|
||||||
|
# knows what their endpoint supports (#10473).
|
||||||
if (
|
if (
|
||||||
self.api_mode == "chat_completions"
|
api_mode is None
|
||||||
|
and self.api_mode == "chat_completions"
|
||||||
and self.provider != "copilot-acp"
|
and self.provider != "copilot-acp"
|
||||||
and not str(self.base_url or "").lower().startswith("acp://copilot")
|
and not str(self.base_url or "").lower().startswith("acp://copilot")
|
||||||
and not str(self.base_url or "").lower().startswith("acp+tcp://")
|
and not str(self.base_url or "").lower().startswith("acp+tcp://")
|
||||||
|
|
@ -889,24 +895,70 @@ class AIAgent:
|
||||||
|
|
||||||
if self.api_mode == "anthropic_messages":
|
if self.api_mode == "anthropic_messages":
|
||||||
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
|
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
|
||||||
# Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic.
|
# Bedrock + Claude → use AnthropicBedrock SDK for full feature parity
|
||||||
# Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own API key.
|
# (prompt caching, thinking budgets, adaptive thinking).
|
||||||
# Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401).
|
_is_bedrock_anthropic = self.provider == "bedrock"
|
||||||
_is_native_anthropic = self.provider == "anthropic"
|
if _is_bedrock_anthropic:
|
||||||
effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "")
|
from agent.anthropic_adapter import build_anthropic_bedrock_client
|
||||||
self.api_key = effective_key
|
import re as _re
|
||||||
self._anthropic_api_key = effective_key
|
_region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "")
|
||||||
self._anthropic_base_url = base_url
|
_br_region = _region_match.group(1) if _region_match else "us-east-1"
|
||||||
from agent.anthropic_adapter import _is_oauth_token as _is_oat
|
self._bedrock_region = _br_region
|
||||||
self._is_anthropic_oauth = _is_oat(effective_key)
|
self._anthropic_client = build_anthropic_bedrock_client(_br_region)
|
||||||
self._anthropic_client = build_anthropic_client(effective_key, base_url)
|
self._anthropic_api_key = "aws-sdk"
|
||||||
# No OpenAI client needed for Anthropic mode
|
self._anthropic_base_url = base_url
|
||||||
|
self._is_anthropic_oauth = False
|
||||||
|
self.api_key = "aws-sdk"
|
||||||
|
self.client = None
|
||||||
|
self._client_kwargs = {}
|
||||||
|
if not self.quiet_mode:
|
||||||
|
print(f"🤖 AI Agent initialized with model: {self.model} (AWS Bedrock + AnthropicBedrock SDK, {_br_region})")
|
||||||
|
else:
|
||||||
|
# Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic.
|
||||||
|
# Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own API key.
|
||||||
|
# Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401).
|
||||||
|
_is_native_anthropic = self.provider == "anthropic"
|
||||||
|
effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "")
|
||||||
|
self.api_key = effective_key
|
||||||
|
self._anthropic_api_key = effective_key
|
||||||
|
self._anthropic_base_url = base_url
|
||||||
|
from agent.anthropic_adapter import _is_oauth_token as _is_oat
|
||||||
|
self._is_anthropic_oauth = _is_oat(effective_key)
|
||||||
|
self._anthropic_client = build_anthropic_client(effective_key, base_url)
|
||||||
|
# No OpenAI client needed for Anthropic mode
|
||||||
|
self.client = None
|
||||||
|
self._client_kwargs = {}
|
||||||
|
if not self.quiet_mode:
|
||||||
|
print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)")
|
||||||
|
if effective_key and len(effective_key) > 12:
|
||||||
|
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
|
||||||
|
elif self.api_mode == "bedrock_converse":
|
||||||
|
# AWS Bedrock — uses boto3 directly, no OpenAI client needed.
|
||||||
|
# Region is extracted from the base_url or defaults to us-east-1.
|
||||||
|
import re as _re
|
||||||
|
_region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "")
|
||||||
|
self._bedrock_region = _region_match.group(1) if _region_match else "us-east-1"
|
||||||
|
# Guardrail config — read from config.yaml at init time.
|
||||||
|
self._bedrock_guardrail_config = None
|
||||||
|
try:
|
||||||
|
from hermes_cli.config import load_config as _load_br_cfg
|
||||||
|
_gr = _load_br_cfg().get("bedrock", {}).get("guardrail", {})
|
||||||
|
if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"):
|
||||||
|
self._bedrock_guardrail_config = {
|
||||||
|
"guardrailIdentifier": _gr["guardrail_identifier"],
|
||||||
|
"guardrailVersion": _gr["guardrail_version"],
|
||||||
|
}
|
||||||
|
if _gr.get("stream_processing_mode"):
|
||||||
|
self._bedrock_guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"]
|
||||||
|
if _gr.get("trace"):
|
||||||
|
self._bedrock_guardrail_config["trace"] = _gr["trace"]
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
self.client = None
|
self.client = None
|
||||||
self._client_kwargs = {}
|
self._client_kwargs = {}
|
||||||
if not self.quiet_mode:
|
if not self.quiet_mode:
|
||||||
print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)")
|
_gr_label = " + Guardrails" if self._bedrock_guardrail_config else ""
|
||||||
if effective_key and len(effective_key) > 12:
|
print(f"🤖 AI Agent initialized with model: {self.model} (AWS Bedrock, {self._bedrock_region}{_gr_label})")
|
||||||
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
|
|
||||||
else:
|
else:
|
||||||
if api_key and base_url:
|
if api_key and base_url:
|
||||||
# Explicit credentials from CLI/gateway — construct directly.
|
# Explicit credentials from CLI/gateway — construct directly.
|
||||||
|
|
@ -951,9 +1003,20 @@ class AIAgent:
|
||||||
# message instead of silently routing through OpenRouter.
|
# message instead of silently routing through OpenRouter.
|
||||||
_explicit = (self.provider or "").strip().lower()
|
_explicit = (self.provider or "").strip().lower()
|
||||||
if _explicit and _explicit not in ("auto", "openrouter", "custom"):
|
if _explicit and _explicit not in ("auto", "openrouter", "custom"):
|
||||||
|
# Look up the actual env var name from the provider
|
||||||
|
# config — some providers use non-standard names
|
||||||
|
# (e.g. alibaba → DASHSCOPE_API_KEY, not ALIBABA_API_KEY).
|
||||||
|
_env_hint = f"{_explicit.upper()}_API_KEY"
|
||||||
|
try:
|
||||||
|
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||||
|
_pcfg = PROVIDER_REGISTRY.get(_explicit)
|
||||||
|
if _pcfg and _pcfg.api_key_env_vars:
|
||||||
|
_env_hint = _pcfg.api_key_env_vars[0]
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"Provider '{_explicit}' is set in config.yaml but no API key "
|
f"Provider '{_explicit}' is set in config.yaml but no API key "
|
||||||
f"was found. Set the {_explicit.upper()}_API_KEY environment "
|
f"was found. Set the {_env_hint} environment "
|
||||||
f"variable, or switch to a different provider with `hermes model`."
|
f"variable, or switch to a different provider with `hermes model`."
|
||||||
)
|
)
|
||||||
# Final fallback: try raw OpenRouter key
|
# Final fallback: try raw OpenRouter key
|
||||||
|
|
@ -1217,6 +1280,15 @@ class AIAgent:
|
||||||
"hermes_home": str(_ghh()),
|
"hermes_home": str(_ghh()),
|
||||||
"agent_context": "primary",
|
"agent_context": "primary",
|
||||||
}
|
}
|
||||||
|
# Thread session title for memory provider scoping
|
||||||
|
# (e.g. honcho uses this to derive chat-scoped session keys)
|
||||||
|
if self._session_db:
|
||||||
|
try:
|
||||||
|
_st = self._session_db.get_session_title(self.session_id)
|
||||||
|
if _st:
|
||||||
|
_init_kwargs["session_title"] = _st
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
# Thread gateway user identity for per-user memory scoping
|
# Thread gateway user identity for per-user memory scoping
|
||||||
if self._user_id:
|
if self._user_id:
|
||||||
_init_kwargs["user_id"] = self._user_id
|
_init_kwargs["user_id"] = self._user_id
|
||||||
|
|
@ -4206,6 +4278,9 @@ class AIAgent:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any:
|
def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any:
|
||||||
|
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
|
||||||
|
_validate_proxy_env_urls()
|
||||||
|
_validate_base_url(client_kwargs.get("base_url"))
|
||||||
if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"):
|
if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"):
|
||||||
from agent.copilot_acp_client import CopilotACPClient
|
from agent.copilot_acp_client import CopilotACPClient
|
||||||
|
|
||||||
|
|
@ -4890,6 +4965,17 @@ class AIAgent:
|
||||||
)
|
)
|
||||||
elif self.api_mode == "anthropic_messages":
|
elif self.api_mode == "anthropic_messages":
|
||||||
result["response"] = self._anthropic_messages_create(api_kwargs)
|
result["response"] = self._anthropic_messages_create(api_kwargs)
|
||||||
|
elif self.api_mode == "bedrock_converse":
|
||||||
|
# Bedrock uses boto3 directly — no OpenAI client needed.
|
||||||
|
from agent.bedrock_adapter import (
|
||||||
|
_get_bedrock_runtime_client,
|
||||||
|
normalize_converse_response,
|
||||||
|
)
|
||||||
|
region = api_kwargs.pop("__bedrock_region__", "us-east-1")
|
||||||
|
api_kwargs.pop("__bedrock_converse__", None)
|
||||||
|
client = _get_bedrock_runtime_client(region)
|
||||||
|
raw_response = client.converse(**api_kwargs)
|
||||||
|
result["response"] = normalize_converse_response(raw_response)
|
||||||
else:
|
else:
|
||||||
request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request")
|
request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request")
|
||||||
result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs)
|
result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs)
|
||||||
|
|
@ -5129,6 +5215,65 @@ class AIAgent:
|
||||||
finally:
|
finally:
|
||||||
self._codex_on_first_delta = None
|
self._codex_on_first_delta = None
|
||||||
|
|
||||||
|
# Bedrock Converse uses boto3's converse_stream() with real-time delta
|
||||||
|
# callbacks — same UX as Anthropic and chat_completions streaming.
|
||||||
|
if self.api_mode == "bedrock_converse":
|
||||||
|
result = {"response": None, "error": None}
|
||||||
|
first_delta_fired = {"done": False}
|
||||||
|
deltas_were_sent = {"yes": False}
|
||||||
|
|
||||||
|
def _fire_first():
|
||||||
|
if not first_delta_fired["done"] and on_first_delta:
|
||||||
|
first_delta_fired["done"] = True
|
||||||
|
try:
|
||||||
|
on_first_delta()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _bedrock_call():
|
||||||
|
try:
|
||||||
|
from agent.bedrock_adapter import (
|
||||||
|
_get_bedrock_runtime_client,
|
||||||
|
stream_converse_with_callbacks,
|
||||||
|
)
|
||||||
|
region = api_kwargs.pop("__bedrock_region__", "us-east-1")
|
||||||
|
api_kwargs.pop("__bedrock_converse__", None)
|
||||||
|
client = _get_bedrock_runtime_client(region)
|
||||||
|
raw_response = client.converse_stream(**api_kwargs)
|
||||||
|
|
||||||
|
def _on_text(text):
|
||||||
|
_fire_first()
|
||||||
|
self._fire_stream_delta(text)
|
||||||
|
deltas_were_sent["yes"] = True
|
||||||
|
|
||||||
|
def _on_tool(name):
|
||||||
|
_fire_first()
|
||||||
|
self._fire_tool_gen_started(name)
|
||||||
|
|
||||||
|
def _on_reasoning(text):
|
||||||
|
_fire_first()
|
||||||
|
self._fire_reasoning_delta(text)
|
||||||
|
|
||||||
|
result["response"] = stream_converse_with_callbacks(
|
||||||
|
raw_response,
|
||||||
|
on_text_delta=_on_text if self._has_stream_consumers() else None,
|
||||||
|
on_tool_start=_on_tool,
|
||||||
|
on_reasoning_delta=_on_reasoning if self.reasoning_callback or self.stream_delta_callback else None,
|
||||||
|
on_interrupt_check=lambda: self._interrupt_requested,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
result["error"] = e
|
||||||
|
|
||||||
|
t = threading.Thread(target=_bedrock_call, daemon=True)
|
||||||
|
t.start()
|
||||||
|
while t.is_alive():
|
||||||
|
t.join(timeout=0.3)
|
||||||
|
if self._interrupt_requested:
|
||||||
|
raise InterruptedError("Agent interrupted during Bedrock API call")
|
||||||
|
if result["error"] is not None:
|
||||||
|
raise result["error"]
|
||||||
|
return result["response"]
|
||||||
|
|
||||||
result = {"response": None, "error": None}
|
result = {"response": None, "error": None}
|
||||||
request_client_holder = {"client": None}
|
request_client_holder = {"client": None}
|
||||||
first_delta_fired = {"done": False}
|
first_delta_fired = {"done": False}
|
||||||
|
|
@ -5760,6 +5905,8 @@ class AIAgent:
|
||||||
# provider-specific exceptions like Copilot gpt-5-mini on
|
# provider-specific exceptions like Copilot gpt-5-mini on
|
||||||
# chat completions.
|
# chat completions.
|
||||||
fb_api_mode = "codex_responses"
|
fb_api_mode = "codex_responses"
|
||||||
|
elif fb_provider == "bedrock" or "bedrock-runtime" in fb_base_url.lower():
|
||||||
|
fb_api_mode = "bedrock_converse"
|
||||||
|
|
||||||
old_model = self.model
|
old_model = self.model
|
||||||
self.model = fb_model
|
self.model = fb_model
|
||||||
|
|
@ -6239,6 +6386,25 @@ class AIAgent:
|
||||||
fast_mode=(self.request_overrides or {}).get("speed") == "fast",
|
fast_mode=(self.request_overrides or {}).get("speed") == "fast",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# AWS Bedrock native Converse API — bypasses the OpenAI client entirely.
|
||||||
|
# The adapter handles message/tool conversion and boto3 calls directly.
|
||||||
|
if self.api_mode == "bedrock_converse":
|
||||||
|
from agent.bedrock_adapter import build_converse_kwargs
|
||||||
|
region = getattr(self, "_bedrock_region", None) or "us-east-1"
|
||||||
|
guardrail = getattr(self, "_bedrock_guardrail_config", None)
|
||||||
|
return {
|
||||||
|
"__bedrock_converse__": True,
|
||||||
|
"__bedrock_region__": region,
|
||||||
|
**build_converse_kwargs(
|
||||||
|
model=self.model,
|
||||||
|
messages=api_messages,
|
||||||
|
tools=self.tools,
|
||||||
|
max_tokens=self.max_tokens or 4096,
|
||||||
|
temperature=None, # Let the model use its default
|
||||||
|
guardrail_config=guardrail,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
if self.api_mode == "codex_responses":
|
if self.api_mode == "codex_responses":
|
||||||
instructions = ""
|
instructions = ""
|
||||||
payload_messages = api_messages
|
payload_messages = api_messages
|
||||||
|
|
@ -8504,6 +8670,53 @@ class AIAgent:
|
||||||
api_kwargs = None # Guard against UnboundLocalError in except handler
|
api_kwargs = None # Guard against UnboundLocalError in except handler
|
||||||
|
|
||||||
while retry_count < max_retries:
|
while retry_count < max_retries:
|
||||||
|
# ── Nous Portal rate limit guard ──────────────────────
|
||||||
|
# If another session already recorded that Nous is rate-
|
||||||
|
# limited, skip the API call entirely. Each attempt
|
||||||
|
# (including SDK-level retries) counts against RPH and
|
||||||
|
# deepens the rate limit hole.
|
||||||
|
if self.provider == "nous":
|
||||||
|
try:
|
||||||
|
from agent.nous_rate_guard import (
|
||||||
|
nous_rate_limit_remaining,
|
||||||
|
format_remaining as _fmt_nous_remaining,
|
||||||
|
)
|
||||||
|
_nous_remaining = nous_rate_limit_remaining()
|
||||||
|
if _nous_remaining is not None and _nous_remaining > 0:
|
||||||
|
_nous_msg = (
|
||||||
|
f"Nous Portal rate limit active — "
|
||||||
|
f"resets in {_fmt_nous_remaining(_nous_remaining)}."
|
||||||
|
)
|
||||||
|
self._vprint(
|
||||||
|
f"{self.log_prefix}⏳ {_nous_msg} Trying fallback...",
|
||||||
|
force=True,
|
||||||
|
)
|
||||||
|
self._emit_status(f"⏳ {_nous_msg}")
|
||||||
|
if self._try_activate_fallback():
|
||||||
|
retry_count = 0
|
||||||
|
compression_attempts = 0
|
||||||
|
primary_recovery_attempted = False
|
||||||
|
continue
|
||||||
|
# No fallback available — return with clear message
|
||||||
|
self._persist_session(messages, conversation_history)
|
||||||
|
return {
|
||||||
|
"final_response": (
|
||||||
|
f"⏳ {_nous_msg}\n\n"
|
||||||
|
"No fallback provider available. "
|
||||||
|
"Try again after the reset, or add a "
|
||||||
|
"fallback provider in config.yaml."
|
||||||
|
),
|
||||||
|
"messages": messages,
|
||||||
|
"api_calls": api_call_count,
|
||||||
|
"completed": False,
|
||||||
|
"failed": True,
|
||||||
|
"error": _nous_msg,
|
||||||
|
}
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
except Exception:
|
||||||
|
pass # Never let rate guard break the agent loop
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self._reset_stream_delivery_tracking()
|
self._reset_stream_delivery_tracking()
|
||||||
api_kwargs = self._build_api_kwargs(api_messages)
|
api_kwargs = self._build_api_kwargs(api_messages)
|
||||||
|
|
@ -8816,7 +9029,7 @@ class AIAgent:
|
||||||
# targeted error instead of wasting 3 API calls.
|
# targeted error instead of wasting 3 API calls.
|
||||||
_trunc_content = None
|
_trunc_content = None
|
||||||
_trunc_has_tool_calls = False
|
_trunc_has_tool_calls = False
|
||||||
if self.api_mode == "chat_completions":
|
if self.api_mode in ("chat_completions", "bedrock_converse"):
|
||||||
_trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
|
_trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
|
||||||
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
|
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
|
||||||
_trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
|
_trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
|
||||||
|
|
@ -8885,7 +9098,7 @@ class AIAgent:
|
||||||
"error": _exhaust_error,
|
"error": _exhaust_error,
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.api_mode == "chat_completions":
|
if self.api_mode in ("chat_completions", "bedrock_converse"):
|
||||||
assistant_message = response.choices[0].message
|
assistant_message = response.choices[0].message
|
||||||
if not assistant_message.tool_calls:
|
if not assistant_message.tool_calls:
|
||||||
length_continue_retries += 1
|
length_continue_retries += 1
|
||||||
|
|
@ -8925,7 +9138,7 @@ class AIAgent:
|
||||||
"error": "Response remained truncated after 3 continuation attempts",
|
"error": "Response remained truncated after 3 continuation attempts",
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.api_mode == "chat_completions":
|
if self.api_mode in ("chat_completions", "bedrock_converse"):
|
||||||
assistant_message = response.choices[0].message
|
assistant_message = response.choices[0].message
|
||||||
if assistant_message.tool_calls:
|
if assistant_message.tool_calls:
|
||||||
if truncated_tool_call_retries < 1:
|
if truncated_tool_call_retries < 1:
|
||||||
|
|
@ -9092,6 +9305,15 @@ class AIAgent:
|
||||||
self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
|
self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
|
||||||
|
|
||||||
has_retried_429 = False # Reset on success
|
has_retried_429 = False # Reset on success
|
||||||
|
# Clear Nous rate limit state on successful request —
|
||||||
|
# proves the limit has reset and other sessions can
|
||||||
|
# resume hitting Nous.
|
||||||
|
if self.provider == "nous":
|
||||||
|
try:
|
||||||
|
from agent.nous_rate_guard import clear_nous_rate_limit
|
||||||
|
clear_nous_rate_limit()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
self._touch_activity(f"API call #{api_call_count} completed")
|
self._touch_activity(f"API call #{api_call_count} completed")
|
||||||
break # Success, exit retry loop
|
break # Success, exit retry loop
|
||||||
|
|
||||||
|
|
@ -9503,6 +9725,38 @@ class AIAgent:
|
||||||
primary_recovery_attempted = False
|
primary_recovery_attempted = False
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# ── Nous Portal: record rate limit & skip retries ─────
|
||||||
|
# When Nous returns a 429, record the reset time to a
|
||||||
|
# shared file so ALL sessions (cron, gateway, auxiliary)
|
||||||
|
# know not to pile on. Then skip further retries —
|
||||||
|
# each one burns another RPH request and deepens the
|
||||||
|
# rate limit hole. The retry loop's top-of-iteration
|
||||||
|
# guard will catch this on the next pass and try
|
||||||
|
# fallback or bail with a clear message.
|
||||||
|
if (
|
||||||
|
is_rate_limited
|
||||||
|
and self.provider == "nous"
|
||||||
|
and classified.reason == FailoverReason.rate_limit
|
||||||
|
and not recovered_with_pool
|
||||||
|
):
|
||||||
|
try:
|
||||||
|
from agent.nous_rate_guard import record_nous_rate_limit
|
||||||
|
_err_resp = getattr(api_error, "response", None)
|
||||||
|
_err_hdrs = (
|
||||||
|
getattr(_err_resp, "headers", None)
|
||||||
|
if _err_resp else None
|
||||||
|
)
|
||||||
|
record_nous_rate_limit(
|
||||||
|
headers=_err_hdrs,
|
||||||
|
error_context=error_context,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
# Skip straight to max_retries — the top-of-loop
|
||||||
|
# guard will handle fallback or bail cleanly.
|
||||||
|
retry_count = max_retries
|
||||||
|
continue
|
||||||
|
|
||||||
is_payload_too_large = (
|
is_payload_too_large = (
|
||||||
classified.reason == FailoverReason.payload_too_large
|
classified.reason == FailoverReason.payload_too_large
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ BOLD='\033[1m'
|
||||||
# Configuration
|
# Configuration
|
||||||
REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
|
REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
|
||||||
REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
|
REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
|
||||||
HERMES_HOME="$HOME/.hermes"
|
HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
|
||||||
INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}"
|
INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}"
|
||||||
PYTHON_VERSION="3.11"
|
PYTHON_VERSION="3.11"
|
||||||
NODE_VERSION="22"
|
NODE_VERSION="22"
|
||||||
|
|
@ -66,6 +66,10 @@ while [[ $# -gt 0 ]]; do
|
||||||
INSTALL_DIR="$2"
|
INSTALL_DIR="$2"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
|
--hermes-home)
|
||||||
|
HERMES_HOME="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
-h|--help)
|
-h|--help)
|
||||||
echo "Hermes Agent Installer"
|
echo "Hermes Agent Installer"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
@ -76,6 +80,7 @@ while [[ $# -gt 0 ]]; do
|
||||||
echo " --skip-setup Skip interactive setup wizard"
|
echo " --skip-setup Skip interactive setup wizard"
|
||||||
echo " --branch NAME Git branch to install (default: main)"
|
echo " --branch NAME Git branch to install (default: main)"
|
||||||
echo " --dir PATH Installation directory (default: ~/.hermes/hermes-agent)"
|
echo " --dir PATH Installation directory (default: ~/.hermes/hermes-agent)"
|
||||||
|
echo " --hermes-home PATH Data directory (default: ~/.hermes, or \$HERMES_HOME)"
|
||||||
echo " -h, --help Show this help"
|
echo " -h, --help Show this help"
|
||||||
exit 0
|
exit 0
|
||||||
;;
|
;;
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,7 @@ AUTHOR_MAP = {
|
||||||
"258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
|
"258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
|
||||||
"70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
|
"70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
|
||||||
"259807879+Bartok9@users.noreply.github.com": "Bartok9",
|
"259807879+Bartok9@users.noreply.github.com": "Bartok9",
|
||||||
|
"241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
|
||||||
"268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1",
|
"268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1",
|
||||||
"241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
|
"241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
|
||||||
# contributors (manual mapping from git names)
|
# contributors (manual mapping from git names)
|
||||||
|
|
@ -81,6 +82,7 @@ AUTHOR_MAP = {
|
||||||
"brooklyn.bb.nicholson@gmail.com": "brooklynnicholson",
|
"brooklyn.bb.nicholson@gmail.com": "brooklynnicholson",
|
||||||
"4317663+helix4u@users.noreply.github.com": "helix4u",
|
"4317663+helix4u@users.noreply.github.com": "helix4u",
|
||||||
"331214+counterposition@users.noreply.github.com": "counterposition",
|
"331214+counterposition@users.noreply.github.com": "counterposition",
|
||||||
|
"blspear@gmail.com": "BrennerSpear",
|
||||||
"gpickett00@gmail.com": "gpickett00",
|
"gpickett00@gmail.com": "gpickett00",
|
||||||
"mcosma@gmail.com": "wakamex",
|
"mcosma@gmail.com": "wakamex",
|
||||||
"clawdia.nash@proton.me": "clawdia-nash",
|
"clawdia.nash@proton.me": "clawdia-nash",
|
||||||
|
|
|
||||||
|
|
@ -313,7 +313,7 @@ Type these during an interactive chat session.
|
||||||
```
|
```
|
||||||
~/.hermes/config.yaml Main configuration
|
~/.hermes/config.yaml Main configuration
|
||||||
~/.hermes/.env API keys and secrets
|
~/.hermes/.env API keys and secrets
|
||||||
~/.hermes/skills/ Installed skills
|
$HERMES_HOME/skills/ Installed skills
|
||||||
~/.hermes/sessions/ Session transcripts
|
~/.hermes/sessions/ Session transcripts
|
||||||
~/.hermes/logs/ Gateway and error logs
|
~/.hermes/logs/ Gateway and error logs
|
||||||
~/.hermes/auth.json OAuth tokens and credential pools
|
~/.hermes/auth.json OAuth tokens and credential pools
|
||||||
|
|
|
||||||
|
|
@ -334,7 +334,7 @@ When the user asks you to "review PR #N", "look at this PR", or gives you a PR U
|
||||||
### Step 1: Set up environment
|
### Step 1: Set up environment
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
source ~/.hermes/skills/github/github-auth/scripts/gh-env.sh
|
source "${HERMES_HOME:-$HOME/.hermes}/skills/github/github-auth/scripts/gh-env.sh"
|
||||||
# Or run the inline setup block from the top of this skill
|
# Or run the inline setup block from the top of this skill
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ All requests need: `-H "Authorization: token $GITHUB_TOKEN"`
|
||||||
|
|
||||||
Use the `gh-env.sh` helper to set `$GITHUB_TOKEN`, `$GH_OWNER`, `$GH_REPO` automatically:
|
Use the `gh-env.sh` helper to set `$GITHUB_TOKEN`, `$GH_OWNER`, `$GH_REPO` automatically:
|
||||||
```bash
|
```bash
|
||||||
source ~/.hermes/skills/github/github-auth/scripts/gh-env.sh
|
source "${HERMES_HOME:-$HOME/.hermes}/skills/github/github-auth/scripts/gh-env.sh"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Repositories
|
## Repositories
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ on CLI, Telegram, Discord, or any platform.
|
||||||
Define a shorthand first:
|
Define a shorthand first:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
GSETUP="python ~/.hermes/skills/productivity/google-workspace/scripts/setup.py"
|
GSETUP="python ${HERMES_HOME:-$HOME/.hermes}/skills/productivity/google-workspace/scripts/setup.py"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Step 0: Check if already set up
|
### Step 0: Check if already set up
|
||||||
|
|
@ -163,7 +163,7 @@ Should print `AUTHENTICATED`. Setup is complete — token refreshes automaticall
|
||||||
All commands go through the API script. Set `GAPI` as a shorthand:
|
All commands go through the API script. Set `GAPI` as a shorthand:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
GAPI="python ~/.hermes/skills/productivity/google-workspace/scripts/google_api.py"
|
GAPI="python ${HERMES_HOME:-$HOME/.hermes}/skills/productivity/google-workspace/scripts/google_api.py"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Gmail
|
### Gmail
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@ The fastest path — auto-detect the model, test strategies, and lock in the win
|
||||||
# In execute_code — use the loader to avoid exec-scoping issues:
|
# In execute_code — use the loader to avoid exec-scoping issues:
|
||||||
import os
|
import os
|
||||||
exec(open(os.path.expanduser(
|
exec(open(os.path.expanduser(
|
||||||
"~/.hermes/skills/red-teaming/godmode/scripts/load_godmode.py"
|
os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/load_godmode.py")
|
||||||
)).read())
|
)).read())
|
||||||
|
|
||||||
# Auto-detect model from config and jailbreak it
|
# Auto-detect model from config and jailbreak it
|
||||||
|
|
@ -192,7 +192,7 @@ python3 scripts/parseltongue.py "How do I hack into a WiFi network?" --tier stan
|
||||||
Or use `execute_code` inline:
|
Or use `execute_code` inline:
|
||||||
```python
|
```python
|
||||||
# Load the parseltongue module
|
# Load the parseltongue module
|
||||||
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/parseltongue.py")).read())
|
exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/parseltongue.py")).read())
|
||||||
|
|
||||||
query = "How do I hack into a WiFi network?"
|
query = "How do I hack into a WiFi network?"
|
||||||
variants = generate_variants(query, tier="standard")
|
variants = generate_variants(query, tier="standard")
|
||||||
|
|
@ -229,7 +229,7 @@ Race multiple models against the same query, score responses, pick the winner:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# Via execute_code
|
# Via execute_code
|
||||||
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/godmode_race.py")).read())
|
exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/godmode_race.py")).read())
|
||||||
|
|
||||||
result = race_models(
|
result = race_models(
|
||||||
query="Explain how SQL injection works with a practical example",
|
query="Explain how SQL injection works with a practical example",
|
||||||
|
|
|
||||||
|
|
@ -114,7 +114,7 @@ hermes
|
||||||
### Via the GODMODE CLASSIC racer script
|
### Via the GODMODE CLASSIC racer script
|
||||||
|
|
||||||
```python
|
```python
|
||||||
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/godmode_race.py")).read())
|
exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/godmode_race.py")).read())
|
||||||
result = race_godmode_classic("Your query here")
|
result = race_godmode_classic("Your query here")
|
||||||
print(f"Winner: {result['codename']} — Score: {result['score']}")
|
print(f"Winner: {result['codename']} — Score: {result['score']}")
|
||||||
print(result['content'])
|
print(result['content'])
|
||||||
|
|
|
||||||
|
|
@ -129,7 +129,7 @@ These don't auto-reject but reduce the response score:
|
||||||
## Using in Python
|
## Using in Python
|
||||||
|
|
||||||
```python
|
```python
|
||||||
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/godmode_race.py")).read())
|
exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/godmode_race.py")).read())
|
||||||
|
|
||||||
# Check if a response is a refusal
|
# Check if a response is a refusal
|
||||||
text = "I'm sorry, but I can't assist with that request."
|
text = "I'm sorry, but I can't assist with that request."
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ finds what works, and locks it in by writing config.yaml + prefill.json.
|
||||||
|
|
||||||
Usage in execute_code:
|
Usage in execute_code:
|
||||||
exec(open(os.path.expanduser(
|
exec(open(os.path.expanduser(
|
||||||
"~/.hermes/skills/red-teaming/godmode/scripts/auto_jailbreak.py"
|
os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/auto_jailbreak.py")
|
||||||
)).read())
|
)).read())
|
||||||
|
|
||||||
result = auto_jailbreak() # Uses current model from config
|
result = auto_jailbreak() # Uses current model from config
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ Queries multiple models in parallel via OpenRouter, scores responses
|
||||||
on quality/filteredness/speed, returns the best unfiltered answer.
|
on quality/filteredness/speed, returns the best unfiltered answer.
|
||||||
|
|
||||||
Usage in execute_code:
|
Usage in execute_code:
|
||||||
exec(open(os.path.expanduser("~/.hermes/skills/red-teaming/godmode/scripts/godmode_race.py")).read())
|
exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/godmode_race.py")).read())
|
||||||
|
|
||||||
result = race_models(
|
result = race_models(
|
||||||
query="Your query here",
|
query="Your query here",
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ Loader for G0DM0D3 scripts. Handles the exec-scoping issues.
|
||||||
|
|
||||||
Usage in execute_code:
|
Usage in execute_code:
|
||||||
exec(open(os.path.expanduser(
|
exec(open(os.path.expanduser(
|
||||||
"~/.hermes/skills/red-teaming/godmode/scripts/load_godmode.py"
|
os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/load_godmode.py")
|
||||||
)).read())
|
)).read())
|
||||||
|
|
||||||
# Now all functions are available:
|
# Now all functions are available:
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ Usage:
|
||||||
python parseltongue.py "How do I hack a WiFi network?" --tier standard
|
python parseltongue.py "How do I hack a WiFi network?" --tier standard
|
||||||
|
|
||||||
# As a module in execute_code
|
# As a module in execute_code
|
||||||
exec(open("~/.hermes/skills/red-teaming/godmode/scripts/parseltongue.py").read())
|
exec(open(os.path.join(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")), "skills/red-teaming/godmode/scripts/parseltongue.py")).read())
|
||||||
variants = generate_variants("How do I hack a WiFi network?", tier="standard")
|
variants = generate_variants("How do I hack a WiFi network?", tier="standard")
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
|
||||||
1232
tests/agent/test_bedrock_adapter.py
Normal file
1232
tests/agent/test_bedrock_adapter.py
Normal file
File diff suppressed because it is too large
Load diff
269
tests/agent/test_bedrock_integration.py
Normal file
269
tests/agent/test_bedrock_integration.py
Normal file
|
|
@ -0,0 +1,269 @@
|
||||||
|
"""Integration tests for the AWS Bedrock provider wiring.
|
||||||
|
|
||||||
|
Verifies that the Bedrock provider is correctly registered in the
|
||||||
|
provider registry, model catalog, and runtime resolution pipeline.
|
||||||
|
These tests do NOT require AWS credentials or boto3 — all AWS calls
|
||||||
|
are mocked.
|
||||||
|
|
||||||
|
Note: Tests that import ``hermes_cli.auth`` or ``hermes_cli.runtime_provider``
|
||||||
|
require Python 3.10+ due to ``str | None`` type syntax in the import chain.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
class TestProviderRegistry:
|
||||||
|
"""Verify Bedrock is registered in PROVIDER_REGISTRY."""
|
||||||
|
|
||||||
|
def test_bedrock_in_registry(self):
|
||||||
|
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||||
|
assert "bedrock" in PROVIDER_REGISTRY
|
||||||
|
|
||||||
|
def test_bedrock_auth_type_is_aws_sdk(self):
|
||||||
|
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||||
|
pconfig = PROVIDER_REGISTRY["bedrock"]
|
||||||
|
assert pconfig.auth_type == "aws_sdk"
|
||||||
|
|
||||||
|
def test_bedrock_has_no_api_key_env_vars(self):
|
||||||
|
"""Bedrock uses the AWS SDK credential chain, not API keys."""
|
||||||
|
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||||
|
pconfig = PROVIDER_REGISTRY["bedrock"]
|
||||||
|
assert pconfig.api_key_env_vars == ()
|
||||||
|
|
||||||
|
def test_bedrock_base_url_env_var(self):
|
||||||
|
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||||
|
pconfig = PROVIDER_REGISTRY["bedrock"]
|
||||||
|
assert pconfig.base_url_env_var == "BEDROCK_BASE_URL"
|
||||||
|
|
||||||
|
|
||||||
|
class TestProviderAliases:
|
||||||
|
"""Verify Bedrock aliases resolve correctly."""
|
||||||
|
|
||||||
|
def test_aws_alias(self):
|
||||||
|
from hermes_cli.models import _PROVIDER_ALIASES
|
||||||
|
assert _PROVIDER_ALIASES.get("aws") == "bedrock"
|
||||||
|
|
||||||
|
def test_aws_bedrock_alias(self):
|
||||||
|
from hermes_cli.models import _PROVIDER_ALIASES
|
||||||
|
assert _PROVIDER_ALIASES.get("aws-bedrock") == "bedrock"
|
||||||
|
|
||||||
|
def test_amazon_bedrock_alias(self):
|
||||||
|
from hermes_cli.models import _PROVIDER_ALIASES
|
||||||
|
assert _PROVIDER_ALIASES.get("amazon-bedrock") == "bedrock"
|
||||||
|
|
||||||
|
def test_amazon_alias(self):
|
||||||
|
from hermes_cli.models import _PROVIDER_ALIASES
|
||||||
|
assert _PROVIDER_ALIASES.get("amazon") == "bedrock"
|
||||||
|
|
||||||
|
|
||||||
|
class TestProviderLabels:
|
||||||
|
"""Verify Bedrock appears in provider labels."""
|
||||||
|
|
||||||
|
def test_bedrock_label(self):
|
||||||
|
from hermes_cli.models import _PROVIDER_LABELS
|
||||||
|
assert _PROVIDER_LABELS.get("bedrock") == "AWS Bedrock"
|
||||||
|
|
||||||
|
|
||||||
|
class TestModelCatalog:
|
||||||
|
"""Verify Bedrock has a static model fallback list."""
|
||||||
|
|
||||||
|
def test_bedrock_has_curated_models(self):
|
||||||
|
from hermes_cli.models import _PROVIDER_MODELS
|
||||||
|
models = _PROVIDER_MODELS.get("bedrock", [])
|
||||||
|
assert len(models) > 0
|
||||||
|
|
||||||
|
def test_bedrock_models_include_claude(self):
|
||||||
|
from hermes_cli.models import _PROVIDER_MODELS
|
||||||
|
models = _PROVIDER_MODELS.get("bedrock", [])
|
||||||
|
claude_models = [m for m in models if "anthropic.claude" in m]
|
||||||
|
assert len(claude_models) > 0
|
||||||
|
|
||||||
|
def test_bedrock_models_include_nova(self):
|
||||||
|
from hermes_cli.models import _PROVIDER_MODELS
|
||||||
|
models = _PROVIDER_MODELS.get("bedrock", [])
|
||||||
|
nova_models = [m for m in models if "amazon.nova" in m]
|
||||||
|
assert len(nova_models) > 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestResolveProvider:
|
||||||
|
"""Verify resolve_provider() handles bedrock correctly."""
|
||||||
|
|
||||||
|
def test_explicit_bedrock_resolves(self, monkeypatch):
|
||||||
|
"""When user explicitly requests 'bedrock', it should resolve."""
|
||||||
|
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||||
|
# bedrock is in the registry, so resolve_provider should return it
|
||||||
|
from hermes_cli.auth import resolve_provider
|
||||||
|
result = resolve_provider("bedrock")
|
||||||
|
assert result == "bedrock"
|
||||||
|
|
||||||
|
def test_aws_alias_resolves_to_bedrock(self):
|
||||||
|
from hermes_cli.auth import resolve_provider
|
||||||
|
result = resolve_provider("aws")
|
||||||
|
assert result == "bedrock"
|
||||||
|
|
||||||
|
def test_amazon_bedrock_alias_resolves(self):
|
||||||
|
from hermes_cli.auth import resolve_provider
|
||||||
|
result = resolve_provider("amazon-bedrock")
|
||||||
|
assert result == "bedrock"
|
||||||
|
|
||||||
|
def test_auto_detect_with_aws_credentials(self, monkeypatch):
|
||||||
|
"""When AWS credentials are present and no other provider is configured,
|
||||||
|
auto-detect should find bedrock."""
|
||||||
|
from hermes_cli.auth import resolve_provider
|
||||||
|
|
||||||
|
# Clear all other provider env vars
|
||||||
|
for var in ["OPENAI_API_KEY", "OPENROUTER_API_KEY", "ANTHROPIC_API_KEY",
|
||||||
|
"ANTHROPIC_TOKEN", "GOOGLE_API_KEY", "DEEPSEEK_API_KEY"]:
|
||||||
|
monkeypatch.delenv(var, raising=False)
|
||||||
|
|
||||||
|
# Set AWS credentials
|
||||||
|
monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
|
||||||
|
monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
|
||||||
|
|
||||||
|
# Mock the auth store to have no active provider
|
||||||
|
with patch("hermes_cli.auth._load_auth_store", return_value={}):
|
||||||
|
result = resolve_provider("auto")
|
||||||
|
assert result == "bedrock"
|
||||||
|
|
||||||
|
|
||||||
|
class TestRuntimeProvider:
|
||||||
|
"""Verify resolve_runtime_provider() handles bedrock correctly."""
|
||||||
|
|
||||||
|
def test_bedrock_runtime_resolution(self, monkeypatch):
|
||||||
|
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||||
|
|
||||||
|
monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
|
||||||
|
monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
|
||||||
|
monkeypatch.setenv("AWS_REGION", "eu-west-1")
|
||||||
|
|
||||||
|
# Mock resolve_provider to return bedrock
|
||||||
|
with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \
|
||||||
|
patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}):
|
||||||
|
result = resolve_runtime_provider(requested="bedrock")
|
||||||
|
|
||||||
|
assert result["provider"] == "bedrock"
|
||||||
|
assert result["api_mode"] == "bedrock_converse"
|
||||||
|
assert result["region"] == "eu-west-1"
|
||||||
|
assert "bedrock-runtime.eu-west-1.amazonaws.com" in result["base_url"]
|
||||||
|
assert result["api_key"] == "aws-sdk"
|
||||||
|
|
||||||
|
def test_bedrock_runtime_default_region(self, monkeypatch):
|
||||||
|
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||||
|
|
||||||
|
monkeypatch.setenv("AWS_PROFILE", "default")
|
||||||
|
monkeypatch.delenv("AWS_REGION", raising=False)
|
||||||
|
monkeypatch.delenv("AWS_DEFAULT_REGION", raising=False)
|
||||||
|
|
||||||
|
with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \
|
||||||
|
patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}):
|
||||||
|
result = resolve_runtime_provider(requested="bedrock")
|
||||||
|
|
||||||
|
assert result["region"] == "us-east-1"
|
||||||
|
|
||||||
|
def test_bedrock_runtime_no_credentials_raises_on_auto_detect(self, monkeypatch):
|
||||||
|
"""When bedrock is auto-detected (not explicitly requested) and no
|
||||||
|
credentials are found, runtime resolution should raise AuthError."""
|
||||||
|
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||||
|
from hermes_cli.auth import AuthError
|
||||||
|
|
||||||
|
# Clear all AWS env vars
|
||||||
|
for var in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_PROFILE",
|
||||||
|
"AWS_BEARER_TOKEN_BEDROCK", "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
|
||||||
|
"AWS_WEB_IDENTITY_TOKEN_FILE"]:
|
||||||
|
monkeypatch.delenv(var, raising=False)
|
||||||
|
|
||||||
|
# Mock both the provider resolution and boto3's credential chain
|
||||||
|
mock_session = MagicMock()
|
||||||
|
mock_session.get_credentials.return_value = None
|
||||||
|
with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \
|
||||||
|
patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}), \
|
||||||
|
patch("hermes_cli.runtime_provider.resolve_requested_provider", return_value="auto"), \
|
||||||
|
patch.dict("sys.modules", {"botocore": MagicMock(), "botocore.session": MagicMock()}):
|
||||||
|
import botocore.session as _bs
|
||||||
|
_bs.get_session = MagicMock(return_value=mock_session)
|
||||||
|
with pytest.raises(AuthError, match="No AWS credentials"):
|
||||||
|
resolve_runtime_provider(requested="auto")
|
||||||
|
|
||||||
|
def test_bedrock_runtime_explicit_skips_credential_check(self, monkeypatch):
|
||||||
|
"""When user explicitly requests bedrock, trust boto3's credential chain
|
||||||
|
even if env-var detection finds nothing (covers IMDS, SSO, etc.)."""
|
||||||
|
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||||
|
|
||||||
|
# No AWS env vars set — but explicit bedrock request should not raise
|
||||||
|
for var in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_PROFILE",
|
||||||
|
"AWS_BEARER_TOKEN_BEDROCK"]:
|
||||||
|
monkeypatch.delenv(var, raising=False)
|
||||||
|
|
||||||
|
with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \
|
||||||
|
patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}):
|
||||||
|
result = resolve_runtime_provider(requested="bedrock")
|
||||||
|
assert result["provider"] == "bedrock"
|
||||||
|
assert result["api_mode"] == "bedrock_converse"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# providers.py integration
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestProvidersModule:
|
||||||
|
"""Verify bedrock is wired into hermes_cli/providers.py."""
|
||||||
|
|
||||||
|
def test_bedrock_alias_in_providers(self):
|
||||||
|
from hermes_cli.providers import ALIASES
|
||||||
|
assert ALIASES.get("bedrock") is None # "bedrock" IS the canonical name, not an alias
|
||||||
|
assert ALIASES.get("aws") == "bedrock"
|
||||||
|
assert ALIASES.get("aws-bedrock") == "bedrock"
|
||||||
|
|
||||||
|
def test_bedrock_transport_mapping(self):
|
||||||
|
from hermes_cli.providers import TRANSPORT_TO_API_MODE
|
||||||
|
assert TRANSPORT_TO_API_MODE.get("bedrock_converse") == "bedrock_converse"
|
||||||
|
|
||||||
|
def test_determine_api_mode_from_bedrock_url(self):
|
||||||
|
from hermes_cli.providers import determine_api_mode
|
||||||
|
assert determine_api_mode(
|
||||||
|
"unknown", "https://bedrock-runtime.us-east-1.amazonaws.com"
|
||||||
|
) == "bedrock_converse"
|
||||||
|
|
||||||
|
def test_label_override(self):
|
||||||
|
from hermes_cli.providers import _LABEL_OVERRIDES
|
||||||
|
assert _LABEL_OVERRIDES.get("bedrock") == "AWS Bedrock"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Error classifier integration
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestErrorClassifierBedrock:
|
||||||
|
"""Verify Bedrock error patterns are in the global error classifier."""
|
||||||
|
|
||||||
|
def test_throttling_in_rate_limit_patterns(self):
|
||||||
|
from agent.error_classifier import _RATE_LIMIT_PATTERNS
|
||||||
|
assert "throttlingexception" in _RATE_LIMIT_PATTERNS
|
||||||
|
|
||||||
|
def test_context_overflow_patterns(self):
|
||||||
|
from agent.error_classifier import _CONTEXT_OVERFLOW_PATTERNS
|
||||||
|
assert "input is too long" in _CONTEXT_OVERFLOW_PATTERNS
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# pyproject.toml bedrock extra
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestPackaging:
|
||||||
|
"""Verify bedrock optional dependency is declared."""
|
||||||
|
|
||||||
|
def test_bedrock_extra_exists(self):
|
||||||
|
import configparser
|
||||||
|
from pathlib import Path
|
||||||
|
# Read pyproject.toml to verify [bedrock] extra
|
||||||
|
toml_path = Path(__file__).parent.parent.parent / "pyproject.toml"
|
||||||
|
content = toml_path.read_text()
|
||||||
|
assert 'bedrock = ["boto3' in content
|
||||||
|
|
||||||
|
def test_bedrock_in_all_extra(self):
|
||||||
|
from pathlib import Path
|
||||||
|
content = (Path(__file__).parent.parent.parent / "pyproject.toml").read_text()
|
||||||
|
assert '"hermes-agent[bedrock]"' in content
|
||||||
253
tests/agent/test_nous_rate_guard.py
Normal file
253
tests/agent/test_nous_rate_guard.py
Normal file
|
|
@ -0,0 +1,253 @@
|
||||||
|
"""Tests for agent/nous_rate_guard.py — cross-session Nous Portal rate limit guard."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def rate_guard_env(tmp_path, monkeypatch):
|
||||||
|
"""Isolate rate guard state to a temp directory."""
|
||||||
|
hermes_home = str(tmp_path / ".hermes")
|
||||||
|
os.makedirs(hermes_home, exist_ok=True)
|
||||||
|
monkeypatch.setenv("HERMES_HOME", hermes_home)
|
||||||
|
# Clear any cached module-level imports
|
||||||
|
return hermes_home
|
||||||
|
|
||||||
|
|
||||||
|
class TestRecordNousRateLimit:
|
||||||
|
"""Test recording rate limit state."""
|
||||||
|
|
||||||
|
def test_records_with_header_reset(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import record_nous_rate_limit, _state_path
|
||||||
|
|
||||||
|
headers = {"x-ratelimit-reset-requests-1h": "1800"}
|
||||||
|
record_nous_rate_limit(headers=headers)
|
||||||
|
|
||||||
|
path = _state_path()
|
||||||
|
assert os.path.exists(path)
|
||||||
|
with open(path) as f:
|
||||||
|
state = json.load(f)
|
||||||
|
assert state["reset_seconds"] == pytest.approx(1800, abs=2)
|
||||||
|
assert state["reset_at"] > time.time()
|
||||||
|
|
||||||
|
def test_records_with_per_minute_header(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import record_nous_rate_limit, _state_path
|
||||||
|
|
||||||
|
headers = {"x-ratelimit-reset-requests": "45"}
|
||||||
|
record_nous_rate_limit(headers=headers)
|
||||||
|
|
||||||
|
with open(_state_path()) as f:
|
||||||
|
state = json.load(f)
|
||||||
|
assert state["reset_seconds"] == pytest.approx(45, abs=2)
|
||||||
|
|
||||||
|
def test_records_with_retry_after_header(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import record_nous_rate_limit, _state_path
|
||||||
|
|
||||||
|
headers = {"retry-after": "60"}
|
||||||
|
record_nous_rate_limit(headers=headers)
|
||||||
|
|
||||||
|
with open(_state_path()) as f:
|
||||||
|
state = json.load(f)
|
||||||
|
assert state["reset_seconds"] == pytest.approx(60, abs=2)
|
||||||
|
|
||||||
|
def test_prefers_hourly_over_per_minute(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import record_nous_rate_limit, _state_path
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
"x-ratelimit-reset-requests-1h": "1800",
|
||||||
|
"x-ratelimit-reset-requests": "45",
|
||||||
|
}
|
||||||
|
record_nous_rate_limit(headers=headers)
|
||||||
|
|
||||||
|
with open(_state_path()) as f:
|
||||||
|
state = json.load(f)
|
||||||
|
# Should use the hourly value, not the per-minute one
|
||||||
|
assert state["reset_seconds"] == pytest.approx(1800, abs=2)
|
||||||
|
|
||||||
|
def test_falls_back_to_error_context_reset_at(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import record_nous_rate_limit, _state_path
|
||||||
|
|
||||||
|
future_reset = time.time() + 900
|
||||||
|
record_nous_rate_limit(
|
||||||
|
headers=None,
|
||||||
|
error_context={"reset_at": future_reset},
|
||||||
|
)
|
||||||
|
|
||||||
|
with open(_state_path()) as f:
|
||||||
|
state = json.load(f)
|
||||||
|
assert state["reset_at"] == pytest.approx(future_reset, abs=1)
|
||||||
|
|
||||||
|
def test_falls_back_to_default_cooldown(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import record_nous_rate_limit, _state_path
|
||||||
|
|
||||||
|
record_nous_rate_limit(headers=None)
|
||||||
|
|
||||||
|
with open(_state_path()) as f:
|
||||||
|
state = json.load(f)
|
||||||
|
# Default is 300 seconds (5 minutes)
|
||||||
|
assert state["reset_seconds"] == pytest.approx(300, abs=2)
|
||||||
|
|
||||||
|
def test_custom_default_cooldown(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import record_nous_rate_limit, _state_path
|
||||||
|
|
||||||
|
record_nous_rate_limit(headers=None, default_cooldown=120.0)
|
||||||
|
|
||||||
|
with open(_state_path()) as f:
|
||||||
|
state = json.load(f)
|
||||||
|
assert state["reset_seconds"] == pytest.approx(120, abs=2)
|
||||||
|
|
||||||
|
def test_creates_directory_if_missing(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import record_nous_rate_limit, _state_path
|
||||||
|
|
||||||
|
record_nous_rate_limit(headers={"retry-after": "10"})
|
||||||
|
assert os.path.exists(_state_path())
|
||||||
|
|
||||||
|
|
||||||
|
class TestNousRateLimitRemaining:
|
||||||
|
"""Test checking remaining rate limit time."""
|
||||||
|
|
||||||
|
def test_returns_none_when_no_file(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import nous_rate_limit_remaining
|
||||||
|
|
||||||
|
assert nous_rate_limit_remaining() is None
|
||||||
|
|
||||||
|
def test_returns_remaining_seconds_when_active(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import record_nous_rate_limit, nous_rate_limit_remaining
|
||||||
|
|
||||||
|
record_nous_rate_limit(headers={"x-ratelimit-reset-requests-1h": "600"})
|
||||||
|
remaining = nous_rate_limit_remaining()
|
||||||
|
assert remaining is not None
|
||||||
|
assert 595 < remaining <= 605 # ~600 seconds, allowing for test execution time
|
||||||
|
|
||||||
|
def test_returns_none_when_expired(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import nous_rate_limit_remaining, _state_path
|
||||||
|
|
||||||
|
# Write an already-expired state
|
||||||
|
state_dir = os.path.dirname(_state_path())
|
||||||
|
os.makedirs(state_dir, exist_ok=True)
|
||||||
|
with open(_state_path(), "w") as f:
|
||||||
|
json.dump({"reset_at": time.time() - 10, "recorded_at": time.time() - 100}, f)
|
||||||
|
|
||||||
|
assert nous_rate_limit_remaining() is None
|
||||||
|
# File should be cleaned up
|
||||||
|
assert not os.path.exists(_state_path())
|
||||||
|
|
||||||
|
def test_handles_corrupt_file(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import nous_rate_limit_remaining, _state_path
|
||||||
|
|
||||||
|
state_dir = os.path.dirname(_state_path())
|
||||||
|
os.makedirs(state_dir, exist_ok=True)
|
||||||
|
with open(_state_path(), "w") as f:
|
||||||
|
f.write("not valid json{{{")
|
||||||
|
|
||||||
|
assert nous_rate_limit_remaining() is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestClearNousRateLimit:
|
||||||
|
"""Test clearing rate limit state."""
|
||||||
|
|
||||||
|
def test_clears_existing_file(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import (
|
||||||
|
record_nous_rate_limit,
|
||||||
|
clear_nous_rate_limit,
|
||||||
|
nous_rate_limit_remaining,
|
||||||
|
_state_path,
|
||||||
|
)
|
||||||
|
|
||||||
|
record_nous_rate_limit(headers={"retry-after": "600"})
|
||||||
|
assert nous_rate_limit_remaining() is not None
|
||||||
|
|
||||||
|
clear_nous_rate_limit()
|
||||||
|
assert nous_rate_limit_remaining() is None
|
||||||
|
assert not os.path.exists(_state_path())
|
||||||
|
|
||||||
|
def test_clear_when_no_file(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import clear_nous_rate_limit
|
||||||
|
|
||||||
|
# Should not raise
|
||||||
|
clear_nous_rate_limit()
|
||||||
|
|
||||||
|
|
||||||
|
class TestFormatRemaining:
|
||||||
|
"""Test human-readable duration formatting."""
|
||||||
|
|
||||||
|
def test_seconds(self):
|
||||||
|
from agent.nous_rate_guard import format_remaining
|
||||||
|
|
||||||
|
assert format_remaining(30) == "30s"
|
||||||
|
|
||||||
|
def test_minutes(self):
|
||||||
|
from agent.nous_rate_guard import format_remaining
|
||||||
|
|
||||||
|
assert format_remaining(125) == "2m 5s"
|
||||||
|
|
||||||
|
def test_exact_minutes(self):
|
||||||
|
from agent.nous_rate_guard import format_remaining
|
||||||
|
|
||||||
|
assert format_remaining(120) == "2m"
|
||||||
|
|
||||||
|
def test_hours(self):
|
||||||
|
from agent.nous_rate_guard import format_remaining
|
||||||
|
|
||||||
|
assert format_remaining(3720) == "1h 2m"
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseResetSeconds:
|
||||||
|
"""Test header parsing for reset times."""
|
||||||
|
|
||||||
|
def test_case_insensitive_headers(self, rate_guard_env):
|
||||||
|
from agent.nous_rate_guard import _parse_reset_seconds
|
||||||
|
|
||||||
|
headers = {"X-Ratelimit-Reset-Requests-1h": "1200"}
|
||||||
|
assert _parse_reset_seconds(headers) == 1200.0
|
||||||
|
|
||||||
|
def test_returns_none_for_empty_headers(self):
|
||||||
|
from agent.nous_rate_guard import _parse_reset_seconds
|
||||||
|
|
||||||
|
assert _parse_reset_seconds(None) is None
|
||||||
|
assert _parse_reset_seconds({}) is None
|
||||||
|
|
||||||
|
def test_ignores_zero_values(self):
|
||||||
|
from agent.nous_rate_guard import _parse_reset_seconds
|
||||||
|
|
||||||
|
headers = {"x-ratelimit-reset-requests-1h": "0"}
|
||||||
|
assert _parse_reset_seconds(headers) is None
|
||||||
|
|
||||||
|
def test_ignores_invalid_values(self):
|
||||||
|
from agent.nous_rate_guard import _parse_reset_seconds
|
||||||
|
|
||||||
|
headers = {"x-ratelimit-reset-requests-1h": "not-a-number"}
|
||||||
|
assert _parse_reset_seconds(headers) is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestAuxiliaryClientIntegration:
|
||||||
|
"""Test that the auxiliary client respects the rate guard."""
|
||||||
|
|
||||||
|
def test_try_nous_skips_when_rate_limited(self, rate_guard_env, monkeypatch):
|
||||||
|
from agent.nous_rate_guard import record_nous_rate_limit
|
||||||
|
|
||||||
|
# Record a rate limit
|
||||||
|
record_nous_rate_limit(headers={"retry-after": "600"})
|
||||||
|
|
||||||
|
# Mock _read_nous_auth to return valid creds (would normally succeed)
|
||||||
|
import agent.auxiliary_client as aux
|
||||||
|
monkeypatch.setattr(aux, "_read_nous_auth", lambda: {
|
||||||
|
"access_token": "test-token",
|
||||||
|
"inference_base_url": "https://api.nous.test/v1",
|
||||||
|
})
|
||||||
|
|
||||||
|
result = aux._try_nous()
|
||||||
|
assert result == (None, None)
|
||||||
|
|
||||||
|
def test_try_nous_works_when_not_rate_limited(self, rate_guard_env, monkeypatch):
|
||||||
|
import agent.auxiliary_client as aux
|
||||||
|
|
||||||
|
# No rate limit recorded — _try_nous should proceed normally
|
||||||
|
# (will return None because no real creds, but won't be blocked
|
||||||
|
# by the rate guard)
|
||||||
|
monkeypatch.setattr(aux, "_read_nous_auth", lambda: None)
|
||||||
|
result = aux._try_nous()
|
||||||
|
assert result == (None, None)
|
||||||
60
tests/agent/test_proxy_and_url_validation.py
Normal file
60
tests/agent/test_proxy_and_url_validation.py
Normal file
|
|
@ -0,0 +1,60 @@
|
||||||
|
"""Tests for malformed proxy env var and base URL validation.
|
||||||
|
|
||||||
|
Salvaged from PR #6403 by MestreY0d4-Uninter — validates that the agent
|
||||||
|
surfaces clear errors instead of cryptic httpx ``Invalid port`` exceptions
|
||||||
|
when proxy env vars or custom endpoint URLs are malformed.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
|
||||||
|
|
||||||
|
|
||||||
|
# -- proxy env validation ------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_proxy_env_accepts_normal_values(monkeypatch):
|
||||||
|
monkeypatch.setenv("HTTP_PROXY", "http://127.0.0.1:6153")
|
||||||
|
monkeypatch.setenv("HTTPS_PROXY", "https://proxy.example.com:8443")
|
||||||
|
monkeypatch.setenv("ALL_PROXY", "socks5://127.0.0.1:1080")
|
||||||
|
_validate_proxy_env_urls() # should not raise
|
||||||
|
|
||||||
|
|
||||||
|
def test_proxy_env_accepts_empty(monkeypatch):
|
||||||
|
monkeypatch.delenv("HTTP_PROXY", raising=False)
|
||||||
|
monkeypatch.delenv("HTTPS_PROXY", raising=False)
|
||||||
|
monkeypatch.delenv("ALL_PROXY", raising=False)
|
||||||
|
monkeypatch.delenv("http_proxy", raising=False)
|
||||||
|
monkeypatch.delenv("https_proxy", raising=False)
|
||||||
|
monkeypatch.delenv("all_proxy", raising=False)
|
||||||
|
_validate_proxy_env_urls() # should not raise
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("key", [
|
||||||
|
"HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY",
|
||||||
|
"http_proxy", "https_proxy", "all_proxy",
|
||||||
|
])
|
||||||
|
def test_proxy_env_rejects_malformed_port(monkeypatch, key):
|
||||||
|
monkeypatch.setenv(key, "http://127.0.0.1:6153export")
|
||||||
|
with pytest.raises(RuntimeError, match=rf"Malformed proxy environment variable {key}=.*6153export"):
|
||||||
|
_validate_proxy_env_urls()
|
||||||
|
|
||||||
|
|
||||||
|
# -- base URL validation -------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("url", [
|
||||||
|
"https://api.example.com/v1",
|
||||||
|
"http://127.0.0.1:6153/v1",
|
||||||
|
"acp://copilot",
|
||||||
|
"",
|
||||||
|
None,
|
||||||
|
])
|
||||||
|
def test_base_url_accepts_valid(url):
|
||||||
|
_validate_base_url(url) # should not raise
|
||||||
|
|
||||||
|
|
||||||
|
def test_base_url_rejects_malformed_port():
|
||||||
|
with pytest.raises(RuntimeError, match="Malformed custom endpoint URL"):
|
||||||
|
_validate_base_url("http://127.0.0.1:6153export")
|
||||||
|
|
@ -284,3 +284,95 @@ class TestElevenLabsTavilyExaKeys:
|
||||||
assert "XYZ789abcdef" not in result
|
assert "XYZ789abcdef" not in result
|
||||||
assert "HOME=/home/user" in result
|
assert "HOME=/home/user" in result
|
||||||
assert "SHELL=/bin/bash" in result
|
assert "SHELL=/bin/bash" in result
|
||||||
|
|
||||||
|
|
||||||
|
class TestJWTTokens:
|
||||||
|
"""JWT tokens start with eyJ (base64 for '{') and have dot-separated parts."""
|
||||||
|
|
||||||
|
def test_full_3part_jwt(self):
|
||||||
|
text = (
|
||||||
|
"Token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
|
||||||
|
".eyJpc3MiOiI0MjNiZDJkYjg4MjI0MDAwIn0"
|
||||||
|
".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
|
||||||
|
)
|
||||||
|
result = redact_sensitive_text(text)
|
||||||
|
assert "Token:" in result
|
||||||
|
# Payload and signature must not survive
|
||||||
|
assert "eyJpc3Mi" not in result
|
||||||
|
assert "Gxgv0rru" not in result
|
||||||
|
|
||||||
|
def test_2part_jwt(self):
|
||||||
|
text = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0"
|
||||||
|
result = redact_sensitive_text(text)
|
||||||
|
assert "eyJzdWIi" not in result
|
||||||
|
|
||||||
|
def test_standalone_jwt_header(self):
|
||||||
|
text = "leaked header: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9 here"
|
||||||
|
result = redact_sensitive_text(text)
|
||||||
|
assert "IkpXVCJ9" not in result
|
||||||
|
assert "leaked header:" in result
|
||||||
|
|
||||||
|
def test_jwt_with_base64_padding(self):
|
||||||
|
text = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0=.abc123def456ghij"
|
||||||
|
result = redact_sensitive_text(text)
|
||||||
|
assert "abc123def456" not in result
|
||||||
|
|
||||||
|
def test_short_eyj_not_matched(self):
|
||||||
|
"""eyJ followed by fewer than 10 base64 chars should not match."""
|
||||||
|
text = "eyJust a normal word"
|
||||||
|
assert redact_sensitive_text(text) == text
|
||||||
|
|
||||||
|
def test_jwt_preserves_surrounding_text(self):
|
||||||
|
text = "before eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0 after"
|
||||||
|
result = redact_sensitive_text(text)
|
||||||
|
assert result.startswith("before ")
|
||||||
|
assert result.endswith(" after")
|
||||||
|
|
||||||
|
def test_home_assistant_jwt_in_memory(self):
|
||||||
|
"""Real-world pattern: HA token stored in agent memory block."""
|
||||||
|
text = (
|
||||||
|
"Home Assistant API Token: "
|
||||||
|
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
|
||||||
|
".eyJpc3MiOiJhYmNkZWYiLCJleHAiOjE3NzQ5NTcxMDN9"
|
||||||
|
".Gxgv0rru-_kS-I_60EJ7CENTnBh9UeuL3QhkMoQ-VnM"
|
||||||
|
)
|
||||||
|
result = redact_sensitive_text(text)
|
||||||
|
assert "Home Assistant API Token:" in result
|
||||||
|
assert "Gxgv0rru" not in result
|
||||||
|
assert "..." in result
|
||||||
|
|
||||||
|
|
||||||
|
class TestDiscordMentions:
|
||||||
|
"""Discord snowflake IDs in <@ID> or <@!ID> format."""
|
||||||
|
|
||||||
|
def test_normal_mention(self):
|
||||||
|
result = redact_sensitive_text("Hello <@222589316709220353>")
|
||||||
|
assert "222589316709220353" not in result
|
||||||
|
assert "<@***>" in result
|
||||||
|
|
||||||
|
def test_nickname_mention(self):
|
||||||
|
result = redact_sensitive_text("Ping <@!1331549159177846844>")
|
||||||
|
assert "1331549159177846844" not in result
|
||||||
|
assert "<@!***>" in result
|
||||||
|
|
||||||
|
def test_multiple_mentions(self):
|
||||||
|
text = "<@111111111111111111> and <@222222222222222222>"
|
||||||
|
result = redact_sensitive_text(text)
|
||||||
|
assert "111111111111111111" not in result
|
||||||
|
assert "222222222222222222" not in result
|
||||||
|
|
||||||
|
def test_short_id_not_matched(self):
|
||||||
|
"""IDs shorter than 17 digits are not Discord snowflakes."""
|
||||||
|
text = "<@12345>"
|
||||||
|
assert redact_sensitive_text(text) == text
|
||||||
|
|
||||||
|
def test_slack_mention_not_matched(self):
|
||||||
|
"""Slack mentions use letters, not pure digits."""
|
||||||
|
text = "<@U024BE7LH>"
|
||||||
|
assert redact_sensitive_text(text) == text
|
||||||
|
|
||||||
|
def test_preserves_surrounding_text(self):
|
||||||
|
text = "User <@222589316709220353> said hello"
|
||||||
|
result = redact_sensitive_text(text)
|
||||||
|
assert result.startswith("User ")
|
||||||
|
assert result.endswith(" said hello")
|
||||||
|
|
|
||||||
|
|
@ -193,6 +193,67 @@ class TestLoadGatewayConfig:
|
||||||
|
|
||||||
assert config.thread_sessions_per_user is False
|
assert config.thread_sessions_per_user is False
|
||||||
|
|
||||||
|
def test_bridges_discord_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch):
|
||||||
|
hermes_home = tmp_path / ".hermes"
|
||||||
|
hermes_home.mkdir()
|
||||||
|
config_path = hermes_home / "config.yaml"
|
||||||
|
config_path.write_text(
|
||||||
|
"discord:\n"
|
||||||
|
" channel_prompts:\n"
|
||||||
|
" \"123\": Research mode\n"
|
||||||
|
" 456: Therapist mode\n",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||||
|
|
||||||
|
config = load_gateway_config()
|
||||||
|
|
||||||
|
assert config.platforms[Platform.DISCORD].extra["channel_prompts"] == {
|
||||||
|
"123": "Research mode",
|
||||||
|
"456": "Therapist mode",
|
||||||
|
}
|
||||||
|
|
||||||
|
def test_bridges_telegram_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch):
|
||||||
|
hermes_home = tmp_path / ".hermes"
|
||||||
|
hermes_home.mkdir()
|
||||||
|
config_path = hermes_home / "config.yaml"
|
||||||
|
config_path.write_text(
|
||||||
|
"telegram:\n"
|
||||||
|
" channel_prompts:\n"
|
||||||
|
' "-1001234567": Research assistant\n'
|
||||||
|
" 789: Creative writing\n",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||||
|
|
||||||
|
config = load_gateway_config()
|
||||||
|
|
||||||
|
assert config.platforms[Platform.TELEGRAM].extra["channel_prompts"] == {
|
||||||
|
"-1001234567": "Research assistant",
|
||||||
|
"789": "Creative writing",
|
||||||
|
}
|
||||||
|
|
||||||
|
def test_bridges_slack_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch):
|
||||||
|
hermes_home = tmp_path / ".hermes"
|
||||||
|
hermes_home.mkdir()
|
||||||
|
config_path = hermes_home / "config.yaml"
|
||||||
|
config_path.write_text(
|
||||||
|
"slack:\n"
|
||||||
|
" channel_prompts:\n"
|
||||||
|
' "C01ABC": Code review mode\n',
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||||
|
|
||||||
|
config = load_gateway_config()
|
||||||
|
|
||||||
|
assert config.platforms[Platform.SLACK].extra["channel_prompts"] == {
|
||||||
|
"C01ABC": "Code review mode",
|
||||||
|
}
|
||||||
|
|
||||||
def test_invalid_quick_commands_in_config_yaml_are_ignored(self, tmp_path, monkeypatch):
|
def test_invalid_quick_commands_in_config_yaml_are_ignored(self, tmp_path, monkeypatch):
|
||||||
hermes_home = tmp_path / ".hermes"
|
hermes_home = tmp_path / ".hermes"
|
||||||
hermes_home.mkdir()
|
hermes_home.mkdir()
|
||||||
|
|
|
||||||
259
tests/gateway/test_discord_channel_prompts.py
Normal file
259
tests/gateway/test_discord_channel_prompts.py
Normal file
|
|
@ -0,0 +1,259 @@
|
||||||
|
"""Tests for Discord channel_prompts resolution and injection."""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import types
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_discord_mock():
|
||||||
|
if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"):
|
||||||
|
return
|
||||||
|
discord_mod = types.ModuleType("discord")
|
||||||
|
discord_mod.Intents = MagicMock()
|
||||||
|
discord_mod.Intents.default.return_value = MagicMock()
|
||||||
|
discord_mod.DMChannel = type("DMChannel", (), {})
|
||||||
|
discord_mod.Thread = type("Thread", (), {})
|
||||||
|
discord_mod.ForumChannel = type("ForumChannel", (), {})
|
||||||
|
discord_mod.Interaction = object
|
||||||
|
ext_mod = MagicMock()
|
||||||
|
commands_mod = MagicMock()
|
||||||
|
commands_mod.Bot = MagicMock
|
||||||
|
ext_mod.commands = commands_mod
|
||||||
|
sys.modules.setdefault("discord", discord_mod)
|
||||||
|
sys.modules.setdefault("discord.ext", ext_mod)
|
||||||
|
sys.modules.setdefault("discord.ext.commands", commands_mod)
|
||||||
|
|
||||||
|
|
||||||
|
import gateway.run as gateway_run
|
||||||
|
from gateway.config import Platform
|
||||||
|
from gateway.platforms.base import MessageEvent
|
||||||
|
from gateway.session import SessionSource
|
||||||
|
|
||||||
|
|
||||||
|
class _CapturingAgent:
|
||||||
|
last_init = None
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
type(self).last_init = dict(kwargs)
|
||||||
|
self.tools = []
|
||||||
|
|
||||||
|
def run_conversation(self, user_message, conversation_history=None, task_id=None, persist_user_message=None):
|
||||||
|
return {
|
||||||
|
"final_response": "ok",
|
||||||
|
"messages": [],
|
||||||
|
"api_calls": 1,
|
||||||
|
"completed": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _install_fake_agent(monkeypatch):
|
||||||
|
fake_run_agent = types.ModuleType("run_agent")
|
||||||
|
fake_run_agent.AIAgent = _CapturingAgent
|
||||||
|
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_adapter():
|
||||||
|
_ensure_discord_mock()
|
||||||
|
from gateway.platforms.discord import DiscordAdapter
|
||||||
|
|
||||||
|
adapter = object.__new__(DiscordAdapter)
|
||||||
|
adapter.config = MagicMock()
|
||||||
|
adapter.config.extra = {}
|
||||||
|
return adapter
|
||||||
|
|
||||||
|
|
||||||
|
def _make_runner():
|
||||||
|
runner = object.__new__(gateway_run.GatewayRunner)
|
||||||
|
runner.adapters = {}
|
||||||
|
runner._ephemeral_system_prompt = "Global prompt"
|
||||||
|
runner._prefill_messages = []
|
||||||
|
runner._reasoning_config = None
|
||||||
|
runner._service_tier = None
|
||||||
|
runner._provider_routing = {}
|
||||||
|
runner._fallback_model = None
|
||||||
|
runner._smart_model_routing = {}
|
||||||
|
runner._running_agents = {}
|
||||||
|
runner._pending_model_notes = {}
|
||||||
|
runner._session_db = None
|
||||||
|
runner._agent_cache = {}
|
||||||
|
runner._agent_cache_lock = threading.Lock()
|
||||||
|
runner._session_model_overrides = {}
|
||||||
|
runner.hooks = SimpleNamespace(loaded_hooks=False)
|
||||||
|
runner.config = SimpleNamespace(streaming=None)
|
||||||
|
runner.session_store = SimpleNamespace(
|
||||||
|
get_or_create_session=lambda source: SimpleNamespace(session_id="session-1"),
|
||||||
|
load_transcript=lambda session_id: [],
|
||||||
|
)
|
||||||
|
runner._get_or_create_gateway_honcho = lambda session_key: (None, None)
|
||||||
|
runner._enrich_message_with_vision = AsyncMock(return_value="ENRICHED")
|
||||||
|
return runner
|
||||||
|
|
||||||
|
|
||||||
|
def _make_source() -> SessionSource:
|
||||||
|
return SessionSource(
|
||||||
|
platform=Platform.DISCORD,
|
||||||
|
chat_id="12345",
|
||||||
|
chat_type="thread",
|
||||||
|
user_id="user-1",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestResolveChannelPrompts:
|
||||||
|
def test_no_prompt_returns_none(self):
|
||||||
|
adapter = _make_adapter()
|
||||||
|
assert adapter._resolve_channel_prompt("123") is None
|
||||||
|
|
||||||
|
def test_match_by_channel_id(self):
|
||||||
|
adapter = _make_adapter()
|
||||||
|
adapter.config.extra = {"channel_prompts": {"100": "Research mode"}}
|
||||||
|
assert adapter._resolve_channel_prompt("100") == "Research mode"
|
||||||
|
|
||||||
|
def test_numeric_yaml_keys_normalized_at_config_load(self):
|
||||||
|
"""Numeric YAML keys are normalized to strings by config bridging.
|
||||||
|
|
||||||
|
The resolver itself expects string keys (config.py handles normalization),
|
||||||
|
so raw numeric keys will not match — this is intentional.
|
||||||
|
"""
|
||||||
|
adapter = _make_adapter()
|
||||||
|
# Simulates post-bridging state: keys are already strings
|
||||||
|
adapter.config.extra = {"channel_prompts": {"100": "Research mode"}}
|
||||||
|
assert adapter._resolve_channel_prompt("100") == "Research mode"
|
||||||
|
# Pre-bridging numeric key would not match (bridging is responsible)
|
||||||
|
adapter.config.extra = {"channel_prompts": {100: "Research mode"}}
|
||||||
|
assert adapter._resolve_channel_prompt("100") is None
|
||||||
|
|
||||||
|
def test_match_by_parent_id(self):
|
||||||
|
adapter = _make_adapter()
|
||||||
|
adapter.config.extra = {"channel_prompts": {"200": "Forum prompt"}}
|
||||||
|
assert adapter._resolve_channel_prompt("999", parent_id="200") == "Forum prompt"
|
||||||
|
|
||||||
|
def test_exact_channel_overrides_parent(self):
|
||||||
|
adapter = _make_adapter()
|
||||||
|
adapter.config.extra = {
|
||||||
|
"channel_prompts": {
|
||||||
|
"999": "Thread override",
|
||||||
|
"200": "Forum prompt",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert adapter._resolve_channel_prompt("999", parent_id="200") == "Thread override"
|
||||||
|
|
||||||
|
def test_build_message_event_sets_channel_prompt(self):
|
||||||
|
adapter = _make_adapter()
|
||||||
|
adapter.config.extra = {"channel_prompts": {"321": "Command prompt"}}
|
||||||
|
adapter.build_source = MagicMock(return_value=SimpleNamespace())
|
||||||
|
|
||||||
|
interaction = SimpleNamespace(
|
||||||
|
channel_id=321,
|
||||||
|
channel=SimpleNamespace(name="general", guild=None, parent_id=None),
|
||||||
|
user=SimpleNamespace(id=1, display_name="Brenner"),
|
||||||
|
)
|
||||||
|
adapter._get_effective_topic = MagicMock(return_value=None)
|
||||||
|
|
||||||
|
event = adapter._build_slash_event(interaction, "/retry")
|
||||||
|
|
||||||
|
assert event.channel_prompt == "Command prompt"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_dispatch_thread_session_inherits_parent_channel_prompt(self):
|
||||||
|
adapter = _make_adapter()
|
||||||
|
adapter.config.extra = {"channel_prompts": {"200": "Parent prompt"}}
|
||||||
|
adapter.build_source = MagicMock(return_value=SimpleNamespace())
|
||||||
|
adapter._get_effective_topic = MagicMock(return_value=None)
|
||||||
|
adapter.handle_message = AsyncMock()
|
||||||
|
|
||||||
|
interaction = SimpleNamespace(
|
||||||
|
guild=SimpleNamespace(name="Wetlands"),
|
||||||
|
channel=SimpleNamespace(id=200, parent=None),
|
||||||
|
user=SimpleNamespace(id=1, display_name="Brenner"),
|
||||||
|
)
|
||||||
|
|
||||||
|
await adapter._dispatch_thread_session(interaction, "999", "new-thread", "hello")
|
||||||
|
|
||||||
|
dispatched_event = adapter.handle_message.await_args.args[0]
|
||||||
|
assert dispatched_event.channel_prompt == "Parent prompt"
|
||||||
|
|
||||||
|
def test_blank_prompts_are_ignored(self):
|
||||||
|
adapter = _make_adapter()
|
||||||
|
adapter.config.extra = {"channel_prompts": {"100": " "}}
|
||||||
|
assert adapter._resolve_channel_prompt("100") is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_retry_preserves_channel_prompt(monkeypatch):
|
||||||
|
runner = _make_runner()
|
||||||
|
runner.session_store = SimpleNamespace(
|
||||||
|
get_or_create_session=lambda source: SimpleNamespace(session_id="session-1", last_prompt_tokens=10),
|
||||||
|
load_transcript=lambda session_id: [
|
||||||
|
{"role": "user", "content": "original message"},
|
||||||
|
{"role": "assistant", "content": "old reply"},
|
||||||
|
],
|
||||||
|
rewrite_transcript=MagicMock(),
|
||||||
|
)
|
||||||
|
runner._handle_message = AsyncMock(return_value="ok")
|
||||||
|
|
||||||
|
event = MessageEvent(
|
||||||
|
text="/retry",
|
||||||
|
message_type=gateway_run.MessageType.COMMAND,
|
||||||
|
source=_make_source(),
|
||||||
|
raw_message=SimpleNamespace(),
|
||||||
|
channel_prompt="Channel prompt",
|
||||||
|
)
|
||||||
|
|
||||||
|
result = await runner._handle_retry_command(event)
|
||||||
|
|
||||||
|
assert result == "ok"
|
||||||
|
retried_event = runner._handle_message.await_args.args[0]
|
||||||
|
assert retried_event.channel_prompt == "Channel prompt"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_run_agent_appends_channel_prompt_to_ephemeral_system_prompt(monkeypatch, tmp_path):
|
||||||
|
_install_fake_agent(monkeypatch)
|
||||||
|
runner = _make_runner()
|
||||||
|
|
||||||
|
(tmp_path / "config.yaml").write_text("agent:\n system_prompt: Global prompt\n", encoding="utf-8")
|
||||||
|
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||||
|
monkeypatch.setattr(gateway_run, "_env_path", tmp_path / ".env")
|
||||||
|
monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None)
|
||||||
|
monkeypatch.setattr(gateway_run, "_load_gateway_config", lambda: {})
|
||||||
|
monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda config=None: "gpt-5.4")
|
||||||
|
monkeypatch.setattr(
|
||||||
|
gateway_run,
|
||||||
|
"_resolve_runtime_agent_kwargs",
|
||||||
|
lambda: {
|
||||||
|
"provider": "openrouter",
|
||||||
|
"api_mode": "chat_completions",
|
||||||
|
"base_url": "https://openrouter.ai/api/v1",
|
||||||
|
"api_key": "***",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
import hermes_cli.tools_config as tools_config
|
||||||
|
|
||||||
|
monkeypatch.setattr(tools_config, "_get_platform_tools", lambda user_config, platform_key: {"core"})
|
||||||
|
|
||||||
|
_CapturingAgent.last_init = None
|
||||||
|
event = MessageEvent(
|
||||||
|
text="hi",
|
||||||
|
source=_make_source(),
|
||||||
|
message_id="m1",
|
||||||
|
channel_prompt="Channel prompt",
|
||||||
|
)
|
||||||
|
result = await runner._run_agent(
|
||||||
|
message="hi",
|
||||||
|
context_prompt="Context prompt",
|
||||||
|
history=[],
|
||||||
|
source=_make_source(),
|
||||||
|
session_id="session-1",
|
||||||
|
session_key="agent:main:discord:thread:12345",
|
||||||
|
channel_prompt=event.channel_prompt,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result["final_response"] == "ok"
|
||||||
|
assert _CapturingAgent.last_init["ephemeral_system_prompt"] == (
|
||||||
|
"Context prompt\n\nChannel prompt\n\nGlobal prompt"
|
||||||
|
)
|
||||||
|
|
@ -459,7 +459,7 @@ class TestCustomProviderCompatibility:
|
||||||
migrate_config(interactive=False, quiet=True)
|
migrate_config(interactive=False, quiet=True)
|
||||||
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||||
|
|
||||||
assert raw["_config_version"] == 17
|
assert raw["_config_version"] == 18
|
||||||
assert raw["providers"]["openai-direct"] == {
|
assert raw["providers"]["openai-direct"] == {
|
||||||
"api": "https://api.openai.com/v1",
|
"api": "https://api.openai.com/v1",
|
||||||
"api_key": "test-key",
|
"api_key": "test-key",
|
||||||
|
|
@ -606,6 +606,26 @@ class TestInterimAssistantMessageConfig:
|
||||||
migrate_config(interactive=False, quiet=True)
|
migrate_config(interactive=False, quiet=True)
|
||||||
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||||
|
|
||||||
assert raw["_config_version"] == 17
|
assert raw["_config_version"] == 18
|
||||||
assert raw["display"]["tool_progress"] == "off"
|
assert raw["display"]["tool_progress"] == "off"
|
||||||
assert raw["display"]["interim_assistant_messages"] is True
|
assert raw["display"]["interim_assistant_messages"] is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestDiscordChannelPromptsConfig:
|
||||||
|
def test_default_config_includes_discord_channel_prompts(self):
|
||||||
|
assert DEFAULT_CONFIG["discord"]["channel_prompts"] == {}
|
||||||
|
|
||||||
|
def test_migrate_adds_discord_channel_prompts_default(self, tmp_path):
|
||||||
|
config_path = tmp_path / "config.yaml"
|
||||||
|
config_path.write_text(
|
||||||
|
yaml.safe_dump({"_config_version": 17, "discord": {"auto_thread": True}}),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||||
|
migrate_config(interactive=False, quiet=True)
|
||||||
|
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||||
|
|
||||||
|
assert raw["_config_version"] == 18
|
||||||
|
assert raw["discord"]["auto_thread"] is True
|
||||||
|
assert raw["discord"]["channel_prompts"] == {}
|
||||||
|
|
|
||||||
|
|
@ -64,4 +64,4 @@ class TestCamofoxConfigDefaults:
|
||||||
|
|
||||||
# The current schema version is tracked globally; unrelated default
|
# The current schema version is tracked globally; unrelated default
|
||||||
# options may bump it after browser defaults are added.
|
# options may bump it after browser defaults are added.
|
||||||
assert DEFAULT_CONFIG["_config_version"] == 17
|
assert DEFAULT_CONFIG["_config_version"] == 18
|
||||||
|
|
|
||||||
|
|
@ -82,6 +82,18 @@ SKILLS_DIR = HERMES_HOME / "skills"
|
||||||
|
|
||||||
MAX_NAME_LENGTH = 64
|
MAX_NAME_LENGTH = 64
|
||||||
MAX_DESCRIPTION_LENGTH = 1024
|
MAX_DESCRIPTION_LENGTH = 1024
|
||||||
|
|
||||||
|
|
||||||
|
def _is_local_skill(skill_path: Path) -> bool:
|
||||||
|
"""Check if a skill path is within the local SKILLS_DIR.
|
||||||
|
|
||||||
|
Skills found in external_dirs are read-only from the agent's perspective.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
skill_path.resolve().relative_to(SKILLS_DIR.resolve())
|
||||||
|
return True
|
||||||
|
except ValueError:
|
||||||
|
return False
|
||||||
MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token
|
MAX_SKILL_CONTENT_CHARS = 100_000 # ~36k tokens at 2.75 chars/token
|
||||||
MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file
|
MAX_SKILL_FILE_BYTES = 1_048_576 # 1 MiB per supporting file
|
||||||
|
|
||||||
|
|
@ -360,6 +372,9 @@ def _edit_skill(name: str, content: str) -> Dict[str, Any]:
|
||||||
if not existing:
|
if not existing:
|
||||||
return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."}
|
return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."}
|
||||||
|
|
||||||
|
if not _is_local_skill(existing["path"]):
|
||||||
|
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be modified. Copy it to your local skills directory first."}
|
||||||
|
|
||||||
skill_md = existing["path"] / "SKILL.md"
|
skill_md = existing["path"] / "SKILL.md"
|
||||||
# Back up original content for rollback
|
# Back up original content for rollback
|
||||||
original_content = skill_md.read_text(encoding="utf-8") if skill_md.exists() else None
|
original_content = skill_md.read_text(encoding="utf-8") if skill_md.exists() else None
|
||||||
|
|
@ -400,6 +415,9 @@ def _patch_skill(
|
||||||
if not existing:
|
if not existing:
|
||||||
return {"success": False, "error": f"Skill '{name}' not found."}
|
return {"success": False, "error": f"Skill '{name}' not found."}
|
||||||
|
|
||||||
|
if not _is_local_skill(existing["path"]):
|
||||||
|
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be modified. Copy it to your local skills directory first."}
|
||||||
|
|
||||||
skill_dir = existing["path"]
|
skill_dir = existing["path"]
|
||||||
|
|
||||||
if file_path:
|
if file_path:
|
||||||
|
|
@ -473,6 +491,9 @@ def _delete_skill(name: str) -> Dict[str, Any]:
|
||||||
if not existing:
|
if not existing:
|
||||||
return {"success": False, "error": f"Skill '{name}' not found."}
|
return {"success": False, "error": f"Skill '{name}' not found."}
|
||||||
|
|
||||||
|
if not _is_local_skill(existing["path"]):
|
||||||
|
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be deleted."}
|
||||||
|
|
||||||
skill_dir = existing["path"]
|
skill_dir = existing["path"]
|
||||||
shutil.rmtree(skill_dir)
|
shutil.rmtree(skill_dir)
|
||||||
|
|
||||||
|
|
@ -515,6 +536,9 @@ def _write_file(name: str, file_path: str, file_content: str) -> Dict[str, Any]:
|
||||||
if not existing:
|
if not existing:
|
||||||
return {"success": False, "error": f"Skill '{name}' not found. Create it first with action='create'."}
|
return {"success": False, "error": f"Skill '{name}' not found. Create it first with action='create'."}
|
||||||
|
|
||||||
|
if not _is_local_skill(existing["path"]):
|
||||||
|
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be modified. Copy it to your local skills directory first."}
|
||||||
|
|
||||||
target, err = _resolve_skill_target(existing["path"], file_path)
|
target, err = _resolve_skill_target(existing["path"], file_path)
|
||||||
if err:
|
if err:
|
||||||
return {"success": False, "error": err}
|
return {"success": False, "error": err}
|
||||||
|
|
@ -548,6 +572,10 @@ def _remove_file(name: str, file_path: str) -> Dict[str, Any]:
|
||||||
existing = _find_skill(name)
|
existing = _find_skill(name)
|
||||||
if not existing:
|
if not existing:
|
||||||
return {"success": False, "error": f"Skill '{name}' not found."}
|
return {"success": False, "error": f"Skill '{name}' not found."}
|
||||||
|
|
||||||
|
if not _is_local_skill(existing["path"]):
|
||||||
|
return {"success": False, "error": f"Skill '{name}' is in an external directory and cannot be modified."}
|
||||||
|
|
||||||
skill_dir = existing["path"]
|
skill_dir = existing["path"]
|
||||||
|
|
||||||
target, err = _resolve_skill_target(skill_dir, file_path)
|
target, err = _resolve_skill_target(skill_dir, file_path)
|
||||||
|
|
|
||||||
164
website/docs/guides/aws-bedrock.md
Normal file
164
website/docs/guides/aws-bedrock.md
Normal file
|
|
@ -0,0 +1,164 @@
|
||||||
|
---
|
||||||
|
sidebar_position: 14
|
||||||
|
title: "AWS Bedrock"
|
||||||
|
description: "Use Hermes Agent with Amazon Bedrock — native Converse API, IAM authentication, Guardrails, and cross-region inference"
|
||||||
|
---
|
||||||
|
|
||||||
|
# AWS Bedrock
|
||||||
|
|
||||||
|
Hermes Agent supports Amazon Bedrock as a native provider using the **Converse API** — not the OpenAI-compatible endpoint. This gives you full access to the Bedrock ecosystem: IAM authentication, Guardrails, cross-region inference profiles, and all foundation models.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- **AWS credentials** — any source supported by the [boto3 credential chain](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html):
|
||||||
|
- IAM instance role (EC2, ECS, Lambda — zero config)
|
||||||
|
- `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` environment variables
|
||||||
|
- `AWS_PROFILE` for SSO or named profiles
|
||||||
|
- `aws configure` for local development
|
||||||
|
- **boto3** — install with `pip install hermes-agent[bedrock]`
|
||||||
|
- **IAM permissions** — at minimum:
|
||||||
|
- `bedrock:InvokeModel` and `bedrock:InvokeModelWithResponseStream` (for inference)
|
||||||
|
- `bedrock:ListFoundationModels` and `bedrock:ListInferenceProfiles` (for model discovery)
|
||||||
|
|
||||||
|
:::tip EC2 / ECS / Lambda
|
||||||
|
On AWS compute, attach an IAM role with `AmazonBedrockFullAccess` and you're done. No API keys, no `.env` configuration — Hermes detects the instance role automatically.
|
||||||
|
:::
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install with Bedrock support
|
||||||
|
pip install hermes-agent[bedrock]
|
||||||
|
|
||||||
|
# Select Bedrock as your provider
|
||||||
|
hermes model
|
||||||
|
# → Choose "More providers..." → "AWS Bedrock"
|
||||||
|
# → Select your region and model
|
||||||
|
|
||||||
|
# Start chatting
|
||||||
|
hermes chat
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
After running `hermes model`, your `~/.hermes/config.yaml` will contain:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model:
|
||||||
|
default: us.anthropic.claude-sonnet-4-6
|
||||||
|
provider: bedrock
|
||||||
|
base_url: https://bedrock-runtime.us-east-2.amazonaws.com
|
||||||
|
|
||||||
|
bedrock:
|
||||||
|
region: us-east-2
|
||||||
|
```
|
||||||
|
|
||||||
|
### Region
|
||||||
|
|
||||||
|
Set the AWS region in any of these ways (highest priority first):
|
||||||
|
|
||||||
|
1. `bedrock.region` in `config.yaml`
|
||||||
|
2. `AWS_REGION` environment variable
|
||||||
|
3. `AWS_DEFAULT_REGION` environment variable
|
||||||
|
4. Default: `us-east-1`
|
||||||
|
|
||||||
|
### Guardrails
|
||||||
|
|
||||||
|
To apply [Amazon Bedrock Guardrails](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html) to all model invocations:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
bedrock:
|
||||||
|
region: us-east-2
|
||||||
|
guardrail:
|
||||||
|
guardrail_identifier: "abc123def456" # From the Bedrock console
|
||||||
|
guardrail_version: "1" # Version number or "DRAFT"
|
||||||
|
stream_processing_mode: "async" # "sync" or "async"
|
||||||
|
trace: "disabled" # "enabled", "disabled", or "enabled_full"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Model Discovery
|
||||||
|
|
||||||
|
Hermes auto-discovers available models via the Bedrock control plane. You can customize discovery:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
bedrock:
|
||||||
|
discovery:
|
||||||
|
enabled: true
|
||||||
|
provider_filter: ["anthropic", "amazon"] # Only show these providers
|
||||||
|
refresh_interval: 3600 # Cache for 1 hour
|
||||||
|
```
|
||||||
|
|
||||||
|
## Available Models
|
||||||
|
|
||||||
|
Bedrock models use **inference profile IDs** for on-demand invocation. The `hermes model` picker shows these automatically, with recommended models at the top:
|
||||||
|
|
||||||
|
| Model | ID | Notes |
|
||||||
|
|-------|-----|-------|
|
||||||
|
| Claude Sonnet 4.6 | `us.anthropic.claude-sonnet-4-6` | Recommended — best balance of speed and capability |
|
||||||
|
| Claude Opus 4.6 | `us.anthropic.claude-opus-4-6-v1` | Most capable |
|
||||||
|
| Claude Haiku 4.5 | `us.anthropic.claude-haiku-4-5-20251001-v1:0` | Fastest Claude |
|
||||||
|
| Amazon Nova Pro | `us.amazon.nova-pro-v1:0` | Amazon's flagship |
|
||||||
|
| Amazon Nova Micro | `us.amazon.nova-micro-v1:0` | Fastest, cheapest |
|
||||||
|
| DeepSeek V3.2 | `deepseek.v3.2` | Strong open model |
|
||||||
|
| Llama 4 Scout 17B | `us.meta.llama4-scout-17b-instruct-v1:0` | Meta's latest |
|
||||||
|
|
||||||
|
:::info Cross-Region Inference
|
||||||
|
Models prefixed with `us.` use cross-region inference profiles, which provide better capacity and automatic failover across AWS regions. Models prefixed with `global.` route across all available regions worldwide.
|
||||||
|
:::
|
||||||
|
|
||||||
|
## Switching Models Mid-Session
|
||||||
|
|
||||||
|
Use the `/model` command during a conversation:
|
||||||
|
|
||||||
|
```
|
||||||
|
/model us.amazon.nova-pro-v1:0
|
||||||
|
/model deepseek.v3.2
|
||||||
|
/model us.anthropic.claude-opus-4-6-v1
|
||||||
|
```
|
||||||
|
|
||||||
|
## Diagnostics
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hermes doctor
|
||||||
|
```
|
||||||
|
|
||||||
|
The doctor checks:
|
||||||
|
- Whether AWS credentials are available (env vars, IAM role, SSO)
|
||||||
|
- Whether `boto3` is installed
|
||||||
|
- Whether the Bedrock API is reachable (ListFoundationModels)
|
||||||
|
- Number of available models in your region
|
||||||
|
|
||||||
|
## Gateway (Messaging Platforms)
|
||||||
|
|
||||||
|
Bedrock works with all Hermes gateway platforms (Telegram, Discord, Slack, Feishu, etc.). Configure Bedrock as your provider, then start the gateway normally:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hermes gateway setup
|
||||||
|
hermes gateway start
|
||||||
|
```
|
||||||
|
|
||||||
|
The gateway reads `config.yaml` and uses the same Bedrock provider configuration.
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### "No API key found" / "No AWS credentials"
|
||||||
|
|
||||||
|
Hermes checks for credentials in this order:
|
||||||
|
1. `AWS_BEARER_TOKEN_BEDROCK`
|
||||||
|
2. `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`
|
||||||
|
3. `AWS_PROFILE`
|
||||||
|
4. EC2 instance metadata (IMDS)
|
||||||
|
5. ECS container credentials
|
||||||
|
6. Lambda execution role
|
||||||
|
|
||||||
|
If none are found, run `aws configure` or attach an IAM role to your compute instance.
|
||||||
|
|
||||||
|
### "Invocation of model ID ... with on-demand throughput isn't supported"
|
||||||
|
|
||||||
|
Use an **inference profile ID** (prefixed with `us.` or `global.`) instead of the bare foundation model ID. For example:
|
||||||
|
- ❌ `anthropic.claude-sonnet-4-6`
|
||||||
|
- ✅ `us.anthropic.claude-sonnet-4-6`
|
||||||
|
|
||||||
|
### "ThrottlingException"
|
||||||
|
|
||||||
|
You've hit the Bedrock per-model rate limit. Hermes automatically retries with backoff. To increase limits, request a quota increase in the [AWS Service Quotas console](https://console.aws.amazon.com/servicequotas/).
|
||||||
|
|
@ -297,6 +297,7 @@ discord:
|
||||||
reactions: true # Add emoji reactions during processing
|
reactions: true # Add emoji reactions during processing
|
||||||
ignored_channels: [] # Channel IDs where bot never responds
|
ignored_channels: [] # Channel IDs where bot never responds
|
||||||
no_thread_channels: [] # Channel IDs where bot responds without threading
|
no_thread_channels: [] # Channel IDs where bot responds without threading
|
||||||
|
channel_prompts: {} # Per-channel ephemeral system prompts
|
||||||
|
|
||||||
# Session isolation (applies to all gateway platforms, not just Discord)
|
# Session isolation (applies to all gateway platforms, not just Discord)
|
||||||
group_sessions_per_user: true # Isolate sessions per user in shared channels
|
group_sessions_per_user: true # Isolate sessions per user in shared channels
|
||||||
|
|
@ -381,6 +382,28 @@ discord:
|
||||||
|
|
||||||
Useful for channels dedicated to bot interaction where threads would add unnecessary noise.
|
Useful for channels dedicated to bot interaction where threads would add unnecessary noise.
|
||||||
|
|
||||||
|
#### `discord.channel_prompts`
|
||||||
|
|
||||||
|
**Type:** mapping — **Default:** `{}`
|
||||||
|
|
||||||
|
Per-channel ephemeral system prompts that are injected on every turn in the matching Discord channel or thread without being persisted to transcript history.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
discord:
|
||||||
|
channel_prompts:
|
||||||
|
"1234567890": |
|
||||||
|
This channel is for research tasks. Prefer deep comparisons,
|
||||||
|
citations, and concise synthesis.
|
||||||
|
"9876543210": |
|
||||||
|
This forum is for therapy-style support. Be warm, grounded,
|
||||||
|
and non-judgmental.
|
||||||
|
```
|
||||||
|
|
||||||
|
Behavior:
|
||||||
|
- Exact thread/channel ID matches win.
|
||||||
|
- If a message arrives inside a thread or forum post and that thread has no explicit entry, Hermes falls back to the parent channel/forum ID.
|
||||||
|
- Prompts are applied ephemerally at runtime, so changing them affects future turns immediately without rewriting past session history.
|
||||||
|
|
||||||
#### `group_sessions_per_user`
|
#### `group_sessions_per_user`
|
||||||
|
|
||||||
**Type:** boolean — **Default:** `true`
|
**Type:** boolean — **Default:** `true`
|
||||||
|
|
|
||||||
|
|
@ -281,6 +281,23 @@ If this returns your bot's user info, the token is valid. If it returns an error
|
||||||
|
|
||||||
**Fix**: Add your User ID to `MATTERMOST_ALLOWED_USERS` in `~/.hermes/.env` and restart the gateway. Remember: the User ID is a 26-character alphanumeric string, not your `@username`.
|
**Fix**: Add your User ID to `MATTERMOST_ALLOWED_USERS` in `~/.hermes/.env` and restart the gateway. Remember: the User ID is a 26-character alphanumeric string, not your `@username`.
|
||||||
|
|
||||||
|
## Per-Channel Prompts
|
||||||
|
|
||||||
|
Assign ephemeral system prompts to specific Mattermost channels. The prompt is injected at runtime on every turn — never persisted to transcript history — so changes take effect immediately.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
mattermost:
|
||||||
|
channel_prompts:
|
||||||
|
"channel_id_abc123": |
|
||||||
|
You are a research assistant. Focus on academic sources,
|
||||||
|
citations, and concise synthesis.
|
||||||
|
"channel_id_def456": |
|
||||||
|
Code review mode. Be precise about edge cases and
|
||||||
|
performance implications.
|
||||||
|
```
|
||||||
|
|
||||||
|
Keys are Mattermost channel IDs (find them in the channel URL or via the API). All messages in the matching channel get the prompt injected as an ephemeral system instruction.
|
||||||
|
|
||||||
## Security
|
## Security
|
||||||
|
|
||||||
:::warning
|
:::warning
|
||||||
|
|
|
||||||
|
|
@ -418,6 +418,23 @@ Hermes supports voice on Slack:
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Per-Channel Prompts
|
||||||
|
|
||||||
|
Assign ephemeral system prompts to specific Slack channels. The prompt is injected at runtime on every turn — never persisted to transcript history — so changes take effect immediately.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
slack:
|
||||||
|
channel_prompts:
|
||||||
|
"C01RESEARCH": |
|
||||||
|
You are a research assistant. Focus on academic sources,
|
||||||
|
citations, and concise synthesis.
|
||||||
|
"C02ENGINEERING": |
|
||||||
|
Code review mode. Be precise about edge cases and
|
||||||
|
performance implications.
|
||||||
|
```
|
||||||
|
|
||||||
|
Keys are Slack channel IDs (find them via channel details → "About" → scroll to bottom). All messages in the matching channel get the prompt injected as an ephemeral system instruction.
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
| Problem | Solution |
|
| Problem | Solution |
|
||||||
|
|
|
||||||
|
|
@ -526,6 +526,29 @@ Unlike Discord (where reactions are additive), Telegram's Bot API replaces all b
|
||||||
If the bot doesn't have permission to add reactions in a group, the reaction calls fail silently and message processing continues normally.
|
If the bot doesn't have permission to add reactions in a group, the reaction calls fail silently and message processing continues normally.
|
||||||
:::
|
:::
|
||||||
|
|
||||||
|
## Per-Channel Prompts
|
||||||
|
|
||||||
|
Assign ephemeral system prompts to specific Telegram groups or forum topics. The prompt is injected at runtime on every turn — never persisted to transcript history — so changes take effect immediately.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
telegram:
|
||||||
|
channel_prompts:
|
||||||
|
"-1001234567890": |
|
||||||
|
You are a research assistant. Focus on academic sources,
|
||||||
|
citations, and concise synthesis.
|
||||||
|
"42": |
|
||||||
|
This topic is for creative writing feedback. Be warm and
|
||||||
|
constructive.
|
||||||
|
```
|
||||||
|
|
||||||
|
Keys are chat IDs (groups/supergroups) or forum topic IDs. For forum groups, topic-level prompts override the group-level prompt:
|
||||||
|
|
||||||
|
- Message in topic `42` inside group `-1001234567890` → uses topic `42`'s prompt
|
||||||
|
- Message in topic `99` (no explicit entry) → falls back to group `-1001234567890`'s prompt
|
||||||
|
- Message in a group with no entry → no channel prompt applied
|
||||||
|
|
||||||
|
Numeric YAML keys are automatically normalized to strings.
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
| Problem | Solution |
|
| Problem | Solution |
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue