feat: native AWS Bedrock provider via Converse API

Salvaged from PR #7920 by JiaDe-Wu — cherry-picked Bedrock-specific
additions onto current main, skipping stale-branch reverts (293 commits
behind).

Dual-path architecture:
  - Claude models → AnthropicBedrock SDK (prompt caching, thinking budgets)
  - Non-Claude models → Converse API via boto3 (Nova, DeepSeek, Llama, Mistral)

Includes:
  - Core adapter (agent/bedrock_adapter.py, 1098 lines)
  - Full provider registration (auth, models, providers, config, runtime, main)
  - IAM credential chain + Bedrock API Key auth modes
  - Dynamic model discovery via ListFoundationModels + ListInferenceProfiles
  - Streaming with delta callbacks, error classification, guardrails
  - hermes doctor + hermes auth integration
  - /usage pricing for 7 Bedrock models
  - 130 automated tests (79 unit + 28 integration + follow-up fixes)
  - Documentation (website/docs/guides/aws-bedrock.md)
  - boto3 optional dependency (pip install hermes-agent[bedrock])

Co-authored-by: JiaDe WU <40445668+JiaDe-Wu@users.noreply.github.com>
This commit is contained in:
JiaDe WU 2026-04-15 15:18:01 -07:00 committed by Teknium
parent 21afc9502a
commit 0cb8c51fa5
18 changed files with 3543 additions and 20 deletions

View file

@ -303,6 +303,22 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"XiaomiMiMo/MiMo-V2-Flash",
"moonshotai/Kimi-K2-Thinking",
],
# AWS Bedrock — static fallback list used when dynamic discovery is
# unavailable (no boto3, no credentials, or API error). The agent
# prefers live discovery via ListFoundationModels + ListInferenceProfiles.
# Use inference profile IDs (us.*) since most models require them.
"bedrock": [
"us.anthropic.claude-sonnet-4-6",
"us.anthropic.claude-opus-4-6-v1",
"us.anthropic.claude-haiku-4-5-20251001-v1:0",
"us.anthropic.claude-sonnet-4-5-20250929-v1:0",
"us.amazon.nova-pro-v1:0",
"us.amazon.nova-lite-v1:0",
"us.amazon.nova-micro-v1:0",
"deepseek.v3.2",
"us.meta.llama4-maverick-17b-instruct-v1:0",
"us.meta.llama4-scout-17b-instruct-v1:0",
],
}
# ---------------------------------------------------------------------------
@ -536,6 +552,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"),
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
]
# Derived dicts — used throughout the codebase
@ -587,6 +604,10 @@ _PROVIDER_ALIASES = {
"huggingface-hub": "huggingface",
"mimo": "xiaomi",
"xiaomi-mimo": "xiaomi",
"aws": "bedrock",
"aws-bedrock": "bedrock",
"amazon-bedrock": "bedrock",
"amazon": "bedrock",
"grok": "xai",
"x-ai": "xai",
"x.ai": "xai",
@ -1957,6 +1978,42 @@ def validate_requested_model(
# api_models is None — couldn't reach API. Accept and persist,
# but warn so typos don't silently break things.
# Bedrock: use our own discovery instead of HTTP /models endpoint.
# Bedrock's bedrock-runtime URL doesn't support /models — it uses the
# AWS SDK control plane (ListFoundationModels + ListInferenceProfiles).
if normalized == "bedrock":
try:
from agent.bedrock_adapter import discover_bedrock_models, resolve_bedrock_region
region = resolve_bedrock_region()
discovered = discover_bedrock_models(region)
discovered_ids = {m["id"] for m in discovered}
if requested in discovered_ids:
return {
"accepted": True,
"persist": True,
"recognized": True,
"message": None,
}
# Not in discovered list — still accept (user may have custom
# inference profiles or cross-account access), but warn.
suggestions = get_close_matches(requested, list(discovered_ids), n=3, cutoff=0.4)
suggestion_text = ""
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
return {
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Note: `{requested}` was not found in Bedrock model discovery for {region}. "
f"It may still work with custom inference profiles or cross-account access."
f"{suggestion_text}"
),
}
except Exception:
pass # Fall through to generic warning
provider_label = _PROVIDER_LABELS.get(normalized, normalized)
return {
"accepted": True,