diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index b85f77a9d..f3f08039d 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -298,6 +298,33 @@ def build_anthropic_client(api_key: str, base_url: str = None): return _anthropic_sdk.Anthropic(**kwargs) +def build_anthropic_bedrock_client(region: str): + """Create an AnthropicBedrock client for Bedrock Claude models. + + Uses the Anthropic SDK's native Bedrock adapter, which provides full + Claude feature parity: prompt caching, thinking budgets, adaptive + thinking, fast mode — features not available via the Converse API. + + Auth uses the boto3 default credential chain (IAM roles, SSO, env vars). + """ + if _anthropic_sdk is None: + raise ImportError( + "The 'anthropic' package is required for the Bedrock provider. " + "Install it with: pip install 'anthropic>=0.39.0'" + ) + if not hasattr(_anthropic_sdk, "AnthropicBedrock"): + raise ImportError( + "anthropic.AnthropicBedrock not available. " + "Upgrade with: pip install 'anthropic>=0.39.0'" + ) + from httpx import Timeout + + return _anthropic_sdk.AnthropicBedrock( + aws_region=region, + timeout=Timeout(timeout=900.0, connect=10.0), + ) + + def read_claude_code_credentials() -> Optional[Dict[str, Any]]: """Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json. diff --git a/agent/bedrock_adapter.py b/agent/bedrock_adapter.py new file mode 100644 index 000000000..9e4297581 --- /dev/null +++ b/agent/bedrock_adapter.py @@ -0,0 +1,1098 @@ +"""AWS Bedrock Converse API adapter for Hermes Agent. + +Provides native integration with Amazon Bedrock using the Converse API, +bypassing the OpenAI-compatible endpoint in favor of direct AWS SDK calls. +This enables full access to the Bedrock ecosystem: + + - **Native Converse API**: Unified interface for all Bedrock models + (Claude, Nova, Llama, Mistral, etc.) with streaming support. + - **AWS credential chain**: IAM roles, SSO profiles, environment variables, + instance metadata — zero API key management for AWS-native environments. + - **Dynamic model discovery**: Auto-discovers available foundation models + and cross-region inference profiles via the Bedrock control plane. + - **Guardrails support**: Optional Bedrock Guardrails configuration for + content filtering and safety policies. + - **Inference profiles**: Supports cross-region inference profiles + (us.anthropic.claude-*, global.anthropic.claude-*) for better capacity + and automatic failover. + +Architecture follows the same pattern as ``anthropic_adapter.py``: + - All Bedrock-specific logic is isolated in this module. + - Messages/tools are converted between OpenAI format and Converse format. + - Responses are normalized back to OpenAI-compatible objects for the agent loop. + +Reference: OpenClaw's ``extensions/amazon-bedrock/`` plugin, which implements +the same Converse API integration in TypeScript via ``@aws-sdk/client-bedrock``. + +Requires: ``boto3`` (optional dependency — only needed when using the Bedrock provider). +""" + +import json +import logging +import os +import re +from types import SimpleNamespace +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Lazy boto3 import — only loaded when the Bedrock provider is actually used. +# This keeps startup fast for users who don't use Bedrock. 
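# --- Illustrative usage sketch (not part of the patch) ----------------------
# The build_anthropic_bedrock_client() helper added to anthropic_adapter.py
# above returns the Anthropic SDK's native AnthropicBedrock client, so the
# standard Messages API applies. The region and model ID below are
# hypothetical examples (the model ID is taken from the curated Bedrock list
# later in this patch); any accessible Bedrock Claude inference profile works.
from agent.anthropic_adapter import build_anthropic_bedrock_client

client = build_anthropic_bedrock_client("us-east-1")
reply = client.messages.create(
    model="us.anthropic.claude-sonnet-4-5-20250929-v1:0",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello from Bedrock"}],
)
print(reply.content[0].text)
# -----------------------------------------------------------------------------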
+# --------------------------------------------------------------------------- + +_bedrock_runtime_client_cache: Dict[str, Any] = {} +_bedrock_control_client_cache: Dict[str, Any] = {} + + +def _require_boto3(): + """Import boto3, raising a clear error if not installed.""" + try: + import boto3 + return boto3 + except ImportError: + raise ImportError( + "The 'boto3' package is required for the AWS Bedrock provider. " + "Install it with: pip install boto3\n" + "Or install Hermes with Bedrock support: pip install -e '.[bedrock]'" + ) + + +def _get_bedrock_runtime_client(region: str): + """Get or create a cached ``bedrock-runtime`` client for the given region. + + Uses the default AWS credential chain (env vars → profile → instance role). + """ + if region not in _bedrock_runtime_client_cache: + boto3 = _require_boto3() + _bedrock_runtime_client_cache[region] = boto3.client( + "bedrock-runtime", region_name=region, + ) + return _bedrock_runtime_client_cache[region] + + +def _get_bedrock_control_client(region: str): + """Get or create a cached ``bedrock`` control-plane client for model discovery.""" + if region not in _bedrock_control_client_cache: + boto3 = _require_boto3() + _bedrock_control_client_cache[region] = boto3.client( + "bedrock", region_name=region, + ) + return _bedrock_control_client_cache[region] + + +def reset_client_cache(): + """Clear cached boto3 clients. Used in tests and profile switches.""" + _bedrock_runtime_client_cache.clear() + _bedrock_control_client_cache.clear() + + +# --------------------------------------------------------------------------- +# AWS credential detection +# --------------------------------------------------------------------------- + +# Priority order matches OpenClaw's resolveAwsSdkEnvVarName(): +# 1. AWS_BEARER_TOKEN_BEDROCK (Bedrock-specific bearer token) +# 2. AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY (explicit IAM credentials) +# 3. AWS_PROFILE (named profile → SSO, assume-role, etc.) +# 4. Implicit: instance role, ECS task role, Lambda execution role +_AWS_CREDENTIAL_ENV_VARS = [ + "AWS_BEARER_TOKEN_BEDROCK", + "AWS_ACCESS_KEY_ID", + "AWS_PROFILE", + # These are checked by boto3's default chain but we list them for + # has_aws_credentials() detection: + "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", + "AWS_WEB_IDENTITY_TOKEN_FILE", +] + + +def resolve_aws_auth_env_var(env: Optional[Dict[str, str]] = None) -> Optional[str]: + """Return the name of the AWS auth source that is active, or None. + + Checks environment variables first, then falls back to boto3's credential + chain for implicit sources (EC2 IMDS, ECS task role, etc.). + + This mirrors OpenClaw's ``resolveAwsSdkEnvVarName()`` — used to detect + whether the user has any AWS credentials configured without actually + attempting to authenticate. + """ + env = env if env is not None else os.environ + # Bearer token takes highest priority + if env.get("AWS_BEARER_TOKEN_BEDROCK", "").strip(): + return "AWS_BEARER_TOKEN_BEDROCK" + # Explicit access key pair + if (env.get("AWS_ACCESS_KEY_ID", "").strip() + and env.get("AWS_SECRET_ACCESS_KEY", "").strip()): + return "AWS_ACCESS_KEY_ID" + # Named profile (SSO, assume-role, etc.) 
+ if env.get("AWS_PROFILE", "").strip(): + return "AWS_PROFILE" + # Container credentials (ECS, CodeBuild) + if env.get("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", "").strip(): + return "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" + # Web identity (EKS IRSA) + if env.get("AWS_WEB_IDENTITY_TOKEN_FILE", "").strip(): + return "AWS_WEB_IDENTITY_TOKEN_FILE" + # No env vars — check if boto3 can resolve credentials via IMDS or other + # implicit sources (EC2 instance role, ECS task role, Lambda, etc.) + try: + import botocore.session + session = botocore.session.get_session() + credentials = session.get_credentials() + if credentials is not None: + resolved = credentials.get_frozen_credentials() + if resolved and resolved.access_key: + return "iam-role" + except Exception: + pass + return None + + +def has_aws_credentials(env: Optional[Dict[str, str]] = None) -> bool: + """Return True if any AWS credential source is detected. + + Checks environment variables first (fast, no I/O), then falls back to + boto3's credential chain which covers EC2 instance roles, ECS task roles, + Lambda execution roles, and other IMDS-based sources that don't set + environment variables. + + This two-tier approach mirrors the pattern from OpenClaw PR #62673: + cloud environments (EC2, ECS, Lambda) provide credentials via instance + metadata, not environment variables. The env-var check is a fast path + for local development; the boto3 fallback covers all cloud deployments. + """ + if resolve_aws_auth_env_var(env) is not None: + return True + # Fall back to boto3's credential resolver — this covers EC2 instance + # metadata (IMDS), ECS container credentials, and other implicit sources + # that don't set environment variables. + try: + import botocore.session + session = botocore.session.get_session() + credentials = session.get_credentials() + if credentials is not None: + resolved = credentials.get_frozen_credentials() + if resolved and resolved.access_key: + return True + except Exception: + pass + return False + + +def resolve_bedrock_region(env: Optional[Dict[str, str]] = None) -> str: + """Resolve the AWS region for Bedrock API calls. + + Priority: AWS_REGION → AWS_DEFAULT_REGION → us-east-1 (fallback). + """ + env = env if env is not None else os.environ + return ( + env.get("AWS_REGION", "").strip() + or env.get("AWS_DEFAULT_REGION", "").strip() + or "us-east-1" + ) + + +# --------------------------------------------------------------------------- +# Tool-calling capability detection +# --------------------------------------------------------------------------- +# Some Bedrock models don't support tool/function calling. Sending toolConfig +# to these models causes ValidationException. We maintain a denylist of known +# non-tool-calling model patterns and strip tools for them. +# +# This is a conservative approach: unknown models are assumed to support tools. +# If a model fails with a tool-related ValidationException, add it here. + +_NON_TOOL_CALLING_PATTERNS = [ + "deepseek.r1", # DeepSeek R1 — reasoning only, no tool support + "deepseek-r1", # Alternate ID format + "stability.", # Image generation models + "cohere.embed", # Embedding models + "amazon.titan-embed", # Embedding models +] + + +def _model_supports_tool_use(model_id: str) -> bool: + """Return True if the model is expected to support tool/function calling. + + Models in the denylist are known to reject toolConfig in the Converse API. + Unknown models default to True (assume tool support). 
+ """ + model_lower = model_id.lower() + return not any(pattern in model_lower for pattern in _NON_TOOL_CALLING_PATTERNS) + + +def is_anthropic_bedrock_model(model_id: str) -> bool: + """Return True if the model is an Anthropic Claude model on Bedrock. + + These models should use the AnthropicBedrock SDK path for full feature + parity (prompt caching, thinking budgets, adaptive thinking). + Non-Claude models use the Converse API path. + + Matches: + - ``anthropic.claude-*`` (foundation model IDs) + - ``us.anthropic.claude-*`` (US inference profiles) + - ``global.anthropic.claude-*`` (global inference profiles) + - ``eu.anthropic.claude-*`` (EU inference profiles) + """ + model_lower = model_id.lower() + # Strip regional prefix if present + for prefix in ("us.", "global.", "eu.", "ap.", "jp."): + if model_lower.startswith(prefix): + model_lower = model_lower[len(prefix):] + break + return model_lower.startswith("anthropic.claude") + + +# --------------------------------------------------------------------------- +# Message format conversion: OpenAI → Bedrock Converse +# --------------------------------------------------------------------------- + +def convert_tools_to_converse(tools: List[Dict]) -> List[Dict]: + """Convert OpenAI-format tool definitions to Bedrock Converse ``toolConfig``. + + OpenAI format:: + + {"type": "function", "function": {"name": "...", "description": "...", + "parameters": {"type": "object", "properties": {...}}}} + + Converse format:: + + {"toolSpec": {"name": "...", "description": "...", + "inputSchema": {"json": {"type": "object", "properties": {...}}}}} + """ + if not tools: + return [] + result = [] + for t in tools: + fn = t.get("function", {}) + name = fn.get("name", "") + description = fn.get("description", "") + parameters = fn.get("parameters", {"type": "object", "properties": {}}) + result.append({ + "toolSpec": { + "name": name, + "description": description, + "inputSchema": {"json": parameters}, + } + }) + return result + + +def _convert_content_to_converse(content) -> List[Dict]: + """Convert OpenAI message content (string or list) to Converse content blocks. + + Handles: + - Plain text strings → [{"text": "..."}] + - Content arrays with text/image_url parts → mixed text/image blocks + + Filters out empty text blocks — Bedrock's Converse API rejects messages + where a text content block has an empty ``text`` field (ValidationException: + "text content blocks must be non-empty"). Ref: issue #9486. + """ + if content is None: + return [{"text": " "}] + if isinstance(content, str): + return [{"text": content}] if content.strip() else [{"text": " "}] + if isinstance(content, list): + blocks = [] + for part in content: + if isinstance(part, str): + blocks.append({"text": part}) + continue + if not isinstance(part, dict): + continue + part_type = part.get("type", "") + if part_type == "text": + text = part.get("text", "") + blocks.append({"text": text if text else " "}) + elif part_type == "image_url": + image_url = part.get("image_url", {}) + url = image_url.get("url", "") if isinstance(image_url, dict) else "" + if url.startswith("data:"): + # data:image/jpeg;base64,/9j/4AAQ... 
+ header, _, data = url.partition(",") + media_type = "image/jpeg" + if header.startswith("data:"): + mime_part = header[5:].split(";")[0] + if mime_part: + media_type = mime_part + blocks.append({ + "image": { + "format": media_type.split("/")[-1] if "/" in media_type else "jpeg", + "source": {"bytes": data}, + } + }) + else: + # Remote URL — Converse doesn't support URLs directly, + # include as text reference for the model. + blocks.append({"text": f"[Image: {url}]"}) + return blocks if blocks else [{"text": " "}] + return [{"text": str(content)}] + + +def convert_messages_to_converse( + messages: List[Dict], +) -> Tuple[Optional[List[Dict]], List[Dict]]: + """Convert OpenAI-format messages to Bedrock Converse format. + + Returns ``(system_prompt, converse_messages)`` where: + - ``system_prompt`` is a list of system content blocks (or None) + - ``converse_messages`` is the conversation in Converse format + + Handles: + - System messages → extracted as system prompt + - User messages → ``{"role": "user", "content": [...]}`` + - Assistant messages → ``{"role": "assistant", "content": [...]}`` + - Tool calls → ``{"toolUse": {"toolUseId": ..., "name": ..., "input": ...}}`` + - Tool results → ``{"toolResult": {"toolUseId": ..., "content": [...]}}`` + + Converse requires strict user/assistant alternation. Consecutive messages + with the same role are merged into a single message. + """ + system_blocks: List[Dict] = [] + converse_msgs: List[Dict] = [] + + for msg in messages: + role = msg.get("role", "") + content = msg.get("content") + + if role == "system": + # System messages become the system prompt + if isinstance(content, str) and content.strip(): + system_blocks.append({"text": content}) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + system_blocks.append({"text": part.get("text", "")}) + elif isinstance(part, str): + system_blocks.append({"text": part}) + continue + + if role == "tool": + # Tool result messages → merge into the preceding user turn + tool_call_id = msg.get("tool_call_id", "") + result_content = content if isinstance(content, str) else json.dumps(content) + tool_result_block = { + "toolResult": { + "toolUseId": tool_call_id, + "content": [{"text": result_content}], + } + } + # In Converse, tool results go in a "user" role message + if converse_msgs and converse_msgs[-1]["role"] == "user": + converse_msgs[-1]["content"].append(tool_result_block) + else: + converse_msgs.append({ + "role": "user", + "content": [tool_result_block], + }) + continue + + if role == "assistant": + content_blocks = [] + # Convert text content + if isinstance(content, str) and content.strip(): + content_blocks.append({"text": content}) + elif isinstance(content, list): + content_blocks.extend(_convert_content_to_converse(content)) + + # Convert tool calls + tool_calls = msg.get("tool_calls", []) + for tc in (tool_calls or []): + fn = tc.get("function", {}) + args_str = fn.get("arguments", "{}") + try: + args_dict = json.loads(args_str) if isinstance(args_str, str) else args_str + except (json.JSONDecodeError, TypeError): + args_dict = {} + content_blocks.append({ + "toolUse": { + "toolUseId": tc.get("id", ""), + "name": fn.get("name", ""), + "input": args_dict, + } + }) + + if not content_blocks: + content_blocks = [{"text": " "}] + + # Merge with previous assistant message if needed (strict alternation) + if converse_msgs and converse_msgs[-1]["role"] == "assistant": + converse_msgs[-1]["content"].extend(content_blocks) + 
else: + converse_msgs.append({ + "role": "assistant", + "content": content_blocks, + }) + continue + + if role == "user": + content_blocks = _convert_content_to_converse(content) + # Merge with previous user message if needed (strict alternation) + if converse_msgs and converse_msgs[-1]["role"] == "user": + converse_msgs[-1]["content"].extend(content_blocks) + else: + converse_msgs.append({ + "role": "user", + "content": content_blocks, + }) + continue + + # Converse requires the first message to be from the user + if converse_msgs and converse_msgs[0]["role"] != "user": + converse_msgs.insert(0, {"role": "user", "content": [{"text": " "}]}) + + # Converse requires the last message to be from the user + if converse_msgs and converse_msgs[-1]["role"] != "user": + converse_msgs.append({"role": "user", "content": [{"text": " "}]}) + + return (system_blocks if system_blocks else None, converse_msgs) + + +# --------------------------------------------------------------------------- +# Response format conversion: Bedrock Converse → OpenAI +# --------------------------------------------------------------------------- + +def _converse_stop_reason_to_openai(stop_reason: str) -> str: + """Map Bedrock Converse stop reasons to OpenAI finish_reason values.""" + mapping = { + "end_turn": "stop", + "stop_sequence": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "content_filtered": "content_filter", + "guardrail_intervened": "content_filter", + } + return mapping.get(stop_reason, "stop") + + +def normalize_converse_response(response: Dict) -> SimpleNamespace: + """Convert a Bedrock Converse API response to an OpenAI-compatible object. + + The agent loop in ``run_agent.py`` expects responses shaped like + ``openai.ChatCompletion`` — this function bridges the gap. 
+ + Returns a SimpleNamespace with: + - ``.choices[0].message.content`` — text response + - ``.choices[0].message.tool_calls`` — tool call list (if any) + - ``.choices[0].finish_reason`` — stop/tool_calls/length + - ``.usage`` — token usage stats + """ + output = response.get("output", {}) + message = output.get("message", {}) + content_blocks = message.get("content", []) + stop_reason = response.get("stopReason", "end_turn") + + text_parts = [] + tool_calls = [] + + for block in content_blocks: + if "text" in block: + text_parts.append(block["text"]) + elif "toolUse" in block: + tu = block["toolUse"] + tool_calls.append(SimpleNamespace( + id=tu.get("toolUseId", ""), + type="function", + function=SimpleNamespace( + name=tu.get("name", ""), + arguments=json.dumps(tu.get("input", {})), + ), + )) + + # Build the message object + msg = SimpleNamespace( + role="assistant", + content="\n".join(text_parts) if text_parts else None, + tool_calls=tool_calls if tool_calls else None, + ) + + # Build usage stats + usage_data = response.get("usage", {}) + usage = SimpleNamespace( + prompt_tokens=usage_data.get("inputTokens", 0), + completion_tokens=usage_data.get("outputTokens", 0), + total_tokens=( + usage_data.get("inputTokens", 0) + usage_data.get("outputTokens", 0) + ), + ) + + finish_reason = _converse_stop_reason_to_openai(stop_reason) + if tool_calls and finish_reason == "stop": + finish_reason = "tool_calls" + + choice = SimpleNamespace( + index=0, + message=msg, + finish_reason=finish_reason, + ) + + return SimpleNamespace( + choices=[choice], + usage=usage, + model=response.get("modelId", ""), + ) + + +# --------------------------------------------------------------------------- +# Streaming response conversion +# --------------------------------------------------------------------------- + +def normalize_converse_stream_events(event_stream) -> SimpleNamespace: + """Consume a Bedrock ConverseStream event stream and build an OpenAI-compatible response. + + Processes the stream events in order: + - ``messageStart`` — role info + - ``contentBlockStart`` — new text or toolUse block + - ``contentBlockDelta`` — incremental text or toolUse input + - ``contentBlockStop`` — block complete + - ``messageStop`` — stop reason + - ``metadata`` — usage stats + + Returns the same shape as ``normalize_converse_response()``. + """ + return stream_converse_with_callbacks(event_stream) + + +def stream_converse_with_callbacks( + event_stream, + on_text_delta=None, + on_tool_start=None, + on_reasoning_delta=None, + on_interrupt_check=None, +) -> SimpleNamespace: + """Process a Bedrock ConverseStream event stream with real-time callbacks. + + This is the core streaming function that powers both the CLI's live token + display and the gateway's progressive message updates. + + Args: + event_stream: The boto3 ``converse_stream()`` response containing a + ``stream`` key with an iterable of events. + on_text_delta: Called with each text chunk as it arrives. Only fires + when no tool_use blocks have been seen (same semantics as the + Anthropic and chat_completions streaming paths). + on_tool_start: Called with the tool name when a toolUse block begins. + Lets the TUI show a spinner while tool arguments are generated. + on_reasoning_delta: Called with reasoning/thinking text chunks. + Bedrock surfaces thinking via ``reasoning`` content block deltas + on supported models (Claude 4.6+). + on_interrupt_check: Called on each event. Should return True if the + agent has been interrupted and streaming should stop. 
+ + Returns: + An OpenAI-compatible SimpleNamespace response, identical in shape to + ``normalize_converse_response()``. + """ + text_parts: List[str] = [] + tool_calls: List[SimpleNamespace] = [] + current_tool: Optional[Dict] = None + current_text_buffer: List[str] = [] + has_tool_use = False + stop_reason = "end_turn" + usage_data: Dict[str, int] = {} + + for event in event_stream.get("stream", []): + # Check for interrupt + if on_interrupt_check and on_interrupt_check(): + break + + if "contentBlockStart" in event: + start = event["contentBlockStart"].get("start", {}) + if "toolUse" in start: + has_tool_use = True + # Flush any accumulated text + if current_text_buffer: + text_parts.append("".join(current_text_buffer)) + current_text_buffer = [] + current_tool = { + "toolUseId": start["toolUse"].get("toolUseId", ""), + "name": start["toolUse"].get("name", ""), + "input_json": "", + } + if on_tool_start: + on_tool_start(current_tool["name"]) + + elif "contentBlockDelta" in event: + delta = event["contentBlockDelta"].get("delta", {}) + if "text" in delta: + text = delta["text"] + current_text_buffer.append(text) + # Fire text delta callback only when no tool calls are present + # (same semantics as Anthropic/chat_completions streaming) + if on_text_delta and not has_tool_use: + on_text_delta(text) + elif "toolUse" in delta: + if current_tool is not None: + current_tool["input_json"] += delta["toolUse"].get("input", "") + elif "reasoningContent" in delta: + # Claude 4.6+ on Bedrock surfaces thinking via reasoningContent + reasoning = delta["reasoningContent"] + if isinstance(reasoning, dict): + thinking_text = reasoning.get("text", "") + if thinking_text and on_reasoning_delta: + on_reasoning_delta(thinking_text) + + elif "contentBlockStop" in event: + if current_tool is not None: + try: + input_dict = json.loads(current_tool["input_json"]) if current_tool["input_json"] else {} + except (json.JSONDecodeError, TypeError): + input_dict = {} + tool_calls.append(SimpleNamespace( + id=current_tool["toolUseId"], + type="function", + function=SimpleNamespace( + name=current_tool["name"], + arguments=json.dumps(input_dict), + ), + )) + current_tool = None + elif current_text_buffer: + text_parts.append("".join(current_text_buffer)) + current_text_buffer = [] + + elif "messageStop" in event: + stop_reason = event["messageStop"].get("stopReason", "end_turn") + + elif "metadata" in event: + meta_usage = event["metadata"].get("usage", {}) + usage_data = { + "inputTokens": meta_usage.get("inputTokens", 0), + "outputTokens": meta_usage.get("outputTokens", 0), + } + + # Flush remaining text + if current_text_buffer: + text_parts.append("".join(current_text_buffer)) + + msg = SimpleNamespace( + role="assistant", + content="\n".join(text_parts) if text_parts else None, + tool_calls=tool_calls if tool_calls else None, + ) + + usage = SimpleNamespace( + prompt_tokens=usage_data.get("inputTokens", 0), + completion_tokens=usage_data.get("outputTokens", 0), + total_tokens=( + usage_data.get("inputTokens", 0) + usage_data.get("outputTokens", 0) + ), + ) + + finish_reason = _converse_stop_reason_to_openai(stop_reason) + if tool_calls and finish_reason == "stop": + finish_reason = "tool_calls" + + choice = SimpleNamespace( + index=0, + message=msg, + finish_reason=finish_reason, + ) + + return SimpleNamespace( + choices=[choice], + usage=usage, + model="", + ) + + +# --------------------------------------------------------------------------- +# High-level API: call Bedrock Converse +# 
--------------------------------------------------------------------------- + +def build_converse_kwargs( + model: str, + messages: List[Dict], + tools: Optional[List[Dict]] = None, + max_tokens: int = 4096, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + stop_sequences: Optional[List[str]] = None, + guardrail_config: Optional[Dict] = None, +) -> Dict[str, Any]: + """Build kwargs for ``bedrock-runtime.converse()`` or ``converse_stream()``. + + Converts OpenAI-format inputs to Converse API parameters. + """ + system_prompt, converse_messages = convert_messages_to_converse(messages) + + kwargs: Dict[str, Any] = { + "modelId": model, + "messages": converse_messages, + "inferenceConfig": { + "maxTokens": max_tokens, + }, + } + + if system_prompt: + kwargs["system"] = system_prompt + + if temperature is not None: + kwargs["inferenceConfig"]["temperature"] = temperature + + if top_p is not None: + kwargs["inferenceConfig"]["topP"] = top_p + + if stop_sequences: + kwargs["inferenceConfig"]["stopSequences"] = stop_sequences + + if tools: + converse_tools = convert_tools_to_converse(tools) + if converse_tools: + # Some Bedrock models don't support tool/function calling (e.g. + # DeepSeek R1, reasoning-only models). Sending toolConfig to + # these models causes a ValidationException → retry loop → failure. + # Strip tools for known non-tool-calling models and warn the user. + # Ref: PR #7920 feedback from @ptlally, pattern from PR #4346. + if _model_supports_tool_use(model): + kwargs["toolConfig"] = {"tools": converse_tools} + else: + logger.warning( + "Model %s does not support tool calling — tools stripped. " + "The agent will operate in text-only mode.", model + ) + + if guardrail_config: + kwargs["guardrailConfig"] = guardrail_config + + return kwargs + + +def call_converse( + region: str, + model: str, + messages: List[Dict], + tools: Optional[List[Dict]] = None, + max_tokens: int = 4096, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + stop_sequences: Optional[List[str]] = None, + guardrail_config: Optional[Dict] = None, +) -> SimpleNamespace: + """Call Bedrock Converse API (non-streaming) and return an OpenAI-compatible response. + + This is the primary entry point for the agent loop when using the Bedrock provider. + """ + client = _get_bedrock_runtime_client(region) + kwargs = build_converse_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + stop_sequences=stop_sequences, + guardrail_config=guardrail_config, + ) + + response = client.converse(**kwargs) + return normalize_converse_response(response) + + +def call_converse_stream( + region: str, + model: str, + messages: List[Dict], + tools: Optional[List[Dict]] = None, + max_tokens: int = 4096, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + stop_sequences: Optional[List[str]] = None, + guardrail_config: Optional[Dict] = None, +) -> SimpleNamespace: + """Call Bedrock ConverseStream API and return an OpenAI-compatible response. + + Consumes the full stream and returns the assembled response. For true + streaming with delta callbacks, use ``iter_converse_stream()`` instead. 
+ """ + client = _get_bedrock_runtime_client(region) + kwargs = build_converse_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + stop_sequences=stop_sequences, + guardrail_config=guardrail_config, + ) + + response = client.converse_stream(**kwargs) + return normalize_converse_stream_events(response) + + +# --------------------------------------------------------------------------- +# Model discovery +# --------------------------------------------------------------------------- + +_discovery_cache: Dict[str, Any] = {} +_DISCOVERY_CACHE_TTL_SECONDS = 3600 + + +def reset_discovery_cache(): + """Clear the model discovery cache. Used in tests.""" + _discovery_cache.clear() + + +def discover_bedrock_models( + region: str, + provider_filter: Optional[List[str]] = None, +) -> List[Dict[str, Any]]: + """Discover available Bedrock foundation models and inference profiles. + + Returns a list of model info dicts with keys: + - ``id``: Model ID (e.g. "anthropic.claude-sonnet-4-6-20250514-v1:0") + - ``name``: Human-readable name + - ``provider``: Model provider (e.g. "Anthropic", "Amazon", "Meta") + - ``input_modalities``: List of input types (e.g. ["TEXT", "IMAGE"]) + - ``output_modalities``: List of output types + - ``streaming``: Whether streaming is supported + + Caches results for 1 hour per region to avoid repeated API calls. + + Mirrors OpenClaw's ``discoverBedrockModels()`` in + ``extensions/amazon-bedrock/discovery.ts``. + """ + import time + + cache_key = f"{region}:{','.join(sorted(provider_filter or []))}" + cached = _discovery_cache.get(cache_key) + if cached and (time.time() - cached["timestamp"]) < _DISCOVERY_CACHE_TTL_SECONDS: + return cached["models"] + + try: + client = _get_bedrock_control_client(region) + except Exception as e: + logger.warning("Failed to create Bedrock client for model discovery: %s", e) + return [] + + models = [] + seen_ids = set() + filter_set = {f.lower() for f in (provider_filter or [])} + + # 1. Discover foundation models + try: + response = client.list_foundation_models() + for summary in response.get("modelSummaries", []): + model_id = (summary.get("modelId") or "").strip() + if not model_id: + continue + + # Apply provider filter + if filter_set: + provider_name = (summary.get("providerName") or "").lower() + model_prefix = model_id.split(".")[0].lower() if "." in model_id else "" + if provider_name not in filter_set and model_prefix not in filter_set: + continue + + # Only include active, streaming-capable, text-output models + lifecycle = summary.get("modelLifecycle", {}) + if lifecycle.get("status", "").upper() != "ACTIVE": + continue + if not summary.get("responseStreamingSupported", False): + continue + output_mods = summary.get("outputModalities", []) + if "TEXT" not in output_mods: + continue + + models.append({ + "id": model_id, + "name": (summary.get("modelName") or model_id).strip(), + "provider": (summary.get("providerName") or "").strip(), + "input_modalities": summary.get("inputModalities", []), + "output_modalities": output_mods, + "streaming": True, + }) + seen_ids.add(model_id.lower()) + except Exception as e: + logger.warning("Failed to list Bedrock foundation models: %s", e) + + # 2. 
Discover inference profiles (cross-region, better capacity) + try: + profiles = [] + next_token = None + while True: + kwargs = {} + if next_token: + kwargs["nextToken"] = next_token + response = client.list_inference_profiles(**kwargs) + for profile in response.get("inferenceProfileSummaries", []): + profiles.append(profile) + next_token = response.get("nextToken") + if not next_token: + break + + for profile in profiles: + profile_id = (profile.get("inferenceProfileId") or "").strip() + if not profile_id: + continue + if profile.get("status") != "ACTIVE": + continue + if profile_id.lower() in seen_ids: + continue + + # Apply provider filter to underlying models + if filter_set: + profile_models = profile.get("models", []) + matches = any( + _extract_provider_from_arn(m.get("modelArn", "")).lower() in filter_set + for m in profile_models + ) + if not matches: + continue + + models.append({ + "id": profile_id, + "name": (profile.get("inferenceProfileName") or profile_id).strip(), + "provider": "inference-profile", + "input_modalities": ["TEXT"], + "output_modalities": ["TEXT"], + "streaming": True, + }) + seen_ids.add(profile_id.lower()) + except Exception as e: + logger.debug("Skipping inference profile discovery: %s", e) + + # Sort: global cross-region profiles first (recommended), then alphabetical + models.sort(key=lambda m: ( + 0 if m["id"].startswith("global.") else 1, + m["name"].lower(), + )) + + _discovery_cache[cache_key] = { + "timestamp": time.time(), + "models": models, + } + return models + + +def _extract_provider_from_arn(arn: str) -> str: + """Extract the model provider from a Bedrock model ARN. + + Example: "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-v2" + → "anthropic" + """ + match = re.search(r"foundation-model/([^.]+)", arn) + return match.group(1) if match else "" + + +def get_bedrock_model_ids(region: str) -> List[str]: + """Return a flat list of available Bedrock model IDs for the given region. + + Convenience wrapper around ``discover_bedrock_models()`` for use in + the model selection UI. + """ + models = discover_bedrock_models(region) + return [m["id"] for m in models] + + +# --------------------------------------------------------------------------- +# Error classification — Bedrock-specific exceptions +# --------------------------------------------------------------------------- +# Mirrors OpenClaw's classifyFailoverReason() and matchesContextOverflowError() +# in extensions/amazon-bedrock/register.sync.runtime.ts. + +# Patterns that indicate the input context exceeded the model's token limit. +# Used by run_agent.py to trigger context compression instead of retrying. +CONTEXT_OVERFLOW_PATTERNS = [ + re.compile(r"ValidationException.*(?:input is too long|max input token|input token.*exceed)", re.IGNORECASE), + re.compile(r"ValidationException.*(?:exceeds? the (?:maximum|max) (?:number of )?(?:input )?tokens)", re.IGNORECASE), + re.compile(r"ModelStreamErrorException.*(?:Input is too long|too many input tokens)", re.IGNORECASE), +] + +# Patterns for throttling / rate limit errors — should trigger backoff + retry. +THROTTLE_PATTERNS = [ + re.compile(r"ThrottlingException", re.IGNORECASE), + re.compile(r"Too many concurrent requests", re.IGNORECASE), + re.compile(r"ServiceQuotaExceededException", re.IGNORECASE), +] + +# Patterns for transient overload — model is temporarily unavailable. 
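# --- Illustrative usage sketch (not part of the patch) ----------------------
# How the discovery helpers defined above might be exercised from a REPL.
# The region and provider filter are hypothetical; actual results depend on
# the AWS account's Bedrock model access and IAM permissions.
from agent.bedrock_adapter import discover_bedrock_models, get_bedrock_model_ids

claude_models = discover_bedrock_models("us-east-1", provider_filter=["anthropic"])
for info in claude_models:
    # Each entry carries id / name / provider / modalities / streaming flags.
    print(info["id"], info["provider"], info["input_modalities"])

# Flat ID list, e.g. for a model-picker UI (cached for an hour per region):
all_ids = get_bedrock_model_ids("us-east-1")
# -----------------------------------------------------------------------------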
+OVERLOAD_PATTERNS = [ + re.compile(r"ModelNotReadyException", re.IGNORECASE), + re.compile(r"ModelTimeoutException", re.IGNORECASE), + re.compile(r"InternalServerException", re.IGNORECASE), +] + + +def is_context_overflow_error(error_message: str) -> bool: + """Return True if the error indicates the input context was too large. + + When this returns True, the agent should compress context and retry + rather than treating it as a fatal error. + """ + return any(p.search(error_message) for p in CONTEXT_OVERFLOW_PATTERNS) + + +def classify_bedrock_error(error_message: str) -> str: + """Classify a Bedrock error for retry/failover decisions. + + Returns: + - ``"context_overflow"`` — input too long, compress and retry + - ``"rate_limit"`` — throttled, backoff and retry + - ``"overloaded"`` — model temporarily unavailable, retry with delay + - ``"unknown"`` — unclassified error + """ + if is_context_overflow_error(error_message): + return "context_overflow" + if any(p.search(error_message) for p in THROTTLE_PATTERNS): + return "rate_limit" + if any(p.search(error_message) for p in OVERLOAD_PATTERNS): + return "overloaded" + return "unknown" + + +# --------------------------------------------------------------------------- +# Bedrock model context lengths +# --------------------------------------------------------------------------- +# Static fallback table for models where the Bedrock API doesn't expose +# context window sizes. Used by agent/model_metadata.py when dynamic +# detection is unavailable. + +BEDROCK_CONTEXT_LENGTHS: Dict[str, int] = { + # Anthropic Claude models on Bedrock + "anthropic.claude-opus-4-6": 200_000, + "anthropic.claude-sonnet-4-6": 200_000, + "anthropic.claude-sonnet-4-5": 200_000, + "anthropic.claude-haiku-4-5": 200_000, + "anthropic.claude-opus-4": 200_000, + "anthropic.claude-sonnet-4": 200_000, + "anthropic.claude-3-5-sonnet": 200_000, + "anthropic.claude-3-5-haiku": 200_000, + "anthropic.claude-3-opus": 200_000, + "anthropic.claude-3-sonnet": 200_000, + "anthropic.claude-3-haiku": 200_000, + # Amazon Nova + "amazon.nova-pro": 300_000, + "amazon.nova-lite": 300_000, + "amazon.nova-micro": 128_000, + # Meta Llama + "meta.llama4-maverick": 128_000, + "meta.llama4-scout": 128_000, + "meta.llama3-3-70b-instruct": 128_000, + # Mistral + "mistral.mistral-large": 128_000, + # DeepSeek + "deepseek.v3": 128_000, +} + +# Default for unknown Bedrock models +BEDROCK_DEFAULT_CONTEXT_LENGTH = 128_000 + + +def get_bedrock_context_length(model_id: str) -> int: + """Look up the context window size for a Bedrock model. + + Uses substring matching so versioned IDs like + ``anthropic.claude-sonnet-4-6-20250514-v1:0`` resolve correctly. 
+ """ + model_lower = model_id.lower() + best_key = "" + best_val = BEDROCK_DEFAULT_CONTEXT_LENGTH + for key, val in BEDROCK_CONTEXT_LENGTHS.items(): + if key in model_lower and len(key) > len(best_key): + best_key = key + best_val = val + return best_val diff --git a/agent/error_classifier.py b/agent/error_classifier.py index e436e5571..fa6a98504 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -112,6 +112,10 @@ _RATE_LIMIT_PATTERNS = [ "please retry after", "resource_exhausted", "rate increased too quickly", # Alibaba/DashScope throttling + # AWS Bedrock throttling + "throttlingexception", + "too many concurrent requests", + "servicequotaexceededexception", ] # Usage-limit patterns that need disambiguation (could be billing OR rate_limit) @@ -171,6 +175,11 @@ _CONTEXT_OVERFLOW_PATTERNS = [ # Chinese error messages (some providers return these) "超过最大长度", "上下文长度", + # AWS Bedrock Converse API error patterns + "input is too long", + "max input token", + "input token", + "exceeds the maximum number of input tokens", ] # Model not found patterns diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 46480da23..a0e3bea8c 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -1012,6 +1012,16 @@ def get_model_context_length( if ctx: return ctx + # 4b. AWS Bedrock — use static context length table. + # Bedrock's ListFoundationModels doesn't expose context window sizes, + # so we maintain a curated table in bedrock_adapter.py. + if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url): + try: + from agent.bedrock_adapter import get_bedrock_context_length + return get_bedrock_context_length(model) + except ImportError: + pass # boto3 not installed — fall through to generic resolution + # 5. Provider-aware lookups (before generic OpenRouter cache) # These are provider-specific and take priority over the generic OR cache, # since the same model can have different context limits per provider diff --git a/agent/usage_pricing.py b/agent/usage_pricing.py index 736c2dc35..29c75b172 100644 --- a/agent/usage_pricing.py +++ b/agent/usage_pricing.py @@ -284,6 +284,80 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { source_url="https://ai.google.dev/pricing", pricing_version="google-pricing-2026-03-16", ), + # AWS Bedrock — pricing per the Bedrock pricing page. + # Bedrock charges the same per-token rates as the model provider but + # through AWS billing. These are the on-demand prices (no commitment). 
+ # Source: https://aws.amazon.com/bedrock/pricing/ + ( + "bedrock", + "anthropic.claude-opus-4-6", + ): PricingEntry( + input_cost_per_million=Decimal("15.00"), + output_cost_per_million=Decimal("75.00"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), + ( + "bedrock", + "anthropic.claude-sonnet-4-6", + ): PricingEntry( + input_cost_per_million=Decimal("3.00"), + output_cost_per_million=Decimal("15.00"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), + ( + "bedrock", + "anthropic.claude-sonnet-4-5", + ): PricingEntry( + input_cost_per_million=Decimal("3.00"), + output_cost_per_million=Decimal("15.00"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), + ( + "bedrock", + "anthropic.claude-haiku-4-5", + ): PricingEntry( + input_cost_per_million=Decimal("0.80"), + output_cost_per_million=Decimal("4.00"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), + ( + "bedrock", + "amazon.nova-pro", + ): PricingEntry( + input_cost_per_million=Decimal("0.80"), + output_cost_per_million=Decimal("3.20"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), + ( + "bedrock", + "amazon.nova-lite", + ): PricingEntry( + input_cost_per_million=Decimal("0.06"), + output_cost_per_million=Decimal("0.24"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), + ( + "bedrock", + "amazon.nova-micro", + ): PricingEntry( + input_cost_per_million=Decimal("0.035"), + output_cost_per_million=Decimal("0.14"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), } diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 1fd9a303c..b75b6b757 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -274,6 +274,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { api_key_env_vars=("XIAOMI_API_KEY",), base_url_env_var="XIAOMI_BASE_URL", ), + "bedrock": ProviderConfig( + id="bedrock", + name="AWS Bedrock", + auth_type="aws_sdk", + inference_base_url="https://bedrock-runtime.us-east-1.amazonaws.com", + api_key_env_vars=(), + base_url_env_var="BEDROCK_BASE_URL", + ), } @@ -924,6 +932,7 @@ def resolve_provider( "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface", "mimo": "xiaomi", "xiaomi-mimo": "xiaomi", + "aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock", "go": "opencode-go", "opencode-go-sub": "opencode-go", "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode", # Local server aliases — route through the generic custom provider @@ -980,6 +989,15 @@ def resolve_provider( if has_usable_secret(os.getenv(env_var, "")): return pid + # AWS Bedrock — detect via boto3 credential chain (IAM roles, SSO, env vars). + # This runs after API-key providers so explicit keys always win. 
+ try: + from agent.bedrock_adapter import has_aws_credentials + if has_aws_credentials(): + return "bedrock" + except ImportError: + pass # boto3 not installed — skip Bedrock auto-detection + raise AuthError( "No inference provider configured. Run 'hermes model' to choose a " "provider and model, or set an API key (OPENROUTER_API_KEY, " @@ -2446,6 +2464,13 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: pconfig = PROVIDER_REGISTRY.get(target) if pconfig and pconfig.auth_type == "api_key": return get_api_key_provider_status(target) + # AWS SDK providers (Bedrock) — check via boto3 credential chain + if pconfig and pconfig.auth_type == "aws_sdk": + try: + from agent.bedrock_adapter import has_aws_credentials + return {"logged_in": has_aws_credentials(), "provider": target} + except ImportError: + return {"logged_in": False, "provider": target, "error": "boto3 not installed"} return {"logged_in": False} diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index c1cf0ff61..c6e23b42f 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -368,6 +368,27 @@ def _interactive_auth() -> None: print("=" * 50) auth_list_command(SimpleNamespace(provider=None)) + + # Show AWS Bedrock credential status (not in the pool — uses boto3 chain) + try: + from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region + if has_aws_credentials(): + auth_source = resolve_aws_auth_env_var() or "unknown" + region = resolve_bedrock_region() + print(f"bedrock (AWS SDK credential chain):") + print(f" Auth: {auth_source}") + print(f" Region: {region}") + try: + import boto3 + sts = boto3.client("sts", region_name=region) + identity = sts.get_caller_identity() + arn = identity.get("Arn", "unknown") + print(f" Identity: {arn}") + except Exception: + print(f" Identity: (could not resolve — boto3 STS call failed)") + print() + except ImportError: + pass # boto3 or bedrock_adapter not available print() # Main menu diff --git a/hermes_cli/config.py b/hermes_cli/config.py index d06338aa1..71f025adf 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -419,6 +419,27 @@ DEFAULT_CONFIG = { "protect_last_n": 20, # minimum recent messages to keep uncompressed }, + + # AWS Bedrock provider configuration. + # Only used when model.provider is "bedrock". + "bedrock": { + "region": "", # AWS region for Bedrock API calls (empty = AWS_REGION env var → us-east-1) + "discovery": { + "enabled": True, # Auto-discover models via ListFoundationModels + "provider_filter": [], # Only show models from these providers (e.g. ["anthropic", "amazon"]) + "refresh_interval": 3600, # Cache discovery results for this many seconds + }, + "guardrail": { + # Amazon Bedrock Guardrails — content filtering and safety policies. + # Create a guardrail in the Bedrock console, then set the ID and version here. + # See: https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html + "guardrail_identifier": "", # e.g. "abc123def456" + "guardrail_version": "", # e.g. "1" or "DRAFT" + "stream_processing_mode": "async", # "sync" or "async" + "trace": "disabled", # "enabled", "disabled", or "enabled_full" + }, + }, + "smart_model_routing": { "enabled": False, "max_simple_chars": 160, @@ -974,6 +995,22 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "AWS_REGION": { + "description": "AWS region for Bedrock API calls (e.g. 
us-east-1, eu-central-1)", + "prompt": "AWS Region", + "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-regions.html", + "password": False, + "category": "provider", + "advanced": True, + }, + "AWS_PROFILE": { + "description": "AWS named profile for Bedrock authentication (from ~/.aws/credentials)", + "prompt": "AWS Profile", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, # ── Tool API keys ── "EXA_API_KEY": { diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 69a24aff5..70bd9d0e0 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -860,6 +860,31 @@ def run_doctor(args): except Exception as _e: print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)} ") + # -- AWS Bedrock -- + # Bedrock uses the AWS SDK credential chain, not API keys. + try: + from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region + if has_aws_credentials(): + _auth_var = resolve_aws_auth_env_var() + _region = resolve_bedrock_region() + _label = "AWS Bedrock".ljust(20) + print(f" Checking AWS Bedrock...", end="", flush=True) + try: + import boto3 + _br_client = boto3.client("bedrock", region_name=_region) + _br_resp = _br_client.list_foundation_models() + _model_count = len(_br_resp.get("modelSummaries", [])) + print(f"\r {color('✓', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)} ") + except ImportError: + print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color('(boto3 not installed — pip install hermes-agent[bedrock])', Colors.DIM)} ") + issues.append("Install boto3 for Bedrock: pip install hermes-agent[bedrock]") + except Exception as _e: + _err_name = type(_e).__name__ + print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)} ") + issues.append(f"AWS Bedrock: {_err_name} — check IAM permissions for bedrock:ListFoundationModels") + except ImportError: + pass # bedrock_adapter not available — skip silently + # ========================================================================= # Check: Submodules # ========================================================================= diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 2eb47aa54..638f2a31c 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1139,6 +1139,8 @@ def select_provider_and_model(args=None): _model_flow_anthropic(config, current_model) elif selected_provider == "kimi-coding": _model_flow_kimi(config, current_model) + elif selected_provider == "bedrock": + _model_flow_bedrock(config, current_model) elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"): _model_flow_api_key_provider(config, selected_provider, current_model) @@ -2425,6 +2427,252 @@ def _model_flow_kimi(config, current_model=""): print("No change.") +def _model_flow_bedrock_api_key(config, region, current_model=""): + """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint. + + For developers who don't have an AWS account but received a Bedrock API Key + from their AWS admin. Works like any OpenAI-compatible endpoint. 
+ """ + from hermes_cli.auth import _prompt_model_selection, _save_model_choice, deactivate_provider + from hermes_cli.config import load_config, save_config, get_env_value, save_env_value + from hermes_cli.models import _PROVIDER_MODELS + + mantle_base_url = f"https://bedrock-mantle.{region}.api.aws/v1" + + # Prompt for API key + existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or "" + if existing_key: + print(f" Bedrock API Key: {existing_key[:12]}... ✓") + else: + print(f" Endpoint: {mantle_base_url}") + print() + try: + import getpass + api_key = getpass.getpass(" Bedrock API Key: ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + if not api_key: + print(" Cancelled.") + return + save_env_value("AWS_BEARER_TOKEN_BEDROCK", api_key) + existing_key = api_key + print(" ✓ API key saved.") + print() + + # Model selection — use static list (mantle doesn't need boto3 for discovery) + model_list = _PROVIDER_MODELS.get("bedrock", []) + print(f" Showing {len(model_list)} curated models") + + if model_list: + selected = _prompt_model_selection(model_list, current_model=current_model) + else: + try: + selected = input(" Model ID: ").strip() + except (KeyboardInterrupt, EOFError): + selected = None + + if selected: + _save_model_choice(selected) + + # Save as custom provider pointing to bedrock-mantle + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = "custom" + model["base_url"] = mantle_base_url + model.pop("api_mode", None) # chat_completions is the default + + # Also save region in bedrock config for reference + bedrock_cfg = cfg.get("bedrock", {}) + if not isinstance(bedrock_cfg, dict): + bedrock_cfg = {} + bedrock_cfg["region"] = region + cfg["bedrock"] = bedrock_cfg + + # Save the API key env var name so hermes knows where to find it + save_env_value("OPENAI_API_KEY", existing_key) + save_env_value("OPENAI_BASE_URL", mantle_base_url) + + save_config(cfg) + deactivate_provider() + + print(f" Default model set to: {selected} (via Bedrock API Key, {region})") + print(f" Endpoint: {mantle_base_url}") + else: + print(" No change.") + + +def _model_flow_bedrock(config, current_model=""): + """AWS Bedrock provider: verify credentials, pick region, discover models. + + Uses the native Converse API via boto3 — not the OpenAI-compatible endpoint. + Auth is handled by the AWS SDK default credential chain (env vars, profile, + instance role), so no API key prompt is needed. + """ + from hermes_cli.auth import _prompt_model_selection, _save_model_choice, deactivate_provider + from hermes_cli.config import load_config, save_config + from hermes_cli.models import _PROVIDER_MODELS + + # 1. Check for AWS credentials + try: + from agent.bedrock_adapter import ( + has_aws_credentials, + resolve_aws_auth_env_var, + resolve_bedrock_region, + discover_bedrock_models, + ) + except ImportError: + print(" ✗ boto3 is not installed. Install it with:") + print(" pip install boto3") + print() + return + + if not has_aws_credentials(): + print(" ⚠ No AWS credentials detected via environment variables.") + print(" Bedrock will use boto3's default credential chain (IMDS, SSO, etc.)") + print() + + auth_var = resolve_aws_auth_env_var() + if auth_var: + print(f" AWS credentials: {auth_var} ✓") + else: + print(" AWS credentials: boto3 default chain (instance role / SSO)") + print() + + # 2. 
Region selection + current_region = resolve_bedrock_region() + try: + region_input = input(f" AWS Region [{current_region}]: ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + region = region_input or current_region + + # 2b. Authentication mode + print(" Choose authentication method:") + print() + print(" 1. IAM credential chain (recommended)") + print(" Works with EC2 instance roles, SSO, env vars, aws configure") + print(" 2. Bedrock API Key") + print(" Enter your Bedrock API Key directly — also supports") + print(" team scenarios where an admin distributes keys") + print() + try: + auth_choice = input(" Choice [1]: ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + + if auth_choice == "2": + _model_flow_bedrock_api_key(config, region, current_model) + return + + # 3. Model discovery — try live API first, fall back to static list + print(f" Discovering models in {region}...") + live_models = discover_bedrock_models(region) + + if live_models: + _EXCLUDE_PREFIXES = ( + "stability.", "cohere.embed", "twelvelabs.", "us.stability.", + "us.cohere.embed", "us.twelvelabs.", "global.cohere.embed", + "global.twelvelabs.", + ) + _EXCLUDE_SUBSTRINGS = ("safeguard", "voxtral", "palmyra-vision") + filtered = [] + for m in live_models: + mid = m["id"] + if any(mid.startswith(p) for p in _EXCLUDE_PREFIXES): + continue + if any(s in mid.lower() for s in _EXCLUDE_SUBSTRINGS): + continue + filtered.append(m) + + # Deduplicate: prefer inference profiles (us.*, global.*) over bare + # foundation model IDs. + profile_base_ids = set() + for m in filtered: + mid = m["id"] + if mid.startswith(("us.", "global.")): + base = mid.split(".", 1)[1] if "." in mid[3:] else mid + profile_base_ids.add(base) + + deduped = [] + for m in filtered: + mid = m["id"] + if not mid.startswith(("us.", "global.")) and mid in profile_base_ids: + continue + deduped.append(m) + + _RECOMMENDED = [ + "us.anthropic.claude-sonnet-4-6", + "us.anthropic.claude-opus-4-6", + "us.anthropic.claude-haiku-4-5", + "us.amazon.nova-pro", + "us.amazon.nova-lite", + "us.amazon.nova-micro", + "deepseek.v3", + "us.meta.llama4-maverick", + "us.meta.llama4-scout", + ] + + def _sort_key(m): + mid = m["id"] + for i, rec in enumerate(_RECOMMENDED): + if mid.startswith(rec): + return (0, i, mid) + if mid.startswith("global."): + return (1, 0, mid) + return (2, 0, mid) + + deduped.sort(key=_sort_key) + model_list = [m["id"] for m in deduped] + print(f" Found {len(model_list)} text model(s) (filtered from {len(live_models)} total)") + else: + model_list = _PROVIDER_MODELS.get("bedrock", []) + if model_list: + print(f" Using {len(model_list)} curated models (live discovery unavailable)") + else: + print(" No models found. Check IAM permissions for bedrock:ListFoundationModels.") + return + + # 4. 
Model selection + if model_list: + selected = _prompt_model_selection(model_list, current_model=current_model) + else: + try: + selected = input(" Model ID: ").strip() + except (KeyboardInterrupt, EOFError): + selected = None + + if selected: + _save_model_choice(selected) + + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = "bedrock" + model["base_url"] = f"https://bedrock-runtime.{region}.amazonaws.com" + model.pop("api_mode", None) # bedrock_converse is auto-detected + + bedrock_cfg = cfg.get("bedrock", {}) + if not isinstance(bedrock_cfg, dict): + bedrock_cfg = {} + bedrock_cfg["region"] = region + cfg["bedrock"] = bedrock_cfg + + save_config(cfg) + deactivate_provider() + + print(f" Default model set to: {selected} (via AWS Bedrock, {region})") + else: + print(" No change.") + + def _model_flow_api_key_provider(config, provider_id, current_model=""): """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.).""" from hermes_cli.auth import ( diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 62c215042..9fc68933e 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -303,6 +303,22 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "XiaomiMiMo/MiMo-V2-Flash", "moonshotai/Kimi-K2-Thinking", ], + # AWS Bedrock — static fallback list used when dynamic discovery is + # unavailable (no boto3, no credentials, or API error). The agent + # prefers live discovery via ListFoundationModels + ListInferenceProfiles. + # Use inference profile IDs (us.*) since most models require them. + "bedrock": [ + "us.anthropic.claude-sonnet-4-6", + "us.anthropic.claude-opus-4-6-v1", + "us.anthropic.claude-haiku-4-5-20251001-v1:0", + "us.anthropic.claude-sonnet-4-5-20250929-v1:0", + "us.amazon.nova-pro-v1:0", + "us.amazon.nova-lite-v1:0", + "us.amazon.nova-micro-v1:0", + "deepseek.v3.2", + "us.meta.llama4-maverick-17b-instruct-v1:0", + "us.meta.llama4-scout-17b-instruct-v1:0", + ], } # --------------------------------------------------------------------------- @@ -536,6 +552,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"), ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"), ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"), + ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"), ] # Derived dicts — used throughout the codebase @@ -587,6 +604,10 @@ _PROVIDER_ALIASES = { "huggingface-hub": "huggingface", "mimo": "xiaomi", "xiaomi-mimo": "xiaomi", + "aws": "bedrock", + "aws-bedrock": "bedrock", + "amazon-bedrock": "bedrock", + "amazon": "bedrock", "grok": "xai", "x-ai": "xai", "x.ai": "xai", @@ -1957,6 +1978,42 @@ def validate_requested_model( # api_models is None — couldn't reach API. Accept and persist, # but warn so typos don't silently break things. + + # Bedrock: use our own discovery instead of HTTP /models endpoint. + # Bedrock's bedrock-runtime URL doesn't support /models — it uses the + # AWS SDK control plane (ListFoundationModels + ListInferenceProfiles). 
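+    # Illustrative shape (assumed from the adapter's tests, other keys elided):
+    # discovery returns a list of dicts with at least an "id" key, e.g.
+    #   discover_bedrock_models("us-east-1")
+    #   -> [{"id": "us.anthropic.claude-sonnet-4-6", ...}, {"id": "amazon.nova-pro-v1:0", ...}]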
+ if normalized == "bedrock": + try: + from agent.bedrock_adapter import discover_bedrock_models, resolve_bedrock_region + region = resolve_bedrock_region() + discovered = discover_bedrock_models(region) + discovered_ids = {m["id"] for m in discovered} + if requested in discovered_ids: + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + # Not in discovered list — still accept (user may have custom + # inference profiles or cross-account access), but warn. + suggestions = get_close_matches(requested, list(discovered_ids), n=3, cutoff=0.4) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: `{requested}` was not found in Bedrock model discovery for {region}. " + f"It may still work with custom inference profiles or cross-account access." + f"{suggestion_text}" + ), + } + except Exception: + pass # Fall through to generic warning + provider_label = _PROVIDER_LABELS.get(normalized, normalized) return { "accepted": True, diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index 6fb940d31..8311e3652 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -236,6 +236,12 @@ ALIASES: Dict[str, str] = { "mimo": "xiaomi", "xiaomi-mimo": "xiaomi", + # bedrock + "aws": "bedrock", + "aws-bedrock": "bedrock", + "amazon-bedrock": "bedrock", + "amazon": "bedrock", + # arcee "arcee-ai": "arcee", "arceeai": "arcee", @@ -262,6 +268,7 @@ _LABEL_OVERRIDES: Dict[str, str] = { "copilot-acp": "GitHub Copilot ACP", "xiaomi": "Xiaomi MiMo", "local": "Local endpoint", + "bedrock": "AWS Bedrock", } @@ -271,6 +278,7 @@ TRANSPORT_TO_API_MODE: Dict[str, str] = { "openai_chat": "chat_completions", "anthropic_messages": "anthropic_messages", "codex_responses": "codex_responses", + "bedrock_converse": "bedrock_converse", } @@ -388,6 +396,10 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: if pdef is not None: return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions") + # Direct provider checks for providers not in HERMES_OVERLAYS + if provider == "bedrock": + return "bedrock_converse" + # URL-based heuristics for custom / unknown providers if base_url: url_lower = base_url.rstrip("/").lower() @@ -395,6 +407,8 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: return "anthropic_messages" if "api.openai.com" in url_lower: return "codex_responses" + if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower: + return "bedrock_converse" return "chat_completions" diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index b2dec61cd..bdfcfb09d 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -124,7 +124,7 @@ def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str: return "chat_completions" -_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages"} +_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"} def _parse_api_mode(raw: Any) -> Optional[str]: @@ -836,6 +836,77 @@ def resolve_runtime_provider( "requested_provider": requested_provider, } + # AWS Bedrock (native Converse API via boto3) + if provider == "bedrock": + from agent.bedrock_adapter import ( + has_aws_credentials, + resolve_aws_auth_env_var, + resolve_bedrock_region, + is_anthropic_bedrock_model, + ) + # When the user explicitly 
selected bedrock (not auto-detected), + # trust boto3's credential chain — it handles IMDS, ECS task roles, + # Lambda execution roles, SSO, and other implicit sources that our + # env-var check can't detect. + is_explicit = requested_provider in ("bedrock", "aws", "aws-bedrock", "amazon-bedrock", "amazon") + if not is_explicit and not has_aws_credentials(): + raise AuthError( + "No AWS credentials found for Bedrock. Configure one of:\n" + " - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY\n" + " - AWS_PROFILE (for SSO / named profiles)\n" + " - IAM instance role (EC2, ECS, Lambda)\n" + "Or run 'aws configure' to set up credentials.", + code="no_aws_credentials", + ) + # Read bedrock-specific config from config.yaml + from hermes_cli.config import load_config as _load_bedrock_config + _bedrock_cfg = _load_bedrock_config().get("bedrock", {}) + # Region priority: config.yaml bedrock.region → env var → us-east-1 + region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region() + auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain" + # Build guardrail config if configured + _gr = _bedrock_cfg.get("guardrail", {}) + guardrail_config = None + if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"): + guardrail_config = { + "guardrailIdentifier": _gr["guardrail_identifier"], + "guardrailVersion": _gr["guardrail_version"], + } + if _gr.get("stream_processing_mode"): + guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"] + if _gr.get("trace"): + guardrail_config["trace"] = _gr["trace"] + # Dual-path routing: Claude models use AnthropicBedrock SDK for full + # feature parity (prompt caching, thinking budgets, adaptive thinking). + # Non-Claude models use the Converse API for multi-model support. + _current_model = str(model_cfg.get("default") or "").strip() + if is_anthropic_bedrock_model(_current_model): + # Claude on Bedrock → AnthropicBedrock SDK → anthropic_messages path + runtime = { + "provider": "bedrock", + "api_mode": "anthropic_messages", + "base_url": f"https://bedrock-runtime.{region}.amazonaws.com", + "api_key": "aws-sdk", + "source": auth_source, + "region": region, + "bedrock_anthropic": True, # Signal to use AnthropicBedrock client + "requested_provider": requested_provider, + } + else: + # Non-Claude (Nova, DeepSeek, Llama, etc.) 
→ Converse API + runtime = { + "provider": "bedrock", + "api_mode": "bedrock_converse", + "base_url": f"https://bedrock-runtime.{region}.amazonaws.com", + "api_key": "aws-sdk", + "source": auth_source, + "region": region, + "requested_provider": requested_provider, + } + if guardrail_config: + runtime["guardrail_config"] = guardrail_config + return runtime + # API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN) pconfig = PROVIDER_REGISTRY.get(provider) if pconfig and pconfig.auth_type == "api_key": diff --git a/pyproject.toml b/pyproject.toml index fa3fd4822..0d84b5e1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ homeassistant = ["aiohttp>=3.9.0,<4"] sms = ["aiohttp>=3.9.0,<4"] acp = ["agent-client-protocol>=0.9.0,<1.0"] mistral = ["mistralai>=2.3.0,<3"] +bedrock = ["boto3>=1.35.0,<2"] termux = [ # Tested Android / Termux path: keeps the core CLI feature-rich while # avoiding extras that currently depend on non-Android wheels (notably @@ -108,6 +109,7 @@ all = [ "hermes-agent[dingtalk]", "hermes-agent[feishu]", "hermes-agent[mistral]", + "hermes-agent[bedrock]", "hermes-agent[web]", ] diff --git a/run_agent.py b/run_agent.py index d229dcfe0..6033da341 100644 --- a/run_agent.py +++ b/run_agent.py @@ -685,7 +685,7 @@ class AIAgent: self.provider = provider_name or "" self.acp_command = acp_command or command self.acp_args = list(acp_args or args or []) - if api_mode in {"chat_completions", "codex_responses", "anthropic_messages"}: + if api_mode in {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}: self.api_mode = api_mode elif self.provider == "openai-codex": self.api_mode = "codex_responses" @@ -700,6 +700,9 @@ class AIAgent: # use a URL convention ending in /anthropic. Auto-detect these so the # Anthropic Messages API adapter is used instead of chat completions. self.api_mode = "anthropic_messages" + elif self.provider == "bedrock" or "bedrock-runtime" in self._base_url_lower: + # AWS Bedrock — auto-detect from provider name or base URL. + self.api_mode = "bedrock_converse" else: self.api_mode = "chat_completions" @@ -892,24 +895,70 @@ class AIAgent: if self.api_mode == "anthropic_messages": from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token - # Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic. - # Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own API key. - # Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401). - _is_native_anthropic = self.provider == "anthropic" - effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "") - self.api_key = effective_key - self._anthropic_api_key = effective_key - self._anthropic_base_url = base_url - from agent.anthropic_adapter import _is_oauth_token as _is_oat - self._is_anthropic_oauth = _is_oat(effective_key) - self._anthropic_client = build_anthropic_client(effective_key, base_url) - # No OpenAI client needed for Anthropic mode + # Bedrock + Claude → use AnthropicBedrock SDK for full feature parity + # (prompt caching, thinking budgets, adaptive thinking). 
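+            # Note: runtime_provider only routes Claude model IDs into
+            # anthropic_messages for the bedrock provider (via
+            # is_anthropic_bedrock_model); non-Claude Bedrock models arrive
+            # with api_mode == "bedrock_converse" and never reach this branch.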
+ _is_bedrock_anthropic = self.provider == "bedrock" + if _is_bedrock_anthropic: + from agent.anthropic_adapter import build_anthropic_bedrock_client + import re as _re + _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") + _br_region = _region_match.group(1) if _region_match else "us-east-1" + self._bedrock_region = _br_region + self._anthropic_client = build_anthropic_bedrock_client(_br_region) + self._anthropic_api_key = "aws-sdk" + self._anthropic_base_url = base_url + self._is_anthropic_oauth = False + self.api_key = "aws-sdk" + self.client = None + self._client_kwargs = {} + if not self.quiet_mode: + print(f"🤖 AI Agent initialized with model: {self.model} (AWS Bedrock + AnthropicBedrock SDK, {_br_region})") + else: + # Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic. + # Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own API key. + # Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401). + _is_native_anthropic = self.provider == "anthropic" + effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "") + self.api_key = effective_key + self._anthropic_api_key = effective_key + self._anthropic_base_url = base_url + from agent.anthropic_adapter import _is_oauth_token as _is_oat + self._is_anthropic_oauth = _is_oat(effective_key) + self._anthropic_client = build_anthropic_client(effective_key, base_url) + # No OpenAI client needed for Anthropic mode + self.client = None + self._client_kwargs = {} + if not self.quiet_mode: + print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)") + if effective_key and len(effective_key) > 12: + print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}") + elif self.api_mode == "bedrock_converse": + # AWS Bedrock — uses boto3 directly, no OpenAI client needed. + # Region is extracted from the base_url or defaults to us-east-1. + import re as _re + _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") + self._bedrock_region = _region_match.group(1) if _region_match else "us-east-1" + # Guardrail config — read from config.yaml at init time. + self._bedrock_guardrail_config = None + try: + from hermes_cli.config import load_config as _load_br_cfg + _gr = _load_br_cfg().get("bedrock", {}).get("guardrail", {}) + if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"): + self._bedrock_guardrail_config = { + "guardrailIdentifier": _gr["guardrail_identifier"], + "guardrailVersion": _gr["guardrail_version"], + } + if _gr.get("stream_processing_mode"): + self._bedrock_guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"] + if _gr.get("trace"): + self._bedrock_guardrail_config["trace"] = _gr["trace"] + except Exception: + pass self.client = None self._client_kwargs = {} if not self.quiet_mode: - print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)") - if effective_key and len(effective_key) > 12: - print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}") + _gr_label = " + Guardrails" if self._bedrock_guardrail_config else "" + print(f"🤖 AI Agent initialized with model: {self.model} (AWS Bedrock, {self._bedrock_region}{_gr_label})") else: if api_key and base_url: # Explicit credentials from CLI/gateway — construct directly. 
@@ -4896,6 +4945,17 @@ class AIAgent: ) elif self.api_mode == "anthropic_messages": result["response"] = self._anthropic_messages_create(api_kwargs) + elif self.api_mode == "bedrock_converse": + # Bedrock uses boto3 directly — no OpenAI client needed. + from agent.bedrock_adapter import ( + _get_bedrock_runtime_client, + normalize_converse_response, + ) + region = api_kwargs.pop("__bedrock_region__", "us-east-1") + api_kwargs.pop("__bedrock_converse__", None) + client = _get_bedrock_runtime_client(region) + raw_response = client.converse(**api_kwargs) + result["response"] = normalize_converse_response(raw_response) else: request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request") result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs) @@ -5135,6 +5195,65 @@ class AIAgent: finally: self._codex_on_first_delta = None + # Bedrock Converse uses boto3's converse_stream() with real-time delta + # callbacks — same UX as Anthropic and chat_completions streaming. + if self.api_mode == "bedrock_converse": + result = {"response": None, "error": None} + first_delta_fired = {"done": False} + deltas_were_sent = {"yes": False} + + def _fire_first(): + if not first_delta_fired["done"] and on_first_delta: + first_delta_fired["done"] = True + try: + on_first_delta() + except Exception: + pass + + def _bedrock_call(): + try: + from agent.bedrock_adapter import ( + _get_bedrock_runtime_client, + stream_converse_with_callbacks, + ) + region = api_kwargs.pop("__bedrock_region__", "us-east-1") + api_kwargs.pop("__bedrock_converse__", None) + client = _get_bedrock_runtime_client(region) + raw_response = client.converse_stream(**api_kwargs) + + def _on_text(text): + _fire_first() + self._fire_stream_delta(text) + deltas_were_sent["yes"] = True + + def _on_tool(name): + _fire_first() + self._fire_tool_gen_started(name) + + def _on_reasoning(text): + _fire_first() + self._fire_reasoning_delta(text) + + result["response"] = stream_converse_with_callbacks( + raw_response, + on_text_delta=_on_text if self._has_stream_consumers() else None, + on_tool_start=_on_tool, + on_reasoning_delta=_on_reasoning if self.reasoning_callback or self.stream_delta_callback else None, + on_interrupt_check=lambda: self._interrupt_requested, + ) + except Exception as e: + result["error"] = e + + t = threading.Thread(target=_bedrock_call, daemon=True) + t.start() + while t.is_alive(): + t.join(timeout=0.3) + if self._interrupt_requested: + raise InterruptedError("Agent interrupted during Bedrock API call") + if result["error"] is not None: + raise result["error"] + return result["response"] + result = {"response": None, "error": None} request_client_holder = {"client": None} first_delta_fired = {"done": False} @@ -5765,6 +5884,8 @@ class AIAgent: # provider-specific exceptions like Copilot gpt-5-mini on # chat completions. fb_api_mode = "codex_responses" + elif fb_provider == "bedrock" or "bedrock-runtime" in fb_base_url.lower(): + fb_api_mode = "bedrock_converse" old_model = self.model self.model = fb_model @@ -6244,6 +6365,25 @@ class AIAgent: fast_mode=(self.request_overrides or {}).get("speed") == "fast", ) + # AWS Bedrock native Converse API — bypasses the OpenAI client entirely. + # The adapter handles message/tool conversion and boto3 calls directly. 
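+        # Resulting payload (sketch; exact keys come from build_converse_kwargs
+        # and its tests). The two "__bedrock_*__" sentinels are popped off before
+        # the boto3 call; the rest maps onto converse()/converse_stream() kwargs:
+        #   {"__bedrock_converse__": True, "__bedrock_region__": "us-east-1",
+        #    "modelId": ..., "messages": [...], "system": [...],
+        #    "inferenceConfig": {"maxTokens": ...}, "toolConfig": {...},
+        #    "guardrailConfig": {...}}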
+ if self.api_mode == "bedrock_converse": + from agent.bedrock_adapter import build_converse_kwargs + region = getattr(self, "_bedrock_region", None) or "us-east-1" + guardrail = getattr(self, "_bedrock_guardrail_config", None) + return { + "__bedrock_converse__": True, + "__bedrock_region__": region, + **build_converse_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + max_tokens=self.max_tokens or 4096, + temperature=None, # Let the model use its default + guardrail_config=guardrail, + ), + } + if self.api_mode == "codex_responses": instructions = "" payload_messages = api_messages @@ -8821,7 +8961,7 @@ class AIAgent: # targeted error instead of wasting 3 API calls. _trunc_content = None _trunc_has_tool_calls = False - if self.api_mode == "chat_completions": + if self.api_mode in ("chat_completions", "bedrock_converse"): _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False @@ -8890,7 +9030,7 @@ class AIAgent: "error": _exhaust_error, } - if self.api_mode == "chat_completions": + if self.api_mode in ("chat_completions", "bedrock_converse"): assistant_message = response.choices[0].message if not assistant_message.tool_calls: length_continue_retries += 1 @@ -8930,7 +9070,7 @@ class AIAgent: "error": "Response remained truncated after 3 continuation attempts", } - if self.api_mode == "chat_completions": + if self.api_mode in ("chat_completions", "bedrock_converse"): assistant_message = response.choices[0].message if assistant_message.tool_calls: if truncated_tool_call_retries < 1: diff --git a/tests/agent/test_bedrock_adapter.py b/tests/agent/test_bedrock_adapter.py new file mode 100644 index 000000000..d12be7b88 --- /dev/null +++ b/tests/agent/test_bedrock_adapter.py @@ -0,0 +1,1232 @@ +"""Tests for the AWS Bedrock Converse API adapter. + +Covers: + - AWS credential detection and region resolution + - Message format conversion (OpenAI → Converse and back) + - Tool definition conversion + - Response normalization (non-streaming and streaming) + - Model discovery with caching + - Edge cases: empty messages, consecutive roles, image content +""" + +import json +import os +import time +from types import SimpleNamespace +from unittest.mock import MagicMock, patch, PropertyMock + +import pytest + + +# --------------------------------------------------------------------------- +# AWS credential detection +# --------------------------------------------------------------------------- + +class TestResolveAwsAuthEnvVar: + """Test AWS credential environment variable detection. + + Mirrors OpenClaw's resolveAwsSdkEnvVarName() priority order. 
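+    Priority (highest first): bearer token, explicit access-key pair, named
+    profile; container / web-identity variables and boto3's implicit chain are
+    detected as fallbacks.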
+ """ + + def test_prefers_bearer_token_over_access_keys_and_profile(self): + from agent.bedrock_adapter import resolve_aws_auth_env_var + env = { + "AWS_BEARER_TOKEN_BEDROCK": "bearer-token", + "AWS_ACCESS_KEY_ID": "AKIA...", + "AWS_SECRET_ACCESS_KEY": "secret", + "AWS_PROFILE": "default", + } + assert resolve_aws_auth_env_var(env) == "AWS_BEARER_TOKEN_BEDROCK" + + def test_uses_access_keys_when_bearer_token_missing(self): + from agent.bedrock_adapter import resolve_aws_auth_env_var + env = { + "AWS_ACCESS_KEY_ID": "AKIA...", + "AWS_SECRET_ACCESS_KEY": "secret", + "AWS_PROFILE": "default", + } + assert resolve_aws_auth_env_var(env) == "AWS_ACCESS_KEY_ID" + + def test_requires_both_access_key_and_secret(self): + from agent.bedrock_adapter import resolve_aws_auth_env_var + # Only access key, no secret → should not match + env = {"AWS_ACCESS_KEY_ID": "AKIA..."} + assert resolve_aws_auth_env_var(env) != "AWS_ACCESS_KEY_ID" + + def test_uses_profile_when_no_keys(self): + from agent.bedrock_adapter import resolve_aws_auth_env_var + env = {"AWS_PROFILE": "production"} + assert resolve_aws_auth_env_var(env) == "AWS_PROFILE" + + def test_uses_container_credentials(self): + from agent.bedrock_adapter import resolve_aws_auth_env_var + env = {"AWS_CONTAINER_CREDENTIALS_RELATIVE_URI": "/v2/credentials/..."} + assert resolve_aws_auth_env_var(env) == "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" + + def test_uses_web_identity(self): + from agent.bedrock_adapter import resolve_aws_auth_env_var + env = {"AWS_WEB_IDENTITY_TOKEN_FILE": "/var/run/secrets/token"} + assert resolve_aws_auth_env_var(env) == "AWS_WEB_IDENTITY_TOKEN_FILE" + + def test_returns_none_when_no_aws_auth(self): + from agent.bedrock_adapter import resolve_aws_auth_env_var + # Mock botocore to return no credentials (covers EC2 IMDS fallback) + mock_session = MagicMock() + mock_session.get_credentials.return_value = None + with patch.dict("sys.modules", {"botocore": MagicMock(), "botocore.session": MagicMock()}): + import botocore.session as _bs + _bs.get_session = MagicMock(return_value=mock_session) + assert resolve_aws_auth_env_var({}) is None + + def test_ignores_whitespace_only_values(self): + from agent.bedrock_adapter import resolve_aws_auth_env_var + env = {"AWS_PROFILE": " ", "AWS_ACCESS_KEY_ID": " "} + mock_session = MagicMock() + mock_session.get_credentials.return_value = None + with patch.dict("sys.modules", {"botocore": MagicMock(), "botocore.session": MagicMock()}): + import botocore.session as _bs + _bs.get_session = MagicMock(return_value=mock_session) + assert resolve_aws_auth_env_var(env) is None + + +class TestHasAwsCredentials: + def test_true_with_profile(self): + from agent.bedrock_adapter import has_aws_credentials + assert has_aws_credentials({"AWS_PROFILE": "default"}) is True + + def test_false_with_empty_env(self): + from agent.bedrock_adapter import has_aws_credentials + mock_session = MagicMock() + mock_session.get_credentials.return_value = None + with patch.dict("sys.modules", {"botocore": MagicMock(), "botocore.session": MagicMock()}): + import botocore.session as _bs + _bs.get_session = MagicMock(return_value=mock_session) + assert has_aws_credentials({}) is False + + +class TestResolveBedrocRegion: + def test_prefers_aws_region(self): + from agent.bedrock_adapter import resolve_bedrock_region + env = {"AWS_REGION": "eu-west-1", "AWS_DEFAULT_REGION": "us-west-2"} + assert resolve_bedrock_region(env) == "eu-west-1" + + def test_falls_back_to_default_region(self): + from agent.bedrock_adapter import 
resolve_bedrock_region + env = {"AWS_DEFAULT_REGION": "ap-northeast-1"} + assert resolve_bedrock_region(env) == "ap-northeast-1" + + def test_defaults_to_us_east_1(self): + from agent.bedrock_adapter import resolve_bedrock_region + assert resolve_bedrock_region({}) == "us-east-1" + + +# --------------------------------------------------------------------------- +# Tool conversion +# --------------------------------------------------------------------------- + +class TestConvertToolsToConverse: + """Test OpenAI → Bedrock Converse tool definition conversion.""" + + def test_converts_single_tool(self): + from agent.bedrock_adapter import convert_tools_to_converse + tools = [{ + "type": "function", + "function": { + "name": "read_file", + "description": "Read a file from disk", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string", "description": "File path"}, + }, + "required": ["path"], + }, + }, + }] + result = convert_tools_to_converse(tools) + assert len(result) == 1 + spec = result[0]["toolSpec"] + assert spec["name"] == "read_file" + assert spec["description"] == "Read a file from disk" + assert spec["inputSchema"]["json"]["type"] == "object" + assert "path" in spec["inputSchema"]["json"]["properties"] + + def test_converts_multiple_tools(self): + from agent.bedrock_adapter import convert_tools_to_converse + tools = [ + {"type": "function", "function": {"name": "tool_a", "description": "A", "parameters": {}}}, + {"type": "function", "function": {"name": "tool_b", "description": "B", "parameters": {}}}, + ] + result = convert_tools_to_converse(tools) + assert len(result) == 2 + assert result[0]["toolSpec"]["name"] == "tool_a" + assert result[1]["toolSpec"]["name"] == "tool_b" + + def test_empty_tools(self): + from agent.bedrock_adapter import convert_tools_to_converse + assert convert_tools_to_converse([]) == [] + assert convert_tools_to_converse(None) == [] + + def test_missing_parameters_gets_default(self): + from agent.bedrock_adapter import convert_tools_to_converse + tools = [{"type": "function", "function": {"name": "noop", "description": "No-op"}}] + result = convert_tools_to_converse(tools) + schema = result[0]["toolSpec"]["inputSchema"]["json"] + assert schema == {"type": "object", "properties": {}} + + +# --------------------------------------------------------------------------- +# Message conversion: OpenAI → Converse +# --------------------------------------------------------------------------- + +class TestConvertMessagesToConverse: + """Test OpenAI message format → Bedrock Converse format conversion.""" + + def test_extracts_system_prompt(self): + from agent.bedrock_adapter import convert_messages_to_converse + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello"}, + ] + system, msgs = convert_messages_to_converse(messages) + assert system is not None + assert len(system) == 1 + assert system[0]["text"] == "You are a helpful assistant." + assert len(msgs) == 1 + assert msgs[0]["role"] == "user" + + def test_user_message_text(self): + from agent.bedrock_adapter import convert_messages_to_converse + messages = [{"role": "user", "content": "What is 2+2?"}] + system, msgs = convert_messages_to_converse(messages) + assert system is None + assert len(msgs) == 1 + assert msgs[0]["content"][0]["text"] == "What is 2+2?" 
+ + def test_assistant_with_tool_calls(self): + from agent.bedrock_adapter import convert_messages_to_converse + messages = [ + {"role": "user", "content": "Read the file"}, + { + "role": "assistant", + "content": "I'll read that file.", + "tool_calls": [{ + "id": "call_123", + "type": "function", + "function": { + "name": "read_file", + "arguments": '{"path": "/tmp/test.txt"}', + }, + }], + }, + ] + system, msgs = convert_messages_to_converse(messages) + # 3 messages: user, assistant, trailing user (Converse requires last=user) + assert len(msgs) == 3 + assistant_content = msgs[1]["content"] + # Should have text block + toolUse block + assert any("text" in b for b in assistant_content) + tool_use_blocks = [b for b in assistant_content if "toolUse" in b] + assert len(tool_use_blocks) == 1 + assert tool_use_blocks[0]["toolUse"]["name"] == "read_file" + assert tool_use_blocks[0]["toolUse"]["toolUseId"] == "call_123" + assert tool_use_blocks[0]["toolUse"]["input"] == {"path": "/tmp/test.txt"} + + def test_tool_result_becomes_user_message(self): + from agent.bedrock_adapter import convert_messages_to_converse + messages = [ + {"role": "user", "content": "Read it"}, + {"role": "assistant", "content": None, "tool_calls": [{ + "id": "call_1", "type": "function", + "function": {"name": "read_file", "arguments": "{}"}, + }]}, + {"role": "tool", "tool_call_id": "call_1", "content": "file contents here"}, + ] + system, msgs = convert_messages_to_converse(messages) + # Tool result should be in a user-role message + tool_result_msg = [m for m in msgs if m["role"] == "user" and any( + "toolResult" in b for b in m["content"] + )] + assert len(tool_result_msg) == 1 + tr = [b for b in tool_result_msg[0]["content"] if "toolResult" in b][0] + assert tr["toolResult"]["toolUseId"] == "call_1" + assert tr["toolResult"]["content"][0]["text"] == "file contents here" + + def test_merges_consecutive_user_messages(self): + from agent.bedrock_adapter import convert_messages_to_converse + messages = [ + {"role": "user", "content": "First"}, + {"role": "user", "content": "Second"}, + ] + system, msgs = convert_messages_to_converse(messages) + # Should be merged into one user message (Converse requires alternation) + assert len(msgs) == 1 + assert msgs[0]["role"] == "user" + texts = [b["text"] for b in msgs[0]["content"] if "text" in b] + assert "First" in texts + assert "Second" in texts + + def test_merges_consecutive_assistant_messages(self): + from agent.bedrock_adapter import convert_messages_to_converse + messages = [ + {"role": "user", "content": "Hi"}, + {"role": "assistant", "content": "Part 1"}, + {"role": "assistant", "content": "Part 2"}, + ] + system, msgs = convert_messages_to_converse(messages) + assistant_msgs = [m for m in msgs if m["role"] == "assistant"] + assert len(assistant_msgs) == 1 + + def test_first_message_must_be_user(self): + from agent.bedrock_adapter import convert_messages_to_converse + messages = [ + {"role": "assistant", "content": "I'm ready"}, + {"role": "user", "content": "Go"}, + ] + system, msgs = convert_messages_to_converse(messages) + assert msgs[0]["role"] == "user" + + def test_last_message_must_be_user(self): + from agent.bedrock_adapter import convert_messages_to_converse + messages = [ + {"role": "user", "content": "Hi"}, + {"role": "assistant", "content": "Hello"}, + ] + system, msgs = convert_messages_to_converse(messages) + assert msgs[-1]["role"] == "user" + + def test_empty_content_gets_placeholder(self): + from agent.bedrock_adapter import convert_messages_to_converse 
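+        # Converse rejects empty text blocks, so the adapter is expected to
+        # substitute a space placeholder (checked by the assertion below).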
+ messages = [{"role": "user", "content": ""}] + system, msgs = convert_messages_to_converse(messages) + # Empty string should get a space placeholder + assert msgs[0]["content"][0]["text"].strip() != "" or msgs[0]["content"][0]["text"] == " " + + def test_image_data_url_converted(self): + from agent.bedrock_adapter import convert_messages_to_converse + messages = [{ + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": { + "url": "data:image/png;base64,iVBORw0KGgo=", + }}, + ], + }] + system, msgs = convert_messages_to_converse(messages) + content = msgs[0]["content"] + assert any("text" in b for b in content) + image_blocks = [b for b in content if "image" in b] + assert len(image_blocks) == 1 + assert image_blocks[0]["image"]["format"] == "png" + + def test_multiple_system_messages_merged(self): + from agent.bedrock_adapter import convert_messages_to_converse + messages = [ + {"role": "system", "content": "Rule 1"}, + {"role": "system", "content": "Rule 2"}, + {"role": "user", "content": "Go"}, + ] + system, msgs = convert_messages_to_converse(messages) + assert system is not None + assert len(system) == 2 + assert system[0]["text"] == "Rule 1" + assert system[1]["text"] == "Rule 2" + + +# --------------------------------------------------------------------------- +# Response normalization: Converse → OpenAI +# --------------------------------------------------------------------------- + +class TestNormalizeConverseResponse: + """Test Bedrock Converse response → OpenAI format conversion.""" + + def test_text_response(self): + from agent.bedrock_adapter import normalize_converse_response + response = { + "output": { + "message": { + "role": "assistant", + "content": [{"text": "Hello, world!"}], + }, + }, + "stopReason": "end_turn", + "usage": {"inputTokens": 10, "outputTokens": 5}, + } + result = normalize_converse_response(response) + assert result.choices[0].message.content == "Hello, world!" + assert result.choices[0].message.tool_calls is None + assert result.choices[0].finish_reason == "stop" + assert result.usage.prompt_tokens == 10 + assert result.usage.completion_tokens == 5 + assert result.usage.total_tokens == 15 + + def test_tool_use_response(self): + from agent.bedrock_adapter import normalize_converse_response + response = { + "output": { + "message": { + "role": "assistant", + "content": [ + {"text": "I'll read that file."}, + { + "toolUse": { + "toolUseId": "call_abc", + "name": "read_file", + "input": {"path": "/tmp/test.txt"}, + }, + }, + ], + }, + }, + "stopReason": "tool_use", + "usage": {"inputTokens": 20, "outputTokens": 15}, + } + result = normalize_converse_response(response) + assert result.choices[0].message.content == "I'll read that file." 
+ assert result.choices[0].finish_reason == "tool_calls" + tool_calls = result.choices[0].message.tool_calls + assert len(tool_calls) == 1 + assert tool_calls[0].id == "call_abc" + assert tool_calls[0].function.name == "read_file" + assert json.loads(tool_calls[0].function.arguments) == {"path": "/tmp/test.txt"} + + def test_multiple_tool_calls(self): + from agent.bedrock_adapter import normalize_converse_response + response = { + "output": { + "message": { + "role": "assistant", + "content": [ + {"toolUse": {"toolUseId": "c1", "name": "tool_a", "input": {}}}, + {"toolUse": {"toolUseId": "c2", "name": "tool_b", "input": {"x": 1}}}, + ], + }, + }, + "stopReason": "tool_use", + "usage": {"inputTokens": 0, "outputTokens": 0}, + } + result = normalize_converse_response(response) + assert len(result.choices[0].message.tool_calls) == 2 + assert result.choices[0].finish_reason == "tool_calls" + + def test_stop_reason_mapping(self): + from agent.bedrock_adapter import _converse_stop_reason_to_openai + assert _converse_stop_reason_to_openai("end_turn") == "stop" + assert _converse_stop_reason_to_openai("stop_sequence") == "stop" + assert _converse_stop_reason_to_openai("tool_use") == "tool_calls" + assert _converse_stop_reason_to_openai("max_tokens") == "length" + assert _converse_stop_reason_to_openai("content_filtered") == "content_filter" + assert _converse_stop_reason_to_openai("guardrail_intervened") == "content_filter" + assert _converse_stop_reason_to_openai("unknown_reason") == "stop" + + def test_empty_content(self): + from agent.bedrock_adapter import normalize_converse_response + response = { + "output": {"message": {"role": "assistant", "content": []}}, + "stopReason": "end_turn", + "usage": {"inputTokens": 0, "outputTokens": 0}, + } + result = normalize_converse_response(response) + assert result.choices[0].message.content is None + assert result.choices[0].message.tool_calls is None + + def test_tool_calls_override_stop_finish_reason(self): + """When tool_calls are present but stopReason is end_turn, finish_reason should be tool_calls.""" + from agent.bedrock_adapter import normalize_converse_response + response = { + "output": { + "message": { + "role": "assistant", + "content": [ + {"toolUse": {"toolUseId": "c1", "name": "t", "input": {}}}, + ], + }, + }, + "stopReason": "end_turn", # Bedrock sometimes sends this with tool_use + "usage": {"inputTokens": 0, "outputTokens": 0}, + } + result = normalize_converse_response(response) + assert result.choices[0].finish_reason == "tool_calls" + + +# --------------------------------------------------------------------------- +# Streaming response normalization +# --------------------------------------------------------------------------- + +class TestNormalizeConverseStreamEvents: + """Test Bedrock ConverseStream event → OpenAI format conversion.""" + + def test_text_stream(self): + from agent.bedrock_adapter import normalize_converse_stream_events + events = {"stream": [ + {"messageStart": {"role": "assistant"}}, + {"contentBlockStart": {"contentBlockIndex": 0, "start": {}}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": {"text": "Hello"}}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": {"text": ", world!"}}}, + {"contentBlockStop": {"contentBlockIndex": 0}}, + {"messageStop": {"stopReason": "end_turn"}}, + {"metadata": {"usage": {"inputTokens": 5, "outputTokens": 3}}}, + ]} + result = normalize_converse_stream_events(events) + assert result.choices[0].message.content == "Hello, world!" 
+ assert result.choices[0].finish_reason == "stop" + assert result.usage.prompt_tokens == 5 + assert result.usage.completion_tokens == 3 + + def test_tool_use_stream(self): + from agent.bedrock_adapter import normalize_converse_stream_events + events = {"stream": [ + {"messageStart": {"role": "assistant"}}, + {"contentBlockStart": {"contentBlockIndex": 0, "start": { + "toolUse": {"toolUseId": "call_1", "name": "read_file"}, + }}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": { + "toolUse": {"input": '{"path":'}, + }}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": { + "toolUse": {"input": '"/tmp/f"}'}, + }}}, + {"contentBlockStop": {"contentBlockIndex": 0}}, + {"messageStop": {"stopReason": "tool_use"}}, + {"metadata": {"usage": {"inputTokens": 10, "outputTokens": 8}}}, + ]} + result = normalize_converse_stream_events(events) + assert result.choices[0].finish_reason == "tool_calls" + tc = result.choices[0].message.tool_calls + assert len(tc) == 1 + assert tc[0].id == "call_1" + assert tc[0].function.name == "read_file" + assert json.loads(tc[0].function.arguments) == {"path": "/tmp/f"} + + def test_mixed_text_and_tool_stream(self): + from agent.bedrock_adapter import normalize_converse_stream_events + events = {"stream": [ + {"messageStart": {"role": "assistant"}}, + # Text block + {"contentBlockStart": {"contentBlockIndex": 0, "start": {}}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": {"text": "Let me check."}}}, + {"contentBlockStop": {"contentBlockIndex": 0}}, + # Tool block + {"contentBlockStart": {"contentBlockIndex": 1, "start": { + "toolUse": {"toolUseId": "c1", "name": "search"}, + }}}, + {"contentBlockDelta": {"contentBlockIndex": 1, "delta": { + "toolUse": {"input": '{"q":"test"}'}, + }}}, + {"contentBlockStop": {"contentBlockIndex": 1}}, + {"messageStop": {"stopReason": "tool_use"}}, + {"metadata": {"usage": {"inputTokens": 0, "outputTokens": 0}}}, + ]} + result = normalize_converse_stream_events(events) + assert result.choices[0].message.content == "Let me check." 
+ assert len(result.choices[0].message.tool_calls) == 1 + + def test_empty_stream(self): + from agent.bedrock_adapter import normalize_converse_stream_events + events = {"stream": [ + {"messageStart": {"role": "assistant"}}, + {"messageStop": {"stopReason": "end_turn"}}, + {"metadata": {"usage": {"inputTokens": 0, "outputTokens": 0}}}, + ]} + result = normalize_converse_stream_events(events) + assert result.choices[0].message.content is None + assert result.choices[0].message.tool_calls is None + + +# --------------------------------------------------------------------------- +# build_converse_kwargs +# --------------------------------------------------------------------------- + +class TestBuildConverseKwargs: + """Test the high-level kwargs builder for Converse API calls.""" + + def test_basic_kwargs(self): + from agent.bedrock_adapter import build_converse_kwargs + messages = [ + {"role": "system", "content": "Be helpful."}, + {"role": "user", "content": "Hi"}, + ] + kwargs = build_converse_kwargs( + model="anthropic.claude-sonnet-4-6-20250514-v1:0", + messages=messages, + max_tokens=1024, + ) + assert kwargs["modelId"] == "anthropic.claude-sonnet-4-6-20250514-v1:0" + assert kwargs["inferenceConfig"]["maxTokens"] == 1024 + assert kwargs["system"] is not None + assert len(kwargs["messages"]) >= 1 + + def test_includes_tools(self): + from agent.bedrock_adapter import build_converse_kwargs + tools = [{"type": "function", "function": { + "name": "test", "description": "Test", "parameters": {}, + }}] + kwargs = build_converse_kwargs( + model="test-model", messages=[{"role": "user", "content": "Hi"}], + tools=tools, + ) + assert "toolConfig" in kwargs + assert len(kwargs["toolConfig"]["tools"]) == 1 + + def test_includes_temperature_and_top_p(self): + from agent.bedrock_adapter import build_converse_kwargs + kwargs = build_converse_kwargs( + model="test-model", messages=[{"role": "user", "content": "Hi"}], + temperature=0.7, top_p=0.9, + ) + assert kwargs["inferenceConfig"]["temperature"] == 0.7 + assert kwargs["inferenceConfig"]["topP"] == 0.9 + + def test_includes_guardrail_config(self): + from agent.bedrock_adapter import build_converse_kwargs + guardrail = { + "guardrailIdentifier": "gr-123", + "guardrailVersion": "1", + } + kwargs = build_converse_kwargs( + model="test-model", messages=[{"role": "user", "content": "Hi"}], + guardrail_config=guardrail, + ) + assert kwargs["guardrailConfig"] == guardrail + + def test_no_system_when_absent(self): + from agent.bedrock_adapter import build_converse_kwargs + kwargs = build_converse_kwargs( + model="test-model", messages=[{"role": "user", "content": "Hi"}], + ) + assert "system" not in kwargs + + def test_no_tool_config_when_empty(self): + from agent.bedrock_adapter import build_converse_kwargs + kwargs = build_converse_kwargs( + model="test-model", messages=[{"role": "user", "content": "Hi"}], + tools=[], + ) + assert "toolConfig" not in kwargs + + +# --------------------------------------------------------------------------- +# Model discovery +# --------------------------------------------------------------------------- + +class TestDiscoverBedrockModels: + """Test Bedrock model discovery with mocked AWS API calls.""" + + def test_discovers_foundation_models(self): + from agent.bedrock_adapter import discover_bedrock_models, reset_discovery_cache + reset_discovery_cache() + + mock_client = MagicMock() + mock_client.list_foundation_models.return_value = { + "modelSummaries": [ + { + "modelId": "anthropic.claude-sonnet-4-6-20250514-v1:0", + 
"modelName": "Claude Sonnet 4.6", + "providerName": "Anthropic", + "inputModalities": ["TEXT", "IMAGE"], + "outputModalities": ["TEXT"], + "responseStreamingSupported": True, + "modelLifecycle": {"status": "ACTIVE"}, + }, + { + "modelId": "amazon.nova-pro-v1:0", + "modelName": "Nova Pro", + "providerName": "Amazon", + "inputModalities": ["TEXT"], + "outputModalities": ["TEXT"], + "responseStreamingSupported": True, + "modelLifecycle": {"status": "ACTIVE"}, + }, + ], + } + mock_client.list_inference_profiles.return_value = { + "inferenceProfileSummaries": [], + } + + with patch("agent.bedrock_adapter._get_bedrock_control_client", return_value=mock_client): + models = discover_bedrock_models("us-east-1") + + assert len(models) == 2 + ids = [m["id"] for m in models] + assert "anthropic.claude-sonnet-4-6-20250514-v1:0" in ids + assert "amazon.nova-pro-v1:0" in ids + + def test_filters_inactive_models(self): + from agent.bedrock_adapter import discover_bedrock_models, reset_discovery_cache + reset_discovery_cache() + + mock_client = MagicMock() + mock_client.list_foundation_models.return_value = { + "modelSummaries": [ + { + "modelId": "old-model", + "modelName": "Old", + "providerName": "Test", + "inputModalities": ["TEXT"], + "outputModalities": ["TEXT"], + "responseStreamingSupported": True, + "modelLifecycle": {"status": "LEGACY"}, + }, + ], + } + mock_client.list_inference_profiles.return_value = {"inferenceProfileSummaries": []} + + with patch("agent.bedrock_adapter._get_bedrock_control_client", return_value=mock_client): + models = discover_bedrock_models("us-east-1") + + assert len(models) == 0 + + def test_filters_non_streaming_models(self): + from agent.bedrock_adapter import discover_bedrock_models, reset_discovery_cache + reset_discovery_cache() + + mock_client = MagicMock() + mock_client.list_foundation_models.return_value = { + "modelSummaries": [ + { + "modelId": "embed-model", + "modelName": "Embeddings", + "providerName": "Test", + "inputModalities": ["TEXT"], + "outputModalities": ["EMBEDDING"], + "responseStreamingSupported": False, + "modelLifecycle": {"status": "ACTIVE"}, + }, + ], + } + mock_client.list_inference_profiles.return_value = {"inferenceProfileSummaries": []} + + with patch("agent.bedrock_adapter._get_bedrock_control_client", return_value=mock_client): + models = discover_bedrock_models("us-east-1") + + assert len(models) == 0 + + def test_provider_filter(self): + from agent.bedrock_adapter import discover_bedrock_models, reset_discovery_cache + reset_discovery_cache() + + mock_client = MagicMock() + mock_client.list_foundation_models.return_value = { + "modelSummaries": [ + { + "modelId": "anthropic.claude-v2", + "modelName": "Claude v2", + "providerName": "Anthropic", + "inputModalities": ["TEXT"], + "outputModalities": ["TEXT"], + "responseStreamingSupported": True, + "modelLifecycle": {"status": "ACTIVE"}, + }, + { + "modelId": "amazon.titan-text", + "modelName": "Titan", + "providerName": "Amazon", + "inputModalities": ["TEXT"], + "outputModalities": ["TEXT"], + "responseStreamingSupported": True, + "modelLifecycle": {"status": "ACTIVE"}, + }, + ], + } + mock_client.list_inference_profiles.return_value = {"inferenceProfileSummaries": []} + + with patch("agent.bedrock_adapter._get_bedrock_control_client", return_value=mock_client): + models = discover_bedrock_models("us-east-1", provider_filter=["anthropic"]) + + assert len(models) == 1 + assert models[0]["id"] == "anthropic.claude-v2" + + def test_caches_results(self): + from agent.bedrock_adapter import 
discover_bedrock_models, reset_discovery_cache + reset_discovery_cache() + + mock_client = MagicMock() + mock_client.list_foundation_models.return_value = { + "modelSummaries": [{ + "modelId": "test-model", + "modelName": "Test", + "providerName": "Test", + "inputModalities": ["TEXT"], + "outputModalities": ["TEXT"], + "responseStreamingSupported": True, + "modelLifecycle": {"status": "ACTIVE"}, + }], + } + mock_client.list_inference_profiles.return_value = {"inferenceProfileSummaries": []} + + with patch("agent.bedrock_adapter._get_bedrock_control_client", return_value=mock_client): + first = discover_bedrock_models("us-east-1") + second = discover_bedrock_models("us-east-1") + + # Should only call the API once (second call uses cache) + assert mock_client.list_foundation_models.call_count == 1 + assert first == second + + def test_discovers_inference_profiles(self): + from agent.bedrock_adapter import discover_bedrock_models, reset_discovery_cache + reset_discovery_cache() + + mock_client = MagicMock() + mock_client.list_foundation_models.return_value = {"modelSummaries": []} + mock_client.list_inference_profiles.return_value = { + "inferenceProfileSummaries": [ + { + "inferenceProfileId": "us.anthropic.claude-sonnet-4-6", + "inferenceProfileName": "US Claude Sonnet 4.6", + "status": "ACTIVE", + "models": [{"modelArn": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6"}], + }, + ], + } + + with patch("agent.bedrock_adapter._get_bedrock_control_client", return_value=mock_client): + models = discover_bedrock_models("us-east-1") + + assert len(models) == 1 + assert models[0]["id"] == "us.anthropic.claude-sonnet-4-6" + + def test_global_profiles_sorted_first(self): + from agent.bedrock_adapter import discover_bedrock_models, reset_discovery_cache + reset_discovery_cache() + + mock_client = MagicMock() + mock_client.list_foundation_models.return_value = { + "modelSummaries": [{ + "modelId": "anthropic.claude-v2", + "modelName": "Claude v2", + "providerName": "Anthropic", + "inputModalities": ["TEXT"], + "outputModalities": ["TEXT"], + "responseStreamingSupported": True, + "modelLifecycle": {"status": "ACTIVE"}, + }], + } + mock_client.list_inference_profiles.return_value = { + "inferenceProfileSummaries": [{ + "inferenceProfileId": "global.anthropic.claude-v2", + "inferenceProfileName": "Global Claude v2", + "status": "ACTIVE", + "models": [], + }], + } + + with patch("agent.bedrock_adapter._get_bedrock_control_client", return_value=mock_client): + models = discover_bedrock_models("us-east-1") + + assert models[0]["id"] == "global.anthropic.claude-v2" + + def test_handles_api_error_gracefully(self): + from agent.bedrock_adapter import discover_bedrock_models, reset_discovery_cache + reset_discovery_cache() + + with patch("agent.bedrock_adapter._get_bedrock_control_client", side_effect=Exception("No creds")): + models = discover_bedrock_models("us-east-1") + + assert models == [] + + +class TestExtractProviderFromArn: + def test_extracts_anthropic(self): + from agent.bedrock_adapter import _extract_provider_from_arn + arn = "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-sonnet-4-6" + assert _extract_provider_from_arn(arn) == "anthropic" + + def test_extracts_amazon(self): + from agent.bedrock_adapter import _extract_provider_from_arn + arn = "arn:aws:bedrock:us-east-1::foundation-model/amazon.nova-pro-v1:0" + assert _extract_provider_from_arn(arn) == "amazon" + + def test_returns_empty_for_invalid_arn(self): + from agent.bedrock_adapter import 
_extract_provider_from_arn + assert _extract_provider_from_arn("not-an-arn") == "" + assert _extract_provider_from_arn("") == "" + + +# --------------------------------------------------------------------------- +# Client cache management +# --------------------------------------------------------------------------- + +class TestClientCache: + def test_reset_clears_caches(self): + from agent.bedrock_adapter import ( + _bedrock_runtime_client_cache, + _bedrock_control_client_cache, + reset_client_cache, + ) + _bedrock_runtime_client_cache["test"] = "dummy" + _bedrock_control_client_cache["test"] = "dummy" + reset_client_cache() + assert len(_bedrock_runtime_client_cache) == 0 + assert len(_bedrock_control_client_cache) == 0 + + +# --------------------------------------------------------------------------- +# Streaming with callbacks +# --------------------------------------------------------------------------- + +class TestStreamConverseWithCallbacks: + """Test real-time streaming with delta callbacks.""" + + def test_text_deltas_fire_callback(self): + from agent.bedrock_adapter import stream_converse_with_callbacks + deltas = [] + events = {"stream": [ + {"messageStart": {"role": "assistant"}}, + {"contentBlockStart": {"contentBlockIndex": 0, "start": {}}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": {"text": "Hello"}}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": {"text": " world"}}}, + {"contentBlockStop": {"contentBlockIndex": 0}}, + {"messageStop": {"stopReason": "end_turn"}}, + {"metadata": {"usage": {"inputTokens": 5, "outputTokens": 3}}}, + ]} + result = stream_converse_with_callbacks( + events, on_text_delta=lambda t: deltas.append(t), + ) + assert deltas == ["Hello", " world"] + assert result.choices[0].message.content == "Hello world" + + def test_text_deltas_suppressed_when_tool_use_present(self): + """Text deltas should NOT fire when tool_use blocks are present.""" + from agent.bedrock_adapter import stream_converse_with_callbacks + deltas = [] + events = {"stream": [ + {"messageStart": {"role": "assistant"}}, + {"contentBlockStart": {"contentBlockIndex": 0, "start": {}}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": {"text": "Let me check."}}}, + {"contentBlockStop": {"contentBlockIndex": 0}}, + {"contentBlockStart": {"contentBlockIndex": 1, "start": { + "toolUse": {"toolUseId": "c1", "name": "search"}, + }}}, + {"contentBlockDelta": {"contentBlockIndex": 1, "delta": { + "toolUse": {"input": '{"q":"test"}'}, + }}}, + {"contentBlockStop": {"contentBlockIndex": 1}}, + {"messageStop": {"stopReason": "tool_use"}}, + {"metadata": {"usage": {"inputTokens": 0, "outputTokens": 0}}}, + ]} + result = stream_converse_with_callbacks( + events, on_text_delta=lambda t: deltas.append(t), + ) + # Text delta for "Let me check." should fire (before tool_use was seen) + assert "Let me check." in deltas + # But the result should still have both text and tool calls + assert result.choices[0].message.content == "Let me check." 
+ assert len(result.choices[0].message.tool_calls) == 1 + + def test_tool_start_callback_fires(self): + from agent.bedrock_adapter import stream_converse_with_callbacks + tools_started = [] + events = {"stream": [ + {"messageStart": {"role": "assistant"}}, + {"contentBlockStart": {"contentBlockIndex": 0, "start": { + "toolUse": {"toolUseId": "c1", "name": "read_file"}, + }}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": { + "toolUse": {"input": '{"path":"/tmp/f"}'}, + }}}, + {"contentBlockStop": {"contentBlockIndex": 0}}, + {"messageStop": {"stopReason": "tool_use"}}, + {"metadata": {"usage": {"inputTokens": 0, "outputTokens": 0}}}, + ]} + result = stream_converse_with_callbacks( + events, on_tool_start=lambda name: tools_started.append(name), + ) + assert tools_started == ["read_file"] + + def test_interrupt_stops_processing(self): + from agent.bedrock_adapter import stream_converse_with_callbacks + deltas = [] + call_count = {"n": 0} + events = {"stream": [ + {"messageStart": {"role": "assistant"}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": {"text": "A"}}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": {"text": "B"}}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": {"text": "C"}}}, + {"messageStop": {"stopReason": "end_turn"}}, + {"metadata": {"usage": {"inputTokens": 0, "outputTokens": 0}}}, + ]} + + def check_interrupt(): + call_count["n"] += 1 + return call_count["n"] >= 3 # Interrupt after 2 events + + result = stream_converse_with_callbacks( + events, + on_text_delta=lambda t: deltas.append(t), + on_interrupt_check=check_interrupt, + ) + # Should have processed fewer than all deltas + assert len(deltas) < 3 + + def test_reasoning_delta_callback(self): + from agent.bedrock_adapter import stream_converse_with_callbacks + reasoning = [] + events = {"stream": [ + {"messageStart": {"role": "assistant"}}, + {"contentBlockDelta": {"contentBlockIndex": 0, "delta": { + "reasoningContent": {"text": "Let me think..."}, + }}}, + {"contentBlockDelta": {"contentBlockIndex": 1, "delta": {"text": "Answer."}}}, + {"contentBlockStop": {"contentBlockIndex": 1}}, + {"messageStop": {"stopReason": "end_turn"}}, + {"metadata": {"usage": {"inputTokens": 0, "outputTokens": 0}}}, + ]} + result = stream_converse_with_callbacks( + events, on_reasoning_delta=lambda t: reasoning.append(t), + ) + assert reasoning == ["Let me think..."] + + +# --------------------------------------------------------------------------- +# Guardrail config in build_converse_kwargs +# --------------------------------------------------------------------------- + +class TestGuardrailConfig: + """Test that guardrail configuration is correctly passed through.""" + + def test_guardrail_included_in_kwargs(self): + from agent.bedrock_adapter import build_converse_kwargs + guardrail = { + "guardrailIdentifier": "gr-abc123", + "guardrailVersion": "1", + "streamProcessingMode": "async", + "trace": "enabled", + } + kwargs = build_converse_kwargs( + model="test-model", + messages=[{"role": "user", "content": "Hi"}], + guardrail_config=guardrail, + ) + assert kwargs["guardrailConfig"] == guardrail + + def test_no_guardrail_when_none(self): + from agent.bedrock_adapter import build_converse_kwargs + kwargs = build_converse_kwargs( + model="test-model", + messages=[{"role": "user", "content": "Hi"}], + guardrail_config=None, + ) + assert "guardrailConfig" not in kwargs + + def test_no_guardrail_when_empty_dict(self): + from agent.bedrock_adapter import build_converse_kwargs + kwargs = 
build_converse_kwargs( + model="test-model", + messages=[{"role": "user", "content": "Hi"}], + guardrail_config={}, + ) + # Empty dict is falsy, should not be included + assert "guardrailConfig" not in kwargs + + +# --------------------------------------------------------------------------- +# Error classification +# --------------------------------------------------------------------------- + +class TestBedrockErrorClassification: + """Test Bedrock-specific error classification.""" + + def test_context_overflow_validation_exception(self): + from agent.bedrock_adapter import classify_bedrock_error + assert classify_bedrock_error( + "ValidationException: input is too long for model" + ) == "context_overflow" + + def test_context_overflow_max_tokens(self): + from agent.bedrock_adapter import classify_bedrock_error + assert classify_bedrock_error( + "ValidationException: exceeds the maximum number of input tokens" + ) == "context_overflow" + + def test_context_overflow_stream_error(self): + from agent.bedrock_adapter import classify_bedrock_error + assert classify_bedrock_error( + "ModelStreamErrorException: Input is too long" + ) == "context_overflow" + + def test_rate_limit_throttling(self): + from agent.bedrock_adapter import classify_bedrock_error + assert classify_bedrock_error("ThrottlingException: Rate exceeded") == "rate_limit" + + def test_rate_limit_concurrent(self): + from agent.bedrock_adapter import classify_bedrock_error + assert classify_bedrock_error("Too many concurrent requests") == "rate_limit" + + def test_overloaded_not_ready(self): + from agent.bedrock_adapter import classify_bedrock_error + assert classify_bedrock_error("ModelNotReadyException") == "overloaded" + + def test_overloaded_timeout(self): + from agent.bedrock_adapter import classify_bedrock_error + assert classify_bedrock_error("ModelTimeoutException") == "overloaded" + + def test_unknown_error(self): + from agent.bedrock_adapter import classify_bedrock_error + assert classify_bedrock_error("SomeRandomError: something went wrong") == "unknown" + + +class TestBedrockContextLength: + """Test Bedrock model context length lookup.""" + + def test_claude_opus_4_6(self): + from agent.bedrock_adapter import get_bedrock_context_length + assert get_bedrock_context_length("anthropic.claude-opus-4-6-20250514-v1:0") == 200_000 + + def test_claude_sonnet_versioned(self): + from agent.bedrock_adapter import get_bedrock_context_length + assert get_bedrock_context_length("anthropic.claude-sonnet-4-6-20250514-v1:0") == 200_000 + + def test_nova_pro(self): + from agent.bedrock_adapter import get_bedrock_context_length + assert get_bedrock_context_length("amazon.nova-pro-v1:0") == 300_000 + + def test_nova_micro(self): + from agent.bedrock_adapter import get_bedrock_context_length + assert get_bedrock_context_length("amazon.nova-micro-v1:0") == 128_000 + + def test_unknown_model_gets_default(self): + from agent.bedrock_adapter import get_bedrock_context_length, BEDROCK_DEFAULT_CONTEXT_LENGTH + assert get_bedrock_context_length("unknown.model-v1:0") == BEDROCK_DEFAULT_CONTEXT_LENGTH + + def test_inference_profile_resolves(self): + from agent.bedrock_adapter import get_bedrock_context_length + # Cross-region inference profiles contain the base model ID + assert get_bedrock_context_length("us.anthropic.claude-sonnet-4-6") == 200_000 + + def test_longest_prefix_wins(self): + from agent.bedrock_adapter import get_bedrock_context_length + # "anthropic.claude-3-5-sonnet" should match before "anthropic.claude-3" + assert 
get_bedrock_context_length("anthropic.claude-3-5-sonnet-20240620-v1:0") == 200_000 + + +# --------------------------------------------------------------------------- +# Tool-calling capability detection +# --------------------------------------------------------------------------- + +class TestModelSupportsToolUse: + """Test non-tool-calling model detection.""" + + def test_claude_supports_tools(self): + from agent.bedrock_adapter import _model_supports_tool_use + assert _model_supports_tool_use("us.anthropic.claude-sonnet-4-6") is True + + def test_nova_supports_tools(self): + from agent.bedrock_adapter import _model_supports_tool_use + assert _model_supports_tool_use("us.amazon.nova-pro-v1:0") is True + + def test_deepseek_v3_supports_tools(self): + from agent.bedrock_adapter import _model_supports_tool_use + assert _model_supports_tool_use("deepseek.v3.2") is True + + def test_llama_supports_tools(self): + from agent.bedrock_adapter import _model_supports_tool_use + assert _model_supports_tool_use("us.meta.llama4-scout-17b-instruct-v1:0") is True + + def test_deepseek_r1_no_tools(self): + from agent.bedrock_adapter import _model_supports_tool_use + assert _model_supports_tool_use("us.deepseek.r1-v1:0") is False + + def test_deepseek_r1_alt_format_no_tools(self): + from agent.bedrock_adapter import _model_supports_tool_use + assert _model_supports_tool_use("deepseek-r1") is False + + def test_stability_no_tools(self): + from agent.bedrock_adapter import _model_supports_tool_use + assert _model_supports_tool_use("stability.stable-diffusion-xl") is False + + def test_embedding_no_tools(self): + from agent.bedrock_adapter import _model_supports_tool_use + assert _model_supports_tool_use("cohere.embed-v4") is False + + def test_unknown_model_defaults_to_true(self): + from agent.bedrock_adapter import _model_supports_tool_use + assert _model_supports_tool_use("some-future-model-v1") is True + + +class TestBuildConverseKwargsToolStripping: + """Test that tools are stripped for non-tool-calling models.""" + + def test_tools_included_for_claude(self): + from agent.bedrock_adapter import build_converse_kwargs + tools = [{"type": "function", "function": {"name": "test", "description": "t", "parameters": {}}}] + kwargs = build_converse_kwargs( + model="us.anthropic.claude-sonnet-4-6", + messages=[{"role": "user", "content": "Hi"}], + tools=tools, + ) + assert "toolConfig" in kwargs + + def test_tools_stripped_for_deepseek_r1(self): + from agent.bedrock_adapter import build_converse_kwargs + tools = [{"type": "function", "function": {"name": "test", "description": "t", "parameters": {}}}] + kwargs = build_converse_kwargs( + model="us.deepseek.r1-v1:0", + messages=[{"role": "user", "content": "Hi"}], + tools=tools, + ) + assert "toolConfig" not in kwargs + + +# --------------------------------------------------------------------------- +# Dual-path model routing +# --------------------------------------------------------------------------- + +class TestIsAnthropicBedrockModel: + """Test Claude model detection for dual-path routing.""" + + def test_us_claude_sonnet(self): + from agent.bedrock_adapter import is_anthropic_bedrock_model + assert is_anthropic_bedrock_model("us.anthropic.claude-sonnet-4-6") is True + + def test_global_claude_opus(self): + from agent.bedrock_adapter import is_anthropic_bedrock_model + assert is_anthropic_bedrock_model("global.anthropic.claude-opus-4-6-v1") is True + + def test_bare_claude(self): + from agent.bedrock_adapter import is_anthropic_bedrock_model + assert 
is_anthropic_bedrock_model("anthropic.claude-haiku-4-5-20251001-v1:0") is True + + def test_nova_is_not_anthropic(self): + from agent.bedrock_adapter import is_anthropic_bedrock_model + assert is_anthropic_bedrock_model("us.amazon.nova-pro-v1:0") is False + + def test_deepseek_is_not_anthropic(self): + from agent.bedrock_adapter import is_anthropic_bedrock_model + assert is_anthropic_bedrock_model("deepseek.v3.2") is False + + def test_llama_is_not_anthropic(self): + from agent.bedrock_adapter import is_anthropic_bedrock_model + assert is_anthropic_bedrock_model("us.meta.llama4-scout-17b-instruct-v1:0") is False + + def test_mistral_is_not_anthropic(self): + from agent.bedrock_adapter import is_anthropic_bedrock_model + assert is_anthropic_bedrock_model("mistral.mistral-large-3-675b-instruct") is False + + def test_eu_claude(self): + from agent.bedrock_adapter import is_anthropic_bedrock_model + assert is_anthropic_bedrock_model("eu.anthropic.claude-sonnet-4-6") is True + + +class TestEmptyTextBlockFix: + """Test that empty text blocks are replaced with space placeholders.""" + + def test_none_content_gets_space(self): + from agent.bedrock_adapter import _convert_content_to_converse + blocks = _convert_content_to_converse(None) + assert blocks[0]["text"] == " " + + def test_empty_string_gets_space(self): + from agent.bedrock_adapter import _convert_content_to_converse + blocks = _convert_content_to_converse("") + assert blocks[0]["text"] == " " + + def test_whitespace_only_gets_space(self): + from agent.bedrock_adapter import _convert_content_to_converse + blocks = _convert_content_to_converse(" ") + assert blocks[0]["text"] == " " + + def test_real_text_preserved(self): + from agent.bedrock_adapter import _convert_content_to_converse + blocks = _convert_content_to_converse("Hello") + assert blocks[0]["text"] == "Hello" diff --git a/tests/agent/test_bedrock_integration.py b/tests/agent/test_bedrock_integration.py new file mode 100644 index 000000000..ba77d9361 --- /dev/null +++ b/tests/agent/test_bedrock_integration.py @@ -0,0 +1,269 @@ +"""Integration tests for the AWS Bedrock provider wiring. + +Verifies that the Bedrock provider is correctly registered in the +provider registry, model catalog, and runtime resolution pipeline. +These tests do NOT require AWS credentials or boto3 — all AWS calls +are mocked. + +Note: Tests that import ``hermes_cli.auth`` or ``hermes_cli.runtime_provider`` +require Python 3.10+ due to ``str | None`` type syntax in the import chain. 
+""" + +import os +from unittest.mock import MagicMock, patch + +import pytest + + +class TestProviderRegistry: + """Verify Bedrock is registered in PROVIDER_REGISTRY.""" + + def test_bedrock_in_registry(self): + from hermes_cli.auth import PROVIDER_REGISTRY + assert "bedrock" in PROVIDER_REGISTRY + + def test_bedrock_auth_type_is_aws_sdk(self): + from hermes_cli.auth import PROVIDER_REGISTRY + pconfig = PROVIDER_REGISTRY["bedrock"] + assert pconfig.auth_type == "aws_sdk" + + def test_bedrock_has_no_api_key_env_vars(self): + """Bedrock uses the AWS SDK credential chain, not API keys.""" + from hermes_cli.auth import PROVIDER_REGISTRY + pconfig = PROVIDER_REGISTRY["bedrock"] + assert pconfig.api_key_env_vars == () + + def test_bedrock_base_url_env_var(self): + from hermes_cli.auth import PROVIDER_REGISTRY + pconfig = PROVIDER_REGISTRY["bedrock"] + assert pconfig.base_url_env_var == "BEDROCK_BASE_URL" + + +class TestProviderAliases: + """Verify Bedrock aliases resolve correctly.""" + + def test_aws_alias(self): + from hermes_cli.models import _PROVIDER_ALIASES + assert _PROVIDER_ALIASES.get("aws") == "bedrock" + + def test_aws_bedrock_alias(self): + from hermes_cli.models import _PROVIDER_ALIASES + assert _PROVIDER_ALIASES.get("aws-bedrock") == "bedrock" + + def test_amazon_bedrock_alias(self): + from hermes_cli.models import _PROVIDER_ALIASES + assert _PROVIDER_ALIASES.get("amazon-bedrock") == "bedrock" + + def test_amazon_alias(self): + from hermes_cli.models import _PROVIDER_ALIASES + assert _PROVIDER_ALIASES.get("amazon") == "bedrock" + + +class TestProviderLabels: + """Verify Bedrock appears in provider labels.""" + + def test_bedrock_label(self): + from hermes_cli.models import _PROVIDER_LABELS + assert _PROVIDER_LABELS.get("bedrock") == "AWS Bedrock" + + +class TestModelCatalog: + """Verify Bedrock has a static model fallback list.""" + + def test_bedrock_has_curated_models(self): + from hermes_cli.models import _PROVIDER_MODELS + models = _PROVIDER_MODELS.get("bedrock", []) + assert len(models) > 0 + + def test_bedrock_models_include_claude(self): + from hermes_cli.models import _PROVIDER_MODELS + models = _PROVIDER_MODELS.get("bedrock", []) + claude_models = [m for m in models if "anthropic.claude" in m] + assert len(claude_models) > 0 + + def test_bedrock_models_include_nova(self): + from hermes_cli.models import _PROVIDER_MODELS + models = _PROVIDER_MODELS.get("bedrock", []) + nova_models = [m for m in models if "amazon.nova" in m] + assert len(nova_models) > 0 + + +class TestResolveProvider: + """Verify resolve_provider() handles bedrock correctly.""" + + def test_explicit_bedrock_resolves(self, monkeypatch): + """When user explicitly requests 'bedrock', it should resolve.""" + from hermes_cli.auth import PROVIDER_REGISTRY + # bedrock is in the registry, so resolve_provider should return it + from hermes_cli.auth import resolve_provider + result = resolve_provider("bedrock") + assert result == "bedrock" + + def test_aws_alias_resolves_to_bedrock(self): + from hermes_cli.auth import resolve_provider + result = resolve_provider("aws") + assert result == "bedrock" + + def test_amazon_bedrock_alias_resolves(self): + from hermes_cli.auth import resolve_provider + result = resolve_provider("amazon-bedrock") + assert result == "bedrock" + + def test_auto_detect_with_aws_credentials(self, monkeypatch): + """When AWS credentials are present and no other provider is configured, + auto-detect should find bedrock.""" + from hermes_cli.auth import resolve_provider + + # Clear all other 
provider env vars + for var in ["OPENAI_API_KEY", "OPENROUTER_API_KEY", "ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", "GOOGLE_API_KEY", "DEEPSEEK_API_KEY"]: + monkeypatch.delenv(var, raising=False) + + # Set AWS credentials + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE") + monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY") + + # Mock the auth store to have no active provider + with patch("hermes_cli.auth._load_auth_store", return_value={}): + result = resolve_provider("auto") + assert result == "bedrock" + + +class TestRuntimeProvider: + """Verify resolve_runtime_provider() handles bedrock correctly.""" + + def test_bedrock_runtime_resolution(self, monkeypatch): + from hermes_cli.runtime_provider import resolve_runtime_provider + + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE") + monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY") + monkeypatch.setenv("AWS_REGION", "eu-west-1") + + # Mock resolve_provider to return bedrock + with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \ + patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}): + result = resolve_runtime_provider(requested="bedrock") + + assert result["provider"] == "bedrock" + assert result["api_mode"] == "bedrock_converse" + assert result["region"] == "eu-west-1" + assert "bedrock-runtime.eu-west-1.amazonaws.com" in result["base_url"] + assert result["api_key"] == "aws-sdk" + + def test_bedrock_runtime_default_region(self, monkeypatch): + from hermes_cli.runtime_provider import resolve_runtime_provider + + monkeypatch.setenv("AWS_PROFILE", "default") + monkeypatch.delenv("AWS_REGION", raising=False) + monkeypatch.delenv("AWS_DEFAULT_REGION", raising=False) + + with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \ + patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}): + result = resolve_runtime_provider(requested="bedrock") + + assert result["region"] == "us-east-1" + + def test_bedrock_runtime_no_credentials_raises_on_auto_detect(self, monkeypatch): + """When bedrock is auto-detected (not explicitly requested) and no + credentials are found, runtime resolution should raise AuthError.""" + from hermes_cli.runtime_provider import resolve_runtime_provider + from hermes_cli.auth import AuthError + + # Clear all AWS env vars + for var in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_PROFILE", + "AWS_BEARER_TOKEN_BEDROCK", "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", + "AWS_WEB_IDENTITY_TOKEN_FILE"]: + monkeypatch.delenv(var, raising=False) + + # Mock both the provider resolution and boto3's credential chain + mock_session = MagicMock() + mock_session.get_credentials.return_value = None + with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \ + patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}), \ + patch("hermes_cli.runtime_provider.resolve_requested_provider", return_value="auto"), \ + patch.dict("sys.modules", {"botocore": MagicMock(), "botocore.session": MagicMock()}): + import botocore.session as _bs + _bs.get_session = MagicMock(return_value=mock_session) + with pytest.raises(AuthError, match="No AWS credentials"): + resolve_runtime_provider(requested="auto") + + def test_bedrock_runtime_explicit_skips_credential_check(self, monkeypatch): + """When user explicitly requests bedrock, trust boto3's credential chain + 
even if env-var detection finds nothing (covers IMDS, SSO, etc.).""" + from hermes_cli.runtime_provider import resolve_runtime_provider + + # No AWS env vars set — but explicit bedrock request should not raise + for var in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_PROFILE", + "AWS_BEARER_TOKEN_BEDROCK"]: + monkeypatch.delenv(var, raising=False) + + with patch("hermes_cli.runtime_provider.resolve_provider", return_value="bedrock"), \ + patch("hermes_cli.runtime_provider._get_model_config", return_value={"provider": "bedrock"}): + result = resolve_runtime_provider(requested="bedrock") + assert result["provider"] == "bedrock" + assert result["api_mode"] == "bedrock_converse" + + +# --------------------------------------------------------------------------- +# providers.py integration +# --------------------------------------------------------------------------- + +class TestProvidersModule: + """Verify bedrock is wired into hermes_cli/providers.py.""" + + def test_bedrock_alias_in_providers(self): + from hermes_cli.providers import ALIASES + assert ALIASES.get("bedrock") is None # "bedrock" IS the canonical name, not an alias + assert ALIASES.get("aws") == "bedrock" + assert ALIASES.get("aws-bedrock") == "bedrock" + + def test_bedrock_transport_mapping(self): + from hermes_cli.providers import TRANSPORT_TO_API_MODE + assert TRANSPORT_TO_API_MODE.get("bedrock_converse") == "bedrock_converse" + + def test_determine_api_mode_from_bedrock_url(self): + from hermes_cli.providers import determine_api_mode + assert determine_api_mode( + "unknown", "https://bedrock-runtime.us-east-1.amazonaws.com" + ) == "bedrock_converse" + + def test_label_override(self): + from hermes_cli.providers import _LABEL_OVERRIDES + assert _LABEL_OVERRIDES.get("bedrock") == "AWS Bedrock" + + +# --------------------------------------------------------------------------- +# Error classifier integration +# --------------------------------------------------------------------------- + +class TestErrorClassifierBedrock: + """Verify Bedrock error patterns are in the global error classifier.""" + + def test_throttling_in_rate_limit_patterns(self): + from agent.error_classifier import _RATE_LIMIT_PATTERNS + assert "throttlingexception" in _RATE_LIMIT_PATTERNS + + def test_context_overflow_patterns(self): + from agent.error_classifier import _CONTEXT_OVERFLOW_PATTERNS + assert "input is too long" in _CONTEXT_OVERFLOW_PATTERNS + + +# --------------------------------------------------------------------------- +# pyproject.toml bedrock extra +# --------------------------------------------------------------------------- + +class TestPackaging: + """Verify bedrock optional dependency is declared.""" + + def test_bedrock_extra_exists(self): + import configparser + from pathlib import Path + # Read pyproject.toml to verify [bedrock] extra + toml_path = Path(__file__).parent.parent.parent / "pyproject.toml" + content = toml_path.read_text() + assert 'bedrock = ["boto3' in content + + def test_bedrock_in_all_extra(self): + from pathlib import Path + content = (Path(__file__).parent.parent.parent / "pyproject.toml").read_text() + assert '"hermes-agent[bedrock]"' in content diff --git a/website/docs/guides/aws-bedrock.md b/website/docs/guides/aws-bedrock.md new file mode 100644 index 000000000..cf5aec4e3 --- /dev/null +++ b/website/docs/guides/aws-bedrock.md @@ -0,0 +1,164 @@ +--- +sidebar_position: 14 +title: "AWS Bedrock" +description: "Use Hermes Agent with Amazon Bedrock — native Converse API, IAM authentication, Guardrails, and 
cross-region inference"
+---
+
+# AWS Bedrock
+
+Hermes Agent supports Amazon Bedrock as a native provider using the **Converse API** — not the OpenAI-compatible endpoint. This gives you full access to the Bedrock ecosystem: IAM authentication, Guardrails, cross-region inference profiles, and all foundation models.
+
+## Prerequisites
+
+- **AWS credentials** — any source supported by the [boto3 credential chain](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html):
+  - IAM instance role (EC2, ECS, Lambda — zero config)
+  - `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` environment variables
+  - `AWS_PROFILE` for SSO or named profiles
+  - `aws configure` for local development
+- **boto3** — install with `pip install hermes-agent[bedrock]`
+- **IAM permissions** — at minimum:
+  - `bedrock:InvokeModel` and `bedrock:InvokeModelWithResponseStream` (for inference)
+  - `bedrock:ListFoundationModels` and `bedrock:ListInferenceProfiles` (for model discovery)
+
+:::tip EC2 / ECS / Lambda
+On AWS compute, attach an IAM role with `AmazonBedrockFullAccess` and you're done. No API keys, no `.env` configuration — Hermes detects the instance role automatically.
+:::
+
+## Quick Start
+
+```bash
+# Install with Bedrock support
+pip install hermes-agent[bedrock]
+
+# Select Bedrock as your provider
+hermes model
+# → Choose "More providers..." → "AWS Bedrock"
+# → Select your region and model
+
+# Start chatting
+hermes chat
+```
+
+## Configuration
+
+After running `hermes model`, your `~/.hermes/config.yaml` will contain:
+
+```yaml
+model:
+  default: us.anthropic.claude-sonnet-4-6
+  provider: bedrock
+  base_url: https://bedrock-runtime.us-east-2.amazonaws.com
+
+bedrock:
+  region: us-east-2
+```
+
+### Region
+
+Set the AWS region in any of these ways (highest priority first):
+
+1. `bedrock.region` in `config.yaml`
+2. `AWS_REGION` environment variable
+3. `AWS_DEFAULT_REGION` environment variable
+4. Default: `us-east-1`
+
+### Guardrails
+
+To apply [Amazon Bedrock Guardrails](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html) to all model invocations:
+
+```yaml
+bedrock:
+  region: us-east-2
+  guardrail:
+    guardrail_identifier: "abc123def456"  # From the Bedrock console
+    guardrail_version: "1"                # Version number or "DRAFT"
+    stream_processing_mode: "async"       # "sync" or "async"
+    trace: "disabled"                     # "enabled", "disabled", or "enabled_full"
+```
+
+### Model Discovery
+
+Hermes auto-discovers available models via the Bedrock control plane. You can customize discovery:
+
+```yaml
+bedrock:
+  discovery:
+    enabled: true
+    provider_filter: ["anthropic", "amazon"]  # Only show these providers
+    refresh_interval: 3600                    # Cache for 1 hour
+```
+
+## Available Models
+
+Bedrock models use **inference profile IDs** for on-demand invocation. The `hermes model` picker shows these automatically, with recommended models at the top:
+
+| Model | ID | Notes |
+|-------|-----|-------|
+| Claude Sonnet 4.6 | `us.anthropic.claude-sonnet-4-6` | Recommended — best balance of speed and capability |
+| Claude Opus 4.6 | `us.anthropic.claude-opus-4-6-v1` | Most capable |
+| Claude Haiku 4.5 | `us.anthropic.claude-haiku-4-5-20251001-v1:0` | Fastest Claude |
+| Amazon Nova Pro | `us.amazon.nova-pro-v1:0` | Amazon's flagship |
+| Amazon Nova Micro | `us.amazon.nova-micro-v1:0` | Fastest, cheapest |
+| DeepSeek V3.2 | `deepseek.v3.2` | Strong open model |
+| Llama 4 Scout 17B | `us.meta.llama4-scout-17b-instruct-v1:0` | Meta's latest |
+
+:::info Cross-Region Inference
+Models prefixed with `us.` use cross-region inference profiles, which provide better capacity and automatic failover across AWS regions. Models prefixed with `global.` route across all available regions worldwide.
+:::
+
+## Switching Models Mid-Session
+
+Use the `/model` command during a conversation:
+
+```
+/model us.amazon.nova-pro-v1:0
+/model deepseek.v3.2
+/model us.anthropic.claude-opus-4-6-v1
+```
+
+## Diagnostics
+
+```bash
+hermes doctor
+```
+
+The doctor checks:
+- Whether AWS credentials are available (env vars, IAM role, SSO)
+- Whether `boto3` is installed
+- Whether the Bedrock API is reachable (ListFoundationModels)
+- Number of available models in your region
+
+## Gateway (Messaging Platforms)
+
+Bedrock works with all Hermes gateway platforms (Telegram, Discord, Slack, Feishu, etc.). Configure Bedrock as your provider, then start the gateway normally:
+
+```bash
+hermes gateway setup
+hermes gateway start
+```
+
+The gateway reads `config.yaml` and uses the same Bedrock provider configuration.
+
+## Troubleshooting
+
+### "No API key found" / "No AWS credentials"
+
+Hermes checks for credentials in this order:
+1. `AWS_BEARER_TOKEN_BEDROCK`
+2. `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`
+3. `AWS_PROFILE`
+4. EC2 instance metadata (IMDS)
+5. ECS container credentials
+6. Lambda execution role
+
+If none are found, run `aws configure` or attach an IAM role to your compute instance.
+
+### "Invocation of model ID ... with on-demand throughput isn't supported"
+
+Use an **inference profile ID** (prefixed with `us.` or `global.`) instead of the bare foundation model ID. For example:
+- ❌ `anthropic.claude-sonnet-4-6`
+- ✅ `us.anthropic.claude-sonnet-4-6`
+
+### "ThrottlingException"
+
+You've hit the Bedrock per-model rate limit. Hermes automatically retries with backoff. To increase limits, request a quota increase in the [AWS Service Quotas console](https://console.aws.amazon.com/servicequotas/).
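+
+### Verifying access outside Hermes
+
+If `hermes doctor` reports a credential or region problem, it can help to rule Hermes out entirely and call Bedrock with boto3 directly. The sketch below is a minimal standalone check, not part of Hermes: it assumes boto3 is installed and that the default credential chain resolves, and the region and model ID shown are only examples taken from the table above, so substitute the ones you actually use.
+
+```python
+import boto3
+
+REGION = "us-east-1"                          # example region
+MODEL_ID = "us.anthropic.claude-sonnet-4-6"   # example inference profile ID
+
+# Control plane: confirms credentials resolve and lists what this region offers.
+control = boto3.client("bedrock", region_name=REGION)
+models = control.list_foundation_models()["modelSummaries"]
+print(f"{len(models)} foundation models visible in {REGION}")
+
+# Data plane: one tiny Converse call to confirm invoke permissions on the model.
+runtime = boto3.client("bedrock-runtime", region_name=REGION)
+response = runtime.converse(
+    modelId=MODEL_ID,
+    messages=[{"role": "user", "content": [{"text": "Reply with the word ok."}]}],
+    inferenceConfig={"maxTokens": 16},
+)
+print(response["output"]["message"]["content"][0]["text"])
+```
+
+If the first call fails, the problem is usually credentials or region; if only the second fails, look at IAM permissions (`bedrock:InvokeModel`) or whether you passed a bare model ID instead of an inference profile ID.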