mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-17 09:41:58 +00:00
Add an official, production-grade WhatsApp integration via Meta's Business Cloud API as a complement to the existing Baileys bridge. No bridge subprocess, no QR codes, no account-ban risk — at the cost of a Meta Business account and a public HTTPS webhook URL. Setup is fully wizard-driven: 'hermes whatsapp-cloud' walks through every credential with paste-time validation (catches the #1 trap of pasting a phone number into the Phone Number ID field), generates a verify token, and ends with copy-paste instructions for the cloudflared / Meta-dashboard / Business Manager pieces that can't be automated. The wizard also points users at Meta's Business Manager for setting the bot's display name and profile picture. Feature set: - Inbound: text, images (with native-vision routing), voice notes (STT), documents (small text inlined, larger cached), reply context. - Outbound: text with WhatsApp-flavored markdown conversion, images, videos, documents, opus voice notes via ffmpeg with MP3 fallback. - Native interactive buttons for clarify, dangerous-command approval, and slash-command confirmation flows — matches the Telegram / Discord UX, graceful degrades to plain text. - Read receipts (blue double-checkmarks) and typing indicator, using Meta's combined endpoint so they fire in a single API call. - Webhook security: X-Hub-Signature-256 HMAC verification (raw body, constant-time), wamid deduplication, group-shaped-message refusal (groups deferred to v2 — Baileys still covers them). - Full integration with the gateway's session, cron, display-tier, prompt-hint, and auth-allowlist systems. Cloud and Baileys can run side-by-side against different phone numbers. Also wires STT (speech-to-text) through Nous's managed audio gateway for Nous subscribers — previously the default stt.provider=local required a separate faster-whisper install. New subscribers now get voice-note transcription out of the box. Docs: 418-line user guide at website/docs/user-guide/messaging/ whatsapp-cloud.md, sidebar entry, environment-variables reference, ADDING_A_PLATFORM.md updated with the optional interactive-UX contract for future adapter authors. Tests: 100 dedicated tests for the adapter, 32 for the setup wizard, 20 for the Nous subscription STT wiring, plus regression coverage across display_config, prompt_builder, and the cron scheduler. Known limitations (deferred until clear demand signal): - Group chats — use the Baileys bridge if you need them. - Message templates for 24-hour-window outside-conversation sends — reactive chat is unaffected; cron / delegate_task with gaps > 24h will fail with a clear error. The agent's system prompt warns the model about this so it knows to mention it when scheduling delayed messages.
925 lines
34 KiB
Python
925 lines
34 KiB
Python
"""Helpers for Nous subscription managed-tool capabilities."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Dict, Iterable, Optional, Set
|
|
|
|
from hermes_cli.auth import get_nous_auth_status
|
|
from hermes_cli.config import get_env_value, load_config
|
|
from tools.managed_tool_gateway import is_managed_tool_gateway_ready
|
|
from utils import is_truthy_value
|
|
from tools.tool_backend_helpers import (
|
|
fal_key_is_configured,
|
|
has_direct_modal_credentials,
|
|
managed_nous_tools_enabled,
|
|
normalize_browser_cloud_provider,
|
|
normalize_modal_mode,
|
|
resolve_modal_backend_state,
|
|
resolve_openai_audio_api_key,
|
|
)
|
|
|
|
|
|
_DEFAULT_PLATFORM_TOOLSETS = {
|
|
"cli": "hermes-cli",
|
|
}
|
|
|
|
|
|
def _uses_gateway(section: object) -> bool:
|
|
"""Return True when a config section explicitly opts into the gateway."""
|
|
if not isinstance(section, dict):
|
|
return False
|
|
return is_truthy_value(section.get("use_gateway"), default=False)
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class NousFeatureState:
|
|
key: str
|
|
label: str
|
|
included_by_default: bool
|
|
available: bool
|
|
active: bool
|
|
managed_by_nous: bool
|
|
direct_override: bool
|
|
toolset_enabled: bool
|
|
current_provider: str = ""
|
|
explicit_configured: bool = False
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class NousSubscriptionFeatures:
|
|
subscribed: bool
|
|
nous_auth_present: bool
|
|
provider_is_nous: bool
|
|
features: Dict[str, NousFeatureState]
|
|
|
|
@property
|
|
def web(self) -> NousFeatureState:
|
|
return self.features["web"]
|
|
|
|
@property
|
|
def image_gen(self) -> NousFeatureState:
|
|
return self.features["image_gen"]
|
|
|
|
@property
|
|
def tts(self) -> NousFeatureState:
|
|
return self.features["tts"]
|
|
|
|
@property
|
|
def stt(self) -> NousFeatureState:
|
|
return self.features["stt"]
|
|
|
|
@property
|
|
def browser(self) -> NousFeatureState:
|
|
return self.features["browser"]
|
|
|
|
@property
|
|
def modal(self) -> NousFeatureState:
|
|
return self.features["modal"]
|
|
|
|
def items(self) -> Iterable[NousFeatureState]:
|
|
ordered = ("web", "image_gen", "tts", "stt", "browser", "modal")
|
|
for key in ordered:
|
|
yield self.features[key]
|
|
|
|
|
|
def _model_config_dict(config: Dict[str, object]) -> Dict[str, object]:
|
|
model_cfg = config.get("model")
|
|
if isinstance(model_cfg, dict):
|
|
return dict(model_cfg)
|
|
if isinstance(model_cfg, str) and model_cfg.strip():
|
|
return {"default": model_cfg.strip()}
|
|
return {}
|
|
|
|
|
|
def _toolset_enabled(config: Dict[str, object], toolset_key: str) -> bool:
|
|
from toolsets import resolve_toolset
|
|
|
|
platform_toolsets = config.get("platform_toolsets")
|
|
if not isinstance(platform_toolsets, dict) or not platform_toolsets:
|
|
platform_toolsets = {"cli": [_DEFAULT_PLATFORM_TOOLSETS["cli"]]}
|
|
|
|
target_tools = set(resolve_toolset(toolset_key))
|
|
if not target_tools:
|
|
return False
|
|
|
|
for platform, raw_toolsets in platform_toolsets.items():
|
|
if isinstance(raw_toolsets, list):
|
|
toolset_names = list(raw_toolsets)
|
|
else:
|
|
default_toolset = _DEFAULT_PLATFORM_TOOLSETS.get(platform)
|
|
toolset_names = [default_toolset] if default_toolset else []
|
|
if not toolset_names:
|
|
default_toolset = _DEFAULT_PLATFORM_TOOLSETS.get(platform)
|
|
if default_toolset:
|
|
toolset_names = [default_toolset]
|
|
|
|
available_tools: Set[str] = set()
|
|
for toolset_name in toolset_names:
|
|
if not isinstance(toolset_name, str) or not toolset_name:
|
|
continue
|
|
try:
|
|
available_tools.update(resolve_toolset(toolset_name))
|
|
except Exception:
|
|
continue
|
|
|
|
if target_tools and target_tools.issubset(available_tools):
|
|
return True
|
|
|
|
return False
|
|
|
|
|
|
def _has_agent_browser() -> bool:
|
|
import shutil
|
|
|
|
agent_browser_bin = shutil.which("agent-browser")
|
|
local_bin = (
|
|
Path(__file__).parent.parent / "node_modules" / ".bin" / "agent-browser"
|
|
)
|
|
return bool(agent_browser_bin or local_bin.exists())
|
|
|
|
|
|
def _browser_label(current_provider: str) -> str:
|
|
mapping = {
|
|
"browserbase": "Browserbase",
|
|
"browser-use": "Browser Use",
|
|
"firecrawl": "Firecrawl",
|
|
"camofox": "Camofox",
|
|
"local": "Local browser",
|
|
}
|
|
return mapping.get(current_provider or "local", current_provider or "Local browser")
|
|
|
|
|
|
def _tts_label(current_provider: str) -> str:
|
|
mapping = {
|
|
"openai": "OpenAI TTS",
|
|
"elevenlabs": "ElevenLabs",
|
|
"edge": "Edge TTS",
|
|
"xai": "xAI TTS",
|
|
"mistral": "Mistral Voxtral TTS",
|
|
"neutts": "NeuTTS",
|
|
}
|
|
return mapping.get(current_provider or "edge", current_provider or "Edge TTS")
|
|
|
|
|
|
def _stt_label(current_provider: str) -> str:
|
|
mapping = {
|
|
"openai": "OpenAI Whisper",
|
|
"groq": "Groq Whisper",
|
|
"mistral": "Mistral Voxtral Transcribe",
|
|
"local": "Local faster-whisper",
|
|
}
|
|
return mapping.get(current_provider or "local", current_provider or "Local faster-whisper")
|
|
|
|
|
|
def _resolve_browser_feature_state(
|
|
*,
|
|
browser_tool_enabled: bool,
|
|
browser_provider: str,
|
|
browser_provider_explicit: bool,
|
|
browser_local_available: bool,
|
|
direct_camofox: bool,
|
|
direct_browserbase: bool,
|
|
direct_browser_use: bool,
|
|
direct_firecrawl: bool,
|
|
managed_browser_available: bool,
|
|
) -> tuple[str, bool, bool, bool]:
|
|
"""Resolve browser availability using the same precedence as runtime."""
|
|
if direct_camofox:
|
|
return "camofox", True, bool(browser_tool_enabled), False
|
|
|
|
if browser_provider_explicit:
|
|
current_provider = browser_provider or "local"
|
|
if current_provider == "browserbase":
|
|
available = bool(browser_local_available and direct_browserbase)
|
|
active = bool(browser_tool_enabled and available)
|
|
return current_provider, available, active, False
|
|
if current_provider == "browser-use":
|
|
provider_available = managed_browser_available or direct_browser_use
|
|
available = bool(browser_local_available and provider_available)
|
|
managed = bool(
|
|
browser_tool_enabled
|
|
and browser_local_available
|
|
and managed_browser_available
|
|
and not direct_browser_use
|
|
)
|
|
active = bool(browser_tool_enabled and available)
|
|
return current_provider, available, active, managed
|
|
if current_provider == "firecrawl":
|
|
available = bool(browser_local_available and direct_firecrawl)
|
|
active = bool(browser_tool_enabled and available)
|
|
return current_provider, available, active, False
|
|
if current_provider == "camofox":
|
|
return current_provider, False, False, False
|
|
|
|
current_provider = "local"
|
|
available = bool(browser_local_available)
|
|
active = bool(browser_tool_enabled and available)
|
|
return current_provider, available, active, False
|
|
|
|
if managed_browser_available or direct_browser_use:
|
|
available = bool(browser_local_available)
|
|
managed = bool(
|
|
browser_tool_enabled
|
|
and browser_local_available
|
|
and managed_browser_available
|
|
and not direct_browser_use
|
|
)
|
|
active = bool(browser_tool_enabled and available)
|
|
return "browser-use", available, active, managed
|
|
|
|
if direct_browserbase:
|
|
available = bool(browser_local_available)
|
|
active = bool(browser_tool_enabled and available)
|
|
return "browserbase", available, active, False
|
|
|
|
available = bool(browser_local_available)
|
|
active = bool(browser_tool_enabled and available)
|
|
return "local", available, active, False
|
|
|
|
|
|
def get_nous_subscription_features(
|
|
config: Optional[Dict[str, object]] = None,
|
|
) -> NousSubscriptionFeatures:
|
|
if config is None:
|
|
config = load_config() or {}
|
|
config = dict(config)
|
|
model_cfg = _model_config_dict(config)
|
|
provider_is_nous = str(model_cfg.get("provider") or "").strip().lower() == "nous"
|
|
|
|
try:
|
|
nous_status = get_nous_auth_status()
|
|
except Exception:
|
|
nous_status = {}
|
|
|
|
managed_tools_flag = managed_nous_tools_enabled()
|
|
nous_auth_present = bool(nous_status.get("logged_in"))
|
|
subscribed = provider_is_nous or nous_auth_present
|
|
|
|
web_tool_enabled = _toolset_enabled(config, "web")
|
|
image_tool_enabled = _toolset_enabled(config, "image_gen")
|
|
tts_tool_enabled = _toolset_enabled(config, "tts")
|
|
browser_tool_enabled = _toolset_enabled(config, "browser")
|
|
modal_tool_enabled = _toolset_enabled(config, "terminal")
|
|
|
|
web_cfg = config.get("web") if isinstance(config.get("web"), dict) else {}
|
|
tts_cfg = config.get("tts") if isinstance(config.get("tts"), dict) else {}
|
|
stt_cfg = config.get("stt") if isinstance(config.get("stt"), dict) else {}
|
|
browser_cfg = config.get("browser") if isinstance(config.get("browser"), dict) else {}
|
|
terminal_cfg = config.get("terminal") if isinstance(config.get("terminal"), dict) else {}
|
|
|
|
web_backend = str(web_cfg.get("backend") or "").strip().lower()
|
|
# Per-capability overrides: if set, they determine which backend is active for
|
|
# search/extract independently of web.backend.
|
|
web_search_backend = str(web_cfg.get("search_backend") or "").strip().lower()
|
|
web_extract_backend = str(web_cfg.get("extract_backend") or "").strip().lower()
|
|
tts_provider = str(tts_cfg.get("provider") or "edge").strip().lower()
|
|
# STT default is "local" (faster-whisper) per DEFAULT_CONFIG, which
|
|
# requires `pip install faster-whisper`. For Nous subscribers we'd
|
|
# rather route through the managed OpenAI audio gateway — see
|
|
# apply_nous_managed_defaults below.
|
|
stt_provider = str(stt_cfg.get("provider") or "local").strip().lower()
|
|
browser_provider_explicit = "cloud_provider" in browser_cfg
|
|
browser_provider = normalize_browser_cloud_provider(
|
|
browser_cfg.get("cloud_provider") if browser_provider_explicit else None
|
|
)
|
|
terminal_backend = (
|
|
str(terminal_cfg.get("backend") or "local").strip().lower()
|
|
)
|
|
modal_mode = normalize_modal_mode(
|
|
terminal_cfg.get("modal_mode")
|
|
)
|
|
|
|
# use_gateway flags — when True, the user explicitly opted into the
|
|
# Tool Gateway via `hermes model`, so direct credentials should NOT
|
|
# prevent gateway routing.
|
|
web_use_gateway = _uses_gateway(web_cfg)
|
|
tts_use_gateway = _uses_gateway(tts_cfg)
|
|
stt_use_gateway = _uses_gateway(stt_cfg)
|
|
browser_use_gateway = _uses_gateway(browser_cfg)
|
|
image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}
|
|
image_use_gateway = _uses_gateway(image_gen_cfg)
|
|
|
|
direct_exa = bool(get_env_value("EXA_API_KEY"))
|
|
direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
|
|
direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
|
|
direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
|
|
direct_searxng = bool(get_env_value("SEARXNG_URL"))
|
|
direct_fal = fal_key_is_configured()
|
|
direct_openai_tts = bool(resolve_openai_audio_api_key())
|
|
direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
|
|
direct_camofox = bool(get_env_value("CAMOFOX_URL"))
|
|
direct_browserbase = bool(get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID"))
|
|
direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY"))
|
|
direct_modal = has_direct_modal_credentials()
|
|
|
|
# STT direct providers. OpenAI Whisper reuses the same audio key as
|
|
# OpenAI TTS — resolve_openai_audio_api_key() reads VOICE_TOOLS_OPENAI_KEY
|
|
# and falls back to OPENAI_API_KEY. The local provider's "direct"
|
|
# signal is whether faster-whisper is importable; we lazy-import so
|
|
# this module stays cheap on the happy path.
|
|
direct_openai_stt = bool(resolve_openai_audio_api_key())
|
|
direct_groq_stt = bool(get_env_value("GROQ_API_KEY"))
|
|
direct_mistral_stt = bool(get_env_value("MISTRAL_API_KEY"))
|
|
try:
|
|
from tools.transcription_tools import _HAS_FASTER_WHISPER
|
|
local_stt_available = bool(_HAS_FASTER_WHISPER) or bool(
|
|
get_env_value("HERMES_LOCAL_STT_COMMAND")
|
|
)
|
|
except Exception:
|
|
local_stt_available = bool(get_env_value("HERMES_LOCAL_STT_COMMAND"))
|
|
|
|
# When use_gateway is set, suppress direct credentials for managed detection
|
|
if web_use_gateway:
|
|
direct_firecrawl = False
|
|
direct_exa = False
|
|
direct_parallel = False
|
|
direct_tavily = False
|
|
if image_use_gateway:
|
|
direct_fal = False
|
|
if tts_use_gateway:
|
|
direct_openai_tts = False
|
|
direct_elevenlabs = False
|
|
if stt_use_gateway:
|
|
direct_openai_stt = False
|
|
direct_groq_stt = False
|
|
direct_mistral_stt = False
|
|
local_stt_available = False
|
|
if browser_use_gateway:
|
|
direct_browser_use = False
|
|
direct_browserbase = False
|
|
|
|
managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
|
|
managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
|
|
managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
|
|
# STT and TTS share the same managed gateway endpoint ("openai-audio")
|
|
# because the OpenAI audio API covers both /audio/speech (TTS) and
|
|
# /audio/transcriptions (STT). One probe, used by both.
|
|
managed_stt_available = managed_tts_available
|
|
managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use")
|
|
managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
|
|
modal_state = resolve_modal_backend_state(
|
|
modal_mode,
|
|
has_direct=direct_modal,
|
|
managed_ready=managed_modal_available,
|
|
)
|
|
|
|
web_managed = web_backend == "firecrawl" and managed_web_available and not direct_firecrawl
|
|
web_active = bool(
|
|
web_tool_enabled
|
|
and (
|
|
web_managed
|
|
or (web_backend == "exa" and direct_exa)
|
|
or (web_backend == "firecrawl" and direct_firecrawl)
|
|
or (web_backend == "parallel" and direct_parallel)
|
|
or (web_backend == "tavily" and direct_tavily)
|
|
or (web_backend == "searxng" and direct_searxng)
|
|
# Per-capability overrides: search_backend or extract_backend may be set
|
|
# without web.backend (using the new split config from #20061)
|
|
or (web_search_backend == "searxng" and direct_searxng)
|
|
or (web_search_backend == "exa" and direct_exa)
|
|
or (web_search_backend == "firecrawl" and direct_firecrawl)
|
|
or (web_search_backend == "parallel" and direct_parallel)
|
|
or (web_search_backend == "tavily" and direct_tavily)
|
|
)
|
|
)
|
|
web_available = bool(
|
|
managed_web_available or direct_exa or direct_firecrawl or direct_parallel or direct_tavily or direct_searxng
|
|
)
|
|
|
|
image_managed = image_tool_enabled and managed_image_available and not direct_fal
|
|
image_active = bool(image_tool_enabled and (image_managed or direct_fal))
|
|
image_available = bool(managed_image_available or direct_fal)
|
|
|
|
tts_current_provider = tts_provider or "edge"
|
|
tts_managed = (
|
|
tts_tool_enabled
|
|
and tts_current_provider == "openai"
|
|
and managed_tts_available
|
|
and not direct_openai_tts
|
|
)
|
|
tts_available = bool(
|
|
tts_current_provider in {"edge", "neutts"}
|
|
or (tts_current_provider == "openai" and (managed_tts_available or direct_openai_tts))
|
|
or (tts_current_provider == "elevenlabs" and direct_elevenlabs)
|
|
or (tts_current_provider == "mistral" and bool(get_env_value("MISTRAL_API_KEY")))
|
|
)
|
|
tts_active = bool(tts_tool_enabled and tts_available)
|
|
|
|
# STT availability per provider. Unlike TTS, STT isn't a model-callable
|
|
# tool — the gateway voice middleware calls it on every inbound voice
|
|
# message — so toolset_enabled is N/A and we treat stt as always
|
|
# "enabled" if a usable provider is configured.
|
|
stt_current_provider = stt_provider or "local"
|
|
stt_managed = (
|
|
stt_current_provider == "openai"
|
|
and managed_stt_available
|
|
and not direct_openai_stt
|
|
)
|
|
stt_available = bool(
|
|
(stt_current_provider == "local" and local_stt_available)
|
|
or (stt_current_provider == "openai" and (managed_stt_available or direct_openai_stt))
|
|
or (stt_current_provider == "groq" and direct_groq_stt)
|
|
or (stt_current_provider == "mistral" and direct_mistral_stt)
|
|
)
|
|
stt_active = stt_available
|
|
|
|
browser_local_available = _has_agent_browser()
|
|
(
|
|
browser_current_provider,
|
|
browser_available,
|
|
browser_active,
|
|
browser_managed,
|
|
) = _resolve_browser_feature_state(
|
|
browser_tool_enabled=browser_tool_enabled,
|
|
browser_provider=browser_provider,
|
|
browser_provider_explicit=browser_provider_explicit,
|
|
browser_local_available=browser_local_available,
|
|
direct_camofox=direct_camofox,
|
|
direct_browserbase=direct_browserbase,
|
|
direct_browser_use=direct_browser_use,
|
|
direct_firecrawl=direct_firecrawl,
|
|
managed_browser_available=managed_browser_available,
|
|
)
|
|
|
|
if terminal_backend != "modal":
|
|
modal_managed = False
|
|
modal_available = True
|
|
modal_active = bool(modal_tool_enabled)
|
|
modal_direct_override = False
|
|
elif modal_state["selected_backend"] == "managed":
|
|
modal_managed = bool(modal_tool_enabled)
|
|
modal_available = True
|
|
modal_active = bool(modal_tool_enabled)
|
|
modal_direct_override = False
|
|
elif modal_state["selected_backend"] == "direct":
|
|
modal_managed = False
|
|
modal_available = True
|
|
modal_active = bool(modal_tool_enabled)
|
|
modal_direct_override = bool(modal_tool_enabled)
|
|
elif modal_mode == "managed":
|
|
modal_managed = False
|
|
modal_available = bool(managed_modal_available)
|
|
modal_active = False
|
|
modal_direct_override = False
|
|
elif modal_mode == "direct":
|
|
modal_managed = False
|
|
modal_available = bool(direct_modal)
|
|
modal_active = False
|
|
modal_direct_override = False
|
|
else:
|
|
modal_managed = False
|
|
modal_available = bool(managed_modal_available or direct_modal)
|
|
modal_active = False
|
|
modal_direct_override = False
|
|
|
|
tts_explicit_configured = False
|
|
raw_tts_cfg = config.get("tts")
|
|
if isinstance(raw_tts_cfg, dict) and "provider" in raw_tts_cfg:
|
|
tts_explicit_configured = tts_provider not in {"", "edge"}
|
|
|
|
# STT considers any non-default provider explicit. "local" is the
|
|
# DEFAULT_CONFIG seed, so seeing it doesn't mean the user picked it.
|
|
stt_explicit_configured = False
|
|
raw_stt_cfg = config.get("stt")
|
|
if isinstance(raw_stt_cfg, dict) and "provider" in raw_stt_cfg:
|
|
stt_explicit_configured = stt_provider not in {"", "local"}
|
|
|
|
features = {
|
|
"web": NousFeatureState(
|
|
key="web",
|
|
label="Web tools",
|
|
included_by_default=True,
|
|
available=web_available,
|
|
active=web_active,
|
|
managed_by_nous=web_managed,
|
|
direct_override=web_active and not web_managed,
|
|
toolset_enabled=web_tool_enabled,
|
|
current_provider=web_backend or web_search_backend or "",
|
|
explicit_configured=bool(web_backend or web_search_backend),
|
|
),
|
|
"image_gen": NousFeatureState(
|
|
key="image_gen",
|
|
label="Image generation",
|
|
included_by_default=True,
|
|
available=image_available,
|
|
active=image_active,
|
|
managed_by_nous=image_managed,
|
|
direct_override=image_active and not image_managed,
|
|
toolset_enabled=image_tool_enabled,
|
|
current_provider="FAL" if direct_fal else ("Nous Subscription" if image_managed else ""),
|
|
explicit_configured=direct_fal,
|
|
),
|
|
"tts": NousFeatureState(
|
|
key="tts",
|
|
label="OpenAI TTS",
|
|
included_by_default=True,
|
|
available=tts_available,
|
|
active=tts_active,
|
|
managed_by_nous=tts_managed,
|
|
direct_override=tts_active and not tts_managed,
|
|
toolset_enabled=tts_tool_enabled,
|
|
current_provider=_tts_label(tts_current_provider),
|
|
explicit_configured=tts_explicit_configured,
|
|
),
|
|
"stt": NousFeatureState(
|
|
key="stt",
|
|
label="Speech-to-text",
|
|
included_by_default=True,
|
|
available=stt_available,
|
|
active=stt_active,
|
|
managed_by_nous=stt_managed,
|
|
direct_override=stt_active and not stt_managed,
|
|
# STT isn't toolset-gated (gateway middleware calls it
|
|
# unconditionally on inbound voice), so report True so the
|
|
# status display doesn't flag it as "tool disabled".
|
|
toolset_enabled=True,
|
|
current_provider=_stt_label(stt_current_provider),
|
|
explicit_configured=stt_explicit_configured,
|
|
),
|
|
"browser": NousFeatureState(
|
|
key="browser",
|
|
label="Browser automation",
|
|
included_by_default=True,
|
|
available=browser_available,
|
|
active=browser_active,
|
|
managed_by_nous=browser_managed,
|
|
direct_override=browser_active and not browser_managed,
|
|
toolset_enabled=browser_tool_enabled,
|
|
current_provider=_browser_label(browser_current_provider),
|
|
explicit_configured=browser_provider_explicit,
|
|
),
|
|
"modal": NousFeatureState(
|
|
key="modal",
|
|
label="Modal execution",
|
|
included_by_default=False,
|
|
available=modal_available,
|
|
active=modal_active,
|
|
managed_by_nous=modal_managed,
|
|
direct_override=terminal_backend == "modal" and modal_direct_override,
|
|
toolset_enabled=modal_tool_enabled,
|
|
current_provider="Modal" if terminal_backend == "modal" else terminal_backend or "local",
|
|
explicit_configured=terminal_backend == "modal",
|
|
),
|
|
}
|
|
|
|
return NousSubscriptionFeatures(
|
|
subscribed=subscribed,
|
|
nous_auth_present=nous_auth_present,
|
|
provider_is_nous=provider_is_nous,
|
|
features=features,
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
def apply_nous_managed_defaults(
|
|
config: Dict[str, object],
|
|
*,
|
|
enabled_toolsets: Optional[Iterable[str]] = None,
|
|
) -> set[str]:
|
|
if not managed_nous_tools_enabled():
|
|
return set()
|
|
|
|
features = get_nous_subscription_features(config)
|
|
if not features.provider_is_nous:
|
|
return set()
|
|
|
|
selected_toolsets = set(enabled_toolsets or ())
|
|
changed: set[str] = set()
|
|
|
|
web_cfg = config.get("web")
|
|
if not isinstance(web_cfg, dict):
|
|
web_cfg = {}
|
|
config["web"] = web_cfg
|
|
|
|
tts_cfg = config.get("tts")
|
|
if not isinstance(tts_cfg, dict):
|
|
tts_cfg = {}
|
|
config["tts"] = tts_cfg
|
|
|
|
stt_cfg = config.get("stt")
|
|
if not isinstance(stt_cfg, dict):
|
|
stt_cfg = {}
|
|
config["stt"] = stt_cfg
|
|
|
|
browser_cfg = config.get("browser")
|
|
if not isinstance(browser_cfg, dict):
|
|
browser_cfg = {}
|
|
config["browser"] = browser_cfg
|
|
|
|
if "web" in selected_toolsets and not features.web.explicit_configured and not (
|
|
get_env_value("PARALLEL_API_KEY")
|
|
or get_env_value("TAVILY_API_KEY")
|
|
or get_env_value("FIRECRAWL_API_KEY")
|
|
or get_env_value("FIRECRAWL_API_URL")
|
|
):
|
|
web_cfg["backend"] = "firecrawl"
|
|
changed.add("web")
|
|
|
|
if "tts" in selected_toolsets and not features.tts.explicit_configured and not (
|
|
resolve_openai_audio_api_key()
|
|
or get_env_value("ELEVENLABS_API_KEY")
|
|
):
|
|
tts_cfg["provider"] = "openai"
|
|
changed.add("tts")
|
|
|
|
# STT: same pattern as TTS. The DEFAULT_CONFIG seed is "local"
|
|
# (requires `pip install faster-whisper`); for Nous subscribers we
|
|
# flip it to "openai" so the managed audio gateway handles transcription
|
|
# via the same auth as TTS. Skipped when the user has explicitly
|
|
# configured STT or has direct credentials for a non-managed provider.
|
|
if not features.stt.explicit_configured and not (
|
|
get_env_value("GROQ_API_KEY")
|
|
or get_env_value("MISTRAL_API_KEY")
|
|
):
|
|
stt_cfg["provider"] = "openai"
|
|
changed.add("stt")
|
|
|
|
if "browser" in selected_toolsets and not features.browser.explicit_configured and not (
|
|
get_env_value("BROWSER_USE_API_KEY")
|
|
or get_env_value("BROWSERBASE_API_KEY")
|
|
):
|
|
browser_cfg["cloud_provider"] = "browser-use"
|
|
changed.add("browser")
|
|
|
|
if "image_gen" in selected_toolsets and not fal_key_is_configured():
|
|
changed.add("image_gen")
|
|
|
|
return changed
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tool Gateway offer — single Y/n prompt after model selection
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_GATEWAY_TOOL_LABELS = {
|
|
"web": "Web search & extract (Firecrawl)",
|
|
"image_gen": "Image generation (FAL)",
|
|
"tts": "Text-to-speech (OpenAI TTS)",
|
|
"stt": "Speech-to-text (OpenAI Whisper)",
|
|
"browser": "Browser automation (Browser Use)",
|
|
}
|
|
|
|
|
|
def _get_gateway_direct_credentials() -> Dict[str, bool]:
|
|
"""Return a dict of tool_key -> has_direct_credentials."""
|
|
return {
|
|
"web": bool(
|
|
get_env_value("FIRECRAWL_API_KEY")
|
|
or get_env_value("FIRECRAWL_API_URL")
|
|
or get_env_value("PARALLEL_API_KEY")
|
|
or get_env_value("TAVILY_API_KEY")
|
|
or get_env_value("EXA_API_KEY")
|
|
),
|
|
"image_gen": fal_key_is_configured(),
|
|
"tts": bool(
|
|
resolve_openai_audio_api_key()
|
|
or get_env_value("ELEVENLABS_API_KEY")
|
|
),
|
|
# STT direct credentials. OpenAI Whisper shares the audio key
|
|
# with TTS via resolve_openai_audio_api_key() — counting it here
|
|
# too is intentional: if the user has an OpenAI audio key they
|
|
# don't need the gateway for either.
|
|
"stt": bool(
|
|
resolve_openai_audio_api_key()
|
|
or get_env_value("GROQ_API_KEY")
|
|
or get_env_value("MISTRAL_API_KEY")
|
|
),
|
|
"browser": bool(
|
|
get_env_value("BROWSER_USE_API_KEY")
|
|
or (get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID"))
|
|
),
|
|
}
|
|
|
|
|
|
_GATEWAY_DIRECT_LABELS = {
|
|
"web": "Firecrawl/Exa/Parallel/Tavily key",
|
|
"image_gen": "FAL key",
|
|
"tts": "OpenAI/ElevenLabs key",
|
|
"stt": "OpenAI/Groq/Mistral key",
|
|
"browser": "Browser Use/Browserbase key",
|
|
}
|
|
|
|
_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "stt", "browser")
|
|
|
|
|
|
def get_gateway_eligible_tools(
|
|
config: Optional[Dict[str, object]] = None,
|
|
) -> tuple[list[str], list[str], list[str]]:
|
|
"""Return (unconfigured, has_direct, already_managed) tool key lists.
|
|
|
|
- unconfigured: tools with no direct credentials (easy switch)
|
|
- has_direct: tools where the user has their own API keys
|
|
- already_managed: tools already routed through the gateway
|
|
|
|
All lists are empty when the user is not a paid Nous subscriber or
|
|
is not using Nous as their provider.
|
|
"""
|
|
if not managed_nous_tools_enabled():
|
|
return [], [], []
|
|
|
|
if config is None:
|
|
config = load_config() or {}
|
|
|
|
# Quick provider check without the heavy get_nous_subscription_features call
|
|
model_cfg = config.get("model")
|
|
if not isinstance(model_cfg, dict) or str(model_cfg.get("provider") or "").strip().lower() != "nous":
|
|
return [], [], []
|
|
|
|
direct = _get_gateway_direct_credentials()
|
|
|
|
# Check which tools the user has explicitly opted into the gateway for.
|
|
# This is distinct from managed_by_nous which fires implicitly when
|
|
# no direct keys exist — we only skip the prompt for tools where
|
|
# use_gateway was explicitly set.
|
|
opted_in = {
|
|
"web": _uses_gateway(config.get("web")),
|
|
"image_gen": _uses_gateway(config.get("image_gen")),
|
|
"tts": _uses_gateway(config.get("tts")),
|
|
"stt": _uses_gateway(config.get("stt")),
|
|
"browser": _uses_gateway(config.get("browser")),
|
|
}
|
|
|
|
unconfigured: list[str] = []
|
|
has_direct: list[str] = []
|
|
already_managed: list[str] = []
|
|
for key in _ALL_GATEWAY_KEYS:
|
|
if opted_in.get(key):
|
|
already_managed.append(key)
|
|
elif direct.get(key):
|
|
has_direct.append(key)
|
|
else:
|
|
unconfigured.append(key)
|
|
return unconfigured, has_direct, already_managed
|
|
|
|
|
|
def apply_gateway_defaults(
|
|
config: Dict[str, object],
|
|
tool_keys: list[str],
|
|
) -> set[str]:
|
|
"""Apply Tool Gateway config for the given tool keys.
|
|
|
|
Sets ``use_gateway: true`` in each tool's config section so the
|
|
runtime prefers the gateway even when direct API keys are present.
|
|
|
|
Returns the set of tools that were actually changed.
|
|
"""
|
|
changed: set[str] = set()
|
|
|
|
web_cfg = config.get("web")
|
|
if not isinstance(web_cfg, dict):
|
|
web_cfg = {}
|
|
config["web"] = web_cfg
|
|
|
|
tts_cfg = config.get("tts")
|
|
if not isinstance(tts_cfg, dict):
|
|
tts_cfg = {}
|
|
config["tts"] = tts_cfg
|
|
|
|
stt_cfg = config.get("stt")
|
|
if not isinstance(stt_cfg, dict):
|
|
stt_cfg = {}
|
|
config["stt"] = stt_cfg
|
|
|
|
browser_cfg = config.get("browser")
|
|
if not isinstance(browser_cfg, dict):
|
|
browser_cfg = {}
|
|
config["browser"] = browser_cfg
|
|
|
|
if "web" in tool_keys:
|
|
web_cfg["backend"] = "firecrawl"
|
|
web_cfg["use_gateway"] = True
|
|
changed.add("web")
|
|
|
|
if "tts" in tool_keys:
|
|
tts_cfg["provider"] = "openai"
|
|
tts_cfg["use_gateway"] = True
|
|
changed.add("tts")
|
|
|
|
if "stt" in tool_keys:
|
|
stt_cfg["provider"] = "openai"
|
|
stt_cfg["use_gateway"] = True
|
|
changed.add("stt")
|
|
|
|
if "browser" in tool_keys:
|
|
browser_cfg["cloud_provider"] = "browser-use"
|
|
browser_cfg["use_gateway"] = True
|
|
changed.add("browser")
|
|
|
|
if "image_gen" in tool_keys:
|
|
image_cfg = config.get("image_gen")
|
|
if not isinstance(image_cfg, dict):
|
|
image_cfg = {}
|
|
config["image_gen"] = image_cfg
|
|
image_cfg["use_gateway"] = True
|
|
changed.add("image_gen")
|
|
|
|
return changed
|
|
|
|
|
|
def prompt_enable_tool_gateway(config: Dict[str, object]) -> set[str]:
|
|
"""If eligible tools exist, prompt the user to enable the Tool Gateway.
|
|
|
|
Uses prompt_choice() with a description parameter so the curses TUI
|
|
shows the tool context alongside the choices.
|
|
|
|
Returns the set of tools that were enabled, or empty set if the user
|
|
declined or no tools were eligible.
|
|
"""
|
|
unconfigured, has_direct, already_managed = get_gateway_eligible_tools(config)
|
|
if not unconfigured and not has_direct:
|
|
return set()
|
|
|
|
try:
|
|
from hermes_cli.setup import prompt_choice
|
|
except Exception:
|
|
return set()
|
|
|
|
# Build description lines showing full status of all gateway tools
|
|
desc_parts: list[str] = [
|
|
"",
|
|
" The Tool Gateway gives you access to web search, image generation,",
|
|
" text-to-speech, speech-to-text, and browser automation through your",
|
|
" Nous subscription. No need to sign up for separate API keys — just",
|
|
" pick the tools you want.",
|
|
"",
|
|
]
|
|
if already_managed:
|
|
for k in already_managed:
|
|
desc_parts.append(f" ✓ {_GATEWAY_TOOL_LABELS[k]} — using Tool Gateway")
|
|
if unconfigured:
|
|
for k in unconfigured:
|
|
desc_parts.append(f" ○ {_GATEWAY_TOOL_LABELS[k]} — not configured")
|
|
if has_direct:
|
|
for k in has_direct:
|
|
desc_parts.append(f" ○ {_GATEWAY_TOOL_LABELS[k]} — using {_GATEWAY_DIRECT_LABELS[k]}")
|
|
|
|
# Build short choice labels — detail is in the description above
|
|
choices: list[str] = []
|
|
choice_keys: list[str] = [] # maps choice index -> action
|
|
|
|
if unconfigured and has_direct:
|
|
choices.append("Enable for all tools (existing keys kept, not used)")
|
|
choice_keys.append("all")
|
|
|
|
choices.append("Enable only for tools without existing keys")
|
|
choice_keys.append("unconfigured")
|
|
|
|
choices.append("Skip")
|
|
choice_keys.append("skip")
|
|
|
|
elif unconfigured:
|
|
choices.append("Enable Tool Gateway")
|
|
choice_keys.append("unconfigured")
|
|
|
|
choices.append("Skip")
|
|
choice_keys.append("skip")
|
|
|
|
else:
|
|
choices.append("Enable Tool Gateway (existing keys kept, not used)")
|
|
choice_keys.append("all")
|
|
|
|
choices.append("Skip")
|
|
choice_keys.append("skip")
|
|
|
|
description = "\n".join(desc_parts) if desc_parts else None
|
|
# Default to "Enable" when user has no direct keys (new user),
|
|
# default to "Skip" when they have existing keys to preserve.
|
|
default_idx = 0 if not has_direct else len(choices) - 1
|
|
|
|
try:
|
|
idx = prompt_choice(
|
|
"Your Nous subscription includes the Tool Gateway.",
|
|
choices,
|
|
default_idx,
|
|
description=description,
|
|
)
|
|
except (KeyboardInterrupt, EOFError, OSError, SystemExit):
|
|
return set()
|
|
|
|
action = choice_keys[idx]
|
|
if action == "skip":
|
|
return set()
|
|
|
|
if action == "all":
|
|
# Apply to switchable tools + ensure already-managed tools also
|
|
# have use_gateway persisted in config for consistency.
|
|
to_apply = list(_ALL_GATEWAY_KEYS)
|
|
else:
|
|
to_apply = unconfigured
|
|
|
|
changed = apply_gateway_defaults(config, to_apply)
|
|
if changed:
|
|
from hermes_cli.config import save_config
|
|
save_config(config)
|
|
# Only report the tools that actually switched (not already-managed ones)
|
|
newly_switched = changed - set(already_managed)
|
|
for key in sorted(newly_switched):
|
|
label = _GATEWAY_TOOL_LABELS.get(key, key)
|
|
print(f" ✓ {label}: enabled via Nous subscription")
|
|
if already_managed and not newly_switched:
|
|
print(" (all tools already using Tool Gateway)")
|
|
return changed
|