diff --git a/hermes_cli/provider_catalog.py b/hermes_cli/provider_catalog.py new file mode 100644 index 00000000000..6dba5d8842f --- /dev/null +++ b/hermes_cli/provider_catalog.py @@ -0,0 +1,170 @@ +"""Unified provider catalog — one source of truth for the provider universe. + +The provider list shown by ``hermes model`` (CLI/TUI) and the desktop Settings +→ Providers tabs (Accounts + API keys) **must be the same set**. Historically +they were not: the CLI picker read :data:`hermes_cli.models.CANONICAL_PROVIDERS` +(which auto-extends from ``plugins/model-providers//``), while the desktop +tabs read separate hand-maintained lists (``_OAUTH_PROVIDER_CATALOG``, +``OPTIONAL_ENV_VARS`` + ``PROVIDER_GROUPS``) that nobody kept in sync. Every +provider added after those lists were written silently went missing from the +GUI — e.g. GitHub Copilot showing up only under "tools", or ``openai-api`` being +configurable from the CLI but not the desktop app. + +This module fixes that at the root: it derives ONE descriptor per provider from +the same universe ``hermes model`` renders (``CANONICAL_PROVIDERS``), joining: + +* ``auth_type`` / ``api_key_env_vars`` / ``base_url_env_var`` from + :data:`hermes_cli.auth.PROVIDER_REGISTRY` (credential truth), and +* ``display_name`` / ``description`` / ``signup_url`` from the provider's + :class:`providers.base.ProviderProfile` when one exists, falling back to the + ``CANONICAL_PROVIDERS`` entry's ``label`` / ``tui_desc`` and the + ``OPTIONAL_ENV_VARS`` signup URL otherwise (many profiles leave these blank, + and four canonical providers have no profile at all — lmstudio, openai-api, + tencent-tokenhub, xai-oauth — so the fallbacks are load-bearing). + +Each descriptor is tagged with the ``tab`` it belongs on (``keys`` vs +``accounts``) based purely on how the provider authenticates. The desktop +``/api/env`` and ``/api/providers/oauth`` endpoints derive their MEMBERSHIP from +this catalog; the old hand lists are demoted to presentation/override overlays +(bespoke OAuth flow + status resolvers, richer copy, icons, ordering) and no +longer decide which providers exist. + +Parity contract (locked by tests): the union of the two tabs equals the +``CANONICAL_PROVIDERS`` universe, i.e. exactly what ``hermes model`` shows. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +# Auth types that authenticate via an account / sign-in flow rather than a +# pasted API key. These route to the desktop "Accounts" tab; everything else +# (api_key, and aws_sdk which is configured via AWS_REGION/AWS_PROFILE) routes +# to the "API keys" tab. Mirrors the auth_type strings used in +# hermes_cli.auth.PROVIDER_REGISTRY and providers.base.ProviderProfile. +_ACCOUNTS_AUTH_TYPES: frozenset[str] = frozenset( + { + "oauth_device_code", + "oauth_external", + "oauth_minimax", + "external_process", # copilot-acp: spawns `copilot --acp --stdio` + "copilot", # GitHub Copilot token / gh auth + } +) + + +@dataclass(frozen=True) +class ProviderDescriptor: + """One provider, as seen by every surface (CLI picker + both GUI tabs).""" + + slug: str # canonical id, e.g. "google-gemini-cli" + label: str # human display name + description: str # one-line description + auth_type: str # api_key | oauth_* | external_process | copilot | aws_sdk + tab: str # "keys" | "accounts" + api_key_env_vars: tuple[str, ...] # credential env vars (may be empty) + base_url_env_var: str # base-URL override env var (may be "") + signup_url: str # signup / console URL (may be "") + order: int # CANONICAL_PROVIDERS index — mirrors `hermes model` + + +def tab_for_auth_type(auth_type: str) -> str: + """Return the desktop tab ("keys"|"accounts") a provider's auth maps to.""" + return "accounts" if auth_type in _ACCOUNTS_AUTH_TYPES else "keys" + + +def _split_env_vars(env_vars: tuple[str, ...]) -> tuple[tuple[str, ...], str]: + """Split a profile's ``env_vars`` into (api_key_vars, base_url_var).""" + keys = tuple(v for v in env_vars if not (v.endswith("_BASE_URL") or v.endswith("_URL"))) + base = next((v for v in env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")), "") + return keys, base + + +def provider_catalog() -> list[ProviderDescriptor]: + """Return one descriptor per provider in the ``hermes model`` universe. + + Membership is :data:`CANONICAL_PROVIDERS` (the list the CLI/TUI picker + renders, which auto-extends from provider plugins). Auth + env come from + ``PROVIDER_REGISTRY``; display metadata from ``ProviderProfile`` with + canonical/env fallbacks so providers without a profile (or with blank + profile metadata) still resolve sensibly. + """ + from hermes_cli.models import CANONICAL_PROVIDERS + + # PROVIDER_REGISTRY / list_providers are imported lazily and defensively: + # this module is on the import path of the web server and the CLI, and we + # never want a provider-plugin import error to blank the whole catalog. + try: + from hermes_cli.auth import PROVIDER_REGISTRY + except Exception: + PROVIDER_REGISTRY = {} + + try: + from providers import list_providers + + profiles = {p.name: p for p in list_providers()} + except Exception: + profiles = {} + + try: + from hermes_cli.config import OPTIONAL_ENV_VARS + except Exception: + OPTIONAL_ENV_VARS = {} + + out: list[ProviderDescriptor] = [] + for order, entry in enumerate(CANONICAL_PROVIDERS): + slug = entry.slug + cfg = PROVIDER_REGISTRY.get(slug) + prof = profiles.get(slug) + + # auth_type: registry is authoritative; fall back to profile, then api_key. + auth_type = ( + (getattr(cfg, "auth_type", "") if cfg else "") + or (getattr(prof, "auth_type", "") if prof else "") + or "api_key" + ) + + # Credential env vars: registry first (it already normalizes these), + # else derive from the profile's env_vars tuple. + if cfg and getattr(cfg, "api_key_env_vars", ()): + api_key_vars = tuple(cfg.api_key_env_vars) + base_url_var = getattr(cfg, "base_url_env_var", "") or "" + elif prof and getattr(prof, "env_vars", ()): + api_key_vars, base_url_var = _split_env_vars(tuple(prof.env_vars)) + else: + api_key_vars, base_url_var = (), "" + + label = ( + (getattr(prof, "display_name", "") if prof else "") + or entry.label + or slug + ) + description = ( + (getattr(prof, "description", "") if prof else "") + or entry.tui_desc + or label + ) + signup_url = (getattr(prof, "signup_url", "") if prof else "") or "" + if not signup_url and api_key_vars: + info = OPTIONAL_ENV_VARS.get(api_key_vars[0]) or {} + signup_url = info.get("url") or "" + + out.append( + ProviderDescriptor( + slug=slug, + label=label, + description=description, + auth_type=auth_type, + tab=tab_for_auth_type(auth_type), + api_key_env_vars=api_key_vars, + base_url_env_var=base_url_var, + signup_url=signup_url, + order=order, + ) + ) + return out + + +def provider_catalog_by_slug() -> dict[str, ProviderDescriptor]: + """Convenience: the catalog keyed by slug.""" + return {d.slug: d for d in provider_catalog()} diff --git a/tests/hermes_cli/test_provider_catalog.py b/tests/hermes_cli/test_provider_catalog.py new file mode 100644 index 00000000000..508c18aae75 --- /dev/null +++ b/tests/hermes_cli/test_provider_catalog.py @@ -0,0 +1,127 @@ +"""Tests for the unified provider catalog (hermes_cli.provider_catalog). + +These are invariant tests, not snapshots: they assert the parity *contract* +between what ``hermes model`` shows (``CANONICAL_PROVIDERS``) and what the +catalog exposes, plus how each provider's ``auth_type`` maps to a desktop tab — +never a specific provider count or a frozen vendor list (both change over time). +""" + +from hermes_cli.models import CANONICAL_PROVIDERS +from hermes_cli.provider_catalog import ( + ProviderDescriptor, + provider_catalog, + provider_catalog_by_slug, + tab_for_auth_type, +) + + +def test_catalog_covers_every_hermes_model_provider(): + """PARITY CONTRACT: the catalog == the `hermes model` universe.""" + slugs = {d.slug for d in provider_catalog()} + for entry in CANONICAL_PROVIDERS: + assert entry.slug in slugs, ( + f"{entry.slug} is shown in `hermes model` but missing from provider_catalog()" + ) + + +def test_catalog_has_no_providers_outside_hermes_model(): + """The catalog must not invent providers `hermes model` doesn't show.""" + canonical = {e.slug for e in CANONICAL_PROVIDERS} + for d in provider_catalog(): + assert d.slug in canonical, f"{d.slug} in catalog but not in CANONICAL_PROVIDERS" + + +def test_every_descriptor_lands_on_exactly_one_known_tab(): + for d in provider_catalog(): + assert d.tab in {"keys", "accounts"}, f"{d.slug} has bad tab {d.tab!r}" + + +def test_descriptor_count_matches_canonical(): + """One descriptor per canonical entry (no dupes, no drops).""" + cat = provider_catalog() + assert len(cat) == len(CANONICAL_PROVIDERS) + assert len({d.slug for d in cat}) == len(cat) + + +def test_profileless_providers_still_present(): + """Providers without a ProviderProfile must still resolve via fallbacks. + + lmstudio / openai-api / tencent-tokenhub / xai-oauth have no profile on + main; they exist only as registry + canonical entries. The catalog must + not require a profile to include a provider. + """ + by = provider_catalog_by_slug() + for slug in ("lmstudio", "openai-api", "tencent-tokenhub", "xai-oauth"): + assert slug in by, f"{slug} dropped from catalog (profile-less provider)" + assert by[slug].label, f"{slug} has empty label despite canonical fallback" + assert by[slug].description, f"{slug} has empty description despite fallback" + + +def test_api_key_providers_route_to_keys_oauth_to_accounts(): + by = provider_catalog_by_slug() + # api_key → keys + assert by["kilocode"].tab == "keys" + assert by["openai-api"].tab == "keys" + # account / sign-in flows → accounts + assert by["google-gemini-cli"].tab == "accounts" + assert by["copilot-acp"].tab == "accounts" + + +def test_copilot_surfaces_as_a_provider_with_its_own_token_var(): + """Regression for the reported bug: a GitHub Copilot login showed up under + tools, never as a provider, because the shared GITHUB_TOKEN is tool-category. + + Copilot authenticates via the `copilot`/api_key path, so it belongs on the + keys tab — but its PRIMARY credential var must be the provider-owned + COPILOT_GITHUB_TOKEN, not the shared tool-category GITHUB_TOKEN. That is what + lets the desktop render Copilot as its own provider card. + """ + by = provider_catalog_by_slug() + assert "copilot" in by + d = by["copilot"] + assert d.tab == "keys" + assert d.api_key_env_vars, "Copilot must expose a credential env var" + assert d.api_key_env_vars[0] == "COPILOT_GITHUB_TOKEN", ( + "Copilot's primary var must be the provider-owned token, not shared GITHUB_TOKEN" + ) + + +def test_bedrock_routes_to_keys(): + """Bedrock is aws_sdk (AWS_REGION/AWS_PROFILE), configured on the keys tab.""" + by = provider_catalog_by_slug() + assert by["bedrock"].tab == "keys" + + +def test_api_key_providers_expose_a_credential_env_var(): + """Every keys-tab provider that authenticates via a pasted API key must + surface at least one env var to write the key into (otherwise the GUI can't + configure it). + + Exemptions: ``aws_sdk`` (bedrock — uses AWS_REGION/AWS_PROFILE) and the + ``custom`` bring-your-own-endpoint pseudo-provider, which is configured + inline via the local-endpoint flow rather than a fixed env var. + """ + exempt = {"custom"} + for d in provider_catalog(): + if d.auth_type == "api_key" and d.slug not in exempt: + assert d.api_key_env_vars, f"{d.slug} is api_key but exposes no env var" + + +def test_order_mirrors_canonical_declaration(): + cat = provider_catalog() + assert [d.order for d in cat] == list(range(len(cat))) + assert [d.slug for d in cat] == [e.slug for e in CANONICAL_PROVIDERS] + + +def test_descriptors_are_provider_descriptor_instances(): + for d in provider_catalog(): + assert isinstance(d, ProviderDescriptor) + + +def test_tab_for_auth_type_helper(): + assert tab_for_auth_type("api_key") == "keys" + assert tab_for_auth_type("aws_sdk") == "keys" + assert tab_for_auth_type("oauth_external") == "accounts" + assert tab_for_auth_type("oauth_device_code") == "accounts" + assert tab_for_auth_type("copilot") == "accounts" + assert tab_for_auth_type("external_process") == "accounts"