mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
Add an opt-in Python plugin surface for speech-to-text backends,
mirroring the TTS hook pattern. New backends (OpenRouter, SenseAudio,
Gemini-STT, custom proprietary engines) can be implemented as plugins
without modifying tools/transcription_tools.py.
Built-ins always win
--------------------
The 6 built-in STT providers (local/faster-whisper, local_command,
groq, openai, mistral, xai) keep their native handlers. Plugins
attempting to register under a built-in name are rejected at
registration time with a warning and re-checked defensively at
dispatch.
Resolution order
----------------
1. stt.provider matches a built-in → built-in dispatch (unchanged)
2. stt.provider matches a registered plugin →
   a. if plugin.is_available() returns False → unavailability envelope
      identifying the plugin (not the generic "No STT provider"
      message — the user explicitly opted into this plugin)
   b. otherwise plugin.transcribe() with model + language forwarded
      from stt.<provider>.{model,language} config
3. No match → legacy "No STT provider available" error (unchanged)
Per-provider config namespace
-----------------------------
Plugins read their config from stt.<provider> in config.yaml, mirroring
how built-ins read stt.openai.model / stt.mistral.model. The dispatcher
forwards `model` and `language` from this section. Caller's explicit
`model=` argument overrides the config-set model.
Files
-----
- agent/transcription_provider.py: TranscriptionProvider ABC
- agent/transcription_registry.py: register/get/list providers,
built-in shadow guard, _reset_for_tests
- hermes_cli/plugins.py: register_transcription_provider() on
PluginContext
- tools/transcription_tools.py: BUILTIN_STT_PROVIDERS frozenset,
_dispatch_to_plugin_provider() with availability gate, wire-in
after xai branch and before "No STT provider" error
- tests/agent/test_transcription_registry.py: 27 tests
- tests/hermes_cli/test_plugins_transcription_registration.py: 3 tests
- tests/tools/test_transcription_plugin_dispatch.py: 28 tests
(covering built-in short-circuit, plugin dispatch, exception
envelope, non-dict guard, availability gate, language forwarding)
- tests/plugins/transcription/check_parity_vs_main.py: 10-scenario
subprocess-pinned parity harness vs origin/main
- website/docs/user-guide/features/{tts,plugins}.md: docs
Behavior parity
---------------
10 scenarios, 8 OK + 2 expected DIFFs:
no_provider_error → plugin (plugin-installed scenario)
no_provider_error → plugin_unavailable (plugin-installed-unavailable
scenario; PR returns cleaner envelope)
Zero behavior change for users not opting into a plugin.
Issue follow-up to #30398.
122 lines
4 KiB
Python
122 lines
4 KiB
Python
"""
Transcription Provider Registry
================================

Central map of registered STT providers. Populated by plugins at
import-time via :meth:`PluginContext.register_transcription_provider`;
consumed by :mod:`tools.transcription_tools` to dispatch
:func:`transcribe_audio` calls to the active plugin backend **when**
the configured ``stt.provider`` name is not a built-in.

Built-ins-always-win
--------------------
Plugin names that collide with a built-in STT provider (``local``,
``local_command``, ``groq``, ``openai``, ``mistral``, ``xai``) are
rejected at registration with a warning. This invariant is also
re-checked at dispatch time in
:func:`tools.transcription_tools._dispatch_to_plugin_provider`.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import threading
|
|
from typing import Dict, List, Optional
|
|
|
|
from agent.transcription_provider import TranscriptionProvider
|
|
|
|
# Module-level logger; registration warnings and debug traces go through it.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Provider names owned by the native (built-in) STT handlers. A plugin that
# tries to register under one of these names is turned away with a warning.
#
# This set must stay identical to ``BUILTIN_STT_PROVIDERS`` in
# :mod:`tools.transcription_tools`; the regression test
# ``tests/agent/test_transcription_registry.py::TestBuiltinSync`` fails when
# the two drift apart. The names are duplicated here rather than imported
# because ``tools.transcription_tools`` itself imports
# ``agent.transcription_registry`` for dispatch, so importing in the other
# direction would create a circular dependency.
_BUILTIN_NAMES = frozenset(
    (
        "local",
        "local_command",
        "groq",
        "openai",
        "mistral",
        "xai",
    )
)
|
|
|
|
|
|
# Registry storage: normalized (stripped, lower-cased) provider name mapped to
# the provider instance registered under it.
_providers: Dict[str, TranscriptionProvider] = dict()

# Serializes writes to ``_providers`` and the snapshot taken when listing.
_lock = threading.Lock()
|
|
|
|
|
|
def register_provider(provider: TranscriptionProvider) -> None:
    """Add *provider* to the registry under its normalized name.

    The registry key is ``provider.name`` stripped of surrounding
    whitespace and lower-cased.

    Args:
        provider: The transcription backend to register.

    Raises:
        TypeError: If *provider* is not a :class:`TranscriptionProvider`
            instance.
        ValueError: If ``provider.name`` is not a string, or is empty or
            whitespace-only.

    A name that collides with a built-in STT provider is not an error: a
    warning is logged and the call returns without registering anything
    (built-ins always win).

    Registering a second provider under an already-used name replaces the
    earlier entry and logs a debug message, which keeps hot-reload
    scenarios (tests, dev loops) predictable.
    """
    if not isinstance(provider, TranscriptionProvider):
        got = type(provider).__name__
        raise TypeError(
            f"register_provider() expects a TranscriptionProvider instance, "
            f"got {got}"
        )

    raw_name = provider.name
    if not (isinstance(raw_name, str) and raw_name.strip()):
        raise ValueError("Transcription provider .name must be a non-empty string")

    normalized = raw_name.strip().lower()

    # Built-in names are reserved: warn and bail out without touching the map.
    if normalized in _BUILTIN_NAMES:
        reserved = ", ".join(sorted(_BUILTIN_NAMES))
        logger.warning(
            "Transcription provider '%s' shadows a built-in name; registration "
            "ignored. Built-in STT providers (%s) always win — pick a different "
            "name.",
            normalized, reserved,
        )
        return

    # Swap the entry under the lock, remembering what (if anything) it replaced.
    with _lock:
        previous = _providers.get(normalized)
        _providers[normalized] = provider

    if previous is None:
        logger.debug(
            "Registered transcription provider '%s' (%s)",
            normalized, type(provider).__name__,
        )
    else:
        logger.debug(
            "Transcription provider '%s' re-registered (was %r)",
            normalized, type(previous).__name__,
        )
|
|
|
|
|
|
def list_providers() -> List[TranscriptionProvider]:
    """Return every registered provider, ordered by its ``.name`` attribute.

    The registry values are copied into a new list while the lock is held,
    so the returned list is independent of later registrations.
    """
    with _lock:
        snapshot = list(_providers.values())
    # In-place stable sort of the private copy; the registry is not touched.
    snapshot.sort(key=lambda entry: entry.name)
    return snapshot
|
|
|
|
|
|
def get_provider(name: str) -> Optional[TranscriptionProvider]:
    """Look up the provider registered under *name*.

    The lookup key is *name* stripped of surrounding whitespace and
    lower-cased, so matching is case-insensitive and whitespace-tolerant —
    mirroring how ``tools.transcription_tools._get_provider`` normalizes
    the configured ``stt.provider`` value.

    Returns:
        The registered provider, or ``None`` when *name* is not a string
        or nothing is registered under the normalized name.
    """
    if isinstance(name, str):
        lookup_key = name.strip().lower()
        return _providers.get(lookup_key)
    return None
|
|
|
|
|
|
def _reset_for_tests() -> None:
    """Empty the provider registry while holding the registry lock.

    **Test-only** helper: gives each test a clean registry to start from.
    """
    with _lock:
        _providers.clear()
|