mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-06 07:51:53 +00:00
feat(tts): add register_tts_provider() plugin hook (closes #30398)
Adds a `TTSProvider(ABC)` + `register_tts_provider()` extension point to the plugin context API, **alongside** the existing config-driven `tts.providers.<name>: type: command` registry from PR #17843. This is additive — the command-provider surface stays as the primary way to add a TTS backend. The hook covers cases the shell-template grammar can't reasonably express: - Native Python SDKs without a CLI (Cartesia, Fish Audio, etc.) - Streaming synthesis (chunked Opus → voice-bubble delivery) - Voice metadata API for the `hermes tools` picker - OAuth-refreshing auth flows None of the 10 inline built-in providers (`edge`, `openai`, `elevenlabs`, `minimax`, `gemini`, `mistral`, `xai`, `piper`, `kittentts`, `neutts`) are migrated to plugins. They stay inline. The hook is for *new* engines that aren't built-in. ## Resolution order The dispatcher's resolution order is the load-bearing invariant: 1. `tts.provider` is a built-in name → built-in dispatch. **Always wins.** 2. `tts.provider` matches `tts.providers.<name>` with `command:` set → command-provider dispatch (PR #17843). 3. `tts.provider` matches a plugin-registered `TTSProvider` → plugin dispatch (new). 4. No match → falls through to Edge TTS default (legacy behavior). Built-ins-always-win is enforced at THREE layers: - Registry: `register_provider()` rejects shadowing names with a warning. - Dispatcher: `_dispatch_to_plugin_provider()` short-circuits built-in names defensively before consulting the registry. - Picker: `_plugin_tts_providers()` filters built-in shadows out of the `hermes tools` row list defensively. Command-providers-win-over-plugins is enforced at TWO layers: - The caller in `text_to_speech_tool` checks `_resolve_command_provider_config` first. - `_dispatch_to_plugin_provider` re-checks for a same-name command config defensively so a refactor of the caller can't silently break the invariant. 
## New files - `agent/tts_provider.py` — `TTSProvider(ABC)` with `synthesize()` (required), `list_voices()`, `list_models()`, `get_setup_schema()`, `stream()`, `voice_compatible` (all optional with sane defaults). Mirrors `agent/image_gen_provider.py` shape. - `agent/tts_registry.py` — `register_provider`/`get_provider`/`list_providers` with `_BUILTIN_NAMES` reject-shadowing invariant. Mirrors `agent/image_gen_registry.py` shape. - `plugins/tts/...` directory ready for community plugins (none shipped). ## Modified files - `hermes_cli/plugins.py` — `register_tts_provider()` method on `PluginContext`. Matches the gating shape of `register_image_gen_provider()` / `register_browser_provider()`. - `tools/tts_tool.py` — `_dispatch_to_plugin_provider()` + `_plugin_provider_is_voice_compatible()` + walrus-elif wiring into the main dispatcher. Built-in elif chain untouched. - `hermes_cli/tools_config.py` — `_plugin_tts_providers()` injects plugin rows into the Text-to-Speech picker category alongside the 10 hardcoded built-in rows. ## Tests - `tests/agent/test_tts_registry.py` — 47 tests covering registration, lookup, ABC contract, helpers, AND a `TestBuiltinSync` regression test that fails if `agent.tts_registry._BUILTIN_NAMES` drifts from `tools.tts_tool.BUILTIN_TTS_PROVIDERS` (kept duplicated due to circular import constraints). - `tests/tools/test_tts_plugin_dispatch.py` — 35 tests covering built-in-always-wins, command-wins-over-plugin, plugin dispatch, exception passthrough, voice_compatible helper. - `tests/hermes_cli/test_tts_picker.py` — 10 tests covering the picker surface, builtin shadowing defense, integration with `_visible_providers`. - `tests/hermes_cli/test_plugins_tts_registration.py` — 3 end-to-end tests via `PluginManager.discover_and_load()`. - `tests/plugins/tts/check_parity_vs_main.py` — 9-scenario subprocess parity harness vs `origin/main`. The only intentional diff is `fallback_edge → plugin` for the `plugin-installed` scenario. 
## Verification - 95/95 new tests pass. - 170/170 pre-existing TTS tests (test_tts_command_providers, test_tts_max_text_length, test_tts_speed, etc.) pass unchanged. - Parity harness against `origin/main`: 8 OK + 1 expected DIFF. - E2E smoke: a registered plugin's `synthesize()` is called via `text_to_speech_tool` with the standard JSON envelope returned. - Ruff clean on all touched files. ## Docs - `website/docs/user-guide/features/tts.md` — new "Python plugin providers" section with a decision table (command-provider vs plugin), minimal plugin example, and the optional-hook reference. - `website/docs/user-guide/features/plugins.md` — TTS row updated to mention both surfaces (command-provider primary, plugin for SDK/streaming). Closes #30398
This commit is contained in:
parent
782681f904
commit
00ec0b617c
13 changed files with 2037 additions and 1 deletion
156
tests/hermes_cli/test_plugins_tts_registration.py
Normal file
156
tests/hermes_cli/test_plugins_tts_registration.py
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
"""Tests for PluginContext.register_tts_provider() (issue #30398).
|
||||
|
||||
Exercises the plugin context hook end-to-end: drops a fake plugin into
|
||||
``$HERMES_HOME/plugins/``, runs ``PluginManager().discover_and_load()``,
|
||||
and asserts the registration result.
|
||||
|
||||
Mirrors the structure of
|
||||
``tests/hermes_cli/test_plugin_scanner_recursion.py::TestRegisterImageGenProvider``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
def _write_plugin(
    root: Path,
    name: str,
    *,
    manifest_extra: Dict[str, Any] | None = None,
    register_body: str = "pass",
) -> Path:
    """Create a minimal plugin package under ``root`` and return its directory.

    Two files are written into ``root / name``:

    * ``plugin.yaml`` -- a manifest with ``name``, ``version`` and
      ``description``, updated with ``manifest_extra`` when given.
    * ``__init__.py`` -- a module defining ``register(ctx)`` whose body is
      ``register_body``.

    ``register_body`` is spliced in after a four-space indent, so only its
    FIRST line is indented by this helper.  Every following line must carry
    its own indentation: four spaces for statements at function level, eight
    for class members, and so on.
    """
    plugin_dir = root / name
    plugin_dir.mkdir(parents=True, exist_ok=True)
    manifest: Dict[str, Any] = {
        "name": name,
        "version": "0.1.0",
        "description": f"Test plugin {name}",
    }
    if manifest_extra:
        manifest.update(manifest_extra)
    (plugin_dir / "plugin.yaml").write_text(yaml.dump(manifest))
    (plugin_dir / "__init__.py").write_text(
        f"def register(ctx):\n    {register_body}\n"
    )
    return plugin_dir


def _enable(hermes_home: Path, name: str) -> None:
    """Add ``name`` to ``plugins.enabled`` in ``hermes_home / "config.yaml"``.

    An existing config file is loaded and extended; a missing file starts
    from an empty mapping.  The name is appended only when it is not already
    listed.
    """
    cfg_path = hermes_home / "config.yaml"
    cfg: dict = {}
    if cfg_path.exists():
        try:
            cfg = yaml.safe_load(cfg_path.read_text()) or {}
        except Exception:
            # Deliberate best-effort: an unreadable or unparseable config is
            # replaced with a fresh one rather than failing the test setup.
            cfg = {}
    plugins_cfg = cfg.setdefault("plugins", {})
    enabled = plugins_cfg.setdefault("enabled", [])
    if isinstance(enabled, list) and name not in enabled:
        enabled.append(name)
    cfg_path.write_text(yaml.safe_dump(cfg))


def _provider_register_body(provider_name: str) -> str:
    """Return a ``register(ctx)`` body registering a provider called ``provider_name``.

    The text follows the indentation contract of ``_write_plugin``: the first
    line is left bare (the helper indents it), later lines are indented
    explicitly so the generated module is valid Python.
    """
    return (
        "from agent.tts_provider import TTSProvider\n"
        "    class P(TTSProvider):\n"
        "        @property\n"
        f"        def name(self): return {provider_name!r}\n"
        "        def synthesize(self, text, output_path, **kw):\n"
        "            return output_path\n"
        "    ctx.register_tts_provider(P())"
    )


class TestRegisterTTSProvider:
    """End-to-end: a fake plugin registers via the hook, ends up in the registry."""

    def test_accepts_valid_provider(self):
        """A well-formed ``TTSProvider`` subclass lands in the registry."""
        from hermes_cli.plugins import PluginManager

        from agent import tts_registry

        tts_registry._reset_for_tests()
        try:
            hermes_home = Path(os.environ["HERMES_HOME"])
            _write_plugin(
                hermes_home / "plugins",
                "my-tts-plugin",
                register_body=_provider_register_body("fake-tts"),
            )
            _enable(hermes_home, "my-tts-plugin")

            mgr = PluginManager()
            mgr.discover_and_load()

            assert mgr._plugins["my-tts-plugin"].enabled is True, (
                f"Plugin failed to load: {mgr._plugins['my-tts-plugin'].error}"
            )
            assert tts_registry.get_provider("fake-tts") is not None
        finally:
            # Always clear the registry, even when an assertion above fails,
            # so a failing test cannot leak providers into later tests.
            tts_registry._reset_for_tests()

    def test_rejects_non_provider(self, caplog):
        """A plugin that passes a non-TTSProvider gets a warning, no exception."""
        from hermes_cli.plugins import PluginManager

        from agent import tts_registry

        tts_registry._reset_for_tests()
        try:
            hermes_home = Path(os.environ["HERMES_HOME"])
            _write_plugin(
                hermes_home / "plugins",
                "bad-tts-plugin",
                register_body="ctx.register_tts_provider('not a provider')",
            )
            _enable(hermes_home, "bad-tts-plugin")

            with caplog.at_level("WARNING"):
                mgr = PluginManager()
                mgr.discover_and_load()

            # Plugin loaded (register returned normally), but registry empty.
            assert mgr._plugins["bad-tts-plugin"].enabled is True
            assert tts_registry.get_provider("not a provider") is None
            assert tts_registry.list_providers() == []
            assert "does not inherit from TTSProvider" in caplog.text
        finally:
            tts_registry._reset_for_tests()

    def test_rejects_builtin_shadow(self, caplog):
        """A plugin registering a name that collides with a built-in is rejected.

        The underlying registry logs a warning and does not store the
        provider, while the plugin itself still loads without error.
        """
        from hermes_cli.plugins import PluginManager

        from agent import tts_registry

        tts_registry._reset_for_tests()
        try:
            hermes_home = Path(os.environ["HERMES_HOME"])
            _write_plugin(
                hermes_home / "plugins",
                "shadow-tts-plugin",
                register_body=_provider_register_body("edge"),
            )
            _enable(hermes_home, "shadow-tts-plugin")

            with caplog.at_level("WARNING"):
                mgr = PluginManager()
                mgr.discover_and_load()

            # Plugin still loaded normally — built-in shadowing is a warning,
            # not an exception. The registry rejects the entry though.
            assert mgr._plugins["shadow-tts-plugin"].enabled is True
            assert tts_registry.get_provider("edge") is None
            assert "shadows a built-in name" in caplog.text
        finally:
            tts_registry._reset_for_tests()
|
||||
187
tests/hermes_cli/test_tts_picker.py
Normal file
187
tests/hermes_cli/test_tts_picker.py
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
"""Tests for the TTS plugin picker surface in hermes_cli/tools_config.py (issue #30398).
|
||||
|
||||
Covers ``_plugin_tts_providers()`` and the ``_visible_providers()``
|
||||
integration that injects plugin rows into the Text-to-Speech category.
|
||||
|
||||
Mirrors the structure of existing image_gen / browser picker tests.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from agent import tts_registry
|
||||
from agent.tts_provider import TTSProvider
|
||||
from hermes_cli import tools_config
|
||||
|
||||
|
||||
class _FakeTTSProvider(TTSProvider):
    """Minimal ``TTSProvider`` used to populate the registry in these tests.

    ``name`` is the value reported by the ``name`` property.  ``schema``,
    when given, is returned as-is from ``get_setup_schema()``; otherwise the
    base-class implementation is used.
    """

    def __init__(self, name: str, schema: dict | None = None):
        self._schema = schema
        self._name = name

    @property
    def name(self) -> str:
        return self._name

    def synthesize(self, text, output_path, **kw):
        # No audio is produced; the requested path is handed straight back.
        return output_path

    def get_setup_schema(self):
        if self._schema is None:
            return super().get_setup_schema()
        return self._schema
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_registry():
    """Clear the TTS provider registry before and after each test in this module."""
    clear_registry = tts_registry._reset_for_tests
    clear_registry()
    yield
    clear_registry()
|
||||
|
||||
|
||||
class TestPluginTTSProviders:
    """``_plugin_tts_providers()`` returns picker-row dicts."""

    def test_empty_when_no_plugins(self):
        picker_rows = tools_config._plugin_tts_providers()
        assert picker_rows == []

    def test_returns_row_for_registered_plugin(self):
        cartesia_schema = {
            "name": "Cartesia",
            "badge": "paid",
            "tag": "Ultra-low-latency streaming",
            "env_vars": [
                {
                    "key": "CARTESIA_API_KEY",
                    "prompt": "Cartesia API key",
                    "url": "https://play.cartesia.ai/console",
                },
            ],
        }
        tts_registry.register_provider(
            _FakeTTSProvider(name="cartesia", schema=cartesia_schema)
        )

        picker_rows = tools_config._plugin_tts_providers()

        assert len(picker_rows) == 1
        row = picker_rows[0]
        assert row["name"] == "Cartesia"
        assert row["badge"] == "paid"
        assert row["tag"] == "Ultra-low-latency streaming"
        assert row["env_vars"][0]["key"] == "CARTESIA_API_KEY"
        # Choosing this row writes ``tts.provider: cartesia`` — the same
        # write path a hardcoded row uses.
        assert row["tts_provider"] == "cartesia"
        assert row["tts_plugin_name"] == "cartesia"

    def test_filters_builtin_shadow_defensively(self):
        """The picker drops a provider stored under a built-in name.

        The entry is planted directly in ``tts_registry._providers`` so the
        registry's own built-in guard never sees it; the picker layer must
        still filter it out.
        """
        # Intentionally pathological: writing to the private mapping skips
        # the warning + skip that ``register_provider`` applies.
        tts_registry._providers["edge"] = _FakeTTSProvider(name="edge")  # type: ignore[index]
        try:
            picker_rows = tools_config._plugin_tts_providers()
            assert picker_rows == [], (
                "Picker must filter built-in name shadows even when the "
                "registry has been bypassed."
            )
        finally:
            tts_registry._providers.pop("edge", None)  # type: ignore[arg-type]

    def test_skips_providers_with_no_name(self):
        """Defense in depth: an object lacking ``.name`` must not crash the
        picker, and no row may be attributed to it."""

        class _NoName:
            display_name = "Bogus"

            def get_setup_schema(self):
                return {"name": "Bogus"}

        tts_registry._providers["bogus"] = _NoName()  # type: ignore[assignment]
        try:
            picker_rows = tools_config._plugin_tts_providers()
            # No emitted row carries the planted registry key.
            assert not any(
                row.get("tts_plugin_name") == "bogus" for row in picker_rows
            )
        finally:
            tts_registry._providers.pop("bogus", None)  # type: ignore[arg-type]

    def test_skips_providers_whose_schema_raises(self):
        class _ExplodingSchema(_FakeTTSProvider):
            def get_setup_schema(self):
                raise RuntimeError("boom")

        for provider in (
            _ExplodingSchema(name="exploding"),
            _FakeTTSProvider(name="working"),
        ):
            tts_registry.register_provider(provider)

        plugin_names = [
            row["tts_plugin_name"]
            for row in tools_config._plugin_tts_providers()
        ]
        assert plugin_names == ["working"]

    def test_minimal_schema_uses_display_name(self):
        """Without a setup-schema override the row is derived from
        ``display_name`` and ``name`` alone."""
        tts_registry.register_provider(_FakeTTSProvider(name="minimal"))

        picker_rows = tools_config._plugin_tts_providers()

        assert len(picker_rows) == 1
        only_row = picker_rows[0]
        assert only_row["name"] == "Minimal"  # display_name default
        assert only_row["tts_provider"] == "minimal"
        assert only_row["env_vars"] == []

    def test_post_setup_passthrough(self):
        hook_schema = {
            "name": "My TTS",
            "post_setup": "my_post_install_hook",
            "env_vars": [],
        }
        tts_registry.register_provider(
            _FakeTTSProvider(name="my-tts", schema=hook_schema)
        )

        picker_rows = tools_config._plugin_tts_providers()

        assert picker_rows[0].get("post_setup") == "my_post_install_hook"
|
||||
|
||||
|
||||
class TestVisibleProvidersInjectsTTSPlugins:
    """``_visible_providers()`` adds plugin rows to the Text-to-Speech
    category next to the hardcoded built-in rows."""

    def test_tts_category_includes_plugin_rows(self):
        tts_registry.register_provider(_FakeTTSProvider(name="cartesia"))

        visible_rows = tools_config._visible_providers(
            tools_config.TOOL_CATEGORIES["tts"], config={}
        )
        row_names = [row.get("name") for row in visible_rows]

        # A hardcoded row is still listed (spot-check one of them).
        assert "Microsoft Edge TTS" in row_names
        # The plugin's row is listed as well.
        assert "Cartesia" in row_names

        # The plugin row carries ``tts_provider`` for write-path compat.
        marked_rows = [
            row for row in visible_rows if row.get("tts_plugin_name")
        ]
        assert len(marked_rows) == 1
        assert marked_rows[0]["tts_provider"] == "cartesia"

    def test_other_categories_unaffected_by_tts_plugins(self):
        """A registered TTS plugin must not show up in the Image Generation
        picker."""
        tts_registry.register_provider(_FakeTTSProvider(name="cartesia"))

        visible_rows = tools_config._visible_providers(
            tools_config.TOOL_CATEGORIES["image_gen"], config={}
        )
        row_names = [row.get("name") for row in visible_rows]

        assert "Cartesia" not in row_names

    def test_tts_category_without_plugins_only_hardcoded(self):
        """With no plugins registered, no row carries the plugin marker."""
        visible_rows = tools_config._visible_providers(
            tools_config.TOOL_CATEGORIES["tts"], config={}
        )
        row_names = [row.get("name") for row in visible_rows]

        # None of the rows is flagged as coming from a plugin.
        assert not any(row.get("tts_plugin_name") for row in visible_rows)
        # A hardcoded, always-visible row is still present.
        assert "Microsoft Edge TTS" in row_names
|
||||
Loading…
Add table
Add a link
Reference in a new issue