mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-08 08:11:38 +00:00
feat(stt): add register_transcription_provider() plugin hook
Add an opt-in Python plugin surface for speech-to-text backends,
mirroring the TTS hook pattern. New backends (OpenRouter, SenseAudio,
Gemini-STT, custom proprietary engines) can be implemented as plugins
without modifying tools/transcription_tools.py.
Built-ins always win
--------------------
The 6 built-in STT providers (local/faster-whisper, local_command,
groq, openai, mistral, xai) keep their native handlers. Plugins
attempting to register under a built-in name are rejected at
registration time with a warning and re-checked defensively at
dispatch.
Resolution order
----------------
1. stt.provider matches a built-in → built-in dispatch (unchanged)
2. stt.provider matches a registered plugin →
a. if plugin.is_available() returns False → unavailability envelope
identifying the plugin (not the generic "No STT provider"
message — the user explicitly opted into this plugin)
b. otherwise plugin.transcribe() with model + language forwarded
from stt.<provider>.{model,language} config
3. No match → legacy "No STT provider available" error (unchanged)
Per-provider config namespace
-----------------------------
Plugins read their config from stt.<provider> in config.yaml, mirroring
how built-ins read stt.openai.model / stt.mistral.model. The dispatcher
forwards `model` and `language` from this section. Caller's explicit
`model=` argument overrides the config-set model.
Files
-----
- agent/transcription_provider.py: TranscriptionProvider ABC
- agent/transcription_registry.py: register/get/list providers,
built-in shadow guard, _reset_for_tests
- hermes_cli/plugins.py: register_transcription_provider() on
PluginContext
- tools/transcription_tools.py: BUILTIN_STT_PROVIDERS frozenset,
_dispatch_to_plugin_provider() with availability gate, wire-in
after xai branch and before "No STT provider" error
- tests/agent/test_transcription_registry.py: 27 tests
- tests/hermes_cli/test_plugins_transcription_registration.py: 3 tests
- tests/tools/test_transcription_plugin_dispatch.py: 28 tests
(covering built-in short-circuit, plugin dispatch, exception
envelope, non-dict guard, availability gate, language forwarding)
- tests/plugins/transcription/check_parity_vs_main.py: 10-scenario
subprocess-pinned parity harness vs origin/main
- website/docs/user-guide/features/{tts,plugins}.md: docs
Behavior parity
---------------
10 scenarios, 8 OK + 2 expected DIFFs:
no_provider_error → plugin (plugin-installed scenario)
no_provider_error → plugin_unavailable (plugin-installed-unavailable
scenario; PR returns cleaner envelope)
Zero behavior change for users not opting into a plugin.
Follow-up to issue #30398.
This commit is contained in:
parent
2e0ac31a72
commit
2cd952e110
11 changed files with 1831 additions and 1 deletions
0
tests/plugins/transcription/__init__.py
Normal file
0
tests/plugins/transcription/__init__.py
Normal file
344
tests/plugins/transcription/check_parity_vs_main.py
Normal file
344
tests/plugins/transcription/check_parity_vs_main.py
Normal file
|
|
@ -0,0 +1,344 @@
|
|||
"""Behavior-parity check for the STT plugin hook (follow-up to #30398).
|
||||
|
||||
Spawns one subprocess per (version, scenario) cell — pinned to either
|
||||
``origin/main`` (no plugin hook; ``stt.provider: openrouter`` falls
|
||||
through to the "No STT provider available" error path) or this PR's
|
||||
worktree (plugin hook present; same config routes through the plugin
|
||||
registry when a plugin is registered).
|
||||
|
||||
Each subprocess clears all STT-related env vars + writes a
|
||||
``config.yaml``, then asks the dispatcher how it would route a
|
||||
``transcribe_audio`` call. The emitted shape is a JSON object with the keys::
|
||||
|
||||
{dispatch_kind, provider_name, success}
|
||||
|
||||
Where ``dispatch_kind`` ∈
|
||||
``{"builtin_local", "builtin_groq", "builtin_openai", ...,
|
||||
"plugin", "plugin_unavailable", "no_provider_error", "stt_disabled"}``.
|
||||
|
||||
Acceptable diffs:
|
||||
- ``no_provider_error → plugin`` for the ``plugin-installed`` scenario.
|
||||
- ``no_provider_error → plugin_unavailable`` for the
|
||||
``plugin-installed-unavailable`` scenario (PR returns the cleaner
|
||||
unavailability envelope instead of the generic auto-detect error).
|
||||
|
||||
Run from the PR worktree::
|
||||
|
||||
python tests/plugins/transcription/check_parity_vs_main.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Checkout root: this script lives in tests/plugins/transcription/, so the
# root is three directories above the script's own directory.
REPO_ROOT = Path(__file__).resolve().parents[3]
|
||||
|
||||
|
||||
def _resolve_main_dir() -> Path:
    """Pick the checkout that serves as the comparison baseline.

    Candidates are tried in order, and a candidate qualifies only when it
    contains ``tools/transcription_tools.py``:

    1. the directory two levels above ``REPO_ROOT``;
    2. a sibling of ``REPO_ROOT`` named ``hermes-agent-main``.

    Returns:
        The first qualifying candidate, or ``REPO_ROOT`` itself when neither
        qualifies (the baseline and the tree under test are then the same).
    """
    marker = Path("tools") / "transcription_tools.py"

    grandparent = REPO_ROOT.parent.parent
    if grandparent != REPO_ROOT and (grandparent / marker).exists():
        return grandparent

    named_sibling = REPO_ROOT.parent / "hermes-agent-main"
    return named_sibling if (named_sibling / marker).exists() else REPO_ROOT
|
||||
|
||||
|
||||
# Baseline checkout to compare against. Equals REPO_ROOT when no separate
# checkout is found; main() prints a warning in that case.
MAIN_DIR = _resolve_main_dir()
# The tree under test is the checkout that contains this script.
PR_DIR = REPO_ROOT
# Import-time sanity check that this script sits inside a checkout that has
# tools/transcription_tools.py.
assert (PR_DIR / "tools" / "transcription_tools.py").exists(), (
    f"PR_DIR={PR_DIR} doesn't look like a hermes-agent checkout"
)
|
||||
|
||||
|
||||
# Source of the probe executed with ``python -c`` once per (checkout, scenario).
# Positional arguments received by the probe:
#   argv[1] checkout path to put first on sys.path
#   argv[2] JSON object of extra environment variables
#   argv[3] config.yaml body
#   argv[4] plugin mode: "yes", "unavailable", or anything else for none
# The probe prints one JSON object describing how the call was dispatched.
# Kept as a raw string so the ``\x00`` escape reaches the child unmodified.
SUBPROCESS_SCRIPT = r"""
import atexit, json, os, shutil, sys, tempfile
sys.path.insert(0, sys.argv[1])

# Isolated HERMES_HOME so the config write is hermetic.
home = tempfile.mkdtemp()
# Registered before any project import so the scratch directory is removed
# last at interpreter exit instead of being leaked on every probe run.
atexit.register(shutil.rmtree, home, ignore_errors=True)
os.environ["HERMES_HOME"] = home

# Clear STT-related env so dispatch decisions are config-driven.
for k in (
    "GROQ_API_KEY", "OPENAI_API_KEY", "VOICE_TOOLS_OPENAI_KEY",
    "MISTRAL_API_KEY", "XAI_API_KEY",
    "HERMES_LOCAL_STT_COMMAND",
):
    os.environ.pop(k, None)

scenario_env = json.loads(sys.argv[2])
os.environ.update(scenario_env)

config_yaml = sys.argv[3]
plugin_register = sys.argv[4]  # "yes" / "unavailable" to register a fake plugin

config_path = os.path.join(home, "config.yaml")
with open(config_path, "w") as f:
    f.write(config_yaml)

# Fresh import — must not have anything cached from prior runs.
for name in list(sys.modules):
    if name.startswith(("tools.", "agent.", "plugins.", "hermes_cli.")):
        sys.modules.pop(name, None)

# Try importing transcription_registry — only exists on PR side.
try:
    from agent import transcription_registry
    from agent.transcription_provider import TranscriptionProvider

    if plugin_register == "yes":
        class _FakeProvider(TranscriptionProvider):
            @property
            def name(self): return "openrouter"
            def transcribe(self, file_path, **kw):
                return {"success": True, "transcript": "plugin transcript", "provider": "openrouter"}

        transcription_registry._reset_for_tests()
        transcription_registry.register_provider(_FakeProvider())
    elif plugin_register == "unavailable":
        class _UnavailablePlugin(TranscriptionProvider):
            @property
            def name(self): return "openrouter"
            def is_available(self): return False
            def transcribe(self, file_path, **kw):
                return {"success": True, "transcript": "should not run"}

        transcription_registry._reset_for_tests()
        transcription_registry.register_provider(_UnavailablePlugin())
except ImportError:
    pass

import tools.transcription_tools as tt

# Use a real, non-empty placeholder file so _validate_audio_file passes.
audio_path = os.path.join(home, "audio.ogg")
with open(audio_path, "wb") as f:
    # Minimal-ish OGG-shaped bytes so the size check passes.
    f.write(b"OggS" + b"\x00" * 1024)

BUILTIN_NAMES = ("local", "local_command", "groq", "openai", "mistral", "xai")

# Patch _transcribe_* so the test doesn't actually try cloud APIs.
# We're testing dispatch, not the underlying transcription.
# Each built-in is stubbed to a marker so we can identify the branch.
class _Stub:
    def __init__(self, name):
        self.name = name
    def __call__(self, file_path, model_name=None):
        return {"success": True, "transcript": "stub", "provider": self.name}

tt._transcribe_local = _Stub("local")
tt._transcribe_local_command = _Stub("local_command")
tt._transcribe_groq = _Stub("groq")
tt._transcribe_openai = _Stub("openai")
tt._transcribe_mistral = _Stub("mistral")
tt._transcribe_xai = _Stub("xai")

# Force _get_provider to honor the explicit config since we don't have
# real creds. The provider-resolution gates check _HAS_OPENAI /
# _HAS_FASTER_WHISPER which we can't easily set, so we just patch
# _get_provider to return whatever the config says.
stt_cfg = tt._load_stt_config()
explicit = stt_cfg.get("provider")
if explicit:
    # Bypass the gating for test purposes — _get_provider would
    # otherwise return "none" when the dependency isn't installed.
    def _patched(cfg):
        if not tt.is_stt_enabled(cfg):
            return "none"
        return cfg.get("provider", "none")
    tt._get_provider = _patched

try:
    result = tt.transcribe_audio(audio_path)
except Exception as exc:
    shape = {"dispatch_kind": "exception", "provider_name": None, "success": False,
             "error_text": repr(exc)}
    print(json.dumps(shape))
    sys.exit(0)

dispatch_kind = "unknown"
provider_name = result.get("provider") if isinstance(result, dict) else None
success = result.get("success", False) if isinstance(result, dict) else False
error_text = result.get("error", "") if isinstance(result, dict) else ""

if not success and "STT is disabled" in error_text:
    dispatch_kind = "stt_disabled"
elif not success and "is not available" in error_text:
    dispatch_kind = "plugin_unavailable"
elif not success and "No STT provider" in error_text:
    dispatch_kind = "no_provider_error"
elif provider_name in BUILTIN_NAMES:
    dispatch_kind = "builtin_" + provider_name
elif success and provider_name:
    # Built-in names were handled by the previous branch, so any other
    # named, successful result came from a plugin.
    dispatch_kind = "plugin"
else:
    dispatch_kind = "other"

shape = {
    "dispatch_kind": dispatch_kind,
    "provider_name": provider_name,
    "success": success,
}
print(json.dumps(shape))
"""
|
||||
|
||||
|
||||
# Scenario matrix. Each entry is
# (label, config.yaml body, scenario_env, plugin_register). Every scenario uses
# an empty extra environment and a one-setting ``stt:`` section, so the rows
# below list only (label, stt setting, plugin_register).
SCENARIOS: list[tuple[str, str, dict[str, str], str]] = [
    (label, f"stt:\n {stt_setting}\n", {}, plugin_register)
    for label, stt_setting, plugin_register in (
        ("stt-disabled", "enabled: false", "no"),
        ("explicit-groq", "provider: groq", "no"),
        ("explicit-openai", "provider: openai", "no"),
        ("explicit-local", "provider: local", "no"),
        ("explicit-xai", "provider: xai", "no"),
        # Mistral is quarantined → _get_provider returns "none" today, hence no_provider_error.
        ("explicit-mistral-quarantine", "provider: mistral", "no"),
        # Unknown name + no plugin → both: no_provider_error
        ("unknown-no-plugin", "provider: openrouter", "no"),
        # Unknown name + plugin installed → main: no_provider_error, PR: plugin
        ("plugin-installed", "provider: openrouter", "yes"),
        # Unknown name + plugin reports unavailable → main: no_provider_error,
        # PR: plugin_unavailable (cleaner envelope, names the plugin)
        ("plugin-installed-unavailable", "provider: openrouter", "unavailable"),
        # Built-in name + plugin tries to shadow → both: built-in
        ("explicit-openai-with-plugin-registered", "provider: openai", "yes"),
    )
]
|
||||
|
||||
|
||||
def _run_scenario(repo_path: Path, label: str, config_yaml: str, env: dict, plugin_register: str) -> dict:
    """Run the probe script against one checkout and return its reported shape.

    The interpreter is the first existing path among ``repo_path/.venv``,
    ``MAIN_DIR/.venv`` and ``MAIN_DIR/venv`` (each with ``bin/python``),
    falling back to ``python3`` resolved via PATH.

    Args:
        repo_path: Checkout passed to the probe as its import root.
        label: Scenario label. Accepted so callers can pass scenario rows
            positionally; it is not used here.
        config_yaml: Body written to the probe's ``config.yaml``.
        env: Extra environment variables, sent to the probe as JSON.
        plugin_register: Plugin mode string forwarded to the probe.

    Returns:
        The JSON object printed on the probe's last stdout line. On any
        failure (launch error, timeout, non-zero exit, unparsable output) a
        dict with an ``"error"`` key is returned instead, so one broken
        scenario is reported rather than aborting the whole run.
    """

    def _tail(stream) -> str:
        # TimeoutExpired may carry None or raw bytes even when text=True.
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            stream = stream.decode("utf-8", errors="replace")
        return stream[-500:]

    interpreter_candidates = (
        repo_path / ".venv" / "bin" / "python",
        MAIN_DIR / ".venv" / "bin" / "python",
        MAIN_DIR / "venv" / "bin" / "python",
    )
    venv_python = next(
        (path for path in interpreter_candidates if path.exists()),
        Path("python3"),
    )

    try:
        out = subprocess.run(
            [
                str(venv_python),
                "-c",
                SUBPROCESS_SCRIPT,
                str(repo_path),
                json.dumps(env),
                config_yaml,
                plugin_register,
            ],
            capture_output=True,
            text=True,
            timeout=60,
        )
    except subprocess.TimeoutExpired as exc:
        return {
            "error": "subprocess timed out",
            "stdout": _tail(exc.stdout),
            "stderr": _tail(exc.stderr),
        }
    except OSError as exc:
        # E.g. the fallback interpreter is not on PATH.
        return {"error": f"could not launch {venv_python}: {exc}"}

    if out.returncode != 0:
        return {
            "error": "subprocess failed",
            "stdout": out.stdout[-500:],
            "stderr": out.stderr[-500:],
        }

    try:
        # Only the last line counts: imports may log to stdout before it.
        shape = json.loads(out.stdout.strip().splitlines()[-1])
    except (IndexError, ValueError) as exc:
        return {"error": f"could not parse output: {exc}", "stdout": out.stdout}
    if not isinstance(shape, dict):
        return {
            "error": f"could not parse output: expected a JSON object, got {type(shape).__name__}",
            "stdout": out.stdout,
        }
    return shape
|
||||
|
||||
|
||||
def _reduce(shape: dict) -> dict:
|
||||
return {
|
||||
"dispatch_kind": shape.get("dispatch_kind"),
|
||||
"success": shape.get("success"),
|
||||
}
|
||||
|
||||
|
||||
def main() -> int:
    """Run every scenario against both checkouts and report the comparison.

    Each scenario is probed once with ``MAIN_DIR`` and once with ``PR_DIR``.
    The reduced shapes must match, except for the two documented cases where
    the baseline reports ``no_provider_error`` and the PR dispatches to (or
    reports unavailability of) the registered plugin.

    Returns:
        ``1`` when any scenario errored or diverged unexpectedly, else ``0``.
    """
    print(f"main: {MAIN_DIR}")
    print(f"pr: {PR_DIR}")
    print()

    if MAIN_DIR == PR_DIR:
        print(
            "WARN: MAIN_DIR == PR_DIR — diffs will be trivially identical.\n"
            " Set up a sibling 'hermes-agent-main' checkout pinned to "
            "origin/main to get real parity coverage."
        )
        print()

    # Scenario label -> dispatch_kind the PR may report where the baseline
    # reports no_provider_error (the baseline has no plugin hook).
    accepted_pr_kinds = {
        "plugin-installed": "plugin",
        "plugin-installed-unavailable": "plugin_unavailable",
    }

    regressed: list[str] = []
    crashed: list[str] = []
    accepted: list[tuple[str, dict, dict]] = []

    for scenario in SCENARIOS:
        label = scenario[0]
        # A scenario row has the same field order as _run_scenario's
        # parameters after repo_path.
        main_shape = _run_scenario(MAIN_DIR, *scenario)
        pr_shape = _run_scenario(PR_DIR, *scenario)

        if "error" in main_shape or "error" in pr_shape:
            print(f" [ERR ] {label}: subprocess failed")
            print(f" main: {main_shape}")
            print(f" pr: {pr_shape}")
            crashed.append(label)
            continue

        main_reduced, pr_reduced = _reduce(main_shape), _reduce(pr_shape)
        if main_reduced == pr_reduced:
            print(f" [OK] {label}: {main_reduced}")
            continue

        pr_kind = pr_reduced.get("dispatch_kind")
        is_accepted = (
            main_reduced.get("dispatch_kind") == "no_provider_error"
            and label in accepted_pr_kinds
            and pr_kind == accepted_pr_kinds[label]
        )
        if is_accepted:
            print(f" [DIFF] {label}: no_provider_error → {pr_kind} — expected")
            accepted.append((label, main_reduced, pr_reduced))
        else:
            print(f" [FAIL] {label}")
            print(f" main: {main_reduced}")
            print(f" pr: {pr_reduced}")
            regressed.append(label)

    print()
    for heading, labels in (
        (f"SUBPROCESS ERRORS in {len(crashed)} scenario(s):", crashed),
        (f"BEHAVIOUR REGRESSION in {len(regressed)} scenario(s):", regressed),
    ):
        if labels:
            print(heading)
            for item in labels:
                print(f" - {item}")
    if accepted:
        print(
            f"INTENTIONAL DIFFS ({len(accepted)}): "
            f"no_provider_error → plugin dispatch when a plugin is registered."
        )
    if regressed or crashed:
        return 1
    print(f"PARITY OK across {len(SCENARIOS)} scenarios.")
    return 0
|
||||
|
||||
|
||||
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue