fix(config): surface invalid platform_toolsets instead of silently dropping tools (#38798)

A config migration (or hand-edit) that leaves an invalid toolset name in `platform_toolsets` — e.g. the #38798 corruption that rewrote `hermes-cli` to the non-existent `hermes` — silently disabled all affected tools: resolve_toolset() returns [] for an unknown name, so the agent quietly lost its tools with no error, warning, or log entry and degraded to text-only replies. Surface it loudly at two points: - After migration (migrate_config): validate platform_toolsets and record/print a warning per unknown name, with a `hermes-<platform>` suggestion when that would have been valid (the exact #38798 shape). - At runtime (_get_platform_tools): if a platform was explicitly configured but every toolset name is invalid, log a warning when tools are resolved for a session — so an ALREADY-corrupted config is caught at startup, not only on the next `hermes update`. Logic lives in a new pure, side-effect-free helper (toolset_validation.py) with validate_toolset injected, so it is unit-testable without the tool registry. Note: the original v25→v26 migration that caused the corruption no longer exists (config format is now v30; no migration step rewrites toolset names). This change is the durable defense against the silent-failure mode regardless of cause, matching the issue's "Expected: log a warning". Salvaged from #39207 by @lEWFkRAD (authorship preserved via cherry-pick). Tests: 9 helper cases (incl. the #38798 corruption shape, mixed valid/invalid, zero-tools state, non-dict/scalar/non-string) + a runtime caplog test — both the helper warning and the runtime guard mutation-verified to fail without the fix. Closes #38798. Supersedes #39581 (prevent-in-v25→v26 — that path is gone), #41006 / #40208 (repair-migration for already-corrupted configs).
2026-06-27 11:22:03 +00:00 · 2026-06-26 12:17:52 +05:30 · 2026-06-26 12:17:52 +05:30 · 41ede84b93
commit 41ede84b93
parent 0f81b0d458
5 changed files with 272 additions and 1 deletions
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -5308,9 +5308,29 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                config["mcp_servers"] = raw_mcp_servers
                save_config(config)

+    # ── Always: validate platform_toolsets after migration ──
+    # A migration (or hand-edit) that leaves an invalid toolset name in
+    # platform_toolsets silently disables the affected tools — resolve_toolset()
+    # returns [] for an unknown name, so the agent quietly loses tools with no
+    # error or warning. Surface it loudly instead. See #38798.
+    try:
+        from toolsets import validate_toolset
+        from hermes_cli.toolset_validation import validate_platform_toolsets
+
+        ts_warnings = validate_platform_toolsets(
+            read_raw_config().get("platform_toolsets"), validate_toolset
+        )
+        for w in ts_warnings:
+            results["warnings"].append(w)
+            if not quiet:
+                print(f"  ⚠ {w}")
+    except Exception as _ts_val_err:
+        # best-effort; never block migration on validation
+        logger.debug("platform_toolsets validation skipped: %s", _ts_val_err)
+
    if current_ver < latest_ver and not quiet:
        print(f"Config version: {current_ver} → {latest_ver}")
-    
+
    # Check for missing required env vars
    missing_env = get_missing_env_vars(required_only=True)
    
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@ -34,6 +34,11 @@ from utils import base_url_hostname, is_truthy_value

 logger = logging.getLogger(__name__)

+# Platforms already warned about an all-invalid platform_toolsets list, so the
+# runtime check in _get_platform_tools warns once per platform instead of on
+# every tool resolution for a persistently-corrupt config (#38798).
+_warned_invalid_platform_toolsets: Set[str] = set()
+
 PROJECT_ROOT = Path(__file__).parent.parent.resolve()


@ -1611,6 +1616,31 @@ def _get_platform_tools(
        disabled_set = {str(ts) for ts in disabled_toolsets}
        enabled_toolsets -= disabled_set

+    # #38798: if this platform was explicitly configured but every toolset name
+    # is invalid (e.g. a migration or hand-edit left `hermes` instead of
+    # `hermes-cli`), resolve_toolset() returns [] for each and the platform ends
+    # up with no native tools — silently, with no error. Surface it at the point
+    # tools are resolved for a session so an already-corrupted config is caught
+    # at runtime, not only during the next `hermes update`/`hermes doctor`.
+    _explicit = platform_toolsets.get(platform)
+    if isinstance(_explicit, list) and _explicit:
+        from toolsets import validate_toolset
+
+        _named = [str(t) for t in _explicit if isinstance(t, str) and t]
+        if (
+            _named
+            and not any(validate_toolset(t) for t in _named)
+            and platform not in _warned_invalid_platform_toolsets
+        ):
+            _warned_invalid_platform_toolsets.add(platform)
+            logger.warning(
+                "platform '%s' has no valid toolsets configured (unknown "
+                "name(s): %s) - tools will be unavailable. Run `hermes tools` "
+                "to reconfigure. See issue #38798.",
+                platform,
+                ", ".join(_named),
+            )
+
    return enabled_toolsets


--- a/hermes_cli/toolset_validation.py
+++ b/hermes_cli/toolset_validation.py
@ -0,0 +1,74 @@
+"""Validation for the ``platform_toolsets`` config section.
+
+Pure, side-effect-free helpers so the logic is unit-testable without importing
+the tool registry or launching Hermes (mirrors the decoupled-helper pattern used
+elsewhere in the CLI).
+
+Motivated by #38798: a config migration silently rewrote the valid toolset name
+``hermes-cli`` to the non-existent ``hermes``. ``resolve_toolset('hermes')``
+returns an empty list, so every tool silently disappeared with no error, warning,
+or log entry — the agent degraded to text-only replies and the cause took
+significant debugging to find. Surfacing invalid toolset names (and the
+zero-tools end state) loudly turns that silent failure into an actionable one.
+"""
+
+from typing import Callable, Dict, List
+
+
+def validate_platform_toolsets(
+    platform_toolsets: object,
+    is_valid_toolset: Callable[[str], bool],
+) -> List[str]:
+    """Return human-readable warnings for a ``platform_toolsets`` mapping.
+
+    Two failure modes are reported:
+
+    1. A toolset name that ``is_valid_toolset`` rejects — usually a corrupted or
+       renamed entry. When ``hermes-<platform>`` would have been valid (the exact
+       #38798 shape, where ``cli`` held ``hermes`` instead of ``hermes-cli``),
+       the warning includes that as a suggestion.
+    2. The mapping is non-empty but resolves to *zero* valid toolsets, so the
+       agent would start with no tools at all.
+
+    ``is_valid_toolset`` is injected (normally :func:`toolsets.validate_toolset`)
+    so this function performs no imports or I/O and is testable in isolation.
+
+    Args:
+        platform_toolsets: The raw ``platform_toolsets`` value from config. Only
+            ``dict`` values carry toolset entries; anything else yields no
+            warnings (nothing to validate).
+        is_valid_toolset: Predicate returning ``True`` for a known toolset name.
+
+    Returns:
+        A list of warning strings (empty when everything is valid).
+    """
+    warnings: List[str] = []
+    if not isinstance(platform_toolsets, dict) or not platform_toolsets:
+        return warnings
+
+    valid_count = 0
+    for platform, raw in platform_toolsets.items():
+        names = raw if isinstance(raw, list) else [raw]
+        for name in names:
+            if not isinstance(name, str) or not name:
+                continue
+            if is_valid_toolset(name):
+                valid_count += 1
+                continue
+            suggestion = f"hermes-{platform}"
+            hint = (
+                f" — did you mean '{suggestion}'?"
+                if is_valid_toolset(suggestion)
+                else ""
+            )
+            warnings.append(
+                f"platform '{platform}' references unknown toolset "
+                f"'{name}'{hint}"
+            )
+
+    if valid_count == 0:
+        warnings.append(
+            "platform_toolsets resolves to zero valid toolsets — the agent will "
+            "have no tools. Run `hermes tools` to reconfigure."
+        )
+    return warnings
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@ -1,5 +1,6 @@
 """Tests for hermes_cli.tools_config platform tool persistence."""

+import logging
 from types import SimpleNamespace
 from unittest.mock import patch

@ -58,6 +59,61 @@ def test_agent_disabled_toolsets_with_explicit_platform_config():
    assert "terminal" in enabled


+def test_all_invalid_platform_toolsets_logs_runtime_warning(caplog):
+    """#38798: an explicit platform config whose toolset names are all invalid
+    (e.g. 'hermes' instead of 'hermes-cli') must warn at resolve time so an
+    already-corrupted config is caught at runtime, not just during migration."""
+    import hermes_cli.tools_config as _tc
+    # The runtime warning fires once per platform per process; clear the guard
+    # so this test is deterministic regardless of prior resolutions.
+    _tc._warned_invalid_platform_toolsets.discard("cli")
+    config = {"platform_toolsets": {"cli": ["hermes"]}}
+
+    with caplog.at_level(logging.WARNING, logger="hermes_cli.tools_config"):
+        _get_platform_tools(config, "cli")
+
+    warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING]
+    assert any("#38798" in m and "hermes" in m for m in warnings), warnings
+
+
+def test_invalid_platform_toolsets_runtime_warning_fires_once(caplog):
+    """The runtime warning is deduped per platform — a persistently-corrupt
+    config must not spam an identical warning on every tool resolution."""
+    import hermes_cli.tools_config as _tc
+    _tc._warned_invalid_platform_toolsets.discard("cli")
+    config = {"platform_toolsets": {"cli": ["hermes"]}}
+
+    with caplog.at_level(logging.WARNING, logger="hermes_cli.tools_config"):
+        _get_platform_tools(config, "cli")
+        _get_platform_tools(config, "cli")
+        _get_platform_tools(config, "cli")
+
+    hits = [r for r in caplog.records if "#38798" in r.getMessage()]
+    assert len(hits) == 1, f"expected exactly one warning, got {len(hits)}"
+
+
+def test_valid_platform_toolsets_no_runtime_warning(caplog):
+    """A correctly-configured platform must not emit the #38798 warning."""
+    config = {"platform_toolsets": {"cli": ["hermes-cli"]}}
+
+    with caplog.at_level(logging.WARNING, logger="hermes_cli.tools_config"):
+        _get_platform_tools(config, "cli")
+
+    assert not any("#38798" in r.getMessage() for r in caplog.records)
+
+
+def test_partially_valid_platform_toolsets_no_runtime_warning(caplog):
+    """When at least one configured toolset is valid, tools still resolve, so
+    the runtime zero-tools warning must not fire (the migration-time check still
+    flags the individual bad name)."""
+    config = {"platform_toolsets": {"cli": ["hermes-cli", "bogus"]}}
+
+    with caplog.at_level(logging.WARNING, logger="hermes_cli.tools_config"):
+        _get_platform_tools(config, "cli")
+
+    assert not any("#38798" in r.getMessage() for r in caplog.records)
+
+
 def test_agent_disabled_toolsets_empty_list_is_noop():
    """Empty or missing disabled_toolsets should not change behavior."""
    config_empty = {"agent": {"disabled_toolsets": []}}
--- a/tests/hermes_cli/test_toolset_validation.py
+++ b/tests/hermes_cli/test_toolset_validation.py
@ -0,0 +1,91 @@
+"""Unit tests for hermes_cli.toolset_validation (see #38798).
+
+Pure logic — the validity predicate is injected, so these tests need neither the
+tool registry nor a running Hermes.
+"""
+
+import pytest
+
+from hermes_cli.toolset_validation import validate_platform_toolsets
+
+# A representative set of real toolset names. `hermes` is deliberately absent —
+# that is the corruption #38798 reported (`hermes-cli` rewritten to `hermes`).
+_KNOWN = {
+    "hermes-cli",
+    "hermes-telegram",
+    "hermes-discord",
+    "terminal",
+    "web",
+}
+
+
+def _is_valid(name):
+    return name in _KNOWN
+
+
+def test_valid_config_produces_no_warnings():
+    cfg = {"cli": ["hermes-cli"], "telegram": ["hermes-telegram"]}
+    assert validate_platform_toolsets(cfg, _is_valid) == []
+
+
+def test_38798_corruption_warns_and_suggests_correct_name():
+    # The exact reported shape: cli holds 'hermes' instead of 'hermes-cli'.
+    warnings = validate_platform_toolsets({"cli": ["hermes"]}, _is_valid)
+    unknown = [w for w in warnings if "unknown toolset 'hermes'" in w]
+    assert len(unknown) == 1
+    # Actionable: points at the valid name the entry should have been.
+    assert "did you mean 'hermes-cli'?" in unknown[0]
+    # And the zero-valid-toolsets safety net fires.
+    assert any("zero valid toolsets" in w for w in warnings)
+
+
+def test_mixed_valid_and_invalid_flags_only_the_invalid():
+    cfg = {"cli": ["hermes-cli"], "discord": ["bogus"]}
+    warnings = validate_platform_toolsets(cfg, _is_valid)
+    # One valid entry exists, so no zero-valid warning.
+    assert not any("zero valid toolsets" in w for w in warnings)
+    assert len(warnings) == 1
+    assert "platform 'discord'" in warnings[0]
+    assert "unknown toolset 'bogus'" in warnings[0]
+
+
+def test_unknown_without_valid_platform_default_omits_suggestion():
+    # hermes-mystery is not a known toolset, so no "did you mean" hint.
+    warnings = validate_platform_toolsets({"mystery": ["nope"]}, _is_valid)
+    unknown = [w for w in warnings if "unknown toolset 'nope'" in w]
+    assert len(unknown) == 1
+    assert "did you mean" not in unknown[0]
+
+
+@pytest.mark.parametrize("value", [None, {}, [], "hermes-cli", 42])
+def test_non_dict_or_empty_yields_no_warnings(value):
+    assert validate_platform_toolsets(value, _is_valid) == []
+
+
+def test_scalar_toolset_value_is_accepted():
+    # Some configs store the toolset as a bare string rather than a list.
+    assert validate_platform_toolsets({"cli": "hermes-cli"}, _is_valid) == []
+
+
+def test_non_string_entries_are_skipped_not_counted_invalid():
+    cfg = {"cli": [None, 123, "hermes-cli"]}
+    # The junk entries are ignored; the valid one keeps it from being "zero".
+    assert validate_platform_toolsets(cfg, _is_valid) == []
+
+
+def test_all_invalid_reports_each_and_the_zero_state():
+    cfg = {"cli": ["hermes"], "discord": ["hermes"]}
+    warnings = validate_platform_toolsets(cfg, _is_valid)
+    assert sum("unknown toolset" in w for w in warnings) == 2
+    assert any("zero valid toolsets" in w for w in warnings)
+
+
+def test_real_validate_toolset_treats_hermes_cli_valid_and_hermes_invalid():
+    # Ties the helper to reality: the canonical registry check agrees that
+    # `hermes-cli` is the real toolset and `hermes` is not (the #38798 crux).
+    from toolsets import validate_toolset
+
+    assert validate_toolset("hermes-cli") is True
+    assert validate_toolset("hermes") is False
+    warnings = validate_platform_toolsets({"cli": ["hermes"]}, validate_toolset)
+    assert any("did you mean 'hermes-cli'?" in w for w in warnings)