fix(config): surface invalid platform_toolsets instead of silently dropping tools (#38798)

A config migration (or hand-edit) that leaves an invalid toolset name in
`platform_toolsets` — e.g. the #38798 corruption that rewrote `hermes-cli` to
the non-existent `hermes` — silently disabled all affected tools:
resolve_toolset() returns [] for an unknown name, so the agent quietly lost its
tools with no error, warning, or log entry and degraded to text-only replies.

Surface it loudly at two points:
- After migration (migrate_config): validate platform_toolsets and record/print
  a warning per unknown name, with a `hermes-<platform>` suggestion when that
  would have been valid (the exact #38798 shape).
- At runtime (_get_platform_tools): if a platform was explicitly configured but
  every toolset name is invalid, log a warning when tools are resolved for a
  session — so an ALREADY-corrupted config is caught at startup, not only on the
  next `hermes update`.

Logic lives in a new pure, side-effect-free helper (toolset_validation.py) with
validate_toolset injected, so it is unit-testable without the tool registry.

Note: the original v25→v26 migration that caused the corruption no longer
exists (config format is now v30; no migration step rewrites toolset names).
This change is the durable defense against the silent-failure mode regardless
of cause, matching the issue's "Expected: log a warning".

Salvaged from #39207 by @lEWFkRAD (authorship preserved via cherry-pick).
Tests: 9 helper cases (incl. the #38798 corruption shape, mixed valid/invalid,
zero-tools state, non-dict/scalar/non-string) + a runtime caplog test — both the
helper warning and the runtime guard mutation-verified to fail without the fix.

Closes #38798. Supersedes #39581 (prevent-in-v25→v26 — that path is gone),
#41006 / #40208 (repair-migration for already-corrupted configs).
This commit is contained in:
lEWFkRAD 2026-06-26 12:17:52 +05:30 committed by kshitijk4poor
parent 0f81b0d458
commit 41ede84b93
5 changed files with 272 additions and 1 deletions

View file

@ -5308,9 +5308,29 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
config["mcp_servers"] = raw_mcp_servers
save_config(config)
# ── Always: validate platform_toolsets after migration ──
# A migration (or hand-edit) that leaves an invalid toolset name in
# platform_toolsets silently disables the affected tools — resolve_toolset()
# returns [] for an unknown name, so the agent quietly loses tools with no
# error or warning. Surface it loudly instead. See #38798.
try:
from toolsets import validate_toolset
from hermes_cli.toolset_validation import validate_platform_toolsets
ts_warnings = validate_platform_toolsets(
read_raw_config().get("platform_toolsets"), validate_toolset
)
for w in ts_warnings:
results["warnings"].append(w)
if not quiet:
print(f"{w}")
except Exception as _ts_val_err:
# best-effort; never block migration on validation
logger.debug("platform_toolsets validation skipped: %s", _ts_val_err)
if current_ver < latest_ver and not quiet:
print(f"Config version: {current_ver}{latest_ver}")
# Check for missing required env vars
missing_env = get_missing_env_vars(required_only=True)

View file

@ -34,6 +34,11 @@ from utils import base_url_hostname, is_truthy_value
logger = logging.getLogger(__name__)
# Platforms already warned about an all-invalid platform_toolsets list, so the
# runtime check in _get_platform_tools warns once per platform instead of on
# every tool resolution for a persistently-corrupt config (#38798).
_warned_invalid_platform_toolsets: Set[str] = set()
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
@ -1611,6 +1616,31 @@ def _get_platform_tools(
disabled_set = {str(ts) for ts in disabled_toolsets}
enabled_toolsets -= disabled_set
# #38798: if this platform was explicitly configured but every toolset name
# is invalid (e.g. a migration or hand-edit left `hermes` instead of
# `hermes-cli`), resolve_toolset() returns [] for each and the platform ends
# up with no native tools — silently, with no error. Surface it at the point
# tools are resolved for a session so an already-corrupted config is caught
# at runtime, not only during the next `hermes update`/`hermes doctor`.
_explicit = platform_toolsets.get(platform)
if isinstance(_explicit, list) and _explicit:
from toolsets import validate_toolset
_named = [str(t) for t in _explicit if isinstance(t, str) and t]
if (
_named
and not any(validate_toolset(t) for t in _named)
and platform not in _warned_invalid_platform_toolsets
):
_warned_invalid_platform_toolsets.add(platform)
logger.warning(
"platform '%s' has no valid toolsets configured (unknown "
"name(s): %s) - tools will be unavailable. Run `hermes tools` "
"to reconfigure. See issue #38798.",
platform,
", ".join(_named),
)
return enabled_toolsets

View file

@ -0,0 +1,74 @@
"""Validation for the ``platform_toolsets`` config section.
Pure, side-effect-free helpers so the logic is unit-testable without importing
the tool registry or launching Hermes (mirrors the decoupled-helper pattern used
elsewhere in the CLI).
Motivated by #38798: a config migration silently rewrote the valid toolset name
``hermes-cli`` to the non-existent ``hermes``. ``resolve_toolset('hermes')``
returns an empty list, so every tool silently disappeared with no error, warning,
or log entry the agent degraded to text-only replies and the cause took
significant debugging to find. Surfacing invalid toolset names (and the
zero-tools end state) loudly turns that silent failure into an actionable one.
"""
from typing import Callable, Dict, List
def validate_platform_toolsets(
platform_toolsets: object,
is_valid_toolset: Callable[[str], bool],
) -> List[str]:
"""Return human-readable warnings for a ``platform_toolsets`` mapping.
Two failure modes are reported:
1. A toolset name that ``is_valid_toolset`` rejects usually a corrupted or
renamed entry. When ``hermes-<platform>`` would have been valid (the exact
#38798 shape, where ``cli`` held ``hermes`` instead of ``hermes-cli``),
the warning includes that as a suggestion.
2. The mapping is non-empty but resolves to *zero* valid toolsets, so the
agent would start with no tools at all.
``is_valid_toolset`` is injected (normally :func:`toolsets.validate_toolset`)
so this function performs no imports or I/O and is testable in isolation.
Args:
platform_toolsets: The raw ``platform_toolsets`` value from config. Only
``dict`` values carry toolset entries; anything else yields no
warnings (nothing to validate).
is_valid_toolset: Predicate returning ``True`` for a known toolset name.
Returns:
A list of warning strings (empty when everything is valid).
"""
warnings: List[str] = []
if not isinstance(platform_toolsets, dict) or not platform_toolsets:
return warnings
valid_count = 0
for platform, raw in platform_toolsets.items():
names = raw if isinstance(raw, list) else [raw]
for name in names:
if not isinstance(name, str) or not name:
continue
if is_valid_toolset(name):
valid_count += 1
continue
suggestion = f"hermes-{platform}"
hint = (
f" — did you mean '{suggestion}'?"
if is_valid_toolset(suggestion)
else ""
)
warnings.append(
f"platform '{platform}' references unknown toolset "
f"'{name}'{hint}"
)
if valid_count == 0:
warnings.append(
"platform_toolsets resolves to zero valid toolsets — the agent will "
"have no tools. Run `hermes tools` to reconfigure."
)
return warnings

View file

@ -1,5 +1,6 @@
"""Tests for hermes_cli.tools_config platform tool persistence."""
import logging
from types import SimpleNamespace
from unittest.mock import patch
@ -58,6 +59,61 @@ def test_agent_disabled_toolsets_with_explicit_platform_config():
assert "terminal" in enabled
def test_all_invalid_platform_toolsets_logs_runtime_warning(caplog):
"""#38798: an explicit platform config whose toolset names are all invalid
(e.g. 'hermes' instead of 'hermes-cli') must warn at resolve time so an
already-corrupted config is caught at runtime, not just during migration."""
import hermes_cli.tools_config as _tc
# The runtime warning fires once per platform per process; clear the guard
# so this test is deterministic regardless of prior resolutions.
_tc._warned_invalid_platform_toolsets.discard("cli")
config = {"platform_toolsets": {"cli": ["hermes"]}}
with caplog.at_level(logging.WARNING, logger="hermes_cli.tools_config"):
_get_platform_tools(config, "cli")
warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING]
assert any("#38798" in m and "hermes" in m for m in warnings), warnings
def test_invalid_platform_toolsets_runtime_warning_fires_once(caplog):
"""The runtime warning is deduped per platform — a persistently-corrupt
config must not spam an identical warning on every tool resolution."""
import hermes_cli.tools_config as _tc
_tc._warned_invalid_platform_toolsets.discard("cli")
config = {"platform_toolsets": {"cli": ["hermes"]}}
with caplog.at_level(logging.WARNING, logger="hermes_cli.tools_config"):
_get_platform_tools(config, "cli")
_get_platform_tools(config, "cli")
_get_platform_tools(config, "cli")
hits = [r for r in caplog.records if "#38798" in r.getMessage()]
assert len(hits) == 1, f"expected exactly one warning, got {len(hits)}"
def test_valid_platform_toolsets_no_runtime_warning(caplog):
"""A correctly-configured platform must not emit the #38798 warning."""
config = {"platform_toolsets": {"cli": ["hermes-cli"]}}
with caplog.at_level(logging.WARNING, logger="hermes_cli.tools_config"):
_get_platform_tools(config, "cli")
assert not any("#38798" in r.getMessage() for r in caplog.records)
def test_partially_valid_platform_toolsets_no_runtime_warning(caplog):
"""When at least one configured toolset is valid, tools still resolve, so
the runtime zero-tools warning must not fire (the migration-time check still
flags the individual bad name)."""
config = {"platform_toolsets": {"cli": ["hermes-cli", "bogus"]}}
with caplog.at_level(logging.WARNING, logger="hermes_cli.tools_config"):
_get_platform_tools(config, "cli")
assert not any("#38798" in r.getMessage() for r in caplog.records)
def test_agent_disabled_toolsets_empty_list_is_noop():
"""Empty or missing disabled_toolsets should not change behavior."""
config_empty = {"agent": {"disabled_toolsets": []}}

View file

@ -0,0 +1,91 @@
"""Unit tests for hermes_cli.toolset_validation (see #38798).
Pure logic the validity predicate is injected, so these tests need neither the
tool registry nor a running Hermes.
"""
import pytest
from hermes_cli.toolset_validation import validate_platform_toolsets
# A representative set of real toolset names. `hermes` is deliberately absent —
# that is the corruption #38798 reported (`hermes-cli` rewritten to `hermes`).
_KNOWN = {
"hermes-cli",
"hermes-telegram",
"hermes-discord",
"terminal",
"web",
}
def _is_valid(name):
return name in _KNOWN
def test_valid_config_produces_no_warnings():
cfg = {"cli": ["hermes-cli"], "telegram": ["hermes-telegram"]}
assert validate_platform_toolsets(cfg, _is_valid) == []
def test_38798_corruption_warns_and_suggests_correct_name():
# The exact reported shape: cli holds 'hermes' instead of 'hermes-cli'.
warnings = validate_platform_toolsets({"cli": ["hermes"]}, _is_valid)
unknown = [w for w in warnings if "unknown toolset 'hermes'" in w]
assert len(unknown) == 1
# Actionable: points at the valid name the entry should have been.
assert "did you mean 'hermes-cli'?" in unknown[0]
# And the zero-valid-toolsets safety net fires.
assert any("zero valid toolsets" in w for w in warnings)
def test_mixed_valid_and_invalid_flags_only_the_invalid():
cfg = {"cli": ["hermes-cli"], "discord": ["bogus"]}
warnings = validate_platform_toolsets(cfg, _is_valid)
# One valid entry exists, so no zero-valid warning.
assert not any("zero valid toolsets" in w for w in warnings)
assert len(warnings) == 1
assert "platform 'discord'" in warnings[0]
assert "unknown toolset 'bogus'" in warnings[0]
def test_unknown_without_valid_platform_default_omits_suggestion():
# hermes-mystery is not a known toolset, so no "did you mean" hint.
warnings = validate_platform_toolsets({"mystery": ["nope"]}, _is_valid)
unknown = [w for w in warnings if "unknown toolset 'nope'" in w]
assert len(unknown) == 1
assert "did you mean" not in unknown[0]
@pytest.mark.parametrize("value", [None, {}, [], "hermes-cli", 42])
def test_non_dict_or_empty_yields_no_warnings(value):
assert validate_platform_toolsets(value, _is_valid) == []
def test_scalar_toolset_value_is_accepted():
# Some configs store the toolset as a bare string rather than a list.
assert validate_platform_toolsets({"cli": "hermes-cli"}, _is_valid) == []
def test_non_string_entries_are_skipped_not_counted_invalid():
cfg = {"cli": [None, 123, "hermes-cli"]}
# The junk entries are ignored; the valid one keeps it from being "zero".
assert validate_platform_toolsets(cfg, _is_valid) == []
def test_all_invalid_reports_each_and_the_zero_state():
cfg = {"cli": ["hermes"], "discord": ["hermes"]}
warnings = validate_platform_toolsets(cfg, _is_valid)
assert sum("unknown toolset" in w for w in warnings) == 2
assert any("zero valid toolsets" in w for w in warnings)
def test_real_validate_toolset_treats_hermes_cli_valid_and_hermes_invalid():
# Ties the helper to reality: the canonical registry check agrees that
# `hermes-cli` is the real toolset and `hermes` is not (the #38798 crux).
from toolsets import validate_toolset
assert validate_toolset("hermes-cli") is True
assert validate_toolset("hermes") is False
warnings = validate_platform_toolsets({"cli": ["hermes"]}, validate_toolset)
assert any("did you mean 'hermes-cli'?" in w for w in warnings)