From 228a4d11ae258635cfecc1c322c1712191b6db3c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 10 May 2026 22:36:19 -0700 Subject: [PATCH] fix(config): warn loudly on YAML parse failure instead of silent default fallback (#23585) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A YAML parse error in ~/.hermes/config.yaml caused load_config() to print one line to stdout (Warning: Failed to load config: ...) and silently fall back to DEFAULT_CONFIG, dropping every user override (auxiliary providers, fallback chain, model settings). Users only noticed when downstream behavior misbehaved — see issue #23570 where a tab-indent error in the auxiliary section caused aux fallback to use OpenRouter (depleted) instead of the configured Codex/MiniMax chain. Now: log at WARNING (so 'hermes logs' surfaces it), write a prominent line to stderr, dedup on (path, mtime_ns, size) so concurrent loads don't spam, and re-warn after the user edits the file. Both call sites (raw read + merged load) route through the same helper. Refs #23570 --- hermes_cli/config.py | 47 ++++++++++++++++++++- tests/hermes_cli/test_config.py | 75 +++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 2 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 2b0262dbac0..3b79da754ba 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -28,6 +28,48 @@ from typing import Dict, Any, Optional, List, Tuple logger = logging.getLogger(__name__) +# Track which (config_path, mtime_ns, size) tuples we've already warned about +# so concurrent CLI/gateway loads of a broken config.yaml don't spam stderr +# every time. Cleared automatically when the file changes (different mtime). +_CONFIG_PARSE_WARNED: set = set() + + +def _warn_config_parse_failure(config_path: Path, exc: Exception) -> None: + """Surface a config.yaml parse failure to user, log, and stderr. + + A YAML parse error in ``~/.hermes/config.yaml`` causes ``load_config()`` + to silently fall back to ``DEFAULT_CONFIG``, which means every user + override (auxiliary providers, fallback chain, model overrides, etc.) + is dropped. Before this helper that was a one-line ``print(...)`` that + scrolled off-screen on the first invocation and was never seen again. + + Now: warn once per (path, mtime_ns, size) on stderr **and** in + ``agent.log`` / ``errors.log`` at WARNING level so ``hermes logs`` + surfaces it. Re-warns automatically if the file changes (different + mtime/size), so users editing the config see the next failure. + """ + try: + st = config_path.stat() + key = (str(config_path), st.st_mtime_ns, st.st_size) + except OSError: + key = (str(config_path), 0, 0) + if key in _CONFIG_PARSE_WARNED: + return + _CONFIG_PARSE_WARNED.add(key) + + msg = ( + f"Failed to parse {config_path}: {exc}. " + f"Falling back to default config — every user override " + f"(auxiliary providers, fallback chain, model settings) is being IGNORED. " + f"Fix the YAML and restart." + ) + logger.warning(msg) + try: + sys.stderr.write(f"⚠️ hermes config: {msg}\n") + sys.stderr.flush() + except Exception: + pass + _IS_WINDOWS = platform.system() == "Windows" _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") _LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {} @@ -4004,7 +4046,8 @@ def read_raw_config() -> Dict[str, Any]: try: with open(config_path, encoding="utf-8") as f: data = yaml.safe_load(f) or {} - except Exception: + except Exception as e: + _warn_config_parse_failure(config_path, e) return {} if not isinstance(data, dict): @@ -4054,7 +4097,7 @@ def load_config() -> Dict[str, Any]: config = _deep_merge(config, user_config) except Exception as e: - print(f"Warning: Failed to load config: {e}") + _warn_config_parse_failure(config_path, e) normalized = _normalize_root_model_keys(_normalize_max_turns_config(config)) expanded = _expand_env_vars(normalized) diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py index 456439b5741..1dbe03b3441 100644 --- a/tests/hermes_cli/test_config.py +++ b/tests/hermes_cli/test_config.py @@ -81,6 +81,81 @@ class TestLoadConfigDefaults: assert "max_turns" not in config +class TestLoadConfigParseFailure: + """A YAML parse failure must NOT silently fall back to defaults. + + Before issue #23570 this was a single ``print(...)`` that scrolled past + on the first invocation — users saw aux-fallback misbehavior with no clue + their config.yaml was being ignored. The helper must: + * log at WARNING (so ``hermes logs`` surfaces it) + * also write to stderr (so it's visible at startup even before + ``setup_logging()`` has wired up file handlers) + * dedup on (path, mtime_ns, size) so concurrent loads don't spam + * re-warn after the user edits the file (different mtime) + """ + + def test_logs_and_warns_on_parse_failure(self, tmp_path, caplog, capsys): + # Reset the dedup cache so this test isn't affected by other tests + # that may have warned about a different broken config. + from hermes_cli import config as cfg_mod + cfg_mod._CONFIG_PARSE_WARNED.clear() + + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + (tmp_path / "config.yaml").write_text("\tbroken tab indent:\n") + + import logging + with caplog.at_level(logging.WARNING, logger="hermes_cli.config"): + config = load_config() + + # Falls back to defaults — confirms the silent-fallback we're warning about + assert config["model"] == DEFAULT_CONFIG["model"] + + # WARNING-level log was emitted with file path + reason + assert any( + str(tmp_path / "config.yaml") in rec.message + and "Falling back to default config" in rec.message + for rec in caplog.records + ), f"expected WARNING log, got: {[r.message for r in caplog.records]}" + + # stderr also got a user-visible message (with the ⚠️ marker so it + # stands out at hermes startup before logging is configured) + captured = capsys.readouterr() + assert "hermes config:" in captured.err + assert str(tmp_path / "config.yaml") in captured.err + + def test_dedup_on_repeated_load_same_file(self, tmp_path, capsys): + from hermes_cli import config as cfg_mod + cfg_mod._CONFIG_PARSE_WARNED.clear() + + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + (tmp_path / "config.yaml").write_text("\tbroken:\n") + + load_config() + first = capsys.readouterr().err + assert "hermes config:" in first + + load_config() + second = capsys.readouterr().err + assert second == "", "second load should NOT re-warn (same file, same mtime)" + + def test_rewarns_after_file_edit(self, tmp_path, capsys): + import time + from hermes_cli import config as cfg_mod + cfg_mod._CONFIG_PARSE_WARNED.clear() + + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + (tmp_path / "config.yaml").write_text("\tbroken:\n") + load_config() + capsys.readouterr() # discard first warning + + # Edit the file (still broken, but different content) — mtime changes + time.sleep(0.05) + (tmp_path / "config.yaml").write_text("\tstill broken differently:\n") + load_config() + after_edit = capsys.readouterr().err + assert "hermes config:" in after_edit, "edited file should re-warn" + + class TestSaveAndLoadRoundtrip: def test_roundtrip(self, tmp_path): with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):