mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-25 05:52:34 +00:00
fix(config): warn loudly on YAML parse failure instead of silent default fallback (#23585)
A YAML parse error in ~/.hermes/config.yaml caused load_config() to print one line to stdout (Warning: Failed to load config: ...) and silently fall back to DEFAULT_CONFIG, dropping every user override (auxiliary providers, fallback chain, model settings). Users only noticed when downstream behavior misbehaved — see issue #23570 where a tab-indent error in the auxiliary section caused aux fallback to use OpenRouter (depleted) instead of the configured Codex/MiniMax chain. Now: log at WARNING (so 'hermes logs' surfaces it), write a prominent line to stderr, dedup on (path, mtime_ns, size) so concurrent loads don't spam, and re-warn after the user edits the file. Both call sites (raw read + merged load) route through the same helper. Refs #23570
This commit is contained in:
parent
3af3c4eb8c
commit
228a4d11ae
2 changed files with 120 additions and 2 deletions
|
|
@ -28,6 +28,48 @@ from typing import Dict, Any, Optional, List, Tuple
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Track which (config_path, mtime_ns, size) tuples we've already warned about
|
||||||
|
# so concurrent CLI/gateway loads of a broken config.yaml don't spam stderr
|
||||||
|
# every time. Cleared automatically when the file changes (different mtime).
|
||||||
|
_CONFIG_PARSE_WARNED: set = set()
|
||||||
|
|
||||||
|
|
||||||
|
def _warn_config_parse_failure(config_path: Path, exc: Exception) -> None:
|
||||||
|
"""Surface a config.yaml parse failure to user, log, and stderr.
|
||||||
|
|
||||||
|
A YAML parse error in ``~/.hermes/config.yaml`` causes ``load_config()``
|
||||||
|
to silently fall back to ``DEFAULT_CONFIG``, which means every user
|
||||||
|
override (auxiliary providers, fallback chain, model overrides, etc.)
|
||||||
|
is dropped. Before this helper that was a one-line ``print(...)`` that
|
||||||
|
scrolled off-screen on the first invocation and was never seen again.
|
||||||
|
|
||||||
|
Now: warn once per (path, mtime_ns, size) on stderr **and** in
|
||||||
|
``agent.log`` / ``errors.log`` at WARNING level so ``hermes logs``
|
||||||
|
surfaces it. Re-warns automatically if the file changes (different
|
||||||
|
mtime/size), so users editing the config see the next failure.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
st = config_path.stat()
|
||||||
|
key = (str(config_path), st.st_mtime_ns, st.st_size)
|
||||||
|
except OSError:
|
||||||
|
key = (str(config_path), 0, 0)
|
||||||
|
if key in _CONFIG_PARSE_WARNED:
|
||||||
|
return
|
||||||
|
_CONFIG_PARSE_WARNED.add(key)
|
||||||
|
|
||||||
|
msg = (
|
||||||
|
f"Failed to parse {config_path}: {exc}. "
|
||||||
|
f"Falling back to default config — every user override "
|
||||||
|
f"(auxiliary providers, fallback chain, model settings) is being IGNORED. "
|
||||||
|
f"Fix the YAML and restart."
|
||||||
|
)
|
||||||
|
logger.warning(msg)
|
||||||
|
try:
|
||||||
|
sys.stderr.write(f"⚠️ hermes config: {msg}\n")
|
||||||
|
sys.stderr.flush()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
_IS_WINDOWS = platform.system() == "Windows"
|
_IS_WINDOWS = platform.system() == "Windows"
|
||||||
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
||||||
_LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
|
_LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
|
||||||
|
|
@ -4004,7 +4046,8 @@ def read_raw_config() -> Dict[str, Any]:
|
||||||
try:
|
try:
|
||||||
with open(config_path, encoding="utf-8") as f:
|
with open(config_path, encoding="utf-8") as f:
|
||||||
data = yaml.safe_load(f) or {}
|
data = yaml.safe_load(f) or {}
|
||||||
except Exception:
|
except Exception as e:
|
||||||
|
_warn_config_parse_failure(config_path, e)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
if not isinstance(data, dict):
|
if not isinstance(data, dict):
|
||||||
|
|
@ -4054,7 +4097,7 @@ def load_config() -> Dict[str, Any]:
|
||||||
|
|
||||||
config = _deep_merge(config, user_config)
|
config = _deep_merge(config, user_config)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Warning: Failed to load config: {e}")
|
_warn_config_parse_failure(config_path, e)
|
||||||
|
|
||||||
normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
|
normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
|
||||||
expanded = _expand_env_vars(normalized)
|
expanded = _expand_env_vars(normalized)
|
||||||
|
|
|
||||||
|
|
@ -81,6 +81,81 @@ class TestLoadConfigDefaults:
|
||||||
assert "max_turns" not in config
|
assert "max_turns" not in config
|
||||||
|
|
||||||
|
|
||||||
|
class TestLoadConfigParseFailure:
|
||||||
|
"""A YAML parse failure must NOT silently fall back to defaults.
|
||||||
|
|
||||||
|
Before issue #23570 this was a single ``print(...)`` that scrolled past
|
||||||
|
on the first invocation — users saw aux-fallback misbehavior with no clue
|
||||||
|
their config.yaml was being ignored. The helper must:
|
||||||
|
* log at WARNING (so ``hermes logs`` surfaces it)
|
||||||
|
* also write to stderr (so it's visible at startup even before
|
||||||
|
``setup_logging()`` has wired up file handlers)
|
||||||
|
* dedup on (path, mtime_ns, size) so concurrent loads don't spam
|
||||||
|
* re-warn after the user edits the file (different mtime)
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_logs_and_warns_on_parse_failure(self, tmp_path, caplog, capsys):
|
||||||
|
# Reset the dedup cache so this test isn't affected by other tests
|
||||||
|
# that may have warned about a different broken config.
|
||||||
|
from hermes_cli import config as cfg_mod
|
||||||
|
cfg_mod._CONFIG_PARSE_WARNED.clear()
|
||||||
|
|
||||||
|
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||||
|
(tmp_path / "config.yaml").write_text("\tbroken tab indent:\n")
|
||||||
|
|
||||||
|
import logging
|
||||||
|
with caplog.at_level(logging.WARNING, logger="hermes_cli.config"):
|
||||||
|
config = load_config()
|
||||||
|
|
||||||
|
# Falls back to defaults — confirms the silent-fallback we're warning about
|
||||||
|
assert config["model"] == DEFAULT_CONFIG["model"]
|
||||||
|
|
||||||
|
# WARNING-level log was emitted with file path + reason
|
||||||
|
assert any(
|
||||||
|
str(tmp_path / "config.yaml") in rec.message
|
||||||
|
and "Falling back to default config" in rec.message
|
||||||
|
for rec in caplog.records
|
||||||
|
), f"expected WARNING log, got: {[r.message for r in caplog.records]}"
|
||||||
|
|
||||||
|
# stderr also got a user-visible message (with the ⚠️ marker so it
|
||||||
|
# stands out at hermes startup before logging is configured)
|
||||||
|
captured = capsys.readouterr()
|
||||||
|
assert "hermes config:" in captured.err
|
||||||
|
assert str(tmp_path / "config.yaml") in captured.err
|
||||||
|
|
||||||
|
def test_dedup_on_repeated_load_same_file(self, tmp_path, capsys):
|
||||||
|
from hermes_cli import config as cfg_mod
|
||||||
|
cfg_mod._CONFIG_PARSE_WARNED.clear()
|
||||||
|
|
||||||
|
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||||
|
(tmp_path / "config.yaml").write_text("\tbroken:\n")
|
||||||
|
|
||||||
|
load_config()
|
||||||
|
first = capsys.readouterr().err
|
||||||
|
assert "hermes config:" in first
|
||||||
|
|
||||||
|
load_config()
|
||||||
|
second = capsys.readouterr().err
|
||||||
|
assert second == "", "second load should NOT re-warn (same file, same mtime)"
|
||||||
|
|
||||||
|
def test_rewarns_after_file_edit(self, tmp_path, capsys):
|
||||||
|
import time
|
||||||
|
from hermes_cli import config as cfg_mod
|
||||||
|
cfg_mod._CONFIG_PARSE_WARNED.clear()
|
||||||
|
|
||||||
|
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||||
|
(tmp_path / "config.yaml").write_text("\tbroken:\n")
|
||||||
|
load_config()
|
||||||
|
capsys.readouterr() # discard first warning
|
||||||
|
|
||||||
|
# Edit the file (still broken, but different content) — mtime changes
|
||||||
|
time.sleep(0.05)
|
||||||
|
(tmp_path / "config.yaml").write_text("\tstill broken differently:\n")
|
||||||
|
load_config()
|
||||||
|
after_edit = capsys.readouterr().err
|
||||||
|
assert "hermes config:" in after_edit, "edited file should re-warn"
|
||||||
|
|
||||||
|
|
||||||
class TestSaveAndLoadRoundtrip:
|
class TestSaveAndLoadRoundtrip:
|
||||||
def test_roundtrip(self, tmp_path):
|
def test_roundtrip(self, tmp_path):
|
||||||
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue