fix(config): prevent save_config from materialising schema defaults

Fixes #27354

Root cause:  called during init (or by any code path
that saves ) wrote injected schema defaults into
config.yaml as if the user had authored them.  Two fix layers:

1.  now only injects
    when the user actually set
    somewhere (root or agent).  A user who never set
    keeps it absent, so 's explicit-path
   detection won't treat it as user-authored.

2.  gains a  parameter and a
   new  pass that removes keys matching
    unless those paths were explicitly present in the
   **raw** (pre-normalization) config on disk.  Explicit-path detection
   uses  on  *before* any
   normalisation runs — preventing injected-in defaults from being
   mistaken for user-set values.

All migration and edit-config call sites pass
to preserve their intentional default-seeding behaviour.

New helpers:
-   — collects leaf-key paths from a raw dict
-    — removes keys matching schema defaults

Test coverage: 4 new regression tests (59 total, all passing).
This commit is contained in:
郝鹏宇 2026-05-17 19:18:09 +01:00 committed by Teknium
parent 6dcc579bcb
commit 98488c4be4
2 changed files with 269 additions and 19 deletions

View file

@ -27,7 +27,7 @@ import threading
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
from typing import Dict, Any, Optional, List, Tuple, Set
from hermes_cli.secret_prompt import masked_secret_prompt
@ -4852,7 +4852,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
display["tool_progress"] = "all"
results["config_added"].append("display.tool_progress=all (default)")
config["display"] = display
save_config(config)
save_config(config, strip_defaults=False)
if not quiet:
print(f" ✓ Migrated tool progress to config.yaml: {display['tool_progress']}")
@ -4867,7 +4867,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
else:
config["timezone"] = ""
results["config_added"].append("timezone= (empty, uses server-local)")
save_config(config)
save_config(config, strip_defaults=False)
if not quiet:
tz_display = config["timezone"] or "(server-local)"
print(f" ✓ Added timezone to config.yaml: {tz_display}")
@ -4941,7 +4941,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
config["providers"] = providers_dict
# Remove the old list — runtime reads via get_compatible_custom_providers()
config.pop("custom_providers", None)
save_config(config)
save_config(config, strip_defaults=False)
if not quiet:
print(f" ✓ Migrated {migrated_count} custom provider(s) to providers: section")
for key in list(providers_dict.keys())[-migrated_count:]:
@ -5009,7 +5009,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
provider_cfg = stt.setdefault(provider, {})
provider_cfg["model"] = legacy_model
config["stt"] = stt
save_config(config)
save_config(config, strip_defaults=False)
if not quiet:
print(f" ✓ Migrated legacy stt.model to provider-specific config")
@ -5023,7 +5023,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
display["interim_assistant_messages"] = True
config["display"] = display
results["config_added"].append("display.interim_assistant_messages=true (default)")
save_config(config)
save_config(config, strip_defaults=False)
if not quiet:
print(" ✓ Added display.interim_assistant_messages=true")
@ -5045,7 +5045,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
platforms[plat]["tool_progress"] = mode
display["platforms"] = platforms
config["display"] = display
save_config(config)
save_config(config, strip_defaults=False)
if not quiet:
migrated = ", ".join(f"{p}={m}" for p, m in old_overrides.items())
print(f" ✓ Migrated tool_progress_overrides → display.platforms: {migrated}")
@ -5081,7 +5081,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
migrated_keys.append(f"base_url={s_base_url}")
if migrated_keys or s_model is not None or s_provider is not None or s_base_url is not None:
config["compression"] = comp
save_config(config)
save_config(config, strip_defaults=False)
if not quiet:
if migrated_keys:
print(f" ✓ Migrated compression.summary_* → auxiliary.compression: {', '.join(migrated_keys)}")
@ -5137,7 +5137,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
plugins_cfg["enabled"] = grandfathered
config["plugins"] = plugins_cfg
save_config(config)
save_config(config, strip_defaults=False)
results["config_added"].append(
f"plugins.enabled (opt-in allow-list, {len(grandfathered)} grandfathered)"
)
@ -5217,7 +5217,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
touched = True
if touched:
save_config(config)
save_config(config, strip_defaults=False)
if added_curator:
results["config_added"].append(
f"curator ({len(added_curator)} default key(s))"
@ -5485,12 +5485,12 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
# Update version and save
config["_config_version"] = latest_ver
save_config(config)
save_config(config, strip_defaults=False)
elif current_ver < latest_ver:
# Just update version
config = read_raw_config()
config["_config_version"] = latest_ver
save_config(config)
save_config(config, strip_defaults=False)
# ── Skill-declared config vars ──────────────────────────────────────
# Skills can declare config.yaml settings they need via
@ -5531,7 +5531,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
f"Skipped {var['key']} — skill '{var.get('skill', '?')}' may ask for it later"
)
print()
save_config(config)
save_config(config, strip_defaults=False)
else:
print(" Set later with: hermes config set <key> <value>")
@ -5680,6 +5680,79 @@ def _preserve_env_ref_templates(current, raw, loaded_expanded=None):
return current
def _explicit_config_paths(config: Dict[str, Any]) -> Set[Tuple[str, ...]]:
"""Return leaf paths explicitly present in a raw config dict.
Computed on the **raw** (un-normalized, un-expanded) config so that
values injected by normalisation (e.g. ``agent.max_turns`` from
``DEFAULT_CONFIG``) are not mistakenly treated as user-set.
Used by ``save_config`` to build the *preserve* set passed to
``_strip_default_values`` so only user-authored keys survive the
defaults-strip pass.
"""
paths: Set[Tuple[str, ...]] = set()
def _walk(value: Any, path: Tuple[str, ...]) -> None:
if isinstance(value, dict):
for key, child in value.items():
_walk(child, path + (key,))
return
if path:
paths.add(path)
_walk(config, ())
return paths
def _strip_default_values(
config: Dict[str, Any],
defaults: Dict[str, Any] = DEFAULT_CONFIG,
preserve_keys: Optional[Set[Tuple[str, ...]]] = None,
) -> Dict[str, Any]:
"""Return *config* without keys whose values match *defaults*.
Keys in *preserve_keys* (explicitly present in the user's raw config,
before any normalisation) are always kept even when they equal the
default, so user-set values such as ``memory.user_char_limit: 2200``
survive a ``save_config`` round-trip.
Nested dicts whose every child is stripped are removed entirely so
default-only subtrees (e.g. ``gateway``) never bloat ``config.yaml``
when the user has nothing to say about them.
"""
preserve_keys = {("_config_version",)} | set(preserve_keys or ())
def _strip(value: Any, default: Any, path: Tuple[str, ...]) -> Any:
if path in preserve_keys:
return copy.deepcopy(value)
if isinstance(value, dict) and value:
default_dict = default if isinstance(default, dict) else {}
stripped: Dict[str, Any] = {}
for key, child in value.items():
child_default = default_dict.get(key)
stripped_child = _strip(child, child_default, path + (key,))
if stripped_child is not None:
stripped[key] = stripped_child
if stripped:
return stripped
# Entire subtree stripped — remove it
return None
if value == default:
return None
return copy.deepcopy(value)
result: Dict[str, Any] = {}
for key, value in config.items():
stripped = _strip(value, defaults.get(key), (key,))
if stripped is not None:
result[key] = stripped
return result
def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
"""Move stale root-level provider/base_url/context_length into model section.
@ -5732,14 +5805,29 @@ def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
"""Normalize legacy root-level max_turns into agent.max_turns."""
"""Normalize legacy root-level max_turns into agent.max_turns.
Only injects the schema default when the user actually set max_turns
somewhere (root level or under ``agent``). A bare ``load_config()``
call that passes the result straight to ``save_config()`` should not
materialise ``agent.max_turns`` in config.yaml when the user never set
it that makes the default sticky and blocks future schema changes.
"""
config = dict(config)
agent_config = dict(config.get("agent") or {})
if "max_turns" in config and "max_turns" not in agent_config:
had_root = "max_turns" in config
had_agent = "max_turns" in agent_config
if had_root and not had_agent:
agent_config["max_turns"] = config["max_turns"]
if "max_turns" not in agent_config:
# Only inject the default when the user explicitly set max_turns
# (either root-level or under agent). Otherwise leave it absent so
# save_config can omit it and the schema default fills in at runtime.
if not had_root and not had_agent:
pass # deliberately do not inject DEFAULT_CONFIG default
elif "max_turns" not in agent_config:
agent_config["max_turns"] = DEFAULT_CONFIG["agent"]["max_turns"]
config["agent"] = agent_config
@ -6159,8 +6247,20 @@ _COMMENTED_SECTIONS = """
"""
def save_config(config: Dict[str, Any]):
"""Save configuration to ~/.hermes/config.yaml."""
def save_config(
config: Dict[str, Any],
*,
strip_defaults: bool = True,
preserve_keys: Optional[Set[Tuple[str, ...]]] = None,
):
"""Save configuration to ~/.hermes/config.yaml.\n
Default values from ``DEFAULT_CONFIG`` are not written to disk unless
the user explicitly set them (i.e. the path exists in the raw config
before any normalisation). This prevents config.yaml from being
contaminated with schema defaults on every save, which makes future
default changes invisible to users.
"""
with _CONFIG_LOCK:
if is_managed():
managed_error("save configuration")
@ -6185,6 +6285,16 @@ def save_config(config: Dict[str, Any]):
ensure_hermes_home()
config_path = get_config_path()
# Compute explicit user paths BEFORE any normalisation --------
# _normalize_max_turns_config may inject agent.max_turns from
# DEFAULT_CONFIG; using the raw dict preserves which paths the
# user actually set so _strip_default_values can keep them.
_raw_for_paths = read_raw_config()
explicit_raw_paths: Optional[Set[Tuple[str, ...]]] = (
_explicit_config_paths(_raw_for_paths) if _raw_for_paths else None
)
# ----------------------------------------------------------------
current_normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
normalized = current_normalized
raw_existing = _normalize_root_model_keys(_normalize_max_turns_config(read_raw_config()))
@ -6195,6 +6305,25 @@ def save_config(config: Dict[str, Any]):
_LAST_EXPANDED_CONFIG_BY_PATH.get(str(config_path)),
)
# Strip schema-default values so the user's custom settings are not
# silently reset on every save. Keys the user explicitly set (paths
# from the raw pre-normalisation config) are always preserved.
effective_preserve_keys: Set[Tuple[str, ...]] = {("_config_version",)}
if explicit_raw_paths:
effective_preserve_keys.update(explicit_raw_paths)
if preserve_keys:
effective_preserve_keys.update(preserve_keys)
if strip_defaults and effective_preserve_keys:
# _preserve_env_ref_templates may return Any; cast for type-checker.
from typing import cast as _cast
normalized = _cast(Dict[str, Any], normalized)
normalized = _strip_default_values(
normalized, # type: ignore[arg-type]
DEFAULT_CONFIG,
preserve_keys=effective_preserve_keys,
)
# Build optional commented-out sections for features that are off by
# default or only relevant when explicitly configured.
parts = []
@ -6991,7 +7120,7 @@ def edit_config():
# Ensure config exists
if not config_path.exists():
save_config(DEFAULT_CONFIG)
save_config(DEFAULT_CONFIG, strip_defaults=False)
print(f"Created {config_path}")
# Find editor

View file

@ -13,9 +13,12 @@ from hermes_cli.config import (
get_hermes_home,
ensure_hermes_home,
get_compatible_custom_providers,
_explicit_config_paths,
_normalize_max_turns_config,
load_config,
load_env,
migrate_config,
read_raw_config,
remove_env_value,
save_config,
save_env_value,
@ -1345,3 +1348,121 @@ class TestVerifyOnStopMigration:
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load((tmp_path / "config.yaml").read_text())
assert raw["agent"]["verify_on_stop"] is True
class TestConfigNormalizationDoesNotOverwriteUserValues:
"""Regression tests for #27354."""
def test_save_config_does_not_inject_max_turns_when_unset(self, tmp_path):
config_path = tmp_path / "config.yaml"
config_path.write_text(
yaml.safe_dump(
{
"_config_version": DEFAULT_CONFIG["_config_version"],
"memory": {"user_char_limit": 2200},
}
),
encoding="utf-8",
)
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
save_config(load_config())
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert "max_turns" not in raw.get("agent", {})
assert raw["memory"]["user_char_limit"] == 2200
def test_save_config_preserves_explicit_default_values(self, tmp_path):
config_path = tmp_path / "config.yaml"
config_path.write_text(
yaml.safe_dump(
{
"_config_version": DEFAULT_CONFIG["_config_version"],
"approvals": {"mode": "manual"},
"memory": {"user_char_limit": 2200},
}
),
encoding="utf-8",
)
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
save_config(load_config())
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["approvals"]["mode"] == "manual"
assert raw["memory"]["user_char_limit"] == 2200
def test_save_config_preserves_config_version_when_raw_version_missing(self, tmp_path):
config_path = tmp_path / "config.yaml"
config_path.write_text(
yaml.safe_dump({"memory": {"user_char_limit": 2200}}),
encoding="utf-8",
)
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
save_config(load_config())
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
assert raw["memory"]["user_char_limit"] == 2200
def test_save_config_does_not_materialize_defaults_for_empty_sections(self, tmp_path):
config_path = tmp_path / "config.yaml"
config_path.write_text(
yaml.safe_dump(
{
"_config_version": DEFAULT_CONFIG["_config_version"],
"memory": {},
"display": {},
}
),
encoding="utf-8",
)
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
save_config(load_config())
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw == {"_config_version": DEFAULT_CONFIG["_config_version"]}
def test_save_config_honors_caller_preserve_keys(self, tmp_path):
config_path = tmp_path / "config.yaml"
config_path.write_text(
yaml.safe_dump({"_config_version": DEFAULT_CONFIG["_config_version"]}),
encoding="utf-8",
)
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
config = load_config()
config.setdefault("agent", {})["max_turns"] = DEFAULT_CONFIG["agent"]["max_turns"]
save_config(config, preserve_keys={("agent", "max_turns")})
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
assert raw["agent"]["max_turns"] == DEFAULT_CONFIG["agent"]["max_turns"]
def test_normalize_max_turns_does_not_inject_default(self):
result = _normalize_max_turns_config(
{"_config_version": DEFAULT_CONFIG["_config_version"]}
)
assert "max_turns" not in result.get("agent", {})
def test_explicit_config_paths_from_raw_before_normalization(self, tmp_path):
config_path = tmp_path / "config.yaml"
config_path.write_text(
yaml.safe_dump(
{
"_config_version": DEFAULT_CONFIG["_config_version"],
"memory": {"user_char_limit": 2200},
},
),
encoding="utf-8",
)
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
raw_paths = _explicit_config_paths(read_raw_config())
assert ("memory", "user_char_limit") in raw_paths
assert ("agent", "max_turns") not in raw_paths
def test_explicit_config_paths_ignore_empty_sections(self):
assert _explicit_config_paths({"memory": {}, "display": {}}) == set()