Merge pull request #52386 from NousResearch/salvage/31999-yaml-indent

fix(utils): unify YAML list indent across all config writers (#31999)
This commit is contained in:
kshitij 2026-06-25 23:39:37 +05:30 committed by GitHub
commit c210e23a02
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 147 additions and 26 deletions

View file

@ -13,7 +13,6 @@ import inspect
import json
import logging
import os
import tempfile
import html as _html
import re
from datetime import datetime, timezone
@ -2035,29 +2034,14 @@ class TelegramAdapter(BasePlatformAdapter):
changed = True
if changed:
fd, tmp_path = tempfile.mkstemp(
dir=str(config_path.parent),
suffix=".tmp",
prefix=".config_",
from utils import atomic_yaml_write
atomic_yaml_write(
config_path,
config,
default_flow_style=False,
sort_keys=False,
)
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
_yaml.dump(
config,
f,
default_flow_style=False,
sort_keys=False,
allow_unicode=True,
)
f.flush()
os.fsync(f.fileno())
atomic_replace(tmp_path, config_path)
except BaseException:
try:
os.unlink(tmp_path)
except OSError:
pass
raise
logger.info(
"[%s] Persisted thread_id=%s for topic '%s' in config.yaml",
self.name, thread_id, topic_name,

View file

@ -0,0 +1,120 @@
"""Regression tests for issue #31999.
All YAML config write paths must produce 2-space-indented list items
(matching ruamel.yaml's layout). Mixing 0-indent (default PyYAML) and
2-indent (ruamel.yaml) in the same config.yaml produces a file that
stricter parsers like js-yaml reject with "bad indentation of a mapping
entry", silently dropping custom_providers and breaking model switching.
"""
import yaml
from utils import IndentDumper, atomic_yaml_write
class TestIndentDumperShape:
    """IndentDumper emits 2-space-indented list items under mapping keys."""

    @staticmethod
    def _list_item_lines(text):
        """Return the lines of *text* that open a block-sequence item (``- ...``)."""
        return [line for line in text.splitlines() if line.lstrip().startswith("- ")]

    def test_indent_dumper_produces_2_indent_lists(self):
        """List items under a mapping key must start at column 2, not 0."""
        data = {
            "custom_providers": [
                {"name": "NVIDIA", "base_url": "https://api.nvidia.com"},
            ],
        }
        out = yaml.dump(data, Dumper=IndentDumper, default_flow_style=False)
        item_lines = self._list_item_lines(out)
        # Guard against a vacuous pass: there must be at least one list item.
        assert item_lines, f"No list items found in:\n{out}"
        # Each item is indented exactly two spaces under its parent key.
        assert all(line.startswith("  - ") for line in item_lines), \
            f"Expected 2-indent list, got:\n{out}"

    def test_default_pyyaml_produces_0_indent_lists(self):
        """Default PyYAML (the buggy baseline) emits 0-indent lists."""
        data = {
            "custom_providers": [
                {"name": "NVIDIA", "base_url": "https://api.nvidia.com"},
            ],
        }
        out = yaml.dump(data, default_flow_style=False)
        item_lines = self._list_item_lines(out)
        assert item_lines, f"No list items found in:\n{out}"
        # The dash sits at column 0 (no leading spaces) in the baseline layout.
        assert all(line.startswith("- ") for line in item_lines), \
            f"Expected 0-indent list (buggy baseline), got:\n{out}"

    def test_indent_dumper_matches_ruamel_layout(self):
        """IndentDumper output should match ruamel.yaml's list-under-mapping layout."""
        data = {
            "items": [
                {"key": "value1"},
                {"key": "value2"},
            ],
        }
        pyyaml_out = yaml.dump(data, Dumper=IndentDumper, default_flow_style=False)
        # ruamel.yaml with indent(mapping=2, sequence=4, offset=2) produces:
        #   items:
        #     - key: value1
        #     - key: value2
        # The key check: list items are NOT at column 0 but at column 2.
        item_lines = self._list_item_lines(pyyaml_out)
        assert len(item_lines) == 2, f"Expected 2 list items, got:\n{pyyaml_out}"
        assert all(line.startswith("  - ") for line in item_lines), \
            f"List items not 2-indent:\n{pyyaml_out}"
class TestAtomicYamlWriteUsesIndentDumper:
    """atomic_yaml_write must produce 2-indent lists via IndentDumper."""

    def test_atomic_yaml_write_produces_2_indent_lists(self, tmp_path):
        """The file written by atomic_yaml_write must have 2-indent list items."""
        data = {
            "custom_providers": [
                {"name": "Test", "base_url": "https://example.com"},
            ],
        }
        path = tmp_path / "config.yaml"
        atomic_yaml_write(path, data)
        content = path.read_text(encoding="utf-8")
        item_lines = [
            line for line in content.splitlines() if line.lstrip().startswith("- ")
        ]
        # Require at least one item so the indent check cannot pass vacuously.
        assert item_lines, f"No list items found in file:\n{content}"
        # Check the actual column of every item rather than a loose substring.
        assert all(line.startswith("  - ") for line in item_lines), \
            f"Expected 2-indent list in file, got:\n{content}"

    def test_atomic_yaml_write_preserves_unicode(self, tmp_path):
        """allow_unicode=True should write real UTF-8, not escape sequences."""
        data = {"name": "Tëst Näme"}
        path = tmp_path / "config.yaml"
        atomic_yaml_write(path, data)
        content = path.read_text(encoding="utf-8")
        assert "Tëst Näme" in content

    def test_atomic_yaml_write_is_atomic(self, tmp_path):
        """atomic_yaml_write should create the file and clean up temp files."""
        data = {"key": "value"}
        path = tmp_path / "config.yaml"
        atomic_yaml_write(path, data)
        assert path.exists()
        assert path.read_text(encoding="utf-8").strip().endswith("value")
        # No leftover temp files.
        # NOTE(review): the glob assumes temp files are named ".config_*.tmp";
        # confirm against atomic_yaml_write's temp-file naming.
        temp_files = list(tmp_path.glob(".config_*.tmp"))
        assert len(temp_files) == 0
class TestRoundtripConsistency:
    """Files produced by atomic_yaml_write must be readable by ruamel.yaml."""

    def test_pyyaml_output_loads_in_ruamel(self, tmp_path):
        """Write a config with atomic_yaml_write, then parse it with ruamel.yaml."""
        providers = [
            {"name": "Provider A", "base_url": "https://a.example.com"},
            {"name": "Provider B", "base_url": "https://b.example.com"},
        ]
        payload = {
            "custom_providers": providers,
            "fallback_providers": ["backup1", "backup2"],
        }
        target = tmp_path / "config.yaml"
        atomic_yaml_write(target, payload)

        # Imported here, after the write, so ruamel.yaml is only needed by
        # this one test.
        from ruamel.yaml import YAML

        written_text = target.read_text(encoding="utf-8")
        parsed = YAML(typ="rt").load(written_text)

        first_provider = parsed["custom_providers"][0]
        assert first_provider["name"] == "Provider A"
        assert parsed["fallback_providers"] == ["backup1", "backup2"]

View file

@ -1541,11 +1541,11 @@ def _apply_managed(cfg: dict) -> dict:
def _save_cfg(cfg: dict):
global _cfg_cache, _cfg_mtime, _cfg_path
import yaml
from utils import atomic_yaml_write
path = _hermes_home / "config.yaml"
with open(path, "w", encoding="utf-8") as f:
yaml.safe_dump(cfg, f, allow_unicode=True)
atomic_yaml_write(path, cfg)
with _cfg_lock:
_cfg_cache = copy.deepcopy(cfg)
_cfg_path = path

View file

@ -177,6 +177,22 @@ def atomic_json_write(
raise
class IndentDumper(yaml.SafeDumper):
    """``yaml.SafeDumper`` variant that never writes "indentless" sequences.

    By default PyYAML emits "indentless" sequences: list items start at
    the same column as their parent mapping key. ``ruamel.yaml`` (used by
    :func:`atomic_roundtrip_yaml_update`) emits 2-space-indented sequences
    instead. When both styles end up in the same ``config.yaml``, stricter
    parsers such as ``js-yaml`` reject the file with ``bad indentation of a
    mapping entry``.

    This dumper forces ``indentless=False`` so that its output lines up
    with the ``ruamel.yaml`` layout and every write path produces the same
    list indentation (#31999).
    """

    def increase_indent(self, flow=False, indentless=False):  # noqa: ARG002
        """Increase the indent level, ignoring the caller's ``indentless``."""
        # The incoming ``indentless`` value is deliberately discarded: the
        # parent implementation is always asked for an indented sequence.
        return super().increase_indent(flow=flow, indentless=False)
def atomic_yaml_write(
path: Union[str, Path],
data: Any,
@ -221,6 +237,7 @@ def atomic_yaml_write(
yaml.dump(
data,
f,
Dumper=IndentDumper,
default_flow_style=default_flow_style,
sort_keys=sort_keys,
allow_unicode=True,