hermes-agent/tests/hermes_cli/test_atomic_yaml_write.py
Teknium d539cd9004
fix(config): write config.yaml as UTF-8 to stop emoji/personality corruption (#51676)
atomic_yaml_write (and two sibling config writers) called yaml.dump
without allow_unicode=True. The default personalities shipped in cli.py
contain emoji/kaomoji, so PyYAML escaped astral-plane chars as 8-digit
\\UXXXXXXXX sequences inside multi-line double-quoted strings wrapped
with \\ line-continuations. Stricter/non-PyYAML parsers, editors, and
hand-edits break that structure into unclosed quotes, failing the whole
config parse -> silent fallback to defaults -> custom_providers lost.

Add allow_unicode=True to the canonical writer plus tui_gateway/server.py
and the telegram adapter's atomic config write so config is written as
readable UTF-8 with no escape/fold artifacts.

Fixes #51356
2026-06-23 23:28:21 -07:00

76 lines
2.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests for utils.atomic_yaml_write — crash-safe YAML file writes."""
from unittest.mock import patch
import pytest
import yaml
from utils import atomic_yaml_write
class TestAtomicYamlWrite:
def test_writes_valid_yaml(self, tmp_path):
target = tmp_path / "data.yaml"
data = {"key": "value", "nested": {"a": 1}}
atomic_yaml_write(target, data)
assert yaml.safe_load(target.read_text(encoding="utf-8")) == data
def test_cleans_up_temp_file_on_baseexception(self, tmp_path):
class SimulatedAbort(BaseException):
pass
target = tmp_path / "data.yaml"
original = {"preserved": True}
target.write_text(yaml.safe_dump(original), encoding="utf-8")
with patch("utils.yaml.dump", side_effect=SimulatedAbort):
with pytest.raises(SimulatedAbort):
atomic_yaml_write(target, {"new": True})
tmp_files = [f for f in tmp_path.iterdir() if ".tmp" in f.name]
assert len(tmp_files) == 0
assert yaml.safe_load(target.read_text(encoding="utf-8")) == original
def test_appends_extra_content(self, tmp_path):
target = tmp_path / "data.yaml"
atomic_yaml_write(target, {"key": "value"}, extra_content="\n# comment\n")
text = target.read_text(encoding="utf-8")
assert "key: value" in text
assert "# comment" in text
def test_writes_unicode_unescaped_and_round_trips(self, tmp_path):
"""Emoji/kaomoji are written as real UTF-8, not fragile escape sequences.
Regression for GitHub #51356: without allow_unicode=True, PyYAML emitted
astral-plane chars (emoji) as 8-digit `\\UXXXXXXXX` escapes inside
multi-line double-quoted strings wrapped with `\\` continuations, which
stricter/non-PyYAML parsers and hand-edits broke into unclosed quotes,
corrupting the entire config.
"""
target = tmp_path / "config.yaml"
# Mirrors the default personalities + skin cursor shipped in cli.py.
data = {
"personalities": {
"kawaii": "kawaii desu~! (◕‿◕) ★ ♪ ヽ(>∀<☆)",
"catgirl": "nya~! (=^・ω・^=) ฅ^•ﻌ•^ฅ",
"surfer": "Cowabunga! 🤙 totally rad bro",
"hype": "LET'S GOOOO!!! 🔥 LEGENDARY!",
},
"display": {"cursor": ""},
}
atomic_yaml_write(target, data)
text = target.read_text(encoding="utf-8")
# No escape artifacts of any kind — real characters on disk.
assert "\\U" not in text
assert "\\u" not in text
# Real glyphs are present verbatim.
assert "🔥" in text
assert "(=^・ω・^=)" in text
# And it reloads to exactly what was written.
assert yaml.safe_load(text) == data