fix(config): write config.yaml as UTF-8 to stop emoji/personality corruption (#51676)

atomic_yaml_write (and two sibling config writers) called yaml.dump
without allow_unicode=True. The default personalities shipped in cli.py
contain emoji/kaomoji, so PyYAML escaped astral-plane chars as 8-digit
\UXXXXXXXX sequences inside multi-line double-quoted strings wrapped
with \ line-continuations. Stricter/non-PyYAML parsers, editors, and
hand-edits break that structure into unclosed quotes, failing the whole
config parse -> silent fallback to defaults -> custom_providers lost.

Add allow_unicode=True to the canonical writer plus tui_gateway/server.py
and the telegram adapter's atomic config write so config is written as
readable UTF-8 with no escape/fold artifacts.

Fixes #51356
This commit is contained in:
Teknium 2026-06-23 23:28:21 -07:00 committed by GitHub
parent 8e7e104521
commit d539cd9004
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 55 additions and 3 deletions

View file

@ -2030,7 +2030,13 @@ class TelegramAdapter(BasePlatformAdapter):
)
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
_yaml.dump(config, f, default_flow_style=False, sort_keys=False)
_yaml.dump(
config,
f,
default_flow_style=False,
sort_keys=False,
allow_unicode=True,
)
f.flush()
os.fsync(f.fileno())
atomic_replace(tmp_path, config_path)

View file

@ -41,3 +41,36 @@ class TestAtomicYamlWrite:
text = target.read_text(encoding="utf-8")
assert "key: value" in text
assert "# comment" in text
def test_writes_unicode_unescaped_and_round_trips(self, tmp_path):
    """Non-ASCII config values must reach disk as literal UTF-8 text.

    Guards against GitHub #51356. When the writer omits
    allow_unicode=True, PyYAML renders astral-plane characters (emoji)
    as 8-digit `\\UXXXXXXXX` escapes inside multi-line double-quoted
    scalars folded with `\\` continuations. Stricter/non-PyYAML parsers
    and manual edits turn that layout into unclosed quotes, which
    corrupts the whole config file.
    """
    config_path = tmp_path / "config.yaml"

    # Same shape as the default personalities + skin cursor from cli.py.
    personalities = {
        "kawaii": "kawaii desu~! (◕‿◕) ★ ♪ ヽ(>∀<☆)",
        "catgirl": "nya~! (=^・ω・^=) ฅ^•ﻌ•^ฅ",
        "surfer": "Cowabunga! 🤙 totally rad bro",
        "hype": "LET'S GOOOO!!! 🔥 LEGENDARY!",
    }
    payload = {
        "personalities": personalities,
        "display": {"cursor": ""},
    }

    atomic_yaml_write(config_path, payload)
    written = config_path.read_text(encoding="utf-8")

    # Neither the 8-digit nor the 4-digit escape prefix may show up.
    for escape_prefix in ("\\U", "\\u"):
        assert escape_prefix not in written

    # The glyphs themselves are stored verbatim.
    for glyph in ("🔥", "(=^・ω・^=)"):
        assert glyph in written

    # Parsing the file yields exactly the mapping that was written.
    assert yaml.safe_load(written) == payload

View file

@ -1541,7 +1541,7 @@ def _save_cfg(cfg: dict):
path = _hermes_home / "config.yaml"
with open(path, "w", encoding="utf-8") as f:
yaml.safe_dump(cfg, f)
yaml.safe_dump(cfg, f, allow_unicode=True)
with _cfg_lock:
_cfg_cache = copy.deepcopy(cfg)
_cfg_path = path

View file

@ -211,7 +211,20 @@ def atomic_yaml_write(
)
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
yaml.dump(data, f, default_flow_style=default_flow_style, sort_keys=sort_keys)
# allow_unicode=True writes emoji/kaomoji (e.g. personalities, skin
# cursors) as real UTF-8 instead of fragile escape sequences. Without
# it, PyYAML emits astral-plane chars as `\UXXXXXXXX` (8-digit) escapes
# inside multi-line double-quoted strings wrapped with `\`
# continuations — a structure that stricter/non-PyYAML parsers and
# hand-edits routinely break into unclosed quotes, corrupting the whole
# config (GitHub #51356).
yaml.dump(
data,
f,
default_flow_style=default_flow_style,
sort_keys=sort_keys,
allow_unicode=True,
)
if extra_content:
f.write(extra_content)
f.flush()