diff --git a/plugins/platforms/telegram/adapter.py b/plugins/platforms/telegram/adapter.py
index b4458d0d7d5..b2f69647d64 100644
--- a/plugins/platforms/telegram/adapter.py
+++ b/plugins/platforms/telegram/adapter.py
@@ -2030,7 +2030,13 @@ class TelegramAdapter(BasePlatformAdapter):
             )
             try:
                 with os.fdopen(fd, "w", encoding="utf-8") as f:
-                    _yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+                    _yaml.dump(
+                        config,
+                        f,
+                        default_flow_style=False,
+                        sort_keys=False,
+                        allow_unicode=True,
+                    )
                     f.flush()
                     os.fsync(f.fileno())
                 atomic_replace(tmp_path, config_path)
diff --git a/tests/hermes_cli/test_atomic_yaml_write.py b/tests/hermes_cli/test_atomic_yaml_write.py
index c76649fce66..aacfeb9225c 100644
--- a/tests/hermes_cli/test_atomic_yaml_write.py
+++ b/tests/hermes_cli/test_atomic_yaml_write.py
@@ -41,3 +41,36 @@ class TestAtomicYamlWrite:
         text = target.read_text(encoding="utf-8")
         assert "key: value" in text
         assert "# comment" in text
+
+    def test_writes_unicode_unescaped_and_round_trips(self, tmp_path):
+        """Emoji/kaomoji are written as real UTF-8, not fragile escape sequences.
+
+        Regression for GitHub #51356: without allow_unicode=True, PyYAML emitted
+        astral-plane chars (emoji) as 8-digit `\\UXXXXXXXX` escapes inside
+        multi-line double-quoted strings wrapped with `\\` continuations, which
+        stricter/non-PyYAML parsers and hand-edits broke into unclosed quotes,
+        corrupting the entire config.
+        """
+        target = tmp_path / "config.yaml"
+        # Mirrors the default personalities + skin cursor shipped in cli.py.
+        data = {
+            "personalities": {
+                "kawaii": "kawaii desu~! (◕‿◕) ★ ♪ ヽ(>∀<☆)ノ",
+                "catgirl": "nya~! (=^・ω・^=) ฅ^•ﻌ•^ฅ",
+                "surfer": "Cowabunga! 🤙 totally rad bro",
+                "hype": "LET'S GOOOO!!! 🔥 LEGENDARY!",
+            },
+            "display": {"cursor": " ▉"},
+        }
+
+        atomic_yaml_write(target, data)
+
+        text = target.read_text(encoding="utf-8")
+        # No escape artifacts of any kind — real characters on disk.
+        assert "\\U" not in text
+        assert "\\u" not in text
+        # Real glyphs are present verbatim.
+        assert "🔥" in text
+        assert "(=^・ω・^=)" in text
+        # And it reloads to exactly what was written.
+        assert yaml.safe_load(text) == data
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index f97bd0110d2..93ad41a2dd4 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1541,7 +1541,7 @@ def _save_cfg(cfg: dict):
 
     path = _hermes_home / "config.yaml"
     with open(path, "w", encoding="utf-8") as f:
-        yaml.safe_dump(cfg, f)
+        yaml.safe_dump(cfg, f, allow_unicode=True)
     with _cfg_lock:
         _cfg_cache = copy.deepcopy(cfg)
         _cfg_path = path
diff --git a/utils.py b/utils.py
index 5e1b964debc..6ae842b4b5c 100644
--- a/utils.py
+++ b/utils.py
@@ -211,7 +211,20 @@ def atomic_yaml_write(
     )
     try:
         with os.fdopen(fd, "w", encoding="utf-8") as f:
-            yaml.dump(data, f, default_flow_style=default_flow_style, sort_keys=sort_keys)
+            # allow_unicode=True writes emoji/kaomoji (e.g. personalities, skin
+            # cursors) as real UTF-8 instead of fragile escape sequences. Without
+            # it, PyYAML emits astral-plane chars as `\UXXXXXXXX` (8-digit) escapes
+            # inside multi-line double-quoted strings wrapped with `\`
+            # continuations — a structure that stricter/non-PyYAML parsers and
+            # hand-edits routinely break into unclosed quotes, corrupting the whole
+            # config (GitHub #51356).
+            yaml.dump(
+                data,
+                f,
+                default_flow_style=default_flow_style,
+                sort_keys=sort_keys,
+                allow_unicode=True,
+            )
             if extra_content:
                 f.write(extra_content)
             f.flush()