From 77276070f5a1302908456734f2a5bdfe790260de Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Fri, 15 May 2026 14:45:31 +0530 Subject: [PATCH] fix(codex-runtime): de-dup [plugins.X] tables and stop leaking HERMES_HOME into config.toml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds on @steezkelly's Bug A fix (#25857, top-level default_permissions via _insert_managed_block_at_top_level) by addressing the other two config-corruption bugs described in #26250: Bug B (duplicate [plugins.X] tables) - Codex itself writes [plugins."@"] tables to config.toml when the user runs `codex plugins enable` directly, before hermes-agent's managed block exists. On the next migrate run, _query_codex_plugins() re-discovers the same plugins via plugin/list and render_codex_toml_section() re-emits them inside the managed block. Codex's strict TOML parser then rejects the duplicate table header on startup. - Add _strip_unmanaged_plugin_tables() that drops [plugins.*] tables from the user-content portion of the file. Only run it when plugin/list succeeded — if the RPC failed we can't re-emit and must preserve the user's tables. plugin/list is the source of truth when it answers. Bug C (HERMES_HOME pytest-tempdir leak into ~/.codex/config.toml) - _build_hermes_tools_mcp_entry() read HERMES_HOME directly from os.environ, so a sibling pytest's monkeypatch.setenv("HERMES_HOME", tmp_path) silently burned a transient pytest tempdir into the user's real ~/.codex/config.toml. After pytest reaped the tempdir, every codex-routed hermes-tools tool call failed silently. - Derive HERMES_HOME from get_hermes_home() (the canonical resolver that goes through the profile-aware path) and refuse to emit obvious test-tempdir paths via _looks_like_test_tempdir() as belt-and-suspenders for any other callsite that forgets to patch migrate(). - test_enable_succeeds_when_codex_present in test_codex_runtime_switch.py invoked the real migrate() (no mock), writing to Path.home() / .codex using whatever HERMES_HOME the running pytest session had set. Add the same migrate patch the other apply() tests already use, so the suite stops touching the user's real ~/.codex/config.toml. E2E verification (replicating the issue's repro): - Pre-state config.toml with user [mcp_servers.omx_team_run] + codex-installed [plugins."tasks@openai-curated"], HERMES_HOME="/private/var/folders/.../pytest-of-.../..." - On origin/main: tomllib refuses to load the result with "Cannot declare ('plugins', 'tasks@openai-curated') twice" AND the pytest-tempdir HERMES_HOME is burned in. - On this branch: file parses cleanly, default_permissions is top-level, exactly one [plugins."tasks@openai-curated"] table inside the managed block, no HERMES_HOME in the MCP env. 7 new regression tests covering all three bugs + the test-leak guard. `bash scripts/run_tests.sh tests/hermes_cli/test_codex_runtime_*.py` — 95 passed, 0 failed. Closes #26250 --- hermes_cli/codex_runtime_plugin_migration.py | 125 ++++++++++- .../test_codex_runtime_plugin_migration.py | 207 ++++++++++++++++++ tests/hermes_cli/test_codex_runtime_switch.py | 9 +- 3 files changed, 337 insertions(+), 4 deletions(-) diff --git a/hermes_cli/codex_runtime_plugin_migration.py b/hermes_cli/codex_runtime_plugin_migration.py index 49b4905d5b2..4b30d3ebf26 100644 --- a/hermes_cli/codex_runtime_plugin_migration.py +++ b/hermes_cli/codex_runtime_plugin_migration.py @@ -335,6 +335,72 @@ def _insert_managed_block_at_top_level(user_text: str, managed_block: str) -> st return f"{managed_block}\n{suffix}" +def _strip_unmanaged_plugin_tables(toml_text: str) -> str: + """Remove ``[plugins."@"]`` tables that live OUTSIDE the + managed block. + + Codex itself writes these tables when the user runs ``codex plugins enable`` + directly (i.e. before Hermes' migrate has ever touched the file). When we + later run migrate, ``_query_codex_plugins()`` reports the same plugins via + the live ``plugin/list`` RPC and we re-emit them inside the managed block. + The result without this strip is duplicate ``[plugins."X@Y"]`` table + headers — codex's strict TOML parser then refuses to load the file. + + We own the ``[plugins.*]`` namespace once migrate has run, so dropping any + pre-existing ``[plugins.*]`` tables is safe: ``plugin/list`` is the source + of truth for what's actually installed. The caller is expected to only + invoke this strip when ``plugin/list`` succeeded — otherwise we'd lose + plugins the user installed via ``codex`` without a way to re-emit them. + + Behavior: + * Lines beginning with ``[plugins.`` start a swallow region that ends at + the next non-``[plugins.`` table header or end-of-file. + * Content inside the managed block is untouched (callers should run + ``_strip_existing_managed_block`` first so the managed block has + already been removed when this runs). + """ + lines = toml_text.splitlines(keepends=True) + out: list[str] = [] + in_plugin_table = False + for line in lines: + stripped = line.lstrip() + # Only treat a line as a table header when it has the shape + # ``[...]`` (optionally followed by a comment). Multi-line array + # continuations like ``["nested"],`` also start with ``[`` after + # lstrip but are not headers — without this guard they would + # falsely flip ``in_plugin_table`` to False mid-table and leak + # array fragments into the output. + if _looks_like_table_header(stripped): + in_plugin_table = stripped.startswith("[plugins.") + if in_plugin_table: + continue + if in_plugin_table: + # Swallow keys/comments/blanks until the next table header. + continue + out.append(line) + return "".join(out) + + +def _looks_like_table_header(stripped_line: str) -> bool: + """Return True if ``stripped_line`` is a TOML table header. + + A header has the shape ``[name]`` or ``[[name]]`` (array-of-tables), + optionally followed by a comment. The closing ``]`` (or ``]]``) must + appear on the same line, and no key-assignment ``=`` can precede it. + This distinguishes real headers from multi-line array continuation + lines that also start with ``[`` after ``lstrip()``. + """ + if not stripped_line.startswith("["): + return False + # Drop trailing comment so e.g. ``[features] # note`` still matches. + head = stripped_line.split("#", 1)[0].rstrip() + if not head.endswith("]"): + return False + # ``key = [x]`` would have an ``=`` before the bracket; a header doesn't. + bracket_idx = head.index("]") + return "=" not in head[: bracket_idx + 1] + + def _strip_existing_managed_block(toml_text: str) -> str: """Remove any prior managed section so re-runs idempotently replace it. @@ -462,6 +528,32 @@ def _query_codex_plugins( return out, None +def _looks_like_test_tempdir(path: str) -> bool: + """Heuristic: does ``path`` look like a pytest/transient tempdir? + + pytest tempdirs live under ``pytest-of-/pytest-/`` (created via + ``tmp_path`` / ``tmp_path_factory``) and are reaped between sessions. + macOS routes ``/tmp`` through ``/private/var/folders/<…>/T`` which is + what pytest's tempdir factory uses by default. If a HERMES_HOME pointing + at one of those paths is burned into ``~/.codex/config.toml``, every + codex-routed hermes-tools call fails silently once the directory is GC'd. + + We err on the side of refusing — losing a (very unlikely) real + ``~/.hermes`` symlink that happens to live under ``/private/var/folders`` + is much less harmful than silently bricking codex's tool surface. + """ + if not path: + return False + needles = ( + "pytest-of-", + "/pytest-", + "/tmp/pytest", + "/private/var/folders/", # macOS tempdir root + ) + normalized = path.lower() + return any(needle in normalized for needle in needles) + + def _build_hermes_tools_mcp_entry() -> dict: """Build the codex stdio-transport entry that launches Hermes' own tool surface as an MCP server. Codex's subprocess will call back into @@ -474,9 +566,22 @@ def _build_hermes_tools_mcp_entry() -> dict: import sys env: dict[str, str] = {} - # HERMES_HOME passes through if set so the MCP subprocess sees the - # same config / auth / sessions DB as the parent CLI. - hermes_home = os.environ.get("HERMES_HOME") + # HERMES_HOME passes through IF SET so the MCP subprocess sees the same + # config / auth / sessions DB as the parent CLI. Read from os.environ + # (not get_hermes_home()) on purpose: when the env var is unset we want + # codex's subprocess to inherit whatever HERMES_HOME its launcher sets + # at runtime (systemd unit, gateway, kanban dispatcher, custom shell), + # rather than burning the migrate-time resolved default into config.toml + # — that would override the launcher's HERMES_HOME and pin the subprocess + # to the wrong profile. + # + # The pytest-tempdir guard below catches the issue #26250 Bug C scenario: + # a sibling test's monkeypatch.setenv("HERMES_HOME", tmp_path) would + # otherwise leak a transient pytest tempdir into the user's real + # ~/.codex/config.toml and silently brick codex once the tempdir is GC'd. + hermes_home = os.environ.get("HERMES_HOME") or "" + if hermes_home and _looks_like_test_tempdir(hermes_home): + hermes_home = "" if hermes_home: env["HERMES_HOME"] = hermes_home # PYTHONPATH passes through so a worktree-launched hermes finds the @@ -564,10 +669,16 @@ def migrate( # Discover installed Codex curated plugins. Best-effort — never blocks # the migration if codex is unreachable or the RPC fails. plugins: list[dict] = [] + plugin_query_succeeded = False if discover_plugins and not dry_run: plugins, plugin_err = _query_codex_plugins(codex_home=codex_home) if plugin_err: report.plugin_query_error = plugin_err + else: + # plugin/list returned authoritatively (even if the list is empty). + # That means we own [plugins.*] for this re-render and can safely + # strip any pre-existing tables outside the managed block. + plugin_query_succeeded = True for p in plugins: report.migrated_plugins.append(f"{p['name']}@{p['marketplace']}") @@ -602,6 +713,14 @@ def migrate( report.errors.append(f"could not read {target}: {exc}") return report without_managed = _strip_existing_managed_block(existing) + # Bug B: when plugin/list ran authoritatively, codex's own + # [plugins."@"] tables outside our managed block + # would survive _strip_existing_managed_block and then collide with + # the entries we re-emit inside the managed block — producing + # duplicate-table-header parse errors on codex's next startup. Drop + # those pre-existing tables since plugin/list is the source of truth. + if plugin_query_succeeded: + without_managed = _strip_unmanaged_plugin_tables(without_managed) new_text = _insert_managed_block_at_top_level(without_managed, managed_block) else: new_text = managed_block diff --git a/tests/hermes_cli/test_codex_runtime_plugin_migration.py b/tests/hermes_cli/test_codex_runtime_plugin_migration.py index c283a668681..ebdc9f9ae6b 100644 --- a/tests/hermes_cli/test_codex_runtime_plugin_migration.py +++ b/tests/hermes_cli/test_codex_runtime_plugin_migration.py @@ -8,9 +8,13 @@ import pytest from hermes_cli.codex_runtime_plugin_migration import ( MIGRATION_MARKER, + MIGRATION_END_MARKER, MigrationReport, + _build_hermes_tools_mcp_entry, _format_toml_value, + _looks_like_test_tempdir, _strip_existing_managed_block, + _strip_unmanaged_plugin_tables, _translate_one_server, migrate, render_codex_toml_section, @@ -656,3 +660,206 @@ class TestMigrate: assert "Migrated 2 MCP server(s)" in summary assert "- a" in summary assert "- b" in summary + + +# ---- Bug B: duplicate [plugins.X] tables ---- + + +class TestStripUnmanagedPluginTables: + """Regression tests for issue #26250 Bug B. + + When codex itself writes ``[plugins."@"]`` tables + (via the user running ``codex plugins enable`` directly), re-running + ``hermes codex-runtime migrate`` would re-emit them inside the managed + block and the resulting duplicate-table-header would crash codex. + """ + + def test_strips_plugin_tables_outside_managed_block(self): + text = ( + 'model = "gpt-5.5"\n' + "\n" + "[mcp_servers.user-thing]\n" + 'command = "x"\n' + "\n" + '[plugins."tasks@openai-curated"]\n' + "enabled = true\n" + "\n" + '[plugins."web-search@openai-curated"]\n' + "enabled = true\n" + "\n" + "[features]\n" + "terminal_resize_reflow = true\n" + ) + stripped = _strip_unmanaged_plugin_tables(text) + assert "[plugins." not in stripped + # Non-plugin content preserved + assert "[mcp_servers.user-thing]" in stripped + assert "[features]" in stripped + assert "terminal_resize_reflow = true" in stripped + + def test_preserves_content_when_no_plugin_tables(self): + text = ( + 'model = "gpt-5.5"\n' + "\n" + "[mcp_servers.x]\n" + 'command = "y"\n' + ) + assert _strip_unmanaged_plugin_tables(text) == text + + def test_multi_line_array_in_plugin_table_does_not_leak(self): + """A multi-line TOML array inside a [plugins.X] table whose + continuation lines start with ``[`` (e.g. nested arrays) must NOT + prematurely exit the strip region — otherwise array fragments + leak into top-level output and produce invalid TOML on the next + codex startup. Regression guard for #26260 review. + """ + text = ( + '[plugins."tasks@openai-curated"]\n' + "allowed = [\n" + ' "a",\n' + ' ["nested"],\n' + "]\n" + "[features]\n" + "x = 1\n" + ) + stripped = _strip_unmanaged_plugin_tables(text) + # Everything inside the plugin table — including the multi-line + # array's continuation lines starting with `[` — should be gone. + assert '["nested"]' not in stripped + assert "allowed" not in stripped + # Sibling user table survives intact. + assert "[features]" in stripped + assert "x = 1" in stripped + # Result is still valid TOML. + import tomllib + tomllib.loads(stripped) + + def test_migrate_dedups_codex_owned_plugin_tables(self, tmp_path, monkeypatch): + """End-to-end: codex's pre-existing [plugins.X] tables get replaced by + the managed block's re-emission rather than duplicated.""" + target = tmp_path / "config.toml" + target.write_text( + "[mcp_servers.user-server]\n" + 'command = "x"\n' + "\n" + '[plugins."tasks@openai-curated"]\n' + "enabled = true\n" + ) + + # Simulate codex's plugin/list reporting the same plugin tasks@openai-curated. + def fake_query(codex_home=None, timeout=8.0): + return ( + [{"name": "tasks", "marketplace": "openai-curated", "enabled": True}], + None, + ) + + monkeypatch.setattr( + "hermes_cli.codex_runtime_plugin_migration._query_codex_plugins", + fake_query, + ) + migrate({}, codex_home=tmp_path, discover_plugins=True, expose_hermes_tools=False) + new_text = target.read_text() + # Only ONE [plugins."tasks@openai-curated"] header should remain — inside + # the managed block — not the original outside-the-block copy. + assert new_text.count('[plugins."tasks@openai-curated"]') == 1 + # And the surviving one is inside our managed section. + managed_start = new_text.index(MIGRATION_MARKER) + managed_end = new_text.index(MIGRATION_END_MARKER) + plugin_idx = new_text.index('[plugins."tasks@openai-curated"]') + assert managed_start < plugin_idx < managed_end + # File parses cleanly as TOML (the original duplicate-key error is gone). + import tomllib + tomllib.loads(new_text) + + def test_migrate_preserves_plugin_tables_when_plugin_list_fails(self, tmp_path, monkeypatch): + """If plugin/list RPC fails, we can't re-emit plugins authoritatively, + so we must NOT strip the user's existing [plugins.X] tables — that + would silently lose them.""" + target = tmp_path / "config.toml" + target.write_text( + '[plugins."tasks@openai-curated"]\n' + "enabled = true\n" + ) + + def fake_query(codex_home=None, timeout=8.0): + return ([], "plugin/list query failed: codex not installed") + + monkeypatch.setattr( + "hermes_cli.codex_runtime_plugin_migration._query_codex_plugins", + fake_query, + ) + migrate({}, codex_home=tmp_path, discover_plugins=True, expose_hermes_tools=False) + new_text = target.read_text() + # User's plugin table preserved verbatim — we can't re-emit it. + assert '[plugins."tasks@openai-curated"]' in new_text + + +# ---- Bug C: HERMES_HOME tempdir leak into ~/.codex/config.toml ---- + + +class TestHermesHomeLeakGuard: + """Regression tests for issue #26250 Bug C. + + Previously ``_build_hermes_tools_mcp_entry()`` read ``HERMES_HOME`` + directly from ``os.environ``, so a pytest ``monkeypatch.setenv`` would + leak a transient tempdir path into the user's real ``~/.codex/config.toml`` + once codex spawned the hermes-tools MCP subprocess. + """ + + def test_tempdir_detector_recognizes_pytest_paths(self): + assert _looks_like_test_tempdir( + "/private/var/folders/abc/pytest-of-kshitij/pytest-137/popen-gw2/test_X/hermes_test" + ) + assert _looks_like_test_tempdir( + "/tmp/pytest-of-user/pytest-12/test_X/hermes" + ) + assert _looks_like_test_tempdir( + "/private/var/folders/zz/T/pytest-of-bob/pytest-1" + ) + + def test_tempdir_detector_accepts_real_hermes_home(self): + assert not _looks_like_test_tempdir("/Users/alice/.hermes") + assert not _looks_like_test_tempdir("/home/bob/.hermes") + assert not _looks_like_test_tempdir("/opt/hermes") + assert not _looks_like_test_tempdir("") + + def test_pytest_tempdir_not_burned_into_mcp_env(self, monkeypatch): + """The headline regression: even when HERMES_HOME points at a pytest + tempdir, _build_hermes_tools_mcp_entry() must NOT propagate it.""" + monkeypatch.setenv( + "HERMES_HOME", + "/private/var/folders/xx/pytest-of-user/pytest-99/test_x/hermes_test", + ) + entry = _build_hermes_tools_mcp_entry() + env = entry.get("env", {}) + assert "HERMES_HOME" not in env, ( + f"pytest-tempdir HERMES_HOME leaked into codex MCP entry: " + f"{env.get('HERMES_HOME')!r}" + ) + + def test_real_hermes_home_propagates(self, monkeypatch, tmp_path): + """A legitimate HERMES_HOME (not a tempdir path) DOES propagate so the + MCP subprocess sees the same config as the parent CLI.""" + # Use a path that looks real — under /Users or /home, not /var/folders. + # We can't easily create one in the test, so just use a stable path + # outside any tempdir-detector needle. The detector checks for tempdir + # markers, not for path existence. + real_path = "/Users/alice/.hermes" + monkeypatch.setenv("HERMES_HOME", real_path) + entry = _build_hermes_tools_mcp_entry() + env = entry.get("env", {}) + assert env.get("HERMES_HOME") == real_path + + def test_unset_hermes_home_omits_env_key(self, monkeypatch): + """When HERMES_HOME is unset in the environment, the MCP entry MUST + NOT bake in a resolved-default path. The codex subprocess should + inherit whatever HERMES_HOME its launcher (systemd, gateway, shell) + sets at runtime, rather than being pinned to migrate-time defaults. + Regression guard for issue #26250 follow-up review.""" + monkeypatch.delenv("HERMES_HOME", raising=False) + entry = _build_hermes_tools_mcp_entry() + env = entry.get("env", {}) + assert "HERMES_HOME" not in env, ( + f"HERMES_HOME should not be set when env var is unset, got: " + f"{env.get('HERMES_HOME')!r}" + ) diff --git a/tests/hermes_cli/test_codex_runtime_switch.py b/tests/hermes_cli/test_codex_runtime_switch.py index 9a01543776e..7bf1a59e1e7 100644 --- a/tests/hermes_cli/test_codex_runtime_switch.py +++ b/tests/hermes_cli/test_codex_runtime_switch.py @@ -114,8 +114,15 @@ class TestApply: def persist(c): persisted.update(c) + # Patch migrate so this test doesn't reach into the user's real + # ~/.codex/config.toml. See issue #26250 Bug C — without this patch, + # crs.apply() invokes the real migrate() which writes to + # Path.home() / ".codex" using whatever HERMES_HOME the running pytest + # session has set, leaking pytest tempdir paths into the user's + # codex config. with patch.object(crs, "check_codex_binary_ok", - return_value=(True, "0.130.0")): + return_value=(True, "0.130.0")), \ + patch("hermes_cli.codex_runtime_plugin_migration.migrate"): r = crs.apply(cfg, "codex_app_server", persist_callback=persist) assert r.success assert r.new_value == "codex_app_server"