mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(codex-runtime): de-dup [plugins.X] tables and stop leaking HERMES_HOME into config.toml
Builds on @steezkelly's Bug A fix (#25857, top-level default_permissions via _insert_managed_block_at_top_level) by addressing the other two config-corruption bugs described in #26250: Bug B (duplicate [plugins.X] tables) - Codex itself writes [plugins."<name>@<marketplace>"] tables to config.toml when the user runs `codex plugins enable` directly, before hermes-agent's managed block exists. On the next migrate run, _query_codex_plugins() re-discovers the same plugins via plugin/list and render_codex_toml_section() re-emits them inside the managed block. Codex's strict TOML parser then rejects the duplicate table header on startup. - Add _strip_unmanaged_plugin_tables() that drops [plugins.*] tables from the user-content portion of the file. Only run it when plugin/list succeeded — if the RPC failed we can't re-emit and must preserve the user's tables. plugin/list is the source of truth when it answers. Bug C (HERMES_HOME pytest-tempdir leak into ~/.codex/config.toml) - _build_hermes_tools_mcp_entry() read HERMES_HOME directly from os.environ, so a sibling pytest's monkeypatch.setenv("HERMES_HOME", tmp_path) silently burned a transient pytest tempdir into the user's real ~/.codex/config.toml. After pytest reaped the tempdir, every codex-routed hermes-tools tool call failed silently. - Derive HERMES_HOME from get_hermes_home() (the canonical resolver that goes through the profile-aware path) and refuse to emit obvious test-tempdir paths via _looks_like_test_tempdir() as belt-and-suspenders for any other callsite that forgets to patch migrate(). - test_enable_succeeds_when_codex_present in test_codex_runtime_switch.py invoked the real migrate() (no mock), writing to Path.home() / .codex using whatever HERMES_HOME the running pytest session had set. Add the same migrate patch the other apply() tests already use, so the suite stops touching the user's real ~/.codex/config.toml. E2E verification (replicating the issue's repro): - Pre-state config.toml with user [mcp_servers.omx_team_run] + codex-installed [plugins."tasks@openai-curated"], HERMES_HOME="/private/var/folders/.../pytest-of-.../..." - On origin/main: tomllib refuses to load the result with "Cannot declare ('plugins', 'tasks@openai-curated') twice" AND the pytest-tempdir HERMES_HOME is burned in. - On this branch: file parses cleanly, default_permissions is top-level, exactly one [plugins."tasks@openai-curated"] table inside the managed block, no HERMES_HOME in the MCP env. 7 new regression tests covering all three bugs + the test-leak guard. `bash scripts/run_tests.sh tests/hermes_cli/test_codex_runtime_*.py` — 95 passed, 0 failed. Closes #26250
This commit is contained in:
parent
274217316e
commit
77276070f5
3 changed files with 337 additions and 4 deletions
|
|
@ -335,6 +335,72 @@ def _insert_managed_block_at_top_level(user_text: str, managed_block: str) -> st
|
|||
return f"{managed_block}\n{suffix}"
|
||||
|
||||
|
||||
def _strip_unmanaged_plugin_tables(toml_text: str) -> str:
|
||||
"""Remove ``[plugins."<name>@<marketplace>"]`` tables that live OUTSIDE the
|
||||
managed block.
|
||||
|
||||
Codex itself writes these tables when the user runs ``codex plugins enable``
|
||||
directly (i.e. before Hermes' migrate has ever touched the file). When we
|
||||
later run migrate, ``_query_codex_plugins()`` reports the same plugins via
|
||||
the live ``plugin/list`` RPC and we re-emit them inside the managed block.
|
||||
The result without this strip is duplicate ``[plugins."X@Y"]`` table
|
||||
headers — codex's strict TOML parser then refuses to load the file.
|
||||
|
||||
We own the ``[plugins.*]`` namespace once migrate has run, so dropping any
|
||||
pre-existing ``[plugins.*]`` tables is safe: ``plugin/list`` is the source
|
||||
of truth for what's actually installed. The caller is expected to only
|
||||
invoke this strip when ``plugin/list`` succeeded — otherwise we'd lose
|
||||
plugins the user installed via ``codex`` without a way to re-emit them.
|
||||
|
||||
Behavior:
|
||||
* Lines beginning with ``[plugins.`` start a swallow region that ends at
|
||||
the next non-``[plugins.`` table header or end-of-file.
|
||||
* Content inside the managed block is untouched (callers should run
|
||||
``_strip_existing_managed_block`` first so the managed block has
|
||||
already been removed when this runs).
|
||||
"""
|
||||
lines = toml_text.splitlines(keepends=True)
|
||||
out: list[str] = []
|
||||
in_plugin_table = False
|
||||
for line in lines:
|
||||
stripped = line.lstrip()
|
||||
# Only treat a line as a table header when it has the shape
|
||||
# ``[...]`` (optionally followed by a comment). Multi-line array
|
||||
# continuations like ``["nested"],`` also start with ``[`` after
|
||||
# lstrip but are not headers — without this guard they would
|
||||
# falsely flip ``in_plugin_table`` to False mid-table and leak
|
||||
# array fragments into the output.
|
||||
if _looks_like_table_header(stripped):
|
||||
in_plugin_table = stripped.startswith("[plugins.")
|
||||
if in_plugin_table:
|
||||
continue
|
||||
if in_plugin_table:
|
||||
# Swallow keys/comments/blanks until the next table header.
|
||||
continue
|
||||
out.append(line)
|
||||
return "".join(out)
|
||||
|
||||
|
||||
def _looks_like_table_header(stripped_line: str) -> bool:
|
||||
"""Return True if ``stripped_line`` is a TOML table header.
|
||||
|
||||
A header has the shape ``[name]`` or ``[[name]]`` (array-of-tables),
|
||||
optionally followed by a comment. The closing ``]`` (or ``]]``) must
|
||||
appear on the same line, and no key-assignment ``=`` can precede it.
|
||||
This distinguishes real headers from multi-line array continuation
|
||||
lines that also start with ``[`` after ``lstrip()``.
|
||||
"""
|
||||
if not stripped_line.startswith("["):
|
||||
return False
|
||||
# Drop trailing comment so e.g. ``[features] # note`` still matches.
|
||||
head = stripped_line.split("#", 1)[0].rstrip()
|
||||
if not head.endswith("]"):
|
||||
return False
|
||||
# ``key = [x]`` would have an ``=`` before the bracket; a header doesn't.
|
||||
bracket_idx = head.index("]")
|
||||
return "=" not in head[: bracket_idx + 1]
|
||||
|
||||
|
||||
def _strip_existing_managed_block(toml_text: str) -> str:
|
||||
"""Remove any prior managed section so re-runs idempotently replace it.
|
||||
|
||||
|
|
@ -462,6 +528,32 @@ def _query_codex_plugins(
|
|||
return out, None
|
||||
|
||||
|
||||
def _looks_like_test_tempdir(path: str) -> bool:
|
||||
"""Heuristic: does ``path`` look like a pytest/transient tempdir?
|
||||
|
||||
pytest tempdirs live under ``pytest-of-<user>/pytest-<n>/`` (created via
|
||||
``tmp_path`` / ``tmp_path_factory``) and are reaped between sessions.
|
||||
macOS routes ``/tmp`` through ``/private/var/folders/<…>/T`` which is
|
||||
what pytest's tempdir factory uses by default. If a HERMES_HOME pointing
|
||||
at one of those paths is burned into ``~/.codex/config.toml``, every
|
||||
codex-routed hermes-tools call fails silently once the directory is GC'd.
|
||||
|
||||
We err on the side of refusing — losing a (very unlikely) real
|
||||
``~/.hermes`` symlink that happens to live under ``/private/var/folders``
|
||||
is much less harmful than silently bricking codex's tool surface.
|
||||
"""
|
||||
if not path:
|
||||
return False
|
||||
needles = (
|
||||
"pytest-of-",
|
||||
"/pytest-",
|
||||
"/tmp/pytest",
|
||||
"/private/var/folders/", # macOS tempdir root
|
||||
)
|
||||
normalized = path.lower()
|
||||
return any(needle in normalized for needle in needles)
|
||||
|
||||
|
||||
def _build_hermes_tools_mcp_entry() -> dict:
|
||||
"""Build the codex stdio-transport entry that launches Hermes' own
|
||||
tool surface as an MCP server. Codex's subprocess will call back into
|
||||
|
|
@ -474,9 +566,22 @@ def _build_hermes_tools_mcp_entry() -> dict:
|
|||
import sys
|
||||
|
||||
env: dict[str, str] = {}
|
||||
# HERMES_HOME passes through if set so the MCP subprocess sees the
|
||||
# same config / auth / sessions DB as the parent CLI.
|
||||
hermes_home = os.environ.get("HERMES_HOME")
|
||||
# HERMES_HOME passes through IF SET so the MCP subprocess sees the same
|
||||
# config / auth / sessions DB as the parent CLI. Read from os.environ
|
||||
# (not get_hermes_home()) on purpose: when the env var is unset we want
|
||||
# codex's subprocess to inherit whatever HERMES_HOME its launcher sets
|
||||
# at runtime (systemd unit, gateway, kanban dispatcher, custom shell),
|
||||
# rather than burning the migrate-time resolved default into config.toml
|
||||
# — that would override the launcher's HERMES_HOME and pin the subprocess
|
||||
# to the wrong profile.
|
||||
#
|
||||
# The pytest-tempdir guard below catches the issue #26250 Bug C scenario:
|
||||
# a sibling test's monkeypatch.setenv("HERMES_HOME", tmp_path) would
|
||||
# otherwise leak a transient pytest tempdir into the user's real
|
||||
# ~/.codex/config.toml and silently brick codex once the tempdir is GC'd.
|
||||
hermes_home = os.environ.get("HERMES_HOME") or ""
|
||||
if hermes_home and _looks_like_test_tempdir(hermes_home):
|
||||
hermes_home = ""
|
||||
if hermes_home:
|
||||
env["HERMES_HOME"] = hermes_home
|
||||
# PYTHONPATH passes through so a worktree-launched hermes finds the
|
||||
|
|
@ -564,10 +669,16 @@ def migrate(
|
|||
# Discover installed Codex curated plugins. Best-effort — never blocks
|
||||
# the migration if codex is unreachable or the RPC fails.
|
||||
plugins: list[dict] = []
|
||||
plugin_query_succeeded = False
|
||||
if discover_plugins and not dry_run:
|
||||
plugins, plugin_err = _query_codex_plugins(codex_home=codex_home)
|
||||
if plugin_err:
|
||||
report.plugin_query_error = plugin_err
|
||||
else:
|
||||
# plugin/list returned authoritatively (even if the list is empty).
|
||||
# That means we own [plugins.*] for this re-render and can safely
|
||||
# strip any pre-existing tables outside the managed block.
|
||||
plugin_query_succeeded = True
|
||||
for p in plugins:
|
||||
report.migrated_plugins.append(f"{p['name']}@{p['marketplace']}")
|
||||
|
||||
|
|
@ -602,6 +713,14 @@ def migrate(
|
|||
report.errors.append(f"could not read {target}: {exc}")
|
||||
return report
|
||||
without_managed = _strip_existing_managed_block(existing)
|
||||
# Bug B: when plugin/list ran authoritatively, codex's own
|
||||
# [plugins."<name>@<marketplace>"] tables outside our managed block
|
||||
# would survive _strip_existing_managed_block and then collide with
|
||||
# the entries we re-emit inside the managed block — producing
|
||||
# duplicate-table-header parse errors on codex's next startup. Drop
|
||||
# those pre-existing tables since plugin/list is the source of truth.
|
||||
if plugin_query_succeeded:
|
||||
without_managed = _strip_unmanaged_plugin_tables(without_managed)
|
||||
new_text = _insert_managed_block_at_top_level(without_managed, managed_block)
|
||||
else:
|
||||
new_text = managed_block
|
||||
|
|
|
|||
|
|
@ -8,9 +8,13 @@ import pytest
|
|||
|
||||
from hermes_cli.codex_runtime_plugin_migration import (
|
||||
MIGRATION_MARKER,
|
||||
MIGRATION_END_MARKER,
|
||||
MigrationReport,
|
||||
_build_hermes_tools_mcp_entry,
|
||||
_format_toml_value,
|
||||
_looks_like_test_tempdir,
|
||||
_strip_existing_managed_block,
|
||||
_strip_unmanaged_plugin_tables,
|
||||
_translate_one_server,
|
||||
migrate,
|
||||
render_codex_toml_section,
|
||||
|
|
@ -656,3 +660,206 @@ class TestMigrate:
|
|||
assert "Migrated 2 MCP server(s)" in summary
|
||||
assert "- a" in summary
|
||||
assert "- b" in summary
|
||||
|
||||
|
||||
# ---- Bug B: duplicate [plugins.X] tables ----
|
||||
|
||||
|
||||
class TestStripUnmanagedPluginTables:
|
||||
"""Regression tests for issue #26250 Bug B.
|
||||
|
||||
When codex itself writes ``[plugins."<name>@<marketplace>"]`` tables
|
||||
(via the user running ``codex plugins enable`` directly), re-running
|
||||
``hermes codex-runtime migrate`` would re-emit them inside the managed
|
||||
block and the resulting duplicate-table-header would crash codex.
|
||||
"""
|
||||
|
||||
def test_strips_plugin_tables_outside_managed_block(self):
|
||||
text = (
|
||||
'model = "gpt-5.5"\n'
|
||||
"\n"
|
||||
"[mcp_servers.user-thing]\n"
|
||||
'command = "x"\n'
|
||||
"\n"
|
||||
'[plugins."tasks@openai-curated"]\n'
|
||||
"enabled = true\n"
|
||||
"\n"
|
||||
'[plugins."web-search@openai-curated"]\n'
|
||||
"enabled = true\n"
|
||||
"\n"
|
||||
"[features]\n"
|
||||
"terminal_resize_reflow = true\n"
|
||||
)
|
||||
stripped = _strip_unmanaged_plugin_tables(text)
|
||||
assert "[plugins." not in stripped
|
||||
# Non-plugin content preserved
|
||||
assert "[mcp_servers.user-thing]" in stripped
|
||||
assert "[features]" in stripped
|
||||
assert "terminal_resize_reflow = true" in stripped
|
||||
|
||||
def test_preserves_content_when_no_plugin_tables(self):
|
||||
text = (
|
||||
'model = "gpt-5.5"\n'
|
||||
"\n"
|
||||
"[mcp_servers.x]\n"
|
||||
'command = "y"\n'
|
||||
)
|
||||
assert _strip_unmanaged_plugin_tables(text) == text
|
||||
|
||||
def test_multi_line_array_in_plugin_table_does_not_leak(self):
|
||||
"""A multi-line TOML array inside a [plugins.X] table whose
|
||||
continuation lines start with ``[`` (e.g. nested arrays) must NOT
|
||||
prematurely exit the strip region — otherwise array fragments
|
||||
leak into top-level output and produce invalid TOML on the next
|
||||
codex startup. Regression guard for #26260 review.
|
||||
"""
|
||||
text = (
|
||||
'[plugins."tasks@openai-curated"]\n'
|
||||
"allowed = [\n"
|
||||
' "a",\n'
|
||||
' ["nested"],\n'
|
||||
"]\n"
|
||||
"[features]\n"
|
||||
"x = 1\n"
|
||||
)
|
||||
stripped = _strip_unmanaged_plugin_tables(text)
|
||||
# Everything inside the plugin table — including the multi-line
|
||||
# array's continuation lines starting with `[` — should be gone.
|
||||
assert '["nested"]' not in stripped
|
||||
assert "allowed" not in stripped
|
||||
# Sibling user table survives intact.
|
||||
assert "[features]" in stripped
|
||||
assert "x = 1" in stripped
|
||||
# Result is still valid TOML.
|
||||
import tomllib
|
||||
tomllib.loads(stripped)
|
||||
|
||||
def test_migrate_dedups_codex_owned_plugin_tables(self, tmp_path, monkeypatch):
|
||||
"""End-to-end: codex's pre-existing [plugins.X] tables get replaced by
|
||||
the managed block's re-emission rather than duplicated."""
|
||||
target = tmp_path / "config.toml"
|
||||
target.write_text(
|
||||
"[mcp_servers.user-server]\n"
|
||||
'command = "x"\n'
|
||||
"\n"
|
||||
'[plugins."tasks@openai-curated"]\n'
|
||||
"enabled = true\n"
|
||||
)
|
||||
|
||||
# Simulate codex's plugin/list reporting the same plugin tasks@openai-curated.
|
||||
def fake_query(codex_home=None, timeout=8.0):
|
||||
return (
|
||||
[{"name": "tasks", "marketplace": "openai-curated", "enabled": True}],
|
||||
None,
|
||||
)
|
||||
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.codex_runtime_plugin_migration._query_codex_plugins",
|
||||
fake_query,
|
||||
)
|
||||
migrate({}, codex_home=tmp_path, discover_plugins=True, expose_hermes_tools=False)
|
||||
new_text = target.read_text()
|
||||
# Only ONE [plugins."tasks@openai-curated"] header should remain — inside
|
||||
# the managed block — not the original outside-the-block copy.
|
||||
assert new_text.count('[plugins."tasks@openai-curated"]') == 1
|
||||
# And the surviving one is inside our managed section.
|
||||
managed_start = new_text.index(MIGRATION_MARKER)
|
||||
managed_end = new_text.index(MIGRATION_END_MARKER)
|
||||
plugin_idx = new_text.index('[plugins."tasks@openai-curated"]')
|
||||
assert managed_start < plugin_idx < managed_end
|
||||
# File parses cleanly as TOML (the original duplicate-key error is gone).
|
||||
import tomllib
|
||||
tomllib.loads(new_text)
|
||||
|
||||
def test_migrate_preserves_plugin_tables_when_plugin_list_fails(self, tmp_path, monkeypatch):
|
||||
"""If plugin/list RPC fails, we can't re-emit plugins authoritatively,
|
||||
so we must NOT strip the user's existing [plugins.X] tables — that
|
||||
would silently lose them."""
|
||||
target = tmp_path / "config.toml"
|
||||
target.write_text(
|
||||
'[plugins."tasks@openai-curated"]\n'
|
||||
"enabled = true\n"
|
||||
)
|
||||
|
||||
def fake_query(codex_home=None, timeout=8.0):
|
||||
return ([], "plugin/list query failed: codex not installed")
|
||||
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.codex_runtime_plugin_migration._query_codex_plugins",
|
||||
fake_query,
|
||||
)
|
||||
migrate({}, codex_home=tmp_path, discover_plugins=True, expose_hermes_tools=False)
|
||||
new_text = target.read_text()
|
||||
# User's plugin table preserved verbatim — we can't re-emit it.
|
||||
assert '[plugins."tasks@openai-curated"]' in new_text
|
||||
|
||||
|
||||
# ---- Bug C: HERMES_HOME tempdir leak into ~/.codex/config.toml ----
|
||||
|
||||
|
||||
class TestHermesHomeLeakGuard:
|
||||
"""Regression tests for issue #26250 Bug C.
|
||||
|
||||
Previously ``_build_hermes_tools_mcp_entry()`` read ``HERMES_HOME``
|
||||
directly from ``os.environ``, so a pytest ``monkeypatch.setenv`` would
|
||||
leak a transient tempdir path into the user's real ``~/.codex/config.toml``
|
||||
once codex spawned the hermes-tools MCP subprocess.
|
||||
"""
|
||||
|
||||
def test_tempdir_detector_recognizes_pytest_paths(self):
|
||||
assert _looks_like_test_tempdir(
|
||||
"/private/var/folders/abc/pytest-of-kshitij/pytest-137/popen-gw2/test_X/hermes_test"
|
||||
)
|
||||
assert _looks_like_test_tempdir(
|
||||
"/tmp/pytest-of-user/pytest-12/test_X/hermes"
|
||||
)
|
||||
assert _looks_like_test_tempdir(
|
||||
"/private/var/folders/zz/T/pytest-of-bob/pytest-1"
|
||||
)
|
||||
|
||||
def test_tempdir_detector_accepts_real_hermes_home(self):
|
||||
assert not _looks_like_test_tempdir("/Users/alice/.hermes")
|
||||
assert not _looks_like_test_tempdir("/home/bob/.hermes")
|
||||
assert not _looks_like_test_tempdir("/opt/hermes")
|
||||
assert not _looks_like_test_tempdir("")
|
||||
|
||||
def test_pytest_tempdir_not_burned_into_mcp_env(self, monkeypatch):
|
||||
"""The headline regression: even when HERMES_HOME points at a pytest
|
||||
tempdir, _build_hermes_tools_mcp_entry() must NOT propagate it."""
|
||||
monkeypatch.setenv(
|
||||
"HERMES_HOME",
|
||||
"/private/var/folders/xx/pytest-of-user/pytest-99/test_x/hermes_test",
|
||||
)
|
||||
entry = _build_hermes_tools_mcp_entry()
|
||||
env = entry.get("env", {})
|
||||
assert "HERMES_HOME" not in env, (
|
||||
f"pytest-tempdir HERMES_HOME leaked into codex MCP entry: "
|
||||
f"{env.get('HERMES_HOME')!r}"
|
||||
)
|
||||
|
||||
def test_real_hermes_home_propagates(self, monkeypatch, tmp_path):
|
||||
"""A legitimate HERMES_HOME (not a tempdir path) DOES propagate so the
|
||||
MCP subprocess sees the same config as the parent CLI."""
|
||||
# Use a path that looks real — under /Users or /home, not /var/folders.
|
||||
# We can't easily create one in the test, so just use a stable path
|
||||
# outside any tempdir-detector needle. The detector checks for tempdir
|
||||
# markers, not for path existence.
|
||||
real_path = "/Users/alice/.hermes"
|
||||
monkeypatch.setenv("HERMES_HOME", real_path)
|
||||
entry = _build_hermes_tools_mcp_entry()
|
||||
env = entry.get("env", {})
|
||||
assert env.get("HERMES_HOME") == real_path
|
||||
|
||||
def test_unset_hermes_home_omits_env_key(self, monkeypatch):
|
||||
"""When HERMES_HOME is unset in the environment, the MCP entry MUST
|
||||
NOT bake in a resolved-default path. The codex subprocess should
|
||||
inherit whatever HERMES_HOME its launcher (systemd, gateway, shell)
|
||||
sets at runtime, rather than being pinned to migrate-time defaults.
|
||||
Regression guard for issue #26250 follow-up review."""
|
||||
monkeypatch.delenv("HERMES_HOME", raising=False)
|
||||
entry = _build_hermes_tools_mcp_entry()
|
||||
env = entry.get("env", {})
|
||||
assert "HERMES_HOME" not in env, (
|
||||
f"HERMES_HOME should not be set when env var is unset, got: "
|
||||
f"{env.get('HERMES_HOME')!r}"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -114,8 +114,15 @@ class TestApply:
|
|||
def persist(c):
|
||||
persisted.update(c)
|
||||
|
||||
# Patch migrate so this test doesn't reach into the user's real
|
||||
# ~/.codex/config.toml. See issue #26250 Bug C — without this patch,
|
||||
# crs.apply() invokes the real migrate() which writes to
|
||||
# Path.home() / ".codex" using whatever HERMES_HOME the running pytest
|
||||
# session has set, leaking pytest tempdir paths into the user's
|
||||
# codex config.
|
||||
with patch.object(crs, "check_codex_binary_ok",
|
||||
return_value=(True, "0.130.0")):
|
||||
return_value=(True, "0.130.0")), \
|
||||
patch("hermes_cli.codex_runtime_plugin_migration.migrate"):
|
||||
r = crs.apply(cfg, "codex_app_server", persist_callback=persist)
|
||||
assert r.success
|
||||
assert r.new_value == "codex_app_server"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue