hermes-agent/tests/hermes_cli/test_codex_runtime_switch.py
kshitijk4poor 77276070f5 fix(codex-runtime): de-dup [plugins.X] tables and stop leaking HERMES_HOME into config.toml
Builds on @steezkelly's Bug A fix (#25857, top-level default_permissions
via _insert_managed_block_at_top_level) by addressing the other two
config-corruption bugs described in #26250:

Bug B (duplicate [plugins.X] tables)
  - Codex itself writes [plugins."<name>@<marketplace>"] tables to
    config.toml when the user runs `codex plugins enable` directly,
    before hermes-agent's managed block exists. On the next migrate run,
    _query_codex_plugins() re-discovers the same plugins via plugin/list
    and render_codex_toml_section() re-emits them inside the managed
    block. Codex's strict TOML parser then rejects the duplicate table
    header on startup.
  - Add _strip_unmanaged_plugin_tables() that drops [plugins.*] tables
    from the user-content portion of the file. Only run it when
    plugin/list succeeded — if the RPC failed we can't re-emit and
    must preserve the user's tables. plugin/list is the source of
    truth when it answers.

Bug C (HERMES_HOME pytest-tempdir leak into ~/.codex/config.toml)
  - _build_hermes_tools_mcp_entry() read HERMES_HOME directly from
    os.environ, so a sibling pytest's monkeypatch.setenv("HERMES_HOME",
    tmp_path) silently burned a transient pytest tempdir into the
    user's real ~/.codex/config.toml. After pytest reaped the tempdir,
    every codex-routed hermes-tools tool call failed silently.
  - Derive HERMES_HOME from get_hermes_home() (the canonical resolver
    that goes through the profile-aware path) and refuse to emit
    obvious test-tempdir paths via _looks_like_test_tempdir() as
    belt-and-suspenders for any other callsite that forgets to patch
    migrate().
  - test_enable_succeeds_when_codex_present in test_codex_runtime_switch.py
    invoked the real migrate() (no mock), writing to Path.home() / .codex
    using whatever HERMES_HOME the running pytest session had set. Add
    the same migrate patch the other apply() tests already use, so the
    suite stops touching the user's real ~/.codex/config.toml.

E2E verification (replicating the issue's repro):
  - Pre-state config.toml with user [mcp_servers.omx_team_run] +
    codex-installed [plugins."tasks@openai-curated"],
    HERMES_HOME="/private/var/folders/.../pytest-of-.../..."
  - On origin/main: tomllib refuses to load the result with
    "Cannot declare ('plugins', 'tasks@openai-curated') twice" AND
    the pytest-tempdir HERMES_HOME is burned in.
  - On this branch: file parses cleanly, default_permissions is
    top-level, exactly one [plugins."tasks@openai-curated"] table
    inside the managed block, no HERMES_HOME in the MCP env.

7 new regression tests covering all three bugs + the test-leak guard.
`bash scripts/run_tests.sh tests/hermes_cli/test_codex_runtime_*.py` —
95 passed, 0 failed.

Closes #26250
2026-05-15 02:31:30 -07:00

238 lines
9.7 KiB
Python

"""Tests for the /codex-runtime slash-command shared logic.
These cover the pure-Python state machine; CLI and gateway handlers are
tested separately because they involve config persistence and prompt
formatting that's surface-specific."""
from __future__ import annotations
from unittest.mock import patch
import pytest
from hermes_cli import codex_runtime_switch as crs
class TestParseArgs:
@pytest.mark.parametrize("arg,expected", [
("", None),
(" ", None),
("auto", "auto"),
("codex_app_server", "codex_app_server"),
("on", "codex_app_server"),
("off", "auto"),
("codex", "codex_app_server"),
("default", "auto"),
("hermes", "auto"),
("ENABLE", "codex_app_server"), # case-insensitive
("DiSaBlE", "auto"),
])
def test_valid_args(self, arg, expected):
value, errors = crs.parse_args(arg)
assert errors == []
assert value == expected
def test_invalid_arg_returns_error(self):
value, errors = crs.parse_args("turbo")
assert value is None
assert errors and "Unknown runtime" in errors[0]
class TestGetCurrentRuntime:
def test_default_when_unset(self):
assert crs.get_current_runtime({}) == "auto"
assert crs.get_current_runtime({"model": {}}) == "auto"
assert crs.get_current_runtime({"model": {"openai_runtime": ""}}) == "auto"
def test_unrecognized_falls_back_to_auto(self):
assert crs.get_current_runtime(
{"model": {"openai_runtime": "garbage"}}
) == "auto"
def test_explicit_codex(self):
assert crs.get_current_runtime(
{"model": {"openai_runtime": "codex_app_server"}}
) == "codex_app_server"
def test_handles_non_dict_config(self):
assert crs.get_current_runtime(None) == "auto" # type: ignore[arg-type]
assert crs.get_current_runtime("notadict") == "auto" # type: ignore[arg-type]
assert crs.get_current_runtime({"model": "notadict"}) == "auto"
class TestSetRuntime:
def test_creates_model_section_if_missing(self):
cfg = {}
old = crs.set_runtime(cfg, "codex_app_server")
assert old == "auto"
assert cfg["model"]["openai_runtime"] == "codex_app_server"
def test_returns_previous_value(self):
cfg = {"model": {"openai_runtime": "codex_app_server"}}
old = crs.set_runtime(cfg, "auto")
assert old == "codex_app_server"
assert cfg["model"]["openai_runtime"] == "auto"
def test_invalid_value_raises(self):
with pytest.raises(ValueError):
crs.set_runtime({}, "garbage")
class TestApply:
def test_read_only_call_reports_state(self):
cfg = {"model": {"openai_runtime": "codex_app_server"}}
with patch.object(crs, "check_codex_binary_ok",
return_value=(True, "0.130.0")):
r = crs.apply(cfg, None)
assert r.success
assert r.new_value == "codex_app_server"
assert r.old_value == "codex_app_server"
assert "codex_app_server" in r.message
assert "0.130.0" in r.message
def test_no_change_when_already_set(self):
cfg = {"model": {"openai_runtime": "auto"}}
r = crs.apply(cfg, "auto")
assert r.success
assert r.message == "openai_runtime already set to auto"
def test_enable_blocked_when_codex_missing(self):
cfg = {}
with patch.object(crs, "check_codex_binary_ok",
return_value=(False, "codex not found")):
r = crs.apply(cfg, "codex_app_server")
assert r.success is False
assert "Cannot enable" in r.message
assert "npm i -g @openai/codex" in r.message
# Config NOT mutated on failure
assert cfg.get("model", {}).get("openai_runtime") in (None, "")
def test_enable_succeeds_when_codex_present(self):
cfg = {}
persisted = {}
def persist(c):
persisted.update(c)
# Patch migrate so this test doesn't reach into the user's real
# ~/.codex/config.toml. See issue #26250 Bug C — without this patch,
# crs.apply() invokes the real migrate() which writes to
# Path.home() / ".codex" using whatever HERMES_HOME the running pytest
# session has set, leaking pytest tempdir paths into the user's
# codex config.
with patch.object(crs, "check_codex_binary_ok",
return_value=(True, "0.130.0")), \
patch("hermes_cli.codex_runtime_plugin_migration.migrate"):
r = crs.apply(cfg, "codex_app_server", persist_callback=persist)
assert r.success
assert r.new_value == "codex_app_server"
assert r.old_value == "auto"
assert r.requires_new_session is True
assert "via MCP" in r.message # hermes-tools callback message
assert cfg["model"]["openai_runtime"] == "codex_app_server"
assert persisted["model"]["openai_runtime"] == "codex_app_server"
def test_disable_does_not_check_binary(self):
cfg = {"model": {"openai_runtime": "codex_app_server"}}
with patch.object(crs, "check_codex_binary_ok") as bin_check:
r = crs.apply(cfg, "auto")
assert r.success
# Binary check is irrelevant when disabling — should not be called
# with the codex_app_server enable-gate signature.
assert r.new_value == "auto"
assert r.old_value == "codex_app_server"
def test_persist_callback_failure_reported(self):
cfg = {}
def persist_boom(c):
raise IOError("disk full")
with patch.object(crs, "check_codex_binary_ok",
return_value=(True, "0.130.0")):
r = crs.apply(cfg, "codex_app_server", persist_callback=persist_boom)
assert r.success is False
assert "persist failed" in r.message
assert "disk full" in r.message
def test_enable_triggers_mcp_migration(self):
"""Enabling codex_app_server should auto-migrate Hermes mcp_servers
to ~/.codex/config.toml so the spawned subprocess sees them."""
cfg = {
"mcp_servers": {
"filesystem": {"command": "npx", "args": ["-y", "fs-server"]},
}
}
with patch.object(crs, "check_codex_binary_ok",
return_value=(True, "0.130.0")), \
patch("hermes_cli.codex_runtime_plugin_migration.migrate") as mig:
mig.return_value.migrated = ["filesystem", "hermes-tools"]
mig.return_value.migrated_plugins = []
mig.return_value.plugin_query_error = None
mig.return_value.wrote_permissions_default = ":workspace"
mig.return_value.errors = []
mig.return_value.target_path = "/fake/.codex/config.toml"
r = crs.apply(cfg, "codex_app_server")
assert r.success
assert mig.called # migration was triggered
# User MCP servers are reported (excluding internal hermes-tools)
assert "Migrated 1 MCP server" in r.message
assert "filesystem" in r.message
# Permissions default surfaces
assert "Default sandbox: :workspace" in r.message
# Hermes tool callback announcement
assert "via MCP" in r.message
def test_disable_does_not_trigger_migration(self):
"""Switching back to auto must not write to ~/.codex/."""
cfg = {
"model": {"openai_runtime": "codex_app_server"},
"mcp_servers": {"x": {"command": "y"}},
}
with patch("hermes_cli.codex_runtime_plugin_migration.migrate") as mig:
r = crs.apply(cfg, "auto")
assert r.success
assert not mig.called # disabling does not migrate
def test_migration_failure_does_not_block_enable(self):
"""If MCP migration raises, the runtime change still proceeds —
users can manually re-run migration later."""
cfg = {"mcp_servers": {"x": {"command": "y"}}}
with patch.object(crs, "check_codex_binary_ok",
return_value=(True, "0.130.0")), \
patch("hermes_cli.codex_runtime_plugin_migration.migrate",
side_effect=RuntimeError("disk full")):
r = crs.apply(cfg, "codex_app_server")
assert r.success # change still applied
assert r.new_value == "codex_app_server"
assert "MCP migration skipped" in r.message
assert "disk full" in r.message
def test_binary_check_cached_within_apply(self):
"""check_codex_binary_ok is invoked at most once per apply() call.
The enable path has three sites that need the version (state report,
enable gate, success message). Without caching, a single
/codex-runtime invocation spawns `codex --version` three times.
Regression guard against a refactor that drops the cache.
"""
cfg = {}
with patch.object(crs, "check_codex_binary_ok",
return_value=(True, "0.130.0")) as bin_check, \
patch("hermes_cli.codex_runtime_plugin_migration.migrate"):
r = crs.apply(cfg, "codex_app_server")
assert r.success
assert bin_check.call_count == 1, (
f"check_codex_binary_ok was called {bin_check.call_count} time(s); "
"should be cached and called exactly once per apply()"
)
def test_binary_check_cached_on_read_only_call(self):
"""Read-only call (new_value=None) calls the binary check exactly
once and reuses the result for the message."""
cfg = {"model": {"openai_runtime": "codex_app_server"}}
with patch.object(crs, "check_codex_binary_ok",
return_value=(True, "0.130.0")) as bin_check:
crs.apply(cfg, None)
assert bin_check.call_count == 1