mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Catalog snapshots, config version literals, and enumeration counts are data that changes as designed. Tests that assert on those values add no behavioral coverage — they just break CI on every routine update and cost engineering time to 'fix.' Replace with invariants where one exists, delete where none does. Deleted (pure snapshots): - TestMinimaxModelCatalog (3 tests): 'MiniMax-M2.7 in models' et al - TestGeminiModelCatalog: 'gemini-2.5-pro in models', 'gemini-3.x in models' - test_browser_camofox_state::test_config_version_matches_current_schema (docstring literally said it would break on unrelated bumps) Relaxed (keep plumbing check, drop snapshot): - Xiaomi / Arcee / Kimi moonshot / Kimi coding / HuggingFace static lists: now assert 'provider exists and has >= 1 entry' instead of specific names - HuggingFace main/models.py consistency test: drop 'len >= 6' floor Dynamicized (follow source, not a literal): - 3x test_config.py migration tests: raw['_config_version'] == DEFAULT_CONFIG['_config_version'] instead of hardcoded 21 Fixed stale tests against intentional behavior changes: - test_insights::test_gateway_format_hides_cost: name matches new behavior (no dollar figures); remove contradicting '$' in text assertion - test_config::prefers_api_then_url_then_base_url: flipped per PR #9332; rename + update to base_url > url > api - test_anthropic_adapter: relax assert_called_once() (xdist-flaky) to assert called — contract is 'credential flowed through' - test_interrupt_propagation: add provider/model/_base_url to bare-agent fixture so the stale-timeout code path resolves Fixed stale integration tests against opt-in plugin gate: - transform_tool_result + transform_terminal_output: write plugins.enabled allow-list to config.yaml and reset the plugin manager singleton Source fix (real consistency invariant): - agent/model_metadata.py: add moonshotai/Kimi-K2.6 context length (262144, same as K2.5). 
test_model_metadata_has_context_lengths was correctly catching the gap. Policy: - AGENTS.md Testing section: new subsection 'Don't write change-detector tests' with do/don't examples. Reviewers should reject catalog-snapshot assertions in new tests. Covers every test that failed on the last completed main CI run (24703345583) except test_modal_sandbox_fixes::test_terminal_tool_present + test_terminal_and_file_toolsets_resolve_all_tools, which now pass both alone and with the full tests/tools/ directory (xdist ordering flake that resolved itself).
209 lines
6.5 KiB
Python
209 lines
6.5 KiB
Python
import json
|
|
import os
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock
|
|
|
|
import hermes_cli.plugins as plugins_mod
|
|
import tools.terminal_tool as terminal_tool_module
|
|
|
|
|
|
# Sentinel used as the default for ``_run_terminal(invoke_hook=...)``: the helper
# only patches ``hermes_cli.plugins.invoke_hook`` when the caller passed
# something other than this object.
_UNSET = object()
|
|
|
|
|
|
def _make_env_config(tmp_path, **overrides):
|
|
config = {
|
|
"env_type": "local",
|
|
"timeout": 30,
|
|
"cwd": str(tmp_path),
|
|
"host_cwd": None,
|
|
"modal_mode": "auto",
|
|
"docker_image": "",
|
|
"singularity_image": "",
|
|
"modal_image": "",
|
|
"daytona_image": "",
|
|
}
|
|
config.update(overrides)
|
|
return config
|
|
|
|
|
|
def _run_terminal(
    monkeypatch,
    tmp_path,
    *,
    output,
    returncode=0,
    invoke_hook=_UNSET,
    approval=None,
    command="echo hello",
):
    """Run ``terminal_tool`` against a mocked environment and parse its result.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture used for every patch below.
        tmp_path: Directory handed to ``_make_env_config`` as the working dir.
        output: Value the mocked environment reports under ``"output"``.
        returncode: Value the mocked environment reports under ``"returncode"``.
        invoke_hook: Replacement for ``hermes_cli.plugins.invoke_hook``; when
            left at ``_UNSET`` that attribute is not patched.
        approval: Value returned by the patched ``_check_all_guards``; any
            falsy value is replaced by ``{"approved": True}``.
        command: Command string passed to ``terminal_tool``.

    Returns:
        ``(result, mock_env)`` where ``result`` is the JSON-decoded return
        value of ``terminal_tool`` and ``mock_env`` is the ``MagicMock``
        registered as the ``"default"`` environment.
    """
    fake_env = MagicMock()
    fake_env.execute.return_value = {"output": output, "returncode": returncode}

    def _env_config():
        return _make_env_config(tmp_path)

    def _guards(*_args, **_kwargs):
        return approval or {"approved": True}

    # Stub the module-level collaborators, in the same order as before.
    module_patches = (
        ("_get_env_config", _env_config),
        ("_start_cleanup_thread", lambda: None),
        ("_check_all_guards", _guards),
    )
    for attr_name, replacement in module_patches:
        monkeypatch.setattr(terminal_tool_module, attr_name, replacement)

    # Register the mock as the "default" environment with a zeroed activity stamp.
    monkeypatch.setitem(terminal_tool_module._active_environments, "default", fake_env)
    monkeypatch.setitem(terminal_tool_module._last_activity, "default", 0.0)

    hook_overridden = invoke_hook is not _UNSET
    if hook_overridden:
        monkeypatch.setattr("hermes_cli.plugins.invoke_hook", invoke_hook)

    raw_result = terminal_tool_module.terminal_tool(command=command)
    return json.loads(raw_result), fake_env
|
|
|
|
|
|
def test_terminal_output_unchanged_when_transform_hook_not_registered(monkeypatch, tmp_path):
    """Without a hook override, output, exit code and error come back untouched."""
    payload, _env = _run_terminal(monkeypatch, tmp_path, output="plain output")

    assert (payload["output"], payload["exit_code"]) == ("plain output", 0)
    assert payload["error"] is None
|
|
|
|
|
|
def test_terminal_output_unchanged_for_none_hook_result(monkeypatch, tmp_path):
    """A hook that yields only ``None`` leaves the terminal output as-is."""

    def _none_hook(hook_name, **kwargs):
        return [None]

    payload, _env = _run_terminal(
        monkeypatch, tmp_path, output="plain output", invoke_hook=_none_hook
    )

    assert payload["output"] == "plain output"
|
|
|
|
|
|
def test_terminal_output_ignores_invalid_hook_results(monkeypatch, tmp_path):
    """Non-string hook results (dict, int, list) do not replace the output."""

    def _junk_hook(hook_name, **kwargs):
        return [{"bad": True}, 123, ["nope"]]

    payload, _env = _run_terminal(
        monkeypatch, tmp_path, output="plain output", invoke_hook=_junk_hook
    )

    assert payload["output"] == "plain output"
|
|
|
|
|
|
def test_terminal_output_uses_first_valid_string_from_hooks(monkeypatch, tmp_path):
    """The first string among the hook results becomes the output."""

    def _mixed_hook(hook_name, **kwargs):
        return [None, {"bad": True}, "first", "second"]

    payload, _env = _run_terminal(
        monkeypatch, tmp_path, output="plain output", invoke_hook=_mixed_hook
    )

    assert payload["output"] == "first"
|
|
|
|
|
|
def test_terminal_output_transform_still_truncates_long_replacement(monkeypatch, tmp_path):
    """An oversized hook replacement is still truncated, keeping head and tail."""
    filler = "A" * 60000
    replacement = "PLUGIN-HEAD\n" + filler + "\nPLUGIN-TAIL"

    def _long_hook(hook_name, **kwargs):
        return [replacement]

    payload, _env = _run_terminal(
        monkeypatch, tmp_path, output="short output", invoke_hook=_long_hook
    )

    for marker in ("PLUGIN-HEAD", "PLUGIN-TAIL", "[OUTPUT TRUNCATED"):
        assert marker in payload["output"]
    # The returned text must differ from the full replacement the hook supplied.
    assert replacement != payload["output"]
|
|
|
|
|
|
def test_terminal_output_transform_still_runs_strip_and_redact(monkeypatch, tmp_path):
    """Hook-provided output still has escape codes removed and the secret masked."""
    # Force redaction on: the host's HERMES_REDACT_SECRETS setting and the
    # collection-time import order must not matter (the module snapshots the
    # env at import).
    monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)

    secret = "sk-proj-abc123def456ghi789jkl012mno345"

    def _leaky_hook(hook_name, **kwargs):
        return [f" \x1b[31mOPENAI_API_KEY={secret}\x1b[0m "]

    payload, _env = _run_terminal(
        monkeypatch, tmp_path, output="plain output", invoke_hook=_leaky_hook
    )
    text = payload["output"]

    assert "\x1b" not in text
    assert secret not in text
    assert "OPENAI_API_KEY=" in text
    assert "***" in text
|
|
|
|
|
|
def test_terminal_output_transform_hook_exception_falls_back(monkeypatch, tmp_path):
    """A hook that raises leaves output, exit code and error unchanged."""

    def _exploding_hook(*_args, **_kwargs):
        raise RuntimeError("boom")

    payload, _env = _run_terminal(
        monkeypatch, tmp_path, output="plain output", invoke_hook=_exploding_hook
    )

    assert (payload["output"], payload["exit_code"]) == ("plain output", 0)
    assert payload["error"] is None
|
|
|
|
|
|
def test_terminal_output_transform_does_not_change_approval_or_exit_code_meaning(monkeypatch, tmp_path):
    """Replacing the output leaves the approval note and exit-code meaning intact."""
    guard_verdict = {"approved": True, "user_approved": True, "description": "dangerous command"}

    def _replacing_hook(hook_name, **kwargs):
        return ["replaced output"]

    payload, _env = _run_terminal(
        monkeypatch,
        tmp_path,
        command="grep foo bar",
        output="original output",
        returncode=1,
        approval=guard_verdict,
        invoke_hook=_replacing_hook,
    )

    expected_approval = "Command required approval (dangerous command) and was approved by the user."
    assert payload["output"] == "replaced output"
    assert payload["approval"] == expected_approval
    assert payload["exit_code_meaning"] == "No matches found (not an error)"
|
|
|
|
|
|
def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp_path):
    """End-to-end check: a plugin on disk transforms output, which is then truncated.

    Writes a ``terminal_transform`` plugin under ``$HERMES_HOME/plugins`` that
    wraps the output in ``PLUGIN-HEAD`` / ``PLUGIN-TAIL`` markers, enables it in
    ``config.yaml``, runs plugin discovery against a fresh manager, and then
    runs the terminal tool with the real ``invoke_hook`` (no override).

    Relies on ``HERMES_HOME`` being set in the environment; presumably a test
    fixture points it at a per-test directory (not visible in this file).
    """
    import yaml

    hermes_home = Path(os.environ["HERMES_HOME"])
    plugin_dir = hermes_home / "plugins" / "terminal_transform"
    plugin_dir.mkdir(parents=True)
    (plugin_dir / "plugin.yaml").write_text("name: terminal_transform\n", encoding="utf-8")
    (plugin_dir / "__init__.py").write_text(
        "def register(ctx):\n"
        ' ctx.register_hook("transform_terminal_output", '
        'lambda **kw: "PLUGIN-HEAD\\n" + kw["output"] + "\\nPLUGIN-TAIL")\n',
        encoding="utf-8",
    )

    # Plugins are opt-in — must be listed in plugins.enabled to load.
    cfg_path = hermes_home / "config.yaml"
    cfg_path.write_text(
        yaml.safe_dump({"plugins": {"enabled": ["terminal_transform"]}}),
        encoding="utf-8",
    )

    # Force a fresh plugin manager so the new config is picked up. Install it
    # through monkeypatch rather than a bare module assignment so the previous
    # manager is restored at teardown; otherwise the manager holding this
    # test's transform hook could stay in place for later tests that use the
    # real invoke_hook.
    monkeypatch.setattr(plugins_mod, "_plugin_manager", plugins_mod.PluginManager())
    plugins_mod.discover_plugins()

    long_output = "X" * 60000
    result, _mock_env = _run_terminal(
        monkeypatch,
        tmp_path,
        output=long_output,
    )

    assert "PLUGIN-HEAD" in result["output"]
    assert "PLUGIN-TAIL" in result["output"]
    assert "[OUTPUT TRUNCATED" in result["output"]
|