mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
* fix(tests): mock is_safe_url in tests that use example.com Tests using example.com URLs were failing because is_safe_url does a real DNS lookup which fails in environments where example.com doesn't resolve, causing the request to be blocked before reaching the already-mocked HTTP client. This should fix around 17 failing tests. These tests test logic, caching, etc. so mocking this method should not modify them in any way. TestMattermostSendUrlAsFile was already doing this so we follow the same pattern. * fix(test): use case-insensitive lookup for model context length check DEFAULT_CONTEXT_LENGTHS uses inconsistent casing (MiniMax keys are lowercase, Qwen keys are mixed-case) so the test was broken in some cases since it couldn't find the model. * fix(test): patch is_linux in systemd gateway restart test The test only patched is_macos to False but didn't patch is_linux to True. On macOS hosts, is_linux() returns False and the systemd restart code path is skipped entirely, making the assertion fail. * fix(test): use non-blocklisted env var in docker forward_env tests GITHUB_TOKEN is in api_key_env_vars and thus in _HERMES_PROVIDER_ENV_BLOCKLIST so the env var is silently dropped, we replace it with a non-blocked one like DATABASE_URL so the tests actually work. * fix(test): fully isolate _has_any_provider_configured from host env _has_any_provider_configured() checks all env vars from PROVIDER_REGISTRY (not just the 5 the tests were clearing) and also calls get_auth_status() which detects gh auth token for Copilot. On machines with any of these set, the function returns True before reaching the code path under test. Clear all registry vars and mock get_auth_status so host credentials don't interfere. * fix(test): correct path to hermes_base_env.py in tool parser tests Path(__file__).parent.parent resolved to tests/, not the project root. The file lives at environments/hermes_base_env.py so we need one more parent level. 
* fix(test): accept optional HTML fields in Matrix send payload _send_matrix sometimes adds format and formatted_body when the markdown library is installed. The test was doing an exact dict equality check which broke. Check required fields instead. * fix(test): add config.yaml to codex vision requirements test The test only wrote auth.json but not config.yaml, so _read_main_provider() returned empty and vision auto-detect never tried the codex provider. Add a config.yaml pointing at openai-codex so the fallback path actually resolves the client. * fix(test): clear OPENROUTER_API_KEY in _isolate_hermes_home run_agent.py calls load_hermes_dotenv() at import time, which injects API keys from ~/.hermes/.env into os.environ before any test fixture runs. This caused test_agent_loop_tool_calling to make real API calls instead of skipping, which ends up making some tests fail. * fix(test): add get_rate_limit_state to agent mock in usage report tests _show_usage now calls agent.get_rate_limit_state() for rate limit display. The SimpleNamespace mock was missing this method. * fix(test): update expected Camofox config version from 12 to 13 * fix(test): mock _get_enabled_platforms in nous managed defaults test Importing gateway.run leaks DISCORD_BOT_TOKEN into os.environ, which makes _get_enabled_platforms() return ["cli", "discord"] instead of just ["cli"]. tools_command loops per platform, so apply_nous_managed_defaults runs twice: the first call sets config values, the second sees them as already configured and returns an empty set, causing the assertion to fail.
364 lines
13 KiB
Python
364 lines
13 KiB
Python
from datetime import datetime, timedelta
|
||
from types import SimpleNamespace
|
||
from unittest.mock import MagicMock, patch
|
||
|
||
from cli import HermesCLI
|
||
|
||
|
||
def _make_cli(model: str = "anthropic/claude-sonnet-4-20250514"):
    """Build a bare ``HermesCLI`` for tests without running ``__init__``.

    The instance is created through ``__new__`` and only receives the
    attributes assigned here: the model name, a session start 14m32s in the
    past, a single-message conversation history, and no agent.

    Args:
        model: Model identifier stored on the instance.

    Returns:
        The partially initialised ``HermesCLI`` instance.
    """
    stub = HermesCLI.__new__(HermesCLI)
    started_at = datetime.now() - timedelta(minutes=14, seconds=32)
    initial_state = {
        "model": model,
        "session_start": started_at,
        "conversation_history": [{"role": "user", "content": "hi"}],
        "agent": None,
    }
    for attr_name, attr_value in initial_state.items():
        setattr(stub, attr_name, attr_value)
    return stub
|
||
|
||
|
||
def _attach_agent(
|
||
cli_obj,
|
||
*,
|
||
input_tokens: int | None = None,
|
||
output_tokens: int | None = None,
|
||
cache_read_tokens: int = 0,
|
||
cache_write_tokens: int = 0,
|
||
prompt_tokens: int,
|
||
completion_tokens: int,
|
||
total_tokens: int,
|
||
api_calls: int,
|
||
context_tokens: int,
|
||
context_length: int,
|
||
compressions: int = 0,
|
||
):
|
||
cli_obj.agent = SimpleNamespace(
|
||
model=cli_obj.model,
|
||
provider="anthropic" if cli_obj.model.startswith("anthropic/") else None,
|
||
base_url="",
|
||
session_input_tokens=input_tokens if input_tokens is not None else prompt_tokens,
|
||
session_output_tokens=output_tokens if output_tokens is not None else completion_tokens,
|
||
session_cache_read_tokens=cache_read_tokens,
|
||
session_cache_write_tokens=cache_write_tokens,
|
||
session_prompt_tokens=prompt_tokens,
|
||
session_completion_tokens=completion_tokens,
|
||
session_total_tokens=total_tokens,
|
||
session_api_calls=api_calls,
|
||
get_rate_limit_state=lambda: None,
|
||
context_compressor=SimpleNamespace(
|
||
last_prompt_tokens=context_tokens,
|
||
context_length=context_length,
|
||
compression_count=compressions,
|
||
),
|
||
)
|
||
return cli_obj
|
||
|
||
|
||
class TestCLIStatusBar:
    """Tests for the status-bar style/text helpers and input-height wrapping."""

    @staticmethod
    def _wide_char_input_area():
        """Return a stand-in input area whose document is one line of ten "你"."""
        document = SimpleNamespace(lines=["你" * 10])
        return SimpleNamespace(buffer=SimpleNamespace(document=document))

    @staticmethod
    def _input_height(cli_obj, input_area):
        """Compute how many visual rows the input buffer needs, clamped to 1..8.

        Shared by the input-height tests so the computation exists only once
        in this class.

        Args:
            cli_obj: Object exposing ``_get_tui_prompt_text()``.
            input_area: Object exposing ``buffer.document.lines``.

        Returns:
            The wrapped row count (between 1 and 8), or 1 if anything raises.
        """
        try:
            # Imported at call time so an active patch of
            # ``prompt_toolkit.application.get_app`` is the one picked up.
            from prompt_toolkit.application import get_app
            from prompt_toolkit.utils import get_cwidth

            doc = input_area.buffer.document
            prompt_width = max(2, get_cwidth(cli_obj._get_tui_prompt_text()))
            try:
                available_width = get_app().output.get_size().columns - prompt_width
            except Exception:
                # No usable app size: fall back to the terminal size from shutil.
                import shutil

                available_width = shutil.get_terminal_size((80, 24)).columns - prompt_width
            if available_width < 10:
                available_width = 40
            visual_lines = 0
            for line in doc.lines:
                line_width = get_cwidth(line)
                if line_width <= 0:
                    visual_lines += 1
                else:
                    # Ceiling division: rows needed to fit line_width cells.
                    visual_lines += max(1, -(-line_width // available_width))
            return min(max(visual_lines, 1), 8)
        except Exception:
            return 1

    def test_context_style_thresholds(self):
        """Each context-usage percentage maps to the expected style class."""
        cli_obj = _make_cli()

        assert cli_obj._status_bar_context_style(None) == "class:status-bar-dim"
        assert cli_obj._status_bar_context_style(10) == "class:status-bar-good"
        assert cli_obj._status_bar_context_style(50) == "class:status-bar-warn"
        assert cli_obj._status_bar_context_style(81) == "class:status-bar-bad"
        assert cli_obj._status_bar_context_style(95) == "class:status-bar-critical"

    def test_build_status_bar_text_for_wide_terminal(self):
        """At width 120 the text shows model, context usage, percent and duration."""
        cli_obj = _attach_agent(
            _make_cli(),
            prompt_tokens=10_230,
            completion_tokens=2_220,
            total_tokens=12_450,
            api_calls=7,
            context_tokens=12_450,
            context_length=200_000,
        )

        text = cli_obj._build_status_bar_text(width=120)

        assert "claude-sonnet-4-20250514" in text
        assert "12.4K/200K" in text
        assert "6%" in text
        assert "$0.06" not in text  # cost hidden by default
        assert "15m" in text

    def test_input_height_counts_wide_characters_using_cell_width(self):
        """Ten "你" in a 14-column terminal with a 2-cell prompt wrap to 2 rows."""
        cli_obj = _make_cli()
        input_area = self._wide_char_input_area()

        mock_app = MagicMock()
        mock_app.output.get_size.return_value = MagicMock(columns=14)
        with (
            patch.object(HermesCLI, "_get_tui_prompt_text", return_value="❯ "),
            patch("prompt_toolkit.application.get_app", return_value=mock_app),
        ):
            assert self._input_height(cli_obj, input_area) == 2

    def test_input_height_uses_prompt_toolkit_width_over_shutil(self):
        """When the app reports a size, ``shutil.get_terminal_size`` is not called."""
        cli_obj = _make_cli()
        input_area = self._wide_char_input_area()

        mock_app = MagicMock()
        mock_app.output.get_size.return_value = MagicMock(columns=14)
        with (
            patch.object(HermesCLI, "_get_tui_prompt_text", return_value="❯ "),
            patch("prompt_toolkit.application.get_app", return_value=mock_app),
            patch("shutil.get_terminal_size") as mock_shutil,
        ):
            assert self._input_height(cli_obj, input_area) == 2
            mock_shutil.assert_not_called()

    def test_build_status_bar_text_no_cost_in_status_bar(self):
        """The status bar text contains no ``$`` at width 120."""
        cli_obj = _attach_agent(
            _make_cli(),
            prompt_tokens=10000,
            completion_tokens=5000,
            total_tokens=15000,
            api_calls=7,
            context_tokens=50000,
            context_length=200_000,
        )

        text = cli_obj._build_status_bar_text(width=120)
        assert "$" not in text  # cost is never shown in status bar

    def test_build_status_bar_text_collapses_for_narrow_terminal(self):
        """At width 60 the text keeps the icon and duration but drops "200K"."""
        cli_obj = _attach_agent(
            _make_cli(),
            prompt_tokens=10000,
            completion_tokens=2400,
            total_tokens=12400,
            api_calls=7,
            context_tokens=12400,
            context_length=200_000,
        )

        text = cli_obj._build_status_bar_text(width=60)

        assert "⚕" in text
        assert "$0.06" not in text  # cost hidden by default
        assert "15m" in text
        assert "200K" not in text

    def test_build_status_bar_text_handles_missing_agent(self):
        """With ``agent`` set to None the text still has the icon and model name."""
        cli_obj = _make_cli()

        text = cli_obj._build_status_bar_text(width=100)

        assert "⚕" in text
        assert "claude-sonnet-4-20250514" in text
|
||
|
||
|
||
class TestCLIUsageReport:
    """Checks on the text printed by ``_show_usage``."""

    @staticmethod
    def _render_usage(cli, capsys):
        """Run ``_show_usage`` with ``verbose`` off and return captured stdout."""
        cli.verbose = False
        cli._show_usage()
        return capsys.readouterr().out

    def test_show_usage_includes_estimated_cost(self, capsys):
        """The report for the default model lists the cost fields and amount."""
        cli = _make_cli()
        _attach_agent(
            cli,
            prompt_tokens=10_230,
            completion_tokens=2_220,
            total_tokens=12_450,
            api_calls=7,
            context_tokens=12_450,
            context_length=200_000,
            compressions=1,
        )

        report = self._render_usage(cli, capsys)

        expected_fragments = (
            "Model:",
            "Cost status:",
            "Cost source:",
            "Total cost:",
            "$",
            "0.064",
            "Session duration:",
            "Compressions:",
        )
        for fragment in expected_fragments:
            assert fragment in report

    def test_show_usage_marks_unknown_pricing(self, capsys):
        """A ``local/`` model is reported as ``n/a`` with a pricing-unknown note."""
        cli = _make_cli(model="local/my-custom-model")
        _attach_agent(
            cli,
            prompt_tokens=1_000,
            completion_tokens=500,
            total_tokens=1_500,
            api_calls=1,
            context_tokens=1_000,
            context_length=32_000,
        )

        report = self._render_usage(cli, capsys)

        for fragment in (
            "Total cost:",
            "n/a",
            "Pricing unknown for local/my-custom-model",
        ):
            assert fragment in report

    def test_zero_priced_provider_models_stay_unknown(self, capsys):
        """``glm-5`` is still reported as ``n/a`` with a pricing-unknown note."""
        cli = _make_cli(model="glm-5")
        _attach_agent(
            cli,
            prompt_tokens=1_000,
            completion_tokens=500,
            total_tokens=1_500,
            api_calls=1,
            context_tokens=1_000,
            context_length=32_000,
        )

        report = self._render_usage(cli, capsys)

        for fragment in ("Total cost:", "n/a", "Pricing unknown for glm-5"):
            assert fragment in report
|
||
|
||
|
||
class TestStatusBarWidthSource:
    """Ensure status bar fragments don't overflow the terminal width."""

    def _make_wide_cli(self):
        """Return a CLI with an attached fake agent and the status bar visible."""
        cli_obj = _attach_agent(
            _make_cli(),
            prompt_tokens=100_000,
            completion_tokens=5_000,
            total_tokens=105_000,
            api_calls=20,
            context_tokens=100_000,
            context_length=200_000,
        )
        cli_obj._status_bar_visible = True
        return cli_obj

    def test_fragments_fit_within_announced_width(self):
        """Total fragment text length must not exceed the width used to build them."""
        cli_obj = self._make_wide_cli()

        for width in (40, 52, 76, 80, 120, 200):
            mock_app = MagicMock()
            mock_app.output.get_size.return_value = MagicMock(columns=width)

            with patch("prompt_toolkit.application.get_app", return_value=mock_app):
                frags = cli_obj._get_status_bar_fragments()

            # Each fragment is a (style, text) pair; only the text is measured.
            total_text = "".join(text for _, text in frags)
            display_width = cli_obj._status_bar_display_width(total_text)
            assert display_width <= width + 4, (  # +4 for minor padding chars
                f"At width={width}, fragment total {display_width} cells overflows "
                f"({total_text!r})"
            )

    def test_fragments_use_pt_width_over_shutil(self):
        """When prompt_toolkit reports a width, shutil.get_terminal_size must not be used."""
        cli_obj = self._make_wide_cli()

        mock_app = MagicMock()
        mock_app.output.get_size.return_value = MagicMock(columns=120)

        with (
            patch("prompt_toolkit.application.get_app", return_value=mock_app),
            patch("shutil.get_terminal_size") as mock_shutil,
        ):
            cli_obj._get_status_bar_fragments()

        mock_shutil.assert_not_called()

    def test_fragments_fall_back_to_shutil_when_no_app(self):
        """Outside a TUI context (no running app), shutil must be used as fallback."""
        cli_obj = self._make_wide_cli()

        with (
            patch("prompt_toolkit.application.get_app", side_effect=Exception("no app")),
            patch("shutil.get_terminal_size", return_value=MagicMock(columns=100)) as mock_shutil,
        ):
            frags = cli_obj._get_status_bar_fragments()

        mock_shutil.assert_called()
        assert len(frags) > 0

    def test_build_status_bar_text_uses_pt_width(self):
        """_build_status_bar_text() must also prefer prompt_toolkit width."""
        cli_obj = self._make_wide_cli()

        mock_app = MagicMock()
        mock_app.output.get_size.return_value = MagicMock(columns=80)

        with (
            patch("prompt_toolkit.application.get_app", return_value=mock_app),
            patch("shutil.get_terminal_size") as mock_shutil,
        ):
            text = cli_obj._build_status_bar_text()  # no explicit width

        mock_shutil.assert_not_called()
        assert isinstance(text, str)
        assert len(text) > 0

    def test_explicit_width_skips_pt_lookup(self):
        """An explicit width= argument must bypass both PT and shutil lookups."""
        cli_obj = self._make_wide_cli()

        with (
            patch("prompt_toolkit.application.get_app") as mock_get_app,
            patch("shutil.get_terminal_size") as mock_shutil,
        ):
            text = cli_obj._build_status_bar_text(width=100)

        mock_get_app.assert_not_called()
        mock_shutil.assert_not_called()
        assert len(text) > 0
|