mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(tests): catch up 25 stale tests after recent merges (#28626)
Sweep of all CI failures on origin/main, grouped by drift source: Telegram allowlist gate (db50af910added user-authz to _should_process_message): - Hardcoded "[Telegram]" prefix in the logger.warning so the call no longer dereferences self.name → self.platform, which test fixtures built via object.__new__ never set. - test_telegram_format / test_allowed_channels_widening fixtures stub _is_callback_user_authorized → True so the new gate doesn't reject guest-mode / allowed-channels test messages. - test_telegram_approval_buttons::test_update_prompt_callback_not_affected sets TELEGRAM_ALLOWED_USERS="*" so the fail-closed default doesn't reject the callback before it writes .update_response. Approval surface (6d495d9e7renamed status,214b95392detached stdin): - test_no_callback_returns_approval_required: status is now "pending_approval" (was "approval_required"). - test_close_stdin_allows_eof_driven_process_to_finish: switch to use_pty=True; non-PTY now uses stdin=DEVNULL. Mattermost (send() now resolves root_id via _api_get first): - test_send_with_thread_reply mocks _session.get with a thread-root response so the new resolver doesn't TypeError on a bare AsyncMock. Kanban (d8ad431derename,f55d94a1ereview column, _kanban_worker_skill_available): - _safe_int → _to_epoch in the two test_kanban_db tests. - Spawn-skills tests (×3) monkey-patch _kanban_worker_skill_available to True since the isolated kanban_home fixture has no devops/kanban-worker tree. - test_gateway_dispatcher_disables_corrupt_board: connect count 3 → 5 (review-column probe now also runs per tick). Aux-config severity at_or_above (a94ddd807): - test_diagnostics_endpoint_severity_filter expects warning filter to include error+critical now (was exact-match). Anthropic error handling (conversation loop extracted from run_agent): - _no_backoff_wait fixture patches BOTH run_agent.jittered_backoff AND agent.conversation_loop.jittered_backoff. The latter is the actual call site; without the second patch tests burn ~2s per retry and hit the 30s SIGALRM timeout on CI. Other test pollution / drift: - test_auto_does_not_select_copilot_from_github_token: patch agent.bedrock_adapter.has_aws_credentials → False so boto3's credential chain can't auto-pick Bedrock from developer ~/.aws. - test_setup_openclaw_migration: patch hermes_cli.gateway.get_env_value in addition to setup_mod.get_env_value — _platform_status reads through the gateway module's binding. - test_gateway_prefix: COMPONENT_PREFIXES["gateway"] now includes "hermes_plugins" too. - test_recommended_update_command_defaults_to_hermes_update: also short-circuit get_managed_update_command in case a stray ~/.hermes/.managed marker is present. - test_user_id_is_not_explicit: _parse_target_ref now returns is_explicit=False for Slack U.../W... IDs (chat.postMessage rejects them — a DM must be opened first via conversations.open).
This commit is contained in:
parent
12c39830f0
commit
a0bd11d022
14 changed files with 118 additions and 20 deletions
|
|
@ -38,6 +38,10 @@ def _make_telegram_adapter(*, allowed_chats=None, require_mention=None, guest_mo
|
|||
adapter._bot = SimpleNamespace(id=999, username="hermes_bot")
|
||||
adapter._message_handler = AsyncMock()
|
||||
adapter._mention_patterns = adapter._compile_mention_patterns()
|
||||
# PR db50af910 added a TELEGRAM_ALLOWED_USERS allowlist gate to
|
||||
# _should_process_message; stub it for tests that exercise the
|
||||
# allowed-channels widening logic that runs after.
|
||||
adapter._is_callback_user_authorized = lambda *_a, **_kw: True
|
||||
return adapter
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -629,7 +629,12 @@ class TestFallbackNoCallback:
|
|||
_clear_approval_state()
|
||||
|
||||
def test_no_callback_returns_approval_required(self):
|
||||
"""Without a registered callback, the old approval_required path is used."""
|
||||
"""Without a registered callback, the fallback returns pending_approval.
|
||||
|
||||
PR #6d495d9e7 renamed the LLM-visible status from ``approval_required``
|
||||
to ``pending_approval`` to make the state distinguishable from a
|
||||
failed tool call.
|
||||
"""
|
||||
from tools.approval import check_all_command_guards, _pending
|
||||
|
||||
os.environ["HERMES_EXEC_ASK"] = "1"
|
||||
|
|
@ -641,4 +646,5 @@ class TestFallbackNoCallback:
|
|||
os.environ.pop("HERMES_SESSION_KEY", None)
|
||||
|
||||
assert result["approved"] is False
|
||||
assert result.get("status") == "approval_required"
|
||||
assert result.get("status") == "pending_approval"
|
||||
assert result.get("approval_pending") is True
|
||||
|
|
|
|||
|
|
@ -197,7 +197,19 @@ class TestMattermostSend:
|
|||
mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
|
||||
mock_resp.__aexit__ = AsyncMock(return_value=False)
|
||||
|
||||
# send() now calls _resolve_root_id → _api_get("posts/<id>") first
|
||||
# to make sure root_id points to a thread root, so we need to mock
|
||||
# the GET too. Return an empty dict (no root_id) so the resolver
|
||||
# falls back to the original reply_to as the root.
|
||||
mock_get_resp = AsyncMock()
|
||||
mock_get_resp.status = 200
|
||||
mock_get_resp.json = AsyncMock(return_value={"id": "root_post", "root_id": ""})
|
||||
mock_get_resp.text = AsyncMock(return_value="")
|
||||
mock_get_resp.__aenter__ = AsyncMock(return_value=mock_get_resp)
|
||||
mock_get_resp.__aexit__ = AsyncMock(return_value=False)
|
||||
|
||||
self.adapter._session.post = MagicMock(return_value=mock_resp)
|
||||
self.adapter._session.get = MagicMock(return_value=mock_get_resp)
|
||||
|
||||
result = await self.adapter.send("channel_1", "Reply!", reply_to="root_post")
|
||||
|
||||
|
|
|
|||
|
|
@ -493,7 +493,11 @@ class TestTelegramApprovalCallback:
|
|||
|
||||
with patch("tools.approval.resolve_gateway_approval") as mock_resolve:
|
||||
with patch("hermes_constants.get_hermes_home", return_value=tmp_path):
|
||||
with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": ""}):
|
||||
# Allow the caller — the new fail-closed allowlist gate
|
||||
# (#24457) rejects empty TELEGRAM_ALLOWED_USERS, but this
|
||||
# test isn't exercising that gate; it's verifying the
|
||||
# update_prompt callback still writes the response.
|
||||
with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": "*"}):
|
||||
await adapter._handle_callback_query(update, context)
|
||||
|
||||
# Should NOT have triggered approval resolution
|
||||
|
|
|
|||
|
|
@ -855,6 +855,11 @@ def _guest_test_adapter(*, guest_mode=True, require_mention=True, allowed_chats=
|
|||
adapter.config = config
|
||||
adapter._bot = SimpleNamespace(id=999, username="hermes_bot")
|
||||
adapter._mention_patterns = adapter._compile_mention_patterns()
|
||||
# PR db50af910 added a TELEGRAM_ALLOWED_USERS allowlist gate to
|
||||
# _should_process_message. These tests aren't exercising the auth
|
||||
# gate — they're exercising the guest-mode mention/allowed_chats
|
||||
# logic that runs after — so stub the user authz to always allow.
|
||||
adapter._is_callback_user_authorized = lambda *_a, **_kw: True
|
||||
return adapter
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -314,6 +314,16 @@ class TestResolveProvider:
|
|||
assert resolve_provider("auto") == "openrouter"
|
||||
|
||||
def test_auto_does_not_select_copilot_from_github_token(self, monkeypatch):
|
||||
# AWS Bedrock auto-detection (via boto3's credential chain) runs at
|
||||
# the tail of resolve_provider("auto") and will silently pick up
|
||||
# ~/.aws/credentials on developer machines that aren't blanked by
|
||||
# the hermetic conftest. Force-disable it so this test exercises
|
||||
# the specific "GitHub token alone shouldn't auto-pick copilot"
|
||||
# behavior, not the Bedrock fallback.
|
||||
monkeypatch.setattr(
|
||||
"agent.bedrock_adapter.has_aws_credentials",
|
||||
lambda env=None: False,
|
||||
)
|
||||
monkeypatch.setenv("GITHUB_TOKEN", "gh-test-token")
|
||||
with pytest.raises(AuthError, match="No inference provider configured"):
|
||||
resolve_provider("auto")
|
||||
|
|
|
|||
|
|
@ -2700,6 +2700,12 @@ def test_default_spawn_auto_loads_kanban_worker_skill(kanban_home, monkeypatch):
|
|||
We intercept Popen to capture the argv without actually spawning a
|
||||
hermes subprocess (which would hang trying to call an LLM).
|
||||
"""
|
||||
# Pretend the bundled kanban-worker skill resolves for this isolated
|
||||
# HERMES_HOME — the fixture creates an empty tmpdir without the
|
||||
# devops/kanban-worker tree, and _default_spawn gates the --skills
|
||||
# flag on actual resolvability.
|
||||
monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda _h: True)
|
||||
|
||||
captured = {}
|
||||
|
||||
class FakeProc:
|
||||
|
|
@ -2969,6 +2975,7 @@ def test_create_task_skills_lists_all_toolset_typos(kanban_home):
|
|||
def test_default_spawn_appends_per_task_skills(kanban_home, monkeypatch):
|
||||
"""Dispatcher argv must carry one `--skills X` pair per task skill,
|
||||
in addition to the built-in kanban-worker."""
|
||||
monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda _h: True)
|
||||
captured = {}
|
||||
|
||||
class FakeProc:
|
||||
|
|
@ -3018,6 +3025,7 @@ def test_default_spawn_appends_per_task_skills(kanban_home, monkeypatch):
|
|||
|
||||
def test_default_spawn_dedupes_kanban_worker_from_task_skills(kanban_home, monkeypatch):
|
||||
"""If a task explicitly lists 'kanban-worker', we don't double-pass it."""
|
||||
monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda _h: True)
|
||||
captured = {}
|
||||
|
||||
class FakeProc:
|
||||
|
|
@ -3665,9 +3673,13 @@ def test_gateway_dispatcher_disables_corrupt_board_without_traceback(
|
|||
assert sum("not a valid SQLite database" in msg for msg in messages) == 1
|
||||
assert not any("tick failed on board" in msg for msg in messages)
|
||||
assert not any(record.exc_info for record in caplog.records)
|
||||
# First tick connect + two ready-queue probes. The second dispatch tick
|
||||
# skips connect because the corrupt board fingerprint is disabled.
|
||||
assert calls["connect"] == 3
|
||||
# First tick connect (dispatch) + two probes per `_has_ready_work` call
|
||||
# (ready then review, both via _kb.connect). The second dispatch tick
|
||||
# skips the dispatch connect because the corrupt board fingerprint is
|
||||
# disabled, but the ready/review probes still each connect. PR f55d94a1e
|
||||
# added the review-column probe alongside the existing ready-column
|
||||
# probe, bumping this from 3 → 5.
|
||||
assert calls["connect"] == 5
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -2237,24 +2237,25 @@ def _make_task(**overrides) -> "kb.Task":
|
|||
|
||||
def test_safe_int_accepts_int_and_int_string():
|
||||
"""Sanity: well-typed values pass through."""
|
||||
assert kb._safe_int(0) == 0
|
||||
assert kb._safe_int(1700000000) == 1700000000
|
||||
assert kb._safe_int("1700000000") == 1700000000
|
||||
# PR d8ad431de renamed _safe_int → _to_epoch (now also handles ISO-8601).
|
||||
assert kb._to_epoch(0) == 0
|
||||
assert kb._to_epoch(1700000000) == 1700000000
|
||||
assert kb._to_epoch("1700000000") == 1700000000
|
||||
|
||||
|
||||
def test_safe_int_returns_none_on_corrupt_inputs():
|
||||
"""All the failure modes that used to crash task_age."""
|
||||
# None — common when the column was never written
|
||||
assert kb._safe_int(None) is None
|
||||
assert kb._to_epoch(None) is None
|
||||
# Unsubstituted format string — the literal case the PR title cites
|
||||
assert kb._safe_int("%s") is None
|
||||
assert kb._to_epoch("%s") is None
|
||||
# Arbitrary non-numeric strings
|
||||
assert kb._safe_int("abc") is None
|
||||
assert kb._safe_int("") is None
|
||||
assert kb._to_epoch("abc") is None
|
||||
assert kb._to_epoch("") is None
|
||||
# Float-ish strings: int("1.5") raises ValueError too — caller wants None.
|
||||
assert kb._safe_int("1.5") is None
|
||||
assert kb._to_epoch("1.5") is None
|
||||
# Random object — covered by TypeError branch
|
||||
assert kb._safe_int(object()) is None
|
||||
assert kb._to_epoch(object()) is None
|
||||
|
||||
|
||||
def test_task_age_handles_corrupt_created_at():
|
||||
|
|
|
|||
|
|
@ -29,7 +29,13 @@ def test_format_managed_message_homebrew(monkeypatch):
|
|||
def test_recommended_update_command_defaults_to_hermes_update(monkeypatch):
|
||||
monkeypatch.delenv("HERMES_MANAGED", raising=False)
|
||||
|
||||
with patch("hermes_cli.config.detect_install_method", return_value="git"):
|
||||
# Also short-circuit the .managed marker path — CI runners may have an
|
||||
# ambient ~/.hermes/.managed if a prior test left HERMES_HOME pointing
|
||||
# somewhere with that marker, which would make get_managed_update_command()
|
||||
# return "Update your Nix flake input ..." instead of falling through to
|
||||
# detect_install_method().
|
||||
with patch("hermes_cli.config.get_managed_update_command", return_value=None), \
|
||||
patch("hermes_cli.config.detect_install_method", return_value="git"):
|
||||
assert recommended_update_command() == "hermes update"
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -404,7 +404,14 @@ class TestGetSectionConfigSummary:
|
|||
assert result == "max turns: 120"
|
||||
|
||||
def test_gateway_returns_none_without_tokens(self):
|
||||
with patch.object(setup_mod, "get_env_value", return_value=""):
|
||||
# _platform_status reads via hermes_cli.gateway.get_env_value, not
|
||||
# setup_mod.get_env_value, so patch BOTH. Without the second patch,
|
||||
# any environment-variable token (or one leaked in by a sibling
|
||||
# test on the same xdist worker) makes the gateway section report
|
||||
# platforms-configured and the test sees a non-None summary.
|
||||
import hermes_cli.gateway as gateway_mod
|
||||
with patch.object(setup_mod, "get_env_value", return_value=""), \
|
||||
patch.object(gateway_mod, "get_env_value", return_value=""):
|
||||
result = setup_mod._get_section_config_summary({}, "gateway")
|
||||
assert result is None
|
||||
|
||||
|
|
@ -625,6 +632,13 @@ class TestSetupWizardSkipsConfiguredSections:
|
|||
|
||||
reloaded_config = {"model": "openai/gpt-4"}
|
||||
|
||||
# _platform_status (called by the gateway summary path) reads env
|
||||
# vars via hermes_cli.gateway.get_env_value, NOT setup_mod's. Patch
|
||||
# both so xdist sibling tests can't leak a TELEGRAM_BOT_TOKEN /
|
||||
# WHATSAPP_* / etc. through and trick the wizard into thinking the
|
||||
# gateway section is already configured (which would skip it).
|
||||
import hermes_cli.gateway as gateway_mod
|
||||
|
||||
with (
|
||||
patch.object(setup_mod, "ensure_hermes_home"),
|
||||
patch.object(
|
||||
|
|
@ -633,6 +647,7 @@ class TestSetupWizardSkipsConfiguredSections:
|
|||
),
|
||||
patch.object(setup_mod, "get_hermes_home", return_value=tmp_path),
|
||||
patch.object(setup_mod, "get_env_value", side_effect=env_side),
|
||||
patch.object(gateway_mod, "get_env_value", side_effect=env_side),
|
||||
patch.object(setup_mod, "is_interactive_stdin", return_value=True),
|
||||
patch("hermes_cli.auth.get_active_provider", return_value=None),
|
||||
patch("builtins.input", return_value=""),
|
||||
|
|
|
|||
|
|
@ -47,6 +47,12 @@ def _no_backoff_wait(monkeypatch):
|
|||
import time as _time
|
||||
|
||||
monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
|
||||
# The conversation loop was extracted out of run_agent.py into
|
||||
# agent.conversation_loop, which holds its own `from agent.retry_utils
|
||||
# import jittered_backoff` reference. Patching `run_agent.jittered_backoff`
|
||||
# alone leaves the live retry path using real ~2s waits. Patch both.
|
||||
from agent import conversation_loop as _conv_loop
|
||||
monkeypatch.setattr(_conv_loop, "jittered_backoff", lambda *a, **k: 0.0)
|
||||
monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)
|
||||
|
||||
# Also fast-path asyncio.sleep — the gateway's _run_agent path has
|
||||
|
|
|
|||
|
|
@ -538,7 +538,10 @@ class TestComponentPrefixes:
|
|||
|
||||
def test_gateway_prefix(self):
|
||||
assert "gateway" in hermes_logging.COMPONENT_PREFIXES
|
||||
assert ("gateway",) == hermes_logging.COMPONENT_PREFIXES["gateway"]
|
||||
# The gateway component captures both core gateway logs and the
|
||||
# hermes_plugins facility (plugin-installed gateway adapters log
|
||||
# under that prefix).
|
||||
assert ("gateway", "hermes_plugins") == hermes_logging.COMPONENT_PREFIXES["gateway"]
|
||||
|
||||
def test_agent_prefix(self):
|
||||
prefixes = hermes_logging.COMPONENT_PREFIXES["agent"]
|
||||
|
|
|
|||
|
|
@ -296,10 +296,17 @@ class TestStdinHelpers:
|
|||
assert result["status"] == "ok"
|
||||
|
||||
def test_close_stdin_allows_eof_driven_process_to_finish(self, registry, tmp_path):
|
||||
"""PTY mode: writing data + sending EOF lets an EOF-driven child finish.
|
||||
|
||||
Background non-PTY mode used to expose subprocess stdin via a pipe,
|
||||
but PR #214b95392 detached non-PTY stdin to DEVNULL to fix keyboard
|
||||
lockout (#17959). For interactive stdin → PTY mode is now the only
|
||||
supported path.
|
||||
"""
|
||||
session = registry.spawn_local(
|
||||
'python3 -c "import sys; print(sys.stdin.read().strip())"',
|
||||
cwd=str(tmp_path),
|
||||
use_pty=False,
|
||||
use_pty=True,
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -359,7 +359,14 @@ def _parse_target_ref(platform_name: str, target_ref: str):
|
|||
return match.group(1), match.group(2), True
|
||||
match = _SLACK_TARGET_RE.fullmatch(target_ref)
|
||||
if match:
|
||||
return match.group(1), None, True
|
||||
chat_id = match.group(1)
|
||||
# Slack user IDs (U...) and workspace IDs (W...) are NOT valid
|
||||
# explicit send targets — chat.postMessage rejects them. A DM
|
||||
# must be opened first via conversations.open to get a D...
|
||||
# conversation ID. Caller still gets the chat_id so the U→D
|
||||
# resolution path in send_message() can run.
|
||||
is_explicit = chat_id[0] not in {"U", "W"}
|
||||
return chat_id, None, is_explicit
|
||||
if platform_name == "matrix":
|
||||
trimmed = target_ref.strip()
|
||||
split_idx = trimmed.rfind(":$")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue