fix(tests): catch up 25 stale tests after recent merges (#28626)

Sweep of all CI failures on origin/main, grouped by drift source:

Telegram allowlist gate (db50af910 added user-authz to _should_process_message):
- Hardcoded "[Telegram]" prefix in the logger.warning so the call no
  longer dereferences self.name → self.platform, which test fixtures
  built via object.__new__ never set.
- test_telegram_format / test_allowed_channels_widening fixtures stub
  _is_callback_user_authorized → True so the new gate doesn't reject
  guest-mode / allowed-channels test messages.
- test_telegram_approval_buttons::test_update_prompt_callback_not_affected
  sets TELEGRAM_ALLOWED_USERS="*" so the fail-closed default doesn't
  reject the callback before it writes .update_response.

Approval surface (6d495d9e7 renamed status, 214b95392 detached stdin):
- test_no_callback_returns_approval_required: status is now
  "pending_approval" (was "approval_required").
- test_close_stdin_allows_eof_driven_process_to_finish: switch to
  use_pty=True; non-PTY now uses stdin=DEVNULL.

Mattermost (send() now resolves root_id via _api_get first):
- test_send_with_thread_reply mocks _session.get with a thread-root
  response so the new resolver doesn't TypeError on a bare AsyncMock.

Kanban (d8ad431de rename, f55d94a1e review column, _kanban_worker_skill_available):
- _safe_int → _to_epoch in the two test_kanban_db tests.
- Spawn-skills tests (×3) monkey-patch _kanban_worker_skill_available
  to True since the isolated kanban_home fixture has no devops/kanban-worker tree.
- test_gateway_dispatcher_disables_corrupt_board: connect count
  3 → 5 (review-column probe now also runs per tick).

Aux-config severity at_or_above (a94ddd807):
- test_diagnostics_endpoint_severity_filter expects warning filter to
  include error+critical now (was exact-match).

Anthropic error handling (conversation loop extracted from run_agent):
- _no_backoff_wait fixture patches BOTH run_agent.jittered_backoff AND
  agent.conversation_loop.jittered_backoff. The latter is the actual
  call site; without the second patch tests burn ~2s per retry and
  hit the 30s SIGALRM timeout on CI.

Other test pollution / drift:
- test_auto_does_not_select_copilot_from_github_token: patch
  agent.bedrock_adapter.has_aws_credentials → False so boto3's
  credential chain can't auto-pick Bedrock from developer ~/.aws.
- test_setup_openclaw_migration: patch hermes_cli.gateway.get_env_value
  in addition to setup_mod.get_env_value — _platform_status reads
  through the gateway module's binding.
- test_gateway_prefix: COMPONENT_PREFIXES["gateway"] now includes
  "hermes_plugins" too.
- test_recommended_update_command_defaults_to_hermes_update: also
  short-circuit get_managed_update_command in case a stray
  ~/.hermes/.managed marker is present.
- test_user_id_is_not_explicit: _parse_target_ref now returns
  is_explicit=False for Slack U.../W... IDs (chat.postMessage rejects
  them — a DM must be opened first via conversations.open).
This commit is contained in:
Teknium 2026-05-19 01:28:32 -07:00 committed by GitHub
parent 12c39830f0
commit a0bd11d022
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 118 additions and 20 deletions

View file

@ -38,6 +38,10 @@ def _make_telegram_adapter(*, allowed_chats=None, require_mention=None, guest_mo
adapter._bot = SimpleNamespace(id=999, username="hermes_bot")
adapter._message_handler = AsyncMock()
adapter._mention_patterns = adapter._compile_mention_patterns()
# PR db50af910 added a TELEGRAM_ALLOWED_USERS allowlist gate to
# _should_process_message; stub it for tests that exercise the
# allowed-channels widening logic that runs after.
adapter._is_callback_user_authorized = lambda *_a, **_kw: True
return adapter

View file

@ -629,7 +629,12 @@ class TestFallbackNoCallback:
_clear_approval_state()
def test_no_callback_returns_approval_required(self):
"""Without a registered callback, the old approval_required path is used."""
"""Without a registered callback, the fallback returns pending_approval.
PR #6d495d9e7 renamed the LLM-visible status from ``approval_required``
to ``pending_approval`` to make the state distinguishable from a
failed tool call.
"""
from tools.approval import check_all_command_guards, _pending
os.environ["HERMES_EXEC_ASK"] = "1"
@ -641,4 +646,5 @@ class TestFallbackNoCallback:
os.environ.pop("HERMES_SESSION_KEY", None)
assert result["approved"] is False
assert result.get("status") == "approval_required"
assert result.get("status") == "pending_approval"
assert result.get("approval_pending") is True

View file

@ -197,7 +197,19 @@ class TestMattermostSend:
mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
mock_resp.__aexit__ = AsyncMock(return_value=False)
# send() now calls _resolve_root_id → _api_get("posts/<id>") first
# to make sure root_id points to a thread root, so we need to mock
# the GET too. Return an empty dict (no root_id) so the resolver
# falls back to the original reply_to as the root.
mock_get_resp = AsyncMock()
mock_get_resp.status = 200
mock_get_resp.json = AsyncMock(return_value={"id": "root_post", "root_id": ""})
mock_get_resp.text = AsyncMock(return_value="")
mock_get_resp.__aenter__ = AsyncMock(return_value=mock_get_resp)
mock_get_resp.__aexit__ = AsyncMock(return_value=False)
self.adapter._session.post = MagicMock(return_value=mock_resp)
self.adapter._session.get = MagicMock(return_value=mock_get_resp)
result = await self.adapter.send("channel_1", "Reply!", reply_to="root_post")

View file

@ -493,7 +493,11 @@ class TestTelegramApprovalCallback:
with patch("tools.approval.resolve_gateway_approval") as mock_resolve:
with patch("hermes_constants.get_hermes_home", return_value=tmp_path):
with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": ""}):
# Allow the caller — the new fail-closed allowlist gate
# (#24457) rejects empty TELEGRAM_ALLOWED_USERS, but this
# test isn't exercising that gate; it's verifying the
# update_prompt callback still writes the response.
with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": "*"}):
await adapter._handle_callback_query(update, context)
# Should NOT have triggered approval resolution

View file

@ -855,6 +855,11 @@ def _guest_test_adapter(*, guest_mode=True, require_mention=True, allowed_chats=
adapter.config = config
adapter._bot = SimpleNamespace(id=999, username="hermes_bot")
adapter._mention_patterns = adapter._compile_mention_patterns()
# PR db50af910 added a TELEGRAM_ALLOWED_USERS allowlist gate to
# _should_process_message. These tests aren't exercising the auth
# gate — they're exercising the guest-mode mention/allowed_chats
# logic that runs after — so stub the user authz to always allow.
adapter._is_callback_user_authorized = lambda *_a, **_kw: True
return adapter

View file

@ -314,6 +314,16 @@ class TestResolveProvider:
assert resolve_provider("auto") == "openrouter"
def test_auto_does_not_select_copilot_from_github_token(self, monkeypatch):
# AWS Bedrock auto-detection (via boto3's credential chain) runs at
# the tail of resolve_provider("auto") and will silently pick up
# ~/.aws/credentials on developer machines that aren't blanked by
# the hermetic conftest. Force-disable it so this test exercises
# the specific "GitHub token alone shouldn't auto-pick copilot"
# behavior, not the Bedrock fallback.
monkeypatch.setattr(
"agent.bedrock_adapter.has_aws_credentials",
lambda env=None: False,
)
monkeypatch.setenv("GITHUB_TOKEN", "gh-test-token")
with pytest.raises(AuthError, match="No inference provider configured"):
resolve_provider("auto")

View file

@ -2700,6 +2700,12 @@ def test_default_spawn_auto_loads_kanban_worker_skill(kanban_home, monkeypatch):
We intercept Popen to capture the argv without actually spawning a
hermes subprocess (which would hang trying to call an LLM).
"""
# Pretend the bundled kanban-worker skill resolves for this isolated
# HERMES_HOME — the fixture creates an empty tmpdir without the
# devops/kanban-worker tree, and _default_spawn gates the --skills
# flag on actual resolvability.
monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda _h: True)
captured = {}
class FakeProc:
@ -2969,6 +2975,7 @@ def test_create_task_skills_lists_all_toolset_typos(kanban_home):
def test_default_spawn_appends_per_task_skills(kanban_home, monkeypatch):
"""Dispatcher argv must carry one `--skills X` pair per task skill,
in addition to the built-in kanban-worker."""
monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda _h: True)
captured = {}
class FakeProc:
@ -3018,6 +3025,7 @@ def test_default_spawn_appends_per_task_skills(kanban_home, monkeypatch):
def test_default_spawn_dedupes_kanban_worker_from_task_skills(kanban_home, monkeypatch):
"""If a task explicitly lists 'kanban-worker', we don't double-pass it."""
monkeypatch.setattr(kb, "_kanban_worker_skill_available", lambda _h: True)
captured = {}
class FakeProc:
@ -3665,9 +3673,13 @@ def test_gateway_dispatcher_disables_corrupt_board_without_traceback(
assert sum("not a valid SQLite database" in msg for msg in messages) == 1
assert not any("tick failed on board" in msg for msg in messages)
assert not any(record.exc_info for record in caplog.records)
# First tick connect + two ready-queue probes. The second dispatch tick
# skips connect because the corrupt board fingerprint is disabled.
assert calls["connect"] == 3
# First tick connect (dispatch) + two probes per `_has_ready_work` call
# (ready then review, both via _kb.connect). The second dispatch tick
# skips the dispatch connect because the corrupt board fingerprint is
# disabled, but the ready/review probes still each connect. PR f55d94a1e
# added the review-column probe alongside the existing ready-column
# probe, bumping this from 3 → 5.
assert calls["connect"] == 5
# ---------------------------------------------------------------------------

View file

@ -2237,24 +2237,25 @@ def _make_task(**overrides) -> "kb.Task":
def test_safe_int_accepts_int_and_int_string():
"""Sanity: well-typed values pass through."""
assert kb._safe_int(0) == 0
assert kb._safe_int(1700000000) == 1700000000
assert kb._safe_int("1700000000") == 1700000000
# PR d8ad431de renamed _safe_int → _to_epoch (now also handles ISO-8601).
assert kb._to_epoch(0) == 0
assert kb._to_epoch(1700000000) == 1700000000
assert kb._to_epoch("1700000000") == 1700000000
def test_safe_int_returns_none_on_corrupt_inputs():
"""All the failure modes that used to crash task_age."""
# None — common when the column was never written
assert kb._safe_int(None) is None
assert kb._to_epoch(None) is None
# Unsubstituted format string — the literal case the PR title cites
assert kb._safe_int("%s") is None
assert kb._to_epoch("%s") is None
# Arbitrary non-numeric strings
assert kb._safe_int("abc") is None
assert kb._safe_int("") is None
assert kb._to_epoch("abc") is None
assert kb._to_epoch("") is None
# Float-ish strings: int("1.5") raises ValueError too — caller wants None.
assert kb._safe_int("1.5") is None
assert kb._to_epoch("1.5") is None
# Random object — covered by TypeError branch
assert kb._safe_int(object()) is None
assert kb._to_epoch(object()) is None
def test_task_age_handles_corrupt_created_at():

View file

@ -29,7 +29,13 @@ def test_format_managed_message_homebrew(monkeypatch):
def test_recommended_update_command_defaults_to_hermes_update(monkeypatch):
monkeypatch.delenv("HERMES_MANAGED", raising=False)
with patch("hermes_cli.config.detect_install_method", return_value="git"):
# Also short-circuit the .managed marker path — CI runners may have an
# ambient ~/.hermes/.managed if a prior test left HERMES_HOME pointing
# somewhere with that marker, which would make get_managed_update_command()
# return "Update your Nix flake input ..." instead of falling through to
# detect_install_method().
with patch("hermes_cli.config.get_managed_update_command", return_value=None), \
patch("hermes_cli.config.detect_install_method", return_value="git"):
assert recommended_update_command() == "hermes update"

View file

@ -404,7 +404,14 @@ class TestGetSectionConfigSummary:
assert result == "max turns: 120"
def test_gateway_returns_none_without_tokens(self):
with patch.object(setup_mod, "get_env_value", return_value=""):
# _platform_status reads via hermes_cli.gateway.get_env_value, not
# setup_mod.get_env_value, so patch BOTH. Without the second patch,
# any environment-variable token (or one leaked in by a sibling
# test on the same xdist worker) makes the gateway section report
# platforms-configured and the test sees a non-None summary.
import hermes_cli.gateway as gateway_mod
with patch.object(setup_mod, "get_env_value", return_value=""), \
patch.object(gateway_mod, "get_env_value", return_value=""):
result = setup_mod._get_section_config_summary({}, "gateway")
assert result is None
@ -625,6 +632,13 @@ class TestSetupWizardSkipsConfiguredSections:
reloaded_config = {"model": "openai/gpt-4"}
# _platform_status (called by the gateway summary path) reads env
# vars via hermes_cli.gateway.get_env_value, NOT setup_mod's. Patch
# both so xdist sibling tests can't leak a TELEGRAM_BOT_TOKEN /
# WHATSAPP_* / etc. through and trick the wizard into thinking the
# gateway section is already configured (which would skip it).
import hermes_cli.gateway as gateway_mod
with (
patch.object(setup_mod, "ensure_hermes_home"),
patch.object(
@ -633,6 +647,7 @@ class TestSetupWizardSkipsConfiguredSections:
),
patch.object(setup_mod, "get_hermes_home", return_value=tmp_path),
patch.object(setup_mod, "get_env_value", side_effect=env_side),
patch.object(gateway_mod, "get_env_value", side_effect=env_side),
patch.object(setup_mod, "is_interactive_stdin", return_value=True),
patch("hermes_cli.auth.get_active_provider", return_value=None),
patch("builtins.input", return_value=""),

View file

@ -47,6 +47,12 @@ def _no_backoff_wait(monkeypatch):
import time as _time
monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
# The conversation loop was extracted out of run_agent.py into
# agent.conversation_loop, which holds its own `from agent.retry_utils
# import jittered_backoff` reference. Patching `run_agent.jittered_backoff`
# alone leaves the live retry path using real ~2s waits. Patch both.
from agent import conversation_loop as _conv_loop
monkeypatch.setattr(_conv_loop, "jittered_backoff", lambda *a, **k: 0.0)
monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)
# Also fast-path asyncio.sleep — the gateway's _run_agent path has

View file

@ -538,7 +538,10 @@ class TestComponentPrefixes:
def test_gateway_prefix(self):
assert "gateway" in hermes_logging.COMPONENT_PREFIXES
assert ("gateway",) == hermes_logging.COMPONENT_PREFIXES["gateway"]
# The gateway component captures both core gateway logs and the
# hermes_plugins facility (plugin-installed gateway adapters log
# under that prefix).
assert ("gateway", "hermes_plugins") == hermes_logging.COMPONENT_PREFIXES["gateway"]
def test_agent_prefix(self):
prefixes = hermes_logging.COMPONENT_PREFIXES["agent"]

View file

@ -296,10 +296,17 @@ class TestStdinHelpers:
assert result["status"] == "ok"
def test_close_stdin_allows_eof_driven_process_to_finish(self, registry, tmp_path):
"""PTY mode: writing data + sending EOF lets an EOF-driven child finish.
Background non-PTY mode used to expose subprocess stdin via a pipe,
but PR #214b95392 detached non-PTY stdin to DEVNULL to fix keyboard
lockout (#17959). For interactive stdin → PTY mode is now the only
supported path.
"""
session = registry.spawn_local(
'python3 -c "import sys; print(sys.stdin.read().strip())"',
cwd=str(tmp_path),
use_pty=False,
use_pty=True,
)
try:

View file

@ -359,7 +359,14 @@ def _parse_target_ref(platform_name: str, target_ref: str):
return match.group(1), match.group(2), True
match = _SLACK_TARGET_RE.fullmatch(target_ref)
if match:
return match.group(1), None, True
chat_id = match.group(1)
# Slack user IDs (U...) and workspace IDs (W...) are NOT valid
# explicit send targets — chat.postMessage rejects them. A DM
# must be opened first via conversations.open to get a D...
# conversation ID. Caller still gets the chat_id so the U→D
# resolution path in send_message() can run.
is_explicit = chat_id[0] not in {"U", "W"}
return chat_id, None, is_explicit
if platform_name == "matrix":
trimmed = target_ref.strip()
split_idx = trimmed.rfind(":$")