test: update stale tests to match current code (#11963)

Seven test files were asserting against older function signatures and behaviors. CI has been red on main because of accumulated test debt from other PRs; this commit catches the tests up.

- tests/agent/test_subagent_progress.py: _build_child_progress_callback now takes (task_index, goal, parent_agent, task_count=1); update all call sites and rewrite tests that assumed the old 'batch-only' relay semantics (it now relays per-tool AND flushes a summary at BATCH_SIZE). Renamed test_thinking_not_relayed_to_gateway → test_thinking_relayed_to_gateway since thinking IS now relayed as subagent.thinking.
- tests/tools/test_delegate.py: _build_child_agent now requires task_count; add task_count=1 to all 8 call sites.
- tests/cli/test_reasoning_command.py: AIAgent gained _stream_callback; stub it on the two test agent helpers that use spec=AIAgent / __new__.
- tests/hermes_cli/test_cmd_update.py: cmd_update now runs npm install in repo root + ui-tui/ + web/ and 'npm run build' in web/; assert all four subprocess calls in the expected order.
- tests/hermes_cli/test_model_validation.py: dissimilar unknown models now return accepted=False (previously True with warning); update both affected tests.
- tests/tools/test_registry.py: include feishu_doc_tool and feishu_drive_tool in the expected builtin tool set.
- tests/gateway/test_voice_command.py: missing-voice-deps message now suggests 'pip install PyNaCl' not 'hermes-agent[messaging]'.

All 411 tests (411/411) pass locally across these 7 files.
2026-06-16 09:31:37 +00:00 · 2026-04-17 21:35:30 -07:00 · 2026-04-17 21:35:30 -07:00 · 598cba62ad
commit 598cba62ad
parent 5ff65dbf68
7 changed files with 94 additions and 73 deletions
--- a/tests/agent/test_subagent_progress.py
+++ b/tests/agent/test_subagent_progress.py
@ -79,7 +79,7 @@ class TestBuildChildProgressCallback:
        parent._delegate_spinner = None
        parent.tool_progress_callback = None
        
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
        assert cb is None

    def test_cli_spinner_tool_event(self):
@ -93,7 +93,7 @@ class TestBuildChildProgressCallback:
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None
        
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
        assert cb is not None
        
        cb("tool.started", "web_search", "quantum computing", {})
@ -113,7 +113,7 @@ class TestBuildChildProgressCallback:
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None
        
-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
        cb("_thinking", "I'll search for papers first")
        
        output = buf.getvalue()
@ -121,54 +121,64 @@ class TestBuildChildProgressCallback:
        assert "search for papers" in output

    def test_gateway_batched_progress(self):
-        """Gateway path should batch tool calls and flush at BATCH_SIZE."""
+        """Gateway path: each tool.started relays a subagent.tool event, and a
+        subagent.progress summary fires once BATCH_SIZE tools accumulate."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb
-        
-        cb = _build_child_progress_callback(0, parent)
-        
-        # Send 4 tool calls — shouldn't flush yet (BATCH_SIZE = 5)
+
+        cb = _build_child_progress_callback(0, "test goal", parent)
+
+        # Each tool.started relays a subagent.tool event immediately (per-tool relay).
        for i in range(4):
            cb("tool.started", f"tool_{i}", f"arg_{i}", {})
-        parent_cb.assert_not_called()
-        
-        # 5th call should trigger flush
-        cb("tool.started", "tool_4", "arg_4", {})
-        parent_cb.assert_called_once()
-        call_args = parent_cb.call_args
-        assert "tool_0" in call_args[0][1]
-        assert "tool_4" in call_args[0][1]
+        # 4 per-tool relays so far, no batch summary yet (BATCH_SIZE=5)
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events == ["subagent.tool"] * 4

-    def test_thinking_not_relayed_to_gateway(self):
-        """Thinking events should NOT be sent to gateway (too noisy)."""
+        # 5th call triggers another per-tool relay PLUS the batch-size summary
+        cb("tool.started", "tool_4", "arg_4", {})
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events == ["subagent.tool"] * 5 + ["subagent.progress"]
+        summary_call = parent_cb.call_args_list[-1]
+        summary_text = summary_call.kwargs.get("preview") or summary_call.args[2]
+        assert "tool_0" in summary_text
+        assert "tool_4" in summary_text
+
+    def test_thinking_relayed_to_gateway(self):
+        """Thinking events are relayed as subagent.thinking events."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb
-        
-        cb = _build_child_progress_callback(0, parent)
+
+        cb = _build_child_progress_callback(0, "test goal", parent)
        cb("_thinking", "some reasoning text")
-        
-        parent_cb.assert_not_called()
+
+        parent_cb.assert_called_once()
+        assert parent_cb.call_args.args[0] == "subagent.thinking"
+        assert parent_cb.call_args.args[2] == "some reasoning text"

    def test_parallel_callbacks_independent(self):
-        """Each child's callback should have independent batch state."""
+        """Each child's callback batches tool names independently."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb
-        
-        cb0 = _build_child_progress_callback(0, parent)
-        cb1 = _build_child_progress_callback(1, parent)
-        
-        # Send 3 calls to each — neither should flush (batch size = 5)
+
+        cb0 = _build_child_progress_callback(0, "goal a", parent)
+        cb1 = _build_child_progress_callback(1, "goal b", parent)
+
+        # 3 tool.started per child = 6 per-tool relays; neither should hit
+        # the batch-size summary (batch size = 5, counted per-child).
        for i in range(3):
-            cb0(f"tool_{i}")
-            cb1(f"other_{i}")
-        
-        parent_cb.assert_not_called()
+            cb0("tool.started", f"tool_{i}", f"a_{i}", {})
+            cb1("tool.started", f"other_{i}", f"b_{i}", {})
+
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events.count("subagent.tool") == 6
+        assert "subagent.progress" not in events

    def test_task_index_prefix_in_batch_mode(self):
        """Batch mode (task_count > 1) should show 1-indexed prefix for all tasks."""
@ -182,7 +192,7 @@ class TestBuildChildProgressCallback:
        parent.tool_progress_callback = None
        
        # task_index=0 in a batch of 3 → prefix "[1]"
-        cb0 = _build_child_progress_callback(0, parent, task_count=3)
+        cb0 = _build_child_progress_callback(0, "test goal", parent, task_count=3)
        cb0("web_search", "test")
        output = buf.getvalue()
        assert "[1]" in output
@ -190,7 +200,7 @@ class TestBuildChildProgressCallback:
        # task_index=2 in a batch of 3 → prefix "[3]"
        buf.truncate(0)
        buf.seek(0)
-        cb2 = _build_child_progress_callback(2, parent, task_count=3)
+        cb2 = _build_child_progress_callback(2, "test goal", parent, task_count=3)
        cb2("web_search", "test")
        output = buf.getvalue()
        assert "[3]" in output
@ -206,7 +216,7 @@ class TestBuildChildProgressCallback:
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None
        
-        cb = _build_child_progress_callback(0, parent, task_count=1)
+        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb("tool.started", "web_search", "test", {})
        
        output = buf.getvalue()
@ -321,26 +331,31 @@ class TestBatchFlush:
    """Tests for gateway batch flush on subagent completion."""

    def test_flush_sends_remaining_batch(self):
-        """_flush should send remaining tool names to gateway."""
+        """_flush should send a final subagent.progress summary of any unsent
+        tool names in the batch (less than BATCH_SIZE)."""
        parent = MagicMock()
        parent._delegate_spinner = None
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)

-        # Send 3 tools (below batch size of 5)
+        # Send 3 tools (below batch size of 5) — each relays subagent.tool
        cb("tool.started", "web_search", "query1", {})
        cb("tool.started", "read_file", "file.txt", {})
        cb("tool.started", "write_file", "out.txt", {})
-        parent_cb.assert_not_called()
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events == ["subagent.tool"] * 3  # per-tool relays so far
+        assert "subagent.progress" not in events  # no batch-size summary yet

-        # Flush should send the remaining 3
+        # Flush should send the remaining 3 as a summary
        cb._flush()
-        parent_cb.assert_called_once()
-        summary = parent_cb.call_args[0][1]
-        assert "web_search" in summary
-        assert "write_file" in summary
+        events = [c.args[0] for c in parent_cb.call_args_list]
+        assert events[-1] == "subagent.progress"
+        summary_call = parent_cb.call_args_list[-1]
+        summary_text = summary_call.kwargs.get("preview") or summary_call.args[2]
+        assert "web_search" in summary_text
+        assert "write_file" in summary_text

    def test_flush_noop_when_batch_empty(self):
        """_flush should not send anything when batch is empty."""
@ -349,7 +364,7 @@ class TestBatchFlush:
        parent_cb = MagicMock()
        parent.tool_progress_callback = parent_cb

-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
        cb._flush()
        parent_cb.assert_not_called()

@ -364,7 +379,7 @@ class TestBatchFlush:
        parent._delegate_spinner = spinner
        parent.tool_progress_callback = None

-        cb = _build_child_progress_callback(0, parent)
+        cb = _build_child_progress_callback(0, "test goal", parent)
        cb("tool.started", "web_search", "test", {})
        cb._flush()  # Should not crash

--- a/tests/cli/test_reasoning_command.py
+++ b/tests/cli/test_reasoning_command.py
@ -473,6 +473,7 @@ class TestInlineThinkBlockExtraction(unittest.TestCase):
        agent.verbose_logging = False
        agent.reasoning_callback = None
        agent.stream_delta_callback = None  # non-streaming by default
+        agent._stream_callback = None  # non-streaming by default
        return agent

    def test_single_think_block_extracted(self):
@ -619,6 +620,7 @@ class TestReasoningDeltasFiredFlag(unittest.TestCase):
        agent = AIAgent.__new__(AIAgent)
        agent.reasoning_callback = None
        agent.stream_delta_callback = None
+        agent._stream_callback = None
        agent.verbose_logging = False
        return agent

--- a/tests/gateway/test_voice_command.py
+++ b/tests/gateway/test_voice_command.py
@ -758,7 +758,7 @@ class TestVoiceChannelCommands:
        result = await runner._handle_voice_channel_join(event)

        assert "voice dependencies are missing" in result.lower()
-        assert "hermes-agent[messaging]" in result
+        assert "PyNaCl" in result

    # -- _handle_voice_channel_leave --

--- a/tests/hermes_cli/test_cmd_update.py
+++ b/tests/hermes_cli/test_cmd_update.py
@ -124,29 +124,23 @@ class TestCmdUpdateBranchFallback:
            if call.args and call.args[0][0] == "/usr/bin/npm"
        ]

+        # cmd_update runs npm commands in three locations:
+        #   1. repo root  — slash-command / TUI bridge deps
+        #   2. ui-tui/    — Ink TUI deps
+        #   3. web/       — install + "npm run build" for the web frontend
+        full_flags = [
+            "/usr/bin/npm",
+            "install",
+            "--silent",
+            "--no-fund",
+            "--no-audit",
+            "--progress=false",
+        ]
        assert npm_calls == [
-            (
-                [
-                    "/usr/bin/npm",
-                    "install",
-                    "--silent",
-                    "--no-fund",
-                    "--no-audit",
-                    "--progress=false",
-                ],
-                PROJECT_ROOT,
-            ),
-            (
-                [
-                    "/usr/bin/npm",
-                    "install",
-                    "--silent",
-                    "--no-fund",
-                    "--no-audit",
-                    "--progress=false",
-                ],
-                PROJECT_ROOT / "ui-tui",
-            ),
+            (full_flags, PROJECT_ROOT),
+            (full_flags, PROJECT_ROOT / "ui-tui"),
+            (["/usr/bin/npm", "install", "--silent"], PROJECT_ROOT / "web"),
+            (["/usr/bin/npm", "run", "build"], PROJECT_ROOT / "web"),
        ]

    def test_update_non_interactive_skips_migration_prompt(self, mock_args, capsys):
--- a/tests/hermes_cli/test_model_validation.py
+++ b/tests/hermes_cli/test_model_validation.py
@ -450,9 +450,9 @@ class TestValidateApiNotFound:
        assert result["recognized"] is True

    def test_dissimilar_model_shows_suggestions_not_autocorrect(self):
-        """Models too different for auto-correction still get suggestions."""
+        """Models too different for auto-correction are rejected with suggestions."""
        result = _validate("anthropic/claude-nonexistent")
-        assert result["accepted"] is True
+        assert result["accepted"] is False
        assert result.get("corrected_model") is None
        assert "not found" in result["message"]

@ -532,11 +532,11 @@ class TestValidateCodexAutoCorrection:
        assert result["message"] is None

    def test_very_different_name_falls_to_suggestions(self):
-        """Names too different for auto-correction get the suggestion list."""
+        """Names too different for auto-correction are rejected with a suggestion list."""
        codex_models = ["gpt-5.4-mini", "gpt-5.4", "gpt-5.3-codex"]
        with patch("hermes_cli.models.provider_model_ids", return_value=codex_models):
            result = validate_requested_model("totally-wrong", "openai-codex")
-        assert result["accepted"] is True
+        assert result["accepted"] is False
        assert result["recognized"] is False
        assert result.get("corrected_model") is None
        assert "not found" in result["message"]
--- a/tests/tools/test_delegate.py
+++ b/tests/tools/test_delegate.py
@ -274,6 +274,7 @@ class TestDelegateTask(unittest.TestCase):
                model=None,
                max_iterations=10,
                parent_agent=parent,
+                task_count=1,
            )

        self.assertIs(mock_child._print_fn, sink)
@ -294,6 +295,7 @@ class TestDelegateTask(unittest.TestCase):
                model=None,
                max_iterations=10,
                parent_agent=parent,
+                task_count=1,
            )

        self.assertTrue(callable(mock_child.thinking_callback))
@ -363,6 +365,7 @@ class TestToolNamePreservation(unittest.TestCase):
                    model=None,
                    max_iterations=10,
                    parent_agent=parent,
+                    task_count=1,
                )
            except NameError as exc:
                self.fail(
@ -1000,6 +1003,7 @@ class TestChildCredentialPoolResolution(unittest.TestCase):
                model=None,
                max_iterations=10,
                parent_agent=parent,
+                task_count=1,
            )

            self.assertEqual(mock_child._credential_pool, mock_pool)
@ -1225,6 +1229,7 @@ class TestDelegationReasoningEffort(unittest.TestCase):
        _build_child_agent(
            task_index=0, goal="test", context=None, toolsets=None,
            model=None, max_iterations=50, parent_agent=parent,
+            task_count=1,
        )
        call_kwargs = MockAgent.call_args[1]
        self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "xhigh"})
@ -1241,6 +1246,7 @@ class TestDelegationReasoningEffort(unittest.TestCase):
        _build_child_agent(
            task_index=0, goal="test", context=None, toolsets=None,
            model=None, max_iterations=50, parent_agent=parent,
+            task_count=1,
        )
        call_kwargs = MockAgent.call_args[1]
        self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "low"})
@ -1257,6 +1263,7 @@ class TestDelegationReasoningEffort(unittest.TestCase):
        _build_child_agent(
            task_index=0, goal="test", context=None, toolsets=None,
            model=None, max_iterations=50, parent_agent=parent,
+            task_count=1,
        )
        call_kwargs = MockAgent.call_args[1]
        self.assertEqual(call_kwargs["reasoning_config"], {"enabled": False})
@ -1273,6 +1280,7 @@ class TestDelegationReasoningEffort(unittest.TestCase):
        _build_child_agent(
            task_index=0, goal="test", context=None, toolsets=None,
            model=None, max_iterations=50, parent_agent=parent,
+            task_count=1,
        )
        call_kwargs = MockAgent.call_args[1]
        self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "medium"})
--- a/tests/tools/test_registry.py
+++ b/tests/tools/test_registry.py
@ -296,6 +296,8 @@ class TestBuiltinDiscovery:
            "tools.code_execution_tool",
            "tools.cronjob_tools",
            "tools.delegate_tool",
+            "tools.feishu_doc_tool",
+            "tools.feishu_drive_tool",
            "tools.file_tools",
            "tools.homeassistant_tool",
            "tools.image_generation_tool",