fix(tests): fix 78 CI test failures and remove dead test (#9036)

Production fixes:
- voice_mode.py: add is_recording property to AudioRecorder (parity with TermuxAudioRecorder)
- cronjob_tools.py: add sms example to deliver description

Test fixes:
- test_real_interrupt_subagent: add missing _execution_thread_id (fixes 19 cascading failures from leaked _build_system_prompt patch)
- test_anthropic_error_handling: add _FakeMessages, override _interruptible_streaming_api_call (6 fixes)
- test_ctx_halving_fix: add missing request_overrides attribute (4 fixes)
- test_context_token_tracking: set _disable_streaming=True for non-streaming test path (4 fixes)
- test_dict_tool_call_args: set _disable_streaming=True (1 fix)
- test_provider_parity: add model='gpt-4o' for AIGateway tests to meet 64K minimum context (4 fixes)
- test_session_race_guard: add user_id to SessionSource (5 fixes)
- test_restart_drain/helpers: add user_id to SessionSource (2 fixes)
- test_telegram_photo_interrupts: add user_id to SessionSource
- test_interrupt: target thread_id for per-thread interrupt system (2 fixes)
- test_zombie_process_cleanup: rewrite with object.__new__ for refactored GatewayRunner.stop() (1 fix)
- test_browser_camofox_state: update config version 15->17 (1 fix)
- test_trajectory_compressor_async: widen lookback window 10->20 for line-shifted AsyncOpenAI (1 fix)
- test_voice_mode: fixed by production is_recording addition (5 fixes)
- test_voice_cli_integration: add _attached_images to CLI stub (2 fixes)
- test_hermes_logging: explicit propagation/level reset for cross-test pollution defense (1 fix)
- test_run_agent: add base_url for OpenRouter detection tests (2 fixes)

Deleted:
- test_inline_think_blocks_reasoning_only_accepted: tested unimplemented inline <think> handling
This commit is contained in:
Teknium 2026-04-13 10:50:24 -07:00 committed by GitHub
parent b909a9efef
commit 0dd26c9495
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 92 additions and 50 deletions

View file

@ -35,6 +35,7 @@ def make_restart_source(chat_id: str = "123456", chat_type: str = "dm") -> Sessi
platform=Platform.TELEGRAM,
chat_id=chat_id,
chat_type=chat_type,
user_id="u1",
)

View file

@ -60,7 +60,8 @@ def _make_runner():
def _make_event(text="hello", chat_id="12345"):
source = SessionSource(
platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm"
platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm",
user_id="u1",
)
return MessageEvent(text=text, message_type=MessageType.TEXT, source=source)
@ -192,7 +193,8 @@ async def test_command_messages_do_not_leave_sentinel():
_handle_message. They must NOT leave a sentinel behind."""
runner = _make_runner()
source = SessionSource(
platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm"
platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm",
user_id="u1",
)
event = MessageEvent(
text="/help", message_type=MessageType.TEXT, source=source
@ -268,7 +270,7 @@ async def test_stop_hard_kills_running_agent():
forever showing 'writing...' but never producing output."""
runner = _make_runner()
session_key = build_session_key(
SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1")
)
# Simulate a running (possibly hung) agent
@ -301,7 +303,7 @@ async def test_stop_clears_pending_messages():
queued during the run must be discarded."""
runner = _make_runner()
session_key = build_session_key(
SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1")
)
fake_agent = MagicMock()

View file

@ -29,7 +29,7 @@ def _make_runner():
@pytest.mark.asyncio
async def test_handle_message_does_not_priority_interrupt_photo_followup():
runner = _make_runner()
source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1")
session_key = build_session_key(source)
running_agent = MagicMock()
runner._running_agents[session_key] = running_agent

View file

@ -102,7 +102,19 @@ class _PromptTooLongError(Exception):
self.status_code = 400
class _FakeMessages:
"""Stub for client.messages.create() / client.messages.stream()."""
def create(self, **kwargs):
raise NotImplementedError("_FakeAnthropicClient.messages.create should not be called directly in tests")
def stream(self, **kwargs):
raise NotImplementedError("_FakeAnthropicClient.messages.stream should not be called directly in tests")
class _FakeAnthropicClient:
def __init__(self):
self.messages = _FakeMessages()
def close(self):
pass
@ -131,13 +143,14 @@ def _make_agent_cls(error_cls, recover_after=None):
def run_conversation(self, user_message, conversation_history=None, task_id=None):
calls = {"n": 0}
def _fake_api_call(api_kwargs):
def _fake_api_call(api_kwargs, **kw):
calls["n"] += 1
if recover_after is not None and calls["n"] > recover_after:
return _anthropic_response("Recovered")
raise error_cls()
self._interruptible_api_call = _fake_api_call
self._interruptible_streaming_api_call = _fake_api_call
return super().run_conversation(
user_message, conversation_history=conversation_history, task_id=task_id
)
@ -352,10 +365,11 @@ def test_401_refresh_fails_is_non_retryable(monkeypatch):
return False # Simulate failed credential refresh
def run_conversation(self, user_message, conversation_history=None, task_id=None):
def _fake_api_call(api_kwargs):
def _fake_api_call(api_kwargs, **kw):
raise _UnauthorizedError()
self._interruptible_api_call = _fake_api_call
self._interruptible_streaming_api_call = _fake_api_call
return super().run_conversation(
user_message, conversation_history=conversation_history, task_id=task_id
)
@ -436,13 +450,14 @@ def test_prompt_too_long_triggers_compression(monkeypatch):
def run_conversation(self, user_message, conversation_history=None, task_id=None):
calls = {"n": 0}
def _fake_api_call(api_kwargs):
def _fake_api_call(api_kwargs, **kw):
calls["n"] += 1
if calls["n"] == 1:
raise _PromptTooLongError()
return _anthropic_response("Compressed and recovered")
self._interruptible_api_call = _fake_api_call
self._interruptible_streaming_api_call = _fake_api_call
return super().run_conversation(
user_message, conversation_history=conversation_history, task_id=task_id
)

View file

@ -56,6 +56,7 @@ def _make_agent(monkeypatch, api_mode, provider, response_fn):
def run_conversation(self, msg, conversation_history=None, task_id=None):
self._interruptible_api_call = lambda kw: response_fn()
self._disable_streaming = True
return super().run_conversation(msg, conversation_history=conversation_history, task_id=task_id)
return _A(model="test-model", api_key="test-key", provider=provider, api_mode=api_mode)

View file

@ -66,6 +66,7 @@ def test_tool_call_validation_accepts_dict_arguments(monkeypatch):
quiet_mode=True,
skip_memory=True,
)
agent._disable_streaming = True
result = agent.run_conversation("read the file")

View file

@ -44,11 +44,11 @@ class _FakeOpenAI:
pass
def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"):
def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1", model=None):
monkeypatch.setattr("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal"))
monkeypatch.setattr("run_agent.check_toolset_requirements", lambda: {})
monkeypatch.setattr("run_agent.OpenAI", _FakeOpenAI)
return AIAgent(
kwargs = dict(
api_key="test-key",
base_url=base_url,
provider=provider,
@ -58,6 +58,9 @@ def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="ht
skip_context_files=True,
skip_memory=True,
)
if model:
kwargs["model"] = model
return AIAgent(**kwargs)
# ── _build_api_kwargs tests ─────────────────────────────────────────────────
@ -247,7 +250,7 @@ class TestBuildApiKwargsChatCompletionsServiceTier:
class TestBuildApiKwargsAIGateway:
def test_uses_chat_completions_format(self, monkeypatch):
agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o")
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
assert "messages" in kwargs
@ -255,7 +258,7 @@ class TestBuildApiKwargsAIGateway:
assert kwargs["messages"][-1]["content"] == "hi"
def test_no_responses_api_fields(self, monkeypatch):
agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o")
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
assert "input" not in kwargs
@ -263,7 +266,7 @@ class TestBuildApiKwargsAIGateway:
assert "store" not in kwargs
def test_includes_reasoning_in_extra_body(self, monkeypatch):
agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o")
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
extra = kwargs.get("extra_body", {})
@ -271,7 +274,7 @@ class TestBuildApiKwargsAIGateway:
assert extra["reasoning"]["enabled"] is True
def test_includes_tools(self, monkeypatch):
agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o")
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
assert "tools" in kwargs

View file

@ -76,7 +76,8 @@ class TestRealSubagentInterrupt(unittest.TestCase):
parent._delegate_spinner = None
parent.tool_progress_callback = None
parent.iteration_budget = IterationBudget(max_total=100)
parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"}
parent._client_kwargs = {"api_key": "***", "base_url": "http://localhost:1"}
parent._execution_thread_id = None
from tools.delegate_tool import _run_single_child

View file

@ -880,6 +880,7 @@ class TestBuildApiKwargs:
assert kwargs["extra_body"]["reasoning"] == {"enabled": False}
def test_reasoning_not_sent_for_unsupported_openrouter_model(self, agent):
agent.base_url = "https://openrouter.ai/api/v1"
agent.model = "minimax/minimax-m2.5"
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
@ -1575,6 +1576,7 @@ class TestHandleMaxIterations:
assert "API down" in result
def test_summary_skips_reasoning_for_unsupported_openrouter_model(self, agent):
agent.base_url = "https://openrouter.ai/api/v1"
agent.model = "minimax/minimax-m2.5"
resp = _mock_response(content="Summary")
agent.client.chat.completions.create.return_value = resp
@ -1705,27 +1707,6 @@ class TestRunConversation:
assert result["completed"] is True
assert result["api_calls"] == 2
def test_inline_think_blocks_reasoning_only_accepted(self, agent):
"""Inline <think> reasoning-only responses accepted with (empty) content, no retries."""
self._setup_agent(agent)
empty_resp = _mock_response(
content="<think>internal reasoning</think>",
finish_reason="stop",
)
agent.client.chat.completions.create.side_effect = [empty_resp]
with (
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
result = agent.run_conversation("answer me")
assert result["completed"] is True
assert result["final_response"] == "(empty)"
assert result["api_calls"] == 1 # no retries
# Reasoning should be preserved in the assistant message
assistant_msgs = [m for m in result["messages"] if m.get("role") == "assistant"]
assert any(m.get("reasoning") for m in assistant_msgs)
def test_reasoning_only_local_resumed_no_compression_triggered(self, agent):
"""Reasoning-only responses no longer trigger compression — prefill then accepted."""
self._setup_agent(agent)

View file

@ -179,6 +179,7 @@ class TestEphemeralMaxOutputTokens:
return_value=[{"role": "user", "content": "hi"}]
)
agent._anthropic_preserve_dots = MagicMock(return_value=False)
agent.request_overrides = {}
return agent
def test_ephemeral_override_is_used_on_first_call(self):
@ -253,6 +254,7 @@ class TestContextNotHalvedOnOutputCapError:
)
agent._anthropic_preserve_dots = MagicMock(return_value=False)
agent._vprint = MagicMock()
agent.request_overrides = {}
return agent
def test_output_cap_error_sets_ephemeral_not_context_length(self):

View file

@ -298,8 +298,17 @@ class TestGatewayMode:
"""agent.log (catch-all) still receives gateway AND tool records."""
hermes_logging.setup_logging(hermes_home=hermes_home, mode="gateway")
logging.getLogger("gateway.run").info("gateway msg")
logging.getLogger("tools.file_tools").info("file msg")
gw_logger = logging.getLogger("gateway.run")
file_logger = logging.getLogger("tools.file_tools")
# Ensure propagation and levels are clean (cross-test pollution defense)
gw_logger.propagate = True
file_logger.propagate = True
logging.getLogger("tools").propagate = True
file_logger.setLevel(logging.NOTSET)
logging.getLogger("tools").setLevel(logging.NOTSET)
gw_logger.info("gateway msg")
file_logger.info("file msg")
for h in logging.getLogger().handlers:
h.flush()

View file

@ -103,7 +103,7 @@ class TestSourceLineVerification:
if "self.async_client = AsyncOpenAI(" in line and "_get_async_client" not in lines[max(0,i-3):i+1]:
# Allow it inside _get_async_client method
# Check if we're inside _get_async_client by looking at context
context = "\n".join(lines[max(0,i-10):i+1])
context = "\n".join(lines[max(0,i-20):i+1])
if "_get_async_client" not in context:
pytest.fail(
f"Line {i}: AsyncOpenAI created eagerly outside _get_async_client()"

View file

@ -64,4 +64,4 @@ class TestCamofoxConfigDefaults:
# The current schema version is tracked globally; unrelated default
# options may bump it after browser defaults are added.
assert DEFAULT_CONFIG["_config_version"] == 15
assert DEFAULT_CONFIG["_config_version"] == 17

View file

@ -28,7 +28,7 @@ class TestInterruptModule:
assert not is_interrupted()
def test_thread_safety(self):
"""Set from one thread, check from another."""
"""Set from one thread targeting another thread's ident."""
from tools.interrupt import set_interrupt, is_interrupted
set_interrupt(False)
@ -45,11 +45,12 @@ class TestInterruptModule:
time.sleep(0.05)
assert not seen["value"]
set_interrupt(True)
# Target the checker thread's ident so it sees the interrupt
set_interrupt(True, thread_id=t.ident)
t.join(timeout=1)
assert seen["value"]
set_interrupt(False)
set_interrupt(False, thread_id=t.ident)
# ---------------------------------------------------------------------------
@ -189,10 +190,10 @@ class TestSIGKILLEscalation:
t.start()
time.sleep(0.5)
set_interrupt(True)
set_interrupt(True, thread_id=t.ident)
t.join(timeout=5)
set_interrupt(False)
set_interrupt(False, thread_id=t.ident)
assert result_holder["value"] is not None
assert result_holder["value"]["returncode"] == 130

View file

@ -32,6 +32,7 @@ def _make_voice_cli(**overrides):
cli._voice_tts_done.set()
cli._pending_input = queue.Queue()
cli._app = None
cli._attached_images = []
cli.console = SimpleNamespace(width=80)
for k, v in overrides.items():
setattr(cli, k, v)

View file

@ -190,17 +190,38 @@ class TestGatewayCleanupWiring:
def test_gateway_stop_calls_close(self):
"""gateway stop() should call close() on all running agents."""
import asyncio
from unittest.mock import MagicMock, patch
import threading
from unittest.mock import AsyncMock, MagicMock, patch
runner = MagicMock()
from gateway.run import GatewayRunner
runner = object.__new__(GatewayRunner)
runner._running = True
runner._running_agents = {}
runner._running_agents_ts = {}
runner.adapters = {}
runner._background_tasks = set()
runner._pending_messages = {}
runner._pending_approvals = {}
runner._pending_model_notes = {}
runner._shutdown_event = asyncio.Event()
runner._exit_reason = None
runner._exit_code = None
runner._stop_task = None
runner._draining = False
runner._restart_requested = False
runner._restart_task_started = False
runner._restart_detached = False
runner._restart_via_service = False
runner._restart_drain_timeout = 5.0
runner._voice_mode = {}
runner._session_model_overrides = {}
runner._update_prompt_pending = {}
runner._busy_input_mode = "interrupt"
runner._agent_cache = {}
runner._agent_cache_lock = threading.Lock()
runner._shutdown_all_gateway_honcho = lambda: None
runner._update_runtime_status = MagicMock()
mock_agent_1 = MagicMock()
mock_agent_2 = MagicMock()
@ -209,8 +230,6 @@ class TestGatewayCleanupWiring:
"session-2": mock_agent_2,
}
from gateway.run import GatewayRunner
loop = asyncio.new_event_loop()
try:
with patch("gateway.status.remove_pid_file"), \

View file

@ -465,7 +465,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
},
"deliver": {
"type": "string",
"description": "Omit this parameter to auto-deliver back to the current chat and topic (recommended). Auto-detection preserves thread/topic context. Only set explicitly when the user asks to deliver somewhere OTHER than the current conversation. Values: 'origin' (same as omitting), 'local' (no delivery, save only), or platform:chat_id:thread_id for a specific destination. Examples: 'telegram:-1001234567890:17585', 'discord:#engineering'. WARNING: 'platform:chat_id' without :thread_id loses topic targeting."
"description": "Omit this parameter to auto-deliver back to the current chat and topic (recommended). Auto-detection preserves thread/topic context. Only set explicitly when the user asks to deliver somewhere OTHER than the current conversation. Values: 'origin' (same as omitting), 'local' (no delivery, save only), or platform:chat_id:thread_id for a specific destination. Examples: 'telegram:-1001234567890:17585', 'discord:#engineering', 'sms:+15551234567'. WARNING: 'platform:chat_id' without :thread_id loses topic targeting."
},
"skills": {
"type": "array",

View file

@ -429,6 +429,11 @@ class AudioRecorder:
"""Current audio input RMS level (0-32767). Updated each audio chunk."""
return self._current_rms
@property
def is_recording(self) -> bool:
"""Whether audio recording is currently active."""
return self._recording
# -- public methods ------------------------------------------------------
def _ensure_stream(self) -> None: