mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-22 05:22:09 +00:00
fix(gateway): keep running when platforms fail; add per-platform circuit breaker + /platform (#26600)
Stop the gateway from exiting (or systemd-restart-looping) when a single
messaging adapter fails at startup or runtime. A misconfigured WhatsApp
(npm install timeout, unpaired bridge, missing creds.json) used to take
the entire gateway down, killing cron jobs and any other connected
platforms with it.
Changes:
• Startup (gateway/run.py): when connected_count==0 but the only
errors are retryable, log a degraded-state warning and keep the
gateway alive instead of returning False. Reconnect watcher then
recovers platforms as their underlying problem clears.
• Runtime (gateway/run.py _handle_adapter_fatal_error): when the last
adapter goes down with a retryable error and is queued for
reconnection, stay alive instead of exit-with-failure. Previously
this triggered systemd Restart=on-failure, which created infinite
restart loops on persistent retryable failures (proxy outage,
repeated bridge crashes).
• Reconnect watcher (gateway/run.py _platform_reconnect_watcher):
replace the 20-attempt hard drop with a circuit-breaker pause.
After _PAUSE_AFTER_FAILURES (10) consecutive retryable failures, the
platform stays in _failed_platforms with paused=True so the watcher
skips it but the operator can still see and resume it. Non-retryable
errors still drop out of the queue immediately. Resolves #17063
(gateway giving up on Telegram after 20 attempts).
• WhatsApp preflight (gateway/platforms/whatsapp.py): refuse to start
the Node bridge when creds.json is missing. Sets a non-retryable
whatsapp_not_paired fatal error so the watcher drops it cleanly
with a single 'run hermes whatsapp' log line instead of paying the
30s bridge bootstrap timeout on every gateway start.
• WhatsApp setup ordering (hermes_cli/main.py cmd_whatsapp): only set
WHATSAPP_ENABLED=true once pairing actually succeeds. Previously
the wizard wrote the env var at step 2 (before npm install and QR
pairing), so any Ctrl+C left .env claiming WhatsApp was ready when
the bridge had no creds.json. Also propagate the env var when the
user keeps an existing pairing on a re-run.
• /platform slash command (hermes_cli/commands.py + gateway/run.py):
new gateway-only command for manual circuit-breaker control.
/platform list — show connected + failed/paused platforms
/platform pause <name> — silence a known-broken platform
/platform resume <name> — re-queue a paused platform
Tests:
• New: pause/resume helpers, /platform list|pause|resume command,
WhatsApp creds.json preflight, WhatsApp setup ordering.
• Updated: stale assertions that codified the old 'exit and let
systemd restart' behavior in test_runner_fatal_adapter.py,
test_runner_startup_failures.py, and test_platform_reconnect.py
(the 20-attempt give-up test became a circuit-breaker pause test).
5488 tests pass in tests/gateway/.
This commit is contained in:
parent
3b9368a0c4
commit
518f39557b
9 changed files with 745 additions and 62 deletions
|
|
@ -294,15 +294,63 @@ class TestPlatformReconnectWatcher:
|
|||
assert runner._failed_platforms[Platform.TELEGRAM]["attempts"] == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reconnect_gives_up_after_max_attempts(self):
|
||||
"""After max attempts, platform should be removed from retry queue."""
|
||||
async def test_reconnect_pauses_after_circuit_breaker_threshold(self):
|
||||
"""After enough consecutive retryable failures, the watcher should
|
||||
*pause* the platform (keep it in the queue but stop hammering it),
|
||||
not drop it. The user resumes via /platform resume.
|
||||
"""
|
||||
runner = _make_runner()
|
||||
|
||||
platform_config = PlatformConfig(enabled=True, token="test")
|
||||
# 9 prior attempts — the next failure will be the 10th and should
|
||||
# trip the circuit breaker.
|
||||
runner._failed_platforms[Platform.TELEGRAM] = {
|
||||
"config": platform_config,
|
||||
"attempts": 9,
|
||||
"next_retry": time.monotonic() - 1,
|
||||
}
|
||||
|
||||
fail_adapter = StubAdapter(
|
||||
succeed=False, fatal_error="DNS failure", fatal_retryable=True
|
||||
)
|
||||
real_sleep = asyncio.sleep
|
||||
|
||||
with patch.object(runner, "_create_adapter", return_value=fail_adapter):
|
||||
async def run_one_iteration():
|
||||
runner._running = True
|
||||
call_count = 0
|
||||
|
||||
async def fake_sleep(n):
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
if call_count > 1:
|
||||
runner._running = False
|
||||
await real_sleep(0)
|
||||
|
||||
with patch("asyncio.sleep", side_effect=fake_sleep):
|
||||
await runner._platform_reconnect_watcher()
|
||||
|
||||
await run_one_iteration()
|
||||
|
||||
# Platform stays in queue — paused, not dropped
|
||||
assert Platform.TELEGRAM in runner._failed_platforms
|
||||
info = runner._failed_platforms[Platform.TELEGRAM]
|
||||
assert info["paused"] is True
|
||||
assert info["attempts"] == 10
|
||||
assert "pause_reason" in info
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reconnect_skips_paused_platforms(self):
|
||||
"""A paused platform should not be retried by the watcher tick."""
|
||||
runner = _make_runner()
|
||||
|
||||
platform_config = PlatformConfig(enabled=True, token="test")
|
||||
runner._failed_platforms[Platform.TELEGRAM] = {
|
||||
"config": platform_config,
|
||||
"attempts": 20, # At max
|
||||
"next_retry": time.monotonic() - 1,
|
||||
"attempts": 10,
|
||||
"next_retry": time.monotonic() - 1, # would normally retry now
|
||||
"paused": True,
|
||||
"pause_reason": "paused via /platform pause",
|
||||
}
|
||||
|
||||
real_sleep = asyncio.sleep
|
||||
|
|
@ -324,8 +372,10 @@ class TestPlatformReconnectWatcher:
|
|||
|
||||
await run_one_iteration()
|
||||
|
||||
assert Platform.TELEGRAM not in runner._failed_platforms
|
||||
mock_create.assert_not_called() # Should give up without trying
|
||||
# Paused platform stays queued and was never touched
|
||||
assert Platform.TELEGRAM in runner._failed_platforms
|
||||
assert runner._failed_platforms[Platform.TELEGRAM]["paused"] is True
|
||||
mock_create.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reconnect_skips_when_not_time_yet(self):
|
||||
|
|
@ -459,11 +509,12 @@ class TestRuntimeDisconnectQueuing:
|
|||
assert Platform.TELEGRAM not in runner._failed_platforms
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_retryable_error_exits_for_service_restart_when_all_down(self):
|
||||
"""Gateway should exit with failure when all platforms fail with retryable errors.
|
||||
|
||||
This lets systemd Restart=on-failure restart the process, which is more
|
||||
reliable than in-process background reconnection after exhausted retries.
|
||||
async def test_retryable_error_keeps_gateway_alive_when_all_down(self):
|
||||
"""When all adapters fail at runtime with retryable errors, the
|
||||
gateway should stay alive and let the reconnect watcher recover them
|
||||
in the background. (Previously this exited-with-failure to trigger
|
||||
a systemd restart — that converted transient outages into infinite
|
||||
restart loops and killed in-process state.)
|
||||
"""
|
||||
runner = _make_runner()
|
||||
runner.stop = AsyncMock()
|
||||
|
|
@ -474,9 +525,9 @@ class TestRuntimeDisconnectQueuing:
|
|||
|
||||
await runner._handle_adapter_fatal_error(adapter)
|
||||
|
||||
# stop() SHOULD be called — gateway exits for systemd restart
|
||||
runner.stop.assert_called_once()
|
||||
assert runner._exit_with_failure is True
|
||||
# stop() should NOT be called — gateway stays alive for the watcher
|
||||
runner.stop.assert_not_called()
|
||||
assert runner._exit_with_failure is False
|
||||
assert Platform.TELEGRAM in runner._failed_platforms
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
@ -512,3 +563,154 @@ class TestRuntimeDisconnectQueuing:
|
|||
await runner._handle_adapter_fatal_error(adapter)
|
||||
|
||||
runner.stop.assert_called_once()
|
||||
|
||||
|
||||
# --- Pause / resume circuit breaker ---
|
||||
|
||||
|
||||
class TestPauseResume:
|
||||
"""Test the per-platform pause/resume helpers and slash command."""
|
||||
|
||||
def test_pause_marks_platform_paused(self):
|
||||
runner = _make_runner()
|
||||
runner._failed_platforms[Platform.TELEGRAM] = {
|
||||
"config": PlatformConfig(enabled=True, token="t"),
|
||||
"attempts": 3,
|
||||
"next_retry": time.monotonic() + 30,
|
||||
}
|
||||
runner._pause_failed_platform(Platform.TELEGRAM, reason="manual")
|
||||
info = runner._failed_platforms[Platform.TELEGRAM]
|
||||
assert info["paused"] is True
|
||||
assert info["pause_reason"] == "manual"
|
||||
assert info["next_retry"] == float("inf")
|
||||
|
||||
def test_pause_is_idempotent(self):
|
||||
runner = _make_runner()
|
||||
runner._failed_platforms[Platform.TELEGRAM] = {
|
||||
"config": PlatformConfig(enabled=True, token="t"),
|
||||
"attempts": 3,
|
||||
"next_retry": time.monotonic() + 30,
|
||||
"paused": True,
|
||||
"pause_reason": "first reason",
|
||||
}
|
||||
runner._pause_failed_platform(Platform.TELEGRAM, reason="second reason")
|
||||
# Reason should not be overwritten on a second pause call.
|
||||
assert (
|
||||
runner._failed_platforms[Platform.TELEGRAM]["pause_reason"]
|
||||
== "first reason"
|
||||
)
|
||||
|
||||
def test_pause_no_op_when_platform_not_queued(self):
|
||||
runner = _make_runner()
|
||||
# No exception even when the platform isn't in _failed_platforms.
|
||||
runner._pause_failed_platform(Platform.TELEGRAM, reason="x")
|
||||
assert Platform.TELEGRAM not in runner._failed_platforms
|
||||
|
||||
def test_resume_clears_paused_and_resets_attempts(self):
|
||||
runner = _make_runner()
|
||||
runner._failed_platforms[Platform.TELEGRAM] = {
|
||||
"config": PlatformConfig(enabled=True, token="t"),
|
||||
"attempts": 10,
|
||||
"next_retry": float("inf"),
|
||||
"paused": True,
|
||||
"pause_reason": "auto-paused",
|
||||
}
|
||||
assert runner._resume_paused_platform(Platform.TELEGRAM) is True
|
||||
info = runner._failed_platforms[Platform.TELEGRAM]
|
||||
assert info["paused"] is False
|
||||
assert info["attempts"] == 0
|
||||
assert info["next_retry"] != float("inf")
|
||||
assert "pause_reason" not in info
|
||||
|
||||
def test_resume_returns_false_when_not_paused(self):
|
||||
runner = _make_runner()
|
||||
runner._failed_platforms[Platform.TELEGRAM] = {
|
||||
"config": PlatformConfig(enabled=True, token="t"),
|
||||
"attempts": 1,
|
||||
"next_retry": time.monotonic() + 30,
|
||||
}
|
||||
assert runner._resume_paused_platform(Platform.TELEGRAM) is False
|
||||
|
||||
def test_resume_returns_false_when_not_queued(self):
|
||||
runner = _make_runner()
|
||||
assert runner._resume_paused_platform(Platform.TELEGRAM) is False
|
||||
|
||||
|
||||
class TestPlatformSlashCommand:
|
||||
"""Test the /platform list|pause|resume slash command handler."""
|
||||
|
||||
def _make_event(self, content: str):
|
||||
ev = MagicMock()
|
||||
ev.content = content
|
||||
return ev
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_shows_connected_and_paused(self):
|
||||
runner = _make_runner()
|
||||
runner.adapters[Platform.DISCORD] = StubAdapter(platform=Platform.DISCORD)
|
||||
runner._failed_platforms[Platform.WHATSAPP] = {
|
||||
"config": PlatformConfig(enabled=True, token="t"),
|
||||
"attempts": 10,
|
||||
"next_retry": float("inf"),
|
||||
"paused": True,
|
||||
"pause_reason": "not paired",
|
||||
}
|
||||
out = await runner._handle_platform_command(self._make_event("/platform list"))
|
||||
assert "discord" in out
|
||||
assert "whatsapp" in out
|
||||
assert "PAUSED" in out
|
||||
assert "not paired" in out
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_pause_command_pauses_queued_platform(self):
|
||||
runner = _make_runner()
|
||||
runner._failed_platforms[Platform.WHATSAPP] = {
|
||||
"config": PlatformConfig(enabled=True, token="t"),
|
||||
"attempts": 2,
|
||||
"next_retry": time.monotonic() + 30,
|
||||
}
|
||||
out = await runner._handle_platform_command(
|
||||
self._make_event("/platform pause whatsapp")
|
||||
)
|
||||
assert "paused" in out.lower()
|
||||
assert runner._failed_platforms[Platform.WHATSAPP]["paused"] is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_pause_rejects_unqueued_platform(self):
|
||||
runner = _make_runner()
|
||||
out = await runner._handle_platform_command(
|
||||
self._make_event("/platform pause whatsapp")
|
||||
)
|
||||
assert "not in the retry queue" in out
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resume_command_resumes_paused_platform(self):
|
||||
runner = _make_runner()
|
||||
runner._failed_platforms[Platform.WHATSAPP] = {
|
||||
"config": PlatformConfig(enabled=True, token="t"),
|
||||
"attempts": 10,
|
||||
"next_retry": float("inf"),
|
||||
"paused": True,
|
||||
"pause_reason": "x",
|
||||
}
|
||||
out = await runner._handle_platform_command(
|
||||
self._make_event("/platform resume whatsapp")
|
||||
)
|
||||
assert "resumed" in out.lower()
|
||||
assert runner._failed_platforms[Platform.WHATSAPP]["paused"] is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unknown_platform_name(self):
|
||||
runner = _make_runner()
|
||||
out = await runner._handle_platform_command(
|
||||
self._make_event("/platform pause notarealplatform")
|
||||
)
|
||||
assert "Unknown platform" in out
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_bare_platform_shows_usage_with_list(self):
|
||||
# An empty /platform call defaults to "list".
|
||||
runner = _make_runner()
|
||||
out = await runner._handle_platform_command(self._make_event("/platform"))
|
||||
assert "Gateway platforms" in out
|
||||
|
||||
|
|
|
|||
|
|
@ -68,7 +68,11 @@ async def test_runner_requests_clean_exit_for_nonretryable_startup_conflict(monk
|
|||
@pytest.mark.asyncio
|
||||
async def test_runner_queues_retryable_runtime_fatal_for_reconnection(monkeypatch, tmp_path):
|
||||
"""Retryable runtime fatal errors queue the platform for reconnection
|
||||
instead of shutting down the gateway."""
|
||||
AND keep the gateway alive — the background reconnect watcher recovers
|
||||
the platform when the underlying issue clears. (Previously this
|
||||
exited-with-failure to trigger a systemd restart; that converted
|
||||
transient failures into infinite restart loops.)
|
||||
"""
|
||||
config = GatewayConfig(
|
||||
platforms={
|
||||
Platform.WHATSAPP: PlatformConfig(enabled=True, token="token")
|
||||
|
|
@ -89,8 +93,8 @@ async def test_runner_queues_retryable_runtime_fatal_for_reconnection(monkeypatc
|
|||
|
||||
await runner._handle_adapter_fatal_error(adapter)
|
||||
|
||||
# Should shut down with failure — systemd Restart=on-failure will restart
|
||||
runner.stop.assert_awaited_once()
|
||||
assert runner._exit_with_failure is True
|
||||
# Gateway stays alive — watcher will retry in background
|
||||
runner.stop.assert_not_awaited()
|
||||
assert runner._exit_with_failure is False
|
||||
assert Platform.WHATSAPP in runner._failed_platforms
|
||||
assert runner._failed_platforms[Platform.WHATSAPP]["attempts"] == 0
|
||||
|
|
|
|||
|
|
@ -64,7 +64,14 @@ class _SuccessfulAdapter(BasePlatformAdapter):
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runner_returns_failure_for_retryable_startup_errors(monkeypatch, tmp_path):
|
||||
async def test_runner_stays_alive_for_retryable_startup_errors(monkeypatch, tmp_path):
|
||||
"""Retryable startup errors should leave the gateway running in
|
||||
degraded mode so the reconnect watcher can recover the platform when
|
||||
the underlying problem clears. Previously this returned False from
|
||||
``start()`` and exited the process, which converted a single broken
|
||||
platform (e.g. unpaired WhatsApp, DNS blip on Telegram) into a
|
||||
systemd restart loop and killed cron jobs in the meantime.
|
||||
"""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
config = GatewayConfig(
|
||||
platforms={
|
||||
|
|
@ -78,11 +85,13 @@ async def test_runner_returns_failure_for_retryable_startup_errors(monkeypatch,
|
|||
|
||||
ok = await runner.start()
|
||||
|
||||
assert ok is False
|
||||
# Gateway stays alive in degraded mode; reconnect watcher takes over.
|
||||
assert ok is True
|
||||
assert runner.should_exit_cleanly is False
|
||||
state = read_runtime_status()
|
||||
assert state["gateway_state"] == "startup_failed"
|
||||
assert "temporary DNS resolution failure" in state["exit_reason"]
|
||||
assert state["gateway_state"] in {"degraded", "running"}
|
||||
# Telegram was queued for retry, not given up on.
|
||||
assert Platform.TELEGRAM in runner._failed_platforms
|
||||
assert state["platforms"]["telegram"]["state"] == "retrying"
|
||||
assert state["platforms"]["telegram"]["error_code"] == "telegram_connect_error"
|
||||
|
||||
|
|
|
|||
|
|
@ -611,3 +611,93 @@ class TestHttpSessionLifecycle:
|
|||
|
||||
mock_task.cancel.assert_not_called()
|
||||
assert adapter._poll_task is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pre-flight: refuse to start the bridge when creds.json is missing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNoCredsPreflight:
|
||||
"""Verify ``connect()`` fast-fails as non-retryable when WhatsApp is
|
||||
enabled but the user never finished pairing (no ``creds.json``).
|
||||
|
||||
Without this guard, every gateway boot:
|
||||
• spawned the bridge subprocess (npm install if needed)
|
||||
• waited 30s for status:connected (never happens without creds)
|
||||
• queued WhatsApp for indefinite retries that would just repeat
|
||||
With the guard, ``connect()`` returns False immediately with a
|
||||
non-retryable fatal error so the reconnect watcher drops the platform
|
||||
and the gateway gets a single clear log line telling the user to run
|
||||
``hermes whatsapp``.
|
||||
"""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_connect_returns_false_when_no_creds(self, tmp_path):
|
||||
from gateway.platforms.whatsapp import WhatsAppAdapter
|
||||
|
||||
adapter = WhatsAppAdapter.__new__(WhatsAppAdapter)
|
||||
adapter.platform = Platform.WHATSAPP
|
||||
adapter.config = MagicMock()
|
||||
adapter._bridge_port = 19876
|
||||
# Point bridge_script at a real existing file so the earlier
|
||||
# bridge-missing check doesn't trip — we want to exercise the
|
||||
# creds.json check specifically.
|
||||
bridge = tmp_path / "bridge.js"
|
||||
bridge.write_text("// stub")
|
||||
adapter._bridge_script = str(bridge)
|
||||
adapter._session_path = tmp_path / "session" # no creds.json inside
|
||||
adapter._session_path.mkdir()
|
||||
adapter._bridge_log_fh = None
|
||||
adapter._fatal_error_code = None
|
||||
adapter._fatal_error_message = None
|
||||
adapter._fatal_error_retryable = True
|
||||
|
||||
with patch(
|
||||
"gateway.platforms.whatsapp.check_whatsapp_requirements",
|
||||
return_value=True,
|
||||
):
|
||||
result = await adapter.connect()
|
||||
|
||||
assert result is False
|
||||
# Non-retryable so the reconnect watcher drops it cleanly
|
||||
assert adapter._fatal_error_code == "whatsapp_not_paired"
|
||||
assert adapter._fatal_error_retryable is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_connect_proceeds_when_creds_present(self, tmp_path):
|
||||
"""When creds.json exists, the preflight check is bypassed and
|
||||
connect() proceeds to the bridge bootstrap path. We don't fully
|
||||
simulate the bridge here — we just verify no fast-fail occurs.
|
||||
"""
|
||||
from gateway.platforms.whatsapp import WhatsAppAdapter
|
||||
|
||||
adapter = WhatsAppAdapter.__new__(WhatsAppAdapter)
|
||||
adapter.platform = Platform.WHATSAPP
|
||||
adapter.config = MagicMock()
|
||||
adapter._bridge_port = 19877
|
||||
bridge = tmp_path / "bridge.js"
|
||||
bridge.write_text("// stub")
|
||||
adapter._bridge_script = str(bridge)
|
||||
session_dir = tmp_path / "session"
|
||||
session_dir.mkdir()
|
||||
(session_dir / "creds.json").write_text("{}")
|
||||
adapter._session_path = session_dir
|
||||
adapter._bridge_log_fh = None
|
||||
adapter._fatal_error_code = None
|
||||
adapter._fatal_error_message = None
|
||||
adapter._fatal_error_retryable = True
|
||||
# Stub _acquire_platform_lock to return False so connect() exits
|
||||
# cleanly *after* the preflight, without spawning subprocesses.
|
||||
adapter._acquire_platform_lock = MagicMock(return_value=False)
|
||||
|
||||
with patch(
|
||||
"gateway.platforms.whatsapp.check_whatsapp_requirements",
|
||||
return_value=True,
|
||||
):
|
||||
result = await adapter.connect()
|
||||
|
||||
# Preflight passed — exits because we faked lock acquisition,
|
||||
# but the fatal-error code is NOT the "not paired" one.
|
||||
assert result is False
|
||||
assert adapter._fatal_error_code != "whatsapp_not_paired"
|
||||
|
|
|
|||
140
tests/hermes_cli/test_whatsapp_setup_ordering.py
Normal file
140
tests/hermes_cli/test_whatsapp_setup_ordering.py
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
"""Regression tests for ``cmd_whatsapp`` env-var write ordering.
|
||||
|
||||
Before the fix, ``hermes whatsapp`` wrote ``WHATSAPP_ENABLED=true`` at
|
||||
step 2 — before npm install (step 4) and before QR pairing (step 6).
|
||||
If the user Ctrl+C'd at any later step, ``.env`` claimed WhatsApp was
|
||||
ready when the bridge still had no ``creds.json``. Every subsequent
|
||||
``hermes gateway`` then paid a 30s bridge-bootstrap timeout and queued
|
||||
WhatsApp for indefinite retries — looking like "the gateway is broken."
|
||||
|
||||
The fix: only set ``WHATSAPP_ENABLED=true`` once pairing actually
|
||||
succeeds (creds.json exists). Aborted setup leaves no enabled state.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import os
|
||||
from contextlib import redirect_stdout
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def isolated_home(tmp_path, monkeypatch):
|
||||
home = tmp_path / "home"
|
||||
hermes = home / ".hermes"
|
||||
hermes.mkdir(parents=True)
|
||||
monkeypatch.setattr(Path, "home", lambda: home)
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes))
|
||||
# Ensure get_env_value cache doesn't carry stale state.
|
||||
for key in list(os.environ):
|
||||
if key.startswith("WHATSAPP_"):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
return hermes
|
||||
|
||||
|
||||
def _env_value(hermes_home: Path, key: str) -> str | None:
|
||||
env_file = hermes_home / ".env"
|
||||
if not env_file.exists():
|
||||
return None
|
||||
for line in env_file.read_text().splitlines():
|
||||
if "=" not in line:
|
||||
continue
|
||||
k, _, v = line.partition("=")
|
||||
if k.strip() == key:
|
||||
return v.strip().strip('"').strip("'")
|
||||
return None
|
||||
|
||||
|
||||
def test_aborted_setup_does_not_enable_whatsapp(isolated_home, monkeypatch):
|
||||
"""User picks mode 1, then Ctrl+C's at the allowed-users prompt.
|
||||
|
||||
WHATSAPP_ENABLED must NOT be present in .env after abort.
|
||||
"""
|
||||
from hermes_cli.main import cmd_whatsapp
|
||||
|
||||
# First input() = mode choice, second input() = allowed-users prompt
|
||||
# We raise KeyboardInterrupt on the second call to simulate abort.
|
||||
inputs = iter(["1"])
|
||||
|
||||
def fake_input(_prompt=""):
|
||||
try:
|
||||
return next(inputs)
|
||||
except StopIteration:
|
||||
raise KeyboardInterrupt
|
||||
|
||||
monkeypatch.setattr("builtins.input", fake_input)
|
||||
# _require_tty calls sys.stdin.isatty — make it pass.
|
||||
monkeypatch.setattr("hermes_cli.main._require_tty", lambda *_a, **_kw: None)
|
||||
# No node, no bridge script — we shouldn't reach those steps anyway.
|
||||
|
||||
buf = io.StringIO()
|
||||
with redirect_stdout(buf):
|
||||
try:
|
||||
cmd_whatsapp(MagicMock())
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
|
||||
assert _env_value(isolated_home, "WHATSAPP_ENABLED") is None, (
|
||||
"Setup aborted before pairing — WHATSAPP_ENABLED must not be set. "
|
||||
f"Got .env: {(isolated_home / '.env').read_text() if (isolated_home / '.env').exists() else '(missing)'}"
|
||||
)
|
||||
|
||||
|
||||
def test_existing_pairing_skip_branch_enables_whatsapp(isolated_home, monkeypatch):
|
||||
"""User runs ``hermes whatsapp`` with an existing paired session and
|
||||
chooses "no, keep my session" at the re-pair prompt. The env var
|
||||
should be (re-)written to true so the gateway picks WhatsApp back up,
|
||||
even if the var was lost since the original pairing.
|
||||
"""
|
||||
from hermes_cli.main import cmd_whatsapp
|
||||
|
||||
# Pre-create a paired session WITHOUT WHATSAPP_ENABLED in .env.
|
||||
session = isolated_home / "whatsapp" / "session"
|
||||
session.mkdir(parents=True)
|
||||
(session / "creds.json").write_text("{}")
|
||||
monkeypatch.setenv("WHATSAPP_MODE", "bot")
|
||||
monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "15551234567")
|
||||
|
||||
# mode already set → skip mode prompt; users already set → skip update
|
||||
# prompt with "no"; pairing exists → "no, keep session" → return.
|
||||
inputs = iter(["n", "n"])
|
||||
|
||||
def fake_input(_prompt=""):
|
||||
try:
|
||||
return next(inputs)
|
||||
except StopIteration:
|
||||
return "n"
|
||||
|
||||
monkeypatch.setattr("builtins.input", fake_input)
|
||||
monkeypatch.setattr("hermes_cli.main._require_tty", lambda *_a, **_kw: None)
|
||||
# Skip the bridge npm install — we're testing setup-ordering, not bridge
|
||||
# bootstrapping. Pretend node_modules exists (Path.exists -> True for that
|
||||
# specific check is hard to scope, so instead pretend npm install would
|
||||
# succeed silently if reached).
|
||||
monkeypatch.setattr(
|
||||
"subprocess.run",
|
||||
lambda *_a, **_kw: MagicMock(returncode=0, stderr=""),
|
||||
)
|
||||
monkeypatch.setattr("shutil.which", lambda _name: "/usr/bin/npm")
|
||||
# Patch (bridge_dir / "node_modules").exists() by stubbing Path.exists
|
||||
# to True for that one specific subpath. Easier: pre-create it as a
|
||||
# symlink to /tmp. But we can't write to the repo. Instead, stub
|
||||
# Path.exists wholesale to True for node_modules; the creds.json check
|
||||
# in the same function still works because we wrote it ourselves.
|
||||
_orig_exists = Path.exists
|
||||
def _stub_exists(self):
|
||||
if self.name == "node_modules":
|
||||
return True
|
||||
return _orig_exists(self)
|
||||
monkeypatch.setattr(Path, "exists", _stub_exists)
|
||||
|
||||
buf = io.StringIO()
|
||||
with redirect_stdout(buf):
|
||||
cmd_whatsapp(MagicMock())
|
||||
|
||||
# The skip-rebar branch should have set the env var on its way out.
|
||||
assert _env_value(isolated_home, "WHATSAPP_ENABLED") == "true"
|
||||
Loading…
Add table
Add a link
Reference in a new issue