fix(gateway): degrade gracefully when all platform adapters are missing

When connected_count == 0 AND enabled_platform_count > 0, the gateway
treated 'all adapters returned None' identically to 'all adapters
failed to connect' — both as fatal startup errors. The 'returned None'
case happens when imports fail silently or when adapters are present
in config but their dependencies aren't installed (e.g. discord.py
missing). Cron jobs and other gateway-runtime work would unnecessarily
fail to start.

Split the two cases: only return False when startup_retryable_errors is
non-empty (a real connection attempt failed). When the list is empty AND
enabled_platform_count > 0, log a warning and continue running, matching
the 'no platforms enabled' cron path.

Salvage of #22642's gateway slice. Drops the bundled run_agent.py
memory-nudge counter hydration block (issue #22357 territory) which
wasn't mentioned in the PR description.

Closes #5196.
This commit is contained in:
Wesley Simplicio 2026-05-09 14:56:03 -07:00 committed by Teknium
parent 116a1446a4
commit 246c676c2b
2 changed files with 65 additions and 10 deletions

View file

@ -339,6 +339,47 @@ async def test_start_gateway_replace_clears_marker_on_permission_denied(
assert not (tmp_path / ".gateway-takeover.json").exists()
@pytest.mark.asyncio
async def test_runner_degrades_gracefully_when_all_adapters_missing(monkeypatch, tmp_path, caplog):
    """Gateway startup must survive every enabled platform lacking an adapter.

    If ``_create_adapter`` yields ``None`` for all enabled platforms (missing
    library or missing credentials), ``start()`` should not report failure —
    it should log a warning and keep running so cron jobs can execute, the
    same as the 'no platforms enabled' behaviour (#5196).

    Fleet deployments share one config.yaml across nodes that may hold
    credentials for only a subset of platforms; demanding complete
    credentials on every node would make fleet operation impossible.
    """
    import logging

    def _adapter_unavailable(platform, cfg):
        # Stand-in for a missing library / missing credentials: no adapter is
        # produced, so no connection attempt is ever made.
        return None

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    runner = GatewayRunner(
        GatewayConfig(
            platforms={
                Platform.TELEGRAM: PlatformConfig(enabled=True, token="***"),
                Platform.DISCORD: PlatformConfig(enabled=True, token="***"),
            },
            sessions_dir=tmp_path / "sessions",
        )
    )
    monkeypatch.setattr(runner, "_create_adapter", _adapter_unavailable)

    with caplog.at_level(logging.WARNING):
        started = await runner.start()

    # Startup has to report success so the gateway stays up for cron.
    assert started is True
    assert runner.should_exit_cleanly is False
    assert runner.adapters == {}

    # The persisted runtime status must say "running", not "startup_failed".
    runtime_status = read_runtime_status()
    assert runtime_status["gateway_state"] == "running"

    # The operator must be told why no platform connected.
    warning_seen = False
    for log_record in caplog.records:
        if "No adapter could be created" in log_record.message:
            warning_seen = True
            break
    assert warning_seen, "Expected degraded-mode warning when all adapters are missing"
def test_runner_warns_when_docker_gateway_lacks_explicit_output_mount(monkeypatch, tmp_path, caplog):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
monkeypatch.setenv("TERMINAL_ENV", "docker")