diff --git a/gateway/run.py b/gateway/run.py index af144fe94e5..1ef57034fba 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3533,16 +3533,30 @@ class GatewayRunner: self._request_clean_exit(reason) return True if enabled_platform_count > 0: - reason = "; ".join(startup_retryable_errors) or "all configured messaging platforms failed to connect" - logger.error("Gateway failed to connect any configured messaging platform: %s", reason) - try: - from gateway.status import write_runtime_status - write_runtime_status(gateway_state="startup_failed", exit_reason=reason) - except Exception: - pass - return False - logger.warning("No messaging platforms enabled.") - logger.info("Gateway will continue running for cron job execution.") + if startup_retryable_errors: + # At least one platform attempted a connection and failed — + # this is a real startup error that should block the gateway. + reason = "; ".join(startup_retryable_errors) + logger.error("Gateway failed to connect any configured messaging platform: %s", reason) + try: + from gateway.status import write_runtime_status + write_runtime_status(gateway_state="startup_failed", exit_reason=reason) + except Exception: + pass + return False + # All enabled platforms had no adapter (missing library or credentials). + # In fleet deployments the same config.yaml is shared across nodes that + # may only have credentials for a subset of platforms. Rather than + # failing hard, degrade gracefully and allow cron jobs to run (#5196). + logger.warning( + "No adapter could be created for any of the %d configured platform(s). " + "Check that required dependencies are installed and credentials are set. " + "Gateway will continue for cron job execution.", + enabled_platform_count, + ) + else: + logger.warning("No messaging platforms enabled.") + logger.info("Gateway will continue running for cron job execution.") # Update delivery router with adapters self.delivery_router.adapters = self.adapters diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py index d94e466ec3e..fc5c775a779 100644 --- a/tests/gateway/test_runner_startup_failures.py +++ b/tests/gateway/test_runner_startup_failures.py @@ -339,6 +339,47 @@ async def test_start_gateway_replace_clears_marker_on_permission_denied( assert not (tmp_path / ".gateway-takeover.json").exists() +@pytest.mark.asyncio +async def test_runner_degrades_gracefully_when_all_adapters_missing(monkeypatch, tmp_path, caplog): + """When all enabled platforms have no adapter (missing library or credentials), + the gateway should NOT return failure — it should warn and continue running for + cron job execution, matching the behaviour of 'no platforms enabled' (#5196). + + In fleet deployments the same config.yaml is shared across nodes that may only + have credentials for a subset of platforms. Requiring perfect credentials on + every node makes fleet operation impossible.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + config = GatewayConfig( + platforms={ + Platform.TELEGRAM: PlatformConfig(enabled=True, token="***"), + Platform.DISCORD: PlatformConfig(enabled=True, token="***"), + }, + sessions_dir=tmp_path / "sessions", + ) + runner = GatewayRunner(config) + + # Simulate _create_adapter returning None for ALL platforms (missing library / + # missing credentials — no connection attempt ever made). + monkeypatch.setattr(runner, "_create_adapter", lambda platform, cfg: None) + + import logging + with caplog.at_level(logging.WARNING): + ok = await runner.start() + + # Must NOT return False — gateway should keep running for cron. + assert ok is True + assert runner.should_exit_cleanly is False + assert runner.adapters == {} + # Runtime state must remain "running", not "startup_failed". + state = read_runtime_status() + assert state["gateway_state"] == "running" + # A warning must be emitted explaining why no platforms connected. + assert any( + "No adapter could be created" in record.message + for record in caplog.records + ), "Expected degraded-mode warning when all adapters are missing" + + def test_runner_warns_when_docker_gateway_lacks_explicit_output_mount(monkeypatch, tmp_path, caplog): monkeypatch.setenv("HERMES_HOME", str(tmp_path)) monkeypatch.setenv("TERMINAL_ENV", "docker")