feat(gateway): register relay adapter from config; drop HERMES_GATEWAY_RELAY gate

Wire the relay adapter into gateway startup and make activation config-driven
instead of a dark-launch flag.

- gateway/relay/__init__.py: replace relay_enabled()/HERMES_GATEWAY_RELAY with
  relay_url() (GATEWAY_RELAY_URL env or gateway.relay_url in config.yaml) — the
  same shape as gateway.proxy_url. register_relay_adapter() registers when a URL
  is configured and builds a live WebSocketRelayTransport; with no URL it's a
  no-op (direct/single-tenant deployments unaffected). force=True keeps the
  transport-less adapter for unit tests. relay_platform_identity() reads the
  hello platform/botId from GATEWAY_RELAY_PLATFORM/GATEWAY_RELAY_BOT_ID.
- gateway/run.py: call register_relay_adapter() during GatewayRunner.start(),
  right after plugin discovery, so a configured connector relay is registered
  on every boot. Failures are logged, never block startup.

This removes the dark-launch posture: the relay is on whenever it's configured,
shipping the production end state rather than hiding it behind a flag.
This commit is contained in:
Ben 2026-06-10 20:28:39 +10:00 committed by Teknium
parent 6b03874d07
commit 237fa7d29c
2 changed files with 76 additions and 30 deletions

View file

@ -2,62 +2,88 @@
EXPERIMENTAL. This package implements the gateway side of the "Gateway Gateway"
relay design: a generic ``RelayAdapter`` plus the wire-serializable
``CapabilityDescriptor`` the connector hands it at handshake time. The public
API (module names, descriptor field set, transport protocol) MAY CHANGE without
a deprecation cycle until at least two real Class-1 platforms (Discord +
Telegram) have shaken out the schema.
``CapabilityDescriptor`` the connector hands it at handshake time, and the
production ``WebSocketRelayTransport`` that dials the connector. The public API
(module names, descriptor field set, transport protocol) MAY CHANGE without a
deprecation cycle until at least two real Class-1 platforms (Discord + Telegram)
have shaken out the schema.
See ``docs/relay-connector-contract.md`` for the formal cross-repo interface.
Registration is OFF by default: ``register_relay_adapter()`` only registers the
``relay`` platform when the relay feature flag is enabled, so existing
single-tenant/direct deployments are completely unaffected (dark-launch posture).
Activation is driven by configuration, not a separate feature flag: the relay
platform is registered when a connector relay URL is configured
(``GATEWAY_RELAY_URL`` env or ``gateway.relay_url`` in config.yaml). Deployments
that don't set it are unaffected — exactly the same shape as ``gateway.proxy_url``.
"""
from __future__ import annotations
import os
from typing import Optional
def _clean_relay_url(raw: object) -> Optional[str]:
    """Normalize a raw relay URL setting; return None when it is unusable.

    Non-string values (for example a YAML ``null`` or a number) and values
    that are empty once surrounding whitespace and trailing slashes are
    removed both count as "not configured".
    """
    if not isinstance(raw, str):
        return None
    return raw.strip().rstrip("/") or None


def relay_url() -> Optional[str]:
    """Return the connector relay endpoint URL, or None when relay is not configured.

    Checks ``GATEWAY_RELAY_URL`` (convenient for Docker) first, then
    ``gateway.relay_url`` in config.yaml. A non-empty value activates the relay
    platform; absence means a normal direct/single-tenant gateway.

    The value is stripped of surrounding whitespace and trailing slashes. A
    value that is empty after that normalization is treated as unset, so the
    function never returns an empty string and an unusable env value does not
    mask the config-file setting.

    Returns:
        The normalized URL, or None when neither source provides one.
    """
    url = _clean_relay_url(os.environ.get("GATEWAY_RELAY_URL"))
    if url is not None:
        return url
    try:
        from gateway.run import _load_gateway_config  # late import to avoid cycle

        cfg = _load_gateway_config() or {}
        return _clean_relay_url((cfg.get("gateway") or {}).get("relay_url"))
    except Exception:  # noqa: BLE001 - config absence/parse must never crash registration
        # Best-effort lookup: stay non-fatal, but leave a trace so a broken
        # config.yaml that disables the relay can still be diagnosed.
        import logging

        logging.getLogger(__name__).debug(
            "could not read gateway.relay_url from config", exc_info=True,
        )
        return None
def register_relay_adapter(force: bool = False) -> bool:
def relay_platform_identity() -> tuple[str, str]:
    """Return the ``(platform, bot_id)`` pair sent in the relay handshake hello.

    ``GATEWAY_RELAY_PLATFORM`` supplies the platform name and
    ``GATEWAY_RELAY_BOT_ID`` the bot id; both are whitespace-stripped. A
    missing or blank platform falls back to ``"relay"`` and a missing bot id
    to ``""``, so the default result is ``("relay", "")``. The overrides exist
    so one connector can front several platforms.
    """
    env = os.environ

    platform_name = env.get("GATEWAY_RELAY_PLATFORM", "relay").strip()
    if not platform_name:
        # Set-but-blank is treated the same as unset.
        platform_name = "relay"

    bot_identifier = env.get("GATEWAY_RELAY_BOT_ID", "").strip()
    return platform_name, bot_identifier
def register_relay_adapter(force: bool = False, url: Optional[str] = None) -> bool:
"""Register the generic ``relay`` platform via the platform registry.
No-op unless the relay flag is set (or ``force=True`` for tests). Returns
True if registration happened. Additive: uses the same registry path as
plugin adapters, so no core dispatch changes are needed.
Registers when a relay URL is configured (or ``force=True`` for tests, which
builds a transport-less adapter — the unit-test posture). Returns True if
registration happened. Additive: uses the same registry path as plugin
adapters, so no core dispatch changes are needed.
The factory builds a transport-less ``RelayAdapter`` with a placeholder
descriptor; the real ``CapabilityDescriptor`` is negotiated at handshake
time via the transport's ``handshake()``. (Wiring the live transport +
handshake into ``GatewayRunner`` is later-phase work; this task only proves
the adapter is constructible through the registry behind the flag.)
When a URL is present the factory builds a live ``WebSocketRelayTransport``;
the ``RelayAdapter`` negotiates the real ``CapabilityDescriptor`` at
``connect()`` time via ``transport.handshake()``.
"""
if not (force or relay_enabled()):
resolved_url = url if url is not None else relay_url()
if not (force or resolved_url):
return False
from gateway.platform_registry import PlatformEntry, platform_registry
from gateway.relay.adapter import RelayAdapter
from gateway.relay.descriptor import CONTRACT_VERSION, CapabilityDescriptor
platform, bot_id = relay_platform_identity()
def _factory(config):
# Placeholder descriptor; replaced by the negotiated one at connect time
# when a transport is present. With no URL (force/test) the adapter is
# transport-less and keeps the placeholder.
placeholder = CapabilityDescriptor(
contract_version=CONTRACT_VERSION,
platform="relay",
platform=platform,
label="Relay",
max_message_length=4096,
supports_draft_streaming=False,
@ -66,7 +92,12 @@ def register_relay_adapter(force: bool = False) -> bool:
markdown_dialect="plain",
len_unit="chars",
)
return RelayAdapter(config, placeholder)
transport = None
if resolved_url:
from gateway.relay.ws_transport import WebSocketRelayTransport
transport = WebSocketRelayTransport(resolved_url, platform, bot_id)
return RelayAdapter(config, placeholder, transport=transport)
platform_registry.register(
PlatformEntry(

View file

@ -5110,6 +5110,21 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
"plugin discovery failed at gateway startup", exc_info=True,
)
# Register the generic relay adapter when a connector relay URL is
# configured (GATEWAY_RELAY_URL / gateway.relay_url). No URL -> no-op, so
# direct/single-tenant deployments are unaffected. When configured, the
# adapter dials the connector over a WebSocket, negotiates its capability
# descriptor at handshake, and bridges inbound/outbound like any platform.
try:
from gateway.relay import register_relay_adapter, relay_url
if register_relay_adapter():
logger.info("relay adapter registered (connector at %s)", relay_url())
except Exception:
logger.warning(
"relay adapter registration failed at gateway startup", exc_info=True,
)
# Register declarative shell hooks from cli-config.yaml. Gateway
# has no TTY, so consent has to come from one of the three opt-in
# channels (--accept-hooks on launch, HERMES_ACCEPT_HOOKS env var,