hermes-agent/tests/gateway/relay/test_relay_adapter.py
Ben c93b9f9057
Some checks are pending
CI / detect (push) Waiting to run
CI / tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / typecheck (push) Blocked by required conditions
CI / docs-site (push) Blocked by required conditions
CI / history-check (push) Blocked by required conditions
CI / contributor-check (push) Blocked by required conditions
CI / uv-lockfile (push) Blocked by required conditions
CI / docker-lint (push) Blocked by required conditions
CI / supply-chain (push) Blocked by required conditions
CI / osv-scanner (push) Blocked by required conditions
CI / All required checks pass (push) Blocked by required conditions
Deploy Site / deploy-vercel (push) Waiting to run
Deploy Site / deploy-docs (push) Waiting to run
Docker Build and Publish / build-amd64 (push) Waiting to run
Docker Build and Publish / build-arm64 (push) Waiting to run
Docker Build and Publish / merge (push) Blocked by required conditions
feat(relay): terminal 4401 (opt-out) → clean "Relay disabled" state
Phase 7 Unit 7d-B. When an operator opts an instance OUT of the Team Gateway
relay (Unit 7b deprovision), the connector revokes the per-gateway secret and
closes the gateway's WS with 4401. The reconnect supervisor previously treated
EVERY close as retryable, so the live process spun "retrying 4401" forever and
the dashboard showed a red error — opt-out looked like a failure.

Now a 4401 close that arrives AFTER a successful handshake is recognized as a
terminal credential revocation:

- ws_transport.py: track `_handshake_succeeded` (set when a descriptor is
  received); on a 4401 close after a prior success, latch `auth_revoked` and do
  NOT spawn the reconnect supervisor. A 4401 BEFORE any successful handshake
  stays retryable (cold-start / not-yet-provisioned race, not a revocation).
  New `auth_revoked` property + a websockets-version-safe close-code reader
  (prefers `.rcvd`/`.sent` Close frames; `.code` is deprecated in websockets 13+).
- adapter.py: a revocation monitor turns `transport.auth_revoked` into a clean,
  NON-retryable `relay_disabled` fatal and notifies the gateway's fatal-error
  handler (so the adapter is removed and NOT queued for reconnection — the
  credential is dead until the instance is recreated). Monitor is cancelled on
  disconnect; only started when the transport exposes `auth_revoked` (prod WS).
- run.py: `_handle_adapter_fatal_error` maps the `relay_disabled` code to a
  `disabled` platform_state (not `fatal`/`retrying`).
- web: PlatformsCard renders the `disabled` state with a neutral outline badge,
  a PowerOff icon, and muted (not destructive-red) text + message. New optional
  `status.disabled` i18n string ("Disabled").

Also bundles the Phase 7 contract-doc update (this doc is authoritative in
hermes-agent): docs/relay-connector-contract.md gains an "Author-first
resolution + the account-link (DM) path" section documenting the
multi-tenant-guild rule (D-7.2 — route by authenticated author binding, never by
guild; unlinked → fail-closed), the `/link <code>` DM flow, and the
connector-authoritative opt-out + terminal-4401 behavior this PR implements.

Tests: +2 ws_transport (4401-after-handshake terminal / no-reconnect;
4401-before-handshake stays retryable) and +2 adapter (revocation → non-retryable
relay_disabled fatal + handler fired; no-revocation → no fatal). 138 relay tests
pass (incl. the contract-doc conformance test); ruff clean; web tsc clean.

Phase 7 Unit 7d-B (relay-adapter solo lane). Q17 → Option 2; Option 3 (live
de-register, no recreate) + the restart-re-provision hole deferred post-alpha.
2026-06-24 18:43:01 +10:00

200 lines
6.5 KiB
Python

"""RelayAdapter capability-advertisement tests (relay Phase 1, Task 1.1)."""
import pytest
from gateway.config import Platform, PlatformConfig
from gateway.relay.adapter import RelayAdapter
from gateway.relay.descriptor import CONTRACT_VERSION, CapabilityDescriptor
def make_desc(**kw) -> CapabilityDescriptor:
    """Build a ``CapabilityDescriptor`` from Telegram-flavoured defaults.

    Every keyword argument replaces the default of the same name, so a test
    only has to spell out the field it actually cares about.
    """
    defaults = {
        "contract_version": CONTRACT_VERSION,
        "platform": "telegram",
        "label": "Telegram",
        "max_message_length": 4096,
        "supports_draft_streaming": False,
        "supports_edit": True,
        "supports_threads": True,
        "markdown_dialect": "markdown_v2",
        "len_unit": "utf16",
        "emoji": "\u2708\ufe0f",
        "platform_hint": "",
        "pii_safe": False,
    }
    # Caller-supplied values win over the defaults.
    merged = {**defaults, **kw}
    return CapabilityDescriptor(**merged)
def _adapter(**desc_kw) -> RelayAdapter:
    """Return a ``RelayAdapter`` built without a transport.

    ``desc_kw`` is forwarded to ``make_desc`` to customise the descriptor.
    """
    config = PlatformConfig()
    descriptor = make_desc(**desc_kw)
    return RelayAdapter(config, descriptor)
def test_relay_platform_member_exists():
    """Looking ``Platform`` up by the value ``"relay"`` yields ``Platform.RELAY``."""
    member = Platform("relay")
    assert member is Platform.RELAY
def test_advertises_descriptor_max_length():
    """The adapter's ``MAX_MESSAGE_LENGTH`` mirrors the descriptor's value."""
    adapter = _adapter(max_message_length=2000)
    assert adapter.MAX_MESSAGE_LENGTH == 2000
def test_supports_draft_streaming_follows_descriptor():
    """``supports_draft_streaming()`` returns exactly the descriptor's flag."""
    # Same order as before: the False case is checked first, then True.
    for flag in (False, True):
        adapter = _adapter(supports_draft_streaming=flag)
        assert adapter.supports_draft_streaming() is flag
def test_len_fn_utf16_counts_code_units():
    """With ``len_unit="utf16"`` the length function counts UTF-16 code units."""
    adapter = _adapter(len_unit="utf16")
    # U+1F600 lies outside the BMP, so UTF-16 encodes it as a surrogate pair.
    measured = adapter.message_len_fn("\U0001f600")
    assert measured == 2
def test_len_fn_chars_uses_builtin_len():
    """With ``len_unit="chars"`` a single astral-plane emoji measures as 1."""
    adapter = _adapter(len_unit="chars")
    measured = adapter.message_len_fn("\U0001f600")
    assert measured == 1
def test_is_a_base_platform_adapter():
    """A ``RelayAdapter`` instance is a ``BasePlatformAdapter``.

    stream_consumer guards on ``isinstance(adapter, BasePlatformAdapter)``,
    so that check has to succeed for the relay adapter.
    """
    from gateway.platforms.base import BasePlatformAdapter

    adapter = _adapter()
    assert isinstance(adapter, BasePlatformAdapter)
@pytest.mark.asyncio
async def test_connect_without_transport_raises():
    """``connect()`` on a transport-less adapter raises ``RuntimeError``."""
    adapter = _adapter()
    expect_error = pytest.raises(RuntimeError, match="no transport")
    with expect_error:
        await adapter.connect()
@pytest.mark.asyncio
async def test_send_without_transport_returns_failure():
    """``send()`` on a transport-less adapter reports failure instead of raising."""
    adapter = _adapter()
    outcome = await adapter.send("chat1", "hello")
    assert outcome.success is False
    assert outcome.error == "no transport"
class _CaptureTransport:
    """Minimal RelayTransport stand-in that records the outbound action."""

    def __init__(self):
        # Last action handed to ``send_outbound``; ``None`` until the first send.
        self.sent = None
        # Inbound handler registered via ``set_inbound_handler``. Initialised
        # here so the attribute exists even before a handler is registered.
        self._h = None

    def set_inbound_handler(self, h):  # noqa: D401
        """Store the inbound handler ``h``; this stand-in never invokes it."""
        self._h = h

    async def send_outbound(self, action):
        """Remember ``action`` and return a canned success result."""
        self.sent = action
        return {"success": True, "message_id": "m1"}
def _make_event(chat_id="chan-1", guild_id="guild-9"):
    """Build a text ``MessageEvent`` whose source is a relay channel.

    ``chat_id`` and ``guild_id`` are placed on the event's ``SessionSource``.
    """
    from gateway.platforms.base import MessageEvent, MessageType
    from gateway.session import SessionSource

    return MessageEvent(
        text="hi",
        source=SessionSource(
            platform=Platform.RELAY,
            chat_id=chat_id,
            chat_type="channel",
            guild_id=guild_id,
        ),
        message_type=MessageType.TEXT,
    )
@pytest.mark.asyncio
async def test_send_reattaches_guild_id_from_inbound_scope():
    """``send()`` restores the guild scope captured from an inbound event.

    The connector's egress guard resolves the owning tenant from
    ``metadata.guild_id``, but the gateway's generic delivery path drops that
    field, so the relay adapter has to put it back. Regression for the live
    failure 'discord egress declined: target not routed to an onboarded
    tenant'.
    """
    transport = _CaptureTransport()
    adapter = RelayAdapter(PlatformConfig(), make_desc(platform="discord"), transport=transport)

    # Only the scope-capture step is exercised here, not the whole
    # handle_message pipeline: pretend guild-9 / chan-1 delivered a message.
    inbound = _make_event(chat_id="chan-1", guild_id="guild-9")
    adapter._capture_scope(inbound)

    await adapter.send("chan-1", "the reply")

    outbound_metadata = transport.sent["metadata"]
    assert outbound_metadata.get("guild_id") == "guild-9"
@pytest.mark.asyncio
async def test_send_without_known_scope_omits_guild_id():
    """No inbound event seen for the chat (e.g. a DM) means no ``guild_id``.

    The adapter must not invent a scope it never learned.
    """
    transport = _CaptureTransport()
    adapter = RelayAdapter(PlatformConfig(), make_desc(platform="discord"), transport=transport)

    await adapter.send("unknown-chat", "hi")

    outbound_metadata = transport.sent["metadata"]
    assert "guild_id" not in outbound_metadata
@pytest.mark.asyncio
async def test_send_preserves_explicit_guild_id():
    """A caller-supplied ``metadata.guild_id`` wins over the captured scope."""
    transport = _CaptureTransport()
    adapter = RelayAdapter(PlatformConfig(), make_desc(platform="discord"), transport=transport)

    # Capture guild-9 for chan-1 first, so an overwrite would be observable.
    adapter._capture_scope(_make_event(chat_id="chan-1", guild_id="guild-9"))

    explicit = {"guild_id": "explicit-1"}
    await adapter.send("chan-1", "hi", metadata=explicit)

    outbound_metadata = transport.sent["metadata"]
    assert outbound_metadata["guild_id"] == "explicit-1"
# ── Phase 7 Unit 7d-B: terminal auth revocation → clean "relay disabled" ─────
class _RevokedTransport:
    """Transport stand-in that reports a terminal auth revocation (the
    production WebSocketRelayTransport latches this after a 4401 close that
    follows a successful handshake)."""

    def __init__(self):
        # Already revoked from construction, so a monitor polling this flag
        # sees the revocation on its first check.
        self.auth_revoked = True
        # Inbound handler registered via ``set_inbound_handler``. Initialised
        # here so the attribute exists even before a handler is registered.
        self._h = None

    def set_inbound_handler(self, h):  # noqa: D401
        """Store the inbound handler ``h``; this stand-in never invokes it."""
        self._h = h
@pytest.mark.asyncio
async def test_revocation_marks_relay_disabled_non_retryable():
    """A transport reporting ``auth_revoked`` yields a clean ``relay_disabled`` fatal.

    The fatal must be NON-retryable, and the registered fatal-error handler
    must be called exactly once with the adapter itself.
    """
    adapter = RelayAdapter(
        PlatformConfig(),
        make_desc(platform="discord"),
        transport=_RevokedTransport(),
    )
    notified = []

    def _record(adapter):
        notified.append(adapter)

    adapter.set_fatal_error_handler(_record)

    # Call the monitor body directly; the transport is revoked from the
    # start, so the poll loop exits on its first check.
    await adapter._watch_for_revocation(poll_interval_s=0.01)

    assert adapter.has_fatal_error is True
    assert adapter.fatal_error_code == "relay_disabled"
    assert adapter.fatal_error_retryable is False
    message = (adapter.fatal_error_message or "").lower()
    assert "disabled" in message
    assert notified == [adapter]
@pytest.mark.asyncio
async def test_no_revocation_no_fatal():
    """A transport that has NOT been revoked never trips the disabled fatal."""

    class _LiveTransport:
        # Stays False for the whole test: the credential is still valid.
        auth_revoked = False

        def set_inbound_handler(self, h):  # noqa: D401
            self._h = h

    adapter = RelayAdapter(
        PlatformConfig(),
        make_desc(platform="discord"),
        transport=_LiveTransport(),
    )

    import asyncio

    # Let the monitor poll for a short window, then cancel it. It should not
    # have raised a fatal in that time.
    monitor = asyncio.create_task(adapter._watch_for_revocation(poll_interval_s=0.01))
    await asyncio.sleep(0.05)
    monitor.cancel()
    try:
        await monitor
    except asyncio.CancelledError:
        # Expected: the cancellation was requested just above.
        pass

    assert adapter.has_fatal_error is False