mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
feat(relay): forward a stable instance id at self-provision (Phase 6 Unit α) (#50772)
Add relay_instance_id() (env GATEWAY_RELAY_INSTANCE_ID first, then gateway.relay_instance_id in config.yaml, mirroring the other relay readers) and forward it in the /relay/provision body so the connector can bind gatewayId -> instanceId and route inbound per-instance once Phase 6 delivery lands. The value is gateway-asserted but safely scoped: the org/tenant stays NAS-token-verified at the connector, so a dishonest gateway can only bind its OWN tenant's instance — same posture as relay_endpoint(). instanceId is only added to the body when present, so omitting it lets the connector store null (back-compat: self-hosted / pre-Phase-6 gateways simply have no binding yet). For a managed (NAS-hosted) agent the id is NAS's AgentInstance.id, stamped into the container env beside GATEWAY_RELAY_URL. Tests: reader (env/config/absent), self_provision_relay forwards the id (set + absent), and the real _post_provision body includes instanceId ONLY when set. Refs: ~/nous/specs/gateway-gateway plan.md Phase 6 Unit α; decisions.md Q11.
This commit is contained in:
parent
065946d84f
commit
75a70d98f3
2 changed files with 130 additions and 1 deletions
|
|
@ -131,6 +131,33 @@ def relay_route_keys() -> list[str]:
|
|||
return [k.strip() for k in raw.split(",") if k.strip()]
|
||||
|
||||
|
||||
def relay_instance_id() -> Optional[str]:
    """Resolve the stable per-instance id forwarded at provision (Phase 6 Unit α).

    The connector uses this value to bind ``gatewayId -> instanceId`` so that
    inbound traffic can be routed per-instance (instead of tenant-broadcast)
    once Phase 6 delivery lands. For a managed agent the value is the NAS
    ``AgentInstance.id``, which NAS stamps into the container env as
    ``GATEWAY_RELAY_INSTANCE_ID`` next to ``GATEWAY_RELAY_URL``; a self-hosted
    operator may set it explicitly.

    The id is gateway-asserted but safely scoped: the org/tenant stays
    token-verified, so a dishonest gateway can only bind ITS OWN tenant's
    instance — the same posture as ``relay_endpoint()``.

    Lookup order: the ``GATEWAY_RELAY_INSTANCE_ID`` env var (Docker/NAS) first,
    then ``gateway.relay_instance_id`` in config.yaml.

    Returns:
        The whitespace-stripped id, or ``None`` when neither source yields a
        non-empty value. With ``None`` the connector stores null and
        per-instance routing simply has no binding for this connection yet
        (back-compat).
    """
    from_env = os.environ.get("GATEWAY_RELAY_INSTANCE_ID", "").strip()
    if from_env:
        return from_env

    try:
        from gateway.run import _load_gateway_config  # late import to avoid cycle

        gateway_section = _load_gateway_config().get("gateway") or {}
        configured = str(gateway_section.get("relay_instance_id", "") or "").strip()
    except Exception:  # noqa: BLE001 - config absence/parse must never crash boot
        return None

    return configured or None
|
||||
|
||||
|
||||
def _provision_url(relay_dial_url: str) -> str:
|
||||
"""Map the ``ws(s)://…/relay`` dial URL to the ``http(s)://…/relay/provision`` POST URL."""
|
||||
raw = relay_dial_url.rstrip("/")
|
||||
|
|
@ -152,6 +179,7 @@ def _post_provision(
|
|||
bot_id: str,
|
||||
gateway_endpoint: Optional[str],
|
||||
route_keys: list[str],
|
||||
instance_id: Optional[str] = None,
|
||||
timeout: float = 15.0,
|
||||
) -> dict:
|
||||
"""POST to the connector's ``/relay/provision`` and return the JSON body.
|
||||
|
|
@ -173,6 +201,10 @@ def _post_provision(
|
|||
"gatewayEndpoint": gateway_endpoint or "",
|
||||
"routeKeys": route_keys,
|
||||
}
|
||||
# Only send instanceId when we actually have one — omitting it lets the
|
||||
# connector store null (back-compat) rather than binding an empty string.
|
||||
if instance_id:
|
||||
body["instanceId"] = instance_id
|
||||
data = json.dumps(body).encode("utf-8")
|
||||
req = urllib.request.Request(
|
||||
provision_url,
|
||||
|
|
@ -277,6 +309,7 @@ def self_provision_relay() -> bool:
|
|||
gateway_id = os.environ.get("GATEWAY_RELAY_ID", "").strip() or f"gw-{host or 'hermes'}"
|
||||
endpoint = relay_endpoint()
|
||||
route_keys = relay_route_keys()
|
||||
instance_id = relay_instance_id()
|
||||
|
||||
try:
|
||||
result = _post_provision(
|
||||
|
|
@ -287,6 +320,7 @@ def self_provision_relay() -> bool:
|
|||
bot_id=bot_id,
|
||||
gateway_endpoint=endpoint,
|
||||
route_keys=route_keys,
|
||||
instance_id=instance_id,
|
||||
)
|
||||
except RuntimeError as exc:
|
||||
logger.warning("relay self-provision failed (%s); gateway will boot without relay auth", exc)
|
||||
|
|
@ -302,11 +336,12 @@ def self_provision_relay() -> bool:
|
|||
os.environ["GATEWAY_RELAY_DELIVERY_KEY"] = str(result.get("deliveryKey") or "")
|
||||
tenant = str(result.get("tenant") or "")
|
||||
logger.info(
|
||||
"relay self-provisioned (gateway_id=%s tenant=%s routes=%d inbound=%s)",
|
||||
"relay self-provisioned (gateway_id=%s tenant=%s routes=%d inbound=%s instance=%s)",
|
||||
os.environ["GATEWAY_RELAY_ID"],
|
||||
tenant or "?",
|
||||
len(route_keys),
|
||||
"yes" if endpoint else "outbound-only",
|
||||
instance_id or "unbound",
|
||||
)
|
||||
return True
|
||||
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ def _clean_env(monkeypatch):
|
|||
"GATEWAY_RELAY_ROUTE_KEYS",
|
||||
"GATEWAY_RELAY_PLATFORM",
|
||||
"GATEWAY_RELAY_BOT_ID",
|
||||
"GATEWAY_RELAY_INSTANCE_ID",
|
||||
):
|
||||
monkeypatch.delenv(k, raising=False)
|
||||
# Never read config.yaml off disk in these tests.
|
||||
|
|
@ -83,6 +84,24 @@ def test_relay_route_keys_empty():
|
|||
assert relay.relay_route_keys() == []
|
||||
|
||||
|
||||
def test_relay_instance_id_from_env(monkeypatch):
    """The id is read from GATEWAY_RELAY_INSTANCE_ID with padding stripped."""
    padded = " inst-abc "
    monkeypatch.setenv("GATEWAY_RELAY_INSTANCE_ID", padded)

    resolved = relay.relay_instance_id()

    assert resolved == "inst-abc"
|
||||
|
||||
|
||||
def test_relay_instance_id_absent_is_none():
    """When this test configures no instance id, the reader yields None."""
    resolved = relay.relay_instance_id()

    assert resolved is None
|
||||
|
||||
|
||||
def test_relay_instance_id_from_config(monkeypatch):
    """The id is taken from ``gateway.relay_instance_id`` in the loaded config."""

    def _fake_config():
        # Stand-in for the real config loader; only the key under test is set.
        return {"gateway": {"relay_instance_id": "inst-from-config"}}

    monkeypatch.setattr(
        "gateway.run._load_gateway_config",
        _fake_config,
        raising=False,
    )

    resolved = relay.relay_instance_id()

    assert resolved == "inst-from-config"
|
||||
|
||||
|
||||
def test_provision_url_maps_ws_to_http():
|
||||
assert relay._provision_url("wss://c.example/relay") == "https://c.example/relay/provision"
|
||||
assert relay._provision_url("ws://c.example/relay") == "http://c.example/relay/provision"
|
||||
|
|
@ -161,6 +180,81 @@ def test_outbound_only_when_no_endpoint(monkeypatch):
|
|||
assert relay.relay_connection_auth()[1] == "a" * 64
|
||||
|
||||
|
||||
# ─────────────────── instance-id forwarding (Phase 6 Unit α) ───────────────────
|
||||
|
||||
def test_forwards_instance_id_to_provision(monkeypatch):
    """An agent stamped with GATEWAY_RELAY_INSTANCE_ID passes that id on to
    ``_post_provision``, so the connector can bind gatewayId -> instanceId
    (per-instance routing)."""
    seen: dict = {}
    _arm(monkeypatch)
    monkeypatch.setenv("GATEWAY_RELAY_INSTANCE_ID", "inst-abc")
    # Swap the real POST for a stub that records the keyword arguments.
    monkeypatch.setattr(relay, "_post_provision", _stub_post(seen))

    provisioned = relay.self_provision_relay()

    assert provisioned is True
    assert seen["instance_id"] == "inst-abc"
|
||||
|
||||
|
||||
def test_instance_id_absent_forwards_none(monkeypatch):
    """Without a stamp (self-hosted / pre-Phase-6) the forwarded instance_id is
    None; the connector stores null and per-instance routing simply has no
    binding yet."""
    seen: dict = {}
    _arm(monkeypatch)
    # Swap the real POST for a stub that records the keyword arguments.
    monkeypatch.setattr(relay, "_post_provision", _stub_post(seen))

    provisioned = relay.self_provision_relay()

    assert provisioned is True
    assert seen["instance_id"] is None
|
||||
|
||||
|
||||
def test_post_provision_body_includes_instanceId_only_when_set(monkeypatch):
    """The real ``_post_provision`` puts ``instanceId`` in the JSON body ONLY
    when a value is supplied. Leaving the key out lets the connector store null
    (back-compat) instead of binding an empty string."""
    import json

    recorded: dict = {}

    class _FakeResponse:
        """Minimal context-manager response returning a canned provision reply."""

        def __enter__(self):
            return self

        def __exit__(self, *exc_info):
            return False

        def read(self):
            reply = {"secret": "a" * 64, "deliveryKey": "b" * 64, "tenant": "t", "gatewayId": "gw-1"}
            return json.dumps(reply).encode()

    def _capture_request(req, timeout=None):  # noqa: ANN001
        # Decode the outgoing request body so the assertions can inspect it.
        recorded["body"] = json.loads(req.data.decode())
        return _FakeResponse()

    monkeypatch.setattr("urllib.request.urlopen", _capture_request)

    def _provision(**extra):
        # Fresh argument values on every call; only ``extra`` differs.
        return relay._post_provision(
            provision_url="https://c.example/relay/provision",
            access_token="tok",
            gateway_id="gw-1",
            platform="discord",
            bot_id="app",
            gateway_endpoint=None,
            route_keys=[],
            **extra,
        )

    # With an instance id -> present in the body.
    _provision(instance_id="inst-abc")
    assert recorded["body"]["instanceId"] == "inst-abc"

    # Without one -> the key is absent entirely (not "").
    _provision()
    assert "instanceId" not in recorded["body"]
|
||||
|
||||
|
||||
# ─────────────────────────── fail-soft ───────────────────────────
|
||||
|
||||
def test_no_nas_token_is_non_fatal(monkeypatch):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue