mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-21 10:22:18 +00:00
feat(cron,gateway): NAS-JWT fire verifier + /api/cron/fire webhook (Chronos)
Phase 4E (E.1 + E.2). The inbound side of Chronos: NAS POSTs the agent when a one-shot fires; the agent verifies a NAS-minted JWT and runs the job. E.1 — plugins/cron/chronos/verify.py: - verify_nas_fire_token(token, expected_audience, jwks_or_key, issuer): verifies signature against the NAS JWKS (RS/ES family; symmetric rejected), aud == this agent, exp/nbf, iss, and purpose == "cron_fire" (so a general agent JWT can't be replayed against the fire endpoint). Returns claims or None; never raises. Crypto delegated to PyJWT[crypto] (already a declared dep) — no hand-rolled JWT, no new dependency. No key configured → refuse (never unsigned-decode a security boundary). - get_fire_verifier(): pluggable indirection so the DQ-4 escape hatch (direct per-job cron-key) can swap in with no handler change. E.2 — gateway/platforms/api_server.py: - POST /api/cron/fire (registered only when _CRON_AVAILABLE). Authenticated by the NAS-JWT via get_fire_verifier() — NOT API_SERVER_KEY (NAS holds no API key; this is the only inbound that triggers remote job execution, so it gets its own purpose-scoped check). Verifier args come from cron.chronos.* config. 401 on bad/missing/forged token. 400 on missing job_id. On success: 202 + fire_due runs in the background (so a long agent turn never trips NAS's HTTP timeout); the store CAS claim inside fire_due de-dupes a scheduler retry. Tests: - test_chronos_verify (11): REAL RS256 signing — valid→claims, wrong-aud, missing/wrong purpose, expired, wrong-iss, tampered-signature (attacker key), no-key-refuse, empty-token, JWKS-URL key resolution, get_fire_verifier. - test_cron_fire_webhook (5): valid→202+fire, invalid→401+no-fire, missing token→401, missing job_id→400, and fire path does NOT require API_SERVER_KEY. api_server regression suites (214) green. E.3 (NAS endpoints) is a separate cross-repo PR; the wire contract lands next (docs/chronos-managed-cron-contract.md).
This commit is contained in:
parent
4c8bbe6416
commit
3fc7b624d8
4 changed files with 500 additions and 0 deletions
103
plugins/cron/chronos/verify.py
Normal file
103
plugins/cron/chronos/verify.py
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
"""Inbound cron-fire token verification for Chronos (Phase 4E.1).
|
||||
|
||||
When NAS relays an external scheduler fire to the agent, it POSTs
|
||||
``/api/cron/fire`` with a short-lived NAS-minted JWT. This module verifies that
|
||||
JWT before any job runs — the security boundary for remotely-triggered job
|
||||
execution.
|
||||
|
||||
We verify a NAS-minted JWT (the trust path the agent already has) rather than
|
||||
let an external scheduler call the agent directly: the scheduler signs with
|
||||
NAS's keys, which the agent doesn't (and shouldn't) hold. See the plan's DQ-4.
|
||||
|
||||
The verifier is pluggable (``get_fire_verifier``) so the escape-hatch mode
|
||||
(direct per-job cron-key) can swap in later with no handler change.
|
||||
|
||||
Crypto is delegated to PyJWT (already a declared dependency) — we do NOT
|
||||
hand-roll JWT verification.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Callable, Dict, Optional
|
||||
|
||||
# Module-level logger. The name is a fixed string rather than ``__name__``, so
# it stays the same however this module is imported.
logger = logging.getLogger("cron.chronos.verify")
|
||||
|
||||
# The purpose claim that scopes a token to the fire endpoint. A general agent
|
||||
# JWT (without this claim) must NOT be replayable against /api/cron/fire.
|
||||
_FIRE_PURPOSE = "cron_fire"
|
||||
|
||||
|
||||
# JWKS clients keyed by URL. PyJWKClient caches the fetched key set on the
# instance, so building a new client per call would re-download the JWKS on
# every fire. URLs come from configuration (not from the request), so this map
# stays tiny.
_JWKS_CLIENTS: Dict[str, Any] = {}


def _resolve_signing_key(token: str, jwks_or_key: str) -> Any:
    """Return the key used to verify *token*: JWKS lookup by kid, or inline PEM."""
    if not jwks_or_key.startswith(("http://", "https://")):
        # A PEM public key passed inline (test / pinned-key deployments).
        return jwks_or_key

    # NOTE(review): plain ``http://`` JWKS URLs are accepted here; keys fetched
    # over plaintext can be substituted in transit — confirm this is intended
    # outside of local/test setups.
    client = _JWKS_CLIENTS.get(jwks_or_key)
    if client is None:
        from jwt import PyJWKClient

        # Constructing the client does no network I/O; the fetch happens on
        # the first key lookup and is then cached by the client itself.
        client = _JWKS_CLIENTS.setdefault(jwks_or_key, PyJWKClient(jwks_or_key))
    return client.get_signing_key_from_jwt(token).key


def verify_nas_fire_token(
    *,
    token: str,
    expected_audience: str,
    jwks_or_key: Optional[str] = None,
    issuer: Optional[str] = None,
    leeway_seconds: int = 30,
) -> Optional[Dict[str, Any]]:
    """Verify a NAS-minted cron-fire JWT. Return decoded claims, or None.

    Checks (all must pass):

    - signature against the NAS key. ``jwks_or_key`` is either a JWKS URL
      (``http://`` / ``https://``; the key is selected by the token's kid) or
      an inline PEM public key. Only asymmetric algorithms are accepted
      (RS256/RS384/RS512, ES256/ES384); symmetric (HS*) tokens are rejected.
    - ``aud`` == ``expected_audience`` (this agent: ``agent:{instance_id}``).
    - ``exp`` (required) and ``nbf`` (when present) within ``leeway_seconds``.
    - ``iss`` == ``issuer`` when an issuer is configured.
    - ``purpose`` == ``"cron_fire"`` — so a general agent JWT can't be
      replayed against the fire endpoint.

    Args:
        token: The compact JWT presented by the caller.
        expected_audience: The audience value this agent accepts.
        jwks_or_key: JWKS URL or inline PEM public key; when missing the
            token is refused rather than decoded unverified.
        issuer: Expected ``iss`` value; not checked when falsy.
        leeway_seconds: Clock-skew allowance for the time-based claims.

    Returns:
        The decoded claims dict, or None on any failure. Failures are logged
        and never raised, so the handler can answer 401 without leaking which
        check failed.
    """
    if not token or not expected_audience:
        return None
    if not jwks_or_key:
        # No verification key configured → cannot verify → refuse. We never
        # fall back to unsigned decode for a security boundary.
        logger.warning("cron fire: no JWKS/key configured; refusing token")
        return None

    try:
        import jwt

        signing_key = _resolve_signing_key(token, jwks_or_key)

        decode_kwargs: Dict[str, Any] = dict(
            algorithms=["RS256", "RS384", "RS512", "ES256", "ES384"],
            audience=expected_audience,
            leeway=leeway_seconds,
            options={"require": ["exp", "aud"]},
        )
        if issuer:
            decode_kwargs["issuer"] = issuer

        claims = jwt.decode(token, signing_key, **decode_kwargs)
    except Exception as e:
        # Deliberately broad: a missing PyJWT, a JWKS fetch error and every
        # signature/claim failure all collapse to "refuse".
        logger.warning("cron fire: token verification failed: %s", e)
        return None

    if claims.get("purpose") != _FIRE_PURPOSE:
        logger.warning("cron fire: token missing/!=%s purpose claim", _FIRE_PURPOSE)
        return None

    return claims
|
||||
|
||||
|
||||
def get_fire_verifier() -> Callable[..., Optional[Dict[str, Any]]]:
    """Hand back the callable that authenticates inbound fire requests.

    Today this is always the NAS-JWT verifier. The indirection exists so the
    DQ-4 escape hatch (direct per-job cron-key) can later return a cron-key
    verifier from here, chosen by config, without touching the webhook
    handler.
    """
    active_verifier = verify_nas_fire_token
    return active_verifier
|
||||
Loading…
Add table
Add a link
Reference in a new issue