mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
The cherry-picked salvage (admin28980's commit) added codex headers only on the
primary chat client path, with two inaccuracies:
- originator was 'hermes-agent' — Cloudflare whitelists codex_cli_rs,
codex_vscode, codex_sdk_ts, and Codex* prefixes. 'hermes-agent' isn't on
the list, so the header had no mitigating effect on the 403 (the
account-id header alone may have been carrying the fix).
- account-id header was 'ChatGPT-Account-Id' — upstream codex-rs auth.rs
uses canonical 'ChatGPT-Account-ID' (PascalCase, trailing -ID).
Also, the auxiliary client (_try_codex + resolve_provider_client raw_codex
branch) constructs OpenAI clients against the same chatgpt.com endpoint with
no default headers at all — so compression, title generation, vision, session
search, and web_extract all still 403 from VPS IPs.
Consolidate the header set into _codex_cloudflare_headers() in
agent/auxiliary_client.py (natural home next to _read_codex_access_token and
the existing JWT decode logic) and call it from all four insertion points:
- run_agent.py: AIAgent.__init__ (initial construction)
- run_agent.py: _apply_client_headers_for_base_url (credential rotation)
- agent/auxiliary_client.py: _try_codex (aux client)
- agent/auxiliary_client.py: resolve_provider_client raw_codex branch
Net: -36/+55 lines, -25 lines of duplicated inline JWT decode replaced by a
single helper. User-Agent switched to 'codex_cli_rs/0.0.0 (Hermes Agent)' to
match the codex-rs shape while keeping product attribution.
Tests in tests/agent/test_codex_cloudflare_headers.py cover:
- originator value, User-Agent shape, canonical header casing
- account-ID extraction from a real JWT fixture
- graceful handling of malformed / non-string / claim-missing tokens
- wiring at all four insertion points (primary init, rotation, both aux paths)
- non-chatgpt base URLs (openrouter) do NOT get codex headers
- switching away from chatgpt.com drops the headers
253 lines
12 KiB
Python
253 lines
12 KiB
Python
"""Regression guard: Codex Cloudflare 403 mitigation headers.
|
|
|
|
The ``chatgpt.com/backend-api/codex`` endpoint sits behind a Cloudflare layer
that whitelists a small set of first-party originators (``codex_cli_rs``,
``codex_vscode``, ``codex_sdk_ts``, ``Codex*``). Requests from non-residential
IPs (VPS, always-on servers, some corporate egress) that don't advertise an
allowed originator are served 403 with ``cf-mitigated: challenge`` regardless
of auth correctness.

``_codex_cloudflare_headers`` in ``agent.auxiliary_client`` centralizes the
header set so the primary chat client (``run_agent.AIAgent.__init__`` +
``_apply_client_headers_for_base_url``) and the auxiliary client paths
(``_try_codex`` and the ``raw_codex`` branch of ``resolve_provider_client``)
all emit the same headers.

These tests pin:
- the originator value (must be ``codex_cli_rs`` — the whitelisted one)
- the User-Agent shape (codex_cli_rs-prefixed)
- ``ChatGPT-Account-ID`` extraction from the OAuth JWT (canonical casing,
  from codex-rs ``auth.rs``)
- graceful handling of malformed tokens (drop the account-ID header, don't
  raise)
- primary-client wiring at both entry points in ``run_agent.py``
- aux-client wiring at both entry points in ``agent/auxiliary_client.py``
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import json
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fixtures
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _make_codex_jwt(account_id: str = "acct-test-123") -> str:
    """Build a syntactically valid Codex-style JWT with the account_id claim.

    The token consists of three dot-separated base64url segments (header,
    claims, signature) with the ``=`` padding stripped.  The signature is a
    fixed placeholder, so the token is only suitable for code that decodes
    the claims without verifying them.

    Args:
        account_id: Value stored under ``chatgpt_account_id`` inside the
            ``https://api.openai.com/auth`` claim.

    Returns:
        The assembled ``header.payload.signature`` string.
    """

    def b64url(data: bytes) -> str:
        # JWT segments are base64url-encoded with trailing '=' removed.
        return base64.urlsafe_b64encode(data).rstrip(b"=").decode()

    header = b64url(b'{"alg":"RS256","typ":"JWT"}')
    claims = {
        "sub": "user-xyz",
        "exp": 9999999999,
        "https://api.openai.com/auth": {
            "chatgpt_account_id": account_id,
            "chatgpt_plan_type": "plus",
        },
    }
    payload = b64url(json.dumps(claims).encode())
    sig = b64url(b"fake-sig")
    return f"{header}.{payload}.{sig}"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _codex_cloudflare_headers — the shared helper
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestCodexCloudflareHeaders:
    """Unit tests for ``agent.auxiliary_client._codex_cloudflare_headers``.

    Each test imports the helper locally and inspects the mapping it returns
    for a given access token.
    """

    def test_originator_is_codex_cli_rs(self):
        """Cloudflare whitelists codex_cli_rs — any other value is 403'd."""
        from agent.auxiliary_client import _codex_cloudflare_headers

        headers = _codex_cloudflare_headers(_make_codex_jwt())
        assert headers["originator"] == "codex_cli_rs"

    def test_user_agent_advertises_codex_cli_rs(self):
        """The User-Agent must start with the ``codex_cli_rs/`` product token."""
        from agent.auxiliary_client import _codex_cloudflare_headers

        headers = _codex_cloudflare_headers(_make_codex_jwt())
        assert headers["User-Agent"].startswith("codex_cli_rs/")

    def test_account_id_extracted_from_jwt(self):
        """The account id embedded in the JWT claims is surfaced as a header."""
        from agent.auxiliary_client import _codex_cloudflare_headers

        headers = _codex_cloudflare_headers(_make_codex_jwt("acct-abc-999"))
        # Canonical casing — matches codex-rs auth.rs
        assert headers["ChatGPT-Account-ID"] == "acct-abc-999"

    def test_canonical_header_casing(self):
        """Upstream codex-rs uses PascalCase with trailing -ID. Match exactly."""
        from agent.auxiliary_client import _codex_cloudflare_headers

        headers = _codex_cloudflare_headers(_make_codex_jwt())
        assert "ChatGPT-Account-ID" in headers
        # The lowercase/titlecase variants MUST NOT be used — pin to be explicit
        assert "chatgpt-account-id" not in headers
        assert "ChatGPT-Account-Id" not in headers

    def test_malformed_token_drops_account_id_without_raising(self):
        """Unparseable tokens still yield base headers, minus the account id."""
        from agent.auxiliary_client import _codex_cloudflare_headers

        for bad in ["not-a-jwt", "", "only.one", " ", "...."]:
            headers = _codex_cloudflare_headers(bad)
            # Still returns base headers — never raises.  The message names
            # the offending token so a failure inside the loop is traceable.
            assert headers["originator"] == "codex_cli_rs", f"token={bad!r}"
            assert "ChatGPT-Account-ID" not in headers, f"token={bad!r}"

    def test_non_string_token_handled(self):
        """A ``None`` token is tolerated and produces no account-id header."""
        from agent.auxiliary_client import _codex_cloudflare_headers

        headers = _codex_cloudflare_headers(None)  # type: ignore[arg-type]
        assert headers["originator"] == "codex_cli_rs"
        assert "ChatGPT-Account-ID" not in headers

    def test_jwt_without_chatgpt_account_id_claim(self):
        """A valid JWT that lacks the account_id claim should still return headers."""
        from agent.auxiliary_client import _codex_cloudflare_headers

        def b64url(data: bytes) -> str:
            # Same unpadded base64url encoding used for real JWT segments.
            return base64.urlsafe_b64encode(data).rstrip(b"=").decode()

        payload = b64url(json.dumps({"sub": "user-xyz", "exp": 9999999999}).encode())
        token = f"{b64url(b'{}')}.{payload}.{b64url(b'sig')}"
        headers = _codex_cloudflare_headers(token)
        assert headers["originator"] == "codex_cli_rs"
        assert "ChatGPT-Account-ID" not in headers
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Primary chat client wiring (run_agent.AIAgent)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestPrimaryClientWiring:
    """Check that ``run_agent.AIAgent`` wires codex headers into its client.

    Every test patches ``run_agent.OpenAI`` with a mock and then inspects
    either the keyword arguments the mock was called with or the agent's
    ``_client_kwargs`` mapping.
    """

    @staticmethod
    def _build_agent(*, api_key: str, base_url: str, provider: str, model: str):
        """Construct an ``AIAgent`` with the flags shared by all tests here.

        Callers patch ``run_agent.OpenAI`` before invoking this helper.

        Args:
            api_key: Credential handed to the agent (a fake JWT or placeholder).
            base_url: Endpoint the agent is pointed at.
            provider: Provider identifier passed through to ``AIAgent``.
            model: Model name passed through to ``AIAgent``.

        Returns:
            The constructed ``AIAgent`` instance.
        """
        from run_agent import AIAgent

        return AIAgent(
            api_key=api_key,
            base_url=base_url,
            provider=provider,
            model=model,
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )

    def test_init_wires_codex_headers_for_chatgpt_base_url(self):
        """Initial construction against chatgpt.com must pass codex headers."""
        token = _make_codex_jwt("acct-primary-init")
        with patch("run_agent.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            self._build_agent(
                api_key=token,
                base_url="https://chatgpt.com/backend-api/codex",
                provider="openai-codex",
                model="gpt-5.4",
            )
            headers = mock_openai.call_args.kwargs.get("default_headers") or {}
            assert headers.get("originator") == "codex_cli_rs"
            assert headers.get("ChatGPT-Account-ID") == "acct-primary-init"
            assert headers.get("User-Agent", "").startswith("codex_cli_rs/")

    def test_apply_client_headers_on_base_url_change(self):
        """Credential-rotation / base-url change path must also emit codex headers."""
        token = _make_codex_jwt("acct-rotation")
        with patch("run_agent.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            agent = self._build_agent(
                api_key="placeholder-openrouter-key",
                base_url="https://openrouter.ai/api/v1",
                provider="openrouter",
                model="anthropic/claude-sonnet-4.6",
            )
            # Simulate rotation into a Codex credential
            agent._client_kwargs["api_key"] = token
            agent._apply_client_headers_for_base_url(
                "https://chatgpt.com/backend-api/codex"
            )
            headers = agent._client_kwargs.get("default_headers") or {}
            assert headers.get("originator") == "codex_cli_rs"
            assert headers.get("ChatGPT-Account-ID") == "acct-rotation"
            assert headers.get("User-Agent", "").startswith("codex_cli_rs/")

    def test_apply_client_headers_clears_codex_headers_off_chatgpt(self):
        """Switching AWAY from chatgpt.com must drop the codex headers."""
        token = _make_codex_jwt()
        with patch("run_agent.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            agent = self._build_agent(
                api_key=token,
                base_url="https://chatgpt.com/backend-api/codex",
                provider="openai-codex",
                model="gpt-5.4",
            )
            # Sanity: headers are set initially
            assert "originator" in (agent._client_kwargs.get("default_headers") or {})
            agent._apply_client_headers_for_base_url(
                "https://api.anthropic.com"
            )
            # default_headers should be popped for anthropic base
            assert "default_headers" not in agent._client_kwargs

    def test_openrouter_base_url_does_not_get_codex_headers(self):
        """A non-chatgpt base URL must not advertise the codex originator."""
        with patch("run_agent.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            self._build_agent(
                api_key="sk-or-test",
                base_url="https://openrouter.ai/api/v1",
                provider="openrouter",
                model="anthropic/claude-sonnet-4.6",
            )
            headers = mock_openai.call_args.kwargs.get("default_headers") or {}
            assert headers.get("originator") != "codex_cli_rs"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Auxiliary client wiring (agent.auxiliary_client)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestAuxiliaryClientWiring:
    """Check that both aux-client entry points pass codex headers to OpenAI.

    ``agent.auxiliary_client.OpenAI`` is patched with a mock in each test and
    the ``default_headers`` keyword it receives is inspected.
    """

    def test_try_codex_passes_codex_headers(self, monkeypatch):
        """_try_codex builds the OpenAI client used for compression / vision /
        title generation when routed through Codex. Must emit codex headers."""
        from agent import auxiliary_client

        token = _make_codex_jwt("acct-aux-try-codex")

        # Force _select_pool_entry to return "no pool" so we fall through to
        # _read_codex_access_token.
        monkeypatch.setattr(
            auxiliary_client, "_select_pool_entry",
            lambda provider: (False, None),
        )
        monkeypatch.setattr(
            auxiliary_client, "_read_codex_access_token",
            lambda: token,
        )
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            # Only the client is asserted on; the model is ignored here.
            client, _model = auxiliary_client._try_codex()
            assert client is not None
            headers = mock_openai.call_args.kwargs.get("default_headers") or {}
            assert headers.get("originator") == "codex_cli_rs"
            assert headers.get("ChatGPT-Account-ID") == "acct-aux-try-codex"
            assert headers.get("User-Agent", "").startswith("codex_cli_rs/")

    def test_resolve_provider_client_raw_codex_passes_codex_headers(self, monkeypatch):
        """The ``raw_codex=True`` branch (used by the main agent loop for direct
        responses.stream() access) must also emit codex headers."""
        from agent import auxiliary_client

        token = _make_codex_jwt("acct-aux-raw-codex")
        monkeypatch.setattr(
            auxiliary_client, "_read_codex_access_token",
            lambda: token,
        )
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            # Only the client is asserted on; the model is ignored here.
            client, _model = auxiliary_client.resolve_provider_client(
                "openai-codex", raw_codex=True,
            )
            assert client is not None
            headers = mock_openai.call_args.kwargs.get("default_headers") or {}
            assert headers.get("originator") == "codex_cli_rs"
            assert headers.get("ChatGPT-Account-ID") == "acct-aux-raw-codex"
            assert headers.get("User-Agent", "").startswith("codex_cli_rs/")
|