diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 19bde946e..53c256858 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -200,6 +200,45 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex" _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex" +def _codex_cloudflare_headers(access_token: str) -> Dict[str, str]: + """Headers required to avoid Cloudflare 403s on chatgpt.com/backend-api/codex. + + The Cloudflare layer in front of the Codex endpoint whitelists a small set of + first-party originators (``codex_cli_rs``, ``codex_vscode``, ``codex_sdk_ts``, + anything starting with ``Codex``). Requests from non-residential IPs (VPS, + server-hosted agents) that don't advertise an allowed originator are served + a 403 with ``cf-mitigated: challenge`` regardless of auth correctness. + + We pin ``originator: codex_cli_rs`` to match the upstream codex-rs CLI, set + ``User-Agent`` to a codex_cli_rs-shaped string (beats SDK fingerprinting), + and extract ``ChatGPT-Account-ID`` (canonical casing, from codex-rs + ``auth.rs``) out of the OAuth JWT's ``chatgpt_account_id`` claim. + + Malformed tokens are tolerated — we drop the account-ID header rather than + raise, so a bad token still surfaces as an auth error (401) instead of a + crash at client construction. + """ + headers = { + "User-Agent": "codex_cli_rs/0.0.0 (Hermes Agent)", + "originator": "codex_cli_rs", + } + if not isinstance(access_token, str) or not access_token.strip(): + return headers + try: + import base64 + parts = access_token.split(".") + if len(parts) < 2: + return headers + payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4) + claims = json.loads(base64.urlsafe_b64decode(payload_b64)) + acct_id = claims.get("https://api.openai.com/auth", {}).get("chatgpt_account_id") + if isinstance(acct_id, str) and acct_id: + headers["ChatGPT-Account-ID"] = acct_id + except Exception: + pass + return headers + + def _to_openai_base_url(base_url: str) -> str: """Normalize an Anthropic-style base URL to OpenAI-compatible format. @@ -1052,7 +1091,11 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]: return None, None base_url = _CODEX_AUX_BASE_URL logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) - real_client = OpenAI(api_key=codex_token, base_url=base_url) + real_client = OpenAI( + api_key=codex_token, + base_url=base_url, + default_headers=_codex_cloudflare_headers(codex_token), + ) return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL @@ -1512,7 +1555,11 @@ def resolve_provider_client( "but no Codex OAuth token found (run: hermes model)") return None, None final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider) - raw_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) + raw_client = OpenAI( + api_key=codex_token, + base_url=_CODEX_AUX_BASE_URL, + default_headers=_codex_cloudflare_headers(codex_token), + ) return (raw_client, final_model) # Standard path: wrap in CodexAuxiliaryClient adapter client, default = _try_codex() diff --git a/run_agent.py b/run_agent.py index a46c11138..9b92e62f7 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1078,22 +1078,8 @@ class AIAgent: elif "portal.qwen.ai" in effective_base.lower(): client_kwargs["default_headers"] = _qwen_portal_headers() elif "chatgpt.com" in effective_base.lower(): - # Match official Codex CLI headers to avoid Cloudflare challenges. - # The ChatGPT-Account-Id header is critical — without it, - # server-hosted agents get 403 Cloudflare JS challenges. - _codex_headers = { - "User-Agent": "hermes-agent/1.0", - "originator": "hermes-agent", - } - try: - import base64 as _b64 - _jwt_payload = json.loads(_b64.b64decode(api_key.split(".")[1] + "==")) - _acct_id = _jwt_payload.get("https://api.openai.com/auth", {}).get("chatgpt_account_id") - if _acct_id: - _codex_headers["ChatGPT-Account-Id"] = _acct_id - except Exception: - pass - client_kwargs["default_headers"] = _codex_headers + from agent.auxiliary_client import _codex_cloudflare_headers + client_kwargs["default_headers"] = _codex_cloudflare_headers(api_key) else: # No explicit creds — use the centralized provider router from agent.auxiliary_client import resolve_provider_client @@ -5330,20 +5316,10 @@ class AIAgent: elif "portal.qwen.ai" in normalized: self._client_kwargs["default_headers"] = _qwen_portal_headers() elif "chatgpt.com" in normalized: - _codex_headers = { - "User-Agent": "hermes-agent/1.0", - "originator": "hermes-agent", - } - try: - import base64 as _b64 - _ak = self._client_kwargs.get("api_key", "") - _jwt_payload = json.loads(_b64.b64decode(_ak.split(".")[1] + "==")) - _acct_id = _jwt_payload.get("https://api.openai.com/auth", {}).get("chatgpt_account_id") - if _acct_id: - _codex_headers["ChatGPT-Account-Id"] = _acct_id - except Exception: - pass - self._client_kwargs["default_headers"] = _codex_headers + from agent.auxiliary_client import _codex_cloudflare_headers + self._client_kwargs["default_headers"] = _codex_cloudflare_headers( + self._client_kwargs.get("api_key", "") + ) else: self._client_kwargs.pop("default_headers", None) diff --git a/tests/agent/test_codex_cloudflare_headers.py b/tests/agent/test_codex_cloudflare_headers.py new file mode 100644 index 000000000..6a343c8f8 --- /dev/null +++ b/tests/agent/test_codex_cloudflare_headers.py @@ -0,0 +1,253 @@ +"""Regression guard: Codex Cloudflare 403 mitigation headers. + +The ``chatgpt.com/backend-api/codex`` endpoint sits behind a Cloudflare layer +that whitelists a small set of first-party originators (``codex_cli_rs``, +``codex_vscode``, ``codex_sdk_ts``, ``Codex*``). Requests from non-residential +IPs (VPS, always-on servers, some corporate egress) that don't advertise an +allowed originator are served 403 with ``cf-mitigated: challenge`` regardless +of auth correctness. + +``_codex_cloudflare_headers`` in ``agent.auxiliary_client`` centralizes the +header set so the primary chat client (``run_agent.AIAgent.__init__`` + +``_apply_client_headers_for_base_url``) and the auxiliary client paths +(``_try_codex`` and the ``raw_codex`` branch of ``resolve_provider_client``) +all emit the same headers. + +These tests pin: +- the originator value (must be ``codex_cli_rs`` — the whitelisted one) +- the User-Agent shape (codex_cli_rs-prefixed) +- ``ChatGPT-Account-ID`` extraction from the OAuth JWT (canonical casing, + from codex-rs ``auth.rs``) +- graceful handling of malformed tokens (drop the account-ID header, don't + raise) +- primary-client wiring at both entry points in ``run_agent.py`` +- aux-client wiring at both entry points in ``agent/auxiliary_client.py`` +""" +from __future__ import annotations + +import base64 +import json +from unittest.mock import MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +def _make_codex_jwt(account_id: str = "acct-test-123") -> str: + """Build a syntactically valid Codex-style JWT with the account_id claim.""" + def b64url(data: bytes) -> str: + return base64.urlsafe_b64encode(data).rstrip(b"=").decode() + header = b64url(b'{"alg":"RS256","typ":"JWT"}') + claims = { + "sub": "user-xyz", + "exp": 9999999999, + "https://api.openai.com/auth": { + "chatgpt_account_id": account_id, + "chatgpt_plan_type": "plus", + }, + } + payload = b64url(json.dumps(claims).encode()) + sig = b64url(b"fake-sig") + return f"{header}.{payload}.{sig}" + + +# --------------------------------------------------------------------------- +# _codex_cloudflare_headers — the shared helper +# --------------------------------------------------------------------------- + +class TestCodexCloudflareHeaders: + def test_originator_is_codex_cli_rs(self): + """Cloudflare whitelists codex_cli_rs — any other value is 403'd.""" + from agent.auxiliary_client import _codex_cloudflare_headers + headers = _codex_cloudflare_headers(_make_codex_jwt()) + assert headers["originator"] == "codex_cli_rs" + + def test_user_agent_advertises_codex_cli_rs(self): + from agent.auxiliary_client import _codex_cloudflare_headers + headers = _codex_cloudflare_headers(_make_codex_jwt()) + assert headers["User-Agent"].startswith("codex_cli_rs/") + + def test_account_id_extracted_from_jwt(self): + from agent.auxiliary_client import _codex_cloudflare_headers + headers = _codex_cloudflare_headers(_make_codex_jwt("acct-abc-999")) + # Canonical casing — matches codex-rs auth.rs + assert headers["ChatGPT-Account-ID"] == "acct-abc-999" + + def test_canonical_header_casing(self): + """Upstream codex-rs uses PascalCase with trailing -ID. Match exactly.""" + from agent.auxiliary_client import _codex_cloudflare_headers + headers = _codex_cloudflare_headers(_make_codex_jwt()) + assert "ChatGPT-Account-ID" in headers + # The lowercase/titlecase variants MUST NOT be used — pin to be explicit + assert "chatgpt-account-id" not in headers + assert "ChatGPT-Account-Id" not in headers + + def test_malformed_token_drops_account_id_without_raising(self): + from agent.auxiliary_client import _codex_cloudflare_headers + for bad in ["not-a-jwt", "", "only.one", " ", "...."]: + headers = _codex_cloudflare_headers(bad) + # Still returns base headers — never raises + assert headers["originator"] == "codex_cli_rs" + assert "ChatGPT-Account-ID" not in headers + + def test_non_string_token_handled(self): + from agent.auxiliary_client import _codex_cloudflare_headers + headers = _codex_cloudflare_headers(None) # type: ignore[arg-type] + assert headers["originator"] == "codex_cli_rs" + assert "ChatGPT-Account-ID" not in headers + + def test_jwt_without_chatgpt_account_id_claim(self): + """A valid JWT that lacks the account_id claim should still return headers.""" + from agent.auxiliary_client import _codex_cloudflare_headers + import base64 as _b64, json as _json + + def b64url(data: bytes) -> str: + return _b64.urlsafe_b64encode(data).rstrip(b"=").decode() + payload = b64url(_json.dumps({"sub": "user-xyz", "exp": 9999999999}).encode()) + token = f"{b64url(b'{}')}.{payload}.{b64url(b'sig')}" + headers = _codex_cloudflare_headers(token) + assert headers["originator"] == "codex_cli_rs" + assert "ChatGPT-Account-ID" not in headers + + +# --------------------------------------------------------------------------- +# Primary chat client wiring (run_agent.AIAgent) +# --------------------------------------------------------------------------- + +class TestPrimaryClientWiring: + def test_init_wires_codex_headers_for_chatgpt_base_url(self): + from run_agent import AIAgent + token = _make_codex_jwt("acct-primary-init") + with patch("run_agent.OpenAI") as mock_openai: + mock_openai.return_value = MagicMock() + AIAgent( + api_key=token, + base_url="https://chatgpt.com/backend-api/codex", + provider="openai-codex", + model="gpt-5.4", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + headers = mock_openai.call_args.kwargs.get("default_headers") or {} + assert headers.get("originator") == "codex_cli_rs" + assert headers.get("ChatGPT-Account-ID") == "acct-primary-init" + assert headers.get("User-Agent", "").startswith("codex_cli_rs/") + + def test_apply_client_headers_on_base_url_change(self): + """Credential-rotation / base-url change path must also emit codex headers.""" + from run_agent import AIAgent + token = _make_codex_jwt("acct-rotation") + with patch("run_agent.OpenAI") as mock_openai: + mock_openai.return_value = MagicMock() + agent = AIAgent( + api_key="placeholder-openrouter-key", + base_url="https://openrouter.ai/api/v1", + provider="openrouter", + model="anthropic/claude-sonnet-4.6", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + # Simulate rotation into a Codex credential + agent._client_kwargs["api_key"] = token + agent._apply_client_headers_for_base_url( + "https://chatgpt.com/backend-api/codex" + ) + headers = agent._client_kwargs.get("default_headers") or {} + assert headers.get("originator") == "codex_cli_rs" + assert headers.get("ChatGPT-Account-ID") == "acct-rotation" + assert headers.get("User-Agent", "").startswith("codex_cli_rs/") + + def test_apply_client_headers_clears_codex_headers_off_chatgpt(self): + """Switching AWAY from chatgpt.com must drop the codex headers.""" + from run_agent import AIAgent + token = _make_codex_jwt() + with patch("run_agent.OpenAI") as mock_openai: + mock_openai.return_value = MagicMock() + agent = AIAgent( + api_key=token, + base_url="https://chatgpt.com/backend-api/codex", + provider="openai-codex", + model="gpt-5.4", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + # Sanity: headers are set initially + assert "originator" in (agent._client_kwargs.get("default_headers") or {}) + agent._apply_client_headers_for_base_url( + "https://api.anthropic.com" + ) + # default_headers should be popped for anthropic base + assert "default_headers" not in agent._client_kwargs + + def test_openrouter_base_url_does_not_get_codex_headers(self): + from run_agent import AIAgent + with patch("run_agent.OpenAI") as mock_openai: + mock_openai.return_value = MagicMock() + AIAgent( + api_key="sk-or-test", + base_url="https://openrouter.ai/api/v1", + provider="openrouter", + model="anthropic/claude-sonnet-4.6", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + headers = mock_openai.call_args.kwargs.get("default_headers") or {} + assert headers.get("originator") != "codex_cli_rs" + + +# --------------------------------------------------------------------------- +# Auxiliary client wiring (agent.auxiliary_client) +# --------------------------------------------------------------------------- + +class TestAuxiliaryClientWiring: + def test_try_codex_passes_codex_headers(self, monkeypatch): + """_try_codex builds the OpenAI client used for compression / vision / + title generation when routed through Codex. Must emit codex headers.""" + from agent import auxiliary_client + token = _make_codex_jwt("acct-aux-try-codex") + + # Force _select_pool_entry to return "no pool" so we fall through to + # _read_codex_access_token. + monkeypatch.setattr( + auxiliary_client, "_select_pool_entry", + lambda provider: (False, None), + ) + monkeypatch.setattr( + auxiliary_client, "_read_codex_access_token", + lambda: token, + ) + with patch("agent.auxiliary_client.OpenAI") as mock_openai: + mock_openai.return_value = MagicMock() + client, model = auxiliary_client._try_codex() + assert client is not None + headers = mock_openai.call_args.kwargs.get("default_headers") or {} + assert headers.get("originator") == "codex_cli_rs" + assert headers.get("ChatGPT-Account-ID") == "acct-aux-try-codex" + assert headers.get("User-Agent", "").startswith("codex_cli_rs/") + + def test_resolve_provider_client_raw_codex_passes_codex_headers(self, monkeypatch): + """The ``raw_codex=True`` branch (used by the main agent loop for direct + responses.stream() access) must also emit codex headers.""" + from agent import auxiliary_client + token = _make_codex_jwt("acct-aux-raw-codex") + monkeypatch.setattr( + auxiliary_client, "_read_codex_access_token", + lambda: token, + ) + with patch("agent.auxiliary_client.OpenAI") as mock_openai: + mock_openai.return_value = MagicMock() + client, model = auxiliary_client.resolve_provider_client( + "openai-codex", raw_codex=True, + ) + assert client is not None + headers = mock_openai.call_args.kwargs.get("default_headers") or {} + assert headers.get("originator") == "codex_cli_rs" + assert headers.get("ChatGPT-Account-ID") == "acct-aux-raw-codex" + assert headers.get("User-Agent", "").startswith("codex_cli_rs/")