diff --git a/tests/run_agent/test_nonretryable_error_html_summary.py b/tests/run_agent/test_nonretryable_error_html_summary.py
new file mode 100644
index 00000000000..db765b124f3
--- /dev/null
+++ b/tests/run_agent/test_nonretryable_error_html_summary.py
@@ -0,0 +1,130 @@
+"""Regression: non-retryable API failures must not leak raw HTML pages.
+
+A scheduled cron job fell back to the Codex (``chatgpt.com``) provider, which
+returned a Cloudflare *challenge* page (HTTP 403) instead of a normal API
+response. The conversation loop classified this as a non-retryable client
+error and returned the failure dict — but the ``error`` field carried
+``str(api_error)``, i.e. the entire ~60 KB Cloudflare HTML page. The cron
+scheduler then delivered that verbatim to Discord, where it was split into
+~31 messages (the reporter's "31 part discord message which is cloudflares
+challenge page").
+
+The sibling "max retries exhausted" path already summarized the error via
+``_summarize_api_error`` (which collapses HTML pages to a one-liner); the
+non-retryable path did not. These tests lock the contract: whichever
+terminal path is taken, ``result['error']`` is a short, HTML-free summary.
+"""
+
+from unittest.mock import MagicMock, patch
+
+import run_agent
+from run_agent import AIAgent
+
+
+# A representative Cloudflare "managed challenge" body, matching the shape the
+# Codex backend returned in the field report (no
, large inline
+# ``_cf_chl_opt`` script). Padded so length-based assertions are meaningful.
+_CLOUDFLARE_CHALLENGE_HTML = (
+ "\n\n \n"
+ ' \n'
+ " \n
"
+ "
\n \n\n"
+)
+
+
+def _make_403_html_error() -> Exception:
+ """An exception mimicking a Codex 403 whose body is a Cloudflare page."""
+ err = Exception(_CLOUDFLARE_CHALLENGE_HTML)
+ err.status_code = 403
+ return err
+
+
+def _make_agent() -> AIAgent:
+ # Drive the standard chat-completions path with a concrete model so the
+ # turn actually reaches ``client.chat.completions.create`` — that is where
+ # the mocked 403 is raised. The non-retryable abort being exercised lives
+ # in the shared conversation loop and is provider-agnostic; a Cloudflare
+ # "managed challenge" 403 can surface on any provider sitting behind
+ # Cloudflare (it was first reported on the Codex backend). Pinning
+ # ``api_mode`` + ``model`` here avoids the earlier abort the previous
+ # revision hit: an empty model on the Codex Responses path raised a
+ # validation ``ValueError`` *before* any API call, so the test passed
+ # without ever touching the 403 summarization path.
+ with (
+ patch("run_agent.get_tool_definitions", return_value=[]),
+ patch("run_agent.check_toolset_requirements", return_value={}),
+ patch("run_agent.OpenAI"),
+ ):
+ a = AIAgent(
+ api_key="test-key-1234567890",
+ base_url="https://api.openai.com/v1",
+ provider="openai",
+ api_mode="chat_completions",
+ model="gpt-5.5",
+ quiet_mode=True,
+ skip_context_files=True,
+ skip_memory=True,
+ )
+ a.client = MagicMock()
+ a._cached_system_prompt = "You are helpful."
+ a._use_prompt_caching = False
+ a.tool_delay = 0
+ a.compression_enabled = False
+ a.save_trajectories = False
+ return a
+
+
+def test_summarize_collapses_cloudflare_challenge_page():
+ """``_summarize_api_error`` must never echo the raw HTML body."""
+ summary = AIAgent._summarize_api_error(_make_403_html_error())
+
+ assert "