diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py
index 3643837bf5b..28a78bab13d 100644
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -30,6 +30,14 @@ _DEFAULT_TIMEOUT_SECONDS = 900.0
 _TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
 _TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
 
+# Phrases from the gh-copilot deprecation notice, matched against lowercased
+# stderr.  Bare "deprecation" / "copilot-cli" are deliberately not listed: they
+# also hit Node "DeprecationWarning" lines and stderr that merely names a CLI.
+_DEPRECATION_PATTERNS = (
+    "has been deprecated",
+    "no commands will be executed",
+)
+
 
 def _resolve_command() -> str:
     return (
@@ -506,6 +514,18 @@ class CopilotACPClient:
 
             stderr_text = "\n".join(stderr_tail).strip()
             if proc.poll() is not None and stderr_text:
+                stderr_lower = stderr_text.lower()
+                if any(pat in stderr_lower for pat in _DEPRECATION_PATTERNS):
+                    raise RuntimeError(
+                        "The gh-copilot CLI extension has been deprecated by GitHub and "
+                        "can no longer be used for ACP mode.\n\n"
+                        "Alternatives:\n"
+                        "  1. Use the GitHub Copilot provider instead of ACP mode:\n"
+                        "     hermes setup  →  select 'GitHub Copilot' (uses Copilot Chat API)\n"
+                        "  2. Set HERMES_COPILOT_ACP_COMMAND to point to a compatible ACP server\n"
+                        "  3. Use a different provider (e.g. OpenAI, Anthropic, Nous)\n\n"
+                        f"Original error:\n{stderr_text}"
+                    )
                 raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
             raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")
 
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 41e229416c9..8146cd97aa4 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -358,6 +358,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
     "api.deepseek.com": "deepseek",
     "api.githubcopilot.com": "copilot",
     "models.github.ai": "copilot",
+    "models.inference.ai.azure.com": "github-models",
     "api.fireworks.ai": "fireworks",
     "opencode.ai": "opencode-go",
     "api.x.ai": "xai",
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index ded3f448f87..336e220814e 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -2525,6 +2525,7 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
     return (
         normalized.startswith(COPILOT_BASE_URL)
         or normalized.startswith("https://models.github.ai/inference")
+        or normalized.startswith("https://models.inference.ai.azure.com")
     )
 
 
diff --git a/run_agent.py b/run_agent.py
index 310777076cb..4bae16685cb 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -14184,6 +14184,33 @@ class AIAgent:
                             "interrupted": True,
                         }
                     
+                    # Actionable hint for GitHub Models (Azure) 413 errors.
+                    # The endpoint enforces a hard per-request token limit (often
+                    # 8K), which Hermes' system prompt alone can exceed, so
+                    # compression is unlikely to help.  This block only prints the
+                    # hint up front; it does not skip the 413 handling below.
+                    if (
+                        status_code == 413
+                        and isinstance(_base, str)
+                        and "models.inference.ai.azure.com" in _base
+                    ):
+                        self._vprint(
+                            f"{self.log_prefix}   💡 GitHub Models (Azure) enforces a hard per-request token limit (often 8K).",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}      Hermes' system prompt alone may exceed this limit.  This endpoint is not",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}      compatible with Hermes Agent.  Use https://models.github.ai or the GitHub",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix}      Copilot provider instead, which have higher token limits.",
+                            force=True,
+                        )
+
                     # Check for 413 payload-too-large BEFORE generic 4xx handler.
                     # A 413 is a payload-size error — the correct response is to
                     # compress history and retry, not abort immediately.