diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py
index 3643837bf5b..28a78bab13d 100644
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -30,6 +30,14 @@ _DEFAULT_TIMEOUT_SECONDS = 900.0
 _TOOL_CALL_BLOCK_RE = re.compile(r"\s*(\{.*?\})\s*", re.DOTALL)
 _TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
 
+# Lower-case substrings of the gh-copilot extension's deprecation notice.
+# Keep these specific: generic markers such as "deprecation" also match
+# unrelated stderr noise (e.g. Node.js "DeprecationWarning" lines).
+_DEPRECATION_PATTERNS = (
+    "has been deprecated",
+    "no commands will be executed",
+)
+
 
 def _resolve_command() -> str:
     return (
@@ -506,6 +514,18 @@ class CopilotACPClient:
 
         stderr_text = "\n".join(stderr_tail).strip()
         if proc.poll() is not None and stderr_text:
+            stderr_lower = stderr_text.lower()
+            if any(pat in stderr_lower for pat in _DEPRECATION_PATTERNS):
+                raise RuntimeError(
+                    "The gh-copilot CLI extension has been deprecated by GitHub and "
+                    "can no longer be used for ACP mode.\n\n"
+                    "Alternatives:\n"
+                    " 1. Use the GitHub Copilot provider instead of ACP mode:\n"
+                    " hermes setup → select 'GitHub Copilot' (uses Copilot Chat API)\n"
+                    " 2. Set HERMES_COPILOT_ACP_COMMAND to point to a compatible ACP server\n"
+                    " 3. Use a different provider (e.g. OpenAI, Anthropic, Nous)\n\n"
+                    f"Original error:\n{stderr_text}"
+                )
             raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
 
         raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 41e229416c9..8146cd97aa4 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -358,6 +358,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
     "api.deepseek.com": "deepseek",
     "api.githubcopilot.com": "copilot",
     "models.github.ai": "copilot",
+    "models.inference.ai.azure.com": "github-models",
     "api.fireworks.ai": "fireworks",
     "opencode.ai": "opencode-go",
     "api.x.ai": "xai",
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index ded3f448f87..336e220814e 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -2525,6 +2525,7 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
     return (
         normalized.startswith(COPILOT_BASE_URL)
         or normalized.startswith("https://models.github.ai/inference")
+        or normalized.startswith("https://models.inference.ai.azure.com")
     )
 
 
diff --git a/run_agent.py b/run_agent.py
index 310777076cb..4bae16685cb 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -14184,6 +14184,33 @@ class AIAgent:
                             "interrupted": True,
                         }
 
+                    # Actionable hint for GitHub Models (Azure) 413 errors.
+                    # The free tier enforces a hard per-request token limit
+                    # (often 8K), which Hermes' system prompt alone can exceed,
+                    # so compression is unlikely to help. This block only prints
+                    # the hint; the 413 handler below still runs afterwards.
+                    if (
+                        status_code == 413
+                        and isinstance(_base, str)
+                        and "models.inference.ai.azure.com" in _base
+                    ):
+                        self._vprint(
+                            f"{self.log_prefix} 💡 GitHub Models (Azure) enforces a hard per-request token limit (often 8K).",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix} Hermes' system prompt alone may exceed this limit. This endpoint is not",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix} compatible with Hermes Agent. Use https://models.github.ai or the GitHub",
+                            force=True,
+                        )
+                        self._vprint(
+                            f"{self.log_prefix} Copilot provider instead, which have higher token limits.",
+                            force=True,
+                        )
+
                     # Check for 413 payload-too-large BEFORE generic 4xx handler.
                     # A 413 is a payload-size error — the correct response is to
                     # compress history and retry, not abort immediately.