fix(copilot-acp): tighten deprecation detection + sharpen GitHub Models 413 hint

Follow-up improvements on top of @konsisumer's cherry-picked fix for #10648:

1. Deprecation patterns now require BOTH a product fingerprint ('gh-copilot') and
   a deprecation marker. The previous list included 'copilot-cli' and bare
   'deprecation', which would false-positive on stderr from the NEW
   @github/copilot CLI — whose repo is literally github.com/github/copilot-cli
   and which legitimately surfaces those substrings in its own messages.

2. Replace the deprecation hint. The user in #10648 ran
   'gh extension install github/gh-copilot' (installing the deprecated
   extension), thinking that's what ACP mode uses, when ACP actually spawns the new
   'copilot' binary from '@github/copilot'. The hint now points users at the
   correct install command ('npm install -g @github/copilot') with the new
   CLI's repo URL, and demotes provider-switching to a fallback alternative.

3. Change _URL_TO_PROVIDER value for models.inference.ai.azure.com from the
   'github-models' alias to the canonical 'copilot' provider id, matching the
   convention used by every other entry in the table.

4. Sharpen the 413 hint message. The free tier's ~8K cap is below the
   system-prompt floor, so this endpoint is fundamentally incompatible with
   an agentic loop — not a 'use a different URL' problem.

Tests:
- New parametrized false-positive coverage for the new CLI's stderr shape.
- Updated assertion to require canonical 'copilot' provider mapping.
- All 14 deprecation/URL tests pass.
This commit is contained in:
teknium1 2026-05-16 01:58:13 -07:00 committed by Teknium
parent b85b938b1f
commit 374dc81c23
4 changed files with 84 additions and 38 deletions

View file

@ -30,15 +30,29 @@ _DEFAULT_TIMEOUT_SECONDS = 900.0
_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
# Patterns that indicate the gh-copilot CLI extension has been deprecated.
_DEPRECATION_PATTERNS = (
# Stderr fingerprint of the deprecated `gh copilot` CLI extension
# (https://github.blog/changelog/2025-09-25-upcoming-deprecation-of-gh-copilot-cli-extension).
# We require BOTH the literal product name ("gh-copilot") AND a deprecation
# marker, so generic stderr from the NEW `@github/copilot` CLI — whose repo
# is github.com/github/copilot-cli and which legitimately mentions "copilot-cli"
# in its own banners and error messages — doesn't get misclassified as the
# deprecated extension.
_DEPRECATION_REQUIRED = ("gh-copilot",)
_DEPRECATION_MARKERS = (
"has been deprecated",
"no commands will be executed",
"deprecation",
"copilot-cli",
)
def _is_gh_copilot_deprecation_message(stderr_text: str) -> bool:
    """Report whether stderr carries the deprecated gh-copilot extension's banner.

    Matching is case-insensitive substring search and needs two hits at once:
    at least one entry of ``_DEPRECATION_REQUIRED`` (the product fingerprint)
    and at least one entry of ``_DEPRECATION_MARKERS`` (the deprecation
    wording). Either half on its own is not enough.

    Args:
        stderr_text: Captured stderr output to inspect.

    Returns:
        ``True`` only when both a fingerprint and a marker occur in the text.
    """
    haystack = stderr_text.lower()
    has_fingerprint = any(token in haystack for token in _DEPRECATION_REQUIRED)
    # The marker scan only runs once the fingerprint has been found.
    return has_fingerprint and any(
        token in haystack for token in _DEPRECATION_MARKERS
    )
def _resolve_command() -> str:
return (
os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
@ -514,16 +528,19 @@ class CopilotACPClient:
stderr_text = "\n".join(stderr_tail).strip()
if proc.poll() is not None and stderr_text:
stderr_lower = stderr_text.lower()
if any(pat in stderr_lower for pat in _DEPRECATION_PATTERNS):
if _is_gh_copilot_deprecation_message(stderr_text):
raise RuntimeError(
"The gh-copilot CLI extension has been deprecated by GitHub and "
"can no longer be used for ACP mode.\n\n"
"Alternatives:\n"
" 1. Use the GitHub Copilot provider instead of ACP mode:\n"
" hermes setup → select 'GitHub Copilot' (uses Copilot Chat API)\n"
" 2. Set HERMES_COPILOT_ACP_COMMAND to point to a compatible ACP server\n"
" 3. Use a different provider (e.g. OpenAI, Anthropic, Nous)\n\n"
"Hermes ACP mode requires the NEW GitHub Copilot CLI "
"(github.com/github/copilot-cli), but the binary it just "
"spawned is the deprecated `gh copilot` extension.\n\n"
"Install the new CLI:\n"
" npm install -g @github/copilot\n"
" # then verify with: copilot --help\n\n"
"If `copilot` already resolves to the new CLI but you still see this,\n"
"point Hermes at it explicitly:\n"
" export HERMES_COPILOT_ACP_COMMAND=/path/to/new/copilot\n\n"
"Alternative: use the `copilot` provider (no ACP, hits the Copilot API\n"
"directly with a Copilot subscription token) via `hermes setup`.\n\n"
f"Original error:\n{stderr_text}"
)
raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")

View file

@ -358,7 +358,12 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"api.deepseek.com": "deepseek",
"api.githubcopilot.com": "copilot",
"models.github.ai": "copilot",
"models.inference.ai.azure.com": "github-models",
# GitHub Models free tier (Azure-hosted prototyping endpoint) — same
# canonical provider as the Copilot API. Hard per-request token cap
# (often 8K) makes it unusable for Hermes' system prompt, but mapping
# it here lets us recognize the endpoint and emit a targeted hint
# instead of falling through the unknown-custom-endpoint path.
"models.inference.ai.azure.com": "copilot",
"api.fireworks.ai": "fireworks",
"opencode.ai": "opencode-go",
"api.x.ai": "xai",

View file

@ -14185,29 +14185,35 @@ class AIAgent:
}
# Actionable hint for GitHub Models (Azure) 413 errors.
# The free tier enforces a hard 8K token limit per request,
# which Hermes' system prompt alone can exceed. Compression
# won't help — surface a clear message so the user doesn't
# wait through three futile compression attempts.
# The free tier enforces a hard 8K token cap per request,
# which Hermes' system prompt + tool schemas alone exceed.
# Compression can't help — the floor is the system prompt
# itself, not the conversation — so surface a clear "not
# compatible" message instead of looping into three futile
# compression attempts.
if (
status_code == 413
and isinstance(_base, str)
and "models.inference.ai.azure.com" in _base
):
self._vprint(
f"{self.log_prefix} 💡 GitHub Models (Azure) enforces a hard per-request token limit (often 8K).",
f"{self.log_prefix} 💡 GitHub Models free tier (models.inference.ai.azure.com) caps every",
force=True,
)
self._vprint(
f"{self.log_prefix} Hermes' system prompt alone may exceed this limit. This endpoint is not",
f"{self.log_prefix} request at ~8K tokens. Hermes' system prompt + tool schemas baseline",
force=True,
)
self._vprint(
f"{self.log_prefix} compatible with Hermes Agent. Use https://models.github.ai or the GitHub",
f"{self.log_prefix} exceeds that floor, so this endpoint cannot run an agentic loop.",
force=True,
)
self._vprint(
f"{self.log_prefix} Copilot provider instead, which have higher token limits.",
f"{self.log_prefix} Use the `copilot` provider with a Copilot subscription token (`hermes",
force=True,
)
self._vprint(
f"{self.log_prefix} setup` → GitHub Copilot), or pick any other provider.",
force=True,
)

View file

@ -2,11 +2,12 @@
import pytest
from agent.copilot_acp_client import _DEPRECATION_PATTERNS
from agent.copilot_acp_client import _is_gh_copilot_deprecation_message
class TestDeprecationPatternDetection:
"""Verify that stderr messages from a deprecated gh-copilot CLI are caught."""
"""Verify that stderr from the deprecated `gh copilot` extension is caught
without false-positiving on the new `@github/copilot` CLI."""
_REAL_DEPRECATION_STDERR = (
"The gh-copilot extension has been deprecated in favor of the newer "
@ -18,25 +19,40 @@ class TestDeprecationPatternDetection:
)
def test_real_deprecation_message_matches(self):
lower = self._REAL_DEPRECATION_STDERR.lower()
assert any(pat in lower for pat in _DEPRECATION_PATTERNS)
assert _is_gh_copilot_deprecation_message(self._REAL_DEPRECATION_STDERR)
@pytest.mark.parametrize(
"stderr_line",
"stderr_text",
[
"The gh-copilot extension has been deprecated",
"No commands will be executed.",
"See deprecation notice at ...",
"Install copilot-cli instead",
# The deprecation banner uses both halves of the fingerprint.
"The gh-copilot extension has been deprecated.",
"gh-copilot: no commands will be executed.",
# Mixed casing — match is case-insensitive.
"The GH-Copilot Extension HAS BEEN DEPRECATED.",
],
)
def test_individual_patterns_match(self, stderr_line: str):
lower = stderr_line.lower()
assert any(pat in lower for pat in _DEPRECATION_PATTERNS)
def test_genuine_deprecation_variants_match(self, stderr_text: str):
assert _is_gh_copilot_deprecation_message(stderr_text)
def test_normal_stderr_does_not_match(self):
normal = "Error: connection refused"
assert not any(pat in normal.lower() for pat in _DEPRECATION_PATTERNS)
@pytest.mark.parametrize(
"stderr_text",
[
# Generic errors — no fingerprint at all.
"Error: connection refused",
"",
# The NEW @github/copilot CLI's repo is github.com/github/copilot-cli.
# Its stderr can legitimately mention "copilot-cli" or "deprecation"
# in unrelated contexts; neither alone should trip the detector.
"copilot-cli: failed to authenticate with the API",
"warning: the --foo flag is scheduled for deprecation in v3",
"See https://github.com/github/copilot-cli/issues for support",
# Half the fingerprint without the other half.
"gh-copilot: command not found",
"extension has been deprecated (some other extension)",
],
)
def test_does_not_false_positive(self, stderr_text: str):
assert not _is_gh_copilot_deprecation_message(stderr_text)
class TestGitHubModelsAzureUrl:
@ -45,7 +61,9 @@ class TestGitHubModelsAzureUrl:
def test_url_to_provider_contains_azure_models(self):
from agent.model_metadata import _URL_TO_PROVIDER
assert _URL_TO_PROVIDER.get("models.inference.ai.azure.com") == "github-models"
# Maps to the canonical "copilot" provider (same convention as the
# other GitHub-family entries) — not the "github-models" alias.
assert _URL_TO_PROVIDER.get("models.inference.ai.azure.com") == "copilot"
def test_is_github_models_base_url_recognises_azure(self):
from hermes_cli.models import _is_github_models_base_url