mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix: detect gh-copilot deprecation and improve GitHub Models 413 errors (#10648)
Address two blocking issues when using GitHub Copilot integrations: (1) ACP mode — detect the gh-copilot CLI deprecation error from stderr and surface an actionable message with alternatives instead of hanging or showing a cryptic error; (2) GitHub Models (Azure) HTTP 413 — recognize models.inference.ai.azure.com as a known GitHub Models URL, and print a targeted hint explaining the hard 8K-token per-request limit that makes this endpoint incompatible with the size of Hermes' system prompt.
This commit is contained in:
parent
7bb97b952f
commit
4ded3ede33
4 changed files with 49 additions and 0 deletions
27
run_agent.py
27
run_agent.py
|
|
@ -14184,6 +14184,33 @@ class AIAgent:
|
|||
"interrupted": True,
|
||||
}
|
||||
|
||||
# Actionable hint for GitHub Models (Azure) 413 errors.
|
||||
# The free tier enforces a hard 8K token limit per request,
|
||||
# which Hermes' system prompt alone can exceed. Compression
|
||||
# won't help — print the hint up front so the user understands why
|
||||
# the three compression attempts that follow are futile.
|
||||
if (
|
||||
status_code == 413
|
||||
and isinstance(_base, str)
|
||||
and "models.inference.ai.azure.com" in _base
|
||||
):
|
||||
self._vprint(
|
||||
f"{self.log_prefix} 💡 GitHub Models (Azure) enforces a hard per-request token limit (often 8K).",
|
||||
force=True,
|
||||
)
|
||||
self._vprint(
|
||||
f"{self.log_prefix} Hermes' system prompt alone may exceed this limit. This endpoint is not",
|
||||
force=True,
|
||||
)
|
||||
self._vprint(
|
||||
f"{self.log_prefix} compatible with Hermes Agent. Use https://models.github.ai or the GitHub",
|
||||
force=True,
|
||||
)
|
||||
self._vprint(
|
||||
f"{self.log_prefix} Copilot provider instead, which have higher token limits.",
|
||||
force=True,
|
||||
)
|
||||
|
||||
# Check for 413 payload-too-large BEFORE generic 4xx handler.
|
||||
# A 413 is a payload-size error — the correct response is to
|
||||
# compress history and retry, not abort immediately.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue