fix: skills-sh install fails for deeply nested repo structures (#2980)

* fix(run_agent): ensure _fire_first_delta() is called for tool generation events

Added calls to _fire_first_delta() in the AIAgent class to improve the handling of tool generation events, ensuring timely notifications during the processing of function calls and tool usage.

* fix(run_agent): improve timeout handling for chat completions

Enhanced the timeout configuration for chat completions in the AIAgent class by introducing customizable connection, read, and write timeouts using environment variables. This ensures more robust handling of API requests during streaming operations.

* fix(run_agent): reduce default stream read timeout for chat completions

Updated the default stream read timeout from 120 seconds to 60 seconds in the AIAgent class, enhancing the timeout configuration for chat completions. This change aims to improve responsiveness during streaming operations.

* fix(run_agent): enhance streaming error handling and retry logic

Improved the error handling and retry mechanism for streaming requests in the AIAgent class. Introduced a configurable maximum number of stream retries and refined the handling of transient network errors, allowing for retries with fresh connections. Non-transient errors now trigger a fallback to non-streaming only when appropriate, ensuring better resilience during API interactions.

* fix: skills-sh install fails for deeply nested repo structures

Skills in repos with deep directory nesting (e.g.
cli-tool/components/skills/development/senior-backend/) could not be
installed because the candidate path generation and shallow root-dir
scan never reached them.

Added GitHubSource._find_skill_in_repo_tree() which uses the GitHub
Trees API to recursively search the entire repo tree in a single API
call. This is used as a final fallback in
SkillsShSource._discover_identifier() when the standard candidate
paths and shallow scan both fail.

Fixes installation of skills from repos like davila7/claude-code-templates
where skills are nested 4+ levels deep.

Reported by user Samuraixheart.
This commit is contained in:
Teknium 2026-03-25 09:31:05 -07:00 committed by GitHub
parent c6f4515f73
commit 5dbe2d9d73
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 298 additions and 22 deletions

View file

@ -3585,7 +3585,20 @@ class AIAgent:
def _call_chat_completions():
"""Stream a chat completions response."""
stream_kwargs = {**api_kwargs, "stream": True, "stream_options": {"include_usage": True}}
import httpx as _httpx
_base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 900.0))
_stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 60.0))
stream_kwargs = {
**api_kwargs,
"stream": True,
"stream_options": {"include_usage": True},
"timeout": _httpx.Timeout(
connect=30.0,
read=_stream_read_timeout,
write=_base_timeout,
pool=30.0,
),
}
request_client_holder["client"] = self._create_request_openai_client(
reason="chat_completion_stream_request"
)
@ -3653,6 +3666,7 @@ class AIAgent:
name = entry["function"]["name"]
if name and idx not in tool_gen_notified:
tool_gen_notified.add(idx)
_fire_first_delta()
self._fire_tool_gen_started(name)
if chunk.choices[0].finish_reason:
@ -3721,6 +3735,7 @@ class AIAgent:
has_tool_use = True
tool_name = getattr(block, "name", None)
if tool_name:
_fire_first_delta()
self._fire_tool_gen_started(tool_name)
elif event_type == "content_block_delta":
@ -3742,29 +3757,84 @@ class AIAgent:
return stream.get_final_message()
def _call():
import httpx as _httpx
_max_stream_retries = int(os.getenv("HERMES_STREAM_RETRIES", 2))
try:
if self.api_mode == "anthropic_messages":
self._try_refresh_anthropic_client_credentials()
result["response"] = _call_anthropic()
else:
result["response"] = _call_chat_completions()
except Exception as e:
if deltas_were_sent["yes"]:
# Streaming failed AFTER some tokens were already delivered
# to consumers. Don't fall back — that would cause
# double-delivery (partial streamed + full non-streamed).
# Let the error propagate; the partial content already
# reached the user via the stream.
logger.warning("Streaming failed after partial delivery, not falling back: %s", e)
result["error"] = e
else:
# Streaming failed before any tokens reached consumers.
# Safe to fall back to the standard non-streaming path.
logger.info("Streaming failed before delivery, falling back to non-streaming: %s", e)
for _stream_attempt in range(_max_stream_retries + 1):
try:
result["response"] = self._interruptible_api_call(api_kwargs)
except Exception as fallback_err:
result["error"] = fallback_err
if self.api_mode == "anthropic_messages":
self._try_refresh_anthropic_client_credentials()
result["response"] = _call_anthropic()
else:
result["response"] = _call_chat_completions()
return # success
except Exception as e:
if deltas_were_sent["yes"]:
# Streaming failed AFTER some tokens were already
# delivered. Don't retry or fall back — partial
# content already reached the user.
logger.warning(
"Streaming failed after partial delivery, not retrying: %s", e
)
result["error"] = e
return
_is_timeout = isinstance(
e, (_httpx.ReadTimeout, _httpx.ConnectTimeout, _httpx.PoolTimeout)
)
_is_conn_err = isinstance(
e, (_httpx.ConnectError, _httpx.RemoteProtocolError, ConnectionError)
)
if _is_timeout or _is_conn_err:
# Transient network / timeout error. Retry the
# streaming request with a fresh connection rather
# than falling back to non-streaming (which would
# hang for up to 15 min on the same dead server).
if _stream_attempt < _max_stream_retries:
logger.info(
"Streaming attempt %s/%s failed (%s: %s), "
"retrying with fresh connection...",
_stream_attempt + 1,
_max_stream_retries + 1,
type(e).__name__,
e,
)
# Close the stale request client before retry
stale = request_client_holder.get("client")
if stale is not None:
self._close_request_openai_client(
stale, reason="stream_retry_cleanup"
)
request_client_holder["client"] = None
continue
# Exhausted retries — propagate to outer loop
logger.warning(
"Streaming exhausted %s retries on transient error: %s",
_max_stream_retries + 1,
e,
)
result["error"] = e
return
# Non-transient error (e.g. "streaming not supported",
# auth error, 4xx). Fall back to non-streaming once.
err_msg = str(e).lower()
if "stream" in err_msg and "not supported" in err_msg:
logger.info(
"Streaming not supported, falling back to non-streaming: %s", e
)
try:
result["response"] = self._interruptible_api_call(api_kwargs)
except Exception as fallback_err:
result["error"] = fallback_err
return
# Unknown error — propagate to outer retry loop
result["error"] = e
return
finally:
request_client = request_client_holder.get("client")
if request_client is not None:

View file

@ -305,6 +305,154 @@ class TestSkillsShSource:
assert bundle.files["SKILL.md"] == "# react"
assert mock_get.called
@patch("tools.skills_hub._write_index_cache")
@patch("tools.skills_hub._read_index_cache", return_value=None)
@patch("tools.skills_hub.httpx.get")
@patch.object(GitHubSource, "fetch")
def test_fetch_falls_back_to_tree_search_for_deeply_nested_skills(
self, mock_fetch, mock_get, _mock_read_cache, _mock_write_cache,
):
"""Skills in deeply nested dirs (e.g. cli-tool/components/skills/dev/my-skill/)
are found via the GitHub Trees API when candidate paths and shallow scan fail."""
tree_entries = [
{"path": "README.md", "type": "blob"},
{"path": "cli-tool/components/skills/development/my-skill/SKILL.md", "type": "blob"},
{"path": "cli-tool/components/skills/development/other-skill/SKILL.md", "type": "blob"},
]
def _httpx_get_side_effect(url, **kwargs):
resp = MagicMock()
if "/api/search" in url:
resp.status_code = 404
return resp
if url.endswith("/contents/"):
# Root listing for shallow scan — return empty so it falls through
resp.status_code = 200
resp.json = lambda: []
return resp
if "/contents/" in url:
# All contents API calls fail (candidate paths miss)
resp.status_code = 404
return resp
if url.endswith("owner/repo"):
# Repo info → default branch
resp.status_code = 200
resp.json = lambda: {"default_branch": "main"}
return resp
if "/git/trees/main" in url:
resp.status_code = 200
resp.json = lambda: {"tree": tree_entries}
return resp
# skills.sh detail page
resp.status_code = 200
resp.text = "<h1>my-skill</h1>"
return resp
mock_get.side_effect = _httpx_get_side_effect
resolved_bundle = SkillBundle(
name="my-skill",
files={"SKILL.md": "# My Skill"},
source="github",
identifier="owner/repo/cli-tool/components/skills/development/my-skill",
trust_level="community",
)
mock_fetch.side_effect = lambda ident: resolved_bundle if "cli-tool/components" in ident else None
bundle = self._source().fetch("skills-sh/owner/repo/my-skill")
assert bundle is not None
assert bundle.source == "skills.sh"
assert bundle.files["SKILL.md"] == "# My Skill"
# Verify the tree-resolved identifier was used for the final GitHub fetch
mock_fetch.assert_any_call("owner/repo/cli-tool/components/skills/development/my-skill")
class TestFindSkillInRepoTree:
"""Tests for GitHubSource._find_skill_in_repo_tree."""
def _source(self):
auth = MagicMock(spec=GitHubAuth)
auth.get_headers.return_value = {"Accept": "application/vnd.github.v3+json"}
return GitHubSource(auth=auth)
@patch("tools.skills_hub.httpx.get")
def test_finds_deeply_nested_skill(self, mock_get):
tree_entries = [
{"path": "README.md", "type": "blob"},
{"path": "cli-tool/components/skills/development/senior-backend/SKILL.md", "type": "blob"},
{"path": "cli-tool/components/skills/development/other/SKILL.md", "type": "blob"},
]
def _side_effect(url, **kwargs):
resp = MagicMock()
if url.endswith("/davila7/claude-code-templates"):
resp.status_code = 200
resp.json = lambda: {"default_branch": "main"}
elif "/git/trees/main" in url:
resp.status_code = 200
resp.json = lambda: {"tree": tree_entries}
else:
resp.status_code = 404
return resp
mock_get.side_effect = _side_effect
result = self._source()._find_skill_in_repo_tree("davila7/claude-code-templates", "senior-backend")
assert result == "davila7/claude-code-templates/cli-tool/components/skills/development/senior-backend"
@patch("tools.skills_hub.httpx.get")
def test_finds_root_level_skill(self, mock_get):
tree_entries = [
{"path": "my-skill/SKILL.md", "type": "blob"},
]
def _side_effect(url, **kwargs):
resp = MagicMock()
if "/contents" not in url and "/git/" not in url:
resp.status_code = 200
resp.json = lambda: {"default_branch": "main"}
elif "/git/trees/main" in url:
resp.status_code = 200
resp.json = lambda: {"tree": tree_entries}
else:
resp.status_code = 404
return resp
mock_get.side_effect = _side_effect
result = self._source()._find_skill_in_repo_tree("owner/repo", "my-skill")
assert result == "owner/repo/my-skill"
@patch("tools.skills_hub.httpx.get")
def test_returns_none_when_skill_not_found(self, mock_get):
tree_entries = [
{"path": "other-skill/SKILL.md", "type": "blob"},
]
def _side_effect(url, **kwargs):
resp = MagicMock()
if "/contents" not in url and "/git/" not in url:
resp.status_code = 200
resp.json = lambda: {"default_branch": "main"}
elif "/git/trees/main" in url:
resp.status_code = 200
resp.json = lambda: {"tree": tree_entries}
else:
resp.status_code = 404
return resp
mock_get.side_effect = _side_effect
result = self._source()._find_skill_in_repo_tree("owner/repo", "nonexistent")
assert result is None
@patch("tools.skills_hub.httpx.get")
def test_returns_none_when_repo_api_fails(self, mock_get):
mock_get.return_value = MagicMock(status_code=404)
result = self._source()._find_skill_in_repo_tree("owner/repo", "my-skill")
assert result is None
class TestWellKnownSkillSource:
def _source(self):

View file

@ -434,6 +434,56 @@ class GitHubSource(SkillSource):
return files
def _find_skill_in_repo_tree(self, repo: str, skill_name: str) -> Optional[str]:
"""Use the GitHub Trees API to find a skill directory anywhere in the repo.
Returns the full identifier (``repo/path/to/skill``) or ``None``.
This is a single API call regardless of repo depth, so it efficiently
handles deeply nested directory structures like
``cli-tool/components/skills/development/<skill>/SKILL.md``.
"""
# Get default branch
try:
resp = httpx.get(
f"https://api.github.com/repos/{repo}",
headers=self.auth.get_headers(),
timeout=15,
follow_redirects=True,
)
if resp.status_code != 200:
return None
default_branch = resp.json().get("default_branch", "main")
except (httpx.HTTPError, json.JSONDecodeError):
return None
# Get recursive tree (single API call for the entire repo)
try:
resp = httpx.get(
f"https://api.github.com/repos/{repo}/git/trees/{default_branch}",
params={"recursive": "1"},
headers=self.auth.get_headers(),
timeout=30,
follow_redirects=True,
)
if resp.status_code != 200:
return None
tree_data = resp.json()
except (httpx.HTTPError, json.JSONDecodeError):
return None
# Look for SKILL.md files inside directories named <skill_name>
skill_md_suffix = f"/{skill_name}/SKILL.md"
for entry in tree_data.get("tree", []):
if entry.get("type") != "blob":
continue
path = entry.get("path", "")
if path.endswith(skill_md_suffix) or path == f"{skill_name}/SKILL.md":
# Strip /SKILL.md to get the skill directory path
skill_dir = path[: -len("/SKILL.md")]
return f"{repo}/{skill_dir}"
return None
def _fetch_file_content(self, repo: str, path: str) -> Optional[str]:
"""Fetch a single file's content from GitHub."""
url = f"https://api.github.com/repos/{repo}/contents/{path}"
@ -1014,6 +1064,14 @@ class SkillsShSource(SkillSource):
except Exception:
pass
# Final fallback: use the GitHub Trees API to find the skill anywhere
# in the repo tree. This handles deeply nested structures like
# cli-tool/components/skills/development/<skill>/ that the shallow
# scan above can't reach.
tree_result = self.github._find_skill_in_repo_tree(repo, skill_token)
if tree_result:
return tree_result
return None
def _finalize_inspect_meta(self, meta: SkillMeta, canonical: str, detail: Optional[dict]) -> SkillMeta: