From 5dbe2d9d739e2cc3463e715ea17ed38a5ab5e8e8 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 25 Mar 2026 09:31:05 -0700 Subject: [PATCH] fix: skills-sh install fails for deeply nested repo structures (#2980) * fix(run_agent): ensure _fire_first_delta() is called for tool generation events Added calls to _fire_first_delta() in the AIAgent class to improve the handling of tool generation events, ensuring timely notifications during the processing of function calls and tool usage. * fix(run_agent): improve timeout handling for chat completions Enhanced the timeout configuration for chat completions in the AIAgent class by introducing customizable connection, read, and write timeouts using environment variables. This ensures more robust handling of API requests during streaming operations. * fix(run_agent): reduce default stream read timeout for chat completions Updated the default stream read timeout from 120 seconds to 60 seconds in the AIAgent class, enhancing the timeout configuration for chat completions. This change aims to improve responsiveness during streaming operations. * fix(run_agent): enhance streaming error handling and retry logic Improved the error handling and retry mechanism for streaming requests in the AIAgent class. Introduced a configurable maximum number of stream retries and refined the handling of transient network errors, allowing for retries with fresh connections. Non-transient errors now trigger a fallback to non-streaming only when appropriate, ensuring better resilience during API interactions. * fix: skills-sh install fails for deeply nested repo structures Skills in repos with deep directory nesting (e.g. cli-tool/components/skills/development/senior-backend/) could not be installed because the candidate path generation and shallow root-dir scan never reached them. Added GitHubSource._find_skill_in_repo_tree() which uses the GitHub Trees API to recursively search the entire repo tree in a single API call. This is used as a final fallback in SkillsShSource._discover_identifier() when the standard candidate paths and shallow scan both fail. Fixes installation of skills from repos like davila7/claude-code-templates where skills are nested 4+ levels deep. Reported by user Samuraixheart. --- run_agent.py | 114 ++++++++++++++++++++----- tests/tools/test_skills_hub.py | 148 +++++++++++++++++++++++++++++++++ tools/skills_hub.py | 58 +++++++++++++ 3 files changed, 298 insertions(+), 22 deletions(-) diff --git a/run_agent.py b/run_agent.py index e7dec855a..36491e644 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3585,7 +3585,20 @@ class AIAgent: def _call_chat_completions(): """Stream a chat completions response.""" - stream_kwargs = {**api_kwargs, "stream": True, "stream_options": {"include_usage": True}} + import httpx as _httpx + _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 900.0)) + _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 60.0)) + stream_kwargs = { + **api_kwargs, + "stream": True, + "stream_options": {"include_usage": True}, + "timeout": _httpx.Timeout( + connect=30.0, + read=_stream_read_timeout, + write=_base_timeout, + pool=30.0, + ), + } request_client_holder["client"] = self._create_request_openai_client( reason="chat_completion_stream_request" ) @@ -3653,6 +3666,7 @@ class AIAgent: name = entry["function"]["name"] if name and idx not in tool_gen_notified: tool_gen_notified.add(idx) + _fire_first_delta() self._fire_tool_gen_started(name) if chunk.choices[0].finish_reason: @@ -3721,6 +3735,7 @@ class AIAgent: has_tool_use = True tool_name = getattr(block, "name", None) if tool_name: + _fire_first_delta() self._fire_tool_gen_started(tool_name) elif event_type == "content_block_delta": @@ -3742,29 +3757,84 @@ class AIAgent: return stream.get_final_message() def _call(): + import httpx as _httpx + + _max_stream_retries = int(os.getenv("HERMES_STREAM_RETRIES", 2)) + try: - if self.api_mode == "anthropic_messages": - self._try_refresh_anthropic_client_credentials() - result["response"] = _call_anthropic() - else: - result["response"] = _call_chat_completions() - except Exception as e: - if deltas_were_sent["yes"]: - # Streaming failed AFTER some tokens were already delivered - # to consumers. Don't fall back — that would cause - # double-delivery (partial streamed + full non-streamed). - # Let the error propagate; the partial content already - # reached the user via the stream. - logger.warning("Streaming failed after partial delivery, not falling back: %s", e) - result["error"] = e - else: - # Streaming failed before any tokens reached consumers. - # Safe to fall back to the standard non-streaming path. - logger.info("Streaming failed before delivery, falling back to non-streaming: %s", e) + for _stream_attempt in range(_max_stream_retries + 1): try: - result["response"] = self._interruptible_api_call(api_kwargs) - except Exception as fallback_err: - result["error"] = fallback_err + if self.api_mode == "anthropic_messages": + self._try_refresh_anthropic_client_credentials() + result["response"] = _call_anthropic() + else: + result["response"] = _call_chat_completions() + return # success + except Exception as e: + if deltas_were_sent["yes"]: + # Streaming failed AFTER some tokens were already + # delivered. Don't retry or fall back — partial + # content already reached the user. + logger.warning( + "Streaming failed after partial delivery, not retrying: %s", e + ) + result["error"] = e + return + + _is_timeout = isinstance( + e, (_httpx.ReadTimeout, _httpx.ConnectTimeout, _httpx.PoolTimeout) + ) + _is_conn_err = isinstance( + e, (_httpx.ConnectError, _httpx.RemoteProtocolError, ConnectionError) + ) + + if _is_timeout or _is_conn_err: + # Transient network / timeout error. Retry the + # streaming request with a fresh connection rather + # than falling back to non-streaming (which would + # hang for up to 15 min on the same dead server). + if _stream_attempt < _max_stream_retries: + logger.info( + "Streaming attempt %s/%s failed (%s: %s), " + "retrying with fresh connection...", + _stream_attempt + 1, + _max_stream_retries + 1, + type(e).__name__, + e, + ) + # Close the stale request client before retry + stale = request_client_holder.get("client") + if stale is not None: + self._close_request_openai_client( + stale, reason="stream_retry_cleanup" + ) + request_client_holder["client"] = None + continue + # Exhausted retries — propagate to outer loop + logger.warning( + "Streaming exhausted %s retries on transient error: %s", + _max_stream_retries + 1, + e, + ) + result["error"] = e + return + + # Non-transient error (e.g. "streaming not supported", + # auth error, 4xx). Fall back to non-streaming once. + err_msg = str(e).lower() + if "stream" in err_msg and "not supported" in err_msg: + logger.info( + "Streaming not supported, falling back to non-streaming: %s", e + ) + try: + result["response"] = self._interruptible_api_call(api_kwargs) + except Exception as fallback_err: + result["error"] = fallback_err + return + + # Unknown error — propagate to outer retry loop + result["error"] = e + return finally: request_client = request_client_holder.get("client") if request_client is not None: diff --git a/tests/tools/test_skills_hub.py b/tests/tools/test_skills_hub.py index c74fa2d88..778a77ba1 100644 --- a/tests/tools/test_skills_hub.py +++ b/tests/tools/test_skills_hub.py @@ -305,6 +305,154 @@ class TestSkillsShSource: assert bundle.files["SKILL.md"] == "# react" assert mock_get.called + @patch("tools.skills_hub._write_index_cache") + @patch("tools.skills_hub._read_index_cache", return_value=None) + @patch("tools.skills_hub.httpx.get") + @patch.object(GitHubSource, "fetch") + def test_fetch_falls_back_to_tree_search_for_deeply_nested_skills( + self, mock_fetch, mock_get, _mock_read_cache, _mock_write_cache, + ): + """Skills in deeply nested dirs (e.g. cli-tool/components/skills/dev/my-skill/) + are found via the GitHub Trees API when candidate paths and shallow scan fail.""" + tree_entries = [ + {"path": "README.md", "type": "blob"}, + {"path": "cli-tool/components/skills/development/my-skill/SKILL.md", "type": "blob"}, + {"path": "cli-tool/components/skills/development/other-skill/SKILL.md", "type": "blob"}, + ] + + def _httpx_get_side_effect(url, **kwargs): + resp = MagicMock() + if "/api/search" in url: + resp.status_code = 404 + return resp + if url.endswith("/contents/"): + # Root listing for shallow scan — return empty so it falls through + resp.status_code = 200 + resp.json = lambda: [] + return resp + if "/contents/" in url: + # All contents API calls fail (candidate paths miss) + resp.status_code = 404 + return resp + if url.endswith("owner/repo"): + # Repo info → default branch + resp.status_code = 200 + resp.json = lambda: {"default_branch": "main"} + return resp + if "/git/trees/main" in url: + resp.status_code = 200 + resp.json = lambda: {"tree": tree_entries} + return resp + # skills.sh detail page + resp.status_code = 200 + resp.text = "

my-skill

" + return resp + + mock_get.side_effect = _httpx_get_side_effect + + resolved_bundle = SkillBundle( + name="my-skill", + files={"SKILL.md": "# My Skill"}, + source="github", + identifier="owner/repo/cli-tool/components/skills/development/my-skill", + trust_level="community", + ) + mock_fetch.side_effect = lambda ident: resolved_bundle if "cli-tool/components" in ident else None + + bundle = self._source().fetch("skills-sh/owner/repo/my-skill") + + assert bundle is not None + assert bundle.source == "skills.sh" + assert bundle.files["SKILL.md"] == "# My Skill" + # Verify the tree-resolved identifier was used for the final GitHub fetch + mock_fetch.assert_any_call("owner/repo/cli-tool/components/skills/development/my-skill") + + +class TestFindSkillInRepoTree: + """Tests for GitHubSource._find_skill_in_repo_tree.""" + + def _source(self): + auth = MagicMock(spec=GitHubAuth) + auth.get_headers.return_value = {"Accept": "application/vnd.github.v3+json"} + return GitHubSource(auth=auth) + + @patch("tools.skills_hub.httpx.get") + def test_finds_deeply_nested_skill(self, mock_get): + tree_entries = [ + {"path": "README.md", "type": "blob"}, + {"path": "cli-tool/components/skills/development/senior-backend/SKILL.md", "type": "blob"}, + {"path": "cli-tool/components/skills/development/other/SKILL.md", "type": "blob"}, + ] + + def _side_effect(url, **kwargs): + resp = MagicMock() + if url.endswith("/davila7/claude-code-templates"): + resp.status_code = 200 + resp.json = lambda: {"default_branch": "main"} + elif "/git/trees/main" in url: + resp.status_code = 200 + resp.json = lambda: {"tree": tree_entries} + else: + resp.status_code = 404 + return resp + + mock_get.side_effect = _side_effect + + result = self._source()._find_skill_in_repo_tree("davila7/claude-code-templates", "senior-backend") + assert result == "davila7/claude-code-templates/cli-tool/components/skills/development/senior-backend" + + @patch("tools.skills_hub.httpx.get") + def test_finds_root_level_skill(self, mock_get): + tree_entries = [ + {"path": "my-skill/SKILL.md", "type": "blob"}, + ] + + def _side_effect(url, **kwargs): + resp = MagicMock() + if "/contents" not in url and "/git/" not in url: + resp.status_code = 200 + resp.json = lambda: {"default_branch": "main"} + elif "/git/trees/main" in url: + resp.status_code = 200 + resp.json = lambda: {"tree": tree_entries} + else: + resp.status_code = 404 + return resp + + mock_get.side_effect = _side_effect + + result = self._source()._find_skill_in_repo_tree("owner/repo", "my-skill") + assert result == "owner/repo/my-skill" + + @patch("tools.skills_hub.httpx.get") + def test_returns_none_when_skill_not_found(self, mock_get): + tree_entries = [ + {"path": "other-skill/SKILL.md", "type": "blob"}, + ] + + def _side_effect(url, **kwargs): + resp = MagicMock() + if "/contents" not in url and "/git/" not in url: + resp.status_code = 200 + resp.json = lambda: {"default_branch": "main"} + elif "/git/trees/main" in url: + resp.status_code = 200 + resp.json = lambda: {"tree": tree_entries} + else: + resp.status_code = 404 + return resp + + mock_get.side_effect = _side_effect + + result = self._source()._find_skill_in_repo_tree("owner/repo", "nonexistent") + assert result is None + + @patch("tools.skills_hub.httpx.get") + def test_returns_none_when_repo_api_fails(self, mock_get): + mock_get.return_value = MagicMock(status_code=404) + result = self._source()._find_skill_in_repo_tree("owner/repo", "my-skill") + assert result is None + class TestWellKnownSkillSource: def _source(self): diff --git a/tools/skills_hub.py b/tools/skills_hub.py index 5f9f10c27..8ae65ed76 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -434,6 +434,56 @@ class GitHubSource(SkillSource): return files + def _find_skill_in_repo_tree(self, repo: str, skill_name: str) -> Optional[str]: + """Use the GitHub Trees API to find a skill directory anywhere in the repo. + + Returns the full identifier (``repo/path/to/skill``) or ``None``. + This is a single API call regardless of repo depth, so it efficiently + handles deeply nested directory structures like + ``cli-tool/components/skills/development//SKILL.md``. + """ + # Get default branch + try: + resp = httpx.get( + f"https://api.github.com/repos/{repo}", + headers=self.auth.get_headers(), + timeout=15, + follow_redirects=True, + ) + if resp.status_code != 200: + return None + default_branch = resp.json().get("default_branch", "main") + except (httpx.HTTPError, json.JSONDecodeError): + return None + + # Get recursive tree (single API call for the entire repo) + try: + resp = httpx.get( + f"https://api.github.com/repos/{repo}/git/trees/{default_branch}", + params={"recursive": "1"}, + headers=self.auth.get_headers(), + timeout=30, + follow_redirects=True, + ) + if resp.status_code != 200: + return None + tree_data = resp.json() + except (httpx.HTTPError, json.JSONDecodeError): + return None + + # Look for SKILL.md files inside directories named + skill_md_suffix = f"/{skill_name}/SKILL.md" + for entry in tree_data.get("tree", []): + if entry.get("type") != "blob": + continue + path = entry.get("path", "") + if path.endswith(skill_md_suffix) or path == f"{skill_name}/SKILL.md": + # Strip /SKILL.md to get the skill directory path + skill_dir = path[: -len("/SKILL.md")] + return f"{repo}/{skill_dir}" + + return None + def _fetch_file_content(self, repo: str, path: str) -> Optional[str]: """Fetch a single file's content from GitHub.""" url = f"https://api.github.com/repos/{repo}/contents/{path}" @@ -1014,6 +1064,14 @@ class SkillsShSource(SkillSource): except Exception: pass + # Final fallback: use the GitHub Trees API to find the skill anywhere + # in the repo tree. This handles deeply nested structures like + # cli-tool/components/skills/development// that the shallow + # scan above can't reach. + tree_result = self.github._find_skill_in_repo_tree(repo, skill_token) + if tree_result: + return tree_result + return None def _finalize_inspect_meta(self, meta: SkillMeta, canonical: str, detail: Optional[dict]) -> SkillMeta: