diff --git a/run_agent.py b/run_agent.py
index e7dec855a..36491e644 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3585,7 +3585,20 @@ class AIAgent:
 
         def _call_chat_completions():
             """Stream a chat completions response."""
-            stream_kwargs = {**api_kwargs, "stream": True, "stream_options": {"include_usage": True}}
+            import httpx as _httpx
+            _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 900.0))
+            _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 60.0))
+            stream_kwargs = {
+                **api_kwargs,
+                "stream": True,
+                "stream_options": {"include_usage": True},
+                "timeout": _httpx.Timeout(
+                    connect=30.0,
+                    read=_stream_read_timeout,
+                    write=_base_timeout,
+                    pool=30.0,
+                ),
+            }
             request_client_holder["client"] = self._create_request_openai_client(
                 reason="chat_completion_stream_request"
             )
@@ -3653,6 +3666,7 @@ class AIAgent:
                         name = entry["function"]["name"]
                         if name and idx not in tool_gen_notified:
                             tool_gen_notified.add(idx)
+                            _fire_first_delta()
                             self._fire_tool_gen_started(name)
 
                 if chunk.choices[0].finish_reason:
@@ -3721,6 +3735,7 @@ class AIAgent:
                             has_tool_use = True
                             tool_name = getattr(block, "name", None)
                             if tool_name:
+                                _fire_first_delta()
                                 self._fire_tool_gen_started(tool_name)
 
                     elif event_type == "content_block_delta":
@@ -3742,29 +3757,84 @@ class AIAgent:
                 return stream.get_final_message()
 
         def _call():
+            import httpx as _httpx
+
+            _max_stream_retries = int(os.getenv("HERMES_STREAM_RETRIES", 2))
+
             try:
-                if self.api_mode == "anthropic_messages":
-                    self._try_refresh_anthropic_client_credentials()
-                    result["response"] = _call_anthropic()
-                else:
-                    result["response"] = _call_chat_completions()
-            except Exception as e:
-                if deltas_were_sent["yes"]:
-                    # Streaming failed AFTER some tokens were already delivered
-                    # to consumers. Don't fall back — that would cause
-                    # double-delivery (partial streamed + full non-streamed).
-                    # Let the error propagate; the partial content already
-                    # reached the user via the stream.
-                    logger.warning("Streaming failed after partial delivery, not falling back: %s", e)
-                    result["error"] = e
-                else:
-                    # Streaming failed before any tokens reached consumers.
-                    # Safe to fall back to the standard non-streaming path.
-                    logger.info("Streaming failed before delivery, falling back to non-streaming: %s", e)
+                for _stream_attempt in range(_max_stream_retries + 1):
                     try:
-                        result["response"] = self._interruptible_api_call(api_kwargs)
-                    except Exception as fallback_err:
-                        result["error"] = fallback_err
+                        if self.api_mode == "anthropic_messages":
+                            self._try_refresh_anthropic_client_credentials()
+                            result["response"] = _call_anthropic()
+                        else:
+                            result["response"] = _call_chat_completions()
+                        return  # success
+                    except Exception as e:
+                        if deltas_were_sent["yes"]:
+                            # Streaming failed AFTER some tokens were already
+                            # delivered.  Don't retry or fall back — partial
+                            # content already reached the user.
+                            logger.warning(
+                                "Streaming failed after partial delivery, not retrying: %s", e
+                            )
+                            result["error"] = e
+                            return
+
+                        _is_timeout = isinstance(
+                            e, (_httpx.ReadTimeout, _httpx.ConnectTimeout, _httpx.PoolTimeout)
+                        )
+                        _is_conn_err = isinstance(
+                            e, (_httpx.ConnectError, _httpx.RemoteProtocolError, ConnectionError)
+                        )
+
+                        if _is_timeout or _is_conn_err:
+                            # Transient network / timeout error.  Retry the
+                            # streaming request with a fresh connection rather
+                            # than falling back to non-streaming (which would
+                            # hang for up to 15 min on the same dead server).
+                            if _stream_attempt < _max_stream_retries:
+                                logger.info(
+                                    "Streaming attempt %s/%s failed (%s: %s), "
+                                    "retrying with fresh connection...",
+                                    _stream_attempt + 1,
+                                    _max_stream_retries + 1,
+                                    type(e).__name__,
+                                    e,
+                                )
+                                # Close the stale request client before retry
+                                stale = request_client_holder.get("client")
+                                if stale is not None:
+                                    self._close_request_openai_client(
+                                        stale, reason="stream_retry_cleanup"
+                                    )
+                                    request_client_holder["client"] = None
+                                continue
+                            # Exhausted retries — propagate to outer loop
+                            logger.warning(
+                                "Streaming exhausted %s retries on transient error: %s",
+                                _max_stream_retries + 1,
+                                e,
+                            )
+                            result["error"] = e
+                            return
+
+                        # Non-transient error (e.g. "streaming not supported",
+                        # auth error, 4xx).  Fall back to non-streaming once.
+                        err_msg = str(e).lower()
+                        if "stream" in err_msg and "not supported" in err_msg:
+                            logger.info(
+                                "Streaming not supported, falling back to non-streaming: %s", e
+                            )
+                            try:
+                                result["response"] = self._interruptible_api_call(api_kwargs)
+                            except Exception as fallback_err:
+                                result["error"] = fallback_err
+                            return
+
+                        # Unknown error — propagate to outer retry loop
+                        result["error"] = e
+                        return
             finally:
                 request_client = request_client_holder.get("client")
                 if request_client is not None:
diff --git a/tests/tools/test_skills_hub.py b/tests/tools/test_skills_hub.py
index c74fa2d88..778a77ba1 100644
--- a/tests/tools/test_skills_hub.py
+++ b/tests/tools/test_skills_hub.py
@@ -305,6 +305,154 @@ class TestSkillsShSource:
         assert bundle.files["SKILL.md"] == "# react"
         assert mock_get.called
 
+    @patch("tools.skills_hub._write_index_cache")
+    @patch("tools.skills_hub._read_index_cache", return_value=None)
+    @patch("tools.skills_hub.httpx.get")
+    @patch.object(GitHubSource, "fetch")
+    def test_fetch_falls_back_to_tree_search_for_deeply_nested_skills(
+        self, mock_fetch, mock_get, _mock_read_cache, _mock_write_cache,
+    ):
+        """Skills in deeply nested dirs (e.g. cli-tool/components/skills/dev/my-skill/)
+        are found via the GitHub Trees API when candidate paths and shallow scan fail."""
+        tree_entries = [
+            {"path": "README.md", "type": "blob"},
+            {"path": "cli-tool/components/skills/development/my-skill/SKILL.md", "type": "blob"},
+            {"path": "cli-tool/components/skills/development/other-skill/SKILL.md", "type": "blob"},
+        ]
+
+        def _httpx_get_side_effect(url, **kwargs):
+            resp = MagicMock()
+            if "/api/search" in url:
+                resp.status_code = 404
+                return resp
+            if url.endswith("/contents/"):
+                # Root listing for shallow scan — return empty so it falls through
+                resp.status_code = 200
+                resp.json = lambda: []
+                return resp
+            if "/contents/" in url:
+                # All contents API calls fail (candidate paths miss)
+                resp.status_code = 404
+                return resp
+            if url.endswith("owner/repo"):
+                # Repo info → default branch
+                resp.status_code = 200
+                resp.json = lambda: {"default_branch": "main"}
+                return resp
+            if "/git/trees/main" in url:
+                resp.status_code = 200
+                resp.json = lambda: {"tree": tree_entries}
+                return resp
+            # skills.sh detail page
+            resp.status_code = 200
+            resp.text = "<h1>my-skill</h1>"
+            return resp
+
+        mock_get.side_effect = _httpx_get_side_effect
+
+        resolved_bundle = SkillBundle(
+            name="my-skill",
+            files={"SKILL.md": "# My Skill"},
+            source="github",
+            identifier="owner/repo/cli-tool/components/skills/development/my-skill",
+            trust_level="community",
+        )
+        mock_fetch.side_effect = lambda ident: resolved_bundle if "cli-tool/components" in ident else None
+
+        bundle = self._source().fetch("skills-sh/owner/repo/my-skill")
+
+        assert bundle is not None
+        assert bundle.source == "skills.sh"
+        assert bundle.files["SKILL.md"] == "# My Skill"
+        # Verify the tree-resolved identifier was used for the final GitHub fetch
+        mock_fetch.assert_any_call("owner/repo/cli-tool/components/skills/development/my-skill")
+
+
+class TestFindSkillInRepoTree:
+    """Tests for GitHubSource._find_skill_in_repo_tree."""
+
+    def _source(self):
+        auth = MagicMock(spec=GitHubAuth)
+        auth.get_headers.return_value = {"Accept": "application/vnd.github.v3+json"}
+        return GitHubSource(auth=auth)
+
+    @patch("tools.skills_hub.httpx.get")
+    def test_finds_deeply_nested_skill(self, mock_get):
+        tree_entries = [
+            {"path": "README.md", "type": "blob"},
+            {"path": "cli-tool/components/skills/development/senior-backend/SKILL.md", "type": "blob"},
+            {"path": "cli-tool/components/skills/development/other/SKILL.md", "type": "blob"},
+        ]
+
+        def _side_effect(url, **kwargs):
+            resp = MagicMock()
+            if url.endswith("/davila7/claude-code-templates"):
+                resp.status_code = 200
+                resp.json = lambda: {"default_branch": "main"}
+            elif "/git/trees/main" in url:
+                resp.status_code = 200
+                resp.json = lambda: {"tree": tree_entries}
+            else:
+                resp.status_code = 404
+            return resp
+
+        mock_get.side_effect = _side_effect
+
+        result = self._source()._find_skill_in_repo_tree("davila7/claude-code-templates", "senior-backend")
+        assert result == "davila7/claude-code-templates/cli-tool/components/skills/development/senior-backend"
+
+    @patch("tools.skills_hub.httpx.get")
+    def test_finds_root_level_skill(self, mock_get):
+        tree_entries = [
+            {"path": "my-skill/SKILL.md", "type": "blob"},
+        ]
+
+        def _side_effect(url, **kwargs):
+            resp = MagicMock()
+            if "/contents" not in url and "/git/" not in url:
+                resp.status_code = 200
+                resp.json = lambda: {"default_branch": "main"}
+            elif "/git/trees/main" in url:
+                resp.status_code = 200
+                resp.json = lambda: {"tree": tree_entries}
+            else:
+                resp.status_code = 404
+            return resp
+
+        mock_get.side_effect = _side_effect
+
+        result = self._source()._find_skill_in_repo_tree("owner/repo", "my-skill")
+        assert result == "owner/repo/my-skill"
+
+    @patch("tools.skills_hub.httpx.get")
+    def test_returns_none_when_skill_not_found(self, mock_get):
+        tree_entries = [
+            {"path": "other-skill/SKILL.md", "type": "blob"},
+        ]
+
+        def _side_effect(url, **kwargs):
+            resp = MagicMock()
+            if "/contents" not in url and "/git/" not in url:
+                resp.status_code = 200
+                resp.json = lambda: {"default_branch": "main"}
+            elif "/git/trees/main" in url:
+                resp.status_code = 200
+                resp.json = lambda: {"tree": tree_entries}
+            else:
+                resp.status_code = 404
+            return resp
+
+        mock_get.side_effect = _side_effect
+
+        result = self._source()._find_skill_in_repo_tree("owner/repo", "nonexistent")
+        assert result is None
+
+    @patch("tools.skills_hub.httpx.get")
+    def test_returns_none_when_repo_api_fails(self, mock_get):
+        mock_get.return_value = MagicMock(status_code=404)
+        result = self._source()._find_skill_in_repo_tree("owner/repo", "my-skill")
+        assert result is None
+
 
 class TestWellKnownSkillSource:
     def _source(self):
diff --git a/tools/skills_hub.py b/tools/skills_hub.py
index 5f9f10c27..8ae65ed76 100644
--- a/tools/skills_hub.py
+++ b/tools/skills_hub.py
@@ -434,6 +434,56 @@ class GitHubSource(SkillSource):
 
         return files
 
+    def _find_skill_in_repo_tree(self, repo: str, skill_name: str) -> Optional[str]:
+        """Use the GitHub Trees API to find a skill directory anywhere in the repo.
+
+        Returns the full identifier (``repo/path/to/skill``) or ``None``.
+        This is a single API call regardless of repo depth, so it efficiently
+        handles deeply nested directory structures like
+        ``cli-tool/components/skills/development/<skill>/SKILL.md``.
+        """
+        # Get default branch
+        try:
+            resp = httpx.get(
+                f"https://api.github.com/repos/{repo}",
+                headers=self.auth.get_headers(),
+                timeout=15,
+                follow_redirects=True,
+            )
+            if resp.status_code != 200:
+                return None
+            default_branch = resp.json().get("default_branch", "main")
+        except (httpx.HTTPError, json.JSONDecodeError):
+            return None
+
+        # Get recursive tree (single API call for the entire repo)
+        try:
+            resp = httpx.get(
+                f"https://api.github.com/repos/{repo}/git/trees/{default_branch}",
+                params={"recursive": "1"},
+                headers=self.auth.get_headers(),
+                timeout=30,
+                follow_redirects=True,
+            )
+            if resp.status_code != 200:
+                return None
+            tree_data = resp.json()
+        except (httpx.HTTPError, json.JSONDecodeError):
+            return None
+
+        # Look for SKILL.md files inside directories named <skill_name>
+        skill_md_suffix = f"/{skill_name}/SKILL.md"
+        for entry in tree_data.get("tree", []):
+            if entry.get("type") != "blob":
+                continue
+            path = entry.get("path", "")
+            if path.endswith(skill_md_suffix) or path == f"{skill_name}/SKILL.md":
+                # Strip /SKILL.md to get the skill directory path
+                skill_dir = path[: -len("/SKILL.md")]
+                return f"{repo}/{skill_dir}"
+
+        return None
+
     def _fetch_file_content(self, repo: str, path: str) -> Optional[str]:
         """Fetch a single file's content from GitHub."""
         url = f"https://api.github.com/repos/{repo}/contents/{path}"
@@ -1014,6 +1064,14 @@ class SkillsShSource(SkillSource):
         except Exception:
             pass
 
+        # Final fallback: use the GitHub Trees API to find the skill anywhere
+        # in the repo tree.  This handles deeply nested structures like
+        # cli-tool/components/skills/development/<skill>/ that the shallow
+        # scan above can't reach.
+        tree_result = self.github._find_skill_in_repo_tree(repo, skill_token)
+        if tree_result:
+            return tree_result
+
         return None
 
     def _finalize_inspect_meta(self, meta: SkillMeta, canonical: str, detail: Optional[dict]) -> SkillMeta: