mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-07 08:02:23 +00:00
chore(web): remove web_crawl tool + provider crawl plumbing (#33824)
The web_crawl_tool() function was an orphan — no model schema registered it, no skill or CLI command called it, and the agent had no way to invoke it. PR #32608 proposed wiring it up as a model-callable tool; we've decided not to expose crawl as a separate capability since web_search + web_extract cover the use cases we want models to have.

Removed:
- tools/web_tools.py: web_crawl_tool() (~230 LOC)
- plugins/web/firecrawl/provider.py: supports_crawl() + crawl()
- plugins/web/tavily/provider.py: supports_crawl() + crawl()
- plugins/web/xai/provider.py: supports_crawl() override
- agent/web_search_provider.py: supports_crawl() + crawl() ABC methods
- agent/web_search_registry.py: get_active_crawl_provider() + the 'crawl' branch in _resolve()
- agent/display.py: web_crawl tool-progress rendering
- hermes_cli/config.py: 'web_crawl' from TAVILY_API_KEY.tools
- tools/website_policy.py: stale comment reference
- Tests: removed TestWebCrawlTavily class, the two website-policy web_crawl tests, the searxng/ddgs/brave-free crawl-error tests, the integration test_web_crawl method, and the test_unconfigured_crawl_emits_top_level_error test. Trimmed the capability-flag parametrize list and the WebSearchProvider ABC conformance tests.
- Docs: trimmed the Crawl column from capability tables in both EN and zh-Hans, updated the developer-guide ABC table.

Net: 25 files, +115/-1067.

Closes #33762 (the schema-text bug only existed if #32608 landed). Supersedes #32608.
This commit is contained in:
parent
b243afb68b
commit
5e1f793430
25 changed files with 115 additions and 1067 deletions
|
|
@ -90,20 +90,17 @@ class TestBundledPluginsRegister:
|
|||
]
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"plugin_name,expected_search,expected_extract,expected_crawl",
|
||||
"plugin_name,expected_search,expected_extract",
|
||||
[
|
||||
("brave-free", True, False, False),
|
||||
("ddgs", True, False, False),
|
||||
("searxng", True, False, False),
|
||||
("exa", True, True, False),
|
||||
("parallel", True, True, False),
|
||||
("tavily", True, True, True),
|
||||
# firecrawl: search + extract + crawl. Crawl was originally
|
||||
# disabled in the migration (fell through to a legacy inline
|
||||
# path); the follow-up commit enabled it natively.
|
||||
("firecrawl", True, True, True),
|
||||
("brave-free", True, False),
|
||||
("ddgs", True, False),
|
||||
("searxng", True, False),
|
||||
("exa", True, True),
|
||||
("parallel", True, True),
|
||||
("tavily", True, True),
|
||||
("firecrawl", True, True),
|
||||
# xai: search-only via Grok's agentic web_search tool.
|
||||
("xai", True, False, False),
|
||||
("xai", True, False),
|
||||
],
|
||||
)
|
||||
def test_capability_flags_match_spec(
|
||||
|
|
@ -111,7 +108,6 @@ class TestBundledPluginsRegister:
|
|||
plugin_name: str,
|
||||
expected_search: bool,
|
||||
expected_extract: bool,
|
||||
expected_crawl: bool,
|
||||
) -> None:
|
||||
_ensure_plugins_loaded()
|
||||
from agent.web_search_registry import get_provider
|
||||
|
|
@ -120,7 +116,6 @@ class TestBundledPluginsRegister:
|
|||
assert provider is not None, f"plugin {plugin_name!r} not registered"
|
||||
assert provider.supports_search() is expected_search
|
||||
assert provider.supports_extract() is expected_extract
|
||||
assert provider.supports_crawl() is expected_crawl
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"plugin_name",
|
||||
|
|
@ -457,38 +452,6 @@ class TestErrorResponseShapes:
|
|||
if result: # if anything came back, it should be an error entry
|
||||
assert "error" in result[0]
|
||||
|
||||
def test_tavily_crawl_returns_error_dict_when_unconfigured(self) -> None:
|
||||
_ensure_plugins_loaded()
|
||||
from agent.web_search_registry import get_provider
|
||||
|
||||
p = get_provider("tavily")
|
||||
assert p is not None
|
||||
result = p.crawl("https://example.com")
|
||||
assert isinstance(result, dict)
|
||||
assert "results" in result
|
||||
assert isinstance(result["results"], list)
|
||||
if result["results"]:
|
||||
assert "error" in result["results"][0]
|
||||
|
||||
def test_firecrawl_crawl_returns_error_dict_when_unconfigured(self):
|
||||
"""firecrawl crawl is async (wraps SDK in to_thread); error must be
|
||||
surfaced via the per-page result shape, not raised."""
|
||||
_ensure_plugins_loaded()
|
||||
from agent.web_search_registry import get_provider
|
||||
|
||||
p = get_provider("firecrawl")
|
||||
assert p is not None
|
||||
assert inspect.iscoroutinefunction(p.crawl)
|
||||
result = asyncio.run(p.crawl("https://example.com"))
|
||||
assert isinstance(result, dict)
|
||||
assert "results" in result
|
||||
assert isinstance(result["results"], list)
|
||||
# Without FIRECRAWL_API_KEY, the plugin's _get_firecrawl_client()
|
||||
# raises ValueError which is caught and returned as a per-page error.
|
||||
assert len(result["results"]) >= 1
|
||||
assert "error" in result["results"][0]
|
||||
assert result["results"][0]["url"] == "https://example.com"
|
||||
|
||||
def test_firecrawl_config_error_points_paid_users_to_nous_subscription(self, monkeypatch):
|
||||
from plugins.web.firecrawl import provider as firecrawl_provider
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue