refactor(tools): drop hardcoded web picker rows + skiplist; plugins are sole source

Removes the seven hardcoded TOOL_CATEGORIES["web"] provider rows that
duplicated the plugin-registered providers, and deletes the
_WEB_PLUGIN_SKIPLIST that existed to prevent duplicate picker rows
during the migration. The Web Search & Extract category now derives its
provider rows entirely from agent.web_search_registry via
_plugin_web_search_providers(), matching how Spotify, Google Meet, and
the image_gen plugins are surfaced.

Removed (deduplicated against plugin schemas):
  - Firecrawl Cloud         → plugins.web.firecrawl
  - Exa                     → plugins.web.exa
  - Parallel                → plugins.web.parallel
  - Tavily                  → plugins.web.tavily
  - SearXNG                 → plugins.web.searxng
  - Brave Search (Free Tier) → plugins.web.brave_free
  - DuckDuckGo (ddgs)       → plugins.web.ddgs (post_setup hook preserved)

Retained in TOOL_CATEGORIES["web"]:
  - Nous Subscription   — relies on the requires_nous_auth,
                          managed_nous_feature, and override_env_vars
                          fields to drive the managed-gateway UX. Not a
                          provider — a different *setup flow* for the
                          firecrawl backend.
  - Firecrawl Self-Hosted — points firecrawl at a private Docker URL
                            via FIRECRAWL_API_URL only. Same reason:
                            UX setup-flow row, not a provider.

These two rows describe alternative auth/billing paths for the
firecrawl backend; they intentionally share web_backend="firecrawl"
with the plugin row but light up different env-var prompts.

Plugin schema extensions
------------------------
- ddgs plugin's get_setup_schema() now emits `post_setup: "ddgs"` so
  selection still triggers the pip-install hook in _run_post_setup().
- _plugin_web_search_providers() passes `post_setup` through verbatim
  when present in the schema (other future plugins like camofox / a
  hypothetical playwright-web plugin can opt in the same way).
- Picker rows now carry both `web_backend` (legacy field consumed by
  setup + selection helpers) and `web_search_plugin_name`
  (informational marker), so behavior is identical between hardcoded
  and plugin-registered rows.

Net diff
--------
- hermes_cli/tools_config.py: +50/-141 lines (net -91 lines)
- plugins/web/ddgs/provider.py: +7/-4 (post_setup field + badge polish)

Verified
--------
- Compile-clean for both files
- Picker shows: 2 hardcoded rows (Nous Subscription, Firecrawl
  Self-Hosted) + 7 plugin rows (alphabetically: Brave Search,
  DuckDuckGo, Exa, Firecrawl, Parallel, SearXNG, Tavily). DuckDuckGo
  row carries post_setup="ddgs" for first-time install.
- 173 web-specific tests still pass.
This commit is contained in:
kshitijk4poor 2026-05-14 00:53:44 +05:30 committed by Teknium
parent 748f3e016b
commit 24fe60faa2
2 changed files with 50 additions and 98 deletions

View file

@ -245,6 +245,15 @@ TOOL_CATEGORIES = {
"setup_title": "Select Search Provider",
"setup_note": "A free DuckDuckGo search skill is also included — skip this if you don't need a premium provider.",
"icon": "🔍",
# Per-provider rows are injected at runtime from
# plugins.web.<vendor>.provider via _plugin_web_search_providers()
# in _visible_providers(). Only non-provider UX setup-flow rows
# for the firecrawl backend are listed here:
# - "Nous Subscription" — managed Firecrawl billed via Nous
# subscription (requires_nous_auth + override_env_vars).
# - "Firecrawl Self-Hosted" — points firecrawl at a private
# Docker instance via FIRECRAWL_API_URL only.
# See PR #25182 for the migration rationale.
"providers": [
{
"name": "Nous Subscription",
@ -256,42 +265,6 @@ TOOL_CATEGORIES = {
"managed_nous_feature": "web",
"override_env_vars": ["FIRECRAWL_API_KEY", "FIRECRAWL_API_URL"],
},
{
"name": "Firecrawl Cloud",
"badge": "★ recommended",
"tag": "Full-featured search, extract, and crawl",
"web_backend": "firecrawl",
"env_vars": [
{"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
],
},
{
"name": "Exa",
"badge": "paid",
"tag": "Neural search with semantic understanding",
"web_backend": "exa",
"env_vars": [
{"key": "EXA_API_KEY", "prompt": "Exa API key", "url": "https://exa.ai"},
],
},
{
"name": "Parallel",
"badge": "paid",
"tag": "AI-powered search and extract",
"web_backend": "parallel",
"env_vars": [
{"key": "PARALLEL_API_KEY", "prompt": "Parallel API key", "url": "https://parallel.ai"},
],
},
{
"name": "Tavily",
"badge": "free tier",
"tag": "Search, extract, and crawl — 1000 free searches/mo",
"web_backend": "tavily",
"env_vars": [
{"key": "TAVILY_API_KEY", "prompt": "Tavily API key", "url": "https://app.tavily.com/home"},
],
},
{
"name": "Firecrawl Self-Hosted",
"badge": "free · self-hosted",
@ -301,32 +274,6 @@ TOOL_CATEGORIES = {
{"key": "FIRECRAWL_API_URL", "prompt": "Your Firecrawl instance URL (e.g., http://localhost:3002)"},
],
},
{
"name": "SearXNG",
"badge": "free · self-hosted · search only",
"tag": "Privacy-respecting metasearch engine — search only (pair with any extract provider)",
"web_backend": "searxng",
"env_vars": [
{"key": "SEARXNG_URL", "prompt": "Your SearXNG instance URL (e.g., http://localhost:8080)", "url": "https://searxng.github.io/searxng/"},
],
},
{
"name": "Brave Search (Free Tier)",
"badge": "free tier · search only",
"tag": "2,000 queries/mo free — search only (pair with any extract provider)",
"web_backend": "brave-free",
"env_vars": [
{"key": "BRAVE_SEARCH_API_KEY", "prompt": "Brave Search subscription token", "url": "https://brave.com/search/api/"},
],
},
{
"name": "DuckDuckGo (ddgs)",
"badge": "free · no key · search only",
"tag": "Search via the ddgs Python package — no API key (pair with any extract provider)",
"web_backend": "ddgs",
"env_vars": [],
"post_setup": "ddgs",
},
],
},
"image_gen": {
@ -1577,28 +1524,27 @@ def _plugin_video_gen_providers() -> list[dict]:
# Mirror of _plugin_image_gen_providers for web search backends. Surfaces
# plugin-registered web providers (brave-free / ddgs / searxng during the
# spike) so they appear in the "Web Search & Extract" picker row. While
# the legacy TOOL_CATEGORIES entries still cover those names, this helper
# skip-lists them to avoid duplicate rows.
#
# When the migration PR drops the hardcoded entries, the skip-list can be
# removed and this helper becomes the sole source of web-provider picker
# rows (matching how Spotify / Google Meet are surfaced today purely from
# their plugins).
_WEB_PLUGIN_SKIPLIST = frozenset({
"brave-free", "ddgs", "searxng", "exa", "parallel", "tavily", "firecrawl",
})
# every plugin-registered web provider so it appears in the
# "Web Search & Extract" picker. All seven providers (brave-free, ddgs,
# searxng, exa, parallel, tavily, firecrawl) live as plugins after
# PR #25182 — this helper is the sole source of truth for the category's
# provider rows. The hardcoded entries that used to drive the category
# were deleted in the same PR; only the two non-provider UX rows
# ("Nous Subscription" managed-gateway entry, "Firecrawl Self-Hosted")
# remain in TOOL_CATEGORIES because they describe alternative *setup
# flows* for the firecrawl backend rather than distinct providers.
def _plugin_web_search_providers() -> list[dict]:
"""Build picker-row dicts from plugin-registered web search providers.
Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
row but carries a ``web_search_plugin_name`` marker so downstream
code can route through ``agent.web_search_registry`` instead of the
legacy hardcoded dispatch. Names already covered by hardcoded picker
rows during the spike are skipped via :data:`_WEB_PLUGIN_SKIPLIST`.
Each returned dict is a regular ``TOOL_CATEGORIES`` provider row. It
populates both ``web_backend`` (legacy field consumed by setup +
selection helpers) and ``web_search_plugin_name`` (informational
marker) so the picker behaves identically whether a provider is
hardcoded or plugin-registered.
After PR #25182, all seven web providers (brave-free, ddgs, searxng,
exa, parallel, tavily, firecrawl) are plugins; this helper is the sole
source of provider rows for the Web Search & Extract category.
"""
try:
from agent.web_search_registry import list_providers as _list_web_providers
@ -1612,7 +1558,7 @@ def _plugin_web_search_providers() -> list[dict]:
rows: list[dict] = []
for provider in providers:
name = getattr(provider, "name", None)
if not name or name in _WEB_PLUGIN_SKIPLIST:
if not name:
continue
try:
schema = provider.get_setup_schema()
@ -1620,15 +1566,18 @@ def _plugin_web_search_providers() -> list[dict]:
continue
if not isinstance(schema, dict):
continue
rows.append(
{
"name": schema.get("name", provider.display_name),
"badge": schema.get("badge", ""),
"tag": schema.get("tag", ""),
"env_vars": schema.get("env_vars", []),
"web_search_plugin_name": name,
}
)
row = {
"name": schema.get("name", provider.display_name),
"badge": schema.get("badge", ""),
"tag": schema.get("tag", ""),
"env_vars": schema.get("env_vars", []),
"web_backend": name,
"web_search_plugin_name": name,
}
# Optional pass-through fields the schema can opt into.
if schema.get("post_setup"):
row["post_setup"] = schema["post_setup"]
rows.append(row)
return rows
@ -1653,11 +1602,11 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
if cat.get("name") == "Video Generation":
visible.extend(_plugin_video_gen_providers())
# Inject plugin-registered web search backends. During the spike the
# three migrated providers (brave-free, ddgs, searxng) still have
# hardcoded TOOL_CATEGORIES entries — the helper skips them so the
# picker doesn't show duplicates. When the migration PR deletes those
# hardcoded rows, this injection becomes the sole source of truth.
# Inject plugin-registered web search backends. After PR #25182, this
# is the SOLE source of provider rows for the Web Search & Extract
# category — the per-provider hardcoded entries were deleted. The two
# remaining hardcoded rows ("Nous Subscription", "Firecrawl
# Self-Hosted") are non-provider UX setup-flow rows for firecrawl.
if cat.get("name") == "Web Search & Extract":
visible.extend(_plugin_web_search_providers())

View file

@ -95,7 +95,10 @@ class DDGSWebSearchProvider(WebSearchProvider):
def get_setup_schema(self) -> Dict[str, Any]:
return {
"name": "DuckDuckGo (ddgs)",
"badge": "free",
"tag": "No API key — community ddgs package (pip install ddgs).",
"badge": "free · no key · search only",
"tag": "Search via the ddgs Python package — no API key (pair with any extract provider)",
"env_vars": [],
# Trigger `_run_post_setup("ddgs")` after the user picks this row
# so the ddgs Python package gets pip-installed on first selection.
"post_setup": "ddgs",
}