feat(web_tools): add Brave Search as search backend

Adds Brave Search (https://api.search.brave.com) as an additional
web search backend alongside Exa, Firecrawl, Parallel, and Tavily.

- New _brave_search() function using X-Subscription-Token auth.
- Normalizer maps Brave web.results[] -> standard {web:[{title,url,description,position}]}.
- _get_backend() and _is_backend_available() recognise "brave".
- web_search_tool() dispatches to _brave_search when backend is brave.
- web_extract_tool returns a clear error when Brave is configured
  (Brave has no extract API); web_crawl_tool falls through to the
  existing Firecrawl check_firecrawl_api_key guard.
- check_web_api_key() and the CLI status printout include brave.

Motivation: Brave Search ships a generous free tier and is already
widely used in other LLM tooling. Adding it lets users with only
BRAVE_API_KEY set run web_search without needing to provision a
second provider for search.
This commit is contained in:
Tipiweb 2026-04-19 12:33:45 +02:00
parent 4b6ff0eb7f
commit 5aee5e0970

View file

@ -13,6 +13,7 @@ Available tools:
- web_crawl_tool: Crawl websites with specific instructions
Backend compatibility:
- Brave: https://api.search.brave.com (search only; web_extract_tool returns an error for Brave, web_crawl_tool relies on the Firecrawl guard)
- Exa: https://exa.ai (search, extract)
- Firecrawl: https://docs.firecrawl.dev/introduction (search, extract, crawl; direct or derived firecrawl-gateway.<domain> for Nous Subscribers)
- Parallel: https://docs.parallel.ai (search, extract)
@ -88,7 +89,7 @@ def _get_backend() -> str:
keys manually without running setup.
"""
configured = (_load_web_config().get("backend") or "").lower().strip()
if configured in ("parallel", "firecrawl", "tavily", "exa"):
if configured in ("parallel", "firecrawl", "tavily", "exa", "brave"):
return configured
# Fallback for manual / legacy config — pick the highest-priority
@ -99,6 +100,7 @@ def _get_backend() -> str:
("parallel", _has_env("PARALLEL_API_KEY")),
("tavily", _has_env("TAVILY_API_KEY")),
("exa", _has_env("EXA_API_KEY")),
("brave", _has_env("BRAVE_API_KEY")),
)
for backend, available in backend_candidates:
if available:
@ -117,6 +119,8 @@ def _is_backend_available(backend: str) -> bool:
return check_firecrawl_api_key()
if backend == "tavily":
return _has_env("TAVILY_API_KEY")
if backend == "brave":
return _has_env("BRAVE_API_KEY")
return False
# ─── Firecrawl Client ────────────────────────────────────────────────────────
@ -362,6 +366,57 @@ def _normalize_tavily_documents(response: dict, fallback_url: str = "") -> List[
return documents
# ─── Brave Search Client ─────────────────────────────────────────────────────

_BRAVE_BASE_URL = "https://api.search.brave.com/res/v1"

# Upper bound applied to the ``count`` query parameter sent to Brave.
_BRAVE_MAX_RESULTS = 20


def _brave_search(query: str, limit: int = 5) -> dict:
    """Call the Brave Search API and return results in the standard format.

    Brave exposes search only (no extract/crawl). ``web_extract_tool`` does
    not call this client; it returns an explicit error when Brave is the
    configured backend.

    Auth is via the ``X-Subscription-Token`` header.

    Args:
        query: Search query, sent as the ``q`` parameter.
        limit: Desired number of results. Coerced to ``int`` and clamped to
            ``1..20``; a value that cannot be converted falls back to 5.

    Returns:
        The normalized payload produced by
        ``_normalize_brave_search_results``.

    Raises:
        ValueError: If ``BRAVE_API_KEY`` is unset, empty, or whitespace-only.
        httpx.HTTPStatusError: If Brave answers with a non-2xx status.
    """
    # Strip so a whitespace-only or newline-terminated key is rejected with
    # the clear message below instead of being sent as a header value.
    api_key = (os.getenv("BRAVE_API_KEY") or "").strip()
    if not api_key:
        raise ValueError(
            "BRAVE_API_KEY environment variable not set. "
            "Get your API key at https://api-dashboard.search.brave.com/"
        )

    # Tool-call arguments may arrive as floats or numeric strings; make sure
    # an integer count is sent and logged.
    try:
        count = int(limit)
    except (TypeError, ValueError):
        count = 5
    count = max(1, min(count, _BRAVE_MAX_RESULTS))

    url = f"{_BRAVE_BASE_URL}/web/search"
    headers = {
        "X-Subscription-Token": api_key,
        "Accept": "application/json",
    }
    params = {
        "q": query,
        "count": count,
    }

    logger.info("Brave search: '%s' (limit: %d)", query, params["count"])
    response = httpx.get(url, headers=headers, params=params, timeout=60)
    response.raise_for_status()
    return _normalize_brave_search_results(response.json())
def _normalize_brave_search_results(response: dict) -> dict:
    """Normalize Brave /web/search response to the standard web search format.

    Brave returns ``{web: {results: [{title, url, description, ...}]}}``.
    We map to ``{success, data: {web: [{title, url, description, position}]}}``.

    Args:
        response: Decoded JSON body of a Brave ``/web/search`` call.

    Returns:
        A dict with ``success`` set to ``True`` and ``data.web`` holding one
        entry per usable result. ``position`` is 1-based and contiguous.
        Missing or ``null`` fields become empty strings. A payload without a
        usable ``web.results`` list yields an empty result list.
    """
    web_section = response.get("web") if isinstance(response, dict) else None
    raw_results = web_section.get("results") if isinstance(web_section, dict) else None
    if not isinstance(raw_results, list):
        raw_results = []

    # Drop malformed entries before numbering so positions stay contiguous.
    usable = [item for item in raw_results if isinstance(item, dict)]

    web_results = [
        {
            # ``or ""`` also covers an explicit JSON null, which
            # ``dict.get(key, "")`` would pass through as None.
            "title": item.get("title") or "",
            "url": item.get("url") or "",
            "description": item.get("description") or "",
            "position": position,
        }
        for position, item in enumerate(usable, start=1)
    ]
    return {"success": True, "data": {"web": web_results}}
def _to_plain_object(value: Any) -> Any:
"""Convert SDK objects to plain python data structures when possible."""
if value is None:
@ -1102,6 +1157,15 @@ def web_search_tool(query: str, limit: int = 5) -> str:
_debug.save()
return result_json
if backend == "brave":
response_data = _brave_search(query, limit)
debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", []))
result_json = json.dumps(response_data, indent=2, ensure_ascii=False)
debug_call_data["final_response_size"] = len(result_json)
_debug.log_call("web_search_tool", debug_call_data)
_debug.save()
return result_json
if backend == "tavily":
logger.info("Tavily search: '%s' (limit: %d)", query, limit)
raw = _tavily_request("search", {
@ -1252,6 +1316,14 @@ async def web_extract_tool(
"include_images": False,
})
results = _normalize_tavily_documents(raw, fallback_url=safe_urls[0] if safe_urls else "")
elif backend == "brave":
# Brave has no extract API — tell the caller to switch
# backends or configure Firecrawl for extraction.
return tool_error(
"Brave backend supports web_search only. Configure FIRECRAWL_API_KEY "
"or set web.backend to firecrawl/tavily/exa/parallel for extract.",
success=False,
)
else:
# ── Firecrawl extraction ──
# Determine requested formats for Firecrawl v2
@ -1922,9 +1994,9 @@ def check_firecrawl_api_key() -> bool:
def check_web_api_key() -> bool:
    """Check whether the configured web backend is available.

    Returns:
        The availability of the explicitly configured backend when
        ``backend`` in the web config names a supported one. Otherwise
        ``True`` if any supported backend is available.
    """
    supported = ("exa", "parallel", "firecrawl", "tavily", "brave")
    # ``or ""`` mirrors _get_backend(): an explicit null ``backend`` in the
    # config must not raise AttributeError on ``.lower()``.
    configured = (_load_web_config().get("backend") or "").lower().strip()
    if configured in supported:
        return _is_backend_available(configured)
    return any(_is_backend_available(backend) for backend in supported)
def check_auxiliary_model() -> bool:
@ -1959,6 +2031,8 @@ if __name__ == "__main__":
print(" Using Parallel API (https://parallel.ai)")
elif backend == "tavily":
print(" Using Tavily API (https://tavily.com)")
elif backend == "brave":
print(" Using Brave Search API (https://api.search.brave.com)")
else:
if firecrawl_url_available:
print(f" Using self-hosted Firecrawl: {os.getenv('FIRECRAWL_API_URL').strip().rstrip('/')}")
@ -1971,7 +2045,7 @@ if __name__ == "__main__":
else:
print("❌ No web search backend configured")
print(
"Set EXA_API_KEY, PARALLEL_API_KEY, TAVILY_API_KEY, FIRECRAWL_API_KEY, FIRECRAWL_API_URL"
"Set EXA_API_KEY, PARALLEL_API_KEY, TAVILY_API_KEY, BRAVE_API_KEY, FIRECRAWL_API_KEY, FIRECRAWL_API_URL"
f"{_firecrawl_backend_help_suffix()}"
)