"""xAI Web Search — plugin form.

Routes ``web_search`` tool calls through xAI's agentic Web Search tool
(server-side ``web_search`` on the Responses API). Grok runs the actual
searching and page-browsing server-side; we ask it to return the top results
as structured JSON so we can hand back the same
``{title, url, description, position}`` rows every other Hermes web provider
produces.

Reference: https://docs.x.ai/developers/tools/web-search

Config keys this provider responds to::

    web:
      search_backend: "xai"     # explicit per-capability
      backend: "xai"            # shared fallback

Optional knobs (under ``web.xai`` in ``config.yaml``)::

    web:
      xai:
        model: "grok-4.3"              # reasoning model required by web_search
        allowed_domains: ["x.ai"]      # max 5 — mutually exclusive with excluded_domains
        excluded_domains: ["bad.com"]  # max 5 — mutually exclusive with allowed_domains
        timeout: 90                    # seconds (default 90)

Auth: reuses :func:`tools.xai_http.resolve_xai_http_credentials`, which
prefers Hermes-managed xAI Grok OAuth (via ``hermes auth``) and falls back to
``XAI_API_KEY`` (resolved through ``~/.hermes/.env``, then ``os.environ``).
"""

from __future__ import annotations

import json
import logging
import re
from typing import Any, Dict, List, Optional

from agent.web_search_provider import WebSearchProvider
from tools.xai_http import (
    has_xai_credentials,
    hermes_xai_user_agent,
    resolve_xai_http_credentials,
)

logger = logging.getLogger(__name__)

# Model used when ``web.xai.model`` is missing or not a non-empty string.
DEFAULT_MODEL = "grok-4.3"
# HTTP timeout in seconds used when ``web.xai.timeout`` is missing or invalid.
DEFAULT_TIMEOUT = 90
_MAX_DOMAIN_FILTERS = 5  # xAI hard cap on allowed_domains / excluded_domains

# Match the JSON object Grok is asked to emit. Tolerates leading/trailing
# prose since reasoning models occasionally narrate before the JSON block
# even when explicitly asked not to.
_JSON_BLOCK_RE = re.compile(r"\{[\s\S]*\}", re.MULTILINE)

# Upper bound on how many ``{`` positions the brace-scanning JSON fallback
# will try, so pathological model output cannot turn parsing quadratic.
_MAX_JSON_SCAN_ATTEMPTS = 32

# Number of characters of preceding text used as a description when results
# are derived from ``url_citation`` annotations.
_ANNOTATION_CONTEXT_CHARS = 200


# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------


def _load_xai_web_config() -> Dict[str, Any]:
    """Read ``web.xai`` from config.yaml (returns {} on miss).

    Any failure (import error, unreadable config, unexpected shape) is logged
    at debug level and treated as "no overrides" so search keeps working with
    defaults.
    """
    try:
        from hermes_cli.config import load_config

        cfg = load_config()
        web_section = cfg.get("web") if isinstance(cfg, dict) else None
        xai_section = web_section.get("xai") if isinstance(web_section, dict) else None
        return xai_section if isinstance(xai_section, dict) else {}
    except Exception as exc:  # noqa: BLE001
        logger.debug("Could not load web.xai config: %s", exc)
        return {}


def _coerce_domain_list(value: Any) -> List[str]:
    """Coerce a config value to a clean list of <=5 domain strings.

    Non-list values yield ``[]``; non-string and blank entries are dropped and
    the survivors are whitespace-stripped. Entries beyond the xAI cap are
    discarded, with a warning so a silently ignored filter is visible in logs.
    """
    if not isinstance(value, list):
        return []
    cleaned = [
        item.strip() for item in value if isinstance(item, str) and item.strip()
    ]
    if len(cleaned) > _MAX_DOMAIN_FILTERS:
        logger.warning(
            "xAI web search domain filters are capped at %d entries; "
            "ignoring %d extra",
            _MAX_DOMAIN_FILTERS,
            len(cleaned) - _MAX_DOMAIN_FILTERS,
        )
    return cleaned[:_MAX_DOMAIN_FILTERS]


def _clean_text(value: Any) -> str:
    """Return ``value`` as a stripped string, mapping ``None`` to ``""``.

    A plain ``str(value)`` would turn a JSON ``null`` into the literal text
    ``"None"``, which then leaks into URLs / titles handed back to the agent.
    """
    return "" if value is None else str(value).strip()


def _normalize_result_rows(parsed: Any, *, limit: int) -> List[Dict[str, Any]]:
    """Turn a decoded ``{"results": [...]}`` object into normalized rows.

    Returns ``[]`` when ``parsed`` is not a dict, has no ``results`` list, or
    contains no row with a usable URL. Malformed rows are skipped *before* the
    ``limit`` cap is applied, so one bad row does not cost a valid one.
    """
    if not isinstance(parsed, dict):
        return []
    rows = parsed.get("results")
    if not isinstance(rows, list):
        return []

    normalized: List[Dict[str, Any]] = []
    for row in rows:
        if len(normalized) >= limit:
            break
        if not isinstance(row, dict):
            continue
        url = _clean_text(row.get("url"))
        if not url:
            continue
        normalized.append(
            {
                "title": _clean_text(row.get("title")),
                "url": url,
                "description": _clean_text(row.get("description")),
                # Renumber from the kept results, not the raw input
                # index, so a dropped malformed row doesn't leave a
                # gap in the positions handed back to the agent.
                "position": len(normalized) + 1,
            }
        )
    return normalized


# ---------------------------------------------------------------------------
# Provider
# ---------------------------------------------------------------------------


class XAIWebSearchProvider(WebSearchProvider):
    """Search-only provider backed by xAI's agentic Web Search tool.

    Sends a structured prompt to Grok with ``tools=[{"type": "web_search"}]``
    enabled and asks it to return the top *limit* results as JSON. Falls back
    to the Responses API ``citations`` list if Grok ignores the JSON schema
    instruction (rare for grok-4.3 but cheap insurance).

    No extract capability — pair with Firecrawl / Tavily / Exa for
    ``web_extract`` if you need page content.

    Trust model
    -----------
    Unlike index-backed providers (Brave / Tavily / Exa) which return
    verbatim search-engine results, this backend is an LLM in a trench coat:
    Grok decides which URLs to surface, generates the titles and descriptions
    itself, and is influenced by the *content of the query*. A maliciously
    crafted query (e.g. injected via untrusted upstream input the agent picked
    up) can in principle steer Grok into emitting attacker-chosen URLs.
    Callers that pipe untrusted text directly into ``web_search`` should treat
    returned URLs the same way they would treat any model-generated link —
    validate before fetching.
    """

    @property
    def name(self) -> str:
        """Backend identifier matched against the ``web`` config keys."""
        return "xai"

    @property
    def display_name(self) -> str:
        """Human-readable provider label."""
        return "xAI Web Search (Grok)"

    def is_available(self) -> bool:
        """Cheap availability probe — env var OR auth-store has OAuth tokens.

        Delegates to :func:`tools.xai_http.has_xai_credentials`, which is
        deliberately *not* the same as :func:`resolve_xai_http_credentials`:
        it never triggers OAuth token refresh or acquires the auth-store lock.
        The ABC contract requires this method to be safe to call on every
        ``hermes tools`` repaint and at tool-registration time. Token
        freshness / refresh is handled inside :meth:`search`.
        """
        return has_xai_credentials()

    def supports_search(self) -> bool:
        """This provider implements ``search``."""
        return True

    def supports_extract(self) -> bool:
        """No page-content extraction."""
        return False

    def supports_crawl(self) -> bool:
        """No crawling."""
        return False

    # -- Search -----------------------------------------------------------

    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        """Execute a Grok-backed web search.

        Args:
            query: Search text, embedded verbatim in the prompt sent to Grok.
            limit: Maximum number of rows to return; coerced to ``int`` and
                clamped to ``1..100`` (falls back to 5 if not coercible).

        Returns:
            ``{"success": True, "data": {"web": [{title, url, description,
            position}, ...]}}`` on success (the list may be empty), or
            ``{"success": False, "error": str}`` on failure. Credential,
            HTTP, transport and payload-shape failures are all reported via
            the failure dict rather than raised.
        """
        try:
            from tools.interrupt import is_interrupted

            if is_interrupted():
                return {"success": False, "error": "Interrupted"}
        except Exception:  # noqa: BLE001 — interrupt module is best-effort
            pass

        # The resolver may refresh OAuth tokens and can therefore fail; the
        # 401 retry path below already guards the same call, so guard this
        # one too and keep the "failure dict, not exception" contract.
        try:
            creds = resolve_xai_http_credentials()
        except Exception as exc:  # noqa: BLE001
            logger.warning("xAI credential resolution failed: %s", exc)
            return {
                "success": False,
                "error": f"Could not resolve xAI credentials: {exc}",
            }

        api_key = str(creds.get("api_key") or "").strip()
        base_url = str(creds.get("base_url") or "https://api.x.ai/v1").strip().rstrip("/")
        if not api_key:
            return {
                "success": False,
                "error": (
                    "No xAI credentials found. Run `hermes auth` to sign in with "
                    "xAI Grok OAuth, or set XAI_API_KEY."
                ),
            }

        # Clamp limit to the same range the caller (web_search_tool) accepts,
        # so we don't silently downgrade explicit limits. Grok happily
        # produces longer lists; cost scales linearly with the requested
        # count via reasoning tokens, but that's the caller's call to make.
        try:
            limit = int(limit)
        except (TypeError, ValueError, OverflowError):
            limit = 5
        limit = max(1, min(limit, 100))

        cfg = _load_xai_web_config()
        model = cfg.get("model") if isinstance(cfg.get("model"), str) else DEFAULT_MODEL
        model = model.strip() or DEFAULT_MODEL
        try:
            timeout = float(cfg.get("timeout", DEFAULT_TIMEOUT))
        except (TypeError, ValueError, OverflowError):
            timeout = DEFAULT_TIMEOUT
        if not timeout > 0:
            # Zero, negative and NaN are never a usable HTTP timeout.
            timeout = DEFAULT_TIMEOUT

        allowed = _coerce_domain_list(cfg.get("allowed_domains"))
        excluded = _coerce_domain_list(cfg.get("excluded_domains"))
        if allowed and excluded:
            # xAI explicitly rejects this combo — surface a clear error
            # rather than a 400 from the API.
            return {
                "success": False,
                "error": (
                    "web.xai.allowed_domains and web.xai.excluded_domains "
                    "cannot both be set (xAI restriction)."
                ),
            }

        web_search_tool: Dict[str, Any] = {"type": "web_search"}
        if allowed:
            web_search_tool["filters"] = {"allowed_domains": allowed}
        elif excluded:
            web_search_tool["filters"] = {"excluded_domains": excluded}

        prompt = self._build_prompt(query, limit)
        payload: Dict[str, Any] = {
            "model": model,
            "input": [{"role": "user", "content": prompt}],
            "tools": [web_search_tool],
            # Drop inline citation markdown — we want the JSON block clean,
            # and we read URLs from annotations / citations separately.
            "include": ["no_inline_citations"],
        }
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "User-Agent": hermes_xai_user_agent(),
        }

        try:
            import httpx
        except ImportError:
            return {
                "success": False,
                "error": "httpx is not installed (required for xAI web search)",
            }

        logger.info(
            "xAI web search via %s: '%s' (limit=%d, model=%s)",
            base_url,
            query,
            limit,
            model,
        )

        # Two-attempt loop: if the first call returns 401 and our creds came
        # from the OAuth path, force-refresh the token once and retry. This
        # closes two gaps the proactive resolver check doesn't cover:
        #   (1) opaque (non-JWT) access tokens — `_xai_access_token_is_expiring`
        #       can't decode them and returns False, so refresh never fires
        #       until the server hands us a 401.
        #   (2) mid-window revocation — admin revoke, refresh-token rotation,
        #       or clock skew can produce 401s on a token whose JWT `exp` claim
        #       is still in the future.
        # Env-var (`XAI_API_KEY`) credentials skip the retry entirely — we
        # can't refresh those and an immediate retry would just burn quota.
        is_oauth_path = creds.get("provider") == "xai-oauth"
        resp = None
        for attempt in range(2):
            try:
                resp = httpx.post(
                    f"{base_url}/responses",
                    headers=headers,
                    json=payload,
                    timeout=timeout,
                )
                resp.raise_for_status()
                break
            except httpx.HTTPStatusError as exc:
                status = exc.response.status_code if exc.response is not None else 0
                if status == 401 and attempt == 0 and is_oauth_path:
                    logger.info(
                        "xAI web search got 401 on first attempt; forcing OAuth "
                        "refresh and retrying once.",
                    )
                    try:
                        refreshed = resolve_xai_http_credentials(force_refresh=True)
                        refreshed_key = str(refreshed.get("api_key") or "").strip()
                        if refreshed_key and refreshed_key != api_key:
                            api_key = refreshed_key
                            headers["Authorization"] = f"Bearer {api_key}"
                            continue
                        # Refresh returned the same (or empty) token — no point
                        # in retrying. Fall through to the error return below.
                    except Exception as refresh_exc:  # noqa: BLE001
                        logger.warning(
                            "xAI web search OAuth refresh after 401 failed: %s",
                            refresh_exc,
                        )
                body = ""
                try:
                    body = exc.response.text[:300] if exc.response is not None else ""
                except Exception:  # noqa: BLE001 — body is diagnostic only
                    body = ""
                logger.warning("xAI web search HTTP %d: %s", status, body)
                return {
                    "success": False,
                    "error": f"xAI web search returned HTTP {status}: {body}".rstrip(),
                }
            except httpx.RequestError as exc:
                logger.warning("xAI web search request error: %s", exc)
                return {"success": False, "error": f"Could not reach xAI: {exc}"}

        if resp is None:
            # Defensive — both attempts somehow exited the loop without resp.
            return {"success": False, "error": "xAI web search produced no response"}

        try:
            data = resp.json()
        except Exception as exc:  # noqa: BLE001
            logger.warning("xAI web search bad JSON: %s", exc)
            return {
                "success": False,
                "error": "Could not parse xAI Responses API reply as JSON",
            }

        if not isinstance(data, dict):
            # Valid JSON but not an object (list / string / number): there is
            # nothing to extract, and ``.get`` below would raise.
            logger.warning(
                "xAI web search returned non-object JSON: %s", type(data).__name__
            )
            return {
                "success": False,
                "error": "xAI Responses API reply was not a JSON object",
            }

        # xAI's Responses surface sometimes returns HTTP 200 with an error
        # envelope (model overloaded, content-policy refusal, etc.). Without
        # this check, ``_extract_results`` would silently produce an empty
        # list and we'd report success-with-no-rows — masking a real failure
        # the agent should see and decide whether to retry.
        api_error = data.get("error")
        err_msg = None
        if isinstance(api_error, dict):
            err_msg = (
                api_error.get("message")
                or api_error.get("code")
                or "unknown error"
            )
        elif isinstance(api_error, str) and api_error.strip():
            # Some envelopes carry the message as a bare string.
            err_msg = api_error.strip()
        if err_msg is not None:
            logger.warning("xAI web search returned error envelope: %s", err_msg)
            return {"success": False, "error": f"xAI returned an error: {err_msg}"}

        # Successful call with no usable rows still reports success with an
        # empty list so the model can decide whether to retry. Matches what
        # brave-free / exa do when the upstream API returns 0 hits.
        web_results = self._extract_results(data, limit=limit)
        return {"success": True, "data": {"web": web_results}}

    # -- Prompt + parsing -------------------------------------------------

    @staticmethod
    def _build_prompt(query: str, limit: int) -> str:
        """Compose the prompt that asks Grok to act as a search engine.

        We deliberately ask for a JSON object (not bare array) so we can
        match it cheaply with ``_JSON_BLOCK_RE``; we explicitly forbid prose,
        markdown fences, and inline-citation links to keep the payload
        parseable.
        """
        return (
            "Use the web_search tool to find current information for the query below, "
            "then respond with ONLY a single JSON object — no prose, no markdown "
            "fences, no inline citation links — matching this exact schema:\n\n"
            '{"results": [{"title": "string", "url": "string", '
            '"description": "1-2 sentence summary"}]}\n\n'
            f'Return at most {limit} results, ordered by relevance, with absolute '
            "https:// URLs. If no usable results exist, return "
            '{"results": []}.\n\n'
            f"Query: {query}"
        )

    @classmethod
    def _extract_results(
        cls,
        response_data: Dict[str, Any],
        *,
        limit: int,
    ) -> List[Dict[str, Any]]:
        """Pull a ``[{title, url, description, position}, ...]`` list out of a
        Responses-API reply.

        Strategy:
          1. Walk ``output[*].content[*].text`` for ``output_text`` blocks and
             try to parse the first JSON object that has a ``results`` list.
          2. If the JSON path fails, fall back to the message annotations
             (``url_citation`` entries), pairing each URL with the text that
             precedes it as a best-effort description.
          3. Last resort: the top-level ``citations`` URL list (no titles or
             descriptions).

        Returns ``[]`` when none of the three sources yields a usable row or
        when ``response_data`` is not a dict.
        """
        if not isinstance(response_data, dict):
            return []

        text_blocks, annotations = cls._collect_output_text(response_data)

        # Primary path: parse the JSON object Grok was asked for.
        for block in text_blocks:
            parsed = cls._try_parse_json_results(block, limit=limit)
            if parsed:
                return parsed

        # Secondary path: derive results from message annotations + raw text.
        # Only short-circuit when annotations actually yielded usable rows;
        # otherwise fall through to the citations list. (xAI currently only
        # emits ``url_citation`` annotations, but future annotation types
        # would silently produce an empty result set if we returned here
        # unconditionally — masking real data in ``citations``.)
        if annotations:
            joined_text = "\n".join(text_blocks)
            annotation_results = cls._results_from_annotations(
                annotations,
                joined_text,
                limit=limit,
            )
            if annotation_results:
                return annotation_results

        # Last-ditch: raw citations list (no titles or descriptions).
        citations = response_data.get("citations")
        if not isinstance(citations, list):
            return []
        # Filter and de-duplicate *before* applying the limit and numbering,
        # so positions stay contiguous (same rule as the JSON path) and a
        # blank / non-string entry doesn't consume one of the ``limit`` slots.
        urls = list(
            dict.fromkeys(
                u.strip() for u in citations if isinstance(u, str) and u.strip()
            )
        )
        return [
            {"title": "", "url": url, "description": "", "position": position}
            for position, url in enumerate(urls[:limit], start=1)
        ]

    @staticmethod
    def _collect_output_text(
        response_data: Dict[str, Any],
    ) -> tuple[List[str], List[Dict[str, Any]]]:
        """Return (text_blocks, annotations) extracted from ``response.output``.

        Only ``message`` items and their ``output_text`` chunks are read.
        Blank text is skipped; non-dict annotations are dropped. Anything with
        an unexpected shape is ignored rather than raising.
        """
        text_blocks: List[str] = []
        annotations: List[Dict[str, Any]] = []

        output = response_data.get("output")
        if not isinstance(output, list):
            return text_blocks, annotations

        for item in output:
            if not isinstance(item, dict) or item.get("type") != "message":
                continue
            content = item.get("content")
            if not isinstance(content, list):
                continue
            for chunk in content:
                if not isinstance(chunk, dict) or chunk.get("type") != "output_text":
                    continue
                text = chunk.get("text")
                if isinstance(text, str) and text.strip():
                    text_blocks.append(text)
                chunk_annotations = chunk.get("annotations")
                if isinstance(chunk_annotations, list):
                    annotations.extend(
                        ann for ann in chunk_annotations if isinstance(ann, dict)
                    )
        return text_blocks, annotations

    @staticmethod
    def _try_parse_json_results(
        text: str,
        *,
        limit: int,
    ) -> Optional[List[Dict[str, Any]]]:
        """Parse a JSON object with a ``results`` array out of ``text``.

        Returns the normalized result list on success, ``None`` when the
        block has no valid JSON object, no ``results`` key, or no row with a
        usable URL. Tolerates leading/trailing prose because reasoning models
        sometimes prefix a short narration even when told not to.

        Candidates are tried in order of cost:
          1. the whole string (cheapest path when Grok obeys);
          2. the greedy first-``{``-to-last-``}`` span from ``_JSON_BLOCK_RE``;
          3. a bounded scan that decodes a JSON value starting at each ``{``,
             which survives stray braces in the surrounding narration that
             defeat the greedy span.
        """
        candidates = [text]
        match = _JSON_BLOCK_RE.search(text)
        if match and match.group(0) != text:
            candidates.append(match.group(0))

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # includes json.JSONDecodeError
                continue
            rows = _normalize_result_rows(parsed, limit=limit)
            if rows:
                return rows

        decoder = json.JSONDecoder()
        brace_at = text.find("{")
        attempts = 0
        while brace_at != -1 and attempts < _MAX_JSON_SCAN_ATTEMPTS:
            attempts += 1
            try:
                # raw_decode parses one JSON value starting at ``brace_at``
                # and ignores whatever follows it.
                parsed, _end = decoder.raw_decode(text, brace_at)
            except ValueError:
                parsed = None
            rows = _normalize_result_rows(parsed, limit=limit)
            if rows:
                return rows
            brace_at = text.find("{", brace_at + 1)

        return None

    @staticmethod
    def _results_from_annotations(
        annotations: List[Dict[str, Any]],
        joined_text: str,
        *,
        limit: int,
    ) -> List[Dict[str, Any]]:
        """Best-effort fallback when JSON parsing fails.

        Uses each ``url_citation`` annotation's ``url`` (the citation title
        is just the integer label, so we don't surface it) and takes up to
        ~200 characters of the text *preceding* the citation as the
        description. Duplicate URLs are emitted once; at most ``limit`` rows
        are returned, numbered contiguously from 1.
        """
        seen: set[str] = set()
        results: List[Dict[str, Any]] = []
        for ann in annotations:
            if len(results) >= limit:
                break
            if ann.get("type") != "url_citation":
                continue
            url = _clean_text(ann.get("url"))
            if not url or url in seen:
                continue
            seen.add(url)

            description = ""
            start = ann.get("start_index")
            end = ann.get("end_index")
            # NOTE(review): the indices are presumably relative to the
            # annotation's own text chunk; with several text blocks joined
            # together the window may be misaligned — confirm against the API.
            if (
                isinstance(start, int)
                and isinstance(end, int)
                and 0 <= start < end <= len(joined_text)
            ):
                window_start = max(0, start - _ANNOTATION_CONTEXT_CHARS)
                description = joined_text[window_start:start].strip()

            results.append(
                {
                    "title": "",
                    "url": url,
                    "description": description,
                    "position": len(results) + 1,
                }
            )
        return results

    # -- Setup picker -----------------------------------------------------

    def get_setup_schema(self) -> Dict[str, Any]:
        """Describe this provider for the setup picker.

        ``env_vars`` is empty because auth resolution is delegated to the
        shared ``xai_grok`` post-setup hook.
        """
        # Auth resolution is delegated to the shared ``xai_grok`` post_setup
        # hook (same one image_gen.xai and tts.xai use) so users see the
        # familiar OAuth-or-API-key prompt for every xAI service.
        return {
            "name": "xAI Web Search (Grok)",
            "badge": "paid",
            "tag": (
                "Agentic web search via Grok's web_search tool — uses xAI "
                "Grok OAuth or XAI_API_KEY."
            ),
            "env_vars": [],
            "post_setup": "xai_grok",
        }