diff --git a/tests/tools/test_x_search_tool.py b/tests/tools/test_x_search_tool.py
index 7cbc4841a8a..f0138e9f83d 100644
--- a/tests/tools/test_x_search_tool.py
+++ b/tests/tools/test_x_search_tool.py
@@ -436,3 +436,302 @@ def test_x_search_registered_in_registry_with_check_fn():
     assert entry.check_fn.__name__ == "check_x_search_requirements"
     assert "XAI_API_KEY" in entry.requires_env
     assert entry.emoji == "🐦"
+
+
+# ---------------------------------------------------------------------------
+# Date validation — fail fast before burning an API call on a window that
+# cannot possibly return X posts. xAI itself happily 200s with a fluff
+# answer when the range is malformed or pure-future, which is hard for
+# callers to distinguish from a real result.
+# ---------------------------------------------------------------------------
+
+def _no_post_allowed(monkeypatch):
+    """Guard: any test that should fail before HTTP can hit this fence."""
+    def _fail(*_, **__):
+        raise AssertionError("requests.post must not be called — validation should reject first")
+
+    monkeypatch.setattr("requests.post", _fail)
+
+
+def test_x_search_rejects_malformed_from_date(monkeypatch):
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    _no_post_allowed(monkeypatch)
+
+    result = json.loads(x_search_tool(query="anything", from_date="not-a-date"))
+
+    assert "from_date must be YYYY-MM-DD" in result["error"]
+
+
+def test_x_search_rejects_malformed_to_date(monkeypatch):
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    _no_post_allowed(monkeypatch)
+
+    result = json.loads(x_search_tool(query="anything", to_date="2026/05/01"))
+
+    assert "to_date must be YYYY-MM-DD" in result["error"]
+
+
+def test_x_search_rejects_non_padded_date(monkeypatch):
+    """``strptime`` tolerates ``2026-5-1``; strict validation must not."""
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    _no_post_allowed(monkeypatch)
+
+    result = json.loads(x_search_tool(query="anything", from_date="2026-5-1"))
+
+    assert "from_date must be YYYY-MM-DD" in result["error"]
+
+
+def test_x_search_rejects_inverted_date_range(monkeypatch):
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    _no_post_allowed(monkeypatch)
+
+    result = json.loads(
+        x_search_tool(
+            query="anything",
+            from_date="2026-05-10",
+            to_date="2026-05-01",
+        )
+    )
+
+    assert "from_date (2026-05-10) must be on or before to_date (2026-05-01)" in result["error"]
+
+
+def test_x_search_rejects_future_from_date(monkeypatch):
+    """``from_date`` in the future can never match any post → reject."""
+    import datetime as _dt
+
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    _no_post_allowed(monkeypatch)
+
+    class _FrozenDateTime(_dt.datetime):
+        @classmethod
+        def now(cls, tz=None):
+            return _dt.datetime(2026, 5, 21, 12, 0, 0, tzinfo=tz or _dt.timezone.utc)
+
+    monkeypatch.setattr("tools.x_search_tool.datetime", _FrozenDateTime)
+
+    result = json.loads(x_search_tool(query="anything", from_date="2030-01-01"))
+
+    assert "from_date (2030-01-01) is in the future" in result["error"]
+
+
+def test_x_search_allows_future_to_date(monkeypatch):
+    """``to_date`` in the future is fine — caller may want posts as they arrive."""
+    import datetime as _dt
+
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+
+    class _FrozenDateTime(_dt.datetime):
+        @classmethod
+        def now(cls, tz=None):
+            return _dt.datetime(2026, 5, 21, 12, 0, 0, tzinfo=tz or _dt.timezone.utc)
+
+    monkeypatch.setattr("tools.x_search_tool.datetime", _FrozenDateTime)
+
+    def _fake_post(url, headers=None, json=None, timeout=None):
+        return _FakeResponse(
+            {"output_text": "future to_date is allowed", "citations": []}
+        )
+
+    monkeypatch.setattr("requests.post", _fake_post)
+
+    result = json.loads(
+        x_search_tool(
+            query="anything",
+            from_date="2026-05-20",
+            to_date="2030-01-01",
+        )
+    )
+
+    assert result["success"] is True
+    assert result["answer"] == "future to_date is allowed"
+
+
+def test_x_search_accepts_today_as_from_date(monkeypatch):
+    """``from_date == today UTC`` is a valid edge case (today is past + present)."""
+    import datetime as _dt
+
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+
+    class _FrozenDateTime(_dt.datetime):
+        @classmethod
+        def now(cls, tz=None):
+            return _dt.datetime(2026, 5, 21, 12, 0, 0, tzinfo=tz or _dt.timezone.utc)
+
+    monkeypatch.setattr("tools.x_search_tool.datetime", _FrozenDateTime)
+    monkeypatch.setattr(
+        "requests.post",
+        lambda *a, **k: _FakeResponse({"output_text": "ok", "citations": []}),
+    )
+
+    result = json.loads(x_search_tool(query="anything", from_date="2026-05-21"))
+
+    assert result["success"] is True
+
+
+# ---------------------------------------------------------------------------
+# Degraded-result flag — distinguish citation-backed answers from
+# unsourced fluff when narrowing filters returned nothing.
+# ---------------------------------------------------------------------------
+
+def test_x_search_marks_degraded_when_handle_filter_returns_no_citations(monkeypatch):
+    """allowed_x_handles set + zero citations → degraded=True."""
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    monkeypatch.setattr(
+        "requests.post",
+        lambda *a, **k: _FakeResponse(
+            {"output_text": "Generic encyclopedic answer with no citations.", "citations": []}
+        ),
+    )
+
+    result = json.loads(
+        x_search_tool(query="what has @ghostuser posted", allowed_x_handles=["ghostuser"])
+    )
+
+    assert result["success"] is True
+    assert result["degraded"] is True
+    assert "allowed_x_handles" in result["degraded_reason"]
+
+
+def test_x_search_marks_degraded_when_excluded_handles_and_no_citations(monkeypatch):
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    monkeypatch.setattr(
+        "requests.post",
+        lambda *a, **k: _FakeResponse({"output_text": "fluff", "citations": []}),
+    )
+
+    result = json.loads(
+        x_search_tool(query="anything", excluded_x_handles=["someuser"])
+    )
+
+    assert result["degraded"] is True
+    assert "excluded_x_handles" in result["degraded_reason"]
+
+
+def test_x_search_marks_degraded_when_date_range_and_no_citations(monkeypatch):
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    monkeypatch.setattr(
+        "requests.post",
+        lambda *a, **k: _FakeResponse({"output_text": "fluff", "citations": []}),
+    )
+
+    result = json.loads(
+        x_search_tool(
+            query="anything",
+            from_date="2026-04-01",
+            to_date="2026-04-02",
+        )
+    )
+
+    assert result["degraded"] is True
+    assert "from_date" in result["degraded_reason"]
+    assert "to_date" in result["degraded_reason"]
+
+
+def test_x_search_not_degraded_when_filter_returns_inline_citations(monkeypatch):
+    """A real citation from the inline annotations clears the degraded flag."""
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    monkeypatch.setattr(
+        "requests.post",
+        lambda *a, **k: _FakeResponse(
+            {
+                "output": [
+                    {
+                        "type": "message",
+                        "content": [
+                            {
+                                "type": "output_text",
+                                "text": "Real post from xai.",
+                                "annotations": [
+                                    {
+                                        "type": "url_citation",
+                                        "url": "https://x.com/xai/status/1",
+                                        "title": "xAI post",
+                                        "start_index": 0,
+                                        "end_index": 4,
+                                    }
+                                ],
+                            }
+                        ],
+                    }
+                ]
+            }
+        ),
+    )
+
+    result = json.loads(
+        x_search_tool(query="latest xAI post", allowed_x_handles=["xai"])
+    )
+
+    assert result["success"] is True
+    assert result["degraded"] is False
+    assert result["degraded_reason"] is None
+    assert len(result["inline_citations"]) == 1
+
+
+def test_x_search_not_degraded_when_filter_returns_top_level_citations(monkeypatch):
+    """A real citation from xAI's top-level ``citations`` array also clears the flag."""
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    monkeypatch.setattr(
+        "requests.post",
+        lambda *a, **k: _FakeResponse(
+            {
+                "output_text": "Found discussion.",
+                "citations": [{"url": "https://x.com/example/status/1", "title": "Example"}],
+            }
+        ),
+    )
+
+    result = json.loads(
+        x_search_tool(query="anything", allowed_x_handles=["xai"])
+    )
+
+    assert result["degraded"] is False
+    assert result["degraded_reason"] is None
+
+
+def test_x_search_not_degraded_when_no_filters_active(monkeypatch):
+    """A broad query that returns no citations isn't necessarily degraded.
+
+    Without any narrowing filter, an empty-citations response is a generic
+    unsourced answer, not a "filter miss". The caller can already tell from
+    ``inline_citations == []`` if they care.
+    """
+    from tools.x_search_tool import x_search_tool
+
+    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+    monkeypatch.setattr(
+        "requests.post",
+        lambda *a, **k: _FakeResponse({"output_text": "broad answer", "citations": []}),
+    )
+
+    result = json.loads(x_search_tool(query="anything"))
+
+    assert result["success"] is True
+    assert result["degraded"] is False
+    assert result["degraded_reason"] is None
+
diff --git a/tools/x_search_tool.py b/tools/x_search_tool.py
index 1b7685a897d..70251860736 100644
--- a/tools/x_search_tool.py
+++ b/tools/x_search_tool.py
@@ -18,6 +18,24 @@ auto-refreshes the OAuth access token when it's within the refresh skew
 window, so a ``True`` from :func:`check_x_search_requirements` means the
 bearer is fetchable AND non-empty.
 
+Defensive output
+----------------
+The tool surfaces two additional signals beyond xAI's raw response so callers
+can tell a real citation-backed answer from an unsourced one:
+
+* ``from_date`` / ``to_date`` are validated client-side before the HTTP call.
+  Malformed (non ``YYYY-MM-DD``), inverted (``from_date > to_date``), and
+  pure-future ranges (``from_date`` later than today UTC) fail fast with a
+  clear error instead of burning an API call. ``to_date`` in the future is
+  still allowed so callers can legitimately request "from yesterday to
+  tomorrow".
+* Successful responses carry ``degraded`` and ``degraded_reason`` fields.
+  ``degraded`` is ``True`` when any narrowing filter (handles or dates) was
+  active AND xAI returned no citations in either the top-level ``citations``
+  array or the inline ``url_citation`` annotations. In that case the
+  ``answer`` came from the model's own knowledge rather than the X index,
+  and the caller should treat the result as unsourced.
+
 Salvaged from PR #10786 (originally by @Jaaneek); credential resolution
 reworked to honor both auth modes per Teknium's design.
 """
@@ -28,6 +46,7 @@ import json
 import logging
 import os
 import time
+from datetime import date, datetime, timezone
 from typing import Any, Dict, List, Optional, Tuple
 
 import requests
@@ -136,6 +155,66 @@ def _normalize_handles(handles: Optional[List[str]], field_name: str) -> List[st
     return cleaned
 
 
+def _parse_iso_date(value: str, field_name: str) -> date:
+    """Parse a strict YYYY-MM-DD string into a ``date``.
+
+    xAI accepts any string in the ``from_date``/``to_date`` slots and silently
+    returns an answer with no citations when the value is malformed or refers
+    to a window where no posts can exist. That behavior burns a billable API
+    call and produces a confident-sounding fluff answer that's hard for callers
+    to distinguish from a real result. Validating client-side fails fast and
+    gives the agent a clear error to act on.
+
+    Raises ``ValueError`` (message names ``field_name``) when the stripped
+    value is not a real calendar date in canonical zero-padded form.
+    """
+    raw = value.strip()
+    try:
+        parsed = datetime.strptime(raw, "%Y-%m-%d").date()
+    except ValueError as exc:
+        raise ValueError(
+            f"{field_name} must be YYYY-MM-DD (got {raw!r})"
+        ) from exc
+    # ``strptime`` is lenient: it also accepts non-padded fields such as
+    # ``2026-5-1``. Round-trip through ``isoformat`` so only the canonical
+    # zero-padded spelling passes.
+    if parsed.isoformat() != raw:
+        raise ValueError(f"{field_name} must be YYYY-MM-DD (got {raw!r})")
+    return parsed
+
+
+def _validate_date_range(from_date: str, to_date: str) -> None:
+    """Validate ``from_date`` / ``to_date`` before they reach xAI.
+
+    Rules:
+    * Either field, if non-empty, must parse as ``YYYY-MM-DD``.
+    * When both are set, ``from_date <= to_date``.
+    * ``from_date`` must not be later than today UTC — no posts can exist
+      in a window that hasn't started yet, so the call would be guaranteed
+      to return zero citations. ``to_date`` in the future is allowed
+      (callers may legitimately set "from yesterday to tomorrow").
+    """
+    parsed_from: Optional[date] = None
+    parsed_to: Optional[date] = None
+    if from_date.strip():
+        parsed_from = _parse_iso_date(from_date, "from_date")
+    if to_date.strip():
+        parsed_to = _parse_iso_date(to_date, "to_date")
+    if parsed_from and parsed_to and parsed_from > parsed_to:
+        raise ValueError(
+            f"from_date ({parsed_from.isoformat()}) must be on or before "
+            f"to_date ({parsed_to.isoformat()})"
+        )
+    if parsed_from is not None:
+        today_utc = datetime.now(timezone.utc).date()
+        if parsed_from > today_utc:
+            raise ValueError(
+                f"from_date ({parsed_from.isoformat()}) is in the future; "
+                f"X Search only indexes past posts (today UTC is "
+                f"{today_utc.isoformat()})"
+            )
+
+
 def _extract_response_text(payload: Dict[str, Any]) -> str:
     output_text = str(payload.get("output_text") or "").strip()
     if output_text:
@@ -225,6 +304,11 @@ def x_search_tool(
     if allowed and excluded:
         return tool_error("allowed_x_handles and excluded_x_handles cannot be used together")
 
+    try:
+        _validate_date_range(from_date, to_date)
+    except ValueError as exc:
+        return tool_error(str(exc))
+
     tool_def: Dict[str, Any] = {"type": "x_search"}
     if allowed:
         tool_def["allowed_x_handles"] = allowed
@@ -299,6 +383,31 @@ def x_search_tool(
     citations = list(data.get("citations") or [])
     inline_citations = _extract_inline_citations(data)
 
+    # Degraded-result detection.
+    #
+    # xAI returns 200 OK with a synthesized answer even when its X index
+    # has no posts matching the caller's narrowing filters. The answer
+    # then comes from the model's training data, which is misleading
+    # because it looks identical to a real, citation-backed result. When
+    # any narrowing filter is active AND both citation channels came back
+    # empty, mark the response as degraded so callers can decide to
+    # broaden filters, retry, or fall back to a different source.
+    active_filters: List[str] = []
+    if allowed:
+        active_filters.append("allowed_x_handles")
+    if excluded:
+        active_filters.append("excluded_x_handles")
+    if from_date.strip():
+        active_filters.append("from_date")
+    if to_date.strip():
+        active_filters.append("to_date")
+    degraded = bool(active_filters) and not citations and not inline_citations
+    degraded_reason = (
+        f"no citations returned despite filters: {', '.join(active_filters)}"
+        if degraded
+        else None
+    )
+
     return json.dumps(
         {
             "success": True,
@@ -310,6 +419,8 @@ def x_search_tool(
             "answer": answer,
             "citations": citations,
             "inline_citations": inline_citations,
+            "degraded": degraded,
+            "degraded_reason": degraded_reason,
         },
         ensure_ascii=False,
     )
diff --git a/website/docs/user-guide/features/x-search.md b/website/docs/user-guide/features/x-search.md
index 49479fbf6f2..3038365e577 100644
--- a/website/docs/user-guide/features/x-search.md
+++ b/website/docs/user-guide/features/x-search.md
@@ -78,9 +78,22 @@ The tool returns JSON with:
 - `answer` — synthesized text response from Grok
 - `citations` — citations returned by the Responses API top-level field
 - `inline_citations` — `url_citation` annotations extracted from the message body (each with `url`, `title`, `start_index`, `end_index`)
+- `degraded` — `true` when any narrowing filter (`allowed_x_handles`, `excluded_x_handles`, `from_date`, `to_date`) was set AND both citation channels came back empty. In that case the `answer` was synthesized from the model's own knowledge rather than the X index, so treat it as unsourced. `false` otherwise (including the "no filters set" case — a broad unsourced answer is just an answer, not a filter miss)
+- `degraded_reason` — short string naming which filters were active, or `null` when `degraded` is `false`
 - `credential_source` — `"xai-oauth"` if OAuth resolved, `"xai"` if API key resolved
 - `model`, `query`, `provider`, `tool`, `success`
 
+### Date validation
+
+`from_date` / `to_date` are validated client-side before the HTTP call:
+
+- Both, if provided, must be real calendar dates in zero-padded `YYYY-MM-DD` form (`2026-5-1` is rejected).
+- When both are set, `from_date` must be on or before `to_date`.
+- `from_date` must not be later than today UTC — no posts can exist in a window that hasn't started yet, so the call would be guaranteed to return zero citations.
+- `to_date` in the future is allowed (callers may legitimately request "from yesterday to tomorrow" to catch posts as they arrive).
+
+Validation failures surface as a structured `{"error": "..."}` tool result, never as an HTTP call to xAI.
+
 ## Example
 
 Talking to the agent:
@@ -110,6 +123,16 @@ Two possible causes:
 1. **Toolset not enabled.** Run `hermes tools` and confirm `🐦 X (Twitter) Search` is checked.
 2. **No xAI credentials.** The check_fn returns False, so the schema stays hidden. Run `hermes auth status` to confirm xai-oauth login state, and check that `XAI_API_KEY` is set (if you're using the API-key path).
 
+### `degraded: true` — answer with no citations
+
+When you used `allowed_x_handles`, `excluded_x_handles`, or a date range and the response comes back with `degraded: true`, xAI's X index returned no matching posts but Grok still produced a synthesized answer from its own training data. The answer is unsourced — do not treat it as a real X result.
+
+Causes worth checking:
+
+- **Typo in the handle.** Strip the `@`, double-check spelling, and confirm the account exists.
+- **Date range too narrow** or sliding past today's posts; widen and retry.
+- **xAI index gap.** Some active accounts intermittently fail to surface in `x_search` even when they post regularly. Retry after a few minutes, or use the `xurl` skill for direct X API reads when you need an exact handle's timeline.
+
 ## See Also
 
 - [xAI Grok OAuth (SuperGrok Subscription)](../../guides/xai-grok-oauth.md) — the OAuth setup guide