mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
Merge pull request #29484 from kshitijk4poor/kp/x-search-degraded-flag
Merged after self-review + local verification of date validation and degraded flag. All tests pass, claims confirmed end-to-end.
This commit is contained in:
commit
3ce1cf2bb7
3 changed files with 412 additions and 0 deletions
|
|
@ -436,3 +436,290 @@ def test_x_search_registered_in_registry_with_check_fn():
|
|||
assert entry.check_fn.__name__ == "check_x_search_requirements"
|
||||
assert "XAI_API_KEY" in entry.requires_env
|
||||
assert entry.emoji == "🐦"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Date validation — fail fast before burning an API call on a window that
|
||||
# cannot possibly return X posts. xAI itself happily 200s with a fluff
|
||||
# answer when the range is malformed or pure-future, which is hard for
|
||||
# callers to distinguish from a real result.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _no_post_allowed(monkeypatch):
    """Install a tripwire on ``requests.post`` for tests that must stop early.

    Once installed, any call to ``requests.post`` raises ``AssertionError``,
    so a test using this helper fails loudly if the code under test tries to
    make the HTTP call instead of rejecting the input beforehand.
    """

    def _tripwire(*_args, **_kwargs):
        # Accept any call shape: the call itself is the failure.
        raise AssertionError("requests.post must not be called — validation should reject first")

    monkeypatch.setattr("requests.post", _tripwire)
|
||||
|
||||
|
||||
def test_x_search_rejects_malformed_from_date(monkeypatch):
    """A ``from_date`` that is not a date is rejected without any HTTP call."""
    from tools.x_search_tool import x_search_tool

    _no_post_allowed(monkeypatch)
    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")

    raw_output = x_search_tool(query="anything", from_date="not-a-date")
    error_message = json.loads(raw_output)["error"]

    assert "from_date must be YYYY-MM-DD" in error_message
|
||||
|
||||
|
||||
def test_x_search_rejects_malformed_to_date(monkeypatch):
    """A slash-separated ``to_date`` is rejected without any HTTP call."""
    from tools.x_search_tool import x_search_tool

    _no_post_allowed(monkeypatch)
    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")

    raw_output = x_search_tool(query="anything", to_date="2026/05/01")
    error_message = json.loads(raw_output)["error"]

    assert "to_date must be YYYY-MM-DD" in error_message
|
||||
|
||||
|
||||
def test_x_search_rejects_inverted_date_range(monkeypatch):
    """A range whose start is after its end is rejected without any HTTP call."""
    from tools.x_search_tool import x_search_tool

    _no_post_allowed(monkeypatch)
    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")

    inverted_window = {"from_date": "2026-05-10", "to_date": "2026-05-01"}
    raw_output = x_search_tool(query="anything", **inverted_window)
    error_message = json.loads(raw_output)["error"]

    expected = "from_date (2026-05-10) must be on or before to_date (2026-05-01)"
    assert expected in error_message
|
||||
|
||||
|
||||
def test_x_search_rejects_future_from_date(monkeypatch):
    """A ``from_date`` after the (frozen) current UTC day is rejected up front."""
    import datetime as _dt

    from tools.x_search_tool import x_search_tool

    class _PinnedClock(_dt.datetime):
        """``datetime`` stand-in whose ``now()`` always reports 2026-05-21 12:00."""

        @classmethod
        def now(cls, tz=None):
            zone = tz or _dt.timezone.utc
            return _dt.datetime(2026, 5, 21, 12, 0, 0, tzinfo=zone)

    _no_post_allowed(monkeypatch)
    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
    # Replace the name the tool module imported, so its "today" is deterministic.
    monkeypatch.setattr("tools.x_search_tool.datetime", _PinnedClock)

    raw_output = x_search_tool(query="anything", from_date="2030-01-01")
    error_message = json.loads(raw_output)["error"]

    assert "from_date (2030-01-01) is in the future" in error_message
|
||||
|
||||
|
||||
def test_x_search_allows_future_to_date(monkeypatch):
    """A future ``to_date`` passes validation and the request goes through."""
    import datetime as _dt

    from tools.x_search_tool import x_search_tool

    class _PinnedClock(_dt.datetime):
        """``datetime`` stand-in whose ``now()`` always reports 2026-05-21 12:00."""

        @classmethod
        def now(cls, tz=None):
            zone = tz or _dt.timezone.utc
            return _dt.datetime(2026, 5, 21, 12, 0, 0, tzinfo=zone)

    def _canned_post(url, headers=None, json=None, timeout=None):
        # Parameter names mirror the keyword arguments of ``requests.post``.
        body = {"output_text": "future to_date is allowed", "citations": []}
        return _FakeResponse(body)

    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
    monkeypatch.setattr("tools.x_search_tool.datetime", _PinnedClock)
    monkeypatch.setattr("requests.post", _canned_post)

    raw_output = x_search_tool(
        query="anything",
        from_date="2026-05-20",
        to_date="2030-01-01",
    )
    outcome = json.loads(raw_output)

    assert outcome["success"] is True
    assert outcome["answer"] == "future to_date is allowed"
|
||||
|
||||
|
||||
def test_x_search_accepts_today_as_from_date(monkeypatch):
    """``from_date`` equal to the (frozen) current UTC day is accepted."""
    import datetime as _dt

    from tools.x_search_tool import x_search_tool

    class _PinnedClock(_dt.datetime):
        """``datetime`` stand-in whose ``now()`` always reports 2026-05-21 12:00."""

        @classmethod
        def now(cls, tz=None):
            zone = tz or _dt.timezone.utc
            return _dt.datetime(2026, 5, 21, 12, 0, 0, tzinfo=zone)

    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
    monkeypatch.setattr("tools.x_search_tool.datetime", _PinnedClock)
    monkeypatch.setattr(
        "requests.post",
        lambda *a, **k: _FakeResponse({"output_text": "ok", "citations": []}),
    )

    raw_output = x_search_tool(query="anything", from_date="2026-05-21")
    outcome = json.loads(raw_output)

    assert outcome["success"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Degraded-result flag — distinguish citation-backed answers from
|
||||
# unsourced fluff when narrowing filters returned nothing.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_x_search_marks_degraded_when_handle_filter_returns_no_citations(monkeypatch):
    """With ``allowed_x_handles`` set and no citations returned, ``degraded`` is True."""
    from tools.x_search_tool import x_search_tool

    def _uncited_post(*a, **k):
        body = {"output_text": "Generic encyclopedic answer with no citations.", "citations": []}
        return _FakeResponse(body)

    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
    monkeypatch.setattr("requests.post", _uncited_post)

    raw_output = x_search_tool(
        query="what has @ghostuser posted",
        allowed_x_handles=["ghostuser"],
    )
    outcome = json.loads(raw_output)

    assert outcome["success"] is True
    assert outcome["degraded"] is True
    assert "allowed_x_handles" in outcome["degraded_reason"]
|
||||
|
||||
|
||||
def test_x_search_marks_degraded_when_excluded_handles_and_no_citations(monkeypatch):
    """With ``excluded_x_handles`` set and no citations returned, ``degraded`` is True."""
    from tools.x_search_tool import x_search_tool

    def _uncited_post(*a, **k):
        return _FakeResponse({"output_text": "fluff", "citations": []})

    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
    monkeypatch.setattr("requests.post", _uncited_post)

    raw_output = x_search_tool(query="anything", excluded_x_handles=["someuser"])
    outcome = json.loads(raw_output)

    assert outcome["degraded"] is True
    assert "excluded_x_handles" in outcome["degraded_reason"]
|
||||
|
||||
|
||||
def test_x_search_marks_degraded_when_date_range_and_no_citations(monkeypatch):
    """With a date window set and no citations returned, both date filters are named."""
    from tools.x_search_tool import x_search_tool

    def _uncited_post(*a, **k):
        return _FakeResponse({"output_text": "fluff", "citations": []})

    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
    monkeypatch.setattr("requests.post", _uncited_post)

    window = {"from_date": "2026-04-01", "to_date": "2026-04-02"}
    raw_output = x_search_tool(query="anything", **window)
    outcome = json.loads(raw_output)

    assert outcome["degraded"] is True
    reason = outcome["degraded_reason"]
    assert "from_date" in reason
    assert "to_date" in reason
|
||||
|
||||
|
||||
def test_x_search_not_degraded_when_filter_returns_inline_citations(monkeypatch):
    """One inline ``url_citation`` annotation is enough to keep ``degraded`` False."""
    from tools.x_search_tool import x_search_tool

    def _payload_with_inline_citation():
        # Built fresh on every call so each fake response owns its own dicts.
        annotation = {
            "type": "url_citation",
            "url": "https://x.com/xai/status/1",
            "title": "xAI post",
            "start_index": 0,
            "end_index": 4,
        }
        text_part = {
            "type": "output_text",
            "text": "Real post from xai.",
            "annotations": [annotation],
        }
        return {"output": [{"type": "message", "content": [text_part]}]}

    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
    monkeypatch.setattr(
        "requests.post",
        lambda *a, **k: _FakeResponse(_payload_with_inline_citation()),
    )

    raw_output = x_search_tool(query="latest xAI post", allowed_x_handles=["xai"])
    outcome = json.loads(raw_output)

    assert outcome["success"] is True
    assert outcome["degraded"] is False
    assert outcome["degraded_reason"] is None
    assert len(outcome["inline_citations"]) == 1
|
||||
|
||||
|
||||
def test_x_search_not_degraded_when_filter_returns_top_level_citations(monkeypatch):
    """An entry in the top-level ``citations`` array also keeps ``degraded`` False."""
    from tools.x_search_tool import x_search_tool

    def _cited_post(*a, **k):
        body = {
            "output_text": "Found discussion.",
            "citations": [{"url": "https://x.com/example/status/1", "title": "Example"}],
        }
        return _FakeResponse(body)

    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
    monkeypatch.setattr("requests.post", _cited_post)

    raw_output = x_search_tool(query="anything", allowed_x_handles=["xai"])
    outcome = json.loads(raw_output)

    assert outcome["degraded"] is False
    assert outcome["degraded_reason"] is None
|
||||
|
||||
|
||||
def test_x_search_not_degraded_when_no_filters_active(monkeypatch):
    """An uncited answer to an unfiltered query is not flagged as degraded.

    The flag marks the case where a narrowing filter was set and nothing
    matched. With no filter set there is nothing to miss, and callers who
    care about sourcing can still inspect ``inline_citations == []``.
    """
    from tools.x_search_tool import x_search_tool

    def _uncited_post(*a, **k):
        return _FakeResponse({"output_text": "broad answer", "citations": []})

    monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
    monkeypatch.setattr("requests.post", _uncited_post)

    outcome = json.loads(x_search_tool(query="anything"))

    assert outcome["success"] is True
    assert outcome["degraded"] is False
    assert outcome["degraded_reason"] is None
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,24 @@ auto-refreshes the OAuth access token when it's within the refresh skew
|
|||
window, so a ``True`` from :func:`check_x_search_requirements` means the
|
||||
bearer is fetchable AND non-empty.
|
||||
|
||||
Defensive output
|
||||
----------------
|
||||
The tool surfaces two additional signals beyond xAI's raw response so callers
|
||||
can tell a real citation-backed answer from an unsourced one:
|
||||
|
||||
* ``from_date`` / ``to_date`` are validated client-side before the HTTP call.
|
||||
Malformed (non ``YYYY-MM-DD``), inverted (``from_date > to_date``), and
|
||||
pure-future ranges (``from_date`` later than today UTC) fail fast with a
|
||||
clear error instead of burning an API call. ``to_date`` in the future is
|
||||
still allowed so callers can legitimately request "from yesterday to
|
||||
tomorrow".
|
||||
* Successful responses carry ``degraded`` and ``degraded_reason`` fields.
|
||||
``degraded`` is ``True`` when any narrowing filter (handles or dates) was
|
||||
active AND xAI returned no citations in either the top-level ``citations``
|
||||
array or the inline ``url_citation`` annotations. In that case the
|
||||
``answer`` came from the model's own knowledge rather than the X index,
|
||||
and the caller should treat the result as unsourced.
|
||||
|
||||
Salvaged from PR #10786 (originally by @Jaaneek); credential resolution
|
||||
reworked to honor both auth modes per Teknium's design.
|
||||
"""
|
||||
|
|
@ -28,6 +46,7 @@ import json
|
|||
import logging
|
||||
import os
|
||||
import time
|
||||
from datetime import date, datetime, timezone
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
|
@ -136,6 +155,57 @@ def _normalize_handles(handles: Optional[List[str]], field_name: str) -> List[st
|
|||
return cleaned
|
||||
|
||||
|
||||
def _parse_iso_date(value: str, field_name: str) -> date:
    """Parse a strict, zero-padded ``YYYY-MM-DD`` string into a ``date``.

    xAI accepts any string in the ``from_date``/``to_date`` slots and silently
    returns an answer with no citations when the value is malformed or refers
    to a window where no posts can exist. That behavior burns a billable API
    call and produces a confident-sounding fluff answer that's hard for callers
    to distinguish from a real result. Validating client-side fails fast and
    gives the agent a clear error to act on.

    ``datetime.strptime`` on its own is not strict enough: it treats the
    leading zero of ``%m`` and ``%d`` as optional, so ``"2026-5-1"`` would be
    accepted even though it is not ``YYYY-MM-DD``. The literal shape is
    therefore checked first, and ``strptime`` is used only to reject
    impossible calendar dates such as ``2026-02-30``.

    Args:
        value: Candidate date string; surrounding whitespace is ignored.
        field_name: Name of the parameter being validated, used as the
            prefix of the error message.

    Returns:
        The parsed calendar date.

    Raises:
        ValueError: If ``value`` is not exactly four digits, a dash, two
            digits, a dash and two digits, or does not name a real date.
    """
    # Function-scope import keeps this helper self-contained.
    import re

    raw = value.strip()
    if re.fullmatch(r"[0-9]{4}-[0-9]{2}-[0-9]{2}", raw) is None:
        raise ValueError(f"{field_name} must be YYYY-MM-DD (got {raw!r})")
    try:
        return datetime.strptime(raw, "%Y-%m-%d").date()
    except ValueError as exc:
        raise ValueError(
            f"{field_name} must be YYYY-MM-DD (got {raw!r})"
        ) from exc
|
||||
|
||||
|
||||
def _validate_date_range(from_date: str, to_date: str) -> None:
    """Check ``from_date`` / ``to_date`` and raise if the window is unusable.

    Checks run in this order, and the first failure wins:

    1. A non-blank ``from_date`` must parse as ``YYYY-MM-DD``.
    2. A non-blank ``to_date`` must parse as ``YYYY-MM-DD``.
    3. If both are given, ``from_date`` must not be after ``to_date``.
    4. ``from_date`` must not be after the current UTC day: a window that has
       not started yet cannot contain posts. A future ``to_date`` is fine
       (e.g. "from yesterday to tomorrow").

    Blank or whitespace-only values count as "not provided".

    Raises:
        ValueError: With a human-readable message naming the failed rule.
    """
    start: Optional[date] = (
        _parse_iso_date(from_date, "from_date") if from_date.strip() else None
    )
    end: Optional[date] = (
        _parse_iso_date(to_date, "to_date") if to_date.strip() else None
    )

    # Both remaining rules constrain the start of the window only.
    if start is None:
        return

    if end is not None and start > end:
        raise ValueError(
            f"from_date ({start.isoformat()}) must be on or before "
            f"to_date ({end.isoformat()})"
        )

    current_utc_day = datetime.now(timezone.utc).date()
    if start > current_utc_day:
        raise ValueError(
            f"from_date ({start.isoformat()}) is in the future; "
            f"X Search only indexes past posts (today UTC is "
            f"{current_utc_day.isoformat()})"
        )
|
||||
|
||||
|
||||
def _extract_response_text(payload: Dict[str, Any]) -> str:
|
||||
output_text = str(payload.get("output_text") or "").strip()
|
||||
if output_text:
|
||||
|
|
@ -225,6 +295,11 @@ def x_search_tool(
|
|||
if allowed and excluded:
|
||||
return tool_error("allowed_x_handles and excluded_x_handles cannot be used together")
|
||||
|
||||
try:
|
||||
_validate_date_range(from_date, to_date)
|
||||
except ValueError as exc:
|
||||
return tool_error(str(exc))
|
||||
|
||||
tool_def: Dict[str, Any] = {"type": "x_search"}
|
||||
if allowed:
|
||||
tool_def["allowed_x_handles"] = allowed
|
||||
|
|
@ -299,6 +374,31 @@ def x_search_tool(
|
|||
citations = list(data.get("citations") or [])
|
||||
inline_citations = _extract_inline_citations(data)
|
||||
|
||||
# Degraded-result detection.
|
||||
#
|
||||
# xAI returns 200 OK with a synthesized answer even when its X index
|
||||
# has no posts matching the caller's narrowing filters. The answer
|
||||
# then comes from the model's training data, which is misleading
|
||||
# because it looks identical to a real, citation-backed result. When
|
||||
# any narrowing filter is active AND both citation channels came back
|
||||
# empty, mark the response as degraded so callers can decide to
|
||||
# broaden filters, retry, or fall back to a different source.
|
||||
active_filters: List[str] = []
|
||||
if allowed:
|
||||
active_filters.append("allowed_x_handles")
|
||||
if excluded:
|
||||
active_filters.append("excluded_x_handles")
|
||||
if from_date.strip():
|
||||
active_filters.append("from_date")
|
||||
if to_date.strip():
|
||||
active_filters.append("to_date")
|
||||
degraded = bool(active_filters) and not citations and not inline_citations
|
||||
degraded_reason = (
|
||||
f"no citations returned despite filters: {', '.join(active_filters)}"
|
||||
if degraded
|
||||
else None
|
||||
)
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"success": True,
|
||||
|
|
@ -310,6 +410,8 @@ def x_search_tool(
|
|||
"answer": answer,
|
||||
"citations": citations,
|
||||
"inline_citations": inline_citations,
|
||||
"degraded": degraded,
|
||||
"degraded_reason": degraded_reason,
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -78,9 +78,22 @@ The tool returns JSON with:
|
|||
- `answer` — synthesized text response from Grok
|
||||
- `citations` — citations returned by the Responses API top-level field
|
||||
- `inline_citations` — `url_citation` annotations extracted from the message body (each with `url`, `title`, `start_index`, `end_index`)
|
||||
- `degraded` — `true` when any narrowing filter (`allowed_x_handles`, `excluded_x_handles`, `from_date`, `to_date`) was set AND both citation channels came back empty. In that case the `answer` was synthesized from the model's own knowledge rather than the X index, so treat it as unsourced. `false` otherwise (including the "no filters set" case — a broad unsourced answer is just an answer, not a filter miss)
|
||||
- `degraded_reason` — short string naming which filters were active, or `null` when `degraded` is `false`
|
||||
- `credential_source` — `"xai-oauth"` if OAuth resolved, `"xai"` if API key resolved
|
||||
- `model`, `query`, `provider`, `tool`, `success`
|
||||
|
||||
### Date validation
|
||||
|
||||
`from_date` / `to_date` are validated client-side before the HTTP call:
|
||||
|
||||
- Each of `from_date` and `to_date`, if provided, must parse as `YYYY-MM-DD`.
|
||||
- When both are set, `from_date` must be on or before `to_date`.
|
||||
- `from_date` must not be later than today UTC — no posts can exist in a window that hasn't started yet, so the call would be guaranteed to return zero citations.
|
||||
- `to_date` in the future is allowed (callers may legitimately request "from yesterday to tomorrow" to catch posts as they arrive).
|
||||
|
||||
Validation failures are returned as a structured `{"error": "..."}` tool result; no HTTP request is sent to xAI.
|
||||
|
||||
## Example
|
||||
|
||||
Talking to the agent:
|
||||
|
|
@ -110,6 +123,16 @@ Two possible causes:
|
|||
1. **Toolset not enabled.** Run `hermes tools` and confirm `🐦 X (Twitter) Search` is checked.
|
||||
2. **No xAI credentials.** The check_fn returns False, so the schema stays hidden. Run `hermes auth status` to confirm xai-oauth login state, and check that `XAI_API_KEY` is set (if you're using the API-key path).
|
||||
|
||||
### `degraded: true` — answer with no citations
|
||||
|
||||
When you used `allowed_x_handles`, `excluded_x_handles`, or a date range and the response comes back with `degraded: true`, xAI's X index returned no matching posts but Grok still produced a synthesized answer from its own training data. The answer is unsourced — do not treat it as a real X result.
|
||||
|
||||
Causes worth checking:
|
||||
|
||||
- **Typo in the handle.** Strip the `@`, double-check spelling, and confirm the account exists.
|
||||
- **Date range too narrow**, or a window that excludes today's posts; widen the range and retry.
|
||||
- **xAI index gap.** Some active accounts intermittently fail to surface in `x_search` even when they post regularly. Retry after a few minutes, or use the `xurl` skill for direct X API reads when you need an exact handle's timeline.
|
||||
|
||||
## See Also
|
||||
|
||||
- [xAI Grok OAuth (SuperGrok Subscription)](../../guides/xai-grok-oauth.md) — the OAuth setup guide
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue