mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
Six days after #23937 (608 fixes) the codebase had accumulated 241 new PLR6201 violations. Same mechanical `x in (...)` → `x in {...}` fix, same zero-risk profile: set lookup is O(1) vs O(n) for tuple and the two are semantically equivalent for hashable scalar membership tests. All 241 instances fixed via `ruff check --select PLR6201 --fix --unsafe-fixes`, zero remaining. Every changed value is a hashable scalar (str/int/None/enum/signal); no risk of unhashable runtime errors. No behavior change. Test plan: - 119 files changed, +244/-244 (net zero) — exactly one-line edits - `ruff check` clean afterward - Compile checks pass on the largest touched files (cli.py, run_agent.py, gateway/run.py, gateway/platforms/discord.py, model_tools.py) - Subset broad test run on tests/gateway/ tests/hermes_cli/ tests/agent/ tests/tools/: 18187 passed, 59 pre-existing failures (verified against origin/main with the same shape — identical failure count, identical category — all xdist test-order flakes unrelated to this change) Follows the same template as PR #23937 ([tracker: #23972](https://github.com/NousResearch/hermes-agent/issues/23972)).
82 lines
2.8 KiB
Python
82 lines
2.8 KiB
Python
"""Tiny stdlib HTTP helper used by fetch_*.py scripts.
|
|
|
|
Provides polite retry + JSON convenience + User-Agent enforcement.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import time
|
|
import urllib.error
|
|
import urllib.parse
|
|
import urllib.request
|
|
|
|
# Fallback User-Agent sent when neither the ``user_agent`` argument nor the
# HERMES_OSINT_UA environment variable supplies one. The text itself asks
# operators to set HERMES_OSINT_UA so upstream services can identify them.
DEFAULT_UA = " ".join(
    [
        "hermes-osint-investigation/0.2",
        "(+https://github.com/NousResearch/hermes-agent;",
        "set HERMES_OSINT_UA env var to identify yourself per",
        "Wikimedia / SEC fair-use guidance)",
    ]
)
|
|
|
|
|
|
def get(
    url: str,
    *,
    params: dict | None = None,
    headers: dict | None = None,
    user_agent: str | None = None,
    max_retries: int = 3,
    backoff: float = 1.5,
    timeout: float = 30.0,
) -> bytes:
    """GET ``url`` and return the raw response body, retrying transient failures.

    HTTP 500/502/503/504 responses and ``URLError`` failures are retried up to
    ``max_retries`` times. For the HTTP case an integer ``Retry-After`` header
    is honored; any other value (missing, HTTP-date, ...) falls back to
    exponential backoff of ``backoff ** (attempt + 1)`` seconds.

    429 (rate-limit) is raised IMMEDIATELY with a clear message — retrying
    when the upstream says "you're over quota" just wastes time. The caller
    should slow down or supply real credentials.

    Args:
        url: Target URL.
        params: Optional query parameters, url-encoded and appended to ``url``
            (with ``&`` when ``url`` already contains a ``?``).
        headers: Optional extra request headers; these override the default
            ``User-Agent`` entry when they supply the same key.
        user_agent: Explicit User-Agent. When falsy, the ``HERMES_OSINT_UA``
            environment variable is used, and when that is unset or empty,
            ``DEFAULT_UA``.
        max_retries: Number of retries after the first attempt (must be >= 0).
        backoff: Base of the exponential wait between retries, in seconds.
        timeout: Timeout in seconds passed to ``urllib.request.urlopen``.

    Returns:
        The response body as bytes.

    Raises:
        ValueError: If ``max_retries`` is negative.
        RuntimeError: On HTTP 429; the message includes the host and up to
            300 characters of the response body.
        urllib.error.HTTPError: For non-retryable statuses, or a retryable
            status once retries are exhausted.
        urllib.error.URLError: For connection-level failures once retries are
            exhausted.
    """
    if max_retries < 0:
        # Previously a negative value skipped the loop entirely and surfaced
        # as a misleading RuntimeError("unreachable") with no request made.
        raise ValueError(f"max_retries must be >= 0, got {max_retries}")

    if params:
        sep = "&" if "?" in url else "?"
        url = f"{url}{sep}{urllib.parse.urlencode(params)}"

    # Chain with ``or`` so an empty HERMES_OSINT_UA cannot blank out the header.
    agent = user_agent or os.environ.get("HERMES_OSINT_UA") or DEFAULT_UA
    h = {"User-Agent": agent}
    if headers:
        h.update(headers)

    for attempt in range(max_retries + 1):
        req = urllib.request.Request(url, headers=h)
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                return resp.read()
        except urllib.error.HTTPError as e:
            if e.code == 429:
                # Surface immediately. Read the body so the caller sees the
                # provider's actual message ("OVER_RATE_LIMIT" etc.).
                try:
                    body = e.read(2048).decode("utf-8", errors="replace")
                except Exception:  # noqa: BLE001
                    body = ""
                raise RuntimeError(
                    f"HTTP 429 rate-limited by {urllib.parse.urlsplit(url).netloc}. "
                    f"Slow down or supply a real API key. Body: {body[:300]}"
                ) from e
            if e.code in {500, 502, 503, 504} and attempt < max_retries:
                retry_after = e.headers.get("Retry-After") if e.headers else None
                if retry_after and retry_after.isdigit():
                    wait = float(retry_after)
                else:
                    wait = backoff ** (attempt + 1)
                # This error response is being discarded; release its
                # underlying connection before sleeping. Best-effort only.
                try:
                    e.close()
                except Exception:  # noqa: BLE001
                    pass
                time.sleep(wait)
                continue
            raise
        except urllib.error.URLError:
            if attempt < max_retries:
                time.sleep(backoff ** (attempt + 1))
                continue
            raise

    # Every iteration returns, raises, or continues into a later iteration,
    # and the final iteration can never continue.
    raise RuntimeError("unreachable")
|
|
|
|
|
|
def get_json(url: str, **kwargs) -> dict | list:
    """Fetch ``url`` via :func:`get` and parse the response body as JSON.

    All keyword arguments are forwarded unchanged to :func:`get`.

    The raw bytes are handed straight to ``json.loads``, which detects
    UTF-8/UTF-16/UTF-32 itself and tolerates a leading byte-order mark.
    Decoding with plain ``utf-8`` first made BOM-prefixed payloads fail with
    ``JSONDecodeError``.

    Raises:
        json.JSONDecodeError: If the body is not valid JSON.
        UnicodeDecodeError: If the body cannot be decoded as text.
        Exception: Whatever :func:`get` raises for the request itself.
    """
    payload = get(url, **kwargs)
    return json.loads(payload)
|