Mirror of https://github.com/NousResearch/hermes-agent.git
Synced 2026-04-25 00:51:20 +00:00

Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor
Commit aa583cb14e
53 changed files with 2966 additions and 434 deletions

@@ -13,7 +13,7 @@
 **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.

-Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.

 <table>
 <tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
@@ -94,6 +94,17 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
        return "custom"
    return _PROVIDER_ALIASES.get(normalized, normalized)


+_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
+    "kimi-for-coding": 0.6,
+}
+
+
+def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
+    """Return a required temperature override for models with strict contracts."""
+    normalized = (model or "").strip().lower()
+    return _FIXED_TEMPERATURE_MODELS.get(normalized)
+
+
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "gemini": "gemini-3-flash-preview",
@@ -2293,6 +2304,10 @@ def _build_call_kwargs(
        "timeout": timeout,
    }

+    fixed_temperature = _fixed_temperature_for_model(model)
+    if fixed_temperature is not None:
+        temperature = fixed_temperature
+
    # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
    # drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
    # flush_memories, 0 on structured-JSON extraction) don't 400 the moment
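The override is applied before the call kwargs are finalized, so even callers that hardcode a temperature get silently corrected for pinned models. A quick sanity sketch against the table above (input values assumed for illustration):

# Hypothetical check of the override: kimi-for-coding is pinned to 0.6 even
# with stray case/whitespace, while unlisted models return None and keep the
# caller's own temperature.
assert _fixed_temperature_for_model("Kimi-For-Coding ") == 0.6
assert _fixed_temperature_for_model("gpt-4o") is None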
@@ -747,18 +747,149 @@ class GeminiCloudCodeClient:


def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
    """Translate an httpx response into a CodeAssistError with rich metadata.

    Parses Google's error envelope (``{"error": {"code", "message", "status",
    "details": [...]}}``) so the agent's error classifier can reason about
    the failure — ``status_code`` enables the rate_limit / auth classification
    paths, and ``response`` lets the main loop honor ``Retry-After`` just
    like it does for OpenAI SDK exceptions.

    Also lifts a few recognizable Google conditions into human-readable
    messages so the user sees something better than a 500-char JSON dump:

        MODEL_CAPACITY_EXHAUSTED → "Gemini model capacity exhausted for
            <model>. This is a Google-side throttle..."
        RESOURCE_EXHAUSTED w/o reason → quota-style message
        404 → "Model <name> not found at cloudcode-pa..."
    """
    status = response.status_code

    # Parse the body once, surviving any weird encodings.
    body_text = ""
    body_json: Dict[str, Any] = {}
    try:
-        body = response.text[:500]
+        body_text = response.text
    except Exception:
-        body = ""
-    # Let run_agent's retry logic see auth errors as rotatable via `api_key`
+        body_text = ""
    if body_text:
        try:
            parsed = json.loads(body_text)
            if isinstance(parsed, dict):
                body_json = parsed
        except (ValueError, TypeError):
            body_json = {}

    # Dig into Google's error envelope. Shape is:
    # {"error": {"code": 429, "message": "...", "status": "RESOURCE_EXHAUSTED",
    #            "details": [{"@type": ".../ErrorInfo", "reason": "MODEL_CAPACITY_EXHAUSTED",
    #                         "metadata": {...}},
    #                        {"@type": ".../RetryInfo", "retryDelay": "30s"}]}}
    err_obj = body_json.get("error") if isinstance(body_json, dict) else None
    if not isinstance(err_obj, dict):
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
    err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []

    # Extract google.rpc.ErrorInfo reason + metadata. There may be more
    # than one ErrorInfo (rare), so we pick the first one with a reason.
    error_reason = ""
    error_metadata: Dict[str, Any] = {}
    retry_delay_seconds: Optional[float] = None
    for detail in err_details_list:
        if not isinstance(detail, dict):
            continue
        type_url = str(detail.get("@type") or "")
        if not error_reason and type_url.endswith("/google.rpc.ErrorInfo"):
            reason = detail.get("reason")
            if isinstance(reason, str) and reason:
                error_reason = reason
                md = detail.get("metadata")
                if isinstance(md, dict):
                    error_metadata = md
        elif retry_delay_seconds is None and type_url.endswith("/google.rpc.RetryInfo"):
            # retryDelay is a google.protobuf.Duration string like "30s" or "1.5s".
            delay_raw = detail.get("retryDelay")
            if isinstance(delay_raw, str) and delay_raw.endswith("s"):
                try:
                    retry_delay_seconds = float(delay_raw[:-1])
                except ValueError:
                    pass
            elif isinstance(delay_raw, (int, float)):
                retry_delay_seconds = float(delay_raw)

    # Fall back to the Retry-After header if the body didn't include RetryInfo.
    if retry_delay_seconds is None:
        try:
            header_val = response.headers.get("Retry-After") or response.headers.get("retry-after")
        except Exception:
            header_val = None
        if header_val:
            try:
                retry_delay_seconds = float(header_val)
            except (TypeError, ValueError):
                retry_delay_seconds = None

    # Classify the error code. ``code_assist_rate_limited`` stays the default
    # for 429s; a more specific reason tag helps downstream callers (e.g. tests,
    # logs) without changing the rate_limit classification path.
    code = f"code_assist_http_{status}"
    if status == 401:
        code = "code_assist_unauthorized"
    elif status == 429:
        code = "code_assist_rate_limited"
        if error_reason == "MODEL_CAPACITY_EXHAUSTED":
            code = "code_assist_capacity_exhausted"

    # Build a human-readable message. Keep the status + a raw-body tail for
    # debugging, but lead with a friendlier summary when we recognize the
    # Google signal.
    model_hint = ""
    if isinstance(error_metadata, dict):
        model_hint = str(error_metadata.get("model") or error_metadata.get("modelId") or "").strip()

    if status == 429 and error_reason == "MODEL_CAPACITY_EXHAUSTED":
        target = model_hint or "this Gemini model"
        message = (
            f"Gemini capacity exhausted for {target} (Google-side throttle, "
            f"not a Hermes issue). Try a different Gemini model or set a "
            f"fallback_providers entry to a non-Gemini provider."
        )
        if retry_delay_seconds is not None:
            message += f" Google suggests retrying in {retry_delay_seconds:g}s."
    elif status == 429 and err_status == "RESOURCE_EXHAUSTED":
        message = (
            f"Gemini quota exhausted ({err_message or 'RESOURCE_EXHAUSTED'}). "
            f"Check /gquota for remaining daily requests."
        )
        if retry_delay_seconds is not None:
            message += f" Retry suggested in {retry_delay_seconds:g}s."
    elif status == 404:
        # Google returns 404 when a model has been retired or renamed.
        target = model_hint or (err_message or "model")
        message = (
            f"Code Assist 404: {target} is not available at "
            f"cloudcode-pa.googleapis.com. It may have been renamed or "
            f"retired. Check hermes_cli/models.py for the current list."
        )
    elif err_message:
        # Generic fallback with the parsed message.
        message = f"Code Assist HTTP {status} ({err_status or 'error'}): {err_message}"
    else:
        # Last-ditch fallback — raw body snippet.
        message = f"Code Assist returned HTTP {status}: {body_text[:500]}"

    return CodeAssistError(
-        f"Code Assist returned HTTP {status}: {body}",
+        message,
        code=code,
        status_code=status,
        response=response,
        retry_after=retry_delay_seconds,
        details={
            "status": err_status,
            "reason": error_reason,
            "metadata": error_metadata,
            "message": err_message,
        },
    )
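To make the classification concrete, here is a minimal sketch of how a Google 429 envelope flows through the helper above. The envelope follows the documented google.rpc shape; the httpx.Response is built by hand purely for illustration, not taken from the repository's tests.

# Hypothetical smoke test for _gemini_http_error (all values assumed).
import httpx

envelope = {
    "error": {
        "code": 429,
        "message": "Model capacity exceeded",
        "status": "RESOURCE_EXHAUSTED",
        "details": [
            {"@type": "type.googleapis.com/google.rpc.ErrorInfo",
             "reason": "MODEL_CAPACITY_EXHAUSTED",
             "metadata": {"model": "gemini-2.5-pro"}},
            {"@type": "type.googleapis.com/google.rpc.RetryInfo",
             "retryDelay": "30s"},
        ],
    }
}
request = httpx.Request("POST", "https://cloudcode-pa.googleapis.com/")
err = _gemini_http_error(httpx.Response(429, json=envelope, request=request))
assert err.code == "code_assist_capacity_exhausted"
assert err.status_code == 429 and err.retry_after == 30.0
assert "Gemini capacity exhausted for gemini-2.5-pro" in str(err)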
@@ -68,9 +68,45 @@ _ONBOARDING_POLL_INTERVAL_SECONDS = 5.0


class CodeAssistError(RuntimeError):
-    def __init__(self, message: str, *, code: str = "code_assist_error") -> None:
+    """Exception raised by the Code Assist (``cloudcode-pa``) integration.
+
+    Carries HTTP status / response / retry-after metadata so the agent's
+    ``error_classifier._extract_status_code`` and the main loop's Retry-After
+    handling (which walks ``error.response.headers``) pick up the right
+    signals. Without these, 429s from the OAuth path look like opaque
+    ``RuntimeError`` and skip the rate-limit path.
+    """
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        code: str = "code_assist_error",
+        status_code: Optional[int] = None,
+        response: Any = None,
+        retry_after: Optional[float] = None,
+        details: Optional[Dict[str, Any]] = None,
+    ) -> None:
        super().__init__(message)
        self.code = code
+        # ``status_code`` is picked up by ``agent.error_classifier._extract_status_code``
+        # so a 429 from Code Assist classifies as FailoverReason.rate_limit and
+        # triggers the main loop's fallback_providers chain the same way SDK
+        # errors do.
+        self.status_code = status_code
+        # ``response`` is the underlying ``httpx.Response`` (or a shim with a
+        # ``.headers`` mapping and ``.json()`` method). The main loop reads
+        # ``error.response.headers["Retry-After"]`` to honor Google's retry
+        # hints when the backend throttles us.
+        self.response = response
+        # Parsed ``Retry-After`` seconds (kept separately for convenience —
+        # Google returns retry hints in both the header and the error body's
+        # ``google.rpc.RetryInfo`` details, and we pick whichever we found).
+        self.retry_after = retry_after
+        # Parsed structured error details from the Google error envelope
+        # (e.g. ``{"reason": "MODEL_CAPACITY_EXHAUSTED", "status": "RESOURCE_EXHAUSTED"}``).
+        # Useful for logging and for tests that want to assert on specifics.
+        self.details = details or {}


class ProjectIdRequiredError(CodeAssistError):
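As a quick illustration of why the metadata matters, a retry loop can now treat a Code Assist failure like an SDK exception. A minimal sketch with assumed values:

# Hypothetical caller-side handling (values assumed for illustration).
try:
    raise CodeAssistError(
        "Code Assist returned HTTP 429",
        code="code_assist_rate_limited",
        status_code=429,
        retry_after=30.0,
    )
except CodeAssistError as err:
    if err.status_code == 429:          # the rate_limit classification path
        delay = err.retry_after or 5.0  # honor Google's hint, else a default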
@@ -38,6 +38,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
    "xai", "x-ai", "x.ai", "grok",
+    "nvidia", "nim", "nvidia-nim", "nemotron",
    "qwen-portal",
})

@@ -124,7 +125,6 @@ DEFAULT_CONTEXT_LENGTHS = {
    "gemini": 1048576,
    # Gemma (open models served via AI Studio)
    "gemma-4-31b": 256000,
    "gemma-4-26b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
    # DeepSeek

@@ -158,6 +158,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "grok": 131072,  # catch-all (grok-beta, unknown grok-*)
    # Kimi
    "kimi": 262144,
+    # Nemotron — NVIDIA's open-weights series (128K context across all sizes)
+    "nemotron": 131072,
    # Arcee
    "trinity": 262144,
    # OpenRouter

@@ -240,6 +242,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.fireworks.ai": "fireworks",
    "opencode.ai": "opencode-go",
    "api.x.ai": "xai",
+    "integrate.api.nvidia.com": "nvidia",
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
    "ollama.com": "ollama-cloud",

@@ -24,6 +24,7 @@ model:
  # "minimax" - MiniMax global (requires: MINIMAX_API_KEY)
  # "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY)
  # "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
+ # "nvidia" - NVIDIA NIM / build.nvidia.com (requires: NVIDIA_API_KEY)
  # "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY)
  # "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
  # "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)

@@ -65,7 +65,15 @@ _HOME_TARGET_ENV_VARS = {
    "wecom": "WECOM_HOME_CHANNEL",
    "weixin": "WEIXIN_HOME_CHANNEL",
    "bluebubbles": "BLUEBUBBLES_HOME_CHANNEL",
-    "qqbot": "QQ_HOME_CHANNEL",
+    "qqbot": "QQBOT_HOME_CHANNEL",
}

+# Legacy env var names kept for back-compat. Each entry is the current
+# primary env var → the previous name. _get_home_target_chat_id falls
+# back to the legacy name if the primary is unset, so users who set the
+# old name before the rename keep working until they migrate.
+_LEGACY_HOME_TARGET_ENV_VARS = {
+    "QQBOT_HOME_CHANNEL": "QQ_HOME_CHANNEL",
+}
+
from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run

@@ -100,7 +108,12 @@ def _get_home_target_chat_id(platform_name: str) -> str:
    env_var = _HOME_TARGET_ENV_VARS.get(platform_name.lower())
    if not env_var:
        return ""
-    return os.getenv(env_var, "")
+    value = os.getenv(env_var, "")
+    if not value:
+        legacy = _LEGACY_HOME_TARGET_ENV_VARS.get(env_var)
+        if legacy:
+            value = os.getenv(legacy, "")
+    return value


def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[dict]:
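A minimal behavior check of that fallback, with assumed environment values: when only the pre-rename variable is set, the QQ home target still resolves.

# Hypothetical check of the legacy fallback (env values assumed).
import os

os.environ.pop("QQBOT_HOME_CHANNEL", None)   # primary name unset
os.environ["QQ_HOME_CHANNEL"] = "ABC123"     # legacy name still set
assert _get_home_target_chat_id("qqbot") == "ABC123"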
@@ -258,6 +258,13 @@ class GatewayConfig:
    # Streaming configuration
    streaming: StreamingConfig = field(default_factory=StreamingConfig)

+    # Session store pruning: drop SessionEntry records older than this many
+    # days from the in-memory dict and sessions.json. Keeps the store from
+    # growing unbounded in gateways serving many chats/threads/users over
+    # months. Pruning is invisible to users — if they resume, they get a
+    # fresh session exactly as if the reset policy had fired. 0 = disabled.
+    session_store_max_age_days: int = 90
+
    def get_connected_platforms(self) -> List[Platform]:
        """Return list of platforms that are enabled and configured."""
        connected = []

@@ -365,6 +372,7 @@ class GatewayConfig:
            "thread_sessions_per_user": self.thread_sessions_per_user,
            "unauthorized_dm_behavior": self.unauthorized_dm_behavior,
            "streaming": self.streaming.to_dict(),
+            "session_store_max_age_days": self.session_store_max_age_days,
        }

    @classmethod

@@ -412,6 +420,13 @@ class GatewayConfig:
            "pair",
        )

+        try:
+            session_store_max_age_days = int(data.get("session_store_max_age_days", 90))
+            if session_store_max_age_days < 0:
+                session_store_max_age_days = 0
+        except (TypeError, ValueError):
+            session_store_max_age_days = 90
+
        return cls(
            platforms=platforms,
            default_reset_policy=default_policy,

@@ -426,6 +441,7 @@ class GatewayConfig:
            thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
            unauthorized_dm_behavior=unauthorized_dm_behavior,
            streaming=StreamingConfig.from_dict(data.get("streaming", {})),
+            session_store_max_age_days=session_store_max_age_days,
        )

    def get_unauthorized_dm_behavior(self, platform: Optional[Platform] = None) -> str:

@@ -1213,12 +1229,24 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
    qq_group_allowed = os.getenv("QQ_GROUP_ALLOWED_USERS", "").strip()
    if qq_group_allowed:
        extra["group_allow_from"] = qq_group_allowed
-    qq_home = os.getenv("QQ_HOME_CHANNEL", "").strip()
+    qq_home = os.getenv("QQBOT_HOME_CHANNEL", "").strip()
+    qq_home_name_env = "QQBOT_HOME_CHANNEL_NAME"
+    if not qq_home:
+        # Back-compat: accept the pre-rename name and log a one-time warning.
+        legacy_home = os.getenv("QQ_HOME_CHANNEL", "").strip()
+        if legacy_home:
+            qq_home = legacy_home
+            qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
+            import logging
+            logging.getLogger(__name__).warning(
+                "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
+                "in your .env for consistency with the platform key."
+            )
    if qq_home:
        config.platforms[Platform.QQBOT].home_channel = HomeChannel(
            platform=Platform.QQBOT,
            chat_id=qq_home,
-            name=os.getenv("QQ_HOME_CHANNEL_NAME", "Home"),
+            name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
        )

    # Session settings
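The from_dict coercion above reduces to a small contract; a standalone restatement (the helper name here is hypothetical, for illustration only):

# Hypothetical restatement of the coercion shown above: ints pass through,
# negatives clamp to 0 (pruning disabled), junk falls back to the 90-day default.
def _coerce_max_age(raw) -> int:
    try:
        value = int(raw)
    except (TypeError, ValueError):
        return 90
    return 0 if value < 0 else value

assert [_coerce_max_age(x) for x in (30, -5, "oops", None)] == [30, 0, 90, 90]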
gateway/platforms/qqbot/__init__.py (new file, 57 lines)
@@ -0,0 +1,57 @@
"""
QQBot platform package.

Re-exports the main adapter symbols from ``adapter.py`` (the original
``qqbot.py``) so that **all existing import paths remain unchanged**::

    from gateway.platforms.qqbot import QQAdapter              # works
    from gateway.platforms.qqbot import check_qq_requirements  # works

New modules:
- ``constants`` — shared constants (API URLs, timeouts, message types)
- ``utils`` — User-Agent builder, config helpers
- ``crypto`` — AES-256-GCM key generation and decryption
- ``onboard`` — QR-code scan-to-configure flow
"""

# -- Adapter (original qqbot.py) ------------------------------------------
from .adapter import (  # noqa: F401
    QQAdapter,
    QQCloseError,
    check_qq_requirements,
    _coerce_list,
    _ssrf_redirect_guard,
)

# -- Onboard (QR-code scan-to-configure) -----------------------------------
from .onboard import (  # noqa: F401
    BindStatus,
    create_bind_task,
    poll_bind_result,
    build_connect_url,
)
from .crypto import decrypt_secret, generate_bind_key  # noqa: F401

# -- Utils -----------------------------------------------------------------
from .utils import build_user_agent, get_api_headers, coerce_list  # noqa: F401

__all__ = [
    # adapter
    "QQAdapter",
    "QQCloseError",
    "check_qq_requirements",
    "_coerce_list",
    "_ssrf_redirect_guard",
    # onboard
    "BindStatus",
    "create_bind_task",
    "poll_bind_result",
    "build_connect_url",
    # crypto
    "decrypt_secret",
    "generate_bind_key",
    # utils
    "build_user_agent",
    "get_api_headers",
    "coerce_list",
]
File diff suppressed because it is too large

gateway/platforms/qqbot/constants.py (new file, 74 lines)
@@ -0,0 +1,74 @@
"""QQBot package-level constants shared across adapter, onboard, and other modules."""

from __future__ import annotations

import os

# ---------------------------------------------------------------------------
# QQBot adapter version — bump on functional changes to the adapter package.
# ---------------------------------------------------------------------------

QQBOT_VERSION = "1.1.0"

# ---------------------------------------------------------------------------
# API endpoints
# ---------------------------------------------------------------------------

# The portal domain is configurable via QQ_PORTAL_HOST for corporate proxies
# or test environments. Default: q.qq.com (production).
PORTAL_HOST = os.getenv("QQ_PORTAL_HOST", "q.qq.com")

API_BASE = "https://api.sgroup.qq.com"
TOKEN_URL = "https://bots.qq.com/app/getAppAccessToken"
GATEWAY_URL_PATH = "/gateway"

# QR-code onboard endpoints (on the portal host)
ONBOARD_CREATE_PATH = "/lite/create_bind_task"
ONBOARD_POLL_PATH = "/lite/poll_bind_result"
QR_URL_TEMPLATE = (
    "https://q.qq.com/qqbot/openclaw/connect.html"
    "?task_id={task_id}&_wv=2&source=hermes"
)

# ---------------------------------------------------------------------------
# Timeouts & retry
# ---------------------------------------------------------------------------

DEFAULT_API_TIMEOUT = 30.0
FILE_UPLOAD_TIMEOUT = 120.0
CONNECT_TIMEOUT_SECONDS = 20.0

RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
MAX_RECONNECT_ATTEMPTS = 100
RATE_LIMIT_DELAY = 60  # seconds
QUICK_DISCONNECT_THRESHOLD = 5.0  # seconds
MAX_QUICK_DISCONNECT_COUNT = 3

ONBOARD_POLL_INTERVAL = 2.0  # seconds between poll_bind_result calls
ONBOARD_API_TIMEOUT = 10.0

# ---------------------------------------------------------------------------
# Message limits
# ---------------------------------------------------------------------------

MAX_MESSAGE_LENGTH = 4000
DEDUP_WINDOW_SECONDS = 300
DEDUP_MAX_SIZE = 1000

# ---------------------------------------------------------------------------
# QQ Bot message types
# ---------------------------------------------------------------------------

MSG_TYPE_TEXT = 0
MSG_TYPE_MARKDOWN = 2
MSG_TYPE_MEDIA = 7
MSG_TYPE_INPUT_NOTIFY = 6

# ---------------------------------------------------------------------------
# QQ Bot file media types
# ---------------------------------------------------------------------------

MEDIA_TYPE_IMAGE = 1
MEDIA_TYPE_VIDEO = 2
MEDIA_TYPE_VOICE = 3
MEDIA_TYPE_FILE = 4
gateway/platforms/qqbot/crypto.py (new file, 45 lines)
@@ -0,0 +1,45 @@
"""AES-256-GCM utilities for QQBot scan-to-configure credential decryption."""

from __future__ import annotations

import base64
import os


def generate_bind_key() -> str:
    """Generate a 256-bit random AES key and return it as base64.

    The key is passed to ``create_bind_task`` so the server can encrypt
    the bot's *client_secret* before returning it. Only this CLI holds
    the key, ensuring the secret never travels in plaintext.
    """
    return base64.b64encode(os.urandom(32)).decode()


def decrypt_secret(encrypted_base64: str, key_base64: str) -> str:
    """Decrypt a base64-encoded AES-256-GCM ciphertext.

    Ciphertext layout (after base64-decoding)::

        IV (12 bytes) ‖ ciphertext (N bytes) ‖ AuthTag (16 bytes)

    Args:
        encrypted_base64: The ``bot_encrypt_secret`` value from
            ``poll_bind_result``.
        key_base64: The base64 AES key generated by
            :func:`generate_bind_key`.

    Returns:
        The decrypted *client_secret* as a UTF-8 string.
    """
    from cryptography.hazmat.primitives.ciphers.aead import AESGCM

    key = base64.b64decode(key_base64)
    raw = base64.b64decode(encrypted_base64)

    iv = raw[:12]
    ciphertext_with_tag = raw[12:]  # AESGCM expects ciphertext + tag concatenated

    aesgcm = AESGCM(key)
    plaintext = aesgcm.decrypt(iv, ciphertext_with_tag, None)
    return plaintext.decode("utf-8")
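Since decryption happens locally, the layout is easy to verify with a round trip. A minimal sketch standing in for the server's encryption side (the plaintext value is assumed):

# Hypothetical round trip over the IV ‖ ciphertext ‖ tag layout. AESGCM.encrypt
# returns ciphertext with the 16-byte tag appended, so prepending the 12-byte
# IV reproduces exactly what decrypt_secret expects.
import base64
import os
from cryptography.hazmat.primitives.ciphers.aead import AESGCM

key_b64 = generate_bind_key()
iv = os.urandom(12)
sealed = AESGCM(base64.b64decode(key_b64)).encrypt(iv, b"example-client-secret", None)
payload = base64.b64encode(iv + sealed).decode()
assert decrypt_secret(payload, key_b64) == "example-client-secret"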
gateway/platforms/qqbot/onboard.py (new file, 124 lines)
@@ -0,0 +1,124 @@
"""
QQBot scan-to-configure (QR code onboard) module.

Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
generate a QR-code URL and poll for scan completion. On success the caller
receives the bot's *app_id*, *client_secret* (decrypted locally), and the
scanner's *user_openid* — enough to fully configure the QQBot gateway.

Reference: https://bot.q.qq.com/wiki/develop/api-v2/
"""

from __future__ import annotations

import logging
from enum import IntEnum
from typing import Tuple
from urllib.parse import quote

from .constants import (
    ONBOARD_API_TIMEOUT,
    ONBOARD_CREATE_PATH,
    ONBOARD_POLL_PATH,
    PORTAL_HOST,
    QR_URL_TEMPLATE,
)
from .crypto import generate_bind_key
from .utils import get_api_headers

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Bind status
# ---------------------------------------------------------------------------


class BindStatus(IntEnum):
    """Status codes returned by ``poll_bind_result``."""

    NONE = 0
    PENDING = 1
    COMPLETED = 2
    EXPIRED = 3


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


async def create_bind_task(
    timeout: float = ONBOARD_API_TIMEOUT,
) -> Tuple[str, str]:
    """Create a bind task and return *(task_id, aes_key_base64)*.

    The AES key is generated locally and sent to the server so it can
    encrypt the bot credentials before returning them.

    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
    import httpx

    url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
    key = generate_bind_key()

    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
        resp = await client.post(url, json={"key": key}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

    if data.get("retcode") != 0:
        raise RuntimeError(data.get("msg", "create_bind_task failed"))

    task_id = data.get("data", {}).get("task_id")
    if not task_id:
        raise RuntimeError("create_bind_task: missing task_id in response")

    logger.debug("create_bind_task ok: task_id=%s", task_id)
    return task_id, key


async def poll_bind_result(
    task_id: str,
    timeout: float = ONBOARD_API_TIMEOUT,
) -> Tuple[BindStatus, str, str, str]:
    """Poll the bind result for *task_id*.

    Returns:
        A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.

        * ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with
          :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
          key from :func:`create_bind_task`.
        * ``user_openid`` is the OpenID of the person who scanned the code
          (available when ``status == COMPLETED``).

    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
    import httpx

    url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"

    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
        resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

    if data.get("retcode") != 0:
        raise RuntimeError(data.get("msg", "poll_bind_result failed"))

    d = data.get("data", {})
    return (
        BindStatus(d.get("status", 0)),
        str(d.get("bot_appid", "")),
        d.get("bot_encrypt_secret", ""),
        d.get("user_openid", ""),
    )


def build_connect_url(task_id: str) -> str:
    """Build the QR-code target URL for a given *task_id*."""
    return QR_URL_TEMPLATE.format(task_id=quote(task_id))
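Putting the three calls together, an async driver looks roughly like this. A sketch only; the real CLI wiring, with QR rendering and refresh limits, lives in _qqbot_qr_flow further down this commit.

# Hypothetical end-to-end driver for the onboard flow above.
import asyncio

async def onboard_demo() -> None:
    task_id, aes_key = await create_bind_task()
    print("Scan in QQ:", build_connect_url(task_id))
    while True:
        status, app_id, enc_secret, user_openid = await poll_bind_result(task_id)
        if status == BindStatus.COMPLETED:
            secret = decrypt_secret(enc_secret, aes_key)  # decrypted locally
            print("app_id:", app_id, "scanner:", user_openid)
            return
        if status == BindStatus.EXPIRED:
            raise RuntimeError("QR expired; create a fresh bind task")
        await asyncio.sleep(ONBOARD_POLL_INTERVAL)

if __name__ == "__main__":
    asyncio.run(onboard_demo())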
gateway/platforms/qqbot/utils.py (new file, 71 lines)
@@ -0,0 +1,71 @@
"""QQBot shared utilities — User-Agent, HTTP helpers, config coercion."""

from __future__ import annotations

import platform
import sys
from typing import Any, Dict, List

from .constants import QQBOT_VERSION


# ---------------------------------------------------------------------------
# User-Agent
# ---------------------------------------------------------------------------

def _get_hermes_version() -> str:
    """Return the hermes-agent package version, or 'dev' if unavailable."""
    try:
        from importlib.metadata import version
        return version("hermes-agent")
    except Exception:
        return "dev"


def build_user_agent() -> str:
    """Build a descriptive User-Agent string.

    Format::

        QQBotAdapter/<qqbot_version> (Python/<py_version>; <os>; Hermes/<hermes_version>)

    Example::

        QQBotAdapter/1.0.0 (Python/3.11.15; darwin; Hermes/0.9.0)
    """
    py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
    os_name = platform.system().lower()
    hermes_version = _get_hermes_version()
    return f"QQBotAdapter/{QQBOT_VERSION} (Python/{py_version}; {os_name}; Hermes/{hermes_version})"


def get_api_headers() -> Dict[str, str]:
    """Return standard HTTP headers for QQBot API requests.

    Includes ``Content-Type``, ``Accept``, and a dynamic ``User-Agent``.
    ``q.qq.com`` requires ``Accept: application/json`` — without it,
    the server returns a JavaScript anti-bot challenge page.
    """
    return {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "User-Agent": build_user_agent(),
    }


# ---------------------------------------------------------------------------
# Config helpers
# ---------------------------------------------------------------------------

def coerce_list(value: Any) -> List[str]:
    """Coerce config values into a trimmed string list.

    Accepts comma-separated strings, lists, tuples, sets, or single values.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return [item.strip() for item in value.split(",") if item.strip()]
    if isinstance(value, (list, tuple, set)):
        return [str(item).strip() for item in value if str(item).strip()]
    return [str(value).strip()] if str(value).strip() else []
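A few representative coercions, as a usage sketch:

# Usage sketch for coerce_list: each input normalizes as shown.
assert coerce_list("a, b,") == ["a", "b"]
assert coerce_list(["a", " b "]) == ["a", "b"]
assert coerce_list(("a", "b")) == ["a", "b"]
assert coerce_list(None) == []
assert coerce_list(42) == ["42"]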
@@ -118,6 +118,84 @@ def _strip_mdv2(text: str) -> str:
    return cleaned


# ---------------------------------------------------------------------------
# Markdown table → code block conversion
# ---------------------------------------------------------------------------
# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
# so pipe tables render as noisy backslash-pipe text with no alignment.
# Wrapping the table in a fenced code block makes Telegram render it as
# monospace preformatted text with columns intact.

# Matches a GFM table delimiter row: optional outer pipes, cells containing
# only dashes (with optional leading/trailing colons for alignment) separated
# by '|'. Requires at least one internal '|' so lone '---' horizontal rules
# are NOT matched.
_TABLE_SEPARATOR_RE = re.compile(
    r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*){1,}\|?\s*$'
)


def _is_table_row(line: str) -> bool:
    """Return True if *line* could plausibly be a table data row."""
    stripped = line.strip()
    return bool(stripped) and '|' in stripped


def _wrap_markdown_tables(text: str) -> str:
    """Wrap GFM-style pipe tables in ``` fences so Telegram renders them.

    Detected by a row containing '|' immediately followed by a delimiter
    row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing
    non-blank lines are consumed as the table body and included in the
    wrapped block. Tables inside existing fenced code blocks are left
    alone.
    """
    if '|' not in text or '-' not in text:
        return text

    lines = text.split('\n')
    out: list[str] = []
    in_fence = False
    i = 0
    while i < len(lines):
        line = lines[i]
        stripped = line.lstrip()

        # Track existing fenced code blocks — never touch content inside.
        if stripped.startswith('```'):
            in_fence = not in_fence
            out.append(line)
            i += 1
            continue
        if in_fence:
            out.append(line)
            i += 1
            continue

        # Look for a header row (contains '|') immediately followed by a
        # delimiter row.
        if (
            '|' in line
            and i + 1 < len(lines)
            and _TABLE_SEPARATOR_RE.match(lines[i + 1])
        ):
            table_block = [line, lines[i + 1]]
            j = i + 2
            while j < len(lines) and _is_table_row(lines[j]):
                table_block.append(lines[j])
                j += 1
            out.append('```')
            out.extend(table_block)
            out.append('```')
            i = j
            continue

        out.append(line)
        i += 1

    return '\n'.join(out)


class TelegramAdapter(BasePlatformAdapter):
    """
    Telegram bot adapter.
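As an illustration with an assumed input string, a bare pipe table is fenced while the surrounding prose is untouched:

# Illustration of _wrap_markdown_tables on an assumed input.
src = "Results:\n| name | score |\n| --- | ---: |\n| foo | 1 |\nDone."
out = _wrap_markdown_tables(src).split("\n")
assert out[0] == "Results:" and out[-1] == "Done."
assert out[1] == "```" and out[-2] == "```"   # table now fenced
assert out[2] == "| name | score |"           # rows preserved verbatim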
@@ -1916,6 +1994,12 @@ class TelegramAdapter(BasePlatformAdapter):

        text = content

+        # 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't
+        #    render tables natively, but fenced code blocks render as
+        #    monospace preformatted text with columns intact. The wrapped
+        #    tables then flow through step (1) below as protected regions.
+        text = _wrap_markdown_tables(text)
+
        # 1) Protect fenced code blocks (``` ... ```)
        #    Per MarkdownV2 spec, \ and ` inside pre/code must be escaped.
        def _protect_fenced(m):
@@ -2178,6 +2178,30 @@ class GatewayRunner:
                        )
                    except Exception as _e:
                        logger.debug("Idle agent sweep failed: %s", _e)

+                    # Periodically prune stale SessionStore entries. The
+                    # in-memory dict (and sessions.json) would otherwise grow
+                    # unbounded in gateways serving many rotating chats /
+                    # threads / users over long time windows. Pruning is
+                    # invisible to users — a resumed session just gets a
+                    # fresh session_id, exactly as if the reset policy fired.
+                    _last_prune_ts = getattr(self, "_last_session_store_prune_ts", 0.0)
+                    _prune_interval = 3600.0  # once per hour
+                    if time.time() - _last_prune_ts > _prune_interval:
+                        try:
+                            _max_age = int(
+                                getattr(self.config, "session_store_max_age_days", 0) or 0
+                            )
+                            if _max_age > 0:
+                                _pruned = self.session_store.prune_old_entries(_max_age)
+                                if _pruned:
+                                    logger.info(
+                                        "SessionStore prune: dropped %d stale entries",
+                                        _pruned,
+                                    )
+                        except Exception as _e:
+                            logger.debug("SessionStore prune failed: %s", _e)
+                        self._last_session_store_prune_ts = time.time()
                except Exception as e:
                    logger.debug("Session expiry watcher error: %s", e)
                # Sleep in small increments so we can stop quickly

@@ -2384,6 +2408,7 @@ class GatewayRunner:

        self.adapters.clear()
        self._running_agents.clear()
+        self._running_agents_ts.clear()
        self._pending_messages.clear()
        self._pending_approvals.clear()
        if hasattr(self, '_busy_ack_ts'):
@@ -2408,6 +2433,20 @@ class GatewayRunner:
            except Exception:
                pass

+        # Close SQLite session DBs so the WAL write lock is released.
+        # Without this, --replace and similar restart flows leave the
+        # old gateway's connection holding the WAL lock until Python
+        # actually exits — causing 'database is locked' errors when
+        # the new gateway tries to open the same file.
+        for _db_holder in (self, getattr(self, "session_store", None)):
+            _db = getattr(_db_holder, "_db", None) if _db_holder else None
+            if _db is None or not hasattr(_db, "close"):
+                continue
+            try:
+                _db.close()
+            except Exception as _e:
+                logger.debug("SessionDB close error: %s", _e)
+
        from gateway.status import remove_pid_file
        remove_pid_file()
@@ -2906,9 +2945,7 @@ class GatewayRunner:
                    _quick_key[:30], _stale_age, _stale_idle,
                    _raw_stale_timeout, _stale_detail,
                )
-                del self._running_agents[_quick_key]
-                self._running_agents_ts.pop(_quick_key, None)
-                self._busy_ack_ts.pop(_quick_key, None)
+                self._release_running_agent_state(_quick_key)

        if _quick_key in self._running_agents:
            if event.get_command() == "status":

@@ -2936,8 +2973,7 @@ class GatewayRunner:
                if adapter and hasattr(adapter, 'get_pending_message'):
                    adapter.get_pending_message(_quick_key)  # consume and discard
                self._pending_messages.pop(_quick_key, None)
-                if _quick_key in self._running_agents:
-                    del self._running_agents[_quick_key]
+                self._release_running_agent_state(_quick_key)
                logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
                return "⚡ Stopped. You can continue this session."

@@ -2959,8 +2995,7 @@ class GatewayRunner:
                self._pending_messages.pop(_quick_key, None)
                # Clean up the running agent entry so the reset handler
                # doesn't think an agent is still active.
-                if _quick_key in self._running_agents:
-                    del self._running_agents[_quick_key]
+                self._release_running_agent_state(_quick_key)
                return await self._handle_reset_command(event)

            # /queue <prompt> — queue without interrupting

@@ -3041,8 +3076,7 @@ class GatewayRunner:
            # Agent is being set up but not ready yet.
            if event.get_command() == "stop":
                # Force-clean the sentinel so the session is unlocked.
-                if _quick_key in self._running_agents:
-                    del self._running_agents[_quick_key]
+                self._release_running_agent_state(_quick_key)
                logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20])
                return "⚡ Force-stopped. The agent was still starting — session unlocked."
            # Queue the message so it will be picked up after the

@@ -3361,8 +3395,13 @@ class GatewayRunner:
            # (exception, command fallthrough, etc.) the sentinel must
            # not linger or the session would be permanently locked out.
            if self._running_agents.get(_quick_key) is _AGENT_PENDING_SENTINEL:
-                del self._running_agents[_quick_key]
-                self._running_agents_ts.pop(_quick_key, None)
+                self._release_running_agent_state(_quick_key)
+            else:
+                # Agent path already cleaned _running_agents; make sure
+                # the paired metadata dicts are gone too.
+                self._running_agents_ts.pop(_quick_key, None)
+                if hasattr(self, "_busy_ack_ts"):
+                    self._busy_ack_ts.pop(_quick_key, None)

    async def _prepare_inbound_message_text(
        self,

@@ -4668,16 +4707,14 @@ class GatewayRunner:
        agent = self._running_agents.get(session_key)
        if agent is _AGENT_PENDING_SENTINEL:
            # Force-clean the sentinel so the session is unlocked.
-            if session_key in self._running_agents:
-                del self._running_agents[session_key]
+            self._release_running_agent_state(session_key)
            logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
            return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
        if agent:
            agent.interrupt("Stop requested")
            # Force-clean the session lock so a truly hung agent doesn't
            # keep it locked forever.
-            if session_key in self._running_agents:
-                del self._running_agents[session_key]
+            self._release_running_agent_state(session_key)
            return "⚡ Stopped. You can continue this session."
        else:
            return "No active task to stop."

@@ -6593,8 +6630,7 @@ class GatewayRunner:
            logger.debug("Memory flush on resume failed: %s", e)

        # Clear any running agent for this session key
-        if session_key in self._running_agents:
-            del self._running_agents[session_key]
+        self._release_running_agent_state(session_key)

        # Switch the session entry to point at the old session
        new_entry = self.session_store.switch_session(session_key, target_id)

@@ -8010,6 +8046,30 @@ class GatewayRunner:
        override = self._session_model_overrides.get(session_key)
        return override is not None and override.get("model") == agent_model

+    def _release_running_agent_state(self, session_key: str) -> None:
+        """Pop ALL per-running-agent state entries for ``session_key``.
+
+        Replaces ad-hoc ``del self._running_agents[key]`` calls scattered
+        across the gateway. Those sites had drifted: some popped only
+        ``_running_agents``; some also ``_running_agents_ts``; only one
+        path also cleared ``_busy_ack_ts``. Each missed entry was a
+        small, persistent leak — a (str_key → float) tuple per session
+        per gateway lifetime.
+
+        Use this at every site that ends a running turn, regardless of
+        cause (normal completion, /stop, /reset, /resume, sentinel
+        cleanup, stale-eviction). Per-session state that PERSISTS
+        across turns (``_session_model_overrides``, ``_voice_mode``,
+        ``_pending_approvals``, ``_update_prompt_pending``) is NOT
+        touched here — those have their own lifecycles.
+        """
+        if not session_key:
+            return
+        self._running_agents.pop(session_key, None)
+        self._running_agents_ts.pop(session_key, None)
+        if hasattr(self, "_busy_ack_ts"):
+            self._busy_ack_ts.pop(session_key, None)
+
    def _evict_cached_agent(self, session_key: str) -> None:
        """Remove a cached agent for a session (called on /new, /model, etc)."""
        _lock = getattr(self, "_agent_cache_lock", None)

@@ -9845,10 +9905,8 @@ class GatewayRunner:

        # Clean up tracking
        tracking_task.cancel()
-        if session_key and session_key in self._running_agents:
-            del self._running_agents[session_key]
        if session_key:
-            self._running_agents_ts.pop(session_key, None)
+            self._release_running_agent_state(session_key)
        if self._draining:
            self._update_runtime_status("draining")
@@ -802,6 +802,57 @@ class SessionStore:
            return True
        return False

+    def prune_old_entries(self, max_age_days: int) -> int:
+        """Drop SessionEntry records older than max_age_days.
+
+        Pruning is based on ``updated_at`` (last activity), not ``created_at``.
+        A session that's been active within the window is kept regardless of
+        how old it is. Entries marked ``suspended`` are kept — the user
+        explicitly paused them for later resume. Entries held by an active
+        process (via has_active_processes_fn) are also kept so long-running
+        background work isn't orphaned.
+
+        Pruning is functionally identical to a natural reset-policy expiry:
+        the transcript in SQLite stays, but the session_key → session_id
+        mapping is dropped and the user starts a fresh session on return.
+
+        ``max_age_days <= 0`` disables pruning; returns 0 immediately.
+        Returns the number of entries removed.
+        """
+        if max_age_days is None or max_age_days <= 0:
+            return 0
+        from datetime import timedelta
+
+        cutoff = _now() - timedelta(days=max_age_days)
+        removed_keys: list[str] = []
+
+        with self._lock:
+            self._ensure_loaded_locked()
+            for key, entry in list(self._entries.items()):
+                if entry.suspended:
+                    continue
+                # Never prune sessions with an active background process
+                # attached — the user may still be waiting on output.
+                if self._has_active_processes_fn is not None:
+                    try:
+                        if self._has_active_processes_fn(entry.session_id):
+                            continue
+                    except Exception:
+                        pass
+                if entry.updated_at < cutoff:
+                    removed_keys.append(key)
+            for key in removed_keys:
+                self._entries.pop(key, None)
+            if removed_keys:
+                self._save()
+
+        if removed_keys:
+            logger.info(
+                "SessionStore pruned %d entries older than %d days",
+                len(removed_keys), max_age_days,
+            )
+        return len(removed_keys)
+
    def suspend_recently_active(self, max_age_seconds: int = 120) -> int:
        """Mark recently-active sessions as suspended.
@@ -233,6 +233,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("XAI_API_KEY",),
        base_url_env_var="XAI_BASE_URL",
    ),
+    "nvidia": ProviderConfig(
+        id="nvidia",
+        name="NVIDIA NIM",
+        auth_type="api_key",
+        inference_base_url="https://integrate.api.nvidia.com/v1",
+        api_key_env_vars=("NVIDIA_API_KEY",),
+        base_url_env_var="NVIDIA_BASE_URL",
+    ),
    "ai-gateway": ProviderConfig(
        id="ai-gateway",
        name="Vercel AI Gateway",
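Resolution against this entry follows the usual env-override pattern; a minimal sketch using only the fields declared above:

# Hypothetical resolution sketch: env override wins, else the registry default.
import os

cfg = PROVIDER_REGISTRY["nvidia"]
base_url = os.getenv(cfg.base_url_env_var or "") or cfg.inference_base_url
api_key = next((os.getenv(v) for v in cfg.api_key_env_vars if os.getenv(v)), None)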
@@ -44,7 +44,8 @@ _EXTRA_ENV_KEYS = frozenset({
    "WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
    "WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
    "BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
-    "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME",
+    "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME",
+    "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME",  # legacy aliases (pre-rename, still read for back-compat)
    "QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT",
    "QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL",
    "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
@@ -861,6 +862,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "NVIDIA_API_KEY": {
+        "description": "NVIDIA NIM API key (build.nvidia.com or local NIM endpoint)",
+        "prompt": "NVIDIA NIM API key",
+        "url": "https://build.nvidia.com/",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "NVIDIA_BASE_URL": {
+        "description": "NVIDIA NIM base URL override (e.g. http://localhost:8000/v1 for local NIM)",
+        "prompt": "NVIDIA NIM base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "GLM_API_KEY": {
        "description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
        "prompt": "Z.AI / GLM API key",
@@ -1518,12 +1535,12 @@ OPTIONAL_ENV_VARS = {
        "prompt": "Allow All QQ Users",
        "category": "messaging",
    },
-    "QQ_HOME_CHANNEL": {
+    "QQBOT_HOME_CHANNEL": {
        "description": "Default QQ channel/group for cron delivery and notifications",
        "prompt": "QQ Home Channel",
        "category": "messaging",
    },
-    "QQ_HOME_CHANNEL_NAME": {
+    "QQBOT_HOME_CHANNEL_NAME": {
        "description": "Display name for the QQ home channel",
        "prompt": "QQ Home Channel Name",
        "category": "messaging",
@@ -825,6 +825,7 @@ def run_doctor(args):
        ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
        ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
        ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
+        ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
        # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
        ("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
@@ -296,6 +296,7 @@ def run_dump(args):
        ("DEEPSEEK_API_KEY", "deepseek"),
        ("DASHSCOPE_API_KEY", "dashscope"),
        ("HF_TOKEN", "huggingface"),
+        ("NVIDIA_API_KEY", "nvidia"),
        ("AI_GATEWAY_API_KEY", "ai_gateway"),
        ("OPENCODE_ZEN_API_KEY", "opencode_zen"),
        ("OPENCODE_GO_API_KEY", "opencode_go"),
@@ -1998,7 +1998,7 @@ _PLATFORMS = [
            {"name": "QQ_ALLOWED_USERS", "prompt": "Allowed user OpenIDs (comma-separated, leave empty for open access)", "password": False,
             "is_allowlist": True,
             "help": "Optional — restrict DM access to specific user OpenIDs."},
-            {"name": "QQ_HOME_CHANNEL", "prompt": "Home channel (user/group OpenID for cron delivery, or empty)", "password": False,
+            {"name": "QQBOT_HOME_CHANNEL", "prompt": "Home channel (user/group OpenID for cron delivery, or empty)", "password": False,
             "help": "OpenID to deliver cron results and notifications to."},
        ],
    },
@@ -2625,6 +2625,215 @@ def _setup_feishu():
    print_info(f"   Bot: {bot_name}")


def _setup_qqbot():
    """Interactive setup for QQ Bot — scan-to-configure or manual credentials."""
    print()
    print(color("  ─── 🐧 QQ Bot Setup ───", Colors.CYAN))

    existing_app_id = get_env_value("QQ_APP_ID")
    existing_secret = get_env_value("QQ_CLIENT_SECRET")
    if existing_app_id and existing_secret:
        print()
        print_success("QQ Bot is already configured.")
        if not prompt_yes_no("  Reconfigure QQ Bot?", False):
            return

    # ── Choose setup method ──
    print()
    method_choices = [
        "Scan QR code to add bot automatically (recommended)",
        "Enter existing App ID and App Secret manually",
    ]
    method_idx = prompt_choice("  How would you like to set up QQ Bot?", method_choices, 0)

    credentials = None
    used_qr = False

    if method_idx == 0:
        # ── QR scan-to-configure ──
        try:
            credentials = _qqbot_qr_flow()
        except KeyboardInterrupt:
            print()
            print_warning("  QQ Bot setup cancelled.")
            return
        if credentials:
            used_qr = True
        if not credentials:
            print_info("  QR setup did not complete. Continuing with manual input.")

    # ── Manual credential input ──
    if not credentials:
        print()
        print_info("  Go to https://q.qq.com to register a QQ Bot application.")
        print_info("  Note your App ID and App Secret from the application page.")
        print()
        app_id = prompt("  App ID", password=False)
        if not app_id:
            print_warning("  Skipped — QQ Bot won't work without an App ID.")
            return
        app_secret = prompt("  App Secret", password=True)
        if not app_secret:
            print_warning("  Skipped — QQ Bot won't work without an App Secret.")
            return
        credentials = {"app_id": app_id.strip(), "client_secret": app_secret.strip(), "user_openid": ""}

    # ── Save core credentials ──
    save_env_value("QQ_APP_ID", credentials["app_id"])
    save_env_value("QQ_CLIENT_SECRET", credentials["client_secret"])

    user_openid = credentials.get("user_openid", "")

    # ── DM security policy ──
    print()
    access_choices = [
        "Use DM pairing approval (recommended)",
        "Allow all direct messages",
        "Only allow listed user OpenIDs",
    ]
    access_idx = prompt_choice("  How should direct messages be authorized?", access_choices, 0)
    if access_idx == 0:
        save_env_value("QQ_ALLOW_ALL_USERS", "false")
        if user_openid:
            print()
            if prompt_yes_no(f"  Add yourself ({user_openid}) to the allow list?", True):
                save_env_value("QQ_ALLOWED_USERS", user_openid)
                print_success(f"  Allow list set to {user_openid}")
            else:
                save_env_value("QQ_ALLOWED_USERS", "")
        else:
            save_env_value("QQ_ALLOWED_USERS", "")
        print_success("  DM pairing enabled.")
        print_info("  Unknown users can request access; approve with `hermes pairing approve`.")
    elif access_idx == 1:
        save_env_value("QQ_ALLOW_ALL_USERS", "true")
        save_env_value("QQ_ALLOWED_USERS", "")
        print_warning("  Open DM access enabled for QQ Bot.")
    else:
        default_allow = user_openid or ""
        allowlist = prompt("  Allowed user OpenIDs (comma-separated)", default_allow, password=False).replace(" ", "")
        save_env_value("QQ_ALLOW_ALL_USERS", "false")
        save_env_value("QQ_ALLOWED_USERS", allowlist)
        print_success("  Allowlist saved.")

    # ── Home channel ──
    if user_openid:
        print()
        if prompt_yes_no(f"  Use your QQ user ID ({user_openid}) as the home channel?", True):
            save_env_value("QQBOT_HOME_CHANNEL", user_openid)
            print_success(f"  Home channel set to {user_openid}")
    else:
        print()
        home_channel = prompt("  Home channel OpenID (for cron/notifications, or empty)", password=False)
        if home_channel:
            save_env_value("QQBOT_HOME_CHANNEL", home_channel.strip())
            print_success(f"  Home channel set to {home_channel.strip()}")

    print()
    print_success("🐧 QQ Bot configured!")
    print_info(f"   App ID: {credentials['app_id']}")


def _qqbot_render_qr(url: str) -> bool:
    """Try to render a QR code in the terminal. Returns True if successful."""
    try:
        import qrcode as _qr
        qr = _qr.QRCode(border=1, error_correction=_qr.constants.ERROR_CORRECT_L)
        qr.add_data(url)
        qr.make(fit=True)
        qr.print_ascii(invert=True)
        return True
    except Exception:
        return False


def _qqbot_qr_flow():
    """Run the QR-code scan-to-configure flow.

    Returns a dict with app_id, client_secret, user_openid on success,
    or None on failure/cancel.
    """
    try:
        from gateway.platforms.qqbot import (
            create_bind_task, poll_bind_result, build_connect_url,
            decrypt_secret, BindStatus,
        )
        from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL
    except Exception as exc:
        print_error(f"  QQBot onboard import failed: {exc}")
        return None

    import asyncio
    import time

    MAX_REFRESHES = 3
    refresh_count = 0

    while refresh_count <= MAX_REFRESHES:
        loop = asyncio.new_event_loop()

        # ── Create bind task ──
        try:
            task_id, aes_key = loop.run_until_complete(create_bind_task())
        except Exception as e:
            print_warning(f"  Failed to create bind task: {e}")
            loop.close()
            return None

        url = build_connect_url(task_id)

        # ── Display QR code + URL ──
        print()
        if _qqbot_render_qr(url):
            print(f"  Scan the QR code above, or open this URL directly:\n  {url}")
        else:
            print(f"  Open this URL in QQ on your phone:\n  {url}")
            print_info("  Tip: pip install qrcode to show a scannable QR code here")

        # ── Poll loop (silent — keep QR visible at bottom) ──
        try:
            while True:
                try:
                    status, app_id, encrypted_secret, user_openid = loop.run_until_complete(
                        poll_bind_result(task_id)
                    )
                except Exception:
                    time.sleep(ONBOARD_POLL_INTERVAL)
                    continue

                if status == BindStatus.COMPLETED:
                    client_secret = decrypt_secret(encrypted_secret, aes_key)
                    print()
                    print_success(f"  QR scan complete! (App ID: {app_id})")
                    if user_openid:
                        print_info(f"  Scanner's OpenID: {user_openid}")
                    return {
                        "app_id": app_id,
                        "client_secret": client_secret,
                        "user_openid": user_openid,
                    }

                if status == BindStatus.EXPIRED:
                    refresh_count += 1
                    if refresh_count > MAX_REFRESHES:
                        print()
                        print_warning(f"  QR code expired {MAX_REFRESHES} times — giving up.")
                        return None
                    print()
                    print_warning(f"  QR code expired, refreshing... ({refresh_count}/{MAX_REFRESHES})")
                    loop.close()
                    break  # outer while creates a new task

                time.sleep(ONBOARD_POLL_INTERVAL)
        except KeyboardInterrupt:
            loop.close()
            raise
        finally:
            loop.close()

    return None


def _setup_signal():
    """Interactive setup for Signal messenger."""
    import shutil
@@ -2806,6 +3015,8 @@ def gateway_setup():
        _setup_dingtalk()
    elif platform["key"] == "feishu":
        _setup_feishu()
+   elif platform["key"] == "qqbot":
+       _setup_qqbot()
    else:
        _setup_standard_platform(platform)
@@ -1532,6 +1532,7 @@ def select_provider_and_model(args=None):
        "huggingface",
        "xiaomi",
        "arcee",
+       "nvidia",
        "ollama-cloud",
    ):
        _model_flow_api_key_provider(config, selected_provider, current_model)
@@ -5875,6 +5876,7 @@ For more help on a command:
            "kilocode",
            "xiaomi",
            "arcee",
+           "nvidia",
        ],
        default=None,
        help="Inference provider (default: auto)",
@@ -135,7 +135,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "gemini-2.5-flash-lite",
        # Gemma open models (also served via AI Studio)
        "gemma-4-31b-it",
-       "gemma-4-26b-it",
    ],
    "google-gemini-cli": [
        "gemini-2.5-pro",
@@ -155,6 +154,20 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "grok-4.20-reasoning",
        "grok-4-1-fast-reasoning",
    ],
+   "nvidia": [
+       # NVIDIA flagship reasoning models
+       "nvidia/nemotron-3-super-120b-a12b",
+       "nvidia/nemotron-3-nano-30b-a3b",
+       "nvidia/llama-3.3-nemotron-super-49b-v1.5",
+       # Third-party agentic models hosted on build.nvidia.com
+       # (map to OpenRouter defaults — users get familiar picks on NIM)
+       "qwen/qwen3.5-397b-a17b",
+       "deepseek-ai/deepseek-v3.2",
+       "moonshotai/kimi-k2.5",
+       "minimaxai/minimax-m2.5",
+       "z-ai/glm5",
+       "openai/gpt-oss-120b",
+   ],
    "kimi-coding": [
        "kimi-k2.5",
        "kimi-for-coding",
@@ -536,6 +549,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
    ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
+   ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
    ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
    ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
    ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
@@ -618,6 +632,10 @@ _PROVIDER_ALIASES = {
    "grok": "xai",
    "x-ai": "xai",
    "x.ai": "xai",
+   "nim": "nvidia",
+   "nvidia-nim": "nvidia",
+   "build-nvidia": "nvidia",
+   "nemotron": "nvidia",
    "ollama": "custom",  # bare "ollama" = local; use "ollama-cloud" for cloud
    "ollama_cloud": "ollama-cloud",
}
@@ -137,6 +137,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        base_url_override="https://api.x.ai/v1",
        base_url_env_var="XAI_BASE_URL",
    ),
+   "nvidia": HermesOverlay(
+       transport="openai_chat",
+       base_url_override="https://integrate.api.nvidia.com/v1",
+       base_url_env_var="NVIDIA_BASE_URL",
+   ),
    "xiaomi": HermesOverlay(
        transport="openai_chat",
        base_url_env_var="XIAOMI_BASE_URL",
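Given an `openai_chat` overlay like the one added above, wiring up NVIDIA NIM by hand reduces to an OpenAI-compatible client pointed at the integrate endpoint. A minimal sketch, assuming the standard `openai` Python package (the env var names and default base URL come from this diff's registry entries; the helper name is hypothetical):

import os

from openai import OpenAI


def nvidia_nim_client() -> OpenAI:
    # NVIDIA_BASE_URL / NVIDIA_API_KEY match the registry entries in this diff.
    base_url = os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1")
    return OpenAI(base_url=base_url, api_key=os.environ["NVIDIA_API_KEY"])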
@@ -191,6 +196,12 @@ ALIASES: Dict[str, str] = {
    "x.ai": "xai",
    "grok": "xai",

+   # nvidia
+   "nim": "nvidia",
+   "nvidia-nim": "nvidia",
+   "build-nvidia": "nvidia",
+   "nemotron": "nvidia",
+
    # kimi-for-coding (models.dev ID)
    "kimi": "kimi-for-coding",
    "kimi-coding": "kimi-for-coding",
@@ -91,7 +91,7 @@ _DEFAULT_PROVIDER_MODELS = {
    "gemini": [
        "gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
        "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
-       "gemma-4-31b-it", "gemma-4-26b-it",
+       "gemma-4-31b-it",
    ],
    "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
    "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
@@ -2005,52 +2005,6 @@ def _setup_wecom_callback():
    _gw_setup()


-def _setup_qqbot():
-    """Configure QQ Bot gateway."""
-    print_header("QQ Bot")
-    existing = get_env_value("QQ_APP_ID")
-    if existing:
-        print_info("QQ Bot: already configured")
-        if not prompt_yes_no("Reconfigure QQ Bot?", False):
-            return
-
-    print_info("Connects Hermes to QQ via the Official QQ Bot API (v2).")
-    print_info(" Requires a QQ Bot application at q.qq.com")
-    print_info(" Reference: https://bot.q.qq.com/wiki/develop/api-v2/")
-    print()
-
-    app_id = prompt("QQ Bot App ID")
-    if not app_id:
-        print_warning("App ID is required — skipping QQ Bot setup")
-        return
-    save_env_value("QQ_APP_ID", app_id.strip())
-
-    client_secret = prompt("QQ Bot App Secret", password=True)
-    if not client_secret:
-        print_warning("App Secret is required — skipping QQ Bot setup")
-        return
-    save_env_value("QQ_CLIENT_SECRET", client_secret)
-    print_success("QQ Bot credentials saved")
-
-    print()
-    print_info("🔒 Security: Restrict who can DM your bot")
-    print_info(" Use QQ user OpenIDs (found in event payloads)")
-    print()
-    allowed_users = prompt("Allowed user OpenIDs (comma-separated, leave empty for open access)")
-    if allowed_users:
-        save_env_value("QQ_ALLOWED_USERS", allowed_users.replace(" ", ""))
-        print_success("QQ Bot allowlist configured")
-    else:
-        print_info("⚠️ No allowlist set — anyone can DM the bot!")
-
-    print()
-    print_info("📬 Home Channel: OpenID for cron job delivery and notifications.")
-    home_channel = prompt("Home channel OpenID (leave empty to set later)")
-    if home_channel:
-        save_env_value("QQ_HOME_CHANNEL", home_channel)
-
-    print()
-    print_success("QQ Bot configured!")


def _setup_bluebubbles():
@@ -2119,12 +2073,9 @@ def _setup_bluebubbles():


def _setup_qqbot():
-    """Configure QQ Bot (Official API v2) via standard platform setup."""
-    from hermes_cli.gateway import _PLATFORMS
-    qq_platform = next((p for p in _PLATFORMS if p["key"] == "qqbot"), None)
-    if qq_platform:
-        from hermes_cli.gateway import _setup_standard_platform
-        _setup_standard_platform(qq_platform)
+    """Configure QQ Bot (Official API v2) via gateway setup."""
+    from hermes_cli.gateway import _setup_qqbot as _gateway_setup_qqbot
+    _gateway_setup_qqbot()


def _setup_webhooks():
@@ -2264,7 +2215,9 @@ def setup_gateway(config: dict):
            missing_home.append("Slack")
        if get_env_value("BLUEBUBBLES_SERVER_URL") and not get_env_value("BLUEBUBBLES_HOME_CHANNEL"):
            missing_home.append("BlueBubbles")
-       if get_env_value("QQ_APP_ID") and not get_env_value("QQ_HOME_CHANNEL"):
+       if get_env_value("QQ_APP_ID") and not (
+           get_env_value("QQBOT_HOME_CHANNEL") or get_env_value("QQ_HOME_CHANNEL")
+       ):
            missing_home.append("QQBot")

        if missing_home:
@@ -317,7 +317,7 @@ def show_status(args):
        "WeCom Callback": ("WECOM_CALLBACK_CORP_ID", None),
        "Weixin": ("WEIXIN_ACCOUNT_ID", "WEIXIN_HOME_CHANNEL"),
        "BlueBubbles": ("BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_HOME_CHANNEL"),
-       "QQBot": ("QQ_APP_ID", "QQ_HOME_CHANNEL"),
+       "QQBot": ("QQ_APP_ID", "QQBOT_HOME_CHANNEL"),
    }

    for name, (token_var, home_var) in platforms.items():
@@ -327,6 +327,9 @@ def show_status(args):
        home_channel = ""
        if home_var:
            home_channel = os.getenv(home_var, "")
+           # Back-compat: QQBot home channel was renamed from QQ_HOME_CHANNEL to QQBOT_HOME_CHANNEL
+           if not home_channel and home_var == "QQBOT_HOME_CHANNEL":
+               home_channel = os.getenv("QQ_HOME_CHANNEL", "")

        status = "configured" if has_token else "not configured"
        if home_channel:
run_agent.py
@@ -7208,14 +7208,22 @@ class AIAgent:

            # Use auxiliary client for the flush call when available --
            # it's cheaper and avoids Codex Responses API incompatibility.
-           from agent.auxiliary_client import call_llm as _call_llm
+           from agent.auxiliary_client import (
+               call_llm as _call_llm,
+               _fixed_temperature_for_model,
+           )
            _aux_available = True
+           # Use the fixed-temperature override (e.g. kimi-for-coding → 0.6) if
+           # the model has a strict contract; otherwise the historical 0.3 default.
+           _flush_temperature = _fixed_temperature_for_model(self.model)
+           if _flush_temperature is None:
+               _flush_temperature = 0.3
        try:
            response = _call_llm(
                task="flush_memories",
                messages=api_messages,
                tools=[memory_tool_def],
-               temperature=0.3,
+               temperature=_flush_temperature,
                max_tokens=5120,
                # timeout resolved from auxiliary.flush_memories.timeout config
            )
@@ -7227,7 +7235,7 @@ class AIAgent:
            # No auxiliary client -- use the Codex Responses path directly
            codex_kwargs = self._build_api_kwargs(api_messages)
            codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
-           codex_kwargs["temperature"] = 0.3
+           codex_kwargs["temperature"] = _flush_temperature
            if "max_output_tokens" in codex_kwargs:
                codex_kwargs["max_output_tokens"] = 5120
            response = self._run_codex_stream(codex_kwargs)
@@ -7246,7 +7254,7 @@ class AIAgent:
                "model": self.model,
                "messages": api_messages,
                "tools": [memory_tool_def],
-               "temperature": 0.3,
+               "temperature": _flush_temperature,
                **self._max_tokens_param(5120),
            }
            from agent.auxiliary_client import _get_task_timeout
@@ -256,6 +256,8 @@ AUTHOR_MAP = {
    "anthhub@163.com": "anthhub",
    "shenuu@gmail.com": "shenuu",
    "xiayh17@gmail.com": "xiayh0107",
+   "asurla@nvidia.com": "anniesurla",
+   "limkuan24@gmail.com": "WideLee",
}
@@ -696,6 +696,95 @@ class TestIsConnectionError:
        assert _is_connection_error(err) is False


class TestKimiForCodingTemperature:
    """kimi-for-coding now requires temperature=0.6 exactly."""

    def test_build_call_kwargs_forces_fixed_temperature(self):
        from agent.auxiliary_client import _build_call_kwargs

        kwargs = _build_call_kwargs(
            provider="kimi-coding",
            model="kimi-for-coding",
            messages=[{"role": "user", "content": "hello"}],
            temperature=0.3,
        )

        assert kwargs["temperature"] == 0.6

    def test_build_call_kwargs_injects_temperature_when_missing(self):
        from agent.auxiliary_client import _build_call_kwargs

        kwargs = _build_call_kwargs(
            provider="kimi-coding",
            model="kimi-for-coding",
            messages=[{"role": "user", "content": "hello"}],
            temperature=None,
        )

        assert kwargs["temperature"] == 0.6

    def test_auto_routed_kimi_for_coding_sync_call_uses_fixed_temperature(self):
        client = MagicMock()
        client.base_url = "https://api.kimi.com/coding/v1"
        response = MagicMock()
        client.chat.completions.create.return_value = response

        with patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "kimi-for-coding"),
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", "kimi-for-coding", None, None, None),
        ):
            result = call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hello"}],
                temperature=0.1,
            )

        assert result is response
        kwargs = client.chat.completions.create.call_args.kwargs
        assert kwargs["model"] == "kimi-for-coding"
        assert kwargs["temperature"] == 0.6

    @pytest.mark.asyncio
    async def test_auto_routed_kimi_for_coding_async_call_uses_fixed_temperature(self):
        client = MagicMock()
        client.base_url = "https://api.kimi.com/coding/v1"
        response = MagicMock()
        client.chat.completions.create = AsyncMock(return_value=response)

        with patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "kimi-for-coding"),
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", "kimi-for-coding", None, None, None),
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hello"}],
                temperature=0.1,
            )

        assert result is response
        kwargs = client.chat.completions.create.call_args.kwargs
        assert kwargs["model"] == "kimi-for-coding"
        assert kwargs["temperature"] == 0.6

    def test_non_kimi_model_still_preserves_temperature(self):
        from agent.auxiliary_client import _build_call_kwargs

        kwargs = _build_call_kwargs(
            provider="kimi-coding",
            model="kimi-k2.5",
            messages=[{"role": "user", "content": "hello"}],
            temperature=0.3,
        )

        assert kwargs["temperature"] == 0.3


# ---------------------------------------------------------------------------
# async_call_llm payment / connection fallback (#7512 bug 2)
# ---------------------------------------------------------------------------
@@ -826,6 +826,160 @@ class TestGeminiCloudCodeClient:
        finally:
            client.close()


class TestGeminiHttpErrorParsing:
    """Regression coverage for _gemini_http_error Google-envelope parsing.

    These are the paths that users actually hit during Google-side throttling
    (April 2026: gemini-2.5-pro MODEL_CAPACITY_EXHAUSTED, gemma-4-26b-it
    returning 404). The error needs to carry status_code + response so the
    main loop's error_classifier and Retry-After logic work.
    """

    @staticmethod
    def _fake_response(status: int, body: dict | str = "", headers=None):
        """Minimal httpx.Response stand-in (duck-typed for _gemini_http_error)."""
        class _FakeResponse:
            def __init__(self):
                self.status_code = status
                if isinstance(body, dict):
                    self.text = json.dumps(body)
                else:
                    self.text = body
                self.headers = headers or {}
        return _FakeResponse()

    def test_model_capacity_exhausted_produces_friendly_message(self):
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        body = {
            "error": {
                "code": 429,
                "message": "Resource has been exhausted (e.g. check quota).",
                "status": "RESOURCE_EXHAUSTED",
                "details": [
                    {
                        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
                        "reason": "MODEL_CAPACITY_EXHAUSTED",
                        "domain": "googleapis.com",
                        "metadata": {"model": "gemini-2.5-pro"},
                    },
                    {
                        "@type": "type.googleapis.com/google.rpc.RetryInfo",
                        "retryDelay": "30s",
                    },
                ],
            }
        }
        err = _gemini_http_error(self._fake_response(429, body))
        assert err.status_code == 429
        assert err.code == "code_assist_capacity_exhausted"
        assert err.retry_after == 30.0
        assert err.details["reason"] == "MODEL_CAPACITY_EXHAUSTED"
        # Message must be user-friendly, not a raw JSON dump.
        message = str(err)
        assert "gemini-2.5-pro" in message
        assert "capacity exhausted" in message.lower()
        assert "30s" in message
        # response attr is preserved for run_agent's Retry-After header path.
        assert err.response is not None

    def test_resource_exhausted_without_reason(self):
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        body = {
            "error": {
                "code": 429,
                "message": "Quota exceeded for requests per minute.",
                "status": "RESOURCE_EXHAUSTED",
            }
        }
        err = _gemini_http_error(self._fake_response(429, body))
        assert err.status_code == 429
        assert err.code == "code_assist_rate_limited"
        message = str(err)
        assert "quota" in message.lower()

    def test_404_model_not_found_produces_model_retired_message(self):
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        body = {
            "error": {
                "code": 404,
                "message": "models/gemma-4-26b-it is not found for API version v1internal",
                "status": "NOT_FOUND",
            }
        }
        err = _gemini_http_error(self._fake_response(404, body))
        assert err.status_code == 404
        message = str(err)
        assert "not available" in message.lower() or "retired" in message.lower()
        # Error message should reference the actual model text from Google.
        assert "gemma-4-26b-it" in message

    def test_unauthorized_preserves_status_code(self):
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        err = _gemini_http_error(self._fake_response(
            401, {"error": {"code": 401, "message": "Invalid token", "status": "UNAUTHENTICATED"}},
        ))
        assert err.status_code == 401
        assert err.code == "code_assist_unauthorized"

    def test_retry_after_header_fallback(self):
        """If the body has no RetryInfo detail, fall back to Retry-After header."""
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        resp = self._fake_response(
            429,
            {"error": {"code": 429, "message": "Rate limited", "status": "RESOURCE_EXHAUSTED"}},
            headers={"Retry-After": "45"},
        )
        err = _gemini_http_error(resp)
        assert err.retry_after == 45.0

    def test_malformed_body_still_produces_structured_error(self):
        """Non-JSON body must not swallow status_code — we still want the classifier path."""
        from agent.gemini_cloudcode_adapter import _gemini_http_error

        err = _gemini_http_error(self._fake_response(500, "<html>internal error</html>"))
        assert err.status_code == 500
        # Raw body snippet must still be there for debugging.
        assert "500" in str(err)

    def test_status_code_flows_through_error_classifier(self):
        """End-to-end: CodeAssistError from a 429 must classify as rate_limit.

        This is the whole point of adding status_code to CodeAssistError —
        _extract_status_code must see it and FailoverReason.rate_limit must
        fire, so the main loop triggers fallback_providers.
        """
        from agent.gemini_cloudcode_adapter import _gemini_http_error
        from agent.error_classifier import classify_api_error, FailoverReason

        body = {
            "error": {
                "code": 429,
                "message": "Resource has been exhausted",
                "status": "RESOURCE_EXHAUSTED",
                "details": [
                    {
                        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
                        "reason": "MODEL_CAPACITY_EXHAUSTED",
                        "metadata": {"model": "gemini-2.5-pro"},
                    }
                ],
            }
        }
        err = _gemini_http_error(self._fake_response(429, body))

        classified = classify_api_error(
            err, provider="google-gemini-cli", model="gemini-2.5-pro",
        )
        assert classified.status_code == 429
        assert classified.reason == FailoverReason.rate_limit


# =============================================================================
# Provider registration
# =============================================================================
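For orientation, here is a hypothetical sketch of the envelope parsing these tests pin down. The real `_gemini_http_error` in `agent/gemini_cloudcode_adapter.py` differs in detail, and the `CodeAssistError` constructor shape used here is an assumption:

import json


def sketch_gemini_http_error(response):
    # Hypothetical sketch only; mirrors the behaviour the tests above assert.
    status = response.status_code
    try:
        err = json.loads(response.text).get("error", {})
    except Exception:
        err = {}
    reason, model, retry_after = "", "", None
    for detail in err.get("details", []):
        dtype = detail.get("@type", "")
        if dtype.endswith("ErrorInfo"):
            reason = detail.get("reason", "")
            model = detail.get("metadata", {}).get("model", "")
        elif dtype.endswith("RetryInfo"):
            retry_after = float(detail.get("retryDelay", "0s").rstrip("s"))
    if retry_after is None and response.headers.get("Retry-After"):
        retry_after = float(response.headers["Retry-After"])
    if status == 429 and reason == "MODEL_CAPACITY_EXHAUSTED":
        code = "code_assist_capacity_exhausted"
        message = f"{model}: capacity exhausted" + (
            f", retry in {retry_after:g}s" if retry_after is not None else ""
        )
    elif status == 429:
        code, message = "code_assist_rate_limited", err.get("message", "rate limited")
    elif status == 401:
        code, message = "code_assist_unauthorized", err.get("message", "unauthorized")
    elif status == 404:
        code = "code_assist_model_not_found"
        message = f"Model not available (possibly retired): {err.get('message', '')}"
    else:
        code = "code_assist_http_error"
        message = f"HTTP {status}: {(err.get('message') or response.text)[:200]}"
    # status_code and response must survive on the error object so the
    # classifier and Retry-After logic can use them (constructor assumed).
    return CodeAssistError(message, code=code, status_code=status,
                           retry_after=retry_after,
                           details={"reason": reason}, response=response)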
@@ -229,6 +229,15 @@ def _hermetic_environment(tmp_path, monkeypatch):
    monkeypatch.setenv("LC_ALL", "C.UTF-8")
    monkeypatch.setenv("PYTHONHASHSEED", "0")

    # 4b. Disable AWS IMDS lookups. Without this, any test that ends up
    #     calling has_aws_credentials() / resolve_aws_auth_env_var()
    #     (e.g. provider auto-detect, status command, cron run_job) burns
    #     ~2s waiting for the metadata service at 169.254.169.254 to time
    #     out. Tests don't run on EC2 — IMDS is always unreachable here.
    monkeypatch.setenv("AWS_EC2_METADATA_DISABLED", "true")
    monkeypatch.setenv("AWS_METADATA_SERVICE_TIMEOUT", "1")
    monkeypatch.setenv("AWS_METADATA_SERVICE_NUM_ATTEMPTS", "1")

    # 5. Reset plugin singleton so tests don't leak plugins from
    #    ~/.hermes/plugins/ (which, per step 3, is now empty — but the
    #    singleton might still be cached from a previous test).
@@ -179,7 +179,7 @@ class TestVoiceAttachmentSSRFProtection:
        from gateway.platforms.qqbot import QQAdapter, _ssrf_redirect_guard

        client = mock.AsyncMock()
-       with mock.patch("gateway.platforms.qqbot.httpx.AsyncClient", return_value=client) as async_client_cls:
+       with mock.patch("gateway.platforms.qqbot.adapter.httpx.AsyncClient", return_value=client) as async_client_cls:
            adapter = QQAdapter(_make_config(app_id="a", client_secret="b"))
            adapter._ensure_token = mock.AsyncMock(side_effect=RuntimeError("stop after client creation"))
tests/gateway/test_session_state_cleanup.py (new file)
@@ -0,0 +1,231 @@
"""Regression tests for _release_running_agent_state and SessionDB shutdown.

Before this change, running-agent state lived in three dicts that drifted
out of sync:

    self._running_agents     — AIAgent instance per session key
    self._running_agents_ts  — start timestamp per session key
    self._busy_ack_ts        — last busy-ack timestamp per session key

Six cleanup sites did ``del self._running_agents[key]`` without touching
the other two; one site only popped ``_running_agents`` and
``_running_agents_ts``; and only the stale-eviction site cleaned all
three. Each missed entry was a small persistent leak.

Also: SessionDB connections were never closed on gateway shutdown,
leaving WAL locks in place until Python actually exited.
"""

import threading
from unittest.mock import MagicMock

import pytest


def _make_runner():
    """Bare GatewayRunner wired with just the state the helper touches."""
    from gateway.run import GatewayRunner

    runner = GatewayRunner.__new__(GatewayRunner)
    runner._running_agents = {}
    runner._running_agents_ts = {}
    runner._busy_ack_ts = {}
    return runner


class TestReleaseRunningAgentStateUnit:
    def test_pops_all_three_dicts(self):
        runner = _make_runner()
        runner._running_agents["k"] = MagicMock()
        runner._running_agents_ts["k"] = 123.0
        runner._busy_ack_ts["k"] = 456.0

        runner._release_running_agent_state("k")

        assert "k" not in runner._running_agents
        assert "k" not in runner._running_agents_ts
        assert "k" not in runner._busy_ack_ts

    def test_idempotent_on_missing_key(self):
        """Calling twice (or on an absent key) must not raise."""
        runner = _make_runner()
        runner._release_running_agent_state("missing")
        runner._release_running_agent_state("missing")  # still fine

    def test_noop_on_empty_session_key(self):
        """Empty string / None key is treated as a no-op."""
        runner = _make_runner()
        runner._running_agents[""] = "guard"
        runner._release_running_agent_state("")
        # Empty key not processed — guard value survives.
        assert runner._running_agents[""] == "guard"

    def test_preserves_other_sessions(self):
        runner = _make_runner()
        for k in ("a", "b", "c"):
            runner._running_agents[k] = MagicMock()
            runner._running_agents_ts[k] = 1.0
            runner._busy_ack_ts[k] = 1.0

        runner._release_running_agent_state("b")

        assert set(runner._running_agents.keys()) == {"a", "c"}
        assert set(runner._running_agents_ts.keys()) == {"a", "c"}
        assert set(runner._busy_ack_ts.keys()) == {"a", "c"}

    def test_handles_missing_busy_ack_attribute(self):
        """Backward-compatible with older runners lacking _busy_ack_ts."""
        runner = _make_runner()
        del runner._busy_ack_ts  # simulate older version
        runner._running_agents["k"] = MagicMock()
        runner._running_agents_ts["k"] = 1.0

        runner._release_running_agent_state("k")  # should not raise

        assert "k" not in runner._running_agents
        assert "k" not in runner._running_agents_ts

    def test_concurrent_release_is_safe(self):
        """Multiple threads releasing different keys concurrently."""
        runner = _make_runner()
        for i in range(50):
            k = f"s{i}"
            runner._running_agents[k] = MagicMock()
            runner._running_agents_ts[k] = float(i)
            runner._busy_ack_ts[k] = float(i)

        def worker(keys):
            for k in keys:
                runner._release_running_agent_state(k)

        threads = [
            threading.Thread(target=worker, args=([f"s{i}" for i in range(start, 50, 5)],))
            for start in range(5)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5)
            assert not t.is_alive()

        assert runner._running_agents == {}
        assert runner._running_agents_ts == {}
        assert runner._busy_ack_ts == {}


class TestNoMoreBareDeleteSites:
    """Regression: all bare `del self._running_agents[key]` sites were
    converted to use the helper. If a future contributor reverts one,
    this test flags it. Docstrings / comments mentioning the old
    pattern are allowed.
    """

    def test_no_bare_del_of_running_agents_in_gateway_run(self):
        from pathlib import Path
        import re

        gateway_run = (Path(__file__).parent.parent.parent / "gateway" / "run.py").read_text()
        # Match `del self._running_agents[...]` that is NOT inside a
        # triple-quoted docstring. We scan non-docstring lines only.
        lines = gateway_run.splitlines()

        in_docstring = False
        docstring_delim = None
        offenders = []
        for idx, line in enumerate(lines, start=1):
            stripped = line.strip()
            if not in_docstring:
                if stripped.startswith('"""') or stripped.startswith("'''"):
                    delim = stripped[:3]
                    # single-line docstring?
                    if stripped.count(delim) >= 2:
                        continue
                    in_docstring = True
                    docstring_delim = delim
                    continue
                if re.search(r"\bdel\s+self\._running_agents\[", line):
                    offenders.append((idx, line.rstrip()))
            else:
                if docstring_delim and docstring_delim in stripped:
                    in_docstring = False
                    docstring_delim = None

        assert offenders == [], (
            "Found bare `del self._running_agents[...]` sites in gateway/run.py. "
            "Use self._release_running_agent_state(session_key) instead so "
            "_running_agents_ts and _busy_ack_ts are popped in lockstep.\n"
            + "\n".join(f"  line {n}: {l}" for n, l in offenders)
        )


class TestSessionDbCloseOnShutdown:
    """_stop_impl should call .close() on both self._session_db and
    self.session_store._db to release SQLite WAL locks before the new
    gateway (during --replace restart) tries to open the same file.
    """

    def test_stop_impl_closes_both_session_dbs(self):
        """Run the exact shutdown block that closes SessionDBs and verify
        .close() was called on both holders."""
        from gateway.run import GatewayRunner

        runner = GatewayRunner.__new__(GatewayRunner)

        runner_db = MagicMock()
        store_db = MagicMock()

        runner._db = runner_db
        runner.session_store = MagicMock()
        runner.session_store._db = store_db

        # Replicate the exact production loop from _stop_impl.
        for _db_holder in (runner, getattr(runner, "session_store", None)):
            _db = getattr(_db_holder, "_db", None) if _db_holder else None
            if _db is None or not hasattr(_db, "close"):
                continue
            _db.close()

        runner_db.close.assert_called_once()
        store_db.close.assert_called_once()

    def test_shutdown_tolerates_missing_session_store(self):
        """Gateway without a session_store attribute must not crash on shutdown."""
        from gateway.run import GatewayRunner

        runner = GatewayRunner.__new__(GatewayRunner)
        runner._db = MagicMock()
        # Deliberately no session_store attribute.

        for _db_holder in (runner, getattr(runner, "session_store", None)):
            _db = getattr(_db_holder, "_db", None) if _db_holder else None
            if _db is None or not hasattr(_db, "close"):
                continue
            _db.close()

        runner._db.close.assert_called_once()

    def test_shutdown_tolerates_close_raising(self):
        """A close() that raises must not prevent subsequent cleanup."""
        from gateway.run import GatewayRunner

        runner = GatewayRunner.__new__(GatewayRunner)
        flaky_db = MagicMock()
        flaky_db.close.side_effect = RuntimeError("simulated lock error")
        healthy_db = MagicMock()

        runner._db = flaky_db
        runner.session_store = MagicMock()
        runner.session_store._db = healthy_db

        # Same pattern as production: try/except around each close().
        for _db_holder in (runner, getattr(runner, "session_store", None)):
            _db = getattr(_db_holder, "_db", None) if _db_holder else None
            if _db is None or not hasattr(_db, "close"):
                continue
            try:
                _db.close()
            except Exception:
                pass

        flaky_db.close.assert_called_once()
        healthy_db.close.assert_called_once()
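A helper satisfying these tests could be as small as the following sketch (hypothetical; the real version in gateway/run.py may hold a lock or log the release):

def _release_running_agent_state(self, session_key: str) -> None:
    # Sketch: pop all three tracking dicts in lockstep.
    if not session_key:
        return  # empty/None keys are a no-op, per the tests above
    self._running_agents.pop(session_key, None)
    self._running_agents_ts.pop(session_key, None)
    # Tolerate older runners that predate _busy_ack_ts.
    getattr(self, "_busy_ack_ts", {}).pop(session_key, None)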
tests/gateway/test_session_store_prune.py (new file)
@@ -0,0 +1,270 @@
"""Tests for SessionStore.prune_old_entries and the gateway watcher that calls it.

The SessionStore in-memory dict (and its backing sessions.json) grew
unbounded — every unique (platform, chat_id, thread_id, user_id) tuple
ever seen was kept forever, regardless of how stale it became. These
tests pin the prune behaviour (see the sketch after this file):

* Entries older than max_age_days (by updated_at) are removed
* Entries marked ``suspended`` are preserved (user-paused)
* Entries with an active process attached are preserved
* max_age_days <= 0 disables pruning entirely
* sessions.json is rewritten with the post-prune dict
* The ``updated_at`` field — not ``created_at`` — drives the decision
  (so a long-running-but-still-active session isn't pruned)
"""

import json
import threading
from datetime import datetime, timedelta
from unittest.mock import patch

import pytest

from gateway.config import GatewayConfig, Platform, SessionResetPolicy
from gateway.session import SessionEntry, SessionStore


def _make_store(tmp_path, max_age_days: int = 90, has_active_processes_fn=None):
    """Build a SessionStore bypassing SQLite/disk-load side effects."""
    config = GatewayConfig(
        default_reset_policy=SessionResetPolicy(mode="none"),
        session_store_max_age_days=max_age_days,
    )
    with patch("gateway.session.SessionStore._ensure_loaded"):
        store = SessionStore(
            sessions_dir=tmp_path,
            config=config,
            has_active_processes_fn=has_active_processes_fn,
        )
    store._db = None
    store._loaded = True
    return store


def _entry(key: str, age_days: float, *, suspended: bool = False,
           session_id: str | None = None) -> SessionEntry:
    now = datetime.now()
    return SessionEntry(
        session_key=key,
        session_id=session_id or f"sid_{key}",
        created_at=now - timedelta(days=age_days + 30),  # arbitrary older
        updated_at=now - timedelta(days=age_days),
        platform=Platform.TELEGRAM,
        chat_type="dm",
        suspended=suspended,
    )


class TestPruneBasics:
    def test_prune_removes_entries_past_max_age(self, tmp_path):
        store = _make_store(tmp_path)
        store._entries["old"] = _entry("old", age_days=100)
        store._entries["fresh"] = _entry("fresh", age_days=5)

        removed = store.prune_old_entries(max_age_days=90)

        assert removed == 1
        assert "old" not in store._entries
        assert "fresh" in store._entries

    def test_prune_uses_updated_at_not_created_at(self, tmp_path):
        """A session created long ago but updated recently must be kept."""
        store = _make_store(tmp_path)
        now = datetime.now()
        entry = SessionEntry(
            session_key="long-lived",
            session_id="sid",
            created_at=now - timedelta(days=365),  # ancient
            updated_at=now - timedelta(days=3),    # but just chatted
            platform=Platform.TELEGRAM,
            chat_type="dm",
        )
        store._entries["long-lived"] = entry

        removed = store.prune_old_entries(max_age_days=30)

        assert removed == 0
        assert "long-lived" in store._entries

    def test_prune_disabled_when_max_age_is_zero(self, tmp_path):
        store = _make_store(tmp_path, max_age_days=0)
        for i in range(5):
            store._entries[f"s{i}"] = _entry(f"s{i}", age_days=365)

        assert store.prune_old_entries(0) == 0
        assert len(store._entries) == 5

    def test_prune_disabled_when_max_age_is_negative(self, tmp_path):
        store = _make_store(tmp_path)
        store._entries["s"] = _entry("s", age_days=365)

        assert store.prune_old_entries(-1) == 0
        assert "s" in store._entries

    def test_prune_skips_suspended_entries(self, tmp_path):
        """/stop-suspended sessions must be kept for later resume."""
        store = _make_store(tmp_path)
        store._entries["suspended"] = _entry(
            "suspended", age_days=1000, suspended=True
        )
        store._entries["idle"] = _entry("idle", age_days=1000)

        removed = store.prune_old_entries(max_age_days=90)

        assert removed == 1
        assert "suspended" in store._entries
        assert "idle" not in store._entries

    def test_prune_skips_entries_with_active_processes(self, tmp_path):
        """Sessions with active bg processes aren't pruned even if old."""
        active_session_ids = {"sid_active"}

        def _has_active(session_id: str) -> bool:
            return session_id in active_session_ids

        store = _make_store(tmp_path, has_active_processes_fn=_has_active)
        store._entries["active"] = _entry(
            "active", age_days=1000, session_id="sid_active"
        )
        store._entries["idle"] = _entry(
            "idle", age_days=1000, session_id="sid_idle"
        )

        removed = store.prune_old_entries(max_age_days=90)

        assert removed == 1
        assert "active" in store._entries
        assert "idle" not in store._entries

    def test_prune_does_not_write_disk_when_no_removals(self, tmp_path):
        """If nothing is evictable, _save() should NOT be called."""
        store = _make_store(tmp_path)
        store._entries["fresh1"] = _entry("fresh1", age_days=1)
        store._entries["fresh2"] = _entry("fresh2", age_days=2)

        save_calls = []
        store._save = lambda: save_calls.append(1)

        assert store.prune_old_entries(max_age_days=90) == 0
        assert save_calls == []

    def test_prune_writes_disk_after_removal(self, tmp_path):
        store = _make_store(tmp_path)
        store._entries["stale"] = _entry("stale", age_days=500)
        store._entries["fresh"] = _entry("fresh", age_days=1)

        save_calls = []
        store._save = lambda: save_calls.append(1)

        store.prune_old_entries(max_age_days=90)
        assert save_calls == [1]

    def test_prune_is_thread_safe(self, tmp_path):
        """Prune acquires _lock internally; concurrent update_session is safe."""
        store = _make_store(tmp_path)
        for i in range(20):
            age = 1000 if i % 2 == 0 else 1
            store._entries[f"s{i}"] = _entry(f"s{i}", age_days=age)

        results = []

        def _pruner():
            results.append(store.prune_old_entries(max_age_days=90))

        def _reader():
            # Mimic a concurrent update_session reader iterating under lock.
            with store._lock:
                list(store._entries.keys())

        threads = [threading.Thread(target=_pruner)]
        threads += [threading.Thread(target=_reader) for _ in range(4)]
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5)
            assert not t.is_alive()

        # Exactly one pruner ran; removed exactly the 10 stale entries.
        assert results == [10]
        assert len(store._entries) == 10
        for i in range(20):
            if i % 2 == 1:  # fresh
                assert f"s{i}" in store._entries


class TestPrunePersistsToDisk:
    def test_prune_rewrites_sessions_json(self, tmp_path):
        """After prune, sessions.json on disk reflects the new dict."""
        config = GatewayConfig(
            default_reset_policy=SessionResetPolicy(mode="none"),
            session_store_max_age_days=90,
        )
        store = SessionStore(sessions_dir=tmp_path, config=config)
        store._db = None
        # Force-populate without calling get_or_create to avoid DB side-effects
        store._entries["stale"] = _entry("stale", age_days=500)
        store._entries["fresh"] = _entry("fresh", age_days=1)
        store._loaded = True
        store._save()

        # Verify pre-prune state on disk.
        saved_pre = json.loads((tmp_path / "sessions.json").read_text())
        assert set(saved_pre.keys()) == {"stale", "fresh"}

        # Prune and check disk.
        store.prune_old_entries(max_age_days=90)
        saved_post = json.loads((tmp_path / "sessions.json").read_text())
        assert set(saved_post.keys()) == {"fresh"}


class TestGatewayConfigSerialization:
    def test_session_store_max_age_days_defaults_to_90(self):
        cfg = GatewayConfig()
        assert cfg.session_store_max_age_days == 90

    def test_session_store_max_age_days_roundtrips(self):
        cfg = GatewayConfig(session_store_max_age_days=30)
        restored = GatewayConfig.from_dict(cfg.to_dict())
        assert restored.session_store_max_age_days == 30

    def test_session_store_max_age_days_missing_defaults_90(self):
        """Loading an old config (pre-this-field) falls back to default."""
        restored = GatewayConfig.from_dict({})
        assert restored.session_store_max_age_days == 90

    def test_session_store_max_age_days_negative_coerced_to_zero(self):
        """A negative value (accidental or hostile) becomes 0 (disabled)."""
        restored = GatewayConfig.from_dict({"session_store_max_age_days": -5})
        assert restored.session_store_max_age_days == 0

    def test_session_store_max_age_days_bad_type_falls_back(self):
        """Non-int values fall back to the default, not a crash."""
        restored = GatewayConfig.from_dict({"session_store_max_age_days": "nope"})
        assert restored.session_store_max_age_days == 90


class TestGatewayWatcherCallsPrune:
    """The session_expiry_watcher should call prune_old_entries once per hour."""

    def test_prune_gate_fires_on_first_tick(self):
        """First watcher tick has _last_prune_ts=0, so the gate opens."""
        import time as _t

        last_ts = 0.0
        prune_interval = 3600.0
        now = _t.time()

        # Mirror the production gate check in _session_expiry_watcher.
        should_prune = (now - last_ts) > prune_interval
        assert should_prune is True

    def test_prune_gate_suppresses_within_interval(self):
        import time as _t

        last_ts = _t.time() - 600  # 10 minutes ago
        prune_interval = 3600.0
        now = _t.time()

        should_prune = (now - last_ts) > prune_interval
        assert should_prune is False
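A prune method consistent with the behaviours these tests pin down might look like the following sketch (hypothetical; internal attribute names such as `_has_active_processes_fn` are assumed from the constructor kwarg used above):

def prune_old_entries(self, max_age_days: int) -> int:
    # Hypothetical sketch; not the shipped implementation.
    if max_age_days <= 0:
        return 0  # pruning disabled
    cutoff = datetime.now() - timedelta(days=max_age_days)
    with self._lock:
        stale = [
            key
            for key, entry in self._entries.items()
            if entry.updated_at < cutoff          # updated_at, not created_at
            and not entry.suspended               # keep user-paused sessions
            and not (self._has_active_processes_fn is not None
                     and self._has_active_processes_fn(entry.session_id))
        ]
        for key in stale:
            del self._entries[key]
        if stale:
            self._save()                          # rewrite sessions.json
    return len(stale)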
@@ -34,7 +34,12 @@ def _ensure_telegram_mock():

_ensure_telegram_mock()

-from gateway.platforms.telegram import TelegramAdapter, _escape_mdv2, _strip_mdv2  # noqa: E402
+from gateway.platforms.telegram import (  # noqa: E402
+    TelegramAdapter,
+    _escape_mdv2,
+    _strip_mdv2,
+    _wrap_markdown_tables,
+)


# ---------------------------------------------------------------------------
@@ -535,6 +540,152 @@ class TestStripMdv2:
        assert _strip_mdv2("||hidden text||") == "hidden text"


# =========================================================================
# Markdown table auto-wrap
# =========================================================================


class TestWrapMarkdownTables:
    """_wrap_markdown_tables wraps GFM pipe tables in ``` fences so
    Telegram renders them as monospace preformatted text instead of the
    noisy backslash-pipe mess MarkdownV2 produces."""

    def test_basic_table_wrapped(self):
        text = (
            "Scores:\n\n"
            "| Player | Score |\n"
            "|--------|-------|\n"
            "| Alice | 150 |\n"
            "| Bob | 120 |\n"
            "\nEnd."
        )
        out = _wrap_markdown_tables(text)
        # Table is now wrapped in a fence
        assert "```\n| Player | Score |" in out
        assert "| Bob | 120 |\n```" in out
        # Surrounding prose is preserved
        assert out.startswith("Scores:")
        assert out.endswith("End.")

    def test_bare_pipe_table_wrapped(self):
        """Tables without outer pipes (GFM allows this) are still detected."""
        text = "head1 | head2\n--- | ---\na | b\nc | d"
        out = _wrap_markdown_tables(text)
        assert out.startswith("```\n")
        assert out.rstrip().endswith("```")
        assert "head1 | head2" in out

    def test_alignment_separators(self):
        """Separator rows with :--- / ---: / :---: alignment markers match."""
        text = (
            "| Name | Age | City |\n"
            "|:-----|----:|:----:|\n"
            "| Ada | 30 | NYC |"
        )
        out = _wrap_markdown_tables(text)
        assert out.count("```") == 2

    def test_two_consecutive_tables_wrapped_separately(self):
        text = (
            "| A | B |\n"
            "|---|---|\n"
            "| 1 | 2 |\n"
            "\n"
            "| X | Y |\n"
            "|---|---|\n"
            "| 9 | 8 |"
        )
        out = _wrap_markdown_tables(text)
        # Four fences total — one opening + closing per table
        assert out.count("```") == 4

    def test_plain_text_with_pipes_not_wrapped(self):
        """A bare pipe in prose must NOT trigger wrapping."""
        text = "Use the | pipe operator to chain commands."
        assert _wrap_markdown_tables(text) == text

    def test_horizontal_rule_not_wrapped(self):
        """A lone '---' horizontal rule must not be mistaken for a separator."""
        text = "Section A\n\n---\n\nSection B"
        assert _wrap_markdown_tables(text) == text

    def test_existing_code_block_with_pipes_left_alone(self):
        """A table already inside a fenced code block must not be re-wrapped."""
        text = (
            "```\n"
            "| a | b |\n"
            "|---|---|\n"
            "| 1 | 2 |\n"
            "```"
        )
        assert _wrap_markdown_tables(text) == text

    def test_no_pipe_character_short_circuits(self):
        text = "Plain **bold** text with no table."
        assert _wrap_markdown_tables(text) == text

    def test_no_dash_short_circuits(self):
        text = "a | b\nc | d"  # has pipes but no '-' separator row
        assert _wrap_markdown_tables(text) == text

    def test_single_column_separator_not_matched(self):
        """Single-column tables (rare) are not detected — we require at
        least one internal pipe in the separator row to avoid false
        positives on horizontal rules."""
        text = "| a |\n| - |\n| b |"
        assert _wrap_markdown_tables(text) == text


class TestFormatMessageTables:
    """End-to-end: a pipe table passes through format_message with its
    pipes and dashes left alone inside the fence, not mangled by MarkdownV2
    escaping."""

    def test_table_rendered_as_code_block(self, adapter):
        text = (
            "Data:\n\n"
            "| Col1 | Col2 |\n"
            "|------|------|\n"
            "| A | B |\n"
        )
        out = adapter.format_message(text)
        # Pipes inside the fenced block are NOT escaped
        assert "```\n| Col1 | Col2 |" in out
        assert "\\|" not in out.split("```")[1]
        # Dashes in separator not escaped inside fence
        assert "\\-" not in out.split("```")[1]

    def test_text_after_table_still_formatted(self, adapter):
        text = (
            "| A | B |\n"
            "|---|---|\n"
            "| 1 | 2 |\n"
            "\n"
            "Nice **work** team!"
        )
        out = adapter.format_message(text)
        # MarkdownV2 bold conversion still happens outside the table
        assert "*work*" in out
        # Exclamation outside fence is escaped
        assert "\\!" in out

    def test_multiple_tables_in_single_message(self, adapter):
        text = (
            "First:\n"
            "| A | B |\n"
            "|---|---|\n"
            "| 1 | 2 |\n"
            "\n"
            "Second:\n"
            "| X | Y |\n"
            "|---|---|\n"
            "| 9 | 8 |\n"
        )
        out = adapter.format_message(text)
        # Two separate fenced blocks in the output
        assert out.count("```") == 4


@pytest.mark.asyncio
async def test_send_escapes_chunk_indicator_for_markdownv2(adapter):
    adapter.MAX_MESSAGE_LENGTH = 80
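The detection logic these tests imply can be sketched as below (hypothetical; the shipped `_wrap_markdown_tables` may differ in detail):

import re

# Separator row needs at least one internal pipe, which rules out '---'
# horizontal rules and single-column tables (assumed heuristic).
_SEP_RE = re.compile(r"^\s*\|?\s*:?-+:?\s*(\|\s*:?-+:?\s*)+\|?\s*$")


def wrap_markdown_tables_sketch(text: str) -> str:
    if "|" not in text or "-" not in text:
        return text  # fast path: no table can exist
    lines = text.split("\n")
    out, i, in_fence = [], 0, False
    while i < len(lines):
        line = lines[i]
        if line.strip().startswith("```"):
            in_fence = not in_fence  # never re-wrap inside existing fences
            out.append(line)
            i += 1
            continue
        # A table starts at a row containing '|' whose next line is a
        # separator row like |---|---| (alignment colons allowed).
        if (not in_fence and "|" in line
                and i + 1 < len(lines) and _SEP_RE.match(lines[i + 1])):
            table = [line, lines[i + 1]]
            i += 2
            while i < len(lines) and "|" in lines[i]:
                table.append(lines[i])
                i += 1
            out.append("```")
            out.extend(table)
            out.append("```")
            continue
        out.append(line)
        i += 1
    return "\n".join(out)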
@@ -33,6 +33,7 @@ class TestProviderRegistry:
        ("huggingface", "Hugging Face", "api_key"),
        ("zai", "Z.AI / GLM", "api_key"),
        ("xai", "xAI", "api_key"),
+       ("nvidia", "NVIDIA NIM", "api_key"),
        ("kimi-coding", "Kimi / Moonshot", "api_key"),
        ("minimax", "MiniMax", "api_key"),
        ("minimax-cn", "MiniMax (China)", "api_key"),
@@ -57,6 +58,12 @@ class TestProviderRegistry:
        assert pconfig.base_url_env_var == "XAI_BASE_URL"
        assert pconfig.inference_base_url == "https://api.x.ai/v1"

+   def test_nvidia_env_vars(self):
+       pconfig = PROVIDER_REGISTRY["nvidia"]
+       assert pconfig.api_key_env_vars == ("NVIDIA_API_KEY",)
+       assert pconfig.base_url_env_var == "NVIDIA_BASE_URL"
+       assert pconfig.inference_base_url == "https://integrate.api.nvidia.com/v1"
+
    def test_copilot_env_vars(self):
        pconfig = PROVIDER_REGISTRY["copilot"]
        assert pconfig.api_key_env_vars == ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN")
@@ -178,10 +178,6 @@ class TestGeminiContextLength:
        ctx = get_model_context_length("gemma-4-31b-it", provider="gemini")
        assert ctx == 256000

-    def test_gemma_4_26b_context(self):
-        ctx = get_model_context_length("gemma-4-26b-it", provider="gemini")
-        assert ctx == 256000
-
    def test_gemini_3_context(self):
        ctx = get_model_context_length("gemini-3.1-pro-preview", provider="gemini")
        assert ctx == 1048576
@@ -13,9 +13,29 @@ from unittest.mock import patch, MagicMock
import pytest

import hermes_cli.gateway as gateway_cli
import hermes_cli.main as cli_main
from hermes_cli.main import cmd_update


# ---------------------------------------------------------------------------
# Skip the real-time sleeps inside cmd_update's restart-verification path
# ---------------------------------------------------------------------------


@pytest.fixture(autouse=True)
def _no_restart_verify_sleep(monkeypatch):
    """hermes_cli/main.py uses time.sleep(3) after systemctl restart to
    verify the service survived. Tests mock subprocess.run — nothing
    actually restarts — so the 3s wait is dead time.

    main.py does ``import time as _time`` at both module level (line 167)
    and inside functions (lines 3281, 4384, 4401), so patching the real
    ``time.sleep`` on the module object covers every alias, and monkeypatch
    restores it after each test.
    """
    import time as _real_time
    monkeypatch.setattr(_real_time, "sleep", lambda *_a, **_k: None)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
@@ -31,6 +31,31 @@ def _isolate_env(tmp_path, monkeypatch):
    monkeypatch.delenv("RETAINDB_PROJECT", raising=False)


@pytest.fixture(autouse=True)
def _cap_retaindb_sleeps(monkeypatch):
    """Cap production-code sleeps so background-thread tests run fast.

    The retaindb ``_WriteQueue._flush_row`` does ``time.sleep(2)`` after
    errors. Across multiple tests that trigger the retry path, that adds
    up. Cap the module's bound ``time.sleep`` to 0.05s — tests don't care
    about the exact retry delay, only that it happens. The test file's
    own ``time.sleep`` stays real since it uses a different reference.
    """
    try:
        from plugins.memory import retaindb as _retaindb
    except ImportError:
        return

    real_sleep = _retaindb.time.sleep

    def _capped_sleep(seconds):
        return real_sleep(min(float(seconds), 0.05))

    import types as _types
    fake_time = _types.SimpleNamespace(sleep=_capped_sleep, time=_retaindb.time.time)
    monkeypatch.setattr(_retaindb, "time", fake_time)


# We need the repo root on sys.path so the plugin can import agent.memory_provider
import sys
_repo_root = str(Path(__file__).resolve().parents[2])
@@ -130,16 +155,18 @@ class TestWriteQueue:
    def test_enqueue_creates_row(self, tmp_path):
        q, client, db_path = self._make_queue(tmp_path)
        q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
-       # Give the writer thread a moment to process
-       time.sleep(1)
+       # shutdown() blocks until the writer thread drains the queue — no need
+       # to pre-sleep (the old 1s sleep was a just-in-case wait, but shutdown
+       # does the right thing).
        q.shutdown()
        # If ingest succeeded, the row should be deleted
        client.ingest_session.assert_called_once()

    def test_enqueue_persists_to_sqlite(self, tmp_path):
        client = MagicMock()
-       # Make ingest hang so the row stays in SQLite
-       client.ingest_session = MagicMock(side_effect=lambda *a, **kw: time.sleep(5))
+       # Make ingest slow so the row is still in SQLite when we peek.
+       # 0.5s is plenty — the test just needs the flush to still be in-flight.
+       client.ingest_session = MagicMock(side_effect=lambda *a, **kw: time.sleep(0.5))
        db_path = tmp_path / "test_queue.db"
        q = _WriteQueue(client, db_path)
        q.enqueue("user1", "sess1", [{"role": "user", "content": "test"}])
@@ -154,8 +181,7 @@ class TestWriteQueue:
    def test_flush_deletes_row_on_success(self, tmp_path):
        q, client, db_path = self._make_queue(tmp_path)
        q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
-       time.sleep(1)
-       q.shutdown()
+       q.shutdown()  # blocks until drain
        # Row should be gone
        conn = sqlite3.connect(str(db_path))
        rows = conn.execute("SELECT COUNT(*) FROM pending").fetchone()[0]
@@ -168,14 +194,20 @@ class TestWriteQueue:
        db_path = tmp_path / "test_queue.db"
        q = _WriteQueue(client, db_path)
        q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
-       time.sleep(3)  # Allow retry + sleep(2) in _flush_row
+       # Poll for the error to be recorded (max 2s), instead of a fixed 3s wait.
+       deadline = time.time() + 2.0
+       last_error = None
+       while time.time() < deadline:
+           conn = sqlite3.connect(str(db_path))
+           row = conn.execute("SELECT last_error FROM pending").fetchone()
+           conn.close()
+           if row and row[0]:
+               last_error = row[0]
+               break
+           time.sleep(0.05)
        q.shutdown()
        # Row should still exist with error recorded
        conn = sqlite3.connect(str(db_path))
        row = conn.execute("SELECT last_error FROM pending").fetchone()
        conn.close()
        assert row is not None
-       assert "API down" in row[0]
+       assert last_error is not None
+       assert "API down" in last_error

    def test_thread_local_connection_reuse(self, tmp_path):
        q, _, _ = self._make_queue(tmp_path)
@@ -193,14 +225,27 @@ class TestWriteQueue:
        client1.ingest_session = MagicMock(side_effect=RuntimeError("fail"))
        q1 = _WriteQueue(client1, db_path)
        q1.enqueue("user1", "sess1", [{"role": "user", "content": "lost turn"}])
        time.sleep(3)
        # Wait until the error is recorded (poll with short interval).
        deadline = time.time() + 2.0
        while time.time() < deadline:
            conn = sqlite3.connect(str(db_path))
            row = conn.execute("SELECT last_error FROM pending").fetchone()
            conn.close()
            if row and row[0]:
                break
            time.sleep(0.05)
        q1.shutdown()

        # Now create a new queue — it should replay the pending rows
        client2 = MagicMock()
        client2.ingest_session = MagicMock(return_value={"status": "ok"})
        q2 = _WriteQueue(client2, db_path)
        time.sleep(2)
        # Poll for the replay to happen.
        deadline = time.time() + 2.0
        while time.time() < deadline:
            if client2.ingest_session.called:
                break
            time.sleep(0.05)
        q2.shutdown()

        # The replayed row should have been ingested via client2
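The durability property under test: a row whose flush failed stays in SQLite and is replayed when a fresh queue is constructed over the same database file. A sketch of that replay-on-init step, assuming the `pending` table the tests query (column names are guesses for illustration):

```python
import sqlite3

def replay_pending(db_path, enqueue):
    """Re-enqueue every row a previous process failed to flush.

    Assumes rows live in `pending` until a successful ingest deletes
    them, as the SELECT/DELETE behavior in these tests implies.
    """
    conn = sqlite3.connect(str(db_path))
    try:
        rows = conn.execute(
            "SELECT user_id, session_id, payload FROM pending ORDER BY rowid"
        ).fetchall()
    finally:
        conn.close()
    for user_id, session_id, payload in rows:
        enqueue(user_id, session_id, payload)  # hand the row back to the writer thread
```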
tests/run_agent/conftest.py (new file, 34 lines)
@@ -0,0 +1,34 @@
"""Fast-path fixtures shared across tests/run_agent/.

Many tests in this directory exercise the retry/backoff paths in the
agent loop. Production code uses ``jittered_backoff(base_delay=5.0)``
with a ``while time.time() < sleep_end`` loop — a single retry test
spends 5+ seconds of real wall-clock time on backoff waits.

Mocking ``jittered_backoff`` to return 0.0 collapses the while-loop
to a no-op (``sleep_end`` becomes ``time.time() + 0.0``, which is
already in the past by the loop's first check), handling the most
common case without touching ``time.sleep``.

We deliberately DO NOT mock ``time.sleep`` here — some tests
(test_interrupt_propagation, test_primary_runtime_restore, etc.) use
the real ``time.sleep`` for threading coordination or assert that it
was called with specific values. Tests that want to additionally
fast-path direct ``time.sleep(N)`` calls in production code should
monkeypatch ``run_agent.time.sleep`` locally (see
``test_anthropic_error_handling.py`` for the pattern).
"""

from __future__ import annotations

import pytest


@pytest.fixture(autouse=True)
def _fast_retry_backoff(monkeypatch):
    """Short-circuit retry backoff for all tests in this directory."""
    try:
        import run_agent
    except ImportError:
        return

    monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
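To see why returning 0.0 suffices, here is the assumed shape of the production wait, reconstructed from the docstring's description (treat the exact code, including the jitter formula, as a sketch rather than the real run_agent source):

```python
import random
import time

def jittered_backoff(attempt, base_delay=5.0):
    # Assumed shape: exponential backoff with full jitter; tests patch this to 0.0.
    return random.uniform(0, base_delay * (2 ** attempt))

def wait_before_retry(attempt):
    # Compute an absolute deadline, then re-check it in short slices so
    # interrupts stay responsive during long backoffs.
    sleep_end = time.time() + jittered_backoff(attempt)
    while time.time() < sleep_end:  # with a 0.0 backoff this is False on the first check
        time.sleep(0.2)
```

Patching the return value rather than `time.sleep` means the 0.2s slice loop never runs at all, while tests that coordinate threads through real `time.sleep` calls keep working.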
@@ -19,6 +19,24 @@ import pytest

from agent.context_compressor import SUMMARY_PREFIX
from run_agent import AIAgent
import run_agent


# ---------------------------------------------------------------------------
# Fast backoff for compression retry tests
# ---------------------------------------------------------------------------


@pytest.fixture(autouse=True)
def _no_compression_sleep(monkeypatch):
    """Short-circuit the 2s time.sleep between compression retries.

    Production code has ``time.sleep(2)`` in multiple places after a 413/context
    compression, for rate-limit smoothing. Tests assert behavior, not timing.
    """
    import time as _time
    monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)
    monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)


# ---------------------------------------------------------------------------
@@ -27,6 +27,39 @@ from gateway.config import Platform
from gateway.session import SessionSource


# ---------------------------------------------------------------------------
# Fast backoff for tests that exercise the retry loop
# ---------------------------------------------------------------------------


@pytest.fixture(autouse=True)
def _no_backoff_wait(monkeypatch):
    """Short-circuit retry backoff so tests don't block on real wall-clock waits.

    The production code uses jittered_backoff() with a 5s base delay plus a
    tight time.sleep(0.2) loop. Without this patch, each 429/500/529 retry
    test burns ~10s of real time on CI — across six tests that's ~60s for
    behavior we're not asserting against timing.

    Tests assert retry counts and final results, never wait durations.
    """
    import asyncio as _asyncio
    import time as _time

    monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
    monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)

    # Also fast-path asyncio.sleep — the gateway's _run_agent path has
    # several await asyncio.sleep(...) calls that add real wall-clock time.
    _real_asyncio_sleep = _asyncio.sleep

    async def _fast_sleep(delay=0, *args, **kwargs):
        # Yield to the event loop but skip the actual delay.
        await _real_asyncio_sleep(0)

    monkeypatch.setattr(_asyncio, "sleep", _fast_sleep)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
@@ -13,6 +13,24 @@ from unittest.mock import MagicMock, patch, call
import pytest


@pytest.fixture(autouse=True)
def _mock_runtime_provider(monkeypatch):
    """run_job calls resolve_runtime_provider, which can try real network
    auto-detection (~4s of socket timeouts in hermetic CI). Mock it out
    since these tests don't care about provider resolution — the agent
    is mocked too."""
    import hermes_cli.runtime_provider as rp

    def _fake_resolve(*args, **kwargs):
        return {
            "provider": "openrouter",
            "api_key": "test-key",
            "base_url": "https://openrouter.ai/api/v1",
            "model": "test/model",
            "api_mode": "chat_completions",
        }

    monkeypatch.setattr(rp, "resolve_runtime_provider", _fake_resolve)


class TestCronJobCleanup:
    """cron/scheduler.py — end_session + close in the finally block."""
@@ -11,6 +11,16 @@ from unittest.mock import MagicMock, patch
import pytest

from run_agent import AIAgent
import run_agent


@pytest.fixture(autouse=True)
def _no_fallback_wait(monkeypatch):
    """Short-circuit time.sleep in fallback/recovery paths so tests don't
    block on the ``min(3 + retry_count, 8)`` wait before a primary retry."""
    import time as _time
    monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)
    monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)


def _make_tool_defs(*names: str) -> list:
@@ -12,6 +12,15 @@ sys.modules.setdefault("fal_client", types.SimpleNamespace())
import run_agent


@pytest.fixture(autouse=True)
def _no_codex_backoff(monkeypatch):
    """Short-circuit retry backoff so Codex retry tests don't block on real
    wall-clock waits (5s jittered_backoff base delay + tight time.sleep loop)."""
    import time as _time
    monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
    monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)


def _patch_agent_bootstrap(monkeypatch):
    monkeypatch.setattr(
        run_agent,
@@ -159,18 +159,34 @@ class TestCodeExecutionTZ:
        return _json.dumps({"error": f"unexpected tool call: {function_name}"})

    def test_tz_injected_when_configured(self):
        """When HERMES_TIMEZONE is set, child process sees TZ env var."""
        """When HERMES_TIMEZONE is set, child process sees TZ env var.

        Verified alongside leak-prevention in one subprocess call so we
        don't pay the subprocess startup cost twice (each execute_code
        spawns a real Python subprocess, ~3s).
        """
        import json as _json
        os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata"

        # One subprocess, two things checked:
        # 1) TZ is injected as "Asia/Kolkata"
        # 2) HERMES_TIMEZONE itself does NOT leak into the child env
        probe = (
            'import os; '
            'print("TZ=" + os.environ.get("TZ", "NOT_SET")); '
            'print("HERMES_TIMEZONE=" + os.environ.get("HERMES_TIMEZONE", "NOT_SET"))'
        )
        with patch("model_tools.handle_function_call", side_effect=self._mock_handle):
            result = _json.loads(self._execute_code(
                code='import os; print(os.environ.get("TZ", "NOT_SET"))',
                task_id="tz-test",
                code=probe,
                task_id="tz-combined-test",
                enabled_tools=[],
            ))
        assert result["status"] == "success"
        assert "Asia/Kolkata" in result["output"]
        assert "TZ=Asia/Kolkata" in result["output"]
        assert "HERMES_TIMEZONE=NOT_SET" in result["output"], (
            "HERMES_TIMEZONE should not leak into child env (only TZ)"
        )

    def test_tz_not_injected_when_empty(self):
        """When HERMES_TIMEZONE is not set, child process has no TZ."""
@@ -186,20 +202,6 @@ class TestCodeExecutionTZ:
        assert result["status"] == "success"
        assert "NOT_SET" in result["output"]

    def test_hermes_timezone_not_leaked_to_child(self):
        """HERMES_TIMEZONE itself must NOT appear in child env (only TZ)."""
        import json as _json
        os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata"

        with patch("model_tools.handle_function_call", side_effect=self._mock_handle):
            result = _json.loads(self._execute_code(
                code='import os; print(os.environ.get("HERMES_TIMEZONE", "NOT_SET"))',
                task_id="tz-leak-test",
                enabled_tools=[],
            ))
        assert result["status"] == "success"
        assert "NOT_SET" in result["output"]


# =========================================================================
# Cron timezone-aware scheduling
@@ -86,6 +86,7 @@ class TestProviderEnvBlocklist:
            "MINIMAX_API_KEY": "mm-key",
            "MINIMAX_CN_API_KEY": "mmcn-key",
            "DEEPSEEK_API_KEY": "deepseek-key",
            "NVIDIA_API_KEY": "nvidia-key",
        }
        result_env = _run_with_env(extra_os_env=registry_vars)
@@ -61,6 +61,7 @@ hermes setup # Or configure everything at once
| **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` |
| **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
| **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` |
| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
| **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` |
| **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) |
| **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` |
@@ -295,6 +295,30 @@ When using xAI as a provider (any base URL containing `x.ai`), Hermes automatically

No configuration is needed — caching activates automatically when an xAI endpoint is detected and a session ID is available. This reduces latency and cost for multi-turn conversations.

### NVIDIA NIM

Nemotron and other open-source models via [build.nvidia.com](https://build.nvidia.com) (free API key) or a local NIM endpoint.

```bash
# Cloud (build.nvidia.com)
hermes chat --provider nvidia --model nvidia/nemotron-3-super-120b-a12b
# Requires: NVIDIA_API_KEY in ~/.hermes/.env

# Local NIM endpoint — override base URL
NVIDIA_BASE_URL=http://localhost:8000/v1 hermes chat --provider nvidia --model nvidia/nemotron-3-super-120b-a12b
```

Or set it permanently in `config.yaml`:

```yaml
model:
  provider: "nvidia"
  default: "nvidia/nemotron-3-super-120b-a12b"
```

:::tip Local NIM
For on-prem deployments (DGX Spark, local GPU), set `NVIDIA_BASE_URL=http://localhost:8000/v1`. NIM exposes the same OpenAI-compatible chat completions API as build.nvidia.com, so switching between cloud and local is a one-line env-var change.
:::
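Because the endpoint is OpenAI-compatible, you can also smoke-test it directly from Python before wiring it into Hermes. A minimal sketch using the `openai` client; the cloud base URL shown is NVIDIA's documented `integrate.api.nvidia.com` endpoint, but verify it against your account if requests fail:

```python
import os

from openai import OpenAI

# Same switch as NVIDIA_BASE_URL above: point at the cloud API or a local NIM.
client = OpenAI(
    base_url=os.environ.get("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1"),
    api_key=os.environ["NVIDIA_API_KEY"],
)

response = client.chat.completions.create(
    model="nvidia/nemotron-3-super-120b-a12b",
    messages=[{"role": "user", "content": "Reply with one short sentence."}],
)
print(response.choices[0].message.content)
```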
### Hugging Face Inference Providers

[Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) with automatic failover.
@@ -290,7 +290,7 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETIME`
| `QQ_ALLOWED_USERS` | Comma-separated QQ user openIDs allowed to message the bot |
| `QQ_GROUP_ALLOWED_USERS` | Comma-separated QQ group IDs for group @-message access |
| `QQ_ALLOW_ALL_USERS` | Allow all users (`true`/`false`, overrides `QQ_ALLOWED_USERS`) |
| `QQ_HOME_CHANNEL` | QQ user/group openID for cron delivery and notifications |
| `QQBOT_HOME_CHANNEL` | QQ user/group openID for cron delivery and notifications |
| `MATTERMOST_URL` | Mattermost server URL (e.g. `https://mm.example.com`) |
| `MATTERMOST_TOKEN` | Bot token or personal access token for Mattermost |
| `MATTERMOST_ALLOWED_USERS` | Comma-separated Mattermost user IDs allowed to message the bot |
@@ -47,6 +47,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
| MiniMax | `minimax` | `MINIMAX_API_KEY` |
| MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` |
| DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
| NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
| OpenCode Zen | `opencode-zen` | `OPENCODE_ZEN_API_KEY` |
| OpenCode Go | `opencode-go` | `OPENCODE_GO_API_KEY` |
| Kilo Code | `kilocode` | `KILOCODE_API_KEY` |
@@ -48,8 +48,8 @@ QQ_CLIENT_SECRET=your-app-secret
|---|---|---|
| `QQ_APP_ID` | QQ Bot App ID (required) | — |
| `QQ_CLIENT_SECRET` | QQ Bot App Secret (required) | — |
| `QQ_HOME_CHANNEL` | OpenID for cron/notification delivery | — |
| `QQ_HOME_CHANNEL_NAME` | Display name for home channel | `Home` |
| `QQBOT_HOME_CHANNEL` | OpenID for cron/notification delivery | — |
| `QQBOT_HOME_CHANNEL_NAME` | Display name for home channel | `Home` |
| `QQ_ALLOWED_USERS` | Comma-separated user OpenIDs for DM access | open (all users) |
| `QQ_ALLOW_ALL_USERS` | Set to `true` to allow all DMs | `false` |
| `QQ_MARKDOWN_SUPPORT` | Enable QQ markdown (msg_type 2) | `true` |
@@ -113,7 +113,7 @@ This usually means:
- Verify the bot's **intents** are enabled at q.qq.com
- Check `QQ_ALLOWED_USERS` if DM access is restricted
- For group messages, ensure the bot is **@mentioned** (group policy may require allowlisting)
- Check `QQ_HOME_CHANNEL` for cron/notification delivery
- Check `QQBOT_HOME_CHANNEL` for cron/notification delivery

### Connection errors