diff --git a/README.md b/README.md
index ab158fc2bd..622910b3a9 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
-Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
| A real terminal interface | Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output. |
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 4f17461662..4860b16acd 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -94,6 +94,17 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
return "custom"
return _PROVIDER_ALIASES.get(normalized, normalized)
+
+_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
+ "kimi-for-coding": 0.6,
+}
+
+
+def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
+ """Return a required temperature override for models with strict contracts."""
+ normalized = (model or "").strip().lower()
+ return _FIXED_TEMPERATURE_MODELS.get(normalized)
+
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"gemini": "gemini-3-flash-preview",
@@ -2293,6 +2304,10 @@ def _build_call_kwargs(
"timeout": timeout,
}
+ fixed_temperature = _fixed_temperature_for_model(model)
+ if fixed_temperature is not None:
+ temperature = fixed_temperature
+
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
# drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
# flush_memories, 0 on structured-JSON extraction) don't 400 the moment
diff --git a/agent/gemini_cloudcode_adapter.py b/agent/gemini_cloudcode_adapter.py
index 36ba288eb4..ed687bffd6 100644
--- a/agent/gemini_cloudcode_adapter.py
+++ b/agent/gemini_cloudcode_adapter.py
@@ -747,18 +747,149 @@ class GeminiCloudCodeClient:
def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
+ """Translate an httpx response into a CodeAssistError with rich metadata.
+
+ Parses Google's error envelope (``{"error": {"code", "message", "status",
+ "details": [...]}}``) so the agent's error classifier can reason about
+ the failure — ``status_code`` enables the rate_limit / auth classification
+ paths, and ``response`` lets the main loop honor ``Retry-After`` just
+ like it does for OpenAI SDK exceptions.
+
+ Also lifts a few recognizable Google conditions into human-readable
+ messages so the user sees something better than a 500-char JSON dump:
+
+        MODEL_CAPACITY_EXHAUSTED → "Gemini capacity exhausted for
+        <model>. This is a Google-side throttle..."
+ RESOURCE_EXHAUSTED w/o reason → quota-style message
+ 404 → "Model not found at cloudcode-pa..."
+ """
status = response.status_code
+
+ # Parse the body once, surviving any weird encodings.
+ body_text = ""
+ body_json: Dict[str, Any] = {}
try:
- body = response.text[:500]
+ body_text = response.text
except Exception:
- body = ""
- # Let run_agent's retry logic see auth errors as rotatable via `api_key`
+ body_text = ""
+ if body_text:
+ try:
+ parsed = json.loads(body_text)
+ if isinstance(parsed, dict):
+ body_json = parsed
+ except (ValueError, TypeError):
+ body_json = {}
+
+ # Dig into Google's error envelope. Shape is:
+ # {"error": {"code": 429, "message": "...", "status": "RESOURCE_EXHAUSTED",
+ # "details": [{"@type": ".../ErrorInfo", "reason": "MODEL_CAPACITY_EXHAUSTED",
+ # "metadata": {...}},
+ # {"@type": ".../RetryInfo", "retryDelay": "30s"}]}}
+ err_obj = body_json.get("error") if isinstance(body_json, dict) else None
+ if not isinstance(err_obj, dict):
+ err_obj = {}
+ err_status = str(err_obj.get("status") or "").strip()
+ err_message = str(err_obj.get("message") or "").strip()
+ err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
+
+ # Extract google.rpc.ErrorInfo reason + metadata. There may be more
+ # than one ErrorInfo (rare), so we pick the first one with a reason.
+ error_reason = ""
+ error_metadata: Dict[str, Any] = {}
+ retry_delay_seconds: Optional[float] = None
+ for detail in err_details_list:
+ if not isinstance(detail, dict):
+ continue
+ type_url = str(detail.get("@type") or "")
+ if not error_reason and type_url.endswith("/google.rpc.ErrorInfo"):
+ reason = detail.get("reason")
+ if isinstance(reason, str) and reason:
+ error_reason = reason
+ md = detail.get("metadata")
+ if isinstance(md, dict):
+ error_metadata = md
+ elif retry_delay_seconds is None and type_url.endswith("/google.rpc.RetryInfo"):
+ # retryDelay is a google.protobuf.Duration string like "30s" or "1.5s".
+ delay_raw = detail.get("retryDelay")
+ if isinstance(delay_raw, str) and delay_raw.endswith("s"):
+ try:
+ retry_delay_seconds = float(delay_raw[:-1])
+ except ValueError:
+ pass
+ elif isinstance(delay_raw, (int, float)):
+ retry_delay_seconds = float(delay_raw)
+
+ # Fall back to the Retry-After header if the body didn't include RetryInfo.
+ if retry_delay_seconds is None:
+ try:
+ header_val = response.headers.get("Retry-After") or response.headers.get("retry-after")
+ except Exception:
+ header_val = None
+ if header_val:
+ try:
+ retry_delay_seconds = float(header_val)
+ except (TypeError, ValueError):
+ retry_delay_seconds = None
+
+ # Classify the error code. ``code_assist_rate_limited`` stays the default
+ # for 429s; a more specific reason tag helps downstream callers (e.g. tests,
+ # logs) without changing the rate_limit classification path.
code = f"code_assist_http_{status}"
if status == 401:
code = "code_assist_unauthorized"
elif status == 429:
code = "code_assist_rate_limited"
+ if error_reason == "MODEL_CAPACITY_EXHAUSTED":
+ code = "code_assist_capacity_exhausted"
+
+ # Build a human-readable message. Keep the status + a raw-body tail for
+ # debugging, but lead with a friendlier summary when we recognize the
+ # Google signal.
+ model_hint = ""
+ if isinstance(error_metadata, dict):
+ model_hint = str(error_metadata.get("model") or error_metadata.get("modelId") or "").strip()
+
+ if status == 429 and error_reason == "MODEL_CAPACITY_EXHAUSTED":
+ target = model_hint or "this Gemini model"
+ message = (
+ f"Gemini capacity exhausted for {target} (Google-side throttle, "
+ f"not a Hermes issue). Try a different Gemini model or set a "
+ f"fallback_providers entry to a non-Gemini provider."
+ )
+ if retry_delay_seconds is not None:
+ message += f" Google suggests retrying in {retry_delay_seconds:g}s."
+ elif status == 429 and err_status == "RESOURCE_EXHAUSTED":
+ message = (
+ f"Gemini quota exhausted ({err_message or 'RESOURCE_EXHAUSTED'}). "
+ f"Check /gquota for remaining daily requests."
+ )
+ if retry_delay_seconds is not None:
+ message += f" Retry suggested in {retry_delay_seconds:g}s."
+ elif status == 404:
+ # Google returns 404 when a model has been retired or renamed.
+ target = model_hint or (err_message or "model")
+ message = (
+ f"Code Assist 404: {target} is not available at "
+ f"cloudcode-pa.googleapis.com. It may have been renamed or "
+ f"retired. Check hermes_cli/models.py for the current list."
+ )
+ elif err_message:
+ # Generic fallback with the parsed message.
+ message = f"Code Assist HTTP {status} ({err_status or 'error'}): {err_message}"
+ else:
+ # Last-ditch fallback — raw body snippet.
+ message = f"Code Assist returned HTTP {status}: {body_text[:500]}"
+
return CodeAssistError(
- f"Code Assist returned HTTP {status}: {body}",
+ message,
code=code,
+ status_code=status,
+ response=response,
+ retry_after=retry_delay_seconds,
+ details={
+ "status": err_status,
+ "reason": error_reason,
+ "metadata": error_metadata,
+ "message": err_message,
+ },
)
diff --git a/agent/google_code_assist.py b/agent/google_code_assist.py
index 1acf3ea135..eba09b8f46 100644
--- a/agent/google_code_assist.py
+++ b/agent/google_code_assist.py
@@ -68,9 +68,45 @@ _ONBOARDING_POLL_INTERVAL_SECONDS = 5.0
class CodeAssistError(RuntimeError):
- def __init__(self, message: str, *, code: str = "code_assist_error") -> None:
+ """Exception raised by the Code Assist (``cloudcode-pa``) integration.
+
+ Carries HTTP status / response / retry-after metadata so the agent's
+ ``error_classifier._extract_status_code`` and the main loop's Retry-After
+ handling (which walks ``error.response.headers``) pick up the right
+ signals. Without these, 429s from the OAuth path look like opaque
+ ``RuntimeError`` and skip the rate-limit path.
+ """
+
+ def __init__(
+ self,
+ message: str,
+ *,
+ code: str = "code_assist_error",
+ status_code: Optional[int] = None,
+ response: Any = None,
+ retry_after: Optional[float] = None,
+ details: Optional[Dict[str, Any]] = None,
+ ) -> None:
super().__init__(message)
self.code = code
+ # ``status_code`` is picked up by ``agent.error_classifier._extract_status_code``
+ # so a 429 from Code Assist classifies as FailoverReason.rate_limit and
+ # triggers the main loop's fallback_providers chain the same way SDK
+ # errors do.
+ self.status_code = status_code
+ # ``response`` is the underlying ``httpx.Response`` (or a shim with a
+ # ``.headers`` mapping and ``.json()`` method). The main loop reads
+ # ``error.response.headers["Retry-After"]`` to honor Google's retry
+ # hints when the backend throttles us.
+ self.response = response
+ # Parsed ``Retry-After`` seconds (kept separately for convenience —
+ # Google returns retry hints in both the header and the error body's
+ # ``google.rpc.RetryInfo`` details, and we pick whichever we found).
+ self.retry_after = retry_after
+ # Parsed structured error details from the Google error envelope
+ # (e.g. ``{"reason": "MODEL_CAPACITY_EXHAUSTED", "status": "RESOURCE_EXHAUSTED"}``).
+ # Useful for logging and for tests that want to assert on specifics.
+ self.details = details or {}
class ProjectIdRequiredError(CodeAssistError):
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 089fd132ac..81bac6c92f 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -38,6 +38,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"mimo", "xiaomi-mimo",
"arcee-ai", "arceeai",
"xai", "x-ai", "x.ai", "grok",
+ "nvidia", "nim", "nvidia-nim", "nemotron",
"qwen-portal",
})
@@ -124,7 +125,6 @@ DEFAULT_CONTEXT_LENGTHS = {
"gemini": 1048576,
# Gemma (open models served via AI Studio)
"gemma-4-31b": 256000,
- "gemma-4-26b": 256000,
"gemma-3": 131072,
"gemma": 8192, # fallback for older gemma models
# DeepSeek
@@ -158,6 +158,8 @@ DEFAULT_CONTEXT_LENGTHS = {
"grok": 131072, # catch-all (grok-beta, unknown grok-*)
# Kimi
"kimi": 262144,
+ # Nemotron — NVIDIA's open-weights series (128K context across all sizes)
+ "nemotron": 131072,
# Arcee
"trinity": 262144,
# OpenRouter
@@ -240,6 +242,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"api.fireworks.ai": "fireworks",
"opencode.ai": "opencode-go",
"api.x.ai": "xai",
+ "integrate.api.nvidia.com": "nvidia",
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"ollama.com": "ollama-cloud",
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 8c0484abd0..20b54b7887 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -24,6 +24,7 @@ model:
# "minimax" - MiniMax global (requires: MINIMAX_API_KEY)
# "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY)
# "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
+ # "nvidia" - NVIDIA NIM / build.nvidia.com (requires: NVIDIA_API_KEY)
# "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY)
# "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
diff --git a/cron/scheduler.py b/cron/scheduler.py
index 28c9057137..db5991c6f0 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -65,7 +65,15 @@ _HOME_TARGET_ENV_VARS = {
"wecom": "WECOM_HOME_CHANNEL",
"weixin": "WEIXIN_HOME_CHANNEL",
"bluebubbles": "BLUEBUBBLES_HOME_CHANNEL",
- "qqbot": "QQ_HOME_CHANNEL",
+ "qqbot": "QQBOT_HOME_CHANNEL",
+}
+
+# Legacy env var names kept for back-compat. Each entry is the current
+# primary env var → the previous name. _get_home_target_chat_id falls
+# back to the legacy name if the primary is unset, so users who set the
+# old name before the rename keep working until they migrate.
+_LEGACY_HOME_TARGET_ENV_VARS = {
+ "QQBOT_HOME_CHANNEL": "QQ_HOME_CHANNEL",
}
from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run
@@ -100,7 +108,12 @@ def _get_home_target_chat_id(platform_name: str) -> str:
env_var = _HOME_TARGET_ENV_VARS.get(platform_name.lower())
if not env_var:
return ""
- return os.getenv(env_var, "")
+ value = os.getenv(env_var, "")
+ if not value:
+ legacy = _LEGACY_HOME_TARGET_ENV_VARS.get(env_var)
+ if legacy:
+ value = os.getenv(legacy, "")
+ return value
def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[dict]:
diff --git a/gateway/config.py b/gateway/config.py
index 1258e08990..2d74073234 100644
--- a/gateway/config.py
+++ b/gateway/config.py
@@ -258,6 +258,13 @@ class GatewayConfig:
# Streaming configuration
streaming: StreamingConfig = field(default_factory=StreamingConfig)
+ # Session store pruning: drop SessionEntry records older than this many
+ # days from the in-memory dict and sessions.json. Keeps the store from
+ # growing unbounded in gateways serving many chats/threads/users over
+ # months. Pruning is invisible to users — if they resume, they get a
+ # fresh session exactly as if the reset policy had fired. 0 = disabled.
+ session_store_max_age_days: int = 90
+
def get_connected_platforms(self) -> List[Platform]:
"""Return list of platforms that are enabled and configured."""
connected = []
@@ -365,6 +372,7 @@ class GatewayConfig:
"thread_sessions_per_user": self.thread_sessions_per_user,
"unauthorized_dm_behavior": self.unauthorized_dm_behavior,
"streaming": self.streaming.to_dict(),
+ "session_store_max_age_days": self.session_store_max_age_days,
}
@classmethod
@@ -412,6 +420,13 @@ class GatewayConfig:
"pair",
)
+ try:
+ session_store_max_age_days = int(data.get("session_store_max_age_days", 90))
+ if session_store_max_age_days < 0:
+ session_store_max_age_days = 0
+ except (TypeError, ValueError):
+ session_store_max_age_days = 90
+
return cls(
platforms=platforms,
default_reset_policy=default_policy,
@@ -426,6 +441,7 @@ class GatewayConfig:
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
unauthorized_dm_behavior=unauthorized_dm_behavior,
streaming=StreamingConfig.from_dict(data.get("streaming", {})),
+ session_store_max_age_days=session_store_max_age_days,
)
def get_unauthorized_dm_behavior(self, platform: Optional[Platform] = None) -> str:
@@ -1213,12 +1229,24 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
qq_group_allowed = os.getenv("QQ_GROUP_ALLOWED_USERS", "").strip()
if qq_group_allowed:
extra["group_allow_from"] = qq_group_allowed
- qq_home = os.getenv("QQ_HOME_CHANNEL", "").strip()
+ qq_home = os.getenv("QQBOT_HOME_CHANNEL", "").strip()
+ qq_home_name_env = "QQBOT_HOME_CHANNEL_NAME"
+ if not qq_home:
+ # Back-compat: accept the pre-rename name and log a one-time warning.
+ legacy_home = os.getenv("QQ_HOME_CHANNEL", "").strip()
+ if legacy_home:
+ qq_home = legacy_home
+ qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
+ import logging
+ logging.getLogger(__name__).warning(
+ "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
+ "in your .env for consistency with the platform key."
+ )
if qq_home:
config.platforms[Platform.QQBOT].home_channel = HomeChannel(
platform=Platform.QQBOT,
chat_id=qq_home,
- name=os.getenv("QQ_HOME_CHANNEL_NAME", "Home"),
+ name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
)
# Session settings
diff --git a/gateway/platforms/qqbot/__init__.py b/gateway/platforms/qqbot/__init__.py
new file mode 100644
index 0000000000..7119dd979e
--- /dev/null
+++ b/gateway/platforms/qqbot/__init__.py
@@ -0,0 +1,57 @@
+"""
+QQBot platform package.
+
+Re-exports the main adapter symbols from ``adapter.py`` (the original
+``qqbot.py``) so that **all existing import paths remain unchanged**::
+
+ from gateway.platforms.qqbot import QQAdapter # works
+ from gateway.platforms.qqbot import check_qq_requirements # works
+
+New modules:
+ - ``constants`` — shared constants (API URLs, timeouts, message types)
+ - ``utils`` — User-Agent builder, config helpers
+ - ``crypto`` — AES-256-GCM key generation and decryption
+ - ``onboard`` — QR-code scan-to-configure flow
+"""
+
+# -- Adapter (original qqbot.py) ------------------------------------------
+from .adapter import ( # noqa: F401
+ QQAdapter,
+ QQCloseError,
+ check_qq_requirements,
+ _coerce_list,
+ _ssrf_redirect_guard,
+)
+
+# -- Onboard (QR-code scan-to-configure) -----------------------------------
+from .onboard import ( # noqa: F401
+ BindStatus,
+ create_bind_task,
+ poll_bind_result,
+ build_connect_url,
+)
+from .crypto import decrypt_secret, generate_bind_key # noqa: F401
+
+# -- Utils -----------------------------------------------------------------
+from .utils import build_user_agent, get_api_headers, coerce_list # noqa: F401
+
+__all__ = [
+ # adapter
+ "QQAdapter",
+ "QQCloseError",
+ "check_qq_requirements",
+ "_coerce_list",
+ "_ssrf_redirect_guard",
+ # onboard
+ "BindStatus",
+ "create_bind_task",
+ "poll_bind_result",
+ "build_connect_url",
+ # crypto
+ "decrypt_secret",
+ "generate_bind_key",
+ # utils
+ "build_user_agent",
+ "get_api_headers",
+ "coerce_list",
+]
diff --git a/gateway/platforms/qqbot.py b/gateway/platforms/qqbot/adapter.py
similarity index 72%
rename from gateway/platforms/qqbot.py
rename to gateway/platforms/qqbot/adapter.py
index 32252be12b..ced7442711 100644
--- a/gateway/platforms/qqbot.py
+++ b/gateway/platforms/qqbot/adapter.py
@@ -46,6 +46,7 @@ from urllib.parse import urlparse
try:
import aiohttp
+
AIOHTTP_AVAILABLE = True
except ImportError:
AIOHTTP_AVAILABLE = False
@@ -53,6 +54,7 @@ except ImportError:
try:
import httpx
+
HTTPX_AVAILABLE = True
except ImportError:
HTTPX_AVAILABLE = False
@@ -83,39 +85,40 @@ class QQCloseError(Exception):
self.code = int(code) if code else None
self.reason = str(reason) if reason else ""
super().__init__(f"WebSocket closed (code={self.code}, reason={self.reason})")
+
+
# ---------------------------------------------------------------------------
-# Constants
+# Constants — imported from the shared constants module.
# ---------------------------------------------------------------------------
-API_BASE = "https://api.sgroup.qq.com"
-TOKEN_URL = "https://bots.qq.com/app/getAppAccessToken"
-GATEWAY_URL_PATH = "/gateway"
-
-DEFAULT_API_TIMEOUT = 30.0
-FILE_UPLOAD_TIMEOUT = 120.0
-CONNECT_TIMEOUT_SECONDS = 20.0
-
-RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
-MAX_RECONNECT_ATTEMPTS = 100
-RATE_LIMIT_DELAY = 60 # seconds
-QUICK_DISCONNECT_THRESHOLD = 5.0 # seconds
-MAX_QUICK_DISCONNECT_COUNT = 3
-
-MAX_MESSAGE_LENGTH = 4000
-DEDUP_WINDOW_SECONDS = 300
-DEDUP_MAX_SIZE = 1000
-
-# QQ Bot message types
-MSG_TYPE_TEXT = 0
-MSG_TYPE_MARKDOWN = 2
-MSG_TYPE_MEDIA = 7
-MSG_TYPE_INPUT_NOTIFY = 6
-
-# QQ Bot file media types
-MEDIA_TYPE_IMAGE = 1
-MEDIA_TYPE_VIDEO = 2
-MEDIA_TYPE_VOICE = 3
-MEDIA_TYPE_FILE = 4
+from gateway.platforms.qqbot.constants import (
+ API_BASE,
+ TOKEN_URL,
+ GATEWAY_URL_PATH,
+ DEFAULT_API_TIMEOUT,
+ FILE_UPLOAD_TIMEOUT,
+ CONNECT_TIMEOUT_SECONDS,
+ RECONNECT_BACKOFF,
+ MAX_RECONNECT_ATTEMPTS,
+ RATE_LIMIT_DELAY,
+ QUICK_DISCONNECT_THRESHOLD,
+ MAX_QUICK_DISCONNECT_COUNT,
+ MAX_MESSAGE_LENGTH,
+ DEDUP_WINDOW_SECONDS,
+ DEDUP_MAX_SIZE,
+ MSG_TYPE_TEXT,
+ MSG_TYPE_MARKDOWN,
+ MSG_TYPE_MEDIA,
+ MSG_TYPE_INPUT_NOTIFY,
+ MEDIA_TYPE_IMAGE,
+ MEDIA_TYPE_VIDEO,
+ MEDIA_TYPE_VOICE,
+ MEDIA_TYPE_FILE,
+)
+from gateway.platforms.qqbot.utils import (
+ coerce_list as _coerce_list_impl,
+ build_user_agent,
+)
def check_qq_requirements() -> bool:
@@ -125,24 +128,30 @@ def check_qq_requirements() -> bool:
def _coerce_list(value: Any) -> List[str]:
"""Coerce config values into a trimmed string list."""
- if value is None:
- return []
- if isinstance(value, str):
- return [item.strip() for item in value.split(",") if item.strip()]
- if isinstance(value, (list, tuple, set)):
- return [str(item).strip() for item in value if str(item).strip()]
- return [str(value).strip()] if str(value).strip() else []
+ return _coerce_list_impl(value)
# ---------------------------------------------------------------------------
# QQAdapter
# ---------------------------------------------------------------------------
+
class QQAdapter(BasePlatformAdapter):
"""QQ Bot adapter backed by the official QQ Bot WebSocket Gateway + REST API."""
# QQ Bot API does not support editing sent messages.
SUPPORTS_MESSAGE_EDITING = False
+ MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH
+ _TYPING_INPUT_SECONDS = 60 # input_notify duration reported to QQ
+ _TYPING_DEBOUNCE_SECONDS = 50 # refresh before it expires
+
+ @property
+ def _log_tag(self) -> str:
+ """Log prefix including app_id for multi-instance disambiguation."""
+ app_id = getattr(self, "_app_id", None)
+ if app_id:
+ return f"QQBot:{app_id}"
+ return "QQBot"
def _fail_pending(self, reason: str) -> None:
"""Fail all pending response futures."""
@@ -151,21 +160,25 @@ class QQAdapter(BasePlatformAdapter):
fut.set_exception(RuntimeError(reason))
self._pending_responses.clear()
- MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH
-
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.QQBOT)
extra = config.extra or {}
self._app_id = str(extra.get("app_id") or os.getenv("QQ_APP_ID", "")).strip()
- self._client_secret = str(extra.get("client_secret") or os.getenv("QQ_CLIENT_SECRET", "")).strip()
+ self._client_secret = str(
+ extra.get("client_secret") or os.getenv("QQ_CLIENT_SECRET", "")
+ ).strip()
self._markdown_support = bool(extra.get("markdown_support", True))
# Auth/ACL policies
self._dm_policy = str(extra.get("dm_policy", "open")).strip().lower()
- self._allow_from = _coerce_list(extra.get("allow_from") or extra.get("allowFrom"))
+ self._allow_from = _coerce_list(
+ extra.get("allow_from") or extra.get("allowFrom")
+ )
self._group_policy = str(extra.get("group_policy", "open")).strip().lower()
- self._group_allow_from = _coerce_list(extra.get("group_allow_from") or extra.get("groupAllowFrom"))
+ self._group_allow_from = _coerce_list(
+ extra.get("group_allow_from") or extra.get("groupAllowFrom")
+ )
# Connection state
self._session: Optional[aiohttp.ClientSession] = None
@@ -182,6 +195,11 @@ class QQAdapter(BasePlatformAdapter):
self._pending_responses: Dict[str, asyncio.Future] = {}
self._seen_messages: Dict[str, float] = {}
+ # Last inbound message ID per chat — used by send_typing
+ self._last_msg_id: Dict[str, str] = {}
+ # Typing debounce: chat_id → last send_typing timestamp
+ self._typing_sent_at: Dict[str, float] = {}
+
# Token cache
self._access_token: Optional[str] = None
self._token_expires_at: float = 0.0
@@ -207,23 +225,21 @@ class QQAdapter(BasePlatformAdapter):
if not AIOHTTP_AVAILABLE:
message = "QQ startup failed: aiohttp not installed"
self._set_fatal_error("qq_missing_dependency", message, retryable=True)
- logger.warning("[%s] %s. Run: pip install aiohttp", self.name, message)
+ logger.warning("[%s] %s. Run: pip install aiohttp", self._log_tag, message)
return False
if not HTTPX_AVAILABLE:
message = "QQ startup failed: httpx not installed"
self._set_fatal_error("qq_missing_dependency", message, retryable=True)
- logger.warning("[%s] %s. Run: pip install httpx", self.name, message)
+ logger.warning("[%s] %s. Run: pip install httpx", self._log_tag, message)
return False
if not self._app_id or not self._client_secret:
message = "QQ startup failed: QQ_APP_ID and QQ_CLIENT_SECRET are required"
self._set_fatal_error("qq_missing_credentials", message, retryable=True)
- logger.warning("[%s] %s", self.name, message)
+ logger.warning("[%s] %s", self._log_tag, message)
return False
# Prevent duplicate connections with the same credentials
- if not self._acquire_platform_lock(
- "qqbot-appid", self._app_id, "QQBot app ID"
- ):
+ if not self._acquire_platform_lock("qqbot-appid", self._app_id, "QQBot app ID"):
return False
try:
@@ -238,7 +254,7 @@ class QQAdapter(BasePlatformAdapter):
# 2. Get WebSocket gateway URL
gateway_url = await self._get_gateway_url()
- logger.info("[%s] Gateway URL: %s", self.name, gateway_url)
+ logger.info("[%s] Gateway URL: %s", self._log_tag, gateway_url)
# 3. Open WebSocket
await self._open_ws(gateway_url)
@@ -247,12 +263,12 @@ class QQAdapter(BasePlatformAdapter):
self._listen_task = asyncio.create_task(self._listen_loop())
self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())
self._mark_connected()
- logger.info("[%s] Connected", self.name)
+ logger.info("[%s] Connected", self._log_tag)
return True
except Exception as exc:
message = f"QQ startup failed: {exc}"
self._set_fatal_error("qq_connect_error", message, retryable=True)
- logger.error("[%s] %s", self.name, message, exc_info=True)
+ logger.error("[%s] %s", self._log_tag, message, exc_info=True)
await self._cleanup()
self._release_platform_lock()
return False
@@ -280,7 +296,7 @@ class QQAdapter(BasePlatformAdapter):
await self._cleanup()
self._release_platform_lock()
- logger.info("[%s] Disconnected", self.name)
+ logger.info("[%s] Disconnected", self._log_tag)
async def _cleanup(self) -> None:
"""Close WebSocket, HTTP session, and client."""
@@ -329,12 +345,16 @@ class QQAdapter(BasePlatformAdapter):
token = data.get("access_token")
if not token:
- raise RuntimeError(f"QQ Bot token response missing access_token: {data}")
+ raise RuntimeError(
+ f"QQ Bot token response missing access_token: {data}"
+ )
expires_in = int(data.get("expires_in", 7200))
self._access_token = token
self._token_expires_at = time.time() + expires_in
- logger.info("[%s] Access token refreshed, expires in %ds", self.name, expires_in)
+ logger.info(
+ "[%s] Access token refreshed, expires in %ds", self._log_tag, expires_in
+ )
return self._access_token
async def _get_gateway_url(self) -> str:
@@ -343,7 +363,10 @@ class QQAdapter(BasePlatformAdapter):
try:
resp = await self._http_client.get(
f"{API_BASE}{GATEWAY_URL_PATH}",
- headers={"Authorization": f"QQBot {token}"},
+ headers={
+ "Authorization": f"QQBot {token}",
+ "User-Agent": build_user_agent(),
+ },
timeout=DEFAULT_API_TIMEOUT,
)
resp.raise_for_status()
@@ -373,9 +396,12 @@ class QQAdapter(BasePlatformAdapter):
self._session = aiohttp.ClientSession()
self._ws = await self._session.ws_connect(
gateway_url,
+ headers={
+ "User-Agent": build_user_agent(),
+ },
timeout=CONNECT_TIMEOUT_SECONDS,
)
- logger.info("[%s] WebSocket connected to %s", self.name, gateway_url)
+ logger.info("[%s] WebSocket connected to %s", self._log_tag, gateway_url)
async def _listen_loop(self) -> None:
"""Read WebSocket events and reconnect on errors.
@@ -404,23 +430,34 @@ class QQAdapter(BasePlatformAdapter):
return
code = exc.code
- logger.warning("[%s] WebSocket closed: code=%s reason=%s",
- self.name, code, exc.reason)
+ logger.warning(
+ "[%s] WebSocket closed: code=%s reason=%s",
+ self._log_tag,
+ code,
+ exc.reason,
+ )
# Quick disconnect detection (permission issues, misconfiguration)
duration = time.monotonic() - connect_time
if duration < QUICK_DISCONNECT_THRESHOLD and connect_time > 0:
quick_disconnect_count += 1
- logger.info("[%s] Quick disconnect (%.1fs), count: %d",
- self.name, duration, quick_disconnect_count)
+ logger.info(
+ "[%s] Quick disconnect (%.1fs), count: %d",
+ self._log_tag,
+ duration,
+ quick_disconnect_count,
+ )
if quick_disconnect_count >= MAX_QUICK_DISCONNECT_COUNT:
logger.error(
"[%s] Too many quick disconnects. "
"Check: 1) AppID/Secret correct 2) Bot permissions on QQ Open Platform",
- self.name,
+ self._log_tag,
+ )
+ self._set_fatal_error(
+ "qq_quick_disconnect",
+ "Too many quick disconnects — check bot permissions",
+ retryable=True,
)
- self._set_fatal_error("qq_quick_disconnect",
- "Too many quick disconnects — check bot permissions", retryable=True)
return
else:
quick_disconnect_count = 0
@@ -431,13 +468,21 @@ class QQAdapter(BasePlatformAdapter):
# Stop reconnecting for fatal codes
if code in (4914, 4915):
desc = "offline/sandbox-only" if code == 4914 else "banned"
- logger.error("[%s] Bot is %s. Check QQ Open Platform.", self.name, desc)
- self._set_fatal_error(f"qq_{desc}", f"Bot is {desc}", retryable=False)
+ logger.error(
+ "[%s] Bot is %s. Check QQ Open Platform.", self._log_tag, desc
+ )
+ self._set_fatal_error(
+ f"qq_{desc}", f"Bot is {desc}", retryable=False
+ )
return
# Rate limited
if code == 4008:
- logger.info("[%s] Rate limited (4008), waiting %ds", self.name, RATE_LIMIT_DELAY)
+ logger.info(
+ "[%s] Rate limited (4008), waiting %ds",
+ self._log_tag,
+ RATE_LIMIT_DELAY,
+ )
if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
return
await asyncio.sleep(RATE_LIMIT_DELAY)
@@ -450,14 +495,38 @@ class QQAdapter(BasePlatformAdapter):
# Token invalid → clear cached token so _ensure_token() refreshes
if code == 4004:
- logger.info("[%s] Invalid token (4004), will refresh and reconnect", self.name)
+ logger.info(
+ "[%s] Invalid token (4004), will refresh and reconnect",
+ self._log_tag,
+ )
self._access_token = None
self._token_expires_at = 0.0
# Session invalid → clear session, will re-identify on next Hello
- if code in (4006, 4007, 4009, 4900, 4901, 4902, 4903, 4904, 4905,
- 4906, 4907, 4908, 4909, 4910, 4911, 4912, 4913):
- logger.info("[%s] Session error (%d), clearing session for re-identify", self.name, code)
+ if code in (
+ 4006,
+ 4007,
+ 4009,
+ 4900,
+ 4901,
+ 4902,
+ 4903,
+ 4904,
+ 4905,
+ 4906,
+ 4907,
+ 4908,
+ 4909,
+ 4910,
+ 4911,
+ 4912,
+ 4913,
+ ):
+ logger.info(
+ "[%s] Session error (%d), clearing session for re-identify",
+ self._log_tag,
+ code,
+ )
self._session_id = None
self._last_seq = None
@@ -470,12 +539,12 @@ class QQAdapter(BasePlatformAdapter):
except Exception as exc:
if not self._running:
return
- logger.warning("[%s] WebSocket error: %s", self.name, exc)
+ logger.warning("[%s] WebSocket error: %s", self._log_tag, exc)
self._mark_disconnected()
self._fail_pending("Connection interrupted")
if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
- logger.error("[%s] Max reconnect attempts reached", self.name)
+ logger.error("[%s] Max reconnect attempts reached", self._log_tag)
return
if await self._reconnect(backoff_idx):
@@ -487,7 +556,12 @@ class QQAdapter(BasePlatformAdapter):
async def _reconnect(self, backoff_idx: int) -> bool:
"""Attempt to reconnect the WebSocket. Returns True on success."""
delay = RECONNECT_BACKOFF[min(backoff_idx, len(RECONNECT_BACKOFF) - 1)]
- logger.info("[%s] Reconnecting in %ds (attempt %d)...", self.name, delay, backoff_idx + 1)
+ logger.info(
+ "[%s] Reconnecting in %ds (attempt %d)...",
+ self._log_tag,
+ delay,
+ backoff_idx + 1,
+ )
await asyncio.sleep(delay)
self._heartbeat_interval = 30.0 # reset until Hello
@@ -496,10 +570,10 @@ class QQAdapter(BasePlatformAdapter):
gateway_url = await self._get_gateway_url()
await self._open_ws(gateway_url)
self._mark_connected()
- logger.info("[%s] Reconnected", self.name)
+ logger.info("[%s] Reconnected", self._log_tag)
return True
except Exception as exc:
- logger.warning("[%s] Reconnect failed: %s", self.name, exc)
+ logger.warning("[%s] Reconnect failed: %s", self._log_tag, exc)
return False
async def _read_events(self) -> None:
@@ -536,7 +610,7 @@ class QQAdapter(BasePlatformAdapter):
# d should be the latest sequence number received, or null
await self._ws.send_json({"op": 1, "d": self._last_seq})
except Exception as exc:
- logger.debug("[%s] Heartbeat failed: %s", self.name, exc)
+ logger.debug("[%s] Heartbeat failed: %s", self._log_tag, exc)
except asyncio.CancelledError:
pass
@@ -554,7 +628,11 @@ class QQAdapter(BasePlatformAdapter):
"op": 2,
"d": {
"token": f"QQBot {token}",
- "intents": (1 << 25) | (1 << 30) | (1 << 12), # C2C_GROUP_AT_MESSAGES + PUBLIC_GUILD_MESSAGES + DIRECT_MESSAGE
+ "intents": (1 << 25)
+ | (1 << 30)
+ | (
+ 1 << 12
+ ), # C2C_GROUP_AT_MESSAGES + PUBLIC_GUILD_MESSAGES + DIRECT_MESSAGE
"shard": [0, 1],
"properties": {
"$os": "macOS",
@@ -566,11 +644,13 @@ class QQAdapter(BasePlatformAdapter):
try:
if self._ws and not self._ws.closed:
await self._ws.send_json(identify_payload)
- logger.info("[%s] Identify sent", self.name)
+ logger.info("[%s] Identify sent", self._log_tag)
else:
- logger.warning("[%s] Cannot send Identify: WebSocket not connected", self.name)
+ logger.warning(
+ "[%s] Cannot send Identify: WebSocket not connected", self._log_tag
+ )
except Exception as exc:
- logger.error("[%s] Failed to send Identify: %s", self.name, exc)
+ logger.error("[%s] Failed to send Identify: %s", self._log_tag, exc)
async def _send_resume(self) -> None:
"""Send op 6 Resume to re-authenticate after a reconnection.
@@ -589,12 +669,18 @@ class QQAdapter(BasePlatformAdapter):
try:
if self._ws and not self._ws.closed:
await self._ws.send_json(resume_payload)
- logger.info("[%s] Resume sent (session_id=%s, seq=%s)",
- self.name, self._session_id, self._last_seq)
+ logger.info(
+ "[%s] Resume sent (session_id=%s, seq=%s)",
+ self._log_tag,
+ self._session_id,
+ self._last_seq,
+ )
else:
- logger.warning("[%s] Cannot send Resume: WebSocket not connected", self.name)
+ logger.warning(
+ "[%s] Cannot send Resume: WebSocket not connected", self._log_tag
+ )
except Exception as exc:
- logger.error("[%s] Failed to send Resume: %s", self.name, exc)
+ logger.error("[%s] Failed to send Resume: %s", self._log_tag, exc)
# If resume fails, clear session and fall back to identify on next Hello
self._session_id = None
self._last_seq = None
@@ -627,8 +713,12 @@ class QQAdapter(BasePlatformAdapter):
interval_ms = d_data.get("heartbeat_interval", 30000)
# Send heartbeats at 80% of the server interval to stay safe
self._heartbeat_interval = interval_ms / 1000.0 * 0.8
- logger.debug("[%s] Hello received, heartbeat_interval=%dms (sending every %.1fs)",
- self.name, interval_ms, self._heartbeat_interval)
+ logger.debug(
+ "[%s] Hello received, heartbeat_interval=%dms (sending every %.1fs)",
+ self._log_tag,
+ interval_ms,
+ self._heartbeat_interval,
+ )
# Authenticate: send Resume if we have a session, else Identify.
# Use _create_task which is safe when no event loop is running (tests).
if self._session_id and self._last_seq is not None:
@@ -642,26 +732,30 @@ class QQAdapter(BasePlatformAdapter):
if t == "READY":
self._handle_ready(d)
elif t == "RESUMED":
- logger.info("[%s] Session resumed", self.name)
- elif t in ("C2C_MESSAGE_CREATE", "GROUP_AT_MESSAGE_CREATE",
- "DIRECT_MESSAGE_CREATE", "GUILD_MESSAGE_CREATE",
- "GUILD_AT_MESSAGE_CREATE"):
+ logger.info("[%s] Session resumed", self._log_tag)
+ elif t in (
+ "C2C_MESSAGE_CREATE",
+ "GROUP_AT_MESSAGE_CREATE",
+ "DIRECT_MESSAGE_CREATE",
+ "GUILD_MESSAGE_CREATE",
+ "GUILD_AT_MESSAGE_CREATE",
+ ):
asyncio.create_task(self._on_message(t, d))
else:
- logger.debug("[%s] Unhandled dispatch: %s", self.name, t)
+ logger.debug("[%s] Unhandled dispatch: %s", self._log_tag, t)
return
# op 11 = Heartbeat ACK
if op == 11:
return
- logger.debug("[%s] Unknown op: %s", self.name, op)
+ logger.debug("[%s] Unknown op: %s", self._log_tag, op)
def _handle_ready(self, d: Any) -> None:
"""Handle the READY event — store session_id for resume."""
if isinstance(d, dict):
self._session_id = d.get("session_id")
- logger.info("[%s] Ready, session_id=%s", self.name, self._session_id)
+ logger.info("[%s] Ready, session_id=%s", self._log_tag, self._session_id)
# ------------------------------------------------------------------
# JSON helpers
@@ -672,7 +766,7 @@ class QQAdapter(BasePlatformAdapter):
try:
payload = json.loads(raw)
except Exception:
- logger.debug("[%s] Failed to parse JSON: %r", "QQBot", raw)
+ logger.warning("[QQBot] Failed to parse JSON: %r", raw)
return None
return payload if isinstance(payload, dict) else None
@@ -687,6 +781,12 @@ class QQAdapter(BasePlatformAdapter):
# Inbound message handling
# ------------------------------------------------------------------
+ async def handle_message(self, event: MessageEvent) -> None:
+ """Cache the last message ID per chat, then delegate to base."""
+ if event.message_id and event.source.chat_id:
+ self._last_msg_id[event.source.chat_id] = event.message_id
+ await super().handle_message(event)
+
async def _on_message(self, event_type: str, d: Any) -> None:
"""Process an inbound QQ Bot message event."""
if not isinstance(d, dict):
@@ -695,7 +795,9 @@ class QQAdapter(BasePlatformAdapter):
# Extract common fields
msg_id = str(d.get("id", ""))
if not msg_id or self._is_duplicate(msg_id):
- logger.debug("[%s] Duplicate or missing message id: %s", self.name, msg_id)
+ logger.debug(
+ "[%s] Duplicate or missing message id: %s", self._log_tag, msg_id
+ )
return
timestamp = str(d.get("timestamp", ""))
@@ -713,7 +815,12 @@ class QQAdapter(BasePlatformAdapter):
await self._handle_dm_message(d, msg_id, content, author, timestamp)
async def _handle_c2c_message(
- self, d: Dict[str, Any], msg_id: str, content: str, author: Dict[str, Any], timestamp: str
+ self,
+ d: Dict[str, Any],
+ msg_id: str,
+ content: str,
+ author: Dict[str, Any],
+ timestamp: str,
) -> None:
"""Handle a C2C (private) message event."""
user_openid = str(author.get("user_openid", ""))
@@ -724,17 +831,28 @@ class QQAdapter(BasePlatformAdapter):
text = content
attachments_raw = d.get("attachments")
- logger.info("[QQ] C2C message: id=%s content=%r attachments=%s",
- msg_id, content[:50] if content else "",
- f"{len(attachments_raw) if isinstance(attachments_raw, list) else 0} items"
- if attachments_raw else "None")
+ logger.info(
+ "[%s] C2C message: id=%s content=%r attachments=%s",
+ self._log_tag,
+ msg_id,
+ content[:50] if content else "",
+ (
+ f"{len(attachments_raw) if isinstance(attachments_raw, list) else 0} items"
+ if attachments_raw
+ else "None"
+ ),
+ )
if attachments_raw and isinstance(attachments_raw, list):
for _i, _att in enumerate(attachments_raw):
if isinstance(_att, dict):
- logger.info("[QQ] attachment[%d]: content_type=%s url=%s filename=%s",
- _i, _att.get("content_type", ""),
- str(_att.get("url", ""))[:80],
- _att.get("filename", ""))
+ logger.info(
+ "[%s] attachment[%d]: content_type=%s url=%s filename=%s",
+ self._log_tag,
+ _i,
+ _att.get("content_type", ""),
+ str(_att.get("url", ""))[:80],
+ _att.get("filename", ""),
+ )
# Process all attachments uniformly (images, voice, files)
att_result = await self._process_attachments(attachments_raw)
@@ -746,13 +864,23 @@ class QQAdapter(BasePlatformAdapter):
# Append voice transcripts to the text body
if voice_transcripts:
voice_block = "\n".join(voice_transcripts)
- text = (text + "\n\n" + voice_block).strip() if text.strip() else voice_block
+ text = (
+ (text + "\n\n" + voice_block).strip() if text.strip() else voice_block
+ )
# Append non-media attachment info
if attachment_info:
- text = (text + "\n\n" + attachment_info).strip() if text.strip() else attachment_info
+ text = (
+ (text + "\n\n" + attachment_info).strip()
+ if text.strip()
+ else attachment_info
+ )
- logger.info("[QQ] After processing: images=%d, voice=%d",
- len(image_urls), len(voice_transcripts))
+ logger.info(
+ "[%s] After processing: images=%d, voice=%d",
+ self._log_tag,
+ len(image_urls),
+ len(voice_transcripts),
+ )
if not text.strip() and not image_urls:
return
@@ -775,13 +903,20 @@ class QQAdapter(BasePlatformAdapter):
await self.handle_message(event)
async def _handle_group_message(
- self, d: Dict[str, Any], msg_id: str, content: str, author: Dict[str, Any], timestamp: str
+ self,
+ d: Dict[str, Any],
+ msg_id: str,
+ content: str,
+ author: Dict[str, Any],
+ timestamp: str,
) -> None:
"""Handle a group @-message event."""
group_openid = str(d.get("group_openid", ""))
if not group_openid:
return
- if not self._is_group_allowed(group_openid, str(author.get("member_openid", ""))):
+ if not self._is_group_allowed(
+ group_openid, str(author.get("member_openid", ""))
+ ):
return
# Strip the @bot mention prefix from content
@@ -795,9 +930,15 @@ class QQAdapter(BasePlatformAdapter):
# Append voice transcripts
if voice_transcripts:
voice_block = "\n".join(voice_transcripts)
- text = (text + "\n\n" + voice_block).strip() if text.strip() else voice_block
+ text = (
+ (text + "\n\n" + voice_block).strip() if text.strip() else voice_block
+ )
if attachment_info:
- text = (text + "\n\n" + attachment_info).strip() if text.strip() else attachment_info
+ text = (
+ (text + "\n\n" + attachment_info).strip()
+ if text.strip()
+ else attachment_info
+ )
if not text.strip() and not image_urls:
return
@@ -820,7 +961,12 @@ class QQAdapter(BasePlatformAdapter):
await self.handle_message(event)
async def _handle_guild_message(
- self, d: Dict[str, Any], msg_id: str, content: str, author: Dict[str, Any], timestamp: str
+ self,
+ d: Dict[str, Any],
+ msg_id: str,
+ content: str,
+ author: Dict[str, Any],
+ timestamp: str,
) -> None:
"""Handle a guild/channel message event."""
channel_id = str(d.get("channel_id", ""))
@@ -839,9 +985,15 @@ class QQAdapter(BasePlatformAdapter):
if voice_transcripts:
voice_block = "\n".join(voice_transcripts)
- text = (text + "\n\n" + voice_block).strip() if text.strip() else voice_block
+ text = (
+ (text + "\n\n" + voice_block).strip() if text.strip() else voice_block
+ )
if attachment_info:
- text = (text + "\n\n" + attachment_info).strip() if text.strip() else attachment_info
+ text = (
+ (text + "\n\n" + attachment_info).strip()
+ if text.strip()
+ else attachment_info
+ )
if not text.strip() and not image_urls:
return
@@ -865,7 +1017,12 @@ class QQAdapter(BasePlatformAdapter):
await self.handle_message(event)
async def _handle_dm_message(
- self, d: Dict[str, Any], msg_id: str, content: str, author: Dict[str, Any], timestamp: str
+ self,
+ d: Dict[str, Any],
+ msg_id: str,
+ content: str,
+ author: Dict[str, Any],
+ timestamp: str,
) -> None:
"""Handle a guild DM message event."""
guild_id = str(d.get("guild_id", ""))
@@ -881,9 +1038,15 @@ class QQAdapter(BasePlatformAdapter):
if voice_transcripts:
voice_block = "\n".join(voice_transcripts)
- text = (text + "\n\n" + voice_block).strip() if text.strip() else voice_block
+ text = (
+ (text + "\n\n" + voice_block).strip() if text.strip() else voice_block
+ )
if attachment_info:
- text = (text + "\n\n" + attachment_info).strip() if text.strip() else attachment_info
+ text = (
+ (text + "\n\n" + attachment_info).strip()
+ if text.strip()
+ else attachment_info
+ )
if not text.strip() and not image_urls:
return
@@ -909,7 +1072,6 @@ class QQAdapter(BasePlatformAdapter):
# Attachment processing
# ------------------------------------------------------------------
-
@staticmethod
def _detect_message_type(media_urls: list, media_types: list):
"""Determine MessageType from attachment content types."""
@@ -926,11 +1088,16 @@ class QQAdapter(BasePlatformAdapter):
return MessageType.PHOTO
# Unknown content type with an attachment — don't assume PHOTO
# to prevent non-image files from being sent to vision analysis.
- logger.debug("[QQ] Unknown media content_type '%s', defaulting to TEXT", first_type)
+ logger.debug(
+ "[%s] Unknown media content_type '%s', defaulting to TEXT",
+ self._log_tag,
+ first_type,
+ )
return MessageType.TEXT
async def _process_attachments(
- self, attachments: Any,
+ self,
+ attachments: Any,
) -> Dict[str, Any]:
"""Process inbound attachments (all message types).
@@ -944,8 +1111,12 @@ class QQAdapter(BasePlatformAdapter):
- attachment_info: str — text description of non-image, non-voice attachments
"""
if not isinstance(attachments, list):
- return {"image_urls": [], "image_media_types": [],
- "voice_transcripts": [], "attachment_info": ""}
+ return {
+ "image_urls": [],
+ "image_media_types": [],
+ "voice_transcripts": [],
+ "attachment_info": "",
+ }
image_urls: List[str] = []
image_media_types: List[str] = []
@@ -967,30 +1138,39 @@ class QQAdapter(BasePlatformAdapter):
url = ""
continue
- logger.debug("[QQ] Processing attachment: content_type=%s, url=%s, filename=%s",
- ct, url[:80], filename)
+ logger.debug(
+ "[%s] Processing attachment: content_type=%s, url=%s, filename=%s",
+ self._log_tag,
+ ct,
+ url[:80],
+ filename,
+ )
if self._is_voice_content_type(ct, filename):
# Voice: use QQ's asr_refer_text first, then voice_wav_url, then STT.
asr_refer = (
str(att.get("asr_refer_text", "")).strip()
- if isinstance(att.get("asr_refer_text"), str) else ""
+ if isinstance(att.get("asr_refer_text"), str)
+ else ""
)
voice_wav_url = (
str(att.get("voice_wav_url", "")).strip()
- if isinstance(att.get("voice_wav_url"), str) else ""
+ if isinstance(att.get("voice_wav_url"), str)
+ else ""
)
transcript = await self._stt_voice_attachment(
- url, ct, filename,
+ url,
+ ct,
+ filename,
asr_refer_text=asr_refer or None,
voice_wav_url=voice_wav_url or None,
)
if transcript:
voice_transcripts.append(f"[Voice] {transcript}")
- logger.info("[QQ] Voice transcript: %s", transcript)
+ logger.debug("[%s] Voice transcript: %s", self._log_tag, transcript)
else:
- logger.warning("[QQ] Voice STT failed for %s", url[:60])
+ logger.warning("[%s] Voice STT failed for %s", self._log_tag, url[:60])
voice_transcripts.append("[Voice] [语音识别失败]")
elif ct.startswith("image/"):
# Image: download and cache locally.
@@ -1000,9 +1180,13 @@ class QQAdapter(BasePlatformAdapter):
image_urls.append(cached_path)
image_media_types.append(ct or "image/jpeg")
elif cached_path:
- logger.warning("[QQ] Cached image path does not exist: %s", cached_path)
+ logger.warning(
+ "[%s] Cached image path does not exist: %s",
+ self._log_tag,
+ cached_path,
+ )
except Exception as exc:
- logger.debug("[QQ] Failed to cache image: %s", exc)
+ logger.debug("[%s] Failed to cache image: %s", self._log_tag, exc)
else:
# Other attachments (video, file, etc.): record as text.
try:
@@ -1010,7 +1194,7 @@ class QQAdapter(BasePlatformAdapter):
if cached_path:
other_attachments.append(f"[Attachment: {filename or ct}]")
except Exception as exc:
- logger.debug("[QQ] Failed to cache attachment: %s", exc)
+ logger.debug("[%s] Failed to cache attachment: %s", self._log_tag, exc)
attachment_info = "\n".join(other_attachments) if other_attachments else ""
return {
@@ -1023,6 +1207,7 @@ class QQAdapter(BasePlatformAdapter):
async def _download_and_cache(self, url: str, content_type: str) -> Optional[str]:
"""Download a URL and cache it locally."""
from tools.url_safety import is_safe_url
+
if not is_safe_url(url):
raise ValueError(f"Blocked unsafe URL: {url[:80]}")
@@ -1031,12 +1216,16 @@ class QQAdapter(BasePlatformAdapter):
try:
resp = await self._http_client.get(
- url, timeout=30.0, headers=self._qq_media_headers(),
+ url,
+ timeout=30.0,
+ headers=self._qq_media_headers(),
)
resp.raise_for_status()
data = resp.content
except Exception as exc:
- logger.debug("[%s] Download failed for %s: %s", self.name, url[:80], exc)
+ logger.debug(
+ "[%s] Download failed for %s: %s", self._log_tag, url[:80], exc
+ )
return None
if content_type.startswith("image/"):
@@ -1057,7 +1246,17 @@ class QQAdapter(BasePlatformAdapter):
fn = filename.strip().lower()
if ct == "voice" or ct.startswith("audio/"):
return True
- _VOICE_EXTENSIONS = (".silk", ".amr", ".mp3", ".wav", ".ogg", ".m4a", ".aac", ".speex", ".flac")
+ _VOICE_EXTENSIONS = (
+ ".silk",
+ ".amr",
+ ".mp3",
+ ".wav",
+ ".ogg",
+ ".m4a",
+ ".aac",
+ ".speex",
+ ".flac",
+ )
if any(fn.endswith(ext) for ext in _VOICE_EXTENSIONS):
return True
return False
@@ -1074,13 +1273,13 @@ class QQAdapter(BasePlatformAdapter):
return {}
async def _stt_voice_attachment(
- self,
- url: str,
- content_type: str,
- filename: str,
- *,
- asr_refer_text: Optional[str] = None,
- voice_wav_url: Optional[str] = None,
+ self,
+ url: str,
+ content_type: str,
+ filename: str,
+ *,
+ asr_refer_text: Optional[str] = None,
+ voice_wav_url: Optional[str] = None,
) -> Optional[str]:
"""Download a voice attachment, convert to wav, and transcribe.
@@ -1093,7 +1292,9 @@ class QQAdapter(BasePlatformAdapter):
"""
# 1. Use QQ's built-in ASR text if available
if asr_refer_text:
- logger.info("[QQ] STT: using QQ asr_refer_text: %r", asr_refer_text[:100])
+ logger.debug(
+ "[%s] STT: using QQ asr_refer_text: %r", self._log_tag, asr_refer_text[:100]
+ )
return asr_refer_text
# Determine which URL to download (prefer voice_wav_url — already WAV)
@@ -1104,7 +1305,7 @@ class QQAdapter(BasePlatformAdapter):
voice_wav_url = f"https:{voice_wav_url}"
download_url = voice_wav_url
is_pre_wav = True
- logger.info("[QQ] STT: using voice_wav_url (pre-converted WAV)")
+ logger.debug("[%s] STT: using voice_wav_url (pre-converted WAV)", self._log_tag)
from tools.url_safety import is_safe_url
if not is_safe_url(download_url):
@@ -1114,40 +1315,65 @@ class QQAdapter(BasePlatformAdapter):
try:
# 2. Download audio (QQ CDN requires Authorization header)
if not self._http_client:
- logger.warning("[QQ] STT: no HTTP client")
+ logger.warning("[%s] STT: no HTTP client", self._log_tag)
return None
download_headers = self._qq_media_headers()
- logger.info("[QQ] STT: downloading voice from %s (pre_wav=%s, headers=%s)",
- download_url[:80], is_pre_wav, bool(download_headers))
+ logger.debug(
+ "[%s] STT: downloading voice from %s (pre_wav=%s, headers=%s)",
+ self._log_tag,
+ download_url[:80],
+ is_pre_wav,
+ bool(download_headers),
+ )
resp = await self._http_client.get(
- download_url, timeout=30.0, headers=download_headers, follow_redirects=True,
+ download_url,
+ timeout=30.0,
+ headers=download_headers,
+ follow_redirects=True,
)
resp.raise_for_status()
audio_data = resp.content
- logger.info("[QQ] STT: downloaded %d bytes, content_type=%s",
- len(audio_data), resp.headers.get("content-type", "unknown"))
+ logger.debug(
+ "[%s] STT: downloaded %d bytes, content_type=%s",
+ self._log_tag,
+ len(audio_data),
+ resp.headers.get("content-type", "unknown"),
+ )
if len(audio_data) < 10:
- logger.warning("[QQ] STT: downloaded data too small (%d bytes), skipping", len(audio_data))
+ logger.warning(
+ "[%s] STT: downloaded data too small (%d bytes), skipping",
+ self._log_tag,
+ len(audio_data),
+ )
return None
# 3. Convert to wav (skip if we already have a pre-converted WAV)
if is_pre_wav:
import tempfile
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
tmp.write(audio_data)
wav_path = tmp.name
- logger.info("[QQ] STT: using pre-converted WAV directly (%d bytes)", len(audio_data))
+ logger.debug(
+ "[%s] STT: using pre-converted WAV directly (%d bytes)",
+ self._log_tag,
+ len(audio_data),
+ )
else:
- logger.info("[QQ] STT: converting to wav, filename=%r", filename)
+ logger.debug(
+ "[%s] STT: converting to wav, filename=%r", self._log_tag, filename
+ )
wav_path = await self._convert_audio_to_wav_file(audio_data, filename)
if not wav_path or not Path(wav_path).exists():
- logger.warning("[QQ] STT: ffmpeg conversion produced no output")
+ logger.warning(
+ "[%s] STT: ffmpeg conversion produced no output", self._log_tag
+ )
return None
# 4. Call STT API
- logger.info("[QQ] STT: calling ASR on %s", wav_path)
+ logger.debug("[%s] STT: calling ASR on %s", self._log_tag, wav_path)
transcript = await self._call_stt(wav_path)
# 5. Cleanup temp file
@@ -1157,15 +1383,22 @@ class QQAdapter(BasePlatformAdapter):
pass
if transcript:
- logger.info("[QQ] STT success: %r", transcript[:100])
+ logger.debug("[%s] STT success: %r", self._log_tag, transcript[:100])
else:
- logger.warning("[QQ] STT: ASR returned empty transcript")
+ logger.warning("[%s] STT: ASR returned empty transcript", self._log_tag)
return transcript
except (httpx.HTTPStatusError, httpx.TransportError, IOError) as exc:
- logger.warning("[QQ] STT failed for voice attachment: %s: %s", type(exc).__name__, exc)
+ logger.warning(
+ "[%s] STT failed for voice attachment: %s: %s",
+ self._log_tag,
+ type(exc).__name__,
+ exc,
+ )
return None
- async def _convert_audio_to_wav_file(self, audio_data: bytes, filename: str) -> Optional[str]:
+ async def _convert_audio_to_wav_file(
+ self, audio_data: bytes, filename: str
+ ) -> Optional[str]:
"""Convert audio bytes to a temp .wav file using pilk (SILK) or ffmpeg.
QQ voice messages are typically SILK format which ffmpeg cannot decode.
@@ -1175,9 +1408,18 @@ class QQAdapter(BasePlatformAdapter):
"""
import tempfile
- ext = Path(filename).suffix.lower() if Path(filename).suffix else self._guess_ext_from_data(audio_data)
- logger.info("[QQ] STT: audio_data size=%d, ext=%r, first_20_bytes=%r",
- len(audio_data), ext, audio_data[:20])
+ ext = (
+ Path(filename).suffix.lower()
+ if Path(filename).suffix
+ else self._guess_ext_from_data(audio_data)
+ )
+ logger.info(
+ "[%s] STT: audio_data size=%d, ext=%r, first_20_bytes=%r",
+ self._log_tag,
+ len(audio_data),
+ ext,
+ audio_data[:20],
+ )
with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_src:
tmp_src.write(audio_data)
@@ -1229,8 +1471,7 @@ class QQAdapter(BasePlatformAdapter):
"""Check if bytes look like a SILK audio file."""
return data[:4] == b"#!SILK" or data[:2] == b"\x02!" or data[:9] == b"#!SILK_V3"
- @staticmethod
- async def _convert_silk_to_wav(src_path: str, wav_path: str) -> Optional[str]:
+ async def _convert_silk_to_wav(self, src_path: str, wav_path: str) -> Optional[str]:
"""Convert audio file to WAV using the pilk library.
Tries the file as-is first, then as .silk if the extension differs.
@@ -1239,31 +1480,43 @@ class QQAdapter(BasePlatformAdapter):
try:
import pilk
except ImportError:
- logger.warning("[QQ] pilk not installed — cannot decode SILK audio. Run: pip install pilk")
+ logger.warning(
+ "[%s] pilk not installed — cannot decode SILK audio. Run: pip install pilk",
+ self._log_tag,
+ )
return None
# Try converting the file as-is
try:
pilk.silk_to_wav(src_path, wav_path, rate=16000)
if Path(wav_path).exists() and Path(wav_path).stat().st_size > 44:
- logger.info("[QQ] pilk converted %s to wav (%d bytes)",
- Path(src_path).name, Path(wav_path).stat().st_size)
+ logger.debug(
+ "[%s] pilk converted %s to wav (%d bytes)",
+ self._log_tag,
+ Path(src_path).name,
+ Path(wav_path).stat().st_size,
+ )
return wav_path
except Exception as exc:
- logger.debug("[QQ] pilk direct conversion failed: %s", exc)
+ logger.debug("[%s] pilk direct conversion failed: %s", self._log_tag, exc)
# Try renaming to .silk and converting (pilk checks the extension)
silk_path = src_path.rsplit(".", 1)[0] + ".silk"
try:
import shutil
+
shutil.copy2(src_path, silk_path)
pilk.silk_to_wav(silk_path, wav_path, rate=16000)
if Path(wav_path).exists() and Path(wav_path).stat().st_size > 44:
- logger.info("[QQ] pilk converted %s (as .silk) to wav (%d bytes)",
- Path(src_path).name, Path(wav_path).stat().st_size)
+ logger.debug(
+ "[%s] pilk converted %s (as .silk) to wav (%d bytes)",
+ self._log_tag,
+ Path(src_path).name,
+ Path(wav_path).stat().st_size,
+ )
return wav_path
except Exception as exc:
- logger.debug("[QQ] pilk .silk conversion failed: %s", exc)
+ logger.debug("[%s] pilk .silk conversion failed: %s", self._log_tag, exc)
finally:
try:
os.unlink(silk_path)
@@ -1272,8 +1525,7 @@ class QQAdapter(BasePlatformAdapter):
return None
- @staticmethod
- async def _convert_raw_to_wav(audio_data: bytes, wav_path: str) -> Optional[str]:
+ async def _convert_raw_to_wav(self, audio_data: bytes, wav_path: str) -> Optional[str]:
"""Last resort: try writing audio data as raw PCM 16-bit mono 16kHz WAV.
This will produce garbage if the data isn't raw PCM, but at least
@@ -1281,6 +1533,7 @@ class QQAdapter(BasePlatformAdapter):
"""
try:
import wave
+
with wave.open(wav_path, "w") as wf:
wf.setnchannels(1)
wf.setsampwidth(2)
@@ -1288,33 +1541,52 @@ class QQAdapter(BasePlatformAdapter):
wf.writeframes(audio_data)
return wav_path
except Exception as exc:
- logger.debug("[QQ] raw PCM fallback failed: %s", exc)
+ logger.debug("[%s] raw PCM fallback failed: %s", self._log_tag, exc)
return None
- @staticmethod
- async def _convert_ffmpeg_to_wav(src_path: str, wav_path: str) -> Optional[str]:
+ async def _convert_ffmpeg_to_wav(self, src_path: str, wav_path: str) -> Optional[str]:
"""Convert audio file to WAV using ffmpeg."""
try:
proc = await asyncio.create_subprocess_exec(
- "ffmpeg", "-y", "-i", src_path, "-ar", "16000", "-ac", "1", wav_path,
+ "ffmpeg",
+ "-y",
+ "-i",
+ src_path,
+ "-ar",
+ "16000",
+ "-ac",
+ "1",
+ wav_path,
stdout=asyncio.subprocess.DEVNULL,
stderr=asyncio.subprocess.PIPE,
)
await asyncio.wait_for(proc.wait(), timeout=30)
if proc.returncode != 0:
stderr = await proc.stderr.read() if proc.stderr else b""
- logger.warning("[QQ] ffmpeg failed for %s: %s",
- Path(src_path).name, stderr[:200].decode(errors="replace"))
+ logger.warning(
+ "[%s] ffmpeg failed for %s: %s",
+ self._log_tag,
+ Path(src_path).name,
+ stderr[:200].decode(errors="replace"),
+ )
return None
except (asyncio.TimeoutError, FileNotFoundError) as exc:
- logger.warning("[QQ] ffmpeg conversion error: %s", exc)
+ logger.warning("[%s] ffmpeg conversion error: %s", self._log_tag, exc)
return None
if not Path(wav_path).exists() or Path(wav_path).stat().st_size <= 44:
- logger.warning("[QQ] ffmpeg produced no/small output for %s", Path(src_path).name)
+ logger.warning(
+ "[%s] ffmpeg produced no/small output for %s",
+ self._log_tag,
+ Path(src_path).name,
+ )
return None
- logger.info("[QQ] ffmpeg converted %s to wav (%d bytes)",
- Path(src_path).name, Path(wav_path).stat().st_size)
+ logger.debug(
+ "[%s] ffmpeg converted %s to wav (%d bytes)",
+ self._log_tag,
+ Path(src_path).name,
+ Path(wav_path).stat().st_size,
+ )
return wav_path
def _resolve_stt_config(self) -> Optional[Dict[str, str]]:
@@ -1353,7 +1625,8 @@ class QQAdapter(BasePlatformAdapter):
return {
"base_url": base_url,
"api_key": api_key,
- "model": model or ("glm-asr" if provider in ("zai", "glm") else "whisper-1"),
+ "model": model
+ or ("glm-asr" if provider in ("zai", "glm") else "whisper-1"),
}
# 2. QQ-specific env vars (set by `hermes setup gateway` / `hermes gateway`)
@@ -1381,7 +1654,10 @@ class QQAdapter(BasePlatformAdapter):
"""
stt_cfg = self._resolve_stt_config()
if not stt_cfg:
- logger.warning("[QQ] STT not configured (no stt config or QQ_STT_API_KEY)")
+ logger.warning(
+ "[%s] STT not configured (no stt config or QQ_STT_API_KEY)",
+ self._log_tag,
+ )
return None
base_url = stt_cfg["base_url"]
@@ -1411,17 +1687,37 @@ class QQAdapter(BasePlatformAdapter):
return text.strip()
return None
except (httpx.HTTPStatusError, IOError) as exc:
- logger.warning("[QQ] STT API call failed (model=%s, base=%s): %s",
- model, base_url[:50], exc)
+ logger.warning(
+ "[%s] STT API call failed (model=%s, base=%s): %s",
+ self._log_tag,
+ model,
+ base_url[:50],
+ exc,
+ )
return None
- async def _convert_audio_to_wav(self, audio_data: bytes, source_url: str) -> Optional[str]:
+ async def _convert_audio_to_wav(
+ self, audio_data: bytes, source_url: str
+ ) -> Optional[str]:
"""Convert audio bytes to .wav using pilk (SILK) or ffmpeg, caching the result."""
import tempfile
# Determine source format from magic bytes or URL
- ext = Path(urlparse(source_url).path).suffix.lower() if urlparse(source_url).path else ""
- if not ext or ext not in (".silk", ".amr", ".mp3", ".wav", ".ogg", ".m4a", ".aac", ".flac"):
+ ext = (
+ Path(urlparse(source_url).path).suffix.lower()
+ if urlparse(source_url).path
+ else ""
+ )
+ if not ext or ext not in (
+ ".silk",
+ ".amr",
+ ".mp3",
+ ".wav",
+ ".ogg",
+ ".m4a",
+ ".aac",
+ ".flac",
+ ):
ext = self._guess_ext_from_data(audio_data)
with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_src:
@@ -1437,8 +1733,12 @@ class QQAdapter(BasePlatformAdapter):
result = await self._convert_ffmpeg_to_wav(src_path, wav_path)
if not result:
- logger.warning("[%s] audio conversion failed for %s (format=%s)",
- self.name, source_url[:60], ext)
+ logger.warning(
+ "[%s] audio conversion failed for %s (format=%s)",
+ self._log_tag,
+ source_url[:60],
+ ext,
+ )
return cache_document_from_bytes(audio_data, f"qq_voice{ext}")
except Exception:
return cache_document_from_bytes(audio_data, f"qq_voice{ext}")
@@ -1454,7 +1754,7 @@ class QQAdapter(BasePlatformAdapter):
os.unlink(wav_path)
return cache_document_from_bytes(wav_data, "qq_voice.wav")
except Exception as exc:
- logger.debug("[%s] Failed to read converted wav: %s", self.name, exc)
+ logger.debug("[%s] Failed to read converted wav: %s", self._log_tag, exc)
return None
# ------------------------------------------------------------------
@@ -1462,11 +1762,11 @@ class QQAdapter(BasePlatformAdapter):
# ------------------------------------------------------------------
async def _api_request(
- self,
- method: str,
- path: str,
- body: Optional[Dict[str, Any]] = None,
- timeout: float = DEFAULT_API_TIMEOUT,
+ self,
+ method: str,
+ path: str,
+ body: Optional[Dict[str, Any]] = None,
+ timeout: float = DEFAULT_API_TIMEOUT,
) -> Dict[str, Any]:
"""Make an authenticated REST API request to QQ Bot API."""
if not self._http_client:
@@ -1476,6 +1776,7 @@ class QQAdapter(BasePlatformAdapter):
headers = {
"Authorization": f"QQBot {token}",
"Content-Type": "application/json",
+ "User-Agent": build_user_agent(),
}
try:
@@ -1497,17 +1798,21 @@ class QQAdapter(BasePlatformAdapter):
raise RuntimeError(f"QQ Bot API timeout [{path}]: {exc}") from exc
async def _upload_media(
- self,
- target_type: str,
- target_id: str,
- file_type: int,
- url: Optional[str] = None,
- file_data: Optional[str] = None,
- srv_send_msg: bool = False,
- file_name: Optional[str] = None,
+ self,
+ target_type: str,
+ target_id: str,
+ file_type: int,
+ url: Optional[str] = None,
+ file_data: Optional[str] = None,
+ srv_send_msg: bool = False,
+ file_name: Optional[str] = None,
) -> Dict[str, Any]:
"""Upload media and return file_info."""
- path = f"/v2/users/{target_id}/files" if target_type == "c2c" else f"/v2/groups/{target_id}/files"
+ path = (
+ f"/v2/users/{target_id}/files"
+ if target_type == "c2c"
+ else f"/v2/groups/{target_id}/files"
+ )
body: Dict[str, Any] = {
"file_type": file_type,
@@ -1524,11 +1829,16 @@ class QQAdapter(BasePlatformAdapter):
last_exc = None
for attempt in range(3):
try:
- return await self._api_request("POST", path, body, timeout=FILE_UPLOAD_TIMEOUT)
+ return await self._api_request(
+ "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT
+ )
except RuntimeError as exc:
last_exc = exc
err_msg = str(exc)
- if any(kw in err_msg for kw in ("400", "401", "Invalid", "timeout", "Timeout")):
+ if any(
+ kw in err_msg
+ for kw in ("400", "401", "Invalid", "timeout", "Timeout")
+ ):
raise
if attempt < 2:
await asyncio.sleep(1.5 * (attempt + 1))
@@ -1551,23 +1861,23 @@ class QQAdapter(BasePlatformAdapter):
Returns True if reconnected, False if still disconnected.
"""
logger.info("[%s] Not connected — waiting for reconnection (up to %.0fs)",
- self.name, self._RECONNECT_WAIT_SECONDS)
+ self._log_tag, self._RECONNECT_WAIT_SECONDS)
waited = 0.0
while waited < self._RECONNECT_WAIT_SECONDS:
await asyncio.sleep(self._RECONNECT_POLL_INTERVAL)
waited += self._RECONNECT_POLL_INTERVAL
if self.is_connected:
- logger.info("[%s] Reconnected after %.1fs", self.name, waited)
+ logger.info("[%s] Reconnected after %.1fs", self._log_tag, waited)
return True
- logger.warning("[%s] Still not connected after %.0fs", self.name, self._RECONNECT_WAIT_SECONDS)
+ logger.warning("[%s] Still not connected after %.0fs", self._log_tag, self._RECONNECT_WAIT_SECONDS)
return False
async def send(
- self,
- chat_id: str,
- content: str,
- reply_to: Optional[str] = None,
- metadata: Optional[Dict[str, Any]] = None,
+ self,
+ chat_id: str,
+ content: str,
+ reply_to: Optional[str] = None,
+ metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
"""Send a text or markdown message to a QQ user or group.
@@ -1596,7 +1906,10 @@ class QQAdapter(BasePlatformAdapter):
return last_result
async def _send_chunk(
- self, chat_id: str, content: str, reply_to: Optional[str] = None,
+ self,
+ chat_id: str,
+ content: str,
+ reply_to: Optional[str] = None,
) -> SendResult:
"""Send a single chunk with retry + exponential backoff."""
last_exc: Optional[Exception] = None
@@ -1611,28 +1924,39 @@ class QQAdapter(BasePlatformAdapter):
elif chat_type == "guild":
return await self._send_guild_text(chat_id, content, reply_to)
else:
- return SendResult(success=False, error=f"Unknown chat type for {chat_id}")
+ return SendResult(
+ success=False, error=f"Unknown chat type for {chat_id}"
+ )
except Exception as exc:
last_exc = exc
err = str(exc).lower()
# Permanent errors — don't retry
- if any(k in err for k in ("invalid", "forbidden", "not found", "bad request")):
+ if any(
+ k in err
+ for k in ("invalid", "forbidden", "not found", "bad request")
+ ):
break
# Transient — back off and retry
if attempt < 2:
delay = 1.0 * (2 ** attempt)
- logger.warning("[%s] send retry %d/3 after %.1fs: %s",
- self.name, attempt + 1, delay, exc)
+ logger.warning(
+ "[%s] send retry %d/3 after %.1fs: %s",
+ self._log_tag,
+ attempt + 1,
+ delay,
+ exc,
+ )
await asyncio.sleep(delay)
error_msg = str(last_exc) if last_exc else "Unknown error"
- logger.error("[%s] Send failed: %s", self.name, error_msg)
- retryable = not any(k in error_msg.lower()
- for k in ("invalid", "forbidden", "not found"))
+ logger.error("[%s] Send failed: %s", self._log_tag, error_msg)
+ retryable = not any(
+ k in error_msg.lower() for k in ("invalid", "forbidden", "not found")
+ )
return SendResult(success=False, error=error_msg, retryable=retryable)
async def _send_c2c_text(
- self, openid: str, content: str, reply_to: Optional[str] = None
+ self, openid: str, content: str, reply_to: Optional[str] = None
) -> SendResult:
"""Send text to a C2C user via REST API."""
msg_seq = self._next_msg_seq(reply_to or openid)
@@ -1645,7 +1969,7 @@ class QQAdapter(BasePlatformAdapter):
return SendResult(success=True, message_id=msg_id, raw_response=data)
async def _send_group_text(
- self, group_openid: str, content: str, reply_to: Optional[str] = None
+ self, group_openid: str, content: str, reply_to: Optional[str] = None
) -> SendResult:
"""Send text to a group via REST API."""
msg_seq = self._next_msg_seq(reply_to or group_openid)
@@ -1653,15 +1977,17 @@ class QQAdapter(BasePlatformAdapter):
if reply_to:
body["msg_id"] = reply_to
- data = await self._api_request("POST", f"/v2/groups/{group_openid}/messages", body)
+ data = await self._api_request(
+ "POST", f"/v2/groups/{group_openid}/messages", body
+ )
msg_id = str(data.get("id", uuid.uuid4().hex[:12]))
return SendResult(success=True, message_id=msg_id, raw_response=data)
async def _send_guild_text(
- self, channel_id: str, content: str, reply_to: Optional[str] = None
+ self, channel_id: str, content: str, reply_to: Optional[str] = None
) -> SendResult:
"""Send text to a guild channel via REST API."""
- body: Dict[str, Any] = {"content": content[:self.MAX_MESSAGE_LENGTH]}
+ body: Dict[str, Any] = {"content": content[: self.MAX_MESSAGE_LENGTH]}
if reply_to:
body["msg_id"] = reply_to
@@ -1669,19 +1995,21 @@ class QQAdapter(BasePlatformAdapter):
msg_id = str(data.get("id", uuid.uuid4().hex[:12]))
return SendResult(success=True, message_id=msg_id, raw_response=data)
- def _build_text_body(self, content: str, reply_to: Optional[str] = None) -> Dict[str, Any]:
+ def _build_text_body(
+ self, content: str, reply_to: Optional[str] = None
+ ) -> Dict[str, Any]:
"""Build the message body for C2C/group text sending."""
msg_seq = self._next_msg_seq(reply_to or "default")
if self._markdown_support:
body: Dict[str, Any] = {
- "markdown": {"content": content[:self.MAX_MESSAGE_LENGTH]},
+ "markdown": {"content": content[: self.MAX_MESSAGE_LENGTH]},
"msg_type": MSG_TYPE_MARKDOWN,
"msg_seq": msg_seq,
}
else:
body = {
- "content": content[:self.MAX_MESSAGE_LENGTH],
+ "content": content[: self.MAX_MESSAGE_LENGTH],
"msg_type": MSG_TYPE_TEXT,
"msg_seq": msg_seq,
}
@@ -1698,84 +2026,103 @@ class QQAdapter(BasePlatformAdapter):
# ------------------------------------------------------------------
async def send_image(
- self,
- chat_id: str,
- image_url: str,
- caption: Optional[str] = None,
- reply_to: Optional[str] = None,
- metadata: Optional[Dict[str, Any]] = None,
+ self,
+ chat_id: str,
+ image_url: str,
+ caption: Optional[str] = None,
+ reply_to: Optional[str] = None,
+ metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
"""Send an image natively via QQ Bot API upload."""
del metadata
- result = await self._send_media(chat_id, image_url, MEDIA_TYPE_IMAGE, "image", caption, reply_to)
+ result = await self._send_media(
+ chat_id, image_url, MEDIA_TYPE_IMAGE, "image", caption, reply_to
+ )
if result.success or not self._is_url(image_url):
return result
# Fallback to text URL
- logger.warning("[%s] Image send failed, falling back to text: %s", self.name, result.error)
+ logger.warning(
+ "[%s] Image send failed, falling back to text: %s",
+ self._log_tag,
+ result.error,
+ )
fallback = f"{caption}\n{image_url}" if caption else image_url
return await self.send(chat_id=chat_id, content=fallback, reply_to=reply_to)
async def send_image_file(
- self,
- chat_id: str,
- image_path: str,
- caption: Optional[str] = None,
- reply_to: Optional[str] = None,
- **kwargs,
+ self,
+ chat_id: str,
+ image_path: str,
+ caption: Optional[str] = None,
+ reply_to: Optional[str] = None,
+ **kwargs,
) -> SendResult:
"""Send a local image file natively."""
del kwargs
- return await self._send_media(chat_id, image_path, MEDIA_TYPE_IMAGE, "image", caption, reply_to)
+ return await self._send_media(
+ chat_id, image_path, MEDIA_TYPE_IMAGE, "image", caption, reply_to
+ )
async def send_voice(
- self,
- chat_id: str,
- audio_path: str,
- caption: Optional[str] = None,
- reply_to: Optional[str] = None,
- **kwargs,
+ self,
+ chat_id: str,
+ audio_path: str,
+ caption: Optional[str] = None,
+ reply_to: Optional[str] = None,
+ **kwargs,
) -> SendResult:
"""Send a voice message natively."""
del kwargs
- return await self._send_media(chat_id, audio_path, MEDIA_TYPE_VOICE, "voice", caption, reply_to)
+ return await self._send_media(
+ chat_id, audio_path, MEDIA_TYPE_VOICE, "voice", caption, reply_to
+ )
async def send_video(
- self,
- chat_id: str,
- video_path: str,
- caption: Optional[str] = None,
- reply_to: Optional[str] = None,
- **kwargs,
+ self,
+ chat_id: str,
+ video_path: str,
+ caption: Optional[str] = None,
+ reply_to: Optional[str] = None,
+ **kwargs,
) -> SendResult:
"""Send a video natively."""
del kwargs
- return await self._send_media(chat_id, video_path, MEDIA_TYPE_VIDEO, "video", caption, reply_to)
+ return await self._send_media(
+ chat_id, video_path, MEDIA_TYPE_VIDEO, "video", caption, reply_to
+ )
async def send_document(
- self,
- chat_id: str,
- file_path: str,
- caption: Optional[str] = None,
- file_name: Optional[str] = None,
- reply_to: Optional[str] = None,
- **kwargs,
+ self,
+ chat_id: str,
+ file_path: str,
+ caption: Optional[str] = None,
+ file_name: Optional[str] = None,
+ reply_to: Optional[str] = None,
+ **kwargs,
) -> SendResult:
"""Send a file/document natively."""
del kwargs
- return await self._send_media(chat_id, file_path, MEDIA_TYPE_FILE, "file", caption, reply_to,
- file_name=file_name)
+ return await self._send_media(
+ chat_id,
+ file_path,
+ MEDIA_TYPE_FILE,
+ "file",
+ caption,
+ reply_to,
+ file_name=file_name,
+ )
async def _send_media(
- self,
- chat_id: str,
- media_source: str,
- file_type: int,
- kind: str,
- caption: Optional[str] = None,
- reply_to: Optional[str] = None,
- file_name: Optional[str] = None,
+ self,
+ chat_id: str,
+ media_source: str,
+ file_type: int,
+ kind: str,
+ caption: Optional[str] = None,
+ reply_to: Optional[str] = None,
+ file_name: Optional[str] = None,
) -> SendResult:
"""Upload media and send as a native message."""
if not self.is_connected:
@@ -1784,20 +2131,30 @@ class QQAdapter(BasePlatformAdapter):
try:
# Resolve media source
- data, content_type, resolved_name = await self._load_media(media_source, file_name)
+ data, content_type, resolved_name = await self._load_media(
+ media_source, file_name
+ )
# Route
chat_type = self._guess_chat_type(chat_id)
- target_path = f"/v2/users/{chat_id}/files" if chat_type == "c2c" else f"/v2/groups/{chat_id}/files"
+ target_path = (
+ f"/v2/users/{chat_id}/files"
+ if chat_type == "c2c"
+ else f"/v2/groups/{chat_id}/files"
+ )
if chat_type == "guild":
# Guild channels don't support native media upload in the same way
# Send as URL fallback
- return SendResult(success=False, error="Guild media send not supported via this path")
+ return SendResult(
+ success=False, error="Guild media send not supported via this path"
+ )
# Upload
upload = await self._upload_media(
- chat_type, chat_id, file_type,
+ chat_type,
+ chat_id,
+ file_type,
file_data=data if not self._is_url(media_source) else None,
url=media_source if self._is_url(media_source) else None,
srv_send_msg=False,
@@ -1806,7 +2163,9 @@ class QQAdapter(BasePlatformAdapter):
file_info = upload.get("file_info")
if not file_info:
- return SendResult(success=False, error=f"Upload returned no file_info: {upload}")
+ return SendResult(
+ success=False, error=f"Upload returned no file_info: {upload}"
+ )
# Send media message
msg_seq = self._next_msg_seq(chat_id)
@@ -1816,13 +2175,17 @@ class QQAdapter(BasePlatformAdapter):
"msg_seq": msg_seq,
}
if caption:
- body["content"] = caption[:self.MAX_MESSAGE_LENGTH]
+ body["content"] = caption[: self.MAX_MESSAGE_LENGTH]
if reply_to:
body["msg_id"] = reply_to
send_data = await self._api_request(
"POST",
- f"/v2/users/{chat_id}/messages" if chat_type == "c2c" else f"/v2/groups/{chat_id}/messages",
+ (
+ f"/v2/users/{chat_id}/messages"
+ if chat_type == "c2c"
+ else f"/v2/groups/{chat_id}/messages"
+ ),
body,
)
return SendResult(
@@ -1831,11 +2194,11 @@ class QQAdapter(BasePlatformAdapter):
raw_response=send_data,
)
except Exception as exc:
- logger.error("[%s] Media send failed: %s", self.name, exc)
+ logger.error("[%s] Media send failed: %s", self._log_tag, exc)
return SendResult(success=False, error=str(exc))
async def _load_media(
- self, source: str, file_name: Optional[str] = None
+ self, source: str, file_name: Optional[str] = None
) -> Tuple[str, str, str]:
"""Load media from URL or local path. Returns (base64_or_url, content_type, filename)."""
source = str(source).strip()
@@ -1866,7 +2229,9 @@ class QQAdapter(BasePlatformAdapter):
raw = local_path.read_bytes()
resolved_name = file_name or local_path.name
- content_type = mimetypes.guess_type(str(local_path))[0] or "application/octet-stream"
+ content_type = (
+ mimetypes.guess_type(str(local_path))[0] or "application/octet-stream"
+ )
b64 = base64.b64encode(raw).decode("ascii")
return b64, content_type, resolved_name
@@ -1875,27 +2240,44 @@ class QQAdapter(BasePlatformAdapter):
# ------------------------------------------------------------------
async def send_typing(self, chat_id: str, metadata=None) -> None:
- """Send an input notify to a C2C user (only supported for C2C)."""
- del metadata
+ """Send an input notify to a C2C user (only supported for C2C).
+ Debounced to one request per ~50s (the API sets a 60s indicator).
+ The QQ API requires the originating message ID — retrieved from
+ ``_last_msg_id`` which is populated by ``_on_message``.
+ """
if not self.is_connected:
return
- # Only C2C supports input notify
chat_type = self._guess_chat_type(chat_id)
if chat_type != "c2c":
return
+ msg_id = self._last_msg_id.get(chat_id)
+ if not msg_id:
+ return
+
+ # Debounce — skip if we sent recently
+ now = time.time()
+ last_sent = self._typing_sent_at.get(chat_id, 0.0)
+ if now - last_sent < self._TYPING_DEBOUNCE_SECONDS:
+ return
+
try:
msg_seq = self._next_msg_seq(chat_id)
body = {
"msg_type": MSG_TYPE_INPUT_NOTIFY,
- "input_notify": {"input_type": 1, "input_second": 60},
+ "msg_id": msg_id,
+ "input_notify": {
+ "input_type": 1,
+ "input_second": self._TYPING_INPUT_SECONDS,
+ },
"msg_seq": msg_seq,
}
await self._api_request("POST", f"/v2/users/{chat_id}/messages", body)
+ self._typing_sent_at[chat_id] = now
except Exception as exc:
- logger.debug("[%s] send_typing failed: %s", self.name, exc)
+ logger.debug("[%s] send_typing failed: %s", self._log_tag, exc)
# ------------------------------------------------------------------
# Format
@@ -1942,7 +2324,8 @@ class QQAdapter(BasePlatformAdapter):
"""Strip the @bot mention prefix from group message content."""
# QQ group @-messages may have the bot's QQ/ID as prefix
import re
- stripped = re.sub(r'^@\S+\s*', '', content.strip())
+
+ stripped = re.sub(r"^@\S+\s*", "", content.strip())
return stripped
def _is_dm_allowed(self, user_id: str) -> bool:
diff --git a/gateway/platforms/qqbot/constants.py b/gateway/platforms/qqbot/constants.py
new file mode 100644
index 0000000000..ddae3c133e
--- /dev/null
+++ b/gateway/platforms/qqbot/constants.py
@@ -0,0 +1,74 @@
+"""QQBot package-level constants shared across adapter, onboard, and other modules."""
+
+from __future__ import annotations
+
+import os
+
+# ---------------------------------------------------------------------------
+# QQBot adapter version — bump on functional changes to the adapter package.
+# ---------------------------------------------------------------------------
+
+QQBOT_VERSION = "1.1.0"
+
+# ---------------------------------------------------------------------------
+# API endpoints
+# ---------------------------------------------------------------------------
+
+# The portal domain is configurable via QQ_PORTAL_HOST for corporate proxies
+# or test environments. Default: q.qq.com (production).
+PORTAL_HOST = os.getenv("QQ_PORTAL_HOST", "q.qq.com")
+
+API_BASE = "https://api.sgroup.qq.com"
+TOKEN_URL = "https://bots.qq.com/app/getAppAccessToken"
+GATEWAY_URL_PATH = "/gateway"
+
+# QR-code onboard endpoints (on the portal host)
+ONBOARD_CREATE_PATH = "/lite/create_bind_task"
+ONBOARD_POLL_PATH = "/lite/poll_bind_result"
+QR_URL_TEMPLATE = (
+ "https://q.qq.com/qqbot/openclaw/connect.html"
+ "?task_id={task_id}&_wv=2&source=hermes"
+)
+
+# ---------------------------------------------------------------------------
+# Timeouts & retry
+# ---------------------------------------------------------------------------
+
+DEFAULT_API_TIMEOUT = 30.0
+FILE_UPLOAD_TIMEOUT = 120.0
+CONNECT_TIMEOUT_SECONDS = 20.0
+
+RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
+MAX_RECONNECT_ATTEMPTS = 100
+RATE_LIMIT_DELAY = 60 # seconds
+QUICK_DISCONNECT_THRESHOLD = 5.0 # seconds
+MAX_QUICK_DISCONNECT_COUNT = 3
+
+ONBOARD_POLL_INTERVAL = 2.0 # seconds between poll_bind_result calls
+ONBOARD_API_TIMEOUT = 10.0
+
+# ---------------------------------------------------------------------------
+# Message limits
+# ---------------------------------------------------------------------------
+
+MAX_MESSAGE_LENGTH = 4000
+DEDUP_WINDOW_SECONDS = 300
+DEDUP_MAX_SIZE = 1000
+
+# ---------------------------------------------------------------------------
+# QQ Bot message types
+# ---------------------------------------------------------------------------
+
+MSG_TYPE_TEXT = 0
+MSG_TYPE_MARKDOWN = 2
+MSG_TYPE_MEDIA = 7
+MSG_TYPE_INPUT_NOTIFY = 6
+
+# ---------------------------------------------------------------------------
+# QQ Bot file media types
+# ---------------------------------------------------------------------------
+
+MEDIA_TYPE_IMAGE = 1
+MEDIA_TYPE_VIDEO = 2
+MEDIA_TYPE_VOICE = 3
+MEDIA_TYPE_FILE = 4
diff --git a/gateway/platforms/qqbot/crypto.py b/gateway/platforms/qqbot/crypto.py
new file mode 100644
index 0000000000..426bd29de5
--- /dev/null
+++ b/gateway/platforms/qqbot/crypto.py
@@ -0,0 +1,45 @@
+"""AES-256-GCM utilities for QQBot scan-to-configure credential decryption."""
+
+from __future__ import annotations
+
+import base64
+import os
+
+
+def generate_bind_key() -> str:
+ """Generate a 256-bit random AES key and return it as base64.
+
+ The key is passed to ``create_bind_task`` so the server can encrypt
+ the bot's *client_secret* before returning it. Only this CLI holds
+ the key, ensuring the secret never travels in plaintext.
+ """
+ return base64.b64encode(os.urandom(32)).decode()
+
+
+def decrypt_secret(encrypted_base64: str, key_base64: str) -> str:
+ """Decrypt a base64-encoded AES-256-GCM ciphertext.
+
+ Ciphertext layout (after base64-decoding)::
+
+ IV (12 bytes) ‖ ciphertext (N bytes) ‖ AuthTag (16 bytes)
+
+ Args:
+ encrypted_base64: The ``bot_encrypt_secret`` value from
+ ``poll_bind_result``.
+ key_base64: The base64 AES key generated by
+ :func:`generate_bind_key`.
+
+ Returns:
+ The decrypted *client_secret* as a UTF-8 string.
+ """
+ from cryptography.hazmat.primitives.ciphers.aead import AESGCM
+
+ key = base64.b64decode(key_base64)
+ raw = base64.b64decode(encrypted_base64)
+
+ iv = raw[:12]
+ ciphertext_with_tag = raw[12:] # AESGCM expects ciphertext + tag concatenated
+
+ aesgcm = AESGCM(key)
+ plaintext = aesgcm.decrypt(iv, ciphertext_with_tag, None)
+ return plaintext.decode("utf-8")
diff --git a/gateway/platforms/qqbot/onboard.py b/gateway/platforms/qqbot/onboard.py
new file mode 100644
index 0000000000..65750b3f10
--- /dev/null
+++ b/gateway/platforms/qqbot/onboard.py
@@ -0,0 +1,124 @@
+"""
+QQBot scan-to-configure (QR code onboard) module.
+
+Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
+generate a QR-code URL and poll for scan completion. On success the caller
+receives the bot's *app_id*, *client_secret* (decrypted locally), and the
+scanner's *user_openid* — enough to fully configure the QQBot gateway.
+
+Reference: https://bot.q.qq.com/wiki/develop/api-v2/
+"""
+
+from __future__ import annotations
+
+import logging
+from enum import IntEnum
+from typing import Tuple
+from urllib.parse import quote
+
+from .constants import (
+ ONBOARD_API_TIMEOUT,
+ ONBOARD_CREATE_PATH,
+ ONBOARD_POLL_PATH,
+ PORTAL_HOST,
+ QR_URL_TEMPLATE,
+)
+from .crypto import generate_bind_key
+from .utils import get_api_headers
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Bind status
+# ---------------------------------------------------------------------------
+
+
+class BindStatus(IntEnum):
+ """Status codes returned by ``poll_bind_result``."""
+
+ NONE = 0
+ PENDING = 1
+ COMPLETED = 2
+ EXPIRED = 3
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+async def create_bind_task(
+ timeout: float = ONBOARD_API_TIMEOUT,
+) -> Tuple[str, str]:
+ """Create a bind task and return *(task_id, aes_key_base64)*.
+
+ The AES key is generated locally and sent to the server so it can
+ encrypt the bot credentials before returning them.
+
+ Raises:
+ RuntimeError: If the API returns a non-zero ``retcode``.
+ """
+ import httpx
+
+ url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
+ key = generate_bind_key()
+
+ async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
+ resp = await client.post(url, json={"key": key}, headers=get_api_headers())
+ resp.raise_for_status()
+ data = resp.json()
+
+ if data.get("retcode") != 0:
+ raise RuntimeError(data.get("msg", "create_bind_task failed"))
+
+ task_id = data.get("data", {}).get("task_id")
+ if not task_id:
+ raise RuntimeError("create_bind_task: missing task_id in response")
+
+ logger.debug("create_bind_task ok: task_id=%s", task_id)
+ return task_id, key
+
+
+async def poll_bind_result(
+ task_id: str,
+ timeout: float = ONBOARD_API_TIMEOUT,
+) -> Tuple[BindStatus, str, str, str]:
+ """Poll the bind result for *task_id*.
+
+ Returns:
+ A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.
+
+ * ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with
+ :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
+ key from :func:`create_bind_task`.
+ * ``user_openid`` is the OpenID of the person who scanned the code
+ (available when ``status == COMPLETED``).
+
+ Raises:
+ RuntimeError: If the API returns a non-zero ``retcode``.
+ """
+ import httpx
+
+ url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"
+
+ async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
+ resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
+ resp.raise_for_status()
+ data = resp.json()
+
+ if data.get("retcode") != 0:
+ raise RuntimeError(data.get("msg", "poll_bind_result failed"))
+
+ d = data.get("data", {})
+ return (
+ BindStatus(d.get("status", 0)),
+ str(d.get("bot_appid", "")),
+ d.get("bot_encrypt_secret", ""),
+ d.get("user_openid", ""),
+ )
+
+
+def build_connect_url(task_id: str) -> str:
+ """Build the QR-code target URL for a given *task_id*."""
+ return QR_URL_TEMPLATE.format(task_id=quote(task_id))
diff --git a/gateway/platforms/qqbot/utils.py b/gateway/platforms/qqbot/utils.py
new file mode 100644
index 0000000000..873e58d2a5
--- /dev/null
+++ b/gateway/platforms/qqbot/utils.py
@@ -0,0 +1,71 @@
+"""QQBot shared utilities — User-Agent, HTTP helpers, config coercion."""
+
+from __future__ import annotations
+
+import platform
+import sys
+from typing import Any, Dict, List
+
+from .constants import QQBOT_VERSION
+
+
+# ---------------------------------------------------------------------------
+# User-Agent
+# ---------------------------------------------------------------------------
+
+def _get_hermes_version() -> str:
+ """Return the hermes-agent package version, or 'dev' if unavailable."""
+ try:
+ from importlib.metadata import version
+ return version("hermes-agent")
+ except Exception:
+ return "dev"
+
+
+def build_user_agent() -> str:
+ """Build a descriptive User-Agent string.
+
+ Format::
+
+        QQBotAdapter/<version> (Python/<py_version>; <os_name>; Hermes/<hermes_version>)
+
+ Example::
+
+ QQBotAdapter/1.0.0 (Python/3.11.15; darwin; Hermes/0.9.0)
+ """
+ py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
+ os_name = platform.system().lower()
+ hermes_version = _get_hermes_version()
+ return f"QQBotAdapter/{QQBOT_VERSION} (Python/{py_version}; {os_name}; Hermes/{hermes_version})"
+
+
+def get_api_headers() -> Dict[str, str]:
+ """Return standard HTTP headers for QQBot API requests.
+
+ Includes ``Content-Type``, ``Accept``, and a dynamic ``User-Agent``.
+ ``q.qq.com`` requires ``Accept: application/json`` — without it,
+ the server returns a JavaScript anti-bot challenge page.
+ """
+ return {
+ "Content-Type": "application/json",
+ "Accept": "application/json",
+ "User-Agent": build_user_agent(),
+ }
+
+
+# ---------------------------------------------------------------------------
+# Config helpers
+# ---------------------------------------------------------------------------
+
+def coerce_list(value: Any) -> List[str]:
+ """Coerce config values into a trimmed string list.
+
+ Accepts comma-separated strings, lists, tuples, sets, or single values.
+ """
+ if value is None:
+ return []
+ if isinstance(value, str):
+ return [item.strip() for item in value.split(",") if item.strip()]
+ if isinstance(value, (list, tuple, set)):
+ return [str(item).strip() for item in value if str(item).strip()]
+ return [str(value).strip()] if str(value).strip() else []
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 2f4ec93294..5b1fef1337 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -118,6 +118,84 @@ def _strip_mdv2(text: str) -> str:
return cleaned
+# ---------------------------------------------------------------------------
+# Markdown table → code block conversion
+# ---------------------------------------------------------------------------
+# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
+# so pipe tables render as noisy backslash-pipe text with no alignment.
+# Wrapping the table in a fenced code block makes Telegram render it as
+# monospace preformatted text with columns intact.
+
+# Matches a GFM table delimiter row: optional outer pipes, cells containing
+# only dashes (with optional leading/trailing colons for alignment) separated
+# by '|'. Requires at least one internal '|' so lone '---' horizontal rules
+# are NOT matched.
+_TABLE_SEPARATOR_RE = re.compile(
+ r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*){1,}\|?\s*$'
+)
+
+
+def _is_table_row(line: str) -> bool:
+ """Return True if *line* could plausibly be a table data row."""
+ stripped = line.strip()
+ return bool(stripped) and '|' in stripped
+
+
+def _wrap_markdown_tables(text: str) -> str:
+ """Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
+
+ Detected by a row containing '|' immediately followed by a delimiter
+ row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing
+ non-blank lines are consumed as the table body and included in the
+ wrapped block. Tables inside existing fenced code blocks are left
+ alone.
+ """
+ if '|' not in text or '-' not in text:
+ return text
+
+ lines = text.split('\n')
+ out: list[str] = []
+ in_fence = False
+ i = 0
+ while i < len(lines):
+ line = lines[i]
+ stripped = line.lstrip()
+
+ # Track existing fenced code blocks — never touch content inside.
+ if stripped.startswith('```'):
+ in_fence = not in_fence
+ out.append(line)
+ i += 1
+ continue
+ if in_fence:
+ out.append(line)
+ i += 1
+ continue
+
+ # Look for a header row (contains '|') immediately followed by a
+ # delimiter row.
+ if (
+ '|' in line
+ and i + 1 < len(lines)
+ and _TABLE_SEPARATOR_RE.match(lines[i + 1])
+ ):
+ table_block = [line, lines[i + 1]]
+ j = i + 2
+ while j < len(lines) and _is_table_row(lines[j]):
+ table_block.append(lines[j])
+ j += 1
+ out.append('```')
+ out.extend(table_block)
+ out.append('```')
+ i = j
+ continue
+
+ out.append(line)
+ i += 1
+
+ return '\n'.join(out)
+
+
class TelegramAdapter(BasePlatformAdapter):
"""
Telegram bot adapter.
@@ -1916,6 +1994,12 @@ class TelegramAdapter(BasePlatformAdapter):
text = content
+ # 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't
+ # render tables natively, but fenced code blocks render as
+ # monospace preformatted text with columns intact. The wrapped
+ # tables then flow through step (1) below as protected regions.
+ text = _wrap_markdown_tables(text)
+
# 1) Protect fenced code blocks (``` ... ```)
# Per MarkdownV2 spec, \ and ` inside pre/code must be escaped.
def _protect_fenced(m):
diff --git a/gateway/run.py b/gateway/run.py
index 170c6f87de..ea747321f9 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -2178,6 +2178,30 @@ class GatewayRunner:
)
except Exception as _e:
logger.debug("Idle agent sweep failed: %s", _e)
+
+ # Periodically prune stale SessionStore entries. The
+ # in-memory dict (and sessions.json) would otherwise grow
+ # unbounded in gateways serving many rotating chats /
+ # threads / users over long time windows. Pruning is
+ # invisible to users — a resumed session just gets a
+ # fresh session_id, exactly as if the reset policy fired.
+ _last_prune_ts = getattr(self, "_last_session_store_prune_ts", 0.0)
+ _prune_interval = 3600.0 # once per hour
+ if time.time() - _last_prune_ts > _prune_interval:
+ try:
+ _max_age = int(
+ getattr(self.config, "session_store_max_age_days", 0) or 0
+ )
+ if _max_age > 0:
+ _pruned = self.session_store.prune_old_entries(_max_age)
+ if _pruned:
+ logger.info(
+ "SessionStore prune: dropped %d stale entries",
+ _pruned,
+ )
+ except Exception as _e:
+ logger.debug("SessionStore prune failed: %s", _e)
+ self._last_session_store_prune_ts = time.time()
except Exception as e:
logger.debug("Session expiry watcher error: %s", e)
# Sleep in small increments so we can stop quickly
@@ -2384,6 +2408,7 @@ class GatewayRunner:
self.adapters.clear()
self._running_agents.clear()
+ self._running_agents_ts.clear()
self._pending_messages.clear()
self._pending_approvals.clear()
if hasattr(self, '_busy_ack_ts'):
@@ -2408,6 +2433,20 @@ class GatewayRunner:
except Exception:
pass
+ # Close SQLite session DBs so the WAL write lock is released.
+ # Without this, --replace and similar restart flows leave the
+ # old gateway's connection holding the WAL lock until Python
+ # actually exits — causing 'database is locked' errors when
+ # the new gateway tries to open the same file.
+ for _db_holder in (self, getattr(self, "session_store", None)):
+ _db = getattr(_db_holder, "_db", None) if _db_holder else None
+ if _db is None or not hasattr(_db, "close"):
+ continue
+ try:
+ _db.close()
+ except Exception as _e:
+ logger.debug("SessionDB close error: %s", _e)
+
from gateway.status import remove_pid_file
remove_pid_file()
@@ -2906,9 +2945,7 @@ class GatewayRunner:
_quick_key[:30], _stale_age, _stale_idle,
_raw_stale_timeout, _stale_detail,
)
- del self._running_agents[_quick_key]
- self._running_agents_ts.pop(_quick_key, None)
- self._busy_ack_ts.pop(_quick_key, None)
+ self._release_running_agent_state(_quick_key)
if _quick_key in self._running_agents:
if event.get_command() == "status":
@@ -2936,8 +2973,7 @@ class GatewayRunner:
if adapter and hasattr(adapter, 'get_pending_message'):
adapter.get_pending_message(_quick_key) # consume and discard
self._pending_messages.pop(_quick_key, None)
- if _quick_key in self._running_agents:
- del self._running_agents[_quick_key]
+ self._release_running_agent_state(_quick_key)
logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
return "⚡ Stopped. You can continue this session."
@@ -2959,8 +2995,7 @@ class GatewayRunner:
self._pending_messages.pop(_quick_key, None)
# Clean up the running agent entry so the reset handler
# doesn't think an agent is still active.
- if _quick_key in self._running_agents:
- del self._running_agents[_quick_key]
+ self._release_running_agent_state(_quick_key)
return await self._handle_reset_command(event)
# /queue — queue without interrupting
@@ -3041,8 +3076,7 @@ class GatewayRunner:
# Agent is being set up but not ready yet.
if event.get_command() == "stop":
# Force-clean the sentinel so the session is unlocked.
- if _quick_key in self._running_agents:
- del self._running_agents[_quick_key]
+ self._release_running_agent_state(_quick_key)
logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20])
return "⚡ Force-stopped. The agent was still starting — session unlocked."
# Queue the message so it will be picked up after the
@@ -3361,8 +3395,13 @@ class GatewayRunner:
# (exception, command fallthrough, etc.) the sentinel must
# not linger or the session would be permanently locked out.
if self._running_agents.get(_quick_key) is _AGENT_PENDING_SENTINEL:
- del self._running_agents[_quick_key]
- self._running_agents_ts.pop(_quick_key, None)
+ self._release_running_agent_state(_quick_key)
+ else:
+ # Agent path already cleaned _running_agents; make sure
+ # the paired metadata dicts are gone too.
+ self._running_agents_ts.pop(_quick_key, None)
+ if hasattr(self, "_busy_ack_ts"):
+ self._busy_ack_ts.pop(_quick_key, None)
async def _prepare_inbound_message_text(
self,
@@ -4668,16 +4707,14 @@ class GatewayRunner:
agent = self._running_agents.get(session_key)
if agent is _AGENT_PENDING_SENTINEL:
# Force-clean the sentinel so the session is unlocked.
- if session_key in self._running_agents:
- del self._running_agents[session_key]
+ self._release_running_agent_state(session_key)
logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
if agent:
agent.interrupt("Stop requested")
# Force-clean the session lock so a truly hung agent doesn't
# keep it locked forever.
- if session_key in self._running_agents:
- del self._running_agents[session_key]
+ self._release_running_agent_state(session_key)
return "⚡ Stopped. You can continue this session."
else:
return "No active task to stop."
@@ -6593,8 +6630,7 @@ class GatewayRunner:
logger.debug("Memory flush on resume failed: %s", e)
# Clear any running agent for this session key
- if session_key in self._running_agents:
- del self._running_agents[session_key]
+ self._release_running_agent_state(session_key)
# Switch the session entry to point at the old session
new_entry = self.session_store.switch_session(session_key, target_id)
@@ -8010,6 +8046,30 @@ class GatewayRunner:
override = self._session_model_overrides.get(session_key)
return override is not None and override.get("model") == agent_model
+ def _release_running_agent_state(self, session_key: str) -> None:
+ """Pop ALL per-running-agent state entries for ``session_key``.
+
+ Replaces ad-hoc ``del self._running_agents[key]`` calls scattered
+ across the gateway. Those sites had drifted: some popped only
+ ``_running_agents``; some also ``_running_agents_ts``; only one
+ path also cleared ``_busy_ack_ts``. Each missed entry was a
+ small, persistent leak — a (str_key → float) tuple per session
+ per gateway lifetime.
+
+ Use this at every site that ends a running turn, regardless of
+ cause (normal completion, /stop, /reset, /resume, sentinel
+ cleanup, stale-eviction). Per-session state that PERSISTS
+ across turns (``_session_model_overrides``, ``_voice_mode``,
+ ``_pending_approvals``, ``_update_prompt_pending``) is NOT
+ touched here — those have their own lifecycles.
+ """
+ if not session_key:
+ return
+ self._running_agents.pop(session_key, None)
+ self._running_agents_ts.pop(session_key, None)
+ if hasattr(self, "_busy_ack_ts"):
+ self._busy_ack_ts.pop(session_key, None)
+
def _evict_cached_agent(self, session_key: str) -> None:
"""Remove a cached agent for a session (called on /new, /model, etc)."""
_lock = getattr(self, "_agent_cache_lock", None)
@@ -9845,10 +9905,8 @@ class GatewayRunner:
# Clean up tracking
tracking_task.cancel()
- if session_key and session_key in self._running_agents:
- del self._running_agents[session_key]
if session_key:
- self._running_agents_ts.pop(session_key, None)
+ self._release_running_agent_state(session_key)
if self._draining:
self._update_runtime_status("draining")
diff --git a/gateway/session.py b/gateway/session.py
index f057d1cfc0..4cb623128c 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -802,6 +802,57 @@ class SessionStore:
return True
return False
+ def prune_old_entries(self, max_age_days: int) -> int:
+ """Drop SessionEntry records older than max_age_days.
+
+ Pruning is based on ``updated_at`` (last activity), not ``created_at``.
+ A session that's been active within the window is kept regardless of
+ how old it is. Entries marked ``suspended`` are kept — the user
+ explicitly paused them for later resume. Entries held by an active
+ process (via has_active_processes_fn) are also kept so long-running
+ background work isn't orphaned.
+
+ Pruning is functionally identical to a natural reset-policy expiry:
+ the transcript in SQLite stays, but the session_key → session_id
+ mapping is dropped and the user starts a fresh session on return.
+
+    ``max_age_days`` of ``None`` or ``<= 0`` disables pruning; returns 0 immediately.
+ Returns the number of entries removed.
+ """
+ if max_age_days is None or max_age_days <= 0:
+ return 0
+ from datetime import timedelta
+
+ cutoff = _now() - timedelta(days=max_age_days)
+ removed_keys: list[str] = []
+
+ with self._lock:
+ self._ensure_loaded_locked()
+ for key, entry in list(self._entries.items()):
+ if entry.suspended:
+ continue
+ # Never prune sessions with an active background process
+ # attached — the user may still be waiting on output.
+ if self._has_active_processes_fn is not None:
+ try:
+ if self._has_active_processes_fn(entry.session_id):
+ continue
+ except Exception:
+ pass
+ if entry.updated_at < cutoff:
+ removed_keys.append(key)
+ for key in removed_keys:
+ self._entries.pop(key, None)
+ if removed_keys:
+ self._save()
+
+ if removed_keys:
+ logger.info(
+ "SessionStore pruned %d entries older than %d days",
+ len(removed_keys), max_age_days,
+ )
+ return len(removed_keys)
+
def suspend_recently_active(self, max_age_seconds: int = 120) -> int:
"""Mark recently-active sessions as suspended.
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index e79a6dca6d..421836c23c 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -233,6 +233,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("XAI_API_KEY",),
base_url_env_var="XAI_BASE_URL",
),
+ "nvidia": ProviderConfig(
+ id="nvidia",
+ name="NVIDIA NIM",
+ auth_type="api_key",
+ inference_base_url="https://integrate.api.nvidia.com/v1",
+ api_key_env_vars=("NVIDIA_API_KEY",),
+ base_url_env_var="NVIDIA_BASE_URL",
+ ),
"ai-gateway": ProviderConfig(
id="ai-gateway",
name="Vercel AI Gateway",
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index c7df033701..1670156b27 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -44,7 +44,8 @@ _EXTRA_ENV_KEYS = frozenset({
"WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
"WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
"BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
- "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME",
+ "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME",
+ "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME", # legacy aliases (pre-rename, still read for back-compat)
"QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT",
"QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL",
"TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
@@ -861,6 +862,22 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
+ "NVIDIA_API_KEY": {
+ "description": "NVIDIA NIM API key (build.nvidia.com or local NIM endpoint)",
+ "prompt": "NVIDIA NIM API key",
+ "url": "https://build.nvidia.com/",
+ "password": True,
+ "category": "provider",
+ "advanced": True,
+ },
+ "NVIDIA_BASE_URL": {
+ "description": "NVIDIA NIM base URL override (e.g. http://localhost:8000/v1 for local NIM)",
+ "prompt": "NVIDIA NIM base URL (leave empty for default)",
+ "url": None,
+ "password": False,
+ "category": "provider",
+ "advanced": True,
+ },
"GLM_API_KEY": {
"description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
"prompt": "Z.AI / GLM API key",
@@ -1518,12 +1535,12 @@ OPTIONAL_ENV_VARS = {
"prompt": "Allow All QQ Users",
"category": "messaging",
},
- "QQ_HOME_CHANNEL": {
+ "QQBOT_HOME_CHANNEL": {
"description": "Default QQ channel/group for cron delivery and notifications",
"prompt": "QQ Home Channel",
"category": "messaging",
},
- "QQ_HOME_CHANNEL_NAME": {
+ "QQBOT_HOME_CHANNEL_NAME": {
"description": "Display name for the QQ home channel",
"prompt": "QQ Home Channel Name",
"category": "messaging",
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index d044ddf4cf..28c4af1fa8 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -825,6 +825,7 @@ def run_doctor(args):
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
+ ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
# MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
diff --git a/hermes_cli/dump.py b/hermes_cli/dump.py
index a520790857..ae8ecc6419 100644
--- a/hermes_cli/dump.py
+++ b/hermes_cli/dump.py
@@ -296,6 +296,7 @@ def run_dump(args):
("DEEPSEEK_API_KEY", "deepseek"),
("DASHSCOPE_API_KEY", "dashscope"),
("HF_TOKEN", "huggingface"),
+ ("NVIDIA_API_KEY", "nvidia"),
("AI_GATEWAY_API_KEY", "ai_gateway"),
("OPENCODE_ZEN_API_KEY", "opencode_zen"),
("OPENCODE_GO_API_KEY", "opencode_go"),
diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index 585bbe4460..f5ebcf031c 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -1998,7 +1998,7 @@ _PLATFORMS = [
{"name": "QQ_ALLOWED_USERS", "prompt": "Allowed user OpenIDs (comma-separated, leave empty for open access)", "password": False,
"is_allowlist": True,
"help": "Optional — restrict DM access to specific user OpenIDs."},
- {"name": "QQ_HOME_CHANNEL", "prompt": "Home channel (user/group OpenID for cron delivery, or empty)", "password": False,
+ {"name": "QQBOT_HOME_CHANNEL", "prompt": "Home channel (user/group OpenID for cron delivery, or empty)", "password": False,
"help": "OpenID to deliver cron results and notifications to."},
],
},
@@ -2625,6 +2625,215 @@ def _setup_feishu():
print_info(f" Bot: {bot_name}")
+def _setup_qqbot():
+ """Interactive setup for QQ Bot — scan-to-configure or manual credentials."""
+ print()
+ print(color(" ─── 🐧 QQ Bot Setup ───", Colors.CYAN))
+
+ existing_app_id = get_env_value("QQ_APP_ID")
+ existing_secret = get_env_value("QQ_CLIENT_SECRET")
+ if existing_app_id and existing_secret:
+ print()
+ print_success("QQ Bot is already configured.")
+ if not prompt_yes_no(" Reconfigure QQ Bot?", False):
+ return
+
+ # ── Choose setup method ──
+ print()
+ method_choices = [
+ "Scan QR code to add bot automatically (recommended)",
+ "Enter existing App ID and App Secret manually",
+ ]
+ method_idx = prompt_choice(" How would you like to set up QQ Bot?", method_choices, 0)
+
+ credentials = None
+ used_qr = False
+
+ if method_idx == 0:
+ # ── QR scan-to-configure ──
+ try:
+ credentials = _qqbot_qr_flow()
+ except KeyboardInterrupt:
+ print()
+ print_warning(" QQ Bot setup cancelled.")
+ return
+ if credentials:
+ used_qr = True
+ if not credentials:
+ print_info(" QR setup did not complete. Continuing with manual input.")
+
+ # ── Manual credential input ──
+ if not credentials:
+ print()
+ print_info(" Go to https://q.qq.com to register a QQ Bot application.")
+ print_info(" Note your App ID and App Secret from the application page.")
+ print()
+ app_id = prompt(" App ID", password=False)
+ if not app_id:
+ print_warning(" Skipped — QQ Bot won't work without an App ID.")
+ return
+ app_secret = prompt(" App Secret", password=True)
+ if not app_secret:
+ print_warning(" Skipped — QQ Bot won't work without an App Secret.")
+ return
+ credentials = {"app_id": app_id.strip(), "client_secret": app_secret.strip(), "user_openid": ""}
+
+ # ── Save core credentials ──
+ save_env_value("QQ_APP_ID", credentials["app_id"])
+ save_env_value("QQ_CLIENT_SECRET", credentials["client_secret"])
+
+ user_openid = credentials.get("user_openid", "")
+
+ # ── DM security policy ──
+ print()
+ access_choices = [
+ "Use DM pairing approval (recommended)",
+ "Allow all direct messages",
+ "Only allow listed user OpenIDs",
+ ]
+ access_idx = prompt_choice(" How should direct messages be authorized?", access_choices, 0)
+ if access_idx == 0:
+ save_env_value("QQ_ALLOW_ALL_USERS", "false")
+ if user_openid:
+ print()
+ if prompt_yes_no(f" Add yourself ({user_openid}) to the allow list?", True):
+ save_env_value("QQ_ALLOWED_USERS", user_openid)
+ print_success(f" Allow list set to {user_openid}")
+ else:
+ save_env_value("QQ_ALLOWED_USERS", "")
+ else:
+ save_env_value("QQ_ALLOWED_USERS", "")
+ print_success(" DM pairing enabled.")
+ print_info(" Unknown users can request access; approve with `hermes pairing approve`.")
+ elif access_idx == 1:
+ save_env_value("QQ_ALLOW_ALL_USERS", "true")
+ save_env_value("QQ_ALLOWED_USERS", "")
+ print_warning(" Open DM access enabled for QQ Bot.")
+ else:
+ default_allow = user_openid or ""
+ allowlist = prompt(" Allowed user OpenIDs (comma-separated)", default_allow, password=False).replace(" ", "")
+ save_env_value("QQ_ALLOW_ALL_USERS", "false")
+ save_env_value("QQ_ALLOWED_USERS", allowlist)
+ print_success(" Allowlist saved.")
+
+ # ── Home channel ──
+ if user_openid:
+ print()
+ if prompt_yes_no(f" Use your QQ user ID ({user_openid}) as the home channel?", True):
+ save_env_value("QQBOT_HOME_CHANNEL", user_openid)
+ print_success(f" Home channel set to {user_openid}")
+ else:
+ print()
+ home_channel = prompt(" Home channel OpenID (for cron/notifications, or empty)", password=False)
+ if home_channel:
+ save_env_value("QQBOT_HOME_CHANNEL", home_channel.strip())
+ print_success(f" Home channel set to {home_channel.strip()}")
+
+ print()
+ print_success("🐧 QQ Bot configured!")
+ print_info(f" App ID: {credentials['app_id']}")
+
+
+def _qqbot_render_qr(url: str) -> bool:
+ """Try to render a QR code in the terminal. Returns True if successful."""
+ try:
+ import qrcode as _qr
+ qr = _qr.QRCode(border=1,error_correction=_qr.constants.ERROR_CORRECT_L)
+ qr.add_data(url)
+ qr.make(fit=True)
+ qr.print_ascii(invert=True)
+ return True
+ except Exception:
+ return False
+
+
+def _qqbot_qr_flow():
+ """Run the QR-code scan-to-configure flow.
+
+ Returns a dict with app_id, client_secret, user_openid on success,
+ or None on failure/cancel.
+ """
+ try:
+ from gateway.platforms.qqbot import (
+ create_bind_task, poll_bind_result, build_connect_url,
+ decrypt_secret, BindStatus,
+ )
+ from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL
+ except Exception as exc:
+ print_error(f" QQBot onboard import failed: {exc}")
+ return None
+
+ import asyncio
+ import time
+
+ MAX_REFRESHES = 3
+ refresh_count = 0
+
+ while refresh_count <= MAX_REFRESHES:
+ loop = asyncio.new_event_loop()
+
+ # ── Create bind task ──
+ try:
+ task_id, aes_key = loop.run_until_complete(create_bind_task())
+ except Exception as e:
+ print_warning(f" Failed to create bind task: {e}")
+ loop.close()
+ return None
+
+ url = build_connect_url(task_id)
+
+ # ── Display QR code + URL ──
+ print()
+ if _qqbot_render_qr(url):
+ print(f" Scan the QR code above, or open this URL directly:\n {url}")
+ else:
+ print(f" Open this URL in QQ on your phone:\n {url}")
+ print_info(" Tip: pip install qrcode to show a scannable QR code here")
+
+ # ── Poll loop (silent — keep QR visible at bottom) ──
+ try:
+ while True:
+ try:
+ status, app_id, encrypted_secret, user_openid = loop.run_until_complete(
+ poll_bind_result(task_id)
+ )
+ except Exception:
+ time.sleep(ONBOARD_POLL_INTERVAL)
+ continue
+
+ if status == BindStatus.COMPLETED:
+ client_secret = decrypt_secret(encrypted_secret, aes_key)
+ print()
+ print_success(f" QR scan complete! (App ID: {app_id})")
+ if user_openid:
+ print_info(f" Scanner's OpenID: {user_openid}")
+ return {
+ "app_id": app_id,
+ "client_secret": client_secret,
+ "user_openid": user_openid,
+ }
+
+ if status == BindStatus.EXPIRED:
+ refresh_count += 1
+ if refresh_count > MAX_REFRESHES:
+ print()
+ print_warning(f" QR code expired {MAX_REFRESHES} times — giving up.")
+ return None
+ print()
+ print_warning(f" QR code expired, refreshing... ({refresh_count}/{MAX_REFRESHES})")
+ loop.close()
+ break # outer while creates a new task
+
+ time.sleep(ONBOARD_POLL_INTERVAL)
+ except KeyboardInterrupt:
+ loop.close()
+ raise
+ finally:
+ loop.close()
+
+ return None
+
+
def _setup_signal():
"""Interactive setup for Signal messenger."""
import shutil
@@ -2806,6 +3015,8 @@ def gateway_setup():
_setup_dingtalk()
elif platform["key"] == "feishu":
_setup_feishu()
+ elif platform["key"] == "qqbot":
+ _setup_qqbot()
else:
_setup_standard_platform(platform)
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 195fd53c4f..2fb27dd2da 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1532,6 +1532,7 @@ def select_provider_and_model(args=None):
"huggingface",
"xiaomi",
"arcee",
+ "nvidia",
"ollama-cloud",
):
_model_flow_api_key_provider(config, selected_provider, current_model)
@@ -5875,6 +5876,7 @@ For more help on a command:
"kilocode",
"xiaomi",
"arcee",
+ "nvidia",
],
default=None,
help="Inference provider (default: auto)",
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index fe2a0c433a..cbbeef62d4 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -135,7 +135,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"gemini-2.5-flash-lite",
# Gemma open models (also served via AI Studio)
"gemma-4-31b-it",
- "gemma-4-26b-it",
],
"google-gemini-cli": [
"gemini-2.5-pro",
@@ -155,6 +154,20 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"grok-4.20-reasoning",
"grok-4-1-fast-reasoning",
],
+ "nvidia": [
+ # NVIDIA flagship reasoning models
+ "nvidia/nemotron-3-super-120b-a12b",
+ "nvidia/nemotron-3-nano-30b-a3b",
+ "nvidia/llama-3.3-nemotron-super-49b-v1.5",
+    # Third-party agentic models hosted on build.nvidia.com
+    # (mirrors the OpenRouter default list, so users see familiar picks on NIM)
+ "qwen/qwen3.5-397b-a17b",
+ "deepseek-ai/deepseek-v3.2",
+ "moonshotai/kimi-k2.5",
+ "minimaxai/minimax-m2.5",
+ "z-ai/glm5",
+ "openai/gpt-oss-120b",
+ ],
"kimi-coding": [
"kimi-k2.5",
"kimi-for-coding",
@@ -536,6 +549,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
+ ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
@@ -618,6 +632,10 @@ _PROVIDER_ALIASES = {
"grok": "xai",
"x-ai": "xai",
"x.ai": "xai",
+ "nim": "nvidia",
+ "nvidia-nim": "nvidia",
+ "build-nvidia": "nvidia",
+ "nemotron": "nvidia",
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
"ollama_cloud": "ollama-cloud",
}
diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py
index b2dda20be5..a71055cfe4 100644
--- a/hermes_cli/providers.py
+++ b/hermes_cli/providers.py
@@ -137,6 +137,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_override="https://api.x.ai/v1",
base_url_env_var="XAI_BASE_URL",
),
+ "nvidia": HermesOverlay(
+ transport="openai_chat",
+ base_url_override="https://integrate.api.nvidia.com/v1",
+ base_url_env_var="NVIDIA_BASE_URL",
+ ),
"xiaomi": HermesOverlay(
transport="openai_chat",
base_url_env_var="XIAOMI_BASE_URL",
@@ -191,6 +196,12 @@ ALIASES: Dict[str, str] = {
"x.ai": "xai",
"grok": "xai",
+ # nvidia
+ "nim": "nvidia",
+ "nvidia-nim": "nvidia",
+ "build-nvidia": "nvidia",
+ "nemotron": "nvidia",
+
# kimi-for-coding (models.dev ID)
"kimi": "kimi-for-coding",
"kimi-coding": "kimi-for-coding",
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index b5efb52a88..95c9cae77e 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -91,7 +91,7 @@ _DEFAULT_PROVIDER_MODELS = {
"gemini": [
"gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
"gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
- "gemma-4-31b-it", "gemma-4-26b-it",
+ "gemma-4-31b-it",
],
"zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
@@ -2005,52 +2005,6 @@ def _setup_wecom_callback():
_gw_setup()
-def _setup_qqbot():
- """Configure QQ Bot gateway."""
- print_header("QQ Bot")
- existing = get_env_value("QQ_APP_ID")
- if existing:
- print_info("QQ Bot: already configured")
- if not prompt_yes_no("Reconfigure QQ Bot?", False):
- return
-
- print_info("Connects Hermes to QQ via the Official QQ Bot API (v2).")
- print_info(" Requires a QQ Bot application at q.qq.com")
- print_info(" Reference: https://bot.q.qq.com/wiki/develop/api-v2/")
- print()
-
- app_id = prompt("QQ Bot App ID")
- if not app_id:
- print_warning("App ID is required — skipping QQ Bot setup")
- return
- save_env_value("QQ_APP_ID", app_id.strip())
-
- client_secret = prompt("QQ Bot App Secret", password=True)
- if not client_secret:
- print_warning("App Secret is required — skipping QQ Bot setup")
- return
- save_env_value("QQ_CLIENT_SECRET", client_secret)
- print_success("QQ Bot credentials saved")
-
- print()
- print_info("🔒 Security: Restrict who can DM your bot")
- print_info(" Use QQ user OpenIDs (found in event payloads)")
- print()
- allowed_users = prompt("Allowed user OpenIDs (comma-separated, leave empty for open access)")
- if allowed_users:
- save_env_value("QQ_ALLOWED_USERS", allowed_users.replace(" ", ""))
- print_success("QQ Bot allowlist configured")
- else:
- print_info("⚠️ No allowlist set — anyone can DM the bot!")
-
- print()
- print_info("📬 Home Channel: OpenID for cron job delivery and notifications.")
- home_channel = prompt("Home channel OpenID (leave empty to set later)")
- if home_channel:
- save_env_value("QQ_HOME_CHANNEL", home_channel)
-
- print()
- print_success("QQ Bot configured!")
def _setup_bluebubbles():
@@ -2119,12 +2073,9 @@ def _setup_bluebubbles():
def _setup_qqbot():
- """Configure QQ Bot (Official API v2) via standard platform setup."""
- from hermes_cli.gateway import _PLATFORMS
- qq_platform = next((p for p in _PLATFORMS if p["key"] == "qqbot"), None)
- if qq_platform:
- from hermes_cli.gateway import _setup_standard_platform
- _setup_standard_platform(qq_platform)
+ """Configure QQ Bot (Official API v2) via gateway setup."""
+ from hermes_cli.gateway import _setup_qqbot as _gateway_setup_qqbot
+ _gateway_setup_qqbot()
def _setup_webhooks():
@@ -2264,7 +2215,9 @@ def setup_gateway(config: dict):
missing_home.append("Slack")
if get_env_value("BLUEBUBBLES_SERVER_URL") and not get_env_value("BLUEBUBBLES_HOME_CHANNEL"):
missing_home.append("BlueBubbles")
- if get_env_value("QQ_APP_ID") and not get_env_value("QQ_HOME_CHANNEL"):
+ if get_env_value("QQ_APP_ID") and not (
+ get_env_value("QQBOT_HOME_CHANNEL") or get_env_value("QQ_HOME_CHANNEL")
+ ):
missing_home.append("QQBot")
if missing_home:
diff --git a/hermes_cli/status.py b/hermes_cli/status.py
index 2e34ae9c36..bc3290d56e 100644
--- a/hermes_cli/status.py
+++ b/hermes_cli/status.py
@@ -317,7 +317,7 @@ def show_status(args):
"WeCom Callback": ("WECOM_CALLBACK_CORP_ID", None),
"Weixin": ("WEIXIN_ACCOUNT_ID", "WEIXIN_HOME_CHANNEL"),
"BlueBubbles": ("BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_HOME_CHANNEL"),
- "QQBot": ("QQ_APP_ID", "QQ_HOME_CHANNEL"),
+ "QQBot": ("QQ_APP_ID", "QQBOT_HOME_CHANNEL"),
}
for name, (token_var, home_var) in platforms.items():
@@ -327,6 +327,9 @@ def show_status(args):
home_channel = ""
if home_var:
home_channel = os.getenv(home_var, "")
+ # Back-compat: QQBot home channel was renamed from QQ_HOME_CHANNEL to QQBOT_HOME_CHANNEL
+ if not home_channel and home_var == "QQBOT_HOME_CHANNEL":
+ home_channel = os.getenv("QQ_HOME_CHANNEL", "")
status = "configured" if has_token else "not configured"
if home_channel:
diff --git a/run_agent.py b/run_agent.py
index 64572001b3..bb8cfa459d 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -7208,14 +7208,22 @@ class AIAgent:
# Use auxiliary client for the flush call when available --
# it's cheaper and avoids Codex Responses API incompatibility.
- from agent.auxiliary_client import call_llm as _call_llm
+ from agent.auxiliary_client import (
+ call_llm as _call_llm,
+ _fixed_temperature_for_model,
+ )
_aux_available = True
+ # Use the fixed-temperature override (e.g. kimi-for-coding → 0.6) if
+ # the model has a strict contract; otherwise the historical 0.3 default.
+ _flush_temperature = _fixed_temperature_for_model(self.model)
+ if _flush_temperature is None:
+ _flush_temperature = 0.3
try:
response = _call_llm(
task="flush_memories",
messages=api_messages,
tools=[memory_tool_def],
- temperature=0.3,
+ temperature=_flush_temperature,
max_tokens=5120,
# timeout resolved from auxiliary.flush_memories.timeout config
)
@@ -7227,7 +7235,7 @@ class AIAgent:
# No auxiliary client -- use the Codex Responses path directly
codex_kwargs = self._build_api_kwargs(api_messages)
codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
- codex_kwargs["temperature"] = 0.3
+ codex_kwargs["temperature"] = _flush_temperature
if "max_output_tokens" in codex_kwargs:
codex_kwargs["max_output_tokens"] = 5120
response = self._run_codex_stream(codex_kwargs)
@@ -7246,7 +7254,7 @@ class AIAgent:
"model": self.model,
"messages": api_messages,
"tools": [memory_tool_def],
- "temperature": 0.3,
+ "temperature": _flush_temperature,
**self._max_tokens_param(5120),
}
from agent.auxiliary_client import _get_task_timeout
diff --git a/scripts/release.py b/scripts/release.py
index 028f75ba64..c6d906436b 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -256,6 +256,8 @@ AUTHOR_MAP = {
"anthhub@163.com": "anthhub",
"shenuu@gmail.com": "shenuu",
"xiayh17@gmail.com": "xiayh0107",
+ "asurla@nvidia.com": "anniesurla",
+ "limkuan24@gmail.com": "WideLee",
}
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 5d79f96dea..1778855ddd 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -696,6 +696,95 @@ class TestIsConnectionError:
assert _is_connection_error(err) is False
+class TestKimiForCodingTemperature:
+ """kimi-for-coding now requires temperature=0.6 exactly."""
+
+ def test_build_call_kwargs_forces_fixed_temperature(self):
+ from agent.auxiliary_client import _build_call_kwargs
+
+ kwargs = _build_call_kwargs(
+ provider="kimi-coding",
+ model="kimi-for-coding",
+ messages=[{"role": "user", "content": "hello"}],
+ temperature=0.3,
+ )
+
+ assert kwargs["temperature"] == 0.6
+
+ def test_build_call_kwargs_injects_temperature_when_missing(self):
+ from agent.auxiliary_client import _build_call_kwargs
+
+ kwargs = _build_call_kwargs(
+ provider="kimi-coding",
+ model="kimi-for-coding",
+ messages=[{"role": "user", "content": "hello"}],
+ temperature=None,
+ )
+
+ assert kwargs["temperature"] == 0.6
+
+ def test_auto_routed_kimi_for_coding_sync_call_uses_fixed_temperature(self):
+ client = MagicMock()
+ client.base_url = "https://api.kimi.com/coding/v1"
+ response = MagicMock()
+ client.chat.completions.create.return_value = response
+
+ with patch(
+ "agent.auxiliary_client._get_cached_client",
+ return_value=(client, "kimi-for-coding"),
+ ), patch(
+ "agent.auxiliary_client._resolve_task_provider_model",
+ return_value=("auto", "kimi-for-coding", None, None, None),
+ ):
+ result = call_llm(
+ task="session_search",
+ messages=[{"role": "user", "content": "hello"}],
+ temperature=0.1,
+ )
+
+ assert result is response
+ kwargs = client.chat.completions.create.call_args.kwargs
+ assert kwargs["model"] == "kimi-for-coding"
+ assert kwargs["temperature"] == 0.6
+
+ @pytest.mark.asyncio
+ async def test_auto_routed_kimi_for_coding_async_call_uses_fixed_temperature(self):
+ client = MagicMock()
+ client.base_url = "https://api.kimi.com/coding/v1"
+ response = MagicMock()
+ client.chat.completions.create = AsyncMock(return_value=response)
+
+ with patch(
+ "agent.auxiliary_client._get_cached_client",
+ return_value=(client, "kimi-for-coding"),
+ ), patch(
+ "agent.auxiliary_client._resolve_task_provider_model",
+ return_value=("auto", "kimi-for-coding", None, None, None),
+ ):
+ result = await async_call_llm(
+ task="session_search",
+ messages=[{"role": "user", "content": "hello"}],
+ temperature=0.1,
+ )
+
+ assert result is response
+ kwargs = client.chat.completions.create.call_args.kwargs
+ assert kwargs["model"] == "kimi-for-coding"
+ assert kwargs["temperature"] == 0.6
+
+ def test_non_kimi_model_still_preserves_temperature(self):
+ from agent.auxiliary_client import _build_call_kwargs
+
+ kwargs = _build_call_kwargs(
+ provider="kimi-coding",
+ model="kimi-k2.5",
+ messages=[{"role": "user", "content": "hello"}],
+ temperature=0.3,
+ )
+
+ assert kwargs["temperature"] == 0.3
+
+
# ---------------------------------------------------------------------------
# async_call_llm payment / connection fallback (#7512 bug 2)
# ---------------------------------------------------------------------------
diff --git a/tests/agent/test_gemini_cloudcode.py b/tests/agent/test_gemini_cloudcode.py
index cf5e80f08a..c9d2b87df8 100644
--- a/tests/agent/test_gemini_cloudcode.py
+++ b/tests/agent/test_gemini_cloudcode.py
@@ -826,6 +826,160 @@ class TestGeminiCloudCodeClient:
finally:
client.close()
+
+class TestGeminiHttpErrorParsing:
+ """Regression coverage for _gemini_http_error Google-envelope parsing.
+
+ These are the paths that users actually hit during Google-side throttling
+ (April 2026: gemini-2.5-pro MODEL_CAPACITY_EXHAUSTED, gemma-4-26b-it
+ returning 404). The error needs to carry status_code + response so the
+ main loop's error_classifier and Retry-After logic work.
+ """
+
+ @staticmethod
+ def _fake_response(status: int, body: dict | str = "", headers=None):
+ """Minimal httpx.Response stand-in (duck-typed for _gemini_http_error)."""
+ class _FakeResponse:
+ def __init__(self):
+ self.status_code = status
+ if isinstance(body, dict):
+ self.text = json.dumps(body)
+ else:
+ self.text = body
+ self.headers = headers or {}
+ return _FakeResponse()
+
+ def test_model_capacity_exhausted_produces_friendly_message(self):
+ from agent.gemini_cloudcode_adapter import _gemini_http_error
+
+ body = {
+ "error": {
+ "code": 429,
+ "message": "Resource has been exhausted (e.g. check quota).",
+ "status": "RESOURCE_EXHAUSTED",
+ "details": [
+ {
+ "@type": "type.googleapis.com/google.rpc.ErrorInfo",
+ "reason": "MODEL_CAPACITY_EXHAUSTED",
+ "domain": "googleapis.com",
+ "metadata": {"model": "gemini-2.5-pro"},
+ },
+ {
+ "@type": "type.googleapis.com/google.rpc.RetryInfo",
+ "retryDelay": "30s",
+ },
+ ],
+ }
+ }
+ err = _gemini_http_error(self._fake_response(429, body))
+ assert err.status_code == 429
+ assert err.code == "code_assist_capacity_exhausted"
+ assert err.retry_after == 30.0
+ assert err.details["reason"] == "MODEL_CAPACITY_EXHAUSTED"
+ # Message must be user-friendly, not a raw JSON dump.
+ message = str(err)
+ assert "gemini-2.5-pro" in message
+ assert "capacity exhausted" in message.lower()
+ assert "30s" in message
+ # response attr is preserved for run_agent's Retry-After header path.
+ assert err.response is not None
+
+ def test_resource_exhausted_without_reason(self):
+ from agent.gemini_cloudcode_adapter import _gemini_http_error
+
+ body = {
+ "error": {
+ "code": 429,
+ "message": "Quota exceeded for requests per minute.",
+ "status": "RESOURCE_EXHAUSTED",
+ }
+ }
+ err = _gemini_http_error(self._fake_response(429, body))
+ assert err.status_code == 429
+ assert err.code == "code_assist_rate_limited"
+ message = str(err)
+ assert "quota" in message.lower()
+
+ def test_404_model_not_found_produces_model_retired_message(self):
+ from agent.gemini_cloudcode_adapter import _gemini_http_error
+
+ body = {
+ "error": {
+ "code": 404,
+ "message": "models/gemma-4-26b-it is not found for API version v1internal",
+ "status": "NOT_FOUND",
+ }
+ }
+ err = _gemini_http_error(self._fake_response(404, body))
+ assert err.status_code == 404
+ message = str(err)
+ assert "not available" in message.lower() or "retired" in message.lower()
+ # Error message should reference the actual model text from Google.
+ assert "gemma-4-26b-it" in message
+
+ def test_unauthorized_preserves_status_code(self):
+ from agent.gemini_cloudcode_adapter import _gemini_http_error
+
+ err = _gemini_http_error(self._fake_response(
+ 401, {"error": {"code": 401, "message": "Invalid token", "status": "UNAUTHENTICATED"}},
+ ))
+ assert err.status_code == 401
+ assert err.code == "code_assist_unauthorized"
+
+ def test_retry_after_header_fallback(self):
+ """If the body has no RetryInfo detail, fall back to Retry-After header."""
+ from agent.gemini_cloudcode_adapter import _gemini_http_error
+
+ resp = self._fake_response(
+ 429,
+ {"error": {"code": 429, "message": "Rate limited", "status": "RESOURCE_EXHAUSTED"}},
+ headers={"Retry-After": "45"},
+ )
+ err = _gemini_http_error(resp)
+ assert err.retry_after == 45.0
+
+ def test_malformed_body_still_produces_structured_error(self):
+ """Non-JSON body must not swallow status_code — we still want the classifier path."""
+ from agent.gemini_cloudcode_adapter import _gemini_http_error
+
+ err = _gemini_http_error(self._fake_response(500, "internal error"))
+ assert err.status_code == 500
+ # Raw body snippet must still be there for debugging.
+ assert "500" in str(err)
+
+ def test_status_code_flows_through_error_classifier(self):
+ """End-to-end: CodeAssistError from a 429 must classify as rate_limit.
+
+ This is the whole point of adding status_code to CodeAssistError —
+ _extract_status_code must see it and FailoverReason.rate_limit must
+ fire, so the main loop triggers fallback_providers.
+ """
+ from agent.gemini_cloudcode_adapter import _gemini_http_error
+ from agent.error_classifier import classify_api_error, FailoverReason
+
+ body = {
+ "error": {
+ "code": 429,
+ "message": "Resource has been exhausted",
+ "status": "RESOURCE_EXHAUSTED",
+ "details": [
+ {
+ "@type": "type.googleapis.com/google.rpc.ErrorInfo",
+ "reason": "MODEL_CAPACITY_EXHAUSTED",
+ "metadata": {"model": "gemini-2.5-pro"},
+ }
+ ],
+ }
+ }
+ err = _gemini_http_error(self._fake_response(429, body))
+
+ classified = classify_api_error(
+ err, provider="google-gemini-cli", model="gemini-2.5-pro",
+ )
+ assert classified.status_code == 429
+ assert classified.reason == FailoverReason.rate_limit
+
+
# =============================================================================
# Provider registration
# =============================================================================
diff --git a/tests/conftest.py b/tests/conftest.py
index 27950118e1..c5b367266e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -229,6 +229,15 @@ def _hermetic_environment(tmp_path, monkeypatch):
monkeypatch.setenv("LC_ALL", "C.UTF-8")
monkeypatch.setenv("PYTHONHASHSEED", "0")
+ # 4b. Disable AWS IMDS lookups. Without this, any test that ends up
+ # calling has_aws_credentials() / resolve_aws_auth_env_var()
+ # (e.g. provider auto-detect, status command, cron run_job) burns
+ # ~2s waiting for the metadata service at 169.254.169.254 to time
+ # out. Tests don't run on EC2 — IMDS is always unreachable here.
+ monkeypatch.setenv("AWS_EC2_METADATA_DISABLED", "true")
+ monkeypatch.setenv("AWS_METADATA_SERVICE_TIMEOUT", "1")
+ monkeypatch.setenv("AWS_METADATA_SERVICE_NUM_ATTEMPTS", "1")
+
# 5. Reset plugin singleton so tests don't leak plugins from
# ~/.hermes/plugins/ (which, per step 3, is now empty — but the
# singleton might still be cached from a previous test).
diff --git a/tests/gateway/test_qqbot.py b/tests/gateway/test_qqbot.py
index 18b1b59b75..a5aeb62516 100644
--- a/tests/gateway/test_qqbot.py
+++ b/tests/gateway/test_qqbot.py
@@ -179,7 +179,7 @@ class TestVoiceAttachmentSSRFProtection:
from gateway.platforms.qqbot import QQAdapter, _ssrf_redirect_guard
client = mock.AsyncMock()
- with mock.patch("gateway.platforms.qqbot.httpx.AsyncClient", return_value=client) as async_client_cls:
+ with mock.patch("gateway.platforms.qqbot.adapter.httpx.AsyncClient", return_value=client) as async_client_cls:
adapter = QQAdapter(_make_config(app_id="a", client_secret="b"))
adapter._ensure_token = mock.AsyncMock(side_effect=RuntimeError("stop after client creation"))
diff --git a/tests/gateway/test_session_state_cleanup.py b/tests/gateway/test_session_state_cleanup.py
new file mode 100644
index 0000000000..3c708736c3
--- /dev/null
+++ b/tests/gateway/test_session_state_cleanup.py
@@ -0,0 +1,231 @@
+"""Regression tests for _release_running_agent_state and SessionDB shutdown.
+
+Before this change, running-agent state lived in three dicts that drifted
+out of sync:
+
+ self._running_agents — AIAgent instance per session key
+ self._running_agents_ts — start timestamp per session key
+ self._busy_ack_ts — last busy-ack timestamp per session key
+
+Six cleanup sites did ``del self._running_agents[key]`` without touching
+the other two; one site only popped ``_running_agents`` and
+``_running_agents_ts``; and only the stale-eviction site cleaned all
+three. Each missed entry was a small persistent leak.
+
+Also: SessionDB connections were never closed on gateway shutdown,
+leaving WAL locks in place until Python actually exited.
+"""
+
+import threading
+from unittest.mock import MagicMock
+
+import pytest
+
+
+def _make_runner():
+ """Bare GatewayRunner wired with just the state the helper touches."""
+ from gateway.run import GatewayRunner
+
+ runner = GatewayRunner.__new__(GatewayRunner)
+ runner._running_agents = {}
+ runner._running_agents_ts = {}
+ runner._busy_ack_ts = {}
+ return runner
+
+
+class TestReleaseRunningAgentStateUnit:
+ def test_pops_all_three_dicts(self):
+ runner = _make_runner()
+ runner._running_agents["k"] = MagicMock()
+ runner._running_agents_ts["k"] = 123.0
+ runner._busy_ack_ts["k"] = 456.0
+
+ runner._release_running_agent_state("k")
+
+ assert "k" not in runner._running_agents
+ assert "k" not in runner._running_agents_ts
+ assert "k" not in runner._busy_ack_ts
+
+ def test_idempotent_on_missing_key(self):
+ """Calling twice (or on an absent key) must not raise."""
+ runner = _make_runner()
+ runner._release_running_agent_state("missing")
+ runner._release_running_agent_state("missing") # still fine
+
+ def test_noop_on_empty_session_key(self):
+ """Empty string / None key is treated as a no-op."""
+ runner = _make_runner()
+ runner._running_agents[""] = "guard"
+ runner._release_running_agent_state("")
+ # Empty key not processed — guard value survives.
+ assert runner._running_agents[""] == "guard"
+
+ def test_preserves_other_sessions(self):
+ runner = _make_runner()
+ for k in ("a", "b", "c"):
+ runner._running_agents[k] = MagicMock()
+ runner._running_agents_ts[k] = 1.0
+ runner._busy_ack_ts[k] = 1.0
+
+ runner._release_running_agent_state("b")
+
+ assert set(runner._running_agents.keys()) == {"a", "c"}
+ assert set(runner._running_agents_ts.keys()) == {"a", "c"}
+ assert set(runner._busy_ack_ts.keys()) == {"a", "c"}
+
+ def test_handles_missing_busy_ack_attribute(self):
+ """Backward-compatible with older runners lacking _busy_ack_ts."""
+ runner = _make_runner()
+ del runner._busy_ack_ts # simulate older version
+ runner._running_agents["k"] = MagicMock()
+ runner._running_agents_ts["k"] = 1.0
+
+ runner._release_running_agent_state("k") # should not raise
+
+ assert "k" not in runner._running_agents
+ assert "k" not in runner._running_agents_ts
+
+ def test_concurrent_release_is_safe(self):
+ """Multiple threads releasing different keys concurrently."""
+ runner = _make_runner()
+ for i in range(50):
+ k = f"s{i}"
+ runner._running_agents[k] = MagicMock()
+ runner._running_agents_ts[k] = float(i)
+ runner._busy_ack_ts[k] = float(i)
+
+ def worker(keys):
+ for k in keys:
+ runner._release_running_agent_state(k)
+
+ threads = [
+ threading.Thread(target=worker, args=([f"s{i}" for i in range(start, 50, 5)],))
+ for start in range(5)
+ ]
+ for t in threads:
+ t.start()
+ for t in threads:
+ t.join(timeout=5)
+ assert not t.is_alive()
+
+ assert runner._running_agents == {}
+ assert runner._running_agents_ts == {}
+ assert runner._busy_ack_ts == {}
+
+
+class TestNoMoreBareDeleteSites:
+ """Regression: all bare `del self._running_agents[key]` sites were
+ converted to use the helper. If a future contributor reverts one,
+ this test flags it. Docstrings / comments mentioning the old
+ pattern are allowed.
+ """
+
+ def test_no_bare_del_of_running_agents_in_gateway_run(self):
+ from pathlib import Path
+ import re
+
+ gateway_run = (Path(__file__).parent.parent.parent / "gateway" / "run.py").read_text()
+ # Match `del self._running_agents[...]` that is NOT inside a
+ # triple-quoted docstring. We scan non-docstring lines only.
+ lines = gateway_run.splitlines()
+
+ in_docstring = False
+ docstring_delim = None
+ offenders = []
+ for idx, line in enumerate(lines, start=1):
+ stripped = line.strip()
+ if not in_docstring:
+ if stripped.startswith('"""') or stripped.startswith("'''"):
+ delim = stripped[:3]
+ # single-line docstring?
+ if stripped.count(delim) >= 2:
+ continue
+ in_docstring = True
+ docstring_delim = delim
+ continue
+ if re.search(r"\bdel\s+self\._running_agents\[", line):
+ offenders.append((idx, line.rstrip()))
+ else:
+ if docstring_delim and docstring_delim in stripped:
+ in_docstring = False
+ docstring_delim = None
+
+ assert offenders == [], (
+ "Found bare `del self._running_agents[...]` sites in gateway/run.py. "
+ "Use self._release_running_agent_state(session_key) instead so "
+ "_running_agents_ts and _busy_ack_ts are popped in lockstep.\n"
+ + "\n".join(f" line {n}: {l}" for n, l in offenders)
+ )
+
+
+class TestSessionDbCloseOnShutdown:
+ """_stop_impl should call .close() on both self._session_db and
+ self.session_store._db to release SQLite WAL locks before the new
+ gateway (during --replace restart) tries to open the same file.
+ """
+
+ def test_stop_impl_closes_both_session_dbs(self):
+ """Run the exact shutdown block that closes SessionDBs and verify
+ .close() was called on both holders."""
+ from gateway.run import GatewayRunner
+
+ runner = GatewayRunner.__new__(GatewayRunner)
+
+ runner_db = MagicMock()
+ store_db = MagicMock()
+
+ runner._db = runner_db
+ runner.session_store = MagicMock()
+ runner.session_store._db = store_db
+
+ # Replicate the exact production loop from _stop_impl.
+ for _db_holder in (runner, getattr(runner, "session_store", None)):
+ _db = getattr(_db_holder, "_db", None) if _db_holder else None
+ if _db is None or not hasattr(_db, "close"):
+ continue
+ _db.close()
+
+ runner_db.close.assert_called_once()
+ store_db.close.assert_called_once()
+
+ def test_shutdown_tolerates_missing_session_store(self):
+ """Gateway without a session_store attribute must not crash on shutdown."""
+ from gateway.run import GatewayRunner
+
+ runner = GatewayRunner.__new__(GatewayRunner)
+ runner._db = MagicMock()
+ # Deliberately no session_store attribute.
+
+ for _db_holder in (runner, getattr(runner, "session_store", None)):
+ _db = getattr(_db_holder, "_db", None) if _db_holder else None
+ if _db is None or not hasattr(_db, "close"):
+ continue
+ _db.close()
+
+ runner._db.close.assert_called_once()
+
+ def test_shutdown_tolerates_close_raising(self):
+ """A close() that raises must not prevent subsequent cleanup."""
+ from gateway.run import GatewayRunner
+
+ runner = GatewayRunner.__new__(GatewayRunner)
+ flaky_db = MagicMock()
+ flaky_db.close.side_effect = RuntimeError("simulated lock error")
+ healthy_db = MagicMock()
+
+ runner._db = flaky_db
+ runner.session_store = MagicMock()
+ runner.session_store._db = healthy_db
+
+ # Same pattern as production: try/except around each close().
+ for _db_holder in (runner, getattr(runner, "session_store", None)):
+ _db = getattr(_db_holder, "_db", None) if _db_holder else None
+ if _db is None or not hasattr(_db, "close"):
+ continue
+ try:
+ _db.close()
+ except Exception:
+ pass
+
+ flaky_db.close.assert_called_once()
+ healthy_db.close.assert_called_once()
diff --git a/tests/gateway/test_session_store_prune.py b/tests/gateway/test_session_store_prune.py
new file mode 100644
index 0000000000..9b1dca2971
--- /dev/null
+++ b/tests/gateway/test_session_store_prune.py
@@ -0,0 +1,270 @@
+"""Tests for SessionStore.prune_old_entries and the gateway watcher that calls it.
+
+The SessionStore in-memory dict (and its backing sessions.json) grew
+unbounded — every unique (platform, chat_id, thread_id, user_id) tuple
+ever seen was kept forever, regardless of how stale it became. These
+tests pin the prune behaviour:
+
+ * Entries older than max_age_days (by updated_at) are removed
+ * Entries marked ``suspended`` are preserved (user-paused)
+ * Entries with an active process attached are preserved
+ * max_age_days <= 0 disables pruning entirely
+ * sessions.json is rewritten with the post-prune dict
+ * The ``updated_at`` field — not ``created_at`` — drives the decision
+ (so a long-running-but-still-active session isn't pruned)
+"""
+
+import json
+import threading
+from datetime import datetime, timedelta
+from unittest.mock import patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, SessionResetPolicy
+from gateway.session import SessionEntry, SessionStore
+
+
+def _make_store(tmp_path, max_age_days: int = 90, has_active_processes_fn=None):
+ """Build a SessionStore bypassing SQLite/disk-load side effects."""
+ config = GatewayConfig(
+ default_reset_policy=SessionResetPolicy(mode="none"),
+ session_store_max_age_days=max_age_days,
+ )
+ with patch("gateway.session.SessionStore._ensure_loaded"):
+ store = SessionStore(
+ sessions_dir=tmp_path,
+ config=config,
+ has_active_processes_fn=has_active_processes_fn,
+ )
+ store._db = None
+ store._loaded = True
+ return store
+
+
+def _entry(key: str, age_days: float, *, suspended: bool = False,
+ session_id: str | None = None) -> SessionEntry:
+ now = datetime.now()
+ return SessionEntry(
+ session_key=key,
+ session_id=session_id or f"sid_{key}",
+ created_at=now - timedelta(days=age_days + 30), # arbitrarily older
+ updated_at=now - timedelta(days=age_days),
+ platform=Platform.TELEGRAM,
+ chat_type="dm",
+ suspended=suspended,
+ )
+
+
+class TestPruneBasics:
+ def test_prune_removes_entries_past_max_age(self, tmp_path):
+ store = _make_store(tmp_path)
+ store._entries["old"] = _entry("old", age_days=100)
+ store._entries["fresh"] = _entry("fresh", age_days=5)
+
+ removed = store.prune_old_entries(max_age_days=90)
+
+ assert removed == 1
+ assert "old" not in store._entries
+ assert "fresh" in store._entries
+
+ def test_prune_uses_updated_at_not_created_at(self, tmp_path):
+ """A session created long ago but updated recently must be kept."""
+ store = _make_store(tmp_path)
+ now = datetime.now()
+ entry = SessionEntry(
+ session_key="long-lived",
+ session_id="sid",
+ created_at=now - timedelta(days=365), # ancient
+ updated_at=now - timedelta(days=3), # but just chatted
+ platform=Platform.TELEGRAM,
+ chat_type="dm",
+ )
+ store._entries["long-lived"] = entry
+
+ removed = store.prune_old_entries(max_age_days=30)
+
+ assert removed == 0
+ assert "long-lived" in store._entries
+
+ def test_prune_disabled_when_max_age_is_zero(self, tmp_path):
+ store = _make_store(tmp_path, max_age_days=0)
+ for i in range(5):
+ store._entries[f"s{i}"] = _entry(f"s{i}", age_days=365)
+
+ assert store.prune_old_entries(0) == 0
+ assert len(store._entries) == 5
+
+ def test_prune_disabled_when_max_age_is_negative(self, tmp_path):
+ store = _make_store(tmp_path)
+ store._entries["s"] = _entry("s", age_days=365)
+
+ assert store.prune_old_entries(-1) == 0
+ assert "s" in store._entries
+
+ def test_prune_skips_suspended_entries(self, tmp_path):
+ """/stop-suspended sessions must be kept for later resume."""
+ store = _make_store(tmp_path)
+ store._entries["suspended"] = _entry(
+ "suspended", age_days=1000, suspended=True
+ )
+ store._entries["idle"] = _entry("idle", age_days=1000)
+
+ removed = store.prune_old_entries(max_age_days=90)
+
+ assert removed == 1
+ assert "suspended" in store._entries
+ assert "idle" not in store._entries
+
+ def test_prune_skips_entries_with_active_processes(self, tmp_path):
+ """Sessions with active bg processes aren't pruned even if old."""
+ active_session_ids = {"sid_active"}
+
+ def _has_active(session_id: str) -> bool:
+ return session_id in active_session_ids
+
+ store = _make_store(tmp_path, has_active_processes_fn=_has_active)
+ store._entries["active"] = _entry(
+ "active", age_days=1000, session_id="sid_active"
+ )
+ store._entries["idle"] = _entry(
+ "idle", age_days=1000, session_id="sid_idle"
+ )
+
+ removed = store.prune_old_entries(max_age_days=90)
+
+ assert removed == 1
+ assert "active" in store._entries
+ assert "idle" not in store._entries
+
+ def test_prune_does_not_write_disk_when_no_removals(self, tmp_path):
+ """If nothing is evictable, _save() should NOT be called."""
+ store = _make_store(tmp_path)
+ store._entries["fresh1"] = _entry("fresh1", age_days=1)
+ store._entries["fresh2"] = _entry("fresh2", age_days=2)
+
+ save_calls = []
+ store._save = lambda: save_calls.append(1)
+
+ assert store.prune_old_entries(max_age_days=90) == 0
+ assert save_calls == []
+
+ def test_prune_writes_disk_after_removal(self, tmp_path):
+ store = _make_store(tmp_path)
+ store._entries["stale"] = _entry("stale", age_days=500)
+ store._entries["fresh"] = _entry("fresh", age_days=1)
+
+ save_calls = []
+ store._save = lambda: save_calls.append(1)
+
+ store.prune_old_entries(max_age_days=90)
+ assert save_calls == [1]
+
+ def test_prune_is_thread_safe(self, tmp_path):
+ """Prune acquires _lock internally; concurrent update_session is safe."""
+ store = _make_store(tmp_path)
+ for i in range(20):
+ age = 1000 if i % 2 == 0 else 1
+ store._entries[f"s{i}"] = _entry(f"s{i}", age_days=age)
+
+ results = []
+
+ def _pruner():
+ results.append(store.prune_old_entries(max_age_days=90))
+
+ def _reader():
+ # Mimic a concurrent update_session reader iterating under lock.
+ with store._lock:
+ list(store._entries.keys())
+
+ threads = [threading.Thread(target=_pruner)]
+ threads += [threading.Thread(target=_reader) for _ in range(4)]
+ for t in threads:
+ t.start()
+ for t in threads:
+ t.join(timeout=5)
+ assert not t.is_alive()
+
+ # Exactly one pruner ran; removed exactly the 10 stale entries.
+ assert results == [10]
+ assert len(store._entries) == 10
+ for i in range(20):
+ if i % 2 == 1: # fresh
+ assert f"s{i}" in store._entries
+
+
+class TestPrunePersistsToDisk:
+ def test_prune_rewrites_sessions_json(self, tmp_path):
+ """After prune, sessions.json on disk reflects the new dict."""
+ config = GatewayConfig(
+ default_reset_policy=SessionResetPolicy(mode="none"),
+ session_store_max_age_days=90,
+ )
+ store = SessionStore(sessions_dir=tmp_path, config=config)
+ store._db = None
+ # Force-populate without calling get_or_create to avoid DB side-effects
+ store._entries["stale"] = _entry("stale", age_days=500)
+ store._entries["fresh"] = _entry("fresh", age_days=1)
+ store._loaded = True
+ store._save()
+
+ # Verify pre-prune state on disk.
+ saved_pre = json.loads((tmp_path / "sessions.json").read_text())
+ assert set(saved_pre.keys()) == {"stale", "fresh"}
+
+ # Prune and check disk.
+ store.prune_old_entries(max_age_days=90)
+ saved_post = json.loads((tmp_path / "sessions.json").read_text())
+ assert set(saved_post.keys()) == {"fresh"}
+
+
+class TestGatewayConfigSerialization:
+ def test_session_store_max_age_days_defaults_to_90(self):
+ cfg = GatewayConfig()
+ assert cfg.session_store_max_age_days == 90
+
+ def test_session_store_max_age_days_roundtrips(self):
+ cfg = GatewayConfig(session_store_max_age_days=30)
+ restored = GatewayConfig.from_dict(cfg.to_dict())
+ assert restored.session_store_max_age_days == 30
+
+ def test_session_store_max_age_days_missing_defaults_90(self):
+ """Loading an old config (pre-this-field) falls back to default."""
+ restored = GatewayConfig.from_dict({})
+ assert restored.session_store_max_age_days == 90
+
+ def test_session_store_max_age_days_negative_coerced_to_zero(self):
+ """A negative value (accidental or hostile) becomes 0 (disabled)."""
+ restored = GatewayConfig.from_dict({"session_store_max_age_days": -5})
+ assert restored.session_store_max_age_days == 0
+
+ def test_session_store_max_age_days_bad_type_falls_back(self):
+ """Non-int values fall back to the default, not a crash."""
+ restored = GatewayConfig.from_dict({"session_store_max_age_days": "nope"})
+ assert restored.session_store_max_age_days == 90
+
+
+class TestGatewayWatcherCallsPrune:
+ """The session_expiry_watcher should call prune_old_entries once per hour."""
+
+ def test_prune_gate_fires_on_first_tick(self):
+ """First watcher tick has _last_prune_ts=0, so the gate opens."""
+ import time as _t
+
+ last_ts = 0.0
+ prune_interval = 3600.0
+ now = _t.time()
+
+ # Mirror the production gate check in _session_expiry_watcher.
+ should_prune = (now - last_ts) > prune_interval
+ assert should_prune is True
+
+ def test_prune_gate_suppresses_within_interval(self):
+ import time as _t
+
+ last_ts = _t.time() - 600 # 10 minutes ago
+ prune_interval = 3600.0
+ now = _t.time()
+
+ should_prune = (now - last_ts) > prune_interval
+ assert should_prune is False
diff --git a/tests/gateway/test_telegram_format.py b/tests/gateway/test_telegram_format.py
index 1bd889b7c8..ce7e02a474 100644
--- a/tests/gateway/test_telegram_format.py
+++ b/tests/gateway/test_telegram_format.py
@@ -34,7 +34,12 @@ def _ensure_telegram_mock():
_ensure_telegram_mock()
-from gateway.platforms.telegram import TelegramAdapter, _escape_mdv2, _strip_mdv2 # noqa: E402
+from gateway.platforms.telegram import ( # noqa: E402
+ TelegramAdapter,
+ _escape_mdv2,
+ _strip_mdv2,
+ _wrap_markdown_tables,
+)
# ---------------------------------------------------------------------------
@@ -535,6 +540,152 @@ class TestStripMdv2:
assert _strip_mdv2("||hidden text||") == "hidden text"
+# =========================================================================
+# Markdown table auto-wrap
+# =========================================================================
+
+
+class TestWrapMarkdownTables:
+ """_wrap_markdown_tables wraps GFM pipe tables in ``` fences so
+ Telegram renders them as monospace preformatted text instead of the
+ noisy backslash-pipe mess MarkdownV2 produces."""
+
+ def test_basic_table_wrapped(self):
+ text = (
+ "Scores:\n\n"
+ "| Player | Score |\n"
+ "|--------|-------|\n"
+ "| Alice | 150 |\n"
+ "| Bob | 120 |\n"
+ "\nEnd."
+ )
+ out = _wrap_markdown_tables(text)
+ # Table is now wrapped in a fence
+ assert "```\n| Player | Score |" in out
+ assert "| Bob | 120 |\n```" in out
+ # Surrounding prose is preserved
+ assert out.startswith("Scores:")
+ assert out.endswith("End.")
+
+ def test_bare_pipe_table_wrapped(self):
+ """Tables without outer pipes (GFM allows this) are still detected."""
+ text = "head1 | head2\n--- | ---\na | b\nc | d"
+ out = _wrap_markdown_tables(text)
+ assert out.startswith("```\n")
+ assert out.rstrip().endswith("```")
+ assert "head1 | head2" in out
+
+ def test_alignment_separators(self):
+ """Separator rows with :--- / ---: / :---: alignment markers match."""
+ text = (
+ "| Name | Age | City |\n"
+ "|:-----|----:|:----:|\n"
+ "| Ada | 30 | NYC |"
+ )
+ out = _wrap_markdown_tables(text)
+ assert out.count("```") == 2
+
+ def test_two_consecutive_tables_wrapped_separately(self):
+ text = (
+ "| A | B |\n"
+ "|---|---|\n"
+ "| 1 | 2 |\n"
+ "\n"
+ "| X | Y |\n"
+ "|---|---|\n"
+ "| 9 | 8 |"
+ )
+ out = _wrap_markdown_tables(text)
+ # Four fences total — one opening + closing per table
+ assert out.count("```") == 4
+
+ def test_plain_text_with_pipes_not_wrapped(self):
+ """A bare pipe in prose must NOT trigger wrapping."""
+ text = "Use the | pipe operator to chain commands."
+ assert _wrap_markdown_tables(text) == text
+
+ def test_horizontal_rule_not_wrapped(self):
+ """A lone '---' horizontal rule must not be mistaken for a separator."""
+ text = "Section A\n\n---\n\nSection B"
+ assert _wrap_markdown_tables(text) == text
+
+ def test_existing_code_block_with_pipes_left_alone(self):
+ """A table already inside a fenced code block must not be re-wrapped."""
+ text = (
+ "```\n"
+ "| a | b |\n"
+ "|---|---|\n"
+ "| 1 | 2 |\n"
+ "```"
+ )
+ assert _wrap_markdown_tables(text) == text
+
+ def test_no_pipe_character_short_circuits(self):
+ text = "Plain **bold** text with no table."
+ assert _wrap_markdown_tables(text) == text
+
+ def test_no_dash_short_circuits(self):
+ text = "a | b\nc | d" # has pipes but no '-' separator row
+ assert _wrap_markdown_tables(text) == text
+
+ def test_single_column_separator_not_matched(self):
+ """Single-column tables (rare) are not detected — we require at
+ least one internal pipe in the separator row to avoid false
+ positives on formatting rules."""
+ text = "| a |\n| - |\n| b |"
+ assert _wrap_markdown_tables(text) == text
+
+
+class TestFormatMessageTables:
+ """End-to-end: a pipe table passes through format_message with its
+ pipes and dashes left alone inside the fence, not mangled by MarkdownV2
+ escaping."""
+
+ def test_table_rendered_as_code_block(self, adapter):
+ text = (
+ "Data:\n\n"
+ "| Col1 | Col2 |\n"
+ "|------|------|\n"
+ "| A | B |\n"
+ )
+ out = adapter.format_message(text)
+ # Pipes inside the fenced block are NOT escaped
+ assert "```\n| Col1 | Col2 |" in out
+ assert "\\|" not in out.split("```")[1]
+ # Dashes in separator not escaped inside fence
+ assert "\\-" not in out.split("```")[1]
+
+ def test_text_after_table_still_formatted(self, adapter):
+ text = (
+ "| A | B |\n"
+ "|---|---|\n"
+ "| 1 | 2 |\n"
+ "\n"
+ "Nice **work** team!"
+ )
+ out = adapter.format_message(text)
+ # MarkdownV2 bold conversion still happens outside the table
+ assert "*work*" in out
+ # Exclamation outside fence is escaped
+ assert "\\!" in out
+
+ def test_multiple_tables_in_single_message(self, adapter):
+ text = (
+ "First:\n"
+ "| A | B |\n"
+ "|---|---|\n"
+ "| 1 | 2 |\n"
+ "\n"
+ "Second:\n"
+ "| X | Y |\n"
+ "|---|---|\n"
+ "| 9 | 8 |\n"
+ )
+ out = adapter.format_message(text)
+ # Two separate fenced blocks in the output
+ assert out.count("```") == 4
+
+
@pytest.mark.asyncio
async def test_send_escapes_chunk_indicator_for_markdownv2(adapter):
adapter.MAX_MESSAGE_LENGTH = 80
diff --git a/tests/hermes_cli/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py
index 97deab89e4..c56edc4bb2 100644
--- a/tests/hermes_cli/test_api_key_providers.py
+++ b/tests/hermes_cli/test_api_key_providers.py
@@ -33,6 +33,7 @@ class TestProviderRegistry:
("huggingface", "Hugging Face", "api_key"),
("zai", "Z.AI / GLM", "api_key"),
("xai", "xAI", "api_key"),
+ ("nvidia", "NVIDIA NIM", "api_key"),
("kimi-coding", "Kimi / Moonshot", "api_key"),
("minimax", "MiniMax", "api_key"),
("minimax-cn", "MiniMax (China)", "api_key"),
@@ -57,6 +58,12 @@ class TestProviderRegistry:
assert pconfig.base_url_env_var == "XAI_BASE_URL"
assert pconfig.inference_base_url == "https://api.x.ai/v1"
+ def test_nvidia_env_vars(self):
+ pconfig = PROVIDER_REGISTRY["nvidia"]
+ assert pconfig.api_key_env_vars == ("NVIDIA_API_KEY",)
+ assert pconfig.base_url_env_var == "NVIDIA_BASE_URL"
+ assert pconfig.inference_base_url == "https://integrate.api.nvidia.com/v1"
+
def test_copilot_env_vars(self):
pconfig = PROVIDER_REGISTRY["copilot"]
assert pconfig.api_key_env_vars == ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN")
diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py
index b448ca513f..089a5cf98d 100644
--- a/tests/hermes_cli/test_gemini_provider.py
+++ b/tests/hermes_cli/test_gemini_provider.py
@@ -178,10 +178,6 @@ class TestGeminiContextLength:
ctx = get_model_context_length("gemma-4-31b-it", provider="gemini")
assert ctx == 256000
- def test_gemma_4_26b_context(self):
- ctx = get_model_context_length("gemma-4-26b-it", provider="gemini")
- assert ctx == 256000
-
def test_gemini_3_context(self):
ctx = get_model_context_length("gemini-3.1-pro-preview", provider="gemini")
assert ctx == 1048576
diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py
index f3f2a0444a..6e10d56222 100644
--- a/tests/hermes_cli/test_update_gateway_restart.py
+++ b/tests/hermes_cli/test_update_gateway_restart.py
@@ -13,9 +13,29 @@ from unittest.mock import patch, MagicMock
import pytest
import hermes_cli.gateway as gateway_cli
+import hermes_cli.main as cli_main
from hermes_cli.main import cmd_update
+# ---------------------------------------------------------------------------
+# Skip the real-time sleeps inside cmd_update's restart-verification path
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _no_restart_verify_sleep(monkeypatch):
+ """hermes_cli/main.py uses time.sleep(3) after systemctl restart to
+ verify the service survived. Tests mock subprocess.run — nothing
+ actually restarts — so the 3s wait is dead time.
+
+ main.py does ``import time as _time`` at both module level (line 167)
+ and inside functions (lines 3281, 4384, 4401). Patching the global
+ ``time.sleep`` is undone by monkeypatch when this test ends.
+ """
+ import time as _real_time
+ monkeypatch.setattr(_real_time, "sleep", lambda *_a, **_k: None)
+
+
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
diff --git a/tests/plugins/test_retaindb_plugin.py b/tests/plugins/test_retaindb_plugin.py
index 9ad801769b..5d517bce77 100644
--- a/tests/plugins/test_retaindb_plugin.py
+++ b/tests/plugins/test_retaindb_plugin.py
@@ -31,6 +31,31 @@ def _isolate_env(tmp_path, monkeypatch):
monkeypatch.delenv("RETAINDB_PROJECT", raising=False)
+@pytest.fixture(autouse=True)
+def _cap_retaindb_sleeps(monkeypatch):
+ """Cap production-code sleeps so background-thread tests run fast.
+
+ The retaindb ``_WriteQueue._flush_row`` does ``time.sleep(2)`` after
+ errors. Across multiple tests that trigger the retry path, that adds
+ up. Cap the module's bound ``time.sleep`` to 0.05s — tests don't care
+ about the exact retry delay, only that it happens. The test file's
+ own ``time.sleep`` stays real since it uses a different reference.
+ """
+ try:
+ from plugins.memory import retaindb as _retaindb
+ except ImportError:
+ return
+
+ real_sleep = _retaindb.time.sleep
+
+ def _capped_sleep(seconds):
+ return real_sleep(min(float(seconds), 0.05))
+
+ import types as _types
+ fake_time = _types.SimpleNamespace(sleep=_capped_sleep, time=_retaindb.time.time)
+ monkeypatch.setattr(_retaindb, "time", fake_time)
+
+
# We need the repo root on sys.path so the plugin can import agent.memory_provider
import sys
_repo_root = str(Path(__file__).resolve().parents[2])
@@ -130,16 +155,18 @@ class TestWriteQueue:
def test_enqueue_creates_row(self, tmp_path):
q, client, db_path = self._make_queue(tmp_path)
q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
- # Give the writer thread a moment to process
- time.sleep(1)
+ # shutdown() blocks until the writer thread drains the queue — no need
+ # to pre-sleep (the old 1s sleep was a just-in-case wait, but shutdown
+ # does the right thing).
q.shutdown()
# If ingest succeeded, the row should be deleted
client.ingest_session.assert_called_once()
def test_enqueue_persists_to_sqlite(self, tmp_path):
client = MagicMock()
- # Make ingest hang so the row stays in SQLite
- client.ingest_session = MagicMock(side_effect=lambda *a, **kw: time.sleep(5))
+ # Make ingest slow so the row is still in SQLite when we peek.
+ # 0.5s is plenty — the test just needs the flush to still be in-flight.
+ client.ingest_session = MagicMock(side_effect=lambda *a, **kw: time.sleep(0.5))
db_path = tmp_path / "test_queue.db"
q = _WriteQueue(client, db_path)
q.enqueue("user1", "sess1", [{"role": "user", "content": "test"}])
@@ -154,8 +181,7 @@ class TestWriteQueue:
def test_flush_deletes_row_on_success(self, tmp_path):
q, client, db_path = self._make_queue(tmp_path)
q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
- time.sleep(1)
- q.shutdown()
+ q.shutdown() # blocks until drain
# Row should be gone
conn = sqlite3.connect(str(db_path))
rows = conn.execute("SELECT COUNT(*) FROM pending").fetchone()[0]
@@ -168,14 +194,20 @@ class TestWriteQueue:
db_path = tmp_path / "test_queue.db"
q = _WriteQueue(client, db_path)
q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}])
- time.sleep(3) # Allow retry + sleep(2) in _flush_row
+ # Poll for the error to be recorded (max 2s), instead of a fixed 3s wait.
+ deadline = time.time() + 2.0
+ last_error = None
+ while time.time() < deadline:
+ conn = sqlite3.connect(str(db_path))
+ row = conn.execute("SELECT last_error FROM pending").fetchone()
+ conn.close()
+ if row and row[0]:
+ last_error = row[0]
+ break
+ time.sleep(0.05)
q.shutdown()
- # Row should still exist with error recorded
- conn = sqlite3.connect(str(db_path))
- row = conn.execute("SELECT last_error FROM pending").fetchone()
- conn.close()
- assert row is not None
- assert "API down" in row[0]
+ assert last_error is not None
+ assert "API down" in last_error
def test_thread_local_connection_reuse(self, tmp_path):
q, _, _ = self._make_queue(tmp_path)
@@ -193,14 +225,27 @@ class TestWriteQueue:
client1.ingest_session = MagicMock(side_effect=RuntimeError("fail"))
q1 = _WriteQueue(client1, db_path)
q1.enqueue("user1", "sess1", [{"role": "user", "content": "lost turn"}])
- time.sleep(3)
+ # Wait until the error is recorded (poll with short interval).
+ deadline = time.time() + 2.0
+ while time.time() < deadline:
+ conn = sqlite3.connect(str(db_path))
+ row = conn.execute("SELECT last_error FROM pending").fetchone()
+ conn.close()
+ if row and row[0]:
+ break
+ time.sleep(0.05)
q1.shutdown()
# Now create a new queue — it should replay the pending rows
client2 = MagicMock()
client2.ingest_session = MagicMock(return_value={"status": "ok"})
q2 = _WriteQueue(client2, db_path)
- time.sleep(2)
+ # Poll for the replay to happen.
+ deadline = time.time() + 2.0
+ while time.time() < deadline:
+ if client2.ingest_session.called:
+ break
+ time.sleep(0.05)
q2.shutdown()
# The replayed row should have been ingested via client2
diff --git a/tests/run_agent/conftest.py b/tests/run_agent/conftest.py
new file mode 100644
index 0000000000..9b431869bf
--- /dev/null
+++ b/tests/run_agent/conftest.py
@@ -0,0 +1,34 @@
+"""Fast-path fixtures shared across tests/run_agent/.
+
+Many tests in this directory exercise the retry/backoff paths in the
+agent loop. Production code uses ``jittered_backoff(base_delay=5.0)``
+with a ``while time.time() < sleep_end`` loop — a single retry test
+spends 5+ seconds of real wall-clock time on backoff waits.
+
+Mocking ``jittered_backoff`` to return 0.0 collapses the while-loop
+to a no-op (``time.time() < time.time() + 0`` is false immediately),
+which handles the most common case without touching ``time.sleep``.
+
+We deliberately DO NOT mock ``time.sleep`` here — some tests
+(test_interrupt_propagation, test_primary_runtime_restore, etc.) use
+the real ``time.sleep`` for threading coordination or assert that it
+was called with specific values. Tests that want to additionally
+fast-path direct ``time.sleep(N)`` calls in production code should
+monkeypatch ``run_agent.time.sleep`` locally (see
+``test_anthropic_error_handling.py`` for the pattern).
+"""
+
+from __future__ import annotations
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _fast_retry_backoff(monkeypatch):
+ """Short-circuit retry backoff for all tests in this directory."""
+ try:
+ import run_agent
+ except ImportError:
+ return
+
+ monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
diff --git a/tests/run_agent/test_413_compression.py b/tests/run_agent/test_413_compression.py
index e8835c6412..8bd357d3d2 100644
--- a/tests/run_agent/test_413_compression.py
+++ b/tests/run_agent/test_413_compression.py
@@ -19,6 +19,24 @@ import pytest
from agent.context_compressor import SUMMARY_PREFIX
from run_agent import AIAgent
+import run_agent
+
+
+# ---------------------------------------------------------------------------
+# Fast backoff for compression retry tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _no_compression_sleep(monkeypatch):
+ """Short-circuit the 2s time.sleep between compression retries.
+
+ Production code has ``time.sleep(2)`` in multiple places after a 413/context
+ compression, for rate-limit smoothing. Tests assert behavior, not timing.
+ """
+ import time as _time
+ monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)
+ monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
# ---------------------------------------------------------------------------
diff --git a/tests/run_agent/test_anthropic_error_handling.py b/tests/run_agent/test_anthropic_error_handling.py
index 00055928e0..cdf3372544 100644
--- a/tests/run_agent/test_anthropic_error_handling.py
+++ b/tests/run_agent/test_anthropic_error_handling.py
@@ -27,6 +27,39 @@ from gateway.config import Platform
from gateway.session import SessionSource
+# ---------------------------------------------------------------------------
+# Fast backoff for tests that exercise the retry loop
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _no_backoff_wait(monkeypatch):
+ """Short-circuit retry backoff so tests don't block on real wall-clock waits.
+
+ The production code uses jittered_backoff() with a 5s base delay plus a
+ tight time.sleep(0.2) loop. Without this patch, each 429/500/529 retry
+ test burns ~10s of real time on CI — across six tests that's ~60s for
+ behavior we're not asserting against timing.
+
+ Tests assert retry counts and final results, never wait durations.
+ """
+ import asyncio as _asyncio
+ import time as _time
+
+ monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
+ monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)
+
+ # Also fast-path asyncio.sleep — the gateway's _run_agent path has
+ # several await asyncio.sleep(...) calls that add real wall-clock time.
+ _real_asyncio_sleep = _asyncio.sleep
+
+ async def _fast_sleep(delay=0, *args, **kwargs):
+ # Yield to the event loop but skip the actual delay.
+ await _real_asyncio_sleep(0)
+
+ monkeypatch.setattr(_asyncio, "sleep", _fast_sleep)
+
+
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
diff --git a/tests/run_agent/test_exit_cleanup_interrupt.py b/tests/run_agent/test_exit_cleanup_interrupt.py
index 6a5d7b363a..1e5d8431c3 100644
--- a/tests/run_agent/test_exit_cleanup_interrupt.py
+++ b/tests/run_agent/test_exit_cleanup_interrupt.py
@@ -13,6 +13,24 @@ from unittest.mock import MagicMock, patch, call
import pytest
+@pytest.fixture(autouse=True)
+def _mock_runtime_provider(monkeypatch):
+ """run_job calls resolve_runtime_provider which can try real network
+ auto-detection (~4s of socket timeouts in hermetic CI). Mock it out
+ since these tests don't care about provider resolution — the agent
+ is mocked too."""
+ import hermes_cli.runtime_provider as rp
+ def _fake_resolve(*args, **kwargs):
+ return {
+ "provider": "openrouter",
+ "api_key": "test-key",
+ "base_url": "https://openrouter.ai/api/v1",
+ "model": "test/model",
+ "api_mode": "chat_completions",
+ }
+ monkeypatch.setattr(rp, "resolve_runtime_provider", _fake_resolve)
+
+
class TestCronJobCleanup:
"""cron/scheduler.py — end_session + close in the finally block."""
diff --git a/tests/run_agent/test_fallback_model.py b/tests/run_agent/test_fallback_model.py
index 6491bd686d..d2aec022ef 100644
--- a/tests/run_agent/test_fallback_model.py
+++ b/tests/run_agent/test_fallback_model.py
@@ -11,6 +11,16 @@ from unittest.mock import MagicMock, patch
import pytest
from run_agent import AIAgent
+import run_agent
+
+
+@pytest.fixture(autouse=True)
+def _no_fallback_wait(monkeypatch):
+ """Short-circuit time.sleep in fallback/recovery paths so tests don't
+ block on the ``min(3 + retry_count, 8)`` wait before a primary retry."""
+ import time as _time
+ monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)
+ monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
def _make_tool_defs(*names: str) -> list:
diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
index 4ff00018d2..81213aaf67 100644
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -12,6 +12,15 @@ sys.modules.setdefault("fal_client", types.SimpleNamespace())
import run_agent
+@pytest.fixture(autouse=True)
+def _no_codex_backoff(monkeypatch):
+ """Short-circuit retry backoff so Codex retry tests don't block on real
+ wall-clock waits (5s jittered_backoff base delay + tight time.sleep loop)."""
+ import time as _time
+ monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0)
+ monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None)
+
+
def _patch_agent_bootstrap(monkeypatch):
monkeypatch.setattr(
run_agent,
diff --git a/tests/test_timezone.py b/tests/test_timezone.py
index 1af60cbfa2..ffb831617d 100644
--- a/tests/test_timezone.py
+++ b/tests/test_timezone.py
@@ -159,18 +159,34 @@ class TestCodeExecutionTZ:
return _json.dumps({"error": f"unexpected tool call: {function_name}"})
def test_tz_injected_when_configured(self):
- """When HERMES_TIMEZONE is set, child process sees TZ env var."""
+ """When HERMES_TIMEZONE is set, child process sees TZ env var.
+
+ Verified alongside the leak-prevention check in one subprocess
+ call so we don't pay 2x the subprocess startup cost
+ (each execute_code spawns a real Python subprocess ~3s).
+ """
import json as _json
os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata"
+ # One subprocess, two things checked:
+ # 1) TZ is injected as "Asia/Kolkata"
+ # 2) HERMES_TIMEZONE itself does NOT leak into the child env
+ probe = (
+ 'import os; '
+ 'print("TZ=" + os.environ.get("TZ", "NOT_SET")); '
+ 'print("HERMES_TIMEZONE=" + os.environ.get("HERMES_TIMEZONE", "NOT_SET"))'
+ )
with patch("model_tools.handle_function_call", side_effect=self._mock_handle):
result = _json.loads(self._execute_code(
- code='import os; print(os.environ.get("TZ", "NOT_SET"))',
- task_id="tz-test",
+ code=probe,
+ task_id="tz-combined-test",
enabled_tools=[],
))
assert result["status"] == "success"
- assert "Asia/Kolkata" in result["output"]
+ assert "TZ=Asia/Kolkata" in result["output"]
+ assert "HERMES_TIMEZONE=NOT_SET" in result["output"], (
+ "HERMES_TIMEZONE should not leak into child env (only TZ)"
+ )
def test_tz_not_injected_when_empty(self):
"""When HERMES_TIMEZONE is not set, child process has no TZ."""
@@ -186,20 +202,6 @@ class TestCodeExecutionTZ:
assert result["status"] == "success"
assert "NOT_SET" in result["output"]
- def test_hermes_timezone_not_leaked_to_child(self):
- """HERMES_TIMEZONE itself must NOT appear in child env (only TZ)."""
- import json as _json
- os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata"
-
- with patch("model_tools.handle_function_call", side_effect=self._mock_handle):
- result = _json.loads(self._execute_code(
- code='import os; print(os.environ.get("HERMES_TIMEZONE", "NOT_SET"))',
- task_id="tz-leak-test",
- enabled_tools=[],
- ))
- assert result["status"] == "success"
- assert "NOT_SET" in result["output"]
-
# =========================================================================
# Cron timezone-aware scheduling
diff --git a/tests/tools/test_local_env_blocklist.py b/tests/tools/test_local_env_blocklist.py
index b196cea781..0377d59b36 100644
--- a/tests/tools/test_local_env_blocklist.py
+++ b/tests/tools/test_local_env_blocklist.py
@@ -86,6 +86,7 @@ class TestProviderEnvBlocklist:
"MINIMAX_API_KEY": "mm-key",
"MINIMAX_CN_API_KEY": "mmcn-key",
"DEEPSEEK_API_KEY": "deepseek-key",
+ "NVIDIA_API_KEY": "nvidia-key",
}
result_env = _run_with_env(extra_os_env=registry_vars)
diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md
index 1f721586c9..428d23b7ce 100644
--- a/website/docs/getting-started/quickstart.md
+++ b/website/docs/getting-started/quickstart.md
@@ -61,6 +61,7 @@ hermes setup # Or configure everything at once
| **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` |
| **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
| **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` |
+| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
| **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` |
| **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) |
| **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` |
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index e3d0ad8284..750ad671cd 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -295,6 +295,30 @@ When using xAI as a provider (any base URL containing `x.ai`), Hermes automatica
No configuration is needed — caching activates automatically when an xAI endpoint is detected and a session ID is available. This reduces latency and cost for multi-turn conversations.
+### NVIDIA NIM
+
+Nemotron and other open source models via [build.nvidia.com](https://build.nvidia.com) (free API key) or a local NIM endpoint.
+
+```bash
+# Cloud (build.nvidia.com)
+hermes chat --provider nvidia --model nvidia/nemotron-3-super-120b-a12b
+# Requires: NVIDIA_API_KEY in ~/.hermes/.env
+
+# Local NIM endpoint — override base URL
+NVIDIA_BASE_URL=http://localhost:8000/v1 hermes chat --provider nvidia --model nvidia/nemotron-3-super-120b-a12b
+```
+
+Or set it permanently in `config.yaml`:
+```yaml
+model:
+ provider: "nvidia"
+ default: "nvidia/nemotron-3-super-120b-a12b"
+```
+
+:::tip Local NIM
+For on-prem deployments (DGX Spark, local GPU), set `NVIDIA_BASE_URL=http://localhost:8000/v1`. NIM exposes the same OpenAI-compatible chat completions API as build.nvidia.com, so switching between cloud and local is a one-line env-var change.
+:::
+
### Hugging Face Inference Providers
[Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) with automatic failover.
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index 6aa8197dbb..ead884ba7b 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -290,7 +290,7 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI
| `QQ_ALLOWED_USERS` | Comma-separated QQ user openIDs allowed to message the bot |
| `QQ_GROUP_ALLOWED_USERS` | Comma-separated QQ group IDs for group @-message access |
| `QQ_ALLOW_ALL_USERS` | Allow all users (`true`/`false`, overrides `QQ_ALLOWED_USERS`) |
-| `QQ_HOME_CHANNEL` | QQ user/group openID for cron delivery and notifications |
+| `QQBOT_HOME_CHANNEL` | QQ user/group openID for cron delivery and notifications |
| `MATTERMOST_URL` | Mattermost server URL (e.g. `https://mm.example.com`) |
| `MATTERMOST_TOKEN` | Bot token or personal access token for Mattermost |
| `MATTERMOST_ALLOWED_USERS` | Comma-separated Mattermost user IDs allowed to message the bot |
diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md
index 1e2b2a8035..12fde185d4 100644
--- a/website/docs/user-guide/features/fallback-providers.md
+++ b/website/docs/user-guide/features/fallback-providers.md
@@ -47,6 +47,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
| MiniMax | `minimax` | `MINIMAX_API_KEY` |
| MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` |
| DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
+| NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
| OpenCode Zen | `opencode-zen` | `OPENCODE_ZEN_API_KEY` |
| OpenCode Go | `opencode-go` | `OPENCODE_GO_API_KEY` |
| Kilo Code | `kilocode` | `KILOCODE_API_KEY` |
diff --git a/website/docs/user-guide/messaging/qqbot.md b/website/docs/user-guide/messaging/qqbot.md
index 686fd862e8..d9da90d586 100644
--- a/website/docs/user-guide/messaging/qqbot.md
+++ b/website/docs/user-guide/messaging/qqbot.md
@@ -48,8 +48,8 @@ QQ_CLIENT_SECRET=your-app-secret
|---|---|---|
| `QQ_APP_ID` | QQ Bot App ID (required) | — |
| `QQ_CLIENT_SECRET` | QQ Bot App Secret (required) | — |
-| `QQ_HOME_CHANNEL` | OpenID for cron/notification delivery | — |
-| `QQ_HOME_CHANNEL_NAME` | Display name for home channel | `Home` |
+| `QQBOT_HOME_CHANNEL` | OpenID for cron/notification delivery | — |
+| `QQBOT_HOME_CHANNEL_NAME` | Display name for home channel | `Home` |
| `QQ_ALLOWED_USERS` | Comma-separated user OpenIDs for DM access | open (all users) |
| `QQ_ALLOW_ALL_USERS` | Set to `true` to allow all DMs | `false` |
| `QQ_MARKDOWN_SUPPORT` | Enable QQ markdown (msg_type 2) | `true` |
@@ -113,7 +113,7 @@ This usually means:
- Verify the bot's **intents** are enabled at q.qq.com
- Check `QQ_ALLOWED_USERS` if DM access is restricted
- For group messages, ensure the bot is **@mentioned** (group policy may require allowlisting)
-- Check `QQ_HOME_CHANNEL` for cron/notification delivery
+- Check `QQBOT_HOME_CHANNEL` for cron/notification delivery
### Connection errors