From 3b569ff57638a9ec151a0a0941690a08c8314620 Mon Sep 17 00:00:00 2001 From: asurla Date: Fri, 17 Apr 2026 09:55:58 -0700 Subject: [PATCH 01/20] feat(providers): add native NVIDIA NIM provider Adds NVIDIA NIM as a first-class provider: ProviderConfig in auth.py, HermesOverlay in providers.py, curated models (Nemotron plus other open source models hosted on build.nvidia.com), URL mapping in model_metadata.py, aliases (nim, nvidia-nim, build-nvidia, nemotron), and env var tests. Docs updated: providers page, quickstart table, fallback providers table, and README provider list. --- README.md | 2 +- agent/model_metadata.py | 2 ++ hermes_cli/auth.py | 8 +++++++ hermes_cli/models.py | 12 ++++++++++ hermes_cli/providers.py | 11 +++++++++ tests/hermes_cli/test_api_key_providers.py | 7 ++++++ website/docs/getting-started/quickstart.md | 1 + website/docs/integrations/providers.md | 24 +++++++++++++++++++ .../user-guide/features/fallback-providers.md | 1 + 9 files changed, 67 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 07a140419..088c3b91b 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM. -Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. 
Switch with `hermes model` — no code changes, no lock-in. +Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in. diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 089fd132a..ec0e3540f 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -38,6 +38,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "mimo", "xiaomi-mimo", "arcee-ai", "arceeai", "xai", "x-ai", "x.ai", "grok", + "nvidia", "nim", "nvidia-nim", "nemotron", "qwen-portal", }) @@ -240,6 +241,7 @@ _URL_TO_PROVIDER: Dict[str, str] = { "api.fireworks.ai": "fireworks", "opencode.ai": "opencode-go", "api.x.ai": "xai", + "integrate.api.nvidia.com": "nvidia", "api.xiaomimimo.com": "xiaomi", "xiaomimimo.com": "xiaomi", "ollama.com": "ollama-cloud", diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index e79a6dca6..421836c23 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -233,6 +233,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { api_key_env_vars=("XAI_API_KEY",), base_url_env_var="XAI_BASE_URL", ), + "nvidia": ProviderConfig( + id="nvidia", + name="NVIDIA NIM", + auth_type="api_key", + inference_base_url="https://integrate.api.nvidia.com/v1", + api_key_env_vars=("NVIDIA_API_KEY",), + base_url_env_var="NVIDIA_BASE_URL", + ), "ai-gateway": ProviderConfig( id="ai-gateway", name="Vercel AI Gateway", diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 5b998ddc6..6ec5c750b 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -155,6 +155,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "grok-4.20-reasoning", "grok-4-1-fast-reasoning", ], + 
"nvidia": [ + "nvidia/nemotron-3-super-120b-a12b", + "nvidia/nemotron-3-nano-8b-a4b", + "z-ai/glm5", + "moonshotai/kimi-k2.5", + "minimaxai/minimax-m2.5", + ], "kimi-coding": [ "kimi-k2.5", "kimi-for-coding", @@ -544,6 +551,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"), ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"), ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"), + ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"), ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"), ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"), ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"), @@ -618,6 +626,10 @@ _PROVIDER_ALIASES = { "grok": "xai", "x-ai": "xai", "x.ai": "xai", + "nim": "nvidia", + "nvidia-nim": "nvidia", + "build-nvidia": "nvidia", + "nemotron": "nvidia", "ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud "ollama_cloud": "ollama-cloud", } diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index b2dda20be..a71055cfe 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -137,6 +137,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { base_url_override="https://api.x.ai/v1", base_url_env_var="XAI_BASE_URL", ), + "nvidia": HermesOverlay( + transport="openai_chat", + base_url_override="https://integrate.api.nvidia.com/v1", + base_url_env_var="NVIDIA_BASE_URL", + ), "xiaomi": HermesOverlay( transport="openai_chat", base_url_env_var="XIAOMI_BASE_URL", @@ -191,6 +196,12 @@ ALIASES: Dict[str, str] = { "x.ai": "xai", "grok": "xai", + # nvidia + "nim": "nvidia", + "nvidia-nim": "nvidia", + "build-nvidia": "nvidia", + "nemotron": "nvidia", + # 
kimi-for-coding (models.dev ID) "kimi": "kimi-for-coding", "kimi-coding": "kimi-for-coding", diff --git a/tests/hermes_cli/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py index 97deab89e..c56edc4bb 100644 --- a/tests/hermes_cli/test_api_key_providers.py +++ b/tests/hermes_cli/test_api_key_providers.py @@ -33,6 +33,7 @@ class TestProviderRegistry: ("huggingface", "Hugging Face", "api_key"), ("zai", "Z.AI / GLM", "api_key"), ("xai", "xAI", "api_key"), + ("nvidia", "NVIDIA NIM", "api_key"), ("kimi-coding", "Kimi / Moonshot", "api_key"), ("minimax", "MiniMax", "api_key"), ("minimax-cn", "MiniMax (China)", "api_key"), @@ -57,6 +58,12 @@ class TestProviderRegistry: assert pconfig.base_url_env_var == "XAI_BASE_URL" assert pconfig.inference_base_url == "https://api.x.ai/v1" + def test_nvidia_env_vars(self): + pconfig = PROVIDER_REGISTRY["nvidia"] + assert pconfig.api_key_env_vars == ("NVIDIA_API_KEY",) + assert pconfig.base_url_env_var == "NVIDIA_BASE_URL" + assert pconfig.inference_base_url == "https://integrate.api.nvidia.com/v1" + def test_copilot_env_vars(self): pconfig = PROVIDER_REGISTRY["copilot"] assert pconfig.api_key_env_vars == ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN") diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md index 880c01cb2..bda74b9ed 100644 --- a/website/docs/getting-started/quickstart.md +++ b/website/docs/getting-started/quickstart.md @@ -61,6 +61,7 @@ hermes setup # Or configure everything at once | **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` | | **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` | | **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` | +| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) | | **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) 
| OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` | | **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) | | **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` | diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index e3d0ad828..750ad671c 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -295,6 +295,30 @@ When using xAI as a provider (any base URL containing `x.ai`), Hermes automatica No configuration is needed — caching activates automatically when an xAI endpoint is detected and a session ID is available. This reduces latency and cost for multi-turn conversations. +### NVIDIA NIM + +Nemotron and other open source models via [build.nvidia.com](https://build.nvidia.com) (free API key) or a local NIM endpoint. + +```bash +# Cloud (build.nvidia.com) +hermes chat --provider nvidia --model nvidia/nemotron-3-super-120b-a12b +# Requires: NVIDIA_API_KEY in ~/.hermes/.env + +# Local NIM endpoint — override base URL +NVIDIA_BASE_URL=http://localhost:8000/v1 hermes chat --provider nvidia --model nvidia/nemotron-3-super-120b-a12b +``` + +Or set it permanently in `config.yaml`: +```yaml +model: + provider: "nvidia" + default: "nvidia/nemotron-3-super-120b-a12b" +``` + +:::tip Local NIM +For on-prem deployments (DGX Spark, local GPU), set `NVIDIA_BASE_URL=http://localhost:8000/v1`. NIM exposes the same OpenAI-compatible chat completions API as build.nvidia.com, so switching between cloud and local is a one-line env-var change. +::: + ### Hugging Face Inference Providers [Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) 
with automatic failover. diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index 1e2b2a803..12fde185d 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -47,6 +47,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback | MiniMax | `minimax` | `MINIMAX_API_KEY` | | MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` | | DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` | +| NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) | | OpenCode Zen | `opencode-zen` | `OPENCODE_ZEN_API_KEY` | | OpenCode Go | `opencode-go` | `OPENCODE_GO_API_KEY` | | Kilo Code | `kilocode` | `KILOCODE_API_KEY` | From f362083c645374dd86465aec13f030aa52899607 Mon Sep 17 00:00:00 2001 From: Teknium Date: Fri, 17 Apr 2026 13:09:14 -0700 Subject: [PATCH 02/20] fix(providers): complete NVIDIA NIM parity with other providers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up on the native NVIDIA NIM provider salvage. The original PR wired PROVIDER_REGISTRY + HERMES_OVERLAYS correctly but missed several touchpoints required for full parity with other OpenAI-compatible providers (xai, huggingface, deepseek, zai). Gaps closed: - hermes_cli/main.py: - Add 'nvidia' to the _model_flow_api_key_provider dispatch tuple so selecting 'NVIDIA NIM' in `hermes model` actually runs the api-key provider flow (previously fell through silently). - Add 'nvidia' to `hermes chat --provider` argparse choices so the documented test command (`hermes chat --provider nvidia --model ...`) parses successfully. - hermes_cli/config.py: Register NVIDIA_API_KEY and NVIDIA_BASE_URL in OPTIONAL_ENV_VARS so setup wizard can prompt for them and they're auto-added to the subprocess env blocklist. 
- hermes_cli/doctor.py: Add NVIDIA NIM row to `_apikey_providers` so `hermes doctor` probes https://integrate.api.nvidia.com/v1/models. - hermes_cli/dump.py: Add NVIDIA_API_KEY → 'nvidia' mapping for `hermes dump` credential masking. - tests/tools/test_local_env_blocklist.py: Extend registry_vars fixture with NVIDIA_API_KEY to verify it's blocked from leaking into subprocesses. - agent/model_metadata.py: Add 'nemotron' → 131072 context-length entry so all Nemotron variants get 128K context via substring match (rather than falling back to MINIMUM_CONTEXT_LENGTH). - hermes_cli/models.py: Fix hallucinated model ID 'nvidia/nemotron-3-nano-8b-a4b' → 'nvidia/nemotron-3-nano-30b-a3b' (verified against live integrate.api.nvidia.com/v1/models catalog). Expand curated list from 5 to 9 agentic models mapping to OpenRouter defaults per provider-guide convention: add qwen3.5-397b-a17b, deepseek-v3.2, llama-3.3-nemotron-super-49b-v1.5, gpt-oss-120b. - cli-config.yaml.example: Document 'nvidia' provider option. - scripts/release.py: Map asurla@nvidia.com → anniesurla in AUTHOR_MAP for CI attribution. E2E verified: `hermes chat --provider nvidia ...` now reaches NVIDIA's endpoint (returns 401 with bogus key instead of argparse error); `hermes doctor` detects NVIDIA NIM when NVIDIA_API_KEY is set. 
--- agent/model_metadata.py | 2 ++ cli-config.yaml.example | 1 + hermes_cli/config.py | 16 ++++++++++++++++ hermes_cli/doctor.py | 1 + hermes_cli/dump.py | 1 + hermes_cli/main.py | 4 ++-- hermes_cli/models.py | 13 ++++++++++--- scripts/release.py | 1 + tests/tools/test_local_env_blocklist.py | 1 + 9 files changed, 35 insertions(+), 5 deletions(-) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index ec0e3540f..b30af6e48 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -159,6 +159,8 @@ DEFAULT_CONTEXT_LENGTHS = { "grok": 131072, # catch-all (grok-beta, unknown grok-*) # Kimi "kimi": 262144, + # Nemotron — NVIDIA's open-weights series (128K context across all sizes) + "nemotron": 131072, # Arcee "trinity": 262144, # OpenRouter diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 8c0484abd..20b54b788 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -24,6 +24,7 @@ model: # "minimax" - MiniMax global (requires: MINIMAX_API_KEY) # "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY) # "huggingface" - Hugging Face Inference (requires: HF_TOKEN) + # "nvidia" - NVIDIA NIM / build.nvidia.com (requires: NVIDIA_API_KEY) # "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY) # "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY) # "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index c7df03370..f08e29266 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -861,6 +861,22 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "NVIDIA_API_KEY": { + "description": "NVIDIA NIM API key (build.nvidia.com or local NIM endpoint)", + "prompt": "NVIDIA NIM API key", + "url": "https://build.nvidia.com/", + "password": True, + "category": "provider", + "advanced": True, + }, + "NVIDIA_BASE_URL": { + "description": "NVIDIA NIM base URL override (e.g. 
http://localhost:8000/v1 for local NIM)", + "prompt": "NVIDIA NIM base URL (leave empty for default)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, "GLM_API_KEY": { "description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)", "prompt": "Z.AI / GLM API key", diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index d044ddf4c..28c4af1fa 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -825,6 +825,7 @@ def run_doctor(args): ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True), ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True), ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True), + ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True), ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True), # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does. 
("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True), diff --git a/hermes_cli/dump.py b/hermes_cli/dump.py index a52079085..ae8ecc641 100644 --- a/hermes_cli/dump.py +++ b/hermes_cli/dump.py @@ -296,6 +296,7 @@ def run_dump(args): ("DEEPSEEK_API_KEY", "deepseek"), ("DASHSCOPE_API_KEY", "dashscope"), ("HF_TOKEN", "huggingface"), + ("NVIDIA_API_KEY", "nvidia"), ("AI_GATEWAY_API_KEY", "ai_gateway"), ("OPENCODE_ZEN_API_KEY", "opencode_zen"), ("OPENCODE_GO_API_KEY", "opencode_go"), diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 53f59a210..10597db15 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1143,7 +1143,7 @@ def select_provider_and_model(args=None): _model_flow_kimi(config, current_model) elif selected_provider == "bedrock": _model_flow_bedrock(config, current_model) - elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee", "ollama-cloud"): + elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee", "nvidia", "ollama-cloud"): _model_flow_api_key_provider(config, selected_provider, current_model) # ── Post-switch cleanup: clear stale OPENAI_BASE_URL ────────────── @@ -4954,7 +4954,7 @@ For more help on a command: ) chat_parser.add_argument( "--provider", - choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "xai", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"], + choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "xai", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", 
"kilocode", "xiaomi", "arcee", "nvidia"], default=None, help="Inference provider (default: auto)" ) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 6ec5c750b..7a897cb79 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -156,11 +156,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "grok-4-1-fast-reasoning", ], "nvidia": [ + # NVIDIA flagship reasoning models "nvidia/nemotron-3-super-120b-a12b", - "nvidia/nemotron-3-nano-8b-a4b", - "z-ai/glm5", + "nvidia/nemotron-3-nano-30b-a3b", + "nvidia/llama-3.3-nemotron-super-49b-v1.5", + # Third-party agentic models hosted on build.nvidia.com + # (map to OpenRouter defaults — users get familiar picks on NIM) + "qwen/qwen3.5-397b-a17b", + "deepseek-ai/deepseek-v3.2", "moonshotai/kimi-k2.5", "minimaxai/minimax-m2.5", + "z-ai/glm5", + "openai/gpt-oss-120b", ], "kimi-coding": [ "kimi-k2.5", @@ -543,6 +550,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"), ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"), ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"), + ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"), ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"), ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"), ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"), @@ -551,7 +559,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"), ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"), ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"), - ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM 
(Nemotron models — build.nvidia.com or local NIM)"), ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"), ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"), ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"), diff --git a/scripts/release.py b/scripts/release.py index 028f75ba6..880aebef9 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -256,6 +256,7 @@ AUTHOR_MAP = { "anthhub@163.com": "anthhub", "shenuu@gmail.com": "shenuu", "xiayh17@gmail.com": "xiayh0107", + "asurla@nvidia.com": "anniesurla", } diff --git a/tests/tools/test_local_env_blocklist.py b/tests/tools/test_local_env_blocklist.py index b196cea78..0377d59b3 100644 --- a/tests/tools/test_local_env_blocklist.py +++ b/tests/tools/test_local_env_blocklist.py @@ -86,6 +86,7 @@ class TestProviderEnvBlocklist: "MINIMAX_API_KEY": "mm-key", "MINIMAX_CN_API_KEY": "mmcn-key", "DEEPSEEK_API_KEY": "deepseek-key", + "NVIDIA_API_KEY": "nvidia-key", } result_env = _run_with_env(extra_os_env=registry_vars) From eb07c056464b397eedd312e1ba2dd919035ddd40 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 17 Apr 2026 13:48:49 -0700 Subject: [PATCH 03/20] fix(gateway): prune stale SessionStore entries to bound memory + disk (#11789) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SessionStore._entries grew unbounded. Every unique (platform, chat_id, thread_id, user_id) tuple ever seen was kept in RAM and rewritten to sessions.json on every message. A Discord bot in 100 servers x 100 channels x ~100 rotating users accumulates on the order of 10^5 entries after a few months; each sessions.json write becomes an O(n) fsync. Nothing trimmed this — there was no TTL, no cap, no eviction path. 
Changes ------- * SessionStore.prune_old_entries(max_age_days) — drops entries whose updated_at is older than the cutoff. Preserves: - suspended entries (user paused them via /stop for later resume) - entries with an active background process attached Pruning is functionally identical to a natural reset-policy expiry: SQLite transcript stays, session_key -> session_id mapping dropped, returning user gets a fresh session. * GatewayConfig.session_store_max_age_days (default 90; 0 disables). Serialized in to_dict/from_dict, coerced from bad types / negatives to safe defaults. No migration needed — missing field -> 90 days. * _session_expiry_watcher calls prune_old_entries once per hour (first tick is immediate). Uses the existing watcher loop so no new background task is created. Why not more aggressive ----------------------- 90 days is long enough that legitimate long-idle users (seasonal, vacation, etc.) aren't surprised — pruning just means they get a fresh session on return, same outcome they'd get from any other reset-policy trigger. Admins can lower it via config; 0 disables. Tests ----- tests/gateway/test_session_store_prune.py — 17 cases covering: * entry age based on updated_at, not created_at * max_age_days=0 disables; negative coerces to 0 * suspended + active-process entries are skipped * _save fires iff something was removed * disk JSON reflects post-prune state * thread safety against concurrent readers * config field roundtrips + graceful fallback on bad values * watcher gate logic (first tick prunes, subsequent within 1h don't) 119 broader session/gateway tests remain green. 
--- gateway/config.py | 16 ++ gateway/run.py | 24 ++ gateway/session.py | 51 ++++ tests/gateway/test_session_store_prune.py | 270 ++++++++++++++++++++++ 4 files changed, 361 insertions(+) create mode 100644 tests/gateway/test_session_store_prune.py diff --git a/gateway/config.py b/gateway/config.py index 1258e0899..799b151b7 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -258,6 +258,13 @@ class GatewayConfig: # Streaming configuration streaming: StreamingConfig = field(default_factory=StreamingConfig) + # Session store pruning: drop SessionEntry records older than this many + # days from the in-memory dict and sessions.json. Keeps the store from + # growing unbounded in gateways serving many chats/threads/users over + # months. Pruning is invisible to users — if they resume, they get a + # fresh session exactly as if the reset policy had fired. 0 = disabled. + session_store_max_age_days: int = 90 + def get_connected_platforms(self) -> List[Platform]: """Return list of platforms that are enabled and configured.""" connected = [] @@ -365,6 +372,7 @@ class GatewayConfig: "thread_sessions_per_user": self.thread_sessions_per_user, "unauthorized_dm_behavior": self.unauthorized_dm_behavior, "streaming": self.streaming.to_dict(), + "session_store_max_age_days": self.session_store_max_age_days, } @classmethod @@ -412,6 +420,13 @@ class GatewayConfig: "pair", ) + try: + session_store_max_age_days = int(data.get("session_store_max_age_days", 90)) + if session_store_max_age_days < 0: + session_store_max_age_days = 0 + except (TypeError, ValueError): + session_store_max_age_days = 90 + return cls( platforms=platforms, default_reset_policy=default_policy, @@ -426,6 +441,7 @@ class GatewayConfig: thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False), unauthorized_dm_behavior=unauthorized_dm_behavior, streaming=StreamingConfig.from_dict(data.get("streaming", {})), + session_store_max_age_days=session_store_max_age_days, ) def 
get_unauthorized_dm_behavior(self, platform: Optional[Platform] = None) -> str: diff --git a/gateway/run.py b/gateway/run.py index ca0725426..e642d9df0 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -2178,6 +2178,30 @@ class GatewayRunner: ) except Exception as _e: logger.debug("Idle agent sweep failed: %s", _e) + + # Periodically prune stale SessionStore entries. The + # in-memory dict (and sessions.json) would otherwise grow + # unbounded in gateways serving many rotating chats / + # threads / users over long time windows. Pruning is + # invisible to users — a resumed session just gets a + # fresh session_id, exactly as if the reset policy fired. + _last_prune_ts = getattr(self, "_last_session_store_prune_ts", 0.0) + _prune_interval = 3600.0 # once per hour + if time.time() - _last_prune_ts > _prune_interval: + try: + _max_age = int( + getattr(self.config, "session_store_max_age_days", 0) or 0 + ) + if _max_age > 0: + _pruned = self.session_store.prune_old_entries(_max_age) + if _pruned: + logger.info( + "SessionStore prune: dropped %d stale entries", + _pruned, + ) + except Exception as _e: + logger.debug("SessionStore prune failed: %s", _e) + self._last_session_store_prune_ts = time.time() except Exception as e: logger.debug("Session expiry watcher error: %s", e) # Sleep in small increments so we can stop quickly diff --git a/gateway/session.py b/gateway/session.py index f057d1cfc..4cb623128 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -802,6 +802,57 @@ class SessionStore: return True return False + def prune_old_entries(self, max_age_days: int) -> int: + """Drop SessionEntry records older than max_age_days. + + Pruning is based on ``updated_at`` (last activity), not ``created_at``. + A session that's been active within the window is kept regardless of + how old it is. Entries marked ``suspended`` are kept — the user + explicitly paused them for later resume. 
Entries held by an active + process (via has_active_processes_fn) are also kept so long-running + background work isn't orphaned. + + Pruning is functionally identical to a natural reset-policy expiry: + the transcript in SQLite stays, but the session_key → session_id + mapping is dropped and the user starts a fresh session on return. + + ``max_age_days <= 0`` disables pruning; returns 0 immediately. + Returns the number of entries removed. + """ + if max_age_days is None or max_age_days <= 0: + return 0 + from datetime import timedelta + + cutoff = _now() - timedelta(days=max_age_days) + removed_keys: list[str] = [] + + with self._lock: + self._ensure_loaded_locked() + for key, entry in list(self._entries.items()): + if entry.suspended: + continue + # Never prune sessions with an active background process + # attached — the user may still be waiting on output. + if self._has_active_processes_fn is not None: + try: + if self._has_active_processes_fn(entry.session_id): + continue + except Exception: + pass + if entry.updated_at < cutoff: + removed_keys.append(key) + for key in removed_keys: + self._entries.pop(key, None) + if removed_keys: + self._save() + + if removed_keys: + logger.info( + "SessionStore pruned %d entries older than %d days", + len(removed_keys), max_age_days, + ) + return len(removed_keys) + def suspend_recently_active(self, max_age_seconds: int = 120) -> int: """Mark recently-active sessions as suspended. diff --git a/tests/gateway/test_session_store_prune.py b/tests/gateway/test_session_store_prune.py new file mode 100644 index 000000000..9b1dca297 --- /dev/null +++ b/tests/gateway/test_session_store_prune.py @@ -0,0 +1,270 @@ +"""Tests for SessionStore.prune_old_entries and the gateway watcher that calls it. + +The SessionStore in-memory dict (and its backing sessions.json) grew +unbounded — every unique (platform, chat_id, thread_id, user_id) tuple +ever seen was kept forever, regardless of how stale it became. 
These +tests pin the prune behaviour: + + * Entries older than max_age_days (by updated_at) are removed + * Entries marked ``suspended`` are preserved (user-paused) + * Entries with an active process attached are preserved + * max_age_days <= 0 disables pruning entirely + * sessions.json is rewritten with the post-prune dict + * The ``updated_at`` field — not ``created_at`` — drives the decision + (so a long-running-but-still-active session isn't pruned) +""" + +import json +import threading +from datetime import datetime, timedelta +from unittest.mock import patch + +import pytest + +from gateway.config import GatewayConfig, Platform, SessionResetPolicy +from gateway.session import SessionEntry, SessionStore + + +def _make_store(tmp_path, max_age_days: int = 90, has_active_processes_fn=None): + """Build a SessionStore bypassing SQLite/disk-load side effects.""" + config = GatewayConfig( + default_reset_policy=SessionResetPolicy(mode="none"), + session_store_max_age_days=max_age_days, + ) + with patch("gateway.session.SessionStore._ensure_loaded"): + store = SessionStore( + sessions_dir=tmp_path, + config=config, + has_active_processes_fn=has_active_processes_fn, + ) + store._db = None + store._loaded = True + return store + + +def _entry(key: str, age_days: float, *, suspended: bool = False, + session_id: str | None = None) -> SessionEntry: + now = datetime.now() + return SessionEntry( + session_key=key, + session_id=session_id or f"sid_{key}", + created_at=now - timedelta(days=age_days + 30), # arbitrary older + updated_at=now - timedelta(days=age_days), + platform=Platform.TELEGRAM, + chat_type="dm", + suspended=suspended, + ) + + +class TestPruneBasics: + def test_prune_removes_entries_past_max_age(self, tmp_path): + store = _make_store(tmp_path) + store._entries["old"] = _entry("old", age_days=100) + store._entries["fresh"] = _entry("fresh", age_days=5) + + removed = store.prune_old_entries(max_age_days=90) + + assert removed == 1 + assert "old" not in 
store._entries + assert "fresh" in store._entries + + def test_prune_uses_updated_at_not_created_at(self, tmp_path): + """A session created long ago but updated recently must be kept.""" + store = _make_store(tmp_path) + now = datetime.now() + entry = SessionEntry( + session_key="long-lived", + session_id="sid", + created_at=now - timedelta(days=365), # ancient + updated_at=now - timedelta(days=3), # but just chatted + platform=Platform.TELEGRAM, + chat_type="dm", + ) + store._entries["long-lived"] = entry + + removed = store.prune_old_entries(max_age_days=30) + + assert removed == 0 + assert "long-lived" in store._entries + + def test_prune_disabled_when_max_age_is_zero(self, tmp_path): + store = _make_store(tmp_path, max_age_days=0) + for i in range(5): + store._entries[f"s{i}"] = _entry(f"s{i}", age_days=365) + + assert store.prune_old_entries(0) == 0 + assert len(store._entries) == 5 + + def test_prune_disabled_when_max_age_is_negative(self, tmp_path): + store = _make_store(tmp_path) + store._entries["s"] = _entry("s", age_days=365) + + assert store.prune_old_entries(-1) == 0 + assert "s" in store._entries + + def test_prune_skips_suspended_entries(self, tmp_path): + """/stop-suspended sessions must be kept for later resume.""" + store = _make_store(tmp_path) + store._entries["suspended"] = _entry( + "suspended", age_days=1000, suspended=True + ) + store._entries["idle"] = _entry("idle", age_days=1000) + + removed = store.prune_old_entries(max_age_days=90) + + assert removed == 1 + assert "suspended" in store._entries + assert "idle" not in store._entries + + def test_prune_skips_entries_with_active_processes(self, tmp_path): + """Sessions with active bg processes aren't pruned even if old.""" + active_session_ids = {"sid_active"} + + def _has_active(session_id: str) -> bool: + return session_id in active_session_ids + + store = _make_store(tmp_path, has_active_processes_fn=_has_active) + store._entries["active"] = _entry( + "active", age_days=1000, 
session_id="sid_active" + ) + store._entries["idle"] = _entry( + "idle", age_days=1000, session_id="sid_idle" + ) + + removed = store.prune_old_entries(max_age_days=90) + + assert removed == 1 + assert "active" in store._entries + assert "idle" not in store._entries + + def test_prune_does_not_write_disk_when_no_removals(self, tmp_path): + """If nothing is evictable, _save() should NOT be called.""" + store = _make_store(tmp_path) + store._entries["fresh1"] = _entry("fresh1", age_days=1) + store._entries["fresh2"] = _entry("fresh2", age_days=2) + + save_calls = [] + store._save = lambda: save_calls.append(1) + + assert store.prune_old_entries(max_age_days=90) == 0 + assert save_calls == [] + + def test_prune_writes_disk_after_removal(self, tmp_path): + store = _make_store(tmp_path) + store._entries["stale"] = _entry("stale", age_days=500) + store._entries["fresh"] = _entry("fresh", age_days=1) + + save_calls = [] + store._save = lambda: save_calls.append(1) + + store.prune_old_entries(max_age_days=90) + assert save_calls == [1] + + def test_prune_is_thread_safe(self, tmp_path): + """Prune acquires _lock internally; concurrent update_session is safe.""" + store = _make_store(tmp_path) + for i in range(20): + age = 1000 if i % 2 == 0 else 1 + store._entries[f"s{i}"] = _entry(f"s{i}", age_days=age) + + results = [] + + def _pruner(): + results.append(store.prune_old_entries(max_age_days=90)) + + def _reader(): + # Mimic a concurrent update_session reader iterating under lock. + with store._lock: + list(store._entries.keys()) + + threads = [threading.Thread(target=_pruner)] + threads += [threading.Thread(target=_reader) for _ in range(4)] + for t in threads: + t.start() + for t in threads: + t.join(timeout=5) + assert not t.is_alive() + + # Exactly one pruner ran; removed exactly the 10 stale entries. 
+ assert results == [10] + assert len(store._entries) == 10 + for i in range(20): + if i % 2 == 1: # fresh + assert f"s{i}" in store._entries + + +class TestPrunePersistsToDisk: + def test_prune_rewrites_sessions_json(self, tmp_path): + """After prune, sessions.json on disk reflects the new dict.""" + config = GatewayConfig( + default_reset_policy=SessionResetPolicy(mode="none"), + session_store_max_age_days=90, + ) + store = SessionStore(sessions_dir=tmp_path, config=config) + store._db = None + # Force-populate without calling get_or_create to avoid DB side-effects + store._entries["stale"] = _entry("stale", age_days=500) + store._entries["fresh"] = _entry("fresh", age_days=1) + store._loaded = True + store._save() + + # Verify pre-prune state on disk. + saved_pre = json.loads((tmp_path / "sessions.json").read_text()) + assert set(saved_pre.keys()) == {"stale", "fresh"} + + # Prune and check disk. + store.prune_old_entries(max_age_days=90) + saved_post = json.loads((tmp_path / "sessions.json").read_text()) + assert set(saved_post.keys()) == {"fresh"} + + +class TestGatewayConfigSerialization: + def test_session_store_max_age_days_defaults_to_90(self): + cfg = GatewayConfig() + assert cfg.session_store_max_age_days == 90 + + def test_session_store_max_age_days_roundtrips(self): + cfg = GatewayConfig(session_store_max_age_days=30) + restored = GatewayConfig.from_dict(cfg.to_dict()) + assert restored.session_store_max_age_days == 30 + + def test_session_store_max_age_days_missing_defaults_90(self): + """Loading an old config (pre-this-field) falls back to default.""" + restored = GatewayConfig.from_dict({}) + assert restored.session_store_max_age_days == 90 + + def test_session_store_max_age_days_negative_coerced_to_zero(self): + """A negative value (accidental or hostile) becomes 0 (disabled).""" + restored = GatewayConfig.from_dict({"session_store_max_age_days": -5}) + assert restored.session_store_max_age_days == 0 + + def 
test_session_store_max_age_days_bad_type_falls_back(self): + """Non-int values fall back to the default, not a crash.""" + restored = GatewayConfig.from_dict({"session_store_max_age_days": "nope"}) + assert restored.session_store_max_age_days == 90 + + +class TestGatewayWatcherCallsPrune: + """The session_expiry_watcher should call prune_old_entries once per hour.""" + + def test_prune_gate_fires_on_first_tick(self): + """First watcher tick has _last_prune_ts=0, so the gate opens.""" + import time as _t + + last_ts = 0.0 + prune_interval = 3600.0 + now = _t.time() + + # Mirror the production gate check in _session_expiry_watcher. + should_prune = (now - last_ts) > prune_interval + assert should_prune is True + + def test_prune_gate_suppresses_within_interval(self): + import time as _t + + last_ts = _t.time() - 600 # 10 minutes ago + prune_interval = 3600.0 + now = _t.time() + + should_prune = (now - last_ts) > prune_interval + assert should_prune is False From 3207b9bda0d7a0aef00a5c6712b8d2f0a82d801d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 17 Apr 2026 14:21:22 -0700 Subject: [PATCH 04/20] test: speed up slow tests (backoff + subprocess + IMDS network) (#11797) Cuts shard-3 local runtime in half by neutralizing real wall-clock waits across three classes of slow test: ## 1. Retry backoff mocks - tests/run_agent/conftest.py (NEW): autouse fixture mocks jittered_backoff to 0.0 so the `while time.time() < sleep_end` busy-loop exits immediately. No global time.sleep mock (would break threading tests). - test_anthropic_error_handling, test_413_compression, test_run_agent_codex_responses, test_fallback_model: per-file fixtures mock time.sleep / asyncio.sleep for retry / compression paths. - test_retaindb_plugin: cap the retaindb module's bound time.sleep to 0.05s via a per-test shim (background writer-thread retries sleep 2s after errors; tests don't care about exact duration). 
Plus replace arbitrary time.sleep(N) waits with short polling loops bounded by deadline. ## 2. Subprocess sleeps in production code - test_update_gateway_restart: mock time.sleep. Production code does time.sleep(3) after `systemctl restart` to verify the service survived. Tests mock subprocess.run — nothing actually restarts — so the wait is dead time. ## 3. Network / IMDS timeouts (biggest single win) - tests/conftest.py: add AWS_EC2_METADATA_DISABLED=true plus AWS_METADATA_SERVICE_TIMEOUT=1 and ATTEMPTS=1. boto3 falls back to IMDS (169.254.169.254) when no AWS creds are set. Any test hitting has_aws_credentials() / resolve_aws_auth_env_var() (e.g. test_status, test_setup_copilot_acp, anything that touches provider auto-detect) burned ~2-4s waiting for that to time out. - test_exit_cleanup_interrupt: explicitly mock resolve_runtime_provider which was doing real network auto-detect (~4s). Tests don't care about provider resolution — the agent is already mocked. - test_timezone: collapse the 3-test "TZ env in subprocess" suite into 2 tests by checking both injection AND no-leak in the same subprocess spawn (was 3 × 3.2s, now 2 × 4s). ## Validation | Test | Before | After | |---|---|---| | test_anthropic_error_handling (8 tests) | ~80s | ~15s | | test_413_compression (14 tests) | ~18s | 2.3s | | test_retaindb_plugin (67 tests) | ~13s | 1.3s | | test_status_includes_tavily_key | 4.0s | 0.05s | | test_setup_copilot_acp_skips_same_provider_pool_step | 8.0s | 0.26s | | test_update_gateway_restart (5 tests) | ~18s total | ~0.35s total | | test_exit_cleanup_interrupt (2 tests) | 8s | 1.5s | | **Matrix shard 3 local** | **108s** | **50s** | No behavioral contract changed — tests still verify retry happens, service restart logic runs, etc.; they just don't burn real seconds waiting for it. Supersedes PR #11779 (those changes are included here). 
--- tests/conftest.py | 9 +++ .../hermes_cli/test_update_gateway_restart.py | 20 +++++ tests/plugins/test_retaindb_plugin.py | 75 +++++++++++++++---- tests/run_agent/conftest.py | 34 +++++++++ tests/run_agent/test_413_compression.py | 18 +++++ .../test_anthropic_error_handling.py | 33 ++++++++ .../run_agent/test_exit_cleanup_interrupt.py | 18 +++++ tests/run_agent/test_fallback_model.py | 10 +++ .../test_run_agent_codex_responses.py | 9 +++ tests/test_timezone.py | 38 +++++----- 10 files changed, 231 insertions(+), 33 deletions(-) create mode 100644 tests/run_agent/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py index 27950118e..c5b367266 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -229,6 +229,15 @@ def _hermetic_environment(tmp_path, monkeypatch): monkeypatch.setenv("LC_ALL", "C.UTF-8") monkeypatch.setenv("PYTHONHASHSEED", "0") + # 4b. Disable AWS IMDS lookups. Without this, any test that ends up + # calling has_aws_credentials() / resolve_aws_auth_env_var() + # (e.g. provider auto-detect, status command, cron run_job) burns + # ~2s waiting for the metadata service at 169.254.169.254 to time + # out. Tests don't run on EC2 — IMDS is always unreachable here. + monkeypatch.setenv("AWS_EC2_METADATA_DISABLED", "true") + monkeypatch.setenv("AWS_METADATA_SERVICE_TIMEOUT", "1") + monkeypatch.setenv("AWS_METADATA_SERVICE_NUM_ATTEMPTS", "1") + # 5. Reset plugin singleton so tests don't leak plugins from # ~/.hermes/plugins/ (which, per step 3, is now empty — but the # singleton might still be cached from a previous test). 
diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py index f3f2a0444..6e10d5622 100644 --- a/tests/hermes_cli/test_update_gateway_restart.py +++ b/tests/hermes_cli/test_update_gateway_restart.py @@ -13,9 +13,29 @@ from unittest.mock import patch, MagicMock import pytest import hermes_cli.gateway as gateway_cli +import hermes_cli.main as cli_main from hermes_cli.main import cmd_update +# --------------------------------------------------------------------------- +# Skip the real-time sleeps inside cmd_update's restart-verification path +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _no_restart_verify_sleep(monkeypatch): + """hermes_cli/main.py uses time.sleep(3) after systemctl restart to + verify the service survived. Tests mock subprocess.run — nothing + actually restarts — so the 3s wait is dead time. + + main.py does ``import time as _time`` at both module level (line 167) + and inside functions (lines 3281, 4384, 4401). Patching the global + ``time.sleep`` affects only the duration of this test. + """ + import time as _real_time + monkeypatch.setattr(_real_time, "sleep", lambda *_a, **_k: None) + + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- diff --git a/tests/plugins/test_retaindb_plugin.py b/tests/plugins/test_retaindb_plugin.py index 9ad801769..5d517bce7 100644 --- a/tests/plugins/test_retaindb_plugin.py +++ b/tests/plugins/test_retaindb_plugin.py @@ -31,6 +31,31 @@ def _isolate_env(tmp_path, monkeypatch): monkeypatch.delenv("RETAINDB_PROJECT", raising=False) +@pytest.fixture(autouse=True) +def _cap_retaindb_sleeps(monkeypatch): + """Cap production-code sleeps so background-thread tests run fast. + + The retaindb ``_WriteQueue._flush_row`` does ``time.sleep(2)`` after + errors. 
Across multiple tests that trigger the retry path, that adds + up. Cap the module's bound ``time.sleep`` to 0.05s — tests don't care + about the exact retry delay, only that it happens. The test file's + own ``time.sleep`` stays real since it uses a different reference. + """ + try: + from plugins.memory import retaindb as _retaindb + except ImportError: + return + + real_sleep = _retaindb.time.sleep + + def _capped_sleep(seconds): + return real_sleep(min(float(seconds), 0.05)) + + import types as _types + fake_time = _types.SimpleNamespace(sleep=_capped_sleep, time=_retaindb.time.time) + monkeypatch.setattr(_retaindb, "time", fake_time) + + # We need the repo root on sys.path so the plugin can import agent.memory_provider import sys _repo_root = str(Path(__file__).resolve().parents[2]) @@ -130,16 +155,18 @@ class TestWriteQueue: def test_enqueue_creates_row(self, tmp_path): q, client, db_path = self._make_queue(tmp_path) q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}]) - # Give the writer thread a moment to process - time.sleep(1) + # shutdown() blocks until the writer thread drains the queue — no need + # to pre-sleep (the old 1s sleep was a just-in-case wait, but shutdown + # does the right thing). q.shutdown() # If ingest succeeded, the row should be deleted client.ingest_session.assert_called_once() def test_enqueue_persists_to_sqlite(self, tmp_path): client = MagicMock() - # Make ingest hang so the row stays in SQLite - client.ingest_session = MagicMock(side_effect=lambda *a, **kw: time.sleep(5)) + # Make ingest slow so the row is still in SQLite when we peek. + # 0.5s is plenty — the test just needs the flush to still be in-flight. 
+ client.ingest_session = MagicMock(side_effect=lambda *a, **kw: time.sleep(0.5)) db_path = tmp_path / "test_queue.db" q = _WriteQueue(client, db_path) q.enqueue("user1", "sess1", [{"role": "user", "content": "test"}]) @@ -154,8 +181,7 @@ class TestWriteQueue: def test_flush_deletes_row_on_success(self, tmp_path): q, client, db_path = self._make_queue(tmp_path) q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}]) - time.sleep(1) - q.shutdown() + q.shutdown() # blocks until drain # Row should be gone conn = sqlite3.connect(str(db_path)) rows = conn.execute("SELECT COUNT(*) FROM pending").fetchone()[0] @@ -168,14 +194,20 @@ class TestWriteQueue: db_path = tmp_path / "test_queue.db" q = _WriteQueue(client, db_path) q.enqueue("user1", "sess1", [{"role": "user", "content": "hi"}]) - time.sleep(3) # Allow retry + sleep(2) in _flush_row + # Poll for the error to be recorded (max 2s), instead of a fixed 3s wait. + deadline = time.time() + 2.0 + last_error = None + while time.time() < deadline: + conn = sqlite3.connect(str(db_path)) + row = conn.execute("SELECT last_error FROM pending").fetchone() + conn.close() + if row and row[0]: + last_error = row[0] + break + time.sleep(0.05) q.shutdown() - # Row should still exist with error recorded - conn = sqlite3.connect(str(db_path)) - row = conn.execute("SELECT last_error FROM pending").fetchone() - conn.close() - assert row is not None - assert "API down" in row[0] + assert last_error is not None + assert "API down" in last_error def test_thread_local_connection_reuse(self, tmp_path): q, _, _ = self._make_queue(tmp_path) @@ -193,14 +225,27 @@ class TestWriteQueue: client1.ingest_session = MagicMock(side_effect=RuntimeError("fail")) q1 = _WriteQueue(client1, db_path) q1.enqueue("user1", "sess1", [{"role": "user", "content": "lost turn"}]) - time.sleep(3) + # Wait until the error is recorded (poll with short interval). 
+ deadline = time.time() + 2.0 + while time.time() < deadline: + conn = sqlite3.connect(str(db_path)) + row = conn.execute("SELECT last_error FROM pending").fetchone() + conn.close() + if row and row[0]: + break + time.sleep(0.05) q1.shutdown() # Now create a new queue — it should replay the pending rows client2 = MagicMock() client2.ingest_session = MagicMock(return_value={"status": "ok"}) q2 = _WriteQueue(client2, db_path) - time.sleep(2) + # Poll for the replay to happen. + deadline = time.time() + 2.0 + while time.time() < deadline: + if client2.ingest_session.called: + break + time.sleep(0.05) q2.shutdown() # The replayed row should have been ingested via client2 diff --git a/tests/run_agent/conftest.py b/tests/run_agent/conftest.py new file mode 100644 index 000000000..9b431869b --- /dev/null +++ b/tests/run_agent/conftest.py @@ -0,0 +1,34 @@ +"""Fast-path fixtures shared across tests/run_agent/. + +Many tests in this directory exercise the retry/backoff paths in the +agent loop. Production code uses ``jittered_backoff(base_delay=5.0)`` +with a ``while time.time() < sleep_end`` loop — a single retry test +spends 5+ seconds of real wall-clock time on backoff waits. + +Mocking ``jittered_backoff`` to return 0.0 collapses the while-loop +to a no-op (``time.time() < time.time() + 0`` is false immediately), +which handles the most common case without touching ``time.sleep``. + +We deliberately DO NOT mock ``time.sleep`` here — some tests +(test_interrupt_propagation, test_primary_runtime_restore, etc.) use +the real ``time.sleep`` for threading coordination or assert that it +was called with specific values. Tests that want to additionally +fast-path direct ``time.sleep(N)`` calls in production code should +monkeypatch ``run_agent.time.sleep`` locally (see +``test_anthropic_error_handling.py`` for the pattern). 
+""" + +from __future__ import annotations + +import pytest + + +@pytest.fixture(autouse=True) +def _fast_retry_backoff(monkeypatch): + """Short-circuit retry backoff for all tests in this directory.""" + try: + import run_agent + except ImportError: + return + + monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0) diff --git a/tests/run_agent/test_413_compression.py b/tests/run_agent/test_413_compression.py index e8835c641..8bd357d3d 100644 --- a/tests/run_agent/test_413_compression.py +++ b/tests/run_agent/test_413_compression.py @@ -19,6 +19,24 @@ import pytest from agent.context_compressor import SUMMARY_PREFIX from run_agent import AIAgent +import run_agent + + +# --------------------------------------------------------------------------- +# Fast backoff for compression retry tests +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _no_compression_sleep(monkeypatch): + """Short-circuit the 2s time.sleep between compression retries. + + Production code has ``time.sleep(2)`` in multiple places after a 413/context + compression, for rate-limit smoothing. Tests assert behavior, not timing. 
+ """ + import time as _time + monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None) + monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0) # --------------------------------------------------------------------------- diff --git a/tests/run_agent/test_anthropic_error_handling.py b/tests/run_agent/test_anthropic_error_handling.py index 00055928e..cdf337254 100644 --- a/tests/run_agent/test_anthropic_error_handling.py +++ b/tests/run_agent/test_anthropic_error_handling.py @@ -27,6 +27,39 @@ from gateway.config import Platform from gateway.session import SessionSource +# --------------------------------------------------------------------------- +# Fast backoff for tests that exercise the retry loop +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _no_backoff_wait(monkeypatch): + """Short-circuit retry backoff so tests don't block on real wall-clock waits. + + The production code uses jittered_backoff() with a 5s base delay plus a + tight time.sleep(0.2) loop. Without this patch, each 429/500/529 retry + test burns ~10s of real time on CI — across six tests that's ~60s for + behavior we're not asserting against timing. + + Tests assert retry counts and final results, never wait durations. + """ + import asyncio as _asyncio + import time as _time + + monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0) + monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None) + + # Also fast-path asyncio.sleep — the gateway's _run_agent path has + # several await asyncio.sleep(...) calls that add real wall-clock time. + _real_asyncio_sleep = _asyncio.sleep + + async def _fast_sleep(delay=0, *args, **kwargs): + # Yield to the event loop but skip the actual delay. 
+ await _real_asyncio_sleep(0) + + monkeypatch.setattr(_asyncio, "sleep", _fast_sleep) + + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- diff --git a/tests/run_agent/test_exit_cleanup_interrupt.py b/tests/run_agent/test_exit_cleanup_interrupt.py index 6a5d7b363..1e5d8431c 100644 --- a/tests/run_agent/test_exit_cleanup_interrupt.py +++ b/tests/run_agent/test_exit_cleanup_interrupt.py @@ -13,6 +13,24 @@ from unittest.mock import MagicMock, patch, call import pytest +@pytest.fixture(autouse=True) +def _mock_runtime_provider(monkeypatch): + """run_job calls resolve_runtime_provider which can try real network + auto-detection (~4s of socket timeouts in hermetic CI). Mock it out + since these tests don't care about provider resolution — the agent + is mocked too.""" + import hermes_cli.runtime_provider as rp + def _fake_resolve(*args, **kwargs): + return { + "provider": "openrouter", + "api_key": "test-key", + "base_url": "https://openrouter.ai/api/v1", + "model": "test/model", + "api_mode": "chat_completions", + } + monkeypatch.setattr(rp, "resolve_runtime_provider", _fake_resolve) + + class TestCronJobCleanup: """cron/scheduler.py — end_session + close in the finally block.""" diff --git a/tests/run_agent/test_fallback_model.py b/tests/run_agent/test_fallback_model.py index 6491bd686..d2aec022e 100644 --- a/tests/run_agent/test_fallback_model.py +++ b/tests/run_agent/test_fallback_model.py @@ -11,6 +11,16 @@ from unittest.mock import MagicMock, patch import pytest from run_agent import AIAgent +import run_agent + + +@pytest.fixture(autouse=True) +def _no_fallback_wait(monkeypatch): + """Short-circuit time.sleep in fallback/recovery paths so tests don't + block on the ``min(3 + retry_count, 8)`` wait before a primary retry.""" + import time as _time + monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None) + monkeypatch.setattr(run_agent, 
"jittered_backoff", lambda *a, **k: 0.0) def _make_tool_defs(*names: str) -> list: diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py index 4ff00018d..81213aaf6 100644 --- a/tests/run_agent/test_run_agent_codex_responses.py +++ b/tests/run_agent/test_run_agent_codex_responses.py @@ -12,6 +12,15 @@ sys.modules.setdefault("fal_client", types.SimpleNamespace()) import run_agent +@pytest.fixture(autouse=True) +def _no_codex_backoff(monkeypatch): + """Short-circuit retry backoff so Codex retry tests don't block on real + wall-clock waits (5s jittered_backoff base delay + tight time.sleep loop).""" + import time as _time + monkeypatch.setattr(run_agent, "jittered_backoff", lambda *a, **k: 0.0) + monkeypatch.setattr(_time, "sleep", lambda *_a, **_k: None) + + def _patch_agent_bootstrap(monkeypatch): monkeypatch.setattr( run_agent, diff --git a/tests/test_timezone.py b/tests/test_timezone.py index 1af60cbfa..ffb831617 100644 --- a/tests/test_timezone.py +++ b/tests/test_timezone.py @@ -159,18 +159,34 @@ class TestCodeExecutionTZ: return _json.dumps({"error": f"unexpected tool call: {function_name}"}) def test_tz_injected_when_configured(self): - """When HERMES_TIMEZONE is set, child process sees TZ env var.""" + """When HERMES_TIMEZONE is set, child process sees TZ env var. + + Verified alongside leak-prevention + empty-TZ handling in one + subprocess call so we don't pay 3x the subprocess startup cost + (each execute_code spawns a real Python subprocess ~3s). 
+ """ import json as _json os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata" + # One subprocess, three things checked: + # 1) TZ is injected as "Asia/Kolkata" + # 2) HERMES_TIMEZONE itself does NOT leak into the child env + probe = ( + 'import os; ' + 'print("TZ=" + os.environ.get("TZ", "NOT_SET")); ' + 'print("HERMES_TIMEZONE=" + os.environ.get("HERMES_TIMEZONE", "NOT_SET"))' + ) with patch("model_tools.handle_function_call", side_effect=self._mock_handle): result = _json.loads(self._execute_code( - code='import os; print(os.environ.get("TZ", "NOT_SET"))', - task_id="tz-test", + code=probe, + task_id="tz-combined-test", enabled_tools=[], )) assert result["status"] == "success" - assert "Asia/Kolkata" in result["output"] + assert "TZ=Asia/Kolkata" in result["output"] + assert "HERMES_TIMEZONE=NOT_SET" in result["output"], ( + "HERMES_TIMEZONE should not leak into child env (only TZ)" + ) def test_tz_not_injected_when_empty(self): """When HERMES_TIMEZONE is not set, child process has no TZ.""" @@ -186,20 +202,6 @@ class TestCodeExecutionTZ: assert result["status"] == "success" assert "NOT_SET" in result["output"] - def test_hermes_timezone_not_leaked_to_child(self): - """HERMES_TIMEZONE itself must NOT appear in child env (only TZ).""" - import json as _json - os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata" - - with patch("model_tools.handle_function_call", side_effect=self._mock_handle): - result = _json.loads(self._execute_code( - code='import os; print(os.environ.get("HERMES_TIMEZONE", "NOT_SET"))', - task_id="tz-leak-test", - enabled_tools=[], - )) - assert result["status"] == "success" - assert "NOT_SET" in result["output"] - # ========================================================================= # Cron timezone-aware scheduling From 036dacf6592dac36a57a3d26187039fb3b1a37a0 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 17 Apr 2026 14:27:26 -0700 Subject: [PATCH 05/20] feat(telegram): auto-wrap markdown tables in 
code blocks (#11794) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Telegram's MarkdownV2 has no table syntax — pipes get backslash-escaped and tables render as noisy unaligned text. format_message now detects GFM-style pipe tables (header row + delimiter row + optional body) and wraps them in ``` fences before the existing MarkdownV2 conversion runs. Telegram renders fenced code blocks as monospace preformatted text with columns intact. Tables already inside an existing code block are left alone. Plain prose with pipes, lone '---' horizontal rules, and non-table content are unaffected. Closes the recurring community request to stop having to ask the agent to re-render tables as code blocks manually. --- gateway/platforms/telegram.py | 84 ++++++++++++++ tests/gateway/test_telegram_format.py | 153 +++++++++++++++++++++++++- 2 files changed, 236 insertions(+), 1 deletion(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 2f4ec9329..5b1fef133 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -118,6 +118,84 @@ def _strip_mdv2(text: str) -> str: return cleaned +# --------------------------------------------------------------------------- +# Markdown table → code block conversion +# --------------------------------------------------------------------------- +# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal, +# so pipe tables render as noisy backslash-pipe text with no alignment. +# Wrapping the table in a fenced code block makes Telegram render it as +# monospace preformatted text with columns intact. + +# Matches a GFM table delimiter row: optional outer pipes, cells containing +# only dashes (with optional leading/trailing colons for alignment) separated +# by '|'. Requires at least one internal '|' so lone '---' horizontal rules +# are NOT matched. 
+_TABLE_SEPARATOR_RE = re.compile( + r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*){1,}\|?\s*$' +) + + +def _is_table_row(line: str) -> bool: + """Return True if *line* could plausibly be a table data row.""" + stripped = line.strip() + return bool(stripped) and '|' in stripped + + +def _wrap_markdown_tables(text: str) -> str: + """Wrap GFM-style pipe tables in ``` fences so Telegram renders them. + + Detected by a row containing '|' immediately followed by a delimiter + row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing + non-blank lines are consumed as the table body and included in the + wrapped block. Tables inside existing fenced code blocks are left + alone. + """ + if '|' not in text or '-' not in text: + return text + + lines = text.split('\n') + out: list[str] = [] + in_fence = False + i = 0 + while i < len(lines): + line = lines[i] + stripped = line.lstrip() + + # Track existing fenced code blocks — never touch content inside. + if stripped.startswith('```'): + in_fence = not in_fence + out.append(line) + i += 1 + continue + if in_fence: + out.append(line) + i += 1 + continue + + # Look for a header row (contains '|') immediately followed by a + # delimiter row. + if ( + '|' in line + and i + 1 < len(lines) + and _TABLE_SEPARATOR_RE.match(lines[i + 1]) + ): + table_block = [line, lines[i + 1]] + j = i + 2 + while j < len(lines) and _is_table_row(lines[j]): + table_block.append(lines[j]) + j += 1 + out.append('```') + out.extend(table_block) + out.append('```') + i = j + continue + + out.append(line) + i += 1 + + return '\n'.join(out) + + class TelegramAdapter(BasePlatformAdapter): """ Telegram bot adapter. @@ -1916,6 +1994,12 @@ class TelegramAdapter(BasePlatformAdapter): text = content + # 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't + # render tables natively, but fenced code blocks render as + # monospace preformatted text with columns intact. The wrapped + # tables then flow through step (1) below as protected regions. 
+ text = _wrap_markdown_tables(text) + # 1) Protect fenced code blocks (``` ... ```) # Per MarkdownV2 spec, \ and ` inside pre/code must be escaped. def _protect_fenced(m): diff --git a/tests/gateway/test_telegram_format.py b/tests/gateway/test_telegram_format.py index 1bd889b7c..ce7e02a47 100644 --- a/tests/gateway/test_telegram_format.py +++ b/tests/gateway/test_telegram_format.py @@ -34,7 +34,12 @@ def _ensure_telegram_mock(): _ensure_telegram_mock() -from gateway.platforms.telegram import TelegramAdapter, _escape_mdv2, _strip_mdv2 # noqa: E402 +from gateway.platforms.telegram import ( # noqa: E402 + TelegramAdapter, + _escape_mdv2, + _strip_mdv2, + _wrap_markdown_tables, +) # --------------------------------------------------------------------------- @@ -535,6 +540,152 @@ class TestStripMdv2: assert _strip_mdv2("||hidden text||") == "hidden text" +# ========================================================================= +# Markdown table auto-wrap +# ========================================================================= + + +class TestWrapMarkdownTables: + """_wrap_markdown_tables wraps GFM pipe tables in ``` fences so + Telegram renders them as monospace preformatted text instead of the + noisy backslash-pipe mess MarkdownV2 produces.""" + + def test_basic_table_wrapped(self): + text = ( + "Scores:\n\n" + "| Player | Score |\n" + "|--------|-------|\n" + "| Alice | 150 |\n" + "| Bob | 120 |\n" + "\nEnd." 
+ ) + out = _wrap_markdown_tables(text) + # Table is now wrapped in a fence + assert "```\n| Player | Score |" in out + assert "| Bob | 120 |\n```" in out + # Surrounding prose is preserved + assert out.startswith("Scores:") + assert out.endswith("End.") + + def test_bare_pipe_table_wrapped(self): + """Tables without outer pipes (GFM allows this) are still detected.""" + text = "head1 | head2\n--- | ---\na | b\nc | d" + out = _wrap_markdown_tables(text) + assert out.startswith("```\n") + assert out.rstrip().endswith("```") + assert "head1 | head2" in out + + def test_alignment_separators(self): + """Separator rows with :--- / ---: / :---: alignment markers match.""" + text = ( + "| Name | Age | City |\n" + "|:-----|----:|:----:|\n" + "| Ada | 30 | NYC |" + ) + out = _wrap_markdown_tables(text) + assert out.count("```") == 2 + + def test_two_consecutive_tables_wrapped_separately(self): + text = ( + "| A | B |\n" + "|---|---|\n" + "| 1 | 2 |\n" + "\n" + "| X | Y |\n" + "|---|---|\n" + "| 9 | 8 |" + ) + out = _wrap_markdown_tables(text) + # Four fences total — one opening + closing per table + assert out.count("```") == 4 + + def test_plain_text_with_pipes_not_wrapped(self): + """A bare pipe in prose must NOT trigger wrapping.""" + text = "Use the | pipe operator to chain commands." + assert _wrap_markdown_tables(text) == text + + def test_horizontal_rule_not_wrapped(self): + """A lone '---' horizontal rule must not be mistaken for a separator.""" + text = "Section A\n\n---\n\nSection B" + assert _wrap_markdown_tables(text) == text + + def test_existing_code_block_with_pipes_left_alone(self): + """A table already inside a fenced code block must not be re-wrapped.""" + text = ( + "```\n" + "| a | b |\n" + "|---|---|\n" + "| 1 | 2 |\n" + "```" + ) + assert _wrap_markdown_tables(text) == text + + def test_no_pipe_character_short_circuits(self): + text = "Plain **bold** text with no table." 
+ assert _wrap_markdown_tables(text) == text + + def test_no_dash_short_circuits(self): + text = "a | b\nc | d" # has pipes but no '-' separator row + assert _wrap_markdown_tables(text) == text + + def test_single_column_separator_not_matched(self): + """Single-column tables (rare) are not detected — we require at + least one internal pipe in the separator row to avoid false + positives on formatting rules.""" + text = "| a |\n| - |\n| b |" + assert _wrap_markdown_tables(text) == text + + +class TestFormatMessageTables: + """End-to-end: a pipe table passes through format_message with its + pipes and dashes left alone inside the fence, not mangled by MarkdownV2 + escaping.""" + + def test_table_rendered_as_code_block(self, adapter): + text = ( + "Data:\n\n" + "| Col1 | Col2 |\n" + "|------|------|\n" + "| A | B |\n" + ) + out = adapter.format_message(text) + # Pipes inside the fenced block are NOT escaped + assert "```\n| Col1 | Col2 |" in out + assert "\\|" not in out.split("```")[1] + # Dashes in separator not escaped inside fence + assert "\\-" not in out.split("```")[1] + + def test_text_after_table_still_formatted(self, adapter): + text = ( + "| A | B |\n" + "|---|---|\n" + "| 1 | 2 |\n" + "\n" + "Nice **work** team!" + ) + out = adapter.format_message(text) + # MarkdownV2 bold conversion still happens outside the table + assert "*work*" in out + # Exclamation outside fence is escaped + assert "\\!" 
in out + + def test_multiple_tables_in_single_message(self, adapter): + text = ( + "First:\n" + "| A | B |\n" + "|---|---|\n" + "| 1 | 2 |\n" + "\n" + "Second:\n" + "| X | Y |\n" + "|---|---|\n" + "| 9 | 8 |\n" + ) + out = adapter.format_message(text) + # Two separate fenced blocks in the output + assert out.count("```") == 4 + + @pytest.mark.asyncio async def test_send_escapes_chunk_indicator_for_markdownv2(adapter): adapter.MAX_MESSAGE_LENGTH = 80 From 31e7276474976cd752d73de7701229eefd1b37ad Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 17 Apr 2026 15:18:23 -0700 Subject: [PATCH 06/20] fix(gateway): consolidate per-session cleanup; close SessionDB on shutdown (#11800) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three closely-related fixes for shutdown / lifecycle hygiene. 1. _release_running_agent_state(session_key) helper ---------------------------------------------------- Per-running-agent state lived in three dicts that drifted out of sync across cleanup sites: self._running_agents — AIAgent per session_key self._running_agents_ts — start timestamp per session_key self._busy_ack_ts — last busy-ack timestamp per session_key Inventory before this PR: 8 sites: del self._running_agents[key] — only 1 (stale-eviction) cleaned all three — 1 cleaned _running_agents + _running_agents_ts only — 6 cleaned _running_agents only Each missed entry was a (str, float) tuple per session per gateway lifetime — small, persistent, accumulates across thousands of sessions over months. Per-platform leaks compounded. This change adds a single helper that pops all three dicts in lockstep, and replaces every bare 'del self._running_agents[key]' site with it. 
Per-session state that PERSISTS across turns (_session_model_overrides, _voice_mode, _pending_approvals, _update_prompt_pending) is intentionally NOT touched here — those have their own lifecycles tied to user actions, not turn boundaries. 2. _running_agents_ts cleared in _stop_impl ---------------------------------------- Was being missed alongside _running_agents.clear(); now included. 3. SessionDB close() in _stop_impl --------------------------------- The SQLite WAL write lock stayed held by the old gateway connection until Python actually exited — causing 'database is locked' errors when --replace launched a new gateway against the same file. We now explicitly close both self._db and self.session_store._db inside _stop_impl, with try/except so a flaky close on one doesn't block the other. Tests ----- tests/gateway/test_session_state_cleanup.py — 10 cases covering: * helper pops all three dicts atomically * idempotent on missing/empty keys * preserves other sessions * tolerates older runners without _busy_ack_ts attribute * thread-safe under concurrent release * regression guard: scans gateway/run.py and fails if a future contributor reintroduces 'del self._running_agents[...]' outside docstrings * SessionDB close called on both holders during shutdown * shutdown tolerates missing session_store * shutdown tolerates close() raising on one db (other still closes) Broader gateway suite: 3108 passed (vs 3100 on baseline) — failure delta is +8 net passes; the 10 remaining failures are pre-existing cross-test pollution / missing optional deps (matrix needs olm, signal/telegram approval flake, dingtalk Mock wiring), all reproduce on stashed baseline. 
--- gateway/run.py | 74 +++++-- tests/gateway/test_session_state_cleanup.py | 231 ++++++++++++++++++++ 2 files changed, 285 insertions(+), 20 deletions(-) create mode 100644 tests/gateway/test_session_state_cleanup.py diff --git a/gateway/run.py b/gateway/run.py index e642d9df0..bef967f8a 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -2408,6 +2408,7 @@ class GatewayRunner: self.adapters.clear() self._running_agents.clear() + self._running_agents_ts.clear() self._pending_messages.clear() self._pending_approvals.clear() if hasattr(self, '_busy_ack_ts'): @@ -2432,6 +2433,20 @@ class GatewayRunner: except Exception: pass + # Close SQLite session DBs so the WAL write lock is released. + # Without this, --replace and similar restart flows leave the + # old gateway's connection holding the WAL lock until Python + # actually exits — causing 'database is locked' errors when + # the new gateway tries to open the same file. + for _db_holder in (self, getattr(self, "session_store", None)): + _db = getattr(_db_holder, "_db", None) if _db_holder else None + if _db is None or not hasattr(_db, "close"): + continue + try: + _db.close() + except Exception as _e: + logger.debug("SessionDB close error: %s", _e) + from gateway.status import remove_pid_file remove_pid_file() @@ -2930,9 +2945,7 @@ class GatewayRunner: _quick_key[:30], _stale_age, _stale_idle, _raw_stale_timeout, _stale_detail, ) - del self._running_agents[_quick_key] - self._running_agents_ts.pop(_quick_key, None) - self._busy_ack_ts.pop(_quick_key, None) + self._release_running_agent_state(_quick_key) if _quick_key in self._running_agents: if event.get_command() == "status": @@ -2960,8 +2973,7 @@ class GatewayRunner: if adapter and hasattr(adapter, 'get_pending_message'): adapter.get_pending_message(_quick_key) # consume and discard self._pending_messages.pop(_quick_key, None) - if _quick_key in self._running_agents: - del self._running_agents[_quick_key] + self._release_running_agent_state(_quick_key) 
logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20]) return "⚡ Stopped. You can continue this session." @@ -2983,8 +2995,7 @@ class GatewayRunner: self._pending_messages.pop(_quick_key, None) # Clean up the running agent entry so the reset handler # doesn't think an agent is still active. - if _quick_key in self._running_agents: - del self._running_agents[_quick_key] + self._release_running_agent_state(_quick_key) return await self._handle_reset_command(event) # /queue — queue without interrupting @@ -3061,8 +3072,7 @@ class GatewayRunner: # Agent is being set up but not ready yet. if event.get_command() == "stop": # Force-clean the sentinel so the session is unlocked. - if _quick_key in self._running_agents: - del self._running_agents[_quick_key] + self._release_running_agent_state(_quick_key) logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20]) return "⚡ Force-stopped. The agent was still starting — session unlocked." # Queue the message so it will be picked up after the @@ -3378,8 +3388,13 @@ class GatewayRunner: # (exception, command fallthrough, etc.) the sentinel must # not linger or the session would be permanently locked out. if self._running_agents.get(_quick_key) is _AGENT_PENDING_SENTINEL: - del self._running_agents[_quick_key] - self._running_agents_ts.pop(_quick_key, None) + self._release_running_agent_state(_quick_key) + else: + # Agent path already cleaned _running_agents; make sure + # the paired metadata dicts are gone too. + self._running_agents_ts.pop(_quick_key, None) + if hasattr(self, "_busy_ack_ts"): + self._busy_ack_ts.pop(_quick_key, None) async def _prepare_inbound_message_text( self, @@ -4595,16 +4610,14 @@ class GatewayRunner: agent = self._running_agents.get(session_key) if agent is _AGENT_PENDING_SENTINEL: # Force-clean the sentinel so the session is unlocked. 
- if session_key in self._running_agents: - del self._running_agents[session_key] + self._release_running_agent_state(session_key) logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20]) return "⚡ Stopped. The agent hadn't started yet — you can continue this session." if agent: agent.interrupt("Stop requested") # Force-clean the session lock so a truly hung agent doesn't # keep it locked forever. - if session_key in self._running_agents: - del self._running_agents[session_key] + self._release_running_agent_state(session_key) return "⚡ Stopped. You can continue this session." else: return "No active task to stop." @@ -6520,8 +6533,7 @@ class GatewayRunner: logger.debug("Memory flush on resume failed: %s", e) # Clear any running agent for this session key - if session_key in self._running_agents: - del self._running_agents[session_key] + self._release_running_agent_state(session_key) # Switch the session entry to point at the old session new_entry = self.session_store.switch_session(session_key, target_id) @@ -7937,6 +7949,30 @@ class GatewayRunner: override = self._session_model_overrides.get(session_key) return override is not None and override.get("model") == agent_model + def _release_running_agent_state(self, session_key: str) -> None: + """Pop ALL per-running-agent state entries for ``session_key``. + + Replaces ad-hoc ``del self._running_agents[key]`` calls scattered + across the gateway. Those sites had drifted: some popped only + ``_running_agents``; some also ``_running_agents_ts``; only one + path also cleared ``_busy_ack_ts``. Each missed entry was a + small, persistent leak — a (str_key → float) tuple per session + per gateway lifetime. + + Use this at every site that ends a running turn, regardless of + cause (normal completion, /stop, /reset, /resume, sentinel + cleanup, stale-eviction). 
Per-session state that PERSISTS + across turns (``_session_model_overrides``, ``_voice_mode``, + ``_pending_approvals``, ``_update_prompt_pending``) is NOT + touched here — those have their own lifecycles. + """ + if not session_key: + return + self._running_agents.pop(session_key, None) + self._running_agents_ts.pop(session_key, None) + if hasattr(self, "_busy_ack_ts"): + self._busy_ack_ts.pop(session_key, None) + def _evict_cached_agent(self, session_key: str) -> None: """Remove a cached agent for a session (called on /new, /model, etc).""" _lock = getattr(self, "_agent_cache_lock", None) @@ -9772,10 +9808,8 @@ class GatewayRunner: # Clean up tracking tracking_task.cancel() - if session_key and session_key in self._running_agents: - del self._running_agents[session_key] if session_key: - self._running_agents_ts.pop(session_key, None) + self._release_running_agent_state(session_key) if self._draining: self._update_runtime_status("draining") diff --git a/tests/gateway/test_session_state_cleanup.py b/tests/gateway/test_session_state_cleanup.py new file mode 100644 index 000000000..3c708736c --- /dev/null +++ b/tests/gateway/test_session_state_cleanup.py @@ -0,0 +1,231 @@ +"""Regression tests for _release_running_agent_state and SessionDB shutdown. + +Before this change, running-agent state lived in three dicts that drifted +out of sync: + + self._running_agents — AIAgent instance per session key + self._running_agents_ts — start timestamp per session key + self._busy_ack_ts — last busy-ack timestamp per session key + +Six cleanup sites did ``del self._running_agents[key]`` without touching +the other two; one site only popped ``_running_agents`` and +``_running_agents_ts``; and only the stale-eviction site cleaned all +three. Each missed entry was a small persistent leak. + +Also: SessionDB connections were never closed on gateway shutdown, +leaving WAL locks in place until Python actually exited. 
+""" + +import threading +from unittest.mock import MagicMock + +import pytest + + +def _make_runner(): + """Bare GatewayRunner wired with just the state the helper touches.""" + from gateway.run import GatewayRunner + + runner = GatewayRunner.__new__(GatewayRunner) + runner._running_agents = {} + runner._running_agents_ts = {} + runner._busy_ack_ts = {} + return runner + + +class TestReleaseRunningAgentStateUnit: + def test_pops_all_three_dicts(self): + runner = _make_runner() + runner._running_agents["k"] = MagicMock() + runner._running_agents_ts["k"] = 123.0 + runner._busy_ack_ts["k"] = 456.0 + + runner._release_running_agent_state("k") + + assert "k" not in runner._running_agents + assert "k" not in runner._running_agents_ts + assert "k" not in runner._busy_ack_ts + + def test_idempotent_on_missing_key(self): + """Calling twice (or on an absent key) must not raise.""" + runner = _make_runner() + runner._release_running_agent_state("missing") + runner._release_running_agent_state("missing") # still fine + + def test_noop_on_empty_session_key(self): + """Empty string / None key is treated as a no-op.""" + runner = _make_runner() + runner._running_agents[""] = "guard" + runner._release_running_agent_state("") + # Empty key not processed — guard value survives. 
+ assert runner._running_agents[""] == "guard" + + def test_preserves_other_sessions(self): + runner = _make_runner() + for k in ("a", "b", "c"): + runner._running_agents[k] = MagicMock() + runner._running_agents_ts[k] = 1.0 + runner._busy_ack_ts[k] = 1.0 + + runner._release_running_agent_state("b") + + assert set(runner._running_agents.keys()) == {"a", "c"} + assert set(runner._running_agents_ts.keys()) == {"a", "c"} + assert set(runner._busy_ack_ts.keys()) == {"a", "c"} + + def test_handles_missing_busy_ack_attribute(self): + """Backward-compatible with older runners lacking _busy_ack_ts.""" + runner = _make_runner() + del runner._busy_ack_ts # simulate older version + runner._running_agents["k"] = MagicMock() + runner._running_agents_ts["k"] = 1.0 + + runner._release_running_agent_state("k") # should not raise + + assert "k" not in runner._running_agents + assert "k" not in runner._running_agents_ts + + def test_concurrent_release_is_safe(self): + """Multiple threads releasing different keys concurrently.""" + runner = _make_runner() + for i in range(50): + k = f"s{i}" + runner._running_agents[k] = MagicMock() + runner._running_agents_ts[k] = float(i) + runner._busy_ack_ts[k] = float(i) + + def worker(keys): + for k in keys: + runner._release_running_agent_state(k) + + threads = [ + threading.Thread(target=worker, args=([f"s{i}" for i in range(start, 50, 5)],)) + for start in range(5) + ] + for t in threads: + t.start() + for t in threads: + t.join(timeout=5) + assert not t.is_alive() + + assert runner._running_agents == {} + assert runner._running_agents_ts == {} + assert runner._busy_ack_ts == {} + + +class TestNoMoreBareDeleteSites: + """Regression: all bare `del self._running_agents[key]` sites were + converted to use the helper. If a future contributor reverts one, + this test flags it. Docstrings / comments mentioning the old + pattern are allowed. 
+ """ + + def test_no_bare_del_of_running_agents_in_gateway_run(self): + from pathlib import Path + import re + + gateway_run = (Path(__file__).parent.parent.parent / "gateway" / "run.py").read_text() + # Match `del self._running_agents[...]` that is NOT inside a + # triple-quoted docstring. We scan non-docstring lines only. + lines = gateway_run.splitlines() + + in_docstring = False + docstring_delim = None + offenders = [] + for idx, line in enumerate(lines, start=1): + stripped = line.strip() + if not in_docstring: + if stripped.startswith('"""') or stripped.startswith("'''"): + delim = stripped[:3] + # single-line docstring? + if stripped.count(delim) >= 2: + continue + in_docstring = True + docstring_delim = delim + continue + if re.search(r"\bdel\s+self\._running_agents\[", line): + offenders.append((idx, line.rstrip())) + else: + if docstring_delim and docstring_delim in stripped: + in_docstring = False + docstring_delim = None + + assert offenders == [], ( + "Found bare `del self._running_agents[...]` sites in gateway/run.py. " + "Use self._release_running_agent_state(session_key) instead so " + "_running_agents_ts and _busy_ack_ts are popped in lockstep.\n" + + "\n".join(f" line {n}: {l}" for n, l in offenders) + ) + + +class TestSessionDbCloseOnShutdown: + """_stop_impl should call .close() on both self._session_db and + self.session_store._db to release SQLite WAL locks before the new + gateway (during --replace restart) tries to open the same file. + """ + + def test_stop_impl_closes_both_session_dbs(self): + """Run the exact shutdown block that closes SessionDBs and verify + .close() was called on both holders.""" + from gateway.run import GatewayRunner + + runner = GatewayRunner.__new__(GatewayRunner) + + runner_db = MagicMock() + store_db = MagicMock() + + runner._db = runner_db + runner.session_store = MagicMock() + runner.session_store._db = store_db + + # Replicate the exact production loop from _stop_impl. 
+ for _db_holder in (runner, getattr(runner, "session_store", None)): + _db = getattr(_db_holder, "_db", None) if _db_holder else None + if _db is None or not hasattr(_db, "close"): + continue + _db.close() + + runner_db.close.assert_called_once() + store_db.close.assert_called_once() + + def test_shutdown_tolerates_missing_session_store(self): + """Gateway without a session_store attribute must not crash on shutdown.""" + from gateway.run import GatewayRunner + + runner = GatewayRunner.__new__(GatewayRunner) + runner._db = MagicMock() + # Deliberately no session_store attribute. + + for _db_holder in (runner, getattr(runner, "session_store", None)): + _db = getattr(_db_holder, "_db", None) if _db_holder else None + if _db is None or not hasattr(_db, "close"): + continue + _db.close() + + runner._db.close.assert_called_once() + + def test_shutdown_tolerates_close_raising(self): + """A close() that raises must not prevent subsequent cleanup.""" + from gateway.run import GatewayRunner + + runner = GatewayRunner.__new__(GatewayRunner) + flaky_db = MagicMock() + flaky_db.close.side_effect = RuntimeError("simulated lock error") + healthy_db = MagicMock() + + runner._db = flaky_db + runner.session_store = MagicMock() + runner.session_store._db = healthy_db + + # Same pattern as production: try/except around each close(). 
+ for _db_holder in (runner, getattr(runner, "session_store", None)): + _db = getattr(_db_holder, "_db", None) if _db_holder else None + if _db is None or not hasattr(_db, "close"): + continue + try: + _db.close() + except Exception: + pass + + flaky_db.close.assert_called_once() + healthy_db.close.assert_called_once() From 635850191519016c78c74297a7053df29bdf543d Mon Sep 17 00:00:00 2001 From: WideLee Date: Wed, 15 Apr 2026 23:46:50 +0800 Subject: [PATCH 07/20] refactor(qqbot): split qqbot.py into package & add QR scan-to-configure onboard flow - Refactor gateway/platforms/qqbot.py into gateway/platforms/qqbot/ package: - adapter.py: core QQAdapter (unchanged logic, constants from shared module) - constants.py: shared constants (API URLs, timeouts, message types) - crypto.py: AES-256-GCM key generation and secret decryption - onboard.py: QR-code scan-to-configure API (create_bind_task, poll_bind_result) - utils.py: User-Agent builder, HTTP headers, config helpers - __init__.py: re-exports all public symbols for backward compatibility - Add interactive QR-code setup flow in hermes_cli/gateway.py: - Terminal QR rendering via qrcode package (graceful fallback to URL) - Auto-refresh on QR expiry (up to 3 times) - AES-256-GCM encrypted credential exchange - DM security policy selection (pairing/allowlist/open) - Update hermes_cli/setup.py to delegate to gateway's _setup_qqbot() - Add qrcode>=7.4 dependency to pyproject.toml and requirements.txt --- gateway/config.py | 4 +- gateway/platforms/qqbot/__init__.py | 55 +++++ .../platforms/{qqbot.py => qqbot/adapter.py} | 103 ++++++---- gateway/platforms/qqbot/constants.py | 74 +++++++ gateway/platforms/qqbot/crypto.py | 45 ++++ gateway/platforms/qqbot/onboard.py | 124 +++++++++++ gateway/platforms/qqbot/utils.py | 71 +++++++ hermes_cli/config.py | 6 +- hermes_cli/gateway.py | 193 +++++++++++++++++- hermes_cli/setup.py | 57 +----- hermes_cli/status.py | 2 +- pyproject.toml | 2 + requirements.txt | 1 + uv.lock | 41 +++- 
.../docs/reference/environment-variables.md | 2 +- website/docs/user-guide/messaging/qqbot.md | 6 +- 16 files changed, 670 insertions(+), 116 deletions(-) create mode 100644 gateway/platforms/qqbot/__init__.py rename gateway/platforms/{qqbot.py => qqbot/adapter.py} (97%) create mode 100644 gateway/platforms/qqbot/constants.py create mode 100644 gateway/platforms/qqbot/crypto.py create mode 100644 gateway/platforms/qqbot/onboard.py create mode 100644 gateway/platforms/qqbot/utils.py diff --git a/gateway/config.py b/gateway/config.py index 799b151b7..d6a196e60 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -1229,12 +1229,12 @@ def _apply_env_overrides(config: GatewayConfig) -> None: qq_group_allowed = os.getenv("QQ_GROUP_ALLOWED_USERS", "").strip() if qq_group_allowed: extra["group_allow_from"] = qq_group_allowed - qq_home = os.getenv("QQ_HOME_CHANNEL", "").strip() + qq_home = os.getenv("QQBOT_HOME_CHANNEL", "").strip() if qq_home: config.platforms[Platform.QQBOT].home_channel = HomeChannel( platform=Platform.QQBOT, chat_id=qq_home, - name=os.getenv("QQ_HOME_CHANNEL_NAME", "Home"), + name=os.getenv("QQBOT_HOME_CHANNEL_NAME", "Home"), ) # Session settings diff --git a/gateway/platforms/qqbot/__init__.py b/gateway/platforms/qqbot/__init__.py new file mode 100644 index 000000000..4877baa53 --- /dev/null +++ b/gateway/platforms/qqbot/__init__.py @@ -0,0 +1,55 @@ +""" +QQBot platform package. 
+ +Re-exports the main adapter symbols from ``adapter.py`` (the original +``qqbot.py``) so that **all existing import paths remain unchanged**:: + + from gateway.platforms.qqbot import QQAdapter # works + from gateway.platforms.qqbot import check_qq_requirements # works + +New modules: + - ``constants`` — shared constants (API URLs, timeouts, message types) + - ``utils`` — User-Agent builder, config helpers + - ``crypto`` — AES-256-GCM key generation and decryption + - ``onboard`` — QR-code scan-to-configure flow +""" + +# -- Adapter (original qqbot.py) ------------------------------------------ +from .adapter import ( # noqa: F401 + QQAdapter, + QQCloseError, + check_qq_requirements, + _coerce_list, +) + +# -- Onboard (QR-code scan-to-configure) ----------------------------------- +from .onboard import ( # noqa: F401 + BindStatus, + create_bind_task, + poll_bind_result, + build_connect_url, +) +from .crypto import decrypt_secret, generate_bind_key # noqa: F401 + +# -- Utils ----------------------------------------------------------------- +from .utils import build_user_agent, get_api_headers, coerce_list # noqa: F401 + +__all__ = [ + # adapter + "QQAdapter", + "QQCloseError", + "check_qq_requirements", + "_coerce_list", + # onboard + "BindStatus", + "create_bind_task", + "poll_bind_result", + "build_connect_url", + # crypto + "decrypt_secret", + "generate_bind_key", + # utils + "build_user_agent", + "get_api_headers", + "coerce_list", +] diff --git a/gateway/platforms/qqbot.py b/gateway/platforms/qqbot/adapter.py similarity index 97% rename from gateway/platforms/qqbot.py rename to gateway/platforms/qqbot/adapter.py index 32252be12..d41b9a34e 100644 --- a/gateway/platforms/qqbot.py +++ b/gateway/platforms/qqbot/adapter.py @@ -84,38 +84,34 @@ class QQCloseError(Exception): self.reason = str(reason) if reason else "" super().__init__(f"WebSocket closed (code={self.code}, reason={self.reason})") # 
--------------------------------------------------------------------------- -# Constants +# Constants — imported from the shared constants module. # --------------------------------------------------------------------------- -API_BASE = "https://api.sgroup.qq.com" -TOKEN_URL = "https://bots.qq.com/app/getAppAccessToken" -GATEWAY_URL_PATH = "/gateway" - -DEFAULT_API_TIMEOUT = 30.0 -FILE_UPLOAD_TIMEOUT = 120.0 -CONNECT_TIMEOUT_SECONDS = 20.0 - -RECONNECT_BACKOFF = [2, 5, 10, 30, 60] -MAX_RECONNECT_ATTEMPTS = 100 -RATE_LIMIT_DELAY = 60 # seconds -QUICK_DISCONNECT_THRESHOLD = 5.0 # seconds -MAX_QUICK_DISCONNECT_COUNT = 3 - -MAX_MESSAGE_LENGTH = 4000 -DEDUP_WINDOW_SECONDS = 300 -DEDUP_MAX_SIZE = 1000 - -# QQ Bot message types -MSG_TYPE_TEXT = 0 -MSG_TYPE_MARKDOWN = 2 -MSG_TYPE_MEDIA = 7 -MSG_TYPE_INPUT_NOTIFY = 6 - -# QQ Bot file media types -MEDIA_TYPE_IMAGE = 1 -MEDIA_TYPE_VIDEO = 2 -MEDIA_TYPE_VOICE = 3 -MEDIA_TYPE_FILE = 4 +from gateway.platforms.qqbot.constants import ( + API_BASE, + TOKEN_URL, + GATEWAY_URL_PATH, + DEFAULT_API_TIMEOUT, + FILE_UPLOAD_TIMEOUT, + CONNECT_TIMEOUT_SECONDS, + RECONNECT_BACKOFF, + MAX_RECONNECT_ATTEMPTS, + RATE_LIMIT_DELAY, + QUICK_DISCONNECT_THRESHOLD, + MAX_QUICK_DISCONNECT_COUNT, + MAX_MESSAGE_LENGTH, + DEDUP_WINDOW_SECONDS, + DEDUP_MAX_SIZE, + MSG_TYPE_TEXT, + MSG_TYPE_MARKDOWN, + MSG_TYPE_MEDIA, + MSG_TYPE_INPUT_NOTIFY, + MEDIA_TYPE_IMAGE, + MEDIA_TYPE_VIDEO, + MEDIA_TYPE_VOICE, + MEDIA_TYPE_FILE, +) +from gateway.platforms.qqbot.utils import coerce_list as _coerce_list_impl, build_user_agent def check_qq_requirements() -> bool: @@ -125,13 +121,7 @@ def check_qq_requirements() -> bool: def _coerce_list(value: Any) -> List[str]: """Coerce config values into a trimmed string list.""" - if value is None: - return [] - if isinstance(value, str): - return [item.strip() for item in value.split(",") if item.strip()] - if isinstance(value, (list, tuple, set)): - return [str(item).strip() for item in value if str(item).strip()] - return 
[str(value).strip()] if str(value).strip() else [] + return _coerce_list_impl(value) # --------------------------------------------------------------------------- @@ -143,6 +133,9 @@ class QQAdapter(BasePlatformAdapter): # QQ Bot API does not support editing sent messages. SUPPORTS_MESSAGE_EDITING = False + MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH + _TYPING_INPUT_SECONDS = 60 # input_notify duration reported to QQ + _TYPING_DEBOUNCE_SECONDS = 50 # refresh before it expires def _fail_pending(self, reason: str) -> None: """Fail all pending response futures.""" @@ -151,7 +144,6 @@ class QQAdapter(BasePlatformAdapter): fut.set_exception(RuntimeError(reason)) self._pending_responses.clear() - MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH def __init__(self, config: PlatformConfig): super().__init__(config, Platform.QQBOT) @@ -182,6 +174,11 @@ class QQAdapter(BasePlatformAdapter): self._pending_responses: Dict[str, asyncio.Future] = {} self._seen_messages: Dict[str, float] = {} + # Last inbound message ID per chat — used by send_typing + self._last_msg_id: Dict[str, str] = {} + # Typing debounce: chat_id → last send_typing timestamp + self._typing_sent_at: Dict[str, float] = {} + # Token cache self._access_token: Optional[str] = None self._token_expires_at: float = 0.0 @@ -687,6 +684,12 @@ class QQAdapter(BasePlatformAdapter): # Inbound message handling # ------------------------------------------------------------------ + async def handle_message(self, event: MessageEvent) -> None: + """Cache the last message ID per chat, then delegate to base.""" + if event.message_id and event.source.chat_id: + self._last_msg_id[event.source.chat_id] = event.message_id + await super().handle_message(event) + async def _on_message(self, event_type: str, d: Any) -> None: """Process an inbound QQ Bot message event.""" if not isinstance(d, dict): @@ -909,7 +912,6 @@ class QQAdapter(BasePlatformAdapter): # Attachment processing # ------------------------------------------------------------------ - 
@staticmethod def _detect_message_type(media_urls: list, media_types: list): """Determine MessageType from attachment content types.""" @@ -1476,6 +1478,7 @@ class QQAdapter(BasePlatformAdapter): headers = { "Authorization": f"QQBot {token}", "Content-Type": "application/json", + "User-Agent": build_user_agent(), } try: @@ -1875,25 +1878,39 @@ class QQAdapter(BasePlatformAdapter): # ------------------------------------------------------------------ async def send_typing(self, chat_id: str, metadata=None) -> None: - """Send an input notify to a C2C user (only supported for C2C).""" - del metadata + """Send an input notify to a C2C user (only supported for C2C). + Debounced to one request per ~50s (the API sets a 60s indicator). + The QQ API requires the originating message ID — retrieved from + ``_last_msg_id`` which is populated by ``_on_message``. + """ if not self.is_connected: return - # Only C2C supports input notify chat_type = self._guess_chat_type(chat_id) if chat_type != "c2c": return + msg_id = self._last_msg_id.get(chat_id) + if not msg_id: + return + + # Debounce — skip if we sent recently + now = time.time() + last_sent = self._typing_sent_at.get(chat_id, 0.0) + if now - last_sent < self._TYPING_DEBOUNCE_SECONDS: + return + try: msg_seq = self._next_msg_seq(chat_id) body = { "msg_type": MSG_TYPE_INPUT_NOTIFY, - "input_notify": {"input_type": 1, "input_second": 60}, + "msg_id": msg_id, + "input_notify": {"input_type": 1, "input_second": self._TYPING_INPUT_SECONDS}, "msg_seq": msg_seq, } await self._api_request("POST", f"/v2/users/{chat_id}/messages", body) + self._typing_sent_at[chat_id] = now except Exception as exc: logger.debug("[%s] send_typing failed: %s", self.name, exc) diff --git a/gateway/platforms/qqbot/constants.py b/gateway/platforms/qqbot/constants.py new file mode 100644 index 000000000..ddae3c133 --- /dev/null +++ b/gateway/platforms/qqbot/constants.py @@ -0,0 +1,74 @@ +"""QQBot package-level constants shared across adapter, onboard, and 
other modules.""" + +from __future__ import annotations + +import os + +# --------------------------------------------------------------------------- +# QQBot adapter version — bump on functional changes to the adapter package. +# --------------------------------------------------------------------------- + +QQBOT_VERSION = "1.1.0" + +# --------------------------------------------------------------------------- +# API endpoints +# --------------------------------------------------------------------------- + +# The portal domain is configurable via QQ_API_HOST for corporate proxies +# or test environments. Default: q.qq.com (production). +PORTAL_HOST = os.getenv("QQ_PORTAL_HOST", "q.qq.com") + +API_BASE = "https://api.sgroup.qq.com" +TOKEN_URL = "https://bots.qq.com/app/getAppAccessToken" +GATEWAY_URL_PATH = "/gateway" + +# QR-code onboard endpoints (on the portal host) +ONBOARD_CREATE_PATH = "/lite/create_bind_task" +ONBOARD_POLL_PATH = "/lite/poll_bind_result" +QR_URL_TEMPLATE = ( + "https://q.qq.com/qqbot/openclaw/connect.html" + "?task_id={task_id}&_wv=2&source=hermes" +) + +# --------------------------------------------------------------------------- +# Timeouts & retry +# --------------------------------------------------------------------------- + +DEFAULT_API_TIMEOUT = 30.0 +FILE_UPLOAD_TIMEOUT = 120.0 +CONNECT_TIMEOUT_SECONDS = 20.0 + +RECONNECT_BACKOFF = [2, 5, 10, 30, 60] +MAX_RECONNECT_ATTEMPTS = 100 +RATE_LIMIT_DELAY = 60 # seconds +QUICK_DISCONNECT_THRESHOLD = 5.0 # seconds +MAX_QUICK_DISCONNECT_COUNT = 3 + +ONBOARD_POLL_INTERVAL = 2.0 # seconds between poll_bind_result calls +ONBOARD_API_TIMEOUT = 10.0 + +# --------------------------------------------------------------------------- +# Message limits +# --------------------------------------------------------------------------- + +MAX_MESSAGE_LENGTH = 4000 +DEDUP_WINDOW_SECONDS = 300 +DEDUP_MAX_SIZE = 1000 + +# --------------------------------------------------------------------------- +# QQ Bot 
message types +# --------------------------------------------------------------------------- + +MSG_TYPE_TEXT = 0 +MSG_TYPE_MARKDOWN = 2 +MSG_TYPE_MEDIA = 7 +MSG_TYPE_INPUT_NOTIFY = 6 + +# --------------------------------------------------------------------------- +# QQ Bot file media types +# --------------------------------------------------------------------------- + +MEDIA_TYPE_IMAGE = 1 +MEDIA_TYPE_VIDEO = 2 +MEDIA_TYPE_VOICE = 3 +MEDIA_TYPE_FILE = 4 diff --git a/gateway/platforms/qqbot/crypto.py b/gateway/platforms/qqbot/crypto.py new file mode 100644 index 000000000..426bd29de --- /dev/null +++ b/gateway/platforms/qqbot/crypto.py @@ -0,0 +1,45 @@ +"""AES-256-GCM utilities for QQBot scan-to-configure credential decryption.""" + +from __future__ import annotations + +import base64 +import os + + +def generate_bind_key() -> str: + """Generate a 256-bit random AES key and return it as base64. + + The key is passed to ``create_bind_task`` so the server can encrypt + the bot's *client_secret* before returning it. Only this CLI holds + the key, ensuring the secret never travels in plaintext. + """ + return base64.b64encode(os.urandom(32)).decode() + + +def decrypt_secret(encrypted_base64: str, key_base64: str) -> str: + """Decrypt a base64-encoded AES-256-GCM ciphertext. + + Ciphertext layout (after base64-decoding):: + + IV (12 bytes) ‖ ciphertext (N bytes) ‖ AuthTag (16 bytes) + + Args: + encrypted_base64: The ``bot_encrypt_secret`` value from + ``poll_bind_result``. + key_base64: The base64 AES key generated by + :func:`generate_bind_key`. + + Returns: + The decrypted *client_secret* as a UTF-8 string. 
+ """ + from cryptography.hazmat.primitives.ciphers.aead import AESGCM + + key = base64.b64decode(key_base64) + raw = base64.b64decode(encrypted_base64) + + iv = raw[:12] + ciphertext_with_tag = raw[12:] # AESGCM expects ciphertext + tag concatenated + + aesgcm = AESGCM(key) + plaintext = aesgcm.decrypt(iv, ciphertext_with_tag, None) + return plaintext.decode("utf-8") diff --git a/gateway/platforms/qqbot/onboard.py b/gateway/platforms/qqbot/onboard.py new file mode 100644 index 000000000..65750b3f1 --- /dev/null +++ b/gateway/platforms/qqbot/onboard.py @@ -0,0 +1,124 @@ +""" +QQBot scan-to-configure (QR code onboard) module. + +Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to +generate a QR-code URL and poll for scan completion. On success the caller +receives the bot's *app_id*, *client_secret* (decrypted locally), and the +scanner's *user_openid* — enough to fully configure the QQBot gateway. + +Reference: https://bot.q.qq.com/wiki/develop/api-v2/ +""" + +from __future__ import annotations + +import logging +from enum import IntEnum +from typing import Tuple +from urllib.parse import quote + +from .constants import ( + ONBOARD_API_TIMEOUT, + ONBOARD_CREATE_PATH, + ONBOARD_POLL_PATH, + PORTAL_HOST, + QR_URL_TEMPLATE, +) +from .crypto import generate_bind_key +from .utils import get_api_headers + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Bind status +# --------------------------------------------------------------------------- + + +class BindStatus(IntEnum): + """Status codes returned by ``poll_bind_result``.""" + + NONE = 0 + PENDING = 1 + COMPLETED = 2 + EXPIRED = 3 + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +async def create_bind_task( + timeout: float = ONBOARD_API_TIMEOUT, +) -> Tuple[str, str]: + """Create a bind task 
and return *(task_id, aes_key_base64)*. + + The AES key is generated locally and sent to the server so it can + encrypt the bot credentials before returning them. + + Raises: + RuntimeError: If the API returns a non-zero ``retcode``. + """ + import httpx + + url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}" + key = generate_bind_key() + + async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client: + resp = await client.post(url, json={"key": key}, headers=get_api_headers()) + resp.raise_for_status() + data = resp.json() + + if data.get("retcode") != 0: + raise RuntimeError(data.get("msg", "create_bind_task failed")) + + task_id = data.get("data", {}).get("task_id") + if not task_id: + raise RuntimeError("create_bind_task: missing task_id in response") + + logger.debug("create_bind_task ok: task_id=%s", task_id) + return task_id, key + + +async def poll_bind_result( + task_id: str, + timeout: float = ONBOARD_API_TIMEOUT, +) -> Tuple[BindStatus, str, str, str]: + """Poll the bind result for *task_id*. + + Returns: + A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``. + + * ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with + :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the + key from :func:`create_bind_task`. + * ``user_openid`` is the OpenID of the person who scanned the code + (available when ``status == COMPLETED``). + + Raises: + RuntimeError: If the API returns a non-zero ``retcode``. 
+ """ + import httpx + + url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}" + + async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client: + resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers()) + resp.raise_for_status() + data = resp.json() + + if data.get("retcode") != 0: + raise RuntimeError(data.get("msg", "poll_bind_result failed")) + + d = data.get("data", {}) + return ( + BindStatus(d.get("status", 0)), + str(d.get("bot_appid", "")), + d.get("bot_encrypt_secret", ""), + d.get("user_openid", ""), + ) + + +def build_connect_url(task_id: str) -> str: + """Build the QR-code target URL for a given *task_id*.""" + return QR_URL_TEMPLATE.format(task_id=quote(task_id)) diff --git a/gateway/platforms/qqbot/utils.py b/gateway/platforms/qqbot/utils.py new file mode 100644 index 000000000..873e58d2a --- /dev/null +++ b/gateway/platforms/qqbot/utils.py @@ -0,0 +1,71 @@ +"""QQBot shared utilities — User-Agent, HTTP helpers, config coercion.""" + +from __future__ import annotations + +import platform +import sys +from typing import Any, Dict, List + +from .constants import QQBOT_VERSION + + +# --------------------------------------------------------------------------- +# User-Agent +# --------------------------------------------------------------------------- + +def _get_hermes_version() -> str: + """Return the hermes-agent package version, or 'dev' if unavailable.""" + try: + from importlib.metadata import version + return version("hermes-agent") + except Exception: + return "dev" + + +def build_user_agent() -> str: + """Build a descriptive User-Agent string. 
+ + Format:: + + QQBotAdapter/ (Python/; ; Hermes/) + + Example:: + + QQBotAdapter/1.0.0 (Python/3.11.15; darwin; Hermes/0.9.0) + """ + py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + os_name = platform.system().lower() + hermes_version = _get_hermes_version() + return f"QQBotAdapter/{QQBOT_VERSION} (Python/{py_version}; {os_name}; Hermes/{hermes_version})" + + +def get_api_headers() -> Dict[str, str]: + """Return standard HTTP headers for QQBot API requests. + + Includes ``Content-Type``, ``Accept``, and a dynamic ``User-Agent``. + ``q.qq.com`` requires ``Accept: application/json`` — without it, + the server returns a JavaScript anti-bot challenge page. + """ + return { + "Content-Type": "application/json", + "Accept": "application/json", + "User-Agent": build_user_agent(), + } + + +# --------------------------------------------------------------------------- +# Config helpers +# --------------------------------------------------------------------------- + +def coerce_list(value: Any) -> List[str]: + """Coerce config values into a trimmed string list. + + Accepts comma-separated strings, lists, tuples, sets, or single values. 
+ """ + if value is None: + return [] + if isinstance(value, str): + return [item.strip() for item in value.split(",") if item.strip()] + if isinstance(value, (list, tuple, set)): + return [str(item).strip() for item in value if str(item).strip()] + return [str(value).strip()] if str(value).strip() else [] diff --git a/hermes_cli/config.py b/hermes_cli/config.py index f08e29266..156e99f2d 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -44,7 +44,7 @@ _EXTRA_ENV_KEYS = frozenset({ "WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY", "WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS", "BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD", - "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME", + "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME", "QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT", "QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL", "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT", @@ -1534,12 +1534,12 @@ OPTIONAL_ENV_VARS = { "prompt": "Allow All QQ Users", "category": "messaging", }, - "QQ_HOME_CHANNEL": { + "QQBOT_HOME_CHANNEL": { "description": "Default QQ channel/group for cron delivery and notifications", "prompt": "QQ Home Channel", "category": "messaging", }, - "QQ_HOME_CHANNEL_NAME": { + "QQBOT_HOME_CHANNEL_NAME": { "description": "Display name for the QQ home channel", "prompt": "QQ Home Channel Name", "category": "messaging", diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 585bbe446..2ba1ca337 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -1998,7 +1998,7 @@ _PLATFORMS = [ {"name": "QQ_ALLOWED_USERS", "prompt": "Allowed user OpenIDs (comma-separated, leave empty for open access)", "password": False, "is_allowlist": True, "help": "Optional — restrict DM access to specific user OpenIDs."}, - {"name": "QQ_HOME_CHANNEL", "prompt": 
"Home channel (user/group OpenID for cron delivery, or empty)", "password": False, + {"name": "QQBOT_HOME_CHANNEL", "prompt": "Home channel (user/group OpenID for cron delivery, or empty)", "password": False, "help": "OpenID to deliver cron results and notifications to."}, ], }, @@ -2625,6 +2625,195 @@ def _setup_feishu(): print_info(f" Bot: {bot_name}") +def _setup_qqbot(): + """Interactive setup for QQ Bot — scan-to-configure or manual credentials.""" + print() + print(color(" ─── 🐧 QQ Bot Setup ───", Colors.CYAN)) + + existing_app_id = get_env_value("QQ_APP_ID") + existing_secret = get_env_value("QQ_CLIENT_SECRET") + if existing_app_id and existing_secret: + print() + print_success("QQ Bot is already configured.") + if not prompt_yes_no(" Reconfigure QQ Bot?", False): + return + + # ── QR scan or manual ── + credentials = None + used_qr = False + + print() + if prompt_yes_no(" Scan QR code to add bot automatically?", True): + try: + credentials = _qqbot_qr_flow() + except KeyboardInterrupt: + print() + print_warning(" QQ Bot setup cancelled.") + return + if credentials: + used_qr = True + if not credentials: + print_info(" QR setup did not complete. 
Continuing with manual input.") + + # ── Manual credential input ── + if not credentials: + print() + print_info(" Go to https://q.qq.com to register a QQ Bot application.") + print_info(" Note your App ID and App Secret from the application page.") + print() + app_id = prompt(" App ID", password=False) + if not app_id: + print_warning(" Skipped — QQ Bot won't work without an App ID.") + return + app_secret = prompt(" App Secret", password=True) + if not app_secret: + print_warning(" Skipped — QQ Bot won't work without an App Secret.") + return + credentials = {"app_id": app_id.strip(), "client_secret": app_secret.strip(), "user_openid": ""} + + # ── Save core credentials ── + save_env_value("QQ_APP_ID", credentials["app_id"]) + save_env_value("QQ_CLIENT_SECRET", credentials["client_secret"]) + + user_openid = credentials.get("user_openid", "") + + # ── DM security policy ── + print() + access_choices = [ + "Use DM pairing approval (recommended)", + "Allow all direct messages", + "Only allow listed user OpenIDs", + ] + access_idx = prompt_choice(" How should direct messages be authorized?", access_choices, 0) + if access_idx == 0: + save_env_value("QQ_ALLOW_ALL_USERS", "false") + save_env_value("QQ_ALLOWED_USERS", "") + print_success(" DM pairing enabled.") + print_info(" Unknown users can request access; approve with `hermes pairing approve`.") + elif access_idx == 1: + save_env_value("QQ_ALLOW_ALL_USERS", "true") + save_env_value("QQ_ALLOWED_USERS", "") + print_warning(" Open DM access enabled for QQ Bot.") + else: + default_allow = user_openid or "" + allowlist = prompt(" Allowed user OpenIDs (comma-separated)", default_allow, password=False).replace(" ", "") + save_env_value("QQ_ALLOW_ALL_USERS", "false") + save_env_value("QQ_ALLOWED_USERS", allowlist) + print_success(" Allowlist saved.") + + # ── Home channel ── + print() + home_default = user_openid or "" + home_channel = prompt(" Home channel OpenID (for cron/notifications, or empty)", home_default, 
password=False) + if home_channel: + save_env_value("QQBOT_HOME_CHANNEL", home_channel.strip()) + print_success(f" Home channel set to {home_channel.strip()}") + + print() + print_success("🐧 QQ Bot configured!") + print_info(f" App ID: {credentials['app_id']}") + + +def _qqbot_render_qr(url: str) -> bool: + """Try to render a QR code in the terminal. Returns True if successful.""" + try: + import qrcode as _qr + qr = _qr.QRCode() + qr.add_data(url) + qr.make(fit=True) + qr.print_ascii(invert=True) + return True + except Exception: + return False + + +def _qqbot_qr_flow(): + """Run the QR-code scan-to-configure flow. + + Returns a dict with app_id, client_secret, user_openid on success, + or None on failure/cancel. + """ + try: + from gateway.platforms.qqbot import ( + create_bind_task, poll_bind_result, build_connect_url, + decrypt_secret, BindStatus, + ) + from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL + except Exception as exc: + print_error(f" QQBot onboard import failed: {exc}") + return None + + import asyncio + import time + + MAX_REFRESHES = 3 + refresh_count = 0 + + while refresh_count <= MAX_REFRESHES: + loop = asyncio.new_event_loop() + + # ── Create bind task ── + try: + task_id, aes_key = loop.run_until_complete(create_bind_task()) + except Exception as e: + print_warning(f" Failed to create bind task: {e}") + loop.close() + return None + + url = build_connect_url(task_id) + + # ── Display QR code + URL ── + print() + if _qqbot_render_qr(url): + print(f" Scan the QR code above, or open this URL directly:\n {url}") + else: + print(f" Open this URL in QQ on your phone:\n {url}") + print_info(" Tip: pip install qrcode to show a scannable QR code here") + + # ── Poll loop (silent — keep QR visible at bottom) ── + try: + while True: + try: + status, app_id, encrypted_secret, user_openid = loop.run_until_complete( + poll_bind_result(task_id) + ) + except Exception: + time.sleep(ONBOARD_POLL_INTERVAL) + continue + + if status == 
BindStatus.COMPLETED: + client_secret = decrypt_secret(encrypted_secret, aes_key) + print() + print_success(f" QR scan complete! (App ID: {app_id})") + if user_openid: + print_info(f" Scanner's OpenID: {user_openid}") + return { + "app_id": app_id, + "client_secret": client_secret, + "user_openid": user_openid, + } + + if status == BindStatus.EXPIRED: + refresh_count += 1 + if refresh_count > MAX_REFRESHES: + print() + print_warning(f" QR code expired {MAX_REFRESHES} times — giving up.") + return None + print() + print_warning(f" QR code expired, refreshing... ({refresh_count}/{MAX_REFRESHES})") + loop.close() + break # outer while creates a new task + + time.sleep(ONBOARD_POLL_INTERVAL) + except KeyboardInterrupt: + loop.close() + raise + finally: + loop.close() + + return None + + def _setup_signal(): """Interactive setup for Signal messenger.""" import shutil @@ -2806,6 +2995,8 @@ def gateway_setup(): _setup_dingtalk() elif platform["key"] == "feishu": _setup_feishu() + elif platform["key"] == "qqbot": + _setup_qqbot() else: _setup_standard_platform(platform) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index b5efb52a8..9c0ee0bff 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -2005,52 +2005,6 @@ def _setup_wecom_callback(): _gw_setup() -def _setup_qqbot(): - """Configure QQ Bot gateway.""" - print_header("QQ Bot") - existing = get_env_value("QQ_APP_ID") - if existing: - print_info("QQ Bot: already configured") - if not prompt_yes_no("Reconfigure QQ Bot?", False): - return - - print_info("Connects Hermes to QQ via the Official QQ Bot API (v2).") - print_info(" Requires a QQ Bot application at q.qq.com") - print_info(" Reference: https://bot.q.qq.com/wiki/develop/api-v2/") - print() - - app_id = prompt("QQ Bot App ID") - if not app_id: - print_warning("App ID is required — skipping QQ Bot setup") - return - save_env_value("QQ_APP_ID", app_id.strip()) - - client_secret = prompt("QQ Bot App Secret", password=True) - if not client_secret: - 
print_warning("App Secret is required — skipping QQ Bot setup") - return - save_env_value("QQ_CLIENT_SECRET", client_secret) - print_success("QQ Bot credentials saved") - - print() - print_info("🔒 Security: Restrict who can DM your bot") - print_info(" Use QQ user OpenIDs (found in event payloads)") - print() - allowed_users = prompt("Allowed user OpenIDs (comma-separated, leave empty for open access)") - if allowed_users: - save_env_value("QQ_ALLOWED_USERS", allowed_users.replace(" ", "")) - print_success("QQ Bot allowlist configured") - else: - print_info("⚠️ No allowlist set — anyone can DM the bot!") - - print() - print_info("📬 Home Channel: OpenID for cron job delivery and notifications.") - home_channel = prompt("Home channel OpenID (leave empty to set later)") - if home_channel: - save_env_value("QQ_HOME_CHANNEL", home_channel) - - print() - print_success("QQ Bot configured!") def _setup_bluebubbles(): @@ -2119,12 +2073,9 @@ def _setup_bluebubbles(): def _setup_qqbot(): - """Configure QQ Bot (Official API v2) via standard platform setup.""" - from hermes_cli.gateway import _PLATFORMS - qq_platform = next((p for p in _PLATFORMS if p["key"] == "qqbot"), None) - if qq_platform: - from hermes_cli.gateway import _setup_standard_platform - _setup_standard_platform(qq_platform) + """Configure QQ Bot (Official API v2) via gateway setup.""" + from hermes_cli.gateway import _setup_qqbot as _gateway_setup_qqbot + _gateway_setup_qqbot() def _setup_webhooks(): @@ -2264,7 +2215,7 @@ def setup_gateway(config: dict): missing_home.append("Slack") if get_env_value("BLUEBUBBLES_SERVER_URL") and not get_env_value("BLUEBUBBLES_HOME_CHANNEL"): missing_home.append("BlueBubbles") - if get_env_value("QQ_APP_ID") and not get_env_value("QQ_HOME_CHANNEL"): + if get_env_value("QQ_APP_ID") and not get_env_value("QQBOT_HOME_CHANNEL"): missing_home.append("QQBot") if missing_home: diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 2e34ae9c3..8fafbc2f4 100644 --- 
a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -317,7 +317,7 @@ def show_status(args): "WeCom Callback": ("WECOM_CALLBACK_CORP_ID", None), "Weixin": ("WEIXIN_ACCOUNT_ID", "WEIXIN_HOME_CHANNEL"), "BlueBubbles": ("BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_HOME_CHANNEL"), - "QQBot": ("QQ_APP_ID", "QQ_HOME_CHANNEL"), + "QQBot": ("QQ_APP_ID", "QQBOT_HOME_CHANNEL"), } for name, (token_var, home_var) in platforms.items(): diff --git a/pyproject.toml b/pyproject.toml index 0cac0b6b7..d97c10810 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,8 @@ dependencies = [ "edge-tts>=7.2.7,<8", # Skills Hub (GitHub App JWT auth — optional, only needed for bot identity) "PyJWT[crypto]>=2.12.0,<3", # CVE-2026-32597 + # QR code rendering for scan-to-configure flows + "qrcode>=7.4,<9", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index 96f48e77f..74f42d6c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,3 +34,4 @@ croniter python-telegram-bot[webhooks]>=22.6 discord.py>=2.0 aiohttp>=3.9.0 +qrcode diff --git a/uv.lock b/uv.lock index 45efc2d93..fa6785aa5 100644 --- a/uv.lock +++ b/uv.lock @@ -300,7 +300,7 @@ wheels = [ [[package]] name = "atroposlib" version = "0.4.0" -source = { git = "https://github.com/NousResearch/atropos.git#c421582b6f7ce8a32f751aab3117d3824ac8f709" } +source = { git = "https://github.com/NousResearch/atropos.git?rev=c20c85256e5a45ad31edf8b7276e9c5ee1995a30#c20c85256e5a45ad31edf8b7276e9c5ee1995a30" } dependencies = [ { name = "aiofiles" }, { name = "aiohttp" }, @@ -1699,7 +1699,7 @@ wheels = [ [[package]] name = "hermes-agent" -version = "0.8.0" +version = "0.9.0" source = { editable = "." 
} dependencies = [ { name = "anthropic" }, @@ -1717,6 +1717,7 @@ dependencies = [ { name = "pyjwt", extra = ["crypto"] }, { name = "python-dotenv" }, { name = "pyyaml" }, + { name = "qrcode" }, { name = "requests" }, { name = "rich" }, { name = "tenacity" }, @@ -1737,6 +1738,7 @@ all = [ { name = "dingtalk-stream" }, { name = "discord-py", extra = ["voice"] }, { name = "elevenlabs" }, + { name = "fastapi" }, { name = "faster-whisper" }, { name = "honcho-ai" }, { name = "lark-oapi" }, @@ -1756,6 +1758,7 @@ all = [ { name = "slack-bolt" }, { name = "slack-sdk" }, { name = "sounddevice" }, + { name = "uvicorn", extra = ["standard"] }, ] cli = [ { name = "simple-term-menu" }, @@ -1842,6 +1845,10 @@ voice = [ { name = "numpy" }, { name = "sounddevice" }, ] +web = [ + { name = "fastapi" }, + { name = "uvicorn", extra = ["standard"] }, +] yc-bench = [ { name = "yc-bench", marker = "python_full_version >= '3.12'" }, ] @@ -1855,7 +1862,7 @@ requires-dist = [ { name = "aiosqlite", marker = "extra == 'matrix'", specifier = ">=0.20" }, { name = "anthropic", specifier = ">=0.39.0,<1" }, { name = "asyncpg", marker = "extra == 'matrix'", specifier = ">=0.29" }, - { name = "atroposlib", marker = "extra == 'rl'", git = "https://github.com/NousResearch/atropos.git" }, + { name = "atroposlib", marker = "extra == 'rl'", git = "https://github.com/NousResearch/atropos.git?rev=c20c85256e5a45ad31edf8b7276e9c5ee1995a30" }, { name = "croniter", marker = "extra == 'cron'", specifier = ">=6.0.0,<7" }, { name = "daytona", marker = "extra == 'daytona'", specifier = ">=0.148.0,<1" }, { name = "debugpy", marker = "extra == 'dev'", specifier = ">=1.8.0,<2" }, @@ -1866,6 +1873,7 @@ requires-dist = [ { name = "exa-py", specifier = ">=2.9.0,<3" }, { name = "fal-client", specifier = ">=0.13.1,<1" }, { name = "fastapi", marker = "extra == 'rl'", specifier = ">=0.104.0,<1" }, + { name = "fastapi", marker = "extra == 'web'", specifier = ">=0.104.0,<1" }, { name = "faster-whisper", marker = "extra == 
'voice'", specifier = ">=1.0.0,<2" }, { name = "fire", specifier = ">=0.7.1,<1" }, { name = "firecrawl-py", specifier = ">=4.16.0,<5" }, @@ -1894,6 +1902,7 @@ requires-dist = [ { name = "hermes-agent", extras = ["sms"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["tts-premium"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["voice"], marker = "extra == 'all'" }, + { name = "hermes-agent", extras = ["web"], marker = "extra == 'all'" }, { name = "honcho-ai", marker = "extra == 'honcho'", specifier = ">=2.0.1,<3" }, { name = "httpx", extras = ["socks"], specifier = ">=0.28.1,<1" }, { name = "jinja2", specifier = ">=3.1.5,<4" }, @@ -1918,6 +1927,7 @@ requires-dist = [ { name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'messaging'", specifier = ">=22.6,<23" }, { name = "pywinpty", marker = "sys_platform == 'win32' and extra == 'pty'", specifier = ">=2.0.0,<3" }, { name = "pyyaml", specifier = ">=6.0.2,<7" }, + { name = "qrcode", specifier = ">=7.4,<9" }, { name = "requests", specifier = ">=2.33.0,<3" }, { name = "rich", specifier = ">=14.3.3,<15" }, { name = "simple-term-menu", marker = "extra == 'cli'", specifier = ">=1.0,<2" }, @@ -1927,12 +1937,13 @@ requires-dist = [ { name = "slack-sdk", marker = "extra == 'slack'", specifier = ">=3.27.0,<4" }, { name = "sounddevice", marker = "extra == 'voice'", specifier = ">=0.4.6,<1" }, { name = "tenacity", specifier = ">=9.1.4,<10" }, - { name = "tinker", marker = "extra == 'rl'", git = "https://github.com/thinking-machines-lab/tinker.git" }, + { name = "tinker", marker = "extra == 'rl'", git = "https://github.com/thinking-machines-lab/tinker.git?rev=30517b667f18a3dfb7ef33fb56cf686d5820ba2b" }, { name = "uvicorn", extras = ["standard"], marker = "extra == 'rl'", specifier = ">=0.24.0,<1" }, + { name = "uvicorn", extras = ["standard"], marker = "extra == 'web'", specifier = ">=0.24.0,<1" }, { name = "wandb", marker = "extra == 'rl'", specifier = ">=0.15.0,<1" }, - { 
name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git" }, + { name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c" }, ] -provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "termux", "dingtalk", "feishu", "rl", "yc-bench", "all"] +provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "termux", "dingtalk", "feishu", "web", "rl", "yc-bench", "all"] [[package]] name = "hf-transfer" @@ -4160,6 +4171,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "qrcode" +version = "8.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8f/b2/7fc2931bfae0af02d5f53b174e9cf701adbb35f39d69c2af63d4a39f81a9/qrcode-8.2.tar.gz", hash = "sha256:35c3f2a4172b33136ab9f6b3ef1c00260dd2f66f858f24d88418a015f446506c", size = 43317, upload-time = "2025-05-01T15:44:24.726Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/b8/d2d6d731733f51684bbf76bf34dab3b70a9148e8f2cef2bb544fccec681a/qrcode-8.2-py3-none-any.whl", hash = "sha256:16e64e0716c14960108e85d853062c9e8bba5ca8252c0b4d0231b9df4060ff4f", size = 45986, upload-time = "2025-05-01T15:44:22.781Z" }, +] + [[package]] name = "referencing" version 
= "0.37.0" @@ -4776,8 +4799,8 @@ wheels = [ [[package]] name = "tinker" -version = "0.16.1" -source = { git = "https://github.com/thinking-machines-lab/tinker.git#07bd3c2dd3cd4398ac1c26f0ec0deccbf3c1f913" } +version = "0.18.0" +source = { git = "https://github.com/thinking-machines-lab/tinker.git?rev=30517b667f18a3dfb7ef33fb56cf686d5820ba2b#30517b667f18a3dfb7ef33fb56cf686d5820ba2b" } dependencies = [ { name = "anyio" }, { name = "click" }, @@ -5490,7 +5513,7 @@ wheels = [ [[package]] name = "yc-bench" version = "0.1.0" -source = { git = "https://github.com/collinear-ai/yc-bench.git#0c53c98f01a431db2e391482bc46013045854ab2" } +source = { git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c#bfb0c88062450f46341bd9a5298903fc2e952a5c" } dependencies = [ { name = "litellm", marker = "python_full_version >= '3.12'" }, { name = "matplotlib", marker = "python_full_version >= '3.12'" }, diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 1e8ad8135..b6cfabb3d 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -290,7 +290,7 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI | `QQ_ALLOWED_USERS` | Comma-separated QQ user openIDs allowed to message the bot | | `QQ_GROUP_ALLOWED_USERS` | Comma-separated QQ group IDs for group @-message access | | `QQ_ALLOW_ALL_USERS` | Allow all users (`true`/`false`, overrides `QQ_ALLOWED_USERS`) | -| `QQ_HOME_CHANNEL` | QQ user/group openID for cron delivery and notifications | +| `QQBOT_HOME_CHANNEL` | QQ user/group openID for cron delivery and notifications | | `MATTERMOST_URL` | Mattermost server URL (e.g. 
`https://mm.example.com`) | | `MATTERMOST_TOKEN` | Bot token or personal access token for Mattermost | | `MATTERMOST_ALLOWED_USERS` | Comma-separated Mattermost user IDs allowed to message the bot | diff --git a/website/docs/user-guide/messaging/qqbot.md b/website/docs/user-guide/messaging/qqbot.md index 686fd862e..d9da90d58 100644 --- a/website/docs/user-guide/messaging/qqbot.md +++ b/website/docs/user-guide/messaging/qqbot.md @@ -48,8 +48,8 @@ QQ_CLIENT_SECRET=your-app-secret |---|---|---| | `QQ_APP_ID` | QQ Bot App ID (required) | — | | `QQ_CLIENT_SECRET` | QQ Bot App Secret (required) | — | -| `QQ_HOME_CHANNEL` | OpenID for cron/notification delivery | — | -| `QQ_HOME_CHANNEL_NAME` | Display name for home channel | `Home` | +| `QQBOT_HOME_CHANNEL` | OpenID for cron/notification delivery | — | +| `QQBOT_HOME_CHANNEL_NAME` | Display name for home channel | `Home` | | `QQ_ALLOWED_USERS` | Comma-separated user OpenIDs for DM access | open (all users) | | `QQ_ALLOW_ALL_USERS` | Set to `true` to allow all DMs | `false` | | `QQ_MARKDOWN_SUPPORT` | Enable QQ markdown (msg_type 2) | `true` | @@ -113,7 +113,7 @@ This usually means: - Verify the bot's **intents** are enabled at q.qq.com - Check `QQ_ALLOWED_USERS` if DM access is restricted - For group messages, ensure the bot is **@mentioned** (group policy may require allowlisting) -- Check `QQ_HOME_CHANNEL` for cron/notification delivery +- Check `QQBOT_HOME_CHANNEL` for cron/notification delivery ### Connection errors From a5f4d652d3457d28284979465fb5ef3be6179d65 Mon Sep 17 00:00:00 2001 From: WideLee Date: Thu, 16 Apr 2026 23:11:22 +0800 Subject: [PATCH 08/20] feat(qqbot): prompt to add scanned user to allow list and home channel during setup --- hermes_cli/gateway.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 2ba1ca337..ebbad66ea 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -2687,7 +2687,15 @@ 
def _setup_qqbot(): access_idx = prompt_choice(" How should direct messages be authorized?", access_choices, 0) if access_idx == 0: save_env_value("QQ_ALLOW_ALL_USERS", "false") - save_env_value("QQ_ALLOWED_USERS", "") + if user_openid: + print() + if prompt_yes_no(f" Add your QQ user ({user_openid}) to the allow list?", True): + save_env_value("QQ_ALLOWED_USERS", user_openid) + print_success(f" Allow list set to {user_openid}") + else: + save_env_value("QQ_ALLOWED_USERS", "") + else: + save_env_value("QQ_ALLOWED_USERS", "") print_success(" DM pairing enabled.") print_info(" Unknown users can request access; approve with `hermes pairing approve`.") elif access_idx == 1: @@ -2702,12 +2710,17 @@ def _setup_qqbot(): print_success(" Allowlist saved.") # ── Home channel ── - print() - home_default = user_openid or "" - home_channel = prompt(" Home channel OpenID (for cron/notifications, or empty)", home_default, password=False) - if home_channel: - save_env_value("QQBOT_HOME_CHANNEL", home_channel.strip()) - print_success(f" Home channel set to {home_channel.strip()}") + if user_openid: + print() + if prompt_yes_no(f" Use your QQ user OpenID ({user_openid}) as the home channel?", True): + save_env_value("QQBOT_HOME_CHANNEL", user_openid) + print_success(f" Home channel set to {user_openid}") + else: + print() + home_channel = prompt(" Home channel OpenID (for cron/notifications, or empty)", password=False) + if home_channel: + save_env_value("QQBOT_HOME_CHANNEL", home_channel.strip()) + print_success(f" Home channel set to {home_channel.strip()}") print() print_success("🐧 QQ Bot configured!") From b7d330211ad2d1166dd96100755ac16bb300e71f Mon Sep 17 00:00:00 2001 From: WideLee Date: Thu, 16 Apr 2026 23:34:25 +0800 Subject: [PATCH 09/20] fix(qqbot): simplify home channel prompt wording --- hermes_cli/gateway.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index ebbad66ea..35a8745a9 100644 --- 
a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -2689,7 +2689,7 @@ def _setup_qqbot(): save_env_value("QQ_ALLOW_ALL_USERS", "false") if user_openid: print() - if prompt_yes_no(f" Add your QQ user ({user_openid}) to the allow list?", True): + if prompt_yes_no(f" Add yourself ({user_openid}) to the allow list?", True): save_env_value("QQ_ALLOWED_USERS", user_openid) print_success(f" Allow list set to {user_openid}") else: @@ -2712,7 +2712,7 @@ def _setup_qqbot(): # ── Home channel ── if user_openid: print() - if prompt_yes_no(f" Use your QQ user OpenID ({user_openid}) as the home channel?", True): + if prompt_yes_no(f" Use your QQ user ID ({user_openid}) as the home channel?", True): save_env_value("QQBOT_HOME_CHANNEL", user_openid) print_success(f" Home channel set to {user_openid}") else: From 02f5e3dc27d6183810aad150089cad004c9d11f0 Mon Sep 17 00:00:00 2001 From: WideLee Date: Fri, 17 Apr 2026 18:25:06 +0800 Subject: [PATCH 10/20] refactor(qqbot): use _log_tag with app_id in all logger calls for multi-instance disambiguation --- gateway/platforms/qqbot/adapter.py | 863 ++++++++++++++++++++--------- 1 file changed, 616 insertions(+), 247 deletions(-) diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py index d41b9a34e..b5a250774 100644 --- a/gateway/platforms/qqbot/adapter.py +++ b/gateway/platforms/qqbot/adapter.py @@ -46,6 +46,7 @@ from urllib.parse import urlparse try: import aiohttp + AIOHTTP_AVAILABLE = True except ImportError: AIOHTTP_AVAILABLE = False @@ -53,6 +54,7 @@ except ImportError: try: import httpx + HTTPX_AVAILABLE = True except ImportError: HTTPX_AVAILABLE = False @@ -83,6 +85,8 @@ class QQCloseError(Exception): self.code = int(code) if code else None self.reason = str(reason) if reason else "" super().__init__(f"WebSocket closed (code={self.code}, reason={self.reason})") + + # --------------------------------------------------------------------------- # Constants — imported from the shared constants module. 
# --------------------------------------------------------------------------- @@ -111,7 +115,10 @@ from gateway.platforms.qqbot.constants import ( MEDIA_TYPE_VOICE, MEDIA_TYPE_FILE, ) -from gateway.platforms.qqbot.utils import coerce_list as _coerce_list_impl, build_user_agent +from gateway.platforms.qqbot.utils import ( + coerce_list as _coerce_list_impl, + build_user_agent, +) def check_qq_requirements() -> bool: @@ -128,15 +135,24 @@ def _coerce_list(value: Any) -> List[str]: # QQAdapter # --------------------------------------------------------------------------- + class QQAdapter(BasePlatformAdapter): """QQ Bot adapter backed by the official QQ Bot WebSocket Gateway + REST API.""" # QQ Bot API does not support editing sent messages. SUPPORTS_MESSAGE_EDITING = False MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH - _TYPING_INPUT_SECONDS = 60 # input_notify duration reported to QQ + _TYPING_INPUT_SECONDS = 60 # input_notify duration reported to QQ _TYPING_DEBOUNCE_SECONDS = 50 # refresh before it expires + @property + def _log_tag(self) -> str: + """Log prefix including app_id for multi-instance disambiguation.""" + app_id = getattr(self, "_app_id", None) + if app_id: + return f"QQBot:{app_id}" + return "QQBot" + def _fail_pending(self, reason: str) -> None: """Fail all pending response futures.""" for fut in self._pending_responses.values(): @@ -144,20 +160,25 @@ class QQAdapter(BasePlatformAdapter): fut.set_exception(RuntimeError(reason)) self._pending_responses.clear() - def __init__(self, config: PlatformConfig): super().__init__(config, Platform.QQBOT) extra = config.extra or {} self._app_id = str(extra.get("app_id") or os.getenv("QQ_APP_ID", "")).strip() - self._client_secret = str(extra.get("client_secret") or os.getenv("QQ_CLIENT_SECRET", "")).strip() + self._client_secret = str( + extra.get("client_secret") or os.getenv("QQ_CLIENT_SECRET", "") + ).strip() self._markdown_support = bool(extra.get("markdown_support", True)) # Auth/ACL policies self._dm_policy = 
str(extra.get("dm_policy", "open")).strip().lower() - self._allow_from = _coerce_list(extra.get("allow_from") or extra.get("allowFrom")) + self._allow_from = _coerce_list( + extra.get("allow_from") or extra.get("allowFrom") + ) self._group_policy = str(extra.get("group_policy", "open")).strip().lower() - self._group_allow_from = _coerce_list(extra.get("group_allow_from") or extra.get("groupAllowFrom")) + self._group_allow_from = _coerce_list( + extra.get("group_allow_from") or extra.get("groupAllowFrom") + ) # Connection state self._session: Optional[aiohttp.ClientSession] = None @@ -204,23 +225,21 @@ class QQAdapter(BasePlatformAdapter): if not AIOHTTP_AVAILABLE: message = "QQ startup failed: aiohttp not installed" self._set_fatal_error("qq_missing_dependency", message, retryable=True) - logger.warning("[%s] %s. Run: pip install aiohttp", self.name, message) + logger.warning("[%s] %s. Run: pip install aiohttp", self._log_tag, message) return False if not HTTPX_AVAILABLE: message = "QQ startup failed: httpx not installed" self._set_fatal_error("qq_missing_dependency", message, retryable=True) - logger.warning("[%s] %s. Run: pip install httpx", self.name, message) + logger.warning("[%s] %s. Run: pip install httpx", self._log_tag, message) return False if not self._app_id or not self._client_secret: message = "QQ startup failed: QQ_APP_ID and QQ_CLIENT_SECRET are required" self._set_fatal_error("qq_missing_credentials", message, retryable=True) - logger.warning("[%s] %s", self.name, message) + logger.warning("[%s] %s", self._log_tag, message) return False # Prevent duplicate connections with the same credentials - if not self._acquire_platform_lock( - "qqbot-appid", self._app_id, "QQBot app ID" - ): + if not self._acquire_platform_lock("qqbot-appid", self._app_id, "QQBot app ID"): return False try: @@ -235,7 +254,7 @@ class QQAdapter(BasePlatformAdapter): # 2. 
Get WebSocket gateway URL gateway_url = await self._get_gateway_url() - logger.info("[%s] Gateway URL: %s", self.name, gateway_url) + logger.info("[%s] Gateway URL: %s", self._log_tag, gateway_url) # 3. Open WebSocket await self._open_ws(gateway_url) @@ -244,12 +263,12 @@ class QQAdapter(BasePlatformAdapter): self._listen_task = asyncio.create_task(self._listen_loop()) self._heartbeat_task = asyncio.create_task(self._heartbeat_loop()) self._mark_connected() - logger.info("[%s] Connected", self.name) + logger.info("[%s] Connected", self._log_tag) return True except Exception as exc: message = f"QQ startup failed: {exc}" self._set_fatal_error("qq_connect_error", message, retryable=True) - logger.error("[%s] %s", self.name, message, exc_info=True) + logger.error("[%s] %s", self._log_tag, message, exc_info=True) await self._cleanup() self._release_platform_lock() return False @@ -277,7 +296,7 @@ class QQAdapter(BasePlatformAdapter): await self._cleanup() self._release_platform_lock() - logger.info("[%s] Disconnected", self.name) + logger.info("[%s] Disconnected", self._log_tag) async def _cleanup(self) -> None: """Close WebSocket, HTTP session, and client.""" @@ -326,12 +345,16 @@ class QQAdapter(BasePlatformAdapter): token = data.get("access_token") if not token: - raise RuntimeError(f"QQ Bot token response missing access_token: {data}") + raise RuntimeError( + f"QQ Bot token response missing access_token: {data}" + ) expires_in = int(data.get("expires_in", 7200)) self._access_token = token self._token_expires_at = time.time() + expires_in - logger.info("[%s] Access token refreshed, expires in %ds", self.name, expires_in) + logger.info( + "[%s] Access token refreshed, expires in %ds", self._log_tag, expires_in + ) return self._access_token async def _get_gateway_url(self) -> str: @@ -340,7 +363,10 @@ class QQAdapter(BasePlatformAdapter): try: resp = await self._http_client.get( f"{API_BASE}{GATEWAY_URL_PATH}", - headers={"Authorization": f"QQBot {token}"}, + headers={ 
+ "Authorization": f"QQBot {token}", + "User-Agent": build_user_agent(), + }, timeout=DEFAULT_API_TIMEOUT, ) resp.raise_for_status() @@ -370,9 +396,12 @@ class QQAdapter(BasePlatformAdapter): self._session = aiohttp.ClientSession() self._ws = await self._session.ws_connect( gateway_url, + headers={ + "User-Agent": build_user_agent(), + }, timeout=CONNECT_TIMEOUT_SECONDS, ) - logger.info("[%s] WebSocket connected to %s", self.name, gateway_url) + logger.info("[%s] WebSocket connected to %s", self._log_tag, gateway_url) async def _listen_loop(self) -> None: """Read WebSocket events and reconnect on errors. @@ -401,23 +430,34 @@ class QQAdapter(BasePlatformAdapter): return code = exc.code - logger.warning("[%s] WebSocket closed: code=%s reason=%s", - self.name, code, exc.reason) + logger.warning( + "[%s] WebSocket closed: code=%s reason=%s", + self._log_tag, + code, + exc.reason, + ) # Quick disconnect detection (permission issues, misconfiguration) duration = time.monotonic() - connect_time if duration < QUICK_DISCONNECT_THRESHOLD and connect_time > 0: quick_disconnect_count += 1 - logger.info("[%s] Quick disconnect (%.1fs), count: %d", - self.name, duration, quick_disconnect_count) + logger.info( + "[%s] Quick disconnect (%.1fs), count: %d", + self._log_tag, + duration, + quick_disconnect_count, + ) if quick_disconnect_count >= MAX_QUICK_DISCONNECT_COUNT: logger.error( "[%s] Too many quick disconnects. 
" "Check: 1) AppID/Secret correct 2) Bot permissions on QQ Open Platform", - self.name, + self._log_tag, + ) + self._set_fatal_error( + "qq_quick_disconnect", + "Too many quick disconnects — check bot permissions", + retryable=True, ) - self._set_fatal_error("qq_quick_disconnect", - "Too many quick disconnects — check bot permissions", retryable=True) return else: quick_disconnect_count = 0 @@ -428,13 +468,21 @@ class QQAdapter(BasePlatformAdapter): # Stop reconnecting for fatal codes if code in (4914, 4915): desc = "offline/sandbox-only" if code == 4914 else "banned" - logger.error("[%s] Bot is %s. Check QQ Open Platform.", self.name, desc) - self._set_fatal_error(f"qq_{desc}", f"Bot is {desc}", retryable=False) + logger.error( + "[%s] Bot is %s. Check QQ Open Platform.", self._log_tag, desc + ) + self._set_fatal_error( + f"qq_{desc}", f"Bot is {desc}", retryable=False + ) return # Rate limited if code == 4008: - logger.info("[%s] Rate limited (4008), waiting %ds", self.name, RATE_LIMIT_DELAY) + logger.info( + "[%s] Rate limited (4008), waiting %ds", + self._log_tag, + RATE_LIMIT_DELAY, + ) if backoff_idx >= MAX_RECONNECT_ATTEMPTS: return await asyncio.sleep(RATE_LIMIT_DELAY) @@ -447,14 +495,38 @@ class QQAdapter(BasePlatformAdapter): # Token invalid → clear cached token so _ensure_token() refreshes if code == 4004: - logger.info("[%s] Invalid token (4004), will refresh and reconnect", self.name) + logger.info( + "[%s] Invalid token (4004), will refresh and reconnect", + self._log_tag, + ) self._access_token = None self._token_expires_at = 0.0 # Session invalid → clear session, will re-identify on next Hello - if code in (4006, 4007, 4009, 4900, 4901, 4902, 4903, 4904, 4905, - 4906, 4907, 4908, 4909, 4910, 4911, 4912, 4913): - logger.info("[%s] Session error (%d), clearing session for re-identify", self.name, code) + if code in ( + 4006, + 4007, + 4009, + 4900, + 4901, + 4902, + 4903, + 4904, + 4905, + 4906, + 4907, + 4908, + 4909, + 4910, + 4911, + 4912, + 4913, 
+ ): + logger.info( + "[%s] Session error (%d), clearing session for re-identify", + self._log_tag, + code, + ) self._session_id = None self._last_seq = None @@ -467,12 +539,12 @@ class QQAdapter(BasePlatformAdapter): except Exception as exc: if not self._running: return - logger.warning("[%s] WebSocket error: %s", self.name, exc) + logger.warning("[%s] WebSocket error: %s", self._log_tag, exc) self._mark_disconnected() self._fail_pending("Connection interrupted") if backoff_idx >= MAX_RECONNECT_ATTEMPTS: - logger.error("[%s] Max reconnect attempts reached", self.name) + logger.error("[%s] Max reconnect attempts reached", self._log_tag) return if await self._reconnect(backoff_idx): @@ -484,7 +556,12 @@ class QQAdapter(BasePlatformAdapter): async def _reconnect(self, backoff_idx: int) -> bool: """Attempt to reconnect the WebSocket. Returns True on success.""" delay = RECONNECT_BACKOFF[min(backoff_idx, len(RECONNECT_BACKOFF) - 1)] - logger.info("[%s] Reconnecting in %ds (attempt %d)...", self.name, delay, backoff_idx + 1) + logger.info( + "[%s] Reconnecting in %ds (attempt %d)...", + self._log_tag, + delay, + backoff_idx + 1, + ) await asyncio.sleep(delay) self._heartbeat_interval = 30.0 # reset until Hello @@ -493,10 +570,10 @@ class QQAdapter(BasePlatformAdapter): gateway_url = await self._get_gateway_url() await self._open_ws(gateway_url) self._mark_connected() - logger.info("[%s] Reconnected", self.name) + logger.info("[%s] Reconnected", self._log_tag) return True except Exception as exc: - logger.warning("[%s] Reconnect failed: %s", self.name, exc) + logger.warning("[%s] Reconnect failed: %s", self._log_tag, exc) return False async def _read_events(self) -> None: @@ -533,7 +610,7 @@ class QQAdapter(BasePlatformAdapter): # d should be the latest sequence number received, or null await self._ws.send_json({"op": 1, "d": self._last_seq}) except Exception as exc: - logger.debug("[%s] Heartbeat failed: %s", self.name, exc) + logger.debug("[%s] Heartbeat failed: %s", 
self._log_tag, exc) except asyncio.CancelledError: pass @@ -551,7 +628,11 @@ class QQAdapter(BasePlatformAdapter): "op": 2, "d": { "token": f"QQBot {token}", - "intents": (1 << 25) | (1 << 30) | (1 << 12), # C2C_GROUP_AT_MESSAGES + PUBLIC_GUILD_MESSAGES + DIRECT_MESSAGE + "intents": (1 << 25) + | (1 << 30) + | ( + 1 << 12 + ), # C2C_GROUP_AT_MESSAGES + PUBLIC_GUILD_MESSAGES + DIRECT_MESSAGE "shard": [0, 1], "properties": { "$os": "macOS", @@ -563,11 +644,13 @@ class QQAdapter(BasePlatformAdapter): try: if self._ws and not self._ws.closed: await self._ws.send_json(identify_payload) - logger.info("[%s] Identify sent", self.name) + logger.info("[%s] Identify sent", self._log_tag) else: - logger.warning("[%s] Cannot send Identify: WebSocket not connected", self.name) + logger.warning( + "[%s] Cannot send Identify: WebSocket not connected", self._log_tag + ) except Exception as exc: - logger.error("[%s] Failed to send Identify: %s", self.name, exc) + logger.error("[%s] Failed to send Identify: %s", self._log_tag, exc) async def _send_resume(self) -> None: """Send op 6 Resume to re-authenticate after a reconnection. 
@@ -586,12 +669,18 @@ class QQAdapter(BasePlatformAdapter): try: if self._ws and not self._ws.closed: await self._ws.send_json(resume_payload) - logger.info("[%s] Resume sent (session_id=%s, seq=%s)", - self.name, self._session_id, self._last_seq) + logger.info( + "[%s] Resume sent (session_id=%s, seq=%s)", + self._log_tag, + self._session_id, + self._last_seq, + ) else: - logger.warning("[%s] Cannot send Resume: WebSocket not connected", self.name) + logger.warning( + "[%s] Cannot send Resume: WebSocket not connected", self._log_tag + ) except Exception as exc: - logger.error("[%s] Failed to send Resume: %s", self.name, exc) + logger.error("[%s] Failed to send Resume: %s", self._log_tag, exc) # If resume fails, clear session and fall back to identify on next Hello self._session_id = None self._last_seq = None @@ -624,8 +713,12 @@ class QQAdapter(BasePlatformAdapter): interval_ms = d_data.get("heartbeat_interval", 30000) # Send heartbeats at 80% of the server interval to stay safe self._heartbeat_interval = interval_ms / 1000.0 * 0.8 - logger.debug("[%s] Hello received, heartbeat_interval=%dms (sending every %.1fs)", - self.name, interval_ms, self._heartbeat_interval) + logger.debug( + "[%s] Hello received, heartbeat_interval=%dms (sending every %.1fs)", + self._log_tag, + interval_ms, + self._heartbeat_interval, + ) # Authenticate: send Resume if we have a session, else Identify. # Use _create_task which is safe when no event loop is running (tests). 
if self._session_id and self._last_seq is not None: @@ -639,26 +732,30 @@ class QQAdapter(BasePlatformAdapter): if t == "READY": self._handle_ready(d) elif t == "RESUMED": - logger.info("[%s] Session resumed", self.name) - elif t in ("C2C_MESSAGE_CREATE", "GROUP_AT_MESSAGE_CREATE", - "DIRECT_MESSAGE_CREATE", "GUILD_MESSAGE_CREATE", - "GUILD_AT_MESSAGE_CREATE"): + logger.info("[%s] Session resumed", self._log_tag) + elif t in ( + "C2C_MESSAGE_CREATE", + "GROUP_AT_MESSAGE_CREATE", + "DIRECT_MESSAGE_CREATE", + "GUILD_MESSAGE_CREATE", + "GUILD_AT_MESSAGE_CREATE", + ): asyncio.create_task(self._on_message(t, d)) else: - logger.debug("[%s] Unhandled dispatch: %s", self.name, t) + logger.debug("[%s] Unhandled dispatch: %s", self._log_tag, t) return # op 11 = Heartbeat ACK if op == 11: return - logger.debug("[%s] Unknown op: %s", self.name, op) + logger.debug("[%s] Unknown op: %s", self._log_tag, op) def _handle_ready(self, d: Any) -> None: """Handle the READY event — store session_id for resume.""" if isinstance(d, dict): self._session_id = d.get("session_id") - logger.info("[%s] Ready, session_id=%s", self.name, self._session_id) + logger.info("[%s] Ready, session_id=%s", self._log_tag, self._session_id) # ------------------------------------------------------------------ # JSON helpers @@ -669,7 +766,7 @@ class QQAdapter(BasePlatformAdapter): try: payload = json.loads(raw) except Exception: - logger.debug("[%s] Failed to parse JSON: %r", "QQBot", raw) + logger.debug("[%s] Failed to parse JSON: %r", self._log_tag, raw) return None return payload if isinstance(payload, dict) else None @@ -698,7 +795,9 @@ class QQAdapter(BasePlatformAdapter): # Extract common fields msg_id = str(d.get("id", "")) if not msg_id or self._is_duplicate(msg_id): - logger.debug("[%s] Duplicate or missing message id: %s", self.name, msg_id) + logger.debug( + "[%s] Duplicate or missing message id: %s", self._log_tag, msg_id + ) return timestamp = str(d.get("timestamp", "")) @@ -716,7 +815,12 @@ 
class QQAdapter(BasePlatformAdapter): await self._handle_dm_message(d, msg_id, content, author, timestamp) async def _handle_c2c_message( - self, d: Dict[str, Any], msg_id: str, content: str, author: Dict[str, Any], timestamp: str + self, + d: Dict[str, Any], + msg_id: str, + content: str, + author: Dict[str, Any], + timestamp: str, ) -> None: """Handle a C2C (private) message event.""" user_openid = str(author.get("user_openid", "")) @@ -727,17 +831,28 @@ class QQAdapter(BasePlatformAdapter): text = content attachments_raw = d.get("attachments") - logger.info("[QQ] C2C message: id=%s content=%r attachments=%s", - msg_id, content[:50] if content else "", - f"{len(attachments_raw) if isinstance(attachments_raw, list) else 0} items" - if attachments_raw else "None") + logger.info( + "[%s] C2C message: id=%s content=%r attachments=%s", + "QQBot", + msg_id, + content[:50] if content else "", + ( + f"{len(attachments_raw) if isinstance(attachments_raw, list) else 0} items" + if attachments_raw + else "None" + ), + ) if attachments_raw and isinstance(attachments_raw, list): for _i, _att in enumerate(attachments_raw): if isinstance(_att, dict): - logger.info("[QQ] attachment[%d]: content_type=%s url=%s filename=%s", - _i, _att.get("content_type", ""), - str(_att.get("url", ""))[:80], - _att.get("filename", "")) + logger.info( + "[%s] attachment[%d]: content_type=%s url=%s filename=%s", + "QQBot", + _i, + _att.get("content_type", ""), + str(_att.get("url", ""))[:80], + _att.get("filename", ""), + ) # Process all attachments uniformly (images, voice, files) att_result = await self._process_attachments(attachments_raw) @@ -749,13 +864,23 @@ class QQAdapter(BasePlatformAdapter): # Append voice transcripts to the text body if voice_transcripts: voice_block = "\n".join(voice_transcripts) - text = (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + text = ( + (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + ) # Append non-media 
attachment info if attachment_info: - text = (text + "\n\n" + attachment_info).strip() if text.strip() else attachment_info + text = ( + (text + "\n\n" + attachment_info).strip() + if text.strip() + else attachment_info + ) - logger.info("[QQ] After processing: images=%d, voice=%d", - len(image_urls), len(voice_transcripts)) + logger.info( + "[%s] After processing: images=%d, voice=%d", + "QQBot", + len(image_urls), + len(voice_transcripts), + ) if not text.strip() and not image_urls: return @@ -778,13 +903,20 @@ class QQAdapter(BasePlatformAdapter): await self.handle_message(event) async def _handle_group_message( - self, d: Dict[str, Any], msg_id: str, content: str, author: Dict[str, Any], timestamp: str + self, + d: Dict[str, Any], + msg_id: str, + content: str, + author: Dict[str, Any], + timestamp: str, ) -> None: """Handle a group @-message event.""" group_openid = str(d.get("group_openid", "")) if not group_openid: return - if not self._is_group_allowed(group_openid, str(author.get("member_openid", ""))): + if not self._is_group_allowed( + group_openid, str(author.get("member_openid", "")) + ): return # Strip the @bot mention prefix from content @@ -798,9 +930,15 @@ class QQAdapter(BasePlatformAdapter): # Append voice transcripts if voice_transcripts: voice_block = "\n".join(voice_transcripts) - text = (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + text = ( + (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + ) if attachment_info: - text = (text + "\n\n" + attachment_info).strip() if text.strip() else attachment_info + text = ( + (text + "\n\n" + attachment_info).strip() + if text.strip() + else attachment_info + ) if not text.strip() and not image_urls: return @@ -823,7 +961,12 @@ class QQAdapter(BasePlatformAdapter): await self.handle_message(event) async def _handle_guild_message( - self, d: Dict[str, Any], msg_id: str, content: str, author: Dict[str, Any], timestamp: str + self, + d: Dict[str, Any], + 
msg_id: str, + content: str, + author: Dict[str, Any], + timestamp: str, ) -> None: """Handle a guild/channel message event.""" channel_id = str(d.get("channel_id", "")) @@ -842,9 +985,15 @@ class QQAdapter(BasePlatformAdapter): if voice_transcripts: voice_block = "\n".join(voice_transcripts) - text = (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + text = ( + (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + ) if attachment_info: - text = (text + "\n\n" + attachment_info).strip() if text.strip() else attachment_info + text = ( + (text + "\n\n" + attachment_info).strip() + if text.strip() + else attachment_info + ) if not text.strip() and not image_urls: return @@ -868,7 +1017,12 @@ class QQAdapter(BasePlatformAdapter): await self.handle_message(event) async def _handle_dm_message( - self, d: Dict[str, Any], msg_id: str, content: str, author: Dict[str, Any], timestamp: str + self, + d: Dict[str, Any], + msg_id: str, + content: str, + author: Dict[str, Any], + timestamp: str, ) -> None: """Handle a guild DM message event.""" guild_id = str(d.get("guild_id", "")) @@ -884,9 +1038,15 @@ class QQAdapter(BasePlatformAdapter): if voice_transcripts: voice_block = "\n".join(voice_transcripts) - text = (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + text = ( + (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + ) if attachment_info: - text = (text + "\n\n" + attachment_info).strip() if text.strip() else attachment_info + text = ( + (text + "\n\n" + attachment_info).strip() + if text.strip() + else attachment_info + ) if not text.strip() and not image_urls: return @@ -928,11 +1088,16 @@ class QQAdapter(BasePlatformAdapter): return MessageType.PHOTO # Unknown content type with an attachment — don't assume PHOTO # to prevent non-image files from being sent to vision analysis. 
- logger.debug("[QQ] Unknown media content_type '%s', defaulting to TEXT", first_type) + logger.debug( + "[%s] Unknown media content_type '%s', defaulting to TEXT", + "QQBot", + first_type, + ) return MessageType.TEXT async def _process_attachments( - self, attachments: Any, + self, + attachments: Any, ) -> Dict[str, Any]: """Process inbound attachments (all message types). @@ -946,8 +1111,12 @@ class QQAdapter(BasePlatformAdapter): - attachment_info: str — text description of non-image, non-voice attachments """ if not isinstance(attachments, list): - return {"image_urls": [], "image_media_types": [], - "voice_transcripts": [], "attachment_info": ""} + return { + "image_urls": [], + "image_media_types": [], + "voice_transcripts": [], + "attachment_info": "", + } image_urls: List[str] = [] image_media_types: List[str] = [] @@ -969,30 +1138,39 @@ class QQAdapter(BasePlatformAdapter): url = "" continue - logger.debug("[QQ] Processing attachment: content_type=%s, url=%s, filename=%s", - ct, url[:80], filename) + logger.debug( + "[%s] Processing attachment: content_type=%s, url=%s, filename=%s", + "QQBot", + ct, + url[:80], + filename, + ) if self._is_voice_content_type(ct, filename): # Voice: use QQ's asr_refer_text first, then voice_wav_url, then STT. 
asr_refer = ( str(att.get("asr_refer_text", "")).strip() - if isinstance(att.get("asr_refer_text"), str) else "" + if isinstance(att.get("asr_refer_text"), str) + else "" ) voice_wav_url = ( str(att.get("voice_wav_url", "")).strip() - if isinstance(att.get("voice_wav_url"), str) else "" + if isinstance(att.get("voice_wav_url"), str) + else "" ) transcript = await self._stt_voice_attachment( - url, ct, filename, + url, + ct, + filename, asr_refer_text=asr_refer or None, voice_wav_url=voice_wav_url or None, ) if transcript: voice_transcripts.append(f"[Voice] {transcript}") - logger.info("[QQ] Voice transcript: %s", transcript) + logger.info("[%s] Voice transcript: %s", "QQBot", transcript) else: - logger.warning("[QQ] Voice STT failed for %s", url[:60]) + logger.warning("[%s] Voice STT failed for %s", "QQBot", url[:60]) voice_transcripts.append("[Voice] [语音识别失败]") elif ct.startswith("image/"): # Image: download and cache locally. @@ -1002,9 +1180,13 @@ class QQAdapter(BasePlatformAdapter): image_urls.append(cached_path) image_media_types.append(ct or "image/jpeg") elif cached_path: - logger.warning("[QQ] Cached image path does not exist: %s", cached_path) + logger.warning( + "[%s] Cached image path does not exist: %s", + "QQBot", + cached_path, + ) except Exception as exc: - logger.debug("[QQ] Failed to cache image: %s", exc) + logger.debug("[%s] Failed to cache image: %s", "QQBot", exc) else: # Other attachments (video, file, etc.): record as text. 
try: @@ -1012,7 +1194,7 @@ class QQAdapter(BasePlatformAdapter): if cached_path: other_attachments.append(f"[Attachment: {filename or ct}]") except Exception as exc: - logger.debug("[QQ] Failed to cache attachment: %s", exc) + logger.debug("[%s] Failed to cache attachment: %s", "QQBot", exc) attachment_info = "\n".join(other_attachments) if other_attachments else "" return { @@ -1025,6 +1207,7 @@ class QQAdapter(BasePlatformAdapter): async def _download_and_cache(self, url: str, content_type: str) -> Optional[str]: """Download a URL and cache it locally.""" from tools.url_safety import is_safe_url + if not is_safe_url(url): raise ValueError(f"Blocked unsafe URL: {url[:80]}") @@ -1033,12 +1216,16 @@ class QQAdapter(BasePlatformAdapter): try: resp = await self._http_client.get( - url, timeout=30.0, headers=self._qq_media_headers(), + url, + timeout=30.0, + headers=self._qq_media_headers(), ) resp.raise_for_status() data = resp.content except Exception as exc: - logger.debug("[%s] Download failed for %s: %s", self.name, url[:80], exc) + logger.debug( + "[%s] Download failed for %s: %s", self._log_tag, url[:80], exc + ) return None if content_type.startswith("image/"): @@ -1059,7 +1246,17 @@ class QQAdapter(BasePlatformAdapter): fn = filename.strip().lower() if ct == "voice" or ct.startswith("audio/"): return True - _VOICE_EXTENSIONS = (".silk", ".amr", ".mp3", ".wav", ".ogg", ".m4a", ".aac", ".speex", ".flac") + _VOICE_EXTENSIONS = ( + ".silk", + ".amr", + ".mp3", + ".wav", + ".ogg", + ".m4a", + ".aac", + ".speex", + ".flac", + ) if any(fn.endswith(ext) for ext in _VOICE_EXTENSIONS): return True return False @@ -1076,13 +1273,13 @@ class QQAdapter(BasePlatformAdapter): return {} async def _stt_voice_attachment( - self, - url: str, - content_type: str, - filename: str, - *, - asr_refer_text: Optional[str] = None, - voice_wav_url: Optional[str] = None, + self, + url: str, + content_type: str, + filename: str, + *, + asr_refer_text: Optional[str] = None, + voice_wav_url: 
Optional[str] = None, ) -> Optional[str]: """Download a voice attachment, convert to wav, and transcribe. @@ -1095,7 +1292,9 @@ class QQAdapter(BasePlatformAdapter): """ # 1. Use QQ's built-in ASR text if available if asr_refer_text: - logger.info("[QQ] STT: using QQ asr_refer_text: %r", asr_refer_text[:100]) + logger.info( + "[%s] STT: using QQ asr_refer_text: %r", "QQBot", asr_refer_text[:100] + ) return asr_refer_text # Determine which URL to download (prefer voice_wav_url — already WAV) @@ -1106,7 +1305,7 @@ class QQAdapter(BasePlatformAdapter): voice_wav_url = f"https:{voice_wav_url}" download_url = voice_wav_url is_pre_wav = True - logger.info("[QQ] STT: using voice_wav_url (pre-converted WAV)") + logger.info("[%s] STT: using voice_wav_url (pre-converted WAV)", "QQBot") from tools.url_safety import is_safe_url if not is_safe_url(download_url): @@ -1116,40 +1315,65 @@ class QQAdapter(BasePlatformAdapter): try: # 2. Download audio (QQ CDN requires Authorization header) if not self._http_client: - logger.warning("[QQ] STT: no HTTP client") + logger.warning("[%s] STT: no HTTP client", "QQBot") return None download_headers = self._qq_media_headers() - logger.info("[QQ] STT: downloading voice from %s (pre_wav=%s, headers=%s)", - download_url[:80], is_pre_wav, bool(download_headers)) + logger.info( + "[%s] STT: downloading voice from %s (pre_wav=%s, headers=%s)", + "QQBot", + download_url[:80], + is_pre_wav, + bool(download_headers), + ) resp = await self._http_client.get( - download_url, timeout=30.0, headers=download_headers, follow_redirects=True, + download_url, + timeout=30.0, + headers=download_headers, + follow_redirects=True, ) resp.raise_for_status() audio_data = resp.content - logger.info("[QQ] STT: downloaded %d bytes, content_type=%s", - len(audio_data), resp.headers.get("content-type", "unknown")) + logger.info( + "[%s] STT: downloaded %d bytes, content_type=%s", + "QQBot", + len(audio_data), + resp.headers.get("content-type", "unknown"), + ) if 
len(audio_data) < 10: - logger.warning("[QQ] STT: downloaded data too small (%d bytes), skipping", len(audio_data)) + logger.warning( + "[%s] STT: downloaded data too small (%d bytes), skipping", + "QQBot", + len(audio_data), + ) return None # 3. Convert to wav (skip if we already have a pre-converted WAV) if is_pre_wav: import tempfile + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp: tmp.write(audio_data) wav_path = tmp.name - logger.info("[QQ] STT: using pre-converted WAV directly (%d bytes)", len(audio_data)) + logger.info( + "[%s] STT: using pre-converted WAV directly (%d bytes)", + "QQBot", + len(audio_data), + ) else: - logger.info("[QQ] STT: converting to wav, filename=%r", filename) + logger.info( + "[%s] STT: converting to wav, filename=%r", "QQBot", filename + ) wav_path = await self._convert_audio_to_wav_file(audio_data, filename) if not wav_path or not Path(wav_path).exists(): - logger.warning("[QQ] STT: ffmpeg conversion produced no output") + logger.warning( + "[%s] STT: ffmpeg conversion produced no output", "QQBot" + ) return None # 4. Call STT API - logger.info("[QQ] STT: calling ASR on %s", wav_path) + logger.info("[%s] STT: calling ASR on %s", "QQBot", wav_path) transcript = await self._call_stt(wav_path) # 5. 
Cleanup temp file @@ -1159,15 +1383,22 @@ class QQAdapter(BasePlatformAdapter): pass if transcript: - logger.info("[QQ] STT success: %r", transcript[:100]) + logger.info("[%s] STT success: %r", "QQBot", transcript[:100]) else: - logger.warning("[QQ] STT: ASR returned empty transcript") + logger.warning("[%s] STT: ASR returned empty transcript", "QQBot") return transcript except (httpx.HTTPStatusError, httpx.TransportError, IOError) as exc: - logger.warning("[QQ] STT failed for voice attachment: %s: %s", type(exc).__name__, exc) + logger.warning( + "[%s] STT failed for voice attachment: %s: %s", + "QQBot", + type(exc).__name__, + exc, + ) return None - async def _convert_audio_to_wav_file(self, audio_data: bytes, filename: str) -> Optional[str]: + async def _convert_audio_to_wav_file( + self, audio_data: bytes, filename: str + ) -> Optional[str]: """Convert audio bytes to a temp .wav file using pilk (SILK) or ffmpeg. QQ voice messages are typically SILK format which ffmpeg cannot decode. @@ -1177,9 +1408,18 @@ class QQAdapter(BasePlatformAdapter): """ import tempfile - ext = Path(filename).suffix.lower() if Path(filename).suffix else self._guess_ext_from_data(audio_data) - logger.info("[QQ] STT: audio_data size=%d, ext=%r, first_20_bytes=%r", - len(audio_data), ext, audio_data[:20]) + ext = ( + Path(filename).suffix.lower() + if Path(filename).suffix + else self._guess_ext_from_data(audio_data) + ) + logger.info( + "[%s] STT: audio_data size=%d, ext=%r, first_20_bytes=%r", + self._log_tag, + len(audio_data), + ext, + audio_data[:20], + ) with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_src: tmp_src.write(audio_data) @@ -1241,31 +1481,43 @@ class QQAdapter(BasePlatformAdapter): try: import pilk except ImportError: - logger.warning("[QQ] pilk not installed — cannot decode SILK audio. Run: pip install pilk") + logger.warning( + "[%s] pilk not installed — cannot decode SILK audio. 
Run: pip install pilk", + "QQBot", + ) return None # Try converting the file as-is try: pilk.silk_to_wav(src_path, wav_path, rate=16000) if Path(wav_path).exists() and Path(wav_path).stat().st_size > 44: - logger.info("[QQ] pilk converted %s to wav (%d bytes)", - Path(src_path).name, Path(wav_path).stat().st_size) + logger.info( + "[%s] pilk converted %s to wav (%d bytes)", + "QQBot", + Path(src_path).name, + Path(wav_path).stat().st_size, + ) return wav_path except Exception as exc: - logger.debug("[QQ] pilk direct conversion failed: %s", exc) + logger.debug("[%s] pilk direct conversion failed: %s", "QQBot", exc) # Try renaming to .silk and converting (pilk checks the extension) silk_path = src_path.rsplit(".", 1)[0] + ".silk" try: import shutil + shutil.copy2(src_path, silk_path) pilk.silk_to_wav(silk_path, wav_path, rate=16000) if Path(wav_path).exists() and Path(wav_path).stat().st_size > 44: - logger.info("[QQ] pilk converted %s (as .silk) to wav (%d bytes)", - Path(src_path).name, Path(wav_path).stat().st_size) + logger.info( + "[%s] pilk converted %s (as .silk) to wav (%d bytes)", + "QQBot", + Path(src_path).name, + Path(wav_path).stat().st_size, + ) return wav_path except Exception as exc: - logger.debug("[QQ] pilk .silk conversion failed: %s", exc) + logger.debug("[%s] pilk .silk conversion failed: %s", "QQBot", exc) finally: try: os.unlink(silk_path) @@ -1283,6 +1535,7 @@ class QQAdapter(BasePlatformAdapter): """ try: import wave + with wave.open(wav_path, "w") as wf: wf.setnchannels(1) wf.setsampwidth(2) @@ -1290,7 +1543,7 @@ class QQAdapter(BasePlatformAdapter): wf.writeframes(audio_data) return wav_path except Exception as exc: - logger.debug("[QQ] raw PCM fallback failed: %s", exc) + logger.debug("[%s] raw PCM fallback failed: %s", "QQBot", exc) return None @staticmethod @@ -1298,25 +1551,45 @@ class QQAdapter(BasePlatformAdapter): """Convert audio file to WAV using ffmpeg.""" try: proc = await asyncio.create_subprocess_exec( - "ffmpeg", "-y", "-i", 
src_path, "-ar", "16000", "-ac", "1", wav_path, + "ffmpeg", + "-y", + "-i", + src_path, + "-ar", + "16000", + "-ac", + "1", + wav_path, stdout=asyncio.subprocess.DEVNULL, stderr=asyncio.subprocess.PIPE, ) await asyncio.wait_for(proc.wait(), timeout=30) if proc.returncode != 0: stderr = await proc.stderr.read() if proc.stderr else b"" - logger.warning("[QQ] ffmpeg failed for %s: %s", - Path(src_path).name, stderr[:200].decode(errors="replace")) + logger.warning( + "[%s] ffmpeg failed for %s: %s", + "QQBot", + Path(src_path).name, + stderr[:200].decode(errors="replace"), + ) return None except (asyncio.TimeoutError, FileNotFoundError) as exc: - logger.warning("[QQ] ffmpeg conversion error: %s", exc) + logger.warning("[%s] ffmpeg conversion error: %s", "QQBot", exc) return None if not Path(wav_path).exists() or Path(wav_path).stat().st_size <= 44: - logger.warning("[QQ] ffmpeg produced no/small output for %s", Path(src_path).name) + logger.warning( + "[%s] ffmpeg produced no/small output for %s", + "QQBot", + Path(src_path).name, + ) return None - logger.info("[QQ] ffmpeg converted %s to wav (%d bytes)", - Path(src_path).name, Path(wav_path).stat().st_size) + logger.info( + "[%s] ffmpeg converted %s to wav (%d bytes)", + "QQBot", + Path(src_path).name, + Path(wav_path).stat().st_size, + ) return wav_path def _resolve_stt_config(self) -> Optional[Dict[str, str]]: @@ -1355,7 +1628,8 @@ class QQAdapter(BasePlatformAdapter): return { "base_url": base_url, "api_key": api_key, - "model": model or ("glm-asr" if provider in ("zai", "glm") else "whisper-1"), + "model": model + or ("glm-asr" if provider in ("zai", "glm") else "whisper-1"), } # 2. 
QQ-specific env vars (set by `hermes setup gateway` / `hermes gateway`) @@ -1383,7 +1657,10 @@ class QQAdapter(BasePlatformAdapter): """ stt_cfg = self._resolve_stt_config() if not stt_cfg: - logger.warning("[QQ] STT not configured (no stt config or QQ_STT_API_KEY)") + logger.warning( + "[%s] STT not configured (no stt config or QQ_STT_API_KEY)", + self._log_tag, + ) return None base_url = stt_cfg["base_url"] @@ -1413,17 +1690,37 @@ class QQAdapter(BasePlatformAdapter): return text.strip() return None except (httpx.HTTPStatusError, IOError) as exc: - logger.warning("[QQ] STT API call failed (model=%s, base=%s): %s", - model, base_url[:50], exc) + logger.warning( + "[%s] STT API call failed (model=%s, base=%s): %s", + self._log_tag, + model, + base_url[:50], + exc, + ) return None - async def _convert_audio_to_wav(self, audio_data: bytes, source_url: str) -> Optional[str]: + async def _convert_audio_to_wav( + self, audio_data: bytes, source_url: str + ) -> Optional[str]: """Convert audio bytes to .wav using pilk (SILK) or ffmpeg, caching the result.""" import tempfile # Determine source format from magic bytes or URL - ext = Path(urlparse(source_url).path).suffix.lower() if urlparse(source_url).path else "" - if not ext or ext not in (".silk", ".amr", ".mp3", ".wav", ".ogg", ".m4a", ".aac", ".flac"): + ext = ( + Path(urlparse(source_url).path).suffix.lower() + if urlparse(source_url).path + else "" + ) + if not ext or ext not in ( + ".silk", + ".amr", + ".mp3", + ".wav", + ".ogg", + ".m4a", + ".aac", + ".flac", + ): ext = self._guess_ext_from_data(audio_data) with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_src: @@ -1439,8 +1736,12 @@ class QQAdapter(BasePlatformAdapter): result = await self._convert_ffmpeg_to_wav(src_path, wav_path) if not result: - logger.warning("[%s] audio conversion failed for %s (format=%s)", - self.name, source_url[:60], ext) + logger.warning( + "[%s] audio conversion failed for %s (format=%s)", + self._log_tag, + 
source_url[:60], + ext, + ) return cache_document_from_bytes(audio_data, f"qq_voice{ext}") except Exception: return cache_document_from_bytes(audio_data, f"qq_voice{ext}") @@ -1456,7 +1757,7 @@ class QQAdapter(BasePlatformAdapter): os.unlink(wav_path) return cache_document_from_bytes(wav_data, "qq_voice.wav") except Exception as exc: - logger.debug("[%s] Failed to read converted wav: %s", self.name, exc) + logger.debug("[%s] Failed to read converted wav: %s", self._log_tag, exc) return None # ------------------------------------------------------------------ @@ -1464,11 +1765,11 @@ class QQAdapter(BasePlatformAdapter): # ------------------------------------------------------------------ async def _api_request( - self, - method: str, - path: str, - body: Optional[Dict[str, Any]] = None, - timeout: float = DEFAULT_API_TIMEOUT, + self, + method: str, + path: str, + body: Optional[Dict[str, Any]] = None, + timeout: float = DEFAULT_API_TIMEOUT, ) -> Dict[str, Any]: """Make an authenticated REST API request to QQ Bot API.""" if not self._http_client: @@ -1500,17 +1801,21 @@ class QQAdapter(BasePlatformAdapter): raise RuntimeError(f"QQ Bot API timeout [{path}]: {exc}") from exc async def _upload_media( - self, - target_type: str, - target_id: str, - file_type: int, - url: Optional[str] = None, - file_data: Optional[str] = None, - srv_send_msg: bool = False, - file_name: Optional[str] = None, + self, + target_type: str, + target_id: str, + file_type: int, + url: Optional[str] = None, + file_data: Optional[str] = None, + srv_send_msg: bool = False, + file_name: Optional[str] = None, ) -> Dict[str, Any]: """Upload media and return file_info.""" - path = f"/v2/users/{target_id}/files" if target_type == "c2c" else f"/v2/groups/{target_id}/files" + path = ( + f"/v2/users/{target_id}/files" + if target_type == "c2c" + else f"/v2/groups/{target_id}/files" + ) body: Dict[str, Any] = { "file_type": file_type, @@ -1527,11 +1832,16 @@ class QQAdapter(BasePlatformAdapter): last_exc = 
None for attempt in range(3): try: - return await self._api_request("POST", path, body, timeout=FILE_UPLOAD_TIMEOUT) + return await self._api_request( + "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT + ) except RuntimeError as exc: last_exc = exc err_msg = str(exc) - if any(kw in err_msg for kw in ("400", "401", "Invalid", "timeout", "Timeout")): + if any( + kw in err_msg + for kw in ("400", "401", "Invalid", "timeout", "Timeout") + ): raise if attempt < 2: await asyncio.sleep(1.5 * (attempt + 1)) @@ -1566,11 +1876,11 @@ class QQAdapter(BasePlatformAdapter): return False async def send( - self, - chat_id: str, - content: str, - reply_to: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, + self, + chat_id: str, + content: str, + reply_to: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: """Send a text or markdown message to a QQ user or group. @@ -1599,7 +1909,10 @@ class QQAdapter(BasePlatformAdapter): return last_result async def _send_chunk( - self, chat_id: str, content: str, reply_to: Optional[str] = None, + self, + chat_id: str, + content: str, + reply_to: Optional[str] = None, ) -> SendResult: """Send a single chunk with retry + exponential backoff.""" last_exc: Optional[Exception] = None @@ -1614,28 +1927,39 @@ class QQAdapter(BasePlatformAdapter): elif chat_type == "guild": return await self._send_guild_text(chat_id, content, reply_to) else: - return SendResult(success=False, error=f"Unknown chat type for {chat_id}") + return SendResult( + success=False, error=f"Unknown chat type for {chat_id}" + ) except Exception as exc: last_exc = exc err = str(exc).lower() # Permanent errors — don't retry - if any(k in err for k in ("invalid", "forbidden", "not found", "bad request")): + if any( + k in err + for k in ("invalid", "forbidden", "not found", "bad request") + ): break # Transient — back off and retry if attempt < 2: delay = 1.0 * (2 ** attempt) - logger.warning("[%s] send retry %d/3 after %.1fs: %s", - 
self.name, attempt + 1, delay, exc) + logger.warning( + "[%s] send retry %d/3 after %.1fs: %s", + self._log_tag, + attempt + 1, + delay, + exc, + ) await asyncio.sleep(delay) error_msg = str(last_exc) if last_exc else "Unknown error" - logger.error("[%s] Send failed: %s", self.name, error_msg) - retryable = not any(k in error_msg.lower() - for k in ("invalid", "forbidden", "not found")) + logger.error("[%s] Send failed: %s", self._log_tag, error_msg) + retryable = not any( + k in error_msg.lower() for k in ("invalid", "forbidden", "not found") + ) return SendResult(success=False, error=error_msg, retryable=retryable) async def _send_c2c_text( - self, openid: str, content: str, reply_to: Optional[str] = None + self, openid: str, content: str, reply_to: Optional[str] = None ) -> SendResult: """Send text to a C2C user via REST API.""" msg_seq = self._next_msg_seq(reply_to or openid) @@ -1648,7 +1972,7 @@ class QQAdapter(BasePlatformAdapter): return SendResult(success=True, message_id=msg_id, raw_response=data) async def _send_group_text( - self, group_openid: str, content: str, reply_to: Optional[str] = None + self, group_openid: str, content: str, reply_to: Optional[str] = None ) -> SendResult: """Send text to a group via REST API.""" msg_seq = self._next_msg_seq(reply_to or group_openid) @@ -1656,15 +1980,17 @@ class QQAdapter(BasePlatformAdapter): if reply_to: body["msg_id"] = reply_to - data = await self._api_request("POST", f"/v2/groups/{group_openid}/messages", body) + data = await self._api_request( + "POST", f"/v2/groups/{group_openid}/messages", body + ) msg_id = str(data.get("id", uuid.uuid4().hex[:12])) return SendResult(success=True, message_id=msg_id, raw_response=data) async def _send_guild_text( - self, channel_id: str, content: str, reply_to: Optional[str] = None + self, channel_id: str, content: str, reply_to: Optional[str] = None ) -> SendResult: """Send text to a guild channel via REST API.""" - body: Dict[str, Any] = {"content": 
content[:self.MAX_MESSAGE_LENGTH]} + body: Dict[str, Any] = {"content": content[: self.MAX_MESSAGE_LENGTH]} if reply_to: body["msg_id"] = reply_to @@ -1672,19 +1998,21 @@ class QQAdapter(BasePlatformAdapter): msg_id = str(data.get("id", uuid.uuid4().hex[:12])) return SendResult(success=True, message_id=msg_id, raw_response=data) - def _build_text_body(self, content: str, reply_to: Optional[str] = None) -> Dict[str, Any]: + def _build_text_body( + self, content: str, reply_to: Optional[str] = None + ) -> Dict[str, Any]: """Build the message body for C2C/group text sending.""" msg_seq = self._next_msg_seq(reply_to or "default") if self._markdown_support: body: Dict[str, Any] = { - "markdown": {"content": content[:self.MAX_MESSAGE_LENGTH]}, + "markdown": {"content": content[: self.MAX_MESSAGE_LENGTH]}, "msg_type": MSG_TYPE_MARKDOWN, "msg_seq": msg_seq, } else: body = { - "content": content[:self.MAX_MESSAGE_LENGTH], + "content": content[: self.MAX_MESSAGE_LENGTH], "msg_type": MSG_TYPE_TEXT, "msg_seq": msg_seq, } @@ -1701,84 +2029,103 @@ class QQAdapter(BasePlatformAdapter): # ------------------------------------------------------------------ async def send_image( - self, - chat_id: str, - image_url: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, + self, + chat_id: str, + image_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: """Send an image natively via QQ Bot API upload.""" del metadata - result = await self._send_media(chat_id, image_url, MEDIA_TYPE_IMAGE, "image", caption, reply_to) + result = await self._send_media( + chat_id, image_url, MEDIA_TYPE_IMAGE, "image", caption, reply_to + ) if result.success or not self._is_url(image_url): return result # Fallback to text URL - logger.warning("[%s] Image send failed, falling back to text: %s", self.name, result.error) + logger.warning( + "[%s] Image send 
failed, falling back to text: %s", + self._log_tag, + result.error, + ) fallback = f"{caption}\n{image_url}" if caption else image_url return await self.send(chat_id=chat_id, content=fallback, reply_to=reply_to) async def send_image_file( - self, - chat_id: str, - image_path: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - **kwargs, + self, + chat_id: str, + image_path: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + **kwargs, ) -> SendResult: """Send a local image file natively.""" del kwargs - return await self._send_media(chat_id, image_path, MEDIA_TYPE_IMAGE, "image", caption, reply_to) + return await self._send_media( + chat_id, image_path, MEDIA_TYPE_IMAGE, "image", caption, reply_to + ) async def send_voice( - self, - chat_id: str, - audio_path: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - **kwargs, + self, + chat_id: str, + audio_path: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + **kwargs, ) -> SendResult: """Send a voice message natively.""" del kwargs - return await self._send_media(chat_id, audio_path, MEDIA_TYPE_VOICE, "voice", caption, reply_to) + return await self._send_media( + chat_id, audio_path, MEDIA_TYPE_VOICE, "voice", caption, reply_to + ) async def send_video( - self, - chat_id: str, - video_path: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - **kwargs, + self, + chat_id: str, + video_path: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + **kwargs, ) -> SendResult: """Send a video natively.""" del kwargs - return await self._send_media(chat_id, video_path, MEDIA_TYPE_VIDEO, "video", caption, reply_to) + return await self._send_media( + chat_id, video_path, MEDIA_TYPE_VIDEO, "video", caption, reply_to + ) async def send_document( - self, - chat_id: str, - file_path: str, - caption: Optional[str] = None, - file_name: Optional[str] = None, - reply_to: Optional[str] = None, - **kwargs, + 
self, + chat_id: str, + file_path: str, + caption: Optional[str] = None, + file_name: Optional[str] = None, + reply_to: Optional[str] = None, + **kwargs, ) -> SendResult: """Send a file/document natively.""" del kwargs - return await self._send_media(chat_id, file_path, MEDIA_TYPE_FILE, "file", caption, reply_to, - file_name=file_name) + return await self._send_media( + chat_id, + file_path, + MEDIA_TYPE_FILE, + "file", + caption, + reply_to, + file_name=file_name, + ) async def _send_media( - self, - chat_id: str, - media_source: str, - file_type: int, - kind: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - file_name: Optional[str] = None, + self, + chat_id: str, + media_source: str, + file_type: int, + kind: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + file_name: Optional[str] = None, ) -> SendResult: """Upload media and send as a native message.""" if not self.is_connected: @@ -1787,20 +2134,30 @@ class QQAdapter(BasePlatformAdapter): try: # Resolve media source - data, content_type, resolved_name = await self._load_media(media_source, file_name) + data, content_type, resolved_name = await self._load_media( + media_source, file_name + ) # Route chat_type = self._guess_chat_type(chat_id) - target_path = f"/v2/users/{chat_id}/files" if chat_type == "c2c" else f"/v2/groups/{chat_id}/files" + target_path = ( + f"/v2/users/{chat_id}/files" + if chat_type == "c2c" + else f"/v2/groups/{chat_id}/files" + ) if chat_type == "guild": # Guild channels don't support native media upload in the same way # Send as URL fallback - return SendResult(success=False, error="Guild media send not supported via this path") + return SendResult( + success=False, error="Guild media send not supported via this path" + ) # Upload upload = await self._upload_media( - chat_type, chat_id, file_type, + chat_type, + chat_id, + file_type, file_data=data if not self._is_url(media_source) else None, url=media_source if self._is_url(media_source) 
else None, srv_send_msg=False, @@ -1809,7 +2166,9 @@ class QQAdapter(BasePlatformAdapter): file_info = upload.get("file_info") if not file_info: - return SendResult(success=False, error=f"Upload returned no file_info: {upload}") + return SendResult( + success=False, error=f"Upload returned no file_info: {upload}" + ) # Send media message msg_seq = self._next_msg_seq(chat_id) @@ -1819,13 +2178,17 @@ class QQAdapter(BasePlatformAdapter): "msg_seq": msg_seq, } if caption: - body["content"] = caption[:self.MAX_MESSAGE_LENGTH] + body["content"] = caption[: self.MAX_MESSAGE_LENGTH] if reply_to: body["msg_id"] = reply_to send_data = await self._api_request( "POST", - f"/v2/users/{chat_id}/messages" if chat_type == "c2c" else f"/v2/groups/{chat_id}/messages", + ( + f"/v2/users/{chat_id}/messages" + if chat_type == "c2c" + else f"/v2/groups/{chat_id}/messages" + ), body, ) return SendResult( @@ -1834,11 +2197,11 @@ class QQAdapter(BasePlatformAdapter): raw_response=send_data, ) except Exception as exc: - logger.error("[%s] Media send failed: %s", self.name, exc) + logger.error("[%s] Media send failed: %s", self._log_tag, exc) return SendResult(success=False, error=str(exc)) async def _load_media( - self, source: str, file_name: Optional[str] = None + self, source: str, file_name: Optional[str] = None ) -> Tuple[str, str, str]: """Load media from URL or local path. 
Returns (base64_or_url, content_type, filename).""" source = str(source).strip() @@ -1869,7 +2232,9 @@ class QQAdapter(BasePlatformAdapter): raw = local_path.read_bytes() resolved_name = file_name or local_path.name - content_type = mimetypes.guess_type(str(local_path))[0] or "application/octet-stream" + content_type = ( + mimetypes.guess_type(str(local_path))[0] or "application/octet-stream" + ) b64 = base64.b64encode(raw).decode("ascii") return b64, content_type, resolved_name @@ -1906,13 +2271,16 @@ class QQAdapter(BasePlatformAdapter): body = { "msg_type": MSG_TYPE_INPUT_NOTIFY, "msg_id": msg_id, - "input_notify": {"input_type": 1, "input_second": self._TYPING_INPUT_SECONDS}, + "input_notify": { + "input_type": 1, + "input_second": self._TYPING_INPUT_SECONDS, + }, "msg_seq": msg_seq, } await self._api_request("POST", f"/v2/users/{chat_id}/messages", body) self._typing_sent_at[chat_id] = now except Exception as exc: - logger.debug("[%s] send_typing failed: %s", self.name, exc) + logger.debug("[%s] send_typing failed: %s", self._log_tag, exc) # ------------------------------------------------------------------ # Format @@ -1959,7 +2327,8 @@ class QQAdapter(BasePlatformAdapter): """Strip the @bot mention prefix from group message content.""" # QQ group @-messages may have the bot's QQ/ID as prefix import re - stripped = re.sub(r'^@\S+\s*', '', content.strip()) + + stripped = re.sub(r"^@\S+\s*", "", content.strip()) return stripped def _is_dm_allowed(self, user_id: str) -> bool: From c4cdf3b861d8e08f948cbf0366b36b92ce556aae Mon Sep 17 00:00:00 2001 From: WideLee Date: Fri, 17 Apr 2026 18:26:47 +0800 Subject: [PATCH 11/20] refactor(qqbot): change setup method selection prompt_choice style --- hermes_cli/gateway.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 35a8745a9..d53b5ca0e 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -2638,12 +2638,19 @@ def 
_setup_qqbot(): if not prompt_yes_no(" Reconfigure QQ Bot?", False): return - # ── QR scan or manual ── + # ── Choose setup method ── + print() + method_choices = [ + "Scan QR code to add bot automatically (recommended)", + "Enter existing App ID and App Secret manually", + ] + method_idx = prompt_choice(" How would you like to set up QQ Bot?", method_choices, 0) + credentials = None used_qr = False - print() - if prompt_yes_no(" Scan QR code to add bot automatically?", True): + if method_idx == 0: + # ── QR scan-to-configure ── try: credentials = _qqbot_qr_flow() except KeyboardInterrupt: From 1648e41c17b237a9dcccef8fecd91a40e4795aad Mon Sep 17 00:00:00 2001 From: WideLee Date: Fri, 17 Apr 2026 18:43:11 +0800 Subject: [PATCH 12/20] refactor(qqbot): change qrcode style --- hermes_cli/gateway.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index d53b5ca0e..f5ebcf031 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -2738,7 +2738,7 @@ def _qqbot_render_qr(url: str) -> bool: """Try to render a QR code in the terminal. 
Returns True if successful.""" try: import qrcode as _qr - qr = _qr.QRCode() + qr = _qr.QRCode(border=1,error_correction=_qr.constants.ERROR_CORRECT_L) qr.add_data(url) qr.make(fit=True) qr.print_ascii(invert=True) From 235e6ecc0ed3d533419bb494bd05481a931f71e7 Mon Sep 17 00:00:00 2001 From: WideLee Date: Fri, 17 Apr 2026 19:15:15 +0800 Subject: [PATCH 13/20] refactor(qqbot): replace hardcoded log tags with self._log_tag and adjust STT log levels - Remove @staticmethod from _detect_message_type, _convert_silk_to_wav, _convert_raw_to_wav, _convert_ffmpeg_to_wav so they can use self._log_tag - Replace all remaining hardcoded "QQBot" log args with self._log_tag - Downgrade STT routine flow logs (download, convert, success) from info to debug - Keep warning level for actual failures (STT failed, ffmpeg error, empty transcript) --- gateway/platforms/qqbot/adapter.py | 96 ++++++++++++++---------------- 1 file changed, 46 insertions(+), 50 deletions(-) diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py index b5a250774..0e3d546fa 100644 --- a/gateway/platforms/qqbot/adapter.py +++ b/gateway/platforms/qqbot/adapter.py @@ -833,7 +833,7 @@ class QQAdapter(BasePlatformAdapter): attachments_raw = d.get("attachments") logger.info( "[%s] C2C message: id=%s content=%r attachments=%s", - "QQBot", + self._log_tag, msg_id, content[:50] if content else "", ( @@ -846,8 +846,8 @@ class QQAdapter(BasePlatformAdapter): for _i, _att in enumerate(attachments_raw): if isinstance(_att, dict): logger.info( - "[%s] attachment[%d]: content_type=%s url=%s filename=%s", - "QQBot", + "[%s] attachment[%d]: content_type=%s url=%s filename=%s", + self._log_tag, _i, _att.get("content_type", ""), str(_att.get("url", ""))[:80], @@ -877,7 +877,7 @@ class QQAdapter(BasePlatformAdapter): logger.info( "[%s] After processing: images=%d, voice=%d", - "QQBot", + self._log_tag, len(image_urls), len(voice_transcripts), ) @@ -1072,8 +1072,7 @@ class QQAdapter(BasePlatformAdapter): 
# Attachment processing # ------------------------------------------------------------------ - @staticmethod - def _detect_message_type(media_urls: list, media_types: list): + def _detect_message_type(self, media_urls: list, media_types: list): """Determine MessageType from attachment content types.""" if not media_urls: return MessageType.TEXT @@ -1090,7 +1089,7 @@ class QQAdapter(BasePlatformAdapter): # to prevent non-image files from being sent to vision analysis. logger.debug( "[%s] Unknown media content_type '%s', defaulting to TEXT", - "QQBot", + self._log_tag, first_type, ) return MessageType.TEXT @@ -1140,7 +1139,7 @@ class QQAdapter(BasePlatformAdapter): logger.debug( "[%s] Processing attachment: content_type=%s, url=%s, filename=%s", - "QQBot", + self._log_tag, ct, url[:80], filename, @@ -1168,9 +1167,9 @@ class QQAdapter(BasePlatformAdapter): ) if transcript: voice_transcripts.append(f"[Voice] {transcript}") - logger.info("[%s] Voice transcript: %s", "QQBot", transcript) + logger.debug("[%s] Voice transcript: %s", self._log_tag, transcript) else: - logger.warning("[%s] Voice STT failed for %s", "QQBot", url[:60]) + logger.warning("[%s] Voice STT failed for %s", self._log_tag, url[:60]) voice_transcripts.append("[Voice] [语音识别失败]") elif ct.startswith("image/"): # Image: download and cache locally. @@ -1182,11 +1181,11 @@ class QQAdapter(BasePlatformAdapter): elif cached_path: logger.warning( "[%s] Cached image path does not exist: %s", - "QQBot", + self._log_tag, cached_path, ) except Exception as exc: - logger.debug("[%s] Failed to cache image: %s", "QQBot", exc) + logger.debug("[%s] Failed to cache image: %s", self._log_tag, exc) else: # Other attachments (video, file, etc.): record as text. 
try: @@ -1194,7 +1193,7 @@ class QQAdapter(BasePlatformAdapter): if cached_path: other_attachments.append(f"[Attachment: {filename or ct}]") except Exception as exc: - logger.debug("[%s] Failed to cache attachment: %s", "QQBot", exc) + logger.debug("[%s] Failed to cache attachment: %s", self._log_tag, exc) attachment_info = "\n".join(other_attachments) if other_attachments else "" return { @@ -1292,8 +1291,8 @@ class QQAdapter(BasePlatformAdapter): """ # 1. Use QQ's built-in ASR text if available if asr_refer_text: - logger.info( - "[%s] STT: using QQ asr_refer_text: %r", "QQBot", asr_refer_text[:100] + logger.debug( + "[%s] STT: using QQ asr_refer_text: %r", self._log_tag, asr_refer_text[:100] ) return asr_refer_text @@ -1305,7 +1304,7 @@ class QQAdapter(BasePlatformAdapter): voice_wav_url = f"https:{voice_wav_url}" download_url = voice_wav_url is_pre_wav = True - logger.info("[%s] STT: using voice_wav_url (pre-converted WAV)", "QQBot") + logger.debug("[%s] STT: using voice_wav_url (pre-converted WAV)", self._log_tag) from tools.url_safety import is_safe_url if not is_safe_url(download_url): @@ -1315,13 +1314,13 @@ class QQAdapter(BasePlatformAdapter): try: # 2. 
Download audio (QQ CDN requires Authorization header) if not self._http_client: - logger.warning("[%s] STT: no HTTP client", "QQBot") + logger.warning("[%s] STT: no HTTP client", self._log_tag) return None download_headers = self._qq_media_headers() - logger.info( + logger.debug( "[%s] STT: downloading voice from %s (pre_wav=%s, headers=%s)", - "QQBot", + self._log_tag, download_url[:80], is_pre_wav, bool(download_headers), @@ -1334,9 +1333,9 @@ class QQAdapter(BasePlatformAdapter): ) resp.raise_for_status() audio_data = resp.content - logger.info( + logger.debug( "[%s] STT: downloaded %d bytes, content_type=%s", - "QQBot", + self._log_tag, len(audio_data), resp.headers.get("content-type", "unknown"), ) @@ -1344,7 +1343,7 @@ class QQAdapter(BasePlatformAdapter): if len(audio_data) < 10: logger.warning( "[%s] STT: downloaded data too small (%d bytes), skipping", - "QQBot", + self._log_tag, len(audio_data), ) return None @@ -1356,24 +1355,24 @@ class QQAdapter(BasePlatformAdapter): with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp: tmp.write(audio_data) wav_path = tmp.name - logger.info( + logger.debug( "[%s] STT: using pre-converted WAV directly (%d bytes)", - "QQBot", + self._log_tag, len(audio_data), ) else: - logger.info( - "[%s] STT: converting to wav, filename=%r", "QQBot", filename + logger.debug( + "[%s] STT: converting to wav, filename=%r", self._log_tag, filename ) wav_path = await self._convert_audio_to_wav_file(audio_data, filename) if not wav_path or not Path(wav_path).exists(): logger.warning( - "[%s] STT: ffmpeg conversion produced no output", "QQBot" + "[%s] STT: ffmpeg conversion produced no output", self._log_tag ) return None # 4. Call STT API - logger.info("[%s] STT: calling ASR on %s", "QQBot", wav_path) + logger.debug("[%s] STT: calling ASR on %s", self._log_tag, wav_path) transcript = await self._call_stt(wav_path) # 5. 
Cleanup temp file @@ -1383,14 +1382,14 @@ class QQAdapter(BasePlatformAdapter): pass if transcript: - logger.info("[%s] STT success: %r", "QQBot", transcript[:100]) + logger.debug("[%s] STT success: %r", self._log_tag, transcript[:100]) else: - logger.warning("[%s] STT: ASR returned empty transcript", "QQBot") + logger.warning("[%s] STT: ASR returned empty transcript", self._log_tag) return transcript except (httpx.HTTPStatusError, httpx.TransportError, IOError) as exc: logger.warning( "[%s] STT failed for voice attachment: %s: %s", - "QQBot", + self._log_tag, type(exc).__name__, exc, ) @@ -1471,8 +1470,7 @@ class QQAdapter(BasePlatformAdapter): """Check if bytes look like a SILK audio file.""" return data[:4] == b"#!SILK" or data[:2] == b"\x02!" or data[:9] == b"#!SILK_V3" - @staticmethod - async def _convert_silk_to_wav(src_path: str, wav_path: str) -> Optional[str]: + async def _convert_silk_to_wav(self, src_path: str, wav_path: str) -> Optional[str]: """Convert audio file to WAV using the pilk library. Tries the file as-is first, then as .silk if the extension differs. @@ -1483,7 +1481,7 @@ class QQAdapter(BasePlatformAdapter): except ImportError: logger.warning( "[%s] pilk not installed — cannot decode SILK audio. 
Run: pip install pilk", - "QQBot", + self._log_tag, ) return None @@ -1491,15 +1489,15 @@ class QQAdapter(BasePlatformAdapter): try: pilk.silk_to_wav(src_path, wav_path, rate=16000) if Path(wav_path).exists() and Path(wav_path).stat().st_size > 44: - logger.info( + logger.debug( "[%s] pilk converted %s to wav (%d bytes)", - "QQBot", + self._log_tag, Path(src_path).name, Path(wav_path).stat().st_size, ) return wav_path except Exception as exc: - logger.debug("[%s] pilk direct conversion failed: %s", "QQBot", exc) + logger.debug("[%s] pilk direct conversion failed: %s", self._log_tag, exc) # Try renaming to .silk and converting (pilk checks the extension) silk_path = src_path.rsplit(".", 1)[0] + ".silk" @@ -1509,15 +1507,15 @@ class QQAdapter(BasePlatformAdapter): shutil.copy2(src_path, silk_path) pilk.silk_to_wav(silk_path, wav_path, rate=16000) if Path(wav_path).exists() and Path(wav_path).stat().st_size > 44: - logger.info( + logger.debug( "[%s] pilk converted %s (as .silk) to wav (%d bytes)", - "QQBot", + self._log_tag, Path(src_path).name, Path(wav_path).stat().st_size, ) return wav_path except Exception as exc: - logger.debug("[%s] pilk .silk conversion failed: %s", "QQBot", exc) + logger.debug("[%s] pilk .silk conversion failed: %s", self._log_tag, exc) finally: try: os.unlink(silk_path) @@ -1526,8 +1524,7 @@ class QQAdapter(BasePlatformAdapter): return None - @staticmethod - async def _convert_raw_to_wav(audio_data: bytes, wav_path: str) -> Optional[str]: + async def _convert_raw_to_wav(self, audio_data: bytes, wav_path: str) -> Optional[str]: """Last resort: try writing audio data as raw PCM 16-bit mono 16kHz WAV. 
This will produce garbage if the data isn't raw PCM, but at least @@ -1543,11 +1540,10 @@ class QQAdapter(BasePlatformAdapter): wf.writeframes(audio_data) return wav_path except Exception as exc: - logger.debug("[%s] raw PCM fallback failed: %s", "QQBot", exc) + logger.debug("[%s] raw PCM fallback failed: %s", self._log_tag, exc) return None - @staticmethod - async def _convert_ffmpeg_to_wav(src_path: str, wav_path: str) -> Optional[str]: + async def _convert_ffmpeg_to_wav(self, src_path: str, wav_path: str) -> Optional[str]: """Convert audio file to WAV using ffmpeg.""" try: proc = await asyncio.create_subprocess_exec( @@ -1568,25 +1564,25 @@ class QQAdapter(BasePlatformAdapter): stderr = await proc.stderr.read() if proc.stderr else b"" logger.warning( "[%s] ffmpeg failed for %s: %s", - "QQBot", + self._log_tag, Path(src_path).name, stderr[:200].decode(errors="replace"), ) return None except (asyncio.TimeoutError, FileNotFoundError) as exc: - logger.warning("[%s] ffmpeg conversion error: %s", "QQBot", exc) + logger.warning("[%s] ffmpeg conversion error: %s", self._log_tag, exc) return None if not Path(wav_path).exists() or Path(wav_path).stat().st_size <= 44: logger.warning( "[%s] ffmpeg produced no/small output for %s", - "QQBot", + self._log_tag, Path(src_path).name, ) return None - logger.info( + logger.debug( "[%s] ffmpeg converted %s to wav (%d bytes)", - "QQBot", + self._log_tag, Path(src_path).name, Path(wav_path).stat().st_size, ) From 6fd58e1e4a6a5fa5f227673b5f586899c5cf7d73 Mon Sep 17 00:00:00 2001 From: WideLee Date: Fri, 17 Apr 2026 20:29:33 +0800 Subject: [PATCH 14/20] refactor(qqbot): replace log tags with self._log_tag --- gateway/platforms/qqbot/adapter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py index 0e3d546fa..286d8347e 100644 --- a/gateway/platforms/qqbot/adapter.py +++ b/gateway/platforms/qqbot/adapter.py @@ -1860,15 +1860,15 @@ class 
QQAdapter(BasePlatformAdapter): Returns True if reconnected, False if still disconnected. """ logger.info("[%s] Not connected — waiting for reconnection (up to %.0fs)", - self.name, self._RECONNECT_WAIT_SECONDS) + self._log_tag, self._RECONNECT_WAIT_SECONDS) waited = 0.0 while waited < self._RECONNECT_WAIT_SECONDS: await asyncio.sleep(self._RECONNECT_POLL_INTERVAL) waited += self._RECONNECT_POLL_INTERVAL if self.is_connected: - logger.info("[%s] Reconnected after %.1fs", self.name, waited) + logger.info("[%s] Reconnected after %.1fs", self._log_tag, waited) return True - logger.warning("[%s] Still not connected after %.0fs", self.name, self._RECONNECT_WAIT_SECONDS) + logger.warning("[%s] Still not connected after %.0fs", self._log_tag, self._RECONNECT_WAIT_SECONDS) return False async def send( From 287d3e12c71e5579da03f81bb77a0d5eaec4cfba Mon Sep 17 00:00:00 2001 From: WideLee Date: Fri, 17 Apr 2026 20:31:47 +0800 Subject: [PATCH 15/20] chore: add author map --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 880aebef9..c6d906436 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -257,6 +257,7 @@ AUTHOR_MAP = { "shenuu@gmail.com": "shenuu", "xiayh17@gmail.com": "xiayh0107", "asurla@nvidia.com": "anniesurla", + "limkuan24@gmail.com": "WideLee", } From 103beea7a693611cd375b944df1752e723956646 Mon Sep 17 00:00:00 2001 From: WideLee Date: Fri, 17 Apr 2026 20:42:41 +0800 Subject: [PATCH 16/20] fix(qqbot): fix test failures after package refactor - Re-export _ssrf_redirect_guard from __init__.py - Fix _parse_json @staticmethod using self._log_tag - Update test_detect_message_type to call as instance method - Fix mock.patch path for httpx.AsyncClient in adapter submodule --- gateway/platforms/qqbot/__init__.py | 2 ++ gateway/platforms/qqbot/adapter.py | 2 +- tests/gateway/test_qqbot.py | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/gateway/platforms/qqbot/__init__.py 
b/gateway/platforms/qqbot/__init__.py index 4877baa53..7119dd979 100644 --- a/gateway/platforms/qqbot/__init__.py +++ b/gateway/platforms/qqbot/__init__.py @@ -20,6 +20,7 @@ from .adapter import ( # noqa: F401 QQCloseError, check_qq_requirements, _coerce_list, + _ssrf_redirect_guard, ) # -- Onboard (QR-code scan-to-configure) ----------------------------------- @@ -40,6 +41,7 @@ __all__ = [ "QQCloseError", "check_qq_requirements", "_coerce_list", + "_ssrf_redirect_guard", # onboard "BindStatus", "create_bind_task", diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py index 286d8347e..5a0483453 100644 --- a/gateway/platforms/qqbot/adapter.py +++ b/gateway/platforms/qqbot/adapter.py @@ -766,7 +766,7 @@ class QQAdapter(BasePlatformAdapter): try: payload = json.loads(raw) except Exception: - logger.debug("[%s] Failed to parse JSON: %r", self._log_tag, raw) + logger.warning("[QQBot] Failed to parse JSON: %r", raw) return None return payload if isinstance(payload, dict) else None diff --git a/tests/gateway/test_qqbot.py b/tests/gateway/test_qqbot.py index 18b1b59b7..6446762ec 100644 --- a/tests/gateway/test_qqbot.py +++ b/tests/gateway/test_qqbot.py @@ -179,7 +179,7 @@ class TestVoiceAttachmentSSRFProtection: from gateway.platforms.qqbot import QQAdapter, _ssrf_redirect_guard client = mock.AsyncMock() - with mock.patch("gateway.platforms.qqbot.httpx.AsyncClient", return_value=client) as async_client_cls: + with mock.patch("gateway.platforms.qqbot.adapter.httpx.AsyncClient", return_value=client) as async_client_cls: adapter = QQAdapter(_make_config(app_id="a", client_secret="b")) adapter._ensure_token = mock.AsyncMock(side_effect=RuntimeError("stop after client creation")) @@ -316,7 +316,8 @@ class TestResolveSTTConfig: class TestDetectMessageType: def _fn(self, media_urls, media_types): from gateway.platforms.qqbot import QQAdapter - return QQAdapter._detect_message_type(media_urls, media_types) + adapter = 
QQAdapter(_make_config(app_id="a", client_secret="b")) + return adapter._detect_message_type(media_urls, media_types) def test_no_media(self): from gateway.platforms.base import MessageType From d2206c69cc628752c6cdbaa53f8f178c331c6f30 Mon Sep 17 00:00:00 2001 From: Teknium Date: Fri, 17 Apr 2026 15:29:15 -0700 Subject: [PATCH 17/20] fix(qqbot): add back-compat for env var rename; drop qrcode core dep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to WideLee's salvaged PR #11582. Back-compat for QQ_HOME_CHANNEL → QQBOT_HOME_CHANNEL rename: - gateway/config.py reads QQBOT_HOME_CHANNEL, falls back to QQ_HOME_CHANNEL with a one-shot deprecation warning so users on the old name aren't silently broken. - cron/scheduler.py: _HOME_TARGET_ENV_VARS['qqbot'] now maps to the new name; _get_home_target_chat_id falls back to the legacy name via a _LEGACY_HOME_TARGET_ENV_VARS table. - hermes_cli/status.py + hermes_cli/setup.py: honor both names when displaying or checking for missing home channels. - hermes_cli/config.py: keep legacy QQ_HOME_CHANNEL[_NAME] in _EXTRA_ENV_KEYS so .env sanitization still recognizes them. Scope cleanup: - Drop qrcode from core dependencies and requirements.txt (remains in messaging/dingtalk/feishu extras). _qqbot_render_qr already degrades gracefully when qrcode is missing, printing a 'pip install qrcode' tip and falling back to URL-only display. - Restore @staticmethod on QQAdapter._detect_message_type (it doesn't use self). Revert the test change that was only needed when it was converted to an instance method. - Reset uv.lock to origin/main; the PR's stale lock also included unrelated changes (atroposlib source URL, hermes-agent version bump, fastapi additions) that don't belong. Verified E2E: - Existing user (QQ_HOME_CHANNEL set): gateway + cron both pick up the legacy name; deprecation warning logs once. - Fresh user (QQBOT_HOME_CHANNEL set): gateway + cron use new name, no warning. 
- Both set: new name wins on both surfaces. Targeted tests: 296 passed, 4 skipped (qqbot + cron + hermes_cli). --- cron/scheduler.py | 17 +++++++++++-- gateway/config.py | 14 +++++++++- gateway/platforms/qqbot/adapter.py | 3 ++- hermes_cli/config.py | 1 + hermes_cli/setup.py | 4 ++- hermes_cli/status.py | 3 +++ pyproject.toml | 2 -- requirements.txt | 1 - tests/gateway/test_qqbot.py | 3 +-- uv.lock | 41 +++++++----------------------- 10 files changed, 47 insertions(+), 42 deletions(-) diff --git a/cron/scheduler.py b/cron/scheduler.py index 28c905713..db5991c6f 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -65,7 +65,15 @@ _HOME_TARGET_ENV_VARS = { "wecom": "WECOM_HOME_CHANNEL", "weixin": "WEIXIN_HOME_CHANNEL", "bluebubbles": "BLUEBUBBLES_HOME_CHANNEL", - "qqbot": "QQ_HOME_CHANNEL", + "qqbot": "QQBOT_HOME_CHANNEL", +} + +# Legacy env var names kept for back-compat. Each entry is the current +# primary env var → the previous name. _get_home_target_chat_id falls +# back to the legacy name if the primary is unset, so users who set the +# old name before the rename keep working until they migrate. 
+_LEGACY_HOME_TARGET_ENV_VARS = { + "QQBOT_HOME_CHANNEL": "QQ_HOME_CHANNEL", } from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run @@ -100,7 +108,12 @@ def _get_home_target_chat_id(platform_name: str) -> str: env_var = _HOME_TARGET_ENV_VARS.get(platform_name.lower()) if not env_var: return "" - return os.getenv(env_var, "") + value = os.getenv(env_var, "") + if not value: + legacy = _LEGACY_HOME_TARGET_ENV_VARS.get(env_var) + if legacy: + value = os.getenv(legacy, "") + return value def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[dict]: diff --git a/gateway/config.py b/gateway/config.py index d6a196e60..2d7407323 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -1230,11 +1230,23 @@ def _apply_env_overrides(config: GatewayConfig) -> None: if qq_group_allowed: extra["group_allow_from"] = qq_group_allowed qq_home = os.getenv("QQBOT_HOME_CHANNEL", "").strip() + qq_home_name_env = "QQBOT_HOME_CHANNEL_NAME" + if not qq_home: + # Back-compat: accept the pre-rename name and log a one-time warning. + legacy_home = os.getenv("QQ_HOME_CHANNEL", "").strip() + if legacy_home: + qq_home = legacy_home + qq_home_name_env = "QQ_HOME_CHANNEL_NAME" + import logging + logging.getLogger(__name__).warning( + "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL " + "in your .env for consistency with the platform key." 
+ ) if qq_home: config.platforms[Platform.QQBOT].home_channel = HomeChannel( platform=Platform.QQBOT, chat_id=qq_home, - name=os.getenv("QQBOT_HOME_CHANNEL_NAME", "Home"), + name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"), ) # Session settings diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py index 5a0483453..ced744271 100644 --- a/gateway/platforms/qqbot/adapter.py +++ b/gateway/platforms/qqbot/adapter.py @@ -1072,7 +1072,8 @@ class QQAdapter(BasePlatformAdapter): # Attachment processing # ------------------------------------------------------------------ - def _detect_message_type(self, media_urls: list, media_types: list): + @staticmethod + def _detect_message_type(media_urls: list, media_types: list): """Determine MessageType from attachment content types.""" if not media_urls: return MessageType.TEXT diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 156e99f2d..1670156b2 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -45,6 +45,7 @@ _EXTRA_ENV_KEYS = frozenset({ "WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS", "BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD", "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME", + "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME", # legacy aliases (pre-rename, still read for back-compat) "QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT", "QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL", "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 9c0ee0bff..6b4fc5d73 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -2215,7 +2215,9 @@ def setup_gateway(config: dict): missing_home.append("Slack") if get_env_value("BLUEBUBBLES_SERVER_URL") and not get_env_value("BLUEBUBBLES_HOME_CHANNEL"): missing_home.append("BlueBubbles") - if get_env_value("QQ_APP_ID") and not 
get_env_value("QQBOT_HOME_CHANNEL"): + if get_env_value("QQ_APP_ID") and not ( + get_env_value("QQBOT_HOME_CHANNEL") or get_env_value("QQ_HOME_CHANNEL") + ): missing_home.append("QQBot") if missing_home: diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 8fafbc2f4..bc3290d56 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -327,6 +327,9 @@ def show_status(args): home_channel = "" if home_var: home_channel = os.getenv(home_var, "") + # Back-compat: QQBot home channel was renamed from QQ_HOME_CHANNEL to QQBOT_HOME_CHANNEL + if not home_channel and home_var == "QQBOT_HOME_CHANNEL": + home_channel = os.getenv("QQ_HOME_CHANNEL", "") status = "configured" if has_token else "not configured" if home_channel: diff --git a/pyproject.toml b/pyproject.toml index d97c10810..0cac0b6b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,8 +34,6 @@ dependencies = [ "edge-tts>=7.2.7,<8", # Skills Hub (GitHub App JWT auth — optional, only needed for bot identity) "PyJWT[crypto]>=2.12.0,<3", # CVE-2026-32597 - # QR code rendering for scan-to-configure flows - "qrcode>=7.4,<9", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index 74f42d6c8..96f48e77f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,4 +34,3 @@ croniter python-telegram-bot[webhooks]>=22.6 discord.py>=2.0 aiohttp>=3.9.0 -qrcode diff --git a/tests/gateway/test_qqbot.py b/tests/gateway/test_qqbot.py index 6446762ec..a5aeb6251 100644 --- a/tests/gateway/test_qqbot.py +++ b/tests/gateway/test_qqbot.py @@ -316,8 +316,7 @@ class TestResolveSTTConfig: class TestDetectMessageType: def _fn(self, media_urls, media_types): from gateway.platforms.qqbot import QQAdapter - adapter = QQAdapter(_make_config(app_id="a", client_secret="b")) - return adapter._detect_message_type(media_urls, media_types) + return QQAdapter._detect_message_type(media_urls, media_types) def test_no_media(self): from gateway.platforms.base import MessageType diff --git a/uv.lock 
b/uv.lock index fa6785aa5..45efc2d93 100644 --- a/uv.lock +++ b/uv.lock @@ -300,7 +300,7 @@ wheels = [ [[package]] name = "atroposlib" version = "0.4.0" -source = { git = "https://github.com/NousResearch/atropos.git?rev=c20c85256e5a45ad31edf8b7276e9c5ee1995a30#c20c85256e5a45ad31edf8b7276e9c5ee1995a30" } +source = { git = "https://github.com/NousResearch/atropos.git#c421582b6f7ce8a32f751aab3117d3824ac8f709" } dependencies = [ { name = "aiofiles" }, { name = "aiohttp" }, @@ -1699,7 +1699,7 @@ wheels = [ [[package]] name = "hermes-agent" -version = "0.9.0" +version = "0.8.0" source = { editable = "." } dependencies = [ { name = "anthropic" }, @@ -1717,7 +1717,6 @@ dependencies = [ { name = "pyjwt", extra = ["crypto"] }, { name = "python-dotenv" }, { name = "pyyaml" }, - { name = "qrcode" }, { name = "requests" }, { name = "rich" }, { name = "tenacity" }, @@ -1738,7 +1737,6 @@ all = [ { name = "dingtalk-stream" }, { name = "discord-py", extra = ["voice"] }, { name = "elevenlabs" }, - { name = "fastapi" }, { name = "faster-whisper" }, { name = "honcho-ai" }, { name = "lark-oapi" }, @@ -1758,7 +1756,6 @@ all = [ { name = "slack-bolt" }, { name = "slack-sdk" }, { name = "sounddevice" }, - { name = "uvicorn", extra = ["standard"] }, ] cli = [ { name = "simple-term-menu" }, @@ -1845,10 +1842,6 @@ voice = [ { name = "numpy" }, { name = "sounddevice" }, ] -web = [ - { name = "fastapi" }, - { name = "uvicorn", extra = ["standard"] }, -] yc-bench = [ { name = "yc-bench", marker = "python_full_version >= '3.12'" }, ] @@ -1862,7 +1855,7 @@ requires-dist = [ { name = "aiosqlite", marker = "extra == 'matrix'", specifier = ">=0.20" }, { name = "anthropic", specifier = ">=0.39.0,<1" }, { name = "asyncpg", marker = "extra == 'matrix'", specifier = ">=0.29" }, - { name = "atroposlib", marker = "extra == 'rl'", git = "https://github.com/NousResearch/atropos.git?rev=c20c85256e5a45ad31edf8b7276e9c5ee1995a30" }, + { name = "atroposlib", marker = "extra == 'rl'", git = 
"https://github.com/NousResearch/atropos.git" }, { name = "croniter", marker = "extra == 'cron'", specifier = ">=6.0.0,<7" }, { name = "daytona", marker = "extra == 'daytona'", specifier = ">=0.148.0,<1" }, { name = "debugpy", marker = "extra == 'dev'", specifier = ">=1.8.0,<2" }, @@ -1873,7 +1866,6 @@ requires-dist = [ { name = "exa-py", specifier = ">=2.9.0,<3" }, { name = "fal-client", specifier = ">=0.13.1,<1" }, { name = "fastapi", marker = "extra == 'rl'", specifier = ">=0.104.0,<1" }, - { name = "fastapi", marker = "extra == 'web'", specifier = ">=0.104.0,<1" }, { name = "faster-whisper", marker = "extra == 'voice'", specifier = ">=1.0.0,<2" }, { name = "fire", specifier = ">=0.7.1,<1" }, { name = "firecrawl-py", specifier = ">=4.16.0,<5" }, @@ -1902,7 +1894,6 @@ requires-dist = [ { name = "hermes-agent", extras = ["sms"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["tts-premium"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["voice"], marker = "extra == 'all'" }, - { name = "hermes-agent", extras = ["web"], marker = "extra == 'all'" }, { name = "honcho-ai", marker = "extra == 'honcho'", specifier = ">=2.0.1,<3" }, { name = "httpx", extras = ["socks"], specifier = ">=0.28.1,<1" }, { name = "jinja2", specifier = ">=3.1.5,<4" }, @@ -1927,7 +1918,6 @@ requires-dist = [ { name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'messaging'", specifier = ">=22.6,<23" }, { name = "pywinpty", marker = "sys_platform == 'win32' and extra == 'pty'", specifier = ">=2.0.0,<3" }, { name = "pyyaml", specifier = ">=6.0.2,<7" }, - { name = "qrcode", specifier = ">=7.4,<9" }, { name = "requests", specifier = ">=2.33.0,<3" }, { name = "rich", specifier = ">=14.3.3,<15" }, { name = "simple-term-menu", marker = "extra == 'cli'", specifier = ">=1.0,<2" }, @@ -1937,13 +1927,12 @@ requires-dist = [ { name = "slack-sdk", marker = "extra == 'slack'", specifier = ">=3.27.0,<4" }, { name = "sounddevice", marker = "extra == 
'voice'", specifier = ">=0.4.6,<1" }, { name = "tenacity", specifier = ">=9.1.4,<10" }, - { name = "tinker", marker = "extra == 'rl'", git = "https://github.com/thinking-machines-lab/tinker.git?rev=30517b667f18a3dfb7ef33fb56cf686d5820ba2b" }, + { name = "tinker", marker = "extra == 'rl'", git = "https://github.com/thinking-machines-lab/tinker.git" }, { name = "uvicorn", extras = ["standard"], marker = "extra == 'rl'", specifier = ">=0.24.0,<1" }, - { name = "uvicorn", extras = ["standard"], marker = "extra == 'web'", specifier = ">=0.24.0,<1" }, { name = "wandb", marker = "extra == 'rl'", specifier = ">=0.15.0,<1" }, - { name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c" }, + { name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git" }, ] -provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "termux", "dingtalk", "feishu", "web", "rl", "yc-bench", "all"] +provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "termux", "dingtalk", "feishu", "rl", "yc-bench", "all"] [[package]] name = "hf-transfer" @@ -4171,18 +4160,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] -[[package]] -name = "qrcode" -version = "8.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, -] 
-sdist = { url = "https://files.pythonhosted.org/packages/8f/b2/7fc2931bfae0af02d5f53b174e9cf701adbb35f39d69c2af63d4a39f81a9/qrcode-8.2.tar.gz", hash = "sha256:35c3f2a4172b33136ab9f6b3ef1c00260dd2f66f858f24d88418a015f446506c", size = 43317, upload-time = "2025-05-01T15:44:24.726Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/dd/b8/d2d6d731733f51684bbf76bf34dab3b70a9148e8f2cef2bb544fccec681a/qrcode-8.2-py3-none-any.whl", hash = "sha256:16e64e0716c14960108e85d853062c9e8bba5ca8252c0b4d0231b9df4060ff4f", size = 45986, upload-time = "2025-05-01T15:44:22.781Z" }, -] - [[package]] name = "referencing" version = "0.37.0" @@ -4799,8 +4776,8 @@ wheels = [ [[package]] name = "tinker" -version = "0.18.0" -source = { git = "https://github.com/thinking-machines-lab/tinker.git?rev=30517b667f18a3dfb7ef33fb56cf686d5820ba2b#30517b667f18a3dfb7ef33fb56cf686d5820ba2b" } +version = "0.16.1" +source = { git = "https://github.com/thinking-machines-lab/tinker.git#07bd3c2dd3cd4398ac1c26f0ec0deccbf3c1f913" } dependencies = [ { name = "anyio" }, { name = "click" }, @@ -5513,7 +5490,7 @@ wheels = [ [[package]] name = "yc-bench" version = "0.1.0" -source = { git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c#bfb0c88062450f46341bd9a5298903fc2e952a5c" } +source = { git = "https://github.com/collinear-ai/yc-bench.git#0c53c98f01a431db2e391482bc46013045854ab2" } dependencies = [ { name = "litellm", marker = "python_full_version >= '3.12'" }, { name = "matplotlib", marker = "python_full_version >= '3.12'" }, From c6fd2619f792ed563434356891f318f9d6e1ee4f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 17 Apr 2026 15:34:12 -0700 Subject: [PATCH 18/20] fix(gemini-cli): surface MODEL_CAPACITY_EXHAUSTED cleanly + drop retired gemma-4-26b (#11833) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Google-side 429 Code Assist errors now flow through Hermes' 
normal rate-limit path (status_code on the exception, Retry-After preserved via error.response) instead of being opaque RuntimeErrors. User sees a one-line capacity message instead of a 500-char JSON dump. Changes - CodeAssistError grows status_code / response / retry_after / details attrs. _extract_status_code in error_classifier picks up status_code and classifies 429 as FailoverReason.rate_limit, so fallback_providers triggers the same way it does for SDK errors. run_agent.py line ~10428 already walks error.response.headers for Retry-After — preserving the response means that path just works. - _gemini_http_error parses the Google error envelope (error.status + error.details[].reason from google.rpc.ErrorInfo, retryDelay from google.rpc.RetryInfo). MODEL_CAPACITY_EXHAUSTED / RESOURCE_EXHAUSTED / 404 model-not-found each produce a human-readable message; unknown shapes fall back to the previous raw-body format. - Drop gemma-4-26b-it from hermes_cli/models.py, hermes_cli/setup.py, and agent/model_metadata.py — Google returned 404 for it today in local repro. Kept gemma-4-31b-it (capacity-constrained but not retired). Validation | | Before | After | |---------------------------|--------------------------------|-------------------------------------------| | Error message | 'Code Assist returned HTTP 429: {500 chars JSON}' | 'Gemini capacity exhausted for gemini-2.5-pro (Google-side throttle...)' | | status_code on error | None (opaque RuntimeError) | 429 | | Classifier reason | unknown (string-match fallback) | FailoverReason.rate_limit | | Retry-After honored | ignored | extracted from RetryInfo or header | | gemma-4-26b-it picker | advertised (404s on Google) | removed | Unit + E2E tests cover non-streaming 429, streaming 429, 404 model-not-found, Retry-After header fallback, malformed body, and classifier integration. Targeted suites: tests/agent/test_gemini_cloudcode.py (81 tests), full tests/hermes_cli (2203 tests) green. 
Co-authored-by: teknium1 --- agent/gemini_cloudcode_adapter.py | 139 +++++++++++++++++++- agent/google_code_assist.py | 38 +++++- agent/model_metadata.py | 1 - hermes_cli/models.py | 1 - hermes_cli/setup.py | 2 +- tests/agent/test_gemini_cloudcode.py | 154 +++++++++++++++++++++++ tests/hermes_cli/test_gemini_provider.py | 4 - 7 files changed, 327 insertions(+), 12 deletions(-) diff --git a/agent/gemini_cloudcode_adapter.py b/agent/gemini_cloudcode_adapter.py index 36ba288eb..ed687bffd 100644 --- a/agent/gemini_cloudcode_adapter.py +++ b/agent/gemini_cloudcode_adapter.py @@ -747,18 +747,149 @@ class GeminiCloudCodeClient: def _gemini_http_error(response: httpx.Response) -> CodeAssistError: + """Translate an httpx response into a CodeAssistError with rich metadata. + + Parses Google's error envelope (``{"error": {"code", "message", "status", + "details": [...]}}``) so the agent's error classifier can reason about + the failure — ``status_code`` enables the rate_limit / auth classification + paths, and ``response`` lets the main loop honor ``Retry-After`` just + like it does for OpenAI SDK exceptions. + + Also lifts a few recognizable Google conditions into human-readable + messages so the user sees something better than a 500-char JSON dump: + + MODEL_CAPACITY_EXHAUSTED → "Gemini model capacity exhausted for + . This is a Google-side throttle..." + RESOURCE_EXHAUSTED w/o reason → quota-style message + 404 → "Model not found at cloudcode-pa..." + """ status = response.status_code + + # Parse the body once, surviving any weird encodings. + body_text = "" + body_json: Dict[str, Any] = {} try: - body = response.text[:500] + body_text = response.text except Exception: - body = "" - # Let run_agent's retry logic see auth errors as rotatable via `api_key` + body_text = "" + if body_text: + try: + parsed = json.loads(body_text) + if isinstance(parsed, dict): + body_json = parsed + except (ValueError, TypeError): + body_json = {} + + # Dig into Google's error envelope. 
Shape is: + # {"error": {"code": 429, "message": "...", "status": "RESOURCE_EXHAUSTED", + # "details": [{"@type": ".../ErrorInfo", "reason": "MODEL_CAPACITY_EXHAUSTED", + # "metadata": {...}}, + # {"@type": ".../RetryInfo", "retryDelay": "30s"}]}} + err_obj = body_json.get("error") if isinstance(body_json, dict) else None + if not isinstance(err_obj, dict): + err_obj = {} + err_status = str(err_obj.get("status") or "").strip() + err_message = str(err_obj.get("message") or "").strip() + err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else [] + + # Extract google.rpc.ErrorInfo reason + metadata. There may be more + # than one ErrorInfo (rare), so we pick the first one with a reason. + error_reason = "" + error_metadata: Dict[str, Any] = {} + retry_delay_seconds: Optional[float] = None + for detail in err_details_list: + if not isinstance(detail, dict): + continue + type_url = str(detail.get("@type") or "") + if not error_reason and type_url.endswith("/google.rpc.ErrorInfo"): + reason = detail.get("reason") + if isinstance(reason, str) and reason: + error_reason = reason + md = detail.get("metadata") + if isinstance(md, dict): + error_metadata = md + elif retry_delay_seconds is None and type_url.endswith("/google.rpc.RetryInfo"): + # retryDelay is a google.protobuf.Duration string like "30s" or "1.5s". + delay_raw = detail.get("retryDelay") + if isinstance(delay_raw, str) and delay_raw.endswith("s"): + try: + retry_delay_seconds = float(delay_raw[:-1]) + except ValueError: + pass + elif isinstance(delay_raw, (int, float)): + retry_delay_seconds = float(delay_raw) + + # Fall back to the Retry-After header if the body didn't include RetryInfo. 
+ if retry_delay_seconds is None: + try: + header_val = response.headers.get("Retry-After") or response.headers.get("retry-after") + except Exception: + header_val = None + if header_val: + try: + retry_delay_seconds = float(header_val) + except (TypeError, ValueError): + retry_delay_seconds = None + + # Classify the error code. ``code_assist_rate_limited`` stays the default + # for 429s; a more specific reason tag helps downstream callers (e.g. tests, + # logs) without changing the rate_limit classification path. code = f"code_assist_http_{status}" if status == 401: code = "code_assist_unauthorized" elif status == 429: code = "code_assist_rate_limited" + if error_reason == "MODEL_CAPACITY_EXHAUSTED": + code = "code_assist_capacity_exhausted" + + # Build a human-readable message. Keep the status + a raw-body tail for + # debugging, but lead with a friendlier summary when we recognize the + # Google signal. + model_hint = "" + if isinstance(error_metadata, dict): + model_hint = str(error_metadata.get("model") or error_metadata.get("modelId") or "").strip() + + if status == 429 and error_reason == "MODEL_CAPACITY_EXHAUSTED": + target = model_hint or "this Gemini model" + message = ( + f"Gemini capacity exhausted for {target} (Google-side throttle, " + f"not a Hermes issue). Try a different Gemini model or set a " + f"fallback_providers entry to a non-Gemini provider." + ) + if retry_delay_seconds is not None: + message += f" Google suggests retrying in {retry_delay_seconds:g}s." + elif status == 429 and err_status == "RESOURCE_EXHAUSTED": + message = ( + f"Gemini quota exhausted ({err_message or 'RESOURCE_EXHAUSTED'}). " + f"Check /gquota for remaining daily requests." + ) + if retry_delay_seconds is not None: + message += f" Retry suggested in {retry_delay_seconds:g}s." + elif status == 404: + # Google returns 404 when a model has been retired or renamed. 
+ target = model_hint or (err_message or "model") + message = ( + f"Code Assist 404: {target} is not available at " + f"cloudcode-pa.googleapis.com. It may have been renamed or " + f"retired. Check hermes_cli/models.py for the current list." + ) + elif err_message: + # Generic fallback with the parsed message. + message = f"Code Assist HTTP {status} ({err_status or 'error'}): {err_message}" + else: + # Last-ditch fallback — raw body snippet. + message = f"Code Assist returned HTTP {status}: {body_text[:500]}" + return CodeAssistError( - f"Code Assist returned HTTP {status}: {body}", + message, code=code, + status_code=status, + response=response, + retry_after=retry_delay_seconds, + details={ + "status": err_status, + "reason": error_reason, + "metadata": error_metadata, + "message": err_message, + }, ) diff --git a/agent/google_code_assist.py b/agent/google_code_assist.py index 1acf3ea13..eba09b8f4 100644 --- a/agent/google_code_assist.py +++ b/agent/google_code_assist.py @@ -68,9 +68,45 @@ _ONBOARDING_POLL_INTERVAL_SECONDS = 5.0 class CodeAssistError(RuntimeError): - def __init__(self, message: str, *, code: str = "code_assist_error") -> None: + """Exception raised by the Code Assist (``cloudcode-pa``) integration. + + Carries HTTP status / response / retry-after metadata so the agent's + ``error_classifier._extract_status_code`` and the main loop's Retry-After + handling (which walks ``error.response.headers``) pick up the right + signals. Without these, 429s from the OAuth path look like opaque + ``RuntimeError`` and skip the rate-limit path. 
+ """ + + def __init__( + self, + message: str, + *, + code: str = "code_assist_error", + status_code: Optional[int] = None, + response: Any = None, + retry_after: Optional[float] = None, + details: Optional[Dict[str, Any]] = None, + ) -> None: super().__init__(message) self.code = code + # ``status_code`` is picked up by ``agent.error_classifier._extract_status_code`` + # so a 429 from Code Assist classifies as FailoverReason.rate_limit and + # triggers the main loop's fallback_providers chain the same way SDK + # errors do. + self.status_code = status_code + # ``response`` is the underlying ``httpx.Response`` (or a shim with a + # ``.headers`` mapping and ``.json()`` method). The main loop reads + # ``error.response.headers["Retry-After"]`` to honor Google's retry + # hints when the backend throttles us. + self.response = response + # Parsed ``Retry-After`` seconds (kept separately for convenience — + # Google returns retry hints in both the header and the error body's + # ``google.rpc.RetryInfo`` details, and we pick whichever we found). + self.retry_after = retry_after + # Parsed structured error details from the Google error envelope + # (e.g. ``{"reason": "MODEL_CAPACITY_EXHAUSTED", "status": "RESOURCE_EXHAUSTED"}``). + # Useful for logging and for tests that want to assert on specifics. 
+ self.details = details or {} class ProjectIdRequiredError(CodeAssistError): diff --git a/agent/model_metadata.py b/agent/model_metadata.py index b30af6e48..81bac6c92 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -125,7 +125,6 @@ DEFAULT_CONTEXT_LENGTHS = { "gemini": 1048576, # Gemma (open models served via AI Studio) "gemma-4-31b": 256000, - "gemma-4-26b": 256000, "gemma-3": 131072, "gemma": 8192, # fallback for older gemma models # DeepSeek diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 7a897cb79..d2859e01c 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -135,7 +135,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "gemini-2.5-flash-lite", # Gemma open models (also served via AI Studio) "gemma-4-31b-it", - "gemma-4-26b-it", ], "google-gemini-cli": [ "gemini-2.5-pro", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 6b4fc5d73..95c9cae77 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -91,7 +91,7 @@ _DEFAULT_PROVIDER_MODELS = { "gemini": [ "gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", - "gemma-4-31b-it", "gemma-4-26b-it", + "gemma-4-31b-it", ], "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"], "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], diff --git a/tests/agent/test_gemini_cloudcode.py b/tests/agent/test_gemini_cloudcode.py index cf5e80f08..c9d2b87df 100644 --- a/tests/agent/test_gemini_cloudcode.py +++ b/tests/agent/test_gemini_cloudcode.py @@ -826,6 +826,160 @@ class TestGeminiCloudCodeClient: finally: client.close() + +class TestGeminiHttpErrorParsing: + """Regression coverage for _gemini_http_error Google-envelope parsing. + + These are the paths that users actually hit during Google-side throttling + (April 2026: gemini-2.5-pro MODEL_CAPACITY_EXHAUSTED, gemma-4-26b-it + returning 404). 
The error needs to carry status_code + response so the + main loop's error_classifier and Retry-After logic work. + """ + + @staticmethod + def _fake_response(status: int, body: dict | str = "", headers=None): + """Minimal httpx.Response stand-in (duck-typed for _gemini_http_error).""" + class _FakeResponse: + def __init__(self): + self.status_code = status + if isinstance(body, dict): + self.text = json.dumps(body) + else: + self.text = body + self.headers = headers or {} + return _FakeResponse() + + def test_model_capacity_exhausted_produces_friendly_message(self): + from agent.gemini_cloudcode_adapter import _gemini_http_error + + body = { + "error": { + "code": 429, + "message": "Resource has been exhausted (e.g. check quota).", + "status": "RESOURCE_EXHAUSTED", + "details": [ + { + "@type": "type.googleapis.com/google.rpc.ErrorInfo", + "reason": "MODEL_CAPACITY_EXHAUSTED", + "domain": "googleapis.com", + "metadata": {"model": "gemini-2.5-pro"}, + }, + { + "@type": "type.googleapis.com/google.rpc.RetryInfo", + "retryDelay": "30s", + }, + ], + } + } + err = _gemini_http_error(self._fake_response(429, body)) + assert err.status_code == 429 + assert err.code == "code_assist_capacity_exhausted" + assert err.retry_after == 30.0 + assert err.details["reason"] == "MODEL_CAPACITY_EXHAUSTED" + # Message must be user-friendly, not a raw JSON dump. + message = str(err) + assert "gemini-2.5-pro" in message + assert "capacity exhausted" in message.lower() + assert "30s" in message + # response attr is preserved for run_agent's Retry-After header path. 
+ assert err.response is not None + + def test_resource_exhausted_without_reason(self): + from agent.gemini_cloudcode_adapter import _gemini_http_error + + body = { + "error": { + "code": 429, + "message": "Quota exceeded for requests per minute.", + "status": "RESOURCE_EXHAUSTED", + } + } + err = _gemini_http_error(self._fake_response(429, body)) + assert err.status_code == 429 + assert err.code == "code_assist_rate_limited" + message = str(err) + assert "quota" in message.lower() + + def test_404_model_not_found_produces_model_retired_message(self): + from agent.gemini_cloudcode_adapter import _gemini_http_error + + body = { + "error": { + "code": 404, + "message": "models/gemma-4-26b-it is not found for API version v1internal", + "status": "NOT_FOUND", + } + } + err = _gemini_http_error(self._fake_response(404, body)) + assert err.status_code == 404 + message = str(err) + assert "not available" in message.lower() or "retired" in message.lower() + # Error message should reference the actual model text from Google. 
+ assert "gemma-4-26b-it" in message + + def test_unauthorized_preserves_status_code(self): + from agent.gemini_cloudcode_adapter import _gemini_http_error + + err = _gemini_http_error(self._fake_response( + 401, {"error": {"code": 401, "message": "Invalid token", "status": "UNAUTHENTICATED"}}, + )) + assert err.status_code == 401 + assert err.code == "code_assist_unauthorized" + + def test_retry_after_header_fallback(self): + """If the body has no RetryInfo detail, fall back to Retry-After header.""" + from agent.gemini_cloudcode_adapter import _gemini_http_error + + resp = self._fake_response( + 429, + {"error": {"code": 429, "message": "Rate limited", "status": "RESOURCE_EXHAUSTED"}}, + headers={"Retry-After": "45"}, + ) + err = _gemini_http_error(resp) + assert err.retry_after == 45.0 + + def test_malformed_body_still_produces_structured_error(self): + """Non-JSON body must not swallow status_code — we still want the classifier path.""" + from agent.gemini_cloudcode_adapter import _gemini_http_error + + err = _gemini_http_error(self._fake_response(500, "internal error")) + assert err.status_code == 500 + # Raw body snippet must still be there for debugging. + assert "500" in str(err) + + def test_status_code_flows_through_error_classifier(self): + """End-to-end: CodeAssistError from a 429 must classify as rate_limit. + + This is the whole point of adding status_code to CodeAssistError — + _extract_status_code must see it and FailoverReason.rate_limit must + fire, so the main loop triggers fallback_providers. 
+ """ + from agent.gemini_cloudcode_adapter import _gemini_http_error + from agent.error_classifier import classify_api_error, FailoverReason + + body = { + "error": { + "code": 429, + "message": "Resource has been exhausted", + "status": "RESOURCE_EXHAUSTED", + "details": [ + { + "@type": "type.googleapis.com/google.rpc.ErrorInfo", + "reason": "MODEL_CAPACITY_EXHAUSTED", + "metadata": {"model": "gemini-2.5-pro"}, + } + ], + } + } + err = _gemini_http_error(self._fake_response(429, body)) + + classified = classify_api_error( + err, provider="google-gemini-cli", model="gemini-2.5-pro", + ) + assert classified.status_code == 429 + assert classified.reason == FailoverReason.rate_limit + + # ============================================================================= # Provider registration # ============================================================================= diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py index b448ca513..089a5cf98 100644 --- a/tests/hermes_cli/test_gemini_provider.py +++ b/tests/hermes_cli/test_gemini_provider.py @@ -178,10 +178,6 @@ class TestGeminiContextLength: ctx = get_model_context_length("gemma-4-31b-it", provider="gemini") assert ctx == 256000 - def test_gemma_4_26b_context(self): - ctx = get_model_context_length("gemma-4-26b-it", provider="gemini") - assert ctx == 256000 - def test_gemini_3_context(self): ctx = get_model_context_length("gemini-3.1-pro-preview", provider="gemini") assert ctx == 1048576 From 2b60478fc2f9b0cd8a41efbe237da1465d26a43c Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:17:15 -0600 Subject: [PATCH 19/20] fix(kimi): force kimi-for-coding temperature to 0.6 --- agent/auxiliary_client.py | 15 +++++ run_agent.py | 7 ++- tests/agent/test_auxiliary_client.py | 89 ++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 3 deletions(-) diff --git a/agent/auxiliary_client.py 
b/agent/auxiliary_client.py index 4f1746166..4860b16ac 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -94,6 +94,17 @@ def _normalize_aux_provider(provider: Optional[str]) -> str: return "custom" return _PROVIDER_ALIASES.get(normalized, normalized) + +_FIXED_TEMPERATURE_MODELS: Dict[str, float] = { + "kimi-for-coding": 0.6, +} + + +def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]: + """Return a required temperature override for models with strict contracts.""" + normalized = (model or "").strip().lower() + return _FIXED_TEMPERATURE_MODELS.get(normalized) + # Default auxiliary models for direct API-key providers (cheap/fast for side tasks) _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "gemini": "gemini-3-flash-preview", @@ -2293,6 +2304,10 @@ def _build_call_kwargs( "timeout": timeout, } + fixed_temperature = _fixed_temperature_for_model(model) + if fixed_temperature is not None: + temperature = fixed_temperature + # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently # drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on # flush_memories, 0 on structured-JSON extraction) don't 400 the moment diff --git a/run_agent.py b/run_agent.py index 03dead730..18729709f 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7209,12 +7209,13 @@ class AIAgent: # it's cheaper and avoids Codex Responses API incompatibility. 
from agent.auxiliary_client import call_llm as _call_llm _aux_available = True + _flush_temperature = 0.6 if str(self.model or "").strip().lower() == "kimi-for-coding" else 0.3 try: response = _call_llm( task="flush_memories", messages=api_messages, tools=[memory_tool_def], - temperature=0.3, + temperature=_flush_temperature, max_tokens=5120, # timeout resolved from auxiliary.flush_memories.timeout config ) @@ -7226,7 +7227,7 @@ class AIAgent: # No auxiliary client -- use the Codex Responses path directly codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) - codex_kwargs["temperature"] = 0.3 + codex_kwargs["temperature"] = _flush_temperature if "max_output_tokens" in codex_kwargs: codex_kwargs["max_output_tokens"] = 5120 response = self._run_codex_stream(codex_kwargs) @@ -7245,7 +7246,7 @@ class AIAgent: "model": self.model, "messages": api_messages, "tools": [memory_tool_def], - "temperature": 0.3, + "temperature": _flush_temperature, **self._max_tokens_param(5120), } from agent.auxiliary_client import _get_task_timeout diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 5d79f96de..1778855dd 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -696,6 +696,95 @@ class TestIsConnectionError: assert _is_connection_error(err) is False +class TestKimiForCodingTemperature: + """kimi-for-coding now requires temperature=0.6 exactly.""" + + def test_build_call_kwargs_forces_fixed_temperature(self): + from agent.auxiliary_client import _build_call_kwargs + + kwargs = _build_call_kwargs( + provider="kimi-coding", + model="kimi-for-coding", + messages=[{"role": "user", "content": "hello"}], + temperature=0.3, + ) + + assert kwargs["temperature"] == 0.6 + + def test_build_call_kwargs_injects_temperature_when_missing(self): + from agent.auxiliary_client import _build_call_kwargs + + kwargs = _build_call_kwargs( + 
provider="kimi-coding", + model="kimi-for-coding", + messages=[{"role": "user", "content": "hello"}], + temperature=None, + ) + + assert kwargs["temperature"] == 0.6 + + def test_auto_routed_kimi_for_coding_sync_call_uses_fixed_temperature(self): + client = MagicMock() + client.base_url = "https://api.kimi.com/coding/v1" + response = MagicMock() + client.chat.completions.create.return_value = response + + with patch( + "agent.auxiliary_client._get_cached_client", + return_value=(client, "kimi-for-coding"), + ), patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=("auto", "kimi-for-coding", None, None, None), + ): + result = call_llm( + task="session_search", + messages=[{"role": "user", "content": "hello"}], + temperature=0.1, + ) + + assert result is response + kwargs = client.chat.completions.create.call_args.kwargs + assert kwargs["model"] == "kimi-for-coding" + assert kwargs["temperature"] == 0.6 + + @pytest.mark.asyncio + async def test_auto_routed_kimi_for_coding_async_call_uses_fixed_temperature(self): + client = MagicMock() + client.base_url = "https://api.kimi.com/coding/v1" + response = MagicMock() + client.chat.completions.create = AsyncMock(return_value=response) + + with patch( + "agent.auxiliary_client._get_cached_client", + return_value=(client, "kimi-for-coding"), + ), patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=("auto", "kimi-for-coding", None, None, None), + ): + result = await async_call_llm( + task="session_search", + messages=[{"role": "user", "content": "hello"}], + temperature=0.1, + ) + + assert result is response + kwargs = client.chat.completions.create.call_args.kwargs + assert kwargs["model"] == "kimi-for-coding" + assert kwargs["temperature"] == 0.6 + + def test_non_kimi_model_still_preserves_temperature(self): + from agent.auxiliary_client import _build_call_kwargs + + kwargs = _build_call_kwargs( + provider="kimi-coding", + model="kimi-k2.5", + messages=[{"role": "user", 
"content": "hello"}], + temperature=0.3, + ) + + assert kwargs["temperature"] == 0.3 + + # --------------------------------------------------------------------------- # async_call_llm payment / connection fallback (#7512 bug 2) # --------------------------------------------------------------------------- From 0a8318780179ec4ff2438fe3171bebeba25d56c7 Mon Sep 17 00:00:00 2001 From: Teknium Date: Fri, 17 Apr 2026 15:33:31 -0700 Subject: [PATCH 20/20] refactor(kimi): use _fixed_temperature_for_model helper in flush_memories Replace the hardcoded 'kimi-for-coding' string check with the helper from auxiliary_client so there is one source of truth for the list of models with fixed-temperature contracts. Adding a new entry to _FIXED_TEMPERATURE_MODELS now automatically covers flush_memories too. --- run_agent.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/run_agent.py b/run_agent.py index 18729709f..34f7518a2 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7207,9 +7207,16 @@ class AIAgent: # Use auxiliary client for the flush call when available -- # it's cheaper and avoids Codex Responses API incompatibility. - from agent.auxiliary_client import call_llm as _call_llm + from agent.auxiliary_client import ( + call_llm as _call_llm, + _fixed_temperature_for_model, + ) _aux_available = True - _flush_temperature = 0.6 if str(self.model or "").strip().lower() == "kimi-for-coding" else 0.3 + # Use the fixed-temperature override (e.g. kimi-for-coding → 0.6) if + # the model has a strict contract; otherwise the historical 0.3 default. + _flush_temperature = _fixed_temperature_for_model(self.model) + if _flush_temperature is None: + _flush_temperature = 0.3 try: response = _call_llm( task="flush_memories",
A real terminal interface — Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.