mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-22 10:32:00 +00:00
Merge remote-tracking branch 'origin/main' into pr48275-rebase
# Conflicts: # cron/scheduler.py
This commit is contained in:
commit
a58287afcb
162 changed files with 8521 additions and 634 deletions
|
|
@ -144,4 +144,4 @@ Available in `hybrid` and `tools` memory modes:
|
|||
|
||||
## Client Version
|
||||
|
||||
Requires `hindsight-client >= 0.4.22`. The plugin auto-upgrades on session start if an older version is detected.
|
||||
Requires `hindsight-client >= 0.6.1`. The plugin auto-upgrades on session start if an older version is detected.
|
||||
|
|
|
|||
|
|
@ -50,7 +50,8 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
_DEFAULT_API_URL = "https://api.hindsight.vectorize.io"
|
||||
_DEFAULT_LOCAL_URL = "http://localhost:8888"
|
||||
_MIN_CLIENT_VERSION = "0.4.22"
|
||||
# Keep in sync with tools/lazy_deps.py ("memory.hindsight") and plugin.yaml.
|
||||
_MIN_CLIENT_VERSION = "0.6.1"
|
||||
_DEFAULT_TIMEOUT = 120 # seconds — cloud API can take 30-40s per request
|
||||
_DEFAULT_IDLE_TIMEOUT = 300 # seconds — Hindsight embedded daemon default
|
||||
# Mirrors hindsight-integrations/openclaw — Hindsight 0.5.0 added
|
||||
|
|
@ -100,6 +101,17 @@ def _check_local_runtime() -> tuple[bool, str | None]:
|
|||
return False, str(exc)
|
||||
|
||||
|
||||
def _ensure_cloud_client_dependency() -> None:
|
||||
"""Install the Hindsight cloud client lazily before importing it."""
|
||||
try:
|
||||
from tools.lazy_deps import ensure as _lazy_ensure
|
||||
_lazy_ensure("memory.hindsight", prompt=False)
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception as exc:
|
||||
raise ImportError(str(exc)) from exc
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hindsight API capability probe — mirrors hindsight-integrations/openclaw.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -730,7 +742,6 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
env_writes: dict = {}
|
||||
|
||||
# Step 2: Install/upgrade deps for selected mode
|
||||
_MIN_CLIENT_VERSION = "0.4.22"
|
||||
cloud_dep = f"hindsight-client>={_MIN_CLIENT_VERSION}"
|
||||
local_dep = "hindsight-all"
|
||||
if mode == "local_embedded":
|
||||
|
|
@ -990,6 +1001,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
kwargs["idle_timeout"] = idle_timeout
|
||||
self._client = HindsightEmbedded(**kwargs)
|
||||
else:
|
||||
_ensure_cloud_client_dependency()
|
||||
from hindsight_client import Hindsight
|
||||
timeout = self._timeout or _DEFAULT_TIMEOUT
|
||||
kwargs = {"base_url": self._api_url, "timeout": float(timeout)}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ name: hindsight
|
|||
version: 1.0.0
|
||||
description: "Hindsight — long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval."
|
||||
pip_dependencies:
|
||||
- "hindsight-client>=0.4.22"
|
||||
- "hindsight-client>=0.6.1"
|
||||
requires_env: []
|
||||
hooks:
|
||||
- on_session_end
|
||||
|
|
|
|||
|
|
@ -45,10 +45,11 @@ from typing import Any, Callable, Dict, List, Optional, Set
|
|||
from urllib.parse import urlparse
|
||||
from urllib.request import url2pathname
|
||||
|
||||
from agent.message_content import flatten_message_text
|
||||
from agent.memory_provider import MemoryProvider
|
||||
from agent.skill_commands import extract_user_instruction_from_skill_message
|
||||
from tools.registry import tool_error
|
||||
from utils import atomic_json_write
|
||||
from utils import atomic_json_write, env_var_enabled
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -70,6 +71,7 @@ _TIMEOUT = 30.0
|
|||
_SESSION_DRAIN_TIMEOUT = 10.0
|
||||
_DEFERRED_COMMIT_TIMEOUT = (_TIMEOUT * 2) + 5.0
|
||||
_REMOTE_RESOURCE_PREFIXES = ("http://", "https://", "git@", "ssh://", "git://")
|
||||
_SYNC_TRACE_ENV = "HERMES_OPENVIKING_SYNC_TRACE"
|
||||
|
||||
# Maps the viking_remember `category` enum to a viking:// subdirectory.
|
||||
# Keep in sync with REMEMBER_SCHEMA.parameters.properties.category.enum.
|
||||
|
|
@ -156,6 +158,18 @@ def _derive_openviking_user_text(content: Any) -> str:
|
|||
return extract_user_instruction_from_skill_message(content) or ""
|
||||
|
||||
|
||||
def _sync_trace_enabled() -> bool:
|
||||
return env_var_enabled(_SYNC_TRACE_ENV)
|
||||
|
||||
|
||||
def _preview(value: Any, limit: int = 160) -> str:
|
||||
text = "" if value is None else str(value)
|
||||
text = text.replace("\n", "\\n")
|
||||
if len(text) > limit:
|
||||
return text[:limit] + "..."
|
||||
return text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Process-level atexit safety net — ensures pending sessions are committed
|
||||
# even if shutdown_memory_provider is never called (e.g. gateway crash,
|
||||
|
|
@ -488,6 +502,25 @@ ADD_RESOURCE_SCHEMA = {
|
|||
}
|
||||
|
||||
|
||||
# Recall tools (read-only) whose results we never re-ingest into OpenViking —
|
||||
# echoing recalled memory back into the session transcript would re-store it.
|
||||
# Write tools (viking_remember / viking_add_resource) are intentionally NOT
|
||||
# here. Derived from the canonical schema names so renames can't desync.
|
||||
_OPENVIKING_RECALL_TOOL_NAMES = {
|
||||
SEARCH_SCHEMA["name"],
|
||||
READ_SCHEMA["name"],
|
||||
BROWSE_SCHEMA["name"],
|
||||
}
|
||||
|
||||
# Canonical tool_status values emitted in OpenViking batch tool parts.
|
||||
_TOOL_STATUS_COMPLETED = "completed"
|
||||
_TOOL_STATUS_ERROR = "error"
|
||||
_TOOL_STATUS_PENDING = "pending"
|
||||
# Inbound status aliases (from varied tool-result shapes) -> canonical above.
|
||||
_TOOL_STATUS_ERROR_ALIASES = {"error", "failed", "failure"}
|
||||
_TOOL_STATUS_COMPLETED_ALIASES = {"completed", "complete", "success", "succeeded"}
|
||||
|
||||
|
||||
def _zip_directory(dir_path: Path) -> Path:
|
||||
"""Create a temporary zip file containing a directory tree."""
|
||||
root = dir_path.resolve()
|
||||
|
|
@ -2221,7 +2254,10 @@ class OpenVikingMemoryProvider(MemoryProvider):
|
|||
|
||||
def _commit_session(self, sid: str, turn_count: int, *, context: str) -> bool:
|
||||
try:
|
||||
self._client.post(f"/api/v1/sessions/{sid}/commit")
|
||||
self._client.post(
|
||||
f"/api/v1/sessions/{sid}/commit",
|
||||
{"keep_recent_count": 0},
|
||||
)
|
||||
self._mark_session_committed(sid)
|
||||
logger.info("OpenViking session %s committed %s (%d turns)", sid, context, turn_count)
|
||||
return True
|
||||
|
|
@ -2293,7 +2329,265 @@ class OpenVikingMemoryProvider(MemoryProvider):
|
|||
with self._prefetch_lock:
|
||||
self._prefetch_result = ""
|
||||
|
||||
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
|
||||
@staticmethod
|
||||
def _message_text(content: Any) -> str:
|
||||
"""Extract text from OpenAI-style string/list content."""
|
||||
return flatten_message_text(content)
|
||||
|
||||
@classmethod
|
||||
def _message_matches_text(cls, message: Dict[str, Any], expected: Any) -> bool:
|
||||
expected_text = cls._message_text(expected).strip()
|
||||
if not expected_text:
|
||||
return False
|
||||
actual_text = cls._message_text(message.get("content")).strip()
|
||||
return actual_text == expected_text
|
||||
|
||||
@classmethod
|
||||
def _extract_current_turn_messages(
|
||||
cls,
|
||||
messages: Optional[List[Dict[str, Any]]],
|
||||
user_content: str,
|
||||
assistant_content: str,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Slice the completed turn out of Hermes' full canonical transcript."""
|
||||
if not messages:
|
||||
return []
|
||||
|
||||
end_idx: Optional[int] = None
|
||||
if cls._message_text(assistant_content).strip():
|
||||
for idx in range(len(messages) - 1, -1, -1):
|
||||
message = messages[idx]
|
||||
if (
|
||||
isinstance(message, dict)
|
||||
and message.get("role") == "assistant"
|
||||
and cls._message_matches_text(message, assistant_content)
|
||||
):
|
||||
end_idx = idx
|
||||
break
|
||||
if end_idx is None:
|
||||
for idx in range(len(messages) - 1, -1, -1):
|
||||
message = messages[idx]
|
||||
if isinstance(message, dict) and message.get("role") == "assistant":
|
||||
end_idx = idx
|
||||
break
|
||||
if end_idx is None:
|
||||
end_idx = len(messages) - 1
|
||||
|
||||
start_idx: Optional[int] = None
|
||||
if cls._message_text(user_content).strip():
|
||||
for idx in range(end_idx, -1, -1):
|
||||
message = messages[idx]
|
||||
if (
|
||||
isinstance(message, dict)
|
||||
and message.get("role") == "user"
|
||||
and cls._message_matches_text(message, user_content)
|
||||
):
|
||||
start_idx = idx
|
||||
break
|
||||
if start_idx is None:
|
||||
for idx in range(end_idx, -1, -1):
|
||||
message = messages[idx]
|
||||
if isinstance(message, dict) and message.get("role") == "user":
|
||||
start_idx = idx
|
||||
break
|
||||
if start_idx is None:
|
||||
return []
|
||||
|
||||
return [message for message in messages[start_idx : end_idx + 1] if isinstance(message, dict)]
|
||||
|
||||
@staticmethod
|
||||
def _tool_call_id(tool_call: Dict[str, Any]) -> str:
|
||||
return str(tool_call.get("id") or tool_call.get("tool_call_id") or "")
|
||||
|
||||
@staticmethod
|
||||
def _tool_call_name(tool_call: Dict[str, Any]) -> str:
|
||||
function = tool_call.get("function")
|
||||
if isinstance(function, dict):
|
||||
return str(function.get("name") or "")
|
||||
return str(tool_call.get("name") or "")
|
||||
|
||||
@staticmethod
|
||||
def _is_openviking_recall_tool_name(tool_name: Any) -> bool:
|
||||
return str(tool_name or "").strip().lower() in _OPENVIKING_RECALL_TOOL_NAMES
|
||||
|
||||
@staticmethod
|
||||
def _tool_call_input(tool_call: Dict[str, Any]) -> Dict[str, Any]:
|
||||
function = tool_call.get("function")
|
||||
raw_args: Any = None
|
||||
if isinstance(function, dict):
|
||||
raw_args = function.get("arguments")
|
||||
if raw_args is None:
|
||||
raw_args = tool_call.get("args")
|
||||
if raw_args is None:
|
||||
return {}
|
||||
if isinstance(raw_args, dict):
|
||||
return raw_args
|
||||
if isinstance(raw_args, str):
|
||||
if not raw_args.strip():
|
||||
return {}
|
||||
try:
|
||||
parsed = json.loads(raw_args)
|
||||
except Exception:
|
||||
return {"value": raw_args}
|
||||
if isinstance(parsed, dict):
|
||||
return parsed
|
||||
return {"value": parsed}
|
||||
return {"value": raw_args}
|
||||
|
||||
@classmethod
|
||||
def _tool_result_status(cls, message: Dict[str, Any]) -> str:
|
||||
raw_status = str(message.get("status") or message.get("tool_status") or "").lower()
|
||||
if raw_status in _TOOL_STATUS_ERROR_ALIASES:
|
||||
return _TOOL_STATUS_ERROR
|
||||
if raw_status in _TOOL_STATUS_COMPLETED_ALIASES:
|
||||
return _TOOL_STATUS_COMPLETED
|
||||
|
||||
text = cls._message_text(message.get("content")).strip()
|
||||
if text:
|
||||
try:
|
||||
parsed = json.loads(text)
|
||||
except Exception:
|
||||
parsed = None
|
||||
if isinstance(parsed, dict):
|
||||
status = str(parsed.get("status") or "").lower()
|
||||
exit_code = parsed.get("exit_code")
|
||||
if (
|
||||
status in _TOOL_STATUS_ERROR_ALIASES
|
||||
or parsed.get("success") is False
|
||||
or bool(parsed.get("error"))
|
||||
or (isinstance(exit_code, int) and exit_code != 0)
|
||||
):
|
||||
return _TOOL_STATUS_ERROR
|
||||
|
||||
return _TOOL_STATUS_COMPLETED
|
||||
|
||||
@classmethod
|
||||
def _messages_to_openviking_batch(
|
||||
cls,
|
||||
messages: List[Dict[str, Any]],
|
||||
*,
|
||||
assistant_peer_id: str = "",
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Convert Hermes canonical messages into OpenViking batch payloads."""
|
||||
assistant_peer_id = str(assistant_peer_id or "").strip()
|
||||
tool_calls_by_id: Dict[str, Dict[str, Any]] = {}
|
||||
completed_tool_ids: set[str] = set()
|
||||
skipped_tool_ids: set[str] = set()
|
||||
for message in messages:
|
||||
if not isinstance(message, dict):
|
||||
continue
|
||||
if message.get("role") == "tool":
|
||||
tool_id = str(message.get("tool_call_id") or message.get("id") or "")
|
||||
if tool_id:
|
||||
completed_tool_ids.add(tool_id)
|
||||
if cls._is_openviking_recall_tool_name(message.get("name")):
|
||||
skipped_tool_ids.add(tool_id)
|
||||
continue
|
||||
if message.get("role") != "assistant":
|
||||
continue
|
||||
for tool_call in message.get("tool_calls") or []:
|
||||
if not isinstance(tool_call, dict):
|
||||
continue
|
||||
tool_id = cls._tool_call_id(tool_call)
|
||||
tool_name = cls._tool_call_name(tool_call)
|
||||
if tool_id:
|
||||
tool_calls_by_id[tool_id] = {
|
||||
"tool_name": tool_name,
|
||||
"tool_input": cls._tool_call_input(tool_call),
|
||||
}
|
||||
if cls._is_openviking_recall_tool_name(tool_name):
|
||||
skipped_tool_ids.add(tool_id)
|
||||
|
||||
payload_messages: List[Dict[str, Any]] = []
|
||||
pending_tool_parts: List[Dict[str, Any]] = []
|
||||
|
||||
def payload_message(role: str, parts: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
payload: Dict[str, Any] = {"role": role, "parts": parts}
|
||||
if role == "assistant" and assistant_peer_id:
|
||||
payload["peer_id"] = assistant_peer_id
|
||||
return payload
|
||||
|
||||
def flush_tool_parts() -> None:
|
||||
nonlocal pending_tool_parts
|
||||
if pending_tool_parts:
|
||||
payload_messages.append(payload_message("assistant", pending_tool_parts))
|
||||
pending_tool_parts = []
|
||||
|
||||
for message in messages:
|
||||
if not isinstance(message, dict):
|
||||
continue
|
||||
|
||||
role = str(message.get("role") or "")
|
||||
if role in {"system", "developer"}:
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
tool_id = str(message.get("tool_call_id") or message.get("id") or "")
|
||||
prior_call = tool_calls_by_id.get(tool_id, {})
|
||||
tool_name = str(message.get("name") or prior_call.get("tool_name") or "")
|
||||
if tool_id in skipped_tool_ids or cls._is_openviking_recall_tool_name(tool_name):
|
||||
continue
|
||||
tool_part = {
|
||||
"type": "tool",
|
||||
"tool_id": tool_id,
|
||||
"tool_name": tool_name,
|
||||
"tool_input": prior_call.get("tool_input", {}),
|
||||
"tool_output": cls._message_text(message.get("content")),
|
||||
"tool_status": cls._tool_result_status(message),
|
||||
}
|
||||
pending_tool_parts.append(tool_part)
|
||||
continue
|
||||
|
||||
if role not in {"user", "assistant"}:
|
||||
continue
|
||||
|
||||
flush_tool_parts()
|
||||
parts: List[Dict[str, Any]] = []
|
||||
text = cls._message_text(message.get("content"))
|
||||
if text:
|
||||
parts.append({"type": "text", "text": text})
|
||||
|
||||
if role == "assistant":
|
||||
for tool_call in message.get("tool_calls") or []:
|
||||
if not isinstance(tool_call, dict):
|
||||
continue
|
||||
tool_id = cls._tool_call_id(tool_call)
|
||||
tool_name = cls._tool_call_name(tool_call)
|
||||
if tool_id in skipped_tool_ids or cls._is_openviking_recall_tool_name(tool_name):
|
||||
continue
|
||||
if tool_id in completed_tool_ids:
|
||||
continue
|
||||
# Reuse the tool_input parsed in the pre-scan when available
|
||||
# (non-empty ids are cached); fall back to parsing for the
|
||||
# uncached empty-id case so we never drop arguments.
|
||||
prior_call = tool_calls_by_id.get(tool_id) if tool_id else None
|
||||
tool_input = (
|
||||
prior_call["tool_input"]
|
||||
if prior_call is not None
|
||||
else cls._tool_call_input(tool_call)
|
||||
)
|
||||
parts.append({
|
||||
"type": "tool",
|
||||
"tool_id": tool_id,
|
||||
"tool_name": tool_name,
|
||||
"tool_input": tool_input,
|
||||
"tool_status": _TOOL_STATUS_PENDING,
|
||||
})
|
||||
|
||||
if parts:
|
||||
payload_messages.append(payload_message(role, parts))
|
||||
|
||||
flush_tool_parts()
|
||||
return payload_messages
|
||||
|
||||
def sync_turn(
|
||||
self,
|
||||
user_content: str,
|
||||
assistant_content: str,
|
||||
*,
|
||||
session_id: str = "",
|
||||
messages: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> None:
|
||||
"""Record the conversation turn in OpenViking's session (non-blocking)."""
|
||||
if not self._client:
|
||||
return
|
||||
|
|
@ -2302,6 +2596,40 @@ class OpenVikingMemoryProvider(MemoryProvider):
|
|||
if not user_content:
|
||||
return
|
||||
|
||||
turn_messages = (
|
||||
self._extract_current_turn_messages(messages, user_content, assistant_content)
|
||||
if messages is not None
|
||||
else []
|
||||
)
|
||||
if turn_messages:
|
||||
turn_messages = [dict(message) for message in turn_messages]
|
||||
for message in turn_messages:
|
||||
if message.get("role") == "user":
|
||||
message["content"] = user_content
|
||||
break
|
||||
batch_messages = self._messages_to_openviking_batch(
|
||||
turn_messages,
|
||||
assistant_peer_id=getattr(self, "_agent", _DEFAULT_AGENT),
|
||||
)
|
||||
|
||||
if _sync_trace_enabled():
|
||||
logger.info(
|
||||
"OpenViking sync_turn trace: session_arg=%r cached_session=%r "
|
||||
"messages_param_supported=true messages_present=%s message_count=%s "
|
||||
"turn_message_count=%d batch_message_count=%d user_len=%d assistant_len=%d "
|
||||
"user_preview=%r assistant_preview=%r",
|
||||
session_id,
|
||||
self._session_id,
|
||||
messages is not None,
|
||||
len(messages) if messages is not None else None,
|
||||
len(turn_messages),
|
||||
len(batch_messages),
|
||||
len(str(user_content or "")),
|
||||
len(str(assistant_content or "")),
|
||||
_preview(user_content),
|
||||
_preview(assistant_content),
|
||||
)
|
||||
|
||||
# Snapshot the sid and bump the turn counter atomically so a
|
||||
# concurrent on_session_switch/on_session_end can't interleave its
|
||||
# snapshot+reset between the read and the increment (lost turn) and so
|
||||
|
|
@ -2313,24 +2641,39 @@ class OpenVikingMemoryProvider(MemoryProvider):
|
|||
self._turn_count += 1
|
||||
|
||||
def _sync():
|
||||
try:
|
||||
client = self._new_client()
|
||||
def _post_turn(client: _VikingClient) -> None:
|
||||
if batch_messages:
|
||||
payload = {"messages": batch_messages}
|
||||
if _sync_trace_enabled():
|
||||
logger.info(
|
||||
"OpenViking sync_turn trace: POST /api/v1/sessions/%s/messages/batch payload=%s",
|
||||
sid,
|
||||
json.dumps(payload, ensure_ascii=False),
|
||||
)
|
||||
try:
|
||||
client.post(f"/api/v1/sessions/{sid}/messages/batch", payload)
|
||||
return
|
||||
except Exception as batch_error:
|
||||
logger.warning(
|
||||
"OpenViking structured sync failed; falling back to text sync: %s",
|
||||
batch_error,
|
||||
)
|
||||
|
||||
self._post_session_turn(
|
||||
client,
|
||||
sid,
|
||||
user_content[:4000],
|
||||
assistant_content[:4000],
|
||||
self._message_text(assistant_content)[:4000],
|
||||
)
|
||||
|
||||
try:
|
||||
client = self._new_client()
|
||||
_post_turn(client)
|
||||
except Exception as e:
|
||||
logger.debug("OpenViking sync_turn failed, reconnecting: %s", e)
|
||||
try:
|
||||
client = self._new_client()
|
||||
self._post_session_turn(
|
||||
client,
|
||||
sid,
|
||||
user_content[:4000],
|
||||
assistant_content[:4000],
|
||||
)
|
||||
_post_turn(client)
|
||||
except Exception as retry_error:
|
||||
logger.warning("OpenViking sync_turn failed: %s", retry_error)
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import hashlib
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import struct
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
|
@ -29,6 +30,7 @@ VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
|
|||
_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
|
||||
_DISCORD_COMMAND_SYNC_STATE_SUBDIR = "gateway"
|
||||
_DISCORD_COMMAND_SYNC_STATE_FILENAME = "discord_command_sync_state.json"
|
||||
_DISCORD_NONCONVERSATIONAL_STATE_FILENAME = "discord_nonconversational_messages.json"
|
||||
_DISCORD_COMMAND_SYNC_MUTATION_INTERVAL_SECONDS = 4.5
|
||||
_DISCORD_COMMAND_SYNC_MAX_RATE_LIMIT_SLEEP_SECONDS = 30.0
|
||||
# Discord enforces a hard cap of 100 global application (slash) commands per
|
||||
|
|
@ -37,6 +39,37 @@ _DISCORD_COMMAND_SYNC_MAX_RATE_LIMIT_SLEEP_SECONDS = 30.0
|
|||
# every slash command — not just the overflow ones. We keep the desired set
|
||||
# at or below this limit at registration time.
|
||||
_DISCORD_MAX_APP_COMMANDS = 100
|
||||
_DISCORD_NONCONVERSATIONAL_METADATA_KEYS = frozenset({
|
||||
"non_conversational",
|
||||
"non_conversational_history",
|
||||
})
|
||||
# Upgrade-bridge fallback only. The primary mechanism is the persisted
|
||||
# non-conversational message-ID set populated from explicitly marked sends
|
||||
# (metadata["non_conversational"]). These regexes exist solely to recognize
|
||||
# status bumps emitted by an older gateway version that pre-dates the marking,
|
||||
# so they don't partition history after an upgrade. New emitters should set the
|
||||
# metadata flag, not rely on a regex here.
|
||||
_DISCORD_NONCONVERSATIONAL_HISTORY_MESSAGE_PATTERNS = (
|
||||
re.compile(r"^\s*💾\s*Self-improvement review:\s+\S[\s\S]*$", re.IGNORECASE),
|
||||
# Legacy/background-review test doubles used this shorter form before the
|
||||
# self-improvement prefix became the stable emitter contract.
|
||||
re.compile(
|
||||
r"^\s*💾\s+Skill\s+['\"].+?['\"]\s+(?:created|updated|improved|patched)\.?\s*$",
|
||||
re.IGNORECASE,
|
||||
),
|
||||
re.compile(r"^\s*⏳\s+Working\s+—\s+\d+\s+min(?:\s|$)", re.IGNORECASE),
|
||||
re.compile(
|
||||
r"^\s*\[Background process\s+\S+\s+"
|
||||
r"(?:finished with exit code|is still running~)[\s\S]*\]\s*$",
|
||||
re.IGNORECASE,
|
||||
),
|
||||
re.compile(
|
||||
r"^\s*(?:✅|❌)\s+Hermes update\s+"
|
||||
r"(?:finished|failed|timed out)[\s\S]*$",
|
||||
re.IGNORECASE,
|
||||
),
|
||||
re.compile(r"^\s*♻️?\s+Gateway\s+(?:restarted successfully|online\b)[\s\S]*$", re.IGNORECASE),
|
||||
)
|
||||
|
||||
try:
|
||||
import discord
|
||||
|
|
@ -55,7 +88,6 @@ from pathlib import Path as _Path
|
|||
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
import re
|
||||
|
||||
from gateway.platforms.helpers import MessageDeduplicator, ThreadParticipationTracker
|
||||
from utils import atomic_json_write
|
||||
|
|
@ -132,6 +164,73 @@ def _find_discord_windows_bundled_opus(discord_module: Any = None) -> Optional[s
|
|||
return None
|
||||
|
||||
|
||||
class _DiscordNonConversationalMessageTracker:
|
||||
"""Persistent bounded set of Discord message IDs that are status noise."""
|
||||
|
||||
_MAX_TRACKED = 2000
|
||||
|
||||
def __init__(self, max_tracked: int = _MAX_TRACKED):
|
||||
self._max_tracked = max_tracked
|
||||
self._ids: dict[str, None] = dict.fromkeys(self._load())
|
||||
|
||||
def _state_path(self) -> _Path:
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
return (
|
||||
get_hermes_home()
|
||||
/ _DISCORD_COMMAND_SYNC_STATE_SUBDIR
|
||||
/ _DISCORD_NONCONVERSATIONAL_STATE_FILENAME
|
||||
)
|
||||
|
||||
def _load(self) -> list[str]:
|
||||
path = self._state_path()
|
||||
if not path.exists():
|
||||
return []
|
||||
try:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
if isinstance(data, list):
|
||||
return [str(message_id) for message_id in data if str(message_id).strip()]
|
||||
except Exception:
|
||||
logger.debug("[%s] Failed to load non-conversational Discord IDs", "Discord")
|
||||
return []
|
||||
|
||||
def _save(self) -> None:
|
||||
ids = list(self._ids)
|
||||
if len(ids) > self._max_tracked:
|
||||
ids = ids[-self._max_tracked:]
|
||||
self._ids = dict.fromkeys(ids)
|
||||
try:
|
||||
atomic_json_write(self._state_path(), ids, indent=None)
|
||||
except Exception:
|
||||
logger.debug("[%s] Failed to save non-conversational Discord IDs", "Discord", exc_info=True)
|
||||
|
||||
def mark_many(self, message_ids: List[str]) -> None:
|
||||
changed = False
|
||||
for message_id in message_ids:
|
||||
key = str(message_id or "").strip()
|
||||
if key and key not in self._ids:
|
||||
self._ids[key] = None
|
||||
changed = True
|
||||
if changed:
|
||||
self._save()
|
||||
|
||||
def __contains__(self, message_id: str) -> bool:
|
||||
return str(message_id or "") in self._ids
|
||||
|
||||
|
||||
def _metadata_marks_nonconversational(metadata: Optional[Dict[str, Any]]) -> bool:
|
||||
"""Return True when an outbound send was explicitly marked as status-only."""
|
||||
if not isinstance(metadata, dict):
|
||||
return False
|
||||
return any(bool(metadata.get(key)) for key in _DISCORD_NONCONVERSATIONAL_METADATA_KEYS)
|
||||
|
||||
|
||||
def _looks_like_nonconversational_history_message(content: str) -> bool:
|
||||
"""Fallback recognizer for legacy status bumps missing persisted IDs."""
|
||||
text = content or ""
|
||||
return any(pattern.match(text) for pattern in _DISCORD_NONCONVERSATIONAL_HISTORY_MESSAGE_PATTERNS)
|
||||
|
||||
|
||||
def _clean_discord_id(entry: str) -> str:
|
||||
"""Strip common prefixes from a Discord user ID or username entry.
|
||||
|
||||
|
|
@ -681,6 +780,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
# history backfill to skip the full scan on hot paths. Falls back to
|
||||
# scanning channel.history() on cache miss (cold start / restart).
|
||||
self._last_self_message_id: Dict[str, str] = {}
|
||||
# Persistent set of bot-authored lifecycle/status message IDs that
|
||||
# should not act as conversational history boundaries after restart.
|
||||
self._nonconversational_messages = _DiscordNonConversationalMessageTracker()
|
||||
|
||||
def _handle_bot_task_done(self, task: asyncio.Task) -> None:
|
||||
"""Surface post-startup discord.py task exits to the gateway supervisor.
|
||||
|
|
@ -1577,6 +1679,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
thread_id = None
|
||||
if metadata and metadata.get("thread_id"):
|
||||
thread_id = metadata["thread_id"]
|
||||
nonconversational = _metadata_marks_nonconversational(metadata)
|
||||
|
||||
if thread_id:
|
||||
# Fetch the thread directly — threads are addressed by their own ID.
|
||||
|
|
@ -1654,7 +1757,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
# backfill — avoids a full channel.history() scan on hot paths.
|
||||
if message_ids:
|
||||
_target_id = thread_id or chat_id
|
||||
self._last_self_message_id[_target_id] = message_ids[-1]
|
||||
if nonconversational:
|
||||
self._nonconversational_messages.mark_many(message_ids)
|
||||
elif not _looks_like_nonconversational_history_message(content):
|
||||
self._last_self_message_id[_target_id] = message_ids[-1]
|
||||
|
||||
return SendResult(
|
||||
success=True,
|
||||
|
|
@ -4203,23 +4309,29 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
after=_after_obj,
|
||||
oldest_first=False,
|
||||
):
|
||||
# Skip system messages (pins, joins, thread renames, etc.)
|
||||
if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
|
||||
continue
|
||||
|
||||
content = getattr(msg, "clean_content", msg.content) or ""
|
||||
if (
|
||||
str(getattr(msg, "id", "")) in self._nonconversational_messages
|
||||
or _looks_like_nonconversational_history_message(content)
|
||||
):
|
||||
continue
|
||||
|
||||
# Stop at our own message — this is the partition point.
|
||||
# Everything before this is already in the session transcript.
|
||||
# (Redundant when _after_obj is set, but needed for cold start.)
|
||||
if msg.author == self._client.user:
|
||||
break
|
||||
|
||||
# Skip system messages (pins, joins, thread renames, etc.)
|
||||
if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
|
||||
continue
|
||||
|
||||
# Respect DISCORD_ALLOW_BOTS for other bots.
|
||||
# For history context, "mentions" is treated as "all" — we are
|
||||
# deciding what context to show, not whether to respond.
|
||||
if getattr(msg.author, "bot", False) and not include_other_bots:
|
||||
continue
|
||||
|
||||
content = getattr(msg, "clean_content", msg.content) or ""
|
||||
if not content and msg.attachments:
|
||||
content = "(attachment)"
|
||||
if not content:
|
||||
|
|
@ -4566,6 +4678,13 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
Open-ended mode (``choices`` empty/None): renders the question as
|
||||
plain embed text — no buttons. The gateway's text-intercept captures
|
||||
the next message in this session and resolves the clarify.
|
||||
|
||||
Choice normalisation: ``choices`` may contain bare strings OR dicts
|
||||
(LLMs sometimes emit ``[{"description": "..."}]`` instead of bare
|
||||
strings, which would otherwise render as raw Python repr on the
|
||||
button label). Dict choices are unwrapped against the canonical
|
||||
LLM tool-call keys ``label``, ``description``, ``text``, ``title``
|
||||
in that order. Dicts with none of those keys are dropped.
|
||||
"""
|
||||
if not self._client or not DISCORD_AVAILABLE:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
|
@ -4591,8 +4710,37 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
color=discord.Color.orange(),
|
||||
)
|
||||
|
||||
# Normalise choices: LLMs sometimes emit `[{"description": "..."}]`
|
||||
# instead of bare strings, which would render as raw Python repr on
|
||||
# the button label. Unwrap the common shapes, then stringify.
|
||||
def _flatten_choice(c):
|
||||
if c is None:
|
||||
return ""
|
||||
if isinstance(c, str):
|
||||
return c.strip()
|
||||
if isinstance(c, dict):
|
||||
# Prefer the canonical LLM tool-call user-facing keys
|
||||
# in the order the LLM is most likely to emit them.
|
||||
# 'name' and 'value' are deliberately NOT here: they're
|
||||
# Discord-component-shaped fields that could appear in
|
||||
# dicts that aren't meant to be choices (e.g., a
|
||||
# developer-error wiring that passes a Button-shaped
|
||||
# object). Picking them would leak raw enum values
|
||||
# or 4-char model identifiers onto user-facing buttons.
|
||||
# If a dict has none of the canonical keys, drop it
|
||||
# rather than picking some random field — a garbage
|
||||
# button label is worse than no button at all.
|
||||
for key in ("label", "description", "text", "title"):
|
||||
v = c.get(key)
|
||||
if isinstance(v, str) and v.strip():
|
||||
return v.strip()
|
||||
return ""
|
||||
if isinstance(c, (list, tuple)):
|
||||
return " ".join(_flatten_choice(x) for x in c).strip()
|
||||
return str(c).strip()
|
||||
|
||||
clean_choices = [
|
||||
str(c).strip() for c in (choices or []) if c is not None and str(c).strip()
|
||||
s for s in (_flatten_choice(c) for c in (choices or [])) if s
|
||||
]
|
||||
# Discord allows up to 5 buttons per row, 5 rows per view = 25.
|
||||
# We reserve one slot for the "Other" button, so cap at 24 choices.
|
||||
|
|
@ -4657,6 +4805,8 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
)
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
view._message = msg # store for on_timeout expiration editing
|
||||
if _metadata_marks_nonconversational(metadata):
|
||||
self._nonconversational_messages.mark_many([str(msg.id)])
|
||||
return SendResult(success=True, message_id=str(msg.id))
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
|
@ -6129,10 +6279,47 @@ def _define_discord_view_classes() -> None:
|
|||
self.resolved = False
|
||||
|
||||
for index, choice in enumerate(self.choices):
|
||||
# Discord button labels are capped at 80 chars.
|
||||
label_body = choice if len(choice) <= 75 else choice[:72] + "..."
|
||||
# Discord button labels are capped at 80 chars. On mobile the
|
||||
# visible width is much narrower (often <40 chars before it
|
||||
# wraps to 2 lines and the second line gets cut off), so we
|
||||
# cap aggressively and cut at a word boundary when possible
|
||||
# to keep the trailing text readable.
|
||||
#
|
||||
# Cut strategy (most-preferred to least-preferred):
|
||||
# 1. Last space in the trailing half of the budget
|
||||
# (cleanest word boundary)
|
||||
# 2. Last soft boundary in the trailing half of the
|
||||
# budget (hyphen, comma, period, paren)
|
||||
# 3. Hard cut at the budget limit (last resort)
|
||||
prefix = f"{index + 1}. "
|
||||
budget = 80 - len(prefix)
|
||||
if len(choice) <= budget:
|
||||
label_body = choice
|
||||
else:
|
||||
truncated = choice[: budget - 1].rstrip()
|
||||
cut_at = -1
|
||||
# 1. Last space in the trailing half of the budget.
|
||||
space = truncated.rfind(" ")
|
||||
if space >= budget // 2:
|
||||
cut_at = space
|
||||
# 2. Soft boundary — only if no word boundary found.
|
||||
# Find the latest soft boundary in the trailing half
|
||||
# of the budget; that maximizes preserved text length.
|
||||
# Cut AT the soft boundary (inclusive) so the label
|
||||
# ends on the soft char (e.g. "-" or ",") rather than
|
||||
# on the alpha char that followed it.
|
||||
if cut_at < 0:
|
||||
latest_soft = max(
|
||||
(truncated.rfind(s) for s in ("-", ",", ".", ")")),
|
||||
default=-1,
|
||||
)
|
||||
if latest_soft >= budget // 2:
|
||||
cut_at = latest_soft + 1
|
||||
if cut_at > 0:
|
||||
truncated = truncated[:cut_at]
|
||||
label_body = truncated.rstrip() + "…"
|
||||
button = discord.ui.Button(
|
||||
label=f"{index + 1}. {label_body}",
|
||||
label=f"{prefix}{label_body}",
|
||||
style=discord.ButtonStyle.primary,
|
||||
custom_id=f"clarify:{clarify_id}:{index}",
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue