diff --git a/agent/memory_manager.py b/agent/memory_manager.py index 62cbd6ae1a..22c999dd7b 100644 --- a/agent/memory_manager.py +++ b/agent/memory_manager.py @@ -176,24 +176,32 @@ class MemoryManager: # -- Prefetch / recall --------------------------------------------------- - def prefetch_all(self, query: str, *, session_id: str = "") -> str: - """Collect prefetch context from all providers. + def prefetch_all_details(self, query: str, *, session_id: str = "") -> tuple[str, list[str]]: + """Collect prefetch context plus provider names from all providers. - Returns merged context text labeled by provider. Empty providers - are skipped. Failures in one provider don't block others. + Returns merged context text and the names of providers that contributed + non-empty context. Empty providers are skipped. Failures in one provider + don't block others. """ parts = [] + providers = [] for provider in self._providers: try: result = provider.prefetch(query, session_id=session_id) if result and result.strip(): parts.append(result) + providers.append(provider.name) except Exception as e: logger.debug( "Memory provider '%s' prefetch failed (non-fatal): %s", provider.name, e, ) - return "\n\n".join(parts) + return "\n\n".join(parts), providers + + def prefetch_all(self, query: str, *, session_id: str = "") -> str: + """Collect prefetch context from all providers.""" + text, _providers = self.prefetch_all_details(query, session_id=session_id) + return text def queue_prefetch_all(self, query: str, *, session_id: str = "") -> None: """Queue background prefetch on all providers for the next turn.""" diff --git a/gateway/display_config.py b/gateway/display_config.py index 78e8bc9afa..9f1f1832a3 100644 --- a/gateway/display_config.py +++ b/gateway/display_config.py @@ -35,6 +35,9 @@ _GLOBAL_DEFAULTS: dict[str, Any] = { "show_reasoning": False, "tool_preview_length": 0, "streaming": None, # None = follow top-level streaming config + # Recalled memory previews are bounded and can be disabled per platform. + "memory_context": "preview", + "memory_context_max_chars": 1200, } # --------------------------------------------------------------------------- @@ -191,4 +194,16 @@ def _normalise(setting: str, value: Any) -> Any: return int(value) except (TypeError, ValueError): return 0 + if setting == "memory_context": + if value is False: + return "off" + if value is True: + return "preview" + mode = str(value).lower() + return mode if mode in {"off", "summary", "preview", "full"} else "preview" + if setting == "memory_context_max_chars": + try: + return max(0, int(value)) + except (TypeError, ValueError): + return 1200 return value diff --git a/gateway/run.py b/gateway/run.py index 3d9648bf7a..753e3d756c 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -9393,11 +9393,138 @@ class GatewayRunner: last_progress_msg = [None] # Track last message for dedup repeat_count = [0] # How many times the same message repeated + def _discord_escape_preview(text: str) -> str: + """Keep previews Discord-safe without hiding useful markdown structure.""" + text = str(text or "") + text = text.replace("```", "'''") + # Avoid accidental pings from recalled chat/user text. + text = text.replace("@everyone", "@\u200beveryone").replace("@here", "@\u200bhere") + text = re.sub(r"<@([!&]?\d+)>", lambda m: f"<@\u200b{m.group(1)}>", text) + return text + + def _discord_quote(text: str, max_chars: int) -> str: + """Render a bounded Discord-native blockquote preview.""" + text = _discord_escape_preview(text).strip() + if max_chars > 0 and len(text) > max_chars: + text = text[:max_chars] if max_chars <= 3 else text[:max_chars - 3].rstrip() + "..." + lines = [line.strip() for line in text.splitlines() if line.strip()] + return "\n".join(f"> {line}" for line in lines[:12]) + + def _discord_memory_preview(text: str, max_chars: int) -> str: + """Render recalled memory context as readable Discord markdown. + + Hindsight returns agent-injected context that can include provider + instructions plus raw JSON-ish transcript objects. For Discord + dogfooding, show the actual recalled snippets instead of dumping + the injection wrapper. + """ + raw = str(text or "").strip() + if not raw: + return "" + + def _clean_snippet(value: str) -> str: + value = re.sub(r"\s+", " ", str(value)).strip() + value = re.sub(r"^(?:User|Assistant):\s*", "", value, flags=re.IGNORECASE).strip() + return re.sub(r"^(?:\[[^\]]*\]\s*)+", "", value).strip() + + def _is_noise(value: str) -> bool: + stripped = value.strip() + lower = stripped.lower() + return ( + not stripped + or lower.startswith("[system note:") + or lower.startswith("# hindsight memory") + or lower.startswith("use this to answer questions") + or lower == "here." + or '"role"' in stripped + or '"content"' in stripped + or (stripped.startswith(("-", "*")) and "[{" in stripped) + ) + + snippets = [] + for match in re.finditer(r'"content"\s*:\s*"((?:\\.|[^"\\])*)"', raw): + encoded = match.group(1) + try: + decoded = json.loads(f'"{encoded}"') + except Exception: + decoded = encoded + decoded = _clean_snippet(decoded) + if decoded: + snippets.append(decoded) + if len(snippets) >= 4: + break + + if not snippets: + raw_without_tags = re.sub(r"", "", raw, flags=re.IGNORECASE) + for line in raw_without_tags.splitlines(): + cleaned = _clean_snippet(line) + if _is_noise(cleaned): + continue + snippets.append(cleaned) + if len(snippets) >= 4: + break + + if snippets: + lines = [] + used = 0 + for idx, body in enumerate(snippets, 1): + item = _discord_escape_preview(body) + prefix = f"> {idx}. " + remaining = max_chars - used if max_chars > 0 else len(prefix) + len(item) + if max_chars > 0 and remaining <= len(prefix): + break + cap = min(280, max(0, remaining - len(prefix))) if max_chars > 0 else 280 + if len(item) > cap: + item = item[:cap] if cap <= 3 else item[: cap - 3].rstrip() + "..." + line = f"{prefix}{item}" + lines.append(line) + used += len(line) + 1 + if lines: + return "\n".join(lines) + + # Fallback: never quote raw injected-memory wrappers or JSON-ish + # transcript payloads. If extraction above missed the provider's + # shape, prefer a terse hidden-preview marker over leaking + # / system-note text into Discord. + filtered_lines = [] + raw_without_tags = re.sub(r"", "", raw, flags=re.IGNORECASE) + for line in raw_without_tags.splitlines(): + stripped = _clean_snippet(line) + if _is_noise(stripped): + continue + filtered_lines.append(stripped) + if not filtered_lines: + return "> _Preview hidden: recalled context was structured/internal metadata._" + return _discord_quote("\n".join(filtered_lines), max_chars) + def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs): """Callback invoked by agent on tool lifecycle events.""" if not progress_queue or not _run_still_current(): return + if event_type == "memory.prefetch": + if source.platform != Platform.DISCORD: + return + memory_mode = resolve_display_setting(user_config, platform_key, "memory_context", "summary") + if memory_mode == "off": + return + try: + memory_max_chars = int(resolve_display_setting(user_config, platform_key, "memory_context_max_chars", 1200) or 1200) + except Exception: + memory_max_chars = 1200 + providers = kwargs.get("providers") or [] + if not providers and kwargs.get("provider_count"): + providers = ["memory"] + provider_label = ", ".join(str(p) for p in providers) if providers else "memory" + chars = int(kwargs.get("chars") or len(preview or "")) + msg = f"🧠 **Loaded memory context from {provider_label}** ({chars} chars)" + if memory_mode in ("preview", "full") and preview: + formatted_preview = _discord_memory_preview(preview, memory_max_chars) + if formatted_preview: + msg = f"{msg}\n{formatted_preview}" + progress_queue.put(msg) + return + # Only act on tool.started events (ignore tool.completed, reasoning.available, etc.) if event_type not in ("tool.started",): return diff --git a/run_agent.py b/run_agent.py index f7a929118c..1b1d4dd9f5 100644 --- a/run_agent.py +++ b/run_agent.py @@ -9639,10 +9639,16 @@ class AIAgent: # Use original_user_message (clean input) — user_message may contain # injected skill content that bloats / breaks provider queries. _ext_prefetch_cache = "" + _prefetch_providers: list[str] = [] + _ext_prefetch_progress_emitted = False if self._memory_manager: try: _query = original_user_message if isinstance(original_user_message, str) else "" - _ext_prefetch_cache = self._memory_manager.prefetch_all(_query) or "" + if hasattr(self._memory_manager, "prefetch_all_details"): + _ext_prefetch_cache, _prefetch_providers = self._memory_manager.prefetch_all_details(_query) + else: + _ext_prefetch_cache = self._memory_manager.prefetch_all(_query) or "" + _ext_prefetch_cache = _ext_prefetch_cache or "" except Exception: pass @@ -9786,16 +9792,37 @@ class AIAgent: # never mutated, so nothing leaks into session persistence. if idx == current_turn_user_idx and msg.get("role") == "user": _injections = [] + _memory_context_block = "" if _ext_prefetch_cache: - _fenced = build_memory_context_block(_ext_prefetch_cache) - if _fenced: - _injections.append(_fenced) + _memory_context_block = build_memory_context_block(_ext_prefetch_cache) + if _memory_context_block: + _injections.append(_memory_context_block) if _plugin_user_context: _injections.append(_plugin_user_context) if _injections: _base = api_msg.get("content", "") if isinstance(_base, str): api_msg["content"] = _base + "\n\n" + "\n\n".join(_injections) + if ( + _memory_context_block + and self.tool_progress_callback + and not _ext_prefetch_progress_emitted + ): + _ext_prefetch_progress_emitted = True + try: + _preview = sanitize_context(_ext_prefetch_cache).strip() + self.tool_progress_callback( + "memory.prefetch", + "memory", + _preview, + None, + providers=_prefetch_providers, + provider_count=len(_prefetch_providers), + chars=len(_ext_prefetch_cache), + injected=True, + ) + except Exception: + pass # For ALL assistant messages, pass reasoning back to the API # This ensures multi-turn reasoning context is preserved diff --git a/tests/agent/test_memory_provider.py b/tests/agent/test_memory_provider.py index ca39da70f0..d39a4760a4 100644 --- a/tests/agent/test_memory_provider.py +++ b/tests/agent/test_memory_provider.py @@ -202,6 +202,20 @@ class TestMemoryManager: assert p1.prefetch_queries == ["what do you know?"] assert p2.prefetch_queries == ["what do you know?"] + def test_prefetch_all_details_returns_provider_names(self): + mgr = MemoryManager() + p1 = FakeMemoryProvider("builtin") + p1._prefetch_result = "Memory from builtin" + p2 = FakeMemoryProvider("hindsight") + p2._prefetch_result = "Memory from hindsight" + mgr.add_provider(p1) + mgr.add_provider(p2) + + text, providers = mgr.prefetch_all_details("what do you know?") + + assert text == "Memory from builtin\n\nMemory from hindsight" + assert providers == ["builtin", "hindsight"] + def test_prefetch_skips_empty(self): mgr = MemoryManager() p1 = FakeMemoryProvider("builtin") diff --git a/tests/gateway/test_display_config.py b/tests/gateway/test_display_config.py index 2192d67bc9..95c802a7fe 100644 --- a/tests/gateway/test_display_config.py +++ b/tests/gateway/test_display_config.py @@ -54,6 +54,23 @@ class TestResolveDisplaySetting: # Unknown platform, no config → global default "all" assert resolve_display_setting(config, "unknown_platform", "tool_progress") == "all" + def test_memory_context_defaults_to_preview_for_all_platforms(self): + """Memory context defaults to bounded preview everywhere for dogfood visibility.""" + from gateway.display_config import resolve_display_setting + + config = {} + for plat in ( + "discord", + "telegram", + "tui", + "api_server", + "webhook", + "email", + "unknown_platform", + ): + assert resolve_display_setting(config, plat, "memory_context") == "preview", plat + assert resolve_display_setting(config, plat, "memory_context_max_chars") == 1200, plat + def test_fallback_parameter_used_last(self): """Explicit fallback is used when nothing else matches.""" from gateway.display_config import resolve_display_setting @@ -170,6 +187,28 @@ class TestYAMLNormalisation: config = {"display": {"platforms": {"slack": {"tool_progress": False}}}} assert resolve_display_setting(config, "slack", "tool_progress") == "off" + def test_memory_context_modes_normalised(self): + """Memory context mode values are normalised and validated.""" + from gateway.display_config import resolve_display_setting + + assert resolve_display_setting( + {"display": {"memory_context": False}}, "discord", "memory_context" + ) == "off" + assert resolve_display_setting( + {"display": {"memory_context": True}}, "discord", "memory_context" + ) == "preview" + assert resolve_display_setting( + {"display": {"memory_context": "SUMMARY"}}, "discord", "memory_context" + ) == "summary" + assert resolve_display_setting( + {"display": {"memory_context": "nonsense"}}, "discord", "memory_context" + ) == "preview" + assert resolve_display_setting( + {"display": {"memory_context_max_chars": "80"}}, + "discord", + "memory_context_max_chars", + ) == 80 + # --------------------------------------------------------------------------- # Built-in platform defaults (tier system) diff --git a/tests/gateway/test_run_progress_topics.py b/tests/gateway/test_run_progress_topics.py index 49fb91d449..d83feb56c2 100644 --- a/tests/gateway/test_run_progress_topics.py +++ b/tests/gateway/test_run_progress_topics.py @@ -497,6 +497,67 @@ class VerboseAgent: } +class MemoryPrefetchAgent: + """Agent that emits Hindsight-style memory prefetch progress.""" + + PREVIEW = ( + '# Hindsight Memory (persistent cross-session context)\n\n' + 'Use this to answer questions about the user and prior sessions. Do not call tools to look up information that is already present\n\n' + 'here.\n\n' + '* [{"role": "user", "content": "User: [jim] [] Hindsight health is green\\n[ ] memory.prefetch emits exactly once", ' + '"timestamp": "2026-04-24T20:30:36.770766+00:00"}, ' + '{"role": "assistant", "content": "Assistant: Updated current checklist:\\n\\n```md\\n[x] Hindsight health is green\\n```", ' + '"timestamp": "2026-04-24T20:31:00.000000+00:00"}]' + ) + + def __init__(self, **kwargs): + self.tool_progress_callback = kwargs.get("tool_progress_callback") + self.tools = [] + + def run_conversation(self, message, conversation_history=None, task_id=None): + self.tool_progress_callback( + "memory.prefetch", + None, + self.PREVIEW, + None, + providers=["hindsight"], + chars=14528, + ) + time.sleep(0.35) + return { + "final_response": "done", + "messages": [], + "api_calls": 1, + } + + +class RawMemoryContextPrefetchAgent(MemoryPrefetchAgent): + """Agent that emits the raw injected wrapper shape seen in Discord dogfood.""" + + PREVIEW = ( + '\n' + '[System note: The following is recalled memory context, NOT new user input. ' + 'Treat as informational background data.]\n\n' + '# Hindsight Memory (persistent cross-session context)\n' + 'Use this to answer questions about the user and prior sessions. Do not call tools to look up information that is already present here.\n\n' + '- [[{"role": "user", "content": "User: [jim] this is what im seeing", ' + '"timestamp": "2026-04-24T21:25:35.131663+00:00"}]]\n' + '' + ) + + +class PlainTranscriptMemoryPrefetchAgent(MemoryPrefetchAgent): + """Agent that emits plain Hindsight transcript lines without JSON payloads.""" + + PREVIEW = ( + '[System note: The following is recalled memory context, NOT new user input.]\n' + '# Hindsight Memory (persistent cross-session context)\n' + 'Use this to answer questions about the user and prior sessions.\n' + 'Assistant: Live dashboard is up. Tailnet link is ready.\n' + 'Assistant: hi jim. Ready.' + ) + + async def _run_with_agent( monkeypatch, tmp_path, @@ -708,6 +769,110 @@ async def test_run_agent_previewed_final_marks_already_sent(monkeypatch, tmp_pat assert [call["content"] for call in adapter.sent] == ["You're welcome."] +@pytest.mark.asyncio +async def test_discord_memory_prefetch_preview_uses_markdown_not_raw_json(monkeypatch, tmp_path): + adapter, result = await _run_with_agent( + monkeypatch, + tmp_path, + MemoryPrefetchAgent, + session_id="sess-memory-prefetch-discord", + config_data={"display": {"memory_context": "preview", "memory_context_max_chars": 700}}, + platform=Platform.DISCORD, + chat_id="discord-1", + chat_type="dm", + thread_id=None, + ) + + assert result["final_response"] == "done" + all_content = "\n".join(call["content"] for call in adapter.sent) + all_content += "\n".join(call["content"] for call in adapter.edits) + assert "🧠 **Loaded memory context from hindsight** (14528 chars)" in all_content + assert "> 1. Hindsight health is green" in all_content + assert "> 2. Updated current checklist:" in all_content + assert "**User**:" not in all_content + assert "**Assistant**:" not in all_content + assert "[jim]" not in all_content + assert '"role"' not in all_content + assert '"content"' not in all_content + assert "Use this to answer questions" not in all_content + assert "```" not in all_content + + +@pytest.mark.asyncio +async def test_discord_memory_prefetch_preview_hides_raw_memory_context_wrapper(monkeypatch, tmp_path): + adapter, result = await _run_with_agent( + monkeypatch, + tmp_path, + RawMemoryContextPrefetchAgent, + session_id="sess-memory-prefetch-discord-wrapper", + config_data={"display": {"memory_context": "preview", "memory_context_max_chars": 700}}, + platform=Platform.DISCORD, + chat_id="discord-1", + chat_type="dm", + thread_id=None, + ) + + assert result["final_response"] == "done" + all_content = "\n".join(call["content"] for call in adapter.sent) + all_content += "\n".join(call["content"] for call in adapter.edits) + assert "🧠 **Loaded memory context from hindsight** (14528 chars)" in all_content + assert "> 1. this is what im seeing" in all_content + assert "**User**:" not in all_content + assert "[jim]" not in all_content + assert "" not in all_content + assert "" not in all_content + assert "[System note:" not in all_content + assert "# Hindsight Memory" not in all_content + assert "Use this to answer questions" not in all_content + assert '"role"' not in all_content + assert '"content"' not in all_content + + +@pytest.mark.asyncio +async def test_discord_memory_prefetch_preview_formats_plain_hindsight_transcript(monkeypatch, tmp_path): + adapter, result = await _run_with_agent( + monkeypatch, + tmp_path, + PlainTranscriptMemoryPrefetchAgent, + session_id="sess-memory-prefetch-discord-plain", + config_data={"display": {"memory_context": "preview", "memory_context_max_chars": 700}}, + platform=Platform.DISCORD, + chat_id="discord-1", + chat_type="dm", + thread_id=None, + ) + + assert result["final_response"] == "done" + all_content = "\n".join(call["content"] for call in adapter.sent) + all_content += "\n".join(call["content"] for call in adapter.edits) + assert "> 1. Live dashboard is up. Tailnet link is ready." in all_content + assert "> 2. hi jim. Ready." in all_content + assert "Assistant:" not in all_content + assert "# Hindsight Memory" not in all_content + assert "Use this to answer questions" not in all_content + + +@pytest.mark.asyncio +async def test_memory_prefetch_progress_is_discord_only(monkeypatch, tmp_path): + adapter, result = await _run_with_agent( + monkeypatch, + tmp_path, + MemoryPrefetchAgent, + session_id="sess-memory-prefetch-telegram", + config_data={"display": {"memory_context": "preview", "memory_context_max_chars": 700}}, + platform=Platform.TELEGRAM, + chat_id="-1001", + chat_type="group", + thread_id="17585", + ) + + assert result["final_response"] == "done" + all_content = "\n".join(call["content"] for call in adapter.sent) + all_content += "\n".join(call["content"] for call in adapter.edits) + assert "Loaded memory context" not in all_content + assert "Hindsight health is green" not in all_content + + @pytest.mark.asyncio async def test_run_agent_matrix_streaming_omits_cursor(monkeypatch, tmp_path): adapter, result = await _run_with_agent( diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 9c54daffe5..8996440f1f 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -4771,6 +4771,110 @@ class TestMemoryContextSanitization: assert "stale observation" not in result assert "how is the honcho working" in result + def test_memory_prefetch_emits_progress_with_provider_names(self, agent): + raw_memory = ( + "# Hindsight Memory (persistent cross-session context)\n" + "Use this to answer questions about the user and prior sessions.\n\n" + '- [{"role": "user", "content": "User: [jim] prior useful context"}]\n' + ) + + class FakeMemoryManager: + def on_turn_start(self, *_args, **_kwargs): + pass + + def prefetch_all_details(self, query, *, session_id=""): + assert query == "hello" + return raw_memory, ["hindsight"] + + def sync_all(self, *_args, **_kwargs): + pass + + def queue_prefetch_all(self, *_args, **_kwargs): + pass + + events = [] + + def capture_progress(*args, **kwargs): + events.append((args, kwargs)) + + agent._cached_system_prompt = "You are helpful." + agent._use_prompt_caching = False + agent.tool_delay = 0 + agent.compression_enabled = False + agent.save_trajectories = False + agent._memory_manager = FakeMemoryManager() + agent.tool_progress_callback = capture_progress + agent.client.chat.completions.create.return_value = _mock_response( + content="Final answer", + finish_reason="stop", + ) + + with ( + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("hello") + + assert result["final_response"] == "Final answer" + sent_messages = agent.client.chat.completions.create.call_args.kwargs["messages"] + assert "" in sent_messages[-1]["content"] + assert "prior useful context" in sent_messages[-1]["content"] + memory_events = [(args, kwargs) for args, kwargs in events if args[0] == "memory.prefetch"] + assert len(memory_events) == 1 + args, kwargs = memory_events[0] + assert args[:2] == ("memory.prefetch", "memory") + assert "prior useful context" in args[2] + assert kwargs["providers"] == ["hindsight"] + assert kwargs["provider_count"] == 1 + assert kwargs["chars"] == len(raw_memory) + assert kwargs["injected"] is True + + def test_memory_prefetch_progress_only_emits_when_context_block_injected(self, agent): + raw_memory = "memory text" + + class FakeMemoryManager: + def on_turn_start(self, *_args, **_kwargs): + pass + + def prefetch_all_details(self, query, *, session_id=""): + assert query == "hello" + return raw_memory, ["hindsight"] + + def sync_all(self, *_args, **_kwargs): + pass + + def queue_prefetch_all(self, *_args, **_kwargs): + pass + + events = [] + + def capture_progress(*args, **kwargs): + events.append((args, kwargs)) + + agent._cached_system_prompt = "You are helpful." + agent._use_prompt_caching = False + agent.tool_delay = 0 + agent.compression_enabled = False + agent.save_trajectories = False + agent._memory_manager = FakeMemoryManager() + agent.tool_progress_callback = capture_progress + agent.client.chat.completions.create.return_value = _mock_response( + content="Final answer", + finish_reason="stop", + ) + + with ( + patch.object(run_agent, "build_memory_context_block", return_value=""), + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("hello") + + assert result["final_response"] == "Final answer" + assert [args for args, _kwargs in events if args[0] == "memory.prefetch"] == [] + class TestMemoryProviderTurnStart: """run_conversation() must call memory_manager.on_turn_start() before prefetch_all().