diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index b8e60722168..97836f27b05 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -320,9 +320,11 @@ TASK_COMPLETION_GUIDANCE = ( # concurrently when they are independent (read-only tools always; path-scoped # file ops when their targets don't overlap — see # run_agent._execute_tool_calls / tool_dispatch_helpers). The missing piece -# was telling the *model* to emit those calls together in the first place; -# nothing in the open-source system prompt encouraged batching. This block -# closes that gap. +# was telling the *model* to emit those calls together in the first place. +# Until now the only batching steer in the prompt lived in +# GOOGLE_MODEL_OPERATIONAL_GUIDANCE — Gemini/Gemma got it, every other model +# got nothing. This block makes the steer universal; the now-redundant +# Google-only bullet has been dropped so no model receives it twice. # # Short on purpose — shipped in the cached system prompt to every user, every # session. Token cost is paid once at install and amortised across all @@ -425,9 +427,10 @@ GOOGLE_MODEL_OPERATIONAL_GUIDANCE = ( "package.json, requirements.txt, Cargo.toml, etc. before importing.\n" "- **Conciseness:** Keep explanatory text brief — a few sentences, not " "paragraphs. Focus on actions and results over narration.\n" - "- **Parallel tool calls:** When you need to perform multiple independent " - "operations (e.g. reading several files), make all the tool calls in a " - "single response rather than sequentially.\n" + # Parallel-tool-call steering now lives in the universal + # PARALLEL_TOOL_CALL_GUIDANCE block (injected for all models), so it is no + # longer duplicated here — keeping it would send Gemini/Gemma the same + # instruction twice. "- **Non-interactive commands:** Use flags like -y, --yes, --non-interactive " "to prevent CLI tools from hanging on prompts.\n" "- **Keep going:** Work autonomously until the task is fully resolved. " diff --git a/apps/desktop/src/app/session/hooks/use-message-stream.ts b/apps/desktop/src/app/session/hooks/use-message-stream.ts index 3ee52ec8eb7..909c1424796 100644 --- a/apps/desktop/src/app/session/hooks/use-message-stream.ts +++ b/apps/desktop/src/app/session/hooks/use-message-stream.ts @@ -13,6 +13,7 @@ import { type GatewayEventPayload, reasoningPart, renderMediaTags, + textPart, upsertToolPart } from '@/lib/chat-messages' import { coerceGatewayText, coerceThinkingText, normalizePersonalityValue } from '@/lib/chat-runtime' @@ -1080,6 +1081,32 @@ export function useMessageStream({ // completions / watch matches here — re-sync the status stack. void refreshBackgroundProcesses(sessionId) } + } else if (event.type === 'review.summary') { + // Self-improvement background review saved something to memory/skills + // and emitted a persistent summary (Python formats it as + // "💾 Self-improvement review: …"). The CLI prints this via + // prompt_toolkit and the Ink TUI renders it as a system line; the + // desktop has neither, so without this handler the skill/memory + // change happens silently. Surface it as a persistent system message + // in the transcript so the user is always informed — it must not be a + // transient toast that can be missed. + const text = coerceGatewayText(payload?.text).trim() + + if (text && sessionId) { + flushQueuedDeltas(sessionId) + updateSessionState(sessionId, state => ({ + ...state, + messages: [ + ...state.messages, + { + id: `review-summary-${Date.now()}`, + role: 'system', + parts: [textPart(text)], + timestamp: Math.floor(Date.now() / 1000) + } + ] + })) + } } else if (event.type === 'error') { const errorMessage = payload?.message || 'Hermes reported an error' const looksLikeProviderSetup = isProviderSetupErrorMessage(errorMessage) diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index e98c26e319f..6f0206dfbcb 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -28,6 +28,7 @@ from agent.prompt_builder import ( TOOL_USE_ENFORCEMENT_MODELS, OPENAI_MODEL_EXECUTION_GUIDANCE, PARALLEL_TOOL_CALL_GUIDANCE, + GOOGLE_MODEL_OPERATIONAL_GUIDANCE, MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, PLATFORM_HINTS, @@ -1512,8 +1513,9 @@ class TestParallelToolCallGuidance: def test_steers_batching_into_one_response(self): text = PARALLEL_TOOL_CALL_GUIDANCE.lower() - # Must tell the model to group independent calls together. - assert "single response" in text or "same" in text and "turn" in text + # Must tell the model to group independent calls together — accept any + # phrasing that means "one turn" without freezing exact wording. + assert "single response" in text or ("same" in text and "turn" in text) assert "independent" in text def test_carves_out_dependent_calls(self): @@ -1533,6 +1535,11 @@ class TestParallelToolCallGuidance: # Heading delimits it as its own section in the assembled prompt. assert PARALLEL_TOOL_CALL_GUIDANCE.lstrip().startswith("#") + def test_not_duplicated_in_google_guidance(self): + # The universal block is now the single source of parallel-batching + # steer. The Google-only block must NOT carry its own copy, otherwise + # Gemini/Gemma would receive the instruction twice in one prompt. + assert "parallel tool call" not in GOOGLE_MODEL_OPERATIONAL_GUIDANCE.lower() # ========================================================================= diff --git a/tests/tui_gateway/test_review_summary_callback.py b/tests/tui_gateway/test_review_summary_callback.py index 2c6d3cbeb7c..6ca17889d64 100644 --- a/tests/tui_gateway/test_review_summary_callback.py +++ b/tests/tui_gateway/test_review_summary_callback.py @@ -120,3 +120,48 @@ def test_review_summary_callback_survives_agent_without_attribute(server, monkey # LockedAgent's __slots__ blocks background_review_callback assignment. server._init_session("sid-x", "key-x", LockedAgent(), [], cols=80) # If we got here, _init_session swallowed the AttributeError gracefully. + + +def test_init_session_sets_memory_notifications_from_config(server, monkeypatch): + """_init_session must apply display.memory_notifications to the agent so + the TUI/desktop honors the same off/on/verbose toggle as the messaging + gateway and CLI. Without this the review always behaved as 'on'.""" + monkeypatch.setattr(server, "_SlashWorker", lambda *a, **kw: object()) + monkeypatch.setattr(server, "_wire_callbacks", lambda sid: None) + monkeypatch.setattr(server, "_notify_session_boundary", lambda *a, **kw: None) + monkeypatch.setattr(server, "_session_info", lambda agent, session=None: {"model": "m"}) + monkeypatch.setattr(server, "_load_show_reasoning", lambda: False) + monkeypatch.setattr(server, "_load_tool_progress_mode", lambda: "all") + monkeypatch.setattr(server, "_emit", lambda *a, **kw: None) + monkeypatch.setattr(server, "_load_memory_notifications", lambda: "verbose") + + class FakeAgent: + model = "fake/model" + background_review_callback = None + memory_notifications = "on" + + agent = FakeAgent() + server._init_session("sid-mn", "key-mn", agent, [], cols=80) + + assert agent.memory_notifications == "verbose" + + +@pytest.mark.parametrize( + "raw,expected", + [ + (None, "on"), # unset → default on + ("on", "on"), + ("off", "off"), + ("verbose", "verbose"), + ("VERBOSE", "verbose"), # case-normalized + (True, "on"), # bool back-compat + (False, "off"), + ], +) +def test_load_memory_notifications_normalization(server, monkeypatch, raw, expected): + """_load_memory_notifications mirrors the gateway's bool→str normalization + and defaults to 'on' when the key is absent.""" + display = {} if raw is None else {"memory_notifications": raw} + monkeypatch.setattr(server, "_load_cfg", lambda: {"display": display}) + assert server._load_memory_notifications() == expected + diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 12ee450c6f5..782a8ba457b 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1907,6 +1907,22 @@ def _load_show_reasoning() -> bool: return bool((_load_cfg().get("display") or {}).get("show_reasoning", False)) +def _load_memory_notifications() -> str: + """Self-improvement review notification mode from config.yaml. + + Parity with the messaging gateway (``gateway/run.py``) and the classic CLI: + ``display.memory_notifications`` controls whether the background review's + "💾 Self-improvement review: …" summary is surfaced. Without this the + TUI/desktop backend always behaved as ``"on"`` and silently ignored a user + who set ``off``. Accepts ``off`` / ``on`` (default) / ``verbose``; a bool is + normalized for back-compat. + """ + raw = (_load_cfg().get("display") or {}).get("memory_notifications") + if isinstance(raw, bool): + return "on" if raw else "off" + return str(raw).lower() if raw else "on" + + def _load_tool_progress_mode() -> str: env = os.environ.get("HERMES_TUI_TOOL_PROGRESS", "").strip().lower() if env in {"off", "new", "all", "verbose"}: @@ -3770,6 +3786,10 @@ def _init_session( agent.background_review_callback = lambda message, _sid=sid: _emit( "review.summary", _sid, {"text": str(message)} ) + # Honor display.memory_notifications (off | on | verbose) like the + # messaging gateway and CLI do — otherwise the review always behaved as + # "on" on the TUI/desktop and a user who set "off" was ignored. + agent.memory_notifications = _load_memory_notifications() except Exception: # Bare AIAgents that don't expose the attribute (unlikely, but keep # session startup resilient).