mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-20 10:11:58 +00:00
Merge pull request #48533 from NousResearch/hermes/hermes-4061c6a8
fix(prompt,desktop,tui): dedupe parallel-tool-call steer + surface self-improvement review summary
This commit is contained in:
commit
81eaedd0f5
5 changed files with 110 additions and 8 deletions
|
|
@ -320,9 +320,11 @@ TASK_COMPLETION_GUIDANCE = (
|
|||
# concurrently when they are independent (read-only tools always; path-scoped
|
||||
# file ops when their targets don't overlap — see
|
||||
# run_agent._execute_tool_calls / tool_dispatch_helpers). The missing piece
|
||||
# was telling the *model* to emit those calls together in the first place;
|
||||
# nothing in the open-source system prompt encouraged batching. This block
|
||||
# closes that gap.
|
||||
# was telling the *model* to emit those calls together in the first place.
|
||||
# Until now the only batching steer in the prompt lived in
|
||||
# GOOGLE_MODEL_OPERATIONAL_GUIDANCE — Gemini/Gemma got it, every other model
|
||||
# got nothing. This block makes the steer universal; the now-redundant
|
||||
# Google-only bullet has been dropped so no model receives it twice.
|
||||
#
|
||||
# Short on purpose — shipped in the cached system prompt to every user, every
|
||||
# session. Token cost is paid once at install and amortised across all
|
||||
|
|
@ -425,9 +427,10 @@ GOOGLE_MODEL_OPERATIONAL_GUIDANCE = (
|
|||
"package.json, requirements.txt, Cargo.toml, etc. before importing.\n"
|
||||
"- **Conciseness:** Keep explanatory text brief — a few sentences, not "
|
||||
"paragraphs. Focus on actions and results over narration.\n"
|
||||
"- **Parallel tool calls:** When you need to perform multiple independent "
|
||||
"operations (e.g. reading several files), make all the tool calls in a "
|
||||
"single response rather than sequentially.\n"
|
||||
# Parallel-tool-call steering now lives in the universal
|
||||
# PARALLEL_TOOL_CALL_GUIDANCE block (injected for all models), so it is no
|
||||
# longer duplicated here — keeping it would send Gemini/Gemma the same
|
||||
# instruction twice.
|
||||
"- **Non-interactive commands:** Use flags like -y, --yes, --non-interactive "
|
||||
"to prevent CLI tools from hanging on prompts.\n"
|
||||
"- **Keep going:** Work autonomously until the task is fully resolved. "
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import {
|
|||
type GatewayEventPayload,
|
||||
reasoningPart,
|
||||
renderMediaTags,
|
||||
textPart,
|
||||
upsertToolPart
|
||||
} from '@/lib/chat-messages'
|
||||
import { coerceGatewayText, coerceThinkingText, normalizePersonalityValue } from '@/lib/chat-runtime'
|
||||
|
|
@ -1080,6 +1081,32 @@ export function useMessageStream({
|
|||
// completions / watch matches here — re-sync the status stack.
|
||||
void refreshBackgroundProcesses(sessionId)
|
||||
}
|
||||
} else if (event.type === 'review.summary') {
|
||||
// Self-improvement background review saved something to memory/skills
|
||||
// and emitted a persistent summary (Python formats it as
|
||||
// "💾 Self-improvement review: …"). The CLI prints this via
|
||||
// prompt_toolkit and the Ink TUI renders it as a system line; the
|
||||
// desktop has neither, so without this handler the skill/memory
|
||||
// change happens silently. Surface it as a persistent system message
|
||||
// in the transcript so the user is always informed — it must not be a
|
||||
// transient toast that can be missed.
|
||||
const text = coerceGatewayText(payload?.text).trim()
|
||||
|
||||
if (text && sessionId) {
|
||||
flushQueuedDeltas(sessionId)
|
||||
updateSessionState(sessionId, state => ({
|
||||
...state,
|
||||
messages: [
|
||||
...state.messages,
|
||||
{
|
||||
id: `review-summary-${Date.now()}`,
|
||||
role: 'system',
|
||||
parts: [textPart(text)],
|
||||
timestamp: Math.floor(Date.now() / 1000)
|
||||
}
|
||||
]
|
||||
}))
|
||||
}
|
||||
} else if (event.type === 'error') {
|
||||
const errorMessage = payload?.message || 'Hermes reported an error'
|
||||
const looksLikeProviderSetup = isProviderSetupErrorMessage(errorMessage)
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ from agent.prompt_builder import (
|
|||
TOOL_USE_ENFORCEMENT_MODELS,
|
||||
OPENAI_MODEL_EXECUTION_GUIDANCE,
|
||||
PARALLEL_TOOL_CALL_GUIDANCE,
|
||||
GOOGLE_MODEL_OPERATIONAL_GUIDANCE,
|
||||
MEMORY_GUIDANCE,
|
||||
SESSION_SEARCH_GUIDANCE,
|
||||
PLATFORM_HINTS,
|
||||
|
|
@ -1512,8 +1513,9 @@ class TestParallelToolCallGuidance:
|
|||
|
||||
def test_steers_batching_into_one_response(self):
|
||||
text = PARALLEL_TOOL_CALL_GUIDANCE.lower()
|
||||
# Must tell the model to group independent calls together.
|
||||
assert "single response" in text or "same" in text and "turn" in text
|
||||
# Must tell the model to group independent calls together — accept any
|
||||
# phrasing that means "one turn" without freezing exact wording.
|
||||
assert "single response" in text or ("same" in text and "turn" in text)
|
||||
assert "independent" in text
|
||||
|
||||
def test_carves_out_dependent_calls(self):
|
||||
|
|
@ -1533,6 +1535,11 @@ class TestParallelToolCallGuidance:
|
|||
# Heading delimits it as its own section in the assembled prompt.
|
||||
assert PARALLEL_TOOL_CALL_GUIDANCE.lstrip().startswith("#")
|
||||
|
||||
def test_not_duplicated_in_google_guidance(self):
|
||||
# The universal block is now the single source of parallel-batching
|
||||
# steer. The Google-only block must NOT carry its own copy, otherwise
|
||||
# Gemini/Gemma would receive the instruction twice in one prompt.
|
||||
assert "parallel tool call" not in GOOGLE_MODEL_OPERATIONAL_GUIDANCE.lower()
|
||||
|
||||
|
||||
# =========================================================================
|
||||
|
|
|
|||
|
|
@ -120,3 +120,48 @@ def test_review_summary_callback_survives_agent_without_attribute(server, monkey
|
|||
# LockedAgent's __slots__ blocks background_review_callback assignment.
|
||||
server._init_session("sid-x", "key-x", LockedAgent(), [], cols=80)
|
||||
# If we got here, _init_session swallowed the AttributeError gracefully.
|
||||
|
||||
|
||||
def test_init_session_sets_memory_notifications_from_config(server, monkeypatch):
|
||||
"""_init_session must apply display.memory_notifications to the agent so
|
||||
the TUI/desktop honors the same off/on/verbose toggle as the messaging
|
||||
gateway and CLI. Without this the review always behaved as 'on'."""
|
||||
monkeypatch.setattr(server, "_SlashWorker", lambda *a, **kw: object())
|
||||
monkeypatch.setattr(server, "_wire_callbacks", lambda sid: None)
|
||||
monkeypatch.setattr(server, "_notify_session_boundary", lambda *a, **kw: None)
|
||||
monkeypatch.setattr(server, "_session_info", lambda agent, session=None: {"model": "m"})
|
||||
monkeypatch.setattr(server, "_load_show_reasoning", lambda: False)
|
||||
monkeypatch.setattr(server, "_load_tool_progress_mode", lambda: "all")
|
||||
monkeypatch.setattr(server, "_emit", lambda *a, **kw: None)
|
||||
monkeypatch.setattr(server, "_load_memory_notifications", lambda: "verbose")
|
||||
|
||||
class FakeAgent:
|
||||
model = "fake/model"
|
||||
background_review_callback = None
|
||||
memory_notifications = "on"
|
||||
|
||||
agent = FakeAgent()
|
||||
server._init_session("sid-mn", "key-mn", agent, [], cols=80)
|
||||
|
||||
assert agent.memory_notifications == "verbose"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"raw,expected",
|
||||
[
|
||||
(None, "on"), # unset → default on
|
||||
("on", "on"),
|
||||
("off", "off"),
|
||||
("verbose", "verbose"),
|
||||
("VERBOSE", "verbose"), # case-normalized
|
||||
(True, "on"), # bool back-compat
|
||||
(False, "off"),
|
||||
],
|
||||
)
|
||||
def test_load_memory_notifications_normalization(server, monkeypatch, raw, expected):
|
||||
"""_load_memory_notifications mirrors the gateway's bool→str normalization
|
||||
and defaults to 'on' when the key is absent."""
|
||||
display = {} if raw is None else {"memory_notifications": raw}
|
||||
monkeypatch.setattr(server, "_load_cfg", lambda: {"display": display})
|
||||
assert server._load_memory_notifications() == expected
|
||||
|
||||
|
|
|
|||
|
|
@ -1907,6 +1907,22 @@ def _load_show_reasoning() -> bool:
|
|||
return bool((_load_cfg().get("display") or {}).get("show_reasoning", False))
|
||||
|
||||
|
||||
def _load_memory_notifications() -> str:
|
||||
"""Self-improvement review notification mode from config.yaml.
|
||||
|
||||
Parity with the messaging gateway (``gateway/run.py``) and the classic CLI:
|
||||
``display.memory_notifications`` controls whether the background review's
|
||||
"💾 Self-improvement review: …" summary is surfaced. Without this the
|
||||
TUI/desktop backend always behaved as ``"on"`` and silently ignored a user
|
||||
who set ``off``. Accepts ``off`` / ``on`` (default) / ``verbose``; a bool is
|
||||
normalized for back-compat.
|
||||
"""
|
||||
raw = (_load_cfg().get("display") or {}).get("memory_notifications")
|
||||
if isinstance(raw, bool):
|
||||
return "on" if raw else "off"
|
||||
return str(raw).lower() if raw else "on"
|
||||
|
||||
|
||||
def _load_tool_progress_mode() -> str:
|
||||
env = os.environ.get("HERMES_TUI_TOOL_PROGRESS", "").strip().lower()
|
||||
if env in {"off", "new", "all", "verbose"}:
|
||||
|
|
@ -3770,6 +3786,10 @@ def _init_session(
|
|||
agent.background_review_callback = lambda message, _sid=sid: _emit(
|
||||
"review.summary", _sid, {"text": str(message)}
|
||||
)
|
||||
# Honor display.memory_notifications (off | on | verbose) like the
|
||||
# messaging gateway and CLI do — otherwise the review always behaved as
|
||||
# "on" on the TUI/desktop and a user who set "off" was ignored.
|
||||
agent.memory_notifications = _load_memory_notifications()
|
||||
except Exception:
|
||||
# Bare AIAgents that don't expose the attribute (unlikely, but keep
|
||||
# session startup resilient).
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue